{ "best_metric": null, "best_model_checkpoint": null, "epoch": 96.0, "global_step": 33165312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999927924694332e-05, "loss": 3.4997, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.999855559929604e-05, "loss": 3.2767, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.999783195164876e-05, "loss": 3.1477, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.9997108304001485e-05, "loss": 3.131, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.99963861036495e-05, "loss": 3.073, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.999566245600222e-05, "loss": 3.0248, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.9994938808354945e-05, "loss": 3.0096, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.999421660800297e-05, "loss": 2.9718, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.999349296035569e-05, "loss": 2.9779, "step": 4500 }, { "epoch": 0.01, "learning_rate": 4.999276931270842e-05, "loss": 2.9819, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.999204566506114e-05, "loss": 2.9197, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.999132201741386e-05, "loss": 2.9364, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.9990598369766585e-05, "loss": 2.9215, "step": 6500 }, { "epoch": 0.02, "learning_rate": 4.998987472211931e-05, "loss": 2.9021, "step": 7000 }, { "epoch": 0.02, "learning_rate": 4.998915107447203e-05, "loss": 2.8957, "step": 7500 }, { "epoch": 0.02, "learning_rate": 4.998842742682475e-05, "loss": 2.8654, "step": 8000 }, { "epoch": 0.02, "learning_rate": 4.9987703779177474e-05, "loss": 2.8845, "step": 8500 }, { "epoch": 0.03, "learning_rate": 4.9986980131530196e-05, "loss": 2.8847, "step": 9000 }, { "epoch": 0.03, "learning_rate": 4.998625648388292e-05, "loss": 2.8642, "step": 9500 }, { "epoch": 0.03, "learning_rate": 4.998553428353094e-05, "loss": 2.883, "step": 10000 }, { "epoch": 0.03, "learning_rate": 4.998481063588366e-05, "loss": 2.8344, "step": 10500 }, { "epoch": 0.03, "learning_rate": 4.9984086988236385e-05, "loss": 2.8419, "step": 11000 }, { "epoch": 0.03, "learning_rate": 4.99833647878844e-05, "loss": 2.853, "step": 11500 }, { "epoch": 0.03, "learning_rate": 4.998264114023713e-05, "loss": 2.8367, "step": 12000 }, { "epoch": 0.04, "learning_rate": 4.998191749258985e-05, "loss": 2.8295, "step": 12500 }, { "epoch": 0.04, "learning_rate": 4.9981193844942574e-05, "loss": 2.8572, "step": 13000 }, { "epoch": 0.04, "learning_rate": 4.9980470197295297e-05, "loss": 2.8238, "step": 13500 }, { "epoch": 0.04, "learning_rate": 4.997974654964802e-05, "loss": 2.8145, "step": 14000 }, { "epoch": 0.04, "learning_rate": 4.997902290200074e-05, "loss": 2.8248, "step": 14500 }, { "epoch": 0.04, "learning_rate": 4.997829925435347e-05, "loss": 2.8202, "step": 15000 }, { "epoch": 0.04, "learning_rate": 4.997757850129678e-05, "loss": 2.7887, "step": 15500 }, { "epoch": 0.05, "learning_rate": 4.99768548536495e-05, "loss": 2.7912, "step": 16000 }, { "epoch": 0.05, "learning_rate": 4.997613120600222e-05, "loss": 2.7774, "step": 16500 }, { "epoch": 0.05, "learning_rate": 4.9975407558354946e-05, "loss": 2.7762, "step": 17000 }, { "epoch": 0.05, "learning_rate": 4.997468391070767e-05, "loss": 2.7691, "step": 17500 }, { "epoch": 0.05, "learning_rate": 4.997396026306039e-05, "loss": 2.7753, "step": 18000 }, { "epoch": 0.05, "learning_rate": 4.9973240957299e-05, "loss": 2.788, "step": 18500 }, { "epoch": 0.05, "learning_rate": 4.997251730965172e-05, "loss": 2.78, "step": 19000 }, { "epoch": 0.06, "learning_rate": 4.9971793662004443e-05, "loss": 2.7617, "step": 19500 }, { "epoch": 0.06, "learning_rate": 4.997107001435717e-05, "loss": 2.7686, "step": 20000 }, { "epoch": 0.06, "learning_rate": 4.9970346366709895e-05, "loss": 2.7487, "step": 20500 }, { "epoch": 0.06, "learning_rate": 4.996962271906262e-05, "loss": 2.7864, "step": 21000 }, { "epoch": 0.06, "learning_rate": 4.9968899071415346e-05, "loss": 2.7937, "step": 21500 }, { "epoch": 0.06, "learning_rate": 4.996817542376807e-05, "loss": 2.7423, "step": 22000 }, { "epoch": 0.07, "learning_rate": 4.996745177612079e-05, "loss": 2.7574, "step": 22500 }, { "epoch": 0.07, "learning_rate": 4.996672812847351e-05, "loss": 2.7589, "step": 23000 }, { "epoch": 0.07, "learning_rate": 4.9966004480826235e-05, "loss": 2.772, "step": 23500 }, { "epoch": 0.07, "learning_rate": 4.996528083317896e-05, "loss": 2.7527, "step": 24000 }, { "epoch": 0.07, "learning_rate": 4.996455718553168e-05, "loss": 2.7626, "step": 24500 }, { "epoch": 0.07, "learning_rate": 4.99638335378844e-05, "loss": 2.7416, "step": 25000 }, { "epoch": 0.07, "learning_rate": 4.996311133753242e-05, "loss": 2.744, "step": 25500 }, { "epoch": 0.08, "learning_rate": 4.9962387689885146e-05, "loss": 2.7605, "step": 26000 }, { "epoch": 0.08, "learning_rate": 4.996166548953316e-05, "loss": 2.7441, "step": 26500 }, { "epoch": 0.08, "learning_rate": 4.996094184188589e-05, "loss": 2.7389, "step": 27000 }, { "epoch": 0.08, "learning_rate": 4.996021819423861e-05, "loss": 2.7474, "step": 27500 }, { "epoch": 0.08, "learning_rate": 4.9959494546591335e-05, "loss": 2.7306, "step": 28000 }, { "epoch": 0.08, "learning_rate": 4.995877089894406e-05, "loss": 2.7143, "step": 28500 }, { "epoch": 0.08, "learning_rate": 4.995804725129678e-05, "loss": 2.7321, "step": 29000 }, { "epoch": 0.09, "learning_rate": 4.99573236036495e-05, "loss": 2.7652, "step": 29500 }, { "epoch": 0.09, "learning_rate": 4.9956599956002224e-05, "loss": 2.7128, "step": 30000 }, { "epoch": 0.09, "learning_rate": 4.9955876308354946e-05, "loss": 2.7794, "step": 30500 }, { "epoch": 0.09, "learning_rate": 4.995515266070767e-05, "loss": 2.7457, "step": 31000 }, { "epoch": 0.09, "learning_rate": 4.995443046035569e-05, "loss": 2.7409, "step": 31500 }, { "epoch": 0.09, "learning_rate": 4.995370681270841e-05, "loss": 2.7397, "step": 32000 }, { "epoch": 0.09, "learning_rate": 4.9952983165061135e-05, "loss": 2.7085, "step": 32500 }, { "epoch": 0.1, "learning_rate": 4.995225951741386e-05, "loss": 2.7524, "step": 33000 }, { "epoch": 0.1, "learning_rate": 4.995153586976658e-05, "loss": 2.7253, "step": 33500 }, { "epoch": 0.1, "learning_rate": 4.9950813669414595e-05, "loss": 2.7287, "step": 34000 }, { "epoch": 0.1, "learning_rate": 4.9950090021767324e-05, "loss": 2.7169, "step": 34500 }, { "epoch": 0.1, "learning_rate": 4.994936782141535e-05, "loss": 2.7318, "step": 35000 }, { "epoch": 0.1, "learning_rate": 4.994864417376807e-05, "loss": 2.7368, "step": 35500 }, { "epoch": 0.1, "learning_rate": 4.994792052612079e-05, "loss": 2.7312, "step": 36000 }, { "epoch": 0.11, "learning_rate": 4.9947196878473513e-05, "loss": 2.7155, "step": 36500 }, { "epoch": 0.11, "learning_rate": 4.9946473230826236e-05, "loss": 2.7206, "step": 37000 }, { "epoch": 0.11, "learning_rate": 4.994574958317896e-05, "loss": 2.7177, "step": 37500 }, { "epoch": 0.11, "learning_rate": 4.994502593553168e-05, "loss": 2.6947, "step": 38000 }, { "epoch": 0.11, "learning_rate": 4.99443022878844e-05, "loss": 2.6902, "step": 38500 }, { "epoch": 0.11, "learning_rate": 4.994358008753242e-05, "loss": 2.7243, "step": 39000 }, { "epoch": 0.11, "learning_rate": 4.994285643988515e-05, "loss": 2.6992, "step": 39500 }, { "epoch": 0.12, "learning_rate": 4.994213279223787e-05, "loss": 2.7074, "step": 40000 }, { "epoch": 0.12, "learning_rate": 4.994140914459059e-05, "loss": 2.7062, "step": 40500 }, { "epoch": 0.12, "learning_rate": 4.9940685496943314e-05, "loss": 2.6884, "step": 41000 }, { "epoch": 0.12, "learning_rate": 4.9939961849296036e-05, "loss": 2.6899, "step": 41500 }, { "epoch": 0.12, "learning_rate": 4.9939238201648765e-05, "loss": 2.7145, "step": 42000 }, { "epoch": 0.12, "learning_rate": 4.993851600129678e-05, "loss": 2.7053, "step": 42500 }, { "epoch": 0.12, "learning_rate": 4.99377923536495e-05, "loss": 2.7103, "step": 43000 }, { "epoch": 0.13, "learning_rate": 4.993707015329752e-05, "loss": 2.6959, "step": 43500 }, { "epoch": 0.13, "learning_rate": 4.993634650565025e-05, "loss": 2.6981, "step": 44000 }, { "epoch": 0.13, "learning_rate": 4.993562285800297e-05, "loss": 2.6876, "step": 44500 }, { "epoch": 0.13, "learning_rate": 4.993489921035569e-05, "loss": 2.6825, "step": 45000 }, { "epoch": 0.13, "learning_rate": 4.9934175562708414e-05, "loss": 2.6952, "step": 45500 }, { "epoch": 0.13, "learning_rate": 4.9933451915061136e-05, "loss": 2.6979, "step": 46000 }, { "epoch": 0.13, "learning_rate": 4.993272826741386e-05, "loss": 2.6938, "step": 46500 }, { "epoch": 0.14, "learning_rate": 4.993200461976658e-05, "loss": 2.7004, "step": 47000 }, { "epoch": 0.14, "learning_rate": 4.9931282419414596e-05, "loss": 2.6848, "step": 47500 }, { "epoch": 0.14, "learning_rate": 4.993055877176732e-05, "loss": 2.6907, "step": 48000 }, { "epoch": 0.14, "learning_rate": 4.992983512412005e-05, "loss": 2.7073, "step": 48500 }, { "epoch": 0.14, "learning_rate": 4.992911147647277e-05, "loss": 2.6378, "step": 49000 }, { "epoch": 0.14, "learning_rate": 4.992838927612079e-05, "loss": 2.6931, "step": 49500 }, { "epoch": 0.14, "learning_rate": 4.9927665628473514e-05, "loss": 2.6709, "step": 50000 }, { "epoch": 0.15, "learning_rate": 4.9926941980826236e-05, "loss": 2.7085, "step": 50500 }, { "epoch": 0.15, "learning_rate": 4.992621978047425e-05, "loss": 2.6938, "step": 51000 }, { "epoch": 0.15, "learning_rate": 4.9925496132826974e-05, "loss": 2.6894, "step": 51500 }, { "epoch": 0.15, "learning_rate": 4.9924772485179697e-05, "loss": 2.6854, "step": 52000 }, { "epoch": 0.15, "learning_rate": 4.9924048837532426e-05, "loss": 2.6828, "step": 52500 }, { "epoch": 0.15, "learning_rate": 4.992332518988515e-05, "loss": 2.7017, "step": 53000 }, { "epoch": 0.15, "learning_rate": 4.992260154223787e-05, "loss": 2.677, "step": 53500 }, { "epoch": 0.16, "learning_rate": 4.992187789459059e-05, "loss": 2.6788, "step": 54000 }, { "epoch": 0.16, "learning_rate": 4.9921154246943314e-05, "loss": 2.6774, "step": 54500 }, { "epoch": 0.16, "learning_rate": 4.992043059929604e-05, "loss": 2.6771, "step": 55000 }, { "epoch": 0.16, "learning_rate": 4.991970695164876e-05, "loss": 2.6716, "step": 55500 }, { "epoch": 0.16, "learning_rate": 4.991898330400148e-05, "loss": 2.6622, "step": 56000 }, { "epoch": 0.16, "learning_rate": 4.991825965635421e-05, "loss": 2.6711, "step": 56500 }, { "epoch": 0.16, "learning_rate": 4.991753600870693e-05, "loss": 2.7045, "step": 57000 }, { "epoch": 0.17, "learning_rate": 4.9916812361059655e-05, "loss": 2.6518, "step": 57500 }, { "epoch": 0.17, "learning_rate": 4.991608871341238e-05, "loss": 2.684, "step": 58000 }, { "epoch": 0.17, "learning_rate": 4.99153650657651e-05, "loss": 2.6713, "step": 58500 }, { "epoch": 0.17, "learning_rate": 4.991464141811782e-05, "loss": 2.6608, "step": 59000 }, { "epoch": 0.17, "learning_rate": 4.9913919217765844e-05, "loss": 2.6881, "step": 59500 }, { "epoch": 0.17, "learning_rate": 4.9913195570118566e-05, "loss": 2.6478, "step": 60000 }, { "epoch": 0.18, "learning_rate": 4.991247192247129e-05, "loss": 2.6859, "step": 60500 }, { "epoch": 0.18, "learning_rate": 4.991174827482401e-05, "loss": 2.6577, "step": 61000 }, { "epoch": 0.18, "learning_rate": 4.991102462717673e-05, "loss": 2.6815, "step": 61500 }, { "epoch": 0.18, "learning_rate": 4.991030242682475e-05, "loss": 2.6627, "step": 62000 }, { "epoch": 0.18, "learning_rate": 4.990957877917748e-05, "loss": 2.6699, "step": 62500 }, { "epoch": 0.18, "learning_rate": 4.99088551315302e-05, "loss": 2.6567, "step": 63000 }, { "epoch": 0.18, "learning_rate": 4.9908132931178215e-05, "loss": 2.6464, "step": 63500 }, { "epoch": 0.19, "learning_rate": 4.990740928353094e-05, "loss": 2.6627, "step": 64000 }, { "epoch": 0.19, "learning_rate": 4.9906685635883666e-05, "loss": 2.6536, "step": 64500 }, { "epoch": 0.19, "learning_rate": 4.990596198823639e-05, "loss": 2.6682, "step": 65000 }, { "epoch": 0.19, "learning_rate": 4.990523834058911e-05, "loss": 2.6684, "step": 65500 }, { "epoch": 0.19, "learning_rate": 4.9904516140237126e-05, "loss": 2.6456, "step": 66000 }, { "epoch": 0.19, "learning_rate": 4.990379249258985e-05, "loss": 2.6441, "step": 66500 }, { "epoch": 0.19, "learning_rate": 4.990306884494258e-05, "loss": 2.636, "step": 67000 }, { "epoch": 0.2, "learning_rate": 4.99023451972953e-05, "loss": 2.6335, "step": 67500 }, { "epoch": 0.2, "learning_rate": 4.990162154964802e-05, "loss": 2.6513, "step": 68000 }, { "epoch": 0.2, "learning_rate": 4.9900897902000744e-05, "loss": 2.6286, "step": 68500 }, { "epoch": 0.2, "learning_rate": 4.990017570164876e-05, "loss": 2.653, "step": 69000 }, { "epoch": 0.2, "learning_rate": 4.9899453501296775e-05, "loss": 2.6234, "step": 69500 }, { "epoch": 0.2, "learning_rate": 4.98987298536495e-05, "loss": 2.6718, "step": 70000 }, { "epoch": 0.2, "learning_rate": 4.9898006206002227e-05, "loss": 2.6521, "step": 70500 }, { "epoch": 0.21, "learning_rate": 4.989728255835495e-05, "loss": 2.6371, "step": 71000 }, { "epoch": 0.21, "learning_rate": 4.989655891070767e-05, "loss": 2.6477, "step": 71500 }, { "epoch": 0.21, "learning_rate": 4.98958352630604e-05, "loss": 2.6493, "step": 72000 }, { "epoch": 0.21, "learning_rate": 4.989511161541312e-05, "loss": 2.64, "step": 72500 }, { "epoch": 0.21, "learning_rate": 4.9894387967765844e-05, "loss": 2.6353, "step": 73000 }, { "epoch": 0.21, "learning_rate": 4.989366432011857e-05, "loss": 2.6465, "step": 73500 }, { "epoch": 0.21, "learning_rate": 4.989294211976658e-05, "loss": 2.6686, "step": 74000 }, { "epoch": 0.22, "learning_rate": 4.9892218472119304e-05, "loss": 2.6793, "step": 74500 }, { "epoch": 0.22, "learning_rate": 4.989149482447203e-05, "loss": 2.6164, "step": 75000 }, { "epoch": 0.22, "learning_rate": 4.989077117682475e-05, "loss": 2.6425, "step": 75500 }, { "epoch": 0.22, "learning_rate": 4.989004752917748e-05, "loss": 2.6471, "step": 76000 }, { "epoch": 0.22, "learning_rate": 4.98893238815302e-05, "loss": 2.6553, "step": 76500 }, { "epoch": 0.22, "learning_rate": 4.988860023388292e-05, "loss": 2.6485, "step": 77000 }, { "epoch": 0.22, "learning_rate": 4.9887876586235645e-05, "loss": 2.6462, "step": 77500 }, { "epoch": 0.23, "learning_rate": 4.988715293858837e-05, "loss": 2.6275, "step": 78000 }, { "epoch": 0.23, "learning_rate": 4.988643073823638e-05, "loss": 2.6418, "step": 78500 }, { "epoch": 0.23, "learning_rate": 4.988570709058911e-05, "loss": 2.6582, "step": 79000 }, { "epoch": 0.23, "learning_rate": 4.9884983442941834e-05, "loss": 2.6366, "step": 79500 }, { "epoch": 0.23, "learning_rate": 4.9884259795294556e-05, "loss": 2.6325, "step": 80000 }, { "epoch": 0.23, "learning_rate": 4.988353614764728e-05, "loss": 2.6434, "step": 80500 }, { "epoch": 0.23, "learning_rate": 4.98828125e-05, "loss": 2.624, "step": 81000 }, { "epoch": 0.24, "learning_rate": 4.988209029964802e-05, "loss": 2.6362, "step": 81500 }, { "epoch": 0.24, "learning_rate": 4.9881366652000745e-05, "loss": 2.6123, "step": 82000 }, { "epoch": 0.24, "learning_rate": 4.988064300435347e-05, "loss": 2.6453, "step": 82500 }, { "epoch": 0.24, "learning_rate": 4.987991935670619e-05, "loss": 2.6372, "step": 83000 }, { "epoch": 0.24, "learning_rate": 4.987919570905891e-05, "loss": 2.6045, "step": 83500 }, { "epoch": 0.24, "learning_rate": 4.987847350870693e-05, "loss": 2.6328, "step": 84000 }, { "epoch": 0.24, "learning_rate": 4.987774986105965e-05, "loss": 2.665, "step": 84500 }, { "epoch": 0.25, "learning_rate": 4.987702621341238e-05, "loss": 2.6354, "step": 85000 }, { "epoch": 0.25, "learning_rate": 4.98763025657651e-05, "loss": 2.6335, "step": 85500 }, { "epoch": 0.25, "learning_rate": 4.9875580365413116e-05, "loss": 2.6547, "step": 86000 }, { "epoch": 0.25, "learning_rate": 4.9874856717765845e-05, "loss": 2.6419, "step": 86500 }, { "epoch": 0.25, "learning_rate": 4.987413451741386e-05, "loss": 2.6194, "step": 87000 }, { "epoch": 0.25, "learning_rate": 4.987341086976658e-05, "loss": 2.6221, "step": 87500 }, { "epoch": 0.25, "learning_rate": 4.9872687222119305e-05, "loss": 2.6137, "step": 88000 }, { "epoch": 0.26, "learning_rate": 4.987196357447203e-05, "loss": 2.6274, "step": 88500 }, { "epoch": 0.26, "learning_rate": 4.9871239926824757e-05, "loss": 2.6297, "step": 89000 }, { "epoch": 0.26, "learning_rate": 4.987051627917748e-05, "loss": 2.6419, "step": 89500 }, { "epoch": 0.26, "learning_rate": 4.98697926315302e-05, "loss": 2.6202, "step": 90000 }, { "epoch": 0.26, "learning_rate": 4.9869070431178217e-05, "loss": 2.6238, "step": 90500 }, { "epoch": 0.26, "learning_rate": 4.986834678353094e-05, "loss": 2.6561, "step": 91000 }, { "epoch": 0.26, "learning_rate": 4.986762313588366e-05, "loss": 2.6071, "step": 91500 }, { "epoch": 0.27, "learning_rate": 4.986689948823638e-05, "loss": 2.6369, "step": 92000 }, { "epoch": 0.27, "learning_rate": 4.9866177287884406e-05, "loss": 2.6028, "step": 92500 }, { "epoch": 0.27, "learning_rate": 4.986545364023713e-05, "loss": 2.6313, "step": 93000 }, { "epoch": 0.27, "learning_rate": 4.986472999258985e-05, "loss": 2.5917, "step": 93500 }, { "epoch": 0.27, "learning_rate": 4.986400634494257e-05, "loss": 2.6462, "step": 94000 }, { "epoch": 0.27, "learning_rate": 4.98632826972953e-05, "loss": 2.6301, "step": 94500 }, { "epoch": 0.27, "learning_rate": 4.9862559049648024e-05, "loss": 2.6154, "step": 95000 }, { "epoch": 0.28, "learning_rate": 4.986183684929604e-05, "loss": 2.6074, "step": 95500 }, { "epoch": 0.28, "learning_rate": 4.986111320164876e-05, "loss": 2.6039, "step": 96000 }, { "epoch": 0.28, "learning_rate": 4.9860389554001484e-05, "loss": 2.6335, "step": 96500 }, { "epoch": 0.28, "learning_rate": 4.9859665906354206e-05, "loss": 2.6242, "step": 97000 }, { "epoch": 0.28, "learning_rate": 4.985894225870693e-05, "loss": 2.6268, "step": 97500 }, { "epoch": 0.28, "learning_rate": 4.985821861105966e-05, "loss": 2.6285, "step": 98000 }, { "epoch": 0.29, "learning_rate": 4.985749496341238e-05, "loss": 2.6378, "step": 98500 }, { "epoch": 0.29, "learning_rate": 4.98567713157651e-05, "loss": 2.6083, "step": 99000 }, { "epoch": 0.29, "learning_rate": 4.9856047668117824e-05, "loss": 2.5959, "step": 99500 }, { "epoch": 0.29, "learning_rate": 4.985532546776584e-05, "loss": 2.6288, "step": 100000 }, { "epoch": 0.29, "learning_rate": 4.9854603267413855e-05, "loss": 2.6411, "step": 100500 }, { "epoch": 0.29, "learning_rate": 4.985387961976658e-05, "loss": 2.6273, "step": 101000 }, { "epoch": 0.29, "learning_rate": 4.9853155972119306e-05, "loss": 2.6014, "step": 101500 }, { "epoch": 0.3, "learning_rate": 4.985243232447203e-05, "loss": 2.6173, "step": 102000 }, { "epoch": 0.3, "learning_rate": 4.985170867682476e-05, "loss": 2.6032, "step": 102500 }, { "epoch": 0.3, "learning_rate": 4.985098502917748e-05, "loss": 2.627, "step": 103000 }, { "epoch": 0.3, "learning_rate": 4.98502613815302e-05, "loss": 2.6256, "step": 103500 }, { "epoch": 0.3, "learning_rate": 4.984953918117822e-05, "loss": 2.6208, "step": 104000 }, { "epoch": 0.3, "learning_rate": 4.984881553353094e-05, "loss": 2.6073, "step": 104500 }, { "epoch": 0.3, "learning_rate": 4.984809188588366e-05, "loss": 2.6002, "step": 105000 }, { "epoch": 0.31, "learning_rate": 4.9847368238236384e-05, "loss": 2.6262, "step": 105500 }, { "epoch": 0.31, "learning_rate": 4.9846644590589106e-05, "loss": 2.6144, "step": 106000 }, { "epoch": 0.31, "learning_rate": 4.984592094294183e-05, "loss": 2.6177, "step": 106500 }, { "epoch": 0.31, "learning_rate": 4.984519729529456e-05, "loss": 2.6107, "step": 107000 }, { "epoch": 0.31, "learning_rate": 4.984447364764728e-05, "loss": 2.6033, "step": 107500 }, { "epoch": 0.31, "learning_rate": 4.984375e-05, "loss": 2.6085, "step": 108000 }, { "epoch": 0.31, "learning_rate": 4.9843026352352724e-05, "loss": 2.5907, "step": 108500 }, { "epoch": 0.32, "learning_rate": 4.9842304152000747e-05, "loss": 2.6222, "step": 109000 }, { "epoch": 0.32, "learning_rate": 4.984158050435347e-05, "loss": 2.5994, "step": 109500 }, { "epoch": 0.32, "learning_rate": 4.984085685670619e-05, "loss": 2.6151, "step": 110000 }, { "epoch": 0.32, "learning_rate": 4.984013320905891e-05, "loss": 2.6356, "step": 110500 }, { "epoch": 0.32, "learning_rate": 4.9839409561411635e-05, "loss": 2.6248, "step": 111000 }, { "epoch": 0.32, "learning_rate": 4.983868591376436e-05, "loss": 2.5996, "step": 111500 }, { "epoch": 0.32, "learning_rate": 4.983796226611708e-05, "loss": 2.6143, "step": 112000 }, { "epoch": 0.33, "learning_rate": 4.983723861846981e-05, "loss": 2.6101, "step": 112500 }, { "epoch": 0.33, "learning_rate": 4.9836516418117825e-05, "loss": 2.6164, "step": 113000 }, { "epoch": 0.33, "learning_rate": 4.983579421776584e-05, "loss": 2.6186, "step": 113500 }, { "epoch": 0.33, "learning_rate": 4.983507057011856e-05, "loss": 2.585, "step": 114000 }, { "epoch": 0.33, "learning_rate": 4.983434981706188e-05, "loss": 2.6116, "step": 114500 }, { "epoch": 0.33, "learning_rate": 4.98336261694146e-05, "loss": 2.5995, "step": 115000 }, { "epoch": 0.33, "learning_rate": 4.983290252176732e-05, "loss": 2.6086, "step": 115500 }, { "epoch": 0.34, "learning_rate": 4.9832178874120045e-05, "loss": 2.6049, "step": 116000 }, { "epoch": 0.34, "learning_rate": 4.983145522647277e-05, "loss": 2.6249, "step": 116500 }, { "epoch": 0.34, "learning_rate": 4.9830731578825496e-05, "loss": 2.592, "step": 117000 }, { "epoch": 0.34, "learning_rate": 4.983000793117822e-05, "loss": 2.6163, "step": 117500 }, { "epoch": 0.34, "learning_rate": 4.982928428353094e-05, "loss": 2.5989, "step": 118000 }, { "epoch": 0.34, "learning_rate": 4.982856063588366e-05, "loss": 2.6092, "step": 118500 }, { "epoch": 0.34, "learning_rate": 4.9827836988236385e-05, "loss": 2.6214, "step": 119000 }, { "epoch": 0.35, "learning_rate": 4.982711334058911e-05, "loss": 2.6576, "step": 119500 }, { "epoch": 0.35, "learning_rate": 4.982639114023713e-05, "loss": 2.6243, "step": 120000 }, { "epoch": 0.35, "learning_rate": 4.9825668939885145e-05, "loss": 2.6128, "step": 120500 }, { "epoch": 0.35, "learning_rate": 4.982494529223787e-05, "loss": 2.588, "step": 121000 }, { "epoch": 0.35, "learning_rate": 4.982422164459059e-05, "loss": 2.6178, "step": 121500 }, { "epoch": 0.35, "learning_rate": 4.982349799694331e-05, "loss": 2.6046, "step": 122000 }, { "epoch": 0.35, "learning_rate": 4.9822774349296034e-05, "loss": 2.5921, "step": 122500 }, { "epoch": 0.36, "learning_rate": 4.9822050701648756e-05, "loss": 2.6107, "step": 123000 }, { "epoch": 0.36, "learning_rate": 4.9821327054001485e-05, "loss": 2.5997, "step": 123500 }, { "epoch": 0.36, "learning_rate": 4.98206048536495e-05, "loss": 2.5989, "step": 124000 }, { "epoch": 0.36, "learning_rate": 4.981988120600223e-05, "loss": 2.6019, "step": 124500 }, { "epoch": 0.36, "learning_rate": 4.981915755835495e-05, "loss": 2.617, "step": 125000 }, { "epoch": 0.36, "learning_rate": 4.9818433910707674e-05, "loss": 2.5885, "step": 125500 }, { "epoch": 0.36, "learning_rate": 4.9817710263060396e-05, "loss": 2.6094, "step": 126000 }, { "epoch": 0.37, "learning_rate": 4.981698661541312e-05, "loss": 2.592, "step": 126500 }, { "epoch": 0.37, "learning_rate": 4.981626296776584e-05, "loss": 2.5981, "step": 127000 }, { "epoch": 0.37, "learning_rate": 4.981553932011856e-05, "loss": 2.6105, "step": 127500 }, { "epoch": 0.37, "learning_rate": 4.9814815672471285e-05, "loss": 2.6168, "step": 128000 }, { "epoch": 0.37, "learning_rate": 4.981409202482401e-05, "loss": 2.6152, "step": 128500 }, { "epoch": 0.37, "learning_rate": 4.9813368377176737e-05, "loss": 2.592, "step": 129000 }, { "epoch": 0.37, "learning_rate": 4.981264472952946e-05, "loss": 2.611, "step": 129500 }, { "epoch": 0.38, "learning_rate": 4.981192108188218e-05, "loss": 2.5882, "step": 130000 }, { "epoch": 0.38, "learning_rate": 4.9811198881530197e-05, "loss": 2.617, "step": 130500 }, { "epoch": 0.38, "learning_rate": 4.981047523388292e-05, "loss": 2.595, "step": 131000 }, { "epoch": 0.38, "learning_rate": 4.980975158623565e-05, "loss": 2.6077, "step": 131500 }, { "epoch": 0.38, "learning_rate": 4.9809029385883663e-05, "loss": 2.6118, "step": 132000 }, { "epoch": 0.38, "learning_rate": 4.9808305738236386e-05, "loss": 2.612, "step": 132500 }, { "epoch": 0.38, "learning_rate": 4.980758209058911e-05, "loss": 2.6042, "step": 133000 }, { "epoch": 0.39, "learning_rate": 4.980685844294184e-05, "loss": 2.5783, "step": 133500 }, { "epoch": 0.39, "learning_rate": 4.980613479529456e-05, "loss": 2.5667, "step": 134000 }, { "epoch": 0.39, "learning_rate": 4.980541114764728e-05, "loss": 2.5936, "step": 134500 }, { "epoch": 0.39, "learning_rate": 4.9804687500000004e-05, "loss": 2.6037, "step": 135000 }, { "epoch": 0.39, "learning_rate": 4.9803963852352726e-05, "loss": 2.607, "step": 135500 }, { "epoch": 0.39, "learning_rate": 4.980324165200074e-05, "loss": 2.595, "step": 136000 }, { "epoch": 0.4, "learning_rate": 4.9802519451648764e-05, "loss": 2.5824, "step": 136500 }, { "epoch": 0.4, "learning_rate": 4.9801795804001486e-05, "loss": 2.581, "step": 137000 }, { "epoch": 0.4, "learning_rate": 4.980107215635421e-05, "loss": 2.613, "step": 137500 }, { "epoch": 0.4, "learning_rate": 4.9800349956002224e-05, "loss": 2.5996, "step": 138000 }, { "epoch": 0.4, "learning_rate": 4.9799626308354946e-05, "loss": 2.5965, "step": 138500 }, { "epoch": 0.4, "learning_rate": 4.979890266070767e-05, "loss": 2.6006, "step": 139000 }, { "epoch": 0.4, "learning_rate": 4.97981790130604e-05, "loss": 2.6041, "step": 139500 }, { "epoch": 0.41, "learning_rate": 4.979745536541312e-05, "loss": 2.5862, "step": 140000 }, { "epoch": 0.41, "learning_rate": 4.979673171776584e-05, "loss": 2.578, "step": 140500 }, { "epoch": 0.41, "learning_rate": 4.9796008070118564e-05, "loss": 2.5761, "step": 141000 }, { "epoch": 0.41, "learning_rate": 4.9795284422471286e-05, "loss": 2.5802, "step": 141500 }, { "epoch": 0.41, "learning_rate": 4.979456077482401e-05, "loss": 2.6018, "step": 142000 }, { "epoch": 0.41, "learning_rate": 4.979383712717674e-05, "loss": 2.6103, "step": 142500 }, { "epoch": 0.41, "learning_rate": 4.979311347952946e-05, "loss": 2.606, "step": 143000 }, { "epoch": 0.42, "learning_rate": 4.9792391279177475e-05, "loss": 2.6264, "step": 143500 }, { "epoch": 0.42, "learning_rate": 4.97916676315302e-05, "loss": 2.5784, "step": 144000 }, { "epoch": 0.42, "learning_rate": 4.979094398388292e-05, "loss": 2.6203, "step": 144500 }, { "epoch": 0.42, "learning_rate": 4.979022033623564e-05, "loss": 2.6052, "step": 145000 }, { "epoch": 0.42, "learning_rate": 4.9789496688588364e-05, "loss": 2.5849, "step": 145500 }, { "epoch": 0.42, "learning_rate": 4.9788773040941086e-05, "loss": 2.6116, "step": 146000 }, { "epoch": 0.42, "learning_rate": 4.9788049393293815e-05, "loss": 2.5938, "step": 146500 }, { "epoch": 0.43, "learning_rate": 4.978732574564654e-05, "loss": 2.5817, "step": 147000 }, { "epoch": 0.43, "learning_rate": 4.978660354529456e-05, "loss": 2.5906, "step": 147500 }, { "epoch": 0.43, "learning_rate": 4.978587989764728e-05, "loss": 2.5725, "step": 148000 }, { "epoch": 0.43, "learning_rate": 4.9785156250000004e-05, "loss": 2.6172, "step": 148500 }, { "epoch": 0.43, "learning_rate": 4.9784432602352727e-05, "loss": 2.5772, "step": 149000 }, { "epoch": 0.43, "learning_rate": 4.978370895470545e-05, "loss": 2.6032, "step": 149500 }, { "epoch": 0.43, "learning_rate": 4.978298530705817e-05, "loss": 2.6136, "step": 150000 }, { "epoch": 0.44, "learning_rate": 4.978226165941089e-05, "loss": 2.5862, "step": 150500 }, { "epoch": 0.44, "learning_rate": 4.9781538011763616e-05, "loss": 2.6046, "step": 151000 }, { "epoch": 0.44, "learning_rate": 4.978081725870693e-05, "loss": 2.5817, "step": 151500 }, { "epoch": 0.44, "learning_rate": 4.978009505835495e-05, "loss": 2.5955, "step": 152000 }, { "epoch": 0.44, "learning_rate": 4.977937141070767e-05, "loss": 2.595, "step": 152500 }, { "epoch": 0.44, "learning_rate": 4.977864776306039e-05, "loss": 2.5895, "step": 153000 }, { "epoch": 0.44, "learning_rate": 4.9777924115413113e-05, "loss": 2.5952, "step": 153500 }, { "epoch": 0.45, "learning_rate": 4.977720046776584e-05, "loss": 2.5721, "step": 154000 }, { "epoch": 0.45, "learning_rate": 4.9776476820118565e-05, "loss": 2.6026, "step": 154500 }, { "epoch": 0.45, "learning_rate": 4.977575317247129e-05, "loss": 2.5926, "step": 155000 }, { "epoch": 0.45, "learning_rate": 4.9775029524824016e-05, "loss": 2.6018, "step": 155500 }, { "epoch": 0.45, "learning_rate": 4.977430587717674e-05, "loss": 2.5689, "step": 156000 }, { "epoch": 0.45, "learning_rate": 4.977358222952946e-05, "loss": 2.6006, "step": 156500 }, { "epoch": 0.45, "learning_rate": 4.977285858188218e-05, "loss": 2.6144, "step": 157000 }, { "epoch": 0.46, "learning_rate": 4.9772134934234905e-05, "loss": 2.5735, "step": 157500 }, { "epoch": 0.46, "learning_rate": 4.977141128658763e-05, "loss": 2.5917, "step": 158000 }, { "epoch": 0.46, "learning_rate": 4.977068908623564e-05, "loss": 2.5926, "step": 158500 }, { "epoch": 0.46, "learning_rate": 4.9769965438588365e-05, "loss": 2.5988, "step": 159000 }, { "epoch": 0.46, "learning_rate": 4.976924179094109e-05, "loss": 2.5623, "step": 159500 }, { "epoch": 0.46, "learning_rate": 4.9768518143293816e-05, "loss": 2.5749, "step": 160000 }, { "epoch": 0.46, "learning_rate": 4.976779449564654e-05, "loss": 2.606, "step": 160500 }, { "epoch": 0.47, "learning_rate": 4.9767072295294554e-05, "loss": 2.5791, "step": 161000 }, { "epoch": 0.47, "learning_rate": 4.976634864764728e-05, "loss": 2.5861, "step": 161500 }, { "epoch": 0.47, "learning_rate": 4.9765625000000005e-05, "loss": 2.5721, "step": 162000 }, { "epoch": 0.47, "learning_rate": 4.976490135235273e-05, "loss": 2.6001, "step": 162500 }, { "epoch": 0.47, "learning_rate": 4.976417915200074e-05, "loss": 2.5877, "step": 163000 }, { "epoch": 0.47, "learning_rate": 4.9763455504353465e-05, "loss": 2.5896, "step": 163500 }, { "epoch": 0.47, "learning_rate": 4.976273185670619e-05, "loss": 2.5604, "step": 164000 }, { "epoch": 0.48, "learning_rate": 4.9762008209058916e-05, "loss": 2.5859, "step": 164500 }, { "epoch": 0.48, "learning_rate": 4.976128456141164e-05, "loss": 2.5724, "step": 165000 }, { "epoch": 0.48, "learning_rate": 4.9760562361059654e-05, "loss": 2.5876, "step": 165500 }, { "epoch": 0.48, "learning_rate": 4.9759838713412376e-05, "loss": 2.5885, "step": 166000 }, { "epoch": 0.48, "learning_rate": 4.97591150657651e-05, "loss": 2.582, "step": 166500 }, { "epoch": 0.48, "learning_rate": 4.9758392865413114e-05, "loss": 2.573, "step": 167000 }, { "epoch": 0.48, "learning_rate": 4.975766921776584e-05, "loss": 2.5841, "step": 167500 }, { "epoch": 0.49, "learning_rate": 4.9756945570118565e-05, "loss": 2.5913, "step": 168000 }, { "epoch": 0.49, "learning_rate": 4.975622192247129e-05, "loss": 2.588, "step": 168500 }, { "epoch": 0.49, "learning_rate": 4.975549827482402e-05, "loss": 2.6033, "step": 169000 }, { "epoch": 0.49, "learning_rate": 4.975477607447203e-05, "loss": 2.575, "step": 169500 }, { "epoch": 0.49, "learning_rate": 4.9754052426824755e-05, "loss": 2.5825, "step": 170000 }, { "epoch": 0.49, "learning_rate": 4.975332877917748e-05, "loss": 2.5677, "step": 170500 }, { "epoch": 0.49, "learning_rate": 4.97526051315302e-05, "loss": 2.579, "step": 171000 }, { "epoch": 0.5, "learning_rate": 4.975188148388292e-05, "loss": 2.5699, "step": 171500 }, { "epoch": 0.5, "learning_rate": 4.9751157836235643e-05, "loss": 2.5901, "step": 172000 }, { "epoch": 0.5, "learning_rate": 4.9750434188588366e-05, "loss": 2.5933, "step": 172500 }, { "epoch": 0.5, "learning_rate": 4.974971054094109e-05, "loss": 2.5922, "step": 173000 }, { "epoch": 0.5, "learning_rate": 4.974898689329382e-05, "loss": 2.5713, "step": 173500 }, { "epoch": 0.5, "learning_rate": 4.974826469294183e-05, "loss": 2.6005, "step": 174000 }, { "epoch": 0.51, "learning_rate": 4.9747541045294555e-05, "loss": 2.6035, "step": 174500 }, { "epoch": 0.51, "learning_rate": 4.974681739764728e-05, "loss": 2.6029, "step": 175000 }, { "epoch": 0.51, "learning_rate": 4.974609375e-05, "loss": 2.5654, "step": 175500 }, { "epoch": 0.51, "learning_rate": 4.9745372996943315e-05, "loss": 2.5823, "step": 176000 }, { "epoch": 0.51, "learning_rate": 4.974464934929604e-05, "loss": 2.5894, "step": 176500 }, { "epoch": 0.51, "learning_rate": 4.9743925701648766e-05, "loss": 2.5976, "step": 177000 }, { "epoch": 0.51, "learning_rate": 4.974320205400149e-05, "loss": 2.5442, "step": 177500 }, { "epoch": 0.52, "learning_rate": 4.974247840635421e-05, "loss": 2.5717, "step": 178000 }, { "epoch": 0.52, "learning_rate": 4.974175475870693e-05, "loss": 2.5774, "step": 178500 }, { "epoch": 0.52, "learning_rate": 4.974103255835495e-05, "loss": 2.584, "step": 179000 }, { "epoch": 0.52, "learning_rate": 4.974030891070767e-05, "loss": 2.5976, "step": 179500 }, { "epoch": 0.52, "learning_rate": 4.973958526306039e-05, "loss": 2.6014, "step": 180000 }, { "epoch": 0.52, "learning_rate": 4.9738861615413115e-05, "loss": 2.6014, "step": 180500 }, { "epoch": 0.52, "learning_rate": 4.9738137967765844e-05, "loss": 2.5905, "step": 181000 }, { "epoch": 0.53, "learning_rate": 4.9737414320118566e-05, "loss": 2.5585, "step": 181500 }, { "epoch": 0.53, "learning_rate": 4.973669211976658e-05, "loss": 2.5887, "step": 182000 }, { "epoch": 0.53, "learning_rate": 4.9735968472119304e-05, "loss": 2.5591, "step": 182500 }, { "epoch": 0.53, "learning_rate": 4.973524627176732e-05, "loss": 2.5815, "step": 183000 }, { "epoch": 0.53, "learning_rate": 4.973452262412004e-05, "loss": 2.5682, "step": 183500 }, { "epoch": 0.53, "learning_rate": 4.9733798976472764e-05, "loss": 2.567, "step": 184000 }, { "epoch": 0.53, "learning_rate": 4.973307532882549e-05, "loss": 2.5902, "step": 184500 }, { "epoch": 0.54, "learning_rate": 4.9732351681178215e-05, "loss": 2.5673, "step": 185000 }, { "epoch": 0.54, "learning_rate": 4.9731628033530944e-05, "loss": 2.5725, "step": 185500 }, { "epoch": 0.54, "learning_rate": 4.9730904385883667e-05, "loss": 2.5808, "step": 186000 }, { "epoch": 0.54, "learning_rate": 4.973018218553168e-05, "loss": 2.5914, "step": 186500 }, { "epoch": 0.54, "learning_rate": 4.9729458537884404e-05, "loss": 2.5639, "step": 187000 }, { "epoch": 0.54, "learning_rate": 4.9728734890237127e-05, "loss": 2.5918, "step": 187500 }, { "epoch": 0.54, "learning_rate": 4.972801124258985e-05, "loss": 2.5788, "step": 188000 }, { "epoch": 0.55, "learning_rate": 4.972728759494257e-05, "loss": 2.5749, "step": 188500 }, { "epoch": 0.55, "learning_rate": 4.972656394729529e-05, "loss": 2.5681, "step": 189000 }, { "epoch": 0.55, "learning_rate": 4.9725840299648016e-05, "loss": 2.5967, "step": 189500 }, { "epoch": 0.55, "learning_rate": 4.9725116652000745e-05, "loss": 2.5961, "step": 190000 }, { "epoch": 0.55, "learning_rate": 4.972439300435347e-05, "loss": 2.5669, "step": 190500 }, { "epoch": 0.55, "learning_rate": 4.972366935670619e-05, "loss": 2.5713, "step": 191000 }, { "epoch": 0.55, "learning_rate": 4.972294570905892e-05, "loss": 2.5539, "step": 191500 }, { "epoch": 0.56, "learning_rate": 4.972222206141164e-05, "loss": 2.5615, "step": 192000 }, { "epoch": 0.56, "learning_rate": 4.9721499861059656e-05, "loss": 2.5902, "step": 192500 }, { "epoch": 0.56, "learning_rate": 4.972077621341238e-05, "loss": 2.5476, "step": 193000 }, { "epoch": 0.56, "learning_rate": 4.9720054013060394e-05, "loss": 2.5784, "step": 193500 }, { "epoch": 0.56, "learning_rate": 4.971933036541312e-05, "loss": 2.5748, "step": 194000 }, { "epoch": 0.56, "learning_rate": 4.9718606717765845e-05, "loss": 2.605, "step": 194500 }, { "epoch": 0.56, "learning_rate": 4.971788451741386e-05, "loss": 2.576, "step": 195000 }, { "epoch": 0.57, "learning_rate": 4.971716086976658e-05, "loss": 2.5688, "step": 195500 }, { "epoch": 0.57, "learning_rate": 4.9716437222119305e-05, "loss": 2.5431, "step": 196000 }, { "epoch": 0.57, "learning_rate": 4.971571357447203e-05, "loss": 2.589, "step": 196500 }, { "epoch": 0.57, "learning_rate": 4.971498992682475e-05, "loss": 2.5671, "step": 197000 }, { "epoch": 0.57, "learning_rate": 4.971426627917747e-05, "loss": 2.5773, "step": 197500 }, { "epoch": 0.57, "learning_rate": 4.9713542631530194e-05, "loss": 2.5651, "step": 198000 }, { "epoch": 0.57, "learning_rate": 4.971281898388292e-05, "loss": 2.5874, "step": 198500 }, { "epoch": 0.58, "learning_rate": 4.9712095336235645e-05, "loss": 2.5541, "step": 199000 }, { "epoch": 0.58, "learning_rate": 4.9711371688588374e-05, "loss": 2.6027, "step": 199500 }, { "epoch": 0.58, "learning_rate": 4.9710648040941096e-05, "loss": 2.5907, "step": 200000 }, { "epoch": 0.58, "learning_rate": 4.970992439329382e-05, "loss": 2.5391, "step": 200500 }, { "epoch": 0.58, "learning_rate": 4.970920074564654e-05, "loss": 2.5756, "step": 201000 }, { "epoch": 0.58, "learning_rate": 4.970847999258985e-05, "loss": 2.5635, "step": 201500 }, { "epoch": 0.58, "learning_rate": 4.970775634494257e-05, "loss": 2.56, "step": 202000 }, { "epoch": 0.59, "learning_rate": 4.9707032697295294e-05, "loss": 2.5507, "step": 202500 }, { "epoch": 0.59, "learning_rate": 4.970630904964802e-05, "loss": 2.5677, "step": 203000 }, { "epoch": 0.59, "learning_rate": 4.9705585402000745e-05, "loss": 2.5626, "step": 203500 }, { "epoch": 0.59, "learning_rate": 4.970486320164876e-05, "loss": 2.5593, "step": 204000 }, { "epoch": 0.59, "learning_rate": 4.970413955400148e-05, "loss": 2.5737, "step": 204500 }, { "epoch": 0.59, "learning_rate": 4.9703415906354205e-05, "loss": 2.5599, "step": 205000 }, { "epoch": 0.59, "learning_rate": 4.970269225870693e-05, "loss": 2.5798, "step": 205500 }, { "epoch": 0.6, "learning_rate": 4.970196861105965e-05, "loss": 2.5696, "step": 206000 }, { "epoch": 0.6, "learning_rate": 4.970124496341238e-05, "loss": 2.5569, "step": 206500 }, { "epoch": 0.6, "learning_rate": 4.97005213157651e-05, "loss": 2.5668, "step": 207000 }, { "epoch": 0.6, "learning_rate": 4.969979766811782e-05, "loss": 2.5624, "step": 207500 }, { "epoch": 0.6, "learning_rate": 4.9699075467765846e-05, "loss": 2.5666, "step": 208000 }, { "epoch": 0.6, "learning_rate": 4.969835182011857e-05, "loss": 2.5669, "step": 208500 }, { "epoch": 0.6, "learning_rate": 4.969762817247129e-05, "loss": 2.5701, "step": 209000 }, { "epoch": 0.61, "learning_rate": 4.969690452482401e-05, "loss": 2.5646, "step": 209500 }, { "epoch": 0.61, "learning_rate": 4.9696180877176735e-05, "loss": 2.5781, "step": 210000 }, { "epoch": 0.61, "learning_rate": 4.969545722952946e-05, "loss": 2.5663, "step": 210500 }, { "epoch": 0.61, "learning_rate": 4.969473502917747e-05, "loss": 2.5697, "step": 211000 }, { "epoch": 0.61, "learning_rate": 4.9694012828825495e-05, "loss": 2.5585, "step": 211500 }, { "epoch": 0.61, "learning_rate": 4.969328918117822e-05, "loss": 2.543, "step": 212000 }, { "epoch": 0.62, "learning_rate": 4.969256553353094e-05, "loss": 2.5626, "step": 212500 }, { "epoch": 0.62, "learning_rate": 4.969184188588366e-05, "loss": 2.5703, "step": 213000 }, { "epoch": 0.62, "learning_rate": 4.969111968553168e-05, "loss": 2.5592, "step": 213500 }, { "epoch": 0.62, "learning_rate": 4.96903960378844e-05, "loss": 2.55, "step": 214000 }, { "epoch": 0.62, "learning_rate": 4.968967239023713e-05, "loss": 2.5781, "step": 214500 }, { "epoch": 0.62, "learning_rate": 4.968894874258985e-05, "loss": 2.5719, "step": 215000 }, { "epoch": 0.62, "learning_rate": 4.968822509494257e-05, "loss": 2.5836, "step": 215500 }, { "epoch": 0.63, "learning_rate": 4.9687501447295295e-05, "loss": 2.5637, "step": 216000 }, { "epoch": 0.63, "learning_rate": 4.9686777799648024e-05, "loss": 2.5533, "step": 216500 }, { "epoch": 0.63, "learning_rate": 4.968605559929604e-05, "loss": 2.5651, "step": 217000 }, { "epoch": 0.63, "learning_rate": 4.968533195164876e-05, "loss": 2.5562, "step": 217500 }, { "epoch": 0.63, "learning_rate": 4.9684608304001484e-05, "loss": 2.5557, "step": 218000 }, { "epoch": 0.63, "learning_rate": 4.9683884656354206e-05, "loss": 2.5495, "step": 218500 }, { "epoch": 0.63, "learning_rate": 4.968316100870693e-05, "loss": 2.5603, "step": 219000 }, { "epoch": 0.64, "learning_rate": 4.968243736105965e-05, "loss": 2.5799, "step": 219500 }, { "epoch": 0.64, "learning_rate": 4.968171371341237e-05, "loss": 2.5503, "step": 220000 }, { "epoch": 0.64, "learning_rate": 4.9680990065765095e-05, "loss": 2.5524, "step": 220500 }, { "epoch": 0.64, "learning_rate": 4.968026786541312e-05, "loss": 2.5719, "step": 221000 }, { "epoch": 0.64, "learning_rate": 4.9679544217765846e-05, "loss": 2.5378, "step": 221500 }, { "epoch": 0.64, "learning_rate": 4.967882057011857e-05, "loss": 2.5751, "step": 222000 }, { "epoch": 0.64, "learning_rate": 4.967809692247129e-05, "loss": 2.5557, "step": 222500 }, { "epoch": 0.65, "learning_rate": 4.967737327482401e-05, "loss": 2.5859, "step": 223000 }, { "epoch": 0.65, "learning_rate": 4.967665107447203e-05, "loss": 2.5642, "step": 223500 }, { "epoch": 0.65, "learning_rate": 4.967592742682475e-05, "loss": 2.5683, "step": 224000 }, { "epoch": 0.65, "learning_rate": 4.967520377917747e-05, "loss": 2.552, "step": 224500 }, { "epoch": 0.65, "learning_rate": 4.9674481578825495e-05, "loss": 2.5785, "step": 225000 }, { "epoch": 0.65, "learning_rate": 4.967375793117822e-05, "loss": 2.564, "step": 225500 }, { "epoch": 0.65, "learning_rate": 4.967303428353094e-05, "loss": 2.5478, "step": 226000 }, { "epoch": 0.66, "learning_rate": 4.967231063588366e-05, "loss": 2.5504, "step": 226500 }, { "epoch": 0.66, "learning_rate": 4.9671586988236384e-05, "loss": 2.5643, "step": 227000 }, { "epoch": 0.66, "learning_rate": 4.967086334058911e-05, "loss": 2.5322, "step": 227500 }, { "epoch": 0.66, "learning_rate": 4.967013969294183e-05, "loss": 2.562, "step": 228000 }, { "epoch": 0.66, "learning_rate": 4.966941604529455e-05, "loss": 2.5706, "step": 228500 }, { "epoch": 0.66, "learning_rate": 4.966869239764728e-05, "loss": 2.5509, "step": 229000 }, { "epoch": 0.66, "learning_rate": 4.96679701972953e-05, "loss": 2.5523, "step": 229500 }, { "epoch": 0.67, "learning_rate": 4.9667246549648025e-05, "loss": 2.5591, "step": 230000 }, { "epoch": 0.67, "learning_rate": 4.966652290200075e-05, "loss": 2.56, "step": 230500 }, { "epoch": 0.67, "learning_rate": 4.966579925435347e-05, "loss": 2.5508, "step": 231000 }, { "epoch": 0.67, "learning_rate": 4.9665077054001485e-05, "loss": 2.5699, "step": 231500 }, { "epoch": 0.67, "learning_rate": 4.966435340635421e-05, "loss": 2.5605, "step": 232000 }, { "epoch": 0.67, "learning_rate": 4.966362975870693e-05, "loss": 2.5706, "step": 232500 }, { "epoch": 0.67, "learning_rate": 4.966290611105965e-05, "loss": 2.5612, "step": 233000 }, { "epoch": 0.68, "learning_rate": 4.9662182463412374e-05, "loss": 2.5758, "step": 233500 }, { "epoch": 0.68, "learning_rate": 4.96614588157651e-05, "loss": 2.5689, "step": 234000 }, { "epoch": 0.68, "learning_rate": 4.9660735168117825e-05, "loss": 2.5324, "step": 234500 }, { "epoch": 0.68, "learning_rate": 4.966001152047055e-05, "loss": 2.5659, "step": 235000 }, { "epoch": 0.68, "learning_rate": 4.965928932011856e-05, "loss": 2.5712, "step": 235500 }, { "epoch": 0.68, "learning_rate": 4.965856711976658e-05, "loss": 2.5641, "step": 236000 }, { "epoch": 0.68, "learning_rate": 4.96578434721193e-05, "loss": 2.564, "step": 236500 }, { "epoch": 0.69, "learning_rate": 4.965711982447203e-05, "loss": 2.5625, "step": 237000 }, { "epoch": 0.69, "learning_rate": 4.965639617682475e-05, "loss": 2.5742, "step": 237500 }, { "epoch": 0.69, "learning_rate": 4.9655672529177474e-05, "loss": 2.5691, "step": 238000 }, { "epoch": 0.69, "learning_rate": 4.96549488815302e-05, "loss": 2.5498, "step": 238500 }, { "epoch": 0.69, "learning_rate": 4.9654225233882925e-05, "loss": 2.5589, "step": 239000 }, { "epoch": 0.69, "learning_rate": 4.965350158623565e-05, "loss": 2.548, "step": 239500 }, { "epoch": 0.69, "learning_rate": 4.965277938588366e-05, "loss": 2.5458, "step": 240000 }, { "epoch": 0.7, "learning_rate": 4.965205718553168e-05, "loss": 2.563, "step": 240500 }, { "epoch": 0.7, "learning_rate": 4.96513335378844e-05, "loss": 2.5474, "step": 241000 }, { "epoch": 0.7, "learning_rate": 4.965060989023712e-05, "loss": 2.5495, "step": 241500 }, { "epoch": 0.7, "learning_rate": 4.964988624258985e-05, "loss": 2.5573, "step": 242000 }, { "epoch": 0.7, "learning_rate": 4.9649162594942574e-05, "loss": 2.5471, "step": 242500 }, { "epoch": 0.7, "learning_rate": 4.9648438947295296e-05, "loss": 2.5519, "step": 243000 }, { "epoch": 0.7, "learning_rate": 4.964771674694331e-05, "loss": 2.5537, "step": 243500 }, { "epoch": 0.71, "learning_rate": 4.9646993099296034e-05, "loss": 2.5506, "step": 244000 }, { "epoch": 0.71, "learning_rate": 4.964626945164876e-05, "loss": 2.5406, "step": 244500 }, { "epoch": 0.71, "learning_rate": 4.9645545804001486e-05, "loss": 2.5416, "step": 245000 }, { "epoch": 0.71, "learning_rate": 4.964482215635421e-05, "loss": 2.5441, "step": 245500 }, { "epoch": 0.71, "learning_rate": 4.964409850870693e-05, "loss": 2.5514, "step": 246000 }, { "epoch": 0.71, "learning_rate": 4.964337486105965e-05, "loss": 2.5569, "step": 246500 }, { "epoch": 0.71, "learning_rate": 4.9642651213412374e-05, "loss": 2.5481, "step": 247000 }, { "epoch": 0.72, "learning_rate": 4.9641927565765103e-05, "loss": 2.5606, "step": 247500 }, { "epoch": 0.72, "learning_rate": 4.964120536541312e-05, "loss": 2.5593, "step": 248000 }, { "epoch": 0.72, "learning_rate": 4.964048171776584e-05, "loss": 2.5548, "step": 248500 }, { "epoch": 0.72, "learning_rate": 4.9639758070118563e-05, "loss": 2.5462, "step": 249000 }, { "epoch": 0.72, "learning_rate": 4.9639034422471286e-05, "loss": 2.5785, "step": 249500 }, { "epoch": 0.72, "learning_rate": 4.963831077482401e-05, "loss": 2.563, "step": 250000 }, { "epoch": 0.73, "learning_rate": 4.963758712717673e-05, "loss": 2.5651, "step": 250500 }, { "epoch": 0.73, "learning_rate": 4.963686492682475e-05, "loss": 2.5509, "step": 251000 }, { "epoch": 0.73, "learning_rate": 4.963614127917748e-05, "loss": 2.5435, "step": 251500 }, { "epoch": 0.73, "learning_rate": 4.9635417631530204e-05, "loss": 2.5479, "step": 252000 }, { "epoch": 0.73, "learning_rate": 4.9634693983882926e-05, "loss": 2.5208, "step": 252500 }, { "epoch": 0.73, "learning_rate": 4.963397178353094e-05, "loss": 2.5491, "step": 253000 }, { "epoch": 0.73, "learning_rate": 4.9633248135883664e-05, "loss": 2.5547, "step": 253500 }, { "epoch": 0.74, "learning_rate": 4.963252593553168e-05, "loss": 2.5425, "step": 254000 }, { "epoch": 0.74, "learning_rate": 4.96318022878844e-05, "loss": 2.5343, "step": 254500 }, { "epoch": 0.74, "learning_rate": 4.963107864023713e-05, "loss": 2.5502, "step": 255000 }, { "epoch": 0.74, "learning_rate": 4.963035499258985e-05, "loss": 2.5575, "step": 255500 }, { "epoch": 0.74, "learning_rate": 4.962963423953316e-05, "loss": 2.5557, "step": 256000 }, { "epoch": 0.74, "learning_rate": 4.9628910591885884e-05, "loss": 2.5413, "step": 256500 }, { "epoch": 0.74, "learning_rate": 4.9628188391533906e-05, "loss": 2.552, "step": 257000 }, { "epoch": 0.75, "learning_rate": 4.962746474388663e-05, "loss": 2.5499, "step": 257500 }, { "epoch": 0.75, "learning_rate": 4.962674109623935e-05, "loss": 2.5515, "step": 258000 }, { "epoch": 0.75, "learning_rate": 4.962601744859207e-05, "loss": 2.5387, "step": 258500 }, { "epoch": 0.75, "learning_rate": 4.9625293800944795e-05, "loss": 2.5693, "step": 259000 }, { "epoch": 0.75, "learning_rate": 4.9624570153297524e-05, "loss": 2.5589, "step": 259500 }, { "epoch": 0.75, "learning_rate": 4.9623846505650246e-05, "loss": 2.523, "step": 260000 }, { "epoch": 0.75, "learning_rate": 4.962312285800297e-05, "loss": 2.5451, "step": 260500 }, { "epoch": 0.76, "learning_rate": 4.9622400657650984e-05, "loss": 2.5508, "step": 261000 }, { "epoch": 0.76, "learning_rate": 4.9621677010003706e-05, "loss": 2.5668, "step": 261500 }, { "epoch": 0.76, "learning_rate": 4.962095336235643e-05, "loss": 2.5312, "step": 262000 }, { "epoch": 0.76, "learning_rate": 4.962022971470916e-05, "loss": 2.5467, "step": 262500 }, { "epoch": 0.76, "learning_rate": 4.961950606706188e-05, "loss": 2.5604, "step": 263000 }, { "epoch": 0.76, "learning_rate": 4.96187824194146e-05, "loss": 2.5361, "step": 263500 }, { "epoch": 0.76, "learning_rate": 4.9618058771767324e-05, "loss": 2.5436, "step": 264000 }, { "epoch": 0.77, "learning_rate": 4.961733512412005e-05, "loss": 2.5387, "step": 264500 }, { "epoch": 0.77, "learning_rate": 4.961661147647277e-05, "loss": 2.5314, "step": 265000 }, { "epoch": 0.77, "learning_rate": 4.961588782882549e-05, "loss": 2.5349, "step": 265500 }, { "epoch": 0.77, "learning_rate": 4.961516418117821e-05, "loss": 2.5463, "step": 266000 }, { "epoch": 0.77, "learning_rate": 4.9614440533530936e-05, "loss": 2.5531, "step": 266500 }, { "epoch": 0.77, "learning_rate": 4.961371833317896e-05, "loss": 2.5421, "step": 267000 }, { "epoch": 0.77, "learning_rate": 4.961299613282698e-05, "loss": 2.548, "step": 267500 }, { "epoch": 0.78, "learning_rate": 4.96122724851797e-05, "loss": 2.5439, "step": 268000 }, { "epoch": 0.78, "learning_rate": 4.9611548837532425e-05, "loss": 2.5345, "step": 268500 }, { "epoch": 0.78, "learning_rate": 4.961082518988515e-05, "loss": 2.5571, "step": 269000 }, { "epoch": 0.78, "learning_rate": 4.961010154223787e-05, "loss": 2.5308, "step": 269500 }, { "epoch": 0.78, "learning_rate": 4.960937789459059e-05, "loss": 2.5601, "step": 270000 }, { "epoch": 0.78, "learning_rate": 4.960865569423861e-05, "loss": 2.5552, "step": 270500 }, { "epoch": 0.78, "learning_rate": 4.960793204659133e-05, "loss": 2.5522, "step": 271000 }, { "epoch": 0.79, "learning_rate": 4.960720839894406e-05, "loss": 2.5313, "step": 271500 }, { "epoch": 0.79, "learning_rate": 4.960648475129678e-05, "loss": 2.533, "step": 272000 }, { "epoch": 0.79, "learning_rate": 4.9605762550944796e-05, "loss": 2.5278, "step": 272500 }, { "epoch": 0.79, "learning_rate": 4.960504035059281e-05, "loss": 2.5569, "step": 273000 }, { "epoch": 0.79, "learning_rate": 4.9604316702945534e-05, "loss": 2.5368, "step": 273500 }, { "epoch": 0.79, "learning_rate": 4.9603593055298256e-05, "loss": 2.5321, "step": 274000 }, { "epoch": 0.79, "learning_rate": 4.960286940765098e-05, "loss": 2.5491, "step": 274500 }, { "epoch": 0.8, "learning_rate": 4.960214576000371e-05, "loss": 2.5417, "step": 275000 }, { "epoch": 0.8, "learning_rate": 4.960142211235643e-05, "loss": 2.53, "step": 275500 }, { "epoch": 0.8, "learning_rate": 4.960069846470916e-05, "loss": 2.5556, "step": 276000 }, { "epoch": 0.8, "learning_rate": 4.959997481706188e-05, "loss": 2.5469, "step": 276500 }, { "epoch": 0.8, "learning_rate": 4.95992511694146e-05, "loss": 2.5585, "step": 277000 }, { "epoch": 0.8, "learning_rate": 4.9598527521767325e-05, "loss": 2.5562, "step": 277500 }, { "epoch": 0.8, "learning_rate": 4.959780387412005e-05, "loss": 2.5432, "step": 278000 }, { "epoch": 0.81, "learning_rate": 4.959708167376806e-05, "loss": 2.53, "step": 278500 }, { "epoch": 0.81, "learning_rate": 4.959635947341608e-05, "loss": 2.5359, "step": 279000 }, { "epoch": 0.81, "learning_rate": 4.959563582576881e-05, "loss": 2.5695, "step": 279500 }, { "epoch": 0.81, "learning_rate": 4.959491217812153e-05, "loss": 2.5665, "step": 280000 }, { "epoch": 0.81, "learning_rate": 4.959418853047425e-05, "loss": 2.5413, "step": 280500 }, { "epoch": 0.81, "learning_rate": 4.9593464882826974e-05, "loss": 2.547, "step": 281000 }, { "epoch": 0.81, "learning_rate": 4.9592741235179696e-05, "loss": 2.5466, "step": 281500 }, { "epoch": 0.82, "learning_rate": 4.9592017587532425e-05, "loss": 2.5729, "step": 282000 }, { "epoch": 0.82, "learning_rate": 4.959129393988515e-05, "loss": 2.5552, "step": 282500 }, { "epoch": 0.82, "learning_rate": 4.959057029223787e-05, "loss": 2.5592, "step": 283000 }, { "epoch": 0.82, "learning_rate": 4.958984664459059e-05, "loss": 2.5439, "step": 283500 }, { "epoch": 0.82, "learning_rate": 4.9589122996943314e-05, "loss": 2.5459, "step": 284000 }, { "epoch": 0.82, "learning_rate": 4.958839934929604e-05, "loss": 2.5536, "step": 284500 }, { "epoch": 0.82, "learning_rate": 4.958767714894406e-05, "loss": 2.5262, "step": 285000 }, { "epoch": 0.83, "learning_rate": 4.958695350129678e-05, "loss": 2.5212, "step": 285500 }, { "epoch": 0.83, "learning_rate": 4.95862313009448e-05, "loss": 2.5396, "step": 286000 }, { "epoch": 0.83, "learning_rate": 4.958550765329752e-05, "loss": 2.5437, "step": 286500 }, { "epoch": 0.83, "learning_rate": 4.958478400565024e-05, "loss": 2.5319, "step": 287000 }, { "epoch": 0.83, "learning_rate": 4.9584060358002963e-05, "loss": 2.5542, "step": 287500 }, { "epoch": 0.83, "learning_rate": 4.9583336710355686e-05, "loss": 2.5505, "step": 288000 }, { "epoch": 0.84, "learning_rate": 4.958261306270841e-05, "loss": 2.5352, "step": 288500 }, { "epoch": 0.84, "learning_rate": 4.958189086235643e-05, "loss": 2.5431, "step": 289000 }, { "epoch": 0.84, "learning_rate": 4.958116721470916e-05, "loss": 2.5431, "step": 289500 }, { "epoch": 0.84, "learning_rate": 4.958044356706188e-05, "loss": 2.5352, "step": 290000 }, { "epoch": 0.84, "learning_rate": 4.9579719919414604e-05, "loss": 2.5251, "step": 290500 }, { "epoch": 0.84, "learning_rate": 4.9578996271767326e-05, "loss": 2.5557, "step": 291000 }, { "epoch": 0.84, "learning_rate": 4.957827407141534e-05, "loss": 2.5366, "step": 291500 }, { "epoch": 0.85, "learning_rate": 4.9577550423768064e-05, "loss": 2.5192, "step": 292000 }, { "epoch": 0.85, "learning_rate": 4.9576826776120786e-05, "loss": 2.5426, "step": 292500 }, { "epoch": 0.85, "learning_rate": 4.957610312847351e-05, "loss": 2.5335, "step": 293000 }, { "epoch": 0.85, "learning_rate": 4.957538092812153e-05, "loss": 2.5064, "step": 293500 }, { "epoch": 0.85, "learning_rate": 4.957465728047425e-05, "loss": 2.5572, "step": 294000 }, { "epoch": 0.85, "learning_rate": 4.9573933632826975e-05, "loss": 2.5598, "step": 294500 }, { "epoch": 0.85, "learning_rate": 4.95732099851797e-05, "loss": 2.5467, "step": 295000 }, { "epoch": 0.86, "learning_rate": 4.957248633753242e-05, "loss": 2.5209, "step": 295500 }, { "epoch": 0.86, "learning_rate": 4.9571764137180435e-05, "loss": 2.5475, "step": 296000 }, { "epoch": 0.86, "learning_rate": 4.957104048953316e-05, "loss": 2.5553, "step": 296500 }, { "epoch": 0.86, "learning_rate": 4.9570316841885886e-05, "loss": 2.5091, "step": 297000 }, { "epoch": 0.86, "learning_rate": 4.956959319423861e-05, "loss": 2.5599, "step": 297500 }, { "epoch": 0.86, "learning_rate": 4.956886954659134e-05, "loss": 2.5294, "step": 298000 }, { "epoch": 0.86, "learning_rate": 4.956814589894406e-05, "loss": 2.536, "step": 298500 }, { "epoch": 0.87, "learning_rate": 4.956742225129678e-05, "loss": 2.5513, "step": 299000 }, { "epoch": 0.87, "learning_rate": 4.9566698603649504e-05, "loss": 2.5353, "step": 299500 }, { "epoch": 0.87, "learning_rate": 4.9565974956002226e-05, "loss": 2.5504, "step": 300000 }, { "epoch": 0.87, "learning_rate": 4.956525275565024e-05, "loss": 2.518, "step": 300500 }, { "epoch": 0.87, "learning_rate": 4.9564529108002964e-05, "loss": 2.5467, "step": 301000 }, { "epoch": 0.87, "learning_rate": 4.956380690765099e-05, "loss": 2.5237, "step": 301500 }, { "epoch": 0.87, "learning_rate": 4.956308326000371e-05, "loss": 2.5409, "step": 302000 }, { "epoch": 0.88, "learning_rate": 4.956235961235643e-05, "loss": 2.52, "step": 302500 }, { "epoch": 0.88, "learning_rate": 4.956163596470915e-05, "loss": 2.5535, "step": 303000 }, { "epoch": 0.88, "learning_rate": 4.956091376435717e-05, "loss": 2.5086, "step": 303500 }, { "epoch": 0.88, "learning_rate": 4.956019011670989e-05, "loss": 2.5657, "step": 304000 }, { "epoch": 0.88, "learning_rate": 4.955946646906262e-05, "loss": 2.5526, "step": 304500 }, { "epoch": 0.88, "learning_rate": 4.955874282141534e-05, "loss": 2.5273, "step": 305000 }, { "epoch": 0.88, "learning_rate": 4.9558019173768065e-05, "loss": 2.5482, "step": 305500 }, { "epoch": 0.89, "learning_rate": 4.955729552612079e-05, "loss": 2.5281, "step": 306000 }, { "epoch": 0.89, "learning_rate": 4.955657187847351e-05, "loss": 2.5439, "step": 306500 }, { "epoch": 0.89, "learning_rate": 4.955584823082624e-05, "loss": 2.5476, "step": 307000 }, { "epoch": 0.89, "learning_rate": 4.955512458317896e-05, "loss": 2.5357, "step": 307500 }, { "epoch": 0.89, "learning_rate": 4.955440093553168e-05, "loss": 2.5434, "step": 308000 }, { "epoch": 0.89, "learning_rate": 4.9553677287884405e-05, "loss": 2.545, "step": 308500 }, { "epoch": 0.89, "learning_rate": 4.955295364023713e-05, "loss": 2.5369, "step": 309000 }, { "epoch": 0.9, "learning_rate": 4.955223143988514e-05, "loss": 2.5486, "step": 309500 }, { "epoch": 0.9, "learning_rate": 4.9551507792237865e-05, "loss": 2.5518, "step": 310000 }, { "epoch": 0.9, "learning_rate": 4.955078414459059e-05, "loss": 2.5394, "step": 310500 }, { "epoch": 0.9, "learning_rate": 4.955006049694331e-05, "loss": 2.5424, "step": 311000 }, { "epoch": 0.9, "learning_rate": 4.954933684929604e-05, "loss": 2.5329, "step": 311500 }, { "epoch": 0.9, "learning_rate": 4.954861320164876e-05, "loss": 2.5612, "step": 312000 }, { "epoch": 0.9, "learning_rate": 4.954788955400149e-05, "loss": 2.5105, "step": 312500 }, { "epoch": 0.91, "learning_rate": 4.9547167353649505e-05, "loss": 2.5164, "step": 313000 }, { "epoch": 0.91, "learning_rate": 4.954644370600223e-05, "loss": 2.5203, "step": 313500 }, { "epoch": 0.91, "learning_rate": 4.954572005835495e-05, "loss": 2.4984, "step": 314000 }, { "epoch": 0.91, "learning_rate": 4.954499641070767e-05, "loss": 2.5298, "step": 314500 }, { "epoch": 0.91, "learning_rate": 4.9544272763060394e-05, "loss": 2.5382, "step": 315000 }, { "epoch": 0.91, "learning_rate": 4.9543549115413116e-05, "loss": 2.5289, "step": 315500 }, { "epoch": 0.91, "learning_rate": 4.954282546776584e-05, "loss": 2.554, "step": 316000 }, { "epoch": 0.92, "learning_rate": 4.954210182011856e-05, "loss": 2.5542, "step": 316500 }, { "epoch": 0.92, "learning_rate": 4.954137961976658e-05, "loss": 2.5502, "step": 317000 }, { "epoch": 0.92, "learning_rate": 4.954065886670989e-05, "loss": 2.5347, "step": 317500 }, { "epoch": 0.92, "learning_rate": 4.9539935219062614e-05, "loss": 2.5296, "step": 318000 }, { "epoch": 0.92, "learning_rate": 4.9539211571415336e-05, "loss": 2.528, "step": 318500 }, { "epoch": 0.92, "learning_rate": 4.9538487923768065e-05, "loss": 2.54, "step": 319000 }, { "epoch": 0.92, "learning_rate": 4.953776427612079e-05, "loss": 2.5157, "step": 319500 }, { "epoch": 0.93, "learning_rate": 4.953704062847352e-05, "loss": 2.5307, "step": 320000 }, { "epoch": 0.93, "learning_rate": 4.953631698082624e-05, "loss": 2.5318, "step": 320500 }, { "epoch": 0.93, "learning_rate": 4.9535594780474254e-05, "loss": 2.5449, "step": 321000 }, { "epoch": 0.93, "learning_rate": 4.953487113282698e-05, "loss": 2.5484, "step": 321500 }, { "epoch": 0.93, "learning_rate": 4.95341474851797e-05, "loss": 2.5369, "step": 322000 }, { "epoch": 0.93, "learning_rate": 4.953342383753242e-05, "loss": 2.5524, "step": 322500 }, { "epoch": 0.93, "learning_rate": 4.953270018988514e-05, "loss": 2.5162, "step": 323000 }, { "epoch": 0.94, "learning_rate": 4.9531976542237866e-05, "loss": 2.5119, "step": 323500 }, { "epoch": 0.94, "learning_rate": 4.953125289459059e-05, "loss": 2.5199, "step": 324000 }, { "epoch": 0.94, "learning_rate": 4.953052924694332e-05, "loss": 2.5306, "step": 324500 }, { "epoch": 0.94, "learning_rate": 4.952980704659133e-05, "loss": 2.5014, "step": 325000 }, { "epoch": 0.94, "learning_rate": 4.9529083398944055e-05, "loss": 2.512, "step": 325500 }, { "epoch": 0.94, "learning_rate": 4.952835975129678e-05, "loss": 2.553, "step": 326000 }, { "epoch": 0.95, "learning_rate": 4.95276361036495e-05, "loss": 2.5293, "step": 326500 }, { "epoch": 0.95, "learning_rate": 4.952691245600223e-05, "loss": 2.528, "step": 327000 }, { "epoch": 0.95, "learning_rate": 4.952618880835495e-05, "loss": 2.5372, "step": 327500 }, { "epoch": 0.95, "learning_rate": 4.9525466608002966e-05, "loss": 2.5085, "step": 328000 }, { "epoch": 0.95, "learning_rate": 4.952474296035569e-05, "loss": 2.536, "step": 328500 }, { "epoch": 0.95, "learning_rate": 4.952401931270842e-05, "loss": 2.5238, "step": 329000 }, { "epoch": 0.95, "learning_rate": 4.952329566506114e-05, "loss": 2.5466, "step": 329500 }, { "epoch": 0.96, "learning_rate": 4.952257201741386e-05, "loss": 2.546, "step": 330000 }, { "epoch": 0.96, "learning_rate": 4.9521848369766584e-05, "loss": 2.5203, "step": 330500 }, { "epoch": 0.96, "learning_rate": 4.95211261694146e-05, "loss": 2.541, "step": 331000 }, { "epoch": 0.96, "learning_rate": 4.952040252176732e-05, "loss": 2.5226, "step": 331500 }, { "epoch": 0.96, "learning_rate": 4.9519678874120044e-05, "loss": 2.5345, "step": 332000 }, { "epoch": 0.96, "learning_rate": 4.9518955226472766e-05, "loss": 2.5377, "step": 332500 }, { "epoch": 0.96, "learning_rate": 4.951823157882549e-05, "loss": 2.5331, "step": 333000 }, { "epoch": 0.97, "learning_rate": 4.951750793117822e-05, "loss": 2.5586, "step": 333500 }, { "epoch": 0.97, "learning_rate": 4.951678573082623e-05, "loss": 2.5186, "step": 334000 }, { "epoch": 0.97, "learning_rate": 4.9516063530474255e-05, "loss": 2.5191, "step": 334500 }, { "epoch": 0.97, "learning_rate": 4.951533988282698e-05, "loss": 2.5165, "step": 335000 }, { "epoch": 0.97, "learning_rate": 4.95146162351797e-05, "loss": 2.533, "step": 335500 }, { "epoch": 0.97, "learning_rate": 4.951389258753242e-05, "loss": 2.5351, "step": 336000 }, { "epoch": 0.97, "learning_rate": 4.9513168939885144e-05, "loss": 2.5606, "step": 336500 }, { "epoch": 0.98, "learning_rate": 4.9512445292237866e-05, "loss": 2.5051, "step": 337000 }, { "epoch": 0.98, "learning_rate": 4.951172164459059e-05, "loss": 2.5284, "step": 337500 }, { "epoch": 0.98, "learning_rate": 4.951099799694332e-05, "loss": 2.5166, "step": 338000 }, { "epoch": 0.98, "learning_rate": 4.951027434929604e-05, "loss": 2.523, "step": 338500 }, { "epoch": 0.98, "learning_rate": 4.950955070164876e-05, "loss": 2.5179, "step": 339000 }, { "epoch": 0.98, "learning_rate": 4.9508827054001484e-05, "loss": 2.5472, "step": 339500 }, { "epoch": 0.98, "learning_rate": 4.9508103406354207e-05, "loss": 2.5212, "step": 340000 }, { "epoch": 0.99, "learning_rate": 4.950738120600222e-05, "loss": 2.5265, "step": 340500 }, { "epoch": 0.99, "learning_rate": 4.9506657558354944e-05, "loss": 2.5355, "step": 341000 }, { "epoch": 0.99, "learning_rate": 4.9505933910707667e-05, "loss": 2.5299, "step": 341500 }, { "epoch": 0.99, "learning_rate": 4.9505210263060396e-05, "loss": 2.5259, "step": 342000 }, { "epoch": 0.99, "learning_rate": 4.950448951000371e-05, "loss": 2.5472, "step": 342500 }, { "epoch": 0.99, "learning_rate": 4.950376730965173e-05, "loss": 2.542, "step": 343000 }, { "epoch": 0.99, "learning_rate": 4.950304366200445e-05, "loss": 2.5357, "step": 343500 }, { "epoch": 1.0, "learning_rate": 4.950232001435717e-05, "loss": 2.5297, "step": 344000 }, { "epoch": 1.0, "learning_rate": 4.9501596366709893e-05, "loss": 2.5294, "step": 344500 }, { "epoch": 1.0, "learning_rate": 4.9500874166357916e-05, "loss": 2.525, "step": 345000 }, { "epoch": 1.0, "eval_accuracy": 0.6262474735680433, "eval_accuracy_mlm": 0.5877085333313983, "eval_accuracy_nsp": 0.8331431347476775, "eval_loss": 2.4722180366516113, "eval_runtime": 330.2224, "eval_samples_per_second": 1321.491, "eval_steps_per_second": 55.063, "step": 345472 }, { "epoch": 1.0, "learning_rate": 4.950015051871064e-05, "loss": 2.5138, "step": 345500 }, { "epoch": 1.0, "learning_rate": 4.949942687106336e-05, "loss": 2.5146, "step": 346000 }, { "epoch": 1.0, "learning_rate": 4.949870322341608e-05, "loss": 2.5179, "step": 346500 }, { "epoch": 1.0, "learning_rate": 4.9497979575768805e-05, "loss": 2.5145, "step": 347000 }, { "epoch": 1.01, "learning_rate": 4.949725592812153e-05, "loss": 2.4945, "step": 347500 }, { "epoch": 1.01, "learning_rate": 4.949653228047425e-05, "loss": 2.5222, "step": 348000 }, { "epoch": 1.01, "learning_rate": 4.949580863282697e-05, "loss": 2.5155, "step": 348500 }, { "epoch": 1.01, "learning_rate": 4.9495084985179694e-05, "loss": 2.5079, "step": 349000 }, { "epoch": 1.01, "learning_rate": 4.9494362784827716e-05, "loss": 2.5266, "step": 349500 }, { "epoch": 1.01, "learning_rate": 4.9493639137180445e-05, "loss": 2.5136, "step": 350000 }, { "epoch": 1.01, "learning_rate": 4.949291548953317e-05, "loss": 2.5107, "step": 350500 }, { "epoch": 1.02, "learning_rate": 4.949219184188589e-05, "loss": 2.5021, "step": 351000 }, { "epoch": 1.02, "learning_rate": 4.949146819423861e-05, "loss": 2.5204, "step": 351500 }, { "epoch": 1.02, "learning_rate": 4.949074599388663e-05, "loss": 2.5008, "step": 352000 }, { "epoch": 1.02, "learning_rate": 4.949002234623935e-05, "loss": 2.5348, "step": 352500 }, { "epoch": 1.02, "learning_rate": 4.948929869859207e-05, "loss": 2.5246, "step": 353000 }, { "epoch": 1.02, "learning_rate": 4.9488575050944794e-05, "loss": 2.5132, "step": 353500 }, { "epoch": 1.02, "learning_rate": 4.9487851403297516e-05, "loss": 2.518, "step": 354000 }, { "epoch": 1.03, "learning_rate": 4.9487127755650245e-05, "loss": 2.4789, "step": 354500 }, { "epoch": 1.03, "learning_rate": 4.948640555529826e-05, "loss": 2.5195, "step": 355000 }, { "epoch": 1.03, "learning_rate": 4.948568190765098e-05, "loss": 2.5053, "step": 355500 }, { "epoch": 1.03, "learning_rate": 4.9484958260003705e-05, "loss": 2.5159, "step": 356000 }, { "epoch": 1.03, "learning_rate": 4.948423461235643e-05, "loss": 2.4797, "step": 356500 }, { "epoch": 1.03, "learning_rate": 4.948351241200444e-05, "loss": 2.5152, "step": 357000 }, { "epoch": 1.03, "learning_rate": 4.948279021165247e-05, "loss": 2.4891, "step": 357500 }, { "epoch": 1.04, "learning_rate": 4.9482066564005194e-05, "loss": 2.5081, "step": 358000 }, { "epoch": 1.04, "learning_rate": 4.948134291635792e-05, "loss": 2.5037, "step": 358500 }, { "epoch": 1.04, "learning_rate": 4.948061926871064e-05, "loss": 2.5016, "step": 359000 }, { "epoch": 1.04, "learning_rate": 4.947989562106336e-05, "loss": 2.4997, "step": 359500 }, { "epoch": 1.04, "learning_rate": 4.947917197341608e-05, "loss": 2.4921, "step": 360000 }, { "epoch": 1.04, "learning_rate": 4.9478448325768806e-05, "loss": 2.5125, "step": 360500 }, { "epoch": 1.04, "learning_rate": 4.947772612541682e-05, "loss": 2.5305, "step": 361000 }, { "epoch": 1.05, "learning_rate": 4.947700247776954e-05, "loss": 2.5027, "step": 361500 }, { "epoch": 1.05, "learning_rate": 4.947627883012227e-05, "loss": 2.51, "step": 362000 }, { "epoch": 1.05, "learning_rate": 4.9475555182474995e-05, "loss": 2.5035, "step": 362500 }, { "epoch": 1.05, "learning_rate": 4.947483153482772e-05, "loss": 2.5229, "step": 363000 }, { "epoch": 1.05, "learning_rate": 4.947410788718044e-05, "loss": 2.5101, "step": 363500 }, { "epoch": 1.05, "learning_rate": 4.947338423953316e-05, "loss": 2.5042, "step": 364000 }, { "epoch": 1.06, "learning_rate": 4.947266059188589e-05, "loss": 2.5228, "step": 364500 }, { "epoch": 1.06, "learning_rate": 4.947193694423861e-05, "loss": 2.501, "step": 365000 }, { "epoch": 1.06, "learning_rate": 4.947121619118192e-05, "loss": 2.5274, "step": 365500 }, { "epoch": 1.06, "learning_rate": 4.9470492543534644e-05, "loss": 2.5239, "step": 366000 }, { "epoch": 1.06, "learning_rate": 4.946976889588737e-05, "loss": 2.5091, "step": 366500 }, { "epoch": 1.06, "learning_rate": 4.9469045248240095e-05, "loss": 2.4958, "step": 367000 }, { "epoch": 1.06, "learning_rate": 4.946832160059282e-05, "loss": 2.5151, "step": 367500 }, { "epoch": 1.07, "learning_rate": 4.946759940024083e-05, "loss": 2.4916, "step": 368000 }, { "epoch": 1.07, "learning_rate": 4.9466875752593555e-05, "loss": 2.5197, "step": 368500 }, { "epoch": 1.07, "learning_rate": 4.946615210494628e-05, "loss": 2.5194, "step": 369000 }, { "epoch": 1.07, "learning_rate": 4.9465428457299e-05, "loss": 2.4897, "step": 369500 }, { "epoch": 1.07, "learning_rate": 4.946470480965172e-05, "loss": 2.4969, "step": 370000 }, { "epoch": 1.07, "learning_rate": 4.9463981162004444e-05, "loss": 2.5156, "step": 370500 }, { "epoch": 1.07, "learning_rate": 4.946325751435717e-05, "loss": 2.5165, "step": 371000 }, { "epoch": 1.08, "learning_rate": 4.9462533866709895e-05, "loss": 2.5243, "step": 371500 }, { "epoch": 1.08, "learning_rate": 4.9461810219062624e-05, "loss": 2.5281, "step": 372000 }, { "epoch": 1.08, "learning_rate": 4.946108801871064e-05, "loss": 2.5226, "step": 372500 }, { "epoch": 1.08, "learning_rate": 4.946036437106336e-05, "loss": 2.5179, "step": 373000 }, { "epoch": 1.08, "learning_rate": 4.9459640723416084e-05, "loss": 2.5278, "step": 373500 }, { "epoch": 1.08, "learning_rate": 4.9458917075768806e-05, "loss": 2.5329, "step": 374000 }, { "epoch": 1.08, "learning_rate": 4.945819487541682e-05, "loss": 2.5171, "step": 374500 }, { "epoch": 1.09, "learning_rate": 4.9457471227769544e-05, "loss": 2.5229, "step": 375000 }, { "epoch": 1.09, "learning_rate": 4.945674758012227e-05, "loss": 2.5151, "step": 375500 }, { "epoch": 1.09, "learning_rate": 4.9456023932474995e-05, "loss": 2.5138, "step": 376000 }, { "epoch": 1.09, "learning_rate": 4.945530028482772e-05, "loss": 2.5092, "step": 376500 }, { "epoch": 1.09, "learning_rate": 4.945457663718044e-05, "loss": 2.5097, "step": 377000 }, { "epoch": 1.09, "learning_rate": 4.9453854436828455e-05, "loss": 2.5012, "step": 377500 }, { "epoch": 1.09, "learning_rate": 4.945313078918118e-05, "loss": 2.5067, "step": 378000 }, { "epoch": 1.1, "learning_rate": 4.94524071415339e-05, "loss": 2.5044, "step": 378500 }, { "epoch": 1.1, "learning_rate": 4.945168349388662e-05, "loss": 2.5119, "step": 379000 }, { "epoch": 1.1, "learning_rate": 4.9450959846239344e-05, "loss": 2.4923, "step": 379500 }, { "epoch": 1.1, "learning_rate": 4.945023619859207e-05, "loss": 2.4909, "step": 380000 }, { "epoch": 1.1, "learning_rate": 4.9449513998240096e-05, "loss": 2.5173, "step": 380500 }, { "epoch": 1.1, "learning_rate": 4.944879035059282e-05, "loss": 2.5205, "step": 381000 }, { "epoch": 1.1, "learning_rate": 4.944806670294554e-05, "loss": 2.4839, "step": 381500 }, { "epoch": 1.11, "learning_rate": 4.944734305529826e-05, "loss": 2.5309, "step": 382000 }, { "epoch": 1.11, "learning_rate": 4.9446619407650985e-05, "loss": 2.5114, "step": 382500 }, { "epoch": 1.11, "learning_rate": 4.944589576000371e-05, "loss": 2.5328, "step": 383000 }, { "epoch": 1.11, "learning_rate": 4.944517211235643e-05, "loss": 2.54, "step": 383500 }, { "epoch": 1.11, "learning_rate": 4.9444449912004445e-05, "loss": 2.4924, "step": 384000 }, { "epoch": 1.11, "learning_rate": 4.9443726264357174e-05, "loss": 2.5132, "step": 384500 }, { "epoch": 1.11, "learning_rate": 4.9443002616709896e-05, "loss": 2.5103, "step": 385000 }, { "epoch": 1.12, "learning_rate": 4.944227896906262e-05, "loss": 2.5259, "step": 385500 }, { "epoch": 1.12, "learning_rate": 4.944155532141534e-05, "loss": 2.4902, "step": 386000 }, { "epoch": 1.12, "learning_rate": 4.9440833121063356e-05, "loss": 2.5077, "step": 386500 }, { "epoch": 1.12, "learning_rate": 4.944010947341608e-05, "loss": 2.4976, "step": 387000 }, { "epoch": 1.12, "learning_rate": 4.943938582576881e-05, "loss": 2.5192, "step": 387500 }, { "epoch": 1.12, "learning_rate": 4.943866217812153e-05, "loss": 2.5017, "step": 388000 }, { "epoch": 1.12, "learning_rate": 4.943793853047425e-05, "loss": 2.4885, "step": 388500 }, { "epoch": 1.13, "learning_rate": 4.9437214882826974e-05, "loss": 2.4972, "step": 389000 }, { "epoch": 1.13, "learning_rate": 4.9436491235179696e-05, "loss": 2.5149, "step": 389500 }, { "epoch": 1.13, "learning_rate": 4.943576903482772e-05, "loss": 2.4801, "step": 390000 }, { "epoch": 1.13, "learning_rate": 4.943504538718044e-05, "loss": 2.5214, "step": 390500 }, { "epoch": 1.13, "learning_rate": 4.943432173953316e-05, "loss": 2.5126, "step": 391000 }, { "epoch": 1.13, "learning_rate": 4.9433598091885885e-05, "loss": 2.5125, "step": 391500 }, { "epoch": 1.13, "learning_rate": 4.943287444423861e-05, "loss": 2.5096, "step": 392000 }, { "epoch": 1.14, "learning_rate": 4.943215224388662e-05, "loss": 2.5078, "step": 392500 }, { "epoch": 1.14, "learning_rate": 4.943142859623935e-05, "loss": 2.4889, "step": 393000 }, { "epoch": 1.14, "learning_rate": 4.9430704948592074e-05, "loss": 2.5152, "step": 393500 }, { "epoch": 1.14, "learning_rate": 4.9429981300944796e-05, "loss": 2.4852, "step": 394000 }, { "epoch": 1.14, "learning_rate": 4.9429257653297525e-05, "loss": 2.4944, "step": 394500 }, { "epoch": 1.14, "learning_rate": 4.942853545294554e-05, "loss": 2.4867, "step": 395000 }, { "epoch": 1.14, "learning_rate": 4.942781180529826e-05, "loss": 2.5099, "step": 395500 }, { "epoch": 1.15, "learning_rate": 4.9427088157650985e-05, "loss": 2.5048, "step": 396000 }, { "epoch": 1.15, "learning_rate": 4.942636451000371e-05, "loss": 2.5001, "step": 396500 }, { "epoch": 1.15, "learning_rate": 4.942564230965172e-05, "loss": 2.4917, "step": 397000 }, { "epoch": 1.15, "learning_rate": 4.942491866200445e-05, "loss": 2.4789, "step": 397500 }, { "epoch": 1.15, "learning_rate": 4.9424195014357174e-05, "loss": 2.4923, "step": 398000 }, { "epoch": 1.15, "learning_rate": 4.94234713667099e-05, "loss": 2.5209, "step": 398500 }, { "epoch": 1.15, "learning_rate": 4.942274771906262e-05, "loss": 2.4976, "step": 399000 }, { "epoch": 1.16, "learning_rate": 4.942202407141534e-05, "loss": 2.5105, "step": 399500 }, { "epoch": 1.16, "learning_rate": 4.942130187106336e-05, "loss": 2.4869, "step": 400000 }, { "epoch": 1.16, "learning_rate": 4.942057822341608e-05, "loss": 2.5045, "step": 400500 }, { "epoch": 1.16, "learning_rate": 4.94198545757688e-05, "loss": 2.4824, "step": 401000 }, { "epoch": 1.16, "learning_rate": 4.9419130928121523e-05, "loss": 2.5137, "step": 401500 }, { "epoch": 1.16, "learning_rate": 4.941840728047425e-05, "loss": 2.5157, "step": 402000 }, { "epoch": 1.17, "learning_rate": 4.9417685080122275e-05, "loss": 2.4984, "step": 402500 }, { "epoch": 1.17, "learning_rate": 4.9416961432475e-05, "loss": 2.5037, "step": 403000 }, { "epoch": 1.17, "learning_rate": 4.941623778482772e-05, "loss": 2.5096, "step": 403500 }, { "epoch": 1.17, "learning_rate": 4.941551413718044e-05, "loss": 2.5093, "step": 404000 }, { "epoch": 1.17, "learning_rate": 4.9414790489533164e-05, "loss": 2.5198, "step": 404500 }, { "epoch": 1.17, "learning_rate": 4.941406828918118e-05, "loss": 2.5293, "step": 405000 }, { "epoch": 1.17, "learning_rate": 4.94133446415339e-05, "loss": 2.4997, "step": 405500 }, { "epoch": 1.18, "learning_rate": 4.9412620993886624e-05, "loss": 2.5142, "step": 406000 }, { "epoch": 1.18, "learning_rate": 4.941189734623935e-05, "loss": 2.5088, "step": 406500 }, { "epoch": 1.18, "learning_rate": 4.9411173698592075e-05, "loss": 2.4991, "step": 407000 }, { "epoch": 1.18, "learning_rate": 4.94104500509448e-05, "loss": 2.5031, "step": 407500 }, { "epoch": 1.18, "learning_rate": 4.940972785059281e-05, "loss": 2.5247, "step": 408000 }, { "epoch": 1.18, "learning_rate": 4.9409004202945535e-05, "loss": 2.5288, "step": 408500 }, { "epoch": 1.18, "learning_rate": 4.940828344988885e-05, "loss": 2.5248, "step": 409000 }, { "epoch": 1.19, "learning_rate": 4.940755980224157e-05, "loss": 2.5121, "step": 409500 }, { "epoch": 1.19, "learning_rate": 4.94068361545943e-05, "loss": 2.4909, "step": 410000 }, { "epoch": 1.19, "learning_rate": 4.9406112506947024e-05, "loss": 2.4942, "step": 410500 }, { "epoch": 1.19, "learning_rate": 4.9405388859299746e-05, "loss": 2.5046, "step": 411000 }, { "epoch": 1.19, "learning_rate": 4.940466521165247e-05, "loss": 2.4941, "step": 411500 }, { "epoch": 1.19, "learning_rate": 4.940394156400519e-05, "loss": 2.5106, "step": 412000 }, { "epoch": 1.19, "learning_rate": 4.940321791635791e-05, "loss": 2.511, "step": 412500 }, { "epoch": 1.2, "learning_rate": 4.9402494268710635e-05, "loss": 2.5029, "step": 413000 }, { "epoch": 1.2, "learning_rate": 4.940177206835865e-05, "loss": 2.5084, "step": 413500 }, { "epoch": 1.2, "learning_rate": 4.940104842071138e-05, "loss": 2.495, "step": 414000 }, { "epoch": 1.2, "learning_rate": 4.94003247730641e-05, "loss": 2.5141, "step": 414500 }, { "epoch": 1.2, "learning_rate": 4.9399601125416824e-05, "loss": 2.4925, "step": 415000 }, { "epoch": 1.2, "learning_rate": 4.9398877477769547e-05, "loss": 2.5013, "step": 415500 }, { "epoch": 1.2, "learning_rate": 4.939815383012227e-05, "loss": 2.5177, "step": 416000 }, { "epoch": 1.21, "learning_rate": 4.939743018247499e-05, "loss": 2.4974, "step": 416500 }, { "epoch": 1.21, "learning_rate": 4.939670653482771e-05, "loss": 2.5108, "step": 417000 }, { "epoch": 1.21, "learning_rate": 4.939598288718044e-05, "loss": 2.4925, "step": 417500 }, { "epoch": 1.21, "learning_rate": 4.9395259239533164e-05, "loss": 2.5317, "step": 418000 }, { "epoch": 1.21, "learning_rate": 4.939453703918118e-05, "loss": 2.5045, "step": 418500 }, { "epoch": 1.21, "learning_rate": 4.93938133915339e-05, "loss": 2.4958, "step": 419000 }, { "epoch": 1.21, "learning_rate": 4.939308974388663e-05, "loss": 2.4963, "step": 419500 }, { "epoch": 1.22, "learning_rate": 4.939236754353465e-05, "loss": 2.4783, "step": 420000 }, { "epoch": 1.22, "learning_rate": 4.939164389588737e-05, "loss": 2.497, "step": 420500 }, { "epoch": 1.22, "learning_rate": 4.939092024824009e-05, "loss": 2.4957, "step": 421000 }, { "epoch": 1.22, "learning_rate": 4.9390196600592814e-05, "loss": 2.4938, "step": 421500 }, { "epoch": 1.22, "learning_rate": 4.9389472952945536e-05, "loss": 2.4833, "step": 422000 }, { "epoch": 1.22, "learning_rate": 4.938874930529826e-05, "loss": 2.5055, "step": 422500 }, { "epoch": 1.22, "learning_rate": 4.938802565765098e-05, "loss": 2.5022, "step": 423000 }, { "epoch": 1.23, "learning_rate": 4.93873020100037e-05, "loss": 2.5036, "step": 423500 }, { "epoch": 1.23, "learning_rate": 4.938657836235643e-05, "loss": 2.5116, "step": 424000 }, { "epoch": 1.23, "learning_rate": 4.938585616200445e-05, "loss": 2.5032, "step": 424500 }, { "epoch": 1.23, "learning_rate": 4.9385132514357176e-05, "loss": 2.5109, "step": 425000 }, { "epoch": 1.23, "learning_rate": 4.938441031400519e-05, "loss": 2.5154, "step": 425500 }, { "epoch": 1.23, "learning_rate": 4.9383686666357914e-05, "loss": 2.4932, "step": 426000 }, { "epoch": 1.23, "learning_rate": 4.9382963018710636e-05, "loss": 2.4841, "step": 426500 }, { "epoch": 1.24, "learning_rate": 4.938223937106336e-05, "loss": 2.5052, "step": 427000 }, { "epoch": 1.24, "learning_rate": 4.938151572341608e-05, "loss": 2.4979, "step": 427500 }, { "epoch": 1.24, "learning_rate": 4.93807920757688e-05, "loss": 2.5041, "step": 428000 }, { "epoch": 1.24, "learning_rate": 4.9380069875416825e-05, "loss": 2.5115, "step": 428500 }, { "epoch": 1.24, "learning_rate": 4.937934622776955e-05, "loss": 2.501, "step": 429000 }, { "epoch": 1.24, "learning_rate": 4.937862258012227e-05, "loss": 2.4892, "step": 429500 }, { "epoch": 1.24, "learning_rate": 4.937789893247499e-05, "loss": 2.474, "step": 430000 }, { "epoch": 1.25, "learning_rate": 4.9377175284827714e-05, "loss": 2.482, "step": 430500 }, { "epoch": 1.25, "learning_rate": 4.9376451637180436e-05, "loss": 2.5014, "step": 431000 }, { "epoch": 1.25, "learning_rate": 4.937572798953316e-05, "loss": 2.4921, "step": 431500 }, { "epoch": 1.25, "learning_rate": 4.937500434188589e-05, "loss": 2.4943, "step": 432000 }, { "epoch": 1.25, "learning_rate": 4.937428069423861e-05, "loss": 2.4741, "step": 432500 }, { "epoch": 1.25, "learning_rate": 4.937355849388663e-05, "loss": 2.4568, "step": 433000 }, { "epoch": 1.25, "learning_rate": 4.9372834846239354e-05, "loss": 2.4895, "step": 433500 }, { "epoch": 1.26, "learning_rate": 4.9372111198592077e-05, "loss": 2.5086, "step": 434000 }, { "epoch": 1.26, "learning_rate": 4.93713875509448e-05, "loss": 2.5169, "step": 434500 }, { "epoch": 1.26, "learning_rate": 4.937066390329752e-05, "loss": 2.4948, "step": 435000 }, { "epoch": 1.26, "learning_rate": 4.936994025565024e-05, "loss": 2.4773, "step": 435500 }, { "epoch": 1.26, "learning_rate": 4.9369216608002965e-05, "loss": 2.4763, "step": 436000 }, { "epoch": 1.26, "learning_rate": 4.936849296035569e-05, "loss": 2.4989, "step": 436500 }, { "epoch": 1.26, "learning_rate": 4.936776931270841e-05, "loss": 2.4928, "step": 437000 }, { "epoch": 1.27, "learning_rate": 4.9367048559651726e-05, "loss": 2.4956, "step": 437500 }, { "epoch": 1.27, "learning_rate": 4.936632635929974e-05, "loss": 2.5082, "step": 438000 }, { "epoch": 1.27, "learning_rate": 4.936560271165246e-05, "loss": 2.5149, "step": 438500 }, { "epoch": 1.27, "learning_rate": 4.9364879064005186e-05, "loss": 2.5122, "step": 439000 }, { "epoch": 1.27, "learning_rate": 4.936415541635791e-05, "loss": 2.483, "step": 439500 }, { "epoch": 1.27, "learning_rate": 4.936343176871064e-05, "loss": 2.5168, "step": 440000 }, { "epoch": 1.28, "learning_rate": 4.936270812106336e-05, "loss": 2.4978, "step": 440500 }, { "epoch": 1.28, "learning_rate": 4.936198447341608e-05, "loss": 2.4748, "step": 441000 }, { "epoch": 1.28, "learning_rate": 4.936126082576881e-05, "loss": 2.5069, "step": 441500 }, { "epoch": 1.28, "learning_rate": 4.936053717812153e-05, "loss": 2.4774, "step": 442000 }, { "epoch": 1.28, "learning_rate": 4.935981497776955e-05, "loss": 2.4984, "step": 442500 }, { "epoch": 1.28, "learning_rate": 4.935909133012227e-05, "loss": 2.5064, "step": 443000 }, { "epoch": 1.28, "learning_rate": 4.935836768247499e-05, "loss": 2.4752, "step": 443500 }, { "epoch": 1.29, "learning_rate": 4.9357644034827715e-05, "loss": 2.4882, "step": 444000 }, { "epoch": 1.29, "learning_rate": 4.935692038718044e-05, "loss": 2.4752, "step": 444500 }, { "epoch": 1.29, "learning_rate": 4.935619673953316e-05, "loss": 2.4859, "step": 445000 }, { "epoch": 1.29, "learning_rate": 4.935547309188588e-05, "loss": 2.5133, "step": 445500 }, { "epoch": 1.29, "learning_rate": 4.9354750891533904e-05, "loss": 2.5026, "step": 446000 }, { "epoch": 1.29, "learning_rate": 4.9354027243886626e-05, "loss": 2.4923, "step": 446500 }, { "epoch": 1.29, "learning_rate": 4.935330359623935e-05, "loss": 2.484, "step": 447000 }, { "epoch": 1.3, "learning_rate": 4.935257994859208e-05, "loss": 2.496, "step": 447500 }, { "epoch": 1.3, "learning_rate": 4.93518563009448e-05, "loss": 2.5054, "step": 448000 }, { "epoch": 1.3, "learning_rate": 4.935113265329752e-05, "loss": 2.4784, "step": 448500 }, { "epoch": 1.3, "learning_rate": 4.9350409005650244e-05, "loss": 2.488, "step": 449000 }, { "epoch": 1.3, "learning_rate": 4.934968680529826e-05, "loss": 2.5065, "step": 449500 }, { "epoch": 1.3, "learning_rate": 4.934896315765098e-05, "loss": 2.5161, "step": 450000 }, { "epoch": 1.3, "learning_rate": 4.934823951000371e-05, "loss": 2.4963, "step": 450500 }, { "epoch": 1.31, "learning_rate": 4.9347517309651726e-05, "loss": 2.5131, "step": 451000 }, { "epoch": 1.31, "learning_rate": 4.934679510929974e-05, "loss": 2.4986, "step": 451500 }, { "epoch": 1.31, "learning_rate": 4.9346071461652464e-05, "loss": 2.4783, "step": 452000 }, { "epoch": 1.31, "learning_rate": 4.9345347814005186e-05, "loss": 2.5201, "step": 452500 }, { "epoch": 1.31, "learning_rate": 4.934462416635791e-05, "loss": 2.4925, "step": 453000 }, { "epoch": 1.31, "learning_rate": 4.934390051871063e-05, "loss": 2.5166, "step": 453500 }, { "epoch": 1.31, "learning_rate": 4.934317687106336e-05, "loss": 2.5019, "step": 454000 }, { "epoch": 1.32, "learning_rate": 4.934245322341608e-05, "loss": 2.4894, "step": 454500 }, { "epoch": 1.32, "learning_rate": 4.934172957576881e-05, "loss": 2.4914, "step": 455000 }, { "epoch": 1.32, "learning_rate": 4.934100737541683e-05, "loss": 2.5387, "step": 455500 }, { "epoch": 1.32, "learning_rate": 4.934028372776955e-05, "loss": 2.4868, "step": 456000 }, { "epoch": 1.32, "learning_rate": 4.933956008012227e-05, "loss": 2.4947, "step": 456500 }, { "epoch": 1.32, "learning_rate": 4.933883787977029e-05, "loss": 2.5142, "step": 457000 }, { "epoch": 1.32, "learning_rate": 4.933811423212301e-05, "loss": 2.5149, "step": 457500 }, { "epoch": 1.33, "learning_rate": 4.933739058447573e-05, "loss": 2.5074, "step": 458000 }, { "epoch": 1.33, "learning_rate": 4.933666693682846e-05, "loss": 2.514, "step": 458500 }, { "epoch": 1.33, "learning_rate": 4.933594328918118e-05, "loss": 2.5022, "step": 459000 }, { "epoch": 1.33, "learning_rate": 4.9335219641533905e-05, "loss": 2.4818, "step": 459500 }, { "epoch": 1.33, "learning_rate": 4.933449599388663e-05, "loss": 2.4612, "step": 460000 }, { "epoch": 1.33, "learning_rate": 4.933377234623935e-05, "loss": 2.511, "step": 460500 }, { "epoch": 1.33, "learning_rate": 4.933304869859207e-05, "loss": 2.5015, "step": 461000 }, { "epoch": 1.34, "learning_rate": 4.9332325050944794e-05, "loss": 2.5214, "step": 461500 }, { "epoch": 1.34, "learning_rate": 4.933160140329752e-05, "loss": 2.4951, "step": 462000 }, { "epoch": 1.34, "learning_rate": 4.933087920294554e-05, "loss": 2.4968, "step": 462500 }, { "epoch": 1.34, "learning_rate": 4.933015555529826e-05, "loss": 2.4917, "step": 463000 }, { "epoch": 1.34, "learning_rate": 4.932943190765098e-05, "loss": 2.4832, "step": 463500 }, { "epoch": 1.34, "learning_rate": 4.932870826000371e-05, "loss": 2.5156, "step": 464000 }, { "epoch": 1.34, "learning_rate": 4.9327984612356434e-05, "loss": 2.5042, "step": 464500 }, { "epoch": 1.35, "learning_rate": 4.9327260964709156e-05, "loss": 2.5103, "step": 465000 }, { "epoch": 1.35, "learning_rate": 4.932653731706188e-05, "loss": 2.5106, "step": 465500 }, { "epoch": 1.35, "learning_rate": 4.93258136694146e-05, "loss": 2.4987, "step": 466000 }, { "epoch": 1.35, "learning_rate": 4.932509002176732e-05, "loss": 2.5337, "step": 466500 }, { "epoch": 1.35, "learning_rate": 4.9324366374120045e-05, "loss": 2.5077, "step": 467000 }, { "epoch": 1.35, "learning_rate": 4.932364272647277e-05, "loss": 2.5011, "step": 467500 }, { "epoch": 1.35, "learning_rate": 4.932291907882549e-05, "loss": 2.4958, "step": 468000 }, { "epoch": 1.36, "learning_rate": 4.932219687847351e-05, "loss": 2.5053, "step": 468500 }, { "epoch": 1.36, "learning_rate": 4.9321473230826234e-05, "loss": 2.5022, "step": 469000 }, { "epoch": 1.36, "learning_rate": 4.932074958317896e-05, "loss": 2.4982, "step": 469500 }, { "epoch": 1.36, "learning_rate": 4.9320025935531685e-05, "loss": 2.4773, "step": 470000 }, { "epoch": 1.36, "learning_rate": 4.9319305182474994e-05, "loss": 2.5151, "step": 470500 }, { "epoch": 1.36, "learning_rate": 4.9318581534827716e-05, "loss": 2.5076, "step": 471000 }, { "epoch": 1.36, "learning_rate": 4.931785788718044e-05, "loss": 2.501, "step": 471500 }, { "epoch": 1.37, "learning_rate": 4.931713423953316e-05, "loss": 2.4904, "step": 472000 }, { "epoch": 1.37, "learning_rate": 4.931641059188589e-05, "loss": 2.5191, "step": 472500 }, { "epoch": 1.37, "learning_rate": 4.931568694423861e-05, "loss": 2.5079, "step": 473000 }, { "epoch": 1.37, "learning_rate": 4.9314963296591334e-05, "loss": 2.5204, "step": 473500 }, { "epoch": 1.37, "learning_rate": 4.9314239648944057e-05, "loss": 2.4859, "step": 474000 }, { "epoch": 1.37, "learning_rate": 4.931351600129678e-05, "loss": 2.4855, "step": 474500 }, { "epoch": 1.37, "learning_rate": 4.93127923536495e-05, "loss": 2.518, "step": 475000 }, { "epoch": 1.38, "learning_rate": 4.931206870600222e-05, "loss": 2.488, "step": 475500 }, { "epoch": 1.38, "learning_rate": 4.931134650565024e-05, "loss": 2.4985, "step": 476000 }, { "epoch": 1.38, "learning_rate": 4.931062285800296e-05, "loss": 2.5175, "step": 476500 }, { "epoch": 1.38, "learning_rate": 4.930989921035569e-05, "loss": 2.4883, "step": 477000 }, { "epoch": 1.38, "learning_rate": 4.930917556270841e-05, "loss": 2.4961, "step": 477500 }, { "epoch": 1.38, "learning_rate": 4.9308451915061135e-05, "loss": 2.5053, "step": 478000 }, { "epoch": 1.39, "learning_rate": 4.930772971470916e-05, "loss": 2.5313, "step": 478500 }, { "epoch": 1.39, "learning_rate": 4.930700606706188e-05, "loss": 2.4957, "step": 479000 }, { "epoch": 1.39, "learning_rate": 4.93062824194146e-05, "loss": 2.4974, "step": 479500 }, { "epoch": 1.39, "learning_rate": 4.930556021906262e-05, "loss": 2.4726, "step": 480000 }, { "epoch": 1.39, "learning_rate": 4.930483657141534e-05, "loss": 2.5018, "step": 480500 }, { "epoch": 1.39, "learning_rate": 4.930411292376806e-05, "loss": 2.522, "step": 481000 }, { "epoch": 1.39, "learning_rate": 4.930338927612079e-05, "loss": 2.491, "step": 481500 }, { "epoch": 1.4, "learning_rate": 4.930266562847351e-05, "loss": 2.499, "step": 482000 }, { "epoch": 1.4, "learning_rate": 4.930194342812153e-05, "loss": 2.4959, "step": 482500 }, { "epoch": 1.4, "learning_rate": 4.930121978047425e-05, "loss": 2.5091, "step": 483000 }, { "epoch": 1.4, "learning_rate": 4.930049613282697e-05, "loss": 2.4756, "step": 483500 }, { "epoch": 1.4, "learning_rate": 4.9299772485179695e-05, "loss": 2.4984, "step": 484000 }, { "epoch": 1.4, "learning_rate": 4.9299048837532424e-05, "loss": 2.5107, "step": 484500 }, { "epoch": 1.4, "learning_rate": 4.9298325189885146e-05, "loss": 2.5076, "step": 485000 }, { "epoch": 1.41, "learning_rate": 4.929760154223787e-05, "loss": 2.4887, "step": 485500 }, { "epoch": 1.41, "learning_rate": 4.929687789459059e-05, "loss": 2.496, "step": 486000 }, { "epoch": 1.41, "learning_rate": 4.929615569423861e-05, "loss": 2.4606, "step": 486500 }, { "epoch": 1.41, "learning_rate": 4.9295432046591335e-05, "loss": 2.5043, "step": 487000 }, { "epoch": 1.41, "learning_rate": 4.929470839894406e-05, "loss": 2.5159, "step": 487500 }, { "epoch": 1.41, "learning_rate": 4.929398475129678e-05, "loss": 2.5117, "step": 488000 }, { "epoch": 1.41, "learning_rate": 4.92932611036495e-05, "loss": 2.5025, "step": 488500 }, { "epoch": 1.42, "learning_rate": 4.9292537456002224e-05, "loss": 2.4962, "step": 489000 }, { "epoch": 1.42, "learning_rate": 4.9291813808354946e-05, "loss": 2.5245, "step": 489500 }, { "epoch": 1.42, "learning_rate": 4.929109016070767e-05, "loss": 2.4995, "step": 490000 }, { "epoch": 1.42, "learning_rate": 4.929036796035569e-05, "loss": 2.5143, "step": 490500 }, { "epoch": 1.42, "learning_rate": 4.928964431270841e-05, "loss": 2.489, "step": 491000 }, { "epoch": 1.42, "learning_rate": 4.9288920665061135e-05, "loss": 2.5129, "step": 491500 }, { "epoch": 1.42, "learning_rate": 4.9288197017413864e-05, "loss": 2.49, "step": 492000 }, { "epoch": 1.43, "learning_rate": 4.9287473369766587e-05, "loss": 2.481, "step": 492500 }, { "epoch": 1.43, "learning_rate": 4.92867511694146e-05, "loss": 2.4899, "step": 493000 }, { "epoch": 1.43, "learning_rate": 4.928602896906262e-05, "loss": 2.4908, "step": 493500 }, { "epoch": 1.43, "learning_rate": 4.928530532141534e-05, "loss": 2.491, "step": 494000 }, { "epoch": 1.43, "learning_rate": 4.928458167376806e-05, "loss": 2.4896, "step": 494500 }, { "epoch": 1.43, "learning_rate": 4.928385802612079e-05, "loss": 2.5265, "step": 495000 }, { "epoch": 1.43, "learning_rate": 4.9283134378473513e-05, "loss": 2.4916, "step": 495500 }, { "epoch": 1.44, "learning_rate": 4.9282410730826236e-05, "loss": 2.4934, "step": 496000 }, { "epoch": 1.44, "learning_rate": 4.928168853047425e-05, "loss": 2.4993, "step": 496500 }, { "epoch": 1.44, "learning_rate": 4.928096633012227e-05, "loss": 2.4813, "step": 497000 }, { "epoch": 1.44, "learning_rate": 4.928024268247499e-05, "loss": 2.5088, "step": 497500 }, { "epoch": 1.44, "learning_rate": 4.927951903482772e-05, "loss": 2.5156, "step": 498000 }, { "epoch": 1.44, "learning_rate": 4.927879538718044e-05, "loss": 2.4897, "step": 498500 }, { "epoch": 1.44, "learning_rate": 4.927807173953316e-05, "loss": 2.5155, "step": 499000 }, { "epoch": 1.45, "learning_rate": 4.927734809188589e-05, "loss": 2.4775, "step": 499500 }, { "epoch": 1.45, "learning_rate": 4.9276624444238614e-05, "loss": 2.4957, "step": 500000 }, { "epoch": 1.45, "learning_rate": 4.9275900796591336e-05, "loss": 2.5048, "step": 500500 }, { "epoch": 1.45, "learning_rate": 4.927517714894406e-05, "loss": 2.477, "step": 501000 }, { "epoch": 1.45, "learning_rate": 4.927445350129678e-05, "loss": 2.4807, "step": 501500 }, { "epoch": 1.45, "learning_rate": 4.92737298536495e-05, "loss": 2.5127, "step": 502000 }, { "epoch": 1.45, "learning_rate": 4.9273006206002225e-05, "loss": 2.4951, "step": 502500 }, { "epoch": 1.46, "learning_rate": 4.927228400565024e-05, "loss": 2.5041, "step": 503000 }, { "epoch": 1.46, "learning_rate": 4.927156035800297e-05, "loss": 2.5055, "step": 503500 }, { "epoch": 1.46, "learning_rate": 4.927083671035569e-05, "loss": 2.4868, "step": 504000 }, { "epoch": 1.46, "learning_rate": 4.9270113062708414e-05, "loss": 2.4818, "step": 504500 }, { "epoch": 1.46, "learning_rate": 4.9269389415061136e-05, "loss": 2.5032, "step": 505000 }, { "epoch": 1.46, "learning_rate": 4.926866576741386e-05, "loss": 2.4784, "step": 505500 }, { "epoch": 1.46, "learning_rate": 4.926794211976658e-05, "loss": 2.4953, "step": 506000 }, { "epoch": 1.47, "learning_rate": 4.92672184721193e-05, "loss": 2.4901, "step": 506500 }, { "epoch": 1.47, "learning_rate": 4.9266496271767325e-05, "loss": 2.4891, "step": 507000 }, { "epoch": 1.47, "learning_rate": 4.926577262412005e-05, "loss": 2.5102, "step": 507500 }, { "epoch": 1.47, "learning_rate": 4.926504897647277e-05, "loss": 2.5084, "step": 508000 }, { "epoch": 1.47, "learning_rate": 4.926432532882549e-05, "loss": 2.5296, "step": 508500 }, { "epoch": 1.47, "learning_rate": 4.926360168117822e-05, "loss": 2.5006, "step": 509000 }, { "epoch": 1.47, "learning_rate": 4.926287803353094e-05, "loss": 2.4797, "step": 509500 }, { "epoch": 1.48, "learning_rate": 4.9262154385883665e-05, "loss": 2.4872, "step": 510000 }, { "epoch": 1.48, "learning_rate": 4.926143073823639e-05, "loss": 2.4826, "step": 510500 }, { "epoch": 1.48, "learning_rate": 4.926070709058911e-05, "loss": 2.4968, "step": 511000 }, { "epoch": 1.48, "learning_rate": 4.9259984890237125e-05, "loss": 2.4826, "step": 511500 }, { "epoch": 1.48, "learning_rate": 4.925926268988514e-05, "loss": 2.485, "step": 512000 }, { "epoch": 1.48, "learning_rate": 4.925853904223787e-05, "loss": 2.4727, "step": 512500 }, { "epoch": 1.48, "learning_rate": 4.925781539459059e-05, "loss": 2.5098, "step": 513000 }, { "epoch": 1.49, "learning_rate": 4.9257091746943314e-05, "loss": 2.4909, "step": 513500 }, { "epoch": 1.49, "learning_rate": 4.925636809929604e-05, "loss": 2.4925, "step": 514000 }, { "epoch": 1.49, "learning_rate": 4.9255644451648766e-05, "loss": 2.4956, "step": 514500 }, { "epoch": 1.49, "learning_rate": 4.925492080400149e-05, "loss": 2.4979, "step": 515000 }, { "epoch": 1.49, "learning_rate": 4.9254198603649503e-05, "loss": 2.5061, "step": 515500 }, { "epoch": 1.49, "learning_rate": 4.9253474956002226e-05, "loss": 2.4945, "step": 516000 }, { "epoch": 1.5, "learning_rate": 4.925275130835495e-05, "loss": 2.4895, "step": 516500 }, { "epoch": 1.5, "learning_rate": 4.925202766070767e-05, "loss": 2.4949, "step": 517000 }, { "epoch": 1.5, "learning_rate": 4.925130401306039e-05, "loss": 2.4787, "step": 517500 }, { "epoch": 1.5, "learning_rate": 4.925058036541312e-05, "loss": 2.4998, "step": 518000 }, { "epoch": 1.5, "learning_rate": 4.9249856717765844e-05, "loss": 2.4783, "step": 518500 }, { "epoch": 1.5, "learning_rate": 4.9249133070118566e-05, "loss": 2.4817, "step": 519000 }, { "epoch": 1.5, "learning_rate": 4.924840942247129e-05, "loss": 2.5147, "step": 519500 }, { "epoch": 1.51, "learning_rate": 4.9247687222119304e-05, "loss": 2.4879, "step": 520000 }, { "epoch": 1.51, "learning_rate": 4.9246963574472026e-05, "loss": 2.4775, "step": 520500 }, { "epoch": 1.51, "learning_rate": 4.924623992682475e-05, "loss": 2.5011, "step": 521000 }, { "epoch": 1.51, "learning_rate": 4.924551627917748e-05, "loss": 2.4795, "step": 521500 }, { "epoch": 1.51, "learning_rate": 4.92447926315302e-05, "loss": 2.4879, "step": 522000 }, { "epoch": 1.51, "learning_rate": 4.924406898388292e-05, "loss": 2.4898, "step": 522500 }, { "epoch": 1.51, "learning_rate": 4.9243346783530944e-05, "loss": 2.4919, "step": 523000 }, { "epoch": 1.52, "learning_rate": 4.924262458317896e-05, "loss": 2.4747, "step": 523500 }, { "epoch": 1.52, "learning_rate": 4.924190093553168e-05, "loss": 2.5038, "step": 524000 }, { "epoch": 1.52, "learning_rate": 4.9241177287884404e-05, "loss": 2.482, "step": 524500 }, { "epoch": 1.52, "learning_rate": 4.9240453640237126e-05, "loss": 2.4987, "step": 525000 }, { "epoch": 1.52, "learning_rate": 4.923972999258985e-05, "loss": 2.5086, "step": 525500 }, { "epoch": 1.52, "learning_rate": 4.9239009239533164e-05, "loss": 2.4829, "step": 526000 }, { "epoch": 1.52, "learning_rate": 4.9238285591885886e-05, "loss": 2.495, "step": 526500 }, { "epoch": 1.53, "learning_rate": 4.923756194423861e-05, "loss": 2.4826, "step": 527000 }, { "epoch": 1.53, "learning_rate": 4.923683829659133e-05, "loss": 2.495, "step": 527500 }, { "epoch": 1.53, "learning_rate": 4.9236116096239346e-05, "loss": 2.5171, "step": 528000 }, { "epoch": 1.53, "learning_rate": 4.923539244859207e-05, "loss": 2.4922, "step": 528500 }, { "epoch": 1.53, "learning_rate": 4.92346688009448e-05, "loss": 2.5008, "step": 529000 }, { "epoch": 1.53, "learning_rate": 4.923394515329752e-05, "loss": 2.4647, "step": 529500 }, { "epoch": 1.53, "learning_rate": 4.923322150565025e-05, "loss": 2.5083, "step": 530000 }, { "epoch": 1.54, "learning_rate": 4.923249785800297e-05, "loss": 2.5027, "step": 530500 }, { "epoch": 1.54, "learning_rate": 4.923177421035569e-05, "loss": 2.4817, "step": 531000 }, { "epoch": 1.54, "learning_rate": 4.9231050562708416e-05, "loss": 2.4812, "step": 531500 }, { "epoch": 1.54, "learning_rate": 4.923032691506114e-05, "loss": 2.4984, "step": 532000 }, { "epoch": 1.54, "learning_rate": 4.922960326741386e-05, "loss": 2.4879, "step": 532500 }, { "epoch": 1.54, "learning_rate": 4.9228881067061876e-05, "loss": 2.4955, "step": 533000 }, { "epoch": 1.54, "learning_rate": 4.92281574194146e-05, "loss": 2.5079, "step": 533500 }, { "epoch": 1.55, "learning_rate": 4.922743377176732e-05, "loss": 2.494, "step": 534000 }, { "epoch": 1.55, "learning_rate": 4.922671012412005e-05, "loss": 2.484, "step": 534500 }, { "epoch": 1.55, "learning_rate": 4.922598647647277e-05, "loss": 2.4868, "step": 535000 }, { "epoch": 1.55, "learning_rate": 4.9225262828825493e-05, "loss": 2.4798, "step": 535500 }, { "epoch": 1.55, "learning_rate": 4.9224539181178216e-05, "loss": 2.4837, "step": 536000 }, { "epoch": 1.55, "learning_rate": 4.922381553353094e-05, "loss": 2.4827, "step": 536500 }, { "epoch": 1.55, "learning_rate": 4.922309188588367e-05, "loss": 2.4863, "step": 537000 }, { "epoch": 1.56, "learning_rate": 4.922236968553168e-05, "loss": 2.4668, "step": 537500 }, { "epoch": 1.56, "learning_rate": 4.9221646037884405e-05, "loss": 2.4885, "step": 538000 }, { "epoch": 1.56, "learning_rate": 4.922092239023713e-05, "loss": 2.4987, "step": 538500 }, { "epoch": 1.56, "learning_rate": 4.922019874258985e-05, "loss": 2.4963, "step": 539000 }, { "epoch": 1.56, "learning_rate": 4.921947509494257e-05, "loss": 2.4776, "step": 539500 }, { "epoch": 1.56, "learning_rate": 4.9218752894590594e-05, "loss": 2.5057, "step": 540000 }, { "epoch": 1.56, "learning_rate": 4.921803069423861e-05, "loss": 2.5057, "step": 540500 }, { "epoch": 1.57, "learning_rate": 4.921730704659133e-05, "loss": 2.4833, "step": 541000 }, { "epoch": 1.57, "learning_rate": 4.9216583398944054e-05, "loss": 2.5018, "step": 541500 }, { "epoch": 1.57, "learning_rate": 4.9215859751296776e-05, "loss": 2.4705, "step": 542000 }, { "epoch": 1.57, "learning_rate": 4.92151361036495e-05, "loss": 2.518, "step": 542500 }, { "epoch": 1.57, "learning_rate": 4.921441245600222e-05, "loss": 2.4667, "step": 543000 }, { "epoch": 1.57, "learning_rate": 4.921368880835495e-05, "loss": 2.4827, "step": 543500 }, { "epoch": 1.57, "learning_rate": 4.921296516070767e-05, "loss": 2.4745, "step": 544000 }, { "epoch": 1.58, "learning_rate": 4.92122415130604e-05, "loss": 2.4953, "step": 544500 }, { "epoch": 1.58, "learning_rate": 4.921151786541312e-05, "loss": 2.4967, "step": 545000 }, { "epoch": 1.58, "learning_rate": 4.9210794217765845e-05, "loss": 2.4962, "step": 545500 }, { "epoch": 1.58, "learning_rate": 4.921007057011857e-05, "loss": 2.5051, "step": 546000 }, { "epoch": 1.58, "learning_rate": 4.920934836976658e-05, "loss": 2.4943, "step": 546500 }, { "epoch": 1.58, "learning_rate": 4.9208624722119305e-05, "loss": 2.4863, "step": 547000 }, { "epoch": 1.58, "learning_rate": 4.920790107447203e-05, "loss": 2.4967, "step": 547500 }, { "epoch": 1.59, "learning_rate": 4.920717887412005e-05, "loss": 2.4824, "step": 548000 }, { "epoch": 1.59, "learning_rate": 4.9206456673768065e-05, "loss": 2.4566, "step": 548500 }, { "epoch": 1.59, "learning_rate": 4.920573302612079e-05, "loss": 2.5106, "step": 549000 }, { "epoch": 1.59, "learning_rate": 4.920500937847351e-05, "loss": 2.4874, "step": 549500 }, { "epoch": 1.59, "learning_rate": 4.920428573082623e-05, "loss": 2.4767, "step": 550000 }, { "epoch": 1.59, "learning_rate": 4.9203562083178954e-05, "loss": 2.4894, "step": 550500 }, { "epoch": 1.59, "learning_rate": 4.9202838435531677e-05, "loss": 2.4815, "step": 551000 }, { "epoch": 1.6, "learning_rate": 4.92021147878844e-05, "loss": 2.5012, "step": 551500 }, { "epoch": 1.6, "learning_rate": 4.920139114023713e-05, "loss": 2.5052, "step": 552000 }, { "epoch": 1.6, "learning_rate": 4.920066749258985e-05, "loss": 2.4719, "step": 552500 }, { "epoch": 1.6, "learning_rate": 4.919994384494257e-05, "loss": 2.5186, "step": 553000 }, { "epoch": 1.6, "learning_rate": 4.9199221644590595e-05, "loss": 2.4941, "step": 553500 }, { "epoch": 1.6, "learning_rate": 4.919849944423861e-05, "loss": 2.4848, "step": 554000 }, { "epoch": 1.61, "learning_rate": 4.919777579659133e-05, "loss": 2.4758, "step": 554500 }, { "epoch": 1.61, "learning_rate": 4.9197052148944055e-05, "loss": 2.4923, "step": 555000 }, { "epoch": 1.61, "learning_rate": 4.919632850129678e-05, "loss": 2.4876, "step": 555500 }, { "epoch": 1.61, "learning_rate": 4.91956048536495e-05, "loss": 2.4918, "step": 556000 }, { "epoch": 1.61, "learning_rate": 4.919488120600222e-05, "loss": 2.4641, "step": 556500 }, { "epoch": 1.61, "learning_rate": 4.919415755835495e-05, "loss": 2.4979, "step": 557000 }, { "epoch": 1.61, "learning_rate": 4.919343391070767e-05, "loss": 2.499, "step": 557500 }, { "epoch": 1.62, "learning_rate": 4.919271171035569e-05, "loss": 2.4795, "step": 558000 }, { "epoch": 1.62, "learning_rate": 4.919198806270841e-05, "loss": 2.4906, "step": 558500 }, { "epoch": 1.62, "learning_rate": 4.919126441506113e-05, "loss": 2.486, "step": 559000 }, { "epoch": 1.62, "learning_rate": 4.919054076741386e-05, "loss": 2.4906, "step": 559500 }, { "epoch": 1.62, "learning_rate": 4.9189817119766584e-05, "loss": 2.4984, "step": 560000 }, { "epoch": 1.62, "learning_rate": 4.91890949194146e-05, "loss": 2.4984, "step": 560500 }, { "epoch": 1.62, "learning_rate": 4.918837127176733e-05, "loss": 2.4949, "step": 561000 }, { "epoch": 1.63, "learning_rate": 4.918764762412005e-05, "loss": 2.4666, "step": 561500 }, { "epoch": 1.63, "learning_rate": 4.918692397647277e-05, "loss": 2.4936, "step": 562000 }, { "epoch": 1.63, "learning_rate": 4.9186200328825495e-05, "loss": 2.4996, "step": 562500 }, { "epoch": 1.63, "learning_rate": 4.918547668117822e-05, "loss": 2.5063, "step": 563000 }, { "epoch": 1.63, "learning_rate": 4.918475448082623e-05, "loss": 2.4814, "step": 563500 }, { "epoch": 1.63, "learning_rate": 4.9184030833178955e-05, "loss": 2.4874, "step": 564000 }, { "epoch": 1.63, "learning_rate": 4.918330718553168e-05, "loss": 2.4583, "step": 564500 }, { "epoch": 1.64, "learning_rate": 4.91825835378844e-05, "loss": 2.4563, "step": 565000 }, { "epoch": 1.64, "learning_rate": 4.918185989023713e-05, "loss": 2.4901, "step": 565500 }, { "epoch": 1.64, "learning_rate": 4.9181137689885144e-05, "loss": 2.4948, "step": 566000 }, { "epoch": 1.64, "learning_rate": 4.9180414042237866e-05, "loss": 2.5025, "step": 566500 }, { "epoch": 1.64, "learning_rate": 4.9179690394590595e-05, "loss": 2.4818, "step": 567000 }, { "epoch": 1.64, "learning_rate": 4.917896674694332e-05, "loss": 2.502, "step": 567500 }, { "epoch": 1.64, "learning_rate": 4.917824309929604e-05, "loss": 2.5016, "step": 568000 }, { "epoch": 1.65, "learning_rate": 4.9177520898944055e-05, "loss": 2.4501, "step": 568500 }, { "epoch": 1.65, "learning_rate": 4.917679725129678e-05, "loss": 2.4832, "step": 569000 }, { "epoch": 1.65, "learning_rate": 4.91760736036495e-05, "loss": 2.4943, "step": 569500 }, { "epoch": 1.65, "learning_rate": 4.917534995600223e-05, "loss": 2.4702, "step": 570000 }, { "epoch": 1.65, "learning_rate": 4.917462630835495e-05, "loss": 2.4958, "step": 570500 }, { "epoch": 1.65, "learning_rate": 4.917390266070767e-05, "loss": 2.4763, "step": 571000 }, { "epoch": 1.65, "learning_rate": 4.9173179013060396e-05, "loss": 2.4955, "step": 571500 }, { "epoch": 1.66, "learning_rate": 4.917245536541312e-05, "loss": 2.4843, "step": 572000 }, { "epoch": 1.66, "learning_rate": 4.917173316506113e-05, "loss": 2.5, "step": 572500 }, { "epoch": 1.66, "learning_rate": 4.917101241200445e-05, "loss": 2.4998, "step": 573000 }, { "epoch": 1.66, "learning_rate": 4.917028876435717e-05, "loss": 2.5153, "step": 573500 }, { "epoch": 1.66, "learning_rate": 4.9169565116709893e-05, "loss": 2.4344, "step": 574000 }, { "epoch": 1.66, "learning_rate": 4.916884146906262e-05, "loss": 2.4757, "step": 574500 }, { "epoch": 1.66, "learning_rate": 4.9168117821415345e-05, "loss": 2.4799, "step": 575000 }, { "epoch": 1.67, "learning_rate": 4.916739562106336e-05, "loss": 2.4955, "step": 575500 }, { "epoch": 1.67, "learning_rate": 4.916667197341608e-05, "loss": 2.4672, "step": 576000 }, { "epoch": 1.67, "learning_rate": 4.9165948325768805e-05, "loss": 2.4709, "step": 576500 }, { "epoch": 1.67, "learning_rate": 4.916522467812153e-05, "loss": 2.4897, "step": 577000 }, { "epoch": 1.67, "learning_rate": 4.9164501030474256e-05, "loss": 2.4983, "step": 577500 }, { "epoch": 1.67, "learning_rate": 4.916377738282698e-05, "loss": 2.4891, "step": 578000 }, { "epoch": 1.67, "learning_rate": 4.91630537351797e-05, "loss": 2.4642, "step": 578500 }, { "epoch": 1.68, "learning_rate": 4.916233008753242e-05, "loss": 2.4995, "step": 579000 }, { "epoch": 1.68, "learning_rate": 4.9161606439885145e-05, "loss": 2.4767, "step": 579500 }, { "epoch": 1.68, "learning_rate": 4.916088279223787e-05, "loss": 2.4562, "step": 580000 }, { "epoch": 1.68, "learning_rate": 4.916015914459059e-05, "loss": 2.4799, "step": 580500 }, { "epoch": 1.68, "learning_rate": 4.915943549694331e-05, "loss": 2.489, "step": 581000 }, { "epoch": 1.68, "learning_rate": 4.9158711849296034e-05, "loss": 2.4999, "step": 581500 }, { "epoch": 1.68, "learning_rate": 4.915799109623935e-05, "loss": 2.5148, "step": 582000 }, { "epoch": 1.69, "learning_rate": 4.915726744859208e-05, "loss": 2.476, "step": 582500 }, { "epoch": 1.69, "learning_rate": 4.91565438009448e-05, "loss": 2.4783, "step": 583000 }, { "epoch": 1.69, "learning_rate": 4.915582015329752e-05, "loss": 2.4555, "step": 583500 }, { "epoch": 1.69, "learning_rate": 4.915509795294554e-05, "loss": 2.4898, "step": 584000 }, { "epoch": 1.69, "learning_rate": 4.915437430529826e-05, "loss": 2.4522, "step": 584500 }, { "epoch": 1.69, "learning_rate": 4.9153652104946276e-05, "loss": 2.4888, "step": 585000 }, { "epoch": 1.69, "learning_rate": 4.9152928457299005e-05, "loss": 2.5128, "step": 585500 }, { "epoch": 1.7, "learning_rate": 4.915220480965173e-05, "loss": 2.5045, "step": 586000 }, { "epoch": 1.7, "learning_rate": 4.915148116200445e-05, "loss": 2.4815, "step": 586500 }, { "epoch": 1.7, "learning_rate": 4.915075751435717e-05, "loss": 2.4852, "step": 587000 }, { "epoch": 1.7, "learning_rate": 4.9150033866709894e-05, "loss": 2.4794, "step": 587500 }, { "epoch": 1.7, "learning_rate": 4.9149310219062616e-05, "loss": 2.4932, "step": 588000 }, { "epoch": 1.7, "learning_rate": 4.914858657141534e-05, "loss": 2.5042, "step": 588500 }, { "epoch": 1.7, "learning_rate": 4.9147864371063354e-05, "loss": 2.4954, "step": 589000 }, { "epoch": 1.71, "learning_rate": 4.9147140723416077e-05, "loss": 2.4832, "step": 589500 }, { "epoch": 1.71, "learning_rate": 4.9146417075768806e-05, "loss": 2.5055, "step": 590000 }, { "epoch": 1.71, "learning_rate": 4.914569342812153e-05, "loss": 2.5143, "step": 590500 }, { "epoch": 1.71, "learning_rate": 4.914497122776955e-05, "loss": 2.4649, "step": 591000 }, { "epoch": 1.71, "learning_rate": 4.9144249027417566e-05, "loss": 2.4511, "step": 591500 }, { "epoch": 1.71, "learning_rate": 4.914352537977029e-05, "loss": 2.4861, "step": 592000 }, { "epoch": 1.72, "learning_rate": 4.914280173212301e-05, "loss": 2.4928, "step": 592500 }, { "epoch": 1.72, "learning_rate": 4.914207808447573e-05, "loss": 2.4933, "step": 593000 }, { "epoch": 1.72, "learning_rate": 4.9141354436828455e-05, "loss": 2.4866, "step": 593500 }, { "epoch": 1.72, "learning_rate": 4.914063223647648e-05, "loss": 2.48, "step": 594000 }, { "epoch": 1.72, "learning_rate": 4.913991003612449e-05, "loss": 2.4971, "step": 594500 }, { "epoch": 1.72, "learning_rate": 4.9139186388477215e-05, "loss": 2.4701, "step": 595000 }, { "epoch": 1.72, "learning_rate": 4.913846274082994e-05, "loss": 2.4785, "step": 595500 }, { "epoch": 1.73, "learning_rate": 4.913773909318266e-05, "loss": 2.4776, "step": 596000 }, { "epoch": 1.73, "learning_rate": 4.913701544553538e-05, "loss": 2.4903, "step": 596500 }, { "epoch": 1.73, "learning_rate": 4.9136291797888104e-05, "loss": 2.4744, "step": 597000 }, { "epoch": 1.73, "learning_rate": 4.9135569597536126e-05, "loss": 2.4594, "step": 597500 }, { "epoch": 1.73, "learning_rate": 4.9134845949888855e-05, "loss": 2.4831, "step": 598000 }, { "epoch": 1.73, "learning_rate": 4.913412230224158e-05, "loss": 2.4806, "step": 598500 }, { "epoch": 1.73, "learning_rate": 4.91333986545943e-05, "loss": 2.5012, "step": 599000 }, { "epoch": 1.74, "learning_rate": 4.913267500694702e-05, "loss": 2.4777, "step": 599500 }, { "epoch": 1.74, "learning_rate": 4.9131951359299744e-05, "loss": 2.4872, "step": 600000 }, { "epoch": 1.74, "learning_rate": 4.9131227711652466e-05, "loss": 2.4898, "step": 600500 }, { "epoch": 1.74, "learning_rate": 4.913050406400519e-05, "loss": 2.5102, "step": 601000 }, { "epoch": 1.74, "learning_rate": 4.912978041635791e-05, "loss": 2.5119, "step": 601500 }, { "epoch": 1.74, "learning_rate": 4.912905676871063e-05, "loss": 2.4605, "step": 602000 }, { "epoch": 1.74, "learning_rate": 4.9128334568358655e-05, "loss": 2.5017, "step": 602500 }, { "epoch": 1.75, "learning_rate": 4.912761092071138e-05, "loss": 2.4838, "step": 603000 }, { "epoch": 1.75, "learning_rate": 4.91268872730641e-05, "loss": 2.4753, "step": 603500 }, { "epoch": 1.75, "learning_rate": 4.912616362541682e-05, "loss": 2.502, "step": 604000 }, { "epoch": 1.75, "learning_rate": 4.9125439977769544e-05, "loss": 2.4865, "step": 604500 }, { "epoch": 1.75, "learning_rate": 4.912471633012227e-05, "loss": 2.5068, "step": 605000 }, { "epoch": 1.75, "learning_rate": 4.9123992682474995e-05, "loss": 2.4926, "step": 605500 }, { "epoch": 1.75, "learning_rate": 4.912327048212301e-05, "loss": 2.5047, "step": 606000 }, { "epoch": 1.76, "learning_rate": 4.912254683447573e-05, "loss": 2.4919, "step": 606500 }, { "epoch": 1.76, "learning_rate": 4.9121824634123755e-05, "loss": 2.4874, "step": 607000 }, { "epoch": 1.76, "learning_rate": 4.912110098647648e-05, "loss": 2.4928, "step": 607500 }, { "epoch": 1.76, "learning_rate": 4.91203773388292e-05, "loss": 2.4781, "step": 608000 }, { "epoch": 1.76, "learning_rate": 4.911965369118192e-05, "loss": 2.4617, "step": 608500 }, { "epoch": 1.76, "learning_rate": 4.9118930043534644e-05, "loss": 2.4693, "step": 609000 }, { "epoch": 1.76, "learning_rate": 4.911820639588737e-05, "loss": 2.4976, "step": 609500 }, { "epoch": 1.77, "learning_rate": 4.911748274824009e-05, "loss": 2.4636, "step": 610000 }, { "epoch": 1.77, "learning_rate": 4.911675910059281e-05, "loss": 2.4877, "step": 610500 }, { "epoch": 1.77, "learning_rate": 4.911603545294553e-05, "loss": 2.4891, "step": 611000 }, { "epoch": 1.77, "learning_rate": 4.9115313252593556e-05, "loss": 2.4957, "step": 611500 }, { "epoch": 1.77, "learning_rate": 4.911458960494628e-05, "loss": 2.5055, "step": 612000 }, { "epoch": 1.77, "learning_rate": 4.911386595729901e-05, "loss": 2.5022, "step": 612500 }, { "epoch": 1.77, "learning_rate": 4.911314230965173e-05, "loss": 2.4679, "step": 613000 }, { "epoch": 1.78, "learning_rate": 4.911241866200445e-05, "loss": 2.4918, "step": 613500 }, { "epoch": 1.78, "learning_rate": 4.911169646165247e-05, "loss": 2.483, "step": 614000 }, { "epoch": 1.78, "learning_rate": 4.911097281400519e-05, "loss": 2.4976, "step": 614500 }, { "epoch": 1.78, "learning_rate": 4.911024916635791e-05, "loss": 2.4811, "step": 615000 }, { "epoch": 1.78, "learning_rate": 4.9109526966005934e-05, "loss": 2.468, "step": 615500 }, { "epoch": 1.78, "learning_rate": 4.9108803318358656e-05, "loss": 2.481, "step": 616000 }, { "epoch": 1.78, "learning_rate": 4.910807967071138e-05, "loss": 2.4925, "step": 616500 }, { "epoch": 1.79, "learning_rate": 4.91073560230641e-05, "loss": 2.4752, "step": 617000 }, { "epoch": 1.79, "learning_rate": 4.910663237541682e-05, "loss": 2.4989, "step": 617500 }, { "epoch": 1.79, "learning_rate": 4.9105908727769545e-05, "loss": 2.4888, "step": 618000 }, { "epoch": 1.79, "learning_rate": 4.910518508012227e-05, "loss": 2.4675, "step": 618500 }, { "epoch": 1.79, "learning_rate": 4.910446143247499e-05, "loss": 2.4727, "step": 619000 }, { "epoch": 1.79, "learning_rate": 4.910373778482771e-05, "loss": 2.4667, "step": 619500 }, { "epoch": 1.79, "learning_rate": 4.910301413718044e-05, "loss": 2.4961, "step": 620000 }, { "epoch": 1.8, "learning_rate": 4.910229048953316e-05, "loss": 2.4695, "step": 620500 }, { "epoch": 1.8, "learning_rate": 4.9101566841885885e-05, "loss": 2.4772, "step": 621000 }, { "epoch": 1.8, "learning_rate": 4.910084464153391e-05, "loss": 2.476, "step": 621500 }, { "epoch": 1.8, "learning_rate": 4.910012099388663e-05, "loss": 2.492, "step": 622000 }, { "epoch": 1.8, "learning_rate": 4.909939734623935e-05, "loss": 2.4669, "step": 622500 }, { "epoch": 1.8, "learning_rate": 4.9098673698592074e-05, "loss": 2.4866, "step": 623000 }, { "epoch": 1.8, "learning_rate": 4.9097950050944796e-05, "loss": 2.4787, "step": 623500 }, { "epoch": 1.81, "learning_rate": 4.909722785059281e-05, "loss": 2.4945, "step": 624000 }, { "epoch": 1.81, "learning_rate": 4.9096504202945534e-05, "loss": 2.4766, "step": 624500 }, { "epoch": 1.81, "learning_rate": 4.9095780555298256e-05, "loss": 2.488, "step": 625000 }, { "epoch": 1.81, "learning_rate": 4.9095056907650985e-05, "loss": 2.4855, "step": 625500 }, { "epoch": 1.81, "learning_rate": 4.909433326000371e-05, "loss": 2.4832, "step": 626000 }, { "epoch": 1.81, "learning_rate": 4.909360961235643e-05, "loss": 2.4698, "step": 626500 }, { "epoch": 1.81, "learning_rate": 4.909288885929974e-05, "loss": 2.4649, "step": 627000 }, { "epoch": 1.82, "learning_rate": 4.909216521165247e-05, "loss": 2.4727, "step": 627500 }, { "epoch": 1.82, "learning_rate": 4.909144156400519e-05, "loss": 2.5006, "step": 628000 }, { "epoch": 1.82, "learning_rate": 4.909071791635791e-05, "loss": 2.4958, "step": 628500 }, { "epoch": 1.82, "learning_rate": 4.9089994268710634e-05, "loss": 2.4839, "step": 629000 }, { "epoch": 1.82, "learning_rate": 4.9089270621063363e-05, "loss": 2.4677, "step": 629500 }, { "epoch": 1.82, "learning_rate": 4.9088546973416086e-05, "loss": 2.5128, "step": 630000 }, { "epoch": 1.83, "learning_rate": 4.908782332576881e-05, "loss": 2.4619, "step": 630500 }, { "epoch": 1.83, "learning_rate": 4.908709967812153e-05, "loss": 2.4832, "step": 631000 }, { "epoch": 1.83, "learning_rate": 4.908637603047425e-05, "loss": 2.4842, "step": 631500 }, { "epoch": 1.83, "learning_rate": 4.9085652382826975e-05, "loss": 2.4805, "step": 632000 }, { "epoch": 1.83, "learning_rate": 4.90849287351797e-05, "loss": 2.4968, "step": 632500 }, { "epoch": 1.83, "learning_rate": 4.908420653482771e-05, "loss": 2.474, "step": 633000 }, { "epoch": 1.83, "learning_rate": 4.9083482887180435e-05, "loss": 2.4662, "step": 633500 }, { "epoch": 1.84, "learning_rate": 4.9082759239533164e-05, "loss": 2.4548, "step": 634000 }, { "epoch": 1.84, "learning_rate": 4.9082035591885886e-05, "loss": 2.4702, "step": 634500 }, { "epoch": 1.84, "learning_rate": 4.9081311944238615e-05, "loss": 2.4821, "step": 635000 }, { "epoch": 1.84, "learning_rate": 4.908058829659134e-05, "loss": 2.4687, "step": 635500 }, { "epoch": 1.84, "learning_rate": 4.907986609623935e-05, "loss": 2.4818, "step": 636000 }, { "epoch": 1.84, "learning_rate": 4.9079142448592075e-05, "loss": 2.4792, "step": 636500 }, { "epoch": 1.84, "learning_rate": 4.90784188009448e-05, "loss": 2.4861, "step": 637000 }, { "epoch": 1.85, "learning_rate": 4.907769515329752e-05, "loss": 2.4768, "step": 637500 }, { "epoch": 1.85, "learning_rate": 4.907697150565024e-05, "loss": 2.4473, "step": 638000 }, { "epoch": 1.85, "learning_rate": 4.9076247858002964e-05, "loss": 2.4703, "step": 638500 }, { "epoch": 1.85, "learning_rate": 4.9075524210355686e-05, "loss": 2.4754, "step": 639000 }, { "epoch": 1.85, "learning_rate": 4.9074800562708415e-05, "loss": 2.4752, "step": 639500 }, { "epoch": 1.85, "learning_rate": 4.9074079809651724e-05, "loss": 2.4809, "step": 640000 }, { "epoch": 1.85, "learning_rate": 4.9073356162004446e-05, "loss": 2.4762, "step": 640500 }, { "epoch": 1.86, "learning_rate": 4.907263251435717e-05, "loss": 2.4845, "step": 641000 }, { "epoch": 1.86, "learning_rate": 4.907190886670989e-05, "loss": 2.4835, "step": 641500 }, { "epoch": 1.86, "learning_rate": 4.907118666635791e-05, "loss": 2.4753, "step": 642000 }, { "epoch": 1.86, "learning_rate": 4.9070463018710635e-05, "loss": 2.5147, "step": 642500 }, { "epoch": 1.86, "learning_rate": 4.9069739371063364e-05, "loss": 2.4759, "step": 643000 }, { "epoch": 1.86, "learning_rate": 4.9069015723416086e-05, "loss": 2.4877, "step": 643500 }, { "epoch": 1.86, "learning_rate": 4.906829207576881e-05, "loss": 2.4614, "step": 644000 }, { "epoch": 1.87, "learning_rate": 4.906756842812153e-05, "loss": 2.4912, "step": 644500 }, { "epoch": 1.87, "learning_rate": 4.906684478047425e-05, "loss": 2.4848, "step": 645000 }, { "epoch": 1.87, "learning_rate": 4.9066121132826975e-05, "loss": 2.4814, "step": 645500 }, { "epoch": 1.87, "learning_rate": 4.90653974851797e-05, "loss": 2.4908, "step": 646000 }, { "epoch": 1.87, "learning_rate": 4.906467528482771e-05, "loss": 2.4888, "step": 646500 }, { "epoch": 1.87, "learning_rate": 4.9063953084475736e-05, "loss": 2.4565, "step": 647000 }, { "epoch": 1.87, "learning_rate": 4.906322943682846e-05, "loss": 2.4566, "step": 647500 }, { "epoch": 1.88, "learning_rate": 4.906250578918118e-05, "loss": 2.4845, "step": 648000 }, { "epoch": 1.88, "learning_rate": 4.90617821415339e-05, "loss": 2.4821, "step": 648500 }, { "epoch": 1.88, "learning_rate": 4.9061058493886624e-05, "loss": 2.4841, "step": 649000 }, { "epoch": 1.88, "learning_rate": 4.906033484623935e-05, "loss": 2.4614, "step": 649500 }, { "epoch": 1.88, "learning_rate": 4.9059611198592076e-05, "loss": 2.5012, "step": 650000 }, { "epoch": 1.88, "learning_rate": 4.90588875509448e-05, "loss": 2.4752, "step": 650500 }, { "epoch": 1.88, "learning_rate": 4.905816390329752e-05, "loss": 2.4913, "step": 651000 }, { "epoch": 1.89, "learning_rate": 4.9057441702945536e-05, "loss": 2.4803, "step": 651500 }, { "epoch": 1.89, "learning_rate": 4.905671950259356e-05, "loss": 2.4744, "step": 652000 }, { "epoch": 1.89, "learning_rate": 4.905599585494628e-05, "loss": 2.4641, "step": 652500 }, { "epoch": 1.89, "learning_rate": 4.9055272207299e-05, "loss": 2.4697, "step": 653000 }, { "epoch": 1.89, "learning_rate": 4.9054548559651725e-05, "loss": 2.4737, "step": 653500 }, { "epoch": 1.89, "learning_rate": 4.905382491200445e-05, "loss": 2.4853, "step": 654000 }, { "epoch": 1.89, "learning_rate": 4.905310126435717e-05, "loss": 2.4855, "step": 654500 }, { "epoch": 1.9, "learning_rate": 4.905237761670989e-05, "loss": 2.4808, "step": 655000 }, { "epoch": 1.9, "learning_rate": 4.9051655416357914e-05, "loss": 2.456, "step": 655500 }, { "epoch": 1.9, "learning_rate": 4.9050931768710636e-05, "loss": 2.4938, "step": 656000 }, { "epoch": 1.9, "learning_rate": 4.905020812106336e-05, "loss": 2.4884, "step": 656500 }, { "epoch": 1.9, "learning_rate": 4.904948447341608e-05, "loss": 2.5071, "step": 657000 }, { "epoch": 1.9, "learning_rate": 4.904876082576881e-05, "loss": 2.4802, "step": 657500 }, { "epoch": 1.9, "learning_rate": 4.904803717812153e-05, "loss": 2.4809, "step": 658000 }, { "epoch": 1.91, "learning_rate": 4.9047313530474254e-05, "loss": 2.4812, "step": 658500 }, { "epoch": 1.91, "learning_rate": 4.9046589882826976e-05, "loss": 2.4638, "step": 659000 }, { "epoch": 1.91, "learning_rate": 4.90458662351797e-05, "loss": 2.4786, "step": 659500 }, { "epoch": 1.91, "learning_rate": 4.9045144034827714e-05, "loss": 2.4768, "step": 660000 }, { "epoch": 1.91, "learning_rate": 4.904442038718044e-05, "loss": 2.4629, "step": 660500 }, { "epoch": 1.91, "learning_rate": 4.9043696739533165e-05, "loss": 2.484, "step": 661000 }, { "epoch": 1.91, "learning_rate": 4.904297309188589e-05, "loss": 2.4887, "step": 661500 }, { "epoch": 1.92, "learning_rate": 4.90422508915339e-05, "loss": 2.4757, "step": 662000 }, { "epoch": 1.92, "learning_rate": 4.9041527243886625e-05, "loss": 2.4783, "step": 662500 }, { "epoch": 1.92, "learning_rate": 4.904080359623935e-05, "loss": 2.462, "step": 663000 }, { "epoch": 1.92, "learning_rate": 4.904007994859207e-05, "loss": 2.4724, "step": 663500 }, { "epoch": 1.92, "learning_rate": 4.903935630094479e-05, "loss": 2.4859, "step": 664000 }, { "epoch": 1.92, "learning_rate": 4.903863265329752e-05, "loss": 2.4667, "step": 664500 }, { "epoch": 1.92, "learning_rate": 4.903790900565024e-05, "loss": 2.4517, "step": 665000 }, { "epoch": 1.93, "learning_rate": 4.9037185358002965e-05, "loss": 2.5009, "step": 665500 }, { "epoch": 1.93, "learning_rate": 4.903646315765099e-05, "loss": 2.4788, "step": 666000 }, { "epoch": 1.93, "learning_rate": 4.903573951000371e-05, "loss": 2.4601, "step": 666500 }, { "epoch": 1.93, "learning_rate": 4.903501586235643e-05, "loss": 2.4836, "step": 667000 }, { "epoch": 1.93, "learning_rate": 4.9034292214709154e-05, "loss": 2.4827, "step": 667500 }, { "epoch": 1.93, "learning_rate": 4.903356856706188e-05, "loss": 2.4738, "step": 668000 }, { "epoch": 1.94, "learning_rate": 4.90328449194146e-05, "loss": 2.4936, "step": 668500 }, { "epoch": 1.94, "learning_rate": 4.903212127176732e-05, "loss": 2.4621, "step": 669000 }, { "epoch": 1.94, "learning_rate": 4.9031397624120043e-05, "loss": 2.494, "step": 669500 }, { "epoch": 1.94, "learning_rate": 4.9030675423768066e-05, "loss": 2.4628, "step": 670000 }, { "epoch": 1.94, "learning_rate": 4.902995322341608e-05, "loss": 2.4776, "step": 670500 }, { "epoch": 1.94, "learning_rate": 4.9029229575768804e-05, "loss": 2.4852, "step": 671000 }, { "epoch": 1.94, "learning_rate": 4.9028505928121526e-05, "loss": 2.4855, "step": 671500 }, { "epoch": 1.95, "learning_rate": 4.9027782280474255e-05, "loss": 2.4762, "step": 672000 }, { "epoch": 1.95, "learning_rate": 4.902705863282698e-05, "loss": 2.4773, "step": 672500 }, { "epoch": 1.95, "learning_rate": 4.90263349851797e-05, "loss": 2.4942, "step": 673000 }, { "epoch": 1.95, "learning_rate": 4.902561133753242e-05, "loss": 2.4752, "step": 673500 }, { "epoch": 1.95, "learning_rate": 4.9024889137180444e-05, "loss": 2.4601, "step": 674000 }, { "epoch": 1.95, "learning_rate": 4.9024165489533166e-05, "loss": 2.4394, "step": 674500 }, { "epoch": 1.95, "learning_rate": 4.902344184188589e-05, "loss": 2.4907, "step": 675000 }, { "epoch": 1.96, "learning_rate": 4.902271819423861e-05, "loss": 2.4857, "step": 675500 }, { "epoch": 1.96, "learning_rate": 4.902199454659133e-05, "loss": 2.4761, "step": 676000 }, { "epoch": 1.96, "learning_rate": 4.9021270898944055e-05, "loss": 2.4873, "step": 676500 }, { "epoch": 1.96, "learning_rate": 4.902054725129678e-05, "loss": 2.4789, "step": 677000 }, { "epoch": 1.96, "learning_rate": 4.901982505094479e-05, "loss": 2.4626, "step": 677500 }, { "epoch": 1.96, "learning_rate": 4.9019102850592815e-05, "loss": 2.4559, "step": 678000 }, { "epoch": 1.96, "learning_rate": 4.901837920294554e-05, "loss": 2.5011, "step": 678500 }, { "epoch": 1.97, "learning_rate": 4.901765555529826e-05, "loss": 2.4815, "step": 679000 }, { "epoch": 1.97, "learning_rate": 4.901693190765098e-05, "loss": 2.4702, "step": 679500 }, { "epoch": 1.97, "learning_rate": 4.9016209707299004e-05, "loss": 2.484, "step": 680000 }, { "epoch": 1.97, "learning_rate": 4.9015486059651726e-05, "loss": 2.4513, "step": 680500 }, { "epoch": 1.97, "learning_rate": 4.901476241200445e-05, "loss": 2.4704, "step": 681000 }, { "epoch": 1.97, "learning_rate": 4.901403876435717e-05, "loss": 2.4706, "step": 681500 }, { "epoch": 1.97, "learning_rate": 4.901331511670989e-05, "loss": 2.4342, "step": 682000 }, { "epoch": 1.98, "learning_rate": 4.9012591469062615e-05, "loss": 2.4872, "step": 682500 }, { "epoch": 1.98, "learning_rate": 4.9011867821415344e-05, "loss": 2.4518, "step": 683000 }, { "epoch": 1.98, "learning_rate": 4.9011144173768067e-05, "loss": 2.4546, "step": 683500 }, { "epoch": 1.98, "learning_rate": 4.901042052612079e-05, "loss": 2.4902, "step": 684000 }, { "epoch": 1.98, "learning_rate": 4.9009698325768804e-05, "loss": 2.4703, "step": 684500 }, { "epoch": 1.98, "learning_rate": 4.9008974678121527e-05, "loss": 2.4694, "step": 685000 }, { "epoch": 1.98, "learning_rate": 4.900825103047425e-05, "loss": 2.4974, "step": 685500 }, { "epoch": 1.99, "learning_rate": 4.900752738282697e-05, "loss": 2.4718, "step": 686000 }, { "epoch": 1.99, "learning_rate": 4.900680373517969e-05, "loss": 2.4765, "step": 686500 }, { "epoch": 1.99, "learning_rate": 4.900608008753242e-05, "loss": 2.4749, "step": 687000 }, { "epoch": 1.99, "learning_rate": 4.9005356439885145e-05, "loss": 2.4696, "step": 687500 }, { "epoch": 1.99, "learning_rate": 4.900463279223787e-05, "loss": 2.4695, "step": 688000 }, { "epoch": 1.99, "learning_rate": 4.900391059188589e-05, "loss": 2.4779, "step": 688500 }, { "epoch": 1.99, "learning_rate": 4.900318694423861e-05, "loss": 2.4661, "step": 689000 }, { "epoch": 2.0, "learning_rate": 4.900246474388663e-05, "loss": 2.4856, "step": 689500 }, { "epoch": 2.0, "learning_rate": 4.900174109623935e-05, "loss": 2.4432, "step": 690000 }, { "epoch": 2.0, "learning_rate": 4.900101744859207e-05, "loss": 2.4816, "step": 690500 }, { "epoch": 2.0, "eval_accuracy": 0.631185162487384, "eval_accuracy_mlm": 0.5930999275902505, "eval_accuracy_nsp": 0.8354805149569418, "eval_loss": 2.435854196548462, "eval_runtime": 330.8554, "eval_samples_per_second": 1318.963, "eval_steps_per_second": 54.958, "step": 690944 }, { "epoch": 2.0, "learning_rate": 4.9000293800944794e-05, "loss": 2.4789, "step": 691000 }, { "epoch": 2.0, "learning_rate": 4.899957015329752e-05, "loss": 2.4598, "step": 691500 }, { "epoch": 2.0, "learning_rate": 4.8998846505650245e-05, "loss": 2.454, "step": 692000 }, { "epoch": 2.0, "learning_rate": 4.899812430529826e-05, "loss": 2.4464, "step": 692500 }, { "epoch": 2.01, "learning_rate": 4.899740065765098e-05, "loss": 2.4518, "step": 693000 }, { "epoch": 2.01, "learning_rate": 4.8996677010003705e-05, "loss": 2.4586, "step": 693500 }, { "epoch": 2.01, "learning_rate": 4.899595336235643e-05, "loss": 2.461, "step": 694000 }, { "epoch": 2.01, "learning_rate": 4.8995229714709156e-05, "loss": 2.4471, "step": 694500 }, { "epoch": 2.01, "learning_rate": 4.899450606706188e-05, "loss": 2.4592, "step": 695000 }, { "epoch": 2.01, "learning_rate": 4.89937824194146e-05, "loss": 2.4399, "step": 695500 }, { "epoch": 2.01, "learning_rate": 4.899305877176732e-05, "loss": 2.4515, "step": 696000 }, { "epoch": 2.02, "learning_rate": 4.8992336571415345e-05, "loss": 2.4645, "step": 696500 }, { "epoch": 2.02, "learning_rate": 4.899161292376807e-05, "loss": 2.4637, "step": 697000 }, { "epoch": 2.02, "learning_rate": 4.899088927612079e-05, "loss": 2.4408, "step": 697500 }, { "epoch": 2.02, "learning_rate": 4.899016562847351e-05, "loss": 2.4525, "step": 698000 }, { "epoch": 2.02, "learning_rate": 4.8989441980826234e-05, "loss": 2.4656, "step": 698500 }, { "epoch": 2.02, "learning_rate": 4.8988718333178956e-05, "loss": 2.4563, "step": 699000 }, { "epoch": 2.02, "learning_rate": 4.898799468553168e-05, "loss": 2.4592, "step": 699500 }, { "epoch": 2.03, "learning_rate": 4.89872710378844e-05, "loss": 2.4711, "step": 700000 }, { "epoch": 2.03, "learning_rate": 4.898654739023712e-05, "loss": 2.4476, "step": 700500 }, { "epoch": 2.03, "learning_rate": 4.8985823742589845e-05, "loss": 2.4665, "step": 701000 }, { "epoch": 2.03, "learning_rate": 4.898510154223787e-05, "loss": 2.4621, "step": 701500 }, { "epoch": 2.03, "learning_rate": 4.8984377894590597e-05, "loss": 2.4613, "step": 702000 }, { "epoch": 2.03, "learning_rate": 4.898365424694332e-05, "loss": 2.4539, "step": 702500 }, { "epoch": 2.03, "learning_rate": 4.898293059929604e-05, "loss": 2.4689, "step": 703000 }, { "epoch": 2.04, "learning_rate": 4.898220695164876e-05, "loss": 2.4776, "step": 703500 }, { "epoch": 2.04, "learning_rate": 4.8981483304001485e-05, "loss": 2.4484, "step": 704000 }, { "epoch": 2.04, "learning_rate": 4.89807611036495e-05, "loss": 2.4657, "step": 704500 }, { "epoch": 2.04, "learning_rate": 4.898003745600222e-05, "loss": 2.4293, "step": 705000 }, { "epoch": 2.04, "learning_rate": 4.8979313808354945e-05, "loss": 2.4566, "step": 705500 }, { "epoch": 2.04, "learning_rate": 4.8978590160707675e-05, "loss": 2.4353, "step": 706000 }, { "epoch": 2.05, "learning_rate": 4.897786940765098e-05, "loss": 2.4369, "step": 706500 }, { "epoch": 2.05, "learning_rate": 4.8977145760003706e-05, "loss": 2.4478, "step": 707000 }, { "epoch": 2.05, "learning_rate": 4.897642211235643e-05, "loss": 2.4426, "step": 707500 }, { "epoch": 2.05, "learning_rate": 4.897569846470915e-05, "loss": 2.4564, "step": 708000 }, { "epoch": 2.05, "learning_rate": 4.897497481706187e-05, "loss": 2.464, "step": 708500 }, { "epoch": 2.05, "learning_rate": 4.8974251169414595e-05, "loss": 2.4649, "step": 709000 }, { "epoch": 2.05, "learning_rate": 4.8973527521767324e-05, "loss": 2.4279, "step": 709500 }, { "epoch": 2.06, "learning_rate": 4.8972803874120046e-05, "loss": 2.4731, "step": 710000 }, { "epoch": 2.06, "learning_rate": 4.8972080226472775e-05, "loss": 2.4758, "step": 710500 }, { "epoch": 2.06, "learning_rate": 4.89713565788255e-05, "loss": 2.4639, "step": 711000 }, { "epoch": 2.06, "learning_rate": 4.897063293117822e-05, "loss": 2.4463, "step": 711500 }, { "epoch": 2.06, "learning_rate": 4.896990928353094e-05, "loss": 2.4581, "step": 712000 }, { "epoch": 2.06, "learning_rate": 4.896918708317896e-05, "loss": 2.4639, "step": 712500 }, { "epoch": 2.06, "learning_rate": 4.896846343553168e-05, "loss": 2.4564, "step": 713000 }, { "epoch": 2.07, "learning_rate": 4.89677397878844e-05, "loss": 2.4583, "step": 713500 }, { "epoch": 2.07, "learning_rate": 4.8967016140237124e-05, "loss": 2.4617, "step": 714000 }, { "epoch": 2.07, "learning_rate": 4.8966292492589846e-05, "loss": 2.4357, "step": 714500 }, { "epoch": 2.07, "learning_rate": 4.8965568844942575e-05, "loss": 2.4506, "step": 715000 }, { "epoch": 2.07, "learning_rate": 4.89648451972953e-05, "loss": 2.4512, "step": 715500 }, { "epoch": 2.07, "learning_rate": 4.896412299694331e-05, "loss": 2.4475, "step": 716000 }, { "epoch": 2.07, "learning_rate": 4.8963399349296035e-05, "loss": 2.4574, "step": 716500 }, { "epoch": 2.08, "learning_rate": 4.8962675701648764e-05, "loss": 2.4479, "step": 717000 }, { "epoch": 2.08, "learning_rate": 4.8961952054001486e-05, "loss": 2.4697, "step": 717500 }, { "epoch": 2.08, "learning_rate": 4.89612298536495e-05, "loss": 2.4785, "step": 718000 }, { "epoch": 2.08, "learning_rate": 4.8960506206002224e-05, "loss": 2.4566, "step": 718500 }, { "epoch": 2.08, "learning_rate": 4.8959782558354946e-05, "loss": 2.4424, "step": 719000 }, { "epoch": 2.08, "learning_rate": 4.8959058910707675e-05, "loss": 2.4512, "step": 719500 }, { "epoch": 2.08, "learning_rate": 4.89583352630604e-05, "loss": 2.4534, "step": 720000 }, { "epoch": 2.09, "learning_rate": 4.895761161541312e-05, "loss": 2.4666, "step": 720500 }, { "epoch": 2.09, "learning_rate": 4.895688796776584e-05, "loss": 2.4413, "step": 721000 }, { "epoch": 2.09, "learning_rate": 4.8956164320118564e-05, "loss": 2.489, "step": 721500 }, { "epoch": 2.09, "learning_rate": 4.895544211976658e-05, "loss": 2.4536, "step": 722000 }, { "epoch": 2.09, "learning_rate": 4.89547184721193e-05, "loss": 2.4472, "step": 722500 }, { "epoch": 2.09, "learning_rate": 4.8953994824472024e-05, "loss": 2.452, "step": 723000 }, { "epoch": 2.09, "learning_rate": 4.8953271176824746e-05, "loss": 2.4544, "step": 723500 }, { "epoch": 2.1, "learning_rate": 4.8952547529177476e-05, "loss": 2.4361, "step": 724000 }, { "epoch": 2.1, "learning_rate": 4.895182677612079e-05, "loss": 2.4403, "step": 724500 }, { "epoch": 2.1, "learning_rate": 4.8951103128473513e-05, "loss": 2.4623, "step": 725000 }, { "epoch": 2.1, "learning_rate": 4.8950379480826236e-05, "loss": 2.448, "step": 725500 }, { "epoch": 2.1, "learning_rate": 4.894965583317896e-05, "loss": 2.4598, "step": 726000 }, { "epoch": 2.1, "learning_rate": 4.8948933632826973e-05, "loss": 2.4239, "step": 726500 }, { "epoch": 2.1, "learning_rate": 4.89482099851797e-05, "loss": 2.4484, "step": 727000 }, { "epoch": 2.11, "learning_rate": 4.8947486337532425e-05, "loss": 2.463, "step": 727500 }, { "epoch": 2.11, "learning_rate": 4.894676268988515e-05, "loss": 2.4606, "step": 728000 }, { "epoch": 2.11, "learning_rate": 4.894603904223787e-05, "loss": 2.4716, "step": 728500 }, { "epoch": 2.11, "learning_rate": 4.8945316841885885e-05, "loss": 2.4688, "step": 729000 }, { "epoch": 2.11, "learning_rate": 4.894459319423861e-05, "loss": 2.471, "step": 729500 }, { "epoch": 2.11, "learning_rate": 4.894387099388662e-05, "loss": 2.4771, "step": 730000 }, { "epoch": 2.11, "learning_rate": 4.894314734623935e-05, "loss": 2.4761, "step": 730500 }, { "epoch": 2.12, "learning_rate": 4.894242514588737e-05, "loss": 2.4848, "step": 731000 }, { "epoch": 2.12, "learning_rate": 4.894170149824009e-05, "loss": 2.5034, "step": 731500 }, { "epoch": 2.12, "learning_rate": 4.894097785059281e-05, "loss": 2.4692, "step": 732000 }, { "epoch": 2.12, "learning_rate": 4.894025420294554e-05, "loss": 2.4524, "step": 732500 }, { "epoch": 2.12, "learning_rate": 4.893953055529826e-05, "loss": 2.4573, "step": 733000 }, { "epoch": 2.12, "learning_rate": 4.8938806907650985e-05, "loss": 2.4928, "step": 733500 }, { "epoch": 2.12, "learning_rate": 4.893808326000371e-05, "loss": 2.4969, "step": 734000 }, { "epoch": 2.13, "learning_rate": 4.893735961235643e-05, "loss": 2.4951, "step": 734500 }, { "epoch": 2.13, "learning_rate": 4.893663596470915e-05, "loss": 2.4963, "step": 735000 }, { "epoch": 2.13, "learning_rate": 4.8935912317061874e-05, "loss": 2.4626, "step": 735500 }, { "epoch": 2.13, "learning_rate": 4.89351886694146e-05, "loss": 2.4454, "step": 736000 }, { "epoch": 2.13, "learning_rate": 4.8934465021767325e-05, "loss": 2.4879, "step": 736500 }, { "epoch": 2.13, "learning_rate": 4.893374137412005e-05, "loss": 2.4734, "step": 737000 }, { "epoch": 2.13, "learning_rate": 4.893301772647277e-05, "loss": 2.4603, "step": 737500 }, { "epoch": 2.14, "learning_rate": 4.893229407882549e-05, "loss": 2.4828, "step": 738000 }, { "epoch": 2.14, "learning_rate": 4.8931570431178214e-05, "loss": 2.4814, "step": 738500 }, { "epoch": 2.14, "learning_rate": 4.893084823082623e-05, "loss": 2.4689, "step": 739000 }, { "epoch": 2.14, "learning_rate": 4.893012458317896e-05, "loss": 2.4681, "step": 739500 }, { "epoch": 2.14, "learning_rate": 4.892940093553168e-05, "loss": 2.4555, "step": 740000 }, { "epoch": 2.14, "learning_rate": 4.89286772878844e-05, "loss": 2.4646, "step": 740500 }, { "epoch": 2.14, "learning_rate": 4.8927953640237125e-05, "loss": 2.4376, "step": 741000 }, { "epoch": 2.15, "learning_rate": 4.892723143988515e-05, "loss": 2.4509, "step": 741500 }, { "epoch": 2.15, "learning_rate": 4.892650923953316e-05, "loss": 2.4518, "step": 742000 }, { "epoch": 2.15, "learning_rate": 4.8925785591885885e-05, "loss": 2.4623, "step": 742500 }, { "epoch": 2.15, "learning_rate": 4.892506194423861e-05, "loss": 2.4877, "step": 743000 }, { "epoch": 2.15, "learning_rate": 4.892433829659133e-05, "loss": 2.4892, "step": 743500 }, { "epoch": 2.15, "learning_rate": 4.892361464894405e-05, "loss": 2.5121, "step": 744000 }, { "epoch": 2.16, "learning_rate": 4.892289100129678e-05, "loss": 2.4631, "step": 744500 }, { "epoch": 2.16, "learning_rate": 4.89221688009448e-05, "loss": 2.4688, "step": 745000 }, { "epoch": 2.16, "learning_rate": 4.892144515329752e-05, "loss": 2.4775, "step": 745500 }, { "epoch": 2.16, "learning_rate": 4.892072150565024e-05, "loss": 2.475, "step": 746000 }, { "epoch": 2.16, "learning_rate": 4.8919997858002963e-05, "loss": 2.4943, "step": 746500 }, { "epoch": 2.16, "learning_rate": 4.891927421035569e-05, "loss": 2.4642, "step": 747000 }, { "epoch": 2.16, "learning_rate": 4.8918550562708415e-05, "loss": 2.4447, "step": 747500 }, { "epoch": 2.17, "learning_rate": 4.891782691506114e-05, "loss": 2.4554, "step": 748000 }, { "epoch": 2.17, "learning_rate": 4.891710326741386e-05, "loss": 2.4652, "step": 748500 }, { "epoch": 2.17, "learning_rate": 4.891637961976658e-05, "loss": 2.4624, "step": 749000 }, { "epoch": 2.17, "learning_rate": 4.8915657419414604e-05, "loss": 2.4517, "step": 749500 }, { "epoch": 2.17, "learning_rate": 4.8914933771767326e-05, "loss": 2.4722, "step": 750000 }, { "epoch": 2.17, "learning_rate": 4.891421012412005e-05, "loss": 2.4652, "step": 750500 }, { "epoch": 2.17, "learning_rate": 4.891348647647277e-05, "loss": 2.4866, "step": 751000 }, { "epoch": 2.18, "learning_rate": 4.891276282882549e-05, "loss": 2.5011, "step": 751500 }, { "epoch": 2.18, "learning_rate": 4.8912039181178215e-05, "loss": 2.4952, "step": 752000 }, { "epoch": 2.18, "learning_rate": 4.891131553353094e-05, "loss": 2.4565, "step": 752500 }, { "epoch": 2.18, "learning_rate": 4.891059188588366e-05, "loss": 2.4535, "step": 753000 }, { "epoch": 2.18, "learning_rate": 4.890986968553168e-05, "loss": 2.4375, "step": 753500 }, { "epoch": 2.18, "learning_rate": 4.8909146037884404e-05, "loss": 2.4754, "step": 754000 }, { "epoch": 2.18, "learning_rate": 4.890842239023713e-05, "loss": 2.4667, "step": 754500 }, { "epoch": 2.19, "learning_rate": 4.8907698742589855e-05, "loss": 2.4556, "step": 755000 }, { "epoch": 2.19, "learning_rate": 4.890697509494258e-05, "loss": 2.4461, "step": 755500 }, { "epoch": 2.19, "learning_rate": 4.890625289459059e-05, "loss": 2.4475, "step": 756000 }, { "epoch": 2.19, "learning_rate": 4.8905529246943315e-05, "loss": 2.4501, "step": 756500 }, { "epoch": 2.19, "learning_rate": 4.890480559929604e-05, "loss": 2.4606, "step": 757000 }, { "epoch": 2.19, "learning_rate": 4.890408195164876e-05, "loss": 2.4686, "step": 757500 }, { "epoch": 2.19, "learning_rate": 4.890335830400148e-05, "loss": 2.4598, "step": 758000 }, { "epoch": 2.2, "learning_rate": 4.8902636103649504e-05, "loss": 2.4641, "step": 758500 }, { "epoch": 2.2, "learning_rate": 4.8901912456002226e-05, "loss": 2.4744, "step": 759000 }, { "epoch": 2.2, "learning_rate": 4.890118880835495e-05, "loss": 2.4543, "step": 759500 }, { "epoch": 2.2, "learning_rate": 4.890046516070767e-05, "loss": 2.4908, "step": 760000 }, { "epoch": 2.2, "learning_rate": 4.889974151306039e-05, "loss": 2.4717, "step": 760500 }, { "epoch": 2.2, "learning_rate": 4.889901931270841e-05, "loss": 2.4567, "step": 761000 }, { "epoch": 2.2, "learning_rate": 4.8898298559651724e-05, "loss": 2.4823, "step": 761500 }, { "epoch": 2.21, "learning_rate": 4.8897574912004447e-05, "loss": 2.4703, "step": 762000 }, { "epoch": 2.21, "learning_rate": 4.8896851264357176e-05, "loss": 2.4668, "step": 762500 }, { "epoch": 2.21, "learning_rate": 4.88961276167099e-05, "loss": 2.4704, "step": 763000 }, { "epoch": 2.21, "learning_rate": 4.889540396906262e-05, "loss": 2.4578, "step": 763500 }, { "epoch": 2.21, "learning_rate": 4.889468032141534e-05, "loss": 2.4705, "step": 764000 }, { "epoch": 2.21, "learning_rate": 4.8893956673768065e-05, "loss": 2.4727, "step": 764500 }, { "epoch": 2.21, "learning_rate": 4.889323302612079e-05, "loss": 2.4628, "step": 765000 }, { "epoch": 2.22, "learning_rate": 4.889250937847351e-05, "loss": 2.4639, "step": 765500 }, { "epoch": 2.22, "learning_rate": 4.889179007271212e-05, "loss": 2.4698, "step": 766000 }, { "epoch": 2.22, "learning_rate": 4.889106642506484e-05, "loss": 2.4549, "step": 766500 }, { "epoch": 2.22, "learning_rate": 4.889034277741756e-05, "loss": 2.4666, "step": 767000 }, { "epoch": 2.22, "learning_rate": 4.8889619129770285e-05, "loss": 2.4899, "step": 767500 }, { "epoch": 2.22, "learning_rate": 4.888889548212301e-05, "loss": 2.4596, "step": 768000 }, { "epoch": 2.22, "learning_rate": 4.888817328177103e-05, "loss": 2.4762, "step": 768500 }, { "epoch": 2.23, "learning_rate": 4.888744963412375e-05, "loss": 2.496, "step": 769000 }, { "epoch": 2.23, "learning_rate": 4.8886725986476474e-05, "loss": 2.4773, "step": 769500 }, { "epoch": 2.23, "learning_rate": 4.88860023388292e-05, "loss": 2.4481, "step": 770000 }, { "epoch": 2.23, "learning_rate": 4.8885278691181925e-05, "loss": 2.4701, "step": 770500 }, { "epoch": 2.23, "learning_rate": 4.888455504353465e-05, "loss": 2.454, "step": 771000 }, { "epoch": 2.23, "learning_rate": 4.888383139588737e-05, "loss": 2.4689, "step": 771500 }, { "epoch": 2.23, "learning_rate": 4.888310774824009e-05, "loss": 2.4477, "step": 772000 }, { "epoch": 2.24, "learning_rate": 4.888238554788811e-05, "loss": 2.475, "step": 772500 }, { "epoch": 2.24, "learning_rate": 4.888166190024083e-05, "loss": 2.4597, "step": 773000 }, { "epoch": 2.24, "learning_rate": 4.888093825259356e-05, "loss": 2.4765, "step": 773500 }, { "epoch": 2.24, "learning_rate": 4.888021460494628e-05, "loss": 2.4601, "step": 774000 }, { "epoch": 2.24, "learning_rate": 4.8879490957299e-05, "loss": 2.4864, "step": 774500 }, { "epoch": 2.24, "learning_rate": 4.8878767309651725e-05, "loss": 2.442, "step": 775000 }, { "epoch": 2.24, "learning_rate": 4.887804366200445e-05, "loss": 2.4807, "step": 775500 }, { "epoch": 2.25, "learning_rate": 4.887732001435717e-05, "loss": 2.4628, "step": 776000 }, { "epoch": 2.25, "learning_rate": 4.887659636670989e-05, "loss": 2.4546, "step": 776500 }, { "epoch": 2.25, "learning_rate": 4.887587271906262e-05, "loss": 2.475, "step": 777000 }, { "epoch": 2.25, "learning_rate": 4.887514907141534e-05, "loss": 2.4683, "step": 777500 }, { "epoch": 2.25, "learning_rate": 4.887442687106336e-05, "loss": 2.4653, "step": 778000 }, { "epoch": 2.25, "learning_rate": 4.887370467071138e-05, "loss": 2.4586, "step": 778500 }, { "epoch": 2.25, "learning_rate": 4.88729810230641e-05, "loss": 2.4554, "step": 779000 }, { "epoch": 2.26, "learning_rate": 4.887225882271212e-05, "loss": 2.4355, "step": 779500 }, { "epoch": 2.26, "learning_rate": 4.887153517506484e-05, "loss": 2.458, "step": 780000 }, { "epoch": 2.26, "learning_rate": 4.887081152741756e-05, "loss": 2.4656, "step": 780500 }, { "epoch": 2.26, "learning_rate": 4.8870087879770285e-05, "loss": 2.4528, "step": 781000 }, { "epoch": 2.26, "learning_rate": 4.886936423212301e-05, "loss": 2.4758, "step": 781500 }, { "epoch": 2.26, "learning_rate": 4.886864058447574e-05, "loss": 2.4729, "step": 782000 }, { "epoch": 2.27, "learning_rate": 4.886791693682846e-05, "loss": 2.4626, "step": 782500 }, { "epoch": 2.27, "learning_rate": 4.886719328918118e-05, "loss": 2.4532, "step": 783000 }, { "epoch": 2.27, "learning_rate": 4.8866469641533903e-05, "loss": 2.4532, "step": 783500 }, { "epoch": 2.27, "learning_rate": 4.8865745993886626e-05, "loss": 2.4658, "step": 784000 }, { "epoch": 2.27, "learning_rate": 4.886502234623935e-05, "loss": 2.4934, "step": 784500 }, { "epoch": 2.27, "learning_rate": 4.886429869859208e-05, "loss": 2.4615, "step": 785000 }, { "epoch": 2.27, "learning_rate": 4.88635750509448e-05, "loss": 2.4628, "step": 785500 }, { "epoch": 2.28, "learning_rate": 4.886285140329752e-05, "loss": 2.4608, "step": 786000 }, { "epoch": 2.28, "learning_rate": 4.8862127755650244e-05, "loss": 2.4669, "step": 786500 }, { "epoch": 2.28, "learning_rate": 4.8861404108002966e-05, "loss": 2.4682, "step": 787000 }, { "epoch": 2.28, "learning_rate": 4.886068046035569e-05, "loss": 2.4519, "step": 787500 }, { "epoch": 2.28, "learning_rate": 4.885995681270841e-05, "loss": 2.4596, "step": 788000 }, { "epoch": 2.28, "learning_rate": 4.885923316506113e-05, "loss": 2.4557, "step": 788500 }, { "epoch": 2.28, "learning_rate": 4.885850951741386e-05, "loss": 2.4598, "step": 789000 }, { "epoch": 2.29, "learning_rate": 4.885778731706188e-05, "loss": 2.4646, "step": 789500 }, { "epoch": 2.29, "learning_rate": 4.885706511670989e-05, "loss": 2.4645, "step": 790000 }, { "epoch": 2.29, "learning_rate": 4.8856341469062615e-05, "loss": 2.4555, "step": 790500 }, { "epoch": 2.29, "learning_rate": 4.885561782141534e-05, "loss": 2.4786, "step": 791000 }, { "epoch": 2.29, "learning_rate": 4.885489417376806e-05, "loss": 2.452, "step": 791500 }, { "epoch": 2.29, "learning_rate": 4.885417052612079e-05, "loss": 2.4585, "step": 792000 }, { "epoch": 2.29, "learning_rate": 4.885344687847351e-05, "loss": 2.4583, "step": 792500 }, { "epoch": 2.3, "learning_rate": 4.885272323082623e-05, "loss": 2.4383, "step": 793000 }, { "epoch": 2.3, "learning_rate": 4.885199958317896e-05, "loss": 2.4437, "step": 793500 }, { "epoch": 2.3, "learning_rate": 4.885127738282698e-05, "loss": 2.4578, "step": 794000 }, { "epoch": 2.3, "learning_rate": 4.88505537351797e-05, "loss": 2.4858, "step": 794500 }, { "epoch": 2.3, "learning_rate": 4.884983008753242e-05, "loss": 2.4365, "step": 795000 }, { "epoch": 2.3, "learning_rate": 4.8849106439885144e-05, "loss": 2.4526, "step": 795500 }, { "epoch": 2.3, "learning_rate": 4.8848382792237866e-05, "loss": 2.4697, "step": 796000 }, { "epoch": 2.31, "learning_rate": 4.884766059188589e-05, "loss": 2.4756, "step": 796500 }, { "epoch": 2.31, "learning_rate": 4.884693694423861e-05, "loss": 2.4617, "step": 797000 }, { "epoch": 2.31, "learning_rate": 4.884621329659133e-05, "loss": 2.4737, "step": 797500 }, { "epoch": 2.31, "learning_rate": 4.8845489648944055e-05, "loss": 2.4507, "step": 798000 }, { "epoch": 2.31, "learning_rate": 4.8844768895887364e-05, "loss": 2.485, "step": 798500 }, { "epoch": 2.31, "learning_rate": 4.8844045248240086e-05, "loss": 2.4573, "step": 799000 }, { "epoch": 2.31, "learning_rate": 4.884332160059281e-05, "loss": 2.4446, "step": 799500 }, { "epoch": 2.32, "learning_rate": 4.884259795294554e-05, "loss": 2.4701, "step": 800000 }, { "epoch": 2.32, "learning_rate": 4.884187430529826e-05, "loss": 2.4703, "step": 800500 }, { "epoch": 2.32, "learning_rate": 4.884115065765099e-05, "loss": 2.4766, "step": 801000 }, { "epoch": 2.32, "learning_rate": 4.884042701000371e-05, "loss": 2.451, "step": 801500 }, { "epoch": 2.32, "learning_rate": 4.8839703362356433e-05, "loss": 2.4538, "step": 802000 }, { "epoch": 2.32, "learning_rate": 4.883898116200445e-05, "loss": 2.4579, "step": 802500 }, { "epoch": 2.32, "learning_rate": 4.883825751435717e-05, "loss": 2.4677, "step": 803000 }, { "epoch": 2.33, "learning_rate": 4.8837533866709893e-05, "loss": 2.4384, "step": 803500 }, { "epoch": 2.33, "learning_rate": 4.8836810219062616e-05, "loss": 2.4621, "step": 804000 }, { "epoch": 2.33, "learning_rate": 4.883608657141534e-05, "loss": 2.4513, "step": 804500 }, { "epoch": 2.33, "learning_rate": 4.883536292376806e-05, "loss": 2.4515, "step": 805000 }, { "epoch": 2.33, "learning_rate": 4.883463927612079e-05, "loss": 2.4732, "step": 805500 }, { "epoch": 2.33, "learning_rate": 4.883391562847351e-05, "loss": 2.4462, "step": 806000 }, { "epoch": 2.33, "learning_rate": 4.8833191980826234e-05, "loss": 2.4348, "step": 806500 }, { "epoch": 2.34, "learning_rate": 4.883247122776954e-05, "loss": 2.4618, "step": 807000 }, { "epoch": 2.34, "learning_rate": 4.883174758012227e-05, "loss": 2.4726, "step": 807500 }, { "epoch": 2.34, "learning_rate": 4.8831023932474994e-05, "loss": 2.4495, "step": 808000 }, { "epoch": 2.34, "learning_rate": 4.8830300284827716e-05, "loss": 2.466, "step": 808500 }, { "epoch": 2.34, "learning_rate": 4.882957663718044e-05, "loss": 2.4551, "step": 809000 }, { "epoch": 2.34, "learning_rate": 4.882885298953316e-05, "loss": 2.464, "step": 809500 }, { "epoch": 2.34, "learning_rate": 4.882812934188589e-05, "loss": 2.4649, "step": 810000 }, { "epoch": 2.35, "learning_rate": 4.882740569423861e-05, "loss": 2.4627, "step": 810500 }, { "epoch": 2.35, "learning_rate": 4.8826682046591334e-05, "loss": 2.4657, "step": 811000 }, { "epoch": 2.35, "learning_rate": 4.8825958398944056e-05, "loss": 2.4704, "step": 811500 }, { "epoch": 2.35, "learning_rate": 4.882523475129678e-05, "loss": 2.4645, "step": 812000 }, { "epoch": 2.35, "learning_rate": 4.88245111036495e-05, "loss": 2.436, "step": 812500 }, { "epoch": 2.35, "learning_rate": 4.8823788903297516e-05, "loss": 2.4777, "step": 813000 }, { "epoch": 2.35, "learning_rate": 4.882306525565024e-05, "loss": 2.4749, "step": 813500 }, { "epoch": 2.36, "learning_rate": 4.882234160800296e-05, "loss": 2.449, "step": 814000 }, { "epoch": 2.36, "learning_rate": 4.882161940765098e-05, "loss": 2.4526, "step": 814500 }, { "epoch": 2.36, "learning_rate": 4.882089576000371e-05, "loss": 2.456, "step": 815000 }, { "epoch": 2.36, "learning_rate": 4.8820172112356434e-05, "loss": 2.469, "step": 815500 }, { "epoch": 2.36, "learning_rate": 4.8819448464709156e-05, "loss": 2.4608, "step": 816000 }, { "epoch": 2.36, "learning_rate": 4.881872626435717e-05, "loss": 2.4611, "step": 816500 }, { "epoch": 2.36, "learning_rate": 4.8818002616709894e-05, "loss": 2.4812, "step": 817000 }, { "epoch": 2.37, "learning_rate": 4.8817278969062616e-05, "loss": 2.4608, "step": 817500 }, { "epoch": 2.37, "learning_rate": 4.881655532141534e-05, "loss": 2.4905, "step": 818000 }, { "epoch": 2.37, "learning_rate": 4.881583167376806e-05, "loss": 2.4559, "step": 818500 }, { "epoch": 2.37, "learning_rate": 4.881510947341608e-05, "loss": 2.4667, "step": 819000 }, { "epoch": 2.37, "learning_rate": 4.8814385825768806e-05, "loss": 2.4609, "step": 819500 }, { "epoch": 2.37, "learning_rate": 4.881366217812153e-05, "loss": 2.4696, "step": 820000 }, { "epoch": 2.38, "learning_rate": 4.881293853047425e-05, "loss": 2.4453, "step": 820500 }, { "epoch": 2.38, "learning_rate": 4.881221488282697e-05, "loss": 2.4563, "step": 821000 }, { "epoch": 2.38, "learning_rate": 4.881149268247499e-05, "loss": 2.474, "step": 821500 }, { "epoch": 2.38, "learning_rate": 4.881077048212301e-05, "loss": 2.4627, "step": 822000 }, { "epoch": 2.38, "learning_rate": 4.881004683447574e-05, "loss": 2.4507, "step": 822500 }, { "epoch": 2.38, "learning_rate": 4.880932318682846e-05, "loss": 2.4714, "step": 823000 }, { "epoch": 2.38, "learning_rate": 4.8808599539181184e-05, "loss": 2.4486, "step": 823500 }, { "epoch": 2.39, "learning_rate": 4.8807875891533906e-05, "loss": 2.4582, "step": 824000 }, { "epoch": 2.39, "learning_rate": 4.880715224388663e-05, "loss": 2.4489, "step": 824500 }, { "epoch": 2.39, "learning_rate": 4.880642859623935e-05, "loss": 2.4812, "step": 825000 }, { "epoch": 2.39, "learning_rate": 4.880570494859207e-05, "loss": 2.4713, "step": 825500 }, { "epoch": 2.39, "learning_rate": 4.8804981300944795e-05, "loss": 2.4539, "step": 826000 }, { "epoch": 2.39, "learning_rate": 4.880425765329752e-05, "loss": 2.4657, "step": 826500 }, { "epoch": 2.39, "learning_rate": 4.880353400565024e-05, "loss": 2.4488, "step": 827000 }, { "epoch": 2.4, "learning_rate": 4.880281035800297e-05, "loss": 2.4482, "step": 827500 }, { "epoch": 2.4, "learning_rate": 4.8802088157650984e-05, "loss": 2.4655, "step": 828000 }, { "epoch": 2.4, "learning_rate": 4.8801365957299e-05, "loss": 2.472, "step": 828500 }, { "epoch": 2.4, "learning_rate": 4.880064230965172e-05, "loss": 2.4784, "step": 829000 }, { "epoch": 2.4, "learning_rate": 4.8799918662004444e-05, "loss": 2.4726, "step": 829500 }, { "epoch": 2.4, "learning_rate": 4.8799196461652466e-05, "loss": 2.4602, "step": 830000 }, { "epoch": 2.4, "learning_rate": 4.879847281400519e-05, "loss": 2.4414, "step": 830500 }, { "epoch": 2.41, "learning_rate": 4.879774916635792e-05, "loss": 2.4778, "step": 831000 }, { "epoch": 2.41, "learning_rate": 4.879702551871064e-05, "loss": 2.4769, "step": 831500 }, { "epoch": 2.41, "learning_rate": 4.879630187106336e-05, "loss": 2.4492, "step": 832000 }, { "epoch": 2.41, "learning_rate": 4.8795578223416084e-05, "loss": 2.4726, "step": 832500 }, { "epoch": 2.41, "learning_rate": 4.8794854575768806e-05, "loss": 2.4534, "step": 833000 }, { "epoch": 2.41, "learning_rate": 4.879413092812153e-05, "loss": 2.4727, "step": 833500 }, { "epoch": 2.41, "learning_rate": 4.8793408727769544e-05, "loss": 2.4681, "step": 834000 }, { "epoch": 2.42, "learning_rate": 4.8792685080122266e-05, "loss": 2.467, "step": 834500 }, { "epoch": 2.42, "learning_rate": 4.879196143247499e-05, "loss": 2.4551, "step": 835000 }, { "epoch": 2.42, "learning_rate": 4.879123923212301e-05, "loss": 2.4706, "step": 835500 }, { "epoch": 2.42, "learning_rate": 4.879051558447573e-05, "loss": 2.4576, "step": 836000 }, { "epoch": 2.42, "learning_rate": 4.8789791936828455e-05, "loss": 2.4657, "step": 836500 }, { "epoch": 2.42, "learning_rate": 4.878906828918118e-05, "loss": 2.4434, "step": 837000 }, { "epoch": 2.42, "learning_rate": 4.8788344641533907e-05, "loss": 2.4745, "step": 837500 }, { "epoch": 2.43, "learning_rate": 4.878762099388663e-05, "loss": 2.4514, "step": 838000 }, { "epoch": 2.43, "learning_rate": 4.878689734623935e-05, "loss": 2.4496, "step": 838500 }, { "epoch": 2.43, "learning_rate": 4.878617369859207e-05, "loss": 2.4691, "step": 839000 }, { "epoch": 2.43, "learning_rate": 4.8785450050944796e-05, "loss": 2.484, "step": 839500 }, { "epoch": 2.43, "learning_rate": 4.878472640329752e-05, "loss": 2.4308, "step": 840000 }, { "epoch": 2.43, "learning_rate": 4.878400420294554e-05, "loss": 2.4559, "step": 840500 }, { "epoch": 2.43, "learning_rate": 4.878328055529826e-05, "loss": 2.4676, "step": 841000 }, { "epoch": 2.44, "learning_rate": 4.8782556907650985e-05, "loss": 2.4656, "step": 841500 }, { "epoch": 2.44, "learning_rate": 4.878183326000371e-05, "loss": 2.4534, "step": 842000 }, { "epoch": 2.44, "learning_rate": 4.878110961235643e-05, "loss": 2.4556, "step": 842500 }, { "epoch": 2.44, "learning_rate": 4.878038596470915e-05, "loss": 2.4398, "step": 843000 }, { "epoch": 2.44, "learning_rate": 4.8779662317061874e-05, "loss": 2.452, "step": 843500 }, { "epoch": 2.44, "learning_rate": 4.8778938669414596e-05, "loss": 2.4619, "step": 844000 }, { "epoch": 2.44, "learning_rate": 4.877821646906262e-05, "loss": 2.4557, "step": 844500 }, { "epoch": 2.45, "learning_rate": 4.877749282141535e-05, "loss": 2.4486, "step": 845000 }, { "epoch": 2.45, "learning_rate": 4.877676917376807e-05, "loss": 2.4471, "step": 845500 }, { "epoch": 2.45, "learning_rate": 4.877604552612079e-05, "loss": 2.4459, "step": 846000 }, { "epoch": 2.45, "learning_rate": 4.8775321878473514e-05, "loss": 2.4626, "step": 846500 }, { "epoch": 2.45, "learning_rate": 4.877459967812153e-05, "loss": 2.4616, "step": 847000 }, { "epoch": 2.45, "learning_rate": 4.877387603047425e-05, "loss": 2.4736, "step": 847500 }, { "epoch": 2.45, "learning_rate": 4.8773152382826974e-05, "loss": 2.4513, "step": 848000 }, { "epoch": 2.46, "learning_rate": 4.8772428735179696e-05, "loss": 2.4405, "step": 848500 }, { "epoch": 2.46, "learning_rate": 4.877170508753242e-05, "loss": 2.4534, "step": 849000 }, { "epoch": 2.46, "learning_rate": 4.877098143988515e-05, "loss": 2.4626, "step": 849500 }, { "epoch": 2.46, "learning_rate": 4.877025779223787e-05, "loss": 2.4684, "step": 850000 }, { "epoch": 2.46, "learning_rate": 4.876953414459059e-05, "loss": 2.4783, "step": 850500 }, { "epoch": 2.46, "learning_rate": 4.876881194423861e-05, "loss": 2.4883, "step": 851000 }, { "epoch": 2.46, "learning_rate": 4.876808829659133e-05, "loss": 2.4863, "step": 851500 }, { "epoch": 2.47, "learning_rate": 4.876736464894406e-05, "loss": 2.4668, "step": 852000 }, { "epoch": 2.47, "learning_rate": 4.8766642448592074e-05, "loss": 2.4822, "step": 852500 }, { "epoch": 2.47, "learning_rate": 4.8765918800944796e-05, "loss": 2.441, "step": 853000 }, { "epoch": 2.47, "learning_rate": 4.876519515329752e-05, "loss": 2.4355, "step": 853500 }, { "epoch": 2.47, "learning_rate": 4.876447150565025e-05, "loss": 2.4657, "step": 854000 }, { "epoch": 2.47, "learning_rate": 4.876374785800297e-05, "loss": 2.4465, "step": 854500 }, { "epoch": 2.47, "learning_rate": 4.876302421035569e-05, "loss": 2.4633, "step": 855000 }, { "epoch": 2.48, "learning_rate": 4.8762300562708414e-05, "loss": 2.4369, "step": 855500 }, { "epoch": 2.48, "learning_rate": 4.8761576915061137e-05, "loss": 2.4499, "step": 856000 }, { "epoch": 2.48, "learning_rate": 4.876085326741386e-05, "loss": 2.4599, "step": 856500 }, { "epoch": 2.48, "learning_rate": 4.876012961976658e-05, "loss": 2.4461, "step": 857000 }, { "epoch": 2.48, "learning_rate": 4.87594059721193e-05, "loss": 2.4432, "step": 857500 }, { "epoch": 2.48, "learning_rate": 4.8758682324472025e-05, "loss": 2.4752, "step": 858000 }, { "epoch": 2.49, "learning_rate": 4.875795867682475e-05, "loss": 2.4624, "step": 858500 }, { "epoch": 2.49, "learning_rate": 4.875723647647277e-05, "loss": 2.4722, "step": 859000 }, { "epoch": 2.49, "learning_rate": 4.87565128288255e-05, "loss": 2.5101, "step": 859500 }, { "epoch": 2.49, "learning_rate": 4.875578918117822e-05, "loss": 2.4928, "step": 860000 }, { "epoch": 2.49, "learning_rate": 4.8755065533530943e-05, "loss": 2.4738, "step": 860500 }, { "epoch": 2.49, "learning_rate": 4.875434333317896e-05, "loss": 2.4671, "step": 861000 }, { "epoch": 2.49, "learning_rate": 4.8753621132826975e-05, "loss": 2.4745, "step": 861500 }, { "epoch": 2.5, "learning_rate": 4.87528974851797e-05, "loss": 2.4534, "step": 862000 }, { "epoch": 2.5, "learning_rate": 4.875217383753242e-05, "loss": 2.4471, "step": 862500 }, { "epoch": 2.5, "learning_rate": 4.875145018988515e-05, "loss": 2.4532, "step": 863000 }, { "epoch": 2.5, "learning_rate": 4.875072654223787e-05, "loss": 2.4462, "step": 863500 }, { "epoch": 2.5, "learning_rate": 4.875000289459059e-05, "loss": 2.4864, "step": 864000 }, { "epoch": 2.5, "learning_rate": 4.8749279246943315e-05, "loss": 2.4572, "step": 864500 }, { "epoch": 2.5, "learning_rate": 4.874855559929604e-05, "loss": 2.4308, "step": 865000 }, { "epoch": 2.51, "learning_rate": 4.874783195164876e-05, "loss": 2.4404, "step": 865500 }, { "epoch": 2.51, "learning_rate": 4.8747109751296775e-05, "loss": 2.4205, "step": 866000 }, { "epoch": 2.51, "learning_rate": 4.87463861036495e-05, "loss": 2.462, "step": 866500 }, { "epoch": 2.51, "learning_rate": 4.8745662456002226e-05, "loss": 2.4615, "step": 867000 }, { "epoch": 2.51, "learning_rate": 4.874493880835495e-05, "loss": 2.4668, "step": 867500 }, { "epoch": 2.51, "learning_rate": 4.8744218055298264e-05, "loss": 2.4706, "step": 868000 }, { "epoch": 2.51, "learning_rate": 4.8743494407650986e-05, "loss": 2.463, "step": 868500 }, { "epoch": 2.52, "learning_rate": 4.874277076000371e-05, "loss": 2.4379, "step": 869000 }, { "epoch": 2.52, "learning_rate": 4.874204711235643e-05, "loss": 2.4478, "step": 869500 }, { "epoch": 2.52, "learning_rate": 4.874132346470915e-05, "loss": 2.4451, "step": 870000 }, { "epoch": 2.52, "learning_rate": 4.8740599817061875e-05, "loss": 2.4479, "step": 870500 }, { "epoch": 2.52, "learning_rate": 4.87398761694146e-05, "loss": 2.4983, "step": 871000 }, { "epoch": 2.52, "learning_rate": 4.873915252176732e-05, "loss": 2.4713, "step": 871500 }, { "epoch": 2.52, "learning_rate": 4.873842887412005e-05, "loss": 2.4586, "step": 872000 }, { "epoch": 2.53, "learning_rate": 4.873770522647277e-05, "loss": 2.4466, "step": 872500 }, { "epoch": 2.53, "learning_rate": 4.8736983026120786e-05, "loss": 2.4379, "step": 873000 }, { "epoch": 2.53, "learning_rate": 4.873625937847351e-05, "loss": 2.4507, "step": 873500 }, { "epoch": 2.53, "learning_rate": 4.873553573082623e-05, "loss": 2.4723, "step": 874000 }, { "epoch": 2.53, "learning_rate": 4.873481208317896e-05, "loss": 2.4562, "step": 874500 }, { "epoch": 2.53, "learning_rate": 4.8734089882826975e-05, "loss": 2.4718, "step": 875000 }, { "epoch": 2.53, "learning_rate": 4.87333662351797e-05, "loss": 2.453, "step": 875500 }, { "epoch": 2.54, "learning_rate": 4.873264258753243e-05, "loss": 2.4409, "step": 876000 }, { "epoch": 2.54, "learning_rate": 4.873191893988515e-05, "loss": 2.4678, "step": 876500 }, { "epoch": 2.54, "learning_rate": 4.873119529223787e-05, "loss": 2.4712, "step": 877000 }, { "epoch": 2.54, "learning_rate": 4.873047164459059e-05, "loss": 2.4599, "step": 877500 }, { "epoch": 2.54, "learning_rate": 4.8729747996943316e-05, "loss": 2.4334, "step": 878000 }, { "epoch": 2.54, "learning_rate": 4.872902434929604e-05, "loss": 2.4179, "step": 878500 }, { "epoch": 2.54, "learning_rate": 4.872830214894405e-05, "loss": 2.4463, "step": 879000 }, { "epoch": 2.55, "learning_rate": 4.8727578501296776e-05, "loss": 2.4667, "step": 879500 }, { "epoch": 2.55, "learning_rate": 4.87268548536495e-05, "loss": 2.475, "step": 880000 }, { "epoch": 2.55, "learning_rate": 4.872613120600223e-05, "loss": 2.4829, "step": 880500 }, { "epoch": 2.55, "learning_rate": 4.872540755835495e-05, "loss": 2.4636, "step": 881000 }, { "epoch": 2.55, "learning_rate": 4.872468391070767e-05, "loss": 2.4525, "step": 881500 }, { "epoch": 2.55, "learning_rate": 4.87239602630604e-05, "loss": 2.4419, "step": 882000 }, { "epoch": 2.55, "learning_rate": 4.872323661541312e-05, "loss": 2.4189, "step": 882500 }, { "epoch": 2.56, "learning_rate": 4.872251441506114e-05, "loss": 2.4375, "step": 883000 }, { "epoch": 2.56, "learning_rate": 4.872179076741386e-05, "loss": 2.4487, "step": 883500 }, { "epoch": 2.56, "learning_rate": 4.872106711976658e-05, "loss": 2.4688, "step": 884000 }, { "epoch": 2.56, "learning_rate": 4.8720343472119305e-05, "loss": 2.4355, "step": 884500 }, { "epoch": 2.56, "learning_rate": 4.871961982447203e-05, "loss": 2.4716, "step": 885000 }, { "epoch": 2.56, "learning_rate": 4.871889617682475e-05, "loss": 2.4849, "step": 885500 }, { "epoch": 2.56, "learning_rate": 4.871817397647277e-05, "loss": 2.4473, "step": 886000 }, { "epoch": 2.57, "learning_rate": 4.8717450328825494e-05, "loss": 2.4466, "step": 886500 }, { "epoch": 2.57, "learning_rate": 4.871672812847351e-05, "loss": 2.441, "step": 887000 }, { "epoch": 2.57, "learning_rate": 4.871600448082623e-05, "loss": 2.4405, "step": 887500 }, { "epoch": 2.57, "learning_rate": 4.8715280833178954e-05, "loss": 2.4461, "step": 888000 }, { "epoch": 2.57, "learning_rate": 4.8714557185531676e-05, "loss": 2.4505, "step": 888500 }, { "epoch": 2.57, "learning_rate": 4.87138335378844e-05, "loss": 2.4672, "step": 889000 }, { "epoch": 2.57, "learning_rate": 4.871310989023713e-05, "loss": 2.4518, "step": 889500 }, { "epoch": 2.58, "learning_rate": 4.871238624258985e-05, "loss": 2.4466, "step": 890000 }, { "epoch": 2.58, "learning_rate": 4.871166259494258e-05, "loss": 2.4747, "step": 890500 }, { "epoch": 2.58, "learning_rate": 4.87109389472953e-05, "loss": 2.4575, "step": 891000 }, { "epoch": 2.58, "learning_rate": 4.871021819423861e-05, "loss": 2.4662, "step": 891500 }, { "epoch": 2.58, "learning_rate": 4.870949454659133e-05, "loss": 2.4587, "step": 892000 }, { "epoch": 2.58, "learning_rate": 4.8708770898944054e-05, "loss": 2.4539, "step": 892500 }, { "epoch": 2.58, "learning_rate": 4.8708047251296776e-05, "loss": 2.4213, "step": 893000 }, { "epoch": 2.59, "learning_rate": 4.87073250509448e-05, "loss": 2.4429, "step": 893500 }, { "epoch": 2.59, "learning_rate": 4.870660140329752e-05, "loss": 2.4597, "step": 894000 }, { "epoch": 2.59, "learning_rate": 4.870587775565024e-05, "loss": 2.4739, "step": 894500 }, { "epoch": 2.59, "learning_rate": 4.8705154108002965e-05, "loss": 2.4595, "step": 895000 }, { "epoch": 2.59, "learning_rate": 4.870443046035569e-05, "loss": 2.4374, "step": 895500 }, { "epoch": 2.59, "learning_rate": 4.87037082600037e-05, "loss": 2.4726, "step": 896000 }, { "epoch": 2.6, "learning_rate": 4.8702984612356425e-05, "loss": 2.4594, "step": 896500 }, { "epoch": 2.6, "learning_rate": 4.870226241200445e-05, "loss": 2.4582, "step": 897000 }, { "epoch": 2.6, "learning_rate": 4.870153876435718e-05, "loss": 2.4465, "step": 897500 }, { "epoch": 2.6, "learning_rate": 4.87008151167099e-05, "loss": 2.4757, "step": 898000 }, { "epoch": 2.6, "learning_rate": 4.870009146906262e-05, "loss": 2.4431, "step": 898500 }, { "epoch": 2.6, "learning_rate": 4.8699367821415343e-05, "loss": 2.4492, "step": 899000 }, { "epoch": 2.6, "learning_rate": 4.8698644173768066e-05, "loss": 2.4233, "step": 899500 }, { "epoch": 2.61, "learning_rate": 4.869792052612079e-05, "loss": 2.4649, "step": 900000 }, { "epoch": 2.61, "learning_rate": 4.869719687847351e-05, "loss": 2.481, "step": 900500 }, { "epoch": 2.61, "learning_rate": 4.869647323082623e-05, "loss": 2.4592, "step": 901000 }, { "epoch": 2.61, "learning_rate": 4.8695749583178955e-05, "loss": 2.4467, "step": 901500 }, { "epoch": 2.61, "learning_rate": 4.869502593553168e-05, "loss": 2.4453, "step": 902000 }, { "epoch": 2.61, "learning_rate": 4.86943022878844e-05, "loss": 2.4858, "step": 902500 }, { "epoch": 2.61, "learning_rate": 4.869357864023713e-05, "loss": 2.4187, "step": 903000 }, { "epoch": 2.62, "learning_rate": 4.869285499258985e-05, "loss": 2.4596, "step": 903500 }, { "epoch": 2.62, "learning_rate": 4.869213134494257e-05, "loss": 2.468, "step": 904000 }, { "epoch": 2.62, "learning_rate": 4.86914076972953e-05, "loss": 2.428, "step": 904500 }, { "epoch": 2.62, "learning_rate": 4.8690684049648024e-05, "loss": 2.4579, "step": 905000 }, { "epoch": 2.62, "learning_rate": 4.8689960402000746e-05, "loss": 2.4583, "step": 905500 }, { "epoch": 2.62, "learning_rate": 4.868923820164876e-05, "loss": 2.4549, "step": 906000 }, { "epoch": 2.62, "learning_rate": 4.8688514554001484e-05, "loss": 2.4625, "step": 906500 }, { "epoch": 2.63, "learning_rate": 4.8687792353649506e-05, "loss": 2.461, "step": 907000 }, { "epoch": 2.63, "learning_rate": 4.868706870600223e-05, "loss": 2.4467, "step": 907500 }, { "epoch": 2.63, "learning_rate": 4.868634505835495e-05, "loss": 2.4873, "step": 908000 }, { "epoch": 2.63, "learning_rate": 4.868562141070767e-05, "loss": 2.4587, "step": 908500 }, { "epoch": 2.63, "learning_rate": 4.8684897763060395e-05, "loss": 2.4456, "step": 909000 }, { "epoch": 2.63, "learning_rate": 4.868417556270841e-05, "loss": 2.4724, "step": 909500 }, { "epoch": 2.63, "learning_rate": 4.868345191506113e-05, "loss": 2.4145, "step": 910000 }, { "epoch": 2.64, "learning_rate": 4.8682728267413855e-05, "loss": 2.4492, "step": 910500 }, { "epoch": 2.64, "learning_rate": 4.868200461976658e-05, "loss": 2.4593, "step": 911000 }, { "epoch": 2.64, "learning_rate": 4.86812824194146e-05, "loss": 2.4748, "step": 911500 }, { "epoch": 2.64, "learning_rate": 4.868055877176733e-05, "loss": 2.4635, "step": 912000 }, { "epoch": 2.64, "learning_rate": 4.867983512412005e-05, "loss": 2.4113, "step": 912500 }, { "epoch": 2.64, "learning_rate": 4.867911147647277e-05, "loss": 2.4451, "step": 913000 }, { "epoch": 2.64, "learning_rate": 4.8678387828825495e-05, "loss": 2.4659, "step": 913500 }, { "epoch": 2.65, "learning_rate": 4.867766418117822e-05, "loss": 2.4472, "step": 914000 }, { "epoch": 2.65, "learning_rate": 4.867694053353094e-05, "loss": 2.4232, "step": 914500 }, { "epoch": 2.65, "learning_rate": 4.867621688588366e-05, "loss": 2.4305, "step": 915000 }, { "epoch": 2.65, "learning_rate": 4.8675493238236384e-05, "loss": 2.4543, "step": 915500 }, { "epoch": 2.65, "learning_rate": 4.867477103788441e-05, "loss": 2.4772, "step": 916000 }, { "epoch": 2.65, "learning_rate": 4.867404739023713e-05, "loss": 2.4502, "step": 916500 }, { "epoch": 2.65, "learning_rate": 4.867332374258985e-05, "loss": 2.4392, "step": 917000 }, { "epoch": 2.66, "learning_rate": 4.8672600094942573e-05, "loss": 2.425, "step": 917500 }, { "epoch": 2.66, "learning_rate": 4.8671876447295296e-05, "loss": 2.4573, "step": 918000 }, { "epoch": 2.66, "learning_rate": 4.867115279964802e-05, "loss": 2.4606, "step": 918500 }, { "epoch": 2.66, "learning_rate": 4.867042915200075e-05, "loss": 2.4845, "step": 919000 }, { "epoch": 2.66, "learning_rate": 4.866970550435347e-05, "loss": 2.4546, "step": 919500 }, { "epoch": 2.66, "learning_rate": 4.8668983304001485e-05, "loss": 2.4337, "step": 920000 }, { "epoch": 2.66, "learning_rate": 4.866826110364951e-05, "loss": 2.4631, "step": 920500 }, { "epoch": 2.67, "learning_rate": 4.866753745600223e-05, "loss": 2.4756, "step": 921000 }, { "epoch": 2.67, "learning_rate": 4.866681380835495e-05, "loss": 2.4742, "step": 921500 }, { "epoch": 2.67, "learning_rate": 4.8666090160707674e-05, "loss": 2.4588, "step": 922000 }, { "epoch": 2.67, "learning_rate": 4.8665366513060396e-05, "loss": 2.4369, "step": 922500 }, { "epoch": 2.67, "learning_rate": 4.866464286541312e-05, "loss": 2.4665, "step": 923000 }, { "epoch": 2.67, "learning_rate": 4.866391921776584e-05, "loss": 2.4556, "step": 923500 }, { "epoch": 2.67, "learning_rate": 4.866319557011856e-05, "loss": 2.4519, "step": 924000 }, { "epoch": 2.68, "learning_rate": 4.866247336976658e-05, "loss": 2.4419, "step": 924500 }, { "epoch": 2.68, "learning_rate": 4.866174972211931e-05, "loss": 2.4378, "step": 925000 }, { "epoch": 2.68, "learning_rate": 4.866102752176732e-05, "loss": 2.4506, "step": 925500 }, { "epoch": 2.68, "learning_rate": 4.8660303874120045e-05, "loss": 2.4561, "step": 926000 }, { "epoch": 2.68, "learning_rate": 4.865958022647277e-05, "loss": 2.4601, "step": 926500 }, { "epoch": 2.68, "learning_rate": 4.8658856578825496e-05, "loss": 2.4544, "step": 927000 }, { "epoch": 2.68, "learning_rate": 4.865813293117822e-05, "loss": 2.434, "step": 927500 }, { "epoch": 2.69, "learning_rate": 4.865740928353094e-05, "loss": 2.4714, "step": 928000 }, { "epoch": 2.69, "learning_rate": 4.865668563588366e-05, "loss": 2.4271, "step": 928500 }, { "epoch": 2.69, "learning_rate": 4.8655961988236385e-05, "loss": 2.4399, "step": 929000 }, { "epoch": 2.69, "learning_rate": 4.865523834058911e-05, "loss": 2.4481, "step": 929500 }, { "epoch": 2.69, "learning_rate": 4.865451614023713e-05, "loss": 2.4669, "step": 930000 }, { "epoch": 2.69, "learning_rate": 4.865379249258985e-05, "loss": 2.4572, "step": 930500 }, { "epoch": 2.69, "learning_rate": 4.8653068844942574e-05, "loss": 2.4523, "step": 931000 }, { "epoch": 2.7, "learning_rate": 4.8652345197295296e-05, "loss": 2.4464, "step": 931500 }, { "epoch": 2.7, "learning_rate": 4.865162299694331e-05, "loss": 2.4635, "step": 932000 }, { "epoch": 2.7, "learning_rate": 4.8650900796591334e-05, "loss": 2.4644, "step": 932500 }, { "epoch": 2.7, "learning_rate": 4.8650177148944057e-05, "loss": 2.4424, "step": 933000 }, { "epoch": 2.7, "learning_rate": 4.864945350129678e-05, "loss": 2.4443, "step": 933500 }, { "epoch": 2.7, "learning_rate": 4.86487298536495e-05, "loss": 2.4308, "step": 934000 }, { "epoch": 2.7, "learning_rate": 4.864800620600223e-05, "loss": 2.4478, "step": 934500 }, { "epoch": 2.71, "learning_rate": 4.864728255835495e-05, "loss": 2.4514, "step": 935000 }, { "epoch": 2.71, "learning_rate": 4.8646558910707674e-05, "loss": 2.4682, "step": 935500 }, { "epoch": 2.71, "learning_rate": 4.86458352630604e-05, "loss": 2.4719, "step": 936000 }, { "epoch": 2.71, "learning_rate": 4.864511306270841e-05, "loss": 2.465, "step": 936500 }, { "epoch": 2.71, "learning_rate": 4.8644389415061135e-05, "loss": 2.4763, "step": 937000 }, { "epoch": 2.71, "learning_rate": 4.864366576741386e-05, "loss": 2.4436, "step": 937500 }, { "epoch": 2.72, "learning_rate": 4.8642942119766586e-05, "loss": 2.4721, "step": 938000 }, { "epoch": 2.72, "learning_rate": 4.8642221366709895e-05, "loss": 2.4606, "step": 938500 }, { "epoch": 2.72, "learning_rate": 4.864149771906262e-05, "loss": 2.4603, "step": 939000 }, { "epoch": 2.72, "learning_rate": 4.864077407141534e-05, "loss": 2.4776, "step": 939500 }, { "epoch": 2.72, "learning_rate": 4.864005042376806e-05, "loss": 2.4467, "step": 940000 }, { "epoch": 2.72, "learning_rate": 4.8639326776120784e-05, "loss": 2.455, "step": 940500 }, { "epoch": 2.72, "learning_rate": 4.8638603128473506e-05, "loss": 2.4479, "step": 941000 }, { "epoch": 2.73, "learning_rate": 4.8637879480826235e-05, "loss": 2.4769, "step": 941500 }, { "epoch": 2.73, "learning_rate": 4.863715583317896e-05, "loss": 2.4656, "step": 942000 }, { "epoch": 2.73, "learning_rate": 4.8636432185531686e-05, "loss": 2.4547, "step": 942500 }, { "epoch": 2.73, "learning_rate": 4.863570853788441e-05, "loss": 2.4633, "step": 943000 }, { "epoch": 2.73, "learning_rate": 4.863498489023713e-05, "loss": 2.4582, "step": 943500 }, { "epoch": 2.73, "learning_rate": 4.863426124258985e-05, "loss": 2.4589, "step": 944000 }, { "epoch": 2.73, "learning_rate": 4.8633537594942575e-05, "loss": 2.4509, "step": 944500 }, { "epoch": 2.74, "learning_rate": 4.863281539459059e-05, "loss": 2.4781, "step": 945000 }, { "epoch": 2.74, "learning_rate": 4.863209174694331e-05, "loss": 2.4617, "step": 945500 }, { "epoch": 2.74, "learning_rate": 4.8631369546591335e-05, "loss": 2.4691, "step": 946000 }, { "epoch": 2.74, "learning_rate": 4.863064589894406e-05, "loss": 2.4536, "step": 946500 }, { "epoch": 2.74, "learning_rate": 4.862992225129678e-05, "loss": 2.4258, "step": 947000 }, { "epoch": 2.74, "learning_rate": 4.86291986036495e-05, "loss": 2.4414, "step": 947500 }, { "epoch": 2.74, "learning_rate": 4.8628474956002224e-05, "loss": 2.4639, "step": 948000 }, { "epoch": 2.75, "learning_rate": 4.8627751308354946e-05, "loss": 2.4517, "step": 948500 }, { "epoch": 2.75, "learning_rate": 4.862702766070767e-05, "loss": 2.454, "step": 949000 }, { "epoch": 2.75, "learning_rate": 4.86263040130604e-05, "loss": 2.448, "step": 949500 }, { "epoch": 2.75, "learning_rate": 4.862558036541312e-05, "loss": 2.4216, "step": 950000 }, { "epoch": 2.75, "learning_rate": 4.8624858165061135e-05, "loss": 2.4298, "step": 950500 }, { "epoch": 2.75, "learning_rate": 4.862413451741386e-05, "loss": 2.4333, "step": 951000 }, { "epoch": 2.75, "learning_rate": 4.8623410869766587e-05, "loss": 2.4494, "step": 951500 }, { "epoch": 2.76, "learning_rate": 4.862268722211931e-05, "loss": 2.4553, "step": 952000 }, { "epoch": 2.76, "learning_rate": 4.8621965021767324e-05, "loss": 2.4588, "step": 952500 }, { "epoch": 2.76, "learning_rate": 4.8621241374120047e-05, "loss": 2.4468, "step": 953000 }, { "epoch": 2.76, "learning_rate": 4.862051772647277e-05, "loss": 2.4701, "step": 953500 }, { "epoch": 2.76, "learning_rate": 4.861979407882549e-05, "loss": 2.4475, "step": 954000 }, { "epoch": 2.76, "learning_rate": 4.861907043117821e-05, "loss": 2.4553, "step": 954500 }, { "epoch": 2.76, "learning_rate": 4.8618346783530936e-05, "loss": 2.4246, "step": 955000 }, { "epoch": 2.77, "learning_rate": 4.861762313588366e-05, "loss": 2.4622, "step": 955500 }, { "epoch": 2.77, "learning_rate": 4.861689948823639e-05, "loss": 2.4683, "step": 956000 }, { "epoch": 2.77, "learning_rate": 4.8616178735179696e-05, "loss": 2.4612, "step": 956500 }, { "epoch": 2.77, "learning_rate": 4.8615455087532425e-05, "loss": 2.4624, "step": 957000 }, { "epoch": 2.77, "learning_rate": 4.861473143988515e-05, "loss": 2.4681, "step": 957500 }, { "epoch": 2.77, "learning_rate": 4.861400779223787e-05, "loss": 2.477, "step": 958000 }, { "epoch": 2.77, "learning_rate": 4.861328414459059e-05, "loss": 2.4291, "step": 958500 }, { "epoch": 2.78, "learning_rate": 4.8612561944238614e-05, "loss": 2.4417, "step": 959000 }, { "epoch": 2.78, "learning_rate": 4.8611838296591336e-05, "loss": 2.4448, "step": 959500 }, { "epoch": 2.78, "learning_rate": 4.861111464894406e-05, "loss": 2.42, "step": 960000 }, { "epoch": 2.78, "learning_rate": 4.861039100129678e-05, "loss": 2.4458, "step": 960500 }, { "epoch": 2.78, "learning_rate": 4.86096673536495e-05, "loss": 2.4689, "step": 961000 }, { "epoch": 2.78, "learning_rate": 4.8608943706002225e-05, "loss": 2.45, "step": 961500 }, { "epoch": 2.78, "learning_rate": 4.860822005835495e-05, "loss": 2.439, "step": 962000 }, { "epoch": 2.79, "learning_rate": 4.860749641070767e-05, "loss": 2.4277, "step": 962500 }, { "epoch": 2.79, "learning_rate": 4.8606775657650985e-05, "loss": 2.4394, "step": 963000 }, { "epoch": 2.79, "learning_rate": 4.860605201000371e-05, "loss": 2.446, "step": 963500 }, { "epoch": 2.79, "learning_rate": 4.860532836235643e-05, "loss": 2.4517, "step": 964000 }, { "epoch": 2.79, "learning_rate": 4.860460471470916e-05, "loss": 2.4274, "step": 964500 }, { "epoch": 2.79, "learning_rate": 4.860388106706188e-05, "loss": 2.4332, "step": 965000 }, { "epoch": 2.79, "learning_rate": 4.86031574194146e-05, "loss": 2.4417, "step": 965500 }, { "epoch": 2.8, "learning_rate": 4.8602433771767325e-05, "loss": 2.4321, "step": 966000 }, { "epoch": 2.8, "learning_rate": 4.860171012412005e-05, "loss": 2.4553, "step": 966500 }, { "epoch": 2.8, "learning_rate": 4.860098792376806e-05, "loss": 2.467, "step": 967000 }, { "epoch": 2.8, "learning_rate": 4.8600264276120785e-05, "loss": 2.4659, "step": 967500 }, { "epoch": 2.8, "learning_rate": 4.8599540628473514e-05, "loss": 2.4483, "step": 968000 }, { "epoch": 2.8, "learning_rate": 4.8598816980826236e-05, "loss": 2.4443, "step": 968500 }, { "epoch": 2.8, "learning_rate": 4.859809333317896e-05, "loss": 2.4585, "step": 969000 }, { "epoch": 2.81, "learning_rate": 4.859736968553168e-05, "loss": 2.4495, "step": 969500 }, { "epoch": 2.81, "learning_rate": 4.859664893247499e-05, "loss": 2.4341, "step": 970000 }, { "epoch": 2.81, "learning_rate": 4.859592528482771e-05, "loss": 2.4353, "step": 970500 }, { "epoch": 2.81, "learning_rate": 4.8595201637180434e-05, "loss": 2.4307, "step": 971000 }, { "epoch": 2.81, "learning_rate": 4.859447798953316e-05, "loss": 2.4511, "step": 971500 }, { "epoch": 2.81, "learning_rate": 4.8593754341885885e-05, "loss": 2.4669, "step": 972000 }, { "epoch": 2.81, "learning_rate": 4.8593030694238614e-05, "loss": 2.4567, "step": 972500 }, { "epoch": 2.82, "learning_rate": 4.859230704659134e-05, "loss": 2.4674, "step": 973000 }, { "epoch": 2.82, "learning_rate": 4.859158339894406e-05, "loss": 2.4895, "step": 973500 }, { "epoch": 2.82, "learning_rate": 4.859085975129678e-05, "loss": 2.4793, "step": 974000 }, { "epoch": 2.82, "learning_rate": 4.85901375509448e-05, "loss": 2.4346, "step": 974500 }, { "epoch": 2.82, "learning_rate": 4.858941390329752e-05, "loss": 2.4512, "step": 975000 }, { "epoch": 2.82, "learning_rate": 4.858869025565024e-05, "loss": 2.455, "step": 975500 }, { "epoch": 2.83, "learning_rate": 4.8587966608002963e-05, "loss": 2.4627, "step": 976000 }, { "epoch": 2.83, "learning_rate": 4.8587244407650986e-05, "loss": 2.4585, "step": 976500 }, { "epoch": 2.83, "learning_rate": 4.858652076000371e-05, "loss": 2.4504, "step": 977000 }, { "epoch": 2.83, "learning_rate": 4.8585798559651724e-05, "loss": 2.4254, "step": 977500 }, { "epoch": 2.83, "learning_rate": 4.8585074912004446e-05, "loss": 2.438, "step": 978000 }, { "epoch": 2.83, "learning_rate": 4.858435126435717e-05, "loss": 2.4483, "step": 978500 }, { "epoch": 2.83, "learning_rate": 4.858362761670989e-05, "loss": 2.4106, "step": 979000 }, { "epoch": 2.84, "learning_rate": 4.858290396906262e-05, "loss": 2.4446, "step": 979500 }, { "epoch": 2.84, "learning_rate": 4.858218032141534e-05, "loss": 2.4544, "step": 980000 }, { "epoch": 2.84, "learning_rate": 4.8581456673768064e-05, "loss": 2.4631, "step": 980500 }, { "epoch": 2.84, "learning_rate": 4.858073302612079e-05, "loss": 2.4517, "step": 981000 }, { "epoch": 2.84, "learning_rate": 4.8580009378473515e-05, "loss": 2.4564, "step": 981500 }, { "epoch": 2.84, "learning_rate": 4.857928717812153e-05, "loss": 2.4526, "step": 982000 }, { "epoch": 2.84, "learning_rate": 4.857856353047425e-05, "loss": 2.4403, "step": 982500 }, { "epoch": 2.85, "learning_rate": 4.8577839882826975e-05, "loss": 2.4405, "step": 983000 }, { "epoch": 2.85, "learning_rate": 4.85771162351797e-05, "loss": 2.4396, "step": 983500 }, { "epoch": 2.85, "learning_rate": 4.857639403482771e-05, "loss": 2.4476, "step": 984000 }, { "epoch": 2.85, "learning_rate": 4.857567038718044e-05, "loss": 2.4527, "step": 984500 }, { "epoch": 2.85, "learning_rate": 4.8574946739533164e-05, "loss": 2.4661, "step": 985000 }, { "epoch": 2.85, "learning_rate": 4.8574223091885886e-05, "loss": 2.4549, "step": 985500 }, { "epoch": 2.85, "learning_rate": 4.857349944423861e-05, "loss": 2.4619, "step": 986000 }, { "epoch": 2.86, "learning_rate": 4.8572777243886624e-05, "loss": 2.4462, "step": 986500 }, { "epoch": 2.86, "learning_rate": 4.8572053596239346e-05, "loss": 2.4388, "step": 987000 }, { "epoch": 2.86, "learning_rate": 4.8571329948592075e-05, "loss": 2.453, "step": 987500 }, { "epoch": 2.86, "learning_rate": 4.85706063009448e-05, "loss": 2.4359, "step": 988000 }, { "epoch": 2.86, "learning_rate": 4.856988410059281e-05, "loss": 2.458, "step": 988500 }, { "epoch": 2.86, "learning_rate": 4.8569161900240835e-05, "loss": 2.4579, "step": 989000 }, { "epoch": 2.86, "learning_rate": 4.856843825259356e-05, "loss": 2.4353, "step": 989500 }, { "epoch": 2.87, "learning_rate": 4.856771605224157e-05, "loss": 2.4816, "step": 990000 }, { "epoch": 2.87, "learning_rate": 4.8566992404594295e-05, "loss": 2.4242, "step": 990500 }, { "epoch": 2.87, "learning_rate": 4.856626875694702e-05, "loss": 2.4759, "step": 991000 }, { "epoch": 2.87, "learning_rate": 4.856554510929974e-05, "loss": 2.4336, "step": 991500 }, { "epoch": 2.87, "learning_rate": 4.856482146165246e-05, "loss": 2.4736, "step": 992000 }, { "epoch": 2.87, "learning_rate": 4.856409781400519e-05, "loss": 2.4378, "step": 992500 }, { "epoch": 2.87, "learning_rate": 4.856337416635791e-05, "loss": 2.4494, "step": 993000 }, { "epoch": 2.88, "learning_rate": 4.8562650518710636e-05, "loss": 2.4686, "step": 993500 }, { "epoch": 2.88, "learning_rate": 4.856192687106336e-05, "loss": 2.4552, "step": 994000 }, { "epoch": 2.88, "learning_rate": 4.8561204670711373e-05, "loss": 2.4392, "step": 994500 }, { "epoch": 2.88, "learning_rate": 4.8560482470359396e-05, "loss": 2.444, "step": 995000 }, { "epoch": 2.88, "learning_rate": 4.855975882271212e-05, "loss": 2.4615, "step": 995500 }, { "epoch": 2.88, "learning_rate": 4.855903517506484e-05, "loss": 2.4409, "step": 996000 }, { "epoch": 2.88, "learning_rate": 4.855831152741757e-05, "loss": 2.4721, "step": 996500 }, { "epoch": 2.89, "learning_rate": 4.855758787977029e-05, "loss": 2.4188, "step": 997000 }, { "epoch": 2.89, "learning_rate": 4.8556864232123014e-05, "loss": 2.4752, "step": 997500 }, { "epoch": 2.89, "learning_rate": 4.8556140584475736e-05, "loss": 2.4554, "step": 998000 }, { "epoch": 2.89, "learning_rate": 4.855541693682846e-05, "loss": 2.4502, "step": 998500 }, { "epoch": 2.89, "learning_rate": 4.855469328918118e-05, "loss": 2.469, "step": 999000 }, { "epoch": 2.89, "learning_rate": 4.85539696415339e-05, "loss": 2.4287, "step": 999500 }, { "epoch": 2.89, "learning_rate": 4.8553245993886625e-05, "loss": 2.4789, "step": 1000000 }, { "epoch": 2.9, "learning_rate": 4.855252234623935e-05, "loss": 2.4481, "step": 1000500 }, { "epoch": 2.9, "learning_rate": 4.855179869859207e-05, "loss": 2.4394, "step": 1001000 }, { "epoch": 2.9, "learning_rate": 4.855107505094479e-05, "loss": 2.4331, "step": 1001500 }, { "epoch": 2.9, "learning_rate": 4.855035140329752e-05, "loss": 2.4554, "step": 1002000 }, { "epoch": 2.9, "learning_rate": 4.854962775565024e-05, "loss": 2.4142, "step": 1002500 }, { "epoch": 2.9, "learning_rate": 4.8548904108002965e-05, "loss": 2.4857, "step": 1003000 }, { "epoch": 2.9, "learning_rate": 4.8548180460355694e-05, "loss": 2.437, "step": 1003500 }, { "epoch": 2.91, "learning_rate": 4.854745826000371e-05, "loss": 2.4008, "step": 1004000 }, { "epoch": 2.91, "learning_rate": 4.854673461235643e-05, "loss": 2.4431, "step": 1004500 }, { "epoch": 2.91, "learning_rate": 4.8546010964709154e-05, "loss": 2.4424, "step": 1005000 }, { "epoch": 2.91, "learning_rate": 4.8545287317061876e-05, "loss": 2.454, "step": 1005500 }, { "epoch": 2.91, "learning_rate": 4.85445636694146e-05, "loss": 2.4359, "step": 1006000 }, { "epoch": 2.91, "learning_rate": 4.854384002176732e-05, "loss": 2.4597, "step": 1006500 }, { "epoch": 2.91, "learning_rate": 4.854311782141534e-05, "loss": 2.4399, "step": 1007000 }, { "epoch": 2.92, "learning_rate": 4.8542394173768065e-05, "loss": 2.4619, "step": 1007500 }, { "epoch": 2.92, "learning_rate": 4.854167052612079e-05, "loss": 2.4393, "step": 1008000 }, { "epoch": 2.92, "learning_rate": 4.85409483257688e-05, "loss": 2.4616, "step": 1008500 }, { "epoch": 2.92, "learning_rate": 4.8540224678121525e-05, "loss": 2.4497, "step": 1009000 }, { "epoch": 2.92, "learning_rate": 4.8539501030474254e-05, "loss": 2.4246, "step": 1009500 }, { "epoch": 2.92, "learning_rate": 4.8538777382826977e-05, "loss": 2.4532, "step": 1010000 }, { "epoch": 2.92, "learning_rate": 4.85380537351797e-05, "loss": 2.4217, "step": 1010500 }, { "epoch": 2.93, "learning_rate": 4.853733153482772e-05, "loss": 2.4443, "step": 1011000 }, { "epoch": 2.93, "learning_rate": 4.853660788718044e-05, "loss": 2.4778, "step": 1011500 }, { "epoch": 2.93, "learning_rate": 4.8535884239533166e-05, "loss": 2.4233, "step": 1012000 }, { "epoch": 2.93, "learning_rate": 4.853516059188589e-05, "loss": 2.4459, "step": 1012500 }, { "epoch": 2.93, "learning_rate": 4.853443694423861e-05, "loss": 2.4646, "step": 1013000 }, { "epoch": 2.93, "learning_rate": 4.853371329659133e-05, "loss": 2.4486, "step": 1013500 }, { "epoch": 2.94, "learning_rate": 4.8532989648944055e-05, "loss": 2.461, "step": 1014000 }, { "epoch": 2.94, "learning_rate": 4.853226600129678e-05, "loss": 2.4285, "step": 1014500 }, { "epoch": 2.94, "learning_rate": 4.853154380094479e-05, "loss": 2.4527, "step": 1015000 }, { "epoch": 2.94, "learning_rate": 4.853082015329752e-05, "loss": 2.431, "step": 1015500 }, { "epoch": 2.94, "learning_rate": 4.8530096505650244e-05, "loss": 2.4355, "step": 1016000 }, { "epoch": 2.94, "learning_rate": 4.8529372858002966e-05, "loss": 2.4793, "step": 1016500 }, { "epoch": 2.94, "learning_rate": 4.8528649210355695e-05, "loss": 2.4321, "step": 1017000 }, { "epoch": 2.95, "learning_rate": 4.852792556270842e-05, "loss": 2.4442, "step": 1017500 }, { "epoch": 2.95, "learning_rate": 4.852720336235643e-05, "loss": 2.4496, "step": 1018000 }, { "epoch": 2.95, "learning_rate": 4.8526479714709155e-05, "loss": 2.4377, "step": 1018500 }, { "epoch": 2.95, "learning_rate": 4.852575606706188e-05, "loss": 2.4384, "step": 1019000 }, { "epoch": 2.95, "learning_rate": 4.85250324194146e-05, "loss": 2.4626, "step": 1019500 }, { "epoch": 2.95, "learning_rate": 4.852430877176732e-05, "loss": 2.438, "step": 1020000 }, { "epoch": 2.95, "learning_rate": 4.8523585124120044e-05, "loss": 2.4431, "step": 1020500 }, { "epoch": 2.96, "learning_rate": 4.852286147647277e-05, "loss": 2.4611, "step": 1021000 }, { "epoch": 2.96, "learning_rate": 4.8522137828825495e-05, "loss": 2.4586, "step": 1021500 }, { "epoch": 2.96, "learning_rate": 4.852141562847351e-05, "loss": 2.4446, "step": 1022000 }, { "epoch": 2.96, "learning_rate": 4.852069198082623e-05, "loss": 2.4435, "step": 1022500 }, { "epoch": 2.96, "learning_rate": 4.8519968333178955e-05, "loss": 2.4525, "step": 1023000 }, { "epoch": 2.96, "learning_rate": 4.851924468553168e-05, "loss": 2.4259, "step": 1023500 }, { "epoch": 2.96, "learning_rate": 4.85185210378844e-05, "loss": 2.4355, "step": 1024000 }, { "epoch": 2.97, "learning_rate": 4.851779739023713e-05, "loss": 2.4419, "step": 1024500 }, { "epoch": 2.97, "learning_rate": 4.851707374258985e-05, "loss": 2.45, "step": 1025000 }, { "epoch": 2.97, "learning_rate": 4.851635009494257e-05, "loss": 2.4389, "step": 1025500 }, { "epoch": 2.97, "learning_rate": 4.8515627894590595e-05, "loss": 2.4503, "step": 1026000 }, { "epoch": 2.97, "learning_rate": 4.851490424694332e-05, "loss": 2.4466, "step": 1026500 }, { "epoch": 2.97, "learning_rate": 4.851418059929604e-05, "loss": 2.4771, "step": 1027000 }, { "epoch": 2.97, "learning_rate": 4.851345695164876e-05, "loss": 2.4416, "step": 1027500 }, { "epoch": 2.98, "learning_rate": 4.8512733304001484e-05, "loss": 2.4252, "step": 1028000 }, { "epoch": 2.98, "learning_rate": 4.85120111036495e-05, "loss": 2.4382, "step": 1028500 }, { "epoch": 2.98, "learning_rate": 4.851128745600222e-05, "loss": 2.4497, "step": 1029000 }, { "epoch": 2.98, "learning_rate": 4.8510563808354944e-05, "loss": 2.4864, "step": 1029500 }, { "epoch": 2.98, "learning_rate": 4.850984016070767e-05, "loss": 2.4526, "step": 1030000 }, { "epoch": 2.98, "learning_rate": 4.8509116513060396e-05, "loss": 2.4475, "step": 1030500 }, { "epoch": 2.98, "learning_rate": 4.850839286541312e-05, "loss": 2.4792, "step": 1031000 }, { "epoch": 2.99, "learning_rate": 4.850766921776585e-05, "loss": 2.4451, "step": 1031500 }, { "epoch": 2.99, "learning_rate": 4.850694557011857e-05, "loss": 2.4516, "step": 1032000 }, { "epoch": 2.99, "learning_rate": 4.850622481706188e-05, "loss": 2.4405, "step": 1032500 }, { "epoch": 2.99, "learning_rate": 4.85055011694146e-05, "loss": 2.4382, "step": 1033000 }, { "epoch": 2.99, "learning_rate": 4.850477896906262e-05, "loss": 2.4564, "step": 1033500 }, { "epoch": 2.99, "learning_rate": 4.8504055321415345e-05, "loss": 2.4371, "step": 1034000 }, { "epoch": 2.99, "learning_rate": 4.850333167376807e-05, "loss": 2.4276, "step": 1034500 }, { "epoch": 3.0, "learning_rate": 4.850260802612079e-05, "loss": 2.4658, "step": 1035000 }, { "epoch": 3.0, "learning_rate": 4.850188437847351e-05, "loss": 2.4655, "step": 1035500 }, { "epoch": 3.0, "learning_rate": 4.8501160730826234e-05, "loss": 2.4413, "step": 1036000 }, { "epoch": 3.0, "eval_accuracy": 0.6356301080160491, "eval_accuracy_mlm": 0.5974497137172354, "eval_accuracy_nsp": 0.840544838743681, "eval_loss": 2.409207344055176, "eval_runtime": 330.6147, "eval_samples_per_second": 1319.923, "eval_steps_per_second": 54.998, "step": 1036416 }, { "epoch": 3.0, "learning_rate": 4.850043853047425e-05, "loss": 2.4452, "step": 1036500 }, { "epoch": 3.0, "learning_rate": 4.849971488282697e-05, "loss": 2.3943, "step": 1037000 }, { "epoch": 3.0, "learning_rate": 4.84989912351797e-05, "loss": 2.4045, "step": 1037500 }, { "epoch": 3.0, "learning_rate": 4.849826758753242e-05, "loss": 2.417, "step": 1038000 }, { "epoch": 3.01, "learning_rate": 4.8497543939885145e-05, "loss": 2.414, "step": 1038500 }, { "epoch": 3.01, "learning_rate": 4.849682029223787e-05, "loss": 2.4318, "step": 1039000 }, { "epoch": 3.01, "learning_rate": 4.8496096644590596e-05, "loss": 2.4007, "step": 1039500 }, { "epoch": 3.01, "learning_rate": 4.849537444423861e-05, "loss": 2.4196, "step": 1040000 }, { "epoch": 3.01, "learning_rate": 4.8494650796591334e-05, "loss": 2.4055, "step": 1040500 }, { "epoch": 3.01, "learning_rate": 4.8493927148944056e-05, "loss": 2.3992, "step": 1041000 }, { "epoch": 3.01, "learning_rate": 4.849320350129678e-05, "loss": 2.4361, "step": 1041500 }, { "epoch": 3.02, "learning_rate": 4.84924798536495e-05, "loss": 2.4479, "step": 1042000 }, { "epoch": 3.02, "learning_rate": 4.849175620600222e-05, "loss": 2.432, "step": 1042500 }, { "epoch": 3.02, "learning_rate": 4.849103255835495e-05, "loss": 2.4275, "step": 1043000 }, { "epoch": 3.02, "learning_rate": 4.8490308910707674e-05, "loss": 2.4418, "step": 1043500 }, { "epoch": 3.02, "learning_rate": 4.8489585263060396e-05, "loss": 2.4165, "step": 1044000 }, { "epoch": 3.02, "learning_rate": 4.848886161541312e-05, "loss": 2.4225, "step": 1044500 }, { "epoch": 3.02, "learning_rate": 4.8488139415061134e-05, "loss": 2.4336, "step": 1045000 }, { "epoch": 3.03, "learning_rate": 4.8487415767413856e-05, "loss": 2.4391, "step": 1045500 }, { "epoch": 3.03, "learning_rate": 4.848669211976658e-05, "loss": 2.418, "step": 1046000 }, { "epoch": 3.03, "learning_rate": 4.84859699194146e-05, "loss": 2.4402, "step": 1046500 }, { "epoch": 3.03, "learning_rate": 4.848524627176732e-05, "loss": 2.4287, "step": 1047000 }, { "epoch": 3.03, "learning_rate": 4.848452262412005e-05, "loss": 2.4331, "step": 1047500 }, { "epoch": 3.03, "learning_rate": 4.8483798976472774e-05, "loss": 2.4332, "step": 1048000 }, { "epoch": 3.03, "learning_rate": 4.8483075328825497e-05, "loss": 2.4311, "step": 1048500 }, { "epoch": 3.04, "learning_rate": 4.848235168117822e-05, "loss": 2.4193, "step": 1049000 }, { "epoch": 3.04, "learning_rate": 4.848162803353094e-05, "loss": 2.4216, "step": 1049500 }, { "epoch": 3.04, "learning_rate": 4.848090438588366e-05, "loss": 2.4298, "step": 1050000 }, { "epoch": 3.04, "learning_rate": 4.848018218553168e-05, "loss": 2.4346, "step": 1050500 }, { "epoch": 3.04, "learning_rate": 4.84794599851797e-05, "loss": 2.4377, "step": 1051000 }, { "epoch": 3.04, "learning_rate": 4.8478736337532423e-05, "loss": 2.4087, "step": 1051500 }, { "epoch": 3.05, "learning_rate": 4.8478012689885146e-05, "loss": 2.4181, "step": 1052000 }, { "epoch": 3.05, "learning_rate": 4.847728904223787e-05, "loss": 2.4182, "step": 1052500 }, { "epoch": 3.05, "learning_rate": 4.847656539459059e-05, "loss": 2.4013, "step": 1053000 }, { "epoch": 3.05, "learning_rate": 4.847584174694331e-05, "loss": 2.4271, "step": 1053500 }, { "epoch": 3.05, "learning_rate": 4.8475118099296035e-05, "loss": 2.4351, "step": 1054000 }, { "epoch": 3.05, "learning_rate": 4.8474394451648764e-05, "loss": 2.4035, "step": 1054500 }, { "epoch": 3.05, "learning_rate": 4.847367225129678e-05, "loss": 2.4358, "step": 1055000 }, { "epoch": 3.06, "learning_rate": 4.84729486036495e-05, "loss": 2.4037, "step": 1055500 }, { "epoch": 3.06, "learning_rate": 4.8472224956002224e-05, "loss": 2.4436, "step": 1056000 }, { "epoch": 3.06, "learning_rate": 4.847150130835495e-05, "loss": 2.4227, "step": 1056500 }, { "epoch": 3.06, "learning_rate": 4.8470777660707675e-05, "loss": 2.4031, "step": 1057000 }, { "epoch": 3.06, "learning_rate": 4.84700540130604e-05, "loss": 2.4215, "step": 1057500 }, { "epoch": 3.06, "learning_rate": 4.846933036541312e-05, "loss": 2.4342, "step": 1058000 }, { "epoch": 3.06, "learning_rate": 4.846860671776584e-05, "loss": 2.423, "step": 1058500 }, { "epoch": 3.07, "learning_rate": 4.846788451741386e-05, "loss": 2.4351, "step": 1059000 }, { "epoch": 3.07, "learning_rate": 4.846716086976658e-05, "loss": 2.4426, "step": 1059500 }, { "epoch": 3.07, "learning_rate": 4.84664372221193e-05, "loss": 2.4297, "step": 1060000 }, { "epoch": 3.07, "learning_rate": 4.8465713574472024e-05, "loss": 2.434, "step": 1060500 }, { "epoch": 3.07, "learning_rate": 4.8464991374120046e-05, "loss": 2.4293, "step": 1061000 }, { "epoch": 3.07, "learning_rate": 4.8464270621063355e-05, "loss": 2.4548, "step": 1061500 }, { "epoch": 3.07, "learning_rate": 4.846354697341608e-05, "loss": 2.4127, "step": 1062000 }, { "epoch": 3.08, "learning_rate": 4.8462823325768806e-05, "loss": 2.4312, "step": 1062500 }, { "epoch": 3.08, "learning_rate": 4.846209967812153e-05, "loss": 2.4466, "step": 1063000 }, { "epoch": 3.08, "learning_rate": 4.846137603047425e-05, "loss": 2.4456, "step": 1063500 }, { "epoch": 3.08, "learning_rate": 4.846065238282698e-05, "loss": 2.4025, "step": 1064000 }, { "epoch": 3.08, "learning_rate": 4.84599287351797e-05, "loss": 2.4439, "step": 1064500 }, { "epoch": 3.08, "learning_rate": 4.8459205087532424e-05, "loss": 2.4462, "step": 1065000 }, { "epoch": 3.08, "learning_rate": 4.845848288718044e-05, "loss": 2.4142, "step": 1065500 }, { "epoch": 3.09, "learning_rate": 4.845775923953316e-05, "loss": 2.4313, "step": 1066000 }, { "epoch": 3.09, "learning_rate": 4.8457035591885884e-05, "loss": 2.4297, "step": 1066500 }, { "epoch": 3.09, "learning_rate": 4.8456311944238606e-05, "loss": 2.4325, "step": 1067000 }, { "epoch": 3.09, "learning_rate": 4.845558829659133e-05, "loss": 2.4212, "step": 1067500 }, { "epoch": 3.09, "learning_rate": 4.845486609623935e-05, "loss": 2.4053, "step": 1068000 }, { "epoch": 3.09, "learning_rate": 4.845414244859207e-05, "loss": 2.4469, "step": 1068500 }, { "epoch": 3.09, "learning_rate": 4.845342024824009e-05, "loss": 2.4292, "step": 1069000 }, { "epoch": 3.1, "learning_rate": 4.845269660059281e-05, "loss": 2.4385, "step": 1069500 }, { "epoch": 3.1, "learning_rate": 4.845197295294554e-05, "loss": 2.4314, "step": 1070000 }, { "epoch": 3.1, "learning_rate": 4.845124930529826e-05, "loss": 2.4111, "step": 1070500 }, { "epoch": 3.1, "learning_rate": 4.8450525657650985e-05, "loss": 2.4339, "step": 1071000 }, { "epoch": 3.1, "learning_rate": 4.844980201000371e-05, "loss": 2.4236, "step": 1071500 }, { "epoch": 3.1, "learning_rate": 4.844907836235643e-05, "loss": 2.4201, "step": 1072000 }, { "epoch": 3.1, "learning_rate": 4.844835471470915e-05, "loss": 2.4308, "step": 1072500 }, { "epoch": 3.11, "learning_rate": 4.844763106706188e-05, "loss": 2.415, "step": 1073000 }, { "epoch": 3.11, "learning_rate": 4.84469074194146e-05, "loss": 2.4171, "step": 1073500 }, { "epoch": 3.11, "learning_rate": 4.8446183771767325e-05, "loss": 2.4266, "step": 1074000 }, { "epoch": 3.11, "learning_rate": 4.844546012412005e-05, "loss": 2.4144, "step": 1074500 }, { "epoch": 3.11, "learning_rate": 4.844473792376806e-05, "loss": 2.4512, "step": 1075000 }, { "epoch": 3.11, "learning_rate": 4.8444014276120785e-05, "loss": 2.4423, "step": 1075500 }, { "epoch": 3.11, "learning_rate": 4.844329062847351e-05, "loss": 2.4201, "step": 1076000 }, { "epoch": 3.12, "learning_rate": 4.844256698082623e-05, "loss": 2.4084, "step": 1076500 }, { "epoch": 3.12, "learning_rate": 4.844184333317896e-05, "loss": 2.42, "step": 1077000 }, { "epoch": 3.12, "learning_rate": 4.844112113282698e-05, "loss": 2.4122, "step": 1077500 }, { "epoch": 3.12, "learning_rate": 4.84403974851797e-05, "loss": 2.408, "step": 1078000 }, { "epoch": 3.12, "learning_rate": 4.8439673837532425e-05, "loss": 2.4049, "step": 1078500 }, { "epoch": 3.12, "learning_rate": 4.843895018988515e-05, "loss": 2.4484, "step": 1079000 }, { "epoch": 3.12, "learning_rate": 4.843822654223787e-05, "loss": 2.4219, "step": 1079500 }, { "epoch": 3.13, "learning_rate": 4.843750289459059e-05, "loss": 2.4253, "step": 1080000 }, { "epoch": 3.13, "learning_rate": 4.843678069423861e-05, "loss": 2.4329, "step": 1080500 }, { "epoch": 3.13, "learning_rate": 4.843605704659133e-05, "loss": 2.4303, "step": 1081000 }, { "epoch": 3.13, "learning_rate": 4.843533339894405e-05, "loss": 2.4306, "step": 1081500 }, { "epoch": 3.13, "learning_rate": 4.843460975129678e-05, "loss": 2.436, "step": 1082000 }, { "epoch": 3.13, "learning_rate": 4.84338861036495e-05, "loss": 2.4217, "step": 1082500 }, { "epoch": 3.13, "learning_rate": 4.8433162456002225e-05, "loss": 2.4221, "step": 1083000 }, { "epoch": 3.14, "learning_rate": 4.843243880835495e-05, "loss": 2.4217, "step": 1083500 }, { "epoch": 3.14, "learning_rate": 4.843171660800296e-05, "loss": 2.4286, "step": 1084000 }, { "epoch": 3.14, "learning_rate": 4.843099296035569e-05, "loss": 2.4251, "step": 1084500 }, { "epoch": 3.14, "learning_rate": 4.8430269312708414e-05, "loss": 2.4343, "step": 1085000 }, { "epoch": 3.14, "learning_rate": 4.8429545665061136e-05, "loss": 2.4556, "step": 1085500 }, { "epoch": 3.14, "learning_rate": 4.842882346470916e-05, "loss": 2.4046, "step": 1086000 }, { "epoch": 3.14, "learning_rate": 4.8428101264357174e-05, "loss": 2.4043, "step": 1086500 }, { "epoch": 3.15, "learning_rate": 4.8427377616709897e-05, "loss": 2.4409, "step": 1087000 }, { "epoch": 3.15, "learning_rate": 4.842665396906262e-05, "loss": 2.4579, "step": 1087500 }, { "epoch": 3.15, "learning_rate": 4.842593032141534e-05, "loss": 2.4121, "step": 1088000 }, { "epoch": 3.15, "learning_rate": 4.842520667376806e-05, "loss": 2.4464, "step": 1088500 }, { "epoch": 3.15, "learning_rate": 4.8424483026120786e-05, "loss": 2.4233, "step": 1089000 }, { "epoch": 3.15, "learning_rate": 4.842376082576881e-05, "loss": 2.4584, "step": 1089500 }, { "epoch": 3.16, "learning_rate": 4.842303717812153e-05, "loss": 2.4302, "step": 1090000 }, { "epoch": 3.16, "learning_rate": 4.842231353047425e-05, "loss": 2.4151, "step": 1090500 }, { "epoch": 3.16, "learning_rate": 4.8421589882826975e-05, "loss": 2.43, "step": 1091000 }, { "epoch": 3.16, "learning_rate": 4.84208662351797e-05, "loss": 2.4354, "step": 1091500 }, { "epoch": 3.16, "learning_rate": 4.8420142587532426e-05, "loss": 2.4394, "step": 1092000 }, { "epoch": 3.16, "learning_rate": 4.841941893988515e-05, "loss": 2.4332, "step": 1092500 }, { "epoch": 3.16, "learning_rate": 4.841869529223787e-05, "loss": 2.4345, "step": 1093000 }, { "epoch": 3.17, "learning_rate": 4.8417973091885886e-05, "loss": 2.4262, "step": 1093500 }, { "epoch": 3.17, "learning_rate": 4.841724944423861e-05, "loss": 2.4161, "step": 1094000 }, { "epoch": 3.17, "learning_rate": 4.841652724388663e-05, "loss": 2.4201, "step": 1094500 }, { "epoch": 3.17, "learning_rate": 4.841580359623935e-05, "loss": 2.432, "step": 1095000 }, { "epoch": 3.17, "learning_rate": 4.8415079948592075e-05, "loss": 2.4254, "step": 1095500 }, { "epoch": 3.17, "learning_rate": 4.84143563009448e-05, "loss": 2.4094, "step": 1096000 }, { "epoch": 3.17, "learning_rate": 4.841363265329752e-05, "loss": 2.4188, "step": 1096500 }, { "epoch": 3.18, "learning_rate": 4.841290900565024e-05, "loss": 2.4185, "step": 1097000 }, { "epoch": 3.18, "learning_rate": 4.841218680529826e-05, "loss": 2.4332, "step": 1097500 }, { "epoch": 3.18, "learning_rate": 4.841146315765098e-05, "loss": 2.4131, "step": 1098000 }, { "epoch": 3.18, "learning_rate": 4.841073951000371e-05, "loss": 2.4287, "step": 1098500 }, { "epoch": 3.18, "learning_rate": 4.841001586235643e-05, "loss": 2.451, "step": 1099000 }, { "epoch": 3.18, "learning_rate": 4.840929221470916e-05, "loss": 2.4399, "step": 1099500 }, { "epoch": 3.18, "learning_rate": 4.840856856706188e-05, "loss": 2.4287, "step": 1100000 }, { "epoch": 3.19, "learning_rate": 4.8407844919414604e-05, "loss": 2.4255, "step": 1100500 }, { "epoch": 3.19, "learning_rate": 4.8407121271767326e-05, "loss": 2.4205, "step": 1101000 }, { "epoch": 3.19, "learning_rate": 4.840639762412005e-05, "loss": 2.4328, "step": 1101500 }, { "epoch": 3.19, "learning_rate": 4.840567397647277e-05, "loss": 2.4476, "step": 1102000 }, { "epoch": 3.19, "learning_rate": 4.840495032882549e-05, "loss": 2.4451, "step": 1102500 }, { "epoch": 3.19, "learning_rate": 4.840422812847351e-05, "loss": 2.4246, "step": 1103000 }, { "epoch": 3.19, "learning_rate": 4.840350448082623e-05, "loss": 2.4083, "step": 1103500 }, { "epoch": 3.2, "learning_rate": 4.840278083317896e-05, "loss": 2.4306, "step": 1104000 }, { "epoch": 3.2, "learning_rate": 4.840205718553168e-05, "loss": 2.4391, "step": 1104500 }, { "epoch": 3.2, "learning_rate": 4.8401333537884404e-05, "loss": 2.4241, "step": 1105000 }, { "epoch": 3.2, "learning_rate": 4.8400609890237127e-05, "loss": 2.4067, "step": 1105500 }, { "epoch": 3.2, "learning_rate": 4.839988624258985e-05, "loss": 2.4417, "step": 1106000 }, { "epoch": 3.2, "learning_rate": 4.839916259494257e-05, "loss": 2.4489, "step": 1106500 }, { "epoch": 3.2, "learning_rate": 4.83984389472953e-05, "loss": 2.4237, "step": 1107000 }, { "epoch": 3.21, "learning_rate": 4.839771529964802e-05, "loss": 2.4424, "step": 1107500 }, { "epoch": 3.21, "learning_rate": 4.839699309929604e-05, "loss": 2.4198, "step": 1108000 }, { "epoch": 3.21, "learning_rate": 4.839626945164876e-05, "loss": 2.436, "step": 1108500 }, { "epoch": 3.21, "learning_rate": 4.839554580400148e-05, "loss": 2.4163, "step": 1109000 }, { "epoch": 3.21, "learning_rate": 4.839482215635421e-05, "loss": 2.4078, "step": 1109500 }, { "epoch": 3.21, "learning_rate": 4.8394098508706933e-05, "loss": 2.4216, "step": 1110000 }, { "epoch": 3.21, "learning_rate": 4.839337630835495e-05, "loss": 2.4037, "step": 1110500 }, { "epoch": 3.22, "learning_rate": 4.839265266070767e-05, "loss": 2.4294, "step": 1111000 }, { "epoch": 3.22, "learning_rate": 4.839193046035569e-05, "loss": 2.4004, "step": 1111500 }, { "epoch": 3.22, "learning_rate": 4.839120681270841e-05, "loss": 2.3921, "step": 1112000 }, { "epoch": 3.22, "learning_rate": 4.839048316506113e-05, "loss": 2.4397, "step": 1112500 }, { "epoch": 3.22, "learning_rate": 4.838975951741386e-05, "loss": 2.4385, "step": 1113000 }, { "epoch": 3.22, "learning_rate": 4.838903586976658e-05, "loss": 2.4428, "step": 1113500 }, { "epoch": 3.22, "learning_rate": 4.83883136694146e-05, "loss": 2.4383, "step": 1114000 }, { "epoch": 3.23, "learning_rate": 4.838759002176733e-05, "loss": 2.4422, "step": 1114500 }, { "epoch": 3.23, "learning_rate": 4.838686637412005e-05, "loss": 2.4426, "step": 1115000 }, { "epoch": 3.23, "learning_rate": 4.838614272647277e-05, "loss": 2.4225, "step": 1115500 }, { "epoch": 3.23, "learning_rate": 4.8385419078825494e-05, "loss": 2.4309, "step": 1116000 }, { "epoch": 3.23, "learning_rate": 4.838469687847351e-05, "loss": 2.4215, "step": 1116500 }, { "epoch": 3.23, "learning_rate": 4.838397323082624e-05, "loss": 2.4181, "step": 1117000 }, { "epoch": 3.23, "learning_rate": 4.838324958317896e-05, "loss": 2.3943, "step": 1117500 }, { "epoch": 3.24, "learning_rate": 4.838252593553168e-05, "loss": 2.4258, "step": 1118000 }, { "epoch": 3.24, "learning_rate": 4.83818037351797e-05, "loss": 2.4429, "step": 1118500 }, { "epoch": 3.24, "learning_rate": 4.838108008753242e-05, "loss": 2.4329, "step": 1119000 }, { "epoch": 3.24, "learning_rate": 4.838035643988514e-05, "loss": 2.4246, "step": 1119500 }, { "epoch": 3.24, "learning_rate": 4.8379632792237865e-05, "loss": 2.4321, "step": 1120000 }, { "epoch": 3.24, "learning_rate": 4.837890914459059e-05, "loss": 2.4159, "step": 1120500 }, { "epoch": 3.24, "learning_rate": 4.837818549694331e-05, "loss": 2.4185, "step": 1121000 }, { "epoch": 3.25, "learning_rate": 4.837746329659133e-05, "loss": 2.414, "step": 1121500 }, { "epoch": 3.25, "learning_rate": 4.837673964894406e-05, "loss": 2.4198, "step": 1122000 }, { "epoch": 3.25, "learning_rate": 4.837601600129678e-05, "loss": 2.4408, "step": 1122500 }, { "epoch": 3.25, "learning_rate": 4.83752938009448e-05, "loss": 2.4407, "step": 1123000 }, { "epoch": 3.25, "learning_rate": 4.837457015329752e-05, "loss": 2.4362, "step": 1123500 }, { "epoch": 3.25, "learning_rate": 4.837384650565024e-05, "loss": 2.4417, "step": 1124000 }, { "epoch": 3.25, "learning_rate": 4.8373122858002965e-05, "loss": 2.4111, "step": 1124500 }, { "epoch": 3.26, "learning_rate": 4.837240065765099e-05, "loss": 2.4214, "step": 1125000 }, { "epoch": 3.26, "learning_rate": 4.837167701000371e-05, "loss": 2.4135, "step": 1125500 }, { "epoch": 3.26, "learning_rate": 4.837095336235643e-05, "loss": 2.4503, "step": 1126000 }, { "epoch": 3.26, "learning_rate": 4.8370229714709154e-05, "loss": 2.4433, "step": 1126500 }, { "epoch": 3.26, "learning_rate": 4.836950606706188e-05, "loss": 2.4278, "step": 1127000 }, { "epoch": 3.26, "learning_rate": 4.83687824194146e-05, "loss": 2.4666, "step": 1127500 }, { "epoch": 3.27, "learning_rate": 4.836805877176732e-05, "loss": 2.4303, "step": 1128000 }, { "epoch": 3.27, "learning_rate": 4.836733512412004e-05, "loss": 2.3959, "step": 1128500 }, { "epoch": 3.27, "learning_rate": 4.8366611476472766e-05, "loss": 2.3983, "step": 1129000 }, { "epoch": 3.27, "learning_rate": 4.8365887828825495e-05, "loss": 2.4182, "step": 1129500 }, { "epoch": 3.27, "learning_rate": 4.836516418117822e-05, "loss": 2.4196, "step": 1130000 }, { "epoch": 3.27, "learning_rate": 4.836444198082624e-05, "loss": 2.4087, "step": 1130500 }, { "epoch": 3.27, "learning_rate": 4.8363719780474255e-05, "loss": 2.4292, "step": 1131000 }, { "epoch": 3.28, "learning_rate": 4.836299613282698e-05, "loss": 2.4288, "step": 1131500 }, { "epoch": 3.28, "learning_rate": 4.83622724851797e-05, "loss": 2.411, "step": 1132000 }, { "epoch": 3.28, "learning_rate": 4.836154883753242e-05, "loss": 2.4312, "step": 1132500 }, { "epoch": 3.28, "learning_rate": 4.8360825189885144e-05, "loss": 2.4307, "step": 1133000 }, { "epoch": 3.28, "learning_rate": 4.8360101542237866e-05, "loss": 2.4319, "step": 1133500 }, { "epoch": 3.28, "learning_rate": 4.835937789459059e-05, "loss": 2.4174, "step": 1134000 }, { "epoch": 3.28, "learning_rate": 4.835865424694331e-05, "loss": 2.4448, "step": 1134500 }, { "epoch": 3.29, "learning_rate": 4.835793059929604e-05, "loss": 2.4286, "step": 1135000 }, { "epoch": 3.29, "learning_rate": 4.835720695164876e-05, "loss": 2.4344, "step": 1135500 }, { "epoch": 3.29, "learning_rate": 4.835648475129678e-05, "loss": 2.3902, "step": 1136000 }, { "epoch": 3.29, "learning_rate": 4.83557611036495e-05, "loss": 2.4348, "step": 1136500 }, { "epoch": 3.29, "learning_rate": 4.835503890329752e-05, "loss": 2.4417, "step": 1137000 }, { "epoch": 3.29, "learning_rate": 4.8354315255650244e-05, "loss": 2.4511, "step": 1137500 }, { "epoch": 3.29, "learning_rate": 4.8353591608002966e-05, "loss": 2.4133, "step": 1138000 }, { "epoch": 3.3, "learning_rate": 4.835286796035569e-05, "loss": 2.4115, "step": 1138500 }, { "epoch": 3.3, "learning_rate": 4.835214431270841e-05, "loss": 2.4133, "step": 1139000 }, { "epoch": 3.3, "learning_rate": 4.835142066506114e-05, "loss": 2.4293, "step": 1139500 }, { "epoch": 3.3, "learning_rate": 4.835069701741386e-05, "loss": 2.4113, "step": 1140000 }, { "epoch": 3.3, "learning_rate": 4.8349973369766584e-05, "loss": 2.4502, "step": 1140500 }, { "epoch": 3.3, "learning_rate": 4.8349249722119306e-05, "loss": 2.4369, "step": 1141000 }, { "epoch": 3.3, "learning_rate": 4.834852752176732e-05, "loss": 2.4519, "step": 1141500 }, { "epoch": 3.31, "learning_rate": 4.8347803874120044e-05, "loss": 2.454, "step": 1142000 }, { "epoch": 3.31, "learning_rate": 4.8347080226472766e-05, "loss": 2.4145, "step": 1142500 }, { "epoch": 3.31, "learning_rate": 4.834635657882549e-05, "loss": 2.4184, "step": 1143000 }, { "epoch": 3.31, "learning_rate": 4.834563293117821e-05, "loss": 2.4382, "step": 1143500 }, { "epoch": 3.31, "learning_rate": 4.834490928353094e-05, "loss": 2.4054, "step": 1144000 }, { "epoch": 3.31, "learning_rate": 4.834418563588366e-05, "loss": 2.4211, "step": 1144500 }, { "epoch": 3.31, "learning_rate": 4.834346198823639e-05, "loss": 2.4061, "step": 1145000 }, { "epoch": 3.32, "learning_rate": 4.834273978788441e-05, "loss": 2.4193, "step": 1145500 }, { "epoch": 3.32, "learning_rate": 4.834201614023713e-05, "loss": 2.4293, "step": 1146000 }, { "epoch": 3.32, "learning_rate": 4.834129249258985e-05, "loss": 2.4431, "step": 1146500 }, { "epoch": 3.32, "learning_rate": 4.834056884494257e-05, "loss": 2.4383, "step": 1147000 }, { "epoch": 3.32, "learning_rate": 4.8339845197295296e-05, "loss": 2.411, "step": 1147500 }, { "epoch": 3.32, "learning_rate": 4.833912154964802e-05, "loss": 2.4335, "step": 1148000 }, { "epoch": 3.32, "learning_rate": 4.833839790200074e-05, "loss": 2.4163, "step": 1148500 }, { "epoch": 3.33, "learning_rate": 4.833767425435346e-05, "loss": 2.4231, "step": 1149000 }, { "epoch": 3.33, "learning_rate": 4.8336952054001485e-05, "loss": 2.4378, "step": 1149500 }, { "epoch": 3.33, "learning_rate": 4.833622840635421e-05, "loss": 2.4059, "step": 1150000 }, { "epoch": 3.33, "learning_rate": 4.833550475870693e-05, "loss": 2.4374, "step": 1150500 }, { "epoch": 3.33, "learning_rate": 4.833478111105965e-05, "loss": 2.4324, "step": 1151000 }, { "epoch": 3.33, "learning_rate": 4.833405746341238e-05, "loss": 2.4312, "step": 1151500 }, { "epoch": 3.33, "learning_rate": 4.8333335263060396e-05, "loss": 2.4337, "step": 1152000 }, { "epoch": 3.34, "learning_rate": 4.833261161541312e-05, "loss": 2.4512, "step": 1152500 }, { "epoch": 3.34, "learning_rate": 4.833188796776584e-05, "loss": 2.4142, "step": 1153000 }, { "epoch": 3.34, "learning_rate": 4.833116432011857e-05, "loss": 2.4261, "step": 1153500 }, { "epoch": 3.34, "learning_rate": 4.8330442119766585e-05, "loss": 2.4344, "step": 1154000 }, { "epoch": 3.34, "learning_rate": 4.832971847211931e-05, "loss": 2.4287, "step": 1154500 }, { "epoch": 3.34, "learning_rate": 4.832899482447203e-05, "loss": 2.4232, "step": 1155000 }, { "epoch": 3.34, "learning_rate": 4.832827117682475e-05, "loss": 2.4272, "step": 1155500 }, { "epoch": 3.35, "learning_rate": 4.8327547529177474e-05, "loss": 2.4335, "step": 1156000 }, { "epoch": 3.35, "learning_rate": 4.832682532882549e-05, "loss": 2.4176, "step": 1156500 }, { "epoch": 3.35, "learning_rate": 4.832610168117822e-05, "loss": 2.4352, "step": 1157000 }, { "epoch": 3.35, "learning_rate": 4.832537803353094e-05, "loss": 2.4218, "step": 1157500 }, { "epoch": 3.35, "learning_rate": 4.832465438588366e-05, "loss": 2.4126, "step": 1158000 }, { "epoch": 3.35, "learning_rate": 4.8323930738236385e-05, "loss": 2.4224, "step": 1158500 }, { "epoch": 3.35, "learning_rate": 4.8323207090589114e-05, "loss": 2.4306, "step": 1159000 }, { "epoch": 3.36, "learning_rate": 4.8322483442941836e-05, "loss": 2.435, "step": 1159500 }, { "epoch": 3.36, "learning_rate": 4.8321762689885145e-05, "loss": 2.4143, "step": 1160000 }, { "epoch": 3.36, "learning_rate": 4.832103904223787e-05, "loss": 2.4402, "step": 1160500 }, { "epoch": 3.36, "learning_rate": 4.832031539459059e-05, "loss": 2.4185, "step": 1161000 }, { "epoch": 3.36, "learning_rate": 4.831959174694332e-05, "loss": 2.4112, "step": 1161500 }, { "epoch": 3.36, "learning_rate": 4.831886809929604e-05, "loss": 2.4638, "step": 1162000 }, { "epoch": 3.36, "learning_rate": 4.831814445164876e-05, "loss": 2.4319, "step": 1162500 }, { "epoch": 3.37, "learning_rate": 4.8317420804001485e-05, "loss": 2.4416, "step": 1163000 }, { "epoch": 3.37, "learning_rate": 4.831669715635421e-05, "loss": 2.4471, "step": 1163500 }, { "epoch": 3.37, "learning_rate": 4.831597350870693e-05, "loss": 2.436, "step": 1164000 }, { "epoch": 3.37, "learning_rate": 4.8315251308354945e-05, "loss": 2.4093, "step": 1164500 }, { "epoch": 3.37, "learning_rate": 4.831452910800297e-05, "loss": 2.4335, "step": 1165000 }, { "epoch": 3.37, "learning_rate": 4.831380546035569e-05, "loss": 2.4101, "step": 1165500 }, { "epoch": 3.38, "learning_rate": 4.831308181270841e-05, "loss": 2.4077, "step": 1166000 }, { "epoch": 3.38, "learning_rate": 4.8312358165061134e-05, "loss": 2.4213, "step": 1166500 }, { "epoch": 3.38, "learning_rate": 4.8311634517413863e-05, "loss": 2.4343, "step": 1167000 }, { "epoch": 3.38, "learning_rate": 4.8310910869766586e-05, "loss": 2.4155, "step": 1167500 }, { "epoch": 3.38, "learning_rate": 4.831018722211931e-05, "loss": 2.418, "step": 1168000 }, { "epoch": 3.38, "learning_rate": 4.830946357447203e-05, "loss": 2.4335, "step": 1168500 }, { "epoch": 3.38, "learning_rate": 4.830873992682475e-05, "loss": 2.434, "step": 1169000 }, { "epoch": 3.39, "learning_rate": 4.830801772647277e-05, "loss": 2.4335, "step": 1169500 }, { "epoch": 3.39, "learning_rate": 4.830729842071138e-05, "loss": 2.4446, "step": 1170000 }, { "epoch": 3.39, "learning_rate": 4.83065747730641e-05, "loss": 2.4432, "step": 1170500 }, { "epoch": 3.39, "learning_rate": 4.830585112541682e-05, "loss": 2.447, "step": 1171000 }, { "epoch": 3.39, "learning_rate": 4.8305127477769544e-05, "loss": 2.4355, "step": 1171500 }, { "epoch": 3.39, "learning_rate": 4.8304403830122266e-05, "loss": 2.4295, "step": 1172000 }, { "epoch": 3.39, "learning_rate": 4.8303680182474995e-05, "loss": 2.4018, "step": 1172500 }, { "epoch": 3.4, "learning_rate": 4.830295653482772e-05, "loss": 2.4415, "step": 1173000 }, { "epoch": 3.4, "learning_rate": 4.830223288718044e-05, "loss": 2.4028, "step": 1173500 }, { "epoch": 3.4, "learning_rate": 4.830150923953316e-05, "loss": 2.4311, "step": 1174000 }, { "epoch": 3.4, "learning_rate": 4.830078559188589e-05, "loss": 2.444, "step": 1174500 }, { "epoch": 3.4, "learning_rate": 4.830006194423861e-05, "loss": 2.4408, "step": 1175000 }, { "epoch": 3.4, "learning_rate": 4.8299338296591335e-05, "loss": 2.46, "step": 1175500 }, { "epoch": 3.4, "learning_rate": 4.829861464894406e-05, "loss": 2.4365, "step": 1176000 }, { "epoch": 3.41, "learning_rate": 4.829789100129678e-05, "loss": 2.4417, "step": 1176500 }, { "epoch": 3.41, "learning_rate": 4.82971673536495e-05, "loss": 2.4429, "step": 1177000 }, { "epoch": 3.41, "learning_rate": 4.8296443706002224e-05, "loss": 2.4227, "step": 1177500 }, { "epoch": 3.41, "learning_rate": 4.8295721505650246e-05, "loss": 2.4069, "step": 1178000 }, { "epoch": 3.41, "learning_rate": 4.829499785800297e-05, "loss": 2.4409, "step": 1178500 }, { "epoch": 3.41, "learning_rate": 4.829427421035569e-05, "loss": 2.426, "step": 1179000 }, { "epoch": 3.41, "learning_rate": 4.829355056270841e-05, "loss": 2.426, "step": 1179500 }, { "epoch": 3.42, "learning_rate": 4.8292826915061135e-05, "loss": 2.4345, "step": 1180000 }, { "epoch": 3.42, "learning_rate": 4.829210326741386e-05, "loss": 2.4392, "step": 1180500 }, { "epoch": 3.42, "learning_rate": 4.829138106706187e-05, "loss": 2.4197, "step": 1181000 }, { "epoch": 3.42, "learning_rate": 4.8290657419414595e-05, "loss": 2.4217, "step": 1181500 }, { "epoch": 3.42, "learning_rate": 4.8289933771767324e-05, "loss": 2.4195, "step": 1182000 }, { "epoch": 3.42, "learning_rate": 4.828921157141535e-05, "loss": 2.4332, "step": 1182500 }, { "epoch": 3.42, "learning_rate": 4.828848937106336e-05, "loss": 2.4353, "step": 1183000 }, { "epoch": 3.43, "learning_rate": 4.8287765723416084e-05, "loss": 2.4537, "step": 1183500 }, { "epoch": 3.43, "learning_rate": 4.828704207576881e-05, "loss": 2.4294, "step": 1184000 }, { "epoch": 3.43, "learning_rate": 4.828631842812153e-05, "loss": 2.4077, "step": 1184500 }, { "epoch": 3.43, "learning_rate": 4.828559478047425e-05, "loss": 2.4219, "step": 1185000 }, { "epoch": 3.43, "learning_rate": 4.828487113282697e-05, "loss": 2.4401, "step": 1185500 }, { "epoch": 3.43, "learning_rate": 4.8284147485179696e-05, "loss": 2.4377, "step": 1186000 }, { "epoch": 3.43, "learning_rate": 4.828342383753242e-05, "loss": 2.4103, "step": 1186500 }, { "epoch": 3.44, "learning_rate": 4.828270018988515e-05, "loss": 2.4433, "step": 1187000 }, { "epoch": 3.44, "learning_rate": 4.828197798953316e-05, "loss": 2.4175, "step": 1187500 }, { "epoch": 3.44, "learning_rate": 4.8281254341885885e-05, "loss": 2.438, "step": 1188000 }, { "epoch": 3.44, "learning_rate": 4.828053069423861e-05, "loss": 2.4471, "step": 1188500 }, { "epoch": 3.44, "learning_rate": 4.827980704659133e-05, "loss": 2.4164, "step": 1189000 }, { "epoch": 3.44, "learning_rate": 4.827908339894406e-05, "loss": 2.4303, "step": 1189500 }, { "epoch": 3.44, "learning_rate": 4.827835975129678e-05, "loss": 2.4189, "step": 1190000 }, { "epoch": 3.45, "learning_rate": 4.82776361036495e-05, "loss": 2.4233, "step": 1190500 }, { "epoch": 3.45, "learning_rate": 4.8276912456002225e-05, "loss": 2.4196, "step": 1191000 }, { "epoch": 3.45, "learning_rate": 4.827618880835495e-05, "loss": 2.4229, "step": 1191500 }, { "epoch": 3.45, "learning_rate": 4.827546660800297e-05, "loss": 2.4396, "step": 1192000 }, { "epoch": 3.45, "learning_rate": 4.8274744407650985e-05, "loss": 2.4434, "step": 1192500 }, { "epoch": 3.45, "learning_rate": 4.827402076000371e-05, "loss": 2.4126, "step": 1193000 }, { "epoch": 3.45, "learning_rate": 4.827329711235643e-05, "loss": 2.4403, "step": 1193500 }, { "epoch": 3.46, "learning_rate": 4.827257346470915e-05, "loss": 2.4368, "step": 1194000 }, { "epoch": 3.46, "learning_rate": 4.8271849817061874e-05, "loss": 2.4116, "step": 1194500 }, { "epoch": 3.46, "learning_rate": 4.8271127616709896e-05, "loss": 2.451, "step": 1195000 }, { "epoch": 3.46, "learning_rate": 4.827040396906262e-05, "loss": 2.4131, "step": 1195500 }, { "epoch": 3.46, "learning_rate": 4.826968032141534e-05, "loss": 2.425, "step": 1196000 }, { "epoch": 3.46, "learning_rate": 4.826895667376806e-05, "loss": 2.4327, "step": 1196500 }, { "epoch": 3.46, "learning_rate": 4.826823447341608e-05, "loss": 2.4246, "step": 1197000 }, { "epoch": 3.47, "learning_rate": 4.826751082576881e-05, "loss": 2.4216, "step": 1197500 }, { "epoch": 3.47, "learning_rate": 4.826678717812153e-05, "loss": 2.4227, "step": 1198000 }, { "epoch": 3.47, "learning_rate": 4.826606353047425e-05, "loss": 2.4432, "step": 1198500 }, { "epoch": 3.47, "learning_rate": 4.8265339882826974e-05, "loss": 2.4037, "step": 1199000 }, { "epoch": 3.47, "learning_rate": 4.8264616235179696e-05, "loss": 2.423, "step": 1199500 }, { "epoch": 3.47, "learning_rate": 4.826389403482772e-05, "loss": 2.4477, "step": 1200000 }, { "epoch": 3.47, "learning_rate": 4.826317038718044e-05, "loss": 2.4238, "step": 1200500 }, { "epoch": 3.48, "learning_rate": 4.826244673953316e-05, "loss": 2.4316, "step": 1201000 }, { "epoch": 3.48, "learning_rate": 4.8261723091885885e-05, "loss": 2.4349, "step": 1201500 }, { "epoch": 3.48, "learning_rate": 4.826099944423861e-05, "loss": 2.4383, "step": 1202000 }, { "epoch": 3.48, "learning_rate": 4.826027579659133e-05, "loss": 2.4125, "step": 1202500 }, { "epoch": 3.48, "learning_rate": 4.825955214894405e-05, "loss": 2.4545, "step": 1203000 }, { "epoch": 3.48, "learning_rate": 4.8258828501296774e-05, "loss": 2.4122, "step": 1203500 }, { "epoch": 3.49, "learning_rate": 4.8258104853649497e-05, "loss": 2.4183, "step": 1204000 }, { "epoch": 3.49, "learning_rate": 4.8257381206002226e-05, "loss": 2.4182, "step": 1204500 }, { "epoch": 3.49, "learning_rate": 4.825665900565025e-05, "loss": 2.4351, "step": 1205000 }, { "epoch": 3.49, "learning_rate": 4.825593535800297e-05, "loss": 2.4204, "step": 1205500 }, { "epoch": 3.49, "learning_rate": 4.825521171035569e-05, "loss": 2.416, "step": 1206000 }, { "epoch": 3.49, "learning_rate": 4.8254488062708415e-05, "loss": 2.431, "step": 1206500 }, { "epoch": 3.49, "learning_rate": 4.825376586235643e-05, "loss": 2.4293, "step": 1207000 }, { "epoch": 3.5, "learning_rate": 4.825304221470915e-05, "loss": 2.4246, "step": 1207500 }, { "epoch": 3.5, "learning_rate": 4.8252318567061875e-05, "loss": 2.4058, "step": 1208000 }, { "epoch": 3.5, "learning_rate": 4.82515949194146e-05, "loss": 2.4322, "step": 1208500 }, { "epoch": 3.5, "learning_rate": 4.8250871271767326e-05, "loss": 2.4452, "step": 1209000 }, { "epoch": 3.5, "learning_rate": 4.825014762412005e-05, "loss": 2.4331, "step": 1209500 }, { "epoch": 3.5, "learning_rate": 4.8249425423768064e-05, "loss": 2.435, "step": 1210000 }, { "epoch": 3.5, "learning_rate": 4.8248701776120786e-05, "loss": 2.4215, "step": 1210500 }, { "epoch": 3.51, "learning_rate": 4.824797812847351e-05, "loss": 2.424, "step": 1211000 }, { "epoch": 3.51, "learning_rate": 4.824725448082623e-05, "loss": 2.4337, "step": 1211500 }, { "epoch": 3.51, "learning_rate": 4.824653083317896e-05, "loss": 2.4117, "step": 1212000 }, { "epoch": 3.51, "learning_rate": 4.824580718553168e-05, "loss": 2.4298, "step": 1212500 }, { "epoch": 3.51, "learning_rate": 4.82450849851797e-05, "loss": 2.4275, "step": 1213000 }, { "epoch": 3.51, "learning_rate": 4.8244361337532426e-05, "loss": 2.4143, "step": 1213500 }, { "epoch": 3.51, "learning_rate": 4.824363768988515e-05, "loss": 2.4143, "step": 1214000 }, { "epoch": 3.52, "learning_rate": 4.824291404223787e-05, "loss": 2.4756, "step": 1214500 }, { "epoch": 3.52, "learning_rate": 4.8242191841885886e-05, "loss": 2.4193, "step": 1215000 }, { "epoch": 3.52, "learning_rate": 4.824146819423861e-05, "loss": 2.4218, "step": 1215500 }, { "epoch": 3.52, "learning_rate": 4.8240745993886624e-05, "loss": 2.4228, "step": 1216000 }, { "epoch": 3.52, "learning_rate": 4.824002234623935e-05, "loss": 2.4055, "step": 1216500 }, { "epoch": 3.52, "learning_rate": 4.8239298698592075e-05, "loss": 2.461, "step": 1217000 }, { "epoch": 3.52, "learning_rate": 4.82385750509448e-05, "loss": 2.424, "step": 1217500 }, { "epoch": 3.53, "learning_rate": 4.823785140329752e-05, "loss": 2.4423, "step": 1218000 }, { "epoch": 3.53, "learning_rate": 4.8237129202945535e-05, "loss": 2.421, "step": 1218500 }, { "epoch": 3.53, "learning_rate": 4.823640700259355e-05, "loss": 2.4237, "step": 1219000 }, { "epoch": 3.53, "learning_rate": 4.823568335494627e-05, "loss": 2.4117, "step": 1219500 }, { "epoch": 3.53, "learning_rate": 4.8234959707299e-05, "loss": 2.4164, "step": 1220000 }, { "epoch": 3.53, "learning_rate": 4.8234236059651724e-05, "loss": 2.4516, "step": 1220500 }, { "epoch": 3.53, "learning_rate": 4.823351241200445e-05, "loss": 2.4339, "step": 1221000 }, { "epoch": 3.54, "learning_rate": 4.8232788764357176e-05, "loss": 2.4274, "step": 1221500 }, { "epoch": 3.54, "learning_rate": 4.82320651167099e-05, "loss": 2.4402, "step": 1222000 }, { "epoch": 3.54, "learning_rate": 4.823134146906262e-05, "loss": 2.4327, "step": 1222500 }, { "epoch": 3.54, "learning_rate": 4.8230619268710636e-05, "loss": 2.4276, "step": 1223000 }, { "epoch": 3.54, "learning_rate": 4.822989562106336e-05, "loss": 2.4445, "step": 1223500 }, { "epoch": 3.54, "learning_rate": 4.822917197341608e-05, "loss": 2.404, "step": 1224000 }, { "epoch": 3.54, "learning_rate": 4.82284483257688e-05, "loss": 2.4296, "step": 1224500 }, { "epoch": 3.55, "learning_rate": 4.8227724678121525e-05, "loss": 2.4391, "step": 1225000 }, { "epoch": 3.55, "learning_rate": 4.8227001030474254e-05, "loss": 2.4312, "step": 1225500 }, { "epoch": 3.55, "learning_rate": 4.8226277382826976e-05, "loss": 2.4328, "step": 1226000 }, { "epoch": 3.55, "learning_rate": 4.82255537351797e-05, "loss": 2.4149, "step": 1226500 }, { "epoch": 3.55, "learning_rate": 4.822483008753243e-05, "loss": 2.4247, "step": 1227000 }, { "epoch": 3.55, "learning_rate": 4.822410788718044e-05, "loss": 2.4215, "step": 1227500 }, { "epoch": 3.55, "learning_rate": 4.8223384239533165e-05, "loss": 2.4079, "step": 1228000 }, { "epoch": 3.56, "learning_rate": 4.822266059188589e-05, "loss": 2.4283, "step": 1228500 }, { "epoch": 3.56, "learning_rate": 4.822193694423861e-05, "loss": 2.4042, "step": 1229000 }, { "epoch": 3.56, "learning_rate": 4.822121329659133e-05, "loss": 2.3948, "step": 1229500 }, { "epoch": 3.56, "learning_rate": 4.8220489648944054e-05, "loss": 2.4235, "step": 1230000 }, { "epoch": 3.56, "learning_rate": 4.8219766001296776e-05, "loss": 2.4266, "step": 1230500 }, { "epoch": 3.56, "learning_rate": 4.8219042353649505e-05, "loss": 2.4235, "step": 1231000 }, { "epoch": 3.56, "learning_rate": 4.821832015329752e-05, "loss": 2.3996, "step": 1231500 }, { "epoch": 3.57, "learning_rate": 4.8217597952945536e-05, "loss": 2.436, "step": 1232000 }, { "epoch": 3.57, "learning_rate": 4.821687575259355e-05, "loss": 2.4567, "step": 1232500 }, { "epoch": 3.57, "learning_rate": 4.8216152104946274e-05, "loss": 2.444, "step": 1233000 }, { "epoch": 3.57, "learning_rate": 4.8215428457299e-05, "loss": 2.433, "step": 1233500 }, { "epoch": 3.57, "learning_rate": 4.8214704809651725e-05, "loss": 2.4231, "step": 1234000 }, { "epoch": 3.57, "learning_rate": 4.821398116200445e-05, "loss": 2.4273, "step": 1234500 }, { "epoch": 3.57, "learning_rate": 4.8213257514357176e-05, "loss": 2.417, "step": 1235000 }, { "epoch": 3.58, "learning_rate": 4.82125338667099e-05, "loss": 2.4329, "step": 1235500 }, { "epoch": 3.58, "learning_rate": 4.821181021906262e-05, "loss": 2.3982, "step": 1236000 }, { "epoch": 3.58, "learning_rate": 4.8211088018710636e-05, "loss": 2.423, "step": 1236500 }, { "epoch": 3.58, "learning_rate": 4.821036437106336e-05, "loss": 2.4177, "step": 1237000 }, { "epoch": 3.58, "learning_rate": 4.820964072341608e-05, "loss": 2.4401, "step": 1237500 }, { "epoch": 3.58, "learning_rate": 4.82089170757688e-05, "loss": 2.4101, "step": 1238000 }, { "epoch": 3.58, "learning_rate": 4.8208193428121525e-05, "loss": 2.4292, "step": 1238500 }, { "epoch": 3.59, "learning_rate": 4.8207469780474254e-05, "loss": 2.4192, "step": 1239000 }, { "epoch": 3.59, "learning_rate": 4.8206746132826977e-05, "loss": 2.417, "step": 1239500 }, { "epoch": 3.59, "learning_rate": 4.82060224851797e-05, "loss": 2.445, "step": 1240000 }, { "epoch": 3.59, "learning_rate": 4.820530173212301e-05, "loss": 2.4244, "step": 1240500 }, { "epoch": 3.59, "learning_rate": 4.820457953177103e-05, "loss": 2.4335, "step": 1241000 }, { "epoch": 3.59, "learning_rate": 4.820385588412375e-05, "loss": 2.4446, "step": 1241500 }, { "epoch": 3.6, "learning_rate": 4.8203132236476474e-05, "loss": 2.4007, "step": 1242000 }, { "epoch": 3.6, "learning_rate": 4.8202408588829203e-05, "loss": 2.4339, "step": 1242500 }, { "epoch": 3.6, "learning_rate": 4.8201684941181926e-05, "loss": 2.4158, "step": 1243000 }, { "epoch": 3.6, "learning_rate": 4.820096129353465e-05, "loss": 2.4197, "step": 1243500 }, { "epoch": 3.6, "learning_rate": 4.8200239093182663e-05, "loss": 2.4168, "step": 1244000 }, { "epoch": 3.6, "learning_rate": 4.8199515445535386e-05, "loss": 2.4229, "step": 1244500 }, { "epoch": 3.6, "learning_rate": 4.819879179788811e-05, "loss": 2.4032, "step": 1245000 }, { "epoch": 3.61, "learning_rate": 4.819806815024083e-05, "loss": 2.4437, "step": 1245500 }, { "epoch": 3.61, "learning_rate": 4.819734594988885e-05, "loss": 2.4356, "step": 1246000 }, { "epoch": 3.61, "learning_rate": 4.8196622302241575e-05, "loss": 2.4037, "step": 1246500 }, { "epoch": 3.61, "learning_rate": 4.81958986545943e-05, "loss": 2.4229, "step": 1247000 }, { "epoch": 3.61, "learning_rate": 4.819517500694702e-05, "loss": 2.4333, "step": 1247500 }, { "epoch": 3.61, "learning_rate": 4.819445135929974e-05, "loss": 2.4445, "step": 1248000 }, { "epoch": 3.61, "learning_rate": 4.8193727711652464e-05, "loss": 2.4335, "step": 1248500 }, { "epoch": 3.62, "learning_rate": 4.819300551130048e-05, "loss": 2.4567, "step": 1249000 }, { "epoch": 3.62, "learning_rate": 4.81922818636532e-05, "loss": 2.4338, "step": 1249500 }, { "epoch": 3.62, "learning_rate": 4.819155821600593e-05, "loss": 2.4366, "step": 1250000 }, { "epoch": 3.62, "learning_rate": 4.819083456835865e-05, "loss": 2.4331, "step": 1250500 }, { "epoch": 3.62, "learning_rate": 4.8190112368006675e-05, "loss": 2.4288, "step": 1251000 }, { "epoch": 3.62, "learning_rate": 4.81893887203594e-05, "loss": 2.425, "step": 1251500 }, { "epoch": 3.62, "learning_rate": 4.818866507271212e-05, "loss": 2.4575, "step": 1252000 }, { "epoch": 3.63, "learning_rate": 4.818794142506484e-05, "loss": 2.4143, "step": 1252500 }, { "epoch": 3.63, "learning_rate": 4.8187217777417564e-05, "loss": 2.4364, "step": 1253000 }, { "epoch": 3.63, "learning_rate": 4.8186494129770286e-05, "loss": 2.4026, "step": 1253500 }, { "epoch": 3.63, "learning_rate": 4.818577048212301e-05, "loss": 2.4256, "step": 1254000 }, { "epoch": 3.63, "learning_rate": 4.818504683447573e-05, "loss": 2.4331, "step": 1254500 }, { "epoch": 3.63, "learning_rate": 4.818432318682845e-05, "loss": 2.4328, "step": 1255000 }, { "epoch": 3.63, "learning_rate": 4.818359953918118e-05, "loss": 2.4224, "step": 1255500 }, { "epoch": 3.64, "learning_rate": 4.81828773388292e-05, "loss": 2.4352, "step": 1256000 }, { "epoch": 3.64, "learning_rate": 4.818215369118192e-05, "loss": 2.4238, "step": 1256500 }, { "epoch": 3.64, "learning_rate": 4.8181431490829935e-05, "loss": 2.422, "step": 1257000 }, { "epoch": 3.64, "learning_rate": 4.8180707843182664e-05, "loss": 2.4191, "step": 1257500 }, { "epoch": 3.64, "learning_rate": 4.8179984195535387e-05, "loss": 2.4395, "step": 1258000 }, { "epoch": 3.64, "learning_rate": 4.817926054788811e-05, "loss": 2.4337, "step": 1258500 }, { "epoch": 3.64, "learning_rate": 4.817853690024083e-05, "loss": 2.4176, "step": 1259000 }, { "epoch": 3.65, "learning_rate": 4.817781469988885e-05, "loss": 2.4236, "step": 1259500 }, { "epoch": 3.65, "learning_rate": 4.8177091052241576e-05, "loss": 2.4154, "step": 1260000 }, { "epoch": 3.65, "learning_rate": 4.817636885188959e-05, "loss": 2.4269, "step": 1260500 }, { "epoch": 3.65, "learning_rate": 4.817564520424231e-05, "loss": 2.4117, "step": 1261000 }, { "epoch": 3.65, "learning_rate": 4.8174921556595036e-05, "loss": 2.4116, "step": 1261500 }, { "epoch": 3.65, "learning_rate": 4.817419790894776e-05, "loss": 2.4261, "step": 1262000 }, { "epoch": 3.65, "learning_rate": 4.817347426130048e-05, "loss": 2.4448, "step": 1262500 }, { "epoch": 3.66, "learning_rate": 4.817275061365321e-05, "loss": 2.447, "step": 1263000 }, { "epoch": 3.66, "learning_rate": 4.817202696600593e-05, "loss": 2.4519, "step": 1263500 }, { "epoch": 3.66, "learning_rate": 4.8171303318358654e-05, "loss": 2.4223, "step": 1264000 }, { "epoch": 3.66, "learning_rate": 4.8170579670711376e-05, "loss": 2.4382, "step": 1264500 }, { "epoch": 3.66, "learning_rate": 4.8169856023064105e-05, "loss": 2.4353, "step": 1265000 }, { "epoch": 3.66, "learning_rate": 4.816913237541683e-05, "loss": 2.4163, "step": 1265500 }, { "epoch": 3.66, "learning_rate": 4.816840872776955e-05, "loss": 2.433, "step": 1266000 }, { "epoch": 3.67, "learning_rate": 4.8167686527417565e-05, "loss": 2.4141, "step": 1266500 }, { "epoch": 3.67, "learning_rate": 4.816696287977029e-05, "loss": 2.4328, "step": 1267000 }, { "epoch": 3.67, "learning_rate": 4.816623923212301e-05, "loss": 2.4732, "step": 1267500 }, { "epoch": 3.67, "learning_rate": 4.816551558447573e-05, "loss": 2.4206, "step": 1268000 }, { "epoch": 3.67, "learning_rate": 4.816479193682846e-05, "loss": 2.4173, "step": 1268500 }, { "epoch": 3.67, "learning_rate": 4.816406828918118e-05, "loss": 2.4316, "step": 1269000 }, { "epoch": 3.67, "learning_rate": 4.81633460888292e-05, "loss": 2.4174, "step": 1269500 }, { "epoch": 3.68, "learning_rate": 4.816262244118192e-05, "loss": 2.434, "step": 1270000 }, { "epoch": 3.68, "learning_rate": 4.816189879353464e-05, "loss": 2.4408, "step": 1270500 }, { "epoch": 3.68, "learning_rate": 4.8161175145887365e-05, "loss": 2.4026, "step": 1271000 }, { "epoch": 3.68, "learning_rate": 4.816045149824009e-05, "loss": 2.4296, "step": 1271500 }, { "epoch": 3.68, "learning_rate": 4.815972929788811e-05, "loss": 2.3987, "step": 1272000 }, { "epoch": 3.68, "learning_rate": 4.815900565024083e-05, "loss": 2.4176, "step": 1272500 }, { "epoch": 3.68, "learning_rate": 4.815828200259356e-05, "loss": 2.423, "step": 1273000 }, { "epoch": 3.69, "learning_rate": 4.815755835494628e-05, "loss": 2.4257, "step": 1273500 }, { "epoch": 3.69, "learning_rate": 4.8156834707299005e-05, "loss": 2.4294, "step": 1274000 }, { "epoch": 3.69, "learning_rate": 4.815611105965173e-05, "loss": 2.429, "step": 1274500 }, { "epoch": 3.69, "learning_rate": 4.815538741200445e-05, "loss": 2.4067, "step": 1275000 }, { "epoch": 3.69, "learning_rate": 4.815466376435717e-05, "loss": 2.4233, "step": 1275500 }, { "epoch": 3.69, "learning_rate": 4.815394156400519e-05, "loss": 2.3878, "step": 1276000 }, { "epoch": 3.69, "learning_rate": 4.815321791635791e-05, "loss": 2.4257, "step": 1276500 }, { "epoch": 3.7, "learning_rate": 4.815249426871063e-05, "loss": 2.4187, "step": 1277000 }, { "epoch": 3.7, "learning_rate": 4.815177062106336e-05, "loss": 2.458, "step": 1277500 }, { "epoch": 3.7, "learning_rate": 4.8151048420711377e-05, "loss": 2.4246, "step": 1278000 }, { "epoch": 3.7, "learning_rate": 4.81503247730641e-05, "loss": 2.4596, "step": 1278500 }, { "epoch": 3.7, "learning_rate": 4.814960112541682e-05, "loss": 2.4157, "step": 1279000 }, { "epoch": 3.7, "learning_rate": 4.814887747776954e-05, "loss": 2.4042, "step": 1279500 }, { "epoch": 3.71, "learning_rate": 4.814815383012227e-05, "loss": 2.4086, "step": 1280000 }, { "epoch": 3.71, "learning_rate": 4.814743162977029e-05, "loss": 2.4344, "step": 1280500 }, { "epoch": 3.71, "learning_rate": 4.814670798212301e-05, "loss": 2.4476, "step": 1281000 }, { "epoch": 3.71, "learning_rate": 4.814598578177103e-05, "loss": 2.4386, "step": 1281500 }, { "epoch": 3.71, "learning_rate": 4.8145262134123755e-05, "loss": 2.4211, "step": 1282000 }, { "epoch": 3.71, "learning_rate": 4.814453848647648e-05, "loss": 2.4258, "step": 1282500 }, { "epoch": 3.71, "learning_rate": 4.81438148388292e-05, "loss": 2.415, "step": 1283000 }, { "epoch": 3.72, "learning_rate": 4.814309119118192e-05, "loss": 2.4173, "step": 1283500 }, { "epoch": 3.72, "learning_rate": 4.8142367543534644e-05, "loss": 2.4412, "step": 1284000 }, { "epoch": 3.72, "learning_rate": 4.814164534318266e-05, "loss": 2.4238, "step": 1284500 }, { "epoch": 3.72, "learning_rate": 4.814092169553539e-05, "loss": 2.4145, "step": 1285000 }, { "epoch": 3.72, "learning_rate": 4.814019804788811e-05, "loss": 2.4287, "step": 1285500 }, { "epoch": 3.72, "learning_rate": 4.813947440024083e-05, "loss": 2.4186, "step": 1286000 }, { "epoch": 3.72, "learning_rate": 4.8138750752593555e-05, "loss": 2.4485, "step": 1286500 }, { "epoch": 3.73, "learning_rate": 4.813802710494628e-05, "loss": 2.4074, "step": 1287000 }, { "epoch": 3.73, "learning_rate": 4.8137303457299006e-05, "loss": 2.4174, "step": 1287500 }, { "epoch": 3.73, "learning_rate": 4.813657980965173e-05, "loss": 2.43, "step": 1288000 }, { "epoch": 3.73, "learning_rate": 4.813585616200445e-05, "loss": 2.3858, "step": 1288500 }, { "epoch": 3.73, "learning_rate": 4.813513251435717e-05, "loss": 2.4399, "step": 1289000 }, { "epoch": 3.73, "learning_rate": 4.8134408866709895e-05, "loss": 2.4221, "step": 1289500 }, { "epoch": 3.73, "learning_rate": 4.813368666635791e-05, "loss": 2.4263, "step": 1290000 }, { "epoch": 3.74, "learning_rate": 4.813296301871064e-05, "loss": 2.4412, "step": 1290500 }, { "epoch": 3.74, "learning_rate": 4.813223937106336e-05, "loss": 2.4292, "step": 1291000 }, { "epoch": 3.74, "learning_rate": 4.8131515723416084e-05, "loss": 2.4214, "step": 1291500 }, { "epoch": 3.74, "learning_rate": 4.8130792075768806e-05, "loss": 2.4352, "step": 1292000 }, { "epoch": 3.74, "learning_rate": 4.813006842812153e-05, "loss": 2.4199, "step": 1292500 }, { "epoch": 3.74, "learning_rate": 4.812934478047425e-05, "loss": 2.4381, "step": 1293000 }, { "epoch": 3.74, "learning_rate": 4.812862113282697e-05, "loss": 2.4331, "step": 1293500 }, { "epoch": 3.75, "learning_rate": 4.8127897485179695e-05, "loss": 2.4357, "step": 1294000 }, { "epoch": 3.75, "learning_rate": 4.812717528482771e-05, "loss": 2.4215, "step": 1294500 }, { "epoch": 3.75, "learning_rate": 4.812645163718044e-05, "loss": 2.415, "step": 1295000 }, { "epoch": 3.75, "learning_rate": 4.812572798953316e-05, "loss": 2.4224, "step": 1295500 }, { "epoch": 3.75, "learning_rate": 4.8125004341885884e-05, "loss": 2.446, "step": 1296000 }, { "epoch": 3.75, "learning_rate": 4.8124282141533907e-05, "loss": 2.4502, "step": 1296500 }, { "epoch": 3.75, "learning_rate": 4.812355849388663e-05, "loss": 2.4429, "step": 1297000 }, { "epoch": 3.76, "learning_rate": 4.812283484623935e-05, "loss": 2.4396, "step": 1297500 }, { "epoch": 3.76, "learning_rate": 4.812211119859207e-05, "loss": 2.4352, "step": 1298000 }, { "epoch": 3.76, "learning_rate": 4.8121387550944795e-05, "loss": 2.4425, "step": 1298500 }, { "epoch": 3.76, "learning_rate": 4.812066390329752e-05, "loss": 2.4317, "step": 1299000 }, { "epoch": 3.76, "learning_rate": 4.811994025565024e-05, "loss": 2.44, "step": 1299500 }, { "epoch": 3.76, "learning_rate": 4.811921660800296e-05, "loss": 2.4341, "step": 1300000 }, { "epoch": 3.76, "learning_rate": 4.8118492960355684e-05, "loss": 2.4326, "step": 1300500 }, { "epoch": 3.77, "learning_rate": 4.8117769312708413e-05, "loss": 2.4014, "step": 1301000 }, { "epoch": 3.77, "learning_rate": 4.8117045665061136e-05, "loss": 2.4141, "step": 1301500 }, { "epoch": 3.77, "learning_rate": 4.811632346470916e-05, "loss": 2.4218, "step": 1302000 }, { "epoch": 3.77, "learning_rate": 4.8115601264357174e-05, "loss": 2.4317, "step": 1302500 }, { "epoch": 3.77, "learning_rate": 4.8114877616709896e-05, "loss": 2.4316, "step": 1303000 }, { "epoch": 3.77, "learning_rate": 4.811415396906262e-05, "loss": 2.4366, "step": 1303500 }, { "epoch": 3.77, "learning_rate": 4.811343176871064e-05, "loss": 2.4316, "step": 1304000 }, { "epoch": 3.78, "learning_rate": 4.811270812106336e-05, "loss": 2.4032, "step": 1304500 }, { "epoch": 3.78, "learning_rate": 4.8111984473416085e-05, "loss": 2.4203, "step": 1305000 }, { "epoch": 3.78, "learning_rate": 4.811126082576881e-05, "loss": 2.4244, "step": 1305500 }, { "epoch": 3.78, "learning_rate": 4.811053717812153e-05, "loss": 2.4367, "step": 1306000 }, { "epoch": 3.78, "learning_rate": 4.810981353047425e-05, "loss": 2.4347, "step": 1306500 }, { "epoch": 3.78, "learning_rate": 4.8109089882826974e-05, "loss": 2.4255, "step": 1307000 }, { "epoch": 3.78, "learning_rate": 4.810836768247499e-05, "loss": 2.4211, "step": 1307500 }, { "epoch": 3.79, "learning_rate": 4.810764403482771e-05, "loss": 2.428, "step": 1308000 }, { "epoch": 3.79, "learning_rate": 4.810692038718044e-05, "loss": 2.4268, "step": 1308500 }, { "epoch": 3.79, "learning_rate": 4.810619673953316e-05, "loss": 2.4191, "step": 1309000 }, { "epoch": 3.79, "learning_rate": 4.810547309188589e-05, "loss": 2.4293, "step": 1309500 }, { "epoch": 3.79, "learning_rate": 4.8104749444238614e-05, "loss": 2.4387, "step": 1310000 }, { "epoch": 3.79, "learning_rate": 4.8104025796591336e-05, "loss": 2.4398, "step": 1310500 }, { "epoch": 3.79, "learning_rate": 4.810330214894406e-05, "loss": 2.4295, "step": 1311000 }, { "epoch": 3.8, "learning_rate": 4.810257850129678e-05, "loss": 2.441, "step": 1311500 }, { "epoch": 3.8, "learning_rate": 4.81018548536495e-05, "loss": 2.4312, "step": 1312000 }, { "epoch": 3.8, "learning_rate": 4.8101131206002225e-05, "loss": 2.4329, "step": 1312500 }, { "epoch": 3.8, "learning_rate": 4.810040755835495e-05, "loss": 2.4419, "step": 1313000 }, { "epoch": 3.8, "learning_rate": 4.809968535800296e-05, "loss": 2.421, "step": 1313500 }, { "epoch": 3.8, "learning_rate": 4.8098963157650985e-05, "loss": 2.4071, "step": 1314000 }, { "epoch": 3.8, "learning_rate": 4.809823951000371e-05, "loss": 2.4252, "step": 1314500 }, { "epoch": 3.81, "learning_rate": 4.809751586235643e-05, "loss": 2.417, "step": 1315000 }, { "epoch": 3.81, "learning_rate": 4.809679221470915e-05, "loss": 2.4127, "step": 1315500 }, { "epoch": 3.81, "learning_rate": 4.8096068567061874e-05, "loss": 2.428, "step": 1316000 }, { "epoch": 3.81, "learning_rate": 4.8095344919414596e-05, "loss": 2.4273, "step": 1316500 }, { "epoch": 3.81, "learning_rate": 4.8094621271767325e-05, "loss": 2.4146, "step": 1317000 }, { "epoch": 3.81, "learning_rate": 4.809389762412005e-05, "loss": 2.4411, "step": 1317500 }, { "epoch": 3.82, "learning_rate": 4.809317542376806e-05, "loss": 2.4393, "step": 1318000 }, { "epoch": 3.82, "learning_rate": 4.8092453223416086e-05, "loss": 2.4286, "step": 1318500 }, { "epoch": 3.82, "learning_rate": 4.80917310230641e-05, "loss": 2.3934, "step": 1319000 }, { "epoch": 3.82, "learning_rate": 4.8091007375416823e-05, "loss": 2.4268, "step": 1319500 }, { "epoch": 3.82, "learning_rate": 4.8090283727769546e-05, "loss": 2.4266, "step": 1320000 }, { "epoch": 3.82, "learning_rate": 4.808956008012227e-05, "loss": 2.3891, "step": 1320500 }, { "epoch": 3.82, "learning_rate": 4.808883643247499e-05, "loss": 2.422, "step": 1321000 }, { "epoch": 3.83, "learning_rate": 4.808811278482772e-05, "loss": 2.4301, "step": 1321500 }, { "epoch": 3.83, "learning_rate": 4.8087390584475735e-05, "loss": 2.4249, "step": 1322000 }, { "epoch": 3.83, "learning_rate": 4.808666693682846e-05, "loss": 2.4192, "step": 1322500 }, { "epoch": 3.83, "learning_rate": 4.808594328918118e-05, "loss": 2.421, "step": 1323000 }, { "epoch": 3.83, "learning_rate": 4.80852196415339e-05, "loss": 2.4379, "step": 1323500 }, { "epoch": 3.83, "learning_rate": 4.8084495993886624e-05, "loss": 2.4508, "step": 1324000 }, { "epoch": 3.83, "learning_rate": 4.8083772346239346e-05, "loss": 2.4288, "step": 1324500 }, { "epoch": 3.84, "learning_rate": 4.8083048698592075e-05, "loss": 2.4091, "step": 1325000 }, { "epoch": 3.84, "learning_rate": 4.80823250509448e-05, "loss": 2.4314, "step": 1325500 }, { "epoch": 3.84, "learning_rate": 4.808160140329752e-05, "loss": 2.4258, "step": 1326000 }, { "epoch": 3.84, "learning_rate": 4.808087920294554e-05, "loss": 2.4267, "step": 1326500 }, { "epoch": 3.84, "learning_rate": 4.8080155555298264e-05, "loss": 2.4119, "step": 1327000 }, { "epoch": 3.84, "learning_rate": 4.8079431907650986e-05, "loss": 2.4082, "step": 1327500 }, { "epoch": 3.84, "learning_rate": 4.807870826000371e-05, "loss": 2.3922, "step": 1328000 }, { "epoch": 3.85, "learning_rate": 4.807798461235643e-05, "loss": 2.4193, "step": 1328500 }, { "epoch": 3.85, "learning_rate": 4.807726096470915e-05, "loss": 2.4151, "step": 1329000 }, { "epoch": 3.85, "learning_rate": 4.8076537317061875e-05, "loss": 2.396, "step": 1329500 }, { "epoch": 3.85, "learning_rate": 4.807581511670989e-05, "loss": 2.4071, "step": 1330000 }, { "epoch": 3.85, "learning_rate": 4.807509146906262e-05, "loss": 2.4629, "step": 1330500 }, { "epoch": 3.85, "learning_rate": 4.807436782141534e-05, "loss": 2.4507, "step": 1331000 }, { "epoch": 3.85, "learning_rate": 4.8073644173768064e-05, "loss": 2.4299, "step": 1331500 }, { "epoch": 3.86, "learning_rate": 4.807292052612079e-05, "loss": 2.4352, "step": 1332000 }, { "epoch": 3.86, "learning_rate": 4.8072196878473515e-05, "loss": 2.417, "step": 1332500 }, { "epoch": 3.86, "learning_rate": 4.807147323082624e-05, "loss": 2.3957, "step": 1333000 }, { "epoch": 3.86, "learning_rate": 4.807074958317896e-05, "loss": 2.4212, "step": 1333500 }, { "epoch": 3.86, "learning_rate": 4.807002593553168e-05, "loss": 2.411, "step": 1334000 }, { "epoch": 3.86, "learning_rate": 4.8069302287884404e-05, "loss": 2.4211, "step": 1334500 }, { "epoch": 3.86, "learning_rate": 4.8068578640237126e-05, "loss": 2.4157, "step": 1335000 }, { "epoch": 3.87, "learning_rate": 4.806785788718044e-05, "loss": 2.4459, "step": 1335500 }, { "epoch": 3.87, "learning_rate": 4.8067134239533164e-05, "loss": 2.4213, "step": 1336000 }, { "epoch": 3.87, "learning_rate": 4.8066410591885887e-05, "loss": 2.4198, "step": 1336500 }, { "epoch": 3.87, "learning_rate": 4.806568694423861e-05, "loss": 2.4189, "step": 1337000 }, { "epoch": 3.87, "learning_rate": 4.806496329659133e-05, "loss": 2.4328, "step": 1337500 }, { "epoch": 3.87, "learning_rate": 4.806423964894405e-05, "loss": 2.4289, "step": 1338000 }, { "epoch": 3.87, "learning_rate": 4.8063516001296776e-05, "loss": 2.414, "step": 1338500 }, { "epoch": 3.88, "learning_rate": 4.80627923536495e-05, "loss": 2.3961, "step": 1339000 }, { "epoch": 3.88, "learning_rate": 4.806206870600223e-05, "loss": 2.4342, "step": 1339500 }, { "epoch": 3.88, "learning_rate": 4.806134505835495e-05, "loss": 2.4338, "step": 1340000 }, { "epoch": 3.88, "learning_rate": 4.806062285800297e-05, "loss": 2.3912, "step": 1340500 }, { "epoch": 3.88, "learning_rate": 4.8059899210355694e-05, "loss": 2.4179, "step": 1341000 }, { "epoch": 3.88, "learning_rate": 4.8059175562708416e-05, "loss": 2.4317, "step": 1341500 }, { "epoch": 3.88, "learning_rate": 4.805845191506114e-05, "loss": 2.419, "step": 1342000 }, { "epoch": 3.89, "learning_rate": 4.805772826741386e-05, "loss": 2.4045, "step": 1342500 }, { "epoch": 3.89, "learning_rate": 4.8057006067061876e-05, "loss": 2.4215, "step": 1343000 }, { "epoch": 3.89, "learning_rate": 4.80562824194146e-05, "loss": 2.4569, "step": 1343500 }, { "epoch": 3.89, "learning_rate": 4.805555877176732e-05, "loss": 2.4238, "step": 1344000 }, { "epoch": 3.89, "learning_rate": 4.805483512412004e-05, "loss": 2.4439, "step": 1344500 }, { "epoch": 3.89, "learning_rate": 4.805411147647277e-05, "loss": 2.4197, "step": 1345000 }, { "epoch": 3.89, "learning_rate": 4.805338927612079e-05, "loss": 2.4259, "step": 1345500 }, { "epoch": 3.9, "learning_rate": 4.805266562847351e-05, "loss": 2.4058, "step": 1346000 }, { "epoch": 3.9, "learning_rate": 4.805194198082623e-05, "loss": 2.3936, "step": 1346500 }, { "epoch": 3.9, "learning_rate": 4.805121833317896e-05, "loss": 2.4384, "step": 1347000 }, { "epoch": 3.9, "learning_rate": 4.8050496132826976e-05, "loss": 2.4298, "step": 1347500 }, { "epoch": 3.9, "learning_rate": 4.80497724851797e-05, "loss": 2.4311, "step": 1348000 }, { "epoch": 3.9, "learning_rate": 4.804905028482772e-05, "loss": 2.4213, "step": 1348500 }, { "epoch": 3.9, "learning_rate": 4.804832663718044e-05, "loss": 2.4476, "step": 1349000 }, { "epoch": 3.91, "learning_rate": 4.8047602989533165e-05, "loss": 2.4244, "step": 1349500 }, { "epoch": 3.91, "learning_rate": 4.804687934188589e-05, "loss": 2.4237, "step": 1350000 }, { "epoch": 3.91, "learning_rate": 4.804615569423861e-05, "loss": 2.4254, "step": 1350500 }, { "epoch": 3.91, "learning_rate": 4.8045433493886625e-05, "loss": 2.4384, "step": 1351000 }, { "epoch": 3.91, "learning_rate": 4.804470984623935e-05, "loss": 2.4046, "step": 1351500 }, { "epoch": 3.91, "learning_rate": 4.804398619859207e-05, "loss": 2.4058, "step": 1352000 }, { "epoch": 3.91, "learning_rate": 4.80432625509448e-05, "loss": 2.4128, "step": 1352500 }, { "epoch": 3.92, "learning_rate": 4.804253890329752e-05, "loss": 2.4224, "step": 1353000 }, { "epoch": 3.92, "learning_rate": 4.8041816702945536e-05, "loss": 2.4236, "step": 1353500 }, { "epoch": 3.92, "learning_rate": 4.8041095949888845e-05, "loss": 2.4291, "step": 1354000 }, { "epoch": 3.92, "learning_rate": 4.804037230224157e-05, "loss": 2.4215, "step": 1354500 }, { "epoch": 3.92, "learning_rate": 4.8039648654594297e-05, "loss": 2.4117, "step": 1355000 }, { "epoch": 3.92, "learning_rate": 4.803892500694702e-05, "loss": 2.4358, "step": 1355500 }, { "epoch": 3.93, "learning_rate": 4.803820135929975e-05, "loss": 2.4003, "step": 1356000 }, { "epoch": 3.93, "learning_rate": 4.803747771165247e-05, "loss": 2.4192, "step": 1356500 }, { "epoch": 3.93, "learning_rate": 4.803675406400519e-05, "loss": 2.4209, "step": 1357000 }, { "epoch": 3.93, "learning_rate": 4.8036030416357915e-05, "loss": 2.425, "step": 1357500 }, { "epoch": 3.93, "learning_rate": 4.803530676871064e-05, "loss": 2.4545, "step": 1358000 }, { "epoch": 3.93, "learning_rate": 4.803458312106336e-05, "loss": 2.4534, "step": 1358500 }, { "epoch": 3.93, "learning_rate": 4.803385947341608e-05, "loss": 2.4285, "step": 1359000 }, { "epoch": 3.94, "learning_rate": 4.8033135825768803e-05, "loss": 2.4247, "step": 1359500 }, { "epoch": 3.94, "learning_rate": 4.8032412178121526e-05, "loss": 2.441, "step": 1360000 }, { "epoch": 3.94, "learning_rate": 4.803168853047425e-05, "loss": 2.4374, "step": 1360500 }, { "epoch": 3.94, "learning_rate": 4.803096488282697e-05, "loss": 2.43, "step": 1361000 }, { "epoch": 3.94, "learning_rate": 4.80302412351797e-05, "loss": 2.4067, "step": 1361500 }, { "epoch": 3.94, "learning_rate": 4.802951758753242e-05, "loss": 2.4285, "step": 1362000 }, { "epoch": 3.94, "learning_rate": 4.8028795387180444e-05, "loss": 2.3931, "step": 1362500 }, { "epoch": 3.95, "learning_rate": 4.8028071739533166e-05, "loss": 2.4061, "step": 1363000 }, { "epoch": 3.95, "learning_rate": 4.802734953918118e-05, "loss": 2.426, "step": 1363500 }, { "epoch": 3.95, "learning_rate": 4.8026625891533904e-05, "loss": 2.4208, "step": 1364000 }, { "epoch": 3.95, "learning_rate": 4.8025902243886626e-05, "loss": 2.4332, "step": 1364500 }, { "epoch": 3.95, "learning_rate": 4.802517859623935e-05, "loss": 2.4315, "step": 1365000 }, { "epoch": 3.95, "learning_rate": 4.802445639588737e-05, "loss": 2.411, "step": 1365500 }, { "epoch": 3.95, "learning_rate": 4.802373274824009e-05, "loss": 2.4098, "step": 1366000 }, { "epoch": 3.96, "learning_rate": 4.8023009100592815e-05, "loss": 2.4216, "step": 1366500 }, { "epoch": 3.96, "learning_rate": 4.802228545294554e-05, "loss": 2.4097, "step": 1367000 }, { "epoch": 3.96, "learning_rate": 4.802156180529826e-05, "loss": 2.396, "step": 1367500 }, { "epoch": 3.96, "learning_rate": 4.802083815765098e-05, "loss": 2.4259, "step": 1368000 }, { "epoch": 3.96, "learning_rate": 4.8020114510003704e-05, "loss": 2.3974, "step": 1368500 }, { "epoch": 3.96, "learning_rate": 4.8019390862356426e-05, "loss": 2.4331, "step": 1369000 }, { "epoch": 3.96, "learning_rate": 4.8018667214709155e-05, "loss": 2.4195, "step": 1369500 }, { "epoch": 3.97, "learning_rate": 4.801794356706188e-05, "loss": 2.4285, "step": 1370000 }, { "epoch": 3.97, "learning_rate": 4.80172199194146e-05, "loss": 2.4201, "step": 1370500 }, { "epoch": 3.97, "learning_rate": 4.801649771906262e-05, "loss": 2.4078, "step": 1371000 }, { "epoch": 3.97, "learning_rate": 4.8015774071415344e-05, "loss": 2.4173, "step": 1371500 }, { "epoch": 3.97, "learning_rate": 4.801505187106336e-05, "loss": 2.4188, "step": 1372000 }, { "epoch": 3.97, "learning_rate": 4.801432822341608e-05, "loss": 2.3722, "step": 1372500 }, { "epoch": 3.97, "learning_rate": 4.8013604575768804e-05, "loss": 2.4505, "step": 1373000 }, { "epoch": 3.98, "learning_rate": 4.8012880928121526e-05, "loss": 2.4144, "step": 1373500 }, { "epoch": 3.98, "learning_rate": 4.801215728047425e-05, "loss": 2.4428, "step": 1374000 }, { "epoch": 3.98, "learning_rate": 4.801143363282697e-05, "loss": 2.415, "step": 1374500 }, { "epoch": 3.98, "learning_rate": 4.80107099851797e-05, "loss": 2.4256, "step": 1375000 }, { "epoch": 3.98, "learning_rate": 4.800998633753242e-05, "loss": 2.424, "step": 1375500 }, { "epoch": 3.98, "learning_rate": 4.8009262689885144e-05, "loss": 2.4306, "step": 1376000 }, { "epoch": 3.98, "learning_rate": 4.800854048953316e-05, "loss": 2.4234, "step": 1376500 }, { "epoch": 3.99, "learning_rate": 4.8007818289181176e-05, "loss": 2.4003, "step": 1377000 }, { "epoch": 3.99, "learning_rate": 4.8007094641533905e-05, "loss": 2.4053, "step": 1377500 }, { "epoch": 3.99, "learning_rate": 4.800637099388663e-05, "loss": 2.4395, "step": 1378000 }, { "epoch": 3.99, "learning_rate": 4.800564734623935e-05, "loss": 2.4306, "step": 1378500 }, { "epoch": 3.99, "learning_rate": 4.800492369859208e-05, "loss": 2.4261, "step": 1379000 }, { "epoch": 3.99, "learning_rate": 4.80042000509448e-05, "loss": 2.3887, "step": 1379500 }, { "epoch": 3.99, "learning_rate": 4.800347640329752e-05, "loss": 2.4228, "step": 1380000 }, { "epoch": 4.0, "learning_rate": 4.8002752755650245e-05, "loss": 2.4308, "step": 1380500 }, { "epoch": 4.0, "learning_rate": 4.800202910800297e-05, "loss": 2.4063, "step": 1381000 }, { "epoch": 4.0, "learning_rate": 4.800130546035569e-05, "loss": 2.4291, "step": 1381500 }, { "epoch": 4.0, "eval_accuracy": 0.6380546923174228, "eval_accuracy_mlm": 0.5996378678679578, "eval_accuracy_nsp": 0.8439111245548666, "eval_loss": 2.3823225498199463, "eval_runtime": 330.6953, "eval_samples_per_second": 1319.602, "eval_steps_per_second": 54.984, "step": 1381888 }, { "epoch": 4.0, "learning_rate": 4.800058181270841e-05, "loss": 2.4164, "step": 1382000 }, { "epoch": 4.0, "learning_rate": 4.7999858165061134e-05, "loss": 2.3971, "step": 1382500 }, { "epoch": 4.0, "learning_rate": 4.799913741200445e-05, "loss": 2.4212, "step": 1383000 }, { "epoch": 4.0, "learning_rate": 4.799841376435717e-05, "loss": 2.3961, "step": 1383500 }, { "epoch": 4.01, "learning_rate": 4.799769156400519e-05, "loss": 2.4252, "step": 1384000 }, { "epoch": 4.01, "learning_rate": 4.799696791635791e-05, "loss": 2.4034, "step": 1384500 }, { "epoch": 4.01, "learning_rate": 4.799624426871064e-05, "loss": 2.4007, "step": 1385000 }, { "epoch": 4.01, "learning_rate": 4.799552062106336e-05, "loss": 2.4086, "step": 1385500 }, { "epoch": 4.01, "learning_rate": 4.799479697341608e-05, "loss": 2.3956, "step": 1386000 }, { "epoch": 4.01, "learning_rate": 4.7994073325768805e-05, "loss": 2.4184, "step": 1386500 }, { "epoch": 4.01, "learning_rate": 4.799334967812153e-05, "loss": 2.3853, "step": 1387000 }, { "epoch": 4.02, "learning_rate": 4.799262603047425e-05, "loss": 2.4257, "step": 1387500 }, { "epoch": 4.02, "learning_rate": 4.799190383012227e-05, "loss": 2.4009, "step": 1388000 }, { "epoch": 4.02, "learning_rate": 4.7991180182474994e-05, "loss": 2.4059, "step": 1388500 }, { "epoch": 4.02, "learning_rate": 4.7990456534827716e-05, "loss": 2.402, "step": 1389000 }, { "epoch": 4.02, "learning_rate": 4.798973433447573e-05, "loss": 2.3876, "step": 1389500 }, { "epoch": 4.02, "learning_rate": 4.7989010686828454e-05, "loss": 2.4002, "step": 1390000 }, { "epoch": 4.02, "learning_rate": 4.7988287039181176e-05, "loss": 2.3913, "step": 1390500 }, { "epoch": 4.03, "learning_rate": 4.79875633915339e-05, "loss": 2.3864, "step": 1391000 }, { "epoch": 4.03, "learning_rate": 4.798683974388663e-05, "loss": 2.4068, "step": 1391500 }, { "epoch": 4.03, "learning_rate": 4.798611609623935e-05, "loss": 2.3933, "step": 1392000 }, { "epoch": 4.03, "learning_rate": 4.798539389588737e-05, "loss": 2.4108, "step": 1392500 }, { "epoch": 4.03, "learning_rate": 4.7984670248240094e-05, "loss": 2.3859, "step": 1393000 }, { "epoch": 4.03, "learning_rate": 4.798394660059282e-05, "loss": 2.373, "step": 1393500 }, { "epoch": 4.04, "learning_rate": 4.798322295294554e-05, "loss": 2.3733, "step": 1394000 }, { "epoch": 4.04, "learning_rate": 4.798249930529826e-05, "loss": 2.3831, "step": 1394500 }, { "epoch": 4.04, "learning_rate": 4.798177565765098e-05, "loss": 2.3858, "step": 1395000 }, { "epoch": 4.04, "learning_rate": 4.7981052010003706e-05, "loss": 2.3947, "step": 1395500 }, { "epoch": 4.04, "learning_rate": 4.798032836235643e-05, "loss": 2.4082, "step": 1396000 }, { "epoch": 4.04, "learning_rate": 4.797960471470915e-05, "loss": 2.3939, "step": 1396500 }, { "epoch": 4.04, "learning_rate": 4.797888251435717e-05, "loss": 2.4033, "step": 1397000 }, { "epoch": 4.05, "learning_rate": 4.7978158866709895e-05, "loss": 2.4142, "step": 1397500 }, { "epoch": 4.05, "learning_rate": 4.797743521906262e-05, "loss": 2.3937, "step": 1398000 }, { "epoch": 4.05, "learning_rate": 4.797671301871063e-05, "loss": 2.4138, "step": 1398500 }, { "epoch": 4.05, "learning_rate": 4.7975989371063355e-05, "loss": 2.4142, "step": 1399000 }, { "epoch": 4.05, "learning_rate": 4.797526572341608e-05, "loss": 2.4306, "step": 1399500 }, { "epoch": 4.05, "learning_rate": 4.7974542075768806e-05, "loss": 2.4093, "step": 1400000 }, { "epoch": 4.05, "learning_rate": 4.797381842812153e-05, "loss": 2.3929, "step": 1400500 }, { "epoch": 4.06, "learning_rate": 4.797309622776955e-05, "loss": 2.4118, "step": 1401000 }, { "epoch": 4.06, "learning_rate": 4.797237258012227e-05, "loss": 2.3764, "step": 1401500 }, { "epoch": 4.06, "learning_rate": 4.7971648932474995e-05, "loss": 2.4231, "step": 1402000 }, { "epoch": 4.06, "learning_rate": 4.797092528482772e-05, "loss": 2.3849, "step": 1402500 }, { "epoch": 4.06, "learning_rate": 4.797020163718044e-05, "loss": 2.3885, "step": 1403000 }, { "epoch": 4.06, "learning_rate": 4.7969479436828455e-05, "loss": 2.3948, "step": 1403500 }, { "epoch": 4.06, "learning_rate": 4.796875578918118e-05, "loss": 2.4065, "step": 1404000 }, { "epoch": 4.07, "learning_rate": 4.7968032141533906e-05, "loss": 2.3936, "step": 1404500 }, { "epoch": 4.07, "learning_rate": 4.796730849388663e-05, "loss": 2.3983, "step": 1405000 }, { "epoch": 4.07, "learning_rate": 4.796658484623935e-05, "loss": 2.3947, "step": 1405500 }, { "epoch": 4.07, "learning_rate": 4.796586119859207e-05, "loss": 2.4273, "step": 1406000 }, { "epoch": 4.07, "learning_rate": 4.7965137550944795e-05, "loss": 2.3968, "step": 1406500 }, { "epoch": 4.07, "learning_rate": 4.796441535059281e-05, "loss": 2.4137, "step": 1407000 }, { "epoch": 4.07, "learning_rate": 4.796369170294554e-05, "loss": 2.4183, "step": 1407500 }, { "epoch": 4.08, "learning_rate": 4.796296805529826e-05, "loss": 2.3906, "step": 1408000 }, { "epoch": 4.08, "learning_rate": 4.7962244407650984e-05, "loss": 2.3973, "step": 1408500 }, { "epoch": 4.08, "learning_rate": 4.7961520760003706e-05, "loss": 2.4204, "step": 1409000 }, { "epoch": 4.08, "learning_rate": 4.796079711235643e-05, "loss": 2.392, "step": 1409500 }, { "epoch": 4.08, "learning_rate": 4.796007346470916e-05, "loss": 2.402, "step": 1410000 }, { "epoch": 4.08, "learning_rate": 4.795934981706188e-05, "loss": 2.4066, "step": 1410500 }, { "epoch": 4.08, "learning_rate": 4.79586261694146e-05, "loss": 2.3839, "step": 1411000 }, { "epoch": 4.09, "learning_rate": 4.7957902521767324e-05, "loss": 2.3936, "step": 1411500 }, { "epoch": 4.09, "learning_rate": 4.7957178874120047e-05, "loss": 2.3889, "step": 1412000 }, { "epoch": 4.09, "learning_rate": 4.795645667376806e-05, "loss": 2.4001, "step": 1412500 }, { "epoch": 4.09, "learning_rate": 4.7955733026120784e-05, "loss": 2.3953, "step": 1413000 }, { "epoch": 4.09, "learning_rate": 4.7955009378473507e-05, "loss": 2.3988, "step": 1413500 }, { "epoch": 4.09, "learning_rate": 4.795428717812153e-05, "loss": 2.3983, "step": 1414000 }, { "epoch": 4.09, "learning_rate": 4.795356353047426e-05, "loss": 2.4095, "step": 1414500 }, { "epoch": 4.1, "learning_rate": 4.795283988282698e-05, "loss": 2.3946, "step": 1415000 }, { "epoch": 4.1, "learning_rate": 4.79521162351797e-05, "loss": 2.4018, "step": 1415500 }, { "epoch": 4.1, "learning_rate": 4.7951392587532425e-05, "loss": 2.4063, "step": 1416000 }, { "epoch": 4.1, "learning_rate": 4.795067038718044e-05, "loss": 2.42, "step": 1416500 }, { "epoch": 4.1, "learning_rate": 4.794994673953316e-05, "loss": 2.4007, "step": 1417000 }, { "epoch": 4.1, "learning_rate": 4.7949223091885885e-05, "loss": 2.4054, "step": 1417500 }, { "epoch": 4.1, "learning_rate": 4.794849944423861e-05, "loss": 2.412, "step": 1418000 }, { "epoch": 4.11, "learning_rate": 4.794777579659133e-05, "loss": 2.4087, "step": 1418500 }, { "epoch": 4.11, "learning_rate": 4.794705214894406e-05, "loss": 2.4149, "step": 1419000 }, { "epoch": 4.11, "learning_rate": 4.7946329948592074e-05, "loss": 2.399, "step": 1419500 }, { "epoch": 4.11, "learning_rate": 4.7945606300944796e-05, "loss": 2.4041, "step": 1420000 }, { "epoch": 4.11, "learning_rate": 4.794488265329752e-05, "loss": 2.3879, "step": 1420500 }, { "epoch": 4.11, "learning_rate": 4.794415900565024e-05, "loss": 2.4068, "step": 1421000 }, { "epoch": 4.11, "learning_rate": 4.794343535800296e-05, "loss": 2.4068, "step": 1421500 }, { "epoch": 4.12, "learning_rate": 4.794271171035569e-05, "loss": 2.3957, "step": 1422000 }, { "epoch": 4.12, "learning_rate": 4.7941988062708414e-05, "loss": 2.3911, "step": 1422500 }, { "epoch": 4.12, "learning_rate": 4.7941264415061136e-05, "loss": 2.4026, "step": 1423000 }, { "epoch": 4.12, "learning_rate": 4.794054076741386e-05, "loss": 2.4124, "step": 1423500 }, { "epoch": 4.12, "learning_rate": 4.793981711976658e-05, "loss": 2.4108, "step": 1424000 }, { "epoch": 4.12, "learning_rate": 4.79390949194146e-05, "loss": 2.3982, "step": 1424500 }, { "epoch": 4.12, "learning_rate": 4.7938371271767325e-05, "loss": 2.399, "step": 1425000 }, { "epoch": 4.13, "learning_rate": 4.793764762412005e-05, "loss": 2.4116, "step": 1425500 }, { "epoch": 4.13, "learning_rate": 4.793692397647277e-05, "loss": 2.3898, "step": 1426000 }, { "epoch": 4.13, "learning_rate": 4.793620032882549e-05, "loss": 2.4199, "step": 1426500 }, { "epoch": 4.13, "learning_rate": 4.793547812847351e-05, "loss": 2.4235, "step": 1427000 }, { "epoch": 4.13, "learning_rate": 4.793475448082623e-05, "loss": 2.4074, "step": 1427500 }, { "epoch": 4.13, "learning_rate": 4.793403083317896e-05, "loss": 2.4129, "step": 1428000 }, { "epoch": 4.13, "learning_rate": 4.793330718553168e-05, "loss": 2.3994, "step": 1428500 }, { "epoch": 4.14, "learning_rate": 4.79325835378844e-05, "loss": 2.3804, "step": 1429000 }, { "epoch": 4.14, "learning_rate": 4.7931861337532425e-05, "loss": 2.4101, "step": 1429500 }, { "epoch": 4.14, "learning_rate": 4.793113768988515e-05, "loss": 2.4108, "step": 1430000 }, { "epoch": 4.14, "learning_rate": 4.793041404223787e-05, "loss": 2.3996, "step": 1430500 }, { "epoch": 4.14, "learning_rate": 4.792969039459059e-05, "loss": 2.3899, "step": 1431000 }, { "epoch": 4.14, "learning_rate": 4.7928966746943314e-05, "loss": 2.3757, "step": 1431500 }, { "epoch": 4.15, "learning_rate": 4.792824454659133e-05, "loss": 2.3744, "step": 1432000 }, { "epoch": 4.15, "learning_rate": 4.792752089894406e-05, "loss": 2.428, "step": 1432500 }, { "epoch": 4.15, "learning_rate": 4.792679725129678e-05, "loss": 2.4109, "step": 1433000 }, { "epoch": 4.15, "learning_rate": 4.79260736036495e-05, "loss": 2.4095, "step": 1433500 }, { "epoch": 4.15, "learning_rate": 4.792535140329752e-05, "loss": 2.3963, "step": 1434000 }, { "epoch": 4.15, "learning_rate": 4.792462775565024e-05, "loss": 2.3924, "step": 1434500 }, { "epoch": 4.15, "learning_rate": 4.792390410800296e-05, "loss": 2.4288, "step": 1435000 }, { "epoch": 4.16, "learning_rate": 4.7923180460355686e-05, "loss": 2.4117, "step": 1435500 }, { "epoch": 4.16, "learning_rate": 4.792245681270841e-05, "loss": 2.4063, "step": 1436000 }, { "epoch": 4.16, "learning_rate": 4.792173461235643e-05, "loss": 2.3963, "step": 1436500 }, { "epoch": 4.16, "learning_rate": 4.792101096470916e-05, "loss": 2.4123, "step": 1437000 }, { "epoch": 4.16, "learning_rate": 4.792028731706188e-05, "loss": 2.3983, "step": 1437500 }, { "epoch": 4.16, "learning_rate": 4.7919563669414604e-05, "loss": 2.4186, "step": 1438000 }, { "epoch": 4.16, "learning_rate": 4.7918840021767326e-05, "loss": 2.3993, "step": 1438500 }, { "epoch": 4.17, "learning_rate": 4.791811637412005e-05, "loss": 2.4046, "step": 1439000 }, { "epoch": 4.17, "learning_rate": 4.791739272647277e-05, "loss": 2.4206, "step": 1439500 }, { "epoch": 4.17, "learning_rate": 4.791666907882549e-05, "loss": 2.4095, "step": 1440000 }, { "epoch": 4.17, "learning_rate": 4.7915945431178215e-05, "loss": 2.4123, "step": 1440500 }, { "epoch": 4.17, "learning_rate": 4.791522467812153e-05, "loss": 2.3863, "step": 1441000 }, { "epoch": 4.17, "learning_rate": 4.791450103047425e-05, "loss": 2.4229, "step": 1441500 }, { "epoch": 4.17, "learning_rate": 4.7913777382826975e-05, "loss": 2.3915, "step": 1442000 }, { "epoch": 4.18, "learning_rate": 4.79130537351797e-05, "loss": 2.3945, "step": 1442500 }, { "epoch": 4.18, "learning_rate": 4.791233008753242e-05, "loss": 2.3893, "step": 1443000 }, { "epoch": 4.18, "learning_rate": 4.791160643988514e-05, "loss": 2.4127, "step": 1443500 }, { "epoch": 4.18, "learning_rate": 4.791088423953316e-05, "loss": 2.4116, "step": 1444000 }, { "epoch": 4.18, "learning_rate": 4.7910160591885886e-05, "loss": 2.3936, "step": 1444500 }, { "epoch": 4.18, "learning_rate": 4.790943694423861e-05, "loss": 2.4095, "step": 1445000 }, { "epoch": 4.18, "learning_rate": 4.790871329659134e-05, "loss": 2.4139, "step": 1445500 }, { "epoch": 4.19, "learning_rate": 4.790798964894406e-05, "loss": 2.4085, "step": 1446000 }, { "epoch": 4.19, "learning_rate": 4.790726600129678e-05, "loss": 2.373, "step": 1446500 }, { "epoch": 4.19, "learning_rate": 4.7906542353649504e-05, "loss": 2.4134, "step": 1447000 }, { "epoch": 4.19, "learning_rate": 4.7905818706002226e-05, "loss": 2.4054, "step": 1447500 }, { "epoch": 4.19, "learning_rate": 4.790509505835495e-05, "loss": 2.4086, "step": 1448000 }, { "epoch": 4.19, "learning_rate": 4.7904372858002964e-05, "loss": 2.4124, "step": 1448500 }, { "epoch": 4.19, "learning_rate": 4.7903649210355686e-05, "loss": 2.4193, "step": 1449000 }, { "epoch": 4.2, "learning_rate": 4.790292701000371e-05, "loss": 2.4019, "step": 1449500 }, { "epoch": 4.2, "learning_rate": 4.7902204809651724e-05, "loss": 2.4089, "step": 1450000 }, { "epoch": 4.2, "learning_rate": 4.7901481162004447e-05, "loss": 2.4057, "step": 1450500 }, { "epoch": 4.2, "learning_rate": 4.790075751435717e-05, "loss": 2.4023, "step": 1451000 }, { "epoch": 4.2, "learning_rate": 4.790003386670989e-05, "loss": 2.4455, "step": 1451500 }, { "epoch": 4.2, "learning_rate": 4.789931021906262e-05, "loss": 2.3947, "step": 1452000 }, { "epoch": 4.2, "learning_rate": 4.789858657141534e-05, "loss": 2.4088, "step": 1452500 }, { "epoch": 4.21, "learning_rate": 4.7897862923768064e-05, "loss": 2.4084, "step": 1453000 }, { "epoch": 4.21, "learning_rate": 4.789713927612079e-05, "loss": 2.4271, "step": 1453500 }, { "epoch": 4.21, "learning_rate": 4.789641707576881e-05, "loss": 2.4157, "step": 1454000 }, { "epoch": 4.21, "learning_rate": 4.789569342812153e-05, "loss": 2.413, "step": 1454500 }, { "epoch": 4.21, "learning_rate": 4.7894969780474253e-05, "loss": 2.408, "step": 1455000 }, { "epoch": 4.21, "learning_rate": 4.7894246132826976e-05, "loss": 2.4071, "step": 1455500 }, { "epoch": 4.21, "learning_rate": 4.78935224851797e-05, "loss": 2.3904, "step": 1456000 }, { "epoch": 4.22, "learning_rate": 4.7892800284827714e-05, "loss": 2.3726, "step": 1456500 }, { "epoch": 4.22, "learning_rate": 4.7892076637180436e-05, "loss": 2.4006, "step": 1457000 }, { "epoch": 4.22, "learning_rate": 4.7891352989533165e-05, "loss": 2.4204, "step": 1457500 }, { "epoch": 4.22, "learning_rate": 4.789062934188589e-05, "loss": 2.4124, "step": 1458000 }, { "epoch": 4.22, "learning_rate": 4.788990569423861e-05, "loss": 2.3924, "step": 1458500 }, { "epoch": 4.22, "learning_rate": 4.7889183493886625e-05, "loss": 2.3998, "step": 1459000 }, { "epoch": 4.22, "learning_rate": 4.788845984623935e-05, "loss": 2.4241, "step": 1459500 }, { "epoch": 4.23, "learning_rate": 4.7887736198592076e-05, "loss": 2.4058, "step": 1460000 }, { "epoch": 4.23, "learning_rate": 4.78870125509448e-05, "loss": 2.409, "step": 1460500 }, { "epoch": 4.23, "learning_rate": 4.788628890329752e-05, "loss": 2.3869, "step": 1461000 }, { "epoch": 4.23, "learning_rate": 4.788556525565024e-05, "loss": 2.3828, "step": 1461500 }, { "epoch": 4.23, "learning_rate": 4.7884841608002965e-05, "loss": 2.402, "step": 1462000 }, { "epoch": 4.23, "learning_rate": 4.788411796035569e-05, "loss": 2.4244, "step": 1462500 }, { "epoch": 4.23, "learning_rate": 4.788339431270841e-05, "loss": 2.3978, "step": 1463000 }, { "epoch": 4.24, "learning_rate": 4.788267066506114e-05, "loss": 2.3862, "step": 1463500 }, { "epoch": 4.24, "learning_rate": 4.7881948464709154e-05, "loss": 2.4021, "step": 1464000 }, { "epoch": 4.24, "learning_rate": 4.7881224817061876e-05, "loss": 2.4202, "step": 1464500 }, { "epoch": 4.24, "learning_rate": 4.78805011694146e-05, "loss": 2.4003, "step": 1465000 }, { "epoch": 4.24, "learning_rate": 4.787977752176732e-05, "loss": 2.3728, "step": 1465500 }, { "epoch": 4.24, "learning_rate": 4.787905387412004e-05, "loss": 2.3955, "step": 1466000 }, { "epoch": 4.24, "learning_rate": 4.7878331673768065e-05, "loss": 2.3951, "step": 1466500 }, { "epoch": 4.25, "learning_rate": 4.787760802612079e-05, "loss": 2.4074, "step": 1467000 }, { "epoch": 4.25, "learning_rate": 4.7876884378473517e-05, "loss": 2.4202, "step": 1467500 }, { "epoch": 4.25, "learning_rate": 4.787616217812153e-05, "loss": 2.4177, "step": 1468000 }, { "epoch": 4.25, "learning_rate": 4.7875438530474254e-05, "loss": 2.4062, "step": 1468500 }, { "epoch": 4.25, "learning_rate": 4.7874714882826977e-05, "loss": 2.4015, "step": 1469000 }, { "epoch": 4.25, "learning_rate": 4.78739912351797e-05, "loss": 2.3816, "step": 1469500 }, { "epoch": 4.26, "learning_rate": 4.787326758753242e-05, "loss": 2.3834, "step": 1470000 }, { "epoch": 4.26, "learning_rate": 4.787254393988514e-05, "loss": 2.4231, "step": 1470500 }, { "epoch": 4.26, "learning_rate": 4.7871821739533166e-05, "loss": 2.3844, "step": 1471000 }, { "epoch": 4.26, "learning_rate": 4.787109809188589e-05, "loss": 2.4058, "step": 1471500 }, { "epoch": 4.26, "learning_rate": 4.787037444423861e-05, "loss": 2.4149, "step": 1472000 }, { "epoch": 4.26, "learning_rate": 4.786965079659133e-05, "loss": 2.4195, "step": 1472500 }, { "epoch": 4.26, "learning_rate": 4.7868927148944054e-05, "loss": 2.4304, "step": 1473000 }, { "epoch": 4.27, "learning_rate": 4.786820350129678e-05, "loss": 2.3994, "step": 1473500 }, { "epoch": 4.27, "learning_rate": 4.7867482748240086e-05, "loss": 2.3795, "step": 1474000 }, { "epoch": 4.27, "learning_rate": 4.7866759100592815e-05, "loss": 2.3872, "step": 1474500 }, { "epoch": 4.27, "learning_rate": 4.786603545294554e-05, "loss": 2.4111, "step": 1475000 }, { "epoch": 4.27, "learning_rate": 4.786531325259356e-05, "loss": 2.431, "step": 1475500 }, { "epoch": 4.27, "learning_rate": 4.786458960494628e-05, "loss": 2.4072, "step": 1476000 }, { "epoch": 4.27, "learning_rate": 4.7863865957299004e-05, "loss": 2.3743, "step": 1476500 }, { "epoch": 4.28, "learning_rate": 4.7863142309651726e-05, "loss": 2.3986, "step": 1477000 }, { "epoch": 4.28, "learning_rate": 4.786241866200445e-05, "loss": 2.408, "step": 1477500 }, { "epoch": 4.28, "learning_rate": 4.786169501435717e-05, "loss": 2.408, "step": 1478000 }, { "epoch": 4.28, "learning_rate": 4.786097136670989e-05, "loss": 2.3984, "step": 1478500 }, { "epoch": 4.28, "learning_rate": 4.7860249166357915e-05, "loss": 2.398, "step": 1479000 }, { "epoch": 4.28, "learning_rate": 4.785952551871064e-05, "loss": 2.4052, "step": 1479500 }, { "epoch": 4.28, "learning_rate": 4.785880187106336e-05, "loss": 2.4227, "step": 1480000 }, { "epoch": 4.29, "learning_rate": 4.785807822341608e-05, "loss": 2.4046, "step": 1480500 }, { "epoch": 4.29, "learning_rate": 4.7857354575768804e-05, "loss": 2.3982, "step": 1481000 }, { "epoch": 4.29, "learning_rate": 4.7856630928121526e-05, "loss": 2.3867, "step": 1481500 }, { "epoch": 4.29, "learning_rate": 4.7855907280474255e-05, "loss": 2.4008, "step": 1482000 }, { "epoch": 4.29, "learning_rate": 4.785518363282698e-05, "loss": 2.4027, "step": 1482500 }, { "epoch": 4.29, "learning_rate": 4.78544599851797e-05, "loss": 2.4033, "step": 1483000 }, { "epoch": 4.29, "learning_rate": 4.785373633753242e-05, "loss": 2.3946, "step": 1483500 }, { "epoch": 4.3, "learning_rate": 4.7853012689885144e-05, "loss": 2.4017, "step": 1484000 }, { "epoch": 4.3, "learning_rate": 4.7852289042237866e-05, "loss": 2.4037, "step": 1484500 }, { "epoch": 4.3, "learning_rate": 4.785156684188589e-05, "loss": 2.4074, "step": 1485000 }, { "epoch": 4.3, "learning_rate": 4.7850844641533904e-05, "loss": 2.3921, "step": 1485500 }, { "epoch": 4.3, "learning_rate": 4.7850120993886626e-05, "loss": 2.3932, "step": 1486000 }, { "epoch": 4.3, "learning_rate": 4.784939734623935e-05, "loss": 2.4037, "step": 1486500 }, { "epoch": 4.3, "learning_rate": 4.784867369859207e-05, "loss": 2.4039, "step": 1487000 }, { "epoch": 4.31, "learning_rate": 4.784795005094479e-05, "loss": 2.4231, "step": 1487500 }, { "epoch": 4.31, "learning_rate": 4.7847226403297515e-05, "loss": 2.4135, "step": 1488000 }, { "epoch": 4.31, "learning_rate": 4.7846502755650244e-05, "loss": 2.4141, "step": 1488500 }, { "epoch": 4.31, "learning_rate": 4.7845779108002967e-05, "loss": 2.399, "step": 1489000 }, { "epoch": 4.31, "learning_rate": 4.7845055460355696e-05, "loss": 2.3927, "step": 1489500 }, { "epoch": 4.31, "learning_rate": 4.784433181270842e-05, "loss": 2.4216, "step": 1490000 }, { "epoch": 4.31, "learning_rate": 4.784360816506114e-05, "loss": 2.4004, "step": 1490500 }, { "epoch": 4.32, "learning_rate": 4.784288451741386e-05, "loss": 2.4099, "step": 1491000 }, { "epoch": 4.32, "learning_rate": 4.7842160869766584e-05, "loss": 2.4077, "step": 1491500 }, { "epoch": 4.32, "learning_rate": 4.784143722211931e-05, "loss": 2.4071, "step": 1492000 }, { "epoch": 4.32, "learning_rate": 4.784071357447203e-05, "loss": 2.4223, "step": 1492500 }, { "epoch": 4.32, "learning_rate": 4.783998992682475e-05, "loss": 2.3678, "step": 1493000 }, { "epoch": 4.32, "learning_rate": 4.783926772647277e-05, "loss": 2.4064, "step": 1493500 }, { "epoch": 4.32, "learning_rate": 4.7838544078825496e-05, "loss": 2.3764, "step": 1494000 }, { "epoch": 4.33, "learning_rate": 4.783782043117822e-05, "loss": 2.3944, "step": 1494500 }, { "epoch": 4.33, "learning_rate": 4.783709678353094e-05, "loss": 2.4337, "step": 1495000 }, { "epoch": 4.33, "learning_rate": 4.783637313588366e-05, "loss": 2.3923, "step": 1495500 }, { "epoch": 4.33, "learning_rate": 4.7835649488236385e-05, "loss": 2.4043, "step": 1496000 }, { "epoch": 4.33, "learning_rate": 4.78349272878844e-05, "loss": 2.3757, "step": 1496500 }, { "epoch": 4.33, "learning_rate": 4.783420364023713e-05, "loss": 2.3823, "step": 1497000 }, { "epoch": 4.33, "learning_rate": 4.783347999258985e-05, "loss": 2.4062, "step": 1497500 }, { "epoch": 4.34, "learning_rate": 4.7832756344942574e-05, "loss": 2.4109, "step": 1498000 }, { "epoch": 4.34, "learning_rate": 4.7832032697295296e-05, "loss": 2.4158, "step": 1498500 }, { "epoch": 4.34, "learning_rate": 4.783131049694332e-05, "loss": 2.4295, "step": 1499000 }, { "epoch": 4.34, "learning_rate": 4.783058684929604e-05, "loss": 2.3893, "step": 1499500 }, { "epoch": 4.34, "learning_rate": 4.782986320164876e-05, "loss": 2.3914, "step": 1500000 }, { "epoch": 4.34, "learning_rate": 4.7829139554001485e-05, "loss": 2.4094, "step": 1500500 }, { "epoch": 4.34, "learning_rate": 4.78284173536495e-05, "loss": 2.4106, "step": 1501000 }, { "epoch": 4.35, "learning_rate": 4.782769370600222e-05, "loss": 2.4123, "step": 1501500 }, { "epoch": 4.35, "learning_rate": 4.7826970058354945e-05, "loss": 2.3907, "step": 1502000 }, { "epoch": 4.35, "learning_rate": 4.782624641070767e-05, "loss": 2.3986, "step": 1502500 }, { "epoch": 4.35, "learning_rate": 4.782552421035569e-05, "loss": 2.4186, "step": 1503000 }, { "epoch": 4.35, "learning_rate": 4.782480056270841e-05, "loss": 2.3867, "step": 1503500 }, { "epoch": 4.35, "learning_rate": 4.7824076915061134e-05, "loss": 2.4112, "step": 1504000 }, { "epoch": 4.35, "learning_rate": 4.782335326741386e-05, "loss": 2.409, "step": 1504500 }, { "epoch": 4.36, "learning_rate": 4.7822629619766585e-05, "loss": 2.4011, "step": 1505000 }, { "epoch": 4.36, "learning_rate": 4.782190597211931e-05, "loss": 2.3981, "step": 1505500 }, { "epoch": 4.36, "learning_rate": 4.782118377176732e-05, "loss": 2.4429, "step": 1506000 }, { "epoch": 4.36, "learning_rate": 4.7820461571415345e-05, "loss": 2.3963, "step": 1506500 }, { "epoch": 4.36, "learning_rate": 4.781973792376807e-05, "loss": 2.4017, "step": 1507000 }, { "epoch": 4.36, "learning_rate": 4.781901427612079e-05, "loss": 2.3928, "step": 1507500 }, { "epoch": 4.37, "learning_rate": 4.781829062847351e-05, "loss": 2.4359, "step": 1508000 }, { "epoch": 4.37, "learning_rate": 4.7817566980826234e-05, "loss": 2.428, "step": 1508500 }, { "epoch": 4.37, "learning_rate": 4.7816843333178957e-05, "loss": 2.4102, "step": 1509000 }, { "epoch": 4.37, "learning_rate": 4.781612113282697e-05, "loss": 2.4251, "step": 1509500 }, { "epoch": 4.37, "learning_rate": 4.7815397485179694e-05, "loss": 2.4143, "step": 1510000 }, { "epoch": 4.37, "learning_rate": 4.7814673837532417e-05, "loss": 2.3965, "step": 1510500 }, { "epoch": 4.37, "learning_rate": 4.7813950189885146e-05, "loss": 2.4148, "step": 1511000 }, { "epoch": 4.38, "learning_rate": 4.781322654223787e-05, "loss": 2.4207, "step": 1511500 }, { "epoch": 4.38, "learning_rate": 4.78125028945906e-05, "loss": 2.4228, "step": 1512000 }, { "epoch": 4.38, "learning_rate": 4.781177924694332e-05, "loss": 2.4213, "step": 1512500 }, { "epoch": 4.38, "learning_rate": 4.781105559929604e-05, "loss": 2.4023, "step": 1513000 }, { "epoch": 4.38, "learning_rate": 4.7810331951648764e-05, "loss": 2.4147, "step": 1513500 }, { "epoch": 4.38, "learning_rate": 4.7809608304001486e-05, "loss": 2.3836, "step": 1514000 }, { "epoch": 4.38, "learning_rate": 4.78088861036495e-05, "loss": 2.3771, "step": 1514500 }, { "epoch": 4.39, "learning_rate": 4.7808162456002224e-05, "loss": 2.3647, "step": 1515000 }, { "epoch": 4.39, "learning_rate": 4.7807438808354946e-05, "loss": 2.407, "step": 1515500 }, { "epoch": 4.39, "learning_rate": 4.780671516070767e-05, "loss": 2.4139, "step": 1516000 }, { "epoch": 4.39, "learning_rate": 4.78059915130604e-05, "loss": 2.4004, "step": 1516500 }, { "epoch": 4.39, "learning_rate": 4.780526786541312e-05, "loss": 2.3937, "step": 1517000 }, { "epoch": 4.39, "learning_rate": 4.780454421776584e-05, "loss": 2.3948, "step": 1517500 }, { "epoch": 4.39, "learning_rate": 4.7803820570118564e-05, "loss": 2.393, "step": 1518000 }, { "epoch": 4.4, "learning_rate": 4.780309836976658e-05, "loss": 2.4086, "step": 1518500 }, { "epoch": 4.4, "learning_rate": 4.78023747221193e-05, "loss": 2.3971, "step": 1519000 }, { "epoch": 4.4, "learning_rate": 4.780165107447203e-05, "loss": 2.4093, "step": 1519500 }, { "epoch": 4.4, "learning_rate": 4.780092742682475e-05, "loss": 2.4049, "step": 1520000 }, { "epoch": 4.4, "learning_rate": 4.7800203779177475e-05, "loss": 2.4124, "step": 1520500 }, { "epoch": 4.4, "learning_rate": 4.77994815788255e-05, "loss": 2.4371, "step": 1521000 }, { "epoch": 4.4, "learning_rate": 4.779875937847351e-05, "loss": 2.4112, "step": 1521500 }, { "epoch": 4.41, "learning_rate": 4.7798035730826235e-05, "loss": 2.4119, "step": 1522000 }, { "epoch": 4.41, "learning_rate": 4.779731208317896e-05, "loss": 2.3831, "step": 1522500 }, { "epoch": 4.41, "learning_rate": 4.779658988282697e-05, "loss": 2.4081, "step": 1523000 }, { "epoch": 4.41, "learning_rate": 4.7795866235179695e-05, "loss": 2.4268, "step": 1523500 }, { "epoch": 4.41, "learning_rate": 4.7795142587532424e-05, "loss": 2.3927, "step": 1524000 }, { "epoch": 4.41, "learning_rate": 4.7794418939885146e-05, "loss": 2.3992, "step": 1524500 }, { "epoch": 4.41, "learning_rate": 4.779369529223787e-05, "loss": 2.4229, "step": 1525000 }, { "epoch": 4.42, "learning_rate": 4.779297164459059e-05, "loss": 2.4264, "step": 1525500 }, { "epoch": 4.42, "learning_rate": 4.779224799694331e-05, "loss": 2.4176, "step": 1526000 }, { "epoch": 4.42, "learning_rate": 4.7791524349296035e-05, "loss": 2.3986, "step": 1526500 }, { "epoch": 4.42, "learning_rate": 4.7790800701648764e-05, "loss": 2.3926, "step": 1527000 }, { "epoch": 4.42, "learning_rate": 4.7790077054001487e-05, "loss": 2.4146, "step": 1527500 }, { "epoch": 4.42, "learning_rate": 4.778935340635421e-05, "loss": 2.3776, "step": 1528000 }, { "epoch": 4.42, "learning_rate": 4.778862975870693e-05, "loss": 2.4156, "step": 1528500 }, { "epoch": 4.43, "learning_rate": 4.778790611105965e-05, "loss": 2.4318, "step": 1529000 }, { "epoch": 4.43, "learning_rate": 4.7787182463412376e-05, "loss": 2.399, "step": 1529500 }, { "epoch": 4.43, "learning_rate": 4.77864588157651e-05, "loss": 2.4095, "step": 1530000 }, { "epoch": 4.43, "learning_rate": 4.778573516811782e-05, "loss": 2.3916, "step": 1530500 }, { "epoch": 4.43, "learning_rate": 4.778501296776584e-05, "loss": 2.3924, "step": 1531000 }, { "epoch": 4.43, "learning_rate": 4.7784289320118565e-05, "loss": 2.409, "step": 1531500 }, { "epoch": 4.43, "learning_rate": 4.778356711976658e-05, "loss": 2.4005, "step": 1532000 }, { "epoch": 4.44, "learning_rate": 4.77828434721193e-05, "loss": 2.4065, "step": 1532500 }, { "epoch": 4.44, "learning_rate": 4.7782119824472025e-05, "loss": 2.3815, "step": 1533000 }, { "epoch": 4.44, "learning_rate": 4.778139617682475e-05, "loss": 2.3923, "step": 1533500 }, { "epoch": 4.44, "learning_rate": 4.778067397647277e-05, "loss": 2.4268, "step": 1534000 }, { "epoch": 4.44, "learning_rate": 4.77799503288255e-05, "loss": 2.4016, "step": 1534500 }, { "epoch": 4.44, "learning_rate": 4.777922668117822e-05, "loss": 2.4032, "step": 1535000 }, { "epoch": 4.44, "learning_rate": 4.777850303353094e-05, "loss": 2.4165, "step": 1535500 }, { "epoch": 4.45, "learning_rate": 4.777778083317896e-05, "loss": 2.4157, "step": 1536000 }, { "epoch": 4.45, "learning_rate": 4.777705718553168e-05, "loss": 2.414, "step": 1536500 }, { "epoch": 4.45, "learning_rate": 4.77763335378844e-05, "loss": 2.4169, "step": 1537000 }, { "epoch": 4.45, "learning_rate": 4.7775609890237125e-05, "loss": 2.3847, "step": 1537500 }, { "epoch": 4.45, "learning_rate": 4.777488768988515e-05, "loss": 2.4039, "step": 1538000 }, { "epoch": 4.45, "learning_rate": 4.777416548953316e-05, "loss": 2.4156, "step": 1538500 }, { "epoch": 4.45, "learning_rate": 4.7773441841885885e-05, "loss": 2.41, "step": 1539000 }, { "epoch": 4.46, "learning_rate": 4.777271819423861e-05, "loss": 2.4072, "step": 1539500 }, { "epoch": 4.46, "learning_rate": 4.777199454659133e-05, "loss": 2.4014, "step": 1540000 }, { "epoch": 4.46, "learning_rate": 4.777127089894405e-05, "loss": 2.368, "step": 1540500 }, { "epoch": 4.46, "learning_rate": 4.7770547251296774e-05, "loss": 2.3863, "step": 1541000 }, { "epoch": 4.46, "learning_rate": 4.7769825050944796e-05, "loss": 2.4064, "step": 1541500 }, { "epoch": 4.46, "learning_rate": 4.776910285059281e-05, "loss": 2.4054, "step": 1542000 }, { "epoch": 4.46, "learning_rate": 4.776837920294554e-05, "loss": 2.3758, "step": 1542500 }, { "epoch": 4.47, "learning_rate": 4.776765555529826e-05, "loss": 2.4019, "step": 1543000 }, { "epoch": 4.47, "learning_rate": 4.7766931907650985e-05, "loss": 2.4036, "step": 1543500 }, { "epoch": 4.47, "learning_rate": 4.776620826000371e-05, "loss": 2.3779, "step": 1544000 }, { "epoch": 4.47, "learning_rate": 4.776548461235643e-05, "loss": 2.4146, "step": 1544500 }, { "epoch": 4.47, "learning_rate": 4.776476096470915e-05, "loss": 2.4077, "step": 1545000 }, { "epoch": 4.47, "learning_rate": 4.7764037317061874e-05, "loss": 2.395, "step": 1545500 }, { "epoch": 4.48, "learning_rate": 4.77633136694146e-05, "loss": 2.4024, "step": 1546000 }, { "epoch": 4.48, "learning_rate": 4.7762590021767325e-05, "loss": 2.4123, "step": 1546500 }, { "epoch": 4.48, "learning_rate": 4.776186637412005e-05, "loss": 2.3997, "step": 1547000 }, { "epoch": 4.48, "learning_rate": 4.776114417376806e-05, "loss": 2.4081, "step": 1547500 }, { "epoch": 4.48, "learning_rate": 4.7760420526120785e-05, "loss": 2.4073, "step": 1548000 }, { "epoch": 4.48, "learning_rate": 4.775969687847351e-05, "loss": 2.4255, "step": 1548500 }, { "epoch": 4.48, "learning_rate": 4.775897323082623e-05, "loss": 2.4003, "step": 1549000 }, { "epoch": 4.49, "learning_rate": 4.775825103047425e-05, "loss": 2.4371, "step": 1549500 }, { "epoch": 4.49, "learning_rate": 4.7757527382826975e-05, "loss": 2.3938, "step": 1550000 }, { "epoch": 4.49, "learning_rate": 4.7756803735179704e-05, "loss": 2.4154, "step": 1550500 }, { "epoch": 4.49, "learning_rate": 4.7756080087532426e-05, "loss": 2.4271, "step": 1551000 }, { "epoch": 4.49, "learning_rate": 4.775535643988515e-05, "loss": 2.4115, "step": 1551500 }, { "epoch": 4.49, "learning_rate": 4.775463279223787e-05, "loss": 2.4289, "step": 1552000 }, { "epoch": 4.49, "learning_rate": 4.775390914459059e-05, "loss": 2.4203, "step": 1552500 }, { "epoch": 4.5, "learning_rate": 4.7753185496943315e-05, "loss": 2.3844, "step": 1553000 }, { "epoch": 4.5, "learning_rate": 4.775246184929604e-05, "loss": 2.41, "step": 1553500 }, { "epoch": 4.5, "learning_rate": 4.775173964894405e-05, "loss": 2.4078, "step": 1554000 }, { "epoch": 4.5, "learning_rate": 4.7751017448592075e-05, "loss": 2.3975, "step": 1554500 }, { "epoch": 4.5, "learning_rate": 4.77502938009448e-05, "loss": 2.4143, "step": 1555000 }, { "epoch": 4.5, "learning_rate": 4.774957015329752e-05, "loss": 2.4097, "step": 1555500 }, { "epoch": 4.5, "learning_rate": 4.774884650565024e-05, "loss": 2.4038, "step": 1556000 }, { "epoch": 4.51, "learning_rate": 4.7748122858002964e-05, "loss": 2.4089, "step": 1556500 }, { "epoch": 4.51, "learning_rate": 4.774740065765098e-05, "loss": 2.4178, "step": 1557000 }, { "epoch": 4.51, "learning_rate": 4.774667701000371e-05, "loss": 2.3995, "step": 1557500 }, { "epoch": 4.51, "learning_rate": 4.774595336235643e-05, "loss": 2.3735, "step": 1558000 }, { "epoch": 4.51, "learning_rate": 4.774522971470915e-05, "loss": 2.4168, "step": 1558500 }, { "epoch": 4.51, "learning_rate": 4.7744506067061875e-05, "loss": 2.4159, "step": 1559000 }, { "epoch": 4.51, "learning_rate": 4.7743782419414604e-05, "loss": 2.375, "step": 1559500 }, { "epoch": 4.52, "learning_rate": 4.774306021906262e-05, "loss": 2.377, "step": 1560000 }, { "epoch": 4.52, "learning_rate": 4.774233657141534e-05, "loss": 2.3908, "step": 1560500 }, { "epoch": 4.52, "learning_rate": 4.7741612923768064e-05, "loss": 2.407, "step": 1561000 }, { "epoch": 4.52, "learning_rate": 4.7740889276120786e-05, "loss": 2.4081, "step": 1561500 }, { "epoch": 4.52, "learning_rate": 4.774016562847351e-05, "loss": 2.4069, "step": 1562000 }, { "epoch": 4.52, "learning_rate": 4.773944198082623e-05, "loss": 2.3855, "step": 1562500 }, { "epoch": 4.52, "learning_rate": 4.773871978047425e-05, "loss": 2.4028, "step": 1563000 }, { "epoch": 4.53, "learning_rate": 4.7737996132826975e-05, "loss": 2.4095, "step": 1563500 }, { "epoch": 4.53, "learning_rate": 4.77372724851797e-05, "loss": 2.3938, "step": 1564000 }, { "epoch": 4.53, "learning_rate": 4.7736548837532427e-05, "loss": 2.4144, "step": 1564500 }, { "epoch": 4.53, "learning_rate": 4.773582518988515e-05, "loss": 2.4065, "step": 1565000 }, { "epoch": 4.53, "learning_rate": 4.773510154223787e-05, "loss": 2.42, "step": 1565500 }, { "epoch": 4.53, "learning_rate": 4.773437789459059e-05, "loss": 2.402, "step": 1566000 }, { "epoch": 4.53, "learning_rate": 4.7733654246943315e-05, "loss": 2.4092, "step": 1566500 }, { "epoch": 4.54, "learning_rate": 4.773293059929604e-05, "loss": 2.4149, "step": 1567000 }, { "epoch": 4.54, "learning_rate": 4.773220695164876e-05, "loss": 2.3853, "step": 1567500 }, { "epoch": 4.54, "learning_rate": 4.773148330400148e-05, "loss": 2.4162, "step": 1568000 }, { "epoch": 4.54, "learning_rate": 4.7730759656354204e-05, "loss": 2.3906, "step": 1568500 }, { "epoch": 4.54, "learning_rate": 4.773003890329752e-05, "loss": 2.4269, "step": 1569000 }, { "epoch": 4.54, "learning_rate": 4.772931525565024e-05, "loss": 2.4229, "step": 1569500 }, { "epoch": 4.54, "learning_rate": 4.7728591608002965e-05, "loss": 2.4178, "step": 1570000 }, { "epoch": 4.55, "learning_rate": 4.772786796035569e-05, "loss": 2.3728, "step": 1570500 }, { "epoch": 4.55, "learning_rate": 4.772714431270841e-05, "loss": 2.4172, "step": 1571000 }, { "epoch": 4.55, "learning_rate": 4.772642211235643e-05, "loss": 2.3942, "step": 1571500 }, { "epoch": 4.55, "learning_rate": 4.7725698464709154e-05, "loss": 2.4161, "step": 1572000 }, { "epoch": 4.55, "learning_rate": 4.772497481706188e-05, "loss": 2.4211, "step": 1572500 }, { "epoch": 4.55, "learning_rate": 4.7724251169414605e-05, "loss": 2.3863, "step": 1573000 }, { "epoch": 4.55, "learning_rate": 4.772352752176733e-05, "loss": 2.4052, "step": 1573500 }, { "epoch": 4.56, "learning_rate": 4.772280387412005e-05, "loss": 2.4065, "step": 1574000 }, { "epoch": 4.56, "learning_rate": 4.772208022647277e-05, "loss": 2.4304, "step": 1574500 }, { "epoch": 4.56, "learning_rate": 4.7721356578825494e-05, "loss": 2.3987, "step": 1575000 }, { "epoch": 4.56, "learning_rate": 4.77206372730641e-05, "loss": 2.4176, "step": 1575500 }, { "epoch": 4.56, "learning_rate": 4.7719913625416825e-05, "loss": 2.3861, "step": 1576000 }, { "epoch": 4.56, "learning_rate": 4.771918997776955e-05, "loss": 2.4126, "step": 1576500 }, { "epoch": 4.56, "learning_rate": 4.771846633012227e-05, "loss": 2.4089, "step": 1577000 }, { "epoch": 4.57, "learning_rate": 4.771774268247499e-05, "loss": 2.3926, "step": 1577500 }, { "epoch": 4.57, "learning_rate": 4.7717019034827714e-05, "loss": 2.4098, "step": 1578000 }, { "epoch": 4.57, "learning_rate": 4.771629683447573e-05, "loss": 2.3958, "step": 1578500 }, { "epoch": 4.57, "learning_rate": 4.771557318682845e-05, "loss": 2.4016, "step": 1579000 }, { "epoch": 4.57, "learning_rate": 4.771484953918118e-05, "loss": 2.3863, "step": 1579500 }, { "epoch": 4.57, "learning_rate": 4.77141258915339e-05, "loss": 2.4079, "step": 1580000 }, { "epoch": 4.57, "learning_rate": 4.771340224388663e-05, "loss": 2.4021, "step": 1580500 }, { "epoch": 4.58, "learning_rate": 4.7712678596239354e-05, "loss": 2.3927, "step": 1581000 }, { "epoch": 4.58, "learning_rate": 4.7711954948592076e-05, "loss": 2.3973, "step": 1581500 }, { "epoch": 4.58, "learning_rate": 4.77112313009448e-05, "loss": 2.3911, "step": 1582000 }, { "epoch": 4.58, "learning_rate": 4.771050765329752e-05, "loss": 2.3867, "step": 1582500 }, { "epoch": 4.58, "learning_rate": 4.770978400565024e-05, "loss": 2.4236, "step": 1583000 }, { "epoch": 4.58, "learning_rate": 4.770906325259356e-05, "loss": 2.3837, "step": 1583500 }, { "epoch": 4.59, "learning_rate": 4.770833960494628e-05, "loss": 2.423, "step": 1584000 }, { "epoch": 4.59, "learning_rate": 4.7707615957299e-05, "loss": 2.3865, "step": 1584500 }, { "epoch": 4.59, "learning_rate": 4.7706892309651725e-05, "loss": 2.4223, "step": 1585000 }, { "epoch": 4.59, "learning_rate": 4.770616866200445e-05, "loss": 2.4254, "step": 1585500 }, { "epoch": 4.59, "learning_rate": 4.770544501435717e-05, "loss": 2.4118, "step": 1586000 }, { "epoch": 4.59, "learning_rate": 4.770472136670989e-05, "loss": 2.3966, "step": 1586500 }, { "epoch": 4.59, "learning_rate": 4.770399771906262e-05, "loss": 2.3925, "step": 1587000 }, { "epoch": 4.6, "learning_rate": 4.770327551871064e-05, "loss": 2.4005, "step": 1587500 }, { "epoch": 4.6, "learning_rate": 4.770255187106336e-05, "loss": 2.4169, "step": 1588000 }, { "epoch": 4.6, "learning_rate": 4.770182822341608e-05, "loss": 2.4099, "step": 1588500 }, { "epoch": 4.6, "learning_rate": 4.770110457576881e-05, "loss": 2.4093, "step": 1589000 }, { "epoch": 4.6, "learning_rate": 4.770038092812153e-05, "loss": 2.4065, "step": 1589500 }, { "epoch": 4.6, "learning_rate": 4.7699657280474255e-05, "loss": 2.3872, "step": 1590000 }, { "epoch": 4.6, "learning_rate": 4.769893363282698e-05, "loss": 2.4044, "step": 1590500 }, { "epoch": 4.61, "learning_rate": 4.76982099851797e-05, "loss": 2.4164, "step": 1591000 }, { "epoch": 4.61, "learning_rate": 4.769748633753242e-05, "loss": 2.4102, "step": 1591500 }, { "epoch": 4.61, "learning_rate": 4.7696762689885144e-05, "loss": 2.3964, "step": 1592000 }, { "epoch": 4.61, "learning_rate": 4.7696039042237866e-05, "loss": 2.3916, "step": 1592500 }, { "epoch": 4.61, "learning_rate": 4.769531828918118e-05, "loss": 2.3872, "step": 1593000 }, { "epoch": 4.61, "learning_rate": 4.7694594641533904e-05, "loss": 2.3998, "step": 1593500 }, { "epoch": 4.61, "learning_rate": 4.7693870993886626e-05, "loss": 2.377, "step": 1594000 }, { "epoch": 4.62, "learning_rate": 4.769314734623935e-05, "loss": 2.4061, "step": 1594500 }, { "epoch": 4.62, "learning_rate": 4.769242369859208e-05, "loss": 2.4111, "step": 1595000 }, { "epoch": 4.62, "learning_rate": 4.76917000509448e-05, "loss": 2.4019, "step": 1595500 }, { "epoch": 4.62, "learning_rate": 4.769097640329752e-05, "loss": 2.4048, "step": 1596000 }, { "epoch": 4.62, "learning_rate": 4.7690252755650244e-05, "loss": 2.415, "step": 1596500 }, { "epoch": 4.62, "learning_rate": 4.7689529108002966e-05, "loss": 2.3938, "step": 1597000 }, { "epoch": 4.62, "learning_rate": 4.768880690765098e-05, "loss": 2.3854, "step": 1597500 }, { "epoch": 4.63, "learning_rate": 4.768808326000371e-05, "loss": 2.4076, "step": 1598000 }, { "epoch": 4.63, "learning_rate": 4.768735961235643e-05, "loss": 2.4007, "step": 1598500 }, { "epoch": 4.63, "learning_rate": 4.7686635964709155e-05, "loss": 2.4099, "step": 1599000 }, { "epoch": 4.63, "learning_rate": 4.768591231706188e-05, "loss": 2.4026, "step": 1599500 }, { "epoch": 4.63, "learning_rate": 4.768519011670989e-05, "loss": 2.4239, "step": 1600000 }, { "epoch": 4.63, "learning_rate": 4.7684466469062615e-05, "loss": 2.4118, "step": 1600500 }, { "epoch": 4.63, "learning_rate": 4.768374282141534e-05, "loss": 2.4259, "step": 1601000 }, { "epoch": 4.64, "learning_rate": 4.768301917376806e-05, "loss": 2.3864, "step": 1601500 }, { "epoch": 4.64, "learning_rate": 4.768229552612079e-05, "loss": 2.4108, "step": 1602000 }, { "epoch": 4.64, "learning_rate": 4.768157332576881e-05, "loss": 2.4319, "step": 1602500 }, { "epoch": 4.64, "learning_rate": 4.768084967812153e-05, "loss": 2.4033, "step": 1603000 }, { "epoch": 4.64, "learning_rate": 4.7680126030474255e-05, "loss": 2.4052, "step": 1603500 }, { "epoch": 4.64, "learning_rate": 4.767940238282698e-05, "loss": 2.4218, "step": 1604000 }, { "epoch": 4.64, "learning_rate": 4.767868018247499e-05, "loss": 2.4048, "step": 1604500 }, { "epoch": 4.65, "learning_rate": 4.767795798212301e-05, "loss": 2.3961, "step": 1605000 }, { "epoch": 4.65, "learning_rate": 4.767723433447573e-05, "loss": 2.4039, "step": 1605500 }, { "epoch": 4.65, "learning_rate": 4.767651068682846e-05, "loss": 2.4094, "step": 1606000 }, { "epoch": 4.65, "learning_rate": 4.767578703918118e-05, "loss": 2.4187, "step": 1606500 }, { "epoch": 4.65, "learning_rate": 4.7675063391533905e-05, "loss": 2.3923, "step": 1607000 }, { "epoch": 4.65, "learning_rate": 4.767433974388663e-05, "loss": 2.4008, "step": 1607500 }, { "epoch": 4.65, "learning_rate": 4.767361609623935e-05, "loss": 2.3778, "step": 1608000 }, { "epoch": 4.66, "learning_rate": 4.767289244859207e-05, "loss": 2.3893, "step": 1608500 }, { "epoch": 4.66, "learning_rate": 4.767217169553539e-05, "loss": 2.4255, "step": 1609000 }, { "epoch": 4.66, "learning_rate": 4.767144804788811e-05, "loss": 2.4109, "step": 1609500 }, { "epoch": 4.66, "learning_rate": 4.767072440024084e-05, "loss": 2.4215, "step": 1610000 }, { "epoch": 4.66, "learning_rate": 4.767000075259356e-05, "loss": 2.3852, "step": 1610500 }, { "epoch": 4.66, "learning_rate": 4.766927710494628e-05, "loss": 2.3951, "step": 1611000 }, { "epoch": 4.66, "learning_rate": 4.7668553457299005e-05, "loss": 2.4263, "step": 1611500 }, { "epoch": 4.67, "learning_rate": 4.766782980965173e-05, "loss": 2.3964, "step": 1612000 }, { "epoch": 4.67, "learning_rate": 4.766710760929974e-05, "loss": 2.4086, "step": 1612500 }, { "epoch": 4.67, "learning_rate": 4.7666383961652465e-05, "loss": 2.423, "step": 1613000 }, { "epoch": 4.67, "learning_rate": 4.766566031400519e-05, "loss": 2.3995, "step": 1613500 }, { "epoch": 4.67, "learning_rate": 4.766493666635791e-05, "loss": 2.4126, "step": 1614000 }, { "epoch": 4.67, "learning_rate": 4.766421301871064e-05, "loss": 2.3919, "step": 1614500 }, { "epoch": 4.67, "learning_rate": 4.7663490818358654e-05, "loss": 2.399, "step": 1615000 }, { "epoch": 4.68, "learning_rate": 4.7662767170711376e-05, "loss": 2.4192, "step": 1615500 }, { "epoch": 4.68, "learning_rate": 4.76620435230641e-05, "loss": 2.4094, "step": 1616000 }, { "epoch": 4.68, "learning_rate": 4.766131987541682e-05, "loss": 2.396, "step": 1616500 }, { "epoch": 4.68, "learning_rate": 4.766059622776954e-05, "loss": 2.4232, "step": 1617000 }, { "epoch": 4.68, "learning_rate": 4.765987258012227e-05, "loss": 2.3948, "step": 1617500 }, { "epoch": 4.68, "learning_rate": 4.7659148932474994e-05, "loss": 2.4086, "step": 1618000 }, { "epoch": 4.68, "learning_rate": 4.7658425284827716e-05, "loss": 2.4057, "step": 1618500 }, { "epoch": 4.69, "learning_rate": 4.765770308447574e-05, "loss": 2.4064, "step": 1619000 }, { "epoch": 4.69, "learning_rate": 4.765697943682846e-05, "loss": 2.4137, "step": 1619500 }, { "epoch": 4.69, "learning_rate": 4.765625578918118e-05, "loss": 2.3835, "step": 1620000 }, { "epoch": 4.69, "learning_rate": 4.7655532141533905e-05, "loss": 2.39, "step": 1620500 }, { "epoch": 4.69, "learning_rate": 4.765480849388663e-05, "loss": 2.3955, "step": 1621000 }, { "epoch": 4.69, "learning_rate": 4.765408484623935e-05, "loss": 2.3853, "step": 1621500 }, { "epoch": 4.7, "learning_rate": 4.765336119859207e-05, "loss": 2.4171, "step": 1622000 }, { "epoch": 4.7, "learning_rate": 4.7652637550944794e-05, "loss": 2.3885, "step": 1622500 }, { "epoch": 4.7, "learning_rate": 4.7651913903297516e-05, "loss": 2.3954, "step": 1623000 }, { "epoch": 4.7, "learning_rate": 4.765119315024083e-05, "loss": 2.4048, "step": 1623500 }, { "epoch": 4.7, "learning_rate": 4.7650469502593554e-05, "loss": 2.3949, "step": 1624000 }, { "epoch": 4.7, "learning_rate": 4.764974585494628e-05, "loss": 2.4042, "step": 1624500 }, { "epoch": 4.7, "learning_rate": 4.7649022207299006e-05, "loss": 2.4083, "step": 1625000 }, { "epoch": 4.71, "learning_rate": 4.764829855965173e-05, "loss": 2.3785, "step": 1625500 }, { "epoch": 4.71, "learning_rate": 4.764757491200445e-05, "loss": 2.3923, "step": 1626000 }, { "epoch": 4.71, "learning_rate": 4.764685126435717e-05, "loss": 2.3973, "step": 1626500 }, { "epoch": 4.71, "learning_rate": 4.7646127616709895e-05, "loss": 2.4261, "step": 1627000 }, { "epoch": 4.71, "learning_rate": 4.764540396906262e-05, "loss": 2.4099, "step": 1627500 }, { "epoch": 4.71, "learning_rate": 4.764468176871064e-05, "loss": 2.4242, "step": 1628000 }, { "epoch": 4.71, "learning_rate": 4.764395812106336e-05, "loss": 2.4028, "step": 1628500 }, { "epoch": 4.72, "learning_rate": 4.7643234473416084e-05, "loss": 2.3986, "step": 1629000 }, { "epoch": 4.72, "learning_rate": 4.7642510825768806e-05, "loss": 2.3936, "step": 1629500 }, { "epoch": 4.72, "learning_rate": 4.764178717812153e-05, "loss": 2.402, "step": 1630000 }, { "epoch": 4.72, "learning_rate": 4.764106353047425e-05, "loss": 2.4277, "step": 1630500 }, { "epoch": 4.72, "learning_rate": 4.7640342777417566e-05, "loss": 2.4316, "step": 1631000 }, { "epoch": 4.72, "learning_rate": 4.763961912977029e-05, "loss": 2.4045, "step": 1631500 }, { "epoch": 4.72, "learning_rate": 4.763889548212301e-05, "loss": 2.381, "step": 1632000 }, { "epoch": 4.73, "learning_rate": 4.763817183447574e-05, "loss": 2.4049, "step": 1632500 }, { "epoch": 4.73, "learning_rate": 4.763744818682846e-05, "loss": 2.4047, "step": 1633000 }, { "epoch": 4.73, "learning_rate": 4.7636724539181184e-05, "loss": 2.4086, "step": 1633500 }, { "epoch": 4.73, "learning_rate": 4.7636000891533906e-05, "loss": 2.4135, "step": 1634000 }, { "epoch": 4.73, "learning_rate": 4.763527724388663e-05, "loss": 2.3905, "step": 1634500 }, { "epoch": 4.73, "learning_rate": 4.763455359623935e-05, "loss": 2.4004, "step": 1635000 }, { "epoch": 4.73, "learning_rate": 4.7633831395887366e-05, "loss": 2.4013, "step": 1635500 }, { "epoch": 4.74, "learning_rate": 4.7633112090125975e-05, "loss": 2.39, "step": 1636000 }, { "epoch": 4.74, "learning_rate": 4.76323884424787e-05, "loss": 2.3971, "step": 1636500 }, { "epoch": 4.74, "learning_rate": 4.763166624212671e-05, "loss": 2.4329, "step": 1637000 }, { "epoch": 4.74, "learning_rate": 4.7630944041774735e-05, "loss": 2.3996, "step": 1637500 }, { "epoch": 4.74, "learning_rate": 4.763022039412746e-05, "loss": 2.4248, "step": 1638000 }, { "epoch": 4.74, "learning_rate": 4.762949674648018e-05, "loss": 2.4253, "step": 1638500 }, { "epoch": 4.74, "learning_rate": 4.76287730988329e-05, "loss": 2.3683, "step": 1639000 }, { "epoch": 4.75, "learning_rate": 4.7628049451185624e-05, "loss": 2.3863, "step": 1639500 }, { "epoch": 4.75, "learning_rate": 4.7627325803538346e-05, "loss": 2.3909, "step": 1640000 }, { "epoch": 4.75, "learning_rate": 4.7626602155891075e-05, "loss": 2.4019, "step": 1640500 }, { "epoch": 4.75, "learning_rate": 4.76258785082438e-05, "loss": 2.4288, "step": 1641000 }, { "epoch": 4.75, "learning_rate": 4.762515486059652e-05, "loss": 2.4021, "step": 1641500 }, { "epoch": 4.75, "learning_rate": 4.762443121294924e-05, "loss": 2.4128, "step": 1642000 }, { "epoch": 4.75, "learning_rate": 4.7623707565301964e-05, "loss": 2.3846, "step": 1642500 }, { "epoch": 4.76, "learning_rate": 4.7622983917654687e-05, "loss": 2.3924, "step": 1643000 }, { "epoch": 4.76, "learning_rate": 4.762226171730271e-05, "loss": 2.3907, "step": 1643500 }, { "epoch": 4.76, "learning_rate": 4.762153806965543e-05, "loss": 2.3863, "step": 1644000 }, { "epoch": 4.76, "learning_rate": 4.762081442200815e-05, "loss": 2.4026, "step": 1644500 }, { "epoch": 4.76, "learning_rate": 4.7620090774360876e-05, "loss": 2.4063, "step": 1645000 }, { "epoch": 4.76, "learning_rate": 4.76193671267136e-05, "loss": 2.3891, "step": 1645500 }, { "epoch": 4.76, "learning_rate": 4.761864347906632e-05, "loss": 2.4053, "step": 1646000 }, { "epoch": 4.77, "learning_rate": 4.761791983141904e-05, "loss": 2.3859, "step": 1646500 }, { "epoch": 4.77, "learning_rate": 4.7617196183771765e-05, "loss": 2.3951, "step": 1647000 }, { "epoch": 4.77, "learning_rate": 4.761647253612449e-05, "loss": 2.4227, "step": 1647500 }, { "epoch": 4.77, "learning_rate": 4.7615748888477216e-05, "loss": 2.4123, "step": 1648000 }, { "epoch": 4.77, "learning_rate": 4.761502524082994e-05, "loss": 2.3933, "step": 1648500 }, { "epoch": 4.77, "learning_rate": 4.761430159318267e-05, "loss": 2.4027, "step": 1649000 }, { "epoch": 4.77, "learning_rate": 4.761357794553539e-05, "loss": 2.407, "step": 1649500 }, { "epoch": 4.78, "learning_rate": 4.761285429788811e-05, "loss": 2.3933, "step": 1650000 }, { "epoch": 4.78, "learning_rate": 4.7612130650240834e-05, "loss": 2.385, "step": 1650500 }, { "epoch": 4.78, "learning_rate": 4.7611407002593556e-05, "loss": 2.3984, "step": 1651000 }, { "epoch": 4.78, "learning_rate": 4.761068335494628e-05, "loss": 2.4191, "step": 1651500 }, { "epoch": 4.78, "learning_rate": 4.7609959707299e-05, "loss": 2.4072, "step": 1652000 }, { "epoch": 4.78, "learning_rate": 4.760923605965172e-05, "loss": 2.3882, "step": 1652500 }, { "epoch": 4.78, "learning_rate": 4.7608512412004445e-05, "loss": 2.4072, "step": 1653000 }, { "epoch": 4.79, "learning_rate": 4.760778876435717e-05, "loss": 2.3693, "step": 1653500 }, { "epoch": 4.79, "learning_rate": 4.760706656400519e-05, "loss": 2.3927, "step": 1654000 }, { "epoch": 4.79, "learning_rate": 4.760634291635791e-05, "loss": 2.4096, "step": 1654500 }, { "epoch": 4.79, "learning_rate": 4.7605620716005934e-05, "loss": 2.3886, "step": 1655000 }, { "epoch": 4.79, "learning_rate": 4.7604897068358656e-05, "loss": 2.411, "step": 1655500 }, { "epoch": 4.79, "learning_rate": 4.760417342071138e-05, "loss": 2.4072, "step": 1656000 }, { "epoch": 4.79, "learning_rate": 4.76034497730641e-05, "loss": 2.3964, "step": 1656500 }, { "epoch": 4.8, "learning_rate": 4.760272612541682e-05, "loss": 2.3904, "step": 1657000 }, { "epoch": 4.8, "learning_rate": 4.7602002477769545e-05, "loss": 2.4005, "step": 1657500 }, { "epoch": 4.8, "learning_rate": 4.760127883012227e-05, "loss": 2.3994, "step": 1658000 }, { "epoch": 4.8, "learning_rate": 4.760055518247499e-05, "loss": 2.397, "step": 1658500 }, { "epoch": 4.8, "learning_rate": 4.759983153482772e-05, "loss": 2.3958, "step": 1659000 }, { "epoch": 4.8, "learning_rate": 4.759910788718044e-05, "loss": 2.4105, "step": 1659500 }, { "epoch": 4.81, "learning_rate": 4.7598385686828456e-05, "loss": 2.3894, "step": 1660000 }, { "epoch": 4.81, "learning_rate": 4.759766203918118e-05, "loss": 2.3845, "step": 1660500 }, { "epoch": 4.81, "learning_rate": 4.7596939838829194e-05, "loss": 2.429, "step": 1661000 }, { "epoch": 4.81, "learning_rate": 4.7596216191181916e-05, "loss": 2.3914, "step": 1661500 }, { "epoch": 4.81, "learning_rate": 4.7595492543534646e-05, "loss": 2.4105, "step": 1662000 }, { "epoch": 4.81, "learning_rate": 4.759476889588737e-05, "loss": 2.4035, "step": 1662500 }, { "epoch": 4.81, "learning_rate": 4.75940452482401e-05, "loss": 2.3958, "step": 1663000 }, { "epoch": 4.82, "learning_rate": 4.759332160059282e-05, "loss": 2.4198, "step": 1663500 }, { "epoch": 4.82, "learning_rate": 4.7592599400240835e-05, "loss": 2.426, "step": 1664000 }, { "epoch": 4.82, "learning_rate": 4.759187575259356e-05, "loss": 2.4335, "step": 1664500 }, { "epoch": 4.82, "learning_rate": 4.759115210494628e-05, "loss": 2.4085, "step": 1665000 }, { "epoch": 4.82, "learning_rate": 4.7590428457299e-05, "loss": 2.41, "step": 1665500 }, { "epoch": 4.82, "learning_rate": 4.7589704809651723e-05, "loss": 2.4087, "step": 1666000 }, { "epoch": 4.82, "learning_rate": 4.7588981162004446e-05, "loss": 2.4285, "step": 1666500 }, { "epoch": 4.83, "learning_rate": 4.758825751435717e-05, "loss": 2.3893, "step": 1667000 }, { "epoch": 4.83, "learning_rate": 4.758753386670989e-05, "loss": 2.3787, "step": 1667500 }, { "epoch": 4.83, "learning_rate": 4.758681021906262e-05, "loss": 2.3992, "step": 1668000 }, { "epoch": 4.83, "learning_rate": 4.7586088018710635e-05, "loss": 2.3929, "step": 1668500 }, { "epoch": 4.83, "learning_rate": 4.758536437106336e-05, "loss": 2.3992, "step": 1669000 }, { "epoch": 4.83, "learning_rate": 4.758464072341608e-05, "loss": 2.4003, "step": 1669500 }, { "epoch": 4.83, "learning_rate": 4.758391707576881e-05, "loss": 2.3888, "step": 1670000 }, { "epoch": 4.84, "learning_rate": 4.7583194875416824e-05, "loss": 2.3795, "step": 1670500 }, { "epoch": 4.84, "learning_rate": 4.7582471227769546e-05, "loss": 2.4026, "step": 1671000 }, { "epoch": 4.84, "learning_rate": 4.758174758012227e-05, "loss": 2.4039, "step": 1671500 }, { "epoch": 4.84, "learning_rate": 4.758102537977029e-05, "loss": 2.4054, "step": 1672000 }, { "epoch": 4.84, "learning_rate": 4.758030173212301e-05, "loss": 2.3865, "step": 1672500 }, { "epoch": 4.84, "learning_rate": 4.7579578084475735e-05, "loss": 2.3997, "step": 1673000 }, { "epoch": 4.84, "learning_rate": 4.757885443682846e-05, "loss": 2.4018, "step": 1673500 }, { "epoch": 4.85, "learning_rate": 4.757813223647647e-05, "loss": 2.3884, "step": 1674000 }, { "epoch": 4.85, "learning_rate": 4.7577408588829195e-05, "loss": 2.4059, "step": 1674500 }, { "epoch": 4.85, "learning_rate": 4.757668638847722e-05, "loss": 2.3905, "step": 1675000 }, { "epoch": 4.85, "learning_rate": 4.757596274082994e-05, "loss": 2.4172, "step": 1675500 }, { "epoch": 4.85, "learning_rate": 4.757523909318266e-05, "loss": 2.393, "step": 1676000 }, { "epoch": 4.85, "learning_rate": 4.7574515445535384e-05, "loss": 2.4145, "step": 1676500 }, { "epoch": 4.85, "learning_rate": 4.7573791797888106e-05, "loss": 2.3721, "step": 1677000 }, { "epoch": 4.86, "learning_rate": 4.7573068150240835e-05, "loss": 2.4038, "step": 1677500 }, { "epoch": 4.86, "learning_rate": 4.757234450259356e-05, "loss": 2.3879, "step": 1678000 }, { "epoch": 4.86, "learning_rate": 4.757162085494628e-05, "loss": 2.4002, "step": 1678500 }, { "epoch": 4.86, "learning_rate": 4.7570897207299e-05, "loss": 2.399, "step": 1679000 }, { "epoch": 4.86, "learning_rate": 4.7570173559651724e-05, "loss": 2.3991, "step": 1679500 }, { "epoch": 4.86, "learning_rate": 4.7569449912004446e-05, "loss": 2.3853, "step": 1680000 }, { "epoch": 4.86, "learning_rate": 4.756872626435717e-05, "loss": 2.4077, "step": 1680500 }, { "epoch": 4.87, "learning_rate": 4.75680026167099e-05, "loss": 2.3924, "step": 1681000 }, { "epoch": 4.87, "learning_rate": 4.756728041635791e-05, "loss": 2.3925, "step": 1681500 }, { "epoch": 4.87, "learning_rate": 4.7566556768710636e-05, "loss": 2.4028, "step": 1682000 }, { "epoch": 4.87, "learning_rate": 4.756583456835865e-05, "loss": 2.4064, "step": 1682500 }, { "epoch": 4.87, "learning_rate": 4.756511092071137e-05, "loss": 2.3987, "step": 1683000 }, { "epoch": 4.87, "learning_rate": 4.7564387273064096e-05, "loss": 2.4013, "step": 1683500 }, { "epoch": 4.87, "learning_rate": 4.756366507271212e-05, "loss": 2.3857, "step": 1684000 }, { "epoch": 4.88, "learning_rate": 4.756294142506484e-05, "loss": 2.4104, "step": 1684500 }, { "epoch": 4.88, "learning_rate": 4.756221777741757e-05, "loss": 2.3847, "step": 1685000 }, { "epoch": 4.88, "learning_rate": 4.756149412977029e-05, "loss": 2.3875, "step": 1685500 }, { "epoch": 4.88, "learning_rate": 4.7560770482123014e-05, "loss": 2.3956, "step": 1686000 }, { "epoch": 4.88, "learning_rate": 4.7560046834475736e-05, "loss": 2.3835, "step": 1686500 }, { "epoch": 4.88, "learning_rate": 4.755932318682846e-05, "loss": 2.4119, "step": 1687000 }, { "epoch": 4.88, "learning_rate": 4.755859953918118e-05, "loss": 2.4088, "step": 1687500 }, { "epoch": 4.89, "learning_rate": 4.7557877338829196e-05, "loss": 2.3904, "step": 1688000 }, { "epoch": 4.89, "learning_rate": 4.7557153691181925e-05, "loss": 2.3946, "step": 1688500 }, { "epoch": 4.89, "learning_rate": 4.755643004353465e-05, "loss": 2.395, "step": 1689000 }, { "epoch": 4.89, "learning_rate": 4.755570639588737e-05, "loss": 2.3964, "step": 1689500 }, { "epoch": 4.89, "learning_rate": 4.755498274824009e-05, "loss": 2.3735, "step": 1690000 }, { "epoch": 4.89, "learning_rate": 4.755426054788811e-05, "loss": 2.3745, "step": 1690500 }, { "epoch": 4.89, "learning_rate": 4.755353690024083e-05, "loss": 2.4106, "step": 1691000 }, { "epoch": 4.9, "learning_rate": 4.755281325259355e-05, "loss": 2.4186, "step": 1691500 }, { "epoch": 4.9, "learning_rate": 4.7552089604946274e-05, "loss": 2.3922, "step": 1692000 }, { "epoch": 4.9, "learning_rate": 4.7551367404594296e-05, "loss": 2.4133, "step": 1692500 }, { "epoch": 4.9, "learning_rate": 4.7550643756947025e-05, "loss": 2.3982, "step": 1693000 }, { "epoch": 4.9, "learning_rate": 4.754992010929975e-05, "loss": 2.3984, "step": 1693500 }, { "epoch": 4.9, "learning_rate": 4.754919646165247e-05, "loss": 2.414, "step": 1694000 }, { "epoch": 4.9, "learning_rate": 4.754847281400519e-05, "loss": 2.3721, "step": 1694500 }, { "epoch": 4.91, "learning_rate": 4.7547749166357914e-05, "loss": 2.4146, "step": 1695000 }, { "epoch": 4.91, "learning_rate": 4.7547025518710636e-05, "loss": 2.4154, "step": 1695500 }, { "epoch": 4.91, "learning_rate": 4.754630187106336e-05, "loss": 2.3934, "step": 1696000 }, { "epoch": 4.91, "learning_rate": 4.7545579670711374e-05, "loss": 2.3936, "step": 1696500 }, { "epoch": 4.91, "learning_rate": 4.7544856023064096e-05, "loss": 2.3903, "step": 1697000 }, { "epoch": 4.91, "learning_rate": 4.7544132375416825e-05, "loss": 2.3697, "step": 1697500 }, { "epoch": 4.92, "learning_rate": 4.754341017506484e-05, "loss": 2.4071, "step": 1698000 }, { "epoch": 4.92, "learning_rate": 4.754268652741756e-05, "loss": 2.413, "step": 1698500 }, { "epoch": 4.92, "learning_rate": 4.7541962879770285e-05, "loss": 2.4157, "step": 1699000 }, { "epoch": 4.92, "learning_rate": 4.754123923212301e-05, "loss": 2.3772, "step": 1699500 }, { "epoch": 4.92, "learning_rate": 4.754051558447574e-05, "loss": 2.407, "step": 1700000 }, { "epoch": 4.92, "learning_rate": 4.753979193682846e-05, "loss": 2.414, "step": 1700500 }, { "epoch": 4.92, "learning_rate": 4.753906828918118e-05, "loss": 2.3995, "step": 1701000 }, { "epoch": 4.93, "learning_rate": 4.75383446415339e-05, "loss": 2.4001, "step": 1701500 }, { "epoch": 4.93, "learning_rate": 4.7537620993886626e-05, "loss": 2.4058, "step": 1702000 }, { "epoch": 4.93, "learning_rate": 4.753689734623935e-05, "loss": 2.4071, "step": 1702500 }, { "epoch": 4.93, "learning_rate": 4.753617369859208e-05, "loss": 2.3934, "step": 1703000 }, { "epoch": 4.93, "learning_rate": 4.75354500509448e-05, "loss": 2.3817, "step": 1703500 }, { "epoch": 4.93, "learning_rate": 4.7534727850592815e-05, "loss": 2.3877, "step": 1704000 }, { "epoch": 4.93, "learning_rate": 4.753400420294554e-05, "loss": 2.4008, "step": 1704500 }, { "epoch": 4.94, "learning_rate": 4.753328055529826e-05, "loss": 2.4216, "step": 1705000 }, { "epoch": 4.94, "learning_rate": 4.753255690765098e-05, "loss": 2.3879, "step": 1705500 }, { "epoch": 4.94, "learning_rate": 4.7531833260003704e-05, "loss": 2.3824, "step": 1706000 }, { "epoch": 4.94, "learning_rate": 4.7531109612356426e-05, "loss": 2.4059, "step": 1706500 }, { "epoch": 4.94, "learning_rate": 4.7530385964709155e-05, "loss": 2.4005, "step": 1707000 }, { "epoch": 4.94, "learning_rate": 4.752966376435718e-05, "loss": 2.3979, "step": 1707500 }, { "epoch": 4.94, "learning_rate": 4.75289401167099e-05, "loss": 2.3794, "step": 1708000 }, { "epoch": 4.95, "learning_rate": 4.752821646906262e-05, "loss": 2.3907, "step": 1708500 }, { "epoch": 4.95, "learning_rate": 4.7527492821415344e-05, "loss": 2.4196, "step": 1709000 }, { "epoch": 4.95, "learning_rate": 4.7526769173768066e-05, "loss": 2.4044, "step": 1709500 }, { "epoch": 4.95, "learning_rate": 4.752604697341608e-05, "loss": 2.3837, "step": 1710000 }, { "epoch": 4.95, "learning_rate": 4.75253247730641e-05, "loss": 2.3791, "step": 1710500 }, { "epoch": 4.95, "learning_rate": 4.7524601125416826e-05, "loss": 2.3985, "step": 1711000 }, { "epoch": 4.95, "learning_rate": 4.752387747776955e-05, "loss": 2.3697, "step": 1711500 }, { "epoch": 4.96, "learning_rate": 4.752315383012227e-05, "loss": 2.3993, "step": 1712000 }, { "epoch": 4.96, "learning_rate": 4.752243018247499e-05, "loss": 2.398, "step": 1712500 }, { "epoch": 4.96, "learning_rate": 4.7521706534827715e-05, "loss": 2.4138, "step": 1713000 }, { "epoch": 4.96, "learning_rate": 4.752098288718044e-05, "loss": 2.4165, "step": 1713500 }, { "epoch": 4.96, "learning_rate": 4.752025923953316e-05, "loss": 2.386, "step": 1714000 }, { "epoch": 4.96, "learning_rate": 4.751953559188589e-05, "loss": 2.394, "step": 1714500 }, { "epoch": 4.96, "learning_rate": 4.751881194423861e-05, "loss": 2.3911, "step": 1715000 }, { "epoch": 4.97, "learning_rate": 4.7518089743886626e-05, "loss": 2.3885, "step": 1715500 }, { "epoch": 4.97, "learning_rate": 4.751736609623935e-05, "loss": 2.4196, "step": 1716000 }, { "epoch": 4.97, "learning_rate": 4.751664244859208e-05, "loss": 2.3817, "step": 1716500 }, { "epoch": 4.97, "learning_rate": 4.75159188009448e-05, "loss": 2.3664, "step": 1717000 }, { "epoch": 4.97, "learning_rate": 4.751519515329752e-05, "loss": 2.3953, "step": 1717500 }, { "epoch": 4.97, "learning_rate": 4.751447295294554e-05, "loss": 2.3918, "step": 1718000 }, { "epoch": 4.97, "learning_rate": 4.751374930529826e-05, "loss": 2.3905, "step": 1718500 }, { "epoch": 4.98, "learning_rate": 4.751302565765098e-05, "loss": 2.3795, "step": 1719000 }, { "epoch": 4.98, "learning_rate": 4.7512303457299004e-05, "loss": 2.3891, "step": 1719500 }, { "epoch": 4.98, "learning_rate": 4.751157980965173e-05, "loss": 2.4072, "step": 1720000 }, { "epoch": 4.98, "learning_rate": 4.751085616200445e-05, "loss": 2.3928, "step": 1720500 }, { "epoch": 4.98, "learning_rate": 4.751013251435717e-05, "loss": 2.4127, "step": 1721000 }, { "epoch": 4.98, "learning_rate": 4.750940886670989e-05, "loss": 2.3841, "step": 1721500 }, { "epoch": 4.98, "learning_rate": 4.750868521906262e-05, "loss": 2.3948, "step": 1722000 }, { "epoch": 4.99, "learning_rate": 4.7507961571415345e-05, "loss": 2.4033, "step": 1722500 }, { "epoch": 4.99, "learning_rate": 4.750723792376807e-05, "loss": 2.3875, "step": 1723000 }, { "epoch": 4.99, "learning_rate": 4.750651427612079e-05, "loss": 2.4035, "step": 1723500 }, { "epoch": 4.99, "learning_rate": 4.750579062847351e-05, "loss": 2.3926, "step": 1724000 }, { "epoch": 4.99, "learning_rate": 4.750506842812153e-05, "loss": 2.4168, "step": 1724500 }, { "epoch": 4.99, "learning_rate": 4.7504344780474256e-05, "loss": 2.3791, "step": 1725000 }, { "epoch": 4.99, "learning_rate": 4.750362113282698e-05, "loss": 2.4007, "step": 1725500 }, { "epoch": 5.0, "learning_rate": 4.7502898932474994e-05, "loss": 2.4131, "step": 1726000 }, { "epoch": 5.0, "learning_rate": 4.7502175284827716e-05, "loss": 2.4227, "step": 1726500 }, { "epoch": 5.0, "learning_rate": 4.750145163718044e-05, "loss": 2.3993, "step": 1727000 }, { "epoch": 5.0, "eval_accuracy": 0.6404864024640362, "eval_accuracy_mlm": 0.6024742252199724, "eval_accuracy_nsp": 0.8445596329854762, "eval_loss": 2.3650872707366943, "eval_runtime": 330.6241, "eval_samples_per_second": 1319.885, "eval_steps_per_second": 54.996, "step": 1727360 }, { "epoch": 5.0, "learning_rate": 4.7500729436828454e-05, "loss": 2.4071, "step": 1727500 }, { "epoch": 5.0, "learning_rate": 4.7500005789181176e-05, "loss": 2.367, "step": 1728000 }, { "epoch": 5.0, "learning_rate": 4.7499282141533905e-05, "loss": 2.3755, "step": 1728500 }, { "epoch": 5.0, "learning_rate": 4.749855849388663e-05, "loss": 2.3854, "step": 1729000 }, { "epoch": 5.01, "learning_rate": 4.749783484623935e-05, "loss": 2.3872, "step": 1729500 }, { "epoch": 5.01, "learning_rate": 4.749711119859208e-05, "loss": 2.3991, "step": 1730000 }, { "epoch": 5.01, "learning_rate": 4.74963875509448e-05, "loss": 2.3943, "step": 1730500 }, { "epoch": 5.01, "learning_rate": 4.749566390329752e-05, "loss": 2.3519, "step": 1731000 }, { "epoch": 5.01, "learning_rate": 4.7494940255650245e-05, "loss": 2.383, "step": 1731500 }, { "epoch": 5.01, "learning_rate": 4.749421660800297e-05, "loss": 2.3782, "step": 1732000 }, { "epoch": 5.01, "learning_rate": 4.749349296035569e-05, "loss": 2.3797, "step": 1732500 }, { "epoch": 5.02, "learning_rate": 4.749276931270841e-05, "loss": 2.3814, "step": 1733000 }, { "epoch": 5.02, "learning_rate": 4.7492045665061134e-05, "loss": 2.3829, "step": 1733500 }, { "epoch": 5.02, "learning_rate": 4.7491323464709156e-05, "loss": 2.3858, "step": 1734000 }, { "epoch": 5.02, "learning_rate": 4.749059981706188e-05, "loss": 2.3658, "step": 1734500 }, { "epoch": 5.02, "learning_rate": 4.7489877616709894e-05, "loss": 2.378, "step": 1735000 }, { "epoch": 5.02, "learning_rate": 4.748915541635791e-05, "loss": 2.3784, "step": 1735500 }, { "epoch": 5.03, "learning_rate": 4.748843176871063e-05, "loss": 2.3898, "step": 1736000 }, { "epoch": 5.03, "learning_rate": 4.7487708121063354e-05, "loss": 2.3917, "step": 1736500 }, { "epoch": 5.03, "learning_rate": 4.7486984473416076e-05, "loss": 2.3947, "step": 1737000 }, { "epoch": 5.03, "learning_rate": 4.7486260825768805e-05, "loss": 2.3529, "step": 1737500 }, { "epoch": 5.03, "learning_rate": 4.748553717812153e-05, "loss": 2.3713, "step": 1738000 }, { "epoch": 5.03, "learning_rate": 4.748481497776955e-05, "loss": 2.3602, "step": 1738500 }, { "epoch": 5.03, "learning_rate": 4.748409133012227e-05, "loss": 2.346, "step": 1739000 }, { "epoch": 5.04, "learning_rate": 4.7483367682474994e-05, "loss": 2.3819, "step": 1739500 }, { "epoch": 5.04, "learning_rate": 4.748264403482772e-05, "loss": 2.3741, "step": 1740000 }, { "epoch": 5.04, "learning_rate": 4.748192038718044e-05, "loss": 2.3783, "step": 1740500 }, { "epoch": 5.04, "learning_rate": 4.748119673953316e-05, "loss": 2.3638, "step": 1741000 }, { "epoch": 5.04, "learning_rate": 4.748047309188588e-05, "loss": 2.3874, "step": 1741500 }, { "epoch": 5.04, "learning_rate": 4.7479749444238606e-05, "loss": 2.3663, "step": 1742000 }, { "epoch": 5.04, "learning_rate": 4.747902724388663e-05, "loss": 2.3445, "step": 1742500 }, { "epoch": 5.05, "learning_rate": 4.747830359623935e-05, "loss": 2.3853, "step": 1743000 }, { "epoch": 5.05, "learning_rate": 4.747757994859207e-05, "loss": 2.3965, "step": 1743500 }, { "epoch": 5.05, "learning_rate": 4.7476856300944795e-05, "loss": 2.4022, "step": 1744000 }, { "epoch": 5.05, "learning_rate": 4.7476132653297524e-05, "loss": 2.3874, "step": 1744500 }, { "epoch": 5.05, "learning_rate": 4.747541045294554e-05, "loss": 2.3794, "step": 1745000 }, { "epoch": 5.05, "learning_rate": 4.747468680529826e-05, "loss": 2.3763, "step": 1745500 }, { "epoch": 5.05, "learning_rate": 4.7473963157650984e-05, "loss": 2.3632, "step": 1746000 }, { "epoch": 5.06, "learning_rate": 4.7473239510003706e-05, "loss": 2.3596, "step": 1746500 }, { "epoch": 5.06, "learning_rate": 4.747251586235643e-05, "loss": 2.3961, "step": 1747000 }, { "epoch": 5.06, "learning_rate": 4.747179221470916e-05, "loss": 2.3573, "step": 1747500 }, { "epoch": 5.06, "learning_rate": 4.747106856706188e-05, "loss": 2.3902, "step": 1748000 }, { "epoch": 5.06, "learning_rate": 4.74703449194146e-05, "loss": 2.3707, "step": 1748500 }, { "epoch": 5.06, "learning_rate": 4.7469621271767324e-05, "loss": 2.3878, "step": 1749000 }, { "epoch": 5.06, "learning_rate": 4.746889907141534e-05, "loss": 2.3955, "step": 1749500 }, { "epoch": 5.07, "learning_rate": 4.746817542376806e-05, "loss": 2.3746, "step": 1750000 }, { "epoch": 5.07, "learning_rate": 4.7467453223416084e-05, "loss": 2.3771, "step": 1750500 }, { "epoch": 5.07, "learning_rate": 4.7466729575768806e-05, "loss": 2.3907, "step": 1751000 }, { "epoch": 5.07, "learning_rate": 4.746600592812153e-05, "loss": 2.3933, "step": 1751500 }, { "epoch": 5.07, "learning_rate": 4.746528228047426e-05, "loss": 2.3892, "step": 1752000 }, { "epoch": 5.07, "learning_rate": 4.746455863282698e-05, "loss": 2.3967, "step": 1752500 }, { "epoch": 5.07, "learning_rate": 4.74638349851797e-05, "loss": 2.4095, "step": 1753000 }, { "epoch": 5.08, "learning_rate": 4.7463111337532424e-05, "loss": 2.3727, "step": 1753500 }, { "epoch": 5.08, "learning_rate": 4.7462387689885146e-05, "loss": 2.3861, "step": 1754000 }, { "epoch": 5.08, "learning_rate": 4.746166548953316e-05, "loss": 2.3774, "step": 1754500 }, { "epoch": 5.08, "learning_rate": 4.7460941841885884e-05, "loss": 2.4048, "step": 1755000 }, { "epoch": 5.08, "learning_rate": 4.7460218194238606e-05, "loss": 2.384, "step": 1755500 }, { "epoch": 5.08, "learning_rate": 4.7459494546591335e-05, "loss": 2.3666, "step": 1756000 }, { "epoch": 5.08, "learning_rate": 4.745877234623935e-05, "loss": 2.3775, "step": 1756500 }, { "epoch": 5.09, "learning_rate": 4.745804869859207e-05, "loss": 2.3725, "step": 1757000 }, { "epoch": 5.09, "learning_rate": 4.7457325050944795e-05, "loss": 2.3778, "step": 1757500 }, { "epoch": 5.09, "learning_rate": 4.745660285059281e-05, "loss": 2.3716, "step": 1758000 }, { "epoch": 5.09, "learning_rate": 4.745587920294553e-05, "loss": 2.3939, "step": 1758500 }, { "epoch": 5.09, "learning_rate": 4.7455155555298255e-05, "loss": 2.3838, "step": 1759000 }, { "epoch": 5.09, "learning_rate": 4.7454431907650984e-05, "loss": 2.3606, "step": 1759500 }, { "epoch": 5.09, "learning_rate": 4.745370826000371e-05, "loss": 2.374, "step": 1760000 }, { "epoch": 5.1, "learning_rate": 4.745298605965173e-05, "loss": 2.3835, "step": 1760500 }, { "epoch": 5.1, "learning_rate": 4.745226241200445e-05, "loss": 2.3654, "step": 1761000 }, { "epoch": 5.1, "learning_rate": 4.7451538764357174e-05, "loss": 2.3893, "step": 1761500 }, { "epoch": 5.1, "learning_rate": 4.7450815116709896e-05, "loss": 2.3515, "step": 1762000 }, { "epoch": 5.1, "learning_rate": 4.745009146906262e-05, "loss": 2.353, "step": 1762500 }, { "epoch": 5.1, "learning_rate": 4.744936782141534e-05, "loss": 2.3741, "step": 1763000 }, { "epoch": 5.1, "learning_rate": 4.744864417376806e-05, "loss": 2.3632, "step": 1763500 }, { "epoch": 5.11, "learning_rate": 4.7447920526120785e-05, "loss": 2.3711, "step": 1764000 }, { "epoch": 5.11, "learning_rate": 4.744719832576881e-05, "loss": 2.3788, "step": 1764500 }, { "epoch": 5.11, "learning_rate": 4.744647467812153e-05, "loss": 2.3975, "step": 1765000 }, { "epoch": 5.11, "learning_rate": 4.744575103047425e-05, "loss": 2.3832, "step": 1765500 }, { "epoch": 5.11, "learning_rate": 4.7445027382826974e-05, "loss": 2.3882, "step": 1766000 }, { "epoch": 5.11, "learning_rate": 4.7444303735179696e-05, "loss": 2.4107, "step": 1766500 }, { "epoch": 5.11, "learning_rate": 4.7443580087532425e-05, "loss": 2.3764, "step": 1767000 }, { "epoch": 5.12, "learning_rate": 4.744285643988515e-05, "loss": 2.3906, "step": 1767500 }, { "epoch": 5.12, "learning_rate": 4.744213423953316e-05, "loss": 2.3683, "step": 1768000 }, { "epoch": 5.12, "learning_rate": 4.7441412039181185e-05, "loss": 2.3758, "step": 1768500 }, { "epoch": 5.12, "learning_rate": 4.744068839153391e-05, "loss": 2.3808, "step": 1769000 }, { "epoch": 5.12, "learning_rate": 4.743996474388663e-05, "loss": 2.3902, "step": 1769500 }, { "epoch": 5.12, "learning_rate": 4.743924109623935e-05, "loss": 2.3682, "step": 1770000 }, { "epoch": 5.12, "learning_rate": 4.7438517448592074e-05, "loss": 2.3682, "step": 1770500 }, { "epoch": 5.13, "learning_rate": 4.7437793800944796e-05, "loss": 2.3859, "step": 1771000 }, { "epoch": 5.13, "learning_rate": 4.743707015329752e-05, "loss": 2.3927, "step": 1771500 }, { "epoch": 5.13, "learning_rate": 4.743634650565024e-05, "loss": 2.3812, "step": 1772000 }, { "epoch": 5.13, "learning_rate": 4.743562285800296e-05, "loss": 2.3971, "step": 1772500 }, { "epoch": 5.13, "learning_rate": 4.7434899210355685e-05, "loss": 2.3739, "step": 1773000 }, { "epoch": 5.13, "learning_rate": 4.743417701000371e-05, "loss": 2.3691, "step": 1773500 }, { "epoch": 5.14, "learning_rate": 4.743345336235643e-05, "loss": 2.395, "step": 1774000 }, { "epoch": 5.14, "learning_rate": 4.743272971470916e-05, "loss": 2.3592, "step": 1774500 }, { "epoch": 5.14, "learning_rate": 4.743200606706188e-05, "loss": 2.4117, "step": 1775000 }, { "epoch": 5.14, "learning_rate": 4.74312824194146e-05, "loss": 2.3694, "step": 1775500 }, { "epoch": 5.14, "learning_rate": 4.7430558771767325e-05, "loss": 2.3732, "step": 1776000 }, { "epoch": 5.14, "learning_rate": 4.742983512412005e-05, "loss": 2.3718, "step": 1776500 }, { "epoch": 5.14, "learning_rate": 4.742911147647277e-05, "loss": 2.3877, "step": 1777000 }, { "epoch": 5.15, "learning_rate": 4.742838782882549e-05, "loss": 2.3647, "step": 1777500 }, { "epoch": 5.15, "learning_rate": 4.742766562847351e-05, "loss": 2.3959, "step": 1778000 }, { "epoch": 5.15, "learning_rate": 4.742694342812153e-05, "loss": 2.3962, "step": 1778500 }, { "epoch": 5.15, "learning_rate": 4.742621978047425e-05, "loss": 2.3691, "step": 1779000 }, { "epoch": 5.15, "learning_rate": 4.7425496132826974e-05, "loss": 2.401, "step": 1779500 }, { "epoch": 5.15, "learning_rate": 4.742477393247499e-05, "loss": 2.3888, "step": 1780000 }, { "epoch": 5.15, "learning_rate": 4.742405028482771e-05, "loss": 2.3976, "step": 1780500 }, { "epoch": 5.16, "learning_rate": 4.7423326637180435e-05, "loss": 2.3761, "step": 1781000 }, { "epoch": 5.16, "learning_rate": 4.7422602989533164e-05, "loss": 2.367, "step": 1781500 }, { "epoch": 5.16, "learning_rate": 4.7421879341885886e-05, "loss": 2.4191, "step": 1782000 }, { "epoch": 5.16, "learning_rate": 4.742115714153391e-05, "loss": 2.3683, "step": 1782500 }, { "epoch": 5.16, "learning_rate": 4.742043349388663e-05, "loss": 2.3726, "step": 1783000 }, { "epoch": 5.16, "learning_rate": 4.741970984623935e-05, "loss": 2.3932, "step": 1783500 }, { "epoch": 5.16, "learning_rate": 4.7418986198592075e-05, "loss": 2.3841, "step": 1784000 }, { "epoch": 5.17, "learning_rate": 4.74182625509448e-05, "loss": 2.3792, "step": 1784500 }, { "epoch": 5.17, "learning_rate": 4.741753890329752e-05, "loss": 2.3805, "step": 1785000 }, { "epoch": 5.17, "learning_rate": 4.741681525565024e-05, "loss": 2.383, "step": 1785500 }, { "epoch": 5.17, "learning_rate": 4.7416091608002964e-05, "loss": 2.389, "step": 1786000 }, { "epoch": 5.17, "learning_rate": 4.7415367960355686e-05, "loss": 2.3876, "step": 1786500 }, { "epoch": 5.17, "learning_rate": 4.7414644312708415e-05, "loss": 2.3426, "step": 1787000 }, { "epoch": 5.17, "learning_rate": 4.741392066506114e-05, "loss": 2.3482, "step": 1787500 }, { "epoch": 5.18, "learning_rate": 4.741319701741386e-05, "loss": 2.3731, "step": 1788000 }, { "epoch": 5.18, "learning_rate": 4.741247336976658e-05, "loss": 2.3767, "step": 1788500 }, { "epoch": 5.18, "learning_rate": 4.74117511694146e-05, "loss": 2.3832, "step": 1789000 }, { "epoch": 5.18, "learning_rate": 4.7411027521767326e-05, "loss": 2.3954, "step": 1789500 }, { "epoch": 5.18, "learning_rate": 4.741030532141534e-05, "loss": 2.3708, "step": 1790000 }, { "epoch": 5.18, "learning_rate": 4.7409581673768064e-05, "loss": 2.3777, "step": 1790500 }, { "epoch": 5.18, "learning_rate": 4.7408858026120786e-05, "loss": 2.388, "step": 1791000 }, { "epoch": 5.19, "learning_rate": 4.7408134378473515e-05, "loss": 2.3924, "step": 1791500 }, { "epoch": 5.19, "learning_rate": 4.740741073082624e-05, "loss": 2.4062, "step": 1792000 }, { "epoch": 5.19, "learning_rate": 4.740668708317896e-05, "loss": 2.3952, "step": 1792500 }, { "epoch": 5.19, "learning_rate": 4.740596343553168e-05, "loss": 2.3561, "step": 1793000 }, { "epoch": 5.19, "learning_rate": 4.7405239787884404e-05, "loss": 2.3949, "step": 1793500 }, { "epoch": 5.19, "learning_rate": 4.740451758753242e-05, "loss": 2.3595, "step": 1794000 }, { "epoch": 5.19, "learning_rate": 4.740379393988514e-05, "loss": 2.3916, "step": 1794500 }, { "epoch": 5.2, "learning_rate": 4.7403070292237864e-05, "loss": 2.3747, "step": 1795000 }, { "epoch": 5.2, "learning_rate": 4.7402346644590586e-05, "loss": 2.3892, "step": 1795500 }, { "epoch": 5.2, "learning_rate": 4.740162444423861e-05, "loss": 2.3946, "step": 1796000 }, { "epoch": 5.2, "learning_rate": 4.740090079659133e-05, "loss": 2.3741, "step": 1796500 }, { "epoch": 5.2, "learning_rate": 4.740017714894406e-05, "loss": 2.3738, "step": 1797000 }, { "epoch": 5.2, "learning_rate": 4.739945350129678e-05, "loss": 2.3968, "step": 1797500 }, { "epoch": 5.2, "learning_rate": 4.7398729853649505e-05, "loss": 2.3736, "step": 1798000 }, { "epoch": 5.21, "learning_rate": 4.739800765329752e-05, "loss": 2.3853, "step": 1798500 }, { "epoch": 5.21, "learning_rate": 4.739728400565024e-05, "loss": 2.368, "step": 1799000 }, { "epoch": 5.21, "learning_rate": 4.7396560358002965e-05, "loss": 2.4036, "step": 1799500 }, { "epoch": 5.21, "learning_rate": 4.739583671035569e-05, "loss": 2.3831, "step": 1800000 }, { "epoch": 5.21, "learning_rate": 4.7395113062708416e-05, "loss": 2.3612, "step": 1800500 }, { "epoch": 5.21, "learning_rate": 4.739438941506114e-05, "loss": 2.3826, "step": 1801000 }, { "epoch": 5.21, "learning_rate": 4.739366576741386e-05, "loss": 2.4293, "step": 1801500 }, { "epoch": 5.22, "learning_rate": 4.739294501435717e-05, "loss": 2.3633, "step": 1802000 }, { "epoch": 5.22, "learning_rate": 4.739222281400519e-05, "loss": 2.3606, "step": 1802500 }, { "epoch": 5.22, "learning_rate": 4.7391499166357914e-05, "loss": 2.4079, "step": 1803000 }, { "epoch": 5.22, "learning_rate": 4.7390775518710636e-05, "loss": 2.363, "step": 1803500 }, { "epoch": 5.22, "learning_rate": 4.739005187106336e-05, "loss": 2.3774, "step": 1804000 }, { "epoch": 5.22, "learning_rate": 4.738932822341608e-05, "loss": 2.3716, "step": 1804500 }, { "epoch": 5.22, "learning_rate": 4.738860457576881e-05, "loss": 2.374, "step": 1805000 }, { "epoch": 5.23, "learning_rate": 4.738788092812153e-05, "loss": 2.3518, "step": 1805500 }, { "epoch": 5.23, "learning_rate": 4.7387157280474254e-05, "loss": 2.3775, "step": 1806000 }, { "epoch": 5.23, "learning_rate": 4.738643508012227e-05, "loss": 2.3628, "step": 1806500 }, { "epoch": 5.23, "learning_rate": 4.738571143247499e-05, "loss": 2.3742, "step": 1807000 }, { "epoch": 5.23, "learning_rate": 4.7384987784827714e-05, "loss": 2.3834, "step": 1807500 }, { "epoch": 5.23, "learning_rate": 4.738426413718044e-05, "loss": 2.3827, "step": 1808000 }, { "epoch": 5.23, "learning_rate": 4.7383540489533165e-05, "loss": 2.3519, "step": 1808500 }, { "epoch": 5.24, "learning_rate": 4.738281684188589e-05, "loss": 2.399, "step": 1809000 }, { "epoch": 5.24, "learning_rate": 4.738209319423861e-05, "loss": 2.3733, "step": 1809500 }, { "epoch": 5.24, "learning_rate": 4.738136954659133e-05, "loss": 2.3998, "step": 1810000 }, { "epoch": 5.24, "learning_rate": 4.7380645898944054e-05, "loss": 2.3917, "step": 1810500 }, { "epoch": 5.24, "learning_rate": 4.7379922251296776e-05, "loss": 2.3691, "step": 1811000 }, { "epoch": 5.24, "learning_rate": 4.73791986036495e-05, "loss": 2.3995, "step": 1811500 }, { "epoch": 5.24, "learning_rate": 4.737847495600223e-05, "loss": 2.3581, "step": 1812000 }, { "epoch": 5.25, "learning_rate": 4.737775130835495e-05, "loss": 2.4082, "step": 1812500 }, { "epoch": 5.25, "learning_rate": 4.737702766070767e-05, "loss": 2.3743, "step": 1813000 }, { "epoch": 5.25, "learning_rate": 4.7376305460355694e-05, "loss": 2.3733, "step": 1813500 }, { "epoch": 5.25, "learning_rate": 4.7375581812708417e-05, "loss": 2.3884, "step": 1814000 }, { "epoch": 5.25, "learning_rate": 4.737485816506114e-05, "loss": 2.3524, "step": 1814500 }, { "epoch": 5.25, "learning_rate": 4.737413451741386e-05, "loss": 2.3757, "step": 1815000 }, { "epoch": 5.26, "learning_rate": 4.737341086976658e-05, "loss": 2.3976, "step": 1815500 }, { "epoch": 5.26, "learning_rate": 4.7372687222119305e-05, "loss": 2.3632, "step": 1816000 }, { "epoch": 5.26, "learning_rate": 4.737196357447203e-05, "loss": 2.372, "step": 1816500 }, { "epoch": 5.26, "learning_rate": 4.737123992682475e-05, "loss": 2.3764, "step": 1817000 }, { "epoch": 5.26, "learning_rate": 4.7370517726472766e-05, "loss": 2.3828, "step": 1817500 }, { "epoch": 5.26, "learning_rate": 4.7369794078825495e-05, "loss": 2.3834, "step": 1818000 }, { "epoch": 5.26, "learning_rate": 4.736907043117822e-05, "loss": 2.3817, "step": 1818500 }, { "epoch": 5.27, "learning_rate": 4.736834678353094e-05, "loss": 2.3768, "step": 1819000 }, { "epoch": 5.27, "learning_rate": 4.736762458317896e-05, "loss": 2.3905, "step": 1819500 }, { "epoch": 5.27, "learning_rate": 4.7366900935531684e-05, "loss": 2.3846, "step": 1820000 }, { "epoch": 5.27, "learning_rate": 4.7366177287884406e-05, "loss": 2.3619, "step": 1820500 }, { "epoch": 5.27, "learning_rate": 4.736545364023713e-05, "loss": 2.3772, "step": 1821000 }, { "epoch": 5.27, "learning_rate": 4.736472999258985e-05, "loss": 2.3972, "step": 1821500 }, { "epoch": 5.27, "learning_rate": 4.736400634494257e-05, "loss": 2.3525, "step": 1822000 }, { "epoch": 5.28, "learning_rate": 4.7363282697295295e-05, "loss": 2.3757, "step": 1822500 }, { "epoch": 5.28, "learning_rate": 4.736256049694332e-05, "loss": 2.3885, "step": 1823000 }, { "epoch": 5.28, "learning_rate": 4.736183684929604e-05, "loss": 2.3797, "step": 1823500 }, { "epoch": 5.28, "learning_rate": 4.736111320164876e-05, "loss": 2.3754, "step": 1824000 }, { "epoch": 5.28, "learning_rate": 4.7360389554001484e-05, "loss": 2.396, "step": 1824500 }, { "epoch": 5.28, "learning_rate": 4.7359665906354206e-05, "loss": 2.3785, "step": 1825000 }, { "epoch": 5.28, "learning_rate": 4.735894225870693e-05, "loss": 2.3814, "step": 1825500 }, { "epoch": 5.29, "learning_rate": 4.735821861105965e-05, "loss": 2.3737, "step": 1826000 }, { "epoch": 5.29, "learning_rate": 4.735749496341238e-05, "loss": 2.3669, "step": 1826500 }, { "epoch": 5.29, "learning_rate": 4.73567713157651e-05, "loss": 2.3985, "step": 1827000 }, { "epoch": 5.29, "learning_rate": 4.735604911541312e-05, "loss": 2.3641, "step": 1827500 }, { "epoch": 5.29, "learning_rate": 4.735532691506114e-05, "loss": 2.4, "step": 1828000 }, { "epoch": 5.29, "learning_rate": 4.735460326741386e-05, "loss": 2.4089, "step": 1828500 }, { "epoch": 5.29, "learning_rate": 4.735388106706188e-05, "loss": 2.3796, "step": 1829000 }, { "epoch": 5.3, "learning_rate": 4.73531574194146e-05, "loss": 2.3769, "step": 1829500 }, { "epoch": 5.3, "learning_rate": 4.735243377176732e-05, "loss": 2.3571, "step": 1830000 }, { "epoch": 5.3, "learning_rate": 4.7351710124120044e-05, "loss": 2.3712, "step": 1830500 }, { "epoch": 5.3, "learning_rate": 4.7350986476472766e-05, "loss": 2.3894, "step": 1831000 }, { "epoch": 5.3, "learning_rate": 4.7350262828825495e-05, "loss": 2.3837, "step": 1831500 }, { "epoch": 5.3, "learning_rate": 4.734953918117822e-05, "loss": 2.3639, "step": 1832000 }, { "epoch": 5.3, "learning_rate": 4.734881553353094e-05, "loss": 2.3925, "step": 1832500 }, { "epoch": 5.31, "learning_rate": 4.7348093333178955e-05, "loss": 2.3843, "step": 1833000 }, { "epoch": 5.31, "learning_rate": 4.734736968553168e-05, "loss": 2.3865, "step": 1833500 }, { "epoch": 5.31, "learning_rate": 4.73466460378844e-05, "loss": 2.3784, "step": 1834000 }, { "epoch": 5.31, "learning_rate": 4.734592239023713e-05, "loss": 2.366, "step": 1834500 }, { "epoch": 5.31, "learning_rate": 4.734519874258985e-05, "loss": 2.3607, "step": 1835000 }, { "epoch": 5.31, "learning_rate": 4.734447509494257e-05, "loss": 2.3726, "step": 1835500 }, { "epoch": 5.31, "learning_rate": 4.7343751447295296e-05, "loss": 2.3853, "step": 1836000 }, { "epoch": 5.32, "learning_rate": 4.734302779964802e-05, "loss": 2.3979, "step": 1836500 }, { "epoch": 5.32, "learning_rate": 4.734230415200075e-05, "loss": 2.4087, "step": 1837000 }, { "epoch": 5.32, "learning_rate": 4.734158195164876e-05, "loss": 2.3815, "step": 1837500 }, { "epoch": 5.32, "learning_rate": 4.7340858304001485e-05, "loss": 2.3579, "step": 1838000 }, { "epoch": 5.32, "learning_rate": 4.73401361036495e-05, "loss": 2.367, "step": 1838500 }, { "epoch": 5.32, "learning_rate": 4.733941245600222e-05, "loss": 2.376, "step": 1839000 }, { "epoch": 5.32, "learning_rate": 4.7338688808354945e-05, "loss": 2.3939, "step": 1839500 }, { "epoch": 5.33, "learning_rate": 4.733796516070767e-05, "loss": 2.3648, "step": 1840000 }, { "epoch": 5.33, "learning_rate": 4.7337241513060396e-05, "loss": 2.37, "step": 1840500 }, { "epoch": 5.33, "learning_rate": 4.733651786541312e-05, "loss": 2.3816, "step": 1841000 }, { "epoch": 5.33, "learning_rate": 4.7335795665061134e-05, "loss": 2.393, "step": 1841500 }, { "epoch": 5.33, "learning_rate": 4.733507201741386e-05, "loss": 2.3828, "step": 1842000 }, { "epoch": 5.33, "learning_rate": 4.7334348369766585e-05, "loss": 2.3752, "step": 1842500 }, { "epoch": 5.33, "learning_rate": 4.733362472211931e-05, "loss": 2.3883, "step": 1843000 }, { "epoch": 5.34, "learning_rate": 4.733290396906262e-05, "loss": 2.3784, "step": 1843500 }, { "epoch": 5.34, "learning_rate": 4.7332180321415345e-05, "loss": 2.3734, "step": 1844000 }, { "epoch": 5.34, "learning_rate": 4.733145667376807e-05, "loss": 2.3978, "step": 1844500 }, { "epoch": 5.34, "learning_rate": 4.733073302612079e-05, "loss": 2.3879, "step": 1845000 }, { "epoch": 5.34, "learning_rate": 4.733000937847351e-05, "loss": 2.3861, "step": 1845500 }, { "epoch": 5.34, "learning_rate": 4.7329285730826234e-05, "loss": 2.3928, "step": 1846000 }, { "epoch": 5.34, "learning_rate": 4.7328562083178956e-05, "loss": 2.3909, "step": 1846500 }, { "epoch": 5.35, "learning_rate": 4.732783843553168e-05, "loss": 2.3773, "step": 1847000 }, { "epoch": 5.35, "learning_rate": 4.7327116235179694e-05, "loss": 2.3936, "step": 1847500 }, { "epoch": 5.35, "learning_rate": 4.732639258753242e-05, "loss": 2.3855, "step": 1848000 }, { "epoch": 5.35, "learning_rate": 4.7325668939885145e-05, "loss": 2.4164, "step": 1848500 }, { "epoch": 5.35, "learning_rate": 4.732494529223787e-05, "loss": 2.34, "step": 1849000 }, { "epoch": 5.35, "learning_rate": 4.7324221644590596e-05, "loss": 2.3758, "step": 1849500 }, { "epoch": 5.35, "learning_rate": 4.732349799694332e-05, "loss": 2.3595, "step": 1850000 }, { "epoch": 5.36, "learning_rate": 4.732277434929604e-05, "loss": 2.3902, "step": 1850500 }, { "epoch": 5.36, "learning_rate": 4.732205070164876e-05, "loss": 2.3759, "step": 1851000 }, { "epoch": 5.36, "learning_rate": 4.7321327054001485e-05, "loss": 2.3805, "step": 1851500 }, { "epoch": 5.36, "learning_rate": 4.73206048536495e-05, "loss": 2.3842, "step": 1852000 }, { "epoch": 5.36, "learning_rate": 4.731988120600222e-05, "loss": 2.3776, "step": 1852500 }, { "epoch": 5.36, "learning_rate": 4.7319157558354945e-05, "loss": 2.3794, "step": 1853000 }, { "epoch": 5.37, "learning_rate": 4.7318433910707674e-05, "loss": 2.3803, "step": 1853500 }, { "epoch": 5.37, "learning_rate": 4.731771171035569e-05, "loss": 2.3943, "step": 1854000 }, { "epoch": 5.37, "learning_rate": 4.731698806270841e-05, "loss": 2.3779, "step": 1854500 }, { "epoch": 5.37, "learning_rate": 4.731626586235643e-05, "loss": 2.3947, "step": 1855000 }, { "epoch": 5.37, "learning_rate": 4.731554221470915e-05, "loss": 2.3792, "step": 1855500 }, { "epoch": 5.37, "learning_rate": 4.731481856706187e-05, "loss": 2.3694, "step": 1856000 }, { "epoch": 5.37, "learning_rate": 4.7314094919414594e-05, "loss": 2.3817, "step": 1856500 }, { "epoch": 5.38, "learning_rate": 4.7313371271767323e-05, "loss": 2.3686, "step": 1857000 }, { "epoch": 5.38, "learning_rate": 4.7312647624120046e-05, "loss": 2.3854, "step": 1857500 }, { "epoch": 5.38, "learning_rate": 4.7311923976472775e-05, "loss": 2.4116, "step": 1858000 }, { "epoch": 5.38, "learning_rate": 4.73112003288255e-05, "loss": 2.3985, "step": 1858500 }, { "epoch": 5.38, "learning_rate": 4.731047812847351e-05, "loss": 2.3651, "step": 1859000 }, { "epoch": 5.38, "learning_rate": 4.7309754480826235e-05, "loss": 2.4042, "step": 1859500 }, { "epoch": 5.38, "learning_rate": 4.730903083317896e-05, "loss": 2.3556, "step": 1860000 }, { "epoch": 5.39, "learning_rate": 4.730830718553168e-05, "loss": 2.3837, "step": 1860500 }, { "epoch": 5.39, "learning_rate": 4.73075849851797e-05, "loss": 2.3746, "step": 1861000 }, { "epoch": 5.39, "learning_rate": 4.7306861337532424e-05, "loss": 2.3957, "step": 1861500 }, { "epoch": 5.39, "learning_rate": 4.7306137689885146e-05, "loss": 2.3825, "step": 1862000 }, { "epoch": 5.39, "learning_rate": 4.730541548953316e-05, "loss": 2.3972, "step": 1862500 }, { "epoch": 5.39, "learning_rate": 4.7304691841885884e-05, "loss": 2.3815, "step": 1863000 }, { "epoch": 5.39, "learning_rate": 4.7303968194238606e-05, "loss": 2.3895, "step": 1863500 }, { "epoch": 5.4, "learning_rate": 4.730324599388662e-05, "loss": 2.3755, "step": 1864000 }, { "epoch": 5.4, "learning_rate": 4.730252234623935e-05, "loss": 2.3808, "step": 1864500 }, { "epoch": 5.4, "learning_rate": 4.730179869859207e-05, "loss": 2.3661, "step": 1865000 }, { "epoch": 5.4, "learning_rate": 4.73010750509448e-05, "loss": 2.3536, "step": 1865500 }, { "epoch": 5.4, "learning_rate": 4.7300351403297524e-05, "loss": 2.3845, "step": 1866000 }, { "epoch": 5.4, "learning_rate": 4.7299627755650246e-05, "loss": 2.3924, "step": 1866500 }, { "epoch": 5.4, "learning_rate": 4.729890410800297e-05, "loss": 2.3821, "step": 1867000 }, { "epoch": 5.41, "learning_rate": 4.729818046035569e-05, "loss": 2.392, "step": 1867500 }, { "epoch": 5.41, "learning_rate": 4.729745681270841e-05, "loss": 2.3652, "step": 1868000 }, { "epoch": 5.41, "learning_rate": 4.7296733165061135e-05, "loss": 2.3777, "step": 1868500 }, { "epoch": 5.41, "learning_rate": 4.729600951741386e-05, "loss": 2.3909, "step": 1869000 }, { "epoch": 5.41, "learning_rate": 4.729528586976658e-05, "loss": 2.3707, "step": 1869500 }, { "epoch": 5.41, "learning_rate": 4.72945636694146e-05, "loss": 2.3875, "step": 1870000 }, { "epoch": 5.41, "learning_rate": 4.7293840021767324e-05, "loss": 2.3904, "step": 1870500 }, { "epoch": 5.42, "learning_rate": 4.7293116374120046e-05, "loss": 2.4019, "step": 1871000 }, { "epoch": 5.42, "learning_rate": 4.729239272647277e-05, "loss": 2.4042, "step": 1871500 }, { "epoch": 5.42, "learning_rate": 4.72916690788255e-05, "loss": 2.3815, "step": 1872000 }, { "epoch": 5.42, "learning_rate": 4.729094543117822e-05, "loss": 2.3943, "step": 1872500 }, { "epoch": 5.42, "learning_rate": 4.729022178353094e-05, "loss": 2.4012, "step": 1873000 }, { "epoch": 5.42, "learning_rate": 4.7289498135883664e-05, "loss": 2.3796, "step": 1873500 }, { "epoch": 5.42, "learning_rate": 4.728877448823639e-05, "loss": 2.3847, "step": 1874000 }, { "epoch": 5.43, "learning_rate": 4.72880522878844e-05, "loss": 2.3902, "step": 1874500 }, { "epoch": 5.43, "learning_rate": 4.7287328640237124e-05, "loss": 2.4118, "step": 1875000 }, { "epoch": 5.43, "learning_rate": 4.7286604992589853e-05, "loss": 2.3796, "step": 1875500 }, { "epoch": 5.43, "learning_rate": 4.728588279223787e-05, "loss": 2.389, "step": 1876000 }, { "epoch": 5.43, "learning_rate": 4.728515914459059e-05, "loss": 2.39, "step": 1876500 }, { "epoch": 5.43, "learning_rate": 4.7284435496943313e-05, "loss": 2.3849, "step": 1877000 }, { "epoch": 5.43, "learning_rate": 4.7283711849296036e-05, "loss": 2.3932, "step": 1877500 }, { "epoch": 5.44, "learning_rate": 4.728298820164876e-05, "loss": 2.3834, "step": 1878000 }, { "epoch": 5.44, "learning_rate": 4.728226455400148e-05, "loss": 2.3769, "step": 1878500 }, { "epoch": 5.44, "learning_rate": 4.728154090635421e-05, "loss": 2.3834, "step": 1879000 }, { "epoch": 5.44, "learning_rate": 4.728081725870693e-05, "loss": 2.3822, "step": 1879500 }, { "epoch": 5.44, "learning_rate": 4.7280095058354954e-05, "loss": 2.3963, "step": 1880000 }, { "epoch": 5.44, "learning_rate": 4.7279371410707676e-05, "loss": 2.3573, "step": 1880500 }, { "epoch": 5.44, "learning_rate": 4.727864921035569e-05, "loss": 2.3737, "step": 1881000 }, { "epoch": 5.45, "learning_rate": 4.7277925562708414e-05, "loss": 2.3784, "step": 1881500 }, { "epoch": 5.45, "learning_rate": 4.7277201915061136e-05, "loss": 2.392, "step": 1882000 }, { "epoch": 5.45, "learning_rate": 4.727647826741386e-05, "loss": 2.4019, "step": 1882500 }, { "epoch": 5.45, "learning_rate": 4.727575461976658e-05, "loss": 2.3869, "step": 1883000 }, { "epoch": 5.45, "learning_rate": 4.72750309721193e-05, "loss": 2.3843, "step": 1883500 }, { "epoch": 5.45, "learning_rate": 4.7274307324472025e-05, "loss": 2.3981, "step": 1884000 }, { "epoch": 5.45, "learning_rate": 4.7273583676824754e-05, "loss": 2.3824, "step": 1884500 }, { "epoch": 5.46, "learning_rate": 4.7272860029177476e-05, "loss": 2.3845, "step": 1885000 }, { "epoch": 5.46, "learning_rate": 4.727213782882549e-05, "loss": 2.4037, "step": 1885500 }, { "epoch": 5.46, "learning_rate": 4.7271414181178214e-05, "loss": 2.3769, "step": 1886000 }, { "epoch": 5.46, "learning_rate": 4.7270690533530936e-05, "loss": 2.3452, "step": 1886500 }, { "epoch": 5.46, "learning_rate": 4.7269966885883665e-05, "loss": 2.3751, "step": 1887000 }, { "epoch": 5.46, "learning_rate": 4.726924468553168e-05, "loss": 2.3725, "step": 1887500 }, { "epoch": 5.46, "learning_rate": 4.72685210378844e-05, "loss": 2.3723, "step": 1888000 }, { "epoch": 5.47, "learning_rate": 4.7267797390237125e-05, "loss": 2.3817, "step": 1888500 }, { "epoch": 5.47, "learning_rate": 4.7267073742589854e-05, "loss": 2.4071, "step": 1889000 }, { "epoch": 5.47, "learning_rate": 4.7266350094942576e-05, "loss": 2.3741, "step": 1889500 }, { "epoch": 5.47, "learning_rate": 4.72656264472953e-05, "loss": 2.3813, "step": 1890000 }, { "epoch": 5.47, "learning_rate": 4.726490279964802e-05, "loss": 2.3977, "step": 1890500 }, { "epoch": 5.47, "learning_rate": 4.726417915200074e-05, "loss": 2.4075, "step": 1891000 }, { "epoch": 5.48, "learning_rate": 4.726345695164876e-05, "loss": 2.38, "step": 1891500 }, { "epoch": 5.48, "learning_rate": 4.726273330400148e-05, "loss": 2.3625, "step": 1892000 }, { "epoch": 5.48, "learning_rate": 4.72620096563542e-05, "loss": 2.3791, "step": 1892500 }, { "epoch": 5.48, "learning_rate": 4.7261286008706925e-05, "loss": 2.3769, "step": 1893000 }, { "epoch": 5.48, "learning_rate": 4.726056380835495e-05, "loss": 2.3913, "step": 1893500 }, { "epoch": 5.48, "learning_rate": 4.725984016070767e-05, "loss": 2.3991, "step": 1894000 }, { "epoch": 5.48, "learning_rate": 4.72591165130604e-05, "loss": 2.3851, "step": 1894500 }, { "epoch": 5.49, "learning_rate": 4.725839286541312e-05, "loss": 2.3767, "step": 1895000 }, { "epoch": 5.49, "learning_rate": 4.7257669217765843e-05, "loss": 2.3751, "step": 1895500 }, { "epoch": 5.49, "learning_rate": 4.725694701741386e-05, "loss": 2.3781, "step": 1896000 }, { "epoch": 5.49, "learning_rate": 4.725622336976658e-05, "loss": 2.3955, "step": 1896500 }, { "epoch": 5.49, "learning_rate": 4.7255499722119303e-05, "loss": 2.395, "step": 1897000 }, { "epoch": 5.49, "learning_rate": 4.725477607447203e-05, "loss": 2.376, "step": 1897500 }, { "epoch": 5.49, "learning_rate": 4.725405387412005e-05, "loss": 2.3773, "step": 1898000 }, { "epoch": 5.5, "learning_rate": 4.725333022647277e-05, "loss": 2.3621, "step": 1898500 }, { "epoch": 5.5, "learning_rate": 4.725260657882549e-05, "loss": 2.3761, "step": 1899000 }, { "epoch": 5.5, "learning_rate": 4.7251882931178215e-05, "loss": 2.4153, "step": 1899500 }, { "epoch": 5.5, "learning_rate": 4.725115928353094e-05, "loss": 2.3971, "step": 1900000 }, { "epoch": 5.5, "learning_rate": 4.725043708317895e-05, "loss": 2.3833, "step": 1900500 }, { "epoch": 5.5, "learning_rate": 4.724971343553168e-05, "loss": 2.3857, "step": 1901000 }, { "epoch": 5.5, "learning_rate": 4.7248989787884404e-05, "loss": 2.3729, "step": 1901500 }, { "epoch": 5.51, "learning_rate": 4.724826614023713e-05, "loss": 2.3998, "step": 1902000 }, { "epoch": 5.51, "learning_rate": 4.7247542492589855e-05, "loss": 2.4016, "step": 1902500 }, { "epoch": 5.51, "learning_rate": 4.724681884494258e-05, "loss": 2.3877, "step": 1903000 }, { "epoch": 5.51, "learning_rate": 4.724609664459059e-05, "loss": 2.3807, "step": 1903500 }, { "epoch": 5.51, "learning_rate": 4.7245372996943315e-05, "loss": 2.3852, "step": 1904000 }, { "epoch": 5.51, "learning_rate": 4.724464934929604e-05, "loss": 2.358, "step": 1904500 }, { "epoch": 5.51, "learning_rate": 4.724392570164876e-05, "loss": 2.3786, "step": 1905000 }, { "epoch": 5.52, "learning_rate": 4.724320205400148e-05, "loss": 2.399, "step": 1905500 }, { "epoch": 5.52, "learning_rate": 4.7242478406354204e-05, "loss": 2.3834, "step": 1906000 }, { "epoch": 5.52, "learning_rate": 4.724175475870693e-05, "loss": 2.3744, "step": 1906500 }, { "epoch": 5.52, "learning_rate": 4.7241031111059655e-05, "loss": 2.3933, "step": 1907000 }, { "epoch": 5.52, "learning_rate": 4.7240310358002964e-05, "loss": 2.3643, "step": 1907500 }, { "epoch": 5.52, "learning_rate": 4.7239586710355686e-05, "loss": 2.4048, "step": 1908000 }, { "epoch": 5.52, "learning_rate": 4.723886306270841e-05, "loss": 2.3791, "step": 1908500 }, { "epoch": 5.53, "learning_rate": 4.723813941506113e-05, "loss": 2.3894, "step": 1909000 }, { "epoch": 5.53, "learning_rate": 4.723741576741386e-05, "loss": 2.3747, "step": 1909500 }, { "epoch": 5.53, "learning_rate": 4.723669211976658e-05, "loss": 2.3744, "step": 1910000 }, { "epoch": 5.53, "learning_rate": 4.7235969919414604e-05, "loss": 2.3994, "step": 1910500 }, { "epoch": 5.53, "learning_rate": 4.723524627176733e-05, "loss": 2.3694, "step": 1911000 }, { "epoch": 5.53, "learning_rate": 4.723452262412005e-05, "loss": 2.3967, "step": 1911500 }, { "epoch": 5.53, "learning_rate": 4.723379897647277e-05, "loss": 2.3869, "step": 1912000 }, { "epoch": 5.54, "learning_rate": 4.723307532882549e-05, "loss": 2.3946, "step": 1912500 }, { "epoch": 5.54, "learning_rate": 4.7232351681178216e-05, "loss": 2.3746, "step": 1913000 }, { "epoch": 5.54, "learning_rate": 4.723162803353094e-05, "loss": 2.3918, "step": 1913500 }, { "epoch": 5.54, "learning_rate": 4.723090438588366e-05, "loss": 2.3544, "step": 1914000 }, { "epoch": 5.54, "learning_rate": 4.723018218553168e-05, "loss": 2.3579, "step": 1914500 }, { "epoch": 5.54, "learning_rate": 4.7229458537884405e-05, "loss": 2.4125, "step": 1915000 }, { "epoch": 5.54, "learning_rate": 4.722873489023713e-05, "loss": 2.3694, "step": 1915500 }, { "epoch": 5.55, "learning_rate": 4.722801124258985e-05, "loss": 2.3747, "step": 1916000 }, { "epoch": 5.55, "learning_rate": 4.722728759494257e-05, "loss": 2.3482, "step": 1916500 }, { "epoch": 5.55, "learning_rate": 4.7226565394590594e-05, "loss": 2.3728, "step": 1917000 }, { "epoch": 5.55, "learning_rate": 4.7225841746943316e-05, "loss": 2.3837, "step": 1917500 }, { "epoch": 5.55, "learning_rate": 4.722511809929604e-05, "loss": 2.372, "step": 1918000 }, { "epoch": 5.55, "learning_rate": 4.722439445164876e-05, "loss": 2.4121, "step": 1918500 }, { "epoch": 5.55, "learning_rate": 4.722367080400148e-05, "loss": 2.3956, "step": 1919000 }, { "epoch": 5.56, "learning_rate": 4.7222947156354205e-05, "loss": 2.3946, "step": 1919500 }, { "epoch": 5.56, "learning_rate": 4.722222495600223e-05, "loss": 2.3633, "step": 1920000 }, { "epoch": 5.56, "learning_rate": 4.722150130835495e-05, "loss": 2.3888, "step": 1920500 }, { "epoch": 5.56, "learning_rate": 4.722077766070767e-05, "loss": 2.3929, "step": 1921000 }, { "epoch": 5.56, "learning_rate": 4.7220054013060394e-05, "loss": 2.3743, "step": 1921500 }, { "epoch": 5.56, "learning_rate": 4.7219330365413116e-05, "loss": 2.3567, "step": 1922000 }, { "epoch": 5.56, "learning_rate": 4.721860671776584e-05, "loss": 2.3792, "step": 1922500 }, { "epoch": 5.57, "learning_rate": 4.721788307011856e-05, "loss": 2.3948, "step": 1923000 }, { "epoch": 5.57, "learning_rate": 4.721715942247128e-05, "loss": 2.3983, "step": 1923500 }, { "epoch": 5.57, "learning_rate": 4.7216437222119305e-05, "loss": 2.3829, "step": 1924000 }, { "epoch": 5.57, "learning_rate": 4.7215713574472034e-05, "loss": 2.358, "step": 1924500 }, { "epoch": 5.57, "learning_rate": 4.7214989926824756e-05, "loss": 2.3706, "step": 1925000 }, { "epoch": 5.57, "learning_rate": 4.721426627917748e-05, "loss": 2.3924, "step": 1925500 }, { "epoch": 5.57, "learning_rate": 4.72135426315302e-05, "loss": 2.3907, "step": 1926000 }, { "epoch": 5.58, "learning_rate": 4.721281898388292e-05, "loss": 2.3856, "step": 1926500 }, { "epoch": 5.58, "learning_rate": 4.721209678353094e-05, "loss": 2.3961, "step": 1927000 }, { "epoch": 5.58, "learning_rate": 4.721137313588366e-05, "loss": 2.3946, "step": 1927500 }, { "epoch": 5.58, "learning_rate": 4.721064948823638e-05, "loss": 2.3719, "step": 1928000 }, { "epoch": 5.58, "learning_rate": 4.720992584058911e-05, "loss": 2.387, "step": 1928500 }, { "epoch": 5.58, "learning_rate": 4.720920364023713e-05, "loss": 2.3995, "step": 1929000 }, { "epoch": 5.59, "learning_rate": 4.720847999258985e-05, "loss": 2.3858, "step": 1929500 }, { "epoch": 5.59, "learning_rate": 4.7207757792237865e-05, "loss": 2.3955, "step": 1930000 }, { "epoch": 5.59, "learning_rate": 4.720703414459059e-05, "loss": 2.3766, "step": 1930500 }, { "epoch": 5.59, "learning_rate": 4.720631049694331e-05, "loss": 2.363, "step": 1931000 }, { "epoch": 5.59, "learning_rate": 4.720558684929603e-05, "loss": 2.384, "step": 1931500 }, { "epoch": 5.59, "learning_rate": 4.720486320164876e-05, "loss": 2.3996, "step": 1932000 }, { "epoch": 5.59, "learning_rate": 4.720413955400148e-05, "loss": 2.3959, "step": 1932500 }, { "epoch": 5.6, "learning_rate": 4.720341590635421e-05, "loss": 2.3983, "step": 1933000 }, { "epoch": 5.6, "learning_rate": 4.7202692258706935e-05, "loss": 2.3745, "step": 1933500 }, { "epoch": 5.6, "learning_rate": 4.720196861105966e-05, "loss": 2.3597, "step": 1934000 }, { "epoch": 5.6, "learning_rate": 4.720124641070767e-05, "loss": 2.3677, "step": 1934500 }, { "epoch": 5.6, "learning_rate": 4.720052421035569e-05, "loss": 2.3778, "step": 1935000 }, { "epoch": 5.6, "learning_rate": 4.719980056270841e-05, "loss": 2.372, "step": 1935500 }, { "epoch": 5.6, "learning_rate": 4.719907691506113e-05, "loss": 2.3909, "step": 1936000 }, { "epoch": 5.61, "learning_rate": 4.719835326741386e-05, "loss": 2.3738, "step": 1936500 }, { "epoch": 5.61, "learning_rate": 4.7197629619766584e-05, "loss": 2.3847, "step": 1937000 }, { "epoch": 5.61, "learning_rate": 4.7196905972119306e-05, "loss": 2.3871, "step": 1937500 }, { "epoch": 5.61, "learning_rate": 4.719618377176732e-05, "loss": 2.3885, "step": 1938000 }, { "epoch": 5.61, "learning_rate": 4.7195460124120044e-05, "loss": 2.3941, "step": 1938500 }, { "epoch": 5.61, "learning_rate": 4.7194736476472766e-05, "loss": 2.3825, "step": 1939000 }, { "epoch": 5.61, "learning_rate": 4.719401572341608e-05, "loss": 2.3999, "step": 1939500 }, { "epoch": 5.62, "learning_rate": 4.719329207576881e-05, "loss": 2.3694, "step": 1940000 }, { "epoch": 5.62, "learning_rate": 4.719256842812153e-05, "loss": 2.3837, "step": 1940500 }, { "epoch": 5.62, "learning_rate": 4.7191844780474255e-05, "loss": 2.3803, "step": 1941000 }, { "epoch": 5.62, "learning_rate": 4.719112113282698e-05, "loss": 2.4023, "step": 1941500 }, { "epoch": 5.62, "learning_rate": 4.71903974851797e-05, "loss": 2.3822, "step": 1942000 }, { "epoch": 5.62, "learning_rate": 4.718967383753242e-05, "loss": 2.3926, "step": 1942500 }, { "epoch": 5.62, "learning_rate": 4.7188950189885144e-05, "loss": 2.3649, "step": 1943000 }, { "epoch": 5.63, "learning_rate": 4.7188226542237866e-05, "loss": 2.3778, "step": 1943500 }, { "epoch": 5.63, "learning_rate": 4.718750434188589e-05, "loss": 2.3736, "step": 1944000 }, { "epoch": 5.63, "learning_rate": 4.718678069423861e-05, "loss": 2.4025, "step": 1944500 }, { "epoch": 5.63, "learning_rate": 4.718605704659133e-05, "loss": 2.3807, "step": 1945000 }, { "epoch": 5.63, "learning_rate": 4.7185333398944055e-05, "loss": 2.3834, "step": 1945500 }, { "epoch": 5.63, "learning_rate": 4.718460975129678e-05, "loss": 2.4152, "step": 1946000 }, { "epoch": 5.63, "learning_rate": 4.71838861036495e-05, "loss": 2.4023, "step": 1946500 }, { "epoch": 5.64, "learning_rate": 4.718316245600223e-05, "loss": 2.4071, "step": 1947000 }, { "epoch": 5.64, "learning_rate": 4.718243880835495e-05, "loss": 2.4014, "step": 1947500 }, { "epoch": 5.64, "learning_rate": 4.718171516070767e-05, "loss": 2.3668, "step": 1948000 }, { "epoch": 5.64, "learning_rate": 4.718099440765099e-05, "loss": 2.3775, "step": 1948500 }, { "epoch": 5.64, "learning_rate": 4.7180272207299004e-05, "loss": 2.4065, "step": 1949000 }, { "epoch": 5.64, "learning_rate": 4.717954855965173e-05, "loss": 2.3939, "step": 1949500 }, { "epoch": 5.64, "learning_rate": 4.717882491200445e-05, "loss": 2.3973, "step": 1950000 }, { "epoch": 5.65, "learning_rate": 4.717810126435717e-05, "loss": 2.3794, "step": 1950500 }, { "epoch": 5.65, "learning_rate": 4.717737761670989e-05, "loss": 2.4148, "step": 1951000 }, { "epoch": 5.65, "learning_rate": 4.7176653969062616e-05, "loss": 2.3663, "step": 1951500 }, { "epoch": 5.65, "learning_rate": 4.717593032141534e-05, "loss": 2.3833, "step": 1952000 }, { "epoch": 5.65, "learning_rate": 4.717520667376806e-05, "loss": 2.3735, "step": 1952500 }, { "epoch": 5.65, "learning_rate": 4.717448302612079e-05, "loss": 2.4025, "step": 1953000 }, { "epoch": 5.65, "learning_rate": 4.717375937847351e-05, "loss": 2.3985, "step": 1953500 }, { "epoch": 5.66, "learning_rate": 4.7173035730826234e-05, "loss": 2.3914, "step": 1954000 }, { "epoch": 5.66, "learning_rate": 4.717231208317896e-05, "loss": 2.3794, "step": 1954500 }, { "epoch": 5.66, "learning_rate": 4.7171588435531685e-05, "loss": 2.4027, "step": 1955000 }, { "epoch": 5.66, "learning_rate": 4.71708662351797e-05, "loss": 2.3727, "step": 1955500 }, { "epoch": 5.66, "learning_rate": 4.717014258753242e-05, "loss": 2.4064, "step": 1956000 }, { "epoch": 5.66, "learning_rate": 4.716942038718044e-05, "loss": 2.3803, "step": 1956500 }, { "epoch": 5.66, "learning_rate": 4.716869673953316e-05, "loss": 2.3729, "step": 1957000 }, { "epoch": 5.67, "learning_rate": 4.716797309188589e-05, "loss": 2.3676, "step": 1957500 }, { "epoch": 5.67, "learning_rate": 4.716724944423861e-05, "loss": 2.3585, "step": 1958000 }, { "epoch": 5.67, "learning_rate": 4.7166525796591334e-05, "loss": 2.3636, "step": 1958500 }, { "epoch": 5.67, "learning_rate": 4.7165802148944056e-05, "loss": 2.3827, "step": 1959000 }, { "epoch": 5.67, "learning_rate": 4.716507850129678e-05, "loss": 2.4058, "step": 1959500 }, { "epoch": 5.67, "learning_rate": 4.71643548536495e-05, "loss": 2.3796, "step": 1960000 }, { "epoch": 5.67, "learning_rate": 4.716363120600222e-05, "loss": 2.3967, "step": 1960500 }, { "epoch": 5.68, "learning_rate": 4.7162907558354945e-05, "loss": 2.3791, "step": 1961000 }, { "epoch": 5.68, "learning_rate": 4.716218391070767e-05, "loss": 2.3689, "step": 1961500 }, { "epoch": 5.68, "learning_rate": 4.7161460263060396e-05, "loss": 2.3546, "step": 1962000 }, { "epoch": 5.68, "learning_rate": 4.716073806270841e-05, "loss": 2.3829, "step": 1962500 }, { "epoch": 5.68, "learning_rate": 4.716001441506114e-05, "loss": 2.4013, "step": 1963000 }, { "epoch": 5.68, "learning_rate": 4.715929076741386e-05, "loss": 2.3984, "step": 1963500 }, { "epoch": 5.68, "learning_rate": 4.7158567119766585e-05, "loss": 2.3762, "step": 1964000 }, { "epoch": 5.69, "learning_rate": 4.715784347211931e-05, "loss": 2.3814, "step": 1964500 }, { "epoch": 5.69, "learning_rate": 4.715711982447203e-05, "loss": 2.3857, "step": 1965000 }, { "epoch": 5.69, "learning_rate": 4.715639617682475e-05, "loss": 2.3932, "step": 1965500 }, { "epoch": 5.69, "learning_rate": 4.7155672529177474e-05, "loss": 2.3902, "step": 1966000 }, { "epoch": 5.69, "learning_rate": 4.715495032882549e-05, "loss": 2.3814, "step": 1966500 }, { "epoch": 5.69, "learning_rate": 4.715422668117821e-05, "loss": 2.3788, "step": 1967000 }, { "epoch": 5.7, "learning_rate": 4.715350303353094e-05, "loss": 2.3552, "step": 1967500 }, { "epoch": 5.7, "learning_rate": 4.715277938588366e-05, "loss": 2.4134, "step": 1968000 }, { "epoch": 5.7, "learning_rate": 4.7152055738236385e-05, "loss": 2.3887, "step": 1968500 }, { "epoch": 5.7, "learning_rate": 4.71513335378844e-05, "loss": 2.38, "step": 1969000 }, { "epoch": 5.7, "learning_rate": 4.715060989023713e-05, "loss": 2.3779, "step": 1969500 }, { "epoch": 5.7, "learning_rate": 4.714988624258985e-05, "loss": 2.4008, "step": 1970000 }, { "epoch": 5.7, "learning_rate": 4.7149162594942574e-05, "loss": 2.3748, "step": 1970500 }, { "epoch": 5.71, "learning_rate": 4.71484389472953e-05, "loss": 2.3949, "step": 1971000 }, { "epoch": 5.71, "learning_rate": 4.714771819423861e-05, "loss": 2.3804, "step": 1971500 }, { "epoch": 5.71, "learning_rate": 4.7146994546591335e-05, "loss": 2.3868, "step": 1972000 }, { "epoch": 5.71, "learning_rate": 4.714627089894406e-05, "loss": 2.3698, "step": 1972500 }, { "epoch": 5.71, "learning_rate": 4.714554725129678e-05, "loss": 2.3875, "step": 1973000 }, { "epoch": 5.71, "learning_rate": 4.71448236036495e-05, "loss": 2.3954, "step": 1973500 }, { "epoch": 5.71, "learning_rate": 4.7144099956002224e-05, "loss": 2.3576, "step": 1974000 }, { "epoch": 5.72, "learning_rate": 4.7143376308354946e-05, "loss": 2.3737, "step": 1974500 }, { "epoch": 5.72, "learning_rate": 4.714265410800297e-05, "loss": 2.3711, "step": 1975000 }, { "epoch": 5.72, "learning_rate": 4.714193046035569e-05, "loss": 2.3996, "step": 1975500 }, { "epoch": 5.72, "learning_rate": 4.714120681270841e-05, "loss": 2.3747, "step": 1976000 }, { "epoch": 5.72, "learning_rate": 4.7140483165061135e-05, "loss": 2.4001, "step": 1976500 }, { "epoch": 5.72, "learning_rate": 4.7139759517413864e-05, "loss": 2.4039, "step": 1977000 }, { "epoch": 5.72, "learning_rate": 4.7139035869766586e-05, "loss": 2.4134, "step": 1977500 }, { "epoch": 5.73, "learning_rate": 4.713831222211931e-05, "loss": 2.408, "step": 1978000 }, { "epoch": 5.73, "learning_rate": 4.713758857447203e-05, "loss": 2.3788, "step": 1978500 }, { "epoch": 5.73, "learning_rate": 4.713686492682475e-05, "loss": 2.3784, "step": 1979000 }, { "epoch": 5.73, "learning_rate": 4.7136141279177475e-05, "loss": 2.4177, "step": 1979500 }, { "epoch": 5.73, "learning_rate": 4.71354176315302e-05, "loss": 2.368, "step": 1980000 }, { "epoch": 5.73, "learning_rate": 4.713469543117822e-05, "loss": 2.3758, "step": 1980500 }, { "epoch": 5.73, "learning_rate": 4.713397178353094e-05, "loss": 2.4088, "step": 1981000 }, { "epoch": 5.74, "learning_rate": 4.7133248135883664e-05, "loss": 2.3667, "step": 1981500 }, { "epoch": 5.74, "learning_rate": 4.7132524488236386e-05, "loss": 2.3874, "step": 1982000 }, { "epoch": 5.74, "learning_rate": 4.713180084058911e-05, "loss": 2.3792, "step": 1982500 }, { "epoch": 5.74, "learning_rate": 4.713107719294183e-05, "loss": 2.3587, "step": 1983000 }, { "epoch": 5.74, "learning_rate": 4.713035354529455e-05, "loss": 2.3993, "step": 1983500 }, { "epoch": 5.74, "learning_rate": 4.712963134494257e-05, "loss": 2.3943, "step": 1984000 }, { "epoch": 5.74, "learning_rate": 4.712890914459059e-05, "loss": 2.3887, "step": 1984500 }, { "epoch": 5.75, "learning_rate": 4.712818549694332e-05, "loss": 2.3722, "step": 1985000 }, { "epoch": 5.75, "learning_rate": 4.712746184929604e-05, "loss": 2.3769, "step": 1985500 }, { "epoch": 5.75, "learning_rate": 4.712674109623935e-05, "loss": 2.3837, "step": 1986000 }, { "epoch": 5.75, "learning_rate": 4.712601744859207e-05, "loss": 2.3808, "step": 1986500 }, { "epoch": 5.75, "learning_rate": 4.7125293800944795e-05, "loss": 2.3969, "step": 1987000 }, { "epoch": 5.75, "learning_rate": 4.712457015329752e-05, "loss": 2.3683, "step": 1987500 }, { "epoch": 5.75, "learning_rate": 4.712384650565024e-05, "loss": 2.3694, "step": 1988000 }, { "epoch": 5.76, "learning_rate": 4.712312285800297e-05, "loss": 2.3858, "step": 1988500 }, { "epoch": 5.76, "learning_rate": 4.712239921035569e-05, "loss": 2.376, "step": 1989000 }, { "epoch": 5.76, "learning_rate": 4.712167556270841e-05, "loss": 2.3844, "step": 1989500 }, { "epoch": 5.76, "learning_rate": 4.7120951915061136e-05, "loss": 2.3661, "step": 1990000 }, { "epoch": 5.76, "learning_rate": 4.712022826741386e-05, "loss": 2.4116, "step": 1990500 }, { "epoch": 5.76, "learning_rate": 4.711950461976658e-05, "loss": 2.3662, "step": 1991000 }, { "epoch": 5.76, "learning_rate": 4.71187809721193e-05, "loss": 2.3853, "step": 1991500 }, { "epoch": 5.77, "learning_rate": 4.711805732447203e-05, "loss": 2.3857, "step": 1992000 }, { "epoch": 5.77, "learning_rate": 4.7117333676824754e-05, "loss": 2.3943, "step": 1992500 }, { "epoch": 5.77, "learning_rate": 4.7116610029177476e-05, "loss": 2.3485, "step": 1993000 }, { "epoch": 5.77, "learning_rate": 4.71158863815302e-05, "loss": 2.394, "step": 1993500 }, { "epoch": 5.77, "learning_rate": 4.711516273388292e-05, "loss": 2.3716, "step": 1994000 }, { "epoch": 5.77, "learning_rate": 4.711444053353094e-05, "loss": 2.3826, "step": 1994500 }, { "epoch": 5.77, "learning_rate": 4.7113716885883665e-05, "loss": 2.4211, "step": 1995000 }, { "epoch": 5.78, "learning_rate": 4.711299468553168e-05, "loss": 2.3832, "step": 1995500 }, { "epoch": 5.78, "learning_rate": 4.71122710378844e-05, "loss": 2.3848, "step": 1996000 }, { "epoch": 5.78, "learning_rate": 4.711154883753242e-05, "loss": 2.3673, "step": 1996500 }, { "epoch": 5.78, "learning_rate": 4.711082518988515e-05, "loss": 2.4016, "step": 1997000 }, { "epoch": 5.78, "learning_rate": 4.711010154223787e-05, "loss": 2.3936, "step": 1997500 }, { "epoch": 5.78, "learning_rate": 4.710937789459059e-05, "loss": 2.3675, "step": 1998000 }, { "epoch": 5.78, "learning_rate": 4.7108654246943314e-05, "loss": 2.4028, "step": 1998500 }, { "epoch": 5.79, "learning_rate": 4.7107930599296036e-05, "loss": 2.3641, "step": 1999000 }, { "epoch": 5.79, "learning_rate": 4.7107206951648765e-05, "loss": 2.3918, "step": 1999500 }, { "epoch": 5.79, "learning_rate": 4.710648330400149e-05, "loss": 2.3992, "step": 2000000 }, { "epoch": 5.79, "learning_rate": 4.71057611036495e-05, "loss": 2.3716, "step": 2000500 }, { "epoch": 5.79, "learning_rate": 4.7105037456002225e-05, "loss": 2.3907, "step": 2001000 }, { "epoch": 5.79, "learning_rate": 4.710431525565025e-05, "loss": 2.3759, "step": 2001500 }, { "epoch": 5.79, "learning_rate": 4.710359160800297e-05, "loss": 2.4059, "step": 2002000 }, { "epoch": 5.8, "learning_rate": 4.710286796035569e-05, "loss": 2.3719, "step": 2002500 }, { "epoch": 5.8, "learning_rate": 4.7102144312708414e-05, "loss": 2.3871, "step": 2003000 }, { "epoch": 5.8, "learning_rate": 4.7101420665061136e-05, "loss": 2.3887, "step": 2003500 }, { "epoch": 5.8, "learning_rate": 4.710069701741386e-05, "loss": 2.399, "step": 2004000 }, { "epoch": 5.8, "learning_rate": 4.709997336976658e-05, "loss": 2.3803, "step": 2004500 }, { "epoch": 5.8, "learning_rate": 4.70992497221193e-05, "loss": 2.3503, "step": 2005000 }, { "epoch": 5.81, "learning_rate": 4.7098526074472025e-05, "loss": 2.4099, "step": 2005500 }, { "epoch": 5.81, "learning_rate": 4.709780242682475e-05, "loss": 2.3697, "step": 2006000 }, { "epoch": 5.81, "learning_rate": 4.7097078779177477e-05, "loss": 2.3846, "step": 2006500 }, { "epoch": 5.81, "learning_rate": 4.70963551315302e-05, "loss": 2.3776, "step": 2007000 }, { "epoch": 5.81, "learning_rate": 4.709563293117822e-05, "loss": 2.3733, "step": 2007500 }, { "epoch": 5.81, "learning_rate": 4.709490928353094e-05, "loss": 2.3539, "step": 2008000 }, { "epoch": 5.81, "learning_rate": 4.7094185635883666e-05, "loss": 2.3895, "step": 2008500 }, { "epoch": 5.82, "learning_rate": 4.709346343553168e-05, "loss": 2.3751, "step": 2009000 }, { "epoch": 5.82, "learning_rate": 4.70927397878844e-05, "loss": 2.3897, "step": 2009500 }, { "epoch": 5.82, "learning_rate": 4.7092016140237126e-05, "loss": 2.3627, "step": 2010000 }, { "epoch": 5.82, "learning_rate": 4.709129249258985e-05, "loss": 2.3638, "step": 2010500 }, { "epoch": 5.82, "learning_rate": 4.709057029223787e-05, "loss": 2.4012, "step": 2011000 }, { "epoch": 5.82, "learning_rate": 4.708984664459059e-05, "loss": 2.3915, "step": 2011500 }, { "epoch": 5.82, "learning_rate": 4.7089122996943315e-05, "loss": 2.3684, "step": 2012000 }, { "epoch": 5.83, "learning_rate": 4.708839934929604e-05, "loss": 2.3794, "step": 2012500 }, { "epoch": 5.83, "learning_rate": 4.708767570164876e-05, "loss": 2.3899, "step": 2013000 }, { "epoch": 5.83, "learning_rate": 4.708695205400148e-05, "loss": 2.362, "step": 2013500 }, { "epoch": 5.83, "learning_rate": 4.708622840635421e-05, "loss": 2.3655, "step": 2014000 }, { "epoch": 5.83, "learning_rate": 4.708550475870693e-05, "loss": 2.3679, "step": 2014500 }, { "epoch": 5.83, "learning_rate": 4.7084781111059655e-05, "loss": 2.384, "step": 2015000 }, { "epoch": 5.83, "learning_rate": 4.708405746341238e-05, "loss": 2.3809, "step": 2015500 }, { "epoch": 5.84, "learning_rate": 4.70833352630604e-05, "loss": 2.3962, "step": 2016000 }, { "epoch": 5.84, "learning_rate": 4.708261161541312e-05, "loss": 2.3696, "step": 2016500 }, { "epoch": 5.84, "learning_rate": 4.7081887967765844e-05, "loss": 2.3931, "step": 2017000 }, { "epoch": 5.84, "learning_rate": 4.708116576741386e-05, "loss": 2.3783, "step": 2017500 }, { "epoch": 5.84, "learning_rate": 4.708044211976658e-05, "loss": 2.3664, "step": 2018000 }, { "epoch": 5.84, "learning_rate": 4.7079718472119304e-05, "loss": 2.3734, "step": 2018500 }, { "epoch": 5.84, "learning_rate": 4.7078994824472026e-05, "loss": 2.3872, "step": 2019000 }, { "epoch": 5.85, "learning_rate": 4.707827117682475e-05, "loss": 2.3886, "step": 2019500 }, { "epoch": 5.85, "learning_rate": 4.707754752917747e-05, "loss": 2.3625, "step": 2020000 }, { "epoch": 5.85, "learning_rate": 4.707682532882549e-05, "loss": 2.3816, "step": 2020500 }, { "epoch": 5.85, "learning_rate": 4.7076101681178215e-05, "loss": 2.3705, "step": 2021000 }, { "epoch": 5.85, "learning_rate": 4.707537948082623e-05, "loss": 2.3774, "step": 2021500 }, { "epoch": 5.85, "learning_rate": 4.707465583317896e-05, "loss": 2.3927, "step": 2022000 }, { "epoch": 5.85, "learning_rate": 4.707393218553168e-05, "loss": 2.3833, "step": 2022500 }, { "epoch": 5.86, "learning_rate": 4.7073208537884404e-05, "loss": 2.3826, "step": 2023000 }, { "epoch": 5.86, "learning_rate": 4.7072484890237126e-05, "loss": 2.3857, "step": 2023500 }, { "epoch": 5.86, "learning_rate": 4.707176124258985e-05, "loss": 2.3733, "step": 2024000 }, { "epoch": 5.86, "learning_rate": 4.707103759494257e-05, "loss": 2.3873, "step": 2024500 }, { "epoch": 5.86, "learning_rate": 4.70703139472953e-05, "loss": 2.3684, "step": 2025000 }, { "epoch": 5.86, "learning_rate": 4.706959029964802e-05, "loss": 2.383, "step": 2025500 }, { "epoch": 5.86, "learning_rate": 4.706886809929604e-05, "loss": 2.3871, "step": 2026000 }, { "epoch": 5.87, "learning_rate": 4.706814445164876e-05, "loss": 2.3815, "step": 2026500 }, { "epoch": 5.87, "learning_rate": 4.7067422251296775e-05, "loss": 2.4149, "step": 2027000 }, { "epoch": 5.87, "learning_rate": 4.70666986036495e-05, "loss": 2.3954, "step": 2027500 }, { "epoch": 5.87, "learning_rate": 4.706597495600223e-05, "loss": 2.3776, "step": 2028000 }, { "epoch": 5.87, "learning_rate": 4.706525130835495e-05, "loss": 2.3585, "step": 2028500 }, { "epoch": 5.87, "learning_rate": 4.706452766070767e-05, "loss": 2.3811, "step": 2029000 }, { "epoch": 5.87, "learning_rate": 4.70638040130604e-05, "loss": 2.3892, "step": 2029500 }, { "epoch": 5.88, "learning_rate": 4.706308036541312e-05, "loss": 2.396, "step": 2030000 }, { "epoch": 5.88, "learning_rate": 4.7062356717765845e-05, "loss": 2.3786, "step": 2030500 }, { "epoch": 5.88, "learning_rate": 4.706163307011857e-05, "loss": 2.3701, "step": 2031000 }, { "epoch": 5.88, "learning_rate": 4.706090942247129e-05, "loss": 2.371, "step": 2031500 }, { "epoch": 5.88, "learning_rate": 4.7060187222119305e-05, "loss": 2.3551, "step": 2032000 }, { "epoch": 5.88, "learning_rate": 4.705946357447203e-05, "loss": 2.3897, "step": 2032500 }, { "epoch": 5.88, "learning_rate": 4.705873992682475e-05, "loss": 2.405, "step": 2033000 }, { "epoch": 5.89, "learning_rate": 4.705801627917748e-05, "loss": 2.3634, "step": 2033500 }, { "epoch": 5.89, "learning_rate": 4.70572926315302e-05, "loss": 2.4021, "step": 2034000 }, { "epoch": 5.89, "learning_rate": 4.7056570431178216e-05, "loss": 2.3885, "step": 2034500 }, { "epoch": 5.89, "learning_rate": 4.705584678353094e-05, "loss": 2.3855, "step": 2035000 }, { "epoch": 5.89, "learning_rate": 4.705512313588366e-05, "loss": 2.3706, "step": 2035500 }, { "epoch": 5.89, "learning_rate": 4.705439948823638e-05, "loss": 2.4031, "step": 2036000 }, { "epoch": 5.89, "learning_rate": 4.705367584058911e-05, "loss": 2.3733, "step": 2036500 }, { "epoch": 5.9, "learning_rate": 4.7052952192941834e-05, "loss": 2.3849, "step": 2037000 }, { "epoch": 5.9, "learning_rate": 4.7052228545294556e-05, "loss": 2.3905, "step": 2037500 }, { "epoch": 5.9, "learning_rate": 4.705150489764728e-05, "loss": 2.3929, "step": 2038000 }, { "epoch": 5.9, "learning_rate": 4.705078125e-05, "loss": 2.3736, "step": 2038500 }, { "epoch": 5.9, "learning_rate": 4.705005904964802e-05, "loss": 2.3812, "step": 2039000 }, { "epoch": 5.9, "learning_rate": 4.7049335402000745e-05, "loss": 2.3967, "step": 2039500 }, { "epoch": 5.9, "learning_rate": 4.704861320164876e-05, "loss": 2.3684, "step": 2040000 }, { "epoch": 5.91, "learning_rate": 4.7047891001296776e-05, "loss": 2.3816, "step": 2040500 }, { "epoch": 5.91, "learning_rate": 4.70471673536495e-05, "loss": 2.3795, "step": 2041000 }, { "epoch": 5.91, "learning_rate": 4.704644370600223e-05, "loss": 2.3959, "step": 2041500 }, { "epoch": 5.91, "learning_rate": 4.704572005835495e-05, "loss": 2.3928, "step": 2042000 }, { "epoch": 5.91, "learning_rate": 4.704499930529826e-05, "loss": 2.3924, "step": 2042500 }, { "epoch": 5.91, "learning_rate": 4.704427565765098e-05, "loss": 2.3793, "step": 2043000 }, { "epoch": 5.92, "learning_rate": 4.70435520100037e-05, "loss": 2.3637, "step": 2043500 }, { "epoch": 5.92, "learning_rate": 4.7042828362356425e-05, "loss": 2.3946, "step": 2044000 }, { "epoch": 5.92, "learning_rate": 4.7042104714709154e-05, "loss": 2.3805, "step": 2044500 }, { "epoch": 5.92, "learning_rate": 4.7041381067061877e-05, "loss": 2.4095, "step": 2045000 }, { "epoch": 5.92, "learning_rate": 4.70406574194146e-05, "loss": 2.3866, "step": 2045500 }, { "epoch": 5.92, "learning_rate": 4.703993377176733e-05, "loss": 2.3965, "step": 2046000 }, { "epoch": 5.92, "learning_rate": 4.703921012412005e-05, "loss": 2.3837, "step": 2046500 }, { "epoch": 5.93, "learning_rate": 4.703848647647277e-05, "loss": 2.3962, "step": 2047000 }, { "epoch": 5.93, "learning_rate": 4.7037762828825495e-05, "loss": 2.376, "step": 2047500 }, { "epoch": 5.93, "learning_rate": 4.703703918117822e-05, "loss": 2.3644, "step": 2048000 }, { "epoch": 5.93, "learning_rate": 4.703631553353094e-05, "loss": 2.3736, "step": 2048500 }, { "epoch": 5.93, "learning_rate": 4.703559188588366e-05, "loss": 2.3803, "step": 2049000 }, { "epoch": 5.93, "learning_rate": 4.7034868238236383e-05, "loss": 2.3758, "step": 2049500 }, { "epoch": 5.93, "learning_rate": 4.7034144590589106e-05, "loss": 2.3715, "step": 2050000 }, { "epoch": 5.94, "learning_rate": 4.703342239023713e-05, "loss": 2.378, "step": 2050500 }, { "epoch": 5.94, "learning_rate": 4.703269874258985e-05, "loss": 2.3933, "step": 2051000 }, { "epoch": 5.94, "learning_rate": 4.7031976542237866e-05, "loss": 2.3834, "step": 2051500 }, { "epoch": 5.94, "learning_rate": 4.7031252894590595e-05, "loss": 2.412, "step": 2052000 }, { "epoch": 5.94, "learning_rate": 4.703052924694332e-05, "loss": 2.3855, "step": 2052500 }, { "epoch": 5.94, "learning_rate": 4.702980559929604e-05, "loss": 2.3717, "step": 2053000 }, { "epoch": 5.94, "learning_rate": 4.702908195164876e-05, "loss": 2.3955, "step": 2053500 }, { "epoch": 5.95, "learning_rate": 4.7028358304001484e-05, "loss": 2.3487, "step": 2054000 }, { "epoch": 5.95, "learning_rate": 4.7027634656354206e-05, "loss": 2.3901, "step": 2054500 }, { "epoch": 5.95, "learning_rate": 4.702691100870693e-05, "loss": 2.3734, "step": 2055000 }, { "epoch": 5.95, "learning_rate": 4.702618880835495e-05, "loss": 2.3852, "step": 2055500 }, { "epoch": 5.95, "learning_rate": 4.702546516070767e-05, "loss": 2.3837, "step": 2056000 }, { "epoch": 5.95, "learning_rate": 4.7024741513060395e-05, "loss": 2.3786, "step": 2056500 }, { "epoch": 5.95, "learning_rate": 4.702401786541312e-05, "loss": 2.3686, "step": 2057000 }, { "epoch": 5.96, "learning_rate": 4.702329566506113e-05, "loss": 2.3666, "step": 2057500 }, { "epoch": 5.96, "learning_rate": 4.7022572017413855e-05, "loss": 2.4034, "step": 2058000 }, { "epoch": 5.96, "learning_rate": 4.702184836976658e-05, "loss": 2.3837, "step": 2058500 }, { "epoch": 5.96, "learning_rate": 4.7021124722119306e-05, "loss": 2.3775, "step": 2059000 }, { "epoch": 5.96, "learning_rate": 4.702040107447203e-05, "loss": 2.3759, "step": 2059500 }, { "epoch": 5.96, "learning_rate": 4.701967742682476e-05, "loss": 2.3655, "step": 2060000 }, { "epoch": 5.96, "learning_rate": 4.701895377917748e-05, "loss": 2.3825, "step": 2060500 }, { "epoch": 5.97, "learning_rate": 4.70182301315302e-05, "loss": 2.3888, "step": 2061000 }, { "epoch": 5.97, "learning_rate": 4.7017506483882924e-05, "loss": 2.3719, "step": 2061500 }, { "epoch": 5.97, "learning_rate": 4.7016782836235646e-05, "loss": 2.3644, "step": 2062000 }, { "epoch": 5.97, "learning_rate": 4.701605918858837e-05, "loss": 2.4086, "step": 2062500 }, { "epoch": 5.97, "learning_rate": 4.7015336988236384e-05, "loss": 2.3545, "step": 2063000 }, { "epoch": 5.97, "learning_rate": 4.7014613340589106e-05, "loss": 2.3857, "step": 2063500 }, { "epoch": 5.97, "learning_rate": 4.701388969294183e-05, "loss": 2.3775, "step": 2064000 }, { "epoch": 5.98, "learning_rate": 4.701316604529456e-05, "loss": 2.4024, "step": 2064500 }, { "epoch": 5.98, "learning_rate": 4.701244239764728e-05, "loss": 2.3788, "step": 2065000 }, { "epoch": 5.98, "learning_rate": 4.701171875e-05, "loss": 2.3851, "step": 2065500 }, { "epoch": 5.98, "learning_rate": 4.7010995102352724e-05, "loss": 2.3836, "step": 2066000 }, { "epoch": 5.98, "learning_rate": 4.7010271454705453e-05, "loss": 2.3854, "step": 2066500 }, { "epoch": 5.98, "learning_rate": 4.700954925435347e-05, "loss": 2.3641, "step": 2067000 }, { "epoch": 5.98, "learning_rate": 4.700882560670619e-05, "loss": 2.3776, "step": 2067500 }, { "epoch": 5.99, "learning_rate": 4.7008101959058913e-05, "loss": 2.3852, "step": 2068000 }, { "epoch": 5.99, "learning_rate": 4.7007378311411636e-05, "loss": 2.3769, "step": 2068500 }, { "epoch": 5.99, "learning_rate": 4.700665466376436e-05, "loss": 2.3651, "step": 2069000 }, { "epoch": 5.99, "learning_rate": 4.700593101611708e-05, "loss": 2.3713, "step": 2069500 }, { "epoch": 5.99, "learning_rate": 4.700520736846981e-05, "loss": 2.3794, "step": 2070000 }, { "epoch": 5.99, "learning_rate": 4.700448372082253e-05, "loss": 2.3631, "step": 2070500 }, { "epoch": 5.99, "learning_rate": 4.700376152047055e-05, "loss": 2.3789, "step": 2071000 }, { "epoch": 6.0, "learning_rate": 4.700303787282327e-05, "loss": 2.3672, "step": 2071500 }, { "epoch": 6.0, "learning_rate": 4.700231422517599e-05, "loss": 2.3673, "step": 2072000 }, { "epoch": 6.0, "learning_rate": 4.7001590577528714e-05, "loss": 2.3625, "step": 2072500 }, { "epoch": 6.0, "eval_accuracy": 0.6423148797851449, "eval_accuracy_mlm": 0.6042832537684684, "eval_accuracy_nsp": 0.8462324639195574, "eval_loss": 2.3543269634246826, "eval_runtime": 330.7221, "eval_samples_per_second": 1319.494, "eval_steps_per_second": 54.98, "step": 2072832 }, { "epoch": 6.0, "learning_rate": 4.700086837717673e-05, "loss": 2.3918, "step": 2073000 }, { "epoch": 6.0, "learning_rate": 4.700014472952946e-05, "loss": 2.3507, "step": 2073500 }, { "epoch": 6.0, "learning_rate": 4.699942108188218e-05, "loss": 2.372, "step": 2074000 }, { "epoch": 6.0, "learning_rate": 4.699869743423491e-05, "loss": 2.3506, "step": 2074500 }, { "epoch": 6.01, "learning_rate": 4.699797378658763e-05, "loss": 2.3532, "step": 2075000 }, { "epoch": 6.01, "learning_rate": 4.6997250138940354e-05, "loss": 2.3656, "step": 2075500 }, { "epoch": 6.01, "learning_rate": 4.699652793858837e-05, "loss": 2.3596, "step": 2076000 }, { "epoch": 6.01, "learning_rate": 4.699580429094109e-05, "loss": 2.374, "step": 2076500 }, { "epoch": 6.01, "learning_rate": 4.6995080643293814e-05, "loss": 2.341, "step": 2077000 }, { "epoch": 6.01, "learning_rate": 4.6994356995646536e-05, "loss": 2.3713, "step": 2077500 }, { "epoch": 6.01, "learning_rate": 4.699363334799926e-05, "loss": 2.3596, "step": 2078000 }, { "epoch": 6.02, "learning_rate": 4.699290970035198e-05, "loss": 2.3553, "step": 2078500 }, { "epoch": 6.02, "learning_rate": 4.699218605270471e-05, "loss": 2.3466, "step": 2079000 }, { "epoch": 6.02, "learning_rate": 4.699146240505743e-05, "loss": 2.3673, "step": 2079500 }, { "epoch": 6.02, "learning_rate": 4.6990738757410154e-05, "loss": 2.3483, "step": 2080000 }, { "epoch": 6.02, "learning_rate": 4.699001800435346e-05, "loss": 2.3788, "step": 2080500 }, { "epoch": 6.02, "learning_rate": 4.6989294356706185e-05, "loss": 2.3499, "step": 2081000 }, { "epoch": 6.03, "learning_rate": 4.6988570709058914e-05, "loss": 2.3337, "step": 2081500 }, { "epoch": 6.03, "learning_rate": 4.6987847061411636e-05, "loss": 2.341, "step": 2082000 }, { "epoch": 6.03, "learning_rate": 4.698712341376436e-05, "loss": 2.37, "step": 2082500 }, { "epoch": 6.03, "learning_rate": 4.698639976611708e-05, "loss": 2.3718, "step": 2083000 }, { "epoch": 6.03, "learning_rate": 4.69856775657651e-05, "loss": 2.3788, "step": 2083500 }, { "epoch": 6.03, "learning_rate": 4.6984953918117826e-05, "loss": 2.3597, "step": 2084000 }, { "epoch": 6.03, "learning_rate": 4.698423027047055e-05, "loss": 2.3582, "step": 2084500 }, { "epoch": 6.04, "learning_rate": 4.698350662282327e-05, "loss": 2.3628, "step": 2085000 }, { "epoch": 6.04, "learning_rate": 4.698278297517599e-05, "loss": 2.3534, "step": 2085500 }, { "epoch": 6.04, "learning_rate": 4.698206222211931e-05, "loss": 2.3635, "step": 2086000 }, { "epoch": 6.04, "learning_rate": 4.698133857447203e-05, "loss": 2.3822, "step": 2086500 }, { "epoch": 6.04, "learning_rate": 4.698061492682475e-05, "loss": 2.3685, "step": 2087000 }, { "epoch": 6.04, "learning_rate": 4.6979891279177475e-05, "loss": 2.3415, "step": 2087500 }, { "epoch": 6.04, "learning_rate": 4.69791676315302e-05, "loss": 2.3515, "step": 2088000 }, { "epoch": 6.05, "learning_rate": 4.697844543117821e-05, "loss": 2.3672, "step": 2088500 }, { "epoch": 6.05, "learning_rate": 4.6977721783530935e-05, "loss": 2.3742, "step": 2089000 }, { "epoch": 6.05, "learning_rate": 4.6976998135883664e-05, "loss": 2.3683, "step": 2089500 }, { "epoch": 6.05, "learning_rate": 4.6976274488236386e-05, "loss": 2.3628, "step": 2090000 }, { "epoch": 6.05, "learning_rate": 4.697555084058911e-05, "loss": 2.346, "step": 2090500 }, { "epoch": 6.05, "learning_rate": 4.697482719294184e-05, "loss": 2.3676, "step": 2091000 }, { "epoch": 6.05, "learning_rate": 4.697410354529456e-05, "loss": 2.3842, "step": 2091500 }, { "epoch": 6.06, "learning_rate": 4.697338279223787e-05, "loss": 2.3769, "step": 2092000 }, { "epoch": 6.06, "learning_rate": 4.697265914459059e-05, "loss": 2.3262, "step": 2092500 }, { "epoch": 6.06, "learning_rate": 4.697193549694331e-05, "loss": 2.372, "step": 2093000 }, { "epoch": 6.06, "learning_rate": 4.6971211849296035e-05, "loss": 2.3758, "step": 2093500 }, { "epoch": 6.06, "learning_rate": 4.697048820164876e-05, "loss": 2.3922, "step": 2094000 }, { "epoch": 6.06, "learning_rate": 4.6969764554001486e-05, "loss": 2.3735, "step": 2094500 }, { "epoch": 6.06, "learning_rate": 4.696904090635421e-05, "loss": 2.3765, "step": 2095000 }, { "epoch": 6.07, "learning_rate": 4.696831725870693e-05, "loss": 2.354, "step": 2095500 }, { "epoch": 6.07, "learning_rate": 4.696759361105965e-05, "loss": 2.3798, "step": 2096000 }, { "epoch": 6.07, "learning_rate": 4.696686996341238e-05, "loss": 2.3618, "step": 2096500 }, { "epoch": 6.07, "learning_rate": 4.6966146315765104e-05, "loss": 2.3829, "step": 2097000 }, { "epoch": 6.07, "learning_rate": 4.6965422668117826e-05, "loss": 2.3469, "step": 2097500 }, { "epoch": 6.07, "learning_rate": 4.696469902047055e-05, "loss": 2.3785, "step": 2098000 }, { "epoch": 6.07, "learning_rate": 4.6963976820118564e-05, "loss": 2.3434, "step": 2098500 }, { "epoch": 6.08, "learning_rate": 4.6963253172471286e-05, "loss": 2.3405, "step": 2099000 }, { "epoch": 6.08, "learning_rate": 4.696252952482401e-05, "loss": 2.3497, "step": 2099500 }, { "epoch": 6.08, "learning_rate": 4.696180587717674e-05, "loss": 2.3807, "step": 2100000 }, { "epoch": 6.08, "learning_rate": 4.696108222952946e-05, "loss": 2.3713, "step": 2100500 }, { "epoch": 6.08, "learning_rate": 4.696035858188218e-05, "loss": 2.375, "step": 2101000 }, { "epoch": 6.08, "learning_rate": 4.6959634934234904e-05, "loss": 2.3421, "step": 2101500 }, { "epoch": 6.08, "learning_rate": 4.6958911286587627e-05, "loss": 2.3695, "step": 2102000 }, { "epoch": 6.09, "learning_rate": 4.695818763894035e-05, "loss": 2.3596, "step": 2102500 }, { "epoch": 6.09, "learning_rate": 4.6957465438588364e-05, "loss": 2.3408, "step": 2103000 }, { "epoch": 6.09, "learning_rate": 4.6956741790941087e-05, "loss": 2.3629, "step": 2103500 }, { "epoch": 6.09, "learning_rate": 4.6956018143293816e-05, "loss": 2.376, "step": 2104000 }, { "epoch": 6.09, "learning_rate": 4.695529449564654e-05, "loss": 2.3499, "step": 2104500 }, { "epoch": 6.09, "learning_rate": 4.695457084799926e-05, "loss": 2.3687, "step": 2105000 }, { "epoch": 6.09, "learning_rate": 4.695384864764728e-05, "loss": 2.368, "step": 2105500 }, { "epoch": 6.1, "learning_rate": 4.6953125000000005e-05, "loss": 2.3579, "step": 2106000 }, { "epoch": 6.1, "learning_rate": 4.695240135235273e-05, "loss": 2.3651, "step": 2106500 }, { "epoch": 6.1, "learning_rate": 4.695167770470545e-05, "loss": 2.3676, "step": 2107000 }, { "epoch": 6.1, "learning_rate": 4.6950955504353465e-05, "loss": 2.3701, "step": 2107500 }, { "epoch": 6.1, "learning_rate": 4.695023185670619e-05, "loss": 2.3695, "step": 2108000 }, { "epoch": 6.1, "learning_rate": 4.694950820905891e-05, "loss": 2.3582, "step": 2108500 }, { "epoch": 6.1, "learning_rate": 4.694878456141164e-05, "loss": 2.3709, "step": 2109000 }, { "epoch": 6.11, "learning_rate": 4.694806091376436e-05, "loss": 2.3746, "step": 2109500 }, { "epoch": 6.11, "learning_rate": 4.694733726611708e-05, "loss": 2.3498, "step": 2110000 }, { "epoch": 6.11, "learning_rate": 4.6946613618469805e-05, "loss": 2.3483, "step": 2110500 }, { "epoch": 6.11, "learning_rate": 4.694588997082253e-05, "loss": 2.3708, "step": 2111000 }, { "epoch": 6.11, "learning_rate": 4.694516777047055e-05, "loss": 2.3852, "step": 2111500 }, { "epoch": 6.11, "learning_rate": 4.694444412282327e-05, "loss": 2.3415, "step": 2112000 }, { "epoch": 6.11, "learning_rate": 4.6943720475175994e-05, "loss": 2.356, "step": 2112500 }, { "epoch": 6.12, "learning_rate": 4.694299827482401e-05, "loss": 2.3529, "step": 2113000 }, { "epoch": 6.12, "learning_rate": 4.694227462717674e-05, "loss": 2.3696, "step": 2113500 }, { "epoch": 6.12, "learning_rate": 4.6941552426824754e-05, "loss": 2.3569, "step": 2114000 }, { "epoch": 6.12, "learning_rate": 4.6940828779177476e-05, "loss": 2.364, "step": 2114500 }, { "epoch": 6.12, "learning_rate": 4.69401051315302e-05, "loss": 2.3801, "step": 2115000 }, { "epoch": 6.12, "learning_rate": 4.693938148388292e-05, "loss": 2.3793, "step": 2115500 }, { "epoch": 6.12, "learning_rate": 4.693865783623564e-05, "loss": 2.3607, "step": 2116000 }, { "epoch": 6.13, "learning_rate": 4.6937934188588365e-05, "loss": 2.3535, "step": 2116500 }, { "epoch": 6.13, "learning_rate": 4.693721054094109e-05, "loss": 2.3726, "step": 2117000 }, { "epoch": 6.13, "learning_rate": 4.693648689329381e-05, "loss": 2.378, "step": 2117500 }, { "epoch": 6.13, "learning_rate": 4.693576469294183e-05, "loss": 2.3514, "step": 2118000 }, { "epoch": 6.13, "learning_rate": 4.693504249258985e-05, "loss": 2.3659, "step": 2118500 }, { "epoch": 6.13, "learning_rate": 4.693431884494257e-05, "loss": 2.3537, "step": 2119000 }, { "epoch": 6.14, "learning_rate": 4.69335951972953e-05, "loss": 2.3537, "step": 2119500 }, { "epoch": 6.14, "learning_rate": 4.693287154964802e-05, "loss": 2.3598, "step": 2120000 }, { "epoch": 6.14, "learning_rate": 4.693214790200074e-05, "loss": 2.3877, "step": 2120500 }, { "epoch": 6.14, "learning_rate": 4.6931424254353465e-05, "loss": 2.3866, "step": 2121000 }, { "epoch": 6.14, "learning_rate": 4.693070060670619e-05, "loss": 2.3477, "step": 2121500 }, { "epoch": 6.14, "learning_rate": 4.692997695905892e-05, "loss": 2.3544, "step": 2122000 }, { "epoch": 6.14, "learning_rate": 4.692925331141164e-05, "loss": 2.3507, "step": 2122500 }, { "epoch": 6.15, "learning_rate": 4.692852966376436e-05, "loss": 2.3723, "step": 2123000 }, { "epoch": 6.15, "learning_rate": 4.692780746341238e-05, "loss": 2.3637, "step": 2123500 }, { "epoch": 6.15, "learning_rate": 4.69270838157651e-05, "loss": 2.356, "step": 2124000 }, { "epoch": 6.15, "learning_rate": 4.692636016811782e-05, "loss": 2.3704, "step": 2124500 }, { "epoch": 6.15, "learning_rate": 4.692563652047054e-05, "loss": 2.3638, "step": 2125000 }, { "epoch": 6.15, "learning_rate": 4.6924912872823266e-05, "loss": 2.3622, "step": 2125500 }, { "epoch": 6.15, "learning_rate": 4.692418922517599e-05, "loss": 2.3803, "step": 2126000 }, { "epoch": 6.16, "learning_rate": 4.692346557752872e-05, "loss": 2.3765, "step": 2126500 }, { "epoch": 6.16, "learning_rate": 4.692274192988144e-05, "loss": 2.3619, "step": 2127000 }, { "epoch": 6.16, "learning_rate": 4.692201828223417e-05, "loss": 2.3729, "step": 2127500 }, { "epoch": 6.16, "learning_rate": 4.6921296081882184e-05, "loss": 2.3576, "step": 2128000 }, { "epoch": 6.16, "learning_rate": 4.6920572434234906e-05, "loss": 2.3688, "step": 2128500 }, { "epoch": 6.16, "learning_rate": 4.691984878658763e-05, "loss": 2.3887, "step": 2129000 }, { "epoch": 6.16, "learning_rate": 4.691912513894035e-05, "loss": 2.3907, "step": 2129500 }, { "epoch": 6.17, "learning_rate": 4.6918402938588366e-05, "loss": 2.3966, "step": 2130000 }, { "epoch": 6.17, "learning_rate": 4.691767929094109e-05, "loss": 2.3553, "step": 2130500 }, { "epoch": 6.17, "learning_rate": 4.691695564329382e-05, "loss": 2.3488, "step": 2131000 }, { "epoch": 6.17, "learning_rate": 4.691623199564654e-05, "loss": 2.3577, "step": 2131500 }, { "epoch": 6.17, "learning_rate": 4.691550834799926e-05, "loss": 2.3384, "step": 2132000 }, { "epoch": 6.17, "learning_rate": 4.691478759494257e-05, "loss": 2.3765, "step": 2132500 }, { "epoch": 6.17, "learning_rate": 4.691406394729529e-05, "loss": 2.3917, "step": 2133000 }, { "epoch": 6.18, "learning_rate": 4.6913340299648015e-05, "loss": 2.3614, "step": 2133500 }, { "epoch": 6.18, "learning_rate": 4.6912616652000744e-05, "loss": 2.357, "step": 2134000 }, { "epoch": 6.18, "learning_rate": 4.6911893004353466e-05, "loss": 2.3662, "step": 2134500 }, { "epoch": 6.18, "learning_rate": 4.691117080400149e-05, "loss": 2.378, "step": 2135000 }, { "epoch": 6.18, "learning_rate": 4.691044715635421e-05, "loss": 2.3553, "step": 2135500 }, { "epoch": 6.18, "learning_rate": 4.690972350870693e-05, "loss": 2.3592, "step": 2136000 }, { "epoch": 6.18, "learning_rate": 4.6908999861059655e-05, "loss": 2.3456, "step": 2136500 }, { "epoch": 6.19, "learning_rate": 4.690827766070767e-05, "loss": 2.3497, "step": 2137000 }, { "epoch": 6.19, "learning_rate": 4.690755401306039e-05, "loss": 2.3843, "step": 2137500 }, { "epoch": 6.19, "learning_rate": 4.6906830365413115e-05, "loss": 2.3822, "step": 2138000 }, { "epoch": 6.19, "learning_rate": 4.6906106717765844e-05, "loss": 2.3547, "step": 2138500 }, { "epoch": 6.19, "learning_rate": 4.6905383070118566e-05, "loss": 2.375, "step": 2139000 }, { "epoch": 6.19, "learning_rate": 4.690465942247129e-05, "loss": 2.3486, "step": 2139500 }, { "epoch": 6.19, "learning_rate": 4.690393577482401e-05, "loss": 2.3556, "step": 2140000 }, { "epoch": 6.2, "learning_rate": 4.690321212717673e-05, "loss": 2.3776, "step": 2140500 }, { "epoch": 6.2, "learning_rate": 4.690248992682475e-05, "loss": 2.3417, "step": 2141000 }, { "epoch": 6.2, "learning_rate": 4.690176627917748e-05, "loss": 2.3609, "step": 2141500 }, { "epoch": 6.2, "learning_rate": 4.69010426315302e-05, "loss": 2.3566, "step": 2142000 }, { "epoch": 6.2, "learning_rate": 4.690031898388292e-05, "loss": 2.3523, "step": 2142500 }, { "epoch": 6.2, "learning_rate": 4.6899595336235644e-05, "loss": 2.3796, "step": 2143000 }, { "epoch": 6.2, "learning_rate": 4.689887168858837e-05, "loss": 2.3689, "step": 2143500 }, { "epoch": 6.21, "learning_rate": 4.689814804094109e-05, "loss": 2.385, "step": 2144000 }, { "epoch": 6.21, "learning_rate": 4.689742439329382e-05, "loss": 2.3643, "step": 2144500 }, { "epoch": 6.21, "learning_rate": 4.689670074564654e-05, "loss": 2.3744, "step": 2145000 }, { "epoch": 6.21, "learning_rate": 4.689597709799926e-05, "loss": 2.3603, "step": 2145500 }, { "epoch": 6.21, "learning_rate": 4.689525489764728e-05, "loss": 2.3515, "step": 2146000 }, { "epoch": 6.21, "learning_rate": 4.689453125e-05, "loss": 2.3585, "step": 2146500 }, { "epoch": 6.21, "learning_rate": 4.689380760235272e-05, "loss": 2.3688, "step": 2147000 }, { "epoch": 6.22, "learning_rate": 4.6893083954705445e-05, "loss": 2.3699, "step": 2147500 }, { "epoch": 6.22, "learning_rate": 4.689236030705817e-05, "loss": 2.37, "step": 2148000 }, { "epoch": 6.22, "learning_rate": 4.689163810670619e-05, "loss": 2.3677, "step": 2148500 }, { "epoch": 6.22, "learning_rate": 4.689091590635421e-05, "loss": 2.3488, "step": 2149000 }, { "epoch": 6.22, "learning_rate": 4.689019370600223e-05, "loss": 2.351, "step": 2149500 }, { "epoch": 6.22, "learning_rate": 4.688947005835495e-05, "loss": 2.3574, "step": 2150000 }, { "epoch": 6.22, "learning_rate": 4.688874641070767e-05, "loss": 2.4013, "step": 2150500 }, { "epoch": 6.23, "learning_rate": 4.6888022763060394e-05, "loss": 2.3765, "step": 2151000 }, { "epoch": 6.23, "learning_rate": 4.6887299115413116e-05, "loss": 2.3605, "step": 2151500 }, { "epoch": 6.23, "learning_rate": 4.6886575467765845e-05, "loss": 2.3808, "step": 2152000 }, { "epoch": 6.23, "learning_rate": 4.688585182011857e-05, "loss": 2.3506, "step": 2152500 }, { "epoch": 6.23, "learning_rate": 4.688512961976658e-05, "loss": 2.3876, "step": 2153000 }, { "epoch": 6.23, "learning_rate": 4.6884405972119305e-05, "loss": 2.3571, "step": 2153500 }, { "epoch": 6.23, "learning_rate": 4.688368232447203e-05, "loss": 2.3668, "step": 2154000 }, { "epoch": 6.24, "learning_rate": 4.688295867682475e-05, "loss": 2.3712, "step": 2154500 }, { "epoch": 6.24, "learning_rate": 4.688223502917747e-05, "loss": 2.348, "step": 2155000 }, { "epoch": 6.24, "learning_rate": 4.6881511381530194e-05, "loss": 2.3677, "step": 2155500 }, { "epoch": 6.24, "learning_rate": 4.6880787733882916e-05, "loss": 2.3612, "step": 2156000 }, { "epoch": 6.24, "learning_rate": 4.688006553353094e-05, "loss": 2.3619, "step": 2156500 }, { "epoch": 6.24, "learning_rate": 4.687934188588367e-05, "loss": 2.381, "step": 2157000 }, { "epoch": 6.25, "learning_rate": 4.687861823823639e-05, "loss": 2.3667, "step": 2157500 }, { "epoch": 6.25, "learning_rate": 4.687789459058911e-05, "loss": 2.3327, "step": 2158000 }, { "epoch": 6.25, "learning_rate": 4.6877170942941834e-05, "loss": 2.3503, "step": 2158500 }, { "epoch": 6.25, "learning_rate": 4.6876447295294557e-05, "loss": 2.3861, "step": 2159000 }, { "epoch": 6.25, "learning_rate": 4.687572364764728e-05, "loss": 2.3485, "step": 2159500 }, { "epoch": 6.25, "learning_rate": 4.6875e-05, "loss": 2.3681, "step": 2160000 }, { "epoch": 6.25, "learning_rate": 4.687427635235272e-05, "loss": 2.3773, "step": 2160500 }, { "epoch": 6.26, "learning_rate": 4.6873552704705445e-05, "loss": 2.3604, "step": 2161000 }, { "epoch": 6.26, "learning_rate": 4.687283050435347e-05, "loss": 2.3872, "step": 2161500 }, { "epoch": 6.26, "learning_rate": 4.687210685670619e-05, "loss": 2.3777, "step": 2162000 }, { "epoch": 6.26, "learning_rate": 4.687138320905891e-05, "loss": 2.3461, "step": 2162500 }, { "epoch": 6.26, "learning_rate": 4.6870659561411634e-05, "loss": 2.3857, "step": 2163000 }, { "epoch": 6.26, "learning_rate": 4.686993591376436e-05, "loss": 2.3407, "step": 2163500 }, { "epoch": 6.26, "learning_rate": 4.6869212266117086e-05, "loss": 2.3797, "step": 2164000 }, { "epoch": 6.27, "learning_rate": 4.68684900657651e-05, "loss": 2.3387, "step": 2164500 }, { "epoch": 6.27, "learning_rate": 4.6867766418117824e-05, "loss": 2.3751, "step": 2165000 }, { "epoch": 6.27, "learning_rate": 4.6867042770470546e-05, "loss": 2.3693, "step": 2165500 }, { "epoch": 6.27, "learning_rate": 4.686631912282327e-05, "loss": 2.3725, "step": 2166000 }, { "epoch": 6.27, "learning_rate": 4.6865598369766584e-05, "loss": 2.3467, "step": 2166500 }, { "epoch": 6.27, "learning_rate": 4.6864874722119306e-05, "loss": 2.3693, "step": 2167000 }, { "epoch": 6.27, "learning_rate": 4.686415107447203e-05, "loss": 2.3826, "step": 2167500 }, { "epoch": 6.28, "learning_rate": 4.686342742682475e-05, "loss": 2.3718, "step": 2168000 }, { "epoch": 6.28, "learning_rate": 4.686270377917747e-05, "loss": 2.3606, "step": 2168500 }, { "epoch": 6.28, "learning_rate": 4.6861980131530195e-05, "loss": 2.3672, "step": 2169000 }, { "epoch": 6.28, "learning_rate": 4.6861256483882924e-05, "loss": 2.3834, "step": 2169500 }, { "epoch": 6.28, "learning_rate": 4.6860532836235646e-05, "loss": 2.3701, "step": 2170000 }, { "epoch": 6.28, "learning_rate": 4.685980918858837e-05, "loss": 2.3453, "step": 2170500 }, { "epoch": 6.28, "learning_rate": 4.6859086988236384e-05, "loss": 2.3805, "step": 2171000 }, { "epoch": 6.29, "learning_rate": 4.68583647878844e-05, "loss": 2.3817, "step": 2171500 }, { "epoch": 6.29, "learning_rate": 4.685764114023713e-05, "loss": 2.3477, "step": 2172000 }, { "epoch": 6.29, "learning_rate": 4.685691749258985e-05, "loss": 2.3609, "step": 2172500 }, { "epoch": 6.29, "learning_rate": 4.685619384494257e-05, "loss": 2.3642, "step": 2173000 }, { "epoch": 6.29, "learning_rate": 4.6855470197295295e-05, "loss": 2.3416, "step": 2173500 }, { "epoch": 6.29, "learning_rate": 4.6854746549648024e-05, "loss": 2.3709, "step": 2174000 }, { "epoch": 6.29, "learning_rate": 4.6854022902000746e-05, "loss": 2.3672, "step": 2174500 }, { "epoch": 6.3, "learning_rate": 4.685329925435347e-05, "loss": 2.3981, "step": 2175000 }, { "epoch": 6.3, "learning_rate": 4.6852577054001484e-05, "loss": 2.363, "step": 2175500 }, { "epoch": 6.3, "learning_rate": 4.6851853406354206e-05, "loss": 2.3728, "step": 2176000 }, { "epoch": 6.3, "learning_rate": 4.685112975870693e-05, "loss": 2.3595, "step": 2176500 }, { "epoch": 6.3, "learning_rate": 4.685040611105965e-05, "loss": 2.3594, "step": 2177000 }, { "epoch": 6.3, "learning_rate": 4.684968246341237e-05, "loss": 2.3706, "step": 2177500 }, { "epoch": 6.3, "learning_rate": 4.6848960263060395e-05, "loss": 2.3742, "step": 2178000 }, { "epoch": 6.31, "learning_rate": 4.684823661541312e-05, "loss": 2.3752, "step": 2178500 }, { "epoch": 6.31, "learning_rate": 4.684751296776585e-05, "loss": 2.3609, "step": 2179000 }, { "epoch": 6.31, "learning_rate": 4.684678932011857e-05, "loss": 2.3696, "step": 2179500 }, { "epoch": 6.31, "learning_rate": 4.684606567247129e-05, "loss": 2.3516, "step": 2180000 }, { "epoch": 6.31, "learning_rate": 4.684534202482401e-05, "loss": 2.3724, "step": 2180500 }, { "epoch": 6.31, "learning_rate": 4.684461982447203e-05, "loss": 2.368, "step": 2181000 }, { "epoch": 6.31, "learning_rate": 4.684389617682475e-05, "loss": 2.3759, "step": 2181500 }, { "epoch": 6.32, "learning_rate": 4.684317252917747e-05, "loss": 2.3627, "step": 2182000 }, { "epoch": 6.32, "learning_rate": 4.6842448881530196e-05, "loss": 2.3484, "step": 2182500 }, { "epoch": 6.32, "learning_rate": 4.6841725233882925e-05, "loss": 2.3777, "step": 2183000 }, { "epoch": 6.32, "learning_rate": 4.684100158623565e-05, "loss": 2.3716, "step": 2183500 }, { "epoch": 6.32, "learning_rate": 4.684027938588366e-05, "loss": 2.3728, "step": 2184000 }, { "epoch": 6.32, "learning_rate": 4.6839555738236385e-05, "loss": 2.3774, "step": 2184500 }, { "epoch": 6.32, "learning_rate": 4.683883209058911e-05, "loss": 2.3743, "step": 2185000 }, { "epoch": 6.33, "learning_rate": 4.683810844294183e-05, "loss": 2.3813, "step": 2185500 }, { "epoch": 6.33, "learning_rate": 4.6837386242589845e-05, "loss": 2.3933, "step": 2186000 }, { "epoch": 6.33, "learning_rate": 4.6836662594942574e-05, "loss": 2.3717, "step": 2186500 }, { "epoch": 6.33, "learning_rate": 4.6835938947295296e-05, "loss": 2.3362, "step": 2187000 }, { "epoch": 6.33, "learning_rate": 4.6835215299648025e-05, "loss": 2.3886, "step": 2187500 }, { "epoch": 6.33, "learning_rate": 4.683449165200075e-05, "loss": 2.3769, "step": 2188000 }, { "epoch": 6.33, "learning_rate": 4.683376800435347e-05, "loss": 2.363, "step": 2188500 }, { "epoch": 6.34, "learning_rate": 4.683304435670619e-05, "loss": 2.3578, "step": 2189000 }, { "epoch": 6.34, "learning_rate": 4.6832320709058914e-05, "loss": 2.3573, "step": 2189500 }, { "epoch": 6.34, "learning_rate": 4.6831597061411636e-05, "loss": 2.363, "step": 2190000 }, { "epoch": 6.34, "learning_rate": 4.683087341376436e-05, "loss": 2.3692, "step": 2190500 }, { "epoch": 6.34, "learning_rate": 4.683014976611708e-05, "loss": 2.3606, "step": 2191000 }, { "epoch": 6.34, "learning_rate": 4.68294261184698e-05, "loss": 2.3495, "step": 2191500 }, { "epoch": 6.34, "learning_rate": 4.6828703918117825e-05, "loss": 2.3556, "step": 2192000 }, { "epoch": 6.35, "learning_rate": 4.682798027047055e-05, "loss": 2.3772, "step": 2192500 }, { "epoch": 6.35, "learning_rate": 4.682725662282327e-05, "loss": 2.3979, "step": 2193000 }, { "epoch": 6.35, "learning_rate": 4.682653297517599e-05, "loss": 2.3626, "step": 2193500 }, { "epoch": 6.35, "learning_rate": 4.6825810774824014e-05, "loss": 2.3607, "step": 2194000 }, { "epoch": 6.35, "learning_rate": 4.6825087127176736e-05, "loss": 2.3665, "step": 2194500 }, { "epoch": 6.35, "learning_rate": 4.682436492682475e-05, "loss": 2.3769, "step": 2195000 }, { "epoch": 6.36, "learning_rate": 4.682364417376807e-05, "loss": 2.3621, "step": 2195500 }, { "epoch": 6.36, "learning_rate": 4.682292052612079e-05, "loss": 2.3919, "step": 2196000 }, { "epoch": 6.36, "learning_rate": 4.682219687847351e-05, "loss": 2.3559, "step": 2196500 }, { "epoch": 6.36, "learning_rate": 4.6821473230826234e-05, "loss": 2.3766, "step": 2197000 }, { "epoch": 6.36, "learning_rate": 4.6820749583178957e-05, "loss": 2.3619, "step": 2197500 }, { "epoch": 6.36, "learning_rate": 4.682002593553168e-05, "loss": 2.3717, "step": 2198000 }, { "epoch": 6.36, "learning_rate": 4.68193022878844e-05, "loss": 2.3744, "step": 2198500 }, { "epoch": 6.37, "learning_rate": 4.681857864023712e-05, "loss": 2.3556, "step": 2199000 }, { "epoch": 6.37, "learning_rate": 4.681785499258985e-05, "loss": 2.3724, "step": 2199500 }, { "epoch": 6.37, "learning_rate": 4.6817131344942574e-05, "loss": 2.3997, "step": 2200000 }, { "epoch": 6.37, "learning_rate": 4.68164076972953e-05, "loss": 2.3644, "step": 2200500 }, { "epoch": 6.37, "learning_rate": 4.681568404964802e-05, "loss": 2.3654, "step": 2201000 }, { "epoch": 6.37, "learning_rate": 4.681496040200075e-05, "loss": 2.3407, "step": 2201500 }, { "epoch": 6.37, "learning_rate": 4.681423675435347e-05, "loss": 2.3706, "step": 2202000 }, { "epoch": 6.38, "learning_rate": 4.6813514554001486e-05, "loss": 2.3513, "step": 2202500 }, { "epoch": 6.38, "learning_rate": 4.681279090635421e-05, "loss": 2.3764, "step": 2203000 }, { "epoch": 6.38, "learning_rate": 4.681206725870693e-05, "loss": 2.3667, "step": 2203500 }, { "epoch": 6.38, "learning_rate": 4.681134505835495e-05, "loss": 2.3659, "step": 2204000 }, { "epoch": 6.38, "learning_rate": 4.6810621410707675e-05, "loss": 2.3518, "step": 2204500 }, { "epoch": 6.38, "learning_rate": 4.68098977630604e-05, "loss": 2.361, "step": 2205000 }, { "epoch": 6.38, "learning_rate": 4.680917411541312e-05, "loss": 2.3567, "step": 2205500 }, { "epoch": 6.39, "learning_rate": 4.680845046776584e-05, "loss": 2.3524, "step": 2206000 }, { "epoch": 6.39, "learning_rate": 4.6807726820118564e-05, "loss": 2.3682, "step": 2206500 }, { "epoch": 6.39, "learning_rate": 4.6807003172471286e-05, "loss": 2.3725, "step": 2207000 }, { "epoch": 6.39, "learning_rate": 4.680627952482401e-05, "loss": 2.3803, "step": 2207500 }, { "epoch": 6.39, "learning_rate": 4.680555587717673e-05, "loss": 2.3696, "step": 2208000 }, { "epoch": 6.39, "learning_rate": 4.680483222952945e-05, "loss": 2.3454, "step": 2208500 }, { "epoch": 6.39, "learning_rate": 4.6804110029177475e-05, "loss": 2.3648, "step": 2209000 }, { "epoch": 6.4, "learning_rate": 4.6803386381530204e-05, "loss": 2.3479, "step": 2209500 }, { "epoch": 6.4, "learning_rate": 4.6802662733882926e-05, "loss": 2.3729, "step": 2210000 }, { "epoch": 6.4, "learning_rate": 4.680193908623565e-05, "loss": 2.3662, "step": 2210500 }, { "epoch": 6.4, "learning_rate": 4.680121543858837e-05, "loss": 2.3798, "step": 2211000 }, { "epoch": 6.4, "learning_rate": 4.680049179094109e-05, "loss": 2.3545, "step": 2211500 }, { "epoch": 6.4, "learning_rate": 4.6799768143293815e-05, "loss": 2.3814, "step": 2212000 }, { "epoch": 6.4, "learning_rate": 4.679904449564654e-05, "loss": 2.3774, "step": 2212500 }, { "epoch": 6.41, "learning_rate": 4.679832084799926e-05, "loss": 2.3773, "step": 2213000 }, { "epoch": 6.41, "learning_rate": 4.679759720035198e-05, "loss": 2.3539, "step": 2213500 }, { "epoch": 6.41, "learning_rate": 4.6796873552704704e-05, "loss": 2.3646, "step": 2214000 }, { "epoch": 6.41, "learning_rate": 4.6796149905057426e-05, "loss": 2.3858, "step": 2214500 }, { "epoch": 6.41, "learning_rate": 4.679542770470545e-05, "loss": 2.3749, "step": 2215000 }, { "epoch": 6.41, "learning_rate": 4.6794705504353464e-05, "loss": 2.3559, "step": 2215500 }, { "epoch": 6.41, "learning_rate": 4.6793981856706186e-05, "loss": 2.3766, "step": 2216000 }, { "epoch": 6.42, "learning_rate": 4.6793258209058915e-05, "loss": 2.3712, "step": 2216500 }, { "epoch": 6.42, "learning_rate": 4.679253600870693e-05, "loss": 2.3731, "step": 2217000 }, { "epoch": 6.42, "learning_rate": 4.679181236105965e-05, "loss": 2.3631, "step": 2217500 }, { "epoch": 6.42, "learning_rate": 4.6791088713412375e-05, "loss": 2.3669, "step": 2218000 }, { "epoch": 6.42, "learning_rate": 4.6790365065765104e-05, "loss": 2.3862, "step": 2218500 }, { "epoch": 6.42, "learning_rate": 4.678964286541312e-05, "loss": 2.3633, "step": 2219000 }, { "epoch": 6.42, "learning_rate": 4.678891921776584e-05, "loss": 2.3704, "step": 2219500 }, { "epoch": 6.43, "learning_rate": 4.6788195570118564e-05, "loss": 2.3771, "step": 2220000 }, { "epoch": 6.43, "learning_rate": 4.678747192247129e-05, "loss": 2.3643, "step": 2220500 }, { "epoch": 6.43, "learning_rate": 4.678674827482401e-05, "loss": 2.3565, "step": 2221000 }, { "epoch": 6.43, "learning_rate": 4.678602462717673e-05, "loss": 2.365, "step": 2221500 }, { "epoch": 6.43, "learning_rate": 4.6785300979529453e-05, "loss": 2.3808, "step": 2222000 }, { "epoch": 6.43, "learning_rate": 4.6784577331882176e-05, "loss": 2.3632, "step": 2222500 }, { "epoch": 6.43, "learning_rate": 4.6783853684234905e-05, "loss": 2.3745, "step": 2223000 }, { "epoch": 6.44, "learning_rate": 4.678313003658763e-05, "loss": 2.3782, "step": 2223500 }, { "epoch": 6.44, "learning_rate": 4.6782406388940356e-05, "loss": 2.3597, "step": 2224000 }, { "epoch": 6.44, "learning_rate": 4.678168274129308e-05, "loss": 2.382, "step": 2224500 }, { "epoch": 6.44, "learning_rate": 4.67809590936458e-05, "loss": 2.3748, "step": 2225000 }, { "epoch": 6.44, "learning_rate": 4.678023544599852e-05, "loss": 2.3384, "step": 2225500 }, { "epoch": 6.44, "learning_rate": 4.6779511798351245e-05, "loss": 2.361, "step": 2226000 }, { "epoch": 6.44, "learning_rate": 4.677878959799926e-05, "loss": 2.3572, "step": 2226500 }, { "epoch": 6.45, "learning_rate": 4.677806595035198e-05, "loss": 2.3402, "step": 2227000 }, { "epoch": 6.45, "learning_rate": 4.6777342302704705e-05, "loss": 2.3631, "step": 2227500 }, { "epoch": 6.45, "learning_rate": 4.677661865505743e-05, "loss": 2.3629, "step": 2228000 }, { "epoch": 6.45, "learning_rate": 4.6775895007410156e-05, "loss": 2.3656, "step": 2228500 }, { "epoch": 6.45, "learning_rate": 4.677517135976288e-05, "loss": 2.3748, "step": 2229000 }, { "epoch": 6.45, "learning_rate": 4.6774449159410894e-05, "loss": 2.3541, "step": 2229500 }, { "epoch": 6.45, "learning_rate": 4.6773725511763616e-05, "loss": 2.3596, "step": 2230000 }, { "epoch": 6.46, "learning_rate": 4.677300186411634e-05, "loss": 2.3516, "step": 2230500 }, { "epoch": 6.46, "learning_rate": 4.6772279663764354e-05, "loss": 2.3757, "step": 2231000 }, { "epoch": 6.46, "learning_rate": 4.677155601611708e-05, "loss": 2.3759, "step": 2231500 }, { "epoch": 6.46, "learning_rate": 4.6770832368469805e-05, "loss": 2.3601, "step": 2232000 }, { "epoch": 6.46, "learning_rate": 4.6770108720822534e-05, "loss": 2.394, "step": 2232500 }, { "epoch": 6.46, "learning_rate": 4.6769385073175256e-05, "loss": 2.3873, "step": 2233000 }, { "epoch": 6.47, "learning_rate": 4.676866287282327e-05, "loss": 2.3627, "step": 2233500 }, { "epoch": 6.47, "learning_rate": 4.6767939225175994e-05, "loss": 2.3596, "step": 2234000 }, { "epoch": 6.47, "learning_rate": 4.6767215577528716e-05, "loss": 2.3651, "step": 2234500 }, { "epoch": 6.47, "learning_rate": 4.676649192988144e-05, "loss": 2.3707, "step": 2235000 }, { "epoch": 6.47, "learning_rate": 4.676576828223416e-05, "loss": 2.3775, "step": 2235500 }, { "epoch": 6.47, "learning_rate": 4.676504463458688e-05, "loss": 2.3654, "step": 2236000 }, { "epoch": 6.47, "learning_rate": 4.6764320986939605e-05, "loss": 2.3503, "step": 2236500 }, { "epoch": 6.48, "learning_rate": 4.676359878658763e-05, "loss": 2.3315, "step": 2237000 }, { "epoch": 6.48, "learning_rate": 4.676287513894035e-05, "loss": 2.3735, "step": 2237500 }, { "epoch": 6.48, "learning_rate": 4.676215149129307e-05, "loss": 2.3999, "step": 2238000 }, { "epoch": 6.48, "learning_rate": 4.6761427843645794e-05, "loss": 2.3536, "step": 2238500 }, { "epoch": 6.48, "learning_rate": 4.6760704195998523e-05, "loss": 2.3719, "step": 2239000 }, { "epoch": 6.48, "learning_rate": 4.6759980548351246e-05, "loss": 2.3727, "step": 2239500 }, { "epoch": 6.48, "learning_rate": 4.675925690070397e-05, "loss": 2.3668, "step": 2240000 }, { "epoch": 6.49, "learning_rate": 4.675853325305669e-05, "loss": 2.3679, "step": 2240500 }, { "epoch": 6.49, "learning_rate": 4.6757811052704706e-05, "loss": 2.3649, "step": 2241000 }, { "epoch": 6.49, "learning_rate": 4.6757087405057435e-05, "loss": 2.3763, "step": 2241500 }, { "epoch": 6.49, "learning_rate": 4.675636520470545e-05, "loss": 2.3668, "step": 2242000 }, { "epoch": 6.49, "learning_rate": 4.675564155705817e-05, "loss": 2.3512, "step": 2242500 }, { "epoch": 6.49, "learning_rate": 4.6754917909410895e-05, "loss": 2.3881, "step": 2243000 }, { "epoch": 6.49, "learning_rate": 4.675419426176362e-05, "loss": 2.391, "step": 2243500 }, { "epoch": 6.5, "learning_rate": 4.675347061411634e-05, "loss": 2.3642, "step": 2244000 }, { "epoch": 6.5, "learning_rate": 4.6752748413764355e-05, "loss": 2.3765, "step": 2244500 }, { "epoch": 6.5, "learning_rate": 4.6752024766117084e-05, "loss": 2.3701, "step": 2245000 }, { "epoch": 6.5, "learning_rate": 4.6751301118469806e-05, "loss": 2.3619, "step": 2245500 }, { "epoch": 6.5, "learning_rate": 4.675057747082253e-05, "loss": 2.3619, "step": 2246000 }, { "epoch": 6.5, "learning_rate": 4.674985382317526e-05, "loss": 2.3714, "step": 2246500 }, { "epoch": 6.5, "learning_rate": 4.674913162282327e-05, "loss": 2.3809, "step": 2247000 }, { "epoch": 6.51, "learning_rate": 4.6748407975175995e-05, "loss": 2.3708, "step": 2247500 }, { "epoch": 6.51, "learning_rate": 4.674768432752872e-05, "loss": 2.3507, "step": 2248000 }, { "epoch": 6.51, "learning_rate": 4.674696067988144e-05, "loss": 2.3976, "step": 2248500 }, { "epoch": 6.51, "learning_rate": 4.6746238479529455e-05, "loss": 2.3717, "step": 2249000 }, { "epoch": 6.51, "learning_rate": 4.6745514831882184e-05, "loss": 2.3705, "step": 2249500 }, { "epoch": 6.51, "learning_rate": 4.6744791184234906e-05, "loss": 2.3657, "step": 2250000 }, { "epoch": 6.51, "learning_rate": 4.674406898388292e-05, "loss": 2.3953, "step": 2250500 }, { "epoch": 6.52, "learning_rate": 4.6743345336235644e-05, "loss": 2.3804, "step": 2251000 }, { "epoch": 6.52, "learning_rate": 4.6742621688588366e-05, "loss": 2.3735, "step": 2251500 }, { "epoch": 6.52, "learning_rate": 4.674189804094109e-05, "loss": 2.3932, "step": 2252000 }, { "epoch": 6.52, "learning_rate": 4.674117439329381e-05, "loss": 2.3694, "step": 2252500 }, { "epoch": 6.52, "learning_rate": 4.674045074564653e-05, "loss": 2.3733, "step": 2253000 }, { "epoch": 6.52, "learning_rate": 4.6739727097999255e-05, "loss": 2.3707, "step": 2253500 }, { "epoch": 6.52, "learning_rate": 4.6739003450351984e-05, "loss": 2.3696, "step": 2254000 }, { "epoch": 6.53, "learning_rate": 4.6738279802704706e-05, "loss": 2.3504, "step": 2254500 }, { "epoch": 6.53, "learning_rate": 4.6737556155057435e-05, "loss": 2.38, "step": 2255000 }, { "epoch": 6.53, "learning_rate": 4.673683250741016e-05, "loss": 2.3562, "step": 2255500 }, { "epoch": 6.53, "learning_rate": 4.673611030705817e-05, "loss": 2.3642, "step": 2256000 }, { "epoch": 6.53, "learning_rate": 4.6735386659410895e-05, "loss": 2.3779, "step": 2256500 }, { "epoch": 6.53, "learning_rate": 4.673466301176362e-05, "loss": 2.3865, "step": 2257000 }, { "epoch": 6.53, "learning_rate": 4.673393936411634e-05, "loss": 2.3619, "step": 2257500 }, { "epoch": 6.54, "learning_rate": 4.673321571646906e-05, "loss": 2.3782, "step": 2258000 }, { "epoch": 6.54, "learning_rate": 4.6732492068821784e-05, "loss": 2.3864, "step": 2258500 }, { "epoch": 6.54, "learning_rate": 4.673176842117451e-05, "loss": 2.3639, "step": 2259000 }, { "epoch": 6.54, "learning_rate": 4.6731044773527236e-05, "loss": 2.3859, "step": 2259500 }, { "epoch": 6.54, "learning_rate": 4.673032112587996e-05, "loss": 2.3592, "step": 2260000 }, { "epoch": 6.54, "learning_rate": 4.672959747823268e-05, "loss": 2.3578, "step": 2260500 }, { "epoch": 6.54, "learning_rate": 4.67288752778807e-05, "loss": 2.3599, "step": 2261000 }, { "epoch": 6.55, "learning_rate": 4.6728151630233425e-05, "loss": 2.3463, "step": 2261500 }, { "epoch": 6.55, "learning_rate": 4.672742798258615e-05, "loss": 2.3663, "step": 2262000 }, { "epoch": 6.55, "learning_rate": 4.672670433493887e-05, "loss": 2.3953, "step": 2262500 }, { "epoch": 6.55, "learning_rate": 4.6725982134586885e-05, "loss": 2.3726, "step": 2263000 }, { "epoch": 6.55, "learning_rate": 4.6725258486939614e-05, "loss": 2.3686, "step": 2263500 }, { "epoch": 6.55, "learning_rate": 4.6724534839292336e-05, "loss": 2.3812, "step": 2264000 }, { "epoch": 6.55, "learning_rate": 4.672381119164506e-05, "loss": 2.3718, "step": 2264500 }, { "epoch": 6.56, "learning_rate": 4.672308754399778e-05, "loss": 2.3917, "step": 2265000 }, { "epoch": 6.56, "learning_rate": 4.67223638963505e-05, "loss": 2.3881, "step": 2265500 }, { "epoch": 6.56, "learning_rate": 4.6721640248703225e-05, "loss": 2.3491, "step": 2266000 }, { "epoch": 6.56, "learning_rate": 4.672091804835124e-05, "loss": 2.3554, "step": 2266500 }, { "epoch": 6.56, "learning_rate": 4.672019584799926e-05, "loss": 2.3549, "step": 2267000 }, { "epoch": 6.56, "learning_rate": 4.6719472200351985e-05, "loss": 2.3682, "step": 2267500 }, { "epoch": 6.56, "learning_rate": 4.671874855270471e-05, "loss": 2.367, "step": 2268000 }, { "epoch": 6.57, "learning_rate": 4.6718024905057436e-05, "loss": 2.3758, "step": 2268500 }, { "epoch": 6.57, "learning_rate": 4.671730125741016e-05, "loss": 2.3955, "step": 2269000 }, { "epoch": 6.57, "learning_rate": 4.671657760976288e-05, "loss": 2.3834, "step": 2269500 }, { "epoch": 6.57, "learning_rate": 4.67158539621156e-05, "loss": 2.3969, "step": 2270000 }, { "epoch": 6.57, "learning_rate": 4.6715130314468325e-05, "loss": 2.3823, "step": 2270500 }, { "epoch": 6.57, "learning_rate": 4.671440666682105e-05, "loss": 2.3642, "step": 2271000 }, { "epoch": 6.58, "learning_rate": 4.671368301917377e-05, "loss": 2.3694, "step": 2271500 }, { "epoch": 6.58, "learning_rate": 4.671295937152649e-05, "loss": 2.3843, "step": 2272000 }, { "epoch": 6.58, "learning_rate": 4.6712237171174514e-05, "loss": 2.39, "step": 2272500 }, { "epoch": 6.58, "learning_rate": 4.6711513523527236e-05, "loss": 2.3651, "step": 2273000 }, { "epoch": 6.58, "learning_rate": 4.671079132317525e-05, "loss": 2.371, "step": 2273500 }, { "epoch": 6.58, "learning_rate": 4.6710067675527974e-05, "loss": 2.363, "step": 2274000 }, { "epoch": 6.58, "learning_rate": 4.670934547517599e-05, "loss": 2.352, "step": 2274500 }, { "epoch": 6.59, "learning_rate": 4.670862182752871e-05, "loss": 2.3782, "step": 2275000 }, { "epoch": 6.59, "learning_rate": 4.6707898179881434e-05, "loss": 2.3726, "step": 2275500 }, { "epoch": 6.59, "learning_rate": 4.670717453223416e-05, "loss": 2.3397, "step": 2276000 }, { "epoch": 6.59, "learning_rate": 4.6706450884586886e-05, "loss": 2.3566, "step": 2276500 }, { "epoch": 6.59, "learning_rate": 4.6705727236939615e-05, "loss": 2.3604, "step": 2277000 }, { "epoch": 6.59, "learning_rate": 4.670500358929234e-05, "loss": 2.4109, "step": 2277500 }, { "epoch": 6.59, "learning_rate": 4.670427994164506e-05, "loss": 2.3546, "step": 2278000 }, { "epoch": 6.6, "learning_rate": 4.670355629399778e-05, "loss": 2.3622, "step": 2278500 }, { "epoch": 6.6, "learning_rate": 4.6702832646350503e-05, "loss": 2.3509, "step": 2279000 }, { "epoch": 6.6, "learning_rate": 4.6702108998703226e-05, "loss": 2.3576, "step": 2279500 }, { "epoch": 6.6, "learning_rate": 4.670138535105595e-05, "loss": 2.3658, "step": 2280000 }, { "epoch": 6.6, "learning_rate": 4.6700663150703963e-05, "loss": 2.3563, "step": 2280500 }, { "epoch": 6.6, "learning_rate": 4.6699939503056686e-05, "loss": 2.3601, "step": 2281000 }, { "epoch": 6.6, "learning_rate": 4.6699215855409415e-05, "loss": 2.3869, "step": 2281500 }, { "epoch": 6.61, "learning_rate": 4.669849220776214e-05, "loss": 2.3667, "step": 2282000 }, { "epoch": 6.61, "learning_rate": 4.669776856011486e-05, "loss": 2.343, "step": 2282500 }, { "epoch": 6.61, "learning_rate": 4.669704491246758e-05, "loss": 2.3715, "step": 2283000 }, { "epoch": 6.61, "learning_rate": 4.669632126482031e-05, "loss": 2.3514, "step": 2283500 }, { "epoch": 6.61, "learning_rate": 4.6695599064468326e-05, "loss": 2.3638, "step": 2284000 }, { "epoch": 6.61, "learning_rate": 4.669487541682105e-05, "loss": 2.3665, "step": 2284500 }, { "epoch": 6.61, "learning_rate": 4.669415176917377e-05, "loss": 2.3683, "step": 2285000 }, { "epoch": 6.62, "learning_rate": 4.669342812152649e-05, "loss": 2.3897, "step": 2285500 }, { "epoch": 6.62, "learning_rate": 4.6692704473879215e-05, "loss": 2.359, "step": 2286000 }, { "epoch": 6.62, "learning_rate": 4.669198082623194e-05, "loss": 2.3752, "step": 2286500 }, { "epoch": 6.62, "learning_rate": 4.6691257178584666e-05, "loss": 2.3915, "step": 2287000 }, { "epoch": 6.62, "learning_rate": 4.669053353093739e-05, "loss": 2.3644, "step": 2287500 }, { "epoch": 6.62, "learning_rate": 4.668980988329011e-05, "loss": 2.3657, "step": 2288000 }, { "epoch": 6.62, "learning_rate": 4.6689087682938126e-05, "loss": 2.3596, "step": 2288500 }, { "epoch": 6.63, "learning_rate": 4.668836548258614e-05, "loss": 2.3579, "step": 2289000 }, { "epoch": 6.63, "learning_rate": 4.6687641834938864e-05, "loss": 2.3835, "step": 2289500 }, { "epoch": 6.63, "learning_rate": 4.6686919634586886e-05, "loss": 2.3672, "step": 2290000 }, { "epoch": 6.63, "learning_rate": 4.668619598693961e-05, "loss": 2.3889, "step": 2290500 }, { "epoch": 6.63, "learning_rate": 4.668547233929234e-05, "loss": 2.3416, "step": 2291000 }, { "epoch": 6.63, "learning_rate": 4.668474869164506e-05, "loss": 2.3738, "step": 2291500 }, { "epoch": 6.63, "learning_rate": 4.668402504399778e-05, "loss": 2.3787, "step": 2292000 }, { "epoch": 6.64, "learning_rate": 4.6683301396350504e-05, "loss": 2.3483, "step": 2292500 }, { "epoch": 6.64, "learning_rate": 4.6682577748703226e-05, "loss": 2.3624, "step": 2293000 }, { "epoch": 6.64, "learning_rate": 4.668185410105595e-05, "loss": 2.3656, "step": 2293500 }, { "epoch": 6.64, "learning_rate": 4.668113045340867e-05, "loss": 2.3838, "step": 2294000 }, { "epoch": 6.64, "learning_rate": 4.668040680576139e-05, "loss": 2.3809, "step": 2294500 }, { "epoch": 6.64, "learning_rate": 4.6679684605409416e-05, "loss": 2.3554, "step": 2295000 }, { "epoch": 6.64, "learning_rate": 4.667896095776214e-05, "loss": 2.3481, "step": 2295500 }, { "epoch": 6.65, "learning_rate": 4.667823731011486e-05, "loss": 2.3685, "step": 2296000 }, { "epoch": 6.65, "learning_rate": 4.667751366246758e-05, "loss": 2.3933, "step": 2296500 }, { "epoch": 6.65, "learning_rate": 4.66767914621156e-05, "loss": 2.3808, "step": 2297000 }, { "epoch": 6.65, "learning_rate": 4.667606781446832e-05, "loss": 2.3687, "step": 2297500 }, { "epoch": 6.65, "learning_rate": 4.667534416682104e-05, "loss": 2.3733, "step": 2298000 }, { "epoch": 6.65, "learning_rate": 4.667462051917377e-05, "loss": 2.4043, "step": 2298500 }, { "epoch": 6.65, "learning_rate": 4.6673896871526493e-05, "loss": 2.3438, "step": 2299000 }, { "epoch": 6.66, "learning_rate": 4.6673173223879216e-05, "loss": 2.3763, "step": 2299500 }, { "epoch": 6.66, "learning_rate": 4.6672449576231945e-05, "loss": 2.389, "step": 2300000 }, { "epoch": 6.66, "learning_rate": 4.667172592858467e-05, "loss": 2.371, "step": 2300500 }, { "epoch": 6.66, "learning_rate": 4.667100372823268e-05, "loss": 2.3942, "step": 2301000 }, { "epoch": 6.66, "learning_rate": 4.6670280080585405e-05, "loss": 2.3582, "step": 2301500 }, { "epoch": 6.66, "learning_rate": 4.666955643293813e-05, "loss": 2.3644, "step": 2302000 }, { "epoch": 6.66, "learning_rate": 4.666883278529085e-05, "loss": 2.3689, "step": 2302500 }, { "epoch": 6.67, "learning_rate": 4.666810913764357e-05, "loss": 2.3848, "step": 2303000 }, { "epoch": 6.67, "learning_rate": 4.6667386937291594e-05, "loss": 2.377, "step": 2303500 }, { "epoch": 6.67, "learning_rate": 4.6666663289644316e-05, "loss": 2.3887, "step": 2304000 }, { "epoch": 6.67, "learning_rate": 4.666593964199704e-05, "loss": 2.3615, "step": 2304500 }, { "epoch": 6.67, "learning_rate": 4.666521599434976e-05, "loss": 2.3709, "step": 2305000 }, { "epoch": 6.67, "learning_rate": 4.666449234670248e-05, "loss": 2.3548, "step": 2305500 }, { "epoch": 6.67, "learning_rate": 4.6663770146350505e-05, "loss": 2.3632, "step": 2306000 }, { "epoch": 6.68, "learning_rate": 4.666304649870323e-05, "loss": 2.3634, "step": 2306500 }, { "epoch": 6.68, "learning_rate": 4.666232285105595e-05, "loss": 2.3594, "step": 2307000 }, { "epoch": 6.68, "learning_rate": 4.666159920340867e-05, "loss": 2.3622, "step": 2307500 }, { "epoch": 6.68, "learning_rate": 4.6660875555761394e-05, "loss": 2.3867, "step": 2308000 }, { "epoch": 6.68, "learning_rate": 4.6660151908114116e-05, "loss": 2.3873, "step": 2308500 }, { "epoch": 6.68, "learning_rate": 4.6659428260466845e-05, "loss": 2.3545, "step": 2309000 }, { "epoch": 6.69, "learning_rate": 4.665870461281957e-05, "loss": 2.3582, "step": 2309500 }, { "epoch": 6.69, "learning_rate": 4.665798096517229e-05, "loss": 2.4046, "step": 2310000 }, { "epoch": 6.69, "learning_rate": 4.6657258764820305e-05, "loss": 2.3662, "step": 2310500 }, { "epoch": 6.69, "learning_rate": 4.665653656446832e-05, "loss": 2.3817, "step": 2311000 }, { "epoch": 6.69, "learning_rate": 4.665581291682104e-05, "loss": 2.3921, "step": 2311500 }, { "epoch": 6.69, "learning_rate": 4.6655090716469065e-05, "loss": 2.3638, "step": 2312000 }, { "epoch": 6.69, "learning_rate": 4.665436706882179e-05, "loss": 2.3614, "step": 2312500 }, { "epoch": 6.7, "learning_rate": 4.665364342117451e-05, "loss": 2.3463, "step": 2313000 }, { "epoch": 6.7, "learning_rate": 4.665291977352724e-05, "loss": 2.3365, "step": 2313500 }, { "epoch": 6.7, "learning_rate": 4.665219612587996e-05, "loss": 2.3892, "step": 2314000 }, { "epoch": 6.7, "learning_rate": 4.665147247823268e-05, "loss": 2.3873, "step": 2314500 }, { "epoch": 6.7, "learning_rate": 4.6650748830585406e-05, "loss": 2.3623, "step": 2315000 }, { "epoch": 6.7, "learning_rate": 4.665002518293813e-05, "loss": 2.3495, "step": 2315500 }, { "epoch": 6.7, "learning_rate": 4.664930153529085e-05, "loss": 2.3978, "step": 2316000 }, { "epoch": 6.71, "learning_rate": 4.664857788764357e-05, "loss": 2.3567, "step": 2316500 }, { "epoch": 6.71, "learning_rate": 4.6647854239996294e-05, "loss": 2.3791, "step": 2317000 }, { "epoch": 6.71, "learning_rate": 4.664713203964432e-05, "loss": 2.3733, "step": 2317500 }, { "epoch": 6.71, "learning_rate": 4.664640839199704e-05, "loss": 2.3451, "step": 2318000 }, { "epoch": 6.71, "learning_rate": 4.664568474434976e-05, "loss": 2.3656, "step": 2318500 }, { "epoch": 6.71, "learning_rate": 4.6644961096702484e-05, "loss": 2.3623, "step": 2319000 }, { "epoch": 6.71, "learning_rate": 4.66442388963505e-05, "loss": 2.3635, "step": 2319500 }, { "epoch": 6.72, "learning_rate": 4.664351524870322e-05, "loss": 2.3734, "step": 2320000 }, { "epoch": 6.72, "learning_rate": 4.6642793048351244e-05, "loss": 2.339, "step": 2320500 }, { "epoch": 6.72, "learning_rate": 4.664206940070397e-05, "loss": 2.3782, "step": 2321000 }, { "epoch": 6.72, "learning_rate": 4.6641345753056695e-05, "loss": 2.3568, "step": 2321500 }, { "epoch": 6.72, "learning_rate": 4.664062210540942e-05, "loss": 2.3476, "step": 2322000 }, { "epoch": 6.72, "learning_rate": 4.663989845776214e-05, "loss": 2.3475, "step": 2322500 }, { "epoch": 6.72, "learning_rate": 4.663917481011486e-05, "loss": 2.3831, "step": 2323000 }, { "epoch": 6.73, "learning_rate": 4.6638451162467584e-05, "loss": 2.3697, "step": 2323500 }, { "epoch": 6.73, "learning_rate": 4.6637727514820306e-05, "loss": 2.3661, "step": 2324000 }, { "epoch": 6.73, "learning_rate": 4.663700386717303e-05, "loss": 2.363, "step": 2324500 }, { "epoch": 6.73, "learning_rate": 4.663628021952575e-05, "loss": 2.3375, "step": 2325000 }, { "epoch": 6.73, "learning_rate": 4.663555657187847e-05, "loss": 2.3475, "step": 2325500 }, { "epoch": 6.73, "learning_rate": 4.6634834371526495e-05, "loss": 2.3933, "step": 2326000 }, { "epoch": 6.73, "learning_rate": 4.663411072387922e-05, "loss": 2.3695, "step": 2326500 }, { "epoch": 6.74, "learning_rate": 4.663338707623194e-05, "loss": 2.3463, "step": 2327000 }, { "epoch": 6.74, "learning_rate": 4.663266342858466e-05, "loss": 2.374, "step": 2327500 }, { "epoch": 6.74, "learning_rate": 4.6631939780937384e-05, "loss": 2.3619, "step": 2328000 }, { "epoch": 6.74, "learning_rate": 4.663121613329011e-05, "loss": 2.3693, "step": 2328500 }, { "epoch": 6.74, "learning_rate": 4.6630492485642835e-05, "loss": 2.3751, "step": 2329000 }, { "epoch": 6.74, "learning_rate": 4.662977028529085e-05, "loss": 2.3574, "step": 2329500 }, { "epoch": 6.74, "learning_rate": 4.662904663764357e-05, "loss": 2.3679, "step": 2330000 }, { "epoch": 6.75, "learning_rate": 4.6628322989996295e-05, "loss": 2.3782, "step": 2330500 }, { "epoch": 6.75, "learning_rate": 4.6627599342349024e-05, "loss": 2.3626, "step": 2331000 }, { "epoch": 6.75, "learning_rate": 4.6626875694701747e-05, "loss": 2.3977, "step": 2331500 }, { "epoch": 6.75, "learning_rate": 4.662615204705447e-05, "loss": 2.3659, "step": 2332000 }, { "epoch": 6.75, "learning_rate": 4.662542839940719e-05, "loss": 2.3607, "step": 2332500 }, { "epoch": 6.75, "learning_rate": 4.6624706199055207e-05, "loss": 2.3645, "step": 2333000 }, { "epoch": 6.75, "learning_rate": 4.662398255140793e-05, "loss": 2.3661, "step": 2333500 }, { "epoch": 6.76, "learning_rate": 4.662325890376065e-05, "loss": 2.3768, "step": 2334000 }, { "epoch": 6.76, "learning_rate": 4.662253525611337e-05, "loss": 2.3653, "step": 2334500 }, { "epoch": 6.76, "learning_rate": 4.6621811608466095e-05, "loss": 2.37, "step": 2335000 }, { "epoch": 6.76, "learning_rate": 4.6621087960818824e-05, "loss": 2.3776, "step": 2335500 }, { "epoch": 6.76, "learning_rate": 4.662036431317155e-05, "loss": 2.3806, "step": 2336000 }, { "epoch": 6.76, "learning_rate": 4.661964211281957e-05, "loss": 2.3916, "step": 2336500 }, { "epoch": 6.76, "learning_rate": 4.661891846517229e-05, "loss": 2.3743, "step": 2337000 }, { "epoch": 6.77, "learning_rate": 4.6618194817525014e-05, "loss": 2.3667, "step": 2337500 }, { "epoch": 6.77, "learning_rate": 4.661747261717303e-05, "loss": 2.3685, "step": 2338000 }, { "epoch": 6.77, "learning_rate": 4.661674896952575e-05, "loss": 2.3579, "step": 2338500 }, { "epoch": 6.77, "learning_rate": 4.6616025321878474e-05, "loss": 2.3865, "step": 2339000 }, { "epoch": 6.77, "learning_rate": 4.6615301674231196e-05, "loss": 2.3766, "step": 2339500 }, { "epoch": 6.77, "learning_rate": 4.6614578026583925e-05, "loss": 2.3569, "step": 2340000 }, { "epoch": 6.77, "learning_rate": 4.661385437893665e-05, "loss": 2.3631, "step": 2340500 }, { "epoch": 6.78, "learning_rate": 4.661313073128937e-05, "loss": 2.3821, "step": 2341000 }, { "epoch": 6.78, "learning_rate": 4.661240708364209e-05, "loss": 2.3739, "step": 2341500 }, { "epoch": 6.78, "learning_rate": 4.6611683435994814e-05, "loss": 2.3781, "step": 2342000 }, { "epoch": 6.78, "learning_rate": 4.6610959788347536e-05, "loss": 2.3672, "step": 2342500 }, { "epoch": 6.78, "learning_rate": 4.661023758799556e-05, "loss": 2.36, "step": 2343000 }, { "epoch": 6.78, "learning_rate": 4.660951394034828e-05, "loss": 2.3666, "step": 2343500 }, { "epoch": 6.78, "learning_rate": 4.6608790292701e-05, "loss": 2.3854, "step": 2344000 }, { "epoch": 6.79, "learning_rate": 4.6608066645053725e-05, "loss": 2.3722, "step": 2344500 }, { "epoch": 6.79, "learning_rate": 4.660734299740645e-05, "loss": 2.3698, "step": 2345000 }, { "epoch": 6.79, "learning_rate": 4.6606619349759176e-05, "loss": 2.3582, "step": 2345500 }, { "epoch": 6.79, "learning_rate": 4.66058957021119e-05, "loss": 2.3467, "step": 2346000 }, { "epoch": 6.79, "learning_rate": 4.660517205446462e-05, "loss": 2.3673, "step": 2346500 }, { "epoch": 6.79, "learning_rate": 4.660444840681734e-05, "loss": 2.3732, "step": 2347000 }, { "epoch": 6.8, "learning_rate": 4.6603724759170065e-05, "loss": 2.3661, "step": 2347500 }, { "epoch": 6.8, "learning_rate": 4.660300111152279e-05, "loss": 2.3865, "step": 2348000 }, { "epoch": 6.8, "learning_rate": 4.6602280358466096e-05, "loss": 2.388, "step": 2348500 }, { "epoch": 6.8, "learning_rate": 4.6601556710818825e-05, "loss": 2.3626, "step": 2349000 }, { "epoch": 6.8, "learning_rate": 4.660083306317155e-05, "loss": 2.3759, "step": 2349500 }, { "epoch": 6.8, "learning_rate": 4.660010941552427e-05, "loss": 2.3445, "step": 2350000 }, { "epoch": 6.8, "learning_rate": 4.6599385767877e-05, "loss": 2.3749, "step": 2350500 }, { "epoch": 6.81, "learning_rate": 4.659866212022972e-05, "loss": 2.3633, "step": 2351000 }, { "epoch": 6.81, "learning_rate": 4.6597939919877737e-05, "loss": 2.3735, "step": 2351500 }, { "epoch": 6.81, "learning_rate": 4.659721627223046e-05, "loss": 2.3829, "step": 2352000 }, { "epoch": 6.81, "learning_rate": 4.659649262458318e-05, "loss": 2.399, "step": 2352500 }, { "epoch": 6.81, "learning_rate": 4.65957689769359e-05, "loss": 2.3784, "step": 2353000 }, { "epoch": 6.81, "learning_rate": 4.6595045329288625e-05, "loss": 2.3593, "step": 2353500 }, { "epoch": 6.81, "learning_rate": 4.659432168164135e-05, "loss": 2.3593, "step": 2354000 }, { "epoch": 6.82, "learning_rate": 4.659359803399408e-05, "loss": 2.3739, "step": 2354500 }, { "epoch": 6.82, "learning_rate": 4.65928743863468e-05, "loss": 2.3693, "step": 2355000 }, { "epoch": 6.82, "learning_rate": 4.6592152185994815e-05, "loss": 2.3817, "step": 2355500 }, { "epoch": 6.82, "learning_rate": 4.659142853834754e-05, "loss": 2.3729, "step": 2356000 }, { "epoch": 6.82, "learning_rate": 4.659070489070026e-05, "loss": 2.3602, "step": 2356500 }, { "epoch": 6.82, "learning_rate": 4.658998124305298e-05, "loss": 2.3484, "step": 2357000 }, { "epoch": 6.82, "learning_rate": 4.6589257595405703e-05, "loss": 2.3475, "step": 2357500 }, { "epoch": 6.83, "learning_rate": 4.6588535395053726e-05, "loss": 2.3443, "step": 2358000 }, { "epoch": 6.83, "learning_rate": 4.658781174740645e-05, "loss": 2.3831, "step": 2358500 }, { "epoch": 6.83, "learning_rate": 4.658708809975918e-05, "loss": 2.3566, "step": 2359000 }, { "epoch": 6.83, "learning_rate": 4.658636589940719e-05, "loss": 2.3878, "step": 2359500 }, { "epoch": 6.83, "learning_rate": 4.6585642251759915e-05, "loss": 2.3634, "step": 2360000 }, { "epoch": 6.83, "learning_rate": 4.658491860411264e-05, "loss": 2.3503, "step": 2360500 }, { "epoch": 6.83, "learning_rate": 4.658419495646536e-05, "loss": 2.3761, "step": 2361000 }, { "epoch": 6.84, "learning_rate": 4.658347130881808e-05, "loss": 2.3566, "step": 2361500 }, { "epoch": 6.84, "learning_rate": 4.6582747661170804e-05, "loss": 2.3721, "step": 2362000 }, { "epoch": 6.84, "learning_rate": 4.6582024013523526e-05, "loss": 2.3596, "step": 2362500 }, { "epoch": 6.84, "learning_rate": 4.658130181317155e-05, "loss": 2.3811, "step": 2363000 }, { "epoch": 6.84, "learning_rate": 4.658057816552427e-05, "loss": 2.3567, "step": 2363500 }, { "epoch": 6.84, "learning_rate": 4.6579855965172286e-05, "loss": 2.3683, "step": 2364000 }, { "epoch": 6.84, "learning_rate": 4.657913231752501e-05, "loss": 2.3543, "step": 2364500 }, { "epoch": 6.85, "learning_rate": 4.657840866987773e-05, "loss": 2.3635, "step": 2365000 }, { "epoch": 6.85, "learning_rate": 4.657768502223046e-05, "loss": 2.3948, "step": 2365500 }, { "epoch": 6.85, "learning_rate": 4.657696137458318e-05, "loss": 2.3722, "step": 2366000 }, { "epoch": 6.85, "learning_rate": 4.6576237726935904e-05, "loss": 2.3509, "step": 2366500 }, { "epoch": 6.85, "learning_rate": 4.6575514079288626e-05, "loss": 2.3721, "step": 2367000 }, { "epoch": 6.85, "learning_rate": 4.6574790431641355e-05, "loss": 2.3742, "step": 2367500 }, { "epoch": 6.85, "learning_rate": 4.657406678399408e-05, "loss": 2.3646, "step": 2368000 }, { "epoch": 6.86, "learning_rate": 4.65733431363468e-05, "loss": 2.3855, "step": 2368500 }, { "epoch": 6.86, "learning_rate": 4.657261948869952e-05, "loss": 2.3762, "step": 2369000 }, { "epoch": 6.86, "learning_rate": 4.6571895841052244e-05, "loss": 2.3772, "step": 2369500 }, { "epoch": 6.86, "learning_rate": 4.6571172193404966e-05, "loss": 2.3802, "step": 2370000 }, { "epoch": 6.86, "learning_rate": 4.657044999305298e-05, "loss": 2.3768, "step": 2370500 }, { "epoch": 6.86, "learning_rate": 4.6569726345405704e-05, "loss": 2.3408, "step": 2371000 }, { "epoch": 6.86, "learning_rate": 4.6569002697758426e-05, "loss": 2.406, "step": 2371500 }, { "epoch": 6.87, "learning_rate": 4.6568279050111155e-05, "loss": 2.3444, "step": 2372000 }, { "epoch": 6.87, "learning_rate": 4.656755540246388e-05, "loss": 2.3708, "step": 2372500 }, { "epoch": 6.87, "learning_rate": 4.65668332021119e-05, "loss": 2.3894, "step": 2373000 }, { "epoch": 6.87, "learning_rate": 4.656610955446462e-05, "loss": 2.3586, "step": 2373500 }, { "epoch": 6.87, "learning_rate": 4.6565385906817345e-05, "loss": 2.3448, "step": 2374000 }, { "epoch": 6.87, "learning_rate": 4.656466225917007e-05, "loss": 2.356, "step": 2374500 }, { "epoch": 6.87, "learning_rate": 4.656393861152279e-05, "loss": 2.3706, "step": 2375000 }, { "epoch": 6.88, "learning_rate": 4.6563216411170805e-05, "loss": 2.3586, "step": 2375500 }, { "epoch": 6.88, "learning_rate": 4.656249276352353e-05, "loss": 2.3712, "step": 2376000 }, { "epoch": 6.88, "learning_rate": 4.6561769115876256e-05, "loss": 2.3665, "step": 2376500 }, { "epoch": 6.88, "learning_rate": 4.656104546822898e-05, "loss": 2.3788, "step": 2377000 }, { "epoch": 6.88, "learning_rate": 4.65603218205817e-05, "loss": 2.3624, "step": 2377500 }, { "epoch": 6.88, "learning_rate": 4.6559599620229716e-05, "loss": 2.3703, "step": 2378000 }, { "epoch": 6.88, "learning_rate": 4.655887741987773e-05, "loss": 2.3659, "step": 2378500 }, { "epoch": 6.89, "learning_rate": 4.6558153772230454e-05, "loss": 2.3495, "step": 2379000 }, { "epoch": 6.89, "learning_rate": 4.6557430124583176e-05, "loss": 2.3593, "step": 2379500 }, { "epoch": 6.89, "learning_rate": 4.6556706476935905e-05, "loss": 2.3509, "step": 2380000 }, { "epoch": 6.89, "learning_rate": 4.655598282928863e-05, "loss": 2.3664, "step": 2380500 }, { "epoch": 6.89, "learning_rate": 4.6555259181641356e-05, "loss": 2.3807, "step": 2381000 }, { "epoch": 6.89, "learning_rate": 4.655453553399408e-05, "loss": 2.3686, "step": 2381500 }, { "epoch": 6.89, "learning_rate": 4.6553813333642094e-05, "loss": 2.3645, "step": 2382000 }, { "epoch": 6.9, "learning_rate": 4.6553089685994816e-05, "loss": 2.3727, "step": 2382500 }, { "epoch": 6.9, "learning_rate": 4.655236603834754e-05, "loss": 2.3773, "step": 2383000 }, { "epoch": 6.9, "learning_rate": 4.655164239070026e-05, "loss": 2.3815, "step": 2383500 }, { "epoch": 6.9, "learning_rate": 4.655091874305298e-05, "loss": 2.3786, "step": 2384000 }, { "epoch": 6.9, "learning_rate": 4.6550195095405705e-05, "loss": 2.361, "step": 2384500 }, { "epoch": 6.9, "learning_rate": 4.654947144775843e-05, "loss": 2.3708, "step": 2385000 }, { "epoch": 6.91, "learning_rate": 4.6548747800111156e-05, "loss": 2.3851, "step": 2385500 }, { "epoch": 6.91, "learning_rate": 4.654802415246388e-05, "loss": 2.3914, "step": 2386000 }, { "epoch": 6.91, "learning_rate": 4.65473005048166e-05, "loss": 2.3632, "step": 2386500 }, { "epoch": 6.91, "learning_rate": 4.654657685716932e-05, "loss": 2.3599, "step": 2387000 }, { "epoch": 6.91, "learning_rate": 4.654585465681734e-05, "loss": 2.3746, "step": 2387500 }, { "epoch": 6.91, "learning_rate": 4.654513100917007e-05, "loss": 2.363, "step": 2388000 }, { "epoch": 6.91, "learning_rate": 4.654440880881808e-05, "loss": 2.3576, "step": 2388500 }, { "epoch": 6.92, "learning_rate": 4.6543685161170805e-05, "loss": 2.3661, "step": 2389000 }, { "epoch": 6.92, "learning_rate": 4.654296151352353e-05, "loss": 2.3627, "step": 2389500 }, { "epoch": 6.92, "learning_rate": 4.6542237865876257e-05, "loss": 2.3635, "step": 2390000 }, { "epoch": 6.92, "learning_rate": 4.654151566552427e-05, "loss": 2.3599, "step": 2390500 }, { "epoch": 6.92, "learning_rate": 4.6540792017876994e-05, "loss": 2.3704, "step": 2391000 }, { "epoch": 6.92, "learning_rate": 4.6540068370229717e-05, "loss": 2.3785, "step": 2391500 }, { "epoch": 6.92, "learning_rate": 4.653934472258244e-05, "loss": 2.3433, "step": 2392000 }, { "epoch": 6.93, "learning_rate": 4.653862107493516e-05, "loss": 2.3705, "step": 2392500 }, { "epoch": 6.93, "learning_rate": 4.653789742728788e-05, "loss": 2.3805, "step": 2393000 }, { "epoch": 6.93, "learning_rate": 4.6537173779640606e-05, "loss": 2.3674, "step": 2393500 }, { "epoch": 6.93, "learning_rate": 4.653645013199333e-05, "loss": 2.3794, "step": 2394000 }, { "epoch": 6.93, "learning_rate": 4.653572648434606e-05, "loss": 2.3576, "step": 2394500 }, { "epoch": 6.93, "learning_rate": 4.653500428399407e-05, "loss": 2.3651, "step": 2395000 }, { "epoch": 6.93, "learning_rate": 4.65342806363468e-05, "loss": 2.3979, "step": 2395500 }, { "epoch": 6.94, "learning_rate": 4.653355843599482e-05, "loss": 2.365, "step": 2396000 }, { "epoch": 6.94, "learning_rate": 4.653283478834754e-05, "loss": 2.3735, "step": 2396500 }, { "epoch": 6.94, "learning_rate": 4.653211114070026e-05, "loss": 2.3873, "step": 2397000 }, { "epoch": 6.94, "learning_rate": 4.6531387493052984e-05, "loss": 2.3867, "step": 2397500 }, { "epoch": 6.94, "learning_rate": 4.6530663845405706e-05, "loss": 2.3865, "step": 2398000 }, { "epoch": 6.94, "learning_rate": 4.6529940197758435e-05, "loss": 2.3553, "step": 2398500 }, { "epoch": 6.94, "learning_rate": 4.652921655011116e-05, "loss": 2.3491, "step": 2399000 }, { "epoch": 6.95, "learning_rate": 4.652849290246388e-05, "loss": 2.3624, "step": 2399500 }, { "epoch": 6.95, "learning_rate": 4.65277692548166e-05, "loss": 2.3769, "step": 2400000 }, { "epoch": 6.95, "learning_rate": 4.652704850175991e-05, "loss": 2.3572, "step": 2400500 }, { "epoch": 6.95, "learning_rate": 4.652632485411263e-05, "loss": 2.3813, "step": 2401000 }, { "epoch": 6.95, "learning_rate": 4.6525601206465355e-05, "loss": 2.3588, "step": 2401500 }, { "epoch": 6.95, "learning_rate": 4.6524877558818084e-05, "loss": 2.399, "step": 2402000 }, { "epoch": 6.95, "learning_rate": 4.6524153911170806e-05, "loss": 2.3505, "step": 2402500 }, { "epoch": 6.96, "learning_rate": 4.6523430263523535e-05, "loss": 2.3664, "step": 2403000 }, { "epoch": 6.96, "learning_rate": 4.652270661587626e-05, "loss": 2.3601, "step": 2403500 }, { "epoch": 6.96, "learning_rate": 4.652198296822898e-05, "loss": 2.3431, "step": 2404000 }, { "epoch": 6.96, "learning_rate": 4.65212593205817e-05, "loss": 2.3935, "step": 2404500 }, { "epoch": 6.96, "learning_rate": 4.6520535672934424e-05, "loss": 2.3774, "step": 2405000 }, { "epoch": 6.96, "learning_rate": 4.6519812025287146e-05, "loss": 2.3749, "step": 2405500 }, { "epoch": 6.96, "learning_rate": 4.651908982493516e-05, "loss": 2.3616, "step": 2406000 }, { "epoch": 6.97, "learning_rate": 4.6518366177287884e-05, "loss": 2.3737, "step": 2406500 }, { "epoch": 6.97, "learning_rate": 4.6517642529640606e-05, "loss": 2.3519, "step": 2407000 }, { "epoch": 6.97, "learning_rate": 4.6516918881993335e-05, "loss": 2.367, "step": 2407500 }, { "epoch": 6.97, "learning_rate": 4.651619668164135e-05, "loss": 2.3799, "step": 2408000 }, { "epoch": 6.97, "learning_rate": 4.651547303399407e-05, "loss": 2.3894, "step": 2408500 }, { "epoch": 6.97, "learning_rate": 4.6514749386346795e-05, "loss": 2.337, "step": 2409000 }, { "epoch": 6.97, "learning_rate": 4.651402573869952e-05, "loss": 2.3684, "step": 2409500 }, { "epoch": 6.98, "learning_rate": 4.6513302091052247e-05, "loss": 2.3523, "step": 2410000 }, { "epoch": 6.98, "learning_rate": 4.651257989070026e-05, "loss": 2.3815, "step": 2410500 }, { "epoch": 6.98, "learning_rate": 4.6511856243052984e-05, "loss": 2.3647, "step": 2411000 }, { "epoch": 6.98, "learning_rate": 4.651113259540571e-05, "loss": 2.4017, "step": 2411500 }, { "epoch": 6.98, "learning_rate": 4.6510408947758436e-05, "loss": 2.3474, "step": 2412000 }, { "epoch": 6.98, "learning_rate": 4.650968530011116e-05, "loss": 2.3772, "step": 2412500 }, { "epoch": 6.98, "learning_rate": 4.650896165246388e-05, "loss": 2.3743, "step": 2413000 }, { "epoch": 6.99, "learning_rate": 4.65082380048166e-05, "loss": 2.3843, "step": 2413500 }, { "epoch": 6.99, "learning_rate": 4.6507514357169325e-05, "loss": 2.3777, "step": 2414000 }, { "epoch": 6.99, "learning_rate": 4.650679070952205e-05, "loss": 2.3684, "step": 2414500 }, { "epoch": 6.99, "learning_rate": 4.650606850917006e-05, "loss": 2.3656, "step": 2415000 }, { "epoch": 6.99, "learning_rate": 4.6505344861522785e-05, "loss": 2.3774, "step": 2415500 }, { "epoch": 6.99, "learning_rate": 4.650462121387551e-05, "loss": 2.36, "step": 2416000 }, { "epoch": 6.99, "learning_rate": 4.650389901352353e-05, "loss": 2.3573, "step": 2416500 }, { "epoch": 7.0, "learning_rate": 4.650317536587625e-05, "loss": 2.3799, "step": 2417000 }, { "epoch": 7.0, "learning_rate": 4.6502451718228974e-05, "loss": 2.3803, "step": 2417500 }, { "epoch": 7.0, "learning_rate": 4.65017280705817e-05, "loss": 2.3814, "step": 2418000 }, { "epoch": 7.0, "eval_accuracy": 0.6437288418940945, "eval_accuracy_mlm": 0.6057274748989588, "eval_accuracy_nsp": 0.847426361065662, "eval_loss": 2.347137928009033, "eval_runtime": 330.4298, "eval_samples_per_second": 1320.662, "eval_steps_per_second": 55.028, "step": 2418304 }, { "epoch": 7.0, "learning_rate": 4.6501004422934425e-05, "loss": 2.3617, "step": 2418500 }, { "epoch": 7.0, "learning_rate": 4.650028077528715e-05, "loss": 2.3275, "step": 2419000 }, { "epoch": 7.0, "learning_rate": 4.649955712763987e-05, "loss": 2.3571, "step": 2419500 }, { "epoch": 7.0, "learning_rate": 4.649883347999259e-05, "loss": 2.3534, "step": 2420000 }, { "epoch": 7.01, "learning_rate": 4.6498109832345314e-05, "loss": 2.3498, "step": 2420500 }, { "epoch": 7.01, "learning_rate": 4.6497387631993336e-05, "loss": 2.3502, "step": 2421000 }, { "epoch": 7.01, "learning_rate": 4.649666543164135e-05, "loss": 2.3736, "step": 2421500 }, { "epoch": 7.01, "learning_rate": 4.6495941783994074e-05, "loss": 2.341, "step": 2422000 }, { "epoch": 7.01, "learning_rate": 4.649521958364209e-05, "loss": 2.3681, "step": 2422500 }, { "epoch": 7.01, "learning_rate": 4.649449593599481e-05, "loss": 2.3452, "step": 2423000 }, { "epoch": 7.02, "learning_rate": 4.6493772288347534e-05, "loss": 2.3434, "step": 2423500 }, { "epoch": 7.02, "learning_rate": 4.649304864070026e-05, "loss": 2.3234, "step": 2424000 }, { "epoch": 7.02, "learning_rate": 4.6492324993052985e-05, "loss": 2.3608, "step": 2424500 }, { "epoch": 7.02, "learning_rate": 4.649160134540571e-05, "loss": 2.3633, "step": 2425000 }, { "epoch": 7.02, "learning_rate": 4.649087914505373e-05, "loss": 2.3463, "step": 2425500 }, { "epoch": 7.02, "learning_rate": 4.6490156944701745e-05, "loss": 2.3956, "step": 2426000 }, { "epoch": 7.02, "learning_rate": 4.648943329705447e-05, "loss": 2.339, "step": 2426500 }, { "epoch": 7.03, "learning_rate": 4.648870964940719e-05, "loss": 2.3619, "step": 2427000 }, { "epoch": 7.03, "learning_rate": 4.648798600175991e-05, "loss": 2.3507, "step": 2427500 }, { "epoch": 7.03, "learning_rate": 4.6487262354112634e-05, "loss": 2.3517, "step": 2428000 }, { "epoch": 7.03, "learning_rate": 4.648653870646536e-05, "loss": 2.3389, "step": 2428500 }, { "epoch": 7.03, "learning_rate": 4.6485815058818085e-05, "loss": 2.3511, "step": 2429000 }, { "epoch": 7.03, "learning_rate": 4.648509141117081e-05, "loss": 2.3495, "step": 2429500 }, { "epoch": 7.03, "learning_rate": 4.648436776352353e-05, "loss": 2.3711, "step": 2430000 }, { "epoch": 7.04, "learning_rate": 4.6483645563171546e-05, "loss": 2.3255, "step": 2430500 }, { "epoch": 7.04, "learning_rate": 4.648292191552427e-05, "loss": 2.3315, "step": 2431000 }, { "epoch": 7.04, "learning_rate": 4.648219826787699e-05, "loss": 2.3308, "step": 2431500 }, { "epoch": 7.04, "learning_rate": 4.648147462022971e-05, "loss": 2.3458, "step": 2432000 }, { "epoch": 7.04, "learning_rate": 4.6480750972582434e-05, "loss": 2.3673, "step": 2432500 }, { "epoch": 7.04, "learning_rate": 4.6480027324935163e-05, "loss": 2.351, "step": 2433000 }, { "epoch": 7.04, "learning_rate": 4.6479303677287886e-05, "loss": 2.3423, "step": 2433500 }, { "epoch": 7.05, "learning_rate": 4.647858147693591e-05, "loss": 2.3255, "step": 2434000 }, { "epoch": 7.05, "learning_rate": 4.647785782928863e-05, "loss": 2.3455, "step": 2434500 }, { "epoch": 7.05, "learning_rate": 4.647713418164135e-05, "loss": 2.3473, "step": 2435000 }, { "epoch": 7.05, "learning_rate": 4.6476410533994075e-05, "loss": 2.354, "step": 2435500 }, { "epoch": 7.05, "learning_rate": 4.64756868863468e-05, "loss": 2.3577, "step": 2436000 }, { "epoch": 7.05, "learning_rate": 4.647496323869952e-05, "loss": 2.3581, "step": 2436500 }, { "epoch": 7.05, "learning_rate": 4.647423959105224e-05, "loss": 2.3453, "step": 2437000 }, { "epoch": 7.06, "learning_rate": 4.6473515943404964e-05, "loss": 2.3332, "step": 2437500 }, { "epoch": 7.06, "learning_rate": 4.6472792295757686e-05, "loss": 2.3679, "step": 2438000 }, { "epoch": 7.06, "learning_rate": 4.6472068648110415e-05, "loss": 2.3339, "step": 2438500 }, { "epoch": 7.06, "learning_rate": 4.647134500046314e-05, "loss": 2.3313, "step": 2439000 }, { "epoch": 7.06, "learning_rate": 4.647062135281586e-05, "loss": 2.3372, "step": 2439500 }, { "epoch": 7.06, "learning_rate": 4.646989770516859e-05, "loss": 2.3533, "step": 2440000 }, { "epoch": 7.06, "learning_rate": 4.646917405752131e-05, "loss": 2.3603, "step": 2440500 }, { "epoch": 7.07, "learning_rate": 4.6468451857169326e-05, "loss": 2.3509, "step": 2441000 }, { "epoch": 7.07, "learning_rate": 4.646772820952205e-05, "loss": 2.3333, "step": 2441500 }, { "epoch": 7.07, "learning_rate": 4.646700456187477e-05, "loss": 2.3278, "step": 2442000 }, { "epoch": 7.07, "learning_rate": 4.646628091422749e-05, "loss": 2.3414, "step": 2442500 }, { "epoch": 7.07, "learning_rate": 4.6465557266580215e-05, "loss": 2.3564, "step": 2443000 }, { "epoch": 7.07, "learning_rate": 4.646483361893294e-05, "loss": 2.332, "step": 2443500 }, { "epoch": 7.07, "learning_rate": 4.646411141858096e-05, "loss": 2.3489, "step": 2444000 }, { "epoch": 7.08, "learning_rate": 4.646338777093368e-05, "loss": 2.3592, "step": 2444500 }, { "epoch": 7.08, "learning_rate": 4.6462664123286404e-05, "loss": 2.3361, "step": 2445000 }, { "epoch": 7.08, "learning_rate": 4.6461940475639126e-05, "loss": 2.3452, "step": 2445500 }, { "epoch": 7.08, "learning_rate": 4.6461219722582435e-05, "loss": 2.3349, "step": 2446000 }, { "epoch": 7.08, "learning_rate": 4.6460496074935164e-05, "loss": 2.3356, "step": 2446500 }, { "epoch": 7.08, "learning_rate": 4.6459772427287886e-05, "loss": 2.3417, "step": 2447000 }, { "epoch": 7.08, "learning_rate": 4.645904877964061e-05, "loss": 2.3389, "step": 2447500 }, { "epoch": 7.09, "learning_rate": 4.645832513199334e-05, "loss": 2.3335, "step": 2448000 }, { "epoch": 7.09, "learning_rate": 4.645760148434606e-05, "loss": 2.3567, "step": 2448500 }, { "epoch": 7.09, "learning_rate": 4.645687783669878e-05, "loss": 2.3355, "step": 2449000 }, { "epoch": 7.09, "learning_rate": 4.6456154189051504e-05, "loss": 2.3559, "step": 2449500 }, { "epoch": 7.09, "learning_rate": 4.645543054140423e-05, "loss": 2.3431, "step": 2450000 }, { "epoch": 7.09, "learning_rate": 4.645470834105224e-05, "loss": 2.3426, "step": 2450500 }, { "epoch": 7.09, "learning_rate": 4.6453986140700265e-05, "loss": 2.3429, "step": 2451000 }, { "epoch": 7.1, "learning_rate": 4.645326249305299e-05, "loss": 2.3557, "step": 2451500 }, { "epoch": 7.1, "learning_rate": 4.645253884540571e-05, "loss": 2.3541, "step": 2452000 }, { "epoch": 7.1, "learning_rate": 4.645181519775843e-05, "loss": 2.3844, "step": 2452500 }, { "epoch": 7.1, "learning_rate": 4.6451091550111153e-05, "loss": 2.3363, "step": 2453000 }, { "epoch": 7.1, "learning_rate": 4.6450367902463876e-05, "loss": 2.3213, "step": 2453500 }, { "epoch": 7.1, "learning_rate": 4.644964570211189e-05, "loss": 2.3417, "step": 2454000 }, { "epoch": 7.1, "learning_rate": 4.6448922054464614e-05, "loss": 2.3361, "step": 2454500 }, { "epoch": 7.11, "learning_rate": 4.644819840681734e-05, "loss": 2.346, "step": 2455000 }, { "epoch": 7.11, "learning_rate": 4.6447474759170065e-05, "loss": 2.3373, "step": 2455500 }, { "epoch": 7.11, "learning_rate": 4.6446751111522794e-05, "loss": 2.3616, "step": 2456000 }, { "epoch": 7.11, "learning_rate": 4.644602891117081e-05, "loss": 2.3726, "step": 2456500 }, { "epoch": 7.11, "learning_rate": 4.644530526352353e-05, "loss": 2.3509, "step": 2457000 }, { "epoch": 7.11, "learning_rate": 4.6444581615876254e-05, "loss": 2.3539, "step": 2457500 }, { "epoch": 7.11, "learning_rate": 4.6443857968228976e-05, "loss": 2.362, "step": 2458000 }, { "epoch": 7.12, "learning_rate": 4.64431343205817e-05, "loss": 2.3787, "step": 2458500 }, { "epoch": 7.12, "learning_rate": 4.644241067293442e-05, "loss": 2.3557, "step": 2459000 }, { "epoch": 7.12, "learning_rate": 4.644168702528714e-05, "loss": 2.3194, "step": 2459500 }, { "epoch": 7.12, "learning_rate": 4.6440964824935165e-05, "loss": 2.3507, "step": 2460000 }, { "epoch": 7.12, "learning_rate": 4.644024117728789e-05, "loss": 2.3496, "step": 2460500 }, { "epoch": 7.12, "learning_rate": 4.643951752964061e-05, "loss": 2.356, "step": 2461000 }, { "epoch": 7.13, "learning_rate": 4.643879388199333e-05, "loss": 2.3433, "step": 2461500 }, { "epoch": 7.13, "learning_rate": 4.6438070234346054e-05, "loss": 2.3793, "step": 2462000 }, { "epoch": 7.13, "learning_rate": 4.643734803399407e-05, "loss": 2.3352, "step": 2462500 }, { "epoch": 7.13, "learning_rate": 4.64366243863468e-05, "loss": 2.3514, "step": 2463000 }, { "epoch": 7.13, "learning_rate": 4.643590073869952e-05, "loss": 2.3432, "step": 2463500 }, { "epoch": 7.13, "learning_rate": 4.643517709105224e-05, "loss": 2.3498, "step": 2464000 }, { "epoch": 7.13, "learning_rate": 4.6434453443404965e-05, "loss": 2.3185, "step": 2464500 }, { "epoch": 7.14, "learning_rate": 4.6433729795757694e-05, "loss": 2.3388, "step": 2465000 }, { "epoch": 7.14, "learning_rate": 4.6433006148110416e-05, "loss": 2.3357, "step": 2465500 }, { "epoch": 7.14, "learning_rate": 4.643228394775843e-05, "loss": 2.3422, "step": 2466000 }, { "epoch": 7.14, "learning_rate": 4.6431560300111154e-05, "loss": 2.3547, "step": 2466500 }, { "epoch": 7.14, "learning_rate": 4.6430836652463877e-05, "loss": 2.3599, "step": 2467000 }, { "epoch": 7.14, "learning_rate": 4.64301130048166e-05, "loss": 2.3548, "step": 2467500 }, { "epoch": 7.14, "learning_rate": 4.642938935716932e-05, "loss": 2.3399, "step": 2468000 }, { "epoch": 7.15, "learning_rate": 4.642866570952204e-05, "loss": 2.3574, "step": 2468500 }, { "epoch": 7.15, "learning_rate": 4.6427942061874765e-05, "loss": 2.3333, "step": 2469000 }, { "epoch": 7.15, "learning_rate": 4.6427218414227494e-05, "loss": 2.3678, "step": 2469500 }, { "epoch": 7.15, "learning_rate": 4.642649476658022e-05, "loss": 2.3812, "step": 2470000 }, { "epoch": 7.15, "learning_rate": 4.6425771118932946e-05, "loss": 2.3647, "step": 2470500 }, { "epoch": 7.15, "learning_rate": 4.642504747128567e-05, "loss": 2.3613, "step": 2471000 }, { "epoch": 7.15, "learning_rate": 4.642432382363839e-05, "loss": 2.348, "step": 2471500 }, { "epoch": 7.16, "learning_rate": 4.642360017599111e-05, "loss": 2.3301, "step": 2472000 }, { "epoch": 7.16, "learning_rate": 4.642287797563913e-05, "loss": 2.3526, "step": 2472500 }, { "epoch": 7.16, "learning_rate": 4.6422155775287144e-05, "loss": 2.3515, "step": 2473000 }, { "epoch": 7.16, "learning_rate": 4.6421432127639866e-05, "loss": 2.337, "step": 2473500 }, { "epoch": 7.16, "learning_rate": 4.642070992728789e-05, "loss": 2.3537, "step": 2474000 }, { "epoch": 7.16, "learning_rate": 4.641998627964061e-05, "loss": 2.3568, "step": 2474500 }, { "epoch": 7.16, "learning_rate": 4.641926263199333e-05, "loss": 2.3592, "step": 2475000 }, { "epoch": 7.17, "learning_rate": 4.6418538984346055e-05, "loss": 2.3373, "step": 2475500 }, { "epoch": 7.17, "learning_rate": 4.641781533669878e-05, "loss": 2.3604, "step": 2476000 }, { "epoch": 7.17, "learning_rate": 4.641709313634679e-05, "loss": 2.3479, "step": 2476500 }, { "epoch": 7.17, "learning_rate": 4.6416369488699515e-05, "loss": 2.3627, "step": 2477000 }, { "epoch": 7.17, "learning_rate": 4.6415645841052244e-05, "loss": 2.3427, "step": 2477500 }, { "epoch": 7.17, "learning_rate": 4.6414922193404966e-05, "loss": 2.3537, "step": 2478000 }, { "epoch": 7.17, "learning_rate": 4.6414198545757695e-05, "loss": 2.3499, "step": 2478500 }, { "epoch": 7.18, "learning_rate": 4.641347634540571e-05, "loss": 2.3668, "step": 2479000 }, { "epoch": 7.18, "learning_rate": 4.6412754145053726e-05, "loss": 2.344, "step": 2479500 }, { "epoch": 7.18, "learning_rate": 4.641203049740645e-05, "loss": 2.3437, "step": 2480000 }, { "epoch": 7.18, "learning_rate": 4.641130829705447e-05, "loss": 2.3643, "step": 2480500 }, { "epoch": 7.18, "learning_rate": 4.641058464940719e-05, "loss": 2.3746, "step": 2481000 }, { "epoch": 7.18, "learning_rate": 4.6409861001759915e-05, "loss": 2.351, "step": 2481500 }, { "epoch": 7.18, "learning_rate": 4.640913735411264e-05, "loss": 2.3674, "step": 2482000 }, { "epoch": 7.19, "learning_rate": 4.640841370646536e-05, "loss": 2.3396, "step": 2482500 }, { "epoch": 7.19, "learning_rate": 4.640769005881808e-05, "loss": 2.3609, "step": 2483000 }, { "epoch": 7.19, "learning_rate": 4.6406966411170804e-05, "loss": 2.3633, "step": 2483500 }, { "epoch": 7.19, "learning_rate": 4.6406242763523526e-05, "loss": 2.3468, "step": 2484000 }, { "epoch": 7.19, "learning_rate": 4.640551911587625e-05, "loss": 2.3437, "step": 2484500 }, { "epoch": 7.19, "learning_rate": 4.640479546822897e-05, "loss": 2.3435, "step": 2485000 }, { "epoch": 7.19, "learning_rate": 4.64040718205817e-05, "loss": 2.3693, "step": 2485500 }, { "epoch": 7.2, "learning_rate": 4.640334817293442e-05, "loss": 2.3621, "step": 2486000 }, { "epoch": 7.2, "learning_rate": 4.6402624525287144e-05, "loss": 2.3624, "step": 2486500 }, { "epoch": 7.2, "learning_rate": 4.640190087763987e-05, "loss": 2.3479, "step": 2487000 }, { "epoch": 7.2, "learning_rate": 4.6401177229992596e-05, "loss": 2.3728, "step": 2487500 }, { "epoch": 7.2, "learning_rate": 4.640045358234532e-05, "loss": 2.3557, "step": 2488000 }, { "epoch": 7.2, "learning_rate": 4.639972993469804e-05, "loss": 2.3205, "step": 2488500 }, { "epoch": 7.2, "learning_rate": 4.639900628705076e-05, "loss": 2.3266, "step": 2489000 }, { "epoch": 7.21, "learning_rate": 4.6398282639403484e-05, "loss": 2.3489, "step": 2489500 }, { "epoch": 7.21, "learning_rate": 4.639755899175621e-05, "loss": 2.3343, "step": 2490000 }, { "epoch": 7.21, "learning_rate": 4.639683534410893e-05, "loss": 2.363, "step": 2490500 }, { "epoch": 7.21, "learning_rate": 4.639611169646165e-05, "loss": 2.3585, "step": 2491000 }, { "epoch": 7.21, "learning_rate": 4.6395389496109674e-05, "loss": 2.3358, "step": 2491500 }, { "epoch": 7.21, "learning_rate": 4.6394665848462396e-05, "loss": 2.364, "step": 2492000 }, { "epoch": 7.21, "learning_rate": 4.6393942200815125e-05, "loss": 2.3396, "step": 2492500 }, { "epoch": 7.22, "learning_rate": 4.639322000046314e-05, "loss": 2.3541, "step": 2493000 }, { "epoch": 7.22, "learning_rate": 4.639249635281586e-05, "loss": 2.3636, "step": 2493500 }, { "epoch": 7.22, "learning_rate": 4.6391772705168585e-05, "loss": 2.3486, "step": 2494000 }, { "epoch": 7.22, "learning_rate": 4.639104905752131e-05, "loss": 2.3431, "step": 2494500 }, { "epoch": 7.22, "learning_rate": 4.639032540987403e-05, "loss": 2.3567, "step": 2495000 }, { "epoch": 7.22, "learning_rate": 4.638960176222675e-05, "loss": 2.3753, "step": 2495500 }, { "epoch": 7.22, "learning_rate": 4.6388878114579474e-05, "loss": 2.3596, "step": 2496000 }, { "epoch": 7.23, "learning_rate": 4.6388154466932196e-05, "loss": 2.3525, "step": 2496500 }, { "epoch": 7.23, "learning_rate": 4.638743226658022e-05, "loss": 2.3578, "step": 2497000 }, { "epoch": 7.23, "learning_rate": 4.638670861893294e-05, "loss": 2.3303, "step": 2497500 }, { "epoch": 7.23, "learning_rate": 4.638598497128566e-05, "loss": 2.3599, "step": 2498000 }, { "epoch": 7.23, "learning_rate": 4.6385261323638385e-05, "loss": 2.3431, "step": 2498500 }, { "epoch": 7.23, "learning_rate": 4.638453767599111e-05, "loss": 2.363, "step": 2499000 }, { "epoch": 7.24, "learning_rate": 4.6383814028343836e-05, "loss": 2.3494, "step": 2499500 }, { "epoch": 7.24, "learning_rate": 4.638309182799185e-05, "loss": 2.3642, "step": 2500000 }, { "epoch": 7.24, "learning_rate": 4.6382368180344574e-05, "loss": 2.3491, "step": 2500500 }, { "epoch": 7.24, "learning_rate": 4.6381644532697296e-05, "loss": 2.378, "step": 2501000 }, { "epoch": 7.24, "learning_rate": 4.6380920885050025e-05, "loss": 2.3392, "step": 2501500 }, { "epoch": 7.24, "learning_rate": 4.638019723740275e-05, "loss": 2.3645, "step": 2502000 }, { "epoch": 7.24, "learning_rate": 4.637947358975547e-05, "loss": 2.3661, "step": 2502500 }, { "epoch": 7.25, "learning_rate": 4.637874994210819e-05, "loss": 2.3244, "step": 2503000 }, { "epoch": 7.25, "learning_rate": 4.637802774175621e-05, "loss": 2.3611, "step": 2503500 }, { "epoch": 7.25, "learning_rate": 4.637730409410893e-05, "loss": 2.3544, "step": 2504000 }, { "epoch": 7.25, "learning_rate": 4.637658044646165e-05, "loss": 2.3745, "step": 2504500 }, { "epoch": 7.25, "learning_rate": 4.6375856798814374e-05, "loss": 2.3536, "step": 2505000 }, { "epoch": 7.25, "learning_rate": 4.6375133151167096e-05, "loss": 2.3692, "step": 2505500 }, { "epoch": 7.25, "learning_rate": 4.6374409503519825e-05, "loss": 2.3516, "step": 2506000 }, { "epoch": 7.26, "learning_rate": 4.637368730316784e-05, "loss": 2.3373, "step": 2506500 }, { "epoch": 7.26, "learning_rate": 4.637296365552056e-05, "loss": 2.3402, "step": 2507000 }, { "epoch": 7.26, "learning_rate": 4.637224000787329e-05, "loss": 2.3396, "step": 2507500 }, { "epoch": 7.26, "learning_rate": 4.6371516360226014e-05, "loss": 2.3797, "step": 2508000 }, { "epoch": 7.26, "learning_rate": 4.637079415987403e-05, "loss": 2.3364, "step": 2508500 }, { "epoch": 7.26, "learning_rate": 4.637007051222675e-05, "loss": 2.352, "step": 2509000 }, { "epoch": 7.26, "learning_rate": 4.6369346864579475e-05, "loss": 2.3707, "step": 2509500 }, { "epoch": 7.27, "learning_rate": 4.63686232169322e-05, "loss": 2.3662, "step": 2510000 }, { "epoch": 7.27, "learning_rate": 4.6367899569284926e-05, "loss": 2.325, "step": 2510500 }, { "epoch": 7.27, "learning_rate": 4.636717592163765e-05, "loss": 2.3457, "step": 2511000 }, { "epoch": 7.27, "learning_rate": 4.636645227399037e-05, "loss": 2.3551, "step": 2511500 }, { "epoch": 7.27, "learning_rate": 4.636572862634309e-05, "loss": 2.3441, "step": 2512000 }, { "epoch": 7.27, "learning_rate": 4.63650078732864e-05, "loss": 2.3818, "step": 2512500 }, { "epoch": 7.27, "learning_rate": 4.6364285672934424e-05, "loss": 2.369, "step": 2513000 }, { "epoch": 7.28, "learning_rate": 4.6363562025287146e-05, "loss": 2.3542, "step": 2513500 }, { "epoch": 7.28, "learning_rate": 4.636283837763987e-05, "loss": 2.3436, "step": 2514000 }, { "epoch": 7.28, "learning_rate": 4.636211472999259e-05, "loss": 2.3595, "step": 2514500 }, { "epoch": 7.28, "learning_rate": 4.6361392529640606e-05, "loss": 2.3705, "step": 2515000 }, { "epoch": 7.28, "learning_rate": 4.6360668881993335e-05, "loss": 2.3547, "step": 2515500 }, { "epoch": 7.28, "learning_rate": 4.635994523434606e-05, "loss": 2.3696, "step": 2516000 }, { "epoch": 7.28, "learning_rate": 4.635922158669878e-05, "loss": 2.3602, "step": 2516500 }, { "epoch": 7.29, "learning_rate": 4.63584979390515e-05, "loss": 2.3854, "step": 2517000 }, { "epoch": 7.29, "learning_rate": 4.6357774291404224e-05, "loss": 2.339, "step": 2517500 }, { "epoch": 7.29, "learning_rate": 4.635705064375695e-05, "loss": 2.3619, "step": 2518000 }, { "epoch": 7.29, "learning_rate": 4.635632989070026e-05, "loss": 2.3353, "step": 2518500 }, { "epoch": 7.29, "learning_rate": 4.6355606243052984e-05, "loss": 2.3343, "step": 2519000 }, { "epoch": 7.29, "learning_rate": 4.6354882595405706e-05, "loss": 2.3677, "step": 2519500 }, { "epoch": 7.29, "learning_rate": 4.635415894775843e-05, "loss": 2.3688, "step": 2520000 }, { "epoch": 7.3, "learning_rate": 4.635343674740645e-05, "loss": 2.3618, "step": 2520500 }, { "epoch": 7.3, "learning_rate": 4.635271309975917e-05, "loss": 2.3456, "step": 2521000 }, { "epoch": 7.3, "learning_rate": 4.6351989452111895e-05, "loss": 2.3543, "step": 2521500 }, { "epoch": 7.3, "learning_rate": 4.635126580446462e-05, "loss": 2.3446, "step": 2522000 }, { "epoch": 7.3, "learning_rate": 4.635054215681734e-05, "loss": 2.3505, "step": 2522500 }, { "epoch": 7.3, "learning_rate": 4.634981850917007e-05, "loss": 2.3595, "step": 2523000 }, { "epoch": 7.3, "learning_rate": 4.634909486152279e-05, "loss": 2.3568, "step": 2523500 }, { "epoch": 7.31, "learning_rate": 4.634837121387551e-05, "loss": 2.3351, "step": 2524000 }, { "epoch": 7.31, "learning_rate": 4.6347647566228235e-05, "loss": 2.3362, "step": 2524500 }, { "epoch": 7.31, "learning_rate": 4.634692391858096e-05, "loss": 2.3609, "step": 2525000 }, { "epoch": 7.31, "learning_rate": 4.634620027093368e-05, "loss": 2.3531, "step": 2525500 }, { "epoch": 7.31, "learning_rate": 4.63454766232864e-05, "loss": 2.3646, "step": 2526000 }, { "epoch": 7.31, "learning_rate": 4.6344752975639124e-05, "loss": 2.3555, "step": 2526500 }, { "epoch": 7.31, "learning_rate": 4.634402932799185e-05, "loss": 2.3648, "step": 2527000 }, { "epoch": 7.32, "learning_rate": 4.6343305680344576e-05, "loss": 2.3385, "step": 2527500 }, { "epoch": 7.32, "learning_rate": 4.634258347999259e-05, "loss": 2.3625, "step": 2528000 }, { "epoch": 7.32, "learning_rate": 4.6341859832345313e-05, "loss": 2.3579, "step": 2528500 }, { "epoch": 7.32, "learning_rate": 4.6341136184698036e-05, "loss": 2.3653, "step": 2529000 }, { "epoch": 7.32, "learning_rate": 4.634041543164135e-05, "loss": 2.3596, "step": 2529500 }, { "epoch": 7.32, "learning_rate": 4.6339691783994074e-05, "loss": 2.3424, "step": 2530000 }, { "epoch": 7.32, "learning_rate": 4.63389681363468e-05, "loss": 2.3597, "step": 2530500 }, { "epoch": 7.33, "learning_rate": 4.6338244488699525e-05, "loss": 2.3764, "step": 2531000 }, { "epoch": 7.33, "learning_rate": 4.633752084105225e-05, "loss": 2.3602, "step": 2531500 }, { "epoch": 7.33, "learning_rate": 4.633679719340497e-05, "loss": 2.3381, "step": 2532000 }, { "epoch": 7.33, "learning_rate": 4.633607354575769e-05, "loss": 2.3622, "step": 2532500 }, { "epoch": 7.33, "learning_rate": 4.6335349898110414e-05, "loss": 2.3496, "step": 2533000 }, { "epoch": 7.33, "learning_rate": 4.6334626250463136e-05, "loss": 2.3615, "step": 2533500 }, { "epoch": 7.33, "learning_rate": 4.633390405011115e-05, "loss": 2.3605, "step": 2534000 }, { "epoch": 7.34, "learning_rate": 4.633318040246388e-05, "loss": 2.3645, "step": 2534500 }, { "epoch": 7.34, "learning_rate": 4.63324567548166e-05, "loss": 2.3585, "step": 2535000 }, { "epoch": 7.34, "learning_rate": 4.6331733107169325e-05, "loss": 2.3857, "step": 2535500 }, { "epoch": 7.34, "learning_rate": 4.633100945952205e-05, "loss": 2.3719, "step": 2536000 }, { "epoch": 7.34, "learning_rate": 4.633028581187477e-05, "loss": 2.3456, "step": 2536500 }, { "epoch": 7.34, "learning_rate": 4.632956216422749e-05, "loss": 2.3526, "step": 2537000 }, { "epoch": 7.35, "learning_rate": 4.632883851658022e-05, "loss": 2.3613, "step": 2537500 }, { "epoch": 7.35, "learning_rate": 4.6328116316228236e-05, "loss": 2.3609, "step": 2538000 }, { "epoch": 7.35, "learning_rate": 4.632739266858096e-05, "loss": 2.3528, "step": 2538500 }, { "epoch": 7.35, "learning_rate": 4.632666902093368e-05, "loss": 2.3485, "step": 2539000 }, { "epoch": 7.35, "learning_rate": 4.63259453732864e-05, "loss": 2.3668, "step": 2539500 }, { "epoch": 7.35, "learning_rate": 4.632522172563913e-05, "loss": 2.3493, "step": 2540000 }, { "epoch": 7.35, "learning_rate": 4.6324498077991854e-05, "loss": 2.3602, "step": 2540500 }, { "epoch": 7.36, "learning_rate": 4.6323774430344576e-05, "loss": 2.3492, "step": 2541000 }, { "epoch": 7.36, "learning_rate": 4.63230507826973e-05, "loss": 2.3533, "step": 2541500 }, { "epoch": 7.36, "learning_rate": 4.6322328582345314e-05, "loss": 2.3659, "step": 2542000 }, { "epoch": 7.36, "learning_rate": 4.6321604934698036e-05, "loss": 2.3628, "step": 2542500 }, { "epoch": 7.36, "learning_rate": 4.632088273434605e-05, "loss": 2.3576, "step": 2543000 }, { "epoch": 7.36, "learning_rate": 4.632015908669878e-05, "loss": 2.3605, "step": 2543500 }, { "epoch": 7.36, "learning_rate": 4.63194354390515e-05, "loss": 2.3583, "step": 2544000 }, { "epoch": 7.37, "learning_rate": 4.6318711791404225e-05, "loss": 2.3646, "step": 2544500 }, { "epoch": 7.37, "learning_rate": 4.6317988143756954e-05, "loss": 2.3644, "step": 2545000 }, { "epoch": 7.37, "learning_rate": 4.631726739070026e-05, "loss": 2.3473, "step": 2545500 }, { "epoch": 7.37, "learning_rate": 4.6316543743052986e-05, "loss": 2.3522, "step": 2546000 }, { "epoch": 7.37, "learning_rate": 4.631582009540571e-05, "loss": 2.3436, "step": 2546500 }, { "epoch": 7.37, "learning_rate": 4.631509644775843e-05, "loss": 2.3565, "step": 2547000 }, { "epoch": 7.37, "learning_rate": 4.631437280011115e-05, "loss": 2.3656, "step": 2547500 }, { "epoch": 7.38, "learning_rate": 4.631364915246388e-05, "loss": 2.3649, "step": 2548000 }, { "epoch": 7.38, "learning_rate": 4.6312925504816604e-05, "loss": 2.3572, "step": 2548500 }, { "epoch": 7.38, "learning_rate": 4.6312201857169326e-05, "loss": 2.3387, "step": 2549000 }, { "epoch": 7.38, "learning_rate": 4.631147820952205e-05, "loss": 2.3277, "step": 2549500 }, { "epoch": 7.38, "learning_rate": 4.631075456187477e-05, "loss": 2.3638, "step": 2550000 }, { "epoch": 7.38, "learning_rate": 4.631003091422749e-05, "loss": 2.3626, "step": 2550500 }, { "epoch": 7.38, "learning_rate": 4.6309307266580215e-05, "loss": 2.3574, "step": 2551000 }, { "epoch": 7.39, "learning_rate": 4.630858361893294e-05, "loss": 2.3319, "step": 2551500 }, { "epoch": 7.39, "learning_rate": 4.630785997128566e-05, "loss": 2.336, "step": 2552000 }, { "epoch": 7.39, "learning_rate": 4.630713632363839e-05, "loss": 2.3317, "step": 2552500 }, { "epoch": 7.39, "learning_rate": 4.630641267599111e-05, "loss": 2.3679, "step": 2553000 }, { "epoch": 7.39, "learning_rate": 4.630568902834383e-05, "loss": 2.3666, "step": 2553500 }, { "epoch": 7.39, "learning_rate": 4.630496827528715e-05, "loss": 2.3392, "step": 2554000 }, { "epoch": 7.39, "learning_rate": 4.630424462763987e-05, "loss": 2.3525, "step": 2554500 }, { "epoch": 7.4, "learning_rate": 4.630352097999259e-05, "loss": 2.3563, "step": 2555000 }, { "epoch": 7.4, "learning_rate": 4.6302797332345315e-05, "loss": 2.3637, "step": 2555500 }, { "epoch": 7.4, "learning_rate": 4.630207368469804e-05, "loss": 2.372, "step": 2556000 }, { "epoch": 7.4, "learning_rate": 4.630135148434605e-05, "loss": 2.3515, "step": 2556500 }, { "epoch": 7.4, "learning_rate": 4.630062783669878e-05, "loss": 2.3431, "step": 2557000 }, { "epoch": 7.4, "learning_rate": 4.6299904189051504e-05, "loss": 2.3023, "step": 2557500 }, { "epoch": 7.4, "learning_rate": 4.6299180541404226e-05, "loss": 2.3363, "step": 2558000 }, { "epoch": 7.41, "learning_rate": 4.6298459788347535e-05, "loss": 2.3365, "step": 2558500 }, { "epoch": 7.41, "learning_rate": 4.629773614070026e-05, "loss": 2.3579, "step": 2559000 }, { "epoch": 7.41, "learning_rate": 4.629701249305298e-05, "loss": 2.3861, "step": 2559500 }, { "epoch": 7.41, "learning_rate": 4.6296290292701e-05, "loss": 2.3868, "step": 2560000 }, { "epoch": 7.41, "learning_rate": 4.629556664505373e-05, "loss": 2.3673, "step": 2560500 }, { "epoch": 7.41, "learning_rate": 4.629484299740645e-05, "loss": 2.3512, "step": 2561000 }, { "epoch": 7.41, "learning_rate": 4.6294119349759175e-05, "loss": 2.3552, "step": 2561500 }, { "epoch": 7.42, "learning_rate": 4.629339714940719e-05, "loss": 2.3493, "step": 2562000 }, { "epoch": 7.42, "learning_rate": 4.629267350175991e-05, "loss": 2.3507, "step": 2562500 }, { "epoch": 7.42, "learning_rate": 4.6291949854112635e-05, "loss": 2.3867, "step": 2563000 }, { "epoch": 7.42, "learning_rate": 4.629122620646536e-05, "loss": 2.3453, "step": 2563500 }, { "epoch": 7.42, "learning_rate": 4.629050400611338e-05, "loss": 2.338, "step": 2564000 }, { "epoch": 7.42, "learning_rate": 4.62897803584661e-05, "loss": 2.3344, "step": 2564500 }, { "epoch": 7.42, "learning_rate": 4.6289056710818824e-05, "loss": 2.3704, "step": 2565000 }, { "epoch": 7.43, "learning_rate": 4.628833306317155e-05, "loss": 2.3734, "step": 2565500 }, { "epoch": 7.43, "learning_rate": 4.628760941552427e-05, "loss": 2.3448, "step": 2566000 }, { "epoch": 7.43, "learning_rate": 4.628688576787699e-05, "loss": 2.3515, "step": 2566500 }, { "epoch": 7.43, "learning_rate": 4.6286162120229713e-05, "loss": 2.3849, "step": 2567000 }, { "epoch": 7.43, "learning_rate": 4.6285438472582436e-05, "loss": 2.3378, "step": 2567500 }, { "epoch": 7.43, "learning_rate": 4.6284714824935165e-05, "loss": 2.3894, "step": 2568000 }, { "epoch": 7.43, "learning_rate": 4.628399117728789e-05, "loss": 2.3624, "step": 2568500 }, { "epoch": 7.44, "learning_rate": 4.628326752964061e-05, "loss": 2.3398, "step": 2569000 }, { "epoch": 7.44, "learning_rate": 4.628254388199333e-05, "loss": 2.3612, "step": 2569500 }, { "epoch": 7.44, "learning_rate": 4.628182023434606e-05, "loss": 2.3612, "step": 2570000 }, { "epoch": 7.44, "learning_rate": 4.628109658669878e-05, "loss": 2.3718, "step": 2570500 }, { "epoch": 7.44, "learning_rate": 4.6280372939051505e-05, "loss": 2.3732, "step": 2571000 }, { "epoch": 7.44, "learning_rate": 4.627964929140423e-05, "loss": 2.3562, "step": 2571500 }, { "epoch": 7.44, "learning_rate": 4.627892564375695e-05, "loss": 2.3543, "step": 2572000 }, { "epoch": 7.45, "learning_rate": 4.627820199610967e-05, "loss": 2.3401, "step": 2572500 }, { "epoch": 7.45, "learning_rate": 4.6277478348462394e-05, "loss": 2.3581, "step": 2573000 }, { "epoch": 7.45, "learning_rate": 4.6276754700815116e-05, "loss": 2.3535, "step": 2573500 }, { "epoch": 7.45, "learning_rate": 4.627603105316784e-05, "loss": 2.3635, "step": 2574000 }, { "epoch": 7.45, "learning_rate": 4.627530740552056e-05, "loss": 2.3792, "step": 2574500 }, { "epoch": 7.45, "learning_rate": 4.627458665246388e-05, "loss": 2.311, "step": 2575000 }, { "epoch": 7.46, "learning_rate": 4.6273863004816605e-05, "loss": 2.3701, "step": 2575500 }, { "epoch": 7.46, "learning_rate": 4.627313935716933e-05, "loss": 2.3654, "step": 2576000 }, { "epoch": 7.46, "learning_rate": 4.627241570952205e-05, "loss": 2.36, "step": 2576500 }, { "epoch": 7.46, "learning_rate": 4.627169206187477e-05, "loss": 2.3579, "step": 2577000 }, { "epoch": 7.46, "learning_rate": 4.6270968414227494e-05, "loss": 2.3577, "step": 2577500 }, { "epoch": 7.46, "learning_rate": 4.627024766117081e-05, "loss": 2.3675, "step": 2578000 }, { "epoch": 7.46, "learning_rate": 4.626952401352353e-05, "loss": 2.3542, "step": 2578500 }, { "epoch": 7.47, "learning_rate": 4.6268800365876254e-05, "loss": 2.3685, "step": 2579000 }, { "epoch": 7.47, "learning_rate": 4.6268076718228976e-05, "loss": 2.371, "step": 2579500 }, { "epoch": 7.47, "learning_rate": 4.626735451787699e-05, "loss": 2.3349, "step": 2580000 }, { "epoch": 7.47, "learning_rate": 4.6266630870229714e-05, "loss": 2.3445, "step": 2580500 }, { "epoch": 7.47, "learning_rate": 4.6265907222582436e-05, "loss": 2.3505, "step": 2581000 }, { "epoch": 7.47, "learning_rate": 4.626518357493516e-05, "loss": 2.3805, "step": 2581500 }, { "epoch": 7.47, "learning_rate": 4.626445992728788e-05, "loss": 2.3327, "step": 2582000 }, { "epoch": 7.48, "learning_rate": 4.626373627964061e-05, "loss": 2.3523, "step": 2582500 }, { "epoch": 7.48, "learning_rate": 4.626301263199333e-05, "loss": 2.3691, "step": 2583000 }, { "epoch": 7.48, "learning_rate": 4.626228898434606e-05, "loss": 2.3657, "step": 2583500 }, { "epoch": 7.48, "learning_rate": 4.626156533669878e-05, "loss": 2.3459, "step": 2584000 }, { "epoch": 7.48, "learning_rate": 4.6260841689051506e-05, "loss": 2.3313, "step": 2584500 }, { "epoch": 7.48, "learning_rate": 4.626011804140423e-05, "loss": 2.3457, "step": 2585000 }, { "epoch": 7.48, "learning_rate": 4.625939439375695e-05, "loss": 2.3611, "step": 2585500 }, { "epoch": 7.49, "learning_rate": 4.625867074610967e-05, "loss": 2.3557, "step": 2586000 }, { "epoch": 7.49, "learning_rate": 4.6257947098462395e-05, "loss": 2.3558, "step": 2586500 }, { "epoch": 7.49, "learning_rate": 4.625722345081512e-05, "loss": 2.3577, "step": 2587000 }, { "epoch": 7.49, "learning_rate": 4.625649980316784e-05, "loss": 2.3583, "step": 2587500 }, { "epoch": 7.49, "learning_rate": 4.625577615552056e-05, "loss": 2.3512, "step": 2588000 }, { "epoch": 7.49, "learning_rate": 4.6255052507873283e-05, "loss": 2.3685, "step": 2588500 }, { "epoch": 7.49, "learning_rate": 4.6254330307521306e-05, "loss": 2.3606, "step": 2589000 }, { "epoch": 7.5, "learning_rate": 4.625360665987403e-05, "loss": 2.3374, "step": 2589500 }, { "epoch": 7.5, "learning_rate": 4.625288301222676e-05, "loss": 2.342, "step": 2590000 }, { "epoch": 7.5, "learning_rate": 4.625215936457948e-05, "loss": 2.3855, "step": 2590500 }, { "epoch": 7.5, "learning_rate": 4.6251437164227495e-05, "loss": 2.3399, "step": 2591000 }, { "epoch": 7.5, "learning_rate": 4.625071351658022e-05, "loss": 2.3498, "step": 2591500 }, { "epoch": 7.5, "learning_rate": 4.624999131622824e-05, "loss": 2.3479, "step": 2592000 }, { "epoch": 7.5, "learning_rate": 4.624926766858096e-05, "loss": 2.3396, "step": 2592500 }, { "epoch": 7.51, "learning_rate": 4.6248544020933684e-05, "loss": 2.3527, "step": 2593000 }, { "epoch": 7.51, "learning_rate": 4.6247820373286406e-05, "loss": 2.3679, "step": 2593500 }, { "epoch": 7.51, "learning_rate": 4.624709817293442e-05, "loss": 2.35, "step": 2594000 }, { "epoch": 7.51, "learning_rate": 4.6246374525287144e-05, "loss": 2.3675, "step": 2594500 }, { "epoch": 7.51, "learning_rate": 4.6245650877639866e-05, "loss": 2.3601, "step": 2595000 }, { "epoch": 7.51, "learning_rate": 4.624492722999259e-05, "loss": 2.3604, "step": 2595500 }, { "epoch": 7.51, "learning_rate": 4.624420358234531e-05, "loss": 2.3697, "step": 2596000 }, { "epoch": 7.52, "learning_rate": 4.624347993469804e-05, "loss": 2.3497, "step": 2596500 }, { "epoch": 7.52, "learning_rate": 4.6242757734346055e-05, "loss": 2.3393, "step": 2597000 }, { "epoch": 7.52, "learning_rate": 4.6242034086698784e-05, "loss": 2.3864, "step": 2597500 }, { "epoch": 7.52, "learning_rate": 4.6241310439051506e-05, "loss": 2.3298, "step": 2598000 }, { "epoch": 7.52, "learning_rate": 4.624058679140423e-05, "loss": 2.3432, "step": 2598500 }, { "epoch": 7.52, "learning_rate": 4.623986314375695e-05, "loss": 2.3172, "step": 2599000 }, { "epoch": 7.52, "learning_rate": 4.623913949610967e-05, "loss": 2.3599, "step": 2599500 }, { "epoch": 7.53, "learning_rate": 4.623841729575769e-05, "loss": 2.36, "step": 2600000 }, { "epoch": 7.53, "learning_rate": 4.623769364811041e-05, "loss": 2.3511, "step": 2600500 }, { "epoch": 7.53, "learning_rate": 4.623697000046314e-05, "loss": 2.3628, "step": 2601000 }, { "epoch": 7.53, "learning_rate": 4.6236247800111155e-05, "loss": 2.3583, "step": 2601500 }, { "epoch": 7.53, "learning_rate": 4.623552415246388e-05, "loss": 2.3397, "step": 2602000 }, { "epoch": 7.53, "learning_rate": 4.62348005048166e-05, "loss": 2.3579, "step": 2602500 }, { "epoch": 7.53, "learning_rate": 4.623407685716932e-05, "loss": 2.3408, "step": 2603000 }, { "epoch": 7.54, "learning_rate": 4.6233353209522044e-05, "loss": 2.3598, "step": 2603500 }, { "epoch": 7.54, "learning_rate": 4.623262956187477e-05, "loss": 2.3696, "step": 2604000 }, { "epoch": 7.54, "learning_rate": 4.623190591422749e-05, "loss": 2.3487, "step": 2604500 }, { "epoch": 7.54, "learning_rate": 4.623118226658022e-05, "loss": 2.3747, "step": 2605000 }, { "epoch": 7.54, "learning_rate": 4.623045861893294e-05, "loss": 2.3399, "step": 2605500 }, { "epoch": 7.54, "learning_rate": 4.622973497128566e-05, "loss": 2.3666, "step": 2606000 }, { "epoch": 7.54, "learning_rate": 4.6229012770933685e-05, "loss": 2.3666, "step": 2606500 }, { "epoch": 7.55, "learning_rate": 4.622828912328641e-05, "loss": 2.3533, "step": 2607000 }, { "epoch": 7.55, "learning_rate": 4.622756547563913e-05, "loss": 2.337, "step": 2607500 }, { "epoch": 7.55, "learning_rate": 4.622684182799185e-05, "loss": 2.3649, "step": 2608000 }, { "epoch": 7.55, "learning_rate": 4.6226118180344574e-05, "loss": 2.3452, "step": 2608500 }, { "epoch": 7.55, "learning_rate": 4.6225394532697296e-05, "loss": 2.3787, "step": 2609000 }, { "epoch": 7.55, "learning_rate": 4.622467088505002e-05, "loss": 2.3546, "step": 2609500 }, { "epoch": 7.55, "learning_rate": 4.622394723740274e-05, "loss": 2.3598, "step": 2610000 }, { "epoch": 7.56, "learning_rate": 4.622322358975546e-05, "loss": 2.3308, "step": 2610500 }, { "epoch": 7.56, "learning_rate": 4.6222501389403485e-05, "loss": 2.3515, "step": 2611000 }, { "epoch": 7.56, "learning_rate": 4.622177774175621e-05, "loss": 2.3634, "step": 2611500 }, { "epoch": 7.56, "learning_rate": 4.622105409410893e-05, "loss": 2.3631, "step": 2612000 }, { "epoch": 7.56, "learning_rate": 4.622033044646166e-05, "loss": 2.3534, "step": 2612500 }, { "epoch": 7.56, "learning_rate": 4.621960679881438e-05, "loss": 2.3751, "step": 2613000 }, { "epoch": 7.57, "learning_rate": 4.62188831511671e-05, "loss": 2.3645, "step": 2613500 }, { "epoch": 7.57, "learning_rate": 4.621816095081512e-05, "loss": 2.3473, "step": 2614000 }, { "epoch": 7.57, "learning_rate": 4.621743730316784e-05, "loss": 2.3684, "step": 2614500 }, { "epoch": 7.57, "learning_rate": 4.621671365552056e-05, "loss": 2.3618, "step": 2615000 }, { "epoch": 7.57, "learning_rate": 4.621599000787329e-05, "loss": 2.3846, "step": 2615500 }, { "epoch": 7.57, "learning_rate": 4.621526780752131e-05, "loss": 2.3591, "step": 2616000 }, { "epoch": 7.57, "learning_rate": 4.621454415987403e-05, "loss": 2.3563, "step": 2616500 }, { "epoch": 7.58, "learning_rate": 4.621382051222675e-05, "loss": 2.3491, "step": 2617000 }, { "epoch": 7.58, "learning_rate": 4.6213096864579474e-05, "loss": 2.3458, "step": 2617500 }, { "epoch": 7.58, "learning_rate": 4.6212373216932196e-05, "loss": 2.3561, "step": 2618000 }, { "epoch": 7.58, "learning_rate": 4.621164956928492e-05, "loss": 2.3699, "step": 2618500 }, { "epoch": 7.58, "learning_rate": 4.621092592163764e-05, "loss": 2.3664, "step": 2619000 }, { "epoch": 7.58, "learning_rate": 4.621020227399037e-05, "loss": 2.3589, "step": 2619500 }, { "epoch": 7.58, "learning_rate": 4.620948007363839e-05, "loss": 2.3745, "step": 2620000 }, { "epoch": 7.59, "learning_rate": 4.6208756425991114e-05, "loss": 2.3698, "step": 2620500 }, { "epoch": 7.59, "learning_rate": 4.6208032778343837e-05, "loss": 2.3862, "step": 2621000 }, { "epoch": 7.59, "learning_rate": 4.620730913069656e-05, "loss": 2.339, "step": 2621500 }, { "epoch": 7.59, "learning_rate": 4.6206586930344574e-05, "loss": 2.3516, "step": 2622000 }, { "epoch": 7.59, "learning_rate": 4.62058632826973e-05, "loss": 2.3625, "step": 2622500 }, { "epoch": 7.59, "learning_rate": 4.620514108234532e-05, "loss": 2.3607, "step": 2623000 }, { "epoch": 7.59, "learning_rate": 4.620441743469804e-05, "loss": 2.3586, "step": 2623500 }, { "epoch": 7.6, "learning_rate": 4.6203693787050763e-05, "loss": 2.324, "step": 2624000 }, { "epoch": 7.6, "learning_rate": 4.6202970139403486e-05, "loss": 2.3377, "step": 2624500 }, { "epoch": 7.6, "learning_rate": 4.62022479390515e-05, "loss": 2.3847, "step": 2625000 }, { "epoch": 7.6, "learning_rate": 4.6201524291404223e-05, "loss": 2.3679, "step": 2625500 }, { "epoch": 7.6, "learning_rate": 4.6200800643756946e-05, "loss": 2.3511, "step": 2626000 }, { "epoch": 7.6, "learning_rate": 4.620007699610967e-05, "loss": 2.3596, "step": 2626500 }, { "epoch": 7.6, "learning_rate": 4.619935334846239e-05, "loss": 2.3396, "step": 2627000 }, { "epoch": 7.61, "learning_rate": 4.619862970081512e-05, "loss": 2.3775, "step": 2627500 }, { "epoch": 7.61, "learning_rate": 4.619790750046314e-05, "loss": 2.3437, "step": 2628000 }, { "epoch": 7.61, "learning_rate": 4.6197183852815864e-05, "loss": 2.3413, "step": 2628500 }, { "epoch": 7.61, "learning_rate": 4.6196460205168586e-05, "loss": 2.3617, "step": 2629000 }, { "epoch": 7.61, "learning_rate": 4.61957380048166e-05, "loss": 2.3689, "step": 2629500 }, { "epoch": 7.61, "learning_rate": 4.6195014357169324e-05, "loss": 2.345, "step": 2630000 }, { "epoch": 7.61, "learning_rate": 4.6194290709522046e-05, "loss": 2.3699, "step": 2630500 }, { "epoch": 7.62, "learning_rate": 4.619356706187477e-05, "loss": 2.3584, "step": 2631000 }, { "epoch": 7.62, "learning_rate": 4.619284341422749e-05, "loss": 2.3628, "step": 2631500 }, { "epoch": 7.62, "learning_rate": 4.619211976658022e-05, "loss": 2.3551, "step": 2632000 }, { "epoch": 7.62, "learning_rate": 4.619139611893294e-05, "loss": 2.3636, "step": 2632500 }, { "epoch": 7.62, "learning_rate": 4.6190672471285664e-05, "loss": 2.3261, "step": 2633000 }, { "epoch": 7.62, "learning_rate": 4.6189948823638386e-05, "loss": 2.3729, "step": 2633500 }, { "epoch": 7.62, "learning_rate": 4.618922517599111e-05, "loss": 2.353, "step": 2634000 }, { "epoch": 7.63, "learning_rate": 4.618850152834384e-05, "loss": 2.3736, "step": 2634500 }, { "epoch": 7.63, "learning_rate": 4.618777788069656e-05, "loss": 2.3775, "step": 2635000 }, { "epoch": 7.63, "learning_rate": 4.6187055680344575e-05, "loss": 2.347, "step": 2635500 }, { "epoch": 7.63, "learning_rate": 4.61863320326973e-05, "loss": 2.333, "step": 2636000 }, { "epoch": 7.63, "learning_rate": 4.618560838505002e-05, "loss": 2.3365, "step": 2636500 }, { "epoch": 7.63, "learning_rate": 4.618488473740274e-05, "loss": 2.369, "step": 2637000 }, { "epoch": 7.63, "learning_rate": 4.618416108975547e-05, "loss": 2.3537, "step": 2637500 }, { "epoch": 7.64, "learning_rate": 4.6183438889403486e-05, "loss": 2.3695, "step": 2638000 }, { "epoch": 7.64, "learning_rate": 4.618271524175621e-05, "loss": 2.3579, "step": 2638500 }, { "epoch": 7.64, "learning_rate": 4.618199159410893e-05, "loss": 2.3611, "step": 2639000 }, { "epoch": 7.64, "learning_rate": 4.618126794646165e-05, "loss": 2.3474, "step": 2639500 }, { "epoch": 7.64, "learning_rate": 4.6180544298814375e-05, "loss": 2.3261, "step": 2640000 }, { "epoch": 7.64, "learning_rate": 4.61798206511671e-05, "loss": 2.3707, "step": 2640500 }, { "epoch": 7.64, "learning_rate": 4.617909700351982e-05, "loss": 2.3605, "step": 2641000 }, { "epoch": 7.65, "learning_rate": 4.617837335587254e-05, "loss": 2.341, "step": 2641500 }, { "epoch": 7.65, "learning_rate": 4.6177651155520564e-05, "loss": 2.3526, "step": 2642000 }, { "epoch": 7.65, "learning_rate": 4.617692895516859e-05, "loss": 2.342, "step": 2642500 }, { "epoch": 7.65, "learning_rate": 4.617620530752131e-05, "loss": 2.3708, "step": 2643000 }, { "epoch": 7.65, "learning_rate": 4.6175483107169325e-05, "loss": 2.367, "step": 2643500 }, { "epoch": 7.65, "learning_rate": 4.617475945952205e-05, "loss": 2.3547, "step": 2644000 }, { "epoch": 7.65, "learning_rate": 4.617403581187477e-05, "loss": 2.3439, "step": 2644500 }, { "epoch": 7.66, "learning_rate": 4.617331216422749e-05, "loss": 2.3368, "step": 2645000 }, { "epoch": 7.66, "learning_rate": 4.617258851658022e-05, "loss": 2.367, "step": 2645500 }, { "epoch": 7.66, "learning_rate": 4.617186486893294e-05, "loss": 2.3653, "step": 2646000 }, { "epoch": 7.66, "learning_rate": 4.6171141221285665e-05, "loss": 2.3556, "step": 2646500 }, { "epoch": 7.66, "learning_rate": 4.617041757363839e-05, "loss": 2.3271, "step": 2647000 }, { "epoch": 7.66, "learning_rate": 4.616969392599111e-05, "loss": 2.3565, "step": 2647500 }, { "epoch": 7.66, "learning_rate": 4.6168971725639125e-05, "loss": 2.3483, "step": 2648000 }, { "epoch": 7.67, "learning_rate": 4.616824807799185e-05, "loss": 2.3775, "step": 2648500 }, { "epoch": 7.67, "learning_rate": 4.616752443034457e-05, "loss": 2.3587, "step": 2649000 }, { "epoch": 7.67, "learning_rate": 4.616680222999259e-05, "loss": 2.3652, "step": 2649500 }, { "epoch": 7.67, "learning_rate": 4.616607858234532e-05, "loss": 2.346, "step": 2650000 }, { "epoch": 7.67, "learning_rate": 4.616535493469804e-05, "loss": 2.3474, "step": 2650500 }, { "epoch": 7.67, "learning_rate": 4.6164631287050765e-05, "loss": 2.3592, "step": 2651000 }, { "epoch": 7.68, "learning_rate": 4.616390763940349e-05, "loss": 2.3615, "step": 2651500 }, { "epoch": 7.68, "learning_rate": 4.616318399175621e-05, "loss": 2.3623, "step": 2652000 }, { "epoch": 7.68, "learning_rate": 4.616246034410893e-05, "loss": 2.3663, "step": 2652500 }, { "epoch": 7.68, "learning_rate": 4.6161736696461654e-05, "loss": 2.3641, "step": 2653000 }, { "epoch": 7.68, "learning_rate": 4.6161013048814376e-05, "loss": 2.3406, "step": 2653500 }, { "epoch": 7.68, "learning_rate": 4.61602908484624e-05, "loss": 2.3559, "step": 2654000 }, { "epoch": 7.68, "learning_rate": 4.6159568648110414e-05, "loss": 2.3717, "step": 2654500 }, { "epoch": 7.69, "learning_rate": 4.6158845000463136e-05, "loss": 2.3652, "step": 2655000 }, { "epoch": 7.69, "learning_rate": 4.615812135281586e-05, "loss": 2.3654, "step": 2655500 }, { "epoch": 7.69, "learning_rate": 4.615739770516858e-05, "loss": 2.3393, "step": 2656000 }, { "epoch": 7.69, "learning_rate": 4.61566740575213e-05, "loss": 2.3584, "step": 2656500 }, { "epoch": 7.69, "learning_rate": 4.6155950409874025e-05, "loss": 2.3633, "step": 2657000 }, { "epoch": 7.69, "learning_rate": 4.6155226762226754e-05, "loss": 2.3588, "step": 2657500 }, { "epoch": 7.69, "learning_rate": 4.6154503114579476e-05, "loss": 2.3493, "step": 2658000 }, { "epoch": 7.7, "learning_rate": 4.61537794669322e-05, "loss": 2.3288, "step": 2658500 }, { "epoch": 7.7, "learning_rate": 4.615305581928492e-05, "loss": 2.3494, "step": 2659000 }, { "epoch": 7.7, "learning_rate": 4.615233217163765e-05, "loss": 2.3584, "step": 2659500 }, { "epoch": 7.7, "learning_rate": 4.615160852399037e-05, "loss": 2.3461, "step": 2660000 }, { "epoch": 7.7, "learning_rate": 4.615088632363839e-05, "loss": 2.3696, "step": 2660500 }, { "epoch": 7.7, "learning_rate": 4.615016267599111e-05, "loss": 2.3635, "step": 2661000 }, { "epoch": 7.7, "learning_rate": 4.614943902834383e-05, "loss": 2.3641, "step": 2661500 }, { "epoch": 7.71, "learning_rate": 4.614871682799185e-05, "loss": 2.3615, "step": 2662000 }, { "epoch": 7.71, "learning_rate": 4.614799318034457e-05, "loss": 2.3623, "step": 2662500 }, { "epoch": 7.71, "learning_rate": 4.6147272427287886e-05, "loss": 2.377, "step": 2663000 }, { "epoch": 7.71, "learning_rate": 4.614654877964061e-05, "loss": 2.385, "step": 2663500 }, { "epoch": 7.71, "learning_rate": 4.614582513199333e-05, "loss": 2.3702, "step": 2664000 }, { "epoch": 7.71, "learning_rate": 4.614510148434605e-05, "loss": 2.363, "step": 2664500 }, { "epoch": 7.71, "learning_rate": 4.614437783669878e-05, "loss": 2.3471, "step": 2665000 }, { "epoch": 7.72, "learning_rate": 4.6143654189051504e-05, "loss": 2.3523, "step": 2665500 }, { "epoch": 7.72, "learning_rate": 4.6142930541404226e-05, "loss": 2.3534, "step": 2666000 }, { "epoch": 7.72, "learning_rate": 4.614220689375695e-05, "loss": 2.3528, "step": 2666500 }, { "epoch": 7.72, "learning_rate": 4.614148324610967e-05, "loss": 2.3619, "step": 2667000 }, { "epoch": 7.72, "learning_rate": 4.61407595984624e-05, "loss": 2.3474, "step": 2667500 }, { "epoch": 7.72, "learning_rate": 4.614003595081512e-05, "loss": 2.3698, "step": 2668000 }, { "epoch": 7.72, "learning_rate": 4.6139312303167844e-05, "loss": 2.3653, "step": 2668500 }, { "epoch": 7.73, "learning_rate": 4.613859010281586e-05, "loss": 2.339, "step": 2669000 }, { "epoch": 7.73, "learning_rate": 4.613786645516858e-05, "loss": 2.3729, "step": 2669500 }, { "epoch": 7.73, "learning_rate": 4.6137142807521304e-05, "loss": 2.3822, "step": 2670000 }, { "epoch": 7.73, "learning_rate": 4.6136420607169326e-05, "loss": 2.3594, "step": 2670500 }, { "epoch": 7.73, "learning_rate": 4.613569695952205e-05, "loss": 2.3533, "step": 2671000 }, { "epoch": 7.73, "learning_rate": 4.613497331187477e-05, "loss": 2.3666, "step": 2671500 }, { "epoch": 7.73, "learning_rate": 4.613424966422749e-05, "loss": 2.3673, "step": 2672000 }, { "epoch": 7.74, "learning_rate": 4.613352601658022e-05, "loss": 2.3489, "step": 2672500 }, { "epoch": 7.74, "learning_rate": 4.6132802368932944e-05, "loss": 2.3566, "step": 2673000 }, { "epoch": 7.74, "learning_rate": 4.6132078721285666e-05, "loss": 2.3514, "step": 2673500 }, { "epoch": 7.74, "learning_rate": 4.613135507363839e-05, "loss": 2.342, "step": 2674000 }, { "epoch": 7.74, "learning_rate": 4.613063142599111e-05, "loss": 2.377, "step": 2674500 }, { "epoch": 7.74, "learning_rate": 4.612990777834383e-05, "loss": 2.3566, "step": 2675000 }, { "epoch": 7.74, "learning_rate": 4.612918557799185e-05, "loss": 2.3468, "step": 2675500 }, { "epoch": 7.75, "learning_rate": 4.612846193034458e-05, "loss": 2.3564, "step": 2676000 }, { "epoch": 7.75, "learning_rate": 4.61277382826973e-05, "loss": 2.3468, "step": 2676500 }, { "epoch": 7.75, "learning_rate": 4.612701463505002e-05, "loss": 2.3486, "step": 2677000 }, { "epoch": 7.75, "learning_rate": 4.6126290987402744e-05, "loss": 2.3636, "step": 2677500 }, { "epoch": 7.75, "learning_rate": 4.6125567339755467e-05, "loss": 2.3648, "step": 2678000 }, { "epoch": 7.75, "learning_rate": 4.6124848033994075e-05, "loss": 2.3866, "step": 2678500 }, { "epoch": 7.75, "learning_rate": 4.61241243863468e-05, "loss": 2.3732, "step": 2679000 }, { "epoch": 7.76, "learning_rate": 4.612340073869952e-05, "loss": 2.3582, "step": 2679500 }, { "epoch": 7.76, "learning_rate": 4.6122678538347536e-05, "loss": 2.3678, "step": 2680000 }, { "epoch": 7.76, "learning_rate": 4.6121954890700265e-05, "loss": 2.3706, "step": 2680500 }, { "epoch": 7.76, "learning_rate": 4.612123124305299e-05, "loss": 2.3745, "step": 2681000 }, { "epoch": 7.76, "learning_rate": 4.612050759540571e-05, "loss": 2.3494, "step": 2681500 }, { "epoch": 7.76, "learning_rate": 4.611978394775843e-05, "loss": 2.3489, "step": 2682000 }, { "epoch": 7.76, "learning_rate": 4.6119060300111153e-05, "loss": 2.3222, "step": 2682500 }, { "epoch": 7.77, "learning_rate": 4.6118336652463876e-05, "loss": 2.3655, "step": 2683000 }, { "epoch": 7.77, "learning_rate": 4.61176130048166e-05, "loss": 2.3891, "step": 2683500 }, { "epoch": 7.77, "learning_rate": 4.611688935716933e-05, "loss": 2.3574, "step": 2684000 }, { "epoch": 7.77, "learning_rate": 4.611616570952205e-05, "loss": 2.3533, "step": 2684500 }, { "epoch": 7.77, "learning_rate": 4.611544206187477e-05, "loss": 2.3373, "step": 2685000 }, { "epoch": 7.77, "learning_rate": 4.6114718414227494e-05, "loss": 2.344, "step": 2685500 }, { "epoch": 7.77, "learning_rate": 4.6113994766580216e-05, "loss": 2.3476, "step": 2686000 }, { "epoch": 7.78, "learning_rate": 4.611327111893294e-05, "loss": 2.3288, "step": 2686500 }, { "epoch": 7.78, "learning_rate": 4.611254747128566e-05, "loss": 2.357, "step": 2687000 }, { "epoch": 7.78, "learning_rate": 4.611182382363839e-05, "loss": 2.3556, "step": 2687500 }, { "epoch": 7.78, "learning_rate": 4.611110017599111e-05, "loss": 2.3624, "step": 2688000 }, { "epoch": 7.78, "learning_rate": 4.6110376528343834e-05, "loss": 2.368, "step": 2688500 }, { "epoch": 7.78, "learning_rate": 4.6109652880696556e-05, "loss": 2.3641, "step": 2689000 }, { "epoch": 7.79, "learning_rate": 4.610893068034458e-05, "loss": 2.364, "step": 2689500 }, { "epoch": 7.79, "learning_rate": 4.6108208479992594e-05, "loss": 2.3451, "step": 2690000 }, { "epoch": 7.79, "learning_rate": 4.6107484832345316e-05, "loss": 2.3543, "step": 2690500 }, { "epoch": 7.79, "learning_rate": 4.610676118469804e-05, "loss": 2.3746, "step": 2691000 }, { "epoch": 7.79, "learning_rate": 4.610603753705076e-05, "loss": 2.3555, "step": 2691500 }, { "epoch": 7.79, "learning_rate": 4.610531388940348e-05, "loss": 2.3416, "step": 2692000 }, { "epoch": 7.79, "learning_rate": 4.6104590241756205e-05, "loss": 2.3483, "step": 2692500 }, { "epoch": 7.8, "learning_rate": 4.610386659410893e-05, "loss": 2.3344, "step": 2693000 }, { "epoch": 7.8, "learning_rate": 4.610314439375695e-05, "loss": 2.3693, "step": 2693500 }, { "epoch": 7.8, "learning_rate": 4.610242074610967e-05, "loss": 2.3745, "step": 2694000 }, { "epoch": 7.8, "learning_rate": 4.6101697098462394e-05, "loss": 2.3403, "step": 2694500 }, { "epoch": 7.8, "learning_rate": 4.610097345081512e-05, "loss": 2.3358, "step": 2695000 }, { "epoch": 7.8, "learning_rate": 4.6100249803167845e-05, "loss": 2.3645, "step": 2695500 }, { "epoch": 7.8, "learning_rate": 4.609952615552057e-05, "loss": 2.3524, "step": 2696000 }, { "epoch": 7.81, "learning_rate": 4.609880250787329e-05, "loss": 2.3531, "step": 2696500 }, { "epoch": 7.81, "learning_rate": 4.6098080307521305e-05, "loss": 2.33, "step": 2697000 }, { "epoch": 7.81, "learning_rate": 4.609735665987403e-05, "loss": 2.3612, "step": 2697500 }, { "epoch": 7.81, "learning_rate": 4.609663301222675e-05, "loss": 2.352, "step": 2698000 }, { "epoch": 7.81, "learning_rate": 4.609590936457948e-05, "loss": 2.3596, "step": 2698500 }, { "epoch": 7.81, "learning_rate": 4.60951857169322e-05, "loss": 2.3462, "step": 2699000 }, { "epoch": 7.81, "learning_rate": 4.609446206928492e-05, "loss": 2.3678, "step": 2699500 }, { "epoch": 7.82, "learning_rate": 4.6093738421637646e-05, "loss": 2.3461, "step": 2700000 }, { "epoch": 7.82, "learning_rate": 4.609301477399037e-05, "loss": 2.36, "step": 2700500 }, { "epoch": 7.82, "learning_rate": 4.609229112634309e-05, "loss": 2.3533, "step": 2701000 }, { "epoch": 7.82, "learning_rate": 4.6091570373286406e-05, "loss": 2.3584, "step": 2701500 }, { "epoch": 7.82, "learning_rate": 4.609084672563913e-05, "loss": 2.3504, "step": 2702000 }, { "epoch": 7.82, "learning_rate": 4.609012307799186e-05, "loss": 2.36, "step": 2702500 }, { "epoch": 7.82, "learning_rate": 4.608939943034458e-05, "loss": 2.3771, "step": 2703000 }, { "epoch": 7.83, "learning_rate": 4.60886757826973e-05, "loss": 2.3562, "step": 2703500 }, { "epoch": 7.83, "learning_rate": 4.6087952135050024e-05, "loss": 2.3612, "step": 2704000 }, { "epoch": 7.83, "learning_rate": 4.608723138199333e-05, "loss": 2.336, "step": 2704500 }, { "epoch": 7.83, "learning_rate": 4.6086507734346055e-05, "loss": 2.3635, "step": 2705000 }, { "epoch": 7.83, "learning_rate": 4.608578408669878e-05, "loss": 2.3652, "step": 2705500 }, { "epoch": 7.83, "learning_rate": 4.6085060439051506e-05, "loss": 2.349, "step": 2706000 }, { "epoch": 7.83, "learning_rate": 4.608433679140423e-05, "loss": 2.359, "step": 2706500 }, { "epoch": 7.84, "learning_rate": 4.608361314375695e-05, "loss": 2.3597, "step": 2707000 }, { "epoch": 7.84, "learning_rate": 4.608288949610967e-05, "loss": 2.3296, "step": 2707500 }, { "epoch": 7.84, "learning_rate": 4.6082165848462395e-05, "loss": 2.3479, "step": 2708000 }, { "epoch": 7.84, "learning_rate": 4.608144220081512e-05, "loss": 2.3327, "step": 2708500 }, { "epoch": 7.84, "learning_rate": 4.608071855316784e-05, "loss": 2.3476, "step": 2709000 }, { "epoch": 7.84, "learning_rate": 4.6079996352815855e-05, "loss": 2.3505, "step": 2709500 }, { "epoch": 7.84, "learning_rate": 4.6079272705168584e-05, "loss": 2.3586, "step": 2710000 }, { "epoch": 7.85, "learning_rate": 4.6078549057521306e-05, "loss": 2.341, "step": 2710500 }, { "epoch": 7.85, "learning_rate": 4.607782540987403e-05, "loss": 2.3368, "step": 2711000 }, { "epoch": 7.85, "learning_rate": 4.607710320952205e-05, "loss": 2.3621, "step": 2711500 }, { "epoch": 7.85, "learning_rate": 4.607637956187477e-05, "loss": 2.3524, "step": 2712000 }, { "epoch": 7.85, "learning_rate": 4.6075655914227495e-05, "loss": 2.3616, "step": 2712500 }, { "epoch": 7.85, "learning_rate": 4.607493226658022e-05, "loss": 2.3502, "step": 2713000 }, { "epoch": 7.85, "learning_rate": 4.607420861893294e-05, "loss": 2.3545, "step": 2713500 }, { "epoch": 7.86, "learning_rate": 4.607348497128566e-05, "loss": 2.3584, "step": 2714000 }, { "epoch": 7.86, "learning_rate": 4.6072761323638384e-05, "loss": 2.356, "step": 2714500 }, { "epoch": 7.86, "learning_rate": 4.6072037675991106e-05, "loss": 2.3747, "step": 2715000 }, { "epoch": 7.86, "learning_rate": 4.607131402834383e-05, "loss": 2.3583, "step": 2715500 }, { "epoch": 7.86, "learning_rate": 4.607059038069656e-05, "loss": 2.3504, "step": 2716000 }, { "epoch": 7.86, "learning_rate": 4.606986673304928e-05, "loss": 2.3698, "step": 2716500 }, { "epoch": 7.86, "learning_rate": 4.606914308540201e-05, "loss": 2.375, "step": 2717000 }, { "epoch": 7.87, "learning_rate": 4.606841943775473e-05, "loss": 2.3537, "step": 2717500 }, { "epoch": 7.87, "learning_rate": 4.606769868469804e-05, "loss": 2.3401, "step": 2718000 }, { "epoch": 7.87, "learning_rate": 4.606697503705076e-05, "loss": 2.3763, "step": 2718500 }, { "epoch": 7.87, "learning_rate": 4.6066251389403484e-05, "loss": 2.3595, "step": 2719000 }, { "epoch": 7.87, "learning_rate": 4.606552774175621e-05, "loss": 2.3392, "step": 2719500 }, { "epoch": 7.87, "learning_rate": 4.606480409410893e-05, "loss": 2.3492, "step": 2720000 }, { "epoch": 7.87, "learning_rate": 4.606408189375695e-05, "loss": 2.3559, "step": 2720500 }, { "epoch": 7.88, "learning_rate": 4.6063358246109673e-05, "loss": 2.3542, "step": 2721000 }, { "epoch": 7.88, "learning_rate": 4.6062634598462396e-05, "loss": 2.3474, "step": 2721500 }, { "epoch": 7.88, "learning_rate": 4.606191095081512e-05, "loss": 2.3456, "step": 2722000 }, { "epoch": 7.88, "learning_rate": 4.6061188750463134e-05, "loss": 2.3377, "step": 2722500 }, { "epoch": 7.88, "learning_rate": 4.6060465102815856e-05, "loss": 2.3597, "step": 2723000 }, { "epoch": 7.88, "learning_rate": 4.605974145516858e-05, "loss": 2.341, "step": 2723500 }, { "epoch": 7.88, "learning_rate": 4.605901780752131e-05, "loss": 2.3811, "step": 2724000 }, { "epoch": 7.89, "learning_rate": 4.605829415987403e-05, "loss": 2.3588, "step": 2724500 }, { "epoch": 7.89, "learning_rate": 4.605757051222676e-05, "loss": 2.354, "step": 2725000 }, { "epoch": 7.89, "learning_rate": 4.6056848311874774e-05, "loss": 2.3524, "step": 2725500 }, { "epoch": 7.89, "learning_rate": 4.6056124664227496e-05, "loss": 2.3535, "step": 2726000 }, { "epoch": 7.89, "learning_rate": 4.605540101658022e-05, "loss": 2.338, "step": 2726500 }, { "epoch": 7.89, "learning_rate": 4.605467736893294e-05, "loss": 2.363, "step": 2727000 }, { "epoch": 7.89, "learning_rate": 4.605395372128566e-05, "loss": 2.347, "step": 2727500 }, { "epoch": 7.9, "learning_rate": 4.6053230073638385e-05, "loss": 2.353, "step": 2728000 }, { "epoch": 7.9, "learning_rate": 4.605250642599111e-05, "loss": 2.3812, "step": 2728500 }, { "epoch": 7.9, "learning_rate": 4.605178277834383e-05, "loss": 2.3525, "step": 2729000 }, { "epoch": 7.9, "learning_rate": 4.605105913069656e-05, "loss": 2.3672, "step": 2729500 }, { "epoch": 7.9, "learning_rate": 4.6050336930344574e-05, "loss": 2.3479, "step": 2730000 }, { "epoch": 7.9, "learning_rate": 4.6049613282697296e-05, "loss": 2.3618, "step": 2730500 }, { "epoch": 7.91, "learning_rate": 4.604888963505002e-05, "loss": 2.3589, "step": 2731000 }, { "epoch": 7.91, "learning_rate": 4.604816598740274e-05, "loss": 2.363, "step": 2731500 }, { "epoch": 7.91, "learning_rate": 4.604744233975547e-05, "loss": 2.3213, "step": 2732000 }, { "epoch": 7.91, "learning_rate": 4.604671869210819e-05, "loss": 2.3388, "step": 2732500 }, { "epoch": 7.91, "learning_rate": 4.604599649175621e-05, "loss": 2.3743, "step": 2733000 }, { "epoch": 7.91, "learning_rate": 4.604527429140423e-05, "loss": 2.3593, "step": 2733500 }, { "epoch": 7.91, "learning_rate": 4.604455064375695e-05, "loss": 2.344, "step": 2734000 }, { "epoch": 7.92, "learning_rate": 4.6043826996109674e-05, "loss": 2.3467, "step": 2734500 }, { "epoch": 7.92, "learning_rate": 4.604310479575769e-05, "loss": 2.331, "step": 2735000 }, { "epoch": 7.92, "learning_rate": 4.604238114811041e-05, "loss": 2.3622, "step": 2735500 }, { "epoch": 7.92, "learning_rate": 4.6041657500463134e-05, "loss": 2.3549, "step": 2736000 }, { "epoch": 7.92, "learning_rate": 4.6040933852815857e-05, "loss": 2.341, "step": 2736500 }, { "epoch": 7.92, "learning_rate": 4.6040210205168586e-05, "loss": 2.3586, "step": 2737000 }, { "epoch": 7.92, "learning_rate": 4.603948655752131e-05, "loss": 2.349, "step": 2737500 }, { "epoch": 7.93, "learning_rate": 4.603876290987403e-05, "loss": 2.3413, "step": 2738000 }, { "epoch": 7.93, "learning_rate": 4.6038040709522046e-05, "loss": 2.3774, "step": 2738500 }, { "epoch": 7.93, "learning_rate": 4.603731706187477e-05, "loss": 2.3769, "step": 2739000 }, { "epoch": 7.93, "learning_rate": 4.603659341422749e-05, "loss": 2.3604, "step": 2739500 }, { "epoch": 7.93, "learning_rate": 4.603586976658022e-05, "loss": 2.3577, "step": 2740000 }, { "epoch": 7.93, "learning_rate": 4.603514611893294e-05, "loss": 2.3607, "step": 2740500 }, { "epoch": 7.93, "learning_rate": 4.6034422471285664e-05, "loss": 2.3582, "step": 2741000 }, { "epoch": 7.94, "learning_rate": 4.6033698823638386e-05, "loss": 2.3359, "step": 2741500 }, { "epoch": 7.94, "learning_rate": 4.603297517599111e-05, "loss": 2.3699, "step": 2742000 }, { "epoch": 7.94, "learning_rate": 4.603225297563913e-05, "loss": 2.3682, "step": 2742500 }, { "epoch": 7.94, "learning_rate": 4.603152932799185e-05, "loss": 2.3593, "step": 2743000 }, { "epoch": 7.94, "learning_rate": 4.6030805680344575e-05, "loss": 2.3412, "step": 2743500 }, { "epoch": 7.94, "learning_rate": 4.60300820326973e-05, "loss": 2.3606, "step": 2744000 }, { "epoch": 7.94, "learning_rate": 4.602935983234531e-05, "loss": 2.345, "step": 2744500 }, { "epoch": 7.95, "learning_rate": 4.6028637631993335e-05, "loss": 2.3484, "step": 2745000 }, { "epoch": 7.95, "learning_rate": 4.602791543164135e-05, "loss": 2.3577, "step": 2745500 }, { "epoch": 7.95, "learning_rate": 4.602719178399407e-05, "loss": 2.3519, "step": 2746000 }, { "epoch": 7.95, "learning_rate": 4.6026468136346795e-05, "loss": 2.3681, "step": 2746500 }, { "epoch": 7.95, "learning_rate": 4.602574448869952e-05, "loss": 2.3577, "step": 2747000 }, { "epoch": 7.95, "learning_rate": 4.6025020841052246e-05, "loss": 2.3423, "step": 2747500 }, { "epoch": 7.95, "learning_rate": 4.602429719340497e-05, "loss": 2.3528, "step": 2748000 }, { "epoch": 7.96, "learning_rate": 4.602357354575769e-05, "loss": 2.3647, "step": 2748500 }, { "epoch": 7.96, "learning_rate": 4.602284989811041e-05, "loss": 2.349, "step": 2749000 }, { "epoch": 7.96, "learning_rate": 4.6022126250463135e-05, "loss": 2.3612, "step": 2749500 }, { "epoch": 7.96, "learning_rate": 4.602140260281586e-05, "loss": 2.3382, "step": 2750000 }, { "epoch": 7.96, "learning_rate": 4.6020678955168586e-05, "loss": 2.342, "step": 2750500 }, { "epoch": 7.96, "learning_rate": 4.601995530752131e-05, "loss": 2.3682, "step": 2751000 }, { "epoch": 7.96, "learning_rate": 4.601923165987403e-05, "loss": 2.3658, "step": 2751500 }, { "epoch": 7.97, "learning_rate": 4.601850801222675e-05, "loss": 2.3387, "step": 2752000 }, { "epoch": 7.97, "learning_rate": 4.6017784364579475e-05, "loss": 2.3463, "step": 2752500 }, { "epoch": 7.97, "learning_rate": 4.60170607169322e-05, "loss": 2.3453, "step": 2753000 }, { "epoch": 7.97, "learning_rate": 4.601633851658021e-05, "loss": 2.3527, "step": 2753500 }, { "epoch": 7.97, "learning_rate": 4.6015614868932935e-05, "loss": 2.349, "step": 2754000 }, { "epoch": 7.97, "learning_rate": 4.601489122128566e-05, "loss": 2.3504, "step": 2754500 }, { "epoch": 7.97, "learning_rate": 4.6014167573638387e-05, "loss": 2.3543, "step": 2755000 }, { "epoch": 7.98, "learning_rate": 4.601344537328641e-05, "loss": 2.3408, "step": 2755500 }, { "epoch": 7.98, "learning_rate": 4.601272172563913e-05, "loss": 2.339, "step": 2756000 }, { "epoch": 7.98, "learning_rate": 4.601199807799185e-05, "loss": 2.347, "step": 2756500 }, { "epoch": 7.98, "learning_rate": 4.601127587763987e-05, "loss": 2.3315, "step": 2757000 }, { "epoch": 7.98, "learning_rate": 4.601055222999259e-05, "loss": 2.3646, "step": 2757500 }, { "epoch": 7.98, "learning_rate": 4.600982858234531e-05, "loss": 2.3361, "step": 2758000 }, { "epoch": 7.98, "learning_rate": 4.6009104934698036e-05, "loss": 2.3739, "step": 2758500 }, { "epoch": 7.99, "learning_rate": 4.6008381287050765e-05, "loss": 2.3534, "step": 2759000 }, { "epoch": 7.99, "learning_rate": 4.600765763940349e-05, "loss": 2.3671, "step": 2759500 }, { "epoch": 7.99, "learning_rate": 4.600693399175621e-05, "loss": 2.36, "step": 2760000 }, { "epoch": 7.99, "learning_rate": 4.6006211791404225e-05, "loss": 2.348, "step": 2760500 }, { "epoch": 7.99, "learning_rate": 4.600548814375695e-05, "loss": 2.3424, "step": 2761000 }, { "epoch": 7.99, "learning_rate": 4.600476449610967e-05, "loss": 2.3616, "step": 2761500 }, { "epoch": 7.99, "learning_rate": 4.600404084846239e-05, "loss": 2.3605, "step": 2762000 }, { "epoch": 8.0, "learning_rate": 4.6003318648110414e-05, "loss": 2.3552, "step": 2762500 }, { "epoch": 8.0, "learning_rate": 4.6002595000463136e-05, "loss": 2.365, "step": 2763000 }, { "epoch": 8.0, "learning_rate": 4.6001871352815865e-05, "loss": 2.3511, "step": 2763500 }, { "epoch": 8.0, "eval_accuracy": 0.6457251163529523, "eval_accuracy_mlm": 0.6076235167413505, "eval_accuracy_nsp": 0.8500226863373253, "eval_loss": 2.32987904548645, "eval_runtime": 330.539, "eval_samples_per_second": 1320.225, "eval_steps_per_second": 55.01, "step": 2763776 }, { "epoch": 8.0, "learning_rate": 4.600114915246388e-05, "loss": 2.3725, "step": 2764000 }, { "epoch": 8.0, "learning_rate": 4.60004255048166e-05, "loss": 2.3288, "step": 2764500 }, { "epoch": 8.0, "learning_rate": 4.5999701857169325e-05, "loss": 2.3284, "step": 2765000 }, { "epoch": 8.0, "learning_rate": 4.599897820952205e-05, "loss": 2.3444, "step": 2765500 }, { "epoch": 8.01, "learning_rate": 4.599825456187477e-05, "loss": 2.3309, "step": 2766000 }, { "epoch": 8.01, "learning_rate": 4.599753091422749e-05, "loss": 2.3115, "step": 2766500 }, { "epoch": 8.01, "learning_rate": 4.5996807266580214e-05, "loss": 2.3397, "step": 2767000 }, { "epoch": 8.01, "learning_rate": 4.5996083618932936e-05, "loss": 2.3402, "step": 2767500 }, { "epoch": 8.01, "learning_rate": 4.5995359971285665e-05, "loss": 2.346, "step": 2768000 }, { "epoch": 8.01, "learning_rate": 4.599463777093368e-05, "loss": 2.3254, "step": 2768500 }, { "epoch": 8.02, "learning_rate": 4.59939141232864e-05, "loss": 2.3336, "step": 2769000 }, { "epoch": 8.02, "learning_rate": 4.5993190475639125e-05, "loss": 2.3274, "step": 2769500 }, { "epoch": 8.02, "learning_rate": 4.5992466827991854e-05, "loss": 2.3304, "step": 2770000 }, { "epoch": 8.02, "learning_rate": 4.599174462763987e-05, "loss": 2.3226, "step": 2770500 }, { "epoch": 8.02, "learning_rate": 4.599102097999259e-05, "loss": 2.3217, "step": 2771000 }, { "epoch": 8.02, "learning_rate": 4.5990297332345314e-05, "loss": 2.3288, "step": 2771500 }, { "epoch": 8.02, "learning_rate": 4.5989573684698036e-05, "loss": 2.353, "step": 2772000 }, { "epoch": 8.03, "learning_rate": 4.5988850037050765e-05, "loss": 2.3055, "step": 2772500 }, { "epoch": 8.03, "learning_rate": 4.598812638940349e-05, "loss": 2.3257, "step": 2773000 }, { "epoch": 8.03, "learning_rate": 4.598740274175621e-05, "loss": 2.3604, "step": 2773500 }, { "epoch": 8.03, "learning_rate": 4.5986680541404225e-05, "loss": 2.3073, "step": 2774000 }, { "epoch": 8.03, "learning_rate": 4.598595689375695e-05, "loss": 2.3527, "step": 2774500 }, { "epoch": 8.03, "learning_rate": 4.598523324610967e-05, "loss": 2.3385, "step": 2775000 }, { "epoch": 8.03, "learning_rate": 4.598450959846239e-05, "loss": 2.345, "step": 2775500 }, { "epoch": 8.04, "learning_rate": 4.5983785950815114e-05, "loss": 2.335, "step": 2776000 }, { "epoch": 8.04, "learning_rate": 4.5983062303167837e-05, "loss": 2.3061, "step": 2776500 }, { "epoch": 8.04, "learning_rate": 4.5982338655520566e-05, "loss": 2.3423, "step": 2777000 }, { "epoch": 8.04, "learning_rate": 4.598161500787329e-05, "loss": 2.3404, "step": 2777500 }, { "epoch": 8.04, "learning_rate": 4.598089136022602e-05, "loss": 2.3114, "step": 2778000 }, { "epoch": 8.04, "learning_rate": 4.598016771257874e-05, "loss": 2.342, "step": 2778500 }, { "epoch": 8.04, "learning_rate": 4.597944406493146e-05, "loss": 2.3266, "step": 2779000 }, { "epoch": 8.05, "learning_rate": 4.597872186457948e-05, "loss": 2.3357, "step": 2779500 }, { "epoch": 8.05, "learning_rate": 4.59779982169322e-05, "loss": 2.3154, "step": 2780000 }, { "epoch": 8.05, "learning_rate": 4.5977276016580215e-05, "loss": 2.3237, "step": 2780500 }, { "epoch": 8.05, "learning_rate": 4.597655236893294e-05, "loss": 2.3486, "step": 2781000 }, { "epoch": 8.05, "learning_rate": 4.597583016858096e-05, "loss": 2.3192, "step": 2781500 }, { "epoch": 8.05, "learning_rate": 4.597510652093368e-05, "loss": 2.326, "step": 2782000 }, { "epoch": 8.05, "learning_rate": 4.5974382873286404e-05, "loss": 2.3288, "step": 2782500 }, { "epoch": 8.06, "learning_rate": 4.5973659225639126e-05, "loss": 2.3524, "step": 2783000 }, { "epoch": 8.06, "learning_rate": 4.597293557799185e-05, "loss": 2.3417, "step": 2783500 }, { "epoch": 8.06, "learning_rate": 4.597221193034457e-05, "loss": 2.3352, "step": 2784000 }, { "epoch": 8.06, "learning_rate": 4.597148828269729e-05, "loss": 2.3381, "step": 2784500 }, { "epoch": 8.06, "learning_rate": 4.5970766082345315e-05, "loss": 2.3327, "step": 2785000 }, { "epoch": 8.06, "learning_rate": 4.5970042434698044e-05, "loss": 2.3292, "step": 2785500 }, { "epoch": 8.06, "learning_rate": 4.5969318787050766e-05, "loss": 2.3245, "step": 2786000 }, { "epoch": 8.07, "learning_rate": 4.596859513940349e-05, "loss": 2.3397, "step": 2786500 }, { "epoch": 8.07, "learning_rate": 4.596787149175621e-05, "loss": 2.3252, "step": 2787000 }, { "epoch": 8.07, "learning_rate": 4.596714784410893e-05, "loss": 2.325, "step": 2787500 }, { "epoch": 8.07, "learning_rate": 4.5966424196461655e-05, "loss": 2.3522, "step": 2788000 }, { "epoch": 8.07, "learning_rate": 4.596570054881438e-05, "loss": 2.3346, "step": 2788500 }, { "epoch": 8.07, "learning_rate": 4.59649769011671e-05, "loss": 2.3213, "step": 2789000 }, { "epoch": 8.07, "learning_rate": 4.596425325351982e-05, "loss": 2.3389, "step": 2789500 }, { "epoch": 8.08, "learning_rate": 4.5963529605872544e-05, "loss": 2.3734, "step": 2790000 }, { "epoch": 8.08, "learning_rate": 4.5962805958225266e-05, "loss": 2.3403, "step": 2790500 }, { "epoch": 8.08, "learning_rate": 4.596208231057799e-05, "loss": 2.333, "step": 2791000 }, { "epoch": 8.08, "learning_rate": 4.596136011022601e-05, "loss": 2.3135, "step": 2791500 }, { "epoch": 8.08, "learning_rate": 4.596063646257874e-05, "loss": 2.3395, "step": 2792000 }, { "epoch": 8.08, "learning_rate": 4.595991281493146e-05, "loss": 2.3362, "step": 2792500 }, { "epoch": 8.08, "learning_rate": 4.5959189167284184e-05, "loss": 2.3352, "step": 2793000 }, { "epoch": 8.09, "learning_rate": 4.59584669669322e-05, "loss": 2.3313, "step": 2793500 }, { "epoch": 8.09, "learning_rate": 4.595774331928492e-05, "loss": 2.3297, "step": 2794000 }, { "epoch": 8.09, "learning_rate": 4.5957021118932944e-05, "loss": 2.3543, "step": 2794500 }, { "epoch": 8.09, "learning_rate": 4.595629747128567e-05, "loss": 2.3148, "step": 2795000 }, { "epoch": 8.09, "learning_rate": 4.595557382363839e-05, "loss": 2.3232, "step": 2795500 }, { "epoch": 8.09, "learning_rate": 4.595485017599111e-05, "loss": 2.3317, "step": 2796000 }, { "epoch": 8.09, "learning_rate": 4.5954126528343833e-05, "loss": 2.3556, "step": 2796500 }, { "epoch": 8.1, "learning_rate": 4.5953402880696556e-05, "loss": 2.3389, "step": 2797000 }, { "epoch": 8.1, "learning_rate": 4.595268068034457e-05, "loss": 2.3279, "step": 2797500 }, { "epoch": 8.1, "learning_rate": 4.5951958479992594e-05, "loss": 2.3357, "step": 2798000 }, { "epoch": 8.1, "learning_rate": 4.5951234832345316e-05, "loss": 2.3482, "step": 2798500 }, { "epoch": 8.1, "learning_rate": 4.595051118469804e-05, "loss": 2.3212, "step": 2799000 }, { "epoch": 8.1, "learning_rate": 4.594978753705076e-05, "loss": 2.3213, "step": 2799500 }, { "epoch": 8.1, "learning_rate": 4.594906388940349e-05, "loss": 2.3382, "step": 2800000 }, { "epoch": 8.11, "learning_rate": 4.594834024175621e-05, "loss": 2.3218, "step": 2800500 }, { "epoch": 8.11, "learning_rate": 4.5947616594108934e-05, "loss": 2.3368, "step": 2801000 }, { "epoch": 8.11, "learning_rate": 4.5946892946461656e-05, "loss": 2.3312, "step": 2801500 }, { "epoch": 8.11, "learning_rate": 4.594617074610967e-05, "loss": 2.3361, "step": 2802000 }, { "epoch": 8.11, "learning_rate": 4.5945448545757694e-05, "loss": 2.3166, "step": 2802500 }, { "epoch": 8.11, "learning_rate": 4.5944724898110416e-05, "loss": 2.339, "step": 2803000 }, { "epoch": 8.11, "learning_rate": 4.594400125046314e-05, "loss": 2.3446, "step": 2803500 }, { "epoch": 8.12, "learning_rate": 4.594327760281586e-05, "loss": 2.3636, "step": 2804000 }, { "epoch": 8.12, "learning_rate": 4.594255395516858e-05, "loss": 2.3309, "step": 2804500 }, { "epoch": 8.12, "learning_rate": 4.5941830307521305e-05, "loss": 2.3536, "step": 2805000 }, { "epoch": 8.12, "learning_rate": 4.594110665987403e-05, "loss": 2.3399, "step": 2805500 }, { "epoch": 8.12, "learning_rate": 4.594038301222675e-05, "loss": 2.3202, "step": 2806000 }, { "epoch": 8.12, "learning_rate": 4.593965936457947e-05, "loss": 2.3304, "step": 2806500 }, { "epoch": 8.13, "learning_rate": 4.5938935716932194e-05, "loss": 2.3255, "step": 2807000 }, { "epoch": 8.13, "learning_rate": 4.593821206928492e-05, "loss": 2.3555, "step": 2807500 }, { "epoch": 8.13, "learning_rate": 4.5937489868932945e-05, "loss": 2.345, "step": 2808000 }, { "epoch": 8.13, "learning_rate": 4.593676622128567e-05, "loss": 2.3275, "step": 2808500 }, { "epoch": 8.13, "learning_rate": 4.593604257363839e-05, "loss": 2.3486, "step": 2809000 }, { "epoch": 8.13, "learning_rate": 4.593531892599111e-05, "loss": 2.3345, "step": 2809500 }, { "epoch": 8.13, "learning_rate": 4.5934595278343834e-05, "loss": 2.3412, "step": 2810000 }, { "epoch": 8.14, "learning_rate": 4.5933871630696556e-05, "loss": 2.34, "step": 2810500 }, { "epoch": 8.14, "learning_rate": 4.593314798304928e-05, "loss": 2.3226, "step": 2811000 }, { "epoch": 8.14, "learning_rate": 4.5932424335402e-05, "loss": 2.3304, "step": 2811500 }, { "epoch": 8.14, "learning_rate": 4.593170068775472e-05, "loss": 2.3324, "step": 2812000 }, { "epoch": 8.14, "learning_rate": 4.5930977040107445e-05, "loss": 2.3451, "step": 2812500 }, { "epoch": 8.14, "learning_rate": 4.593025483975547e-05, "loss": 2.3313, "step": 2813000 }, { "epoch": 8.14, "learning_rate": 4.592953119210819e-05, "loss": 2.3, "step": 2813500 }, { "epoch": 8.15, "learning_rate": 4.592880754446091e-05, "loss": 2.3373, "step": 2814000 }, { "epoch": 8.15, "learning_rate": 4.592808389681364e-05, "loss": 2.3438, "step": 2814500 }, { "epoch": 8.15, "learning_rate": 4.592736169646166e-05, "loss": 2.3492, "step": 2815000 }, { "epoch": 8.15, "learning_rate": 4.592663804881438e-05, "loss": 2.3583, "step": 2815500 }, { "epoch": 8.15, "learning_rate": 4.5925915848462395e-05, "loss": 2.356, "step": 2816000 }, { "epoch": 8.15, "learning_rate": 4.5925192200815124e-05, "loss": 2.3394, "step": 2816500 }, { "epoch": 8.15, "learning_rate": 4.5924468553167846e-05, "loss": 2.3603, "step": 2817000 }, { "epoch": 8.16, "learning_rate": 4.592374490552057e-05, "loss": 2.3418, "step": 2817500 }, { "epoch": 8.16, "learning_rate": 4.592302125787329e-05, "loss": 2.349, "step": 2818000 }, { "epoch": 8.16, "learning_rate": 4.592229761022601e-05, "loss": 2.353, "step": 2818500 }, { "epoch": 8.16, "learning_rate": 4.5921573962578735e-05, "loss": 2.3387, "step": 2819000 }, { "epoch": 8.16, "learning_rate": 4.592085031493146e-05, "loss": 2.348, "step": 2819500 }, { "epoch": 8.16, "learning_rate": 4.592012811457947e-05, "loss": 2.3094, "step": 2820000 }, { "epoch": 8.16, "learning_rate": 4.5919404466932195e-05, "loss": 2.3424, "step": 2820500 }, { "epoch": 8.17, "learning_rate": 4.5918680819284924e-05, "loss": 2.3553, "step": 2821000 }, { "epoch": 8.17, "learning_rate": 4.5917957171637646e-05, "loss": 2.3297, "step": 2821500 }, { "epoch": 8.17, "learning_rate": 4.5917233523990375e-05, "loss": 2.3513, "step": 2822000 }, { "epoch": 8.17, "learning_rate": 4.59165098763431e-05, "loss": 2.2997, "step": 2822500 }, { "epoch": 8.17, "learning_rate": 4.591578622869582e-05, "loss": 2.3247, "step": 2823000 }, { "epoch": 8.17, "learning_rate": 4.591506258104854e-05, "loss": 2.3523, "step": 2823500 }, { "epoch": 8.17, "learning_rate": 4.591434038069656e-05, "loss": 2.3372, "step": 2824000 }, { "epoch": 8.18, "learning_rate": 4.591361673304928e-05, "loss": 2.3212, "step": 2824500 }, { "epoch": 8.18, "learning_rate": 4.5912893085402e-05, "loss": 2.3305, "step": 2825000 }, { "epoch": 8.18, "learning_rate": 4.5912169437754724e-05, "loss": 2.3481, "step": 2825500 }, { "epoch": 8.18, "learning_rate": 4.5911445790107446e-05, "loss": 2.3646, "step": 2826000 }, { "epoch": 8.18, "learning_rate": 4.591072358975547e-05, "loss": 2.3278, "step": 2826500 }, { "epoch": 8.18, "learning_rate": 4.590999994210819e-05, "loss": 2.3144, "step": 2827000 }, { "epoch": 8.18, "learning_rate": 4.590927629446091e-05, "loss": 2.3401, "step": 2827500 }, { "epoch": 8.19, "learning_rate": 4.5908552646813635e-05, "loss": 2.3331, "step": 2828000 }, { "epoch": 8.19, "learning_rate": 4.590782899916636e-05, "loss": 2.355, "step": 2828500 }, { "epoch": 8.19, "learning_rate": 4.590710535151908e-05, "loss": 2.3634, "step": 2829000 }, { "epoch": 8.19, "learning_rate": 4.59063831511671e-05, "loss": 2.3554, "step": 2829500 }, { "epoch": 8.19, "learning_rate": 4.5905660950815124e-05, "loss": 2.3275, "step": 2830000 }, { "epoch": 8.19, "learning_rate": 4.5904937303167847e-05, "loss": 2.3243, "step": 2830500 }, { "epoch": 8.19, "learning_rate": 4.590421365552057e-05, "loss": 2.3289, "step": 2831000 }, { "epoch": 8.2, "learning_rate": 4.590349000787329e-05, "loss": 2.3222, "step": 2831500 }, { "epoch": 8.2, "learning_rate": 4.5902767807521307e-05, "loss": 2.3311, "step": 2832000 }, { "epoch": 8.2, "learning_rate": 4.590204415987403e-05, "loss": 2.3273, "step": 2832500 }, { "epoch": 8.2, "learning_rate": 4.590132051222675e-05, "loss": 2.3402, "step": 2833000 }, { "epoch": 8.2, "learning_rate": 4.590059686457947e-05, "loss": 2.3199, "step": 2833500 }, { "epoch": 8.2, "learning_rate": 4.5899873216932196e-05, "loss": 2.3469, "step": 2834000 }, { "epoch": 8.2, "learning_rate": 4.589915101658022e-05, "loss": 2.342, "step": 2834500 }, { "epoch": 8.21, "learning_rate": 4.589842736893294e-05, "loss": 2.318, "step": 2835000 }, { "epoch": 8.21, "learning_rate": 4.5897705168580956e-05, "loss": 2.3441, "step": 2835500 }, { "epoch": 8.21, "learning_rate": 4.589698152093368e-05, "loss": 2.3181, "step": 2836000 }, { "epoch": 8.21, "learning_rate": 4.58962578732864e-05, "loss": 2.3429, "step": 2836500 }, { "epoch": 8.21, "learning_rate": 4.589553422563912e-05, "loss": 2.3235, "step": 2837000 }, { "epoch": 8.21, "learning_rate": 4.589481057799185e-05, "loss": 2.3579, "step": 2837500 }, { "epoch": 8.21, "learning_rate": 4.5894086930344574e-05, "loss": 2.3518, "step": 2838000 }, { "epoch": 8.22, "learning_rate": 4.58933632826973e-05, "loss": 2.3283, "step": 2838500 }, { "epoch": 8.22, "learning_rate": 4.5892639635050025e-05, "loss": 2.3335, "step": 2839000 }, { "epoch": 8.22, "learning_rate": 4.589191598740275e-05, "loss": 2.3258, "step": 2839500 }, { "epoch": 8.22, "learning_rate": 4.589119233975547e-05, "loss": 2.3168, "step": 2840000 }, { "epoch": 8.22, "learning_rate": 4.589046869210819e-05, "loss": 2.3415, "step": 2840500 }, { "epoch": 8.22, "learning_rate": 4.5889745044460914e-05, "loss": 2.3307, "step": 2841000 }, { "epoch": 8.22, "learning_rate": 4.588902284410893e-05, "loss": 2.3507, "step": 2841500 }, { "epoch": 8.23, "learning_rate": 4.588829919646165e-05, "loss": 2.3399, "step": 2842000 }, { "epoch": 8.23, "learning_rate": 4.5887576996109674e-05, "loss": 2.3558, "step": 2842500 }, { "epoch": 8.23, "learning_rate": 4.588685479575769e-05, "loss": 2.3229, "step": 2843000 }, { "epoch": 8.23, "learning_rate": 4.588613114811041e-05, "loss": 2.3395, "step": 2843500 }, { "epoch": 8.23, "learning_rate": 4.5885407500463134e-05, "loss": 2.317, "step": 2844000 }, { "epoch": 8.23, "learning_rate": 4.5884683852815856e-05, "loss": 2.336, "step": 2844500 }, { "epoch": 8.24, "learning_rate": 4.5883960205168585e-05, "loss": 2.3532, "step": 2845000 }, { "epoch": 8.24, "learning_rate": 4.58832380048166e-05, "loss": 2.3426, "step": 2845500 }, { "epoch": 8.24, "learning_rate": 4.588251435716932e-05, "loss": 2.3409, "step": 2846000 }, { "epoch": 8.24, "learning_rate": 4.588179070952205e-05, "loss": 2.3438, "step": 2846500 }, { "epoch": 8.24, "learning_rate": 4.5881067061874774e-05, "loss": 2.3627, "step": 2847000 }, { "epoch": 8.24, "learning_rate": 4.5880343414227496e-05, "loss": 2.3073, "step": 2847500 }, { "epoch": 8.24, "learning_rate": 4.587961976658022e-05, "loss": 2.3293, "step": 2848000 }, { "epoch": 8.25, "learning_rate": 4.587889611893294e-05, "loss": 2.3236, "step": 2848500 }, { "epoch": 8.25, "learning_rate": 4.587817247128566e-05, "loss": 2.3232, "step": 2849000 }, { "epoch": 8.25, "learning_rate": 4.5877448823638385e-05, "loss": 2.324, "step": 2849500 }, { "epoch": 8.25, "learning_rate": 4.587672517599111e-05, "loss": 2.3416, "step": 2850000 }, { "epoch": 8.25, "learning_rate": 4.587600152834383e-05, "loss": 2.3449, "step": 2850500 }, { "epoch": 8.25, "learning_rate": 4.587527788069655e-05, "loss": 2.3418, "step": 2851000 }, { "epoch": 8.25, "learning_rate": 4.5874555680344574e-05, "loss": 2.3578, "step": 2851500 }, { "epoch": 8.26, "learning_rate": 4.5873832032697297e-05, "loss": 2.3577, "step": 2852000 }, { "epoch": 8.26, "learning_rate": 4.5873108385050026e-05, "loss": 2.3221, "step": 2852500 }, { "epoch": 8.26, "learning_rate": 4.587238473740275e-05, "loss": 2.3495, "step": 2853000 }, { "epoch": 8.26, "learning_rate": 4.5871662537050763e-05, "loss": 2.3058, "step": 2853500 }, { "epoch": 8.26, "learning_rate": 4.5870938889403486e-05, "loss": 2.3633, "step": 2854000 }, { "epoch": 8.26, "learning_rate": 4.587021524175621e-05, "loss": 2.3509, "step": 2854500 }, { "epoch": 8.26, "learning_rate": 4.586949159410893e-05, "loss": 2.353, "step": 2855000 }, { "epoch": 8.27, "learning_rate": 4.586876794646165e-05, "loss": 2.3456, "step": 2855500 }, { "epoch": 8.27, "learning_rate": 4.5868044298814375e-05, "loss": 2.3384, "step": 2856000 }, { "epoch": 8.27, "learning_rate": 4.5867320651167104e-05, "loss": 2.3245, "step": 2856500 }, { "epoch": 8.27, "learning_rate": 4.5866597003519826e-05, "loss": 2.3172, "step": 2857000 }, { "epoch": 8.27, "learning_rate": 4.586587335587255e-05, "loss": 2.308, "step": 2857500 }, { "epoch": 8.27, "learning_rate": 4.5865151155520564e-05, "loss": 2.3443, "step": 2858000 }, { "epoch": 8.27, "learning_rate": 4.5864427507873286e-05, "loss": 2.319, "step": 2858500 }, { "epoch": 8.28, "learning_rate": 4.586370386022601e-05, "loss": 2.3115, "step": 2859000 }, { "epoch": 8.28, "learning_rate": 4.586298021257874e-05, "loss": 2.338, "step": 2859500 }, { "epoch": 8.28, "learning_rate": 4.586225656493146e-05, "loss": 2.335, "step": 2860000 }, { "epoch": 8.28, "learning_rate": 4.586153291728418e-05, "loss": 2.3416, "step": 2860500 }, { "epoch": 8.28, "learning_rate": 4.5860809269636904e-05, "loss": 2.3432, "step": 2861000 }, { "epoch": 8.28, "learning_rate": 4.5860087069284926e-05, "loss": 2.3697, "step": 2861500 }, { "epoch": 8.28, "learning_rate": 4.585936342163765e-05, "loss": 2.3557, "step": 2862000 }, { "epoch": 8.29, "learning_rate": 4.585863977399037e-05, "loss": 2.3421, "step": 2862500 }, { "epoch": 8.29, "learning_rate": 4.585791612634309e-05, "loss": 2.304, "step": 2863000 }, { "epoch": 8.29, "learning_rate": 4.585719392599111e-05, "loss": 2.3108, "step": 2863500 }, { "epoch": 8.29, "learning_rate": 4.585647027834383e-05, "loss": 2.342, "step": 2864000 }, { "epoch": 8.29, "learning_rate": 4.585574663069655e-05, "loss": 2.3438, "step": 2864500 }, { "epoch": 8.29, "learning_rate": 4.5855022983049275e-05, "loss": 2.3358, "step": 2865000 }, { "epoch": 8.29, "learning_rate": 4.5854299335402004e-05, "loss": 2.3274, "step": 2865500 }, { "epoch": 8.3, "learning_rate": 4.5853575687754726e-05, "loss": 2.3336, "step": 2866000 }, { "epoch": 8.3, "learning_rate": 4.585285348740274e-05, "loss": 2.349, "step": 2866500 }, { "epoch": 8.3, "learning_rate": 4.585212983975547e-05, "loss": 2.3324, "step": 2867000 }, { "epoch": 8.3, "learning_rate": 4.585140619210819e-05, "loss": 2.3537, "step": 2867500 }, { "epoch": 8.3, "learning_rate": 4.5850682544460915e-05, "loss": 2.3441, "step": 2868000 }, { "epoch": 8.3, "learning_rate": 4.584995889681364e-05, "loss": 2.3279, "step": 2868500 }, { "epoch": 8.3, "learning_rate": 4.584923669646165e-05, "loss": 2.3453, "step": 2869000 }, { "epoch": 8.31, "learning_rate": 4.584851304881438e-05, "loss": 2.3539, "step": 2869500 }, { "epoch": 8.31, "learning_rate": 4.5847789401167104e-05, "loss": 2.3414, "step": 2870000 }, { "epoch": 8.31, "learning_rate": 4.5847065753519827e-05, "loss": 2.3352, "step": 2870500 }, { "epoch": 8.31, "learning_rate": 4.584634355316784e-05, "loss": 2.3364, "step": 2871000 }, { "epoch": 8.31, "learning_rate": 4.5845619905520564e-05, "loss": 2.3511, "step": 2871500 }, { "epoch": 8.31, "learning_rate": 4.584489625787329e-05, "loss": 2.3182, "step": 2872000 }, { "epoch": 8.31, "learning_rate": 4.584417261022601e-05, "loss": 2.3301, "step": 2872500 }, { "epoch": 8.32, "learning_rate": 4.584344896257873e-05, "loss": 2.3333, "step": 2873000 }, { "epoch": 8.32, "learning_rate": 4.584272531493145e-05, "loss": 2.3325, "step": 2873500 }, { "epoch": 8.32, "learning_rate": 4.584200166728418e-05, "loss": 2.3324, "step": 2874000 }, { "epoch": 8.32, "learning_rate": 4.58412794669322e-05, "loss": 2.3334, "step": 2874500 }, { "epoch": 8.32, "learning_rate": 4.584055581928493e-05, "loss": 2.3282, "step": 2875000 }, { "epoch": 8.32, "learning_rate": 4.583983217163765e-05, "loss": 2.3326, "step": 2875500 }, { "epoch": 8.32, "learning_rate": 4.5839109971285665e-05, "loss": 2.3207, "step": 2876000 }, { "epoch": 8.33, "learning_rate": 4.583838632363839e-05, "loss": 2.3565, "step": 2876500 }, { "epoch": 8.33, "learning_rate": 4.583766267599111e-05, "loss": 2.3565, "step": 2877000 }, { "epoch": 8.33, "learning_rate": 4.583693902834383e-05, "loss": 2.3531, "step": 2877500 }, { "epoch": 8.33, "learning_rate": 4.5836215380696554e-05, "loss": 2.3334, "step": 2878000 }, { "epoch": 8.33, "learning_rate": 4.583549173304928e-05, "loss": 2.3327, "step": 2878500 }, { "epoch": 8.33, "learning_rate": 4.5834768085402005e-05, "loss": 2.3174, "step": 2879000 }, { "epoch": 8.33, "learning_rate": 4.583404443775473e-05, "loss": 2.3286, "step": 2879500 }, { "epoch": 8.34, "learning_rate": 4.583332079010745e-05, "loss": 2.365, "step": 2880000 }, { "epoch": 8.34, "learning_rate": 4.583259714246017e-05, "loss": 2.3476, "step": 2880500 }, { "epoch": 8.34, "learning_rate": 4.583187494210819e-05, "loss": 2.3487, "step": 2881000 }, { "epoch": 8.34, "learning_rate": 4.583115129446091e-05, "loss": 2.3424, "step": 2881500 }, { "epoch": 8.34, "learning_rate": 4.583042764681364e-05, "loss": 2.3415, "step": 2882000 }, { "epoch": 8.34, "learning_rate": 4.582970399916636e-05, "loss": 2.3474, "step": 2882500 }, { "epoch": 8.35, "learning_rate": 4.582898035151908e-05, "loss": 2.3645, "step": 2883000 }, { "epoch": 8.35, "learning_rate": 4.58282595984624e-05, "loss": 2.3566, "step": 2883500 }, { "epoch": 8.35, "learning_rate": 4.582753595081512e-05, "loss": 2.3499, "step": 2884000 }, { "epoch": 8.35, "learning_rate": 4.582681230316784e-05, "loss": 2.3525, "step": 2884500 }, { "epoch": 8.35, "learning_rate": 4.582609010281586e-05, "loss": 2.348, "step": 2885000 }, { "epoch": 8.35, "learning_rate": 4.582536790246388e-05, "loss": 2.336, "step": 2885500 }, { "epoch": 8.35, "learning_rate": 4.58246442548166e-05, "loss": 2.3396, "step": 2886000 }, { "epoch": 8.36, "learning_rate": 4.5823920607169325e-05, "loss": 2.358, "step": 2886500 }, { "epoch": 8.36, "learning_rate": 4.582319695952205e-05, "loss": 2.323, "step": 2887000 }, { "epoch": 8.36, "learning_rate": 4.582247331187477e-05, "loss": 2.3329, "step": 2887500 }, { "epoch": 8.36, "learning_rate": 4.5821751111522785e-05, "loss": 2.3356, "step": 2888000 }, { "epoch": 8.36, "learning_rate": 4.582102746387551e-05, "loss": 2.3471, "step": 2888500 }, { "epoch": 8.36, "learning_rate": 4.582030381622823e-05, "loss": 2.3448, "step": 2889000 }, { "epoch": 8.36, "learning_rate": 4.581958016858096e-05, "loss": 2.3613, "step": 2889500 }, { "epoch": 8.37, "learning_rate": 4.581885652093368e-05, "loss": 2.3476, "step": 2890000 }, { "epoch": 8.37, "learning_rate": 4.58181343205817e-05, "loss": 2.3384, "step": 2890500 }, { "epoch": 8.37, "learning_rate": 4.5817410672934426e-05, "loss": 2.3637, "step": 2891000 }, { "epoch": 8.37, "learning_rate": 4.581668702528715e-05, "loss": 2.3423, "step": 2891500 }, { "epoch": 8.37, "learning_rate": 4.581596337763987e-05, "loss": 2.3515, "step": 2892000 }, { "epoch": 8.37, "learning_rate": 4.581523972999259e-05, "loss": 2.3343, "step": 2892500 }, { "epoch": 8.37, "learning_rate": 4.5814516082345315e-05, "loss": 2.3208, "step": 2893000 }, { "epoch": 8.38, "learning_rate": 4.581379243469804e-05, "loss": 2.356, "step": 2893500 }, { "epoch": 8.38, "learning_rate": 4.581306878705076e-05, "loss": 2.3264, "step": 2894000 }, { "epoch": 8.38, "learning_rate": 4.581234513940348e-05, "loss": 2.3648, "step": 2894500 }, { "epoch": 8.38, "learning_rate": 4.581162149175621e-05, "loss": 2.3377, "step": 2895000 }, { "epoch": 8.38, "learning_rate": 4.581089784410893e-05, "loss": 2.3467, "step": 2895500 }, { "epoch": 8.38, "learning_rate": 4.5810174196461655e-05, "loss": 2.3456, "step": 2896000 }, { "epoch": 8.38, "learning_rate": 4.580945054881438e-05, "loss": 2.3251, "step": 2896500 }, { "epoch": 8.39, "learning_rate": 4.5808726901167106e-05, "loss": 2.3656, "step": 2897000 }, { "epoch": 8.39, "learning_rate": 4.580800325351983e-05, "loss": 2.3447, "step": 2897500 }, { "epoch": 8.39, "learning_rate": 4.5807281053167844e-05, "loss": 2.3464, "step": 2898000 }, { "epoch": 8.39, "learning_rate": 4.5806557405520566e-05, "loss": 2.3219, "step": 2898500 }, { "epoch": 8.39, "learning_rate": 4.580583375787329e-05, "loss": 2.3461, "step": 2899000 }, { "epoch": 8.39, "learning_rate": 4.580511011022601e-05, "loss": 2.3467, "step": 2899500 }, { "epoch": 8.39, "learning_rate": 4.580438790987403e-05, "loss": 2.3515, "step": 2900000 }, { "epoch": 8.4, "learning_rate": 4.5803664262226755e-05, "loss": 2.3607, "step": 2900500 }, { "epoch": 8.4, "learning_rate": 4.580294061457948e-05, "loss": 2.3344, "step": 2901000 }, { "epoch": 8.4, "learning_rate": 4.58022169669322e-05, "loss": 2.3479, "step": 2901500 }, { "epoch": 8.4, "learning_rate": 4.580149331928492e-05, "loss": 2.3287, "step": 2902000 }, { "epoch": 8.4, "learning_rate": 4.5800769671637644e-05, "loss": 2.3424, "step": 2902500 }, { "epoch": 8.4, "learning_rate": 4.5800046023990366e-05, "loss": 2.3447, "step": 2903000 }, { "epoch": 8.4, "learning_rate": 4.579932237634309e-05, "loss": 2.3357, "step": 2903500 }, { "epoch": 8.41, "learning_rate": 4.579859872869581e-05, "loss": 2.331, "step": 2904000 }, { "epoch": 8.41, "learning_rate": 4.579787652834383e-05, "loss": 2.3611, "step": 2904500 }, { "epoch": 8.41, "learning_rate": 4.579715288069656e-05, "loss": 2.3188, "step": 2905000 }, { "epoch": 8.41, "learning_rate": 4.5796429233049284e-05, "loss": 2.3564, "step": 2905500 }, { "epoch": 8.41, "learning_rate": 4.5795705585402006e-05, "loss": 2.3366, "step": 2906000 }, { "epoch": 8.41, "learning_rate": 4.579498193775473e-05, "loss": 2.3149, "step": 2906500 }, { "epoch": 8.41, "learning_rate": 4.5794259737402744e-05, "loss": 2.3446, "step": 2907000 }, { "epoch": 8.42, "learning_rate": 4.5793536089755466e-05, "loss": 2.3321, "step": 2907500 }, { "epoch": 8.42, "learning_rate": 4.579281244210819e-05, "loss": 2.3028, "step": 2908000 }, { "epoch": 8.42, "learning_rate": 4.579208879446091e-05, "loss": 2.3364, "step": 2908500 }, { "epoch": 8.42, "learning_rate": 4.579136514681363e-05, "loss": 2.3384, "step": 2909000 }, { "epoch": 8.42, "learning_rate": 4.5790642946461656e-05, "loss": 2.3425, "step": 2909500 }, { "epoch": 8.42, "learning_rate": 4.578991929881438e-05, "loss": 2.3377, "step": 2910000 }, { "epoch": 8.42, "learning_rate": 4.57891956511671e-05, "loss": 2.3518, "step": 2910500 }, { "epoch": 8.43, "learning_rate": 4.578847200351982e-05, "loss": 2.354, "step": 2911000 }, { "epoch": 8.43, "learning_rate": 4.5787748355872544e-05, "loss": 2.3488, "step": 2911500 }, { "epoch": 8.43, "learning_rate": 4.578702615552056e-05, "loss": 2.3563, "step": 2912000 }, { "epoch": 8.43, "learning_rate": 4.578630250787329e-05, "loss": 2.3692, "step": 2912500 }, { "epoch": 8.43, "learning_rate": 4.578557886022601e-05, "loss": 2.3398, "step": 2913000 }, { "epoch": 8.43, "learning_rate": 4.5784855212578733e-05, "loss": 2.3525, "step": 2913500 }, { "epoch": 8.43, "learning_rate": 4.5784133012226756e-05, "loss": 2.3504, "step": 2914000 }, { "epoch": 8.44, "learning_rate": 4.578340936457948e-05, "loss": 2.295, "step": 2914500 }, { "epoch": 8.44, "learning_rate": 4.57826857169322e-05, "loss": 2.3341, "step": 2915000 }, { "epoch": 8.44, "learning_rate": 4.578196206928492e-05, "loss": 2.3642, "step": 2915500 }, { "epoch": 8.44, "learning_rate": 4.5781238421637645e-05, "loss": 2.3089, "step": 2916000 }, { "epoch": 8.44, "learning_rate": 4.578051477399037e-05, "loss": 2.3208, "step": 2916500 }, { "epoch": 8.44, "learning_rate": 4.577979112634309e-05, "loss": 2.3486, "step": 2917000 }, { "epoch": 8.44, "learning_rate": 4.577906747869581e-05, "loss": 2.3332, "step": 2917500 }, { "epoch": 8.45, "learning_rate": 4.5778343831048534e-05, "loss": 2.3364, "step": 2918000 }, { "epoch": 8.45, "learning_rate": 4.577762018340126e-05, "loss": 2.3508, "step": 2918500 }, { "epoch": 8.45, "learning_rate": 4.5776896535753985e-05, "loss": 2.3599, "step": 2919000 }, { "epoch": 8.45, "learning_rate": 4.577617433540201e-05, "loss": 2.3508, "step": 2919500 }, { "epoch": 8.45, "learning_rate": 4.577545068775473e-05, "loss": 2.3523, "step": 2920000 }, { "epoch": 8.45, "learning_rate": 4.577472704010745e-05, "loss": 2.3496, "step": 2920500 }, { "epoch": 8.46, "learning_rate": 4.5774003392460174e-05, "loss": 2.3371, "step": 2921000 }, { "epoch": 8.46, "learning_rate": 4.5773279744812896e-05, "loss": 2.3311, "step": 2921500 }, { "epoch": 8.46, "learning_rate": 4.577255609716562e-05, "loss": 2.3434, "step": 2922000 }, { "epoch": 8.46, "learning_rate": 4.577183244951834e-05, "loss": 2.3412, "step": 2922500 }, { "epoch": 8.46, "learning_rate": 4.577110880187106e-05, "loss": 2.328, "step": 2923000 }, { "epoch": 8.46, "learning_rate": 4.5770385154223785e-05, "loss": 2.3311, "step": 2923500 }, { "epoch": 8.46, "learning_rate": 4.576966295387181e-05, "loss": 2.3301, "step": 2924000 }, { "epoch": 8.47, "learning_rate": 4.576893930622453e-05, "loss": 2.3461, "step": 2924500 }, { "epoch": 8.47, "learning_rate": 4.576821565857725e-05, "loss": 2.3246, "step": 2925000 }, { "epoch": 8.47, "learning_rate": 4.5767492010929974e-05, "loss": 2.3341, "step": 2925500 }, { "epoch": 8.47, "learning_rate": 4.5766768363282696e-05, "loss": 2.351, "step": 2926000 }, { "epoch": 8.47, "learning_rate": 4.576604471563542e-05, "loss": 2.3217, "step": 2926500 }, { "epoch": 8.47, "learning_rate": 4.576532251528344e-05, "loss": 2.3577, "step": 2927000 }, { "epoch": 8.47, "learning_rate": 4.576459886763616e-05, "loss": 2.3391, "step": 2927500 }, { "epoch": 8.48, "learning_rate": 4.5763876667284186e-05, "loss": 2.3249, "step": 2928000 }, { "epoch": 8.48, "learning_rate": 4.576315301963691e-05, "loss": 2.3475, "step": 2928500 }, { "epoch": 8.48, "learning_rate": 4.576242937198963e-05, "loss": 2.3351, "step": 2929000 }, { "epoch": 8.48, "learning_rate": 4.576170572434235e-05, "loss": 2.3342, "step": 2929500 }, { "epoch": 8.48, "learning_rate": 4.5760982076695074e-05, "loss": 2.3456, "step": 2930000 }, { "epoch": 8.48, "learning_rate": 4.57602584290478e-05, "loss": 2.3518, "step": 2930500 }, { "epoch": 8.48, "learning_rate": 4.575953478140052e-05, "loss": 2.3452, "step": 2931000 }, { "epoch": 8.49, "learning_rate": 4.575881113375324e-05, "loss": 2.3402, "step": 2931500 }, { "epoch": 8.49, "learning_rate": 4.5758087486105963e-05, "loss": 2.3211, "step": 2932000 }, { "epoch": 8.49, "learning_rate": 4.5757365285753986e-05, "loss": 2.3595, "step": 2932500 }, { "epoch": 8.49, "learning_rate": 4.575664163810671e-05, "loss": 2.3445, "step": 2933000 }, { "epoch": 8.49, "learning_rate": 4.5755919437754724e-05, "loss": 2.3538, "step": 2933500 }, { "epoch": 8.49, "learning_rate": 4.5755195790107446e-05, "loss": 2.3278, "step": 2934000 }, { "epoch": 8.49, "learning_rate": 4.5754472142460175e-05, "loss": 2.3375, "step": 2934500 }, { "epoch": 8.5, "learning_rate": 4.57537484948129e-05, "loss": 2.3459, "step": 2935000 }, { "epoch": 8.5, "learning_rate": 4.575302484716562e-05, "loss": 2.3311, "step": 2935500 }, { "epoch": 8.5, "learning_rate": 4.575230264681364e-05, "loss": 2.3487, "step": 2936000 }, { "epoch": 8.5, "learning_rate": 4.5751578999166364e-05, "loss": 2.3509, "step": 2936500 }, { "epoch": 8.5, "learning_rate": 4.5750855351519086e-05, "loss": 2.3366, "step": 2937000 }, { "epoch": 8.5, "learning_rate": 4.575013170387181e-05, "loss": 2.3392, "step": 2937500 }, { "epoch": 8.5, "learning_rate": 4.574940805622453e-05, "loss": 2.3349, "step": 2938000 }, { "epoch": 8.51, "learning_rate": 4.574868440857725e-05, "loss": 2.3271, "step": 2938500 }, { "epoch": 8.51, "learning_rate": 4.5747960760929975e-05, "loss": 2.3474, "step": 2939000 }, { "epoch": 8.51, "learning_rate": 4.57472371132827e-05, "loss": 2.3459, "step": 2939500 }, { "epoch": 8.51, "learning_rate": 4.574651491293071e-05, "loss": 2.3396, "step": 2940000 }, { "epoch": 8.51, "learning_rate": 4.574579126528344e-05, "loss": 2.3556, "step": 2940500 }, { "epoch": 8.51, "learning_rate": 4.5745067617636164e-05, "loss": 2.3154, "step": 2941000 }, { "epoch": 8.51, "learning_rate": 4.5744343969988886e-05, "loss": 2.3018, "step": 2941500 }, { "epoch": 8.52, "learning_rate": 4.5743620322341615e-05, "loss": 2.3469, "step": 2942000 }, { "epoch": 8.52, "learning_rate": 4.574289667469434e-05, "loss": 2.3574, "step": 2942500 }, { "epoch": 8.52, "learning_rate": 4.574217447434235e-05, "loss": 2.3353, "step": 2943000 }, { "epoch": 8.52, "learning_rate": 4.5741450826695075e-05, "loss": 2.3413, "step": 2943500 }, { "epoch": 8.52, "learning_rate": 4.57407271790478e-05, "loss": 2.3234, "step": 2944000 }, { "epoch": 8.52, "learning_rate": 4.574000353140052e-05, "loss": 2.3299, "step": 2944500 }, { "epoch": 8.52, "learning_rate": 4.573927988375324e-05, "loss": 2.3306, "step": 2945000 }, { "epoch": 8.53, "learning_rate": 4.5738557683401264e-05, "loss": 2.3282, "step": 2945500 }, { "epoch": 8.53, "learning_rate": 4.5737834035753987e-05, "loss": 2.3437, "step": 2946000 }, { "epoch": 8.53, "learning_rate": 4.573711038810671e-05, "loss": 2.3457, "step": 2946500 }, { "epoch": 8.53, "learning_rate": 4.573638674045943e-05, "loss": 2.3321, "step": 2947000 }, { "epoch": 8.53, "learning_rate": 4.573566309281215e-05, "loss": 2.3276, "step": 2947500 }, { "epoch": 8.53, "learning_rate": 4.5734939445164875e-05, "loss": 2.3808, "step": 2948000 }, { "epoch": 8.53, "learning_rate": 4.573421724481289e-05, "loss": 2.3262, "step": 2948500 }, { "epoch": 8.54, "learning_rate": 4.573349359716561e-05, "loss": 2.3371, "step": 2949000 }, { "epoch": 8.54, "learning_rate": 4.573276994951834e-05, "loss": 2.3306, "step": 2949500 }, { "epoch": 8.54, "learning_rate": 4.5732046301871064e-05, "loss": 2.3369, "step": 2950000 }, { "epoch": 8.54, "learning_rate": 4.5731322654223793e-05, "loss": 2.3352, "step": 2950500 }, { "epoch": 8.54, "learning_rate": 4.573060045387181e-05, "loss": 2.3214, "step": 2951000 }, { "epoch": 8.54, "learning_rate": 4.572987680622453e-05, "loss": 2.3374, "step": 2951500 }, { "epoch": 8.54, "learning_rate": 4.5729153158577254e-05, "loss": 2.3424, "step": 2952000 }, { "epoch": 8.55, "learning_rate": 4.5728429510929976e-05, "loss": 2.3416, "step": 2952500 }, { "epoch": 8.55, "learning_rate": 4.57277058632827e-05, "loss": 2.3262, "step": 2953000 }, { "epoch": 8.55, "learning_rate": 4.572698221563542e-05, "loss": 2.3348, "step": 2953500 }, { "epoch": 8.55, "learning_rate": 4.572625856798814e-05, "loss": 2.3342, "step": 2954000 }, { "epoch": 8.55, "learning_rate": 4.5725534920340865e-05, "loss": 2.3409, "step": 2954500 }, { "epoch": 8.55, "learning_rate": 4.572481271998889e-05, "loss": 2.3512, "step": 2955000 }, { "epoch": 8.55, "learning_rate": 4.572408907234161e-05, "loss": 2.34, "step": 2955500 }, { "epoch": 8.56, "learning_rate": 4.572336542469433e-05, "loss": 2.3317, "step": 2956000 }, { "epoch": 8.56, "learning_rate": 4.572264177704706e-05, "loss": 2.3258, "step": 2956500 }, { "epoch": 8.56, "learning_rate": 4.572191812939978e-05, "loss": 2.3574, "step": 2957000 }, { "epoch": 8.56, "learning_rate": 4.5721194481752505e-05, "loss": 2.3397, "step": 2957500 }, { "epoch": 8.56, "learning_rate": 4.572047228140052e-05, "loss": 2.3277, "step": 2958000 }, { "epoch": 8.56, "learning_rate": 4.571975008104854e-05, "loss": 2.347, "step": 2958500 }, { "epoch": 8.57, "learning_rate": 4.5719026433401265e-05, "loss": 2.3614, "step": 2959000 }, { "epoch": 8.57, "learning_rate": 4.571830278575399e-05, "loss": 2.3647, "step": 2959500 }, { "epoch": 8.57, "learning_rate": 4.571757913810671e-05, "loss": 2.3604, "step": 2960000 }, { "epoch": 8.57, "learning_rate": 4.571685549045943e-05, "loss": 2.3338, "step": 2960500 }, { "epoch": 8.57, "learning_rate": 4.5716131842812154e-05, "loss": 2.3338, "step": 2961000 }, { "epoch": 8.57, "learning_rate": 4.5715408195164876e-05, "loss": 2.335, "step": 2961500 }, { "epoch": 8.57, "learning_rate": 4.57146845475176e-05, "loss": 2.3363, "step": 2962000 }, { "epoch": 8.58, "learning_rate": 4.571396234716562e-05, "loss": 2.3441, "step": 2962500 }, { "epoch": 8.58, "learning_rate": 4.571323869951834e-05, "loss": 2.363, "step": 2963000 }, { "epoch": 8.58, "learning_rate": 4.5712515051871065e-05, "loss": 2.3327, "step": 2963500 }, { "epoch": 8.58, "learning_rate": 4.571179140422379e-05, "loss": 2.3503, "step": 2964000 }, { "epoch": 8.58, "learning_rate": 4.5711067756576517e-05, "loss": 2.3252, "step": 2964500 }, { "epoch": 8.58, "learning_rate": 4.571034410892924e-05, "loss": 2.3477, "step": 2965000 }, { "epoch": 8.58, "learning_rate": 4.5709621908577254e-05, "loss": 2.3178, "step": 2965500 }, { "epoch": 8.59, "learning_rate": 4.570889970822527e-05, "loss": 2.3352, "step": 2966000 }, { "epoch": 8.59, "learning_rate": 4.570817606057799e-05, "loss": 2.369, "step": 2966500 }, { "epoch": 8.59, "learning_rate": 4.5707453860226014e-05, "loss": 2.34, "step": 2967000 }, { "epoch": 8.59, "learning_rate": 4.570673021257874e-05, "loss": 2.338, "step": 2967500 }, { "epoch": 8.59, "learning_rate": 4.570600656493146e-05, "loss": 2.3399, "step": 2968000 }, { "epoch": 8.59, "learning_rate": 4.570528291728418e-05, "loss": 2.3486, "step": 2968500 }, { "epoch": 8.59, "learning_rate": 4.57045592696369e-05, "loss": 2.3466, "step": 2969000 }, { "epoch": 8.6, "learning_rate": 4.5703835621989626e-05, "loss": 2.3446, "step": 2969500 }, { "epoch": 8.6, "learning_rate": 4.570311197434235e-05, "loss": 2.3452, "step": 2970000 }, { "epoch": 8.6, "learning_rate": 4.570238832669507e-05, "loss": 2.3471, "step": 2970500 }, { "epoch": 8.6, "learning_rate": 4.570166467904779e-05, "loss": 2.3676, "step": 2971000 }, { "epoch": 8.6, "learning_rate": 4.570094103140052e-05, "loss": 2.3545, "step": 2971500 }, { "epoch": 8.6, "learning_rate": 4.5700217383753244e-05, "loss": 2.3423, "step": 2972000 }, { "epoch": 8.6, "learning_rate": 4.5699495183401266e-05, "loss": 2.3361, "step": 2972500 }, { "epoch": 8.61, "learning_rate": 4.569877153575399e-05, "loss": 2.3344, "step": 2973000 }, { "epoch": 8.61, "learning_rate": 4.569804788810671e-05, "loss": 2.3306, "step": 2973500 }, { "epoch": 8.61, "learning_rate": 4.569732424045943e-05, "loss": 2.3548, "step": 2974000 }, { "epoch": 8.61, "learning_rate": 4.5696600592812155e-05, "loss": 2.3419, "step": 2974500 }, { "epoch": 8.61, "learning_rate": 4.569587694516488e-05, "loss": 2.3102, "step": 2975000 }, { "epoch": 8.61, "learning_rate": 4.56951532975176e-05, "loss": 2.34, "step": 2975500 }, { "epoch": 8.61, "learning_rate": 4.569442964987032e-05, "loss": 2.3492, "step": 2976000 }, { "epoch": 8.62, "learning_rate": 4.5693706002223044e-05, "loss": 2.3369, "step": 2976500 }, { "epoch": 8.62, "learning_rate": 4.5692983801871066e-05, "loss": 2.3321, "step": 2977000 }, { "epoch": 8.62, "learning_rate": 4.569226160151908e-05, "loss": 2.3282, "step": 2977500 }, { "epoch": 8.62, "learning_rate": 4.5691537953871804e-05, "loss": 2.3258, "step": 2978000 }, { "epoch": 8.62, "learning_rate": 4.5690814306224526e-05, "loss": 2.3454, "step": 2978500 }, { "epoch": 8.62, "learning_rate": 4.569009065857725e-05, "loss": 2.3348, "step": 2979000 }, { "epoch": 8.62, "learning_rate": 4.568936701092998e-05, "loss": 2.3422, "step": 2979500 }, { "epoch": 8.63, "learning_rate": 4.568864481057799e-05, "loss": 2.373, "step": 2980000 }, { "epoch": 8.63, "learning_rate": 4.568792116293072e-05, "loss": 2.3475, "step": 2980500 }, { "epoch": 8.63, "learning_rate": 4.5687197515283444e-05, "loss": 2.3185, "step": 2981000 }, { "epoch": 8.63, "learning_rate": 4.5686473867636166e-05, "loss": 2.3517, "step": 2981500 }, { "epoch": 8.63, "learning_rate": 4.568575021998889e-05, "loss": 2.3324, "step": 2982000 }, { "epoch": 8.63, "learning_rate": 4.568502657234161e-05, "loss": 2.3439, "step": 2982500 }, { "epoch": 8.63, "learning_rate": 4.5684304371989626e-05, "loss": 2.3467, "step": 2983000 }, { "epoch": 8.64, "learning_rate": 4.568358072434235e-05, "loss": 2.3207, "step": 2983500 }, { "epoch": 8.64, "learning_rate": 4.568285707669507e-05, "loss": 2.3383, "step": 2984000 }, { "epoch": 8.64, "learning_rate": 4.568213342904779e-05, "loss": 2.3405, "step": 2984500 }, { "epoch": 8.64, "learning_rate": 4.5681411228695815e-05, "loss": 2.3216, "step": 2985000 }, { "epoch": 8.64, "learning_rate": 4.568068758104854e-05, "loss": 2.3484, "step": 2985500 }, { "epoch": 8.64, "learning_rate": 4.567996393340126e-05, "loss": 2.3062, "step": 2986000 }, { "epoch": 8.64, "learning_rate": 4.567924028575398e-05, "loss": 2.3421, "step": 2986500 }, { "epoch": 8.65, "learning_rate": 4.567851663810671e-05, "loss": 2.3532, "step": 2987000 }, { "epoch": 8.65, "learning_rate": 4.567779299045943e-05, "loss": 2.3285, "step": 2987500 }, { "epoch": 8.65, "learning_rate": 4.5677069342812156e-05, "loss": 2.3311, "step": 2988000 }, { "epoch": 8.65, "learning_rate": 4.567634569516488e-05, "loss": 2.3272, "step": 2988500 }, { "epoch": 8.65, "learning_rate": 4.56756220475176e-05, "loss": 2.3261, "step": 2989000 }, { "epoch": 8.65, "learning_rate": 4.567489839987032e-05, "loss": 2.3574, "step": 2989500 }, { "epoch": 8.65, "learning_rate": 4.5674176199518345e-05, "loss": 2.3527, "step": 2990000 }, { "epoch": 8.66, "learning_rate": 4.567345255187107e-05, "loss": 2.3265, "step": 2990500 }, { "epoch": 8.66, "learning_rate": 4.567272890422379e-05, "loss": 2.3338, "step": 2991000 }, { "epoch": 8.66, "learning_rate": 4.567200525657651e-05, "loss": 2.3705, "step": 2991500 }, { "epoch": 8.66, "learning_rate": 4.5671281608929234e-05, "loss": 2.3427, "step": 2992000 }, { "epoch": 8.66, "learning_rate": 4.5670557961281956e-05, "loss": 2.3713, "step": 2992500 }, { "epoch": 8.66, "learning_rate": 4.566983431363468e-05, "loss": 2.3371, "step": 2993000 }, { "epoch": 8.66, "learning_rate": 4.56691106659874e-05, "loss": 2.3455, "step": 2993500 }, { "epoch": 8.67, "learning_rate": 4.566838701834013e-05, "loss": 2.3458, "step": 2994000 }, { "epoch": 8.67, "learning_rate": 4.566766481798815e-05, "loss": 2.3319, "step": 2994500 }, { "epoch": 8.67, "learning_rate": 4.5666941170340874e-05, "loss": 2.3284, "step": 2995000 }, { "epoch": 8.67, "learning_rate": 4.566621896998889e-05, "loss": 2.3564, "step": 2995500 }, { "epoch": 8.67, "learning_rate": 4.566549532234161e-05, "loss": 2.3404, "step": 2996000 }, { "epoch": 8.67, "learning_rate": 4.5664771674694334e-05, "loss": 2.3539, "step": 2996500 }, { "epoch": 8.68, "learning_rate": 4.5664048027047056e-05, "loss": 2.327, "step": 2997000 }, { "epoch": 8.68, "learning_rate": 4.566332437939978e-05, "loss": 2.3317, "step": 2997500 }, { "epoch": 8.68, "learning_rate": 4.56626007317525e-05, "loss": 2.3549, "step": 2998000 }, { "epoch": 8.68, "learning_rate": 4.566187708410522e-05, "loss": 2.347, "step": 2998500 }, { "epoch": 8.68, "learning_rate": 4.566115343645795e-05, "loss": 2.3192, "step": 2999000 }, { "epoch": 8.68, "learning_rate": 4.5660429788810674e-05, "loss": 2.3373, "step": 2999500 }, { "epoch": 8.68, "learning_rate": 4.5659706141163396e-05, "loss": 2.3348, "step": 3000000 }, { "epoch": 8.69, "learning_rate": 4.565898394081141e-05, "loss": 2.3518, "step": 3000500 }, { "epoch": 8.69, "learning_rate": 4.5658260293164134e-05, "loss": 2.3185, "step": 3001000 }, { "epoch": 8.69, "learning_rate": 4.565753664551686e-05, "loss": 2.3406, "step": 3001500 }, { "epoch": 8.69, "learning_rate": 4.5656812997869585e-05, "loss": 2.3426, "step": 3002000 }, { "epoch": 8.69, "learning_rate": 4.565608935022231e-05, "loss": 2.3667, "step": 3002500 }, { "epoch": 8.69, "learning_rate": 4.565536570257503e-05, "loss": 2.3649, "step": 3003000 }, { "epoch": 8.69, "learning_rate": 4.565464350222305e-05, "loss": 2.3469, "step": 3003500 }, { "epoch": 8.7, "learning_rate": 4.5653919854575774e-05, "loss": 2.3449, "step": 3004000 }, { "epoch": 8.7, "learning_rate": 4.5653196206928497e-05, "loss": 2.3311, "step": 3004500 }, { "epoch": 8.7, "learning_rate": 4.565247255928122e-05, "loss": 2.3406, "step": 3005000 }, { "epoch": 8.7, "learning_rate": 4.565174891163394e-05, "loss": 2.3478, "step": 3005500 }, { "epoch": 8.7, "learning_rate": 4.5651026711281957e-05, "loss": 2.3479, "step": 3006000 }, { "epoch": 8.7, "learning_rate": 4.565030306363468e-05, "loss": 2.322, "step": 3006500 }, { "epoch": 8.7, "learning_rate": 4.56495794159874e-05, "loss": 2.3388, "step": 3007000 }, { "epoch": 8.71, "learning_rate": 4.5648857215635423e-05, "loss": 2.3523, "step": 3007500 }, { "epoch": 8.71, "learning_rate": 4.5648133567988146e-05, "loss": 2.3172, "step": 3008000 }, { "epoch": 8.71, "learning_rate": 4.564740992034087e-05, "loss": 2.3397, "step": 3008500 }, { "epoch": 8.71, "learning_rate": 4.56466862726936e-05, "loss": 2.3392, "step": 3009000 }, { "epoch": 8.71, "learning_rate": 4.564596262504632e-05, "loss": 2.3363, "step": 3009500 }, { "epoch": 8.71, "learning_rate": 4.564523897739904e-05, "loss": 2.3349, "step": 3010000 }, { "epoch": 8.71, "learning_rate": 4.5644515329751764e-05, "loss": 2.3183, "step": 3010500 }, { "epoch": 8.72, "learning_rate": 4.564379312939978e-05, "loss": 2.3577, "step": 3011000 }, { "epoch": 8.72, "learning_rate": 4.56430694817525e-05, "loss": 2.3549, "step": 3011500 }, { "epoch": 8.72, "learning_rate": 4.5642345834105224e-05, "loss": 2.3266, "step": 3012000 }, { "epoch": 8.72, "learning_rate": 4.564162218645795e-05, "loss": 2.3535, "step": 3012500 }, { "epoch": 8.72, "learning_rate": 4.5640898538810675e-05, "loss": 2.3437, "step": 3013000 }, { "epoch": 8.72, "learning_rate": 4.56401748911634e-05, "loss": 2.3423, "step": 3013500 }, { "epoch": 8.72, "learning_rate": 4.563945124351612e-05, "loss": 2.3264, "step": 3014000 }, { "epoch": 8.73, "learning_rate": 4.5638729043164135e-05, "loss": 2.3528, "step": 3014500 }, { "epoch": 8.73, "learning_rate": 4.563800539551686e-05, "loss": 2.3382, "step": 3015000 }, { "epoch": 8.73, "learning_rate": 4.563728174786958e-05, "loss": 2.3551, "step": 3015500 }, { "epoch": 8.73, "learning_rate": 4.56365581002223e-05, "loss": 2.3518, "step": 3016000 }, { "epoch": 8.73, "learning_rate": 4.563583445257503e-05, "loss": 2.3348, "step": 3016500 }, { "epoch": 8.73, "learning_rate": 4.563511080492775e-05, "loss": 2.3434, "step": 3017000 }, { "epoch": 8.73, "learning_rate": 4.5634387157280475e-05, "loss": 2.3586, "step": 3017500 }, { "epoch": 8.74, "learning_rate": 4.5633663509633204e-05, "loss": 2.3568, "step": 3018000 }, { "epoch": 8.74, "learning_rate": 4.563294130928122e-05, "loss": 2.3419, "step": 3018500 }, { "epoch": 8.74, "learning_rate": 4.563221766163394e-05, "loss": 2.3511, "step": 3019000 }, { "epoch": 8.74, "learning_rate": 4.5631494013986664e-05, "loss": 2.3184, "step": 3019500 }, { "epoch": 8.74, "learning_rate": 4.5630770366339386e-05, "loss": 2.3335, "step": 3020000 }, { "epoch": 8.74, "learning_rate": 4.563004671869211e-05, "loss": 2.3468, "step": 3020500 }, { "epoch": 8.74, "learning_rate": 4.562932307104483e-05, "loss": 2.335, "step": 3021000 }, { "epoch": 8.75, "learning_rate": 4.562860087069285e-05, "loss": 2.3166, "step": 3021500 }, { "epoch": 8.75, "learning_rate": 4.5627877223045575e-05, "loss": 2.3174, "step": 3022000 }, { "epoch": 8.75, "learning_rate": 4.56271535753983e-05, "loss": 2.3321, "step": 3022500 }, { "epoch": 8.75, "learning_rate": 4.562642992775102e-05, "loss": 2.3282, "step": 3023000 }, { "epoch": 8.75, "learning_rate": 4.562570628010374e-05, "loss": 2.3305, "step": 3023500 }, { "epoch": 8.75, "learning_rate": 4.562498263245647e-05, "loss": 2.3487, "step": 3024000 }, { "epoch": 8.75, "learning_rate": 4.562425898480919e-05, "loss": 2.3286, "step": 3024500 }, { "epoch": 8.76, "learning_rate": 4.562353678445721e-05, "loss": 2.3107, "step": 3025000 }, { "epoch": 8.76, "learning_rate": 4.562281313680993e-05, "loss": 2.3444, "step": 3025500 }, { "epoch": 8.76, "learning_rate": 4.562208948916265e-05, "loss": 2.316, "step": 3026000 }, { "epoch": 8.76, "learning_rate": 4.5621365841515376e-05, "loss": 2.3497, "step": 3026500 }, { "epoch": 8.76, "learning_rate": 4.5620642193868105e-05, "loss": 2.3321, "step": 3027000 }, { "epoch": 8.76, "learning_rate": 4.561991854622083e-05, "loss": 2.3445, "step": 3027500 }, { "epoch": 8.76, "learning_rate": 4.561919489857355e-05, "loss": 2.3382, "step": 3028000 }, { "epoch": 8.77, "learning_rate": 4.561847125092627e-05, "loss": 2.3469, "step": 3028500 }, { "epoch": 8.77, "learning_rate": 4.561775049786958e-05, "loss": 2.3304, "step": 3029000 }, { "epoch": 8.77, "learning_rate": 4.56170268502223e-05, "loss": 2.3495, "step": 3029500 }, { "epoch": 8.77, "learning_rate": 4.561630320257503e-05, "loss": 2.3233, "step": 3030000 }, { "epoch": 8.77, "learning_rate": 4.5615579554927754e-05, "loss": 2.3501, "step": 3030500 }, { "epoch": 8.77, "learning_rate": 4.5614855907280476e-05, "loss": 2.3422, "step": 3031000 }, { "epoch": 8.77, "learning_rate": 4.5614132259633205e-05, "loss": 2.3293, "step": 3031500 }, { "epoch": 8.78, "learning_rate": 4.561340861198593e-05, "loss": 2.3335, "step": 3032000 }, { "epoch": 8.78, "learning_rate": 4.561268496433865e-05, "loss": 2.336, "step": 3032500 }, { "epoch": 8.78, "learning_rate": 4.561196131669137e-05, "loss": 2.3304, "step": 3033000 }, { "epoch": 8.78, "learning_rate": 4.5611237669044094e-05, "loss": 2.3331, "step": 3033500 }, { "epoch": 8.78, "learning_rate": 4.561051546869211e-05, "loss": 2.352, "step": 3034000 }, { "epoch": 8.78, "learning_rate": 4.560979182104483e-05, "loss": 2.3391, "step": 3034500 }, { "epoch": 8.79, "learning_rate": 4.5609068173397554e-05, "loss": 2.3045, "step": 3035000 }, { "epoch": 8.79, "learning_rate": 4.560834452575028e-05, "loss": 2.333, "step": 3035500 }, { "epoch": 8.79, "learning_rate": 4.56076223253983e-05, "loss": 2.324, "step": 3036000 }, { "epoch": 8.79, "learning_rate": 4.560689867775102e-05, "loss": 2.3379, "step": 3036500 }, { "epoch": 8.79, "learning_rate": 4.560617503010374e-05, "loss": 2.3479, "step": 3037000 }, { "epoch": 8.79, "learning_rate": 4.5605451382456465e-05, "loss": 2.3426, "step": 3037500 }, { "epoch": 8.79, "learning_rate": 4.560472773480919e-05, "loss": 2.3231, "step": 3038000 }, { "epoch": 8.8, "learning_rate": 4.5604004087161916e-05, "loss": 2.3649, "step": 3038500 }, { "epoch": 8.8, "learning_rate": 4.560328188680993e-05, "loss": 2.3531, "step": 3039000 }, { "epoch": 8.8, "learning_rate": 4.5602558239162654e-05, "loss": 2.3452, "step": 3039500 }, { "epoch": 8.8, "learning_rate": 4.560183459151538e-05, "loss": 2.3391, "step": 3040000 }, { "epoch": 8.8, "learning_rate": 4.5601110943868105e-05, "loss": 2.3251, "step": 3040500 }, { "epoch": 8.8, "learning_rate": 4.560038874351612e-05, "loss": 2.3499, "step": 3041000 }, { "epoch": 8.8, "learning_rate": 4.559966509586884e-05, "loss": 2.3437, "step": 3041500 }, { "epoch": 8.81, "learning_rate": 4.559894289551686e-05, "loss": 2.3552, "step": 3042000 }, { "epoch": 8.81, "learning_rate": 4.559821924786958e-05, "loss": 2.348, "step": 3042500 }, { "epoch": 8.81, "learning_rate": 4.55974956002223e-05, "loss": 2.3375, "step": 3043000 }, { "epoch": 8.81, "learning_rate": 4.559677195257503e-05, "loss": 2.3441, "step": 3043500 }, { "epoch": 8.81, "learning_rate": 4.5596048304927754e-05, "loss": 2.3357, "step": 3044000 }, { "epoch": 8.81, "learning_rate": 4.559532465728048e-05, "loss": 2.3306, "step": 3044500 }, { "epoch": 8.81, "learning_rate": 4.559460245692849e-05, "loss": 2.3435, "step": 3045000 }, { "epoch": 8.82, "learning_rate": 4.5593878809281214e-05, "loss": 2.3273, "step": 3045500 }, { "epoch": 8.82, "learning_rate": 4.559315516163394e-05, "loss": 2.3489, "step": 3046000 }, { "epoch": 8.82, "learning_rate": 4.559243296128196e-05, "loss": 2.3352, "step": 3046500 }, { "epoch": 8.82, "learning_rate": 4.559170931363468e-05, "loss": 2.3499, "step": 3047000 }, { "epoch": 8.82, "learning_rate": 4.5590985665987403e-05, "loss": 2.3369, "step": 3047500 }, { "epoch": 8.82, "learning_rate": 4.5590263465635426e-05, "loss": 2.3442, "step": 3048000 }, { "epoch": 8.82, "learning_rate": 4.558953981798815e-05, "loss": 2.3258, "step": 3048500 }, { "epoch": 8.83, "learning_rate": 4.558881617034087e-05, "loss": 2.3411, "step": 3049000 }, { "epoch": 8.83, "learning_rate": 4.558809252269359e-05, "loss": 2.354, "step": 3049500 }, { "epoch": 8.83, "learning_rate": 4.5587368875046315e-05, "loss": 2.3305, "step": 3050000 }, { "epoch": 8.83, "learning_rate": 4.558664522739904e-05, "loss": 2.3442, "step": 3050500 }, { "epoch": 8.83, "learning_rate": 4.558592157975176e-05, "loss": 2.3281, "step": 3051000 }, { "epoch": 8.83, "learning_rate": 4.558519793210448e-05, "loss": 2.324, "step": 3051500 }, { "epoch": 8.83, "learning_rate": 4.5584474284457204e-05, "loss": 2.325, "step": 3052000 }, { "epoch": 8.84, "learning_rate": 4.558375063680993e-05, "loss": 2.3336, "step": 3052500 }, { "epoch": 8.84, "learning_rate": 4.5583026989162655e-05, "loss": 2.3677, "step": 3053000 }, { "epoch": 8.84, "learning_rate": 4.558230334151538e-05, "loss": 2.3192, "step": 3053500 }, { "epoch": 8.84, "learning_rate": 4.5581579693868106e-05, "loss": 2.3423, "step": 3054000 }, { "epoch": 8.84, "learning_rate": 4.558085604622083e-05, "loss": 2.3501, "step": 3054500 }, { "epoch": 8.84, "learning_rate": 4.558013239857355e-05, "loss": 2.325, "step": 3055000 }, { "epoch": 8.84, "learning_rate": 4.557940875092627e-05, "loss": 2.354, "step": 3055500 }, { "epoch": 8.85, "learning_rate": 4.557868799786958e-05, "loss": 2.3486, "step": 3056000 }, { "epoch": 8.85, "learning_rate": 4.557796435022231e-05, "loss": 2.3472, "step": 3056500 }, { "epoch": 8.85, "learning_rate": 4.557724070257503e-05, "loss": 2.3301, "step": 3057000 }, { "epoch": 8.85, "learning_rate": 4.5576517054927755e-05, "loss": 2.3546, "step": 3057500 }, { "epoch": 8.85, "learning_rate": 4.557579340728048e-05, "loss": 2.3476, "step": 3058000 }, { "epoch": 8.85, "learning_rate": 4.55750697596332e-05, "loss": 2.3549, "step": 3058500 }, { "epoch": 8.85, "learning_rate": 4.557434611198592e-05, "loss": 2.3248, "step": 3059000 }, { "epoch": 8.86, "learning_rate": 4.5573622464338644e-05, "loss": 2.3293, "step": 3059500 }, { "epoch": 8.86, "learning_rate": 4.5572898816691366e-05, "loss": 2.3373, "step": 3060000 }, { "epoch": 8.86, "learning_rate": 4.557217661633938e-05, "loss": 2.3444, "step": 3060500 }, { "epoch": 8.86, "learning_rate": 4.557145296869211e-05, "loss": 2.3362, "step": 3061000 }, { "epoch": 8.86, "learning_rate": 4.557072932104483e-05, "loss": 2.3682, "step": 3061500 }, { "epoch": 8.86, "learning_rate": 4.5570007120692855e-05, "loss": 2.3107, "step": 3062000 }, { "epoch": 8.86, "learning_rate": 4.556928492034087e-05, "loss": 2.3313, "step": 3062500 }, { "epoch": 8.87, "learning_rate": 4.556856127269359e-05, "loss": 2.3233, "step": 3063000 }, { "epoch": 8.87, "learning_rate": 4.5567837625046316e-05, "loss": 2.3137, "step": 3063500 }, { "epoch": 8.87, "learning_rate": 4.556711397739904e-05, "loss": 2.3336, "step": 3064000 }, { "epoch": 8.87, "learning_rate": 4.556639032975176e-05, "loss": 2.364, "step": 3064500 }, { "epoch": 8.87, "learning_rate": 4.556566668210448e-05, "loss": 2.3321, "step": 3065000 }, { "epoch": 8.87, "learning_rate": 4.556494303445721e-05, "loss": 2.3604, "step": 3065500 }, { "epoch": 8.87, "learning_rate": 4.5564219386809933e-05, "loss": 2.3225, "step": 3066000 }, { "epoch": 8.88, "learning_rate": 4.5563495739162656e-05, "loss": 2.3473, "step": 3066500 }, { "epoch": 8.88, "learning_rate": 4.556277209151538e-05, "loss": 2.3421, "step": 3067000 }, { "epoch": 8.88, "learning_rate": 4.55620484438681e-05, "loss": 2.3293, "step": 3067500 }, { "epoch": 8.88, "learning_rate": 4.556132479622082e-05, "loss": 2.3163, "step": 3068000 }, { "epoch": 8.88, "learning_rate": 4.556060114857355e-05, "loss": 2.3385, "step": 3068500 }, { "epoch": 8.88, "learning_rate": 4.555987894822157e-05, "loss": 2.353, "step": 3069000 }, { "epoch": 8.88, "learning_rate": 4.555915530057429e-05, "loss": 2.3116, "step": 3069500 }, { "epoch": 8.89, "learning_rate": 4.555843165292701e-05, "loss": 2.3098, "step": 3070000 }, { "epoch": 8.89, "learning_rate": 4.5557708005279734e-05, "loss": 2.3387, "step": 3070500 }, { "epoch": 8.89, "learning_rate": 4.555698435763246e-05, "loss": 2.3438, "step": 3071000 }, { "epoch": 8.89, "learning_rate": 4.5556260709985185e-05, "loss": 2.353, "step": 3071500 }, { "epoch": 8.89, "learning_rate": 4.5555539956928494e-05, "loss": 2.3587, "step": 3072000 }, { "epoch": 8.89, "learning_rate": 4.555481775657651e-05, "loss": 2.3371, "step": 3072500 }, { "epoch": 8.9, "learning_rate": 4.555409410892924e-05, "loss": 2.3098, "step": 3073000 }, { "epoch": 8.9, "learning_rate": 4.555337046128196e-05, "loss": 2.33, "step": 3073500 }, { "epoch": 8.9, "learning_rate": 4.555264681363468e-05, "loss": 2.3505, "step": 3074000 }, { "epoch": 8.9, "learning_rate": 4.5551923165987405e-05, "loss": 2.3379, "step": 3074500 }, { "epoch": 8.9, "learning_rate": 4.555120096563542e-05, "loss": 2.3775, "step": 3075000 }, { "epoch": 8.9, "learning_rate": 4.555047731798814e-05, "loss": 2.3421, "step": 3075500 }, { "epoch": 8.9, "learning_rate": 4.5549753670340865e-05, "loss": 2.3344, "step": 3076000 }, { "epoch": 8.91, "learning_rate": 4.5549030022693594e-05, "loss": 2.3399, "step": 3076500 }, { "epoch": 8.91, "learning_rate": 4.5548306375046316e-05, "loss": 2.3491, "step": 3077000 }, { "epoch": 8.91, "learning_rate": 4.554758272739904e-05, "loss": 2.3623, "step": 3077500 }, { "epoch": 8.91, "learning_rate": 4.554685907975176e-05, "loss": 2.3463, "step": 3078000 }, { "epoch": 8.91, "learning_rate": 4.554613543210449e-05, "loss": 2.3548, "step": 3078500 }, { "epoch": 8.91, "learning_rate": 4.554541178445721e-05, "loss": 2.3435, "step": 3079000 }, { "epoch": 8.91, "learning_rate": 4.554468958410523e-05, "loss": 2.3365, "step": 3079500 }, { "epoch": 8.92, "learning_rate": 4.554396593645795e-05, "loss": 2.3368, "step": 3080000 }, { "epoch": 8.92, "learning_rate": 4.554324228881067e-05, "loss": 2.323, "step": 3080500 }, { "epoch": 8.92, "learning_rate": 4.5542518641163394e-05, "loss": 2.3603, "step": 3081000 }, { "epoch": 8.92, "learning_rate": 4.5541794993516117e-05, "loss": 2.3366, "step": 3081500 }, { "epoch": 8.92, "learning_rate": 4.554107134586884e-05, "loss": 2.3476, "step": 3082000 }, { "epoch": 8.92, "learning_rate": 4.554034769822156e-05, "loss": 2.3432, "step": 3082500 }, { "epoch": 8.92, "learning_rate": 4.553962405057428e-05, "loss": 2.3468, "step": 3083000 }, { "epoch": 8.93, "learning_rate": 4.5538901850222306e-05, "loss": 2.3416, "step": 3083500 }, { "epoch": 8.93, "learning_rate": 4.5538178202575035e-05, "loss": 2.3346, "step": 3084000 }, { "epoch": 8.93, "learning_rate": 4.553745600222305e-05, "loss": 2.3343, "step": 3084500 }, { "epoch": 8.93, "learning_rate": 4.553673235457577e-05, "loss": 2.3469, "step": 3085000 }, { "epoch": 8.93, "learning_rate": 4.5536008706928495e-05, "loss": 2.336, "step": 3085500 }, { "epoch": 8.93, "learning_rate": 4.553528505928122e-05, "loss": 2.3553, "step": 3086000 }, { "epoch": 8.93, "learning_rate": 4.553456141163394e-05, "loss": 2.3435, "step": 3086500 }, { "epoch": 8.94, "learning_rate": 4.553383776398666e-05, "loss": 2.3799, "step": 3087000 }, { "epoch": 8.94, "learning_rate": 4.5533115563634684e-05, "loss": 2.3359, "step": 3087500 }, { "epoch": 8.94, "learning_rate": 4.55323933632827e-05, "loss": 2.3376, "step": 3088000 }, { "epoch": 8.94, "learning_rate": 4.553166971563542e-05, "loss": 2.3501, "step": 3088500 }, { "epoch": 8.94, "learning_rate": 4.5530946067988144e-05, "loss": 2.3458, "step": 3089000 }, { "epoch": 8.94, "learning_rate": 4.5530222420340866e-05, "loss": 2.3523, "step": 3089500 }, { "epoch": 8.94, "learning_rate": 4.552949877269359e-05, "loss": 2.3302, "step": 3090000 }, { "epoch": 8.95, "learning_rate": 4.552877512504631e-05, "loss": 2.3375, "step": 3090500 }, { "epoch": 8.95, "learning_rate": 4.552805147739904e-05, "loss": 2.3488, "step": 3091000 }, { "epoch": 8.95, "learning_rate": 4.552732782975176e-05, "loss": 2.344, "step": 3091500 }, { "epoch": 8.95, "learning_rate": 4.552660418210449e-05, "loss": 2.3455, "step": 3092000 }, { "epoch": 8.95, "learning_rate": 4.552588053445721e-05, "loss": 2.3389, "step": 3092500 }, { "epoch": 8.95, "learning_rate": 4.5525156886809935e-05, "loss": 2.353, "step": 3093000 }, { "epoch": 8.95, "learning_rate": 4.552443323916266e-05, "loss": 2.3455, "step": 3093500 }, { "epoch": 8.96, "learning_rate": 4.552371103881067e-05, "loss": 2.3485, "step": 3094000 }, { "epoch": 8.96, "learning_rate": 4.5522987391163395e-05, "loss": 2.3484, "step": 3094500 }, { "epoch": 8.96, "learning_rate": 4.552226374351612e-05, "loss": 2.3375, "step": 3095000 }, { "epoch": 8.96, "learning_rate": 4.552154009586884e-05, "loss": 2.3508, "step": 3095500 }, { "epoch": 8.96, "learning_rate": 4.552081789551686e-05, "loss": 2.3629, "step": 3096000 }, { "epoch": 8.96, "learning_rate": 4.5520094247869584e-05, "loss": 2.335, "step": 3096500 }, { "epoch": 8.96, "learning_rate": 4.5519370600222306e-05, "loss": 2.3455, "step": 3097000 }, { "epoch": 8.97, "learning_rate": 4.551864695257503e-05, "loss": 2.3369, "step": 3097500 }, { "epoch": 8.97, "learning_rate": 4.551792330492775e-05, "loss": 2.3592, "step": 3098000 }, { "epoch": 8.97, "learning_rate": 4.551719965728047e-05, "loss": 2.3394, "step": 3098500 }, { "epoch": 8.97, "learning_rate": 4.55164760096332e-05, "loss": 2.3569, "step": 3099000 }, { "epoch": 8.97, "learning_rate": 4.5515752361985924e-05, "loss": 2.3231, "step": 3099500 }, { "epoch": 8.97, "learning_rate": 4.5515028714338647e-05, "loss": 2.3389, "step": 3100000 }, { "epoch": 8.97, "learning_rate": 4.551430506669137e-05, "loss": 2.32, "step": 3100500 }, { "epoch": 8.98, "learning_rate": 4.551358286633939e-05, "loss": 2.341, "step": 3101000 }, { "epoch": 8.98, "learning_rate": 4.551285921869211e-05, "loss": 2.3236, "step": 3101500 }, { "epoch": 8.98, "learning_rate": 4.5512135571044836e-05, "loss": 2.3382, "step": 3102000 }, { "epoch": 8.98, "learning_rate": 4.551141337069285e-05, "loss": 2.3428, "step": 3102500 }, { "epoch": 8.98, "learning_rate": 4.551068972304557e-05, "loss": 2.3365, "step": 3103000 }, { "epoch": 8.98, "learning_rate": 4.5509966075398296e-05, "loss": 2.3366, "step": 3103500 }, { "epoch": 8.98, "learning_rate": 4.550924242775102e-05, "loss": 2.3411, "step": 3104000 }, { "epoch": 8.99, "learning_rate": 4.550851878010374e-05, "loss": 2.3268, "step": 3104500 }, { "epoch": 8.99, "learning_rate": 4.550779513245646e-05, "loss": 2.3292, "step": 3105000 }, { "epoch": 8.99, "learning_rate": 4.550707148480919e-05, "loss": 2.3428, "step": 3105500 }, { "epoch": 8.99, "learning_rate": 4.550634928445721e-05, "loss": 2.3362, "step": 3106000 }, { "epoch": 8.99, "learning_rate": 4.5505625636809936e-05, "loss": 2.3363, "step": 3106500 }, { "epoch": 8.99, "learning_rate": 4.550490198916266e-05, "loss": 2.324, "step": 3107000 }, { "epoch": 8.99, "learning_rate": 4.550417834151538e-05, "loss": 2.3453, "step": 3107500 }, { "epoch": 9.0, "learning_rate": 4.5503456141163396e-05, "loss": 2.3465, "step": 3108000 }, { "epoch": 9.0, "learning_rate": 4.550273249351612e-05, "loss": 2.3315, "step": 3108500 }, { "epoch": 9.0, "learning_rate": 4.550200884586884e-05, "loss": 2.3531, "step": 3109000 }, { "epoch": 9.0, "eval_accuracy": 0.6471148299204104, "eval_accuracy_mlm": 0.6092049587115826, "eval_accuracy_nsp": 0.8504489144931322, "eval_loss": 2.321585178375244, "eval_runtime": 330.7989, "eval_samples_per_second": 1319.188, "eval_steps_per_second": 54.967, "step": 3109248 }, { "epoch": 9.0, "learning_rate": 4.550128519822157e-05, "loss": 2.3074, "step": 3109500 }, { "epoch": 9.0, "learning_rate": 4.550056155057429e-05, "loss": 2.3264, "step": 3110000 }, { "epoch": 9.0, "learning_rate": 4.5499837902927014e-05, "loss": 2.3111, "step": 3110500 }, { "epoch": 9.01, "learning_rate": 4.5499114255279736e-05, "loss": 2.3162, "step": 3111000 }, { "epoch": 9.01, "learning_rate": 4.549839060763246e-05, "loss": 2.3384, "step": 3111500 }, { "epoch": 9.01, "learning_rate": 4.549766985457577e-05, "loss": 2.3095, "step": 3112000 }, { "epoch": 9.01, "learning_rate": 4.549694765422379e-05, "loss": 2.3044, "step": 3112500 }, { "epoch": 9.01, "learning_rate": 4.549622400657651e-05, "loss": 2.3092, "step": 3113000 }, { "epoch": 9.01, "learning_rate": 4.5495500358929234e-05, "loss": 2.3297, "step": 3113500 }, { "epoch": 9.01, "learning_rate": 4.549477671128196e-05, "loss": 2.31, "step": 3114000 }, { "epoch": 9.02, "learning_rate": 4.549405451092998e-05, "loss": 2.3126, "step": 3114500 }, { "epoch": 9.02, "learning_rate": 4.54933308632827e-05, "loss": 2.3192, "step": 3115000 }, { "epoch": 9.02, "learning_rate": 4.549260721563542e-05, "loss": 2.3338, "step": 3115500 }, { "epoch": 9.02, "learning_rate": 4.5491883567988145e-05, "loss": 2.332, "step": 3116000 }, { "epoch": 9.02, "learning_rate": 4.549115992034087e-05, "loss": 2.3382, "step": 3116500 }, { "epoch": 9.02, "learning_rate": 4.549043627269359e-05, "loss": 2.3242, "step": 3117000 }, { "epoch": 9.02, "learning_rate": 4.548971262504632e-05, "loss": 2.3122, "step": 3117500 }, { "epoch": 9.03, "learning_rate": 4.548898897739904e-05, "loss": 2.3076, "step": 3118000 }, { "epoch": 9.03, "learning_rate": 4.548826532975176e-05, "loss": 2.3187, "step": 3118500 }, { "epoch": 9.03, "learning_rate": 4.5487541682104485e-05, "loss": 2.3224, "step": 3119000 }, { "epoch": 9.03, "learning_rate": 4.548681803445721e-05, "loss": 2.3234, "step": 3119500 }, { "epoch": 9.03, "learning_rate": 4.548609438680993e-05, "loss": 2.3355, "step": 3120000 }, { "epoch": 9.03, "learning_rate": 4.548537073916265e-05, "loss": 2.3152, "step": 3120500 }, { "epoch": 9.03, "learning_rate": 4.5484647091515374e-05, "loss": 2.3239, "step": 3121000 }, { "epoch": 9.04, "learning_rate": 4.54839234438681e-05, "loss": 2.3238, "step": 3121500 }, { "epoch": 9.04, "learning_rate": 4.5483199796220826e-05, "loss": 2.3353, "step": 3122000 }, { "epoch": 9.04, "learning_rate": 4.548247614857355e-05, "loss": 2.2975, "step": 3122500 }, { "epoch": 9.04, "learning_rate": 4.548175394822157e-05, "loss": 2.3144, "step": 3123000 }, { "epoch": 9.04, "learning_rate": 4.548103030057429e-05, "loss": 2.3342, "step": 3123500 }, { "epoch": 9.04, "learning_rate": 4.5480306652927015e-05, "loss": 2.3554, "step": 3124000 }, { "epoch": 9.04, "learning_rate": 4.547958300527974e-05, "loss": 2.3197, "step": 3124500 }, { "epoch": 9.05, "learning_rate": 4.547885935763246e-05, "loss": 2.3259, "step": 3125000 }, { "epoch": 9.05, "learning_rate": 4.547813570998518e-05, "loss": 2.3229, "step": 3125500 }, { "epoch": 9.05, "learning_rate": 4.547741495692849e-05, "loss": 2.3403, "step": 3126000 }, { "epoch": 9.05, "learning_rate": 4.547669130928122e-05, "loss": 2.3158, "step": 3126500 }, { "epoch": 9.05, "learning_rate": 4.547596766163394e-05, "loss": 2.3352, "step": 3127000 }, { "epoch": 9.05, "learning_rate": 4.5475244013986664e-05, "loss": 2.3134, "step": 3127500 }, { "epoch": 9.05, "learning_rate": 4.5474520366339386e-05, "loss": 2.323, "step": 3128000 }, { "epoch": 9.06, "learning_rate": 4.547379671869211e-05, "loss": 2.3045, "step": 3128500 }, { "epoch": 9.06, "learning_rate": 4.547307307104484e-05, "loss": 2.3518, "step": 3129000 }, { "epoch": 9.06, "learning_rate": 4.547234942339756e-05, "loss": 2.34, "step": 3129500 }, { "epoch": 9.06, "learning_rate": 4.547162577575028e-05, "loss": 2.3034, "step": 3130000 }, { "epoch": 9.06, "learning_rate": 4.54709035753983e-05, "loss": 2.3032, "step": 3130500 }, { "epoch": 9.06, "learning_rate": 4.547017992775102e-05, "loss": 2.3127, "step": 3131000 }, { "epoch": 9.06, "learning_rate": 4.546945628010374e-05, "loss": 2.3171, "step": 3131500 }, { "epoch": 9.07, "learning_rate": 4.5468734079751764e-05, "loss": 2.3058, "step": 3132000 }, { "epoch": 9.07, "learning_rate": 4.5468010432104486e-05, "loss": 2.3094, "step": 3132500 }, { "epoch": 9.07, "learning_rate": 4.546728678445721e-05, "loss": 2.3274, "step": 3133000 }, { "epoch": 9.07, "learning_rate": 4.546656313680993e-05, "loss": 2.3076, "step": 3133500 }, { "epoch": 9.07, "learning_rate": 4.5465840936457946e-05, "loss": 2.3085, "step": 3134000 }, { "epoch": 9.07, "learning_rate": 4.546511728881067e-05, "loss": 2.3319, "step": 3134500 }, { "epoch": 9.07, "learning_rate": 4.54643936411634e-05, "loss": 2.3283, "step": 3135000 }, { "epoch": 9.08, "learning_rate": 4.546366999351612e-05, "loss": 2.3164, "step": 3135500 }, { "epoch": 9.08, "learning_rate": 4.546294634586884e-05, "loss": 2.3172, "step": 3136000 }, { "epoch": 9.08, "learning_rate": 4.546222269822157e-05, "loss": 2.3259, "step": 3136500 }, { "epoch": 9.08, "learning_rate": 4.546149905057429e-05, "loss": 2.3406, "step": 3137000 }, { "epoch": 9.08, "learning_rate": 4.5460775402927015e-05, "loss": 2.2911, "step": 3137500 }, { "epoch": 9.08, "learning_rate": 4.546005175527974e-05, "loss": 2.34, "step": 3138000 }, { "epoch": 9.08, "learning_rate": 4.545932955492775e-05, "loss": 2.2965, "step": 3138500 }, { "epoch": 9.09, "learning_rate": 4.5458605907280475e-05, "loss": 2.3136, "step": 3139000 }, { "epoch": 9.09, "learning_rate": 4.54578822596332e-05, "loss": 2.3031, "step": 3139500 }, { "epoch": 9.09, "learning_rate": 4.545716005928122e-05, "loss": 2.3059, "step": 3140000 }, { "epoch": 9.09, "learning_rate": 4.545643641163394e-05, "loss": 2.328, "step": 3140500 }, { "epoch": 9.09, "learning_rate": 4.5455712763986664e-05, "loss": 2.3073, "step": 3141000 }, { "epoch": 9.09, "learning_rate": 4.545498911633939e-05, "loss": 2.3333, "step": 3141500 }, { "epoch": 9.09, "learning_rate": 4.545426546869211e-05, "loss": 2.3364, "step": 3142000 }, { "epoch": 9.1, "learning_rate": 4.5453543268340124e-05, "loss": 2.3182, "step": 3142500 }, { "epoch": 9.1, "learning_rate": 4.545281962069285e-05, "loss": 2.3167, "step": 3143000 }, { "epoch": 9.1, "learning_rate": 4.545209597304557e-05, "loss": 2.3387, "step": 3143500 }, { "epoch": 9.1, "learning_rate": 4.54513723253983e-05, "loss": 2.3085, "step": 3144000 }, { "epoch": 9.1, "learning_rate": 4.545064867775102e-05, "loss": 2.3421, "step": 3144500 }, { "epoch": 9.1, "learning_rate": 4.544992503010375e-05, "loss": 2.322, "step": 3145000 }, { "epoch": 9.1, "learning_rate": 4.544920138245647e-05, "loss": 2.3241, "step": 3145500 }, { "epoch": 9.11, "learning_rate": 4.5448477734809194e-05, "loss": 2.3218, "step": 3146000 }, { "epoch": 9.11, "learning_rate": 4.5447754087161916e-05, "loss": 2.3209, "step": 3146500 }, { "epoch": 9.11, "learning_rate": 4.5447033334105225e-05, "loss": 2.3214, "step": 3147000 }, { "epoch": 9.11, "learning_rate": 4.544631113375325e-05, "loss": 2.3297, "step": 3147500 }, { "epoch": 9.11, "learning_rate": 4.544558748610597e-05, "loss": 2.3312, "step": 3148000 }, { "epoch": 9.11, "learning_rate": 4.544486383845869e-05, "loss": 2.3224, "step": 3148500 }, { "epoch": 9.12, "learning_rate": 4.5444140190811414e-05, "loss": 2.3232, "step": 3149000 }, { "epoch": 9.12, "learning_rate": 4.5443416543164136e-05, "loss": 2.3052, "step": 3149500 }, { "epoch": 9.12, "learning_rate": 4.544269289551686e-05, "loss": 2.3185, "step": 3150000 }, { "epoch": 9.12, "learning_rate": 4.544196924786958e-05, "loss": 2.315, "step": 3150500 }, { "epoch": 9.12, "learning_rate": 4.54412456002223e-05, "loss": 2.342, "step": 3151000 }, { "epoch": 9.12, "learning_rate": 4.544052484716562e-05, "loss": 2.322, "step": 3151500 }, { "epoch": 9.12, "learning_rate": 4.543980119951835e-05, "loss": 2.3044, "step": 3152000 }, { "epoch": 9.13, "learning_rate": 4.543907755187107e-05, "loss": 2.3258, "step": 3152500 }, { "epoch": 9.13, "learning_rate": 4.543835390422379e-05, "loss": 2.3254, "step": 3153000 }, { "epoch": 9.13, "learning_rate": 4.5437630256576514e-05, "loss": 2.3043, "step": 3153500 }, { "epoch": 9.13, "learning_rate": 4.5436906608929236e-05, "loss": 2.3072, "step": 3154000 }, { "epoch": 9.13, "learning_rate": 4.543618296128196e-05, "loss": 2.3171, "step": 3154500 }, { "epoch": 9.13, "learning_rate": 4.543545931363468e-05, "loss": 2.2927, "step": 3155000 }, { "epoch": 9.13, "learning_rate": 4.54347356659874e-05, "loss": 2.334, "step": 3155500 }, { "epoch": 9.14, "learning_rate": 4.5434012018340125e-05, "loss": 2.3401, "step": 3156000 }, { "epoch": 9.14, "learning_rate": 4.543328837069285e-05, "loss": 2.331, "step": 3156500 }, { "epoch": 9.14, "learning_rate": 4.543256472304557e-05, "loss": 2.3019, "step": 3157000 }, { "epoch": 9.14, "learning_rate": 4.543184252269359e-05, "loss": 2.3396, "step": 3157500 }, { "epoch": 9.14, "learning_rate": 4.5431118875046314e-05, "loss": 2.327, "step": 3158000 }, { "epoch": 9.14, "learning_rate": 4.5430395227399037e-05, "loss": 2.3271, "step": 3158500 }, { "epoch": 9.14, "learning_rate": 4.5429671579751766e-05, "loss": 2.3268, "step": 3159000 }, { "epoch": 9.15, "learning_rate": 4.542894793210449e-05, "loss": 2.3335, "step": 3159500 }, { "epoch": 9.15, "learning_rate": 4.54282257317525e-05, "loss": 2.3091, "step": 3160000 }, { "epoch": 9.15, "learning_rate": 4.5427502084105226e-05, "loss": 2.342, "step": 3160500 }, { "epoch": 9.15, "learning_rate": 4.542677843645795e-05, "loss": 2.3292, "step": 3161000 }, { "epoch": 9.15, "learning_rate": 4.542605478881068e-05, "loss": 2.3209, "step": 3161500 }, { "epoch": 9.15, "learning_rate": 4.54253311411634e-05, "loss": 2.3442, "step": 3162000 }, { "epoch": 9.15, "learning_rate": 4.542460749351612e-05, "loss": 2.3316, "step": 3162500 }, { "epoch": 9.16, "learning_rate": 4.5423883845868844e-05, "loss": 2.3228, "step": 3163000 }, { "epoch": 9.16, "learning_rate": 4.5423160198221566e-05, "loss": 2.337, "step": 3163500 }, { "epoch": 9.16, "learning_rate": 4.542243655057429e-05, "loss": 2.3141, "step": 3164000 }, { "epoch": 9.16, "learning_rate": 4.5421714350222304e-05, "loss": 2.3465, "step": 3164500 }, { "epoch": 9.16, "learning_rate": 4.5420992149870326e-05, "loss": 2.3229, "step": 3165000 }, { "epoch": 9.16, "learning_rate": 4.542026850222305e-05, "loss": 2.329, "step": 3165500 }, { "epoch": 9.16, "learning_rate": 4.541954485457577e-05, "loss": 2.3211, "step": 3166000 }, { "epoch": 9.17, "learning_rate": 4.54188212069285e-05, "loss": 2.3297, "step": 3166500 }, { "epoch": 9.17, "learning_rate": 4.541809755928122e-05, "loss": 2.3212, "step": 3167000 }, { "epoch": 9.17, "learning_rate": 4.5417373911633944e-05, "loss": 2.3189, "step": 3167500 }, { "epoch": 9.17, "learning_rate": 4.5416650263986666e-05, "loss": 2.337, "step": 3168000 }, { "epoch": 9.17, "learning_rate": 4.541592661633939e-05, "loss": 2.3106, "step": 3168500 }, { "epoch": 9.17, "learning_rate": 4.541520296869211e-05, "loss": 2.3399, "step": 3169000 }, { "epoch": 9.17, "learning_rate": 4.5414480768340126e-05, "loss": 2.3218, "step": 3169500 }, { "epoch": 9.18, "learning_rate": 4.541375712069285e-05, "loss": 2.3187, "step": 3170000 }, { "epoch": 9.18, "learning_rate": 4.541303347304558e-05, "loss": 2.3224, "step": 3170500 }, { "epoch": 9.18, "learning_rate": 4.54123098253983e-05, "loss": 2.3073, "step": 3171000 }, { "epoch": 9.18, "learning_rate": 4.5411587625046315e-05, "loss": 2.3142, "step": 3171500 }, { "epoch": 9.18, "learning_rate": 4.541086397739904e-05, "loss": 2.3431, "step": 3172000 }, { "epoch": 9.18, "learning_rate": 4.541014032975176e-05, "loss": 2.3134, "step": 3172500 }, { "epoch": 9.18, "learning_rate": 4.540941668210448e-05, "loss": 2.3265, "step": 3173000 }, { "epoch": 9.19, "learning_rate": 4.5408693034457204e-05, "loss": 2.3411, "step": 3173500 }, { "epoch": 9.19, "learning_rate": 4.540796938680993e-05, "loss": 2.3345, "step": 3174000 }, { "epoch": 9.19, "learning_rate": 4.540724718645795e-05, "loss": 2.3126, "step": 3174500 }, { "epoch": 9.19, "learning_rate": 4.540652498610597e-05, "loss": 2.3293, "step": 3175000 }, { "epoch": 9.19, "learning_rate": 4.5405802785753986e-05, "loss": 2.3364, "step": 3175500 }, { "epoch": 9.19, "learning_rate": 4.540507913810671e-05, "loss": 2.3453, "step": 3176000 }, { "epoch": 9.19, "learning_rate": 4.540435549045943e-05, "loss": 2.3358, "step": 3176500 }, { "epoch": 9.2, "learning_rate": 4.540363184281215e-05, "loss": 2.3236, "step": 3177000 }, { "epoch": 9.2, "learning_rate": 4.5402908195164875e-05, "loss": 2.3215, "step": 3177500 }, { "epoch": 9.2, "learning_rate": 4.5402184547517604e-05, "loss": 2.3047, "step": 3178000 }, { "epoch": 9.2, "learning_rate": 4.540146089987033e-05, "loss": 2.3325, "step": 3178500 }, { "epoch": 9.2, "learning_rate": 4.540073725222305e-05, "loss": 2.3281, "step": 3179000 }, { "epoch": 9.2, "learning_rate": 4.540001360457577e-05, "loss": 2.3146, "step": 3179500 }, { "epoch": 9.2, "learning_rate": 4.539928995692849e-05, "loss": 2.347, "step": 3180000 }, { "epoch": 9.21, "learning_rate": 4.539856775657651e-05, "loss": 2.327, "step": 3180500 }, { "epoch": 9.21, "learning_rate": 4.539784410892923e-05, "loss": 2.3131, "step": 3181000 }, { "epoch": 9.21, "learning_rate": 4.539712046128196e-05, "loss": 2.3303, "step": 3181500 }, { "epoch": 9.21, "learning_rate": 4.539639681363468e-05, "loss": 2.3016, "step": 3182000 }, { "epoch": 9.21, "learning_rate": 4.5395673165987405e-05, "loss": 2.3304, "step": 3182500 }, { "epoch": 9.21, "learning_rate": 4.539494951834013e-05, "loss": 2.3241, "step": 3183000 }, { "epoch": 9.21, "learning_rate": 4.539422587069285e-05, "loss": 2.3017, "step": 3183500 }, { "epoch": 9.22, "learning_rate": 4.539350222304558e-05, "loss": 2.3007, "step": 3184000 }, { "epoch": 9.22, "learning_rate": 4.53927785753983e-05, "loss": 2.3205, "step": 3184500 }, { "epoch": 9.22, "learning_rate": 4.5392056375046316e-05, "loss": 2.3351, "step": 3185000 }, { "epoch": 9.22, "learning_rate": 4.539133417469433e-05, "loss": 2.313, "step": 3185500 }, { "epoch": 9.22, "learning_rate": 4.5390610527047054e-05, "loss": 2.3406, "step": 3186000 }, { "epoch": 9.22, "learning_rate": 4.5389886879399776e-05, "loss": 2.3541, "step": 3186500 }, { "epoch": 9.23, "learning_rate": 4.5389163231752505e-05, "loss": 2.324, "step": 3187000 }, { "epoch": 9.23, "learning_rate": 4.538844103140052e-05, "loss": 2.352, "step": 3187500 }, { "epoch": 9.23, "learning_rate": 4.538771738375324e-05, "loss": 2.3345, "step": 3188000 }, { "epoch": 9.23, "learning_rate": 4.5386993736105965e-05, "loss": 2.3555, "step": 3188500 }, { "epoch": 9.23, "learning_rate": 4.5386270088458694e-05, "loss": 2.3334, "step": 3189000 }, { "epoch": 9.23, "learning_rate": 4.5385546440811416e-05, "loss": 2.3158, "step": 3189500 }, { "epoch": 9.23, "learning_rate": 4.538482424045943e-05, "loss": 2.3262, "step": 3190000 }, { "epoch": 9.24, "learning_rate": 4.5384100592812154e-05, "loss": 2.308, "step": 3190500 }, { "epoch": 9.24, "learning_rate": 4.5383376945164876e-05, "loss": 2.321, "step": 3191000 }, { "epoch": 9.24, "learning_rate": 4.5382653297517605e-05, "loss": 2.3468, "step": 3191500 }, { "epoch": 9.24, "learning_rate": 4.538192964987033e-05, "loss": 2.332, "step": 3192000 }, { "epoch": 9.24, "learning_rate": 4.538120600222305e-05, "loss": 2.3414, "step": 3192500 }, { "epoch": 9.24, "learning_rate": 4.538048235457577e-05, "loss": 2.3375, "step": 3193000 }, { "epoch": 9.24, "learning_rate": 4.5379758706928494e-05, "loss": 2.3032, "step": 3193500 }, { "epoch": 9.25, "learning_rate": 4.537903650657651e-05, "loss": 2.3318, "step": 3194000 }, { "epoch": 9.25, "learning_rate": 4.537831285892923e-05, "loss": 2.3248, "step": 3194500 }, { "epoch": 9.25, "learning_rate": 4.5377589211281954e-05, "loss": 2.3115, "step": 3195000 }, { "epoch": 9.25, "learning_rate": 4.5376865563634676e-05, "loss": 2.3098, "step": 3195500 }, { "epoch": 9.25, "learning_rate": 4.5376141915987405e-05, "loss": 2.3247, "step": 3196000 }, { "epoch": 9.25, "learning_rate": 4.537541826834013e-05, "loss": 2.3106, "step": 3196500 }, { "epoch": 9.25, "learning_rate": 4.537469462069286e-05, "loss": 2.3548, "step": 3197000 }, { "epoch": 9.26, "learning_rate": 4.537397097304558e-05, "loss": 2.3418, "step": 3197500 }, { "epoch": 9.26, "learning_rate": 4.5373248772693594e-05, "loss": 2.3158, "step": 3198000 }, { "epoch": 9.26, "learning_rate": 4.537252512504632e-05, "loss": 2.3175, "step": 3198500 }, { "epoch": 9.26, "learning_rate": 4.537180147739904e-05, "loss": 2.3271, "step": 3199000 }, { "epoch": 9.26, "learning_rate": 4.537107782975176e-05, "loss": 2.3187, "step": 3199500 }, { "epoch": 9.26, "learning_rate": 4.5370354182104483e-05, "loss": 2.3044, "step": 3200000 }, { "epoch": 9.26, "learning_rate": 4.5369630534457206e-05, "loss": 2.3259, "step": 3200500 }, { "epoch": 9.27, "learning_rate": 4.536890688680993e-05, "loss": 2.3334, "step": 3201000 }, { "epoch": 9.27, "learning_rate": 4.536818468645795e-05, "loss": 2.3338, "step": 3201500 }, { "epoch": 9.27, "learning_rate": 4.536746103881067e-05, "loss": 2.3179, "step": 3202000 }, { "epoch": 9.27, "learning_rate": 4.5366737391163395e-05, "loss": 2.3413, "step": 3202500 }, { "epoch": 9.27, "learning_rate": 4.536601374351612e-05, "loss": 2.3272, "step": 3203000 }, { "epoch": 9.27, "learning_rate": 4.536529009586884e-05, "loss": 2.3142, "step": 3203500 }, { "epoch": 9.27, "learning_rate": 4.536456644822157e-05, "loss": 2.3337, "step": 3204000 }, { "epoch": 9.28, "learning_rate": 4.536384280057429e-05, "loss": 2.3114, "step": 3204500 }, { "epoch": 9.28, "learning_rate": 4.536311915292701e-05, "loss": 2.3384, "step": 3205000 }, { "epoch": 9.28, "learning_rate": 4.5362395505279735e-05, "loss": 2.333, "step": 3205500 }, { "epoch": 9.28, "learning_rate": 4.536167185763246e-05, "loss": 2.3118, "step": 3206000 }, { "epoch": 9.28, "learning_rate": 4.536094820998518e-05, "loss": 2.3626, "step": 3206500 }, { "epoch": 9.28, "learning_rate": 4.536022456233791e-05, "loss": 2.3138, "step": 3207000 }, { "epoch": 9.28, "learning_rate": 4.535950091469063e-05, "loss": 2.3337, "step": 3207500 }, { "epoch": 9.29, "learning_rate": 4.535877726704335e-05, "loss": 2.3526, "step": 3208000 }, { "epoch": 9.29, "learning_rate": 4.5358053619396075e-05, "loss": 2.3561, "step": 3208500 }, { "epoch": 9.29, "learning_rate": 4.53573299717488e-05, "loss": 2.337, "step": 3209000 }, { "epoch": 9.29, "learning_rate": 4.535660777139681e-05, "loss": 2.3327, "step": 3209500 }, { "epoch": 9.29, "learning_rate": 4.5355884123749535e-05, "loss": 2.3276, "step": 3210000 }, { "epoch": 9.29, "learning_rate": 4.535516047610226e-05, "loss": 2.3263, "step": 3210500 }, { "epoch": 9.29, "learning_rate": 4.535443827575028e-05, "loss": 2.323, "step": 3211000 }, { "epoch": 9.3, "learning_rate": 4.535371462810301e-05, "loss": 2.321, "step": 3211500 }, { "epoch": 9.3, "learning_rate": 4.5352992427751024e-05, "loss": 2.3055, "step": 3212000 }, { "epoch": 9.3, "learning_rate": 4.5352268780103746e-05, "loss": 2.3194, "step": 3212500 }, { "epoch": 9.3, "learning_rate": 4.535154513245647e-05, "loss": 2.3113, "step": 3213000 }, { "epoch": 9.3, "learning_rate": 4.535082148480919e-05, "loss": 2.3079, "step": 3213500 }, { "epoch": 9.3, "learning_rate": 4.535009783716191e-05, "loss": 2.3454, "step": 3214000 }, { "epoch": 9.3, "learning_rate": 4.5349374189514635e-05, "loss": 2.3223, "step": 3214500 }, { "epoch": 9.31, "learning_rate": 4.534865054186736e-05, "loss": 2.3148, "step": 3215000 }, { "epoch": 9.31, "learning_rate": 4.534792689422008e-05, "loss": 2.3045, "step": 3215500 }, { "epoch": 9.31, "learning_rate": 4.53472046938681e-05, "loss": 2.3331, "step": 3216000 }, { "epoch": 9.31, "learning_rate": 4.5346481046220824e-05, "loss": 2.3267, "step": 3216500 }, { "epoch": 9.31, "learning_rate": 4.5345757398573547e-05, "loss": 2.3109, "step": 3217000 }, { "epoch": 9.31, "learning_rate": 4.534503519822156e-05, "loss": 2.3417, "step": 3217500 }, { "epoch": 9.31, "learning_rate": 4.5344311550574284e-05, "loss": 2.29, "step": 3218000 }, { "epoch": 9.32, "learning_rate": 4.534358790292701e-05, "loss": 2.3321, "step": 3218500 }, { "epoch": 9.32, "learning_rate": 4.5342864255279736e-05, "loss": 2.3146, "step": 3219000 }, { "epoch": 9.32, "learning_rate": 4.534214060763246e-05, "loss": 2.3137, "step": 3219500 }, { "epoch": 9.32, "learning_rate": 4.534141695998518e-05, "loss": 2.3171, "step": 3220000 }, { "epoch": 9.32, "learning_rate": 4.534069331233791e-05, "loss": 2.3326, "step": 3220500 }, { "epoch": 9.32, "learning_rate": 4.533996966469063e-05, "loss": 2.3355, "step": 3221000 }, { "epoch": 9.32, "learning_rate": 4.5339246017043354e-05, "loss": 2.3328, "step": 3221500 }, { "epoch": 9.33, "learning_rate": 4.533852381669137e-05, "loss": 2.3281, "step": 3222000 }, { "epoch": 9.33, "learning_rate": 4.533780016904409e-05, "loss": 2.3171, "step": 3222500 }, { "epoch": 9.33, "learning_rate": 4.5337076521396814e-05, "loss": 2.3352, "step": 3223000 }, { "epoch": 9.33, "learning_rate": 4.5336354321044836e-05, "loss": 2.3305, "step": 3223500 }, { "epoch": 9.33, "learning_rate": 4.533563067339756e-05, "loss": 2.3223, "step": 3224000 }, { "epoch": 9.33, "learning_rate": 4.533490702575028e-05, "loss": 2.3145, "step": 3224500 }, { "epoch": 9.34, "learning_rate": 4.5334183378103e-05, "loss": 2.3265, "step": 3225000 }, { "epoch": 9.34, "learning_rate": 4.5333459730455725e-05, "loss": 2.3075, "step": 3225500 }, { "epoch": 9.34, "learning_rate": 4.5332736082808454e-05, "loss": 2.322, "step": 3226000 }, { "epoch": 9.34, "learning_rate": 4.5332012435161176e-05, "loss": 2.32, "step": 3226500 }, { "epoch": 9.34, "learning_rate": 4.53312887875139e-05, "loss": 2.3182, "step": 3227000 }, { "epoch": 9.34, "learning_rate": 4.5330566587161914e-05, "loss": 2.344, "step": 3227500 }, { "epoch": 9.34, "learning_rate": 4.5329844386809936e-05, "loss": 2.3179, "step": 3228000 }, { "epoch": 9.35, "learning_rate": 4.532912073916266e-05, "loss": 2.3076, "step": 3228500 }, { "epoch": 9.35, "learning_rate": 4.532839709151538e-05, "loss": 2.3214, "step": 3229000 }, { "epoch": 9.35, "learning_rate": 4.53276734438681e-05, "loss": 2.2845, "step": 3229500 }, { "epoch": 9.35, "learning_rate": 4.532695124351612e-05, "loss": 2.3316, "step": 3230000 }, { "epoch": 9.35, "learning_rate": 4.532622759586884e-05, "loss": 2.3242, "step": 3230500 }, { "epoch": 9.35, "learning_rate": 4.532550394822156e-05, "loss": 2.3387, "step": 3231000 }, { "epoch": 9.35, "learning_rate": 4.5324780300574285e-05, "loss": 2.322, "step": 3231500 }, { "epoch": 9.36, "learning_rate": 4.532405665292701e-05, "loss": 2.3275, "step": 3232000 }, { "epoch": 9.36, "learning_rate": 4.5323333005279736e-05, "loss": 2.3034, "step": 3232500 }, { "epoch": 9.36, "learning_rate": 4.532260935763246e-05, "loss": 2.3294, "step": 3233000 }, { "epoch": 9.36, "learning_rate": 4.5321887157280474e-05, "loss": 2.343, "step": 3233500 }, { "epoch": 9.36, "learning_rate": 4.53211635096332e-05, "loss": 2.3193, "step": 3234000 }, { "epoch": 9.36, "learning_rate": 4.5320439861985925e-05, "loss": 2.3292, "step": 3234500 }, { "epoch": 9.36, "learning_rate": 4.531971621433865e-05, "loss": 2.3256, "step": 3235000 }, { "epoch": 9.37, "learning_rate": 4.531899256669137e-05, "loss": 2.3216, "step": 3235500 }, { "epoch": 9.37, "learning_rate": 4.531826891904409e-05, "loss": 2.3208, "step": 3236000 }, { "epoch": 9.37, "learning_rate": 4.5317545271396814e-05, "loss": 2.3328, "step": 3236500 }, { "epoch": 9.37, "learning_rate": 4.531682162374954e-05, "loss": 2.3013, "step": 3237000 }, { "epoch": 9.37, "learning_rate": 4.531609797610226e-05, "loss": 2.33, "step": 3237500 }, { "epoch": 9.37, "learning_rate": 4.531537577575028e-05, "loss": 2.3283, "step": 3238000 }, { "epoch": 9.37, "learning_rate": 4.5314652128103003e-05, "loss": 2.33, "step": 3238500 }, { "epoch": 9.38, "learning_rate": 4.5313928480455726e-05, "loss": 2.3104, "step": 3239000 }, { "epoch": 9.38, "learning_rate": 4.531320483280845e-05, "loss": 2.3094, "step": 3239500 }, { "epoch": 9.38, "learning_rate": 4.531248118516117e-05, "loss": 2.3434, "step": 3240000 }, { "epoch": 9.38, "learning_rate": 4.5311760432104486e-05, "loss": 2.3084, "step": 3240500 }, { "epoch": 9.38, "learning_rate": 4.531103678445721e-05, "loss": 2.3363, "step": 3241000 }, { "epoch": 9.38, "learning_rate": 4.531031458410523e-05, "loss": 2.345, "step": 3241500 }, { "epoch": 9.38, "learning_rate": 4.530959093645795e-05, "loss": 2.3257, "step": 3242000 }, { "epoch": 9.39, "learning_rate": 4.5308867288810675e-05, "loss": 2.3451, "step": 3242500 }, { "epoch": 9.39, "learning_rate": 4.53081436411634e-05, "loss": 2.3438, "step": 3243000 }, { "epoch": 9.39, "learning_rate": 4.530741999351612e-05, "loss": 2.3377, "step": 3243500 }, { "epoch": 9.39, "learning_rate": 4.530669634586884e-05, "loss": 2.3179, "step": 3244000 }, { "epoch": 9.39, "learning_rate": 4.5305972698221564e-05, "loss": 2.3072, "step": 3244500 }, { "epoch": 9.39, "learning_rate": 4.5305249050574286e-05, "loss": 2.361, "step": 3245000 }, { "epoch": 9.39, "learning_rate": 4.5304525402927015e-05, "loss": 2.3049, "step": 3245500 }, { "epoch": 9.4, "learning_rate": 4.530380320257503e-05, "loss": 2.3121, "step": 3246000 }, { "epoch": 9.4, "learning_rate": 4.530307955492775e-05, "loss": 2.3673, "step": 3246500 }, { "epoch": 9.4, "learning_rate": 4.5302355907280475e-05, "loss": 2.3155, "step": 3247000 }, { "epoch": 9.4, "learning_rate": 4.53016322596332e-05, "loss": 2.3505, "step": 3247500 }, { "epoch": 9.4, "learning_rate": 4.530090861198592e-05, "loss": 2.3316, "step": 3248000 }, { "epoch": 9.4, "learning_rate": 4.530018496433864e-05, "loss": 2.3153, "step": 3248500 }, { "epoch": 9.4, "learning_rate": 4.529946131669137e-05, "loss": 2.3206, "step": 3249000 }, { "epoch": 9.41, "learning_rate": 4.529873766904409e-05, "loss": 2.3183, "step": 3249500 }, { "epoch": 9.41, "learning_rate": 4.5298014021396815e-05, "loss": 2.3329, "step": 3250000 }, { "epoch": 9.41, "learning_rate": 4.529729182104484e-05, "loss": 2.3066, "step": 3250500 }, { "epoch": 9.41, "learning_rate": 4.529656817339756e-05, "loss": 2.3056, "step": 3251000 }, { "epoch": 9.41, "learning_rate": 4.529584452575028e-05, "loss": 2.3278, "step": 3251500 }, { "epoch": 9.41, "learning_rate": 4.52951223253983e-05, "loss": 2.3572, "step": 3252000 }, { "epoch": 9.41, "learning_rate": 4.529439867775102e-05, "loss": 2.33, "step": 3252500 }, { "epoch": 9.42, "learning_rate": 4.529367503010374e-05, "loss": 2.324, "step": 3253000 }, { "epoch": 9.42, "learning_rate": 4.5292951382456464e-05, "loss": 2.3274, "step": 3253500 }, { "epoch": 9.42, "learning_rate": 4.5292227734809186e-05, "loss": 2.3336, "step": 3254000 }, { "epoch": 9.42, "learning_rate": 4.5291504087161915e-05, "loss": 2.3297, "step": 3254500 }, { "epoch": 9.42, "learning_rate": 4.5290783334105224e-05, "loss": 2.3493, "step": 3255000 }, { "epoch": 9.42, "learning_rate": 4.5290059686457947e-05, "loss": 2.3423, "step": 3255500 }, { "epoch": 9.42, "learning_rate": 4.528933603881067e-05, "loss": 2.3262, "step": 3256000 }, { "epoch": 9.43, "learning_rate": 4.52886123911634e-05, "loss": 2.2964, "step": 3256500 }, { "epoch": 9.43, "learning_rate": 4.5287890190811413e-05, "loss": 2.325, "step": 3257000 }, { "epoch": 9.43, "learning_rate": 4.5287166543164136e-05, "loss": 2.3365, "step": 3257500 }, { "epoch": 9.43, "learning_rate": 4.5286442895516865e-05, "loss": 2.3078, "step": 3258000 }, { "epoch": 9.43, "learning_rate": 4.528572069516488e-05, "loss": 2.3288, "step": 3258500 }, { "epoch": 9.43, "learning_rate": 4.52849970475176e-05, "loss": 2.3211, "step": 3259000 }, { "epoch": 9.43, "learning_rate": 4.5284273399870325e-05, "loss": 2.3236, "step": 3259500 }, { "epoch": 9.44, "learning_rate": 4.528354975222305e-05, "loss": 2.3195, "step": 3260000 }, { "epoch": 9.44, "learning_rate": 4.528282610457577e-05, "loss": 2.3328, "step": 3260500 }, { "epoch": 9.44, "learning_rate": 4.528210245692849e-05, "loss": 2.3262, "step": 3261000 }, { "epoch": 9.44, "learning_rate": 4.5281378809281214e-05, "loss": 2.3056, "step": 3261500 }, { "epoch": 9.44, "learning_rate": 4.5280655161633936e-05, "loss": 2.3202, "step": 3262000 }, { "epoch": 9.44, "learning_rate": 4.5279931513986665e-05, "loss": 2.3051, "step": 3262500 }, { "epoch": 9.45, "learning_rate": 4.527920786633939e-05, "loss": 2.3171, "step": 3263000 }, { "epoch": 9.45, "learning_rate": 4.527848421869211e-05, "loss": 2.3066, "step": 3263500 }, { "epoch": 9.45, "learning_rate": 4.527776057104484e-05, "loss": 2.3275, "step": 3264000 }, { "epoch": 9.45, "learning_rate": 4.527703981798815e-05, "loss": 2.3175, "step": 3264500 }, { "epoch": 9.45, "learning_rate": 4.527631617034087e-05, "loss": 2.3364, "step": 3265000 }, { "epoch": 9.45, "learning_rate": 4.527559252269359e-05, "loss": 2.3244, "step": 3265500 }, { "epoch": 9.45, "learning_rate": 4.5274868875046314e-05, "loss": 2.3169, "step": 3266000 }, { "epoch": 9.46, "learning_rate": 4.5274146674694336e-05, "loss": 2.3267, "step": 3266500 }, { "epoch": 9.46, "learning_rate": 4.527342302704706e-05, "loss": 2.3287, "step": 3267000 }, { "epoch": 9.46, "learning_rate": 4.5272700826695074e-05, "loss": 2.32, "step": 3267500 }, { "epoch": 9.46, "learning_rate": 4.5271977179047796e-05, "loss": 2.3333, "step": 3268000 }, { "epoch": 9.46, "learning_rate": 4.527125353140052e-05, "loss": 2.337, "step": 3268500 }, { "epoch": 9.46, "learning_rate": 4.527052988375324e-05, "loss": 2.305, "step": 3269000 }, { "epoch": 9.46, "learning_rate": 4.526980768340126e-05, "loss": 2.303, "step": 3269500 }, { "epoch": 9.47, "learning_rate": 4.5269084035753985e-05, "loss": 2.3417, "step": 3270000 }, { "epoch": 9.47, "learning_rate": 4.526836038810671e-05, "loss": 2.3242, "step": 3270500 }, { "epoch": 9.47, "learning_rate": 4.526763674045943e-05, "loss": 2.3238, "step": 3271000 }, { "epoch": 9.47, "learning_rate": 4.526691309281215e-05, "loss": 2.3244, "step": 3271500 }, { "epoch": 9.47, "learning_rate": 4.526618944516488e-05, "loss": 2.3388, "step": 3272000 }, { "epoch": 9.47, "learning_rate": 4.52654657975176e-05, "loss": 2.3336, "step": 3272500 }, { "epoch": 9.47, "learning_rate": 4.5264742149870325e-05, "loss": 2.3406, "step": 3273000 }, { "epoch": 9.48, "learning_rate": 4.526401850222305e-05, "loss": 2.3361, "step": 3273500 }, { "epoch": 9.48, "learning_rate": 4.526329630187106e-05, "loss": 2.3436, "step": 3274000 }, { "epoch": 9.48, "learning_rate": 4.526257265422379e-05, "loss": 2.3156, "step": 3274500 }, { "epoch": 9.48, "learning_rate": 4.5261849006576515e-05, "loss": 2.3347, "step": 3275000 }, { "epoch": 9.48, "learning_rate": 4.526112535892924e-05, "loss": 2.3286, "step": 3275500 }, { "epoch": 9.48, "learning_rate": 4.526040315857725e-05, "loss": 2.3274, "step": 3276000 }, { "epoch": 9.48, "learning_rate": 4.5259679510929975e-05, "loss": 2.3146, "step": 3276500 }, { "epoch": 9.49, "learning_rate": 4.52589558632827e-05, "loss": 2.3257, "step": 3277000 }, { "epoch": 9.49, "learning_rate": 4.525823221563542e-05, "loss": 2.3325, "step": 3277500 }, { "epoch": 9.49, "learning_rate": 4.525751001528344e-05, "loss": 2.3457, "step": 3278000 }, { "epoch": 9.49, "learning_rate": 4.5256786367636164e-05, "loss": 2.3301, "step": 3278500 }, { "epoch": 9.49, "learning_rate": 4.5256062719988886e-05, "loss": 2.3449, "step": 3279000 }, { "epoch": 9.49, "learning_rate": 4.5255339072341615e-05, "loss": 2.3509, "step": 3279500 }, { "epoch": 9.49, "learning_rate": 4.525461687198963e-05, "loss": 2.3568, "step": 3280000 }, { "epoch": 9.5, "learning_rate": 4.525389322434235e-05, "loss": 2.3308, "step": 3280500 }, { "epoch": 9.5, "learning_rate": 4.5253169576695075e-05, "loss": 2.3243, "step": 3281000 }, { "epoch": 9.5, "learning_rate": 4.52524459290478e-05, "loss": 2.3296, "step": 3281500 }, { "epoch": 9.5, "learning_rate": 4.525172228140052e-05, "loss": 2.3359, "step": 3282000 }, { "epoch": 9.5, "learning_rate": 4.525099863375324e-05, "loss": 2.3435, "step": 3282500 }, { "epoch": 9.5, "learning_rate": 4.5250276433401264e-05, "loss": 2.3019, "step": 3283000 }, { "epoch": 9.5, "learning_rate": 4.5249552785753986e-05, "loss": 2.3073, "step": 3283500 }, { "epoch": 9.51, "learning_rate": 4.524882913810671e-05, "loss": 2.3046, "step": 3284000 }, { "epoch": 9.51, "learning_rate": 4.524810549045943e-05, "loss": 2.319, "step": 3284500 }, { "epoch": 9.51, "learning_rate": 4.524738184281215e-05, "loss": 2.306, "step": 3285000 }, { "epoch": 9.51, "learning_rate": 4.5246658195164875e-05, "loss": 2.3068, "step": 3285500 }, { "epoch": 9.51, "learning_rate": 4.52459345475176e-05, "loss": 2.3244, "step": 3286000 }, { "epoch": 9.51, "learning_rate": 4.5245210899870326e-05, "loss": 2.3225, "step": 3286500 }, { "epoch": 9.51, "learning_rate": 4.524448869951834e-05, "loss": 2.357, "step": 3287000 }, { "epoch": 9.52, "learning_rate": 4.524376505187107e-05, "loss": 2.3434, "step": 3287500 }, { "epoch": 9.52, "learning_rate": 4.5243042851519086e-05, "loss": 2.3044, "step": 3288000 }, { "epoch": 9.52, "learning_rate": 4.524231920387181e-05, "loss": 2.3242, "step": 3288500 }, { "epoch": 9.52, "learning_rate": 4.524159555622453e-05, "loss": 2.3448, "step": 3289000 }, { "epoch": 9.52, "learning_rate": 4.524087190857725e-05, "loss": 2.3218, "step": 3289500 }, { "epoch": 9.52, "learning_rate": 4.5240148260929975e-05, "loss": 2.3305, "step": 3290000 }, { "epoch": 9.52, "learning_rate": 4.52394246132827e-05, "loss": 2.3088, "step": 3290500 }, { "epoch": 9.53, "learning_rate": 4.523870096563542e-05, "loss": 2.323, "step": 3291000 }, { "epoch": 9.53, "learning_rate": 4.523797731798814e-05, "loss": 2.3202, "step": 3291500 }, { "epoch": 9.53, "learning_rate": 4.523725367034087e-05, "loss": 2.3296, "step": 3292000 }, { "epoch": 9.53, "learning_rate": 4.523653002269359e-05, "loss": 2.3139, "step": 3292500 }, { "epoch": 9.53, "learning_rate": 4.5235806375046315e-05, "loss": 2.313, "step": 3293000 }, { "epoch": 9.53, "learning_rate": 4.523508272739904e-05, "loss": 2.3171, "step": 3293500 }, { "epoch": 9.53, "learning_rate": 4.523435907975177e-05, "loss": 2.3063, "step": 3294000 }, { "epoch": 9.54, "learning_rate": 4.523363543210449e-05, "loss": 2.3359, "step": 3294500 }, { "epoch": 9.54, "learning_rate": 4.5232913231752505e-05, "loss": 2.3273, "step": 3295000 }, { "epoch": 9.54, "learning_rate": 4.523218958410523e-05, "loss": 2.3557, "step": 3295500 }, { "epoch": 9.54, "learning_rate": 4.523146593645795e-05, "loss": 2.3253, "step": 3296000 }, { "epoch": 9.54, "learning_rate": 4.523074228881067e-05, "loss": 2.3122, "step": 3296500 }, { "epoch": 9.54, "learning_rate": 4.5230018641163393e-05, "loss": 2.345, "step": 3297000 }, { "epoch": 9.54, "learning_rate": 4.522929499351612e-05, "loss": 2.3052, "step": 3297500 }, { "epoch": 9.55, "learning_rate": 4.5228571345868845e-05, "loss": 2.3489, "step": 3298000 }, { "epoch": 9.55, "learning_rate": 4.522784769822157e-05, "loss": 2.3297, "step": 3298500 }, { "epoch": 9.55, "learning_rate": 4.522712405057429e-05, "loss": 2.3227, "step": 3299000 }, { "epoch": 9.55, "learning_rate": 4.52264032975176e-05, "loss": 2.3217, "step": 3299500 }, { "epoch": 9.55, "learning_rate": 4.522567964987032e-05, "loss": 2.3325, "step": 3300000 }, { "epoch": 9.55, "learning_rate": 4.522495600222304e-05, "loss": 2.3343, "step": 3300500 }, { "epoch": 9.56, "learning_rate": 4.5224233801871065e-05, "loss": 2.3324, "step": 3301000 }, { "epoch": 9.56, "learning_rate": 4.522351015422379e-05, "loss": 2.3181, "step": 3301500 }, { "epoch": 9.56, "learning_rate": 4.5222786506576516e-05, "loss": 2.3371, "step": 3302000 }, { "epoch": 9.56, "learning_rate": 4.522206285892924e-05, "loss": 2.329, "step": 3302500 }, { "epoch": 9.56, "learning_rate": 4.522133921128196e-05, "loss": 2.3172, "step": 3303000 }, { "epoch": 9.56, "learning_rate": 4.522061556363468e-05, "loss": 2.3166, "step": 3303500 }, { "epoch": 9.56, "learning_rate": 4.5219891915987405e-05, "loss": 2.3025, "step": 3304000 }, { "epoch": 9.57, "learning_rate": 4.521916826834013e-05, "loss": 2.3114, "step": 3304500 }, { "epoch": 9.57, "learning_rate": 4.521844462069285e-05, "loss": 2.3314, "step": 3305000 }, { "epoch": 9.57, "learning_rate": 4.521772242034087e-05, "loss": 2.3291, "step": 3305500 }, { "epoch": 9.57, "learning_rate": 4.5216998772693594e-05, "loss": 2.3302, "step": 3306000 }, { "epoch": 9.57, "learning_rate": 4.521627657234161e-05, "loss": 2.3155, "step": 3306500 }, { "epoch": 9.57, "learning_rate": 4.521555292469433e-05, "loss": 2.3349, "step": 3307000 }, { "epoch": 9.57, "learning_rate": 4.5214829277047054e-05, "loss": 2.343, "step": 3307500 }, { "epoch": 9.58, "learning_rate": 4.5214105629399776e-05, "loss": 2.3252, "step": 3308000 }, { "epoch": 9.58, "learning_rate": 4.52133819817525e-05, "loss": 2.3499, "step": 3308500 }, { "epoch": 9.58, "learning_rate": 4.521265833410523e-05, "loss": 2.3294, "step": 3309000 }, { "epoch": 9.58, "learning_rate": 4.521193468645795e-05, "loss": 2.3329, "step": 3309500 }, { "epoch": 9.58, "learning_rate": 4.521121103881067e-05, "loss": 2.3128, "step": 3310000 }, { "epoch": 9.58, "learning_rate": 4.5210487391163394e-05, "loss": 2.3145, "step": 3310500 }, { "epoch": 9.58, "learning_rate": 4.520976374351612e-05, "loss": 2.3523, "step": 3311000 }, { "epoch": 9.59, "learning_rate": 4.5209040095868846e-05, "loss": 2.3164, "step": 3311500 }, { "epoch": 9.59, "learning_rate": 4.520831644822157e-05, "loss": 2.3291, "step": 3312000 }, { "epoch": 9.59, "learning_rate": 4.520759280057429e-05, "loss": 2.3081, "step": 3312500 }, { "epoch": 9.59, "learning_rate": 4.5206870600222306e-05, "loss": 2.3196, "step": 3313000 }, { "epoch": 9.59, "learning_rate": 4.520614695257503e-05, "loss": 2.3298, "step": 3313500 }, { "epoch": 9.59, "learning_rate": 4.520542330492775e-05, "loss": 2.3426, "step": 3314000 }, { "epoch": 9.59, "learning_rate": 4.520469965728047e-05, "loss": 2.3267, "step": 3314500 }, { "epoch": 9.6, "learning_rate": 4.5203977456928495e-05, "loss": 2.3417, "step": 3315000 }, { "epoch": 9.6, "learning_rate": 4.520325380928122e-05, "loss": 2.3384, "step": 3315500 }, { "epoch": 9.6, "learning_rate": 4.520253016163394e-05, "loss": 2.3124, "step": 3316000 }, { "epoch": 9.6, "learning_rate": 4.520180651398667e-05, "loss": 2.3277, "step": 3316500 }, { "epoch": 9.6, "learning_rate": 4.520108286633939e-05, "loss": 2.3372, "step": 3317000 }, { "epoch": 9.6, "learning_rate": 4.520035921869211e-05, "loss": 2.3427, "step": 3317500 }, { "epoch": 9.6, "learning_rate": 4.5199635571044835e-05, "loss": 2.3072, "step": 3318000 }, { "epoch": 9.61, "learning_rate": 4.519891337069285e-05, "loss": 2.3382, "step": 3318500 }, { "epoch": 9.61, "learning_rate": 4.519818972304557e-05, "loss": 2.3304, "step": 3319000 }, { "epoch": 9.61, "learning_rate": 4.5197466075398295e-05, "loss": 2.3263, "step": 3319500 }, { "epoch": 9.61, "learning_rate": 4.5196742427751024e-05, "loss": 2.331, "step": 3320000 }, { "epoch": 9.61, "learning_rate": 4.5196018780103746e-05, "loss": 2.3429, "step": 3320500 }, { "epoch": 9.61, "learning_rate": 4.519529513245647e-05, "loss": 2.3338, "step": 3321000 }, { "epoch": 9.61, "learning_rate": 4.519457148480919e-05, "loss": 2.3169, "step": 3321500 }, { "epoch": 9.62, "learning_rate": 4.519384783716191e-05, "loss": 2.3148, "step": 3322000 }, { "epoch": 9.62, "learning_rate": 4.5193124189514635e-05, "loss": 2.3314, "step": 3322500 }, { "epoch": 9.62, "learning_rate": 4.519240198916265e-05, "loss": 2.3317, "step": 3323000 }, { "epoch": 9.62, "learning_rate": 4.519167834151537e-05, "loss": 2.3173, "step": 3323500 }, { "epoch": 9.62, "learning_rate": 4.51909546938681e-05, "loss": 2.2966, "step": 3324000 }, { "epoch": 9.62, "learning_rate": 4.5190232493516124e-05, "loss": 2.3208, "step": 3324500 }, { "epoch": 9.62, "learning_rate": 4.5189508845868846e-05, "loss": 2.3194, "step": 3325000 }, { "epoch": 9.63, "learning_rate": 4.518878664551686e-05, "loss": 2.3159, "step": 3325500 }, { "epoch": 9.63, "learning_rate": 4.5188062997869584e-05, "loss": 2.3347, "step": 3326000 }, { "epoch": 9.63, "learning_rate": 4.5187339350222306e-05, "loss": 2.3323, "step": 3326500 }, { "epoch": 9.63, "learning_rate": 4.518661570257503e-05, "loss": 2.3094, "step": 3327000 }, { "epoch": 9.63, "learning_rate": 4.518589205492775e-05, "loss": 2.3204, "step": 3327500 }, { "epoch": 9.63, "learning_rate": 4.518516840728047e-05, "loss": 2.3254, "step": 3328000 }, { "epoch": 9.63, "learning_rate": 4.51844447596332e-05, "loss": 2.3304, "step": 3328500 }, { "epoch": 9.64, "learning_rate": 4.5183721111985924e-05, "loss": 2.3622, "step": 3329000 }, { "epoch": 9.64, "learning_rate": 4.5182997464338646e-05, "loss": 2.3453, "step": 3329500 }, { "epoch": 9.64, "learning_rate": 4.518227381669137e-05, "loss": 2.3293, "step": 3330000 }, { "epoch": 9.64, "learning_rate": 4.518155016904409e-05, "loss": 2.3169, "step": 3330500 }, { "epoch": 9.64, "learning_rate": 4.518082652139682e-05, "loss": 2.3153, "step": 3331000 }, { "epoch": 9.64, "learning_rate": 4.518010287374954e-05, "loss": 2.3315, "step": 3331500 }, { "epoch": 9.64, "learning_rate": 4.517938067339756e-05, "loss": 2.3276, "step": 3332000 }, { "epoch": 9.65, "learning_rate": 4.517865702575028e-05, "loss": 2.3142, "step": 3332500 }, { "epoch": 9.65, "learning_rate": 4.51779348253983e-05, "loss": 2.3411, "step": 3333000 }, { "epoch": 9.65, "learning_rate": 4.5177211177751025e-05, "loss": 2.3359, "step": 3333500 }, { "epoch": 9.65, "learning_rate": 4.517648753010375e-05, "loss": 2.3318, "step": 3334000 }, { "epoch": 9.65, "learning_rate": 4.517576388245647e-05, "loss": 2.3248, "step": 3334500 }, { "epoch": 9.65, "learning_rate": 4.517504023480919e-05, "loss": 2.3366, "step": 3335000 }, { "epoch": 9.65, "learning_rate": 4.5174316587161913e-05, "loss": 2.3261, "step": 3335500 }, { "epoch": 9.66, "learning_rate": 4.5173592939514636e-05, "loss": 2.3095, "step": 3336000 }, { "epoch": 9.66, "learning_rate": 4.517286929186736e-05, "loss": 2.3091, "step": 3336500 }, { "epoch": 9.66, "learning_rate": 4.5172147091515374e-05, "loss": 2.3292, "step": 3337000 }, { "epoch": 9.66, "learning_rate": 4.5171424891163396e-05, "loss": 2.3246, "step": 3337500 }, { "epoch": 9.66, "learning_rate": 4.517070269081141e-05, "loss": 2.3135, "step": 3338000 }, { "epoch": 9.66, "learning_rate": 4.5169979043164134e-05, "loss": 2.3196, "step": 3338500 }, { "epoch": 9.67, "learning_rate": 4.516925539551686e-05, "loss": 2.3226, "step": 3339000 }, { "epoch": 9.67, "learning_rate": 4.5168531747869585e-05, "loss": 2.3303, "step": 3339500 }, { "epoch": 9.67, "learning_rate": 4.516780810022231e-05, "loss": 2.311, "step": 3340000 }, { "epoch": 9.67, "learning_rate": 4.516708445257503e-05, "loss": 2.3345, "step": 3340500 }, { "epoch": 9.67, "learning_rate": 4.516636080492775e-05, "loss": 2.3274, "step": 3341000 }, { "epoch": 9.67, "learning_rate": 4.5165637157280474e-05, "loss": 2.351, "step": 3341500 }, { "epoch": 9.67, "learning_rate": 4.5164914956928496e-05, "loss": 2.3156, "step": 3342000 }, { "epoch": 9.68, "learning_rate": 4.516419130928122e-05, "loss": 2.3387, "step": 3342500 }, { "epoch": 9.68, "learning_rate": 4.516346766163394e-05, "loss": 2.3133, "step": 3343000 }, { "epoch": 9.68, "learning_rate": 4.516274401398666e-05, "loss": 2.3157, "step": 3343500 }, { "epoch": 9.68, "learning_rate": 4.5162020366339385e-05, "loss": 2.3209, "step": 3344000 }, { "epoch": 9.68, "learning_rate": 4.51612981659874e-05, "loss": 2.3304, "step": 3344500 }, { "epoch": 9.68, "learning_rate": 4.516057451834013e-05, "loss": 2.3231, "step": 3345000 }, { "epoch": 9.68, "learning_rate": 4.515985087069285e-05, "loss": 2.3273, "step": 3345500 }, { "epoch": 9.69, "learning_rate": 4.5159127223045574e-05, "loss": 2.3445, "step": 3346000 }, { "epoch": 9.69, "learning_rate": 4.51584035753983e-05, "loss": 2.3307, "step": 3346500 }, { "epoch": 9.69, "learning_rate": 4.5157679927751025e-05, "loss": 2.3225, "step": 3347000 }, { "epoch": 9.69, "learning_rate": 4.515695772739904e-05, "loss": 2.3356, "step": 3347500 }, { "epoch": 9.69, "learning_rate": 4.515623407975176e-05, "loss": 2.3382, "step": 3348000 }, { "epoch": 9.69, "learning_rate": 4.515551187939978e-05, "loss": 2.3314, "step": 3348500 }, { "epoch": 9.69, "learning_rate": 4.51547882317525e-05, "loss": 2.3243, "step": 3349000 }, { "epoch": 9.7, "learning_rate": 4.515406458410523e-05, "loss": 2.3396, "step": 3349500 }, { "epoch": 9.7, "learning_rate": 4.515334093645795e-05, "loss": 2.3253, "step": 3350000 }, { "epoch": 9.7, "learning_rate": 4.5152617288810674e-05, "loss": 2.3373, "step": 3350500 }, { "epoch": 9.7, "learning_rate": 4.51518936411634e-05, "loss": 2.3197, "step": 3351000 }, { "epoch": 9.7, "learning_rate": 4.515116999351612e-05, "loss": 2.3021, "step": 3351500 }, { "epoch": 9.7, "learning_rate": 4.5150447793164134e-05, "loss": 2.3321, "step": 3352000 }, { "epoch": 9.7, "learning_rate": 4.514972414551686e-05, "loss": 2.3308, "step": 3352500 }, { "epoch": 9.71, "learning_rate": 4.514900049786958e-05, "loss": 2.3281, "step": 3353000 }, { "epoch": 9.71, "learning_rate": 4.51482768502223e-05, "loss": 2.3417, "step": 3353500 }, { "epoch": 9.71, "learning_rate": 4.514755320257503e-05, "loss": 2.3097, "step": 3354000 }, { "epoch": 9.71, "learning_rate": 4.514682955492775e-05, "loss": 2.3454, "step": 3354500 }, { "epoch": 9.71, "learning_rate": 4.514610590728048e-05, "loss": 2.3336, "step": 3355000 }, { "epoch": 9.71, "learning_rate": 4.5145382259633204e-05, "loss": 2.3515, "step": 3355500 }, { "epoch": 9.71, "learning_rate": 4.5144658611985926e-05, "loss": 2.3051, "step": 3356000 }, { "epoch": 9.72, "learning_rate": 4.514393496433865e-05, "loss": 2.3247, "step": 3356500 }, { "epoch": 9.72, "learning_rate": 4.514321131669137e-05, "loss": 2.3467, "step": 3357000 }, { "epoch": 9.72, "learning_rate": 4.5142489116339386e-05, "loss": 2.3219, "step": 3357500 }, { "epoch": 9.72, "learning_rate": 4.514176546869211e-05, "loss": 2.3177, "step": 3358000 }, { "epoch": 9.72, "learning_rate": 4.514104182104483e-05, "loss": 2.3135, "step": 3358500 }, { "epoch": 9.72, "learning_rate": 4.514031817339755e-05, "loss": 2.2972, "step": 3359000 }, { "epoch": 9.72, "learning_rate": 4.513959452575028e-05, "loss": 2.3412, "step": 3359500 }, { "epoch": 9.73, "learning_rate": 4.51388723253983e-05, "loss": 2.3365, "step": 3360000 }, { "epoch": 9.73, "learning_rate": 4.513814867775102e-05, "loss": 2.3119, "step": 3360500 }, { "epoch": 9.73, "learning_rate": 4.513742503010374e-05, "loss": 2.3206, "step": 3361000 }, { "epoch": 9.73, "learning_rate": 4.513670138245647e-05, "loss": 2.3444, "step": 3361500 }, { "epoch": 9.73, "learning_rate": 4.513597773480919e-05, "loss": 2.3335, "step": 3362000 }, { "epoch": 9.73, "learning_rate": 4.5135254087161915e-05, "loss": 2.3046, "step": 3362500 }, { "epoch": 9.73, "learning_rate": 4.513453043951464e-05, "loss": 2.2967, "step": 3363000 }, { "epoch": 9.74, "learning_rate": 4.513380679186736e-05, "loss": 2.3432, "step": 3363500 }, { "epoch": 9.74, "learning_rate": 4.513308314422008e-05, "loss": 2.3322, "step": 3364000 }, { "epoch": 9.74, "learning_rate": 4.5132360943868104e-05, "loss": 2.3513, "step": 3364500 }, { "epoch": 9.74, "learning_rate": 4.5131637296220826e-05, "loss": 2.335, "step": 3365000 }, { "epoch": 9.74, "learning_rate": 4.513091364857355e-05, "loss": 2.35, "step": 3365500 }, { "epoch": 9.74, "learning_rate": 4.513019000092627e-05, "loss": 2.3336, "step": 3366000 }, { "epoch": 9.74, "learning_rate": 4.5129467800574286e-05, "loss": 2.325, "step": 3366500 }, { "epoch": 9.75, "learning_rate": 4.512874415292701e-05, "loss": 2.3189, "step": 3367000 }, { "epoch": 9.75, "learning_rate": 4.512802050527973e-05, "loss": 2.3193, "step": 3367500 }, { "epoch": 9.75, "learning_rate": 4.512729830492775e-05, "loss": 2.3087, "step": 3368000 }, { "epoch": 9.75, "learning_rate": 4.5126574657280475e-05, "loss": 2.32, "step": 3368500 }, { "epoch": 9.75, "learning_rate": 4.5125851009633204e-05, "loss": 2.3051, "step": 3369000 }, { "epoch": 9.75, "learning_rate": 4.512512736198593e-05, "loss": 2.3297, "step": 3369500 }, { "epoch": 9.75, "learning_rate": 4.512440371433865e-05, "loss": 2.347, "step": 3370000 }, { "epoch": 9.76, "learning_rate": 4.512368006669137e-05, "loss": 2.3077, "step": 3370500 }, { "epoch": 9.76, "learning_rate": 4.512295786633939e-05, "loss": 2.3406, "step": 3371000 }, { "epoch": 9.76, "learning_rate": 4.512223421869211e-05, "loss": 2.3536, "step": 3371500 }, { "epoch": 9.76, "learning_rate": 4.512151057104483e-05, "loss": 2.3295, "step": 3372000 }, { "epoch": 9.76, "learning_rate": 4.512078692339755e-05, "loss": 2.3521, "step": 3372500 }, { "epoch": 9.76, "learning_rate": 4.512006327575028e-05, "loss": 2.3469, "step": 3373000 }, { "epoch": 9.76, "learning_rate": 4.5119339628103005e-05, "loss": 2.3348, "step": 3373500 }, { "epoch": 9.77, "learning_rate": 4.511861742775102e-05, "loss": 2.3283, "step": 3374000 }, { "epoch": 9.77, "learning_rate": 4.511789378010374e-05, "loss": 2.3374, "step": 3374500 }, { "epoch": 9.77, "learning_rate": 4.5117170132456465e-05, "loss": 2.3595, "step": 3375000 }, { "epoch": 9.77, "learning_rate": 4.511644648480919e-05, "loss": 2.3254, "step": 3375500 }, { "epoch": 9.77, "learning_rate": 4.511572428445721e-05, "loss": 2.3029, "step": 3376000 }, { "epoch": 9.77, "learning_rate": 4.511500063680993e-05, "loss": 2.3188, "step": 3376500 }, { "epoch": 9.78, "learning_rate": 4.511427698916266e-05, "loss": 2.3274, "step": 3377000 }, { "epoch": 9.78, "learning_rate": 4.511355334151538e-05, "loss": 2.3129, "step": 3377500 }, { "epoch": 9.78, "learning_rate": 4.5112829693868105e-05, "loss": 2.3382, "step": 3378000 }, { "epoch": 9.78, "learning_rate": 4.511210604622083e-05, "loss": 2.3261, "step": 3378500 }, { "epoch": 9.78, "learning_rate": 4.511138239857355e-05, "loss": 2.3301, "step": 3379000 }, { "epoch": 9.78, "learning_rate": 4.511065875092627e-05, "loss": 2.3149, "step": 3379500 }, { "epoch": 9.78, "learning_rate": 4.5109935103278994e-05, "loss": 2.313, "step": 3380000 }, { "epoch": 9.79, "learning_rate": 4.5109211455631716e-05, "loss": 2.3396, "step": 3380500 }, { "epoch": 9.79, "learning_rate": 4.510848925527973e-05, "loss": 2.3575, "step": 3381000 }, { "epoch": 9.79, "learning_rate": 4.5107767054927754e-05, "loss": 2.3482, "step": 3381500 }, { "epoch": 9.79, "learning_rate": 4.5107043407280476e-05, "loss": 2.3407, "step": 3382000 }, { "epoch": 9.79, "learning_rate": 4.51063197596332e-05, "loss": 2.31, "step": 3382500 }, { "epoch": 9.79, "learning_rate": 4.510559611198592e-05, "loss": 2.3117, "step": 3383000 }, { "epoch": 9.79, "learning_rate": 4.510487246433864e-05, "loss": 2.3428, "step": 3383500 }, { "epoch": 9.8, "learning_rate": 4.510414881669137e-05, "loss": 2.3114, "step": 3384000 }, { "epoch": 9.8, "learning_rate": 4.5103425169044094e-05, "loss": 2.3283, "step": 3384500 }, { "epoch": 9.8, "learning_rate": 4.5102701521396816e-05, "loss": 2.3051, "step": 3385000 }, { "epoch": 9.8, "learning_rate": 4.510197787374954e-05, "loss": 2.3164, "step": 3385500 }, { "epoch": 9.8, "learning_rate": 4.510125422610226e-05, "loss": 2.3335, "step": 3386000 }, { "epoch": 9.8, "learning_rate": 4.510053057845498e-05, "loss": 2.2876, "step": 3386500 }, { "epoch": 9.8, "learning_rate": 4.5099806930807705e-05, "loss": 2.3192, "step": 3387000 }, { "epoch": 9.81, "learning_rate": 4.509908617775102e-05, "loss": 2.3278, "step": 3387500 }, { "epoch": 9.81, "learning_rate": 4.509836253010374e-05, "loss": 2.3359, "step": 3388000 }, { "epoch": 9.81, "learning_rate": 4.5097638882456465e-05, "loss": 2.3152, "step": 3388500 }, { "epoch": 9.81, "learning_rate": 4.509691523480919e-05, "loss": 2.3165, "step": 3389000 }, { "epoch": 9.81, "learning_rate": 4.509619158716191e-05, "loss": 2.3534, "step": 3389500 }, { "epoch": 9.81, "learning_rate": 4.509546938680993e-05, "loss": 2.3431, "step": 3390000 }, { "epoch": 9.81, "learning_rate": 4.5094745739162654e-05, "loss": 2.322, "step": 3390500 }, { "epoch": 9.82, "learning_rate": 4.509402209151538e-05, "loss": 2.2985, "step": 3391000 }, { "epoch": 9.82, "learning_rate": 4.5093298443868106e-05, "loss": 2.3345, "step": 3391500 }, { "epoch": 9.82, "learning_rate": 4.509257479622083e-05, "loss": 2.3151, "step": 3392000 }, { "epoch": 9.82, "learning_rate": 4.509185114857355e-05, "loss": 2.3381, "step": 3392500 }, { "epoch": 9.82, "learning_rate": 4.509112750092627e-05, "loss": 2.3311, "step": 3393000 }, { "epoch": 9.82, "learning_rate": 4.509040530057429e-05, "loss": 2.3288, "step": 3393500 }, { "epoch": 9.82, "learning_rate": 4.5089684547517604e-05, "loss": 2.3163, "step": 3394000 }, { "epoch": 9.83, "learning_rate": 4.5088960899870326e-05, "loss": 2.3489, "step": 3394500 }, { "epoch": 9.83, "learning_rate": 4.508823725222305e-05, "loss": 2.3378, "step": 3395000 }, { "epoch": 9.83, "learning_rate": 4.508751360457577e-05, "loss": 2.3618, "step": 3395500 }, { "epoch": 9.83, "learning_rate": 4.508678995692849e-05, "loss": 2.3199, "step": 3396000 }, { "epoch": 9.83, "learning_rate": 4.5086066309281215e-05, "loss": 2.3324, "step": 3396500 }, { "epoch": 9.83, "learning_rate": 4.508534266163394e-05, "loss": 2.3193, "step": 3397000 }, { "epoch": 9.83, "learning_rate": 4.508461901398666e-05, "loss": 2.3233, "step": 3397500 }, { "epoch": 9.84, "learning_rate": 4.508389536633938e-05, "loss": 2.3182, "step": 3398000 }, { "epoch": 9.84, "learning_rate": 4.508317171869211e-05, "loss": 2.3386, "step": 3398500 }, { "epoch": 9.84, "learning_rate": 4.508244807104483e-05, "loss": 2.3117, "step": 3399000 }, { "epoch": 9.84, "learning_rate": 4.508172442339756e-05, "loss": 2.3334, "step": 3399500 }, { "epoch": 9.84, "learning_rate": 4.5081000775750284e-05, "loss": 2.3251, "step": 3400000 }, { "epoch": 9.84, "learning_rate": 4.5080277128103006e-05, "loss": 2.3366, "step": 3400500 }, { "epoch": 9.84, "learning_rate": 4.507955348045573e-05, "loss": 2.3381, "step": 3401000 }, { "epoch": 9.85, "learning_rate": 4.507882983280845e-05, "loss": 2.3131, "step": 3401500 }, { "epoch": 9.85, "learning_rate": 4.5078107632456466e-05, "loss": 2.3466, "step": 3402000 }, { "epoch": 9.85, "learning_rate": 4.507738543210449e-05, "loss": 2.3172, "step": 3402500 }, { "epoch": 9.85, "learning_rate": 4.507666178445721e-05, "loss": 2.3446, "step": 3403000 }, { "epoch": 9.85, "learning_rate": 4.5075939584105226e-05, "loss": 2.329, "step": 3403500 }, { "epoch": 9.85, "learning_rate": 4.507521593645795e-05, "loss": 2.3248, "step": 3404000 }, { "epoch": 9.85, "learning_rate": 4.507449228881067e-05, "loss": 2.3084, "step": 3404500 }, { "epoch": 9.86, "learning_rate": 4.507376864116339e-05, "loss": 2.2927, "step": 3405000 }, { "epoch": 9.86, "learning_rate": 4.5073044993516115e-05, "loss": 2.3205, "step": 3405500 }, { "epoch": 9.86, "learning_rate": 4.507232134586884e-05, "loss": 2.3195, "step": 3406000 }, { "epoch": 9.86, "learning_rate": 4.5071597698221567e-05, "loss": 2.3458, "step": 3406500 }, { "epoch": 9.86, "learning_rate": 4.507087405057429e-05, "loss": 2.311, "step": 3407000 }, { "epoch": 9.86, "learning_rate": 4.507015040292701e-05, "loss": 2.3347, "step": 3407500 }, { "epoch": 9.86, "learning_rate": 4.506942675527974e-05, "loss": 2.3418, "step": 3408000 }, { "epoch": 9.87, "learning_rate": 4.5068704554927756e-05, "loss": 2.3261, "step": 3408500 }, { "epoch": 9.87, "learning_rate": 4.506798090728048e-05, "loss": 2.3226, "step": 3409000 }, { "epoch": 9.87, "learning_rate": 4.50672572596332e-05, "loss": 2.3253, "step": 3409500 }, { "epoch": 9.87, "learning_rate": 4.506653361198592e-05, "loss": 2.3061, "step": 3410000 }, { "epoch": 9.87, "learning_rate": 4.5065809964338644e-05, "loss": 2.3364, "step": 3410500 }, { "epoch": 9.87, "learning_rate": 4.506508631669137e-05, "loss": 2.3197, "step": 3411000 }, { "epoch": 9.87, "learning_rate": 4.506436411633939e-05, "loss": 2.3179, "step": 3411500 }, { "epoch": 9.88, "learning_rate": 4.506364046869211e-05, "loss": 2.3081, "step": 3412000 }, { "epoch": 9.88, "learning_rate": 4.5062916821044834e-05, "loss": 2.3061, "step": 3412500 }, { "epoch": 9.88, "learning_rate": 4.5062193173397556e-05, "loss": 2.3285, "step": 3413000 }, { "epoch": 9.88, "learning_rate": 4.5061469525750285e-05, "loss": 2.3329, "step": 3413500 }, { "epoch": 9.88, "learning_rate": 4.506074587810301e-05, "loss": 2.3266, "step": 3414000 }, { "epoch": 9.88, "learning_rate": 4.506002223045573e-05, "loss": 2.319, "step": 3414500 }, { "epoch": 9.89, "learning_rate": 4.505929858280845e-05, "loss": 2.3325, "step": 3415000 }, { "epoch": 9.89, "learning_rate": 4.5058574935161174e-05, "loss": 2.3383, "step": 3415500 }, { "epoch": 9.89, "learning_rate": 4.505785273480919e-05, "loss": 2.3449, "step": 3416000 }, { "epoch": 9.89, "learning_rate": 4.505713053445721e-05, "loss": 2.3295, "step": 3416500 }, { "epoch": 9.89, "learning_rate": 4.5056406886809934e-05, "loss": 2.3312, "step": 3417000 }, { "epoch": 9.89, "learning_rate": 4.5055683239162656e-05, "loss": 2.3315, "step": 3417500 }, { "epoch": 9.89, "learning_rate": 4.505495959151538e-05, "loss": 2.3177, "step": 3418000 }, { "epoch": 9.9, "learning_rate": 4.50542359438681e-05, "loss": 2.3135, "step": 3418500 }, { "epoch": 9.9, "learning_rate": 4.5053513743516116e-05, "loss": 2.3116, "step": 3419000 }, { "epoch": 9.9, "learning_rate": 4.505279009586884e-05, "loss": 2.3395, "step": 3419500 }, { "epoch": 9.9, "learning_rate": 4.505206644822156e-05, "loss": 2.3342, "step": 3420000 }, { "epoch": 9.9, "learning_rate": 4.505134280057429e-05, "loss": 2.3226, "step": 3420500 }, { "epoch": 9.9, "learning_rate": 4.505061915292701e-05, "loss": 2.3268, "step": 3421000 }, { "epoch": 9.9, "learning_rate": 4.504989550527974e-05, "loss": 2.3373, "step": 3421500 }, { "epoch": 9.91, "learning_rate": 4.504917185763246e-05, "loss": 2.3346, "step": 3422000 }, { "epoch": 9.91, "learning_rate": 4.5048448209985185e-05, "loss": 2.3306, "step": 3422500 }, { "epoch": 9.91, "learning_rate": 4.504772456233791e-05, "loss": 2.3415, "step": 3423000 }, { "epoch": 9.91, "learning_rate": 4.504700236198592e-05, "loss": 2.3437, "step": 3423500 }, { "epoch": 9.91, "learning_rate": 4.504628016163394e-05, "loss": 2.3268, "step": 3424000 }, { "epoch": 9.91, "learning_rate": 4.504555651398666e-05, "loss": 2.3542, "step": 3424500 }, { "epoch": 9.91, "learning_rate": 4.504483286633939e-05, "loss": 2.317, "step": 3425000 }, { "epoch": 9.92, "learning_rate": 4.504410921869211e-05, "loss": 2.3436, "step": 3425500 }, { "epoch": 9.92, "learning_rate": 4.5043385571044834e-05, "loss": 2.3258, "step": 3426000 }, { "epoch": 9.92, "learning_rate": 4.5042661923397557e-05, "loss": 2.3314, "step": 3426500 }, { "epoch": 9.92, "learning_rate": 4.504193827575028e-05, "loss": 2.3301, "step": 3427000 }, { "epoch": 9.92, "learning_rate": 4.5041214628103e-05, "loss": 2.2861, "step": 3427500 }, { "epoch": 9.92, "learning_rate": 4.504049098045572e-05, "loss": 2.323, "step": 3428000 }, { "epoch": 9.92, "learning_rate": 4.503976878010374e-05, "loss": 2.3564, "step": 3428500 }, { "epoch": 9.93, "learning_rate": 4.503904513245647e-05, "loss": 2.319, "step": 3429000 }, { "epoch": 9.93, "learning_rate": 4.503832148480919e-05, "loss": 2.3351, "step": 3429500 }, { "epoch": 9.93, "learning_rate": 4.503759783716191e-05, "loss": 2.3253, "step": 3430000 }, { "epoch": 9.93, "learning_rate": 4.503687418951464e-05, "loss": 2.3281, "step": 3430500 }, { "epoch": 9.93, "learning_rate": 4.5036150541867364e-05, "loss": 2.3177, "step": 3431000 }, { "epoch": 9.93, "learning_rate": 4.503542834151538e-05, "loss": 2.3294, "step": 3431500 }, { "epoch": 9.93, "learning_rate": 4.50347046938681e-05, "loss": 2.3607, "step": 3432000 }, { "epoch": 9.94, "learning_rate": 4.503398249351612e-05, "loss": 2.3445, "step": 3432500 }, { "epoch": 9.94, "learning_rate": 4.503325884586884e-05, "loss": 2.2968, "step": 3433000 }, { "epoch": 9.94, "learning_rate": 4.503253519822157e-05, "loss": 2.331, "step": 3433500 }, { "epoch": 9.94, "learning_rate": 4.503181155057429e-05, "loss": 2.3187, "step": 3434000 }, { "epoch": 9.94, "learning_rate": 4.503108790292701e-05, "loss": 2.3184, "step": 3434500 }, { "epoch": 9.94, "learning_rate": 4.503036570257503e-05, "loss": 2.3246, "step": 3435000 }, { "epoch": 9.94, "learning_rate": 4.502964205492775e-05, "loss": 2.335, "step": 3435500 }, { "epoch": 9.95, "learning_rate": 4.502891840728047e-05, "loss": 2.3015, "step": 3436000 }, { "epoch": 9.95, "learning_rate": 4.50281947596332e-05, "loss": 2.3212, "step": 3436500 }, { "epoch": 9.95, "learning_rate": 4.5027471111985924e-05, "loss": 2.3051, "step": 3437000 }, { "epoch": 9.95, "learning_rate": 4.5026747464338646e-05, "loss": 2.3372, "step": 3437500 }, { "epoch": 9.95, "learning_rate": 4.502602381669137e-05, "loss": 2.3309, "step": 3438000 }, { "epoch": 9.95, "learning_rate": 4.502530016904409e-05, "loss": 2.3318, "step": 3438500 }, { "epoch": 9.95, "learning_rate": 4.502457796869211e-05, "loss": 2.3449, "step": 3439000 }, { "epoch": 9.96, "learning_rate": 4.5023854321044835e-05, "loss": 2.3279, "step": 3439500 }, { "epoch": 9.96, "learning_rate": 4.502313067339756e-05, "loss": 2.3422, "step": 3440000 }, { "epoch": 9.96, "learning_rate": 4.502240702575028e-05, "loss": 2.3204, "step": 3440500 }, { "epoch": 9.96, "learning_rate": 4.5021683378103e-05, "loss": 2.3294, "step": 3441000 }, { "epoch": 9.96, "learning_rate": 4.502096117775102e-05, "loss": 2.3231, "step": 3441500 }, { "epoch": 9.96, "learning_rate": 4.502023753010374e-05, "loss": 2.3156, "step": 3442000 }, { "epoch": 9.96, "learning_rate": 4.501951388245647e-05, "loss": 2.3331, "step": 3442500 }, { "epoch": 9.97, "learning_rate": 4.5018791682104484e-05, "loss": 2.3379, "step": 3443000 }, { "epoch": 9.97, "learning_rate": 4.5018068034457206e-05, "loss": 2.34, "step": 3443500 }, { "epoch": 9.97, "learning_rate": 4.5017344386809935e-05, "loss": 2.3163, "step": 3444000 }, { "epoch": 9.97, "learning_rate": 4.501662073916266e-05, "loss": 2.3472, "step": 3444500 }, { "epoch": 9.97, "learning_rate": 4.501589709151538e-05, "loss": 2.346, "step": 3445000 }, { "epoch": 9.97, "learning_rate": 4.50151734438681e-05, "loss": 2.3298, "step": 3445500 }, { "epoch": 9.97, "learning_rate": 4.5014449796220824e-05, "loss": 2.3314, "step": 3446000 }, { "epoch": 9.98, "learning_rate": 4.5013726148573547e-05, "loss": 2.311, "step": 3446500 }, { "epoch": 9.98, "learning_rate": 4.501300250092627e-05, "loss": 2.3492, "step": 3447000 }, { "epoch": 9.98, "learning_rate": 4.501227885327899e-05, "loss": 2.3293, "step": 3447500 }, { "epoch": 9.98, "learning_rate": 4.501155520563172e-05, "loss": 2.3351, "step": 3448000 }, { "epoch": 9.98, "learning_rate": 4.5010833005279736e-05, "loss": 2.3085, "step": 3448500 }, { "epoch": 9.98, "learning_rate": 4.501010935763246e-05, "loss": 2.3398, "step": 3449000 }, { "epoch": 9.98, "learning_rate": 4.500938570998518e-05, "loss": 2.3143, "step": 3449500 }, { "epoch": 9.99, "learning_rate": 4.5008663509633196e-05, "loss": 2.3132, "step": 3450000 }, { "epoch": 9.99, "learning_rate": 4.500793986198592e-05, "loss": 2.3422, "step": 3450500 }, { "epoch": 9.99, "learning_rate": 4.500721621433864e-05, "loss": 2.3604, "step": 3451000 }, { "epoch": 9.99, "learning_rate": 4.500649256669137e-05, "loss": 2.3286, "step": 3451500 }, { "epoch": 9.99, "learning_rate": 4.500576891904409e-05, "loss": 2.3214, "step": 3452000 }, { "epoch": 9.99, "learning_rate": 4.500504527139682e-05, "loss": 2.359, "step": 3452500 }, { "epoch": 10.0, "learning_rate": 4.500432162374954e-05, "loss": 2.3204, "step": 3453000 }, { "epoch": 10.0, "learning_rate": 4.5003597976102265e-05, "loss": 2.338, "step": 3453500 }, { "epoch": 10.0, "learning_rate": 4.500287432845499e-05, "loss": 2.3387, "step": 3454000 }, { "epoch": 10.0, "learning_rate": 4.5002152128103e-05, "loss": 2.3167, "step": 3454500 }, { "epoch": 10.0, "eval_accuracy": 0.6491371245160269, "eval_accuracy_mlm": 0.6116889359002926, "eval_accuracy_nsp": 0.8501739285861599, "eval_loss": 2.3059029579162598, "eval_runtime": 330.448, "eval_samples_per_second": 1320.589, "eval_steps_per_second": 55.025, "step": 3454720 }, { "epoch": 10.0, "learning_rate": 4.5001428480455725e-05, "loss": 2.318, "step": 3455000 }, { "epoch": 10.0, "learning_rate": 4.500070628010374e-05, "loss": 2.3119, "step": 3455500 }, { "epoch": 10.0, "learning_rate": 4.499998263245647e-05, "loss": 2.2885, "step": 3456000 }, { "epoch": 10.01, "learning_rate": 4.499925898480919e-05, "loss": 2.3048, "step": 3456500 }, { "epoch": 10.01, "learning_rate": 4.4998535337161914e-05, "loss": 2.303, "step": 3457000 }, { "epoch": 10.01, "learning_rate": 4.4997811689514636e-05, "loss": 2.2934, "step": 3457500 }, { "epoch": 10.01, "learning_rate": 4.499708804186736e-05, "loss": 2.3209, "step": 3458000 }, { "epoch": 10.01, "learning_rate": 4.499636439422009e-05, "loss": 2.2897, "step": 3458500 }, { "epoch": 10.01, "learning_rate": 4.49956421938681e-05, "loss": 2.3233, "step": 3459000 }, { "epoch": 10.01, "learning_rate": 4.4994918546220825e-05, "loss": 2.2969, "step": 3459500 }, { "epoch": 10.02, "learning_rate": 4.499419489857355e-05, "loss": 2.3026, "step": 3460000 }, { "epoch": 10.02, "learning_rate": 4.499347125092627e-05, "loss": 2.3057, "step": 3460500 }, { "epoch": 10.02, "learning_rate": 4.499274760327899e-05, "loss": 2.3203, "step": 3461000 }, { "epoch": 10.02, "learning_rate": 4.4992025402927014e-05, "loss": 2.3155, "step": 3461500 }, { "epoch": 10.02, "learning_rate": 4.499130320257503e-05, "loss": 2.3069, "step": 3462000 }, { "epoch": 10.02, "learning_rate": 4.499057955492775e-05, "loss": 2.304, "step": 3462500 }, { "epoch": 10.02, "learning_rate": 4.4989855907280474e-05, "loss": 2.2898, "step": 3463000 }, { "epoch": 10.03, "learning_rate": 4.4989132259633196e-05, "loss": 2.3275, "step": 3463500 }, { "epoch": 10.03, "learning_rate": 4.498840861198592e-05, "loss": 2.3204, "step": 3464000 }, { "epoch": 10.03, "learning_rate": 4.498768496433865e-05, "loss": 2.3107, "step": 3464500 }, { "epoch": 10.03, "learning_rate": 4.498696131669137e-05, "loss": 2.3287, "step": 3465000 }, { "epoch": 10.03, "learning_rate": 4.498623766904409e-05, "loss": 2.304, "step": 3465500 }, { "epoch": 10.03, "learning_rate": 4.498551402139682e-05, "loss": 2.2975, "step": 3466000 }, { "epoch": 10.03, "learning_rate": 4.4984790373749543e-05, "loss": 2.3121, "step": 3466500 }, { "epoch": 10.04, "learning_rate": 4.498406817339756e-05, "loss": 2.2987, "step": 3467000 }, { "epoch": 10.04, "learning_rate": 4.498334452575028e-05, "loss": 2.3093, "step": 3467500 }, { "epoch": 10.04, "learning_rate": 4.4982620878103003e-05, "loss": 2.2933, "step": 3468000 }, { "epoch": 10.04, "learning_rate": 4.4981897230455726e-05, "loss": 2.3028, "step": 3468500 }, { "epoch": 10.04, "learning_rate": 4.498117358280845e-05, "loss": 2.2951, "step": 3469000 }, { "epoch": 10.04, "learning_rate": 4.498044993516117e-05, "loss": 2.3064, "step": 3469500 }, { "epoch": 10.04, "learning_rate": 4.49797262875139e-05, "loss": 2.309, "step": 3470000 }, { "epoch": 10.05, "learning_rate": 4.4979004087161915e-05, "loss": 2.3176, "step": 3470500 }, { "epoch": 10.05, "learning_rate": 4.497828188680993e-05, "loss": 2.288, "step": 3471000 }, { "epoch": 10.05, "learning_rate": 4.497755823916265e-05, "loss": 2.2905, "step": 3471500 }, { "epoch": 10.05, "learning_rate": 4.4976834591515375e-05, "loss": 2.3069, "step": 3472000 }, { "epoch": 10.05, "learning_rate": 4.49761109438681e-05, "loss": 2.3048, "step": 3472500 }, { "epoch": 10.05, "learning_rate": 4.497538729622082e-05, "loss": 2.3146, "step": 3473000 }, { "epoch": 10.05, "learning_rate": 4.497466364857355e-05, "loss": 2.3175, "step": 3473500 }, { "epoch": 10.06, "learning_rate": 4.497394000092627e-05, "loss": 2.3173, "step": 3474000 }, { "epoch": 10.06, "learning_rate": 4.4973216353279e-05, "loss": 2.313, "step": 3474500 }, { "epoch": 10.06, "learning_rate": 4.497249270563172e-05, "loss": 2.291, "step": 3475000 }, { "epoch": 10.06, "learning_rate": 4.497177050527974e-05, "loss": 2.3106, "step": 3475500 }, { "epoch": 10.06, "learning_rate": 4.497104685763246e-05, "loss": 2.3018, "step": 3476000 }, { "epoch": 10.06, "learning_rate": 4.497032320998518e-05, "loss": 2.3165, "step": 3476500 }, { "epoch": 10.06, "learning_rate": 4.4969599562337904e-05, "loss": 2.3238, "step": 3477000 }, { "epoch": 10.07, "learning_rate": 4.4968875914690626e-05, "loss": 2.3212, "step": 3477500 }, { "epoch": 10.07, "learning_rate": 4.496815226704335e-05, "loss": 2.3242, "step": 3478000 }, { "epoch": 10.07, "learning_rate": 4.496742861939607e-05, "loss": 2.3106, "step": 3478500 }, { "epoch": 10.07, "learning_rate": 4.49667049717488e-05, "loss": 2.2926, "step": 3479000 }, { "epoch": 10.07, "learning_rate": 4.496598132410152e-05, "loss": 2.2834, "step": 3479500 }, { "epoch": 10.07, "learning_rate": 4.496525912374954e-05, "loss": 2.3286, "step": 3480000 }, { "epoch": 10.07, "learning_rate": 4.496453692339755e-05, "loss": 2.3134, "step": 3480500 }, { "epoch": 10.08, "learning_rate": 4.496381327575028e-05, "loss": 2.3021, "step": 3481000 }, { "epoch": 10.08, "learning_rate": 4.4963089628103004e-05, "loss": 2.2945, "step": 3481500 }, { "epoch": 10.08, "learning_rate": 4.4962365980455726e-05, "loss": 2.3017, "step": 3482000 }, { "epoch": 10.08, "learning_rate": 4.496164233280845e-05, "loss": 2.2884, "step": 3482500 }, { "epoch": 10.08, "learning_rate": 4.496092013245647e-05, "loss": 2.3153, "step": 3483000 }, { "epoch": 10.08, "learning_rate": 4.496019648480919e-05, "loss": 2.3066, "step": 3483500 }, { "epoch": 10.08, "learning_rate": 4.4959472837161915e-05, "loss": 2.3235, "step": 3484000 }, { "epoch": 10.09, "learning_rate": 4.495874918951464e-05, "loss": 2.2898, "step": 3484500 }, { "epoch": 10.09, "learning_rate": 4.495802554186736e-05, "loss": 2.2885, "step": 3485000 }, { "epoch": 10.09, "learning_rate": 4.495730189422008e-05, "loss": 2.3289, "step": 3485500 }, { "epoch": 10.09, "learning_rate": 4.49565796938681e-05, "loss": 2.3213, "step": 3486000 }, { "epoch": 10.09, "learning_rate": 4.495585604622083e-05, "loss": 2.3121, "step": 3486500 }, { "epoch": 10.09, "learning_rate": 4.495513239857355e-05, "loss": 2.2947, "step": 3487000 }, { "epoch": 10.09, "learning_rate": 4.495440875092627e-05, "loss": 2.3304, "step": 3487500 }, { "epoch": 10.1, "learning_rate": 4.4953685103278993e-05, "loss": 2.3129, "step": 3488000 }, { "epoch": 10.1, "learning_rate": 4.495296145563172e-05, "loss": 2.3016, "step": 3488500 }, { "epoch": 10.1, "learning_rate": 4.4952237807984445e-05, "loss": 2.3186, "step": 3489000 }, { "epoch": 10.1, "learning_rate": 4.495151416033717e-05, "loss": 2.3172, "step": 3489500 }, { "epoch": 10.1, "learning_rate": 4.495079051268989e-05, "loss": 2.3307, "step": 3490000 }, { "epoch": 10.1, "learning_rate": 4.4950068312337905e-05, "loss": 2.3105, "step": 3490500 }, { "epoch": 10.11, "learning_rate": 4.494934611198593e-05, "loss": 2.2984, "step": 3491000 }, { "epoch": 10.11, "learning_rate": 4.494862246433865e-05, "loss": 2.2892, "step": 3491500 }, { "epoch": 10.11, "learning_rate": 4.494789881669137e-05, "loss": 2.2977, "step": 3492000 }, { "epoch": 10.11, "learning_rate": 4.4947175169044094e-05, "loss": 2.3457, "step": 3492500 }, { "epoch": 10.11, "learning_rate": 4.4946451521396816e-05, "loss": 2.3209, "step": 3493000 }, { "epoch": 10.11, "learning_rate": 4.494572787374954e-05, "loss": 2.3048, "step": 3493500 }, { "epoch": 10.11, "learning_rate": 4.494500422610226e-05, "loss": 2.32, "step": 3494000 }, { "epoch": 10.12, "learning_rate": 4.494428057845498e-05, "loss": 2.3068, "step": 3494500 }, { "epoch": 10.12, "learning_rate": 4.4943556930807705e-05, "loss": 2.2979, "step": 3495000 }, { "epoch": 10.12, "learning_rate": 4.494283617775102e-05, "loss": 2.3157, "step": 3495500 }, { "epoch": 10.12, "learning_rate": 4.494211253010374e-05, "loss": 2.3117, "step": 3496000 }, { "epoch": 10.12, "learning_rate": 4.494138888245647e-05, "loss": 2.3028, "step": 3496500 }, { "epoch": 10.12, "learning_rate": 4.4940665234809194e-05, "loss": 2.2735, "step": 3497000 }, { "epoch": 10.12, "learning_rate": 4.4939941587161916e-05, "loss": 2.3104, "step": 3497500 }, { "epoch": 10.13, "learning_rate": 4.493921793951464e-05, "loss": 2.3049, "step": 3498000 }, { "epoch": 10.13, "learning_rate": 4.493849429186736e-05, "loss": 2.3192, "step": 3498500 }, { "epoch": 10.13, "learning_rate": 4.493777064422008e-05, "loss": 2.2936, "step": 3499000 }, { "epoch": 10.13, "learning_rate": 4.4937046996572805e-05, "loss": 2.3174, "step": 3499500 }, { "epoch": 10.13, "learning_rate": 4.493632479622083e-05, "loss": 2.3222, "step": 3500000 }, { "epoch": 10.13, "learning_rate": 4.493560114857355e-05, "loss": 2.3427, "step": 3500500 }, { "epoch": 10.13, "learning_rate": 4.4934878948221565e-05, "loss": 2.2852, "step": 3501000 }, { "epoch": 10.14, "learning_rate": 4.493415530057429e-05, "loss": 2.3139, "step": 3501500 }, { "epoch": 10.14, "learning_rate": 4.493343165292701e-05, "loss": 2.3273, "step": 3502000 }, { "epoch": 10.14, "learning_rate": 4.493270800527973e-05, "loss": 2.3065, "step": 3502500 }, { "epoch": 10.14, "learning_rate": 4.493198580492775e-05, "loss": 2.3259, "step": 3503000 }, { "epoch": 10.14, "learning_rate": 4.4931262157280477e-05, "loss": 2.2944, "step": 3503500 }, { "epoch": 10.14, "learning_rate": 4.49305385096332e-05, "loss": 2.2858, "step": 3504000 }, { "epoch": 10.14, "learning_rate": 4.492981486198593e-05, "loss": 2.3192, "step": 3504500 }, { "epoch": 10.15, "learning_rate": 4.492909121433865e-05, "loss": 2.2994, "step": 3505000 }, { "epoch": 10.15, "learning_rate": 4.492836756669137e-05, "loss": 2.3184, "step": 3505500 }, { "epoch": 10.15, "learning_rate": 4.492764536633939e-05, "loss": 2.3111, "step": 3506000 }, { "epoch": 10.15, "learning_rate": 4.4926923165987403e-05, "loss": 2.3085, "step": 3506500 }, { "epoch": 10.15, "learning_rate": 4.4926199518340126e-05, "loss": 2.2825, "step": 3507000 }, { "epoch": 10.15, "learning_rate": 4.4925475870692855e-05, "loss": 2.3119, "step": 3507500 }, { "epoch": 10.15, "learning_rate": 4.492475222304558e-05, "loss": 2.3239, "step": 3508000 }, { "epoch": 10.16, "learning_rate": 4.49240285753983e-05, "loss": 2.3107, "step": 3508500 }, { "epoch": 10.16, "learning_rate": 4.492330492775102e-05, "loss": 2.3135, "step": 3509000 }, { "epoch": 10.16, "learning_rate": 4.4922581280103744e-05, "loss": 2.3089, "step": 3509500 }, { "epoch": 10.16, "learning_rate": 4.4921857632456466e-05, "loss": 2.3052, "step": 3510000 }, { "epoch": 10.16, "learning_rate": 4.492113398480919e-05, "loss": 2.3301, "step": 3510500 }, { "epoch": 10.16, "learning_rate": 4.4920411784457204e-05, "loss": 2.3085, "step": 3511000 }, { "epoch": 10.16, "learning_rate": 4.491968813680993e-05, "loss": 2.3181, "step": 3511500 }, { "epoch": 10.17, "learning_rate": 4.4918964489162655e-05, "loss": 2.3085, "step": 3512000 }, { "epoch": 10.17, "learning_rate": 4.491824084151538e-05, "loss": 2.2796, "step": 3512500 }, { "epoch": 10.17, "learning_rate": 4.49175186411634e-05, "loss": 2.2842, "step": 3513000 }, { "epoch": 10.17, "learning_rate": 4.491679499351612e-05, "loss": 2.3144, "step": 3513500 }, { "epoch": 10.17, "learning_rate": 4.4916071345868844e-05, "loss": 2.3372, "step": 3514000 }, { "epoch": 10.17, "learning_rate": 4.4915347698221566e-05, "loss": 2.3084, "step": 3514500 }, { "epoch": 10.17, "learning_rate": 4.491462405057429e-05, "loss": 2.3374, "step": 3515000 }, { "epoch": 10.18, "learning_rate": 4.491390040292701e-05, "loss": 2.2806, "step": 3515500 }, { "epoch": 10.18, "learning_rate": 4.491317675527973e-05, "loss": 2.3277, "step": 3516000 }, { "epoch": 10.18, "learning_rate": 4.4912453107632455e-05, "loss": 2.3288, "step": 3516500 }, { "epoch": 10.18, "learning_rate": 4.491173090728048e-05, "loss": 2.3314, "step": 3517000 }, { "epoch": 10.18, "learning_rate": 4.49110072596332e-05, "loss": 2.3179, "step": 3517500 }, { "epoch": 10.18, "learning_rate": 4.491028361198592e-05, "loss": 2.2908, "step": 3518000 }, { "epoch": 10.18, "learning_rate": 4.4909559964338644e-05, "loss": 2.2958, "step": 3518500 }, { "epoch": 10.19, "learning_rate": 4.490883631669137e-05, "loss": 2.2872, "step": 3519000 }, { "epoch": 10.19, "learning_rate": 4.490811411633939e-05, "loss": 2.3406, "step": 3519500 }, { "epoch": 10.19, "learning_rate": 4.490739046869211e-05, "loss": 2.2884, "step": 3520000 }, { "epoch": 10.19, "learning_rate": 4.490666682104483e-05, "loss": 2.3085, "step": 3520500 }, { "epoch": 10.19, "learning_rate": 4.4905944620692855e-05, "loss": 2.3172, "step": 3521000 }, { "epoch": 10.19, "learning_rate": 4.490522097304558e-05, "loss": 2.3198, "step": 3521500 }, { "epoch": 10.19, "learning_rate": 4.49044973253983e-05, "loss": 2.2868, "step": 3522000 }, { "epoch": 10.2, "learning_rate": 4.490377367775102e-05, "loss": 2.3186, "step": 3522500 }, { "epoch": 10.2, "learning_rate": 4.4903050030103744e-05, "loss": 2.3346, "step": 3523000 }, { "epoch": 10.2, "learning_rate": 4.490232638245647e-05, "loss": 2.3063, "step": 3523500 }, { "epoch": 10.2, "learning_rate": 4.490160418210448e-05, "loss": 2.3233, "step": 3524000 }, { "epoch": 10.2, "learning_rate": 4.4900880534457204e-05, "loss": 2.305, "step": 3524500 }, { "epoch": 10.2, "learning_rate": 4.490015688680993e-05, "loss": 2.2812, "step": 3525000 }, { "epoch": 10.2, "learning_rate": 4.4899433239162656e-05, "loss": 2.3107, "step": 3525500 }, { "epoch": 10.21, "learning_rate": 4.489870959151538e-05, "loss": 2.3506, "step": 3526000 }, { "epoch": 10.21, "learning_rate": 4.489798594386811e-05, "loss": 2.3249, "step": 3526500 }, { "epoch": 10.21, "learning_rate": 4.489726374351612e-05, "loss": 2.3267, "step": 3527000 }, { "epoch": 10.21, "learning_rate": 4.4896540095868845e-05, "loss": 2.279, "step": 3527500 }, { "epoch": 10.21, "learning_rate": 4.489581644822157e-05, "loss": 2.312, "step": 3528000 }, { "epoch": 10.21, "learning_rate": 4.489509280057429e-05, "loss": 2.2989, "step": 3528500 }, { "epoch": 10.22, "learning_rate": 4.4894370600222305e-05, "loss": 2.3034, "step": 3529000 }, { "epoch": 10.22, "learning_rate": 4.489364839987033e-05, "loss": 2.3053, "step": 3529500 }, { "epoch": 10.22, "learning_rate": 4.489292475222305e-05, "loss": 2.3234, "step": 3530000 }, { "epoch": 10.22, "learning_rate": 4.4892202551871065e-05, "loss": 2.3098, "step": 3530500 }, { "epoch": 10.22, "learning_rate": 4.489147890422379e-05, "loss": 2.2984, "step": 3531000 }, { "epoch": 10.22, "learning_rate": 4.489075525657651e-05, "loss": 2.3142, "step": 3531500 }, { "epoch": 10.22, "learning_rate": 4.489003160892923e-05, "loss": 2.3072, "step": 3532000 }, { "epoch": 10.23, "learning_rate": 4.4889307961281954e-05, "loss": 2.3606, "step": 3532500 }, { "epoch": 10.23, "learning_rate": 4.488858431363468e-05, "loss": 2.3388, "step": 3533000 }, { "epoch": 10.23, "learning_rate": 4.4887860665987405e-05, "loss": 2.3159, "step": 3533500 }, { "epoch": 10.23, "learning_rate": 4.4887137018340134e-05, "loss": 2.3085, "step": 3534000 }, { "epoch": 10.23, "learning_rate": 4.4886413370692856e-05, "loss": 2.3192, "step": 3534500 }, { "epoch": 10.23, "learning_rate": 4.488568972304558e-05, "loss": 2.3216, "step": 3535000 }, { "epoch": 10.23, "learning_rate": 4.48849660753983e-05, "loss": 2.3031, "step": 3535500 }, { "epoch": 10.24, "learning_rate": 4.488424242775102e-05, "loss": 2.2882, "step": 3536000 }, { "epoch": 10.24, "learning_rate": 4.4883518780103745e-05, "loss": 2.3313, "step": 3536500 }, { "epoch": 10.24, "learning_rate": 4.488279513245647e-05, "loss": 2.3026, "step": 3537000 }, { "epoch": 10.24, "learning_rate": 4.488207293210448e-05, "loss": 2.2925, "step": 3537500 }, { "epoch": 10.24, "learning_rate": 4.4881349284457205e-05, "loss": 2.3132, "step": 3538000 }, { "epoch": 10.24, "learning_rate": 4.4880625636809934e-05, "loss": 2.3145, "step": 3538500 }, { "epoch": 10.24, "learning_rate": 4.4879901989162656e-05, "loss": 2.327, "step": 3539000 }, { "epoch": 10.25, "learning_rate": 4.487917834151538e-05, "loss": 2.3145, "step": 3539500 }, { "epoch": 10.25, "learning_rate": 4.48784546938681e-05, "loss": 2.3109, "step": 3540000 }, { "epoch": 10.25, "learning_rate": 4.487773104622082e-05, "loss": 2.3047, "step": 3540500 }, { "epoch": 10.25, "learning_rate": 4.487700739857355e-05, "loss": 2.2971, "step": 3541000 }, { "epoch": 10.25, "learning_rate": 4.4876283750926274e-05, "loss": 2.3169, "step": 3541500 }, { "epoch": 10.25, "learning_rate": 4.487556155057429e-05, "loss": 2.3292, "step": 3542000 }, { "epoch": 10.25, "learning_rate": 4.487483790292701e-05, "loss": 2.3254, "step": 3542500 }, { "epoch": 10.26, "learning_rate": 4.4874115702575035e-05, "loss": 2.3049, "step": 3543000 }, { "epoch": 10.26, "learning_rate": 4.487339205492776e-05, "loss": 2.3144, "step": 3543500 }, { "epoch": 10.26, "learning_rate": 4.487266985457577e-05, "loss": 2.3365, "step": 3544000 }, { "epoch": 10.26, "learning_rate": 4.4871946206928495e-05, "loss": 2.3088, "step": 3544500 }, { "epoch": 10.26, "learning_rate": 4.487122255928122e-05, "loss": 2.2921, "step": 3545000 }, { "epoch": 10.26, "learning_rate": 4.487049891163394e-05, "loss": 2.3074, "step": 3545500 }, { "epoch": 10.26, "learning_rate": 4.486977526398666e-05, "loss": 2.3114, "step": 3546000 }, { "epoch": 10.27, "learning_rate": 4.4869051616339383e-05, "loss": 2.3314, "step": 3546500 }, { "epoch": 10.27, "learning_rate": 4.4868327968692106e-05, "loss": 2.3172, "step": 3547000 }, { "epoch": 10.27, "learning_rate": 4.4867604321044835e-05, "loss": 2.3221, "step": 3547500 }, { "epoch": 10.27, "learning_rate": 4.486688067339756e-05, "loss": 2.305, "step": 3548000 }, { "epoch": 10.27, "learning_rate": 4.4866157025750286e-05, "loss": 2.2769, "step": 3548500 }, { "epoch": 10.27, "learning_rate": 4.486543337810301e-05, "loss": 2.2977, "step": 3549000 }, { "epoch": 10.27, "learning_rate": 4.486470973045573e-05, "loss": 2.3094, "step": 3549500 }, { "epoch": 10.28, "learning_rate": 4.4863987530103746e-05, "loss": 2.2984, "step": 3550000 }, { "epoch": 10.28, "learning_rate": 4.486326388245647e-05, "loss": 2.308, "step": 3550500 }, { "epoch": 10.28, "learning_rate": 4.4862541682104484e-05, "loss": 2.3465, "step": 3551000 }, { "epoch": 10.28, "learning_rate": 4.4861818034457206e-05, "loss": 2.2982, "step": 3551500 }, { "epoch": 10.28, "learning_rate": 4.4861094386809935e-05, "loss": 2.3099, "step": 3552000 }, { "epoch": 10.28, "learning_rate": 4.486037073916266e-05, "loss": 2.3185, "step": 3552500 }, { "epoch": 10.28, "learning_rate": 4.485964709151538e-05, "loss": 2.2814, "step": 3553000 }, { "epoch": 10.29, "learning_rate": 4.48589234438681e-05, "loss": 2.3306, "step": 3553500 }, { "epoch": 10.29, "learning_rate": 4.4858199796220824e-05, "loss": 2.3116, "step": 3554000 }, { "epoch": 10.29, "learning_rate": 4.4857476148573546e-05, "loss": 2.2916, "step": 3554500 }, { "epoch": 10.29, "learning_rate": 4.485675250092627e-05, "loss": 2.2922, "step": 3555000 }, { "epoch": 10.29, "learning_rate": 4.4856030300574284e-05, "loss": 2.3064, "step": 3555500 }, { "epoch": 10.29, "learning_rate": 4.4855308100222306e-05, "loss": 2.3001, "step": 3556000 }, { "epoch": 10.29, "learning_rate": 4.4854584452575035e-05, "loss": 2.2938, "step": 3556500 }, { "epoch": 10.3, "learning_rate": 4.485386080492776e-05, "loss": 2.3301, "step": 3557000 }, { "epoch": 10.3, "learning_rate": 4.485313715728048e-05, "loss": 2.3039, "step": 3557500 }, { "epoch": 10.3, "learning_rate": 4.48524135096332e-05, "loss": 2.3008, "step": 3558000 }, { "epoch": 10.3, "learning_rate": 4.4851689861985924e-05, "loss": 2.3105, "step": 3558500 }, { "epoch": 10.3, "learning_rate": 4.4850966214338646e-05, "loss": 2.3178, "step": 3559000 }, { "epoch": 10.3, "learning_rate": 4.485024256669137e-05, "loss": 2.3126, "step": 3559500 }, { "epoch": 10.3, "learning_rate": 4.4849520366339384e-05, "loss": 2.3066, "step": 3560000 }, { "epoch": 10.31, "learning_rate": 4.4848796718692106e-05, "loss": 2.3288, "step": 3560500 }, { "epoch": 10.31, "learning_rate": 4.4848073071044836e-05, "loss": 2.3383, "step": 3561000 }, { "epoch": 10.31, "learning_rate": 4.484734942339756e-05, "loss": 2.3026, "step": 3561500 }, { "epoch": 10.31, "learning_rate": 4.484662722304557e-05, "loss": 2.3257, "step": 3562000 }, { "epoch": 10.31, "learning_rate": 4.4845903575398296e-05, "loss": 2.3144, "step": 3562500 }, { "epoch": 10.31, "learning_rate": 4.484518137504631e-05, "loss": 2.312, "step": 3563000 }, { "epoch": 10.31, "learning_rate": 4.4844459174694333e-05, "loss": 2.2965, "step": 3563500 }, { "epoch": 10.32, "learning_rate": 4.484373552704706e-05, "loss": 2.3335, "step": 3564000 }, { "epoch": 10.32, "learning_rate": 4.4843011879399785e-05, "loss": 2.3282, "step": 3564500 }, { "epoch": 10.32, "learning_rate": 4.484228823175251e-05, "loss": 2.3205, "step": 3565000 }, { "epoch": 10.32, "learning_rate": 4.484156458410523e-05, "loss": 2.3193, "step": 3565500 }, { "epoch": 10.32, "learning_rate": 4.484084093645795e-05, "loss": 2.3274, "step": 3566000 }, { "epoch": 10.32, "learning_rate": 4.4840117288810674e-05, "loss": 2.3086, "step": 3566500 }, { "epoch": 10.33, "learning_rate": 4.4839393641163396e-05, "loss": 2.3213, "step": 3567000 }, { "epoch": 10.33, "learning_rate": 4.483866999351612e-05, "loss": 2.3215, "step": 3567500 }, { "epoch": 10.33, "learning_rate": 4.483794634586884e-05, "loss": 2.3023, "step": 3568000 }, { "epoch": 10.33, "learning_rate": 4.483722269822156e-05, "loss": 2.3117, "step": 3568500 }, { "epoch": 10.33, "learning_rate": 4.4836499050574285e-05, "loss": 2.3112, "step": 3569000 }, { "epoch": 10.33, "learning_rate": 4.4835775402927014e-05, "loss": 2.2898, "step": 3569500 }, { "epoch": 10.33, "learning_rate": 4.4835051755279736e-05, "loss": 2.2993, "step": 3570000 }, { "epoch": 10.34, "learning_rate": 4.483432810763246e-05, "loss": 2.3147, "step": 3570500 }, { "epoch": 10.34, "learning_rate": 4.483360445998519e-05, "loss": 2.3069, "step": 3571000 }, { "epoch": 10.34, "learning_rate": 4.483288081233791e-05, "loss": 2.3149, "step": 3571500 }, { "epoch": 10.34, "learning_rate": 4.483215716469063e-05, "loss": 2.3284, "step": 3572000 }, { "epoch": 10.34, "learning_rate": 4.4831433517043354e-05, "loss": 2.3167, "step": 3572500 }, { "epoch": 10.34, "learning_rate": 4.483071131669137e-05, "loss": 2.3092, "step": 3573000 }, { "epoch": 10.34, "learning_rate": 4.482998766904409e-05, "loss": 2.3269, "step": 3573500 }, { "epoch": 10.35, "learning_rate": 4.4829264021396814e-05, "loss": 2.2891, "step": 3574000 }, { "epoch": 10.35, "learning_rate": 4.4828540373749536e-05, "loss": 2.3211, "step": 3574500 }, { "epoch": 10.35, "learning_rate": 4.482781817339756e-05, "loss": 2.2922, "step": 3575000 }, { "epoch": 10.35, "learning_rate": 4.482709452575028e-05, "loss": 2.3227, "step": 3575500 }, { "epoch": 10.35, "learning_rate": 4.4826370878103e-05, "loss": 2.3061, "step": 3576000 }, { "epoch": 10.35, "learning_rate": 4.4825647230455725e-05, "loss": 2.3221, "step": 3576500 }, { "epoch": 10.35, "learning_rate": 4.482492503010374e-05, "loss": 2.301, "step": 3577000 }, { "epoch": 10.36, "learning_rate": 4.482420138245646e-05, "loss": 2.3101, "step": 3577500 }, { "epoch": 10.36, "learning_rate": 4.4823477734809185e-05, "loss": 2.325, "step": 3578000 }, { "epoch": 10.36, "learning_rate": 4.482275553445721e-05, "loss": 2.3206, "step": 3578500 }, { "epoch": 10.36, "learning_rate": 4.4822031886809937e-05, "loss": 2.3196, "step": 3579000 }, { "epoch": 10.36, "learning_rate": 4.482130823916266e-05, "loss": 2.3545, "step": 3579500 }, { "epoch": 10.36, "learning_rate": 4.482058459151538e-05, "loss": 2.3138, "step": 3580000 }, { "epoch": 10.36, "learning_rate": 4.48198609438681e-05, "loss": 2.3355, "step": 3580500 }, { "epoch": 10.37, "learning_rate": 4.4819137296220826e-05, "loss": 2.3213, "step": 3581000 }, { "epoch": 10.37, "learning_rate": 4.481841509586884e-05, "loss": 2.3275, "step": 3581500 }, { "epoch": 10.37, "learning_rate": 4.4817692895516863e-05, "loss": 2.3366, "step": 3582000 }, { "epoch": 10.37, "learning_rate": 4.4816969247869586e-05, "loss": 2.3315, "step": 3582500 }, { "epoch": 10.37, "learning_rate": 4.481624560022231e-05, "loss": 2.3038, "step": 3583000 }, { "epoch": 10.37, "learning_rate": 4.481552195257503e-05, "loss": 2.3137, "step": 3583500 }, { "epoch": 10.37, "learning_rate": 4.481479830492775e-05, "loss": 2.3014, "step": 3584000 }, { "epoch": 10.38, "learning_rate": 4.4814074657280475e-05, "loss": 2.3165, "step": 3584500 }, { "epoch": 10.38, "learning_rate": 4.48133510096332e-05, "loss": 2.3193, "step": 3585000 }, { "epoch": 10.38, "learning_rate": 4.481262736198592e-05, "loss": 2.2968, "step": 3585500 }, { "epoch": 10.38, "learning_rate": 4.481190371433864e-05, "loss": 2.3057, "step": 3586000 }, { "epoch": 10.38, "learning_rate": 4.481118006669137e-05, "loss": 2.3089, "step": 3586500 }, { "epoch": 10.38, "learning_rate": 4.481045786633939e-05, "loss": 2.3229, "step": 3587000 }, { "epoch": 10.38, "learning_rate": 4.4809734218692115e-05, "loss": 2.3143, "step": 3587500 }, { "epoch": 10.39, "learning_rate": 4.480901057104484e-05, "loss": 2.3122, "step": 3588000 }, { "epoch": 10.39, "learning_rate": 4.480828692339756e-05, "loss": 2.3206, "step": 3588500 }, { "epoch": 10.39, "learning_rate": 4.480756327575028e-05, "loss": 2.3227, "step": 3589000 }, { "epoch": 10.39, "learning_rate": 4.4806839628103004e-05, "loss": 2.3301, "step": 3589500 }, { "epoch": 10.39, "learning_rate": 4.4806115980455726e-05, "loss": 2.3171, "step": 3590000 }, { "epoch": 10.39, "learning_rate": 4.480539233280845e-05, "loss": 2.3204, "step": 3590500 }, { "epoch": 10.39, "learning_rate": 4.4804670132456464e-05, "loss": 2.319, "step": 3591000 }, { "epoch": 10.4, "learning_rate": 4.4803946484809186e-05, "loss": 2.3049, "step": 3591500 }, { "epoch": 10.4, "learning_rate": 4.4803222837161915e-05, "loss": 2.323, "step": 3592000 }, { "epoch": 10.4, "learning_rate": 4.480249918951464e-05, "loss": 2.3086, "step": 3592500 }, { "epoch": 10.4, "learning_rate": 4.480177554186736e-05, "loss": 2.2907, "step": 3593000 }, { "epoch": 10.4, "learning_rate": 4.480105189422009e-05, "loss": 2.3337, "step": 3593500 }, { "epoch": 10.4, "learning_rate": 4.480032824657281e-05, "loss": 2.3256, "step": 3594000 }, { "epoch": 10.4, "learning_rate": 4.4799606046220826e-05, "loss": 2.3201, "step": 3594500 }, { "epoch": 10.41, "learning_rate": 4.479888384586884e-05, "loss": 2.3337, "step": 3595000 }, { "epoch": 10.41, "learning_rate": 4.4798160198221564e-05, "loss": 2.2999, "step": 3595500 }, { "epoch": 10.41, "learning_rate": 4.479743655057429e-05, "loss": 2.3098, "step": 3596000 }, { "epoch": 10.41, "learning_rate": 4.4796712902927015e-05, "loss": 2.3238, "step": 3596500 }, { "epoch": 10.41, "learning_rate": 4.479598925527974e-05, "loss": 2.3148, "step": 3597000 }, { "epoch": 10.41, "learning_rate": 4.479526560763246e-05, "loss": 2.3168, "step": 3597500 }, { "epoch": 10.41, "learning_rate": 4.479454195998518e-05, "loss": 2.2975, "step": 3598000 }, { "epoch": 10.42, "learning_rate": 4.4793818312337904e-05, "loss": 2.3296, "step": 3598500 }, { "epoch": 10.42, "learning_rate": 4.4793094664690627e-05, "loss": 2.3159, "step": 3599000 }, { "epoch": 10.42, "learning_rate": 4.479237101704335e-05, "loss": 2.3025, "step": 3599500 }, { "epoch": 10.42, "learning_rate": 4.4791648816691364e-05, "loss": 2.2736, "step": 3600000 }, { "epoch": 10.42, "learning_rate": 4.479092516904409e-05, "loss": 2.3003, "step": 3600500 }, { "epoch": 10.42, "learning_rate": 4.479020296869211e-05, "loss": 2.2924, "step": 3601000 }, { "epoch": 10.42, "learning_rate": 4.478947932104484e-05, "loss": 2.3206, "step": 3601500 }, { "epoch": 10.43, "learning_rate": 4.478875567339756e-05, "loss": 2.2963, "step": 3602000 }, { "epoch": 10.43, "learning_rate": 4.478803202575028e-05, "loss": 2.3218, "step": 3602500 }, { "epoch": 10.43, "learning_rate": 4.47873098253983e-05, "loss": 2.3097, "step": 3603000 }, { "epoch": 10.43, "learning_rate": 4.4786587625046313e-05, "loss": 2.2898, "step": 3603500 }, { "epoch": 10.43, "learning_rate": 4.478586397739904e-05, "loss": 2.3246, "step": 3604000 }, { "epoch": 10.43, "learning_rate": 4.4785140329751765e-05, "loss": 2.3324, "step": 3604500 }, { "epoch": 10.43, "learning_rate": 4.478441668210449e-05, "loss": 2.3072, "step": 3605000 }, { "epoch": 10.44, "learning_rate": 4.478369303445721e-05, "loss": 2.3204, "step": 3605500 }, { "epoch": 10.44, "learning_rate": 4.478296938680993e-05, "loss": 2.3258, "step": 3606000 }, { "epoch": 10.44, "learning_rate": 4.4782245739162654e-05, "loss": 2.2923, "step": 3606500 }, { "epoch": 10.44, "learning_rate": 4.4781522091515376e-05, "loss": 2.2975, "step": 3607000 }, { "epoch": 10.44, "learning_rate": 4.478079989116339e-05, "loss": 2.2948, "step": 3607500 }, { "epoch": 10.44, "learning_rate": 4.4780076243516114e-05, "loss": 2.3203, "step": 3608000 }, { "epoch": 10.45, "learning_rate": 4.477935259586884e-05, "loss": 2.3295, "step": 3608500 }, { "epoch": 10.45, "learning_rate": 4.4778628948221565e-05, "loss": 2.3071, "step": 3609000 }, { "epoch": 10.45, "learning_rate": 4.4777905300574294e-05, "loss": 2.3205, "step": 3609500 }, { "epoch": 10.45, "learning_rate": 4.4777181652927016e-05, "loss": 2.3056, "step": 3610000 }, { "epoch": 10.45, "learning_rate": 4.477645945257503e-05, "loss": 2.3205, "step": 3610500 }, { "epoch": 10.45, "learning_rate": 4.4775735804927754e-05, "loss": 2.2996, "step": 3611000 }, { "epoch": 10.45, "learning_rate": 4.4775012157280476e-05, "loss": 2.307, "step": 3611500 }, { "epoch": 10.46, "learning_rate": 4.47742885096332e-05, "loss": 2.3369, "step": 3612000 }, { "epoch": 10.46, "learning_rate": 4.477356486198592e-05, "loss": 2.2987, "step": 3612500 }, { "epoch": 10.46, "learning_rate": 4.477284121433864e-05, "loss": 2.2771, "step": 3613000 }, { "epoch": 10.46, "learning_rate": 4.4772117566691365e-05, "loss": 2.3447, "step": 3613500 }, { "epoch": 10.46, "learning_rate": 4.477139536633939e-05, "loss": 2.3368, "step": 3614000 }, { "epoch": 10.46, "learning_rate": 4.477067171869211e-05, "loss": 2.3306, "step": 3614500 }, { "epoch": 10.46, "learning_rate": 4.476994807104483e-05, "loss": 2.3036, "step": 3615000 }, { "epoch": 10.47, "learning_rate": 4.4769224423397554e-05, "loss": 2.3354, "step": 3615500 }, { "epoch": 10.47, "learning_rate": 4.476850077575028e-05, "loss": 2.3324, "step": 3616000 }, { "epoch": 10.47, "learning_rate": 4.4767777128103005e-05, "loss": 2.2986, "step": 3616500 }, { "epoch": 10.47, "learning_rate": 4.476705492775102e-05, "loss": 2.3304, "step": 3617000 }, { "epoch": 10.47, "learning_rate": 4.476633128010374e-05, "loss": 2.3198, "step": 3617500 }, { "epoch": 10.47, "learning_rate": 4.476560763245647e-05, "loss": 2.3054, "step": 3618000 }, { "epoch": 10.47, "learning_rate": 4.4764883984809194e-05, "loss": 2.3441, "step": 3618500 }, { "epoch": 10.48, "learning_rate": 4.476416033716192e-05, "loss": 2.2986, "step": 3619000 }, { "epoch": 10.48, "learning_rate": 4.476343668951464e-05, "loss": 2.3157, "step": 3619500 }, { "epoch": 10.48, "learning_rate": 4.476271304186736e-05, "loss": 2.3005, "step": 3620000 }, { "epoch": 10.48, "learning_rate": 4.476198939422008e-05, "loss": 2.3066, "step": 3620500 }, { "epoch": 10.48, "learning_rate": 4.4761265746572806e-05, "loss": 2.3124, "step": 3621000 }, { "epoch": 10.48, "learning_rate": 4.476054209892553e-05, "loss": 2.31, "step": 3621500 }, { "epoch": 10.48, "learning_rate": 4.475981989857354e-05, "loss": 2.3299, "step": 3622000 }, { "epoch": 10.49, "learning_rate": 4.475909625092627e-05, "loss": 2.3074, "step": 3622500 }, { "epoch": 10.49, "learning_rate": 4.4758372603278995e-05, "loss": 2.3118, "step": 3623000 }, { "epoch": 10.49, "learning_rate": 4.475764895563172e-05, "loss": 2.3074, "step": 3623500 }, { "epoch": 10.49, "learning_rate": 4.475692675527974e-05, "loss": 2.3237, "step": 3624000 }, { "epoch": 10.49, "learning_rate": 4.475620310763246e-05, "loss": 2.3325, "step": 3624500 }, { "epoch": 10.49, "learning_rate": 4.4755479459985184e-05, "loss": 2.3181, "step": 3625000 }, { "epoch": 10.49, "learning_rate": 4.4754755812337906e-05, "loss": 2.3079, "step": 3625500 }, { "epoch": 10.5, "learning_rate": 4.475403216469063e-05, "loss": 2.3159, "step": 3626000 }, { "epoch": 10.5, "learning_rate": 4.475330851704335e-05, "loss": 2.3264, "step": 3626500 }, { "epoch": 10.5, "learning_rate": 4.475258486939607e-05, "loss": 2.3158, "step": 3627000 }, { "epoch": 10.5, "learning_rate": 4.4751862669044095e-05, "loss": 2.2862, "step": 3627500 }, { "epoch": 10.5, "learning_rate": 4.475114046869211e-05, "loss": 2.3071, "step": 3628000 }, { "epoch": 10.5, "learning_rate": 4.475041682104483e-05, "loss": 2.3131, "step": 3628500 }, { "epoch": 10.5, "learning_rate": 4.474969462069285e-05, "loss": 2.2918, "step": 3629000 }, { "epoch": 10.51, "learning_rate": 4.474897097304557e-05, "loss": 2.3153, "step": 3629500 }, { "epoch": 10.51, "learning_rate": 4.474824732539829e-05, "loss": 2.2893, "step": 3630000 }, { "epoch": 10.51, "learning_rate": 4.474752367775102e-05, "loss": 2.3049, "step": 3630500 }, { "epoch": 10.51, "learning_rate": 4.4746800030103744e-05, "loss": 2.297, "step": 3631000 }, { "epoch": 10.51, "learning_rate": 4.474607638245647e-05, "loss": 2.3007, "step": 3631500 }, { "epoch": 10.51, "learning_rate": 4.4745352734809195e-05, "loss": 2.3007, "step": 3632000 }, { "epoch": 10.51, "learning_rate": 4.474462908716192e-05, "loss": 2.315, "step": 3632500 }, { "epoch": 10.52, "learning_rate": 4.474390543951464e-05, "loss": 2.3077, "step": 3633000 }, { "epoch": 10.52, "learning_rate": 4.474318179186736e-05, "loss": 2.321, "step": 3633500 }, { "epoch": 10.52, "learning_rate": 4.4742458144220084e-05, "loss": 2.3097, "step": 3634000 }, { "epoch": 10.52, "learning_rate": 4.4741734496572806e-05, "loss": 2.3118, "step": 3634500 }, { "epoch": 10.52, "learning_rate": 4.474101229622082e-05, "loss": 2.3146, "step": 3635000 }, { "epoch": 10.52, "learning_rate": 4.4740290095868844e-05, "loss": 2.3096, "step": 3635500 }, { "epoch": 10.52, "learning_rate": 4.4739566448221567e-05, "loss": 2.3122, "step": 3636000 }, { "epoch": 10.53, "learning_rate": 4.473884280057429e-05, "loss": 2.3357, "step": 3636500 }, { "epoch": 10.53, "learning_rate": 4.4738120600222304e-05, "loss": 2.3068, "step": 3637000 }, { "epoch": 10.53, "learning_rate": 4.4737396952575027e-05, "loss": 2.3483, "step": 3637500 }, { "epoch": 10.53, "learning_rate": 4.473667330492775e-05, "loss": 2.3151, "step": 3638000 }, { "epoch": 10.53, "learning_rate": 4.473594965728047e-05, "loss": 2.3061, "step": 3638500 }, { "epoch": 10.53, "learning_rate": 4.47352260096332e-05, "loss": 2.321, "step": 3639000 }, { "epoch": 10.53, "learning_rate": 4.473450236198592e-05, "loss": 2.3246, "step": 3639500 }, { "epoch": 10.54, "learning_rate": 4.4733778714338644e-05, "loss": 2.3119, "step": 3640000 }, { "epoch": 10.54, "learning_rate": 4.473305651398667e-05, "loss": 2.2968, "step": 3640500 }, { "epoch": 10.54, "learning_rate": 4.473233286633939e-05, "loss": 2.3374, "step": 3641000 }, { "epoch": 10.54, "learning_rate": 4.473160921869211e-05, "loss": 2.3078, "step": 3641500 }, { "epoch": 10.54, "learning_rate": 4.4730885571044834e-05, "loss": 2.3153, "step": 3642000 }, { "epoch": 10.54, "learning_rate": 4.473016337069285e-05, "loss": 2.3131, "step": 3642500 }, { "epoch": 10.54, "learning_rate": 4.472943972304557e-05, "loss": 2.307, "step": 3643000 }, { "epoch": 10.55, "learning_rate": 4.47287160753983e-05, "loss": 2.3166, "step": 3643500 }, { "epoch": 10.55, "learning_rate": 4.472799242775102e-05, "loss": 2.3036, "step": 3644000 }, { "epoch": 10.55, "learning_rate": 4.4727268780103745e-05, "loss": 2.3033, "step": 3644500 }, { "epoch": 10.55, "learning_rate": 4.472654513245647e-05, "loss": 2.3061, "step": 3645000 }, { "epoch": 10.55, "learning_rate": 4.472582148480919e-05, "loss": 2.314, "step": 3645500 }, { "epoch": 10.55, "learning_rate": 4.472509783716192e-05, "loss": 2.3211, "step": 3646000 }, { "epoch": 10.56, "learning_rate": 4.472437418951464e-05, "loss": 2.3232, "step": 3646500 }, { "epoch": 10.56, "learning_rate": 4.472365054186736e-05, "loss": 2.2904, "step": 3647000 }, { "epoch": 10.56, "learning_rate": 4.4722926894220085e-05, "loss": 2.3177, "step": 3647500 }, { "epoch": 10.56, "learning_rate": 4.472220324657281e-05, "loss": 2.3221, "step": 3648000 }, { "epoch": 10.56, "learning_rate": 4.472147959892553e-05, "loss": 2.3296, "step": 3648500 }, { "epoch": 10.56, "learning_rate": 4.472075595127825e-05, "loss": 2.315, "step": 3649000 }, { "epoch": 10.56, "learning_rate": 4.4720032303630974e-05, "loss": 2.3182, "step": 3649500 }, { "epoch": 10.57, "learning_rate": 4.4719308655983696e-05, "loss": 2.3072, "step": 3650000 }, { "epoch": 10.57, "learning_rate": 4.471858645563172e-05, "loss": 2.298, "step": 3650500 }, { "epoch": 10.57, "learning_rate": 4.4717864255279734e-05, "loss": 2.3486, "step": 3651000 }, { "epoch": 10.57, "learning_rate": 4.4717140607632456e-05, "loss": 2.3215, "step": 3651500 }, { "epoch": 10.57, "learning_rate": 4.471641695998518e-05, "loss": 2.3109, "step": 3652000 }, { "epoch": 10.57, "learning_rate": 4.47156947596332e-05, "loss": 2.2995, "step": 3652500 }, { "epoch": 10.57, "learning_rate": 4.471497111198592e-05, "loss": 2.3399, "step": 3653000 }, { "epoch": 10.58, "learning_rate": 4.4714247464338645e-05, "loss": 2.3253, "step": 3653500 }, { "epoch": 10.58, "learning_rate": 4.4713523816691374e-05, "loss": 2.2985, "step": 3654000 }, { "epoch": 10.58, "learning_rate": 4.4712800169044097e-05, "loss": 2.303, "step": 3654500 }, { "epoch": 10.58, "learning_rate": 4.471207652139682e-05, "loss": 2.3236, "step": 3655000 }, { "epoch": 10.58, "learning_rate": 4.471135287374954e-05, "loss": 2.2951, "step": 3655500 }, { "epoch": 10.58, "learning_rate": 4.471062922610226e-05, "loss": 2.322, "step": 3656000 }, { "epoch": 10.58, "learning_rate": 4.4709905578454985e-05, "loss": 2.3172, "step": 3656500 }, { "epoch": 10.59, "learning_rate": 4.470918193080771e-05, "loss": 2.313, "step": 3657000 }, { "epoch": 10.59, "learning_rate": 4.470845828316043e-05, "loss": 2.2927, "step": 3657500 }, { "epoch": 10.59, "learning_rate": 4.470773463551315e-05, "loss": 2.2953, "step": 3658000 }, { "epoch": 10.59, "learning_rate": 4.4707012435161174e-05, "loss": 2.3205, "step": 3658500 }, { "epoch": 10.59, "learning_rate": 4.47062887875139e-05, "loss": 2.3263, "step": 3659000 }, { "epoch": 10.59, "learning_rate": 4.470556513986662e-05, "loss": 2.343, "step": 3659500 }, { "epoch": 10.59, "learning_rate": 4.470484149221934e-05, "loss": 2.2819, "step": 3660000 }, { "epoch": 10.6, "learning_rate": 4.4704117844572063e-05, "loss": 2.3002, "step": 3660500 }, { "epoch": 10.6, "learning_rate": 4.4703395644220086e-05, "loss": 2.3189, "step": 3661000 }, { "epoch": 10.6, "learning_rate": 4.470267199657281e-05, "loss": 2.3005, "step": 3661500 }, { "epoch": 10.6, "learning_rate": 4.470194834892553e-05, "loss": 2.2919, "step": 3662000 }, { "epoch": 10.6, "learning_rate": 4.470122470127825e-05, "loss": 2.3284, "step": 3662500 }, { "epoch": 10.6, "learning_rate": 4.4700501053630975e-05, "loss": 2.311, "step": 3663000 }, { "epoch": 10.6, "learning_rate": 4.4699777405983704e-05, "loss": 2.3006, "step": 3663500 }, { "epoch": 10.61, "learning_rate": 4.469905520563172e-05, "loss": 2.3178, "step": 3664000 }, { "epoch": 10.61, "learning_rate": 4.469833155798444e-05, "loss": 2.3153, "step": 3664500 }, { "epoch": 10.61, "learning_rate": 4.4697607910337164e-05, "loss": 2.3001, "step": 3665000 }, { "epoch": 10.61, "learning_rate": 4.4696884262689886e-05, "loss": 2.3369, "step": 3665500 }, { "epoch": 10.61, "learning_rate": 4.46961620623379e-05, "loss": 2.3378, "step": 3666000 }, { "epoch": 10.61, "learning_rate": 4.4695438414690624e-05, "loss": 2.3457, "step": 3666500 }, { "epoch": 10.61, "learning_rate": 4.469471476704335e-05, "loss": 2.3198, "step": 3667000 }, { "epoch": 10.62, "learning_rate": 4.4693991119396075e-05, "loss": 2.3316, "step": 3667500 }, { "epoch": 10.62, "learning_rate": 4.469326891904409e-05, "loss": 2.3104, "step": 3668000 }, { "epoch": 10.62, "learning_rate": 4.469254527139682e-05, "loss": 2.2941, "step": 3668500 }, { "epoch": 10.62, "learning_rate": 4.469182162374954e-05, "loss": 2.3213, "step": 3669000 }, { "epoch": 10.62, "learning_rate": 4.4691097976102264e-05, "loss": 2.2985, "step": 3669500 }, { "epoch": 10.62, "learning_rate": 4.469037577575028e-05, "loss": 2.3033, "step": 3670000 }, { "epoch": 10.62, "learning_rate": 4.4689652128103e-05, "loss": 2.3113, "step": 3670500 }, { "epoch": 10.63, "learning_rate": 4.4688928480455724e-05, "loss": 2.3247, "step": 3671000 }, { "epoch": 10.63, "learning_rate": 4.468820483280845e-05, "loss": 2.3117, "step": 3671500 }, { "epoch": 10.63, "learning_rate": 4.4687481185161175e-05, "loss": 2.3388, "step": 3672000 }, { "epoch": 10.63, "learning_rate": 4.46867575375139e-05, "loss": 2.2947, "step": 3672500 }, { "epoch": 10.63, "learning_rate": 4.468603388986662e-05, "loss": 2.3024, "step": 3673000 }, { "epoch": 10.63, "learning_rate": 4.468531024221934e-05, "loss": 2.3195, "step": 3673500 }, { "epoch": 10.63, "learning_rate": 4.4684586594572064e-05, "loss": 2.2959, "step": 3674000 }, { "epoch": 10.64, "learning_rate": 4.468386439422008e-05, "loss": 2.324, "step": 3674500 }, { "epoch": 10.64, "learning_rate": 4.46831407465728e-05, "loss": 2.2992, "step": 3675000 }, { "epoch": 10.64, "learning_rate": 4.4682417098925524e-05, "loss": 2.3196, "step": 3675500 }, { "epoch": 10.64, "learning_rate": 4.468169345127825e-05, "loss": 2.3218, "step": 3676000 }, { "epoch": 10.64, "learning_rate": 4.468097269822157e-05, "loss": 2.3117, "step": 3676500 }, { "epoch": 10.64, "learning_rate": 4.468024905057429e-05, "loss": 2.3335, "step": 3677000 }, { "epoch": 10.64, "learning_rate": 4.467952540292701e-05, "loss": 2.3094, "step": 3677500 }, { "epoch": 10.65, "learning_rate": 4.4678801755279736e-05, "loss": 2.2977, "step": 3678000 }, { "epoch": 10.65, "learning_rate": 4.467807810763246e-05, "loss": 2.3174, "step": 3678500 }, { "epoch": 10.65, "learning_rate": 4.467735445998518e-05, "loss": 2.305, "step": 3679000 }, { "epoch": 10.65, "learning_rate": 4.46766308123379e-05, "loss": 2.3139, "step": 3679500 }, { "epoch": 10.65, "learning_rate": 4.467590716469063e-05, "loss": 2.3491, "step": 3680000 }, { "epoch": 10.65, "learning_rate": 4.4675183517043354e-05, "loss": 2.2985, "step": 3680500 }, { "epoch": 10.65, "learning_rate": 4.4674459869396076e-05, "loss": 2.3137, "step": 3681000 }, { "epoch": 10.66, "learning_rate": 4.467373766904409e-05, "loss": 2.3176, "step": 3681500 }, { "epoch": 10.66, "learning_rate": 4.467301546869211e-05, "loss": 2.3213, "step": 3682000 }, { "epoch": 10.66, "learning_rate": 4.467229182104483e-05, "loss": 2.3039, "step": 3682500 }, { "epoch": 10.66, "learning_rate": 4.467156817339755e-05, "loss": 2.3138, "step": 3683000 }, { "epoch": 10.66, "learning_rate": 4.467084452575028e-05, "loss": 2.3051, "step": 3683500 }, { "epoch": 10.66, "learning_rate": 4.46701223253983e-05, "loss": 2.286, "step": 3684000 }, { "epoch": 10.67, "learning_rate": 4.4669398677751025e-05, "loss": 2.3236, "step": 3684500 }, { "epoch": 10.67, "learning_rate": 4.466867503010375e-05, "loss": 2.3233, "step": 3685000 }, { "epoch": 10.67, "learning_rate": 4.466795138245647e-05, "loss": 2.3075, "step": 3685500 }, { "epoch": 10.67, "learning_rate": 4.466722773480919e-05, "loss": 2.3164, "step": 3686000 }, { "epoch": 10.67, "learning_rate": 4.4666504087161914e-05, "loss": 2.3396, "step": 3686500 }, { "epoch": 10.67, "learning_rate": 4.4665780439514636e-05, "loss": 2.313, "step": 3687000 }, { "epoch": 10.67, "learning_rate": 4.466505679186736e-05, "loss": 2.2806, "step": 3687500 }, { "epoch": 10.68, "learning_rate": 4.466433314422008e-05, "loss": 2.2998, "step": 3688000 }, { "epoch": 10.68, "learning_rate": 4.46636109438681e-05, "loss": 2.3111, "step": 3688500 }, { "epoch": 10.68, "learning_rate": 4.466288874351612e-05, "loss": 2.3123, "step": 3689000 }, { "epoch": 10.68, "learning_rate": 4.466216509586884e-05, "loss": 2.3107, "step": 3689500 }, { "epoch": 10.68, "learning_rate": 4.466144144822156e-05, "loss": 2.309, "step": 3690000 }, { "epoch": 10.68, "learning_rate": 4.4660717800574285e-05, "loss": 2.3302, "step": 3690500 }, { "epoch": 10.68, "learning_rate": 4.465999415292701e-05, "loss": 2.3073, "step": 3691000 }, { "epoch": 10.69, "learning_rate": 4.4659270505279736e-05, "loss": 2.3142, "step": 3691500 }, { "epoch": 10.69, "learning_rate": 4.465854685763246e-05, "loss": 2.3107, "step": 3692000 }, { "epoch": 10.69, "learning_rate": 4.465782320998518e-05, "loss": 2.3118, "step": 3692500 }, { "epoch": 10.69, "learning_rate": 4.46571010096332e-05, "loss": 2.3175, "step": 3693000 }, { "epoch": 10.69, "learning_rate": 4.465637880928122e-05, "loss": 2.3135, "step": 3693500 }, { "epoch": 10.69, "learning_rate": 4.465565516163394e-05, "loss": 2.3214, "step": 3694000 }, { "epoch": 10.69, "learning_rate": 4.465493151398666e-05, "loss": 2.3046, "step": 3694500 }, { "epoch": 10.7, "learning_rate": 4.4654207866339385e-05, "loss": 2.2735, "step": 3695000 }, { "epoch": 10.7, "learning_rate": 4.465348421869211e-05, "loss": 2.312, "step": 3695500 }, { "epoch": 10.7, "learning_rate": 4.465276201834013e-05, "loss": 2.3198, "step": 3696000 }, { "epoch": 10.7, "learning_rate": 4.465203837069285e-05, "loss": 2.3151, "step": 3696500 }, { "epoch": 10.7, "learning_rate": 4.4651314723045574e-05, "loss": 2.3109, "step": 3697000 }, { "epoch": 10.7, "learning_rate": 4.46505910753983e-05, "loss": 2.3303, "step": 3697500 }, { "epoch": 10.7, "learning_rate": 4.464986742775102e-05, "loss": 2.2977, "step": 3698000 }, { "epoch": 10.71, "learning_rate": 4.464914378010374e-05, "loss": 2.2965, "step": 3698500 }, { "epoch": 10.71, "learning_rate": 4.464842013245647e-05, "loss": 2.3177, "step": 3699000 }, { "epoch": 10.71, "learning_rate": 4.4647697932104486e-05, "loss": 2.3321, "step": 3699500 }, { "epoch": 10.71, "learning_rate": 4.464697573175251e-05, "loss": 2.2906, "step": 3700000 }, { "epoch": 10.71, "learning_rate": 4.464625208410523e-05, "loss": 2.3131, "step": 3700500 }, { "epoch": 10.71, "learning_rate": 4.464552843645795e-05, "loss": 2.3201, "step": 3701000 }, { "epoch": 10.71, "learning_rate": 4.4644804788810675e-05, "loss": 2.2943, "step": 3701500 }, { "epoch": 10.72, "learning_rate": 4.46440811411634e-05, "loss": 2.3072, "step": 3702000 }, { "epoch": 10.72, "learning_rate": 4.464335749351612e-05, "loss": 2.3114, "step": 3702500 }, { "epoch": 10.72, "learning_rate": 4.464263384586884e-05, "loss": 2.3206, "step": 3703000 }, { "epoch": 10.72, "learning_rate": 4.4641910198221564e-05, "loss": 2.3167, "step": 3703500 }, { "epoch": 10.72, "learning_rate": 4.4641186550574286e-05, "loss": 2.3103, "step": 3704000 }, { "epoch": 10.72, "learning_rate": 4.464046435022231e-05, "loss": 2.3133, "step": 3704500 }, { "epoch": 10.72, "learning_rate": 4.463974070257503e-05, "loss": 2.323, "step": 3705000 }, { "epoch": 10.73, "learning_rate": 4.463901705492775e-05, "loss": 2.3088, "step": 3705500 }, { "epoch": 10.73, "learning_rate": 4.463829485457577e-05, "loss": 2.3061, "step": 3706000 }, { "epoch": 10.73, "learning_rate": 4.46375712069285e-05, "loss": 2.3085, "step": 3706500 }, { "epoch": 10.73, "learning_rate": 4.463684755928122e-05, "loss": 2.3119, "step": 3707000 }, { "epoch": 10.73, "learning_rate": 4.463612391163394e-05, "loss": 2.2953, "step": 3707500 }, { "epoch": 10.73, "learning_rate": 4.4635400263986664e-05, "loss": 2.3352, "step": 3708000 }, { "epoch": 10.73, "learning_rate": 4.4634676616339386e-05, "loss": 2.3264, "step": 3708500 }, { "epoch": 10.74, "learning_rate": 4.463395296869211e-05, "loss": 2.3102, "step": 3709000 }, { "epoch": 10.74, "learning_rate": 4.463322932104483e-05, "loss": 2.2989, "step": 3709500 }, { "epoch": 10.74, "learning_rate": 4.463250567339756e-05, "loss": 2.3089, "step": 3710000 }, { "epoch": 10.74, "learning_rate": 4.463178202575028e-05, "loss": 2.3277, "step": 3710500 }, { "epoch": 10.74, "learning_rate": 4.4631058378103004e-05, "loss": 2.3192, "step": 3711000 }, { "epoch": 10.74, "learning_rate": 4.4630334730455726e-05, "loss": 2.3136, "step": 3711500 }, { "epoch": 10.74, "learning_rate": 4.462961108280845e-05, "loss": 2.2977, "step": 3712000 }, { "epoch": 10.75, "learning_rate": 4.4628888882456464e-05, "loss": 2.3193, "step": 3712500 }, { "epoch": 10.75, "learning_rate": 4.4628165234809186e-05, "loss": 2.3006, "step": 3713000 }, { "epoch": 10.75, "learning_rate": 4.4627441587161915e-05, "loss": 2.311, "step": 3713500 }, { "epoch": 10.75, "learning_rate": 4.462671793951464e-05, "loss": 2.3234, "step": 3714000 }, { "epoch": 10.75, "learning_rate": 4.462599573916266e-05, "loss": 2.3066, "step": 3714500 }, { "epoch": 10.75, "learning_rate": 4.4625273538810676e-05, "loss": 2.348, "step": 3715000 }, { "epoch": 10.75, "learning_rate": 4.46245498911634e-05, "loss": 2.2944, "step": 3715500 }, { "epoch": 10.76, "learning_rate": 4.462382624351612e-05, "loss": 2.3281, "step": 3716000 }, { "epoch": 10.76, "learning_rate": 4.462310259586884e-05, "loss": 2.3089, "step": 3716500 }, { "epoch": 10.76, "learning_rate": 4.4622378948221565e-05, "loss": 2.2989, "step": 3717000 }, { "epoch": 10.76, "learning_rate": 4.462165530057429e-05, "loss": 2.3214, "step": 3717500 }, { "epoch": 10.76, "learning_rate": 4.462093165292701e-05, "loss": 2.3225, "step": 3718000 }, { "epoch": 10.76, "learning_rate": 4.462020800527973e-05, "loss": 2.3363, "step": 3718500 }, { "epoch": 10.76, "learning_rate": 4.4619485804927754e-05, "loss": 2.2883, "step": 3719000 }, { "epoch": 10.77, "learning_rate": 4.4618762157280476e-05, "loss": 2.3253, "step": 3719500 }, { "epoch": 10.77, "learning_rate": 4.46180385096332e-05, "loss": 2.3091, "step": 3720000 }, { "epoch": 10.77, "learning_rate": 4.461731486198592e-05, "loss": 2.312, "step": 3720500 }, { "epoch": 10.77, "learning_rate": 4.461659121433864e-05, "loss": 2.3333, "step": 3721000 }, { "epoch": 10.77, "learning_rate": 4.461587046128196e-05, "loss": 2.3184, "step": 3721500 }, { "epoch": 10.77, "learning_rate": 4.461514681363469e-05, "loss": 2.2891, "step": 3722000 }, { "epoch": 10.78, "learning_rate": 4.461442316598741e-05, "loss": 2.2843, "step": 3722500 }, { "epoch": 10.78, "learning_rate": 4.461369951834013e-05, "loss": 2.2941, "step": 3723000 }, { "epoch": 10.78, "learning_rate": 4.4612975870692854e-05, "loss": 2.3364, "step": 3723500 }, { "epoch": 10.78, "learning_rate": 4.461225367034087e-05, "loss": 2.3271, "step": 3724000 }, { "epoch": 10.78, "learning_rate": 4.461153002269359e-05, "loss": 2.3317, "step": 3724500 }, { "epoch": 10.78, "learning_rate": 4.4610806375046314e-05, "loss": 2.3112, "step": 3725000 }, { "epoch": 10.78, "learning_rate": 4.4610082727399036e-05, "loss": 2.3373, "step": 3725500 }, { "epoch": 10.79, "learning_rate": 4.460935907975176e-05, "loss": 2.3123, "step": 3726000 }, { "epoch": 10.79, "learning_rate": 4.460863543210449e-05, "loss": 2.3247, "step": 3726500 }, { "epoch": 10.79, "learning_rate": 4.460791178445721e-05, "loss": 2.3101, "step": 3727000 }, { "epoch": 10.79, "learning_rate": 4.460718813680993e-05, "loss": 2.3188, "step": 3727500 }, { "epoch": 10.79, "learning_rate": 4.460646593645795e-05, "loss": 2.2933, "step": 3728000 }, { "epoch": 10.79, "learning_rate": 4.460574228881067e-05, "loss": 2.3381, "step": 3728500 }, { "epoch": 10.79, "learning_rate": 4.46050186411634e-05, "loss": 2.3135, "step": 3729000 }, { "epoch": 10.8, "learning_rate": 4.460429499351612e-05, "loss": 2.3071, "step": 3729500 }, { "epoch": 10.8, "learning_rate": 4.460357134586884e-05, "loss": 2.316, "step": 3730000 }, { "epoch": 10.8, "learning_rate": 4.4602847698221565e-05, "loss": 2.2933, "step": 3730500 }, { "epoch": 10.8, "learning_rate": 4.460212549786959e-05, "loss": 2.3147, "step": 3731000 }, { "epoch": 10.8, "learning_rate": 4.460140185022231e-05, "loss": 2.3235, "step": 3731500 }, { "epoch": 10.8, "learning_rate": 4.460067820257503e-05, "loss": 2.323, "step": 3732000 }, { "epoch": 10.8, "learning_rate": 4.4599954554927754e-05, "loss": 2.3248, "step": 3732500 }, { "epoch": 10.81, "learning_rate": 4.4599230907280477e-05, "loss": 2.2862, "step": 3733000 }, { "epoch": 10.81, "learning_rate": 4.45985072596332e-05, "loss": 2.3261, "step": 3733500 }, { "epoch": 10.81, "learning_rate": 4.459778361198592e-05, "loss": 2.3116, "step": 3734000 }, { "epoch": 10.81, "learning_rate": 4.459705996433864e-05, "loss": 2.3179, "step": 3734500 }, { "epoch": 10.81, "learning_rate": 4.459633776398666e-05, "loss": 2.287, "step": 3735000 }, { "epoch": 10.81, "learning_rate": 4.459561556363468e-05, "loss": 2.3365, "step": 3735500 }, { "epoch": 10.81, "learning_rate": 4.45948919159874e-05, "loss": 2.3006, "step": 3736000 }, { "epoch": 10.82, "learning_rate": 4.459416826834013e-05, "loss": 2.3095, "step": 3736500 }, { "epoch": 10.82, "learning_rate": 4.4593444620692855e-05, "loss": 2.3028, "step": 3737000 }, { "epoch": 10.82, "learning_rate": 4.459272242034087e-05, "loss": 2.3221, "step": 3737500 }, { "epoch": 10.82, "learning_rate": 4.459199877269359e-05, "loss": 2.3221, "step": 3738000 }, { "epoch": 10.82, "learning_rate": 4.4591275125046315e-05, "loss": 2.3182, "step": 3738500 }, { "epoch": 10.82, "learning_rate": 4.459055147739904e-05, "loss": 2.3084, "step": 3739000 }, { "epoch": 10.82, "learning_rate": 4.458982927704706e-05, "loss": 2.3347, "step": 3739500 }, { "epoch": 10.83, "learning_rate": 4.458910562939978e-05, "loss": 2.3243, "step": 3740000 }, { "epoch": 10.83, "learning_rate": 4.4588381981752504e-05, "loss": 2.3109, "step": 3740500 }, { "epoch": 10.83, "learning_rate": 4.4587658334105226e-05, "loss": 2.3257, "step": 3741000 }, { "epoch": 10.83, "learning_rate": 4.458693468645795e-05, "loss": 2.3264, "step": 3741500 }, { "epoch": 10.83, "learning_rate": 4.4586212486105964e-05, "loss": 2.3214, "step": 3742000 }, { "epoch": 10.83, "learning_rate": 4.4585488838458686e-05, "loss": 2.2818, "step": 3742500 }, { "epoch": 10.83, "learning_rate": 4.4584765190811415e-05, "loss": 2.3009, "step": 3743000 }, { "epoch": 10.84, "learning_rate": 4.458404299045943e-05, "loss": 2.3259, "step": 3743500 }, { "epoch": 10.84, "learning_rate": 4.458331934281215e-05, "loss": 2.3041, "step": 3744000 }, { "epoch": 10.84, "learning_rate": 4.458259569516488e-05, "loss": 2.2969, "step": 3744500 }, { "epoch": 10.84, "learning_rate": 4.4581872047517604e-05, "loss": 2.307, "step": 3745000 }, { "epoch": 10.84, "learning_rate": 4.4581148399870326e-05, "loss": 2.3028, "step": 3745500 }, { "epoch": 10.84, "learning_rate": 4.458042475222305e-05, "loss": 2.2941, "step": 3746000 }, { "epoch": 10.84, "learning_rate": 4.457970110457577e-05, "loss": 2.2926, "step": 3746500 }, { "epoch": 10.85, "learning_rate": 4.457897745692849e-05, "loss": 2.3077, "step": 3747000 }, { "epoch": 10.85, "learning_rate": 4.4578253809281215e-05, "loss": 2.321, "step": 3747500 }, { "epoch": 10.85, "learning_rate": 4.457753016163394e-05, "loss": 2.3167, "step": 3748000 }, { "epoch": 10.85, "learning_rate": 4.4576806513986666e-05, "loss": 2.3043, "step": 3748500 }, { "epoch": 10.85, "learning_rate": 4.457608286633939e-05, "loss": 2.3258, "step": 3749000 }, { "epoch": 10.85, "learning_rate": 4.457535921869211e-05, "loss": 2.3045, "step": 3749500 }, { "epoch": 10.85, "learning_rate": 4.457463846563542e-05, "loss": 2.3111, "step": 3750000 }, { "epoch": 10.86, "learning_rate": 4.457391481798814e-05, "loss": 2.3204, "step": 3750500 }, { "epoch": 10.86, "learning_rate": 4.4573191170340864e-05, "loss": 2.3106, "step": 3751000 }, { "epoch": 10.86, "learning_rate": 4.457246752269359e-05, "loss": 2.2958, "step": 3751500 }, { "epoch": 10.86, "learning_rate": 4.4571743875046315e-05, "loss": 2.318, "step": 3752000 }, { "epoch": 10.86, "learning_rate": 4.457102167469434e-05, "loss": 2.3144, "step": 3752500 }, { "epoch": 10.86, "learning_rate": 4.457029802704706e-05, "loss": 2.33, "step": 3753000 }, { "epoch": 10.86, "learning_rate": 4.456957437939978e-05, "loss": 2.3127, "step": 3753500 }, { "epoch": 10.87, "learning_rate": 4.4568850731752504e-05, "loss": 2.3216, "step": 3754000 }, { "epoch": 10.87, "learning_rate": 4.456812708410523e-05, "loss": 2.3337, "step": 3754500 }, { "epoch": 10.87, "learning_rate": 4.456740488375324e-05, "loss": 2.3216, "step": 3755000 }, { "epoch": 10.87, "learning_rate": 4.4566681236105965e-05, "loss": 2.2996, "step": 3755500 }, { "epoch": 10.87, "learning_rate": 4.456595758845869e-05, "loss": 2.3207, "step": 3756000 }, { "epoch": 10.87, "learning_rate": 4.4565233940811416e-05, "loss": 2.319, "step": 3756500 }, { "epoch": 10.87, "learning_rate": 4.456451029316414e-05, "loss": 2.3064, "step": 3757000 }, { "epoch": 10.88, "learning_rate": 4.456378664551686e-05, "loss": 2.2996, "step": 3757500 }, { "epoch": 10.88, "learning_rate": 4.456306299786958e-05, "loss": 2.3426, "step": 3758000 }, { "epoch": 10.88, "learning_rate": 4.4562339350222305e-05, "loss": 2.2958, "step": 3758500 }, { "epoch": 10.88, "learning_rate": 4.4561615702575034e-05, "loss": 2.2931, "step": 3759000 }, { "epoch": 10.88, "learning_rate": 4.456089494951834e-05, "loss": 2.2935, "step": 3759500 }, { "epoch": 10.88, "learning_rate": 4.4560171301871065e-05, "loss": 2.3385, "step": 3760000 }, { "epoch": 10.89, "learning_rate": 4.455944765422379e-05, "loss": 2.3039, "step": 3760500 }, { "epoch": 10.89, "learning_rate": 4.4558724006576516e-05, "loss": 2.3058, "step": 3761000 }, { "epoch": 10.89, "learning_rate": 4.455800035892924e-05, "loss": 2.291, "step": 3761500 }, { "epoch": 10.89, "learning_rate": 4.455727671128196e-05, "loss": 2.324, "step": 3762000 }, { "epoch": 10.89, "learning_rate": 4.4556554510929976e-05, "loss": 2.3053, "step": 3762500 }, { "epoch": 10.89, "learning_rate": 4.45558308632827e-05, "loss": 2.3333, "step": 3763000 }, { "epoch": 10.89, "learning_rate": 4.4555108662930714e-05, "loss": 2.3071, "step": 3763500 }, { "epoch": 10.9, "learning_rate": 4.455438501528344e-05, "loss": 2.3267, "step": 3764000 }, { "epoch": 10.9, "learning_rate": 4.4553661367636165e-05, "loss": 2.3389, "step": 3764500 }, { "epoch": 10.9, "learning_rate": 4.455293771998889e-05, "loss": 2.2908, "step": 3765000 }, { "epoch": 10.9, "learning_rate": 4.455221407234161e-05, "loss": 2.3056, "step": 3765500 }, { "epoch": 10.9, "learning_rate": 4.455149042469433e-05, "loss": 2.288, "step": 3766000 }, { "epoch": 10.9, "learning_rate": 4.455076677704706e-05, "loss": 2.3325, "step": 3766500 }, { "epoch": 10.9, "learning_rate": 4.455004312939978e-05, "loss": 2.3234, "step": 3767000 }, { "epoch": 10.91, "learning_rate": 4.4549319481752505e-05, "loss": 2.3317, "step": 3767500 }, { "epoch": 10.91, "learning_rate": 4.454859583410523e-05, "loss": 2.2849, "step": 3768000 }, { "epoch": 10.91, "learning_rate": 4.454787218645795e-05, "loss": 2.3031, "step": 3768500 }, { "epoch": 10.91, "learning_rate": 4.4547149986105965e-05, "loss": 2.2983, "step": 3769000 }, { "epoch": 10.91, "learning_rate": 4.4546426338458694e-05, "loss": 2.3219, "step": 3769500 }, { "epoch": 10.91, "learning_rate": 4.4545702690811417e-05, "loss": 2.3367, "step": 3770000 }, { "epoch": 10.91, "learning_rate": 4.454497904316414e-05, "loss": 2.3163, "step": 3770500 }, { "epoch": 10.92, "learning_rate": 4.454425539551686e-05, "loss": 2.3088, "step": 3771000 }, { "epoch": 10.92, "learning_rate": 4.454353174786958e-05, "loss": 2.3245, "step": 3771500 }, { "epoch": 10.92, "learning_rate": 4.4542808100222305e-05, "loss": 2.3252, "step": 3772000 }, { "epoch": 10.92, "learning_rate": 4.454208445257503e-05, "loss": 2.3045, "step": 3772500 }, { "epoch": 10.92, "learning_rate": 4.454136225222304e-05, "loss": 2.3251, "step": 3773000 }, { "epoch": 10.92, "learning_rate": 4.4540638604575765e-05, "loss": 2.3266, "step": 3773500 }, { "epoch": 10.92, "learning_rate": 4.453991640422379e-05, "loss": 2.3016, "step": 3774000 }, { "epoch": 10.93, "learning_rate": 4.453919275657652e-05, "loss": 2.3329, "step": 3774500 }, { "epoch": 10.93, "learning_rate": 4.453846910892924e-05, "loss": 2.3048, "step": 3775000 }, { "epoch": 10.93, "learning_rate": 4.453774546128196e-05, "loss": 2.3281, "step": 3775500 }, { "epoch": 10.93, "learning_rate": 4.4537021813634684e-05, "loss": 2.3184, "step": 3776000 }, { "epoch": 10.93, "learning_rate": 4.4536298165987406e-05, "loss": 2.3028, "step": 3776500 }, { "epoch": 10.93, "learning_rate": 4.453557451834013e-05, "loss": 2.2906, "step": 3777000 }, { "epoch": 10.93, "learning_rate": 4.453485087069285e-05, "loss": 2.3051, "step": 3777500 }, { "epoch": 10.94, "learning_rate": 4.453412722304557e-05, "loss": 2.2924, "step": 3778000 }, { "epoch": 10.94, "learning_rate": 4.4533403575398295e-05, "loss": 2.2934, "step": 3778500 }, { "epoch": 10.94, "learning_rate": 4.453267992775102e-05, "loss": 2.3213, "step": 3779000 }, { "epoch": 10.94, "learning_rate": 4.453195772739904e-05, "loss": 2.3231, "step": 3779500 }, { "epoch": 10.94, "learning_rate": 4.453123407975176e-05, "loss": 2.2997, "step": 3780000 }, { "epoch": 10.94, "learning_rate": 4.4530510432104484e-05, "loss": 2.2987, "step": 3780500 }, { "epoch": 10.94, "learning_rate": 4.4529786784457206e-05, "loss": 2.3432, "step": 3781000 }, { "epoch": 10.95, "learning_rate": 4.4529063136809935e-05, "loss": 2.309, "step": 3781500 }, { "epoch": 10.95, "learning_rate": 4.452833948916266e-05, "loss": 2.3424, "step": 3782000 }, { "epoch": 10.95, "learning_rate": 4.452761584151538e-05, "loss": 2.2964, "step": 3782500 }, { "epoch": 10.95, "learning_rate": 4.4526893641163395e-05, "loss": 2.3144, "step": 3783000 }, { "epoch": 10.95, "learning_rate": 4.452616999351612e-05, "loss": 2.3006, "step": 3783500 }, { "epoch": 10.95, "learning_rate": 4.4525446345868846e-05, "loss": 2.3378, "step": 3784000 }, { "epoch": 10.95, "learning_rate": 4.452472414551686e-05, "loss": 2.2885, "step": 3784500 }, { "epoch": 10.96, "learning_rate": 4.4524000497869584e-05, "loss": 2.3178, "step": 3785000 }, { "epoch": 10.96, "learning_rate": 4.4523276850222306e-05, "loss": 2.3245, "step": 3785500 }, { "epoch": 10.96, "learning_rate": 4.452255320257503e-05, "loss": 2.2928, "step": 3786000 }, { "epoch": 10.96, "learning_rate": 4.452182955492775e-05, "loss": 2.3215, "step": 3786500 }, { "epoch": 10.96, "learning_rate": 4.452110590728047e-05, "loss": 2.3133, "step": 3787000 }, { "epoch": 10.96, "learning_rate": 4.4520382259633195e-05, "loss": 2.3027, "step": 3787500 }, { "epoch": 10.96, "learning_rate": 4.451966005928122e-05, "loss": 2.3274, "step": 3788000 }, { "epoch": 10.97, "learning_rate": 4.451893641163394e-05, "loss": 2.3183, "step": 3788500 }, { "epoch": 10.97, "learning_rate": 4.451821276398667e-05, "loss": 2.298, "step": 3789000 }, { "epoch": 10.97, "learning_rate": 4.4517490563634684e-05, "loss": 2.3294, "step": 3789500 }, { "epoch": 10.97, "learning_rate": 4.4516766915987407e-05, "loss": 2.3153, "step": 3790000 }, { "epoch": 10.97, "learning_rate": 4.451604326834013e-05, "loss": 2.2983, "step": 3790500 }, { "epoch": 10.97, "learning_rate": 4.451531962069285e-05, "loss": 2.3059, "step": 3791000 }, { "epoch": 10.97, "learning_rate": 4.451459597304557e-05, "loss": 2.3022, "step": 3791500 }, { "epoch": 10.98, "learning_rate": 4.4513872325398296e-05, "loss": 2.3026, "step": 3792000 }, { "epoch": 10.98, "learning_rate": 4.451314867775102e-05, "loss": 2.2935, "step": 3792500 }, { "epoch": 10.98, "learning_rate": 4.451242503010375e-05, "loss": 2.3257, "step": 3793000 }, { "epoch": 10.98, "learning_rate": 4.451170138245647e-05, "loss": 2.2688, "step": 3793500 }, { "epoch": 10.98, "learning_rate": 4.451097773480919e-05, "loss": 2.3323, "step": 3794000 }, { "epoch": 10.98, "learning_rate": 4.4510254087161913e-05, "loss": 2.3091, "step": 3794500 }, { "epoch": 10.98, "learning_rate": 4.4509530439514636e-05, "loss": 2.3176, "step": 3795000 }, { "epoch": 10.99, "learning_rate": 4.450880679186736e-05, "loss": 2.3324, "step": 3795500 }, { "epoch": 10.99, "learning_rate": 4.450808314422009e-05, "loss": 2.3419, "step": 3796000 }, { "epoch": 10.99, "learning_rate": 4.45073609438681e-05, "loss": 2.3075, "step": 3796500 }, { "epoch": 10.99, "learning_rate": 4.4506637296220825e-05, "loss": 2.3348, "step": 3797000 }, { "epoch": 10.99, "learning_rate": 4.450591364857355e-05, "loss": 2.2989, "step": 3797500 }, { "epoch": 10.99, "learning_rate": 4.450519000092627e-05, "loss": 2.3362, "step": 3798000 }, { "epoch": 11.0, "learning_rate": 4.4504466353279e-05, "loss": 2.3173, "step": 3798500 }, { "epoch": 11.0, "learning_rate": 4.450374270563172e-05, "loss": 2.3239, "step": 3799000 }, { "epoch": 11.0, "learning_rate": 4.4503020505279736e-05, "loss": 2.3114, "step": 3799500 }, { "epoch": 11.0, "learning_rate": 4.450229685763246e-05, "loss": 2.321, "step": 3800000 }, { "epoch": 11.0, "eval_accuracy": 0.649606812559522, "eval_accuracy_mlm": 0.6118292335819766, "eval_accuracy_nsp": 0.8521698679609336, "eval_loss": 2.303694486618042, "eval_runtime": 330.6766, "eval_samples_per_second": 1319.676, "eval_steps_per_second": 54.987, "step": 3800192 }, { "epoch": 11.0, "learning_rate": 4.450157320998518e-05, "loss": 2.2686, "step": 3800500 }, { "epoch": 11.0, "learning_rate": 4.45008495623379e-05, "loss": 2.2723, "step": 3801000 }, { "epoch": 11.0, "learning_rate": 4.4500125914690625e-05, "loss": 2.2728, "step": 3801500 }, { "epoch": 11.01, "learning_rate": 4.449940226704335e-05, "loss": 2.2589, "step": 3802000 }, { "epoch": 11.01, "learning_rate": 4.449867861939607e-05, "loss": 2.2992, "step": 3802500 }, { "epoch": 11.01, "learning_rate": 4.44979549717488e-05, "loss": 2.2963, "step": 3803000 }, { "epoch": 11.01, "learning_rate": 4.449723421869211e-05, "loss": 2.2922, "step": 3803500 }, { "epoch": 11.01, "learning_rate": 4.4496510571044836e-05, "loss": 2.298, "step": 3804000 }, { "epoch": 11.01, "learning_rate": 4.449578692339756e-05, "loss": 2.2803, "step": 3804500 }, { "epoch": 11.01, "learning_rate": 4.449506327575028e-05, "loss": 2.3067, "step": 3805000 }, { "epoch": 11.02, "learning_rate": 4.4494339628103e-05, "loss": 2.2855, "step": 3805500 }, { "epoch": 11.02, "learning_rate": 4.4493615980455725e-05, "loss": 2.2817, "step": 3806000 }, { "epoch": 11.02, "learning_rate": 4.449289233280845e-05, "loss": 2.2773, "step": 3806500 }, { "epoch": 11.02, "learning_rate": 4.449216868516117e-05, "loss": 2.2936, "step": 3807000 }, { "epoch": 11.02, "learning_rate": 4.44914450375139e-05, "loss": 2.2836, "step": 3807500 }, { "epoch": 11.02, "learning_rate": 4.449072138986662e-05, "loss": 2.2877, "step": 3808000 }, { "epoch": 11.02, "learning_rate": 4.448999774221934e-05, "loss": 2.2951, "step": 3808500 }, { "epoch": 11.03, "learning_rate": 4.4489274094572065e-05, "loss": 2.2851, "step": 3809000 }, { "epoch": 11.03, "learning_rate": 4.448855189422008e-05, "loss": 2.271, "step": 3809500 }, { "epoch": 11.03, "learning_rate": 4.4487829693868096e-05, "loss": 2.2802, "step": 3810000 }, { "epoch": 11.03, "learning_rate": 4.4487106046220826e-05, "loss": 2.3079, "step": 3810500 }, { "epoch": 11.03, "learning_rate": 4.448638239857355e-05, "loss": 2.3086, "step": 3811000 }, { "epoch": 11.03, "learning_rate": 4.448565875092628e-05, "loss": 2.3141, "step": 3811500 }, { "epoch": 11.03, "learning_rate": 4.4484935103279e-05, "loss": 2.2847, "step": 3812000 }, { "epoch": 11.04, "learning_rate": 4.448421145563172e-05, "loss": 2.2871, "step": 3812500 }, { "epoch": 11.04, "learning_rate": 4.448348925527974e-05, "loss": 2.268, "step": 3813000 }, { "epoch": 11.04, "learning_rate": 4.448276560763246e-05, "loss": 2.2887, "step": 3813500 }, { "epoch": 11.04, "learning_rate": 4.448204195998518e-05, "loss": 2.3128, "step": 3814000 }, { "epoch": 11.04, "learning_rate": 4.44813197596332e-05, "loss": 2.2732, "step": 3814500 }, { "epoch": 11.04, "learning_rate": 4.4480596111985926e-05, "loss": 2.3099, "step": 3815000 }, { "epoch": 11.04, "learning_rate": 4.447987246433865e-05, "loss": 2.3075, "step": 3815500 }, { "epoch": 11.05, "learning_rate": 4.4479150263986664e-05, "loss": 2.3168, "step": 3816000 }, { "epoch": 11.05, "learning_rate": 4.4478426616339386e-05, "loss": 2.292, "step": 3816500 }, { "epoch": 11.05, "learning_rate": 4.447770296869211e-05, "loss": 2.2826, "step": 3817000 }, { "epoch": 11.05, "learning_rate": 4.447697932104483e-05, "loss": 2.2949, "step": 3817500 }, { "epoch": 11.05, "learning_rate": 4.447625567339755e-05, "loss": 2.2725, "step": 3818000 }, { "epoch": 11.05, "learning_rate": 4.447553202575028e-05, "loss": 2.3116, "step": 3818500 }, { "epoch": 11.05, "learning_rate": 4.4474808378103004e-05, "loss": 2.2743, "step": 3819000 }, { "epoch": 11.06, "learning_rate": 4.4474084730455726e-05, "loss": 2.3117, "step": 3819500 }, { "epoch": 11.06, "learning_rate": 4.447336253010375e-05, "loss": 2.2807, "step": 3820000 }, { "epoch": 11.06, "learning_rate": 4.447263888245647e-05, "loss": 2.2859, "step": 3820500 }, { "epoch": 11.06, "learning_rate": 4.447191523480919e-05, "loss": 2.2899, "step": 3821000 }, { "epoch": 11.06, "learning_rate": 4.4471191587161915e-05, "loss": 2.3095, "step": 3821500 }, { "epoch": 11.06, "learning_rate": 4.447046793951464e-05, "loss": 2.2984, "step": 3822000 }, { "epoch": 11.06, "learning_rate": 4.446974429186736e-05, "loss": 2.2877, "step": 3822500 }, { "epoch": 11.07, "learning_rate": 4.446902064422008e-05, "loss": 2.2967, "step": 3823000 }, { "epoch": 11.07, "learning_rate": 4.4468296996572804e-05, "loss": 2.2945, "step": 3823500 }, { "epoch": 11.07, "learning_rate": 4.4467573348925526e-05, "loss": 2.3127, "step": 3824000 }, { "epoch": 11.07, "learning_rate": 4.446685114857355e-05, "loss": 2.3369, "step": 3824500 }, { "epoch": 11.07, "learning_rate": 4.4466128948221564e-05, "loss": 2.31, "step": 3825000 }, { "epoch": 11.07, "learning_rate": 4.4465405300574286e-05, "loss": 2.2873, "step": 3825500 }, { "epoch": 11.07, "learning_rate": 4.446468165292701e-05, "loss": 2.3013, "step": 3826000 }, { "epoch": 11.08, "learning_rate": 4.446395945257503e-05, "loss": 2.2751, "step": 3826500 }, { "epoch": 11.08, "learning_rate": 4.446323580492775e-05, "loss": 2.273, "step": 3827000 }, { "epoch": 11.08, "learning_rate": 4.4462512157280475e-05, "loss": 2.3104, "step": 3827500 }, { "epoch": 11.08, "learning_rate": 4.44617885096332e-05, "loss": 2.3169, "step": 3828000 }, { "epoch": 11.08, "learning_rate": 4.4461064861985927e-05, "loss": 2.3134, "step": 3828500 }, { "epoch": 11.08, "learning_rate": 4.446034121433865e-05, "loss": 2.2931, "step": 3829000 }, { "epoch": 11.08, "learning_rate": 4.445961756669137e-05, "loss": 2.2568, "step": 3829500 }, { "epoch": 11.09, "learning_rate": 4.445889391904409e-05, "loss": 2.2833, "step": 3830000 }, { "epoch": 11.09, "learning_rate": 4.4458170271396816e-05, "loss": 2.3046, "step": 3830500 }, { "epoch": 11.09, "learning_rate": 4.445744662374954e-05, "loss": 2.2981, "step": 3831000 }, { "epoch": 11.09, "learning_rate": 4.445672297610226e-05, "loss": 2.2907, "step": 3831500 }, { "epoch": 11.09, "learning_rate": 4.445599932845498e-05, "loss": 2.2931, "step": 3832000 }, { "epoch": 11.09, "learning_rate": 4.4455277128103e-05, "loss": 2.2904, "step": 3832500 }, { "epoch": 11.09, "learning_rate": 4.445455348045573e-05, "loss": 2.2897, "step": 3833000 }, { "epoch": 11.1, "learning_rate": 4.445382983280845e-05, "loss": 2.2976, "step": 3833500 }, { "epoch": 11.1, "learning_rate": 4.445310618516118e-05, "loss": 2.3147, "step": 3834000 }, { "epoch": 11.1, "learning_rate": 4.44523825375139e-05, "loss": 2.3212, "step": 3834500 }, { "epoch": 11.1, "learning_rate": 4.445165888986662e-05, "loss": 2.289, "step": 3835000 }, { "epoch": 11.1, "learning_rate": 4.4450935242219345e-05, "loss": 2.2984, "step": 3835500 }, { "epoch": 11.1, "learning_rate": 4.445021159457207e-05, "loss": 2.298, "step": 3836000 }, { "epoch": 11.11, "learning_rate": 4.444948794692479e-05, "loss": 2.3146, "step": 3836500 }, { "epoch": 11.11, "learning_rate": 4.444876429927751e-05, "loss": 2.288, "step": 3837000 }, { "epoch": 11.11, "learning_rate": 4.4448040651630234e-05, "loss": 2.3075, "step": 3837500 }, { "epoch": 11.11, "learning_rate": 4.4447317003982956e-05, "loss": 2.3034, "step": 3838000 }, { "epoch": 11.11, "learning_rate": 4.444659480363098e-05, "loss": 2.2941, "step": 3838500 }, { "epoch": 11.11, "learning_rate": 4.44458711559837e-05, "loss": 2.3028, "step": 3839000 }, { "epoch": 11.11, "learning_rate": 4.444514750833642e-05, "loss": 2.2933, "step": 3839500 }, { "epoch": 11.12, "learning_rate": 4.4444423860689145e-05, "loss": 2.2984, "step": 3840000 }, { "epoch": 11.12, "learning_rate": 4.4443700213041874e-05, "loss": 2.2852, "step": 3840500 }, { "epoch": 11.12, "learning_rate": 4.444297801268989e-05, "loss": 2.2859, "step": 3841000 }, { "epoch": 11.12, "learning_rate": 4.444225436504261e-05, "loss": 2.2934, "step": 3841500 }, { "epoch": 11.12, "learning_rate": 4.4441530717395334e-05, "loss": 2.2947, "step": 3842000 }, { "epoch": 11.12, "learning_rate": 4.4440807069748056e-05, "loss": 2.2921, "step": 3842500 }, { "epoch": 11.12, "learning_rate": 4.444008342210078e-05, "loss": 2.294, "step": 3843000 }, { "epoch": 11.13, "learning_rate": 4.44393612217488e-05, "loss": 2.3045, "step": 3843500 }, { "epoch": 11.13, "learning_rate": 4.443863757410152e-05, "loss": 2.2957, "step": 3844000 }, { "epoch": 11.13, "learning_rate": 4.4437913926454245e-05, "loss": 2.2838, "step": 3844500 }, { "epoch": 11.13, "learning_rate": 4.443719027880697e-05, "loss": 2.2981, "step": 3845000 }, { "epoch": 11.13, "learning_rate": 4.443646807845498e-05, "loss": 2.2961, "step": 3845500 }, { "epoch": 11.13, "learning_rate": 4.4435744430807705e-05, "loss": 2.3033, "step": 3846000 }, { "epoch": 11.13, "learning_rate": 4.443502078316043e-05, "loss": 2.2924, "step": 3846500 }, { "epoch": 11.14, "learning_rate": 4.4434297135513157e-05, "loss": 2.2644, "step": 3847000 }, { "epoch": 11.14, "learning_rate": 4.443357348786588e-05, "loss": 2.3018, "step": 3847500 }, { "epoch": 11.14, "learning_rate": 4.44328498402186e-05, "loss": 2.2764, "step": 3848000 }, { "epoch": 11.14, "learning_rate": 4.443212763986662e-05, "loss": 2.2915, "step": 3848500 }, { "epoch": 11.14, "learning_rate": 4.443140543951464e-05, "loss": 2.3004, "step": 3849000 }, { "epoch": 11.14, "learning_rate": 4.443068179186736e-05, "loss": 2.2697, "step": 3849500 }, { "epoch": 11.14, "learning_rate": 4.442995814422008e-05, "loss": 2.2959, "step": 3850000 }, { "epoch": 11.15, "learning_rate": 4.4429234496572806e-05, "loss": 2.2923, "step": 3850500 }, { "epoch": 11.15, "learning_rate": 4.442851084892553e-05, "loss": 2.3147, "step": 3851000 }, { "epoch": 11.15, "learning_rate": 4.442778720127826e-05, "loss": 2.2821, "step": 3851500 }, { "epoch": 11.15, "learning_rate": 4.442706355363098e-05, "loss": 2.3035, "step": 3852000 }, { "epoch": 11.15, "learning_rate": 4.44263399059837e-05, "loss": 2.2767, "step": 3852500 }, { "epoch": 11.15, "learning_rate": 4.442561770563172e-05, "loss": 2.3207, "step": 3853000 }, { "epoch": 11.15, "learning_rate": 4.442489405798444e-05, "loss": 2.3131, "step": 3853500 }, { "epoch": 11.16, "learning_rate": 4.442417041033716e-05, "loss": 2.3055, "step": 3854000 }, { "epoch": 11.16, "learning_rate": 4.4423446762689884e-05, "loss": 2.3018, "step": 3854500 }, { "epoch": 11.16, "learning_rate": 4.4422723115042606e-05, "loss": 2.3021, "step": 3855000 }, { "epoch": 11.16, "learning_rate": 4.442199946739533e-05, "loss": 2.3047, "step": 3855500 }, { "epoch": 11.16, "learning_rate": 4.442127581974806e-05, "loss": 2.3027, "step": 3856000 }, { "epoch": 11.16, "learning_rate": 4.442055217210078e-05, "loss": 2.2977, "step": 3856500 }, { "epoch": 11.16, "learning_rate": 4.44198299717488e-05, "loss": 2.275, "step": 3857000 }, { "epoch": 11.17, "learning_rate": 4.441910777139682e-05, "loss": 2.2803, "step": 3857500 }, { "epoch": 11.17, "learning_rate": 4.441838412374954e-05, "loss": 2.2964, "step": 3858000 }, { "epoch": 11.17, "learning_rate": 4.4417661923397555e-05, "loss": 2.3217, "step": 3858500 }, { "epoch": 11.17, "learning_rate": 4.4416938275750284e-05, "loss": 2.2941, "step": 3859000 }, { "epoch": 11.17, "learning_rate": 4.4416214628103006e-05, "loss": 2.3128, "step": 3859500 }, { "epoch": 11.17, "learning_rate": 4.441549098045573e-05, "loss": 2.2891, "step": 3860000 }, { "epoch": 11.17, "learning_rate": 4.441476733280845e-05, "loss": 2.3115, "step": 3860500 }, { "epoch": 11.18, "learning_rate": 4.441404368516117e-05, "loss": 2.3146, "step": 3861000 }, { "epoch": 11.18, "learning_rate": 4.4413320037513895e-05, "loss": 2.2819, "step": 3861500 }, { "epoch": 11.18, "learning_rate": 4.441259638986662e-05, "loss": 2.2711, "step": 3862000 }, { "epoch": 11.18, "learning_rate": 4.441187274221934e-05, "loss": 2.298, "step": 3862500 }, { "epoch": 11.18, "learning_rate": 4.441114909457206e-05, "loss": 2.2763, "step": 3863000 }, { "epoch": 11.18, "learning_rate": 4.4410426894220084e-05, "loss": 2.302, "step": 3863500 }, { "epoch": 11.18, "learning_rate": 4.4409704693868106e-05, "loss": 2.29, "step": 3864000 }, { "epoch": 11.19, "learning_rate": 4.440898104622083e-05, "loss": 2.2867, "step": 3864500 }, { "epoch": 11.19, "learning_rate": 4.440825739857355e-05, "loss": 2.3127, "step": 3865000 }, { "epoch": 11.19, "learning_rate": 4.440753375092627e-05, "loss": 2.3017, "step": 3865500 }, { "epoch": 11.19, "learning_rate": 4.4406810103278995e-05, "loss": 2.2837, "step": 3866000 }, { "epoch": 11.19, "learning_rate": 4.440608645563172e-05, "loss": 2.325, "step": 3866500 }, { "epoch": 11.19, "learning_rate": 4.440536280798444e-05, "loss": 2.294, "step": 3867000 }, { "epoch": 11.19, "learning_rate": 4.440463916033716e-05, "loss": 2.2788, "step": 3867500 }, { "epoch": 11.2, "learning_rate": 4.4403915512689884e-05, "loss": 2.3067, "step": 3868000 }, { "epoch": 11.2, "learning_rate": 4.4403191865042607e-05, "loss": 2.3004, "step": 3868500 }, { "epoch": 11.2, "learning_rate": 4.440246966469063e-05, "loss": 2.2889, "step": 3869000 }, { "epoch": 11.2, "learning_rate": 4.440174601704335e-05, "loss": 2.3176, "step": 3869500 }, { "epoch": 11.2, "learning_rate": 4.440102236939607e-05, "loss": 2.27, "step": 3870000 }, { "epoch": 11.2, "learning_rate": 4.440030016904409e-05, "loss": 2.2946, "step": 3870500 }, { "epoch": 11.2, "learning_rate": 4.439957652139682e-05, "loss": 2.3177, "step": 3871000 }, { "epoch": 11.21, "learning_rate": 4.439885287374954e-05, "loss": 2.2968, "step": 3871500 }, { "epoch": 11.21, "learning_rate": 4.439812922610226e-05, "loss": 2.2872, "step": 3872000 }, { "epoch": 11.21, "learning_rate": 4.4397405578454985e-05, "loss": 2.312, "step": 3872500 }, { "epoch": 11.21, "learning_rate": 4.439668193080771e-05, "loss": 2.2964, "step": 3873000 }, { "epoch": 11.21, "learning_rate": 4.4395958283160436e-05, "loss": 2.2821, "step": 3873500 }, { "epoch": 11.21, "learning_rate": 4.439523608280845e-05, "loss": 2.2738, "step": 3874000 }, { "epoch": 11.22, "learning_rate": 4.4394512435161174e-05, "loss": 2.2641, "step": 3874500 }, { "epoch": 11.22, "learning_rate": 4.4393788787513896e-05, "loss": 2.2871, "step": 3875000 }, { "epoch": 11.22, "learning_rate": 4.439306513986662e-05, "loss": 2.2881, "step": 3875500 }, { "epoch": 11.22, "learning_rate": 4.439234149221934e-05, "loss": 2.3196, "step": 3876000 }, { "epoch": 11.22, "learning_rate": 4.439161784457206e-05, "loss": 2.3096, "step": 3876500 }, { "epoch": 11.22, "learning_rate": 4.4390894196924785e-05, "loss": 2.2789, "step": 3877000 }, { "epoch": 11.22, "learning_rate": 4.439017199657281e-05, "loss": 2.2884, "step": 3877500 }, { "epoch": 11.23, "learning_rate": 4.438944979622082e-05, "loss": 2.2814, "step": 3878000 }, { "epoch": 11.23, "learning_rate": 4.438872614857355e-05, "loss": 2.2818, "step": 3878500 }, { "epoch": 11.23, "learning_rate": 4.4388002500926274e-05, "loss": 2.2913, "step": 3879000 }, { "epoch": 11.23, "learning_rate": 4.4387278853278996e-05, "loss": 2.2857, "step": 3879500 }, { "epoch": 11.23, "learning_rate": 4.438655520563172e-05, "loss": 2.295, "step": 3880000 }, { "epoch": 11.23, "learning_rate": 4.438583155798444e-05, "loss": 2.2999, "step": 3880500 }, { "epoch": 11.23, "learning_rate": 4.438510791033716e-05, "loss": 2.3024, "step": 3881000 }, { "epoch": 11.24, "learning_rate": 4.4384384262689885e-05, "loss": 2.2967, "step": 3881500 }, { "epoch": 11.24, "learning_rate": 4.438366061504261e-05, "loss": 2.2856, "step": 3882000 }, { "epoch": 11.24, "learning_rate": 4.4382936967395336e-05, "loss": 2.3042, "step": 3882500 }, { "epoch": 11.24, "learning_rate": 4.438221331974806e-05, "loss": 2.3017, "step": 3883000 }, { "epoch": 11.24, "learning_rate": 4.438148967210078e-05, "loss": 2.3141, "step": 3883500 }, { "epoch": 11.24, "learning_rate": 4.43807660244535e-05, "loss": 2.2983, "step": 3884000 }, { "epoch": 11.24, "learning_rate": 4.4380042376806225e-05, "loss": 2.2994, "step": 3884500 }, { "epoch": 11.25, "learning_rate": 4.437932017645424e-05, "loss": 2.2948, "step": 3885000 }, { "epoch": 11.25, "learning_rate": 4.437859652880696e-05, "loss": 2.2896, "step": 3885500 }, { "epoch": 11.25, "learning_rate": 4.437787288115969e-05, "loss": 2.2839, "step": 3886000 }, { "epoch": 11.25, "learning_rate": 4.4377149233512414e-05, "loss": 2.2984, "step": 3886500 }, { "epoch": 11.25, "learning_rate": 4.4376425585865137e-05, "loss": 2.318, "step": 3887000 }, { "epoch": 11.25, "learning_rate": 4.437570193821786e-05, "loss": 2.3111, "step": 3887500 }, { "epoch": 11.25, "learning_rate": 4.437497973786588e-05, "loss": 2.3157, "step": 3888000 }, { "epoch": 11.26, "learning_rate": 4.43742560902186e-05, "loss": 2.2971, "step": 3888500 }, { "epoch": 11.26, "learning_rate": 4.437353388986662e-05, "loss": 2.3091, "step": 3889000 }, { "epoch": 11.26, "learning_rate": 4.437281024221934e-05, "loss": 2.2843, "step": 3889500 }, { "epoch": 11.26, "learning_rate": 4.437208659457206e-05, "loss": 2.3, "step": 3890000 }, { "epoch": 11.26, "learning_rate": 4.4371362946924786e-05, "loss": 2.3057, "step": 3890500 }, { "epoch": 11.26, "learning_rate": 4.437063929927751e-05, "loss": 2.285, "step": 3891000 }, { "epoch": 11.26, "learning_rate": 4.436991565163024e-05, "loss": 2.298, "step": 3891500 }, { "epoch": 11.27, "learning_rate": 4.436919200398296e-05, "loss": 2.3333, "step": 3892000 }, { "epoch": 11.27, "learning_rate": 4.436846835633568e-05, "loss": 2.2982, "step": 3892500 }, { "epoch": 11.27, "learning_rate": 4.436774470868841e-05, "loss": 2.2807, "step": 3893000 }, { "epoch": 11.27, "learning_rate": 4.436702106104113e-05, "loss": 2.2976, "step": 3893500 }, { "epoch": 11.27, "learning_rate": 4.4366297413393855e-05, "loss": 2.2927, "step": 3894000 }, { "epoch": 11.27, "learning_rate": 4.436557376574658e-05, "loss": 2.2966, "step": 3894500 }, { "epoch": 11.27, "learning_rate": 4.43648501180993e-05, "loss": 2.3097, "step": 3895000 }, { "epoch": 11.28, "learning_rate": 4.436412647045202e-05, "loss": 2.2963, "step": 3895500 }, { "epoch": 11.28, "learning_rate": 4.4363402822804744e-05, "loss": 2.2833, "step": 3896000 }, { "epoch": 11.28, "learning_rate": 4.4362679175157466e-05, "loss": 2.3086, "step": 3896500 }, { "epoch": 11.28, "learning_rate": 4.436195697480549e-05, "loss": 2.289, "step": 3897000 }, { "epoch": 11.28, "learning_rate": 4.436123332715821e-05, "loss": 2.2789, "step": 3897500 }, { "epoch": 11.28, "learning_rate": 4.4360511126806226e-05, "loss": 2.2868, "step": 3898000 }, { "epoch": 11.28, "learning_rate": 4.435978747915895e-05, "loss": 2.3086, "step": 3898500 }, { "epoch": 11.29, "learning_rate": 4.4359065278806964e-05, "loss": 2.3146, "step": 3899000 }, { "epoch": 11.29, "learning_rate": 4.4358341631159686e-05, "loss": 2.2974, "step": 3899500 }, { "epoch": 11.29, "learning_rate": 4.435761798351241e-05, "loss": 2.3185, "step": 3900000 }, { "epoch": 11.29, "learning_rate": 4.435689433586514e-05, "loss": 2.2874, "step": 3900500 }, { "epoch": 11.29, "learning_rate": 4.435617068821786e-05, "loss": 2.2886, "step": 3901000 }, { "epoch": 11.29, "learning_rate": 4.435544704057059e-05, "loss": 2.3028, "step": 3901500 }, { "epoch": 11.29, "learning_rate": 4.4354724840218604e-05, "loss": 2.3163, "step": 3902000 }, { "epoch": 11.3, "learning_rate": 4.4354001192571326e-05, "loss": 2.2947, "step": 3902500 }, { "epoch": 11.3, "learning_rate": 4.435327754492405e-05, "loss": 2.2892, "step": 3903000 }, { "epoch": 11.3, "learning_rate": 4.435255389727677e-05, "loss": 2.2943, "step": 3903500 }, { "epoch": 11.3, "learning_rate": 4.435183024962949e-05, "loss": 2.312, "step": 3904000 }, { "epoch": 11.3, "learning_rate": 4.4351106601982215e-05, "loss": 2.2868, "step": 3904500 }, { "epoch": 11.3, "learning_rate": 4.435038295433494e-05, "loss": 2.2951, "step": 3905000 }, { "epoch": 11.3, "learning_rate": 4.434965930668766e-05, "loss": 2.299, "step": 3905500 }, { "epoch": 11.31, "learning_rate": 4.434893565904039e-05, "loss": 2.32, "step": 3906000 }, { "epoch": 11.31, "learning_rate": 4.434821201139311e-05, "loss": 2.286, "step": 3906500 }, { "epoch": 11.31, "learning_rate": 4.434748836374583e-05, "loss": 2.3081, "step": 3907000 }, { "epoch": 11.31, "learning_rate": 4.4346764716098556e-05, "loss": 2.2949, "step": 3907500 }, { "epoch": 11.31, "learning_rate": 4.434604251574658e-05, "loss": 2.2969, "step": 3908000 }, { "epoch": 11.31, "learning_rate": 4.43453188680993e-05, "loss": 2.2952, "step": 3908500 }, { "epoch": 11.31, "learning_rate": 4.4344596667747316e-05, "loss": 2.2953, "step": 3909000 }, { "epoch": 11.32, "learning_rate": 4.434387302010004e-05, "loss": 2.3102, "step": 3909500 }, { "epoch": 11.32, "learning_rate": 4.434314937245277e-05, "loss": 2.2922, "step": 3910000 }, { "epoch": 11.32, "learning_rate": 4.434242572480549e-05, "loss": 2.263, "step": 3910500 }, { "epoch": 11.32, "learning_rate": 4.4341703524453505e-05, "loss": 2.2974, "step": 3911000 }, { "epoch": 11.32, "learning_rate": 4.434098132410152e-05, "loss": 2.3087, "step": 3911500 }, { "epoch": 11.32, "learning_rate": 4.434025767645424e-05, "loss": 2.304, "step": 3912000 }, { "epoch": 11.33, "learning_rate": 4.4339534028806965e-05, "loss": 2.3138, "step": 3912500 }, { "epoch": 11.33, "learning_rate": 4.433881038115969e-05, "loss": 2.3041, "step": 3913000 }, { "epoch": 11.33, "learning_rate": 4.4338086733512416e-05, "loss": 2.2963, "step": 3913500 }, { "epoch": 11.33, "learning_rate": 4.433736308586514e-05, "loss": 2.2973, "step": 3914000 }, { "epoch": 11.33, "learning_rate": 4.433663943821786e-05, "loss": 2.296, "step": 3914500 }, { "epoch": 11.33, "learning_rate": 4.433591579057058e-05, "loss": 2.3445, "step": 3915000 }, { "epoch": 11.33, "learning_rate": 4.433519214292331e-05, "loss": 2.2963, "step": 3915500 }, { "epoch": 11.34, "learning_rate": 4.4334468495276034e-05, "loss": 2.3101, "step": 3916000 }, { "epoch": 11.34, "learning_rate": 4.4333744847628756e-05, "loss": 2.2876, "step": 3916500 }, { "epoch": 11.34, "learning_rate": 4.433302119998148e-05, "loss": 2.3078, "step": 3917000 }, { "epoch": 11.34, "learning_rate": 4.4332298999629494e-05, "loss": 2.2878, "step": 3917500 }, { "epoch": 11.34, "learning_rate": 4.4331575351982216e-05, "loss": 2.2668, "step": 3918000 }, { "epoch": 11.34, "learning_rate": 4.433085170433494e-05, "loss": 2.2991, "step": 3918500 }, { "epoch": 11.34, "learning_rate": 4.433012805668767e-05, "loss": 2.306, "step": 3919000 }, { "epoch": 11.35, "learning_rate": 4.432940585633568e-05, "loss": 2.3221, "step": 3919500 }, { "epoch": 11.35, "learning_rate": 4.4328682208688405e-05, "loss": 2.3057, "step": 3920000 }, { "epoch": 11.35, "learning_rate": 4.432795856104113e-05, "loss": 2.2965, "step": 3920500 }, { "epoch": 11.35, "learning_rate": 4.432723491339385e-05, "loss": 2.3049, "step": 3921000 }, { "epoch": 11.35, "learning_rate": 4.4326512713041865e-05, "loss": 2.3, "step": 3921500 }, { "epoch": 11.35, "learning_rate": 4.432578906539459e-05, "loss": 2.2848, "step": 3922000 }, { "epoch": 11.35, "learning_rate": 4.4325065417747316e-05, "loss": 2.297, "step": 3922500 }, { "epoch": 11.36, "learning_rate": 4.432434177010004e-05, "loss": 2.3222, "step": 3923000 }, { "epoch": 11.36, "learning_rate": 4.432361812245277e-05, "loss": 2.2986, "step": 3923500 }, { "epoch": 11.36, "learning_rate": 4.432289447480549e-05, "loss": 2.323, "step": 3924000 }, { "epoch": 11.36, "learning_rate": 4.432217082715821e-05, "loss": 2.295, "step": 3924500 }, { "epoch": 11.36, "learning_rate": 4.4321447179510934e-05, "loss": 2.311, "step": 3925000 }, { "epoch": 11.36, "learning_rate": 4.432072497915895e-05, "loss": 2.3085, "step": 3925500 }, { "epoch": 11.36, "learning_rate": 4.432000133151167e-05, "loss": 2.2964, "step": 3926000 }, { "epoch": 11.37, "learning_rate": 4.4319277683864394e-05, "loss": 2.3263, "step": 3926500 }, { "epoch": 11.37, "learning_rate": 4.431855548351242e-05, "loss": 2.3014, "step": 3927000 }, { "epoch": 11.37, "learning_rate": 4.431783183586514e-05, "loss": 2.2935, "step": 3927500 }, { "epoch": 11.37, "learning_rate": 4.431710818821786e-05, "loss": 2.3236, "step": 3928000 }, { "epoch": 11.37, "learning_rate": 4.4316384540570583e-05, "loss": 2.3076, "step": 3928500 }, { "epoch": 11.37, "learning_rate": 4.4315660892923306e-05, "loss": 2.3143, "step": 3929000 }, { "epoch": 11.37, "learning_rate": 4.431493724527603e-05, "loss": 2.2974, "step": 3929500 }, { "epoch": 11.38, "learning_rate": 4.431421359762875e-05, "loss": 2.3073, "step": 3930000 }, { "epoch": 11.38, "learning_rate": 4.431348994998148e-05, "loss": 2.315, "step": 3930500 }, { "epoch": 11.38, "learning_rate": 4.43127663023342e-05, "loss": 2.2901, "step": 3931000 }, { "epoch": 11.38, "learning_rate": 4.4312042654686924e-05, "loss": 2.3244, "step": 3931500 }, { "epoch": 11.38, "learning_rate": 4.4311319007039646e-05, "loss": 2.3177, "step": 3932000 }, { "epoch": 11.38, "learning_rate": 4.431059535939237e-05, "loss": 2.2916, "step": 3932500 }, { "epoch": 11.38, "learning_rate": 4.430987315904039e-05, "loss": 2.3209, "step": 3933000 }, { "epoch": 11.39, "learning_rate": 4.430914951139311e-05, "loss": 2.298, "step": 3933500 }, { "epoch": 11.39, "learning_rate": 4.4308425863745835e-05, "loss": 2.2733, "step": 3934000 }, { "epoch": 11.39, "learning_rate": 4.430770221609856e-05, "loss": 2.2753, "step": 3934500 }, { "epoch": 11.39, "learning_rate": 4.430697856845128e-05, "loss": 2.2932, "step": 3935000 }, { "epoch": 11.39, "learning_rate": 4.4306254920804e-05, "loss": 2.3022, "step": 3935500 }, { "epoch": 11.39, "learning_rate": 4.4305531273156724e-05, "loss": 2.3055, "step": 3936000 }, { "epoch": 11.39, "learning_rate": 4.4304807625509446e-05, "loss": 2.306, "step": 3936500 }, { "epoch": 11.4, "learning_rate": 4.430408542515747e-05, "loss": 2.2852, "step": 3937000 }, { "epoch": 11.4, "learning_rate": 4.430336177751019e-05, "loss": 2.3028, "step": 3937500 }, { "epoch": 11.4, "learning_rate": 4.430263812986292e-05, "loss": 2.3001, "step": 3938000 }, { "epoch": 11.4, "learning_rate": 4.430191448221564e-05, "loss": 2.3039, "step": 3938500 }, { "epoch": 11.4, "learning_rate": 4.4301190834568364e-05, "loss": 2.3164, "step": 3939000 }, { "epoch": 11.4, "learning_rate": 4.4300467186921086e-05, "loss": 2.3127, "step": 3939500 }, { "epoch": 11.4, "learning_rate": 4.429974353927381e-05, "loss": 2.3078, "step": 3940000 }, { "epoch": 11.41, "learning_rate": 4.4299021338921824e-05, "loss": 2.2852, "step": 3940500 }, { "epoch": 11.41, "learning_rate": 4.4298299138569846e-05, "loss": 2.3351, "step": 3941000 }, { "epoch": 11.41, "learning_rate": 4.429757549092257e-05, "loss": 2.2779, "step": 3941500 }, { "epoch": 11.41, "learning_rate": 4.429685184327529e-05, "loss": 2.3009, "step": 3942000 }, { "epoch": 11.41, "learning_rate": 4.429612819562801e-05, "loss": 2.3163, "step": 3942500 }, { "epoch": 11.41, "learning_rate": 4.4295404547980735e-05, "loss": 2.3233, "step": 3943000 }, { "epoch": 11.41, "learning_rate": 4.429468090033346e-05, "loss": 2.3284, "step": 3943500 }, { "epoch": 11.42, "learning_rate": 4.429395725268618e-05, "loss": 2.3109, "step": 3944000 }, { "epoch": 11.42, "learning_rate": 4.42932336050389e-05, "loss": 2.313, "step": 3944500 }, { "epoch": 11.42, "learning_rate": 4.429251140468692e-05, "loss": 2.3092, "step": 3945000 }, { "epoch": 11.42, "learning_rate": 4.429178775703965e-05, "loss": 2.3059, "step": 3945500 }, { "epoch": 11.42, "learning_rate": 4.429106410939237e-05, "loss": 2.3068, "step": 3946000 }, { "epoch": 11.42, "learning_rate": 4.42903404617451e-05, "loss": 2.2908, "step": 3946500 }, { "epoch": 11.42, "learning_rate": 4.428961681409782e-05, "loss": 2.335, "step": 3947000 }, { "epoch": 11.43, "learning_rate": 4.4288894613745836e-05, "loss": 2.3129, "step": 3947500 }, { "epoch": 11.43, "learning_rate": 4.428817096609856e-05, "loss": 2.3018, "step": 3948000 }, { "epoch": 11.43, "learning_rate": 4.428744731845128e-05, "loss": 2.3048, "step": 3948500 }, { "epoch": 11.43, "learning_rate": 4.4286723670804e-05, "loss": 2.3053, "step": 3949000 }, { "epoch": 11.43, "learning_rate": 4.4286000023156725e-05, "loss": 2.2953, "step": 3949500 }, { "epoch": 11.43, "learning_rate": 4.428527927010004e-05, "loss": 2.2934, "step": 3950000 }, { "epoch": 11.44, "learning_rate": 4.428455562245276e-05, "loss": 2.2809, "step": 3950500 }, { "epoch": 11.44, "learning_rate": 4.4283831974805485e-05, "loss": 2.3156, "step": 3951000 }, { "epoch": 11.44, "learning_rate": 4.428310832715821e-05, "loss": 2.3016, "step": 3951500 }, { "epoch": 11.44, "learning_rate": 4.428238467951093e-05, "loss": 2.2839, "step": 3952000 }, { "epoch": 11.44, "learning_rate": 4.428166103186365e-05, "loss": 2.3052, "step": 3952500 }, { "epoch": 11.44, "learning_rate": 4.428093738421638e-05, "loss": 2.2954, "step": 3953000 }, { "epoch": 11.44, "learning_rate": 4.42802137365691e-05, "loss": 2.2658, "step": 3953500 }, { "epoch": 11.45, "learning_rate": 4.4279490088921825e-05, "loss": 2.2998, "step": 3954000 }, { "epoch": 11.45, "learning_rate": 4.427876644127455e-05, "loss": 2.3206, "step": 3954500 }, { "epoch": 11.45, "learning_rate": 4.427804424092257e-05, "loss": 2.3139, "step": 3955000 }, { "epoch": 11.45, "learning_rate": 4.427732059327529e-05, "loss": 2.2897, "step": 3955500 }, { "epoch": 11.45, "learning_rate": 4.4276596945628014e-05, "loss": 2.2835, "step": 3956000 }, { "epoch": 11.45, "learning_rate": 4.4275873297980736e-05, "loss": 2.3086, "step": 3956500 }, { "epoch": 11.45, "learning_rate": 4.427515109762875e-05, "loss": 2.3157, "step": 3957000 }, { "epoch": 11.46, "learning_rate": 4.4274427449981474e-05, "loss": 2.2893, "step": 3957500 }, { "epoch": 11.46, "learning_rate": 4.4273703802334196e-05, "loss": 2.2715, "step": 3958000 }, { "epoch": 11.46, "learning_rate": 4.427298160198222e-05, "loss": 2.3184, "step": 3958500 }, { "epoch": 11.46, "learning_rate": 4.427225795433494e-05, "loss": 2.3111, "step": 3959000 }, { "epoch": 11.46, "learning_rate": 4.427153430668766e-05, "loss": 2.3096, "step": 3959500 }, { "epoch": 11.46, "learning_rate": 4.4270810659040385e-05, "loss": 2.2955, "step": 3960000 }, { "epoch": 11.46, "learning_rate": 4.4270087011393114e-05, "loss": 2.3065, "step": 3960500 }, { "epoch": 11.47, "learning_rate": 4.4269363363745836e-05, "loss": 2.3341, "step": 3961000 }, { "epoch": 11.47, "learning_rate": 4.426863971609856e-05, "loss": 2.3112, "step": 3961500 }, { "epoch": 11.47, "learning_rate": 4.426791606845128e-05, "loss": 2.2741, "step": 3962000 }, { "epoch": 11.47, "learning_rate": 4.4267192420804e-05, "loss": 2.3238, "step": 3962500 }, { "epoch": 11.47, "learning_rate": 4.426647022045202e-05, "loss": 2.2932, "step": 3963000 }, { "epoch": 11.47, "learning_rate": 4.426574657280475e-05, "loss": 2.3027, "step": 3963500 }, { "epoch": 11.47, "learning_rate": 4.426502292515747e-05, "loss": 2.2739, "step": 3964000 }, { "epoch": 11.48, "learning_rate": 4.426429927751019e-05, "loss": 2.2876, "step": 3964500 }, { "epoch": 11.48, "learning_rate": 4.4263575629862914e-05, "loss": 2.2803, "step": 3965000 }, { "epoch": 11.48, "learning_rate": 4.426285342951093e-05, "loss": 2.3114, "step": 3965500 }, { "epoch": 11.48, "learning_rate": 4.426212978186365e-05, "loss": 2.2772, "step": 3966000 }, { "epoch": 11.48, "learning_rate": 4.4261406134216374e-05, "loss": 2.3118, "step": 3966500 }, { "epoch": 11.48, "learning_rate": 4.42606824865691e-05, "loss": 2.3169, "step": 3967000 }, { "epoch": 11.48, "learning_rate": 4.425996028621712e-05, "loss": 2.3221, "step": 3967500 }, { "epoch": 11.49, "learning_rate": 4.425923663856985e-05, "loss": 2.3125, "step": 3968000 }, { "epoch": 11.49, "learning_rate": 4.4258514438217864e-05, "loss": 2.2877, "step": 3968500 }, { "epoch": 11.49, "learning_rate": 4.4257790790570586e-05, "loss": 2.3047, "step": 3969000 }, { "epoch": 11.49, "learning_rate": 4.425706714292331e-05, "loss": 2.298, "step": 3969500 }, { "epoch": 11.49, "learning_rate": 4.425634349527603e-05, "loss": 2.3017, "step": 3970000 }, { "epoch": 11.49, "learning_rate": 4.425561984762875e-05, "loss": 2.3199, "step": 3970500 }, { "epoch": 11.49, "learning_rate": 4.4254896199981475e-05, "loss": 2.2959, "step": 3971000 }, { "epoch": 11.5, "learning_rate": 4.42541725523342e-05, "loss": 2.326, "step": 3971500 }, { "epoch": 11.5, "learning_rate": 4.4253448904686926e-05, "loss": 2.3069, "step": 3972000 }, { "epoch": 11.5, "learning_rate": 4.425272525703965e-05, "loss": 2.316, "step": 3972500 }, { "epoch": 11.5, "learning_rate": 4.425200160939237e-05, "loss": 2.3168, "step": 3973000 }, { "epoch": 11.5, "learning_rate": 4.4251279409040386e-05, "loss": 2.3295, "step": 3973500 }, { "epoch": 11.5, "learning_rate": 4.425055576139311e-05, "loss": 2.3328, "step": 3974000 }, { "epoch": 11.5, "learning_rate": 4.424983211374583e-05, "loss": 2.2868, "step": 3974500 }, { "epoch": 11.51, "learning_rate": 4.424910846609855e-05, "loss": 2.3096, "step": 3975000 }, { "epoch": 11.51, "learning_rate": 4.424838481845128e-05, "loss": 2.3272, "step": 3975500 }, { "epoch": 11.51, "learning_rate": 4.42476626180993e-05, "loss": 2.3217, "step": 3976000 }, { "epoch": 11.51, "learning_rate": 4.4246938970452026e-05, "loss": 2.3046, "step": 3976500 }, { "epoch": 11.51, "learning_rate": 4.424621677010004e-05, "loss": 2.2842, "step": 3977000 }, { "epoch": 11.51, "learning_rate": 4.4245493122452764e-05, "loss": 2.347, "step": 3977500 }, { "epoch": 11.51, "learning_rate": 4.4244769474805486e-05, "loss": 2.301, "step": 3978000 }, { "epoch": 11.52, "learning_rate": 4.424404582715821e-05, "loss": 2.3263, "step": 3978500 }, { "epoch": 11.52, "learning_rate": 4.424332217951093e-05, "loss": 2.3088, "step": 3979000 }, { "epoch": 11.52, "learning_rate": 4.424259853186365e-05, "loss": 2.3154, "step": 3979500 }, { "epoch": 11.52, "learning_rate": 4.4241874884216375e-05, "loss": 2.3006, "step": 3980000 }, { "epoch": 11.52, "learning_rate": 4.42411512365691e-05, "loss": 2.3171, "step": 3980500 }, { "epoch": 11.52, "learning_rate": 4.424042903621712e-05, "loss": 2.2837, "step": 3981000 }, { "epoch": 11.52, "learning_rate": 4.423970538856984e-05, "loss": 2.3081, "step": 3981500 }, { "epoch": 11.53, "learning_rate": 4.4238981740922564e-05, "loss": 2.3006, "step": 3982000 }, { "epoch": 11.53, "learning_rate": 4.4238258093275287e-05, "loss": 2.3255, "step": 3982500 }, { "epoch": 11.53, "learning_rate": 4.423753589292331e-05, "loss": 2.2871, "step": 3983000 }, { "epoch": 11.53, "learning_rate": 4.423681224527603e-05, "loss": 2.2935, "step": 3983500 }, { "epoch": 11.53, "learning_rate": 4.423608859762875e-05, "loss": 2.286, "step": 3984000 }, { "epoch": 11.53, "learning_rate": 4.4235364949981476e-05, "loss": 2.2812, "step": 3984500 }, { "epoch": 11.53, "learning_rate": 4.42346413023342e-05, "loss": 2.315, "step": 3985000 }, { "epoch": 11.54, "learning_rate": 4.423391765468693e-05, "loss": 2.2924, "step": 3985500 }, { "epoch": 11.54, "learning_rate": 4.423319545433494e-05, "loss": 2.2924, "step": 3986000 }, { "epoch": 11.54, "learning_rate": 4.4232471806687665e-05, "loss": 2.2976, "step": 3986500 }, { "epoch": 11.54, "learning_rate": 4.423174815904039e-05, "loss": 2.2832, "step": 3987000 }, { "epoch": 11.54, "learning_rate": 4.423102451139311e-05, "loss": 2.2738, "step": 3987500 }, { "epoch": 11.54, "learning_rate": 4.423030086374583e-05, "loss": 2.287, "step": 3988000 }, { "epoch": 11.55, "learning_rate": 4.4229578663393854e-05, "loss": 2.3203, "step": 3988500 }, { "epoch": 11.55, "learning_rate": 4.4228855015746576e-05, "loss": 2.3089, "step": 3989000 }, { "epoch": 11.55, "learning_rate": 4.422813281539459e-05, "loss": 2.319, "step": 3989500 }, { "epoch": 11.55, "learning_rate": 4.4227409167747314e-05, "loss": 2.2963, "step": 3990000 }, { "epoch": 11.55, "learning_rate": 4.422668552010004e-05, "loss": 2.2939, "step": 3990500 }, { "epoch": 11.55, "learning_rate": 4.422596331974806e-05, "loss": 2.2874, "step": 3991000 }, { "epoch": 11.55, "learning_rate": 4.422523967210078e-05, "loss": 2.2937, "step": 3991500 }, { "epoch": 11.56, "learning_rate": 4.42245160244535e-05, "loss": 2.2808, "step": 3992000 }, { "epoch": 11.56, "learning_rate": 4.4223792376806225e-05, "loss": 2.3007, "step": 3992500 }, { "epoch": 11.56, "learning_rate": 4.4223068729158954e-05, "loss": 2.3146, "step": 3993000 }, { "epoch": 11.56, "learning_rate": 4.4222345081511676e-05, "loss": 2.285, "step": 3993500 }, { "epoch": 11.56, "learning_rate": 4.42216214338644e-05, "loss": 2.309, "step": 3994000 }, { "epoch": 11.56, "learning_rate": 4.422089778621712e-05, "loss": 2.3153, "step": 3994500 }, { "epoch": 11.56, "learning_rate": 4.422017413856984e-05, "loss": 2.3166, "step": 3995000 }, { "epoch": 11.57, "learning_rate": 4.4219450490922565e-05, "loss": 2.3058, "step": 3995500 }, { "epoch": 11.57, "learning_rate": 4.421872684327529e-05, "loss": 2.3028, "step": 3996000 }, { "epoch": 11.57, "learning_rate": 4.421800319562801e-05, "loss": 2.3112, "step": 3996500 }, { "epoch": 11.57, "learning_rate": 4.421727954798073e-05, "loss": 2.2844, "step": 3997000 }, { "epoch": 11.57, "learning_rate": 4.421655879492405e-05, "loss": 2.3182, "step": 3997500 }, { "epoch": 11.57, "learning_rate": 4.4215835147276776e-05, "loss": 2.3341, "step": 3998000 }, { "epoch": 11.57, "learning_rate": 4.42151114996295e-05, "loss": 2.3018, "step": 3998500 }, { "epoch": 11.58, "learning_rate": 4.421438785198222e-05, "loss": 2.2824, "step": 3999000 }, { "epoch": 11.58, "learning_rate": 4.421366420433494e-05, "loss": 2.2931, "step": 3999500 }, { "epoch": 11.58, "learning_rate": 4.4212940556687665e-05, "loss": 2.3088, "step": 4000000 }, { "epoch": 11.58, "learning_rate": 4.421221690904039e-05, "loss": 2.334, "step": 4000500 }, { "epoch": 11.58, "learning_rate": 4.421149326139311e-05, "loss": 2.3087, "step": 4001000 }, { "epoch": 11.58, "learning_rate": 4.421076961374583e-05, "loss": 2.311, "step": 4001500 }, { "epoch": 11.58, "learning_rate": 4.4210045966098554e-05, "loss": 2.3158, "step": 4002000 }, { "epoch": 11.59, "learning_rate": 4.4209322318451277e-05, "loss": 2.3135, "step": 4002500 }, { "epoch": 11.59, "learning_rate": 4.4208598670804006e-05, "loss": 2.2959, "step": 4003000 }, { "epoch": 11.59, "learning_rate": 4.420787647045202e-05, "loss": 2.2845, "step": 4003500 }, { "epoch": 11.59, "learning_rate": 4.420715282280474e-05, "loss": 2.2957, "step": 4004000 }, { "epoch": 11.59, "learning_rate": 4.420643062245276e-05, "loss": 2.2841, "step": 4004500 }, { "epoch": 11.59, "learning_rate": 4.420570697480548e-05, "loss": 2.3077, "step": 4005000 }, { "epoch": 11.59, "learning_rate": 4.420498332715821e-05, "loss": 2.3048, "step": 4005500 }, { "epoch": 11.6, "learning_rate": 4.420425967951093e-05, "loss": 2.3119, "step": 4006000 }, { "epoch": 11.6, "learning_rate": 4.4203537479158955e-05, "loss": 2.3023, "step": 4006500 }, { "epoch": 11.6, "learning_rate": 4.420281383151168e-05, "loss": 2.2974, "step": 4007000 }, { "epoch": 11.6, "learning_rate": 4.42020901838644e-05, "loss": 2.2779, "step": 4007500 }, { "epoch": 11.6, "learning_rate": 4.420136653621712e-05, "loss": 2.2794, "step": 4008000 }, { "epoch": 11.6, "learning_rate": 4.4200642888569844e-05, "loss": 2.3066, "step": 4008500 }, { "epoch": 11.6, "learning_rate": 4.4199919240922566e-05, "loss": 2.2977, "step": 4009000 }, { "epoch": 11.61, "learning_rate": 4.419919559327529e-05, "loss": 2.3176, "step": 4009500 }, { "epoch": 11.61, "learning_rate": 4.419847194562801e-05, "loss": 2.2809, "step": 4010000 }, { "epoch": 11.61, "learning_rate": 4.419774829798073e-05, "loss": 2.3118, "step": 4010500 }, { "epoch": 11.61, "learning_rate": 4.4197026097628755e-05, "loss": 2.3104, "step": 4011000 }, { "epoch": 11.61, "learning_rate": 4.419630244998148e-05, "loss": 2.3073, "step": 4011500 }, { "epoch": 11.61, "learning_rate": 4.41955788023342e-05, "loss": 2.3047, "step": 4012000 }, { "epoch": 11.61, "learning_rate": 4.4194856601982215e-05, "loss": 2.2994, "step": 4012500 }, { "epoch": 11.62, "learning_rate": 4.4194132954334944e-05, "loss": 2.2983, "step": 4013000 }, { "epoch": 11.62, "learning_rate": 4.4193409306687666e-05, "loss": 2.3036, "step": 4013500 }, { "epoch": 11.62, "learning_rate": 4.419268565904039e-05, "loss": 2.3088, "step": 4014000 }, { "epoch": 11.62, "learning_rate": 4.419196201139311e-05, "loss": 2.2879, "step": 4014500 }, { "epoch": 11.62, "learning_rate": 4.419123836374583e-05, "loss": 2.2929, "step": 4015000 }, { "epoch": 11.62, "learning_rate": 4.4190514716098555e-05, "loss": 2.2861, "step": 4015500 }, { "epoch": 11.62, "learning_rate": 4.418979106845128e-05, "loss": 2.3032, "step": 4016000 }, { "epoch": 11.63, "learning_rate": 4.4189067420804006e-05, "loss": 2.3023, "step": 4016500 }, { "epoch": 11.63, "learning_rate": 4.418834377315673e-05, "loss": 2.297, "step": 4017000 }, { "epoch": 11.63, "learning_rate": 4.418762012550945e-05, "loss": 2.3063, "step": 4017500 }, { "epoch": 11.63, "learning_rate": 4.418689647786217e-05, "loss": 2.3267, "step": 4018000 }, { "epoch": 11.63, "learning_rate": 4.4186172830214895e-05, "loss": 2.2868, "step": 4018500 }, { "epoch": 11.63, "learning_rate": 4.418544918256762e-05, "loss": 2.3032, "step": 4019000 }, { "epoch": 11.63, "learning_rate": 4.418472698221563e-05, "loss": 2.3136, "step": 4019500 }, { "epoch": 11.64, "learning_rate": 4.418400333456836e-05, "loss": 2.2942, "step": 4020000 }, { "epoch": 11.64, "learning_rate": 4.4183279686921084e-05, "loss": 2.273, "step": 4020500 }, { "epoch": 11.64, "learning_rate": 4.418255748656911e-05, "loss": 2.2957, "step": 4021000 }, { "epoch": 11.64, "learning_rate": 4.418183383892183e-05, "loss": 2.2944, "step": 4021500 }, { "epoch": 11.64, "learning_rate": 4.418111019127455e-05, "loss": 2.3061, "step": 4022000 }, { "epoch": 11.64, "learning_rate": 4.418038654362727e-05, "loss": 2.3101, "step": 4022500 }, { "epoch": 11.64, "learning_rate": 4.4179662895979996e-05, "loss": 2.3026, "step": 4023000 }, { "epoch": 11.65, "learning_rate": 4.417893924833272e-05, "loss": 2.3102, "step": 4023500 }, { "epoch": 11.65, "learning_rate": 4.417821704798073e-05, "loss": 2.3056, "step": 4024000 }, { "epoch": 11.65, "learning_rate": 4.4177494847628756e-05, "loss": 2.3116, "step": 4024500 }, { "epoch": 11.65, "learning_rate": 4.417677119998148e-05, "loss": 2.2919, "step": 4025000 }, { "epoch": 11.65, "learning_rate": 4.41760475523342e-05, "loss": 2.2696, "step": 4025500 }, { "epoch": 11.65, "learning_rate": 4.417532390468692e-05, "loss": 2.2923, "step": 4026000 }, { "epoch": 11.66, "learning_rate": 4.4174600257039645e-05, "loss": 2.3166, "step": 4026500 }, { "epoch": 11.66, "learning_rate": 4.417387660939237e-05, "loss": 2.3212, "step": 4027000 }, { "epoch": 11.66, "learning_rate": 4.4173152961745096e-05, "loss": 2.3009, "step": 4027500 }, { "epoch": 11.66, "learning_rate": 4.417242931409782e-05, "loss": 2.3027, "step": 4028000 }, { "epoch": 11.66, "learning_rate": 4.417170566645054e-05, "loss": 2.2992, "step": 4028500 }, { "epoch": 11.66, "learning_rate": 4.417098201880326e-05, "loss": 2.2865, "step": 4029000 }, { "epoch": 11.66, "learning_rate": 4.4170258371155985e-05, "loss": 2.294, "step": 4029500 }, { "epoch": 11.67, "learning_rate": 4.416953472350871e-05, "loss": 2.2931, "step": 4030000 }, { "epoch": 11.67, "learning_rate": 4.416881252315673e-05, "loss": 2.2949, "step": 4030500 }, { "epoch": 11.67, "learning_rate": 4.416808887550945e-05, "loss": 2.2832, "step": 4031000 }, { "epoch": 11.67, "learning_rate": 4.4167365227862174e-05, "loss": 2.3111, "step": 4031500 }, { "epoch": 11.67, "learning_rate": 4.4166641580214896e-05, "loss": 2.3021, "step": 4032000 }, { "epoch": 11.67, "learning_rate": 4.416591937986291e-05, "loss": 2.2856, "step": 4032500 }, { "epoch": 11.67, "learning_rate": 4.4165195732215634e-05, "loss": 2.2924, "step": 4033000 }, { "epoch": 11.68, "learning_rate": 4.4164472084568356e-05, "loss": 2.3113, "step": 4033500 }, { "epoch": 11.68, "learning_rate": 4.4163748436921085e-05, "loss": 2.2964, "step": 4034000 }, { "epoch": 11.68, "learning_rate": 4.416302478927381e-05, "loss": 2.2989, "step": 4034500 }, { "epoch": 11.68, "learning_rate": 4.4162301141626536e-05, "loss": 2.3168, "step": 4035000 }, { "epoch": 11.68, "learning_rate": 4.416157749397926e-05, "loss": 2.2998, "step": 4035500 }, { "epoch": 11.68, "learning_rate": 4.416085384633198e-05, "loss": 2.2884, "step": 4036000 }, { "epoch": 11.68, "learning_rate": 4.416013309327529e-05, "loss": 2.2962, "step": 4036500 }, { "epoch": 11.69, "learning_rate": 4.415940944562801e-05, "loss": 2.318, "step": 4037000 }, { "epoch": 11.69, "learning_rate": 4.4158685797980734e-05, "loss": 2.2858, "step": 4037500 }, { "epoch": 11.69, "learning_rate": 4.4157962150333456e-05, "loss": 2.3189, "step": 4038000 }, { "epoch": 11.69, "learning_rate": 4.4157238502686185e-05, "loss": 2.3121, "step": 4038500 }, { "epoch": 11.69, "learning_rate": 4.415651485503891e-05, "loss": 2.3022, "step": 4039000 }, { "epoch": 11.69, "learning_rate": 4.415579120739163e-05, "loss": 2.289, "step": 4039500 }, { "epoch": 11.69, "learning_rate": 4.415506755974435e-05, "loss": 2.2943, "step": 4040000 }, { "epoch": 11.7, "learning_rate": 4.415434535939237e-05, "loss": 2.2873, "step": 4040500 }, { "epoch": 11.7, "learning_rate": 4.415362171174509e-05, "loss": 2.3376, "step": 4041000 }, { "epoch": 11.7, "learning_rate": 4.415289806409781e-05, "loss": 2.3106, "step": 4041500 }, { "epoch": 11.7, "learning_rate": 4.4152174416450534e-05, "loss": 2.2988, "step": 4042000 }, { "epoch": 11.7, "learning_rate": 4.415145076880326e-05, "loss": 2.3081, "step": 4042500 }, { "epoch": 11.7, "learning_rate": 4.4150727121155986e-05, "loss": 2.3062, "step": 4043000 }, { "epoch": 11.7, "learning_rate": 4.415000492080401e-05, "loss": 2.3117, "step": 4043500 }, { "epoch": 11.71, "learning_rate": 4.414928127315673e-05, "loss": 2.3184, "step": 4044000 }, { "epoch": 11.71, "learning_rate": 4.4148559072804746e-05, "loss": 2.2977, "step": 4044500 }, { "epoch": 11.71, "learning_rate": 4.414783542515747e-05, "loss": 2.3117, "step": 4045000 }, { "epoch": 11.71, "learning_rate": 4.414711177751019e-05, "loss": 2.3191, "step": 4045500 }, { "epoch": 11.71, "learning_rate": 4.414638812986291e-05, "loss": 2.3122, "step": 4046000 }, { "epoch": 11.71, "learning_rate": 4.4145664482215635e-05, "loss": 2.3071, "step": 4046500 }, { "epoch": 11.71, "learning_rate": 4.414494083456836e-05, "loss": 2.2947, "step": 4047000 }, { "epoch": 11.72, "learning_rate": 4.4144217186921086e-05, "loss": 2.313, "step": 4047500 }, { "epoch": 11.72, "learning_rate": 4.414349353927381e-05, "loss": 2.3196, "step": 4048000 }, { "epoch": 11.72, "learning_rate": 4.414276989162653e-05, "loss": 2.291, "step": 4048500 }, { "epoch": 11.72, "learning_rate": 4.414204624397925e-05, "loss": 2.2913, "step": 4049000 }, { "epoch": 11.72, "learning_rate": 4.414132404362727e-05, "loss": 2.3165, "step": 4049500 }, { "epoch": 11.72, "learning_rate": 4.414060039598e-05, "loss": 2.294, "step": 4050000 }, { "epoch": 11.72, "learning_rate": 4.413987674833272e-05, "loss": 2.31, "step": 4050500 }, { "epoch": 11.73, "learning_rate": 4.413915310068544e-05, "loss": 2.2875, "step": 4051000 }, { "epoch": 11.73, "learning_rate": 4.4138429453038164e-05, "loss": 2.3076, "step": 4051500 }, { "epoch": 11.73, "learning_rate": 4.4137707252686186e-05, "loss": 2.3173, "step": 4052000 }, { "epoch": 11.73, "learning_rate": 4.413698360503891e-05, "loss": 2.3119, "step": 4052500 }, { "epoch": 11.73, "learning_rate": 4.413625995739163e-05, "loss": 2.2926, "step": 4053000 }, { "epoch": 11.73, "learning_rate": 4.413553630974435e-05, "loss": 2.3058, "step": 4053500 }, { "epoch": 11.73, "learning_rate": 4.4134812662097075e-05, "loss": 2.2948, "step": 4054000 }, { "epoch": 11.74, "learning_rate": 4.413409046174509e-05, "loss": 2.3115, "step": 4054500 }, { "epoch": 11.74, "learning_rate": 4.413336681409781e-05, "loss": 2.3101, "step": 4055000 }, { "epoch": 11.74, "learning_rate": 4.4132643166450535e-05, "loss": 2.3207, "step": 4055500 }, { "epoch": 11.74, "learning_rate": 4.4131919518803264e-05, "loss": 2.2986, "step": 4056000 }, { "epoch": 11.74, "learning_rate": 4.4131195871155986e-05, "loss": 2.3221, "step": 4056500 }, { "epoch": 11.74, "learning_rate": 4.413047222350871e-05, "loss": 2.2736, "step": 4057000 }, { "epoch": 11.74, "learning_rate": 4.412975002315673e-05, "loss": 2.2975, "step": 4057500 }, { "epoch": 11.75, "learning_rate": 4.412902637550945e-05, "loss": 2.2917, "step": 4058000 }, { "epoch": 11.75, "learning_rate": 4.4128302727862175e-05, "loss": 2.3064, "step": 4058500 }, { "epoch": 11.75, "learning_rate": 4.41275790802149e-05, "loss": 2.296, "step": 4059000 }, { "epoch": 11.75, "learning_rate": 4.412685543256762e-05, "loss": 2.2987, "step": 4059500 }, { "epoch": 11.75, "learning_rate": 4.412613178492034e-05, "loss": 2.2945, "step": 4060000 }, { "epoch": 11.75, "learning_rate": 4.4125408137273064e-05, "loss": 2.3104, "step": 4060500 }, { "epoch": 11.75, "learning_rate": 4.412468593692109e-05, "loss": 2.2864, "step": 4061000 }, { "epoch": 11.76, "learning_rate": 4.412396228927381e-05, "loss": 2.301, "step": 4061500 }, { "epoch": 11.76, "learning_rate": 4.412323864162653e-05, "loss": 2.2895, "step": 4062000 }, { "epoch": 11.76, "learning_rate": 4.4122514993979253e-05, "loss": 2.2987, "step": 4062500 }, { "epoch": 11.76, "learning_rate": 4.4121791346331976e-05, "loss": 2.2841, "step": 4063000 }, { "epoch": 11.76, "learning_rate": 4.41210676986847e-05, "loss": 2.2942, "step": 4063500 }, { "epoch": 11.76, "learning_rate": 4.412034405103742e-05, "loss": 2.3106, "step": 4064000 }, { "epoch": 11.77, "learning_rate": 4.411962040339014e-05, "loss": 2.3025, "step": 4064500 }, { "epoch": 11.77, "learning_rate": 4.411889675574287e-05, "loss": 2.3014, "step": 4065000 }, { "epoch": 11.77, "learning_rate": 4.411817455539089e-05, "loss": 2.3265, "step": 4065500 }, { "epoch": 11.77, "learning_rate": 4.4117450907743616e-05, "loss": 2.3057, "step": 4066000 }, { "epoch": 11.77, "learning_rate": 4.411672726009634e-05, "loss": 2.3206, "step": 4066500 }, { "epoch": 11.77, "learning_rate": 4.411600361244906e-05, "loss": 2.2914, "step": 4067000 }, { "epoch": 11.77, "learning_rate": 4.411527996480178e-05, "loss": 2.2837, "step": 4067500 }, { "epoch": 11.78, "learning_rate": 4.41145577644498e-05, "loss": 2.3036, "step": 4068000 }, { "epoch": 11.78, "learning_rate": 4.411383411680252e-05, "loss": 2.3062, "step": 4068500 }, { "epoch": 11.78, "learning_rate": 4.4113111916450536e-05, "loss": 2.2849, "step": 4069000 }, { "epoch": 11.78, "learning_rate": 4.4112388268803265e-05, "loss": 2.2961, "step": 4069500 }, { "epoch": 11.78, "learning_rate": 4.411166462115599e-05, "loss": 2.3164, "step": 4070000 }, { "epoch": 11.78, "learning_rate": 4.411094097350871e-05, "loss": 2.3031, "step": 4070500 }, { "epoch": 11.78, "learning_rate": 4.411021732586143e-05, "loss": 2.2988, "step": 4071000 }, { "epoch": 11.79, "learning_rate": 4.4109493678214154e-05, "loss": 2.2962, "step": 4071500 }, { "epoch": 11.79, "learning_rate": 4.4108770030566876e-05, "loss": 2.2982, "step": 4072000 }, { "epoch": 11.79, "learning_rate": 4.4108046382919605e-05, "loss": 2.3011, "step": 4072500 }, { "epoch": 11.79, "learning_rate": 4.410732418256762e-05, "loss": 2.3009, "step": 4073000 }, { "epoch": 11.79, "learning_rate": 4.410660053492034e-05, "loss": 2.2899, "step": 4073500 }, { "epoch": 11.79, "learning_rate": 4.4105878334568365e-05, "loss": 2.2802, "step": 4074000 }, { "epoch": 11.79, "learning_rate": 4.410515468692109e-05, "loss": 2.3145, "step": 4074500 }, { "epoch": 11.8, "learning_rate": 4.410443103927381e-05, "loss": 2.2888, "step": 4075000 }, { "epoch": 11.8, "learning_rate": 4.410370739162653e-05, "loss": 2.2885, "step": 4075500 }, { "epoch": 11.8, "learning_rate": 4.4102983743979254e-05, "loss": 2.3152, "step": 4076000 }, { "epoch": 11.8, "learning_rate": 4.4102260096331976e-05, "loss": 2.2972, "step": 4076500 }, { "epoch": 11.8, "learning_rate": 4.41015364486847e-05, "loss": 2.2907, "step": 4077000 }, { "epoch": 11.8, "learning_rate": 4.410081280103742e-05, "loss": 2.2915, "step": 4077500 }, { "epoch": 11.8, "learning_rate": 4.410008915339014e-05, "loss": 2.2878, "step": 4078000 }, { "epoch": 11.81, "learning_rate": 4.4099366953038165e-05, "loss": 2.2933, "step": 4078500 }, { "epoch": 11.81, "learning_rate": 4.409864330539089e-05, "loss": 2.3163, "step": 4079000 }, { "epoch": 11.81, "learning_rate": 4.409791965774361e-05, "loss": 2.3006, "step": 4079500 }, { "epoch": 11.81, "learning_rate": 4.409719601009634e-05, "loss": 2.2837, "step": 4080000 }, { "epoch": 11.81, "learning_rate": 4.4096473809744354e-05, "loss": 2.3107, "step": 4080500 }, { "epoch": 11.81, "learning_rate": 4.409575016209708e-05, "loss": 2.3069, "step": 4081000 }, { "epoch": 11.81, "learning_rate": 4.409502796174509e-05, "loss": 2.2875, "step": 4081500 }, { "epoch": 11.82, "learning_rate": 4.4094304314097815e-05, "loss": 2.3154, "step": 4082000 }, { "epoch": 11.82, "learning_rate": 4.4093580666450544e-05, "loss": 2.3142, "step": 4082500 }, { "epoch": 11.82, "learning_rate": 4.4092857018803266e-05, "loss": 2.3172, "step": 4083000 }, { "epoch": 11.82, "learning_rate": 4.409213481845128e-05, "loss": 2.3143, "step": 4083500 }, { "epoch": 11.82, "learning_rate": 4.4091411170804004e-05, "loss": 2.3086, "step": 4084000 }, { "epoch": 11.82, "learning_rate": 4.4090687523156726e-05, "loss": 2.3097, "step": 4084500 }, { "epoch": 11.82, "learning_rate": 4.408996387550945e-05, "loss": 2.2923, "step": 4085000 }, { "epoch": 11.83, "learning_rate": 4.4089241675157464e-05, "loss": 2.3094, "step": 4085500 }, { "epoch": 11.83, "learning_rate": 4.408851802751019e-05, "loss": 2.2949, "step": 4086000 }, { "epoch": 11.83, "learning_rate": 4.4087794379862915e-05, "loss": 2.3103, "step": 4086500 }, { "epoch": 11.83, "learning_rate": 4.408707073221564e-05, "loss": 2.3002, "step": 4087000 }, { "epoch": 11.83, "learning_rate": 4.4086347084568366e-05, "loss": 2.285, "step": 4087500 }, { "epoch": 11.83, "learning_rate": 4.408562488421638e-05, "loss": 2.2682, "step": 4088000 }, { "epoch": 11.83, "learning_rate": 4.4084901236569104e-05, "loss": 2.3297, "step": 4088500 }, { "epoch": 11.84, "learning_rate": 4.4084177588921826e-05, "loss": 2.2834, "step": 4089000 }, { "epoch": 11.84, "learning_rate": 4.408345394127455e-05, "loss": 2.3078, "step": 4089500 }, { "epoch": 11.84, "learning_rate": 4.408273029362727e-05, "loss": 2.3086, "step": 4090000 }, { "epoch": 11.84, "learning_rate": 4.408200664597999e-05, "loss": 2.323, "step": 4090500 }, { "epoch": 11.84, "learning_rate": 4.4081282998332715e-05, "loss": 2.2837, "step": 4091000 }, { "epoch": 11.84, "learning_rate": 4.4080559350685444e-05, "loss": 2.328, "step": 4091500 }, { "epoch": 11.84, "learning_rate": 4.4079835703038166e-05, "loss": 2.3061, "step": 4092000 }, { "epoch": 11.85, "learning_rate": 4.407911350268618e-05, "loss": 2.3229, "step": 4092500 }, { "epoch": 11.85, "learning_rate": 4.4078389855038904e-05, "loss": 2.3036, "step": 4093000 }, { "epoch": 11.85, "learning_rate": 4.4077666207391626e-05, "loss": 2.304, "step": 4093500 }, { "epoch": 11.85, "learning_rate": 4.407694255974435e-05, "loss": 2.303, "step": 4094000 }, { "epoch": 11.85, "learning_rate": 4.407621891209707e-05, "loss": 2.3261, "step": 4094500 }, { "epoch": 11.85, "learning_rate": 4.40754952644498e-05, "loss": 2.2864, "step": 4095000 }, { "epoch": 11.85, "learning_rate": 4.4074773064097815e-05, "loss": 2.3366, "step": 4095500 }, { "epoch": 11.86, "learning_rate": 4.4074049416450544e-05, "loss": 2.2698, "step": 4096000 }, { "epoch": 11.86, "learning_rate": 4.4073325768803267e-05, "loss": 2.2931, "step": 4096500 }, { "epoch": 11.86, "learning_rate": 4.407260212115599e-05, "loss": 2.308, "step": 4097000 }, { "epoch": 11.86, "learning_rate": 4.407187847350871e-05, "loss": 2.2926, "step": 4097500 }, { "epoch": 11.86, "learning_rate": 4.407115482586143e-05, "loss": 2.3188, "step": 4098000 }, { "epoch": 11.86, "learning_rate": 4.407043407280474e-05, "loss": 2.3002, "step": 4098500 }, { "epoch": 11.86, "learning_rate": 4.4069710425157464e-05, "loss": 2.3053, "step": 4099000 }, { "epoch": 11.87, "learning_rate": 4.406898677751019e-05, "loss": 2.2839, "step": 4099500 }, { "epoch": 11.87, "learning_rate": 4.4068263129862916e-05, "loss": 2.3119, "step": 4100000 }, { "epoch": 11.87, "learning_rate": 4.406753948221564e-05, "loss": 2.2932, "step": 4100500 }, { "epoch": 11.87, "learning_rate": 4.406681872915895e-05, "loss": 2.2897, "step": 4101000 }, { "epoch": 11.87, "learning_rate": 4.406609508151167e-05, "loss": 2.3241, "step": 4101500 }, { "epoch": 11.87, "learning_rate": 4.406537143386439e-05, "loss": 2.3068, "step": 4102000 }, { "epoch": 11.88, "learning_rate": 4.406464778621712e-05, "loss": 2.3062, "step": 4102500 }, { "epoch": 11.88, "learning_rate": 4.406392413856984e-05, "loss": 2.3035, "step": 4103000 }, { "epoch": 11.88, "learning_rate": 4.406320049092257e-05, "loss": 2.3151, "step": 4103500 }, { "epoch": 11.88, "learning_rate": 4.4062476843275294e-05, "loss": 2.2993, "step": 4104000 }, { "epoch": 11.88, "learning_rate": 4.4061753195628016e-05, "loss": 2.297, "step": 4104500 }, { "epoch": 11.88, "learning_rate": 4.406102954798074e-05, "loss": 2.312, "step": 4105000 }, { "epoch": 11.88, "learning_rate": 4.406030590033346e-05, "loss": 2.2969, "step": 4105500 }, { "epoch": 11.89, "learning_rate": 4.405958225268618e-05, "loss": 2.3049, "step": 4106000 }, { "epoch": 11.89, "learning_rate": 4.4058858605038905e-05, "loss": 2.3005, "step": 4106500 }, { "epoch": 11.89, "learning_rate": 4.405813495739163e-05, "loss": 2.2733, "step": 4107000 }, { "epoch": 11.89, "learning_rate": 4.405741130974435e-05, "loss": 2.3173, "step": 4107500 }, { "epoch": 11.89, "learning_rate": 4.405668910939237e-05, "loss": 2.2702, "step": 4108000 }, { "epoch": 11.89, "learning_rate": 4.4055965461745094e-05, "loss": 2.2972, "step": 4108500 }, { "epoch": 11.89, "learning_rate": 4.4055241814097816e-05, "loss": 2.2941, "step": 4109000 }, { "epoch": 11.9, "learning_rate": 4.405451816645054e-05, "loss": 2.2935, "step": 4109500 }, { "epoch": 11.9, "learning_rate": 4.4053795966098554e-05, "loss": 2.2781, "step": 4110000 }, { "epoch": 11.9, "learning_rate": 4.405307231845128e-05, "loss": 2.2801, "step": 4110500 }, { "epoch": 11.9, "learning_rate": 4.4052348670804005e-05, "loss": 2.2994, "step": 4111000 }, { "epoch": 11.9, "learning_rate": 4.405162502315673e-05, "loss": 2.2714, "step": 4111500 }, { "epoch": 11.9, "learning_rate": 4.405090137550945e-05, "loss": 2.3237, "step": 4112000 }, { "epoch": 11.9, "learning_rate": 4.405017772786217e-05, "loss": 2.3093, "step": 4112500 }, { "epoch": 11.91, "learning_rate": 4.4049454080214894e-05, "loss": 2.3054, "step": 4113000 }, { "epoch": 11.91, "learning_rate": 4.404873043256762e-05, "loss": 2.2994, "step": 4113500 }, { "epoch": 11.91, "learning_rate": 4.4048006784920345e-05, "loss": 2.3025, "step": 4114000 }, { "epoch": 11.91, "learning_rate": 4.404728458456836e-05, "loss": 2.278, "step": 4114500 }, { "epoch": 11.91, "learning_rate": 4.404656093692108e-05, "loss": 2.3121, "step": 4115000 }, { "epoch": 11.91, "learning_rate": 4.4045837289273805e-05, "loss": 2.2963, "step": 4115500 }, { "epoch": 11.91, "learning_rate": 4.404511364162653e-05, "loss": 2.295, "step": 4116000 }, { "epoch": 11.92, "learning_rate": 4.404438999397925e-05, "loss": 2.3031, "step": 4116500 }, { "epoch": 11.92, "learning_rate": 4.404366634633197e-05, "loss": 2.2905, "step": 4117000 }, { "epoch": 11.92, "learning_rate": 4.40429426986847e-05, "loss": 2.2766, "step": 4117500 }, { "epoch": 11.92, "learning_rate": 4.404222049833272e-05, "loss": 2.3105, "step": 4118000 }, { "epoch": 11.92, "learning_rate": 4.4041496850685446e-05, "loss": 2.2809, "step": 4118500 }, { "epoch": 11.92, "learning_rate": 4.404077320303817e-05, "loss": 2.3152, "step": 4119000 }, { "epoch": 11.92, "learning_rate": 4.404004955539089e-05, "loss": 2.2916, "step": 4119500 }, { "epoch": 11.93, "learning_rate": 4.403932590774361e-05, "loss": 2.2978, "step": 4120000 }, { "epoch": 11.93, "learning_rate": 4.403860515468692e-05, "loss": 2.3157, "step": 4120500 }, { "epoch": 11.93, "learning_rate": 4.4037881507039643e-05, "loss": 2.2839, "step": 4121000 }, { "epoch": 11.93, "learning_rate": 4.403715785939237e-05, "loss": 2.3198, "step": 4121500 }, { "epoch": 11.93, "learning_rate": 4.4036434211745095e-05, "loss": 2.2724, "step": 4122000 }, { "epoch": 11.93, "learning_rate": 4.403571056409782e-05, "loss": 2.3063, "step": 4122500 }, { "epoch": 11.93, "learning_rate": 4.403498691645054e-05, "loss": 2.3071, "step": 4123000 }, { "epoch": 11.94, "learning_rate": 4.403426326880326e-05, "loss": 2.2885, "step": 4123500 }, { "epoch": 11.94, "learning_rate": 4.4033539621155984e-05, "loss": 2.3243, "step": 4124000 }, { "epoch": 11.94, "learning_rate": 4.4032815973508706e-05, "loss": 2.298, "step": 4124500 }, { "epoch": 11.94, "learning_rate": 4.403209377315673e-05, "loss": 2.2972, "step": 4125000 }, { "epoch": 11.94, "learning_rate": 4.403137157280475e-05, "loss": 2.3127, "step": 4125500 }, { "epoch": 11.94, "learning_rate": 4.403064792515747e-05, "loss": 2.2933, "step": 4126000 }, { "epoch": 11.94, "learning_rate": 4.4029924277510195e-05, "loss": 2.2892, "step": 4126500 }, { "epoch": 11.95, "learning_rate": 4.402920062986292e-05, "loss": 2.2909, "step": 4127000 }, { "epoch": 11.95, "learning_rate": 4.402847842951093e-05, "loss": 2.3121, "step": 4127500 }, { "epoch": 11.95, "learning_rate": 4.4027754781863655e-05, "loss": 2.3134, "step": 4128000 }, { "epoch": 11.95, "learning_rate": 4.402703113421638e-05, "loss": 2.2711, "step": 4128500 }, { "epoch": 11.95, "learning_rate": 4.40263074865691e-05, "loss": 2.3203, "step": 4129000 }, { "epoch": 11.95, "learning_rate": 4.402558528621712e-05, "loss": 2.3034, "step": 4129500 }, { "epoch": 11.95, "learning_rate": 4.4024861638569844e-05, "loss": 2.3063, "step": 4130000 }, { "epoch": 11.96, "learning_rate": 4.402413943821786e-05, "loss": 2.3168, "step": 4130500 }, { "epoch": 11.96, "learning_rate": 4.402341579057058e-05, "loss": 2.3214, "step": 4131000 }, { "epoch": 11.96, "learning_rate": 4.4022692142923304e-05, "loss": 2.3061, "step": 4131500 }, { "epoch": 11.96, "learning_rate": 4.4021968495276026e-05, "loss": 2.3007, "step": 4132000 }, { "epoch": 11.96, "learning_rate": 4.402124484762875e-05, "loss": 2.3079, "step": 4132500 }, { "epoch": 11.96, "learning_rate": 4.402052119998148e-05, "loss": 2.2661, "step": 4133000 }, { "epoch": 11.96, "learning_rate": 4.40197975523342e-05, "loss": 2.3062, "step": 4133500 }, { "epoch": 11.97, "learning_rate": 4.401907390468692e-05, "loss": 2.2874, "step": 4134000 }, { "epoch": 11.97, "learning_rate": 4.4018351704334944e-05, "loss": 2.2984, "step": 4134500 }, { "epoch": 11.97, "learning_rate": 4.4017628056687667e-05, "loss": 2.3181, "step": 4135000 }, { "epoch": 11.97, "learning_rate": 4.401690440904039e-05, "loss": 2.3004, "step": 4135500 }, { "epoch": 11.97, "learning_rate": 4.401618076139311e-05, "loss": 2.3158, "step": 4136000 }, { "epoch": 11.97, "learning_rate": 4.401545711374583e-05, "loss": 2.3053, "step": 4136500 }, { "epoch": 11.97, "learning_rate": 4.4014733466098555e-05, "loss": 2.3112, "step": 4137000 }, { "epoch": 11.98, "learning_rate": 4.401401126574657e-05, "loss": 2.3036, "step": 4137500 }, { "epoch": 11.98, "learning_rate": 4.40132876180993e-05, "loss": 2.2959, "step": 4138000 }, { "epoch": 11.98, "learning_rate": 4.401256397045202e-05, "loss": 2.272, "step": 4138500 }, { "epoch": 11.98, "learning_rate": 4.4011840322804745e-05, "loss": 2.2947, "step": 4139000 }, { "epoch": 11.98, "learning_rate": 4.401111667515747e-05, "loss": 2.3185, "step": 4139500 }, { "epoch": 11.98, "learning_rate": 4.401039302751019e-05, "loss": 2.3121, "step": 4140000 }, { "epoch": 11.99, "learning_rate": 4.400966937986292e-05, "loss": 2.3207, "step": 4140500 }, { "epoch": 11.99, "learning_rate": 4.400894573221564e-05, "loss": 2.3163, "step": 4141000 }, { "epoch": 11.99, "learning_rate": 4.4008223531863656e-05, "loss": 2.3147, "step": 4141500 }, { "epoch": 11.99, "learning_rate": 4.400749988421638e-05, "loss": 2.304, "step": 4142000 }, { "epoch": 11.99, "learning_rate": 4.40067762365691e-05, "loss": 2.2826, "step": 4142500 }, { "epoch": 11.99, "learning_rate": 4.400605403621712e-05, "loss": 2.3074, "step": 4143000 }, { "epoch": 11.99, "learning_rate": 4.4005330388569845e-05, "loss": 2.3019, "step": 4143500 }, { "epoch": 12.0, "learning_rate": 4.400460674092257e-05, "loss": 2.3096, "step": 4144000 }, { "epoch": 12.0, "learning_rate": 4.400388309327529e-05, "loss": 2.2964, "step": 4144500 }, { "epoch": 12.0, "learning_rate": 4.400315944562801e-05, "loss": 2.3088, "step": 4145000 }, { "epoch": 12.0, "learning_rate": 4.4002435797980734e-05, "loss": 2.2955, "step": 4145500 }, { "epoch": 12.0, "eval_accuracy": 0.6509588873451346, "eval_accuracy_mlm": 0.6136933702388812, "eval_accuracy_nsp": 0.8509415975764575, "eval_loss": 2.2997477054595947, "eval_runtime": 330.67, "eval_samples_per_second": 1319.702, "eval_steps_per_second": 54.988, "step": 4145664 }, { "epoch": 12.0, "learning_rate": 4.400171504492405e-05, "loss": 2.2811, "step": 4146000 }, { "epoch": 12.0, "learning_rate": 4.400099139727677e-05, "loss": 2.2889, "step": 4146500 }, { "epoch": 12.0, "learning_rate": 4.4000267749629494e-05, "loss": 2.2654, "step": 4147000 }, { "epoch": 12.01, "learning_rate": 4.3999544101982216e-05, "loss": 2.2694, "step": 4147500 }, { "epoch": 12.01, "learning_rate": 4.3998820454334945e-05, "loss": 2.3201, "step": 4148000 }, { "epoch": 12.01, "learning_rate": 4.399809680668767e-05, "loss": 2.3081, "step": 4148500 }, { "epoch": 12.01, "learning_rate": 4.399737315904039e-05, "loss": 2.2992, "step": 4149000 }, { "epoch": 12.01, "learning_rate": 4.399664951139311e-05, "loss": 2.2677, "step": 4149500 }, { "epoch": 12.01, "learning_rate": 4.3995925863745834e-05, "loss": 2.296, "step": 4150000 }, { "epoch": 12.01, "learning_rate": 4.3995202216098556e-05, "loss": 2.2578, "step": 4150500 }, { "epoch": 12.02, "learning_rate": 4.399447856845128e-05, "loss": 2.2632, "step": 4151000 }, { "epoch": 12.02, "learning_rate": 4.3993754920804e-05, "loss": 2.2901, "step": 4151500 }, { "epoch": 12.02, "learning_rate": 4.399303272045202e-05, "loss": 2.2657, "step": 4152000 }, { "epoch": 12.02, "learning_rate": 4.3992309072804745e-05, "loss": 2.2606, "step": 4152500 }, { "epoch": 12.02, "learning_rate": 4.399158542515747e-05, "loss": 2.2653, "step": 4153000 }, { "epoch": 12.02, "learning_rate": 4.399086177751019e-05, "loss": 2.2952, "step": 4153500 }, { "epoch": 12.02, "learning_rate": 4.399013812986291e-05, "loss": 2.2594, "step": 4154000 }, { "epoch": 12.03, "learning_rate": 4.3989414482215634e-05, "loss": 2.2807, "step": 4154500 }, { "epoch": 12.03, "learning_rate": 4.398869083456836e-05, "loss": 2.2612, "step": 4155000 }, { "epoch": 12.03, "learning_rate": 4.398796863421638e-05, "loss": 2.2717, "step": 4155500 }, { "epoch": 12.03, "learning_rate": 4.39872449865691e-05, "loss": 2.291, "step": 4156000 }, { "epoch": 12.03, "learning_rate": 4.398652133892183e-05, "loss": 2.2707, "step": 4156500 }, { "epoch": 12.03, "learning_rate": 4.398579769127455e-05, "loss": 2.2761, "step": 4157000 }, { "epoch": 12.03, "learning_rate": 4.398507549092257e-05, "loss": 2.2908, "step": 4157500 }, { "epoch": 12.04, "learning_rate": 4.398435184327529e-05, "loss": 2.3035, "step": 4158000 }, { "epoch": 12.04, "learning_rate": 4.3983629642923306e-05, "loss": 2.2696, "step": 4158500 }, { "epoch": 12.04, "learning_rate": 4.398290599527603e-05, "loss": 2.2731, "step": 4159000 }, { "epoch": 12.04, "learning_rate": 4.398218234762875e-05, "loss": 2.3038, "step": 4159500 }, { "epoch": 12.04, "learning_rate": 4.398145869998148e-05, "loss": 2.2794, "step": 4160000 }, { "epoch": 12.04, "learning_rate": 4.39807350523342e-05, "loss": 2.2725, "step": 4160500 }, { "epoch": 12.04, "learning_rate": 4.398001285198222e-05, "loss": 2.2751, "step": 4161000 }, { "epoch": 12.05, "learning_rate": 4.397929065163023e-05, "loss": 2.2884, "step": 4161500 }, { "epoch": 12.05, "learning_rate": 4.3978568451278255e-05, "loss": 2.2832, "step": 4162000 }, { "epoch": 12.05, "learning_rate": 4.397784480363098e-05, "loss": 2.3056, "step": 4162500 }, { "epoch": 12.05, "learning_rate": 4.39771211559837e-05, "loss": 2.2926, "step": 4163000 }, { "epoch": 12.05, "learning_rate": 4.397639750833643e-05, "loss": 2.2928, "step": 4163500 }, { "epoch": 12.05, "learning_rate": 4.397567386068915e-05, "loss": 2.2595, "step": 4164000 }, { "epoch": 12.05, "learning_rate": 4.397495021304187e-05, "loss": 2.3109, "step": 4164500 }, { "epoch": 12.06, "learning_rate": 4.3974226565394595e-05, "loss": 2.2821, "step": 4165000 }, { "epoch": 12.06, "learning_rate": 4.397350291774732e-05, "loss": 2.2694, "step": 4165500 }, { "epoch": 12.06, "learning_rate": 4.397277927010004e-05, "loss": 2.2936, "step": 4166000 }, { "epoch": 12.06, "learning_rate": 4.3972057069748055e-05, "loss": 2.2966, "step": 4166500 }, { "epoch": 12.06, "learning_rate": 4.397133342210078e-05, "loss": 2.3021, "step": 4167000 }, { "epoch": 12.06, "learning_rate": 4.39706097744535e-05, "loss": 2.3038, "step": 4167500 }, { "epoch": 12.06, "learning_rate": 4.396988612680623e-05, "loss": 2.2854, "step": 4168000 }, { "epoch": 12.07, "learning_rate": 4.396916247915895e-05, "loss": 2.2965, "step": 4168500 }, { "epoch": 12.07, "learning_rate": 4.396843883151167e-05, "loss": 2.2742, "step": 4169000 }, { "epoch": 12.07, "learning_rate": 4.3967715183864395e-05, "loss": 2.2975, "step": 4169500 }, { "epoch": 12.07, "learning_rate": 4.396699153621712e-05, "loss": 2.2876, "step": 4170000 }, { "epoch": 12.07, "learning_rate": 4.3966267888569846e-05, "loss": 2.2969, "step": 4170500 }, { "epoch": 12.07, "learning_rate": 4.396554568821786e-05, "loss": 2.3158, "step": 4171000 }, { "epoch": 12.07, "learning_rate": 4.3964822040570584e-05, "loss": 2.2862, "step": 4171500 }, { "epoch": 12.08, "learning_rate": 4.3964098392923306e-05, "loss": 2.292, "step": 4172000 }, { "epoch": 12.08, "learning_rate": 4.396337474527603e-05, "loss": 2.3026, "step": 4172500 }, { "epoch": 12.08, "learning_rate": 4.396265109762875e-05, "loss": 2.2692, "step": 4173000 }, { "epoch": 12.08, "learning_rate": 4.396192744998148e-05, "loss": 2.2834, "step": 4173500 }, { "epoch": 12.08, "learning_rate": 4.39612038023342e-05, "loss": 2.2802, "step": 4174000 }, { "epoch": 12.08, "learning_rate": 4.3960480154686924e-05, "loss": 2.2563, "step": 4174500 }, { "epoch": 12.08, "learning_rate": 4.3959756507039647e-05, "loss": 2.2814, "step": 4175000 }, { "epoch": 12.09, "learning_rate": 4.395903430668766e-05, "loss": 2.2651, "step": 4175500 }, { "epoch": 12.09, "learning_rate": 4.3958310659040384e-05, "loss": 2.273, "step": 4176000 }, { "epoch": 12.09, "learning_rate": 4.395758845868841e-05, "loss": 2.2903, "step": 4176500 }, { "epoch": 12.09, "learning_rate": 4.395686481104113e-05, "loss": 2.2825, "step": 4177000 }, { "epoch": 12.09, "learning_rate": 4.395614116339385e-05, "loss": 2.2812, "step": 4177500 }, { "epoch": 12.09, "learning_rate": 4.395541751574658e-05, "loss": 2.2721, "step": 4178000 }, { "epoch": 12.1, "learning_rate": 4.39546938680993e-05, "loss": 2.2757, "step": 4178500 }, { "epoch": 12.1, "learning_rate": 4.3953970220452025e-05, "loss": 2.2831, "step": 4179000 }, { "epoch": 12.1, "learning_rate": 4.395324657280475e-05, "loss": 2.2892, "step": 4179500 }, { "epoch": 12.1, "learning_rate": 4.395252292515747e-05, "loss": 2.3047, "step": 4180000 }, { "epoch": 12.1, "learning_rate": 4.3951800724805485e-05, "loss": 2.3052, "step": 4180500 }, { "epoch": 12.1, "learning_rate": 4.395107707715821e-05, "loss": 2.2793, "step": 4181000 }, { "epoch": 12.1, "learning_rate": 4.395035487680623e-05, "loss": 2.2668, "step": 4181500 }, { "epoch": 12.11, "learning_rate": 4.394963122915895e-05, "loss": 2.283, "step": 4182000 }, { "epoch": 12.11, "learning_rate": 4.3948907581511674e-05, "loss": 2.3011, "step": 4182500 }, { "epoch": 12.11, "learning_rate": 4.394818682845498e-05, "loss": 2.2881, "step": 4183000 }, { "epoch": 12.11, "learning_rate": 4.3947463180807705e-05, "loss": 2.2778, "step": 4183500 }, { "epoch": 12.11, "learning_rate": 4.394673953316043e-05, "loss": 2.2771, "step": 4184000 }, { "epoch": 12.11, "learning_rate": 4.3946015885513156e-05, "loss": 2.2777, "step": 4184500 }, { "epoch": 12.11, "learning_rate": 4.394529223786588e-05, "loss": 2.2881, "step": 4185000 }, { "epoch": 12.12, "learning_rate": 4.39445685902186e-05, "loss": 2.2826, "step": 4185500 }, { "epoch": 12.12, "learning_rate": 4.394384494257133e-05, "loss": 2.2715, "step": 4186000 }, { "epoch": 12.12, "learning_rate": 4.394312129492405e-05, "loss": 2.2986, "step": 4186500 }, { "epoch": 12.12, "learning_rate": 4.394239909457207e-05, "loss": 2.3083, "step": 4187000 }, { "epoch": 12.12, "learning_rate": 4.394167544692479e-05, "loss": 2.2807, "step": 4187500 }, { "epoch": 12.12, "learning_rate": 4.394095179927751e-05, "loss": 2.2901, "step": 4188000 }, { "epoch": 12.12, "learning_rate": 4.3940228151630234e-05, "loss": 2.2753, "step": 4188500 }, { "epoch": 12.13, "learning_rate": 4.3939504503982956e-05, "loss": 2.3118, "step": 4189000 }, { "epoch": 12.13, "learning_rate": 4.393878085633568e-05, "loss": 2.2994, "step": 4189500 }, { "epoch": 12.13, "learning_rate": 4.393805720868841e-05, "loss": 2.2756, "step": 4190000 }, { "epoch": 12.13, "learning_rate": 4.393733356104113e-05, "loss": 2.2828, "step": 4190500 }, { "epoch": 12.13, "learning_rate": 4.393660991339385e-05, "loss": 2.296, "step": 4191000 }, { "epoch": 12.13, "learning_rate": 4.3935886265746574e-05, "loss": 2.2889, "step": 4191500 }, { "epoch": 12.13, "learning_rate": 4.3935162618099296e-05, "loss": 2.2638, "step": 4192000 }, { "epoch": 12.14, "learning_rate": 4.393443897045202e-05, "loss": 2.2708, "step": 4192500 }, { "epoch": 12.14, "learning_rate": 4.393371677010004e-05, "loss": 2.2748, "step": 4193000 }, { "epoch": 12.14, "learning_rate": 4.3932994569748057e-05, "loss": 2.2795, "step": 4193500 }, { "epoch": 12.14, "learning_rate": 4.3932270922100786e-05, "loss": 2.2762, "step": 4194000 }, { "epoch": 12.14, "learning_rate": 4.393154727445351e-05, "loss": 2.2827, "step": 4194500 }, { "epoch": 12.14, "learning_rate": 4.393082362680623e-05, "loss": 2.2756, "step": 4195000 }, { "epoch": 12.14, "learning_rate": 4.3930101426454246e-05, "loss": 2.262, "step": 4195500 }, { "epoch": 12.15, "learning_rate": 4.392937777880697e-05, "loss": 2.2675, "step": 4196000 }, { "epoch": 12.15, "learning_rate": 4.392865413115969e-05, "loss": 2.2915, "step": 4196500 }, { "epoch": 12.15, "learning_rate": 4.392793048351241e-05, "loss": 2.2717, "step": 4197000 }, { "epoch": 12.15, "learning_rate": 4.3927206835865135e-05, "loss": 2.2906, "step": 4197500 }, { "epoch": 12.15, "learning_rate": 4.392648318821786e-05, "loss": 2.2978, "step": 4198000 }, { "epoch": 12.15, "learning_rate": 4.3925759540570586e-05, "loss": 2.2921, "step": 4198500 }, { "epoch": 12.15, "learning_rate": 4.392503589292331e-05, "loss": 2.2976, "step": 4199000 }, { "epoch": 12.16, "learning_rate": 4.3924313692571324e-05, "loss": 2.3069, "step": 4199500 }, { "epoch": 12.16, "learning_rate": 4.3923590044924046e-05, "loss": 2.27, "step": 4200000 }, { "epoch": 12.16, "learning_rate": 4.3922866397276775e-05, "loss": 2.2636, "step": 4200500 }, { "epoch": 12.16, "learning_rate": 4.39221427496295e-05, "loss": 2.2525, "step": 4201000 }, { "epoch": 12.16, "learning_rate": 4.392141910198222e-05, "loss": 2.2893, "step": 4201500 }, { "epoch": 12.16, "learning_rate": 4.3920696901630235e-05, "loss": 2.2807, "step": 4202000 }, { "epoch": 12.16, "learning_rate": 4.391997325398296e-05, "loss": 2.2702, "step": 4202500 }, { "epoch": 12.17, "learning_rate": 4.3919249606335686e-05, "loss": 2.2788, "step": 4203000 }, { "epoch": 12.17, "learning_rate": 4.391852595868841e-05, "loss": 2.2887, "step": 4203500 }, { "epoch": 12.17, "learning_rate": 4.3917803758336424e-05, "loss": 2.2828, "step": 4204000 }, { "epoch": 12.17, "learning_rate": 4.3917080110689146e-05, "loss": 2.2918, "step": 4204500 }, { "epoch": 12.17, "learning_rate": 4.391635646304187e-05, "loss": 2.2719, "step": 4205000 }, { "epoch": 12.17, "learning_rate": 4.391563281539459e-05, "loss": 2.2862, "step": 4205500 }, { "epoch": 12.17, "learning_rate": 4.391490916774731e-05, "loss": 2.2908, "step": 4206000 }, { "epoch": 12.18, "learning_rate": 4.3914185520100035e-05, "loss": 2.295, "step": 4206500 }, { "epoch": 12.18, "learning_rate": 4.391346187245276e-05, "loss": 2.2904, "step": 4207000 }, { "epoch": 12.18, "learning_rate": 4.3912738224805486e-05, "loss": 2.286, "step": 4207500 }, { "epoch": 12.18, "learning_rate": 4.391201602445351e-05, "loss": 2.282, "step": 4208000 }, { "epoch": 12.18, "learning_rate": 4.391129237680623e-05, "loss": 2.2839, "step": 4208500 }, { "epoch": 12.18, "learning_rate": 4.391056872915895e-05, "loss": 2.2997, "step": 4209000 }, { "epoch": 12.18, "learning_rate": 4.3909845081511675e-05, "loss": 2.3117, "step": 4209500 }, { "epoch": 12.19, "learning_rate": 4.39091214338644e-05, "loss": 2.3097, "step": 4210000 }, { "epoch": 12.19, "learning_rate": 4.390839923351241e-05, "loss": 2.275, "step": 4210500 }, { "epoch": 12.19, "learning_rate": 4.3907675585865135e-05, "loss": 2.2807, "step": 4211000 }, { "epoch": 12.19, "learning_rate": 4.390695338551316e-05, "loss": 2.3015, "step": 4211500 }, { "epoch": 12.19, "learning_rate": 4.390622973786588e-05, "loss": 2.299, "step": 4212000 }, { "epoch": 12.19, "learning_rate": 4.39055060902186e-05, "loss": 2.2846, "step": 4212500 }, { "epoch": 12.19, "learning_rate": 4.3904782442571324e-05, "loss": 2.2668, "step": 4213000 }, { "epoch": 12.2, "learning_rate": 4.390406024221934e-05, "loss": 2.2799, "step": 4213500 }, { "epoch": 12.2, "learning_rate": 4.390333659457206e-05, "loss": 2.2658, "step": 4214000 }, { "epoch": 12.2, "learning_rate": 4.3902614394220084e-05, "loss": 2.3017, "step": 4214500 }, { "epoch": 12.2, "learning_rate": 4.390189074657281e-05, "loss": 2.2915, "step": 4215000 }, { "epoch": 12.2, "learning_rate": 4.390116709892553e-05, "loss": 2.282, "step": 4215500 }, { "epoch": 12.2, "learning_rate": 4.390044345127826e-05, "loss": 2.2945, "step": 4216000 }, { "epoch": 12.21, "learning_rate": 4.389971980363098e-05, "loss": 2.2872, "step": 4216500 }, { "epoch": 12.21, "learning_rate": 4.38989961559837e-05, "loss": 2.2833, "step": 4217000 }, { "epoch": 12.21, "learning_rate": 4.3898272508336425e-05, "loss": 2.2631, "step": 4217500 }, { "epoch": 12.21, "learning_rate": 4.389754886068915e-05, "loss": 2.2917, "step": 4218000 }, { "epoch": 12.21, "learning_rate": 4.389682521304187e-05, "loss": 2.2932, "step": 4218500 }, { "epoch": 12.21, "learning_rate": 4.389610156539459e-05, "loss": 2.3136, "step": 4219000 }, { "epoch": 12.21, "learning_rate": 4.3895377917747314e-05, "loss": 2.2859, "step": 4219500 }, { "epoch": 12.22, "learning_rate": 4.3894654270100036e-05, "loss": 2.2875, "step": 4220000 }, { "epoch": 12.22, "learning_rate": 4.389393062245276e-05, "loss": 2.2845, "step": 4220500 }, { "epoch": 12.22, "learning_rate": 4.389320697480549e-05, "loss": 2.2803, "step": 4221000 }, { "epoch": 12.22, "learning_rate": 4.389248332715821e-05, "loss": 2.2844, "step": 4221500 }, { "epoch": 12.22, "learning_rate": 4.389175967951093e-05, "loss": 2.2717, "step": 4222000 }, { "epoch": 12.22, "learning_rate": 4.3891036031863654e-05, "loss": 2.259, "step": 4222500 }, { "epoch": 12.22, "learning_rate": 4.3890313831511676e-05, "loss": 2.2916, "step": 4223000 }, { "epoch": 12.23, "learning_rate": 4.38895901838644e-05, "loss": 2.2865, "step": 4223500 }, { "epoch": 12.23, "learning_rate": 4.3888867983512414e-05, "loss": 2.2801, "step": 4224000 }, { "epoch": 12.23, "learning_rate": 4.3888144335865136e-05, "loss": 2.2912, "step": 4224500 }, { "epoch": 12.23, "learning_rate": 4.3887420688217865e-05, "loss": 2.286, "step": 4225000 }, { "epoch": 12.23, "learning_rate": 4.388669704057059e-05, "loss": 2.3212, "step": 4225500 }, { "epoch": 12.23, "learning_rate": 4.388597339292331e-05, "loss": 2.2883, "step": 4226000 }, { "epoch": 12.23, "learning_rate": 4.3885251192571325e-05, "loss": 2.2859, "step": 4226500 }, { "epoch": 12.24, "learning_rate": 4.388452754492405e-05, "loss": 2.2918, "step": 4227000 }, { "epoch": 12.24, "learning_rate": 4.388380389727677e-05, "loss": 2.2721, "step": 4227500 }, { "epoch": 12.24, "learning_rate": 4.388308024962949e-05, "loss": 2.2519, "step": 4228000 }, { "epoch": 12.24, "learning_rate": 4.3882358049277514e-05, "loss": 2.2771, "step": 4228500 }, { "epoch": 12.24, "learning_rate": 4.3881634401630236e-05, "loss": 2.2967, "step": 4229000 }, { "epoch": 12.24, "learning_rate": 4.388091075398296e-05, "loss": 2.2798, "step": 4229500 }, { "epoch": 12.24, "learning_rate": 4.388018710633568e-05, "loss": 2.2789, "step": 4230000 }, { "epoch": 12.25, "learning_rate": 4.387946345868841e-05, "loss": 2.2813, "step": 4230500 }, { "epoch": 12.25, "learning_rate": 4.387873981104113e-05, "loss": 2.2632, "step": 4231000 }, { "epoch": 12.25, "learning_rate": 4.3878016163393854e-05, "loss": 2.3081, "step": 4231500 }, { "epoch": 12.25, "learning_rate": 4.3877292515746577e-05, "loss": 2.2689, "step": 4232000 }, { "epoch": 12.25, "learning_rate": 4.38765688680993e-05, "loss": 2.3032, "step": 4232500 }, { "epoch": 12.25, "learning_rate": 4.387584522045202e-05, "loss": 2.2927, "step": 4233000 }, { "epoch": 12.25, "learning_rate": 4.387512157280474e-05, "loss": 2.3115, "step": 4233500 }, { "epoch": 12.26, "learning_rate": 4.3874399372452766e-05, "loss": 2.3016, "step": 4234000 }, { "epoch": 12.26, "learning_rate": 4.387367572480549e-05, "loss": 2.267, "step": 4234500 }, { "epoch": 12.26, "learning_rate": 4.3872953524453503e-05, "loss": 2.2764, "step": 4235000 }, { "epoch": 12.26, "learning_rate": 4.387223132410152e-05, "loss": 2.2815, "step": 4235500 }, { "epoch": 12.26, "learning_rate": 4.387150767645424e-05, "loss": 2.2796, "step": 4236000 }, { "epoch": 12.26, "learning_rate": 4.3870784028806963e-05, "loss": 2.2598, "step": 4236500 }, { "epoch": 12.26, "learning_rate": 4.3870060381159686e-05, "loss": 2.2593, "step": 4237000 }, { "epoch": 12.27, "learning_rate": 4.3869336733512415e-05, "loss": 2.2852, "step": 4237500 }, { "epoch": 12.27, "learning_rate": 4.386861453316043e-05, "loss": 2.3163, "step": 4238000 }, { "epoch": 12.27, "learning_rate": 4.386789088551316e-05, "loss": 2.2652, "step": 4238500 }, { "epoch": 12.27, "learning_rate": 4.386716723786588e-05, "loss": 2.2645, "step": 4239000 }, { "epoch": 12.27, "learning_rate": 4.3866443590218604e-05, "loss": 2.3058, "step": 4239500 }, { "epoch": 12.27, "learning_rate": 4.3865719942571326e-05, "loss": 2.2842, "step": 4240000 }, { "epoch": 12.27, "learning_rate": 4.386499774221934e-05, "loss": 2.2893, "step": 4240500 }, { "epoch": 12.28, "learning_rate": 4.3864274094572064e-05, "loss": 2.288, "step": 4241000 }, { "epoch": 12.28, "learning_rate": 4.3863550446924786e-05, "loss": 2.3095, "step": 4241500 }, { "epoch": 12.28, "learning_rate": 4.3862826799277515e-05, "loss": 2.3336, "step": 4242000 }, { "epoch": 12.28, "learning_rate": 4.386210315163024e-05, "loss": 2.2868, "step": 4242500 }, { "epoch": 12.28, "learning_rate": 4.386137950398296e-05, "loss": 2.2732, "step": 4243000 }, { "epoch": 12.28, "learning_rate": 4.386065585633568e-05, "loss": 2.3304, "step": 4243500 }, { "epoch": 12.28, "learning_rate": 4.3859932208688404e-05, "loss": 2.3031, "step": 4244000 }, { "epoch": 12.29, "learning_rate": 4.3859208561041126e-05, "loss": 2.2946, "step": 4244500 }, { "epoch": 12.29, "learning_rate": 4.385848491339385e-05, "loss": 2.2834, "step": 4245000 }, { "epoch": 12.29, "learning_rate": 4.385776271304187e-05, "loss": 2.2767, "step": 4245500 }, { "epoch": 12.29, "learning_rate": 4.385703906539459e-05, "loss": 2.2762, "step": 4246000 }, { "epoch": 12.29, "learning_rate": 4.3856315417747315e-05, "loss": 2.2852, "step": 4246500 }, { "epoch": 12.29, "learning_rate": 4.385559177010004e-05, "loss": 2.2909, "step": 4247000 }, { "epoch": 12.29, "learning_rate": 4.3854868122452766e-05, "loss": 2.3005, "step": 4247500 }, { "epoch": 12.3, "learning_rate": 4.385414447480549e-05, "loss": 2.2792, "step": 4248000 }, { "epoch": 12.3, "learning_rate": 4.385342082715821e-05, "loss": 2.2748, "step": 4248500 }, { "epoch": 12.3, "learning_rate": 4.385269717951093e-05, "loss": 2.2851, "step": 4249000 }, { "epoch": 12.3, "learning_rate": 4.3851973531863655e-05, "loss": 2.2754, "step": 4249500 }, { "epoch": 12.3, "learning_rate": 4.385124988421638e-05, "loss": 2.3025, "step": 4250000 }, { "epoch": 12.3, "learning_rate": 4.38505262365691e-05, "loss": 2.2921, "step": 4250500 }, { "epoch": 12.3, "learning_rate": 4.3849804036217115e-05, "loss": 2.305, "step": 4251000 }, { "epoch": 12.31, "learning_rate": 4.384908038856984e-05, "loss": 2.2814, "step": 4251500 }, { "epoch": 12.31, "learning_rate": 4.384835818821786e-05, "loss": 2.2801, "step": 4252000 }, { "epoch": 12.31, "learning_rate": 4.3847635987865876e-05, "loss": 2.2854, "step": 4252500 }, { "epoch": 12.31, "learning_rate": 4.38469123402186e-05, "loss": 2.288, "step": 4253000 }, { "epoch": 12.31, "learning_rate": 4.384618869257133e-05, "loss": 2.2916, "step": 4253500 }, { "epoch": 12.31, "learning_rate": 4.384546504492405e-05, "loss": 2.2919, "step": 4254000 }, { "epoch": 12.32, "learning_rate": 4.384474139727677e-05, "loss": 2.293, "step": 4254500 }, { "epoch": 12.32, "learning_rate": 4.3844017749629493e-05, "loss": 2.2739, "step": 4255000 }, { "epoch": 12.32, "learning_rate": 4.3843294101982216e-05, "loss": 2.2952, "step": 4255500 }, { "epoch": 12.32, "learning_rate": 4.3842570454334945e-05, "loss": 2.2788, "step": 4256000 }, { "epoch": 12.32, "learning_rate": 4.384184680668767e-05, "loss": 2.2953, "step": 4256500 }, { "epoch": 12.32, "learning_rate": 4.384112315904039e-05, "loss": 2.3094, "step": 4257000 }, { "epoch": 12.32, "learning_rate": 4.3840400958688405e-05, "loss": 2.2959, "step": 4257500 }, { "epoch": 12.33, "learning_rate": 4.383967731104113e-05, "loss": 2.2894, "step": 4258000 }, { "epoch": 12.33, "learning_rate": 4.383895366339385e-05, "loss": 2.2802, "step": 4258500 }, { "epoch": 12.33, "learning_rate": 4.383823001574657e-05, "loss": 2.3114, "step": 4259000 }, { "epoch": 12.33, "learning_rate": 4.3837506368099294e-05, "loss": 2.2905, "step": 4259500 }, { "epoch": 12.33, "learning_rate": 4.3836782720452016e-05, "loss": 2.2886, "step": 4260000 }, { "epoch": 12.33, "learning_rate": 4.3836059072804745e-05, "loss": 2.2724, "step": 4260500 }, { "epoch": 12.33, "learning_rate": 4.383533542515747e-05, "loss": 2.2904, "step": 4261000 }, { "epoch": 12.34, "learning_rate": 4.3834611777510196e-05, "loss": 2.2893, "step": 4261500 }, { "epoch": 12.34, "learning_rate": 4.383388957715821e-05, "loss": 2.2689, "step": 4262000 }, { "epoch": 12.34, "learning_rate": 4.383316737680623e-05, "loss": 2.297, "step": 4262500 }, { "epoch": 12.34, "learning_rate": 4.383244372915895e-05, "loss": 2.2869, "step": 4263000 }, { "epoch": 12.34, "learning_rate": 4.383172008151167e-05, "loss": 2.2952, "step": 4263500 }, { "epoch": 12.34, "learning_rate": 4.3830996433864394e-05, "loss": 2.2802, "step": 4264000 }, { "epoch": 12.34, "learning_rate": 4.3830272786217116e-05, "loss": 2.2968, "step": 4264500 }, { "epoch": 12.35, "learning_rate": 4.3829549138569845e-05, "loss": 2.2739, "step": 4265000 }, { "epoch": 12.35, "learning_rate": 4.382882549092257e-05, "loss": 2.2589, "step": 4265500 }, { "epoch": 12.35, "learning_rate": 4.382810184327529e-05, "loss": 2.2814, "step": 4266000 }, { "epoch": 12.35, "learning_rate": 4.3827379642923305e-05, "loss": 2.3004, "step": 4266500 }, { "epoch": 12.35, "learning_rate": 4.382665599527603e-05, "loss": 2.3045, "step": 4267000 }, { "epoch": 12.35, "learning_rate": 4.382593234762875e-05, "loss": 2.3114, "step": 4267500 }, { "epoch": 12.35, "learning_rate": 4.382520869998148e-05, "loss": 2.2714, "step": 4268000 }, { "epoch": 12.36, "learning_rate": 4.38244850523342e-05, "loss": 2.2894, "step": 4268500 }, { "epoch": 12.36, "learning_rate": 4.3823762851982216e-05, "loss": 2.2775, "step": 4269000 }, { "epoch": 12.36, "learning_rate": 4.3823039204334945e-05, "loss": 2.2878, "step": 4269500 }, { "epoch": 12.36, "learning_rate": 4.382231555668767e-05, "loss": 2.29, "step": 4270000 }, { "epoch": 12.36, "learning_rate": 4.382159190904039e-05, "loss": 2.2835, "step": 4270500 }, { "epoch": 12.36, "learning_rate": 4.382086826139311e-05, "loss": 2.2797, "step": 4271000 }, { "epoch": 12.36, "learning_rate": 4.382014750833642e-05, "loss": 2.2941, "step": 4271500 }, { "epoch": 12.37, "learning_rate": 4.3819425307984443e-05, "loss": 2.2883, "step": 4272000 }, { "epoch": 12.37, "learning_rate": 4.3818701660337166e-05, "loss": 2.2769, "step": 4272500 }, { "epoch": 12.37, "learning_rate": 4.381797801268989e-05, "loss": 2.3019, "step": 4273000 }, { "epoch": 12.37, "learning_rate": 4.381725436504261e-05, "loss": 2.2941, "step": 4273500 }, { "epoch": 12.37, "learning_rate": 4.381653071739533e-05, "loss": 2.2862, "step": 4274000 }, { "epoch": 12.37, "learning_rate": 4.3815807069748055e-05, "loss": 2.2841, "step": 4274500 }, { "epoch": 12.37, "learning_rate": 4.381508342210078e-05, "loss": 2.2958, "step": 4275000 }, { "epoch": 12.38, "learning_rate": 4.3814359774453506e-05, "loss": 2.2824, "step": 4275500 }, { "epoch": 12.38, "learning_rate": 4.381363612680623e-05, "loss": 2.2795, "step": 4276000 }, { "epoch": 12.38, "learning_rate": 4.3812913926454244e-05, "loss": 2.2895, "step": 4276500 }, { "epoch": 12.38, "learning_rate": 4.381219027880697e-05, "loss": 2.2804, "step": 4277000 }, { "epoch": 12.38, "learning_rate": 4.3811466631159695e-05, "loss": 2.2839, "step": 4277500 }, { "epoch": 12.38, "learning_rate": 4.381074298351242e-05, "loss": 2.2997, "step": 4278000 }, { "epoch": 12.38, "learning_rate": 4.381001933586514e-05, "loss": 2.2956, "step": 4278500 }, { "epoch": 12.39, "learning_rate": 4.380929568821786e-05, "loss": 2.318, "step": 4279000 }, { "epoch": 12.39, "learning_rate": 4.3808572040570584e-05, "loss": 2.2882, "step": 4279500 }, { "epoch": 12.39, "learning_rate": 4.3807848392923306e-05, "loss": 2.2891, "step": 4280000 }, { "epoch": 12.39, "learning_rate": 4.380712619257132e-05, "loss": 2.3054, "step": 4280500 }, { "epoch": 12.39, "learning_rate": 4.3806402544924044e-05, "loss": 2.3001, "step": 4281000 }, { "epoch": 12.39, "learning_rate": 4.380567889727677e-05, "loss": 2.2936, "step": 4281500 }, { "epoch": 12.39, "learning_rate": 4.3804955249629495e-05, "loss": 2.2858, "step": 4282000 }, { "epoch": 12.4, "learning_rate": 4.380423160198222e-05, "loss": 2.3144, "step": 4282500 }, { "epoch": 12.4, "learning_rate": 4.3803507954334946e-05, "loss": 2.3094, "step": 4283000 }, { "epoch": 12.4, "learning_rate": 4.380278575398296e-05, "loss": 2.2685, "step": 4283500 }, { "epoch": 12.4, "learning_rate": 4.380206355363098e-05, "loss": 2.3107, "step": 4284000 }, { "epoch": 12.4, "learning_rate": 4.38013399059837e-05, "loss": 2.3217, "step": 4284500 }, { "epoch": 12.4, "learning_rate": 4.380061625833642e-05, "loss": 2.2973, "step": 4285000 }, { "epoch": 12.4, "learning_rate": 4.3799892610689144e-05, "loss": 2.2988, "step": 4285500 }, { "epoch": 12.41, "learning_rate": 4.379916896304187e-05, "loss": 2.2992, "step": 4286000 }, { "epoch": 12.41, "learning_rate": 4.3798445315394595e-05, "loss": 2.2805, "step": 4286500 }, { "epoch": 12.41, "learning_rate": 4.379772166774732e-05, "loss": 2.2965, "step": 4287000 }, { "epoch": 12.41, "learning_rate": 4.379699802010004e-05, "loss": 2.2998, "step": 4287500 }, { "epoch": 12.41, "learning_rate": 4.379627437245276e-05, "loss": 2.2934, "step": 4288000 }, { "epoch": 12.41, "learning_rate": 4.379555217210078e-05, "loss": 2.2849, "step": 4288500 }, { "epoch": 12.41, "learning_rate": 4.37948285244535e-05, "loss": 2.2734, "step": 4289000 }, { "epoch": 12.42, "learning_rate": 4.379410487680622e-05, "loss": 2.3122, "step": 4289500 }, { "epoch": 12.42, "learning_rate": 4.3793381229158944e-05, "loss": 2.2944, "step": 4290000 }, { "epoch": 12.42, "learning_rate": 4.379265758151167e-05, "loss": 2.2777, "step": 4290500 }, { "epoch": 12.42, "learning_rate": 4.3791933933864396e-05, "loss": 2.2829, "step": 4291000 }, { "epoch": 12.42, "learning_rate": 4.3791210286217125e-05, "loss": 2.2629, "step": 4291500 }, { "epoch": 12.42, "learning_rate": 4.379048808586514e-05, "loss": 2.2875, "step": 4292000 }, { "epoch": 12.43, "learning_rate": 4.3789765885513156e-05, "loss": 2.3145, "step": 4292500 }, { "epoch": 12.43, "learning_rate": 4.378904368516117e-05, "loss": 2.2983, "step": 4293000 }, { "epoch": 12.43, "learning_rate": 4.37883200375139e-05, "loss": 2.2925, "step": 4293500 }, { "epoch": 12.43, "learning_rate": 4.378759638986662e-05, "loss": 2.2872, "step": 4294000 }, { "epoch": 12.43, "learning_rate": 4.3786872742219345e-05, "loss": 2.2608, "step": 4294500 }, { "epoch": 12.43, "learning_rate": 4.378614909457207e-05, "loss": 2.3221, "step": 4295000 }, { "epoch": 12.43, "learning_rate": 4.378542544692479e-05, "loss": 2.2951, "step": 4295500 }, { "epoch": 12.44, "learning_rate": 4.378470179927751e-05, "loss": 2.2951, "step": 4296000 }, { "epoch": 12.44, "learning_rate": 4.3783978151630234e-05, "loss": 2.2808, "step": 4296500 }, { "epoch": 12.44, "learning_rate": 4.3783254503982956e-05, "loss": 2.2651, "step": 4297000 }, { "epoch": 12.44, "learning_rate": 4.378253085633568e-05, "loss": 2.2701, "step": 4297500 }, { "epoch": 12.44, "learning_rate": 4.378180720868841e-05, "loss": 2.2589, "step": 4298000 }, { "epoch": 12.44, "learning_rate": 4.378108356104113e-05, "loss": 2.3159, "step": 4298500 }, { "epoch": 12.44, "learning_rate": 4.378035991339385e-05, "loss": 2.2782, "step": 4299000 }, { "epoch": 12.45, "learning_rate": 4.3779636265746574e-05, "loss": 2.3126, "step": 4299500 }, { "epoch": 12.45, "learning_rate": 4.3778912618099296e-05, "loss": 2.2858, "step": 4300000 }, { "epoch": 12.45, "learning_rate": 4.377819041774732e-05, "loss": 2.2502, "step": 4300500 }, { "epoch": 12.45, "learning_rate": 4.377746677010004e-05, "loss": 2.272, "step": 4301000 }, { "epoch": 12.45, "learning_rate": 4.377674312245276e-05, "loss": 2.2904, "step": 4301500 }, { "epoch": 12.45, "learning_rate": 4.3776019474805485e-05, "loss": 2.2798, "step": 4302000 }, { "epoch": 12.45, "learning_rate": 4.37752972744535e-05, "loss": 2.2648, "step": 4302500 }, { "epoch": 12.46, "learning_rate": 4.377457362680622e-05, "loss": 2.3037, "step": 4303000 }, { "epoch": 12.46, "learning_rate": 4.3773849979158945e-05, "loss": 2.2826, "step": 4303500 }, { "epoch": 12.46, "learning_rate": 4.3773126331511674e-05, "loss": 2.2888, "step": 4304000 }, { "epoch": 12.46, "learning_rate": 4.377240413115969e-05, "loss": 2.291, "step": 4304500 }, { "epoch": 12.46, "learning_rate": 4.377168048351241e-05, "loss": 2.2989, "step": 4305000 }, { "epoch": 12.46, "learning_rate": 4.377095683586514e-05, "loss": 2.3125, "step": 4305500 }, { "epoch": 12.46, "learning_rate": 4.377023318821786e-05, "loss": 2.2893, "step": 4306000 }, { "epoch": 12.47, "learning_rate": 4.376951098786588e-05, "loss": 2.2844, "step": 4306500 }, { "epoch": 12.47, "learning_rate": 4.37687873402186e-05, "loss": 2.281, "step": 4307000 }, { "epoch": 12.47, "learning_rate": 4.376806369257132e-05, "loss": 2.2933, "step": 4307500 }, { "epoch": 12.47, "learning_rate": 4.376734004492405e-05, "loss": 2.2864, "step": 4308000 }, { "epoch": 12.47, "learning_rate": 4.3766616397276774e-05, "loss": 2.2789, "step": 4308500 }, { "epoch": 12.47, "learning_rate": 4.37658927496295e-05, "loss": 2.3079, "step": 4309000 }, { "epoch": 12.47, "learning_rate": 4.376516910198222e-05, "loss": 2.2919, "step": 4309500 }, { "epoch": 12.48, "learning_rate": 4.376444545433494e-05, "loss": 2.2878, "step": 4310000 }, { "epoch": 12.48, "learning_rate": 4.376372180668766e-05, "loss": 2.2617, "step": 4310500 }, { "epoch": 12.48, "learning_rate": 4.376299960633568e-05, "loss": 2.2949, "step": 4311000 }, { "epoch": 12.48, "learning_rate": 4.37622759586884e-05, "loss": 2.2948, "step": 4311500 }, { "epoch": 12.48, "learning_rate": 4.376155231104112e-05, "loss": 2.3232, "step": 4312000 }, { "epoch": 12.48, "learning_rate": 4.376082866339385e-05, "loss": 2.2472, "step": 4312500 }, { "epoch": 12.48, "learning_rate": 4.3760106463041875e-05, "loss": 2.3035, "step": 4313000 }, { "epoch": 12.49, "learning_rate": 4.375938426268989e-05, "loss": 2.2912, "step": 4313500 }, { "epoch": 12.49, "learning_rate": 4.375866061504261e-05, "loss": 2.2843, "step": 4314000 }, { "epoch": 12.49, "learning_rate": 4.3757936967395335e-05, "loss": 2.2973, "step": 4314500 }, { "epoch": 12.49, "learning_rate": 4.375721331974806e-05, "loss": 2.2993, "step": 4315000 }, { "epoch": 12.49, "learning_rate": 4.375648967210078e-05, "loss": 2.2901, "step": 4315500 }, { "epoch": 12.49, "learning_rate": 4.37557660244535e-05, "loss": 2.2945, "step": 4316000 }, { "epoch": 12.49, "learning_rate": 4.3755043824101524e-05, "loss": 2.3334, "step": 4316500 }, { "epoch": 12.5, "learning_rate": 4.375432162374954e-05, "loss": 2.2781, "step": 4317000 }, { "epoch": 12.5, "learning_rate": 4.375359797610226e-05, "loss": 2.2797, "step": 4317500 }, { "epoch": 12.5, "learning_rate": 4.3752874328454984e-05, "loss": 2.3117, "step": 4318000 }, { "epoch": 12.5, "learning_rate": 4.3752150680807706e-05, "loss": 2.3002, "step": 4318500 }, { "epoch": 12.5, "learning_rate": 4.375142703316043e-05, "loss": 2.3118, "step": 4319000 }, { "epoch": 12.5, "learning_rate": 4.375070338551315e-05, "loss": 2.2872, "step": 4319500 }, { "epoch": 12.5, "learning_rate": 4.374997973786587e-05, "loss": 2.2947, "step": 4320000 }, { "epoch": 12.51, "learning_rate": 4.37492560902186e-05, "loss": 2.3056, "step": 4320500 }, { "epoch": 12.51, "learning_rate": 4.3748532442571324e-05, "loss": 2.2945, "step": 4321000 }, { "epoch": 12.51, "learning_rate": 4.374780879492405e-05, "loss": 2.3089, "step": 4321500 }, { "epoch": 12.51, "learning_rate": 4.3747085147276775e-05, "loss": 2.2785, "step": 4322000 }, { "epoch": 12.51, "learning_rate": 4.37463614996295e-05, "loss": 2.293, "step": 4322500 }, { "epoch": 12.51, "learning_rate": 4.374563929927751e-05, "loss": 2.2935, "step": 4323000 }, { "epoch": 12.51, "learning_rate": 4.3744915651630235e-05, "loss": 2.309, "step": 4323500 }, { "epoch": 12.52, "learning_rate": 4.374419200398296e-05, "loss": 2.2773, "step": 4324000 }, { "epoch": 12.52, "learning_rate": 4.374346835633568e-05, "loss": 2.2956, "step": 4324500 }, { "epoch": 12.52, "learning_rate": 4.37427447086884e-05, "loss": 2.2785, "step": 4325000 }, { "epoch": 12.52, "learning_rate": 4.3742022508336424e-05, "loss": 2.2899, "step": 4325500 }, { "epoch": 12.52, "learning_rate": 4.3741298860689146e-05, "loss": 2.3016, "step": 4326000 }, { "epoch": 12.52, "learning_rate": 4.374057521304187e-05, "loss": 2.2642, "step": 4326500 }, { "epoch": 12.52, "learning_rate": 4.373985156539459e-05, "loss": 2.2864, "step": 4327000 }, { "epoch": 12.53, "learning_rate": 4.373912791774731e-05, "loss": 2.2883, "step": 4327500 }, { "epoch": 12.53, "learning_rate": 4.373840571739533e-05, "loss": 2.2712, "step": 4328000 }, { "epoch": 12.53, "learning_rate": 4.373768206974806e-05, "loss": 2.2953, "step": 4328500 }, { "epoch": 12.53, "learning_rate": 4.373695842210078e-05, "loss": 2.2799, "step": 4329000 }, { "epoch": 12.53, "learning_rate": 4.37362362217488e-05, "loss": 2.275, "step": 4329500 }, { "epoch": 12.53, "learning_rate": 4.3735512574101525e-05, "loss": 2.2959, "step": 4330000 }, { "epoch": 12.54, "learning_rate": 4.373478892645425e-05, "loss": 2.2691, "step": 4330500 }, { "epoch": 12.54, "learning_rate": 4.373406672610226e-05, "loss": 2.2796, "step": 4331000 }, { "epoch": 12.54, "learning_rate": 4.3733343078454985e-05, "loss": 2.3167, "step": 4331500 }, { "epoch": 12.54, "learning_rate": 4.373261943080771e-05, "loss": 2.3108, "step": 4332000 }, { "epoch": 12.54, "learning_rate": 4.373189578316043e-05, "loss": 2.2748, "step": 4332500 }, { "epoch": 12.54, "learning_rate": 4.373117213551315e-05, "loss": 2.305, "step": 4333000 }, { "epoch": 12.54, "learning_rate": 4.373044848786588e-05, "loss": 2.2748, "step": 4333500 }, { "epoch": 12.55, "learning_rate": 4.37297248402186e-05, "loss": 2.2756, "step": 4334000 }, { "epoch": 12.55, "learning_rate": 4.3729001192571325e-05, "loss": 2.3068, "step": 4334500 }, { "epoch": 12.55, "learning_rate": 4.372827754492405e-05, "loss": 2.2863, "step": 4335000 }, { "epoch": 12.55, "learning_rate": 4.372755534457206e-05, "loss": 2.278, "step": 4335500 }, { "epoch": 12.55, "learning_rate": 4.372683169692479e-05, "loss": 2.2861, "step": 4336000 }, { "epoch": 12.55, "learning_rate": 4.3726108049277514e-05, "loss": 2.3018, "step": 4336500 }, { "epoch": 12.55, "learning_rate": 4.3725384401630236e-05, "loss": 2.2777, "step": 4337000 }, { "epoch": 12.56, "learning_rate": 4.372466075398296e-05, "loss": 2.2864, "step": 4337500 }, { "epoch": 12.56, "learning_rate": 4.372393710633568e-05, "loss": 2.2723, "step": 4338000 }, { "epoch": 12.56, "learning_rate": 4.37232134586884e-05, "loss": 2.2809, "step": 4338500 }, { "epoch": 12.56, "learning_rate": 4.3722491258336425e-05, "loss": 2.301, "step": 4339000 }, { "epoch": 12.56, "learning_rate": 4.372176761068915e-05, "loss": 2.2871, "step": 4339500 }, { "epoch": 12.56, "learning_rate": 4.372104396304187e-05, "loss": 2.2922, "step": 4340000 }, { "epoch": 12.56, "learning_rate": 4.372032031539459e-05, "loss": 2.2903, "step": 4340500 }, { "epoch": 12.57, "learning_rate": 4.3719596667747314e-05, "loss": 2.2703, "step": 4341000 }, { "epoch": 12.57, "learning_rate": 4.3718873020100036e-05, "loss": 2.3063, "step": 4341500 }, { "epoch": 12.57, "learning_rate": 4.371814937245276e-05, "loss": 2.2859, "step": 4342000 }, { "epoch": 12.57, "learning_rate": 4.371742717210078e-05, "loss": 2.2946, "step": 4342500 }, { "epoch": 12.57, "learning_rate": 4.37167035244535e-05, "loss": 2.294, "step": 4343000 }, { "epoch": 12.57, "learning_rate": 4.371597987680623e-05, "loss": 2.288, "step": 4343500 }, { "epoch": 12.57, "learning_rate": 4.3715256229158954e-05, "loss": 2.2939, "step": 4344000 }, { "epoch": 12.58, "learning_rate": 4.3714532581511676e-05, "loss": 2.3053, "step": 4344500 }, { "epoch": 12.58, "learning_rate": 4.37138089338644e-05, "loss": 2.2817, "step": 4345000 }, { "epoch": 12.58, "learning_rate": 4.3713086733512414e-05, "loss": 2.2955, "step": 4345500 }, { "epoch": 12.58, "learning_rate": 4.3712363085865137e-05, "loss": 2.2824, "step": 4346000 }, { "epoch": 12.58, "learning_rate": 4.371163943821786e-05, "loss": 2.3114, "step": 4346500 }, { "epoch": 12.58, "learning_rate": 4.371091579057058e-05, "loss": 2.3125, "step": 4347000 }, { "epoch": 12.58, "learning_rate": 4.37101921429233e-05, "loss": 2.2592, "step": 4347500 }, { "epoch": 12.59, "learning_rate": 4.370946849527603e-05, "loss": 2.2719, "step": 4348000 }, { "epoch": 12.59, "learning_rate": 4.3708744847628754e-05, "loss": 2.3107, "step": 4348500 }, { "epoch": 12.59, "learning_rate": 4.370802119998148e-05, "loss": 2.3019, "step": 4349000 }, { "epoch": 12.59, "learning_rate": 4.37072975523342e-05, "loss": 2.2855, "step": 4349500 }, { "epoch": 12.59, "learning_rate": 4.370657390468692e-05, "loss": 2.2934, "step": 4350000 }, { "epoch": 12.59, "learning_rate": 4.3705851704334943e-05, "loss": 2.2843, "step": 4350500 }, { "epoch": 12.59, "learning_rate": 4.3705128056687666e-05, "loss": 2.2804, "step": 4351000 }, { "epoch": 12.6, "learning_rate": 4.370440440904039e-05, "loss": 2.2944, "step": 4351500 }, { "epoch": 12.6, "learning_rate": 4.370368076139311e-05, "loss": 2.2821, "step": 4352000 }, { "epoch": 12.6, "learning_rate": 4.370295711374583e-05, "loss": 2.2736, "step": 4352500 }, { "epoch": 12.6, "learning_rate": 4.3702234913393855e-05, "loss": 2.3085, "step": 4353000 }, { "epoch": 12.6, "learning_rate": 4.370151126574658e-05, "loss": 2.2868, "step": 4353500 }, { "epoch": 12.6, "learning_rate": 4.37007876180993e-05, "loss": 2.2909, "step": 4354000 }, { "epoch": 12.6, "learning_rate": 4.370006397045202e-05, "loss": 2.276, "step": 4354500 }, { "epoch": 12.61, "learning_rate": 4.3699340322804744e-05, "loss": 2.277, "step": 4355000 }, { "epoch": 12.61, "learning_rate": 4.3698616675157466e-05, "loss": 2.2799, "step": 4355500 }, { "epoch": 12.61, "learning_rate": 4.369789302751019e-05, "loss": 2.2776, "step": 4356000 }, { "epoch": 12.61, "learning_rate": 4.369716937986291e-05, "loss": 2.2956, "step": 4356500 }, { "epoch": 12.61, "learning_rate": 4.369644717951093e-05, "loss": 2.3011, "step": 4357000 }, { "epoch": 12.61, "learning_rate": 4.3695723531863655e-05, "loss": 2.286, "step": 4357500 }, { "epoch": 12.61, "learning_rate": 4.3694999884216384e-05, "loss": 2.2812, "step": 4358000 }, { "epoch": 12.62, "learning_rate": 4.36942776838644e-05, "loss": 2.2878, "step": 4358500 }, { "epoch": 12.62, "learning_rate": 4.369355403621712e-05, "loss": 2.283, "step": 4359000 }, { "epoch": 12.62, "learning_rate": 4.3692830388569844e-05, "loss": 2.2832, "step": 4359500 }, { "epoch": 12.62, "learning_rate": 4.3692106740922566e-05, "loss": 2.3103, "step": 4360000 }, { "epoch": 12.62, "learning_rate": 4.369138309327529e-05, "loss": 2.2841, "step": 4360500 }, { "epoch": 12.62, "learning_rate": 4.369066089292331e-05, "loss": 2.282, "step": 4361000 }, { "epoch": 12.62, "learning_rate": 4.368993724527603e-05, "loss": 2.2902, "step": 4361500 }, { "epoch": 12.63, "learning_rate": 4.3689213597628755e-05, "loss": 2.2707, "step": 4362000 }, { "epoch": 12.63, "learning_rate": 4.368848994998148e-05, "loss": 2.2999, "step": 4362500 }, { "epoch": 12.63, "learning_rate": 4.36877663023342e-05, "loss": 2.2977, "step": 4363000 }, { "epoch": 12.63, "learning_rate": 4.368704265468692e-05, "loss": 2.296, "step": 4363500 }, { "epoch": 12.63, "learning_rate": 4.3686319007039644e-05, "loss": 2.2952, "step": 4364000 }, { "epoch": 12.63, "learning_rate": 4.3685595359392366e-05, "loss": 2.3198, "step": 4364500 }, { "epoch": 12.63, "learning_rate": 4.3684871711745095e-05, "loss": 2.304, "step": 4365000 }, { "epoch": 12.64, "learning_rate": 4.368414806409782e-05, "loss": 2.2931, "step": 4365500 }, { "epoch": 12.64, "learning_rate": 4.368342586374583e-05, "loss": 2.2641, "step": 4366000 }, { "epoch": 12.64, "learning_rate": 4.3682702216098555e-05, "loss": 2.2951, "step": 4366500 }, { "epoch": 12.64, "learning_rate": 4.3681978568451284e-05, "loss": 2.2963, "step": 4367000 }, { "epoch": 12.64, "learning_rate": 4.36812563680993e-05, "loss": 2.3162, "step": 4367500 }, { "epoch": 12.64, "learning_rate": 4.3680534167747316e-05, "loss": 2.2933, "step": 4368000 }, { "epoch": 12.65, "learning_rate": 4.367981052010004e-05, "loss": 2.2692, "step": 4368500 }, { "epoch": 12.65, "learning_rate": 4.367908687245276e-05, "loss": 2.2864, "step": 4369000 }, { "epoch": 12.65, "learning_rate": 4.367836322480548e-05, "loss": 2.3025, "step": 4369500 }, { "epoch": 12.65, "learning_rate": 4.367763957715821e-05, "loss": 2.2734, "step": 4370000 }, { "epoch": 12.65, "learning_rate": 4.3676915929510934e-05, "loss": 2.2734, "step": 4370500 }, { "epoch": 12.65, "learning_rate": 4.3676192281863656e-05, "loss": 2.2531, "step": 4371000 }, { "epoch": 12.65, "learning_rate": 4.367546863421638e-05, "loss": 2.2737, "step": 4371500 }, { "epoch": 12.66, "learning_rate": 4.36747449865691e-05, "loss": 2.2671, "step": 4372000 }, { "epoch": 12.66, "learning_rate": 4.367402133892182e-05, "loss": 2.3104, "step": 4372500 }, { "epoch": 12.66, "learning_rate": 4.3673299138569845e-05, "loss": 2.2917, "step": 4373000 }, { "epoch": 12.66, "learning_rate": 4.367257549092257e-05, "loss": 2.2974, "step": 4373500 }, { "epoch": 12.66, "learning_rate": 4.367185184327529e-05, "loss": 2.3046, "step": 4374000 }, { "epoch": 12.66, "learning_rate": 4.367112819562801e-05, "loss": 2.282, "step": 4374500 }, { "epoch": 12.66, "learning_rate": 4.3670404547980734e-05, "loss": 2.3, "step": 4375000 }, { "epoch": 12.67, "learning_rate": 4.366968090033346e-05, "loss": 2.3216, "step": 4375500 }, { "epoch": 12.67, "learning_rate": 4.3668957252686185e-05, "loss": 2.265, "step": 4376000 }, { "epoch": 12.67, "learning_rate": 4.36682350523342e-05, "loss": 2.2926, "step": 4376500 }, { "epoch": 12.67, "learning_rate": 4.366751140468692e-05, "loss": 2.2926, "step": 4377000 }, { "epoch": 12.67, "learning_rate": 4.3666787757039645e-05, "loss": 2.2822, "step": 4377500 }, { "epoch": 12.67, "learning_rate": 4.366606410939237e-05, "loss": 2.2709, "step": 4378000 }, { "epoch": 12.67, "learning_rate": 4.366534046174509e-05, "loss": 2.2895, "step": 4378500 }, { "epoch": 12.68, "learning_rate": 4.366461681409781e-05, "loss": 2.2882, "step": 4379000 }, { "epoch": 12.68, "learning_rate": 4.3663894613745834e-05, "loss": 2.3158, "step": 4379500 }, { "epoch": 12.68, "learning_rate": 4.3663170966098556e-05, "loss": 2.2652, "step": 4380000 }, { "epoch": 12.68, "learning_rate": 4.3662447318451285e-05, "loss": 2.2897, "step": 4380500 }, { "epoch": 12.68, "learning_rate": 4.366172367080401e-05, "loss": 2.2783, "step": 4381000 }, { "epoch": 12.68, "learning_rate": 4.366100002315673e-05, "loss": 2.2994, "step": 4381500 }, { "epoch": 12.68, "learning_rate": 4.366027637550945e-05, "loss": 2.3178, "step": 4382000 }, { "epoch": 12.69, "learning_rate": 4.3659552727862174e-05, "loss": 2.3046, "step": 4382500 }, { "epoch": 12.69, "learning_rate": 4.3658829080214896e-05, "loss": 2.2897, "step": 4383000 }, { "epoch": 12.69, "learning_rate": 4.365810543256762e-05, "loss": 2.2789, "step": 4383500 }, { "epoch": 12.69, "learning_rate": 4.365738178492034e-05, "loss": 2.2895, "step": 4384000 }, { "epoch": 12.69, "learning_rate": 4.365665813727306e-05, "loss": 2.2846, "step": 4384500 }, { "epoch": 12.69, "learning_rate": 4.3655935936921085e-05, "loss": 2.2861, "step": 4385000 }, { "epoch": 12.69, "learning_rate": 4.365521228927381e-05, "loss": 2.2909, "step": 4385500 }, { "epoch": 12.7, "learning_rate": 4.365449008892182e-05, "loss": 2.2812, "step": 4386000 }, { "epoch": 12.7, "learning_rate": 4.3653766441274545e-05, "loss": 2.2979, "step": 4386500 }, { "epoch": 12.7, "learning_rate": 4.365304279362727e-05, "loss": 2.3065, "step": 4387000 }, { "epoch": 12.7, "learning_rate": 4.365231914598e-05, "loss": 2.303, "step": 4387500 }, { "epoch": 12.7, "learning_rate": 4.365159549833272e-05, "loss": 2.2735, "step": 4388000 }, { "epoch": 12.7, "learning_rate": 4.365087185068544e-05, "loss": 2.276, "step": 4388500 }, { "epoch": 12.7, "learning_rate": 4.3650148203038163e-05, "loss": 2.3068, "step": 4389000 }, { "epoch": 12.71, "learning_rate": 4.3649426002686186e-05, "loss": 2.3006, "step": 4389500 }, { "epoch": 12.71, "learning_rate": 4.364870235503891e-05, "loss": 2.2823, "step": 4390000 }, { "epoch": 12.71, "learning_rate": 4.364797870739163e-05, "loss": 2.2985, "step": 4390500 }, { "epoch": 12.71, "learning_rate": 4.364725505974435e-05, "loss": 2.2829, "step": 4391000 }, { "epoch": 12.71, "learning_rate": 4.364653285939237e-05, "loss": 2.2631, "step": 4391500 }, { "epoch": 12.71, "learning_rate": 4.364580921174509e-05, "loss": 2.2863, "step": 4392000 }, { "epoch": 12.71, "learning_rate": 4.364508556409781e-05, "loss": 2.3034, "step": 4392500 }, { "epoch": 12.72, "learning_rate": 4.3644361916450535e-05, "loss": 2.2801, "step": 4393000 }, { "epoch": 12.72, "learning_rate": 4.3643638268803264e-05, "loss": 2.3062, "step": 4393500 }, { "epoch": 12.72, "learning_rate": 4.3642914621155986e-05, "loss": 2.3062, "step": 4394000 }, { "epoch": 12.72, "learning_rate": 4.364219097350871e-05, "loss": 2.2737, "step": 4394500 }, { "epoch": 12.72, "learning_rate": 4.364146732586144e-05, "loss": 2.3031, "step": 4395000 }, { "epoch": 12.72, "learning_rate": 4.364074367821416e-05, "loss": 2.3045, "step": 4395500 }, { "epoch": 12.72, "learning_rate": 4.364002292515747e-05, "loss": 2.2611, "step": 4396000 }, { "epoch": 12.73, "learning_rate": 4.363929927751019e-05, "loss": 2.3004, "step": 4396500 }, { "epoch": 12.73, "learning_rate": 4.363857562986291e-05, "loss": 2.2865, "step": 4397000 }, { "epoch": 12.73, "learning_rate": 4.363785198221564e-05, "loss": 2.309, "step": 4397500 }, { "epoch": 12.73, "learning_rate": 4.3637128334568364e-05, "loss": 2.2951, "step": 4398000 }, { "epoch": 12.73, "learning_rate": 4.3636404686921086e-05, "loss": 2.2856, "step": 4398500 }, { "epoch": 12.73, "learning_rate": 4.363568103927381e-05, "loss": 2.2962, "step": 4399000 }, { "epoch": 12.73, "learning_rate": 4.363495739162653e-05, "loss": 2.2809, "step": 4399500 }, { "epoch": 12.74, "learning_rate": 4.3634235191274546e-05, "loss": 2.2879, "step": 4400000 }, { "epoch": 12.74, "learning_rate": 4.363351154362727e-05, "loss": 2.2759, "step": 4400500 }, { "epoch": 12.74, "learning_rate": 4.363278789597999e-05, "loss": 2.2959, "step": 4401000 }, { "epoch": 12.74, "learning_rate": 4.363206569562801e-05, "loss": 2.3067, "step": 4401500 }, { "epoch": 12.74, "learning_rate": 4.3631342047980735e-05, "loss": 2.2855, "step": 4402000 }, { "epoch": 12.74, "learning_rate": 4.363061840033346e-05, "loss": 2.2979, "step": 4402500 }, { "epoch": 12.74, "learning_rate": 4.3629894752686187e-05, "loss": 2.2814, "step": 4403000 }, { "epoch": 12.75, "learning_rate": 4.362917110503891e-05, "loss": 2.2698, "step": 4403500 }, { "epoch": 12.75, "learning_rate": 4.362844745739163e-05, "loss": 2.2975, "step": 4404000 }, { "epoch": 12.75, "learning_rate": 4.3627725257039647e-05, "loss": 2.3056, "step": 4404500 }, { "epoch": 12.75, "learning_rate": 4.362700160939237e-05, "loss": 2.2831, "step": 4405000 }, { "epoch": 12.75, "learning_rate": 4.362627796174509e-05, "loss": 2.3016, "step": 4405500 }, { "epoch": 12.75, "learning_rate": 4.362555431409781e-05, "loss": 2.2749, "step": 4406000 }, { "epoch": 12.76, "learning_rate": 4.362483066645054e-05, "loss": 2.2735, "step": 4406500 }, { "epoch": 12.76, "learning_rate": 4.3624107018803265e-05, "loss": 2.2926, "step": 4407000 }, { "epoch": 12.76, "learning_rate": 4.362338481845128e-05, "loss": 2.3031, "step": 4407500 }, { "epoch": 12.76, "learning_rate": 4.3622661170804e-05, "loss": 2.2725, "step": 4408000 }, { "epoch": 12.76, "learning_rate": 4.3621937523156725e-05, "loss": 2.3019, "step": 4408500 }, { "epoch": 12.76, "learning_rate": 4.362121387550945e-05, "loss": 2.3157, "step": 4409000 }, { "epoch": 12.76, "learning_rate": 4.362049022786217e-05, "loss": 2.2883, "step": 4409500 }, { "epoch": 12.77, "learning_rate": 4.36197665802149e-05, "loss": 2.3094, "step": 4410000 }, { "epoch": 12.77, "learning_rate": 4.361904293256762e-05, "loss": 2.2848, "step": 4410500 }, { "epoch": 12.77, "learning_rate": 4.361831928492034e-05, "loss": 2.2813, "step": 4411000 }, { "epoch": 12.77, "learning_rate": 4.3617597084568365e-05, "loss": 2.2721, "step": 4411500 }, { "epoch": 12.77, "learning_rate": 4.361687343692109e-05, "loss": 2.2934, "step": 4412000 }, { "epoch": 12.77, "learning_rate": 4.36161512365691e-05, "loss": 2.3088, "step": 4412500 }, { "epoch": 12.77, "learning_rate": 4.3615427588921825e-05, "loss": 2.2838, "step": 4413000 }, { "epoch": 12.78, "learning_rate": 4.361470394127455e-05, "loss": 2.2762, "step": 4413500 }, { "epoch": 12.78, "learning_rate": 4.361398174092256e-05, "loss": 2.2809, "step": 4414000 }, { "epoch": 12.78, "learning_rate": 4.361325809327529e-05, "loss": 2.2775, "step": 4414500 }, { "epoch": 12.78, "learning_rate": 4.3612534445628014e-05, "loss": 2.2707, "step": 4415000 }, { "epoch": 12.78, "learning_rate": 4.3611810797980736e-05, "loss": 2.3035, "step": 4415500 }, { "epoch": 12.78, "learning_rate": 4.361108715033346e-05, "loss": 2.2763, "step": 4416000 }, { "epoch": 12.78, "learning_rate": 4.361036350268618e-05, "loss": 2.3049, "step": 4416500 }, { "epoch": 12.79, "learning_rate": 4.36096398550389e-05, "loss": 2.3143, "step": 4417000 }, { "epoch": 12.79, "learning_rate": 4.360891620739163e-05, "loss": 2.2845, "step": 4417500 }, { "epoch": 12.79, "learning_rate": 4.3608192559744354e-05, "loss": 2.3283, "step": 4418000 }, { "epoch": 12.79, "learning_rate": 4.3607468912097076e-05, "loss": 2.2652, "step": 4418500 }, { "epoch": 12.79, "learning_rate": 4.360674815904039e-05, "loss": 2.2945, "step": 4419000 }, { "epoch": 12.79, "learning_rate": 4.3606024511393114e-05, "loss": 2.2833, "step": 4419500 }, { "epoch": 12.79, "learning_rate": 4.3605300863745836e-05, "loss": 2.302, "step": 4420000 }, { "epoch": 12.8, "learning_rate": 4.360457721609856e-05, "loss": 2.2676, "step": 4420500 }, { "epoch": 12.8, "learning_rate": 4.360385356845128e-05, "loss": 2.2876, "step": 4421000 }, { "epoch": 12.8, "learning_rate": 4.3603129920804e-05, "loss": 2.2745, "step": 4421500 }, { "epoch": 12.8, "learning_rate": 4.3602406273156725e-05, "loss": 2.3025, "step": 4422000 }, { "epoch": 12.8, "learning_rate": 4.360168262550945e-05, "loss": 2.2954, "step": 4422500 }, { "epoch": 12.8, "learning_rate": 4.360095897786217e-05, "loss": 2.284, "step": 4423000 }, { "epoch": 12.8, "learning_rate": 4.360023677751019e-05, "loss": 2.2739, "step": 4423500 }, { "epoch": 12.81, "learning_rate": 4.3599513129862914e-05, "loss": 2.2861, "step": 4424000 }, { "epoch": 12.81, "learning_rate": 4.3598789482215637e-05, "loss": 2.2708, "step": 4424500 }, { "epoch": 12.81, "learning_rate": 4.3598065834568366e-05, "loss": 2.2727, "step": 4425000 }, { "epoch": 12.81, "learning_rate": 4.359734363421638e-05, "loss": 2.2873, "step": 4425500 }, { "epoch": 12.81, "learning_rate": 4.359662288115969e-05, "loss": 2.3176, "step": 4426000 }, { "epoch": 12.81, "learning_rate": 4.359589923351242e-05, "loss": 2.286, "step": 4426500 }, { "epoch": 12.81, "learning_rate": 4.359517558586514e-05, "loss": 2.3087, "step": 4427000 }, { "epoch": 12.82, "learning_rate": 4.3594451938217864e-05, "loss": 2.2774, "step": 4427500 }, { "epoch": 12.82, "learning_rate": 4.3593728290570586e-05, "loss": 2.2544, "step": 4428000 }, { "epoch": 12.82, "learning_rate": 4.359300464292331e-05, "loss": 2.291, "step": 4428500 }, { "epoch": 12.82, "learning_rate": 4.359228099527603e-05, "loss": 2.2947, "step": 4429000 }, { "epoch": 12.82, "learning_rate": 4.359155734762875e-05, "loss": 2.3145, "step": 4429500 }, { "epoch": 12.82, "learning_rate": 4.3590833699981475e-05, "loss": 2.3209, "step": 4430000 }, { "epoch": 12.82, "learning_rate": 4.35901100523342e-05, "loss": 2.2688, "step": 4430500 }, { "epoch": 12.83, "learning_rate": 4.358938640468692e-05, "loss": 2.2786, "step": 4431000 }, { "epoch": 12.83, "learning_rate": 4.358866275703964e-05, "loss": 2.2736, "step": 4431500 }, { "epoch": 12.83, "learning_rate": 4.358793910939237e-05, "loss": 2.2968, "step": 4432000 }, { "epoch": 12.83, "learning_rate": 4.358721546174509e-05, "loss": 2.282, "step": 4432500 }, { "epoch": 12.83, "learning_rate": 4.358649181409782e-05, "loss": 2.2922, "step": 4433000 }, { "epoch": 12.83, "learning_rate": 4.3585768166450544e-05, "loss": 2.3045, "step": 4433500 }, { "epoch": 12.83, "learning_rate": 4.3585044518803266e-05, "loss": 2.2791, "step": 4434000 }, { "epoch": 12.84, "learning_rate": 4.358432231845128e-05, "loss": 2.307, "step": 4434500 }, { "epoch": 12.84, "learning_rate": 4.3583598670804004e-05, "loss": 2.2716, "step": 4435000 }, { "epoch": 12.84, "learning_rate": 4.3582875023156726e-05, "loss": 2.2893, "step": 4435500 }, { "epoch": 12.84, "learning_rate": 4.358215427010004e-05, "loss": 2.3072, "step": 4436000 }, { "epoch": 12.84, "learning_rate": 4.3581430622452764e-05, "loss": 2.2726, "step": 4436500 }, { "epoch": 12.84, "learning_rate": 4.3580706974805486e-05, "loss": 2.2981, "step": 4437000 }, { "epoch": 12.84, "learning_rate": 4.357998332715821e-05, "loss": 2.2811, "step": 4437500 }, { "epoch": 12.85, "learning_rate": 4.3579261126806224e-05, "loss": 2.2717, "step": 4438000 }, { "epoch": 12.85, "learning_rate": 4.3578537479158946e-05, "loss": 2.2845, "step": 4438500 }, { "epoch": 12.85, "learning_rate": 4.357781383151167e-05, "loss": 2.2851, "step": 4439000 }, { "epoch": 12.85, "learning_rate": 4.357709018386439e-05, "loss": 2.2978, "step": 4439500 }, { "epoch": 12.85, "learning_rate": 4.357636653621712e-05, "loss": 2.2972, "step": 4440000 }, { "epoch": 12.85, "learning_rate": 4.357564288856984e-05, "loss": 2.2827, "step": 4440500 }, { "epoch": 12.85, "learning_rate": 4.357491924092257e-05, "loss": 2.287, "step": 4441000 }, { "epoch": 12.86, "learning_rate": 4.3574197040570587e-05, "loss": 2.2737, "step": 4441500 }, { "epoch": 12.86, "learning_rate": 4.357347339292331e-05, "loss": 2.3074, "step": 4442000 }, { "epoch": 12.86, "learning_rate": 4.357274974527603e-05, "loss": 2.2981, "step": 4442500 }, { "epoch": 12.86, "learning_rate": 4.357202609762875e-05, "loss": 2.3185, "step": 4443000 }, { "epoch": 12.86, "learning_rate": 4.3571302449981475e-05, "loss": 2.2951, "step": 4443500 }, { "epoch": 12.86, "learning_rate": 4.35705788023342e-05, "loss": 2.2845, "step": 4444000 }, { "epoch": 12.87, "learning_rate": 4.356985515468692e-05, "loss": 2.3012, "step": 4444500 }, { "epoch": 12.87, "learning_rate": 4.356913150703964e-05, "loss": 2.2746, "step": 4445000 }, { "epoch": 12.87, "learning_rate": 4.356840785939237e-05, "loss": 2.2774, "step": 4445500 }, { "epoch": 12.87, "learning_rate": 4.3567684211745093e-05, "loss": 2.2789, "step": 4446000 }, { "epoch": 12.87, "learning_rate": 4.3566960564097816e-05, "loss": 2.2787, "step": 4446500 }, { "epoch": 12.87, "learning_rate": 4.356623836374583e-05, "loss": 2.2976, "step": 4447000 }, { "epoch": 12.87, "learning_rate": 4.3565514716098553e-05, "loss": 2.3033, "step": 4447500 }, { "epoch": 12.88, "learning_rate": 4.3564792515746576e-05, "loss": 2.2752, "step": 4448000 }, { "epoch": 12.88, "learning_rate": 4.35640688680993e-05, "loss": 2.2885, "step": 4448500 }, { "epoch": 12.88, "learning_rate": 4.356334522045202e-05, "loss": 2.3148, "step": 4449000 }, { "epoch": 12.88, "learning_rate": 4.356262157280475e-05, "loss": 2.3076, "step": 4449500 }, { "epoch": 12.88, "learning_rate": 4.356189792515747e-05, "loss": 2.2955, "step": 4450000 }, { "epoch": 12.88, "learning_rate": 4.3561174277510194e-05, "loss": 2.278, "step": 4450500 }, { "epoch": 12.88, "learning_rate": 4.3560450629862916e-05, "loss": 2.2934, "step": 4451000 }, { "epoch": 12.89, "learning_rate": 4.355972842951093e-05, "loss": 2.295, "step": 4451500 }, { "epoch": 12.89, "learning_rate": 4.3559004781863654e-05, "loss": 2.2765, "step": 4452000 }, { "epoch": 12.89, "learning_rate": 4.3558281134216376e-05, "loss": 2.3007, "step": 4452500 }, { "epoch": 12.89, "learning_rate": 4.35575574865691e-05, "loss": 2.3, "step": 4453000 }, { "epoch": 12.89, "learning_rate": 4.355683383892182e-05, "loss": 2.2803, "step": 4453500 }, { "epoch": 12.89, "learning_rate": 4.355611019127455e-05, "loss": 2.293, "step": 4454000 }, { "epoch": 12.89, "learning_rate": 4.355538654362727e-05, "loss": 2.3089, "step": 4454500 }, { "epoch": 12.9, "learning_rate": 4.355466289598e-05, "loss": 2.2824, "step": 4455000 }, { "epoch": 12.9, "learning_rate": 4.355393924833272e-05, "loss": 2.28, "step": 4455500 }, { "epoch": 12.9, "learning_rate": 4.3553215600685445e-05, "loss": 2.3017, "step": 4456000 }, { "epoch": 12.9, "learning_rate": 4.355249195303817e-05, "loss": 2.2649, "step": 4456500 }, { "epoch": 12.9, "learning_rate": 4.355176830539089e-05, "loss": 2.2809, "step": 4457000 }, { "epoch": 12.9, "learning_rate": 4.355104465774361e-05, "loss": 2.2821, "step": 4457500 }, { "epoch": 12.9, "learning_rate": 4.3550321010096334e-05, "loss": 2.2712, "step": 4458000 }, { "epoch": 12.91, "learning_rate": 4.3549597362449056e-05, "loss": 2.2864, "step": 4458500 }, { "epoch": 12.91, "learning_rate": 4.354887371480178e-05, "loss": 2.277, "step": 4459000 }, { "epoch": 12.91, "learning_rate": 4.35481500671545e-05, "loss": 2.298, "step": 4459500 }, { "epoch": 12.91, "learning_rate": 4.354742641950722e-05, "loss": 2.3088, "step": 4460000 }, { "epoch": 12.91, "learning_rate": 4.3546702771859945e-05, "loss": 2.2784, "step": 4460500 }, { "epoch": 12.91, "learning_rate": 4.354598057150797e-05, "loss": 2.298, "step": 4461000 }, { "epoch": 12.91, "learning_rate": 4.354525692386069e-05, "loss": 2.3199, "step": 4461500 }, { "epoch": 12.92, "learning_rate": 4.354453327621341e-05, "loss": 2.3146, "step": 4462000 }, { "epoch": 12.92, "learning_rate": 4.354380962856614e-05, "loss": 2.2779, "step": 4462500 }, { "epoch": 12.92, "learning_rate": 4.354308598091886e-05, "loss": 2.2835, "step": 4463000 }, { "epoch": 12.92, "learning_rate": 4.354236378056688e-05, "loss": 2.2597, "step": 4463500 }, { "epoch": 12.92, "learning_rate": 4.35416415802149e-05, "loss": 2.2979, "step": 4464000 }, { "epoch": 12.92, "learning_rate": 4.3540917932567623e-05, "loss": 2.3171, "step": 4464500 }, { "epoch": 12.92, "learning_rate": 4.3540194284920346e-05, "loss": 2.2654, "step": 4465000 }, { "epoch": 12.93, "learning_rate": 4.353947063727307e-05, "loss": 2.268, "step": 4465500 }, { "epoch": 12.93, "learning_rate": 4.3538748436921083e-05, "loss": 2.2823, "step": 4466000 }, { "epoch": 12.93, "learning_rate": 4.3538024789273806e-05, "loss": 2.2805, "step": 4466500 }, { "epoch": 12.93, "learning_rate": 4.353730114162653e-05, "loss": 2.287, "step": 4467000 }, { "epoch": 12.93, "learning_rate": 4.353657749397925e-05, "loss": 2.309, "step": 4467500 }, { "epoch": 12.93, "learning_rate": 4.353585384633197e-05, "loss": 2.2904, "step": 4468000 }, { "epoch": 12.93, "learning_rate": 4.3535131645979995e-05, "loss": 2.3005, "step": 4468500 }, { "epoch": 12.94, "learning_rate": 4.353440944562801e-05, "loss": 2.2781, "step": 4469000 }, { "epoch": 12.94, "learning_rate": 4.353368579798073e-05, "loss": 2.2991, "step": 4469500 }, { "epoch": 12.94, "learning_rate": 4.3532962150333455e-05, "loss": 2.3136, "step": 4470000 }, { "epoch": 12.94, "learning_rate": 4.3532238502686184e-05, "loss": 2.2814, "step": 4470500 }, { "epoch": 12.94, "learning_rate": 4.3531514855038906e-05, "loss": 2.3119, "step": 4471000 }, { "epoch": 12.94, "learning_rate": 4.353079120739163e-05, "loss": 2.2725, "step": 4471500 }, { "epoch": 12.94, "learning_rate": 4.353006755974435e-05, "loss": 2.3128, "step": 4472000 }, { "epoch": 12.95, "learning_rate": 4.352934391209707e-05, "loss": 2.282, "step": 4472500 }, { "epoch": 12.95, "learning_rate": 4.35286202644498e-05, "loss": 2.2856, "step": 4473000 }, { "epoch": 12.95, "learning_rate": 4.3527896616802524e-05, "loss": 2.2928, "step": 4473500 }, { "epoch": 12.95, "learning_rate": 4.3527172969155246e-05, "loss": 2.3122, "step": 4474000 }, { "epoch": 12.95, "learning_rate": 4.352645076880326e-05, "loss": 2.3056, "step": 4474500 }, { "epoch": 12.95, "learning_rate": 4.3525727121155984e-05, "loss": 2.2997, "step": 4475000 }, { "epoch": 12.95, "learning_rate": 4.3525003473508706e-05, "loss": 2.279, "step": 4475500 }, { "epoch": 12.96, "learning_rate": 4.352427982586143e-05, "loss": 2.2859, "step": 4476000 }, { "epoch": 12.96, "learning_rate": 4.352355617821415e-05, "loss": 2.3091, "step": 4476500 }, { "epoch": 12.96, "learning_rate": 4.352283253056687e-05, "loss": 2.2498, "step": 4477000 }, { "epoch": 12.96, "learning_rate": 4.35221088829196e-05, "loss": 2.286, "step": 4477500 }, { "epoch": 12.96, "learning_rate": 4.3521385235272324e-05, "loss": 2.3032, "step": 4478000 }, { "epoch": 12.96, "learning_rate": 4.352066158762505e-05, "loss": 2.2769, "step": 4478500 }, { "epoch": 12.96, "learning_rate": 4.351994083456836e-05, "loss": 2.2975, "step": 4479000 }, { "epoch": 12.97, "learning_rate": 4.3519217186921084e-05, "loss": 2.2931, "step": 4479500 }, { "epoch": 12.97, "learning_rate": 4.35184949865691e-05, "loss": 2.2915, "step": 4480000 }, { "epoch": 12.97, "learning_rate": 4.351777133892183e-05, "loss": 2.2989, "step": 4480500 }, { "epoch": 12.97, "learning_rate": 4.351704769127455e-05, "loss": 2.2736, "step": 4481000 }, { "epoch": 12.97, "learning_rate": 4.351632404362727e-05, "loss": 2.3193, "step": 4481500 }, { "epoch": 12.97, "learning_rate": 4.3515600395979996e-05, "loss": 2.2949, "step": 4482000 }, { "epoch": 12.98, "learning_rate": 4.351487674833272e-05, "loss": 2.2814, "step": 4482500 }, { "epoch": 12.98, "learning_rate": 4.351415310068544e-05, "loss": 2.294, "step": 4483000 }, { "epoch": 12.98, "learning_rate": 4.351342945303816e-05, "loss": 2.2778, "step": 4483500 }, { "epoch": 12.98, "learning_rate": 4.3512705805390884e-05, "loss": 2.2795, "step": 4484000 }, { "epoch": 12.98, "learning_rate": 4.351198215774361e-05, "loss": 2.2953, "step": 4484500 }, { "epoch": 12.98, "learning_rate": 4.3511258510096336e-05, "loss": 2.2685, "step": 4485000 }, { "epoch": 12.98, "learning_rate": 4.351053486244906e-05, "loss": 2.3144, "step": 4485500 }, { "epoch": 12.99, "learning_rate": 4.350981121480178e-05, "loss": 2.2776, "step": 4486000 }, { "epoch": 12.99, "learning_rate": 4.35090890144498e-05, "loss": 2.2719, "step": 4486500 }, { "epoch": 12.99, "learning_rate": 4.3508365366802525e-05, "loss": 2.2891, "step": 4487000 }, { "epoch": 12.99, "learning_rate": 4.350764171915525e-05, "loss": 2.2724, "step": 4487500 }, { "epoch": 12.99, "learning_rate": 4.350691807150797e-05, "loss": 2.2887, "step": 4488000 }, { "epoch": 12.99, "learning_rate": 4.3506195871155985e-05, "loss": 2.2841, "step": 4488500 }, { "epoch": 12.99, "learning_rate": 4.350547222350871e-05, "loss": 2.288, "step": 4489000 }, { "epoch": 13.0, "learning_rate": 4.350474857586143e-05, "loss": 2.3102, "step": 4489500 }, { "epoch": 13.0, "learning_rate": 4.350402492821415e-05, "loss": 2.2786, "step": 4490000 }, { "epoch": 13.0, "learning_rate": 4.3503302727862174e-05, "loss": 2.3065, "step": 4490500 }, { "epoch": 13.0, "learning_rate": 4.3502579080214896e-05, "loss": 2.2941, "step": 4491000 }, { "epoch": 13.0, "eval_accuracy": 0.6519667819102094, "eval_accuracy_mlm": 0.6147493240537715, "eval_accuracy_nsp": 0.8515396919241223, "eval_loss": 2.285916566848755, "eval_runtime": 330.4294, "eval_samples_per_second": 1320.663, "eval_steps_per_second": 55.028, "step": 4491136 }, { "epoch": 13.0, "learning_rate": 4.350185543256762e-05, "loss": 2.2703, "step": 4491500 }, { "epoch": 13.0, "learning_rate": 4.350113178492034e-05, "loss": 2.263, "step": 4492000 }, { "epoch": 13.0, "learning_rate": 4.350040813727307e-05, "loss": 2.2633, "step": 4492500 }, { "epoch": 13.01, "learning_rate": 4.3499685936921085e-05, "loss": 2.2604, "step": 4493000 }, { "epoch": 13.01, "learning_rate": 4.349896228927381e-05, "loss": 2.2509, "step": 4493500 }, { "epoch": 13.01, "learning_rate": 4.349823864162653e-05, "loss": 2.2627, "step": 4494000 }, { "epoch": 13.01, "learning_rate": 4.349751499397925e-05, "loss": 2.266, "step": 4494500 }, { "epoch": 13.01, "learning_rate": 4.3496792793627274e-05, "loss": 2.293, "step": 4495000 }, { "epoch": 13.01, "learning_rate": 4.3496069145979996e-05, "loss": 2.2931, "step": 4495500 }, { "epoch": 13.01, "learning_rate": 4.349534549833272e-05, "loss": 2.287, "step": 4496000 }, { "epoch": 13.02, "learning_rate": 4.349462185068544e-05, "loss": 2.2158, "step": 4496500 }, { "epoch": 13.02, "learning_rate": 4.349389820303816e-05, "loss": 2.2506, "step": 4497000 }, { "epoch": 13.02, "learning_rate": 4.3493174555390885e-05, "loss": 2.2813, "step": 4497500 }, { "epoch": 13.02, "learning_rate": 4.34924523550389e-05, "loss": 2.2819, "step": 4498000 }, { "epoch": 13.02, "learning_rate": 4.349172870739163e-05, "loss": 2.2681, "step": 4498500 }, { "epoch": 13.02, "learning_rate": 4.349100505974435e-05, "loss": 2.287, "step": 4499000 }, { "epoch": 13.02, "learning_rate": 4.3490281412097074e-05, "loss": 2.2663, "step": 4499500 }, { "epoch": 13.03, "learning_rate": 4.34895577644498e-05, "loss": 2.2696, "step": 4500000 }, { "epoch": 13.03, "learning_rate": 4.3488834116802526e-05, "loss": 2.2868, "step": 4500500 }, { "epoch": 13.03, "learning_rate": 4.348811046915525e-05, "loss": 2.2862, "step": 4501000 }, { "epoch": 13.03, "learning_rate": 4.348738682150797e-05, "loss": 2.2863, "step": 4501500 }, { "epoch": 13.03, "learning_rate": 4.3486664621155986e-05, "loss": 2.237, "step": 4502000 }, { "epoch": 13.03, "learning_rate": 4.3485942420804e-05, "loss": 2.2913, "step": 4502500 }, { "epoch": 13.03, "learning_rate": 4.348521877315673e-05, "loss": 2.2999, "step": 4503000 }, { "epoch": 13.04, "learning_rate": 4.348449512550945e-05, "loss": 2.2576, "step": 4503500 }, { "epoch": 13.04, "learning_rate": 4.3483771477862175e-05, "loss": 2.2668, "step": 4504000 }, { "epoch": 13.04, "learning_rate": 4.34830478302149e-05, "loss": 2.268, "step": 4504500 }, { "epoch": 13.04, "learning_rate": 4.348232418256762e-05, "loss": 2.2798, "step": 4505000 }, { "epoch": 13.04, "learning_rate": 4.348160053492034e-05, "loss": 2.2499, "step": 4505500 }, { "epoch": 13.04, "learning_rate": 4.3480876887273064e-05, "loss": 2.2528, "step": 4506000 }, { "epoch": 13.04, "learning_rate": 4.348015468692108e-05, "loss": 2.2651, "step": 4506500 }, { "epoch": 13.05, "learning_rate": 4.34794310392738e-05, "loss": 2.249, "step": 4507000 }, { "epoch": 13.05, "learning_rate": 4.347870739162653e-05, "loss": 2.2622, "step": 4507500 }, { "epoch": 13.05, "learning_rate": 4.347798374397925e-05, "loss": 2.2708, "step": 4508000 }, { "epoch": 13.05, "learning_rate": 4.347726009633198e-05, "loss": 2.2856, "step": 4508500 }, { "epoch": 13.05, "learning_rate": 4.3476536448684704e-05, "loss": 2.2897, "step": 4509000 }, { "epoch": 13.05, "learning_rate": 4.3475812801037426e-05, "loss": 2.2628, "step": 4509500 }, { "epoch": 13.05, "learning_rate": 4.347508915339015e-05, "loss": 2.2829, "step": 4510000 }, { "epoch": 13.06, "learning_rate": 4.347436550574287e-05, "loss": 2.2595, "step": 4510500 }, { "epoch": 13.06, "learning_rate": 4.347364185809559e-05, "loss": 2.2737, "step": 4511000 }, { "epoch": 13.06, "learning_rate": 4.3472918210448315e-05, "loss": 2.2561, "step": 4511500 }, { "epoch": 13.06, "learning_rate": 4.347219456280104e-05, "loss": 2.2928, "step": 4512000 }, { "epoch": 13.06, "learning_rate": 4.347147236244905e-05, "loss": 2.2584, "step": 4512500 }, { "epoch": 13.06, "learning_rate": 4.347074871480178e-05, "loss": 2.2889, "step": 4513000 }, { "epoch": 13.06, "learning_rate": 4.3470025067154504e-05, "loss": 2.2659, "step": 4513500 }, { "epoch": 13.07, "learning_rate": 4.3469301419507226e-05, "loss": 2.2651, "step": 4514000 }, { "epoch": 13.07, "learning_rate": 4.3468577771859955e-05, "loss": 2.2736, "step": 4514500 }, { "epoch": 13.07, "learning_rate": 4.346785557150797e-05, "loss": 2.2656, "step": 4515000 }, { "epoch": 13.07, "learning_rate": 4.346713192386069e-05, "loss": 2.2545, "step": 4515500 }, { "epoch": 13.07, "learning_rate": 4.3466408276213415e-05, "loss": 2.2689, "step": 4516000 }, { "epoch": 13.07, "learning_rate": 4.346568462856614e-05, "loss": 2.2784, "step": 4516500 }, { "epoch": 13.07, "learning_rate": 4.346496242821416e-05, "loss": 2.2917, "step": 4517000 }, { "epoch": 13.08, "learning_rate": 4.346423878056688e-05, "loss": 2.2672, "step": 4517500 }, { "epoch": 13.08, "learning_rate": 4.3463515132919604e-05, "loss": 2.2658, "step": 4518000 }, { "epoch": 13.08, "learning_rate": 4.3462791485272327e-05, "loss": 2.2881, "step": 4518500 }, { "epoch": 13.08, "learning_rate": 4.346206783762505e-05, "loss": 2.2553, "step": 4519000 }, { "epoch": 13.08, "learning_rate": 4.346134418997777e-05, "loss": 2.2551, "step": 4519500 }, { "epoch": 13.08, "learning_rate": 4.346062054233049e-05, "loss": 2.2726, "step": 4520000 }, { "epoch": 13.08, "learning_rate": 4.3459896894683215e-05, "loss": 2.2752, "step": 4520500 }, { "epoch": 13.09, "learning_rate": 4.345917469433123e-05, "loss": 2.2974, "step": 4521000 }, { "epoch": 13.09, "learning_rate": 4.345845104668396e-05, "loss": 2.2845, "step": 4521500 }, { "epoch": 13.09, "learning_rate": 4.345772739903668e-05, "loss": 2.2659, "step": 4522000 }, { "epoch": 13.09, "learning_rate": 4.345700375138941e-05, "loss": 2.2657, "step": 4522500 }, { "epoch": 13.09, "learning_rate": 4.3456280103742133e-05, "loss": 2.2786, "step": 4523000 }, { "epoch": 13.09, "learning_rate": 4.3455556456094856e-05, "loss": 2.2818, "step": 4523500 }, { "epoch": 13.1, "learning_rate": 4.345483280844758e-05, "loss": 2.2562, "step": 4524000 }, { "epoch": 13.1, "learning_rate": 4.34541091608003e-05, "loss": 2.2787, "step": 4524500 }, { "epoch": 13.1, "learning_rate": 4.345338551315302e-05, "loss": 2.2755, "step": 4525000 }, { "epoch": 13.1, "learning_rate": 4.3452661865505745e-05, "loss": 2.28, "step": 4525500 }, { "epoch": 13.1, "learning_rate": 4.345193966515376e-05, "loss": 2.2793, "step": 4526000 }, { "epoch": 13.1, "learning_rate": 4.345121601750648e-05, "loss": 2.3029, "step": 4526500 }, { "epoch": 13.1, "learning_rate": 4.345049236985921e-05, "loss": 2.2791, "step": 4527000 }, { "epoch": 13.11, "learning_rate": 4.3449768722211934e-05, "loss": 2.2947, "step": 4527500 }, { "epoch": 13.11, "learning_rate": 4.344904652185995e-05, "loss": 2.2985, "step": 4528000 }, { "epoch": 13.11, "learning_rate": 4.344832287421267e-05, "loss": 2.2667, "step": 4528500 }, { "epoch": 13.11, "learning_rate": 4.3447599226565394e-05, "loss": 2.2754, "step": 4529000 }, { "epoch": 13.11, "learning_rate": 4.344687557891812e-05, "loss": 2.2619, "step": 4529500 }, { "epoch": 13.11, "learning_rate": 4.3446151931270845e-05, "loss": 2.2596, "step": 4530000 }, { "epoch": 13.11, "learning_rate": 4.344542828362357e-05, "loss": 2.2909, "step": 4530500 }, { "epoch": 13.12, "learning_rate": 4.344470463597629e-05, "loss": 2.2688, "step": 4531000 }, { "epoch": 13.12, "learning_rate": 4.344398098832901e-05, "loss": 2.2723, "step": 4531500 }, { "epoch": 13.12, "learning_rate": 4.3443258787977034e-05, "loss": 2.2539, "step": 4532000 }, { "epoch": 13.12, "learning_rate": 4.3442535140329756e-05, "loss": 2.2503, "step": 4532500 }, { "epoch": 13.12, "learning_rate": 4.344181149268248e-05, "loss": 2.2997, "step": 4533000 }, { "epoch": 13.12, "learning_rate": 4.34410878450352e-05, "loss": 2.2763, "step": 4533500 }, { "epoch": 13.12, "learning_rate": 4.344036419738792e-05, "loss": 2.2678, "step": 4534000 }, { "epoch": 13.13, "learning_rate": 4.3439640549740645e-05, "loss": 2.2672, "step": 4534500 }, { "epoch": 13.13, "learning_rate": 4.343891834938866e-05, "loss": 2.2705, "step": 4535000 }, { "epoch": 13.13, "learning_rate": 4.343819470174138e-05, "loss": 2.2989, "step": 4535500 }, { "epoch": 13.13, "learning_rate": 4.343747105409411e-05, "loss": 2.2836, "step": 4536000 }, { "epoch": 13.13, "learning_rate": 4.3436747406446834e-05, "loss": 2.2731, "step": 4536500 }, { "epoch": 13.13, "learning_rate": 4.343602375879956e-05, "loss": 2.2403, "step": 4537000 }, { "epoch": 13.13, "learning_rate": 4.343530155844758e-05, "loss": 2.2537, "step": 4537500 }, { "epoch": 13.14, "learning_rate": 4.34345779108003e-05, "loss": 2.2727, "step": 4538000 }, { "epoch": 13.14, "learning_rate": 4.343385426315302e-05, "loss": 2.2876, "step": 4538500 }, { "epoch": 13.14, "learning_rate": 4.343313206280104e-05, "loss": 2.2937, "step": 4539000 }, { "epoch": 13.14, "learning_rate": 4.343240841515376e-05, "loss": 2.2638, "step": 4539500 }, { "epoch": 13.14, "learning_rate": 4.343168476750648e-05, "loss": 2.2566, "step": 4540000 }, { "epoch": 13.14, "learning_rate": 4.343096111985921e-05, "loss": 2.2976, "step": 4540500 }, { "epoch": 13.14, "learning_rate": 4.3430237472211934e-05, "loss": 2.2783, "step": 4541000 }, { "epoch": 13.15, "learning_rate": 4.342951382456466e-05, "loss": 2.2938, "step": 4541500 }, { "epoch": 13.15, "learning_rate": 4.342879017691738e-05, "loss": 2.2527, "step": 4542000 }, { "epoch": 13.15, "learning_rate": 4.34280665292701e-05, "loss": 2.2751, "step": 4542500 }, { "epoch": 13.15, "learning_rate": 4.3427342881622823e-05, "loss": 2.2912, "step": 4543000 }, { "epoch": 13.15, "learning_rate": 4.3426619233975546e-05, "loss": 2.2502, "step": 4543500 }, { "epoch": 13.15, "learning_rate": 4.342589558632827e-05, "loss": 2.2896, "step": 4544000 }, { "epoch": 13.15, "learning_rate": 4.3425171938681e-05, "loss": 2.2322, "step": 4544500 }, { "epoch": 13.16, "learning_rate": 4.342444829103372e-05, "loss": 2.2805, "step": 4545000 }, { "epoch": 13.16, "learning_rate": 4.342372464338644e-05, "loss": 2.2878, "step": 4545500 }, { "epoch": 13.16, "learning_rate": 4.342300389032976e-05, "loss": 2.2907, "step": 4546000 }, { "epoch": 13.16, "learning_rate": 4.342228024268248e-05, "loss": 2.25, "step": 4546500 }, { "epoch": 13.16, "learning_rate": 4.34215565950352e-05, "loss": 2.2689, "step": 4547000 }, { "epoch": 13.16, "learning_rate": 4.3420832947387924e-05, "loss": 2.2489, "step": 4547500 }, { "epoch": 13.16, "learning_rate": 4.3420109299740646e-05, "loss": 2.2577, "step": 4548000 }, { "epoch": 13.17, "learning_rate": 4.341938565209337e-05, "loss": 2.2831, "step": 4548500 }, { "epoch": 13.17, "learning_rate": 4.341866200444609e-05, "loss": 2.2789, "step": 4549000 }, { "epoch": 13.17, "learning_rate": 4.341793835679881e-05, "loss": 2.2791, "step": 4549500 }, { "epoch": 13.17, "learning_rate": 4.3417214709151535e-05, "loss": 2.2837, "step": 4550000 }, { "epoch": 13.17, "learning_rate": 4.341649250879956e-05, "loss": 2.2579, "step": 4550500 }, { "epoch": 13.17, "learning_rate": 4.341576886115228e-05, "loss": 2.2812, "step": 4551000 }, { "epoch": 13.17, "learning_rate": 4.3415045213505e-05, "loss": 2.2556, "step": 4551500 }, { "epoch": 13.18, "learning_rate": 4.341432156585773e-05, "loss": 2.2962, "step": 4552000 }, { "epoch": 13.18, "learning_rate": 4.3413599365505746e-05, "loss": 2.2688, "step": 4552500 }, { "epoch": 13.18, "learning_rate": 4.341287571785847e-05, "loss": 2.2834, "step": 4553000 }, { "epoch": 13.18, "learning_rate": 4.341215207021119e-05, "loss": 2.2661, "step": 4553500 }, { "epoch": 13.18, "learning_rate": 4.341142842256391e-05, "loss": 2.2849, "step": 4554000 }, { "epoch": 13.18, "learning_rate": 4.3410704774916635e-05, "loss": 2.2482, "step": 4554500 }, { "epoch": 13.18, "learning_rate": 4.340998257456466e-05, "loss": 2.2939, "step": 4555000 }, { "epoch": 13.19, "learning_rate": 4.340925892691738e-05, "loss": 2.2696, "step": 4555500 }, { "epoch": 13.19, "learning_rate": 4.34085352792701e-05, "loss": 2.2936, "step": 4556000 }, { "epoch": 13.19, "learning_rate": 4.3407811631622824e-05, "loss": 2.2909, "step": 4556500 }, { "epoch": 13.19, "learning_rate": 4.3407087983975546e-05, "loss": 2.2414, "step": 4557000 }, { "epoch": 13.19, "learning_rate": 4.340636433632827e-05, "loss": 2.2643, "step": 4557500 }, { "epoch": 13.19, "learning_rate": 4.340564213597629e-05, "loss": 2.2715, "step": 4558000 }, { "epoch": 13.19, "learning_rate": 4.340491848832901e-05, "loss": 2.2916, "step": 4558500 }, { "epoch": 13.2, "learning_rate": 4.3404194840681735e-05, "loss": 2.2797, "step": 4559000 }, { "epoch": 13.2, "learning_rate": 4.3403471193034464e-05, "loss": 2.2723, "step": 4559500 }, { "epoch": 13.2, "learning_rate": 4.340274754538719e-05, "loss": 2.2605, "step": 4560000 }, { "epoch": 13.2, "learning_rate": 4.34020253450352e-05, "loss": 2.2933, "step": 4560500 }, { "epoch": 13.2, "learning_rate": 4.3401301697387925e-05, "loss": 2.2655, "step": 4561000 }, { "epoch": 13.2, "learning_rate": 4.340057804974065e-05, "loss": 2.2577, "step": 4561500 }, { "epoch": 13.21, "learning_rate": 4.339985440209337e-05, "loss": 2.2941, "step": 4562000 }, { "epoch": 13.21, "learning_rate": 4.339913220174139e-05, "loss": 2.2348, "step": 4562500 }, { "epoch": 13.21, "learning_rate": 4.339841000138941e-05, "loss": 2.306, "step": 4563000 }, { "epoch": 13.21, "learning_rate": 4.339768635374213e-05, "loss": 2.2946, "step": 4563500 }, { "epoch": 13.21, "learning_rate": 4.339696270609485e-05, "loss": 2.2677, "step": 4564000 }, { "epoch": 13.21, "learning_rate": 4.3396239058447574e-05, "loss": 2.274, "step": 4564500 }, { "epoch": 13.21, "learning_rate": 4.3395515410800296e-05, "loss": 2.2817, "step": 4565000 }, { "epoch": 13.22, "learning_rate": 4.339479176315302e-05, "loss": 2.2921, "step": 4565500 }, { "epoch": 13.22, "learning_rate": 4.339406956280104e-05, "loss": 2.2645, "step": 4566000 }, { "epoch": 13.22, "learning_rate": 4.339334591515376e-05, "loss": 2.2427, "step": 4566500 }, { "epoch": 13.22, "learning_rate": 4.339262371480178e-05, "loss": 2.2691, "step": 4567000 }, { "epoch": 13.22, "learning_rate": 4.339190006715451e-05, "loss": 2.2624, "step": 4567500 }, { "epoch": 13.22, "learning_rate": 4.339117641950723e-05, "loss": 2.2782, "step": 4568000 }, { "epoch": 13.22, "learning_rate": 4.339045277185995e-05, "loss": 2.2871, "step": 4568500 }, { "epoch": 13.23, "learning_rate": 4.3389729124212674e-05, "loss": 2.2703, "step": 4569000 }, { "epoch": 13.23, "learning_rate": 4.3389005476565396e-05, "loss": 2.2788, "step": 4569500 }, { "epoch": 13.23, "learning_rate": 4.338828182891812e-05, "loss": 2.2841, "step": 4570000 }, { "epoch": 13.23, "learning_rate": 4.338755818127084e-05, "loss": 2.2704, "step": 4570500 }, { "epoch": 13.23, "learning_rate": 4.338683453362356e-05, "loss": 2.2578, "step": 4571000 }, { "epoch": 13.23, "learning_rate": 4.338611088597629e-05, "loss": 2.2785, "step": 4571500 }, { "epoch": 13.23, "learning_rate": 4.3385387238329014e-05, "loss": 2.2835, "step": 4572000 }, { "epoch": 13.24, "learning_rate": 4.3384663590681736e-05, "loss": 2.287, "step": 4572500 }, { "epoch": 13.24, "learning_rate": 4.338393994303446e-05, "loss": 2.2938, "step": 4573000 }, { "epoch": 13.24, "learning_rate": 4.3383217742682474e-05, "loss": 2.2571, "step": 4573500 }, { "epoch": 13.24, "learning_rate": 4.3382494095035196e-05, "loss": 2.2608, "step": 4574000 }, { "epoch": 13.24, "learning_rate": 4.3381770447387925e-05, "loss": 2.281, "step": 4574500 }, { "epoch": 13.24, "learning_rate": 4.338104679974065e-05, "loss": 2.2456, "step": 4575000 }, { "epoch": 13.24, "learning_rate": 4.338032315209337e-05, "loss": 2.269, "step": 4575500 }, { "epoch": 13.25, "learning_rate": 4.337960095174139e-05, "loss": 2.2778, "step": 4576000 }, { "epoch": 13.25, "learning_rate": 4.3378877304094114e-05, "loss": 2.2712, "step": 4576500 }, { "epoch": 13.25, "learning_rate": 4.3378153656446837e-05, "loss": 2.2903, "step": 4577000 }, { "epoch": 13.25, "learning_rate": 4.337743000879956e-05, "loss": 2.2551, "step": 4577500 }, { "epoch": 13.25, "learning_rate": 4.337670636115228e-05, "loss": 2.2682, "step": 4578000 }, { "epoch": 13.25, "learning_rate": 4.3375984160800297e-05, "loss": 2.2646, "step": 4578500 }, { "epoch": 13.25, "learning_rate": 4.337526051315302e-05, "loss": 2.2796, "step": 4579000 }, { "epoch": 13.26, "learning_rate": 4.337453686550574e-05, "loss": 2.254, "step": 4579500 }, { "epoch": 13.26, "learning_rate": 4.337381321785846e-05, "loss": 2.2834, "step": 4580000 }, { "epoch": 13.26, "learning_rate": 4.3373091017506486e-05, "loss": 2.274, "step": 4580500 }, { "epoch": 13.26, "learning_rate": 4.337236736985921e-05, "loss": 2.2657, "step": 4581000 }, { "epoch": 13.26, "learning_rate": 4.337164372221193e-05, "loss": 2.2861, "step": 4581500 }, { "epoch": 13.26, "learning_rate": 4.337092007456466e-05, "loss": 2.2807, "step": 4582000 }, { "epoch": 13.26, "learning_rate": 4.337019642691738e-05, "loss": 2.2688, "step": 4582500 }, { "epoch": 13.27, "learning_rate": 4.3369472779270104e-05, "loss": 2.2642, "step": 4583000 }, { "epoch": 13.27, "learning_rate": 4.3368749131622826e-05, "loss": 2.2595, "step": 4583500 }, { "epoch": 13.27, "learning_rate": 4.336802693127084e-05, "loss": 2.27, "step": 4584000 }, { "epoch": 13.27, "learning_rate": 4.336730328362357e-05, "loss": 2.2522, "step": 4584500 }, { "epoch": 13.27, "learning_rate": 4.336657963597629e-05, "loss": 2.2806, "step": 4585000 }, { "epoch": 13.27, "learning_rate": 4.3365855988329015e-05, "loss": 2.2826, "step": 4585500 }, { "epoch": 13.27, "learning_rate": 4.336513378797703e-05, "loss": 2.2898, "step": 4586000 }, { "epoch": 13.28, "learning_rate": 4.336441014032975e-05, "loss": 2.2753, "step": 4586500 }, { "epoch": 13.28, "learning_rate": 4.3363686492682475e-05, "loss": 2.2992, "step": 4587000 }, { "epoch": 13.28, "learning_rate": 4.33629628450352e-05, "loss": 2.2638, "step": 4587500 }, { "epoch": 13.28, "learning_rate": 4.336224064468322e-05, "loss": 2.2855, "step": 4588000 }, { "epoch": 13.28, "learning_rate": 4.336151699703594e-05, "loss": 2.2826, "step": 4588500 }, { "epoch": 13.28, "learning_rate": 4.3360793349388664e-05, "loss": 2.2696, "step": 4589000 }, { "epoch": 13.28, "learning_rate": 4.336006970174139e-05, "loss": 2.2835, "step": 4589500 }, { "epoch": 13.29, "learning_rate": 4.3359346054094115e-05, "loss": 2.2742, "step": 4590000 }, { "epoch": 13.29, "learning_rate": 4.335862240644684e-05, "loss": 2.2755, "step": 4590500 }, { "epoch": 13.29, "learning_rate": 4.335789875879956e-05, "loss": 2.2838, "step": 4591000 }, { "epoch": 13.29, "learning_rate": 4.3357176558447575e-05, "loss": 2.2584, "step": 4591500 }, { "epoch": 13.29, "learning_rate": 4.33564529108003e-05, "loss": 2.2966, "step": 4592000 }, { "epoch": 13.29, "learning_rate": 4.335572926315302e-05, "loss": 2.2626, "step": 4592500 }, { "epoch": 13.29, "learning_rate": 4.335500561550574e-05, "loss": 2.2788, "step": 4593000 }, { "epoch": 13.3, "learning_rate": 4.335428196785847e-05, "loss": 2.284, "step": 4593500 }, { "epoch": 13.3, "learning_rate": 4.335355832021119e-05, "loss": 2.2681, "step": 4594000 }, { "epoch": 13.3, "learning_rate": 4.3352834672563915e-05, "loss": 2.259, "step": 4594500 }, { "epoch": 13.3, "learning_rate": 4.335211102491664e-05, "loss": 2.2804, "step": 4595000 }, { "epoch": 13.3, "learning_rate": 4.335138737726936e-05, "loss": 2.2778, "step": 4595500 }, { "epoch": 13.3, "learning_rate": 4.3350665176917375e-05, "loss": 2.2566, "step": 4596000 }, { "epoch": 13.3, "learning_rate": 4.334994297656539e-05, "loss": 2.3003, "step": 4596500 }, { "epoch": 13.31, "learning_rate": 4.334921932891812e-05, "loss": 2.2807, "step": 4597000 }, { "epoch": 13.31, "learning_rate": 4.334849568127084e-05, "loss": 2.2911, "step": 4597500 }, { "epoch": 13.31, "learning_rate": 4.334777203362357e-05, "loss": 2.2634, "step": 4598000 }, { "epoch": 13.31, "learning_rate": 4.3347048385976293e-05, "loss": 2.2701, "step": 4598500 }, { "epoch": 13.31, "learning_rate": 4.3346324738329016e-05, "loss": 2.295, "step": 4599000 }, { "epoch": 13.31, "learning_rate": 4.334560109068174e-05, "loss": 2.3116, "step": 4599500 }, { "epoch": 13.32, "learning_rate": 4.334487744303446e-05, "loss": 2.2575, "step": 4600000 }, { "epoch": 13.32, "learning_rate": 4.334415379538718e-05, "loss": 2.2502, "step": 4600500 }, { "epoch": 13.32, "learning_rate": 4.3343430147739905e-05, "loss": 2.2878, "step": 4601000 }, { "epoch": 13.32, "learning_rate": 4.334270939468322e-05, "loss": 2.2917, "step": 4601500 }, { "epoch": 13.32, "learning_rate": 4.334198574703594e-05, "loss": 2.2726, "step": 4602000 }, { "epoch": 13.32, "learning_rate": 4.3341262099388665e-05, "loss": 2.2933, "step": 4602500 }, { "epoch": 13.32, "learning_rate": 4.334053845174139e-05, "loss": 2.2767, "step": 4603000 }, { "epoch": 13.33, "learning_rate": 4.333981480409411e-05, "loss": 2.2825, "step": 4603500 }, { "epoch": 13.33, "learning_rate": 4.3339092603742125e-05, "loss": 2.269, "step": 4604000 }, { "epoch": 13.33, "learning_rate": 4.3338368956094854e-05, "loss": 2.2835, "step": 4604500 }, { "epoch": 13.33, "learning_rate": 4.3337645308447576e-05, "loss": 2.2575, "step": 4605000 }, { "epoch": 13.33, "learning_rate": 4.33369216608003e-05, "loss": 2.277, "step": 4605500 }, { "epoch": 13.33, "learning_rate": 4.333619801315302e-05, "loss": 2.271, "step": 4606000 }, { "epoch": 13.33, "learning_rate": 4.333547436550574e-05, "loss": 2.273, "step": 4606500 }, { "epoch": 13.34, "learning_rate": 4.333475071785847e-05, "loss": 2.2633, "step": 4607000 }, { "epoch": 13.34, "learning_rate": 4.3334027070211194e-05, "loss": 2.2773, "step": 4607500 }, { "epoch": 13.34, "learning_rate": 4.3333303422563916e-05, "loss": 2.2775, "step": 4608000 }, { "epoch": 13.34, "learning_rate": 4.333257977491664e-05, "loss": 2.2681, "step": 4608500 }, { "epoch": 13.34, "learning_rate": 4.333185612726936e-05, "loss": 2.2637, "step": 4609000 }, { "epoch": 13.34, "learning_rate": 4.3331133926917376e-05, "loss": 2.2711, "step": 4609500 }, { "epoch": 13.34, "learning_rate": 4.33304102792701e-05, "loss": 2.2847, "step": 4610000 }, { "epoch": 13.35, "learning_rate": 4.332968663162282e-05, "loss": 2.2866, "step": 4610500 }, { "epoch": 13.35, "learning_rate": 4.332896298397554e-05, "loss": 2.2776, "step": 4611000 }, { "epoch": 13.35, "learning_rate": 4.3328240783623565e-05, "loss": 2.2634, "step": 4611500 }, { "epoch": 13.35, "learning_rate": 4.3327520030566874e-05, "loss": 2.2674, "step": 4612000 }, { "epoch": 13.35, "learning_rate": 4.33267963829196e-05, "loss": 2.3074, "step": 4612500 }, { "epoch": 13.35, "learning_rate": 4.3326072735272325e-05, "loss": 2.2791, "step": 4613000 }, { "epoch": 13.35, "learning_rate": 4.332534908762505e-05, "loss": 2.2686, "step": 4613500 }, { "epoch": 13.36, "learning_rate": 4.332462543997777e-05, "loss": 2.2572, "step": 4614000 }, { "epoch": 13.36, "learning_rate": 4.33239017923305e-05, "loss": 2.2699, "step": 4614500 }, { "epoch": 13.36, "learning_rate": 4.332317814468322e-05, "loss": 2.2701, "step": 4615000 }, { "epoch": 13.36, "learning_rate": 4.332245449703594e-05, "loss": 2.2639, "step": 4615500 }, { "epoch": 13.36, "learning_rate": 4.3321730849388665e-05, "loss": 2.2664, "step": 4616000 }, { "epoch": 13.36, "learning_rate": 4.332100720174139e-05, "loss": 2.2793, "step": 4616500 }, { "epoch": 13.36, "learning_rate": 4.33202850013894e-05, "loss": 2.3015, "step": 4617000 }, { "epoch": 13.37, "learning_rate": 4.3319561353742126e-05, "loss": 2.2776, "step": 4617500 }, { "epoch": 13.37, "learning_rate": 4.331883770609485e-05, "loss": 2.2877, "step": 4618000 }, { "epoch": 13.37, "learning_rate": 4.331811405844757e-05, "loss": 2.2533, "step": 4618500 }, { "epoch": 13.37, "learning_rate": 4.33173904108003e-05, "loss": 2.2993, "step": 4619000 }, { "epoch": 13.37, "learning_rate": 4.331666676315302e-05, "loss": 2.2775, "step": 4619500 }, { "epoch": 13.37, "learning_rate": 4.331594311550575e-05, "loss": 2.2823, "step": 4620000 }, { "epoch": 13.37, "learning_rate": 4.331521946785847e-05, "loss": 2.2511, "step": 4620500 }, { "epoch": 13.38, "learning_rate": 4.331449726750649e-05, "loss": 2.2891, "step": 4621000 }, { "epoch": 13.38, "learning_rate": 4.331377361985921e-05, "loss": 2.2892, "step": 4621500 }, { "epoch": 13.38, "learning_rate": 4.331304997221193e-05, "loss": 2.2824, "step": 4622000 }, { "epoch": 13.38, "learning_rate": 4.3312326324564655e-05, "loss": 2.2809, "step": 4622500 }, { "epoch": 13.38, "learning_rate": 4.331160267691738e-05, "loss": 2.2678, "step": 4623000 }, { "epoch": 13.38, "learning_rate": 4.33108790292701e-05, "loss": 2.284, "step": 4623500 }, { "epoch": 13.38, "learning_rate": 4.331015538162282e-05, "loss": 2.2843, "step": 4624000 }, { "epoch": 13.39, "learning_rate": 4.330943173397555e-05, "loss": 2.2681, "step": 4624500 }, { "epoch": 13.39, "learning_rate": 4.330870808632827e-05, "loss": 2.2842, "step": 4625000 }, { "epoch": 13.39, "learning_rate": 4.330798588597629e-05, "loss": 2.2702, "step": 4625500 }, { "epoch": 13.39, "learning_rate": 4.330726223832901e-05, "loss": 2.2745, "step": 4626000 }, { "epoch": 13.39, "learning_rate": 4.330653859068173e-05, "loss": 2.2643, "step": 4626500 }, { "epoch": 13.39, "learning_rate": 4.330581494303446e-05, "loss": 2.2602, "step": 4627000 }, { "epoch": 13.39, "learning_rate": 4.3305091295387184e-05, "loss": 2.244, "step": 4627500 }, { "epoch": 13.4, "learning_rate": 4.33043705423305e-05, "loss": 2.2813, "step": 4628000 }, { "epoch": 13.4, "learning_rate": 4.330364689468322e-05, "loss": 2.2818, "step": 4628500 }, { "epoch": 13.4, "learning_rate": 4.3302923247035944e-05, "loss": 2.2523, "step": 4629000 }, { "epoch": 13.4, "learning_rate": 4.3302199599388666e-05, "loss": 2.2802, "step": 4629500 }, { "epoch": 13.4, "learning_rate": 4.330147595174139e-05, "loss": 2.2787, "step": 4630000 }, { "epoch": 13.4, "learning_rate": 4.330075230409411e-05, "loss": 2.2859, "step": 4630500 }, { "epoch": 13.4, "learning_rate": 4.330002865644683e-05, "loss": 2.2864, "step": 4631000 }, { "epoch": 13.41, "learning_rate": 4.3299305008799555e-05, "loss": 2.2689, "step": 4631500 }, { "epoch": 13.41, "learning_rate": 4.329858280844758e-05, "loss": 2.2799, "step": 4632000 }, { "epoch": 13.41, "learning_rate": 4.32978591608003e-05, "loss": 2.2995, "step": 4632500 }, { "epoch": 13.41, "learning_rate": 4.329713551315302e-05, "loss": 2.2673, "step": 4633000 }, { "epoch": 13.41, "learning_rate": 4.3296411865505744e-05, "loss": 2.2572, "step": 4633500 }, { "epoch": 13.41, "learning_rate": 4.329568966515376e-05, "loss": 2.2615, "step": 4634000 }, { "epoch": 13.41, "learning_rate": 4.329496601750649e-05, "loss": 2.2884, "step": 4634500 }, { "epoch": 13.42, "learning_rate": 4.329424236985921e-05, "loss": 2.291, "step": 4635000 }, { "epoch": 13.42, "learning_rate": 4.329351872221193e-05, "loss": 2.2786, "step": 4635500 }, { "epoch": 13.42, "learning_rate": 4.3292795074564656e-05, "loss": 2.2754, "step": 4636000 }, { "epoch": 13.42, "learning_rate": 4.329207287421268e-05, "loss": 2.2802, "step": 4636500 }, { "epoch": 13.42, "learning_rate": 4.32913492265654e-05, "loss": 2.2752, "step": 4637000 }, { "epoch": 13.42, "learning_rate": 4.329062557891812e-05, "loss": 2.2747, "step": 4637500 }, { "epoch": 13.43, "learning_rate": 4.328990337856614e-05, "loss": 2.2767, "step": 4638000 }, { "epoch": 13.43, "learning_rate": 4.328917973091886e-05, "loss": 2.2606, "step": 4638500 }, { "epoch": 13.43, "learning_rate": 4.3288457530566876e-05, "loss": 2.2664, "step": 4639000 }, { "epoch": 13.43, "learning_rate": 4.32877338829196e-05, "loss": 2.2764, "step": 4639500 }, { "epoch": 13.43, "learning_rate": 4.328701023527233e-05, "loss": 2.3038, "step": 4640000 }, { "epoch": 13.43, "learning_rate": 4.328628658762505e-05, "loss": 2.2665, "step": 4640500 }, { "epoch": 13.43, "learning_rate": 4.328556293997777e-05, "loss": 2.2888, "step": 4641000 }, { "epoch": 13.44, "learning_rate": 4.3284839292330494e-05, "loss": 2.2792, "step": 4641500 }, { "epoch": 13.44, "learning_rate": 4.328411564468322e-05, "loss": 2.292, "step": 4642000 }, { "epoch": 13.44, "learning_rate": 4.3283391997035945e-05, "loss": 2.2936, "step": 4642500 }, { "epoch": 13.44, "learning_rate": 4.328266834938867e-05, "loss": 2.2587, "step": 4643000 }, { "epoch": 13.44, "learning_rate": 4.328194470174139e-05, "loss": 2.2742, "step": 4643500 }, { "epoch": 13.44, "learning_rate": 4.3281222501389405e-05, "loss": 2.2928, "step": 4644000 }, { "epoch": 13.44, "learning_rate": 4.328050030103743e-05, "loss": 2.2801, "step": 4644500 }, { "epoch": 13.45, "learning_rate": 4.327977665339015e-05, "loss": 2.2708, "step": 4645000 }, { "epoch": 13.45, "learning_rate": 4.327905300574287e-05, "loss": 2.262, "step": 4645500 }, { "epoch": 13.45, "learning_rate": 4.3278329358095594e-05, "loss": 2.2808, "step": 4646000 }, { "epoch": 13.45, "learning_rate": 4.3277605710448316e-05, "loss": 2.2567, "step": 4646500 }, { "epoch": 13.45, "learning_rate": 4.327688206280104e-05, "loss": 2.2677, "step": 4647000 }, { "epoch": 13.45, "learning_rate": 4.327615841515376e-05, "loss": 2.2793, "step": 4647500 }, { "epoch": 13.45, "learning_rate": 4.327543476750648e-05, "loss": 2.2734, "step": 4648000 }, { "epoch": 13.46, "learning_rate": 4.3274711119859205e-05, "loss": 2.2574, "step": 4648500 }, { "epoch": 13.46, "learning_rate": 4.327398747221193e-05, "loss": 2.2687, "step": 4649000 }, { "epoch": 13.46, "learning_rate": 4.3273263824564656e-05, "loss": 2.2937, "step": 4649500 }, { "epoch": 13.46, "learning_rate": 4.327254017691738e-05, "loss": 2.2753, "step": 4650000 }, { "epoch": 13.46, "learning_rate": 4.32718165292701e-05, "loss": 2.2887, "step": 4650500 }, { "epoch": 13.46, "learning_rate": 4.3271095776213416e-05, "loss": 2.2666, "step": 4651000 }, { "epoch": 13.46, "learning_rate": 4.327037212856614e-05, "loss": 2.2708, "step": 4651500 }, { "epoch": 13.47, "learning_rate": 4.326964848091886e-05, "loss": 2.2556, "step": 4652000 }, { "epoch": 13.47, "learning_rate": 4.326892483327158e-05, "loss": 2.2683, "step": 4652500 }, { "epoch": 13.47, "learning_rate": 4.3268202632919605e-05, "loss": 2.2678, "step": 4653000 }, { "epoch": 13.47, "learning_rate": 4.326747898527233e-05, "loss": 2.2883, "step": 4653500 }, { "epoch": 13.47, "learning_rate": 4.326675533762505e-05, "loss": 2.2853, "step": 4654000 }, { "epoch": 13.47, "learning_rate": 4.326603168997777e-05, "loss": 2.2697, "step": 4654500 }, { "epoch": 13.47, "learning_rate": 4.3265308042330494e-05, "loss": 2.2753, "step": 4655000 }, { "epoch": 13.48, "learning_rate": 4.326458584197851e-05, "loss": 2.306, "step": 4655500 }, { "epoch": 13.48, "learning_rate": 4.326386219433123e-05, "loss": 2.2812, "step": 4656000 }, { "epoch": 13.48, "learning_rate": 4.3263138546683954e-05, "loss": 2.2582, "step": 4656500 }, { "epoch": 13.48, "learning_rate": 4.3262414899036683e-05, "loss": 2.2708, "step": 4657000 }, { "epoch": 13.48, "learning_rate": 4.3261691251389406e-05, "loss": 2.2593, "step": 4657500 }, { "epoch": 13.48, "learning_rate": 4.326096760374213e-05, "loss": 2.2621, "step": 4658000 }, { "epoch": 13.48, "learning_rate": 4.326024395609486e-05, "loss": 2.2803, "step": 4658500 }, { "epoch": 13.49, "learning_rate": 4.325952030844758e-05, "loss": 2.2712, "step": 4659000 }, { "epoch": 13.49, "learning_rate": 4.32587966608003e-05, "loss": 2.254, "step": 4659500 }, { "epoch": 13.49, "learning_rate": 4.325807446044832e-05, "loss": 2.2886, "step": 4660000 }, { "epoch": 13.49, "learning_rate": 4.325735081280104e-05, "loss": 2.2969, "step": 4660500 }, { "epoch": 13.49, "learning_rate": 4.3256628612449055e-05, "loss": 2.2671, "step": 4661000 }, { "epoch": 13.49, "learning_rate": 4.325590785939237e-05, "loss": 2.2843, "step": 4661500 }, { "epoch": 13.49, "learning_rate": 4.325518421174509e-05, "loss": 2.2603, "step": 4662000 }, { "epoch": 13.5, "learning_rate": 4.3254460564097815e-05, "loss": 2.2749, "step": 4662500 }, { "epoch": 13.5, "learning_rate": 4.325373691645054e-05, "loss": 2.2913, "step": 4663000 }, { "epoch": 13.5, "learning_rate": 4.325301326880326e-05, "loss": 2.305, "step": 4663500 }, { "epoch": 13.5, "learning_rate": 4.325228962115598e-05, "loss": 2.2885, "step": 4664000 }, { "epoch": 13.5, "learning_rate": 4.3251565973508704e-05, "loss": 2.2705, "step": 4664500 }, { "epoch": 13.5, "learning_rate": 4.325084232586143e-05, "loss": 2.2807, "step": 4665000 }, { "epoch": 13.5, "learning_rate": 4.3250118678214155e-05, "loss": 2.2739, "step": 4665500 }, { "epoch": 13.51, "learning_rate": 4.324939503056688e-05, "loss": 2.2864, "step": 4666000 }, { "epoch": 13.51, "learning_rate": 4.3248671382919606e-05, "loss": 2.2802, "step": 4666500 }, { "epoch": 13.51, "learning_rate": 4.324794773527233e-05, "loss": 2.3008, "step": 4667000 }, { "epoch": 13.51, "learning_rate": 4.324722408762505e-05, "loss": 2.2836, "step": 4667500 }, { "epoch": 13.51, "learning_rate": 4.324650043997777e-05, "loss": 2.2615, "step": 4668000 }, { "epoch": 13.51, "learning_rate": 4.3245776792330495e-05, "loss": 2.2556, "step": 4668500 }, { "epoch": 13.51, "learning_rate": 4.324505314468322e-05, "loss": 2.2717, "step": 4669000 }, { "epoch": 13.52, "learning_rate": 4.324432949703594e-05, "loss": 2.2972, "step": 4669500 }, { "epoch": 13.52, "learning_rate": 4.324360584938866e-05, "loss": 2.2556, "step": 4670000 }, { "epoch": 13.52, "learning_rate": 4.324288364903668e-05, "loss": 2.2743, "step": 4670500 }, { "epoch": 13.52, "learning_rate": 4.3242160001389406e-05, "loss": 2.2751, "step": 4671000 }, { "epoch": 13.52, "learning_rate": 4.324143635374213e-05, "loss": 2.2558, "step": 4671500 }, { "epoch": 13.52, "learning_rate": 4.324071270609486e-05, "loss": 2.3004, "step": 4672000 }, { "epoch": 13.52, "learning_rate": 4.323999050574287e-05, "loss": 2.2806, "step": 4672500 }, { "epoch": 13.53, "learning_rate": 4.3239266858095595e-05, "loss": 2.2659, "step": 4673000 }, { "epoch": 13.53, "learning_rate": 4.323854321044832e-05, "loss": 2.2913, "step": 4673500 }, { "epoch": 13.53, "learning_rate": 4.323781956280104e-05, "loss": 2.2594, "step": 4674000 }, { "epoch": 13.53, "learning_rate": 4.3237097362449056e-05, "loss": 2.2856, "step": 4674500 }, { "epoch": 13.53, "learning_rate": 4.323637371480178e-05, "loss": 2.296, "step": 4675000 }, { "epoch": 13.53, "learning_rate": 4.323565006715451e-05, "loss": 2.2752, "step": 4675500 }, { "epoch": 13.54, "learning_rate": 4.323492641950723e-05, "loss": 2.2737, "step": 4676000 }, { "epoch": 13.54, "learning_rate": 4.323420277185995e-05, "loss": 2.2684, "step": 4676500 }, { "epoch": 13.54, "learning_rate": 4.323348057150797e-05, "loss": 2.2952, "step": 4677000 }, { "epoch": 13.54, "learning_rate": 4.323275692386069e-05, "loss": 2.2957, "step": 4677500 }, { "epoch": 13.54, "learning_rate": 4.323203327621341e-05, "loss": 2.3005, "step": 4678000 }, { "epoch": 13.54, "learning_rate": 4.3231309628566133e-05, "loss": 2.2851, "step": 4678500 }, { "epoch": 13.54, "learning_rate": 4.3230585980918856e-05, "loss": 2.2959, "step": 4679000 }, { "epoch": 13.55, "learning_rate": 4.322986378056688e-05, "loss": 2.2649, "step": 4679500 }, { "epoch": 13.55, "learning_rate": 4.322914013291961e-05, "loss": 2.2679, "step": 4680000 }, { "epoch": 13.55, "learning_rate": 4.322841793256762e-05, "loss": 2.2667, "step": 4680500 }, { "epoch": 13.55, "learning_rate": 4.3227694284920345e-05, "loss": 2.2653, "step": 4681000 }, { "epoch": 13.55, "learning_rate": 4.322697063727307e-05, "loss": 2.2831, "step": 4681500 }, { "epoch": 13.55, "learning_rate": 4.322624698962579e-05, "loss": 2.2727, "step": 4682000 }, { "epoch": 13.55, "learning_rate": 4.322552334197851e-05, "loss": 2.2813, "step": 4682500 }, { "epoch": 13.56, "learning_rate": 4.3224799694331234e-05, "loss": 2.2732, "step": 4683000 }, { "epoch": 13.56, "learning_rate": 4.3224076046683956e-05, "loss": 2.2745, "step": 4683500 }, { "epoch": 13.56, "learning_rate": 4.3223352399036685e-05, "loss": 2.2828, "step": 4684000 }, { "epoch": 13.56, "learning_rate": 4.322262875138941e-05, "loss": 2.2879, "step": 4684500 }, { "epoch": 13.56, "learning_rate": 4.322190510374213e-05, "loss": 2.2777, "step": 4685000 }, { "epoch": 13.56, "learning_rate": 4.322118145609485e-05, "loss": 2.2788, "step": 4685500 }, { "epoch": 13.56, "learning_rate": 4.3220457808447574e-05, "loss": 2.2542, "step": 4686000 }, { "epoch": 13.57, "learning_rate": 4.321973560809559e-05, "loss": 2.2742, "step": 4686500 }, { "epoch": 13.57, "learning_rate": 4.3219013407743605e-05, "loss": 2.2825, "step": 4687000 }, { "epoch": 13.57, "learning_rate": 4.3218289760096334e-05, "loss": 2.2854, "step": 4687500 }, { "epoch": 13.57, "learning_rate": 4.3217566112449056e-05, "loss": 2.2592, "step": 4688000 }, { "epoch": 13.57, "learning_rate": 4.321684535939237e-05, "loss": 2.2693, "step": 4688500 }, { "epoch": 13.57, "learning_rate": 4.3216121711745094e-05, "loss": 2.2548, "step": 4689000 }, { "epoch": 13.57, "learning_rate": 4.3215398064097816e-05, "loss": 2.2984, "step": 4689500 }, { "epoch": 13.58, "learning_rate": 4.321467441645054e-05, "loss": 2.2737, "step": 4690000 }, { "epoch": 13.58, "learning_rate": 4.321395076880326e-05, "loss": 2.2777, "step": 4690500 }, { "epoch": 13.58, "learning_rate": 4.321322712115598e-05, "loss": 2.2836, "step": 4691000 }, { "epoch": 13.58, "learning_rate": 4.3212503473508705e-05, "loss": 2.267, "step": 4691500 }, { "epoch": 13.58, "learning_rate": 4.3211779825861434e-05, "loss": 2.2669, "step": 4692000 }, { "epoch": 13.58, "learning_rate": 4.321105617821416e-05, "loss": 2.2677, "step": 4692500 }, { "epoch": 13.58, "learning_rate": 4.321033253056688e-05, "loss": 2.2796, "step": 4693000 }, { "epoch": 13.59, "learning_rate": 4.32096088829196e-05, "loss": 2.2598, "step": 4693500 }, { "epoch": 13.59, "learning_rate": 4.320888523527232e-05, "loss": 2.2677, "step": 4694000 }, { "epoch": 13.59, "learning_rate": 4.3208161587625046e-05, "loss": 2.2859, "step": 4694500 }, { "epoch": 13.59, "learning_rate": 4.320743938727307e-05, "loss": 2.3002, "step": 4695000 }, { "epoch": 13.59, "learning_rate": 4.320671573962579e-05, "loss": 2.2878, "step": 4695500 }, { "epoch": 13.59, "learning_rate": 4.320599209197851e-05, "loss": 2.2695, "step": 4696000 }, { "epoch": 13.59, "learning_rate": 4.3205268444331235e-05, "loss": 2.2798, "step": 4696500 }, { "epoch": 13.6, "learning_rate": 4.320454479668396e-05, "loss": 2.2777, "step": 4697000 }, { "epoch": 13.6, "learning_rate": 4.320382259633198e-05, "loss": 2.2578, "step": 4697500 }, { "epoch": 13.6, "learning_rate": 4.32030989486847e-05, "loss": 2.2841, "step": 4698000 }, { "epoch": 13.6, "learning_rate": 4.3202375301037424e-05, "loss": 2.2715, "step": 4698500 }, { "epoch": 13.6, "learning_rate": 4.320165454798073e-05, "loss": 2.2848, "step": 4699000 }, { "epoch": 13.6, "learning_rate": 4.320093090033346e-05, "loss": 2.2769, "step": 4699500 }, { "epoch": 13.6, "learning_rate": 4.3200207252686184e-05, "loss": 2.3036, "step": 4700000 }, { "epoch": 13.61, "learning_rate": 4.3199483605038906e-05, "loss": 2.267, "step": 4700500 }, { "epoch": 13.61, "learning_rate": 4.319875995739163e-05, "loss": 2.2784, "step": 4701000 }, { "epoch": 13.61, "learning_rate": 4.319803630974435e-05, "loss": 2.2651, "step": 4701500 }, { "epoch": 13.61, "learning_rate": 4.319731266209707e-05, "loss": 2.2828, "step": 4702000 }, { "epoch": 13.61, "learning_rate": 4.31965890144498e-05, "loss": 2.2905, "step": 4702500 }, { "epoch": 13.61, "learning_rate": 4.3195865366802524e-05, "loss": 2.2735, "step": 4703000 }, { "epoch": 13.61, "learning_rate": 4.3195141719155246e-05, "loss": 2.2728, "step": 4703500 }, { "epoch": 13.62, "learning_rate": 4.319441807150797e-05, "loss": 2.312, "step": 4704000 }, { "epoch": 13.62, "learning_rate": 4.319369442386069e-05, "loss": 2.2738, "step": 4704500 }, { "epoch": 13.62, "learning_rate": 4.319297077621341e-05, "loss": 2.2748, "step": 4705000 }, { "epoch": 13.62, "learning_rate": 4.3192247128566135e-05, "loss": 2.2808, "step": 4705500 }, { "epoch": 13.62, "learning_rate": 4.319152348091886e-05, "loss": 2.2737, "step": 4706000 }, { "epoch": 13.62, "learning_rate": 4.3190799833271586e-05, "loss": 2.2638, "step": 4706500 }, { "epoch": 13.62, "learning_rate": 4.319007618562431e-05, "loss": 2.2562, "step": 4707000 }, { "epoch": 13.63, "learning_rate": 4.318935253797703e-05, "loss": 2.2784, "step": 4707500 }, { "epoch": 13.63, "learning_rate": 4.3188630337625046e-05, "loss": 2.251, "step": 4708000 }, { "epoch": 13.63, "learning_rate": 4.318790668997777e-05, "loss": 2.2604, "step": 4708500 }, { "epoch": 13.63, "learning_rate": 4.3187184489625784e-05, "loss": 2.27, "step": 4709000 }, { "epoch": 13.63, "learning_rate": 4.318646084197851e-05, "loss": 2.2695, "step": 4709500 }, { "epoch": 13.63, "learning_rate": 4.3185737194331235e-05, "loss": 2.2973, "step": 4710000 }, { "epoch": 13.63, "learning_rate": 4.3185013546683964e-05, "loss": 2.2703, "step": 4710500 }, { "epoch": 13.64, "learning_rate": 4.318428989903669e-05, "loss": 2.2578, "step": 4711000 }, { "epoch": 13.64, "learning_rate": 4.318356625138941e-05, "loss": 2.2741, "step": 4711500 }, { "epoch": 13.64, "learning_rate": 4.3182844051037424e-05, "loss": 2.2635, "step": 4712000 }, { "epoch": 13.64, "learning_rate": 4.318212040339015e-05, "loss": 2.2814, "step": 4712500 }, { "epoch": 13.64, "learning_rate": 4.318139820303816e-05, "loss": 2.2967, "step": 4713000 }, { "epoch": 13.64, "learning_rate": 4.3180674555390884e-05, "loss": 2.2729, "step": 4713500 }, { "epoch": 13.65, "learning_rate": 4.3179950907743613e-05, "loss": 2.271, "step": 4714000 }, { "epoch": 13.65, "learning_rate": 4.3179227260096336e-05, "loss": 2.264, "step": 4714500 }, { "epoch": 13.65, "learning_rate": 4.317850361244906e-05, "loss": 2.2874, "step": 4715000 }, { "epoch": 13.65, "learning_rate": 4.317777996480178e-05, "loss": 2.2577, "step": 4715500 }, { "epoch": 13.65, "learning_rate": 4.31770563171545e-05, "loss": 2.2779, "step": 4716000 }, { "epoch": 13.65, "learning_rate": 4.3176332669507225e-05, "loss": 2.258, "step": 4716500 }, { "epoch": 13.65, "learning_rate": 4.3175609021859954e-05, "loss": 2.2954, "step": 4717000 }, { "epoch": 13.66, "learning_rate": 4.3174885374212676e-05, "loss": 2.266, "step": 4717500 }, { "epoch": 13.66, "learning_rate": 4.31741617265654e-05, "loss": 2.2925, "step": 4718000 }, { "epoch": 13.66, "learning_rate": 4.317343807891812e-05, "loss": 2.2776, "step": 4718500 }, { "epoch": 13.66, "learning_rate": 4.3172715878566136e-05, "loss": 2.2517, "step": 4719000 }, { "epoch": 13.66, "learning_rate": 4.3171992230918865e-05, "loss": 2.2825, "step": 4719500 }, { "epoch": 13.66, "learning_rate": 4.317126858327159e-05, "loss": 2.2834, "step": 4720000 }, { "epoch": 13.66, "learning_rate": 4.31705463829196e-05, "loss": 2.2792, "step": 4720500 }, { "epoch": 13.67, "learning_rate": 4.3169822735272325e-05, "loss": 2.254, "step": 4721000 }, { "epoch": 13.67, "learning_rate": 4.316909908762505e-05, "loss": 2.2799, "step": 4721500 }, { "epoch": 13.67, "learning_rate": 4.316837543997777e-05, "loss": 2.2808, "step": 4722000 }, { "epoch": 13.67, "learning_rate": 4.316765179233049e-05, "loss": 2.2533, "step": 4722500 }, { "epoch": 13.67, "learning_rate": 4.3166928144683214e-05, "loss": 2.2775, "step": 4723000 }, { "epoch": 13.67, "learning_rate": 4.3166204497035936e-05, "loss": 2.2521, "step": 4723500 }, { "epoch": 13.67, "learning_rate": 4.3165480849388665e-05, "loss": 2.2698, "step": 4724000 }, { "epoch": 13.68, "learning_rate": 4.316475720174139e-05, "loss": 2.2551, "step": 4724500 }, { "epoch": 13.68, "learning_rate": 4.3164033554094116e-05, "loss": 2.2805, "step": 4725000 }, { "epoch": 13.68, "learning_rate": 4.316331135374213e-05, "loss": 2.2842, "step": 4725500 }, { "epoch": 13.68, "learning_rate": 4.3162587706094854e-05, "loss": 2.2861, "step": 4726000 }, { "epoch": 13.68, "learning_rate": 4.3161864058447576e-05, "loss": 2.2651, "step": 4726500 }, { "epoch": 13.68, "learning_rate": 4.31611404108003e-05, "loss": 2.3007, "step": 4727000 }, { "epoch": 13.68, "learning_rate": 4.3160418210448314e-05, "loss": 2.2813, "step": 4727500 }, { "epoch": 13.69, "learning_rate": 4.3159694562801036e-05, "loss": 2.2896, "step": 4728000 }, { "epoch": 13.69, "learning_rate": 4.3158970915153765e-05, "loss": 2.2784, "step": 4728500 }, { "epoch": 13.69, "learning_rate": 4.315824726750649e-05, "loss": 2.2884, "step": 4729000 }, { "epoch": 13.69, "learning_rate": 4.315752361985921e-05, "loss": 2.2673, "step": 4729500 }, { "epoch": 13.69, "learning_rate": 4.315679997221193e-05, "loss": 2.2774, "step": 4730000 }, { "epoch": 13.69, "learning_rate": 4.3156076324564654e-05, "loss": 2.2924, "step": 4730500 }, { "epoch": 13.69, "learning_rate": 4.315535412421267e-05, "loss": 2.2719, "step": 4731000 }, { "epoch": 13.7, "learning_rate": 4.315463047656539e-05, "loss": 2.2875, "step": 4731500 }, { "epoch": 13.7, "learning_rate": 4.315390682891812e-05, "loss": 2.2835, "step": 4732000 }, { "epoch": 13.7, "learning_rate": 4.315318318127084e-05, "loss": 2.2948, "step": 4732500 }, { "epoch": 13.7, "learning_rate": 4.3152460980918866e-05, "loss": 2.2831, "step": 4733000 }, { "epoch": 13.7, "learning_rate": 4.315173733327159e-05, "loss": 2.2815, "step": 4733500 }, { "epoch": 13.7, "learning_rate": 4.315101368562431e-05, "loss": 2.3022, "step": 4734000 }, { "epoch": 13.7, "learning_rate": 4.315029003797703e-05, "loss": 2.2708, "step": 4734500 }, { "epoch": 13.71, "learning_rate": 4.3149566390329755e-05, "loss": 2.282, "step": 4735000 }, { "epoch": 13.71, "learning_rate": 4.314884418997777e-05, "loss": 2.2828, "step": 4735500 }, { "epoch": 13.71, "learning_rate": 4.314812198962579e-05, "loss": 2.2654, "step": 4736000 }, { "epoch": 13.71, "learning_rate": 4.3147398341978515e-05, "loss": 2.2996, "step": 4736500 }, { "epoch": 13.71, "learning_rate": 4.314667614162653e-05, "loss": 2.2943, "step": 4737000 }, { "epoch": 13.71, "learning_rate": 4.3145953941274546e-05, "loss": 2.282, "step": 4737500 }, { "epoch": 13.71, "learning_rate": 4.314523029362727e-05, "loss": 2.3333, "step": 4738000 }, { "epoch": 13.72, "learning_rate": 4.314450664597999e-05, "loss": 2.2764, "step": 4738500 }, { "epoch": 13.72, "learning_rate": 4.314378299833271e-05, "loss": 2.2702, "step": 4739000 }, { "epoch": 13.72, "learning_rate": 4.314305935068544e-05, "loss": 2.2642, "step": 4739500 }, { "epoch": 13.72, "learning_rate": 4.3142335703038164e-05, "loss": 2.2801, "step": 4740000 }, { "epoch": 13.72, "learning_rate": 4.314161205539089e-05, "loss": 2.2725, "step": 4740500 }, { "epoch": 13.72, "learning_rate": 4.3140888407743615e-05, "loss": 2.289, "step": 4741000 }, { "epoch": 13.72, "learning_rate": 4.314016620739163e-05, "loss": 2.2723, "step": 4741500 }, { "epoch": 13.73, "learning_rate": 4.313944255974435e-05, "loss": 2.2683, "step": 4742000 }, { "epoch": 13.73, "learning_rate": 4.3138718912097075e-05, "loss": 2.2767, "step": 4742500 }, { "epoch": 13.73, "learning_rate": 4.31379952644498e-05, "loss": 2.2597, "step": 4743000 }, { "epoch": 13.73, "learning_rate": 4.313727161680252e-05, "loss": 2.2816, "step": 4743500 }, { "epoch": 13.73, "learning_rate": 4.313654796915524e-05, "loss": 2.2807, "step": 4744000 }, { "epoch": 13.73, "learning_rate": 4.3135824321507964e-05, "loss": 2.2817, "step": 4744500 }, { "epoch": 13.73, "learning_rate": 4.313510067386069e-05, "loss": 2.2639, "step": 4745000 }, { "epoch": 13.74, "learning_rate": 4.3134377026213415e-05, "loss": 2.2685, "step": 4745500 }, { "epoch": 13.74, "learning_rate": 4.313365337856614e-05, "loss": 2.2768, "step": 4746000 }, { "epoch": 13.74, "learning_rate": 4.313292973091886e-05, "loss": 2.2818, "step": 4746500 }, { "epoch": 13.74, "learning_rate": 4.313220608327159e-05, "loss": 2.2817, "step": 4747000 }, { "epoch": 13.74, "learning_rate": 4.313148243562431e-05, "loss": 2.2635, "step": 4747500 }, { "epoch": 13.74, "learning_rate": 4.313075878797703e-05, "loss": 2.293, "step": 4748000 }, { "epoch": 13.74, "learning_rate": 4.3130035140329755e-05, "loss": 2.2811, "step": 4748500 }, { "epoch": 13.75, "learning_rate": 4.312931149268248e-05, "loss": 2.278, "step": 4749000 }, { "epoch": 13.75, "learning_rate": 4.312858929233049e-05, "loss": 2.2819, "step": 4749500 }, { "epoch": 13.75, "learning_rate": 4.3127865644683215e-05, "loss": 2.2828, "step": 4750000 }, { "epoch": 13.75, "learning_rate": 4.312714344433124e-05, "loss": 2.274, "step": 4750500 }, { "epoch": 13.75, "learning_rate": 4.312641979668396e-05, "loss": 2.294, "step": 4751000 }, { "epoch": 13.75, "learning_rate": 4.312569614903668e-05, "loss": 2.273, "step": 4751500 }, { "epoch": 13.76, "learning_rate": 4.31249739486847e-05, "loss": 2.2733, "step": 4752000 }, { "epoch": 13.76, "learning_rate": 4.312425030103742e-05, "loss": 2.2773, "step": 4752500 }, { "epoch": 13.76, "learning_rate": 4.312352665339014e-05, "loss": 2.2769, "step": 4753000 }, { "epoch": 13.76, "learning_rate": 4.3122803005742864e-05, "loss": 2.2711, "step": 4753500 }, { "epoch": 13.76, "learning_rate": 4.312208080539089e-05, "loss": 2.2863, "step": 4754000 }, { "epoch": 13.76, "learning_rate": 4.312135715774361e-05, "loss": 2.2749, "step": 4754500 }, { "epoch": 13.76, "learning_rate": 4.312063351009634e-05, "loss": 2.2697, "step": 4755000 }, { "epoch": 13.77, "learning_rate": 4.311990986244906e-05, "loss": 2.2753, "step": 4755500 }, { "epoch": 13.77, "learning_rate": 4.3119187662097076e-05, "loss": 2.3028, "step": 4756000 }, { "epoch": 13.77, "learning_rate": 4.31184640144498e-05, "loss": 2.2789, "step": 4756500 }, { "epoch": 13.77, "learning_rate": 4.311774036680252e-05, "loss": 2.2895, "step": 4757000 }, { "epoch": 13.77, "learning_rate": 4.311701671915524e-05, "loss": 2.2687, "step": 4757500 }, { "epoch": 13.77, "learning_rate": 4.311629307150797e-05, "loss": 2.2722, "step": 4758000 }, { "epoch": 13.77, "learning_rate": 4.3115569423860694e-05, "loss": 2.2705, "step": 4758500 }, { "epoch": 13.78, "learning_rate": 4.3114845776213416e-05, "loss": 2.2789, "step": 4759000 }, { "epoch": 13.78, "learning_rate": 4.311412212856614e-05, "loss": 2.2529, "step": 4759500 }, { "epoch": 13.78, "learning_rate": 4.311339848091886e-05, "loss": 2.269, "step": 4760000 }, { "epoch": 13.78, "learning_rate": 4.311267483327158e-05, "loss": 2.2947, "step": 4760500 }, { "epoch": 13.78, "learning_rate": 4.3111951185624305e-05, "loss": 2.2744, "step": 4761000 }, { "epoch": 13.78, "learning_rate": 4.311122753797703e-05, "loss": 2.2682, "step": 4761500 }, { "epoch": 13.78, "learning_rate": 4.3110503890329756e-05, "loss": 2.2844, "step": 4762000 }, { "epoch": 13.79, "learning_rate": 4.310978024268248e-05, "loss": 2.2718, "step": 4762500 }, { "epoch": 13.79, "learning_rate": 4.31090565950352e-05, "loss": 2.2467, "step": 4763000 }, { "epoch": 13.79, "learning_rate": 4.310833439468322e-05, "loss": 2.2638, "step": 4763500 }, { "epoch": 13.79, "learning_rate": 4.3107610747035945e-05, "loss": 2.2879, "step": 4764000 }, { "epoch": 13.79, "learning_rate": 4.310688709938867e-05, "loss": 2.2993, "step": 4764500 }, { "epoch": 13.79, "learning_rate": 4.310616345174139e-05, "loss": 2.2701, "step": 4765000 }, { "epoch": 13.79, "learning_rate": 4.310543980409411e-05, "loss": 2.2782, "step": 4765500 }, { "epoch": 13.8, "learning_rate": 4.3104716156446834e-05, "loss": 2.2787, "step": 4766000 }, { "epoch": 13.8, "learning_rate": 4.3103992508799556e-05, "loss": 2.2668, "step": 4766500 }, { "epoch": 13.8, "learning_rate": 4.310326886115228e-05, "loss": 2.2668, "step": 4767000 }, { "epoch": 13.8, "learning_rate": 4.3102545213505e-05, "loss": 2.3055, "step": 4767500 }, { "epoch": 13.8, "learning_rate": 4.310182156585772e-05, "loss": 2.2658, "step": 4768000 }, { "epoch": 13.8, "learning_rate": 4.3101097918210445e-05, "loss": 2.2666, "step": 4768500 }, { "epoch": 13.8, "learning_rate": 4.3100374270563174e-05, "loss": 2.2869, "step": 4769000 }, { "epoch": 13.81, "learning_rate": 4.3099650622915897e-05, "loss": 2.2865, "step": 4769500 }, { "epoch": 13.81, "learning_rate": 4.309892697526862e-05, "loss": 2.2773, "step": 4770000 }, { "epoch": 13.81, "learning_rate": 4.309820332762135e-05, "loss": 2.2548, "step": 4770500 }, { "epoch": 13.81, "learning_rate": 4.309748112726936e-05, "loss": 2.281, "step": 4771000 }, { "epoch": 13.81, "learning_rate": 4.3096757479622086e-05, "loss": 2.2673, "step": 4771500 }, { "epoch": 13.81, "learning_rate": 4.30960352792701e-05, "loss": 2.277, "step": 4772000 }, { "epoch": 13.81, "learning_rate": 4.3095311631622823e-05, "loss": 2.2713, "step": 4772500 }, { "epoch": 13.82, "learning_rate": 4.3094587983975546e-05, "loss": 2.2749, "step": 4773000 }, { "epoch": 13.82, "learning_rate": 4.309386433632827e-05, "loss": 2.2809, "step": 4773500 }, { "epoch": 13.82, "learning_rate": 4.3093140688681e-05, "loss": 2.2914, "step": 4774000 }, { "epoch": 13.82, "learning_rate": 4.309241704103372e-05, "loss": 2.2798, "step": 4774500 }, { "epoch": 13.82, "learning_rate": 4.309169339338644e-05, "loss": 2.278, "step": 4775000 }, { "epoch": 13.82, "learning_rate": 4.3090969745739164e-05, "loss": 2.2696, "step": 4775500 }, { "epoch": 13.82, "learning_rate": 4.3090246098091886e-05, "loss": 2.2989, "step": 4776000 }, { "epoch": 13.83, "learning_rate": 4.3089522450444615e-05, "loss": 2.2729, "step": 4776500 }, { "epoch": 13.83, "learning_rate": 4.308880025009263e-05, "loss": 2.2848, "step": 4777000 }, { "epoch": 13.83, "learning_rate": 4.3088078049740646e-05, "loss": 2.2644, "step": 4777500 }, { "epoch": 13.83, "learning_rate": 4.3087354402093375e-05, "loss": 2.2915, "step": 4778000 }, { "epoch": 13.83, "learning_rate": 4.30866307544461e-05, "loss": 2.2862, "step": 4778500 }, { "epoch": 13.83, "learning_rate": 4.308590710679882e-05, "loss": 2.2745, "step": 4779000 }, { "epoch": 13.83, "learning_rate": 4.308518345915154e-05, "loss": 2.2697, "step": 4779500 }, { "epoch": 13.84, "learning_rate": 4.3084459811504264e-05, "loss": 2.2838, "step": 4780000 }, { "epoch": 13.84, "learning_rate": 4.3083736163856986e-05, "loss": 2.2866, "step": 4780500 }, { "epoch": 13.84, "learning_rate": 4.308301251620971e-05, "loss": 2.2737, "step": 4781000 }, { "epoch": 13.84, "learning_rate": 4.308228886856243e-05, "loss": 2.2625, "step": 4781500 }, { "epoch": 13.84, "learning_rate": 4.3081566668210446e-05, "loss": 2.2764, "step": 4782000 }, { "epoch": 13.84, "learning_rate": 4.3080843020563175e-05, "loss": 2.2582, "step": 4782500 }, { "epoch": 13.84, "learning_rate": 4.308012082021119e-05, "loss": 2.2824, "step": 4783000 }, { "epoch": 13.85, "learning_rate": 4.307939717256391e-05, "loss": 2.284, "step": 4783500 }, { "epoch": 13.85, "learning_rate": 4.3078673524916635e-05, "loss": 2.2747, "step": 4784000 }, { "epoch": 13.85, "learning_rate": 4.3077949877269364e-05, "loss": 2.2632, "step": 4784500 }, { "epoch": 13.85, "learning_rate": 4.3077226229622086e-05, "loss": 2.2926, "step": 4785000 }, { "epoch": 13.85, "learning_rate": 4.30765040292701e-05, "loss": 2.2694, "step": 4785500 }, { "epoch": 13.85, "learning_rate": 4.3075780381622824e-05, "loss": 2.2513, "step": 4786000 }, { "epoch": 13.85, "learning_rate": 4.3075056733975546e-05, "loss": 2.2738, "step": 4786500 }, { "epoch": 13.86, "learning_rate": 4.3074333086328275e-05, "loss": 2.2731, "step": 4787000 }, { "epoch": 13.86, "learning_rate": 4.3073609438681e-05, "loss": 2.2747, "step": 4787500 }, { "epoch": 13.86, "learning_rate": 4.307288579103372e-05, "loss": 2.2591, "step": 4788000 }, { "epoch": 13.86, "learning_rate": 4.307216214338644e-05, "loss": 2.2876, "step": 4788500 }, { "epoch": 13.86, "learning_rate": 4.307143994303446e-05, "loss": 2.2841, "step": 4789000 }, { "epoch": 13.86, "learning_rate": 4.307071629538718e-05, "loss": 2.2569, "step": 4789500 }, { "epoch": 13.87, "learning_rate": 4.30699926477399e-05, "loss": 2.2644, "step": 4790000 }, { "epoch": 13.87, "learning_rate": 4.3069269000092624e-05, "loss": 2.2704, "step": 4790500 }, { "epoch": 13.87, "learning_rate": 4.306854535244535e-05, "loss": 2.2846, "step": 4791000 }, { "epoch": 13.87, "learning_rate": 4.306782315209337e-05, "loss": 2.2811, "step": 4791500 }, { "epoch": 13.87, "learning_rate": 4.30670995044461e-05, "loss": 2.2646, "step": 4792000 }, { "epoch": 13.87, "learning_rate": 4.306637585679882e-05, "loss": 2.2795, "step": 4792500 }, { "epoch": 13.87, "learning_rate": 4.306565220915154e-05, "loss": 2.2669, "step": 4793000 }, { "epoch": 13.88, "learning_rate": 4.3064928561504265e-05, "loss": 2.2824, "step": 4793500 }, { "epoch": 13.88, "learning_rate": 4.306420636115228e-05, "loss": 2.2805, "step": 4794000 }, { "epoch": 13.88, "learning_rate": 4.3063482713505e-05, "loss": 2.2904, "step": 4794500 }, { "epoch": 13.88, "learning_rate": 4.3062759065857725e-05, "loss": 2.2574, "step": 4795000 }, { "epoch": 13.88, "learning_rate": 4.306203541821045e-05, "loss": 2.284, "step": 4795500 }, { "epoch": 13.88, "learning_rate": 4.3061311770563176e-05, "loss": 2.2997, "step": 4796000 }, { "epoch": 13.88, "learning_rate": 4.30605881229159e-05, "loss": 2.2746, "step": 4796500 }, { "epoch": 13.89, "learning_rate": 4.305986447526862e-05, "loss": 2.2854, "step": 4797000 }, { "epoch": 13.89, "learning_rate": 4.3059142274916636e-05, "loss": 2.2553, "step": 4797500 }, { "epoch": 13.89, "learning_rate": 4.305841862726936e-05, "loss": 2.2775, "step": 4798000 }, { "epoch": 13.89, "learning_rate": 4.305769497962208e-05, "loss": 2.2602, "step": 4798500 }, { "epoch": 13.89, "learning_rate": 4.305697133197481e-05, "loss": 2.2821, "step": 4799000 }, { "epoch": 13.89, "learning_rate": 4.305624768432753e-05, "loss": 2.2815, "step": 4799500 }, { "epoch": 13.89, "learning_rate": 4.3055524036680254e-05, "loss": 2.2674, "step": 4800000 }, { "epoch": 13.9, "learning_rate": 4.3054800389032976e-05, "loss": 2.2836, "step": 4800500 }, { "epoch": 13.9, "learning_rate": 4.30540767413857e-05, "loss": 2.2613, "step": 4801000 }, { "epoch": 13.9, "learning_rate": 4.305335454103372e-05, "loss": 2.2871, "step": 4801500 }, { "epoch": 13.9, "learning_rate": 4.3052632340681736e-05, "loss": 2.2545, "step": 4802000 }, { "epoch": 13.9, "learning_rate": 4.305190869303446e-05, "loss": 2.2903, "step": 4802500 }, { "epoch": 13.9, "learning_rate": 4.305118504538718e-05, "loss": 2.2926, "step": 4803000 }, { "epoch": 13.9, "learning_rate": 4.30504613977399e-05, "loss": 2.2936, "step": 4803500 }, { "epoch": 13.91, "learning_rate": 4.3049737750092625e-05, "loss": 2.2763, "step": 4804000 }, { "epoch": 13.91, "learning_rate": 4.3049014102445354e-05, "loss": 2.2746, "step": 4804500 }, { "epoch": 13.91, "learning_rate": 4.3048290454798076e-05, "loss": 2.285, "step": 4805000 }, { "epoch": 13.91, "learning_rate": 4.30475668071508e-05, "loss": 2.2706, "step": 4805500 }, { "epoch": 13.91, "learning_rate": 4.3046844606798814e-05, "loss": 2.2733, "step": 4806000 }, { "epoch": 13.91, "learning_rate": 4.304612240644683e-05, "loss": 2.2703, "step": 4806500 }, { "epoch": 13.91, "learning_rate": 4.304539875879956e-05, "loss": 2.2733, "step": 4807000 }, { "epoch": 13.92, "learning_rate": 4.304467511115228e-05, "loss": 2.2758, "step": 4807500 }, { "epoch": 13.92, "learning_rate": 4.3043951463505e-05, "loss": 2.2726, "step": 4808000 }, { "epoch": 13.92, "learning_rate": 4.3043227815857725e-05, "loss": 2.2802, "step": 4808500 }, { "epoch": 13.92, "learning_rate": 4.3042504168210455e-05, "loss": 2.2676, "step": 4809000 }, { "epoch": 13.92, "learning_rate": 4.304178196785847e-05, "loss": 2.2781, "step": 4809500 }, { "epoch": 13.92, "learning_rate": 4.304105832021119e-05, "loss": 2.2561, "step": 4810000 }, { "epoch": 13.92, "learning_rate": 4.3040334672563915e-05, "loss": 2.2709, "step": 4810500 }, { "epoch": 13.93, "learning_rate": 4.303961102491664e-05, "loss": 2.2696, "step": 4811000 }, { "epoch": 13.93, "learning_rate": 4.303888737726936e-05, "loss": 2.2925, "step": 4811500 }, { "epoch": 13.93, "learning_rate": 4.3038165176917375e-05, "loss": 2.2701, "step": 4812000 }, { "epoch": 13.93, "learning_rate": 4.3037441529270104e-05, "loss": 2.2899, "step": 4812500 }, { "epoch": 13.93, "learning_rate": 4.3036717881622826e-05, "loss": 2.3022, "step": 4813000 }, { "epoch": 13.93, "learning_rate": 4.303599423397555e-05, "loss": 2.291, "step": 4813500 }, { "epoch": 13.93, "learning_rate": 4.303527058632827e-05, "loss": 2.2791, "step": 4814000 }, { "epoch": 13.94, "learning_rate": 4.3034546938681e-05, "loss": 2.2668, "step": 4814500 }, { "epoch": 13.94, "learning_rate": 4.303382329103372e-05, "loss": 2.2997, "step": 4815000 }, { "epoch": 13.94, "learning_rate": 4.3033099643386444e-05, "loss": 2.275, "step": 4815500 }, { "epoch": 13.94, "learning_rate": 4.3032375995739166e-05, "loss": 2.2564, "step": 4816000 }, { "epoch": 13.94, "learning_rate": 4.303165234809189e-05, "loss": 2.2745, "step": 4816500 }, { "epoch": 13.94, "learning_rate": 4.303092870044461e-05, "loss": 2.2821, "step": 4817000 }, { "epoch": 13.94, "learning_rate": 4.3030206500092626e-05, "loss": 2.2778, "step": 4817500 }, { "epoch": 13.95, "learning_rate": 4.3029482852445355e-05, "loss": 2.2659, "step": 4818000 }, { "epoch": 13.95, "learning_rate": 4.302875920479808e-05, "loss": 2.2587, "step": 4818500 }, { "epoch": 13.95, "learning_rate": 4.30280355571508e-05, "loss": 2.2622, "step": 4819000 }, { "epoch": 13.95, "learning_rate": 4.3027313356798815e-05, "loss": 2.2939, "step": 4819500 }, { "epoch": 13.95, "learning_rate": 4.302658970915154e-05, "loss": 2.281, "step": 4820000 }, { "epoch": 13.95, "learning_rate": 4.302586895609485e-05, "loss": 2.27, "step": 4820500 }, { "epoch": 13.95, "learning_rate": 4.3025145308447575e-05, "loss": 2.2504, "step": 4821000 }, { "epoch": 13.96, "learning_rate": 4.30244216608003e-05, "loss": 2.2966, "step": 4821500 }, { "epoch": 13.96, "learning_rate": 4.3023698013153026e-05, "loss": 2.2679, "step": 4822000 }, { "epoch": 13.96, "learning_rate": 4.302297436550575e-05, "loss": 2.2972, "step": 4822500 }, { "epoch": 13.96, "learning_rate": 4.302225071785847e-05, "loss": 2.2832, "step": 4823000 }, { "epoch": 13.96, "learning_rate": 4.302152707021119e-05, "loss": 2.2686, "step": 4823500 }, { "epoch": 13.96, "learning_rate": 4.3020803422563915e-05, "loss": 2.2594, "step": 4824000 }, { "epoch": 13.96, "learning_rate": 4.302007977491664e-05, "loss": 2.272, "step": 4824500 }, { "epoch": 13.97, "learning_rate": 4.301935612726936e-05, "loss": 2.2712, "step": 4825000 }, { "epoch": 13.97, "learning_rate": 4.301863247962208e-05, "loss": 2.2631, "step": 4825500 }, { "epoch": 13.97, "learning_rate": 4.3017908831974804e-05, "loss": 2.2726, "step": 4826000 }, { "epoch": 13.97, "learning_rate": 4.3017186631622827e-05, "loss": 2.2735, "step": 4826500 }, { "epoch": 13.97, "learning_rate": 4.301646298397555e-05, "loss": 2.2888, "step": 4827000 }, { "epoch": 13.97, "learning_rate": 4.301573933632827e-05, "loss": 2.2658, "step": 4827500 }, { "epoch": 13.98, "learning_rate": 4.301501568868099e-05, "loss": 2.2752, "step": 4828000 }, { "epoch": 13.98, "learning_rate": 4.301429348832901e-05, "loss": 2.2508, "step": 4828500 }, { "epoch": 13.98, "learning_rate": 4.301356984068173e-05, "loss": 2.2607, "step": 4829000 }, { "epoch": 13.98, "learning_rate": 4.301284619303446e-05, "loss": 2.2677, "step": 4829500 }, { "epoch": 13.98, "learning_rate": 4.301212254538718e-05, "loss": 2.278, "step": 4830000 }, { "epoch": 13.98, "learning_rate": 4.3011398897739905e-05, "loss": 2.2704, "step": 4830500 }, { "epoch": 13.98, "learning_rate": 4.301067669738793e-05, "loss": 2.2717, "step": 4831000 }, { "epoch": 13.99, "learning_rate": 4.300995304974065e-05, "loss": 2.2851, "step": 4831500 }, { "epoch": 13.99, "learning_rate": 4.300922940209337e-05, "loss": 2.2581, "step": 4832000 }, { "epoch": 13.99, "learning_rate": 4.3008505754446094e-05, "loss": 2.2865, "step": 4832500 }, { "epoch": 13.99, "learning_rate": 4.3007782106798816e-05, "loss": 2.2853, "step": 4833000 }, { "epoch": 13.99, "learning_rate": 4.300705845915154e-05, "loss": 2.2573, "step": 4833500 }, { "epoch": 13.99, "learning_rate": 4.300633481150426e-05, "loss": 2.2787, "step": 4834000 }, { "epoch": 13.99, "learning_rate": 4.300561116385698e-05, "loss": 2.276, "step": 4834500 }, { "epoch": 14.0, "learning_rate": 4.3004888963505005e-05, "loss": 2.2563, "step": 4835000 }, { "epoch": 14.0, "learning_rate": 4.300416531585773e-05, "loss": 2.2761, "step": 4835500 }, { "epoch": 14.0, "learning_rate": 4.300344166821045e-05, "loss": 2.2709, "step": 4836000 }, { "epoch": 14.0, "learning_rate": 4.300271802056318e-05, "loss": 2.2776, "step": 4836500 }, { "epoch": 14.0, "eval_accuracy": 0.653478279659735, "eval_accuracy_mlm": 0.6164064549706573, "eval_accuracy_nsp": 0.8522890285206216, "eval_loss": 2.277855396270752, "eval_runtime": 330.582, "eval_samples_per_second": 1320.054, "eval_steps_per_second": 55.003, "step": 4836608 }, { "epoch": 14.0, "learning_rate": 4.30019943729159e-05, "loss": 2.2775, "step": 4837000 }, { "epoch": 14.0, "learning_rate": 4.300127072526862e-05, "loss": 2.2426, "step": 4837500 }, { "epoch": 14.0, "learning_rate": 4.300054997221193e-05, "loss": 2.2506, "step": 4838000 }, { "epoch": 14.01, "learning_rate": 4.2999826324564654e-05, "loss": 2.2419, "step": 4838500 }, { "epoch": 14.01, "learning_rate": 4.2999104124212676e-05, "loss": 2.2666, "step": 4839000 }, { "epoch": 14.01, "learning_rate": 4.29983804765654e-05, "loss": 2.2439, "step": 4839500 }, { "epoch": 14.01, "learning_rate": 4.2997658276213414e-05, "loss": 2.2649, "step": 4840000 }, { "epoch": 14.01, "learning_rate": 4.2996934628566136e-05, "loss": 2.2709, "step": 4840500 }, { "epoch": 14.01, "learning_rate": 4.299621098091886e-05, "loss": 2.2522, "step": 4841000 }, { "epoch": 14.01, "learning_rate": 4.299548733327158e-05, "loss": 2.2533, "step": 4841500 }, { "epoch": 14.02, "learning_rate": 4.29947636856243e-05, "loss": 2.2649, "step": 4842000 }, { "epoch": 14.02, "learning_rate": 4.299404003797703e-05, "loss": 2.2809, "step": 4842500 }, { "epoch": 14.02, "learning_rate": 4.2993316390329754e-05, "loss": 2.2559, "step": 4843000 }, { "epoch": 14.02, "learning_rate": 4.2992592742682476e-05, "loss": 2.2673, "step": 4843500 }, { "epoch": 14.02, "learning_rate": 4.29918690950352e-05, "loss": 2.2553, "step": 4844000 }, { "epoch": 14.02, "learning_rate": 4.299114544738793e-05, "loss": 2.2555, "step": 4844500 }, { "epoch": 14.02, "learning_rate": 4.299042179974065e-05, "loss": 2.2878, "step": 4845000 }, { "epoch": 14.03, "learning_rate": 4.298969815209337e-05, "loss": 2.2563, "step": 4845500 }, { "epoch": 14.03, "learning_rate": 4.2988974504446094e-05, "loss": 2.2662, "step": 4846000 }, { "epoch": 14.03, "learning_rate": 4.298825085679882e-05, "loss": 2.2632, "step": 4846500 }, { "epoch": 14.03, "learning_rate": 4.298752865644683e-05, "loss": 2.2494, "step": 4847000 }, { "epoch": 14.03, "learning_rate": 4.2986805008799554e-05, "loss": 2.2687, "step": 4847500 }, { "epoch": 14.03, "learning_rate": 4.2986081361152283e-05, "loss": 2.2364, "step": 4848000 }, { "epoch": 14.03, "learning_rate": 4.2985357713505006e-05, "loss": 2.2455, "step": 4848500 }, { "epoch": 14.04, "learning_rate": 4.298463406585773e-05, "loss": 2.2464, "step": 4849000 }, { "epoch": 14.04, "learning_rate": 4.2983911865505743e-05, "loss": 2.2709, "step": 4849500 }, { "epoch": 14.04, "learning_rate": 4.2983188217858466e-05, "loss": 2.2592, "step": 4850000 }, { "epoch": 14.04, "learning_rate": 4.298246457021119e-05, "loss": 2.2477, "step": 4850500 }, { "epoch": 14.04, "learning_rate": 4.2981743817154504e-05, "loss": 2.2437, "step": 4851000 }, { "epoch": 14.04, "learning_rate": 4.2981020169507226e-05, "loss": 2.2457, "step": 4851500 }, { "epoch": 14.04, "learning_rate": 4.2980296521859955e-05, "loss": 2.2576, "step": 4852000 }, { "epoch": 14.05, "learning_rate": 4.297957287421268e-05, "loss": 2.2443, "step": 4852500 }, { "epoch": 14.05, "learning_rate": 4.29788492265654e-05, "loss": 2.2281, "step": 4853000 }, { "epoch": 14.05, "learning_rate": 4.297812557891812e-05, "loss": 2.2646, "step": 4853500 }, { "epoch": 14.05, "learning_rate": 4.2977401931270844e-05, "loss": 2.2407, "step": 4854000 }, { "epoch": 14.05, "learning_rate": 4.2976678283623566e-05, "loss": 2.2483, "step": 4854500 }, { "epoch": 14.05, "learning_rate": 4.297595463597629e-05, "loss": 2.2245, "step": 4855000 }, { "epoch": 14.05, "learning_rate": 4.297523098832901e-05, "loss": 2.2487, "step": 4855500 }, { "epoch": 14.06, "learning_rate": 4.297450734068173e-05, "loss": 2.2558, "step": 4856000 }, { "epoch": 14.06, "learning_rate": 4.297378369303446e-05, "loss": 2.237, "step": 4856500 }, { "epoch": 14.06, "learning_rate": 4.2973060045387184e-05, "loss": 2.2709, "step": 4857000 }, { "epoch": 14.06, "learning_rate": 4.29723378450352e-05, "loss": 2.2585, "step": 4857500 }, { "epoch": 14.06, "learning_rate": 4.2971615644683215e-05, "loss": 2.2751, "step": 4858000 }, { "epoch": 14.06, "learning_rate": 4.297089199703594e-05, "loss": 2.2553, "step": 4858500 }, { "epoch": 14.06, "learning_rate": 4.297016834938866e-05, "loss": 2.2256, "step": 4859000 }, { "epoch": 14.07, "learning_rate": 4.296944470174139e-05, "loss": 2.2747, "step": 4859500 }, { "epoch": 14.07, "learning_rate": 4.296872105409411e-05, "loss": 2.2526, "step": 4860000 }, { "epoch": 14.07, "learning_rate": 4.296799740644683e-05, "loss": 2.2707, "step": 4860500 }, { "epoch": 14.07, "learning_rate": 4.2967275206094855e-05, "loss": 2.26, "step": 4861000 }, { "epoch": 14.07, "learning_rate": 4.296655155844758e-05, "loss": 2.2739, "step": 4861500 }, { "epoch": 14.07, "learning_rate": 4.29658279108003e-05, "loss": 2.2471, "step": 4862000 }, { "epoch": 14.07, "learning_rate": 4.296510426315302e-05, "loss": 2.2587, "step": 4862500 }, { "epoch": 14.08, "learning_rate": 4.2964380615505744e-05, "loss": 2.2462, "step": 4863000 }, { "epoch": 14.08, "learning_rate": 4.2963656967858466e-05, "loss": 2.2616, "step": 4863500 }, { "epoch": 14.08, "learning_rate": 4.296293332021119e-05, "loss": 2.2446, "step": 4864000 }, { "epoch": 14.08, "learning_rate": 4.296220967256391e-05, "loss": 2.2519, "step": 4864500 }, { "epoch": 14.08, "learning_rate": 4.296148602491663e-05, "loss": 2.2551, "step": 4865000 }, { "epoch": 14.08, "learning_rate": 4.2960763824564655e-05, "loss": 2.2667, "step": 4865500 }, { "epoch": 14.09, "learning_rate": 4.296004017691738e-05, "loss": 2.274, "step": 4866000 }, { "epoch": 14.09, "learning_rate": 4.29593165292701e-05, "loss": 2.2634, "step": 4866500 }, { "epoch": 14.09, "learning_rate": 4.295859288162283e-05, "loss": 2.2542, "step": 4867000 }, { "epoch": 14.09, "learning_rate": 4.295786923397555e-05, "loss": 2.2569, "step": 4867500 }, { "epoch": 14.09, "learning_rate": 4.2957145586328273e-05, "loss": 2.2549, "step": 4868000 }, { "epoch": 14.09, "learning_rate": 4.295642338597629e-05, "loss": 2.2673, "step": 4868500 }, { "epoch": 14.09, "learning_rate": 4.295569973832901e-05, "loss": 2.271, "step": 4869000 }, { "epoch": 14.1, "learning_rate": 4.2954976090681733e-05, "loss": 2.2398, "step": 4869500 }, { "epoch": 14.1, "learning_rate": 4.295425244303446e-05, "loss": 2.2488, "step": 4870000 }, { "epoch": 14.1, "learning_rate": 4.2953528795387185e-05, "loss": 2.2727, "step": 4870500 }, { "epoch": 14.1, "learning_rate": 4.295280514773991e-05, "loss": 2.2654, "step": 4871000 }, { "epoch": 14.1, "learning_rate": 4.295208728927381e-05, "loss": 2.2834, "step": 4871500 }, { "epoch": 14.1, "learning_rate": 4.2951365088921825e-05, "loss": 2.2653, "step": 4872000 }, { "epoch": 14.1, "learning_rate": 4.295064144127455e-05, "loss": 2.2705, "step": 4872500 }, { "epoch": 14.11, "learning_rate": 4.294991779362727e-05, "loss": 2.2383, "step": 4873000 }, { "epoch": 14.11, "learning_rate": 4.294919414597999e-05, "loss": 2.2782, "step": 4873500 }, { "epoch": 14.11, "learning_rate": 4.2948470498332714e-05, "loss": 2.2523, "step": 4874000 }, { "epoch": 14.11, "learning_rate": 4.2947746850685436e-05, "loss": 2.2494, "step": 4874500 }, { "epoch": 14.11, "learning_rate": 4.2947023203038165e-05, "loss": 2.2346, "step": 4875000 }, { "epoch": 14.11, "learning_rate": 4.294629955539089e-05, "loss": 2.2436, "step": 4875500 }, { "epoch": 14.11, "learning_rate": 4.294557590774361e-05, "loss": 2.2569, "step": 4876000 }, { "epoch": 14.12, "learning_rate": 4.294485226009634e-05, "loss": 2.2736, "step": 4876500 }, { "epoch": 14.12, "learning_rate": 4.294412861244906e-05, "loss": 2.2727, "step": 4877000 }, { "epoch": 14.12, "learning_rate": 4.294340496480178e-05, "loss": 2.2461, "step": 4877500 }, { "epoch": 14.12, "learning_rate": 4.2942681317154505e-05, "loss": 2.2486, "step": 4878000 }, { "epoch": 14.12, "learning_rate": 4.294195766950723e-05, "loss": 2.2639, "step": 4878500 }, { "epoch": 14.12, "learning_rate": 4.294123402185995e-05, "loss": 2.2661, "step": 4879000 }, { "epoch": 14.12, "learning_rate": 4.294051037421267e-05, "loss": 2.2598, "step": 4879500 }, { "epoch": 14.13, "learning_rate": 4.2939786726565394e-05, "loss": 2.2551, "step": 4880000 }, { "epoch": 14.13, "learning_rate": 4.2939063078918116e-05, "loss": 2.2608, "step": 4880500 }, { "epoch": 14.13, "learning_rate": 4.293833943127084e-05, "loss": 2.2422, "step": 4881000 }, { "epoch": 14.13, "learning_rate": 4.293761723091886e-05, "loss": 2.257, "step": 4881500 }, { "epoch": 14.13, "learning_rate": 4.293689358327159e-05, "loss": 2.2635, "step": 4882000 }, { "epoch": 14.13, "learning_rate": 4.293616993562431e-05, "loss": 2.2609, "step": 4882500 }, { "epoch": 14.13, "learning_rate": 4.2935446287977034e-05, "loss": 2.2586, "step": 4883000 }, { "epoch": 14.14, "learning_rate": 4.2934722640329757e-05, "loss": 2.2747, "step": 4883500 }, { "epoch": 14.14, "learning_rate": 4.293399899268248e-05, "loss": 2.2482, "step": 4884000 }, { "epoch": 14.14, "learning_rate": 4.29332753450352e-05, "loss": 2.2633, "step": 4884500 }, { "epoch": 14.14, "learning_rate": 4.293255169738792e-05, "loss": 2.2682, "step": 4885000 }, { "epoch": 14.14, "learning_rate": 4.2931828049740646e-05, "loss": 2.2665, "step": 4885500 }, { "epoch": 14.14, "learning_rate": 4.293110440209337e-05, "loss": 2.2612, "step": 4886000 }, { "epoch": 14.14, "learning_rate": 4.293038075444609e-05, "loss": 2.2512, "step": 4886500 }, { "epoch": 14.15, "learning_rate": 4.292965710679881e-05, "loss": 2.2718, "step": 4887000 }, { "epoch": 14.15, "learning_rate": 4.292893345915154e-05, "loss": 2.2785, "step": 4887500 }, { "epoch": 14.15, "learning_rate": 4.292821125879956e-05, "loss": 2.2659, "step": 4888000 }, { "epoch": 14.15, "learning_rate": 4.292748761115228e-05, "loss": 2.2646, "step": 4888500 }, { "epoch": 14.15, "learning_rate": 4.2926763963505e-05, "loss": 2.2759, "step": 4889000 }, { "epoch": 14.15, "learning_rate": 4.292604031585773e-05, "loss": 2.252, "step": 4889500 }, { "epoch": 14.15, "learning_rate": 4.292531666821045e-05, "loss": 2.2559, "step": 4890000 }, { "epoch": 14.16, "learning_rate": 4.292459446785847e-05, "loss": 2.2335, "step": 4890500 }, { "epoch": 14.16, "learning_rate": 4.292387082021119e-05, "loss": 2.2609, "step": 4891000 }, { "epoch": 14.16, "learning_rate": 4.292314717256391e-05, "loss": 2.2525, "step": 4891500 }, { "epoch": 14.16, "learning_rate": 4.292242352491664e-05, "loss": 2.2755, "step": 4892000 }, { "epoch": 14.16, "learning_rate": 4.2921699877269364e-05, "loss": 2.2321, "step": 4892500 }, { "epoch": 14.16, "learning_rate": 4.2920976229622086e-05, "loss": 2.2729, "step": 4893000 }, { "epoch": 14.16, "learning_rate": 4.292025258197481e-05, "loss": 2.2363, "step": 4893500 }, { "epoch": 14.17, "learning_rate": 4.2919530381622824e-05, "loss": 2.2611, "step": 4894000 }, { "epoch": 14.17, "learning_rate": 4.2918806733975546e-05, "loss": 2.2463, "step": 4894500 }, { "epoch": 14.17, "learning_rate": 4.291808308632827e-05, "loss": 2.2788, "step": 4895000 }, { "epoch": 14.17, "learning_rate": 4.291735943868099e-05, "loss": 2.2536, "step": 4895500 }, { "epoch": 14.17, "learning_rate": 4.291663579103371e-05, "loss": 2.2386, "step": 4896000 }, { "epoch": 14.17, "learning_rate": 4.2915913590681735e-05, "loss": 2.2631, "step": 4896500 }, { "epoch": 14.17, "learning_rate": 4.291519139032976e-05, "loss": 2.2716, "step": 4897000 }, { "epoch": 14.18, "learning_rate": 4.291446774268248e-05, "loss": 2.2568, "step": 4897500 }, { "epoch": 14.18, "learning_rate": 4.29137440950352e-05, "loss": 2.2518, "step": 4898000 }, { "epoch": 14.18, "learning_rate": 4.2913020447387924e-05, "loss": 2.2475, "step": 4898500 }, { "epoch": 14.18, "learning_rate": 4.291229824703594e-05, "loss": 2.2482, "step": 4899000 }, { "epoch": 14.18, "learning_rate": 4.291157459938867e-05, "loss": 2.2677, "step": 4899500 }, { "epoch": 14.18, "learning_rate": 4.291085095174139e-05, "loss": 2.267, "step": 4900000 }, { "epoch": 14.18, "learning_rate": 4.2910128751389406e-05, "loss": 2.242, "step": 4900500 }, { "epoch": 14.19, "learning_rate": 4.290940510374213e-05, "loss": 2.2446, "step": 4901000 }, { "epoch": 14.19, "learning_rate": 4.290868145609485e-05, "loss": 2.2559, "step": 4901500 }, { "epoch": 14.19, "learning_rate": 4.290795780844757e-05, "loss": 2.2736, "step": 4902000 }, { "epoch": 14.19, "learning_rate": 4.2907234160800295e-05, "loss": 2.2744, "step": 4902500 }, { "epoch": 14.19, "learning_rate": 4.290651051315302e-05, "loss": 2.2706, "step": 4903000 }, { "epoch": 14.19, "learning_rate": 4.290578686550574e-05, "loss": 2.2342, "step": 4903500 }, { "epoch": 14.2, "learning_rate": 4.290506321785847e-05, "loss": 2.2524, "step": 4904000 }, { "epoch": 14.2, "learning_rate": 4.290433957021119e-05, "loss": 2.2515, "step": 4904500 }, { "epoch": 14.2, "learning_rate": 4.290361592256392e-05, "loss": 2.2523, "step": 4905000 }, { "epoch": 14.2, "learning_rate": 4.290289227491664e-05, "loss": 2.2821, "step": 4905500 }, { "epoch": 14.2, "learning_rate": 4.2902168627269365e-05, "loss": 2.2562, "step": 4906000 }, { "epoch": 14.2, "learning_rate": 4.290144497962209e-05, "loss": 2.2786, "step": 4906500 }, { "epoch": 14.2, "learning_rate": 4.29007227792701e-05, "loss": 2.2454, "step": 4907000 }, { "epoch": 14.21, "learning_rate": 4.2899999131622825e-05, "loss": 2.2554, "step": 4907500 }, { "epoch": 14.21, "learning_rate": 4.289927548397555e-05, "loss": 2.2519, "step": 4908000 }, { "epoch": 14.21, "learning_rate": 4.289855328362357e-05, "loss": 2.2788, "step": 4908500 }, { "epoch": 14.21, "learning_rate": 4.2897831083271585e-05, "loss": 2.2629, "step": 4909000 }, { "epoch": 14.21, "learning_rate": 4.289710743562431e-05, "loss": 2.2675, "step": 4909500 }, { "epoch": 14.21, "learning_rate": 4.289638378797703e-05, "loss": 2.2591, "step": 4910000 }, { "epoch": 14.21, "learning_rate": 4.289566014032975e-05, "loss": 2.2465, "step": 4910500 }, { "epoch": 14.22, "learning_rate": 4.2894936492682474e-05, "loss": 2.2797, "step": 4911000 }, { "epoch": 14.22, "learning_rate": 4.2894212845035196e-05, "loss": 2.2561, "step": 4911500 }, { "epoch": 14.22, "learning_rate": 4.2893489197387925e-05, "loss": 2.2525, "step": 4912000 }, { "epoch": 14.22, "learning_rate": 4.289276554974065e-05, "loss": 2.2678, "step": 4912500 }, { "epoch": 14.22, "learning_rate": 4.289204334938867e-05, "loss": 2.2369, "step": 4913000 }, { "epoch": 14.22, "learning_rate": 4.289131970174139e-05, "loss": 2.2724, "step": 4913500 }, { "epoch": 14.22, "learning_rate": 4.289059750138941e-05, "loss": 2.2833, "step": 4914000 }, { "epoch": 14.23, "learning_rate": 4.288987385374213e-05, "loss": 2.2549, "step": 4914500 }, { "epoch": 14.23, "learning_rate": 4.288915020609485e-05, "loss": 2.2486, "step": 4915000 }, { "epoch": 14.23, "learning_rate": 4.2888426558447574e-05, "loss": 2.2322, "step": 4915500 }, { "epoch": 14.23, "learning_rate": 4.2887702910800296e-05, "loss": 2.2576, "step": 4916000 }, { "epoch": 14.23, "learning_rate": 4.288697926315302e-05, "loss": 2.2663, "step": 4916500 }, { "epoch": 14.23, "learning_rate": 4.288625561550574e-05, "loss": 2.2626, "step": 4917000 }, { "epoch": 14.23, "learning_rate": 4.288553196785847e-05, "loss": 2.2496, "step": 4917500 }, { "epoch": 14.24, "learning_rate": 4.288480832021119e-05, "loss": 2.2683, "step": 4918000 }, { "epoch": 14.24, "learning_rate": 4.288408611985921e-05, "loss": 2.2781, "step": 4918500 }, { "epoch": 14.24, "learning_rate": 4.288336247221193e-05, "loss": 2.2616, "step": 4919000 }, { "epoch": 14.24, "learning_rate": 4.288263882456466e-05, "loss": 2.2706, "step": 4919500 }, { "epoch": 14.24, "learning_rate": 4.288191517691738e-05, "loss": 2.2512, "step": 4920000 }, { "epoch": 14.24, "learning_rate": 4.2881192976565396e-05, "loss": 2.2431, "step": 4920500 }, { "epoch": 14.24, "learning_rate": 4.288046932891812e-05, "loss": 2.2734, "step": 4921000 }, { "epoch": 14.25, "learning_rate": 4.287974712856614e-05, "loss": 2.2729, "step": 4921500 }, { "epoch": 14.25, "learning_rate": 4.287902348091886e-05, "loss": 2.2623, "step": 4922000 }, { "epoch": 14.25, "learning_rate": 4.2878299833271586e-05, "loss": 2.2641, "step": 4922500 }, { "epoch": 14.25, "learning_rate": 4.287757618562431e-05, "loss": 2.2766, "step": 4923000 }, { "epoch": 14.25, "learning_rate": 4.287685253797703e-05, "loss": 2.2703, "step": 4923500 }, { "epoch": 14.25, "learning_rate": 4.287612889032975e-05, "loss": 2.2897, "step": 4924000 }, { "epoch": 14.25, "learning_rate": 4.2875405242682474e-05, "loss": 2.2586, "step": 4924500 }, { "epoch": 14.26, "learning_rate": 4.28746815950352e-05, "loss": 2.265, "step": 4925000 }, { "epoch": 14.26, "learning_rate": 4.287395794738792e-05, "loss": 2.2629, "step": 4925500 }, { "epoch": 14.26, "learning_rate": 4.287323429974064e-05, "loss": 2.2591, "step": 4926000 }, { "epoch": 14.26, "learning_rate": 4.287251065209337e-05, "loss": 2.3009, "step": 4926500 }, { "epoch": 14.26, "learning_rate": 4.287178700444609e-05, "loss": 2.2629, "step": 4927000 }, { "epoch": 14.26, "learning_rate": 4.287106335679882e-05, "loss": 2.2664, "step": 4927500 }, { "epoch": 14.26, "learning_rate": 4.2870339709151544e-05, "loss": 2.263, "step": 4928000 }, { "epoch": 14.27, "learning_rate": 4.2869616061504266e-05, "loss": 2.2464, "step": 4928500 }, { "epoch": 14.27, "learning_rate": 4.286889241385699e-05, "loss": 2.2611, "step": 4929000 }, { "epoch": 14.27, "learning_rate": 4.2868170213505004e-05, "loss": 2.2501, "step": 4929500 }, { "epoch": 14.27, "learning_rate": 4.2867446565857726e-05, "loss": 2.2341, "step": 4930000 }, { "epoch": 14.27, "learning_rate": 4.286672291821045e-05, "loss": 2.2756, "step": 4930500 }, { "epoch": 14.27, "learning_rate": 4.286599927056317e-05, "loss": 2.2666, "step": 4931000 }, { "epoch": 14.27, "learning_rate": 4.286527562291589e-05, "loss": 2.2614, "step": 4931500 }, { "epoch": 14.28, "learning_rate": 4.2864553422563915e-05, "loss": 2.2504, "step": 4932000 }, { "epoch": 14.28, "learning_rate": 4.286382977491664e-05, "loss": 2.261, "step": 4932500 }, { "epoch": 14.28, "learning_rate": 4.286310612726936e-05, "loss": 2.2912, "step": 4933000 }, { "epoch": 14.28, "learning_rate": 4.286238247962208e-05, "loss": 2.2786, "step": 4933500 }, { "epoch": 14.28, "learning_rate": 4.28616602792701e-05, "loss": 2.2671, "step": 4934000 }, { "epoch": 14.28, "learning_rate": 4.2860936631622826e-05, "loss": 2.2422, "step": 4934500 }, { "epoch": 14.28, "learning_rate": 4.286021298397555e-05, "loss": 2.2527, "step": 4935000 }, { "epoch": 14.29, "learning_rate": 4.285948933632827e-05, "loss": 2.2785, "step": 4935500 }, { "epoch": 14.29, "learning_rate": 4.2858765688681e-05, "loss": 2.2773, "step": 4936000 }, { "epoch": 14.29, "learning_rate": 4.285804204103372e-05, "loss": 2.2619, "step": 4936500 }, { "epoch": 14.29, "learning_rate": 4.285731984068174e-05, "loss": 2.2637, "step": 4937000 }, { "epoch": 14.29, "learning_rate": 4.285659764032975e-05, "loss": 2.2476, "step": 4937500 }, { "epoch": 14.29, "learning_rate": 4.285587543997777e-05, "loss": 2.2484, "step": 4938000 }, { "epoch": 14.29, "learning_rate": 4.28551517923305e-05, "loss": 2.2544, "step": 4938500 }, { "epoch": 14.3, "learning_rate": 4.285442814468322e-05, "loss": 2.2723, "step": 4939000 }, { "epoch": 14.3, "learning_rate": 4.285370449703594e-05, "loss": 2.2459, "step": 4939500 }, { "epoch": 14.3, "learning_rate": 4.2852980849388664e-05, "loss": 2.2866, "step": 4940000 }, { "epoch": 14.3, "learning_rate": 4.2852257201741386e-05, "loss": 2.2489, "step": 4940500 }, { "epoch": 14.3, "learning_rate": 4.285153355409411e-05, "loss": 2.245, "step": 4941000 }, { "epoch": 14.3, "learning_rate": 4.285080990644683e-05, "loss": 2.2825, "step": 4941500 }, { "epoch": 14.31, "learning_rate": 4.285008770609485e-05, "loss": 2.2652, "step": 4942000 }, { "epoch": 14.31, "learning_rate": 4.2849364058447576e-05, "loss": 2.2722, "step": 4942500 }, { "epoch": 14.31, "learning_rate": 4.28486404108003e-05, "loss": 2.2555, "step": 4943000 }, { "epoch": 14.31, "learning_rate": 4.284791676315302e-05, "loss": 2.2733, "step": 4943500 }, { "epoch": 14.31, "learning_rate": 4.284719311550575e-05, "loss": 2.2719, "step": 4944000 }, { "epoch": 14.31, "learning_rate": 4.284646946785847e-05, "loss": 2.2711, "step": 4944500 }, { "epoch": 14.31, "learning_rate": 4.2845745820211193e-05, "loss": 2.2535, "step": 4945000 }, { "epoch": 14.32, "learning_rate": 4.2845022172563916e-05, "loss": 2.2521, "step": 4945500 }, { "epoch": 14.32, "learning_rate": 4.284429852491664e-05, "loss": 2.2566, "step": 4946000 }, { "epoch": 14.32, "learning_rate": 4.2843576324564653e-05, "loss": 2.2484, "step": 4946500 }, { "epoch": 14.32, "learning_rate": 4.2842852676917376e-05, "loss": 2.264, "step": 4947000 }, { "epoch": 14.32, "learning_rate": 4.28421290292701e-05, "loss": 2.2547, "step": 4947500 }, { "epoch": 14.32, "learning_rate": 4.284140538162282e-05, "loss": 2.2833, "step": 4948000 }, { "epoch": 14.32, "learning_rate": 4.284068173397555e-05, "loss": 2.2675, "step": 4948500 }, { "epoch": 14.33, "learning_rate": 4.283996098091886e-05, "loss": 2.2549, "step": 4949000 }, { "epoch": 14.33, "learning_rate": 4.283923733327159e-05, "loss": 2.2659, "step": 4949500 }, { "epoch": 14.33, "learning_rate": 4.283851368562431e-05, "loss": 2.2447, "step": 4950000 }, { "epoch": 14.33, "learning_rate": 4.283779003797703e-05, "loss": 2.2558, "step": 4950500 }, { "epoch": 14.33, "learning_rate": 4.2837066390329754e-05, "loss": 2.2728, "step": 4951000 }, { "epoch": 14.33, "learning_rate": 4.2836342742682476e-05, "loss": 2.2641, "step": 4951500 }, { "epoch": 14.33, "learning_rate": 4.28356190950352e-05, "loss": 2.2536, "step": 4952000 }, { "epoch": 14.34, "learning_rate": 4.283489689468322e-05, "loss": 2.2596, "step": 4952500 }, { "epoch": 14.34, "learning_rate": 4.283417324703594e-05, "loss": 2.2655, "step": 4953000 }, { "epoch": 14.34, "learning_rate": 4.2833449599388665e-05, "loss": 2.257, "step": 4953500 }, { "epoch": 14.34, "learning_rate": 4.283272595174139e-05, "loss": 2.2724, "step": 4954000 }, { "epoch": 14.34, "learning_rate": 4.283200230409411e-05, "loss": 2.2617, "step": 4954500 }, { "epoch": 14.34, "learning_rate": 4.283127865644683e-05, "loss": 2.2337, "step": 4955000 }, { "epoch": 14.34, "learning_rate": 4.2830555008799554e-05, "loss": 2.2571, "step": 4955500 }, { "epoch": 14.35, "learning_rate": 4.2829831361152276e-05, "loss": 2.2746, "step": 4956000 }, { "epoch": 14.35, "learning_rate": 4.28291091608003e-05, "loss": 2.2712, "step": 4956500 }, { "epoch": 14.35, "learning_rate": 4.282838551315303e-05, "loss": 2.2425, "step": 4957000 }, { "epoch": 14.35, "learning_rate": 4.282766186550575e-05, "loss": 2.2694, "step": 4957500 }, { "epoch": 14.35, "learning_rate": 4.282693821785847e-05, "loss": 2.2408, "step": 4958000 }, { "epoch": 14.35, "learning_rate": 4.2826214570211194e-05, "loss": 2.257, "step": 4958500 }, { "epoch": 14.35, "learning_rate": 4.2825490922563917e-05, "loss": 2.257, "step": 4959000 }, { "epoch": 14.36, "learning_rate": 4.282476727491664e-05, "loss": 2.2935, "step": 4959500 }, { "epoch": 14.36, "learning_rate": 4.282404362726936e-05, "loss": 2.2777, "step": 4960000 }, { "epoch": 14.36, "learning_rate": 4.282331997962208e-05, "loss": 2.2742, "step": 4960500 }, { "epoch": 14.36, "learning_rate": 4.28225977792701e-05, "loss": 2.2781, "step": 4961000 }, { "epoch": 14.36, "learning_rate": 4.282187413162283e-05, "loss": 2.2684, "step": 4961500 }, { "epoch": 14.36, "learning_rate": 4.282115048397555e-05, "loss": 2.2671, "step": 4962000 }, { "epoch": 14.36, "learning_rate": 4.282042683632827e-05, "loss": 2.2783, "step": 4962500 }, { "epoch": 14.37, "learning_rate": 4.2819703188680994e-05, "loss": 2.2531, "step": 4963000 }, { "epoch": 14.37, "learning_rate": 4.281897954103372e-05, "loss": 2.2707, "step": 4963500 }, { "epoch": 14.37, "learning_rate": 4.281825734068173e-05, "loss": 2.2549, "step": 4964000 }, { "epoch": 14.37, "learning_rate": 4.281753369303446e-05, "loss": 2.2475, "step": 4964500 }, { "epoch": 14.37, "learning_rate": 4.2816810045387184e-05, "loss": 2.2705, "step": 4965000 }, { "epoch": 14.37, "learning_rate": 4.28160878450352e-05, "loss": 2.2639, "step": 4965500 }, { "epoch": 14.37, "learning_rate": 4.281536419738793e-05, "loss": 2.2727, "step": 4966000 }, { "epoch": 14.38, "learning_rate": 4.281464054974065e-05, "loss": 2.2459, "step": 4966500 }, { "epoch": 14.38, "learning_rate": 4.281391690209337e-05, "loss": 2.2606, "step": 4967000 }, { "epoch": 14.38, "learning_rate": 4.2813193254446095e-05, "loss": 2.2473, "step": 4967500 }, { "epoch": 14.38, "learning_rate": 4.281246960679882e-05, "loss": 2.2509, "step": 4968000 }, { "epoch": 14.38, "learning_rate": 4.281174595915154e-05, "loss": 2.2198, "step": 4968500 }, { "epoch": 14.38, "learning_rate": 4.281102231150426e-05, "loss": 2.2572, "step": 4969000 }, { "epoch": 14.38, "learning_rate": 4.2810298663856984e-05, "loss": 2.2576, "step": 4969500 }, { "epoch": 14.39, "learning_rate": 4.2809575016209706e-05, "loss": 2.2672, "step": 4970000 }, { "epoch": 14.39, "learning_rate": 4.280885136856243e-05, "loss": 2.2527, "step": 4970500 }, { "epoch": 14.39, "learning_rate": 4.280812772091515e-05, "loss": 2.2496, "step": 4971000 }, { "epoch": 14.39, "learning_rate": 4.280740407326788e-05, "loss": 2.2624, "step": 4971500 }, { "epoch": 14.39, "learning_rate": 4.28066804256206e-05, "loss": 2.287, "step": 4972000 }, { "epoch": 14.39, "learning_rate": 4.2805956777973324e-05, "loss": 2.2787, "step": 4972500 }, { "epoch": 14.39, "learning_rate": 4.280523313032605e-05, "loss": 2.2601, "step": 4973000 }, { "epoch": 14.4, "learning_rate": 4.2804509482678775e-05, "loss": 2.2694, "step": 4973500 }, { "epoch": 14.4, "learning_rate": 4.280378728232679e-05, "loss": 2.2786, "step": 4974000 }, { "epoch": 14.4, "learning_rate": 4.280306363467951e-05, "loss": 2.2378, "step": 4974500 }, { "epoch": 14.4, "learning_rate": 4.280234143432753e-05, "loss": 2.2451, "step": 4975000 }, { "epoch": 14.4, "learning_rate": 4.280161778668025e-05, "loss": 2.2614, "step": 4975500 }, { "epoch": 14.4, "learning_rate": 4.280089558632827e-05, "loss": 2.2539, "step": 4976000 }, { "epoch": 14.4, "learning_rate": 4.2800171938680995e-05, "loss": 2.2728, "step": 4976500 }, { "epoch": 14.41, "learning_rate": 4.279944829103372e-05, "loss": 2.2882, "step": 4977000 }, { "epoch": 14.41, "learning_rate": 4.279872464338644e-05, "loss": 2.2469, "step": 4977500 }, { "epoch": 14.41, "learning_rate": 4.2798002443034455e-05, "loss": 2.2695, "step": 4978000 }, { "epoch": 14.41, "learning_rate": 4.279727879538718e-05, "loss": 2.2302, "step": 4978500 }, { "epoch": 14.41, "learning_rate": 4.2796555147739907e-05, "loss": 2.2731, "step": 4979000 }, { "epoch": 14.41, "learning_rate": 4.279583150009263e-05, "loss": 2.2492, "step": 4979500 }, { "epoch": 14.42, "learning_rate": 4.279510785244535e-05, "loss": 2.2774, "step": 4980000 }, { "epoch": 14.42, "learning_rate": 4.279438420479808e-05, "loss": 2.2572, "step": 4980500 }, { "epoch": 14.42, "learning_rate": 4.27936605571508e-05, "loss": 2.245, "step": 4981000 }, { "epoch": 14.42, "learning_rate": 4.2792936909503524e-05, "loss": 2.2688, "step": 4981500 }, { "epoch": 14.42, "learning_rate": 4.279221326185625e-05, "loss": 2.2925, "step": 4982000 }, { "epoch": 14.42, "learning_rate": 4.279148961420897e-05, "loss": 2.2866, "step": 4982500 }, { "epoch": 14.42, "learning_rate": 4.2790767413856984e-05, "loss": 2.2438, "step": 4983000 }, { "epoch": 14.43, "learning_rate": 4.279004376620971e-05, "loss": 2.2746, "step": 4983500 }, { "epoch": 14.43, "learning_rate": 4.278932156585773e-05, "loss": 2.2704, "step": 4984000 }, { "epoch": 14.43, "learning_rate": 4.278859791821045e-05, "loss": 2.2895, "step": 4984500 }, { "epoch": 14.43, "learning_rate": 4.2787874270563174e-05, "loss": 2.2683, "step": 4985000 }, { "epoch": 14.43, "learning_rate": 4.278715207021119e-05, "loss": 2.2632, "step": 4985500 }, { "epoch": 14.43, "learning_rate": 4.278642842256391e-05, "loss": 2.2472, "step": 4986000 }, { "epoch": 14.43, "learning_rate": 4.2785704774916634e-05, "loss": 2.2406, "step": 4986500 }, { "epoch": 14.44, "learning_rate": 4.278498112726936e-05, "loss": 2.2775, "step": 4987000 }, { "epoch": 14.44, "learning_rate": 4.2784257479622085e-05, "loss": 2.277, "step": 4987500 }, { "epoch": 14.44, "learning_rate": 4.278353383197481e-05, "loss": 2.2476, "step": 4988000 }, { "epoch": 14.44, "learning_rate": 4.278281018432753e-05, "loss": 2.2656, "step": 4988500 }, { "epoch": 14.44, "learning_rate": 4.278208798397555e-05, "loss": 2.26, "step": 4989000 }, { "epoch": 14.44, "learning_rate": 4.278136578362357e-05, "loss": 2.2771, "step": 4989500 }, { "epoch": 14.44, "learning_rate": 4.278064213597629e-05, "loss": 2.2691, "step": 4990000 }, { "epoch": 14.45, "learning_rate": 4.277991848832901e-05, "loss": 2.2631, "step": 4990500 }, { "epoch": 14.45, "learning_rate": 4.2779194840681734e-05, "loss": 2.2664, "step": 4991000 }, { "epoch": 14.45, "learning_rate": 4.2778471193034456e-05, "loss": 2.2421, "step": 4991500 }, { "epoch": 14.45, "learning_rate": 4.277774754538718e-05, "loss": 2.2816, "step": 4992000 }, { "epoch": 14.45, "learning_rate": 4.277702389773991e-05, "loss": 2.2738, "step": 4992500 }, { "epoch": 14.45, "learning_rate": 4.277630169738792e-05, "loss": 2.2364, "step": 4993000 }, { "epoch": 14.45, "learning_rate": 4.2775578049740645e-05, "loss": 2.2808, "step": 4993500 }, { "epoch": 14.46, "learning_rate": 4.277485440209337e-05, "loss": 2.2776, "step": 4994000 }, { "epoch": 14.46, "learning_rate": 4.2774130754446096e-05, "loss": 2.2705, "step": 4994500 }, { "epoch": 14.46, "learning_rate": 4.277340710679882e-05, "loss": 2.2608, "step": 4995000 }, { "epoch": 14.46, "learning_rate": 4.277268345915154e-05, "loss": 2.2819, "step": 4995500 }, { "epoch": 14.46, "learning_rate": 4.277195981150426e-05, "loss": 2.2703, "step": 4996000 }, { "epoch": 14.46, "learning_rate": 4.2771236163856985e-05, "loss": 2.2625, "step": 4996500 }, { "epoch": 14.46, "learning_rate": 4.277051251620971e-05, "loss": 2.2532, "step": 4997000 }, { "epoch": 14.47, "learning_rate": 4.276978886856243e-05, "loss": 2.2425, "step": 4997500 }, { "epoch": 14.47, "learning_rate": 4.276906522091516e-05, "loss": 2.3025, "step": 4998000 }, { "epoch": 14.47, "learning_rate": 4.276834157326788e-05, "loss": 2.2713, "step": 4998500 }, { "epoch": 14.47, "learning_rate": 4.27676179256206e-05, "loss": 2.2791, "step": 4999000 }, { "epoch": 14.47, "learning_rate": 4.276689572526862e-05, "loss": 2.2601, "step": 4999500 }, { "epoch": 14.47, "learning_rate": 4.2766173524916634e-05, "loss": 2.2563, "step": 5000000 }, { "epoch": 14.47, "learning_rate": 4.2765449877269357e-05, "loss": 2.2839, "step": 5000500 }, { "epoch": 14.48, "learning_rate": 4.276472622962208e-05, "loss": 2.2437, "step": 5001000 }, { "epoch": 14.48, "learning_rate": 4.276400258197481e-05, "loss": 2.2377, "step": 5001500 }, { "epoch": 14.48, "learning_rate": 4.276327893432753e-05, "loss": 2.2757, "step": 5002000 }, { "epoch": 14.48, "learning_rate": 4.276255528668026e-05, "loss": 2.2633, "step": 5002500 }, { "epoch": 14.48, "learning_rate": 4.276183163903298e-05, "loss": 2.2706, "step": 5003000 }, { "epoch": 14.48, "learning_rate": 4.2761107991385704e-05, "loss": 2.2684, "step": 5003500 }, { "epoch": 14.48, "learning_rate": 4.2760384343738426e-05, "loss": 2.2525, "step": 5004000 }, { "epoch": 14.49, "learning_rate": 4.275966069609115e-05, "loss": 2.2616, "step": 5004500 }, { "epoch": 14.49, "learning_rate": 4.275893704844387e-05, "loss": 2.2695, "step": 5005000 }, { "epoch": 14.49, "learning_rate": 4.2758214848091886e-05, "loss": 2.2734, "step": 5005500 }, { "epoch": 14.49, "learning_rate": 4.275749120044461e-05, "loss": 2.2492, "step": 5006000 }, { "epoch": 14.49, "learning_rate": 4.275676755279733e-05, "loss": 2.2521, "step": 5006500 }, { "epoch": 14.49, "learning_rate": 4.275604390515006e-05, "loss": 2.2645, "step": 5007000 }, { "epoch": 14.49, "learning_rate": 4.275532315209337e-05, "loss": 2.2505, "step": 5007500 }, { "epoch": 14.5, "learning_rate": 4.2754600951741384e-05, "loss": 2.2546, "step": 5008000 }, { "epoch": 14.5, "learning_rate": 4.27538801986847e-05, "loss": 2.2508, "step": 5008500 }, { "epoch": 14.5, "learning_rate": 4.275315655103742e-05, "loss": 2.2476, "step": 5009000 }, { "epoch": 14.5, "learning_rate": 4.2752432903390144e-05, "loss": 2.2683, "step": 5009500 }, { "epoch": 14.5, "learning_rate": 4.275170925574287e-05, "loss": 2.2751, "step": 5010000 }, { "epoch": 14.5, "learning_rate": 4.2750985608095595e-05, "loss": 2.2739, "step": 5010500 }, { "epoch": 14.5, "learning_rate": 4.275026196044832e-05, "loss": 2.274, "step": 5011000 }, { "epoch": 14.51, "learning_rate": 4.274953831280104e-05, "loss": 2.2808, "step": 5011500 }, { "epoch": 14.51, "learning_rate": 4.274881466515376e-05, "loss": 2.2541, "step": 5012000 }, { "epoch": 14.51, "learning_rate": 4.2748091017506484e-05, "loss": 2.255, "step": 5012500 }, { "epoch": 14.51, "learning_rate": 4.2747367369859206e-05, "loss": 2.2907, "step": 5013000 }, { "epoch": 14.51, "learning_rate": 4.2746643722211935e-05, "loss": 2.269, "step": 5013500 }, { "epoch": 14.51, "learning_rate": 4.274592007456466e-05, "loss": 2.2697, "step": 5014000 }, { "epoch": 14.51, "learning_rate": 4.274519642691738e-05, "loss": 2.2528, "step": 5014500 }, { "epoch": 14.52, "learning_rate": 4.27444727792701e-05, "loss": 2.2682, "step": 5015000 }, { "epoch": 14.52, "learning_rate": 4.2743749131622824e-05, "loss": 2.2597, "step": 5015500 }, { "epoch": 14.52, "learning_rate": 4.2743025483975546e-05, "loss": 2.2694, "step": 5016000 }, { "epoch": 14.52, "learning_rate": 4.274230328362356e-05, "loss": 2.2538, "step": 5016500 }, { "epoch": 14.52, "learning_rate": 4.274157963597629e-05, "loss": 2.2547, "step": 5017000 }, { "epoch": 14.52, "learning_rate": 4.274085598832901e-05, "loss": 2.2635, "step": 5017500 }, { "epoch": 14.53, "learning_rate": 4.2740132340681735e-05, "loss": 2.2839, "step": 5018000 }, { "epoch": 14.53, "learning_rate": 4.273940869303446e-05, "loss": 2.276, "step": 5018500 }, { "epoch": 14.53, "learning_rate": 4.273868649268248e-05, "loss": 2.2488, "step": 5019000 }, { "epoch": 14.53, "learning_rate": 4.27379628450352e-05, "loss": 2.2731, "step": 5019500 }, { "epoch": 14.53, "learning_rate": 4.2737239197387924e-05, "loss": 2.2651, "step": 5020000 }, { "epoch": 14.53, "learning_rate": 4.273651554974065e-05, "loss": 2.2626, "step": 5020500 }, { "epoch": 14.53, "learning_rate": 4.273579190209337e-05, "loss": 2.2678, "step": 5021000 }, { "epoch": 14.54, "learning_rate": 4.273506825444609e-05, "loss": 2.279, "step": 5021500 }, { "epoch": 14.54, "learning_rate": 4.2734344606798813e-05, "loss": 2.2589, "step": 5022000 }, { "epoch": 14.54, "learning_rate": 4.2733620959151536e-05, "loss": 2.2784, "step": 5022500 }, { "epoch": 14.54, "learning_rate": 4.273289731150426e-05, "loss": 2.2533, "step": 5023000 }, { "epoch": 14.54, "learning_rate": 4.273217366385699e-05, "loss": 2.2489, "step": 5023500 }, { "epoch": 14.54, "learning_rate": 4.273145001620971e-05, "loss": 2.2578, "step": 5024000 }, { "epoch": 14.54, "learning_rate": 4.273072781585773e-05, "loss": 2.3026, "step": 5024500 }, { "epoch": 14.55, "learning_rate": 4.2730004168210454e-05, "loss": 2.2564, "step": 5025000 }, { "epoch": 14.55, "learning_rate": 4.2729280520563176e-05, "loss": 2.2527, "step": 5025500 }, { "epoch": 14.55, "learning_rate": 4.27285568729159e-05, "loss": 2.2573, "step": 5026000 }, { "epoch": 14.55, "learning_rate": 4.272783322526862e-05, "loss": 2.271, "step": 5026500 }, { "epoch": 14.55, "learning_rate": 4.272710957762134e-05, "loss": 2.2425, "step": 5027000 }, { "epoch": 14.55, "learning_rate": 4.272638737726936e-05, "loss": 2.259, "step": 5027500 }, { "epoch": 14.55, "learning_rate": 4.272566372962209e-05, "loss": 2.2686, "step": 5028000 }, { "epoch": 14.56, "learning_rate": 4.272494008197481e-05, "loss": 2.2592, "step": 5028500 }, { "epoch": 14.56, "learning_rate": 4.272421643432753e-05, "loss": 2.2575, "step": 5029000 }, { "epoch": 14.56, "learning_rate": 4.272349423397555e-05, "loss": 2.2856, "step": 5029500 }, { "epoch": 14.56, "learning_rate": 4.272277203362356e-05, "loss": 2.2561, "step": 5030000 }, { "epoch": 14.56, "learning_rate": 4.2722048385976285e-05, "loss": 2.2627, "step": 5030500 }, { "epoch": 14.56, "learning_rate": 4.272132473832901e-05, "loss": 2.2811, "step": 5031000 }, { "epoch": 14.56, "learning_rate": 4.2720601090681736e-05, "loss": 2.2557, "step": 5031500 }, { "epoch": 14.57, "learning_rate": 4.271987744303446e-05, "loss": 2.2722, "step": 5032000 }, { "epoch": 14.57, "learning_rate": 4.271915379538719e-05, "loss": 2.2477, "step": 5032500 }, { "epoch": 14.57, "learning_rate": 4.27184315950352e-05, "loss": 2.2637, "step": 5033000 }, { "epoch": 14.57, "learning_rate": 4.2717707947387925e-05, "loss": 2.2376, "step": 5033500 }, { "epoch": 14.57, "learning_rate": 4.271698429974065e-05, "loss": 2.2571, "step": 5034000 }, { "epoch": 14.57, "learning_rate": 4.271626065209337e-05, "loss": 2.2419, "step": 5034500 }, { "epoch": 14.57, "learning_rate": 4.271553700444609e-05, "loss": 2.2338, "step": 5035000 }, { "epoch": 14.58, "learning_rate": 4.2714813356798814e-05, "loss": 2.2478, "step": 5035500 }, { "epoch": 14.58, "learning_rate": 4.2714089709151536e-05, "loss": 2.2742, "step": 5036000 }, { "epoch": 14.58, "learning_rate": 4.271336606150426e-05, "loss": 2.2755, "step": 5036500 }, { "epoch": 14.58, "learning_rate": 4.271264241385699e-05, "loss": 2.2541, "step": 5037000 }, { "epoch": 14.58, "learning_rate": 4.271191876620971e-05, "loss": 2.2717, "step": 5037500 }, { "epoch": 14.58, "learning_rate": 4.2711196565857725e-05, "loss": 2.2558, "step": 5038000 }, { "epoch": 14.58, "learning_rate": 4.271047291821045e-05, "loss": 2.2427, "step": 5038500 }, { "epoch": 14.59, "learning_rate": 4.270974927056318e-05, "loss": 2.273, "step": 5039000 }, { "epoch": 14.59, "learning_rate": 4.27090256229159e-05, "loss": 2.2897, "step": 5039500 }, { "epoch": 14.59, "learning_rate": 4.270830197526862e-05, "loss": 2.2426, "step": 5040000 }, { "epoch": 14.59, "learning_rate": 4.2707578327621343e-05, "loss": 2.2643, "step": 5040500 }, { "epoch": 14.59, "learning_rate": 4.2706854679974066e-05, "loss": 2.2773, "step": 5041000 }, { "epoch": 14.59, "learning_rate": 4.270613103232679e-05, "loss": 2.2392, "step": 5041500 }, { "epoch": 14.59, "learning_rate": 4.270540883197481e-05, "loss": 2.2834, "step": 5042000 }, { "epoch": 14.6, "learning_rate": 4.270468518432753e-05, "loss": 2.2807, "step": 5042500 }, { "epoch": 14.6, "learning_rate": 4.2703961536680255e-05, "loss": 2.2641, "step": 5043000 }, { "epoch": 14.6, "learning_rate": 4.270323788903298e-05, "loss": 2.2379, "step": 5043500 }, { "epoch": 14.6, "learning_rate": 4.27025142413857e-05, "loss": 2.2685, "step": 5044000 }, { "epoch": 14.6, "learning_rate": 4.2701792041033715e-05, "loss": 2.2851, "step": 5044500 }, { "epoch": 14.6, "learning_rate": 4.270106839338644e-05, "loss": 2.2523, "step": 5045000 }, { "epoch": 14.6, "learning_rate": 4.270034474573916e-05, "loss": 2.234, "step": 5045500 }, { "epoch": 14.61, "learning_rate": 4.269962109809189e-05, "loss": 2.2641, "step": 5046000 }, { "epoch": 14.61, "learning_rate": 4.269889745044461e-05, "loss": 2.2471, "step": 5046500 }, { "epoch": 14.61, "learning_rate": 4.269817380279734e-05, "loss": 2.2852, "step": 5047000 }, { "epoch": 14.61, "learning_rate": 4.2697451602445355e-05, "loss": 2.2789, "step": 5047500 }, { "epoch": 14.61, "learning_rate": 4.269672795479808e-05, "loss": 2.2612, "step": 5048000 }, { "epoch": 14.61, "learning_rate": 4.26960043071508e-05, "loss": 2.2683, "step": 5048500 }, { "epoch": 14.61, "learning_rate": 4.269528065950352e-05, "loss": 2.2546, "step": 5049000 }, { "epoch": 14.62, "learning_rate": 4.2694557011856244e-05, "loss": 2.2828, "step": 5049500 }, { "epoch": 14.62, "learning_rate": 4.2693834811504266e-05, "loss": 2.2591, "step": 5050000 }, { "epoch": 14.62, "learning_rate": 4.269311116385699e-05, "loss": 2.2912, "step": 5050500 }, { "epoch": 14.62, "learning_rate": 4.2692388963505004e-05, "loss": 2.2748, "step": 5051000 }, { "epoch": 14.62, "learning_rate": 4.2691665315857726e-05, "loss": 2.2566, "step": 5051500 }, { "epoch": 14.62, "learning_rate": 4.269094166821045e-05, "loss": 2.2521, "step": 5052000 }, { "epoch": 14.62, "learning_rate": 4.2690219467858464e-05, "loss": 2.2661, "step": 5052500 }, { "epoch": 14.63, "learning_rate": 4.2689495820211186e-05, "loss": 2.2664, "step": 5053000 }, { "epoch": 14.63, "learning_rate": 4.2688772172563915e-05, "loss": 2.2805, "step": 5053500 }, { "epoch": 14.63, "learning_rate": 4.268804852491664e-05, "loss": 2.2769, "step": 5054000 }, { "epoch": 14.63, "learning_rate": 4.2687324877269367e-05, "loss": 2.2681, "step": 5054500 }, { "epoch": 14.63, "learning_rate": 4.268660122962209e-05, "loss": 2.2437, "step": 5055000 }, { "epoch": 14.63, "learning_rate": 4.268587758197481e-05, "loss": 2.2504, "step": 5055500 }, { "epoch": 14.64, "learning_rate": 4.268515393432753e-05, "loss": 2.2603, "step": 5056000 }, { "epoch": 14.64, "learning_rate": 4.2684430286680255e-05, "loss": 2.259, "step": 5056500 }, { "epoch": 14.64, "learning_rate": 4.268370663903298e-05, "loss": 2.2464, "step": 5057000 }, { "epoch": 14.64, "learning_rate": 4.26829829913857e-05, "loss": 2.2718, "step": 5057500 }, { "epoch": 14.64, "learning_rate": 4.268225934373842e-05, "loss": 2.267, "step": 5058000 }, { "epoch": 14.64, "learning_rate": 4.2681535696091144e-05, "loss": 2.2833, "step": 5058500 }, { "epoch": 14.64, "learning_rate": 4.268081204844387e-05, "loss": 2.2661, "step": 5059000 }, { "epoch": 14.65, "learning_rate": 4.268008840079659e-05, "loss": 2.2771, "step": 5059500 }, { "epoch": 14.65, "learning_rate": 4.267936475314932e-05, "loss": 2.247, "step": 5060000 }, { "epoch": 14.65, "learning_rate": 4.267864400009263e-05, "loss": 2.2544, "step": 5060500 }, { "epoch": 14.65, "learning_rate": 4.267792035244535e-05, "loss": 2.2406, "step": 5061000 }, { "epoch": 14.65, "learning_rate": 4.267719670479808e-05, "loss": 2.2702, "step": 5061500 }, { "epoch": 14.65, "learning_rate": 4.26764730571508e-05, "loss": 2.2795, "step": 5062000 }, { "epoch": 14.65, "learning_rate": 4.267574940950352e-05, "loss": 2.2614, "step": 5062500 }, { "epoch": 14.66, "learning_rate": 4.2675025761856245e-05, "loss": 2.2662, "step": 5063000 }, { "epoch": 14.66, "learning_rate": 4.267430211420897e-05, "loss": 2.2829, "step": 5063500 }, { "epoch": 14.66, "learning_rate": 4.267357991385699e-05, "loss": 2.2756, "step": 5064000 }, { "epoch": 14.66, "learning_rate": 4.267285626620971e-05, "loss": 2.255, "step": 5064500 }, { "epoch": 14.66, "learning_rate": 4.2672132618562434e-05, "loss": 2.2644, "step": 5065000 }, { "epoch": 14.66, "learning_rate": 4.2671408970915156e-05, "loss": 2.2677, "step": 5065500 }, { "epoch": 14.66, "learning_rate": 4.267068532326788e-05, "loss": 2.2556, "step": 5066000 }, { "epoch": 14.67, "learning_rate": 4.26699616756206e-05, "loss": 2.2772, "step": 5066500 }, { "epoch": 14.67, "learning_rate": 4.2669239475268616e-05, "loss": 2.2784, "step": 5067000 }, { "epoch": 14.67, "learning_rate": 4.266851582762134e-05, "loss": 2.2518, "step": 5067500 }, { "epoch": 14.67, "learning_rate": 4.266779217997407e-05, "loss": 2.2794, "step": 5068000 }, { "epoch": 14.67, "learning_rate": 4.266706853232679e-05, "loss": 2.2576, "step": 5068500 }, { "epoch": 14.67, "learning_rate": 4.266634488467952e-05, "loss": 2.2702, "step": 5069000 }, { "epoch": 14.67, "learning_rate": 4.266562123703224e-05, "loss": 2.256, "step": 5069500 }, { "epoch": 14.68, "learning_rate": 4.266489758938496e-05, "loss": 2.2874, "step": 5070000 }, { "epoch": 14.68, "learning_rate": 4.2664173941737685e-05, "loss": 2.2396, "step": 5070500 }, { "epoch": 14.68, "learning_rate": 4.266345029409041e-05, "loss": 2.2536, "step": 5071000 }, { "epoch": 14.68, "learning_rate": 4.266272809373842e-05, "loss": 2.2763, "step": 5071500 }, { "epoch": 14.68, "learning_rate": 4.2662005893386445e-05, "loss": 2.2916, "step": 5072000 }, { "epoch": 14.68, "learning_rate": 4.266128224573917e-05, "loss": 2.2529, "step": 5072500 }, { "epoch": 14.68, "learning_rate": 4.266055859809189e-05, "loss": 2.2643, "step": 5073000 }, { "epoch": 14.69, "learning_rate": 4.265983495044461e-05, "loss": 2.2567, "step": 5073500 }, { "epoch": 14.69, "learning_rate": 4.2659111302797334e-05, "loss": 2.2664, "step": 5074000 }, { "epoch": 14.69, "learning_rate": 4.265839054974064e-05, "loss": 2.2675, "step": 5074500 }, { "epoch": 14.69, "learning_rate": 4.2657666902093365e-05, "loss": 2.2797, "step": 5075000 }, { "epoch": 14.69, "learning_rate": 4.2656943254446094e-05, "loss": 2.2461, "step": 5075500 }, { "epoch": 14.69, "learning_rate": 4.2656219606798817e-05, "loss": 2.2577, "step": 5076000 }, { "epoch": 14.69, "learning_rate": 4.265549740644683e-05, "loss": 2.2702, "step": 5076500 }, { "epoch": 14.7, "learning_rate": 4.265477375879956e-05, "loss": 2.2562, "step": 5077000 }, { "epoch": 14.7, "learning_rate": 4.2654050111152283e-05, "loss": 2.2568, "step": 5077500 }, { "epoch": 14.7, "learning_rate": 4.2653326463505006e-05, "loss": 2.2799, "step": 5078000 }, { "epoch": 14.7, "learning_rate": 4.265260281585773e-05, "loss": 2.265, "step": 5078500 }, { "epoch": 14.7, "learning_rate": 4.265187916821045e-05, "loss": 2.2837, "step": 5079000 }, { "epoch": 14.7, "learning_rate": 4.265115552056317e-05, "loss": 2.2636, "step": 5079500 }, { "epoch": 14.7, "learning_rate": 4.2650431872915895e-05, "loss": 2.2382, "step": 5080000 }, { "epoch": 14.71, "learning_rate": 4.264970822526862e-05, "loss": 2.2441, "step": 5080500 }, { "epoch": 14.71, "learning_rate": 4.2648984577621346e-05, "loss": 2.2519, "step": 5081000 }, { "epoch": 14.71, "learning_rate": 4.264826237726936e-05, "loss": 2.2543, "step": 5081500 }, { "epoch": 14.71, "learning_rate": 4.2647538729622084e-05, "loss": 2.2707, "step": 5082000 }, { "epoch": 14.71, "learning_rate": 4.2646815081974806e-05, "loss": 2.2567, "step": 5082500 }, { "epoch": 14.71, "learning_rate": 4.264609143432753e-05, "loss": 2.2735, "step": 5083000 }, { "epoch": 14.71, "learning_rate": 4.2645369233975544e-05, "loss": 2.2537, "step": 5083500 }, { "epoch": 14.72, "learning_rate": 4.2644645586328266e-05, "loss": 2.2508, "step": 5084000 }, { "epoch": 14.72, "learning_rate": 4.2643921938680995e-05, "loss": 2.2643, "step": 5084500 }, { "epoch": 14.72, "learning_rate": 4.264319829103372e-05, "loss": 2.2482, "step": 5085000 }, { "epoch": 14.72, "learning_rate": 4.2642474643386446e-05, "loss": 2.2786, "step": 5085500 }, { "epoch": 14.72, "learning_rate": 4.264175099573917e-05, "loss": 2.2523, "step": 5086000 }, { "epoch": 14.72, "learning_rate": 4.264102734809189e-05, "loss": 2.2452, "step": 5086500 }, { "epoch": 14.72, "learning_rate": 4.264030370044461e-05, "loss": 2.2664, "step": 5087000 }, { "epoch": 14.73, "learning_rate": 4.263958150009263e-05, "loss": 2.2546, "step": 5087500 }, { "epoch": 14.73, "learning_rate": 4.263885785244535e-05, "loss": 2.279, "step": 5088000 }, { "epoch": 14.73, "learning_rate": 4.263813420479807e-05, "loss": 2.2916, "step": 5088500 }, { "epoch": 14.73, "learning_rate": 4.2637410557150795e-05, "loss": 2.2632, "step": 5089000 }, { "epoch": 14.73, "learning_rate": 4.263668690950352e-05, "loss": 2.2829, "step": 5089500 }, { "epoch": 14.73, "learning_rate": 4.2635963261856246e-05, "loss": 2.2527, "step": 5090000 }, { "epoch": 14.73, "learning_rate": 4.263523961420897e-05, "loss": 2.2334, "step": 5090500 }, { "epoch": 14.74, "learning_rate": 4.263451596656169e-05, "loss": 2.2837, "step": 5091000 }, { "epoch": 14.74, "learning_rate": 4.263379376620971e-05, "loss": 2.2579, "step": 5091500 }, { "epoch": 14.74, "learning_rate": 4.2633070118562435e-05, "loss": 2.2582, "step": 5092000 }, { "epoch": 14.74, "learning_rate": 4.263234647091516e-05, "loss": 2.226, "step": 5092500 }, { "epoch": 14.74, "learning_rate": 4.263162282326788e-05, "loss": 2.2571, "step": 5093000 }, { "epoch": 14.74, "learning_rate": 4.26308991756206e-05, "loss": 2.2457, "step": 5093500 }, { "epoch": 14.75, "learning_rate": 4.2630175527973324e-05, "loss": 2.2832, "step": 5094000 }, { "epoch": 14.75, "learning_rate": 4.2629451880326046e-05, "loss": 2.2343, "step": 5094500 }, { "epoch": 14.75, "learning_rate": 4.262872823267877e-05, "loss": 2.2638, "step": 5095000 }, { "epoch": 14.75, "learning_rate": 4.26280045850315e-05, "loss": 2.2516, "step": 5095500 }, { "epoch": 14.75, "learning_rate": 4.262728238467951e-05, "loss": 2.2649, "step": 5096000 }, { "epoch": 14.75, "learning_rate": 4.2626558737032236e-05, "loss": 2.2628, "step": 5096500 }, { "epoch": 14.75, "learning_rate": 4.262583508938496e-05, "loss": 2.2607, "step": 5097000 }, { "epoch": 14.76, "learning_rate": 4.262511144173768e-05, "loss": 2.2666, "step": 5097500 }, { "epoch": 14.76, "learning_rate": 4.2624389241385696e-05, "loss": 2.2861, "step": 5098000 }, { "epoch": 14.76, "learning_rate": 4.262366559373842e-05, "loss": 2.2659, "step": 5098500 }, { "epoch": 14.76, "learning_rate": 4.262294194609115e-05, "loss": 2.2389, "step": 5099000 }, { "epoch": 14.76, "learning_rate": 4.262221829844387e-05, "loss": 2.236, "step": 5099500 }, { "epoch": 14.76, "learning_rate": 4.26214946507966e-05, "loss": 2.2735, "step": 5100000 }, { "epoch": 14.76, "learning_rate": 4.262077100314932e-05, "loss": 2.2446, "step": 5100500 }, { "epoch": 14.77, "learning_rate": 4.2620048802797336e-05, "loss": 2.25, "step": 5101000 }, { "epoch": 14.77, "learning_rate": 4.261932515515006e-05, "loss": 2.2492, "step": 5101500 }, { "epoch": 14.77, "learning_rate": 4.261860150750278e-05, "loss": 2.2639, "step": 5102000 }, { "epoch": 14.77, "learning_rate": 4.26178778598555e-05, "loss": 2.2573, "step": 5102500 }, { "epoch": 14.77, "learning_rate": 4.2617154212208225e-05, "loss": 2.2543, "step": 5103000 }, { "epoch": 14.77, "learning_rate": 4.261643056456095e-05, "loss": 2.2686, "step": 5103500 }, { "epoch": 14.77, "learning_rate": 4.261570691691367e-05, "loss": 2.2652, "step": 5104000 }, { "epoch": 14.78, "learning_rate": 4.26149832692664e-05, "loss": 2.2638, "step": 5104500 }, { "epoch": 14.78, "learning_rate": 4.2614261068914414e-05, "loss": 2.2506, "step": 5105000 }, { "epoch": 14.78, "learning_rate": 4.2613537421267136e-05, "loss": 2.268, "step": 5105500 }, { "epoch": 14.78, "learning_rate": 4.261281377361986e-05, "loss": 2.2545, "step": 5106000 }, { "epoch": 14.78, "learning_rate": 4.261209012597259e-05, "loss": 2.2834, "step": 5106500 }, { "epoch": 14.78, "learning_rate": 4.261136647832531e-05, "loss": 2.2715, "step": 5107000 }, { "epoch": 14.78, "learning_rate": 4.261064283067803e-05, "loss": 2.2489, "step": 5107500 }, { "epoch": 14.79, "learning_rate": 4.260992063032605e-05, "loss": 2.2593, "step": 5108000 }, { "epoch": 14.79, "learning_rate": 4.260919698267877e-05, "loss": 2.2679, "step": 5108500 }, { "epoch": 14.79, "learning_rate": 4.26084733350315e-05, "loss": 2.2666, "step": 5109000 }, { "epoch": 14.79, "learning_rate": 4.260774968738422e-05, "loss": 2.2747, "step": 5109500 }, { "epoch": 14.79, "learning_rate": 4.260702603973694e-05, "loss": 2.2757, "step": 5110000 }, { "epoch": 14.79, "learning_rate": 4.2606302392089665e-05, "loss": 2.2584, "step": 5110500 }, { "epoch": 14.79, "learning_rate": 4.260557874444239e-05, "loss": 2.2557, "step": 5111000 }, { "epoch": 14.8, "learning_rate": 4.260485509679511e-05, "loss": 2.2852, "step": 5111500 }, { "epoch": 14.8, "learning_rate": 4.2604132896443125e-05, "loss": 2.2511, "step": 5112000 }, { "epoch": 14.8, "learning_rate": 4.260340924879585e-05, "loss": 2.2449, "step": 5112500 }, { "epoch": 14.8, "learning_rate": 4.260268560114857e-05, "loss": 2.2715, "step": 5113000 }, { "epoch": 14.8, "learning_rate": 4.26019619535013e-05, "loss": 2.2405, "step": 5113500 }, { "epoch": 14.8, "learning_rate": 4.260123975314932e-05, "loss": 2.2657, "step": 5114000 }, { "epoch": 14.8, "learning_rate": 4.260051610550204e-05, "loss": 2.2585, "step": 5114500 }, { "epoch": 14.81, "learning_rate": 4.2599792457854766e-05, "loss": 2.2686, "step": 5115000 }, { "epoch": 14.81, "learning_rate": 4.259906881020749e-05, "loss": 2.2813, "step": 5115500 }, { "epoch": 14.81, "learning_rate": 4.259834516256021e-05, "loss": 2.2823, "step": 5116000 }, { "epoch": 14.81, "learning_rate": 4.2597622962208226e-05, "loss": 2.2806, "step": 5116500 }, { "epoch": 14.81, "learning_rate": 4.259689931456095e-05, "loss": 2.2544, "step": 5117000 }, { "epoch": 14.81, "learning_rate": 4.259617711420897e-05, "loss": 2.2838, "step": 5117500 }, { "epoch": 14.81, "learning_rate": 4.259545346656169e-05, "loss": 2.2719, "step": 5118000 }, { "epoch": 14.82, "learning_rate": 4.2594729818914415e-05, "loss": 2.2779, "step": 5118500 }, { "epoch": 14.82, "learning_rate": 4.259400617126714e-05, "loss": 2.2589, "step": 5119000 }, { "epoch": 14.82, "learning_rate": 4.259328252361986e-05, "loss": 2.2533, "step": 5119500 }, { "epoch": 14.82, "learning_rate": 4.259255887597258e-05, "loss": 2.2878, "step": 5120000 }, { "epoch": 14.82, "learning_rate": 4.2591835228325304e-05, "loss": 2.2841, "step": 5120500 }, { "epoch": 14.82, "learning_rate": 4.259111158067803e-05, "loss": 2.2521, "step": 5121000 }, { "epoch": 14.82, "learning_rate": 4.259038938032605e-05, "loss": 2.2668, "step": 5121500 }, { "epoch": 14.83, "learning_rate": 4.258966717997407e-05, "loss": 2.249, "step": 5122000 }, { "epoch": 14.83, "learning_rate": 4.258894353232679e-05, "loss": 2.2576, "step": 5122500 }, { "epoch": 14.83, "learning_rate": 4.2588219884679515e-05, "loss": 2.2828, "step": 5123000 }, { "epoch": 14.83, "learning_rate": 4.258749623703224e-05, "loss": 2.2737, "step": 5123500 }, { "epoch": 14.83, "learning_rate": 4.258677258938496e-05, "loss": 2.2839, "step": 5124000 }, { "epoch": 14.83, "learning_rate": 4.258604894173768e-05, "loss": 2.2663, "step": 5124500 }, { "epoch": 14.83, "learning_rate": 4.2585325294090404e-05, "loss": 2.255, "step": 5125000 }, { "epoch": 14.84, "learning_rate": 4.2584601646443126e-05, "loss": 2.2762, "step": 5125500 }, { "epoch": 14.84, "learning_rate": 4.258387799879585e-05, "loss": 2.2659, "step": 5126000 }, { "epoch": 14.84, "learning_rate": 4.258315435114858e-05, "loss": 2.2631, "step": 5126500 }, { "epoch": 14.84, "learning_rate": 4.25824307035013e-05, "loss": 2.2782, "step": 5127000 }, { "epoch": 14.84, "learning_rate": 4.258170705585402e-05, "loss": 2.2754, "step": 5127500 }, { "epoch": 14.84, "learning_rate": 4.2580983408206744e-05, "loss": 2.2746, "step": 5128000 }, { "epoch": 14.84, "learning_rate": 4.258025976055947e-05, "loss": 2.2646, "step": 5128500 }, { "epoch": 14.85, "learning_rate": 4.2579536112912195e-05, "loss": 2.2604, "step": 5129000 }, { "epoch": 14.85, "learning_rate": 4.257881391256021e-05, "loss": 2.276, "step": 5129500 }, { "epoch": 14.85, "learning_rate": 4.257809026491293e-05, "loss": 2.2626, "step": 5130000 }, { "epoch": 14.85, "learning_rate": 4.2577366617265655e-05, "loss": 2.2633, "step": 5130500 }, { "epoch": 14.85, "learning_rate": 4.257664441691368e-05, "loss": 2.2634, "step": 5131000 }, { "epoch": 14.85, "learning_rate": 4.25759207692664e-05, "loss": 2.2259, "step": 5131500 }, { "epoch": 14.86, "learning_rate": 4.257519712161912e-05, "loss": 2.2693, "step": 5132000 }, { "epoch": 14.86, "learning_rate": 4.2574473473971844e-05, "loss": 2.2537, "step": 5132500 }, { "epoch": 14.86, "learning_rate": 4.2573749826324567e-05, "loss": 2.2531, "step": 5133000 }, { "epoch": 14.86, "learning_rate": 4.257302617867729e-05, "loss": 2.2665, "step": 5133500 }, { "epoch": 14.86, "learning_rate": 4.2572303978325304e-05, "loss": 2.2561, "step": 5134000 }, { "epoch": 14.86, "learning_rate": 4.2571580330678027e-05, "loss": 2.2536, "step": 5134500 }, { "epoch": 14.86, "learning_rate": 4.257085668303075e-05, "loss": 2.2628, "step": 5135000 }, { "epoch": 14.87, "learning_rate": 4.257013303538348e-05, "loss": 2.2668, "step": 5135500 }, { "epoch": 14.87, "learning_rate": 4.25694108350315e-05, "loss": 2.2697, "step": 5136000 }, { "epoch": 14.87, "learning_rate": 4.256868718738422e-05, "loss": 2.2687, "step": 5136500 }, { "epoch": 14.87, "learning_rate": 4.2567963539736945e-05, "loss": 2.2622, "step": 5137000 }, { "epoch": 14.87, "learning_rate": 4.256723989208967e-05, "loss": 2.2873, "step": 5137500 }, { "epoch": 14.87, "learning_rate": 4.256651624444239e-05, "loss": 2.2694, "step": 5138000 }, { "epoch": 14.87, "learning_rate": 4.256579259679511e-05, "loss": 2.2477, "step": 5138500 }, { "epoch": 14.88, "learning_rate": 4.256507039644313e-05, "loss": 2.2874, "step": 5139000 }, { "epoch": 14.88, "learning_rate": 4.2564346748795856e-05, "loss": 2.2553, "step": 5139500 }, { "epoch": 14.88, "learning_rate": 4.256362310114858e-05, "loss": 2.2728, "step": 5140000 }, { "epoch": 14.88, "learning_rate": 4.2562900900796594e-05, "loss": 2.2717, "step": 5140500 }, { "epoch": 14.88, "learning_rate": 4.2562177253149316e-05, "loss": 2.2831, "step": 5141000 }, { "epoch": 14.88, "learning_rate": 4.256145360550204e-05, "loss": 2.2447, "step": 5141500 }, { "epoch": 14.88, "learning_rate": 4.256072995785476e-05, "loss": 2.2477, "step": 5142000 }, { "epoch": 14.89, "learning_rate": 4.256000631020748e-05, "loss": 2.2594, "step": 5142500 }, { "epoch": 14.89, "learning_rate": 4.2559282662560205e-05, "loss": 2.2683, "step": 5143000 }, { "epoch": 14.89, "learning_rate": 4.2558559014912934e-05, "loss": 2.2583, "step": 5143500 }, { "epoch": 14.89, "learning_rate": 4.2557835367265656e-05, "loss": 2.2596, "step": 5144000 }, { "epoch": 14.89, "learning_rate": 4.255711171961838e-05, "loss": 2.2695, "step": 5144500 }, { "epoch": 14.89, "learning_rate": 4.25563880719711e-05, "loss": 2.2741, "step": 5145000 }, { "epoch": 14.89, "learning_rate": 4.255566442432383e-05, "loss": 2.2538, "step": 5145500 }, { "epoch": 14.9, "learning_rate": 4.2554942223971845e-05, "loss": 2.2868, "step": 5146000 }, { "epoch": 14.9, "learning_rate": 4.255421857632457e-05, "loss": 2.2555, "step": 5146500 }, { "epoch": 14.9, "learning_rate": 4.255349492867729e-05, "loss": 2.2599, "step": 5147000 }, { "epoch": 14.9, "learning_rate": 4.255277128103001e-05, "loss": 2.2828, "step": 5147500 }, { "epoch": 14.9, "learning_rate": 4.2552047633382734e-05, "loss": 2.2432, "step": 5148000 }, { "epoch": 14.9, "learning_rate": 4.2551323985735456e-05, "loss": 2.2525, "step": 5148500 }, { "epoch": 14.9, "learning_rate": 4.255060033808818e-05, "loss": 2.2572, "step": 5149000 }, { "epoch": 14.91, "learning_rate": 4.25498766904409e-05, "loss": 2.2798, "step": 5149500 }, { "epoch": 14.91, "learning_rate": 4.254915449008892e-05, "loss": 2.2526, "step": 5150000 }, { "epoch": 14.91, "learning_rate": 4.254843228973694e-05, "loss": 2.2441, "step": 5150500 }, { "epoch": 14.91, "learning_rate": 4.254770864208967e-05, "loss": 2.2716, "step": 5151000 }, { "epoch": 14.91, "learning_rate": 4.254698499444239e-05, "loss": 2.2748, "step": 5151500 }, { "epoch": 14.91, "learning_rate": 4.254626134679511e-05, "loss": 2.2673, "step": 5152000 }, { "epoch": 14.91, "learning_rate": 4.2545537699147834e-05, "loss": 2.2424, "step": 5152500 }, { "epoch": 14.92, "learning_rate": 4.2544814051500557e-05, "loss": 2.2664, "step": 5153000 }, { "epoch": 14.92, "learning_rate": 4.254409040385328e-05, "loss": 2.2822, "step": 5153500 }, { "epoch": 14.92, "learning_rate": 4.25433682035013e-05, "loss": 2.2427, "step": 5154000 }, { "epoch": 14.92, "learning_rate": 4.254264455585402e-05, "loss": 2.2437, "step": 5154500 }, { "epoch": 14.92, "learning_rate": 4.2541920908206746e-05, "loss": 2.2522, "step": 5155000 }, { "epoch": 14.92, "learning_rate": 4.254119726055947e-05, "loss": 2.2555, "step": 5155500 }, { "epoch": 14.92, "learning_rate": 4.254047361291219e-05, "loss": 2.2889, "step": 5156000 }, { "epoch": 14.93, "learning_rate": 4.253974996526491e-05, "loss": 2.2503, "step": 5156500 }, { "epoch": 14.93, "learning_rate": 4.2539026317617635e-05, "loss": 2.2523, "step": 5157000 }, { "epoch": 14.93, "learning_rate": 4.253830266997036e-05, "loss": 2.2942, "step": 5157500 }, { "epoch": 14.93, "learning_rate": 4.253757902232308e-05, "loss": 2.2685, "step": 5158000 }, { "epoch": 14.93, "learning_rate": 4.253685537467581e-05, "loss": 2.277, "step": 5158500 }, { "epoch": 14.93, "learning_rate": 4.253613172702853e-05, "loss": 2.2796, "step": 5159000 }, { "epoch": 14.93, "learning_rate": 4.253540807938126e-05, "loss": 2.2481, "step": 5159500 }, { "epoch": 14.94, "learning_rate": 4.2534685879029275e-05, "loss": 2.2489, "step": 5160000 }, { "epoch": 14.94, "learning_rate": 4.2533962231382e-05, "loss": 2.2552, "step": 5160500 }, { "epoch": 14.94, "learning_rate": 4.253323858373472e-05, "loss": 2.2595, "step": 5161000 }, { "epoch": 14.94, "learning_rate": 4.2532516383382735e-05, "loss": 2.2405, "step": 5161500 }, { "epoch": 14.94, "learning_rate": 4.253179273573546e-05, "loss": 2.2633, "step": 5162000 }, { "epoch": 14.94, "learning_rate": 4.253107053538348e-05, "loss": 2.271, "step": 5162500 }, { "epoch": 14.94, "learning_rate": 4.25303468877362e-05, "loss": 2.2693, "step": 5163000 }, { "epoch": 14.95, "learning_rate": 4.2529623240088924e-05, "loss": 2.2723, "step": 5163500 }, { "epoch": 14.95, "learning_rate": 4.2528899592441646e-05, "loss": 2.2551, "step": 5164000 }, { "epoch": 14.95, "learning_rate": 4.252817594479437e-05, "loss": 2.2609, "step": 5164500 }, { "epoch": 14.95, "learning_rate": 4.2527453744442384e-05, "loss": 2.2705, "step": 5165000 }, { "epoch": 14.95, "learning_rate": 4.2526730096795106e-05, "loss": 2.2517, "step": 5165500 }, { "epoch": 14.95, "learning_rate": 4.2526006449147835e-05, "loss": 2.264, "step": 5166000 }, { "epoch": 14.95, "learning_rate": 4.252528280150056e-05, "loss": 2.275, "step": 5166500 }, { "epoch": 14.96, "learning_rate": 4.252455915385328e-05, "loss": 2.238, "step": 5167000 }, { "epoch": 14.96, "learning_rate": 4.252383550620601e-05, "loss": 2.2962, "step": 5167500 }, { "epoch": 14.96, "learning_rate": 4.252311185855873e-05, "loss": 2.2613, "step": 5168000 }, { "epoch": 14.96, "learning_rate": 4.252238821091145e-05, "loss": 2.2385, "step": 5168500 }, { "epoch": 14.96, "learning_rate": 4.2521664563264175e-05, "loss": 2.2603, "step": 5169000 }, { "epoch": 14.96, "learning_rate": 4.25209409156169e-05, "loss": 2.2604, "step": 5169500 }, { "epoch": 14.97, "learning_rate": 4.252021726796962e-05, "loss": 2.2713, "step": 5170000 }, { "epoch": 14.97, "learning_rate": 4.251949362032234e-05, "loss": 2.2727, "step": 5170500 }, { "epoch": 14.97, "learning_rate": 4.2518769972675064e-05, "loss": 2.2532, "step": 5171000 }, { "epoch": 14.97, "learning_rate": 4.251804777232308e-05, "loss": 2.2815, "step": 5171500 }, { "epoch": 14.97, "learning_rate": 4.251732412467581e-05, "loss": 2.2604, "step": 5172000 }, { "epoch": 14.97, "learning_rate": 4.251660047702853e-05, "loss": 2.2655, "step": 5172500 }, { "epoch": 14.97, "learning_rate": 4.2515878276676547e-05, "loss": 2.2664, "step": 5173000 }, { "epoch": 14.98, "learning_rate": 4.2515154629029276e-05, "loss": 2.2749, "step": 5173500 }, { "epoch": 14.98, "learning_rate": 4.2514430981382e-05, "loss": 2.2793, "step": 5174000 }, { "epoch": 14.98, "learning_rate": 4.251370733373472e-05, "loss": 2.2507, "step": 5174500 }, { "epoch": 14.98, "learning_rate": 4.251298368608744e-05, "loss": 2.2838, "step": 5175000 }, { "epoch": 14.98, "learning_rate": 4.2512260038440165e-05, "loss": 2.2499, "step": 5175500 }, { "epoch": 14.98, "learning_rate": 4.251153639079289e-05, "loss": 2.2614, "step": 5176000 }, { "epoch": 14.98, "learning_rate": 4.251081419044091e-05, "loss": 2.2452, "step": 5176500 }, { "epoch": 14.99, "learning_rate": 4.251009054279363e-05, "loss": 2.2675, "step": 5177000 }, { "epoch": 14.99, "learning_rate": 4.250936834244165e-05, "loss": 2.2728, "step": 5177500 }, { "epoch": 14.99, "learning_rate": 4.250864469479437e-05, "loss": 2.2712, "step": 5178000 }, { "epoch": 14.99, "learning_rate": 4.250792104714709e-05, "loss": 2.2484, "step": 5178500 }, { "epoch": 14.99, "learning_rate": 4.2507197399499814e-05, "loss": 2.2513, "step": 5179000 }, { "epoch": 14.99, "learning_rate": 4.2506473751852536e-05, "loss": 2.2834, "step": 5179500 }, { "epoch": 14.99, "learning_rate": 4.250575010420526e-05, "loss": 2.2745, "step": 5180000 }, { "epoch": 15.0, "learning_rate": 4.250502790385328e-05, "loss": 2.2781, "step": 5180500 }, { "epoch": 15.0, "learning_rate": 4.250430425620601e-05, "loss": 2.2418, "step": 5181000 }, { "epoch": 15.0, "learning_rate": 4.250358060855873e-05, "loss": 2.2825, "step": 5181500 }, { "epoch": 15.0, "learning_rate": 4.2502856960911454e-05, "loss": 2.2728, "step": 5182000 }, { "epoch": 15.0, "eval_accuracy": 0.653979711395374, "eval_accuracy_mlm": 0.6167969977868301, "eval_accuracy_nsp": 0.8532148144074283, "eval_loss": 2.2684831619262695, "eval_runtime": 330.5599, "eval_samples_per_second": 1320.142, "eval_steps_per_second": 55.007, "step": 5182080 }, { "epoch": 15.0, "learning_rate": 4.2502133313264176e-05, "loss": 2.2451, "step": 5182500 }, { "epoch": 15.0, "learning_rate": 4.25014096656169e-05, "loss": 2.2661, "step": 5183000 }, { "epoch": 15.0, "learning_rate": 4.2500687465264914e-05, "loss": 2.2486, "step": 5183500 }, { "epoch": 15.01, "learning_rate": 4.2499963817617636e-05, "loss": 2.2817, "step": 5184000 }, { "epoch": 15.01, "learning_rate": 4.249924016997036e-05, "loss": 2.2419, "step": 5184500 }, { "epoch": 15.01, "learning_rate": 4.249851652232309e-05, "loss": 2.2364, "step": 5185000 }, { "epoch": 15.01, "learning_rate": 4.249779287467581e-05, "loss": 2.2221, "step": 5185500 }, { "epoch": 15.01, "learning_rate": 4.249706922702853e-05, "loss": 2.2512, "step": 5186000 }, { "epoch": 15.01, "learning_rate": 4.2496345579381254e-05, "loss": 2.2404, "step": 5186500 }, { "epoch": 15.01, "learning_rate": 4.249562337902927e-05, "loss": 2.2106, "step": 5187000 }, { "epoch": 15.02, "learning_rate": 4.249489973138199e-05, "loss": 2.2569, "step": 5187500 }, { "epoch": 15.02, "learning_rate": 4.249417608373472e-05, "loss": 2.212, "step": 5188000 }, { "epoch": 15.02, "learning_rate": 4.249345243608744e-05, "loss": 2.2423, "step": 5188500 }, { "epoch": 15.02, "learning_rate": 4.2492728788440165e-05, "loss": 2.2324, "step": 5189000 }, { "epoch": 15.02, "learning_rate": 4.249200514079289e-05, "loss": 2.2615, "step": 5189500 }, { "epoch": 15.02, "learning_rate": 4.249128149314561e-05, "loss": 2.2488, "step": 5190000 }, { "epoch": 15.02, "learning_rate": 4.249055784549834e-05, "loss": 2.2393, "step": 5190500 }, { "epoch": 15.03, "learning_rate": 4.248983419785106e-05, "loss": 2.2715, "step": 5191000 }, { "epoch": 15.03, "learning_rate": 4.248911055020378e-05, "loss": 2.2405, "step": 5191500 }, { "epoch": 15.03, "learning_rate": 4.24883883498518e-05, "loss": 2.2337, "step": 5192000 }, { "epoch": 15.03, "learning_rate": 4.248766470220452e-05, "loss": 2.2352, "step": 5192500 }, { "epoch": 15.03, "learning_rate": 4.248694105455724e-05, "loss": 2.2486, "step": 5193000 }, { "epoch": 15.03, "learning_rate": 4.2486217406909966e-05, "loss": 2.2445, "step": 5193500 }, { "epoch": 15.03, "learning_rate": 4.248549375926269e-05, "loss": 2.2549, "step": 5194000 }, { "epoch": 15.04, "learning_rate": 4.248477155891071e-05, "loss": 2.2393, "step": 5194500 }, { "epoch": 15.04, "learning_rate": 4.248404791126343e-05, "loss": 2.243, "step": 5195000 }, { "epoch": 15.04, "learning_rate": 4.248332426361616e-05, "loss": 2.2466, "step": 5195500 }, { "epoch": 15.04, "learning_rate": 4.2482600615968884e-05, "loss": 2.2635, "step": 5196000 }, { "epoch": 15.04, "learning_rate": 4.2481876968321606e-05, "loss": 2.2294, "step": 5196500 }, { "epoch": 15.04, "learning_rate": 4.248115332067433e-05, "loss": 2.2419, "step": 5197000 }, { "epoch": 15.04, "learning_rate": 4.248042967302705e-05, "loss": 2.2634, "step": 5197500 }, { "epoch": 15.05, "learning_rate": 4.247970602537977e-05, "loss": 2.2423, "step": 5198000 }, { "epoch": 15.05, "learning_rate": 4.2478982377732495e-05, "loss": 2.2336, "step": 5198500 }, { "epoch": 15.05, "learning_rate": 4.247826162467581e-05, "loss": 2.2585, "step": 5199000 }, { "epoch": 15.05, "learning_rate": 4.247753797702853e-05, "loss": 2.245, "step": 5199500 }, { "epoch": 15.05, "learning_rate": 4.2476814329381255e-05, "loss": 2.2229, "step": 5200000 }, { "epoch": 15.05, "learning_rate": 4.247609212902927e-05, "loss": 2.2458, "step": 5200500 }, { "epoch": 15.05, "learning_rate": 4.247536848138199e-05, "loss": 2.2081, "step": 5201000 }, { "epoch": 15.06, "learning_rate": 4.2474644833734715e-05, "loss": 2.2697, "step": 5201500 }, { "epoch": 15.06, "learning_rate": 4.247392118608744e-05, "loss": 2.2212, "step": 5202000 }, { "epoch": 15.06, "learning_rate": 4.247319898573546e-05, "loss": 2.2116, "step": 5202500 }, { "epoch": 15.06, "learning_rate": 4.247247533808818e-05, "loss": 2.2438, "step": 5203000 }, { "epoch": 15.06, "learning_rate": 4.247175169044091e-05, "loss": 2.2646, "step": 5203500 }, { "epoch": 15.06, "learning_rate": 4.2471029490088926e-05, "loss": 2.253, "step": 5204000 }, { "epoch": 15.06, "learning_rate": 4.247030584244165e-05, "loss": 2.243, "step": 5204500 }, { "epoch": 15.07, "learning_rate": 4.246958219479437e-05, "loss": 2.2642, "step": 5205000 }, { "epoch": 15.07, "learning_rate": 4.246885854714709e-05, "loss": 2.2645, "step": 5205500 }, { "epoch": 15.07, "learning_rate": 4.2468134899499815e-05, "loss": 2.2772, "step": 5206000 }, { "epoch": 15.07, "learning_rate": 4.246741125185254e-05, "loss": 2.2442, "step": 5206500 }, { "epoch": 15.07, "learning_rate": 4.2466687604205266e-05, "loss": 2.2641, "step": 5207000 }, { "epoch": 15.07, "learning_rate": 4.246596395655799e-05, "loss": 2.2622, "step": 5207500 }, { "epoch": 15.08, "learning_rate": 4.246524030891071e-05, "loss": 2.2527, "step": 5208000 }, { "epoch": 15.08, "learning_rate": 4.246451666126343e-05, "loss": 2.2321, "step": 5208500 }, { "epoch": 15.08, "learning_rate": 4.2463793013616155e-05, "loss": 2.2684, "step": 5209000 }, { "epoch": 15.08, "learning_rate": 4.246306936596888e-05, "loss": 2.2449, "step": 5209500 }, { "epoch": 15.08, "learning_rate": 4.24623457183216e-05, "loss": 2.2503, "step": 5210000 }, { "epoch": 15.08, "learning_rate": 4.246162207067433e-05, "loss": 2.2475, "step": 5210500 }, { "epoch": 15.08, "learning_rate": 4.246089842302705e-05, "loss": 2.2432, "step": 5211000 }, { "epoch": 15.09, "learning_rate": 4.246017622267507e-05, "loss": 2.2615, "step": 5211500 }, { "epoch": 15.09, "learning_rate": 4.245945257502779e-05, "loss": 2.2604, "step": 5212000 }, { "epoch": 15.09, "learning_rate": 4.245872892738051e-05, "loss": 2.2432, "step": 5212500 }, { "epoch": 15.09, "learning_rate": 4.245800527973324e-05, "loss": 2.2356, "step": 5213000 }, { "epoch": 15.09, "learning_rate": 4.245728163208596e-05, "loss": 2.257, "step": 5213500 }, { "epoch": 15.09, "learning_rate": 4.2456557984438685e-05, "loss": 2.2352, "step": 5214000 }, { "epoch": 15.09, "learning_rate": 4.2455837231381993e-05, "loss": 2.2433, "step": 5214500 }, { "epoch": 15.1, "learning_rate": 4.2455113583734716e-05, "loss": 2.2647, "step": 5215000 }, { "epoch": 15.1, "learning_rate": 4.245438993608744e-05, "loss": 2.2462, "step": 5215500 }, { "epoch": 15.1, "learning_rate": 4.245366773573546e-05, "loss": 2.237, "step": 5216000 }, { "epoch": 15.1, "learning_rate": 4.245294408808818e-05, "loss": 2.2542, "step": 5216500 }, { "epoch": 15.1, "learning_rate": 4.2452220440440905e-05, "loss": 2.2473, "step": 5217000 }, { "epoch": 15.1, "learning_rate": 4.245149679279363e-05, "loss": 2.234, "step": 5217500 }, { "epoch": 15.1, "learning_rate": 4.2450773145146356e-05, "loss": 2.2381, "step": 5218000 }, { "epoch": 15.11, "learning_rate": 4.245005094479437e-05, "loss": 2.2359, "step": 5218500 }, { "epoch": 15.11, "learning_rate": 4.2449327297147094e-05, "loss": 2.2433, "step": 5219000 }, { "epoch": 15.11, "learning_rate": 4.2448603649499816e-05, "loss": 2.2408, "step": 5219500 }, { "epoch": 15.11, "learning_rate": 4.244788000185254e-05, "loss": 2.2545, "step": 5220000 }, { "epoch": 15.11, "learning_rate": 4.244715635420527e-05, "loss": 2.2546, "step": 5220500 }, { "epoch": 15.11, "learning_rate": 4.244643270655799e-05, "loss": 2.2652, "step": 5221000 }, { "epoch": 15.11, "learning_rate": 4.244570905891071e-05, "loss": 2.2641, "step": 5221500 }, { "epoch": 15.12, "learning_rate": 4.2444985411263434e-05, "loss": 2.2347, "step": 5222000 }, { "epoch": 15.12, "learning_rate": 4.2444261763616156e-05, "loss": 2.2568, "step": 5222500 }, { "epoch": 15.12, "learning_rate": 4.244353811596888e-05, "loss": 2.2513, "step": 5223000 }, { "epoch": 15.12, "learning_rate": 4.24428144683216e-05, "loss": 2.2505, "step": 5223500 }, { "epoch": 15.12, "learning_rate": 4.244209082067432e-05, "loss": 2.2154, "step": 5224000 }, { "epoch": 15.12, "learning_rate": 4.244136862032234e-05, "loss": 2.2549, "step": 5224500 }, { "epoch": 15.12, "learning_rate": 4.244064497267507e-05, "loss": 2.2349, "step": 5225000 }, { "epoch": 15.13, "learning_rate": 4.243992277232308e-05, "loss": 2.2335, "step": 5225500 }, { "epoch": 15.13, "learning_rate": 4.243919912467581e-05, "loss": 2.263, "step": 5226000 }, { "epoch": 15.13, "learning_rate": 4.2438475477028534e-05, "loss": 2.2534, "step": 5226500 }, { "epoch": 15.13, "learning_rate": 4.243775327667655e-05, "loss": 2.2354, "step": 5227000 }, { "epoch": 15.13, "learning_rate": 4.243702962902927e-05, "loss": 2.2767, "step": 5227500 }, { "epoch": 15.13, "learning_rate": 4.2436305981381994e-05, "loss": 2.2258, "step": 5228000 }, { "epoch": 15.13, "learning_rate": 4.2435582333734716e-05, "loss": 2.2679, "step": 5228500 }, { "epoch": 15.14, "learning_rate": 4.243485868608744e-05, "loss": 2.2372, "step": 5229000 }, { "epoch": 15.14, "learning_rate": 4.243413648573546e-05, "loss": 2.2262, "step": 5229500 }, { "epoch": 15.14, "learning_rate": 4.243341283808818e-05, "loss": 2.2508, "step": 5230000 }, { "epoch": 15.14, "learning_rate": 4.2432689190440906e-05, "loss": 2.2302, "step": 5230500 }, { "epoch": 15.14, "learning_rate": 4.243196554279363e-05, "loss": 2.2364, "step": 5231000 }, { "epoch": 15.14, "learning_rate": 4.243124189514635e-05, "loss": 2.2743, "step": 5231500 }, { "epoch": 15.14, "learning_rate": 4.243051824749907e-05, "loss": 2.2375, "step": 5232000 }, { "epoch": 15.15, "learning_rate": 4.2429794599851794e-05, "loss": 2.2523, "step": 5232500 }, { "epoch": 15.15, "learning_rate": 4.2429070952204523e-05, "loss": 2.2572, "step": 5233000 }, { "epoch": 15.15, "learning_rate": 4.2428347304557246e-05, "loss": 2.2359, "step": 5233500 }, { "epoch": 15.15, "learning_rate": 4.242762365690997e-05, "loss": 2.2484, "step": 5234000 }, { "epoch": 15.15, "learning_rate": 4.242690000926269e-05, "loss": 2.22, "step": 5234500 }, { "epoch": 15.15, "learning_rate": 4.242617636161542e-05, "loss": 2.247, "step": 5235000 }, { "epoch": 15.15, "learning_rate": 4.242545271396814e-05, "loss": 2.2531, "step": 5235500 }, { "epoch": 15.16, "learning_rate": 4.2424729066320864e-05, "loss": 2.2298, "step": 5236000 }, { "epoch": 15.16, "learning_rate": 4.242400686596888e-05, "loss": 2.255, "step": 5236500 }, { "epoch": 15.16, "learning_rate": 4.24232832183216e-05, "loss": 2.2576, "step": 5237000 }, { "epoch": 15.16, "learning_rate": 4.2422559570674324e-05, "loss": 2.2298, "step": 5237500 }, { "epoch": 15.16, "learning_rate": 4.2421835923027046e-05, "loss": 2.2419, "step": 5238000 }, { "epoch": 15.16, "learning_rate": 4.242111372267507e-05, "loss": 2.2582, "step": 5238500 }, { "epoch": 15.16, "learning_rate": 4.242039007502779e-05, "loss": 2.2341, "step": 5239000 }, { "epoch": 15.17, "learning_rate": 4.241966642738051e-05, "loss": 2.2644, "step": 5239500 }, { "epoch": 15.17, "learning_rate": 4.2418942779733235e-05, "loss": 2.2411, "step": 5240000 }, { "epoch": 15.17, "learning_rate": 4.2418219132085964e-05, "loss": 2.2435, "step": 5240500 }, { "epoch": 15.17, "learning_rate": 4.2417495484438686e-05, "loss": 2.2421, "step": 5241000 }, { "epoch": 15.17, "learning_rate": 4.241677183679141e-05, "loss": 2.245, "step": 5241500 }, { "epoch": 15.17, "learning_rate": 4.241604818914413e-05, "loss": 2.2433, "step": 5242000 }, { "epoch": 15.17, "learning_rate": 4.241532454149685e-05, "loss": 2.2405, "step": 5242500 }, { "epoch": 15.18, "learning_rate": 4.241460234114487e-05, "loss": 2.2499, "step": 5243000 }, { "epoch": 15.18, "learning_rate": 4.241387869349759e-05, "loss": 2.2392, "step": 5243500 }, { "epoch": 15.18, "learning_rate": 4.241315504585032e-05, "loss": 2.2357, "step": 5244000 }, { "epoch": 15.18, "learning_rate": 4.241243139820304e-05, "loss": 2.2412, "step": 5244500 }, { "epoch": 15.18, "learning_rate": 4.2411707750555764e-05, "loss": 2.2562, "step": 5245000 }, { "epoch": 15.18, "learning_rate": 4.2410984102908486e-05, "loss": 2.2423, "step": 5245500 }, { "epoch": 15.19, "learning_rate": 4.241026045526121e-05, "loss": 2.2487, "step": 5246000 }, { "epoch": 15.19, "learning_rate": 4.2409538254909224e-05, "loss": 2.2425, "step": 5246500 }, { "epoch": 15.19, "learning_rate": 4.2408814607261946e-05, "loss": 2.242, "step": 5247000 }, { "epoch": 15.19, "learning_rate": 4.240809095961467e-05, "loss": 2.2579, "step": 5247500 }, { "epoch": 15.19, "learning_rate": 4.24073673119674e-05, "loss": 2.24, "step": 5248000 }, { "epoch": 15.19, "learning_rate": 4.240664366432012e-05, "loss": 2.2468, "step": 5248500 }, { "epoch": 15.19, "learning_rate": 4.240592001667284e-05, "loss": 2.256, "step": 5249000 }, { "epoch": 15.2, "learning_rate": 4.240519636902557e-05, "loss": 2.261, "step": 5249500 }, { "epoch": 15.2, "learning_rate": 4.240447416867359e-05, "loss": 2.2376, "step": 5250000 }, { "epoch": 15.2, "learning_rate": 4.240375052102631e-05, "loss": 2.2406, "step": 5250500 }, { "epoch": 15.2, "learning_rate": 4.240302687337903e-05, "loss": 2.2467, "step": 5251000 }, { "epoch": 15.2, "learning_rate": 4.240230322573175e-05, "loss": 2.2485, "step": 5251500 }, { "epoch": 15.2, "learning_rate": 4.240158102537977e-05, "loss": 2.2412, "step": 5252000 }, { "epoch": 15.2, "learning_rate": 4.24008573777325e-05, "loss": 2.2461, "step": 5252500 }, { "epoch": 15.21, "learning_rate": 4.2400135177380513e-05, "loss": 2.2577, "step": 5253000 }, { "epoch": 15.21, "learning_rate": 4.2399411529733236e-05, "loss": 2.245, "step": 5253500 }, { "epoch": 15.21, "learning_rate": 4.239868788208596e-05, "loss": 2.2311, "step": 5254000 }, { "epoch": 15.21, "learning_rate": 4.239796423443868e-05, "loss": 2.2482, "step": 5254500 }, { "epoch": 15.21, "learning_rate": 4.23972405867914e-05, "loss": 2.2439, "step": 5255000 }, { "epoch": 15.21, "learning_rate": 4.239651693914413e-05, "loss": 2.2316, "step": 5255500 }, { "epoch": 15.21, "learning_rate": 4.2395793291496854e-05, "loss": 2.2464, "step": 5256000 }, { "epoch": 15.22, "learning_rate": 4.2395069643849576e-05, "loss": 2.2333, "step": 5256500 }, { "epoch": 15.22, "learning_rate": 4.23943459962023e-05, "loss": 2.2392, "step": 5257000 }, { "epoch": 15.22, "learning_rate": 4.239362234855502e-05, "loss": 2.2452, "step": 5257500 }, { "epoch": 15.22, "learning_rate": 4.239290014820304e-05, "loss": 2.2747, "step": 5258000 }, { "epoch": 15.22, "learning_rate": 4.2392176500555765e-05, "loss": 2.2409, "step": 5258500 }, { "epoch": 15.22, "learning_rate": 4.239145430020378e-05, "loss": 2.238, "step": 5259000 }, { "epoch": 15.22, "learning_rate": 4.23907306525565e-05, "loss": 2.2414, "step": 5259500 }, { "epoch": 15.23, "learning_rate": 4.2390007004909225e-05, "loss": 2.2284, "step": 5260000 }, { "epoch": 15.23, "learning_rate": 4.238928335726195e-05, "loss": 2.2442, "step": 5260500 }, { "epoch": 15.23, "learning_rate": 4.238855970961467e-05, "loss": 2.238, "step": 5261000 }, { "epoch": 15.23, "learning_rate": 4.23878360619674e-05, "loss": 2.2574, "step": 5261500 }, { "epoch": 15.23, "learning_rate": 4.238711241432012e-05, "loss": 2.269, "step": 5262000 }, { "epoch": 15.23, "learning_rate": 4.238638876667285e-05, "loss": 2.2538, "step": 5262500 }, { "epoch": 15.23, "learning_rate": 4.238566511902557e-05, "loss": 2.2392, "step": 5263000 }, { "epoch": 15.24, "learning_rate": 4.2384941471378294e-05, "loss": 2.2349, "step": 5263500 }, { "epoch": 15.24, "learning_rate": 4.2384217823731016e-05, "loss": 2.2327, "step": 5264000 }, { "epoch": 15.24, "learning_rate": 4.238349417608374e-05, "loss": 2.2831, "step": 5264500 }, { "epoch": 15.24, "learning_rate": 4.2382771975731754e-05, "loss": 2.2411, "step": 5265000 }, { "epoch": 15.24, "learning_rate": 4.238204977537977e-05, "loss": 2.2468, "step": 5265500 }, { "epoch": 15.24, "learning_rate": 4.23813261277325e-05, "loss": 2.237, "step": 5266000 }, { "epoch": 15.24, "learning_rate": 4.238060248008522e-05, "loss": 2.2449, "step": 5266500 }, { "epoch": 15.25, "learning_rate": 4.237987883243794e-05, "loss": 2.2708, "step": 5267000 }, { "epoch": 15.25, "learning_rate": 4.2379155184790665e-05, "loss": 2.2305, "step": 5267500 }, { "epoch": 15.25, "learning_rate": 4.237843298443868e-05, "loss": 2.2444, "step": 5268000 }, { "epoch": 15.25, "learning_rate": 4.23777093367914e-05, "loss": 2.2536, "step": 5268500 }, { "epoch": 15.25, "learning_rate": 4.2376985689144125e-05, "loss": 2.2726, "step": 5269000 }, { "epoch": 15.25, "learning_rate": 4.237626204149685e-05, "loss": 2.2582, "step": 5269500 }, { "epoch": 15.25, "learning_rate": 4.237553839384958e-05, "loss": 2.2246, "step": 5270000 }, { "epoch": 15.26, "learning_rate": 4.23748147462023e-05, "loss": 2.2385, "step": 5270500 }, { "epoch": 15.26, "learning_rate": 4.237409109855502e-05, "loss": 2.2432, "step": 5271000 }, { "epoch": 15.26, "learning_rate": 4.237336745090775e-05, "loss": 2.241, "step": 5271500 }, { "epoch": 15.26, "learning_rate": 4.237264380326047e-05, "loss": 2.2567, "step": 5272000 }, { "epoch": 15.26, "learning_rate": 4.2371920155613195e-05, "loss": 2.2215, "step": 5272500 }, { "epoch": 15.26, "learning_rate": 4.237119795526121e-05, "loss": 2.2565, "step": 5273000 }, { "epoch": 15.26, "learning_rate": 4.237047430761393e-05, "loss": 2.2318, "step": 5273500 }, { "epoch": 15.27, "learning_rate": 4.2369750659966655e-05, "loss": 2.2457, "step": 5274000 }, { "epoch": 15.27, "learning_rate": 4.236902701231938e-05, "loss": 2.2659, "step": 5274500 }, { "epoch": 15.27, "learning_rate": 4.23683033646721e-05, "loss": 2.2519, "step": 5275000 }, { "epoch": 15.27, "learning_rate": 4.236757971702482e-05, "loss": 2.2213, "step": 5275500 }, { "epoch": 15.27, "learning_rate": 4.2366857516672844e-05, "loss": 2.2583, "step": 5276000 }, { "epoch": 15.27, "learning_rate": 4.2366133869025566e-05, "loss": 2.275, "step": 5276500 }, { "epoch": 15.27, "learning_rate": 4.236541166867358e-05, "loss": 2.239, "step": 5277000 }, { "epoch": 15.28, "learning_rate": 4.2364688021026304e-05, "loss": 2.2408, "step": 5277500 }, { "epoch": 15.28, "learning_rate": 4.2363965820674326e-05, "loss": 2.2455, "step": 5278000 }, { "epoch": 15.28, "learning_rate": 4.236324217302705e-05, "loss": 2.2344, "step": 5278500 }, { "epoch": 15.28, "learning_rate": 4.236251852537978e-05, "loss": 2.2417, "step": 5279000 }, { "epoch": 15.28, "learning_rate": 4.23617948777325e-05, "loss": 2.2383, "step": 5279500 }, { "epoch": 15.28, "learning_rate": 4.236107123008522e-05, "loss": 2.2295, "step": 5280000 }, { "epoch": 15.28, "learning_rate": 4.2360347582437944e-05, "loss": 2.2743, "step": 5280500 }, { "epoch": 15.29, "learning_rate": 4.2359623934790666e-05, "loss": 2.2668, "step": 5281000 }, { "epoch": 15.29, "learning_rate": 4.235890028714339e-05, "loss": 2.2451, "step": 5281500 }, { "epoch": 15.29, "learning_rate": 4.235817663949611e-05, "loss": 2.2618, "step": 5282000 }, { "epoch": 15.29, "learning_rate": 4.235745299184883e-05, "loss": 2.262, "step": 5282500 }, { "epoch": 15.29, "learning_rate": 4.2356729344201555e-05, "loss": 2.2582, "step": 5283000 }, { "epoch": 15.29, "learning_rate": 4.235600569655428e-05, "loss": 2.2583, "step": 5283500 }, { "epoch": 15.3, "learning_rate": 4.2355282048907e-05, "loss": 2.2503, "step": 5284000 }, { "epoch": 15.3, "learning_rate": 4.235455984855502e-05, "loss": 2.2624, "step": 5284500 }, { "epoch": 15.3, "learning_rate": 4.235383620090775e-05, "loss": 2.259, "step": 5285000 }, { "epoch": 15.3, "learning_rate": 4.2353114000555767e-05, "loss": 2.2334, "step": 5285500 }, { "epoch": 15.3, "learning_rate": 4.235239035290849e-05, "loss": 2.273, "step": 5286000 }, { "epoch": 15.3, "learning_rate": 4.235166670526121e-05, "loss": 2.2426, "step": 5286500 }, { "epoch": 15.3, "learning_rate": 4.235094305761393e-05, "loss": 2.2268, "step": 5287000 }, { "epoch": 15.31, "learning_rate": 4.2350219409966655e-05, "loss": 2.2487, "step": 5287500 }, { "epoch": 15.31, "learning_rate": 4.234949576231938e-05, "loss": 2.268, "step": 5288000 }, { "epoch": 15.31, "learning_rate": 4.23487735619674e-05, "loss": 2.246, "step": 5288500 }, { "epoch": 15.31, "learning_rate": 4.234804991432012e-05, "loss": 2.2287, "step": 5289000 }, { "epoch": 15.31, "learning_rate": 4.2347326266672844e-05, "loss": 2.2634, "step": 5289500 }, { "epoch": 15.31, "learning_rate": 4.234660406632086e-05, "loss": 2.2473, "step": 5290000 }, { "epoch": 15.31, "learning_rate": 4.234588041867358e-05, "loss": 2.2662, "step": 5290500 }, { "epoch": 15.32, "learning_rate": 4.2345156771026304e-05, "loss": 2.2439, "step": 5291000 }, { "epoch": 15.32, "learning_rate": 4.234443312337903e-05, "loss": 2.2732, "step": 5291500 }, { "epoch": 15.32, "learning_rate": 4.234370947573175e-05, "loss": 2.2466, "step": 5292000 }, { "epoch": 15.32, "learning_rate": 4.234298582808448e-05, "loss": 2.2463, "step": 5292500 }, { "epoch": 15.32, "learning_rate": 4.23422621804372e-05, "loss": 2.2581, "step": 5293000 }, { "epoch": 15.32, "learning_rate": 4.234153853278993e-05, "loss": 2.2403, "step": 5293500 }, { "epoch": 15.32, "learning_rate": 4.234081488514265e-05, "loss": 2.2396, "step": 5294000 }, { "epoch": 15.33, "learning_rate": 4.2340091237495374e-05, "loss": 2.2808, "step": 5294500 }, { "epoch": 15.33, "learning_rate": 4.2339367589848096e-05, "loss": 2.2342, "step": 5295000 }, { "epoch": 15.33, "learning_rate": 4.233864394220082e-05, "loss": 2.2421, "step": 5295500 }, { "epoch": 15.33, "learning_rate": 4.233792029455354e-05, "loss": 2.2612, "step": 5296000 }, { "epoch": 15.33, "learning_rate": 4.233719664690626e-05, "loss": 2.2524, "step": 5296500 }, { "epoch": 15.33, "learning_rate": 4.2336472999258985e-05, "loss": 2.2769, "step": 5297000 }, { "epoch": 15.33, "learning_rate": 4.233574935161171e-05, "loss": 2.2372, "step": 5297500 }, { "epoch": 15.34, "learning_rate": 4.233502715125973e-05, "loss": 2.229, "step": 5298000 }, { "epoch": 15.34, "learning_rate": 4.233430350361245e-05, "loss": 2.245, "step": 5298500 }, { "epoch": 15.34, "learning_rate": 4.233358130326047e-05, "loss": 2.2535, "step": 5299000 }, { "epoch": 15.34, "learning_rate": 4.233285765561319e-05, "loss": 2.26, "step": 5299500 }, { "epoch": 15.34, "learning_rate": 4.233213400796592e-05, "loss": 2.2449, "step": 5300000 }, { "epoch": 15.34, "learning_rate": 4.233141036031864e-05, "loss": 2.2502, "step": 5300500 }, { "epoch": 15.34, "learning_rate": 4.233068671267136e-05, "loss": 2.2396, "step": 5301000 }, { "epoch": 15.35, "learning_rate": 4.232996451231938e-05, "loss": 2.259, "step": 5301500 }, { "epoch": 15.35, "learning_rate": 4.2329243759262694e-05, "loss": 2.2258, "step": 5302000 }, { "epoch": 15.35, "learning_rate": 4.2328520111615416e-05, "loss": 2.2715, "step": 5302500 }, { "epoch": 15.35, "learning_rate": 4.232779646396814e-05, "loss": 2.2792, "step": 5303000 }, { "epoch": 15.35, "learning_rate": 4.232707281632086e-05, "loss": 2.2551, "step": 5303500 }, { "epoch": 15.35, "learning_rate": 4.232634916867358e-05, "loss": 2.2474, "step": 5304000 }, { "epoch": 15.35, "learning_rate": 4.2325625521026305e-05, "loss": 2.2346, "step": 5304500 }, { "epoch": 15.36, "learning_rate": 4.232490187337903e-05, "loss": 2.2489, "step": 5305000 }, { "epoch": 15.36, "learning_rate": 4.2324178225731757e-05, "loss": 2.2519, "step": 5305500 }, { "epoch": 15.36, "learning_rate": 4.232345602537977e-05, "loss": 2.2441, "step": 5306000 }, { "epoch": 15.36, "learning_rate": 4.2322732377732494e-05, "loss": 2.2552, "step": 5306500 }, { "epoch": 15.36, "learning_rate": 4.2322008730085217e-05, "loss": 2.255, "step": 5307000 }, { "epoch": 15.36, "learning_rate": 4.2321285082437946e-05, "loss": 2.2426, "step": 5307500 }, { "epoch": 15.36, "learning_rate": 4.232056143479067e-05, "loss": 2.2294, "step": 5308000 }, { "epoch": 15.37, "learning_rate": 4.231983778714339e-05, "loss": 2.2568, "step": 5308500 }, { "epoch": 15.37, "learning_rate": 4.231911413949611e-05, "loss": 2.2668, "step": 5309000 }, { "epoch": 15.37, "learning_rate": 4.2318390491848835e-05, "loss": 2.2505, "step": 5309500 }, { "epoch": 15.37, "learning_rate": 4.231766684420156e-05, "loss": 2.2565, "step": 5310000 }, { "epoch": 15.37, "learning_rate": 4.231694319655428e-05, "loss": 2.257, "step": 5310500 }, { "epoch": 15.37, "learning_rate": 4.2316222443497595e-05, "loss": 2.2565, "step": 5311000 }, { "epoch": 15.37, "learning_rate": 4.231549879585032e-05, "loss": 2.2345, "step": 5311500 }, { "epoch": 15.38, "learning_rate": 4.231477514820304e-05, "loss": 2.249, "step": 5312000 }, { "epoch": 15.38, "learning_rate": 4.231405150055576e-05, "loss": 2.2582, "step": 5312500 }, { "epoch": 15.38, "learning_rate": 4.2313327852908484e-05, "loss": 2.2337, "step": 5313000 }, { "epoch": 15.38, "learning_rate": 4.2312604205261206e-05, "loss": 2.2661, "step": 5313500 }, { "epoch": 15.38, "learning_rate": 4.231188055761393e-05, "loss": 2.2479, "step": 5314000 }, { "epoch": 15.38, "learning_rate": 4.231115690996666e-05, "loss": 2.2362, "step": 5314500 }, { "epoch": 15.38, "learning_rate": 4.231043326231938e-05, "loss": 2.2477, "step": 5315000 }, { "epoch": 15.39, "learning_rate": 4.230970961467211e-05, "loss": 2.2558, "step": 5315500 }, { "epoch": 15.39, "learning_rate": 4.230898596702483e-05, "loss": 2.2726, "step": 5316000 }, { "epoch": 15.39, "learning_rate": 4.230826231937755e-05, "loss": 2.2693, "step": 5316500 }, { "epoch": 15.39, "learning_rate": 4.2307538671730275e-05, "loss": 2.2341, "step": 5317000 }, { "epoch": 15.39, "learning_rate": 4.2306815024083e-05, "loss": 2.2596, "step": 5317500 }, { "epoch": 15.39, "learning_rate": 4.230609137643572e-05, "loss": 2.2372, "step": 5318000 }, { "epoch": 15.39, "learning_rate": 4.2305369176083735e-05, "loss": 2.252, "step": 5318500 }, { "epoch": 15.4, "learning_rate": 4.230464552843646e-05, "loss": 2.2614, "step": 5319000 }, { "epoch": 15.4, "learning_rate": 4.230392188078918e-05, "loss": 2.2307, "step": 5319500 }, { "epoch": 15.4, "learning_rate": 4.23031996804372e-05, "loss": 2.2545, "step": 5320000 }, { "epoch": 15.4, "learning_rate": 4.2302476032789924e-05, "loss": 2.2363, "step": 5320500 }, { "epoch": 15.4, "learning_rate": 4.2301752385142646e-05, "loss": 2.2588, "step": 5321000 }, { "epoch": 15.4, "learning_rate": 4.230102873749537e-05, "loss": 2.2272, "step": 5321500 }, { "epoch": 15.41, "learning_rate": 4.230030508984809e-05, "loss": 2.2562, "step": 5322000 }, { "epoch": 15.41, "learning_rate": 4.229958288949611e-05, "loss": 2.2701, "step": 5322500 }, { "epoch": 15.41, "learning_rate": 4.2298859241848835e-05, "loss": 2.2487, "step": 5323000 }, { "epoch": 15.41, "learning_rate": 4.229813559420156e-05, "loss": 2.2429, "step": 5323500 }, { "epoch": 15.41, "learning_rate": 4.229741194655428e-05, "loss": 2.2574, "step": 5324000 }, { "epoch": 15.41, "learning_rate": 4.229668829890701e-05, "loss": 2.2623, "step": 5324500 }, { "epoch": 15.41, "learning_rate": 4.2295966098555024e-05, "loss": 2.258, "step": 5325000 }, { "epoch": 15.42, "learning_rate": 4.229524389820304e-05, "loss": 2.2469, "step": 5325500 }, { "epoch": 15.42, "learning_rate": 4.229452025055576e-05, "loss": 2.2252, "step": 5326000 }, { "epoch": 15.42, "learning_rate": 4.2293796602908484e-05, "loss": 2.2517, "step": 5326500 }, { "epoch": 15.42, "learning_rate": 4.2293072955261207e-05, "loss": 2.2405, "step": 5327000 }, { "epoch": 15.42, "learning_rate": 4.229234930761393e-05, "loss": 2.2643, "step": 5327500 }, { "epoch": 15.42, "learning_rate": 4.229162565996666e-05, "loss": 2.2424, "step": 5328000 }, { "epoch": 15.42, "learning_rate": 4.229090201231938e-05, "loss": 2.2445, "step": 5328500 }, { "epoch": 15.43, "learning_rate": 4.22901783646721e-05, "loss": 2.2418, "step": 5329000 }, { "epoch": 15.43, "learning_rate": 4.2289454717024825e-05, "loss": 2.26, "step": 5329500 }, { "epoch": 15.43, "learning_rate": 4.2288731069377554e-05, "loss": 2.24, "step": 5330000 }, { "epoch": 15.43, "learning_rate": 4.2288007421730276e-05, "loss": 2.2747, "step": 5330500 }, { "epoch": 15.43, "learning_rate": 4.2287283774083e-05, "loss": 2.2521, "step": 5331000 }, { "epoch": 15.43, "learning_rate": 4.228656012643572e-05, "loss": 2.2394, "step": 5331500 }, { "epoch": 15.43, "learning_rate": 4.228583647878844e-05, "loss": 2.2512, "step": 5332000 }, { "epoch": 15.44, "learning_rate": 4.2285112831141165e-05, "loss": 2.2473, "step": 5332500 }, { "epoch": 15.44, "learning_rate": 4.228439063078918e-05, "loss": 2.2541, "step": 5333000 }, { "epoch": 15.44, "learning_rate": 4.22836684304372e-05, "loss": 2.2421, "step": 5333500 }, { "epoch": 15.44, "learning_rate": 4.2282944782789925e-05, "loss": 2.2436, "step": 5334000 }, { "epoch": 15.44, "learning_rate": 4.228222113514265e-05, "loss": 2.258, "step": 5334500 }, { "epoch": 15.44, "learning_rate": 4.228149748749537e-05, "loss": 2.242, "step": 5335000 }, { "epoch": 15.44, "learning_rate": 4.228077383984809e-05, "loss": 2.2521, "step": 5335500 }, { "epoch": 15.45, "learning_rate": 4.2280050192200814e-05, "loss": 2.2702, "step": 5336000 }, { "epoch": 15.45, "learning_rate": 4.2279326544553536e-05, "loss": 2.2671, "step": 5336500 }, { "epoch": 15.45, "learning_rate": 4.227860434420156e-05, "loss": 2.2383, "step": 5337000 }, { "epoch": 15.45, "learning_rate": 4.227788069655429e-05, "loss": 2.2497, "step": 5337500 }, { "epoch": 15.45, "learning_rate": 4.227715704890701e-05, "loss": 2.2527, "step": 5338000 }, { "epoch": 15.45, "learning_rate": 4.227643340125973e-05, "loss": 2.2476, "step": 5338500 }, { "epoch": 15.45, "learning_rate": 4.2275709753612454e-05, "loss": 2.2196, "step": 5339000 }, { "epoch": 15.46, "learning_rate": 4.227498755326047e-05, "loss": 2.2522, "step": 5339500 }, { "epoch": 15.46, "learning_rate": 4.227426390561319e-05, "loss": 2.232, "step": 5340000 }, { "epoch": 15.46, "learning_rate": 4.2273540257965914e-05, "loss": 2.2325, "step": 5340500 }, { "epoch": 15.46, "learning_rate": 4.2272816610318636e-05, "loss": 2.2376, "step": 5341000 }, { "epoch": 15.46, "learning_rate": 4.227209296267136e-05, "loss": 2.2323, "step": 5341500 }, { "epoch": 15.46, "learning_rate": 4.227136931502409e-05, "loss": 2.2522, "step": 5342000 }, { "epoch": 15.46, "learning_rate": 4.227064566737681e-05, "loss": 2.2296, "step": 5342500 }, { "epoch": 15.47, "learning_rate": 4.226992201972953e-05, "loss": 2.2738, "step": 5343000 }, { "epoch": 15.47, "learning_rate": 4.2269198372082254e-05, "loss": 2.251, "step": 5343500 }, { "epoch": 15.47, "learning_rate": 4.226847617173027e-05, "loss": 2.2303, "step": 5344000 }, { "epoch": 15.47, "learning_rate": 4.226775252408299e-05, "loss": 2.2446, "step": 5344500 }, { "epoch": 15.47, "learning_rate": 4.226702887643572e-05, "loss": 2.2338, "step": 5345000 }, { "epoch": 15.47, "learning_rate": 4.2266306676083737e-05, "loss": 2.2511, "step": 5345500 }, { "epoch": 15.47, "learning_rate": 4.226558302843646e-05, "loss": 2.2677, "step": 5346000 }, { "epoch": 15.48, "learning_rate": 4.226485938078919e-05, "loss": 2.2563, "step": 5346500 }, { "epoch": 15.48, "learning_rate": 4.226413573314191e-05, "loss": 2.2605, "step": 5347000 }, { "epoch": 15.48, "learning_rate": 4.226341208549463e-05, "loss": 2.2601, "step": 5347500 }, { "epoch": 15.48, "learning_rate": 4.2262688437847355e-05, "loss": 2.2587, "step": 5348000 }, { "epoch": 15.48, "learning_rate": 4.226196479020008e-05, "loss": 2.2328, "step": 5348500 }, { "epoch": 15.48, "learning_rate": 4.22612411425528e-05, "loss": 2.217, "step": 5349000 }, { "epoch": 15.48, "learning_rate": 4.226051749490552e-05, "loss": 2.247, "step": 5349500 }, { "epoch": 15.49, "learning_rate": 4.2259793847258243e-05, "loss": 2.2569, "step": 5350000 }, { "epoch": 15.49, "learning_rate": 4.2259070199610966e-05, "loss": 2.2494, "step": 5350500 }, { "epoch": 15.49, "learning_rate": 4.225834655196369e-05, "loss": 2.2721, "step": 5351000 }, { "epoch": 15.49, "learning_rate": 4.225762290431641e-05, "loss": 2.2419, "step": 5351500 }, { "epoch": 15.49, "learning_rate": 4.225689925666914e-05, "loss": 2.2511, "step": 5352000 }, { "epoch": 15.49, "learning_rate": 4.225617705631716e-05, "loss": 2.2318, "step": 5352500 }, { "epoch": 15.49, "learning_rate": 4.2255453408669884e-05, "loss": 2.2685, "step": 5353000 }, { "epoch": 15.5, "learning_rate": 4.2254729761022606e-05, "loss": 2.2541, "step": 5353500 }, { "epoch": 15.5, "learning_rate": 4.225400611337533e-05, "loss": 2.237, "step": 5354000 }, { "epoch": 15.5, "learning_rate": 4.225328246572805e-05, "loss": 2.2667, "step": 5354500 }, { "epoch": 15.5, "learning_rate": 4.2252560265376066e-05, "loss": 2.2604, "step": 5355000 }, { "epoch": 15.5, "learning_rate": 4.225183806502409e-05, "loss": 2.2515, "step": 5355500 }, { "epoch": 15.5, "learning_rate": 4.225111441737681e-05, "loss": 2.2752, "step": 5356000 }, { "epoch": 15.5, "learning_rate": 4.225039076972953e-05, "loss": 2.2594, "step": 5356500 }, { "epoch": 15.51, "learning_rate": 4.2249667122082255e-05, "loss": 2.2435, "step": 5357000 }, { "epoch": 15.51, "learning_rate": 4.224894347443498e-05, "loss": 2.248, "step": 5357500 }, { "epoch": 15.51, "learning_rate": 4.22482198267877e-05, "loss": 2.2572, "step": 5358000 }, { "epoch": 15.51, "learning_rate": 4.224749617914042e-05, "loss": 2.2536, "step": 5358500 }, { "epoch": 15.51, "learning_rate": 4.2246772531493144e-05, "loss": 2.2593, "step": 5359000 }, { "epoch": 15.51, "learning_rate": 4.2246050331141166e-05, "loss": 2.2396, "step": 5359500 }, { "epoch": 15.52, "learning_rate": 4.224532668349389e-05, "loss": 2.2482, "step": 5360000 }, { "epoch": 15.52, "learning_rate": 4.224460303584661e-05, "loss": 2.2562, "step": 5360500 }, { "epoch": 15.52, "learning_rate": 4.224387938819934e-05, "loss": 2.2699, "step": 5361000 }, { "epoch": 15.52, "learning_rate": 4.224315574055206e-05, "loss": 2.2521, "step": 5361500 }, { "epoch": 15.52, "learning_rate": 4.224243354020008e-05, "loss": 2.239, "step": 5362000 }, { "epoch": 15.52, "learning_rate": 4.224171133984809e-05, "loss": 2.2869, "step": 5362500 }, { "epoch": 15.52, "learning_rate": 4.2240987692200815e-05, "loss": 2.2331, "step": 5363000 }, { "epoch": 15.53, "learning_rate": 4.224026549184884e-05, "loss": 2.2905, "step": 5363500 }, { "epoch": 15.53, "learning_rate": 4.223954184420156e-05, "loss": 2.2075, "step": 5364000 }, { "epoch": 15.53, "learning_rate": 4.223881819655428e-05, "loss": 2.2436, "step": 5364500 }, { "epoch": 15.53, "learning_rate": 4.2238094548907004e-05, "loss": 2.2415, "step": 5365000 }, { "epoch": 15.53, "learning_rate": 4.223737090125973e-05, "loss": 2.2532, "step": 5365500 }, { "epoch": 15.53, "learning_rate": 4.223664870090774e-05, "loss": 2.2571, "step": 5366000 }, { "epoch": 15.53, "learning_rate": 4.2235925053260464e-05, "loss": 2.2475, "step": 5366500 }, { "epoch": 15.54, "learning_rate": 4.223520285290849e-05, "loss": 2.227, "step": 5367000 }, { "epoch": 15.54, "learning_rate": 4.2234479205261216e-05, "loss": 2.253, "step": 5367500 }, { "epoch": 15.54, "learning_rate": 4.223375555761394e-05, "loss": 2.2317, "step": 5368000 }, { "epoch": 15.54, "learning_rate": 4.223303190996666e-05, "loss": 2.2348, "step": 5368500 }, { "epoch": 15.54, "learning_rate": 4.223230826231938e-05, "loss": 2.2391, "step": 5369000 }, { "epoch": 15.54, "learning_rate": 4.2231584614672105e-05, "loss": 2.2466, "step": 5369500 }, { "epoch": 15.54, "learning_rate": 4.223086096702483e-05, "loss": 2.2507, "step": 5370000 }, { "epoch": 15.55, "learning_rate": 4.223013731937755e-05, "loss": 2.2471, "step": 5370500 }, { "epoch": 15.55, "learning_rate": 4.222941367173027e-05, "loss": 2.2456, "step": 5371000 }, { "epoch": 15.55, "learning_rate": 4.2228690024082994e-05, "loss": 2.2199, "step": 5371500 }, { "epoch": 15.55, "learning_rate": 4.2227966376435716e-05, "loss": 2.2558, "step": 5372000 }, { "epoch": 15.55, "learning_rate": 4.222724272878844e-05, "loss": 2.2595, "step": 5372500 }, { "epoch": 15.55, "learning_rate": 4.222652052843646e-05, "loss": 2.2365, "step": 5373000 }, { "epoch": 15.55, "learning_rate": 4.222579688078918e-05, "loss": 2.2386, "step": 5373500 }, { "epoch": 15.56, "learning_rate": 4.2225073233141905e-05, "loss": 2.2218, "step": 5374000 }, { "epoch": 15.56, "learning_rate": 4.222434958549463e-05, "loss": 2.2374, "step": 5374500 }, { "epoch": 15.56, "learning_rate": 4.2223625937847356e-05, "loss": 2.2601, "step": 5375000 }, { "epoch": 15.56, "learning_rate": 4.222290229020008e-05, "loss": 2.264, "step": 5375500 }, { "epoch": 15.56, "learning_rate": 4.22221786425528e-05, "loss": 2.2533, "step": 5376000 }, { "epoch": 15.56, "learning_rate": 4.2221456442200816e-05, "loss": 2.2674, "step": 5376500 }, { "epoch": 15.56, "learning_rate": 4.222073279455354e-05, "loss": 2.2481, "step": 5377000 }, { "epoch": 15.57, "learning_rate": 4.222000914690627e-05, "loss": 2.2468, "step": 5377500 }, { "epoch": 15.57, "learning_rate": 4.221928549925899e-05, "loss": 2.2629, "step": 5378000 }, { "epoch": 15.57, "learning_rate": 4.221856185161171e-05, "loss": 2.2364, "step": 5378500 }, { "epoch": 15.57, "learning_rate": 4.221783965125973e-05, "loss": 2.2197, "step": 5379000 }, { "epoch": 15.57, "learning_rate": 4.221711600361245e-05, "loss": 2.2484, "step": 5379500 }, { "epoch": 15.57, "learning_rate": 4.221639235596517e-05, "loss": 2.2426, "step": 5380000 }, { "epoch": 15.57, "learning_rate": 4.221567160290849e-05, "loss": 2.2489, "step": 5380500 }, { "epoch": 15.58, "learning_rate": 4.221494795526121e-05, "loss": 2.237, "step": 5381000 }, { "epoch": 15.58, "learning_rate": 4.221422430761393e-05, "loss": 2.2481, "step": 5381500 }, { "epoch": 15.58, "learning_rate": 4.2213500659966654e-05, "loss": 2.2443, "step": 5382000 }, { "epoch": 15.58, "learning_rate": 4.221277701231938e-05, "loss": 2.2464, "step": 5382500 }, { "epoch": 15.58, "learning_rate": 4.2212053364672105e-05, "loss": 2.2645, "step": 5383000 }, { "epoch": 15.58, "learning_rate": 4.221132971702483e-05, "loss": 2.2656, "step": 5383500 }, { "epoch": 15.58, "learning_rate": 4.221060606937755e-05, "loss": 2.2458, "step": 5384000 }, { "epoch": 15.59, "learning_rate": 4.220988242173027e-05, "loss": 2.2606, "step": 5384500 }, { "epoch": 15.59, "learning_rate": 4.2209158774082994e-05, "loss": 2.2457, "step": 5385000 }, { "epoch": 15.59, "learning_rate": 4.220843512643572e-05, "loss": 2.2669, "step": 5385500 }, { "epoch": 15.59, "learning_rate": 4.220771147878844e-05, "loss": 2.247, "step": 5386000 }, { "epoch": 15.59, "learning_rate": 4.220698783114117e-05, "loss": 2.2629, "step": 5386500 }, { "epoch": 15.59, "learning_rate": 4.220626418349389e-05, "loss": 2.2547, "step": 5387000 }, { "epoch": 15.59, "learning_rate": 4.220554053584661e-05, "loss": 2.2528, "step": 5387500 }, { "epoch": 15.6, "learning_rate": 4.2204816888199335e-05, "loss": 2.2608, "step": 5388000 }, { "epoch": 15.6, "learning_rate": 4.220409468784735e-05, "loss": 2.242, "step": 5388500 }, { "epoch": 15.6, "learning_rate": 4.220337104020007e-05, "loss": 2.2433, "step": 5389000 }, { "epoch": 15.6, "learning_rate": 4.22026473925528e-05, "loss": 2.2439, "step": 5389500 }, { "epoch": 15.6, "learning_rate": 4.2201923744905524e-05, "loss": 2.2524, "step": 5390000 }, { "epoch": 15.6, "learning_rate": 4.2201200097258246e-05, "loss": 2.234, "step": 5390500 }, { "epoch": 15.6, "learning_rate": 4.220047644961097e-05, "loss": 2.2486, "step": 5391000 }, { "epoch": 15.61, "learning_rate": 4.219975280196369e-05, "loss": 2.2673, "step": 5391500 }, { "epoch": 15.61, "learning_rate": 4.219903060161171e-05, "loss": 2.2397, "step": 5392000 }, { "epoch": 15.61, "learning_rate": 4.2198306953964435e-05, "loss": 2.2631, "step": 5392500 }, { "epoch": 15.61, "learning_rate": 4.219758330631716e-05, "loss": 2.2348, "step": 5393000 }, { "epoch": 15.61, "learning_rate": 4.219685965866988e-05, "loss": 2.252, "step": 5393500 }, { "epoch": 15.61, "learning_rate": 4.2196137458317895e-05, "loss": 2.2342, "step": 5394000 }, { "epoch": 15.61, "learning_rate": 4.219541381067062e-05, "loss": 2.249, "step": 5394500 }, { "epoch": 15.62, "learning_rate": 4.219469016302334e-05, "loss": 2.2708, "step": 5395000 }, { "epoch": 15.62, "learning_rate": 4.219396651537607e-05, "loss": 2.2346, "step": 5395500 }, { "epoch": 15.62, "learning_rate": 4.219324286772879e-05, "loss": 2.2624, "step": 5396000 }, { "epoch": 15.62, "learning_rate": 4.219251922008151e-05, "loss": 2.2627, "step": 5396500 }, { "epoch": 15.62, "learning_rate": 4.219179557243424e-05, "loss": 2.2308, "step": 5397000 }, { "epoch": 15.62, "learning_rate": 4.2191071924786964e-05, "loss": 2.2394, "step": 5397500 }, { "epoch": 15.62, "learning_rate": 4.219034972443498e-05, "loss": 2.2566, "step": 5398000 }, { "epoch": 15.63, "learning_rate": 4.21896260767877e-05, "loss": 2.241, "step": 5398500 }, { "epoch": 15.63, "learning_rate": 4.2188902429140424e-05, "loss": 2.2679, "step": 5399000 }, { "epoch": 15.63, "learning_rate": 4.2188180228788446e-05, "loss": 2.2343, "step": 5399500 }, { "epoch": 15.63, "learning_rate": 4.218745658114117e-05, "loss": 2.2455, "step": 5400000 }, { "epoch": 15.63, "learning_rate": 4.218673293349389e-05, "loss": 2.2644, "step": 5400500 }, { "epoch": 15.63, "learning_rate": 4.218600928584661e-05, "loss": 2.272, "step": 5401000 }, { "epoch": 15.64, "learning_rate": 4.2185285638199335e-05, "loss": 2.2578, "step": 5401500 }, { "epoch": 15.64, "learning_rate": 4.218456199055206e-05, "loss": 2.241, "step": 5402000 }, { "epoch": 15.64, "learning_rate": 4.218383834290478e-05, "loss": 2.2559, "step": 5402500 }, { "epoch": 15.64, "learning_rate": 4.21831146952575e-05, "loss": 2.2622, "step": 5403000 }, { "epoch": 15.64, "learning_rate": 4.2182391047610224e-05, "loss": 2.2191, "step": 5403500 }, { "epoch": 15.64, "learning_rate": 4.218166884725825e-05, "loss": 2.2437, "step": 5404000 }, { "epoch": 15.64, "learning_rate": 4.218094519961097e-05, "loss": 2.2523, "step": 5404500 }, { "epoch": 15.65, "learning_rate": 4.21802215519637e-05, "loss": 2.2406, "step": 5405000 }, { "epoch": 15.65, "learning_rate": 4.217949790431642e-05, "loss": 2.245, "step": 5405500 }, { "epoch": 15.65, "learning_rate": 4.217877425666914e-05, "loss": 2.2699, "step": 5406000 }, { "epoch": 15.65, "learning_rate": 4.2178050609021865e-05, "loss": 2.2443, "step": 5406500 }, { "epoch": 15.65, "learning_rate": 4.217732840866988e-05, "loss": 2.2684, "step": 5407000 }, { "epoch": 15.65, "learning_rate": 4.21766047610226e-05, "loss": 2.2355, "step": 5407500 }, { "epoch": 15.65, "learning_rate": 4.2175881113375325e-05, "loss": 2.2347, "step": 5408000 }, { "epoch": 15.66, "learning_rate": 4.217515746572805e-05, "loss": 2.2545, "step": 5408500 }, { "epoch": 15.66, "learning_rate": 4.217443381808077e-05, "loss": 2.2344, "step": 5409000 }, { "epoch": 15.66, "learning_rate": 4.217371017043349e-05, "loss": 2.2524, "step": 5409500 }, { "epoch": 15.66, "learning_rate": 4.217298652278622e-05, "loss": 2.2674, "step": 5410000 }, { "epoch": 15.66, "learning_rate": 4.217226287513894e-05, "loss": 2.2322, "step": 5410500 }, { "epoch": 15.66, "learning_rate": 4.217154067478696e-05, "loss": 2.2353, "step": 5411000 }, { "epoch": 15.66, "learning_rate": 4.217081702713968e-05, "loss": 2.2487, "step": 5411500 }, { "epoch": 15.67, "learning_rate": 4.217009337949241e-05, "loss": 2.2482, "step": 5412000 }, { "epoch": 15.67, "learning_rate": 4.216936973184513e-05, "loss": 2.2496, "step": 5412500 }, { "epoch": 15.67, "learning_rate": 4.2168646084197854e-05, "loss": 2.2632, "step": 5413000 }, { "epoch": 15.67, "learning_rate": 4.2167922436550576e-05, "loss": 2.2422, "step": 5413500 }, { "epoch": 15.67, "learning_rate": 4.21671987889033e-05, "loss": 2.2535, "step": 5414000 }, { "epoch": 15.67, "learning_rate": 4.216647658855132e-05, "loss": 2.253, "step": 5414500 }, { "epoch": 15.67, "learning_rate": 4.216575294090404e-05, "loss": 2.236, "step": 5415000 }, { "epoch": 15.68, "learning_rate": 4.2165029293256765e-05, "loss": 2.2425, "step": 5415500 }, { "epoch": 15.68, "learning_rate": 4.216430564560949e-05, "loss": 2.2561, "step": 5416000 }, { "epoch": 15.68, "learning_rate": 4.21635834452575e-05, "loss": 2.2406, "step": 5416500 }, { "epoch": 15.68, "learning_rate": 4.216286124490552e-05, "loss": 2.2683, "step": 5417000 }, { "epoch": 15.68, "learning_rate": 4.216213759725825e-05, "loss": 2.2411, "step": 5417500 }, { "epoch": 15.68, "learning_rate": 4.216141394961097e-05, "loss": 2.2494, "step": 5418000 }, { "epoch": 15.68, "learning_rate": 4.216069030196369e-05, "loss": 2.2643, "step": 5418500 }, { "epoch": 15.69, "learning_rate": 4.2159966654316414e-05, "loss": 2.2432, "step": 5419000 }, { "epoch": 15.69, "learning_rate": 4.215924300666914e-05, "loss": 2.2565, "step": 5419500 }, { "epoch": 15.69, "learning_rate": 4.2158519359021865e-05, "loss": 2.247, "step": 5420000 }, { "epoch": 15.69, "learning_rate": 4.215779571137459e-05, "loss": 2.2362, "step": 5420500 }, { "epoch": 15.69, "learning_rate": 4.215707206372731e-05, "loss": 2.2629, "step": 5421000 }, { "epoch": 15.69, "learning_rate": 4.215634841608003e-05, "loss": 2.2438, "step": 5421500 }, { "epoch": 15.69, "learning_rate": 4.2155624768432754e-05, "loss": 2.2747, "step": 5422000 }, { "epoch": 15.7, "learning_rate": 4.2154901120785477e-05, "loss": 2.2758, "step": 5422500 }, { "epoch": 15.7, "learning_rate": 4.21541774731382e-05, "loss": 2.265, "step": 5423000 }, { "epoch": 15.7, "learning_rate": 4.215345527278622e-05, "loss": 2.2696, "step": 5423500 }, { "epoch": 15.7, "learning_rate": 4.215273162513894e-05, "loss": 2.2501, "step": 5424000 }, { "epoch": 15.7, "learning_rate": 4.2152007977491666e-05, "loss": 2.2673, "step": 5424500 }, { "epoch": 15.7, "learning_rate": 4.215128432984439e-05, "loss": 2.2452, "step": 5425000 }, { "epoch": 15.7, "learning_rate": 4.215056068219711e-05, "loss": 2.2325, "step": 5425500 }, { "epoch": 15.71, "learning_rate": 4.214983703454983e-05, "loss": 2.2294, "step": 5426000 }, { "epoch": 15.71, "learning_rate": 4.214911483419785e-05, "loss": 2.2626, "step": 5426500 }, { "epoch": 15.71, "learning_rate": 4.214839118655058e-05, "loss": 2.2511, "step": 5427000 }, { "epoch": 15.71, "learning_rate": 4.21476675389033e-05, "loss": 2.2521, "step": 5427500 }, { "epoch": 15.71, "learning_rate": 4.214694389125602e-05, "loss": 2.2495, "step": 5428000 }, { "epoch": 15.71, "learning_rate": 4.214622313819934e-05, "loss": 2.2151, "step": 5428500 }, { "epoch": 15.71, "learning_rate": 4.214549949055206e-05, "loss": 2.2449, "step": 5429000 }, { "epoch": 15.72, "learning_rate": 4.214477584290478e-05, "loss": 2.2595, "step": 5429500 }, { "epoch": 15.72, "learning_rate": 4.2144052195257504e-05, "loss": 2.2451, "step": 5430000 }, { "epoch": 15.72, "learning_rate": 4.2143328547610226e-05, "loss": 2.2594, "step": 5430500 }, { "epoch": 15.72, "learning_rate": 4.214260489996295e-05, "loss": 2.2562, "step": 5431000 }, { "epoch": 15.72, "learning_rate": 4.214188125231567e-05, "loss": 2.227, "step": 5431500 }, { "epoch": 15.72, "learning_rate": 4.21411576046684e-05, "loss": 2.2284, "step": 5432000 }, { "epoch": 15.72, "learning_rate": 4.214043395702112e-05, "loss": 2.2523, "step": 5432500 }, { "epoch": 15.73, "learning_rate": 4.2139710309373844e-05, "loss": 2.2436, "step": 5433000 }, { "epoch": 15.73, "learning_rate": 4.2138986661726566e-05, "loss": 2.2418, "step": 5433500 }, { "epoch": 15.73, "learning_rate": 4.2138263014079295e-05, "loss": 2.2504, "step": 5434000 }, { "epoch": 15.73, "learning_rate": 4.213753936643202e-05, "loss": 2.2412, "step": 5434500 }, { "epoch": 15.73, "learning_rate": 4.213681716608003e-05, "loss": 2.2949, "step": 5435000 }, { "epoch": 15.73, "learning_rate": 4.213609641302335e-05, "loss": 2.2543, "step": 5435500 }, { "epoch": 15.73, "learning_rate": 4.213537276537607e-05, "loss": 2.2417, "step": 5436000 }, { "epoch": 15.74, "learning_rate": 4.213464911772879e-05, "loss": 2.235, "step": 5436500 }, { "epoch": 15.74, "learning_rate": 4.2133925470081515e-05, "loss": 2.2489, "step": 5437000 }, { "epoch": 15.74, "learning_rate": 4.213320182243424e-05, "loss": 2.2498, "step": 5437500 }, { "epoch": 15.74, "learning_rate": 4.213247817478696e-05, "loss": 2.2597, "step": 5438000 }, { "epoch": 15.74, "learning_rate": 4.213175452713968e-05, "loss": 2.2515, "step": 5438500 }, { "epoch": 15.74, "learning_rate": 4.2131030879492404e-05, "loss": 2.2186, "step": 5439000 }, { "epoch": 15.75, "learning_rate": 4.2130307231845126e-05, "loss": 2.2589, "step": 5439500 }, { "epoch": 15.75, "learning_rate": 4.212958503149315e-05, "loss": 2.2736, "step": 5440000 }, { "epoch": 15.75, "learning_rate": 4.212886138384587e-05, "loss": 2.2567, "step": 5440500 }, { "epoch": 15.75, "learning_rate": 4.212813773619859e-05, "loss": 2.2397, "step": 5441000 }, { "epoch": 15.75, "learning_rate": 4.2127414088551315e-05, "loss": 2.2305, "step": 5441500 }, { "epoch": 15.75, "learning_rate": 4.212669188819934e-05, "loss": 2.2441, "step": 5442000 }, { "epoch": 15.75, "learning_rate": 4.212596824055206e-05, "loss": 2.2472, "step": 5442500 }, { "epoch": 15.76, "learning_rate": 4.212524459290478e-05, "loss": 2.2555, "step": 5443000 }, { "epoch": 15.76, "learning_rate": 4.2124520945257504e-05, "loss": 2.2638, "step": 5443500 }, { "epoch": 15.76, "learning_rate": 4.212379729761023e-05, "loss": 2.2231, "step": 5444000 }, { "epoch": 15.76, "learning_rate": 4.212307509725825e-05, "loss": 2.2716, "step": 5444500 }, { "epoch": 15.76, "learning_rate": 4.212235144961097e-05, "loss": 2.2513, "step": 5445000 }, { "epoch": 15.76, "learning_rate": 4.2121627801963694e-05, "loss": 2.2559, "step": 5445500 }, { "epoch": 15.76, "learning_rate": 4.2120904154316416e-05, "loss": 2.2573, "step": 5446000 }, { "epoch": 15.77, "learning_rate": 4.212018050666914e-05, "loss": 2.2642, "step": 5446500 }, { "epoch": 15.77, "learning_rate": 4.211945685902186e-05, "loss": 2.2487, "step": 5447000 }, { "epoch": 15.77, "learning_rate": 4.2118734658669876e-05, "loss": 2.2218, "step": 5447500 }, { "epoch": 15.77, "learning_rate": 4.21180110110226e-05, "loss": 2.2658, "step": 5448000 }, { "epoch": 15.77, "learning_rate": 4.211728881067062e-05, "loss": 2.2465, "step": 5448500 }, { "epoch": 15.77, "learning_rate": 4.211656516302334e-05, "loss": 2.2617, "step": 5449000 }, { "epoch": 15.77, "learning_rate": 4.211584151537607e-05, "loss": 2.2541, "step": 5449500 }, { "epoch": 15.78, "learning_rate": 4.2115117867728794e-05, "loss": 2.2453, "step": 5450000 }, { "epoch": 15.78, "learning_rate": 4.2114394220081516e-05, "loss": 2.2445, "step": 5450500 }, { "epoch": 15.78, "learning_rate": 4.211367201972953e-05, "loss": 2.2454, "step": 5451000 }, { "epoch": 15.78, "learning_rate": 4.2112948372082254e-05, "loss": 2.2674, "step": 5451500 }, { "epoch": 15.78, "learning_rate": 4.2112224724434976e-05, "loss": 2.238, "step": 5452000 }, { "epoch": 15.78, "learning_rate": 4.21115010767877e-05, "loss": 2.2449, "step": 5452500 }, { "epoch": 15.78, "learning_rate": 4.211077742914043e-05, "loss": 2.2425, "step": 5453000 }, { "epoch": 15.79, "learning_rate": 4.211005378149315e-05, "loss": 2.2528, "step": 5453500 }, { "epoch": 15.79, "learning_rate": 4.210933013384587e-05, "loss": 2.2561, "step": 5454000 }, { "epoch": 15.79, "learning_rate": 4.2108606486198594e-05, "loss": 2.2358, "step": 5454500 }, { "epoch": 15.79, "learning_rate": 4.210788428584661e-05, "loss": 2.2392, "step": 5455000 }, { "epoch": 15.79, "learning_rate": 4.210716063819933e-05, "loss": 2.2384, "step": 5455500 }, { "epoch": 15.79, "learning_rate": 4.2106436990552054e-05, "loss": 2.234, "step": 5456000 }, { "epoch": 15.79, "learning_rate": 4.2105713342904776e-05, "loss": 2.2703, "step": 5456500 }, { "epoch": 15.8, "learning_rate": 4.2104989695257505e-05, "loss": 2.2379, "step": 5457000 }, { "epoch": 15.8, "learning_rate": 4.210426604761023e-05, "loss": 2.2692, "step": 5457500 }, { "epoch": 15.8, "learning_rate": 4.210354239996295e-05, "loss": 2.2468, "step": 5458000 }, { "epoch": 15.8, "learning_rate": 4.210282019961097e-05, "loss": 2.2491, "step": 5458500 }, { "epoch": 15.8, "learning_rate": 4.2102096551963694e-05, "loss": 2.2371, "step": 5459000 }, { "epoch": 15.8, "learning_rate": 4.2101372904316417e-05, "loss": 2.2607, "step": 5459500 }, { "epoch": 15.8, "learning_rate": 4.210064925666914e-05, "loss": 2.2503, "step": 5460000 }, { "epoch": 15.81, "learning_rate": 4.209992560902186e-05, "loss": 2.2643, "step": 5460500 }, { "epoch": 15.81, "learning_rate": 4.209920196137458e-05, "loss": 2.2504, "step": 5461000 }, { "epoch": 15.81, "learning_rate": 4.2098478313727305e-05, "loss": 2.2492, "step": 5461500 }, { "epoch": 15.81, "learning_rate": 4.209775466608003e-05, "loss": 2.2122, "step": 5462000 }, { "epoch": 15.81, "learning_rate": 4.209703101843275e-05, "loss": 2.2601, "step": 5462500 }, { "epoch": 15.81, "learning_rate": 4.209630737078548e-05, "loss": 2.2287, "step": 5463000 }, { "epoch": 15.81, "learning_rate": 4.20955837231382e-05, "loss": 2.2225, "step": 5463500 }, { "epoch": 15.82, "learning_rate": 4.209486152278622e-05, "loss": 2.2486, "step": 5464000 }, { "epoch": 15.82, "learning_rate": 4.2094137875138946e-05, "loss": 2.2434, "step": 5464500 }, { "epoch": 15.82, "learning_rate": 4.209341422749167e-05, "loss": 2.2488, "step": 5465000 }, { "epoch": 15.82, "learning_rate": 4.209269057984439e-05, "loss": 2.2572, "step": 5465500 }, { "epoch": 15.82, "learning_rate": 4.2091969826787706e-05, "loss": 2.2645, "step": 5466000 }, { "epoch": 15.82, "learning_rate": 4.209124617914043e-05, "loss": 2.2163, "step": 5466500 }, { "epoch": 15.82, "learning_rate": 4.209052253149315e-05, "loss": 2.2337, "step": 5467000 }, { "epoch": 15.83, "learning_rate": 4.208979888384587e-05, "loss": 2.2605, "step": 5467500 }, { "epoch": 15.83, "learning_rate": 4.2089075236198595e-05, "loss": 2.2427, "step": 5468000 }, { "epoch": 15.83, "learning_rate": 4.208835158855132e-05, "loss": 2.2834, "step": 5468500 }, { "epoch": 15.83, "learning_rate": 4.208762794090404e-05, "loss": 2.2569, "step": 5469000 }, { "epoch": 15.83, "learning_rate": 4.208690429325676e-05, "loss": 2.2458, "step": 5469500 }, { "epoch": 15.83, "learning_rate": 4.2086180645609484e-05, "loss": 2.2688, "step": 5470000 }, { "epoch": 15.83, "learning_rate": 4.2085456997962206e-05, "loss": 2.2413, "step": 5470500 }, { "epoch": 15.84, "learning_rate": 4.208473479761023e-05, "loss": 2.218, "step": 5471000 }, { "epoch": 15.84, "learning_rate": 4.208401114996295e-05, "loss": 2.2557, "step": 5471500 }, { "epoch": 15.84, "learning_rate": 4.208328750231568e-05, "loss": 2.247, "step": 5472000 }, { "epoch": 15.84, "learning_rate": 4.20825638546684e-05, "loss": 2.2346, "step": 5472500 }, { "epoch": 15.84, "learning_rate": 4.208184165431642e-05, "loss": 2.2417, "step": 5473000 }, { "epoch": 15.84, "learning_rate": 4.208111800666914e-05, "loss": 2.2592, "step": 5473500 }, { "epoch": 15.84, "learning_rate": 4.208039435902186e-05, "loss": 2.2451, "step": 5474000 }, { "epoch": 15.85, "learning_rate": 4.2079670711374584e-05, "loss": 2.2407, "step": 5474500 }, { "epoch": 15.85, "learning_rate": 4.2078947063727306e-05, "loss": 2.2243, "step": 5475000 }, { "epoch": 15.85, "learning_rate": 4.207822341608003e-05, "loss": 2.23, "step": 5475500 }, { "epoch": 15.85, "learning_rate": 4.207749976843276e-05, "loss": 2.2402, "step": 5476000 }, { "epoch": 15.85, "learning_rate": 4.207677612078548e-05, "loss": 2.2472, "step": 5476500 }, { "epoch": 15.85, "learning_rate": 4.20760524731382e-05, "loss": 2.2678, "step": 5477000 }, { "epoch": 15.86, "learning_rate": 4.2075328825490924e-05, "loss": 2.248, "step": 5477500 }, { "epoch": 15.86, "learning_rate": 4.2074605177843646e-05, "loss": 2.2401, "step": 5478000 }, { "epoch": 15.86, "learning_rate": 4.207388153019637e-05, "loss": 2.2261, "step": 5478500 }, { "epoch": 15.86, "learning_rate": 4.207315932984439e-05, "loss": 2.2438, "step": 5479000 }, { "epoch": 15.86, "learning_rate": 4.207243568219711e-05, "loss": 2.2614, "step": 5479500 }, { "epoch": 15.86, "learning_rate": 4.2071712034549835e-05, "loss": 2.2648, "step": 5480000 }, { "epoch": 15.86, "learning_rate": 4.207098838690256e-05, "loss": 2.254, "step": 5480500 }, { "epoch": 15.87, "learning_rate": 4.207026473925528e-05, "loss": 2.2682, "step": 5481000 }, { "epoch": 15.87, "learning_rate": 4.206954109160801e-05, "loss": 2.2476, "step": 5481500 }, { "epoch": 15.87, "learning_rate": 4.206881744396073e-05, "loss": 2.2565, "step": 5482000 }, { "epoch": 15.87, "learning_rate": 4.2068093796313453e-05, "loss": 2.254, "step": 5482500 }, { "epoch": 15.87, "learning_rate": 4.206737159596147e-05, "loss": 2.2478, "step": 5483000 }, { "epoch": 15.87, "learning_rate": 4.206664794831419e-05, "loss": 2.2479, "step": 5483500 }, { "epoch": 15.87, "learning_rate": 4.2065924300666913e-05, "loss": 2.2543, "step": 5484000 }, { "epoch": 15.88, "learning_rate": 4.2065200653019636e-05, "loss": 2.2552, "step": 5484500 }, { "epoch": 15.88, "learning_rate": 4.206447700537236e-05, "loss": 2.2372, "step": 5485000 }, { "epoch": 15.88, "learning_rate": 4.206375480502038e-05, "loss": 2.2696, "step": 5485500 }, { "epoch": 15.88, "learning_rate": 4.20630311573731e-05, "loss": 2.2638, "step": 5486000 }, { "epoch": 15.88, "learning_rate": 4.206230750972583e-05, "loss": 2.2537, "step": 5486500 }, { "epoch": 15.88, "learning_rate": 4.2061583862078554e-05, "loss": 2.2389, "step": 5487000 }, { "epoch": 15.88, "learning_rate": 4.206086166172657e-05, "loss": 2.2535, "step": 5487500 }, { "epoch": 15.89, "learning_rate": 4.2060139461374585e-05, "loss": 2.2582, "step": 5488000 }, { "epoch": 15.89, "learning_rate": 4.205941581372731e-05, "loss": 2.2388, "step": 5488500 }, { "epoch": 15.89, "learning_rate": 4.205869216608003e-05, "loss": 2.2442, "step": 5489000 }, { "epoch": 15.89, "learning_rate": 4.205796851843276e-05, "loss": 2.2426, "step": 5489500 }, { "epoch": 15.89, "learning_rate": 4.2057246318080774e-05, "loss": 2.2585, "step": 5490000 }, { "epoch": 15.89, "learning_rate": 4.2056522670433496e-05, "loss": 2.2389, "step": 5490500 }, { "epoch": 15.89, "learning_rate": 4.205579902278622e-05, "loss": 2.2418, "step": 5491000 }, { "epoch": 15.9, "learning_rate": 4.205507537513894e-05, "loss": 2.2343, "step": 5491500 }, { "epoch": 15.9, "learning_rate": 4.205435172749166e-05, "loss": 2.2492, "step": 5492000 }, { "epoch": 15.9, "learning_rate": 4.2053628079844385e-05, "loss": 2.2435, "step": 5492500 }, { "epoch": 15.9, "learning_rate": 4.205290443219711e-05, "loss": 2.2508, "step": 5493000 }, { "epoch": 15.9, "learning_rate": 4.205218078454983e-05, "loss": 2.2423, "step": 5493500 }, { "epoch": 15.9, "learning_rate": 4.205145858419785e-05, "loss": 2.2551, "step": 5494000 }, { "epoch": 15.9, "learning_rate": 4.205073493655058e-05, "loss": 2.2537, "step": 5494500 }, { "epoch": 15.91, "learning_rate": 4.20500112889033e-05, "loss": 2.2314, "step": 5495000 }, { "epoch": 15.91, "learning_rate": 4.2049287641256025e-05, "loss": 2.2372, "step": 5495500 }, { "epoch": 15.91, "learning_rate": 4.2048566888199334e-05, "loss": 2.2523, "step": 5496000 }, { "epoch": 15.91, "learning_rate": 4.2047843240552056e-05, "loss": 2.2422, "step": 5496500 }, { "epoch": 15.91, "learning_rate": 4.2047119592904785e-05, "loss": 2.239, "step": 5497000 }, { "epoch": 15.91, "learning_rate": 4.204639594525751e-05, "loss": 2.2649, "step": 5497500 }, { "epoch": 15.91, "learning_rate": 4.204567229761023e-05, "loss": 2.2652, "step": 5498000 }, { "epoch": 15.92, "learning_rate": 4.204494864996295e-05, "loss": 2.2713, "step": 5498500 }, { "epoch": 15.92, "learning_rate": 4.2044225002315674e-05, "loss": 2.2363, "step": 5499000 }, { "epoch": 15.92, "learning_rate": 4.2043501354668397e-05, "loss": 2.235, "step": 5499500 }, { "epoch": 15.92, "learning_rate": 4.204277770702112e-05, "loss": 2.2549, "step": 5500000 }, { "epoch": 15.92, "learning_rate": 4.2042055506669134e-05, "loss": 2.2437, "step": 5500500 }, { "epoch": 15.92, "learning_rate": 4.204133185902186e-05, "loss": 2.2637, "step": 5501000 }, { "epoch": 15.92, "learning_rate": 4.2040608211374586e-05, "loss": 2.2569, "step": 5501500 }, { "epoch": 15.93, "learning_rate": 4.203988456372731e-05, "loss": 2.2363, "step": 5502000 }, { "epoch": 15.93, "learning_rate": 4.203916091608004e-05, "loss": 2.2667, "step": 5502500 }, { "epoch": 15.93, "learning_rate": 4.203843726843276e-05, "loss": 2.2735, "step": 5503000 }, { "epoch": 15.93, "learning_rate": 4.2037715068080775e-05, "loss": 2.2428, "step": 5503500 }, { "epoch": 15.93, "learning_rate": 4.20369914204335e-05, "loss": 2.2639, "step": 5504000 }, { "epoch": 15.93, "learning_rate": 4.203626777278622e-05, "loss": 2.2634, "step": 5504500 }, { "epoch": 15.93, "learning_rate": 4.203554412513894e-05, "loss": 2.2413, "step": 5505000 }, { "epoch": 15.94, "learning_rate": 4.2034820477491664e-05, "loss": 2.2721, "step": 5505500 }, { "epoch": 15.94, "learning_rate": 4.2034096829844386e-05, "loss": 2.2438, "step": 5506000 }, { "epoch": 15.94, "learning_rate": 4.203337462949241e-05, "loss": 2.2481, "step": 5506500 }, { "epoch": 15.94, "learning_rate": 4.203265098184513e-05, "loss": 2.2613, "step": 5507000 }, { "epoch": 15.94, "learning_rate": 4.203192733419785e-05, "loss": 2.2426, "step": 5507500 }, { "epoch": 15.94, "learning_rate": 4.2031203686550575e-05, "loss": 2.2584, "step": 5508000 }, { "epoch": 15.94, "learning_rate": 4.203048148619859e-05, "loss": 2.2542, "step": 5508500 }, { "epoch": 15.95, "learning_rate": 4.202975783855131e-05, "loss": 2.2603, "step": 5509000 }, { "epoch": 15.95, "learning_rate": 4.202903419090404e-05, "loss": 2.2483, "step": 5509500 }, { "epoch": 15.95, "learning_rate": 4.2028310543256764e-05, "loss": 2.2544, "step": 5510000 }, { "epoch": 15.95, "learning_rate": 4.2027588342904786e-05, "loss": 2.2317, "step": 5510500 }, { "epoch": 15.95, "learning_rate": 4.202686469525751e-05, "loss": 2.239, "step": 5511000 }, { "epoch": 15.95, "learning_rate": 4.202614104761023e-05, "loss": 2.2304, "step": 5511500 }, { "epoch": 15.95, "learning_rate": 4.202541739996295e-05, "loss": 2.2476, "step": 5512000 }, { "epoch": 15.96, "learning_rate": 4.202469519961097e-05, "loss": 2.2388, "step": 5512500 }, { "epoch": 15.96, "learning_rate": 4.202397155196369e-05, "loss": 2.2334, "step": 5513000 }, { "epoch": 15.96, "learning_rate": 4.202324790431641e-05, "loss": 2.2499, "step": 5513500 }, { "epoch": 15.96, "learning_rate": 4.2022524256669135e-05, "loss": 2.247, "step": 5514000 }, { "epoch": 15.96, "learning_rate": 4.202180060902186e-05, "loss": 2.2459, "step": 5514500 }, { "epoch": 15.96, "learning_rate": 4.2021076961374586e-05, "loss": 2.237, "step": 5515000 }, { "epoch": 15.97, "learning_rate": 4.202035331372731e-05, "loss": 2.247, "step": 5515500 }, { "epoch": 15.97, "learning_rate": 4.201962966608003e-05, "loss": 2.248, "step": 5516000 }, { "epoch": 15.97, "learning_rate": 4.201890601843276e-05, "loss": 2.2628, "step": 5516500 }, { "epoch": 15.97, "learning_rate": 4.201818237078548e-05, "loss": 2.2333, "step": 5517000 }, { "epoch": 15.97, "learning_rate": 4.2017458723138204e-05, "loss": 2.2472, "step": 5517500 }, { "epoch": 15.97, "learning_rate": 4.2016735075490927e-05, "loss": 2.2611, "step": 5518000 }, { "epoch": 15.97, "learning_rate": 4.201601287513894e-05, "loss": 2.2372, "step": 5518500 }, { "epoch": 15.98, "learning_rate": 4.2015289227491664e-05, "loss": 2.258, "step": 5519000 }, { "epoch": 15.98, "learning_rate": 4.201456557984439e-05, "loss": 2.255, "step": 5519500 }, { "epoch": 15.98, "learning_rate": 4.201384193219711e-05, "loss": 2.2459, "step": 5520000 }, { "epoch": 15.98, "learning_rate": 4.201311828454984e-05, "loss": 2.2439, "step": 5520500 }, { "epoch": 15.98, "learning_rate": 4.2012396084197853e-05, "loss": 2.235, "step": 5521000 }, { "epoch": 15.98, "learning_rate": 4.2011672436550576e-05, "loss": 2.2701, "step": 5521500 }, { "epoch": 15.98, "learning_rate": 4.20109487889033e-05, "loss": 2.2511, "step": 5522000 }, { "epoch": 15.99, "learning_rate": 4.201022514125602e-05, "loss": 2.256, "step": 5522500 }, { "epoch": 15.99, "learning_rate": 4.200950149360874e-05, "loss": 2.2592, "step": 5523000 }, { "epoch": 15.99, "learning_rate": 4.2008777845961465e-05, "loss": 2.2268, "step": 5523500 }, { "epoch": 15.99, "learning_rate": 4.200805564560949e-05, "loss": 2.2613, "step": 5524000 }, { "epoch": 15.99, "learning_rate": 4.2007331997962216e-05, "loss": 2.235, "step": 5524500 }, { "epoch": 15.99, "learning_rate": 4.200660835031494e-05, "loss": 2.2299, "step": 5525000 }, { "epoch": 15.99, "learning_rate": 4.200588470266766e-05, "loss": 2.2464, "step": 5525500 }, { "epoch": 16.0, "learning_rate": 4.200516105502038e-05, "loss": 2.254, "step": 5526000 }, { "epoch": 16.0, "learning_rate": 4.20044388546684e-05, "loss": 2.252, "step": 5526500 }, { "epoch": 16.0, "learning_rate": 4.200371520702112e-05, "loss": 2.252, "step": 5527000 }, { "epoch": 16.0, "learning_rate": 4.2002993006669136e-05, "loss": 2.2268, "step": 5527500 }, { "epoch": 16.0, "eval_accuracy": 0.655441833614539, "eval_accuracy_mlm": 0.6187079602036782, "eval_accuracy_nsp": 0.8525960961167407, "eval_loss": 2.2606470584869385, "eval_runtime": 330.7167, "eval_samples_per_second": 1319.516, "eval_steps_per_second": 54.981, "step": 5527552 }, { "epoch": 16.0, "learning_rate": 4.2002269359021865e-05, "loss": 2.2287, "step": 5528000 }, { "epoch": 16.0, "learning_rate": 4.200154571137459e-05, "loss": 2.221, "step": 5528500 }, { "epoch": 16.0, "learning_rate": 4.200082206372731e-05, "loss": 2.2238, "step": 5529000 }, { "epoch": 16.01, "learning_rate": 4.200009841608003e-05, "loss": 2.2213, "step": 5529500 }, { "epoch": 16.01, "learning_rate": 4.1999374768432754e-05, "loss": 2.2253, "step": 5530000 }, { "epoch": 16.01, "learning_rate": 4.1998651120785476e-05, "loss": 2.2198, "step": 5530500 }, { "epoch": 16.01, "learning_rate": 4.19979274731382e-05, "loss": 2.2025, "step": 5531000 }, { "epoch": 16.01, "learning_rate": 4.1997205272786214e-05, "loss": 2.22, "step": 5531500 }, { "epoch": 16.01, "learning_rate": 4.1996483072434236e-05, "loss": 2.2223, "step": 5532000 }, { "epoch": 16.01, "learning_rate": 4.1995759424786965e-05, "loss": 2.2348, "step": 5532500 }, { "epoch": 16.02, "learning_rate": 4.199503577713969e-05, "loss": 2.2247, "step": 5533000 }, { "epoch": 16.02, "learning_rate": 4.199431212949241e-05, "loss": 2.223, "step": 5533500 }, { "epoch": 16.02, "learning_rate": 4.199358848184513e-05, "loss": 2.2164, "step": 5534000 }, { "epoch": 16.02, "learning_rate": 4.1992864834197854e-05, "loss": 2.1993, "step": 5534500 }, { "epoch": 16.02, "learning_rate": 4.1992141186550576e-05, "loss": 2.2362, "step": 5535000 }, { "epoch": 16.02, "learning_rate": 4.19914175389033e-05, "loss": 2.24, "step": 5535500 }, { "epoch": 16.02, "learning_rate": 4.199069389125602e-05, "loss": 2.2435, "step": 5536000 }, { "epoch": 16.03, "learning_rate": 4.1989971690904036e-05, "loss": 2.2546, "step": 5536500 }, { "epoch": 16.03, "learning_rate": 4.1989248043256765e-05, "loss": 2.2314, "step": 5537000 }, { "epoch": 16.03, "learning_rate": 4.198852439560949e-05, "loss": 2.2112, "step": 5537500 }, { "epoch": 16.03, "learning_rate": 4.198780074796221e-05, "loss": 2.2135, "step": 5538000 }, { "epoch": 16.03, "learning_rate": 4.198707710031493e-05, "loss": 2.2457, "step": 5538500 }, { "epoch": 16.03, "learning_rate": 4.198635345266766e-05, "loss": 2.2064, "step": 5539000 }, { "epoch": 16.03, "learning_rate": 4.1985629805020383e-05, "loss": 2.2249, "step": 5539500 }, { "epoch": 16.04, "learning_rate": 4.1984906157373106e-05, "loss": 2.2235, "step": 5540000 }, { "epoch": 16.04, "learning_rate": 4.198418250972583e-05, "loss": 2.2096, "step": 5540500 }, { "epoch": 16.04, "learning_rate": 4.1983460309373843e-05, "loss": 2.2385, "step": 5541000 }, { "epoch": 16.04, "learning_rate": 4.1982736661726566e-05, "loss": 2.2394, "step": 5541500 }, { "epoch": 16.04, "learning_rate": 4.198201301407929e-05, "loss": 2.2266, "step": 5542000 }, { "epoch": 16.04, "learning_rate": 4.198128936643202e-05, "loss": 2.2331, "step": 5542500 }, { "epoch": 16.04, "learning_rate": 4.198056716608003e-05, "loss": 2.2257, "step": 5543000 }, { "epoch": 16.05, "learning_rate": 4.1979843518432755e-05, "loss": 2.2324, "step": 5543500 }, { "epoch": 16.05, "learning_rate": 4.197912131808077e-05, "loss": 2.2309, "step": 5544000 }, { "epoch": 16.05, "learning_rate": 4.197839767043349e-05, "loss": 2.2203, "step": 5544500 }, { "epoch": 16.05, "learning_rate": 4.197767691737681e-05, "loss": 2.2191, "step": 5545000 }, { "epoch": 16.05, "learning_rate": 4.197695326972953e-05, "loss": 2.2322, "step": 5545500 }, { "epoch": 16.05, "learning_rate": 4.197622962208225e-05, "loss": 2.2247, "step": 5546000 }, { "epoch": 16.05, "learning_rate": 4.197550742173027e-05, "loss": 2.241, "step": 5546500 }, { "epoch": 16.06, "learning_rate": 4.197478377408299e-05, "loss": 2.2305, "step": 5547000 }, { "epoch": 16.06, "learning_rate": 4.197406012643572e-05, "loss": 2.226, "step": 5547500 }, { "epoch": 16.06, "learning_rate": 4.197333647878844e-05, "loss": 2.238, "step": 5548000 }, { "epoch": 16.06, "learning_rate": 4.1972612831141164e-05, "loss": 2.2386, "step": 5548500 }, { "epoch": 16.06, "learning_rate": 4.197188918349389e-05, "loss": 2.2195, "step": 5549000 }, { "epoch": 16.06, "learning_rate": 4.1971165535846615e-05, "loss": 2.2467, "step": 5549500 }, { "epoch": 16.06, "learning_rate": 4.197044188819934e-05, "loss": 2.2445, "step": 5550000 }, { "epoch": 16.07, "learning_rate": 4.196971824055206e-05, "loss": 2.229, "step": 5550500 }, { "epoch": 16.07, "learning_rate": 4.196899459290478e-05, "loss": 2.2299, "step": 5551000 }, { "epoch": 16.07, "learning_rate": 4.1968270945257504e-05, "loss": 2.218, "step": 5551500 }, { "epoch": 16.07, "learning_rate": 4.1967547297610226e-05, "loss": 2.2262, "step": 5552000 }, { "epoch": 16.07, "learning_rate": 4.196682509725824e-05, "loss": 2.2215, "step": 5552500 }, { "epoch": 16.07, "learning_rate": 4.1966101449610964e-05, "loss": 2.2128, "step": 5553000 }, { "epoch": 16.08, "learning_rate": 4.196537780196369e-05, "loss": 2.25, "step": 5553500 }, { "epoch": 16.08, "learning_rate": 4.1964654154316415e-05, "loss": 2.2169, "step": 5554000 }, { "epoch": 16.08, "learning_rate": 4.1963930506669144e-05, "loss": 2.2652, "step": 5554500 }, { "epoch": 16.08, "learning_rate": 4.1963206859021867e-05, "loss": 2.2226, "step": 5555000 }, { "epoch": 16.08, "learning_rate": 4.196248321137459e-05, "loss": 2.2309, "step": 5555500 }, { "epoch": 16.08, "learning_rate": 4.196175956372731e-05, "loss": 2.21, "step": 5556000 }, { "epoch": 16.08, "learning_rate": 4.196103591608003e-05, "loss": 2.2112, "step": 5556500 }, { "epoch": 16.09, "learning_rate": 4.1960312268432756e-05, "loss": 2.2068, "step": 5557000 }, { "epoch": 16.09, "learning_rate": 4.195958862078548e-05, "loss": 2.2362, "step": 5557500 }, { "epoch": 16.09, "learning_rate": 4.195886642043349e-05, "loss": 2.2106, "step": 5558000 }, { "epoch": 16.09, "learning_rate": 4.1958142772786216e-05, "loss": 2.2272, "step": 5558500 }, { "epoch": 16.09, "learning_rate": 4.1957419125138945e-05, "loss": 2.2399, "step": 5559000 }, { "epoch": 16.09, "learning_rate": 4.195669692478696e-05, "loss": 2.2472, "step": 5559500 }, { "epoch": 16.09, "learning_rate": 4.195597327713968e-05, "loss": 2.21, "step": 5560000 }, { "epoch": 16.1, "learning_rate": 4.1955249629492405e-05, "loss": 2.1966, "step": 5560500 }, { "epoch": 16.1, "learning_rate": 4.195452598184513e-05, "loss": 2.2274, "step": 5561000 }, { "epoch": 16.1, "learning_rate": 4.195380233419785e-05, "loss": 2.2563, "step": 5561500 }, { "epoch": 16.1, "learning_rate": 4.195307868655058e-05, "loss": 2.2069, "step": 5562000 }, { "epoch": 16.1, "learning_rate": 4.19523550389033e-05, "loss": 2.2139, "step": 5562500 }, { "epoch": 16.1, "learning_rate": 4.195163139125602e-05, "loss": 2.2167, "step": 5563000 }, { "epoch": 16.1, "learning_rate": 4.1950907743608745e-05, "loss": 2.2355, "step": 5563500 }, { "epoch": 16.11, "learning_rate": 4.195018554325677e-05, "loss": 2.2251, "step": 5564000 }, { "epoch": 16.11, "learning_rate": 4.194946334290478e-05, "loss": 2.2395, "step": 5564500 }, { "epoch": 16.11, "learning_rate": 4.1948739695257505e-05, "loss": 2.2062, "step": 5565000 }, { "epoch": 16.11, "learning_rate": 4.194801604761023e-05, "loss": 2.2331, "step": 5565500 }, { "epoch": 16.11, "learning_rate": 4.194729239996295e-05, "loss": 2.2285, "step": 5566000 }, { "epoch": 16.11, "learning_rate": 4.194656875231567e-05, "loss": 2.244, "step": 5566500 }, { "epoch": 16.11, "learning_rate": 4.1945845104668394e-05, "loss": 2.2575, "step": 5567000 }, { "epoch": 16.12, "learning_rate": 4.1945121457021116e-05, "loss": 2.2243, "step": 5567500 }, { "epoch": 16.12, "learning_rate": 4.1944397809373845e-05, "loss": 2.233, "step": 5568000 }, { "epoch": 16.12, "learning_rate": 4.194367416172657e-05, "loss": 2.2259, "step": 5568500 }, { "epoch": 16.12, "learning_rate": 4.1942950514079296e-05, "loss": 2.2495, "step": 5569000 }, { "epoch": 16.12, "learning_rate": 4.194222686643202e-05, "loss": 2.2121, "step": 5569500 }, { "epoch": 16.12, "learning_rate": 4.194150321878474e-05, "loss": 2.2291, "step": 5570000 }, { "epoch": 16.12, "learning_rate": 4.1940781018432756e-05, "loss": 2.2322, "step": 5570500 }, { "epoch": 16.13, "learning_rate": 4.194005737078548e-05, "loss": 2.2298, "step": 5571000 }, { "epoch": 16.13, "learning_rate": 4.19393337231382e-05, "loss": 2.2197, "step": 5571500 }, { "epoch": 16.13, "learning_rate": 4.193861007549092e-05, "loss": 2.2271, "step": 5572000 }, { "epoch": 16.13, "learning_rate": 4.1937886427843645e-05, "loss": 2.2166, "step": 5572500 }, { "epoch": 16.13, "learning_rate": 4.193716278019637e-05, "loss": 2.2321, "step": 5573000 }, { "epoch": 16.13, "learning_rate": 4.1936439132549096e-05, "loss": 2.2215, "step": 5573500 }, { "epoch": 16.13, "learning_rate": 4.193571548490182e-05, "loss": 2.2013, "step": 5574000 }, { "epoch": 16.14, "learning_rate": 4.193499183725454e-05, "loss": 2.2184, "step": 5574500 }, { "epoch": 16.14, "learning_rate": 4.193426818960726e-05, "loss": 2.2162, "step": 5575000 }, { "epoch": 16.14, "learning_rate": 4.193354598925528e-05, "loss": 2.2216, "step": 5575500 }, { "epoch": 16.14, "learning_rate": 4.1932822341608e-05, "loss": 2.2355, "step": 5576000 }, { "epoch": 16.14, "learning_rate": 4.193209869396073e-05, "loss": 2.2284, "step": 5576500 }, { "epoch": 16.14, "learning_rate": 4.193137504631345e-05, "loss": 2.2499, "step": 5577000 }, { "epoch": 16.14, "learning_rate": 4.193065284596147e-05, "loss": 2.2401, "step": 5577500 }, { "epoch": 16.15, "learning_rate": 4.19299291983142e-05, "loss": 2.2365, "step": 5578000 }, { "epoch": 16.15, "learning_rate": 4.192920699796221e-05, "loss": 2.2208, "step": 5578500 }, { "epoch": 16.15, "learning_rate": 4.1928483350314935e-05, "loss": 2.2414, "step": 5579000 }, { "epoch": 16.15, "learning_rate": 4.192775970266766e-05, "loss": 2.2368, "step": 5579500 }, { "epoch": 16.15, "learning_rate": 4.192703750231567e-05, "loss": 2.221, "step": 5580000 }, { "epoch": 16.15, "learning_rate": 4.1926313854668395e-05, "loss": 2.2461, "step": 5580500 }, { "epoch": 16.15, "learning_rate": 4.1925590207021124e-05, "loss": 2.2478, "step": 5581000 }, { "epoch": 16.16, "learning_rate": 4.1924866559373846e-05, "loss": 2.2468, "step": 5581500 }, { "epoch": 16.16, "learning_rate": 4.192414291172657e-05, "loss": 2.2126, "step": 5582000 }, { "epoch": 16.16, "learning_rate": 4.192341926407929e-05, "loss": 2.2398, "step": 5582500 }, { "epoch": 16.16, "learning_rate": 4.192269561643201e-05, "loss": 2.2373, "step": 5583000 }, { "epoch": 16.16, "learning_rate": 4.1921971968784735e-05, "loss": 2.2336, "step": 5583500 }, { "epoch": 16.16, "learning_rate": 4.1921248321137464e-05, "loss": 2.2408, "step": 5584000 }, { "epoch": 16.16, "learning_rate": 4.1920524673490186e-05, "loss": 2.2273, "step": 5584500 }, { "epoch": 16.17, "learning_rate": 4.191980102584291e-05, "loss": 2.2299, "step": 5585000 }, { "epoch": 16.17, "learning_rate": 4.191907737819563e-05, "loss": 2.2128, "step": 5585500 }, { "epoch": 16.17, "learning_rate": 4.191835373054835e-05, "loss": 2.2227, "step": 5586000 }, { "epoch": 16.17, "learning_rate": 4.1917630082901075e-05, "loss": 2.2438, "step": 5586500 }, { "epoch": 16.17, "learning_rate": 4.19169078825491e-05, "loss": 2.2137, "step": 5587000 }, { "epoch": 16.17, "learning_rate": 4.191618423490182e-05, "loss": 2.2271, "step": 5587500 }, { "epoch": 16.17, "learning_rate": 4.191546058725454e-05, "loss": 2.2161, "step": 5588000 }, { "epoch": 16.18, "learning_rate": 4.1914736939607264e-05, "loss": 2.244, "step": 5588500 }, { "epoch": 16.18, "learning_rate": 4.1914013291959986e-05, "loss": 2.2635, "step": 5589000 }, { "epoch": 16.18, "learning_rate": 4.191328964431271e-05, "loss": 2.2237, "step": 5589500 }, { "epoch": 16.18, "learning_rate": 4.191256599666543e-05, "loss": 2.2404, "step": 5590000 }, { "epoch": 16.18, "learning_rate": 4.1911843796313446e-05, "loss": 2.2377, "step": 5590500 }, { "epoch": 16.18, "learning_rate": 4.1911120148666175e-05, "loss": 2.2428, "step": 5591000 }, { "epoch": 16.19, "learning_rate": 4.19103979483142e-05, "loss": 2.2242, "step": 5591500 }, { "epoch": 16.19, "learning_rate": 4.190967430066692e-05, "loss": 2.212, "step": 5592000 }, { "epoch": 16.19, "learning_rate": 4.190895065301964e-05, "loss": 2.2253, "step": 5592500 }, { "epoch": 16.19, "learning_rate": 4.1908227005372364e-05, "loss": 2.2305, "step": 5593000 }, { "epoch": 16.19, "learning_rate": 4.1907503357725087e-05, "loss": 2.2244, "step": 5593500 }, { "epoch": 16.19, "learning_rate": 4.190677971007781e-05, "loss": 2.2705, "step": 5594000 }, { "epoch": 16.19, "learning_rate": 4.1906057509725824e-05, "loss": 2.1855, "step": 5594500 }, { "epoch": 16.2, "learning_rate": 4.1905333862078547e-05, "loss": 2.2597, "step": 5595000 }, { "epoch": 16.2, "learning_rate": 4.1904610214431276e-05, "loss": 2.245, "step": 5595500 }, { "epoch": 16.2, "learning_rate": 4.1903886566784e-05, "loss": 2.246, "step": 5596000 }, { "epoch": 16.2, "learning_rate": 4.190316436643201e-05, "loss": 2.2371, "step": 5596500 }, { "epoch": 16.2, "learning_rate": 4.1902440718784736e-05, "loss": 2.2303, "step": 5597000 }, { "epoch": 16.2, "learning_rate": 4.190171707113746e-05, "loss": 2.2262, "step": 5597500 }, { "epoch": 16.2, "learning_rate": 4.190099342349018e-05, "loss": 2.2245, "step": 5598000 }, { "epoch": 16.21, "learning_rate": 4.19002697758429e-05, "loss": 2.2495, "step": 5598500 }, { "epoch": 16.21, "learning_rate": 4.189954612819563e-05, "loss": 2.25, "step": 5599000 }, { "epoch": 16.21, "learning_rate": 4.1898822480548354e-05, "loss": 2.2449, "step": 5599500 }, { "epoch": 16.21, "learning_rate": 4.1898098832901076e-05, "loss": 2.2217, "step": 5600000 }, { "epoch": 16.21, "learning_rate": 4.18973751852538e-05, "loss": 2.2515, "step": 5600500 }, { "epoch": 16.21, "learning_rate": 4.189665298490182e-05, "loss": 2.2507, "step": 5601000 }, { "epoch": 16.21, "learning_rate": 4.189592933725454e-05, "loss": 2.205, "step": 5601500 }, { "epoch": 16.22, "learning_rate": 4.1895205689607265e-05, "loss": 2.2308, "step": 5602000 }, { "epoch": 16.22, "learning_rate": 4.189448204195999e-05, "loss": 2.234, "step": 5602500 }, { "epoch": 16.22, "learning_rate": 4.189375839431271e-05, "loss": 2.2219, "step": 5603000 }, { "epoch": 16.22, "learning_rate": 4.189303474666543e-05, "loss": 2.2263, "step": 5603500 }, { "epoch": 16.22, "learning_rate": 4.1892311099018154e-05, "loss": 2.2391, "step": 5604000 }, { "epoch": 16.22, "learning_rate": 4.1891588898666176e-05, "loss": 2.2346, "step": 5604500 }, { "epoch": 16.22, "learning_rate": 4.18908652510189e-05, "loss": 2.2177, "step": 5605000 }, { "epoch": 16.23, "learning_rate": 4.189014160337162e-05, "loss": 2.2259, "step": 5605500 }, { "epoch": 16.23, "learning_rate": 4.188941795572435e-05, "loss": 2.2296, "step": 5606000 }, { "epoch": 16.23, "learning_rate": 4.188869430807707e-05, "loss": 2.237, "step": 5606500 }, { "epoch": 16.23, "learning_rate": 4.188797210772509e-05, "loss": 2.2397, "step": 5607000 }, { "epoch": 16.23, "learning_rate": 4.188724846007781e-05, "loss": 2.2506, "step": 5607500 }, { "epoch": 16.23, "learning_rate": 4.188652481243053e-05, "loss": 2.2301, "step": 5608000 }, { "epoch": 16.23, "learning_rate": 4.1885801164783254e-05, "loss": 2.2392, "step": 5608500 }, { "epoch": 16.24, "learning_rate": 4.1885077517135976e-05, "loss": 2.2213, "step": 5609000 }, { "epoch": 16.24, "learning_rate": 4.1884355316784e-05, "loss": 2.2321, "step": 5609500 }, { "epoch": 16.24, "learning_rate": 4.188363166913672e-05, "loss": 2.2428, "step": 5610000 }, { "epoch": 16.24, "learning_rate": 4.188290802148944e-05, "loss": 2.2231, "step": 5610500 }, { "epoch": 16.24, "learning_rate": 4.1882184373842165e-05, "loss": 2.2432, "step": 5611000 }, { "epoch": 16.24, "learning_rate": 4.188146217349018e-05, "loss": 2.2551, "step": 5611500 }, { "epoch": 16.24, "learning_rate": 4.18807385258429e-05, "loss": 2.2282, "step": 5612000 }, { "epoch": 16.25, "learning_rate": 4.1880014878195625e-05, "loss": 2.2361, "step": 5612500 }, { "epoch": 16.25, "learning_rate": 4.187929123054835e-05, "loss": 2.234, "step": 5613000 }, { "epoch": 16.25, "learning_rate": 4.1878567582901077e-05, "loss": 2.2373, "step": 5613500 }, { "epoch": 16.25, "learning_rate": 4.18778439352538e-05, "loss": 2.2232, "step": 5614000 }, { "epoch": 16.25, "learning_rate": 4.187712028760653e-05, "loss": 2.254, "step": 5614500 }, { "epoch": 16.25, "learning_rate": 4.187639663995925e-05, "loss": 2.2519, "step": 5615000 }, { "epoch": 16.25, "learning_rate": 4.187567299231197e-05, "loss": 2.2284, "step": 5615500 }, { "epoch": 16.26, "learning_rate": 4.1874949344664694e-05, "loss": 2.2264, "step": 5616000 }, { "epoch": 16.26, "learning_rate": 4.187422569701742e-05, "loss": 2.2542, "step": 5616500 }, { "epoch": 16.26, "learning_rate": 4.187350204937014e-05, "loss": 2.2169, "step": 5617000 }, { "epoch": 16.26, "learning_rate": 4.187277840172286e-05, "loss": 2.2343, "step": 5617500 }, { "epoch": 16.26, "learning_rate": 4.1872054754075583e-05, "loss": 2.2148, "step": 5618000 }, { "epoch": 16.26, "learning_rate": 4.1871331106428306e-05, "loss": 2.2457, "step": 5618500 }, { "epoch": 16.26, "learning_rate": 4.187060745878103e-05, "loss": 2.2466, "step": 5619000 }, { "epoch": 16.27, "learning_rate": 4.1869886705724344e-05, "loss": 2.2501, "step": 5619500 }, { "epoch": 16.27, "learning_rate": 4.1869163058077066e-05, "loss": 2.2314, "step": 5620000 }, { "epoch": 16.27, "learning_rate": 4.186843941042979e-05, "loss": 2.2294, "step": 5620500 }, { "epoch": 16.27, "learning_rate": 4.186771576278252e-05, "loss": 2.2486, "step": 5621000 }, { "epoch": 16.27, "learning_rate": 4.186699356243053e-05, "loss": 2.2374, "step": 5621500 }, { "epoch": 16.27, "learning_rate": 4.1866269914783255e-05, "loss": 2.2331, "step": 5622000 }, { "epoch": 16.27, "learning_rate": 4.186554626713598e-05, "loss": 2.2415, "step": 5622500 }, { "epoch": 16.28, "learning_rate": 4.1864822619488706e-05, "loss": 2.2336, "step": 5623000 }, { "epoch": 16.28, "learning_rate": 4.186410041913672e-05, "loss": 2.2345, "step": 5623500 }, { "epoch": 16.28, "learning_rate": 4.1863376771489444e-05, "loss": 2.2415, "step": 5624000 }, { "epoch": 16.28, "learning_rate": 4.1862653123842166e-05, "loss": 2.2324, "step": 5624500 }, { "epoch": 16.28, "learning_rate": 4.186192947619489e-05, "loss": 2.2349, "step": 5625000 }, { "epoch": 16.28, "learning_rate": 4.186120582854761e-05, "loss": 2.2368, "step": 5625500 }, { "epoch": 16.28, "learning_rate": 4.186048218090033e-05, "loss": 2.2174, "step": 5626000 }, { "epoch": 16.29, "learning_rate": 4.1859758533253055e-05, "loss": 2.2265, "step": 5626500 }, { "epoch": 16.29, "learning_rate": 4.185903488560578e-05, "loss": 2.2354, "step": 5627000 }, { "epoch": 16.29, "learning_rate": 4.1858311237958506e-05, "loss": 2.211, "step": 5627500 }, { "epoch": 16.29, "learning_rate": 4.185758759031123e-05, "loss": 2.2134, "step": 5628000 }, { "epoch": 16.29, "learning_rate": 4.185686394266396e-05, "loss": 2.2071, "step": 5628500 }, { "epoch": 16.29, "learning_rate": 4.185614029501668e-05, "loss": 2.2237, "step": 5629000 }, { "epoch": 16.3, "learning_rate": 4.1855418094664695e-05, "loss": 2.237, "step": 5629500 }, { "epoch": 16.3, "learning_rate": 4.185469444701742e-05, "loss": 2.2385, "step": 5630000 }, { "epoch": 16.3, "learning_rate": 4.185397224666543e-05, "loss": 2.2286, "step": 5630500 }, { "epoch": 16.3, "learning_rate": 4.1853248599018155e-05, "loss": 2.2616, "step": 5631000 }, { "epoch": 16.3, "learning_rate": 4.185252495137088e-05, "loss": 2.2557, "step": 5631500 }, { "epoch": 16.3, "learning_rate": 4.1851801303723607e-05, "loss": 2.2513, "step": 5632000 }, { "epoch": 16.3, "learning_rate": 4.185107765607633e-05, "loss": 2.2277, "step": 5632500 }, { "epoch": 16.31, "learning_rate": 4.185035400842905e-05, "loss": 2.2519, "step": 5633000 }, { "epoch": 16.31, "learning_rate": 4.184963036078177e-05, "loss": 2.2442, "step": 5633500 }, { "epoch": 16.31, "learning_rate": 4.1848906713134495e-05, "loss": 2.2475, "step": 5634000 }, { "epoch": 16.31, "learning_rate": 4.1848185960077804e-05, "loss": 2.218, "step": 5634500 }, { "epoch": 16.31, "learning_rate": 4.1847462312430527e-05, "loss": 2.2038, "step": 5635000 }, { "epoch": 16.31, "learning_rate": 4.1846738664783256e-05, "loss": 2.2209, "step": 5635500 }, { "epoch": 16.31, "learning_rate": 4.184601501713598e-05, "loss": 2.2108, "step": 5636000 }, { "epoch": 16.32, "learning_rate": 4.184529136948871e-05, "loss": 2.2071, "step": 5636500 }, { "epoch": 16.32, "learning_rate": 4.184456772184143e-05, "loss": 2.2402, "step": 5637000 }, { "epoch": 16.32, "learning_rate": 4.184384407419415e-05, "loss": 2.2289, "step": 5637500 }, { "epoch": 16.32, "learning_rate": 4.1843120426546874e-05, "loss": 2.2554, "step": 5638000 }, { "epoch": 16.32, "learning_rate": 4.1842396778899596e-05, "loss": 2.2403, "step": 5638500 }, { "epoch": 16.32, "learning_rate": 4.184167313125232e-05, "loss": 2.2406, "step": 5639000 }, { "epoch": 16.32, "learning_rate": 4.184094948360504e-05, "loss": 2.2435, "step": 5639500 }, { "epoch": 16.33, "learning_rate": 4.184022583595776e-05, "loss": 2.2417, "step": 5640000 }, { "epoch": 16.33, "learning_rate": 4.1839502188310485e-05, "loss": 2.2454, "step": 5640500 }, { "epoch": 16.33, "learning_rate": 4.183877854066321e-05, "loss": 2.2558, "step": 5641000 }, { "epoch": 16.33, "learning_rate": 4.183805634031123e-05, "loss": 2.2111, "step": 5641500 }, { "epoch": 16.33, "learning_rate": 4.183733269266395e-05, "loss": 2.2361, "step": 5642000 }, { "epoch": 16.33, "learning_rate": 4.1836609045016674e-05, "loss": 2.2533, "step": 5642500 }, { "epoch": 16.33, "learning_rate": 4.1835885397369396e-05, "loss": 2.2463, "step": 5643000 }, { "epoch": 16.34, "learning_rate": 4.183516319701742e-05, "loss": 2.2328, "step": 5643500 }, { "epoch": 16.34, "learning_rate": 4.183443954937014e-05, "loss": 2.2235, "step": 5644000 }, { "epoch": 16.34, "learning_rate": 4.183371590172286e-05, "loss": 2.2707, "step": 5644500 }, { "epoch": 16.34, "learning_rate": 4.1832992254075585e-05, "loss": 2.2367, "step": 5645000 }, { "epoch": 16.34, "learning_rate": 4.183226860642831e-05, "loss": 2.2465, "step": 5645500 }, { "epoch": 16.34, "learning_rate": 4.183154495878103e-05, "loss": 2.255, "step": 5646000 }, { "epoch": 16.34, "learning_rate": 4.183082131113376e-05, "loss": 2.2255, "step": 5646500 }, { "epoch": 16.35, "learning_rate": 4.183009766348648e-05, "loss": 2.2323, "step": 5647000 }, { "epoch": 16.35, "learning_rate": 4.18293740158392e-05, "loss": 2.2454, "step": 5647500 }, { "epoch": 16.35, "learning_rate": 4.1828650368191925e-05, "loss": 2.2222, "step": 5648000 }, { "epoch": 16.35, "learning_rate": 4.182792672054465e-05, "loss": 2.2534, "step": 5648500 }, { "epoch": 16.35, "learning_rate": 4.182720307289737e-05, "loss": 2.2489, "step": 5649000 }, { "epoch": 16.35, "learning_rate": 4.182647942525009e-05, "loss": 2.2369, "step": 5649500 }, { "epoch": 16.35, "learning_rate": 4.182575722489811e-05, "loss": 2.2381, "step": 5650000 }, { "epoch": 16.36, "learning_rate": 4.1825033577250836e-05, "loss": 2.241, "step": 5650500 }, { "epoch": 16.36, "learning_rate": 4.182430992960356e-05, "loss": 2.2202, "step": 5651000 }, { "epoch": 16.36, "learning_rate": 4.1823589176546874e-05, "loss": 2.2406, "step": 5651500 }, { "epoch": 16.36, "learning_rate": 4.1822865528899597e-05, "loss": 2.2237, "step": 5652000 }, { "epoch": 16.36, "learning_rate": 4.182214188125232e-05, "loss": 2.2375, "step": 5652500 }, { "epoch": 16.36, "learning_rate": 4.182141823360504e-05, "loss": 2.2462, "step": 5653000 }, { "epoch": 16.36, "learning_rate": 4.182069458595776e-05, "loss": 2.2447, "step": 5653500 }, { "epoch": 16.37, "learning_rate": 4.1819970938310486e-05, "loss": 2.2473, "step": 5654000 }, { "epoch": 16.37, "learning_rate": 4.181924729066321e-05, "loss": 2.2169, "step": 5654500 }, { "epoch": 16.37, "learning_rate": 4.181852364301593e-05, "loss": 2.2293, "step": 5655000 }, { "epoch": 16.37, "learning_rate": 4.181780144266395e-05, "loss": 2.2597, "step": 5655500 }, { "epoch": 16.37, "learning_rate": 4.1817077795016675e-05, "loss": 2.2211, "step": 5656000 }, { "epoch": 16.37, "learning_rate": 4.18163541473694e-05, "loss": 2.2341, "step": 5656500 }, { "epoch": 16.37, "learning_rate": 4.181563049972212e-05, "loss": 2.2324, "step": 5657000 }, { "epoch": 16.38, "learning_rate": 4.181490685207484e-05, "loss": 2.2299, "step": 5657500 }, { "epoch": 16.38, "learning_rate": 4.181418320442757e-05, "loss": 2.254, "step": 5658000 }, { "epoch": 16.38, "learning_rate": 4.181345955678029e-05, "loss": 2.234, "step": 5658500 }, { "epoch": 16.38, "learning_rate": 4.1812735909133015e-05, "loss": 2.2259, "step": 5659000 }, { "epoch": 16.38, "learning_rate": 4.181201226148574e-05, "loss": 2.2423, "step": 5659500 }, { "epoch": 16.38, "learning_rate": 4.181128861383846e-05, "loss": 2.2242, "step": 5660000 }, { "epoch": 16.38, "learning_rate": 4.181056496619118e-05, "loss": 2.2298, "step": 5660500 }, { "epoch": 16.39, "learning_rate": 4.1809842765839204e-05, "loss": 2.2365, "step": 5661000 }, { "epoch": 16.39, "learning_rate": 4.1809119118191926e-05, "loss": 2.22, "step": 5661500 }, { "epoch": 16.39, "learning_rate": 4.180839691783994e-05, "loss": 2.2399, "step": 5662000 }, { "epoch": 16.39, "learning_rate": 4.1807673270192664e-05, "loss": 2.2538, "step": 5662500 }, { "epoch": 16.39, "learning_rate": 4.1806949622545386e-05, "loss": 2.2378, "step": 5663000 }, { "epoch": 16.39, "learning_rate": 4.180622597489811e-05, "loss": 2.2366, "step": 5663500 }, { "epoch": 16.39, "learning_rate": 4.180550232725084e-05, "loss": 2.2114, "step": 5664000 }, { "epoch": 16.4, "learning_rate": 4.180477867960356e-05, "loss": 2.2601, "step": 5664500 }, { "epoch": 16.4, "learning_rate": 4.180405503195628e-05, "loss": 2.2517, "step": 5665000 }, { "epoch": 16.4, "learning_rate": 4.180333138430901e-05, "loss": 2.2438, "step": 5665500 }, { "epoch": 16.4, "learning_rate": 4.180260773666173e-05, "loss": 2.221, "step": 5666000 }, { "epoch": 16.4, "learning_rate": 4.1801884089014455e-05, "loss": 2.2364, "step": 5666500 }, { "epoch": 16.4, "learning_rate": 4.180116188866247e-05, "loss": 2.2344, "step": 5667000 }, { "epoch": 16.41, "learning_rate": 4.180043824101519e-05, "loss": 2.2508, "step": 5667500 }, { "epoch": 16.41, "learning_rate": 4.1799714593367915e-05, "loss": 2.2359, "step": 5668000 }, { "epoch": 16.41, "learning_rate": 4.179899094572064e-05, "loss": 2.2353, "step": 5668500 }, { "epoch": 16.41, "learning_rate": 4.179826729807336e-05, "loss": 2.2505, "step": 5669000 }, { "epoch": 16.41, "learning_rate": 4.179754365042609e-05, "loss": 2.2424, "step": 5669500 }, { "epoch": 16.41, "learning_rate": 4.17968228973694e-05, "loss": 2.2313, "step": 5670000 }, { "epoch": 16.41, "learning_rate": 4.179609924972212e-05, "loss": 2.2202, "step": 5670500 }, { "epoch": 16.42, "learning_rate": 4.179537560207484e-05, "loss": 2.2313, "step": 5671000 }, { "epoch": 16.42, "learning_rate": 4.1794651954427564e-05, "loss": 2.2454, "step": 5671500 }, { "epoch": 16.42, "learning_rate": 4.1793928306780286e-05, "loss": 2.2252, "step": 5672000 }, { "epoch": 16.42, "learning_rate": 4.179320465913301e-05, "loss": 2.2402, "step": 5672500 }, { "epoch": 16.42, "learning_rate": 4.179248101148574e-05, "loss": 2.2161, "step": 5673000 }, { "epoch": 16.42, "learning_rate": 4.179175736383846e-05, "loss": 2.246, "step": 5673500 }, { "epoch": 16.42, "learning_rate": 4.179103371619119e-05, "loss": 2.2289, "step": 5674000 }, { "epoch": 16.43, "learning_rate": 4.1790311515839205e-05, "loss": 2.2392, "step": 5674500 }, { "epoch": 16.43, "learning_rate": 4.178958786819193e-05, "loss": 2.2293, "step": 5675000 }, { "epoch": 16.43, "learning_rate": 4.178886422054465e-05, "loss": 2.2209, "step": 5675500 }, { "epoch": 16.43, "learning_rate": 4.178814057289737e-05, "loss": 2.2485, "step": 5676000 }, { "epoch": 16.43, "learning_rate": 4.1787416925250093e-05, "loss": 2.2409, "step": 5676500 }, { "epoch": 16.43, "learning_rate": 4.178669472489811e-05, "loss": 2.2301, "step": 5677000 }, { "epoch": 16.43, "learning_rate": 4.178597107725084e-05, "loss": 2.2342, "step": 5677500 }, { "epoch": 16.44, "learning_rate": 4.178524742960356e-05, "loss": 2.2484, "step": 5678000 }, { "epoch": 16.44, "learning_rate": 4.178452378195628e-05, "loss": 2.2287, "step": 5678500 }, { "epoch": 16.44, "learning_rate": 4.1783800134309005e-05, "loss": 2.2585, "step": 5679000 }, { "epoch": 16.44, "learning_rate": 4.178307793395702e-05, "loss": 2.2429, "step": 5679500 }, { "epoch": 16.44, "learning_rate": 4.178235428630974e-05, "loss": 2.2592, "step": 5680000 }, { "epoch": 16.44, "learning_rate": 4.178163063866247e-05, "loss": 2.2446, "step": 5680500 }, { "epoch": 16.44, "learning_rate": 4.1780906991015194e-05, "loss": 2.2082, "step": 5681000 }, { "epoch": 16.45, "learning_rate": 4.178018479066321e-05, "loss": 2.2327, "step": 5681500 }, { "epoch": 16.45, "learning_rate": 4.177946114301594e-05, "loss": 2.2111, "step": 5682000 }, { "epoch": 16.45, "learning_rate": 4.177873749536866e-05, "loss": 2.2348, "step": 5682500 }, { "epoch": 16.45, "learning_rate": 4.1778015295016676e-05, "loss": 2.2216, "step": 5683000 }, { "epoch": 16.45, "learning_rate": 4.17772916473694e-05, "loss": 2.2528, "step": 5683500 }, { "epoch": 16.45, "learning_rate": 4.1776569447017414e-05, "loss": 2.2326, "step": 5684000 }, { "epoch": 16.45, "learning_rate": 4.1775845799370136e-05, "loss": 2.2157, "step": 5684500 }, { "epoch": 16.46, "learning_rate": 4.1775122151722865e-05, "loss": 2.2292, "step": 5685000 }, { "epoch": 16.46, "learning_rate": 4.177439850407559e-05, "loss": 2.2369, "step": 5685500 }, { "epoch": 16.46, "learning_rate": 4.177367485642831e-05, "loss": 2.2459, "step": 5686000 }, { "epoch": 16.46, "learning_rate": 4.177295120878103e-05, "loss": 2.2292, "step": 5686500 }, { "epoch": 16.46, "learning_rate": 4.1772227561133754e-05, "loss": 2.2198, "step": 5687000 }, { "epoch": 16.46, "learning_rate": 4.1771503913486476e-05, "loss": 2.2485, "step": 5687500 }, { "epoch": 16.46, "learning_rate": 4.1770780265839205e-05, "loss": 2.2574, "step": 5688000 }, { "epoch": 16.47, "learning_rate": 4.177005661819193e-05, "loss": 2.2684, "step": 5688500 }, { "epoch": 16.47, "learning_rate": 4.176933297054465e-05, "loss": 2.2451, "step": 5689000 }, { "epoch": 16.47, "learning_rate": 4.176860932289737e-05, "loss": 2.2268, "step": 5689500 }, { "epoch": 16.47, "learning_rate": 4.1767885675250094e-05, "loss": 2.2282, "step": 5690000 }, { "epoch": 16.47, "learning_rate": 4.1767162027602817e-05, "loss": 2.2417, "step": 5690500 }, { "epoch": 16.47, "learning_rate": 4.176643837995554e-05, "loss": 2.2371, "step": 5691000 }, { "epoch": 16.47, "learning_rate": 4.176571617960356e-05, "loss": 2.249, "step": 5691500 }, { "epoch": 16.48, "learning_rate": 4.176499253195628e-05, "loss": 2.2767, "step": 5692000 }, { "epoch": 16.48, "learning_rate": 4.17642703316043e-05, "loss": 2.2224, "step": 5692500 }, { "epoch": 16.48, "learning_rate": 4.176354668395702e-05, "loss": 2.2457, "step": 5693000 }, { "epoch": 16.48, "learning_rate": 4.176282303630974e-05, "loss": 2.2315, "step": 5693500 }, { "epoch": 16.48, "learning_rate": 4.1762099388662466e-05, "loss": 2.2362, "step": 5694000 }, { "epoch": 16.48, "learning_rate": 4.176137718831049e-05, "loss": 2.2277, "step": 5694500 }, { "epoch": 16.48, "learning_rate": 4.176065354066321e-05, "loss": 2.2375, "step": 5695000 }, { "epoch": 16.49, "learning_rate": 4.175992989301593e-05, "loss": 2.2313, "step": 5695500 }, { "epoch": 16.49, "learning_rate": 4.175920624536866e-05, "loss": 2.2417, "step": 5696000 }, { "epoch": 16.49, "learning_rate": 4.1758482597721384e-05, "loss": 2.2446, "step": 5696500 }, { "epoch": 16.49, "learning_rate": 4.1757758950074106e-05, "loss": 2.2432, "step": 5697000 }, { "epoch": 16.49, "learning_rate": 4.175703530242683e-05, "loss": 2.2542, "step": 5697500 }, { "epoch": 16.49, "learning_rate": 4.175631165477955e-05, "loss": 2.2396, "step": 5698000 }, { "epoch": 16.49, "learning_rate": 4.175558800713227e-05, "loss": 2.2327, "step": 5698500 }, { "epoch": 16.5, "learning_rate": 4.175486580678029e-05, "loss": 2.2343, "step": 5699000 }, { "epoch": 16.5, "learning_rate": 4.175414215913302e-05, "loss": 2.2417, "step": 5699500 }, { "epoch": 16.5, "learning_rate": 4.175341995878103e-05, "loss": 2.2374, "step": 5700000 }, { "epoch": 16.5, "learning_rate": 4.1752696311133755e-05, "loss": 2.2414, "step": 5700500 }, { "epoch": 16.5, "learning_rate": 4.175197266348648e-05, "loss": 2.2407, "step": 5701000 }, { "epoch": 16.5, "learning_rate": 4.17512490158392e-05, "loss": 2.2296, "step": 5701500 }, { "epoch": 16.5, "learning_rate": 4.175052536819192e-05, "loss": 2.2397, "step": 5702000 }, { "epoch": 16.51, "learning_rate": 4.1749801720544644e-05, "loss": 2.2238, "step": 5702500 }, { "epoch": 16.51, "learning_rate": 4.174907807289737e-05, "loss": 2.2374, "step": 5703000 }, { "epoch": 16.51, "learning_rate": 4.1748354425250095e-05, "loss": 2.23, "step": 5703500 }, { "epoch": 16.51, "learning_rate": 4.174763077760282e-05, "loss": 2.2301, "step": 5704000 }, { "epoch": 16.51, "learning_rate": 4.174690712995554e-05, "loss": 2.2522, "step": 5704500 }, { "epoch": 16.51, "learning_rate": 4.174618348230827e-05, "loss": 2.2448, "step": 5705000 }, { "epoch": 16.52, "learning_rate": 4.1745461281956284e-05, "loss": 2.2195, "step": 5705500 }, { "epoch": 16.52, "learning_rate": 4.1744737634309006e-05, "loss": 2.2079, "step": 5706000 }, { "epoch": 16.52, "learning_rate": 4.174401398666173e-05, "loss": 2.2534, "step": 5706500 }, { "epoch": 16.52, "learning_rate": 4.174329033901445e-05, "loss": 2.2082, "step": 5707000 }, { "epoch": 16.52, "learning_rate": 4.174256669136717e-05, "loss": 2.2119, "step": 5707500 }, { "epoch": 16.52, "learning_rate": 4.174184449101519e-05, "loss": 2.2487, "step": 5708000 }, { "epoch": 16.52, "learning_rate": 4.174112084336792e-05, "loss": 2.2309, "step": 5708500 }, { "epoch": 16.53, "learning_rate": 4.174039719572064e-05, "loss": 2.2196, "step": 5709000 }, { "epoch": 16.53, "learning_rate": 4.173967354807336e-05, "loss": 2.247, "step": 5709500 }, { "epoch": 16.53, "learning_rate": 4.1738949900426084e-05, "loss": 2.2404, "step": 5710000 }, { "epoch": 16.53, "learning_rate": 4.173822770007411e-05, "loss": 2.2563, "step": 5710500 }, { "epoch": 16.53, "learning_rate": 4.173750405242683e-05, "loss": 2.2366, "step": 5711000 }, { "epoch": 16.53, "learning_rate": 4.173678040477955e-05, "loss": 2.2521, "step": 5711500 }, { "epoch": 16.53, "learning_rate": 4.173605675713227e-05, "loss": 2.2518, "step": 5712000 }, { "epoch": 16.54, "learning_rate": 4.173533455678029e-05, "loss": 2.2222, "step": 5712500 }, { "epoch": 16.54, "learning_rate": 4.173461090913302e-05, "loss": 2.2222, "step": 5713000 }, { "epoch": 16.54, "learning_rate": 4.173388726148574e-05, "loss": 2.2318, "step": 5713500 }, { "epoch": 16.54, "learning_rate": 4.173316361383846e-05, "loss": 2.2405, "step": 5714000 }, { "epoch": 16.54, "learning_rate": 4.1732439966191185e-05, "loss": 2.2491, "step": 5714500 }, { "epoch": 16.54, "learning_rate": 4.173171631854391e-05, "loss": 2.2231, "step": 5715000 }, { "epoch": 16.54, "learning_rate": 4.173099267089663e-05, "loss": 2.223, "step": 5715500 }, { "epoch": 16.55, "learning_rate": 4.173026902324935e-05, "loss": 2.2125, "step": 5716000 }, { "epoch": 16.55, "learning_rate": 4.1729545375602074e-05, "loss": 2.24, "step": 5716500 }, { "epoch": 16.55, "learning_rate": 4.1728821727954796e-05, "loss": 2.2139, "step": 5717000 }, { "epoch": 16.55, "learning_rate": 4.172810097489811e-05, "loss": 2.235, "step": 5717500 }, { "epoch": 16.55, "learning_rate": 4.172737732725084e-05, "loss": 2.2503, "step": 5718000 }, { "epoch": 16.55, "learning_rate": 4.172665367960356e-05, "loss": 2.2294, "step": 5718500 }, { "epoch": 16.55, "learning_rate": 4.1725930031956285e-05, "loss": 2.2248, "step": 5719000 }, { "epoch": 16.56, "learning_rate": 4.17252078316043e-05, "loss": 2.2393, "step": 5719500 }, { "epoch": 16.56, "learning_rate": 4.172448418395702e-05, "loss": 2.2509, "step": 5720000 }, { "epoch": 16.56, "learning_rate": 4.1723760536309745e-05, "loss": 2.2285, "step": 5720500 }, { "epoch": 16.56, "learning_rate": 4.172303688866247e-05, "loss": 2.2661, "step": 5721000 }, { "epoch": 16.56, "learning_rate": 4.1722313241015196e-05, "loss": 2.2461, "step": 5721500 }, { "epoch": 16.56, "learning_rate": 4.172158959336792e-05, "loss": 2.2367, "step": 5722000 }, { "epoch": 16.56, "learning_rate": 4.1720867393015934e-05, "loss": 2.2276, "step": 5722500 }, { "epoch": 16.57, "learning_rate": 4.1720143745368656e-05, "loss": 2.2405, "step": 5723000 }, { "epoch": 16.57, "learning_rate": 4.171942009772138e-05, "loss": 2.2341, "step": 5723500 }, { "epoch": 16.57, "learning_rate": 4.17186964500741e-05, "loss": 2.2287, "step": 5724000 }, { "epoch": 16.57, "learning_rate": 4.1717974249722116e-05, "loss": 2.2368, "step": 5724500 }, { "epoch": 16.57, "learning_rate": 4.1717250602074845e-05, "loss": 2.2286, "step": 5725000 }, { "epoch": 16.57, "learning_rate": 4.171652695442757e-05, "loss": 2.231, "step": 5725500 }, { "epoch": 16.57, "learning_rate": 4.1715803306780296e-05, "loss": 2.2326, "step": 5726000 }, { "epoch": 16.58, "learning_rate": 4.171508110642831e-05, "loss": 2.2185, "step": 5726500 }, { "epoch": 16.58, "learning_rate": 4.1714357458781034e-05, "loss": 2.2232, "step": 5727000 }, { "epoch": 16.58, "learning_rate": 4.1713633811133756e-05, "loss": 2.2387, "step": 5727500 }, { "epoch": 16.58, "learning_rate": 4.171291016348648e-05, "loss": 2.2548, "step": 5728000 }, { "epoch": 16.58, "learning_rate": 4.17121865158392e-05, "loss": 2.242, "step": 5728500 }, { "epoch": 16.58, "learning_rate": 4.171146286819192e-05, "loss": 2.2322, "step": 5729000 }, { "epoch": 16.58, "learning_rate": 4.1710739220544645e-05, "loss": 2.2383, "step": 5729500 }, { "epoch": 16.59, "learning_rate": 4.171001557289737e-05, "loss": 2.2156, "step": 5730000 }, { "epoch": 16.59, "learning_rate": 4.170929337254539e-05, "loss": 2.2433, "step": 5730500 }, { "epoch": 16.59, "learning_rate": 4.170856972489811e-05, "loss": 2.2458, "step": 5731000 }, { "epoch": 16.59, "learning_rate": 4.1707846077250834e-05, "loss": 2.2423, "step": 5731500 }, { "epoch": 16.59, "learning_rate": 4.170712242960356e-05, "loss": 2.2325, "step": 5732000 }, { "epoch": 16.59, "learning_rate": 4.170639878195628e-05, "loss": 2.2155, "step": 5732500 }, { "epoch": 16.59, "learning_rate": 4.170567513430901e-05, "loss": 2.2468, "step": 5733000 }, { "epoch": 16.6, "learning_rate": 4.170495148666173e-05, "loss": 2.2331, "step": 5733500 }, { "epoch": 16.6, "learning_rate": 4.170422783901445e-05, "loss": 2.241, "step": 5734000 }, { "epoch": 16.6, "learning_rate": 4.1703504191367175e-05, "loss": 2.2379, "step": 5734500 }, { "epoch": 16.6, "learning_rate": 4.17027805437199e-05, "loss": 2.2281, "step": 5735000 }, { "epoch": 16.6, "learning_rate": 4.170205689607262e-05, "loss": 2.2408, "step": 5735500 }, { "epoch": 16.6, "learning_rate": 4.170133324842535e-05, "loss": 2.2588, "step": 5736000 }, { "epoch": 16.6, "learning_rate": 4.1700611048073364e-05, "loss": 2.2789, "step": 5736500 }, { "epoch": 16.61, "learning_rate": 4.169988884772138e-05, "loss": 2.2652, "step": 5737000 }, { "epoch": 16.61, "learning_rate": 4.16991652000741e-05, "loss": 2.288, "step": 5737500 }, { "epoch": 16.61, "learning_rate": 4.1698441552426824e-05, "loss": 2.2229, "step": 5738000 }, { "epoch": 16.61, "learning_rate": 4.1697717904779546e-05, "loss": 2.2114, "step": 5738500 }, { "epoch": 16.61, "learning_rate": 4.169699425713227e-05, "loss": 2.2301, "step": 5739000 }, { "epoch": 16.61, "learning_rate": 4.1696270609485e-05, "loss": 2.2416, "step": 5739500 }, { "epoch": 16.61, "learning_rate": 4.169554696183772e-05, "loss": 2.1979, "step": 5740000 }, { "epoch": 16.62, "learning_rate": 4.169482331419045e-05, "loss": 2.2418, "step": 5740500 }, { "epoch": 16.62, "learning_rate": 4.169409966654317e-05, "loss": 2.2357, "step": 5741000 }, { "epoch": 16.62, "learning_rate": 4.169337891348648e-05, "loss": 2.2537, "step": 5741500 }, { "epoch": 16.62, "learning_rate": 4.16926552658392e-05, "loss": 2.2484, "step": 5742000 }, { "epoch": 16.62, "learning_rate": 4.1691931618191924e-05, "loss": 2.2569, "step": 5742500 }, { "epoch": 16.62, "learning_rate": 4.1691207970544646e-05, "loss": 2.241, "step": 5743000 }, { "epoch": 16.63, "learning_rate": 4.169048432289737e-05, "loss": 2.2407, "step": 5743500 }, { "epoch": 16.63, "learning_rate": 4.16897606752501e-05, "loss": 2.2411, "step": 5744000 }, { "epoch": 16.63, "learning_rate": 4.168903847489811e-05, "loss": 2.2247, "step": 5744500 }, { "epoch": 16.63, "learning_rate": 4.1688314827250835e-05, "loss": 2.2569, "step": 5745000 }, { "epoch": 16.63, "learning_rate": 4.168759117960356e-05, "loss": 2.2313, "step": 5745500 }, { "epoch": 16.63, "learning_rate": 4.168686753195628e-05, "loss": 2.2411, "step": 5746000 }, { "epoch": 16.63, "learning_rate": 4.1686143884309e-05, "loss": 2.247, "step": 5746500 }, { "epoch": 16.64, "learning_rate": 4.1685420236661724e-05, "loss": 2.229, "step": 5747000 }, { "epoch": 16.64, "learning_rate": 4.1684696589014446e-05, "loss": 2.2417, "step": 5747500 }, { "epoch": 16.64, "learning_rate": 4.1683972941367175e-05, "loss": 2.2066, "step": 5748000 }, { "epoch": 16.64, "learning_rate": 4.16832492937199e-05, "loss": 2.2179, "step": 5748500 }, { "epoch": 16.64, "learning_rate": 4.168252709336792e-05, "loss": 2.2387, "step": 5749000 }, { "epoch": 16.64, "learning_rate": 4.1681804893015936e-05, "loss": 2.2503, "step": 5749500 }, { "epoch": 16.64, "learning_rate": 4.168108124536866e-05, "loss": 2.2316, "step": 5750000 }, { "epoch": 16.65, "learning_rate": 4.168035759772138e-05, "loss": 2.2192, "step": 5750500 }, { "epoch": 16.65, "learning_rate": 4.1679635397369396e-05, "loss": 2.2361, "step": 5751000 }, { "epoch": 16.65, "learning_rate": 4.1678911749722125e-05, "loss": 2.2306, "step": 5751500 }, { "epoch": 16.65, "learning_rate": 4.167818810207485e-05, "loss": 2.2249, "step": 5752000 }, { "epoch": 16.65, "learning_rate": 4.167746445442757e-05, "loss": 2.2277, "step": 5752500 }, { "epoch": 16.65, "learning_rate": 4.167674080678029e-05, "loss": 2.2393, "step": 5753000 }, { "epoch": 16.65, "learning_rate": 4.1676017159133014e-05, "loss": 2.2244, "step": 5753500 }, { "epoch": 16.66, "learning_rate": 4.1675293511485736e-05, "loss": 2.2423, "step": 5754000 }, { "epoch": 16.66, "learning_rate": 4.167456986383846e-05, "loss": 2.237, "step": 5754500 }, { "epoch": 16.66, "learning_rate": 4.167384621619118e-05, "loss": 2.2193, "step": 5755000 }, { "epoch": 16.66, "learning_rate": 4.167312256854391e-05, "loss": 2.2338, "step": 5755500 }, { "epoch": 16.66, "learning_rate": 4.1672400368191925e-05, "loss": 2.2577, "step": 5756000 }, { "epoch": 16.66, "learning_rate": 4.167167672054465e-05, "loss": 2.2614, "step": 5756500 }, { "epoch": 16.66, "learning_rate": 4.1670953072897376e-05, "loss": 2.2349, "step": 5757000 }, { "epoch": 16.67, "learning_rate": 4.167023087254539e-05, "loss": 2.2253, "step": 5757500 }, { "epoch": 16.67, "learning_rate": 4.1669507224898114e-05, "loss": 2.2148, "step": 5758000 }, { "epoch": 16.67, "learning_rate": 4.1668783577250836e-05, "loss": 2.2224, "step": 5758500 }, { "epoch": 16.67, "learning_rate": 4.166805992960356e-05, "loss": 2.2531, "step": 5759000 }, { "epoch": 16.67, "learning_rate": 4.166733628195628e-05, "loss": 2.2355, "step": 5759500 }, { "epoch": 16.67, "learning_rate": 4.1666612634309e-05, "loss": 2.2364, "step": 5760000 }, { "epoch": 16.67, "learning_rate": 4.1665888986661725e-05, "loss": 2.2075, "step": 5760500 }, { "epoch": 16.68, "learning_rate": 4.166516533901445e-05, "loss": 2.2489, "step": 5761000 }, { "epoch": 16.68, "learning_rate": 4.1664441691367176e-05, "loss": 2.2393, "step": 5761500 }, { "epoch": 16.68, "learning_rate": 4.16637180437199e-05, "loss": 2.2328, "step": 5762000 }, { "epoch": 16.68, "learning_rate": 4.1662995843367914e-05, "loss": 2.2459, "step": 5762500 }, { "epoch": 16.68, "learning_rate": 4.166227219572064e-05, "loss": 2.2518, "step": 5763000 }, { "epoch": 16.68, "learning_rate": 4.1661548548073365e-05, "loss": 2.2193, "step": 5763500 }, { "epoch": 16.68, "learning_rate": 4.166082490042609e-05, "loss": 2.2464, "step": 5764000 }, { "epoch": 16.69, "learning_rate": 4.166010125277881e-05, "loss": 2.2572, "step": 5764500 }, { "epoch": 16.69, "learning_rate": 4.165937760513153e-05, "loss": 2.2372, "step": 5765000 }, { "epoch": 16.69, "learning_rate": 4.1658653957484254e-05, "loss": 2.2386, "step": 5765500 }, { "epoch": 16.69, "learning_rate": 4.1657930309836976e-05, "loss": 2.2377, "step": 5766000 }, { "epoch": 16.69, "learning_rate": 4.16572066621897e-05, "loss": 2.2366, "step": 5766500 }, { "epoch": 16.69, "learning_rate": 4.165648446183772e-05, "loss": 2.2515, "step": 5767000 }, { "epoch": 16.69, "learning_rate": 4.165576081419044e-05, "loss": 2.2313, "step": 5767500 }, { "epoch": 16.7, "learning_rate": 4.1655037166543165e-05, "loss": 2.2354, "step": 5768000 }, { "epoch": 16.7, "learning_rate": 4.165431351889589e-05, "loss": 2.2774, "step": 5768500 }, { "epoch": 16.7, "learning_rate": 4.16535913185439e-05, "loss": 2.2458, "step": 5769000 }, { "epoch": 16.7, "learning_rate": 4.1652867670896625e-05, "loss": 2.2429, "step": 5769500 }, { "epoch": 16.7, "learning_rate": 4.165214402324935e-05, "loss": 2.2365, "step": 5770000 }, { "epoch": 16.7, "learning_rate": 4.165142037560208e-05, "loss": 2.2252, "step": 5770500 }, { "epoch": 16.7, "learning_rate": 4.16506967279548e-05, "loss": 2.2382, "step": 5771000 }, { "epoch": 16.71, "learning_rate": 4.164997452760282e-05, "loss": 2.271, "step": 5771500 }, { "epoch": 16.71, "learning_rate": 4.1649250879955544e-05, "loss": 2.2503, "step": 5772000 }, { "epoch": 16.71, "learning_rate": 4.1648527232308266e-05, "loss": 2.2463, "step": 5772500 }, { "epoch": 16.71, "learning_rate": 4.1647806479251575e-05, "loss": 2.2311, "step": 5773000 }, { "epoch": 16.71, "learning_rate": 4.1647082831604304e-05, "loss": 2.2343, "step": 5773500 }, { "epoch": 16.71, "learning_rate": 4.1646359183957026e-05, "loss": 2.2162, "step": 5774000 }, { "epoch": 16.71, "learning_rate": 4.164563553630975e-05, "loss": 2.2256, "step": 5774500 }, { "epoch": 16.72, "learning_rate": 4.164491188866247e-05, "loss": 2.2356, "step": 5775000 }, { "epoch": 16.72, "learning_rate": 4.164418824101519e-05, "loss": 2.2309, "step": 5775500 }, { "epoch": 16.72, "learning_rate": 4.1643464593367915e-05, "loss": 2.2361, "step": 5776000 }, { "epoch": 16.72, "learning_rate": 4.164274094572064e-05, "loss": 2.2515, "step": 5776500 }, { "epoch": 16.72, "learning_rate": 4.164201729807336e-05, "loss": 2.2525, "step": 5777000 }, { "epoch": 16.72, "learning_rate": 4.164129365042608e-05, "loss": 2.2281, "step": 5777500 }, { "epoch": 16.72, "learning_rate": 4.164057000277881e-05, "loss": 2.2653, "step": 5778000 }, { "epoch": 16.73, "learning_rate": 4.163984635513153e-05, "loss": 2.2335, "step": 5778500 }, { "epoch": 16.73, "learning_rate": 4.1639122707484255e-05, "loss": 2.2387, "step": 5779000 }, { "epoch": 16.73, "learning_rate": 4.163839905983698e-05, "loss": 2.2384, "step": 5779500 }, { "epoch": 16.73, "learning_rate": 4.16376754121897e-05, "loss": 2.2411, "step": 5780000 }, { "epoch": 16.73, "learning_rate": 4.163695321183772e-05, "loss": 2.21, "step": 5780500 }, { "epoch": 16.73, "learning_rate": 4.1636229564190444e-05, "loss": 2.2166, "step": 5781000 }, { "epoch": 16.74, "learning_rate": 4.1635505916543166e-05, "loss": 2.236, "step": 5781500 }, { "epoch": 16.74, "learning_rate": 4.163478226889589e-05, "loss": 2.2334, "step": 5782000 }, { "epoch": 16.74, "learning_rate": 4.163405862124861e-05, "loss": 2.2427, "step": 5782500 }, { "epoch": 16.74, "learning_rate": 4.163333497360133e-05, "loss": 2.2024, "step": 5783000 }, { "epoch": 16.74, "learning_rate": 4.1632612773249355e-05, "loss": 2.2413, "step": 5783500 }, { "epoch": 16.74, "learning_rate": 4.163188912560208e-05, "loss": 2.2609, "step": 5784000 }, { "epoch": 16.74, "learning_rate": 4.16311654779548e-05, "loss": 2.2348, "step": 5784500 }, { "epoch": 16.75, "learning_rate": 4.1630443277602815e-05, "loss": 2.2206, "step": 5785000 }, { "epoch": 16.75, "learning_rate": 4.1629719629955544e-05, "loss": 2.2226, "step": 5785500 }, { "epoch": 16.75, "learning_rate": 4.1628995982308267e-05, "loss": 2.2396, "step": 5786000 }, { "epoch": 16.75, "learning_rate": 4.162827233466099e-05, "loss": 2.233, "step": 5786500 }, { "epoch": 16.75, "learning_rate": 4.162754868701371e-05, "loss": 2.2094, "step": 5787000 }, { "epoch": 16.75, "learning_rate": 4.1626826486661727e-05, "loss": 2.2519, "step": 5787500 }, { "epoch": 16.75, "learning_rate": 4.1626102839014456e-05, "loss": 2.254, "step": 5788000 }, { "epoch": 16.76, "learning_rate": 4.162537919136718e-05, "loss": 2.2462, "step": 5788500 }, { "epoch": 16.76, "learning_rate": 4.16246555437199e-05, "loss": 2.2503, "step": 5789000 }, { "epoch": 16.76, "learning_rate": 4.162393189607262e-05, "loss": 2.2252, "step": 5789500 }, { "epoch": 16.76, "learning_rate": 4.1623208248425345e-05, "loss": 2.2331, "step": 5790000 }, { "epoch": 16.76, "learning_rate": 4.162248604807336e-05, "loss": 2.2449, "step": 5790500 }, { "epoch": 16.76, "learning_rate": 4.162176240042608e-05, "loss": 2.2385, "step": 5791000 }, { "epoch": 16.76, "learning_rate": 4.1621038752778805e-05, "loss": 2.2394, "step": 5791500 }, { "epoch": 16.77, "learning_rate": 4.162031510513153e-05, "loss": 2.2381, "step": 5792000 }, { "epoch": 16.77, "learning_rate": 4.1619591457484256e-05, "loss": 2.2194, "step": 5792500 }, { "epoch": 16.77, "learning_rate": 4.161886780983698e-05, "loss": 2.2398, "step": 5793000 }, { "epoch": 16.77, "learning_rate": 4.161814416218971e-05, "loss": 2.254, "step": 5793500 }, { "epoch": 16.77, "learning_rate": 4.161742196183772e-05, "loss": 2.2258, "step": 5794000 }, { "epoch": 16.77, "learning_rate": 4.1616698314190445e-05, "loss": 2.2349, "step": 5794500 }, { "epoch": 16.77, "learning_rate": 4.161597466654317e-05, "loss": 2.2383, "step": 5795000 }, { "epoch": 16.78, "learning_rate": 4.161525101889589e-05, "loss": 2.2305, "step": 5795500 }, { "epoch": 16.78, "learning_rate": 4.161452737124861e-05, "loss": 2.2364, "step": 5796000 }, { "epoch": 16.78, "learning_rate": 4.1613803723601334e-05, "loss": 2.2383, "step": 5796500 }, { "epoch": 16.78, "learning_rate": 4.1613080075954056e-05, "loss": 2.2558, "step": 5797000 }, { "epoch": 16.78, "learning_rate": 4.161235787560208e-05, "loss": 2.2395, "step": 5797500 }, { "epoch": 16.78, "learning_rate": 4.16116342279548e-05, "loss": 2.2237, "step": 5798000 }, { "epoch": 16.78, "learning_rate": 4.161091058030752e-05, "loss": 2.2406, "step": 5798500 }, { "epoch": 16.79, "learning_rate": 4.1610186932660245e-05, "loss": 2.2415, "step": 5799000 }, { "epoch": 16.79, "learning_rate": 4.160946328501297e-05, "loss": 2.2545, "step": 5799500 }, { "epoch": 16.79, "learning_rate": 4.1608739637365696e-05, "loss": 2.2262, "step": 5800000 }, { "epoch": 16.79, "learning_rate": 4.160801598971842e-05, "loss": 2.2494, "step": 5800500 }, { "epoch": 16.79, "learning_rate": 4.160729234207114e-05, "loss": 2.2401, "step": 5801000 }, { "epoch": 16.79, "learning_rate": 4.160656869442386e-05, "loss": 2.2361, "step": 5801500 }, { "epoch": 16.79, "learning_rate": 4.1605845046776585e-05, "loss": 2.2487, "step": 5802000 }, { "epoch": 16.8, "learning_rate": 4.160512139912931e-05, "loss": 2.2181, "step": 5802500 }, { "epoch": 16.8, "learning_rate": 4.160439775148203e-05, "loss": 2.2287, "step": 5803000 }, { "epoch": 16.8, "learning_rate": 4.160367410383476e-05, "loss": 2.2133, "step": 5803500 }, { "epoch": 16.8, "learning_rate": 4.160295045618748e-05, "loss": 2.2393, "step": 5804000 }, { "epoch": 16.8, "learning_rate": 4.1602228255835496e-05, "loss": 2.2445, "step": 5804500 }, { "epoch": 16.8, "learning_rate": 4.160150460818822e-05, "loss": 2.2198, "step": 5805000 }, { "epoch": 16.8, "learning_rate": 4.160078096054094e-05, "loss": 2.2629, "step": 5805500 }, { "epoch": 16.81, "learning_rate": 4.160005731289366e-05, "loss": 2.2234, "step": 5806000 }, { "epoch": 16.81, "learning_rate": 4.1599333665246385e-05, "loss": 2.2328, "step": 5806500 }, { "epoch": 16.81, "learning_rate": 4.159861146489441e-05, "loss": 2.2305, "step": 5807000 }, { "epoch": 16.81, "learning_rate": 4.159788781724713e-05, "loss": 2.2328, "step": 5807500 }, { "epoch": 16.81, "learning_rate": 4.159716416959986e-05, "loss": 2.2135, "step": 5808000 }, { "epoch": 16.81, "learning_rate": 4.159644052195258e-05, "loss": 2.2485, "step": 5808500 }, { "epoch": 16.81, "learning_rate": 4.1595716874305303e-05, "loss": 2.2221, "step": 5809000 }, { "epoch": 16.82, "learning_rate": 4.159499467395332e-05, "loss": 2.235, "step": 5809500 }, { "epoch": 16.82, "learning_rate": 4.1594272473601335e-05, "loss": 2.2443, "step": 5810000 }, { "epoch": 16.82, "learning_rate": 4.159354882595406e-05, "loss": 2.2377, "step": 5810500 }, { "epoch": 16.82, "learning_rate": 4.159282517830678e-05, "loss": 2.2207, "step": 5811000 }, { "epoch": 16.82, "learning_rate": 4.159210153065951e-05, "loss": 2.2416, "step": 5811500 }, { "epoch": 16.82, "learning_rate": 4.159137788301223e-05, "loss": 2.2351, "step": 5812000 }, { "epoch": 16.82, "learning_rate": 4.159065423536495e-05, "loss": 2.2214, "step": 5812500 }, { "epoch": 16.83, "learning_rate": 4.1589930587717675e-05, "loss": 2.2409, "step": 5813000 }, { "epoch": 16.83, "learning_rate": 4.158920838736569e-05, "loss": 2.2329, "step": 5813500 }, { "epoch": 16.83, "learning_rate": 4.158848473971841e-05, "loss": 2.2352, "step": 5814000 }, { "epoch": 16.83, "learning_rate": 4.1587761092071135e-05, "loss": 2.224, "step": 5814500 }, { "epoch": 16.83, "learning_rate": 4.1587037444423864e-05, "loss": 2.2227, "step": 5815000 }, { "epoch": 16.83, "learning_rate": 4.1586313796776586e-05, "loss": 2.2679, "step": 5815500 }, { "epoch": 16.83, "learning_rate": 4.158559159642461e-05, "loss": 2.2454, "step": 5816000 }, { "epoch": 16.84, "learning_rate": 4.158486794877733e-05, "loss": 2.2545, "step": 5816500 }, { "epoch": 16.84, "learning_rate": 4.158414430113005e-05, "loss": 2.2233, "step": 5817000 }, { "epoch": 16.84, "learning_rate": 4.158342210077807e-05, "loss": 2.2413, "step": 5817500 }, { "epoch": 16.84, "learning_rate": 4.158269845313079e-05, "loss": 2.2272, "step": 5818000 }, { "epoch": 16.84, "learning_rate": 4.158197480548351e-05, "loss": 2.2422, "step": 5818500 }, { "epoch": 16.84, "learning_rate": 4.1581251157836235e-05, "loss": 2.2477, "step": 5819000 }, { "epoch": 16.85, "learning_rate": 4.158052751018896e-05, "loss": 2.225, "step": 5819500 }, { "epoch": 16.85, "learning_rate": 4.1579803862541686e-05, "loss": 2.2269, "step": 5820000 }, { "epoch": 16.85, "learning_rate": 4.15790816621897e-05, "loss": 2.2428, "step": 5820500 }, { "epoch": 16.85, "learning_rate": 4.1578358014542424e-05, "loss": 2.2336, "step": 5821000 }, { "epoch": 16.85, "learning_rate": 4.1577634366895146e-05, "loss": 2.2318, "step": 5821500 }, { "epoch": 16.85, "learning_rate": 4.157691071924787e-05, "loss": 2.2279, "step": 5822000 }, { "epoch": 16.85, "learning_rate": 4.15761870716006e-05, "loss": 2.2411, "step": 5822500 }, { "epoch": 16.86, "learning_rate": 4.157546342395332e-05, "loss": 2.2399, "step": 5823000 }, { "epoch": 16.86, "learning_rate": 4.157473977630604e-05, "loss": 2.2381, "step": 5823500 }, { "epoch": 16.86, "learning_rate": 4.1574016128658764e-05, "loss": 2.2219, "step": 5824000 }, { "epoch": 16.86, "learning_rate": 4.1573292481011486e-05, "loss": 2.2758, "step": 5824500 }, { "epoch": 16.86, "learning_rate": 4.157256883336421e-05, "loss": 2.2148, "step": 5825000 }, { "epoch": 16.86, "learning_rate": 4.157184518571694e-05, "loss": 2.2355, "step": 5825500 }, { "epoch": 16.86, "learning_rate": 4.157112153806966e-05, "loss": 2.2321, "step": 5826000 }, { "epoch": 16.87, "learning_rate": 4.157039789042238e-05, "loss": 2.2578, "step": 5826500 }, { "epoch": 16.87, "learning_rate": 4.1569674242775104e-05, "loss": 2.253, "step": 5827000 }, { "epoch": 16.87, "learning_rate": 4.156895204242312e-05, "loss": 2.2441, "step": 5827500 }, { "epoch": 16.87, "learning_rate": 4.1568229842071136e-05, "loss": 2.2211, "step": 5828000 }, { "epoch": 16.87, "learning_rate": 4.156750619442386e-05, "loss": 2.2403, "step": 5828500 }, { "epoch": 16.87, "learning_rate": 4.156678254677659e-05, "loss": 2.2379, "step": 5829000 }, { "epoch": 16.87, "learning_rate": 4.156605889912931e-05, "loss": 2.2092, "step": 5829500 }, { "epoch": 16.88, "learning_rate": 4.156533525148204e-05, "loss": 2.2415, "step": 5830000 }, { "epoch": 16.88, "learning_rate": 4.156461160383476e-05, "loss": 2.2189, "step": 5830500 }, { "epoch": 16.88, "learning_rate": 4.156388795618748e-05, "loss": 2.2667, "step": 5831000 }, { "epoch": 16.88, "learning_rate": 4.1563164308540205e-05, "loss": 2.2317, "step": 5831500 }, { "epoch": 16.88, "learning_rate": 4.156244066089293e-05, "loss": 2.2311, "step": 5832000 }, { "epoch": 16.88, "learning_rate": 4.156171846054094e-05, "loss": 2.2296, "step": 5832500 }, { "epoch": 16.88, "learning_rate": 4.1560994812893665e-05, "loss": 2.2225, "step": 5833000 }, { "epoch": 16.89, "learning_rate": 4.156027116524639e-05, "loss": 2.2423, "step": 5833500 }, { "epoch": 16.89, "learning_rate": 4.155954751759911e-05, "loss": 2.236, "step": 5834000 }, { "epoch": 16.89, "learning_rate": 4.155882531724713e-05, "loss": 2.2155, "step": 5834500 }, { "epoch": 16.89, "learning_rate": 4.1558101669599854e-05, "loss": 2.2543, "step": 5835000 }, { "epoch": 16.89, "learning_rate": 4.1557378021952576e-05, "loss": 2.2134, "step": 5835500 }, { "epoch": 16.89, "learning_rate": 4.155665582160059e-05, "loss": 2.2532, "step": 5836000 }, { "epoch": 16.89, "learning_rate": 4.1555932173953314e-05, "loss": 2.2303, "step": 5836500 }, { "epoch": 16.9, "learning_rate": 4.1555208526306036e-05, "loss": 2.2281, "step": 5837000 }, { "epoch": 16.9, "learning_rate": 4.1554484878658765e-05, "loss": 2.2308, "step": 5837500 }, { "epoch": 16.9, "learning_rate": 4.155376267830679e-05, "loss": 2.2503, "step": 5838000 }, { "epoch": 16.9, "learning_rate": 4.155303903065951e-05, "loss": 2.2142, "step": 5838500 }, { "epoch": 16.9, "learning_rate": 4.155231538301223e-05, "loss": 2.2159, "step": 5839000 }, { "epoch": 16.9, "learning_rate": 4.1551591735364954e-05, "loss": 2.2449, "step": 5839500 }, { "epoch": 16.9, "learning_rate": 4.1550868087717676e-05, "loss": 2.261, "step": 5840000 }, { "epoch": 16.91, "learning_rate": 4.15501444400704e-05, "loss": 2.2489, "step": 5840500 }, { "epoch": 16.91, "learning_rate": 4.154942079242312e-05, "loss": 2.2154, "step": 5841000 }, { "epoch": 16.91, "learning_rate": 4.1548698592071136e-05, "loss": 2.2447, "step": 5841500 }, { "epoch": 16.91, "learning_rate": 4.1547974944423865e-05, "loss": 2.2391, "step": 5842000 }, { "epoch": 16.91, "learning_rate": 4.154725129677659e-05, "loss": 2.2333, "step": 5842500 }, { "epoch": 16.91, "learning_rate": 4.154652764912931e-05, "loss": 2.2209, "step": 5843000 }, { "epoch": 16.91, "learning_rate": 4.154580400148203e-05, "loss": 2.2522, "step": 5843500 }, { "epoch": 16.92, "learning_rate": 4.154508180113005e-05, "loss": 2.2379, "step": 5844000 }, { "epoch": 16.92, "learning_rate": 4.154435815348277e-05, "loss": 2.222, "step": 5844500 }, { "epoch": 16.92, "learning_rate": 4.15436345058355e-05, "loss": 2.2342, "step": 5845000 }, { "epoch": 16.92, "learning_rate": 4.154291085818822e-05, "loss": 2.2183, "step": 5845500 }, { "epoch": 16.92, "learning_rate": 4.154218721054094e-05, "loss": 2.2462, "step": 5846000 }, { "epoch": 16.92, "learning_rate": 4.1541465010188966e-05, "loss": 2.222, "step": 5846500 }, { "epoch": 16.92, "learning_rate": 4.154074136254169e-05, "loss": 2.2497, "step": 5847000 }, { "epoch": 16.93, "learning_rate": 4.154001771489441e-05, "loss": 2.2047, "step": 5847500 }, { "epoch": 16.93, "learning_rate": 4.153929406724713e-05, "loss": 2.2544, "step": 5848000 }, { "epoch": 16.93, "learning_rate": 4.153857186689515e-05, "loss": 2.2127, "step": 5848500 }, { "epoch": 16.93, "learning_rate": 4.153784821924787e-05, "loss": 2.2232, "step": 5849000 }, { "epoch": 16.93, "learning_rate": 4.153712457160059e-05, "loss": 2.2222, "step": 5849500 }, { "epoch": 16.93, "learning_rate": 4.1536400923953315e-05, "loss": 2.2445, "step": 5850000 }, { "epoch": 16.93, "learning_rate": 4.153567727630604e-05, "loss": 2.2141, "step": 5850500 }, { "epoch": 16.94, "learning_rate": 4.1534953628658766e-05, "loss": 2.2186, "step": 5851000 }, { "epoch": 16.94, "learning_rate": 4.153422998101149e-05, "loss": 2.2291, "step": 5851500 }, { "epoch": 16.94, "learning_rate": 4.153350633336421e-05, "loss": 2.248, "step": 5852000 }, { "epoch": 16.94, "learning_rate": 4.153278268571694e-05, "loss": 2.236, "step": 5852500 }, { "epoch": 16.94, "learning_rate": 4.153205903806966e-05, "loss": 2.2105, "step": 5853000 }, { "epoch": 16.94, "learning_rate": 4.1531335390422384e-05, "loss": 2.2426, "step": 5853500 }, { "epoch": 16.94, "learning_rate": 4.1530611742775106e-05, "loss": 2.2113, "step": 5854000 }, { "epoch": 16.95, "learning_rate": 4.152988809512783e-05, "loss": 2.2641, "step": 5854500 }, { "epoch": 16.95, "learning_rate": 4.152916444748055e-05, "loss": 2.2335, "step": 5855000 }, { "epoch": 16.95, "learning_rate": 4.1528442247128566e-05, "loss": 2.2452, "step": 5855500 }, { "epoch": 16.95, "learning_rate": 4.152771859948129e-05, "loss": 2.2259, "step": 5856000 }, { "epoch": 16.95, "learning_rate": 4.152699639912931e-05, "loss": 2.2285, "step": 5856500 }, { "epoch": 16.95, "learning_rate": 4.152627275148203e-05, "loss": 2.2446, "step": 5857000 }, { "epoch": 16.96, "learning_rate": 4.152555055113005e-05, "loss": 2.2468, "step": 5857500 }, { "epoch": 16.96, "learning_rate": 4.152482690348277e-05, "loss": 2.2476, "step": 5858000 }, { "epoch": 16.96, "learning_rate": 4.152410325583549e-05, "loss": 2.2425, "step": 5858500 }, { "epoch": 16.96, "learning_rate": 4.1523379608188215e-05, "loss": 2.2513, "step": 5859000 }, { "epoch": 16.96, "learning_rate": 4.152265596054094e-05, "loss": 2.2321, "step": 5859500 }, { "epoch": 16.96, "learning_rate": 4.1521932312893666e-05, "loss": 2.2287, "step": 5860000 }, { "epoch": 16.96, "learning_rate": 4.152120866524639e-05, "loss": 2.2262, "step": 5860500 }, { "epoch": 16.97, "learning_rate": 4.152048501759912e-05, "loss": 2.2664, "step": 5861000 }, { "epoch": 16.97, "learning_rate": 4.151976136995184e-05, "loss": 2.2249, "step": 5861500 }, { "epoch": 16.97, "learning_rate": 4.151903772230456e-05, "loss": 2.222, "step": 5862000 }, { "epoch": 16.97, "learning_rate": 4.1518314074657284e-05, "loss": 2.2338, "step": 5862500 }, { "epoch": 16.97, "learning_rate": 4.15175918743053e-05, "loss": 2.2427, "step": 5863000 }, { "epoch": 16.97, "learning_rate": 4.151686822665802e-05, "loss": 2.2541, "step": 5863500 }, { "epoch": 16.97, "learning_rate": 4.1516144579010744e-05, "loss": 2.2571, "step": 5864000 }, { "epoch": 16.98, "learning_rate": 4.1515420931363467e-05, "loss": 2.233, "step": 5864500 }, { "epoch": 16.98, "learning_rate": 4.151469873101149e-05, "loss": 2.221, "step": 5865000 }, { "epoch": 16.98, "learning_rate": 4.151397508336421e-05, "loss": 2.2408, "step": 5865500 }, { "epoch": 16.98, "learning_rate": 4.151325143571693e-05, "loss": 2.2213, "step": 5866000 }, { "epoch": 16.98, "learning_rate": 4.1512527788069656e-05, "loss": 2.2391, "step": 5866500 }, { "epoch": 16.98, "learning_rate": 4.1511804140422385e-05, "loss": 2.2071, "step": 5867000 }, { "epoch": 16.98, "learning_rate": 4.15110819400704e-05, "loss": 2.2323, "step": 5867500 }, { "epoch": 16.99, "learning_rate": 4.1510359739718416e-05, "loss": 2.2054, "step": 5868000 }, { "epoch": 16.99, "learning_rate": 4.1509636092071145e-05, "loss": 2.2437, "step": 5868500 }, { "epoch": 16.99, "learning_rate": 4.150891244442387e-05, "loss": 2.2286, "step": 5869000 }, { "epoch": 16.99, "learning_rate": 4.150818879677659e-05, "loss": 2.2207, "step": 5869500 }, { "epoch": 16.99, "learning_rate": 4.150746514912931e-05, "loss": 2.2348, "step": 5870000 }, { "epoch": 16.99, "learning_rate": 4.1506741501482034e-05, "loss": 2.2055, "step": 5870500 }, { "epoch": 16.99, "learning_rate": 4.1506017853834756e-05, "loss": 2.2378, "step": 5871000 }, { "epoch": 17.0, "learning_rate": 4.150529420618748e-05, "loss": 2.2333, "step": 5871500 }, { "epoch": 17.0, "learning_rate": 4.1504572005835494e-05, "loss": 2.2233, "step": 5872000 }, { "epoch": 17.0, "learning_rate": 4.1503848358188216e-05, "loss": 2.2372, "step": 5872500 }, { "epoch": 17.0, "learning_rate": 4.1503124710540945e-05, "loss": 2.231, "step": 5873000 }, { "epoch": 17.0, "eval_accuracy": 0.6563087216406899, "eval_accuracy_mlm": 0.6194239431791272, "eval_accuracy_nsp": 0.8540443552267947, "eval_loss": 2.2509899139404297, "eval_runtime": 330.669, "eval_samples_per_second": 1319.707, "eval_steps_per_second": 54.989, "step": 5873024 }, { "epoch": 17.0, "learning_rate": 4.150240106289367e-05, "loss": 2.2432, "step": 5873500 }, { "epoch": 17.0, "learning_rate": 4.150167886254168e-05, "loss": 2.2048, "step": 5874000 }, { "epoch": 17.0, "learning_rate": 4.1500955214894405e-05, "loss": 2.2046, "step": 5874500 }, { "epoch": 17.01, "learning_rate": 4.1500231567247134e-05, "loss": 2.2007, "step": 5875000 }, { "epoch": 17.01, "learning_rate": 4.1499507919599856e-05, "loss": 2.1957, "step": 5875500 }, { "epoch": 17.01, "learning_rate": 4.149878427195258e-05, "loss": 2.2181, "step": 5876000 }, { "epoch": 17.01, "learning_rate": 4.14980606243053e-05, "loss": 2.2094, "step": 5876500 }, { "epoch": 17.01, "learning_rate": 4.149733697665802e-05, "loss": 2.2268, "step": 5877000 }, { "epoch": 17.01, "learning_rate": 4.1496613329010745e-05, "loss": 2.2206, "step": 5877500 }, { "epoch": 17.01, "learning_rate": 4.149588968136347e-05, "loss": 2.2188, "step": 5878000 }, { "epoch": 17.02, "learning_rate": 4.149516748101149e-05, "loss": 2.1926, "step": 5878500 }, { "epoch": 17.02, "learning_rate": 4.149444383336421e-05, "loss": 2.2231, "step": 5879000 }, { "epoch": 17.02, "learning_rate": 4.1493720185716934e-05, "loss": 2.2437, "step": 5879500 }, { "epoch": 17.02, "learning_rate": 4.1492996538069656e-05, "loss": 2.2163, "step": 5880000 }, { "epoch": 17.02, "learning_rate": 4.149227289042238e-05, "loss": 2.2195, "step": 5880500 }, { "epoch": 17.02, "learning_rate": 4.14915492427751e-05, "loss": 2.2178, "step": 5881000 }, { "epoch": 17.02, "learning_rate": 4.1490827042423116e-05, "loss": 2.2021, "step": 5881500 }, { "epoch": 17.03, "learning_rate": 4.1490103394775845e-05, "loss": 2.198, "step": 5882000 }, { "epoch": 17.03, "learning_rate": 4.148937974712857e-05, "loss": 2.2219, "step": 5882500 }, { "epoch": 17.03, "learning_rate": 4.14886560994813e-05, "loss": 2.2026, "step": 5883000 }, { "epoch": 17.03, "learning_rate": 4.148793245183402e-05, "loss": 2.1919, "step": 5883500 }, { "epoch": 17.03, "learning_rate": 4.148720880418674e-05, "loss": 2.2059, "step": 5884000 }, { "epoch": 17.03, "learning_rate": 4.148648515653946e-05, "loss": 2.2266, "step": 5884500 }, { "epoch": 17.03, "learning_rate": 4.1485761508892186e-05, "loss": 2.1901, "step": 5885000 }, { "epoch": 17.04, "learning_rate": 4.148503786124491e-05, "loss": 2.2351, "step": 5885500 }, { "epoch": 17.04, "learning_rate": 4.148431421359763e-05, "loss": 2.2107, "step": 5886000 }, { "epoch": 17.04, "learning_rate": 4.1483592013245646e-05, "loss": 2.1958, "step": 5886500 }, { "epoch": 17.04, "learning_rate": 4.148286836559837e-05, "loss": 2.2222, "step": 5887000 }, { "epoch": 17.04, "learning_rate": 4.14821447179511e-05, "loss": 2.2089, "step": 5887500 }, { "epoch": 17.04, "learning_rate": 4.148142107030382e-05, "loss": 2.2168, "step": 5888000 }, { "epoch": 17.04, "learning_rate": 4.148069742265654e-05, "loss": 2.2349, "step": 5888500 }, { "epoch": 17.05, "learning_rate": 4.147997522230456e-05, "loss": 2.2073, "step": 5889000 }, { "epoch": 17.05, "learning_rate": 4.1479251574657286e-05, "loss": 2.2031, "step": 5889500 }, { "epoch": 17.05, "learning_rate": 4.147852792701001e-05, "loss": 2.2241, "step": 5890000 }, { "epoch": 17.05, "learning_rate": 4.147780427936273e-05, "loss": 2.2123, "step": 5890500 }, { "epoch": 17.05, "learning_rate": 4.1477082079010746e-05, "loss": 2.2164, "step": 5891000 }, { "epoch": 17.05, "learning_rate": 4.147635843136347e-05, "loss": 2.215, "step": 5891500 }, { "epoch": 17.05, "learning_rate": 4.14756347837162e-05, "loss": 2.2231, "step": 5892000 }, { "epoch": 17.06, "learning_rate": 4.147491258336421e-05, "loss": 2.2469, "step": 5892500 }, { "epoch": 17.06, "learning_rate": 4.1474188935716935e-05, "loss": 2.2116, "step": 5893000 }, { "epoch": 17.06, "learning_rate": 4.147346528806966e-05, "loss": 2.1982, "step": 5893500 }, { "epoch": 17.06, "learning_rate": 4.147274164042238e-05, "loss": 2.2192, "step": 5894000 }, { "epoch": 17.06, "learning_rate": 4.14720179927751e-05, "loss": 2.2086, "step": 5894500 }, { "epoch": 17.06, "learning_rate": 4.147129723971842e-05, "loss": 2.2327, "step": 5895000 }, { "epoch": 17.07, "learning_rate": 4.147057359207114e-05, "loss": 2.223, "step": 5895500 }, { "epoch": 17.07, "learning_rate": 4.146984994442386e-05, "loss": 2.2192, "step": 5896000 }, { "epoch": 17.07, "learning_rate": 4.1469126296776584e-05, "loss": 2.2157, "step": 5896500 }, { "epoch": 17.07, "learning_rate": 4.1468402649129306e-05, "loss": 2.2177, "step": 5897000 }, { "epoch": 17.07, "learning_rate": 4.1467679001482035e-05, "loss": 2.2244, "step": 5897500 }, { "epoch": 17.07, "learning_rate": 4.146695535383476e-05, "loss": 2.2085, "step": 5898000 }, { "epoch": 17.07, "learning_rate": 4.146623170618748e-05, "loss": 2.2109, "step": 5898500 }, { "epoch": 17.08, "learning_rate": 4.14655080585402e-05, "loss": 2.2346, "step": 5899000 }, { "epoch": 17.08, "learning_rate": 4.1464785858188224e-05, "loss": 2.2155, "step": 5899500 }, { "epoch": 17.08, "learning_rate": 4.1464062210540946e-05, "loss": 2.2064, "step": 5900000 }, { "epoch": 17.08, "learning_rate": 4.146333856289367e-05, "loss": 2.2143, "step": 5900500 }, { "epoch": 17.08, "learning_rate": 4.146261491524639e-05, "loss": 2.2289, "step": 5901000 }, { "epoch": 17.08, "learning_rate": 4.1461892714894407e-05, "loss": 2.2226, "step": 5901500 }, { "epoch": 17.08, "learning_rate": 4.146116906724713e-05, "loss": 2.2197, "step": 5902000 }, { "epoch": 17.09, "learning_rate": 4.1460446866895144e-05, "loss": 2.2094, "step": 5902500 }, { "epoch": 17.09, "learning_rate": 4.145972466654317e-05, "loss": 2.2335, "step": 5903000 }, { "epoch": 17.09, "learning_rate": 4.145900101889589e-05, "loss": 2.2047, "step": 5903500 }, { "epoch": 17.09, "learning_rate": 4.1458278818543904e-05, "loss": 2.2183, "step": 5904000 }, { "epoch": 17.09, "learning_rate": 4.145755517089663e-05, "loss": 2.219, "step": 5904500 }, { "epoch": 17.09, "learning_rate": 4.145683152324935e-05, "loss": 2.2002, "step": 5905000 }, { "epoch": 17.09, "learning_rate": 4.145610787560208e-05, "loss": 2.2295, "step": 5905500 }, { "epoch": 17.1, "learning_rate": 4.14553842279548e-05, "loss": 2.2174, "step": 5906000 }, { "epoch": 17.1, "learning_rate": 4.145466058030752e-05, "loss": 2.2399, "step": 5906500 }, { "epoch": 17.1, "learning_rate": 4.1453936932660245e-05, "loss": 2.207, "step": 5907000 }, { "epoch": 17.1, "learning_rate": 4.1453213285012974e-05, "loss": 2.2205, "step": 5907500 }, { "epoch": 17.1, "learning_rate": 4.1452489637365696e-05, "loss": 2.223, "step": 5908000 }, { "epoch": 17.1, "learning_rate": 4.145176598971842e-05, "loss": 2.1971, "step": 5908500 }, { "epoch": 17.1, "learning_rate": 4.145104234207114e-05, "loss": 2.215, "step": 5909000 }, { "epoch": 17.11, "learning_rate": 4.145031869442386e-05, "loss": 2.2072, "step": 5909500 }, { "epoch": 17.11, "learning_rate": 4.1449595046776585e-05, "loss": 2.2047, "step": 5910000 }, { "epoch": 17.11, "learning_rate": 4.144887139912931e-05, "loss": 2.2288, "step": 5910500 }, { "epoch": 17.11, "learning_rate": 4.144814775148203e-05, "loss": 2.2494, "step": 5911000 }, { "epoch": 17.11, "learning_rate": 4.144742410383475e-05, "loss": 2.2137, "step": 5911500 }, { "epoch": 17.11, "learning_rate": 4.1446700456187474e-05, "loss": 2.2265, "step": 5912000 }, { "epoch": 17.11, "learning_rate": 4.14459768085402e-05, "loss": 2.2262, "step": 5912500 }, { "epoch": 17.12, "learning_rate": 4.1445253160892925e-05, "loss": 2.2159, "step": 5913000 }, { "epoch": 17.12, "learning_rate": 4.144452951324565e-05, "loss": 2.2215, "step": 5913500 }, { "epoch": 17.12, "learning_rate": 4.1443805865598376e-05, "loss": 2.2228, "step": 5914000 }, { "epoch": 17.12, "learning_rate": 4.14430822179511e-05, "loss": 2.2173, "step": 5914500 }, { "epoch": 17.12, "learning_rate": 4.144235857030382e-05, "loss": 2.1959, "step": 5915000 }, { "epoch": 17.12, "learning_rate": 4.144163492265654e-05, "loss": 2.2273, "step": 5915500 }, { "epoch": 17.12, "learning_rate": 4.1440911275009265e-05, "loss": 2.2099, "step": 5916000 }, { "epoch": 17.13, "learning_rate": 4.144018762736199e-05, "loss": 2.2272, "step": 5916500 }, { "epoch": 17.13, "learning_rate": 4.143946397971471e-05, "loss": 2.2301, "step": 5917000 }, { "epoch": 17.13, "learning_rate": 4.143874033206743e-05, "loss": 2.2488, "step": 5917500 }, { "epoch": 17.13, "learning_rate": 4.1438016684420154e-05, "loss": 2.2151, "step": 5918000 }, { "epoch": 17.13, "learning_rate": 4.1437293036772876e-05, "loss": 2.2305, "step": 5918500 }, { "epoch": 17.13, "learning_rate": 4.14365708364209e-05, "loss": 2.2166, "step": 5919000 }, { "epoch": 17.13, "learning_rate": 4.143585008336421e-05, "loss": 2.2412, "step": 5919500 }, { "epoch": 17.14, "learning_rate": 4.1435126435716937e-05, "loss": 2.2195, "step": 5920000 }, { "epoch": 17.14, "learning_rate": 4.143440278806966e-05, "loss": 2.2218, "step": 5920500 }, { "epoch": 17.14, "learning_rate": 4.1433680587717674e-05, "loss": 2.2022, "step": 5921000 }, { "epoch": 17.14, "learning_rate": 4.1432956940070397e-05, "loss": 2.2195, "step": 5921500 }, { "epoch": 17.14, "learning_rate": 4.1432233292423126e-05, "loss": 2.207, "step": 5922000 }, { "epoch": 17.14, "learning_rate": 4.143150964477585e-05, "loss": 2.2218, "step": 5922500 }, { "epoch": 17.14, "learning_rate": 4.143078599712857e-05, "loss": 2.2182, "step": 5923000 }, { "epoch": 17.15, "learning_rate": 4.143006234948129e-05, "loss": 2.2339, "step": 5923500 }, { "epoch": 17.15, "learning_rate": 4.1429338701834014e-05, "loss": 2.2244, "step": 5924000 }, { "epoch": 17.15, "learning_rate": 4.142861505418674e-05, "loss": 2.1978, "step": 5924500 }, { "epoch": 17.15, "learning_rate": 4.142789140653946e-05, "loss": 2.2086, "step": 5925000 }, { "epoch": 17.15, "learning_rate": 4.142716775889218e-05, "loss": 2.2234, "step": 5925500 }, { "epoch": 17.15, "learning_rate": 4.1426444111244903e-05, "loss": 2.23, "step": 5926000 }, { "epoch": 17.15, "learning_rate": 4.1425720463597626e-05, "loss": 2.205, "step": 5926500 }, { "epoch": 17.16, "learning_rate": 4.1424996815950355e-05, "loss": 2.2129, "step": 5927000 }, { "epoch": 17.16, "learning_rate": 4.142427316830308e-05, "loss": 2.2143, "step": 5927500 }, { "epoch": 17.16, "learning_rate": 4.14235509679511e-05, "loss": 2.2141, "step": 5928000 }, { "epoch": 17.16, "learning_rate": 4.142282732030382e-05, "loss": 2.2295, "step": 5928500 }, { "epoch": 17.16, "learning_rate": 4.142210511995184e-05, "loss": 2.2348, "step": 5929000 }, { "epoch": 17.16, "learning_rate": 4.142138147230456e-05, "loss": 2.2171, "step": 5929500 }, { "epoch": 17.16, "learning_rate": 4.142065782465728e-05, "loss": 2.1984, "step": 5930000 }, { "epoch": 17.17, "learning_rate": 4.1419934177010004e-05, "loss": 2.2053, "step": 5930500 }, { "epoch": 17.17, "learning_rate": 4.1419210529362726e-05, "loss": 2.2126, "step": 5931000 }, { "epoch": 17.17, "learning_rate": 4.141848688171545e-05, "loss": 2.2197, "step": 5931500 }, { "epoch": 17.17, "learning_rate": 4.141776323406818e-05, "loss": 2.2128, "step": 5932000 }, { "epoch": 17.17, "learning_rate": 4.14170395864209e-05, "loss": 2.2148, "step": 5932500 }, { "epoch": 17.17, "learning_rate": 4.141631593877362e-05, "loss": 2.2113, "step": 5933000 }, { "epoch": 17.18, "learning_rate": 4.141559518571693e-05, "loss": 2.218, "step": 5933500 }, { "epoch": 17.18, "learning_rate": 4.141487153806965e-05, "loss": 2.2406, "step": 5934000 }, { "epoch": 17.18, "learning_rate": 4.141414789042238e-05, "loss": 2.2155, "step": 5934500 }, { "epoch": 17.18, "learning_rate": 4.1413424242775104e-05, "loss": 2.209, "step": 5935000 }, { "epoch": 17.18, "learning_rate": 4.1412700595127826e-05, "loss": 2.2397, "step": 5935500 }, { "epoch": 17.18, "learning_rate": 4.1411976947480555e-05, "loss": 2.2202, "step": 5936000 }, { "epoch": 17.18, "learning_rate": 4.141125329983328e-05, "loss": 2.2082, "step": 5936500 }, { "epoch": 17.19, "learning_rate": 4.1410529652186e-05, "loss": 2.1924, "step": 5937000 }, { "epoch": 17.19, "learning_rate": 4.140980600453872e-05, "loss": 2.1974, "step": 5937500 }, { "epoch": 17.19, "learning_rate": 4.1409082356891444e-05, "loss": 2.239, "step": 5938000 }, { "epoch": 17.19, "learning_rate": 4.140836015653946e-05, "loss": 2.1973, "step": 5938500 }, { "epoch": 17.19, "learning_rate": 4.140763650889218e-05, "loss": 2.2194, "step": 5939000 }, { "epoch": 17.19, "learning_rate": 4.1406912861244904e-05, "loss": 2.2259, "step": 5939500 }, { "epoch": 17.19, "learning_rate": 4.1406189213597626e-05, "loss": 2.2389, "step": 5940000 }, { "epoch": 17.2, "learning_rate": 4.140546701324565e-05, "loss": 2.2197, "step": 5940500 }, { "epoch": 17.2, "learning_rate": 4.140474336559837e-05, "loss": 2.2306, "step": 5941000 }, { "epoch": 17.2, "learning_rate": 4.140401971795109e-05, "loss": 2.2155, "step": 5941500 }, { "epoch": 17.2, "learning_rate": 4.140329751759911e-05, "loss": 2.2033, "step": 5942000 }, { "epoch": 17.2, "learning_rate": 4.140257386995184e-05, "loss": 2.2075, "step": 5942500 }, { "epoch": 17.2, "learning_rate": 4.140185022230456e-05, "loss": 2.1984, "step": 5943000 }, { "epoch": 17.2, "learning_rate": 4.140112657465728e-05, "loss": 2.1966, "step": 5943500 }, { "epoch": 17.21, "learning_rate": 4.1400402927010005e-05, "loss": 2.2212, "step": 5944000 }, { "epoch": 17.21, "learning_rate": 4.139967927936273e-05, "loss": 2.2123, "step": 5944500 }, { "epoch": 17.21, "learning_rate": 4.1398955631715456e-05, "loss": 2.2164, "step": 5945000 }, { "epoch": 17.21, "learning_rate": 4.139823198406818e-05, "loss": 2.2303, "step": 5945500 }, { "epoch": 17.21, "learning_rate": 4.13975083364209e-05, "loss": 2.2319, "step": 5946000 }, { "epoch": 17.21, "learning_rate": 4.139678468877362e-05, "loss": 2.1922, "step": 5946500 }, { "epoch": 17.21, "learning_rate": 4.139606248842164e-05, "loss": 2.2289, "step": 5947000 }, { "epoch": 17.22, "learning_rate": 4.139533884077436e-05, "loss": 2.2193, "step": 5947500 }, { "epoch": 17.22, "learning_rate": 4.139461519312708e-05, "loss": 2.2099, "step": 5948000 }, { "epoch": 17.22, "learning_rate": 4.1393891545479805e-05, "loss": 2.2326, "step": 5948500 }, { "epoch": 17.22, "learning_rate": 4.139316789783253e-05, "loss": 2.2233, "step": 5949000 }, { "epoch": 17.22, "learning_rate": 4.1392444250185256e-05, "loss": 2.2409, "step": 5949500 }, { "epoch": 17.22, "learning_rate": 4.139172060253798e-05, "loss": 2.2376, "step": 5950000 }, { "epoch": 17.22, "learning_rate": 4.139099695489071e-05, "loss": 2.1995, "step": 5950500 }, { "epoch": 17.23, "learning_rate": 4.139027475453872e-05, "loss": 2.2243, "step": 5951000 }, { "epoch": 17.23, "learning_rate": 4.1389551106891445e-05, "loss": 2.2207, "step": 5951500 }, { "epoch": 17.23, "learning_rate": 4.138882745924417e-05, "loss": 2.2066, "step": 5952000 }, { "epoch": 17.23, "learning_rate": 4.138810381159689e-05, "loss": 2.2117, "step": 5952500 }, { "epoch": 17.23, "learning_rate": 4.138738016394961e-05, "loss": 2.2007, "step": 5953000 }, { "epoch": 17.23, "learning_rate": 4.1386656516302334e-05, "loss": 2.229, "step": 5953500 }, { "epoch": 17.23, "learning_rate": 4.1385932868655056e-05, "loss": 2.1929, "step": 5954000 }, { "epoch": 17.24, "learning_rate": 4.138520922100778e-05, "loss": 2.2269, "step": 5954500 }, { "epoch": 17.24, "learning_rate": 4.1384488467951094e-05, "loss": 2.2037, "step": 5955000 }, { "epoch": 17.24, "learning_rate": 4.1383764820303816e-05, "loss": 2.2114, "step": 5955500 }, { "epoch": 17.24, "learning_rate": 4.138304117265654e-05, "loss": 2.2164, "step": 5956000 }, { "epoch": 17.24, "learning_rate": 4.138231752500926e-05, "loss": 2.2139, "step": 5956500 }, { "epoch": 17.24, "learning_rate": 4.138159532465728e-05, "loss": 2.2319, "step": 5957000 }, { "epoch": 17.24, "learning_rate": 4.1380871677010005e-05, "loss": 2.2393, "step": 5957500 }, { "epoch": 17.25, "learning_rate": 4.138014802936273e-05, "loss": 2.2116, "step": 5958000 }, { "epoch": 17.25, "learning_rate": 4.1379424381715457e-05, "loss": 2.1985, "step": 5958500 }, { "epoch": 17.25, "learning_rate": 4.137870073406818e-05, "loss": 2.2126, "step": 5959000 }, { "epoch": 17.25, "learning_rate": 4.1377978533716194e-05, "loss": 2.2138, "step": 5959500 }, { "epoch": 17.25, "learning_rate": 4.1377254886068917e-05, "loss": 2.2288, "step": 5960000 }, { "epoch": 17.25, "learning_rate": 4.137653268571693e-05, "loss": 2.1827, "step": 5960500 }, { "epoch": 17.25, "learning_rate": 4.1375809038069654e-05, "loss": 2.2089, "step": 5961000 }, { "epoch": 17.26, "learning_rate": 4.137508539042238e-05, "loss": 2.2098, "step": 5961500 }, { "epoch": 17.26, "learning_rate": 4.1374361742775106e-05, "loss": 2.2312, "step": 5962000 }, { "epoch": 17.26, "learning_rate": 4.137363809512783e-05, "loss": 2.2421, "step": 5962500 }, { "epoch": 17.26, "learning_rate": 4.137291444748055e-05, "loss": 2.2033, "step": 5963000 }, { "epoch": 17.26, "learning_rate": 4.137219079983327e-05, "loss": 2.1985, "step": 5963500 }, { "epoch": 17.26, "learning_rate": 4.1371467152185995e-05, "loss": 2.2188, "step": 5964000 }, { "epoch": 17.26, "learning_rate": 4.1370743504538724e-05, "loss": 2.2023, "step": 5964500 }, { "epoch": 17.27, "learning_rate": 4.1370019856891446e-05, "loss": 2.2072, "step": 5965000 }, { "epoch": 17.27, "learning_rate": 4.136929765653946e-05, "loss": 2.206, "step": 5965500 }, { "epoch": 17.27, "learning_rate": 4.1368574008892184e-05, "loss": 2.2288, "step": 5966000 }, { "epoch": 17.27, "learning_rate": 4.1367851808540206e-05, "loss": 2.2124, "step": 5966500 }, { "epoch": 17.27, "learning_rate": 4.136712816089293e-05, "loss": 2.2302, "step": 5967000 }, { "epoch": 17.27, "learning_rate": 4.136640451324565e-05, "loss": 2.2224, "step": 5967500 }, { "epoch": 17.27, "learning_rate": 4.136568086559837e-05, "loss": 2.2095, "step": 5968000 }, { "epoch": 17.28, "learning_rate": 4.136495866524639e-05, "loss": 2.2347, "step": 5968500 }, { "epoch": 17.28, "learning_rate": 4.136423501759911e-05, "loss": 2.2154, "step": 5969000 }, { "epoch": 17.28, "learning_rate": 4.136351136995183e-05, "loss": 2.2199, "step": 5969500 }, { "epoch": 17.28, "learning_rate": 4.1362787722304555e-05, "loss": 2.2188, "step": 5970000 }, { "epoch": 17.28, "learning_rate": 4.136206552195258e-05, "loss": 2.177, "step": 5970500 }, { "epoch": 17.28, "learning_rate": 4.13613418743053e-05, "loss": 2.2213, "step": 5971000 }, { "epoch": 17.29, "learning_rate": 4.136061822665802e-05, "loss": 2.2215, "step": 5971500 }, { "epoch": 17.29, "learning_rate": 4.1359894579010744e-05, "loss": 2.2378, "step": 5972000 }, { "epoch": 17.29, "learning_rate": 4.1359172378658766e-05, "loss": 2.2098, "step": 5972500 }, { "epoch": 17.29, "learning_rate": 4.135844873101149e-05, "loss": 2.2346, "step": 5973000 }, { "epoch": 17.29, "learning_rate": 4.135772508336421e-05, "loss": 2.2305, "step": 5973500 }, { "epoch": 17.29, "learning_rate": 4.135700143571693e-05, "loss": 2.2048, "step": 5974000 }, { "epoch": 17.29, "learning_rate": 4.1356277788069655e-05, "loss": 2.2146, "step": 5974500 }, { "epoch": 17.3, "learning_rate": 4.1355554140422384e-05, "loss": 2.2224, "step": 5975000 }, { "epoch": 17.3, "learning_rate": 4.1354830492775106e-05, "loss": 2.2176, "step": 5975500 }, { "epoch": 17.3, "learning_rate": 4.135410684512783e-05, "loss": 2.2196, "step": 5976000 }, { "epoch": 17.3, "learning_rate": 4.135338319748055e-05, "loss": 2.1781, "step": 5976500 }, { "epoch": 17.3, "learning_rate": 4.135265954983327e-05, "loss": 2.2166, "step": 5977000 }, { "epoch": 17.3, "learning_rate": 4.1351935902185995e-05, "loss": 2.2151, "step": 5977500 }, { "epoch": 17.3, "learning_rate": 4.135121225453872e-05, "loss": 2.2095, "step": 5978000 }, { "epoch": 17.31, "learning_rate": 4.135049005418673e-05, "loss": 2.226, "step": 5978500 }, { "epoch": 17.31, "learning_rate": 4.1349766406539455e-05, "loss": 2.2153, "step": 5979000 }, { "epoch": 17.31, "learning_rate": 4.1349042758892184e-05, "loss": 2.2189, "step": 5979500 }, { "epoch": 17.31, "learning_rate": 4.134832055854021e-05, "loss": 2.1914, "step": 5980000 }, { "epoch": 17.31, "learning_rate": 4.134759691089293e-05, "loss": 2.2224, "step": 5980500 }, { "epoch": 17.31, "learning_rate": 4.134687326324565e-05, "loss": 2.2207, "step": 5981000 }, { "epoch": 17.31, "learning_rate": 4.1346149615598373e-05, "loss": 2.242, "step": 5981500 }, { "epoch": 17.32, "learning_rate": 4.1345425967951096e-05, "loss": 2.2178, "step": 5982000 }, { "epoch": 17.32, "learning_rate": 4.134470232030382e-05, "loss": 2.2047, "step": 5982500 }, { "epoch": 17.32, "learning_rate": 4.134397867265654e-05, "loss": 2.2049, "step": 5983000 }, { "epoch": 17.32, "learning_rate": 4.134325502500926e-05, "loss": 2.2196, "step": 5983500 }, { "epoch": 17.32, "learning_rate": 4.1342531377361985e-05, "loss": 2.2375, "step": 5984000 }, { "epoch": 17.32, "learning_rate": 4.134180772971471e-05, "loss": 2.2045, "step": 5984500 }, { "epoch": 17.32, "learning_rate": 4.1341084082067436e-05, "loss": 2.2109, "step": 5985000 }, { "epoch": 17.33, "learning_rate": 4.134036043442016e-05, "loss": 2.2308, "step": 5985500 }, { "epoch": 17.33, "learning_rate": 4.133963678677288e-05, "loss": 2.2214, "step": 5986000 }, { "epoch": 17.33, "learning_rate": 4.133891313912561e-05, "loss": 2.2142, "step": 5986500 }, { "epoch": 17.33, "learning_rate": 4.133818949147833e-05, "loss": 2.2088, "step": 5987000 }, { "epoch": 17.33, "learning_rate": 4.133746729112635e-05, "loss": 2.2259, "step": 5987500 }, { "epoch": 17.33, "learning_rate": 4.133674364347907e-05, "loss": 2.2281, "step": 5988000 }, { "epoch": 17.33, "learning_rate": 4.133601999583179e-05, "loss": 2.2221, "step": 5988500 }, { "epoch": 17.34, "learning_rate": 4.1335296348184514e-05, "loss": 2.2335, "step": 5989000 }, { "epoch": 17.34, "learning_rate": 4.1334572700537236e-05, "loss": 2.2249, "step": 5989500 }, { "epoch": 17.34, "learning_rate": 4.133384905288996e-05, "loss": 2.2307, "step": 5990000 }, { "epoch": 17.34, "learning_rate": 4.133312685253798e-05, "loss": 2.2204, "step": 5990500 }, { "epoch": 17.34, "learning_rate": 4.13324032048907e-05, "loss": 2.2362, "step": 5991000 }, { "epoch": 17.34, "learning_rate": 4.1331679557243425e-05, "loss": 2.2133, "step": 5991500 }, { "epoch": 17.34, "learning_rate": 4.133095590959615e-05, "loss": 2.225, "step": 5992000 }, { "epoch": 17.35, "learning_rate": 4.133023226194887e-05, "loss": 2.2128, "step": 5992500 }, { "epoch": 17.35, "learning_rate": 4.132950861430159e-05, "loss": 2.2247, "step": 5993000 }, { "epoch": 17.35, "learning_rate": 4.1328784966654314e-05, "loss": 2.2085, "step": 5993500 }, { "epoch": 17.35, "learning_rate": 4.132806131900704e-05, "loss": 2.2409, "step": 5994000 }, { "epoch": 17.35, "learning_rate": 4.1327337671359765e-05, "loss": 2.2208, "step": 5994500 }, { "epoch": 17.35, "learning_rate": 4.132661402371249e-05, "loss": 2.2177, "step": 5995000 }, { "epoch": 17.35, "learning_rate": 4.132589037606521e-05, "loss": 2.2187, "step": 5995500 }, { "epoch": 17.36, "learning_rate": 4.132516817571323e-05, "loss": 2.2128, "step": 5996000 }, { "epoch": 17.36, "learning_rate": 4.132444597536125e-05, "loss": 2.2088, "step": 5996500 }, { "epoch": 17.36, "learning_rate": 4.132372232771397e-05, "loss": 2.1993, "step": 5997000 }, { "epoch": 17.36, "learning_rate": 4.132299868006669e-05, "loss": 2.2243, "step": 5997500 }, { "epoch": 17.36, "learning_rate": 4.1322275032419414e-05, "loss": 2.2051, "step": 5998000 }, { "epoch": 17.36, "learning_rate": 4.1321551384772137e-05, "loss": 2.2305, "step": 5998500 }, { "epoch": 17.36, "learning_rate": 4.132082918442016e-05, "loss": 2.2103, "step": 5999000 }, { "epoch": 17.37, "learning_rate": 4.132010553677288e-05, "loss": 2.2432, "step": 5999500 }, { "epoch": 17.37, "learning_rate": 4.13193818891256e-05, "loss": 2.2062, "step": 6000000 }, { "epoch": 17.37, "learning_rate": 4.131865968877362e-05, "loss": 2.2226, "step": 6000500 }, { "epoch": 17.37, "learning_rate": 4.1317937488421634e-05, "loss": 2.231, "step": 6001000 }, { "epoch": 17.37, "learning_rate": 4.1317213840774363e-05, "loss": 2.2241, "step": 6001500 }, { "epoch": 17.37, "learning_rate": 4.1316490193127086e-05, "loss": 2.2295, "step": 6002000 }, { "epoch": 17.37, "learning_rate": 4.1315766545479815e-05, "loss": 2.2568, "step": 6002500 }, { "epoch": 17.38, "learning_rate": 4.131504289783254e-05, "loss": 2.1964, "step": 6003000 }, { "epoch": 17.38, "learning_rate": 4.131431925018526e-05, "loss": 2.2464, "step": 6003500 }, { "epoch": 17.38, "learning_rate": 4.131359560253798e-05, "loss": 2.2329, "step": 6004000 }, { "epoch": 17.38, "learning_rate": 4.1312871954890704e-05, "loss": 2.2213, "step": 6004500 }, { "epoch": 17.38, "learning_rate": 4.1312148307243426e-05, "loss": 2.2195, "step": 6005000 }, { "epoch": 17.38, "learning_rate": 4.131142465959615e-05, "loss": 2.2116, "step": 6005500 }, { "epoch": 17.38, "learning_rate": 4.131070101194887e-05, "loss": 2.2256, "step": 6006000 }, { "epoch": 17.39, "learning_rate": 4.130997736430159e-05, "loss": 2.2202, "step": 6006500 }, { "epoch": 17.39, "learning_rate": 4.1309253716654315e-05, "loss": 2.2189, "step": 6007000 }, { "epoch": 17.39, "learning_rate": 4.130853006900704e-05, "loss": 2.2343, "step": 6007500 }, { "epoch": 17.39, "learning_rate": 4.1307806421359766e-05, "loss": 2.2334, "step": 6008000 }, { "epoch": 17.39, "learning_rate": 4.130708277371249e-05, "loss": 2.2242, "step": 6008500 }, { "epoch": 17.39, "learning_rate": 4.130635912606521e-05, "loss": 2.2384, "step": 6009000 }, { "epoch": 17.4, "learning_rate": 4.130563692571323e-05, "loss": 2.2254, "step": 6009500 }, { "epoch": 17.4, "learning_rate": 4.130491472536125e-05, "loss": 2.2237, "step": 6010000 }, { "epoch": 17.4, "learning_rate": 4.130419107771397e-05, "loss": 2.2459, "step": 6010500 }, { "epoch": 17.4, "learning_rate": 4.130346743006669e-05, "loss": 2.1959, "step": 6011000 }, { "epoch": 17.4, "learning_rate": 4.1302743782419415e-05, "loss": 2.227, "step": 6011500 }, { "epoch": 17.4, "learning_rate": 4.130202013477214e-05, "loss": 2.2292, "step": 6012000 }, { "epoch": 17.4, "learning_rate": 4.130129938171545e-05, "loss": 2.2362, "step": 6012500 }, { "epoch": 17.41, "learning_rate": 4.1300575734068175e-05, "loss": 2.2331, "step": 6013000 }, { "epoch": 17.41, "learning_rate": 4.12998520864209e-05, "loss": 2.1948, "step": 6013500 }, { "epoch": 17.41, "learning_rate": 4.129912843877362e-05, "loss": 2.2116, "step": 6014000 }, { "epoch": 17.41, "learning_rate": 4.129840479112634e-05, "loss": 2.213, "step": 6014500 }, { "epoch": 17.41, "learning_rate": 4.1297682590774364e-05, "loss": 2.2589, "step": 6015000 }, { "epoch": 17.41, "learning_rate": 4.1296958943127086e-05, "loss": 2.2063, "step": 6015500 }, { "epoch": 17.41, "learning_rate": 4.129623529547981e-05, "loss": 2.2267, "step": 6016000 }, { "epoch": 17.42, "learning_rate": 4.129551164783253e-05, "loss": 2.2402, "step": 6016500 }, { "epoch": 17.42, "learning_rate": 4.129478800018526e-05, "loss": 2.2364, "step": 6017000 }, { "epoch": 17.42, "learning_rate": 4.129406435253798e-05, "loss": 2.2024, "step": 6017500 }, { "epoch": 17.42, "learning_rate": 4.1293340704890704e-05, "loss": 2.2027, "step": 6018000 }, { "epoch": 17.42, "learning_rate": 4.129261705724343e-05, "loss": 2.2173, "step": 6018500 }, { "epoch": 17.42, "learning_rate": 4.129189340959615e-05, "loss": 2.2015, "step": 6019000 }, { "epoch": 17.42, "learning_rate": 4.129116976194887e-05, "loss": 2.2159, "step": 6019500 }, { "epoch": 17.43, "learning_rate": 4.129044611430159e-05, "loss": 2.2043, "step": 6020000 }, { "epoch": 17.43, "learning_rate": 4.1289722466654316e-05, "loss": 2.2265, "step": 6020500 }, { "epoch": 17.43, "learning_rate": 4.128899881900704e-05, "loss": 2.2361, "step": 6021000 }, { "epoch": 17.43, "learning_rate": 4.128827661865506e-05, "loss": 2.2174, "step": 6021500 }, { "epoch": 17.43, "learning_rate": 4.128755297100778e-05, "loss": 2.2198, "step": 6022000 }, { "epoch": 17.43, "learning_rate": 4.12868307706558e-05, "loss": 2.2458, "step": 6022500 }, { "epoch": 17.43, "learning_rate": 4.128610712300852e-05, "loss": 2.2296, "step": 6023000 }, { "epoch": 17.44, "learning_rate": 4.128538347536124e-05, "loss": 2.2208, "step": 6023500 }, { "epoch": 17.44, "learning_rate": 4.1284659827713965e-05, "loss": 2.2054, "step": 6024000 }, { "epoch": 17.44, "learning_rate": 4.1283936180066694e-05, "loss": 2.2295, "step": 6024500 }, { "epoch": 17.44, "learning_rate": 4.1283212532419416e-05, "loss": 2.2287, "step": 6025000 }, { "epoch": 17.44, "learning_rate": 4.128248888477214e-05, "loss": 2.2409, "step": 6025500 }, { "epoch": 17.44, "learning_rate": 4.128176668442016e-05, "loss": 2.242, "step": 6026000 }, { "epoch": 17.44, "learning_rate": 4.1281044484068176e-05, "loss": 2.208, "step": 6026500 }, { "epoch": 17.45, "learning_rate": 4.12803208364209e-05, "loss": 2.2134, "step": 6027000 }, { "epoch": 17.45, "learning_rate": 4.127959718877362e-05, "loss": 2.2397, "step": 6027500 }, { "epoch": 17.45, "learning_rate": 4.127887498842164e-05, "loss": 2.2007, "step": 6028000 }, { "epoch": 17.45, "learning_rate": 4.1278151340774365e-05, "loss": 2.2172, "step": 6028500 }, { "epoch": 17.45, "learning_rate": 4.127742769312709e-05, "loss": 2.2347, "step": 6029000 }, { "epoch": 17.45, "learning_rate": 4.127670404547981e-05, "loss": 2.2233, "step": 6029500 }, { "epoch": 17.45, "learning_rate": 4.127598039783253e-05, "loss": 2.2268, "step": 6030000 }, { "epoch": 17.46, "learning_rate": 4.1275256750185254e-05, "loss": 2.2189, "step": 6030500 }, { "epoch": 17.46, "learning_rate": 4.1274533102537976e-05, "loss": 2.2185, "step": 6031000 }, { "epoch": 17.46, "learning_rate": 4.12738094548907e-05, "loss": 2.2227, "step": 6031500 }, { "epoch": 17.46, "learning_rate": 4.127308580724343e-05, "loss": 2.2355, "step": 6032000 }, { "epoch": 17.46, "learning_rate": 4.127236360689144e-05, "loss": 2.2285, "step": 6032500 }, { "epoch": 17.46, "learning_rate": 4.1271639959244165e-05, "loss": 2.2006, "step": 6033000 }, { "epoch": 17.46, "learning_rate": 4.1270916311596894e-05, "loss": 2.2495, "step": 6033500 }, { "epoch": 17.47, "learning_rate": 4.1270192663949616e-05, "loss": 2.2099, "step": 6034000 }, { "epoch": 17.47, "learning_rate": 4.126947046359763e-05, "loss": 2.2141, "step": 6034500 }, { "epoch": 17.47, "learning_rate": 4.1268746815950354e-05, "loss": 2.2411, "step": 6035000 }, { "epoch": 17.47, "learning_rate": 4.1268023168303076e-05, "loss": 2.2326, "step": 6035500 }, { "epoch": 17.47, "learning_rate": 4.12672995206558e-05, "loss": 2.2273, "step": 6036000 }, { "epoch": 17.47, "learning_rate": 4.126657587300852e-05, "loss": 2.2293, "step": 6036500 }, { "epoch": 17.47, "learning_rate": 4.126585222536124e-05, "loss": 2.2394, "step": 6037000 }, { "epoch": 17.48, "learning_rate": 4.1265130025009266e-05, "loss": 2.2301, "step": 6037500 }, { "epoch": 17.48, "learning_rate": 4.126440637736199e-05, "loss": 2.1974, "step": 6038000 }, { "epoch": 17.48, "learning_rate": 4.126368272971471e-05, "loss": 2.2347, "step": 6038500 }, { "epoch": 17.48, "learning_rate": 4.126295908206743e-05, "loss": 2.2152, "step": 6039000 }, { "epoch": 17.48, "learning_rate": 4.126223832901074e-05, "loss": 2.2113, "step": 6039500 }, { "epoch": 17.48, "learning_rate": 4.126151468136347e-05, "loss": 2.2217, "step": 6040000 }, { "epoch": 17.48, "learning_rate": 4.126079103371619e-05, "loss": 2.2138, "step": 6040500 }, { "epoch": 17.49, "learning_rate": 4.126006738606892e-05, "loss": 2.2212, "step": 6041000 }, { "epoch": 17.49, "learning_rate": 4.1259343738421644e-05, "loss": 2.2251, "step": 6041500 }, { "epoch": 17.49, "learning_rate": 4.1258620090774366e-05, "loss": 2.2265, "step": 6042000 }, { "epoch": 17.49, "learning_rate": 4.125789644312709e-05, "loss": 2.2327, "step": 6042500 }, { "epoch": 17.49, "learning_rate": 4.1257174242775104e-05, "loss": 2.2097, "step": 6043000 }, { "epoch": 17.49, "learning_rate": 4.1256450595127826e-05, "loss": 2.23, "step": 6043500 }, { "epoch": 17.49, "learning_rate": 4.125572694748055e-05, "loss": 2.2493, "step": 6044000 }, { "epoch": 17.5, "learning_rate": 4.125500329983327e-05, "loss": 2.2419, "step": 6044500 }, { "epoch": 17.5, "learning_rate": 4.125427965218599e-05, "loss": 2.2104, "step": 6045000 }, { "epoch": 17.5, "learning_rate": 4.125355600453872e-05, "loss": 2.2295, "step": 6045500 }, { "epoch": 17.5, "learning_rate": 4.1252832356891444e-05, "loss": 2.2112, "step": 6046000 }, { "epoch": 17.5, "learning_rate": 4.1252108709244166e-05, "loss": 2.2246, "step": 6046500 }, { "epoch": 17.5, "learning_rate": 4.1251385061596895e-05, "loss": 2.2191, "step": 6047000 }, { "epoch": 17.51, "learning_rate": 4.125066141394962e-05, "loss": 2.233, "step": 6047500 }, { "epoch": 17.51, "learning_rate": 4.124993776630234e-05, "loss": 2.2061, "step": 6048000 }, { "epoch": 17.51, "learning_rate": 4.124921411865506e-05, "loss": 2.2151, "step": 6048500 }, { "epoch": 17.51, "learning_rate": 4.1248490471007784e-05, "loss": 2.1958, "step": 6049000 }, { "epoch": 17.51, "learning_rate": 4.1247766823360506e-05, "loss": 2.2197, "step": 6049500 }, { "epoch": 17.51, "learning_rate": 4.124704462300852e-05, "loss": 2.225, "step": 6050000 }, { "epoch": 17.51, "learning_rate": 4.1246320975361244e-05, "loss": 2.2214, "step": 6050500 }, { "epoch": 17.52, "learning_rate": 4.1245597327713966e-05, "loss": 2.2105, "step": 6051000 }, { "epoch": 17.52, "learning_rate": 4.1244873680066695e-05, "loss": 2.2327, "step": 6051500 }, { "epoch": 17.52, "learning_rate": 4.124415003241942e-05, "loss": 2.2329, "step": 6052000 }, { "epoch": 17.52, "learning_rate": 4.124342638477214e-05, "loss": 2.2132, "step": 6052500 }, { "epoch": 17.52, "learning_rate": 4.124270273712486e-05, "loss": 2.2321, "step": 6053000 }, { "epoch": 17.52, "learning_rate": 4.1241979089477584e-05, "loss": 2.2417, "step": 6053500 }, { "epoch": 17.52, "learning_rate": 4.124125544183031e-05, "loss": 2.2298, "step": 6054000 }, { "epoch": 17.53, "learning_rate": 4.124053324147833e-05, "loss": 2.2118, "step": 6054500 }, { "epoch": 17.53, "learning_rate": 4.123980959383105e-05, "loss": 2.2412, "step": 6055000 }, { "epoch": 17.53, "learning_rate": 4.123908594618377e-05, "loss": 2.2222, "step": 6055500 }, { "epoch": 17.53, "learning_rate": 4.1238362298536495e-05, "loss": 2.1998, "step": 6056000 }, { "epoch": 17.53, "learning_rate": 4.123763865088922e-05, "loss": 2.2234, "step": 6056500 }, { "epoch": 17.53, "learning_rate": 4.123691645053724e-05, "loss": 2.2174, "step": 6057000 }, { "epoch": 17.53, "learning_rate": 4.1236194250185256e-05, "loss": 2.195, "step": 6057500 }, { "epoch": 17.54, "learning_rate": 4.123547204983327e-05, "loss": 2.2192, "step": 6058000 }, { "epoch": 17.54, "learning_rate": 4.1234749849481293e-05, "loss": 2.2076, "step": 6058500 }, { "epoch": 17.54, "learning_rate": 4.1234026201834016e-05, "loss": 2.2161, "step": 6059000 }, { "epoch": 17.54, "learning_rate": 4.123330255418674e-05, "loss": 2.2214, "step": 6059500 }, { "epoch": 17.54, "learning_rate": 4.123257890653946e-05, "loss": 2.2222, "step": 6060000 }, { "epoch": 17.54, "learning_rate": 4.123185525889218e-05, "loss": 2.2129, "step": 6060500 }, { "epoch": 17.54, "learning_rate": 4.1231131611244905e-05, "loss": 2.2087, "step": 6061000 }, { "epoch": 17.55, "learning_rate": 4.123040796359763e-05, "loss": 2.223, "step": 6061500 }, { "epoch": 17.55, "learning_rate": 4.1229684315950356e-05, "loss": 2.2042, "step": 6062000 }, { "epoch": 17.55, "learning_rate": 4.122896066830308e-05, "loss": 2.2036, "step": 6062500 }, { "epoch": 17.55, "learning_rate": 4.1228238467951094e-05, "loss": 2.2261, "step": 6063000 }, { "epoch": 17.55, "learning_rate": 4.122751482030382e-05, "loss": 2.2348, "step": 6063500 }, { "epoch": 17.55, "learning_rate": 4.1226791172656545e-05, "loss": 2.2132, "step": 6064000 }, { "epoch": 17.55, "learning_rate": 4.122606752500927e-05, "loss": 2.2318, "step": 6064500 }, { "epoch": 17.56, "learning_rate": 4.122534387736199e-05, "loss": 2.2149, "step": 6065000 }, { "epoch": 17.56, "learning_rate": 4.122462022971471e-05, "loss": 2.192, "step": 6065500 }, { "epoch": 17.56, "learning_rate": 4.1223896582067434e-05, "loss": 2.2255, "step": 6066000 }, { "epoch": 17.56, "learning_rate": 4.1223172934420156e-05, "loss": 2.2362, "step": 6066500 }, { "epoch": 17.56, "learning_rate": 4.122244928677288e-05, "loss": 2.2435, "step": 6067000 }, { "epoch": 17.56, "learning_rate": 4.1221727086420894e-05, "loss": 2.2173, "step": 6067500 }, { "epoch": 17.56, "learning_rate": 4.122100343877362e-05, "loss": 2.2052, "step": 6068000 }, { "epoch": 17.57, "learning_rate": 4.122028123842164e-05, "loss": 2.2291, "step": 6068500 }, { "epoch": 17.57, "learning_rate": 4.1219559038069654e-05, "loss": 2.2237, "step": 6069000 }, { "epoch": 17.57, "learning_rate": 4.121883539042238e-05, "loss": 2.2181, "step": 6069500 }, { "epoch": 17.57, "learning_rate": 4.1218111742775105e-05, "loss": 2.2134, "step": 6070000 }, { "epoch": 17.57, "learning_rate": 4.121738809512783e-05, "loss": 2.1953, "step": 6070500 }, { "epoch": 17.57, "learning_rate": 4.121666444748055e-05, "loss": 2.2024, "step": 6071000 }, { "epoch": 17.57, "learning_rate": 4.121594079983327e-05, "loss": 2.2382, "step": 6071500 }, { "epoch": 17.58, "learning_rate": 4.1215217152186e-05, "loss": 2.2323, "step": 6072000 }, { "epoch": 17.58, "learning_rate": 4.121449350453872e-05, "loss": 2.237, "step": 6072500 }, { "epoch": 17.58, "learning_rate": 4.1213769856891445e-05, "loss": 2.2239, "step": 6073000 }, { "epoch": 17.58, "learning_rate": 4.121304765653946e-05, "loss": 2.228, "step": 6073500 }, { "epoch": 17.58, "learning_rate": 4.121232400889218e-05, "loss": 2.2043, "step": 6074000 }, { "epoch": 17.58, "learning_rate": 4.1211600361244905e-05, "loss": 2.2326, "step": 6074500 }, { "epoch": 17.58, "learning_rate": 4.121087671359763e-05, "loss": 2.2348, "step": 6075000 }, { "epoch": 17.59, "learning_rate": 4.121015306595035e-05, "loss": 2.214, "step": 6075500 }, { "epoch": 17.59, "learning_rate": 4.120942941830307e-05, "loss": 2.1983, "step": 6076000 }, { "epoch": 17.59, "learning_rate": 4.12087057706558e-05, "loss": 2.219, "step": 6076500 }, { "epoch": 17.59, "learning_rate": 4.120798212300852e-05, "loss": 2.2336, "step": 6077000 }, { "epoch": 17.59, "learning_rate": 4.1207258475361246e-05, "loss": 2.2229, "step": 6077500 }, { "epoch": 17.59, "learning_rate": 4.1206534827713975e-05, "loss": 2.2196, "step": 6078000 }, { "epoch": 17.59, "learning_rate": 4.12058111800667e-05, "loss": 2.246, "step": 6078500 }, { "epoch": 17.6, "learning_rate": 4.120508897971471e-05, "loss": 2.2154, "step": 6079000 }, { "epoch": 17.6, "learning_rate": 4.1204365332067435e-05, "loss": 2.2153, "step": 6079500 }, { "epoch": 17.6, "learning_rate": 4.120364168442016e-05, "loss": 2.2236, "step": 6080000 }, { "epoch": 17.6, "learning_rate": 4.120291803677288e-05, "loss": 2.2391, "step": 6080500 }, { "epoch": 17.6, "learning_rate": 4.12021943891256e-05, "loss": 2.2307, "step": 6081000 }, { "epoch": 17.6, "learning_rate": 4.1201470741478324e-05, "loss": 2.2105, "step": 6081500 }, { "epoch": 17.6, "learning_rate": 4.1200748541126346e-05, "loss": 2.2257, "step": 6082000 }, { "epoch": 17.61, "learning_rate": 4.120002489347907e-05, "loss": 2.1817, "step": 6082500 }, { "epoch": 17.61, "learning_rate": 4.119930124583179e-05, "loss": 2.2259, "step": 6083000 }, { "epoch": 17.61, "learning_rate": 4.119857759818451e-05, "loss": 2.226, "step": 6083500 }, { "epoch": 17.61, "learning_rate": 4.119785395053724e-05, "loss": 2.2229, "step": 6084000 }, { "epoch": 17.61, "learning_rate": 4.1197130302889964e-05, "loss": 2.2286, "step": 6084500 }, { "epoch": 17.61, "learning_rate": 4.119640810253798e-05, "loss": 2.2414, "step": 6085000 }, { "epoch": 17.62, "learning_rate": 4.11956844548907e-05, "loss": 2.2023, "step": 6085500 }, { "epoch": 17.62, "learning_rate": 4.1194960807243424e-05, "loss": 2.2016, "step": 6086000 }, { "epoch": 17.62, "learning_rate": 4.119423715959615e-05, "loss": 2.2307, "step": 6086500 }, { "epoch": 17.62, "learning_rate": 4.1193513511948875e-05, "loss": 2.2559, "step": 6087000 }, { "epoch": 17.62, "learning_rate": 4.11927898643016e-05, "loss": 2.2428, "step": 6087500 }, { "epoch": 17.62, "learning_rate": 4.119206766394961e-05, "loss": 2.2119, "step": 6088000 }, { "epoch": 17.62, "learning_rate": 4.1191344016302335e-05, "loss": 2.2219, "step": 6088500 }, { "epoch": 17.63, "learning_rate": 4.119062036865506e-05, "loss": 2.1981, "step": 6089000 }, { "epoch": 17.63, "learning_rate": 4.118989672100778e-05, "loss": 2.2025, "step": 6089500 }, { "epoch": 17.63, "learning_rate": 4.11891745206558e-05, "loss": 2.1952, "step": 6090000 }, { "epoch": 17.63, "learning_rate": 4.1188450873008524e-05, "loss": 2.2077, "step": 6090500 }, { "epoch": 17.63, "learning_rate": 4.1187727225361246e-05, "loss": 2.2174, "step": 6091000 }, { "epoch": 17.63, "learning_rate": 4.118700502500926e-05, "loss": 2.226, "step": 6091500 }, { "epoch": 17.63, "learning_rate": 4.118628137736199e-05, "loss": 2.2276, "step": 6092000 }, { "epoch": 17.64, "learning_rate": 4.118555772971471e-05, "loss": 2.2052, "step": 6092500 }, { "epoch": 17.64, "learning_rate": 4.1184834082067435e-05, "loss": 2.2095, "step": 6093000 }, { "epoch": 17.64, "learning_rate": 4.118411043442016e-05, "loss": 2.2411, "step": 6093500 }, { "epoch": 17.64, "learning_rate": 4.118338823406817e-05, "loss": 2.2379, "step": 6094000 }, { "epoch": 17.64, "learning_rate": 4.11826645864209e-05, "loss": 2.201, "step": 6094500 }, { "epoch": 17.64, "learning_rate": 4.1181940938773624e-05, "loss": 2.2073, "step": 6095000 }, { "epoch": 17.64, "learning_rate": 4.118121729112635e-05, "loss": 2.2152, "step": 6095500 }, { "epoch": 17.65, "learning_rate": 4.118049364347907e-05, "loss": 2.2174, "step": 6096000 }, { "epoch": 17.65, "learning_rate": 4.117976999583179e-05, "loss": 2.2254, "step": 6096500 }, { "epoch": 17.65, "learning_rate": 4.117904634818451e-05, "loss": 2.2057, "step": 6097000 }, { "epoch": 17.65, "learning_rate": 4.1178322700537236e-05, "loss": 2.2266, "step": 6097500 }, { "epoch": 17.65, "learning_rate": 4.117759905288996e-05, "loss": 2.2234, "step": 6098000 }, { "epoch": 17.65, "learning_rate": 4.117687540524268e-05, "loss": 2.2011, "step": 6098500 }, { "epoch": 17.65, "learning_rate": 4.11761532048907e-05, "loss": 2.2481, "step": 6099000 }, { "epoch": 17.66, "learning_rate": 4.1175429557243425e-05, "loss": 2.2212, "step": 6099500 }, { "epoch": 17.66, "learning_rate": 4.1174705909596154e-05, "loss": 2.2289, "step": 6100000 }, { "epoch": 17.66, "learning_rate": 4.1173982261948876e-05, "loss": 2.2217, "step": 6100500 }, { "epoch": 17.66, "learning_rate": 4.11732586143016e-05, "loss": 2.2343, "step": 6101000 }, { "epoch": 17.66, "learning_rate": 4.1172536413949614e-05, "loss": 2.2431, "step": 6101500 }, { "epoch": 17.66, "learning_rate": 4.1171812766302336e-05, "loss": 2.2407, "step": 6102000 }, { "epoch": 17.66, "learning_rate": 4.117108911865506e-05, "loss": 2.1995, "step": 6102500 }, { "epoch": 17.67, "learning_rate": 4.117036691830308e-05, "loss": 2.2302, "step": 6103000 }, { "epoch": 17.67, "learning_rate": 4.11696432706558e-05, "loss": 2.1885, "step": 6103500 }, { "epoch": 17.67, "learning_rate": 4.1168919623008525e-05, "loss": 2.2365, "step": 6104000 }, { "epoch": 17.67, "learning_rate": 4.116819597536125e-05, "loss": 2.2159, "step": 6104500 }, { "epoch": 17.67, "learning_rate": 4.116747232771397e-05, "loss": 2.1995, "step": 6105000 }, { "epoch": 17.67, "learning_rate": 4.116674868006669e-05, "loss": 2.2191, "step": 6105500 }, { "epoch": 17.67, "learning_rate": 4.1166025032419414e-05, "loss": 2.2345, "step": 6106000 }, { "epoch": 17.68, "learning_rate": 4.116530138477214e-05, "loss": 2.2477, "step": 6106500 }, { "epoch": 17.68, "learning_rate": 4.1164577737124865e-05, "loss": 2.2352, "step": 6107000 }, { "epoch": 17.68, "learning_rate": 4.116385408947759e-05, "loss": 2.2065, "step": 6107500 }, { "epoch": 17.68, "learning_rate": 4.116313044183031e-05, "loss": 2.2222, "step": 6108000 }, { "epoch": 17.68, "learning_rate": 4.116240679418303e-05, "loss": 2.2218, "step": 6108500 }, { "epoch": 17.68, "learning_rate": 4.1161683146535754e-05, "loss": 2.2016, "step": 6109000 }, { "epoch": 17.68, "learning_rate": 4.1160960946183776e-05, "loss": 2.2269, "step": 6109500 }, { "epoch": 17.69, "learning_rate": 4.11602372985365e-05, "loss": 2.1999, "step": 6110000 }, { "epoch": 17.69, "learning_rate": 4.115951365088922e-05, "loss": 2.1942, "step": 6110500 }, { "epoch": 17.69, "learning_rate": 4.115879000324194e-05, "loss": 2.2393, "step": 6111000 }, { "epoch": 17.69, "learning_rate": 4.1158066355594665e-05, "loss": 2.2088, "step": 6111500 }, { "epoch": 17.69, "learning_rate": 4.115734560253798e-05, "loss": 2.2279, "step": 6112000 }, { "epoch": 17.69, "learning_rate": 4.11566219548907e-05, "loss": 2.2284, "step": 6112500 }, { "epoch": 17.69, "learning_rate": 4.1155898307243425e-05, "loss": 2.2312, "step": 6113000 }, { "epoch": 17.7, "learning_rate": 4.115517610689144e-05, "loss": 2.2016, "step": 6113500 }, { "epoch": 17.7, "learning_rate": 4.115445245924416e-05, "loss": 2.2313, "step": 6114000 }, { "epoch": 17.7, "learning_rate": 4.115372881159689e-05, "loss": 2.2438, "step": 6114500 }, { "epoch": 17.7, "learning_rate": 4.1153005163949614e-05, "loss": 2.2156, "step": 6115000 }, { "epoch": 17.7, "learning_rate": 4.115228151630234e-05, "loss": 2.2167, "step": 6115500 }, { "epoch": 17.7, "learning_rate": 4.115155786865506e-05, "loss": 2.2287, "step": 6116000 }, { "epoch": 17.7, "learning_rate": 4.115083422100778e-05, "loss": 2.2012, "step": 6116500 }, { "epoch": 17.71, "learning_rate": 4.11501105733605e-05, "loss": 2.2084, "step": 6117000 }, { "epoch": 17.71, "learning_rate": 4.114938692571323e-05, "loss": 2.2286, "step": 6117500 }, { "epoch": 17.71, "learning_rate": 4.1148663278065955e-05, "loss": 2.233, "step": 6118000 }, { "epoch": 17.71, "learning_rate": 4.114793963041868e-05, "loss": 2.2077, "step": 6118500 }, { "epoch": 17.71, "learning_rate": 4.11472159827714e-05, "loss": 2.2178, "step": 6119000 }, { "epoch": 17.71, "learning_rate": 4.114649233512412e-05, "loss": 2.235, "step": 6119500 }, { "epoch": 17.71, "learning_rate": 4.1145768687476844e-05, "loss": 2.2326, "step": 6120000 }, { "epoch": 17.72, "learning_rate": 4.114504648712486e-05, "loss": 2.2315, "step": 6120500 }, { "epoch": 17.72, "learning_rate": 4.114432283947758e-05, "loss": 2.2128, "step": 6121000 }, { "epoch": 17.72, "learning_rate": 4.114359919183031e-05, "loss": 2.223, "step": 6121500 }, { "epoch": 17.72, "learning_rate": 4.114287554418303e-05, "loss": 2.2155, "step": 6122000 }, { "epoch": 17.72, "learning_rate": 4.1142153343831055e-05, "loss": 2.2165, "step": 6122500 }, { "epoch": 17.72, "learning_rate": 4.114142969618378e-05, "loss": 2.2226, "step": 6123000 }, { "epoch": 17.73, "learning_rate": 4.114070749583179e-05, "loss": 2.2161, "step": 6123500 }, { "epoch": 17.73, "learning_rate": 4.1139983848184515e-05, "loss": 2.2129, "step": 6124000 }, { "epoch": 17.73, "learning_rate": 4.113926020053724e-05, "loss": 2.2142, "step": 6124500 }, { "epoch": 17.73, "learning_rate": 4.113853655288996e-05, "loss": 2.221, "step": 6125000 }, { "epoch": 17.73, "learning_rate": 4.113781290524268e-05, "loss": 2.2161, "step": 6125500 }, { "epoch": 17.73, "learning_rate": 4.1137089257595404e-05, "loss": 2.2488, "step": 6126000 }, { "epoch": 17.73, "learning_rate": 4.113636560994813e-05, "loss": 2.2175, "step": 6126500 }, { "epoch": 17.74, "learning_rate": 4.1135641962300855e-05, "loss": 2.2328, "step": 6127000 }, { "epoch": 17.74, "learning_rate": 4.113491831465358e-05, "loss": 2.2208, "step": 6127500 }, { "epoch": 17.74, "learning_rate": 4.11341946670063e-05, "loss": 2.2483, "step": 6128000 }, { "epoch": 17.74, "learning_rate": 4.1133472466654315e-05, "loss": 2.2177, "step": 6128500 }, { "epoch": 17.74, "learning_rate": 4.1132748819007044e-05, "loss": 2.2301, "step": 6129000 }, { "epoch": 17.74, "learning_rate": 4.113202806595036e-05, "loss": 2.1951, "step": 6129500 }, { "epoch": 17.74, "learning_rate": 4.113130441830308e-05, "loss": 2.256, "step": 6130000 }, { "epoch": 17.75, "learning_rate": 4.1130580770655804e-05, "loss": 2.2136, "step": 6130500 }, { "epoch": 17.75, "learning_rate": 4.1129857123008527e-05, "loss": 2.2035, "step": 6131000 }, { "epoch": 17.75, "learning_rate": 4.112913347536125e-05, "loss": 2.219, "step": 6131500 }, { "epoch": 17.75, "learning_rate": 4.112840982771397e-05, "loss": 2.2452, "step": 6132000 }, { "epoch": 17.75, "learning_rate": 4.112768618006669e-05, "loss": 2.2118, "step": 6132500 }, { "epoch": 17.75, "learning_rate": 4.1126962532419415e-05, "loss": 2.2507, "step": 6133000 }, { "epoch": 17.75, "learning_rate": 4.112623888477214e-05, "loss": 2.2171, "step": 6133500 }, { "epoch": 17.76, "learning_rate": 4.112551523712486e-05, "loss": 2.2377, "step": 6134000 }, { "epoch": 17.76, "learning_rate": 4.112479158947758e-05, "loss": 2.2333, "step": 6134500 }, { "epoch": 17.76, "learning_rate": 4.1124069389125604e-05, "loss": 2.2029, "step": 6135000 }, { "epoch": 17.76, "learning_rate": 4.112334574147833e-05, "loss": 2.2402, "step": 6135500 }, { "epoch": 17.76, "learning_rate": 4.112262209383105e-05, "loss": 2.2146, "step": 6136000 }, { "epoch": 17.76, "learning_rate": 4.112189844618378e-05, "loss": 2.244, "step": 6136500 }, { "epoch": 17.76, "learning_rate": 4.11211747985365e-05, "loss": 2.1881, "step": 6137000 }, { "epoch": 17.77, "learning_rate": 4.112045115088922e-05, "loss": 2.2065, "step": 6137500 }, { "epoch": 17.77, "learning_rate": 4.1119727503241945e-05, "loss": 2.241, "step": 6138000 }, { "epoch": 17.77, "learning_rate": 4.111900385559467e-05, "loss": 2.2098, "step": 6138500 }, { "epoch": 17.77, "learning_rate": 4.111828310253798e-05, "loss": 2.2299, "step": 6139000 }, { "epoch": 17.77, "learning_rate": 4.1117560902186e-05, "loss": 2.2085, "step": 6139500 }, { "epoch": 17.77, "learning_rate": 4.111683725453872e-05, "loss": 2.2023, "step": 6140000 }, { "epoch": 17.77, "learning_rate": 4.111611360689144e-05, "loss": 2.2353, "step": 6140500 }, { "epoch": 17.78, "learning_rate": 4.1115389959244165e-05, "loss": 2.2297, "step": 6141000 }, { "epoch": 17.78, "learning_rate": 4.111466631159689e-05, "loss": 2.219, "step": 6141500 }, { "epoch": 17.78, "learning_rate": 4.111394266394961e-05, "loss": 2.2216, "step": 6142000 }, { "epoch": 17.78, "learning_rate": 4.111321901630233e-05, "loss": 2.2246, "step": 6142500 }, { "epoch": 17.78, "learning_rate": 4.111249536865506e-05, "loss": 2.2362, "step": 6143000 }, { "epoch": 17.78, "learning_rate": 4.111177172100778e-05, "loss": 2.2401, "step": 6143500 }, { "epoch": 17.78, "learning_rate": 4.111104807336051e-05, "loss": 2.218, "step": 6144000 }, { "epoch": 17.79, "learning_rate": 4.1110324425713234e-05, "loss": 2.2238, "step": 6144500 }, { "epoch": 17.79, "learning_rate": 4.1109600778065956e-05, "loss": 2.2212, "step": 6145000 }, { "epoch": 17.79, "learning_rate": 4.110887713041868e-05, "loss": 2.2137, "step": 6145500 }, { "epoch": 17.79, "learning_rate": 4.11081534827714e-05, "loss": 2.2113, "step": 6146000 }, { "epoch": 17.79, "learning_rate": 4.110742983512412e-05, "loss": 2.2021, "step": 6146500 }, { "epoch": 17.79, "learning_rate": 4.1106706187476845e-05, "loss": 2.2264, "step": 6147000 }, { "epoch": 17.79, "learning_rate": 4.110598253982957e-05, "loss": 2.2426, "step": 6147500 }, { "epoch": 17.8, "learning_rate": 4.110526033947758e-05, "loss": 2.2332, "step": 6148000 }, { "epoch": 17.8, "learning_rate": 4.1104538139125605e-05, "loss": 2.2281, "step": 6148500 }, { "epoch": 17.8, "learning_rate": 4.110381449147833e-05, "loss": 2.2247, "step": 6149000 }, { "epoch": 17.8, "learning_rate": 4.110309084383105e-05, "loss": 2.2245, "step": 6149500 }, { "epoch": 17.8, "learning_rate": 4.110236719618377e-05, "loss": 2.1833, "step": 6150000 }, { "epoch": 17.8, "learning_rate": 4.1101643548536494e-05, "loss": 2.2152, "step": 6150500 }, { "epoch": 17.8, "learning_rate": 4.110092134818451e-05, "loss": 2.241, "step": 6151000 }, { "epoch": 17.81, "learning_rate": 4.110019770053724e-05, "loss": 2.2239, "step": 6151500 }, { "epoch": 17.81, "learning_rate": 4.109947405288996e-05, "loss": 2.2148, "step": 6152000 }, { "epoch": 17.81, "learning_rate": 4.109875040524268e-05, "loss": 2.2147, "step": 6152500 }, { "epoch": 17.81, "learning_rate": 4.109802675759541e-05, "loss": 2.212, "step": 6153000 }, { "epoch": 17.81, "learning_rate": 4.1097303109948134e-05, "loss": 2.2366, "step": 6153500 }, { "epoch": 17.81, "learning_rate": 4.109657946230086e-05, "loss": 2.219, "step": 6154000 }, { "epoch": 17.81, "learning_rate": 4.109585581465358e-05, "loss": 2.2265, "step": 6154500 }, { "epoch": 17.82, "learning_rate": 4.1095133614301595e-05, "loss": 2.2346, "step": 6155000 }, { "epoch": 17.82, "learning_rate": 4.109441141394961e-05, "loss": 2.2224, "step": 6155500 }, { "epoch": 17.82, "learning_rate": 4.109368776630233e-05, "loss": 2.222, "step": 6156000 }, { "epoch": 17.82, "learning_rate": 4.109296411865506e-05, "loss": 2.2319, "step": 6156500 }, { "epoch": 17.82, "learning_rate": 4.1092240471007784e-05, "loss": 2.2326, "step": 6157000 }, { "epoch": 17.82, "learning_rate": 4.1091516823360506e-05, "loss": 2.2187, "step": 6157500 }, { "epoch": 17.82, "learning_rate": 4.109079317571323e-05, "loss": 2.2278, "step": 6158000 }, { "epoch": 17.83, "learning_rate": 4.109007242265654e-05, "loss": 2.2374, "step": 6158500 }, { "epoch": 17.83, "learning_rate": 4.108934877500926e-05, "loss": 2.214, "step": 6159000 }, { "epoch": 17.83, "learning_rate": 4.108862512736199e-05, "loss": 2.2243, "step": 6159500 }, { "epoch": 17.83, "learning_rate": 4.108790147971471e-05, "loss": 2.2182, "step": 6160000 }, { "epoch": 17.83, "learning_rate": 4.108717783206744e-05, "loss": 2.2574, "step": 6160500 }, { "epoch": 17.83, "learning_rate": 4.108645418442016e-05, "loss": 2.2138, "step": 6161000 }, { "epoch": 17.84, "learning_rate": 4.1085730536772884e-05, "loss": 2.237, "step": 6161500 }, { "epoch": 17.84, "learning_rate": 4.1085006889125606e-05, "loss": 2.2166, "step": 6162000 }, { "epoch": 17.84, "learning_rate": 4.108428324147833e-05, "loss": 2.2385, "step": 6162500 }, { "epoch": 17.84, "learning_rate": 4.108356248842164e-05, "loss": 2.2419, "step": 6163000 }, { "epoch": 17.84, "learning_rate": 4.108283884077436e-05, "loss": 2.2235, "step": 6163500 }, { "epoch": 17.84, "learning_rate": 4.108211519312709e-05, "loss": 2.2321, "step": 6164000 }, { "epoch": 17.84, "learning_rate": 4.108139154547981e-05, "loss": 2.231, "step": 6164500 }, { "epoch": 17.85, "learning_rate": 4.108066789783253e-05, "loss": 2.2133, "step": 6165000 }, { "epoch": 17.85, "learning_rate": 4.1079944250185255e-05, "loss": 2.2339, "step": 6165500 }, { "epoch": 17.85, "learning_rate": 4.107922204983327e-05, "loss": 2.2349, "step": 6166000 }, { "epoch": 17.85, "learning_rate": 4.1078499849481286e-05, "loss": 2.2299, "step": 6166500 }, { "epoch": 17.85, "learning_rate": 4.1077776201834015e-05, "loss": 2.2272, "step": 6167000 }, { "epoch": 17.85, "learning_rate": 4.107705255418674e-05, "loss": 2.2492, "step": 6167500 }, { "epoch": 17.85, "learning_rate": 4.107632890653946e-05, "loss": 2.2336, "step": 6168000 }, { "epoch": 17.86, "learning_rate": 4.107560525889219e-05, "loss": 2.1937, "step": 6168500 }, { "epoch": 17.86, "learning_rate": 4.107488161124491e-05, "loss": 2.234, "step": 6169000 }, { "epoch": 17.86, "learning_rate": 4.107415796359763e-05, "loss": 2.1948, "step": 6169500 }, { "epoch": 17.86, "learning_rate": 4.1073434315950355e-05, "loss": 2.2248, "step": 6170000 }, { "epoch": 17.86, "learning_rate": 4.107271066830308e-05, "loss": 2.2354, "step": 6170500 }, { "epoch": 17.86, "learning_rate": 4.10719870206558e-05, "loss": 2.2439, "step": 6171000 }, { "epoch": 17.86, "learning_rate": 4.1071264820303815e-05, "loss": 2.2308, "step": 6171500 }, { "epoch": 17.87, "learning_rate": 4.107054117265654e-05, "loss": 2.2303, "step": 6172000 }, { "epoch": 17.87, "learning_rate": 4.106981752500926e-05, "loss": 2.2299, "step": 6172500 }, { "epoch": 17.87, "learning_rate": 4.106909387736199e-05, "loss": 2.2173, "step": 6173000 }, { "epoch": 17.87, "learning_rate": 4.106837022971471e-05, "loss": 2.2232, "step": 6173500 }, { "epoch": 17.87, "learning_rate": 4.1067646582067433e-05, "loss": 2.2132, "step": 6174000 }, { "epoch": 17.87, "learning_rate": 4.106692293442016e-05, "loss": 2.1987, "step": 6174500 }, { "epoch": 17.87, "learning_rate": 4.1066199286772885e-05, "loss": 2.1875, "step": 6175000 }, { "epoch": 17.88, "learning_rate": 4.10654770864209e-05, "loss": 2.2302, "step": 6175500 }, { "epoch": 17.88, "learning_rate": 4.1064754886068916e-05, "loss": 2.2089, "step": 6176000 }, { "epoch": 17.88, "learning_rate": 4.106403123842164e-05, "loss": 2.2312, "step": 6176500 }, { "epoch": 17.88, "learning_rate": 4.106330759077437e-05, "loss": 2.2292, "step": 6177000 }, { "epoch": 17.88, "learning_rate": 4.106258394312709e-05, "loss": 2.2014, "step": 6177500 }, { "epoch": 17.88, "learning_rate": 4.106186029547981e-05, "loss": 2.2352, "step": 6178000 }, { "epoch": 17.88, "learning_rate": 4.1061136647832534e-05, "loss": 2.2417, "step": 6178500 }, { "epoch": 17.89, "learning_rate": 4.1060413000185256e-05, "loss": 2.1997, "step": 6179000 }, { "epoch": 17.89, "learning_rate": 4.105968935253798e-05, "loss": 2.1903, "step": 6179500 }, { "epoch": 17.89, "learning_rate": 4.10589657048907e-05, "loss": 2.2158, "step": 6180000 }, { "epoch": 17.89, "learning_rate": 4.105824205724342e-05, "loss": 2.229, "step": 6180500 }, { "epoch": 17.89, "learning_rate": 4.1057518409596145e-05, "loss": 2.2159, "step": 6181000 }, { "epoch": 17.89, "learning_rate": 4.1056794761948874e-05, "loss": 2.2222, "step": 6181500 }, { "epoch": 17.89, "learning_rate": 4.1056071114301596e-05, "loss": 2.2069, "step": 6182000 }, { "epoch": 17.9, "learning_rate": 4.105534891394961e-05, "loss": 2.2155, "step": 6182500 }, { "epoch": 17.9, "learning_rate": 4.1054626713597634e-05, "loss": 2.2371, "step": 6183000 }, { "epoch": 17.9, "learning_rate": 4.1053903065950356e-05, "loss": 2.2003, "step": 6183500 }, { "epoch": 17.9, "learning_rate": 4.105317941830308e-05, "loss": 2.2234, "step": 6184000 }, { "epoch": 17.9, "learning_rate": 4.1052457217951094e-05, "loss": 2.1885, "step": 6184500 }, { "epoch": 17.9, "learning_rate": 4.1051733570303816e-05, "loss": 2.2106, "step": 6185000 }, { "epoch": 17.9, "learning_rate": 4.105100992265654e-05, "loss": 2.2027, "step": 6185500 }, { "epoch": 17.91, "learning_rate": 4.105028627500927e-05, "loss": 2.1959, "step": 6186000 }, { "epoch": 17.91, "learning_rate": 4.104956262736199e-05, "loss": 2.2362, "step": 6186500 }, { "epoch": 17.91, "learning_rate": 4.104883897971471e-05, "loss": 2.2103, "step": 6187000 }, { "epoch": 17.91, "learning_rate": 4.1048115332067434e-05, "loss": 2.2003, "step": 6187500 }, { "epoch": 17.91, "learning_rate": 4.1047391684420156e-05, "loss": 2.2204, "step": 6188000 }, { "epoch": 17.91, "learning_rate": 4.104666948406817e-05, "loss": 2.2192, "step": 6188500 }, { "epoch": 17.91, "learning_rate": 4.1045945836420894e-05, "loss": 2.2116, "step": 6189000 }, { "epoch": 17.92, "learning_rate": 4.104522218877362e-05, "loss": 2.2272, "step": 6189500 }, { "epoch": 17.92, "learning_rate": 4.104449998842164e-05, "loss": 2.2411, "step": 6190000 }, { "epoch": 17.92, "learning_rate": 4.104377634077437e-05, "loss": 2.2034, "step": 6190500 }, { "epoch": 17.92, "learning_rate": 4.104305269312709e-05, "loss": 2.2241, "step": 6191000 }, { "epoch": 17.92, "learning_rate": 4.104232904547981e-05, "loss": 2.2231, "step": 6191500 }, { "epoch": 17.92, "learning_rate": 4.1041605397832534e-05, "loss": 2.1969, "step": 6192000 }, { "epoch": 17.92, "learning_rate": 4.104088175018526e-05, "loss": 2.2181, "step": 6192500 }, { "epoch": 17.93, "learning_rate": 4.104015810253798e-05, "loss": 2.2089, "step": 6193000 }, { "epoch": 17.93, "learning_rate": 4.10394344548907e-05, "loss": 2.2246, "step": 6193500 }, { "epoch": 17.93, "learning_rate": 4.1038710807243423e-05, "loss": 2.2194, "step": 6194000 }, { "epoch": 17.93, "learning_rate": 4.1037987159596146e-05, "loss": 2.2371, "step": 6194500 }, { "epoch": 17.93, "learning_rate": 4.103726495924417e-05, "loss": 2.2185, "step": 6195000 }, { "epoch": 17.93, "learning_rate": 4.103654131159689e-05, "loss": 2.2024, "step": 6195500 }, { "epoch": 17.93, "learning_rate": 4.103581766394961e-05, "loss": 2.2258, "step": 6196000 }, { "epoch": 17.94, "learning_rate": 4.1035094016302335e-05, "loss": 2.2222, "step": 6196500 }, { "epoch": 17.94, "learning_rate": 4.103437181595036e-05, "loss": 2.2053, "step": 6197000 }, { "epoch": 17.94, "learning_rate": 4.103364816830308e-05, "loss": 2.2281, "step": 6197500 }, { "epoch": 17.94, "learning_rate": 4.10329245206558e-05, "loss": 2.2462, "step": 6198000 }, { "epoch": 17.94, "learning_rate": 4.1032200873008524e-05, "loss": 2.2296, "step": 6198500 }, { "epoch": 17.94, "learning_rate": 4.103147867265654e-05, "loss": 2.206, "step": 6199000 }, { "epoch": 17.95, "learning_rate": 4.103075647230456e-05, "loss": 2.2012, "step": 6199500 }, { "epoch": 17.95, "learning_rate": 4.1030032824657284e-05, "loss": 2.2374, "step": 6200000 }, { "epoch": 17.95, "learning_rate": 4.1029309177010006e-05, "loss": 2.2485, "step": 6200500 }, { "epoch": 17.95, "learning_rate": 4.102858697665802e-05, "loss": 2.2132, "step": 6201000 }, { "epoch": 17.95, "learning_rate": 4.1027863329010744e-05, "loss": 2.2252, "step": 6201500 }, { "epoch": 17.95, "learning_rate": 4.1027139681363466e-05, "loss": 2.2083, "step": 6202000 }, { "epoch": 17.95, "learning_rate": 4.1026416033716195e-05, "loss": 2.203, "step": 6202500 }, { "epoch": 17.96, "learning_rate": 4.102569238606892e-05, "loss": 2.2262, "step": 6203000 }, { "epoch": 17.96, "learning_rate": 4.102497018571693e-05, "loss": 2.2281, "step": 6203500 }, { "epoch": 17.96, "learning_rate": 4.1024246538069655e-05, "loss": 2.2032, "step": 6204000 }, { "epoch": 17.96, "learning_rate": 4.1023522890422384e-05, "loss": 2.2317, "step": 6204500 }, { "epoch": 17.96, "learning_rate": 4.1022799242775106e-05, "loss": 2.2337, "step": 6205000 }, { "epoch": 17.96, "learning_rate": 4.102207559512783e-05, "loss": 2.2321, "step": 6205500 }, { "epoch": 17.96, "learning_rate": 4.102135194748055e-05, "loss": 2.2385, "step": 6206000 }, { "epoch": 17.97, "learning_rate": 4.102062829983327e-05, "loss": 2.2238, "step": 6206500 }, { "epoch": 17.97, "learning_rate": 4.1019904652185995e-05, "loss": 2.2098, "step": 6207000 }, { "epoch": 17.97, "learning_rate": 4.101918100453872e-05, "loss": 2.2322, "step": 6207500 }, { "epoch": 17.97, "learning_rate": 4.1018457356891447e-05, "loss": 2.2176, "step": 6208000 }, { "epoch": 17.97, "learning_rate": 4.101773370924417e-05, "loss": 2.2117, "step": 6208500 }, { "epoch": 17.97, "learning_rate": 4.1017011508892184e-05, "loss": 2.2323, "step": 6209000 }, { "epoch": 17.97, "learning_rate": 4.1016287861244907e-05, "loss": 2.2372, "step": 6209500 }, { "epoch": 17.98, "learning_rate": 4.101556421359763e-05, "loss": 2.2379, "step": 6210000 }, { "epoch": 17.98, "learning_rate": 4.101484056595035e-05, "loss": 2.2254, "step": 6210500 }, { "epoch": 17.98, "learning_rate": 4.101411691830307e-05, "loss": 2.221, "step": 6211000 }, { "epoch": 17.98, "learning_rate": 4.1013393270655796e-05, "loss": 2.2158, "step": 6211500 }, { "epoch": 17.98, "learning_rate": 4.1012669623008525e-05, "loss": 2.2298, "step": 6212000 }, { "epoch": 17.98, "learning_rate": 4.101194597536125e-05, "loss": 2.2545, "step": 6212500 }, { "epoch": 17.98, "learning_rate": 4.101122232771397e-05, "loss": 2.2317, "step": 6213000 }, { "epoch": 17.99, "learning_rate": 4.101049868006669e-05, "loss": 2.1958, "step": 6213500 }, { "epoch": 17.99, "learning_rate": 4.1009776479714714e-05, "loss": 2.2104, "step": 6214000 }, { "epoch": 17.99, "learning_rate": 4.100905427936273e-05, "loss": 2.2432, "step": 6214500 }, { "epoch": 17.99, "learning_rate": 4.100833063171545e-05, "loss": 2.2369, "step": 6215000 }, { "epoch": 17.99, "learning_rate": 4.1007606984068174e-05, "loss": 2.2299, "step": 6215500 }, { "epoch": 17.99, "learning_rate": 4.1006883336420896e-05, "loss": 2.2177, "step": 6216000 }, { "epoch": 17.99, "learning_rate": 4.100615968877362e-05, "loss": 2.2005, "step": 6216500 }, { "epoch": 18.0, "learning_rate": 4.100543604112635e-05, "loss": 2.2107, "step": 6217000 }, { "epoch": 18.0, "learning_rate": 4.100471239347907e-05, "loss": 2.1942, "step": 6217500 }, { "epoch": 18.0, "learning_rate": 4.100398874583179e-05, "loss": 2.1974, "step": 6218000 }, { "epoch": 18.0, "eval_accuracy": 0.6574707416013661, "eval_accuracy_mlm": 0.621008649420185, "eval_accuracy_nsp": 0.8532239806043274, "eval_loss": 2.247318744659424, "eval_runtime": 330.4145, "eval_samples_per_second": 1320.723, "eval_steps_per_second": 55.031, "step": 6218496 }, { "epoch": 18.0, "learning_rate": 4.1003265098184514e-05, "loss": 2.2094, "step": 6218500 }, { "epoch": 18.0, "learning_rate": 4.100254145053724e-05, "loss": 2.1805, "step": 6219000 }, { "epoch": 18.0, "learning_rate": 4.1001817802889965e-05, "loss": 2.2035, "step": 6219500 }, { "epoch": 18.0, "learning_rate": 4.100109415524269e-05, "loss": 2.186, "step": 6220000 }, { "epoch": 18.01, "learning_rate": 4.100037050759541e-05, "loss": 2.187, "step": 6220500 }, { "epoch": 18.01, "learning_rate": 4.0999648307243425e-05, "loss": 2.1996, "step": 6221000 }, { "epoch": 18.01, "learning_rate": 4.099892465959615e-05, "loss": 2.1806, "step": 6221500 }, { "epoch": 18.01, "learning_rate": 4.099820101194887e-05, "loss": 2.2009, "step": 6222000 }, { "epoch": 18.01, "learning_rate": 4.09974773643016e-05, "loss": 2.1903, "step": 6222500 }, { "epoch": 18.01, "learning_rate": 4.099675371665432e-05, "loss": 2.1659, "step": 6223000 }, { "epoch": 18.01, "learning_rate": 4.099603006900704e-05, "loss": 2.2044, "step": 6223500 }, { "epoch": 18.02, "learning_rate": 4.0995306421359765e-05, "loss": 2.1927, "step": 6224000 }, { "epoch": 18.02, "learning_rate": 4.099458277371249e-05, "loss": 2.1789, "step": 6224500 }, { "epoch": 18.02, "learning_rate": 4.099385912606521e-05, "loss": 2.1911, "step": 6225000 }, { "epoch": 18.02, "learning_rate": 4.099313837300852e-05, "loss": 2.2013, "step": 6225500 }, { "epoch": 18.02, "learning_rate": 4.099241472536125e-05, "loss": 2.2245, "step": 6226000 }, { "epoch": 18.02, "learning_rate": 4.099169107771397e-05, "loss": 2.1782, "step": 6226500 }, { "epoch": 18.02, "learning_rate": 4.09909674300667e-05, "loss": 2.2054, "step": 6227000 }, { "epoch": 18.03, "learning_rate": 4.099024378241942e-05, "loss": 2.2007, "step": 6227500 }, { "epoch": 18.03, "learning_rate": 4.098952013477214e-05, "loss": 2.1931, "step": 6228000 }, { "epoch": 18.03, "learning_rate": 4.098879793442016e-05, "loss": 2.1865, "step": 6228500 }, { "epoch": 18.03, "learning_rate": 4.098807428677288e-05, "loss": 2.1988, "step": 6229000 }, { "epoch": 18.03, "learning_rate": 4.09873506391256e-05, "loss": 2.2037, "step": 6229500 }, { "epoch": 18.03, "learning_rate": 4.098662843877362e-05, "loss": 2.203, "step": 6230000 }, { "epoch": 18.03, "learning_rate": 4.098590623842164e-05, "loss": 2.1995, "step": 6230500 }, { "epoch": 18.04, "learning_rate": 4.098518403806966e-05, "loss": 2.1877, "step": 6231000 }, { "epoch": 18.04, "learning_rate": 4.098446039042238e-05, "loss": 2.2022, "step": 6231500 }, { "epoch": 18.04, "learning_rate": 4.09837367427751e-05, "loss": 2.1848, "step": 6232000 }, { "epoch": 18.04, "learning_rate": 4.0983013095127823e-05, "loss": 2.1901, "step": 6232500 }, { "epoch": 18.04, "learning_rate": 4.0982289447480546e-05, "loss": 2.1981, "step": 6233000 }, { "epoch": 18.04, "learning_rate": 4.0981565799833275e-05, "loss": 2.2141, "step": 6233500 }, { "epoch": 18.04, "learning_rate": 4.0980842152186e-05, "loss": 2.2312, "step": 6234000 }, { "epoch": 18.05, "learning_rate": 4.0980118504538726e-05, "loss": 2.2281, "step": 6234500 }, { "epoch": 18.05, "learning_rate": 4.097939485689145e-05, "loss": 2.2172, "step": 6235000 }, { "epoch": 18.05, "learning_rate": 4.097867120924417e-05, "loss": 2.2217, "step": 6235500 }, { "epoch": 18.05, "learning_rate": 4.097794756159689e-05, "loss": 2.1958, "step": 6236000 }, { "epoch": 18.05, "learning_rate": 4.0977223913949615e-05, "loss": 2.1713, "step": 6236500 }, { "epoch": 18.05, "learning_rate": 4.097650026630234e-05, "loss": 2.2056, "step": 6237000 }, { "epoch": 18.06, "learning_rate": 4.097577661865506e-05, "loss": 2.2119, "step": 6237500 }, { "epoch": 18.06, "learning_rate": 4.0975054418303075e-05, "loss": 2.236, "step": 6238000 }, { "epoch": 18.06, "learning_rate": 4.09743307706558e-05, "loss": 2.1786, "step": 6238500 }, { "epoch": 18.06, "learning_rate": 4.0973607123008526e-05, "loss": 2.2054, "step": 6239000 }, { "epoch": 18.06, "learning_rate": 4.097288492265654e-05, "loss": 2.2009, "step": 6239500 }, { "epoch": 18.06, "learning_rate": 4.0972161275009264e-05, "loss": 2.2083, "step": 6240000 }, { "epoch": 18.06, "learning_rate": 4.0971437627361986e-05, "loss": 2.1918, "step": 6240500 }, { "epoch": 18.07, "learning_rate": 4.097071397971471e-05, "loss": 2.2003, "step": 6241000 }, { "epoch": 18.07, "learning_rate": 4.096999033206743e-05, "loss": 2.1908, "step": 6241500 }, { "epoch": 18.07, "learning_rate": 4.096926668442016e-05, "loss": 2.1986, "step": 6242000 }, { "epoch": 18.07, "learning_rate": 4.096854303677288e-05, "loss": 2.2173, "step": 6242500 }, { "epoch": 18.07, "learning_rate": 4.0967819389125604e-05, "loss": 2.185, "step": 6243000 }, { "epoch": 18.07, "learning_rate": 4.0967095741478326e-05, "loss": 2.2058, "step": 6243500 }, { "epoch": 18.07, "learning_rate": 4.096637209383105e-05, "loss": 2.2128, "step": 6244000 }, { "epoch": 18.08, "learning_rate": 4.096564989347907e-05, "loss": 2.1966, "step": 6244500 }, { "epoch": 18.08, "learning_rate": 4.096492624583179e-05, "loss": 2.1975, "step": 6245000 }, { "epoch": 18.08, "learning_rate": 4.0964202598184515e-05, "loss": 2.2148, "step": 6245500 }, { "epoch": 18.08, "learning_rate": 4.096348039783253e-05, "loss": 2.207, "step": 6246000 }, { "epoch": 18.08, "learning_rate": 4.096275675018525e-05, "loss": 2.2186, "step": 6246500 }, { "epoch": 18.08, "learning_rate": 4.0962033102537975e-05, "loss": 2.1778, "step": 6247000 }, { "epoch": 18.08, "learning_rate": 4.09613094548907e-05, "loss": 2.221, "step": 6247500 }, { "epoch": 18.09, "learning_rate": 4.0960585807243427e-05, "loss": 2.213, "step": 6248000 }, { "epoch": 18.09, "learning_rate": 4.095986360689144e-05, "loss": 2.212, "step": 6248500 }, { "epoch": 18.09, "learning_rate": 4.0959139959244164e-05, "loss": 2.1738, "step": 6249000 }, { "epoch": 18.09, "learning_rate": 4.0958416311596893e-05, "loss": 2.2217, "step": 6249500 }, { "epoch": 18.09, "learning_rate": 4.0957692663949616e-05, "loss": 2.2128, "step": 6250000 }, { "epoch": 18.09, "learning_rate": 4.095696901630234e-05, "loss": 2.2102, "step": 6250500 }, { "epoch": 18.09, "learning_rate": 4.095624536865506e-05, "loss": 2.1828, "step": 6251000 }, { "epoch": 18.1, "learning_rate": 4.095552172100778e-05, "loss": 2.2074, "step": 6251500 }, { "epoch": 18.1, "learning_rate": 4.0954798073360505e-05, "loss": 2.2163, "step": 6252000 }, { "epoch": 18.1, "learning_rate": 4.095407442571323e-05, "loss": 2.2057, "step": 6252500 }, { "epoch": 18.1, "learning_rate": 4.095335077806595e-05, "loss": 2.2075, "step": 6253000 }, { "epoch": 18.1, "learning_rate": 4.095262857771397e-05, "loss": 2.2223, "step": 6253500 }, { "epoch": 18.1, "learning_rate": 4.095190637736199e-05, "loss": 2.1895, "step": 6254000 }, { "epoch": 18.1, "learning_rate": 4.095118272971471e-05, "loss": 2.2077, "step": 6254500 }, { "epoch": 18.11, "learning_rate": 4.095045908206743e-05, "loss": 2.1956, "step": 6255000 }, { "epoch": 18.11, "learning_rate": 4.0949735434420154e-05, "loss": 2.2121, "step": 6255500 }, { "epoch": 18.11, "learning_rate": 4.0949011786772876e-05, "loss": 2.224, "step": 6256000 }, { "epoch": 18.11, "learning_rate": 4.0948288139125605e-05, "loss": 2.1935, "step": 6256500 }, { "epoch": 18.11, "learning_rate": 4.094756593877363e-05, "loss": 2.2131, "step": 6257000 }, { "epoch": 18.11, "learning_rate": 4.094684373842164e-05, "loss": 2.1954, "step": 6257500 }, { "epoch": 18.11, "learning_rate": 4.0946120090774365e-05, "loss": 2.2209, "step": 6258000 }, { "epoch": 18.12, "learning_rate": 4.094539644312709e-05, "loss": 2.1848, "step": 6258500 }, { "epoch": 18.12, "learning_rate": 4.094467279547981e-05, "loss": 2.2127, "step": 6259000 }, { "epoch": 18.12, "learning_rate": 4.0943950595127825e-05, "loss": 2.1965, "step": 6259500 }, { "epoch": 18.12, "learning_rate": 4.0943226947480554e-05, "loss": 2.1881, "step": 6260000 }, { "epoch": 18.12, "learning_rate": 4.094250474712857e-05, "loss": 2.1915, "step": 6260500 }, { "epoch": 18.12, "learning_rate": 4.094178109948129e-05, "loss": 2.2042, "step": 6261000 }, { "epoch": 18.12, "learning_rate": 4.0941057451834014e-05, "loss": 2.2127, "step": 6261500 }, { "epoch": 18.13, "learning_rate": 4.0940333804186736e-05, "loss": 2.195, "step": 6262000 }, { "epoch": 18.13, "learning_rate": 4.093961015653946e-05, "loss": 2.1942, "step": 6262500 }, { "epoch": 18.13, "learning_rate": 4.093888650889218e-05, "loss": 2.1827, "step": 6263000 }, { "epoch": 18.13, "learning_rate": 4.09381628612449e-05, "loss": 2.1874, "step": 6263500 }, { "epoch": 18.13, "learning_rate": 4.0937439213597625e-05, "loss": 2.215, "step": 6264000 }, { "epoch": 18.13, "learning_rate": 4.0936715565950354e-05, "loss": 2.2031, "step": 6264500 }, { "epoch": 18.13, "learning_rate": 4.0935991918303076e-05, "loss": 2.252, "step": 6265000 }, { "epoch": 18.14, "learning_rate": 4.0935268270655805e-05, "loss": 2.2281, "step": 6265500 }, { "epoch": 18.14, "learning_rate": 4.093454462300853e-05, "loss": 2.1939, "step": 6266000 }, { "epoch": 18.14, "learning_rate": 4.093382097536125e-05, "loss": 2.2146, "step": 6266500 }, { "epoch": 18.14, "learning_rate": 4.0933098775009265e-05, "loss": 2.1937, "step": 6267000 }, { "epoch": 18.14, "learning_rate": 4.093237512736199e-05, "loss": 2.202, "step": 6267500 }, { "epoch": 18.14, "learning_rate": 4.093165147971471e-05, "loss": 2.1918, "step": 6268000 }, { "epoch": 18.14, "learning_rate": 4.093092783206743e-05, "loss": 2.207, "step": 6268500 }, { "epoch": 18.15, "learning_rate": 4.0930205631715455e-05, "loss": 2.2055, "step": 6269000 }, { "epoch": 18.15, "learning_rate": 4.092948343136347e-05, "loss": 2.2198, "step": 6269500 }, { "epoch": 18.15, "learning_rate": 4.0928761231011486e-05, "loss": 2.22, "step": 6270000 }, { "epoch": 18.15, "learning_rate": 4.092803758336421e-05, "loss": 2.2112, "step": 6270500 }, { "epoch": 18.15, "learning_rate": 4.092731393571693e-05, "loss": 2.1897, "step": 6271000 }, { "epoch": 18.15, "learning_rate": 4.092659028806965e-05, "loss": 2.2115, "step": 6271500 }, { "epoch": 18.15, "learning_rate": 4.092586664042238e-05, "loss": 2.2236, "step": 6272000 }, { "epoch": 18.16, "learning_rate": 4.0925142992775104e-05, "loss": 2.2052, "step": 6272500 }, { "epoch": 18.16, "learning_rate": 4.0924420792423126e-05, "loss": 2.2038, "step": 6273000 }, { "epoch": 18.16, "learning_rate": 4.092369714477585e-05, "loss": 2.2078, "step": 6273500 }, { "epoch": 18.16, "learning_rate": 4.092297349712857e-05, "loss": 2.2206, "step": 6274000 }, { "epoch": 18.16, "learning_rate": 4.0922251296776586e-05, "loss": 2.1661, "step": 6274500 }, { "epoch": 18.16, "learning_rate": 4.092152764912931e-05, "loss": 2.2155, "step": 6275000 }, { "epoch": 18.17, "learning_rate": 4.092080400148203e-05, "loss": 2.2066, "step": 6275500 }, { "epoch": 18.17, "learning_rate": 4.092008035383475e-05, "loss": 2.2096, "step": 6276000 }, { "epoch": 18.17, "learning_rate": 4.091935670618748e-05, "loss": 2.2022, "step": 6276500 }, { "epoch": 18.17, "learning_rate": 4.0918633058540204e-05, "loss": 2.195, "step": 6277000 }, { "epoch": 18.17, "learning_rate": 4.091791085818822e-05, "loss": 2.2153, "step": 6277500 }, { "epoch": 18.17, "learning_rate": 4.091718721054094e-05, "loss": 2.1798, "step": 6278000 }, { "epoch": 18.17, "learning_rate": 4.0916463562893664e-05, "loss": 2.1871, "step": 6278500 }, { "epoch": 18.18, "learning_rate": 4.091574136254168e-05, "loss": 2.1961, "step": 6279000 }, { "epoch": 18.18, "learning_rate": 4.09150177148944e-05, "loss": 2.2195, "step": 6279500 }, { "epoch": 18.18, "learning_rate": 4.091429406724713e-05, "loss": 2.2057, "step": 6280000 }, { "epoch": 18.18, "learning_rate": 4.091357041959985e-05, "loss": 2.1976, "step": 6280500 }, { "epoch": 18.18, "learning_rate": 4.091284677195258e-05, "loss": 2.212, "step": 6281000 }, { "epoch": 18.18, "learning_rate": 4.0912123124305304e-05, "loss": 2.2237, "step": 6281500 }, { "epoch": 18.18, "learning_rate": 4.0911399476658026e-05, "loss": 2.2127, "step": 6282000 }, { "epoch": 18.19, "learning_rate": 4.091067582901075e-05, "loss": 2.1955, "step": 6282500 }, { "epoch": 18.19, "learning_rate": 4.090995218136347e-05, "loss": 2.1986, "step": 6283000 }, { "epoch": 18.19, "learning_rate": 4.090922853371619e-05, "loss": 2.2097, "step": 6283500 }, { "epoch": 18.19, "learning_rate": 4.0908504886068915e-05, "loss": 2.213, "step": 6284000 }, { "epoch": 18.19, "learning_rate": 4.090778123842164e-05, "loss": 2.1966, "step": 6284500 }, { "epoch": 18.19, "learning_rate": 4.090705759077436e-05, "loss": 2.1968, "step": 6285000 }, { "epoch": 18.19, "learning_rate": 4.090633394312708e-05, "loss": 2.2055, "step": 6285500 }, { "epoch": 18.2, "learning_rate": 4.0905611742775104e-05, "loss": 2.2165, "step": 6286000 }, { "epoch": 18.2, "learning_rate": 4.0904888095127827e-05, "loss": 2.1856, "step": 6286500 }, { "epoch": 18.2, "learning_rate": 4.0904164447480556e-05, "loss": 2.2236, "step": 6287000 }, { "epoch": 18.2, "learning_rate": 4.090344079983328e-05, "loss": 2.203, "step": 6287500 }, { "epoch": 18.2, "learning_rate": 4.0902717152186e-05, "loss": 2.2089, "step": 6288000 }, { "epoch": 18.2, "learning_rate": 4.090199350453872e-05, "loss": 2.1855, "step": 6288500 }, { "epoch": 18.2, "learning_rate": 4.0901269856891445e-05, "loss": 2.1925, "step": 6289000 }, { "epoch": 18.21, "learning_rate": 4.090054620924417e-05, "loss": 2.187, "step": 6289500 }, { "epoch": 18.21, "learning_rate": 4.089982256159689e-05, "loss": 2.1972, "step": 6290000 }, { "epoch": 18.21, "learning_rate": 4.0899100361244905e-05, "loss": 2.2174, "step": 6290500 }, { "epoch": 18.21, "learning_rate": 4.0898376713597634e-05, "loss": 2.2285, "step": 6291000 }, { "epoch": 18.21, "learning_rate": 4.0897653065950356e-05, "loss": 2.2031, "step": 6291500 }, { "epoch": 18.21, "learning_rate": 4.089692941830308e-05, "loss": 2.1896, "step": 6292000 }, { "epoch": 18.21, "learning_rate": 4.08962057706558e-05, "loss": 2.1824, "step": 6292500 }, { "epoch": 18.22, "learning_rate": 4.089548212300852e-05, "loss": 2.214, "step": 6293000 }, { "epoch": 18.22, "learning_rate": 4.089475992265654e-05, "loss": 2.1879, "step": 6293500 }, { "epoch": 18.22, "learning_rate": 4.089403627500926e-05, "loss": 2.2092, "step": 6294000 }, { "epoch": 18.22, "learning_rate": 4.089331262736199e-05, "loss": 2.2006, "step": 6294500 }, { "epoch": 18.22, "learning_rate": 4.089258897971471e-05, "loss": 2.1957, "step": 6295000 }, { "epoch": 18.22, "learning_rate": 4.0891865332067434e-05, "loss": 2.2209, "step": 6295500 }, { "epoch": 18.22, "learning_rate": 4.0891141684420156e-05, "loss": 2.2028, "step": 6296000 }, { "epoch": 18.23, "learning_rate": 4.0890418036772885e-05, "loss": 2.1951, "step": 6296500 }, { "epoch": 18.23, "learning_rate": 4.088969438912561e-05, "loss": 2.2108, "step": 6297000 }, { "epoch": 18.23, "learning_rate": 4.088897074147833e-05, "loss": 2.238, "step": 6297500 }, { "epoch": 18.23, "learning_rate": 4.088824709383105e-05, "loss": 2.1793, "step": 6298000 }, { "epoch": 18.23, "learning_rate": 4.0887523446183774e-05, "loss": 2.2006, "step": 6298500 }, { "epoch": 18.23, "learning_rate": 4.0886799798536496e-05, "loss": 2.2033, "step": 6299000 }, { "epoch": 18.23, "learning_rate": 4.088607759818451e-05, "loss": 2.2051, "step": 6299500 }, { "epoch": 18.24, "learning_rate": 4.0885353950537234e-05, "loss": 2.2029, "step": 6300000 }, { "epoch": 18.24, "learning_rate": 4.0884630302889956e-05, "loss": 2.218, "step": 6300500 }, { "epoch": 18.24, "learning_rate": 4.0883906655242685e-05, "loss": 2.1918, "step": 6301000 }, { "epoch": 18.24, "learning_rate": 4.088318300759541e-05, "loss": 2.1989, "step": 6301500 }, { "epoch": 18.24, "learning_rate": 4.0882459359948136e-05, "loss": 2.1754, "step": 6302000 }, { "epoch": 18.24, "learning_rate": 4.088173571230086e-05, "loss": 2.1951, "step": 6302500 }, { "epoch": 18.24, "learning_rate": 4.088101206465358e-05, "loss": 2.2137, "step": 6303000 }, { "epoch": 18.25, "learning_rate": 4.08802884170063e-05, "loss": 2.2152, "step": 6303500 }, { "epoch": 18.25, "learning_rate": 4.0879564769359025e-05, "loss": 2.2405, "step": 6304000 }, { "epoch": 18.25, "learning_rate": 4.087884112171175e-05, "loss": 2.2363, "step": 6304500 }, { "epoch": 18.25, "learning_rate": 4.087811747406447e-05, "loss": 2.2048, "step": 6305000 }, { "epoch": 18.25, "learning_rate": 4.087739382641719e-05, "loss": 2.2139, "step": 6305500 }, { "epoch": 18.25, "learning_rate": 4.087667162606521e-05, "loss": 2.203, "step": 6306000 }, { "epoch": 18.25, "learning_rate": 4.087594797841794e-05, "loss": 2.2341, "step": 6306500 }, { "epoch": 18.26, "learning_rate": 4.087522433077066e-05, "loss": 2.2078, "step": 6307000 }, { "epoch": 18.26, "learning_rate": 4.0874502130418674e-05, "loss": 2.2169, "step": 6307500 }, { "epoch": 18.26, "learning_rate": 4.08737784827714e-05, "loss": 2.1963, "step": 6308000 }, { "epoch": 18.26, "learning_rate": 4.087305483512412e-05, "loss": 2.2266, "step": 6308500 }, { "epoch": 18.26, "learning_rate": 4.087233118747685e-05, "loss": 2.2118, "step": 6309000 }, { "epoch": 18.26, "learning_rate": 4.087160753982957e-05, "loss": 2.2246, "step": 6309500 }, { "epoch": 18.26, "learning_rate": 4.087088389218229e-05, "loss": 2.168, "step": 6310000 }, { "epoch": 18.27, "learning_rate": 4.087016169183031e-05, "loss": 2.2174, "step": 6310500 }, { "epoch": 18.27, "learning_rate": 4.086943949147833e-05, "loss": 2.1978, "step": 6311000 }, { "epoch": 18.27, "learning_rate": 4.086871584383105e-05, "loss": 2.2121, "step": 6311500 }, { "epoch": 18.27, "learning_rate": 4.0867992196183775e-05, "loss": 2.2253, "step": 6312000 }, { "epoch": 18.27, "learning_rate": 4.086726999583179e-05, "loss": 2.2224, "step": 6312500 }, { "epoch": 18.27, "learning_rate": 4.086654634818451e-05, "loss": 2.1889, "step": 6313000 }, { "epoch": 18.27, "learning_rate": 4.0865822700537235e-05, "loss": 2.1993, "step": 6313500 }, { "epoch": 18.28, "learning_rate": 4.086509905288996e-05, "loss": 2.2033, "step": 6314000 }, { "epoch": 18.28, "learning_rate": 4.0864375405242686e-05, "loss": 2.213, "step": 6314500 }, { "epoch": 18.28, "learning_rate": 4.086365175759541e-05, "loss": 2.1991, "step": 6315000 }, { "epoch": 18.28, "learning_rate": 4.086292810994813e-05, "loss": 2.223, "step": 6315500 }, { "epoch": 18.28, "learning_rate": 4.086220446230085e-05, "loss": 2.2149, "step": 6316000 }, { "epoch": 18.28, "learning_rate": 4.086148081465358e-05, "loss": 2.1947, "step": 6316500 }, { "epoch": 18.29, "learning_rate": 4.0860757167006304e-05, "loss": 2.207, "step": 6317000 }, { "epoch": 18.29, "learning_rate": 4.0860033519359026e-05, "loss": 2.198, "step": 6317500 }, { "epoch": 18.29, "learning_rate": 4.0859312766302335e-05, "loss": 2.194, "step": 6318000 }, { "epoch": 18.29, "learning_rate": 4.085858911865506e-05, "loss": 2.2228, "step": 6318500 }, { "epoch": 18.29, "learning_rate": 4.0857865471007786e-05, "loss": 2.2279, "step": 6319000 }, { "epoch": 18.29, "learning_rate": 4.08571432706558e-05, "loss": 2.2013, "step": 6319500 }, { "epoch": 18.29, "learning_rate": 4.0856419623008524e-05, "loss": 2.1864, "step": 6320000 }, { "epoch": 18.3, "learning_rate": 4.0855695975361246e-05, "loss": 2.1955, "step": 6320500 }, { "epoch": 18.3, "learning_rate": 4.085497232771397e-05, "loss": 2.2256, "step": 6321000 }, { "epoch": 18.3, "learning_rate": 4.085424868006669e-05, "loss": 2.2078, "step": 6321500 }, { "epoch": 18.3, "learning_rate": 4.085352503241941e-05, "loss": 2.2152, "step": 6322000 }, { "epoch": 18.3, "learning_rate": 4.0852801384772135e-05, "loss": 2.2024, "step": 6322500 }, { "epoch": 18.3, "learning_rate": 4.085207773712486e-05, "loss": 2.1948, "step": 6323000 }, { "epoch": 18.3, "learning_rate": 4.0851354089477587e-05, "loss": 2.2103, "step": 6323500 }, { "epoch": 18.31, "learning_rate": 4.085063044183031e-05, "loss": 2.2078, "step": 6324000 }, { "epoch": 18.31, "learning_rate": 4.084990679418304e-05, "loss": 2.1948, "step": 6324500 }, { "epoch": 18.31, "learning_rate": 4.084918314653576e-05, "loss": 2.2235, "step": 6325000 }, { "epoch": 18.31, "learning_rate": 4.084845949888848e-05, "loss": 2.1937, "step": 6325500 }, { "epoch": 18.31, "learning_rate": 4.0847735851241204e-05, "loss": 2.1923, "step": 6326000 }, { "epoch": 18.31, "learning_rate": 4.084701220359393e-05, "loss": 2.1962, "step": 6326500 }, { "epoch": 18.31, "learning_rate": 4.084628855594665e-05, "loss": 2.1993, "step": 6327000 }, { "epoch": 18.32, "learning_rate": 4.084556490829937e-05, "loss": 2.2362, "step": 6327500 }, { "epoch": 18.32, "learning_rate": 4.0844841260652093e-05, "loss": 2.2172, "step": 6328000 }, { "epoch": 18.32, "learning_rate": 4.0844117613004816e-05, "loss": 2.2263, "step": 6328500 }, { "epoch": 18.32, "learning_rate": 4.084339685994813e-05, "loss": 2.2187, "step": 6329000 }, { "epoch": 18.32, "learning_rate": 4.0842673212300854e-05, "loss": 2.2171, "step": 6329500 }, { "epoch": 18.32, "learning_rate": 4.084195101194887e-05, "loss": 2.2261, "step": 6330000 }, { "epoch": 18.32, "learning_rate": 4.084122736430159e-05, "loss": 2.2247, "step": 6330500 }, { "epoch": 18.33, "learning_rate": 4.0840503716654314e-05, "loss": 2.2004, "step": 6331000 }, { "epoch": 18.33, "learning_rate": 4.083978006900704e-05, "loss": 2.218, "step": 6331500 }, { "epoch": 18.33, "learning_rate": 4.0839056421359765e-05, "loss": 2.2125, "step": 6332000 }, { "epoch": 18.33, "learning_rate": 4.083833422100779e-05, "loss": 2.2138, "step": 6332500 }, { "epoch": 18.33, "learning_rate": 4.083761057336051e-05, "loss": 2.2387, "step": 6333000 }, { "epoch": 18.33, "learning_rate": 4.083688692571323e-05, "loss": 2.2185, "step": 6333500 }, { "epoch": 18.33, "learning_rate": 4.0836163278065954e-05, "loss": 2.2142, "step": 6334000 }, { "epoch": 18.34, "learning_rate": 4.0835439630418676e-05, "loss": 2.2031, "step": 6334500 }, { "epoch": 18.34, "learning_rate": 4.08347159827714e-05, "loss": 2.2188, "step": 6335000 }, { "epoch": 18.34, "learning_rate": 4.083399233512412e-05, "loss": 2.2132, "step": 6335500 }, { "epoch": 18.34, "learning_rate": 4.083326868747684e-05, "loss": 2.2129, "step": 6336000 }, { "epoch": 18.34, "learning_rate": 4.0832545039829565e-05, "loss": 2.179, "step": 6336500 }, { "epoch": 18.34, "learning_rate": 4.083182139218229e-05, "loss": 2.2442, "step": 6337000 }, { "epoch": 18.34, "learning_rate": 4.0831097744535016e-05, "loss": 2.2008, "step": 6337500 }, { "epoch": 18.35, "learning_rate": 4.083037554418303e-05, "loss": 2.2204, "step": 6338000 }, { "epoch": 18.35, "learning_rate": 4.082965334383105e-05, "loss": 2.22, "step": 6338500 }, { "epoch": 18.35, "learning_rate": 4.0828929696183776e-05, "loss": 2.209, "step": 6339000 }, { "epoch": 18.35, "learning_rate": 4.08282060485365e-05, "loss": 2.2159, "step": 6339500 }, { "epoch": 18.35, "learning_rate": 4.082748240088922e-05, "loss": 2.2165, "step": 6340000 }, { "epoch": 18.35, "learning_rate": 4.082675875324194e-05, "loss": 2.226, "step": 6340500 }, { "epoch": 18.35, "learning_rate": 4.0826035105594665e-05, "loss": 2.2199, "step": 6341000 }, { "epoch": 18.36, "learning_rate": 4.082531145794739e-05, "loss": 2.2158, "step": 6341500 }, { "epoch": 18.36, "learning_rate": 4.0824587810300117e-05, "loss": 2.2275, "step": 6342000 }, { "epoch": 18.36, "learning_rate": 4.082386416265284e-05, "loss": 2.2004, "step": 6342500 }, { "epoch": 18.36, "learning_rate": 4.082314051500556e-05, "loss": 2.2055, "step": 6343000 }, { "epoch": 18.36, "learning_rate": 4.082241686735828e-05, "loss": 2.2131, "step": 6343500 }, { "epoch": 18.36, "learning_rate": 4.08216946670063e-05, "loss": 2.1978, "step": 6344000 }, { "epoch": 18.36, "learning_rate": 4.082097101935902e-05, "loss": 2.2166, "step": 6344500 }, { "epoch": 18.37, "learning_rate": 4.082024737171174e-05, "loss": 2.1994, "step": 6345000 }, { "epoch": 18.37, "learning_rate": 4.0819523724064465e-05, "loss": 2.2165, "step": 6345500 }, { "epoch": 18.37, "learning_rate": 4.081880152371249e-05, "loss": 2.2125, "step": 6346000 }, { "epoch": 18.37, "learning_rate": 4.081807787606522e-05, "loss": 2.2272, "step": 6346500 }, { "epoch": 18.37, "learning_rate": 4.081735422841794e-05, "loss": 2.2166, "step": 6347000 }, { "epoch": 18.37, "learning_rate": 4.081663058077066e-05, "loss": 2.2138, "step": 6347500 }, { "epoch": 18.37, "learning_rate": 4.081590838041868e-05, "loss": 2.2211, "step": 6348000 }, { "epoch": 18.38, "learning_rate": 4.08151847327714e-05, "loss": 2.2084, "step": 6348500 }, { "epoch": 18.38, "learning_rate": 4.081446108512412e-05, "loss": 2.2263, "step": 6349000 }, { "epoch": 18.38, "learning_rate": 4.0813737437476844e-05, "loss": 2.2108, "step": 6349500 }, { "epoch": 18.38, "learning_rate": 4.0813013789829566e-05, "loss": 2.2173, "step": 6350000 }, { "epoch": 18.38, "learning_rate": 4.081229014218229e-05, "loss": 2.1767, "step": 6350500 }, { "epoch": 18.38, "learning_rate": 4.081156649453502e-05, "loss": 2.2156, "step": 6351000 }, { "epoch": 18.38, "learning_rate": 4.081084284688774e-05, "loss": 2.2319, "step": 6351500 }, { "epoch": 18.39, "learning_rate": 4.081011919924046e-05, "loss": 2.198, "step": 6352000 }, { "epoch": 18.39, "learning_rate": 4.0809395551593184e-05, "loss": 2.2356, "step": 6352500 }, { "epoch": 18.39, "learning_rate": 4.08086733512412e-05, "loss": 2.2196, "step": 6353000 }, { "epoch": 18.39, "learning_rate": 4.080794970359392e-05, "loss": 2.2042, "step": 6353500 }, { "epoch": 18.39, "learning_rate": 4.080722605594665e-05, "loss": 2.1989, "step": 6354000 }, { "epoch": 18.39, "learning_rate": 4.080650240829937e-05, "loss": 2.2192, "step": 6354500 }, { "epoch": 18.4, "learning_rate": 4.0805778760652095e-05, "loss": 2.2049, "step": 6355000 }, { "epoch": 18.4, "learning_rate": 4.080505511300482e-05, "loss": 2.2, "step": 6355500 }, { "epoch": 18.4, "learning_rate": 4.080433146535754e-05, "loss": 2.1992, "step": 6356000 }, { "epoch": 18.4, "learning_rate": 4.080360781771027e-05, "loss": 2.213, "step": 6356500 }, { "epoch": 18.4, "learning_rate": 4.080288417006299e-05, "loss": 2.1852, "step": 6357000 }, { "epoch": 18.4, "learning_rate": 4.0802161969711006e-05, "loss": 2.2163, "step": 6357500 }, { "epoch": 18.4, "learning_rate": 4.080143832206373e-05, "loss": 2.1719, "step": 6358000 }, { "epoch": 18.41, "learning_rate": 4.080071467441645e-05, "loss": 2.2057, "step": 6358500 }, { "epoch": 18.41, "learning_rate": 4.079999102676917e-05, "loss": 2.194, "step": 6359000 }, { "epoch": 18.41, "learning_rate": 4.0799267379121895e-05, "loss": 2.1995, "step": 6359500 }, { "epoch": 18.41, "learning_rate": 4.079854373147462e-05, "loss": 2.1933, "step": 6360000 }, { "epoch": 18.41, "learning_rate": 4.079782008382734e-05, "loss": 2.1939, "step": 6360500 }, { "epoch": 18.41, "learning_rate": 4.079709643618007e-05, "loss": 2.1999, "step": 6361000 }, { "epoch": 18.41, "learning_rate": 4.079637423582809e-05, "loss": 2.2173, "step": 6361500 }, { "epoch": 18.42, "learning_rate": 4.079565058818081e-05, "loss": 2.2237, "step": 6362000 }, { "epoch": 18.42, "learning_rate": 4.0794926940533535e-05, "loss": 2.2159, "step": 6362500 }, { "epoch": 18.42, "learning_rate": 4.079420329288626e-05, "loss": 2.2205, "step": 6363000 }, { "epoch": 18.42, "learning_rate": 4.079347964523898e-05, "loss": 2.2027, "step": 6363500 }, { "epoch": 18.42, "learning_rate": 4.0792758892182296e-05, "loss": 2.2153, "step": 6364000 }, { "epoch": 18.42, "learning_rate": 4.079203524453502e-05, "loss": 2.204, "step": 6364500 }, { "epoch": 18.42, "learning_rate": 4.079131159688774e-05, "loss": 2.2055, "step": 6365000 }, { "epoch": 18.43, "learning_rate": 4.079058794924046e-05, "loss": 2.1951, "step": 6365500 }, { "epoch": 18.43, "learning_rate": 4.0789864301593185e-05, "loss": 2.2341, "step": 6366000 }, { "epoch": 18.43, "learning_rate": 4.078914065394591e-05, "loss": 2.2103, "step": 6366500 }, { "epoch": 18.43, "learning_rate": 4.078841700629863e-05, "loss": 2.2011, "step": 6367000 }, { "epoch": 18.43, "learning_rate": 4.078769335865135e-05, "loss": 2.22, "step": 6367500 }, { "epoch": 18.43, "learning_rate": 4.0786969711004073e-05, "loss": 2.2226, "step": 6368000 }, { "epoch": 18.43, "learning_rate": 4.07862460633568e-05, "loss": 2.192, "step": 6368500 }, { "epoch": 18.44, "learning_rate": 4.078552386300482e-05, "loss": 2.189, "step": 6369000 }, { "epoch": 18.44, "learning_rate": 4.078480021535755e-05, "loss": 2.217, "step": 6369500 }, { "epoch": 18.44, "learning_rate": 4.078407656771027e-05, "loss": 2.1926, "step": 6370000 }, { "epoch": 18.44, "learning_rate": 4.078335292006299e-05, "loss": 2.2171, "step": 6370500 }, { "epoch": 18.44, "learning_rate": 4.0782629272415714e-05, "loss": 2.2168, "step": 6371000 }, { "epoch": 18.44, "learning_rate": 4.078190707206373e-05, "loss": 2.2005, "step": 6371500 }, { "epoch": 18.44, "learning_rate": 4.078118342441645e-05, "loss": 2.2163, "step": 6372000 }, { "epoch": 18.45, "learning_rate": 4.0780459776769174e-05, "loss": 2.2096, "step": 6372500 }, { "epoch": 18.45, "learning_rate": 4.0779736129121896e-05, "loss": 2.1958, "step": 6373000 }, { "epoch": 18.45, "learning_rate": 4.077901248147462e-05, "loss": 2.1901, "step": 6373500 }, { "epoch": 18.45, "learning_rate": 4.077828883382735e-05, "loss": 2.1985, "step": 6374000 }, { "epoch": 18.45, "learning_rate": 4.077756518618007e-05, "loss": 2.2001, "step": 6374500 }, { "epoch": 18.45, "learning_rate": 4.077684153853279e-05, "loss": 2.2285, "step": 6375000 }, { "epoch": 18.45, "learning_rate": 4.0776117890885514e-05, "loss": 2.1884, "step": 6375500 }, { "epoch": 18.46, "learning_rate": 4.077539424323824e-05, "loss": 2.2339, "step": 6376000 }, { "epoch": 18.46, "learning_rate": 4.077467204288626e-05, "loss": 2.223, "step": 6376500 }, { "epoch": 18.46, "learning_rate": 4.077394839523898e-05, "loss": 2.2426, "step": 6377000 }, { "epoch": 18.46, "learning_rate": 4.0773226194886996e-05, "loss": 2.2088, "step": 6377500 }, { "epoch": 18.46, "learning_rate": 4.077250254723972e-05, "loss": 2.1883, "step": 6378000 }, { "epoch": 18.46, "learning_rate": 4.077177889959245e-05, "loss": 2.192, "step": 6378500 }, { "epoch": 18.46, "learning_rate": 4.077105525194517e-05, "loss": 2.2184, "step": 6379000 }, { "epoch": 18.47, "learning_rate": 4.077033160429789e-05, "loss": 2.2105, "step": 6379500 }, { "epoch": 18.47, "learning_rate": 4.0769607956650614e-05, "loss": 2.2115, "step": 6380000 }, { "epoch": 18.47, "learning_rate": 4.0768884309003336e-05, "loss": 2.2089, "step": 6380500 }, { "epoch": 18.47, "learning_rate": 4.076816066135606e-05, "loss": 2.2202, "step": 6381000 }, { "epoch": 18.47, "learning_rate": 4.076743701370878e-05, "loss": 2.2147, "step": 6381500 }, { "epoch": 18.47, "learning_rate": 4.0766716260652097e-05, "loss": 2.2254, "step": 6382000 }, { "epoch": 18.47, "learning_rate": 4.076599406030011e-05, "loss": 2.2161, "step": 6382500 }, { "epoch": 18.48, "learning_rate": 4.0765270412652834e-05, "loss": 2.1944, "step": 6383000 }, { "epoch": 18.48, "learning_rate": 4.0764546765005557e-05, "loss": 2.2124, "step": 6383500 }, { "epoch": 18.48, "learning_rate": 4.0763823117358286e-05, "loss": 2.2334, "step": 6384000 }, { "epoch": 18.48, "learning_rate": 4.076309946971101e-05, "loss": 2.1812, "step": 6384500 }, { "epoch": 18.48, "learning_rate": 4.076237582206373e-05, "loss": 2.1969, "step": 6385000 }, { "epoch": 18.48, "learning_rate": 4.076165217441645e-05, "loss": 2.2115, "step": 6385500 }, { "epoch": 18.48, "learning_rate": 4.076092997406447e-05, "loss": 2.2127, "step": 6386000 }, { "epoch": 18.49, "learning_rate": 4.07602063264172e-05, "loss": 2.2198, "step": 6386500 }, { "epoch": 18.49, "learning_rate": 4.075948267876992e-05, "loss": 2.2301, "step": 6387000 }, { "epoch": 18.49, "learning_rate": 4.075875903112264e-05, "loss": 2.2226, "step": 6387500 }, { "epoch": 18.49, "learning_rate": 4.0758035383475364e-05, "loss": 2.2228, "step": 6388000 }, { "epoch": 18.49, "learning_rate": 4.0757311735828086e-05, "loss": 2.2135, "step": 6388500 }, { "epoch": 18.49, "learning_rate": 4.075658808818081e-05, "loss": 2.1943, "step": 6389000 }, { "epoch": 18.49, "learning_rate": 4.075586444053353e-05, "loss": 2.1971, "step": 6389500 }, { "epoch": 18.5, "learning_rate": 4.075514079288625e-05, "loss": 2.2041, "step": 6390000 }, { "epoch": 18.5, "learning_rate": 4.0754417145238975e-05, "loss": 2.1765, "step": 6390500 }, { "epoch": 18.5, "learning_rate": 4.0753693497591704e-05, "loss": 2.2086, "step": 6391000 }, { "epoch": 18.5, "learning_rate": 4.0752969849944426e-05, "loss": 2.2056, "step": 6391500 }, { "epoch": 18.5, "learning_rate": 4.075224764959245e-05, "loss": 2.22, "step": 6392000 }, { "epoch": 18.5, "learning_rate": 4.075152400194517e-05, "loss": 2.2204, "step": 6392500 }, { "epoch": 18.51, "learning_rate": 4.075080035429789e-05, "loss": 2.2235, "step": 6393000 }, { "epoch": 18.51, "learning_rate": 4.0750076706650615e-05, "loss": 2.2091, "step": 6393500 }, { "epoch": 18.51, "learning_rate": 4.074935305900334e-05, "loss": 2.2001, "step": 6394000 }, { "epoch": 18.51, "learning_rate": 4.0748632305946646e-05, "loss": 2.2021, "step": 6394500 }, { "epoch": 18.51, "learning_rate": 4.0747908658299375e-05, "loss": 2.2117, "step": 6395000 }, { "epoch": 18.51, "learning_rate": 4.07471850106521e-05, "loss": 2.2492, "step": 6395500 }, { "epoch": 18.51, "learning_rate": 4.074646136300482e-05, "loss": 2.2102, "step": 6396000 }, { "epoch": 18.52, "learning_rate": 4.074573771535754e-05, "loss": 2.2228, "step": 6396500 }, { "epoch": 18.52, "learning_rate": 4.0745014067710264e-05, "loss": 2.2011, "step": 6397000 }, { "epoch": 18.52, "learning_rate": 4.0744290420062986e-05, "loss": 2.2178, "step": 6397500 }, { "epoch": 18.52, "learning_rate": 4.0743568219711e-05, "loss": 2.2099, "step": 6398000 }, { "epoch": 18.52, "learning_rate": 4.074284457206373e-05, "loss": 2.2277, "step": 6398500 }, { "epoch": 18.52, "learning_rate": 4.074212092441645e-05, "loss": 2.1893, "step": 6399000 }, { "epoch": 18.52, "learning_rate": 4.0741397276769175e-05, "loss": 2.2072, "step": 6399500 }, { "epoch": 18.53, "learning_rate": 4.07406736291219e-05, "loss": 2.1991, "step": 6400000 }, { "epoch": 18.53, "learning_rate": 4.0739949981474627e-05, "loss": 2.2167, "step": 6400500 }, { "epoch": 18.53, "learning_rate": 4.073922633382735e-05, "loss": 2.2123, "step": 6401000 }, { "epoch": 18.53, "learning_rate": 4.073850268618007e-05, "loss": 2.2247, "step": 6401500 }, { "epoch": 18.53, "learning_rate": 4.073777903853279e-05, "loss": 2.2172, "step": 6402000 }, { "epoch": 18.53, "learning_rate": 4.0737055390885516e-05, "loss": 2.2101, "step": 6402500 }, { "epoch": 18.53, "learning_rate": 4.073633319053353e-05, "loss": 2.2082, "step": 6403000 }, { "epoch": 18.54, "learning_rate": 4.073560954288625e-05, "loss": 2.1909, "step": 6403500 }, { "epoch": 18.54, "learning_rate": 4.0734885895238976e-05, "loss": 2.1951, "step": 6404000 }, { "epoch": 18.54, "learning_rate": 4.07341622475917e-05, "loss": 2.2012, "step": 6404500 }, { "epoch": 18.54, "learning_rate": 4.073344004723972e-05, "loss": 2.2279, "step": 6405000 }, { "epoch": 18.54, "learning_rate": 4.073271639959244e-05, "loss": 2.1974, "step": 6405500 }, { "epoch": 18.54, "learning_rate": 4.0731994199240465e-05, "loss": 2.2066, "step": 6406000 }, { "epoch": 18.54, "learning_rate": 4.073127055159319e-05, "loss": 2.2027, "step": 6406500 }, { "epoch": 18.55, "learning_rate": 4.073054690394591e-05, "loss": 2.1847, "step": 6407000 }, { "epoch": 18.55, "learning_rate": 4.072982325629863e-05, "loss": 2.2049, "step": 6407500 }, { "epoch": 18.55, "learning_rate": 4.0729099608651354e-05, "loss": 2.22, "step": 6408000 }, { "epoch": 18.55, "learning_rate": 4.0728375961004076e-05, "loss": 2.1872, "step": 6408500 }, { "epoch": 18.55, "learning_rate": 4.07276523133568e-05, "loss": 2.1968, "step": 6409000 }, { "epoch": 18.55, "learning_rate": 4.072692866570953e-05, "loss": 2.2271, "step": 6409500 }, { "epoch": 18.55, "learning_rate": 4.072620501806225e-05, "loss": 2.2086, "step": 6410000 }, { "epoch": 18.56, "learning_rate": 4.072548137041497e-05, "loss": 2.2094, "step": 6410500 }, { "epoch": 18.56, "learning_rate": 4.0724757722767694e-05, "loss": 2.1971, "step": 6411000 }, { "epoch": 18.56, "learning_rate": 4.0724034075120416e-05, "loss": 2.218, "step": 6411500 }, { "epoch": 18.56, "learning_rate": 4.072331042747314e-05, "loss": 2.2173, "step": 6412000 }, { "epoch": 18.56, "learning_rate": 4.072258967441645e-05, "loss": 2.1927, "step": 6412500 }, { "epoch": 18.56, "learning_rate": 4.0721866026769176e-05, "loss": 2.2, "step": 6413000 }, { "epoch": 18.56, "learning_rate": 4.07211423791219e-05, "loss": 2.2176, "step": 6413500 }, { "epoch": 18.57, "learning_rate": 4.072041873147463e-05, "loss": 2.205, "step": 6414000 }, { "epoch": 18.57, "learning_rate": 4.071969508382735e-05, "loss": 2.1966, "step": 6414500 }, { "epoch": 18.57, "learning_rate": 4.071897143618007e-05, "loss": 2.1922, "step": 6415000 }, { "epoch": 18.57, "learning_rate": 4.0718247788532794e-05, "loss": 2.2279, "step": 6415500 }, { "epoch": 18.57, "learning_rate": 4.0717524140885516e-05, "loss": 2.2025, "step": 6416000 }, { "epoch": 18.57, "learning_rate": 4.071680194053353e-05, "loss": 2.1905, "step": 6416500 }, { "epoch": 18.57, "learning_rate": 4.071607974018155e-05, "loss": 2.1931, "step": 6417000 }, { "epoch": 18.58, "learning_rate": 4.0715356092534276e-05, "loss": 2.2224, "step": 6417500 }, { "epoch": 18.58, "learning_rate": 4.071463389218229e-05, "loss": 2.2145, "step": 6418000 }, { "epoch": 18.58, "learning_rate": 4.0713910244535014e-05, "loss": 2.1937, "step": 6418500 }, { "epoch": 18.58, "learning_rate": 4.0713186596887736e-05, "loss": 2.2067, "step": 6419000 }, { "epoch": 18.58, "learning_rate": 4.071246294924046e-05, "loss": 2.1892, "step": 6419500 }, { "epoch": 18.58, "learning_rate": 4.071173930159318e-05, "loss": 2.2333, "step": 6420000 }, { "epoch": 18.58, "learning_rate": 4.07110156539459e-05, "loss": 2.2102, "step": 6420500 }, { "epoch": 18.59, "learning_rate": 4.071029200629863e-05, "loss": 2.2314, "step": 6421000 }, { "epoch": 18.59, "learning_rate": 4.0709568358651354e-05, "loss": 2.1897, "step": 6421500 }, { "epoch": 18.59, "learning_rate": 4.070884471100408e-05, "loss": 2.2129, "step": 6422000 }, { "epoch": 18.59, "learning_rate": 4.07081225106521e-05, "loss": 2.1974, "step": 6422500 }, { "epoch": 18.59, "learning_rate": 4.070739886300482e-05, "loss": 2.2034, "step": 6423000 }, { "epoch": 18.59, "learning_rate": 4.070667666265284e-05, "loss": 2.1997, "step": 6423500 }, { "epoch": 18.59, "learning_rate": 4.070595301500556e-05, "loss": 2.2016, "step": 6424000 }, { "epoch": 18.6, "learning_rate": 4.070522936735828e-05, "loss": 2.2145, "step": 6424500 }, { "epoch": 18.6, "learning_rate": 4.0704505719711003e-05, "loss": 2.2172, "step": 6425000 }, { "epoch": 18.6, "learning_rate": 4.0703782072063726e-05, "loss": 2.2065, "step": 6425500 }, { "epoch": 18.6, "learning_rate": 4.0703058424416455e-05, "loss": 2.242, "step": 6426000 }, { "epoch": 18.6, "learning_rate": 4.070233477676918e-05, "loss": 2.2194, "step": 6426500 }, { "epoch": 18.6, "learning_rate": 4.07016111291219e-05, "loss": 2.2052, "step": 6427000 }, { "epoch": 18.6, "learning_rate": 4.070088748147462e-05, "loss": 2.1961, "step": 6427500 }, { "epoch": 18.61, "learning_rate": 4.070016528112264e-05, "loss": 2.2148, "step": 6428000 }, { "epoch": 18.61, "learning_rate": 4.069944308077065e-05, "loss": 2.1986, "step": 6428500 }, { "epoch": 18.61, "learning_rate": 4.069871943312338e-05, "loss": 2.2028, "step": 6429000 }, { "epoch": 18.61, "learning_rate": 4.0697995785476104e-05, "loss": 2.2003, "step": 6429500 }, { "epoch": 18.61, "learning_rate": 4.0697272137828826e-05, "loss": 2.2164, "step": 6430000 }, { "epoch": 18.61, "learning_rate": 4.0696548490181555e-05, "loss": 2.2049, "step": 6430500 }, { "epoch": 18.62, "learning_rate": 4.069582484253428e-05, "loss": 2.2078, "step": 6431000 }, { "epoch": 18.62, "learning_rate": 4.0695101194887e-05, "loss": 2.1875, "step": 6431500 }, { "epoch": 18.62, "learning_rate": 4.069437754723972e-05, "loss": 2.1991, "step": 6432000 }, { "epoch": 18.62, "learning_rate": 4.0693653899592444e-05, "loss": 2.2108, "step": 6432500 }, { "epoch": 18.62, "learning_rate": 4.0692930251945166e-05, "loss": 2.2104, "step": 6433000 }, { "epoch": 18.62, "learning_rate": 4.069220660429789e-05, "loss": 2.2093, "step": 6433500 }, { "epoch": 18.62, "learning_rate": 4.069148295665061e-05, "loss": 2.2284, "step": 6434000 }, { "epoch": 18.63, "learning_rate": 4.069075930900333e-05, "loss": 2.2032, "step": 6434500 }, { "epoch": 18.63, "learning_rate": 4.0690037108651355e-05, "loss": 2.2033, "step": 6435000 }, { "epoch": 18.63, "learning_rate": 4.068931490829937e-05, "loss": 2.2287, "step": 6435500 }, { "epoch": 18.63, "learning_rate": 4.06885912606521e-05, "loss": 2.2279, "step": 6436000 }, { "epoch": 18.63, "learning_rate": 4.068786761300482e-05, "loss": 2.2097, "step": 6436500 }, { "epoch": 18.63, "learning_rate": 4.068714541265284e-05, "loss": 2.2368, "step": 6437000 }, { "epoch": 18.63, "learning_rate": 4.068642176500556e-05, "loss": 2.2298, "step": 6437500 }, { "epoch": 18.64, "learning_rate": 4.068569811735828e-05, "loss": 2.1628, "step": 6438000 }, { "epoch": 18.64, "learning_rate": 4.0684974469711004e-05, "loss": 2.2073, "step": 6438500 }, { "epoch": 18.64, "learning_rate": 4.0684250822063726e-05, "loss": 2.1907, "step": 6439000 }, { "epoch": 18.64, "learning_rate": 4.0683527174416455e-05, "loss": 2.2381, "step": 6439500 }, { "epoch": 18.64, "learning_rate": 4.068280352676918e-05, "loss": 2.2224, "step": 6440000 }, { "epoch": 18.64, "learning_rate": 4.06820798791219e-05, "loss": 2.2251, "step": 6440500 }, { "epoch": 18.64, "learning_rate": 4.068135623147462e-05, "loss": 2.2169, "step": 6441000 }, { "epoch": 18.65, "learning_rate": 4.0680632583827344e-05, "loss": 2.2073, "step": 6441500 }, { "epoch": 18.65, "learning_rate": 4.067990893618007e-05, "loss": 2.1777, "step": 6442000 }, { "epoch": 18.65, "learning_rate": 4.067918528853279e-05, "loss": 2.2194, "step": 6442500 }, { "epoch": 18.65, "learning_rate": 4.067846164088551e-05, "loss": 2.192, "step": 6443000 }, { "epoch": 18.65, "learning_rate": 4.0677740887828834e-05, "loss": 2.2177, "step": 6443500 }, { "epoch": 18.65, "learning_rate": 4.0677017240181556e-05, "loss": 2.1985, "step": 6444000 }, { "epoch": 18.65, "learning_rate": 4.067629359253428e-05, "loss": 2.214, "step": 6444500 }, { "epoch": 18.66, "learning_rate": 4.0675569944887e-05, "loss": 2.2131, "step": 6445000 }, { "epoch": 18.66, "learning_rate": 4.067484629723972e-05, "loss": 2.2063, "step": 6445500 }, { "epoch": 18.66, "learning_rate": 4.0674122649592445e-05, "loss": 2.198, "step": 6446000 }, { "epoch": 18.66, "learning_rate": 4.067339900194517e-05, "loss": 2.2155, "step": 6446500 }, { "epoch": 18.66, "learning_rate": 4.067267535429789e-05, "loss": 2.2089, "step": 6447000 }, { "epoch": 18.66, "learning_rate": 4.067195170665061e-05, "loss": 2.2217, "step": 6447500 }, { "epoch": 18.66, "learning_rate": 4.0671228059003334e-05, "loss": 2.189, "step": 6448000 }, { "epoch": 18.67, "learning_rate": 4.067050730594665e-05, "loss": 2.2384, "step": 6448500 }, { "epoch": 18.67, "learning_rate": 4.066978365829937e-05, "loss": 2.2298, "step": 6449000 }, { "epoch": 18.67, "learning_rate": 4.0669060010652094e-05, "loss": 2.2055, "step": 6449500 }, { "epoch": 18.67, "learning_rate": 4.066833781030011e-05, "loss": 2.2142, "step": 6450000 }, { "epoch": 18.67, "learning_rate": 4.066761416265283e-05, "loss": 2.2111, "step": 6450500 }, { "epoch": 18.67, "learning_rate": 4.0666890515005554e-05, "loss": 2.2139, "step": 6451000 }, { "epoch": 18.67, "learning_rate": 4.066616686735828e-05, "loss": 2.2324, "step": 6451500 }, { "epoch": 18.68, "learning_rate": 4.0665443219711005e-05, "loss": 2.214, "step": 6452000 }, { "epoch": 18.68, "learning_rate": 4.0664719572063734e-05, "loss": 2.2089, "step": 6452500 }, { "epoch": 18.68, "learning_rate": 4.0663995924416456e-05, "loss": 2.2126, "step": 6453000 }, { "epoch": 18.68, "learning_rate": 4.066327372406447e-05, "loss": 2.202, "step": 6453500 }, { "epoch": 18.68, "learning_rate": 4.0662550076417194e-05, "loss": 2.232, "step": 6454000 }, { "epoch": 18.68, "learning_rate": 4.066182787606521e-05, "loss": 2.2082, "step": 6454500 }, { "epoch": 18.68, "learning_rate": 4.066110422841793e-05, "loss": 2.2551, "step": 6455000 }, { "epoch": 18.69, "learning_rate": 4.0660380580770654e-05, "loss": 2.212, "step": 6455500 }, { "epoch": 18.69, "learning_rate": 4.065965693312338e-05, "loss": 2.2215, "step": 6456000 }, { "epoch": 18.69, "learning_rate": 4.0658933285476105e-05, "loss": 2.212, "step": 6456500 }, { "epoch": 18.69, "learning_rate": 4.065820963782883e-05, "loss": 2.2087, "step": 6457000 }, { "epoch": 18.69, "learning_rate": 4.065748599018155e-05, "loss": 2.1818, "step": 6457500 }, { "epoch": 18.69, "learning_rate": 4.065676234253427e-05, "loss": 2.2234, "step": 6458000 }, { "epoch": 18.69, "learning_rate": 4.0656038694887e-05, "loss": 2.2052, "step": 6458500 }, { "epoch": 18.7, "learning_rate": 4.065531504723972e-05, "loss": 2.1988, "step": 6459000 }, { "epoch": 18.7, "learning_rate": 4.065459284688774e-05, "loss": 2.1825, "step": 6459500 }, { "epoch": 18.7, "learning_rate": 4.065386919924046e-05, "loss": 2.1943, "step": 6460000 }, { "epoch": 18.7, "learning_rate": 4.065314555159318e-05, "loss": 2.1997, "step": 6460500 }, { "epoch": 18.7, "learning_rate": 4.0652421903945906e-05, "loss": 2.2089, "step": 6461000 }, { "epoch": 18.7, "learning_rate": 4.0651698256298635e-05, "loss": 2.2063, "step": 6461500 }, { "epoch": 18.7, "learning_rate": 4.065097460865136e-05, "loss": 2.2328, "step": 6462000 }, { "epoch": 18.71, "learning_rate": 4.065025096100408e-05, "loss": 2.2301, "step": 6462500 }, { "epoch": 18.71, "learning_rate": 4.06495273133568e-05, "loss": 2.2358, "step": 6463000 }, { "epoch": 18.71, "learning_rate": 4.0648803665709523e-05, "loss": 2.2062, "step": 6463500 }, { "epoch": 18.71, "learning_rate": 4.0648080018062246e-05, "loss": 2.2141, "step": 6464000 }, { "epoch": 18.71, "learning_rate": 4.064735781771026e-05, "loss": 2.2126, "step": 6464500 }, { "epoch": 18.71, "learning_rate": 4.0646634170062984e-05, "loss": 2.2049, "step": 6465000 }, { "epoch": 18.71, "learning_rate": 4.0645910522415706e-05, "loss": 2.219, "step": 6465500 }, { "epoch": 18.72, "learning_rate": 4.0645186874768435e-05, "loss": 2.2115, "step": 6466000 }, { "epoch": 18.72, "learning_rate": 4.064446322712116e-05, "loss": 2.2268, "step": 6466500 }, { "epoch": 18.72, "learning_rate": 4.0643739579473886e-05, "loss": 2.2169, "step": 6467000 }, { "epoch": 18.72, "learning_rate": 4.064301593182661e-05, "loss": 2.2272, "step": 6467500 }, { "epoch": 18.72, "learning_rate": 4.064229228417933e-05, "loss": 2.22, "step": 6468000 }, { "epoch": 18.72, "learning_rate": 4.064156863653205e-05, "loss": 2.2024, "step": 6468500 }, { "epoch": 18.73, "learning_rate": 4.0640844988884775e-05, "loss": 2.2187, "step": 6469000 }, { "epoch": 18.73, "learning_rate": 4.06401213412375e-05, "loss": 2.1841, "step": 6469500 }, { "epoch": 18.73, "learning_rate": 4.063939769359022e-05, "loss": 2.2127, "step": 6470000 }, { "epoch": 18.73, "learning_rate": 4.0638675493238235e-05, "loss": 2.1938, "step": 6470500 }, { "epoch": 18.73, "learning_rate": 4.063795329288626e-05, "loss": 2.2124, "step": 6471000 }, { "epoch": 18.73, "learning_rate": 4.063722964523898e-05, "loss": 2.1897, "step": 6471500 }, { "epoch": 18.73, "learning_rate": 4.06365059975917e-05, "loss": 2.1927, "step": 6472000 }, { "epoch": 18.74, "learning_rate": 4.0635782349944424e-05, "loss": 2.1959, "step": 6472500 }, { "epoch": 18.74, "learning_rate": 4.0635058702297146e-05, "loss": 2.2161, "step": 6473000 }, { "epoch": 18.74, "learning_rate": 4.0634335054649875e-05, "loss": 2.2139, "step": 6473500 }, { "epoch": 18.74, "learning_rate": 4.06336114070026e-05, "loss": 2.1892, "step": 6474000 }, { "epoch": 18.74, "learning_rate": 4.063288775935532e-05, "loss": 2.1936, "step": 6474500 }, { "epoch": 18.74, "learning_rate": 4.063216411170804e-05, "loss": 2.2137, "step": 6475000 }, { "epoch": 18.74, "learning_rate": 4.063144335865136e-05, "loss": 2.2072, "step": 6475500 }, { "epoch": 18.75, "learning_rate": 4.063071971100408e-05, "loss": 2.2084, "step": 6476000 }, { "epoch": 18.75, "learning_rate": 4.0629997510652095e-05, "loss": 2.2148, "step": 6476500 }, { "epoch": 18.75, "learning_rate": 4.062927386300482e-05, "loss": 2.2339, "step": 6477000 }, { "epoch": 18.75, "learning_rate": 4.062855021535754e-05, "loss": 2.1988, "step": 6477500 }, { "epoch": 18.75, "learning_rate": 4.062782656771026e-05, "loss": 2.2251, "step": 6478000 }, { "epoch": 18.75, "learning_rate": 4.0627102920062984e-05, "loss": 2.2126, "step": 6478500 }, { "epoch": 18.75, "learning_rate": 4.062637927241571e-05, "loss": 2.2027, "step": 6479000 }, { "epoch": 18.76, "learning_rate": 4.0625655624768436e-05, "loss": 2.2218, "step": 6479500 }, { "epoch": 18.76, "learning_rate": 4.062493197712116e-05, "loss": 2.2059, "step": 6480000 }, { "epoch": 18.76, "learning_rate": 4.062420832947388e-05, "loss": 2.2219, "step": 6480500 }, { "epoch": 18.76, "learning_rate": 4.062348468182661e-05, "loss": 2.2047, "step": 6481000 }, { "epoch": 18.76, "learning_rate": 4.0622762481474625e-05, "loss": 2.2378, "step": 6481500 }, { "epoch": 18.76, "learning_rate": 4.062203883382735e-05, "loss": 2.1982, "step": 6482000 }, { "epoch": 18.76, "learning_rate": 4.062131518618007e-05, "loss": 2.229, "step": 6482500 }, { "epoch": 18.77, "learning_rate": 4.062059153853279e-05, "loss": 2.1974, "step": 6483000 }, { "epoch": 18.77, "learning_rate": 4.0619867890885514e-05, "loss": 2.2119, "step": 6483500 }, { "epoch": 18.77, "learning_rate": 4.0619144243238236e-05, "loss": 2.2091, "step": 6484000 }, { "epoch": 18.77, "learning_rate": 4.061842059559096e-05, "loss": 2.2292, "step": 6484500 }, { "epoch": 18.77, "learning_rate": 4.061769694794369e-05, "loss": 2.2074, "step": 6485000 }, { "epoch": 18.77, "learning_rate": 4.061697330029641e-05, "loss": 2.1986, "step": 6485500 }, { "epoch": 18.77, "learning_rate": 4.061625254723972e-05, "loss": 2.1964, "step": 6486000 }, { "epoch": 18.78, "learning_rate": 4.0615530346887734e-05, "loss": 2.2231, "step": 6486500 }, { "epoch": 18.78, "learning_rate": 4.061480669924046e-05, "loss": 2.2258, "step": 6487000 }, { "epoch": 18.78, "learning_rate": 4.0614083051593185e-05, "loss": 2.2199, "step": 6487500 }, { "epoch": 18.78, "learning_rate": 4.061335940394591e-05, "loss": 2.2291, "step": 6488000 }, { "epoch": 18.78, "learning_rate": 4.061263720359392e-05, "loss": 2.1963, "step": 6488500 }, { "epoch": 18.78, "learning_rate": 4.061191355594665e-05, "loss": 2.1982, "step": 6489000 }, { "epoch": 18.78, "learning_rate": 4.0611189908299374e-05, "loss": 2.2184, "step": 6489500 }, { "epoch": 18.79, "learning_rate": 4.061046770794739e-05, "loss": 2.2305, "step": 6490000 }, { "epoch": 18.79, "learning_rate": 4.060974406030011e-05, "loss": 2.2025, "step": 6490500 }, { "epoch": 18.79, "learning_rate": 4.0609020412652834e-05, "loss": 2.2261, "step": 6491000 }, { "epoch": 18.79, "learning_rate": 4.060829676500556e-05, "loss": 2.1933, "step": 6491500 }, { "epoch": 18.79, "learning_rate": 4.0607573117358285e-05, "loss": 2.1909, "step": 6492000 }, { "epoch": 18.79, "learning_rate": 4.060684946971101e-05, "loss": 2.2286, "step": 6492500 }, { "epoch": 18.79, "learning_rate": 4.060612582206373e-05, "loss": 2.1838, "step": 6493000 }, { "epoch": 18.8, "learning_rate": 4.060540217441645e-05, "loss": 2.2074, "step": 6493500 }, { "epoch": 18.8, "learning_rate": 4.0604678526769174e-05, "loss": 2.223, "step": 6494000 }, { "epoch": 18.8, "learning_rate": 4.060395632641719e-05, "loss": 2.2016, "step": 6494500 }, { "epoch": 18.8, "learning_rate": 4.060323267876991e-05, "loss": 2.2099, "step": 6495000 }, { "epoch": 18.8, "learning_rate": 4.0602509031122634e-05, "loss": 2.2372, "step": 6495500 }, { "epoch": 18.8, "learning_rate": 4.060178538347536e-05, "loss": 2.2179, "step": 6496000 }, { "epoch": 18.8, "learning_rate": 4.0601061735828085e-05, "loss": 2.2069, "step": 6496500 }, { "epoch": 18.81, "learning_rate": 4.0600338088180814e-05, "loss": 2.2121, "step": 6497000 }, { "epoch": 18.81, "learning_rate": 4.059961444053354e-05, "loss": 2.1871, "step": 6497500 }, { "epoch": 18.81, "learning_rate": 4.059889079288626e-05, "loss": 2.198, "step": 6498000 }, { "epoch": 18.81, "learning_rate": 4.059816714523898e-05, "loss": 2.2216, "step": 6498500 }, { "epoch": 18.81, "learning_rate": 4.05974434975917e-05, "loss": 2.1853, "step": 6499000 }, { "epoch": 18.81, "learning_rate": 4.059672129723972e-05, "loss": 2.2291, "step": 6499500 }, { "epoch": 18.81, "learning_rate": 4.059599764959244e-05, "loss": 2.1993, "step": 6500000 }, { "epoch": 18.82, "learning_rate": 4.059527400194516e-05, "loss": 2.2146, "step": 6500500 }, { "epoch": 18.82, "learning_rate": 4.0594551801593186e-05, "loss": 2.2145, "step": 6501000 }, { "epoch": 18.82, "learning_rate": 4.059382815394591e-05, "loss": 2.1997, "step": 6501500 }, { "epoch": 18.82, "learning_rate": 4.059310450629863e-05, "loss": 2.2138, "step": 6502000 }, { "epoch": 18.82, "learning_rate": 4.059238085865135e-05, "loss": 2.2072, "step": 6502500 }, { "epoch": 18.82, "learning_rate": 4.0591657211004075e-05, "loss": 2.211, "step": 6503000 }, { "epoch": 18.82, "learning_rate": 4.05909350106521e-05, "loss": 2.1991, "step": 6503500 }, { "epoch": 18.83, "learning_rate": 4.059021136300482e-05, "loss": 2.2079, "step": 6504000 }, { "epoch": 18.83, "learning_rate": 4.058948771535754e-05, "loss": 2.1962, "step": 6504500 }, { "epoch": 18.83, "learning_rate": 4.0588764067710264e-05, "loss": 2.193, "step": 6505000 }, { "epoch": 18.83, "learning_rate": 4.058804042006299e-05, "loss": 2.2241, "step": 6505500 }, { "epoch": 18.83, "learning_rate": 4.058731821971101e-05, "loss": 2.2187, "step": 6506000 }, { "epoch": 18.83, "learning_rate": 4.0586596019359024e-05, "loss": 2.1999, "step": 6506500 }, { "epoch": 18.84, "learning_rate": 4.0585872371711746e-05, "loss": 2.2268, "step": 6507000 }, { "epoch": 18.84, "learning_rate": 4.058514872406447e-05, "loss": 2.2309, "step": 6507500 }, { "epoch": 18.84, "learning_rate": 4.058442507641719e-05, "loss": 2.2105, "step": 6508000 }, { "epoch": 18.84, "learning_rate": 4.058370142876991e-05, "loss": 2.2008, "step": 6508500 }, { "epoch": 18.84, "learning_rate": 4.058297778112264e-05, "loss": 2.2343, "step": 6509000 }, { "epoch": 18.84, "learning_rate": 4.0582254133475364e-05, "loss": 2.2046, "step": 6509500 }, { "epoch": 18.84, "learning_rate": 4.0581530485828086e-05, "loss": 2.1807, "step": 6510000 }, { "epoch": 18.85, "learning_rate": 4.058080683818081e-05, "loss": 2.223, "step": 6510500 }, { "epoch": 18.85, "learning_rate": 4.058008319053354e-05, "loss": 2.2139, "step": 6511000 }, { "epoch": 18.85, "learning_rate": 4.057935954288626e-05, "loss": 2.2075, "step": 6511500 }, { "epoch": 18.85, "learning_rate": 4.057863589523898e-05, "loss": 2.1946, "step": 6512000 }, { "epoch": 18.85, "learning_rate": 4.0577913694887e-05, "loss": 2.2106, "step": 6512500 }, { "epoch": 18.85, "learning_rate": 4.057719004723972e-05, "loss": 2.2311, "step": 6513000 }, { "epoch": 18.85, "learning_rate": 4.057646639959244e-05, "loss": 2.2392, "step": 6513500 }, { "epoch": 18.86, "learning_rate": 4.0575742751945164e-05, "loss": 2.2137, "step": 6514000 }, { "epoch": 18.86, "learning_rate": 4.057501910429789e-05, "loss": 2.2066, "step": 6514500 }, { "epoch": 18.86, "learning_rate": 4.0574295456650615e-05, "loss": 2.2152, "step": 6515000 }, { "epoch": 18.86, "learning_rate": 4.057357180900334e-05, "loss": 2.1989, "step": 6515500 }, { "epoch": 18.86, "learning_rate": 4.057284960865135e-05, "loss": 2.2212, "step": 6516000 }, { "epoch": 18.86, "learning_rate": 4.0572125961004075e-05, "loss": 2.2147, "step": 6516500 }, { "epoch": 18.86, "learning_rate": 4.05714023133568e-05, "loss": 2.1978, "step": 6517000 }, { "epoch": 18.87, "learning_rate": 4.057067866570952e-05, "loss": 2.2133, "step": 6517500 }, { "epoch": 18.87, "learning_rate": 4.056995501806224e-05, "loss": 2.2104, "step": 6518000 }, { "epoch": 18.87, "learning_rate": 4.056923137041497e-05, "loss": 2.221, "step": 6518500 }, { "epoch": 18.87, "learning_rate": 4.056850772276769e-05, "loss": 2.1952, "step": 6519000 }, { "epoch": 18.87, "learning_rate": 4.0567784075120416e-05, "loss": 2.2018, "step": 6519500 }, { "epoch": 18.87, "learning_rate": 4.0567060427473145e-05, "loss": 2.217, "step": 6520000 }, { "epoch": 18.87, "learning_rate": 4.056633822712116e-05, "loss": 2.2201, "step": 6520500 }, { "epoch": 18.88, "learning_rate": 4.056561457947388e-05, "loss": 2.2014, "step": 6521000 }, { "epoch": 18.88, "learning_rate": 4.0564890931826605e-05, "loss": 2.2018, "step": 6521500 }, { "epoch": 18.88, "learning_rate": 4.056416728417933e-05, "loss": 2.2116, "step": 6522000 }, { "epoch": 18.88, "learning_rate": 4.056344508382734e-05, "loss": 2.2175, "step": 6522500 }, { "epoch": 18.88, "learning_rate": 4.0562722883475365e-05, "loss": 2.2371, "step": 6523000 }, { "epoch": 18.88, "learning_rate": 4.056199923582809e-05, "loss": 2.2433, "step": 6523500 }, { "epoch": 18.88, "learning_rate": 4.056127558818081e-05, "loss": 2.2253, "step": 6524000 }, { "epoch": 18.89, "learning_rate": 4.056055194053353e-05, "loss": 2.2185, "step": 6524500 }, { "epoch": 18.89, "learning_rate": 4.0559828292886254e-05, "loss": 2.1982, "step": 6525000 }, { "epoch": 18.89, "learning_rate": 4.0559104645238976e-05, "loss": 2.2099, "step": 6525500 }, { "epoch": 18.89, "learning_rate": 4.0558380997591705e-05, "loss": 2.2191, "step": 6526000 }, { "epoch": 18.89, "learning_rate": 4.055765734994443e-05, "loss": 2.2015, "step": 6526500 }, { "epoch": 18.89, "learning_rate": 4.055693370229715e-05, "loss": 2.1937, "step": 6527000 }, { "epoch": 18.89, "learning_rate": 4.0556211501945165e-05, "loss": 2.1924, "step": 6527500 }, { "epoch": 18.9, "learning_rate": 4.0555487854297894e-05, "loss": 2.2068, "step": 6528000 }, { "epoch": 18.9, "learning_rate": 4.0554764206650616e-05, "loss": 2.2166, "step": 6528500 }, { "epoch": 18.9, "learning_rate": 4.055404055900334e-05, "loss": 2.2143, "step": 6529000 }, { "epoch": 18.9, "learning_rate": 4.055331691135606e-05, "loss": 2.2003, "step": 6529500 }, { "epoch": 18.9, "learning_rate": 4.0552594711004076e-05, "loss": 2.2035, "step": 6530000 }, { "epoch": 18.9, "learning_rate": 4.05518710633568e-05, "loss": 2.2089, "step": 6530500 }, { "epoch": 18.9, "learning_rate": 4.055114741570952e-05, "loss": 2.1762, "step": 6531000 }, { "epoch": 18.91, "learning_rate": 4.055042376806224e-05, "loss": 2.2093, "step": 6531500 }, { "epoch": 18.91, "learning_rate": 4.0549700120414965e-05, "loss": 2.226, "step": 6532000 }, { "epoch": 18.91, "learning_rate": 4.054897792006299e-05, "loss": 2.2242, "step": 6532500 }, { "epoch": 18.91, "learning_rate": 4.054825427241571e-05, "loss": 2.1848, "step": 6533000 }, { "epoch": 18.91, "learning_rate": 4.054753062476844e-05, "loss": 2.2157, "step": 6533500 }, { "epoch": 18.91, "learning_rate": 4.054680697712116e-05, "loss": 2.2024, "step": 6534000 }, { "epoch": 18.91, "learning_rate": 4.054608332947388e-05, "loss": 2.2154, "step": 6534500 }, { "epoch": 18.92, "learning_rate": 4.0545359681826605e-05, "loss": 2.2064, "step": 6535000 }, { "epoch": 18.92, "learning_rate": 4.054463603417933e-05, "loss": 2.2277, "step": 6535500 }, { "epoch": 18.92, "learning_rate": 4.054391238653205e-05, "loss": 2.1986, "step": 6536000 }, { "epoch": 18.92, "learning_rate": 4.054319018618007e-05, "loss": 2.2211, "step": 6536500 }, { "epoch": 18.92, "learning_rate": 4.0542466538532794e-05, "loss": 2.211, "step": 6537000 }, { "epoch": 18.92, "learning_rate": 4.054174433818081e-05, "loss": 2.2004, "step": 6537500 }, { "epoch": 18.92, "learning_rate": 4.054102069053353e-05, "loss": 2.1953, "step": 6538000 }, { "epoch": 18.93, "learning_rate": 4.0540297042886254e-05, "loss": 2.2121, "step": 6538500 }, { "epoch": 18.93, "learning_rate": 4.053957339523898e-05, "loss": 2.2147, "step": 6539000 }, { "epoch": 18.93, "learning_rate": 4.053885264218229e-05, "loss": 2.2284, "step": 6539500 }, { "epoch": 18.93, "learning_rate": 4.0538128994535015e-05, "loss": 2.2147, "step": 6540000 }, { "epoch": 18.93, "learning_rate": 4.053740534688774e-05, "loss": 2.1942, "step": 6540500 }, { "epoch": 18.93, "learning_rate": 4.0536681699240466e-05, "loss": 2.2145, "step": 6541000 }, { "epoch": 18.93, "learning_rate": 4.053595805159319e-05, "loss": 2.2282, "step": 6541500 }, { "epoch": 18.94, "learning_rate": 4.053523440394591e-05, "loss": 2.1757, "step": 6542000 }, { "epoch": 18.94, "learning_rate": 4.0534512203593926e-05, "loss": 2.2036, "step": 6542500 }, { "epoch": 18.94, "learning_rate": 4.053378855594665e-05, "loss": 2.204, "step": 6543000 }, { "epoch": 18.94, "learning_rate": 4.053306490829937e-05, "loss": 2.2285, "step": 6543500 }, { "epoch": 18.94, "learning_rate": 4.053234126065209e-05, "loss": 2.1902, "step": 6544000 }, { "epoch": 18.94, "learning_rate": 4.053161761300482e-05, "loss": 2.1909, "step": 6544500 }, { "epoch": 18.95, "learning_rate": 4.0530893965357544e-05, "loss": 2.2006, "step": 6545000 }, { "epoch": 18.95, "learning_rate": 4.0530170317710266e-05, "loss": 2.2041, "step": 6545500 }, { "epoch": 18.95, "learning_rate": 4.052944667006299e-05, "loss": 2.2416, "step": 6546000 }, { "epoch": 18.95, "learning_rate": 4.052872302241571e-05, "loss": 2.2253, "step": 6546500 }, { "epoch": 18.95, "learning_rate": 4.0528000822063726e-05, "loss": 2.2375, "step": 6547000 }, { "epoch": 18.95, "learning_rate": 4.052727717441645e-05, "loss": 2.2126, "step": 6547500 }, { "epoch": 18.95, "learning_rate": 4.052655352676917e-05, "loss": 2.2172, "step": 6548000 }, { "epoch": 18.96, "learning_rate": 4.05258298791219e-05, "loss": 2.1745, "step": 6548500 }, { "epoch": 18.96, "learning_rate": 4.052510623147462e-05, "loss": 2.2124, "step": 6549000 }, { "epoch": 18.96, "learning_rate": 4.0524384031122644e-05, "loss": 2.1835, "step": 6549500 }, { "epoch": 18.96, "learning_rate": 4.052366183077066e-05, "loss": 2.209, "step": 6550000 }, { "epoch": 18.96, "learning_rate": 4.052293818312338e-05, "loss": 2.21, "step": 6550500 }, { "epoch": 18.96, "learning_rate": 4.0522214535476104e-05, "loss": 2.1825, "step": 6551000 }, { "epoch": 18.96, "learning_rate": 4.0521490887828826e-05, "loss": 2.2039, "step": 6551500 }, { "epoch": 18.97, "learning_rate": 4.052076868747685e-05, "loss": 2.2155, "step": 6552000 }, { "epoch": 18.97, "learning_rate": 4.052004503982957e-05, "loss": 2.2147, "step": 6552500 }, { "epoch": 18.97, "learning_rate": 4.051932139218229e-05, "loss": 2.1927, "step": 6553000 }, { "epoch": 18.97, "learning_rate": 4.0518597744535015e-05, "loss": 2.2009, "step": 6553500 }, { "epoch": 18.97, "learning_rate": 4.051787409688774e-05, "loss": 2.1889, "step": 6554000 }, { "epoch": 18.97, "learning_rate": 4.051715044924046e-05, "loss": 2.2055, "step": 6554500 }, { "epoch": 18.97, "learning_rate": 4.051642680159318e-05, "loss": 2.2273, "step": 6555000 }, { "epoch": 18.98, "learning_rate": 4.0515703153945904e-05, "loss": 2.2143, "step": 6555500 }, { "epoch": 18.98, "learning_rate": 4.051498095359392e-05, "loss": 2.2055, "step": 6556000 }, { "epoch": 18.98, "learning_rate": 4.051425730594665e-05, "loss": 2.1922, "step": 6556500 }, { "epoch": 18.98, "learning_rate": 4.051353365829937e-05, "loss": 2.2235, "step": 6557000 }, { "epoch": 18.98, "learning_rate": 4.05128100106521e-05, "loss": 2.2118, "step": 6557500 }, { "epoch": 18.98, "learning_rate": 4.051208636300482e-05, "loss": 2.1868, "step": 6558000 }, { "epoch": 18.98, "learning_rate": 4.0511362715357545e-05, "loss": 2.2065, "step": 6558500 }, { "epoch": 18.99, "learning_rate": 4.051063906771027e-05, "loss": 2.192, "step": 6559000 }, { "epoch": 18.99, "learning_rate": 4.050991542006299e-05, "loss": 2.2365, "step": 6559500 }, { "epoch": 18.99, "learning_rate": 4.050919177241571e-05, "loss": 2.198, "step": 6560000 }, { "epoch": 18.99, "learning_rate": 4.0508468124768434e-05, "loss": 2.2093, "step": 6560500 }, { "epoch": 18.99, "learning_rate": 4.0507744477121156e-05, "loss": 2.1989, "step": 6561000 }, { "epoch": 18.99, "learning_rate": 4.050702082947388e-05, "loss": 2.2166, "step": 6561500 }, { "epoch": 18.99, "learning_rate": 4.05062971818266e-05, "loss": 2.1775, "step": 6562000 }, { "epoch": 19.0, "learning_rate": 4.050557498147462e-05, "loss": 2.1896, "step": 6562500 }, { "epoch": 19.0, "learning_rate": 4.050485278112264e-05, "loss": 2.1944, "step": 6563000 }, { "epoch": 19.0, "learning_rate": 4.050412913347537e-05, "loss": 2.2476, "step": 6563500 }, { "epoch": 19.0, "eval_accuracy": 0.6590668850698368, "eval_accuracy_mlm": 0.6224593176492826, "eval_accuracy_nsp": 0.8552222115283259, "eval_loss": 2.2322213649749756, "eval_runtime": 330.6144, "eval_samples_per_second": 1319.924, "eval_steps_per_second": 54.998, "step": 6563968 }, { "epoch": 19.0, "learning_rate": 4.050340548582809e-05, "loss": 2.2257, "step": 6564000 }, { "epoch": 19.0, "learning_rate": 4.050268183818081e-05, "loss": 2.209, "step": 6564500 }, { "epoch": 19.0, "learning_rate": 4.0501958190533534e-05, "loss": 2.1808, "step": 6565000 }, { "epoch": 19.0, "learning_rate": 4.0501234542886256e-05, "loss": 2.194, "step": 6565500 }, { "epoch": 19.01, "learning_rate": 4.050051089523898e-05, "loss": 2.2056, "step": 6566000 }, { "epoch": 19.01, "learning_rate": 4.04997872475917e-05, "loss": 2.2181, "step": 6566500 }, { "epoch": 19.01, "learning_rate": 4.049906504723972e-05, "loss": 2.1933, "step": 6567000 }, { "epoch": 19.01, "learning_rate": 4.0498341399592445e-05, "loss": 2.1761, "step": 6567500 }, { "epoch": 19.01, "learning_rate": 4.049761775194517e-05, "loss": 2.1707, "step": 6568000 }, { "epoch": 19.01, "learning_rate": 4.049689410429789e-05, "loss": 2.1927, "step": 6568500 }, { "epoch": 19.01, "learning_rate": 4.049617045665061e-05, "loss": 2.2015, "step": 6569000 }, { "epoch": 19.02, "learning_rate": 4.049544825629863e-05, "loss": 2.1803, "step": 6569500 }, { "epoch": 19.02, "learning_rate": 4.049472460865135e-05, "loss": 2.1729, "step": 6570000 }, { "epoch": 19.02, "learning_rate": 4.049400096100407e-05, "loss": 2.1727, "step": 6570500 }, { "epoch": 19.02, "learning_rate": 4.04932773133568e-05, "loss": 2.1625, "step": 6571000 }, { "epoch": 19.02, "learning_rate": 4.049255511300482e-05, "loss": 2.1767, "step": 6571500 }, { "epoch": 19.02, "learning_rate": 4.0491831465357545e-05, "loss": 2.1767, "step": 6572000 }, { "epoch": 19.02, "learning_rate": 4.049110781771027e-05, "loss": 2.1841, "step": 6572500 }, { "epoch": 19.03, "learning_rate": 4.049038417006299e-05, "loss": 2.1705, "step": 6573000 }, { "epoch": 19.03, "learning_rate": 4.048966052241571e-05, "loss": 2.1885, "step": 6573500 }, { "epoch": 19.03, "learning_rate": 4.048893832206373e-05, "loss": 2.1676, "step": 6574000 }, { "epoch": 19.03, "learning_rate": 4.048821467441645e-05, "loss": 2.1838, "step": 6574500 }, { "epoch": 19.03, "learning_rate": 4.048749247406447e-05, "loss": 2.1708, "step": 6575000 }, { "epoch": 19.03, "learning_rate": 4.0486768826417194e-05, "loss": 2.1954, "step": 6575500 }, { "epoch": 19.03, "learning_rate": 4.048604662606521e-05, "loss": 2.1827, "step": 6576000 }, { "epoch": 19.04, "learning_rate": 4.048532297841793e-05, "loss": 2.1804, "step": 6576500 }, { "epoch": 19.04, "learning_rate": 4.0484599330770654e-05, "loss": 2.1918, "step": 6577000 }, { "epoch": 19.04, "learning_rate": 4.048387713041868e-05, "loss": 2.2117, "step": 6577500 }, { "epoch": 19.04, "learning_rate": 4.04831534827714e-05, "loss": 2.2157, "step": 6578000 }, { "epoch": 19.04, "learning_rate": 4.048242983512412e-05, "loss": 2.1904, "step": 6578500 }, { "epoch": 19.04, "learning_rate": 4.048170618747685e-05, "loss": 2.1928, "step": 6579000 }, { "epoch": 19.04, "learning_rate": 4.048098253982957e-05, "loss": 2.1847, "step": 6579500 }, { "epoch": 19.05, "learning_rate": 4.0480258892182295e-05, "loss": 2.1988, "step": 6580000 }, { "epoch": 19.05, "learning_rate": 4.047953669183031e-05, "loss": 2.1895, "step": 6580500 }, { "epoch": 19.05, "learning_rate": 4.047881304418303e-05, "loss": 2.1924, "step": 6581000 }, { "epoch": 19.05, "learning_rate": 4.0478089396535755e-05, "loss": 2.2204, "step": 6581500 }, { "epoch": 19.05, "learning_rate": 4.047736574888848e-05, "loss": 2.2065, "step": 6582000 }, { "epoch": 19.05, "learning_rate": 4.04766421012412e-05, "loss": 2.1929, "step": 6582500 }, { "epoch": 19.06, "learning_rate": 4.047591845359393e-05, "loss": 2.2114, "step": 6583000 }, { "epoch": 19.06, "learning_rate": 4.047519480594665e-05, "loss": 2.1758, "step": 6583500 }, { "epoch": 19.06, "learning_rate": 4.047447115829937e-05, "loss": 2.1846, "step": 6584000 }, { "epoch": 19.06, "learning_rate": 4.0473747510652095e-05, "loss": 2.204, "step": 6584500 }, { "epoch": 19.06, "learning_rate": 4.047302386300482e-05, "loss": 2.1918, "step": 6585000 }, { "epoch": 19.06, "learning_rate": 4.047230021535754e-05, "loss": 2.185, "step": 6585500 }, { "epoch": 19.06, "learning_rate": 4.047157656771027e-05, "loss": 2.1966, "step": 6586000 }, { "epoch": 19.07, "learning_rate": 4.047085292006299e-05, "loss": 2.2302, "step": 6586500 }, { "epoch": 19.07, "learning_rate": 4.0470130719711006e-05, "loss": 2.2058, "step": 6587000 }, { "epoch": 19.07, "learning_rate": 4.046940851935903e-05, "loss": 2.2018, "step": 6587500 }, { "epoch": 19.07, "learning_rate": 4.046868487171175e-05, "loss": 2.2082, "step": 6588000 }, { "epoch": 19.07, "learning_rate": 4.046796122406447e-05, "loss": 2.2063, "step": 6588500 }, { "epoch": 19.07, "learning_rate": 4.0467237576417195e-05, "loss": 2.1953, "step": 6589000 }, { "epoch": 19.07, "learning_rate": 4.046651537606521e-05, "loss": 2.1798, "step": 6589500 }, { "epoch": 19.08, "learning_rate": 4.046579172841793e-05, "loss": 2.2021, "step": 6590000 }, { "epoch": 19.08, "learning_rate": 4.0465068080770655e-05, "loss": 2.1942, "step": 6590500 }, { "epoch": 19.08, "learning_rate": 4.046434443312338e-05, "loss": 2.1842, "step": 6591000 }, { "epoch": 19.08, "learning_rate": 4.04636207854761e-05, "loss": 2.1926, "step": 6591500 }, { "epoch": 19.08, "learning_rate": 4.046289713782883e-05, "loss": 2.1996, "step": 6592000 }, { "epoch": 19.08, "learning_rate": 4.046217349018155e-05, "loss": 2.2088, "step": 6592500 }, { "epoch": 19.08, "learning_rate": 4.046144984253427e-05, "loss": 2.1997, "step": 6593000 }, { "epoch": 19.09, "learning_rate": 4.046072764218229e-05, "loss": 2.1806, "step": 6593500 }, { "epoch": 19.09, "learning_rate": 4.046000544183031e-05, "loss": 2.1762, "step": 6594000 }, { "epoch": 19.09, "learning_rate": 4.045928179418303e-05, "loss": 2.2227, "step": 6594500 }, { "epoch": 19.09, "learning_rate": 4.0458558146535756e-05, "loss": 2.1969, "step": 6595000 }, { "epoch": 19.09, "learning_rate": 4.045783449888848e-05, "loss": 2.2016, "step": 6595500 }, { "epoch": 19.09, "learning_rate": 4.04571122985365e-05, "loss": 2.1938, "step": 6596000 }, { "epoch": 19.09, "learning_rate": 4.045638865088922e-05, "loss": 2.1893, "step": 6596500 }, { "epoch": 19.1, "learning_rate": 4.0455665003241945e-05, "loss": 2.1763, "step": 6597000 }, { "epoch": 19.1, "learning_rate": 4.045494135559467e-05, "loss": 2.2101, "step": 6597500 }, { "epoch": 19.1, "learning_rate": 4.045421770794739e-05, "loss": 2.1869, "step": 6598000 }, { "epoch": 19.1, "learning_rate": 4.045349406030011e-05, "loss": 2.1845, "step": 6598500 }, { "epoch": 19.1, "learning_rate": 4.045277185994813e-05, "loss": 2.159, "step": 6599000 }, { "epoch": 19.1, "learning_rate": 4.0452048212300856e-05, "loss": 2.1834, "step": 6599500 }, { "epoch": 19.1, "learning_rate": 4.045132456465358e-05, "loss": 2.2199, "step": 6600000 }, { "epoch": 19.11, "learning_rate": 4.04506009170063e-05, "loss": 2.1982, "step": 6600500 }, { "epoch": 19.11, "learning_rate": 4.044987726935902e-05, "loss": 2.1871, "step": 6601000 }, { "epoch": 19.11, "learning_rate": 4.044915362171175e-05, "loss": 2.1988, "step": 6601500 }, { "epoch": 19.11, "learning_rate": 4.0448429974064474e-05, "loss": 2.181, "step": 6602000 }, { "epoch": 19.11, "learning_rate": 4.0447706326417196e-05, "loss": 2.2015, "step": 6602500 }, { "epoch": 19.11, "learning_rate": 4.044698267876992e-05, "loss": 2.199, "step": 6603000 }, { "epoch": 19.11, "learning_rate": 4.044625903112264e-05, "loss": 2.1809, "step": 6603500 }, { "epoch": 19.12, "learning_rate": 4.0445536830770656e-05, "loss": 2.1746, "step": 6604000 }, { "epoch": 19.12, "learning_rate": 4.044481318312338e-05, "loss": 2.1994, "step": 6604500 }, { "epoch": 19.12, "learning_rate": 4.044408953547611e-05, "loss": 2.1937, "step": 6605000 }, { "epoch": 19.12, "learning_rate": 4.044336733512412e-05, "loss": 2.1929, "step": 6605500 }, { "epoch": 19.12, "learning_rate": 4.0442643687476845e-05, "loss": 2.1964, "step": 6606000 }, { "epoch": 19.12, "learning_rate": 4.044192003982957e-05, "loss": 2.1868, "step": 6606500 }, { "epoch": 19.12, "learning_rate": 4.044119639218229e-05, "loss": 2.1799, "step": 6607000 }, { "epoch": 19.13, "learning_rate": 4.044047274453501e-05, "loss": 2.186, "step": 6607500 }, { "epoch": 19.13, "learning_rate": 4.0439749096887734e-05, "loss": 2.1848, "step": 6608000 }, { "epoch": 19.13, "learning_rate": 4.043902544924046e-05, "loss": 2.2073, "step": 6608500 }, { "epoch": 19.13, "learning_rate": 4.043830324888848e-05, "loss": 2.1942, "step": 6609000 }, { "epoch": 19.13, "learning_rate": 4.043757960124121e-05, "loss": 2.1969, "step": 6609500 }, { "epoch": 19.13, "learning_rate": 4.043685595359393e-05, "loss": 2.1843, "step": 6610000 }, { "epoch": 19.13, "learning_rate": 4.043613230594665e-05, "loss": 2.1945, "step": 6610500 }, { "epoch": 19.14, "learning_rate": 4.0435408658299374e-05, "loss": 2.1787, "step": 6611000 }, { "epoch": 19.14, "learning_rate": 4.0434685010652097e-05, "loss": 2.2042, "step": 6611500 }, { "epoch": 19.14, "learning_rate": 4.043396136300482e-05, "loss": 2.1861, "step": 6612000 }, { "epoch": 19.14, "learning_rate": 4.043323771535754e-05, "loss": 2.213, "step": 6612500 }, { "epoch": 19.14, "learning_rate": 4.043251406771026e-05, "loss": 2.1987, "step": 6613000 }, { "epoch": 19.14, "learning_rate": 4.0431790420062985e-05, "loss": 2.1976, "step": 6613500 }, { "epoch": 19.14, "learning_rate": 4.043106677241571e-05, "loss": 2.185, "step": 6614000 }, { "epoch": 19.15, "learning_rate": 4.043034312476843e-05, "loss": 2.1997, "step": 6614500 }, { "epoch": 19.15, "learning_rate": 4.042961947712116e-05, "loss": 2.1999, "step": 6615000 }, { "epoch": 19.15, "learning_rate": 4.042889582947388e-05, "loss": 2.1979, "step": 6615500 }, { "epoch": 19.15, "learning_rate": 4.042817218182661e-05, "loss": 2.2022, "step": 6616000 }, { "epoch": 19.15, "learning_rate": 4.0427449981474626e-05, "loss": 2.1927, "step": 6616500 }, { "epoch": 19.15, "learning_rate": 4.042672633382735e-05, "loss": 2.1835, "step": 6617000 }, { "epoch": 19.15, "learning_rate": 4.042600268618007e-05, "loss": 2.1965, "step": 6617500 }, { "epoch": 19.16, "learning_rate": 4.042527903853279e-05, "loss": 2.1653, "step": 6618000 }, { "epoch": 19.16, "learning_rate": 4.0424555390885515e-05, "loss": 2.1848, "step": 6618500 }, { "epoch": 19.16, "learning_rate": 4.042383319053353e-05, "loss": 2.1851, "step": 6619000 }, { "epoch": 19.16, "learning_rate": 4.042310954288626e-05, "loss": 2.2026, "step": 6619500 }, { "epoch": 19.16, "learning_rate": 4.042238589523898e-05, "loss": 2.1681, "step": 6620000 }, { "epoch": 19.16, "learning_rate": 4.0421662247591704e-05, "loss": 2.1686, "step": 6620500 }, { "epoch": 19.17, "learning_rate": 4.0420938599944426e-05, "loss": 2.2037, "step": 6621000 }, { "epoch": 19.17, "learning_rate": 4.042021495229715e-05, "loss": 2.1938, "step": 6621500 }, { "epoch": 19.17, "learning_rate": 4.0419492751945164e-05, "loss": 2.1828, "step": 6622000 }, { "epoch": 19.17, "learning_rate": 4.0418769104297886e-05, "loss": 2.1805, "step": 6622500 }, { "epoch": 19.17, "learning_rate": 4.041804545665061e-05, "loss": 2.1885, "step": 6623000 }, { "epoch": 19.17, "learning_rate": 4.041732180900334e-05, "loss": 2.1707, "step": 6623500 }, { "epoch": 19.17, "learning_rate": 4.041659816135606e-05, "loss": 2.1856, "step": 6624000 }, { "epoch": 19.18, "learning_rate": 4.041587451370878e-05, "loss": 2.2082, "step": 6624500 }, { "epoch": 19.18, "learning_rate": 4.041515086606151e-05, "loss": 2.1944, "step": 6625000 }, { "epoch": 19.18, "learning_rate": 4.041442721841423e-05, "loss": 2.1683, "step": 6625500 }, { "epoch": 19.18, "learning_rate": 4.0413703570766955e-05, "loss": 2.1901, "step": 6626000 }, { "epoch": 19.18, "learning_rate": 4.0412982817710264e-05, "loss": 2.2121, "step": 6626500 }, { "epoch": 19.18, "learning_rate": 4.0412259170062986e-05, "loss": 2.2108, "step": 6627000 }, { "epoch": 19.18, "learning_rate": 4.041153552241571e-05, "loss": 2.1826, "step": 6627500 }, { "epoch": 19.19, "learning_rate": 4.041081187476843e-05, "loss": 2.1906, "step": 6628000 }, { "epoch": 19.19, "learning_rate": 4.041008822712116e-05, "loss": 2.1902, "step": 6628500 }, { "epoch": 19.19, "learning_rate": 4.040936457947388e-05, "loss": 2.2146, "step": 6629000 }, { "epoch": 19.19, "learning_rate": 4.0408640931826604e-05, "loss": 2.1805, "step": 6629500 }, { "epoch": 19.19, "learning_rate": 4.040791873147462e-05, "loss": 2.2137, "step": 6630000 }, { "epoch": 19.19, "learning_rate": 4.040719508382734e-05, "loss": 2.1918, "step": 6630500 }, { "epoch": 19.19, "learning_rate": 4.0406472883475364e-05, "loss": 2.1649, "step": 6631000 }, { "epoch": 19.2, "learning_rate": 4.0405749235828087e-05, "loss": 2.1951, "step": 6631500 }, { "epoch": 19.2, "learning_rate": 4.040502558818081e-05, "loss": 2.1928, "step": 6632000 }, { "epoch": 19.2, "learning_rate": 4.040430194053353e-05, "loss": 2.2087, "step": 6632500 }, { "epoch": 19.2, "learning_rate": 4.040357974018155e-05, "loss": 2.2152, "step": 6633000 }, { "epoch": 19.2, "learning_rate": 4.0402856092534276e-05, "loss": 2.1875, "step": 6633500 }, { "epoch": 19.2, "learning_rate": 4.0402132444887e-05, "loss": 2.1719, "step": 6634000 }, { "epoch": 19.2, "learning_rate": 4.040140879723972e-05, "loss": 2.1662, "step": 6634500 }, { "epoch": 19.21, "learning_rate": 4.040068514959244e-05, "loss": 2.2123, "step": 6635000 }, { "epoch": 19.21, "learning_rate": 4.0399961501945165e-05, "loss": 2.2067, "step": 6635500 }, { "epoch": 19.21, "learning_rate": 4.039923785429789e-05, "loss": 2.2215, "step": 6636000 }, { "epoch": 19.21, "learning_rate": 4.039851420665061e-05, "loss": 2.1941, "step": 6636500 }, { "epoch": 19.21, "learning_rate": 4.039779055900333e-05, "loss": 2.1735, "step": 6637000 }, { "epoch": 19.21, "learning_rate": 4.0397068358651354e-05, "loss": 2.1873, "step": 6637500 }, { "epoch": 19.21, "learning_rate": 4.039634615829937e-05, "loss": 2.19, "step": 6638000 }, { "epoch": 19.22, "learning_rate": 4.03956225106521e-05, "loss": 2.1955, "step": 6638500 }, { "epoch": 19.22, "learning_rate": 4.039489886300482e-05, "loss": 2.1935, "step": 6639000 }, { "epoch": 19.22, "learning_rate": 4.039417521535754e-05, "loss": 2.1985, "step": 6639500 }, { "epoch": 19.22, "learning_rate": 4.0393451567710265e-05, "loss": 2.1651, "step": 6640000 }, { "epoch": 19.22, "learning_rate": 4.039272792006299e-05, "loss": 2.2108, "step": 6640500 }, { "epoch": 19.22, "learning_rate": 4.039200427241571e-05, "loss": 2.1798, "step": 6641000 }, { "epoch": 19.22, "learning_rate": 4.039128062476844e-05, "loss": 2.1984, "step": 6641500 }, { "epoch": 19.23, "learning_rate": 4.039055697712116e-05, "loss": 2.1909, "step": 6642000 }, { "epoch": 19.23, "learning_rate": 4.0389834776769176e-05, "loss": 2.1784, "step": 6642500 }, { "epoch": 19.23, "learning_rate": 4.03891111291219e-05, "loss": 2.1895, "step": 6643000 }, { "epoch": 19.23, "learning_rate": 4.038838748147462e-05, "loss": 2.2007, "step": 6643500 }, { "epoch": 19.23, "learning_rate": 4.0387665281122636e-05, "loss": 2.1809, "step": 6644000 }, { "epoch": 19.23, "learning_rate": 4.038694163347536e-05, "loss": 2.2148, "step": 6644500 }, { "epoch": 19.23, "learning_rate": 4.038621798582809e-05, "loss": 2.1965, "step": 6645000 }, { "epoch": 19.24, "learning_rate": 4.038549433818081e-05, "loss": 2.2322, "step": 6645500 }, { "epoch": 19.24, "learning_rate": 4.038477069053354e-05, "loss": 2.2017, "step": 6646000 }, { "epoch": 19.24, "learning_rate": 4.038404704288626e-05, "loss": 2.181, "step": 6646500 }, { "epoch": 19.24, "learning_rate": 4.038332339523898e-05, "loss": 2.196, "step": 6647000 }, { "epoch": 19.24, "learning_rate": 4.0382599747591705e-05, "loss": 2.1998, "step": 6647500 }, { "epoch": 19.24, "learning_rate": 4.038187754723972e-05, "loss": 2.2068, "step": 6648000 }, { "epoch": 19.24, "learning_rate": 4.038115389959244e-05, "loss": 2.1861, "step": 6648500 }, { "epoch": 19.25, "learning_rate": 4.038043169924046e-05, "loss": 2.2148, "step": 6649000 }, { "epoch": 19.25, "learning_rate": 4.037970805159319e-05, "loss": 2.2102, "step": 6649500 }, { "epoch": 19.25, "learning_rate": 4.037898440394591e-05, "loss": 2.1926, "step": 6650000 }, { "epoch": 19.25, "learning_rate": 4.037826075629863e-05, "loss": 2.1843, "step": 6650500 }, { "epoch": 19.25, "learning_rate": 4.0377537108651354e-05, "loss": 2.1763, "step": 6651000 }, { "epoch": 19.25, "learning_rate": 4.0376813461004077e-05, "loss": 2.1979, "step": 6651500 }, { "epoch": 19.25, "learning_rate": 4.03760898133568e-05, "loss": 2.1776, "step": 6652000 }, { "epoch": 19.26, "learning_rate": 4.0375367613004814e-05, "loss": 2.1904, "step": 6652500 }, { "epoch": 19.26, "learning_rate": 4.037464396535754e-05, "loss": 2.1847, "step": 6653000 }, { "epoch": 19.26, "learning_rate": 4.0373920317710266e-05, "loss": 2.2326, "step": 6653500 }, { "epoch": 19.26, "learning_rate": 4.037319667006299e-05, "loss": 2.2081, "step": 6654000 }, { "epoch": 19.26, "learning_rate": 4.037247302241571e-05, "loss": 2.2012, "step": 6654500 }, { "epoch": 19.26, "learning_rate": 4.037174937476844e-05, "loss": 2.1606, "step": 6655000 }, { "epoch": 19.26, "learning_rate": 4.037102572712116e-05, "loss": 2.1812, "step": 6655500 }, { "epoch": 19.27, "learning_rate": 4.0370302079473884e-05, "loss": 2.2279, "step": 6656000 }, { "epoch": 19.27, "learning_rate": 4.03695798791219e-05, "loss": 2.2002, "step": 6656500 }, { "epoch": 19.27, "learning_rate": 4.036885623147462e-05, "loss": 2.207, "step": 6657000 }, { "epoch": 19.27, "learning_rate": 4.0368132583827344e-05, "loss": 2.1946, "step": 6657500 }, { "epoch": 19.27, "learning_rate": 4.0367408936180066e-05, "loss": 2.189, "step": 6658000 }, { "epoch": 19.27, "learning_rate": 4.036668673582809e-05, "loss": 2.1887, "step": 6658500 }, { "epoch": 19.28, "learning_rate": 4.036596308818081e-05, "loss": 2.2034, "step": 6659000 }, { "epoch": 19.28, "learning_rate": 4.0365240887828826e-05, "loss": 2.2004, "step": 6659500 }, { "epoch": 19.28, "learning_rate": 4.036451724018155e-05, "loss": 2.2219, "step": 6660000 }, { "epoch": 19.28, "learning_rate": 4.036379359253427e-05, "loss": 2.2028, "step": 6660500 }, { "epoch": 19.28, "learning_rate": 4.0363069944887e-05, "loss": 2.2232, "step": 6661000 }, { "epoch": 19.28, "learning_rate": 4.0362347744535015e-05, "loss": 2.2095, "step": 6661500 }, { "epoch": 19.28, "learning_rate": 4.036162409688774e-05, "loss": 2.2054, "step": 6662000 }, { "epoch": 19.29, "learning_rate": 4.0360900449240466e-05, "loss": 2.2042, "step": 6662500 }, { "epoch": 19.29, "learning_rate": 4.036017680159319e-05, "loss": 2.2078, "step": 6663000 }, { "epoch": 19.29, "learning_rate": 4.035945315394591e-05, "loss": 2.2, "step": 6663500 }, { "epoch": 19.29, "learning_rate": 4.0358730953593926e-05, "loss": 2.195, "step": 6664000 }, { "epoch": 19.29, "learning_rate": 4.035800730594665e-05, "loss": 2.1939, "step": 6664500 }, { "epoch": 19.29, "learning_rate": 4.035728365829937e-05, "loss": 2.1852, "step": 6665000 }, { "epoch": 19.29, "learning_rate": 4.035656001065209e-05, "loss": 2.1952, "step": 6665500 }, { "epoch": 19.3, "learning_rate": 4.0355836363004815e-05, "loss": 2.1878, "step": 6666000 }, { "epoch": 19.3, "learning_rate": 4.035511271535754e-05, "loss": 2.1727, "step": 6666500 }, { "epoch": 19.3, "learning_rate": 4.0354389067710266e-05, "loss": 2.2001, "step": 6667000 }, { "epoch": 19.3, "learning_rate": 4.035366686735828e-05, "loss": 2.1879, "step": 6667500 }, { "epoch": 19.3, "learning_rate": 4.0352943219711004e-05, "loss": 2.187, "step": 6668000 }, { "epoch": 19.3, "learning_rate": 4.035221957206373e-05, "loss": 2.1977, "step": 6668500 }, { "epoch": 19.3, "learning_rate": 4.0351495924416455e-05, "loss": 2.2013, "step": 6669000 }, { "epoch": 19.31, "learning_rate": 4.035077227676918e-05, "loss": 2.2012, "step": 6669500 }, { "epoch": 19.31, "learning_rate": 4.03500486291219e-05, "loss": 2.218, "step": 6670000 }, { "epoch": 19.31, "learning_rate": 4.034932498147462e-05, "loss": 2.1839, "step": 6670500 }, { "epoch": 19.31, "learning_rate": 4.0348601333827344e-05, "loss": 2.1914, "step": 6671000 }, { "epoch": 19.31, "learning_rate": 4.034787768618007e-05, "loss": 2.1932, "step": 6671500 }, { "epoch": 19.31, "learning_rate": 4.034715403853279e-05, "loss": 2.1835, "step": 6672000 }, { "epoch": 19.31, "learning_rate": 4.034643039088552e-05, "loss": 2.2225, "step": 6672500 }, { "epoch": 19.32, "learning_rate": 4.0345708190533533e-05, "loss": 2.1971, "step": 6673000 }, { "epoch": 19.32, "learning_rate": 4.0344984542886256e-05, "loss": 2.2181, "step": 6673500 }, { "epoch": 19.32, "learning_rate": 4.034426089523898e-05, "loss": 2.2077, "step": 6674000 }, { "epoch": 19.32, "learning_rate": 4.03435372475917e-05, "loss": 2.1748, "step": 6674500 }, { "epoch": 19.32, "learning_rate": 4.034281359994442e-05, "loss": 2.1993, "step": 6675000 }, { "epoch": 19.32, "learning_rate": 4.0342089952297145e-05, "loss": 2.2028, "step": 6675500 }, { "epoch": 19.32, "learning_rate": 4.034136775194517e-05, "loss": 2.1766, "step": 6676000 }, { "epoch": 19.33, "learning_rate": 4.034064410429789e-05, "loss": 2.2037, "step": 6676500 }, { "epoch": 19.33, "learning_rate": 4.033992045665062e-05, "loss": 2.1895, "step": 6677000 }, { "epoch": 19.33, "learning_rate": 4.033919680900334e-05, "loss": 2.2126, "step": 6677500 }, { "epoch": 19.33, "learning_rate": 4.0338474608651356e-05, "loss": 2.1948, "step": 6678000 }, { "epoch": 19.33, "learning_rate": 4.033775096100408e-05, "loss": 2.1996, "step": 6678500 }, { "epoch": 19.33, "learning_rate": 4.03370273133568e-05, "loss": 2.1799, "step": 6679000 }, { "epoch": 19.33, "learning_rate": 4.0336305113004816e-05, "loss": 2.1976, "step": 6679500 }, { "epoch": 19.34, "learning_rate": 4.033558146535754e-05, "loss": 2.2098, "step": 6680000 }, { "epoch": 19.34, "learning_rate": 4.033485781771027e-05, "loss": 2.2152, "step": 6680500 }, { "epoch": 19.34, "learning_rate": 4.033413417006299e-05, "loss": 2.1822, "step": 6681000 }, { "epoch": 19.34, "learning_rate": 4.033341052241571e-05, "loss": 2.1933, "step": 6681500 }, { "epoch": 19.34, "learning_rate": 4.0332686874768434e-05, "loss": 2.1993, "step": 6682000 }, { "epoch": 19.34, "learning_rate": 4.0331963227121156e-05, "loss": 2.213, "step": 6682500 }, { "epoch": 19.34, "learning_rate": 4.033123957947388e-05, "loss": 2.2035, "step": 6683000 }, { "epoch": 19.35, "learning_rate": 4.033051593182661e-05, "loss": 2.2032, "step": 6683500 }, { "epoch": 19.35, "learning_rate": 4.032979228417933e-05, "loss": 2.1806, "step": 6684000 }, { "epoch": 19.35, "learning_rate": 4.032906863653205e-05, "loss": 2.1933, "step": 6684500 }, { "epoch": 19.35, "learning_rate": 4.0328344988884774e-05, "loss": 2.2224, "step": 6685000 }, { "epoch": 19.35, "learning_rate": 4.0327621341237496e-05, "loss": 2.205, "step": 6685500 }, { "epoch": 19.35, "learning_rate": 4.032689914088552e-05, "loss": 2.2125, "step": 6686000 }, { "epoch": 19.35, "learning_rate": 4.032617549323824e-05, "loss": 2.2165, "step": 6686500 }, { "epoch": 19.36, "learning_rate": 4.032545184559096e-05, "loss": 2.2037, "step": 6687000 }, { "epoch": 19.36, "learning_rate": 4.0324728197943685e-05, "loss": 2.2124, "step": 6687500 }, { "epoch": 19.36, "learning_rate": 4.03240059975917e-05, "loss": 2.1957, "step": 6688000 }, { "epoch": 19.36, "learning_rate": 4.032328234994442e-05, "loss": 2.1977, "step": 6688500 }, { "epoch": 19.36, "learning_rate": 4.0322558702297145e-05, "loss": 2.1854, "step": 6689000 }, { "epoch": 19.36, "learning_rate": 4.032183505464987e-05, "loss": 2.1769, "step": 6689500 }, { "epoch": 19.36, "learning_rate": 4.032111140700259e-05, "loss": 2.1957, "step": 6690000 }, { "epoch": 19.37, "learning_rate": 4.032038775935532e-05, "loss": 2.1904, "step": 6690500 }, { "epoch": 19.37, "learning_rate": 4.031966555900334e-05, "loss": 2.1974, "step": 6691000 }, { "epoch": 19.37, "learning_rate": 4.0318941911356063e-05, "loss": 2.2067, "step": 6691500 }, { "epoch": 19.37, "learning_rate": 4.0318218263708786e-05, "loss": 2.1782, "step": 6692000 }, { "epoch": 19.37, "learning_rate": 4.031749461606151e-05, "loss": 2.1948, "step": 6692500 }, { "epoch": 19.37, "learning_rate": 4.031677096841423e-05, "loss": 2.1947, "step": 6693000 }, { "epoch": 19.37, "learning_rate": 4.031604732076695e-05, "loss": 2.1972, "step": 6693500 }, { "epoch": 19.38, "learning_rate": 4.0315323673119675e-05, "loss": 2.1981, "step": 6694000 }, { "epoch": 19.38, "learning_rate": 4.03146000254724e-05, "loss": 2.2073, "step": 6694500 }, { "epoch": 19.38, "learning_rate": 4.031387637782512e-05, "loss": 2.2195, "step": 6695000 }, { "epoch": 19.38, "learning_rate": 4.0313155624768435e-05, "loss": 2.195, "step": 6695500 }, { "epoch": 19.38, "learning_rate": 4.031243197712116e-05, "loss": 2.2027, "step": 6696000 }, { "epoch": 19.38, "learning_rate": 4.031170832947388e-05, "loss": 2.2058, "step": 6696500 }, { "epoch": 19.39, "learning_rate": 4.03109846818266e-05, "loss": 2.1848, "step": 6697000 }, { "epoch": 19.39, "learning_rate": 4.0310261034179324e-05, "loss": 2.1989, "step": 6697500 }, { "epoch": 19.39, "learning_rate": 4.0309538833827346e-05, "loss": 2.2232, "step": 6698000 }, { "epoch": 19.39, "learning_rate": 4.030881518618007e-05, "loss": 2.19, "step": 6698500 }, { "epoch": 19.39, "learning_rate": 4.03080915385328e-05, "loss": 2.1857, "step": 6699000 }, { "epoch": 19.39, "learning_rate": 4.030736789088552e-05, "loss": 2.2108, "step": 6699500 }, { "epoch": 19.39, "learning_rate": 4.030664424323824e-05, "loss": 2.1965, "step": 6700000 }, { "epoch": 19.4, "learning_rate": 4.0305920595590964e-05, "loss": 2.2032, "step": 6700500 }, { "epoch": 19.4, "learning_rate": 4.0305196947943686e-05, "loss": 2.2082, "step": 6701000 }, { "epoch": 19.4, "learning_rate": 4.030447330029641e-05, "loss": 2.1999, "step": 6701500 }, { "epoch": 19.4, "learning_rate": 4.0303751099944424e-05, "loss": 2.2083, "step": 6702000 }, { "epoch": 19.4, "learning_rate": 4.0303027452297146e-05, "loss": 2.1802, "step": 6702500 }, { "epoch": 19.4, "learning_rate": 4.030230380464987e-05, "loss": 2.195, "step": 6703000 }, { "epoch": 19.4, "learning_rate": 4.030158160429789e-05, "loss": 2.1774, "step": 6703500 }, { "epoch": 19.41, "learning_rate": 4.030085795665061e-05, "loss": 2.1838, "step": 6704000 }, { "epoch": 19.41, "learning_rate": 4.0300134309003335e-05, "loss": 2.2008, "step": 6704500 }, { "epoch": 19.41, "learning_rate": 4.029941066135606e-05, "loss": 2.1958, "step": 6705000 }, { "epoch": 19.41, "learning_rate": 4.029868701370878e-05, "loss": 2.2014, "step": 6705500 }, { "epoch": 19.41, "learning_rate": 4.029796336606151e-05, "loss": 2.2067, "step": 6706000 }, { "epoch": 19.41, "learning_rate": 4.029723971841423e-05, "loss": 2.1841, "step": 6706500 }, { "epoch": 19.41, "learning_rate": 4.029651607076695e-05, "loss": 2.1804, "step": 6707000 }, { "epoch": 19.42, "learning_rate": 4.029579531771027e-05, "loss": 2.1914, "step": 6707500 }, { "epoch": 19.42, "learning_rate": 4.029507167006299e-05, "loss": 2.1927, "step": 6708000 }, { "epoch": 19.42, "learning_rate": 4.029434802241571e-05, "loss": 2.2043, "step": 6708500 }, { "epoch": 19.42, "learning_rate": 4.0293624374768436e-05, "loss": 2.2168, "step": 6709000 }, { "epoch": 19.42, "learning_rate": 4.029290072712116e-05, "loss": 2.1887, "step": 6709500 }, { "epoch": 19.42, "learning_rate": 4.029217707947388e-05, "loss": 2.2023, "step": 6710000 }, { "epoch": 19.42, "learning_rate": 4.0291454879121896e-05, "loss": 2.2045, "step": 6710500 }, { "epoch": 19.43, "learning_rate": 4.029073123147462e-05, "loss": 2.209, "step": 6711000 }, { "epoch": 19.43, "learning_rate": 4.029000758382735e-05, "loss": 2.1897, "step": 6711500 }, { "epoch": 19.43, "learning_rate": 4.028928393618007e-05, "loss": 2.1852, "step": 6712000 }, { "epoch": 19.43, "learning_rate": 4.028856028853279e-05, "loss": 2.1949, "step": 6712500 }, { "epoch": 19.43, "learning_rate": 4.0287836640885513e-05, "loss": 2.2071, "step": 6713000 }, { "epoch": 19.43, "learning_rate": 4.028711299323824e-05, "loss": 2.2086, "step": 6713500 }, { "epoch": 19.43, "learning_rate": 4.0286389345590965e-05, "loss": 2.2093, "step": 6714000 }, { "epoch": 19.44, "learning_rate": 4.028566569794369e-05, "loss": 2.189, "step": 6714500 }, { "epoch": 19.44, "learning_rate": 4.028494205029641e-05, "loss": 2.2057, "step": 6715000 }, { "epoch": 19.44, "learning_rate": 4.028421840264913e-05, "loss": 2.1859, "step": 6715500 }, { "epoch": 19.44, "learning_rate": 4.0283494755001854e-05, "loss": 2.1977, "step": 6716000 }, { "epoch": 19.44, "learning_rate": 4.0282771107354576e-05, "loss": 2.1706, "step": 6716500 }, { "epoch": 19.44, "learning_rate": 4.02820474597073e-05, "loss": 2.1653, "step": 6717000 }, { "epoch": 19.44, "learning_rate": 4.028132381206002e-05, "loss": 2.2289, "step": 6717500 }, { "epoch": 19.45, "learning_rate": 4.028060016441275e-05, "loss": 2.1964, "step": 6718000 }, { "epoch": 19.45, "learning_rate": 4.0279877964060765e-05, "loss": 2.2091, "step": 6718500 }, { "epoch": 19.45, "learning_rate": 4.027915431641349e-05, "loss": 2.2033, "step": 6719000 }, { "epoch": 19.45, "learning_rate": 4.027843066876621e-05, "loss": 2.2245, "step": 6719500 }, { "epoch": 19.45, "learning_rate": 4.027770702111893e-05, "loss": 2.1863, "step": 6720000 }, { "epoch": 19.45, "learning_rate": 4.027698337347166e-05, "loss": 2.2144, "step": 6720500 }, { "epoch": 19.45, "learning_rate": 4.0276261173119676e-05, "loss": 2.19, "step": 6721000 }, { "epoch": 19.46, "learning_rate": 4.02755375254724e-05, "loss": 2.2027, "step": 6721500 }, { "epoch": 19.46, "learning_rate": 4.027481387782512e-05, "loss": 2.1907, "step": 6722000 }, { "epoch": 19.46, "learning_rate": 4.027409023017785e-05, "loss": 2.2343, "step": 6722500 }, { "epoch": 19.46, "learning_rate": 4.027336658253057e-05, "loss": 2.2079, "step": 6723000 }, { "epoch": 19.46, "learning_rate": 4.027264438217859e-05, "loss": 2.2208, "step": 6723500 }, { "epoch": 19.46, "learning_rate": 4.027192073453131e-05, "loss": 2.1946, "step": 6724000 }, { "epoch": 19.46, "learning_rate": 4.027119708688403e-05, "loss": 2.211, "step": 6724500 }, { "epoch": 19.47, "learning_rate": 4.0270473439236754e-05, "loss": 2.1931, "step": 6725000 }, { "epoch": 19.47, "learning_rate": 4.026975123888477e-05, "loss": 2.1996, "step": 6725500 }, { "epoch": 19.47, "learning_rate": 4.026902903853279e-05, "loss": 2.186, "step": 6726000 }, { "epoch": 19.47, "learning_rate": 4.0268305390885514e-05, "loss": 2.1992, "step": 6726500 }, { "epoch": 19.47, "learning_rate": 4.0267581743238237e-05, "loss": 2.1925, "step": 6727000 }, { "epoch": 19.47, "learning_rate": 4.026685809559096e-05, "loss": 2.2058, "step": 6727500 }, { "epoch": 19.47, "learning_rate": 4.026613444794369e-05, "loss": 2.1998, "step": 6728000 }, { "epoch": 19.48, "learning_rate": 4.02654122475917e-05, "loss": 2.1805, "step": 6728500 }, { "epoch": 19.48, "learning_rate": 4.0264688599944426e-05, "loss": 2.1829, "step": 6729000 }, { "epoch": 19.48, "learning_rate": 4.026396495229715e-05, "loss": 2.19, "step": 6729500 }, { "epoch": 19.48, "learning_rate": 4.026324130464988e-05, "loss": 2.1806, "step": 6730000 }, { "epoch": 19.48, "learning_rate": 4.02625176570026e-05, "loss": 2.2078, "step": 6730500 }, { "epoch": 19.48, "learning_rate": 4.026179400935532e-05, "loss": 2.1777, "step": 6731000 }, { "epoch": 19.48, "learning_rate": 4.0261070361708043e-05, "loss": 2.2134, "step": 6731500 }, { "epoch": 19.49, "learning_rate": 4.0260346714060766e-05, "loss": 2.1964, "step": 6732000 }, { "epoch": 19.49, "learning_rate": 4.025962306641349e-05, "loss": 2.1955, "step": 6732500 }, { "epoch": 19.49, "learning_rate": 4.025889941876621e-05, "loss": 2.1721, "step": 6733000 }, { "epoch": 19.49, "learning_rate": 4.025817577111893e-05, "loss": 2.2037, "step": 6733500 }, { "epoch": 19.49, "learning_rate": 4.0257452123471655e-05, "loss": 2.1858, "step": 6734000 }, { "epoch": 19.49, "learning_rate": 4.025672847582438e-05, "loss": 2.1963, "step": 6734500 }, { "epoch": 19.5, "learning_rate": 4.02560048281771e-05, "loss": 2.1717, "step": 6735000 }, { "epoch": 19.5, "learning_rate": 4.025528118052983e-05, "loss": 2.1703, "step": 6735500 }, { "epoch": 19.5, "learning_rate": 4.025455753288255e-05, "loss": 2.1896, "step": 6736000 }, { "epoch": 19.5, "learning_rate": 4.025383533253057e-05, "loss": 2.1724, "step": 6736500 }, { "epoch": 19.5, "learning_rate": 4.025311313217859e-05, "loss": 2.2151, "step": 6737000 }, { "epoch": 19.5, "learning_rate": 4.025238948453131e-05, "loss": 2.2026, "step": 6737500 }, { "epoch": 19.5, "learning_rate": 4.025166583688403e-05, "loss": 2.1847, "step": 6738000 }, { "epoch": 19.51, "learning_rate": 4.0250942189236755e-05, "loss": 2.2017, "step": 6738500 }, { "epoch": 19.51, "learning_rate": 4.025021854158948e-05, "loss": 2.2132, "step": 6739000 }, { "epoch": 19.51, "learning_rate": 4.02494948939422e-05, "loss": 2.1742, "step": 6739500 }, { "epoch": 19.51, "learning_rate": 4.024877124629493e-05, "loss": 2.1921, "step": 6740000 }, { "epoch": 19.51, "learning_rate": 4.024804759864765e-05, "loss": 2.2057, "step": 6740500 }, { "epoch": 19.51, "learning_rate": 4.024732395100037e-05, "loss": 2.2099, "step": 6741000 }, { "epoch": 19.51, "learning_rate": 4.0246600303353095e-05, "loss": 2.2007, "step": 6741500 }, { "epoch": 19.52, "learning_rate": 4.024587665570582e-05, "loss": 2.1931, "step": 6742000 }, { "epoch": 19.52, "learning_rate": 4.0245153008058546e-05, "loss": 2.2025, "step": 6742500 }, { "epoch": 19.52, "learning_rate": 4.024443080770656e-05, "loss": 2.1922, "step": 6743000 }, { "epoch": 19.52, "learning_rate": 4.024371005464988e-05, "loss": 2.1923, "step": 6743500 }, { "epoch": 19.52, "learning_rate": 4.02429864070026e-05, "loss": 2.1979, "step": 6744000 }, { "epoch": 19.52, "learning_rate": 4.024226275935532e-05, "loss": 2.2192, "step": 6744500 }, { "epoch": 19.52, "learning_rate": 4.0241539111708044e-05, "loss": 2.2139, "step": 6745000 }, { "epoch": 19.53, "learning_rate": 4.0240815464060767e-05, "loss": 2.1979, "step": 6745500 }, { "epoch": 19.53, "learning_rate": 4.024009181641349e-05, "loss": 2.2098, "step": 6746000 }, { "epoch": 19.53, "learning_rate": 4.023936816876621e-05, "loss": 2.1757, "step": 6746500 }, { "epoch": 19.53, "learning_rate": 4.0238645968414227e-05, "loss": 2.1977, "step": 6747000 }, { "epoch": 19.53, "learning_rate": 4.023792232076695e-05, "loss": 2.1954, "step": 6747500 }, { "epoch": 19.53, "learning_rate": 4.023719867311968e-05, "loss": 2.2057, "step": 6748000 }, { "epoch": 19.53, "learning_rate": 4.02364750254724e-05, "loss": 2.195, "step": 6748500 }, { "epoch": 19.54, "learning_rate": 4.023575137782512e-05, "loss": 2.184, "step": 6749000 }, { "epoch": 19.54, "learning_rate": 4.0235027730177844e-05, "loss": 2.1932, "step": 6749500 }, { "epoch": 19.54, "learning_rate": 4.023430408253057e-05, "loss": 2.1906, "step": 6750000 }, { "epoch": 19.54, "learning_rate": 4.0233580434883296e-05, "loss": 2.2255, "step": 6750500 }, { "epoch": 19.54, "learning_rate": 4.023285678723602e-05, "loss": 2.1901, "step": 6751000 }, { "epoch": 19.54, "learning_rate": 4.0232134586884034e-05, "loss": 2.218, "step": 6751500 }, { "epoch": 19.54, "learning_rate": 4.0231410939236756e-05, "loss": 2.1764, "step": 6752000 }, { "epoch": 19.55, "learning_rate": 4.023068873888478e-05, "loss": 2.21, "step": 6752500 }, { "epoch": 19.55, "learning_rate": 4.02299650912375e-05, "loss": 2.216, "step": 6753000 }, { "epoch": 19.55, "learning_rate": 4.022924144359022e-05, "loss": 2.1812, "step": 6753500 }, { "epoch": 19.55, "learning_rate": 4.0228517795942945e-05, "loss": 2.1996, "step": 6754000 }, { "epoch": 19.55, "learning_rate": 4.022779559559096e-05, "loss": 2.1841, "step": 6754500 }, { "epoch": 19.55, "learning_rate": 4.0227073395238976e-05, "loss": 2.2002, "step": 6755000 }, { "epoch": 19.55, "learning_rate": 4.0226349747591705e-05, "loss": 2.208, "step": 6755500 }, { "epoch": 19.56, "learning_rate": 4.022562609994443e-05, "loss": 2.2125, "step": 6756000 }, { "epoch": 19.56, "learning_rate": 4.022490245229715e-05, "loss": 2.2029, "step": 6756500 }, { "epoch": 19.56, "learning_rate": 4.022417880464987e-05, "loss": 2.1934, "step": 6757000 }, { "epoch": 19.56, "learning_rate": 4.0223455157002594e-05, "loss": 2.165, "step": 6757500 }, { "epoch": 19.56, "learning_rate": 4.022273150935532e-05, "loss": 2.1966, "step": 6758000 }, { "epoch": 19.56, "learning_rate": 4.0222007861708045e-05, "loss": 2.2091, "step": 6758500 }, { "epoch": 19.56, "learning_rate": 4.022128421406077e-05, "loss": 2.2038, "step": 6759000 }, { "epoch": 19.57, "learning_rate": 4.022056201370878e-05, "loss": 2.1732, "step": 6759500 }, { "epoch": 19.57, "learning_rate": 4.0219838366061505e-05, "loss": 2.1909, "step": 6760000 }, { "epoch": 19.57, "learning_rate": 4.021911471841423e-05, "loss": 2.1956, "step": 6760500 }, { "epoch": 19.57, "learning_rate": 4.021839251806225e-05, "loss": 2.1845, "step": 6761000 }, { "epoch": 19.57, "learning_rate": 4.021766887041497e-05, "loss": 2.2145, "step": 6761500 }, { "epoch": 19.57, "learning_rate": 4.0216945222767694e-05, "loss": 2.2101, "step": 6762000 }, { "epoch": 19.57, "learning_rate": 4.0216221575120416e-05, "loss": 2.2059, "step": 6762500 }, { "epoch": 19.58, "learning_rate": 4.021549792747314e-05, "loss": 2.2105, "step": 6763000 }, { "epoch": 19.58, "learning_rate": 4.0214775727121154e-05, "loss": 2.1938, "step": 6763500 }, { "epoch": 19.58, "learning_rate": 4.0214053526769176e-05, "loss": 2.193, "step": 6764000 }, { "epoch": 19.58, "learning_rate": 4.02133298791219e-05, "loss": 2.1882, "step": 6764500 }, { "epoch": 19.58, "learning_rate": 4.021260623147462e-05, "loss": 2.1961, "step": 6765000 }, { "epoch": 19.58, "learning_rate": 4.021188258382734e-05, "loss": 2.1918, "step": 6765500 }, { "epoch": 19.58, "learning_rate": 4.021115893618007e-05, "loss": 2.2056, "step": 6766000 }, { "epoch": 19.59, "learning_rate": 4.0210435288532794e-05, "loss": 2.1806, "step": 6766500 }, { "epoch": 19.59, "learning_rate": 4.020971164088552e-05, "loss": 2.2175, "step": 6767000 }, { "epoch": 19.59, "learning_rate": 4.020898799323824e-05, "loss": 2.1912, "step": 6767500 }, { "epoch": 19.59, "learning_rate": 4.020826434559096e-05, "loss": 2.2138, "step": 6768000 }, { "epoch": 19.59, "learning_rate": 4.020754069794368e-05, "loss": 2.2036, "step": 6768500 }, { "epoch": 19.59, "learning_rate": 4.0206817050296406e-05, "loss": 2.183, "step": 6769000 }, { "epoch": 19.59, "learning_rate": 4.020609340264913e-05, "loss": 2.1992, "step": 6769500 }, { "epoch": 19.6, "learning_rate": 4.020536975500186e-05, "loss": 2.209, "step": 6770000 }, { "epoch": 19.6, "learning_rate": 4.020464755464987e-05, "loss": 2.1808, "step": 6770500 }, { "epoch": 19.6, "learning_rate": 4.0203923907002595e-05, "loss": 2.1888, "step": 6771000 }, { "epoch": 19.6, "learning_rate": 4.020320025935532e-05, "loss": 2.207, "step": 6771500 }, { "epoch": 19.6, "learning_rate": 4.020247661170804e-05, "loss": 2.2042, "step": 6772000 }, { "epoch": 19.6, "learning_rate": 4.0201754411356055e-05, "loss": 2.1934, "step": 6772500 }, { "epoch": 19.61, "learning_rate": 4.020103076370878e-05, "loss": 2.2191, "step": 6773000 }, { "epoch": 19.61, "learning_rate": 4.0200307116061506e-05, "loss": 2.1844, "step": 6773500 }, { "epoch": 19.61, "learning_rate": 4.019958346841423e-05, "loss": 2.21, "step": 6774000 }, { "epoch": 19.61, "learning_rate": 4.019885982076696e-05, "loss": 2.1987, "step": 6774500 }, { "epoch": 19.61, "learning_rate": 4.019813617311968e-05, "loss": 2.2102, "step": 6775000 }, { "epoch": 19.61, "learning_rate": 4.01974125254724e-05, "loss": 2.1867, "step": 6775500 }, { "epoch": 19.61, "learning_rate": 4.019669032512042e-05, "loss": 2.2003, "step": 6776000 }, { "epoch": 19.62, "learning_rate": 4.0195971019359026e-05, "loss": 2.2015, "step": 6776500 }, { "epoch": 19.62, "learning_rate": 4.019524737171175e-05, "loss": 2.2109, "step": 6777000 }, { "epoch": 19.62, "learning_rate": 4.019452372406447e-05, "loss": 2.2223, "step": 6777500 }, { "epoch": 19.62, "learning_rate": 4.019380007641719e-05, "loss": 2.1982, "step": 6778000 }, { "epoch": 19.62, "learning_rate": 4.0193076428769915e-05, "loss": 2.1773, "step": 6778500 }, { "epoch": 19.62, "learning_rate": 4.019235278112264e-05, "loss": 2.2017, "step": 6779000 }, { "epoch": 19.62, "learning_rate": 4.019163058077065e-05, "loss": 2.2142, "step": 6779500 }, { "epoch": 19.63, "learning_rate": 4.019090693312338e-05, "loss": 2.1963, "step": 6780000 }, { "epoch": 19.63, "learning_rate": 4.0190183285476104e-05, "loss": 2.1852, "step": 6780500 }, { "epoch": 19.63, "learning_rate": 4.018945963782883e-05, "loss": 2.1988, "step": 6781000 }, { "epoch": 19.63, "learning_rate": 4.0188735990181555e-05, "loss": 2.1826, "step": 6781500 }, { "epoch": 19.63, "learning_rate": 4.018801234253428e-05, "loss": 2.2059, "step": 6782000 }, { "epoch": 19.63, "learning_rate": 4.0187288694887e-05, "loss": 2.2237, "step": 6782500 }, { "epoch": 19.63, "learning_rate": 4.018656504723972e-05, "loss": 2.1972, "step": 6783000 }, { "epoch": 19.64, "learning_rate": 4.0185841399592444e-05, "loss": 2.1832, "step": 6783500 }, { "epoch": 19.64, "learning_rate": 4.0185117751945167e-05, "loss": 2.1942, "step": 6784000 }, { "epoch": 19.64, "learning_rate": 4.018439410429789e-05, "loss": 2.2242, "step": 6784500 }, { "epoch": 19.64, "learning_rate": 4.018367045665061e-05, "loss": 2.22, "step": 6785000 }, { "epoch": 19.64, "learning_rate": 4.018294680900333e-05, "loss": 2.2027, "step": 6785500 }, { "epoch": 19.64, "learning_rate": 4.0182223161356055e-05, "loss": 2.1904, "step": 6786000 }, { "epoch": 19.64, "learning_rate": 4.0181499513708784e-05, "loss": 2.1912, "step": 6786500 }, { "epoch": 19.65, "learning_rate": 4.018077586606151e-05, "loss": 2.1829, "step": 6787000 }, { "epoch": 19.65, "learning_rate": 4.018005366570952e-05, "loss": 2.1843, "step": 6787500 }, { "epoch": 19.65, "learning_rate": 4.0179330018062244e-05, "loss": 2.2278, "step": 6788000 }, { "epoch": 19.65, "learning_rate": 4.0178606370414973e-05, "loss": 2.1986, "step": 6788500 }, { "epoch": 19.65, "learning_rate": 4.0177882722767696e-05, "loss": 2.2172, "step": 6789000 }, { "epoch": 19.65, "learning_rate": 4.017715907512042e-05, "loss": 2.2212, "step": 6789500 }, { "epoch": 19.65, "learning_rate": 4.017643542747314e-05, "loss": 2.2027, "step": 6790000 }, { "epoch": 19.66, "learning_rate": 4.0175713227121156e-05, "loss": 2.208, "step": 6790500 }, { "epoch": 19.66, "learning_rate": 4.017499102676918e-05, "loss": 2.2088, "step": 6791000 }, { "epoch": 19.66, "learning_rate": 4.01742673791219e-05, "loss": 2.207, "step": 6791500 }, { "epoch": 19.66, "learning_rate": 4.017354373147462e-05, "loss": 2.1959, "step": 6792000 }, { "epoch": 19.66, "learning_rate": 4.0172820083827345e-05, "loss": 2.1899, "step": 6792500 }, { "epoch": 19.66, "learning_rate": 4.017209643618007e-05, "loss": 2.2128, "step": 6793000 }, { "epoch": 19.66, "learning_rate": 4.017137423582808e-05, "loss": 2.2035, "step": 6793500 }, { "epoch": 19.67, "learning_rate": 4.0170650588180805e-05, "loss": 2.1831, "step": 6794000 }, { "epoch": 19.67, "learning_rate": 4.0169926940533534e-05, "loss": 2.2129, "step": 6794500 }, { "epoch": 19.67, "learning_rate": 4.0169203292886256e-05, "loss": 2.197, "step": 6795000 }, { "epoch": 19.67, "learning_rate": 4.016847964523898e-05, "loss": 2.1835, "step": 6795500 }, { "epoch": 19.67, "learning_rate": 4.016775599759171e-05, "loss": 2.1974, "step": 6796000 }, { "epoch": 19.67, "learning_rate": 4.016703234994443e-05, "loss": 2.212, "step": 6796500 }, { "epoch": 19.67, "learning_rate": 4.016630870229715e-05, "loss": 2.2273, "step": 6797000 }, { "epoch": 19.68, "learning_rate": 4.0165585054649874e-05, "loss": 2.2138, "step": 6797500 }, { "epoch": 19.68, "learning_rate": 4.0164861407002596e-05, "loss": 2.2346, "step": 6798000 }, { "epoch": 19.68, "learning_rate": 4.016413775935532e-05, "loss": 2.1942, "step": 6798500 }, { "epoch": 19.68, "learning_rate": 4.016341411170804e-05, "loss": 2.1815, "step": 6799000 }, { "epoch": 19.68, "learning_rate": 4.016269046406076e-05, "loss": 2.2309, "step": 6799500 }, { "epoch": 19.68, "learning_rate": 4.0161966816413485e-05, "loss": 2.1963, "step": 6800000 }, { "epoch": 19.68, "learning_rate": 4.016124316876621e-05, "loss": 2.2142, "step": 6800500 }, { "epoch": 19.69, "learning_rate": 4.016052096841423e-05, "loss": 2.2109, "step": 6801000 }, { "epoch": 19.69, "learning_rate": 4.015979732076695e-05, "loss": 2.1888, "step": 6801500 }, { "epoch": 19.69, "learning_rate": 4.0159073673119674e-05, "loss": 2.2088, "step": 6802000 }, { "epoch": 19.69, "learning_rate": 4.015835147276769e-05, "loss": 2.1671, "step": 6802500 }, { "epoch": 19.69, "learning_rate": 4.015762782512042e-05, "loss": 2.2049, "step": 6803000 }, { "epoch": 19.69, "learning_rate": 4.0156905624768434e-05, "loss": 2.2036, "step": 6803500 }, { "epoch": 19.69, "learning_rate": 4.015618197712116e-05, "loss": 2.2175, "step": 6804000 }, { "epoch": 19.7, "learning_rate": 4.0155458329473886e-05, "loss": 2.1838, "step": 6804500 }, { "epoch": 19.7, "learning_rate": 4.015473468182661e-05, "loss": 2.2199, "step": 6805000 }, { "epoch": 19.7, "learning_rate": 4.015401103417933e-05, "loss": 2.2231, "step": 6805500 }, { "epoch": 19.7, "learning_rate": 4.015328738653205e-05, "loss": 2.2181, "step": 6806000 }, { "epoch": 19.7, "learning_rate": 4.0152563738884774e-05, "loss": 2.1993, "step": 6806500 }, { "epoch": 19.7, "learning_rate": 4.01518400912375e-05, "loss": 2.1912, "step": 6807000 }, { "epoch": 19.7, "learning_rate": 4.015111789088551e-05, "loss": 2.2173, "step": 6807500 }, { "epoch": 19.71, "learning_rate": 4.0150394243238235e-05, "loss": 2.2026, "step": 6808000 }, { "epoch": 19.71, "learning_rate": 4.0149670595590964e-05, "loss": 2.2089, "step": 6808500 }, { "epoch": 19.71, "learning_rate": 4.0148946947943686e-05, "loss": 2.1945, "step": 6809000 }, { "epoch": 19.71, "learning_rate": 4.014822330029641e-05, "loss": 2.1993, "step": 6809500 }, { "epoch": 19.71, "learning_rate": 4.014749965264913e-05, "loss": 2.1954, "step": 6810000 }, { "epoch": 19.71, "learning_rate": 4.014677600500186e-05, "loss": 2.1989, "step": 6810500 }, { "epoch": 19.72, "learning_rate": 4.014605235735458e-05, "loss": 2.2043, "step": 6811000 }, { "epoch": 19.72, "learning_rate": 4.01453301570026e-05, "loss": 2.1943, "step": 6811500 }, { "epoch": 19.72, "learning_rate": 4.014460795665061e-05, "loss": 2.2147, "step": 6812000 }, { "epoch": 19.72, "learning_rate": 4.0143884309003335e-05, "loss": 2.2199, "step": 6812500 }, { "epoch": 19.72, "learning_rate": 4.0143160661356064e-05, "loss": 2.2133, "step": 6813000 }, { "epoch": 19.72, "learning_rate": 4.0142437013708786e-05, "loss": 2.211, "step": 6813500 }, { "epoch": 19.72, "learning_rate": 4.014171336606151e-05, "loss": 2.2018, "step": 6814000 }, { "epoch": 19.73, "learning_rate": 4.014098971841423e-05, "loss": 2.2019, "step": 6814500 }, { "epoch": 19.73, "learning_rate": 4.014026607076695e-05, "loss": 2.1805, "step": 6815000 }, { "epoch": 19.73, "learning_rate": 4.0139542423119675e-05, "loss": 2.2068, "step": 6815500 }, { "epoch": 19.73, "learning_rate": 4.01388187754724e-05, "loss": 2.2421, "step": 6816000 }, { "epoch": 19.73, "learning_rate": 4.013809512782512e-05, "loss": 2.1797, "step": 6816500 }, { "epoch": 19.73, "learning_rate": 4.013737148017784e-05, "loss": 2.1602, "step": 6817000 }, { "epoch": 19.73, "learning_rate": 4.0136647832530564e-05, "loss": 2.1954, "step": 6817500 }, { "epoch": 19.74, "learning_rate": 4.0135925632178586e-05, "loss": 2.1853, "step": 6818000 }, { "epoch": 19.74, "learning_rate": 4.0135201984531315e-05, "loss": 2.2073, "step": 6818500 }, { "epoch": 19.74, "learning_rate": 4.013447833688404e-05, "loss": 2.1894, "step": 6819000 }, { "epoch": 19.74, "learning_rate": 4.013375468923676e-05, "loss": 2.207, "step": 6819500 }, { "epoch": 19.74, "learning_rate": 4.013303393618007e-05, "loss": 2.196, "step": 6820000 }, { "epoch": 19.74, "learning_rate": 4.013231028853279e-05, "loss": 2.1892, "step": 6820500 }, { "epoch": 19.74, "learning_rate": 4.013158664088551e-05, "loss": 2.1806, "step": 6821000 }, { "epoch": 19.75, "learning_rate": 4.0130862993238235e-05, "loss": 2.199, "step": 6821500 }, { "epoch": 19.75, "learning_rate": 4.0130139345590964e-05, "loss": 2.2203, "step": 6822000 }, { "epoch": 19.75, "learning_rate": 4.0129415697943687e-05, "loss": 2.1911, "step": 6822500 }, { "epoch": 19.75, "learning_rate": 4.01286934975917e-05, "loss": 2.1741, "step": 6823000 }, { "epoch": 19.75, "learning_rate": 4.012797129723972e-05, "loss": 2.2012, "step": 6823500 }, { "epoch": 19.75, "learning_rate": 4.012724764959244e-05, "loss": 2.2076, "step": 6824000 }, { "epoch": 19.75, "learning_rate": 4.012652400194516e-05, "loss": 2.2091, "step": 6824500 }, { "epoch": 19.76, "learning_rate": 4.0125800354297884e-05, "loss": 2.204, "step": 6825000 }, { "epoch": 19.76, "learning_rate": 4.012507670665061e-05, "loss": 2.2232, "step": 6825500 }, { "epoch": 19.76, "learning_rate": 4.0124353059003336e-05, "loss": 2.2057, "step": 6826000 }, { "epoch": 19.76, "learning_rate": 4.0123629411356065e-05, "loss": 2.1927, "step": 6826500 }, { "epoch": 19.76, "learning_rate": 4.012290576370879e-05, "loss": 2.2027, "step": 6827000 }, { "epoch": 19.76, "learning_rate": 4.0122185010652096e-05, "loss": 2.1895, "step": 6827500 }, { "epoch": 19.76, "learning_rate": 4.012146136300482e-05, "loss": 2.1875, "step": 6828000 }, { "epoch": 19.77, "learning_rate": 4.012073771535754e-05, "loss": 2.1964, "step": 6828500 }, { "epoch": 19.77, "learning_rate": 4.012001406771026e-05, "loss": 2.2251, "step": 6829000 }, { "epoch": 19.77, "learning_rate": 4.011929042006299e-05, "loss": 2.2008, "step": 6829500 }, { "epoch": 19.77, "learning_rate": 4.0118566772415714e-05, "loss": 2.2016, "step": 6830000 }, { "epoch": 19.77, "learning_rate": 4.011784457206373e-05, "loss": 2.1897, "step": 6830500 }, { "epoch": 19.77, "learning_rate": 4.011712092441645e-05, "loss": 2.2225, "step": 6831000 }, { "epoch": 19.77, "learning_rate": 4.0116397276769174e-05, "loss": 2.2002, "step": 6831500 }, { "epoch": 19.78, "learning_rate": 4.0115673629121896e-05, "loss": 2.2016, "step": 6832000 }, { "epoch": 19.78, "learning_rate": 4.011494998147462e-05, "loss": 2.1609, "step": 6832500 }, { "epoch": 19.78, "learning_rate": 4.011422633382734e-05, "loss": 2.1917, "step": 6833000 }, { "epoch": 19.78, "learning_rate": 4.011350413347536e-05, "loss": 2.1826, "step": 6833500 }, { "epoch": 19.78, "learning_rate": 4.011278048582809e-05, "loss": 2.1872, "step": 6834000 }, { "epoch": 19.78, "learning_rate": 4.0112056838180814e-05, "loss": 2.1748, "step": 6834500 }, { "epoch": 19.78, "learning_rate": 4.0111333190533536e-05, "loss": 2.2007, "step": 6835000 }, { "epoch": 19.79, "learning_rate": 4.011061099018155e-05, "loss": 2.2029, "step": 6835500 }, { "epoch": 19.79, "learning_rate": 4.0109887342534274e-05, "loss": 2.2043, "step": 6836000 }, { "epoch": 19.79, "learning_rate": 4.0109163694886996e-05, "loss": 2.1884, "step": 6836500 }, { "epoch": 19.79, "learning_rate": 4.010844004723972e-05, "loss": 2.1946, "step": 6837000 }, { "epoch": 19.79, "learning_rate": 4.010771639959244e-05, "loss": 2.2101, "step": 6837500 }, { "epoch": 19.79, "learning_rate": 4.010699275194516e-05, "loss": 2.2165, "step": 6838000 }, { "epoch": 19.79, "learning_rate": 4.010626910429789e-05, "loss": 2.2138, "step": 6838500 }, { "epoch": 19.8, "learning_rate": 4.0105545456650614e-05, "loss": 2.2036, "step": 6839000 }, { "epoch": 19.8, "learning_rate": 4.010482325629863e-05, "loss": 2.2116, "step": 6839500 }, { "epoch": 19.8, "learning_rate": 4.010409960865135e-05, "loss": 2.1896, "step": 6840000 }, { "epoch": 19.8, "learning_rate": 4.0103375961004074e-05, "loss": 2.1874, "step": 6840500 }, { "epoch": 19.8, "learning_rate": 4.01026523133568e-05, "loss": 2.1939, "step": 6841000 }, { "epoch": 19.8, "learning_rate": 4.0101928665709525e-05, "loss": 2.2027, "step": 6841500 }, { "epoch": 19.8, "learning_rate": 4.010120501806225e-05, "loss": 2.2109, "step": 6842000 }, { "epoch": 19.81, "learning_rate": 4.010048137041497e-05, "loss": 2.1994, "step": 6842500 }, { "epoch": 19.81, "learning_rate": 4.009975772276769e-05, "loss": 2.1692, "step": 6843000 }, { "epoch": 19.81, "learning_rate": 4.0099035522415714e-05, "loss": 2.1784, "step": 6843500 }, { "epoch": 19.81, "learning_rate": 4.009831187476844e-05, "loss": 2.1942, "step": 6844000 }, { "epoch": 19.81, "learning_rate": 4.009758822712116e-05, "loss": 2.2209, "step": 6844500 }, { "epoch": 19.81, "learning_rate": 4.009686457947388e-05, "loss": 2.2059, "step": 6845000 }, { "epoch": 19.81, "learning_rate": 4.0096140931826603e-05, "loss": 2.1967, "step": 6845500 }, { "epoch": 19.82, "learning_rate": 4.0095417284179326e-05, "loss": 2.2082, "step": 6846000 }, { "epoch": 19.82, "learning_rate": 4.009469363653205e-05, "loss": 2.1688, "step": 6846500 }, { "epoch": 19.82, "learning_rate": 4.009396998888477e-05, "loss": 2.2202, "step": 6847000 }, { "epoch": 19.82, "learning_rate": 4.009324778853279e-05, "loss": 2.1967, "step": 6847500 }, { "epoch": 19.82, "learning_rate": 4.009252558818081e-05, "loss": 2.1909, "step": 6848000 }, { "epoch": 19.82, "learning_rate": 4.009180194053354e-05, "loss": 2.1748, "step": 6848500 }, { "epoch": 19.83, "learning_rate": 4.009107974018155e-05, "loss": 2.1747, "step": 6849000 }, { "epoch": 19.83, "learning_rate": 4.0090356092534275e-05, "loss": 2.187, "step": 6849500 }, { "epoch": 19.83, "learning_rate": 4.0089632444887e-05, "loss": 2.2181, "step": 6850000 }, { "epoch": 19.83, "learning_rate": 4.008890879723972e-05, "loss": 2.203, "step": 6850500 }, { "epoch": 19.83, "learning_rate": 4.008818659688774e-05, "loss": 2.1954, "step": 6851000 }, { "epoch": 19.83, "learning_rate": 4.0087462949240464e-05, "loss": 2.2017, "step": 6851500 }, { "epoch": 19.83, "learning_rate": 4.0086739301593186e-05, "loss": 2.2075, "step": 6852000 }, { "epoch": 19.84, "learning_rate": 4.008601565394591e-05, "loss": 2.2293, "step": 6852500 }, { "epoch": 19.84, "learning_rate": 4.008529200629863e-05, "loss": 2.2148, "step": 6853000 }, { "epoch": 19.84, "learning_rate": 4.008456835865135e-05, "loss": 2.1706, "step": 6853500 }, { "epoch": 19.84, "learning_rate": 4.0083844711004075e-05, "loss": 2.1996, "step": 6854000 }, { "epoch": 19.84, "learning_rate": 4.00831210633568e-05, "loss": 2.2068, "step": 6854500 }, { "epoch": 19.84, "learning_rate": 4.008239741570952e-05, "loss": 2.1957, "step": 6855000 }, { "epoch": 19.84, "learning_rate": 4.008167376806224e-05, "loss": 2.2038, "step": 6855500 }, { "epoch": 19.85, "learning_rate": 4.008095012041497e-05, "loss": 2.1784, "step": 6856000 }, { "epoch": 19.85, "learning_rate": 4.008022792006299e-05, "loss": 2.1926, "step": 6856500 }, { "epoch": 19.85, "learning_rate": 4.0079504272415715e-05, "loss": 2.2158, "step": 6857000 }, { "epoch": 19.85, "learning_rate": 4.007878062476844e-05, "loss": 2.1827, "step": 6857500 }, { "epoch": 19.85, "learning_rate": 4.007805697712116e-05, "loss": 2.2085, "step": 6858000 }, { "epoch": 19.85, "learning_rate": 4.007733332947388e-05, "loss": 2.1999, "step": 6858500 }, { "epoch": 19.85, "learning_rate": 4.0076609681826604e-05, "loss": 2.2057, "step": 6859000 }, { "epoch": 19.86, "learning_rate": 4.0075886034179326e-05, "loss": 2.1739, "step": 6859500 }, { "epoch": 19.86, "learning_rate": 4.007516238653205e-05, "loss": 2.2106, "step": 6860000 }, { "epoch": 19.86, "learning_rate": 4.007444018618007e-05, "loss": 2.1874, "step": 6860500 }, { "epoch": 19.86, "learning_rate": 4.007371653853279e-05, "loss": 2.2094, "step": 6861000 }, { "epoch": 19.86, "learning_rate": 4.0072992890885515e-05, "loss": 2.1859, "step": 6861500 }, { "epoch": 19.86, "learning_rate": 4.007226924323824e-05, "loss": 2.2109, "step": 6862000 }, { "epoch": 19.86, "learning_rate": 4.007154559559096e-05, "loss": 2.1857, "step": 6862500 }, { "epoch": 19.87, "learning_rate": 4.007082194794369e-05, "loss": 2.2074, "step": 6863000 }, { "epoch": 19.87, "learning_rate": 4.007009830029641e-05, "loss": 2.193, "step": 6863500 }, { "epoch": 19.87, "learning_rate": 4.0069374652649133e-05, "loss": 2.2222, "step": 6864000 }, { "epoch": 19.87, "learning_rate": 4.0068651005001856e-05, "loss": 2.1954, "step": 6864500 }, { "epoch": 19.87, "learning_rate": 4.006792735735458e-05, "loss": 2.2066, "step": 6865000 }, { "epoch": 19.87, "learning_rate": 4.00672037097073e-05, "loss": 2.1944, "step": 6865500 }, { "epoch": 19.87, "learning_rate": 4.006648150935532e-05, "loss": 2.2019, "step": 6866000 }, { "epoch": 19.88, "learning_rate": 4.0065757861708045e-05, "loss": 2.1703, "step": 6866500 }, { "epoch": 19.88, "learning_rate": 4.006503421406077e-05, "loss": 2.198, "step": 6867000 }, { "epoch": 19.88, "learning_rate": 4.006431056641349e-05, "loss": 2.1998, "step": 6867500 }, { "epoch": 19.88, "learning_rate": 4.006358691876621e-05, "loss": 2.1927, "step": 6868000 }, { "epoch": 19.88, "learning_rate": 4.0062863271118934e-05, "loss": 2.2021, "step": 6868500 }, { "epoch": 19.88, "learning_rate": 4.0062139623471656e-05, "loss": 2.205, "step": 6869000 }, { "epoch": 19.88, "learning_rate": 4.006141597582438e-05, "loss": 2.1982, "step": 6869500 }, { "epoch": 19.89, "learning_rate": 4.0060693775472394e-05, "loss": 2.226, "step": 6870000 }, { "epoch": 19.89, "learning_rate": 4.005997012782512e-05, "loss": 2.1944, "step": 6870500 }, { "epoch": 19.89, "learning_rate": 4.0059247927473145e-05, "loss": 2.1788, "step": 6871000 }, { "epoch": 19.89, "learning_rate": 4.005852427982587e-05, "loss": 2.1867, "step": 6871500 }, { "epoch": 19.89, "learning_rate": 4.005780207947388e-05, "loss": 2.1796, "step": 6872000 }, { "epoch": 19.89, "learning_rate": 4.0057078431826605e-05, "loss": 2.2026, "step": 6872500 }, { "epoch": 19.89, "learning_rate": 4.005635478417933e-05, "loss": 2.213, "step": 6873000 }, { "epoch": 19.9, "learning_rate": 4.005563113653205e-05, "loss": 2.1899, "step": 6873500 }, { "epoch": 19.9, "learning_rate": 4.005490748888477e-05, "loss": 2.1841, "step": 6874000 }, { "epoch": 19.9, "learning_rate": 4.0054183841237494e-05, "loss": 2.2014, "step": 6874500 }, { "epoch": 19.9, "learning_rate": 4.005346019359022e-05, "loss": 2.1983, "step": 6875000 }, { "epoch": 19.9, "learning_rate": 4.0052736545942945e-05, "loss": 2.1877, "step": 6875500 }, { "epoch": 19.9, "learning_rate": 4.005201289829567e-05, "loss": 2.2286, "step": 6876000 }, { "epoch": 19.9, "learning_rate": 4.005128925064839e-05, "loss": 2.2294, "step": 6876500 }, { "epoch": 19.91, "learning_rate": 4.00505684975917e-05, "loss": 2.1813, "step": 6877000 }, { "epoch": 19.91, "learning_rate": 4.004984484994442e-05, "loss": 2.1948, "step": 6877500 }, { "epoch": 19.91, "learning_rate": 4.004912120229714e-05, "loss": 2.1977, "step": 6878000 }, { "epoch": 19.91, "learning_rate": 4.004839755464987e-05, "loss": 2.2074, "step": 6878500 }, { "epoch": 19.91, "learning_rate": 4.0047675354297894e-05, "loss": 2.209, "step": 6879000 }, { "epoch": 19.91, "learning_rate": 4.0046951706650617e-05, "loss": 2.1737, "step": 6879500 }, { "epoch": 19.91, "learning_rate": 4.004622805900334e-05, "loss": 2.1881, "step": 6880000 }, { "epoch": 19.92, "learning_rate": 4.004550441135606e-05, "loss": 2.2033, "step": 6880500 }, { "epoch": 19.92, "learning_rate": 4.004478076370878e-05, "loss": 2.2023, "step": 6881000 }, { "epoch": 19.92, "learning_rate": 4.0044057116061505e-05, "loss": 2.1895, "step": 6881500 }, { "epoch": 19.92, "learning_rate": 4.004333346841423e-05, "loss": 2.2082, "step": 6882000 }, { "epoch": 19.92, "learning_rate": 4.004261126806225e-05, "loss": 2.2031, "step": 6882500 }, { "epoch": 19.92, "learning_rate": 4.004188762041497e-05, "loss": 2.1948, "step": 6883000 }, { "epoch": 19.92, "learning_rate": 4.0041163972767695e-05, "loss": 2.2036, "step": 6883500 }, { "epoch": 19.93, "learning_rate": 4.004044032512042e-05, "loss": 2.225, "step": 6884000 }, { "epoch": 19.93, "learning_rate": 4.003971667747314e-05, "loss": 2.1969, "step": 6884500 }, { "epoch": 19.93, "learning_rate": 4.0038994477121155e-05, "loss": 2.204, "step": 6885000 }, { "epoch": 19.93, "learning_rate": 4.003827082947388e-05, "loss": 2.212, "step": 6885500 }, { "epoch": 19.93, "learning_rate": 4.0037547181826606e-05, "loss": 2.1954, "step": 6886000 }, { "epoch": 19.93, "learning_rate": 4.003682353417933e-05, "loss": 2.187, "step": 6886500 }, { "epoch": 19.94, "learning_rate": 4.003609988653205e-05, "loss": 2.2084, "step": 6887000 }, { "epoch": 19.94, "learning_rate": 4.003537623888477e-05, "loss": 2.2074, "step": 6887500 }, { "epoch": 19.94, "learning_rate": 4.00346525912375e-05, "loss": 2.1716, "step": 6888000 }, { "epoch": 19.94, "learning_rate": 4.003393039088552e-05, "loss": 2.2132, "step": 6888500 }, { "epoch": 19.94, "learning_rate": 4.003320674323824e-05, "loss": 2.2078, "step": 6889000 }, { "epoch": 19.94, "learning_rate": 4.003248309559096e-05, "loss": 2.2157, "step": 6889500 }, { "epoch": 19.94, "learning_rate": 4.0031759447943684e-05, "loss": 2.1805, "step": 6890000 }, { "epoch": 19.95, "learning_rate": 4.0031035800296406e-05, "loss": 2.1971, "step": 6890500 }, { "epoch": 19.95, "learning_rate": 4.003031215264913e-05, "loss": 2.2131, "step": 6891000 }, { "epoch": 19.95, "learning_rate": 4.002958850500185e-05, "loss": 2.2248, "step": 6891500 }, { "epoch": 19.95, "learning_rate": 4.002886485735457e-05, "loss": 2.185, "step": 6892000 }, { "epoch": 19.95, "learning_rate": 4.0028141209707295e-05, "loss": 2.1968, "step": 6892500 }, { "epoch": 19.95, "learning_rate": 4.0027417562060024e-05, "loss": 2.2095, "step": 6893000 }, { "epoch": 19.95, "learning_rate": 4.0026693914412746e-05, "loss": 2.2032, "step": 6893500 }, { "epoch": 19.96, "learning_rate": 4.0025970266765475e-05, "loss": 2.2116, "step": 6894000 }, { "epoch": 19.96, "learning_rate": 4.00252466191182e-05, "loss": 2.2343, "step": 6894500 }, { "epoch": 19.96, "learning_rate": 4.002452297147092e-05, "loss": 2.1686, "step": 6895000 }, { "epoch": 19.96, "learning_rate": 4.0023800771118935e-05, "loss": 2.2043, "step": 6895500 }, { "epoch": 19.96, "learning_rate": 4.002307712347166e-05, "loss": 2.1747, "step": 6896000 }, { "epoch": 19.96, "learning_rate": 4.002235347582438e-05, "loss": 2.1926, "step": 6896500 }, { "epoch": 19.96, "learning_rate": 4.00216298281771e-05, "loss": 2.1883, "step": 6897000 }, { "epoch": 19.97, "learning_rate": 4.0020907627825124e-05, "loss": 2.1899, "step": 6897500 }, { "epoch": 19.97, "learning_rate": 4.0020183980177846e-05, "loss": 2.205, "step": 6898000 }, { "epoch": 19.97, "learning_rate": 4.001946033253057e-05, "loss": 2.1912, "step": 6898500 }, { "epoch": 19.97, "learning_rate": 4.0018738132178584e-05, "loss": 2.1971, "step": 6899000 }, { "epoch": 19.97, "learning_rate": 4.0018014484531306e-05, "loss": 2.1907, "step": 6899500 }, { "epoch": 19.97, "learning_rate": 4.001729083688403e-05, "loss": 2.2183, "step": 6900000 }, { "epoch": 19.97, "learning_rate": 4.001656718923676e-05, "loss": 2.1959, "step": 6900500 }, { "epoch": 19.98, "learning_rate": 4.001584354158948e-05, "loss": 2.2013, "step": 6901000 }, { "epoch": 19.98, "learning_rate": 4.00151213412375e-05, "loss": 2.2053, "step": 6901500 }, { "epoch": 19.98, "learning_rate": 4.0014397693590225e-05, "loss": 2.2024, "step": 6902000 }, { "epoch": 19.98, "learning_rate": 4.001367549323824e-05, "loss": 2.1931, "step": 6902500 }, { "epoch": 19.98, "learning_rate": 4.001295184559096e-05, "loss": 2.2021, "step": 6903000 }, { "epoch": 19.98, "learning_rate": 4.0012228197943685e-05, "loss": 2.224, "step": 6903500 }, { "epoch": 19.98, "learning_rate": 4.001150455029641e-05, "loss": 2.2483, "step": 6904000 }, { "epoch": 19.99, "learning_rate": 4.001078090264913e-05, "loss": 2.2025, "step": 6904500 }, { "epoch": 19.99, "learning_rate": 4.001005725500185e-05, "loss": 2.2075, "step": 6905000 }, { "epoch": 19.99, "learning_rate": 4.0009333607354573e-05, "loss": 2.2215, "step": 6905500 }, { "epoch": 19.99, "learning_rate": 4.00086099597073e-05, "loss": 2.1861, "step": 6906000 }, { "epoch": 19.99, "learning_rate": 4.0007886312060025e-05, "loss": 2.1994, "step": 6906500 }, { "epoch": 19.99, "learning_rate": 4.000716411170804e-05, "loss": 2.2068, "step": 6907000 }, { "epoch": 19.99, "learning_rate": 4.000644046406076e-05, "loss": 2.1917, "step": 6907500 }, { "epoch": 20.0, "learning_rate": 4.000571681641349e-05, "loss": 2.2223, "step": 6908000 }, { "epoch": 20.0, "learning_rate": 4.0004993168766214e-05, "loss": 2.1749, "step": 6908500 }, { "epoch": 20.0, "learning_rate": 4.000427096841423e-05, "loss": 2.1973, "step": 6909000 }, { "epoch": 20.0, "eval_accuracy": 0.6594410792436429, "eval_accuracy_mlm": 0.6233413656263951, "eval_accuracy_nsp": 0.8532675200395979, "eval_loss": 2.231724500656128, "eval_runtime": 330.3097, "eval_samples_per_second": 1321.142, "eval_steps_per_second": 55.048, "step": 6909440 }, { "epoch": 20.0, "learning_rate": 4.000354732076695e-05, "loss": 2.1754, "step": 6909500 }, { "epoch": 20.0, "learning_rate": 4.0002823673119674e-05, "loss": 2.1769, "step": 6910000 }, { "epoch": 20.0, "learning_rate": 4.00021000254724e-05, "loss": 2.175, "step": 6910500 }, { "epoch": 20.0, "learning_rate": 4.0001376377825125e-05, "loss": 2.1955, "step": 6911000 }, { "epoch": 20.01, "learning_rate": 4.000065273017785e-05, "loss": 2.1907, "step": 6911500 }, { "epoch": 20.01, "learning_rate": 3.999992908253057e-05, "loss": 2.2082, "step": 6912000 }, { "epoch": 20.01, "learning_rate": 3.999920543488329e-05, "loss": 2.1514, "step": 6912500 }, { "epoch": 20.01, "learning_rate": 3.9998481787236014e-05, "loss": 2.2034, "step": 6913000 }, { "epoch": 20.01, "learning_rate": 3.999775958688403e-05, "loss": 2.1719, "step": 6913500 }, { "epoch": 20.01, "learning_rate": 3.999703593923675e-05, "loss": 2.1982, "step": 6914000 }, { "epoch": 20.01, "learning_rate": 3.9996312291589474e-05, "loss": 2.1762, "step": 6914500 }, { "epoch": 20.02, "learning_rate": 3.99955886439422e-05, "loss": 2.1888, "step": 6915000 }, { "epoch": 20.02, "learning_rate": 3.9994866443590225e-05, "loss": 2.1714, "step": 6915500 }, { "epoch": 20.02, "learning_rate": 3.9994145690533534e-05, "loss": 2.1863, "step": 6916000 }, { "epoch": 20.02, "learning_rate": 3.9993422042886256e-05, "loss": 2.1737, "step": 6916500 }, { "epoch": 20.02, "learning_rate": 3.999269839523898e-05, "loss": 2.1762, "step": 6917000 }, { "epoch": 20.02, "learning_rate": 3.99919747475917e-05, "loss": 2.1594, "step": 6917500 }, { "epoch": 20.02, "learning_rate": 3.999125109994443e-05, "loss": 2.1686, "step": 6918000 }, { "epoch": 20.03, "learning_rate": 3.999052745229715e-05, "loss": 2.1785, "step": 6918500 }, { "epoch": 20.03, "learning_rate": 3.9989803804649874e-05, "loss": 2.195, "step": 6919000 }, { "epoch": 20.03, "learning_rate": 3.99890801570026e-05, "loss": 2.1416, "step": 6919500 }, { "epoch": 20.03, "learning_rate": 3.998835795665061e-05, "loss": 2.1669, "step": 6920000 }, { "epoch": 20.03, "learning_rate": 3.9987634309003334e-05, "loss": 2.202, "step": 6920500 }, { "epoch": 20.03, "learning_rate": 3.998691066135606e-05, "loss": 2.199, "step": 6921000 }, { "epoch": 20.03, "learning_rate": 3.998618701370878e-05, "loss": 2.159, "step": 6921500 }, { "epoch": 20.04, "learning_rate": 3.99854633660615e-05, "loss": 2.1523, "step": 6922000 }, { "epoch": 20.04, "learning_rate": 3.9984741165709523e-05, "loss": 2.1608, "step": 6922500 }, { "epoch": 20.04, "learning_rate": 3.9984017518062246e-05, "loss": 2.1697, "step": 6923000 }, { "epoch": 20.04, "learning_rate": 3.9983293870414975e-05, "loss": 2.2001, "step": 6923500 }, { "epoch": 20.04, "learning_rate": 3.99825702227677e-05, "loss": 2.2104, "step": 6924000 }, { "epoch": 20.04, "learning_rate": 3.998184802241571e-05, "loss": 2.1931, "step": 6924500 }, { "epoch": 20.05, "learning_rate": 3.9981124374768435e-05, "loss": 2.191, "step": 6925000 }, { "epoch": 20.05, "learning_rate": 3.998040072712116e-05, "loss": 2.2201, "step": 6925500 }, { "epoch": 20.05, "learning_rate": 3.997967707947388e-05, "loss": 2.2081, "step": 6926000 }, { "epoch": 20.05, "learning_rate": 3.99789534318266e-05, "loss": 2.2025, "step": 6926500 }, { "epoch": 20.05, "learning_rate": 3.997822978417933e-05, "loss": 2.1786, "step": 6927000 }, { "epoch": 20.05, "learning_rate": 3.997750613653205e-05, "loss": 2.1673, "step": 6927500 }, { "epoch": 20.05, "learning_rate": 3.9976782488884775e-05, "loss": 2.1964, "step": 6928000 }, { "epoch": 20.06, "learning_rate": 3.997606028853279e-05, "loss": 2.1654, "step": 6928500 }, { "epoch": 20.06, "learning_rate": 3.997533664088551e-05, "loss": 2.1721, "step": 6929000 }, { "epoch": 20.06, "learning_rate": 3.9974612993238235e-05, "loss": 2.1773, "step": 6929500 }, { "epoch": 20.06, "learning_rate": 3.997388934559096e-05, "loss": 2.1785, "step": 6930000 }, { "epoch": 20.06, "learning_rate": 3.9973165697943686e-05, "loss": 2.1914, "step": 6930500 }, { "epoch": 20.06, "learning_rate": 3.997244205029641e-05, "loss": 2.1868, "step": 6931000 }, { "epoch": 20.06, "learning_rate": 3.997171840264913e-05, "loss": 2.1811, "step": 6931500 }, { "epoch": 20.07, "learning_rate": 3.997099620229715e-05, "loss": 2.1854, "step": 6932000 }, { "epoch": 20.07, "learning_rate": 3.9970272554649875e-05, "loss": 2.1944, "step": 6932500 }, { "epoch": 20.07, "learning_rate": 3.99695489070026e-05, "loss": 2.1912, "step": 6933000 }, { "epoch": 20.07, "learning_rate": 3.996882525935532e-05, "loss": 2.1516, "step": 6933500 }, { "epoch": 20.07, "learning_rate": 3.996810161170804e-05, "loss": 2.2039, "step": 6934000 }, { "epoch": 20.07, "learning_rate": 3.9967377964060764e-05, "loss": 2.1754, "step": 6934500 }, { "epoch": 20.07, "learning_rate": 3.9966654316413486e-05, "loss": 2.182, "step": 6935000 }, { "epoch": 20.08, "learning_rate": 3.996593066876621e-05, "loss": 2.1789, "step": 6935500 }, { "epoch": 20.08, "learning_rate": 3.996520702111893e-05, "loss": 2.1669, "step": 6936000 }, { "epoch": 20.08, "learning_rate": 3.996448482076695e-05, "loss": 2.1872, "step": 6936500 }, { "epoch": 20.08, "learning_rate": 3.9963761173119675e-05, "loss": 2.1969, "step": 6937000 }, { "epoch": 20.08, "learning_rate": 3.99630375254724e-05, "loss": 2.1933, "step": 6937500 }, { "epoch": 20.08, "learning_rate": 3.996231532512042e-05, "loss": 2.1991, "step": 6938000 }, { "epoch": 20.08, "learning_rate": 3.996159167747314e-05, "loss": 2.1975, "step": 6938500 }, { "epoch": 20.09, "learning_rate": 3.9960868029825864e-05, "loss": 2.1958, "step": 6939000 }, { "epoch": 20.09, "learning_rate": 3.996014438217859e-05, "loss": 2.1826, "step": 6939500 }, { "epoch": 20.09, "learning_rate": 3.995942073453131e-05, "loss": 2.1876, "step": 6940000 }, { "epoch": 20.09, "learning_rate": 3.995869708688403e-05, "loss": 2.197, "step": 6940500 }, { "epoch": 20.09, "learning_rate": 3.995797343923675e-05, "loss": 2.173, "step": 6941000 }, { "epoch": 20.09, "learning_rate": 3.995724979158948e-05, "loss": 2.1933, "step": 6941500 }, { "epoch": 20.09, "learning_rate": 3.9956526143942205e-05, "loss": 2.1779, "step": 6942000 }, { "epoch": 20.1, "learning_rate": 3.995580249629493e-05, "loss": 2.1874, "step": 6942500 }, { "epoch": 20.1, "learning_rate": 3.995507884864765e-05, "loss": 2.1934, "step": 6943000 }, { "epoch": 20.1, "learning_rate": 3.995435520100037e-05, "loss": 2.2024, "step": 6943500 }, { "epoch": 20.1, "learning_rate": 3.995363300064839e-05, "loss": 2.1691, "step": 6944000 }, { "epoch": 20.1, "learning_rate": 3.995290935300111e-05, "loss": 2.2027, "step": 6944500 }, { "epoch": 20.1, "learning_rate": 3.995218570535383e-05, "loss": 2.1865, "step": 6945000 }, { "epoch": 20.1, "learning_rate": 3.995146205770656e-05, "loss": 2.1712, "step": 6945500 }, { "epoch": 20.11, "learning_rate": 3.995073841005928e-05, "loss": 2.1801, "step": 6946000 }, { "epoch": 20.11, "learning_rate": 3.9950016209707305e-05, "loss": 2.1931, "step": 6946500 }, { "epoch": 20.11, "learning_rate": 3.994929256206003e-05, "loss": 2.1908, "step": 6947000 }, { "epoch": 20.11, "learning_rate": 3.994856891441275e-05, "loss": 2.2075, "step": 6947500 }, { "epoch": 20.11, "learning_rate": 3.994784526676547e-05, "loss": 2.1888, "step": 6948000 }, { "epoch": 20.11, "learning_rate": 3.994712306641349e-05, "loss": 2.185, "step": 6948500 }, { "epoch": 20.11, "learning_rate": 3.994640086606151e-05, "loss": 2.1887, "step": 6949000 }, { "epoch": 20.12, "learning_rate": 3.994567721841423e-05, "loss": 2.162, "step": 6949500 }, { "epoch": 20.12, "learning_rate": 3.9944953570766954e-05, "loss": 2.1662, "step": 6950000 }, { "epoch": 20.12, "learning_rate": 3.994423281771026e-05, "loss": 2.1872, "step": 6950500 }, { "epoch": 20.12, "learning_rate": 3.9943509170062985e-05, "loss": 2.1945, "step": 6951000 }, { "epoch": 20.12, "learning_rate": 3.994278552241571e-05, "loss": 2.1905, "step": 6951500 }, { "epoch": 20.12, "learning_rate": 3.994206187476843e-05, "loss": 2.1907, "step": 6952000 }, { "epoch": 20.12, "learning_rate": 3.994133822712116e-05, "loss": 2.1891, "step": 6952500 }, { "epoch": 20.13, "learning_rate": 3.994061457947388e-05, "loss": 2.165, "step": 6953000 }, { "epoch": 20.13, "learning_rate": 3.99398923791219e-05, "loss": 2.1748, "step": 6953500 }, { "epoch": 20.13, "learning_rate": 3.993917017876992e-05, "loss": 2.1902, "step": 6954000 }, { "epoch": 20.13, "learning_rate": 3.993844653112264e-05, "loss": 2.1953, "step": 6954500 }, { "epoch": 20.13, "learning_rate": 3.993772288347536e-05, "loss": 2.1898, "step": 6955000 }, { "epoch": 20.13, "learning_rate": 3.9936999235828085e-05, "loss": 2.1984, "step": 6955500 }, { "epoch": 20.13, "learning_rate": 3.993627558818081e-05, "loss": 2.1697, "step": 6956000 }, { "epoch": 20.14, "learning_rate": 3.9935551940533537e-05, "loss": 2.1823, "step": 6956500 }, { "epoch": 20.14, "learning_rate": 3.993482829288626e-05, "loss": 2.207, "step": 6957000 }, { "epoch": 20.14, "learning_rate": 3.993410464523898e-05, "loss": 2.1829, "step": 6957500 }, { "epoch": 20.14, "learning_rate": 3.99333809975917e-05, "loss": 2.1809, "step": 6958000 }, { "epoch": 20.14, "learning_rate": 3.993265879723972e-05, "loss": 2.213, "step": 6958500 }, { "epoch": 20.14, "learning_rate": 3.993193514959244e-05, "loss": 2.1971, "step": 6959000 }, { "epoch": 20.14, "learning_rate": 3.993121150194516e-05, "loss": 2.1764, "step": 6959500 }, { "epoch": 20.15, "learning_rate": 3.9930487854297886e-05, "loss": 2.1732, "step": 6960000 }, { "epoch": 20.15, "learning_rate": 3.992976420665061e-05, "loss": 2.2331, "step": 6960500 }, { "epoch": 20.15, "learning_rate": 3.992904200629864e-05, "loss": 2.1961, "step": 6961000 }, { "epoch": 20.15, "learning_rate": 3.992831835865136e-05, "loss": 2.1823, "step": 6961500 }, { "epoch": 20.15, "learning_rate": 3.992759471100408e-05, "loss": 2.1727, "step": 6962000 }, { "epoch": 20.15, "learning_rate": 3.9926871063356804e-05, "loss": 2.1899, "step": 6962500 }, { "epoch": 20.16, "learning_rate": 3.9926147415709526e-05, "loss": 2.1896, "step": 6963000 }, { "epoch": 20.16, "learning_rate": 3.992542376806225e-05, "loss": 2.1848, "step": 6963500 }, { "epoch": 20.16, "learning_rate": 3.992470012041497e-05, "loss": 2.1608, "step": 6964000 }, { "epoch": 20.16, "learning_rate": 3.992397647276769e-05, "loss": 2.1977, "step": 6964500 }, { "epoch": 20.16, "learning_rate": 3.9923252825120415e-05, "loss": 2.2078, "step": 6965000 }, { "epoch": 20.16, "learning_rate": 3.992252917747314e-05, "loss": 2.1759, "step": 6965500 }, { "epoch": 20.16, "learning_rate": 3.992180552982586e-05, "loss": 2.1903, "step": 6966000 }, { "epoch": 20.17, "learning_rate": 3.992108188217858e-05, "loss": 2.1703, "step": 6966500 }, { "epoch": 20.17, "learning_rate": 3.992035823453131e-05, "loss": 2.1843, "step": 6967000 }, { "epoch": 20.17, "learning_rate": 3.9919636034179326e-05, "loss": 2.1936, "step": 6967500 }, { "epoch": 20.17, "learning_rate": 3.9918912386532055e-05, "loss": 2.185, "step": 6968000 }, { "epoch": 20.17, "learning_rate": 3.991818873888478e-05, "loss": 2.1762, "step": 6968500 }, { "epoch": 20.17, "learning_rate": 3.99174650912375e-05, "loss": 2.1699, "step": 6969000 }, { "epoch": 20.17, "learning_rate": 3.991674144359022e-05, "loss": 2.1831, "step": 6969500 }, { "epoch": 20.18, "learning_rate": 3.9916017795942944e-05, "loss": 2.1411, "step": 6970000 }, { "epoch": 20.18, "learning_rate": 3.9915294148295666e-05, "loss": 2.1941, "step": 6970500 }, { "epoch": 20.18, "learning_rate": 3.991457050064839e-05, "loss": 2.1852, "step": 6971000 }, { "epoch": 20.18, "learning_rate": 3.991384830029641e-05, "loss": 2.1898, "step": 6971500 }, { "epoch": 20.18, "learning_rate": 3.991312465264913e-05, "loss": 2.1809, "step": 6972000 }, { "epoch": 20.18, "learning_rate": 3.9912401005001855e-05, "loss": 2.1772, "step": 6972500 }, { "epoch": 20.18, "learning_rate": 3.991167880464987e-05, "loss": 2.179, "step": 6973000 }, { "epoch": 20.19, "learning_rate": 3.9910956604297886e-05, "loss": 2.1684, "step": 6973500 }, { "epoch": 20.19, "learning_rate": 3.991023295665061e-05, "loss": 2.2027, "step": 6974000 }, { "epoch": 20.19, "learning_rate": 3.990950930900334e-05, "loss": 2.1633, "step": 6974500 }, { "epoch": 20.19, "learning_rate": 3.990878566135606e-05, "loss": 2.1725, "step": 6975000 }, { "epoch": 20.19, "learning_rate": 3.9908063461004075e-05, "loss": 2.1788, "step": 6975500 }, { "epoch": 20.19, "learning_rate": 3.9907339813356804e-05, "loss": 2.1985, "step": 6976000 }, { "epoch": 20.19, "learning_rate": 3.990661616570953e-05, "loss": 2.1897, "step": 6976500 }, { "epoch": 20.2, "learning_rate": 3.990589251806225e-05, "loss": 2.1774, "step": 6977000 }, { "epoch": 20.2, "learning_rate": 3.990516887041497e-05, "loss": 2.166, "step": 6977500 }, { "epoch": 20.2, "learning_rate": 3.990444522276769e-05, "loss": 2.2045, "step": 6978000 }, { "epoch": 20.2, "learning_rate": 3.9903721575120416e-05, "loss": 2.202, "step": 6978500 }, { "epoch": 20.2, "learning_rate": 3.990299937476844e-05, "loss": 2.1984, "step": 6979000 }, { "epoch": 20.2, "learning_rate": 3.990227572712116e-05, "loss": 2.191, "step": 6979500 }, { "epoch": 20.2, "learning_rate": 3.990155207947388e-05, "loss": 2.1859, "step": 6980000 }, { "epoch": 20.21, "learning_rate": 3.9900828431826605e-05, "loss": 2.1798, "step": 6980500 }, { "epoch": 20.21, "learning_rate": 3.990010478417933e-05, "loss": 2.1933, "step": 6981000 }, { "epoch": 20.21, "learning_rate": 3.989938113653205e-05, "loss": 2.1866, "step": 6981500 }, { "epoch": 20.21, "learning_rate": 3.989865748888477e-05, "loss": 2.1655, "step": 6982000 }, { "epoch": 20.21, "learning_rate": 3.9897933841237494e-05, "loss": 2.1898, "step": 6982500 }, { "epoch": 20.21, "learning_rate": 3.989721164088551e-05, "loss": 2.1785, "step": 6983000 }, { "epoch": 20.21, "learning_rate": 3.989648799323824e-05, "loss": 2.1879, "step": 6983500 }, { "epoch": 20.22, "learning_rate": 3.989576434559096e-05, "loss": 2.1776, "step": 6984000 }, { "epoch": 20.22, "learning_rate": 3.989504069794369e-05, "loss": 2.1721, "step": 6984500 }, { "epoch": 20.22, "learning_rate": 3.989431705029641e-05, "loss": 2.1749, "step": 6985000 }, { "epoch": 20.22, "learning_rate": 3.9893593402649134e-05, "loss": 2.1844, "step": 6985500 }, { "epoch": 20.22, "learning_rate": 3.9892869755001856e-05, "loss": 2.1808, "step": 6986000 }, { "epoch": 20.22, "learning_rate": 3.989214610735458e-05, "loss": 2.1985, "step": 6986500 }, { "epoch": 20.22, "learning_rate": 3.9891423907002594e-05, "loss": 2.1776, "step": 6987000 }, { "epoch": 20.23, "learning_rate": 3.9890700259355316e-05, "loss": 2.2041, "step": 6987500 }, { "epoch": 20.23, "learning_rate": 3.988997661170804e-05, "loss": 2.1811, "step": 6988000 }, { "epoch": 20.23, "learning_rate": 3.988925296406076e-05, "loss": 2.1988, "step": 6988500 }, { "epoch": 20.23, "learning_rate": 3.988852931641349e-05, "loss": 2.218, "step": 6989000 }, { "epoch": 20.23, "learning_rate": 3.9887807116061505e-05, "loss": 2.1924, "step": 6989500 }, { "epoch": 20.23, "learning_rate": 3.988708346841423e-05, "loss": 2.1868, "step": 6990000 }, { "epoch": 20.23, "learning_rate": 3.9886359820766956e-05, "loss": 2.1678, "step": 6990500 }, { "epoch": 20.24, "learning_rate": 3.988563617311968e-05, "loss": 2.1913, "step": 6991000 }, { "epoch": 20.24, "learning_rate": 3.98849125254724e-05, "loss": 2.2153, "step": 6991500 }, { "epoch": 20.24, "learning_rate": 3.988418887782512e-05, "loss": 2.1932, "step": 6992000 }, { "epoch": 20.24, "learning_rate": 3.988346667747314e-05, "loss": 2.1721, "step": 6992500 }, { "epoch": 20.24, "learning_rate": 3.988274302982586e-05, "loss": 2.1697, "step": 6993000 }, { "epoch": 20.24, "learning_rate": 3.988201938217859e-05, "loss": 2.1609, "step": 6993500 }, { "epoch": 20.24, "learning_rate": 3.988129573453131e-05, "loss": 2.1694, "step": 6994000 }, { "epoch": 20.25, "learning_rate": 3.9880572086884034e-05, "loss": 2.1845, "step": 6994500 }, { "epoch": 20.25, "learning_rate": 3.9879848439236757e-05, "loss": 2.1812, "step": 6995000 }, { "epoch": 20.25, "learning_rate": 3.987912479158948e-05, "loss": 2.1769, "step": 6995500 }, { "epoch": 20.25, "learning_rate": 3.98784011439422e-05, "loss": 2.207, "step": 6996000 }, { "epoch": 20.25, "learning_rate": 3.987767749629492e-05, "loss": 2.1728, "step": 6996500 }, { "epoch": 20.25, "learning_rate": 3.9876953848647645e-05, "loss": 2.2241, "step": 6997000 }, { "epoch": 20.25, "learning_rate": 3.987623164829566e-05, "loss": 2.209, "step": 6997500 }, { "epoch": 20.26, "learning_rate": 3.987550944794369e-05, "loss": 2.185, "step": 6998000 }, { "epoch": 20.26, "learning_rate": 3.9874787247591706e-05, "loss": 2.175, "step": 6998500 }, { "epoch": 20.26, "learning_rate": 3.987406359994443e-05, "loss": 2.195, "step": 6999000 }, { "epoch": 20.26, "learning_rate": 3.987333995229715e-05, "loss": 2.1832, "step": 6999500 }, { "epoch": 20.26, "learning_rate": 3.987261630464987e-05, "loss": 2.1897, "step": 7000000 }, { "epoch": 20.26, "learning_rate": 3.9871892657002595e-05, "loss": 2.1876, "step": 7000500 }, { "epoch": 20.27, "learning_rate": 3.987117045665062e-05, "loss": 2.202, "step": 7001000 }, { "epoch": 20.27, "learning_rate": 3.987044680900334e-05, "loss": 2.1758, "step": 7001500 }, { "epoch": 20.27, "learning_rate": 3.9869724608651355e-05, "loss": 2.1852, "step": 7002000 }, { "epoch": 20.27, "learning_rate": 3.986900096100408e-05, "loss": 2.1671, "step": 7002500 }, { "epoch": 20.27, "learning_rate": 3.98682773133568e-05, "loss": 2.1718, "step": 7003000 }, { "epoch": 20.27, "learning_rate": 3.986755366570952e-05, "loss": 2.1998, "step": 7003500 }, { "epoch": 20.27, "learning_rate": 3.9866830018062244e-05, "loss": 2.1956, "step": 7004000 }, { "epoch": 20.28, "learning_rate": 3.9866106370414966e-05, "loss": 2.2042, "step": 7004500 }, { "epoch": 20.28, "learning_rate": 3.986538272276769e-05, "loss": 2.1867, "step": 7005000 }, { "epoch": 20.28, "learning_rate": 3.986465907512042e-05, "loss": 2.1871, "step": 7005500 }, { "epoch": 20.28, "learning_rate": 3.986393542747314e-05, "loss": 2.1908, "step": 7006000 }, { "epoch": 20.28, "learning_rate": 3.986321177982587e-05, "loss": 2.1811, "step": 7006500 }, { "epoch": 20.28, "learning_rate": 3.986248813217859e-05, "loss": 2.1708, "step": 7007000 }, { "epoch": 20.28, "learning_rate": 3.9861765931826606e-05, "loss": 2.1658, "step": 7007500 }, { "epoch": 20.29, "learning_rate": 3.986104228417933e-05, "loss": 2.2123, "step": 7008000 }, { "epoch": 20.29, "learning_rate": 3.986031863653205e-05, "loss": 2.1934, "step": 7008500 }, { "epoch": 20.29, "learning_rate": 3.985959498888477e-05, "loss": 2.1881, "step": 7009000 }, { "epoch": 20.29, "learning_rate": 3.9858871341237495e-05, "loss": 2.1759, "step": 7009500 }, { "epoch": 20.29, "learning_rate": 3.985814769359022e-05, "loss": 2.1932, "step": 7010000 }, { "epoch": 20.29, "learning_rate": 3.985742404594294e-05, "loss": 2.1948, "step": 7010500 }, { "epoch": 20.29, "learning_rate": 3.985670184559096e-05, "loss": 2.1971, "step": 7011000 }, { "epoch": 20.3, "learning_rate": 3.9855978197943684e-05, "loss": 2.2012, "step": 7011500 }, { "epoch": 20.3, "learning_rate": 3.98552559975917e-05, "loss": 2.1917, "step": 7012000 }, { "epoch": 20.3, "learning_rate": 3.985453234994442e-05, "loss": 2.1855, "step": 7012500 }, { "epoch": 20.3, "learning_rate": 3.9853808702297144e-05, "loss": 2.1852, "step": 7013000 }, { "epoch": 20.3, "learning_rate": 3.985308505464987e-05, "loss": 2.2068, "step": 7013500 }, { "epoch": 20.3, "learning_rate": 3.9852361407002595e-05, "loss": 2.2028, "step": 7014000 }, { "epoch": 20.3, "learning_rate": 3.985163775935532e-05, "loss": 2.1731, "step": 7014500 }, { "epoch": 20.31, "learning_rate": 3.985091411170804e-05, "loss": 2.18, "step": 7015000 }, { "epoch": 20.31, "learning_rate": 3.985019046406077e-05, "loss": 2.1947, "step": 7015500 }, { "epoch": 20.31, "learning_rate": 3.9849468263708784e-05, "loss": 2.2185, "step": 7016000 }, { "epoch": 20.31, "learning_rate": 3.984874461606151e-05, "loss": 2.1597, "step": 7016500 }, { "epoch": 20.31, "learning_rate": 3.984802096841423e-05, "loss": 2.1912, "step": 7017000 }, { "epoch": 20.31, "learning_rate": 3.984729732076695e-05, "loss": 2.1821, "step": 7017500 }, { "epoch": 20.31, "learning_rate": 3.984657367311967e-05, "loss": 2.1817, "step": 7018000 }, { "epoch": 20.32, "learning_rate": 3.9845850025472396e-05, "loss": 2.1752, "step": 7018500 }, { "epoch": 20.32, "learning_rate": 3.984512782512042e-05, "loss": 2.2003, "step": 7019000 }, { "epoch": 20.32, "learning_rate": 3.984440417747314e-05, "loss": 2.1833, "step": 7019500 }, { "epoch": 20.32, "learning_rate": 3.984368052982586e-05, "loss": 2.1761, "step": 7020000 }, { "epoch": 20.32, "learning_rate": 3.984295688217859e-05, "loss": 2.1692, "step": 7020500 }, { "epoch": 20.32, "learning_rate": 3.9842233234531314e-05, "loss": 2.1949, "step": 7021000 }, { "epoch": 20.32, "learning_rate": 3.9841509586884036e-05, "loss": 2.1685, "step": 7021500 }, { "epoch": 20.33, "learning_rate": 3.984078593923676e-05, "loss": 2.1944, "step": 7022000 }, { "epoch": 20.33, "learning_rate": 3.984006229158948e-05, "loss": 2.1733, "step": 7022500 }, { "epoch": 20.33, "learning_rate": 3.98393386439422e-05, "loss": 2.1836, "step": 7023000 }, { "epoch": 20.33, "learning_rate": 3.983861644359022e-05, "loss": 2.1937, "step": 7023500 }, { "epoch": 20.33, "learning_rate": 3.983789279594295e-05, "loss": 2.2019, "step": 7024000 }, { "epoch": 20.33, "learning_rate": 3.983716914829567e-05, "loss": 2.185, "step": 7024500 }, { "epoch": 20.33, "learning_rate": 3.983644550064839e-05, "loss": 2.2017, "step": 7025000 }, { "epoch": 20.34, "learning_rate": 3.9835721853001114e-05, "loss": 2.1772, "step": 7025500 }, { "epoch": 20.34, "learning_rate": 3.9834998205353836e-05, "loss": 2.1999, "step": 7026000 }, { "epoch": 20.34, "learning_rate": 3.983427600500185e-05, "loss": 2.1927, "step": 7026500 }, { "epoch": 20.34, "learning_rate": 3.9833552357354574e-05, "loss": 2.1623, "step": 7027000 }, { "epoch": 20.34, "learning_rate": 3.9832830157002596e-05, "loss": 2.1911, "step": 7027500 }, { "epoch": 20.34, "learning_rate": 3.983210650935532e-05, "loss": 2.1765, "step": 7028000 }, { "epoch": 20.34, "learning_rate": 3.983138286170805e-05, "loss": 2.1804, "step": 7028500 }, { "epoch": 20.35, "learning_rate": 3.983065921406077e-05, "loss": 2.1886, "step": 7029000 }, { "epoch": 20.35, "learning_rate": 3.9829937013708785e-05, "loss": 2.1703, "step": 7029500 }, { "epoch": 20.35, "learning_rate": 3.98292148133568e-05, "loss": 2.1797, "step": 7030000 }, { "epoch": 20.35, "learning_rate": 3.982849116570952e-05, "loss": 2.2094, "step": 7030500 }, { "epoch": 20.35, "learning_rate": 3.9827767518062245e-05, "loss": 2.191, "step": 7031000 }, { "epoch": 20.35, "learning_rate": 3.982704387041497e-05, "loss": 2.1692, "step": 7031500 }, { "epoch": 20.35, "learning_rate": 3.9826320222767697e-05, "loss": 2.2012, "step": 7032000 }, { "epoch": 20.36, "learning_rate": 3.982559657512042e-05, "loss": 2.1667, "step": 7032500 }, { "epoch": 20.36, "learning_rate": 3.982487292747314e-05, "loss": 2.1905, "step": 7033000 }, { "epoch": 20.36, "learning_rate": 3.982414927982586e-05, "loss": 2.1896, "step": 7033500 }, { "epoch": 20.36, "learning_rate": 3.9823425632178585e-05, "loss": 2.1945, "step": 7034000 }, { "epoch": 20.36, "learning_rate": 3.982270198453131e-05, "loss": 2.1896, "step": 7034500 }, { "epoch": 20.36, "learning_rate": 3.982197833688403e-05, "loss": 2.2126, "step": 7035000 }, { "epoch": 20.36, "learning_rate": 3.982125468923676e-05, "loss": 2.2072, "step": 7035500 }, { "epoch": 20.37, "learning_rate": 3.982053104158948e-05, "loss": 2.1824, "step": 7036000 }, { "epoch": 20.37, "learning_rate": 3.98198073939422e-05, "loss": 2.1857, "step": 7036500 }, { "epoch": 20.37, "learning_rate": 3.981908519359022e-05, "loss": 2.1857, "step": 7037000 }, { "epoch": 20.37, "learning_rate": 3.981836154594295e-05, "loss": 2.1818, "step": 7037500 }, { "epoch": 20.37, "learning_rate": 3.981763789829567e-05, "loss": 2.174, "step": 7038000 }, { "epoch": 20.37, "learning_rate": 3.981691425064839e-05, "loss": 2.154, "step": 7038500 }, { "epoch": 20.38, "learning_rate": 3.9816190603001115e-05, "loss": 2.1931, "step": 7039000 }, { "epoch": 20.38, "learning_rate": 3.981546695535384e-05, "loss": 2.1898, "step": 7039500 }, { "epoch": 20.38, "learning_rate": 3.981474330770656e-05, "loss": 2.1961, "step": 7040000 }, { "epoch": 20.38, "learning_rate": 3.981401966005928e-05, "loss": 2.1729, "step": 7040500 }, { "epoch": 20.38, "learning_rate": 3.9813296012412004e-05, "loss": 2.1951, "step": 7041000 }, { "epoch": 20.38, "learning_rate": 3.9812572364764726e-05, "loss": 2.1889, "step": 7041500 }, { "epoch": 20.38, "learning_rate": 3.981184871711745e-05, "loss": 2.1871, "step": 7042000 }, { "epoch": 20.39, "learning_rate": 3.981112651676547e-05, "loss": 2.2107, "step": 7042500 }, { "epoch": 20.39, "learning_rate": 3.98104028691182e-05, "loss": 2.1899, "step": 7043000 }, { "epoch": 20.39, "learning_rate": 3.9809680668766215e-05, "loss": 2.1718, "step": 7043500 }, { "epoch": 20.39, "learning_rate": 3.980895702111894e-05, "loss": 2.1903, "step": 7044000 }, { "epoch": 20.39, "learning_rate": 3.980823337347166e-05, "loss": 2.1603, "step": 7044500 }, { "epoch": 20.39, "learning_rate": 3.980750972582438e-05, "loss": 2.1761, "step": 7045000 }, { "epoch": 20.39, "learning_rate": 3.9806786078177104e-05, "loss": 2.1583, "step": 7045500 }, { "epoch": 20.4, "learning_rate": 3.9806062430529826e-05, "loss": 2.1942, "step": 7046000 }, { "epoch": 20.4, "learning_rate": 3.980534023017785e-05, "loss": 2.1957, "step": 7046500 }, { "epoch": 20.4, "learning_rate": 3.980461658253057e-05, "loss": 2.194, "step": 7047000 }, { "epoch": 20.4, "learning_rate": 3.980389293488329e-05, "loss": 2.1757, "step": 7047500 }, { "epoch": 20.4, "learning_rate": 3.9803169287236015e-05, "loss": 2.1898, "step": 7048000 }, { "epoch": 20.4, "learning_rate": 3.980244563958874e-05, "loss": 2.2045, "step": 7048500 }, { "epoch": 20.4, "learning_rate": 3.980172343923675e-05, "loss": 2.1692, "step": 7049000 }, { "epoch": 20.41, "learning_rate": 3.9800999791589475e-05, "loss": 2.1962, "step": 7049500 }, { "epoch": 20.41, "learning_rate": 3.98002761439422e-05, "loss": 2.1654, "step": 7050000 }, { "epoch": 20.41, "learning_rate": 3.9799552496294926e-05, "loss": 2.1826, "step": 7050500 }, { "epoch": 20.41, "learning_rate": 3.979882884864765e-05, "loss": 2.189, "step": 7051000 }, { "epoch": 20.41, "learning_rate": 3.979810520100037e-05, "loss": 2.203, "step": 7051500 }, { "epoch": 20.41, "learning_rate": 3.97973815533531e-05, "loss": 2.1781, "step": 7052000 }, { "epoch": 20.41, "learning_rate": 3.979665790570582e-05, "loss": 2.1805, "step": 7052500 }, { "epoch": 20.42, "learning_rate": 3.9795934258058544e-05, "loss": 2.1746, "step": 7053000 }, { "epoch": 20.42, "learning_rate": 3.979521205770656e-05, "loss": 2.1918, "step": 7053500 }, { "epoch": 20.42, "learning_rate": 3.979448841005928e-05, "loss": 2.2041, "step": 7054000 }, { "epoch": 20.42, "learning_rate": 3.9793764762412004e-05, "loss": 2.2015, "step": 7054500 }, { "epoch": 20.42, "learning_rate": 3.9793041114764727e-05, "loss": 2.1931, "step": 7055000 }, { "epoch": 20.42, "learning_rate": 3.979231746711745e-05, "loss": 2.1616, "step": 7055500 }, { "epoch": 20.42, "learning_rate": 3.979159381947017e-05, "loss": 2.1781, "step": 7056000 }, { "epoch": 20.43, "learning_rate": 3.9790871619118193e-05, "loss": 2.2021, "step": 7056500 }, { "epoch": 20.43, "learning_rate": 3.9790147971470916e-05, "loss": 2.2019, "step": 7057000 }, { "epoch": 20.43, "learning_rate": 3.9789424323823645e-05, "loss": 2.1837, "step": 7057500 }, { "epoch": 20.43, "learning_rate": 3.978870067617637e-05, "loss": 2.1752, "step": 7058000 }, { "epoch": 20.43, "learning_rate": 3.978797702852909e-05, "loss": 2.1906, "step": 7058500 }, { "epoch": 20.43, "learning_rate": 3.9787254828177105e-05, "loss": 2.2004, "step": 7059000 }, { "epoch": 20.43, "learning_rate": 3.978653118052983e-05, "loss": 2.1731, "step": 7059500 }, { "epoch": 20.44, "learning_rate": 3.978580753288255e-05, "loss": 2.1845, "step": 7060000 }, { "epoch": 20.44, "learning_rate": 3.978508388523527e-05, "loss": 2.1708, "step": 7060500 }, { "epoch": 20.44, "learning_rate": 3.9784361684883294e-05, "loss": 2.2081, "step": 7061000 }, { "epoch": 20.44, "learning_rate": 3.978363948453131e-05, "loss": 2.1721, "step": 7061500 }, { "epoch": 20.44, "learning_rate": 3.9782917284179325e-05, "loss": 2.2073, "step": 7062000 }, { "epoch": 20.44, "learning_rate": 3.978219363653205e-05, "loss": 2.1906, "step": 7062500 }, { "epoch": 20.44, "learning_rate": 3.9781469988884776e-05, "loss": 2.1897, "step": 7063000 }, { "epoch": 20.45, "learning_rate": 3.97807463412375e-05, "loss": 2.203, "step": 7063500 }, { "epoch": 20.45, "learning_rate": 3.978002269359022e-05, "loss": 2.1499, "step": 7064000 }, { "epoch": 20.45, "learning_rate": 3.977929904594294e-05, "loss": 2.2014, "step": 7064500 }, { "epoch": 20.45, "learning_rate": 3.9778575398295665e-05, "loss": 2.1797, "step": 7065000 }, { "epoch": 20.45, "learning_rate": 3.9777851750648394e-05, "loss": 2.1962, "step": 7065500 }, { "epoch": 20.45, "learning_rate": 3.9777128103001116e-05, "loss": 2.1929, "step": 7066000 }, { "epoch": 20.45, "learning_rate": 3.977640590264913e-05, "loss": 2.1796, "step": 7066500 }, { "epoch": 20.46, "learning_rate": 3.9775682255001854e-05, "loss": 2.1689, "step": 7067000 }, { "epoch": 20.46, "learning_rate": 3.9774958607354576e-05, "loss": 2.2087, "step": 7067500 }, { "epoch": 20.46, "learning_rate": 3.97742349597073e-05, "loss": 2.2148, "step": 7068000 }, { "epoch": 20.46, "learning_rate": 3.977351131206003e-05, "loss": 2.1938, "step": 7068500 }, { "epoch": 20.46, "learning_rate": 3.977278766441275e-05, "loss": 2.2049, "step": 7069000 }, { "epoch": 20.46, "learning_rate": 3.977206401676547e-05, "loss": 2.192, "step": 7069500 }, { "epoch": 20.46, "learning_rate": 3.977134181641349e-05, "loss": 2.1895, "step": 7070000 }, { "epoch": 20.47, "learning_rate": 3.977061816876621e-05, "loss": 2.1864, "step": 7070500 }, { "epoch": 20.47, "learning_rate": 3.976989452111893e-05, "loss": 2.1879, "step": 7071000 }, { "epoch": 20.47, "learning_rate": 3.9769170873471654e-05, "loss": 2.2107, "step": 7071500 }, { "epoch": 20.47, "learning_rate": 3.9768448673119677e-05, "loss": 2.2035, "step": 7072000 }, { "epoch": 20.47, "learning_rate": 3.97677250254724e-05, "loss": 2.1805, "step": 7072500 }, { "epoch": 20.47, "learning_rate": 3.976700137782513e-05, "loss": 2.1792, "step": 7073000 }, { "epoch": 20.47, "learning_rate": 3.976627773017785e-05, "loss": 2.1931, "step": 7073500 }, { "epoch": 20.48, "learning_rate": 3.9765555529825866e-05, "loss": 2.2143, "step": 7074000 }, { "epoch": 20.48, "learning_rate": 3.976483188217859e-05, "loss": 2.195, "step": 7074500 }, { "epoch": 20.48, "learning_rate": 3.976410823453131e-05, "loss": 2.187, "step": 7075000 }, { "epoch": 20.48, "learning_rate": 3.976338458688403e-05, "loss": 2.2038, "step": 7075500 }, { "epoch": 20.48, "learning_rate": 3.9762660939236755e-05, "loss": 2.1757, "step": 7076000 }, { "epoch": 20.48, "learning_rate": 3.976193729158948e-05, "loss": 2.2237, "step": 7076500 }, { "epoch": 20.49, "learning_rate": 3.97612136439422e-05, "loss": 2.1807, "step": 7077000 }, { "epoch": 20.49, "learning_rate": 3.976048999629493e-05, "loss": 2.1902, "step": 7077500 }, { "epoch": 20.49, "learning_rate": 3.975976634864765e-05, "loss": 2.2015, "step": 7078000 }, { "epoch": 20.49, "learning_rate": 3.975904270100037e-05, "loss": 2.1906, "step": 7078500 }, { "epoch": 20.49, "learning_rate": 3.975832050064839e-05, "loss": 2.2108, "step": 7079000 }, { "epoch": 20.49, "learning_rate": 3.975759685300111e-05, "loss": 2.2003, "step": 7079500 }, { "epoch": 20.49, "learning_rate": 3.975687320535383e-05, "loss": 2.2145, "step": 7080000 }, { "epoch": 20.5, "learning_rate": 3.975614955770656e-05, "loss": 2.2078, "step": 7080500 }, { "epoch": 20.5, "learning_rate": 3.9755425910059284e-05, "loss": 2.2111, "step": 7081000 }, { "epoch": 20.5, "learning_rate": 3.9754703709707306e-05, "loss": 2.1868, "step": 7081500 }, { "epoch": 20.5, "learning_rate": 3.975398006206003e-05, "loss": 2.1746, "step": 7082000 }, { "epoch": 20.5, "learning_rate": 3.975325641441275e-05, "loss": 2.1731, "step": 7082500 }, { "epoch": 20.5, "learning_rate": 3.975253276676547e-05, "loss": 2.1799, "step": 7083000 }, { "epoch": 20.5, "learning_rate": 3.975181056641349e-05, "loss": 2.1802, "step": 7083500 }, { "epoch": 20.51, "learning_rate": 3.975108691876621e-05, "loss": 2.192, "step": 7084000 }, { "epoch": 20.51, "learning_rate": 3.975036327111893e-05, "loss": 2.2028, "step": 7084500 }, { "epoch": 20.51, "learning_rate": 3.9749639623471655e-05, "loss": 2.1753, "step": 7085000 }, { "epoch": 20.51, "learning_rate": 3.974891597582438e-05, "loss": 2.1644, "step": 7085500 }, { "epoch": 20.51, "learning_rate": 3.9748192328177106e-05, "loss": 2.1746, "step": 7086000 }, { "epoch": 20.51, "learning_rate": 3.974746868052983e-05, "loss": 2.1841, "step": 7086500 }, { "epoch": 20.51, "learning_rate": 3.974674792747314e-05, "loss": 2.2011, "step": 7087000 }, { "epoch": 20.52, "learning_rate": 3.974602427982586e-05, "loss": 2.1821, "step": 7087500 }, { "epoch": 20.52, "learning_rate": 3.974530063217859e-05, "loss": 2.1987, "step": 7088000 }, { "epoch": 20.52, "learning_rate": 3.974457698453131e-05, "loss": 2.1808, "step": 7088500 }, { "epoch": 20.52, "learning_rate": 3.974385333688403e-05, "loss": 2.2231, "step": 7089000 }, { "epoch": 20.52, "learning_rate": 3.9743129689236755e-05, "loss": 2.2048, "step": 7089500 }, { "epoch": 20.52, "learning_rate": 3.974240604158948e-05, "loss": 2.1878, "step": 7090000 }, { "epoch": 20.52, "learning_rate": 3.9741682393942207e-05, "loss": 2.1903, "step": 7090500 }, { "epoch": 20.53, "learning_rate": 3.974096019359022e-05, "loss": 2.2219, "step": 7091000 }, { "epoch": 20.53, "learning_rate": 3.9740236545942944e-05, "loss": 2.1865, "step": 7091500 }, { "epoch": 20.53, "learning_rate": 3.9739512898295667e-05, "loss": 2.1993, "step": 7092000 }, { "epoch": 20.53, "learning_rate": 3.973879069794368e-05, "loss": 2.1897, "step": 7092500 }, { "epoch": 20.53, "learning_rate": 3.9738067050296404e-05, "loss": 2.1883, "step": 7093000 }, { "epoch": 20.53, "learning_rate": 3.9737343402649127e-05, "loss": 2.1794, "step": 7093500 }, { "epoch": 20.53, "learning_rate": 3.9736619755001856e-05, "loss": 2.1779, "step": 7094000 }, { "epoch": 20.54, "learning_rate": 3.973589610735458e-05, "loss": 2.1929, "step": 7094500 }, { "epoch": 20.54, "learning_rate": 3.97351724597073e-05, "loss": 2.1846, "step": 7095000 }, { "epoch": 20.54, "learning_rate": 3.973444881206003e-05, "loss": 2.1924, "step": 7095500 }, { "epoch": 20.54, "learning_rate": 3.9733726611708045e-05, "loss": 2.1688, "step": 7096000 }, { "epoch": 20.54, "learning_rate": 3.973300296406077e-05, "loss": 2.2002, "step": 7096500 }, { "epoch": 20.54, "learning_rate": 3.973227931641349e-05, "loss": 2.1993, "step": 7097000 }, { "epoch": 20.54, "learning_rate": 3.973155566876621e-05, "loss": 2.1994, "step": 7097500 }, { "epoch": 20.55, "learning_rate": 3.9730832021118934e-05, "loss": 2.1679, "step": 7098000 }, { "epoch": 20.55, "learning_rate": 3.9730108373471656e-05, "loss": 2.1774, "step": 7098500 }, { "epoch": 20.55, "learning_rate": 3.972938472582438e-05, "loss": 2.1987, "step": 7099000 }, { "epoch": 20.55, "learning_rate": 3.972866107817711e-05, "loss": 2.1917, "step": 7099500 }, { "epoch": 20.55, "learning_rate": 3.972793743052983e-05, "loss": 2.2247, "step": 7100000 }, { "epoch": 20.55, "learning_rate": 3.972721378288255e-05, "loss": 2.1916, "step": 7100500 }, { "epoch": 20.55, "learning_rate": 3.9726490135235274e-05, "loss": 2.2065, "step": 7101000 }, { "epoch": 20.56, "learning_rate": 3.9725766487587996e-05, "loss": 2.1739, "step": 7101500 }, { "epoch": 20.56, "learning_rate": 3.972504283994072e-05, "loss": 2.1685, "step": 7102000 }, { "epoch": 20.56, "learning_rate": 3.972431919229345e-05, "loss": 2.2143, "step": 7102500 }, { "epoch": 20.56, "learning_rate": 3.972359699194146e-05, "loss": 2.1757, "step": 7103000 }, { "epoch": 20.56, "learning_rate": 3.9722873344294185e-05, "loss": 2.2041, "step": 7103500 }, { "epoch": 20.56, "learning_rate": 3.972214969664691e-05, "loss": 2.1868, "step": 7104000 }, { "epoch": 20.56, "learning_rate": 3.972142604899963e-05, "loss": 2.1786, "step": 7104500 }, { "epoch": 20.57, "learning_rate": 3.972070240135236e-05, "loss": 2.1694, "step": 7105000 }, { "epoch": 20.57, "learning_rate": 3.971997875370508e-05, "loss": 2.1881, "step": 7105500 }, { "epoch": 20.57, "learning_rate": 3.97192551060578e-05, "loss": 2.1967, "step": 7106000 }, { "epoch": 20.57, "learning_rate": 3.971853290570582e-05, "loss": 2.1878, "step": 7106500 }, { "epoch": 20.57, "learning_rate": 3.9717810705353834e-05, "loss": 2.1783, "step": 7107000 }, { "epoch": 20.57, "learning_rate": 3.9717087057706556e-05, "loss": 2.1804, "step": 7107500 }, { "epoch": 20.57, "learning_rate": 3.971636341005928e-05, "loss": 2.1845, "step": 7108000 }, { "epoch": 20.58, "learning_rate": 3.971563976241201e-05, "loss": 2.179, "step": 7108500 }, { "epoch": 20.58, "learning_rate": 3.971491611476473e-05, "loss": 2.178, "step": 7109000 }, { "epoch": 20.58, "learning_rate": 3.971419246711745e-05, "loss": 2.1881, "step": 7109500 }, { "epoch": 20.58, "learning_rate": 3.971346881947018e-05, "loss": 2.1806, "step": 7110000 }, { "epoch": 20.58, "learning_rate": 3.97127451718229e-05, "loss": 2.1826, "step": 7110500 }, { "epoch": 20.58, "learning_rate": 3.9712021524175626e-05, "loss": 2.1917, "step": 7111000 }, { "epoch": 20.58, "learning_rate": 3.971129787652835e-05, "loss": 2.1799, "step": 7111500 }, { "epoch": 20.59, "learning_rate": 3.971057567617636e-05, "loss": 2.2012, "step": 7112000 }, { "epoch": 20.59, "learning_rate": 3.9709853475824386e-05, "loss": 2.1971, "step": 7112500 }, { "epoch": 20.59, "learning_rate": 3.970912982817711e-05, "loss": 2.1988, "step": 7113000 }, { "epoch": 20.59, "learning_rate": 3.970840618052983e-05, "loss": 2.1677, "step": 7113500 }, { "epoch": 20.59, "learning_rate": 3.970768253288255e-05, "loss": 2.2067, "step": 7114000 }, { "epoch": 20.59, "learning_rate": 3.9706958885235275e-05, "loss": 2.1956, "step": 7114500 }, { "epoch": 20.6, "learning_rate": 3.9706235237588e-05, "loss": 2.1921, "step": 7115000 }, { "epoch": 20.6, "learning_rate": 3.970551158994072e-05, "loss": 2.1682, "step": 7115500 }, { "epoch": 20.6, "learning_rate": 3.9704789389588735e-05, "loss": 2.1757, "step": 7116000 }, { "epoch": 20.6, "learning_rate": 3.970406574194146e-05, "loss": 2.2008, "step": 7116500 }, { "epoch": 20.6, "learning_rate": 3.9703342094294186e-05, "loss": 2.2106, "step": 7117000 }, { "epoch": 20.6, "learning_rate": 3.970261844664691e-05, "loss": 2.1789, "step": 7117500 }, { "epoch": 20.6, "learning_rate": 3.970189479899964e-05, "loss": 2.2042, "step": 7118000 }, { "epoch": 20.61, "learning_rate": 3.970117115135236e-05, "loss": 2.1878, "step": 7118500 }, { "epoch": 20.61, "learning_rate": 3.970044750370508e-05, "loss": 2.1809, "step": 7119000 }, { "epoch": 20.61, "learning_rate": 3.9699723856057804e-05, "loss": 2.1625, "step": 7119500 }, { "epoch": 20.61, "learning_rate": 3.969900165570582e-05, "loss": 2.1908, "step": 7120000 }, { "epoch": 20.61, "learning_rate": 3.969827800805854e-05, "loss": 2.209, "step": 7120500 }, { "epoch": 20.61, "learning_rate": 3.9697554360411264e-05, "loss": 2.192, "step": 7121000 }, { "epoch": 20.61, "learning_rate": 3.9696830712763986e-05, "loss": 2.187, "step": 7121500 }, { "epoch": 20.62, "learning_rate": 3.969610706511671e-05, "loss": 2.2004, "step": 7122000 }, { "epoch": 20.62, "learning_rate": 3.969538486476473e-05, "loss": 2.1871, "step": 7122500 }, { "epoch": 20.62, "learning_rate": 3.9694662664412746e-05, "loss": 2.1944, "step": 7123000 }, { "epoch": 20.62, "learning_rate": 3.969393901676547e-05, "loss": 2.1808, "step": 7123500 }, { "epoch": 20.62, "learning_rate": 3.9693216816413484e-05, "loss": 2.207, "step": 7124000 }, { "epoch": 20.62, "learning_rate": 3.9692493168766206e-05, "loss": 2.1933, "step": 7124500 }, { "epoch": 20.62, "learning_rate": 3.9691769521118935e-05, "loss": 2.2003, "step": 7125000 }, { "epoch": 20.63, "learning_rate": 3.969104587347166e-05, "loss": 2.1714, "step": 7125500 }, { "epoch": 20.63, "learning_rate": 3.9690322225824386e-05, "loss": 2.1927, "step": 7126000 }, { "epoch": 20.63, "learning_rate": 3.968959857817711e-05, "loss": 2.1798, "step": 7126500 }, { "epoch": 20.63, "learning_rate": 3.968887493052983e-05, "loss": 2.1889, "step": 7127000 }, { "epoch": 20.63, "learning_rate": 3.968815128288255e-05, "loss": 2.1951, "step": 7127500 }, { "epoch": 20.63, "learning_rate": 3.9687427635235275e-05, "loss": 2.1823, "step": 7128000 }, { "epoch": 20.63, "learning_rate": 3.9686703987588e-05, "loss": 2.1805, "step": 7128500 }, { "epoch": 20.64, "learning_rate": 3.968598033994072e-05, "loss": 2.2102, "step": 7129000 }, { "epoch": 20.64, "learning_rate": 3.968525669229344e-05, "loss": 2.1742, "step": 7129500 }, { "epoch": 20.64, "learning_rate": 3.9684533044646164e-05, "loss": 2.1756, "step": 7130000 }, { "epoch": 20.64, "learning_rate": 3.9683809396998887e-05, "loss": 2.1813, "step": 7130500 }, { "epoch": 20.64, "learning_rate": 3.968308574935161e-05, "loss": 2.1899, "step": 7131000 }, { "epoch": 20.64, "learning_rate": 3.968236210170434e-05, "loss": 2.1827, "step": 7131500 }, { "epoch": 20.64, "learning_rate": 3.968163845405706e-05, "loss": 2.1851, "step": 7132000 }, { "epoch": 20.65, "learning_rate": 3.968091480640979e-05, "loss": 2.1713, "step": 7132500 }, { "epoch": 20.65, "learning_rate": 3.9680192606057805e-05, "loss": 2.2002, "step": 7133000 }, { "epoch": 20.65, "learning_rate": 3.967947040570582e-05, "loss": 2.1963, "step": 7133500 }, { "epoch": 20.65, "learning_rate": 3.967874675805854e-05, "loss": 2.1959, "step": 7134000 }, { "epoch": 20.65, "learning_rate": 3.9678023110411265e-05, "loss": 2.1962, "step": 7134500 }, { "epoch": 20.65, "learning_rate": 3.967729946276399e-05, "loss": 2.1916, "step": 7135000 }, { "epoch": 20.65, "learning_rate": 3.967657581511671e-05, "loss": 2.1807, "step": 7135500 }, { "epoch": 20.66, "learning_rate": 3.967585216746944e-05, "loss": 2.2127, "step": 7136000 }, { "epoch": 20.66, "learning_rate": 3.967512851982216e-05, "loss": 2.1899, "step": 7136500 }, { "epoch": 20.66, "learning_rate": 3.967440487217488e-05, "loss": 2.1724, "step": 7137000 }, { "epoch": 20.66, "learning_rate": 3.9673681224527605e-05, "loss": 2.2133, "step": 7137500 }, { "epoch": 20.66, "learning_rate": 3.967295757688033e-05, "loss": 2.1579, "step": 7138000 }, { "epoch": 20.66, "learning_rate": 3.967223392923305e-05, "loss": 2.1966, "step": 7138500 }, { "epoch": 20.66, "learning_rate": 3.967151028158577e-05, "loss": 2.1854, "step": 7139000 }, { "epoch": 20.67, "learning_rate": 3.96707866339385e-05, "loss": 2.2129, "step": 7139500 }, { "epoch": 20.67, "learning_rate": 3.9670064433586516e-05, "loss": 2.1787, "step": 7140000 }, { "epoch": 20.67, "learning_rate": 3.966934078593924e-05, "loss": 2.1755, "step": 7140500 }, { "epoch": 20.67, "learning_rate": 3.966861713829196e-05, "loss": 2.1842, "step": 7141000 }, { "epoch": 20.67, "learning_rate": 3.966789493793998e-05, "loss": 2.1688, "step": 7141500 }, { "epoch": 20.67, "learning_rate": 3.9667171290292705e-05, "loss": 2.1846, "step": 7142000 }, { "epoch": 20.67, "learning_rate": 3.966644764264543e-05, "loss": 2.1878, "step": 7142500 }, { "epoch": 20.68, "learning_rate": 3.966572399499815e-05, "loss": 2.2005, "step": 7143000 }, { "epoch": 20.68, "learning_rate": 3.966500034735087e-05, "loss": 2.1735, "step": 7143500 }, { "epoch": 20.68, "learning_rate": 3.9664276699703594e-05, "loss": 2.1811, "step": 7144000 }, { "epoch": 20.68, "learning_rate": 3.9663553052056316e-05, "loss": 2.1703, "step": 7144500 }, { "epoch": 20.68, "learning_rate": 3.966282940440904e-05, "loss": 2.2097, "step": 7145000 }, { "epoch": 20.68, "learning_rate": 3.966210575676176e-05, "loss": 2.1886, "step": 7145500 }, { "epoch": 20.68, "learning_rate": 3.966138210911449e-05, "loss": 2.1999, "step": 7146000 }, { "epoch": 20.69, "learning_rate": 3.9660659908762505e-05, "loss": 2.1974, "step": 7146500 }, { "epoch": 20.69, "learning_rate": 3.9659936261115234e-05, "loss": 2.1876, "step": 7147000 }, { "epoch": 20.69, "learning_rate": 3.965921406076325e-05, "loss": 2.1884, "step": 7147500 }, { "epoch": 20.69, "learning_rate": 3.965849041311597e-05, "loss": 2.1904, "step": 7148000 }, { "epoch": 20.69, "learning_rate": 3.965776821276399e-05, "loss": 2.1854, "step": 7148500 }, { "epoch": 20.69, "learning_rate": 3.965704456511672e-05, "loss": 2.189, "step": 7149000 }, { "epoch": 20.69, "learning_rate": 3.965632091746944e-05, "loss": 2.1912, "step": 7149500 }, { "epoch": 20.7, "learning_rate": 3.965559726982216e-05, "loss": 2.1867, "step": 7150000 }, { "epoch": 20.7, "learning_rate": 3.965487362217488e-05, "loss": 2.1754, "step": 7150500 }, { "epoch": 20.7, "learning_rate": 3.9654149974527606e-05, "loss": 2.1954, "step": 7151000 }, { "epoch": 20.7, "learning_rate": 3.965342632688033e-05, "loss": 2.1747, "step": 7151500 }, { "epoch": 20.7, "learning_rate": 3.965270267923305e-05, "loss": 2.1966, "step": 7152000 }, { "epoch": 20.7, "learning_rate": 3.965197903158577e-05, "loss": 2.2262, "step": 7152500 }, { "epoch": 20.71, "learning_rate": 3.9651255383938494e-05, "loss": 2.1811, "step": 7153000 }, { "epoch": 20.71, "learning_rate": 3.965053173629122e-05, "loss": 2.2038, "step": 7153500 }, { "epoch": 20.71, "learning_rate": 3.964980808864394e-05, "loss": 2.1834, "step": 7154000 }, { "epoch": 20.71, "learning_rate": 3.964908444099667e-05, "loss": 2.1938, "step": 7154500 }, { "epoch": 20.71, "learning_rate": 3.964836079334939e-05, "loss": 2.1805, "step": 7155000 }, { "epoch": 20.71, "learning_rate": 3.964763714570211e-05, "loss": 2.1713, "step": 7155500 }, { "epoch": 20.71, "learning_rate": 3.9646914945350135e-05, "loss": 2.2008, "step": 7156000 }, { "epoch": 20.72, "learning_rate": 3.964619274499815e-05, "loss": 2.2036, "step": 7156500 }, { "epoch": 20.72, "learning_rate": 3.964546909735087e-05, "loss": 2.2091, "step": 7157000 }, { "epoch": 20.72, "learning_rate": 3.964474689699889e-05, "loss": 2.2039, "step": 7157500 }, { "epoch": 20.72, "learning_rate": 3.964402324935162e-05, "loss": 2.2009, "step": 7158000 }, { "epoch": 20.72, "learning_rate": 3.964329960170434e-05, "loss": 2.1991, "step": 7158500 }, { "epoch": 20.72, "learning_rate": 3.964257595405706e-05, "loss": 2.1926, "step": 7159000 }, { "epoch": 20.72, "learning_rate": 3.9641852306409784e-05, "loss": 2.1953, "step": 7159500 }, { "epoch": 20.73, "learning_rate": 3.9641128658762506e-05, "loss": 2.1746, "step": 7160000 }, { "epoch": 20.73, "learning_rate": 3.964040501111523e-05, "loss": 2.1994, "step": 7160500 }, { "epoch": 20.73, "learning_rate": 3.963968136346795e-05, "loss": 2.183, "step": 7161000 }, { "epoch": 20.73, "learning_rate": 3.9638959163115966e-05, "loss": 2.1718, "step": 7161500 }, { "epoch": 20.73, "learning_rate": 3.963823551546869e-05, "loss": 2.2027, "step": 7162000 }, { "epoch": 20.73, "learning_rate": 3.963751186782142e-05, "loss": 2.2022, "step": 7162500 }, { "epoch": 20.73, "learning_rate": 3.963678822017414e-05, "loss": 2.1776, "step": 7163000 }, { "epoch": 20.74, "learning_rate": 3.963606457252687e-05, "loss": 2.1903, "step": 7163500 }, { "epoch": 20.74, "learning_rate": 3.9635342372174884e-05, "loss": 2.193, "step": 7164000 }, { "epoch": 20.74, "learning_rate": 3.9634618724527606e-05, "loss": 2.2214, "step": 7164500 }, { "epoch": 20.74, "learning_rate": 3.963389507688033e-05, "loss": 2.1937, "step": 7165000 }, { "epoch": 20.74, "learning_rate": 3.963317142923305e-05, "loss": 2.1901, "step": 7165500 }, { "epoch": 20.74, "learning_rate": 3.963244778158577e-05, "loss": 2.1934, "step": 7166000 }, { "epoch": 20.74, "learning_rate": 3.963172558123379e-05, "loss": 2.1893, "step": 7166500 }, { "epoch": 20.75, "learning_rate": 3.963100193358652e-05, "loss": 2.1762, "step": 7167000 }, { "epoch": 20.75, "learning_rate": 3.963027828593924e-05, "loss": 2.1888, "step": 7167500 }, { "epoch": 20.75, "learning_rate": 3.962955463829196e-05, "loss": 2.2067, "step": 7168000 }, { "epoch": 20.75, "learning_rate": 3.9628830990644684e-05, "loss": 2.199, "step": 7168500 }, { "epoch": 20.75, "learning_rate": 3.96281087902927e-05, "loss": 2.2017, "step": 7169000 }, { "epoch": 20.75, "learning_rate": 3.962738514264542e-05, "loss": 2.1802, "step": 7169500 }, { "epoch": 20.75, "learning_rate": 3.962666149499815e-05, "loss": 2.2185, "step": 7170000 }, { "epoch": 20.76, "learning_rate": 3.962593784735087e-05, "loss": 2.185, "step": 7170500 }, { "epoch": 20.76, "learning_rate": 3.9625214199703596e-05, "loss": 2.1653, "step": 7171000 }, { "epoch": 20.76, "learning_rate": 3.962449055205632e-05, "loss": 2.2023, "step": 7171500 }, { "epoch": 20.76, "learning_rate": 3.962376690440904e-05, "loss": 2.1924, "step": 7172000 }, { "epoch": 20.76, "learning_rate": 3.962304325676177e-05, "loss": 2.2058, "step": 7172500 }, { "epoch": 20.76, "learning_rate": 3.962231960911449e-05, "loss": 2.1963, "step": 7173000 }, { "epoch": 20.76, "learning_rate": 3.9621595961467214e-05, "loss": 2.1966, "step": 7173500 }, { "epoch": 20.77, "learning_rate": 3.9620872313819936e-05, "loss": 2.2057, "step": 7174000 }, { "epoch": 20.77, "learning_rate": 3.962014866617266e-05, "loss": 2.1787, "step": 7174500 }, { "epoch": 20.77, "learning_rate": 3.9619426465820674e-05, "loss": 2.198, "step": 7175000 }, { "epoch": 20.77, "learning_rate": 3.961870426546869e-05, "loss": 2.1983, "step": 7175500 }, { "epoch": 20.77, "learning_rate": 3.961798206511671e-05, "loss": 2.1938, "step": 7176000 }, { "epoch": 20.77, "learning_rate": 3.9617258417469434e-05, "loss": 2.2027, "step": 7176500 }, { "epoch": 20.77, "learning_rate": 3.9616534769822156e-05, "loss": 2.1954, "step": 7177000 }, { "epoch": 20.78, "learning_rate": 3.9615811122174885e-05, "loss": 2.2239, "step": 7177500 }, { "epoch": 20.78, "learning_rate": 3.961508747452761e-05, "loss": 2.1961, "step": 7178000 }, { "epoch": 20.78, "learning_rate": 3.961436382688033e-05, "loss": 2.1883, "step": 7178500 }, { "epoch": 20.78, "learning_rate": 3.9613641626528345e-05, "loss": 2.1904, "step": 7179000 }, { "epoch": 20.78, "learning_rate": 3.961291797888107e-05, "loss": 2.2041, "step": 7179500 }, { "epoch": 20.78, "learning_rate": 3.9612194331233796e-05, "loss": 2.1883, "step": 7180000 }, { "epoch": 20.78, "learning_rate": 3.961147068358652e-05, "loss": 2.1977, "step": 7180500 }, { "epoch": 20.79, "learning_rate": 3.961074703593924e-05, "loss": 2.194, "step": 7181000 }, { "epoch": 20.79, "learning_rate": 3.961002338829196e-05, "loss": 2.1984, "step": 7181500 }, { "epoch": 20.79, "learning_rate": 3.9609299740644685e-05, "loss": 2.1997, "step": 7182000 }, { "epoch": 20.79, "learning_rate": 3.96085775402927e-05, "loss": 2.207, "step": 7182500 }, { "epoch": 20.79, "learning_rate": 3.960785389264542e-05, "loss": 2.1786, "step": 7183000 }, { "epoch": 20.79, "learning_rate": 3.9607130244998145e-05, "loss": 2.2072, "step": 7183500 }, { "epoch": 20.79, "learning_rate": 3.960640659735087e-05, "loss": 2.1652, "step": 7184000 }, { "epoch": 20.8, "learning_rate": 3.960568439699889e-05, "loss": 2.1916, "step": 7184500 }, { "epoch": 20.8, "learning_rate": 3.960496074935162e-05, "loss": 2.1847, "step": 7185000 }, { "epoch": 20.8, "learning_rate": 3.960423710170434e-05, "loss": 2.2219, "step": 7185500 }, { "epoch": 20.8, "learning_rate": 3.960351345405706e-05, "loss": 2.1979, "step": 7186000 }, { "epoch": 20.8, "learning_rate": 3.9602789806409785e-05, "loss": 2.1909, "step": 7186500 }, { "epoch": 20.8, "learning_rate": 3.960206615876251e-05, "loss": 2.1851, "step": 7187000 }, { "epoch": 20.8, "learning_rate": 3.960134251111523e-05, "loss": 2.2095, "step": 7187500 }, { "epoch": 20.81, "learning_rate": 3.960061886346795e-05, "loss": 2.1729, "step": 7188000 }, { "epoch": 20.81, "learning_rate": 3.959989811041127e-05, "loss": 2.2107, "step": 7188500 }, { "epoch": 20.81, "learning_rate": 3.959917446276399e-05, "loss": 2.1775, "step": 7189000 }, { "epoch": 20.81, "learning_rate": 3.959845081511671e-05, "loss": 2.1794, "step": 7189500 }, { "epoch": 20.81, "learning_rate": 3.9597727167469434e-05, "loss": 2.1903, "step": 7190000 }, { "epoch": 20.81, "learning_rate": 3.959700351982216e-05, "loss": 2.1969, "step": 7190500 }, { "epoch": 20.82, "learning_rate": 3.959627987217488e-05, "loss": 2.2004, "step": 7191000 }, { "epoch": 20.82, "learning_rate": 3.95955562245276e-05, "loss": 2.1858, "step": 7191500 }, { "epoch": 20.82, "learning_rate": 3.9594832576880323e-05, "loss": 2.1923, "step": 7192000 }, { "epoch": 20.82, "learning_rate": 3.959410892923305e-05, "loss": 2.1947, "step": 7192500 }, { "epoch": 20.82, "learning_rate": 3.9593385281585775e-05, "loss": 2.1651, "step": 7193000 }, { "epoch": 20.82, "learning_rate": 3.95926616339385e-05, "loss": 2.1864, "step": 7193500 }, { "epoch": 20.82, "learning_rate": 3.959193943358652e-05, "loss": 2.1864, "step": 7194000 }, { "epoch": 20.83, "learning_rate": 3.959121578593924e-05, "loss": 2.1666, "step": 7194500 }, { "epoch": 20.83, "learning_rate": 3.9590492138291964e-05, "loss": 2.1699, "step": 7195000 }, { "epoch": 20.83, "learning_rate": 3.9589768490644686e-05, "loss": 2.1882, "step": 7195500 }, { "epoch": 20.83, "learning_rate": 3.958904484299741e-05, "loss": 2.1871, "step": 7196000 }, { "epoch": 20.83, "learning_rate": 3.9588322642645424e-05, "loss": 2.1892, "step": 7196500 }, { "epoch": 20.83, "learning_rate": 3.9587598994998146e-05, "loss": 2.2012, "step": 7197000 }, { "epoch": 20.83, "learning_rate": 3.958687534735087e-05, "loss": 2.1832, "step": 7197500 }, { "epoch": 20.84, "learning_rate": 3.9586154594294184e-05, "loss": 2.1717, "step": 7198000 }, { "epoch": 20.84, "learning_rate": 3.9585430946646906e-05, "loss": 2.1965, "step": 7198500 }, { "epoch": 20.84, "learning_rate": 3.958470729899963e-05, "loss": 2.2071, "step": 7199000 }, { "epoch": 20.84, "learning_rate": 3.958398365135235e-05, "loss": 2.1903, "step": 7199500 }, { "epoch": 20.84, "learning_rate": 3.958326000370508e-05, "loss": 2.1991, "step": 7200000 }, { "epoch": 20.84, "learning_rate": 3.95825363560578e-05, "loss": 2.1824, "step": 7200500 }, { "epoch": 20.84, "learning_rate": 3.9581812708410524e-05, "loss": 2.2161, "step": 7201000 }, { "epoch": 20.85, "learning_rate": 3.9581089060763246e-05, "loss": 2.1802, "step": 7201500 }, { "epoch": 20.85, "learning_rate": 3.958036541311597e-05, "loss": 2.2092, "step": 7202000 }, { "epoch": 20.85, "learning_rate": 3.95796417654687e-05, "loss": 2.2037, "step": 7202500 }, { "epoch": 20.85, "learning_rate": 3.957891956511671e-05, "loss": 2.2011, "step": 7203000 }, { "epoch": 20.85, "learning_rate": 3.9578195917469435e-05, "loss": 2.2149, "step": 7203500 }, { "epoch": 20.85, "learning_rate": 3.957747371711745e-05, "loss": 2.2025, "step": 7204000 }, { "epoch": 20.85, "learning_rate": 3.957675006947017e-05, "loss": 2.1809, "step": 7204500 }, { "epoch": 20.86, "learning_rate": 3.9576027869118195e-05, "loss": 2.1722, "step": 7205000 }, { "epoch": 20.86, "learning_rate": 3.957530422147092e-05, "loss": 2.1973, "step": 7205500 }, { "epoch": 20.86, "learning_rate": 3.957458057382364e-05, "loss": 2.1982, "step": 7206000 }, { "epoch": 20.86, "learning_rate": 3.957385692617636e-05, "loss": 2.2047, "step": 7206500 }, { "epoch": 20.86, "learning_rate": 3.9573133278529084e-05, "loss": 2.1824, "step": 7207000 }, { "epoch": 20.86, "learning_rate": 3.957240963088181e-05, "loss": 2.1721, "step": 7207500 }, { "epoch": 20.86, "learning_rate": 3.9571685983234536e-05, "loss": 2.1989, "step": 7208000 }, { "epoch": 20.87, "learning_rate": 3.957096233558726e-05, "loss": 2.1662, "step": 7208500 }, { "epoch": 20.87, "learning_rate": 3.957023868793998e-05, "loss": 2.1649, "step": 7209000 }, { "epoch": 20.87, "learning_rate": 3.95695150402927e-05, "loss": 2.1563, "step": 7209500 }, { "epoch": 20.87, "learning_rate": 3.9568791392645424e-05, "loss": 2.1625, "step": 7210000 }, { "epoch": 20.87, "learning_rate": 3.956806774499815e-05, "loss": 2.1936, "step": 7210500 }, { "epoch": 20.87, "learning_rate": 3.956734554464617e-05, "loss": 2.1889, "step": 7211000 }, { "epoch": 20.87, "learning_rate": 3.956662189699889e-05, "loss": 2.2026, "step": 7211500 }, { "epoch": 20.88, "learning_rate": 3.9565898249351614e-05, "loss": 2.1658, "step": 7212000 }, { "epoch": 20.88, "learning_rate": 3.9565174601704336e-05, "loss": 2.181, "step": 7212500 }, { "epoch": 20.88, "learning_rate": 3.956445095405706e-05, "loss": 2.2005, "step": 7213000 }, { "epoch": 20.88, "learning_rate": 3.956372730640978e-05, "loss": 2.1768, "step": 7213500 }, { "epoch": 20.88, "learning_rate": 3.95630036587625e-05, "loss": 2.1973, "step": 7214000 }, { "epoch": 20.88, "learning_rate": 3.9562281458410525e-05, "loss": 2.1814, "step": 7214500 }, { "epoch": 20.88, "learning_rate": 3.956155781076325e-05, "loss": 2.1893, "step": 7215000 }, { "epoch": 20.89, "learning_rate": 3.9560834163115976e-05, "loss": 2.2131, "step": 7215500 }, { "epoch": 20.89, "learning_rate": 3.95601105154687e-05, "loss": 2.2068, "step": 7216000 }, { "epoch": 20.89, "learning_rate": 3.955938686782142e-05, "loss": 2.1922, "step": 7216500 }, { "epoch": 20.89, "learning_rate": 3.955866322017414e-05, "loss": 2.1701, "step": 7217000 }, { "epoch": 20.89, "learning_rate": 3.9557939572526865e-05, "loss": 2.1862, "step": 7217500 }, { "epoch": 20.89, "learning_rate": 3.955721592487959e-05, "loss": 2.187, "step": 7218000 }, { "epoch": 20.89, "learning_rate": 3.955649227723231e-05, "loss": 2.1803, "step": 7218500 }, { "epoch": 20.9, "learning_rate": 3.9555770076880325e-05, "loss": 2.1939, "step": 7219000 }, { "epoch": 20.9, "learning_rate": 3.955504642923305e-05, "loss": 2.1774, "step": 7219500 }, { "epoch": 20.9, "learning_rate": 3.955432422888107e-05, "loss": 2.1759, "step": 7220000 }, { "epoch": 20.9, "learning_rate": 3.955360058123379e-05, "loss": 2.2085, "step": 7220500 }, { "epoch": 20.9, "learning_rate": 3.9552876933586514e-05, "loss": 2.1986, "step": 7221000 }, { "epoch": 20.9, "learning_rate": 3.9552153285939236e-05, "loss": 2.2012, "step": 7221500 }, { "epoch": 20.9, "learning_rate": 3.955142963829196e-05, "loss": 2.1802, "step": 7222000 }, { "epoch": 20.91, "learning_rate": 3.955070599064469e-05, "loss": 2.2077, "step": 7222500 }, { "epoch": 20.91, "learning_rate": 3.95499837902927e-05, "loss": 2.1825, "step": 7223000 }, { "epoch": 20.91, "learning_rate": 3.9549260142645425e-05, "loss": 2.1874, "step": 7223500 }, { "epoch": 20.91, "learning_rate": 3.954853649499815e-05, "loss": 2.1706, "step": 7224000 }, { "epoch": 20.91, "learning_rate": 3.9547812847350877e-05, "loss": 2.1958, "step": 7224500 }, { "epoch": 20.91, "learning_rate": 3.95470891997036e-05, "loss": 2.1795, "step": 7225000 }, { "epoch": 20.91, "learning_rate": 3.954636555205632e-05, "loss": 2.1917, "step": 7225500 }, { "epoch": 20.92, "learning_rate": 3.954564190440904e-05, "loss": 2.1903, "step": 7226000 }, { "epoch": 20.92, "learning_rate": 3.9544918256761765e-05, "loss": 2.2002, "step": 7226500 }, { "epoch": 20.92, "learning_rate": 3.954419460911449e-05, "loss": 2.1824, "step": 7227000 }, { "epoch": 20.92, "learning_rate": 3.954347096146721e-05, "loss": 2.1861, "step": 7227500 }, { "epoch": 20.92, "learning_rate": 3.954274731381993e-05, "loss": 2.1993, "step": 7228000 }, { "epoch": 20.92, "learning_rate": 3.9542023666172654e-05, "loss": 2.2133, "step": 7228500 }, { "epoch": 20.92, "learning_rate": 3.954130001852538e-05, "loss": 2.1809, "step": 7229000 }, { "epoch": 20.93, "learning_rate": 3.9540576370878106e-05, "loss": 2.1779, "step": 7229500 }, { "epoch": 20.93, "learning_rate": 3.953985417052613e-05, "loss": 2.1869, "step": 7230000 }, { "epoch": 20.93, "learning_rate": 3.953913052287885e-05, "loss": 2.1685, "step": 7230500 }, { "epoch": 20.93, "learning_rate": 3.9538408322526866e-05, "loss": 2.1714, "step": 7231000 }, { "epoch": 20.93, "learning_rate": 3.953768612217488e-05, "loss": 2.1683, "step": 7231500 }, { "epoch": 20.93, "learning_rate": 3.9536962474527604e-05, "loss": 2.1793, "step": 7232000 }, { "epoch": 20.94, "learning_rate": 3.9536238826880326e-05, "loss": 2.1777, "step": 7232500 }, { "epoch": 20.94, "learning_rate": 3.953551517923305e-05, "loss": 2.1841, "step": 7233000 }, { "epoch": 20.94, "learning_rate": 3.953479153158578e-05, "loss": 2.2017, "step": 7233500 }, { "epoch": 20.94, "learning_rate": 3.953406933123379e-05, "loss": 2.1756, "step": 7234000 }, { "epoch": 20.94, "learning_rate": 3.9533345683586515e-05, "loss": 2.2047, "step": 7234500 }, { "epoch": 20.94, "learning_rate": 3.953262203593924e-05, "loss": 2.1831, "step": 7235000 }, { "epoch": 20.94, "learning_rate": 3.953189838829196e-05, "loss": 2.1749, "step": 7235500 }, { "epoch": 20.95, "learning_rate": 3.953117474064468e-05, "loss": 2.194, "step": 7236000 }, { "epoch": 20.95, "learning_rate": 3.9530451092997404e-05, "loss": 2.2104, "step": 7236500 }, { "epoch": 20.95, "learning_rate": 3.952972744535013e-05, "loss": 2.2044, "step": 7237000 }, { "epoch": 20.95, "learning_rate": 3.9529003797702855e-05, "loss": 2.1808, "step": 7237500 }, { "epoch": 20.95, "learning_rate": 3.952828159735088e-05, "loss": 2.2064, "step": 7238000 }, { "epoch": 20.95, "learning_rate": 3.952755939699889e-05, "loss": 2.1714, "step": 7238500 }, { "epoch": 20.95, "learning_rate": 3.9526835749351615e-05, "loss": 2.1923, "step": 7239000 }, { "epoch": 20.96, "learning_rate": 3.952611210170434e-05, "loss": 2.1771, "step": 7239500 }, { "epoch": 20.96, "learning_rate": 3.952538845405706e-05, "loss": 2.191, "step": 7240000 }, { "epoch": 20.96, "learning_rate": 3.952466480640978e-05, "loss": 2.1641, "step": 7240500 }, { "epoch": 20.96, "learning_rate": 3.9523941158762504e-05, "loss": 2.1744, "step": 7241000 }, { "epoch": 20.96, "learning_rate": 3.9523217511115226e-05, "loss": 2.2073, "step": 7241500 }, { "epoch": 20.96, "learning_rate": 3.9522493863467955e-05, "loss": 2.1801, "step": 7242000 }, { "epoch": 20.96, "learning_rate": 3.952177021582068e-05, "loss": 2.1941, "step": 7242500 }, { "epoch": 20.97, "learning_rate": 3.95210465681734e-05, "loss": 2.186, "step": 7243000 }, { "epoch": 20.97, "learning_rate": 3.952032292052612e-05, "loss": 2.178, "step": 7243500 }, { "epoch": 20.97, "learning_rate": 3.951960072017414e-05, "loss": 2.1826, "step": 7244000 }, { "epoch": 20.97, "learning_rate": 3.9518877072526867e-05, "loss": 2.1881, "step": 7244500 }, { "epoch": 20.97, "learning_rate": 3.951815342487959e-05, "loss": 2.1954, "step": 7245000 }, { "epoch": 20.97, "learning_rate": 3.951742977723231e-05, "loss": 2.1761, "step": 7245500 }, { "epoch": 20.97, "learning_rate": 3.951670612958503e-05, "loss": 2.1957, "step": 7246000 }, { "epoch": 20.98, "learning_rate": 3.9515982481937755e-05, "loss": 2.2072, "step": 7246500 }, { "epoch": 20.98, "learning_rate": 3.951526028158578e-05, "loss": 2.1965, "step": 7247000 }, { "epoch": 20.98, "learning_rate": 3.95145366339385e-05, "loss": 2.1911, "step": 7247500 }, { "epoch": 20.98, "learning_rate": 3.951381298629122e-05, "loss": 2.2166, "step": 7248000 }, { "epoch": 20.98, "learning_rate": 3.9513089338643945e-05, "loss": 2.2259, "step": 7248500 }, { "epoch": 20.98, "learning_rate": 3.951236569099667e-05, "loss": 2.1975, "step": 7249000 }, { "epoch": 20.98, "learning_rate": 3.951164204334939e-05, "loss": 2.188, "step": 7249500 }, { "epoch": 20.99, "learning_rate": 3.951091839570211e-05, "loss": 2.2196, "step": 7250000 }, { "epoch": 20.99, "learning_rate": 3.9510194748054833e-05, "loss": 2.2185, "step": 7250500 }, { "epoch": 20.99, "learning_rate": 3.950947399499815e-05, "loss": 2.1839, "step": 7251000 }, { "epoch": 20.99, "learning_rate": 3.950875034735087e-05, "loss": 2.168, "step": 7251500 }, { "epoch": 20.99, "learning_rate": 3.9508026699703594e-05, "loss": 2.1618, "step": 7252000 }, { "epoch": 20.99, "learning_rate": 3.950730305205632e-05, "loss": 2.1876, "step": 7252500 }, { "epoch": 20.99, "learning_rate": 3.950658085170434e-05, "loss": 2.2013, "step": 7253000 }, { "epoch": 21.0, "learning_rate": 3.950585720405706e-05, "loss": 2.2074, "step": 7253500 }, { "epoch": 21.0, "learning_rate": 3.950513355640978e-05, "loss": 2.1825, "step": 7254000 }, { "epoch": 21.0, "learning_rate": 3.9504409908762505e-05, "loss": 2.1969, "step": 7254500 }, { "epoch": 21.0, "eval_accuracy": 0.6605211557582317, "eval_accuracy_mlm": 0.6243918605421886, "eval_accuracy_nsp": 0.8541291425481111, "eval_loss": 2.224362373352051, "eval_runtime": 331.3457, "eval_samples_per_second": 1317.011, "eval_steps_per_second": 54.876, "step": 7254912 }, { "epoch": 21.0, "learning_rate": 3.950368626111523e-05, "loss": 2.1811, "step": 7255000 }, { "epoch": 21.0, "learning_rate": 3.9502962613467956e-05, "loss": 2.1592, "step": 7255500 }, { "epoch": 21.0, "learning_rate": 3.950223896582068e-05, "loss": 2.1583, "step": 7256000 }, { "epoch": 21.0, "learning_rate": 3.95015153181734e-05, "loss": 2.1849, "step": 7256500 }, { "epoch": 21.01, "learning_rate": 3.950079167052612e-05, "loss": 2.1725, "step": 7257000 }, { "epoch": 21.01, "learning_rate": 3.950006947017414e-05, "loss": 2.1677, "step": 7257500 }, { "epoch": 21.01, "learning_rate": 3.9499347269822154e-05, "loss": 2.167, "step": 7258000 }, { "epoch": 21.01, "learning_rate": 3.949862362217488e-05, "loss": 2.1521, "step": 7258500 }, { "epoch": 21.01, "learning_rate": 3.9497899974527605e-05, "loss": 2.1557, "step": 7259000 }, { "epoch": 21.01, "learning_rate": 3.949717632688033e-05, "loss": 2.1681, "step": 7259500 }, { "epoch": 21.01, "learning_rate": 3.9496452679233056e-05, "loss": 2.1724, "step": 7260000 }, { "epoch": 21.02, "learning_rate": 3.949572903158578e-05, "loss": 2.1526, "step": 7260500 }, { "epoch": 21.02, "learning_rate": 3.94950053839385e-05, "loss": 2.1771, "step": 7261000 }, { "epoch": 21.02, "learning_rate": 3.949428173629122e-05, "loss": 2.1626, "step": 7261500 }, { "epoch": 21.02, "learning_rate": 3.9493558088643945e-05, "loss": 2.1695, "step": 7262000 }, { "epoch": 21.02, "learning_rate": 3.949283444099667e-05, "loss": 2.1735, "step": 7262500 }, { "epoch": 21.02, "learning_rate": 3.949211079334939e-05, "loss": 2.1681, "step": 7263000 }, { "epoch": 21.02, "learning_rate": 3.949138714570211e-05, "loss": 2.17, "step": 7263500 }, { "epoch": 21.03, "learning_rate": 3.949066494535013e-05, "loss": 2.1688, "step": 7264000 }, { "epoch": 21.03, "learning_rate": 3.9489941297702857e-05, "loss": 2.1476, "step": 7264500 }, { "epoch": 21.03, "learning_rate": 3.948921765005558e-05, "loss": 2.1606, "step": 7265000 }, { "epoch": 21.03, "learning_rate": 3.94884940024083e-05, "loss": 2.1699, "step": 7265500 }, { "epoch": 21.03, "learning_rate": 3.948777180205632e-05, "loss": 2.1625, "step": 7266000 }, { "epoch": 21.03, "learning_rate": 3.948704815440904e-05, "loss": 2.1701, "step": 7266500 }, { "epoch": 21.03, "learning_rate": 3.9486325954057054e-05, "loss": 2.155, "step": 7267000 }, { "epoch": 21.04, "learning_rate": 3.9485603753705084e-05, "loss": 2.1811, "step": 7267500 }, { "epoch": 21.04, "learning_rate": 3.9484880106057806e-05, "loss": 2.1702, "step": 7268000 }, { "epoch": 21.04, "learning_rate": 3.948415790570582e-05, "loss": 2.1634, "step": 7268500 }, { "epoch": 21.04, "learning_rate": 3.9483434258058544e-05, "loss": 2.1607, "step": 7269000 }, { "epoch": 21.04, "learning_rate": 3.9482710610411266e-05, "loss": 2.164, "step": 7269500 }, { "epoch": 21.04, "learning_rate": 3.948198696276399e-05, "loss": 2.1654, "step": 7270000 }, { "epoch": 21.05, "learning_rate": 3.948126331511671e-05, "loss": 2.1881, "step": 7270500 }, { "epoch": 21.05, "learning_rate": 3.948053966746943e-05, "loss": 2.1687, "step": 7271000 }, { "epoch": 21.05, "learning_rate": 3.9479816019822155e-05, "loss": 2.1866, "step": 7271500 }, { "epoch": 21.05, "learning_rate": 3.9479092372174884e-05, "loss": 2.1686, "step": 7272000 }, { "epoch": 21.05, "learning_rate": 3.9478368724527606e-05, "loss": 2.1756, "step": 7272500 }, { "epoch": 21.05, "learning_rate": 3.947764507688033e-05, "loss": 2.1684, "step": 7273000 }, { "epoch": 21.05, "learning_rate": 3.9476922876528344e-05, "loss": 2.155, "step": 7273500 }, { "epoch": 21.06, "learning_rate": 3.9476199228881066e-05, "loss": 2.1698, "step": 7274000 }, { "epoch": 21.06, "learning_rate": 3.947547558123379e-05, "loss": 2.1788, "step": 7274500 }, { "epoch": 21.06, "learning_rate": 3.947475193358652e-05, "loss": 2.1493, "step": 7275000 }, { "epoch": 21.06, "learning_rate": 3.947402828593924e-05, "loss": 2.1658, "step": 7275500 }, { "epoch": 21.06, "learning_rate": 3.947330463829196e-05, "loss": 2.1779, "step": 7276000 }, { "epoch": 21.06, "learning_rate": 3.9472580990644684e-05, "loss": 2.1535, "step": 7276500 }, { "epoch": 21.06, "learning_rate": 3.9471857342997406e-05, "loss": 2.1848, "step": 7277000 }, { "epoch": 21.07, "learning_rate": 3.9471133695350135e-05, "loss": 2.1536, "step": 7277500 }, { "epoch": 21.07, "learning_rate": 3.947041149499815e-05, "loss": 2.164, "step": 7278000 }, { "epoch": 21.07, "learning_rate": 3.946968784735087e-05, "loss": 2.1742, "step": 7278500 }, { "epoch": 21.07, "learning_rate": 3.9468964199703595e-05, "loss": 2.1703, "step": 7279000 }, { "epoch": 21.07, "learning_rate": 3.946824199935161e-05, "loss": 2.1969, "step": 7279500 }, { "epoch": 21.07, "learning_rate": 3.946751835170433e-05, "loss": 2.1698, "step": 7280000 }, { "epoch": 21.07, "learning_rate": 3.9466794704057055e-05, "loss": 2.1673, "step": 7280500 }, { "epoch": 21.08, "learning_rate": 3.9466071056409784e-05, "loss": 2.1703, "step": 7281000 }, { "epoch": 21.08, "learning_rate": 3.9465347408762506e-05, "loss": 2.1922, "step": 7281500 }, { "epoch": 21.08, "learning_rate": 3.946462520841052e-05, "loss": 2.1833, "step": 7282000 }, { "epoch": 21.08, "learning_rate": 3.946390156076325e-05, "loss": 2.1768, "step": 7282500 }, { "epoch": 21.08, "learning_rate": 3.9463179360411267e-05, "loss": 2.1654, "step": 7283000 }, { "epoch": 21.08, "learning_rate": 3.946245571276399e-05, "loss": 2.1596, "step": 7283500 }, { "epoch": 21.08, "learning_rate": 3.946173206511671e-05, "loss": 2.1583, "step": 7284000 }, { "epoch": 21.09, "learning_rate": 3.946100841746943e-05, "loss": 2.1923, "step": 7284500 }, { "epoch": 21.09, "learning_rate": 3.946028476982216e-05, "loss": 2.191, "step": 7285000 }, { "epoch": 21.09, "learning_rate": 3.9459561122174885e-05, "loss": 2.1561, "step": 7285500 }, { "epoch": 21.09, "learning_rate": 3.94588389218229e-05, "loss": 2.193, "step": 7286000 }, { "epoch": 21.09, "learning_rate": 3.945811527417562e-05, "loss": 2.1985, "step": 7286500 }, { "epoch": 21.09, "learning_rate": 3.9457391626528345e-05, "loss": 2.1653, "step": 7287000 }, { "epoch": 21.09, "learning_rate": 3.945666797888107e-05, "loss": 2.162, "step": 7287500 }, { "epoch": 21.1, "learning_rate": 3.945594433123379e-05, "loss": 2.1915, "step": 7288000 }, { "epoch": 21.1, "learning_rate": 3.945522068358651e-05, "loss": 2.182, "step": 7288500 }, { "epoch": 21.1, "learning_rate": 3.9454497035939233e-05, "loss": 2.1659, "step": 7289000 }, { "epoch": 21.1, "learning_rate": 3.945377338829196e-05, "loss": 2.1941, "step": 7289500 }, { "epoch": 21.1, "learning_rate": 3.9453049740644685e-05, "loss": 2.1575, "step": 7290000 }, { "epoch": 21.1, "learning_rate": 3.9452326092997414e-05, "loss": 2.1747, "step": 7290500 }, { "epoch": 21.1, "learning_rate": 3.9451602445350136e-05, "loss": 2.1626, "step": 7291000 }, { "epoch": 21.11, "learning_rate": 3.945087879770286e-05, "loss": 2.1849, "step": 7291500 }, { "epoch": 21.11, "learning_rate": 3.945015515005558e-05, "loss": 2.1592, "step": 7292000 }, { "epoch": 21.11, "learning_rate": 3.9449432949703596e-05, "loss": 2.1604, "step": 7292500 }, { "epoch": 21.11, "learning_rate": 3.944870930205632e-05, "loss": 2.1631, "step": 7293000 }, { "epoch": 21.11, "learning_rate": 3.944798565440904e-05, "loss": 2.1726, "step": 7293500 }, { "epoch": 21.11, "learning_rate": 3.944726200676176e-05, "loss": 2.1853, "step": 7294000 }, { "epoch": 21.11, "learning_rate": 3.9446539806409785e-05, "loss": 2.1589, "step": 7294500 }, { "epoch": 21.12, "learning_rate": 3.944581615876251e-05, "loss": 2.1762, "step": 7295000 }, { "epoch": 21.12, "learning_rate": 3.944509251111523e-05, "loss": 2.1781, "step": 7295500 }, { "epoch": 21.12, "learning_rate": 3.944436886346795e-05, "loss": 2.1752, "step": 7296000 }, { "epoch": 21.12, "learning_rate": 3.9443645215820674e-05, "loss": 2.183, "step": 7296500 }, { "epoch": 21.12, "learning_rate": 3.94429215681734e-05, "loss": 2.1667, "step": 7297000 }, { "epoch": 21.12, "learning_rate": 3.9442197920526125e-05, "loss": 2.1706, "step": 7297500 }, { "epoch": 21.12, "learning_rate": 3.944147572017414e-05, "loss": 2.1725, "step": 7298000 }, { "epoch": 21.13, "learning_rate": 3.944075207252686e-05, "loss": 2.1753, "step": 7298500 }, { "epoch": 21.13, "learning_rate": 3.9440028424879585e-05, "loss": 2.1702, "step": 7299000 }, { "epoch": 21.13, "learning_rate": 3.9439304777232314e-05, "loss": 2.1848, "step": 7299500 }, { "epoch": 21.13, "learning_rate": 3.943858257688033e-05, "loss": 2.1862, "step": 7300000 }, { "epoch": 21.13, "learning_rate": 3.943785892923305e-05, "loss": 2.19, "step": 7300500 }, { "epoch": 21.13, "learning_rate": 3.943713672888107e-05, "loss": 2.1662, "step": 7301000 }, { "epoch": 21.13, "learning_rate": 3.943641308123379e-05, "loss": 2.1805, "step": 7301500 }, { "epoch": 21.14, "learning_rate": 3.943568943358651e-05, "loss": 2.1605, "step": 7302000 }, { "epoch": 21.14, "learning_rate": 3.9434965785939234e-05, "loss": 2.1541, "step": 7302500 }, { "epoch": 21.14, "learning_rate": 3.943424213829196e-05, "loss": 2.1838, "step": 7303000 }, { "epoch": 21.14, "learning_rate": 3.9433518490644686e-05, "loss": 2.1809, "step": 7303500 }, { "epoch": 21.14, "learning_rate": 3.943279484299741e-05, "loss": 2.2009, "step": 7304000 }, { "epoch": 21.14, "learning_rate": 3.943207119535014e-05, "loss": 2.1774, "step": 7304500 }, { "epoch": 21.14, "learning_rate": 3.943134754770286e-05, "loss": 2.1718, "step": 7305000 }, { "epoch": 21.15, "learning_rate": 3.943062390005558e-05, "loss": 2.1606, "step": 7305500 }, { "epoch": 21.15, "learning_rate": 3.9429900252408303e-05, "loss": 2.1617, "step": 7306000 }, { "epoch": 21.15, "learning_rate": 3.9429176604761026e-05, "loss": 2.15, "step": 7306500 }, { "epoch": 21.15, "learning_rate": 3.942845295711375e-05, "loss": 2.1653, "step": 7307000 }, { "epoch": 21.15, "learning_rate": 3.942772930946647e-05, "loss": 2.1616, "step": 7307500 }, { "epoch": 21.15, "learning_rate": 3.942700566181919e-05, "loss": 2.1741, "step": 7308000 }, { "epoch": 21.16, "learning_rate": 3.9426283461467215e-05, "loss": 2.1527, "step": 7308500 }, { "epoch": 21.16, "learning_rate": 3.942555981381994e-05, "loss": 2.1781, "step": 7309000 }, { "epoch": 21.16, "learning_rate": 3.942483616617266e-05, "loss": 2.1946, "step": 7309500 }, { "epoch": 21.16, "learning_rate": 3.942411251852538e-05, "loss": 2.1558, "step": 7310000 }, { "epoch": 21.16, "learning_rate": 3.9423388870878104e-05, "loss": 2.1607, "step": 7310500 }, { "epoch": 21.16, "learning_rate": 3.9422665223230826e-05, "loss": 2.1964, "step": 7311000 }, { "epoch": 21.16, "learning_rate": 3.942194157558355e-05, "loss": 2.1828, "step": 7311500 }, { "epoch": 21.17, "learning_rate": 3.942121792793628e-05, "loss": 2.1622, "step": 7312000 }, { "epoch": 21.17, "learning_rate": 3.9420494280289e-05, "loss": 2.1892, "step": 7312500 }, { "epoch": 21.17, "learning_rate": 3.9419772079937015e-05, "loss": 2.1681, "step": 7313000 }, { "epoch": 21.17, "learning_rate": 3.941904843228974e-05, "loss": 2.1733, "step": 7313500 }, { "epoch": 21.17, "learning_rate": 3.9418324784642466e-05, "loss": 2.1789, "step": 7314000 }, { "epoch": 21.17, "learning_rate": 3.941760113699519e-05, "loss": 2.1653, "step": 7314500 }, { "epoch": 21.17, "learning_rate": 3.9416878936643204e-05, "loss": 2.1745, "step": 7315000 }, { "epoch": 21.18, "learning_rate": 3.9416155288995926e-05, "loss": 2.1559, "step": 7315500 }, { "epoch": 21.18, "learning_rate": 3.941543164134865e-05, "loss": 2.1852, "step": 7316000 }, { "epoch": 21.18, "learning_rate": 3.9414709440996664e-05, "loss": 2.1647, "step": 7316500 }, { "epoch": 21.18, "learning_rate": 3.9413985793349386e-05, "loss": 2.1863, "step": 7317000 }, { "epoch": 21.18, "learning_rate": 3.941326359299741e-05, "loss": 2.1804, "step": 7317500 }, { "epoch": 21.18, "learning_rate": 3.941253994535013e-05, "loss": 2.1875, "step": 7318000 }, { "epoch": 21.18, "learning_rate": 3.941181629770285e-05, "loss": 2.1557, "step": 7318500 }, { "epoch": 21.19, "learning_rate": 3.9411092650055575e-05, "loss": 2.1733, "step": 7319000 }, { "epoch": 21.19, "learning_rate": 3.9410369002408304e-05, "loss": 2.1816, "step": 7319500 }, { "epoch": 21.19, "learning_rate": 3.9409645354761026e-05, "loss": 2.1725, "step": 7320000 }, { "epoch": 21.19, "learning_rate": 3.940892170711375e-05, "loss": 2.137, "step": 7320500 }, { "epoch": 21.19, "learning_rate": 3.940819805946647e-05, "loss": 2.1726, "step": 7321000 }, { "epoch": 21.19, "learning_rate": 3.940747441181919e-05, "loss": 2.1622, "step": 7321500 }, { "epoch": 21.19, "learning_rate": 3.9406750764171915e-05, "loss": 2.1717, "step": 7322000 }, { "epoch": 21.2, "learning_rate": 3.940602711652464e-05, "loss": 2.1578, "step": 7322500 }, { "epoch": 21.2, "learning_rate": 3.940530346887737e-05, "loss": 2.1802, "step": 7323000 }, { "epoch": 21.2, "learning_rate": 3.940457982123009e-05, "loss": 2.1847, "step": 7323500 }, { "epoch": 21.2, "learning_rate": 3.940385617358281e-05, "loss": 2.1939, "step": 7324000 }, { "epoch": 21.2, "learning_rate": 3.940313252593553e-05, "loss": 2.1643, "step": 7324500 }, { "epoch": 21.2, "learning_rate": 3.9402408878288256e-05, "loss": 2.1956, "step": 7325000 }, { "epoch": 21.2, "learning_rate": 3.9401688125231564e-05, "loss": 2.1687, "step": 7325500 }, { "epoch": 21.21, "learning_rate": 3.9400964477584293e-05, "loss": 2.1542, "step": 7326000 }, { "epoch": 21.21, "learning_rate": 3.9400240829937016e-05, "loss": 2.1785, "step": 7326500 }, { "epoch": 21.21, "learning_rate": 3.939951718228974e-05, "loss": 2.1795, "step": 7327000 }, { "epoch": 21.21, "learning_rate": 3.939879353464247e-05, "loss": 2.1856, "step": 7327500 }, { "epoch": 21.21, "learning_rate": 3.939806988699519e-05, "loss": 2.1624, "step": 7328000 }, { "epoch": 21.21, "learning_rate": 3.939734623934791e-05, "loss": 2.1646, "step": 7328500 }, { "epoch": 21.21, "learning_rate": 3.9396622591700634e-05, "loss": 2.1746, "step": 7329000 }, { "epoch": 21.22, "learning_rate": 3.9395898944053356e-05, "loss": 2.1867, "step": 7329500 }, { "epoch": 21.22, "learning_rate": 3.939517529640608e-05, "loss": 2.1711, "step": 7330000 }, { "epoch": 21.22, "learning_rate": 3.93944516487588e-05, "loss": 2.1838, "step": 7330500 }, { "epoch": 21.22, "learning_rate": 3.939372800111152e-05, "loss": 2.169, "step": 7331000 }, { "epoch": 21.22, "learning_rate": 3.9393004353464245e-05, "loss": 2.1612, "step": 7331500 }, { "epoch": 21.22, "learning_rate": 3.939228360040756e-05, "loss": 2.1424, "step": 7332000 }, { "epoch": 21.22, "learning_rate": 3.939155995276028e-05, "loss": 2.1659, "step": 7332500 }, { "epoch": 21.23, "learning_rate": 3.9390836305113005e-05, "loss": 2.1778, "step": 7333000 }, { "epoch": 21.23, "learning_rate": 3.939011410476102e-05, "loss": 2.1708, "step": 7333500 }, { "epoch": 21.23, "learning_rate": 3.938939045711374e-05, "loss": 2.1804, "step": 7334000 }, { "epoch": 21.23, "learning_rate": 3.938866680946647e-05, "loss": 2.2183, "step": 7334500 }, { "epoch": 21.23, "learning_rate": 3.9387943161819194e-05, "loss": 2.1728, "step": 7335000 }, { "epoch": 21.23, "learning_rate": 3.9387219514171916e-05, "loss": 2.1556, "step": 7335500 }, { "epoch": 21.23, "learning_rate": 3.9386495866524645e-05, "loss": 2.1651, "step": 7336000 }, { "epoch": 21.24, "learning_rate": 3.938577221887737e-05, "loss": 2.1995, "step": 7336500 }, { "epoch": 21.24, "learning_rate": 3.938505001852538e-05, "loss": 2.1532, "step": 7337000 }, { "epoch": 21.24, "learning_rate": 3.9384326370878105e-05, "loss": 2.177, "step": 7337500 }, { "epoch": 21.24, "learning_rate": 3.938360272323083e-05, "loss": 2.1766, "step": 7338000 }, { "epoch": 21.24, "learning_rate": 3.938287907558355e-05, "loss": 2.1945, "step": 7338500 }, { "epoch": 21.24, "learning_rate": 3.938215542793627e-05, "loss": 2.1851, "step": 7339000 }, { "epoch": 21.24, "learning_rate": 3.9381431780288994e-05, "loss": 2.177, "step": 7339500 }, { "epoch": 21.25, "learning_rate": 3.9380709579937017e-05, "loss": 2.1821, "step": 7340000 }, { "epoch": 21.25, "learning_rate": 3.937998737958503e-05, "loss": 2.1676, "step": 7340500 }, { "epoch": 21.25, "learning_rate": 3.9379263731937754e-05, "loss": 2.1636, "step": 7341000 }, { "epoch": 21.25, "learning_rate": 3.9378540084290477e-05, "loss": 2.1845, "step": 7341500 }, { "epoch": 21.25, "learning_rate": 3.9377816436643206e-05, "loss": 2.1416, "step": 7342000 }, { "epoch": 21.25, "learning_rate": 3.937709278899593e-05, "loss": 2.1879, "step": 7342500 }, { "epoch": 21.25, "learning_rate": 3.937636914134865e-05, "loss": 2.1821, "step": 7343000 }, { "epoch": 21.26, "learning_rate": 3.937564549370137e-05, "loss": 2.1761, "step": 7343500 }, { "epoch": 21.26, "learning_rate": 3.9374923293349395e-05, "loss": 2.1915, "step": 7344000 }, { "epoch": 21.26, "learning_rate": 3.937419964570212e-05, "loss": 2.1481, "step": 7344500 }, { "epoch": 21.26, "learning_rate": 3.937347599805484e-05, "loss": 2.1783, "step": 7345000 }, { "epoch": 21.26, "learning_rate": 3.937275235040756e-05, "loss": 2.1802, "step": 7345500 }, { "epoch": 21.26, "learning_rate": 3.9372028702760284e-05, "loss": 2.1797, "step": 7346000 }, { "epoch": 21.27, "learning_rate": 3.9371305055113006e-05, "loss": 2.1631, "step": 7346500 }, { "epoch": 21.27, "learning_rate": 3.937058140746573e-05, "loss": 2.1934, "step": 7347000 }, { "epoch": 21.27, "learning_rate": 3.936985775981845e-05, "loss": 2.1966, "step": 7347500 }, { "epoch": 21.27, "learning_rate": 3.9369135559466466e-05, "loss": 2.1479, "step": 7348000 }, { "epoch": 21.27, "learning_rate": 3.9368411911819195e-05, "loss": 2.1707, "step": 7348500 }, { "epoch": 21.27, "learning_rate": 3.936768826417192e-05, "loss": 2.1543, "step": 7349000 }, { "epoch": 21.27, "learning_rate": 3.9366964616524646e-05, "loss": 2.1768, "step": 7349500 }, { "epoch": 21.28, "learning_rate": 3.936624096887737e-05, "loss": 2.2039, "step": 7350000 }, { "epoch": 21.28, "learning_rate": 3.936551732123009e-05, "loss": 2.1764, "step": 7350500 }, { "epoch": 21.28, "learning_rate": 3.936479367358281e-05, "loss": 2.2059, "step": 7351000 }, { "epoch": 21.28, "learning_rate": 3.936407147323083e-05, "loss": 2.1905, "step": 7351500 }, { "epoch": 21.28, "learning_rate": 3.936334782558355e-05, "loss": 2.2004, "step": 7352000 }, { "epoch": 21.28, "learning_rate": 3.936262417793627e-05, "loss": 2.19, "step": 7352500 }, { "epoch": 21.28, "learning_rate": 3.9361900530288995e-05, "loss": 2.1772, "step": 7353000 }, { "epoch": 21.29, "learning_rate": 3.936117688264172e-05, "loss": 2.1938, "step": 7353500 }, { "epoch": 21.29, "learning_rate": 3.9360453234994446e-05, "loss": 2.1745, "step": 7354000 }, { "epoch": 21.29, "learning_rate": 3.935972958734717e-05, "loss": 2.1877, "step": 7354500 }, { "epoch": 21.29, "learning_rate": 3.935900593969989e-05, "loss": 2.1442, "step": 7355000 }, { "epoch": 21.29, "learning_rate": 3.9358283739347906e-05, "loss": 2.1824, "step": 7355500 }, { "epoch": 21.29, "learning_rate": 3.935756009170063e-05, "loss": 2.1821, "step": 7356000 }, { "epoch": 21.29, "learning_rate": 3.935683644405336e-05, "loss": 2.1921, "step": 7356500 }, { "epoch": 21.3, "learning_rate": 3.935611279640608e-05, "loss": 2.1854, "step": 7357000 }, { "epoch": 21.3, "learning_rate": 3.9355390596054095e-05, "loss": 2.1697, "step": 7357500 }, { "epoch": 21.3, "learning_rate": 3.9354666948406824e-05, "loss": 2.1799, "step": 7358000 }, { "epoch": 21.3, "learning_rate": 3.9353943300759547e-05, "loss": 2.1927, "step": 7358500 }, { "epoch": 21.3, "learning_rate": 3.935321965311227e-05, "loss": 2.1768, "step": 7359000 }, { "epoch": 21.3, "learning_rate": 3.935249890005558e-05, "loss": 2.1827, "step": 7359500 }, { "epoch": 21.3, "learning_rate": 3.935177669970359e-05, "loss": 2.1646, "step": 7360000 }, { "epoch": 21.31, "learning_rate": 3.935105305205632e-05, "loss": 2.1761, "step": 7360500 }, { "epoch": 21.31, "learning_rate": 3.9350329404409044e-05, "loss": 2.1737, "step": 7361000 }, { "epoch": 21.31, "learning_rate": 3.934960575676177e-05, "loss": 2.1669, "step": 7361500 }, { "epoch": 21.31, "learning_rate": 3.934888210911449e-05, "loss": 2.1957, "step": 7362000 }, { "epoch": 21.31, "learning_rate": 3.9348159908762504e-05, "loss": 2.2002, "step": 7362500 }, { "epoch": 21.31, "learning_rate": 3.934743626111523e-05, "loss": 2.2162, "step": 7363000 }, { "epoch": 21.31, "learning_rate": 3.934671261346795e-05, "loss": 2.2243, "step": 7363500 }, { "epoch": 21.32, "learning_rate": 3.934598896582067e-05, "loss": 2.2011, "step": 7364000 }, { "epoch": 21.32, "learning_rate": 3.93452653181734e-05, "loss": 2.1808, "step": 7364500 }, { "epoch": 21.32, "learning_rate": 3.934454167052612e-05, "loss": 2.1931, "step": 7365000 }, { "epoch": 21.32, "learning_rate": 3.9343818022878845e-05, "loss": 2.2043, "step": 7365500 }, { "epoch": 21.32, "learning_rate": 3.9343094375231574e-05, "loss": 2.1645, "step": 7366000 }, { "epoch": 21.32, "learning_rate": 3.934237217487959e-05, "loss": 2.1788, "step": 7366500 }, { "epoch": 21.32, "learning_rate": 3.934164852723231e-05, "loss": 2.1872, "step": 7367000 }, { "epoch": 21.33, "learning_rate": 3.9340924879585034e-05, "loss": 2.1893, "step": 7367500 }, { "epoch": 21.33, "learning_rate": 3.9340201231937756e-05, "loss": 2.154, "step": 7368000 }, { "epoch": 21.33, "learning_rate": 3.933947903158577e-05, "loss": 2.1713, "step": 7368500 }, { "epoch": 21.33, "learning_rate": 3.9338755383938494e-05, "loss": 2.19, "step": 7369000 }, { "epoch": 21.33, "learning_rate": 3.933803173629122e-05, "loss": 2.1574, "step": 7369500 }, { "epoch": 21.33, "learning_rate": 3.933730953593924e-05, "loss": 2.167, "step": 7370000 }, { "epoch": 21.33, "learning_rate": 3.933658588829196e-05, "loss": 2.189, "step": 7370500 }, { "epoch": 21.34, "learning_rate": 3.933586224064468e-05, "loss": 2.1435, "step": 7371000 }, { "epoch": 21.34, "learning_rate": 3.9335138592997405e-05, "loss": 2.21, "step": 7371500 }, { "epoch": 21.34, "learning_rate": 3.9334414945350134e-05, "loss": 2.1827, "step": 7372000 }, { "epoch": 21.34, "learning_rate": 3.9333691297702856e-05, "loss": 2.1837, "step": 7372500 }, { "epoch": 21.34, "learning_rate": 3.933296765005558e-05, "loss": 2.1621, "step": 7373000 }, { "epoch": 21.34, "learning_rate": 3.93322440024083e-05, "loss": 2.1642, "step": 7373500 }, { "epoch": 21.34, "learning_rate": 3.933152035476102e-05, "loss": 2.1759, "step": 7374000 }, { "epoch": 21.35, "learning_rate": 3.9330796707113745e-05, "loss": 2.1893, "step": 7374500 }, { "epoch": 21.35, "learning_rate": 3.9330073059466474e-05, "loss": 2.1808, "step": 7375000 }, { "epoch": 21.35, "learning_rate": 3.9329349411819196e-05, "loss": 2.1829, "step": 7375500 }, { "epoch": 21.35, "learning_rate": 3.932862576417192e-05, "loss": 2.1685, "step": 7376000 }, { "epoch": 21.35, "learning_rate": 3.9327903563819934e-05, "loss": 2.1903, "step": 7376500 }, { "epoch": 21.35, "learning_rate": 3.9327179916172656e-05, "loss": 2.1887, "step": 7377000 }, { "epoch": 21.35, "learning_rate": 3.932645626852538e-05, "loss": 2.1607, "step": 7377500 }, { "epoch": 21.36, "learning_rate": 3.93257326208781e-05, "loss": 2.179, "step": 7378000 }, { "epoch": 21.36, "learning_rate": 3.932501042052612e-05, "loss": 2.1683, "step": 7378500 }, { "epoch": 21.36, "learning_rate": 3.9324286772878845e-05, "loss": 2.1657, "step": 7379000 }, { "epoch": 21.36, "learning_rate": 3.9323563125231574e-05, "loss": 2.1847, "step": 7379500 }, { "epoch": 21.36, "learning_rate": 3.93228394775843e-05, "loss": 2.1889, "step": 7380000 }, { "epoch": 21.36, "learning_rate": 3.932211582993702e-05, "loss": 2.1722, "step": 7380500 }, { "epoch": 21.36, "learning_rate": 3.932139218228974e-05, "loss": 2.164, "step": 7381000 }, { "epoch": 21.37, "learning_rate": 3.932066853464246e-05, "loss": 2.1855, "step": 7381500 }, { "epoch": 21.37, "learning_rate": 3.9319944886995186e-05, "loss": 2.1724, "step": 7382000 }, { "epoch": 21.37, "learning_rate": 3.931922123934791e-05, "loss": 2.1618, "step": 7382500 }, { "epoch": 21.37, "learning_rate": 3.931849759170063e-05, "loss": 2.1968, "step": 7383000 }, { "epoch": 21.37, "learning_rate": 3.931777539134865e-05, "loss": 2.1908, "step": 7383500 }, { "epoch": 21.37, "learning_rate": 3.931705463829196e-05, "loss": 2.1698, "step": 7384000 }, { "epoch": 21.38, "learning_rate": 3.9316330990644684e-05, "loss": 2.1771, "step": 7384500 }, { "epoch": 21.38, "learning_rate": 3.9315607342997406e-05, "loss": 2.1714, "step": 7385000 }, { "epoch": 21.38, "learning_rate": 3.931488369535013e-05, "loss": 2.1799, "step": 7385500 }, { "epoch": 21.38, "learning_rate": 3.931416004770285e-05, "loss": 2.166, "step": 7386000 }, { "epoch": 21.38, "learning_rate": 3.931343640005557e-05, "loss": 2.1757, "step": 7386500 }, { "epoch": 21.38, "learning_rate": 3.93127127524083e-05, "loss": 2.1987, "step": 7387000 }, { "epoch": 21.38, "learning_rate": 3.9311989104761024e-05, "loss": 2.1875, "step": 7387500 }, { "epoch": 21.39, "learning_rate": 3.931126545711375e-05, "loss": 2.1956, "step": 7388000 }, { "epoch": 21.39, "learning_rate": 3.9310541809466475e-05, "loss": 2.1745, "step": 7388500 }, { "epoch": 21.39, "learning_rate": 3.93098181618192e-05, "loss": 2.1945, "step": 7389000 }, { "epoch": 21.39, "learning_rate": 3.930909451417192e-05, "loss": 2.1807, "step": 7389500 }, { "epoch": 21.39, "learning_rate": 3.930837086652464e-05, "loss": 2.1674, "step": 7390000 }, { "epoch": 21.39, "learning_rate": 3.930764866617266e-05, "loss": 2.1812, "step": 7390500 }, { "epoch": 21.39, "learning_rate": 3.930692501852538e-05, "loss": 2.1649, "step": 7391000 }, { "epoch": 21.4, "learning_rate": 3.93062028181734e-05, "loss": 2.1796, "step": 7391500 }, { "epoch": 21.4, "learning_rate": 3.9305479170526124e-05, "loss": 2.1525, "step": 7392000 }, { "epoch": 21.4, "learning_rate": 3.9304755522878846e-05, "loss": 2.1747, "step": 7392500 }, { "epoch": 21.4, "learning_rate": 3.930403187523157e-05, "loss": 2.1768, "step": 7393000 }, { "epoch": 21.4, "learning_rate": 3.930330822758429e-05, "loss": 2.1695, "step": 7393500 }, { "epoch": 21.4, "learning_rate": 3.930258457993701e-05, "loss": 2.1586, "step": 7394000 }, { "epoch": 21.4, "learning_rate": 3.930186093228974e-05, "loss": 2.1701, "step": 7394500 }, { "epoch": 21.41, "learning_rate": 3.9301137284642464e-05, "loss": 2.1704, "step": 7395000 }, { "epoch": 21.41, "learning_rate": 3.9300413636995186e-05, "loss": 2.1689, "step": 7395500 }, { "epoch": 21.41, "learning_rate": 3.929968998934791e-05, "loss": 2.1827, "step": 7396000 }, { "epoch": 21.41, "learning_rate": 3.929896634170063e-05, "loss": 2.1671, "step": 7396500 }, { "epoch": 21.41, "learning_rate": 3.929824269405335e-05, "loss": 2.1846, "step": 7397000 }, { "epoch": 21.41, "learning_rate": 3.9297519046406075e-05, "loss": 2.183, "step": 7397500 }, { "epoch": 21.41, "learning_rate": 3.9296795398758804e-05, "loss": 2.1863, "step": 7398000 }, { "epoch": 21.42, "learning_rate": 3.9296071751111527e-05, "loss": 2.1783, "step": 7398500 }, { "epoch": 21.42, "learning_rate": 3.929534810346425e-05, "loss": 2.181, "step": 7399000 }, { "epoch": 21.42, "learning_rate": 3.929462445581697e-05, "loss": 2.1829, "step": 7399500 }, { "epoch": 21.42, "learning_rate": 3.929390080816969e-05, "loss": 2.1929, "step": 7400000 }, { "epoch": 21.42, "learning_rate": 3.9293177160522415e-05, "loss": 2.1762, "step": 7400500 }, { "epoch": 21.42, "learning_rate": 3.929245351287514e-05, "loss": 2.1899, "step": 7401000 }, { "epoch": 21.42, "learning_rate": 3.929173131252316e-05, "loss": 2.1811, "step": 7401500 }, { "epoch": 21.43, "learning_rate": 3.929100766487588e-05, "loss": 2.1795, "step": 7402000 }, { "epoch": 21.43, "learning_rate": 3.9290285464523905e-05, "loss": 2.1811, "step": 7402500 }, { "epoch": 21.43, "learning_rate": 3.928956181687663e-05, "loss": 2.1707, "step": 7403000 }, { "epoch": 21.43, "learning_rate": 3.928883816922935e-05, "loss": 2.2064, "step": 7403500 }, { "epoch": 21.43, "learning_rate": 3.928811452158207e-05, "loss": 2.1536, "step": 7404000 }, { "epoch": 21.43, "learning_rate": 3.9287390873934794e-05, "loss": 2.2146, "step": 7404500 }, { "epoch": 21.43, "learning_rate": 3.92866701208781e-05, "loss": 2.1962, "step": 7405000 }, { "epoch": 21.44, "learning_rate": 3.9285946473230825e-05, "loss": 2.1564, "step": 7405500 }, { "epoch": 21.44, "learning_rate": 3.9285222825583554e-05, "loss": 2.1938, "step": 7406000 }, { "epoch": 21.44, "learning_rate": 3.9284499177936276e-05, "loss": 2.1974, "step": 7406500 }, { "epoch": 21.44, "learning_rate": 3.9283775530289e-05, "loss": 2.1517, "step": 7407000 }, { "epoch": 21.44, "learning_rate": 3.928305188264172e-05, "loss": 2.2067, "step": 7407500 }, { "epoch": 21.44, "learning_rate": 3.928232823499444e-05, "loss": 2.1722, "step": 7408000 }, { "epoch": 21.44, "learning_rate": 3.9281604587347165e-05, "loss": 2.1874, "step": 7408500 }, { "epoch": 21.45, "learning_rate": 3.9280880939699894e-05, "loss": 2.1932, "step": 7409000 }, { "epoch": 21.45, "learning_rate": 3.9280157292052616e-05, "loss": 2.1972, "step": 7409500 }, { "epoch": 21.45, "learning_rate": 3.927943364440534e-05, "loss": 2.172, "step": 7410000 }, { "epoch": 21.45, "learning_rate": 3.9278711444053354e-05, "loss": 2.1714, "step": 7410500 }, { "epoch": 21.45, "learning_rate": 3.9277987796406076e-05, "loss": 2.1979, "step": 7411000 }, { "epoch": 21.45, "learning_rate": 3.9277264148758805e-05, "loss": 2.2076, "step": 7411500 }, { "epoch": 21.45, "learning_rate": 3.927654050111153e-05, "loss": 2.1689, "step": 7412000 }, { "epoch": 21.46, "learning_rate": 3.927581685346425e-05, "loss": 2.1666, "step": 7412500 }, { "epoch": 21.46, "learning_rate": 3.9275094653112265e-05, "loss": 2.1898, "step": 7413000 }, { "epoch": 21.46, "learning_rate": 3.927437100546499e-05, "loss": 2.1852, "step": 7413500 }, { "epoch": 21.46, "learning_rate": 3.927364735781771e-05, "loss": 2.1855, "step": 7414000 }, { "epoch": 21.46, "learning_rate": 3.927292371017043e-05, "loss": 2.171, "step": 7414500 }, { "epoch": 21.46, "learning_rate": 3.9272200062523154e-05, "loss": 2.1626, "step": 7415000 }, { "epoch": 21.46, "learning_rate": 3.9271476414875876e-05, "loss": 2.1961, "step": 7415500 }, { "epoch": 21.47, "learning_rate": 3.92707542145239e-05, "loss": 2.1506, "step": 7416000 }, { "epoch": 21.47, "learning_rate": 3.927003056687663e-05, "loss": 2.2037, "step": 7416500 }, { "epoch": 21.47, "learning_rate": 3.926930691922935e-05, "loss": 2.1585, "step": 7417000 }, { "epoch": 21.47, "learning_rate": 3.9268584718877365e-05, "loss": 2.1756, "step": 7417500 }, { "epoch": 21.47, "learning_rate": 3.926786107123009e-05, "loss": 2.1634, "step": 7418000 }, { "epoch": 21.47, "learning_rate": 3.926713742358281e-05, "loss": 2.1433, "step": 7418500 }, { "epoch": 21.47, "learning_rate": 3.926641377593553e-05, "loss": 2.1699, "step": 7419000 }, { "epoch": 21.48, "learning_rate": 3.9265691575583554e-05, "loss": 2.168, "step": 7419500 }, { "epoch": 21.48, "learning_rate": 3.926496792793628e-05, "loss": 2.2164, "step": 7420000 }, { "epoch": 21.48, "learning_rate": 3.9264244280289e-05, "loss": 2.1976, "step": 7420500 }, { "epoch": 21.48, "learning_rate": 3.926352063264172e-05, "loss": 2.1949, "step": 7421000 }, { "epoch": 21.48, "learning_rate": 3.926279843228974e-05, "loss": 2.1881, "step": 7421500 }, { "epoch": 21.48, "learning_rate": 3.926207478464246e-05, "loss": 2.183, "step": 7422000 }, { "epoch": 21.49, "learning_rate": 3.926135113699518e-05, "loss": 2.21, "step": 7422500 }, { "epoch": 21.49, "learning_rate": 3.9260627489347903e-05, "loss": 2.1824, "step": 7423000 }, { "epoch": 21.49, "learning_rate": 3.925990384170063e-05, "loss": 2.1955, "step": 7423500 }, { "epoch": 21.49, "learning_rate": 3.9259180194053355e-05, "loss": 2.1647, "step": 7424000 }, { "epoch": 21.49, "learning_rate": 3.9258456546406084e-05, "loss": 2.173, "step": 7424500 }, { "epoch": 21.49, "learning_rate": 3.92577343460541e-05, "loss": 2.197, "step": 7425000 }, { "epoch": 21.49, "learning_rate": 3.9257012145702115e-05, "loss": 2.1935, "step": 7425500 }, { "epoch": 21.5, "learning_rate": 3.925628849805484e-05, "loss": 2.1915, "step": 7426000 }, { "epoch": 21.5, "learning_rate": 3.925556485040756e-05, "loss": 2.1668, "step": 7426500 }, { "epoch": 21.5, "learning_rate": 3.925484120276028e-05, "loss": 2.1476, "step": 7427000 }, { "epoch": 21.5, "learning_rate": 3.9254117555113004e-05, "loss": 2.2069, "step": 7427500 }, { "epoch": 21.5, "learning_rate": 3.925339390746573e-05, "loss": 2.2022, "step": 7428000 }, { "epoch": 21.5, "learning_rate": 3.9252670259818455e-05, "loss": 2.1494, "step": 7428500 }, { "epoch": 21.5, "learning_rate": 3.925194661217118e-05, "loss": 2.1633, "step": 7429000 }, { "epoch": 21.51, "learning_rate": 3.92512229645239e-05, "loss": 2.1751, "step": 7429500 }, { "epoch": 21.51, "learning_rate": 3.925049931687662e-05, "loss": 2.1869, "step": 7430000 }, { "epoch": 21.51, "learning_rate": 3.9249775669229344e-05, "loss": 2.1915, "step": 7430500 }, { "epoch": 21.51, "learning_rate": 3.9249052021582066e-05, "loss": 2.1894, "step": 7431000 }, { "epoch": 21.51, "learning_rate": 3.9248328373934795e-05, "loss": 2.1581, "step": 7431500 }, { "epoch": 21.51, "learning_rate": 3.924760472628752e-05, "loss": 2.1725, "step": 7432000 }, { "epoch": 21.51, "learning_rate": 3.924688107864024e-05, "loss": 2.2109, "step": 7432500 }, { "epoch": 21.52, "learning_rate": 3.924615743099296e-05, "loss": 2.2039, "step": 7433000 }, { "epoch": 21.52, "learning_rate": 3.9245433783345684e-05, "loss": 2.1859, "step": 7433500 }, { "epoch": 21.52, "learning_rate": 3.9244710135698406e-05, "loss": 2.1771, "step": 7434000 }, { "epoch": 21.52, "learning_rate": 3.9243986488051135e-05, "loss": 2.2055, "step": 7434500 }, { "epoch": 21.52, "learning_rate": 3.924326284040386e-05, "loss": 2.1934, "step": 7435000 }, { "epoch": 21.52, "learning_rate": 3.924253919275658e-05, "loss": 2.1834, "step": 7435500 }, { "epoch": 21.52, "learning_rate": 3.92418155451093e-05, "loss": 2.1704, "step": 7436000 }, { "epoch": 21.53, "learning_rate": 3.924109334475732e-05, "loss": 2.1682, "step": 7436500 }, { "epoch": 21.53, "learning_rate": 3.924036969711004e-05, "loss": 2.1772, "step": 7437000 }, { "epoch": 21.53, "learning_rate": 3.923964604946276e-05, "loss": 2.1624, "step": 7437500 }, { "epoch": 21.53, "learning_rate": 3.9238922401815484e-05, "loss": 2.1864, "step": 7438000 }, { "epoch": 21.53, "learning_rate": 3.923820020146351e-05, "loss": 2.179, "step": 7438500 }, { "epoch": 21.53, "learning_rate": 3.9237476553816236e-05, "loss": 2.1987, "step": 7439000 }, { "epoch": 21.53, "learning_rate": 3.923675290616896e-05, "loss": 2.2051, "step": 7439500 }, { "epoch": 21.54, "learning_rate": 3.923602925852168e-05, "loss": 2.1821, "step": 7440000 }, { "epoch": 21.54, "learning_rate": 3.92353056108744e-05, "loss": 2.1852, "step": 7440500 }, { "epoch": 21.54, "learning_rate": 3.9234581963227125e-05, "loss": 2.1968, "step": 7441000 }, { "epoch": 21.54, "learning_rate": 3.923385831557985e-05, "loss": 2.1977, "step": 7441500 }, { "epoch": 21.54, "learning_rate": 3.923313466793257e-05, "loss": 2.2089, "step": 7442000 }, { "epoch": 21.54, "learning_rate": 3.923241102028529e-05, "loss": 2.1745, "step": 7442500 }, { "epoch": 21.54, "learning_rate": 3.923168881993331e-05, "loss": 2.178, "step": 7443000 }, { "epoch": 21.55, "learning_rate": 3.9230965172286036e-05, "loss": 2.1747, "step": 7443500 }, { "epoch": 21.55, "learning_rate": 3.923024152463876e-05, "loss": 2.1972, "step": 7444000 }, { "epoch": 21.55, "learning_rate": 3.922951787699148e-05, "loss": 2.1925, "step": 7444500 }, { "epoch": 21.55, "learning_rate": 3.9228795676639496e-05, "loss": 2.1751, "step": 7445000 }, { "epoch": 21.55, "learning_rate": 3.922807202899222e-05, "loss": 2.1494, "step": 7445500 }, { "epoch": 21.55, "learning_rate": 3.922734838134495e-05, "loss": 2.1624, "step": 7446000 }, { "epoch": 21.55, "learning_rate": 3.922662762828826e-05, "loss": 2.163, "step": 7446500 }, { "epoch": 21.56, "learning_rate": 3.9225903980640985e-05, "loss": 2.179, "step": 7447000 }, { "epoch": 21.56, "learning_rate": 3.9225181780289e-05, "loss": 2.2142, "step": 7447500 }, { "epoch": 21.56, "learning_rate": 3.922445813264172e-05, "loss": 2.1795, "step": 7448000 }, { "epoch": 21.56, "learning_rate": 3.9223734484994445e-05, "loss": 2.1742, "step": 7448500 }, { "epoch": 21.56, "learning_rate": 3.922301083734717e-05, "loss": 2.1664, "step": 7449000 }, { "epoch": 21.56, "learning_rate": 3.922228718969989e-05, "loss": 2.1848, "step": 7449500 }, { "epoch": 21.56, "learning_rate": 3.922156354205261e-05, "loss": 2.1794, "step": 7450000 }, { "epoch": 21.57, "learning_rate": 3.9220841341700634e-05, "loss": 2.199, "step": 7450500 }, { "epoch": 21.57, "learning_rate": 3.9220117694053356e-05, "loss": 2.1618, "step": 7451000 }, { "epoch": 21.57, "learning_rate": 3.921939404640608e-05, "loss": 2.1731, "step": 7451500 }, { "epoch": 21.57, "learning_rate": 3.92186703987588e-05, "loss": 2.1871, "step": 7452000 }, { "epoch": 21.57, "learning_rate": 3.921794675111152e-05, "loss": 2.1632, "step": 7452500 }, { "epoch": 21.57, "learning_rate": 3.9217223103464245e-05, "loss": 2.1524, "step": 7453000 }, { "epoch": 21.57, "learning_rate": 3.921649945581697e-05, "loss": 2.1799, "step": 7453500 }, { "epoch": 21.58, "learning_rate": 3.9215775808169696e-05, "loss": 2.2059, "step": 7454000 }, { "epoch": 21.58, "learning_rate": 3.921505216052242e-05, "loss": 2.178, "step": 7454500 }, { "epoch": 21.58, "learning_rate": 3.921432851287514e-05, "loss": 2.1574, "step": 7455000 }, { "epoch": 21.58, "learning_rate": 3.921360486522786e-05, "loss": 2.1919, "step": 7455500 }, { "epoch": 21.58, "learning_rate": 3.9212881217580585e-05, "loss": 2.1674, "step": 7456000 }, { "epoch": 21.58, "learning_rate": 3.9212157569933314e-05, "loss": 2.188, "step": 7456500 }, { "epoch": 21.58, "learning_rate": 3.921143392228604e-05, "loss": 2.191, "step": 7457000 }, { "epoch": 21.59, "learning_rate": 3.921071027463876e-05, "loss": 2.1843, "step": 7457500 }, { "epoch": 21.59, "learning_rate": 3.920998662699148e-05, "loss": 2.1864, "step": 7458000 }, { "epoch": 21.59, "learning_rate": 3.92092629793442e-05, "loss": 2.2013, "step": 7458500 }, { "epoch": 21.59, "learning_rate": 3.920854222628751e-05, "loss": 2.2088, "step": 7459000 }, { "epoch": 21.59, "learning_rate": 3.9207820025935535e-05, "loss": 2.182, "step": 7459500 }, { "epoch": 21.59, "learning_rate": 3.920709637828826e-05, "loss": 2.1812, "step": 7460000 }, { "epoch": 21.6, "learning_rate": 3.920637273064098e-05, "loss": 2.2006, "step": 7460500 }, { "epoch": 21.6, "learning_rate": 3.92056490829937e-05, "loss": 2.1763, "step": 7461000 }, { "epoch": 21.6, "learning_rate": 3.920492543534643e-05, "loss": 2.1878, "step": 7461500 }, { "epoch": 21.6, "learning_rate": 3.9204203234994446e-05, "loss": 2.1814, "step": 7462000 }, { "epoch": 21.6, "learning_rate": 3.920347958734717e-05, "loss": 2.1871, "step": 7462500 }, { "epoch": 21.6, "learning_rate": 3.920275593969989e-05, "loss": 2.1993, "step": 7463000 }, { "epoch": 21.6, "learning_rate": 3.920203229205261e-05, "loss": 2.2042, "step": 7463500 }, { "epoch": 21.61, "learning_rate": 3.9201308644405335e-05, "loss": 2.1742, "step": 7464000 }, { "epoch": 21.61, "learning_rate": 3.9200584996758064e-05, "loss": 2.1776, "step": 7464500 }, { "epoch": 21.61, "learning_rate": 3.9199861349110786e-05, "loss": 2.1785, "step": 7465000 }, { "epoch": 21.61, "learning_rate": 3.919913770146351e-05, "loss": 2.1707, "step": 7465500 }, { "epoch": 21.61, "learning_rate": 3.919841405381623e-05, "loss": 2.1471, "step": 7466000 }, { "epoch": 21.61, "learning_rate": 3.919769040616895e-05, "loss": 2.1765, "step": 7466500 }, { "epoch": 21.61, "learning_rate": 3.9196966758521675e-05, "loss": 2.1801, "step": 7467000 }, { "epoch": 21.62, "learning_rate": 3.91962431108744e-05, "loss": 2.1702, "step": 7467500 }, { "epoch": 21.62, "learning_rate": 3.919551946322712e-05, "loss": 2.1616, "step": 7468000 }, { "epoch": 21.62, "learning_rate": 3.919479581557985e-05, "loss": 2.1438, "step": 7468500 }, { "epoch": 21.62, "learning_rate": 3.9194073615227864e-05, "loss": 2.179, "step": 7469000 }, { "epoch": 21.62, "learning_rate": 3.9193349967580586e-05, "loss": 2.171, "step": 7469500 }, { "epoch": 21.62, "learning_rate": 3.9192626319933315e-05, "loss": 2.1588, "step": 7470000 }, { "epoch": 21.62, "learning_rate": 3.919190411958133e-05, "loss": 2.1717, "step": 7470500 }, { "epoch": 21.63, "learning_rate": 3.919118047193405e-05, "loss": 2.1756, "step": 7471000 }, { "epoch": 21.63, "learning_rate": 3.9190456824286775e-05, "loss": 2.1669, "step": 7471500 }, { "epoch": 21.63, "learning_rate": 3.918973462393479e-05, "loss": 2.156, "step": 7472000 }, { "epoch": 21.63, "learning_rate": 3.918901097628751e-05, "loss": 2.1952, "step": 7472500 }, { "epoch": 21.63, "learning_rate": 3.9188288775935535e-05, "loss": 2.1767, "step": 7473000 }, { "epoch": 21.63, "learning_rate": 3.918756512828826e-05, "loss": 2.1598, "step": 7473500 }, { "epoch": 21.63, "learning_rate": 3.918684148064098e-05, "loss": 2.1992, "step": 7474000 }, { "epoch": 21.64, "learning_rate": 3.91861178329937e-05, "loss": 2.1774, "step": 7474500 }, { "epoch": 21.64, "learning_rate": 3.9185394185346424e-05, "loss": 2.164, "step": 7475000 }, { "epoch": 21.64, "learning_rate": 3.9184670537699146e-05, "loss": 2.1937, "step": 7475500 }, { "epoch": 21.64, "learning_rate": 3.918394689005187e-05, "loss": 2.172, "step": 7476000 }, { "epoch": 21.64, "learning_rate": 3.91832232424046e-05, "loss": 2.1713, "step": 7476500 }, { "epoch": 21.64, "learning_rate": 3.918249959475732e-05, "loss": 2.1821, "step": 7477000 }, { "epoch": 21.64, "learning_rate": 3.918177594711004e-05, "loss": 2.1878, "step": 7477500 }, { "epoch": 21.65, "learning_rate": 3.9181052299462764e-05, "loss": 2.1677, "step": 7478000 }, { "epoch": 21.65, "learning_rate": 3.918032865181549e-05, "loss": 2.1788, "step": 7478500 }, { "epoch": 21.65, "learning_rate": 3.9179605004168216e-05, "loss": 2.173, "step": 7479000 }, { "epoch": 21.65, "learning_rate": 3.917888135652094e-05, "loss": 2.158, "step": 7479500 }, { "epoch": 21.65, "learning_rate": 3.917815770887366e-05, "loss": 2.1874, "step": 7480000 }, { "epoch": 21.65, "learning_rate": 3.917743406122638e-05, "loss": 2.162, "step": 7480500 }, { "epoch": 21.65, "learning_rate": 3.9176710413579105e-05, "loss": 2.1589, "step": 7481000 }, { "epoch": 21.66, "learning_rate": 3.917598821322712e-05, "loss": 2.1896, "step": 7481500 }, { "epoch": 21.66, "learning_rate": 3.917526456557984e-05, "loss": 2.1629, "step": 7482000 }, { "epoch": 21.66, "learning_rate": 3.9174540917932565e-05, "loss": 2.1803, "step": 7482500 }, { "epoch": 21.66, "learning_rate": 3.917381727028529e-05, "loss": 2.1863, "step": 7483000 }, { "epoch": 21.66, "learning_rate": 3.9173095069933316e-05, "loss": 2.1846, "step": 7483500 }, { "epoch": 21.66, "learning_rate": 3.917237142228604e-05, "loss": 2.1606, "step": 7484000 }, { "epoch": 21.66, "learning_rate": 3.917164777463876e-05, "loss": 2.2064, "step": 7484500 }, { "epoch": 21.67, "learning_rate": 3.917092412699148e-05, "loss": 2.1954, "step": 7485000 }, { "epoch": 21.67, "learning_rate": 3.9170200479344205e-05, "loss": 2.2058, "step": 7485500 }, { "epoch": 21.67, "learning_rate": 3.916947683169693e-05, "loss": 2.1758, "step": 7486000 }, { "epoch": 21.67, "learning_rate": 3.916875318404965e-05, "loss": 2.1934, "step": 7486500 }, { "epoch": 21.67, "learning_rate": 3.916802953640237e-05, "loss": 2.1885, "step": 7487000 }, { "epoch": 21.67, "learning_rate": 3.9167307336050394e-05, "loss": 2.1658, "step": 7487500 }, { "epoch": 21.67, "learning_rate": 3.916658513569841e-05, "loss": 2.1712, "step": 7488000 }, { "epoch": 21.68, "learning_rate": 3.916586148805113e-05, "loss": 2.1918, "step": 7488500 }, { "epoch": 21.68, "learning_rate": 3.9165137840403854e-05, "loss": 2.1972, "step": 7489000 }, { "epoch": 21.68, "learning_rate": 3.916441564005187e-05, "loss": 2.1542, "step": 7489500 }, { "epoch": 21.68, "learning_rate": 3.916369199240459e-05, "loss": 2.1748, "step": 7490000 }, { "epoch": 21.68, "learning_rate": 3.9162969792052614e-05, "loss": 2.1849, "step": 7490500 }, { "epoch": 21.68, "learning_rate": 3.9162246144405336e-05, "loss": 2.1759, "step": 7491000 }, { "epoch": 21.68, "learning_rate": 3.9161522496758065e-05, "loss": 2.2153, "step": 7491500 }, { "epoch": 21.69, "learning_rate": 3.916079884911079e-05, "loss": 2.1853, "step": 7492000 }, { "epoch": 21.69, "learning_rate": 3.916007520146351e-05, "loss": 2.1965, "step": 7492500 }, { "epoch": 21.69, "learning_rate": 3.915935155381623e-05, "loss": 2.1753, "step": 7493000 }, { "epoch": 21.69, "learning_rate": 3.9158627906168954e-05, "loss": 2.1816, "step": 7493500 }, { "epoch": 21.69, "learning_rate": 3.9157904258521677e-05, "loss": 2.1816, "step": 7494000 }, { "epoch": 21.69, "learning_rate": 3.91571806108744e-05, "loss": 2.2008, "step": 7494500 }, { "epoch": 21.69, "learning_rate": 3.915645696322712e-05, "loss": 2.1804, "step": 7495000 }, { "epoch": 21.7, "learning_rate": 3.915573331557984e-05, "loss": 2.1724, "step": 7495500 }, { "epoch": 21.7, "learning_rate": 3.9155011115227866e-05, "loss": 2.1961, "step": 7496000 }, { "epoch": 21.7, "learning_rate": 3.915428746758059e-05, "loss": 2.1982, "step": 7496500 }, { "epoch": 21.7, "learning_rate": 3.915356381993331e-05, "loss": 2.1869, "step": 7497000 }, { "epoch": 21.7, "learning_rate": 3.915284017228603e-05, "loss": 2.1745, "step": 7497500 }, { "epoch": 21.7, "learning_rate": 3.9152116524638754e-05, "loss": 2.1755, "step": 7498000 }, { "epoch": 21.71, "learning_rate": 3.9151392876991483e-05, "loss": 2.1728, "step": 7498500 }, { "epoch": 21.71, "learning_rate": 3.9150669229344206e-05, "loss": 2.1969, "step": 7499000 }, { "epoch": 21.71, "learning_rate": 3.914994558169693e-05, "loss": 2.1751, "step": 7499500 }, { "epoch": 21.71, "learning_rate": 3.914922193404965e-05, "loss": 2.1822, "step": 7500000 }, { "epoch": 21.71, "learning_rate": 3.914849828640237e-05, "loss": 2.1737, "step": 7500500 }, { "epoch": 21.71, "learning_rate": 3.9147774638755095e-05, "loss": 2.1687, "step": 7501000 }, { "epoch": 21.71, "learning_rate": 3.914705099110782e-05, "loss": 2.1637, "step": 7501500 }, { "epoch": 21.72, "learning_rate": 3.9146327343460546e-05, "loss": 2.1725, "step": 7502000 }, { "epoch": 21.72, "learning_rate": 3.914560369581327e-05, "loss": 2.1874, "step": 7502500 }, { "epoch": 21.72, "learning_rate": 3.914488004816599e-05, "loss": 2.1632, "step": 7503000 }, { "epoch": 21.72, "learning_rate": 3.9144157847814006e-05, "loss": 2.1687, "step": 7503500 }, { "epoch": 21.72, "learning_rate": 3.914343564746202e-05, "loss": 2.1903, "step": 7504000 }, { "epoch": 21.72, "learning_rate": 3.9142711999814744e-05, "loss": 2.1855, "step": 7504500 }, { "epoch": 21.72, "learning_rate": 3.9141989799462766e-05, "loss": 2.1708, "step": 7505000 }, { "epoch": 21.73, "learning_rate": 3.914126615181549e-05, "loss": 2.1862, "step": 7505500 }, { "epoch": 21.73, "learning_rate": 3.914054250416822e-05, "loss": 2.1881, "step": 7506000 }, { "epoch": 21.73, "learning_rate": 3.913981885652094e-05, "loss": 2.1584, "step": 7506500 }, { "epoch": 21.73, "learning_rate": 3.913909520887366e-05, "loss": 2.186, "step": 7507000 }, { "epoch": 21.73, "learning_rate": 3.9138371561226384e-05, "loss": 2.1854, "step": 7507500 }, { "epoch": 21.73, "learning_rate": 3.9137647913579106e-05, "loss": 2.2044, "step": 7508000 }, { "epoch": 21.73, "learning_rate": 3.913692426593183e-05, "loss": 2.1793, "step": 7508500 }, { "epoch": 21.74, "learning_rate": 3.913620061828455e-05, "loss": 2.1838, "step": 7509000 }, { "epoch": 21.74, "learning_rate": 3.913547697063727e-05, "loss": 2.1703, "step": 7509500 }, { "epoch": 21.74, "learning_rate": 3.9134753322989995e-05, "loss": 2.1703, "step": 7510000 }, { "epoch": 21.74, "learning_rate": 3.913402967534272e-05, "loss": 2.189, "step": 7510500 }, { "epoch": 21.74, "learning_rate": 3.9133306027695446e-05, "loss": 2.1984, "step": 7511000 }, { "epoch": 21.74, "learning_rate": 3.913258238004817e-05, "loss": 2.1858, "step": 7511500 }, { "epoch": 21.74, "learning_rate": 3.9131860179696184e-05, "loss": 2.1745, "step": 7512000 }, { "epoch": 21.75, "learning_rate": 3.9131136532048906e-05, "loss": 2.1422, "step": 7512500 }, { "epoch": 21.75, "learning_rate": 3.913041288440163e-05, "loss": 2.1975, "step": 7513000 }, { "epoch": 21.75, "learning_rate": 3.912968923675436e-05, "loss": 2.1808, "step": 7513500 }, { "epoch": 21.75, "learning_rate": 3.912896703640237e-05, "loss": 2.1855, "step": 7514000 }, { "epoch": 21.75, "learning_rate": 3.9128243388755095e-05, "loss": 2.18, "step": 7514500 }, { "epoch": 21.75, "learning_rate": 3.912751974110782e-05, "loss": 2.1831, "step": 7515000 }, { "epoch": 21.75, "learning_rate": 3.912679609346055e-05, "loss": 2.198, "step": 7515500 }, { "epoch": 21.76, "learning_rate": 3.912607244581327e-05, "loss": 2.1494, "step": 7516000 }, { "epoch": 21.76, "learning_rate": 3.9125350245461284e-05, "loss": 2.1882, "step": 7516500 }, { "epoch": 21.76, "learning_rate": 3.912462659781401e-05, "loss": 2.1999, "step": 7517000 }, { "epoch": 21.76, "learning_rate": 3.912390439746202e-05, "loss": 2.1839, "step": 7517500 }, { "epoch": 21.76, "learning_rate": 3.9123180749814744e-05, "loss": 2.178, "step": 7518000 }, { "epoch": 21.76, "learning_rate": 3.9122457102167474e-05, "loss": 2.1761, "step": 7518500 }, { "epoch": 21.76, "learning_rate": 3.9121733454520196e-05, "loss": 2.1891, "step": 7519000 }, { "epoch": 21.77, "learning_rate": 3.912100980687292e-05, "loss": 2.213, "step": 7519500 }, { "epoch": 21.77, "learning_rate": 3.912028615922564e-05, "loss": 2.1675, "step": 7520000 }, { "epoch": 21.77, "learning_rate": 3.911956251157836e-05, "loss": 2.1744, "step": 7520500 }, { "epoch": 21.77, "learning_rate": 3.9118840311226385e-05, "loss": 2.1674, "step": 7521000 }, { "epoch": 21.77, "learning_rate": 3.911811666357911e-05, "loss": 2.1704, "step": 7521500 }, { "epoch": 21.77, "learning_rate": 3.911739301593183e-05, "loss": 2.1946, "step": 7522000 }, { "epoch": 21.77, "learning_rate": 3.911666936828455e-05, "loss": 2.194, "step": 7522500 }, { "epoch": 21.78, "learning_rate": 3.9115945720637274e-05, "loss": 2.1863, "step": 7523000 }, { "epoch": 21.78, "learning_rate": 3.9115222072989996e-05, "loss": 2.1545, "step": 7523500 }, { "epoch": 21.78, "learning_rate": 3.9114498425342725e-05, "loss": 2.1872, "step": 7524000 }, { "epoch": 21.78, "learning_rate": 3.911377477769545e-05, "loss": 2.1743, "step": 7524500 }, { "epoch": 21.78, "learning_rate": 3.9113054024638756e-05, "loss": 2.1915, "step": 7525000 }, { "epoch": 21.78, "learning_rate": 3.911233037699148e-05, "loss": 2.207, "step": 7525500 }, { "epoch": 21.78, "learning_rate": 3.91116067293442e-05, "loss": 2.1635, "step": 7526000 }, { "epoch": 21.79, "learning_rate": 3.911088308169692e-05, "loss": 2.1887, "step": 7526500 }, { "epoch": 21.79, "learning_rate": 3.9110160881344945e-05, "loss": 2.1717, "step": 7527000 }, { "epoch": 21.79, "learning_rate": 3.910943723369767e-05, "loss": 2.1795, "step": 7527500 }, { "epoch": 21.79, "learning_rate": 3.910871358605039e-05, "loss": 2.1882, "step": 7528000 }, { "epoch": 21.79, "learning_rate": 3.910798993840312e-05, "loss": 2.1758, "step": 7528500 }, { "epoch": 21.79, "learning_rate": 3.910726629075584e-05, "loss": 2.1844, "step": 7529000 }, { "epoch": 21.79, "learning_rate": 3.910654264310856e-05, "loss": 2.1769, "step": 7529500 }, { "epoch": 21.8, "learning_rate": 3.9105818995461285e-05, "loss": 2.1757, "step": 7530000 }, { "epoch": 21.8, "learning_rate": 3.910509534781401e-05, "loss": 2.166, "step": 7530500 }, { "epoch": 21.8, "learning_rate": 3.910437170016673e-05, "loss": 2.1631, "step": 7531000 }, { "epoch": 21.8, "learning_rate": 3.910364805251945e-05, "loss": 2.1754, "step": 7531500 }, { "epoch": 21.8, "learning_rate": 3.9102924404872174e-05, "loss": 2.1829, "step": 7532000 }, { "epoch": 21.8, "learning_rate": 3.9102200757224896e-05, "loss": 2.1779, "step": 7532500 }, { "epoch": 21.8, "learning_rate": 3.9101477109577625e-05, "loss": 2.2018, "step": 7533000 }, { "epoch": 21.81, "learning_rate": 3.910075346193035e-05, "loss": 2.1823, "step": 7533500 }, { "epoch": 21.81, "learning_rate": 3.910002981428307e-05, "loss": 2.1655, "step": 7534000 }, { "epoch": 21.81, "learning_rate": 3.909930616663579e-05, "loss": 2.1836, "step": 7534500 }, { "epoch": 21.81, "learning_rate": 3.9098582518988514e-05, "loss": 2.1751, "step": 7535000 }, { "epoch": 21.81, "learning_rate": 3.9097858871341243e-05, "loss": 2.1929, "step": 7535500 }, { "epoch": 21.81, "learning_rate": 3.909713667098926e-05, "loss": 2.182, "step": 7536000 }, { "epoch": 21.82, "learning_rate": 3.909641302334198e-05, "loss": 2.1838, "step": 7536500 }, { "epoch": 21.82, "learning_rate": 3.909569082299e-05, "loss": 2.1738, "step": 7537000 }, { "epoch": 21.82, "learning_rate": 3.9094967175342726e-05, "loss": 2.1727, "step": 7537500 }, { "epoch": 21.82, "learning_rate": 3.909424352769545e-05, "loss": 2.1715, "step": 7538000 }, { "epoch": 21.82, "learning_rate": 3.909351988004817e-05, "loss": 2.168, "step": 7538500 }, { "epoch": 21.82, "learning_rate": 3.909279623240089e-05, "loss": 2.1789, "step": 7539000 }, { "epoch": 21.82, "learning_rate": 3.9092072584753615e-05, "loss": 2.1871, "step": 7539500 }, { "epoch": 21.83, "learning_rate": 3.909134893710634e-05, "loss": 2.1779, "step": 7540000 }, { "epoch": 21.83, "learning_rate": 3.909062528945906e-05, "loss": 2.1981, "step": 7540500 }, { "epoch": 21.83, "learning_rate": 3.908990164181178e-05, "loss": 2.1884, "step": 7541000 }, { "epoch": 21.83, "learning_rate": 3.90891794414598e-05, "loss": 2.1672, "step": 7541500 }, { "epoch": 21.83, "learning_rate": 3.9088455793812526e-05, "loss": 2.1553, "step": 7542000 }, { "epoch": 21.83, "learning_rate": 3.908773214616525e-05, "loss": 2.1748, "step": 7542500 }, { "epoch": 21.83, "learning_rate": 3.908700849851798e-05, "loss": 2.1748, "step": 7543000 }, { "epoch": 21.84, "learning_rate": 3.90862848508707e-05, "loss": 2.1851, "step": 7543500 }, { "epoch": 21.84, "learning_rate": 3.908556120322342e-05, "loss": 2.2001, "step": 7544000 }, { "epoch": 21.84, "learning_rate": 3.908483900287144e-05, "loss": 2.1648, "step": 7544500 }, { "epoch": 21.84, "learning_rate": 3.908411535522416e-05, "loss": 2.1833, "step": 7545000 }, { "epoch": 21.84, "learning_rate": 3.908339170757688e-05, "loss": 2.1634, "step": 7545500 }, { "epoch": 21.84, "learning_rate": 3.9082668059929604e-05, "loss": 2.2034, "step": 7546000 }, { "epoch": 21.84, "learning_rate": 3.9081944412282326e-05, "loss": 2.1534, "step": 7546500 }, { "epoch": 21.85, "learning_rate": 3.908122076463505e-05, "loss": 2.2166, "step": 7547000 }, { "epoch": 21.85, "learning_rate": 3.908049711698778e-05, "loss": 2.1725, "step": 7547500 }, { "epoch": 21.85, "learning_rate": 3.907977491663579e-05, "loss": 2.1905, "step": 7548000 }, { "epoch": 21.85, "learning_rate": 3.9079051268988515e-05, "loss": 2.2052, "step": 7548500 }, { "epoch": 21.85, "learning_rate": 3.907832762134124e-05, "loss": 2.2163, "step": 7549000 }, { "epoch": 21.85, "learning_rate": 3.907760397369396e-05, "loss": 2.1789, "step": 7549500 }, { "epoch": 21.85, "learning_rate": 3.9076881773341975e-05, "loss": 2.1577, "step": 7550000 }, { "epoch": 21.86, "learning_rate": 3.907615957299e-05, "loss": 2.196, "step": 7550500 }, { "epoch": 21.86, "learning_rate": 3.9075435925342727e-05, "loss": 2.173, "step": 7551000 }, { "epoch": 21.86, "learning_rate": 3.907471227769545e-05, "loss": 2.193, "step": 7551500 }, { "epoch": 21.86, "learning_rate": 3.9073990077343464e-05, "loss": 2.1713, "step": 7552000 }, { "epoch": 21.86, "learning_rate": 3.9073266429696187e-05, "loss": 2.1857, "step": 7552500 }, { "epoch": 21.86, "learning_rate": 3.907254278204891e-05, "loss": 2.1811, "step": 7553000 }, { "epoch": 21.86, "learning_rate": 3.907181913440163e-05, "loss": 2.1852, "step": 7553500 }, { "epoch": 21.87, "learning_rate": 3.907109548675435e-05, "loss": 2.1536, "step": 7554000 }, { "epoch": 21.87, "learning_rate": 3.9070371839107075e-05, "loss": 2.1752, "step": 7554500 }, { "epoch": 21.87, "learning_rate": 3.9069648191459805e-05, "loss": 2.1734, "step": 7555000 }, { "epoch": 21.87, "learning_rate": 3.906892454381253e-05, "loss": 2.2085, "step": 7555500 }, { "epoch": 21.87, "learning_rate": 3.906820234346054e-05, "loss": 2.2047, "step": 7556000 }, { "epoch": 21.87, "learning_rate": 3.9067478695813265e-05, "loss": 2.1934, "step": 7556500 }, { "epoch": 21.87, "learning_rate": 3.906675504816599e-05, "loss": 2.2059, "step": 7557000 }, { "epoch": 21.88, "learning_rate": 3.9066032847814e-05, "loss": 2.1902, "step": 7557500 }, { "epoch": 21.88, "learning_rate": 3.9065309200166725e-05, "loss": 2.1782, "step": 7558000 }, { "epoch": 21.88, "learning_rate": 3.9064585552519454e-05, "loss": 2.1751, "step": 7558500 }, { "epoch": 21.88, "learning_rate": 3.9063861904872176e-05, "loss": 2.1642, "step": 7559000 }, { "epoch": 21.88, "learning_rate": 3.9063138257224905e-05, "loss": 2.1571, "step": 7559500 }, { "epoch": 21.88, "learning_rate": 3.906241460957763e-05, "loss": 2.1717, "step": 7560000 }, { "epoch": 21.88, "learning_rate": 3.906169096193035e-05, "loss": 2.1746, "step": 7560500 }, { "epoch": 21.89, "learning_rate": 3.906096731428307e-05, "loss": 2.1788, "step": 7561000 }, { "epoch": 21.89, "learning_rate": 3.9060243666635794e-05, "loss": 2.1843, "step": 7561500 }, { "epoch": 21.89, "learning_rate": 3.9059520018988516e-05, "loss": 2.1483, "step": 7562000 }, { "epoch": 21.89, "learning_rate": 3.905879637134124e-05, "loss": 2.1761, "step": 7562500 }, { "epoch": 21.89, "learning_rate": 3.905807272369396e-05, "loss": 2.1484, "step": 7563000 }, { "epoch": 21.89, "learning_rate": 3.905734907604668e-05, "loss": 2.1707, "step": 7563500 }, { "epoch": 21.89, "learning_rate": 3.9056626875694705e-05, "loss": 2.1508, "step": 7564000 }, { "epoch": 21.9, "learning_rate": 3.905590322804743e-05, "loss": 2.2057, "step": 7564500 }, { "epoch": 21.9, "learning_rate": 3.905517958040015e-05, "loss": 2.1926, "step": 7565000 }, { "epoch": 21.9, "learning_rate": 3.905445593275288e-05, "loss": 2.1812, "step": 7565500 }, { "epoch": 21.9, "learning_rate": 3.90537322851056e-05, "loss": 2.2041, "step": 7566000 }, { "epoch": 21.9, "learning_rate": 3.9053010084753616e-05, "loss": 2.1778, "step": 7566500 }, { "epoch": 21.9, "learning_rate": 3.905228643710634e-05, "loss": 2.1626, "step": 7567000 }, { "epoch": 21.9, "learning_rate": 3.905156278945906e-05, "loss": 2.1675, "step": 7567500 }, { "epoch": 21.91, "learning_rate": 3.905083914181178e-05, "loss": 2.1802, "step": 7568000 }, { "epoch": 21.91, "learning_rate": 3.9050115494164505e-05, "loss": 2.1899, "step": 7568500 }, { "epoch": 21.91, "learning_rate": 3.904939184651723e-05, "loss": 2.1737, "step": 7569000 }, { "epoch": 21.91, "learning_rate": 3.9048668198869956e-05, "loss": 2.1874, "step": 7569500 }, { "epoch": 21.91, "learning_rate": 3.904794455122268e-05, "loss": 2.1981, "step": 7570000 }, { "epoch": 21.91, "learning_rate": 3.90472209035754e-05, "loss": 2.1752, "step": 7570500 }, { "epoch": 21.91, "learning_rate": 3.9046498703223416e-05, "loss": 2.2074, "step": 7571000 }, { "epoch": 21.92, "learning_rate": 3.904577650287143e-05, "loss": 2.1879, "step": 7571500 }, { "epoch": 21.92, "learning_rate": 3.9045052855224154e-05, "loss": 2.18, "step": 7572000 }, { "epoch": 21.92, "learning_rate": 3.9044329207576876e-05, "loss": 2.1933, "step": 7572500 }, { "epoch": 21.92, "learning_rate": 3.9043605559929606e-05, "loss": 2.1751, "step": 7573000 }, { "epoch": 21.92, "learning_rate": 3.904288191228233e-05, "loss": 2.1637, "step": 7573500 }, { "epoch": 21.92, "learning_rate": 3.904215826463506e-05, "loss": 2.1561, "step": 7574000 }, { "epoch": 21.93, "learning_rate": 3.904143461698778e-05, "loss": 2.1432, "step": 7574500 }, { "epoch": 21.93, "learning_rate": 3.90407109693405e-05, "loss": 2.1915, "step": 7575000 }, { "epoch": 21.93, "learning_rate": 3.9039987321693223e-05, "loss": 2.2253, "step": 7575500 }, { "epoch": 21.93, "learning_rate": 3.903926512134124e-05, "loss": 2.1731, "step": 7576000 }, { "epoch": 21.93, "learning_rate": 3.903854147369396e-05, "loss": 2.1778, "step": 7576500 }, { "epoch": 21.93, "learning_rate": 3.9037817826046683e-05, "loss": 2.17, "step": 7577000 }, { "epoch": 21.93, "learning_rate": 3.9037094178399406e-05, "loss": 2.2105, "step": 7577500 }, { "epoch": 21.94, "learning_rate": 3.903637053075213e-05, "loss": 2.1666, "step": 7578000 }, { "epoch": 21.94, "learning_rate": 3.903564688310486e-05, "loss": 2.1828, "step": 7578500 }, { "epoch": 21.94, "learning_rate": 3.903492323545758e-05, "loss": 2.192, "step": 7579000 }, { "epoch": 21.94, "learning_rate": 3.90341995878103e-05, "loss": 2.1726, "step": 7579500 }, { "epoch": 21.94, "learning_rate": 3.903347594016303e-05, "loss": 2.1655, "step": 7580000 }, { "epoch": 21.94, "learning_rate": 3.9032753739811046e-05, "loss": 2.1777, "step": 7580500 }, { "epoch": 21.94, "learning_rate": 3.903203009216377e-05, "loss": 2.1563, "step": 7581000 }, { "epoch": 21.95, "learning_rate": 3.903130644451649e-05, "loss": 2.1907, "step": 7581500 }, { "epoch": 21.95, "learning_rate": 3.9030584244164506e-05, "loss": 2.1966, "step": 7582000 }, { "epoch": 21.95, "learning_rate": 3.902986059651723e-05, "loss": 2.1691, "step": 7582500 }, { "epoch": 21.95, "learning_rate": 3.902913694886996e-05, "loss": 2.1687, "step": 7583000 }, { "epoch": 21.95, "learning_rate": 3.902841330122268e-05, "loss": 2.1792, "step": 7583500 }, { "epoch": 21.95, "learning_rate": 3.9027691100870695e-05, "loss": 2.1698, "step": 7584000 }, { "epoch": 21.95, "learning_rate": 3.902696745322342e-05, "loss": 2.1715, "step": 7584500 }, { "epoch": 21.96, "learning_rate": 3.902624380557614e-05, "loss": 2.1834, "step": 7585000 }, { "epoch": 21.96, "learning_rate": 3.902552015792886e-05, "loss": 2.1836, "step": 7585500 }, { "epoch": 21.96, "learning_rate": 3.9024797957576884e-05, "loss": 2.1816, "step": 7586000 }, { "epoch": 21.96, "learning_rate": 3.9024074309929606e-05, "loss": 2.1679, "step": 7586500 }, { "epoch": 21.96, "learning_rate": 3.902335210957762e-05, "loss": 2.1657, "step": 7587000 }, { "epoch": 21.96, "learning_rate": 3.9022628461930344e-05, "loss": 2.1966, "step": 7587500 }, { "epoch": 21.96, "learning_rate": 3.902190481428307e-05, "loss": 2.1623, "step": 7588000 }, { "epoch": 21.97, "learning_rate": 3.9021181166635795e-05, "loss": 2.1577, "step": 7588500 }, { "epoch": 21.97, "learning_rate": 3.902045896628381e-05, "loss": 2.2023, "step": 7589000 }, { "epoch": 21.97, "learning_rate": 3.901973531863653e-05, "loss": 2.1655, "step": 7589500 }, { "epoch": 21.97, "learning_rate": 3.9019011670989255e-05, "loss": 2.1756, "step": 7590000 }, { "epoch": 21.97, "learning_rate": 3.901828947063728e-05, "loss": 2.1583, "step": 7590500 }, { "epoch": 21.97, "learning_rate": 3.901756582299e-05, "loss": 2.1931, "step": 7591000 }, { "epoch": 21.97, "learning_rate": 3.901684217534272e-05, "loss": 2.1851, "step": 7591500 }, { "epoch": 21.98, "learning_rate": 3.9016118527695444e-05, "loss": 2.1946, "step": 7592000 }, { "epoch": 21.98, "learning_rate": 3.901539488004817e-05, "loss": 2.1678, "step": 7592500 }, { "epoch": 21.98, "learning_rate": 3.901467123240089e-05, "loss": 2.1697, "step": 7593000 }, { "epoch": 21.98, "learning_rate": 3.901394758475361e-05, "loss": 2.1563, "step": 7593500 }, { "epoch": 21.98, "learning_rate": 3.9013225384401633e-05, "loss": 2.1698, "step": 7594000 }, { "epoch": 21.98, "learning_rate": 3.9012501736754356e-05, "loss": 2.1727, "step": 7594500 }, { "epoch": 21.98, "learning_rate": 3.901177808910708e-05, "loss": 2.1725, "step": 7595000 }, { "epoch": 21.99, "learning_rate": 3.901105444145981e-05, "loss": 2.171, "step": 7595500 }, { "epoch": 21.99, "learning_rate": 3.901033079381253e-05, "loss": 2.1822, "step": 7596000 }, { "epoch": 21.99, "learning_rate": 3.900960714616525e-05, "loss": 2.1791, "step": 7596500 }, { "epoch": 21.99, "learning_rate": 3.9008883498517974e-05, "loss": 2.1904, "step": 7597000 }, { "epoch": 21.99, "learning_rate": 3.9008159850870696e-05, "loss": 2.1842, "step": 7597500 }, { "epoch": 21.99, "learning_rate": 3.900743620322342e-05, "loss": 2.1409, "step": 7598000 }, { "epoch": 21.99, "learning_rate": 3.9006714002871434e-05, "loss": 2.1635, "step": 7598500 }, { "epoch": 22.0, "learning_rate": 3.9005990355224156e-05, "loss": 2.1908, "step": 7599000 }, { "epoch": 22.0, "learning_rate": 3.9005266707576885e-05, "loss": 2.179, "step": 7599500 }, { "epoch": 22.0, "learning_rate": 3.900454305992961e-05, "loss": 2.1825, "step": 7600000 }, { "epoch": 22.0, "eval_accuracy": 0.6618738376038161, "eval_accuracy_mlm": 0.6256257501839781, "eval_accuracy_nsp": 0.8561273734721095, "eval_loss": 2.2176835536956787, "eval_runtime": 331.6809, "eval_samples_per_second": 1315.68, "eval_steps_per_second": 54.821, "step": 7600384 }, { "epoch": 22.0, "learning_rate": 3.900381941228233e-05, "loss": 2.1843, "step": 7600500 }, { "epoch": 22.0, "learning_rate": 3.900309576463505e-05, "loss": 2.1525, "step": 7601000 }, { "epoch": 22.0, "learning_rate": 3.900237356428307e-05, "loss": 2.1665, "step": 7601500 }, { "epoch": 22.0, "learning_rate": 3.900164991663579e-05, "loss": 2.1588, "step": 7602000 }, { "epoch": 22.01, "learning_rate": 3.900092626898851e-05, "loss": 2.1481, "step": 7602500 }, { "epoch": 22.01, "learning_rate": 3.900020262134124e-05, "loss": 2.1454, "step": 7603000 }, { "epoch": 22.01, "learning_rate": 3.899947897369396e-05, "loss": 2.1571, "step": 7603500 }, { "epoch": 22.01, "learning_rate": 3.8998755326046685e-05, "loss": 2.1507, "step": 7604000 }, { "epoch": 22.01, "learning_rate": 3.899803312569471e-05, "loss": 2.1619, "step": 7604500 }, { "epoch": 22.01, "learning_rate": 3.899730947804743e-05, "loss": 2.1628, "step": 7605000 }, { "epoch": 22.01, "learning_rate": 3.899658583040015e-05, "loss": 2.1625, "step": 7605500 }, { "epoch": 22.02, "learning_rate": 3.8995862182752874e-05, "loss": 2.1462, "step": 7606000 }, { "epoch": 22.02, "learning_rate": 3.899513998240089e-05, "loss": 2.1624, "step": 7606500 }, { "epoch": 22.02, "learning_rate": 3.899441778204891e-05, "loss": 2.1682, "step": 7607000 }, { "epoch": 22.02, "learning_rate": 3.8993694134401634e-05, "loss": 2.1578, "step": 7607500 }, { "epoch": 22.02, "learning_rate": 3.8992970486754356e-05, "loss": 2.1436, "step": 7608000 }, { "epoch": 22.02, "learning_rate": 3.899224683910708e-05, "loss": 2.1512, "step": 7608500 }, { "epoch": 22.02, "learning_rate": 3.89915231914598e-05, "loss": 2.1432, "step": 7609000 }, { "epoch": 22.03, "learning_rate": 3.899079954381252e-05, "loss": 2.1692, "step": 7609500 }, { "epoch": 22.03, "learning_rate": 3.8990075896165245e-05, "loss": 2.1634, "step": 7610000 }, { "epoch": 22.03, "learning_rate": 3.8989352248517974e-05, "loss": 2.1887, "step": 7610500 }, { "epoch": 22.03, "learning_rate": 3.898863004816599e-05, "loss": 2.1697, "step": 7611000 }, { "epoch": 22.03, "learning_rate": 3.898790640051871e-05, "loss": 2.1452, "step": 7611500 }, { "epoch": 22.03, "learning_rate": 3.8987182752871434e-05, "loss": 2.1169, "step": 7612000 }, { "epoch": 22.04, "learning_rate": 3.8986459105224163e-05, "loss": 2.1549, "step": 7612500 }, { "epoch": 22.04, "learning_rate": 3.898573690487218e-05, "loss": 2.1482, "step": 7613000 }, { "epoch": 22.04, "learning_rate": 3.89850132572249e-05, "loss": 2.1528, "step": 7613500 }, { "epoch": 22.04, "learning_rate": 3.8984289609577623e-05, "loss": 2.1622, "step": 7614000 }, { "epoch": 22.04, "learning_rate": 3.8983565961930346e-05, "loss": 2.1683, "step": 7614500 }, { "epoch": 22.04, "learning_rate": 3.898284231428307e-05, "loss": 2.1819, "step": 7615000 }, { "epoch": 22.04, "learning_rate": 3.898211866663579e-05, "loss": 2.1598, "step": 7615500 }, { "epoch": 22.05, "learning_rate": 3.898139501898851e-05, "loss": 2.1426, "step": 7616000 }, { "epoch": 22.05, "learning_rate": 3.8980671371341235e-05, "loss": 2.1521, "step": 7616500 }, { "epoch": 22.05, "learning_rate": 3.8979947723693964e-05, "loss": 2.1691, "step": 7617000 }, { "epoch": 22.05, "learning_rate": 3.897922552334198e-05, "loss": 2.1551, "step": 7617500 }, { "epoch": 22.05, "learning_rate": 3.897850187569471e-05, "loss": 2.156, "step": 7618000 }, { "epoch": 22.05, "learning_rate": 3.8977779675342724e-05, "loss": 2.1732, "step": 7618500 }, { "epoch": 22.05, "learning_rate": 3.8977056027695446e-05, "loss": 2.1632, "step": 7619000 }, { "epoch": 22.06, "learning_rate": 3.897633238004817e-05, "loss": 2.1695, "step": 7619500 }, { "epoch": 22.06, "learning_rate": 3.897560873240089e-05, "loss": 2.1729, "step": 7620000 }, { "epoch": 22.06, "learning_rate": 3.8974887979344206e-05, "loss": 2.1893, "step": 7620500 }, { "epoch": 22.06, "learning_rate": 3.897416433169693e-05, "loss": 2.1657, "step": 7621000 }, { "epoch": 22.06, "learning_rate": 3.897344068404965e-05, "loss": 2.1659, "step": 7621500 }, { "epoch": 22.06, "learning_rate": 3.897271703640237e-05, "loss": 2.1542, "step": 7622000 }, { "epoch": 22.06, "learning_rate": 3.8971993388755095e-05, "loss": 2.1733, "step": 7622500 }, { "epoch": 22.07, "learning_rate": 3.897127118840311e-05, "loss": 2.1531, "step": 7623000 }, { "epoch": 22.07, "learning_rate": 3.897054754075584e-05, "loss": 2.155, "step": 7623500 }, { "epoch": 22.07, "learning_rate": 3.896982389310856e-05, "loss": 2.1917, "step": 7624000 }, { "epoch": 22.07, "learning_rate": 3.8969100245461284e-05, "loss": 2.1622, "step": 7624500 }, { "epoch": 22.07, "learning_rate": 3.8968376597814006e-05, "loss": 2.1771, "step": 7625000 }, { "epoch": 22.07, "learning_rate": 3.896765295016673e-05, "loss": 2.1242, "step": 7625500 }, { "epoch": 22.07, "learning_rate": 3.896692930251946e-05, "loss": 2.1531, "step": 7626000 }, { "epoch": 22.08, "learning_rate": 3.896620565487218e-05, "loss": 2.1461, "step": 7626500 }, { "epoch": 22.08, "learning_rate": 3.89654820072249e-05, "loss": 2.1461, "step": 7627000 }, { "epoch": 22.08, "learning_rate": 3.896475980687292e-05, "loss": 2.1567, "step": 7627500 }, { "epoch": 22.08, "learning_rate": 3.896403615922564e-05, "loss": 2.157, "step": 7628000 }, { "epoch": 22.08, "learning_rate": 3.896331395887366e-05, "loss": 2.1644, "step": 7628500 }, { "epoch": 22.08, "learning_rate": 3.8962590311226384e-05, "loss": 2.1523, "step": 7629000 }, { "epoch": 22.08, "learning_rate": 3.89618681108744e-05, "loss": 2.1733, "step": 7629500 }, { "epoch": 22.09, "learning_rate": 3.896114446322712e-05, "loss": 2.1623, "step": 7630000 }, { "epoch": 22.09, "learning_rate": 3.8960420815579844e-05, "loss": 2.164, "step": 7630500 }, { "epoch": 22.09, "learning_rate": 3.895969716793257e-05, "loss": 2.1664, "step": 7631000 }, { "epoch": 22.09, "learning_rate": 3.895897352028529e-05, "loss": 2.1459, "step": 7631500 }, { "epoch": 22.09, "learning_rate": 3.895824987263801e-05, "loss": 2.157, "step": 7632000 }, { "epoch": 22.09, "learning_rate": 3.895752622499074e-05, "loss": 2.1557, "step": 7632500 }, { "epoch": 22.09, "learning_rate": 3.895680257734346e-05, "loss": 2.1615, "step": 7633000 }, { "epoch": 22.1, "learning_rate": 3.895607892969619e-05, "loss": 2.1445, "step": 7633500 }, { "epoch": 22.1, "learning_rate": 3.8955355282048914e-05, "loss": 2.1793, "step": 7634000 }, { "epoch": 22.1, "learning_rate": 3.8954631634401636e-05, "loss": 2.1503, "step": 7634500 }, { "epoch": 22.1, "learning_rate": 3.895390798675436e-05, "loss": 2.1457, "step": 7635000 }, { "epoch": 22.1, "learning_rate": 3.895318433910708e-05, "loss": 2.1628, "step": 7635500 }, { "epoch": 22.1, "learning_rate": 3.8952462138755096e-05, "loss": 2.1451, "step": 7636000 }, { "epoch": 22.1, "learning_rate": 3.895173849110782e-05, "loss": 2.1431, "step": 7636500 }, { "epoch": 22.11, "learning_rate": 3.895101484346054e-05, "loss": 2.1634, "step": 7637000 }, { "epoch": 22.11, "learning_rate": 3.895029119581326e-05, "loss": 2.1614, "step": 7637500 }, { "epoch": 22.11, "learning_rate": 3.894956754816599e-05, "loss": 2.163, "step": 7638000 }, { "epoch": 22.11, "learning_rate": 3.89488467951093e-05, "loss": 2.1657, "step": 7638500 }, { "epoch": 22.11, "learning_rate": 3.894812314746202e-05, "loss": 2.1856, "step": 7639000 }, { "epoch": 22.11, "learning_rate": 3.8947399499814745e-05, "loss": 2.1445, "step": 7639500 }, { "epoch": 22.11, "learning_rate": 3.894667585216747e-05, "loss": 2.192, "step": 7640000 }, { "epoch": 22.12, "learning_rate": 3.894595220452019e-05, "loss": 2.1555, "step": 7640500 }, { "epoch": 22.12, "learning_rate": 3.894522855687292e-05, "loss": 2.1486, "step": 7641000 }, { "epoch": 22.12, "learning_rate": 3.894450490922564e-05, "loss": 2.1745, "step": 7641500 }, { "epoch": 22.12, "learning_rate": 3.894378126157836e-05, "loss": 2.1469, "step": 7642000 }, { "epoch": 22.12, "learning_rate": 3.8943059061226385e-05, "loss": 2.1672, "step": 7642500 }, { "epoch": 22.12, "learning_rate": 3.89423368608744e-05, "loss": 2.1663, "step": 7643000 }, { "epoch": 22.12, "learning_rate": 3.894161321322712e-05, "loss": 2.1638, "step": 7643500 }, { "epoch": 22.13, "learning_rate": 3.8940889565579845e-05, "loss": 2.1588, "step": 7644000 }, { "epoch": 22.13, "learning_rate": 3.894016591793257e-05, "loss": 2.163, "step": 7644500 }, { "epoch": 22.13, "learning_rate": 3.893944227028529e-05, "loss": 2.1767, "step": 7645000 }, { "epoch": 22.13, "learning_rate": 3.893871862263801e-05, "loss": 2.171, "step": 7645500 }, { "epoch": 22.13, "learning_rate": 3.893799497499074e-05, "loss": 2.1679, "step": 7646000 }, { "epoch": 22.13, "learning_rate": 3.893727132734346e-05, "loss": 2.1847, "step": 7646500 }, { "epoch": 22.13, "learning_rate": 3.8936547679696185e-05, "loss": 2.1575, "step": 7647000 }, { "epoch": 22.14, "learning_rate": 3.893582403204891e-05, "loss": 2.1725, "step": 7647500 }, { "epoch": 22.14, "learning_rate": 3.893510038440163e-05, "loss": 2.1721, "step": 7648000 }, { "epoch": 22.14, "learning_rate": 3.893437673675436e-05, "loss": 2.1598, "step": 7648500 }, { "epoch": 22.14, "learning_rate": 3.893365308910708e-05, "loss": 2.1628, "step": 7649000 }, { "epoch": 22.14, "learning_rate": 3.89329308887551e-05, "loss": 2.1704, "step": 7649500 }, { "epoch": 22.14, "learning_rate": 3.893220724110782e-05, "loss": 2.1719, "step": 7650000 }, { "epoch": 22.15, "learning_rate": 3.893148359346054e-05, "loss": 2.1402, "step": 7650500 }, { "epoch": 22.15, "learning_rate": 3.893075994581326e-05, "loss": 2.1626, "step": 7651000 }, { "epoch": 22.15, "learning_rate": 3.893003629816599e-05, "loss": 2.1506, "step": 7651500 }, { "epoch": 22.15, "learning_rate": 3.8929312650518715e-05, "loss": 2.1639, "step": 7652000 }, { "epoch": 22.15, "learning_rate": 3.892858900287144e-05, "loss": 2.1733, "step": 7652500 }, { "epoch": 22.15, "learning_rate": 3.892786535522416e-05, "loss": 2.1566, "step": 7653000 }, { "epoch": 22.15, "learning_rate": 3.8927143154872175e-05, "loss": 2.1792, "step": 7653500 }, { "epoch": 22.16, "learning_rate": 3.89264195072249e-05, "loss": 2.1474, "step": 7654000 }, { "epoch": 22.16, "learning_rate": 3.892569585957762e-05, "loss": 2.1741, "step": 7654500 }, { "epoch": 22.16, "learning_rate": 3.892497221193034e-05, "loss": 2.1564, "step": 7655000 }, { "epoch": 22.16, "learning_rate": 3.892424856428307e-05, "loss": 2.1549, "step": 7655500 }, { "epoch": 22.16, "learning_rate": 3.8923527811226386e-05, "loss": 2.1689, "step": 7656000 }, { "epoch": 22.16, "learning_rate": 3.892280416357911e-05, "loss": 2.1525, "step": 7656500 }, { "epoch": 22.16, "learning_rate": 3.892208051593183e-05, "loss": 2.1423, "step": 7657000 }, { "epoch": 22.17, "learning_rate": 3.892135686828455e-05, "loss": 2.1789, "step": 7657500 }, { "epoch": 22.17, "learning_rate": 3.8920633220637275e-05, "loss": 2.1807, "step": 7658000 }, { "epoch": 22.17, "learning_rate": 3.891990957299e-05, "loss": 2.1792, "step": 7658500 }, { "epoch": 22.17, "learning_rate": 3.891918592534272e-05, "loss": 2.1756, "step": 7659000 }, { "epoch": 22.17, "learning_rate": 3.891846227769544e-05, "loss": 2.1662, "step": 7659500 }, { "epoch": 22.17, "learning_rate": 3.891773863004817e-05, "loss": 2.1724, "step": 7660000 }, { "epoch": 22.17, "learning_rate": 3.891701498240089e-05, "loss": 2.147, "step": 7660500 }, { "epoch": 22.18, "learning_rate": 3.8916291334753615e-05, "loss": 2.1614, "step": 7661000 }, { "epoch": 22.18, "learning_rate": 3.891556913440163e-05, "loss": 2.1831, "step": 7661500 }, { "epoch": 22.18, "learning_rate": 3.891484548675435e-05, "loss": 2.1466, "step": 7662000 }, { "epoch": 22.18, "learning_rate": 3.8914121839107075e-05, "loss": 2.1531, "step": 7662500 }, { "epoch": 22.18, "learning_rate": 3.8913398191459804e-05, "loss": 2.1472, "step": 7663000 }, { "epoch": 22.18, "learning_rate": 3.8912674543812526e-05, "loss": 2.1663, "step": 7663500 }, { "epoch": 22.18, "learning_rate": 3.891195089616525e-05, "loss": 2.1634, "step": 7664000 }, { "epoch": 22.19, "learning_rate": 3.891122869581327e-05, "loss": 2.1659, "step": 7664500 }, { "epoch": 22.19, "learning_rate": 3.891050504816599e-05, "loss": 2.164, "step": 7665000 }, { "epoch": 22.19, "learning_rate": 3.8909781400518715e-05, "loss": 2.1755, "step": 7665500 }, { "epoch": 22.19, "learning_rate": 3.890905775287144e-05, "loss": 2.1684, "step": 7666000 }, { "epoch": 22.19, "learning_rate": 3.890833555251945e-05, "loss": 2.1736, "step": 7666500 }, { "epoch": 22.19, "learning_rate": 3.8907611904872175e-05, "loss": 2.1987, "step": 7667000 }, { "epoch": 22.19, "learning_rate": 3.89068882572249e-05, "loss": 2.1778, "step": 7667500 }, { "epoch": 22.2, "learning_rate": 3.890616460957762e-05, "loss": 2.1562, "step": 7668000 }, { "epoch": 22.2, "learning_rate": 3.890544096193034e-05, "loss": 2.1766, "step": 7668500 }, { "epoch": 22.2, "learning_rate": 3.890471731428307e-05, "loss": 2.1803, "step": 7669000 }, { "epoch": 22.2, "learning_rate": 3.890399366663579e-05, "loss": 2.1537, "step": 7669500 }, { "epoch": 22.2, "learning_rate": 3.8903270018988516e-05, "loss": 2.1523, "step": 7670000 }, { "epoch": 22.2, "learning_rate": 3.8902546371341245e-05, "loss": 2.1493, "step": 7670500 }, { "epoch": 22.2, "learning_rate": 3.8901825618284553e-05, "loss": 2.1617, "step": 7671000 }, { "epoch": 22.21, "learning_rate": 3.8901101970637276e-05, "loss": 2.1887, "step": 7671500 }, { "epoch": 22.21, "learning_rate": 3.890037832299e-05, "loss": 2.1946, "step": 7672000 }, { "epoch": 22.21, "learning_rate": 3.889965467534272e-05, "loss": 2.1536, "step": 7672500 }, { "epoch": 22.21, "learning_rate": 3.889893102769544e-05, "loss": 2.1721, "step": 7673000 }, { "epoch": 22.21, "learning_rate": 3.889820738004817e-05, "loss": 2.1846, "step": 7673500 }, { "epoch": 22.21, "learning_rate": 3.889748517969619e-05, "loss": 2.1547, "step": 7674000 }, { "epoch": 22.21, "learning_rate": 3.889676153204891e-05, "loss": 2.1656, "step": 7674500 }, { "epoch": 22.22, "learning_rate": 3.889603788440163e-05, "loss": 2.1508, "step": 7675000 }, { "epoch": 22.22, "learning_rate": 3.889531568404965e-05, "loss": 2.182, "step": 7675500 }, { "epoch": 22.22, "learning_rate": 3.889459203640237e-05, "loss": 2.1695, "step": 7676000 }, { "epoch": 22.22, "learning_rate": 3.889386838875509e-05, "loss": 2.1854, "step": 7676500 }, { "epoch": 22.22, "learning_rate": 3.889314474110782e-05, "loss": 2.1706, "step": 7677000 }, { "epoch": 22.22, "learning_rate": 3.889242109346054e-05, "loss": 2.1534, "step": 7677500 }, { "epoch": 22.22, "learning_rate": 3.889169744581327e-05, "loss": 2.175, "step": 7678000 }, { "epoch": 22.23, "learning_rate": 3.8890973798165994e-05, "loss": 2.1651, "step": 7678500 }, { "epoch": 22.23, "learning_rate": 3.8890250150518716e-05, "loss": 2.1777, "step": 7679000 }, { "epoch": 22.23, "learning_rate": 3.888952650287144e-05, "loss": 2.1544, "step": 7679500 }, { "epoch": 22.23, "learning_rate": 3.888880285522416e-05, "loss": 2.1813, "step": 7680000 }, { "epoch": 22.23, "learning_rate": 3.888807920757688e-05, "loss": 2.1685, "step": 7680500 }, { "epoch": 22.23, "learning_rate": 3.8887355559929605e-05, "loss": 2.1689, "step": 7681000 }, { "epoch": 22.23, "learning_rate": 3.888663335957762e-05, "loss": 2.1667, "step": 7681500 }, { "epoch": 22.24, "learning_rate": 3.888591115922564e-05, "loss": 2.157, "step": 7682000 }, { "epoch": 22.24, "learning_rate": 3.8885187511578365e-05, "loss": 2.1786, "step": 7682500 }, { "epoch": 22.24, "learning_rate": 3.888446386393109e-05, "loss": 2.1723, "step": 7683000 }, { "epoch": 22.24, "learning_rate": 3.888374021628381e-05, "loss": 2.1667, "step": 7683500 }, { "epoch": 22.24, "learning_rate": 3.888301656863653e-05, "loss": 2.1395, "step": 7684000 }, { "epoch": 22.24, "learning_rate": 3.8882292920989254e-05, "loss": 2.1671, "step": 7684500 }, { "epoch": 22.24, "learning_rate": 3.8881569273341976e-05, "loss": 2.1683, "step": 7685000 }, { "epoch": 22.25, "learning_rate": 3.8880845625694705e-05, "loss": 2.1823, "step": 7685500 }, { "epoch": 22.25, "learning_rate": 3.888012197804743e-05, "loss": 2.179, "step": 7686000 }, { "epoch": 22.25, "learning_rate": 3.887939833040015e-05, "loss": 2.1606, "step": 7686500 }, { "epoch": 22.25, "learning_rate": 3.887867613004817e-05, "loss": 2.1783, "step": 7687000 }, { "epoch": 22.25, "learning_rate": 3.887795392969619e-05, "loss": 2.1652, "step": 7687500 }, { "epoch": 22.25, "learning_rate": 3.887723028204891e-05, "loss": 2.1588, "step": 7688000 }, { "epoch": 22.26, "learning_rate": 3.887650663440163e-05, "loss": 2.1628, "step": 7688500 }, { "epoch": 22.26, "learning_rate": 3.8875782986754354e-05, "loss": 2.171, "step": 7689000 }, { "epoch": 22.26, "learning_rate": 3.887505933910708e-05, "loss": 2.168, "step": 7689500 }, { "epoch": 22.26, "learning_rate": 3.88743356914598e-05, "loss": 2.1617, "step": 7690000 }, { "epoch": 22.26, "learning_rate": 3.887361204381252e-05, "loss": 2.1307, "step": 7690500 }, { "epoch": 22.26, "learning_rate": 3.887288839616525e-05, "loss": 2.1626, "step": 7691000 }, { "epoch": 22.26, "learning_rate": 3.887216474851797e-05, "loss": 2.149, "step": 7691500 }, { "epoch": 22.27, "learning_rate": 3.887144254816599e-05, "loss": 2.1757, "step": 7692000 }, { "epoch": 22.27, "learning_rate": 3.887071890051871e-05, "loss": 2.1794, "step": 7692500 }, { "epoch": 22.27, "learning_rate": 3.886999525287144e-05, "loss": 2.1467, "step": 7693000 }, { "epoch": 22.27, "learning_rate": 3.886927160522416e-05, "loss": 2.1599, "step": 7693500 }, { "epoch": 22.27, "learning_rate": 3.8868547957576884e-05, "loss": 2.157, "step": 7694000 }, { "epoch": 22.27, "learning_rate": 3.88678257572249e-05, "loss": 2.1619, "step": 7694500 }, { "epoch": 22.27, "learning_rate": 3.886710210957762e-05, "loss": 2.1628, "step": 7695000 }, { "epoch": 22.28, "learning_rate": 3.886637846193035e-05, "loss": 2.1698, "step": 7695500 }, { "epoch": 22.28, "learning_rate": 3.886565481428307e-05, "loss": 2.161, "step": 7696000 }, { "epoch": 22.28, "learning_rate": 3.8864931166635795e-05, "loss": 2.1825, "step": 7696500 }, { "epoch": 22.28, "learning_rate": 3.886420896628381e-05, "loss": 2.1718, "step": 7697000 }, { "epoch": 22.28, "learning_rate": 3.886348531863653e-05, "loss": 2.1692, "step": 7697500 }, { "epoch": 22.28, "learning_rate": 3.8862761670989255e-05, "loss": 2.148, "step": 7698000 }, { "epoch": 22.28, "learning_rate": 3.886203802334198e-05, "loss": 2.1812, "step": 7698500 }, { "epoch": 22.29, "learning_rate": 3.886131582299e-05, "loss": 2.1719, "step": 7699000 }, { "epoch": 22.29, "learning_rate": 3.886059217534272e-05, "loss": 2.1468, "step": 7699500 }, { "epoch": 22.29, "learning_rate": 3.885986997499074e-05, "loss": 2.1779, "step": 7700000 }, { "epoch": 22.29, "learning_rate": 3.885914632734346e-05, "loss": 2.1632, "step": 7700500 }, { "epoch": 22.29, "learning_rate": 3.885842267969619e-05, "loss": 2.1364, "step": 7701000 }, { "epoch": 22.29, "learning_rate": 3.885769903204891e-05, "loss": 2.1426, "step": 7701500 }, { "epoch": 22.29, "learning_rate": 3.885697538440163e-05, "loss": 2.1761, "step": 7702000 }, { "epoch": 22.3, "learning_rate": 3.8856251736754355e-05, "loss": 2.1409, "step": 7702500 }, { "epoch": 22.3, "learning_rate": 3.885552808910708e-05, "loss": 2.1685, "step": 7703000 }, { "epoch": 22.3, "learning_rate": 3.88548044414598e-05, "loss": 2.1555, "step": 7703500 }, { "epoch": 22.3, "learning_rate": 3.885408079381252e-05, "loss": 2.1595, "step": 7704000 }, { "epoch": 22.3, "learning_rate": 3.885335714616525e-05, "loss": 2.1716, "step": 7704500 }, { "epoch": 22.3, "learning_rate": 3.8852634945813267e-05, "loss": 2.1818, "step": 7705000 }, { "epoch": 22.3, "learning_rate": 3.885191129816599e-05, "loss": 2.1685, "step": 7705500 }, { "epoch": 22.31, "learning_rate": 3.885118765051871e-05, "loss": 2.1947, "step": 7706000 }, { "epoch": 22.31, "learning_rate": 3.885046400287143e-05, "loss": 2.1836, "step": 7706500 }, { "epoch": 22.31, "learning_rate": 3.8849740355224155e-05, "loss": 2.1672, "step": 7707000 }, { "epoch": 22.31, "learning_rate": 3.884901670757688e-05, "loss": 2.1584, "step": 7707500 }, { "epoch": 22.31, "learning_rate": 3.884829305992961e-05, "loss": 2.1617, "step": 7708000 }, { "epoch": 22.31, "learning_rate": 3.884756941228233e-05, "loss": 2.1605, "step": 7708500 }, { "epoch": 22.31, "learning_rate": 3.884684576463505e-05, "loss": 2.1945, "step": 7709000 }, { "epoch": 22.32, "learning_rate": 3.8846123564283073e-05, "loss": 2.1833, "step": 7709500 }, { "epoch": 22.32, "learning_rate": 3.8845399916635796e-05, "loss": 2.1624, "step": 7710000 }, { "epoch": 22.32, "learning_rate": 3.884467626898852e-05, "loss": 2.1593, "step": 7710500 }, { "epoch": 22.32, "learning_rate": 3.884395262134124e-05, "loss": 2.1691, "step": 7711000 }, { "epoch": 22.32, "learning_rate": 3.884322897369396e-05, "loss": 2.1955, "step": 7711500 }, { "epoch": 22.32, "learning_rate": 3.8842505326046685e-05, "loss": 2.1726, "step": 7712000 }, { "epoch": 22.32, "learning_rate": 3.884178167839941e-05, "loss": 2.1731, "step": 7712500 }, { "epoch": 22.33, "learning_rate": 3.884105803075213e-05, "loss": 2.1569, "step": 7713000 }, { "epoch": 22.33, "learning_rate": 3.884033583040015e-05, "loss": 2.1857, "step": 7713500 }, { "epoch": 22.33, "learning_rate": 3.8839612182752874e-05, "loss": 2.144, "step": 7714000 }, { "epoch": 22.33, "learning_rate": 3.883888998240089e-05, "loss": 2.1481, "step": 7714500 }, { "epoch": 22.33, "learning_rate": 3.883816633475361e-05, "loss": 2.1781, "step": 7715000 }, { "epoch": 22.33, "learning_rate": 3.883744268710634e-05, "loss": 2.1697, "step": 7715500 }, { "epoch": 22.33, "learning_rate": 3.8836720486754356e-05, "loss": 2.1551, "step": 7716000 }, { "epoch": 22.34, "learning_rate": 3.883599828640238e-05, "loss": 2.1846, "step": 7716500 }, { "epoch": 22.34, "learning_rate": 3.88352746387551e-05, "loss": 2.1514, "step": 7717000 }, { "epoch": 22.34, "learning_rate": 3.883455099110782e-05, "loss": 2.1658, "step": 7717500 }, { "epoch": 22.34, "learning_rate": 3.8833827343460545e-05, "loss": 2.1655, "step": 7718000 }, { "epoch": 22.34, "learning_rate": 3.883310369581327e-05, "loss": 2.1625, "step": 7718500 }, { "epoch": 22.34, "learning_rate": 3.883238004816599e-05, "loss": 2.174, "step": 7719000 }, { "epoch": 22.34, "learning_rate": 3.883165640051871e-05, "loss": 2.1712, "step": 7719500 }, { "epoch": 22.35, "learning_rate": 3.883093420016673e-05, "loss": 2.1678, "step": 7720000 }, { "epoch": 22.35, "learning_rate": 3.883021055251945e-05, "loss": 2.1753, "step": 7720500 }, { "epoch": 22.35, "learning_rate": 3.882948690487218e-05, "loss": 2.18, "step": 7721000 }, { "epoch": 22.35, "learning_rate": 3.88287632572249e-05, "loss": 2.1806, "step": 7721500 }, { "epoch": 22.35, "learning_rate": 3.882803960957762e-05, "loss": 2.1847, "step": 7722000 }, { "epoch": 22.35, "learning_rate": 3.8827315961930345e-05, "loss": 2.1838, "step": 7722500 }, { "epoch": 22.35, "learning_rate": 3.882659376157836e-05, "loss": 2.1834, "step": 7723000 }, { "epoch": 22.36, "learning_rate": 3.882587156122638e-05, "loss": 2.1826, "step": 7723500 }, { "epoch": 22.36, "learning_rate": 3.8825147913579105e-05, "loss": 2.1659, "step": 7724000 }, { "epoch": 22.36, "learning_rate": 3.882442426593183e-05, "loss": 2.1518, "step": 7724500 }, { "epoch": 22.36, "learning_rate": 3.882370061828455e-05, "loss": 2.1595, "step": 7725000 }, { "epoch": 22.36, "learning_rate": 3.882297697063728e-05, "loss": 2.1806, "step": 7725500 }, { "epoch": 22.36, "learning_rate": 3.882225332299e-05, "loss": 2.19, "step": 7726000 }, { "epoch": 22.37, "learning_rate": 3.882152967534272e-05, "loss": 2.159, "step": 7726500 }, { "epoch": 22.37, "learning_rate": 3.8820806027695446e-05, "loss": 2.153, "step": 7727000 }, { "epoch": 22.37, "learning_rate": 3.882008238004817e-05, "loss": 2.1653, "step": 7727500 }, { "epoch": 22.37, "learning_rate": 3.881936017969618e-05, "loss": 2.1531, "step": 7728000 }, { "epoch": 22.37, "learning_rate": 3.8818636532048906e-05, "loss": 2.1792, "step": 7728500 }, { "epoch": 22.37, "learning_rate": 3.881791288440163e-05, "loss": 2.1612, "step": 7729000 }, { "epoch": 22.37, "learning_rate": 3.881718923675435e-05, "loss": 2.1576, "step": 7729500 }, { "epoch": 22.38, "learning_rate": 3.881646558910708e-05, "loss": 2.1608, "step": 7730000 }, { "epoch": 22.38, "learning_rate": 3.88157419414598e-05, "loss": 2.165, "step": 7730500 }, { "epoch": 22.38, "learning_rate": 3.881501829381253e-05, "loss": 2.1592, "step": 7731000 }, { "epoch": 22.38, "learning_rate": 3.8814296093460546e-05, "loss": 2.1864, "step": 7731500 }, { "epoch": 22.38, "learning_rate": 3.881357244581327e-05, "loss": 2.1649, "step": 7732000 }, { "epoch": 22.38, "learning_rate": 3.881284879816599e-05, "loss": 2.1827, "step": 7732500 }, { "epoch": 22.38, "learning_rate": 3.881212515051871e-05, "loss": 2.1716, "step": 7733000 }, { "epoch": 22.39, "learning_rate": 3.8811401502871435e-05, "loss": 2.195, "step": 7733500 }, { "epoch": 22.39, "learning_rate": 3.881067930251945e-05, "loss": 2.1707, "step": 7734000 }, { "epoch": 22.39, "learning_rate": 3.880995565487218e-05, "loss": 2.1745, "step": 7734500 }, { "epoch": 22.39, "learning_rate": 3.88092320072249e-05, "loss": 2.1814, "step": 7735000 }, { "epoch": 22.39, "learning_rate": 3.8808508359577624e-05, "loss": 2.1654, "step": 7735500 }, { "epoch": 22.39, "learning_rate": 3.8807784711930346e-05, "loss": 2.1683, "step": 7736000 }, { "epoch": 22.39, "learning_rate": 3.880706106428307e-05, "loss": 2.1475, "step": 7736500 }, { "epoch": 22.4, "learning_rate": 3.8806338863931084e-05, "loss": 2.1745, "step": 7737000 }, { "epoch": 22.4, "learning_rate": 3.8805615216283806e-05, "loss": 2.1819, "step": 7737500 }, { "epoch": 22.4, "learning_rate": 3.8804891568636535e-05, "loss": 2.1592, "step": 7738000 }, { "epoch": 22.4, "learning_rate": 3.880416792098926e-05, "loss": 2.1607, "step": 7738500 }, { "epoch": 22.4, "learning_rate": 3.880344427334198e-05, "loss": 2.1712, "step": 7739000 }, { "epoch": 22.4, "learning_rate": 3.88027206256947e-05, "loss": 2.1652, "step": 7739500 }, { "epoch": 22.4, "learning_rate": 3.880199697804743e-05, "loss": 2.1734, "step": 7740000 }, { "epoch": 22.41, "learning_rate": 3.880127333040015e-05, "loss": 2.1768, "step": 7740500 }, { "epoch": 22.41, "learning_rate": 3.8800549682752875e-05, "loss": 2.1662, "step": 7741000 }, { "epoch": 22.41, "learning_rate": 3.87998260351056e-05, "loss": 2.1706, "step": 7741500 }, { "epoch": 22.41, "learning_rate": 3.879910238745832e-05, "loss": 2.1749, "step": 7742000 }, { "epoch": 22.41, "learning_rate": 3.879837873981104e-05, "loss": 2.1996, "step": 7742500 }, { "epoch": 22.41, "learning_rate": 3.8797655092163764e-05, "loss": 2.1773, "step": 7743000 }, { "epoch": 22.41, "learning_rate": 3.8796931444516486e-05, "loss": 2.1481, "step": 7743500 }, { "epoch": 22.42, "learning_rate": 3.879620779686921e-05, "loss": 2.1503, "step": 7744000 }, { "epoch": 22.42, "learning_rate": 3.879548414922193e-05, "loss": 2.1451, "step": 7744500 }, { "epoch": 22.42, "learning_rate": 3.879476050157466e-05, "loss": 2.1784, "step": 7745000 }, { "epoch": 22.42, "learning_rate": 3.879403830122268e-05, "loss": 2.1722, "step": 7745500 }, { "epoch": 22.42, "learning_rate": 3.87933161008707e-05, "loss": 2.158, "step": 7746000 }, { "epoch": 22.42, "learning_rate": 3.879259245322342e-05, "loss": 2.1522, "step": 7746500 }, { "epoch": 22.42, "learning_rate": 3.879186880557614e-05, "loss": 2.166, "step": 7747000 }, { "epoch": 22.43, "learning_rate": 3.879114660522416e-05, "loss": 2.1847, "step": 7747500 }, { "epoch": 22.43, "learning_rate": 3.879042295757688e-05, "loss": 2.1777, "step": 7748000 }, { "epoch": 22.43, "learning_rate": 3.878969930992961e-05, "loss": 2.18, "step": 7748500 }, { "epoch": 22.43, "learning_rate": 3.878897566228233e-05, "loss": 2.1532, "step": 7749000 }, { "epoch": 22.43, "learning_rate": 3.878825346193035e-05, "loss": 2.1702, "step": 7749500 }, { "epoch": 22.43, "learning_rate": 3.878752981428307e-05, "loss": 2.1611, "step": 7750000 }, { "epoch": 22.43, "learning_rate": 3.878680616663579e-05, "loss": 2.1769, "step": 7750500 }, { "epoch": 22.44, "learning_rate": 3.8786082518988514e-05, "loss": 2.157, "step": 7751000 }, { "epoch": 22.44, "learning_rate": 3.878536031863653e-05, "loss": 2.1711, "step": 7751500 }, { "epoch": 22.44, "learning_rate": 3.878463667098926e-05, "loss": 2.1618, "step": 7752000 }, { "epoch": 22.44, "learning_rate": 3.878391302334198e-05, "loss": 2.1628, "step": 7752500 }, { "epoch": 22.44, "learning_rate": 3.878318937569471e-05, "loss": 2.1816, "step": 7753000 }, { "epoch": 22.44, "learning_rate": 3.878246572804743e-05, "loss": 2.1909, "step": 7753500 }, { "epoch": 22.44, "learning_rate": 3.8781742080400154e-05, "loss": 2.1519, "step": 7754000 }, { "epoch": 22.45, "learning_rate": 3.8781018432752876e-05, "loss": 2.1684, "step": 7754500 }, { "epoch": 22.45, "learning_rate": 3.87802947851056e-05, "loss": 2.1574, "step": 7755000 }, { "epoch": 22.45, "learning_rate": 3.877957113745832e-05, "loss": 2.1792, "step": 7755500 }, { "epoch": 22.45, "learning_rate": 3.8778848937106336e-05, "loss": 2.1617, "step": 7756000 }, { "epoch": 22.45, "learning_rate": 3.877812673675436e-05, "loss": 2.1458, "step": 7756500 }, { "epoch": 22.45, "learning_rate": 3.877740308910708e-05, "loss": 2.1746, "step": 7757000 }, { "epoch": 22.45, "learning_rate": 3.87766794414598e-05, "loss": 2.1571, "step": 7757500 }, { "epoch": 22.46, "learning_rate": 3.8775955793812525e-05, "loss": 2.1687, "step": 7758000 }, { "epoch": 22.46, "learning_rate": 3.877523214616525e-05, "loss": 2.1448, "step": 7758500 }, { "epoch": 22.46, "learning_rate": 3.877450849851797e-05, "loss": 2.1768, "step": 7759000 }, { "epoch": 22.46, "learning_rate": 3.877378485087069e-05, "loss": 2.18, "step": 7759500 }, { "epoch": 22.46, "learning_rate": 3.877306265051871e-05, "loss": 2.1709, "step": 7760000 }, { "epoch": 22.46, "learning_rate": 3.8772339002871436e-05, "loss": 2.1592, "step": 7760500 }, { "epoch": 22.46, "learning_rate": 3.877161535522416e-05, "loss": 2.1725, "step": 7761000 }, { "epoch": 22.47, "learning_rate": 3.877089170757688e-05, "loss": 2.1799, "step": 7761500 }, { "epoch": 22.47, "learning_rate": 3.877016805992961e-05, "loss": 2.167, "step": 7762000 }, { "epoch": 22.47, "learning_rate": 3.8769445859577625e-05, "loss": 2.168, "step": 7762500 }, { "epoch": 22.47, "learning_rate": 3.876872221193035e-05, "loss": 2.1792, "step": 7763000 }, { "epoch": 22.47, "learning_rate": 3.876799856428307e-05, "loss": 2.1752, "step": 7763500 }, { "epoch": 22.47, "learning_rate": 3.876727491663579e-05, "loss": 2.1552, "step": 7764000 }, { "epoch": 22.48, "learning_rate": 3.8766551268988514e-05, "loss": 2.1467, "step": 7764500 }, { "epoch": 22.48, "learning_rate": 3.876582906863653e-05, "loss": 2.1835, "step": 7765000 }, { "epoch": 22.48, "learning_rate": 3.876510542098926e-05, "loss": 2.1845, "step": 7765500 }, { "epoch": 22.48, "learning_rate": 3.876438177334198e-05, "loss": 2.1669, "step": 7766000 }, { "epoch": 22.48, "learning_rate": 3.8763658125694703e-05, "loss": 2.1936, "step": 7766500 }, { "epoch": 22.48, "learning_rate": 3.876293592534272e-05, "loss": 2.1729, "step": 7767000 }, { "epoch": 22.48, "learning_rate": 3.876221227769544e-05, "loss": 2.1918, "step": 7767500 }, { "epoch": 22.49, "learning_rate": 3.876148863004817e-05, "loss": 2.1522, "step": 7768000 }, { "epoch": 22.49, "learning_rate": 3.876076498240089e-05, "loss": 2.175, "step": 7768500 }, { "epoch": 22.49, "learning_rate": 3.8760041334753615e-05, "loss": 2.1644, "step": 7769000 }, { "epoch": 22.49, "learning_rate": 3.875931913440164e-05, "loss": 2.1704, "step": 7769500 }, { "epoch": 22.49, "learning_rate": 3.875859548675436e-05, "loss": 2.1685, "step": 7770000 }, { "epoch": 22.49, "learning_rate": 3.875787183910708e-05, "loss": 2.1617, "step": 7770500 }, { "epoch": 22.49, "learning_rate": 3.8757148191459804e-05, "loss": 2.1854, "step": 7771000 }, { "epoch": 22.5, "learning_rate": 3.8756424543812526e-05, "loss": 2.1846, "step": 7771500 }, { "epoch": 22.5, "learning_rate": 3.875570089616525e-05, "loss": 2.1777, "step": 7772000 }, { "epoch": 22.5, "learning_rate": 3.875497724851797e-05, "loss": 2.1667, "step": 7772500 }, { "epoch": 22.5, "learning_rate": 3.8754256495461286e-05, "loss": 2.1533, "step": 7773000 }, { "epoch": 22.5, "learning_rate": 3.875353284781401e-05, "loss": 2.1757, "step": 7773500 }, { "epoch": 22.5, "learning_rate": 3.875280920016673e-05, "loss": 2.1791, "step": 7774000 }, { "epoch": 22.5, "learning_rate": 3.875208555251945e-05, "loss": 2.1627, "step": 7774500 }, { "epoch": 22.51, "learning_rate": 3.8751361904872175e-05, "loss": 2.1564, "step": 7775000 }, { "epoch": 22.51, "learning_rate": 3.8750638257224904e-05, "loss": 2.2073, "step": 7775500 }, { "epoch": 22.51, "learning_rate": 3.8749914609577626e-05, "loss": 2.1602, "step": 7776000 }, { "epoch": 22.51, "learning_rate": 3.874919240922564e-05, "loss": 2.2062, "step": 7776500 }, { "epoch": 22.51, "learning_rate": 3.8748468761578364e-05, "loss": 2.1559, "step": 7777000 }, { "epoch": 22.51, "learning_rate": 3.8747745113931086e-05, "loss": 2.162, "step": 7777500 }, { "epoch": 22.51, "learning_rate": 3.874702146628381e-05, "loss": 2.1691, "step": 7778000 }, { "epoch": 22.52, "learning_rate": 3.874629781863654e-05, "loss": 2.1844, "step": 7778500 }, { "epoch": 22.52, "learning_rate": 3.874557417098926e-05, "loss": 2.1498, "step": 7779000 }, { "epoch": 22.52, "learning_rate": 3.874485052334198e-05, "loss": 2.1582, "step": 7779500 }, { "epoch": 22.52, "learning_rate": 3.8744126875694704e-05, "loss": 2.188, "step": 7780000 }, { "epoch": 22.52, "learning_rate": 3.8743403228047426e-05, "loss": 2.176, "step": 7780500 }, { "epoch": 22.52, "learning_rate": 3.874267958040015e-05, "loss": 2.1601, "step": 7781000 }, { "epoch": 22.52, "learning_rate": 3.874195593275287e-05, "loss": 2.1624, "step": 7781500 }, { "epoch": 22.53, "learning_rate": 3.874123228510559e-05, "loss": 2.1615, "step": 7782000 }, { "epoch": 22.53, "learning_rate": 3.8740508637458315e-05, "loss": 2.1485, "step": 7782500 }, { "epoch": 22.53, "learning_rate": 3.873978643710634e-05, "loss": 2.1623, "step": 7783000 }, { "epoch": 22.53, "learning_rate": 3.873906278945906e-05, "loss": 2.1976, "step": 7783500 }, { "epoch": 22.53, "learning_rate": 3.873834058910708e-05, "loss": 2.1814, "step": 7784000 }, { "epoch": 22.53, "learning_rate": 3.8737616941459804e-05, "loss": 2.1771, "step": 7784500 }, { "epoch": 22.53, "learning_rate": 3.873689329381253e-05, "loss": 2.1463, "step": 7785000 }, { "epoch": 22.54, "learning_rate": 3.873616964616525e-05, "loss": 2.1693, "step": 7785500 }, { "epoch": 22.54, "learning_rate": 3.873544599851797e-05, "loss": 2.189, "step": 7786000 }, { "epoch": 22.54, "learning_rate": 3.873472379816599e-05, "loss": 2.1608, "step": 7786500 }, { "epoch": 22.54, "learning_rate": 3.873400015051871e-05, "loss": 2.167, "step": 7787000 }, { "epoch": 22.54, "learning_rate": 3.873327650287144e-05, "loss": 2.1885, "step": 7787500 }, { "epoch": 22.54, "learning_rate": 3.873255285522416e-05, "loss": 2.1841, "step": 7788000 }, { "epoch": 22.54, "learning_rate": 3.873182920757688e-05, "loss": 2.1374, "step": 7788500 }, { "epoch": 22.55, "learning_rate": 3.8731105559929605e-05, "loss": 2.1823, "step": 7789000 }, { "epoch": 22.55, "learning_rate": 3.873038191228233e-05, "loss": 2.186, "step": 7789500 }, { "epoch": 22.55, "learning_rate": 3.872965826463505e-05, "loss": 2.1766, "step": 7790000 }, { "epoch": 22.55, "learning_rate": 3.872893461698778e-05, "loss": 2.1684, "step": 7790500 }, { "epoch": 22.55, "learning_rate": 3.8728212416635794e-05, "loss": 2.1664, "step": 7791000 }, { "epoch": 22.55, "learning_rate": 3.8727488768988516e-05, "loss": 2.1762, "step": 7791500 }, { "epoch": 22.55, "learning_rate": 3.872676512134124e-05, "loss": 2.1775, "step": 7792000 }, { "epoch": 22.56, "learning_rate": 3.872604292098926e-05, "loss": 2.1771, "step": 7792500 }, { "epoch": 22.56, "learning_rate": 3.872531927334198e-05, "loss": 2.1838, "step": 7793000 }, { "epoch": 22.56, "learning_rate": 3.8724595625694705e-05, "loss": 2.1716, "step": 7793500 }, { "epoch": 22.56, "learning_rate": 3.872387342534272e-05, "loss": 2.1574, "step": 7794000 }, { "epoch": 22.56, "learning_rate": 3.872314977769544e-05, "loss": 2.1901, "step": 7794500 }, { "epoch": 22.56, "learning_rate": 3.8722426130048165e-05, "loss": 2.1842, "step": 7795000 }, { "epoch": 22.56, "learning_rate": 3.872170248240089e-05, "loss": 2.1366, "step": 7795500 }, { "epoch": 22.57, "learning_rate": 3.8720978834753616e-05, "loss": 2.1581, "step": 7796000 }, { "epoch": 22.57, "learning_rate": 3.872025663440163e-05, "loss": 2.1783, "step": 7796500 }, { "epoch": 22.57, "learning_rate": 3.8719532986754354e-05, "loss": 2.1952, "step": 7797000 }, { "epoch": 22.57, "learning_rate": 3.871881078640237e-05, "loss": 2.1839, "step": 7797500 }, { "epoch": 22.57, "learning_rate": 3.871808713875509e-05, "loss": 2.1815, "step": 7798000 }, { "epoch": 22.57, "learning_rate": 3.871736349110782e-05, "loss": 2.1613, "step": 7798500 }, { "epoch": 22.57, "learning_rate": 3.871663984346054e-05, "loss": 2.1996, "step": 7799000 }, { "epoch": 22.58, "learning_rate": 3.8715916195813265e-05, "loss": 2.1669, "step": 7799500 }, { "epoch": 22.58, "learning_rate": 3.871519254816599e-05, "loss": 2.2006, "step": 7800000 }, { "epoch": 22.58, "learning_rate": 3.8714468900518717e-05, "loss": 2.152, "step": 7800500 }, { "epoch": 22.58, "learning_rate": 3.871374525287144e-05, "loss": 2.1787, "step": 7801000 }, { "epoch": 22.58, "learning_rate": 3.871302160522416e-05, "loss": 2.1958, "step": 7801500 }, { "epoch": 22.58, "learning_rate": 3.871229795757688e-05, "loss": 2.1455, "step": 7802000 }, { "epoch": 22.59, "learning_rate": 3.8711574309929605e-05, "loss": 2.1723, "step": 7802500 }, { "epoch": 22.59, "learning_rate": 3.871085066228233e-05, "loss": 2.1608, "step": 7803000 }, { "epoch": 22.59, "learning_rate": 3.871012701463505e-05, "loss": 2.1673, "step": 7803500 }, { "epoch": 22.59, "learning_rate": 3.8709404814283066e-05, "loss": 2.141, "step": 7804000 }, { "epoch": 22.59, "learning_rate": 3.870868116663579e-05, "loss": 2.1554, "step": 7804500 }, { "epoch": 22.59, "learning_rate": 3.870795751898852e-05, "loss": 2.1863, "step": 7805000 }, { "epoch": 22.59, "learning_rate": 3.870723387134124e-05, "loss": 2.1736, "step": 7805500 }, { "epoch": 22.6, "learning_rate": 3.870651022369397e-05, "loss": 2.1445, "step": 7806000 }, { "epoch": 22.6, "learning_rate": 3.8705788023341984e-05, "loss": 2.1581, "step": 7806500 }, { "epoch": 22.6, "learning_rate": 3.8705064375694706e-05, "loss": 2.1994, "step": 7807000 }, { "epoch": 22.6, "learning_rate": 3.870434072804743e-05, "loss": 2.1474, "step": 7807500 }, { "epoch": 22.6, "learning_rate": 3.870361708040015e-05, "loss": 2.1766, "step": 7808000 }, { "epoch": 22.6, "learning_rate": 3.870289343275287e-05, "loss": 2.1509, "step": 7808500 }, { "epoch": 22.6, "learning_rate": 3.8702169785105595e-05, "loss": 2.1494, "step": 7809000 }, { "epoch": 22.61, "learning_rate": 3.870144613745832e-05, "loss": 2.181, "step": 7809500 }, { "epoch": 22.61, "learning_rate": 3.870072248981104e-05, "loss": 2.1766, "step": 7810000 }, { "epoch": 22.61, "learning_rate": 3.869999884216377e-05, "loss": 2.1602, "step": 7810500 }, { "epoch": 22.61, "learning_rate": 3.869927519451649e-05, "loss": 2.1538, "step": 7811000 }, { "epoch": 22.61, "learning_rate": 3.869855154686921e-05, "loss": 2.1746, "step": 7811500 }, { "epoch": 22.61, "learning_rate": 3.8697833688403115e-05, "loss": 2.1944, "step": 7812000 }, { "epoch": 22.61, "learning_rate": 3.869711004075584e-05, "loss": 2.1946, "step": 7812500 }, { "epoch": 22.62, "learning_rate": 3.869638639310856e-05, "loss": 2.1922, "step": 7813000 }, { "epoch": 22.62, "learning_rate": 3.869566274546129e-05, "loss": 2.1595, "step": 7813500 }, { "epoch": 22.62, "learning_rate": 3.869493909781401e-05, "loss": 2.1679, "step": 7814000 }, { "epoch": 22.62, "learning_rate": 3.8694216897462026e-05, "loss": 2.201, "step": 7814500 }, { "epoch": 22.62, "learning_rate": 3.869349324981475e-05, "loss": 2.1789, "step": 7815000 }, { "epoch": 22.62, "learning_rate": 3.869276960216747e-05, "loss": 2.1825, "step": 7815500 }, { "epoch": 22.62, "learning_rate": 3.869204595452019e-05, "loss": 2.17, "step": 7816000 }, { "epoch": 22.63, "learning_rate": 3.8691322306872915e-05, "loss": 2.1745, "step": 7816500 }, { "epoch": 22.63, "learning_rate": 3.8690598659225644e-05, "loss": 2.1779, "step": 7817000 }, { "epoch": 22.63, "learning_rate": 3.8689875011578366e-05, "loss": 2.1683, "step": 7817500 }, { "epoch": 22.63, "learning_rate": 3.868915136393109e-05, "loss": 2.1643, "step": 7818000 }, { "epoch": 22.63, "learning_rate": 3.868842771628381e-05, "loss": 2.1589, "step": 7818500 }, { "epoch": 22.63, "learning_rate": 3.868770406863653e-05, "loss": 2.1922, "step": 7819000 }, { "epoch": 22.63, "learning_rate": 3.8686980420989255e-05, "loss": 2.1725, "step": 7819500 }, { "epoch": 22.64, "learning_rate": 3.868625677334198e-05, "loss": 2.1735, "step": 7820000 }, { "epoch": 22.64, "learning_rate": 3.8685533125694707e-05, "loss": 2.1706, "step": 7820500 }, { "epoch": 22.64, "learning_rate": 3.868480947804743e-05, "loss": 2.1985, "step": 7821000 }, { "epoch": 22.64, "learning_rate": 3.8684087277695444e-05, "loss": 2.1585, "step": 7821500 }, { "epoch": 22.64, "learning_rate": 3.8683363630048167e-05, "loss": 2.1734, "step": 7822000 }, { "epoch": 22.64, "learning_rate": 3.868264142969619e-05, "loss": 2.179, "step": 7822500 }, { "epoch": 22.64, "learning_rate": 3.868191778204891e-05, "loss": 2.184, "step": 7823000 }, { "epoch": 22.65, "learning_rate": 3.8681194134401633e-05, "loss": 2.1694, "step": 7823500 }, { "epoch": 22.65, "learning_rate": 3.8680470486754356e-05, "loss": 2.1732, "step": 7824000 }, { "epoch": 22.65, "learning_rate": 3.867974683910708e-05, "loss": 2.1599, "step": 7824500 }, { "epoch": 22.65, "learning_rate": 3.86790231914598e-05, "loss": 2.1651, "step": 7825000 }, { "epoch": 22.65, "learning_rate": 3.867829954381252e-05, "loss": 2.1613, "step": 7825500 }, { "epoch": 22.65, "learning_rate": 3.867757879075584e-05, "loss": 2.1681, "step": 7826000 }, { "epoch": 22.65, "learning_rate": 3.867685514310856e-05, "loss": 2.1918, "step": 7826500 }, { "epoch": 22.66, "learning_rate": 3.867613149546128e-05, "loss": 2.1695, "step": 7827000 }, { "epoch": 22.66, "learning_rate": 3.8675407847814005e-05, "loss": 2.1918, "step": 7827500 }, { "epoch": 22.66, "learning_rate": 3.867468420016673e-05, "loss": 2.1921, "step": 7828000 }, { "epoch": 22.66, "learning_rate": 3.8673960552519456e-05, "loss": 2.18, "step": 7828500 }, { "epoch": 22.66, "learning_rate": 3.867323690487218e-05, "loss": 2.1813, "step": 7829000 }, { "epoch": 22.66, "learning_rate": 3.8672514704520194e-05, "loss": 2.1916, "step": 7829500 }, { "epoch": 22.66, "learning_rate": 3.8671791056872916e-05, "loss": 2.1613, "step": 7830000 }, { "epoch": 22.67, "learning_rate": 3.8671067409225645e-05, "loss": 2.1668, "step": 7830500 }, { "epoch": 22.67, "learning_rate": 3.867034376157837e-05, "loss": 2.185, "step": 7831000 }, { "epoch": 22.67, "learning_rate": 3.866962011393109e-05, "loss": 2.1922, "step": 7831500 }, { "epoch": 22.67, "learning_rate": 3.866889646628381e-05, "loss": 2.1818, "step": 7832000 }, { "epoch": 22.67, "learning_rate": 3.8668172818636534e-05, "loss": 2.1637, "step": 7832500 }, { "epoch": 22.67, "learning_rate": 3.8667449170989256e-05, "loss": 2.1558, "step": 7833000 }, { "epoch": 22.67, "learning_rate": 3.866672552334198e-05, "loss": 2.1697, "step": 7833500 }, { "epoch": 22.68, "learning_rate": 3.86660018756947e-05, "loss": 2.155, "step": 7834000 }, { "epoch": 22.68, "learning_rate": 3.866527822804742e-05, "loss": 2.2049, "step": 7834500 }, { "epoch": 22.68, "learning_rate": 3.8664554580400145e-05, "loss": 2.1896, "step": 7835000 }, { "epoch": 22.68, "learning_rate": 3.8663830932752874e-05, "loss": 2.1517, "step": 7835500 }, { "epoch": 22.68, "learning_rate": 3.8663107285105596e-05, "loss": 2.1678, "step": 7836000 }, { "epoch": 22.68, "learning_rate": 3.866238508475362e-05, "loss": 2.1557, "step": 7836500 }, { "epoch": 22.68, "learning_rate": 3.8661662884401634e-05, "loss": 2.1618, "step": 7837000 }, { "epoch": 22.69, "learning_rate": 3.8660939236754356e-05, "loss": 2.1806, "step": 7837500 }, { "epoch": 22.69, "learning_rate": 3.866021558910708e-05, "loss": 2.1697, "step": 7838000 }, { "epoch": 22.69, "learning_rate": 3.86594919414598e-05, "loss": 2.1788, "step": 7838500 }, { "epoch": 22.69, "learning_rate": 3.865876829381252e-05, "loss": 2.1632, "step": 7839000 }, { "epoch": 22.69, "learning_rate": 3.8658044646165245e-05, "loss": 2.1906, "step": 7839500 }, { "epoch": 22.69, "learning_rate": 3.865732099851797e-05, "loss": 2.1871, "step": 7840000 }, { "epoch": 22.7, "learning_rate": 3.8656597350870697e-05, "loss": 2.1827, "step": 7840500 }, { "epoch": 22.7, "learning_rate": 3.865587370322342e-05, "loss": 2.1924, "step": 7841000 }, { "epoch": 22.7, "learning_rate": 3.865515005557614e-05, "loss": 2.1582, "step": 7841500 }, { "epoch": 22.7, "learning_rate": 3.865442640792886e-05, "loss": 2.1586, "step": 7842000 }, { "epoch": 22.7, "learning_rate": 3.865370420757688e-05, "loss": 2.1745, "step": 7842500 }, { "epoch": 22.7, "learning_rate": 3.865298055992961e-05, "loss": 2.1928, "step": 7843000 }, { "epoch": 22.7, "learning_rate": 3.865225691228233e-05, "loss": 2.1668, "step": 7843500 }, { "epoch": 22.71, "learning_rate": 3.865153326463505e-05, "loss": 2.1708, "step": 7844000 }, { "epoch": 22.71, "learning_rate": 3.8650809616987775e-05, "loss": 2.155, "step": 7844500 }, { "epoch": 22.71, "learning_rate": 3.86500874166358e-05, "loss": 2.1662, "step": 7845000 }, { "epoch": 22.71, "learning_rate": 3.864936376898852e-05, "loss": 2.1554, "step": 7845500 }, { "epoch": 22.71, "learning_rate": 3.864864012134124e-05, "loss": 2.1818, "step": 7846000 }, { "epoch": 22.71, "learning_rate": 3.8647916473693964e-05, "loss": 2.1443, "step": 7846500 }, { "epoch": 22.71, "learning_rate": 3.8647192826046686e-05, "loss": 2.1567, "step": 7847000 }, { "epoch": 22.72, "learning_rate": 3.864646917839941e-05, "loss": 2.1653, "step": 7847500 }, { "epoch": 22.72, "learning_rate": 3.8645746978047424e-05, "loss": 2.1748, "step": 7848000 }, { "epoch": 22.72, "learning_rate": 3.8645023330400146e-05, "loss": 2.1683, "step": 7848500 }, { "epoch": 22.72, "learning_rate": 3.864429968275287e-05, "loss": 2.1805, "step": 7849000 }, { "epoch": 22.72, "learning_rate": 3.86435760351056e-05, "loss": 2.1885, "step": 7849500 }, { "epoch": 22.72, "learning_rate": 3.864285238745832e-05, "loss": 2.1443, "step": 7850000 }, { "epoch": 22.72, "learning_rate": 3.864212873981105e-05, "loss": 2.171, "step": 7850500 }, { "epoch": 22.73, "learning_rate": 3.864140509216377e-05, "loss": 2.1731, "step": 7851000 }, { "epoch": 22.73, "learning_rate": 3.864068144451649e-05, "loss": 2.1554, "step": 7851500 }, { "epoch": 22.73, "learning_rate": 3.8639957796869215e-05, "loss": 2.16, "step": 7852000 }, { "epoch": 22.73, "learning_rate": 3.863923559651723e-05, "loss": 2.1844, "step": 7852500 }, { "epoch": 22.73, "learning_rate": 3.863851194886995e-05, "loss": 2.1833, "step": 7853000 }, { "epoch": 22.73, "learning_rate": 3.8637788301222675e-05, "loss": 2.1734, "step": 7853500 }, { "epoch": 22.73, "learning_rate": 3.86370646535754e-05, "loss": 2.1769, "step": 7854000 }, { "epoch": 22.74, "learning_rate": 3.863634390051871e-05, "loss": 2.1706, "step": 7854500 }, { "epoch": 22.74, "learning_rate": 3.8635620252871435e-05, "loss": 2.1889, "step": 7855000 }, { "epoch": 22.74, "learning_rate": 3.863489660522416e-05, "loss": 2.1743, "step": 7855500 }, { "epoch": 22.74, "learning_rate": 3.863417295757688e-05, "loss": 2.1509, "step": 7856000 }, { "epoch": 22.74, "learning_rate": 3.86334493099296e-05, "loss": 2.1788, "step": 7856500 }, { "epoch": 22.74, "learning_rate": 3.8632725662282324e-05, "loss": 2.1895, "step": 7857000 }, { "epoch": 22.74, "learning_rate": 3.8632002014635046e-05, "loss": 2.1767, "step": 7857500 }, { "epoch": 22.75, "learning_rate": 3.8631278366987775e-05, "loss": 2.1783, "step": 7858000 }, { "epoch": 22.75, "learning_rate": 3.86305561666358e-05, "loss": 2.167, "step": 7858500 }, { "epoch": 22.75, "learning_rate": 3.862983251898852e-05, "loss": 2.1495, "step": 7859000 }, { "epoch": 22.75, "learning_rate": 3.862910887134124e-05, "loss": 2.1717, "step": 7859500 }, { "epoch": 22.75, "learning_rate": 3.862838667098926e-05, "loss": 2.177, "step": 7860000 }, { "epoch": 22.75, "learning_rate": 3.862766302334198e-05, "loss": 2.1671, "step": 7860500 }, { "epoch": 22.75, "learning_rate": 3.86269393756947e-05, "loss": 2.1805, "step": 7861000 }, { "epoch": 22.76, "learning_rate": 3.8626215728047424e-05, "loss": 2.171, "step": 7861500 }, { "epoch": 22.76, "learning_rate": 3.862549208040015e-05, "loss": 2.1875, "step": 7862000 }, { "epoch": 22.76, "learning_rate": 3.862476988004817e-05, "loss": 2.1924, "step": 7862500 }, { "epoch": 22.76, "learning_rate": 3.862404623240089e-05, "loss": 2.1711, "step": 7863000 }, { "epoch": 22.76, "learning_rate": 3.8623322584753613e-05, "loss": 2.172, "step": 7863500 }, { "epoch": 22.76, "learning_rate": 3.8622598937106336e-05, "loss": 2.1415, "step": 7864000 }, { "epoch": 22.76, "learning_rate": 3.862187528945906e-05, "loss": 2.1526, "step": 7864500 }, { "epoch": 22.77, "learning_rate": 3.862115164181178e-05, "loss": 2.1722, "step": 7865000 }, { "epoch": 22.77, "learning_rate": 3.862042799416451e-05, "loss": 2.174, "step": 7865500 }, { "epoch": 22.77, "learning_rate": 3.861970434651723e-05, "loss": 2.1651, "step": 7866000 }, { "epoch": 22.77, "learning_rate": 3.8618980698869954e-05, "loss": 2.1486, "step": 7866500 }, { "epoch": 22.77, "learning_rate": 3.8618257051222676e-05, "loss": 2.1296, "step": 7867000 }, { "epoch": 22.77, "learning_rate": 3.86175334035754e-05, "loss": 2.1898, "step": 7867500 }, { "epoch": 22.77, "learning_rate": 3.861680975592813e-05, "loss": 2.1796, "step": 7868000 }, { "epoch": 22.78, "learning_rate": 3.861608755557614e-05, "loss": 2.1765, "step": 7868500 }, { "epoch": 22.78, "learning_rate": 3.8615363907928865e-05, "loss": 2.1769, "step": 7869000 }, { "epoch": 22.78, "learning_rate": 3.861464026028159e-05, "loss": 2.1778, "step": 7869500 }, { "epoch": 22.78, "learning_rate": 3.861391661263431e-05, "loss": 2.1837, "step": 7870000 }, { "epoch": 22.78, "learning_rate": 3.861319296498703e-05, "loss": 2.1686, "step": 7870500 }, { "epoch": 22.78, "learning_rate": 3.8612469317339754e-05, "loss": 2.1737, "step": 7871000 }, { "epoch": 22.78, "learning_rate": 3.8611745669692476e-05, "loss": 2.1786, "step": 7871500 }, { "epoch": 22.79, "learning_rate": 3.86110220220452e-05, "loss": 2.1693, "step": 7872000 }, { "epoch": 22.79, "learning_rate": 3.861029982169322e-05, "loss": 2.1827, "step": 7872500 }, { "epoch": 22.79, "learning_rate": 3.860957617404595e-05, "loss": 2.165, "step": 7873000 }, { "epoch": 22.79, "learning_rate": 3.860885252639867e-05, "loss": 2.1688, "step": 7873500 }, { "epoch": 22.79, "learning_rate": 3.860813032604669e-05, "loss": 2.1755, "step": 7874000 }, { "epoch": 22.79, "learning_rate": 3.860740667839941e-05, "loss": 2.1641, "step": 7874500 }, { "epoch": 22.79, "learning_rate": 3.860668303075213e-05, "loss": 2.1942, "step": 7875000 }, { "epoch": 22.8, "learning_rate": 3.8605959383104854e-05, "loss": 2.1957, "step": 7875500 }, { "epoch": 22.8, "learning_rate": 3.8605235735457576e-05, "loss": 2.1693, "step": 7876000 }, { "epoch": 22.8, "learning_rate": 3.86045135351056e-05, "loss": 2.1746, "step": 7876500 }, { "epoch": 22.8, "learning_rate": 3.8603791334753614e-05, "loss": 2.147, "step": 7877000 }, { "epoch": 22.8, "learning_rate": 3.8603067687106336e-05, "loss": 2.1774, "step": 7877500 }, { "epoch": 22.8, "learning_rate": 3.860234403945906e-05, "loss": 2.175, "step": 7878000 }, { "epoch": 22.81, "learning_rate": 3.860162039181178e-05, "loss": 2.1627, "step": 7878500 }, { "epoch": 22.81, "learning_rate": 3.86008967441645e-05, "loss": 2.1579, "step": 7879000 }, { "epoch": 22.81, "learning_rate": 3.8600174543812526e-05, "loss": 2.1922, "step": 7879500 }, { "epoch": 22.81, "learning_rate": 3.859945089616525e-05, "loss": 2.1699, "step": 7880000 }, { "epoch": 22.81, "learning_rate": 3.859872724851798e-05, "loss": 2.1505, "step": 7880500 }, { "epoch": 22.81, "learning_rate": 3.85980036008707e-05, "loss": 2.1709, "step": 7881000 }, { "epoch": 22.81, "learning_rate": 3.859727995322342e-05, "loss": 2.1527, "step": 7881500 }, { "epoch": 22.82, "learning_rate": 3.8596556305576143e-05, "loss": 2.1695, "step": 7882000 }, { "epoch": 22.82, "learning_rate": 3.8595832657928866e-05, "loss": 2.1839, "step": 7882500 }, { "epoch": 22.82, "learning_rate": 3.859510901028159e-05, "loss": 2.1994, "step": 7883000 }, { "epoch": 22.82, "learning_rate": 3.859438536263431e-05, "loss": 2.1735, "step": 7883500 }, { "epoch": 22.82, "learning_rate": 3.8593664609577626e-05, "loss": 2.1946, "step": 7884000 }, { "epoch": 22.82, "learning_rate": 3.859294096193035e-05, "loss": 2.1678, "step": 7884500 }, { "epoch": 22.82, "learning_rate": 3.859221731428307e-05, "loss": 2.1744, "step": 7885000 }, { "epoch": 22.83, "learning_rate": 3.8591495113931086e-05, "loss": 2.1577, "step": 7885500 }, { "epoch": 22.83, "learning_rate": 3.859077146628381e-05, "loss": 2.1584, "step": 7886000 }, { "epoch": 22.83, "learning_rate": 3.859004781863653e-05, "loss": 2.1772, "step": 7886500 }, { "epoch": 22.83, "learning_rate": 3.858932417098925e-05, "loss": 2.1732, "step": 7887000 }, { "epoch": 22.83, "learning_rate": 3.8588600523341975e-05, "loss": 2.1525, "step": 7887500 }, { "epoch": 22.83, "learning_rate": 3.8587876875694704e-05, "loss": 2.1585, "step": 7888000 }, { "epoch": 22.83, "learning_rate": 3.8587153228047426e-05, "loss": 2.1552, "step": 7888500 }, { "epoch": 22.84, "learning_rate": 3.8586429580400155e-05, "loss": 2.1765, "step": 7889000 }, { "epoch": 22.84, "learning_rate": 3.858570593275288e-05, "loss": 2.1544, "step": 7889500 }, { "epoch": 22.84, "learning_rate": 3.85849822851056e-05, "loss": 2.1748, "step": 7890000 }, { "epoch": 22.84, "learning_rate": 3.858425863745832e-05, "loss": 2.1783, "step": 7890500 }, { "epoch": 22.84, "learning_rate": 3.8583534989811044e-05, "loss": 2.1664, "step": 7891000 }, { "epoch": 22.84, "learning_rate": 3.858281278945906e-05, "loss": 2.1422, "step": 7891500 }, { "epoch": 22.84, "learning_rate": 3.858208914181178e-05, "loss": 2.1638, "step": 7892000 }, { "epoch": 22.85, "learning_rate": 3.8581366941459804e-05, "loss": 2.1768, "step": 7892500 }, { "epoch": 22.85, "learning_rate": 3.8580643293812526e-05, "loss": 2.1856, "step": 7893000 }, { "epoch": 22.85, "learning_rate": 3.857991964616525e-05, "loss": 2.1817, "step": 7893500 }, { "epoch": 22.85, "learning_rate": 3.857919599851797e-05, "loss": 2.188, "step": 7894000 }, { "epoch": 22.85, "learning_rate": 3.857847235087069e-05, "loss": 2.1709, "step": 7894500 }, { "epoch": 22.85, "learning_rate": 3.8577748703223415e-05, "loss": 2.1758, "step": 7895000 }, { "epoch": 22.85, "learning_rate": 3.8577025055576144e-05, "loss": 2.1729, "step": 7895500 }, { "epoch": 22.86, "learning_rate": 3.8576301407928866e-05, "loss": 2.1412, "step": 7896000 }, { "epoch": 22.86, "learning_rate": 3.857557776028159e-05, "loss": 2.1444, "step": 7896500 }, { "epoch": 22.86, "learning_rate": 3.857485411263431e-05, "loss": 2.1683, "step": 7897000 }, { "epoch": 22.86, "learning_rate": 3.8574131912282327e-05, "loss": 2.1503, "step": 7897500 }, { "epoch": 22.86, "learning_rate": 3.8573408264635056e-05, "loss": 2.1566, "step": 7898000 }, { "epoch": 22.86, "learning_rate": 3.857268461698778e-05, "loss": 2.2027, "step": 7898500 }, { "epoch": 22.86, "learning_rate": 3.85719609693405e-05, "loss": 2.1658, "step": 7899000 }, { "epoch": 22.87, "learning_rate": 3.8571238768988516e-05, "loss": 2.1976, "step": 7899500 }, { "epoch": 22.87, "learning_rate": 3.857051512134124e-05, "loss": 2.1542, "step": 7900000 }, { "epoch": 22.87, "learning_rate": 3.856979147369396e-05, "loss": 2.1515, "step": 7900500 }, { "epoch": 22.87, "learning_rate": 3.856906782604668e-05, "loss": 2.1669, "step": 7901000 }, { "epoch": 22.87, "learning_rate": 3.8568345625694705e-05, "loss": 2.196, "step": 7901500 }, { "epoch": 22.87, "learning_rate": 3.856762197804743e-05, "loss": 2.1748, "step": 7902000 }, { "epoch": 22.87, "learning_rate": 3.856689833040015e-05, "loss": 2.1874, "step": 7902500 }, { "epoch": 22.88, "learning_rate": 3.856617613004817e-05, "loss": 2.1558, "step": 7903000 }, { "epoch": 22.88, "learning_rate": 3.8565452482400894e-05, "loss": 2.166, "step": 7903500 }, { "epoch": 22.88, "learning_rate": 3.8564728834753616e-05, "loss": 2.1717, "step": 7904000 }, { "epoch": 22.88, "learning_rate": 3.856400518710634e-05, "loss": 2.1853, "step": 7904500 }, { "epoch": 22.88, "learning_rate": 3.856328153945906e-05, "loss": 2.1431, "step": 7905000 }, { "epoch": 22.88, "learning_rate": 3.856255789181178e-05, "loss": 2.1779, "step": 7905500 }, { "epoch": 22.88, "learning_rate": 3.8561834244164505e-05, "loss": 2.1609, "step": 7906000 }, { "epoch": 22.89, "learning_rate": 3.856111059651723e-05, "loss": 2.1711, "step": 7906500 }, { "epoch": 22.89, "learning_rate": 3.856038839616525e-05, "loss": 2.1644, "step": 7907000 }, { "epoch": 22.89, "learning_rate": 3.855966474851797e-05, "loss": 2.1475, "step": 7907500 }, { "epoch": 22.89, "learning_rate": 3.855894254816599e-05, "loss": 2.188, "step": 7908000 }, { "epoch": 22.89, "learning_rate": 3.855821890051871e-05, "loss": 2.1722, "step": 7908500 }, { "epoch": 22.89, "learning_rate": 3.855749525287143e-05, "loss": 2.1819, "step": 7909000 }, { "epoch": 22.89, "learning_rate": 3.8556771605224154e-05, "loss": 2.1751, "step": 7909500 }, { "epoch": 22.9, "learning_rate": 3.855604795757688e-05, "loss": 2.1745, "step": 7910000 }, { "epoch": 22.9, "learning_rate": 3.8555325757224905e-05, "loss": 2.1765, "step": 7910500 }, { "epoch": 22.9, "learning_rate": 3.855460210957763e-05, "loss": 2.173, "step": 7911000 }, { "epoch": 22.9, "learning_rate": 3.855387846193035e-05, "loss": 2.1691, "step": 7911500 }, { "epoch": 22.9, "learning_rate": 3.855315481428307e-05, "loss": 2.1403, "step": 7912000 }, { "epoch": 22.9, "learning_rate": 3.8552431166635794e-05, "loss": 2.1962, "step": 7912500 }, { "epoch": 22.9, "learning_rate": 3.8551707518988516e-05, "loss": 2.1799, "step": 7913000 }, { "epoch": 22.91, "learning_rate": 3.855098387134124e-05, "loss": 2.1699, "step": 7913500 }, { "epoch": 22.91, "learning_rate": 3.855026022369396e-05, "loss": 2.1623, "step": 7914000 }, { "epoch": 22.91, "learning_rate": 3.854953657604668e-05, "loss": 2.1735, "step": 7914500 }, { "epoch": 22.91, "learning_rate": 3.854881582299e-05, "loss": 2.1808, "step": 7915000 }, { "epoch": 22.91, "learning_rate": 3.854809217534272e-05, "loss": 2.1754, "step": 7915500 }, { "epoch": 22.91, "learning_rate": 3.854736852769544e-05, "loss": 2.1588, "step": 7916000 }, { "epoch": 22.92, "learning_rate": 3.854664632734346e-05, "loss": 2.1595, "step": 7916500 }, { "epoch": 22.92, "learning_rate": 3.854592267969618e-05, "loss": 2.156, "step": 7917000 }, { "epoch": 22.92, "learning_rate": 3.85451990320489e-05, "loss": 2.1631, "step": 7917500 }, { "epoch": 22.92, "learning_rate": 3.854447538440163e-05, "loss": 2.1753, "step": 7918000 }, { "epoch": 22.92, "learning_rate": 3.8543751736754354e-05, "loss": 2.2077, "step": 7918500 }, { "epoch": 22.92, "learning_rate": 3.8543028089107083e-05, "loss": 2.1626, "step": 7919000 }, { "epoch": 22.92, "learning_rate": 3.8542304441459806e-05, "loss": 2.1937, "step": 7919500 }, { "epoch": 22.93, "learning_rate": 3.854158224110782e-05, "loss": 2.159, "step": 7920000 }, { "epoch": 22.93, "learning_rate": 3.8540858593460543e-05, "loss": 2.1952, "step": 7920500 }, { "epoch": 22.93, "learning_rate": 3.8540134945813266e-05, "loss": 2.1522, "step": 7921000 }, { "epoch": 22.93, "learning_rate": 3.853941129816599e-05, "loss": 2.1482, "step": 7921500 }, { "epoch": 22.93, "learning_rate": 3.853868765051871e-05, "loss": 2.2005, "step": 7922000 }, { "epoch": 22.93, "learning_rate": 3.853796400287143e-05, "loss": 2.1829, "step": 7922500 }, { "epoch": 22.93, "learning_rate": 3.8537240355224155e-05, "loss": 2.1448, "step": 7923000 }, { "epoch": 22.94, "learning_rate": 3.8536516707576884e-05, "loss": 2.176, "step": 7923500 }, { "epoch": 22.94, "learning_rate": 3.8535793059929606e-05, "loss": 2.1648, "step": 7924000 }, { "epoch": 22.94, "learning_rate": 3.853506941228233e-05, "loss": 2.1883, "step": 7924500 }, { "epoch": 22.94, "learning_rate": 3.853434576463505e-05, "loss": 2.1918, "step": 7925000 }, { "epoch": 22.94, "learning_rate": 3.853362211698778e-05, "loss": 2.1574, "step": 7925500 }, { "epoch": 22.94, "learning_rate": 3.85328984693405e-05, "loss": 2.1816, "step": 7926000 }, { "epoch": 22.94, "learning_rate": 3.8532174821693224e-05, "loss": 2.1578, "step": 7926500 }, { "epoch": 22.95, "learning_rate": 3.8531451174045946e-05, "loss": 2.1797, "step": 7927000 }, { "epoch": 22.95, "learning_rate": 3.853072752639867e-05, "loss": 2.1987, "step": 7927500 }, { "epoch": 22.95, "learning_rate": 3.853000387875139e-05, "loss": 2.1994, "step": 7928000 }, { "epoch": 22.95, "learning_rate": 3.8529281678399406e-05, "loss": 2.1749, "step": 7928500 }, { "epoch": 22.95, "learning_rate": 3.852855947804743e-05, "loss": 2.1965, "step": 7929000 }, { "epoch": 22.95, "learning_rate": 3.852783583040015e-05, "loss": 2.1691, "step": 7929500 }, { "epoch": 22.95, "learning_rate": 3.852711218275287e-05, "loss": 2.1685, "step": 7930000 }, { "epoch": 22.96, "learning_rate": 3.8526388535105595e-05, "loss": 2.1688, "step": 7930500 }, { "epoch": 22.96, "learning_rate": 3.852566488745832e-05, "loss": 2.176, "step": 7931000 }, { "epoch": 22.96, "learning_rate": 3.852494123981104e-05, "loss": 2.1398, "step": 7931500 }, { "epoch": 22.96, "learning_rate": 3.852421759216376e-05, "loss": 2.1552, "step": 7932000 }, { "epoch": 22.96, "learning_rate": 3.8523495391811784e-05, "loss": 2.1962, "step": 7932500 }, { "epoch": 22.96, "learning_rate": 3.8522771744164506e-05, "loss": 2.1589, "step": 7933000 }, { "epoch": 22.96, "learning_rate": 3.8522048096517235e-05, "loss": 2.128, "step": 7933500 }, { "epoch": 22.97, "learning_rate": 3.852132444886996e-05, "loss": 2.1852, "step": 7934000 }, { "epoch": 22.97, "learning_rate": 3.852060224851797e-05, "loss": 2.178, "step": 7934500 }, { "epoch": 22.97, "learning_rate": 3.8519878600870695e-05, "loss": 2.178, "step": 7935000 }, { "epoch": 22.97, "learning_rate": 3.851915495322342e-05, "loss": 2.1823, "step": 7935500 }, { "epoch": 22.97, "learning_rate": 3.851843275287143e-05, "loss": 2.1736, "step": 7936000 }, { "epoch": 22.97, "learning_rate": 3.851770910522416e-05, "loss": 2.163, "step": 7936500 }, { "epoch": 22.97, "learning_rate": 3.8516985457576884e-05, "loss": 2.1661, "step": 7937000 }, { "epoch": 22.98, "learning_rate": 3.85162632572249e-05, "loss": 2.1854, "step": 7937500 }, { "epoch": 22.98, "learning_rate": 3.851553960957762e-05, "loss": 2.167, "step": 7938000 }, { "epoch": 22.98, "learning_rate": 3.8514815961930344e-05, "loss": 2.156, "step": 7938500 }, { "epoch": 22.98, "learning_rate": 3.851409231428307e-05, "loss": 2.203, "step": 7939000 }, { "epoch": 22.98, "learning_rate": 3.851336866663579e-05, "loss": 2.1684, "step": 7939500 }, { "epoch": 22.98, "learning_rate": 3.851264501898851e-05, "loss": 2.1749, "step": 7940000 }, { "epoch": 22.98, "learning_rate": 3.851192137134124e-05, "loss": 2.1663, "step": 7940500 }, { "epoch": 22.99, "learning_rate": 3.851119772369396e-05, "loss": 2.1829, "step": 7941000 }, { "epoch": 22.99, "learning_rate": 3.8510474076046685e-05, "loss": 2.1789, "step": 7941500 }, { "epoch": 22.99, "learning_rate": 3.8509750428399414e-05, "loss": 2.1861, "step": 7942000 }, { "epoch": 22.99, "learning_rate": 3.8509026780752136e-05, "loss": 2.1714, "step": 7942500 }, { "epoch": 22.99, "learning_rate": 3.850830313310486e-05, "loss": 2.1863, "step": 7943000 }, { "epoch": 22.99, "learning_rate": 3.850757948545758e-05, "loss": 2.1632, "step": 7943500 }, { "epoch": 22.99, "learning_rate": 3.85068558378103e-05, "loss": 2.1801, "step": 7944000 }, { "epoch": 23.0, "learning_rate": 3.8506132190163025e-05, "loss": 2.2011, "step": 7944500 }, { "epoch": 23.0, "learning_rate": 3.850540854251575e-05, "loss": 2.1393, "step": 7945000 }, { "epoch": 23.0, "learning_rate": 3.850468634216376e-05, "loss": 2.1625, "step": 7945500 }, { "epoch": 23.0, "eval_accuracy": 0.6629433126284148, "eval_accuracy_mlm": 0.6268137506413546, "eval_accuracy_nsp": 0.8565788086693891, "eval_loss": 2.2072818279266357, "eval_runtime": 331.6784, "eval_samples_per_second": 1315.69, "eval_steps_per_second": 54.821, "step": 7945856 }, { "epoch": 23.0, "learning_rate": 3.8503962694516485e-05, "loss": 2.1442, "step": 7946000 }, { "epoch": 23.0, "learning_rate": 3.850324049416451e-05, "loss": 2.1303, "step": 7946500 }, { "epoch": 23.0, "learning_rate": 3.850251684651723e-05, "loss": 2.1414, "step": 7947000 }, { "epoch": 23.0, "learning_rate": 3.8501794646165245e-05, "loss": 2.1315, "step": 7947500 }, { "epoch": 23.01, "learning_rate": 3.8501070998517974e-05, "loss": 2.127, "step": 7948000 }, { "epoch": 23.01, "learning_rate": 3.8500347350870696e-05, "loss": 2.1598, "step": 7948500 }, { "epoch": 23.01, "learning_rate": 3.849962370322342e-05, "loss": 2.1488, "step": 7949000 }, { "epoch": 23.01, "learning_rate": 3.849890005557614e-05, "loss": 2.1516, "step": 7949500 }, { "epoch": 23.01, "learning_rate": 3.849817640792886e-05, "loss": 2.1613, "step": 7950000 }, { "epoch": 23.01, "learning_rate": 3.8497454207576885e-05, "loss": 2.1363, "step": 7950500 }, { "epoch": 23.01, "learning_rate": 3.849673055992961e-05, "loss": 2.1572, "step": 7951000 }, { "epoch": 23.02, "learning_rate": 3.849600691228233e-05, "loss": 2.181, "step": 7951500 }, { "epoch": 23.02, "learning_rate": 3.849528326463505e-05, "loss": 2.1382, "step": 7952000 }, { "epoch": 23.02, "learning_rate": 3.8494559616987774e-05, "loss": 2.1549, "step": 7952500 }, { "epoch": 23.02, "learning_rate": 3.8493835969340496e-05, "loss": 2.1579, "step": 7953000 }, { "epoch": 23.02, "learning_rate": 3.849311232169322e-05, "loss": 2.1404, "step": 7953500 }, { "epoch": 23.02, "learning_rate": 3.849238867404594e-05, "loss": 2.1772, "step": 7954000 }, { "epoch": 23.03, "learning_rate": 3.849166647369396e-05, "loss": 2.1351, "step": 7954500 }, { "epoch": 23.03, "learning_rate": 3.8490942826046685e-05, "loss": 2.1548, "step": 7955000 }, { "epoch": 23.03, "learning_rate": 3.849022062569471e-05, "loss": 2.1422, "step": 7955500 }, { "epoch": 23.03, "learning_rate": 3.848949697804743e-05, "loss": 2.1416, "step": 7956000 }, { "epoch": 23.03, "learning_rate": 3.848877333040015e-05, "loss": 2.1436, "step": 7956500 }, { "epoch": 23.03, "learning_rate": 3.8488049682752874e-05, "loss": 2.1563, "step": 7957000 }, { "epoch": 23.03, "learning_rate": 3.84873260351056e-05, "loss": 2.1466, "step": 7957500 }, { "epoch": 23.04, "learning_rate": 3.848660238745832e-05, "loss": 2.168, "step": 7958000 }, { "epoch": 23.04, "learning_rate": 3.848587873981104e-05, "loss": 2.1611, "step": 7958500 }, { "epoch": 23.04, "learning_rate": 3.848515509216376e-05, "loss": 2.1472, "step": 7959000 }, { "epoch": 23.04, "learning_rate": 3.8484431444516486e-05, "loss": 2.1322, "step": 7959500 }, { "epoch": 23.04, "learning_rate": 3.848370924416451e-05, "loss": 2.154, "step": 7960000 }, { "epoch": 23.04, "learning_rate": 3.848298559651723e-05, "loss": 2.1513, "step": 7960500 }, { "epoch": 23.04, "learning_rate": 3.8482263396165246e-05, "loss": 2.1473, "step": 7961000 }, { "epoch": 23.05, "learning_rate": 3.848153974851797e-05, "loss": 2.1557, "step": 7961500 }, { "epoch": 23.05, "learning_rate": 3.848081610087069e-05, "loss": 2.1321, "step": 7962000 }, { "epoch": 23.05, "learning_rate": 3.848009245322341e-05, "loss": 2.1495, "step": 7962500 }, { "epoch": 23.05, "learning_rate": 3.847936880557614e-05, "loss": 2.1675, "step": 7963000 }, { "epoch": 23.05, "learning_rate": 3.8478645157928864e-05, "loss": 2.1607, "step": 7963500 }, { "epoch": 23.05, "learning_rate": 3.847792151028159e-05, "loss": 2.1607, "step": 7964000 }, { "epoch": 23.05, "learning_rate": 3.8477197862634315e-05, "loss": 2.1329, "step": 7964500 }, { "epoch": 23.06, "learning_rate": 3.847647421498704e-05, "loss": 2.1613, "step": 7965000 }, { "epoch": 23.06, "learning_rate": 3.847575056733976e-05, "loss": 2.1416, "step": 7965500 }, { "epoch": 23.06, "learning_rate": 3.847502691969248e-05, "loss": 2.1727, "step": 7966000 }, { "epoch": 23.06, "learning_rate": 3.8474303272045204e-05, "loss": 2.1365, "step": 7966500 }, { "epoch": 23.06, "learning_rate": 3.847358107169322e-05, "loss": 2.1673, "step": 7967000 }, { "epoch": 23.06, "learning_rate": 3.847285742404594e-05, "loss": 2.1507, "step": 7967500 }, { "epoch": 23.06, "learning_rate": 3.8472135223693964e-05, "loss": 2.1445, "step": 7968000 }, { "epoch": 23.07, "learning_rate": 3.8471411576046686e-05, "loss": 2.1591, "step": 7968500 }, { "epoch": 23.07, "learning_rate": 3.847068792839941e-05, "loss": 2.1565, "step": 7969000 }, { "epoch": 23.07, "learning_rate": 3.846996428075213e-05, "loss": 2.1592, "step": 7969500 }, { "epoch": 23.07, "learning_rate": 3.8469242080400146e-05, "loss": 2.1612, "step": 7970000 }, { "epoch": 23.07, "learning_rate": 3.846851988004817e-05, "loss": 2.1491, "step": 7970500 }, { "epoch": 23.07, "learning_rate": 3.846779623240089e-05, "loss": 2.1527, "step": 7971000 }, { "epoch": 23.07, "learning_rate": 3.846707258475361e-05, "loss": 2.1514, "step": 7971500 }, { "epoch": 23.08, "learning_rate": 3.846634893710634e-05, "loss": 2.1395, "step": 7972000 }, { "epoch": 23.08, "learning_rate": 3.8465625289459064e-05, "loss": 2.166, "step": 7972500 }, { "epoch": 23.08, "learning_rate": 3.8464901641811787e-05, "loss": 2.1407, "step": 7973000 }, { "epoch": 23.08, "learning_rate": 3.846417799416451e-05, "loss": 2.153, "step": 7973500 }, { "epoch": 23.08, "learning_rate": 3.8463455793812524e-05, "loss": 2.1546, "step": 7974000 }, { "epoch": 23.08, "learning_rate": 3.8462732146165247e-05, "loss": 2.1605, "step": 7974500 }, { "epoch": 23.08, "learning_rate": 3.846200849851797e-05, "loss": 2.1606, "step": 7975000 }, { "epoch": 23.09, "learning_rate": 3.846128485087069e-05, "loss": 2.1628, "step": 7975500 }, { "epoch": 23.09, "learning_rate": 3.846056120322341e-05, "loss": 2.1589, "step": 7976000 }, { "epoch": 23.09, "learning_rate": 3.845983755557614e-05, "loss": 2.1561, "step": 7976500 }, { "epoch": 23.09, "learning_rate": 3.8459113907928864e-05, "loss": 2.1478, "step": 7977000 }, { "epoch": 23.09, "learning_rate": 3.845839026028159e-05, "loss": 2.1129, "step": 7977500 }, { "epoch": 23.09, "learning_rate": 3.8457666612634316e-05, "loss": 2.1418, "step": 7978000 }, { "epoch": 23.09, "learning_rate": 3.845694296498704e-05, "loss": 2.1404, "step": 7978500 }, { "epoch": 23.1, "learning_rate": 3.845621931733976e-05, "loss": 2.1566, "step": 7979000 }, { "epoch": 23.1, "learning_rate": 3.845549566969248e-05, "loss": 2.164, "step": 7979500 }, { "epoch": 23.1, "learning_rate": 3.8454772022045205e-05, "loss": 2.1501, "step": 7980000 }, { "epoch": 23.1, "learning_rate": 3.845404982169322e-05, "loss": 2.1566, "step": 7980500 }, { "epoch": 23.1, "learning_rate": 3.845332617404594e-05, "loss": 2.1451, "step": 7981000 }, { "epoch": 23.1, "learning_rate": 3.8452602526398665e-05, "loss": 2.1506, "step": 7981500 }, { "epoch": 23.1, "learning_rate": 3.8451878878751394e-05, "loss": 2.1511, "step": 7982000 }, { "epoch": 23.11, "learning_rate": 3.8451155231104116e-05, "loss": 2.1718, "step": 7982500 }, { "epoch": 23.11, "learning_rate": 3.845043158345684e-05, "loss": 2.1625, "step": 7983000 }, { "epoch": 23.11, "learning_rate": 3.844970793580956e-05, "loss": 2.1542, "step": 7983500 }, { "epoch": 23.11, "learning_rate": 3.8448985735457576e-05, "loss": 2.1534, "step": 7984000 }, { "epoch": 23.11, "learning_rate": 3.84482620878103e-05, "loss": 2.1445, "step": 7984500 }, { "epoch": 23.11, "learning_rate": 3.844753844016303e-05, "loss": 2.1608, "step": 7985000 }, { "epoch": 23.11, "learning_rate": 3.844681623981104e-05, "loss": 2.1556, "step": 7985500 }, { "epoch": 23.12, "learning_rate": 3.8446094039459065e-05, "loss": 2.1433, "step": 7986000 }, { "epoch": 23.12, "learning_rate": 3.844537039181179e-05, "loss": 2.1676, "step": 7986500 }, { "epoch": 23.12, "learning_rate": 3.844464674416451e-05, "loss": 2.1478, "step": 7987000 }, { "epoch": 23.12, "learning_rate": 3.844392309651723e-05, "loss": 2.1672, "step": 7987500 }, { "epoch": 23.12, "learning_rate": 3.8443199448869954e-05, "loss": 2.1471, "step": 7988000 }, { "epoch": 23.12, "learning_rate": 3.8442475801222676e-05, "loss": 2.1514, "step": 7988500 }, { "epoch": 23.12, "learning_rate": 3.84417521535754e-05, "loss": 2.1411, "step": 7989000 }, { "epoch": 23.13, "learning_rate": 3.844102995322342e-05, "loss": 2.1434, "step": 7989500 }, { "epoch": 23.13, "learning_rate": 3.844030630557614e-05, "loss": 2.1495, "step": 7990000 }, { "epoch": 23.13, "learning_rate": 3.8439582657928865e-05, "loss": 2.1476, "step": 7990500 }, { "epoch": 23.13, "learning_rate": 3.843885901028159e-05, "loss": 2.1709, "step": 7991000 }, { "epoch": 23.13, "learning_rate": 3.84381368099296e-05, "loss": 2.1532, "step": 7991500 }, { "epoch": 23.13, "learning_rate": 3.8437413162282325e-05, "loss": 2.1552, "step": 7992000 }, { "epoch": 23.14, "learning_rate": 3.843668951463505e-05, "loss": 2.1778, "step": 7992500 }, { "epoch": 23.14, "learning_rate": 3.8435965866987777e-05, "loss": 2.1377, "step": 7993000 }, { "epoch": 23.14, "learning_rate": 3.84352422193405e-05, "loss": 2.1502, "step": 7993500 }, { "epoch": 23.14, "learning_rate": 3.843451857169322e-05, "loss": 2.1729, "step": 7994000 }, { "epoch": 23.14, "learning_rate": 3.843379637134124e-05, "loss": 2.1515, "step": 7994500 }, { "epoch": 23.14, "learning_rate": 3.843307417098926e-05, "loss": 2.1511, "step": 7995000 }, { "epoch": 23.14, "learning_rate": 3.843235052334198e-05, "loss": 2.1653, "step": 7995500 }, { "epoch": 23.15, "learning_rate": 3.84316268756947e-05, "loss": 2.1508, "step": 7996000 }, { "epoch": 23.15, "learning_rate": 3.8430903228047426e-05, "loss": 2.1203, "step": 7996500 }, { "epoch": 23.15, "learning_rate": 3.843017958040015e-05, "loss": 2.155, "step": 7997000 }, { "epoch": 23.15, "learning_rate": 3.842945593275287e-05, "loss": 2.1523, "step": 7997500 }, { "epoch": 23.15, "learning_rate": 3.842873228510559e-05, "loss": 2.1477, "step": 7998000 }, { "epoch": 23.15, "learning_rate": 3.842800863745832e-05, "loss": 2.1658, "step": 7998500 }, { "epoch": 23.15, "learning_rate": 3.8427284989811044e-05, "loss": 2.1433, "step": 7999000 }, { "epoch": 23.16, "learning_rate": 3.8426561342163766e-05, "loss": 2.1297, "step": 7999500 }, { "epoch": 23.16, "learning_rate": 3.8425837694516495e-05, "loss": 2.1559, "step": 8000000 }, { "epoch": 23.16, "learning_rate": 3.842511404686922e-05, "loss": 2.1479, "step": 8000500 }, { "epoch": 23.16, "learning_rate": 3.842439039922194e-05, "loss": 2.1705, "step": 8001000 }, { "epoch": 23.16, "learning_rate": 3.842366675157466e-05, "loss": 2.163, "step": 8001500 }, { "epoch": 23.16, "learning_rate": 3.8422943103927384e-05, "loss": 2.1501, "step": 8002000 }, { "epoch": 23.16, "learning_rate": 3.8422219456280106e-05, "loss": 2.1433, "step": 8002500 }, { "epoch": 23.17, "learning_rate": 3.842149580863283e-05, "loss": 2.1643, "step": 8003000 }, { "epoch": 23.17, "learning_rate": 3.842077216098555e-05, "loss": 2.1548, "step": 8003500 }, { "epoch": 23.17, "learning_rate": 3.842004851333827e-05, "loss": 2.1574, "step": 8004000 }, { "epoch": 23.17, "learning_rate": 3.8419324865690995e-05, "loss": 2.1732, "step": 8004500 }, { "epoch": 23.17, "learning_rate": 3.841860121804372e-05, "loss": 2.159, "step": 8005000 }, { "epoch": 23.17, "learning_rate": 3.841787901769174e-05, "loss": 2.1464, "step": 8005500 }, { "epoch": 23.17, "learning_rate": 3.841715537004446e-05, "loss": 2.1482, "step": 8006000 }, { "epoch": 23.18, "learning_rate": 3.8416431722397184e-05, "loss": 2.1453, "step": 8006500 }, { "epoch": 23.18, "learning_rate": 3.8415708074749906e-05, "loss": 2.1489, "step": 8007000 }, { "epoch": 23.18, "learning_rate": 3.8414984427102635e-05, "loss": 2.1436, "step": 8007500 }, { "epoch": 23.18, "learning_rate": 3.841426222675065e-05, "loss": 2.1502, "step": 8008000 }, { "epoch": 23.18, "learning_rate": 3.841353857910337e-05, "loss": 2.1435, "step": 8008500 }, { "epoch": 23.18, "learning_rate": 3.8412814931456095e-05, "loss": 2.1633, "step": 8009000 }, { "epoch": 23.18, "learning_rate": 3.8412091283808824e-05, "loss": 2.1467, "step": 8009500 }, { "epoch": 23.19, "learning_rate": 3.8411367636161546e-05, "loss": 2.1665, "step": 8010000 }, { "epoch": 23.19, "learning_rate": 3.841064543580956e-05, "loss": 2.1544, "step": 8010500 }, { "epoch": 23.19, "learning_rate": 3.840992323545758e-05, "loss": 2.1397, "step": 8011000 }, { "epoch": 23.19, "learning_rate": 3.84091995878103e-05, "loss": 2.1601, "step": 8011500 }, { "epoch": 23.19, "learning_rate": 3.840847594016302e-05, "loss": 2.1606, "step": 8012000 }, { "epoch": 23.19, "learning_rate": 3.8407752292515744e-05, "loss": 2.188, "step": 8012500 }, { "epoch": 23.19, "learning_rate": 3.840702864486847e-05, "loss": 2.1765, "step": 8013000 }, { "epoch": 23.2, "learning_rate": 3.8406304997221195e-05, "loss": 2.1394, "step": 8013500 }, { "epoch": 23.2, "learning_rate": 3.8405584244164504e-05, "loss": 2.1532, "step": 8014000 }, { "epoch": 23.2, "learning_rate": 3.8404860596517227e-05, "loss": 2.1509, "step": 8014500 }, { "epoch": 23.2, "learning_rate": 3.840413694886995e-05, "loss": 2.1659, "step": 8015000 }, { "epoch": 23.2, "learning_rate": 3.840341330122268e-05, "loss": 2.1431, "step": 8015500 }, { "epoch": 23.2, "learning_rate": 3.84026896535754e-05, "loss": 2.1528, "step": 8016000 }, { "epoch": 23.2, "learning_rate": 3.840196600592812e-05, "loss": 2.1248, "step": 8016500 }, { "epoch": 23.21, "learning_rate": 3.8401242358280845e-05, "loss": 2.1558, "step": 8017000 }, { "epoch": 23.21, "learning_rate": 3.8400518710633574e-05, "loss": 2.1714, "step": 8017500 }, { "epoch": 23.21, "learning_rate": 3.839979651028159e-05, "loss": 2.1743, "step": 8018000 }, { "epoch": 23.21, "learning_rate": 3.83990757572249e-05, "loss": 2.1425, "step": 8018500 }, { "epoch": 23.21, "learning_rate": 3.839835210957762e-05, "loss": 2.1629, "step": 8019000 }, { "epoch": 23.21, "learning_rate": 3.839762846193035e-05, "loss": 2.1936, "step": 8019500 }, { "epoch": 23.21, "learning_rate": 3.8396906261578365e-05, "loss": 2.1583, "step": 8020000 }, { "epoch": 23.22, "learning_rate": 3.839618261393109e-05, "loss": 2.1329, "step": 8020500 }, { "epoch": 23.22, "learning_rate": 3.839545896628381e-05, "loss": 2.1771, "step": 8021000 }, { "epoch": 23.22, "learning_rate": 3.839473531863653e-05, "loss": 2.133, "step": 8021500 }, { "epoch": 23.22, "learning_rate": 3.8394011670989254e-05, "loss": 2.1578, "step": 8022000 }, { "epoch": 23.22, "learning_rate": 3.8393288023341976e-05, "loss": 2.177, "step": 8022500 }, { "epoch": 23.22, "learning_rate": 3.8392564375694705e-05, "loss": 2.1474, "step": 8023000 }, { "epoch": 23.22, "learning_rate": 3.839184072804743e-05, "loss": 2.1756, "step": 8023500 }, { "epoch": 23.23, "learning_rate": 3.839111708040015e-05, "loss": 2.1576, "step": 8024000 }, { "epoch": 23.23, "learning_rate": 3.839039488004817e-05, "loss": 2.1588, "step": 8024500 }, { "epoch": 23.23, "learning_rate": 3.8389671232400894e-05, "loss": 2.1762, "step": 8025000 }, { "epoch": 23.23, "learning_rate": 3.8388947584753616e-05, "loss": 2.1525, "step": 8025500 }, { "epoch": 23.23, "learning_rate": 3.838822538440163e-05, "loss": 2.1526, "step": 8026000 }, { "epoch": 23.23, "learning_rate": 3.838750318404965e-05, "loss": 2.1667, "step": 8026500 }, { "epoch": 23.23, "learning_rate": 3.8386779536402376e-05, "loss": 2.1483, "step": 8027000 }, { "epoch": 23.24, "learning_rate": 3.83860558887551e-05, "loss": 2.1566, "step": 8027500 }, { "epoch": 23.24, "learning_rate": 3.838533224110782e-05, "loss": 2.1473, "step": 8028000 }, { "epoch": 23.24, "learning_rate": 3.838460859346054e-05, "loss": 2.1534, "step": 8028500 }, { "epoch": 23.24, "learning_rate": 3.8383884945813265e-05, "loss": 2.1436, "step": 8029000 }, { "epoch": 23.24, "learning_rate": 3.838316129816599e-05, "loss": 2.1343, "step": 8029500 }, { "epoch": 23.24, "learning_rate": 3.838243765051871e-05, "loss": 2.1352, "step": 8030000 }, { "epoch": 23.25, "learning_rate": 3.838171400287144e-05, "loss": 2.1471, "step": 8030500 }, { "epoch": 23.25, "learning_rate": 3.838099035522416e-05, "loss": 2.1695, "step": 8031000 }, { "epoch": 23.25, "learning_rate": 3.838026670757688e-05, "loss": 2.1491, "step": 8031500 }, { "epoch": 23.25, "learning_rate": 3.8379543059929605e-05, "loss": 2.1922, "step": 8032000 }, { "epoch": 23.25, "learning_rate": 3.837881941228233e-05, "loss": 2.1257, "step": 8032500 }, { "epoch": 23.25, "learning_rate": 3.837809576463505e-05, "loss": 2.1526, "step": 8033000 }, { "epoch": 23.25, "learning_rate": 3.837737211698777e-05, "loss": 2.1614, "step": 8033500 }, { "epoch": 23.26, "learning_rate": 3.83766484693405e-05, "loss": 2.1471, "step": 8034000 }, { "epoch": 23.26, "learning_rate": 3.8375924821693223e-05, "loss": 2.1583, "step": 8034500 }, { "epoch": 23.26, "learning_rate": 3.8375201174045946e-05, "loss": 2.1541, "step": 8035000 }, { "epoch": 23.26, "learning_rate": 3.837447752639867e-05, "loss": 2.1617, "step": 8035500 }, { "epoch": 23.26, "learning_rate": 3.837375387875139e-05, "loss": 2.1525, "step": 8036000 }, { "epoch": 23.26, "learning_rate": 3.837303023110411e-05, "loss": 2.1786, "step": 8036500 }, { "epoch": 23.26, "learning_rate": 3.8372306583456835e-05, "loss": 2.1409, "step": 8037000 }, { "epoch": 23.27, "learning_rate": 3.837158438310486e-05, "loss": 2.1678, "step": 8037500 }, { "epoch": 23.27, "learning_rate": 3.837086073545758e-05, "loss": 2.175, "step": 8038000 }, { "epoch": 23.27, "learning_rate": 3.83701370878103e-05, "loss": 2.1549, "step": 8038500 }, { "epoch": 23.27, "learning_rate": 3.8369414887458324e-05, "loss": 2.1561, "step": 8039000 }, { "epoch": 23.27, "learning_rate": 3.8368691239811046e-05, "loss": 2.1541, "step": 8039500 }, { "epoch": 23.27, "learning_rate": 3.836796759216377e-05, "loss": 2.1452, "step": 8040000 }, { "epoch": 23.27, "learning_rate": 3.836724394451649e-05, "loss": 2.1547, "step": 8040500 }, { "epoch": 23.28, "learning_rate": 3.836652029686921e-05, "loss": 2.159, "step": 8041000 }, { "epoch": 23.28, "learning_rate": 3.8365796649221935e-05, "loss": 2.162, "step": 8041500 }, { "epoch": 23.28, "learning_rate": 3.836507300157466e-05, "loss": 2.1683, "step": 8042000 }, { "epoch": 23.28, "learning_rate": 3.836434935392738e-05, "loss": 2.1655, "step": 8042500 }, { "epoch": 23.28, "learning_rate": 3.83636257062801e-05, "loss": 2.1738, "step": 8043000 }, { "epoch": 23.28, "learning_rate": 3.8362903505928124e-05, "loss": 2.1574, "step": 8043500 }, { "epoch": 23.28, "learning_rate": 3.836218130557614e-05, "loss": 2.1842, "step": 8044000 }, { "epoch": 23.29, "learning_rate": 3.836145765792886e-05, "loss": 2.156, "step": 8044500 }, { "epoch": 23.29, "learning_rate": 3.8360734010281584e-05, "loss": 2.1711, "step": 8045000 }, { "epoch": 23.29, "learning_rate": 3.836001036263431e-05, "loss": 2.1611, "step": 8045500 }, { "epoch": 23.29, "learning_rate": 3.8359286714987035e-05, "loss": 2.1576, "step": 8046000 }, { "epoch": 23.29, "learning_rate": 3.835856306733976e-05, "loss": 2.1464, "step": 8046500 }, { "epoch": 23.29, "learning_rate": 3.835783941969248e-05, "loss": 2.1397, "step": 8047000 }, { "epoch": 23.29, "learning_rate": 3.83571172193405e-05, "loss": 2.1666, "step": 8047500 }, { "epoch": 23.3, "learning_rate": 3.8356393571693224e-05, "loss": 2.1435, "step": 8048000 }, { "epoch": 23.3, "learning_rate": 3.8355669924045946e-05, "loss": 2.1609, "step": 8048500 }, { "epoch": 23.3, "learning_rate": 3.835494627639867e-05, "loss": 2.1652, "step": 8049000 }, { "epoch": 23.3, "learning_rate": 3.835422262875139e-05, "loss": 2.1409, "step": 8049500 }, { "epoch": 23.3, "learning_rate": 3.835349898110411e-05, "loss": 2.1627, "step": 8050000 }, { "epoch": 23.3, "learning_rate": 3.8352775333456835e-05, "loss": 2.1641, "step": 8050500 }, { "epoch": 23.3, "learning_rate": 3.835205168580956e-05, "loss": 2.1255, "step": 8051000 }, { "epoch": 23.31, "learning_rate": 3.835132948545758e-05, "loss": 2.1319, "step": 8051500 }, { "epoch": 23.31, "learning_rate": 3.83506058378103e-05, "loss": 2.1567, "step": 8052000 }, { "epoch": 23.31, "learning_rate": 3.834988219016303e-05, "loss": 2.1554, "step": 8052500 }, { "epoch": 23.31, "learning_rate": 3.8349158542515753e-05, "loss": 2.16, "step": 8053000 }, { "epoch": 23.31, "learning_rate": 3.834843634216377e-05, "loss": 2.1413, "step": 8053500 }, { "epoch": 23.31, "learning_rate": 3.834771269451649e-05, "loss": 2.1649, "step": 8054000 }, { "epoch": 23.31, "learning_rate": 3.8346989046869213e-05, "loss": 2.1811, "step": 8054500 }, { "epoch": 23.32, "learning_rate": 3.8346265399221936e-05, "loss": 2.1672, "step": 8055000 }, { "epoch": 23.32, "learning_rate": 3.834554175157466e-05, "loss": 2.1631, "step": 8055500 }, { "epoch": 23.32, "learning_rate": 3.834481810392738e-05, "loss": 2.152, "step": 8056000 }, { "epoch": 23.32, "learning_rate": 3.83440959035754e-05, "loss": 2.1567, "step": 8056500 }, { "epoch": 23.32, "learning_rate": 3.8343372255928125e-05, "loss": 2.1679, "step": 8057000 }, { "epoch": 23.32, "learning_rate": 3.834264860828085e-05, "loss": 2.193, "step": 8057500 }, { "epoch": 23.32, "learning_rate": 3.834192496063357e-05, "loss": 2.1605, "step": 8058000 }, { "epoch": 23.33, "learning_rate": 3.834120131298629e-05, "loss": 2.1514, "step": 8058500 }, { "epoch": 23.33, "learning_rate": 3.83404805599296e-05, "loss": 2.1341, "step": 8059000 }, { "epoch": 23.33, "learning_rate": 3.833975691228233e-05, "loss": 2.1524, "step": 8059500 }, { "epoch": 23.33, "learning_rate": 3.833903326463505e-05, "loss": 2.1455, "step": 8060000 }, { "epoch": 23.33, "learning_rate": 3.833830961698778e-05, "loss": 2.1356, "step": 8060500 }, { "epoch": 23.33, "learning_rate": 3.83375859693405e-05, "loss": 2.142, "step": 8061000 }, { "epoch": 23.33, "learning_rate": 3.8336862321693225e-05, "loss": 2.1608, "step": 8061500 }, { "epoch": 23.34, "learning_rate": 3.833613867404595e-05, "loss": 2.1584, "step": 8062000 }, { "epoch": 23.34, "learning_rate": 3.833541502639867e-05, "loss": 2.1618, "step": 8062500 }, { "epoch": 23.34, "learning_rate": 3.833469137875139e-05, "loss": 2.152, "step": 8063000 }, { "epoch": 23.34, "learning_rate": 3.8333967731104114e-05, "loss": 2.1822, "step": 8063500 }, { "epoch": 23.34, "learning_rate": 3.833324553075213e-05, "loss": 2.1364, "step": 8064000 }, { "epoch": 23.34, "learning_rate": 3.833252188310485e-05, "loss": 2.1742, "step": 8064500 }, { "epoch": 23.34, "learning_rate": 3.833179823545758e-05, "loss": 2.1271, "step": 8065000 }, { "epoch": 23.35, "learning_rate": 3.8331076035105596e-05, "loss": 2.1706, "step": 8065500 }, { "epoch": 23.35, "learning_rate": 3.833035238745832e-05, "loss": 2.1534, "step": 8066000 }, { "epoch": 23.35, "learning_rate": 3.832962873981104e-05, "loss": 2.188, "step": 8066500 }, { "epoch": 23.35, "learning_rate": 3.832890509216376e-05, "loss": 2.143, "step": 8067000 }, { "epoch": 23.35, "learning_rate": 3.832818144451649e-05, "loss": 2.1687, "step": 8067500 }, { "epoch": 23.35, "learning_rate": 3.8327457796869214e-05, "loss": 2.1594, "step": 8068000 }, { "epoch": 23.36, "learning_rate": 3.8326734149221936e-05, "loss": 2.1665, "step": 8068500 }, { "epoch": 23.36, "learning_rate": 3.832601050157466e-05, "loss": 2.1698, "step": 8069000 }, { "epoch": 23.36, "learning_rate": 3.832528685392738e-05, "loss": 2.146, "step": 8069500 }, { "epoch": 23.36, "learning_rate": 3.83245646535754e-05, "loss": 2.1528, "step": 8070000 }, { "epoch": 23.36, "learning_rate": 3.8323841005928125e-05, "loss": 2.1544, "step": 8070500 }, { "epoch": 23.36, "learning_rate": 3.832311735828085e-05, "loss": 2.1589, "step": 8071000 }, { "epoch": 23.36, "learning_rate": 3.832239515792886e-05, "loss": 2.1878, "step": 8071500 }, { "epoch": 23.37, "learning_rate": 3.8321671510281586e-05, "loss": 2.1357, "step": 8072000 }, { "epoch": 23.37, "learning_rate": 3.832094786263431e-05, "loss": 2.1545, "step": 8072500 }, { "epoch": 23.37, "learning_rate": 3.832022421498703e-05, "loss": 2.156, "step": 8073000 }, { "epoch": 23.37, "learning_rate": 3.831950056733975e-05, "loss": 2.1636, "step": 8073500 }, { "epoch": 23.37, "learning_rate": 3.831877691969248e-05, "loss": 2.1689, "step": 8074000 }, { "epoch": 23.37, "learning_rate": 3.831805616663579e-05, "loss": 2.1667, "step": 8074500 }, { "epoch": 23.37, "learning_rate": 3.831733251898851e-05, "loss": 2.1807, "step": 8075000 }, { "epoch": 23.38, "learning_rate": 3.831660887134124e-05, "loss": 2.1491, "step": 8075500 }, { "epoch": 23.38, "learning_rate": 3.8315885223693964e-05, "loss": 2.1855, "step": 8076000 }, { "epoch": 23.38, "learning_rate": 3.8315161576046686e-05, "loss": 2.1419, "step": 8076500 }, { "epoch": 23.38, "learning_rate": 3.831443792839941e-05, "loss": 2.1618, "step": 8077000 }, { "epoch": 23.38, "learning_rate": 3.831371428075213e-05, "loss": 2.1637, "step": 8077500 }, { "epoch": 23.38, "learning_rate": 3.831299063310486e-05, "loss": 2.1749, "step": 8078000 }, { "epoch": 23.38, "learning_rate": 3.831226698545758e-05, "loss": 2.1335, "step": 8078500 }, { "epoch": 23.39, "learning_rate": 3.8311543337810304e-05, "loss": 2.1451, "step": 8079000 }, { "epoch": 23.39, "learning_rate": 3.8310819690163026e-05, "loss": 2.1549, "step": 8079500 }, { "epoch": 23.39, "learning_rate": 3.831009604251575e-05, "loss": 2.131, "step": 8080000 }, { "epoch": 23.39, "learning_rate": 3.830937239486847e-05, "loss": 2.1704, "step": 8080500 }, { "epoch": 23.39, "learning_rate": 3.830864874722119e-05, "loss": 2.1779, "step": 8081000 }, { "epoch": 23.39, "learning_rate": 3.8307925099573915e-05, "loss": 2.1493, "step": 8081500 }, { "epoch": 23.39, "learning_rate": 3.830720145192664e-05, "loss": 2.1648, "step": 8082000 }, { "epoch": 23.4, "learning_rate": 3.8306477804279366e-05, "loss": 2.1504, "step": 8082500 }, { "epoch": 23.4, "learning_rate": 3.830575415663209e-05, "loss": 2.1686, "step": 8083000 }, { "epoch": 23.4, "learning_rate": 3.830503050898481e-05, "loss": 2.1385, "step": 8083500 }, { "epoch": 23.4, "learning_rate": 3.830430686133753e-05, "loss": 2.1396, "step": 8084000 }, { "epoch": 23.4, "learning_rate": 3.830358610828085e-05, "loss": 2.1813, "step": 8084500 }, { "epoch": 23.4, "learning_rate": 3.830286246063357e-05, "loss": 2.1622, "step": 8085000 }, { "epoch": 23.4, "learning_rate": 3.830213881298629e-05, "loss": 2.1518, "step": 8085500 }, { "epoch": 23.41, "learning_rate": 3.8301415165339015e-05, "loss": 2.164, "step": 8086000 }, { "epoch": 23.41, "learning_rate": 3.830069151769174e-05, "loss": 2.1583, "step": 8086500 }, { "epoch": 23.41, "learning_rate": 3.829996787004446e-05, "loss": 2.1553, "step": 8087000 }, { "epoch": 23.41, "learning_rate": 3.829924422239718e-05, "loss": 2.1529, "step": 8087500 }, { "epoch": 23.41, "learning_rate": 3.829852057474991e-05, "loss": 2.1819, "step": 8088000 }, { "epoch": 23.41, "learning_rate": 3.829779692710263e-05, "loss": 2.1666, "step": 8088500 }, { "epoch": 23.41, "learning_rate": 3.8297073279455355e-05, "loss": 2.1452, "step": 8089000 }, { "epoch": 23.42, "learning_rate": 3.8296349631808084e-05, "loss": 2.1529, "step": 8089500 }, { "epoch": 23.42, "learning_rate": 3.829562598416081e-05, "loss": 2.178, "step": 8090000 }, { "epoch": 23.42, "learning_rate": 3.829490233651353e-05, "loss": 2.1613, "step": 8090500 }, { "epoch": 23.42, "learning_rate": 3.8294180136161544e-05, "loss": 2.1622, "step": 8091000 }, { "epoch": 23.42, "learning_rate": 3.829345648851427e-05, "loss": 2.1375, "step": 8091500 }, { "epoch": 23.42, "learning_rate": 3.829273284086699e-05, "loss": 2.1878, "step": 8092000 }, { "epoch": 23.42, "learning_rate": 3.829201064051501e-05, "loss": 2.1726, "step": 8092500 }, { "epoch": 23.43, "learning_rate": 3.8291286992867733e-05, "loss": 2.1887, "step": 8093000 }, { "epoch": 23.43, "learning_rate": 3.829056479251575e-05, "loss": 2.1584, "step": 8093500 }, { "epoch": 23.43, "learning_rate": 3.828984114486847e-05, "loss": 2.1612, "step": 8094000 }, { "epoch": 23.43, "learning_rate": 3.8289117497221193e-05, "loss": 2.156, "step": 8094500 }, { "epoch": 23.43, "learning_rate": 3.8288393849573916e-05, "loss": 2.1661, "step": 8095000 }, { "epoch": 23.43, "learning_rate": 3.828767020192664e-05, "loss": 2.1471, "step": 8095500 }, { "epoch": 23.43, "learning_rate": 3.828694655427936e-05, "loss": 2.1523, "step": 8096000 }, { "epoch": 23.44, "learning_rate": 3.828622290663208e-05, "loss": 2.1495, "step": 8096500 }, { "epoch": 23.44, "learning_rate": 3.8285500706280105e-05, "loss": 2.1502, "step": 8097000 }, { "epoch": 23.44, "learning_rate": 3.8284777058632834e-05, "loss": 2.1731, "step": 8097500 }, { "epoch": 23.44, "learning_rate": 3.8284053410985556e-05, "loss": 2.1594, "step": 8098000 }, { "epoch": 23.44, "learning_rate": 3.828332976333828e-05, "loss": 2.1675, "step": 8098500 }, { "epoch": 23.44, "learning_rate": 3.8282606115691e-05, "loss": 2.1627, "step": 8099000 }, { "epoch": 23.44, "learning_rate": 3.828188246804372e-05, "loss": 2.1564, "step": 8099500 }, { "epoch": 23.45, "learning_rate": 3.8281158820396445e-05, "loss": 2.1466, "step": 8100000 }, { "epoch": 23.45, "learning_rate": 3.828043806733976e-05, "loss": 2.1667, "step": 8100500 }, { "epoch": 23.45, "learning_rate": 3.827971441969248e-05, "loss": 2.1631, "step": 8101000 }, { "epoch": 23.45, "learning_rate": 3.8278990772045205e-05, "loss": 2.1652, "step": 8101500 }, { "epoch": 23.45, "learning_rate": 3.827826712439793e-05, "loss": 2.1661, "step": 8102000 }, { "epoch": 23.45, "learning_rate": 3.827754347675065e-05, "loss": 2.1744, "step": 8102500 }, { "epoch": 23.45, "learning_rate": 3.827681982910337e-05, "loss": 2.1489, "step": 8103000 }, { "epoch": 23.46, "learning_rate": 3.8276096181456094e-05, "loss": 2.1685, "step": 8103500 }, { "epoch": 23.46, "learning_rate": 3.8275372533808816e-05, "loss": 2.166, "step": 8104000 }, { "epoch": 23.46, "learning_rate": 3.827464888616154e-05, "loss": 2.1722, "step": 8104500 }, { "epoch": 23.46, "learning_rate": 3.827392523851427e-05, "loss": 2.1875, "step": 8105000 }, { "epoch": 23.46, "learning_rate": 3.827320303816228e-05, "loss": 2.1437, "step": 8105500 }, { "epoch": 23.46, "learning_rate": 3.827247939051501e-05, "loss": 2.1674, "step": 8106000 }, { "epoch": 23.46, "learning_rate": 3.8271755742867734e-05, "loss": 2.1638, "step": 8106500 }, { "epoch": 23.47, "learning_rate": 3.8271032095220456e-05, "loss": 2.1623, "step": 8107000 }, { "epoch": 23.47, "learning_rate": 3.827030844757318e-05, "loss": 2.1803, "step": 8107500 }, { "epoch": 23.47, "learning_rate": 3.82695847999259e-05, "loss": 2.1605, "step": 8108000 }, { "epoch": 23.47, "learning_rate": 3.8268862599573917e-05, "loss": 2.1829, "step": 8108500 }, { "epoch": 23.47, "learning_rate": 3.826813895192664e-05, "loss": 2.1731, "step": 8109000 }, { "epoch": 23.47, "learning_rate": 3.826741530427936e-05, "loss": 2.1725, "step": 8109500 }, { "epoch": 23.48, "learning_rate": 3.826669165663208e-05, "loss": 2.1808, "step": 8110000 }, { "epoch": 23.48, "learning_rate": 3.826596800898481e-05, "loss": 2.1528, "step": 8110500 }, { "epoch": 23.48, "learning_rate": 3.8265244361337534e-05, "loss": 2.1356, "step": 8111000 }, { "epoch": 23.48, "learning_rate": 3.826452071369026e-05, "loss": 2.1422, "step": 8111500 }, { "epoch": 23.48, "learning_rate": 3.8263797066042986e-05, "loss": 2.1799, "step": 8112000 }, { "epoch": 23.48, "learning_rate": 3.8263074865691e-05, "loss": 2.1856, "step": 8112500 }, { "epoch": 23.48, "learning_rate": 3.8262351218043723e-05, "loss": 2.1643, "step": 8113000 }, { "epoch": 23.49, "learning_rate": 3.8261627570396446e-05, "loss": 2.1486, "step": 8113500 }, { "epoch": 23.49, "learning_rate": 3.826090392274917e-05, "loss": 2.1454, "step": 8114000 }, { "epoch": 23.49, "learning_rate": 3.826018027510189e-05, "loss": 2.1712, "step": 8114500 }, { "epoch": 23.49, "learning_rate": 3.825945662745461e-05, "loss": 2.1691, "step": 8115000 }, { "epoch": 23.49, "learning_rate": 3.8258732979807335e-05, "loss": 2.1571, "step": 8115500 }, { "epoch": 23.49, "learning_rate": 3.825801077945536e-05, "loss": 2.1648, "step": 8116000 }, { "epoch": 23.49, "learning_rate": 3.825728713180808e-05, "loss": 2.1826, "step": 8116500 }, { "epoch": 23.5, "learning_rate": 3.82565634841608e-05, "loss": 2.1806, "step": 8117000 }, { "epoch": 23.5, "learning_rate": 3.8255839836513524e-05, "loss": 2.1382, "step": 8117500 }, { "epoch": 23.5, "learning_rate": 3.8255116188866246e-05, "loss": 2.1702, "step": 8118000 }, { "epoch": 23.5, "learning_rate": 3.825439254121897e-05, "loss": 2.1434, "step": 8118500 }, { "epoch": 23.5, "learning_rate": 3.825367034086699e-05, "loss": 2.1758, "step": 8119000 }, { "epoch": 23.5, "learning_rate": 3.825294669321971e-05, "loss": 2.1716, "step": 8119500 }, { "epoch": 23.5, "learning_rate": 3.825222304557244e-05, "loss": 2.1926, "step": 8120000 }, { "epoch": 23.51, "learning_rate": 3.8251499397925164e-05, "loss": 2.1539, "step": 8120500 }, { "epoch": 23.51, "learning_rate": 3.8250775750277886e-05, "loss": 2.1306, "step": 8121000 }, { "epoch": 23.51, "learning_rate": 3.825005210263061e-05, "loss": 2.1608, "step": 8121500 }, { "epoch": 23.51, "learning_rate": 3.8249329902278624e-05, "loss": 2.1581, "step": 8122000 }, { "epoch": 23.51, "learning_rate": 3.824860770192664e-05, "loss": 2.1733, "step": 8122500 }, { "epoch": 23.51, "learning_rate": 3.824788405427936e-05, "loss": 2.1464, "step": 8123000 }, { "epoch": 23.51, "learning_rate": 3.824716040663209e-05, "loss": 2.169, "step": 8123500 }, { "epoch": 23.52, "learning_rate": 3.824643675898481e-05, "loss": 2.1347, "step": 8124000 }, { "epoch": 23.52, "learning_rate": 3.8245713111337535e-05, "loss": 2.1534, "step": 8124500 }, { "epoch": 23.52, "learning_rate": 3.824499091098555e-05, "loss": 2.1684, "step": 8125000 }, { "epoch": 23.52, "learning_rate": 3.824426726333827e-05, "loss": 2.1778, "step": 8125500 }, { "epoch": 23.52, "learning_rate": 3.8243543615690995e-05, "loss": 2.1803, "step": 8126000 }, { "epoch": 23.52, "learning_rate": 3.824281996804372e-05, "loss": 2.1998, "step": 8126500 }, { "epoch": 23.52, "learning_rate": 3.824209632039644e-05, "loss": 2.1521, "step": 8127000 }, { "epoch": 23.53, "learning_rate": 3.824137267274917e-05, "loss": 2.1643, "step": 8127500 }, { "epoch": 23.53, "learning_rate": 3.824064902510189e-05, "loss": 2.1551, "step": 8128000 }, { "epoch": 23.53, "learning_rate": 3.823992537745461e-05, "loss": 2.1777, "step": 8128500 }, { "epoch": 23.53, "learning_rate": 3.8239203177102636e-05, "loss": 2.1739, "step": 8129000 }, { "epoch": 23.53, "learning_rate": 3.823847952945536e-05, "loss": 2.1587, "step": 8129500 }, { "epoch": 23.53, "learning_rate": 3.823775732910337e-05, "loss": 2.1541, "step": 8130000 }, { "epoch": 23.53, "learning_rate": 3.8237033681456096e-05, "loss": 2.1632, "step": 8130500 }, { "epoch": 23.54, "learning_rate": 3.823631003380882e-05, "loss": 2.1414, "step": 8131000 }, { "epoch": 23.54, "learning_rate": 3.823558638616154e-05, "loss": 2.1897, "step": 8131500 }, { "epoch": 23.54, "learning_rate": 3.823486273851426e-05, "loss": 2.15, "step": 8132000 }, { "epoch": 23.54, "learning_rate": 3.823413909086699e-05, "loss": 2.1688, "step": 8132500 }, { "epoch": 23.54, "learning_rate": 3.8233415443219714e-05, "loss": 2.1456, "step": 8133000 }, { "epoch": 23.54, "learning_rate": 3.8232691795572436e-05, "loss": 2.1744, "step": 8133500 }, { "epoch": 23.54, "learning_rate": 3.823196814792516e-05, "loss": 2.1384, "step": 8134000 }, { "epoch": 23.55, "learning_rate": 3.823124450027789e-05, "loss": 2.1718, "step": 8134500 }, { "epoch": 23.55, "learning_rate": 3.823052085263061e-05, "loss": 2.1671, "step": 8135000 }, { "epoch": 23.55, "learning_rate": 3.822979720498333e-05, "loss": 2.1511, "step": 8135500 }, { "epoch": 23.55, "learning_rate": 3.8229073557336054e-05, "loss": 2.1734, "step": 8136000 }, { "epoch": 23.55, "learning_rate": 3.822835135698407e-05, "loss": 2.1548, "step": 8136500 }, { "epoch": 23.55, "learning_rate": 3.822762770933679e-05, "loss": 2.1718, "step": 8137000 }, { "epoch": 23.55, "learning_rate": 3.8226904061689514e-05, "loss": 2.1741, "step": 8137500 }, { "epoch": 23.56, "learning_rate": 3.822618041404224e-05, "loss": 2.1555, "step": 8138000 }, { "epoch": 23.56, "learning_rate": 3.8225456766394965e-05, "loss": 2.1442, "step": 8138500 }, { "epoch": 23.56, "learning_rate": 3.822473311874769e-05, "loss": 2.1682, "step": 8139000 }, { "epoch": 23.56, "learning_rate": 3.822400947110041e-05, "loss": 2.1603, "step": 8139500 }, { "epoch": 23.56, "learning_rate": 3.8223287270748425e-05, "loss": 2.146, "step": 8140000 }, { "epoch": 23.56, "learning_rate": 3.822256362310115e-05, "loss": 2.1843, "step": 8140500 }, { "epoch": 23.56, "learning_rate": 3.822183997545387e-05, "loss": 2.1576, "step": 8141000 }, { "epoch": 23.57, "learning_rate": 3.822111632780659e-05, "loss": 2.1579, "step": 8141500 }, { "epoch": 23.57, "learning_rate": 3.8220394127454614e-05, "loss": 2.1499, "step": 8142000 }, { "epoch": 23.57, "learning_rate": 3.821967047980734e-05, "loss": 2.1718, "step": 8142500 }, { "epoch": 23.57, "learning_rate": 3.8218946832160065e-05, "loss": 2.1596, "step": 8143000 }, { "epoch": 23.57, "learning_rate": 3.821822318451279e-05, "loss": 2.1691, "step": 8143500 }, { "epoch": 23.57, "learning_rate": 3.821749953686551e-05, "loss": 2.1697, "step": 8144000 }, { "epoch": 23.57, "learning_rate": 3.821677588921823e-05, "loss": 2.1672, "step": 8144500 }, { "epoch": 23.58, "learning_rate": 3.821605368886625e-05, "loss": 2.1731, "step": 8145000 }, { "epoch": 23.58, "learning_rate": 3.821533004121897e-05, "loss": 2.167, "step": 8145500 }, { "epoch": 23.58, "learning_rate": 3.821460639357169e-05, "loss": 2.1822, "step": 8146000 }, { "epoch": 23.58, "learning_rate": 3.8213882745924414e-05, "loss": 2.1531, "step": 8146500 }, { "epoch": 23.58, "learning_rate": 3.821315909827714e-05, "loss": 2.1819, "step": 8147000 }, { "epoch": 23.58, "learning_rate": 3.8212435450629865e-05, "loss": 2.1773, "step": 8147500 }, { "epoch": 23.59, "learning_rate": 3.821171325027788e-05, "loss": 2.1347, "step": 8148000 }, { "epoch": 23.59, "learning_rate": 3.82109896026306e-05, "loss": 2.1964, "step": 8148500 }, { "epoch": 23.59, "learning_rate": 3.8210265954983325e-05, "loss": 2.1456, "step": 8149000 }, { "epoch": 23.59, "learning_rate": 3.8209542307336054e-05, "loss": 2.166, "step": 8149500 }, { "epoch": 23.59, "learning_rate": 3.820881865968878e-05, "loss": 2.1605, "step": 8150000 }, { "epoch": 23.59, "learning_rate": 3.820809645933679e-05, "loss": 2.1586, "step": 8150500 }, { "epoch": 23.59, "learning_rate": 3.8207374258984815e-05, "loss": 2.1788, "step": 8151000 }, { "epoch": 23.6, "learning_rate": 3.820665061133754e-05, "loss": 2.1458, "step": 8151500 }, { "epoch": 23.6, "learning_rate": 3.820592696369026e-05, "loss": 2.1648, "step": 8152000 }, { "epoch": 23.6, "learning_rate": 3.820520331604298e-05, "loss": 2.1648, "step": 8152500 }, { "epoch": 23.6, "learning_rate": 3.8204479668395704e-05, "loss": 2.181, "step": 8153000 }, { "epoch": 23.6, "learning_rate": 3.8203756020748426e-05, "loss": 2.1581, "step": 8153500 }, { "epoch": 23.6, "learning_rate": 3.820303237310115e-05, "loss": 2.1623, "step": 8154000 }, { "epoch": 23.6, "learning_rate": 3.820230872545387e-05, "loss": 2.1895, "step": 8154500 }, { "epoch": 23.61, "learning_rate": 3.820158507780659e-05, "loss": 2.1604, "step": 8155000 }, { "epoch": 23.61, "learning_rate": 3.820086143015932e-05, "loss": 2.1569, "step": 8155500 }, { "epoch": 23.61, "learning_rate": 3.8200137782512044e-05, "loss": 2.1513, "step": 8156000 }, { "epoch": 23.61, "learning_rate": 3.819941558216006e-05, "loss": 2.1729, "step": 8156500 }, { "epoch": 23.61, "learning_rate": 3.819869193451279e-05, "loss": 2.1657, "step": 8157000 }, { "epoch": 23.61, "learning_rate": 3.819796828686551e-05, "loss": 2.1643, "step": 8157500 }, { "epoch": 23.61, "learning_rate": 3.819724463921823e-05, "loss": 2.1793, "step": 8158000 }, { "epoch": 23.62, "learning_rate": 3.8196520991570955e-05, "loss": 2.1531, "step": 8158500 }, { "epoch": 23.62, "learning_rate": 3.819579879121897e-05, "loss": 2.1506, "step": 8159000 }, { "epoch": 23.62, "learning_rate": 3.819507514357169e-05, "loss": 2.1548, "step": 8159500 }, { "epoch": 23.62, "learning_rate": 3.819435149592442e-05, "loss": 2.1676, "step": 8160000 }, { "epoch": 23.62, "learning_rate": 3.8193627848277144e-05, "loss": 2.1442, "step": 8160500 }, { "epoch": 23.62, "learning_rate": 3.8192904200629866e-05, "loss": 2.1657, "step": 8161000 }, { "epoch": 23.62, "learning_rate": 3.819218055298259e-05, "loss": 2.1817, "step": 8161500 }, { "epoch": 23.63, "learning_rate": 3.819145690533531e-05, "loss": 2.1683, "step": 8162000 }, { "epoch": 23.63, "learning_rate": 3.819073325768803e-05, "loss": 2.1698, "step": 8162500 }, { "epoch": 23.63, "learning_rate": 3.8190009610040755e-05, "loss": 2.1682, "step": 8163000 }, { "epoch": 23.63, "learning_rate": 3.818928596239348e-05, "loss": 2.1602, "step": 8163500 }, { "epoch": 23.63, "learning_rate": 3.818856376204149e-05, "loss": 2.1651, "step": 8164000 }, { "epoch": 23.63, "learning_rate": 3.818784011439422e-05, "loss": 2.1676, "step": 8164500 }, { "epoch": 23.63, "learning_rate": 3.8187116466746944e-05, "loss": 2.1715, "step": 8165000 }, { "epoch": 23.64, "learning_rate": 3.818639281909967e-05, "loss": 2.1686, "step": 8165500 }, { "epoch": 23.64, "learning_rate": 3.8185669171452395e-05, "loss": 2.1636, "step": 8166000 }, { "epoch": 23.64, "learning_rate": 3.818494552380512e-05, "loss": 2.1742, "step": 8166500 }, { "epoch": 23.64, "learning_rate": 3.818422332345313e-05, "loss": 2.1632, "step": 8167000 }, { "epoch": 23.64, "learning_rate": 3.8183499675805855e-05, "loss": 2.1405, "step": 8167500 }, { "epoch": 23.64, "learning_rate": 3.818277602815858e-05, "loss": 2.1556, "step": 8168000 }, { "epoch": 23.64, "learning_rate": 3.81820523805113e-05, "loss": 2.1711, "step": 8168500 }, { "epoch": 23.65, "learning_rate": 3.818133018015932e-05, "loss": 2.1692, "step": 8169000 }, { "epoch": 23.65, "learning_rate": 3.8180606532512045e-05, "loss": 2.1519, "step": 8169500 }, { "epoch": 23.65, "learning_rate": 3.817988288486477e-05, "loss": 2.1584, "step": 8170000 }, { "epoch": 23.65, "learning_rate": 3.817915923721749e-05, "loss": 2.1754, "step": 8170500 }, { "epoch": 23.65, "learning_rate": 3.817843558957021e-05, "loss": 2.1513, "step": 8171000 }, { "epoch": 23.65, "learning_rate": 3.817771194192294e-05, "loss": 2.1734, "step": 8171500 }, { "epoch": 23.65, "learning_rate": 3.8176989741570956e-05, "loss": 2.1654, "step": 8172000 }, { "epoch": 23.66, "learning_rate": 3.817626609392368e-05, "loss": 2.1565, "step": 8172500 }, { "epoch": 23.66, "learning_rate": 3.81755424462764e-05, "loss": 2.1423, "step": 8173000 }, { "epoch": 23.66, "learning_rate": 3.817481879862912e-05, "loss": 2.1542, "step": 8173500 }, { "epoch": 23.66, "learning_rate": 3.8174096598277145e-05, "loss": 2.1716, "step": 8174000 }, { "epoch": 23.66, "learning_rate": 3.817337295062987e-05, "loss": 2.1389, "step": 8174500 }, { "epoch": 23.66, "learning_rate": 3.817265075027788e-05, "loss": 2.1575, "step": 8175000 }, { "epoch": 23.66, "learning_rate": 3.8171927102630605e-05, "loss": 2.1508, "step": 8175500 }, { "epoch": 23.67, "learning_rate": 3.817120345498333e-05, "loss": 2.176, "step": 8176000 }, { "epoch": 23.67, "learning_rate": 3.817047980733605e-05, "loss": 2.1682, "step": 8176500 }, { "epoch": 23.67, "learning_rate": 3.816975615968877e-05, "loss": 2.1711, "step": 8177000 }, { "epoch": 23.67, "learning_rate": 3.8169033959336794e-05, "loss": 2.1749, "step": 8177500 }, { "epoch": 23.67, "learning_rate": 3.816831175898481e-05, "loss": 2.1356, "step": 8178000 }, { "epoch": 23.67, "learning_rate": 3.816758811133753e-05, "loss": 2.1597, "step": 8178500 }, { "epoch": 23.67, "learning_rate": 3.8166864463690254e-05, "loss": 2.1596, "step": 8179000 }, { "epoch": 23.68, "learning_rate": 3.816614081604298e-05, "loss": 2.1462, "step": 8179500 }, { "epoch": 23.68, "learning_rate": 3.8165417168395705e-05, "loss": 2.183, "step": 8180000 }, { "epoch": 23.68, "learning_rate": 3.816469352074843e-05, "loss": 2.1572, "step": 8180500 }, { "epoch": 23.68, "learning_rate": 3.816396987310115e-05, "loss": 2.1823, "step": 8181000 }, { "epoch": 23.68, "learning_rate": 3.816324622545387e-05, "loss": 2.165, "step": 8181500 }, { "epoch": 23.68, "learning_rate": 3.81625225778066e-05, "loss": 2.1467, "step": 8182000 }, { "epoch": 23.68, "learning_rate": 3.816179893015932e-05, "loss": 2.1667, "step": 8182500 }, { "epoch": 23.69, "learning_rate": 3.8161075282512045e-05, "loss": 2.1691, "step": 8183000 }, { "epoch": 23.69, "learning_rate": 3.816035163486477e-05, "loss": 2.1658, "step": 8183500 }, { "epoch": 23.69, "learning_rate": 3.815962798721749e-05, "loss": 2.1463, "step": 8184000 }, { "epoch": 23.69, "learning_rate": 3.815890433957021e-05, "loss": 2.1441, "step": 8184500 }, { "epoch": 23.69, "learning_rate": 3.8158180691922934e-05, "loss": 2.1675, "step": 8185000 }, { "epoch": 23.69, "learning_rate": 3.8157457044275656e-05, "loss": 2.1554, "step": 8185500 }, { "epoch": 23.7, "learning_rate": 3.815673484392367e-05, "loss": 2.1299, "step": 8186000 }, { "epoch": 23.7, "learning_rate": 3.81560111962764e-05, "loss": 2.1299, "step": 8186500 }, { "epoch": 23.7, "learning_rate": 3.815528754862912e-05, "loss": 2.1451, "step": 8187000 }, { "epoch": 23.7, "learning_rate": 3.815456390098185e-05, "loss": 2.1558, "step": 8187500 }, { "epoch": 23.7, "learning_rate": 3.8153840253334575e-05, "loss": 2.1581, "step": 8188000 }, { "epoch": 23.7, "learning_rate": 3.81531166056873e-05, "loss": 2.1481, "step": 8188500 }, { "epoch": 23.7, "learning_rate": 3.815239440533531e-05, "loss": 2.1685, "step": 8189000 }, { "epoch": 23.71, "learning_rate": 3.8151670757688035e-05, "loss": 2.165, "step": 8189500 }, { "epoch": 23.71, "learning_rate": 3.815094855733605e-05, "loss": 2.1563, "step": 8190000 }, { "epoch": 23.71, "learning_rate": 3.815022490968877e-05, "loss": 2.1851, "step": 8190500 }, { "epoch": 23.71, "learning_rate": 3.81495012620415e-05, "loss": 2.1733, "step": 8191000 }, { "epoch": 23.71, "learning_rate": 3.8148777614394224e-05, "loss": 2.153, "step": 8191500 }, { "epoch": 23.71, "learning_rate": 3.8148053966746946e-05, "loss": 2.1626, "step": 8192000 }, { "epoch": 23.71, "learning_rate": 3.814733176639496e-05, "loss": 2.1456, "step": 8192500 }, { "epoch": 23.72, "learning_rate": 3.814660956604298e-05, "loss": 2.1472, "step": 8193000 }, { "epoch": 23.72, "learning_rate": 3.81458859183957e-05, "loss": 2.1673, "step": 8193500 }, { "epoch": 23.72, "learning_rate": 3.814516371804372e-05, "loss": 2.1704, "step": 8194000 }, { "epoch": 23.72, "learning_rate": 3.8144440070396444e-05, "loss": 2.1626, "step": 8194500 }, { "epoch": 23.72, "learning_rate": 3.8143717870044466e-05, "loss": 2.1555, "step": 8195000 }, { "epoch": 23.72, "learning_rate": 3.814299422239719e-05, "loss": 2.1598, "step": 8195500 }, { "epoch": 23.72, "learning_rate": 3.814227057474991e-05, "loss": 2.1894, "step": 8196000 }, { "epoch": 23.73, "learning_rate": 3.814154692710263e-05, "loss": 2.1539, "step": 8196500 }, { "epoch": 23.73, "learning_rate": 3.8140823279455355e-05, "loss": 2.1522, "step": 8197000 }, { "epoch": 23.73, "learning_rate": 3.814009963180808e-05, "loss": 2.1872, "step": 8197500 }, { "epoch": 23.73, "learning_rate": 3.81393759841608e-05, "loss": 2.1452, "step": 8198000 }, { "epoch": 23.73, "learning_rate": 3.813865233651353e-05, "loss": 2.1418, "step": 8198500 }, { "epoch": 23.73, "learning_rate": 3.813792868886625e-05, "loss": 2.1533, "step": 8199000 }, { "epoch": 23.73, "learning_rate": 3.813720504121897e-05, "loss": 2.1705, "step": 8199500 }, { "epoch": 23.74, "learning_rate": 3.8136481393571695e-05, "loss": 2.1527, "step": 8200000 }, { "epoch": 23.74, "learning_rate": 3.813575774592442e-05, "loss": 2.1689, "step": 8200500 }, { "epoch": 23.74, "learning_rate": 3.813503409827714e-05, "loss": 2.1685, "step": 8201000 }, { "epoch": 23.74, "learning_rate": 3.813431045062986e-05, "loss": 2.1583, "step": 8201500 }, { "epoch": 23.74, "learning_rate": 3.813358680298259e-05, "loss": 2.1638, "step": 8202000 }, { "epoch": 23.74, "learning_rate": 3.813286315533531e-05, "loss": 2.1887, "step": 8202500 }, { "epoch": 23.74, "learning_rate": 3.8132139507688035e-05, "loss": 2.1681, "step": 8203000 }, { "epoch": 23.75, "learning_rate": 3.813141586004076e-05, "loss": 2.1706, "step": 8203500 }, { "epoch": 23.75, "learning_rate": 3.813069221239348e-05, "loss": 2.1541, "step": 8204000 }, { "epoch": 23.75, "learning_rate": 3.81299700120415e-05, "loss": 2.1435, "step": 8204500 }, { "epoch": 23.75, "learning_rate": 3.8129246364394224e-05, "loss": 2.17, "step": 8205000 }, { "epoch": 23.75, "learning_rate": 3.8128522716746947e-05, "loss": 2.1855, "step": 8205500 }, { "epoch": 23.75, "learning_rate": 3.812779906909967e-05, "loss": 2.1642, "step": 8206000 }, { "epoch": 23.75, "learning_rate": 3.812707542145239e-05, "loss": 2.1594, "step": 8206500 }, { "epoch": 23.76, "learning_rate": 3.812635177380511e-05, "loss": 2.1621, "step": 8207000 }, { "epoch": 23.76, "learning_rate": 3.8125628126157836e-05, "loss": 2.1387, "step": 8207500 }, { "epoch": 23.76, "learning_rate": 3.812490447851056e-05, "loss": 2.1518, "step": 8208000 }, { "epoch": 23.76, "learning_rate": 3.812418083086328e-05, "loss": 2.1803, "step": 8208500 }, { "epoch": 23.76, "learning_rate": 3.812345718321601e-05, "loss": 2.1733, "step": 8209000 }, { "epoch": 23.76, "learning_rate": 3.8122734982864025e-05, "loss": 2.1558, "step": 8209500 }, { "epoch": 23.76, "learning_rate": 3.812201278251205e-05, "loss": 2.1262, "step": 8210000 }, { "epoch": 23.77, "learning_rate": 3.812128913486477e-05, "loss": 2.1506, "step": 8210500 }, { "epoch": 23.77, "learning_rate": 3.812056548721749e-05, "loss": 2.1664, "step": 8211000 }, { "epoch": 23.77, "learning_rate": 3.8119841839570214e-05, "loss": 2.1344, "step": 8211500 }, { "epoch": 23.77, "learning_rate": 3.8119118191922936e-05, "loss": 2.1596, "step": 8212000 }, { "epoch": 23.77, "learning_rate": 3.811839454427566e-05, "loss": 2.1568, "step": 8212500 }, { "epoch": 23.77, "learning_rate": 3.811767089662838e-05, "loss": 2.1704, "step": 8213000 }, { "epoch": 23.77, "learning_rate": 3.8116950143571696e-05, "loss": 2.1406, "step": 8213500 }, { "epoch": 23.78, "learning_rate": 3.811622649592442e-05, "loss": 2.1838, "step": 8214000 }, { "epoch": 23.78, "learning_rate": 3.811550284827714e-05, "loss": 2.1785, "step": 8214500 }, { "epoch": 23.78, "learning_rate": 3.811477920062986e-05, "loss": 2.1736, "step": 8215000 }, { "epoch": 23.78, "learning_rate": 3.8114055552982585e-05, "loss": 2.1585, "step": 8215500 }, { "epoch": 23.78, "learning_rate": 3.811333190533531e-05, "loss": 2.1485, "step": 8216000 }, { "epoch": 23.78, "learning_rate": 3.811260825768803e-05, "loss": 2.1771, "step": 8216500 }, { "epoch": 23.78, "learning_rate": 3.811188461004076e-05, "loss": 2.1565, "step": 8217000 }, { "epoch": 23.79, "learning_rate": 3.811116096239348e-05, "loss": 2.1305, "step": 8217500 }, { "epoch": 23.79, "learning_rate": 3.81104373147462e-05, "loss": 2.1484, "step": 8218000 }, { "epoch": 23.79, "learning_rate": 3.810971366709893e-05, "loss": 2.1589, "step": 8218500 }, { "epoch": 23.79, "learning_rate": 3.8108990019451654e-05, "loss": 2.1727, "step": 8219000 }, { "epoch": 23.79, "learning_rate": 3.810826926639496e-05, "loss": 2.1719, "step": 8219500 }, { "epoch": 23.79, "learning_rate": 3.8107545618747685e-05, "loss": 2.1549, "step": 8220000 }, { "epoch": 23.79, "learning_rate": 3.810682197110041e-05, "loss": 2.1434, "step": 8220500 }, { "epoch": 23.8, "learning_rate": 3.810609832345313e-05, "loss": 2.1574, "step": 8221000 }, { "epoch": 23.8, "learning_rate": 3.810537467580585e-05, "loss": 2.1528, "step": 8221500 }, { "epoch": 23.8, "learning_rate": 3.810465102815858e-05, "loss": 2.163, "step": 8222000 }, { "epoch": 23.8, "learning_rate": 3.81039273805113e-05, "loss": 2.1647, "step": 8222500 }, { "epoch": 23.8, "learning_rate": 3.8103203732864025e-05, "loss": 2.1577, "step": 8223000 }, { "epoch": 23.8, "learning_rate": 3.810248008521675e-05, "loss": 2.1901, "step": 8223500 }, { "epoch": 23.81, "learning_rate": 3.8101756437569477e-05, "loss": 2.1336, "step": 8224000 }, { "epoch": 23.81, "learning_rate": 3.81010327899222e-05, "loss": 2.1838, "step": 8224500 }, { "epoch": 23.81, "learning_rate": 3.810030914227492e-05, "loss": 2.1539, "step": 8225000 }, { "epoch": 23.81, "learning_rate": 3.809958694192294e-05, "loss": 2.1486, "step": 8225500 }, { "epoch": 23.81, "learning_rate": 3.809886329427566e-05, "loss": 2.1652, "step": 8226000 }, { "epoch": 23.81, "learning_rate": 3.809813964662838e-05, "loss": 2.1783, "step": 8226500 }, { "epoch": 23.81, "learning_rate": 3.80974159989811e-05, "loss": 2.1726, "step": 8227000 }, { "epoch": 23.82, "learning_rate": 3.809669235133383e-05, "loss": 2.1881, "step": 8227500 }, { "epoch": 23.82, "learning_rate": 3.8095968703686555e-05, "loss": 2.1718, "step": 8228000 }, { "epoch": 23.82, "learning_rate": 3.809524505603928e-05, "loss": 2.1634, "step": 8228500 }, { "epoch": 23.82, "learning_rate": 3.8094521408392e-05, "loss": 2.1803, "step": 8229000 }, { "epoch": 23.82, "learning_rate": 3.809379776074472e-05, "loss": 2.1676, "step": 8229500 }, { "epoch": 23.82, "learning_rate": 3.8093074113097444e-05, "loss": 2.1476, "step": 8230000 }, { "epoch": 23.82, "learning_rate": 3.8092350465450166e-05, "loss": 2.1511, "step": 8230500 }, { "epoch": 23.83, "learning_rate": 3.809162826509818e-05, "loss": 2.152, "step": 8231000 }, { "epoch": 23.83, "learning_rate": 3.809090461745091e-05, "loss": 2.151, "step": 8231500 }, { "epoch": 23.83, "learning_rate": 3.809018096980363e-05, "loss": 2.1798, "step": 8232000 }, { "epoch": 23.83, "learning_rate": 3.8089457322156355e-05, "loss": 2.154, "step": 8232500 }, { "epoch": 23.83, "learning_rate": 3.8088733674509084e-05, "loss": 2.176, "step": 8233000 }, { "epoch": 23.83, "learning_rate": 3.80880114741571e-05, "loss": 2.1372, "step": 8233500 }, { "epoch": 23.83, "learning_rate": 3.808728782650982e-05, "loss": 2.1523, "step": 8234000 }, { "epoch": 23.84, "learning_rate": 3.8086564178862544e-05, "loss": 2.1458, "step": 8234500 }, { "epoch": 23.84, "learning_rate": 3.8085840531215266e-05, "loss": 2.1608, "step": 8235000 }, { "epoch": 23.84, "learning_rate": 3.808511688356799e-05, "loss": 2.1817, "step": 8235500 }, { "epoch": 23.84, "learning_rate": 3.808439323592071e-05, "loss": 2.1701, "step": 8236000 }, { "epoch": 23.84, "learning_rate": 3.808367103556873e-05, "loss": 2.1682, "step": 8236500 }, { "epoch": 23.84, "learning_rate": 3.808294883521675e-05, "loss": 2.1494, "step": 8237000 }, { "epoch": 23.84, "learning_rate": 3.8082226634864764e-05, "loss": 2.1437, "step": 8237500 }, { "epoch": 23.85, "learning_rate": 3.8081502987217486e-05, "loss": 2.1672, "step": 8238000 }, { "epoch": 23.85, "learning_rate": 3.808077933957021e-05, "loss": 2.1494, "step": 8238500 }, { "epoch": 23.85, "learning_rate": 3.808005569192294e-05, "loss": 2.1635, "step": 8239000 }, { "epoch": 23.85, "learning_rate": 3.807933349157096e-05, "loss": 2.1551, "step": 8239500 }, { "epoch": 23.85, "learning_rate": 3.807860984392368e-05, "loss": 2.1856, "step": 8240000 }, { "epoch": 23.85, "learning_rate": 3.8077886196276404e-05, "loss": 2.1547, "step": 8240500 }, { "epoch": 23.85, "learning_rate": 3.8077162548629126e-05, "loss": 2.1465, "step": 8241000 }, { "epoch": 23.86, "learning_rate": 3.807643890098185e-05, "loss": 2.1627, "step": 8241500 }, { "epoch": 23.86, "learning_rate": 3.807571525333457e-05, "loss": 2.1772, "step": 8242000 }, { "epoch": 23.86, "learning_rate": 3.807499160568729e-05, "loss": 2.1372, "step": 8242500 }, { "epoch": 23.86, "learning_rate": 3.8074267958040015e-05, "loss": 2.1626, "step": 8243000 }, { "epoch": 23.86, "learning_rate": 3.807354431039274e-05, "loss": 2.1571, "step": 8243500 }, { "epoch": 23.86, "learning_rate": 3.807282066274546e-05, "loss": 2.1608, "step": 8244000 }, { "epoch": 23.86, "learning_rate": 3.807209701509818e-05, "loss": 2.1878, "step": 8244500 }, { "epoch": 23.87, "learning_rate": 3.8071373367450904e-05, "loss": 2.1578, "step": 8245000 }, { "epoch": 23.87, "learning_rate": 3.807065116709893e-05, "loss": 2.1823, "step": 8245500 }, { "epoch": 23.87, "learning_rate": 3.806992896674694e-05, "loss": 2.1539, "step": 8246000 }, { "epoch": 23.87, "learning_rate": 3.806920531909967e-05, "loss": 2.181, "step": 8246500 }, { "epoch": 23.87, "learning_rate": 3.8068481671452393e-05, "loss": 2.1546, "step": 8247000 }, { "epoch": 23.87, "learning_rate": 3.806775947110041e-05, "loss": 2.1418, "step": 8247500 }, { "epoch": 23.87, "learning_rate": 3.806703582345313e-05, "loss": 2.1687, "step": 8248000 }, { "epoch": 23.88, "learning_rate": 3.806631217580586e-05, "loss": 2.1731, "step": 8248500 }, { "epoch": 23.88, "learning_rate": 3.806558852815858e-05, "loss": 2.18, "step": 8249000 }, { "epoch": 23.88, "learning_rate": 3.8064864880511305e-05, "loss": 2.1846, "step": 8249500 }, { "epoch": 23.88, "learning_rate": 3.806414123286403e-05, "loss": 2.1556, "step": 8250000 }, { "epoch": 23.88, "learning_rate": 3.806341758521675e-05, "loss": 2.1636, "step": 8250500 }, { "epoch": 23.88, "learning_rate": 3.806269393756947e-05, "loss": 2.1484, "step": 8251000 }, { "epoch": 23.88, "learning_rate": 3.8061970289922194e-05, "loss": 2.141, "step": 8251500 }, { "epoch": 23.89, "learning_rate": 3.8061246642274916e-05, "loss": 2.1538, "step": 8252000 }, { "epoch": 23.89, "learning_rate": 3.806052299462764e-05, "loss": 2.1404, "step": 8252500 }, { "epoch": 23.89, "learning_rate": 3.805980079427566e-05, "loss": 2.1597, "step": 8253000 }, { "epoch": 23.89, "learning_rate": 3.805907714662838e-05, "loss": 2.1495, "step": 8253500 }, { "epoch": 23.89, "learning_rate": 3.805835349898111e-05, "loss": 2.1631, "step": 8254000 }, { "epoch": 23.89, "learning_rate": 3.8057629851333834e-05, "loss": 2.161, "step": 8254500 }, { "epoch": 23.89, "learning_rate": 3.8056906203686556e-05, "loss": 2.1557, "step": 8255000 }, { "epoch": 23.9, "learning_rate": 3.805618255603928e-05, "loss": 2.1608, "step": 8255500 }, { "epoch": 23.9, "learning_rate": 3.8055458908392e-05, "loss": 2.1414, "step": 8256000 }, { "epoch": 23.9, "learning_rate": 3.805473526074472e-05, "loss": 2.156, "step": 8256500 }, { "epoch": 23.9, "learning_rate": 3.8054011613097445e-05, "loss": 2.1595, "step": 8257000 }, { "epoch": 23.9, "learning_rate": 3.805328796545017e-05, "loss": 2.1569, "step": 8257500 }, { "epoch": 23.9, "learning_rate": 3.805256431780289e-05, "loss": 2.1546, "step": 8258000 }, { "epoch": 23.9, "learning_rate": 3.805184067015561e-05, "loss": 2.1463, "step": 8258500 }, { "epoch": 23.91, "learning_rate": 3.8051118469803634e-05, "loss": 2.1783, "step": 8259000 }, { "epoch": 23.91, "learning_rate": 3.8050394822156356e-05, "loss": 2.1647, "step": 8259500 }, { "epoch": 23.91, "learning_rate": 3.804967262180437e-05, "loss": 2.1527, "step": 8260000 }, { "epoch": 23.91, "learning_rate": 3.804895042145239e-05, "loss": 2.1926, "step": 8260500 }, { "epoch": 23.91, "learning_rate": 3.804822677380511e-05, "loss": 2.1554, "step": 8261000 }, { "epoch": 23.91, "learning_rate": 3.804750312615784e-05, "loss": 2.1503, "step": 8261500 }, { "epoch": 23.92, "learning_rate": 3.804677947851056e-05, "loss": 2.1311, "step": 8262000 }, { "epoch": 23.92, "learning_rate": 3.804605583086328e-05, "loss": 2.1653, "step": 8262500 }, { "epoch": 23.92, "learning_rate": 3.804533218321601e-05, "loss": 2.1615, "step": 8263000 }, { "epoch": 23.92, "learning_rate": 3.8044608535568734e-05, "loss": 2.1605, "step": 8263500 }, { "epoch": 23.92, "learning_rate": 3.804388488792146e-05, "loss": 2.1875, "step": 8264000 }, { "epoch": 23.92, "learning_rate": 3.804316124027418e-05, "loss": 2.1689, "step": 8264500 }, { "epoch": 23.92, "learning_rate": 3.80424375926269e-05, "loss": 2.1572, "step": 8265000 }, { "epoch": 23.93, "learning_rate": 3.804171539227492e-05, "loss": 2.1518, "step": 8265500 }, { "epoch": 23.93, "learning_rate": 3.804099174462764e-05, "loss": 2.1638, "step": 8266000 }, { "epoch": 23.93, "learning_rate": 3.804026809698036e-05, "loss": 2.1563, "step": 8266500 }, { "epoch": 23.93, "learning_rate": 3.8039545896628383e-05, "loss": 2.191, "step": 8267000 }, { "epoch": 23.93, "learning_rate": 3.8038822248981106e-05, "loss": 2.15, "step": 8267500 }, { "epoch": 23.93, "learning_rate": 3.803809860133383e-05, "loss": 2.1866, "step": 8268000 }, { "epoch": 23.93, "learning_rate": 3.803737495368655e-05, "loss": 2.164, "step": 8268500 }, { "epoch": 23.94, "learning_rate": 3.803665130603928e-05, "loss": 2.1533, "step": 8269000 }, { "epoch": 23.94, "learning_rate": 3.8035927658392e-05, "loss": 2.1681, "step": 8269500 }, { "epoch": 23.94, "learning_rate": 3.8035204010744724e-05, "loss": 2.1631, "step": 8270000 }, { "epoch": 23.94, "learning_rate": 3.803448181039274e-05, "loss": 2.1604, "step": 8270500 }, { "epoch": 23.94, "learning_rate": 3.803375816274546e-05, "loss": 2.172, "step": 8271000 }, { "epoch": 23.94, "learning_rate": 3.8033034515098184e-05, "loss": 2.1431, "step": 8271500 }, { "epoch": 23.94, "learning_rate": 3.803231086745091e-05, "loss": 2.1359, "step": 8272000 }, { "epoch": 23.95, "learning_rate": 3.803158866709893e-05, "loss": 2.1798, "step": 8272500 }, { "epoch": 23.95, "learning_rate": 3.803086501945165e-05, "loss": 2.1435, "step": 8273000 }, { "epoch": 23.95, "learning_rate": 3.803014137180437e-05, "loss": 2.1544, "step": 8273500 }, { "epoch": 23.95, "learning_rate": 3.8029417724157095e-05, "loss": 2.1764, "step": 8274000 }, { "epoch": 23.95, "learning_rate": 3.802869407650982e-05, "loss": 2.1387, "step": 8274500 }, { "epoch": 23.95, "learning_rate": 3.802797042886254e-05, "loss": 2.1784, "step": 8275000 }, { "epoch": 23.95, "learning_rate": 3.802724678121526e-05, "loss": 2.1562, "step": 8275500 }, { "epoch": 23.96, "learning_rate": 3.8026523133567984e-05, "loss": 2.1731, "step": 8276000 }, { "epoch": 23.96, "learning_rate": 3.802579948592071e-05, "loss": 2.1758, "step": 8276500 }, { "epoch": 23.96, "learning_rate": 3.8025077285568735e-05, "loss": 2.1705, "step": 8277000 }, { "epoch": 23.96, "learning_rate": 3.802435363792146e-05, "loss": 2.1576, "step": 8277500 }, { "epoch": 23.96, "learning_rate": 3.802362999027418e-05, "loss": 2.1841, "step": 8278000 }, { "epoch": 23.96, "learning_rate": 3.80229063426269e-05, "loss": 2.1704, "step": 8278500 }, { "epoch": 23.96, "learning_rate": 3.802218414227492e-05, "loss": 2.1693, "step": 8279000 }, { "epoch": 23.97, "learning_rate": 3.802146049462764e-05, "loss": 2.1698, "step": 8279500 }, { "epoch": 23.97, "learning_rate": 3.802073684698036e-05, "loss": 2.1376, "step": 8280000 }, { "epoch": 23.97, "learning_rate": 3.802001319933309e-05, "loss": 2.1496, "step": 8280500 }, { "epoch": 23.97, "learning_rate": 3.801928955168581e-05, "loss": 2.1751, "step": 8281000 }, { "epoch": 23.97, "learning_rate": 3.801856735133383e-05, "loss": 2.1772, "step": 8281500 }, { "epoch": 23.97, "learning_rate": 3.801784370368655e-05, "loss": 2.1202, "step": 8282000 }, { "epoch": 23.97, "learning_rate": 3.801712005603927e-05, "loss": 2.1796, "step": 8282500 }, { "epoch": 23.98, "learning_rate": 3.8016396408391995e-05, "loss": 2.1609, "step": 8283000 }, { "epoch": 23.98, "learning_rate": 3.801567276074472e-05, "loss": 2.1416, "step": 8283500 }, { "epoch": 23.98, "learning_rate": 3.801495056039274e-05, "loss": 2.1584, "step": 8284000 }, { "epoch": 23.98, "learning_rate": 3.801422836004076e-05, "loss": 2.1732, "step": 8284500 }, { "epoch": 23.98, "learning_rate": 3.8013504712393485e-05, "loss": 2.1624, "step": 8285000 }, { "epoch": 23.98, "learning_rate": 3.801278106474621e-05, "loss": 2.1438, "step": 8285500 }, { "epoch": 23.98, "learning_rate": 3.801205741709893e-05, "loss": 2.1604, "step": 8286000 }, { "epoch": 23.99, "learning_rate": 3.801133376945165e-05, "loss": 2.1592, "step": 8286500 }, { "epoch": 23.99, "learning_rate": 3.8010610121804374e-05, "loss": 2.1515, "step": 8287000 }, { "epoch": 23.99, "learning_rate": 3.8009886474157096e-05, "loss": 2.1628, "step": 8287500 }, { "epoch": 23.99, "learning_rate": 3.800916282650982e-05, "loss": 2.1588, "step": 8288000 }, { "epoch": 23.99, "learning_rate": 3.800843917886254e-05, "loss": 2.1775, "step": 8288500 }, { "epoch": 23.99, "learning_rate": 3.800771553121526e-05, "loss": 2.1663, "step": 8289000 }, { "epoch": 23.99, "learning_rate": 3.800699188356799e-05, "loss": 2.1489, "step": 8289500 }, { "epoch": 24.0, "learning_rate": 3.8006268235920714e-05, "loss": 2.1821, "step": 8290000 }, { "epoch": 24.0, "learning_rate": 3.8005544588273436e-05, "loss": 2.15, "step": 8290500 }, { "epoch": 24.0, "learning_rate": 3.8004820940626165e-05, "loss": 2.1397, "step": 8291000 }, { "epoch": 24.0, "eval_accuracy": 0.6629079772621248, "eval_accuracy_mlm": 0.6270114574392218, "eval_accuracy_nsp": 0.8555338622228944, "eval_loss": 2.2102925777435303, "eval_runtime": 331.9145, "eval_samples_per_second": 1314.754, "eval_steps_per_second": 54.782, "step": 8291328 }, { "epoch": 24.0, "learning_rate": 3.800409874027418e-05, "loss": 2.1363, "step": 8291500 }, { "epoch": 24.0, "learning_rate": 3.80033750926269e-05, "loss": 2.1397, "step": 8292000 }, { "epoch": 24.0, "learning_rate": 3.8002651444979625e-05, "loss": 2.1261, "step": 8292500 }, { "epoch": 24.0, "learning_rate": 3.800192779733235e-05, "loss": 2.1153, "step": 8293000 }, { "epoch": 24.01, "learning_rate": 3.800120559698036e-05, "loss": 2.1465, "step": 8293500 }, { "epoch": 24.01, "learning_rate": 3.800048194933309e-05, "loss": 2.1581, "step": 8294000 }, { "epoch": 24.01, "learning_rate": 3.7999758301685814e-05, "loss": 2.1546, "step": 8294500 }, { "epoch": 24.01, "learning_rate": 3.7999034654038536e-05, "loss": 2.1238, "step": 8295000 }, { "epoch": 24.01, "learning_rate": 3.799831245368655e-05, "loss": 2.1491, "step": 8295500 }, { "epoch": 24.01, "learning_rate": 3.7997588806039274e-05, "loss": 2.1534, "step": 8296000 }, { "epoch": 24.01, "learning_rate": 3.7996865158391996e-05, "loss": 2.1569, "step": 8296500 }, { "epoch": 24.02, "learning_rate": 3.799614151074472e-05, "loss": 2.162, "step": 8297000 }, { "epoch": 24.02, "learning_rate": 3.799541786309744e-05, "loss": 2.1316, "step": 8297500 }, { "epoch": 24.02, "learning_rate": 3.799469566274546e-05, "loss": 2.1299, "step": 8298000 }, { "epoch": 24.02, "learning_rate": 3.7993972015098185e-05, "loss": 2.1527, "step": 8298500 }, { "epoch": 24.02, "learning_rate": 3.799324981474621e-05, "loss": 2.1406, "step": 8299000 }, { "epoch": 24.02, "learning_rate": 3.799252616709893e-05, "loss": 2.1478, "step": 8299500 }, { "epoch": 24.03, "learning_rate": 3.799180251945165e-05, "loss": 2.1649, "step": 8300000 }, { "epoch": 24.03, "learning_rate": 3.7991078871804374e-05, "loss": 2.127, "step": 8300500 }, { "epoch": 24.03, "learning_rate": 3.7990355224157097e-05, "loss": 2.1331, "step": 8301000 }, { "epoch": 24.03, "learning_rate": 3.798963157650982e-05, "loss": 2.1607, "step": 8301500 }, { "epoch": 24.03, "learning_rate": 3.798890792886254e-05, "loss": 2.1315, "step": 8302000 }, { "epoch": 24.03, "learning_rate": 3.798818428121526e-05, "loss": 2.1581, "step": 8302500 }, { "epoch": 24.03, "learning_rate": 3.798746063356799e-05, "loss": 2.1367, "step": 8303000 }, { "epoch": 24.04, "learning_rate": 3.798673843321601e-05, "loss": 2.1365, "step": 8303500 }, { "epoch": 24.04, "learning_rate": 3.798601478556873e-05, "loss": 2.1485, "step": 8304000 }, { "epoch": 24.04, "learning_rate": 3.798529113792145e-05, "loss": 2.1408, "step": 8304500 }, { "epoch": 24.04, "learning_rate": 3.7984567490274175e-05, "loss": 2.1714, "step": 8305000 }, { "epoch": 24.04, "learning_rate": 3.79838438426269e-05, "loss": 2.1472, "step": 8305500 }, { "epoch": 24.04, "learning_rate": 3.798312019497962e-05, "loss": 2.1618, "step": 8306000 }, { "epoch": 24.04, "learning_rate": 3.798239654733235e-05, "loss": 2.1577, "step": 8306500 }, { "epoch": 24.05, "learning_rate": 3.798167289968507e-05, "loss": 2.1283, "step": 8307000 }, { "epoch": 24.05, "learning_rate": 3.798094925203779e-05, "loss": 2.1504, "step": 8307500 }, { "epoch": 24.05, "learning_rate": 3.798022849898111e-05, "loss": 2.1256, "step": 8308000 }, { "epoch": 24.05, "learning_rate": 3.7979506298629124e-05, "loss": 2.1087, "step": 8308500 }, { "epoch": 24.05, "learning_rate": 3.7978782650981846e-05, "loss": 2.1414, "step": 8309000 }, { "epoch": 24.05, "learning_rate": 3.797805900333457e-05, "loss": 2.1258, "step": 8309500 }, { "epoch": 24.05, "learning_rate": 3.797733535568729e-05, "loss": 2.1683, "step": 8310000 }, { "epoch": 24.06, "learning_rate": 3.797661170804002e-05, "loss": 2.1434, "step": 8310500 }, { "epoch": 24.06, "learning_rate": 3.797588806039274e-05, "loss": 2.1418, "step": 8311000 }, { "epoch": 24.06, "learning_rate": 3.7975164412745464e-05, "loss": 2.1152, "step": 8311500 }, { "epoch": 24.06, "learning_rate": 3.7974440765098186e-05, "loss": 2.1548, "step": 8312000 }, { "epoch": 24.06, "learning_rate": 3.79737185647462e-05, "loss": 2.1485, "step": 8312500 }, { "epoch": 24.06, "learning_rate": 3.7972994917098924e-05, "loss": 2.141, "step": 8313000 }, { "epoch": 24.06, "learning_rate": 3.7972271269451646e-05, "loss": 2.1313, "step": 8313500 }, { "epoch": 24.07, "learning_rate": 3.7971547621804375e-05, "loss": 2.1218, "step": 8314000 }, { "epoch": 24.07, "learning_rate": 3.797082542145239e-05, "loss": 2.156, "step": 8314500 }, { "epoch": 24.07, "learning_rate": 3.797010177380512e-05, "loss": 2.1518, "step": 8315000 }, { "epoch": 24.07, "learning_rate": 3.796937812615784e-05, "loss": 2.1754, "step": 8315500 }, { "epoch": 24.07, "learning_rate": 3.7968654478510564e-05, "loss": 2.162, "step": 8316000 }, { "epoch": 24.07, "learning_rate": 3.7967930830863286e-05, "loss": 2.1538, "step": 8316500 }, { "epoch": 24.07, "learning_rate": 3.796720718321601e-05, "loss": 2.133, "step": 8317000 }, { "epoch": 24.08, "learning_rate": 3.7966484982864024e-05, "loss": 2.1546, "step": 8317500 }, { "epoch": 24.08, "learning_rate": 3.7965761335216746e-05, "loss": 2.1505, "step": 8318000 }, { "epoch": 24.08, "learning_rate": 3.796503768756947e-05, "loss": 2.1261, "step": 8318500 }, { "epoch": 24.08, "learning_rate": 3.796431403992219e-05, "loss": 2.1514, "step": 8319000 }, { "epoch": 24.08, "learning_rate": 3.796359039227492e-05, "loss": 2.1407, "step": 8319500 }, { "epoch": 24.08, "learning_rate": 3.7962868191922935e-05, "loss": 2.1468, "step": 8320000 }, { "epoch": 24.08, "learning_rate": 3.796214454427566e-05, "loss": 2.1241, "step": 8320500 }, { "epoch": 24.09, "learning_rate": 3.796142089662838e-05, "loss": 2.1218, "step": 8321000 }, { "epoch": 24.09, "learning_rate": 3.796069724898111e-05, "loss": 2.1301, "step": 8321500 }, { "epoch": 24.09, "learning_rate": 3.795997360133383e-05, "loss": 2.1362, "step": 8322000 }, { "epoch": 24.09, "learning_rate": 3.795925140098185e-05, "loss": 2.161, "step": 8322500 }, { "epoch": 24.09, "learning_rate": 3.795852775333457e-05, "loss": 2.154, "step": 8323000 }, { "epoch": 24.09, "learning_rate": 3.795780555298259e-05, "loss": 2.1502, "step": 8323500 }, { "epoch": 24.09, "learning_rate": 3.7957081905335313e-05, "loss": 2.1301, "step": 8324000 }, { "epoch": 24.1, "learning_rate": 3.7956358257688036e-05, "loss": 2.1366, "step": 8324500 }, { "epoch": 24.1, "learning_rate": 3.795563461004076e-05, "loss": 2.1506, "step": 8325000 }, { "epoch": 24.1, "learning_rate": 3.795491096239348e-05, "loss": 2.1507, "step": 8325500 }, { "epoch": 24.1, "learning_rate": 3.79541873147462e-05, "loss": 2.1491, "step": 8326000 }, { "epoch": 24.1, "learning_rate": 3.7953463667098925e-05, "loss": 2.1503, "step": 8326500 }, { "epoch": 24.1, "learning_rate": 3.795274001945165e-05, "loss": 2.1587, "step": 8327000 }, { "epoch": 24.1, "learning_rate": 3.795201637180437e-05, "loss": 2.1578, "step": 8327500 }, { "epoch": 24.11, "learning_rate": 3.795129417145239e-05, "loss": 2.1488, "step": 8328000 }, { "epoch": 24.11, "learning_rate": 3.7950570523805114e-05, "loss": 2.1581, "step": 8328500 }, { "epoch": 24.11, "learning_rate": 3.794984687615784e-05, "loss": 2.1502, "step": 8329000 }, { "epoch": 24.11, "learning_rate": 3.7949123228510565e-05, "loss": 2.1713, "step": 8329500 }, { "epoch": 24.11, "learning_rate": 3.794840102815858e-05, "loss": 2.1542, "step": 8330000 }, { "epoch": 24.11, "learning_rate": 3.79476773805113e-05, "loss": 2.138, "step": 8330500 }, { "epoch": 24.11, "learning_rate": 3.7946953732864025e-05, "loss": 2.1514, "step": 8331000 }, { "epoch": 24.12, "learning_rate": 3.794623008521675e-05, "loss": 2.1401, "step": 8331500 }, { "epoch": 24.12, "learning_rate": 3.794550643756947e-05, "loss": 2.1389, "step": 8332000 }, { "epoch": 24.12, "learning_rate": 3.79447827899222e-05, "loss": 2.1453, "step": 8332500 }, { "epoch": 24.12, "learning_rate": 3.794405914227492e-05, "loss": 2.1255, "step": 8333000 }, { "epoch": 24.12, "learning_rate": 3.794333549462764e-05, "loss": 2.1395, "step": 8333500 }, { "epoch": 24.12, "learning_rate": 3.7942611846980365e-05, "loss": 2.1536, "step": 8334000 }, { "epoch": 24.12, "learning_rate": 3.794188819933309e-05, "loss": 2.1509, "step": 8334500 }, { "epoch": 24.13, "learning_rate": 3.7941167446276396e-05, "loss": 2.145, "step": 8335000 }, { "epoch": 24.13, "learning_rate": 3.794044379862912e-05, "loss": 2.1287, "step": 8335500 }, { "epoch": 24.13, "learning_rate": 3.793972159827714e-05, "loss": 2.1377, "step": 8336000 }, { "epoch": 24.13, "learning_rate": 3.793899795062986e-05, "loss": 2.1452, "step": 8336500 }, { "epoch": 24.13, "learning_rate": 3.793827430298259e-05, "loss": 2.1314, "step": 8337000 }, { "epoch": 24.13, "learning_rate": 3.7937550655335314e-05, "loss": 2.1252, "step": 8337500 }, { "epoch": 24.14, "learning_rate": 3.7936827007688037e-05, "loss": 2.18, "step": 8338000 }, { "epoch": 24.14, "learning_rate": 3.793610336004076e-05, "loss": 2.145, "step": 8338500 }, { "epoch": 24.14, "learning_rate": 3.793537971239348e-05, "loss": 2.1529, "step": 8339000 }, { "epoch": 24.14, "learning_rate": 3.79346560647462e-05, "loss": 2.1396, "step": 8339500 }, { "epoch": 24.14, "learning_rate": 3.7933932417098925e-05, "loss": 2.1374, "step": 8340000 }, { "epoch": 24.14, "learning_rate": 3.793320876945165e-05, "loss": 2.1499, "step": 8340500 }, { "epoch": 24.14, "learning_rate": 3.793248656909967e-05, "loss": 2.1372, "step": 8341000 }, { "epoch": 24.15, "learning_rate": 3.793176292145239e-05, "loss": 2.1557, "step": 8341500 }, { "epoch": 24.15, "learning_rate": 3.7931039273805114e-05, "loss": 2.1511, "step": 8342000 }, { "epoch": 24.15, "learning_rate": 3.793031562615784e-05, "loss": 2.1651, "step": 8342500 }, { "epoch": 24.15, "learning_rate": 3.792959342580585e-05, "loss": 2.1475, "step": 8343000 }, { "epoch": 24.15, "learning_rate": 3.7928869778158575e-05, "loss": 2.1299, "step": 8343500 }, { "epoch": 24.15, "learning_rate": 3.7928146130511304e-05, "loss": 2.1563, "step": 8344000 }, { "epoch": 24.15, "learning_rate": 3.7927422482864026e-05, "loss": 2.1483, "step": 8344500 }, { "epoch": 24.16, "learning_rate": 3.792669883521675e-05, "loss": 2.154, "step": 8345000 }, { "epoch": 24.16, "learning_rate": 3.792597518756947e-05, "loss": 2.1513, "step": 8345500 }, { "epoch": 24.16, "learning_rate": 3.79252515399222e-05, "loss": 2.131, "step": 8346000 }, { "epoch": 24.16, "learning_rate": 3.792452789227492e-05, "loss": 2.136, "step": 8346500 }, { "epoch": 24.16, "learning_rate": 3.792380569192294e-05, "loss": 2.1469, "step": 8347000 }, { "epoch": 24.16, "learning_rate": 3.792308204427566e-05, "loss": 2.1459, "step": 8347500 }, { "epoch": 24.16, "learning_rate": 3.792235839662838e-05, "loss": 2.1374, "step": 8348000 }, { "epoch": 24.17, "learning_rate": 3.7921634748981104e-05, "loss": 2.1338, "step": 8348500 }, { "epoch": 24.17, "learning_rate": 3.7920911101333826e-05, "loss": 2.1399, "step": 8349000 }, { "epoch": 24.17, "learning_rate": 3.792018745368655e-05, "loss": 2.1486, "step": 8349500 }, { "epoch": 24.17, "learning_rate": 3.791946380603927e-05, "loss": 2.1324, "step": 8350000 }, { "epoch": 24.17, "learning_rate": 3.7918740158392e-05, "loss": 2.1359, "step": 8350500 }, { "epoch": 24.17, "learning_rate": 3.791801651074472e-05, "loss": 2.1622, "step": 8351000 }, { "epoch": 24.17, "learning_rate": 3.7917294310392744e-05, "loss": 2.1389, "step": 8351500 }, { "epoch": 24.18, "learning_rate": 3.7916570662745466e-05, "loss": 2.147, "step": 8352000 }, { "epoch": 24.18, "learning_rate": 3.791584701509819e-05, "loss": 2.1553, "step": 8352500 }, { "epoch": 24.18, "learning_rate": 3.791512336745091e-05, "loss": 2.1577, "step": 8353000 }, { "epoch": 24.18, "learning_rate": 3.791439971980363e-05, "loss": 2.151, "step": 8353500 }, { "epoch": 24.18, "learning_rate": 3.7913676072156355e-05, "loss": 2.1455, "step": 8354000 }, { "epoch": 24.18, "learning_rate": 3.791295242450908e-05, "loss": 2.1295, "step": 8354500 }, { "epoch": 24.18, "learning_rate": 3.79122287768618e-05, "loss": 2.1686, "step": 8355000 }, { "epoch": 24.19, "learning_rate": 3.791150512921452e-05, "loss": 2.1363, "step": 8355500 }, { "epoch": 24.19, "learning_rate": 3.791078148156725e-05, "loss": 2.1717, "step": 8356000 }, { "epoch": 24.19, "learning_rate": 3.791005783391997e-05, "loss": 2.1328, "step": 8356500 }, { "epoch": 24.19, "learning_rate": 3.790933708086328e-05, "loss": 2.1363, "step": 8357000 }, { "epoch": 24.19, "learning_rate": 3.7908613433216004e-05, "loss": 2.1395, "step": 8357500 }, { "epoch": 24.19, "learning_rate": 3.7907889785568726e-05, "loss": 2.1482, "step": 8358000 }, { "epoch": 24.19, "learning_rate": 3.790716613792145e-05, "loss": 2.1789, "step": 8358500 }, { "epoch": 24.2, "learning_rate": 3.790644249027418e-05, "loss": 2.1602, "step": 8359000 }, { "epoch": 24.2, "learning_rate": 3.79057188426269e-05, "loss": 2.1632, "step": 8359500 }, { "epoch": 24.2, "learning_rate": 3.790499519497963e-05, "loss": 2.1464, "step": 8360000 }, { "epoch": 24.2, "learning_rate": 3.790427154733235e-05, "loss": 2.144, "step": 8360500 }, { "epoch": 24.2, "learning_rate": 3.7903547899685073e-05, "loss": 2.1492, "step": 8361000 }, { "epoch": 24.2, "learning_rate": 3.790282569933309e-05, "loss": 2.1499, "step": 8361500 }, { "epoch": 24.2, "learning_rate": 3.790210205168581e-05, "loss": 2.152, "step": 8362000 }, { "epoch": 24.21, "learning_rate": 3.7901378404038533e-05, "loss": 2.1433, "step": 8362500 }, { "epoch": 24.21, "learning_rate": 3.7900654756391256e-05, "loss": 2.1359, "step": 8363000 }, { "epoch": 24.21, "learning_rate": 3.789993110874398e-05, "loss": 2.1379, "step": 8363500 }, { "epoch": 24.21, "learning_rate": 3.7899208908392e-05, "loss": 2.1399, "step": 8364000 }, { "epoch": 24.21, "learning_rate": 3.789848526074472e-05, "loss": 2.1406, "step": 8364500 }, { "epoch": 24.21, "learning_rate": 3.7897761613097445e-05, "loss": 2.1426, "step": 8365000 }, { "epoch": 24.21, "learning_rate": 3.7897040860040754e-05, "loss": 2.125, "step": 8365500 }, { "epoch": 24.22, "learning_rate": 3.7896317212393476e-05, "loss": 2.1415, "step": 8366000 }, { "epoch": 24.22, "learning_rate": 3.7895593564746205e-05, "loss": 2.143, "step": 8366500 }, { "epoch": 24.22, "learning_rate": 3.789486991709893e-05, "loss": 2.1408, "step": 8367000 }, { "epoch": 24.22, "learning_rate": 3.789414626945165e-05, "loss": 2.1394, "step": 8367500 }, { "epoch": 24.22, "learning_rate": 3.789342262180438e-05, "loss": 2.1476, "step": 8368000 }, { "epoch": 24.22, "learning_rate": 3.78926989741571e-05, "loss": 2.1441, "step": 8368500 }, { "epoch": 24.22, "learning_rate": 3.789197532650982e-05, "loss": 2.153, "step": 8369000 }, { "epoch": 24.23, "learning_rate": 3.7891251678862545e-05, "loss": 2.1368, "step": 8369500 }, { "epoch": 24.23, "learning_rate": 3.789052803121527e-05, "loss": 2.1512, "step": 8370000 }, { "epoch": 24.23, "learning_rate": 3.788980438356799e-05, "loss": 2.1435, "step": 8370500 }, { "epoch": 24.23, "learning_rate": 3.788908073592071e-05, "loss": 2.1428, "step": 8371000 }, { "epoch": 24.23, "learning_rate": 3.7888357088273434e-05, "loss": 2.1456, "step": 8371500 }, { "epoch": 24.23, "learning_rate": 3.7887633440626156e-05, "loss": 2.1354, "step": 8372000 }, { "epoch": 24.23, "learning_rate": 3.788690979297888e-05, "loss": 2.1351, "step": 8372500 }, { "epoch": 24.24, "learning_rate": 3.78861861453316e-05, "loss": 2.1369, "step": 8373000 }, { "epoch": 24.24, "learning_rate": 3.788546249768433e-05, "loss": 2.1479, "step": 8373500 }, { "epoch": 24.24, "learning_rate": 3.788474029733235e-05, "loss": 2.1492, "step": 8374000 }, { "epoch": 24.24, "learning_rate": 3.7884016649685074e-05, "loss": 2.1351, "step": 8374500 }, { "epoch": 24.24, "learning_rate": 3.7883293002037796e-05, "loss": 2.1281, "step": 8375000 }, { "epoch": 24.24, "learning_rate": 3.788256935439052e-05, "loss": 2.1383, "step": 8375500 }, { "epoch": 24.25, "learning_rate": 3.788184570674324e-05, "loss": 2.1312, "step": 8376000 }, { "epoch": 24.25, "learning_rate": 3.7881123506391256e-05, "loss": 2.1453, "step": 8376500 }, { "epoch": 24.25, "learning_rate": 3.788039985874398e-05, "loss": 2.1364, "step": 8377000 }, { "epoch": 24.25, "learning_rate": 3.78796762110967e-05, "loss": 2.1765, "step": 8377500 }, { "epoch": 24.25, "learning_rate": 3.787895256344943e-05, "loss": 2.1426, "step": 8378000 }, { "epoch": 24.25, "learning_rate": 3.7878230363097445e-05, "loss": 2.1264, "step": 8378500 }, { "epoch": 24.25, "learning_rate": 3.787750816274546e-05, "loss": 2.1494, "step": 8379000 }, { "epoch": 24.26, "learning_rate": 3.7876785962393477e-05, "loss": 2.145, "step": 8379500 }, { "epoch": 24.26, "learning_rate": 3.7876062314746206e-05, "loss": 2.13, "step": 8380000 }, { "epoch": 24.26, "learning_rate": 3.787533866709893e-05, "loss": 2.1509, "step": 8380500 }, { "epoch": 24.26, "learning_rate": 3.787461501945165e-05, "loss": 2.1585, "step": 8381000 }, { "epoch": 24.26, "learning_rate": 3.787389137180438e-05, "loss": 2.146, "step": 8381500 }, { "epoch": 24.26, "learning_rate": 3.78731677241571e-05, "loss": 2.1405, "step": 8382000 }, { "epoch": 24.26, "learning_rate": 3.787244552380512e-05, "loss": 2.1364, "step": 8382500 }, { "epoch": 24.27, "learning_rate": 3.787172187615784e-05, "loss": 2.1505, "step": 8383000 }, { "epoch": 24.27, "learning_rate": 3.787099822851056e-05, "loss": 2.1586, "step": 8383500 }, { "epoch": 24.27, "learning_rate": 3.7870274580863284e-05, "loss": 2.1387, "step": 8384000 }, { "epoch": 24.27, "learning_rate": 3.7869550933216006e-05, "loss": 2.1475, "step": 8384500 }, { "epoch": 24.27, "learning_rate": 3.786882728556873e-05, "loss": 2.1466, "step": 8385000 }, { "epoch": 24.27, "learning_rate": 3.786810363792146e-05, "loss": 2.1278, "step": 8385500 }, { "epoch": 24.27, "learning_rate": 3.786737999027418e-05, "loss": 2.1429, "step": 8386000 }, { "epoch": 24.28, "learning_rate": 3.78666563426269e-05, "loss": 2.1506, "step": 8386500 }, { "epoch": 24.28, "learning_rate": 3.7865932694979624e-05, "loss": 2.1569, "step": 8387000 }, { "epoch": 24.28, "learning_rate": 3.7865209047332346e-05, "loss": 2.1404, "step": 8387500 }, { "epoch": 24.28, "learning_rate": 3.786448539968507e-05, "loss": 2.1653, "step": 8388000 }, { "epoch": 24.28, "learning_rate": 3.786376464662838e-05, "loss": 2.1593, "step": 8388500 }, { "epoch": 24.28, "learning_rate": 3.7863040998981106e-05, "loss": 2.1451, "step": 8389000 }, { "epoch": 24.28, "learning_rate": 3.786231735133383e-05, "loss": 2.1534, "step": 8389500 }, { "epoch": 24.29, "learning_rate": 3.786159370368656e-05, "loss": 2.1465, "step": 8390000 }, { "epoch": 24.29, "learning_rate": 3.786087005603928e-05, "loss": 2.1331, "step": 8390500 }, { "epoch": 24.29, "learning_rate": 3.7860146408392e-05, "loss": 2.1543, "step": 8391000 }, { "epoch": 24.29, "learning_rate": 3.7859422760744724e-05, "loss": 2.1694, "step": 8391500 }, { "epoch": 24.29, "learning_rate": 3.7858699113097446e-05, "loss": 2.1353, "step": 8392000 }, { "epoch": 24.29, "learning_rate": 3.785797546545017e-05, "loss": 2.1492, "step": 8392500 }, { "epoch": 24.29, "learning_rate": 3.785725181780289e-05, "loss": 2.1494, "step": 8393000 }, { "epoch": 24.3, "learning_rate": 3.785652817015561e-05, "loss": 2.1425, "step": 8393500 }, { "epoch": 24.3, "learning_rate": 3.7855804522508335e-05, "loss": 2.1467, "step": 8394000 }, { "epoch": 24.3, "learning_rate": 3.785508232215636e-05, "loss": 2.1427, "step": 8394500 }, { "epoch": 24.3, "learning_rate": 3.785435867450908e-05, "loss": 2.1347, "step": 8395000 }, { "epoch": 24.3, "learning_rate": 3.78536350268618e-05, "loss": 2.1628, "step": 8395500 }, { "epoch": 24.3, "learning_rate": 3.785291282650982e-05, "loss": 2.1373, "step": 8396000 }, { "epoch": 24.3, "learning_rate": 3.7852189178862547e-05, "loss": 2.1618, "step": 8396500 }, { "epoch": 24.31, "learning_rate": 3.785146697851056e-05, "loss": 2.1584, "step": 8397000 }, { "epoch": 24.31, "learning_rate": 3.7850743330863284e-05, "loss": 2.1398, "step": 8397500 }, { "epoch": 24.31, "learning_rate": 3.7850019683216007e-05, "loss": 2.1548, "step": 8398000 }, { "epoch": 24.31, "learning_rate": 3.784929603556873e-05, "loss": 2.1873, "step": 8398500 }, { "epoch": 24.31, "learning_rate": 3.784857238792146e-05, "loss": 2.141, "step": 8399000 }, { "epoch": 24.31, "learning_rate": 3.784784874027418e-05, "loss": 2.1438, "step": 8399500 }, { "epoch": 24.31, "learning_rate": 3.78471250926269e-05, "loss": 2.156, "step": 8400000 }, { "epoch": 24.32, "learning_rate": 3.7846401444979625e-05, "loss": 2.1435, "step": 8400500 }, { "epoch": 24.32, "learning_rate": 3.784567779733235e-05, "loss": 2.1652, "step": 8401000 }, { "epoch": 24.32, "learning_rate": 3.784495414968507e-05, "loss": 2.1485, "step": 8401500 }, { "epoch": 24.32, "learning_rate": 3.7844231949333085e-05, "loss": 2.1684, "step": 8402000 }, { "epoch": 24.32, "learning_rate": 3.784350830168581e-05, "loss": 2.1509, "step": 8402500 }, { "epoch": 24.32, "learning_rate": 3.784278465403853e-05, "loss": 2.141, "step": 8403000 }, { "epoch": 24.32, "learning_rate": 3.784206100639126e-05, "loss": 2.1532, "step": 8403500 }, { "epoch": 24.33, "learning_rate": 3.784133735874398e-05, "loss": 2.1508, "step": 8404000 }, { "epoch": 24.33, "learning_rate": 3.784061371109671e-05, "loss": 2.1503, "step": 8404500 }, { "epoch": 24.33, "learning_rate": 3.783989006344943e-05, "loss": 2.1654, "step": 8405000 }, { "epoch": 24.33, "learning_rate": 3.7839166415802154e-05, "loss": 2.1598, "step": 8405500 }, { "epoch": 24.33, "learning_rate": 3.7838442768154876e-05, "loss": 2.1418, "step": 8406000 }, { "epoch": 24.33, "learning_rate": 3.78377191205076e-05, "loss": 2.1587, "step": 8406500 }, { "epoch": 24.33, "learning_rate": 3.783699547286032e-05, "loss": 2.1497, "step": 8407000 }, { "epoch": 24.34, "learning_rate": 3.7836273272508336e-05, "loss": 2.1368, "step": 8407500 }, { "epoch": 24.34, "learning_rate": 3.783554962486106e-05, "loss": 2.1602, "step": 8408000 }, { "epoch": 24.34, "learning_rate": 3.783482742450908e-05, "loss": 2.1556, "step": 8408500 }, { "epoch": 24.34, "learning_rate": 3.78341037768618e-05, "loss": 2.1496, "step": 8409000 }, { "epoch": 24.34, "learning_rate": 3.783338157650982e-05, "loss": 2.1522, "step": 8409500 }, { "epoch": 24.34, "learning_rate": 3.783265792886254e-05, "loss": 2.1293, "step": 8410000 }, { "epoch": 24.34, "learning_rate": 3.783193428121526e-05, "loss": 2.1526, "step": 8410500 }, { "epoch": 24.35, "learning_rate": 3.7831210633567985e-05, "loss": 2.1673, "step": 8411000 }, { "epoch": 24.35, "learning_rate": 3.7830486985920714e-05, "loss": 2.1437, "step": 8411500 }, { "epoch": 24.35, "learning_rate": 3.7829763338273436e-05, "loss": 2.15, "step": 8412000 }, { "epoch": 24.35, "learning_rate": 3.782903969062616e-05, "loss": 2.1523, "step": 8412500 }, { "epoch": 24.35, "learning_rate": 3.782831749027418e-05, "loss": 2.1173, "step": 8413000 }, { "epoch": 24.35, "learning_rate": 3.78275938426269e-05, "loss": 2.1669, "step": 8413500 }, { "epoch": 24.36, "learning_rate": 3.782687164227492e-05, "loss": 2.1469, "step": 8414000 }, { "epoch": 24.36, "learning_rate": 3.782614799462764e-05, "loss": 2.1393, "step": 8414500 }, { "epoch": 24.36, "learning_rate": 3.782542434698036e-05, "loss": 2.1285, "step": 8415000 }, { "epoch": 24.36, "learning_rate": 3.7824700699333085e-05, "loss": 2.1708, "step": 8415500 }, { "epoch": 24.36, "learning_rate": 3.782397705168581e-05, "loss": 2.1573, "step": 8416000 }, { "epoch": 24.36, "learning_rate": 3.782325485133383e-05, "loss": 2.1815, "step": 8416500 }, { "epoch": 24.36, "learning_rate": 3.782253120368655e-05, "loss": 2.1518, "step": 8417000 }, { "epoch": 24.37, "learning_rate": 3.7821807556039274e-05, "loss": 2.1402, "step": 8417500 }, { "epoch": 24.37, "learning_rate": 3.7821083908392e-05, "loss": 2.1504, "step": 8418000 }, { "epoch": 24.37, "learning_rate": 3.782036026074472e-05, "loss": 2.1609, "step": 8418500 }, { "epoch": 24.37, "learning_rate": 3.781963661309745e-05, "loss": 2.1662, "step": 8419000 }, { "epoch": 24.37, "learning_rate": 3.781891296545017e-05, "loss": 2.1435, "step": 8419500 }, { "epoch": 24.37, "learning_rate": 3.781818931780289e-05, "loss": 2.1426, "step": 8420000 }, { "epoch": 24.37, "learning_rate": 3.7817465670155615e-05, "loss": 2.1394, "step": 8420500 }, { "epoch": 24.38, "learning_rate": 3.781674202250834e-05, "loss": 2.1541, "step": 8421000 }, { "epoch": 24.38, "learning_rate": 3.781601837486106e-05, "loss": 2.1589, "step": 8421500 }, { "epoch": 24.38, "learning_rate": 3.781529472721379e-05, "loss": 2.1702, "step": 8422000 }, { "epoch": 24.38, "learning_rate": 3.781457107956651e-05, "loss": 2.1515, "step": 8422500 }, { "epoch": 24.38, "learning_rate": 3.781384743191923e-05, "loss": 2.1466, "step": 8423000 }, { "epoch": 24.38, "learning_rate": 3.7813123784271955e-05, "loss": 2.1413, "step": 8423500 }, { "epoch": 24.38, "learning_rate": 3.781240013662468e-05, "loss": 2.1335, "step": 8424000 }, { "epoch": 24.39, "learning_rate": 3.78116764889774e-05, "loss": 2.1279, "step": 8424500 }, { "epoch": 24.39, "learning_rate": 3.781095284133012e-05, "loss": 2.1441, "step": 8425000 }, { "epoch": 24.39, "learning_rate": 3.7810229193682844e-05, "loss": 2.1484, "step": 8425500 }, { "epoch": 24.39, "learning_rate": 3.7809506993330866e-05, "loss": 2.152, "step": 8426000 }, { "epoch": 24.39, "learning_rate": 3.780878334568359e-05, "loss": 2.1665, "step": 8426500 }, { "epoch": 24.39, "learning_rate": 3.780805969803631e-05, "loss": 2.1696, "step": 8427000 }, { "epoch": 24.39, "learning_rate": 3.780733749768433e-05, "loss": 2.1409, "step": 8427500 }, { "epoch": 24.4, "learning_rate": 3.780661529733235e-05, "loss": 2.1578, "step": 8428000 }, { "epoch": 24.4, "learning_rate": 3.780589164968507e-05, "loss": 2.163, "step": 8428500 }, { "epoch": 24.4, "learning_rate": 3.780516800203779e-05, "loss": 2.1503, "step": 8429000 }, { "epoch": 24.4, "learning_rate": 3.7804444354390515e-05, "loss": 2.1647, "step": 8429500 }, { "epoch": 24.4, "learning_rate": 3.780372070674324e-05, "loss": 2.1648, "step": 8430000 }, { "epoch": 24.4, "learning_rate": 3.780299705909596e-05, "loss": 2.1327, "step": 8430500 }, { "epoch": 24.4, "learning_rate": 3.780227341144869e-05, "loss": 2.1431, "step": 8431000 }, { "epoch": 24.41, "learning_rate": 3.780154976380141e-05, "loss": 2.1654, "step": 8431500 }, { "epoch": 24.41, "learning_rate": 3.780082611615413e-05, "loss": 2.153, "step": 8432000 }, { "epoch": 24.41, "learning_rate": 3.7800102468506855e-05, "loss": 2.1527, "step": 8432500 }, { "epoch": 24.41, "learning_rate": 3.779937882085958e-05, "loss": 2.147, "step": 8433000 }, { "epoch": 24.41, "learning_rate": 3.77986566205076e-05, "loss": 2.1484, "step": 8433500 }, { "epoch": 24.41, "learning_rate": 3.779793297286032e-05, "loss": 2.1595, "step": 8434000 }, { "epoch": 24.41, "learning_rate": 3.7797209325213044e-05, "loss": 2.1551, "step": 8434500 }, { "epoch": 24.42, "learning_rate": 3.7796485677565767e-05, "loss": 2.1628, "step": 8435000 }, { "epoch": 24.42, "learning_rate": 3.779576202991849e-05, "loss": 2.1486, "step": 8435500 }, { "epoch": 24.42, "learning_rate": 3.779503838227121e-05, "loss": 2.1471, "step": 8436000 }, { "epoch": 24.42, "learning_rate": 3.779431473462394e-05, "loss": 2.1442, "step": 8436500 }, { "epoch": 24.42, "learning_rate": 3.779359108697666e-05, "loss": 2.1657, "step": 8437000 }, { "epoch": 24.42, "learning_rate": 3.7792867439329384e-05, "loss": 2.1852, "step": 8437500 }, { "epoch": 24.42, "learning_rate": 3.779214379168211e-05, "loss": 2.1666, "step": 8438000 }, { "epoch": 24.43, "learning_rate": 3.779142014403483e-05, "loss": 2.1416, "step": 8438500 }, { "epoch": 24.43, "learning_rate": 3.779069649638755e-05, "loss": 2.157, "step": 8439000 }, { "epoch": 24.43, "learning_rate": 3.7789972848740273e-05, "loss": 2.1641, "step": 8439500 }, { "epoch": 24.43, "learning_rate": 3.778925209568359e-05, "loss": 2.1303, "step": 8440000 }, { "epoch": 24.43, "learning_rate": 3.778852844803631e-05, "loss": 2.1406, "step": 8440500 }, { "epoch": 24.43, "learning_rate": 3.778780480038904e-05, "loss": 2.1593, "step": 8441000 }, { "epoch": 24.43, "learning_rate": 3.778708115274176e-05, "loss": 2.1471, "step": 8441500 }, { "epoch": 24.44, "learning_rate": 3.7786357505094485e-05, "loss": 2.1387, "step": 8442000 }, { "epoch": 24.44, "learning_rate": 3.778563385744721e-05, "loss": 2.1386, "step": 8442500 }, { "epoch": 24.44, "learning_rate": 3.778491165709522e-05, "loss": 2.148, "step": 8443000 }, { "epoch": 24.44, "learning_rate": 3.7784188009447945e-05, "loss": 2.1408, "step": 8443500 }, { "epoch": 24.44, "learning_rate": 3.778346436180067e-05, "loss": 2.1449, "step": 8444000 }, { "epoch": 24.44, "learning_rate": 3.778274071415339e-05, "loss": 2.1464, "step": 8444500 }, { "epoch": 24.44, "learning_rate": 3.778201851380141e-05, "loss": 2.1662, "step": 8445000 }, { "epoch": 24.45, "learning_rate": 3.7781294866154134e-05, "loss": 2.1528, "step": 8445500 }, { "epoch": 24.45, "learning_rate": 3.7780571218506856e-05, "loss": 2.1194, "step": 8446000 }, { "epoch": 24.45, "learning_rate": 3.777984901815487e-05, "loss": 2.1515, "step": 8446500 }, { "epoch": 24.45, "learning_rate": 3.7779125370507594e-05, "loss": 2.1565, "step": 8447000 }, { "epoch": 24.45, "learning_rate": 3.7778401722860316e-05, "loss": 2.1509, "step": 8447500 }, { "epoch": 24.45, "learning_rate": 3.777767807521304e-05, "loss": 2.157, "step": 8448000 }, { "epoch": 24.45, "learning_rate": 3.777695442756577e-05, "loss": 2.1513, "step": 8448500 }, { "epoch": 24.46, "learning_rate": 3.777623077991849e-05, "loss": 2.1479, "step": 8449000 }, { "epoch": 24.46, "learning_rate": 3.777550713227121e-05, "loss": 2.159, "step": 8449500 }, { "epoch": 24.46, "learning_rate": 3.777478348462394e-05, "loss": 2.1536, "step": 8450000 }, { "epoch": 24.46, "learning_rate": 3.7774061284271956e-05, "loss": 2.1424, "step": 8450500 }, { "epoch": 24.46, "learning_rate": 3.777333763662468e-05, "loss": 2.1505, "step": 8451000 }, { "epoch": 24.46, "learning_rate": 3.77726139889774e-05, "loss": 2.1396, "step": 8451500 }, { "epoch": 24.47, "learning_rate": 3.7771891788625416e-05, "loss": 2.1605, "step": 8452000 }, { "epoch": 24.47, "learning_rate": 3.777116814097814e-05, "loss": 2.1354, "step": 8452500 }, { "epoch": 24.47, "learning_rate": 3.777044449333087e-05, "loss": 2.1648, "step": 8453000 }, { "epoch": 24.47, "learning_rate": 3.776972084568359e-05, "loss": 2.1515, "step": 8453500 }, { "epoch": 24.47, "learning_rate": 3.776899719803631e-05, "loss": 2.1708, "step": 8454000 }, { "epoch": 24.47, "learning_rate": 3.7768273550389034e-05, "loss": 2.1387, "step": 8454500 }, { "epoch": 24.47, "learning_rate": 3.7767549902741757e-05, "loss": 2.1645, "step": 8455000 }, { "epoch": 24.48, "learning_rate": 3.7766826255094486e-05, "loss": 2.1645, "step": 8455500 }, { "epoch": 24.48, "learning_rate": 3.776610260744721e-05, "loss": 2.1446, "step": 8456000 }, { "epoch": 24.48, "learning_rate": 3.776537895979993e-05, "loss": 2.1566, "step": 8456500 }, { "epoch": 24.48, "learning_rate": 3.776465531215265e-05, "loss": 2.1508, "step": 8457000 }, { "epoch": 24.48, "learning_rate": 3.7763931664505374e-05, "loss": 2.1704, "step": 8457500 }, { "epoch": 24.48, "learning_rate": 3.77632080168581e-05, "loss": 2.1594, "step": 8458000 }, { "epoch": 24.48, "learning_rate": 3.776248436921082e-05, "loss": 2.1443, "step": 8458500 }, { "epoch": 24.49, "learning_rate": 3.776176072156354e-05, "loss": 2.1675, "step": 8459000 }, { "epoch": 24.49, "learning_rate": 3.7761037073916263e-05, "loss": 2.159, "step": 8459500 }, { "epoch": 24.49, "learning_rate": 3.776031342626899e-05, "loss": 2.1595, "step": 8460000 }, { "epoch": 24.49, "learning_rate": 3.7759589778621715e-05, "loss": 2.1358, "step": 8460500 }, { "epoch": 24.49, "learning_rate": 3.775886613097444e-05, "loss": 2.1686, "step": 8461000 }, { "epoch": 24.49, "learning_rate": 3.775814248332716e-05, "loss": 2.1126, "step": 8461500 }, { "epoch": 24.49, "learning_rate": 3.775741883567988e-05, "loss": 2.126, "step": 8462000 }, { "epoch": 24.5, "learning_rate": 3.775669808262319e-05, "loss": 2.1548, "step": 8462500 }, { "epoch": 24.5, "learning_rate": 3.775597443497592e-05, "loss": 2.1439, "step": 8463000 }, { "epoch": 24.5, "learning_rate": 3.775525078732864e-05, "loss": 2.1254, "step": 8463500 }, { "epoch": 24.5, "learning_rate": 3.775452713968137e-05, "loss": 2.1541, "step": 8464000 }, { "epoch": 24.5, "learning_rate": 3.775380349203409e-05, "loss": 2.1311, "step": 8464500 }, { "epoch": 24.5, "learning_rate": 3.7753079844386815e-05, "loss": 2.1514, "step": 8465000 }, { "epoch": 24.5, "learning_rate": 3.775235764403483e-05, "loss": 2.1432, "step": 8465500 }, { "epoch": 24.51, "learning_rate": 3.775163399638755e-05, "loss": 2.1862, "step": 8466000 }, { "epoch": 24.51, "learning_rate": 3.7750910348740275e-05, "loss": 2.1487, "step": 8466500 }, { "epoch": 24.51, "learning_rate": 3.7750186701093e-05, "loss": 2.1608, "step": 8467000 }, { "epoch": 24.51, "learning_rate": 3.774946305344572e-05, "loss": 2.1695, "step": 8467500 }, { "epoch": 24.51, "learning_rate": 3.774874085309374e-05, "loss": 2.168, "step": 8468000 }, { "epoch": 24.51, "learning_rate": 3.7748017205446464e-05, "loss": 2.1523, "step": 8468500 }, { "epoch": 24.51, "learning_rate": 3.774729500509448e-05, "loss": 2.14, "step": 8469000 }, { "epoch": 24.52, "learning_rate": 3.77465713574472e-05, "loss": 2.1614, "step": 8469500 }, { "epoch": 24.52, "learning_rate": 3.7745847709799924e-05, "loss": 2.1773, "step": 8470000 }, { "epoch": 24.52, "learning_rate": 3.774512406215265e-05, "loss": 2.165, "step": 8470500 }, { "epoch": 24.52, "learning_rate": 3.774440186180067e-05, "loss": 2.1444, "step": 8471000 }, { "epoch": 24.52, "learning_rate": 3.774367821415339e-05, "loss": 2.1514, "step": 8471500 }, { "epoch": 24.52, "learning_rate": 3.774295456650612e-05, "loss": 2.1656, "step": 8472000 }, { "epoch": 24.52, "learning_rate": 3.774223091885884e-05, "loss": 2.1391, "step": 8472500 }, { "epoch": 24.53, "learning_rate": 3.774150871850686e-05, "loss": 2.1551, "step": 8473000 }, { "epoch": 24.53, "learning_rate": 3.774078507085958e-05, "loss": 2.1434, "step": 8473500 }, { "epoch": 24.53, "learning_rate": 3.77400614232123e-05, "loss": 2.1786, "step": 8474000 }, { "epoch": 24.53, "learning_rate": 3.7739337775565024e-05, "loss": 2.1434, "step": 8474500 }, { "epoch": 24.53, "learning_rate": 3.7738614127917747e-05, "loss": 2.1644, "step": 8475000 }, { "epoch": 24.53, "learning_rate": 3.773789048027047e-05, "loss": 2.1711, "step": 8475500 }, { "epoch": 24.53, "learning_rate": 3.773716683262319e-05, "loss": 2.1603, "step": 8476000 }, { "epoch": 24.54, "learning_rate": 3.773644463227121e-05, "loss": 2.1337, "step": 8476500 }, { "epoch": 24.54, "learning_rate": 3.7735720984623936e-05, "loss": 2.1638, "step": 8477000 }, { "epoch": 24.54, "learning_rate": 3.773499733697666e-05, "loss": 2.1511, "step": 8477500 }, { "epoch": 24.54, "learning_rate": 3.773427368932939e-05, "loss": 2.1592, "step": 8478000 }, { "epoch": 24.54, "learning_rate": 3.773355004168211e-05, "loss": 2.156, "step": 8478500 }, { "epoch": 24.54, "learning_rate": 3.773282639403483e-05, "loss": 2.1665, "step": 8479000 }, { "epoch": 24.54, "learning_rate": 3.7732102746387554e-05, "loss": 2.1623, "step": 8479500 }, { "epoch": 24.55, "learning_rate": 3.773138054603557e-05, "loss": 2.1508, "step": 8480000 }, { "epoch": 24.55, "learning_rate": 3.773065689838829e-05, "loss": 2.1446, "step": 8480500 }, { "epoch": 24.55, "learning_rate": 3.772993325074102e-05, "loss": 2.1516, "step": 8481000 }, { "epoch": 24.55, "learning_rate": 3.772920960309374e-05, "loss": 2.174, "step": 8481500 }, { "epoch": 24.55, "learning_rate": 3.772848740274176e-05, "loss": 2.1404, "step": 8482000 }, { "epoch": 24.55, "learning_rate": 3.772776375509448e-05, "loss": 2.1727, "step": 8482500 }, { "epoch": 24.55, "learning_rate": 3.77270401074472e-05, "loss": 2.1517, "step": 8483000 }, { "epoch": 24.56, "learning_rate": 3.7726316459799925e-05, "loss": 2.1576, "step": 8483500 }, { "epoch": 24.56, "learning_rate": 3.772559281215265e-05, "loss": 2.1743, "step": 8484000 }, { "epoch": 24.56, "learning_rate": 3.772486916450537e-05, "loss": 2.1554, "step": 8484500 }, { "epoch": 24.56, "learning_rate": 3.772414696415339e-05, "loss": 2.1434, "step": 8485000 }, { "epoch": 24.56, "learning_rate": 3.772342331650612e-05, "loss": 2.1564, "step": 8485500 }, { "epoch": 24.56, "learning_rate": 3.772269966885884e-05, "loss": 2.1354, "step": 8486000 }, { "epoch": 24.56, "learning_rate": 3.772197746850686e-05, "loss": 2.1493, "step": 8486500 }, { "epoch": 24.57, "learning_rate": 3.772125382085958e-05, "loss": 2.1307, "step": 8487000 }, { "epoch": 24.57, "learning_rate": 3.77205301732123e-05, "loss": 2.1443, "step": 8487500 }, { "epoch": 24.57, "learning_rate": 3.7719806525565025e-05, "loss": 2.142, "step": 8488000 }, { "epoch": 24.57, "learning_rate": 3.771908287791775e-05, "loss": 2.1597, "step": 8488500 }, { "epoch": 24.57, "learning_rate": 3.771835923027047e-05, "loss": 2.1447, "step": 8489000 }, { "epoch": 24.57, "learning_rate": 3.77176355826232e-05, "loss": 2.1432, "step": 8489500 }, { "epoch": 24.58, "learning_rate": 3.771691193497592e-05, "loss": 2.1555, "step": 8490000 }, { "epoch": 24.58, "learning_rate": 3.771618828732864e-05, "loss": 2.1501, "step": 8490500 }, { "epoch": 24.58, "learning_rate": 3.7715464639681365e-05, "loss": 2.1475, "step": 8491000 }, { "epoch": 24.58, "learning_rate": 3.771474099203409e-05, "loss": 2.1381, "step": 8491500 }, { "epoch": 24.58, "learning_rate": 3.771401734438681e-05, "loss": 2.1692, "step": 8492000 }, { "epoch": 24.58, "learning_rate": 3.771329369673953e-05, "loss": 2.1397, "step": 8492500 }, { "epoch": 24.58, "learning_rate": 3.7712571496387554e-05, "loss": 2.1638, "step": 8493000 }, { "epoch": 24.59, "learning_rate": 3.7711847848740277e-05, "loss": 2.1483, "step": 8493500 }, { "epoch": 24.59, "learning_rate": 3.7711124201093e-05, "loss": 2.1694, "step": 8494000 }, { "epoch": 24.59, "learning_rate": 3.771040055344572e-05, "loss": 2.1654, "step": 8494500 }, { "epoch": 24.59, "learning_rate": 3.770967835309374e-05, "loss": 2.151, "step": 8495000 }, { "epoch": 24.59, "learning_rate": 3.7708954705446466e-05, "loss": 2.1478, "step": 8495500 }, { "epoch": 24.59, "learning_rate": 3.770823105779919e-05, "loss": 2.1552, "step": 8496000 }, { "epoch": 24.59, "learning_rate": 3.770750741015191e-05, "loss": 2.1453, "step": 8496500 }, { "epoch": 24.6, "learning_rate": 3.770678376250463e-05, "loss": 2.1508, "step": 8497000 }, { "epoch": 24.6, "learning_rate": 3.770606156215265e-05, "loss": 2.1658, "step": 8497500 }, { "epoch": 24.6, "learning_rate": 3.770533791450537e-05, "loss": 2.1358, "step": 8498000 }, { "epoch": 24.6, "learning_rate": 3.770461571415339e-05, "loss": 2.1402, "step": 8498500 }, { "epoch": 24.6, "learning_rate": 3.770389351380141e-05, "loss": 2.1474, "step": 8499000 }, { "epoch": 24.6, "learning_rate": 3.770316986615413e-05, "loss": 2.1489, "step": 8499500 }, { "epoch": 24.6, "learning_rate": 3.770244621850685e-05, "loss": 2.1612, "step": 8500000 }, { "epoch": 24.61, "learning_rate": 3.7701722570859575e-05, "loss": 2.1618, "step": 8500500 }, { "epoch": 24.61, "learning_rate": 3.7700998923212304e-05, "loss": 2.1287, "step": 8501000 }, { "epoch": 24.61, "learning_rate": 3.7700275275565026e-05, "loss": 2.1393, "step": 8501500 }, { "epoch": 24.61, "learning_rate": 3.769955162791775e-05, "loss": 2.1764, "step": 8502000 }, { "epoch": 24.61, "learning_rate": 3.769882798027047e-05, "loss": 2.1439, "step": 8502500 }, { "epoch": 24.61, "learning_rate": 3.76981043326232e-05, "loss": 2.1417, "step": 8503000 }, { "epoch": 24.61, "learning_rate": 3.7697382132271215e-05, "loss": 2.1384, "step": 8503500 }, { "epoch": 24.62, "learning_rate": 3.769665848462394e-05, "loss": 2.1684, "step": 8504000 }, { "epoch": 24.62, "learning_rate": 3.769593483697666e-05, "loss": 2.1463, "step": 8504500 }, { "epoch": 24.62, "learning_rate": 3.769521118932938e-05, "loss": 2.152, "step": 8505000 }, { "epoch": 24.62, "learning_rate": 3.76944889889774e-05, "loss": 2.1421, "step": 8505500 }, { "epoch": 24.62, "learning_rate": 3.769376534133012e-05, "loss": 2.153, "step": 8506000 }, { "epoch": 24.62, "learning_rate": 3.769304169368285e-05, "loss": 2.168, "step": 8506500 }, { "epoch": 24.62, "learning_rate": 3.769231804603557e-05, "loss": 2.1493, "step": 8507000 }, { "epoch": 24.63, "learning_rate": 3.769159439838829e-05, "loss": 2.1357, "step": 8507500 }, { "epoch": 24.63, "learning_rate": 3.769087075074102e-05, "loss": 2.1444, "step": 8508000 }, { "epoch": 24.63, "learning_rate": 3.7690147103093744e-05, "loss": 2.1287, "step": 8508500 }, { "epoch": 24.63, "learning_rate": 3.7689423455446466e-05, "loss": 2.1488, "step": 8509000 }, { "epoch": 24.63, "learning_rate": 3.768869980779919e-05, "loss": 2.1404, "step": 8509500 }, { "epoch": 24.63, "learning_rate": 3.768797616015191e-05, "loss": 2.146, "step": 8510000 }, { "epoch": 24.63, "learning_rate": 3.768725251250463e-05, "loss": 2.147, "step": 8510500 }, { "epoch": 24.64, "learning_rate": 3.768653031215265e-05, "loss": 2.1665, "step": 8511000 }, { "epoch": 24.64, "learning_rate": 3.768580666450537e-05, "loss": 2.1381, "step": 8511500 }, { "epoch": 24.64, "learning_rate": 3.76850830168581e-05, "loss": 2.1553, "step": 8512000 }, { "epoch": 24.64, "learning_rate": 3.768435936921082e-05, "loss": 2.1499, "step": 8512500 }, { "epoch": 24.64, "learning_rate": 3.7683635721563544e-05, "loss": 2.1427, "step": 8513000 }, { "epoch": 24.64, "learning_rate": 3.7682912073916267e-05, "loss": 2.1481, "step": 8513500 }, { "epoch": 24.64, "learning_rate": 3.768218842626899e-05, "loss": 2.1519, "step": 8514000 }, { "epoch": 24.65, "learning_rate": 3.768146477862171e-05, "loss": 2.1449, "step": 8514500 }, { "epoch": 24.65, "learning_rate": 3.768074113097443e-05, "loss": 2.1476, "step": 8515000 }, { "epoch": 24.65, "learning_rate": 3.7680018930622456e-05, "loss": 2.1553, "step": 8515500 }, { "epoch": 24.65, "learning_rate": 3.767929528297518e-05, "loss": 2.1688, "step": 8516000 }, { "epoch": 24.65, "learning_rate": 3.76785716353279e-05, "loss": 2.1618, "step": 8516500 }, { "epoch": 24.65, "learning_rate": 3.767784798768062e-05, "loss": 2.1477, "step": 8517000 }, { "epoch": 24.65, "learning_rate": 3.767712434003335e-05, "loss": 2.1426, "step": 8517500 }, { "epoch": 24.66, "learning_rate": 3.767640213968137e-05, "loss": 2.1434, "step": 8518000 }, { "epoch": 24.66, "learning_rate": 3.767567849203409e-05, "loss": 2.1435, "step": 8518500 }, { "epoch": 24.66, "learning_rate": 3.767495484438681e-05, "loss": 2.1395, "step": 8519000 }, { "epoch": 24.66, "learning_rate": 3.7674231196739534e-05, "loss": 2.1539, "step": 8519500 }, { "epoch": 24.66, "learning_rate": 3.7673507549092256e-05, "loss": 2.1318, "step": 8520000 }, { "epoch": 24.66, "learning_rate": 3.767278534874028e-05, "loss": 2.1463, "step": 8520500 }, { "epoch": 24.66, "learning_rate": 3.7672061701093e-05, "loss": 2.1514, "step": 8521000 }, { "epoch": 24.67, "learning_rate": 3.767133805344572e-05, "loss": 2.135, "step": 8521500 }, { "epoch": 24.67, "learning_rate": 3.7670614405798445e-05, "loss": 2.1493, "step": 8522000 }, { "epoch": 24.67, "learning_rate": 3.766989075815117e-05, "loss": 2.1363, "step": 8522500 }, { "epoch": 24.67, "learning_rate": 3.766916855779919e-05, "loss": 2.1642, "step": 8523000 }, { "epoch": 24.67, "learning_rate": 3.766844491015191e-05, "loss": 2.1185, "step": 8523500 }, { "epoch": 24.67, "learning_rate": 3.7667721262504634e-05, "loss": 2.1613, "step": 8524000 }, { "epoch": 24.67, "learning_rate": 3.7666997614857356e-05, "loss": 2.1347, "step": 8524500 }, { "epoch": 24.68, "learning_rate": 3.766627541450538e-05, "loss": 2.1434, "step": 8525000 }, { "epoch": 24.68, "learning_rate": 3.76655517668581e-05, "loss": 2.1502, "step": 8525500 }, { "epoch": 24.68, "learning_rate": 3.766482811921082e-05, "loss": 2.1379, "step": 8526000 }, { "epoch": 24.68, "learning_rate": 3.7664104471563545e-05, "loss": 2.1669, "step": 8526500 }, { "epoch": 24.68, "learning_rate": 3.766338082391627e-05, "loss": 2.1514, "step": 8527000 }, { "epoch": 24.68, "learning_rate": 3.766265862356428e-05, "loss": 2.1491, "step": 8527500 }, { "epoch": 24.69, "learning_rate": 3.76619364232123e-05, "loss": 2.1686, "step": 8528000 }, { "epoch": 24.69, "learning_rate": 3.766121277556503e-05, "loss": 2.1302, "step": 8528500 }, { "epoch": 24.69, "learning_rate": 3.766048912791775e-05, "loss": 2.1454, "step": 8529000 }, { "epoch": 24.69, "learning_rate": 3.765976548027047e-05, "loss": 2.1506, "step": 8529500 }, { "epoch": 24.69, "learning_rate": 3.7659041832623194e-05, "loss": 2.1558, "step": 8530000 }, { "epoch": 24.69, "learning_rate": 3.765831818497592e-05, "loss": 2.1551, "step": 8530500 }, { "epoch": 24.69, "learning_rate": 3.7657594537328645e-05, "loss": 2.1655, "step": 8531000 }, { "epoch": 24.7, "learning_rate": 3.765687088968137e-05, "loss": 2.137, "step": 8531500 }, { "epoch": 24.7, "learning_rate": 3.765614724203409e-05, "loss": 2.1513, "step": 8532000 }, { "epoch": 24.7, "learning_rate": 3.7655425041682105e-05, "loss": 2.1449, "step": 8532500 }, { "epoch": 24.7, "learning_rate": 3.765470139403483e-05, "loss": 2.1463, "step": 8533000 }, { "epoch": 24.7, "learning_rate": 3.765398064097814e-05, "loss": 2.1611, "step": 8533500 }, { "epoch": 24.7, "learning_rate": 3.7653256993330866e-05, "loss": 2.1431, "step": 8534000 }, { "epoch": 24.7, "learning_rate": 3.765253334568359e-05, "loss": 2.1596, "step": 8534500 }, { "epoch": 24.71, "learning_rate": 3.765180969803631e-05, "loss": 2.1665, "step": 8535000 }, { "epoch": 24.71, "learning_rate": 3.765108605038903e-05, "loss": 2.1813, "step": 8535500 }, { "epoch": 24.71, "learning_rate": 3.7650362402741755e-05, "loss": 2.1393, "step": 8536000 }, { "epoch": 24.71, "learning_rate": 3.764963875509448e-05, "loss": 2.1481, "step": 8536500 }, { "epoch": 24.71, "learning_rate": 3.764891800203779e-05, "loss": 2.1378, "step": 8537000 }, { "epoch": 24.71, "learning_rate": 3.7648194354390515e-05, "loss": 2.1382, "step": 8537500 }, { "epoch": 24.71, "learning_rate": 3.764747070674324e-05, "loss": 2.1477, "step": 8538000 }, { "epoch": 24.72, "learning_rate": 3.7646747059095966e-05, "loss": 2.1378, "step": 8538500 }, { "epoch": 24.72, "learning_rate": 3.764602341144869e-05, "loss": 2.1487, "step": 8539000 }, { "epoch": 24.72, "learning_rate": 3.764529976380141e-05, "loss": 2.1462, "step": 8539500 }, { "epoch": 24.72, "learning_rate": 3.764457611615413e-05, "loss": 2.1527, "step": 8540000 }, { "epoch": 24.72, "learning_rate": 3.7643852468506855e-05, "loss": 2.1642, "step": 8540500 }, { "epoch": 24.72, "learning_rate": 3.764312882085958e-05, "loss": 2.1475, "step": 8541000 }, { "epoch": 24.72, "learning_rate": 3.7642405173212306e-05, "loss": 2.1504, "step": 8541500 }, { "epoch": 24.73, "learning_rate": 3.764168152556503e-05, "loss": 2.1524, "step": 8542000 }, { "epoch": 24.73, "learning_rate": 3.764095787791775e-05, "loss": 2.1634, "step": 8542500 }, { "epoch": 24.73, "learning_rate": 3.764023423027047e-05, "loss": 2.1607, "step": 8543000 }, { "epoch": 24.73, "learning_rate": 3.7639510582623195e-05, "loss": 2.1791, "step": 8543500 }, { "epoch": 24.73, "learning_rate": 3.763878693497592e-05, "loss": 2.1314, "step": 8544000 }, { "epoch": 24.73, "learning_rate": 3.763806328732864e-05, "loss": 2.1775, "step": 8544500 }, { "epoch": 24.73, "learning_rate": 3.763733963968136e-05, "loss": 2.1709, "step": 8545000 }, { "epoch": 24.74, "learning_rate": 3.763661599203409e-05, "loss": 2.1546, "step": 8545500 }, { "epoch": 24.74, "learning_rate": 3.7635893791682106e-05, "loss": 2.1463, "step": 8546000 }, { "epoch": 24.74, "learning_rate": 3.763517014403483e-05, "loss": 2.1397, "step": 8546500 }, { "epoch": 24.74, "learning_rate": 3.763444649638756e-05, "loss": 2.1586, "step": 8547000 }, { "epoch": 24.74, "learning_rate": 3.763372284874028e-05, "loss": 2.1403, "step": 8547500 }, { "epoch": 24.74, "learning_rate": 3.7632999201093e-05, "loss": 2.1604, "step": 8548000 }, { "epoch": 24.74, "learning_rate": 3.7632275553445724e-05, "loss": 2.1639, "step": 8548500 }, { "epoch": 24.75, "learning_rate": 3.7631551905798446e-05, "loss": 2.1615, "step": 8549000 }, { "epoch": 24.75, "learning_rate": 3.763082970544646e-05, "loss": 2.1237, "step": 8549500 }, { "epoch": 24.75, "learning_rate": 3.7630106057799184e-05, "loss": 2.1689, "step": 8550000 }, { "epoch": 24.75, "learning_rate": 3.7629382410151906e-05, "loss": 2.1586, "step": 8550500 }, { "epoch": 24.75, "learning_rate": 3.762866020979993e-05, "loss": 2.1449, "step": 8551000 }, { "epoch": 24.75, "learning_rate": 3.7627938009447944e-05, "loss": 2.1342, "step": 8551500 }, { "epoch": 24.75, "learning_rate": 3.7627214361800667e-05, "loss": 2.1597, "step": 8552000 }, { "epoch": 24.76, "learning_rate": 3.762649071415339e-05, "loss": 2.1683, "step": 8552500 }, { "epoch": 24.76, "learning_rate": 3.762576706650612e-05, "loss": 2.1685, "step": 8553000 }, { "epoch": 24.76, "learning_rate": 3.762504341885884e-05, "loss": 2.1642, "step": 8553500 }, { "epoch": 24.76, "learning_rate": 3.762431977121156e-05, "loss": 2.1676, "step": 8554000 }, { "epoch": 24.76, "learning_rate": 3.7623596123564285e-05, "loss": 2.1489, "step": 8554500 }, { "epoch": 24.76, "learning_rate": 3.762287247591701e-05, "loss": 2.167, "step": 8555000 }, { "epoch": 24.76, "learning_rate": 3.762214882826973e-05, "loss": 2.1369, "step": 8555500 }, { "epoch": 24.77, "learning_rate": 3.762142518062246e-05, "loss": 2.133, "step": 8556000 }, { "epoch": 24.77, "learning_rate": 3.7620702980270474e-05, "loss": 2.1603, "step": 8556500 }, { "epoch": 24.77, "learning_rate": 3.7619979332623196e-05, "loss": 2.1641, "step": 8557000 }, { "epoch": 24.77, "learning_rate": 3.761925568497592e-05, "loss": 2.1751, "step": 8557500 }, { "epoch": 24.77, "learning_rate": 3.761853203732864e-05, "loss": 2.1658, "step": 8558000 }, { "epoch": 24.77, "learning_rate": 3.7617809836976656e-05, "loss": 2.1656, "step": 8558500 }, { "epoch": 24.77, "learning_rate": 3.761708618932938e-05, "loss": 2.1233, "step": 8559000 }, { "epoch": 24.78, "learning_rate": 3.761636254168211e-05, "loss": 2.1824, "step": 8559500 }, { "epoch": 24.78, "learning_rate": 3.761563889403483e-05, "loss": 2.1388, "step": 8560000 }, { "epoch": 24.78, "learning_rate": 3.761491524638756e-05, "loss": 2.1483, "step": 8560500 }, { "epoch": 24.78, "learning_rate": 3.761419159874028e-05, "loss": 2.1481, "step": 8561000 }, { "epoch": 24.78, "learning_rate": 3.7613467951093e-05, "loss": 2.1261, "step": 8561500 }, { "epoch": 24.78, "learning_rate": 3.7612744303445725e-05, "loss": 2.162, "step": 8562000 }, { "epoch": 24.78, "learning_rate": 3.761202065579845e-05, "loss": 2.1458, "step": 8562500 }, { "epoch": 24.79, "learning_rate": 3.761129700815117e-05, "loss": 2.1722, "step": 8563000 }, { "epoch": 24.79, "learning_rate": 3.761057336050389e-05, "loss": 2.1785, "step": 8563500 }, { "epoch": 24.79, "learning_rate": 3.7609849712856614e-05, "loss": 2.144, "step": 8564000 }, { "epoch": 24.79, "learning_rate": 3.7609126065209336e-05, "loss": 2.143, "step": 8564500 }, { "epoch": 24.79, "learning_rate": 3.760840386485736e-05, "loss": 2.1617, "step": 8565000 }, { "epoch": 24.79, "learning_rate": 3.760768021721008e-05, "loss": 2.1707, "step": 8565500 }, { "epoch": 24.8, "learning_rate": 3.7606958016858096e-05, "loss": 2.1646, "step": 8566000 }, { "epoch": 24.8, "learning_rate": 3.760623436921082e-05, "loss": 2.1984, "step": 8566500 }, { "epoch": 24.8, "learning_rate": 3.760551072156354e-05, "loss": 2.1484, "step": 8567000 }, { "epoch": 24.8, "learning_rate": 3.760478707391626e-05, "loss": 2.1529, "step": 8567500 }, { "epoch": 24.8, "learning_rate": 3.760406342626899e-05, "loss": 2.1374, "step": 8568000 }, { "epoch": 24.8, "learning_rate": 3.7603339778621714e-05, "loss": 2.1576, "step": 8568500 }, { "epoch": 24.8, "learning_rate": 3.7602616130974436e-05, "loss": 2.1527, "step": 8569000 }, { "epoch": 24.81, "learning_rate": 3.760189248332716e-05, "loss": 2.1506, "step": 8569500 }, { "epoch": 24.81, "learning_rate": 3.760116883567988e-05, "loss": 2.1429, "step": 8570000 }, { "epoch": 24.81, "learning_rate": 3.760044518803261e-05, "loss": 2.1583, "step": 8570500 }, { "epoch": 24.81, "learning_rate": 3.7599722987680626e-05, "loss": 2.1784, "step": 8571000 }, { "epoch": 24.81, "learning_rate": 3.759899934003335e-05, "loss": 2.1536, "step": 8571500 }, { "epoch": 24.81, "learning_rate": 3.759827569238607e-05, "loss": 2.1325, "step": 8572000 }, { "epoch": 24.81, "learning_rate": 3.759755204473879e-05, "loss": 2.164, "step": 8572500 }, { "epoch": 24.82, "learning_rate": 3.7596828397091514e-05, "loss": 2.1706, "step": 8573000 }, { "epoch": 24.82, "learning_rate": 3.759610474944424e-05, "loss": 2.1727, "step": 8573500 }, { "epoch": 24.82, "learning_rate": 3.759538110179696e-05, "loss": 2.1567, "step": 8574000 }, { "epoch": 24.82, "learning_rate": 3.759465745414968e-05, "loss": 2.1439, "step": 8574500 }, { "epoch": 24.82, "learning_rate": 3.759393525379771e-05, "loss": 2.1419, "step": 8575000 }, { "epoch": 24.82, "learning_rate": 3.759321160615043e-05, "loss": 2.1888, "step": 8575500 }, { "epoch": 24.82, "learning_rate": 3.759248940579845e-05, "loss": 2.1594, "step": 8576000 }, { "epoch": 24.83, "learning_rate": 3.7591767205446464e-05, "loss": 2.1384, "step": 8576500 }, { "epoch": 24.83, "learning_rate": 3.7591043557799186e-05, "loss": 2.1556, "step": 8577000 }, { "epoch": 24.83, "learning_rate": 3.759031991015191e-05, "loss": 2.1577, "step": 8577500 }, { "epoch": 24.83, "learning_rate": 3.758959626250464e-05, "loss": 2.1828, "step": 8578000 }, { "epoch": 24.83, "learning_rate": 3.758887406215265e-05, "loss": 2.1814, "step": 8578500 }, { "epoch": 24.83, "learning_rate": 3.7588150414505375e-05, "loss": 2.1376, "step": 8579000 }, { "epoch": 24.83, "learning_rate": 3.75874267668581e-05, "loss": 2.1499, "step": 8579500 }, { "epoch": 24.84, "learning_rate": 3.758670311921082e-05, "loss": 2.1818, "step": 8580000 }, { "epoch": 24.84, "learning_rate": 3.758597947156354e-05, "loss": 2.1828, "step": 8580500 }, { "epoch": 24.84, "learning_rate": 3.7585255823916264e-05, "loss": 2.1494, "step": 8581000 }, { "epoch": 24.84, "learning_rate": 3.7584532176268986e-05, "loss": 2.165, "step": 8581500 }, { "epoch": 24.84, "learning_rate": 3.758380852862171e-05, "loss": 2.1683, "step": 8582000 }, { "epoch": 24.84, "learning_rate": 3.758308488097444e-05, "loss": 2.1592, "step": 8582500 }, { "epoch": 24.84, "learning_rate": 3.758236123332716e-05, "loss": 2.1569, "step": 8583000 }, { "epoch": 24.85, "learning_rate": 3.758163903297518e-05, "loss": 2.1516, "step": 8583500 }, { "epoch": 24.85, "learning_rate": 3.7580915385327904e-05, "loss": 2.1631, "step": 8584000 }, { "epoch": 24.85, "learning_rate": 3.7580191737680626e-05, "loss": 2.1678, "step": 8584500 }, { "epoch": 24.85, "learning_rate": 3.757946809003335e-05, "loss": 2.1427, "step": 8585000 }, { "epoch": 24.85, "learning_rate": 3.757874444238607e-05, "loss": 2.1494, "step": 8585500 }, { "epoch": 24.85, "learning_rate": 3.757802079473879e-05, "loss": 2.1409, "step": 8586000 }, { "epoch": 24.85, "learning_rate": 3.7577297147091515e-05, "loss": 2.1849, "step": 8586500 }, { "epoch": 24.86, "learning_rate": 3.757657494673954e-05, "loss": 2.1366, "step": 8587000 }, { "epoch": 24.86, "learning_rate": 3.757585274638755e-05, "loss": 2.1416, "step": 8587500 }, { "epoch": 24.86, "learning_rate": 3.757513054603557e-05, "loss": 2.1363, "step": 8588000 }, { "epoch": 24.86, "learning_rate": 3.757440689838829e-05, "loss": 2.143, "step": 8588500 }, { "epoch": 24.86, "learning_rate": 3.757368325074101e-05, "loss": 2.1744, "step": 8589000 }, { "epoch": 24.86, "learning_rate": 3.7572959603093735e-05, "loss": 2.1523, "step": 8589500 }, { "epoch": 24.86, "learning_rate": 3.757223595544646e-05, "loss": 2.1579, "step": 8590000 }, { "epoch": 24.87, "learning_rate": 3.757151230779919e-05, "loss": 2.1664, "step": 8590500 }, { "epoch": 24.87, "learning_rate": 3.757078866015191e-05, "loss": 2.1494, "step": 8591000 }, { "epoch": 24.87, "learning_rate": 3.757006501250464e-05, "loss": 2.1543, "step": 8591500 }, { "epoch": 24.87, "learning_rate": 3.756934136485736e-05, "loss": 2.1459, "step": 8592000 }, { "epoch": 24.87, "learning_rate": 3.756861771721008e-05, "loss": 2.1324, "step": 8592500 }, { "epoch": 24.87, "learning_rate": 3.7567894069562805e-05, "loss": 2.1464, "step": 8593000 }, { "epoch": 24.87, "learning_rate": 3.756717042191553e-05, "loss": 2.1465, "step": 8593500 }, { "epoch": 24.88, "learning_rate": 3.756644677426825e-05, "loss": 2.1682, "step": 8594000 }, { "epoch": 24.88, "learning_rate": 3.756572312662097e-05, "loss": 2.156, "step": 8594500 }, { "epoch": 24.88, "learning_rate": 3.7564999478973694e-05, "loss": 2.1662, "step": 8595000 }, { "epoch": 24.88, "learning_rate": 3.7564275831326416e-05, "loss": 2.1568, "step": 8595500 }, { "epoch": 24.88, "learning_rate": 3.756355363097444e-05, "loss": 2.1578, "step": 8596000 }, { "epoch": 24.88, "learning_rate": 3.7562831430622454e-05, "loss": 2.1709, "step": 8596500 }, { "epoch": 24.88, "learning_rate": 3.7562107782975176e-05, "loss": 2.1561, "step": 8597000 }, { "epoch": 24.89, "learning_rate": 3.75613841353279e-05, "loss": 2.1699, "step": 8597500 }, { "epoch": 24.89, "learning_rate": 3.756066048768063e-05, "loss": 2.1833, "step": 8598000 }, { "epoch": 24.89, "learning_rate": 3.755993684003335e-05, "loss": 2.1327, "step": 8598500 }, { "epoch": 24.89, "learning_rate": 3.755921319238607e-05, "loss": 2.1584, "step": 8599000 }, { "epoch": 24.89, "learning_rate": 3.7558489544738794e-05, "loss": 2.1395, "step": 8599500 }, { "epoch": 24.89, "learning_rate": 3.7557767344386816e-05, "loss": 2.1771, "step": 8600000 }, { "epoch": 24.89, "learning_rate": 3.755704369673954e-05, "loss": 2.1429, "step": 8600500 }, { "epoch": 24.9, "learning_rate": 3.7556321496387554e-05, "loss": 2.1833, "step": 8601000 }, { "epoch": 24.9, "learning_rate": 3.755559929603557e-05, "loss": 2.18, "step": 8601500 }, { "epoch": 24.9, "learning_rate": 3.755487564838829e-05, "loss": 2.1448, "step": 8602000 }, { "epoch": 24.9, "learning_rate": 3.7554152000741014e-05, "loss": 2.1571, "step": 8602500 }, { "epoch": 24.9, "learning_rate": 3.7553428353093736e-05, "loss": 2.1701, "step": 8603000 }, { "epoch": 24.9, "learning_rate": 3.7552704705446465e-05, "loss": 2.17, "step": 8603500 }, { "epoch": 24.91, "learning_rate": 3.755198250509448e-05, "loss": 2.1698, "step": 8604000 }, { "epoch": 24.91, "learning_rate": 3.75512588574472e-05, "loss": 2.1715, "step": 8604500 }, { "epoch": 24.91, "learning_rate": 3.7550535209799925e-05, "loss": 2.1627, "step": 8605000 }, { "epoch": 24.91, "learning_rate": 3.7549811562152654e-05, "loss": 2.1445, "step": 8605500 }, { "epoch": 24.91, "learning_rate": 3.7549087914505376e-05, "loss": 2.1485, "step": 8606000 }, { "epoch": 24.91, "learning_rate": 3.75483642668581e-05, "loss": 2.1535, "step": 8606500 }, { "epoch": 24.91, "learning_rate": 3.754764061921082e-05, "loss": 2.165, "step": 8607000 }, { "epoch": 24.92, "learning_rate": 3.754691697156354e-05, "loss": 2.1476, "step": 8607500 }, { "epoch": 24.92, "learning_rate": 3.7546193323916265e-05, "loss": 2.159, "step": 8608000 }, { "epoch": 24.92, "learning_rate": 3.754546967626899e-05, "loss": 2.1687, "step": 8608500 }, { "epoch": 24.92, "learning_rate": 3.754474602862172e-05, "loss": 2.1579, "step": 8609000 }, { "epoch": 24.92, "learning_rate": 3.754402238097444e-05, "loss": 2.1337, "step": 8609500 }, { "epoch": 24.92, "learning_rate": 3.754329873332716e-05, "loss": 2.1646, "step": 8610000 }, { "epoch": 24.92, "learning_rate": 3.754257508567988e-05, "loss": 2.175, "step": 8610500 }, { "epoch": 24.93, "learning_rate": 3.7541851438032606e-05, "loss": 2.1481, "step": 8611000 }, { "epoch": 24.93, "learning_rate": 3.754112779038533e-05, "loss": 2.1402, "step": 8611500 }, { "epoch": 24.93, "learning_rate": 3.754040414273805e-05, "loss": 2.1396, "step": 8612000 }, { "epoch": 24.93, "learning_rate": 3.7539683389681366e-05, "loss": 2.1379, "step": 8612500 }, { "epoch": 24.93, "learning_rate": 3.753895974203409e-05, "loss": 2.127, "step": 8613000 }, { "epoch": 24.93, "learning_rate": 3.753823609438682e-05, "loss": 2.1571, "step": 8613500 }, { "epoch": 24.93, "learning_rate": 3.753751244673954e-05, "loss": 2.16, "step": 8614000 }, { "epoch": 24.94, "learning_rate": 3.753678879909226e-05, "loss": 2.1639, "step": 8614500 }, { "epoch": 24.94, "learning_rate": 3.7536065151444984e-05, "loss": 2.1724, "step": 8615000 }, { "epoch": 24.94, "learning_rate": 3.7535342951093e-05, "loss": 2.1632, "step": 8615500 }, { "epoch": 24.94, "learning_rate": 3.753461930344572e-05, "loss": 2.1744, "step": 8616000 }, { "epoch": 24.94, "learning_rate": 3.753389710309374e-05, "loss": 2.1434, "step": 8616500 }, { "epoch": 24.94, "learning_rate": 3.7533173455446466e-05, "loss": 2.156, "step": 8617000 }, { "epoch": 24.94, "learning_rate": 3.753244980779919e-05, "loss": 2.1484, "step": 8617500 }, { "epoch": 24.95, "learning_rate": 3.753172616015191e-05, "loss": 2.1475, "step": 8618000 }, { "epoch": 24.95, "learning_rate": 3.753100251250463e-05, "loss": 2.1588, "step": 8618500 }, { "epoch": 24.95, "learning_rate": 3.7530278864857355e-05, "loss": 2.1377, "step": 8619000 }, { "epoch": 24.95, "learning_rate": 3.752955521721008e-05, "loss": 2.1535, "step": 8619500 }, { "epoch": 24.95, "learning_rate": 3.75288315695628e-05, "loss": 2.1499, "step": 8620000 }, { "epoch": 24.95, "learning_rate": 3.752810792191553e-05, "loss": 2.1498, "step": 8620500 }, { "epoch": 24.95, "learning_rate": 3.752738427426825e-05, "loss": 2.1387, "step": 8621000 }, { "epoch": 24.96, "learning_rate": 3.752666062662097e-05, "loss": 2.1541, "step": 8621500 }, { "epoch": 24.96, "learning_rate": 3.7525936978973695e-05, "loss": 2.1232, "step": 8622000 }, { "epoch": 24.96, "learning_rate": 3.752521477862172e-05, "loss": 2.1425, "step": 8622500 }, { "epoch": 24.96, "learning_rate": 3.752449113097444e-05, "loss": 2.1524, "step": 8623000 }, { "epoch": 24.96, "learning_rate": 3.7523768930622455e-05, "loss": 2.1705, "step": 8623500 }, { "epoch": 24.96, "learning_rate": 3.752304528297518e-05, "loss": 2.1598, "step": 8624000 }, { "epoch": 24.96, "learning_rate": 3.75223216353279e-05, "loss": 2.1421, "step": 8624500 }, { "epoch": 24.97, "learning_rate": 3.752159798768062e-05, "loss": 2.1577, "step": 8625000 }, { "epoch": 24.97, "learning_rate": 3.7520874340033344e-05, "loss": 2.1655, "step": 8625500 }, { "epoch": 24.97, "learning_rate": 3.7520150692386066e-05, "loss": 2.1316, "step": 8626000 }, { "epoch": 24.97, "learning_rate": 3.751942704473879e-05, "loss": 2.1615, "step": 8626500 }, { "epoch": 24.97, "learning_rate": 3.751870339709152e-05, "loss": 2.1835, "step": 8627000 }, { "epoch": 24.97, "learning_rate": 3.751798119673953e-05, "loss": 2.1476, "step": 8627500 }, { "epoch": 24.97, "learning_rate": 3.751725754909226e-05, "loss": 2.1477, "step": 8628000 }, { "epoch": 24.98, "learning_rate": 3.751653534874028e-05, "loss": 2.1672, "step": 8628500 }, { "epoch": 24.98, "learning_rate": 3.7515811701093e-05, "loss": 2.1265, "step": 8629000 }, { "epoch": 24.98, "learning_rate": 3.751508805344572e-05, "loss": 2.1589, "step": 8629500 }, { "epoch": 24.98, "learning_rate": 3.7514364405798444e-05, "loss": 2.153, "step": 8630000 }, { "epoch": 24.98, "learning_rate": 3.751364075815117e-05, "loss": 2.1568, "step": 8630500 }, { "epoch": 24.98, "learning_rate": 3.751291855779919e-05, "loss": 2.1553, "step": 8631000 }, { "epoch": 24.98, "learning_rate": 3.751219491015191e-05, "loss": 2.1537, "step": 8631500 }, { "epoch": 24.99, "learning_rate": 3.7511471262504633e-05, "loss": 2.1553, "step": 8632000 }, { "epoch": 24.99, "learning_rate": 3.7510747614857356e-05, "loss": 2.1513, "step": 8632500 }, { "epoch": 24.99, "learning_rate": 3.751002396721008e-05, "loss": 2.1253, "step": 8633000 }, { "epoch": 24.99, "learning_rate": 3.75093003195628e-05, "loss": 2.1523, "step": 8633500 }, { "epoch": 24.99, "learning_rate": 3.7508578119210816e-05, "loss": 2.1388, "step": 8634000 }, { "epoch": 24.99, "learning_rate": 3.7507854471563545e-05, "loss": 2.1643, "step": 8634500 }, { "epoch": 24.99, "learning_rate": 3.750713082391627e-05, "loss": 2.1456, "step": 8635000 }, { "epoch": 25.0, "learning_rate": 3.7506407176268996e-05, "loss": 2.1356, "step": 8635500 }, { "epoch": 25.0, "learning_rate": 3.750568352862172e-05, "loss": 2.151, "step": 8636000 }, { "epoch": 25.0, "learning_rate": 3.750495988097444e-05, "loss": 2.1853, "step": 8636500 }, { "epoch": 25.0, "eval_accuracy": 0.6643911972760643, "eval_accuracy_mlm": 0.6288164705108058, "eval_accuracy_nsp": 0.855142007305459, "eval_loss": 2.1985487937927246, "eval_runtime": 331.5793, "eval_samples_per_second": 1316.084, "eval_steps_per_second": 54.838, "step": 8636800 }, { "epoch": 25.0, "learning_rate": 3.750423623332716e-05, "loss": 2.1611, "step": 8637000 }, { "epoch": 25.0, "learning_rate": 3.7503512585679885e-05, "loss": 2.1248, "step": 8637500 }, { "epoch": 25.0, "learning_rate": 3.750278893803261e-05, "loss": 2.116, "step": 8638000 }, { "epoch": 25.0, "learning_rate": 3.750206529038533e-05, "loss": 2.1289, "step": 8638500 }, { "epoch": 25.01, "learning_rate": 3.7501343090033345e-05, "loss": 2.1352, "step": 8639000 }, { "epoch": 25.01, "learning_rate": 3.750061944238607e-05, "loss": 2.1219, "step": 8639500 }, { "epoch": 25.01, "learning_rate": 3.7499895794738796e-05, "loss": 2.1537, "step": 8640000 }, { "epoch": 25.01, "learning_rate": 3.749917214709152e-05, "loss": 2.1422, "step": 8640500 }, { "epoch": 25.01, "learning_rate": 3.7498449946739534e-05, "loss": 2.1252, "step": 8641000 }, { "epoch": 25.01, "learning_rate": 3.7497726299092256e-05, "loss": 2.1215, "step": 8641500 }, { "epoch": 25.02, "learning_rate": 3.749700265144498e-05, "loss": 2.109, "step": 8642000 }, { "epoch": 25.02, "learning_rate": 3.7496280451092994e-05, "loss": 2.1269, "step": 8642500 }, { "epoch": 25.02, "learning_rate": 3.749555680344572e-05, "loss": 2.113, "step": 8643000 }, { "epoch": 25.02, "learning_rate": 3.7494833155798445e-05, "loss": 2.1451, "step": 8643500 }, { "epoch": 25.02, "learning_rate": 3.749410950815117e-05, "loss": 2.1177, "step": 8644000 }, { "epoch": 25.02, "learning_rate": 3.749338730779919e-05, "loss": 2.1566, "step": 8644500 }, { "epoch": 25.02, "learning_rate": 3.749266366015191e-05, "loss": 2.1421, "step": 8645000 }, { "epoch": 25.03, "learning_rate": 3.7491940012504634e-05, "loss": 2.1486, "step": 8645500 }, { "epoch": 25.03, "learning_rate": 3.7491216364857357e-05, "loss": 2.1403, "step": 8646000 }, { "epoch": 25.03, "learning_rate": 3.749049271721008e-05, "loss": 2.1392, "step": 8646500 }, { "epoch": 25.03, "learning_rate": 3.74897690695628e-05, "loss": 2.1235, "step": 8647000 }, { "epoch": 25.03, "learning_rate": 3.748904542191552e-05, "loss": 2.1229, "step": 8647500 }, { "epoch": 25.03, "learning_rate": 3.7488321774268245e-05, "loss": 2.1268, "step": 8648000 }, { "epoch": 25.03, "learning_rate": 3.748759812662097e-05, "loss": 2.1283, "step": 8648500 }, { "epoch": 25.04, "learning_rate": 3.748687592626899e-05, "loss": 2.1199, "step": 8649000 }, { "epoch": 25.04, "learning_rate": 3.748615227862171e-05, "loss": 2.1399, "step": 8649500 }, { "epoch": 25.04, "learning_rate": 3.748543007826973e-05, "loss": 2.1184, "step": 8650000 }, { "epoch": 25.04, "learning_rate": 3.748470643062246e-05, "loss": 2.1636, "step": 8650500 }, { "epoch": 25.04, "learning_rate": 3.748398278297518e-05, "loss": 2.1227, "step": 8651000 }, { "epoch": 25.04, "learning_rate": 3.74832591353279e-05, "loss": 2.1397, "step": 8651500 }, { "epoch": 25.04, "learning_rate": 3.7482535487680624e-05, "loss": 2.1339, "step": 8652000 }, { "epoch": 25.05, "learning_rate": 3.7481811840033346e-05, "loss": 2.1513, "step": 8652500 }, { "epoch": 25.05, "learning_rate": 3.748108819238607e-05, "loss": 2.1268, "step": 8653000 }, { "epoch": 25.05, "learning_rate": 3.748036599203409e-05, "loss": 2.1305, "step": 8653500 }, { "epoch": 25.05, "learning_rate": 3.747964234438681e-05, "loss": 2.1514, "step": 8654000 }, { "epoch": 25.05, "learning_rate": 3.7478918696739535e-05, "loss": 2.1455, "step": 8654500 }, { "epoch": 25.05, "learning_rate": 3.747819504909226e-05, "loss": 2.1103, "step": 8655000 }, { "epoch": 25.05, "learning_rate": 3.747747284874027e-05, "loss": 2.1387, "step": 8655500 }, { "epoch": 25.06, "learning_rate": 3.7476749201092995e-05, "loss": 2.1374, "step": 8656000 }, { "epoch": 25.06, "learning_rate": 3.7476025553445724e-05, "loss": 2.1478, "step": 8656500 }, { "epoch": 25.06, "learning_rate": 3.7475301905798446e-05, "loss": 2.1449, "step": 8657000 }, { "epoch": 25.06, "learning_rate": 3.747457825815117e-05, "loss": 2.165, "step": 8657500 }, { "epoch": 25.06, "learning_rate": 3.74738546105039e-05, "loss": 2.1407, "step": 8658000 }, { "epoch": 25.06, "learning_rate": 3.747313096285662e-05, "loss": 2.1436, "step": 8658500 }, { "epoch": 25.06, "learning_rate": 3.747240731520934e-05, "loss": 2.1387, "step": 8659000 }, { "epoch": 25.07, "learning_rate": 3.7471683667562064e-05, "loss": 2.1306, "step": 8659500 }, { "epoch": 25.07, "learning_rate": 3.7470960019914786e-05, "loss": 2.133, "step": 8660000 }, { "epoch": 25.07, "learning_rate": 3.747023637226751e-05, "loss": 2.146, "step": 8660500 }, { "epoch": 25.07, "learning_rate": 3.746951272462023e-05, "loss": 2.132, "step": 8661000 }, { "epoch": 25.07, "learning_rate": 3.746878907697295e-05, "loss": 2.1471, "step": 8661500 }, { "epoch": 25.07, "learning_rate": 3.7468065429325675e-05, "loss": 2.1235, "step": 8662000 }, { "epoch": 25.07, "learning_rate": 3.74673432289737e-05, "loss": 2.1359, "step": 8662500 }, { "epoch": 25.08, "learning_rate": 3.746661958132642e-05, "loss": 2.1529, "step": 8663000 }, { "epoch": 25.08, "learning_rate": 3.7465897380974435e-05, "loss": 2.1217, "step": 8663500 }, { "epoch": 25.08, "learning_rate": 3.746517518062245e-05, "loss": 2.1262, "step": 8664000 }, { "epoch": 25.08, "learning_rate": 3.746445153297517e-05, "loss": 2.1333, "step": 8664500 }, { "epoch": 25.08, "learning_rate": 3.7463727885327895e-05, "loss": 2.1118, "step": 8665000 }, { "epoch": 25.08, "learning_rate": 3.7463004237680624e-05, "loss": 2.1291, "step": 8665500 }, { "epoch": 25.08, "learning_rate": 3.7462280590033347e-05, "loss": 2.1271, "step": 8666000 }, { "epoch": 25.09, "learning_rate": 3.7461556942386076e-05, "loss": 2.1451, "step": 8666500 }, { "epoch": 25.09, "learning_rate": 3.74608332947388e-05, "loss": 2.1524, "step": 8667000 }, { "epoch": 25.09, "learning_rate": 3.746010964709152e-05, "loss": 2.1012, "step": 8667500 }, { "epoch": 25.09, "learning_rate": 3.745938599944424e-05, "loss": 2.1293, "step": 8668000 }, { "epoch": 25.09, "learning_rate": 3.7458662351796964e-05, "loss": 2.1278, "step": 8668500 }, { "epoch": 25.09, "learning_rate": 3.745793870414969e-05, "loss": 2.1313, "step": 8669000 }, { "epoch": 25.09, "learning_rate": 3.74572165037977e-05, "loss": 2.1278, "step": 8669500 }, { "epoch": 25.1, "learning_rate": 3.7456492856150425e-05, "loss": 2.1348, "step": 8670000 }, { "epoch": 25.1, "learning_rate": 3.745576920850315e-05, "loss": 2.1306, "step": 8670500 }, { "epoch": 25.1, "learning_rate": 3.7455045560855876e-05, "loss": 2.1573, "step": 8671000 }, { "epoch": 25.1, "learning_rate": 3.74543219132086e-05, "loss": 2.1213, "step": 8671500 }, { "epoch": 25.1, "learning_rate": 3.745359826556132e-05, "loss": 2.1283, "step": 8672000 }, { "epoch": 25.1, "learning_rate": 3.745287461791405e-05, "loss": 2.1506, "step": 8672500 }, { "epoch": 25.1, "learning_rate": 3.745215097026677e-05, "loss": 2.155, "step": 8673000 }, { "epoch": 25.11, "learning_rate": 3.7451427322619494e-05, "loss": 2.1521, "step": 8673500 }, { "epoch": 25.11, "learning_rate": 3.745070512226751e-05, "loss": 2.1436, "step": 8674000 }, { "epoch": 25.11, "learning_rate": 3.744998147462023e-05, "loss": 2.1416, "step": 8674500 }, { "epoch": 25.11, "learning_rate": 3.7449257826972954e-05, "loss": 2.1242, "step": 8675000 }, { "epoch": 25.11, "learning_rate": 3.7448534179325676e-05, "loss": 2.1427, "step": 8675500 }, { "epoch": 25.11, "learning_rate": 3.74478105316784e-05, "loss": 2.164, "step": 8676000 }, { "epoch": 25.11, "learning_rate": 3.744708833132642e-05, "loss": 2.1568, "step": 8676500 }, { "epoch": 25.12, "learning_rate": 3.744636468367914e-05, "loss": 2.1389, "step": 8677000 }, { "epoch": 25.12, "learning_rate": 3.744564248332716e-05, "loss": 2.1179, "step": 8677500 }, { "epoch": 25.12, "learning_rate": 3.744491883567988e-05, "loss": 2.1256, "step": 8678000 }, { "epoch": 25.12, "learning_rate": 3.74441951880326e-05, "loss": 2.1189, "step": 8678500 }, { "epoch": 25.12, "learning_rate": 3.7443471540385325e-05, "loss": 2.1361, "step": 8679000 }, { "epoch": 25.12, "learning_rate": 3.744274789273805e-05, "loss": 2.1518, "step": 8679500 }, { "epoch": 25.13, "learning_rate": 3.7442024245090776e-05, "loss": 2.157, "step": 8680000 }, { "epoch": 25.13, "learning_rate": 3.74413005974435e-05, "loss": 2.1375, "step": 8680500 }, { "epoch": 25.13, "learning_rate": 3.7440579844386814e-05, "loss": 2.1262, "step": 8681000 }, { "epoch": 25.13, "learning_rate": 3.7439856196739536e-05, "loss": 2.1367, "step": 8681500 }, { "epoch": 25.13, "learning_rate": 3.743913254909226e-05, "loss": 2.1424, "step": 8682000 }, { "epoch": 25.13, "learning_rate": 3.743840890144498e-05, "loss": 2.1308, "step": 8682500 }, { "epoch": 25.13, "learning_rate": 3.74376852537977e-05, "loss": 2.1413, "step": 8683000 }, { "epoch": 25.14, "learning_rate": 3.7436961606150425e-05, "loss": 2.1377, "step": 8683500 }, { "epoch": 25.14, "learning_rate": 3.743623795850315e-05, "loss": 2.1568, "step": 8684000 }, { "epoch": 25.14, "learning_rate": 3.7435514310855877e-05, "loss": 2.1175, "step": 8684500 }, { "epoch": 25.14, "learning_rate": 3.74347906632086e-05, "loss": 2.1596, "step": 8685000 }, { "epoch": 25.14, "learning_rate": 3.743406701556132e-05, "loss": 2.1602, "step": 8685500 }, { "epoch": 25.14, "learning_rate": 3.743334336791404e-05, "loss": 2.1425, "step": 8686000 }, { "epoch": 25.14, "learning_rate": 3.7432619720266765e-05, "loss": 2.1432, "step": 8686500 }, { "epoch": 25.15, "learning_rate": 3.743189607261949e-05, "loss": 2.1264, "step": 8687000 }, { "epoch": 25.15, "learning_rate": 3.743117242497222e-05, "loss": 2.1438, "step": 8687500 }, { "epoch": 25.15, "learning_rate": 3.743044877732494e-05, "loss": 2.1249, "step": 8688000 }, { "epoch": 25.15, "learning_rate": 3.742972512967766e-05, "loss": 2.1357, "step": 8688500 }, { "epoch": 25.15, "learning_rate": 3.7429001482030383e-05, "loss": 2.1598, "step": 8689000 }, { "epoch": 25.15, "learning_rate": 3.7428277834383106e-05, "loss": 2.1356, "step": 8689500 }, { "epoch": 25.15, "learning_rate": 3.742755418673583e-05, "loss": 2.1433, "step": 8690000 }, { "epoch": 25.16, "learning_rate": 3.742683053908855e-05, "loss": 2.1297, "step": 8690500 }, { "epoch": 25.16, "learning_rate": 3.742610833873657e-05, "loss": 2.1297, "step": 8691000 }, { "epoch": 25.16, "learning_rate": 3.742538613838459e-05, "loss": 2.1294, "step": 8691500 }, { "epoch": 25.16, "learning_rate": 3.742466249073731e-05, "loss": 2.1544, "step": 8692000 }, { "epoch": 25.16, "learning_rate": 3.742393884309003e-05, "loss": 2.1588, "step": 8692500 }, { "epoch": 25.16, "learning_rate": 3.7423215195442755e-05, "loss": 2.1517, "step": 8693000 }, { "epoch": 25.16, "learning_rate": 3.742249299509078e-05, "loss": 2.1401, "step": 8693500 }, { "epoch": 25.17, "learning_rate": 3.742177079473879e-05, "loss": 2.1348, "step": 8694000 }, { "epoch": 25.17, "learning_rate": 3.7421047147091515e-05, "loss": 2.119, "step": 8694500 }, { "epoch": 25.17, "learning_rate": 3.7420323499444244e-05, "loss": 2.1308, "step": 8695000 }, { "epoch": 25.17, "learning_rate": 3.7419599851796966e-05, "loss": 2.1231, "step": 8695500 }, { "epoch": 25.17, "learning_rate": 3.741887620414969e-05, "loss": 2.1295, "step": 8696000 }, { "epoch": 25.17, "learning_rate": 3.741815255650241e-05, "loss": 2.1423, "step": 8696500 }, { "epoch": 25.17, "learning_rate": 3.7417430356150426e-05, "loss": 2.1238, "step": 8697000 }, { "epoch": 25.18, "learning_rate": 3.7416706708503155e-05, "loss": 2.136, "step": 8697500 }, { "epoch": 25.18, "learning_rate": 3.741598306085588e-05, "loss": 2.1424, "step": 8698000 }, { "epoch": 25.18, "learning_rate": 3.74152594132086e-05, "loss": 2.1348, "step": 8698500 }, { "epoch": 25.18, "learning_rate": 3.741453576556132e-05, "loss": 2.1387, "step": 8699000 }, { "epoch": 25.18, "learning_rate": 3.741381356520934e-05, "loss": 2.1476, "step": 8699500 }, { "epoch": 25.18, "learning_rate": 3.741308991756206e-05, "loss": 2.1415, "step": 8700000 }, { "epoch": 25.18, "learning_rate": 3.741236626991478e-05, "loss": 2.1634, "step": 8700500 }, { "epoch": 25.19, "learning_rate": 3.7411642622267504e-05, "loss": 2.1488, "step": 8701000 }, { "epoch": 25.19, "learning_rate": 3.7410918974620226e-05, "loss": 2.1551, "step": 8701500 }, { "epoch": 25.19, "learning_rate": 3.7410195326972955e-05, "loss": 2.1446, "step": 8702000 }, { "epoch": 25.19, "learning_rate": 3.740947167932568e-05, "loss": 2.144, "step": 8702500 }, { "epoch": 25.19, "learning_rate": 3.7408748031678407e-05, "loss": 2.1337, "step": 8703000 }, { "epoch": 25.19, "learning_rate": 3.740802583132642e-05, "loss": 2.1356, "step": 8703500 }, { "epoch": 25.19, "learning_rate": 3.740730363097444e-05, "loss": 2.1515, "step": 8704000 }, { "epoch": 25.2, "learning_rate": 3.740657998332716e-05, "loss": 2.1283, "step": 8704500 }, { "epoch": 25.2, "learning_rate": 3.740585633567988e-05, "loss": 2.1364, "step": 8705000 }, { "epoch": 25.2, "learning_rate": 3.7405132688032604e-05, "loss": 2.1344, "step": 8705500 }, { "epoch": 25.2, "learning_rate": 3.740441048768063e-05, "loss": 2.1311, "step": 8706000 }, { "epoch": 25.2, "learning_rate": 3.740368684003335e-05, "loss": 2.1441, "step": 8706500 }, { "epoch": 25.2, "learning_rate": 3.7402964639681364e-05, "loss": 2.1606, "step": 8707000 }, { "epoch": 25.2, "learning_rate": 3.740224099203409e-05, "loss": 2.1437, "step": 8707500 }, { "epoch": 25.21, "learning_rate": 3.740151734438681e-05, "loss": 2.1245, "step": 8708000 }, { "epoch": 25.21, "learning_rate": 3.740079369673953e-05, "loss": 2.1367, "step": 8708500 }, { "epoch": 25.21, "learning_rate": 3.7400070049092253e-05, "loss": 2.1456, "step": 8709000 }, { "epoch": 25.21, "learning_rate": 3.7399346401444976e-05, "loss": 2.1287, "step": 8709500 }, { "epoch": 25.21, "learning_rate": 3.7398622753797705e-05, "loss": 2.1102, "step": 8710000 }, { "epoch": 25.21, "learning_rate": 3.739789910615043e-05, "loss": 2.1369, "step": 8710500 }, { "epoch": 25.21, "learning_rate": 3.7397175458503156e-05, "loss": 2.1073, "step": 8711000 }, { "epoch": 25.22, "learning_rate": 3.739645181085588e-05, "loss": 2.1512, "step": 8711500 }, { "epoch": 25.22, "learning_rate": 3.7395729610503894e-05, "loss": 2.1504, "step": 8712000 }, { "epoch": 25.22, "learning_rate": 3.7395005962856616e-05, "loss": 2.1513, "step": 8712500 }, { "epoch": 25.22, "learning_rate": 3.739428231520934e-05, "loss": 2.1373, "step": 8713000 }, { "epoch": 25.22, "learning_rate": 3.739355866756206e-05, "loss": 2.1274, "step": 8713500 }, { "epoch": 25.22, "learning_rate": 3.739283501991478e-05, "loss": 2.1575, "step": 8714000 }, { "epoch": 25.22, "learning_rate": 3.7392112819562805e-05, "loss": 2.1315, "step": 8714500 }, { "epoch": 25.23, "learning_rate": 3.739138917191553e-05, "loss": 2.1499, "step": 8715000 }, { "epoch": 25.23, "learning_rate": 3.739066552426825e-05, "loss": 2.1287, "step": 8715500 }, { "epoch": 25.23, "learning_rate": 3.738994187662097e-05, "loss": 2.1483, "step": 8716000 }, { "epoch": 25.23, "learning_rate": 3.7389218228973694e-05, "loss": 2.1349, "step": 8716500 }, { "epoch": 25.23, "learning_rate": 3.7388494581326416e-05, "loss": 2.1438, "step": 8717000 }, { "epoch": 25.23, "learning_rate": 3.7387770933679145e-05, "loss": 2.1411, "step": 8717500 }, { "epoch": 25.24, "learning_rate": 3.738704728603187e-05, "loss": 2.1614, "step": 8718000 }, { "epoch": 25.24, "learning_rate": 3.738632363838459e-05, "loss": 2.135, "step": 8718500 }, { "epoch": 25.24, "learning_rate": 3.738559999073731e-05, "loss": 2.1449, "step": 8719000 }, { "epoch": 25.24, "learning_rate": 3.7384876343090034e-05, "loss": 2.1572, "step": 8719500 }, { "epoch": 25.24, "learning_rate": 3.7384152695442756e-05, "loss": 2.1376, "step": 8720000 }, { "epoch": 25.24, "learning_rate": 3.738343049509078e-05, "loss": 2.1297, "step": 8720500 }, { "epoch": 25.24, "learning_rate": 3.73827068474435e-05, "loss": 2.1333, "step": 8721000 }, { "epoch": 25.25, "learning_rate": 3.738198319979622e-05, "loss": 2.1251, "step": 8721500 }, { "epoch": 25.25, "learning_rate": 3.738126099944424e-05, "loss": 2.1384, "step": 8722000 }, { "epoch": 25.25, "learning_rate": 3.738053735179696e-05, "loss": 2.13, "step": 8722500 }, { "epoch": 25.25, "learning_rate": 3.737981370414968e-05, "loss": 2.157, "step": 8723000 }, { "epoch": 25.25, "learning_rate": 3.7379090056502405e-05, "loss": 2.1382, "step": 8723500 }, { "epoch": 25.25, "learning_rate": 3.737836785615043e-05, "loss": 2.1392, "step": 8724000 }, { "epoch": 25.25, "learning_rate": 3.737764420850315e-05, "loss": 2.12, "step": 8724500 }, { "epoch": 25.26, "learning_rate": 3.737692056085588e-05, "loss": 2.1294, "step": 8725000 }, { "epoch": 25.26, "learning_rate": 3.73761969132086e-05, "loss": 2.1359, "step": 8725500 }, { "epoch": 25.26, "learning_rate": 3.7375473265561323e-05, "loss": 2.1526, "step": 8726000 }, { "epoch": 25.26, "learning_rate": 3.7374749617914046e-05, "loss": 2.1395, "step": 8726500 }, { "epoch": 25.26, "learning_rate": 3.737402597026677e-05, "loss": 2.1501, "step": 8727000 }, { "epoch": 25.26, "learning_rate": 3.7373303769914783e-05, "loss": 2.1576, "step": 8727500 }, { "epoch": 25.26, "learning_rate": 3.7372580122267506e-05, "loss": 2.1446, "step": 8728000 }, { "epoch": 25.27, "learning_rate": 3.7371856474620235e-05, "loss": 2.1384, "step": 8728500 }, { "epoch": 25.27, "learning_rate": 3.737113282697296e-05, "loss": 2.1047, "step": 8729000 }, { "epoch": 25.27, "learning_rate": 3.737041062662097e-05, "loss": 2.1382, "step": 8729500 }, { "epoch": 25.27, "learning_rate": 3.7369686978973695e-05, "loss": 2.1428, "step": 8730000 }, { "epoch": 25.27, "learning_rate": 3.736896333132642e-05, "loss": 2.1387, "step": 8730500 }, { "epoch": 25.27, "learning_rate": 3.736823968367914e-05, "loss": 2.1367, "step": 8731000 }, { "epoch": 25.27, "learning_rate": 3.736751603603186e-05, "loss": 2.122, "step": 8731500 }, { "epoch": 25.28, "learning_rate": 3.7366792388384584e-05, "loss": 2.1418, "step": 8732000 }, { "epoch": 25.28, "learning_rate": 3.736606874073731e-05, "loss": 2.117, "step": 8732500 }, { "epoch": 25.28, "learning_rate": 3.7365346540385335e-05, "loss": 2.1505, "step": 8733000 }, { "epoch": 25.28, "learning_rate": 3.736462289273806e-05, "loss": 2.1168, "step": 8733500 }, { "epoch": 25.28, "learning_rate": 3.736389924509078e-05, "loss": 2.1482, "step": 8734000 }, { "epoch": 25.28, "learning_rate": 3.73631755974435e-05, "loss": 2.1437, "step": 8734500 }, { "epoch": 25.28, "learning_rate": 3.7362451949796224e-05, "loss": 2.1474, "step": 8735000 }, { "epoch": 25.29, "learning_rate": 3.7361728302148946e-05, "loss": 2.1335, "step": 8735500 }, { "epoch": 25.29, "learning_rate": 3.736100465450167e-05, "loss": 2.1469, "step": 8736000 }, { "epoch": 25.29, "learning_rate": 3.736028100685439e-05, "loss": 2.1337, "step": 8736500 }, { "epoch": 25.29, "learning_rate": 3.735955735920711e-05, "loss": 2.142, "step": 8737000 }, { "epoch": 25.29, "learning_rate": 3.7358833711559835e-05, "loss": 2.1372, "step": 8737500 }, { "epoch": 25.29, "learning_rate": 3.735811006391256e-05, "loss": 2.1352, "step": 8738000 }, { "epoch": 25.29, "learning_rate": 3.7357386416265286e-05, "loss": 2.1173, "step": 8738500 }, { "epoch": 25.3, "learning_rate": 3.7356665663208595e-05, "loss": 2.1318, "step": 8739000 }, { "epoch": 25.3, "learning_rate": 3.735594201556132e-05, "loss": 2.1281, "step": 8739500 }, { "epoch": 25.3, "learning_rate": 3.7355218367914046e-05, "loss": 2.1388, "step": 8740000 }, { "epoch": 25.3, "learning_rate": 3.735449472026677e-05, "loss": 2.158, "step": 8740500 }, { "epoch": 25.3, "learning_rate": 3.7353772519914784e-05, "loss": 2.1679, "step": 8741000 }, { "epoch": 25.3, "learning_rate": 3.7353048872267506e-05, "loss": 2.1372, "step": 8741500 }, { "epoch": 25.3, "learning_rate": 3.7352325224620235e-05, "loss": 2.1399, "step": 8742000 }, { "epoch": 25.31, "learning_rate": 3.735160157697296e-05, "loss": 2.1442, "step": 8742500 }, { "epoch": 25.31, "learning_rate": 3.735087792932568e-05, "loss": 2.143, "step": 8743000 }, { "epoch": 25.31, "learning_rate": 3.7350155728973695e-05, "loss": 2.1479, "step": 8743500 }, { "epoch": 25.31, "learning_rate": 3.734943208132642e-05, "loss": 2.1425, "step": 8744000 }, { "epoch": 25.31, "learning_rate": 3.734870843367914e-05, "loss": 2.1431, "step": 8744500 }, { "epoch": 25.31, "learning_rate": 3.734798478603186e-05, "loss": 2.1581, "step": 8745000 }, { "epoch": 25.31, "learning_rate": 3.7347261138384584e-05, "loss": 2.1435, "step": 8745500 }, { "epoch": 25.32, "learning_rate": 3.734653749073731e-05, "loss": 2.1275, "step": 8746000 }, { "epoch": 25.32, "learning_rate": 3.7345813843090036e-05, "loss": 2.1488, "step": 8746500 }, { "epoch": 25.32, "learning_rate": 3.734509019544276e-05, "loss": 2.1501, "step": 8747000 }, { "epoch": 25.32, "learning_rate": 3.734436654779549e-05, "loss": 2.1434, "step": 8747500 }, { "epoch": 25.32, "learning_rate": 3.734364290014821e-05, "loss": 2.1363, "step": 8748000 }, { "epoch": 25.32, "learning_rate": 3.7342920699796225e-05, "loss": 2.1485, "step": 8748500 }, { "epoch": 25.32, "learning_rate": 3.734219849944424e-05, "loss": 2.1238, "step": 8749000 }, { "epoch": 25.33, "learning_rate": 3.734147629909226e-05, "loss": 2.1669, "step": 8749500 }, { "epoch": 25.33, "learning_rate": 3.7340752651444985e-05, "loss": 2.1388, "step": 8750000 }, { "epoch": 25.33, "learning_rate": 3.734002900379771e-05, "loss": 2.1322, "step": 8750500 }, { "epoch": 25.33, "learning_rate": 3.733930535615043e-05, "loss": 2.1292, "step": 8751000 }, { "epoch": 25.33, "learning_rate": 3.733858170850315e-05, "loss": 2.133, "step": 8751500 }, { "epoch": 25.33, "learning_rate": 3.733785950815117e-05, "loss": 2.1313, "step": 8752000 }, { "epoch": 25.33, "learning_rate": 3.733713586050389e-05, "loss": 2.1622, "step": 8752500 }, { "epoch": 25.34, "learning_rate": 3.733641221285661e-05, "loss": 2.1408, "step": 8753000 }, { "epoch": 25.34, "learning_rate": 3.7335688565209334e-05, "loss": 2.1379, "step": 8753500 }, { "epoch": 25.34, "learning_rate": 3.733496491756206e-05, "loss": 2.1283, "step": 8754000 }, { "epoch": 25.34, "learning_rate": 3.733424271721008e-05, "loss": 2.1493, "step": 8754500 }, { "epoch": 25.34, "learning_rate": 3.73335190695628e-05, "loss": 2.1383, "step": 8755000 }, { "epoch": 25.34, "learning_rate": 3.733279542191553e-05, "loss": 2.131, "step": 8755500 }, { "epoch": 25.35, "learning_rate": 3.733207177426825e-05, "loss": 2.1597, "step": 8756000 }, { "epoch": 25.35, "learning_rate": 3.7331348126620974e-05, "loss": 2.1396, "step": 8756500 }, { "epoch": 25.35, "learning_rate": 3.7330624478973696e-05, "loss": 2.1267, "step": 8757000 }, { "epoch": 25.35, "learning_rate": 3.732990227862171e-05, "loss": 2.151, "step": 8757500 }, { "epoch": 25.35, "learning_rate": 3.7329178630974434e-05, "loss": 2.143, "step": 8758000 }, { "epoch": 25.35, "learning_rate": 3.732845498332716e-05, "loss": 2.1423, "step": 8758500 }, { "epoch": 25.35, "learning_rate": 3.732773278297518e-05, "loss": 2.1291, "step": 8759000 }, { "epoch": 25.36, "learning_rate": 3.73270091353279e-05, "loss": 2.1456, "step": 8759500 }, { "epoch": 25.36, "learning_rate": 3.732628548768062e-05, "loss": 2.1502, "step": 8760000 }, { "epoch": 25.36, "learning_rate": 3.7325561840033345e-05, "loss": 2.1525, "step": 8760500 }, { "epoch": 25.36, "learning_rate": 3.732483819238607e-05, "loss": 2.1447, "step": 8761000 }, { "epoch": 25.36, "learning_rate": 3.732411454473879e-05, "loss": 2.1554, "step": 8761500 }, { "epoch": 25.36, "learning_rate": 3.732339089709151e-05, "loss": 2.1539, "step": 8762000 }, { "epoch": 25.36, "learning_rate": 3.732266724944424e-05, "loss": 2.1576, "step": 8762500 }, { "epoch": 25.37, "learning_rate": 3.732194360179696e-05, "loss": 2.1255, "step": 8763000 }, { "epoch": 25.37, "learning_rate": 3.7321219954149686e-05, "loss": 2.1441, "step": 8763500 }, { "epoch": 25.37, "learning_rate": 3.7320496306502415e-05, "loss": 2.1539, "step": 8764000 }, { "epoch": 25.37, "learning_rate": 3.731977410615043e-05, "loss": 2.1425, "step": 8764500 }, { "epoch": 25.37, "learning_rate": 3.731905045850315e-05, "loss": 2.1528, "step": 8765000 }, { "epoch": 25.37, "learning_rate": 3.7318326810855875e-05, "loss": 2.1503, "step": 8765500 }, { "epoch": 25.37, "learning_rate": 3.73176031632086e-05, "loss": 2.1622, "step": 8766000 }, { "epoch": 25.38, "learning_rate": 3.731687951556132e-05, "loss": 2.145, "step": 8766500 }, { "epoch": 25.38, "learning_rate": 3.731615586791404e-05, "loss": 2.1288, "step": 8767000 }, { "epoch": 25.38, "learning_rate": 3.7315432220266763e-05, "loss": 2.1321, "step": 8767500 }, { "epoch": 25.38, "learning_rate": 3.7314708572619486e-05, "loss": 2.1285, "step": 8768000 }, { "epoch": 25.38, "learning_rate": 3.7313984924972215e-05, "loss": 2.1213, "step": 8768500 }, { "epoch": 25.38, "learning_rate": 3.731326127732494e-05, "loss": 2.1506, "step": 8769000 }, { "epoch": 25.38, "learning_rate": 3.731253907697295e-05, "loss": 2.1513, "step": 8769500 }, { "epoch": 25.39, "learning_rate": 3.731181542932568e-05, "loss": 2.1257, "step": 8770000 }, { "epoch": 25.39, "learning_rate": 3.7311091781678404e-05, "loss": 2.1353, "step": 8770500 }, { "epoch": 25.39, "learning_rate": 3.7310368134031126e-05, "loss": 2.1617, "step": 8771000 }, { "epoch": 25.39, "learning_rate": 3.730964593367914e-05, "loss": 2.1375, "step": 8771500 }, { "epoch": 25.39, "learning_rate": 3.7308922286031864e-05, "loss": 2.1609, "step": 8772000 }, { "epoch": 25.39, "learning_rate": 3.7308198638384586e-05, "loss": 2.1546, "step": 8772500 }, { "epoch": 25.39, "learning_rate": 3.730747643803261e-05, "loss": 2.1633, "step": 8773000 }, { "epoch": 25.4, "learning_rate": 3.730675279038533e-05, "loss": 2.1565, "step": 8773500 }, { "epoch": 25.4, "learning_rate": 3.730602914273805e-05, "loss": 2.1306, "step": 8774000 }, { "epoch": 25.4, "learning_rate": 3.7305305495090775e-05, "loss": 2.1513, "step": 8774500 }, { "epoch": 25.4, "learning_rate": 3.73045818474435e-05, "loss": 2.1453, "step": 8775000 }, { "epoch": 25.4, "learning_rate": 3.730385819979622e-05, "loss": 2.1375, "step": 8775500 }, { "epoch": 25.4, "learning_rate": 3.730313455214894e-05, "loss": 2.1445, "step": 8776000 }, { "epoch": 25.4, "learning_rate": 3.7302410904501664e-05, "loss": 2.1442, "step": 8776500 }, { "epoch": 25.41, "learning_rate": 3.7301688704149686e-05, "loss": 2.1414, "step": 8777000 }, { "epoch": 25.41, "learning_rate": 3.7300965056502415e-05, "loss": 2.1414, "step": 8777500 }, { "epoch": 25.41, "learning_rate": 3.730024140885514e-05, "loss": 2.1443, "step": 8778000 }, { "epoch": 25.41, "learning_rate": 3.729951776120786e-05, "loss": 2.1391, "step": 8778500 }, { "epoch": 25.41, "learning_rate": 3.729879411356058e-05, "loss": 2.1352, "step": 8779000 }, { "epoch": 25.41, "learning_rate": 3.7298070465913304e-05, "loss": 2.1166, "step": 8779500 }, { "epoch": 25.41, "learning_rate": 3.7297346818266026e-05, "loss": 2.146, "step": 8780000 }, { "epoch": 25.42, "learning_rate": 3.729662317061875e-05, "loss": 2.1499, "step": 8780500 }, { "epoch": 25.42, "learning_rate": 3.729589952297147e-05, "loss": 2.1289, "step": 8781000 }, { "epoch": 25.42, "learning_rate": 3.729517732261949e-05, "loss": 2.1581, "step": 8781500 }, { "epoch": 25.42, "learning_rate": 3.7294453674972216e-05, "loss": 2.1548, "step": 8782000 }, { "epoch": 25.42, "learning_rate": 3.729373002732494e-05, "loss": 2.152, "step": 8782500 }, { "epoch": 25.42, "learning_rate": 3.729300637967766e-05, "loss": 2.1315, "step": 8783000 }, { "epoch": 25.42, "learning_rate": 3.7292284179325676e-05, "loss": 2.1271, "step": 8783500 }, { "epoch": 25.43, "learning_rate": 3.72915605316784e-05, "loss": 2.1267, "step": 8784000 }, { "epoch": 25.43, "learning_rate": 3.729083688403112e-05, "loss": 2.1561, "step": 8784500 }, { "epoch": 25.43, "learning_rate": 3.729011323638385e-05, "loss": 2.1385, "step": 8785000 }, { "epoch": 25.43, "learning_rate": 3.7289391036031865e-05, "loss": 2.1532, "step": 8785500 }, { "epoch": 25.43, "learning_rate": 3.7288667388384594e-05, "loss": 2.1355, "step": 8786000 }, { "epoch": 25.43, "learning_rate": 3.728794518803261e-05, "loss": 2.1468, "step": 8786500 }, { "epoch": 25.43, "learning_rate": 3.728722154038533e-05, "loss": 2.1671, "step": 8787000 }, { "epoch": 25.44, "learning_rate": 3.7286497892738054e-05, "loss": 2.1503, "step": 8787500 }, { "epoch": 25.44, "learning_rate": 3.728577569238607e-05, "loss": 2.1667, "step": 8788000 }, { "epoch": 25.44, "learning_rate": 3.728505204473879e-05, "loss": 2.1319, "step": 8788500 }, { "epoch": 25.44, "learning_rate": 3.7284328397091514e-05, "loss": 2.1254, "step": 8789000 }, { "epoch": 25.44, "learning_rate": 3.728360474944424e-05, "loss": 2.1396, "step": 8789500 }, { "epoch": 25.44, "learning_rate": 3.7282881101796965e-05, "loss": 2.1369, "step": 8790000 }, { "epoch": 25.44, "learning_rate": 3.728215745414969e-05, "loss": 2.1298, "step": 8790500 }, { "epoch": 25.45, "learning_rate": 3.728143380650241e-05, "loss": 2.1316, "step": 8791000 }, { "epoch": 25.45, "learning_rate": 3.728071015885513e-05, "loss": 2.1597, "step": 8791500 }, { "epoch": 25.45, "learning_rate": 3.7279986511207854e-05, "loss": 2.1239, "step": 8792000 }, { "epoch": 25.45, "learning_rate": 3.727926286356058e-05, "loss": 2.1307, "step": 8792500 }, { "epoch": 25.45, "learning_rate": 3.7278539215913305e-05, "loss": 2.1287, "step": 8793000 }, { "epoch": 25.45, "learning_rate": 3.727781556826603e-05, "loss": 2.1383, "step": 8793500 }, { "epoch": 25.46, "learning_rate": 3.727709192061875e-05, "loss": 2.1386, "step": 8794000 }, { "epoch": 25.46, "learning_rate": 3.727636827297147e-05, "loss": 2.1388, "step": 8794500 }, { "epoch": 25.46, "learning_rate": 3.7275644625324194e-05, "loss": 2.1361, "step": 8795000 }, { "epoch": 25.46, "learning_rate": 3.7274920977676916e-05, "loss": 2.1362, "step": 8795500 }, { "epoch": 25.46, "learning_rate": 3.7274197330029645e-05, "loss": 2.1527, "step": 8796000 }, { "epoch": 25.46, "learning_rate": 3.727347368238237e-05, "loss": 2.1298, "step": 8796500 }, { "epoch": 25.46, "learning_rate": 3.727275003473509e-05, "loss": 2.1528, "step": 8797000 }, { "epoch": 25.47, "learning_rate": 3.727202638708781e-05, "loss": 2.1531, "step": 8797500 }, { "epoch": 25.47, "learning_rate": 3.727130418673583e-05, "loss": 2.1447, "step": 8798000 }, { "epoch": 25.47, "learning_rate": 3.727058343367914e-05, "loss": 2.1628, "step": 8798500 }, { "epoch": 25.47, "learning_rate": 3.7269859786031865e-05, "loss": 2.1406, "step": 8799000 }, { "epoch": 25.47, "learning_rate": 3.726913613838459e-05, "loss": 2.1397, "step": 8799500 }, { "epoch": 25.47, "learning_rate": 3.726841249073732e-05, "loss": 2.1713, "step": 8800000 }, { "epoch": 25.47, "learning_rate": 3.726769029038533e-05, "loss": 2.126, "step": 8800500 }, { "epoch": 25.48, "learning_rate": 3.7266966642738054e-05, "loss": 2.133, "step": 8801000 }, { "epoch": 25.48, "learning_rate": 3.726624299509078e-05, "loss": 2.1465, "step": 8801500 }, { "epoch": 25.48, "learning_rate": 3.72655193474435e-05, "loss": 2.1265, "step": 8802000 }, { "epoch": 25.48, "learning_rate": 3.726479714709152e-05, "loss": 2.1425, "step": 8802500 }, { "epoch": 25.48, "learning_rate": 3.7264073499444243e-05, "loss": 2.1311, "step": 8803000 }, { "epoch": 25.48, "learning_rate": 3.7263349851796966e-05, "loss": 2.1497, "step": 8803500 }, { "epoch": 25.48, "learning_rate": 3.726262620414969e-05, "loss": 2.1495, "step": 8804000 }, { "epoch": 25.49, "learning_rate": 3.726190255650241e-05, "loss": 2.149, "step": 8804500 }, { "epoch": 25.49, "learning_rate": 3.726117890885513e-05, "loss": 2.1636, "step": 8805000 }, { "epoch": 25.49, "learning_rate": 3.7260455261207855e-05, "loss": 2.1421, "step": 8805500 }, { "epoch": 25.49, "learning_rate": 3.725973161356058e-05, "loss": 2.1386, "step": 8806000 }, { "epoch": 25.49, "learning_rate": 3.72590079659133e-05, "loss": 2.1569, "step": 8806500 }, { "epoch": 25.49, "learning_rate": 3.725828431826602e-05, "loss": 2.1697, "step": 8807000 }, { "epoch": 25.49, "learning_rate": 3.725756067061875e-05, "loss": 2.1843, "step": 8807500 }, { "epoch": 25.5, "learning_rate": 3.725683702297147e-05, "loss": 2.1585, "step": 8808000 }, { "epoch": 25.5, "learning_rate": 3.7256113375324195e-05, "loss": 2.1302, "step": 8808500 }, { "epoch": 25.5, "learning_rate": 3.725538972767692e-05, "loss": 2.122, "step": 8809000 }, { "epoch": 25.5, "learning_rate": 3.7254666080029646e-05, "loss": 2.1358, "step": 8809500 }, { "epoch": 25.5, "learning_rate": 3.725394243238237e-05, "loss": 2.1437, "step": 8810000 }, { "epoch": 25.5, "learning_rate": 3.725321878473509e-05, "loss": 2.138, "step": 8810500 }, { "epoch": 25.5, "learning_rate": 3.7252496584383106e-05, "loss": 2.152, "step": 8811000 }, { "epoch": 25.51, "learning_rate": 3.725177438403112e-05, "loss": 2.1411, "step": 8811500 }, { "epoch": 25.51, "learning_rate": 3.7251050736383844e-05, "loss": 2.1417, "step": 8812000 }, { "epoch": 25.51, "learning_rate": 3.725032708873657e-05, "loss": 2.1396, "step": 8812500 }, { "epoch": 25.51, "learning_rate": 3.7249603441089295e-05, "loss": 2.127, "step": 8813000 }, { "epoch": 25.51, "learning_rate": 3.724887979344202e-05, "loss": 2.1191, "step": 8813500 }, { "epoch": 25.51, "learning_rate": 3.724815759309003e-05, "loss": 2.1389, "step": 8814000 }, { "epoch": 25.51, "learning_rate": 3.7247433945442755e-05, "loss": 2.1538, "step": 8814500 }, { "epoch": 25.52, "learning_rate": 3.7246710297795484e-05, "loss": 2.1251, "step": 8815000 }, { "epoch": 25.52, "learning_rate": 3.7245986650148206e-05, "loss": 2.1593, "step": 8815500 }, { "epoch": 25.52, "learning_rate": 3.724526589709152e-05, "loss": 2.1379, "step": 8816000 }, { "epoch": 25.52, "learning_rate": 3.724454369673954e-05, "loss": 2.1711, "step": 8816500 }, { "epoch": 25.52, "learning_rate": 3.724382004909226e-05, "loss": 2.1667, "step": 8817000 }, { "epoch": 25.52, "learning_rate": 3.724309640144498e-05, "loss": 2.111, "step": 8817500 }, { "epoch": 25.52, "learning_rate": 3.7242372753797704e-05, "loss": 2.1671, "step": 8818000 }, { "epoch": 25.53, "learning_rate": 3.7241649106150426e-05, "loss": 2.136, "step": 8818500 }, { "epoch": 25.53, "learning_rate": 3.724092545850315e-05, "loss": 2.1636, "step": 8819000 }, { "epoch": 25.53, "learning_rate": 3.724020181085587e-05, "loss": 2.1358, "step": 8819500 }, { "epoch": 25.53, "learning_rate": 3.723947816320859e-05, "loss": 2.1514, "step": 8820000 }, { "epoch": 25.53, "learning_rate": 3.723875451556132e-05, "loss": 2.1552, "step": 8820500 }, { "epoch": 25.53, "learning_rate": 3.7238030867914044e-05, "loss": 2.1541, "step": 8821000 }, { "epoch": 25.53, "learning_rate": 3.723730866756206e-05, "loss": 2.1513, "step": 8821500 }, { "epoch": 25.54, "learning_rate": 3.723658501991478e-05, "loss": 2.1404, "step": 8822000 }, { "epoch": 25.54, "learning_rate": 3.723586137226751e-05, "loss": 2.1584, "step": 8822500 }, { "epoch": 25.54, "learning_rate": 3.723513917191553e-05, "loss": 2.1296, "step": 8823000 }, { "epoch": 25.54, "learning_rate": 3.723441552426825e-05, "loss": 2.1703, "step": 8823500 }, { "epoch": 25.54, "learning_rate": 3.723369187662097e-05, "loss": 2.1471, "step": 8824000 }, { "epoch": 25.54, "learning_rate": 3.72329682289737e-05, "loss": 2.1183, "step": 8824500 }, { "epoch": 25.54, "learning_rate": 3.7232246028621716e-05, "loss": 2.1427, "step": 8825000 }, { "epoch": 25.55, "learning_rate": 3.723152238097444e-05, "loss": 2.1526, "step": 8825500 }, { "epoch": 25.55, "learning_rate": 3.723079873332716e-05, "loss": 2.1572, "step": 8826000 }, { "epoch": 25.55, "learning_rate": 3.723007508567988e-05, "loss": 2.1235, "step": 8826500 }, { "epoch": 25.55, "learning_rate": 3.7229351438032605e-05, "loss": 2.1413, "step": 8827000 }, { "epoch": 25.55, "learning_rate": 3.722862779038533e-05, "loss": 2.1589, "step": 8827500 }, { "epoch": 25.55, "learning_rate": 3.722790414273805e-05, "loss": 2.1673, "step": 8828000 }, { "epoch": 25.55, "learning_rate": 3.722718049509077e-05, "loss": 2.1736, "step": 8828500 }, { "epoch": 25.56, "learning_rate": 3.72264568474435e-05, "loss": 2.1289, "step": 8829000 }, { "epoch": 25.56, "learning_rate": 3.7225734647091516e-05, "loss": 2.1274, "step": 8829500 }, { "epoch": 25.56, "learning_rate": 3.7225010999444245e-05, "loss": 2.1674, "step": 8830000 }, { "epoch": 25.56, "learning_rate": 3.722428735179697e-05, "loss": 2.1582, "step": 8830500 }, { "epoch": 25.56, "learning_rate": 3.722356370414969e-05, "loss": 2.1191, "step": 8831000 }, { "epoch": 25.56, "learning_rate": 3.722284005650241e-05, "loss": 2.1365, "step": 8831500 }, { "epoch": 25.57, "learning_rate": 3.7222116408855134e-05, "loss": 2.1457, "step": 8832000 }, { "epoch": 25.57, "learning_rate": 3.7221392761207856e-05, "loss": 2.1431, "step": 8832500 }, { "epoch": 25.57, "learning_rate": 3.722066911356058e-05, "loss": 2.134, "step": 8833000 }, { "epoch": 25.57, "learning_rate": 3.72199469132086e-05, "loss": 2.1304, "step": 8833500 }, { "epoch": 25.57, "learning_rate": 3.721922326556132e-05, "loss": 2.1316, "step": 8834000 }, { "epoch": 25.57, "learning_rate": 3.7218499617914045e-05, "loss": 2.1543, "step": 8834500 }, { "epoch": 25.57, "learning_rate": 3.721777597026677e-05, "loss": 2.1429, "step": 8835000 }, { "epoch": 25.58, "learning_rate": 3.721705376991478e-05, "loss": 2.1336, "step": 8835500 }, { "epoch": 25.58, "learning_rate": 3.7216330122267505e-05, "loss": 2.1074, "step": 8836000 }, { "epoch": 25.58, "learning_rate": 3.721560647462023e-05, "loss": 2.1454, "step": 8836500 }, { "epoch": 25.58, "learning_rate": 3.721488427426825e-05, "loss": 2.1506, "step": 8837000 }, { "epoch": 25.58, "learning_rate": 3.721416062662097e-05, "loss": 2.1578, "step": 8837500 }, { "epoch": 25.58, "learning_rate": 3.72134369789737e-05, "loss": 2.1417, "step": 8838000 }, { "epoch": 25.58, "learning_rate": 3.721271333132642e-05, "loss": 2.163, "step": 8838500 }, { "epoch": 25.59, "learning_rate": 3.7211989683679146e-05, "loss": 2.1459, "step": 8839000 }, { "epoch": 25.59, "learning_rate": 3.721126603603187e-05, "loss": 2.1443, "step": 8839500 }, { "epoch": 25.59, "learning_rate": 3.721054238838459e-05, "loss": 2.1338, "step": 8840000 }, { "epoch": 25.59, "learning_rate": 3.720981874073731e-05, "loss": 2.1389, "step": 8840500 }, { "epoch": 25.59, "learning_rate": 3.7209095093090034e-05, "loss": 2.158, "step": 8841000 }, { "epoch": 25.59, "learning_rate": 3.720837144544276e-05, "loss": 2.1521, "step": 8841500 }, { "epoch": 25.59, "learning_rate": 3.720764779779548e-05, "loss": 2.133, "step": 8842000 }, { "epoch": 25.6, "learning_rate": 3.72069241501482e-05, "loss": 2.1157, "step": 8842500 }, { "epoch": 25.6, "learning_rate": 3.7206201949796223e-05, "loss": 2.1605, "step": 8843000 }, { "epoch": 25.6, "learning_rate": 3.7205478302148946e-05, "loss": 2.1536, "step": 8843500 }, { "epoch": 25.6, "learning_rate": 3.720475465450167e-05, "loss": 2.1313, "step": 8844000 }, { "epoch": 25.6, "learning_rate": 3.720403100685439e-05, "loss": 2.1387, "step": 8844500 }, { "epoch": 25.6, "learning_rate": 3.720330735920712e-05, "loss": 2.1446, "step": 8845000 }, { "epoch": 25.6, "learning_rate": 3.720258371155984e-05, "loss": 2.134, "step": 8845500 }, { "epoch": 25.61, "learning_rate": 3.7201860063912564e-05, "loss": 2.1326, "step": 8846000 }, { "epoch": 25.61, "learning_rate": 3.7201136416265286e-05, "loss": 2.1532, "step": 8846500 }, { "epoch": 25.61, "learning_rate": 3.72004142159133e-05, "loss": 2.1624, "step": 8847000 }, { "epoch": 25.61, "learning_rate": 3.7199692015561324e-05, "loss": 2.136, "step": 8847500 }, { "epoch": 25.61, "learning_rate": 3.7198968367914046e-05, "loss": 2.1259, "step": 8848000 }, { "epoch": 25.61, "learning_rate": 3.719824472026677e-05, "loss": 2.1583, "step": 8848500 }, { "epoch": 25.61, "learning_rate": 3.719752107261949e-05, "loss": 2.1116, "step": 8849000 }, { "epoch": 25.62, "learning_rate": 3.719679742497221e-05, "loss": 2.1055, "step": 8849500 }, { "epoch": 25.62, "learning_rate": 3.7196073777324935e-05, "loss": 2.1308, "step": 8850000 }, { "epoch": 25.62, "learning_rate": 3.719535012967766e-05, "loss": 2.1281, "step": 8850500 }, { "epoch": 25.62, "learning_rate": 3.719462648203038e-05, "loss": 2.1534, "step": 8851000 }, { "epoch": 25.62, "learning_rate": 3.71939042816784e-05, "loss": 2.151, "step": 8851500 }, { "epoch": 25.62, "learning_rate": 3.719318208132642e-05, "loss": 2.1499, "step": 8852000 }, { "epoch": 25.62, "learning_rate": 3.7192458433679146e-05, "loss": 2.1329, "step": 8852500 }, { "epoch": 25.63, "learning_rate": 3.719173623332716e-05, "loss": 2.1712, "step": 8853000 }, { "epoch": 25.63, "learning_rate": 3.7191012585679884e-05, "loss": 2.1569, "step": 8853500 }, { "epoch": 25.63, "learning_rate": 3.7190288938032606e-05, "loss": 2.1365, "step": 8854000 }, { "epoch": 25.63, "learning_rate": 3.718956673768063e-05, "loss": 2.154, "step": 8854500 }, { "epoch": 25.63, "learning_rate": 3.718884309003335e-05, "loss": 2.1352, "step": 8855000 }, { "epoch": 25.63, "learning_rate": 3.718811944238607e-05, "loss": 2.1265, "step": 8855500 }, { "epoch": 25.63, "learning_rate": 3.7187395794738795e-05, "loss": 2.1489, "step": 8856000 }, { "epoch": 25.64, "learning_rate": 3.718667214709152e-05, "loss": 2.1435, "step": 8856500 }, { "epoch": 25.64, "learning_rate": 3.718594849944424e-05, "loss": 2.157, "step": 8857000 }, { "epoch": 25.64, "learning_rate": 3.718522485179696e-05, "loss": 2.1586, "step": 8857500 }, { "epoch": 25.64, "learning_rate": 3.7184501204149684e-05, "loss": 2.1255, "step": 8858000 }, { "epoch": 25.64, "learning_rate": 3.7183777556502407e-05, "loss": 2.1367, "step": 8858500 }, { "epoch": 25.64, "learning_rate": 3.718305390885513e-05, "loss": 2.1572, "step": 8859000 }, { "epoch": 25.64, "learning_rate": 3.718233026120785e-05, "loss": 2.1621, "step": 8859500 }, { "epoch": 25.65, "learning_rate": 3.718160806085588e-05, "loss": 2.1704, "step": 8860000 }, { "epoch": 25.65, "learning_rate": 3.71808844132086e-05, "loss": 2.1306, "step": 8860500 }, { "epoch": 25.65, "learning_rate": 3.7180160765561325e-05, "loss": 2.1422, "step": 8861000 }, { "epoch": 25.65, "learning_rate": 3.717943856520934e-05, "loss": 2.1629, "step": 8861500 }, { "epoch": 25.65, "learning_rate": 3.717871491756206e-05, "loss": 2.1433, "step": 8862000 }, { "epoch": 25.65, "learning_rate": 3.7177991269914785e-05, "loss": 2.1427, "step": 8862500 }, { "epoch": 25.65, "learning_rate": 3.71772690695628e-05, "loss": 2.1365, "step": 8863000 }, { "epoch": 25.66, "learning_rate": 3.717654542191553e-05, "loss": 2.1534, "step": 8863500 }, { "epoch": 25.66, "learning_rate": 3.717582177426825e-05, "loss": 2.113, "step": 8864000 }, { "epoch": 25.66, "learning_rate": 3.7175098126620974e-05, "loss": 2.1388, "step": 8864500 }, { "epoch": 25.66, "learning_rate": 3.7174374478973696e-05, "loss": 2.162, "step": 8865000 }, { "epoch": 25.66, "learning_rate": 3.717365083132642e-05, "loss": 2.134, "step": 8865500 }, { "epoch": 25.66, "learning_rate": 3.717292718367914e-05, "loss": 2.1473, "step": 8866000 }, { "epoch": 25.66, "learning_rate": 3.717220353603186e-05, "loss": 2.1371, "step": 8866500 }, { "epoch": 25.67, "learning_rate": 3.7171479888384585e-05, "loss": 2.1553, "step": 8867000 }, { "epoch": 25.67, "learning_rate": 3.7170756240737314e-05, "loss": 2.1349, "step": 8867500 }, { "epoch": 25.67, "learning_rate": 3.7170032593090036e-05, "loss": 2.1482, "step": 8868000 }, { "epoch": 25.67, "learning_rate": 3.716931184003335e-05, "loss": 2.1318, "step": 8868500 }, { "epoch": 25.67, "learning_rate": 3.7168588192386074e-05, "loss": 2.1187, "step": 8869000 }, { "epoch": 25.67, "learning_rate": 3.7167864544738796e-05, "loss": 2.133, "step": 8869500 }, { "epoch": 25.68, "learning_rate": 3.716714089709152e-05, "loss": 2.152, "step": 8870000 }, { "epoch": 25.68, "learning_rate": 3.716641724944424e-05, "loss": 2.1549, "step": 8870500 }, { "epoch": 25.68, "learning_rate": 3.716569360179696e-05, "loss": 2.152, "step": 8871000 }, { "epoch": 25.68, "learning_rate": 3.7164969954149685e-05, "loss": 2.1777, "step": 8871500 }, { "epoch": 25.68, "learning_rate": 3.716424630650241e-05, "loss": 2.1192, "step": 8872000 }, { "epoch": 25.68, "learning_rate": 3.716352265885513e-05, "loss": 2.1401, "step": 8872500 }, { "epoch": 25.68, "learning_rate": 3.716279901120785e-05, "loss": 2.1459, "step": 8873000 }, { "epoch": 25.69, "learning_rate": 3.716207536356058e-05, "loss": 2.1714, "step": 8873500 }, { "epoch": 25.69, "learning_rate": 3.71613517159133e-05, "loss": 2.1425, "step": 8874000 }, { "epoch": 25.69, "learning_rate": 3.7160628068266025e-05, "loss": 2.1509, "step": 8874500 }, { "epoch": 25.69, "learning_rate": 3.715990586791405e-05, "loss": 2.1224, "step": 8875000 }, { "epoch": 25.69, "learning_rate": 3.715918222026677e-05, "loss": 2.1457, "step": 8875500 }, { "epoch": 25.69, "learning_rate": 3.715845857261949e-05, "loss": 2.172, "step": 8876000 }, { "epoch": 25.69, "learning_rate": 3.7157734924972214e-05, "loss": 2.1284, "step": 8876500 }, { "epoch": 25.7, "learning_rate": 3.7157011277324937e-05, "loss": 2.1368, "step": 8877000 }, { "epoch": 25.7, "learning_rate": 3.715628762967766e-05, "loss": 2.1268, "step": 8877500 }, { "epoch": 25.7, "learning_rate": 3.715556398203038e-05, "loss": 2.1363, "step": 8878000 }, { "epoch": 25.7, "learning_rate": 3.71548403343831e-05, "loss": 2.1381, "step": 8878500 }, { "epoch": 25.7, "learning_rate": 3.715411668673583e-05, "loss": 2.1409, "step": 8879000 }, { "epoch": 25.7, "learning_rate": 3.7153393039088554e-05, "loss": 2.1436, "step": 8879500 }, { "epoch": 25.7, "learning_rate": 3.715267083873657e-05, "loss": 2.1651, "step": 8880000 }, { "epoch": 25.71, "learning_rate": 3.715194719108929e-05, "loss": 2.165, "step": 8880500 }, { "epoch": 25.71, "learning_rate": 3.715122499073731e-05, "loss": 2.1547, "step": 8881000 }, { "epoch": 25.71, "learning_rate": 3.715050134309003e-05, "loss": 2.1316, "step": 8881500 }, { "epoch": 25.71, "learning_rate": 3.714977769544275e-05, "loss": 2.1693, "step": 8882000 }, { "epoch": 25.71, "learning_rate": 3.714905404779548e-05, "loss": 2.1481, "step": 8882500 }, { "epoch": 25.71, "learning_rate": 3.7148330400148204e-05, "loss": 2.1481, "step": 8883000 }, { "epoch": 25.71, "learning_rate": 3.7147608199796226e-05, "loss": 2.1496, "step": 8883500 }, { "epoch": 25.72, "learning_rate": 3.714688599944424e-05, "loss": 2.1588, "step": 8884000 }, { "epoch": 25.72, "learning_rate": 3.7146162351796964e-05, "loss": 2.1363, "step": 8884500 }, { "epoch": 25.72, "learning_rate": 3.7145438704149686e-05, "loss": 2.1599, "step": 8885000 }, { "epoch": 25.72, "learning_rate": 3.714471505650241e-05, "loss": 2.1307, "step": 8885500 }, { "epoch": 25.72, "learning_rate": 3.714399140885513e-05, "loss": 2.1512, "step": 8886000 }, { "epoch": 25.72, "learning_rate": 3.714326776120786e-05, "loss": 2.1565, "step": 8886500 }, { "epoch": 25.72, "learning_rate": 3.7142545560855875e-05, "loss": 2.1398, "step": 8887000 }, { "epoch": 25.73, "learning_rate": 3.71418219132086e-05, "loss": 2.1457, "step": 8887500 }, { "epoch": 25.73, "learning_rate": 3.714109826556132e-05, "loss": 2.127, "step": 8888000 }, { "epoch": 25.73, "learning_rate": 3.714037461791404e-05, "loss": 2.1493, "step": 8888500 }, { "epoch": 25.73, "learning_rate": 3.7139650970266764e-05, "loss": 2.1459, "step": 8889000 }, { "epoch": 25.73, "learning_rate": 3.7138927322619486e-05, "loss": 2.1224, "step": 8889500 }, { "epoch": 25.73, "learning_rate": 3.7138203674972215e-05, "loss": 2.1247, "step": 8890000 }, { "epoch": 25.73, "learning_rate": 3.713748292191553e-05, "loss": 2.1521, "step": 8890500 }, { "epoch": 25.74, "learning_rate": 3.7136760721563546e-05, "loss": 2.1532, "step": 8891000 }, { "epoch": 25.74, "learning_rate": 3.713603707391627e-05, "loss": 2.159, "step": 8891500 }, { "epoch": 25.74, "learning_rate": 3.713531342626899e-05, "loss": 2.1714, "step": 8892000 }, { "epoch": 25.74, "learning_rate": 3.713458977862171e-05, "loss": 2.1552, "step": 8892500 }, { "epoch": 25.74, "learning_rate": 3.7133866130974435e-05, "loss": 2.1712, "step": 8893000 }, { "epoch": 25.74, "learning_rate": 3.713314248332716e-05, "loss": 2.133, "step": 8893500 }, { "epoch": 25.74, "learning_rate": 3.713241883567988e-05, "loss": 2.1477, "step": 8894000 }, { "epoch": 25.75, "learning_rate": 3.713169518803261e-05, "loss": 2.1126, "step": 8894500 }, { "epoch": 25.75, "learning_rate": 3.713097154038533e-05, "loss": 2.158, "step": 8895000 }, { "epoch": 25.75, "learning_rate": 3.713024789273805e-05, "loss": 2.1387, "step": 8895500 }, { "epoch": 25.75, "learning_rate": 3.7129524245090775e-05, "loss": 2.1262, "step": 8896000 }, { "epoch": 25.75, "learning_rate": 3.71288005974435e-05, "loss": 2.1486, "step": 8896500 }, { "epoch": 25.75, "learning_rate": 3.712807694979622e-05, "loss": 2.127, "step": 8897000 }, { "epoch": 25.75, "learning_rate": 3.712735330214895e-05, "loss": 2.1321, "step": 8897500 }, { "epoch": 25.76, "learning_rate": 3.712662965450167e-05, "loss": 2.1612, "step": 8898000 }, { "epoch": 25.76, "learning_rate": 3.712590745414969e-05, "loss": 2.1608, "step": 8898500 }, { "epoch": 25.76, "learning_rate": 3.712518380650241e-05, "loss": 2.1371, "step": 8899000 }, { "epoch": 25.76, "learning_rate": 3.712446015885513e-05, "loss": 2.1416, "step": 8899500 }, { "epoch": 25.76, "learning_rate": 3.712373651120786e-05, "loss": 2.1373, "step": 8900000 }, { "epoch": 25.76, "learning_rate": 3.712301286356058e-05, "loss": 2.1311, "step": 8900500 }, { "epoch": 25.76, "learning_rate": 3.7122289215913305e-05, "loss": 2.1498, "step": 8901000 }, { "epoch": 25.77, "learning_rate": 3.712156556826603e-05, "loss": 2.1789, "step": 8901500 }, { "epoch": 25.77, "learning_rate": 3.712084192061875e-05, "loss": 2.1502, "step": 8902000 }, { "epoch": 25.77, "learning_rate": 3.712011827297147e-05, "loss": 2.1536, "step": 8902500 }, { "epoch": 25.77, "learning_rate": 3.7119394625324194e-05, "loss": 2.1377, "step": 8903000 }, { "epoch": 25.77, "learning_rate": 3.7118670977676916e-05, "loss": 2.1456, "step": 8903500 }, { "epoch": 25.77, "learning_rate": 3.711794733002964e-05, "loss": 2.1476, "step": 8904000 }, { "epoch": 25.77, "learning_rate": 3.711722368238237e-05, "loss": 2.1421, "step": 8904500 }, { "epoch": 25.78, "learning_rate": 3.711650003473509e-05, "loss": 2.1247, "step": 8905000 }, { "epoch": 25.78, "learning_rate": 3.711577638708781e-05, "loss": 2.1566, "step": 8905500 }, { "epoch": 25.78, "learning_rate": 3.7115052739440534e-05, "loss": 2.1444, "step": 8906000 }, { "epoch": 25.78, "learning_rate": 3.711432909179326e-05, "loss": 2.1558, "step": 8906500 }, { "epoch": 25.78, "learning_rate": 3.711360689144128e-05, "loss": 2.1424, "step": 8907000 }, { "epoch": 25.78, "learning_rate": 3.7112883243794e-05, "loss": 2.1381, "step": 8907500 }, { "epoch": 25.79, "learning_rate": 3.711215959614672e-05, "loss": 2.1233, "step": 8908000 }, { "epoch": 25.79, "learning_rate": 3.711143739579474e-05, "loss": 2.136, "step": 8908500 }, { "epoch": 25.79, "learning_rate": 3.711071374814746e-05, "loss": 2.137, "step": 8909000 }, { "epoch": 25.79, "learning_rate": 3.710999010050018e-05, "loss": 2.1273, "step": 8909500 }, { "epoch": 25.79, "learning_rate": 3.710926645285291e-05, "loss": 2.1615, "step": 8910000 }, { "epoch": 25.79, "learning_rate": 3.7108542805205634e-05, "loss": 2.1267, "step": 8910500 }, { "epoch": 25.79, "learning_rate": 3.7107819157558356e-05, "loss": 2.1354, "step": 8911000 }, { "epoch": 25.8, "learning_rate": 3.710709550991108e-05, "loss": 2.1571, "step": 8911500 }, { "epoch": 25.8, "learning_rate": 3.710637186226381e-05, "loss": 2.1581, "step": 8912000 }, { "epoch": 25.8, "learning_rate": 3.710564821461653e-05, "loss": 2.1469, "step": 8912500 }, { "epoch": 25.8, "learning_rate": 3.710492456696925e-05, "loss": 2.1563, "step": 8913000 }, { "epoch": 25.8, "learning_rate": 3.7104200919321974e-05, "loss": 2.1527, "step": 8913500 }, { "epoch": 25.8, "learning_rate": 3.710347871896999e-05, "loss": 2.1488, "step": 8914000 }, { "epoch": 25.8, "learning_rate": 3.710275651861801e-05, "loss": 2.1524, "step": 8914500 }, { "epoch": 25.81, "learning_rate": 3.710203431826603e-05, "loss": 2.1635, "step": 8915000 }, { "epoch": 25.81, "learning_rate": 3.710131067061875e-05, "loss": 2.1471, "step": 8915500 }, { "epoch": 25.81, "learning_rate": 3.710058702297147e-05, "loss": 2.1429, "step": 8916000 }, { "epoch": 25.81, "learning_rate": 3.709986482261949e-05, "loss": 2.1334, "step": 8916500 }, { "epoch": 25.81, "learning_rate": 3.709914117497221e-05, "loss": 2.1579, "step": 8917000 }, { "epoch": 25.81, "learning_rate": 3.709841752732494e-05, "loss": 2.1411, "step": 8917500 }, { "epoch": 25.81, "learning_rate": 3.7097695326972954e-05, "loss": 2.1445, "step": 8918000 }, { "epoch": 25.82, "learning_rate": 3.709697167932568e-05, "loss": 2.1207, "step": 8918500 }, { "epoch": 25.82, "learning_rate": 3.70962480316784e-05, "loss": 2.1472, "step": 8919000 }, { "epoch": 25.82, "learning_rate": 3.709552438403112e-05, "loss": 2.1468, "step": 8919500 }, { "epoch": 25.82, "learning_rate": 3.709480073638385e-05, "loss": 2.1532, "step": 8920000 }, { "epoch": 25.82, "learning_rate": 3.709407708873657e-05, "loss": 2.1575, "step": 8920500 }, { "epoch": 25.82, "learning_rate": 3.7093353441089295e-05, "loss": 2.1434, "step": 8921000 }, { "epoch": 25.82, "learning_rate": 3.709262979344202e-05, "loss": 2.1485, "step": 8921500 }, { "epoch": 25.83, "learning_rate": 3.709190614579474e-05, "loss": 2.167, "step": 8922000 }, { "epoch": 25.83, "learning_rate": 3.709118394544276e-05, "loss": 2.1712, "step": 8922500 }, { "epoch": 25.83, "learning_rate": 3.7090460297795484e-05, "loss": 2.1275, "step": 8923000 }, { "epoch": 25.83, "learning_rate": 3.7089736650148206e-05, "loss": 2.1571, "step": 8923500 }, { "epoch": 25.83, "learning_rate": 3.708901300250093e-05, "loss": 2.1436, "step": 8924000 }, { "epoch": 25.83, "learning_rate": 3.708828935485365e-05, "loss": 2.157, "step": 8924500 }, { "epoch": 25.83, "learning_rate": 3.708756570720637e-05, "loss": 2.141, "step": 8925000 }, { "epoch": 25.84, "learning_rate": 3.7086842059559095e-05, "loss": 2.1461, "step": 8925500 }, { "epoch": 25.84, "learning_rate": 3.708611841191182e-05, "loss": 2.1437, "step": 8926000 }, { "epoch": 25.84, "learning_rate": 3.708539476426454e-05, "loss": 2.1508, "step": 8926500 }, { "epoch": 25.84, "learning_rate": 3.708467111661727e-05, "loss": 2.1196, "step": 8927000 }, { "epoch": 25.84, "learning_rate": 3.708394891626529e-05, "loss": 2.1668, "step": 8927500 }, { "epoch": 25.84, "learning_rate": 3.708322526861801e-05, "loss": 2.1362, "step": 8928000 }, { "epoch": 25.84, "learning_rate": 3.7082501620970735e-05, "loss": 2.133, "step": 8928500 }, { "epoch": 25.85, "learning_rate": 3.708177797332346e-05, "loss": 2.1493, "step": 8929000 }, { "epoch": 25.85, "learning_rate": 3.708105432567618e-05, "loss": 2.1433, "step": 8929500 }, { "epoch": 25.85, "learning_rate": 3.70803306780289e-05, "loss": 2.1386, "step": 8930000 }, { "epoch": 25.85, "learning_rate": 3.7079607030381624e-05, "loss": 2.128, "step": 8930500 }, { "epoch": 25.85, "learning_rate": 3.7078883382734346e-05, "loss": 2.1333, "step": 8931000 }, { "epoch": 25.85, "learning_rate": 3.707815973508707e-05, "loss": 2.1296, "step": 8931500 }, { "epoch": 25.85, "learning_rate": 3.707743608743979e-05, "loss": 2.1245, "step": 8932000 }, { "epoch": 25.86, "learning_rate": 3.707671243979251e-05, "loss": 2.1415, "step": 8932500 }, { "epoch": 25.86, "learning_rate": 3.7075990239440535e-05, "loss": 2.1393, "step": 8933000 }, { "epoch": 25.86, "learning_rate": 3.707526803908855e-05, "loss": 2.1125, "step": 8933500 }, { "epoch": 25.86, "learning_rate": 3.7074545838736566e-05, "loss": 2.1401, "step": 8934000 }, { "epoch": 25.86, "learning_rate": 3.707382363838459e-05, "loss": 2.1541, "step": 8934500 }, { "epoch": 25.86, "learning_rate": 3.707309999073732e-05, "loss": 2.1542, "step": 8935000 }, { "epoch": 25.86, "learning_rate": 3.707237634309004e-05, "loss": 2.1553, "step": 8935500 }, { "epoch": 25.87, "learning_rate": 3.707165269544276e-05, "loss": 2.1308, "step": 8936000 }, { "epoch": 25.87, "learning_rate": 3.7070929047795484e-05, "loss": 2.1555, "step": 8936500 }, { "epoch": 25.87, "learning_rate": 3.707020540014821e-05, "loss": 2.1467, "step": 8937000 }, { "epoch": 25.87, "learning_rate": 3.706948175250093e-05, "loss": 2.122, "step": 8937500 }, { "epoch": 25.87, "learning_rate": 3.706875810485365e-05, "loss": 2.145, "step": 8938000 }, { "epoch": 25.87, "learning_rate": 3.7068034457206373e-05, "loss": 2.1369, "step": 8938500 }, { "epoch": 25.87, "learning_rate": 3.7067310809559096e-05, "loss": 2.158, "step": 8939000 }, { "epoch": 25.88, "learning_rate": 3.706658716191182e-05, "loss": 2.157, "step": 8939500 }, { "epoch": 25.88, "learning_rate": 3.706586351426454e-05, "loss": 2.1417, "step": 8940000 }, { "epoch": 25.88, "learning_rate": 3.706513986661726e-05, "loss": 2.1469, "step": 8940500 }, { "epoch": 25.88, "learning_rate": 3.7064417666265285e-05, "loss": 2.1528, "step": 8941000 }, { "epoch": 25.88, "learning_rate": 3.706369401861801e-05, "loss": 2.1294, "step": 8941500 }, { "epoch": 25.88, "learning_rate": 3.7062970370970736e-05, "loss": 2.1534, "step": 8942000 }, { "epoch": 25.88, "learning_rate": 3.706224672332346e-05, "loss": 2.1097, "step": 8942500 }, { "epoch": 25.89, "learning_rate": 3.706152307567618e-05, "loss": 2.1597, "step": 8943000 }, { "epoch": 25.89, "learning_rate": 3.70607994280289e-05, "loss": 2.1183, "step": 8943500 }, { "epoch": 25.89, "learning_rate": 3.7060075780381625e-05, "loss": 2.1356, "step": 8944000 }, { "epoch": 25.89, "learning_rate": 3.705935213273435e-05, "loss": 2.1415, "step": 8944500 }, { "epoch": 25.89, "learning_rate": 3.705862848508707e-05, "loss": 2.1458, "step": 8945000 }, { "epoch": 25.89, "learning_rate": 3.705790483743979e-05, "loss": 2.1247, "step": 8945500 }, { "epoch": 25.9, "learning_rate": 3.7057182637087814e-05, "loss": 2.1485, "step": 8946000 }, { "epoch": 25.9, "learning_rate": 3.7056458989440536e-05, "loss": 2.1638, "step": 8946500 }, { "epoch": 25.9, "learning_rate": 3.705573534179326e-05, "loss": 2.133, "step": 8947000 }, { "epoch": 25.9, "learning_rate": 3.705501169414598e-05, "loss": 2.1538, "step": 8947500 }, { "epoch": 25.9, "learning_rate": 3.70542880464987e-05, "loss": 2.1604, "step": 8948000 }, { "epoch": 25.9, "learning_rate": 3.7053564398851425e-05, "loss": 2.1223, "step": 8948500 }, { "epoch": 25.9, "learning_rate": 3.7052840751204154e-05, "loss": 2.1594, "step": 8949000 }, { "epoch": 25.91, "learning_rate": 3.7052117103556876e-05, "loss": 2.1586, "step": 8949500 }, { "epoch": 25.91, "learning_rate": 3.70513934559096e-05, "loss": 2.1465, "step": 8950000 }, { "epoch": 25.91, "learning_rate": 3.7050671255557614e-05, "loss": 2.1271, "step": 8950500 }, { "epoch": 25.91, "learning_rate": 3.704994760791034e-05, "loss": 2.1594, "step": 8951000 }, { "epoch": 25.91, "learning_rate": 3.7049223960263065e-05, "loss": 2.1451, "step": 8951500 }, { "epoch": 25.91, "learning_rate": 3.704850031261579e-05, "loss": 2.1443, "step": 8952000 }, { "epoch": 25.91, "learning_rate": 3.704777666496851e-05, "loss": 2.1507, "step": 8952500 }, { "epoch": 25.92, "learning_rate": 3.704705301732123e-05, "loss": 2.156, "step": 8953000 }, { "epoch": 25.92, "learning_rate": 3.704633226426454e-05, "loss": 2.1362, "step": 8953500 }, { "epoch": 25.92, "learning_rate": 3.704560861661727e-05, "loss": 2.1435, "step": 8954000 }, { "epoch": 25.92, "learning_rate": 3.704488496896999e-05, "loss": 2.142, "step": 8954500 }, { "epoch": 25.92, "learning_rate": 3.7044161321322714e-05, "loss": 2.1775, "step": 8955000 }, { "epoch": 25.92, "learning_rate": 3.704343767367544e-05, "loss": 2.1431, "step": 8955500 }, { "epoch": 25.92, "learning_rate": 3.704271402602816e-05, "loss": 2.125, "step": 8956000 }, { "epoch": 25.93, "learning_rate": 3.704199037838088e-05, "loss": 2.1473, "step": 8956500 }, { "epoch": 25.93, "learning_rate": 3.7041268178028903e-05, "loss": 2.1452, "step": 8957000 }, { "epoch": 25.93, "learning_rate": 3.7040544530381626e-05, "loss": 2.1516, "step": 8957500 }, { "epoch": 25.93, "learning_rate": 3.703982088273435e-05, "loss": 2.1366, "step": 8958000 }, { "epoch": 25.93, "learning_rate": 3.703909868238237e-05, "loss": 2.1659, "step": 8958500 }, { "epoch": 25.93, "learning_rate": 3.703837503473509e-05, "loss": 2.1461, "step": 8959000 }, { "epoch": 25.93, "learning_rate": 3.7037651387087815e-05, "loss": 2.1582, "step": 8959500 }, { "epoch": 25.94, "learning_rate": 3.703692773944054e-05, "loss": 2.1329, "step": 8960000 }, { "epoch": 25.94, "learning_rate": 3.703620409179326e-05, "loss": 2.1535, "step": 8960500 }, { "epoch": 25.94, "learning_rate": 3.703548044414598e-05, "loss": 2.1443, "step": 8961000 }, { "epoch": 25.94, "learning_rate": 3.7034756796498704e-05, "loss": 2.1299, "step": 8961500 }, { "epoch": 25.94, "learning_rate": 3.7034033148851426e-05, "loss": 2.1598, "step": 8962000 }, { "epoch": 25.94, "learning_rate": 3.703330950120415e-05, "loss": 2.1196, "step": 8962500 }, { "epoch": 25.94, "learning_rate": 3.703258585355687e-05, "loss": 2.1332, "step": 8963000 }, { "epoch": 25.95, "learning_rate": 3.703186220590959e-05, "loss": 2.1577, "step": 8963500 }, { "epoch": 25.95, "learning_rate": 3.703113855826232e-05, "loss": 2.1334, "step": 8964000 }, { "epoch": 25.95, "learning_rate": 3.7030416357910344e-05, "loss": 2.1257, "step": 8964500 }, { "epoch": 25.95, "learning_rate": 3.7029692710263066e-05, "loss": 2.143, "step": 8965000 }, { "epoch": 25.95, "learning_rate": 3.702897050991108e-05, "loss": 2.152, "step": 8965500 }, { "epoch": 25.95, "learning_rate": 3.7028246862263804e-05, "loss": 2.1524, "step": 8966000 }, { "epoch": 25.95, "learning_rate": 3.7027523214616526e-05, "loss": 2.1357, "step": 8966500 }, { "epoch": 25.96, "learning_rate": 3.702679956696925e-05, "loss": 2.1274, "step": 8967000 }, { "epoch": 25.96, "learning_rate": 3.702607591932197e-05, "loss": 2.1353, "step": 8967500 }, { "epoch": 25.96, "learning_rate": 3.702535371896999e-05, "loss": 2.1502, "step": 8968000 }, { "epoch": 25.96, "learning_rate": 3.7024630071322715e-05, "loss": 2.1368, "step": 8968500 }, { "epoch": 25.96, "learning_rate": 3.702390642367544e-05, "loss": 2.1393, "step": 8969000 }, { "epoch": 25.96, "learning_rate": 3.702318277602816e-05, "loss": 2.1497, "step": 8969500 }, { "epoch": 25.96, "learning_rate": 3.702245912838088e-05, "loss": 2.1291, "step": 8970000 }, { "epoch": 25.97, "learning_rate": 3.7021735480733604e-05, "loss": 2.163, "step": 8970500 }, { "epoch": 25.97, "learning_rate": 3.702101328038162e-05, "loss": 2.1246, "step": 8971000 }, { "epoch": 25.97, "learning_rate": 3.702029108002964e-05, "loss": 2.1488, "step": 8971500 }, { "epoch": 25.97, "learning_rate": 3.701956743238237e-05, "loss": 2.1501, "step": 8972000 }, { "epoch": 25.97, "learning_rate": 3.701884378473509e-05, "loss": 2.1442, "step": 8972500 }, { "epoch": 25.97, "learning_rate": 3.7018120137087815e-05, "loss": 2.137, "step": 8973000 }, { "epoch": 25.97, "learning_rate": 3.701739648944054e-05, "loss": 2.1569, "step": 8973500 }, { "epoch": 25.98, "learning_rate": 3.701667284179326e-05, "loss": 2.1253, "step": 8974000 }, { "epoch": 25.98, "learning_rate": 3.701594919414598e-05, "loss": 2.131, "step": 8974500 }, { "epoch": 25.98, "learning_rate": 3.7015225546498704e-05, "loss": 2.1437, "step": 8975000 }, { "epoch": 25.98, "learning_rate": 3.701450189885143e-05, "loss": 2.1385, "step": 8975500 }, { "epoch": 25.98, "learning_rate": 3.701377969849944e-05, "loss": 2.1423, "step": 8976000 }, { "epoch": 25.98, "learning_rate": 3.7013057498147465e-05, "loss": 2.1507, "step": 8976500 }, { "epoch": 25.98, "learning_rate": 3.701233385050019e-05, "loss": 2.1345, "step": 8977000 }, { "epoch": 25.99, "learning_rate": 3.701161020285291e-05, "loss": 2.1249, "step": 8977500 }, { "epoch": 25.99, "learning_rate": 3.701088655520563e-05, "loss": 2.1573, "step": 8978000 }, { "epoch": 25.99, "learning_rate": 3.701016435485365e-05, "loss": 2.1351, "step": 8978500 }, { "epoch": 25.99, "learning_rate": 3.700944070720637e-05, "loss": 2.1653, "step": 8979000 }, { "epoch": 25.99, "learning_rate": 3.70087170595591e-05, "loss": 2.1664, "step": 8979500 }, { "epoch": 25.99, "learning_rate": 3.700799341191182e-05, "loss": 2.1596, "step": 8980000 }, { "epoch": 25.99, "learning_rate": 3.700726976426455e-05, "loss": 2.1665, "step": 8980500 }, { "epoch": 26.0, "learning_rate": 3.700654611661727e-05, "loss": 2.1455, "step": 8981000 }, { "epoch": 26.0, "learning_rate": 3.7005822468969994e-05, "loss": 2.1675, "step": 8981500 }, { "epoch": 26.0, "learning_rate": 3.7005098821322716e-05, "loss": 2.1458, "step": 8982000 }, { "epoch": 26.0, "eval_accuracy": 0.6646410012811101, "eval_accuracy_mlm": 0.6288016755078222, "eval_accuracy_nsp": 0.8570210776697694, "eval_loss": 2.196295738220215, "eval_runtime": 331.5653, "eval_samples_per_second": 1316.139, "eval_steps_per_second": 54.84, "step": 8982272 }, { "epoch": 26.0, "learning_rate": 3.700437517367544e-05, "loss": 2.115, "step": 8982500 }, { "epoch": 26.0, "learning_rate": 3.700365152602816e-05, "loss": 2.1196, "step": 8983000 }, { "epoch": 26.0, "learning_rate": 3.700292787838088e-05, "loss": 2.1178, "step": 8983500 }, { "epoch": 26.01, "learning_rate": 3.70022056780289e-05, "loss": 2.1168, "step": 8984000 }, { "epoch": 26.01, "learning_rate": 3.700148203038162e-05, "loss": 2.1063, "step": 8984500 }, { "epoch": 26.01, "learning_rate": 3.700075838273435e-05, "loss": 2.1416, "step": 8985000 }, { "epoch": 26.01, "learning_rate": 3.700003473508707e-05, "loss": 2.1263, "step": 8985500 }, { "epoch": 26.01, "learning_rate": 3.6999311087439794e-05, "loss": 2.1203, "step": 8986000 }, { "epoch": 26.01, "learning_rate": 3.699858888708781e-05, "loss": 2.1251, "step": 8986500 }, { "epoch": 26.01, "learning_rate": 3.699786523944054e-05, "loss": 2.1247, "step": 8987000 }, { "epoch": 26.02, "learning_rate": 3.699714159179326e-05, "loss": 2.1123, "step": 8987500 }, { "epoch": 26.02, "learning_rate": 3.699641794414598e-05, "loss": 2.1132, "step": 8988000 }, { "epoch": 26.02, "learning_rate": 3.6995694296498705e-05, "loss": 2.1001, "step": 8988500 }, { "epoch": 26.02, "learning_rate": 3.699497064885143e-05, "loss": 2.1507, "step": 8989000 }, { "epoch": 26.02, "learning_rate": 3.699424844849945e-05, "loss": 2.1417, "step": 8989500 }, { "epoch": 26.02, "learning_rate": 3.6993526248147465e-05, "loss": 2.1089, "step": 8990000 }, { "epoch": 26.02, "learning_rate": 3.699280260050019e-05, "loss": 2.117, "step": 8990500 }, { "epoch": 26.03, "learning_rate": 3.699207895285291e-05, "loss": 2.1257, "step": 8991000 }, { "epoch": 26.03, "learning_rate": 3.699135530520563e-05, "loss": 2.1017, "step": 8991500 }, { "epoch": 26.03, "learning_rate": 3.6990631657558354e-05, "loss": 2.1304, "step": 8992000 }, { "epoch": 26.03, "learning_rate": 3.6989908009911077e-05, "loss": 2.1561, "step": 8992500 }, { "epoch": 26.03, "learning_rate": 3.69891858095591e-05, "loss": 2.1537, "step": 8993000 }, { "epoch": 26.03, "learning_rate": 3.698846216191182e-05, "loss": 2.1263, "step": 8993500 }, { "epoch": 26.03, "learning_rate": 3.698774140885513e-05, "loss": 2.1262, "step": 8994000 }, { "epoch": 26.04, "learning_rate": 3.698701776120785e-05, "loss": 2.1102, "step": 8994500 }, { "epoch": 26.04, "learning_rate": 3.698629411356058e-05, "loss": 2.1301, "step": 8995000 }, { "epoch": 26.04, "learning_rate": 3.6985570465913303e-05, "loss": 2.1059, "step": 8995500 }, { "epoch": 26.04, "learning_rate": 3.6984846818266026e-05, "loss": 2.1171, "step": 8996000 }, { "epoch": 26.04, "learning_rate": 3.698412317061875e-05, "loss": 2.1184, "step": 8996500 }, { "epoch": 26.04, "learning_rate": 3.698339952297148e-05, "loss": 2.1239, "step": 8997000 }, { "epoch": 26.04, "learning_rate": 3.69826758753242e-05, "loss": 2.1337, "step": 8997500 }, { "epoch": 26.05, "learning_rate": 3.698195222767692e-05, "loss": 2.1337, "step": 8998000 }, { "epoch": 26.05, "learning_rate": 3.6981228580029644e-05, "loss": 2.1263, "step": 8998500 }, { "epoch": 26.05, "learning_rate": 3.6980504932382366e-05, "loss": 2.1372, "step": 8999000 }, { "epoch": 26.05, "learning_rate": 3.697978128473509e-05, "loss": 2.1075, "step": 8999500 }, { "epoch": 26.05, "learning_rate": 3.697905763708781e-05, "loss": 2.1373, "step": 9000000 }, { "epoch": 26.05, "learning_rate": 3.697833398944053e-05, "loss": 2.1132, "step": 9000500 }, { "epoch": 26.05, "learning_rate": 3.6977610341793255e-05, "loss": 2.1306, "step": 9001000 }, { "epoch": 26.06, "learning_rate": 3.697688669414598e-05, "loss": 2.1407, "step": 9001500 }, { "epoch": 26.06, "learning_rate": 3.6976164493794e-05, "loss": 2.162, "step": 9002000 }, { "epoch": 26.06, "learning_rate": 3.697544229344202e-05, "loss": 2.1394, "step": 9002500 }, { "epoch": 26.06, "learning_rate": 3.6974718645794744e-05, "loss": 2.1344, "step": 9003000 }, { "epoch": 26.06, "learning_rate": 3.6973994998147466e-05, "loss": 2.1266, "step": 9003500 }, { "epoch": 26.06, "learning_rate": 3.697327135050019e-05, "loss": 2.1385, "step": 9004000 }, { "epoch": 26.06, "learning_rate": 3.697254770285291e-05, "loss": 2.152, "step": 9004500 }, { "epoch": 26.07, "learning_rate": 3.697182405520563e-05, "loss": 2.137, "step": 9005000 }, { "epoch": 26.07, "learning_rate": 3.6971100407558355e-05, "loss": 2.1074, "step": 9005500 }, { "epoch": 26.07, "learning_rate": 3.697037675991108e-05, "loss": 2.1188, "step": 9006000 }, { "epoch": 26.07, "learning_rate": 3.69696531122638e-05, "loss": 2.1222, "step": 9006500 }, { "epoch": 26.07, "learning_rate": 3.696893091191182e-05, "loss": 2.1264, "step": 9007000 }, { "epoch": 26.07, "learning_rate": 3.696820871155984e-05, "loss": 2.1113, "step": 9007500 }, { "epoch": 26.07, "learning_rate": 3.696748506391256e-05, "loss": 2.1249, "step": 9008000 }, { "epoch": 26.08, "learning_rate": 3.696676141626528e-05, "loss": 2.1115, "step": 9008500 }, { "epoch": 26.08, "learning_rate": 3.6966037768618004e-05, "loss": 2.1572, "step": 9009000 }, { "epoch": 26.08, "learning_rate": 3.696531412097073e-05, "loss": 2.121, "step": 9009500 }, { "epoch": 26.08, "learning_rate": 3.696459192061875e-05, "loss": 2.1272, "step": 9010000 }, { "epoch": 26.08, "learning_rate": 3.696386827297148e-05, "loss": 2.1275, "step": 9010500 }, { "epoch": 26.08, "learning_rate": 3.69631446253242e-05, "loss": 2.1338, "step": 9011000 }, { "epoch": 26.08, "learning_rate": 3.696242097767692e-05, "loss": 2.1312, "step": 9011500 }, { "epoch": 26.09, "learning_rate": 3.6961697330029644e-05, "loss": 2.1388, "step": 9012000 }, { "epoch": 26.09, "learning_rate": 3.696097368238237e-05, "loss": 2.1167, "step": 9012500 }, { "epoch": 26.09, "learning_rate": 3.696025003473509e-05, "loss": 2.1326, "step": 9013000 }, { "epoch": 26.09, "learning_rate": 3.695952638708781e-05, "loss": 2.1263, "step": 9013500 }, { "epoch": 26.09, "learning_rate": 3.695880418673583e-05, "loss": 2.1322, "step": 9014000 }, { "epoch": 26.09, "learning_rate": 3.695808053908855e-05, "loss": 2.1408, "step": 9014500 }, { "epoch": 26.09, "learning_rate": 3.695735689144128e-05, "loss": 2.1419, "step": 9015000 }, { "epoch": 26.1, "learning_rate": 3.6956634691089293e-05, "loss": 2.1355, "step": 9015500 }, { "epoch": 26.1, "learning_rate": 3.6955911043442016e-05, "loss": 2.1144, "step": 9016000 }, { "epoch": 26.1, "learning_rate": 3.695518739579474e-05, "loss": 2.1168, "step": 9016500 }, { "epoch": 26.1, "learning_rate": 3.6954465195442753e-05, "loss": 2.1385, "step": 9017000 }, { "epoch": 26.1, "learning_rate": 3.695374154779548e-05, "loss": 2.1045, "step": 9017500 }, { "epoch": 26.1, "learning_rate": 3.6953017900148205e-05, "loss": 2.1059, "step": 9018000 }, { "epoch": 26.1, "learning_rate": 3.695229425250093e-05, "loss": 2.1003, "step": 9018500 }, { "epoch": 26.11, "learning_rate": 3.695157060485365e-05, "loss": 2.1116, "step": 9019000 }, { "epoch": 26.11, "learning_rate": 3.695084695720638e-05, "loss": 2.1263, "step": 9019500 }, { "epoch": 26.11, "learning_rate": 3.69501233095591e-05, "loss": 2.1269, "step": 9020000 }, { "epoch": 26.11, "learning_rate": 3.694939966191182e-05, "loss": 2.1252, "step": 9020500 }, { "epoch": 26.11, "learning_rate": 3.6948676014264545e-05, "loss": 2.132, "step": 9021000 }, { "epoch": 26.11, "learning_rate": 3.694795236661727e-05, "loss": 2.1331, "step": 9021500 }, { "epoch": 26.11, "learning_rate": 3.694723016626528e-05, "loss": 2.1381, "step": 9022000 }, { "epoch": 26.12, "learning_rate": 3.6946506518618005e-05, "loss": 2.118, "step": 9022500 }, { "epoch": 26.12, "learning_rate": 3.694578287097073e-05, "loss": 2.1239, "step": 9023000 }, { "epoch": 26.12, "learning_rate": 3.694505922332345e-05, "loss": 2.1418, "step": 9023500 }, { "epoch": 26.12, "learning_rate": 3.694433557567618e-05, "loss": 2.1505, "step": 9024000 }, { "epoch": 26.12, "learning_rate": 3.69436119280289e-05, "loss": 2.1188, "step": 9024500 }, { "epoch": 26.12, "learning_rate": 3.694288972767692e-05, "loss": 2.1366, "step": 9025000 }, { "epoch": 26.13, "learning_rate": 3.6942166080029645e-05, "loss": 2.1231, "step": 9025500 }, { "epoch": 26.13, "learning_rate": 3.694144243238237e-05, "loss": 2.1257, "step": 9026000 }, { "epoch": 26.13, "learning_rate": 3.694071878473509e-05, "loss": 2.1105, "step": 9026500 }, { "epoch": 26.13, "learning_rate": 3.693999513708781e-05, "loss": 2.1502, "step": 9027000 }, { "epoch": 26.13, "learning_rate": 3.6939271489440534e-05, "loss": 2.1258, "step": 9027500 }, { "epoch": 26.13, "learning_rate": 3.6938549289088556e-05, "loss": 2.134, "step": 9028000 }, { "epoch": 26.13, "learning_rate": 3.693782564144128e-05, "loss": 2.1355, "step": 9028500 }, { "epoch": 26.14, "learning_rate": 3.6937101993794e-05, "loss": 2.1275, "step": 9029000 }, { "epoch": 26.14, "learning_rate": 3.693637834614672e-05, "loss": 2.1374, "step": 9029500 }, { "epoch": 26.14, "learning_rate": 3.6935654698499445e-05, "loss": 2.1248, "step": 9030000 }, { "epoch": 26.14, "learning_rate": 3.693493105085217e-05, "loss": 2.1246, "step": 9030500 }, { "epoch": 26.14, "learning_rate": 3.693420740320489e-05, "loss": 2.1231, "step": 9031000 }, { "epoch": 26.14, "learning_rate": 3.693348375555761e-05, "loss": 2.1273, "step": 9031500 }, { "epoch": 26.14, "learning_rate": 3.6932761555205634e-05, "loss": 2.1419, "step": 9032000 }, { "epoch": 26.15, "learning_rate": 3.693203790755836e-05, "loss": 2.1537, "step": 9032500 }, { "epoch": 26.15, "learning_rate": 3.693131425991108e-05, "loss": 2.1194, "step": 9033000 }, { "epoch": 26.15, "learning_rate": 3.693059061226381e-05, "loss": 2.1415, "step": 9033500 }, { "epoch": 26.15, "learning_rate": 3.692986696461653e-05, "loss": 2.119, "step": 9034000 }, { "epoch": 26.15, "learning_rate": 3.692914331696925e-05, "loss": 2.1363, "step": 9034500 }, { "epoch": 26.15, "learning_rate": 3.6928419669321975e-05, "loss": 2.1507, "step": 9035000 }, { "epoch": 26.15, "learning_rate": 3.69276960216747e-05, "loss": 2.1318, "step": 9035500 }, { "epoch": 26.16, "learning_rate": 3.692697237402742e-05, "loss": 2.1336, "step": 9036000 }, { "epoch": 26.16, "learning_rate": 3.6926250173675435e-05, "loss": 2.1171, "step": 9036500 }, { "epoch": 26.16, "learning_rate": 3.692552652602816e-05, "loss": 2.1344, "step": 9037000 }, { "epoch": 26.16, "learning_rate": 3.692480432567618e-05, "loss": 2.1271, "step": 9037500 }, { "epoch": 26.16, "learning_rate": 3.69240806780289e-05, "loss": 2.1277, "step": 9038000 }, { "epoch": 26.16, "learning_rate": 3.6923357030381624e-05, "loss": 2.1138, "step": 9038500 }, { "epoch": 26.16, "learning_rate": 3.6922633382734346e-05, "loss": 2.1341, "step": 9039000 }, { "epoch": 26.17, "learning_rate": 3.6921909735087075e-05, "loss": 2.1369, "step": 9039500 }, { "epoch": 26.17, "learning_rate": 3.69211860874398e-05, "loss": 2.1049, "step": 9040000 }, { "epoch": 26.17, "learning_rate": 3.692046243979252e-05, "loss": 2.133, "step": 9040500 }, { "epoch": 26.17, "learning_rate": 3.6919740239440535e-05, "loss": 2.1326, "step": 9041000 }, { "epoch": 26.17, "learning_rate": 3.691901659179326e-05, "loss": 2.1246, "step": 9041500 }, { "epoch": 26.17, "learning_rate": 3.691829294414598e-05, "loss": 2.1609, "step": 9042000 }, { "epoch": 26.17, "learning_rate": 3.691756929649871e-05, "loss": 2.135, "step": 9042500 }, { "epoch": 26.18, "learning_rate": 3.691684564885143e-05, "loss": 2.1292, "step": 9043000 }, { "epoch": 26.18, "learning_rate": 3.6916123448499446e-05, "loss": 2.1372, "step": 9043500 }, { "epoch": 26.18, "learning_rate": 3.691539980085217e-05, "loss": 2.1319, "step": 9044000 }, { "epoch": 26.18, "learning_rate": 3.691467615320489e-05, "loss": 2.1338, "step": 9044500 }, { "epoch": 26.18, "learning_rate": 3.691395250555761e-05, "loss": 2.1113, "step": 9045000 }, { "epoch": 26.18, "learning_rate": 3.6913228857910335e-05, "loss": 2.1211, "step": 9045500 }, { "epoch": 26.18, "learning_rate": 3.691250665755836e-05, "loss": 2.1096, "step": 9046000 }, { "epoch": 26.19, "learning_rate": 3.691178300991108e-05, "loss": 2.1257, "step": 9046500 }, { "epoch": 26.19, "learning_rate": 3.691105936226381e-05, "loss": 2.1303, "step": 9047000 }, { "epoch": 26.19, "learning_rate": 3.691033571461653e-05, "loss": 2.1123, "step": 9047500 }, { "epoch": 26.19, "learning_rate": 3.6909613514264546e-05, "loss": 2.1369, "step": 9048000 }, { "epoch": 26.19, "learning_rate": 3.690888986661727e-05, "loss": 2.1272, "step": 9048500 }, { "epoch": 26.19, "learning_rate": 3.690816621896999e-05, "loss": 2.1053, "step": 9049000 }, { "epoch": 26.19, "learning_rate": 3.690744257132271e-05, "loss": 2.1362, "step": 9049500 }, { "epoch": 26.2, "learning_rate": 3.6906718923675435e-05, "loss": 2.1486, "step": 9050000 }, { "epoch": 26.2, "learning_rate": 3.690599527602816e-05, "loss": 2.1572, "step": 9050500 }, { "epoch": 26.2, "learning_rate": 3.690527162838088e-05, "loss": 2.1528, "step": 9051000 }, { "epoch": 26.2, "learning_rate": 3.690454798073361e-05, "loss": 2.1333, "step": 9051500 }, { "epoch": 26.2, "learning_rate": 3.690382433308633e-05, "loss": 2.1269, "step": 9052000 }, { "epoch": 26.2, "learning_rate": 3.690310068543905e-05, "loss": 2.1424, "step": 9052500 }, { "epoch": 26.2, "learning_rate": 3.6902377037791776e-05, "loss": 2.1244, "step": 9053000 }, { "epoch": 26.21, "learning_rate": 3.69016533901445e-05, "loss": 2.1345, "step": 9053500 }, { "epoch": 26.21, "learning_rate": 3.690092974249723e-05, "loss": 2.1417, "step": 9054000 }, { "epoch": 26.21, "learning_rate": 3.690020609484995e-05, "loss": 2.1199, "step": 9054500 }, { "epoch": 26.21, "learning_rate": 3.689948244720267e-05, "loss": 2.1327, "step": 9055000 }, { "epoch": 26.21, "learning_rate": 3.6898758799555394e-05, "loss": 2.1441, "step": 9055500 }, { "epoch": 26.21, "learning_rate": 3.689803804649871e-05, "loss": 2.1296, "step": 9056000 }, { "epoch": 26.21, "learning_rate": 3.689731439885143e-05, "loss": 2.1436, "step": 9056500 }, { "epoch": 26.22, "learning_rate": 3.689659219849945e-05, "loss": 2.1082, "step": 9057000 }, { "epoch": 26.22, "learning_rate": 3.689586855085217e-05, "loss": 2.1487, "step": 9057500 }, { "epoch": 26.22, "learning_rate": 3.689514490320489e-05, "loss": 2.1502, "step": 9058000 }, { "epoch": 26.22, "learning_rate": 3.6894421255557614e-05, "loss": 2.1373, "step": 9058500 }, { "epoch": 26.22, "learning_rate": 3.6893697607910336e-05, "loss": 2.1291, "step": 9059000 }, { "epoch": 26.22, "learning_rate": 3.689297396026306e-05, "loss": 2.1238, "step": 9059500 }, { "epoch": 26.22, "learning_rate": 3.689225031261578e-05, "loss": 2.1491, "step": 9060000 }, { "epoch": 26.23, "learning_rate": 3.689152666496851e-05, "loss": 2.1268, "step": 9060500 }, { "epoch": 26.23, "learning_rate": 3.689080301732123e-05, "loss": 2.1219, "step": 9061000 }, { "epoch": 26.23, "learning_rate": 3.689007936967396e-05, "loss": 2.134, "step": 9061500 }, { "epoch": 26.23, "learning_rate": 3.6889357169321976e-05, "loss": 2.1225, "step": 9062000 }, { "epoch": 26.23, "learning_rate": 3.68886335216747e-05, "loss": 2.1288, "step": 9062500 }, { "epoch": 26.23, "learning_rate": 3.688790987402742e-05, "loss": 2.1168, "step": 9063000 }, { "epoch": 26.24, "learning_rate": 3.688718622638014e-05, "loss": 2.1528, "step": 9063500 }, { "epoch": 26.24, "learning_rate": 3.6886462578732865e-05, "loss": 2.1139, "step": 9064000 }, { "epoch": 26.24, "learning_rate": 3.688574037838089e-05, "loss": 2.1238, "step": 9064500 }, { "epoch": 26.24, "learning_rate": 3.688501673073361e-05, "loss": 2.1232, "step": 9065000 }, { "epoch": 26.24, "learning_rate": 3.6884294530381625e-05, "loss": 2.1337, "step": 9065500 }, { "epoch": 26.24, "learning_rate": 3.688357233002964e-05, "loss": 2.1341, "step": 9066000 }, { "epoch": 26.24, "learning_rate": 3.688284868238236e-05, "loss": 2.1463, "step": 9066500 }, { "epoch": 26.25, "learning_rate": 3.6882125034735085e-05, "loss": 2.1204, "step": 9067000 }, { "epoch": 26.25, "learning_rate": 3.688140138708781e-05, "loss": 2.1451, "step": 9067500 }, { "epoch": 26.25, "learning_rate": 3.6880677739440537e-05, "loss": 2.1229, "step": 9068000 }, { "epoch": 26.25, "learning_rate": 3.687995409179326e-05, "loss": 2.1478, "step": 9068500 }, { "epoch": 26.25, "learning_rate": 3.687923044414598e-05, "loss": 2.131, "step": 9069000 }, { "epoch": 26.25, "learning_rate": 3.687850679649871e-05, "loss": 2.1204, "step": 9069500 }, { "epoch": 26.25, "learning_rate": 3.6877784596146726e-05, "loss": 2.1131, "step": 9070000 }, { "epoch": 26.26, "learning_rate": 3.687706094849945e-05, "loss": 2.1423, "step": 9070500 }, { "epoch": 26.26, "learning_rate": 3.687633874814746e-05, "loss": 2.1051, "step": 9071000 }, { "epoch": 26.26, "learning_rate": 3.6875616547795486e-05, "loss": 2.133, "step": 9071500 }, { "epoch": 26.26, "learning_rate": 3.687489290014821e-05, "loss": 2.1328, "step": 9072000 }, { "epoch": 26.26, "learning_rate": 3.687416925250093e-05, "loss": 2.144, "step": 9072500 }, { "epoch": 26.26, "learning_rate": 3.687344560485365e-05, "loss": 2.1069, "step": 9073000 }, { "epoch": 26.26, "learning_rate": 3.6872721957206375e-05, "loss": 2.1227, "step": 9073500 }, { "epoch": 26.27, "learning_rate": 3.68719983095591e-05, "loss": 2.1207, "step": 9074000 }, { "epoch": 26.27, "learning_rate": 3.687127466191182e-05, "loss": 2.1541, "step": 9074500 }, { "epoch": 26.27, "learning_rate": 3.687055101426454e-05, "loss": 2.1103, "step": 9075000 }, { "epoch": 26.27, "learning_rate": 3.6869827366617264e-05, "loss": 2.1335, "step": 9075500 }, { "epoch": 26.27, "learning_rate": 3.6869103718969986e-05, "loss": 2.1551, "step": 9076000 }, { "epoch": 26.27, "learning_rate": 3.686838007132271e-05, "loss": 2.1337, "step": 9076500 }, { "epoch": 26.27, "learning_rate": 3.686765642367544e-05, "loss": 2.1507, "step": 9077000 }, { "epoch": 26.28, "learning_rate": 3.686693277602816e-05, "loss": 2.1285, "step": 9077500 }, { "epoch": 26.28, "learning_rate": 3.686620912838089e-05, "loss": 2.1198, "step": 9078000 }, { "epoch": 26.28, "learning_rate": 3.686548548073361e-05, "loss": 2.1481, "step": 9078500 }, { "epoch": 26.28, "learning_rate": 3.6864763280381626e-05, "loss": 2.1511, "step": 9079000 }, { "epoch": 26.28, "learning_rate": 3.686403963273435e-05, "loss": 2.1319, "step": 9079500 }, { "epoch": 26.28, "learning_rate": 3.686331598508707e-05, "loss": 2.1197, "step": 9080000 }, { "epoch": 26.28, "learning_rate": 3.686259233743979e-05, "loss": 2.1341, "step": 9080500 }, { "epoch": 26.29, "learning_rate": 3.6861868689792515e-05, "loss": 2.1118, "step": 9081000 }, { "epoch": 26.29, "learning_rate": 3.686114504214524e-05, "loss": 2.0959, "step": 9081500 }, { "epoch": 26.29, "learning_rate": 3.686042139449796e-05, "loss": 2.1312, "step": 9082000 }, { "epoch": 26.29, "learning_rate": 3.685969774685069e-05, "loss": 2.1285, "step": 9082500 }, { "epoch": 26.29, "learning_rate": 3.685897409920341e-05, "loss": 2.1444, "step": 9083000 }, { "epoch": 26.29, "learning_rate": 3.685825045155613e-05, "loss": 2.1235, "step": 9083500 }, { "epoch": 26.29, "learning_rate": 3.685752680390886e-05, "loss": 2.1134, "step": 9084000 }, { "epoch": 26.3, "learning_rate": 3.6856803156261584e-05, "loss": 2.1482, "step": 9084500 }, { "epoch": 26.3, "learning_rate": 3.68560809559096e-05, "loss": 2.1447, "step": 9085000 }, { "epoch": 26.3, "learning_rate": 3.685535730826232e-05, "loss": 2.1316, "step": 9085500 }, { "epoch": 26.3, "learning_rate": 3.6854633660615044e-05, "loss": 2.1416, "step": 9086000 }, { "epoch": 26.3, "learning_rate": 3.6853910012967766e-05, "loss": 2.123, "step": 9086500 }, { "epoch": 26.3, "learning_rate": 3.685318636532049e-05, "loss": 2.1418, "step": 9087000 }, { "epoch": 26.3, "learning_rate": 3.685246271767321e-05, "loss": 2.1396, "step": 9087500 }, { "epoch": 26.31, "learning_rate": 3.685173907002594e-05, "loss": 2.1622, "step": 9088000 }, { "epoch": 26.31, "learning_rate": 3.6851016869673955e-05, "loss": 2.1469, "step": 9088500 }, { "epoch": 26.31, "learning_rate": 3.685029322202668e-05, "loss": 2.1323, "step": 9089000 }, { "epoch": 26.31, "learning_rate": 3.68495695743794e-05, "loss": 2.1256, "step": 9089500 }, { "epoch": 26.31, "learning_rate": 3.684884592673212e-05, "loss": 2.1258, "step": 9090000 }, { "epoch": 26.31, "learning_rate": 3.6848122279084844e-05, "loss": 2.1376, "step": 9090500 }, { "epoch": 26.31, "learning_rate": 3.684739863143757e-05, "loss": 2.1762, "step": 9091000 }, { "epoch": 26.32, "learning_rate": 3.684667643108559e-05, "loss": 2.1337, "step": 9091500 }, { "epoch": 26.32, "learning_rate": 3.684595278343831e-05, "loss": 2.1196, "step": 9092000 }, { "epoch": 26.32, "learning_rate": 3.6845230583086334e-05, "loss": 2.1242, "step": 9092500 }, { "epoch": 26.32, "learning_rate": 3.684450838273435e-05, "loss": 2.1347, "step": 9093000 }, { "epoch": 26.32, "learning_rate": 3.6843786182382365e-05, "loss": 2.1126, "step": 9093500 }, { "epoch": 26.32, "learning_rate": 3.684306253473509e-05, "loss": 2.153, "step": 9094000 }, { "epoch": 26.32, "learning_rate": 3.6842338887087816e-05, "loss": 2.1585, "step": 9094500 }, { "epoch": 26.33, "learning_rate": 3.684161523944054e-05, "loss": 2.1367, "step": 9095000 }, { "epoch": 26.33, "learning_rate": 3.684089159179326e-05, "loss": 2.1191, "step": 9095500 }, { "epoch": 26.33, "learning_rate": 3.684016794414598e-05, "loss": 2.1624, "step": 9096000 }, { "epoch": 26.33, "learning_rate": 3.6839445743794e-05, "loss": 2.1598, "step": 9096500 }, { "epoch": 26.33, "learning_rate": 3.683872209614672e-05, "loss": 2.1327, "step": 9097000 }, { "epoch": 26.33, "learning_rate": 3.683799844849944e-05, "loss": 2.1274, "step": 9097500 }, { "epoch": 26.33, "learning_rate": 3.6837274800852165e-05, "loss": 2.1354, "step": 9098000 }, { "epoch": 26.34, "learning_rate": 3.683655115320489e-05, "loss": 2.1448, "step": 9098500 }, { "epoch": 26.34, "learning_rate": 3.6835827505557616e-05, "loss": 2.1467, "step": 9099000 }, { "epoch": 26.34, "learning_rate": 3.683510385791034e-05, "loss": 2.1147, "step": 9099500 }, { "epoch": 26.34, "learning_rate": 3.683438021026307e-05, "loss": 2.152, "step": 9100000 }, { "epoch": 26.34, "learning_rate": 3.683365656261579e-05, "loss": 2.137, "step": 9100500 }, { "epoch": 26.34, "learning_rate": 3.683293291496851e-05, "loss": 2.1129, "step": 9101000 }, { "epoch": 26.35, "learning_rate": 3.6832209267321234e-05, "loss": 2.1538, "step": 9101500 }, { "epoch": 26.35, "learning_rate": 3.6831485619673956e-05, "loss": 2.1512, "step": 9102000 }, { "epoch": 26.35, "learning_rate": 3.683076197202668e-05, "loss": 2.1229, "step": 9102500 }, { "epoch": 26.35, "learning_rate": 3.68300383243794e-05, "loss": 2.1289, "step": 9103000 }, { "epoch": 26.35, "learning_rate": 3.682931467673212e-05, "loss": 2.1404, "step": 9103500 }, { "epoch": 26.35, "learning_rate": 3.682859247638014e-05, "loss": 2.1519, "step": 9104000 }, { "epoch": 26.35, "learning_rate": 3.682787027602816e-05, "loss": 2.125, "step": 9104500 }, { "epoch": 26.36, "learning_rate": 3.682714662838088e-05, "loss": 2.1236, "step": 9105000 }, { "epoch": 26.36, "learning_rate": 3.6826422980733605e-05, "loss": 2.1367, "step": 9105500 }, { "epoch": 26.36, "learning_rate": 3.682569933308633e-05, "loss": 2.1129, "step": 9106000 }, { "epoch": 26.36, "learning_rate": 3.6824975685439057e-05, "loss": 2.136, "step": 9106500 }, { "epoch": 26.36, "learning_rate": 3.682425203779178e-05, "loss": 2.1213, "step": 9107000 }, { "epoch": 26.36, "learning_rate": 3.68235283901445e-05, "loss": 2.1441, "step": 9107500 }, { "epoch": 26.36, "learning_rate": 3.682280474249722e-05, "loss": 2.1304, "step": 9108000 }, { "epoch": 26.37, "learning_rate": 3.6822081094849945e-05, "loss": 2.1307, "step": 9108500 }, { "epoch": 26.37, "learning_rate": 3.682135889449797e-05, "loss": 2.1587, "step": 9109000 }, { "epoch": 26.37, "learning_rate": 3.682063669414598e-05, "loss": 2.1129, "step": 9109500 }, { "epoch": 26.37, "learning_rate": 3.6819913046498706e-05, "loss": 2.1541, "step": 9110000 }, { "epoch": 26.37, "learning_rate": 3.681918939885143e-05, "loss": 2.1313, "step": 9110500 }, { "epoch": 26.37, "learning_rate": 3.681846575120415e-05, "loss": 2.1533, "step": 9111000 }, { "epoch": 26.37, "learning_rate": 3.681774210355687e-05, "loss": 2.1137, "step": 9111500 }, { "epoch": 26.38, "learning_rate": 3.6817018455909595e-05, "loss": 2.1638, "step": 9112000 }, { "epoch": 26.38, "learning_rate": 3.681629480826232e-05, "loss": 2.1494, "step": 9112500 }, { "epoch": 26.38, "learning_rate": 3.681557116061504e-05, "loss": 2.1222, "step": 9113000 }, { "epoch": 26.38, "learning_rate": 3.681484751296777e-05, "loss": 2.1166, "step": 9113500 }, { "epoch": 26.38, "learning_rate": 3.681412386532049e-05, "loss": 2.1556, "step": 9114000 }, { "epoch": 26.38, "learning_rate": 3.681340021767322e-05, "loss": 2.1594, "step": 9114500 }, { "epoch": 26.38, "learning_rate": 3.681267657002594e-05, "loss": 2.1309, "step": 9115000 }, { "epoch": 26.39, "learning_rate": 3.6811952922378664e-05, "loss": 2.1399, "step": 9115500 }, { "epoch": 26.39, "learning_rate": 3.681123072202668e-05, "loss": 2.1268, "step": 9116000 }, { "epoch": 26.39, "learning_rate": 3.68105070743794e-05, "loss": 2.1392, "step": 9116500 }, { "epoch": 26.39, "learning_rate": 3.6809783426732124e-05, "loss": 2.1116, "step": 9117000 }, { "epoch": 26.39, "learning_rate": 3.6809059779084846e-05, "loss": 2.1291, "step": 9117500 }, { "epoch": 26.39, "learning_rate": 3.680833613143757e-05, "loss": 2.1357, "step": 9118000 }, { "epoch": 26.39, "learning_rate": 3.680761248379029e-05, "loss": 2.1135, "step": 9118500 }, { "epoch": 26.4, "learning_rate": 3.680689028343831e-05, "loss": 2.1408, "step": 9119000 }, { "epoch": 26.4, "learning_rate": 3.680616808308633e-05, "loss": 2.1234, "step": 9119500 }, { "epoch": 26.4, "learning_rate": 3.680544443543905e-05, "loss": 2.114, "step": 9120000 }, { "epoch": 26.4, "learning_rate": 3.680472078779177e-05, "loss": 2.1281, "step": 9120500 }, { "epoch": 26.4, "learning_rate": 3.6803997140144495e-05, "loss": 2.128, "step": 9121000 }, { "epoch": 26.4, "learning_rate": 3.6803273492497224e-05, "loss": 2.1517, "step": 9121500 }, { "epoch": 26.4, "learning_rate": 3.6802549844849946e-05, "loss": 2.1459, "step": 9122000 }, { "epoch": 26.41, "learning_rate": 3.680182619720267e-05, "loss": 2.1253, "step": 9122500 }, { "epoch": 26.41, "learning_rate": 3.680110254955539e-05, "loss": 2.1332, "step": 9123000 }, { "epoch": 26.41, "learning_rate": 3.680037890190812e-05, "loss": 2.1396, "step": 9123500 }, { "epoch": 26.41, "learning_rate": 3.6799656701556135e-05, "loss": 2.1593, "step": 9124000 }, { "epoch": 26.41, "learning_rate": 3.679893450120415e-05, "loss": 2.1489, "step": 9124500 }, { "epoch": 26.41, "learning_rate": 3.679821085355687e-05, "loss": 2.1372, "step": 9125000 }, { "epoch": 26.41, "learning_rate": 3.6797487205909595e-05, "loss": 2.1567, "step": 9125500 }, { "epoch": 26.42, "learning_rate": 3.679676355826232e-05, "loss": 2.1535, "step": 9126000 }, { "epoch": 26.42, "learning_rate": 3.679604135791034e-05, "loss": 2.1742, "step": 9126500 }, { "epoch": 26.42, "learning_rate": 3.6795319157558355e-05, "loss": 2.1456, "step": 9127000 }, { "epoch": 26.42, "learning_rate": 3.679459550991108e-05, "loss": 2.1333, "step": 9127500 }, { "epoch": 26.42, "learning_rate": 3.67938718622638e-05, "loss": 2.151, "step": 9128000 }, { "epoch": 26.42, "learning_rate": 3.679314821461652e-05, "loss": 2.1298, "step": 9128500 }, { "epoch": 26.42, "learning_rate": 3.6792424566969244e-05, "loss": 2.1186, "step": 9129000 }, { "epoch": 26.43, "learning_rate": 3.6791700919321973e-05, "loss": 2.1293, "step": 9129500 }, { "epoch": 26.43, "learning_rate": 3.6790977271674696e-05, "loss": 2.1047, "step": 9130000 }, { "epoch": 26.43, "learning_rate": 3.679025362402742e-05, "loss": 2.1281, "step": 9130500 }, { "epoch": 26.43, "learning_rate": 3.678953142367544e-05, "loss": 2.1327, "step": 9131000 }, { "epoch": 26.43, "learning_rate": 3.678880777602816e-05, "loss": 2.1541, "step": 9131500 }, { "epoch": 26.43, "learning_rate": 3.678808557567618e-05, "loss": 2.1098, "step": 9132000 }, { "epoch": 26.43, "learning_rate": 3.67873619280289e-05, "loss": 2.1419, "step": 9132500 }, { "epoch": 26.44, "learning_rate": 3.678663828038162e-05, "loss": 2.1375, "step": 9133000 }, { "epoch": 26.44, "learning_rate": 3.6785914632734345e-05, "loss": 2.1292, "step": 9133500 }, { "epoch": 26.44, "learning_rate": 3.678519098508707e-05, "loss": 2.128, "step": 9134000 }, { "epoch": 26.44, "learning_rate": 3.6784467337439796e-05, "loss": 2.1289, "step": 9134500 }, { "epoch": 26.44, "learning_rate": 3.678374368979252e-05, "loss": 2.1182, "step": 9135000 }, { "epoch": 26.44, "learning_rate": 3.678302004214524e-05, "loss": 2.1254, "step": 9135500 }, { "epoch": 26.44, "learning_rate": 3.678229639449796e-05, "loss": 2.1327, "step": 9136000 }, { "epoch": 26.45, "learning_rate": 3.678157564144127e-05, "loss": 2.1531, "step": 9136500 }, { "epoch": 26.45, "learning_rate": 3.6780851993794e-05, "loss": 2.1163, "step": 9137000 }, { "epoch": 26.45, "learning_rate": 3.678012834614672e-05, "loss": 2.1233, "step": 9137500 }, { "epoch": 26.45, "learning_rate": 3.6779404698499445e-05, "loss": 2.1228, "step": 9138000 }, { "epoch": 26.45, "learning_rate": 3.6778681050852174e-05, "loss": 2.133, "step": 9138500 }, { "epoch": 26.45, "learning_rate": 3.6777957403204896e-05, "loss": 2.1509, "step": 9139000 }, { "epoch": 26.46, "learning_rate": 3.677723520285291e-05, "loss": 2.1352, "step": 9139500 }, { "epoch": 26.46, "learning_rate": 3.6776511555205634e-05, "loss": 2.1335, "step": 9140000 }, { "epoch": 26.46, "learning_rate": 3.6775787907558356e-05, "loss": 2.1556, "step": 9140500 }, { "epoch": 26.46, "learning_rate": 3.677506570720637e-05, "loss": 2.167, "step": 9141000 }, { "epoch": 26.46, "learning_rate": 3.6774343506854394e-05, "loss": 2.1393, "step": 9141500 }, { "epoch": 26.46, "learning_rate": 3.6773619859207116e-05, "loss": 2.1128, "step": 9142000 }, { "epoch": 26.46, "learning_rate": 3.677289621155984e-05, "loss": 2.1315, "step": 9142500 }, { "epoch": 26.47, "learning_rate": 3.677217256391256e-05, "loss": 2.1425, "step": 9143000 }, { "epoch": 26.47, "learning_rate": 3.677144891626528e-05, "loss": 2.1512, "step": 9143500 }, { "epoch": 26.47, "learning_rate": 3.6770725268618005e-05, "loss": 2.125, "step": 9144000 }, { "epoch": 26.47, "learning_rate": 3.6770001620970734e-05, "loss": 2.1214, "step": 9144500 }, { "epoch": 26.47, "learning_rate": 3.6769277973323457e-05, "loss": 2.1471, "step": 9145000 }, { "epoch": 26.47, "learning_rate": 3.676855432567618e-05, "loss": 2.1413, "step": 9145500 }, { "epoch": 26.47, "learning_rate": 3.67678306780289e-05, "loss": 2.1458, "step": 9146000 }, { "epoch": 26.48, "learning_rate": 3.676710703038162e-05, "loss": 2.1184, "step": 9146500 }, { "epoch": 26.48, "learning_rate": 3.6766383382734345e-05, "loss": 2.1495, "step": 9147000 }, { "epoch": 26.48, "learning_rate": 3.6765659735087075e-05, "loss": 2.141, "step": 9147500 }, { "epoch": 26.48, "learning_rate": 3.67649360874398e-05, "loss": 2.1262, "step": 9148000 }, { "epoch": 26.48, "learning_rate": 3.676421243979252e-05, "loss": 2.1389, "step": 9148500 }, { "epoch": 26.48, "learning_rate": 3.676348879214524e-05, "loss": 2.1343, "step": 9149000 }, { "epoch": 26.48, "learning_rate": 3.6762765144497963e-05, "loss": 2.1418, "step": 9149500 }, { "epoch": 26.49, "learning_rate": 3.6762041496850686e-05, "loss": 2.1259, "step": 9150000 }, { "epoch": 26.49, "learning_rate": 3.676131784920341e-05, "loss": 2.1004, "step": 9150500 }, { "epoch": 26.49, "learning_rate": 3.6760595648851423e-05, "loss": 2.1433, "step": 9151000 }, { "epoch": 26.49, "learning_rate": 3.675987200120415e-05, "loss": 2.1264, "step": 9151500 }, { "epoch": 26.49, "learning_rate": 3.6759148353556875e-05, "loss": 2.1295, "step": 9152000 }, { "epoch": 26.49, "learning_rate": 3.67584261532049e-05, "loss": 2.1401, "step": 9152500 }, { "epoch": 26.49, "learning_rate": 3.675770250555762e-05, "loss": 2.1281, "step": 9153000 }, { "epoch": 26.5, "learning_rate": 3.675697885791034e-05, "loss": 2.1347, "step": 9153500 }, { "epoch": 26.5, "learning_rate": 3.6756255210263064e-05, "loss": 2.1333, "step": 9154000 }, { "epoch": 26.5, "learning_rate": 3.6755531562615786e-05, "loss": 2.1514, "step": 9154500 }, { "epoch": 26.5, "learning_rate": 3.675480791496851e-05, "loss": 2.1318, "step": 9155000 }, { "epoch": 26.5, "learning_rate": 3.675408426732123e-05, "loss": 2.1404, "step": 9155500 }, { "epoch": 26.5, "learning_rate": 3.675336061967395e-05, "loss": 2.1406, "step": 9156000 }, { "epoch": 26.5, "learning_rate": 3.6752636972026675e-05, "loss": 2.1468, "step": 9156500 }, { "epoch": 26.51, "learning_rate": 3.67519133243794e-05, "loss": 2.1264, "step": 9157000 }, { "epoch": 26.51, "learning_rate": 3.6751189676732126e-05, "loss": 2.1622, "step": 9157500 }, { "epoch": 26.51, "learning_rate": 3.675046602908485e-05, "loss": 2.1551, "step": 9158000 }, { "epoch": 26.51, "learning_rate": 3.6749743828732864e-05, "loss": 2.1614, "step": 9158500 }, { "epoch": 26.51, "learning_rate": 3.674902018108559e-05, "loss": 2.1525, "step": 9159000 }, { "epoch": 26.51, "learning_rate": 3.6748296533438315e-05, "loss": 2.1326, "step": 9159500 }, { "epoch": 26.51, "learning_rate": 3.674757288579104e-05, "loss": 2.1278, "step": 9160000 }, { "epoch": 26.52, "learning_rate": 3.674684923814376e-05, "loss": 2.1561, "step": 9160500 }, { "epoch": 26.52, "learning_rate": 3.6746127037791775e-05, "loss": 2.1438, "step": 9161000 }, { "epoch": 26.52, "learning_rate": 3.67454033901445e-05, "loss": 2.1258, "step": 9161500 }, { "epoch": 26.52, "learning_rate": 3.6744679742497226e-05, "loss": 2.1378, "step": 9162000 }, { "epoch": 26.52, "learning_rate": 3.674395609484995e-05, "loss": 2.147, "step": 9162500 }, { "epoch": 26.52, "learning_rate": 3.674323244720267e-05, "loss": 2.131, "step": 9163000 }, { "epoch": 26.52, "learning_rate": 3.674250879955539e-05, "loss": 2.1427, "step": 9163500 }, { "epoch": 26.53, "learning_rate": 3.674178659920341e-05, "loss": 2.1377, "step": 9164000 }, { "epoch": 26.53, "learning_rate": 3.674106295155613e-05, "loss": 2.1482, "step": 9164500 }, { "epoch": 26.53, "learning_rate": 3.674033930390885e-05, "loss": 2.139, "step": 9165000 }, { "epoch": 26.53, "learning_rate": 3.6739615656261575e-05, "loss": 2.1478, "step": 9165500 }, { "epoch": 26.53, "learning_rate": 3.67388920086143e-05, "loss": 2.1425, "step": 9166000 }, { "epoch": 26.53, "learning_rate": 3.673816836096703e-05, "loss": 2.1377, "step": 9166500 }, { "epoch": 26.53, "learning_rate": 3.673744471331975e-05, "loss": 2.1237, "step": 9167000 }, { "epoch": 26.54, "learning_rate": 3.673672106567248e-05, "loss": 2.1322, "step": 9167500 }, { "epoch": 26.54, "learning_rate": 3.67359974180252e-05, "loss": 2.1451, "step": 9168000 }, { "epoch": 26.54, "learning_rate": 3.673527377037792e-05, "loss": 2.1698, "step": 9168500 }, { "epoch": 26.54, "learning_rate": 3.6734550122730645e-05, "loss": 2.1015, "step": 9169000 }, { "epoch": 26.54, "learning_rate": 3.673382647508337e-05, "loss": 2.1358, "step": 9169500 }, { "epoch": 26.54, "learning_rate": 3.673310282743609e-05, "loss": 2.1305, "step": 9170000 }, { "epoch": 26.54, "learning_rate": 3.6732380627084105e-05, "loss": 2.1278, "step": 9170500 }, { "epoch": 26.55, "learning_rate": 3.673165697943683e-05, "loss": 2.1284, "step": 9171000 }, { "epoch": 26.55, "learning_rate": 3.673093333178955e-05, "loss": 2.1518, "step": 9171500 }, { "epoch": 26.55, "learning_rate": 3.673021113143757e-05, "loss": 2.1475, "step": 9172000 }, { "epoch": 26.55, "learning_rate": 3.6729487483790294e-05, "loss": 2.1455, "step": 9172500 }, { "epoch": 26.55, "learning_rate": 3.6728763836143016e-05, "loss": 2.1337, "step": 9173000 }, { "epoch": 26.55, "learning_rate": 3.6728040188495745e-05, "loss": 2.1241, "step": 9173500 }, { "epoch": 26.55, "learning_rate": 3.672731654084847e-05, "loss": 2.1295, "step": 9174000 }, { "epoch": 26.56, "learning_rate": 3.672659289320119e-05, "loss": 2.1186, "step": 9174500 }, { "epoch": 26.56, "learning_rate": 3.672586924555391e-05, "loss": 2.1272, "step": 9175000 }, { "epoch": 26.56, "learning_rate": 3.672514704520193e-05, "loss": 2.1527, "step": 9175500 }, { "epoch": 26.56, "learning_rate": 3.672442339755465e-05, "loss": 2.1433, "step": 9176000 }, { "epoch": 26.56, "learning_rate": 3.672369974990738e-05, "loss": 2.126, "step": 9176500 }, { "epoch": 26.56, "learning_rate": 3.67229761022601e-05, "loss": 2.1539, "step": 9177000 }, { "epoch": 26.57, "learning_rate": 3.672225245461282e-05, "loss": 2.1526, "step": 9177500 }, { "epoch": 26.57, "learning_rate": 3.6721528806965545e-05, "loss": 2.1414, "step": 9178000 }, { "epoch": 26.57, "learning_rate": 3.672080660661356e-05, "loss": 2.1362, "step": 9178500 }, { "epoch": 26.57, "learning_rate": 3.6720084406261576e-05, "loss": 2.1084, "step": 9179000 }, { "epoch": 26.57, "learning_rate": 3.67193607586143e-05, "loss": 2.1167, "step": 9179500 }, { "epoch": 26.57, "learning_rate": 3.671863711096703e-05, "loss": 2.1127, "step": 9180000 }, { "epoch": 26.57, "learning_rate": 3.671791346331975e-05, "loss": 2.1119, "step": 9180500 }, { "epoch": 26.58, "learning_rate": 3.671718981567247e-05, "loss": 2.1605, "step": 9181000 }, { "epoch": 26.58, "learning_rate": 3.67164661680252e-05, "loss": 2.1275, "step": 9181500 }, { "epoch": 26.58, "learning_rate": 3.6715743967673216e-05, "loss": 2.1273, "step": 9182000 }, { "epoch": 26.58, "learning_rate": 3.671502032002594e-05, "loss": 2.1093, "step": 9182500 }, { "epoch": 26.58, "learning_rate": 3.671429667237866e-05, "loss": 2.1511, "step": 9183000 }, { "epoch": 26.58, "learning_rate": 3.671357302473138e-05, "loss": 2.1362, "step": 9183500 }, { "epoch": 26.58, "learning_rate": 3.6712849377084105e-05, "loss": 2.1469, "step": 9184000 }, { "epoch": 26.59, "learning_rate": 3.671212572943683e-05, "loss": 2.1289, "step": 9184500 }, { "epoch": 26.59, "learning_rate": 3.671140208178955e-05, "loss": 2.1461, "step": 9185000 }, { "epoch": 26.59, "learning_rate": 3.671067843414228e-05, "loss": 2.1371, "step": 9185500 }, { "epoch": 26.59, "learning_rate": 3.6709956233790294e-05, "loss": 2.128, "step": 9186000 }, { "epoch": 26.59, "learning_rate": 3.670923258614302e-05, "loss": 2.154, "step": 9186500 }, { "epoch": 26.59, "learning_rate": 3.670850893849574e-05, "loss": 2.1334, "step": 9187000 }, { "epoch": 26.59, "learning_rate": 3.670778529084846e-05, "loss": 2.1331, "step": 9187500 }, { "epoch": 26.6, "learning_rate": 3.670706309049648e-05, "loss": 2.1125, "step": 9188000 }, { "epoch": 26.6, "learning_rate": 3.670634233743979e-05, "loss": 2.1372, "step": 9188500 }, { "epoch": 26.6, "learning_rate": 3.670561868979252e-05, "loss": 2.1458, "step": 9189000 }, { "epoch": 26.6, "learning_rate": 3.6704895042145244e-05, "loss": 2.141, "step": 9189500 }, { "epoch": 26.6, "learning_rate": 3.6704171394497966e-05, "loss": 2.1386, "step": 9190000 }, { "epoch": 26.6, "learning_rate": 3.670344774685069e-05, "loss": 2.1422, "step": 9190500 }, { "epoch": 26.6, "learning_rate": 3.670272409920341e-05, "loss": 2.1495, "step": 9191000 }, { "epoch": 26.61, "learning_rate": 3.670200045155613e-05, "loss": 2.1428, "step": 9191500 }, { "epoch": 26.61, "learning_rate": 3.6701276803908855e-05, "loss": 2.1358, "step": 9192000 }, { "epoch": 26.61, "learning_rate": 3.670055315626158e-05, "loss": 2.1554, "step": 9192500 }, { "epoch": 26.61, "learning_rate": 3.6699829508614306e-05, "loss": 2.1434, "step": 9193000 }, { "epoch": 26.61, "learning_rate": 3.669910586096703e-05, "loss": 2.1222, "step": 9193500 }, { "epoch": 26.61, "learning_rate": 3.669838221331975e-05, "loss": 2.1239, "step": 9194000 }, { "epoch": 26.61, "learning_rate": 3.669765856567247e-05, "loss": 2.115, "step": 9194500 }, { "epoch": 26.62, "learning_rate": 3.669693636532049e-05, "loss": 2.1451, "step": 9195000 }, { "epoch": 26.62, "learning_rate": 3.669621271767321e-05, "loss": 2.125, "step": 9195500 }, { "epoch": 26.62, "learning_rate": 3.6695491964616526e-05, "loss": 2.1508, "step": 9196000 }, { "epoch": 26.62, "learning_rate": 3.669476831696925e-05, "loss": 2.1543, "step": 9196500 }, { "epoch": 26.62, "learning_rate": 3.669404466932198e-05, "loss": 2.1166, "step": 9197000 }, { "epoch": 26.62, "learning_rate": 3.66933210216747e-05, "loss": 2.1467, "step": 9197500 }, { "epoch": 26.62, "learning_rate": 3.669259737402742e-05, "loss": 2.1466, "step": 9198000 }, { "epoch": 26.63, "learning_rate": 3.6691873726380144e-05, "loss": 2.1286, "step": 9198500 }, { "epoch": 26.63, "learning_rate": 3.6691150078732866e-05, "loss": 2.1323, "step": 9199000 }, { "epoch": 26.63, "learning_rate": 3.669042643108559e-05, "loss": 2.1461, "step": 9199500 }, { "epoch": 26.63, "learning_rate": 3.668970278343831e-05, "loss": 2.1402, "step": 9200000 }, { "epoch": 26.63, "learning_rate": 3.668897913579103e-05, "loss": 2.1385, "step": 9200500 }, { "epoch": 26.63, "learning_rate": 3.6688255488143755e-05, "loss": 2.1449, "step": 9201000 }, { "epoch": 26.63, "learning_rate": 3.668753184049648e-05, "loss": 2.1374, "step": 9201500 }, { "epoch": 26.64, "learning_rate": 3.6686808192849206e-05, "loss": 2.1375, "step": 9202000 }, { "epoch": 26.64, "learning_rate": 3.668608454520193e-05, "loss": 2.1437, "step": 9202500 }, { "epoch": 26.64, "learning_rate": 3.6685362344849944e-05, "loss": 2.1153, "step": 9203000 }, { "epoch": 26.64, "learning_rate": 3.6684638697202667e-05, "loss": 2.1451, "step": 9203500 }, { "epoch": 26.64, "learning_rate": 3.6683915049555396e-05, "loss": 2.1506, "step": 9204000 }, { "epoch": 26.64, "learning_rate": 3.668319140190812e-05, "loss": 2.1622, "step": 9204500 }, { "epoch": 26.64, "learning_rate": 3.668246775426084e-05, "loss": 2.1542, "step": 9205000 }, { "epoch": 26.65, "learning_rate": 3.668174410661356e-05, "loss": 2.1223, "step": 9205500 }, { "epoch": 26.65, "learning_rate": 3.6681020458966284e-05, "loss": 2.1261, "step": 9206000 }, { "epoch": 26.65, "learning_rate": 3.668029681131901e-05, "loss": 2.1192, "step": 9206500 }, { "epoch": 26.65, "learning_rate": 3.667957316367173e-05, "loss": 2.1366, "step": 9207000 }, { "epoch": 26.65, "learning_rate": 3.667885096331975e-05, "loss": 2.1092, "step": 9207500 }, { "epoch": 26.65, "learning_rate": 3.6678127315672473e-05, "loss": 2.1258, "step": 9208000 }, { "epoch": 26.65, "learning_rate": 3.667740511532049e-05, "loss": 2.1426, "step": 9208500 }, { "epoch": 26.66, "learning_rate": 3.667668146767321e-05, "loss": 2.1382, "step": 9209000 }, { "epoch": 26.66, "learning_rate": 3.6675957820025934e-05, "loss": 2.1398, "step": 9209500 }, { "epoch": 26.66, "learning_rate": 3.6675234172378656e-05, "loss": 2.137, "step": 9210000 }, { "epoch": 26.66, "learning_rate": 3.667451197202668e-05, "loss": 2.1328, "step": 9210500 }, { "epoch": 26.66, "learning_rate": 3.6673789771674694e-05, "loss": 2.1133, "step": 9211000 }, { "epoch": 26.66, "learning_rate": 3.667306612402742e-05, "loss": 2.1502, "step": 9211500 }, { "epoch": 26.66, "learning_rate": 3.6672342476380145e-05, "loss": 2.1196, "step": 9212000 }, { "epoch": 26.67, "learning_rate": 3.667161882873287e-05, "loss": 2.1227, "step": 9212500 }, { "epoch": 26.67, "learning_rate": 3.667089518108559e-05, "loss": 2.1302, "step": 9213000 }, { "epoch": 26.67, "learning_rate": 3.667017153343831e-05, "loss": 2.1368, "step": 9213500 }, { "epoch": 26.67, "learning_rate": 3.6669447885791034e-05, "loss": 2.1432, "step": 9214000 }, { "epoch": 26.67, "learning_rate": 3.6668724238143756e-05, "loss": 2.1367, "step": 9214500 }, { "epoch": 26.67, "learning_rate": 3.6668000590496485e-05, "loss": 2.1125, "step": 9215000 }, { "epoch": 26.68, "learning_rate": 3.666727694284921e-05, "loss": 2.1507, "step": 9215500 }, { "epoch": 26.68, "learning_rate": 3.666655474249722e-05, "loss": 2.1429, "step": 9216000 }, { "epoch": 26.68, "learning_rate": 3.6665831094849945e-05, "loss": 2.1362, "step": 9216500 }, { "epoch": 26.68, "learning_rate": 3.666510744720267e-05, "loss": 2.1175, "step": 9217000 }, { "epoch": 26.68, "learning_rate": 3.666438379955539e-05, "loss": 2.1568, "step": 9217500 }, { "epoch": 26.68, "learning_rate": 3.666366015190811e-05, "loss": 2.1417, "step": 9218000 }, { "epoch": 26.68, "learning_rate": 3.6662936504260834e-05, "loss": 2.1523, "step": 9218500 }, { "epoch": 26.69, "learning_rate": 3.666221285661356e-05, "loss": 2.1162, "step": 9219000 }, { "epoch": 26.69, "learning_rate": 3.6661489208966285e-05, "loss": 2.1588, "step": 9219500 }, { "epoch": 26.69, "learning_rate": 3.666076556131901e-05, "loss": 2.1314, "step": 9220000 }, { "epoch": 26.69, "learning_rate": 3.6660041913671737e-05, "loss": 2.1179, "step": 9220500 }, { "epoch": 26.69, "learning_rate": 3.665931826602446e-05, "loss": 2.1386, "step": 9221000 }, { "epoch": 26.69, "learning_rate": 3.665859461837718e-05, "loss": 2.1212, "step": 9221500 }, { "epoch": 26.69, "learning_rate": 3.66578709707299e-05, "loss": 2.1295, "step": 9222000 }, { "epoch": 26.7, "learning_rate": 3.665714877037792e-05, "loss": 2.1387, "step": 9222500 }, { "epoch": 26.7, "learning_rate": 3.665642512273064e-05, "loss": 2.1558, "step": 9223000 }, { "epoch": 26.7, "learning_rate": 3.665570147508336e-05, "loss": 2.1516, "step": 9223500 }, { "epoch": 26.7, "learning_rate": 3.6654977827436085e-05, "loss": 2.1343, "step": 9224000 }, { "epoch": 26.7, "learning_rate": 3.66542570743794e-05, "loss": 2.1452, "step": 9224500 }, { "epoch": 26.7, "learning_rate": 3.665353342673212e-05, "loss": 2.1307, "step": 9225000 }, { "epoch": 26.7, "learning_rate": 3.6652809779084846e-05, "loss": 2.1329, "step": 9225500 }, { "epoch": 26.71, "learning_rate": 3.665208613143757e-05, "loss": 2.1144, "step": 9226000 }, { "epoch": 26.71, "learning_rate": 3.6651365378380883e-05, "loss": 2.1593, "step": 9226500 }, { "epoch": 26.71, "learning_rate": 3.665064173073361e-05, "loss": 2.1296, "step": 9227000 }, { "epoch": 26.71, "learning_rate": 3.6649918083086335e-05, "loss": 2.153, "step": 9227500 }, { "epoch": 26.71, "learning_rate": 3.664919443543906e-05, "loss": 2.145, "step": 9228000 }, { "epoch": 26.71, "learning_rate": 3.664847078779178e-05, "loss": 2.1672, "step": 9228500 }, { "epoch": 26.71, "learning_rate": 3.66477471401445e-05, "loss": 2.1151, "step": 9229000 }, { "epoch": 26.72, "learning_rate": 3.6647023492497224e-05, "loss": 2.1376, "step": 9229500 }, { "epoch": 26.72, "learning_rate": 3.6646299844849946e-05, "loss": 2.1237, "step": 9230000 }, { "epoch": 26.72, "learning_rate": 3.664557619720267e-05, "loss": 2.1373, "step": 9230500 }, { "epoch": 26.72, "learning_rate": 3.664485254955539e-05, "loss": 2.1437, "step": 9231000 }, { "epoch": 26.72, "learning_rate": 3.664412890190811e-05, "loss": 2.1523, "step": 9231500 }, { "epoch": 26.72, "learning_rate": 3.6643405254260835e-05, "loss": 2.149, "step": 9232000 }, { "epoch": 26.72, "learning_rate": 3.664268160661356e-05, "loss": 2.1558, "step": 9232500 }, { "epoch": 26.73, "learning_rate": 3.6641957958966286e-05, "loss": 2.1288, "step": 9233000 }, { "epoch": 26.73, "learning_rate": 3.664123431131901e-05, "loss": 2.165, "step": 9233500 }, { "epoch": 26.73, "learning_rate": 3.664051066367174e-05, "loss": 2.1393, "step": 9234000 }, { "epoch": 26.73, "learning_rate": 3.663978701602446e-05, "loss": 2.1356, "step": 9234500 }, { "epoch": 26.73, "learning_rate": 3.6639064815672475e-05, "loss": 2.1422, "step": 9235000 }, { "epoch": 26.73, "learning_rate": 3.66383411680252e-05, "loss": 2.1455, "step": 9235500 }, { "epoch": 26.73, "learning_rate": 3.663761896767321e-05, "loss": 2.1487, "step": 9236000 }, { "epoch": 26.74, "learning_rate": 3.6636895320025935e-05, "loss": 2.1423, "step": 9236500 }, { "epoch": 26.74, "learning_rate": 3.6636171672378664e-05, "loss": 2.122, "step": 9237000 }, { "epoch": 26.74, "learning_rate": 3.6635448024731386e-05, "loss": 2.156, "step": 9237500 }, { "epoch": 26.74, "learning_rate": 3.663472437708411e-05, "loss": 2.1476, "step": 9238000 }, { "epoch": 26.74, "learning_rate": 3.663400072943683e-05, "loss": 2.1299, "step": 9238500 }, { "epoch": 26.74, "learning_rate": 3.6633278529084846e-05, "loss": 2.1625, "step": 9239000 }, { "epoch": 26.74, "learning_rate": 3.663255488143757e-05, "loss": 2.1428, "step": 9239500 }, { "epoch": 26.75, "learning_rate": 3.663183123379029e-05, "loss": 2.1355, "step": 9240000 }, { "epoch": 26.75, "learning_rate": 3.663110758614301e-05, "loss": 2.1319, "step": 9240500 }, { "epoch": 26.75, "learning_rate": 3.6630385385791035e-05, "loss": 2.1423, "step": 9241000 }, { "epoch": 26.75, "learning_rate": 3.6629661738143764e-05, "loss": 2.1486, "step": 9241500 }, { "epoch": 26.75, "learning_rate": 3.662893809049649e-05, "loss": 2.1383, "step": 9242000 }, { "epoch": 26.75, "learning_rate": 3.662821444284921e-05, "loss": 2.1648, "step": 9242500 }, { "epoch": 26.75, "learning_rate": 3.662749079520193e-05, "loss": 2.136, "step": 9243000 }, { "epoch": 26.76, "learning_rate": 3.662676714755465e-05, "loss": 2.1374, "step": 9243500 }, { "epoch": 26.76, "learning_rate": 3.6626043499907376e-05, "loss": 2.1315, "step": 9244000 }, { "epoch": 26.76, "learning_rate": 3.66253198522601e-05, "loss": 2.1656, "step": 9244500 }, { "epoch": 26.76, "learning_rate": 3.662459620461282e-05, "loss": 2.1479, "step": 9245000 }, { "epoch": 26.76, "learning_rate": 3.6623874004260836e-05, "loss": 2.1446, "step": 9245500 }, { "epoch": 26.76, "learning_rate": 3.6623150356613565e-05, "loss": 2.135, "step": 9246000 }, { "epoch": 26.76, "learning_rate": 3.662242815626158e-05, "loss": 2.1551, "step": 9246500 }, { "epoch": 26.77, "learning_rate": 3.66217045086143e-05, "loss": 2.1248, "step": 9247000 }, { "epoch": 26.77, "learning_rate": 3.6620980860967025e-05, "loss": 2.1632, "step": 9247500 }, { "epoch": 26.77, "learning_rate": 3.662025866061504e-05, "loss": 2.1292, "step": 9248000 }, { "epoch": 26.77, "learning_rate": 3.661953501296776e-05, "loss": 2.1409, "step": 9248500 }, { "epoch": 26.77, "learning_rate": 3.661881136532049e-05, "loss": 2.1437, "step": 9249000 }, { "epoch": 26.77, "learning_rate": 3.6618087717673214e-05, "loss": 2.1611, "step": 9249500 }, { "epoch": 26.77, "learning_rate": 3.6617364070025936e-05, "loss": 2.1592, "step": 9250000 }, { "epoch": 26.78, "learning_rate": 3.6616640422378665e-05, "loss": 2.1548, "step": 9250500 }, { "epoch": 26.78, "learning_rate": 3.661591677473139e-05, "loss": 2.1295, "step": 9251000 }, { "epoch": 26.78, "learning_rate": 3.661519312708411e-05, "loss": 2.1375, "step": 9251500 }, { "epoch": 26.78, "learning_rate": 3.661446947943683e-05, "loss": 2.1253, "step": 9252000 }, { "epoch": 26.78, "learning_rate": 3.661374727908485e-05, "loss": 2.133, "step": 9252500 }, { "epoch": 26.78, "learning_rate": 3.661302363143757e-05, "loss": 2.1567, "step": 9253000 }, { "epoch": 26.79, "learning_rate": 3.661229998379029e-05, "loss": 2.1385, "step": 9253500 }, { "epoch": 26.79, "learning_rate": 3.6611576336143014e-05, "loss": 2.1184, "step": 9254000 }, { "epoch": 26.79, "learning_rate": 3.6610852688495736e-05, "loss": 2.1171, "step": 9254500 }, { "epoch": 26.79, "learning_rate": 3.661013048814376e-05, "loss": 2.1176, "step": 9255000 }, { "epoch": 26.79, "learning_rate": 3.660940684049648e-05, "loss": 2.1424, "step": 9255500 }, { "epoch": 26.79, "learning_rate": 3.66086831928492e-05, "loss": 2.1311, "step": 9256000 }, { "epoch": 26.79, "learning_rate": 3.660795954520193e-05, "loss": 2.1362, "step": 9256500 }, { "epoch": 26.8, "learning_rate": 3.6607235897554654e-05, "loss": 2.1427, "step": 9257000 }, { "epoch": 26.8, "learning_rate": 3.6606512249907376e-05, "loss": 2.1485, "step": 9257500 }, { "epoch": 26.8, "learning_rate": 3.66057886022601e-05, "loss": 2.1493, "step": 9258000 }, { "epoch": 26.8, "learning_rate": 3.660506495461282e-05, "loss": 2.1304, "step": 9258500 }, { "epoch": 26.8, "learning_rate": 3.6604342754260836e-05, "loss": 2.11, "step": 9259000 }, { "epoch": 26.8, "learning_rate": 3.6603619106613565e-05, "loss": 2.1388, "step": 9259500 }, { "epoch": 26.8, "learning_rate": 3.660289545896629e-05, "loss": 2.1235, "step": 9260000 }, { "epoch": 26.81, "learning_rate": 3.66021732586143e-05, "loss": 2.1608, "step": 9260500 }, { "epoch": 26.81, "learning_rate": 3.6601449610967025e-05, "loss": 2.1398, "step": 9261000 }, { "epoch": 26.81, "learning_rate": 3.660072741061504e-05, "loss": 2.144, "step": 9261500 }, { "epoch": 26.81, "learning_rate": 3.660000376296776e-05, "loss": 2.1471, "step": 9262000 }, { "epoch": 26.81, "learning_rate": 3.659928011532049e-05, "loss": 2.1408, "step": 9262500 }, { "epoch": 26.81, "learning_rate": 3.6598556467673214e-05, "loss": 2.1159, "step": 9263000 }, { "epoch": 26.81, "learning_rate": 3.659783282002594e-05, "loss": 2.1462, "step": 9263500 }, { "epoch": 26.82, "learning_rate": 3.6597109172378666e-05, "loss": 2.1333, "step": 9264000 }, { "epoch": 26.82, "learning_rate": 3.659638552473139e-05, "loss": 2.1169, "step": 9264500 }, { "epoch": 26.82, "learning_rate": 3.659566187708411e-05, "loss": 2.1264, "step": 9265000 }, { "epoch": 26.82, "learning_rate": 3.659493822943683e-05, "loss": 2.1359, "step": 9265500 }, { "epoch": 26.82, "learning_rate": 3.6594214581789555e-05, "loss": 2.1231, "step": 9266000 }, { "epoch": 26.82, "learning_rate": 3.659349093414228e-05, "loss": 2.1545, "step": 9266500 }, { "epoch": 26.82, "learning_rate": 3.6592767286495e-05, "loss": 2.1399, "step": 9267000 }, { "epoch": 26.83, "learning_rate": 3.6592045086143015e-05, "loss": 2.1413, "step": 9267500 }, { "epoch": 26.83, "learning_rate": 3.6591321438495744e-05, "loss": 2.133, "step": 9268000 }, { "epoch": 26.83, "learning_rate": 3.659059923814376e-05, "loss": 2.1533, "step": 9268500 }, { "epoch": 26.83, "learning_rate": 3.6589877037791775e-05, "loss": 2.1318, "step": 9269000 }, { "epoch": 26.83, "learning_rate": 3.65891533901445e-05, "loss": 2.1441, "step": 9269500 }, { "epoch": 26.83, "learning_rate": 3.658842974249722e-05, "loss": 2.1386, "step": 9270000 }, { "epoch": 26.83, "learning_rate": 3.658770609484994e-05, "loss": 2.1183, "step": 9270500 }, { "epoch": 26.84, "learning_rate": 3.6586982447202664e-05, "loss": 2.1395, "step": 9271000 }, { "epoch": 26.84, "learning_rate": 3.658625879955539e-05, "loss": 2.1544, "step": 9271500 }, { "epoch": 26.84, "learning_rate": 3.6585535151908115e-05, "loss": 2.1648, "step": 9272000 }, { "epoch": 26.84, "learning_rate": 3.6584811504260844e-05, "loss": 2.1506, "step": 9272500 }, { "epoch": 26.84, "learning_rate": 3.6584087856613566e-05, "loss": 2.1505, "step": 9273000 }, { "epoch": 26.84, "learning_rate": 3.658336420896629e-05, "loss": 2.1548, "step": 9273500 }, { "epoch": 26.84, "learning_rate": 3.658264056131901e-05, "loss": 2.1496, "step": 9274000 }, { "epoch": 26.85, "learning_rate": 3.658191691367173e-05, "loss": 2.1281, "step": 9274500 }, { "epoch": 26.85, "learning_rate": 3.6581193266024455e-05, "loss": 2.1403, "step": 9275000 }, { "epoch": 26.85, "learning_rate": 3.658046961837718e-05, "loss": 2.1404, "step": 9275500 }, { "epoch": 26.85, "learning_rate": 3.65797459707299e-05, "loss": 2.1401, "step": 9276000 }, { "epoch": 26.85, "learning_rate": 3.657902232308262e-05, "loss": 2.1477, "step": 9276500 }, { "epoch": 26.85, "learning_rate": 3.6578298675435344e-05, "loss": 2.1195, "step": 9277000 }, { "epoch": 26.85, "learning_rate": 3.6577575027788066e-05, "loss": 2.1554, "step": 9277500 }, { "epoch": 26.86, "learning_rate": 3.6576851380140795e-05, "loss": 2.1367, "step": 9278000 }, { "epoch": 26.86, "learning_rate": 3.6576130627084104e-05, "loss": 2.1406, "step": 9278500 }, { "epoch": 26.86, "learning_rate": 3.657540697943683e-05, "loss": 2.1552, "step": 9279000 }, { "epoch": 26.86, "learning_rate": 3.6574683331789555e-05, "loss": 2.1598, "step": 9279500 }, { "epoch": 26.86, "learning_rate": 3.657395968414228e-05, "loss": 2.1591, "step": 9280000 }, { "epoch": 26.86, "learning_rate": 3.6573236036495e-05, "loss": 2.1188, "step": 9280500 }, { "epoch": 26.86, "learning_rate": 3.657251238884772e-05, "loss": 2.1295, "step": 9281000 }, { "epoch": 26.87, "learning_rate": 3.6571788741200444e-05, "loss": 2.1022, "step": 9281500 }, { "epoch": 26.87, "learning_rate": 3.6571065093553167e-05, "loss": 2.1486, "step": 9282000 }, { "epoch": 26.87, "learning_rate": 3.6570341445905896e-05, "loss": 2.1545, "step": 9282500 }, { "epoch": 26.87, "learning_rate": 3.656961779825862e-05, "loss": 2.1382, "step": 9283000 }, { "epoch": 26.87, "learning_rate": 3.656889415061134e-05, "loss": 2.1414, "step": 9283500 }, { "epoch": 26.87, "learning_rate": 3.6568171950259356e-05, "loss": 2.1492, "step": 9284000 }, { "epoch": 26.87, "learning_rate": 3.656744974990737e-05, "loss": 2.1552, "step": 9284500 }, { "epoch": 26.88, "learning_rate": 3.6566726102260093e-05, "loss": 2.1419, "step": 9285000 }, { "epoch": 26.88, "learning_rate": 3.6566002454612816e-05, "loss": 2.1349, "step": 9285500 }, { "epoch": 26.88, "learning_rate": 3.656528025426084e-05, "loss": 2.1363, "step": 9286000 }, { "epoch": 26.88, "learning_rate": 3.656455660661357e-05, "loss": 2.1527, "step": 9286500 }, { "epoch": 26.88, "learning_rate": 3.656383295896629e-05, "loss": 2.1319, "step": 9287000 }, { "epoch": 26.88, "learning_rate": 3.6563110758614305e-05, "loss": 2.1648, "step": 9287500 }, { "epoch": 26.88, "learning_rate": 3.656238711096703e-05, "loss": 2.1269, "step": 9288000 }, { "epoch": 26.89, "learning_rate": 3.656166346331975e-05, "loss": 2.1514, "step": 9288500 }, { "epoch": 26.89, "learning_rate": 3.656093981567247e-05, "loss": 2.1763, "step": 9289000 }, { "epoch": 26.89, "learning_rate": 3.6560216168025194e-05, "loss": 2.1329, "step": 9289500 }, { "epoch": 26.89, "learning_rate": 3.6559492520377916e-05, "loss": 2.1321, "step": 9290000 }, { "epoch": 26.89, "learning_rate": 3.6558768872730645e-05, "loss": 2.1622, "step": 9290500 }, { "epoch": 26.89, "learning_rate": 3.655804522508337e-05, "loss": 2.1351, "step": 9291000 }, { "epoch": 26.9, "learning_rate": 3.655732157743609e-05, "loss": 2.1534, "step": 9291500 }, { "epoch": 26.9, "learning_rate": 3.655659792978881e-05, "loss": 2.1489, "step": 9292000 }, { "epoch": 26.9, "learning_rate": 3.6555874282141534e-05, "loss": 2.1286, "step": 9292500 }, { "epoch": 26.9, "learning_rate": 3.6555150634494256e-05, "loss": 2.1301, "step": 9293000 }, { "epoch": 26.9, "learning_rate": 3.6554426986846985e-05, "loss": 2.1108, "step": 9293500 }, { "epoch": 26.9, "learning_rate": 3.655370333919971e-05, "loss": 2.1345, "step": 9294000 }, { "epoch": 26.9, "learning_rate": 3.655298113884772e-05, "loss": 2.1089, "step": 9294500 }, { "epoch": 26.91, "learning_rate": 3.6552257491200445e-05, "loss": 2.157, "step": 9295000 }, { "epoch": 26.91, "learning_rate": 3.655153384355317e-05, "loss": 2.1542, "step": 9295500 }, { "epoch": 26.91, "learning_rate": 3.655081164320119e-05, "loss": 2.1379, "step": 9296000 }, { "epoch": 26.91, "learning_rate": 3.655008799555391e-05, "loss": 2.153, "step": 9296500 }, { "epoch": 26.91, "learning_rate": 3.6549364347906634e-05, "loss": 2.1381, "step": 9297000 }, { "epoch": 26.91, "learning_rate": 3.6548640700259356e-05, "loss": 2.1364, "step": 9297500 }, { "epoch": 26.91, "learning_rate": 3.654791705261208e-05, "loss": 2.1477, "step": 9298000 }, { "epoch": 26.92, "learning_rate": 3.65471934049648e-05, "loss": 2.1424, "step": 9298500 }, { "epoch": 26.92, "learning_rate": 3.654646975731752e-05, "loss": 2.1482, "step": 9299000 }, { "epoch": 26.92, "learning_rate": 3.6545746109670245e-05, "loss": 2.1598, "step": 9299500 }, { "epoch": 26.92, "learning_rate": 3.654502246202297e-05, "loss": 2.1278, "step": 9300000 }, { "epoch": 26.92, "learning_rate": 3.654430026167099e-05, "loss": 2.1282, "step": 9300500 }, { "epoch": 26.92, "learning_rate": 3.654357661402372e-05, "loss": 2.1273, "step": 9301000 }, { "epoch": 26.92, "learning_rate": 3.6542854413671735e-05, "loss": 2.1472, "step": 9301500 }, { "epoch": 26.93, "learning_rate": 3.654213076602446e-05, "loss": 2.157, "step": 9302000 }, { "epoch": 26.93, "learning_rate": 3.654140856567247e-05, "loss": 2.1444, "step": 9302500 }, { "epoch": 26.93, "learning_rate": 3.6540684918025195e-05, "loss": 2.1562, "step": 9303000 }, { "epoch": 26.93, "learning_rate": 3.6539961270377924e-05, "loss": 2.1473, "step": 9303500 }, { "epoch": 26.93, "learning_rate": 3.6539237622730646e-05, "loss": 2.1421, "step": 9304000 }, { "epoch": 26.93, "learning_rate": 3.6538516869673955e-05, "loss": 2.16, "step": 9304500 }, { "epoch": 26.93, "learning_rate": 3.653779322202668e-05, "loss": 2.1664, "step": 9305000 }, { "epoch": 26.94, "learning_rate": 3.65370695743794e-05, "loss": 2.1723, "step": 9305500 }, { "epoch": 26.94, "learning_rate": 3.653634592673212e-05, "loss": 2.1505, "step": 9306000 }, { "epoch": 26.94, "learning_rate": 3.6535622279084844e-05, "loss": 2.124, "step": 9306500 }, { "epoch": 26.94, "learning_rate": 3.653489863143757e-05, "loss": 2.1374, "step": 9307000 }, { "epoch": 26.94, "learning_rate": 3.6534174983790295e-05, "loss": 2.1466, "step": 9307500 }, { "epoch": 26.94, "learning_rate": 3.653345133614302e-05, "loss": 2.1059, "step": 9308000 }, { "epoch": 26.94, "learning_rate": 3.6532727688495746e-05, "loss": 2.1284, "step": 9308500 }, { "epoch": 26.95, "learning_rate": 3.653200404084847e-05, "loss": 2.1206, "step": 9309000 }, { "epoch": 26.95, "learning_rate": 3.653128039320119e-05, "loss": 2.1431, "step": 9309500 }, { "epoch": 26.95, "learning_rate": 3.653055674555391e-05, "loss": 2.1342, "step": 9310000 }, { "epoch": 26.95, "learning_rate": 3.6529833097906635e-05, "loss": 2.1258, "step": 9310500 }, { "epoch": 26.95, "learning_rate": 3.652910945025936e-05, "loss": 2.1348, "step": 9311000 }, { "epoch": 26.95, "learning_rate": 3.652838580261208e-05, "loss": 2.1432, "step": 9311500 }, { "epoch": 26.95, "learning_rate": 3.65276621549648e-05, "loss": 2.1449, "step": 9312000 }, { "epoch": 26.96, "learning_rate": 3.6526939954612824e-05, "loss": 2.1341, "step": 9312500 }, { "epoch": 26.96, "learning_rate": 3.6526216306965546e-05, "loss": 2.1419, "step": 9313000 }, { "epoch": 26.96, "learning_rate": 3.652549265931827e-05, "loss": 2.1376, "step": 9313500 }, { "epoch": 26.96, "learning_rate": 3.6524770458966284e-05, "loss": 2.1499, "step": 9314000 }, { "epoch": 26.96, "learning_rate": 3.6524046811319006e-05, "loss": 2.1365, "step": 9314500 }, { "epoch": 26.96, "learning_rate": 3.652332316367173e-05, "loss": 2.1465, "step": 9315000 }, { "epoch": 26.96, "learning_rate": 3.6522600963319744e-05, "loss": 2.1642, "step": 9315500 }, { "epoch": 26.97, "learning_rate": 3.652187731567247e-05, "loss": 2.1571, "step": 9316000 }, { "epoch": 26.97, "learning_rate": 3.6521153668025195e-05, "loss": 2.1414, "step": 9316500 }, { "epoch": 26.97, "learning_rate": 3.6520430020377924e-05, "loss": 2.1382, "step": 9317000 }, { "epoch": 26.97, "learning_rate": 3.6519706372730647e-05, "loss": 2.1122, "step": 9317500 }, { "epoch": 26.97, "learning_rate": 3.651898272508337e-05, "loss": 2.1298, "step": 9318000 }, { "epoch": 26.97, "learning_rate": 3.651825907743609e-05, "loss": 2.152, "step": 9318500 }, { "epoch": 26.97, "learning_rate": 3.651753542978881e-05, "loss": 2.1294, "step": 9319000 }, { "epoch": 26.98, "learning_rate": 3.6516811782141535e-05, "loss": 2.1448, "step": 9319500 }, { "epoch": 26.98, "learning_rate": 3.651608813449426e-05, "loss": 2.1297, "step": 9320000 }, { "epoch": 26.98, "learning_rate": 3.651536448684698e-05, "loss": 2.1291, "step": 9320500 }, { "epoch": 26.98, "learning_rate": 3.65146408391997e-05, "loss": 2.1351, "step": 9321000 }, { "epoch": 26.98, "learning_rate": 3.6513917191552424e-05, "loss": 2.1276, "step": 9321500 }, { "epoch": 26.98, "learning_rate": 3.651319354390515e-05, "loss": 2.1426, "step": 9322000 }, { "epoch": 26.98, "learning_rate": 3.651247134355317e-05, "loss": 2.1344, "step": 9322500 }, { "epoch": 26.99, "learning_rate": 3.6511749143201185e-05, "loss": 2.1516, "step": 9323000 }, { "epoch": 26.99, "learning_rate": 3.6511025495553914e-05, "loss": 2.1658, "step": 9323500 }, { "epoch": 26.99, "learning_rate": 3.6510301847906636e-05, "loss": 2.1379, "step": 9324000 }, { "epoch": 26.99, "learning_rate": 3.650957820025936e-05, "loss": 2.1527, "step": 9324500 }, { "epoch": 26.99, "learning_rate": 3.650885455261208e-05, "loss": 2.11, "step": 9325000 }, { "epoch": 26.99, "learning_rate": 3.65081309049648e-05, "loss": 2.1295, "step": 9325500 }, { "epoch": 26.99, "learning_rate": 3.6507407257317525e-05, "loss": 2.1344, "step": 9326000 }, { "epoch": 27.0, "learning_rate": 3.650668360967025e-05, "loss": 2.1454, "step": 9326500 }, { "epoch": 27.0, "learning_rate": 3.6505959962022976e-05, "loss": 2.1264, "step": 9327000 }, { "epoch": 27.0, "learning_rate": 3.650523776167099e-05, "loss": 2.1357, "step": 9327500 }, { "epoch": 27.0, "eval_accuracy": 0.6654050335667157, "eval_accuracy_mlm": 0.6298091509434769, "eval_accuracy_nsp": 0.8562923650162929, "eval_loss": 2.195469617843628, "eval_runtime": 332.0987, "eval_samples_per_second": 1314.025, "eval_steps_per_second": 54.752, "step": 9327744 }, { "epoch": 27.0, "learning_rate": 3.6504514114023714e-05, "loss": 2.1241, "step": 9328000 }, { "epoch": 27.0, "learning_rate": 3.6503790466376436e-05, "loss": 2.1168, "step": 9328500 }, { "epoch": 27.0, "learning_rate": 3.650306681872916e-05, "loss": 2.1252, "step": 9329000 }, { "epoch": 27.01, "learning_rate": 3.6502344618377174e-05, "loss": 2.1047, "step": 9329500 }, { "epoch": 27.01, "learning_rate": 3.6501622418025196e-05, "loss": 2.1268, "step": 9330000 }, { "epoch": 27.01, "learning_rate": 3.650089877037792e-05, "loss": 2.1355, "step": 9330500 }, { "epoch": 27.01, "learning_rate": 3.650017512273065e-05, "loss": 2.112, "step": 9331000 }, { "epoch": 27.01, "learning_rate": 3.649945147508337e-05, "loss": 2.1301, "step": 9331500 }, { "epoch": 27.01, "learning_rate": 3.649872782743609e-05, "loss": 2.1215, "step": 9332000 }, { "epoch": 27.01, "learning_rate": 3.6498004179788814e-05, "loss": 2.1114, "step": 9332500 }, { "epoch": 27.02, "learning_rate": 3.6497280532141536e-05, "loss": 2.127, "step": 9333000 }, { "epoch": 27.02, "learning_rate": 3.649655688449426e-05, "loss": 2.107, "step": 9333500 }, { "epoch": 27.02, "learning_rate": 3.649583323684698e-05, "loss": 2.1007, "step": 9334000 }, { "epoch": 27.02, "learning_rate": 3.64951095891997e-05, "loss": 2.1159, "step": 9334500 }, { "epoch": 27.02, "learning_rate": 3.6494385941552425e-05, "loss": 2.1118, "step": 9335000 }, { "epoch": 27.02, "learning_rate": 3.6493662293905154e-05, "loss": 2.1256, "step": 9335500 }, { "epoch": 27.02, "learning_rate": 3.6492938646257876e-05, "loss": 2.1073, "step": 9336000 }, { "epoch": 27.03, "learning_rate": 3.649221644590589e-05, "loss": 2.0944, "step": 9336500 }, { "epoch": 27.03, "learning_rate": 3.6491492798258614e-05, "loss": 2.0876, "step": 9337000 }, { "epoch": 27.03, "learning_rate": 3.6490769150611336e-05, "loss": 2.1152, "step": 9337500 }, { "epoch": 27.03, "learning_rate": 3.649004695025935e-05, "loss": 2.1064, "step": 9338000 }, { "epoch": 27.03, "learning_rate": 3.648932330261208e-05, "loss": 2.1165, "step": 9338500 }, { "epoch": 27.03, "learning_rate": 3.64885996549648e-05, "loss": 2.1109, "step": 9339000 }, { "epoch": 27.03, "learning_rate": 3.6487876007317526e-05, "loss": 2.1144, "step": 9339500 }, { "epoch": 27.04, "learning_rate": 3.6487152359670255e-05, "loss": 2.1415, "step": 9340000 }, { "epoch": 27.04, "learning_rate": 3.648642871202298e-05, "loss": 2.1132, "step": 9340500 }, { "epoch": 27.04, "learning_rate": 3.64857050643757e-05, "loss": 2.1361, "step": 9341000 }, { "epoch": 27.04, "learning_rate": 3.648498141672842e-05, "loss": 2.1143, "step": 9341500 }, { "epoch": 27.04, "learning_rate": 3.648425921637644e-05, "loss": 2.1237, "step": 9342000 }, { "epoch": 27.04, "learning_rate": 3.648353556872916e-05, "loss": 2.1159, "step": 9342500 }, { "epoch": 27.04, "learning_rate": 3.648281192108188e-05, "loss": 2.1212, "step": 9343000 }, { "epoch": 27.05, "learning_rate": 3.6482089720729904e-05, "loss": 2.1147, "step": 9343500 }, { "epoch": 27.05, "learning_rate": 3.6481366073082626e-05, "loss": 2.0983, "step": 9344000 }, { "epoch": 27.05, "learning_rate": 3.648064242543535e-05, "loss": 2.1101, "step": 9344500 }, { "epoch": 27.05, "learning_rate": 3.647991877778807e-05, "loss": 2.1243, "step": 9345000 }, { "epoch": 27.05, "learning_rate": 3.647919513014079e-05, "loss": 2.12, "step": 9345500 }, { "epoch": 27.05, "learning_rate": 3.64784758243794e-05, "loss": 2.1144, "step": 9346000 }, { "epoch": 27.05, "learning_rate": 3.647775217673213e-05, "loss": 2.1069, "step": 9346500 }, { "epoch": 27.06, "learning_rate": 3.647702852908485e-05, "loss": 2.1268, "step": 9347000 }, { "epoch": 27.06, "learning_rate": 3.6476304881437575e-05, "loss": 2.141, "step": 9347500 }, { "epoch": 27.06, "learning_rate": 3.64755812337903e-05, "loss": 2.1165, "step": 9348000 }, { "epoch": 27.06, "learning_rate": 3.647485758614302e-05, "loss": 2.1145, "step": 9348500 }, { "epoch": 27.06, "learning_rate": 3.647413393849574e-05, "loss": 2.1054, "step": 9349000 }, { "epoch": 27.06, "learning_rate": 3.6473410290848464e-05, "loss": 2.1317, "step": 9349500 }, { "epoch": 27.06, "learning_rate": 3.6472686643201186e-05, "loss": 2.1535, "step": 9350000 }, { "epoch": 27.07, "learning_rate": 3.647196299555391e-05, "loss": 2.1028, "step": 9350500 }, { "epoch": 27.07, "learning_rate": 3.647123934790663e-05, "loss": 2.1305, "step": 9351000 }, { "epoch": 27.07, "learning_rate": 3.647051570025935e-05, "loss": 2.1478, "step": 9351500 }, { "epoch": 27.07, "learning_rate": 3.6469792052612075e-05, "loss": 2.1249, "step": 9352000 }, { "epoch": 27.07, "learning_rate": 3.6469068404964804e-05, "loss": 2.1451, "step": 9352500 }, { "epoch": 27.07, "learning_rate": 3.6468344757317526e-05, "loss": 2.1325, "step": 9353000 }, { "epoch": 27.07, "learning_rate": 3.6467621109670255e-05, "loss": 2.1186, "step": 9353500 }, { "epoch": 27.08, "learning_rate": 3.646689890931827e-05, "loss": 2.1638, "step": 9354000 }, { "epoch": 27.08, "learning_rate": 3.646617526167099e-05, "loss": 2.1217, "step": 9354500 }, { "epoch": 27.08, "learning_rate": 3.6465451614023715e-05, "loss": 2.1206, "step": 9355000 }, { "epoch": 27.08, "learning_rate": 3.646472796637644e-05, "loss": 2.1012, "step": 9355500 }, { "epoch": 27.08, "learning_rate": 3.646400576602445e-05, "loss": 2.1042, "step": 9356000 }, { "epoch": 27.08, "learning_rate": 3.6463283565672475e-05, "loss": 2.1169, "step": 9356500 }, { "epoch": 27.08, "learning_rate": 3.64625599180252e-05, "loss": 2.1303, "step": 9357000 }, { "epoch": 27.09, "learning_rate": 3.646183627037792e-05, "loss": 2.0985, "step": 9357500 }, { "epoch": 27.09, "learning_rate": 3.646111262273064e-05, "loss": 2.1195, "step": 9358000 }, { "epoch": 27.09, "learning_rate": 3.6460388975083364e-05, "loss": 2.1092, "step": 9358500 }, { "epoch": 27.09, "learning_rate": 3.645966532743609e-05, "loss": 2.1248, "step": 9359000 }, { "epoch": 27.09, "learning_rate": 3.645894167978881e-05, "loss": 2.1193, "step": 9359500 }, { "epoch": 27.09, "learning_rate": 3.645821803214153e-05, "loss": 2.1186, "step": 9360000 }, { "epoch": 27.09, "learning_rate": 3.645749438449425e-05, "loss": 2.1365, "step": 9360500 }, { "epoch": 27.1, "learning_rate": 3.645677073684698e-05, "loss": 2.1207, "step": 9361000 }, { "epoch": 27.1, "learning_rate": 3.6456048536495005e-05, "loss": 2.1427, "step": 9361500 }, { "epoch": 27.1, "learning_rate": 3.645532488884773e-05, "loss": 2.1196, "step": 9362000 }, { "epoch": 27.1, "learning_rate": 3.645460124120045e-05, "loss": 2.1274, "step": 9362500 }, { "epoch": 27.1, "learning_rate": 3.645387759355317e-05, "loss": 2.0993, "step": 9363000 }, { "epoch": 27.1, "learning_rate": 3.6453153945905894e-05, "loss": 2.1174, "step": 9363500 }, { "epoch": 27.1, "learning_rate": 3.6452430298258616e-05, "loss": 2.1102, "step": 9364000 }, { "epoch": 27.11, "learning_rate": 3.645170665061134e-05, "loss": 2.1362, "step": 9364500 }, { "epoch": 27.11, "learning_rate": 3.6450984450259354e-05, "loss": 2.1262, "step": 9365000 }, { "epoch": 27.11, "learning_rate": 3.6450262249907376e-05, "loss": 2.1112, "step": 9365500 }, { "epoch": 27.11, "learning_rate": 3.64495386022601e-05, "loss": 2.1292, "step": 9366000 }, { "epoch": 27.11, "learning_rate": 3.644881495461282e-05, "loss": 2.0959, "step": 9366500 }, { "epoch": 27.11, "learning_rate": 3.644809130696554e-05, "loss": 2.1159, "step": 9367000 }, { "epoch": 27.12, "learning_rate": 3.6447367659318265e-05, "loss": 2.126, "step": 9367500 }, { "epoch": 27.12, "learning_rate": 3.644664401167099e-05, "loss": 2.1283, "step": 9368000 }, { "epoch": 27.12, "learning_rate": 3.6445920364023716e-05, "loss": 2.1089, "step": 9368500 }, { "epoch": 27.12, "learning_rate": 3.644519671637644e-05, "loss": 2.1168, "step": 9369000 }, { "epoch": 27.12, "learning_rate": 3.644447306872916e-05, "loss": 2.1102, "step": 9369500 }, { "epoch": 27.12, "learning_rate": 3.644375086837718e-05, "loss": 2.1243, "step": 9370000 }, { "epoch": 27.12, "learning_rate": 3.6443027220729905e-05, "loss": 2.1364, "step": 9370500 }, { "epoch": 27.13, "learning_rate": 3.644230502037792e-05, "loss": 2.1184, "step": 9371000 }, { "epoch": 27.13, "learning_rate": 3.644158137273064e-05, "loss": 2.133, "step": 9371500 }, { "epoch": 27.13, "learning_rate": 3.6440857725083365e-05, "loss": 2.1167, "step": 9372000 }, { "epoch": 27.13, "learning_rate": 3.644013407743609e-05, "loss": 2.1125, "step": 9372500 }, { "epoch": 27.13, "learning_rate": 3.643941042978881e-05, "loss": 2.1, "step": 9373000 }, { "epoch": 27.13, "learning_rate": 3.643868678214153e-05, "loss": 2.1185, "step": 9373500 }, { "epoch": 27.13, "learning_rate": 3.6437963134494254e-05, "loss": 2.1121, "step": 9374000 }, { "epoch": 27.14, "learning_rate": 3.643723948684698e-05, "loss": 2.112, "step": 9374500 }, { "epoch": 27.14, "learning_rate": 3.6436515839199705e-05, "loss": 2.1383, "step": 9375000 }, { "epoch": 27.14, "learning_rate": 3.643579219155243e-05, "loss": 2.1072, "step": 9375500 }, { "epoch": 27.14, "learning_rate": 3.643506854390516e-05, "loss": 2.1162, "step": 9376000 }, { "epoch": 27.14, "learning_rate": 3.643434489625788e-05, "loss": 2.1288, "step": 9376500 }, { "epoch": 27.14, "learning_rate": 3.64336212486106e-05, "loss": 2.1308, "step": 9377000 }, { "epoch": 27.14, "learning_rate": 3.643289760096332e-05, "loss": 2.1348, "step": 9377500 }, { "epoch": 27.15, "learning_rate": 3.643217540061134e-05, "loss": 2.107, "step": 9378000 }, { "epoch": 27.15, "learning_rate": 3.643145175296406e-05, "loss": 2.1144, "step": 9378500 }, { "epoch": 27.15, "learning_rate": 3.6430729552612083e-05, "loss": 2.1105, "step": 9379000 }, { "epoch": 27.15, "learning_rate": 3.6430005904964806e-05, "loss": 2.1332, "step": 9379500 }, { "epoch": 27.15, "learning_rate": 3.642928225731753e-05, "loss": 2.111, "step": 9380000 }, { "epoch": 27.15, "learning_rate": 3.642855860967025e-05, "loss": 2.1396, "step": 9380500 }, { "epoch": 27.15, "learning_rate": 3.642783496202297e-05, "loss": 2.1539, "step": 9381000 }, { "epoch": 27.16, "learning_rate": 3.6427111314375695e-05, "loss": 2.1049, "step": 9381500 }, { "epoch": 27.16, "learning_rate": 3.642638766672842e-05, "loss": 2.1525, "step": 9382000 }, { "epoch": 27.16, "learning_rate": 3.642566401908114e-05, "loss": 2.116, "step": 9382500 }, { "epoch": 27.16, "learning_rate": 3.6424941818729155e-05, "loss": 2.1464, "step": 9383000 }, { "epoch": 27.16, "learning_rate": 3.6424218171081884e-05, "loss": 2.1183, "step": 9383500 }, { "epoch": 27.16, "learning_rate": 3.6423495970729906e-05, "loss": 2.1141, "step": 9384000 }, { "epoch": 27.16, "learning_rate": 3.642277232308263e-05, "loss": 2.1176, "step": 9384500 }, { "epoch": 27.17, "learning_rate": 3.642204867543535e-05, "loss": 2.1113, "step": 9385000 }, { "epoch": 27.17, "learning_rate": 3.642132502778807e-05, "loss": 2.1593, "step": 9385500 }, { "epoch": 27.17, "learning_rate": 3.6420601380140795e-05, "loss": 2.134, "step": 9386000 }, { "epoch": 27.17, "learning_rate": 3.641987773249352e-05, "loss": 2.0956, "step": 9386500 }, { "epoch": 27.17, "learning_rate": 3.641915553214153e-05, "loss": 2.1253, "step": 9387000 }, { "epoch": 27.17, "learning_rate": 3.641843188449426e-05, "loss": 2.1252, "step": 9387500 }, { "epoch": 27.17, "learning_rate": 3.6417708236846984e-05, "loss": 2.1253, "step": 9388000 }, { "epoch": 27.18, "learning_rate": 3.6416986036495e-05, "loss": 2.1338, "step": 9388500 }, { "epoch": 27.18, "learning_rate": 3.6416263836143015e-05, "loss": 2.1018, "step": 9389000 }, { "epoch": 27.18, "learning_rate": 3.641554018849574e-05, "loss": 2.1206, "step": 9389500 }, { "epoch": 27.18, "learning_rate": 3.641481654084846e-05, "loss": 2.1274, "step": 9390000 }, { "epoch": 27.18, "learning_rate": 3.641409289320118e-05, "loss": 2.1407, "step": 9390500 }, { "epoch": 27.18, "learning_rate": 3.641336924555391e-05, "loss": 2.1033, "step": 9391000 }, { "epoch": 27.18, "learning_rate": 3.641264559790663e-05, "loss": 2.1043, "step": 9391500 }, { "epoch": 27.19, "learning_rate": 3.641192195025936e-05, "loss": 2.1227, "step": 9392000 }, { "epoch": 27.19, "learning_rate": 3.6411198302612084e-05, "loss": 2.1133, "step": 9392500 }, { "epoch": 27.19, "learning_rate": 3.6410474654964806e-05, "loss": 2.1042, "step": 9393000 }, { "epoch": 27.19, "learning_rate": 3.640975100731753e-05, "loss": 2.1152, "step": 9393500 }, { "epoch": 27.19, "learning_rate": 3.640902735967025e-05, "loss": 2.1534, "step": 9394000 }, { "epoch": 27.19, "learning_rate": 3.640830371202297e-05, "loss": 2.1233, "step": 9394500 }, { "epoch": 27.19, "learning_rate": 3.640758151167099e-05, "loss": 2.1142, "step": 9395000 }, { "epoch": 27.2, "learning_rate": 3.640685786402371e-05, "loss": 2.137, "step": 9395500 }, { "epoch": 27.2, "learning_rate": 3.640613421637643e-05, "loss": 2.1133, "step": 9396000 }, { "epoch": 27.2, "learning_rate": 3.640541056872916e-05, "loss": 2.1185, "step": 9396500 }, { "epoch": 27.2, "learning_rate": 3.6404686921081884e-05, "loss": 2.1367, "step": 9397000 }, { "epoch": 27.2, "learning_rate": 3.640396327343461e-05, "loss": 2.1468, "step": 9397500 }, { "epoch": 27.2, "learning_rate": 3.6403239625787336e-05, "loss": 2.1351, "step": 9398000 }, { "epoch": 27.2, "learning_rate": 3.640251597814006e-05, "loss": 2.1358, "step": 9398500 }, { "epoch": 27.21, "learning_rate": 3.640179233049278e-05, "loss": 2.1508, "step": 9399000 }, { "epoch": 27.21, "learning_rate": 3.64010686828455e-05, "loss": 2.112, "step": 9399500 }, { "epoch": 27.21, "learning_rate": 3.6400345035198225e-05, "loss": 2.1166, "step": 9400000 }, { "epoch": 27.21, "learning_rate": 3.639962138755095e-05, "loss": 2.1465, "step": 9400500 }, { "epoch": 27.21, "learning_rate": 3.639889773990367e-05, "loss": 2.1014, "step": 9401000 }, { "epoch": 27.21, "learning_rate": 3.639817409225639e-05, "loss": 2.1333, "step": 9401500 }, { "epoch": 27.21, "learning_rate": 3.6397450444609114e-05, "loss": 2.1416, "step": 9402000 }, { "epoch": 27.22, "learning_rate": 3.6396728244257136e-05, "loss": 2.1028, "step": 9402500 }, { "epoch": 27.22, "learning_rate": 3.639600459660986e-05, "loss": 2.136, "step": 9403000 }, { "epoch": 27.22, "learning_rate": 3.639528094896258e-05, "loss": 2.1353, "step": 9403500 }, { "epoch": 27.22, "learning_rate": 3.63945573013153e-05, "loss": 2.1107, "step": 9404000 }, { "epoch": 27.22, "learning_rate": 3.6393833653668025e-05, "loss": 2.0979, "step": 9404500 }, { "epoch": 27.22, "learning_rate": 3.639311145331604e-05, "loss": 2.1314, "step": 9405000 }, { "epoch": 27.23, "learning_rate": 3.639238780566877e-05, "loss": 2.1138, "step": 9405500 }, { "epoch": 27.23, "learning_rate": 3.6391665605316785e-05, "loss": 2.1193, "step": 9406000 }, { "epoch": 27.23, "learning_rate": 3.6390941957669514e-05, "loss": 2.1203, "step": 9406500 }, { "epoch": 27.23, "learning_rate": 3.6390218310022236e-05, "loss": 2.1295, "step": 9407000 }, { "epoch": 27.23, "learning_rate": 3.638949466237496e-05, "loss": 2.1479, "step": 9407500 }, { "epoch": 27.23, "learning_rate": 3.638877101472768e-05, "loss": 2.1429, "step": 9408000 }, { "epoch": 27.23, "learning_rate": 3.6388048814375696e-05, "loss": 2.1206, "step": 9408500 }, { "epoch": 27.24, "learning_rate": 3.638732516672842e-05, "loss": 2.1061, "step": 9409000 }, { "epoch": 27.24, "learning_rate": 3.638660151908114e-05, "loss": 2.1364, "step": 9409500 }, { "epoch": 27.24, "learning_rate": 3.638587787143386e-05, "loss": 2.1165, "step": 9410000 }, { "epoch": 27.24, "learning_rate": 3.6385154223786585e-05, "loss": 2.1244, "step": 9410500 }, { "epoch": 27.24, "learning_rate": 3.638443202343461e-05, "loss": 2.1306, "step": 9411000 }, { "epoch": 27.24, "learning_rate": 3.638370982308262e-05, "loss": 2.1197, "step": 9411500 }, { "epoch": 27.24, "learning_rate": 3.6382986175435345e-05, "loss": 2.1139, "step": 9412000 }, { "epoch": 27.25, "learning_rate": 3.638226252778807e-05, "loss": 2.1179, "step": 9412500 }, { "epoch": 27.25, "learning_rate": 3.638153888014079e-05, "loss": 2.1289, "step": 9413000 }, { "epoch": 27.25, "learning_rate": 3.638081523249352e-05, "loss": 2.1565, "step": 9413500 }, { "epoch": 27.25, "learning_rate": 3.638009303214154e-05, "loss": 2.117, "step": 9414000 }, { "epoch": 27.25, "learning_rate": 3.637936938449426e-05, "loss": 2.1167, "step": 9414500 }, { "epoch": 27.25, "learning_rate": 3.6378645736846986e-05, "loss": 2.1219, "step": 9415000 }, { "epoch": 27.25, "learning_rate": 3.637792208919971e-05, "loss": 2.1162, "step": 9415500 }, { "epoch": 27.26, "learning_rate": 3.637719844155243e-05, "loss": 2.1259, "step": 9416000 }, { "epoch": 27.26, "learning_rate": 3.637647479390515e-05, "loss": 2.1318, "step": 9416500 }, { "epoch": 27.26, "learning_rate": 3.6375751146257874e-05, "loss": 2.1223, "step": 9417000 }, { "epoch": 27.26, "learning_rate": 3.63750274986106e-05, "loss": 2.1176, "step": 9417500 }, { "epoch": 27.26, "learning_rate": 3.637430385096332e-05, "loss": 2.1136, "step": 9418000 }, { "epoch": 27.26, "learning_rate": 3.637358020331604e-05, "loss": 2.1365, "step": 9418500 }, { "epoch": 27.26, "learning_rate": 3.6372856555668763e-05, "loss": 2.1179, "step": 9419000 }, { "epoch": 27.27, "learning_rate": 3.6372132908021486e-05, "loss": 2.1056, "step": 9419500 }, { "epoch": 27.27, "learning_rate": 3.6371409260374215e-05, "loss": 2.1146, "step": 9420000 }, { "epoch": 27.27, "learning_rate": 3.637068561272694e-05, "loss": 2.1341, "step": 9420500 }, { "epoch": 27.27, "learning_rate": 3.636996341237496e-05, "loss": 2.1421, "step": 9421000 }, { "epoch": 27.27, "learning_rate": 3.636923976472768e-05, "loss": 2.1355, "step": 9421500 }, { "epoch": 27.27, "learning_rate": 3.6368516117080404e-05, "loss": 2.1241, "step": 9422000 }, { "epoch": 27.27, "learning_rate": 3.6367792469433126e-05, "loss": 2.1196, "step": 9422500 }, { "epoch": 27.28, "learning_rate": 3.636706882178585e-05, "loss": 2.1396, "step": 9423000 }, { "epoch": 27.28, "learning_rate": 3.6366346621433864e-05, "loss": 2.1212, "step": 9423500 }, { "epoch": 27.28, "learning_rate": 3.636562297378659e-05, "loss": 2.1376, "step": 9424000 }, { "epoch": 27.28, "learning_rate": 3.6364899326139315e-05, "loss": 2.124, "step": 9424500 }, { "epoch": 27.28, "learning_rate": 3.636417567849204e-05, "loss": 2.1347, "step": 9425000 }, { "epoch": 27.28, "learning_rate": 3.636345203084476e-05, "loss": 2.1274, "step": 9425500 }, { "epoch": 27.28, "learning_rate": 3.636272838319748e-05, "loss": 2.1162, "step": 9426000 }, { "epoch": 27.29, "learning_rate": 3.6362004735550204e-05, "loss": 2.1105, "step": 9426500 }, { "epoch": 27.29, "learning_rate": 3.636128253519822e-05, "loss": 2.1147, "step": 9427000 }, { "epoch": 27.29, "learning_rate": 3.636056033484624e-05, "loss": 2.1417, "step": 9427500 }, { "epoch": 27.29, "learning_rate": 3.6359836687198964e-05, "loss": 2.1242, "step": 9428000 }, { "epoch": 27.29, "learning_rate": 3.635911303955169e-05, "loss": 2.1308, "step": 9428500 }, { "epoch": 27.29, "learning_rate": 3.6358389391904415e-05, "loss": 2.1121, "step": 9429000 }, { "epoch": 27.29, "learning_rate": 3.635766574425714e-05, "loss": 2.141, "step": 9429500 }, { "epoch": 27.3, "learning_rate": 3.635694209660986e-05, "loss": 2.1219, "step": 9430000 }, { "epoch": 27.3, "learning_rate": 3.635621844896258e-05, "loss": 2.1323, "step": 9430500 }, { "epoch": 27.3, "learning_rate": 3.6355494801315304e-05, "loss": 2.1395, "step": 9431000 }, { "epoch": 27.3, "learning_rate": 3.6354771153668026e-05, "loss": 2.1123, "step": 9431500 }, { "epoch": 27.3, "learning_rate": 3.635404750602075e-05, "loss": 2.1056, "step": 9432000 }, { "epoch": 27.3, "learning_rate": 3.635332385837347e-05, "loss": 2.1416, "step": 9432500 }, { "epoch": 27.3, "learning_rate": 3.635260165802149e-05, "loss": 2.1166, "step": 9433000 }, { "epoch": 27.31, "learning_rate": 3.635187945766951e-05, "loss": 2.1069, "step": 9433500 }, { "epoch": 27.31, "learning_rate": 3.635115581002223e-05, "loss": 2.1161, "step": 9434000 }, { "epoch": 27.31, "learning_rate": 3.635043216237495e-05, "loss": 2.1283, "step": 9434500 }, { "epoch": 27.31, "learning_rate": 3.6349708514727675e-05, "loss": 2.1177, "step": 9435000 }, { "epoch": 27.31, "learning_rate": 3.6348984867080404e-05, "loss": 2.1167, "step": 9435500 }, { "epoch": 27.31, "learning_rate": 3.634826121943313e-05, "loss": 2.1189, "step": 9436000 }, { "epoch": 27.31, "learning_rate": 3.634753757178585e-05, "loss": 2.135, "step": 9436500 }, { "epoch": 27.32, "learning_rate": 3.634681392413857e-05, "loss": 2.1005, "step": 9437000 }, { "epoch": 27.32, "learning_rate": 3.6346090276491293e-05, "loss": 2.144, "step": 9437500 }, { "epoch": 27.32, "learning_rate": 3.6345366628844016e-05, "loss": 2.1205, "step": 9438000 }, { "epoch": 27.32, "learning_rate": 3.634464587578733e-05, "loss": 2.1247, "step": 9438500 }, { "epoch": 27.32, "learning_rate": 3.6343922228140054e-05, "loss": 2.1106, "step": 9439000 }, { "epoch": 27.32, "learning_rate": 3.6343198580492776e-05, "loss": 2.1286, "step": 9439500 }, { "epoch": 27.32, "learning_rate": 3.63424749328455e-05, "loss": 2.131, "step": 9440000 }, { "epoch": 27.33, "learning_rate": 3.634175128519822e-05, "loss": 2.1254, "step": 9440500 }, { "epoch": 27.33, "learning_rate": 3.634102763755094e-05, "loss": 2.1117, "step": 9441000 }, { "epoch": 27.33, "learning_rate": 3.6340303989903665e-05, "loss": 2.1368, "step": 9441500 }, { "epoch": 27.33, "learning_rate": 3.6339580342256394e-05, "loss": 2.1311, "step": 9442000 }, { "epoch": 27.33, "learning_rate": 3.633885814190441e-05, "loss": 2.1257, "step": 9442500 }, { "epoch": 27.33, "learning_rate": 3.633813449425714e-05, "loss": 2.1567, "step": 9443000 }, { "epoch": 27.34, "learning_rate": 3.633741084660986e-05, "loss": 2.1161, "step": 9443500 }, { "epoch": 27.34, "learning_rate": 3.633668719896258e-05, "loss": 2.1364, "step": 9444000 }, { "epoch": 27.34, "learning_rate": 3.6335963551315305e-05, "loss": 2.1235, "step": 9444500 }, { "epoch": 27.34, "learning_rate": 3.633523990366803e-05, "loss": 2.1249, "step": 9445000 }, { "epoch": 27.34, "learning_rate": 3.633451625602075e-05, "loss": 2.1353, "step": 9445500 }, { "epoch": 27.34, "learning_rate": 3.6333794055668765e-05, "loss": 2.1088, "step": 9446000 }, { "epoch": 27.34, "learning_rate": 3.6333070408021494e-05, "loss": 2.1205, "step": 9446500 }, { "epoch": 27.35, "learning_rate": 3.6332346760374216e-05, "loss": 2.1349, "step": 9447000 }, { "epoch": 27.35, "learning_rate": 3.633162311272694e-05, "loss": 2.1224, "step": 9447500 }, { "epoch": 27.35, "learning_rate": 3.633089946507966e-05, "loss": 2.1554, "step": 9448000 }, { "epoch": 27.35, "learning_rate": 3.633017581743238e-05, "loss": 2.1129, "step": 9448500 }, { "epoch": 27.35, "learning_rate": 3.6329452169785105e-05, "loss": 2.1289, "step": 9449000 }, { "epoch": 27.35, "learning_rate": 3.632872852213783e-05, "loss": 2.1087, "step": 9449500 }, { "epoch": 27.35, "learning_rate": 3.6328004874490556e-05, "loss": 2.1153, "step": 9450000 }, { "epoch": 27.36, "learning_rate": 3.632728122684328e-05, "loss": 2.1417, "step": 9450500 }, { "epoch": 27.36, "learning_rate": 3.6326557579196e-05, "loss": 2.1387, "step": 9451000 }, { "epoch": 27.36, "learning_rate": 3.632583393154872e-05, "loss": 2.1071, "step": 9451500 }, { "epoch": 27.36, "learning_rate": 3.632511317849204e-05, "loss": 2.1391, "step": 9452000 }, { "epoch": 27.36, "learning_rate": 3.632438953084476e-05, "loss": 2.1154, "step": 9452500 }, { "epoch": 27.36, "learning_rate": 3.632366588319748e-05, "loss": 2.0988, "step": 9453000 }, { "epoch": 27.36, "learning_rate": 3.6322942235550205e-05, "loss": 2.1395, "step": 9453500 }, { "epoch": 27.37, "learning_rate": 3.632222003519822e-05, "loss": 2.1016, "step": 9454000 }, { "epoch": 27.37, "learning_rate": 3.632149638755094e-05, "loss": 2.1325, "step": 9454500 }, { "epoch": 27.37, "learning_rate": 3.632077273990367e-05, "loss": 2.134, "step": 9455000 }, { "epoch": 27.37, "learning_rate": 3.632005053955169e-05, "loss": 2.1332, "step": 9455500 }, { "epoch": 27.37, "learning_rate": 3.631932689190441e-05, "loss": 2.1176, "step": 9456000 }, { "epoch": 27.37, "learning_rate": 3.631860324425713e-05, "loss": 2.1223, "step": 9456500 }, { "epoch": 27.37, "learning_rate": 3.6317879596609855e-05, "loss": 2.144, "step": 9457000 }, { "epoch": 27.38, "learning_rate": 3.631715594896258e-05, "loss": 2.1235, "step": 9457500 }, { "epoch": 27.38, "learning_rate": 3.6316432301315306e-05, "loss": 2.1245, "step": 9458000 }, { "epoch": 27.38, "learning_rate": 3.631570865366803e-05, "loss": 2.104, "step": 9458500 }, { "epoch": 27.38, "learning_rate": 3.631498500602075e-05, "loss": 2.118, "step": 9459000 }, { "epoch": 27.38, "learning_rate": 3.631426135837347e-05, "loss": 2.149, "step": 9459500 }, { "epoch": 27.38, "learning_rate": 3.6313537710726195e-05, "loss": 2.1454, "step": 9460000 }, { "epoch": 27.38, "learning_rate": 3.6312814063078924e-05, "loss": 2.1441, "step": 9460500 }, { "epoch": 27.39, "learning_rate": 3.6312090415431646e-05, "loss": 2.1428, "step": 9461000 }, { "epoch": 27.39, "learning_rate": 3.631136676778437e-05, "loss": 2.1154, "step": 9461500 }, { "epoch": 27.39, "learning_rate": 3.631064312013709e-05, "loss": 2.1374, "step": 9462000 }, { "epoch": 27.39, "learning_rate": 3.6309920919785106e-05, "loss": 2.1253, "step": 9462500 }, { "epoch": 27.39, "learning_rate": 3.630919727213783e-05, "loss": 2.1299, "step": 9463000 }, { "epoch": 27.39, "learning_rate": 3.630847362449055e-05, "loss": 2.1074, "step": 9463500 }, { "epoch": 27.39, "learning_rate": 3.630774997684327e-05, "loss": 2.1154, "step": 9464000 }, { "epoch": 27.4, "learning_rate": 3.6307026329195995e-05, "loss": 2.1189, "step": 9464500 }, { "epoch": 27.4, "learning_rate": 3.6306302681548724e-05, "loss": 2.1306, "step": 9465000 }, { "epoch": 27.4, "learning_rate": 3.6305580481196746e-05, "loss": 2.1135, "step": 9465500 }, { "epoch": 27.4, "learning_rate": 3.630485828084476e-05, "loss": 2.1419, "step": 9466000 }, { "epoch": 27.4, "learning_rate": 3.6304134633197484e-05, "loss": 2.1205, "step": 9466500 }, { "epoch": 27.4, "learning_rate": 3.6303410985550206e-05, "loss": 2.123, "step": 9467000 }, { "epoch": 27.4, "learning_rate": 3.630268733790293e-05, "loss": 2.1353, "step": 9467500 }, { "epoch": 27.41, "learning_rate": 3.630196369025565e-05, "loss": 2.1059, "step": 9468000 }, { "epoch": 27.41, "learning_rate": 3.630124004260837e-05, "loss": 2.1037, "step": 9468500 }, { "epoch": 27.41, "learning_rate": 3.6300516394961095e-05, "loss": 2.1309, "step": 9469000 }, { "epoch": 27.41, "learning_rate": 3.629979564190441e-05, "loss": 2.1089, "step": 9469500 }, { "epoch": 27.41, "learning_rate": 3.629907199425713e-05, "loss": 2.1391, "step": 9470000 }, { "epoch": 27.41, "learning_rate": 3.6298348346609855e-05, "loss": 2.1275, "step": 9470500 }, { "epoch": 27.41, "learning_rate": 3.629762469896258e-05, "loss": 2.1259, "step": 9471000 }, { "epoch": 27.42, "learning_rate": 3.62969010513153e-05, "loss": 2.1009, "step": 9471500 }, { "epoch": 27.42, "learning_rate": 3.629617885096332e-05, "loss": 2.1243, "step": 9472000 }, { "epoch": 27.42, "learning_rate": 3.6295455203316044e-05, "loss": 2.1183, "step": 9472500 }, { "epoch": 27.42, "learning_rate": 3.629473155566877e-05, "loss": 2.1456, "step": 9473000 }, { "epoch": 27.42, "learning_rate": 3.6294007908021496e-05, "loss": 2.1257, "step": 9473500 }, { "epoch": 27.42, "learning_rate": 3.629328426037422e-05, "loss": 2.1111, "step": 9474000 }, { "epoch": 27.42, "learning_rate": 3.629256061272694e-05, "loss": 2.1249, "step": 9474500 }, { "epoch": 27.43, "learning_rate": 3.629183696507966e-05, "loss": 2.1227, "step": 9475000 }, { "epoch": 27.43, "learning_rate": 3.629111476472768e-05, "loss": 2.1397, "step": 9475500 }, { "epoch": 27.43, "learning_rate": 3.62903911170804e-05, "loss": 2.1201, "step": 9476000 }, { "epoch": 27.43, "learning_rate": 3.628966746943312e-05, "loss": 2.1195, "step": 9476500 }, { "epoch": 27.43, "learning_rate": 3.628894382178585e-05, "loss": 2.1175, "step": 9477000 }, { "epoch": 27.43, "learning_rate": 3.6288220174138574e-05, "loss": 2.1034, "step": 9477500 }, { "epoch": 27.43, "learning_rate": 3.6287496526491296e-05, "loss": 2.1252, "step": 9478000 }, { "epoch": 27.44, "learning_rate": 3.628677287884402e-05, "loss": 2.1167, "step": 9478500 }, { "epoch": 27.44, "learning_rate": 3.628604923119674e-05, "loss": 2.1445, "step": 9479000 }, { "epoch": 27.44, "learning_rate": 3.628532558354946e-05, "loss": 2.1118, "step": 9479500 }, { "epoch": 27.44, "learning_rate": 3.628460193590219e-05, "loss": 2.1121, "step": 9480000 }, { "epoch": 27.44, "learning_rate": 3.6283878288254914e-05, "loss": 2.1105, "step": 9480500 }, { "epoch": 27.44, "learning_rate": 3.6283154640607636e-05, "loss": 2.114, "step": 9481000 }, { "epoch": 27.45, "learning_rate": 3.628243099296036e-05, "loss": 2.1317, "step": 9481500 }, { "epoch": 27.45, "learning_rate": 3.628170734531308e-05, "loss": 2.1198, "step": 9482000 }, { "epoch": 27.45, "learning_rate": 3.6280985144961096e-05, "loss": 2.1091, "step": 9482500 }, { "epoch": 27.45, "learning_rate": 3.6280261497313825e-05, "loss": 2.1437, "step": 9483000 }, { "epoch": 27.45, "learning_rate": 3.627953929696184e-05, "loss": 2.1233, "step": 9483500 }, { "epoch": 27.45, "learning_rate": 3.627881564931456e-05, "loss": 2.1414, "step": 9484000 }, { "epoch": 27.45, "learning_rate": 3.6278092001667285e-05, "loss": 2.1299, "step": 9484500 }, { "epoch": 27.46, "learning_rate": 3.62773698013153e-05, "loss": 2.1404, "step": 9485000 }, { "epoch": 27.46, "learning_rate": 3.627664615366802e-05, "loss": 2.1494, "step": 9485500 }, { "epoch": 27.46, "learning_rate": 3.627592250602075e-05, "loss": 2.1407, "step": 9486000 }, { "epoch": 27.46, "learning_rate": 3.6275198858373474e-05, "loss": 2.1245, "step": 9486500 }, { "epoch": 27.46, "learning_rate": 3.6274475210726196e-05, "loss": 2.1194, "step": 9487000 }, { "epoch": 27.46, "learning_rate": 3.627375156307892e-05, "loss": 2.1281, "step": 9487500 }, { "epoch": 27.46, "learning_rate": 3.627302791543165e-05, "loss": 2.132, "step": 9488000 }, { "epoch": 27.47, "learning_rate": 3.627230426778437e-05, "loss": 2.1419, "step": 9488500 }, { "epoch": 27.47, "learning_rate": 3.627158062013709e-05, "loss": 2.1498, "step": 9489000 }, { "epoch": 27.47, "learning_rate": 3.6270856972489814e-05, "loss": 2.1208, "step": 9489500 }, { "epoch": 27.47, "learning_rate": 3.627013477213783e-05, "loss": 2.1102, "step": 9490000 }, { "epoch": 27.47, "learning_rate": 3.626941257178585e-05, "loss": 2.14, "step": 9490500 }, { "epoch": 27.47, "learning_rate": 3.6268688924138574e-05, "loss": 2.1083, "step": 9491000 }, { "epoch": 27.47, "learning_rate": 3.6267965276491297e-05, "loss": 2.1337, "step": 9491500 }, { "epoch": 27.48, "learning_rate": 3.626724162884402e-05, "loss": 2.1272, "step": 9492000 }, { "epoch": 27.48, "learning_rate": 3.626651798119674e-05, "loss": 2.097, "step": 9492500 }, { "epoch": 27.48, "learning_rate": 3.626579433354946e-05, "loss": 2.128, "step": 9493000 }, { "epoch": 27.48, "learning_rate": 3.6265070685902186e-05, "loss": 2.1427, "step": 9493500 }, { "epoch": 27.48, "learning_rate": 3.626434703825491e-05, "loss": 2.1121, "step": 9494000 }, { "epoch": 27.48, "learning_rate": 3.626362339060763e-05, "loss": 2.1649, "step": 9494500 }, { "epoch": 27.48, "learning_rate": 3.626290119025565e-05, "loss": 2.082, "step": 9495000 }, { "epoch": 27.49, "learning_rate": 3.6262177542608375e-05, "loss": 2.1075, "step": 9495500 }, { "epoch": 27.49, "learning_rate": 3.6261453894961104e-05, "loss": 2.1376, "step": 9496000 }, { "epoch": 27.49, "learning_rate": 3.6260730247313826e-05, "loss": 2.1288, "step": 9496500 }, { "epoch": 27.49, "learning_rate": 3.626000659966655e-05, "loss": 2.156, "step": 9497000 }, { "epoch": 27.49, "learning_rate": 3.625928295201927e-05, "loss": 2.1263, "step": 9497500 }, { "epoch": 27.49, "learning_rate": 3.625855930437199e-05, "loss": 2.1282, "step": 9498000 }, { "epoch": 27.49, "learning_rate": 3.625783710402001e-05, "loss": 2.1351, "step": 9498500 }, { "epoch": 27.5, "learning_rate": 3.625711345637273e-05, "loss": 2.1434, "step": 9499000 }, { "epoch": 27.5, "learning_rate": 3.625638980872545e-05, "loss": 2.1384, "step": 9499500 }, { "epoch": 27.5, "learning_rate": 3.6255666161078175e-05, "loss": 2.1443, "step": 9500000 }, { "epoch": 27.5, "learning_rate": 3.6254942513430904e-05, "loss": 2.1507, "step": 9500500 }, { "epoch": 27.5, "learning_rate": 3.6254218865783626e-05, "loss": 2.1403, "step": 9501000 }, { "epoch": 27.5, "learning_rate": 3.625349666543164e-05, "loss": 2.1203, "step": 9501500 }, { "epoch": 27.5, "learning_rate": 3.625277446507966e-05, "loss": 2.1091, "step": 9502000 }, { "epoch": 27.51, "learning_rate": 3.625205081743238e-05, "loss": 2.1346, "step": 9502500 }, { "epoch": 27.51, "learning_rate": 3.625132716978511e-05, "loss": 2.135, "step": 9503000 }, { "epoch": 27.51, "learning_rate": 3.625060352213783e-05, "loss": 2.108, "step": 9503500 }, { "epoch": 27.51, "learning_rate": 3.624987987449055e-05, "loss": 2.1312, "step": 9504000 }, { "epoch": 27.51, "learning_rate": 3.6249156226843275e-05, "loss": 2.1144, "step": 9504500 }, { "epoch": 27.51, "learning_rate": 3.6248432579196004e-05, "loss": 2.1127, "step": 9505000 }, { "epoch": 27.51, "learning_rate": 3.6247708931548726e-05, "loss": 2.1425, "step": 9505500 }, { "epoch": 27.52, "learning_rate": 3.624698673119674e-05, "loss": 2.1135, "step": 9506000 }, { "epoch": 27.52, "learning_rate": 3.6246263083549464e-05, "loss": 2.1232, "step": 9506500 }, { "epoch": 27.52, "learning_rate": 3.6245539435902186e-05, "loss": 2.112, "step": 9507000 }, { "epoch": 27.52, "learning_rate": 3.624481578825491e-05, "loss": 2.1397, "step": 9507500 }, { "epoch": 27.52, "learning_rate": 3.624409214060763e-05, "loss": 2.1157, "step": 9508000 }, { "epoch": 27.52, "learning_rate": 3.624336849296035e-05, "loss": 2.133, "step": 9508500 }, { "epoch": 27.52, "learning_rate": 3.6242644845313075e-05, "loss": 2.1287, "step": 9509000 }, { "epoch": 27.53, "learning_rate": 3.6241921197665804e-05, "loss": 2.1079, "step": 9509500 }, { "epoch": 27.53, "learning_rate": 3.6241197550018526e-05, "loss": 2.1054, "step": 9510000 }, { "epoch": 27.53, "learning_rate": 3.6240473902371256e-05, "loss": 2.1095, "step": 9510500 }, { "epoch": 27.53, "learning_rate": 3.623975025472398e-05, "loss": 2.1208, "step": 9511000 }, { "epoch": 27.53, "learning_rate": 3.62390266070767e-05, "loss": 2.1257, "step": 9511500 }, { "epoch": 27.53, "learning_rate": 3.623830295942942e-05, "loss": 2.1262, "step": 9512000 }, { "epoch": 27.53, "learning_rate": 3.6237579311782144e-05, "loss": 2.1256, "step": 9512500 }, { "epoch": 27.54, "learning_rate": 3.623685711143016e-05, "loss": 2.122, "step": 9513000 }, { "epoch": 27.54, "learning_rate": 3.623613346378288e-05, "loss": 2.124, "step": 9513500 }, { "epoch": 27.54, "learning_rate": 3.6235409816135604e-05, "loss": 2.1347, "step": 9514000 }, { "epoch": 27.54, "learning_rate": 3.623468761578363e-05, "loss": 2.1202, "step": 9514500 }, { "epoch": 27.54, "learning_rate": 3.623396396813635e-05, "loss": 2.1505, "step": 9515000 }, { "epoch": 27.54, "learning_rate": 3.623324032048907e-05, "loss": 2.1059, "step": 9515500 }, { "epoch": 27.54, "learning_rate": 3.6232516672841793e-05, "loss": 2.1142, "step": 9516000 }, { "epoch": 27.55, "learning_rate": 3.6231793025194516e-05, "loss": 2.1231, "step": 9516500 }, { "epoch": 27.55, "learning_rate": 3.623106937754724e-05, "loss": 2.1407, "step": 9517000 }, { "epoch": 27.55, "learning_rate": 3.6230348624490554e-05, "loss": 2.1399, "step": 9517500 }, { "epoch": 27.55, "learning_rate": 3.622962497684328e-05, "loss": 2.1157, "step": 9518000 }, { "epoch": 27.55, "learning_rate": 3.6228901329196005e-05, "loss": 2.1261, "step": 9518500 }, { "epoch": 27.55, "learning_rate": 3.622817768154873e-05, "loss": 2.1342, "step": 9519000 }, { "epoch": 27.56, "learning_rate": 3.622745403390145e-05, "loss": 2.1429, "step": 9519500 }, { "epoch": 27.56, "learning_rate": 3.622673038625417e-05, "loss": 2.1262, "step": 9520000 }, { "epoch": 27.56, "learning_rate": 3.6226006738606894e-05, "loss": 2.108, "step": 9520500 }, { "epoch": 27.56, "learning_rate": 3.6225283090959616e-05, "loss": 2.1221, "step": 9521000 }, { "epoch": 27.56, "learning_rate": 3.622455944331234e-05, "loss": 2.1313, "step": 9521500 }, { "epoch": 27.56, "learning_rate": 3.622383579566506e-05, "loss": 2.1431, "step": 9522000 }, { "epoch": 27.56, "learning_rate": 3.622311214801778e-05, "loss": 2.146, "step": 9522500 }, { "epoch": 27.57, "learning_rate": 3.6222389947665805e-05, "loss": 2.1275, "step": 9523000 }, { "epoch": 27.57, "learning_rate": 3.622166630001853e-05, "loss": 2.1547, "step": 9523500 }, { "epoch": 27.57, "learning_rate": 3.622094265237125e-05, "loss": 2.1156, "step": 9524000 }, { "epoch": 27.57, "learning_rate": 3.6220220452019265e-05, "loss": 2.1408, "step": 9524500 }, { "epoch": 27.57, "learning_rate": 3.6219496804371994e-05, "loss": 2.1132, "step": 9525000 }, { "epoch": 27.57, "learning_rate": 3.6218773156724716e-05, "loss": 2.1366, "step": 9525500 }, { "epoch": 27.57, "learning_rate": 3.621804950907744e-05, "loss": 2.1272, "step": 9526000 }, { "epoch": 27.58, "learning_rate": 3.621732586143016e-05, "loss": 2.1233, "step": 9526500 }, { "epoch": 27.58, "learning_rate": 3.621660221378288e-05, "loss": 2.134, "step": 9527000 }, { "epoch": 27.58, "learning_rate": 3.6215878566135605e-05, "loss": 2.1163, "step": 9527500 }, { "epoch": 27.58, "learning_rate": 3.6215154918488334e-05, "loss": 2.1026, "step": 9528000 }, { "epoch": 27.58, "learning_rate": 3.6214431270841057e-05, "loss": 2.1383, "step": 9528500 }, { "epoch": 27.58, "learning_rate": 3.621370907048907e-05, "loss": 2.1563, "step": 9529000 }, { "epoch": 27.58, "learning_rate": 3.621298687013709e-05, "loss": 2.118, "step": 9529500 }, { "epoch": 27.59, "learning_rate": 3.621226322248981e-05, "loss": 2.1579, "step": 9530000 }, { "epoch": 27.59, "learning_rate": 3.621153957484253e-05, "loss": 2.1466, "step": 9530500 }, { "epoch": 27.59, "learning_rate": 3.6210815927195254e-05, "loss": 2.1263, "step": 9531000 }, { "epoch": 27.59, "learning_rate": 3.621009227954798e-05, "loss": 2.1511, "step": 9531500 }, { "epoch": 27.59, "learning_rate": 3.6209368631900706e-05, "loss": 2.1201, "step": 9532000 }, { "epoch": 27.59, "learning_rate": 3.6208644984253435e-05, "loss": 2.0989, "step": 9532500 }, { "epoch": 27.59, "learning_rate": 3.620792133660616e-05, "loss": 2.1107, "step": 9533000 }, { "epoch": 27.6, "learning_rate": 3.620719768895888e-05, "loss": 2.1143, "step": 9533500 }, { "epoch": 27.6, "learning_rate": 3.6206475488606895e-05, "loss": 2.1139, "step": 9534000 }, { "epoch": 27.6, "learning_rate": 3.620575328825491e-05, "loss": 2.1432, "step": 9534500 }, { "epoch": 27.6, "learning_rate": 3.620502964060763e-05, "loss": 2.1453, "step": 9535000 }, { "epoch": 27.6, "learning_rate": 3.6204307440255655e-05, "loss": 2.148, "step": 9535500 }, { "epoch": 27.6, "learning_rate": 3.620358379260838e-05, "loss": 2.1096, "step": 9536000 }, { "epoch": 27.6, "learning_rate": 3.62028601449611e-05, "loss": 2.1463, "step": 9536500 }, { "epoch": 27.61, "learning_rate": 3.620213649731382e-05, "loss": 2.1185, "step": 9537000 }, { "epoch": 27.61, "learning_rate": 3.6201412849666544e-05, "loss": 2.1344, "step": 9537500 }, { "epoch": 27.61, "learning_rate": 3.6200689202019266e-05, "loss": 2.1196, "step": 9538000 }, { "epoch": 27.61, "learning_rate": 3.619996555437199e-05, "loss": 2.1241, "step": 9538500 }, { "epoch": 27.61, "learning_rate": 3.619924190672471e-05, "loss": 2.1032, "step": 9539000 }, { "epoch": 27.61, "learning_rate": 3.619851970637273e-05, "loss": 2.1291, "step": 9539500 }, { "epoch": 27.61, "learning_rate": 3.619779605872546e-05, "loss": 2.1353, "step": 9540000 }, { "epoch": 27.62, "learning_rate": 3.619707385837348e-05, "loss": 2.1274, "step": 9540500 }, { "epoch": 27.62, "learning_rate": 3.61963502107262e-05, "loss": 2.1299, "step": 9541000 }, { "epoch": 27.62, "learning_rate": 3.619562656307892e-05, "loss": 2.1234, "step": 9541500 }, { "epoch": 27.62, "learning_rate": 3.6194902915431644e-05, "loss": 2.1596, "step": 9542000 }, { "epoch": 27.62, "learning_rate": 3.6194179267784366e-05, "loss": 2.1227, "step": 9542500 }, { "epoch": 27.62, "learning_rate": 3.619345706743238e-05, "loss": 2.1253, "step": 9543000 }, { "epoch": 27.62, "learning_rate": 3.619273341978511e-05, "loss": 2.1426, "step": 9543500 }, { "epoch": 27.63, "learning_rate": 3.619200977213783e-05, "loss": 2.1342, "step": 9544000 }, { "epoch": 27.63, "learning_rate": 3.6191286124490555e-05, "loss": 2.1213, "step": 9544500 }, { "epoch": 27.63, "learning_rate": 3.619056392413857e-05, "loss": 2.1249, "step": 9545000 }, { "epoch": 27.63, "learning_rate": 3.618984027649129e-05, "loss": 2.1537, "step": 9545500 }, { "epoch": 27.63, "learning_rate": 3.6189116628844015e-05, "loss": 2.1371, "step": 9546000 }, { "epoch": 27.63, "learning_rate": 3.618839442849203e-05, "loss": 2.1244, "step": 9546500 }, { "epoch": 27.63, "learning_rate": 3.618767078084476e-05, "loss": 2.1411, "step": 9547000 }, { "epoch": 27.64, "learning_rate": 3.618694713319748e-05, "loss": 2.1214, "step": 9547500 }, { "epoch": 27.64, "learning_rate": 3.6186224932845504e-05, "loss": 2.1341, "step": 9548000 }, { "epoch": 27.64, "learning_rate": 3.6185501285198227e-05, "loss": 2.137, "step": 9548500 }, { "epoch": 27.64, "learning_rate": 3.618477763755095e-05, "loss": 2.157, "step": 9549000 }, { "epoch": 27.64, "learning_rate": 3.618405398990367e-05, "loss": 2.1509, "step": 9549500 }, { "epoch": 27.64, "learning_rate": 3.618333034225639e-05, "loss": 2.1422, "step": 9550000 }, { "epoch": 27.64, "learning_rate": 3.6182606694609116e-05, "loss": 2.1156, "step": 9550500 }, { "epoch": 27.65, "learning_rate": 3.618188304696184e-05, "loss": 2.1268, "step": 9551000 }, { "epoch": 27.65, "learning_rate": 3.618116084660986e-05, "loss": 2.1137, "step": 9551500 }, { "epoch": 27.65, "learning_rate": 3.618043719896258e-05, "loss": 2.1199, "step": 9552000 }, { "epoch": 27.65, "learning_rate": 3.6179713551315305e-05, "loss": 2.1365, "step": 9552500 }, { "epoch": 27.65, "learning_rate": 3.617898990366803e-05, "loss": 2.1416, "step": 9553000 }, { "epoch": 27.65, "learning_rate": 3.617826625602075e-05, "loss": 2.136, "step": 9553500 }, { "epoch": 27.65, "learning_rate": 3.6177544055668765e-05, "loss": 2.1532, "step": 9554000 }, { "epoch": 27.66, "learning_rate": 3.617682040802149e-05, "loss": 2.129, "step": 9554500 }, { "epoch": 27.66, "learning_rate": 3.617609676037421e-05, "loss": 2.1294, "step": 9555000 }, { "epoch": 27.66, "learning_rate": 3.617537311272694e-05, "loss": 2.1228, "step": 9555500 }, { "epoch": 27.66, "learning_rate": 3.617464946507966e-05, "loss": 2.1441, "step": 9556000 }, { "epoch": 27.66, "learning_rate": 3.617392581743238e-05, "loss": 2.1113, "step": 9556500 }, { "epoch": 27.66, "learning_rate": 3.617320216978511e-05, "loss": 2.1351, "step": 9557000 }, { "epoch": 27.67, "learning_rate": 3.6172478522137834e-05, "loss": 2.1173, "step": 9557500 }, { "epoch": 27.67, "learning_rate": 3.6171754874490556e-05, "loss": 2.1271, "step": 9558000 }, { "epoch": 27.67, "learning_rate": 3.617103122684328e-05, "loss": 2.1384, "step": 9558500 }, { "epoch": 27.67, "learning_rate": 3.6170307579196e-05, "loss": 2.1536, "step": 9559000 }, { "epoch": 27.67, "learning_rate": 3.616958393154872e-05, "loss": 2.1392, "step": 9559500 }, { "epoch": 27.67, "learning_rate": 3.6168860283901445e-05, "loss": 2.1494, "step": 9560000 }, { "epoch": 27.67, "learning_rate": 3.616813663625417e-05, "loss": 2.1201, "step": 9560500 }, { "epoch": 27.68, "learning_rate": 3.616741443590218e-05, "loss": 2.1564, "step": 9561000 }, { "epoch": 27.68, "learning_rate": 3.616669078825491e-05, "loss": 2.1187, "step": 9561500 }, { "epoch": 27.68, "learning_rate": 3.6165967140607634e-05, "loss": 2.1649, "step": 9562000 }, { "epoch": 27.68, "learning_rate": 3.616524349296036e-05, "loss": 2.1708, "step": 9562500 }, { "epoch": 27.68, "learning_rate": 3.6164519845313085e-05, "loss": 2.1444, "step": 9563000 }, { "epoch": 27.68, "learning_rate": 3.616379619766581e-05, "loss": 2.1387, "step": 9563500 }, { "epoch": 27.68, "learning_rate": 3.616307255001853e-05, "loss": 2.1207, "step": 9564000 }, { "epoch": 27.69, "learning_rate": 3.616234890237125e-05, "loss": 2.1487, "step": 9564500 }, { "epoch": 27.69, "learning_rate": 3.6161625254723974e-05, "loss": 2.1186, "step": 9565000 }, { "epoch": 27.69, "learning_rate": 3.6160901607076696e-05, "loss": 2.1254, "step": 9565500 }, { "epoch": 27.69, "learning_rate": 3.616017940672471e-05, "loss": 2.1133, "step": 9566000 }, { "epoch": 27.69, "learning_rate": 3.6159455759077434e-05, "loss": 2.1251, "step": 9566500 }, { "epoch": 27.69, "learning_rate": 3.615873211143016e-05, "loss": 2.1488, "step": 9567000 }, { "epoch": 27.69, "learning_rate": 3.6158008463782885e-05, "loss": 2.1059, "step": 9567500 }, { "epoch": 27.7, "learning_rate": 3.615728481613561e-05, "loss": 2.1446, "step": 9568000 }, { "epoch": 27.7, "learning_rate": 3.615656116848833e-05, "loss": 2.1164, "step": 9568500 }, { "epoch": 27.7, "learning_rate": 3.615584041543164e-05, "loss": 2.1339, "step": 9569000 }, { "epoch": 27.7, "learning_rate": 3.615511676778436e-05, "loss": 2.1331, "step": 9569500 }, { "epoch": 27.7, "learning_rate": 3.615439312013709e-05, "loss": 2.1381, "step": 9570000 }, { "epoch": 27.7, "learning_rate": 3.615366947248981e-05, "loss": 2.1504, "step": 9570500 }, { "epoch": 27.7, "learning_rate": 3.615294582484254e-05, "loss": 2.1334, "step": 9571000 }, { "epoch": 27.71, "learning_rate": 3.6152222177195263e-05, "loss": 2.1107, "step": 9571500 }, { "epoch": 27.71, "learning_rate": 3.6151498529547986e-05, "loss": 2.1297, "step": 9572000 }, { "epoch": 27.71, "learning_rate": 3.6150776329196e-05, "loss": 2.1263, "step": 9572500 }, { "epoch": 27.71, "learning_rate": 3.6150052681548724e-05, "loss": 2.1453, "step": 9573000 }, { "epoch": 27.71, "learning_rate": 3.6149329033901446e-05, "loss": 2.1341, "step": 9573500 }, { "epoch": 27.71, "learning_rate": 3.614860538625417e-05, "loss": 2.1402, "step": 9574000 }, { "epoch": 27.71, "learning_rate": 3.614788318590219e-05, "loss": 2.1449, "step": 9574500 }, { "epoch": 27.72, "learning_rate": 3.614715953825491e-05, "loss": 2.1377, "step": 9575000 }, { "epoch": 27.72, "learning_rate": 3.614643733790293e-05, "loss": 2.113, "step": 9575500 }, { "epoch": 27.72, "learning_rate": 3.614571369025565e-05, "loss": 2.1473, "step": 9576000 }, { "epoch": 27.72, "learning_rate": 3.614499004260837e-05, "loss": 2.1225, "step": 9576500 }, { "epoch": 27.72, "learning_rate": 3.6144266394961095e-05, "loss": 2.1249, "step": 9577000 }, { "epoch": 27.72, "learning_rate": 3.6143542747313824e-05, "loss": 2.1205, "step": 9577500 }, { "epoch": 27.72, "learning_rate": 3.6142819099666546e-05, "loss": 2.1276, "step": 9578000 }, { "epoch": 27.73, "learning_rate": 3.614209545201927e-05, "loss": 2.1068, "step": 9578500 }, { "epoch": 27.73, "learning_rate": 3.614137180437199e-05, "loss": 2.1187, "step": 9579000 }, { "epoch": 27.73, "learning_rate": 3.614064815672471e-05, "loss": 2.1292, "step": 9579500 }, { "epoch": 27.73, "learning_rate": 3.613992450907744e-05, "loss": 2.1332, "step": 9580000 }, { "epoch": 27.73, "learning_rate": 3.6139200861430164e-05, "loss": 2.1407, "step": 9580500 }, { "epoch": 27.73, "learning_rate": 3.6138477213782886e-05, "loss": 2.1521, "step": 9581000 }, { "epoch": 27.73, "learning_rate": 3.61377550134309e-05, "loss": 2.1114, "step": 9581500 }, { "epoch": 27.74, "learning_rate": 3.6137031365783624e-05, "loss": 2.1432, "step": 9582000 }, { "epoch": 27.74, "learning_rate": 3.6136307718136346e-05, "loss": 2.1442, "step": 9582500 }, { "epoch": 27.74, "learning_rate": 3.613558407048907e-05, "loss": 2.1274, "step": 9583000 }, { "epoch": 27.74, "learning_rate": 3.613486187013709e-05, "loss": 2.1041, "step": 9583500 }, { "epoch": 27.74, "learning_rate": 3.613413822248981e-05, "loss": 2.1089, "step": 9584000 }, { "epoch": 27.74, "learning_rate": 3.6133414574842535e-05, "loss": 2.1186, "step": 9584500 }, { "epoch": 27.74, "learning_rate": 3.6132690927195264e-05, "loss": 2.1394, "step": 9585000 }, { "epoch": 27.75, "learning_rate": 3.6131967279547987e-05, "loss": 2.1353, "step": 9585500 }, { "epoch": 27.75, "learning_rate": 3.613124363190071e-05, "loss": 2.1187, "step": 9586000 }, { "epoch": 27.75, "learning_rate": 3.613051998425343e-05, "loss": 2.1251, "step": 9586500 }, { "epoch": 27.75, "learning_rate": 3.612979633660615e-05, "loss": 2.1643, "step": 9587000 }, { "epoch": 27.75, "learning_rate": 3.6129072688958875e-05, "loss": 2.1161, "step": 9587500 }, { "epoch": 27.75, "learning_rate": 3.61283490413116e-05, "loss": 2.1215, "step": 9588000 }, { "epoch": 27.75, "learning_rate": 3.612762539366432e-05, "loss": 2.1219, "step": 9588500 }, { "epoch": 27.76, "learning_rate": 3.612690174601704e-05, "loss": 2.1098, "step": 9589000 }, { "epoch": 27.76, "learning_rate": 3.6126178098369764e-05, "loss": 2.1078, "step": 9589500 }, { "epoch": 27.76, "learning_rate": 3.612545445072249e-05, "loss": 2.1452, "step": 9590000 }, { "epoch": 27.76, "learning_rate": 3.612473225037051e-05, "loss": 2.1479, "step": 9590500 }, { "epoch": 27.76, "learning_rate": 3.612400860272323e-05, "loss": 2.1195, "step": 9591000 }, { "epoch": 27.76, "learning_rate": 3.612328640237125e-05, "loss": 2.1112, "step": 9591500 }, { "epoch": 27.76, "learning_rate": 3.612256275472397e-05, "loss": 2.1198, "step": 9592000 }, { "epoch": 27.77, "learning_rate": 3.61218391070767e-05, "loss": 2.1215, "step": 9592500 }, { "epoch": 27.77, "learning_rate": 3.612111545942942e-05, "loss": 2.1268, "step": 9593000 }, { "epoch": 27.77, "learning_rate": 3.612039181178214e-05, "loss": 2.1409, "step": 9593500 }, { "epoch": 27.77, "learning_rate": 3.6119669611430165e-05, "loss": 2.1392, "step": 9594000 }, { "epoch": 27.77, "learning_rate": 3.611894596378289e-05, "loss": 2.1109, "step": 9594500 }, { "epoch": 27.77, "learning_rate": 3.611822231613561e-05, "loss": 2.1669, "step": 9595000 }, { "epoch": 27.78, "learning_rate": 3.611749866848833e-05, "loss": 2.1258, "step": 9595500 }, { "epoch": 27.78, "learning_rate": 3.6116775020841054e-05, "loss": 2.15, "step": 9596000 }, { "epoch": 27.78, "learning_rate": 3.6116051373193776e-05, "loss": 2.1218, "step": 9596500 }, { "epoch": 27.78, "learning_rate": 3.61153277255465e-05, "loss": 2.1138, "step": 9597000 }, { "epoch": 27.78, "learning_rate": 3.611460407789922e-05, "loss": 2.1274, "step": 9597500 }, { "epoch": 27.78, "learning_rate": 3.611388043025194e-05, "loss": 2.0926, "step": 9598000 }, { "epoch": 27.78, "learning_rate": 3.6113156782604665e-05, "loss": 2.1139, "step": 9598500 }, { "epoch": 27.79, "learning_rate": 3.611243458225269e-05, "loss": 2.1334, "step": 9599000 }, { "epoch": 27.79, "learning_rate": 3.6111710934605416e-05, "loss": 2.1543, "step": 9599500 }, { "epoch": 27.79, "learning_rate": 3.6110990181548725e-05, "loss": 2.107, "step": 9600000 }, { "epoch": 27.79, "learning_rate": 3.611026653390145e-05, "loss": 2.1197, "step": 9600500 }, { "epoch": 27.79, "learning_rate": 3.610954288625417e-05, "loss": 2.123, "step": 9601000 }, { "epoch": 27.79, "learning_rate": 3.610881923860689e-05, "loss": 2.1228, "step": 9601500 }, { "epoch": 27.79, "learning_rate": 3.610809559095962e-05, "loss": 2.1338, "step": 9602000 }, { "epoch": 27.8, "learning_rate": 3.610737194331234e-05, "loss": 2.1162, "step": 9602500 }, { "epoch": 27.8, "learning_rate": 3.6106648295665065e-05, "loss": 2.1359, "step": 9603000 }, { "epoch": 27.8, "learning_rate": 3.610592464801779e-05, "loss": 2.1264, "step": 9603500 }, { "epoch": 27.8, "learning_rate": 3.610520100037051e-05, "loss": 2.1134, "step": 9604000 }, { "epoch": 27.8, "learning_rate": 3.610447735272323e-05, "loss": 2.1314, "step": 9604500 }, { "epoch": 27.8, "learning_rate": 3.610375515237125e-05, "loss": 2.1377, "step": 9605000 }, { "epoch": 27.8, "learning_rate": 3.610303150472397e-05, "loss": 2.1348, "step": 9605500 }, { "epoch": 27.81, "learning_rate": 3.610230785707669e-05, "loss": 2.1335, "step": 9606000 }, { "epoch": 27.81, "learning_rate": 3.610158420942942e-05, "loss": 2.1015, "step": 9606500 }, { "epoch": 27.81, "learning_rate": 3.610086056178214e-05, "loss": 2.1356, "step": 9607000 }, { "epoch": 27.81, "learning_rate": 3.610013691413487e-05, "loss": 2.1391, "step": 9607500 }, { "epoch": 27.81, "learning_rate": 3.6099413266487594e-05, "loss": 2.1297, "step": 9608000 }, { "epoch": 27.81, "learning_rate": 3.609868961884032e-05, "loss": 2.1328, "step": 9608500 }, { "epoch": 27.81, "learning_rate": 3.609796597119304e-05, "loss": 2.1067, "step": 9609000 }, { "epoch": 27.82, "learning_rate": 3.6097243770841055e-05, "loss": 2.15, "step": 9609500 }, { "epoch": 27.82, "learning_rate": 3.609652157048907e-05, "loss": 2.1284, "step": 9610000 }, { "epoch": 27.82, "learning_rate": 3.609579792284179e-05, "loss": 2.1375, "step": 9610500 }, { "epoch": 27.82, "learning_rate": 3.609507427519452e-05, "loss": 2.1493, "step": 9611000 }, { "epoch": 27.82, "learning_rate": 3.6094350627547244e-05, "loss": 2.1137, "step": 9611500 }, { "epoch": 27.82, "learning_rate": 3.609362842719526e-05, "loss": 2.1391, "step": 9612000 }, { "epoch": 27.82, "learning_rate": 3.609290477954798e-05, "loss": 2.1063, "step": 9612500 }, { "epoch": 27.83, "learning_rate": 3.6092181131900704e-05, "loss": 2.1398, "step": 9613000 }, { "epoch": 27.83, "learning_rate": 3.6091457484253426e-05, "loss": 2.1307, "step": 9613500 }, { "epoch": 27.83, "learning_rate": 3.609073383660615e-05, "loss": 2.1197, "step": 9614000 }, { "epoch": 27.83, "learning_rate": 3.609001018895887e-05, "loss": 2.1372, "step": 9614500 }, { "epoch": 27.83, "learning_rate": 3.60892865413116e-05, "loss": 2.1388, "step": 9615000 }, { "epoch": 27.83, "learning_rate": 3.608856289366432e-05, "loss": 2.1187, "step": 9615500 }, { "epoch": 27.83, "learning_rate": 3.6087839246017044e-05, "loss": 2.1265, "step": 9616000 }, { "epoch": 27.84, "learning_rate": 3.608711559836977e-05, "loss": 2.1271, "step": 9616500 }, { "epoch": 27.84, "learning_rate": 3.6086391950722495e-05, "loss": 2.1419, "step": 9617000 }, { "epoch": 27.84, "learning_rate": 3.608566830307522e-05, "loss": 2.1247, "step": 9617500 }, { "epoch": 27.84, "learning_rate": 3.608494610272323e-05, "loss": 2.1075, "step": 9618000 }, { "epoch": 27.84, "learning_rate": 3.6084222455075955e-05, "loss": 2.1124, "step": 9618500 }, { "epoch": 27.84, "learning_rate": 3.608349880742868e-05, "loss": 2.1225, "step": 9619000 }, { "epoch": 27.84, "learning_rate": 3.60827751597814e-05, "loss": 2.1242, "step": 9619500 }, { "epoch": 27.85, "learning_rate": 3.608205151213412e-05, "loss": 2.1294, "step": 9620000 }, { "epoch": 27.85, "learning_rate": 3.6081327864486844e-05, "loss": 2.13, "step": 9620500 }, { "epoch": 27.85, "learning_rate": 3.608060421683957e-05, "loss": 2.1577, "step": 9621000 }, { "epoch": 27.85, "learning_rate": 3.6079880569192295e-05, "loss": 2.1386, "step": 9621500 }, { "epoch": 27.85, "learning_rate": 3.6079156921545024e-05, "loss": 2.1183, "step": 9622000 }, { "epoch": 27.85, "learning_rate": 3.6078433273897746e-05, "loss": 2.1544, "step": 9622500 }, { "epoch": 27.85, "learning_rate": 3.607770962625047e-05, "loss": 2.1098, "step": 9623000 }, { "epoch": 27.86, "learning_rate": 3.607698597860319e-05, "loss": 2.1281, "step": 9623500 }, { "epoch": 27.86, "learning_rate": 3.6076263778251206e-05, "loss": 2.1417, "step": 9624000 }, { "epoch": 27.86, "learning_rate": 3.607554013060393e-05, "loss": 2.1485, "step": 9624500 }, { "epoch": 27.86, "learning_rate": 3.607481648295665e-05, "loss": 2.1342, "step": 9625000 }, { "epoch": 27.86, "learning_rate": 3.607409283530937e-05, "loss": 2.1379, "step": 9625500 }, { "epoch": 27.86, "learning_rate": 3.6073369187662095e-05, "loss": 2.1298, "step": 9626000 }, { "epoch": 27.86, "learning_rate": 3.607264698731012e-05, "loss": 2.1281, "step": 9626500 }, { "epoch": 27.87, "learning_rate": 3.607192478695813e-05, "loss": 2.1611, "step": 9627000 }, { "epoch": 27.87, "learning_rate": 3.6071201139310855e-05, "loss": 2.1368, "step": 9627500 }, { "epoch": 27.87, "learning_rate": 3.607047749166358e-05, "loss": 2.1243, "step": 9628000 }, { "epoch": 27.87, "learning_rate": 3.60697538440163e-05, "loss": 2.1295, "step": 9628500 }, { "epoch": 27.87, "learning_rate": 3.606903019636902e-05, "loss": 2.1619, "step": 9629000 }, { "epoch": 27.87, "learning_rate": 3.606830654872175e-05, "loss": 2.1307, "step": 9629500 }, { "epoch": 27.87, "learning_rate": 3.6067582901074473e-05, "loss": 2.1454, "step": 9630000 }, { "epoch": 27.88, "learning_rate": 3.6066860700722496e-05, "loss": 2.1367, "step": 9630500 }, { "epoch": 27.88, "learning_rate": 3.606613705307522e-05, "loss": 2.1338, "step": 9631000 }, { "epoch": 27.88, "learning_rate": 3.606541340542794e-05, "loss": 2.1179, "step": 9631500 }, { "epoch": 27.88, "learning_rate": 3.606468975778066e-05, "loss": 2.1327, "step": 9632000 }, { "epoch": 27.88, "learning_rate": 3.6063966110133385e-05, "loss": 2.1236, "step": 9632500 }, { "epoch": 27.88, "learning_rate": 3.606324246248611e-05, "loss": 2.122, "step": 9633000 }, { "epoch": 27.89, "learning_rate": 3.606251881483883e-05, "loss": 2.1371, "step": 9633500 }, { "epoch": 27.89, "learning_rate": 3.606179516719155e-05, "loss": 2.1099, "step": 9634000 }, { "epoch": 27.89, "learning_rate": 3.6061071519544274e-05, "loss": 2.1343, "step": 9634500 }, { "epoch": 27.89, "learning_rate": 3.6060347871896996e-05, "loss": 2.134, "step": 9635000 }, { "epoch": 27.89, "learning_rate": 3.6059624224249725e-05, "loss": 2.1288, "step": 9635500 }, { "epoch": 27.89, "learning_rate": 3.605890057660245e-05, "loss": 2.1352, "step": 9636000 }, { "epoch": 27.89, "learning_rate": 3.6058176928955176e-05, "loss": 2.107, "step": 9636500 }, { "epoch": 27.9, "learning_rate": 3.605745472860319e-05, "loss": 2.1166, "step": 9637000 }, { "epoch": 27.9, "learning_rate": 3.605673252825121e-05, "loss": 2.1099, "step": 9637500 }, { "epoch": 27.9, "learning_rate": 3.605600888060393e-05, "loss": 2.1298, "step": 9638000 }, { "epoch": 27.9, "learning_rate": 3.605528523295665e-05, "loss": 2.1625, "step": 9638500 }, { "epoch": 27.9, "learning_rate": 3.6054561585309374e-05, "loss": 2.1411, "step": 9639000 }, { "epoch": 27.9, "learning_rate": 3.6053837937662096e-05, "loss": 2.1446, "step": 9639500 }, { "epoch": 27.9, "learning_rate": 3.6053114290014825e-05, "loss": 2.1408, "step": 9640000 }, { "epoch": 27.91, "learning_rate": 3.605239208966284e-05, "loss": 2.0942, "step": 9640500 }, { "epoch": 27.91, "learning_rate": 3.605166844201556e-05, "loss": 2.1201, "step": 9641000 }, { "epoch": 27.91, "learning_rate": 3.6050944794368285e-05, "loss": 2.1447, "step": 9641500 }, { "epoch": 27.91, "learning_rate": 3.605022114672101e-05, "loss": 2.107, "step": 9642000 }, { "epoch": 27.91, "learning_rate": 3.604949894636902e-05, "loss": 2.1312, "step": 9642500 }, { "epoch": 27.91, "learning_rate": 3.604877529872175e-05, "loss": 2.1211, "step": 9643000 }, { "epoch": 27.91, "learning_rate": 3.6048051651074474e-05, "loss": 2.1299, "step": 9643500 }, { "epoch": 27.92, "learning_rate": 3.604732945072249e-05, "loss": 2.1183, "step": 9644000 }, { "epoch": 27.92, "learning_rate": 3.604660580307522e-05, "loss": 2.1179, "step": 9644500 }, { "epoch": 27.92, "learning_rate": 3.604588215542794e-05, "loss": 2.1398, "step": 9645000 }, { "epoch": 27.92, "learning_rate": 3.604515850778066e-05, "loss": 2.1322, "step": 9645500 }, { "epoch": 27.92, "learning_rate": 3.6044434860133386e-05, "loss": 2.1187, "step": 9646000 }, { "epoch": 27.92, "learning_rate": 3.60437126597814e-05, "loss": 2.1373, "step": 9646500 }, { "epoch": 27.92, "learning_rate": 3.604298901213412e-05, "loss": 2.1187, "step": 9647000 }, { "epoch": 27.93, "learning_rate": 3.604226536448685e-05, "loss": 2.1388, "step": 9647500 }, { "epoch": 27.93, "learning_rate": 3.6041541716839575e-05, "loss": 2.1416, "step": 9648000 }, { "epoch": 27.93, "learning_rate": 3.604081951648759e-05, "loss": 2.1082, "step": 9648500 }, { "epoch": 27.93, "learning_rate": 3.6040097316135606e-05, "loss": 2.1092, "step": 9649000 }, { "epoch": 27.93, "learning_rate": 3.603937366848833e-05, "loss": 2.1464, "step": 9649500 }, { "epoch": 27.93, "learning_rate": 3.603865002084105e-05, "loss": 2.1291, "step": 9650000 }, { "epoch": 27.93, "learning_rate": 3.603792637319377e-05, "loss": 2.1387, "step": 9650500 }, { "epoch": 27.94, "learning_rate": 3.60372027255465e-05, "loss": 2.1787, "step": 9651000 }, { "epoch": 27.94, "learning_rate": 3.6036479077899224e-05, "loss": 2.1305, "step": 9651500 }, { "epoch": 27.94, "learning_rate": 3.603575543025195e-05, "loss": 2.1446, "step": 9652000 }, { "epoch": 27.94, "learning_rate": 3.603503322989997e-05, "loss": 2.1378, "step": 9652500 }, { "epoch": 27.94, "learning_rate": 3.603430958225269e-05, "loss": 2.1284, "step": 9653000 }, { "epoch": 27.94, "learning_rate": 3.603358593460541e-05, "loss": 2.123, "step": 9653500 }, { "epoch": 27.94, "learning_rate": 3.6032862286958135e-05, "loss": 2.1125, "step": 9654000 }, { "epoch": 27.95, "learning_rate": 3.603213863931086e-05, "loss": 2.1343, "step": 9654500 }, { "epoch": 27.95, "learning_rate": 3.603141499166358e-05, "loss": 2.1392, "step": 9655000 }, { "epoch": 27.95, "learning_rate": 3.60306913440163e-05, "loss": 2.1062, "step": 9655500 }, { "epoch": 27.95, "learning_rate": 3.6029967696369024e-05, "loss": 2.1388, "step": 9656000 }, { "epoch": 27.95, "learning_rate": 3.602924404872175e-05, "loss": 2.1242, "step": 9656500 }, { "epoch": 27.95, "learning_rate": 3.6028520401074475e-05, "loss": 2.1361, "step": 9657000 }, { "epoch": 27.95, "learning_rate": 3.60277967534272e-05, "loss": 2.1455, "step": 9657500 }, { "epoch": 27.96, "learning_rate": 3.602707455307521e-05, "loss": 2.1104, "step": 9658000 }, { "epoch": 27.96, "learning_rate": 3.602635235272323e-05, "loss": 2.1188, "step": 9658500 }, { "epoch": 27.96, "learning_rate": 3.602562870507595e-05, "loss": 2.1517, "step": 9659000 }, { "epoch": 27.96, "learning_rate": 3.602490505742868e-05, "loss": 2.1288, "step": 9659500 }, { "epoch": 27.96, "learning_rate": 3.60241814097814e-05, "loss": 2.1118, "step": 9660000 }, { "epoch": 27.96, "learning_rate": 3.6023457762134124e-05, "loss": 2.1258, "step": 9660500 }, { "epoch": 27.96, "learning_rate": 3.602273411448685e-05, "loss": 2.1268, "step": 9661000 }, { "epoch": 27.97, "learning_rate": 3.6022010466839575e-05, "loss": 2.1035, "step": 9661500 }, { "epoch": 27.97, "learning_rate": 3.60212868191923e-05, "loss": 2.1396, "step": 9662000 }, { "epoch": 27.97, "learning_rate": 3.602056461884031e-05, "loss": 2.1087, "step": 9662500 }, { "epoch": 27.97, "learning_rate": 3.6019840971193035e-05, "loss": 2.1088, "step": 9663000 }, { "epoch": 27.97, "learning_rate": 3.601911732354576e-05, "loss": 2.1218, "step": 9663500 }, { "epoch": 27.97, "learning_rate": 3.601839367589848e-05, "loss": 2.1318, "step": 9664000 }, { "epoch": 27.97, "learning_rate": 3.60176700282512e-05, "loss": 2.1261, "step": 9664500 }, { "epoch": 27.98, "learning_rate": 3.6016946380603924e-05, "loss": 2.1323, "step": 9665000 }, { "epoch": 27.98, "learning_rate": 3.601622273295665e-05, "loss": 2.1172, "step": 9665500 }, { "epoch": 27.98, "learning_rate": 3.6015499085309376e-05, "loss": 2.1254, "step": 9666000 }, { "epoch": 27.98, "learning_rate": 3.60147754376621e-05, "loss": 2.1237, "step": 9666500 }, { "epoch": 27.98, "learning_rate": 3.601405179001483e-05, "loss": 2.1486, "step": 9667000 }, { "epoch": 27.98, "learning_rate": 3.601332814236755e-05, "loss": 2.1296, "step": 9667500 }, { "epoch": 27.98, "learning_rate": 3.601260449472027e-05, "loss": 2.1071, "step": 9668000 }, { "epoch": 27.99, "learning_rate": 3.601188229436829e-05, "loss": 2.1483, "step": 9668500 }, { "epoch": 27.99, "learning_rate": 3.60111600940163e-05, "loss": 2.1151, "step": 9669000 }, { "epoch": 27.99, "learning_rate": 3.601043644636903e-05, "loss": 2.1529, "step": 9669500 }, { "epoch": 27.99, "learning_rate": 3.6009712798721754e-05, "loss": 2.1361, "step": 9670000 }, { "epoch": 27.99, "learning_rate": 3.6008989151074476e-05, "loss": 2.1446, "step": 9670500 }, { "epoch": 27.99, "learning_rate": 3.60082655034272e-05, "loss": 2.1146, "step": 9671000 }, { "epoch": 28.0, "learning_rate": 3.600754185577992e-05, "loss": 2.1497, "step": 9671500 }, { "epoch": 28.0, "learning_rate": 3.600681820813264e-05, "loss": 2.1167, "step": 9672000 }, { "epoch": 28.0, "learning_rate": 3.600609600778066e-05, "loss": 2.1237, "step": 9672500 }, { "epoch": 28.0, "learning_rate": 3.600537236013338e-05, "loss": 2.1563, "step": 9673000 }, { "epoch": 28.0, "eval_accuracy": 0.6667694438971397, "eval_accuracy_mlm": 0.6312277095277679, "eval_accuracy_nsp": 0.8573396030120123, "eval_loss": 2.182663917541504, "eval_runtime": 331.438, "eval_samples_per_second": 1316.645, "eval_steps_per_second": 54.861, "step": 9673216 }, { "epoch": 28.0, "learning_rate": 3.60046487124861e-05, "loss": 2.1061, "step": 9673500 }, { "epoch": 28.0, "learning_rate": 3.600392506483883e-05, "loss": 2.1082, "step": 9674000 }, { "epoch": 28.0, "learning_rate": 3.6003201417191554e-05, "loss": 2.1198, "step": 9674500 }, { "epoch": 28.01, "learning_rate": 3.600247776954428e-05, "loss": 2.1286, "step": 9675000 }, { "epoch": 28.01, "learning_rate": 3.6001754121897005e-05, "loss": 2.1053, "step": 9675500 }, { "epoch": 28.01, "learning_rate": 3.600103047424973e-05, "loss": 2.1165, "step": 9676000 }, { "epoch": 28.01, "learning_rate": 3.600030827389774e-05, "loss": 2.0749, "step": 9676500 }, { "epoch": 28.01, "learning_rate": 3.5999584626250465e-05, "loss": 2.107, "step": 9677000 }, { "epoch": 28.01, "learning_rate": 3.599886097860319e-05, "loss": 2.1012, "step": 9677500 }, { "epoch": 28.01, "learning_rate": 3.599813733095591e-05, "loss": 2.074, "step": 9678000 }, { "epoch": 28.02, "learning_rate": 3.599741513060393e-05, "loss": 2.0736, "step": 9678500 }, { "epoch": 28.02, "learning_rate": 3.5996691482956654e-05, "loss": 2.1095, "step": 9679000 }, { "epoch": 28.02, "learning_rate": 3.5995967835309376e-05, "loss": 2.1076, "step": 9679500 }, { "epoch": 28.02, "learning_rate": 3.59952441876621e-05, "loss": 2.0927, "step": 9680000 }, { "epoch": 28.02, "learning_rate": 3.599452054001482e-05, "loss": 2.0745, "step": 9680500 }, { "epoch": 28.02, "learning_rate": 3.599379689236754e-05, "loss": 2.1202, "step": 9681000 }, { "epoch": 28.02, "learning_rate": 3.599307324472027e-05, "loss": 2.1223, "step": 9681500 }, { "epoch": 28.03, "learning_rate": 3.599235104436829e-05, "loss": 2.0964, "step": 9682000 }, { "epoch": 28.03, "learning_rate": 3.599162739672101e-05, "loss": 2.099, "step": 9682500 }, { "epoch": 28.03, "learning_rate": 3.599090374907373e-05, "loss": 2.1141, "step": 9683000 }, { "epoch": 28.03, "learning_rate": 3.5990180101426454e-05, "loss": 2.1131, "step": 9683500 }, { "epoch": 28.03, "learning_rate": 3.598945645377918e-05, "loss": 2.1317, "step": 9684000 }, { "epoch": 28.03, "learning_rate": 3.59887342534272e-05, "loss": 2.0862, "step": 9684500 }, { "epoch": 28.03, "learning_rate": 3.598801060577992e-05, "loss": 2.1147, "step": 9685000 }, { "epoch": 28.04, "learning_rate": 3.598728695813264e-05, "loss": 2.0986, "step": 9685500 }, { "epoch": 28.04, "learning_rate": 3.5986563310485366e-05, "loss": 2.1117, "step": 9686000 }, { "epoch": 28.04, "learning_rate": 3.598583966283809e-05, "loss": 2.1287, "step": 9686500 }, { "epoch": 28.04, "learning_rate": 3.59851174624861e-05, "loss": 2.1126, "step": 9687000 }, { "epoch": 28.04, "learning_rate": 3.598439381483883e-05, "loss": 2.097, "step": 9687500 }, { "epoch": 28.04, "learning_rate": 3.598367161448685e-05, "loss": 2.1014, "step": 9688000 }, { "epoch": 28.04, "learning_rate": 3.598294796683957e-05, "loss": 2.1132, "step": 9688500 }, { "epoch": 28.05, "learning_rate": 3.598222431919229e-05, "loss": 2.0837, "step": 9689000 }, { "epoch": 28.05, "learning_rate": 3.598150067154502e-05, "loss": 2.1264, "step": 9689500 }, { "epoch": 28.05, "learning_rate": 3.5980777023897744e-05, "loss": 2.1075, "step": 9690000 }, { "epoch": 28.05, "learning_rate": 3.5980053376250466e-05, "loss": 2.1201, "step": 9690500 }, { "epoch": 28.05, "learning_rate": 3.597932972860319e-05, "loss": 2.1236, "step": 9691000 }, { "epoch": 28.05, "learning_rate": 3.597860608095591e-05, "loss": 2.1086, "step": 9691500 }, { "epoch": 28.05, "learning_rate": 3.597788243330863e-05, "loss": 2.1238, "step": 9692000 }, { "epoch": 28.06, "learning_rate": 3.597716168025195e-05, "loss": 2.1132, "step": 9692500 }, { "epoch": 28.06, "learning_rate": 3.597643803260467e-05, "loss": 2.1133, "step": 9693000 }, { "epoch": 28.06, "learning_rate": 3.597571438495739e-05, "loss": 2.1301, "step": 9693500 }, { "epoch": 28.06, "learning_rate": 3.597499218460541e-05, "loss": 2.1253, "step": 9694000 }, { "epoch": 28.06, "learning_rate": 3.597426853695813e-05, "loss": 2.1069, "step": 9694500 }, { "epoch": 28.06, "learning_rate": 3.597354488931086e-05, "loss": 2.1051, "step": 9695000 }, { "epoch": 28.06, "learning_rate": 3.597282124166358e-05, "loss": 2.1015, "step": 9695500 }, { "epoch": 28.07, "learning_rate": 3.5972097594016304e-05, "loss": 2.1002, "step": 9696000 }, { "epoch": 28.07, "learning_rate": 3.5971373946369026e-05, "loss": 2.1073, "step": 9696500 }, { "epoch": 28.07, "learning_rate": 3.5970650298721755e-05, "loss": 2.113, "step": 9697000 }, { "epoch": 28.07, "learning_rate": 3.596992665107448e-05, "loss": 2.1286, "step": 9697500 }, { "epoch": 28.07, "learning_rate": 3.59692030034272e-05, "loss": 2.0978, "step": 9698000 }, { "epoch": 28.07, "learning_rate": 3.596847935577992e-05, "loss": 2.1021, "step": 9698500 }, { "epoch": 28.07, "learning_rate": 3.5967755708132644e-05, "loss": 2.1171, "step": 9699000 }, { "epoch": 28.08, "learning_rate": 3.5967032060485366e-05, "loss": 2.1204, "step": 9699500 }, { "epoch": 28.08, "learning_rate": 3.596630841283809e-05, "loss": 2.1066, "step": 9700000 }, { "epoch": 28.08, "learning_rate": 3.596558476519081e-05, "loss": 2.1341, "step": 9700500 }, { "epoch": 28.08, "learning_rate": 3.596486111754353e-05, "loss": 2.0861, "step": 9701000 }, { "epoch": 28.08, "learning_rate": 3.5964137469896255e-05, "loss": 2.1171, "step": 9701500 }, { "epoch": 28.08, "learning_rate": 3.5963413822248984e-05, "loss": 2.1173, "step": 9702000 }, { "epoch": 28.08, "learning_rate": 3.5962690174601707e-05, "loss": 2.1187, "step": 9702500 }, { "epoch": 28.09, "learning_rate": 3.596196652695443e-05, "loss": 2.1059, "step": 9703000 }, { "epoch": 28.09, "learning_rate": 3.596124287930715e-05, "loss": 2.1111, "step": 9703500 }, { "epoch": 28.09, "learning_rate": 3.596051923165988e-05, "loss": 2.0957, "step": 9704000 }, { "epoch": 28.09, "learning_rate": 3.59597955840126e-05, "loss": 2.0935, "step": 9704500 }, { "epoch": 28.09, "learning_rate": 3.595907338366062e-05, "loss": 2.1162, "step": 9705000 }, { "epoch": 28.09, "learning_rate": 3.595834973601334e-05, "loss": 2.0782, "step": 9705500 }, { "epoch": 28.09, "learning_rate": 3.595762608836606e-05, "loss": 2.1191, "step": 9706000 }, { "epoch": 28.1, "learning_rate": 3.5956902440718784e-05, "loss": 2.111, "step": 9706500 }, { "epoch": 28.1, "learning_rate": 3.595617879307151e-05, "loss": 2.1414, "step": 9707000 }, { "epoch": 28.1, "learning_rate": 3.5955455145424236e-05, "loss": 2.1114, "step": 9707500 }, { "epoch": 28.1, "learning_rate": 3.595473294507225e-05, "loss": 2.1124, "step": 9708000 }, { "epoch": 28.1, "learning_rate": 3.5954009297424974e-05, "loss": 2.122, "step": 9708500 }, { "epoch": 28.1, "learning_rate": 3.5953285649777696e-05, "loss": 2.1112, "step": 9709000 }, { "epoch": 28.11, "learning_rate": 3.595256200213042e-05, "loss": 2.1096, "step": 9709500 }, { "epoch": 28.11, "learning_rate": 3.5951839801778434e-05, "loss": 2.0982, "step": 9710000 }, { "epoch": 28.11, "learning_rate": 3.595111615413116e-05, "loss": 2.1295, "step": 9710500 }, { "epoch": 28.11, "learning_rate": 3.5950392506483885e-05, "loss": 2.1374, "step": 9711000 }, { "epoch": 28.11, "learning_rate": 3.594967030613191e-05, "loss": 2.1336, "step": 9711500 }, { "epoch": 28.11, "learning_rate": 3.594894665848463e-05, "loss": 2.0976, "step": 9712000 }, { "epoch": 28.11, "learning_rate": 3.594822301083735e-05, "loss": 2.1118, "step": 9712500 }, { "epoch": 28.12, "learning_rate": 3.5947499363190074e-05, "loss": 2.1132, "step": 9713000 }, { "epoch": 28.12, "learning_rate": 3.5946775715542796e-05, "loss": 2.1001, "step": 9713500 }, { "epoch": 28.12, "learning_rate": 3.594605351519081e-05, "loss": 2.1274, "step": 9714000 }, { "epoch": 28.12, "learning_rate": 3.5945329867543534e-05, "loss": 2.1312, "step": 9714500 }, { "epoch": 28.12, "learning_rate": 3.594460621989626e-05, "loss": 2.0991, "step": 9715000 }, { "epoch": 28.12, "learning_rate": 3.5943882572248985e-05, "loss": 2.1227, "step": 9715500 }, { "epoch": 28.12, "learning_rate": 3.594315892460171e-05, "loss": 2.1156, "step": 9716000 }, { "epoch": 28.13, "learning_rate": 3.594243527695443e-05, "loss": 2.1033, "step": 9716500 }, { "epoch": 28.13, "learning_rate": 3.594171162930715e-05, "loss": 2.1374, "step": 9717000 }, { "epoch": 28.13, "learning_rate": 3.5940987981659874e-05, "loss": 2.1077, "step": 9717500 }, { "epoch": 28.13, "learning_rate": 3.594026578130789e-05, "loss": 2.1252, "step": 9718000 }, { "epoch": 28.13, "learning_rate": 3.593954213366061e-05, "loss": 2.116, "step": 9718500 }, { "epoch": 28.13, "learning_rate": 3.593881848601334e-05, "loss": 2.1305, "step": 9719000 }, { "epoch": 28.13, "learning_rate": 3.593809483836606e-05, "loss": 2.1143, "step": 9719500 }, { "epoch": 28.14, "learning_rate": 3.5937371190718785e-05, "loss": 2.1179, "step": 9720000 }, { "epoch": 28.14, "learning_rate": 3.593664899036681e-05, "loss": 2.1189, "step": 9720500 }, { "epoch": 28.14, "learning_rate": 3.593592534271953e-05, "loss": 2.1159, "step": 9721000 }, { "epoch": 28.14, "learning_rate": 3.593520169507225e-05, "loss": 2.106, "step": 9721500 }, { "epoch": 28.14, "learning_rate": 3.5934478047424974e-05, "loss": 2.1107, "step": 9722000 }, { "epoch": 28.14, "learning_rate": 3.5933754399777697e-05, "loss": 2.1076, "step": 9722500 }, { "epoch": 28.14, "learning_rate": 3.593303219942571e-05, "loss": 2.0999, "step": 9723000 }, { "epoch": 28.15, "learning_rate": 3.5932308551778434e-05, "loss": 2.1012, "step": 9723500 }, { "epoch": 28.15, "learning_rate": 3.593158490413116e-05, "loss": 2.1255, "step": 9724000 }, { "epoch": 28.15, "learning_rate": 3.5930861256483886e-05, "loss": 2.1432, "step": 9724500 }, { "epoch": 28.15, "learning_rate": 3.593013760883661e-05, "loss": 2.1175, "step": 9725000 }, { "epoch": 28.15, "learning_rate": 3.592941396118933e-05, "loss": 2.0825, "step": 9725500 }, { "epoch": 28.15, "learning_rate": 3.592869031354205e-05, "loss": 2.1055, "step": 9726000 }, { "epoch": 28.15, "learning_rate": 3.592796666589478e-05, "loss": 2.0972, "step": 9726500 }, { "epoch": 28.16, "learning_rate": 3.59272444655428e-05, "loss": 2.1253, "step": 9727000 }, { "epoch": 28.16, "learning_rate": 3.592652226519081e-05, "loss": 2.1051, "step": 9727500 }, { "epoch": 28.16, "learning_rate": 3.5925798617543535e-05, "loss": 2.113, "step": 9728000 }, { "epoch": 28.16, "learning_rate": 3.5925074969896264e-05, "loss": 2.1197, "step": 9728500 }, { "epoch": 28.16, "learning_rate": 3.5924351322248986e-05, "loss": 2.1027, "step": 9729000 }, { "epoch": 28.16, "learning_rate": 3.592362767460171e-05, "loss": 2.1086, "step": 9729500 }, { "epoch": 28.16, "learning_rate": 3.592290402695443e-05, "loss": 2.1291, "step": 9730000 }, { "epoch": 28.17, "learning_rate": 3.592218037930715e-05, "loss": 2.1228, "step": 9730500 }, { "epoch": 28.17, "learning_rate": 3.592145817895517e-05, "loss": 2.0911, "step": 9731000 }, { "epoch": 28.17, "learning_rate": 3.592073453130789e-05, "loss": 2.0891, "step": 9731500 }, { "epoch": 28.17, "learning_rate": 3.592001088366061e-05, "loss": 2.1078, "step": 9732000 }, { "epoch": 28.17, "learning_rate": 3.5919287236013335e-05, "loss": 2.1269, "step": 9732500 }, { "epoch": 28.17, "learning_rate": 3.5918563588366064e-05, "loss": 2.113, "step": 9733000 }, { "epoch": 28.17, "learning_rate": 3.5917839940718786e-05, "loss": 2.1318, "step": 9733500 }, { "epoch": 28.18, "learning_rate": 3.5917116293071515e-05, "loss": 2.1302, "step": 9734000 }, { "epoch": 28.18, "learning_rate": 3.591639264542424e-05, "loss": 2.1092, "step": 9734500 }, { "epoch": 28.18, "learning_rate": 3.591566899777696e-05, "loss": 2.1128, "step": 9735000 }, { "epoch": 28.18, "learning_rate": 3.591494535012968e-05, "loss": 2.1336, "step": 9735500 }, { "epoch": 28.18, "learning_rate": 3.5914221702482404e-05, "loss": 2.129, "step": 9736000 }, { "epoch": 28.18, "learning_rate": 3.591349950213042e-05, "loss": 2.1164, "step": 9736500 }, { "epoch": 28.18, "learning_rate": 3.591277585448314e-05, "loss": 2.1208, "step": 9737000 }, { "epoch": 28.19, "learning_rate": 3.5912052206835864e-05, "loss": 2.1181, "step": 9737500 }, { "epoch": 28.19, "learning_rate": 3.5911330006483886e-05, "loss": 2.1108, "step": 9738000 }, { "epoch": 28.19, "learning_rate": 3.591060635883661e-05, "loss": 2.1379, "step": 9738500 }, { "epoch": 28.19, "learning_rate": 3.5909884158484624e-05, "loss": 2.1188, "step": 9739000 }, { "epoch": 28.19, "learning_rate": 3.5909160510837346e-05, "loss": 2.1071, "step": 9739500 }, { "epoch": 28.19, "learning_rate": 3.590843686319007e-05, "loss": 2.1368, "step": 9740000 }, { "epoch": 28.19, "learning_rate": 3.590771321554279e-05, "loss": 2.1228, "step": 9740500 }, { "epoch": 28.2, "learning_rate": 3.590698956789551e-05, "loss": 2.0931, "step": 9741000 }, { "epoch": 28.2, "learning_rate": 3.590626736754354e-05, "loss": 2.1131, "step": 9741500 }, { "epoch": 28.2, "learning_rate": 3.5905543719896264e-05, "loss": 2.1081, "step": 9742000 }, { "epoch": 28.2, "learning_rate": 3.590482007224899e-05, "loss": 2.1264, "step": 9742500 }, { "epoch": 28.2, "learning_rate": 3.590409642460171e-05, "loss": 2.1113, "step": 9743000 }, { "epoch": 28.2, "learning_rate": 3.590337277695443e-05, "loss": 2.1298, "step": 9743500 }, { "epoch": 28.2, "learning_rate": 3.590264912930715e-05, "loss": 2.0918, "step": 9744000 }, { "epoch": 28.21, "learning_rate": 3.5901925481659876e-05, "loss": 2.1193, "step": 9744500 }, { "epoch": 28.21, "learning_rate": 3.590120328130789e-05, "loss": 2.1092, "step": 9745000 }, { "epoch": 28.21, "learning_rate": 3.5900481080955914e-05, "loss": 2.1235, "step": 9745500 }, { "epoch": 28.21, "learning_rate": 3.5899757433308636e-05, "loss": 2.133, "step": 9746000 }, { "epoch": 28.21, "learning_rate": 3.589903378566136e-05, "loss": 2.1233, "step": 9746500 }, { "epoch": 28.21, "learning_rate": 3.589831013801408e-05, "loss": 2.1275, "step": 9747000 }, { "epoch": 28.22, "learning_rate": 3.58975864903668e-05, "loss": 2.1009, "step": 9747500 }, { "epoch": 28.22, "learning_rate": 3.5896862842719525e-05, "loss": 2.0962, "step": 9748000 }, { "epoch": 28.22, "learning_rate": 3.589613919507225e-05, "loss": 2.1337, "step": 9748500 }, { "epoch": 28.22, "learning_rate": 3.5895415547424976e-05, "loss": 2.1135, "step": 9749000 }, { "epoch": 28.22, "learning_rate": 3.58946918997777e-05, "loss": 2.1099, "step": 9749500 }, { "epoch": 28.22, "learning_rate": 3.589396825213042e-05, "loss": 2.1018, "step": 9750000 }, { "epoch": 28.22, "learning_rate": 3.589324460448314e-05, "loss": 2.1461, "step": 9750500 }, { "epoch": 28.23, "learning_rate": 3.5892520956835865e-05, "loss": 2.123, "step": 9751000 }, { "epoch": 28.23, "learning_rate": 3.5891797309188594e-05, "loss": 2.1269, "step": 9751500 }, { "epoch": 28.23, "learning_rate": 3.5891073661541316e-05, "loss": 2.1197, "step": 9752000 }, { "epoch": 28.23, "learning_rate": 3.589035001389404e-05, "loss": 2.1178, "step": 9752500 }, { "epoch": 28.23, "learning_rate": 3.588962636624676e-05, "loss": 2.0853, "step": 9753000 }, { "epoch": 28.23, "learning_rate": 3.588890271859948e-05, "loss": 2.0897, "step": 9753500 }, { "epoch": 28.23, "learning_rate": 3.5888179070952205e-05, "loss": 2.1092, "step": 9754000 }, { "epoch": 28.24, "learning_rate": 3.588745542330493e-05, "loss": 2.1079, "step": 9754500 }, { "epoch": 28.24, "learning_rate": 3.588673177565765e-05, "loss": 2.12, "step": 9755000 }, { "epoch": 28.24, "learning_rate": 3.5886009575305665e-05, "loss": 2.1188, "step": 9755500 }, { "epoch": 28.24, "learning_rate": 3.5885285927658394e-05, "loss": 2.1172, "step": 9756000 }, { "epoch": 28.24, "learning_rate": 3.5884562280011116e-05, "loss": 2.0877, "step": 9756500 }, { "epoch": 28.24, "learning_rate": 3.5883838632363845e-05, "loss": 2.1093, "step": 9757000 }, { "epoch": 28.24, "learning_rate": 3.588311498471657e-05, "loss": 2.0726, "step": 9757500 }, { "epoch": 28.25, "learning_rate": 3.588239278436458e-05, "loss": 2.1199, "step": 9758000 }, { "epoch": 28.25, "learning_rate": 3.58816705840126e-05, "loss": 2.1303, "step": 9758500 }, { "epoch": 28.25, "learning_rate": 3.588094693636532e-05, "loss": 2.1251, "step": 9759000 }, { "epoch": 28.25, "learning_rate": 3.588022328871804e-05, "loss": 2.1037, "step": 9759500 }, { "epoch": 28.25, "learning_rate": 3.5879501088366065e-05, "loss": 2.1147, "step": 9760000 }, { "epoch": 28.25, "learning_rate": 3.587877744071879e-05, "loss": 2.1048, "step": 9760500 }, { "epoch": 28.25, "learning_rate": 3.587805379307151e-05, "loss": 2.1051, "step": 9761000 }, { "epoch": 28.26, "learning_rate": 3.587733014542423e-05, "loss": 2.1207, "step": 9761500 }, { "epoch": 28.26, "learning_rate": 3.5876606497776954e-05, "loss": 2.108, "step": 9762000 }, { "epoch": 28.26, "learning_rate": 3.587588285012968e-05, "loss": 2.1296, "step": 9762500 }, { "epoch": 28.26, "learning_rate": 3.58751592024824e-05, "loss": 2.1368, "step": 9763000 }, { "epoch": 28.26, "learning_rate": 3.587443555483513e-05, "loss": 2.1373, "step": 9763500 }, { "epoch": 28.26, "learning_rate": 3.587371190718785e-05, "loss": 2.1152, "step": 9764000 }, { "epoch": 28.26, "learning_rate": 3.5872991154131166e-05, "loss": 2.0997, "step": 9764500 }, { "epoch": 28.27, "learning_rate": 3.587226750648389e-05, "loss": 2.1188, "step": 9765000 }, { "epoch": 28.27, "learning_rate": 3.587154385883661e-05, "loss": 2.121, "step": 9765500 }, { "epoch": 28.27, "learning_rate": 3.587082021118933e-05, "loss": 2.1049, "step": 9766000 }, { "epoch": 28.27, "learning_rate": 3.587009801083735e-05, "loss": 2.1265, "step": 9766500 }, { "epoch": 28.27, "learning_rate": 3.586937436319007e-05, "loss": 2.1336, "step": 9767000 }, { "epoch": 28.27, "learning_rate": 3.586865071554279e-05, "loss": 2.118, "step": 9767500 }, { "epoch": 28.27, "learning_rate": 3.586792706789552e-05, "loss": 2.0857, "step": 9768000 }, { "epoch": 28.28, "learning_rate": 3.5867203420248244e-05, "loss": 2.1091, "step": 9768500 }, { "epoch": 28.28, "learning_rate": 3.5866479772600966e-05, "loss": 2.1117, "step": 9769000 }, { "epoch": 28.28, "learning_rate": 3.586575612495369e-05, "loss": 2.1078, "step": 9769500 }, { "epoch": 28.28, "learning_rate": 3.586503247730641e-05, "loss": 2.1325, "step": 9770000 }, { "epoch": 28.28, "learning_rate": 3.5864310276954426e-05, "loss": 2.0993, "step": 9770500 }, { "epoch": 28.28, "learning_rate": 3.586358662930715e-05, "loss": 2.1342, "step": 9771000 }, { "epoch": 28.28, "learning_rate": 3.586286298165988e-05, "loss": 2.1069, "step": 9771500 }, { "epoch": 28.29, "learning_rate": 3.58621393340126e-05, "loss": 2.1118, "step": 9772000 }, { "epoch": 28.29, "learning_rate": 3.586141568636532e-05, "loss": 2.1175, "step": 9772500 }, { "epoch": 28.29, "learning_rate": 3.5860692038718044e-05, "loss": 2.098, "step": 9773000 }, { "epoch": 28.29, "learning_rate": 3.585996839107077e-05, "loss": 2.1196, "step": 9773500 }, { "epoch": 28.29, "learning_rate": 3.5859244743423495e-05, "loss": 2.1388, "step": 9774000 }, { "epoch": 28.29, "learning_rate": 3.585852109577622e-05, "loss": 2.1055, "step": 9774500 }, { "epoch": 28.29, "learning_rate": 3.585779889542423e-05, "loss": 2.1197, "step": 9775000 }, { "epoch": 28.3, "learning_rate": 3.585707669507225e-05, "loss": 2.1116, "step": 9775500 }, { "epoch": 28.3, "learning_rate": 3.585635304742497e-05, "loss": 2.1101, "step": 9776000 }, { "epoch": 28.3, "learning_rate": 3.585562939977769e-05, "loss": 2.1035, "step": 9776500 }, { "epoch": 28.3, "learning_rate": 3.585490575213042e-05, "loss": 2.1427, "step": 9777000 }, { "epoch": 28.3, "learning_rate": 3.585418355177844e-05, "loss": 2.1214, "step": 9777500 }, { "epoch": 28.3, "learning_rate": 3.585345990413116e-05, "loss": 2.1162, "step": 9778000 }, { "epoch": 28.3, "learning_rate": 3.585273625648388e-05, "loss": 2.114, "step": 9778500 }, { "epoch": 28.31, "learning_rate": 3.5852014056131904e-05, "loss": 2.0958, "step": 9779000 }, { "epoch": 28.31, "learning_rate": 3.5851290408484627e-05, "loss": 2.1082, "step": 9779500 }, { "epoch": 28.31, "learning_rate": 3.585056676083735e-05, "loss": 2.1168, "step": 9780000 }, { "epoch": 28.31, "learning_rate": 3.584984456048537e-05, "loss": 2.1255, "step": 9780500 }, { "epoch": 28.31, "learning_rate": 3.584912091283809e-05, "loss": 2.1245, "step": 9781000 }, { "epoch": 28.31, "learning_rate": 3.5848397265190816e-05, "loss": 2.1295, "step": 9781500 }, { "epoch": 28.31, "learning_rate": 3.584767361754354e-05, "loss": 2.1004, "step": 9782000 }, { "epoch": 28.32, "learning_rate": 3.584694996989626e-05, "loss": 2.1135, "step": 9782500 }, { "epoch": 28.32, "learning_rate": 3.584622632224898e-05, "loss": 2.1314, "step": 9783000 }, { "epoch": 28.32, "learning_rate": 3.5845502674601705e-05, "loss": 2.1205, "step": 9783500 }, { "epoch": 28.32, "learning_rate": 3.584477902695443e-05, "loss": 2.1112, "step": 9784000 }, { "epoch": 28.32, "learning_rate": 3.584405682660244e-05, "loss": 2.1346, "step": 9784500 }, { "epoch": 28.32, "learning_rate": 3.584333317895517e-05, "loss": 2.1227, "step": 9785000 }, { "epoch": 28.33, "learning_rate": 3.5842609531307894e-05, "loss": 2.1237, "step": 9785500 }, { "epoch": 28.33, "learning_rate": 3.5841885883660616e-05, "loss": 2.1023, "step": 9786000 }, { "epoch": 28.33, "learning_rate": 3.5841162236013345e-05, "loss": 2.1134, "step": 9786500 }, { "epoch": 28.33, "learning_rate": 3.584043858836607e-05, "loss": 2.1003, "step": 9787000 }, { "epoch": 28.33, "learning_rate": 3.583971494071879e-05, "loss": 2.0957, "step": 9787500 }, { "epoch": 28.33, "learning_rate": 3.583899129307151e-05, "loss": 2.1128, "step": 9788000 }, { "epoch": 28.33, "learning_rate": 3.583826909271953e-05, "loss": 2.1329, "step": 9788500 }, { "epoch": 28.34, "learning_rate": 3.583754544507225e-05, "loss": 2.1094, "step": 9789000 }, { "epoch": 28.34, "learning_rate": 3.583682179742497e-05, "loss": 2.1169, "step": 9789500 }, { "epoch": 28.34, "learning_rate": 3.5836098149777694e-05, "loss": 2.135, "step": 9790000 }, { "epoch": 28.34, "learning_rate": 3.583537450213042e-05, "loss": 2.1215, "step": 9790500 }, { "epoch": 28.34, "learning_rate": 3.5834650854483145e-05, "loss": 2.0952, "step": 9791000 }, { "epoch": 28.34, "learning_rate": 3.583392720683587e-05, "loss": 2.1154, "step": 9791500 }, { "epoch": 28.34, "learning_rate": 3.583320355918859e-05, "loss": 2.1077, "step": 9792000 }, { "epoch": 28.35, "learning_rate": 3.583247991154131e-05, "loss": 2.0838, "step": 9792500 }, { "epoch": 28.35, "learning_rate": 3.5831756263894034e-05, "loss": 2.1259, "step": 9793000 }, { "epoch": 28.35, "learning_rate": 3.583103261624676e-05, "loss": 2.1493, "step": 9793500 }, { "epoch": 28.35, "learning_rate": 3.5830308968599485e-05, "loss": 2.1401, "step": 9794000 }, { "epoch": 28.35, "learning_rate": 3.582958532095221e-05, "loss": 2.0956, "step": 9794500 }, { "epoch": 28.35, "learning_rate": 3.582886167330493e-05, "loss": 2.1065, "step": 9795000 }, { "epoch": 28.35, "learning_rate": 3.582813802565765e-05, "loss": 2.1162, "step": 9795500 }, { "epoch": 28.36, "learning_rate": 3.5827414378010374e-05, "loss": 2.1199, "step": 9796000 }, { "epoch": 28.36, "learning_rate": 3.5826690730363096e-05, "loss": 2.1204, "step": 9796500 }, { "epoch": 28.36, "learning_rate": 3.5825967082715825e-05, "loss": 2.1336, "step": 9797000 }, { "epoch": 28.36, "learning_rate": 3.582524343506855e-05, "loss": 2.0934, "step": 9797500 }, { "epoch": 28.36, "learning_rate": 3.582452123471656e-05, "loss": 2.1321, "step": 9798000 }, { "epoch": 28.36, "learning_rate": 3.5823797587069285e-05, "loss": 2.1188, "step": 9798500 }, { "epoch": 28.36, "learning_rate": 3.582307393942201e-05, "loss": 2.1139, "step": 9799000 }, { "epoch": 28.37, "learning_rate": 3.582235029177473e-05, "loss": 2.1445, "step": 9799500 }, { "epoch": 28.37, "learning_rate": 3.5821628091422745e-05, "loss": 2.1099, "step": 9800000 }, { "epoch": 28.37, "learning_rate": 3.5820904443775474e-05, "loss": 2.1213, "step": 9800500 }, { "epoch": 28.37, "learning_rate": 3.58201807961282e-05, "loss": 2.1135, "step": 9801000 }, { "epoch": 28.37, "learning_rate": 3.581945859577622e-05, "loss": 2.1255, "step": 9801500 }, { "epoch": 28.37, "learning_rate": 3.581873494812894e-05, "loss": 2.1185, "step": 9802000 }, { "epoch": 28.37, "learning_rate": 3.5818011300481663e-05, "loss": 2.1169, "step": 9802500 }, { "epoch": 28.38, "learning_rate": 3.5817287652834386e-05, "loss": 2.1245, "step": 9803000 }, { "epoch": 28.38, "learning_rate": 3.581656400518711e-05, "loss": 2.113, "step": 9803500 }, { "epoch": 28.38, "learning_rate": 3.581584035753983e-05, "loss": 2.1206, "step": 9804000 }, { "epoch": 28.38, "learning_rate": 3.581511670989255e-05, "loss": 2.1102, "step": 9804500 }, { "epoch": 28.38, "learning_rate": 3.5814393062245275e-05, "loss": 2.1223, "step": 9805000 }, { "epoch": 28.38, "learning_rate": 3.5813669414598e-05, "loss": 2.1247, "step": 9805500 }, { "epoch": 28.38, "learning_rate": 3.5812945766950726e-05, "loss": 2.1378, "step": 9806000 }, { "epoch": 28.39, "learning_rate": 3.581222211930345e-05, "loss": 2.1345, "step": 9806500 }, { "epoch": 28.39, "learning_rate": 3.581149847165617e-05, "loss": 2.1241, "step": 9807000 }, { "epoch": 28.39, "learning_rate": 3.581077482400889e-05, "loss": 2.1054, "step": 9807500 }, { "epoch": 28.39, "learning_rate": 3.581005262365691e-05, "loss": 2.1356, "step": 9808000 }, { "epoch": 28.39, "learning_rate": 3.580932897600964e-05, "loss": 2.1182, "step": 9808500 }, { "epoch": 28.39, "learning_rate": 3.580860677565765e-05, "loss": 2.1131, "step": 9809000 }, { "epoch": 28.39, "learning_rate": 3.5807883128010375e-05, "loss": 2.1035, "step": 9809500 }, { "epoch": 28.4, "learning_rate": 3.58071594803631e-05, "loss": 2.1201, "step": 9810000 }, { "epoch": 28.4, "learning_rate": 3.5806435832715826e-05, "loss": 2.1187, "step": 9810500 }, { "epoch": 28.4, "learning_rate": 3.580571218506855e-05, "loss": 2.131, "step": 9811000 }, { "epoch": 28.4, "learning_rate": 3.580498853742127e-05, "loss": 2.126, "step": 9811500 }, { "epoch": 28.4, "learning_rate": 3.580426488977399e-05, "loss": 2.1069, "step": 9812000 }, { "epoch": 28.4, "learning_rate": 3.5803541242126715e-05, "loss": 2.1087, "step": 9812500 }, { "epoch": 28.4, "learning_rate": 3.580281904177473e-05, "loss": 2.1309, "step": 9813000 }, { "epoch": 28.41, "learning_rate": 3.580209539412745e-05, "loss": 2.1127, "step": 9813500 }, { "epoch": 28.41, "learning_rate": 3.5801371746480175e-05, "loss": 2.1318, "step": 9814000 }, { "epoch": 28.41, "learning_rate": 3.58006480988329e-05, "loss": 2.1047, "step": 9814500 }, { "epoch": 28.41, "learning_rate": 3.579992734577621e-05, "loss": 2.1024, "step": 9815000 }, { "epoch": 28.41, "learning_rate": 3.5799203698128935e-05, "loss": 2.1306, "step": 9815500 }, { "epoch": 28.41, "learning_rate": 3.5798480050481664e-05, "loss": 2.1236, "step": 9816000 }, { "epoch": 28.41, "learning_rate": 3.5797756402834386e-05, "loss": 2.1389, "step": 9816500 }, { "epoch": 28.42, "learning_rate": 3.579703275518711e-05, "loss": 2.1108, "step": 9817000 }, { "epoch": 28.42, "learning_rate": 3.5796310554835124e-05, "loss": 2.1106, "step": 9817500 }, { "epoch": 28.42, "learning_rate": 3.579558690718785e-05, "loss": 2.1048, "step": 9818000 }, { "epoch": 28.42, "learning_rate": 3.5794863259540576e-05, "loss": 2.1417, "step": 9818500 }, { "epoch": 28.42, "learning_rate": 3.57941396118933e-05, "loss": 2.1071, "step": 9819000 }, { "epoch": 28.42, "learning_rate": 3.579341741154131e-05, "loss": 2.1094, "step": 9819500 }, { "epoch": 28.42, "learning_rate": 3.5792693763894036e-05, "loss": 2.1025, "step": 9820000 }, { "epoch": 28.43, "learning_rate": 3.579197011624676e-05, "loss": 2.118, "step": 9820500 }, { "epoch": 28.43, "learning_rate": 3.579124791589477e-05, "loss": 2.1428, "step": 9821000 }, { "epoch": 28.43, "learning_rate": 3.57905242682475e-05, "loss": 2.1052, "step": 9821500 }, { "epoch": 28.43, "learning_rate": 3.5789800620600225e-05, "loss": 2.1111, "step": 9822000 }, { "epoch": 28.43, "learning_rate": 3.578907697295295e-05, "loss": 2.1208, "step": 9822500 }, { "epoch": 28.43, "learning_rate": 3.578835332530567e-05, "loss": 2.1222, "step": 9823000 }, { "epoch": 28.44, "learning_rate": 3.57876296776584e-05, "loss": 2.1196, "step": 9823500 }, { "epoch": 28.44, "learning_rate": 3.578690603001112e-05, "loss": 2.1002, "step": 9824000 }, { "epoch": 28.44, "learning_rate": 3.578618238236384e-05, "loss": 2.1224, "step": 9824500 }, { "epoch": 28.44, "learning_rate": 3.5785458734716565e-05, "loss": 2.1259, "step": 9825000 }, { "epoch": 28.44, "learning_rate": 3.578473508706929e-05, "loss": 2.1108, "step": 9825500 }, { "epoch": 28.44, "learning_rate": 3.578401143942201e-05, "loss": 2.1137, "step": 9826000 }, { "epoch": 28.44, "learning_rate": 3.578328779177473e-05, "loss": 2.1241, "step": 9826500 }, { "epoch": 28.45, "learning_rate": 3.5782564144127454e-05, "loss": 2.0793, "step": 9827000 }, { "epoch": 28.45, "learning_rate": 3.5781840496480176e-05, "loss": 2.1182, "step": 9827500 }, { "epoch": 28.45, "learning_rate": 3.5781116848832905e-05, "loss": 2.0881, "step": 9828000 }, { "epoch": 28.45, "learning_rate": 3.5780396095776214e-05, "loss": 2.1403, "step": 9828500 }, { "epoch": 28.45, "learning_rate": 3.5779672448128936e-05, "loss": 2.0986, "step": 9829000 }, { "epoch": 28.45, "learning_rate": 3.577894880048166e-05, "loss": 2.1291, "step": 9829500 }, { "epoch": 28.45, "learning_rate": 3.577822515283438e-05, "loss": 2.0975, "step": 9830000 }, { "epoch": 28.46, "learning_rate": 3.57775015051871e-05, "loss": 2.1168, "step": 9830500 }, { "epoch": 28.46, "learning_rate": 3.577677785753983e-05, "loss": 2.1261, "step": 9831000 }, { "epoch": 28.46, "learning_rate": 3.5776054209892554e-05, "loss": 2.1234, "step": 9831500 }, { "epoch": 28.46, "learning_rate": 3.5775330562245276e-05, "loss": 2.1235, "step": 9832000 }, { "epoch": 28.46, "learning_rate": 3.5774606914598005e-05, "loss": 2.1073, "step": 9832500 }, { "epoch": 28.46, "learning_rate": 3.577388326695073e-05, "loss": 2.1105, "step": 9833000 }, { "epoch": 28.46, "learning_rate": 3.577315961930345e-05, "loss": 2.1124, "step": 9833500 }, { "epoch": 28.47, "learning_rate": 3.577243597165617e-05, "loss": 2.1292, "step": 9834000 }, { "epoch": 28.47, "learning_rate": 3.5771712324008894e-05, "loss": 2.1294, "step": 9834500 }, { "epoch": 28.47, "learning_rate": 3.5770988676361616e-05, "loss": 2.1268, "step": 9835000 }, { "epoch": 28.47, "learning_rate": 3.577026502871434e-05, "loss": 2.1317, "step": 9835500 }, { "epoch": 28.47, "learning_rate": 3.576954138106706e-05, "loss": 2.105, "step": 9836000 }, { "epoch": 28.47, "learning_rate": 3.576881773341978e-05, "loss": 2.1292, "step": 9836500 }, { "epoch": 28.47, "learning_rate": 3.5768095533067805e-05, "loss": 2.1137, "step": 9837000 }, { "epoch": 28.48, "learning_rate": 3.576737333271582e-05, "loss": 2.1355, "step": 9837500 }, { "epoch": 28.48, "learning_rate": 3.576664968506854e-05, "loss": 2.1267, "step": 9838000 }, { "epoch": 28.48, "learning_rate": 3.576592603742127e-05, "loss": 2.1257, "step": 9838500 }, { "epoch": 28.48, "learning_rate": 3.5765202389773994e-05, "loss": 2.1247, "step": 9839000 }, { "epoch": 28.48, "learning_rate": 3.576448018942201e-05, "loss": 2.1351, "step": 9839500 }, { "epoch": 28.48, "learning_rate": 3.576375798907003e-05, "loss": 2.1, "step": 9840000 }, { "epoch": 28.48, "learning_rate": 3.5763034341422755e-05, "loss": 2.093, "step": 9840500 }, { "epoch": 28.49, "learning_rate": 3.576231069377548e-05, "loss": 2.1247, "step": 9841000 }, { "epoch": 28.49, "learning_rate": 3.57615870461282e-05, "loss": 2.112, "step": 9841500 }, { "epoch": 28.49, "learning_rate": 3.576086339848092e-05, "loss": 2.1111, "step": 9842000 }, { "epoch": 28.49, "learning_rate": 3.5760139750833644e-05, "loss": 2.1213, "step": 9842500 }, { "epoch": 28.49, "learning_rate": 3.575941755048166e-05, "loss": 2.1113, "step": 9843000 }, { "epoch": 28.49, "learning_rate": 3.575869535012968e-05, "loss": 2.1223, "step": 9843500 }, { "epoch": 28.49, "learning_rate": 3.5757971702482404e-05, "loss": 2.1162, "step": 9844000 }, { "epoch": 28.5, "learning_rate": 3.5757248054835126e-05, "loss": 2.1232, "step": 9844500 }, { "epoch": 28.5, "learning_rate": 3.575652440718785e-05, "loss": 2.1208, "step": 9845000 }, { "epoch": 28.5, "learning_rate": 3.5755802206835864e-05, "loss": 2.0977, "step": 9845500 }, { "epoch": 28.5, "learning_rate": 3.575507855918859e-05, "loss": 2.1307, "step": 9846000 }, { "epoch": 28.5, "learning_rate": 3.5754354911541315e-05, "loss": 2.1225, "step": 9846500 }, { "epoch": 28.5, "learning_rate": 3.575363126389404e-05, "loss": 2.1274, "step": 9847000 }, { "epoch": 28.5, "learning_rate": 3.575290906354205e-05, "loss": 2.1438, "step": 9847500 }, { "epoch": 28.51, "learning_rate": 3.575218541589478e-05, "loss": 2.1223, "step": 9848000 }, { "epoch": 28.51, "learning_rate": 3.5751461768247504e-05, "loss": 2.1202, "step": 9848500 }, { "epoch": 28.51, "learning_rate": 3.5750738120600226e-05, "loss": 2.1022, "step": 9849000 }, { "epoch": 28.51, "learning_rate": 3.575001447295295e-05, "loss": 2.104, "step": 9849500 }, { "epoch": 28.51, "learning_rate": 3.574929082530567e-05, "loss": 2.1203, "step": 9850000 }, { "epoch": 28.51, "learning_rate": 3.574856717765839e-05, "loss": 2.1342, "step": 9850500 }, { "epoch": 28.51, "learning_rate": 3.5747843530011115e-05, "loss": 2.1152, "step": 9851000 }, { "epoch": 28.52, "learning_rate": 3.574711988236384e-05, "loss": 2.12, "step": 9851500 }, { "epoch": 28.52, "learning_rate": 3.574639623471656e-05, "loss": 2.1258, "step": 9852000 }, { "epoch": 28.52, "learning_rate": 3.574567258706928e-05, "loss": 2.1044, "step": 9852500 }, { "epoch": 28.52, "learning_rate": 3.5744948939422004e-05, "loss": 2.137, "step": 9853000 }, { "epoch": 28.52, "learning_rate": 3.574422529177473e-05, "loss": 2.1273, "step": 9853500 }, { "epoch": 28.52, "learning_rate": 3.5743501644127455e-05, "loss": 2.1254, "step": 9854000 }, { "epoch": 28.52, "learning_rate": 3.574277944377548e-05, "loss": 2.1192, "step": 9854500 }, { "epoch": 28.53, "learning_rate": 3.57420557961282e-05, "loss": 2.1, "step": 9855000 }, { "epoch": 28.53, "learning_rate": 3.574133214848092e-05, "loss": 2.1237, "step": 9855500 }, { "epoch": 28.53, "learning_rate": 3.5740608500833644e-05, "loss": 2.1088, "step": 9856000 }, { "epoch": 28.53, "learning_rate": 3.573988919507225e-05, "loss": 2.1301, "step": 9856500 }, { "epoch": 28.53, "learning_rate": 3.5739165547424976e-05, "loss": 2.1057, "step": 9857000 }, { "epoch": 28.53, "learning_rate": 3.57384418997777e-05, "loss": 2.1023, "step": 9857500 }, { "epoch": 28.53, "learning_rate": 3.573771825213042e-05, "loss": 2.1206, "step": 9858000 }, { "epoch": 28.54, "learning_rate": 3.5736996051778436e-05, "loss": 2.1206, "step": 9858500 }, { "epoch": 28.54, "learning_rate": 3.573627385142646e-05, "loss": 2.1222, "step": 9859000 }, { "epoch": 28.54, "learning_rate": 3.573555020377918e-05, "loss": 2.1742, "step": 9859500 }, { "epoch": 28.54, "learning_rate": 3.57348265561319e-05, "loss": 2.127, "step": 9860000 }, { "epoch": 28.54, "learning_rate": 3.5734102908484625e-05, "loss": 2.1315, "step": 9860500 }, { "epoch": 28.54, "learning_rate": 3.573337926083735e-05, "loss": 2.1113, "step": 9861000 }, { "epoch": 28.55, "learning_rate": 3.5732655613190076e-05, "loss": 2.1281, "step": 9861500 }, { "epoch": 28.55, "learning_rate": 3.57319319655428e-05, "loss": 2.1248, "step": 9862000 }, { "epoch": 28.55, "learning_rate": 3.573120831789552e-05, "loss": 2.1267, "step": 9862500 }, { "epoch": 28.55, "learning_rate": 3.573048467024824e-05, "loss": 2.1038, "step": 9863000 }, { "epoch": 28.55, "learning_rate": 3.572976246989626e-05, "loss": 2.1268, "step": 9863500 }, { "epoch": 28.55, "learning_rate": 3.572903882224898e-05, "loss": 2.1396, "step": 9864000 }, { "epoch": 28.55, "learning_rate": 3.572831517460171e-05, "loss": 2.112, "step": 9864500 }, { "epoch": 28.56, "learning_rate": 3.572759152695443e-05, "loss": 2.1244, "step": 9865000 }, { "epoch": 28.56, "learning_rate": 3.5726867879307154e-05, "loss": 2.1293, "step": 9865500 }, { "epoch": 28.56, "learning_rate": 3.572614567895517e-05, "loss": 2.1077, "step": 9866000 }, { "epoch": 28.56, "learning_rate": 3.572542203130789e-05, "loss": 2.099, "step": 9866500 }, { "epoch": 28.56, "learning_rate": 3.572469983095591e-05, "loss": 2.1153, "step": 9867000 }, { "epoch": 28.56, "learning_rate": 3.5723976183308636e-05, "loss": 2.1222, "step": 9867500 }, { "epoch": 28.56, "learning_rate": 3.572325253566136e-05, "loss": 2.1436, "step": 9868000 }, { "epoch": 28.57, "learning_rate": 3.572252888801408e-05, "loss": 2.118, "step": 9868500 }, { "epoch": 28.57, "learning_rate": 3.572180524036681e-05, "loss": 2.1236, "step": 9869000 }, { "epoch": 28.57, "learning_rate": 3.5721083040014825e-05, "loss": 2.1346, "step": 9869500 }, { "epoch": 28.57, "learning_rate": 3.572035939236755e-05, "loss": 2.1186, "step": 9870000 }, { "epoch": 28.57, "learning_rate": 3.571963574472027e-05, "loss": 2.1223, "step": 9870500 }, { "epoch": 28.57, "learning_rate": 3.571891209707299e-05, "loss": 2.109, "step": 9871000 }, { "epoch": 28.57, "learning_rate": 3.5718188449425714e-05, "loss": 2.133, "step": 9871500 }, { "epoch": 28.58, "learning_rate": 3.5717464801778436e-05, "loss": 2.1274, "step": 9872000 }, { "epoch": 28.58, "learning_rate": 3.571674115413116e-05, "loss": 2.1256, "step": 9872500 }, { "epoch": 28.58, "learning_rate": 3.571601750648389e-05, "loss": 2.1406, "step": 9873000 }, { "epoch": 28.58, "learning_rate": 3.571529385883661e-05, "loss": 2.1038, "step": 9873500 }, { "epoch": 28.58, "learning_rate": 3.5714571658484625e-05, "loss": 2.1238, "step": 9874000 }, { "epoch": 28.58, "learning_rate": 3.571384801083735e-05, "loss": 2.111, "step": 9874500 }, { "epoch": 28.58, "learning_rate": 3.571312436319007e-05, "loss": 2.1085, "step": 9875000 }, { "epoch": 28.59, "learning_rate": 3.5712402162838085e-05, "loss": 2.1222, "step": 9875500 }, { "epoch": 28.59, "learning_rate": 3.571167996248611e-05, "loss": 2.1288, "step": 9876000 }, { "epoch": 28.59, "learning_rate": 3.571095631483883e-05, "loss": 2.1315, "step": 9876500 }, { "epoch": 28.59, "learning_rate": 3.571023266719156e-05, "loss": 2.1071, "step": 9877000 }, { "epoch": 28.59, "learning_rate": 3.570950901954428e-05, "loss": 2.1149, "step": 9877500 }, { "epoch": 28.59, "learning_rate": 3.5708785371897003e-05, "loss": 2.1025, "step": 9878000 }, { "epoch": 28.59, "learning_rate": 3.5708061724249726e-05, "loss": 2.1029, "step": 9878500 }, { "epoch": 28.6, "learning_rate": 3.570733807660245e-05, "loss": 2.1142, "step": 9879000 }, { "epoch": 28.6, "learning_rate": 3.570661442895517e-05, "loss": 2.1267, "step": 9879500 }, { "epoch": 28.6, "learning_rate": 3.570589078130789e-05, "loss": 2.1202, "step": 9880000 }, { "epoch": 28.6, "learning_rate": 3.5705167133660615e-05, "loss": 2.1123, "step": 9880500 }, { "epoch": 28.6, "learning_rate": 3.570444348601334e-05, "loss": 2.1053, "step": 9881000 }, { "epoch": 28.6, "learning_rate": 3.570371983836606e-05, "loss": 2.1314, "step": 9881500 }, { "epoch": 28.6, "learning_rate": 3.570299619071879e-05, "loss": 2.1404, "step": 9882000 }, { "epoch": 28.61, "learning_rate": 3.570227254307151e-05, "loss": 2.1321, "step": 9882500 }, { "epoch": 28.61, "learning_rate": 3.570154889542423e-05, "loss": 2.1232, "step": 9883000 }, { "epoch": 28.61, "learning_rate": 3.5700825247776955e-05, "loss": 2.1228, "step": 9883500 }, { "epoch": 28.61, "learning_rate": 3.5700101600129684e-05, "loss": 2.1087, "step": 9884000 }, { "epoch": 28.61, "learning_rate": 3.5699377952482406e-05, "loss": 2.0966, "step": 9884500 }, { "epoch": 28.61, "learning_rate": 3.569865430483513e-05, "loss": 2.1453, "step": 9885000 }, { "epoch": 28.61, "learning_rate": 3.569793065718785e-05, "loss": 2.1107, "step": 9885500 }, { "epoch": 28.62, "learning_rate": 3.5697208456835866e-05, "loss": 2.1266, "step": 9886000 }, { "epoch": 28.62, "learning_rate": 3.569648480918859e-05, "loss": 2.1393, "step": 9886500 }, { "epoch": 28.62, "learning_rate": 3.569576116154131e-05, "loss": 2.1222, "step": 9887000 }, { "epoch": 28.62, "learning_rate": 3.569503896118933e-05, "loss": 2.1214, "step": 9887500 }, { "epoch": 28.62, "learning_rate": 3.5694315313542055e-05, "loss": 2.116, "step": 9888000 }, { "epoch": 28.62, "learning_rate": 3.569359166589478e-05, "loss": 2.1235, "step": 9888500 }, { "epoch": 28.62, "learning_rate": 3.56928680182475e-05, "loss": 2.1251, "step": 9889000 }, { "epoch": 28.63, "learning_rate": 3.569214437060022e-05, "loss": 2.1259, "step": 9889500 }, { "epoch": 28.63, "learning_rate": 3.5691420722952944e-05, "loss": 2.1277, "step": 9890000 }, { "epoch": 28.63, "learning_rate": 3.5690697075305666e-05, "loss": 2.1083, "step": 9890500 }, { "epoch": 28.63, "learning_rate": 3.568997487495369e-05, "loss": 2.089, "step": 9891000 }, { "epoch": 28.63, "learning_rate": 3.568925122730641e-05, "loss": 2.12, "step": 9891500 }, { "epoch": 28.63, "learning_rate": 3.568852757965914e-05, "loss": 2.1131, "step": 9892000 }, { "epoch": 28.63, "learning_rate": 3.568780393201186e-05, "loss": 2.1152, "step": 9892500 }, { "epoch": 28.64, "learning_rate": 3.5687080284364584e-05, "loss": 2.096, "step": 9893000 }, { "epoch": 28.64, "learning_rate": 3.5686356636717307e-05, "loss": 2.1328, "step": 9893500 }, { "epoch": 28.64, "learning_rate": 3.568563298907003e-05, "loss": 2.0819, "step": 9894000 }, { "epoch": 28.64, "learning_rate": 3.568490934142275e-05, "loss": 2.119, "step": 9894500 }, { "epoch": 28.64, "learning_rate": 3.5684187141070767e-05, "loss": 2.1089, "step": 9895000 }, { "epoch": 28.64, "learning_rate": 3.568346349342349e-05, "loss": 2.1064, "step": 9895500 }, { "epoch": 28.64, "learning_rate": 3.568273984577621e-05, "loss": 2.1283, "step": 9896000 }, { "epoch": 28.65, "learning_rate": 3.568201619812894e-05, "loss": 2.1276, "step": 9896500 }, { "epoch": 28.65, "learning_rate": 3.568129255048166e-05, "loss": 2.1333, "step": 9897000 }, { "epoch": 28.65, "learning_rate": 3.5680568902834384e-05, "loss": 2.1138, "step": 9897500 }, { "epoch": 28.65, "learning_rate": 3.567984525518711e-05, "loss": 2.1138, "step": 9898000 }, { "epoch": 28.65, "learning_rate": 3.5679121607539836e-05, "loss": 2.1133, "step": 9898500 }, { "epoch": 28.65, "learning_rate": 3.567839795989256e-05, "loss": 2.1412, "step": 9899000 }, { "epoch": 28.65, "learning_rate": 3.5677675759540574e-05, "loss": 2.1368, "step": 9899500 }, { "epoch": 28.66, "learning_rate": 3.567695355918859e-05, "loss": 2.1011, "step": 9900000 }, { "epoch": 28.66, "learning_rate": 3.567622991154131e-05, "loss": 2.1404, "step": 9900500 }, { "epoch": 28.66, "learning_rate": 3.567550626389404e-05, "loss": 2.1189, "step": 9901000 }, { "epoch": 28.66, "learning_rate": 3.567478261624676e-05, "loss": 2.1181, "step": 9901500 }, { "epoch": 28.66, "learning_rate": 3.5674058968599485e-05, "loss": 2.1057, "step": 9902000 }, { "epoch": 28.66, "learning_rate": 3.567333532095221e-05, "loss": 2.1118, "step": 9902500 }, { "epoch": 28.67, "learning_rate": 3.567261167330493e-05, "loss": 2.1242, "step": 9903000 }, { "epoch": 28.67, "learning_rate": 3.567188802565765e-05, "loss": 2.0965, "step": 9903500 }, { "epoch": 28.67, "learning_rate": 3.5671164378010374e-05, "loss": 2.116, "step": 9904000 }, { "epoch": 28.67, "learning_rate": 3.567044217765839e-05, "loss": 2.1028, "step": 9904500 }, { "epoch": 28.67, "learning_rate": 3.566971997730641e-05, "loss": 2.1047, "step": 9905000 }, { "epoch": 28.67, "learning_rate": 3.5668996329659134e-05, "loss": 2.1127, "step": 9905500 }, { "epoch": 28.67, "learning_rate": 3.566827412930715e-05, "loss": 2.1076, "step": 9906000 }, { "epoch": 28.68, "learning_rate": 3.566755048165988e-05, "loss": 2.1361, "step": 9906500 }, { "epoch": 28.68, "learning_rate": 3.56668268340126e-05, "loss": 2.1333, "step": 9907000 }, { "epoch": 28.68, "learning_rate": 3.5666104633660616e-05, "loss": 2.1383, "step": 9907500 }, { "epoch": 28.68, "learning_rate": 3.566538098601334e-05, "loss": 2.1064, "step": 9908000 }, { "epoch": 28.68, "learning_rate": 3.566465733836607e-05, "loss": 2.1328, "step": 9908500 }, { "epoch": 28.68, "learning_rate": 3.566393369071879e-05, "loss": 2.0844, "step": 9909000 }, { "epoch": 28.68, "learning_rate": 3.566321004307151e-05, "loss": 2.1031, "step": 9909500 }, { "epoch": 28.69, "learning_rate": 3.566248784271953e-05, "loss": 2.1222, "step": 9910000 }, { "epoch": 28.69, "learning_rate": 3.566176419507225e-05, "loss": 2.1328, "step": 9910500 }, { "epoch": 28.69, "learning_rate": 3.5661041994720265e-05, "loss": 2.1261, "step": 9911000 }, { "epoch": 28.69, "learning_rate": 3.566031834707299e-05, "loss": 2.1293, "step": 9911500 }, { "epoch": 28.69, "learning_rate": 3.5659594699425716e-05, "loss": 2.1336, "step": 9912000 }, { "epoch": 28.69, "learning_rate": 3.565887105177844e-05, "loss": 2.1166, "step": 9912500 }, { "epoch": 28.69, "learning_rate": 3.565814740413116e-05, "loss": 2.1113, "step": 9913000 }, { "epoch": 28.7, "learning_rate": 3.565742375648388e-05, "loss": 2.106, "step": 9913500 }, { "epoch": 28.7, "learning_rate": 3.565670010883661e-05, "loss": 2.1276, "step": 9914000 }, { "epoch": 28.7, "learning_rate": 3.5655976461189334e-05, "loss": 2.1252, "step": 9914500 }, { "epoch": 28.7, "learning_rate": 3.565525281354206e-05, "loss": 2.1281, "step": 9915000 }, { "epoch": 28.7, "learning_rate": 3.565452916589478e-05, "loss": 2.1347, "step": 9915500 }, { "epoch": 28.7, "learning_rate": 3.56538055182475e-05, "loss": 2.1117, "step": 9916000 }, { "epoch": 28.7, "learning_rate": 3.565308187060022e-05, "loss": 2.153, "step": 9916500 }, { "epoch": 28.71, "learning_rate": 3.5652358222952946e-05, "loss": 2.1481, "step": 9917000 }, { "epoch": 28.71, "learning_rate": 3.565163457530567e-05, "loss": 2.1251, "step": 9917500 }, { "epoch": 28.71, "learning_rate": 3.565091237495369e-05, "loss": 2.1152, "step": 9918000 }, { "epoch": 28.71, "learning_rate": 3.565018872730641e-05, "loss": 2.1146, "step": 9918500 }, { "epoch": 28.71, "learning_rate": 3.5649465079659135e-05, "loss": 2.1237, "step": 9919000 }, { "epoch": 28.71, "learning_rate": 3.564874143201186e-05, "loss": 2.1236, "step": 9919500 }, { "epoch": 28.71, "learning_rate": 3.564801778436458e-05, "loss": 2.1139, "step": 9920000 }, { "epoch": 28.72, "learning_rate": 3.5647295584012595e-05, "loss": 2.123, "step": 9920500 }, { "epoch": 28.72, "learning_rate": 3.564657338366062e-05, "loss": 2.1333, "step": 9921000 }, { "epoch": 28.72, "learning_rate": 3.564584973601334e-05, "loss": 2.1068, "step": 9921500 }, { "epoch": 28.72, "learning_rate": 3.564512608836607e-05, "loss": 2.1423, "step": 9922000 }, { "epoch": 28.72, "learning_rate": 3.564440244071879e-05, "loss": 2.1118, "step": 9922500 }, { "epoch": 28.72, "learning_rate": 3.564367879307151e-05, "loss": 2.1491, "step": 9923000 }, { "epoch": 28.72, "learning_rate": 3.5642955145424235e-05, "loss": 2.1252, "step": 9923500 }, { "epoch": 28.73, "learning_rate": 3.564223149777696e-05, "loss": 2.1035, "step": 9924000 }, { "epoch": 28.73, "learning_rate": 3.564150785012968e-05, "loss": 2.1048, "step": 9924500 }, { "epoch": 28.73, "learning_rate": 3.56407842024824e-05, "loss": 2.1005, "step": 9925000 }, { "epoch": 28.73, "learning_rate": 3.5640060554835124e-05, "loss": 2.1277, "step": 9925500 }, { "epoch": 28.73, "learning_rate": 3.5639336907187846e-05, "loss": 2.1255, "step": 9926000 }, { "epoch": 28.73, "learning_rate": 3.563861325954057e-05, "loss": 2.125, "step": 9926500 }, { "epoch": 28.73, "learning_rate": 3.563788961189329e-05, "loss": 2.1176, "step": 9927000 }, { "epoch": 28.74, "learning_rate": 3.563716596424602e-05, "loss": 2.1107, "step": 9927500 }, { "epoch": 28.74, "learning_rate": 3.563644231659874e-05, "loss": 2.1106, "step": 9928000 }, { "epoch": 28.74, "learning_rate": 3.5635720116246764e-05, "loss": 2.1156, "step": 9928500 }, { "epoch": 28.74, "learning_rate": 3.5634996468599486e-05, "loss": 2.1397, "step": 9929000 }, { "epoch": 28.74, "learning_rate": 3.563427282095221e-05, "loss": 2.1293, "step": 9929500 }, { "epoch": 28.74, "learning_rate": 3.563354917330493e-05, "loss": 2.1244, "step": 9930000 }, { "epoch": 28.74, "learning_rate": 3.5632826972952946e-05, "loss": 2.1279, "step": 9930500 }, { "epoch": 28.75, "learning_rate": 3.563210477260097e-05, "loss": 2.109, "step": 9931000 }, { "epoch": 28.75, "learning_rate": 3.563138112495369e-05, "loss": 2.1182, "step": 9931500 }, { "epoch": 28.75, "learning_rate": 3.563065747730641e-05, "loss": 2.1414, "step": 9932000 }, { "epoch": 28.75, "learning_rate": 3.5629933829659135e-05, "loss": 2.1029, "step": 9932500 }, { "epoch": 28.75, "learning_rate": 3.562921018201186e-05, "loss": 2.1087, "step": 9933000 }, { "epoch": 28.75, "learning_rate": 3.562848653436458e-05, "loss": 2.1121, "step": 9933500 }, { "epoch": 28.75, "learning_rate": 3.56277628867173e-05, "loss": 2.1282, "step": 9934000 }, { "epoch": 28.76, "learning_rate": 3.5627039239070024e-05, "loss": 2.1274, "step": 9934500 }, { "epoch": 28.76, "learning_rate": 3.5626315591422747e-05, "loss": 2.1175, "step": 9935000 }, { "epoch": 28.76, "learning_rate": 3.562559194377547e-05, "loss": 2.1191, "step": 9935500 }, { "epoch": 28.76, "learning_rate": 3.56248682961282e-05, "loss": 2.0979, "step": 9936000 }, { "epoch": 28.76, "learning_rate": 3.562414609577622e-05, "loss": 2.1195, "step": 9936500 }, { "epoch": 28.76, "learning_rate": 3.562342244812894e-05, "loss": 2.1039, "step": 9937000 }, { "epoch": 28.76, "learning_rate": 3.5622698800481665e-05, "loss": 2.1042, "step": 9937500 }, { "epoch": 28.77, "learning_rate": 3.562197515283439e-05, "loss": 2.1364, "step": 9938000 }, { "epoch": 28.77, "learning_rate": 3.562125150518711e-05, "loss": 2.1089, "step": 9938500 }, { "epoch": 28.77, "learning_rate": 3.562052785753983e-05, "loss": 2.1136, "step": 9939000 }, { "epoch": 28.77, "learning_rate": 3.5619804209892554e-05, "loss": 2.1247, "step": 9939500 }, { "epoch": 28.77, "learning_rate": 3.5619080562245276e-05, "loss": 2.1139, "step": 9940000 }, { "epoch": 28.77, "learning_rate": 3.5618356914598e-05, "loss": 2.1322, "step": 9940500 }, { "epoch": 28.78, "learning_rate": 3.561763326695072e-05, "loss": 2.116, "step": 9941000 }, { "epoch": 28.78, "learning_rate": 3.561690961930344e-05, "loss": 2.1115, "step": 9941500 }, { "epoch": 28.78, "learning_rate": 3.561618597165617e-05, "loss": 2.1234, "step": 9942000 }, { "epoch": 28.78, "learning_rate": 3.5615462324008894e-05, "loss": 2.1311, "step": 9942500 }, { "epoch": 28.78, "learning_rate": 3.561474012365691e-05, "loss": 2.1029, "step": 9943000 }, { "epoch": 28.78, "learning_rate": 3.561401792330493e-05, "loss": 2.1162, "step": 9943500 }, { "epoch": 28.78, "learning_rate": 3.5613294275657654e-05, "loss": 2.1395, "step": 9944000 }, { "epoch": 28.79, "learning_rate": 3.5612570628010376e-05, "loss": 2.1428, "step": 9944500 }, { "epoch": 28.79, "learning_rate": 3.56118469803631e-05, "loss": 2.1258, "step": 9945000 }, { "epoch": 28.79, "learning_rate": 3.561112333271582e-05, "loss": 2.1098, "step": 9945500 }, { "epoch": 28.79, "learning_rate": 3.561039968506854e-05, "loss": 2.1309, "step": 9946000 }, { "epoch": 28.79, "learning_rate": 3.560967603742127e-05, "loss": 2.1278, "step": 9946500 }, { "epoch": 28.79, "learning_rate": 3.560895383706929e-05, "loss": 2.116, "step": 9947000 }, { "epoch": 28.79, "learning_rate": 3.560823018942201e-05, "loss": 2.1039, "step": 9947500 }, { "epoch": 28.8, "learning_rate": 3.5607507989070025e-05, "loss": 2.1296, "step": 9948000 }, { "epoch": 28.8, "learning_rate": 3.560678434142275e-05, "loss": 2.1257, "step": 9948500 }, { "epoch": 28.8, "learning_rate": 3.560606069377547e-05, "loss": 2.1199, "step": 9949000 }, { "epoch": 28.8, "learning_rate": 3.56053370461282e-05, "loss": 2.1199, "step": 9949500 }, { "epoch": 28.8, "learning_rate": 3.560461339848092e-05, "loss": 2.1225, "step": 9950000 }, { "epoch": 28.8, "learning_rate": 3.560388975083364e-05, "loss": 2.1296, "step": 9950500 }, { "epoch": 28.8, "learning_rate": 3.5603167550481665e-05, "loss": 2.1256, "step": 9951000 }, { "epoch": 28.81, "learning_rate": 3.560244535012968e-05, "loss": 2.1484, "step": 9951500 }, { "epoch": 28.81, "learning_rate": 3.56017217024824e-05, "loss": 2.1169, "step": 9952000 }, { "epoch": 28.81, "learning_rate": 3.5600998054835125e-05, "loss": 2.1363, "step": 9952500 }, { "epoch": 28.81, "learning_rate": 3.560027440718785e-05, "loss": 2.1287, "step": 9953000 }, { "epoch": 28.81, "learning_rate": 3.559955075954057e-05, "loss": 2.1115, "step": 9953500 }, { "epoch": 28.81, "learning_rate": 3.55988271118933e-05, "loss": 2.1599, "step": 9954000 }, { "epoch": 28.81, "learning_rate": 3.559810346424602e-05, "loss": 2.1142, "step": 9954500 }, { "epoch": 28.82, "learning_rate": 3.5597379816598743e-05, "loss": 2.1238, "step": 9955000 }, { "epoch": 28.82, "learning_rate": 3.5596656168951466e-05, "loss": 2.1118, "step": 9955500 }, { "epoch": 28.82, "learning_rate": 3.559593252130419e-05, "loss": 2.1253, "step": 9956000 }, { "epoch": 28.82, "learning_rate": 3.5595210320952203e-05, "loss": 2.1195, "step": 9956500 }, { "epoch": 28.82, "learning_rate": 3.5594486673304926e-05, "loss": 2.1301, "step": 9957000 }, { "epoch": 28.82, "learning_rate": 3.559376302565765e-05, "loss": 2.1329, "step": 9957500 }, { "epoch": 28.82, "learning_rate": 3.559303937801037e-05, "loss": 2.1453, "step": 9958000 }, { "epoch": 28.83, "learning_rate": 3.55923157303631e-05, "loss": 2.1479, "step": 9958500 }, { "epoch": 28.83, "learning_rate": 3.559159208271582e-05, "loss": 2.1278, "step": 9959000 }, { "epoch": 28.83, "learning_rate": 3.559086843506855e-05, "loss": 2.119, "step": 9959500 }, { "epoch": 28.83, "learning_rate": 3.559014478742127e-05, "loss": 2.1224, "step": 9960000 }, { "epoch": 28.83, "learning_rate": 3.5589421139773995e-05, "loss": 2.137, "step": 9960500 }, { "epoch": 28.83, "learning_rate": 3.558869893942201e-05, "loss": 2.1381, "step": 9961000 }, { "epoch": 28.83, "learning_rate": 3.558797529177473e-05, "loss": 2.1382, "step": 9961500 }, { "epoch": 28.84, "learning_rate": 3.5587251644127455e-05, "loss": 2.1392, "step": 9962000 }, { "epoch": 28.84, "learning_rate": 3.558652799648018e-05, "loss": 2.1117, "step": 9962500 }, { "epoch": 28.84, "learning_rate": 3.55858043488329e-05, "loss": 2.1267, "step": 9963000 }, { "epoch": 28.84, "learning_rate": 3.558508070118562e-05, "loss": 2.0989, "step": 9963500 }, { "epoch": 28.84, "learning_rate": 3.5584358500833644e-05, "loss": 2.1455, "step": 9964000 }, { "epoch": 28.84, "learning_rate": 3.5583634853186366e-05, "loss": 2.1252, "step": 9964500 }, { "epoch": 28.84, "learning_rate": 3.558291265283438e-05, "loss": 2.1478, "step": 9965000 }, { "epoch": 28.85, "learning_rate": 3.5582189005187104e-05, "loss": 2.1452, "step": 9965500 }, { "epoch": 28.85, "learning_rate": 3.558146535753983e-05, "loss": 2.1156, "step": 9966000 }, { "epoch": 28.85, "learning_rate": 3.5580741709892555e-05, "loss": 2.1415, "step": 9966500 }, { "epoch": 28.85, "learning_rate": 3.558001806224528e-05, "loss": 2.1377, "step": 9967000 }, { "epoch": 28.85, "learning_rate": 3.5579294414598e-05, "loss": 2.1316, "step": 9967500 }, { "epoch": 28.85, "learning_rate": 3.557857076695072e-05, "loss": 2.1107, "step": 9968000 }, { "epoch": 28.85, "learning_rate": 3.557784711930345e-05, "loss": 2.1122, "step": 9968500 }, { "epoch": 28.86, "learning_rate": 3.557712347165617e-05, "loss": 2.1062, "step": 9969000 }, { "epoch": 28.86, "learning_rate": 3.5576399824008895e-05, "loss": 2.1113, "step": 9969500 }, { "epoch": 28.86, "learning_rate": 3.557567762365691e-05, "loss": 2.1405, "step": 9970000 }, { "epoch": 28.86, "learning_rate": 3.557495397600963e-05, "loss": 2.1206, "step": 9970500 }, { "epoch": 28.86, "learning_rate": 3.557423177565765e-05, "loss": 2.1295, "step": 9971000 }, { "epoch": 28.86, "learning_rate": 3.557350812801038e-05, "loss": 2.1024, "step": 9971500 }, { "epoch": 28.86, "learning_rate": 3.55727844803631e-05, "loss": 2.1213, "step": 9972000 }, { "epoch": 28.87, "learning_rate": 3.557206083271582e-05, "loss": 2.1113, "step": 9972500 }, { "epoch": 28.87, "learning_rate": 3.5571337185068544e-05, "loss": 2.1388, "step": 9973000 }, { "epoch": 28.87, "learning_rate": 3.5570613537421273e-05, "loss": 2.1143, "step": 9973500 }, { "epoch": 28.87, "learning_rate": 3.5569889889773996e-05, "loss": 2.1291, "step": 9974000 }, { "epoch": 28.87, "learning_rate": 3.556916624212672e-05, "loss": 2.119, "step": 9974500 }, { "epoch": 28.87, "learning_rate": 3.556844259447944e-05, "loss": 2.1275, "step": 9975000 }, { "epoch": 28.87, "learning_rate": 3.556771894683216e-05, "loss": 2.1332, "step": 9975500 }, { "epoch": 28.88, "learning_rate": 3.5566995299184885e-05, "loss": 2.1147, "step": 9976000 }, { "epoch": 28.88, "learning_rate": 3.556627165153761e-05, "loss": 2.149, "step": 9976500 }, { "epoch": 28.88, "learning_rate": 3.556554945118563e-05, "loss": 2.1211, "step": 9977000 }, { "epoch": 28.88, "learning_rate": 3.556482580353835e-05, "loss": 2.1241, "step": 9977500 }, { "epoch": 28.88, "learning_rate": 3.5564102155891074e-05, "loss": 2.1157, "step": 9978000 }, { "epoch": 28.88, "learning_rate": 3.5563378508243796e-05, "loss": 2.1271, "step": 9978500 }, { "epoch": 28.89, "learning_rate": 3.556265630789181e-05, "loss": 2.144, "step": 9979000 }, { "epoch": 28.89, "learning_rate": 3.556193555483513e-05, "loss": 2.1351, "step": 9979500 }, { "epoch": 28.89, "learning_rate": 3.556121190718785e-05, "loss": 2.1219, "step": 9980000 }, { "epoch": 28.89, "learning_rate": 3.5560489706835865e-05, "loss": 2.1085, "step": 9980500 }, { "epoch": 28.89, "learning_rate": 3.5559766059188594e-05, "loss": 2.1248, "step": 9981000 }, { "epoch": 28.89, "learning_rate": 3.5559042411541316e-05, "loss": 2.1212, "step": 9981500 }, { "epoch": 28.89, "learning_rate": 3.555831876389404e-05, "loss": 2.1092, "step": 9982000 }, { "epoch": 28.9, "learning_rate": 3.555759511624676e-05, "loss": 2.1262, "step": 9982500 }, { "epoch": 28.9, "learning_rate": 3.555687146859948e-05, "loss": 2.1341, "step": 9983000 }, { "epoch": 28.9, "learning_rate": 3.5556147820952205e-05, "loss": 2.1081, "step": 9983500 }, { "epoch": 28.9, "learning_rate": 3.555542417330493e-05, "loss": 2.1099, "step": 9984000 }, { "epoch": 28.9, "learning_rate": 3.555470052565765e-05, "loss": 2.1126, "step": 9984500 }, { "epoch": 28.9, "learning_rate": 3.555397832530567e-05, "loss": 2.1272, "step": 9985000 }, { "epoch": 28.9, "learning_rate": 3.5553254677658394e-05, "loss": 2.1062, "step": 9985500 }, { "epoch": 28.91, "learning_rate": 3.5552531030011116e-05, "loss": 2.118, "step": 9986000 }, { "epoch": 28.91, "learning_rate": 3.555180738236384e-05, "loss": 2.1222, "step": 9986500 }, { "epoch": 28.91, "learning_rate": 3.555108373471656e-05, "loss": 2.1528, "step": 9987000 }, { "epoch": 28.91, "learning_rate": 3.555036008706928e-05, "loss": 2.1166, "step": 9987500 }, { "epoch": 28.91, "learning_rate": 3.5549636439422005e-05, "loss": 2.0957, "step": 9988000 }, { "epoch": 28.91, "learning_rate": 3.5548912791774734e-05, "loss": 2.1213, "step": 9988500 }, { "epoch": 28.91, "learning_rate": 3.554819059142275e-05, "loss": 2.1021, "step": 9989000 }, { "epoch": 28.92, "learning_rate": 3.554746694377548e-05, "loss": 2.1138, "step": 9989500 }, { "epoch": 28.92, "learning_rate": 3.55467432961282e-05, "loss": 2.1155, "step": 9990000 }, { "epoch": 28.92, "learning_rate": 3.554601964848092e-05, "loss": 2.1273, "step": 9990500 }, { "epoch": 28.92, "learning_rate": 3.5545296000833645e-05, "loss": 2.1018, "step": 9991000 }, { "epoch": 28.92, "learning_rate": 3.554457235318637e-05, "loss": 2.1229, "step": 9991500 }, { "epoch": 28.92, "learning_rate": 3.554384870553909e-05, "loss": 2.1173, "step": 9992000 }, { "epoch": 28.92, "learning_rate": 3.554312505789181e-05, "loss": 2.1118, "step": 9992500 }, { "epoch": 28.93, "learning_rate": 3.5542401410244534e-05, "loss": 2.1104, "step": 9993000 }, { "epoch": 28.93, "learning_rate": 3.554167920989255e-05, "loss": 2.1203, "step": 9993500 }, { "epoch": 28.93, "learning_rate": 3.554095700954057e-05, "loss": 2.1198, "step": 9994000 }, { "epoch": 28.93, "learning_rate": 3.5540233361893295e-05, "loss": 2.12, "step": 9994500 }, { "epoch": 28.93, "learning_rate": 3.5539512608836603e-05, "loss": 2.1148, "step": 9995000 }, { "epoch": 28.93, "learning_rate": 3.5538788961189326e-05, "loss": 2.1509, "step": 9995500 }, { "epoch": 28.93, "learning_rate": 3.5538065313542055e-05, "loss": 2.1294, "step": 9996000 }, { "epoch": 28.94, "learning_rate": 3.553734166589478e-05, "loss": 2.1334, "step": 9996500 }, { "epoch": 28.94, "learning_rate": 3.5536618018247506e-05, "loss": 2.1393, "step": 9997000 }, { "epoch": 28.94, "learning_rate": 3.553589437060023e-05, "loss": 2.1287, "step": 9997500 }, { "epoch": 28.94, "learning_rate": 3.553517072295295e-05, "loss": 2.1171, "step": 9998000 }, { "epoch": 28.94, "learning_rate": 3.553444707530567e-05, "loss": 2.1068, "step": 9998500 }, { "epoch": 28.94, "learning_rate": 3.5533723427658395e-05, "loss": 2.1243, "step": 9999000 }, { "epoch": 28.94, "learning_rate": 3.553299978001112e-05, "loss": 2.1243, "step": 9999500 }, { "epoch": 28.95, "learning_rate": 3.553227613236384e-05, "loss": 2.1165, "step": 10000000 }, { "epoch": 28.95, "learning_rate": 3.553155248471656e-05, "loss": 2.1262, "step": 10000500 }, { "epoch": 28.95, "learning_rate": 3.5530828837069284e-05, "loss": 2.1372, "step": 10001000 }, { "epoch": 28.95, "learning_rate": 3.5530105189422006e-05, "loss": 2.113, "step": 10001500 }, { "epoch": 28.95, "learning_rate": 3.552938154177473e-05, "loss": 2.1161, "step": 10002000 }, { "epoch": 28.95, "learning_rate": 3.552865789412746e-05, "loss": 2.117, "step": 10002500 }, { "epoch": 28.95, "learning_rate": 3.552793424648018e-05, "loss": 2.1401, "step": 10003000 }, { "epoch": 28.96, "learning_rate": 3.55272120461282e-05, "loss": 2.1165, "step": 10003500 }, { "epoch": 28.96, "learning_rate": 3.5526488398480924e-05, "loss": 2.1269, "step": 10004000 }, { "epoch": 28.96, "learning_rate": 3.5525764750833646e-05, "loss": 2.1217, "step": 10004500 }, { "epoch": 28.96, "learning_rate": 3.552504110318637e-05, "loss": 2.1015, "step": 10005000 }, { "epoch": 28.96, "learning_rate": 3.5524318902834384e-05, "loss": 2.1069, "step": 10005500 }, { "epoch": 28.96, "learning_rate": 3.5523595255187106e-05, "loss": 2.1272, "step": 10006000 }, { "epoch": 28.96, "learning_rate": 3.552287160753983e-05, "loss": 2.1312, "step": 10006500 }, { "epoch": 28.97, "learning_rate": 3.552214795989256e-05, "loss": 2.123, "step": 10007000 }, { "epoch": 28.97, "learning_rate": 3.552142431224528e-05, "loss": 2.1673, "step": 10007500 }, { "epoch": 28.97, "learning_rate": 3.5520702111893295e-05, "loss": 2.1334, "step": 10008000 }, { "epoch": 28.97, "learning_rate": 3.551997846424602e-05, "loss": 2.1039, "step": 10008500 }, { "epoch": 28.97, "learning_rate": 3.551925481659874e-05, "loss": 2.134, "step": 10009000 }, { "epoch": 28.97, "learning_rate": 3.551853116895146e-05, "loss": 2.1271, "step": 10009500 }, { "epoch": 28.97, "learning_rate": 3.5517807521304184e-05, "loss": 2.1175, "step": 10010000 }, { "epoch": 28.98, "learning_rate": 3.5517085320952207e-05, "loss": 2.123, "step": 10010500 }, { "epoch": 28.98, "learning_rate": 3.551636167330493e-05, "loss": 2.1119, "step": 10011000 }, { "epoch": 28.98, "learning_rate": 3.551563802565766e-05, "loss": 2.1212, "step": 10011500 }, { "epoch": 28.98, "learning_rate": 3.551491437801038e-05, "loss": 2.1231, "step": 10012000 }, { "epoch": 28.98, "learning_rate": 3.55141907303631e-05, "loss": 2.1192, "step": 10012500 }, { "epoch": 28.98, "learning_rate": 3.5513467082715825e-05, "loss": 2.1198, "step": 10013000 }, { "epoch": 28.98, "learning_rate": 3.551274343506855e-05, "loss": 2.104, "step": 10013500 }, { "epoch": 28.99, "learning_rate": 3.551201978742127e-05, "loss": 2.1169, "step": 10014000 }, { "epoch": 28.99, "learning_rate": 3.551129613977399e-05, "loss": 2.1155, "step": 10014500 }, { "epoch": 28.99, "learning_rate": 3.551057393942201e-05, "loss": 2.1271, "step": 10015000 }, { "epoch": 28.99, "learning_rate": 3.550985173907003e-05, "loss": 2.1298, "step": 10015500 }, { "epoch": 28.99, "learning_rate": 3.5509129538718045e-05, "loss": 2.1075, "step": 10016000 }, { "epoch": 28.99, "learning_rate": 3.550840589107077e-05, "loss": 2.1213, "step": 10016500 }, { "epoch": 29.0, "learning_rate": 3.550768369071878e-05, "loss": 2.1133, "step": 10017000 }, { "epoch": 29.0, "learning_rate": 3.5506960043071505e-05, "loss": 2.1327, "step": 10017500 }, { "epoch": 29.0, "learning_rate": 3.5506236395424234e-05, "loss": 2.1346, "step": 10018000 }, { "epoch": 29.0, "learning_rate": 3.5505512747776956e-05, "loss": 2.1271, "step": 10018500 }, { "epoch": 29.0, "eval_accuracy": 0.6662792394368734, "eval_accuracy_mlm": 0.6307335576560936, "eval_accuracy_nsp": 0.856817129788765, "eval_loss": 2.1900088787078857, "eval_runtime": 331.3704, "eval_samples_per_second": 1316.913, "eval_steps_per_second": 54.872, "step": 10018688 }, { "epoch": 29.0, "learning_rate": 3.5504789100129685e-05, "loss": 2.0982, "step": 10019000 }, { "epoch": 29.0, "learning_rate": 3.550406545248241e-05, "loss": 2.1009, "step": 10019500 }, { "epoch": 29.0, "learning_rate": 3.550334180483513e-05, "loss": 2.1048, "step": 10020000 }, { "epoch": 29.01, "learning_rate": 3.550261815718785e-05, "loss": 2.1176, "step": 10020500 }, { "epoch": 29.01, "learning_rate": 3.5501894509540574e-05, "loss": 2.0853, "step": 10021000 }, { "epoch": 29.01, "learning_rate": 3.5501170861893296e-05, "loss": 2.0882, "step": 10021500 }, { "epoch": 29.01, "learning_rate": 3.550044866154131e-05, "loss": 2.0952, "step": 10022000 }, { "epoch": 29.01, "learning_rate": 3.5499726461189334e-05, "loss": 2.1082, "step": 10022500 }, { "epoch": 29.01, "learning_rate": 3.5499002813542056e-05, "loss": 2.0918, "step": 10023000 }, { "epoch": 29.01, "learning_rate": 3.549827916589478e-05, "loss": 2.0879, "step": 10023500 }, { "epoch": 29.02, "learning_rate": 3.54975555182475e-05, "loss": 2.1145, "step": 10024000 }, { "epoch": 29.02, "learning_rate": 3.5496833317895516e-05, "loss": 2.1052, "step": 10024500 }, { "epoch": 29.02, "learning_rate": 3.549610967024824e-05, "loss": 2.1257, "step": 10025000 }, { "epoch": 29.02, "learning_rate": 3.549538602260096e-05, "loss": 2.092, "step": 10025500 }, { "epoch": 29.02, "learning_rate": 3.549466237495368e-05, "loss": 2.1166, "step": 10026000 }, { "epoch": 29.02, "learning_rate": 3.549393872730641e-05, "loss": 2.0745, "step": 10026500 }, { "epoch": 29.02, "learning_rate": 3.5493216526954434e-05, "loss": 2.1014, "step": 10027000 }, { "epoch": 29.03, "learning_rate": 3.5492492879307157e-05, "loss": 2.0886, "step": 10027500 }, { "epoch": 29.03, "learning_rate": 3.549176923165988e-05, "loss": 2.0981, "step": 10028000 }, { "epoch": 29.03, "learning_rate": 3.54910455840126e-05, "loss": 2.082, "step": 10028500 }, { "epoch": 29.03, "learning_rate": 3.549032193636532e-05, "loss": 2.1012, "step": 10029000 }, { "epoch": 29.03, "learning_rate": 3.5489598288718045e-05, "loss": 2.0996, "step": 10029500 }, { "epoch": 29.03, "learning_rate": 3.548887464107077e-05, "loss": 2.0904, "step": 10030000 }, { "epoch": 29.03, "learning_rate": 3.548815099342349e-05, "loss": 2.101, "step": 10030500 }, { "epoch": 29.04, "learning_rate": 3.5487428793071506e-05, "loss": 2.1211, "step": 10031000 }, { "epoch": 29.04, "learning_rate": 3.5486705145424235e-05, "loss": 2.1169, "step": 10031500 }, { "epoch": 29.04, "learning_rate": 3.548598439236754e-05, "loss": 2.0889, "step": 10032000 }, { "epoch": 29.04, "learning_rate": 3.5485260744720266e-05, "loss": 2.1029, "step": 10032500 }, { "epoch": 29.04, "learning_rate": 3.548453709707299e-05, "loss": 2.098, "step": 10033000 }, { "epoch": 29.04, "learning_rate": 3.548381344942571e-05, "loss": 2.0799, "step": 10033500 }, { "epoch": 29.04, "learning_rate": 3.548308980177844e-05, "loss": 2.0904, "step": 10034000 }, { "epoch": 29.05, "learning_rate": 3.548236760142646e-05, "loss": 2.0925, "step": 10034500 }, { "epoch": 29.05, "learning_rate": 3.5481643953779184e-05, "loss": 2.1226, "step": 10035000 }, { "epoch": 29.05, "learning_rate": 3.5480920306131906e-05, "loss": 2.1151, "step": 10035500 }, { "epoch": 29.05, "learning_rate": 3.548019665848463e-05, "loss": 2.0998, "step": 10036000 }, { "epoch": 29.05, "learning_rate": 3.547947301083735e-05, "loss": 2.12, "step": 10036500 }, { "epoch": 29.05, "learning_rate": 3.547874936319007e-05, "loss": 2.1126, "step": 10037000 }, { "epoch": 29.05, "learning_rate": 3.5478025715542795e-05, "loss": 2.084, "step": 10037500 }, { "epoch": 29.06, "learning_rate": 3.547730206789552e-05, "loss": 2.1149, "step": 10038000 }, { "epoch": 29.06, "learning_rate": 3.547657842024824e-05, "loss": 2.104, "step": 10038500 }, { "epoch": 29.06, "learning_rate": 3.547585477260096e-05, "loss": 2.104, "step": 10039000 }, { "epoch": 29.06, "learning_rate": 3.5475131124953684e-05, "loss": 2.0888, "step": 10039500 }, { "epoch": 29.06, "learning_rate": 3.547440747730641e-05, "loss": 2.1084, "step": 10040000 }, { "epoch": 29.06, "learning_rate": 3.5473683829659135e-05, "loss": 2.0827, "step": 10040500 }, { "epoch": 29.06, "learning_rate": 3.547296162930715e-05, "loss": 2.1294, "step": 10041000 }, { "epoch": 29.07, "learning_rate": 3.547223798165988e-05, "loss": 2.1124, "step": 10041500 }, { "epoch": 29.07, "learning_rate": 3.54715143340126e-05, "loss": 2.0882, "step": 10042000 }, { "epoch": 29.07, "learning_rate": 3.5470790686365324e-05, "loss": 2.0722, "step": 10042500 }, { "epoch": 29.07, "learning_rate": 3.547006848601334e-05, "loss": 2.1021, "step": 10043000 }, { "epoch": 29.07, "learning_rate": 3.546934483836606e-05, "loss": 2.0901, "step": 10043500 }, { "epoch": 29.07, "learning_rate": 3.5468621190718784e-05, "loss": 2.1086, "step": 10044000 }, { "epoch": 29.07, "learning_rate": 3.546789754307151e-05, "loss": 2.1175, "step": 10044500 }, { "epoch": 29.08, "learning_rate": 3.546717534271953e-05, "loss": 2.108, "step": 10045000 }, { "epoch": 29.08, "learning_rate": 3.546645169507225e-05, "loss": 2.0909, "step": 10045500 }, { "epoch": 29.08, "learning_rate": 3.5465729494720266e-05, "loss": 2.0755, "step": 10046000 }, { "epoch": 29.08, "learning_rate": 3.546500584707299e-05, "loss": 2.0929, "step": 10046500 }, { "epoch": 29.08, "learning_rate": 3.546428219942571e-05, "loss": 2.1164, "step": 10047000 }, { "epoch": 29.08, "learning_rate": 3.546355855177843e-05, "loss": 2.1038, "step": 10047500 }, { "epoch": 29.08, "learning_rate": 3.546283490413116e-05, "loss": 2.0862, "step": 10048000 }, { "epoch": 29.09, "learning_rate": 3.5462111256483884e-05, "loss": 2.1005, "step": 10048500 }, { "epoch": 29.09, "learning_rate": 3.546138760883661e-05, "loss": 2.0959, "step": 10049000 }, { "epoch": 29.09, "learning_rate": 3.5460663961189336e-05, "loss": 2.1033, "step": 10049500 }, { "epoch": 29.09, "learning_rate": 3.545994031354206e-05, "loss": 2.1339, "step": 10050000 }, { "epoch": 29.09, "learning_rate": 3.545921666589478e-05, "loss": 2.0974, "step": 10050500 }, { "epoch": 29.09, "learning_rate": 3.54584930182475e-05, "loss": 2.1015, "step": 10051000 }, { "epoch": 29.09, "learning_rate": 3.5457769370600225e-05, "loss": 2.1039, "step": 10051500 }, { "epoch": 29.1, "learning_rate": 3.545704572295295e-05, "loss": 2.0994, "step": 10052000 }, { "epoch": 29.1, "learning_rate": 3.545632352260096e-05, "loss": 2.0851, "step": 10052500 }, { "epoch": 29.1, "learning_rate": 3.545560276954428e-05, "loss": 2.0928, "step": 10053000 }, { "epoch": 29.1, "learning_rate": 3.5454879121897e-05, "loss": 2.094, "step": 10053500 }, { "epoch": 29.1, "learning_rate": 3.5454156921545016e-05, "loss": 2.1112, "step": 10054000 }, { "epoch": 29.1, "learning_rate": 3.545343327389774e-05, "loss": 2.0995, "step": 10054500 }, { "epoch": 29.11, "learning_rate": 3.545270962625046e-05, "loss": 2.0966, "step": 10055000 }, { "epoch": 29.11, "learning_rate": 3.545198597860319e-05, "loss": 2.0848, "step": 10055500 }, { "epoch": 29.11, "learning_rate": 3.545126233095591e-05, "loss": 2.0994, "step": 10056000 }, { "epoch": 29.11, "learning_rate": 3.545053868330864e-05, "loss": 2.1201, "step": 10056500 }, { "epoch": 29.11, "learning_rate": 3.544981503566136e-05, "loss": 2.1009, "step": 10057000 }, { "epoch": 29.11, "learning_rate": 3.544909283530938e-05, "loss": 2.0938, "step": 10057500 }, { "epoch": 29.11, "learning_rate": 3.54483691876621e-05, "loss": 2.1009, "step": 10058000 }, { "epoch": 29.12, "learning_rate": 3.544764554001482e-05, "loss": 2.1127, "step": 10058500 }, { "epoch": 29.12, "learning_rate": 3.5446921892367545e-05, "loss": 2.1195, "step": 10059000 }, { "epoch": 29.12, "learning_rate": 3.544619824472027e-05, "loss": 2.1151, "step": 10059500 }, { "epoch": 29.12, "learning_rate": 3.544547459707299e-05, "loss": 2.1096, "step": 10060000 }, { "epoch": 29.12, "learning_rate": 3.544475094942571e-05, "loss": 2.0873, "step": 10060500 }, { "epoch": 29.12, "learning_rate": 3.5444028749073734e-05, "loss": 2.1079, "step": 10061000 }, { "epoch": 29.12, "learning_rate": 3.5443305101426456e-05, "loss": 2.0924, "step": 10061500 }, { "epoch": 29.13, "learning_rate": 3.544258145377918e-05, "loss": 2.1137, "step": 10062000 }, { "epoch": 29.13, "learning_rate": 3.54418578061319e-05, "loss": 2.0996, "step": 10062500 }, { "epoch": 29.13, "learning_rate": 3.544113415848462e-05, "loss": 2.1074, "step": 10063000 }, { "epoch": 29.13, "learning_rate": 3.5440410510837345e-05, "loss": 2.0977, "step": 10063500 }, { "epoch": 29.13, "learning_rate": 3.5439686863190074e-05, "loss": 2.097, "step": 10064000 }, { "epoch": 29.13, "learning_rate": 3.5438963215542796e-05, "loss": 2.1042, "step": 10064500 }, { "epoch": 29.13, "learning_rate": 3.543823956789552e-05, "loss": 2.0965, "step": 10065000 }, { "epoch": 29.14, "learning_rate": 3.543751592024824e-05, "loss": 2.1078, "step": 10065500 }, { "epoch": 29.14, "learning_rate": 3.543679227260096e-05, "loss": 2.1071, "step": 10066000 }, { "epoch": 29.14, "learning_rate": 3.543606862495369e-05, "loss": 2.093, "step": 10066500 }, { "epoch": 29.14, "learning_rate": 3.5435344977306414e-05, "loss": 2.1206, "step": 10067000 }, { "epoch": 29.14, "learning_rate": 3.5434621329659137e-05, "loss": 2.129, "step": 10067500 }, { "epoch": 29.14, "learning_rate": 3.543389912930715e-05, "loss": 2.1102, "step": 10068000 }, { "epoch": 29.14, "learning_rate": 3.5433175481659874e-05, "loss": 2.0843, "step": 10068500 }, { "epoch": 29.15, "learning_rate": 3.54324518340126e-05, "loss": 2.1039, "step": 10069000 }, { "epoch": 29.15, "learning_rate": 3.543172818636532e-05, "loss": 2.1112, "step": 10069500 }, { "epoch": 29.15, "learning_rate": 3.543100453871804e-05, "loss": 2.1044, "step": 10070000 }, { "epoch": 29.15, "learning_rate": 3.543028089107076e-05, "loss": 2.0929, "step": 10070500 }, { "epoch": 29.15, "learning_rate": 3.5429558690718786e-05, "loss": 2.1076, "step": 10071000 }, { "epoch": 29.15, "learning_rate": 3.5428835043071515e-05, "loss": 2.1004, "step": 10071500 }, { "epoch": 29.15, "learning_rate": 3.542811139542424e-05, "loss": 2.1186, "step": 10072000 }, { "epoch": 29.16, "learning_rate": 3.542738919507225e-05, "loss": 2.1283, "step": 10072500 }, { "epoch": 29.16, "learning_rate": 3.5426665547424975e-05, "loss": 2.087, "step": 10073000 }, { "epoch": 29.16, "learning_rate": 3.54259418997777e-05, "loss": 2.0945, "step": 10073500 }, { "epoch": 29.16, "learning_rate": 3.542521825213042e-05, "loss": 2.0937, "step": 10074000 }, { "epoch": 29.16, "learning_rate": 3.542449460448314e-05, "loss": 2.1217, "step": 10074500 }, { "epoch": 29.16, "learning_rate": 3.5423770956835864e-05, "loss": 2.1126, "step": 10075000 }, { "epoch": 29.16, "learning_rate": 3.542304730918859e-05, "loss": 2.1206, "step": 10075500 }, { "epoch": 29.17, "learning_rate": 3.542232510883661e-05, "loss": 2.1067, "step": 10076000 }, { "epoch": 29.17, "learning_rate": 3.542160146118933e-05, "loss": 2.1164, "step": 10076500 }, { "epoch": 29.17, "learning_rate": 3.542087781354205e-05, "loss": 2.0922, "step": 10077000 }, { "epoch": 29.17, "learning_rate": 3.5420154165894775e-05, "loss": 2.0974, "step": 10077500 }, { "epoch": 29.17, "learning_rate": 3.54194305182475e-05, "loss": 2.0901, "step": 10078000 }, { "epoch": 29.17, "learning_rate": 3.5418706870600226e-05, "loss": 2.1171, "step": 10078500 }, { "epoch": 29.17, "learning_rate": 3.541798322295295e-05, "loss": 2.1219, "step": 10079000 }, { "epoch": 29.18, "learning_rate": 3.541725957530567e-05, "loss": 2.1036, "step": 10079500 }, { "epoch": 29.18, "learning_rate": 3.541653592765839e-05, "loss": 2.0928, "step": 10080000 }, { "epoch": 29.18, "learning_rate": 3.5415812280011115e-05, "loss": 2.0988, "step": 10080500 }, { "epoch": 29.18, "learning_rate": 3.5415088632363844e-05, "loss": 2.108, "step": 10081000 }, { "epoch": 29.18, "learning_rate": 3.5414364984716566e-05, "loss": 2.0931, "step": 10081500 }, { "epoch": 29.18, "learning_rate": 3.541364133706929e-05, "loss": 2.0936, "step": 10082000 }, { "epoch": 29.18, "learning_rate": 3.5412919136717304e-05, "loss": 2.1037, "step": 10082500 }, { "epoch": 29.19, "learning_rate": 3.5412195489070026e-05, "loss": 2.1157, "step": 10083000 }, { "epoch": 29.19, "learning_rate": 3.541147184142275e-05, "loss": 2.1112, "step": 10083500 }, { "epoch": 29.19, "learning_rate": 3.541074819377547e-05, "loss": 2.1344, "step": 10084000 }, { "epoch": 29.19, "learning_rate": 3.541002599342349e-05, "loss": 2.1111, "step": 10084500 }, { "epoch": 29.19, "learning_rate": 3.540930379307151e-05, "loss": 2.0956, "step": 10085000 }, { "epoch": 29.19, "learning_rate": 3.540858014542423e-05, "loss": 2.1105, "step": 10085500 }, { "epoch": 29.19, "learning_rate": 3.540785649777695e-05, "loss": 2.0927, "step": 10086000 }, { "epoch": 29.2, "learning_rate": 3.540713285012968e-05, "loss": 2.0944, "step": 10086500 }, { "epoch": 29.2, "learning_rate": 3.5406409202482404e-05, "loss": 2.1158, "step": 10087000 }, { "epoch": 29.2, "learning_rate": 3.540568555483513e-05, "loss": 2.1223, "step": 10087500 }, { "epoch": 29.2, "learning_rate": 3.540496190718785e-05, "loss": 2.1121, "step": 10088000 }, { "epoch": 29.2, "learning_rate": 3.5404239706835864e-05, "loss": 2.0998, "step": 10088500 }, { "epoch": 29.2, "learning_rate": 3.540351750648389e-05, "loss": 2.1016, "step": 10089000 }, { "epoch": 29.2, "learning_rate": 3.540279385883661e-05, "loss": 2.1194, "step": 10089500 }, { "epoch": 29.21, "learning_rate": 3.540207021118933e-05, "loss": 2.1171, "step": 10090000 }, { "epoch": 29.21, "learning_rate": 3.5401346563542053e-05, "loss": 2.1074, "step": 10090500 }, { "epoch": 29.21, "learning_rate": 3.5400622915894776e-05, "loss": 2.1131, "step": 10091000 }, { "epoch": 29.21, "learning_rate": 3.53998992682475e-05, "loss": 2.1268, "step": 10091500 }, { "epoch": 29.21, "learning_rate": 3.539917562060022e-05, "loss": 2.098, "step": 10092000 }, { "epoch": 29.21, "learning_rate": 3.539845197295294e-05, "loss": 2.1144, "step": 10092500 }, { "epoch": 29.22, "learning_rate": 3.5397729772600965e-05, "loss": 2.1159, "step": 10093000 }, { "epoch": 29.22, "learning_rate": 3.539700757224898e-05, "loss": 2.1003, "step": 10093500 }, { "epoch": 29.22, "learning_rate": 3.539628392460171e-05, "loss": 2.1299, "step": 10094000 }, { "epoch": 29.22, "learning_rate": 3.539556027695443e-05, "loss": 2.0894, "step": 10094500 }, { "epoch": 29.22, "learning_rate": 3.5394836629307154e-05, "loss": 2.1106, "step": 10095000 }, { "epoch": 29.22, "learning_rate": 3.5394112981659876e-05, "loss": 2.11, "step": 10095500 }, { "epoch": 29.22, "learning_rate": 3.53933893340126e-05, "loss": 2.0997, "step": 10096000 }, { "epoch": 29.23, "learning_rate": 3.539266568636532e-05, "loss": 2.1033, "step": 10096500 }, { "epoch": 29.23, "learning_rate": 3.539194203871804e-05, "loss": 2.1105, "step": 10097000 }, { "epoch": 29.23, "learning_rate": 3.539121839107077e-05, "loss": 2.1027, "step": 10097500 }, { "epoch": 29.23, "learning_rate": 3.5390494743423494e-05, "loss": 2.1187, "step": 10098000 }, { "epoch": 29.23, "learning_rate": 3.5389771095776216e-05, "loss": 2.1438, "step": 10098500 }, { "epoch": 29.23, "learning_rate": 3.538904744812894e-05, "loss": 2.131, "step": 10099000 }, { "epoch": 29.23, "learning_rate": 3.538832380048166e-05, "loss": 2.1234, "step": 10099500 }, { "epoch": 29.24, "learning_rate": 3.538760015283438e-05, "loss": 2.1057, "step": 10100000 }, { "epoch": 29.24, "learning_rate": 3.53868779524824e-05, "loss": 2.1114, "step": 10100500 }, { "epoch": 29.24, "learning_rate": 3.538615430483513e-05, "loss": 2.1141, "step": 10101000 }, { "epoch": 29.24, "learning_rate": 3.538543065718785e-05, "loss": 2.116, "step": 10101500 }, { "epoch": 29.24, "learning_rate": 3.538470700954057e-05, "loss": 2.1367, "step": 10102000 }, { "epoch": 29.24, "learning_rate": 3.5383983361893294e-05, "loss": 2.0767, "step": 10102500 }, { "epoch": 29.24, "learning_rate": 3.538325971424602e-05, "loss": 2.1188, "step": 10103000 }, { "epoch": 29.25, "learning_rate": 3.538253751389404e-05, "loss": 2.1192, "step": 10103500 }, { "epoch": 29.25, "learning_rate": 3.538181386624676e-05, "loss": 2.1221, "step": 10104000 }, { "epoch": 29.25, "learning_rate": 3.538109021859948e-05, "loss": 2.1275, "step": 10104500 }, { "epoch": 29.25, "learning_rate": 3.5380366570952205e-05, "loss": 2.1072, "step": 10105000 }, { "epoch": 29.25, "learning_rate": 3.537964292330493e-05, "loss": 2.1255, "step": 10105500 }, { "epoch": 29.25, "learning_rate": 3.537891927565765e-05, "loss": 2.1018, "step": 10106000 }, { "epoch": 29.25, "learning_rate": 3.537819562801037e-05, "loss": 2.1032, "step": 10106500 }, { "epoch": 29.26, "learning_rate": 3.5377471980363094e-05, "loss": 2.1093, "step": 10107000 }, { "epoch": 29.26, "learning_rate": 3.537674833271582e-05, "loss": 2.1085, "step": 10107500 }, { "epoch": 29.26, "learning_rate": 3.5376024685068546e-05, "loss": 2.1094, "step": 10108000 }, { "epoch": 29.26, "learning_rate": 3.5375301037421275e-05, "loss": 2.0979, "step": 10108500 }, { "epoch": 29.26, "learning_rate": 3.5374577389774e-05, "loss": 2.0836, "step": 10109000 }, { "epoch": 29.26, "learning_rate": 3.537385374212672e-05, "loss": 2.0949, "step": 10109500 }, { "epoch": 29.26, "learning_rate": 3.537313009447944e-05, "loss": 2.1178, "step": 10110000 }, { "epoch": 29.27, "learning_rate": 3.5372406446832164e-05, "loss": 2.0883, "step": 10110500 }, { "epoch": 29.27, "learning_rate": 3.5371682799184886e-05, "loss": 2.1264, "step": 10111000 }, { "epoch": 29.27, "learning_rate": 3.53709605988329e-05, "loss": 2.09, "step": 10111500 }, { "epoch": 29.27, "learning_rate": 3.5370236951185624e-05, "loss": 2.0995, "step": 10112000 }, { "epoch": 29.27, "learning_rate": 3.5369513303538346e-05, "loss": 2.1285, "step": 10112500 }, { "epoch": 29.27, "learning_rate": 3.536879255048166e-05, "loss": 2.1151, "step": 10113000 }, { "epoch": 29.27, "learning_rate": 3.5368068902834384e-05, "loss": 2.0835, "step": 10113500 }, { "epoch": 29.28, "learning_rate": 3.5367345255187106e-05, "loss": 2.1137, "step": 10114000 }, { "epoch": 29.28, "learning_rate": 3.536662160753983e-05, "loss": 2.1143, "step": 10114500 }, { "epoch": 29.28, "learning_rate": 3.536589795989255e-05, "loss": 2.116, "step": 10115000 }, { "epoch": 29.28, "learning_rate": 3.536517575954057e-05, "loss": 2.1189, "step": 10115500 }, { "epoch": 29.28, "learning_rate": 3.5364453559188595e-05, "loss": 2.1276, "step": 10116000 }, { "epoch": 29.28, "learning_rate": 3.536372991154132e-05, "loss": 2.0934, "step": 10116500 }, { "epoch": 29.28, "learning_rate": 3.536300626389404e-05, "loss": 2.1313, "step": 10117000 }, { "epoch": 29.29, "learning_rate": 3.536228261624676e-05, "loss": 2.0721, "step": 10117500 }, { "epoch": 29.29, "learning_rate": 3.5361558968599484e-05, "loss": 2.0895, "step": 10118000 }, { "epoch": 29.29, "learning_rate": 3.5360835320952206e-05, "loss": 2.1166, "step": 10118500 }, { "epoch": 29.29, "learning_rate": 3.536011167330493e-05, "loss": 2.0963, "step": 10119000 }, { "epoch": 29.29, "learning_rate": 3.535938802565765e-05, "loss": 2.122, "step": 10119500 }, { "epoch": 29.29, "learning_rate": 3.535866437801037e-05, "loss": 2.1048, "step": 10120000 }, { "epoch": 29.29, "learning_rate": 3.5357940730363095e-05, "loss": 2.1212, "step": 10120500 }, { "epoch": 29.3, "learning_rate": 3.5357217082715824e-05, "loss": 2.0951, "step": 10121000 }, { "epoch": 29.3, "learning_rate": 3.5356493435068546e-05, "loss": 2.0906, "step": 10121500 }, { "epoch": 29.3, "learning_rate": 3.535576978742127e-05, "loss": 2.1043, "step": 10122000 }, { "epoch": 29.3, "learning_rate": 3.5355047587069284e-05, "loss": 2.0728, "step": 10122500 }, { "epoch": 29.3, "learning_rate": 3.53543253867173e-05, "loss": 2.0867, "step": 10123000 }, { "epoch": 29.3, "learning_rate": 3.535360173907003e-05, "loss": 2.0928, "step": 10123500 }, { "epoch": 29.3, "learning_rate": 3.535287809142275e-05, "loss": 2.1054, "step": 10124000 }, { "epoch": 29.31, "learning_rate": 3.535215444377547e-05, "loss": 2.1018, "step": 10124500 }, { "epoch": 29.31, "learning_rate": 3.535143369071879e-05, "loss": 2.1008, "step": 10125000 }, { "epoch": 29.31, "learning_rate": 3.535071004307151e-05, "loss": 2.105, "step": 10125500 }, { "epoch": 29.31, "learning_rate": 3.534998639542423e-05, "loss": 2.1305, "step": 10126000 }, { "epoch": 29.31, "learning_rate": 3.5349262747776956e-05, "loss": 2.1059, "step": 10126500 }, { "epoch": 29.31, "learning_rate": 3.534853910012968e-05, "loss": 2.1162, "step": 10127000 }, { "epoch": 29.31, "learning_rate": 3.53478154524824e-05, "loss": 2.121, "step": 10127500 }, { "epoch": 29.32, "learning_rate": 3.534709180483512e-05, "loss": 2.0983, "step": 10128000 }, { "epoch": 29.32, "learning_rate": 3.534636815718785e-05, "loss": 2.1087, "step": 10128500 }, { "epoch": 29.32, "learning_rate": 3.534564595683587e-05, "loss": 2.1009, "step": 10129000 }, { "epoch": 29.32, "learning_rate": 3.534492230918859e-05, "loss": 2.1082, "step": 10129500 }, { "epoch": 29.32, "learning_rate": 3.534419866154131e-05, "loss": 2.1222, "step": 10130000 }, { "epoch": 29.32, "learning_rate": 3.5343475013894034e-05, "loss": 2.0979, "step": 10130500 }, { "epoch": 29.33, "learning_rate": 3.534275136624676e-05, "loss": 2.093, "step": 10131000 }, { "epoch": 29.33, "learning_rate": 3.534202916589478e-05, "loss": 2.1366, "step": 10131500 }, { "epoch": 29.33, "learning_rate": 3.53413055182475e-05, "loss": 2.0985, "step": 10132000 }, { "epoch": 29.33, "learning_rate": 3.534058187060022e-05, "loss": 2.1058, "step": 10132500 }, { "epoch": 29.33, "learning_rate": 3.533985822295295e-05, "loss": 2.1194, "step": 10133000 }, { "epoch": 29.33, "learning_rate": 3.5339134575305674e-05, "loss": 2.0976, "step": 10133500 }, { "epoch": 29.33, "learning_rate": 3.5338410927658396e-05, "loss": 2.0871, "step": 10134000 }, { "epoch": 29.34, "learning_rate": 3.533768728001112e-05, "loss": 2.1024, "step": 10134500 }, { "epoch": 29.34, "learning_rate": 3.533696363236384e-05, "loss": 2.1072, "step": 10135000 }, { "epoch": 29.34, "learning_rate": 3.533623998471656e-05, "loss": 2.1013, "step": 10135500 }, { "epoch": 29.34, "learning_rate": 3.533551778436458e-05, "loss": 2.105, "step": 10136000 }, { "epoch": 29.34, "learning_rate": 3.53347941367173e-05, "loss": 2.1342, "step": 10136500 }, { "epoch": 29.34, "learning_rate": 3.533407193636532e-05, "loss": 2.1156, "step": 10137000 }, { "epoch": 29.34, "learning_rate": 3.5333348288718045e-05, "loss": 2.103, "step": 10137500 }, { "epoch": 29.35, "learning_rate": 3.533262608836606e-05, "loss": 2.1184, "step": 10138000 }, { "epoch": 29.35, "learning_rate": 3.533190244071878e-05, "loss": 2.1141, "step": 10138500 }, { "epoch": 29.35, "learning_rate": 3.533117879307151e-05, "loss": 2.1148, "step": 10139000 }, { "epoch": 29.35, "learning_rate": 3.5330455145424234e-05, "loss": 2.1212, "step": 10139500 }, { "epoch": 29.35, "learning_rate": 3.5329731497776956e-05, "loss": 2.1252, "step": 10140000 }, { "epoch": 29.35, "learning_rate": 3.532900785012968e-05, "loss": 2.1205, "step": 10140500 }, { "epoch": 29.35, "learning_rate": 3.53282842024824e-05, "loss": 2.1031, "step": 10141000 }, { "epoch": 29.36, "learning_rate": 3.532756055483512e-05, "loss": 2.109, "step": 10141500 }, { "epoch": 29.36, "learning_rate": 3.532683690718785e-05, "loss": 2.0919, "step": 10142000 }, { "epoch": 29.36, "learning_rate": 3.5326113259540574e-05, "loss": 2.1027, "step": 10142500 }, { "epoch": 29.36, "learning_rate": 3.5325389611893297e-05, "loss": 2.1076, "step": 10143000 }, { "epoch": 29.36, "learning_rate": 3.532466596424602e-05, "loss": 2.1344, "step": 10143500 }, { "epoch": 29.36, "learning_rate": 3.5323943763894034e-05, "loss": 2.1049, "step": 10144000 }, { "epoch": 29.36, "learning_rate": 3.5323220116246757e-05, "loss": 2.0946, "step": 10144500 }, { "epoch": 29.37, "learning_rate": 3.532249646859948e-05, "loss": 2.1283, "step": 10145000 }, { "epoch": 29.37, "learning_rate": 3.53217728209522e-05, "loss": 2.1056, "step": 10145500 }, { "epoch": 29.37, "learning_rate": 3.532104917330493e-05, "loss": 2.0956, "step": 10146000 }, { "epoch": 29.37, "learning_rate": 3.532032552565765e-05, "loss": 2.1071, "step": 10146500 }, { "epoch": 29.37, "learning_rate": 3.5319601878010374e-05, "loss": 2.1017, "step": 10147000 }, { "epoch": 29.37, "learning_rate": 3.5318878230363103e-05, "loss": 2.1078, "step": 10147500 }, { "epoch": 29.37, "learning_rate": 3.5318154582715826e-05, "loss": 2.0792, "step": 10148000 }, { "epoch": 29.38, "learning_rate": 3.531743093506855e-05, "loss": 2.1029, "step": 10148500 }, { "epoch": 29.38, "learning_rate": 3.531670728742127e-05, "loss": 2.1167, "step": 10149000 }, { "epoch": 29.38, "learning_rate": 3.5315985087069286e-05, "loss": 2.1294, "step": 10149500 }, { "epoch": 29.38, "learning_rate": 3.531526143942201e-05, "loss": 2.1129, "step": 10150000 }, { "epoch": 29.38, "learning_rate": 3.531453779177473e-05, "loss": 2.1165, "step": 10150500 }, { "epoch": 29.38, "learning_rate": 3.531381414412745e-05, "loss": 2.1068, "step": 10151000 }, { "epoch": 29.38, "learning_rate": 3.5313090496480175e-05, "loss": 2.1112, "step": 10151500 }, { "epoch": 29.39, "learning_rate": 3.5312366848832904e-05, "loss": 2.1092, "step": 10152000 }, { "epoch": 29.39, "learning_rate": 3.5311643201185626e-05, "loss": 2.1378, "step": 10152500 }, { "epoch": 29.39, "learning_rate": 3.5310919553538355e-05, "loss": 2.1203, "step": 10153000 }, { "epoch": 29.39, "learning_rate": 3.531019590589108e-05, "loss": 2.0985, "step": 10153500 }, { "epoch": 29.39, "learning_rate": 3.530947370553909e-05, "loss": 2.0924, "step": 10154000 }, { "epoch": 29.39, "learning_rate": 3.5308750057891815e-05, "loss": 2.099, "step": 10154500 }, { "epoch": 29.39, "learning_rate": 3.530802641024454e-05, "loss": 2.1033, "step": 10155000 }, { "epoch": 29.4, "learning_rate": 3.530730276259726e-05, "loss": 2.1017, "step": 10155500 }, { "epoch": 29.4, "learning_rate": 3.5306580562245275e-05, "loss": 2.0951, "step": 10156000 }, { "epoch": 29.4, "learning_rate": 3.5305856914598004e-05, "loss": 2.1089, "step": 10156500 }, { "epoch": 29.4, "learning_rate": 3.5305133266950726e-05, "loss": 2.1044, "step": 10157000 }, { "epoch": 29.4, "learning_rate": 3.530440961930345e-05, "loss": 2.103, "step": 10157500 }, { "epoch": 29.4, "learning_rate": 3.530368597165617e-05, "loss": 2.1001, "step": 10158000 }, { "epoch": 29.4, "learning_rate": 3.5302963771304186e-05, "loss": 2.1024, "step": 10158500 }, { "epoch": 29.41, "learning_rate": 3.53022430182475e-05, "loss": 2.1164, "step": 10159000 }, { "epoch": 29.41, "learning_rate": 3.5301519370600224e-05, "loss": 2.1045, "step": 10159500 }, { "epoch": 29.41, "learning_rate": 3.5300795722952946e-05, "loss": 2.1154, "step": 10160000 }, { "epoch": 29.41, "learning_rate": 3.530007207530567e-05, "loss": 2.0998, "step": 10160500 }, { "epoch": 29.41, "learning_rate": 3.52993484276584e-05, "loss": 2.1152, "step": 10161000 }, { "epoch": 29.41, "learning_rate": 3.529862478001112e-05, "loss": 2.0962, "step": 10161500 }, { "epoch": 29.41, "learning_rate": 3.529790113236384e-05, "loss": 2.1202, "step": 10162000 }, { "epoch": 29.42, "learning_rate": 3.5297177484716564e-05, "loss": 2.1352, "step": 10162500 }, { "epoch": 29.42, "learning_rate": 3.5296453837069287e-05, "loss": 2.1282, "step": 10163000 }, { "epoch": 29.42, "learning_rate": 3.529573018942201e-05, "loss": 2.1083, "step": 10163500 }, { "epoch": 29.42, "learning_rate": 3.529500654177473e-05, "loss": 2.1109, "step": 10164000 }, { "epoch": 29.42, "learning_rate": 3.529428289412745e-05, "loss": 2.0879, "step": 10164500 }, { "epoch": 29.42, "learning_rate": 3.529355924648018e-05, "loss": 2.0928, "step": 10165000 }, { "epoch": 29.42, "learning_rate": 3.52928370461282e-05, "loss": 2.126, "step": 10165500 }, { "epoch": 29.43, "learning_rate": 3.529211484577621e-05, "loss": 2.1132, "step": 10166000 }, { "epoch": 29.43, "learning_rate": 3.5291391198128936e-05, "loss": 2.1046, "step": 10166500 }, { "epoch": 29.43, "learning_rate": 3.529066755048166e-05, "loss": 2.1224, "step": 10167000 }, { "epoch": 29.43, "learning_rate": 3.528994390283438e-05, "loss": 2.1248, "step": 10167500 }, { "epoch": 29.43, "learning_rate": 3.52892202551871e-05, "loss": 2.0978, "step": 10168000 }, { "epoch": 29.43, "learning_rate": 3.528849805483513e-05, "loss": 2.11, "step": 10168500 }, { "epoch": 29.44, "learning_rate": 3.528777585448315e-05, "loss": 2.1371, "step": 10169000 }, { "epoch": 29.44, "learning_rate": 3.528705220683587e-05, "loss": 2.1095, "step": 10169500 }, { "epoch": 29.44, "learning_rate": 3.528632855918859e-05, "loss": 2.1371, "step": 10170000 }, { "epoch": 29.44, "learning_rate": 3.5285604911541314e-05, "loss": 2.1178, "step": 10170500 }, { "epoch": 29.44, "learning_rate": 3.5284881263894036e-05, "loss": 2.1129, "step": 10171000 }, { "epoch": 29.44, "learning_rate": 3.528415761624676e-05, "loss": 2.1197, "step": 10171500 }, { "epoch": 29.44, "learning_rate": 3.528343396859948e-05, "loss": 2.1004, "step": 10172000 }, { "epoch": 29.45, "learning_rate": 3.528271466283809e-05, "loss": 2.106, "step": 10172500 }, { "epoch": 29.45, "learning_rate": 3.528199101519081e-05, "loss": 2.1093, "step": 10173000 }, { "epoch": 29.45, "learning_rate": 3.5281267367543534e-05, "loss": 2.1193, "step": 10173500 }, { "epoch": 29.45, "learning_rate": 3.5280543719896256e-05, "loss": 2.0901, "step": 10174000 }, { "epoch": 29.45, "learning_rate": 3.527982007224898e-05, "loss": 2.1182, "step": 10174500 }, { "epoch": 29.45, "learning_rate": 3.527909642460171e-05, "loss": 2.1215, "step": 10175000 }, { "epoch": 29.45, "learning_rate": 3.527837277695443e-05, "loss": 2.0913, "step": 10175500 }, { "epoch": 29.46, "learning_rate": 3.527764912930715e-05, "loss": 2.1154, "step": 10176000 }, { "epoch": 29.46, "learning_rate": 3.527692548165988e-05, "loss": 2.0852, "step": 10176500 }, { "epoch": 29.46, "learning_rate": 3.52762018340126e-05, "loss": 2.0995, "step": 10177000 }, { "epoch": 29.46, "learning_rate": 3.5275478186365325e-05, "loss": 2.1336, "step": 10177500 }, { "epoch": 29.46, "learning_rate": 3.527475453871805e-05, "loss": 2.1288, "step": 10178000 }, { "epoch": 29.46, "learning_rate": 3.527403089107077e-05, "loss": 2.1193, "step": 10178500 }, { "epoch": 29.46, "learning_rate": 3.527330724342349e-05, "loss": 2.1213, "step": 10179000 }, { "epoch": 29.47, "learning_rate": 3.5272583595776214e-05, "loss": 2.1178, "step": 10179500 }, { "epoch": 29.47, "learning_rate": 3.5271859948128936e-05, "loss": 2.1031, "step": 10180000 }, { "epoch": 29.47, "learning_rate": 3.527113630048166e-05, "loss": 2.1057, "step": 10180500 }, { "epoch": 29.47, "learning_rate": 3.527041265283438e-05, "loss": 2.0922, "step": 10181000 }, { "epoch": 29.47, "learning_rate": 3.526968900518711e-05, "loss": 2.0927, "step": 10181500 }, { "epoch": 29.47, "learning_rate": 3.5268966804835125e-05, "loss": 2.1036, "step": 10182000 }, { "epoch": 29.47, "learning_rate": 3.526824315718785e-05, "loss": 2.1107, "step": 10182500 }, { "epoch": 29.48, "learning_rate": 3.526751950954057e-05, "loss": 2.1316, "step": 10183000 }, { "epoch": 29.48, "learning_rate": 3.52667958618933e-05, "loss": 2.1473, "step": 10183500 }, { "epoch": 29.48, "learning_rate": 3.526607221424602e-05, "loss": 2.1143, "step": 10184000 }, { "epoch": 29.48, "learning_rate": 3.526535001389404e-05, "loss": 2.1082, "step": 10184500 }, { "epoch": 29.48, "learning_rate": 3.526462636624676e-05, "loss": 2.1121, "step": 10185000 }, { "epoch": 29.48, "learning_rate": 3.526390271859948e-05, "loss": 2.0948, "step": 10185500 }, { "epoch": 29.48, "learning_rate": 3.526317907095221e-05, "loss": 2.1285, "step": 10186000 }, { "epoch": 29.49, "learning_rate": 3.526245542330493e-05, "loss": 2.1175, "step": 10186500 }, { "epoch": 29.49, "learning_rate": 3.526173322295295e-05, "loss": 2.1139, "step": 10187000 }, { "epoch": 29.49, "learning_rate": 3.526100957530567e-05, "loss": 2.0972, "step": 10187500 }, { "epoch": 29.49, "learning_rate": 3.526028592765839e-05, "loss": 2.1153, "step": 10188000 }, { "epoch": 29.49, "learning_rate": 3.5259562280011115e-05, "loss": 2.0871, "step": 10188500 }, { "epoch": 29.49, "learning_rate": 3.525883863236384e-05, "loss": 2.104, "step": 10189000 }, { "epoch": 29.49, "learning_rate": 3.525811498471656e-05, "loss": 2.1058, "step": 10189500 }, { "epoch": 29.5, "learning_rate": 3.525739133706928e-05, "loss": 2.1092, "step": 10190000 }, { "epoch": 29.5, "learning_rate": 3.5256669136717304e-05, "loss": 2.1488, "step": 10190500 }, { "epoch": 29.5, "learning_rate": 3.525594693636532e-05, "loss": 2.1031, "step": 10191000 }, { "epoch": 29.5, "learning_rate": 3.525522473601334e-05, "loss": 2.1264, "step": 10191500 }, { "epoch": 29.5, "learning_rate": 3.5254501088366064e-05, "loss": 2.1004, "step": 10192000 }, { "epoch": 29.5, "learning_rate": 3.5253777440718786e-05, "loss": 2.1073, "step": 10192500 }, { "epoch": 29.5, "learning_rate": 3.525305379307151e-05, "loss": 2.1287, "step": 10193000 }, { "epoch": 29.51, "learning_rate": 3.525233014542423e-05, "loss": 2.1277, "step": 10193500 }, { "epoch": 29.51, "learning_rate": 3.525160649777696e-05, "loss": 2.1333, "step": 10194000 }, { "epoch": 29.51, "learning_rate": 3.525088285012968e-05, "loss": 2.0879, "step": 10194500 }, { "epoch": 29.51, "learning_rate": 3.52501606497777e-05, "loss": 2.1002, "step": 10195000 }, { "epoch": 29.51, "learning_rate": 3.524943700213042e-05, "loss": 2.1175, "step": 10195500 }, { "epoch": 29.51, "learning_rate": 3.524871335448314e-05, "loss": 2.1135, "step": 10196000 }, { "epoch": 29.51, "learning_rate": 3.5247989706835864e-05, "loss": 2.1047, "step": 10196500 }, { "epoch": 29.52, "learning_rate": 3.5247266059188586e-05, "loss": 2.1317, "step": 10197000 }, { "epoch": 29.52, "learning_rate": 3.524654241154131e-05, "loss": 2.1054, "step": 10197500 }, { "epoch": 29.52, "learning_rate": 3.524581876389403e-05, "loss": 2.1159, "step": 10198000 }, { "epoch": 29.52, "learning_rate": 3.524509511624676e-05, "loss": 2.107, "step": 10198500 }, { "epoch": 29.52, "learning_rate": 3.524437146859948e-05, "loss": 2.1379, "step": 10199000 }, { "epoch": 29.52, "learning_rate": 3.524364782095221e-05, "loss": 2.1186, "step": 10199500 }, { "epoch": 29.52, "learning_rate": 3.524292417330493e-05, "loss": 2.1054, "step": 10200000 }, { "epoch": 29.53, "learning_rate": 3.5242200525657655e-05, "loss": 2.1386, "step": 10200500 }, { "epoch": 29.53, "learning_rate": 3.524147687801038e-05, "loss": 2.0921, "step": 10201000 }, { "epoch": 29.53, "learning_rate": 3.524075467765839e-05, "loss": 2.1101, "step": 10201500 }, { "epoch": 29.53, "learning_rate": 3.5240031030011115e-05, "loss": 2.1308, "step": 10202000 }, { "epoch": 29.53, "learning_rate": 3.523930882965914e-05, "loss": 2.1321, "step": 10202500 }, { "epoch": 29.53, "learning_rate": 3.523858518201186e-05, "loss": 2.102, "step": 10203000 }, { "epoch": 29.53, "learning_rate": 3.523786153436458e-05, "loss": 2.1208, "step": 10203500 }, { "epoch": 29.54, "learning_rate": 3.5237137886717304e-05, "loss": 2.1341, "step": 10204000 }, { "epoch": 29.54, "learning_rate": 3.523641423907003e-05, "loss": 2.1154, "step": 10204500 }, { "epoch": 29.54, "learning_rate": 3.523569059142275e-05, "loss": 2.1375, "step": 10205000 }, { "epoch": 29.54, "learning_rate": 3.5234968391070765e-05, "loss": 2.1052, "step": 10205500 }, { "epoch": 29.54, "learning_rate": 3.5234244743423494e-05, "loss": 2.1009, "step": 10206000 }, { "epoch": 29.54, "learning_rate": 3.5233521095776216e-05, "loss": 2.0865, "step": 10206500 }, { "epoch": 29.55, "learning_rate": 3.523279744812894e-05, "loss": 2.1158, "step": 10207000 }, { "epoch": 29.55, "learning_rate": 3.523207380048166e-05, "loss": 2.1036, "step": 10207500 }, { "epoch": 29.55, "learning_rate": 3.523135015283439e-05, "loss": 2.0814, "step": 10208000 }, { "epoch": 29.55, "learning_rate": 3.523062650518711e-05, "loss": 2.1023, "step": 10208500 }, { "epoch": 29.55, "learning_rate": 3.5229902857539834e-05, "loss": 2.1272, "step": 10209000 }, { "epoch": 29.55, "learning_rate": 3.5229179209892556e-05, "loss": 2.1229, "step": 10209500 }, { "epoch": 29.55, "learning_rate": 3.522845556224528e-05, "loss": 2.1319, "step": 10210000 }, { "epoch": 29.56, "learning_rate": 3.5227731914598e-05, "loss": 2.0992, "step": 10210500 }, { "epoch": 29.56, "learning_rate": 3.5227009714246016e-05, "loss": 2.1075, "step": 10211000 }, { "epoch": 29.56, "learning_rate": 3.522628606659874e-05, "loss": 2.1068, "step": 10211500 }, { "epoch": 29.56, "learning_rate": 3.522556241895146e-05, "loss": 2.092, "step": 10212000 }, { "epoch": 29.56, "learning_rate": 3.522483877130419e-05, "loss": 2.1199, "step": 10212500 }, { "epoch": 29.56, "learning_rate": 3.522411512365691e-05, "loss": 2.1159, "step": 10213000 }, { "epoch": 29.56, "learning_rate": 3.522339147600964e-05, "loss": 2.1518, "step": 10213500 }, { "epoch": 29.57, "learning_rate": 3.5222669275657656e-05, "loss": 2.1365, "step": 10214000 }, { "epoch": 29.57, "learning_rate": 3.522194562801038e-05, "loss": 2.1085, "step": 10214500 }, { "epoch": 29.57, "learning_rate": 3.5221223427658394e-05, "loss": 2.0976, "step": 10215000 }, { "epoch": 29.57, "learning_rate": 3.5220499780011116e-05, "loss": 2.1006, "step": 10215500 }, { "epoch": 29.57, "learning_rate": 3.521977613236384e-05, "loss": 2.1127, "step": 10216000 }, { "epoch": 29.57, "learning_rate": 3.521905248471656e-05, "loss": 2.1273, "step": 10216500 }, { "epoch": 29.57, "learning_rate": 3.521833028436458e-05, "loss": 2.1, "step": 10217000 }, { "epoch": 29.58, "learning_rate": 3.5217606636717305e-05, "loss": 2.1045, "step": 10217500 }, { "epoch": 29.58, "learning_rate": 3.521688298907003e-05, "loss": 2.1024, "step": 10218000 }, { "epoch": 29.58, "learning_rate": 3.521615934142275e-05, "loss": 2.1136, "step": 10218500 }, { "epoch": 29.58, "learning_rate": 3.521543569377547e-05, "loss": 2.0962, "step": 10219000 }, { "epoch": 29.58, "learning_rate": 3.5214712046128194e-05, "loss": 2.1329, "step": 10219500 }, { "epoch": 29.58, "learning_rate": 3.5213988398480916e-05, "loss": 2.1099, "step": 10220000 }, { "epoch": 29.58, "learning_rate": 3.521326475083364e-05, "loss": 2.0751, "step": 10220500 }, { "epoch": 29.59, "learning_rate": 3.521254110318637e-05, "loss": 2.1279, "step": 10221000 }, { "epoch": 29.59, "learning_rate": 3.521181890283439e-05, "loss": 2.1184, "step": 10221500 }, { "epoch": 29.59, "learning_rate": 3.521109525518711e-05, "loss": 2.108, "step": 10222000 }, { "epoch": 29.59, "learning_rate": 3.5210371607539834e-05, "loss": 2.1202, "step": 10222500 }, { "epoch": 29.59, "learning_rate": 3.520964795989256e-05, "loss": 2.1204, "step": 10223000 }, { "epoch": 29.59, "learning_rate": 3.520892431224528e-05, "loss": 2.116, "step": 10223500 }, { "epoch": 29.59, "learning_rate": 3.5208200664598e-05, "loss": 2.0814, "step": 10224000 }, { "epoch": 29.6, "learning_rate": 3.5207477016950723e-05, "loss": 2.1033, "step": 10224500 }, { "epoch": 29.6, "learning_rate": 3.5206753369303446e-05, "loss": 2.1197, "step": 10225000 }, { "epoch": 29.6, "learning_rate": 3.520602972165617e-05, "loss": 2.1186, "step": 10225500 }, { "epoch": 29.6, "learning_rate": 3.520530607400889e-05, "loss": 2.1058, "step": 10226000 }, { "epoch": 29.6, "learning_rate": 3.520458242636161e-05, "loss": 2.1148, "step": 10226500 }, { "epoch": 29.6, "learning_rate": 3.5203860226009635e-05, "loss": 2.1362, "step": 10227000 }, { "epoch": 29.6, "learning_rate": 3.520313802565765e-05, "loss": 2.1019, "step": 10227500 }, { "epoch": 29.61, "learning_rate": 3.520241437801037e-05, "loss": 2.1289, "step": 10228000 }, { "epoch": 29.61, "learning_rate": 3.52016907303631e-05, "loss": 2.0993, "step": 10228500 }, { "epoch": 29.61, "learning_rate": 3.520096853001112e-05, "loss": 2.1159, "step": 10229000 }, { "epoch": 29.61, "learning_rate": 3.520024488236384e-05, "loss": 2.1293, "step": 10229500 }, { "epoch": 29.61, "learning_rate": 3.519952123471656e-05, "loss": 2.1072, "step": 10230000 }, { "epoch": 29.61, "learning_rate": 3.519879758706929e-05, "loss": 2.1008, "step": 10230500 }, { "epoch": 29.61, "learning_rate": 3.519807393942201e-05, "loss": 2.1046, "step": 10231000 }, { "epoch": 29.62, "learning_rate": 3.5197350291774735e-05, "loss": 2.0959, "step": 10231500 }, { "epoch": 29.62, "learning_rate": 3.519662664412746e-05, "loss": 2.132, "step": 10232000 }, { "epoch": 29.62, "learning_rate": 3.519590299648018e-05, "loss": 2.1051, "step": 10232500 }, { "epoch": 29.62, "learning_rate": 3.51951793488329e-05, "loss": 2.1322, "step": 10233000 }, { "epoch": 29.62, "learning_rate": 3.5194455701185624e-05, "loss": 2.0929, "step": 10233500 }, { "epoch": 29.62, "learning_rate": 3.5193732053538346e-05, "loss": 2.1258, "step": 10234000 }, { "epoch": 29.62, "learning_rate": 3.519300840589107e-05, "loss": 2.1223, "step": 10234500 }, { "epoch": 29.63, "learning_rate": 3.519228475824379e-05, "loss": 2.1199, "step": 10235000 }, { "epoch": 29.63, "learning_rate": 3.519156111059652e-05, "loss": 2.1067, "step": 10235500 }, { "epoch": 29.63, "learning_rate": 3.519083746294924e-05, "loss": 2.108, "step": 10236000 }, { "epoch": 29.63, "learning_rate": 3.5190113815301964e-05, "loss": 2.1081, "step": 10236500 }, { "epoch": 29.63, "learning_rate": 3.518939016765469e-05, "loss": 2.1255, "step": 10237000 }, { "epoch": 29.63, "learning_rate": 3.5188666520007415e-05, "loss": 2.1109, "step": 10237500 }, { "epoch": 29.63, "learning_rate": 3.518794287236014e-05, "loss": 2.1141, "step": 10238000 }, { "epoch": 29.64, "learning_rate": 3.518721922471286e-05, "loss": 2.105, "step": 10238500 }, { "epoch": 29.64, "learning_rate": 3.5186497024360875e-05, "loss": 2.106, "step": 10239000 }, { "epoch": 29.64, "learning_rate": 3.51857733767136e-05, "loss": 2.1004, "step": 10239500 }, { "epoch": 29.64, "learning_rate": 3.518505117636161e-05, "loss": 2.098, "step": 10240000 }, { "epoch": 29.64, "learning_rate": 3.5184328976009635e-05, "loss": 2.1135, "step": 10240500 }, { "epoch": 29.64, "learning_rate": 3.518360532836236e-05, "loss": 2.1278, "step": 10241000 }, { "epoch": 29.64, "learning_rate": 3.518288312801037e-05, "loss": 2.1082, "step": 10241500 }, { "epoch": 29.65, "learning_rate": 3.518216092765839e-05, "loss": 2.1071, "step": 10242000 }, { "epoch": 29.65, "learning_rate": 3.518143728001112e-05, "loss": 2.1356, "step": 10242500 }, { "epoch": 29.65, "learning_rate": 3.518071363236384e-05, "loss": 2.1168, "step": 10243000 }, { "epoch": 29.65, "learning_rate": 3.517998998471657e-05, "loss": 2.1038, "step": 10243500 }, { "epoch": 29.65, "learning_rate": 3.517926633706929e-05, "loss": 2.1229, "step": 10244000 }, { "epoch": 29.65, "learning_rate": 3.5178542689422014e-05, "loss": 2.1182, "step": 10244500 }, { "epoch": 29.66, "learning_rate": 3.5177819041774736e-05, "loss": 2.1204, "step": 10245000 }, { "epoch": 29.66, "learning_rate": 3.517709539412746e-05, "loss": 2.13, "step": 10245500 }, { "epoch": 29.66, "learning_rate": 3.517637174648018e-05, "loss": 2.126, "step": 10246000 }, { "epoch": 29.66, "learning_rate": 3.51756480988329e-05, "loss": 2.1271, "step": 10246500 }, { "epoch": 29.66, "learning_rate": 3.5174924451185625e-05, "loss": 2.1293, "step": 10247000 }, { "epoch": 29.66, "learning_rate": 3.517420080353835e-05, "loss": 2.1183, "step": 10247500 }, { "epoch": 29.66, "learning_rate": 3.517347715589107e-05, "loss": 2.0908, "step": 10248000 }, { "epoch": 29.67, "learning_rate": 3.517275350824379e-05, "loss": 2.1158, "step": 10248500 }, { "epoch": 29.67, "learning_rate": 3.5172031307891814e-05, "loss": 2.123, "step": 10249000 }, { "epoch": 29.67, "learning_rate": 3.5171307660244536e-05, "loss": 2.1139, "step": 10249500 }, { "epoch": 29.67, "learning_rate": 3.517058401259726e-05, "loss": 2.1062, "step": 10250000 }, { "epoch": 29.67, "learning_rate": 3.516986036494999e-05, "loss": 2.1003, "step": 10250500 }, { "epoch": 29.67, "learning_rate": 3.516913671730271e-05, "loss": 2.1267, "step": 10251000 }, { "epoch": 29.67, "learning_rate": 3.5168414516950725e-05, "loss": 2.1204, "step": 10251500 }, { "epoch": 29.68, "learning_rate": 3.516769086930345e-05, "loss": 2.0959, "step": 10252000 }, { "epoch": 29.68, "learning_rate": 3.516696866895147e-05, "loss": 2.096, "step": 10252500 }, { "epoch": 29.68, "learning_rate": 3.516624502130419e-05, "loss": 2.1229, "step": 10253000 }, { "epoch": 29.68, "learning_rate": 3.5165521373656914e-05, "loss": 2.1272, "step": 10253500 }, { "epoch": 29.68, "learning_rate": 3.5164797726009636e-05, "loss": 2.0996, "step": 10254000 }, { "epoch": 29.68, "learning_rate": 3.516407407836236e-05, "loss": 2.1232, "step": 10254500 }, { "epoch": 29.68, "learning_rate": 3.5163351878010374e-05, "loss": 2.1184, "step": 10255000 }, { "epoch": 29.69, "learning_rate": 3.5162628230363096e-05, "loss": 2.1159, "step": 10255500 }, { "epoch": 29.69, "learning_rate": 3.516190458271582e-05, "loss": 2.1139, "step": 10256000 }, { "epoch": 29.69, "learning_rate": 3.516118093506854e-05, "loss": 2.1479, "step": 10256500 }, { "epoch": 29.69, "learning_rate": 3.516045728742127e-05, "loss": 2.0913, "step": 10257000 }, { "epoch": 29.69, "learning_rate": 3.515973363977399e-05, "loss": 2.0999, "step": 10257500 }, { "epoch": 29.69, "learning_rate": 3.515900999212672e-05, "loss": 2.108, "step": 10258000 }, { "epoch": 29.69, "learning_rate": 3.515828634447944e-05, "loss": 2.1207, "step": 10258500 }, { "epoch": 29.7, "learning_rate": 3.5157562696832165e-05, "loss": 2.1045, "step": 10259000 }, { "epoch": 29.7, "learning_rate": 3.515683904918489e-05, "loss": 2.0791, "step": 10259500 }, { "epoch": 29.7, "learning_rate": 3.515611540153761e-05, "loss": 2.0841, "step": 10260000 }, { "epoch": 29.7, "learning_rate": 3.515539175389033e-05, "loss": 2.0996, "step": 10260500 }, { "epoch": 29.7, "learning_rate": 3.515466955353835e-05, "loss": 2.1166, "step": 10261000 }, { "epoch": 29.7, "learning_rate": 3.515394590589107e-05, "loss": 2.1033, "step": 10261500 }, { "epoch": 29.7, "learning_rate": 3.515322225824379e-05, "loss": 2.1085, "step": 10262000 }, { "epoch": 29.71, "learning_rate": 3.515249861059652e-05, "loss": 2.0997, "step": 10262500 }, { "epoch": 29.71, "learning_rate": 3.5151774962949243e-05, "loss": 2.0946, "step": 10263000 }, { "epoch": 29.71, "learning_rate": 3.515105276259726e-05, "loss": 2.1382, "step": 10263500 }, { "epoch": 29.71, "learning_rate": 3.515032911494998e-05, "loss": 2.1323, "step": 10264000 }, { "epoch": 29.71, "learning_rate": 3.5149605467302703e-05, "loss": 2.1098, "step": 10264500 }, { "epoch": 29.71, "learning_rate": 3.5148881819655426e-05, "loss": 2.0981, "step": 10265000 }, { "epoch": 29.71, "learning_rate": 3.5148158172008155e-05, "loss": 2.1097, "step": 10265500 }, { "epoch": 29.72, "learning_rate": 3.514743452436088e-05, "loss": 2.1366, "step": 10266000 }, { "epoch": 29.72, "learning_rate": 3.51467108767136e-05, "loss": 2.0885, "step": 10266500 }, { "epoch": 29.72, "learning_rate": 3.514598722906632e-05, "loss": 2.1093, "step": 10267000 }, { "epoch": 29.72, "learning_rate": 3.5145265028714344e-05, "loss": 2.1117, "step": 10267500 }, { "epoch": 29.72, "learning_rate": 3.514454282836236e-05, "loss": 2.1112, "step": 10268000 }, { "epoch": 29.72, "learning_rate": 3.514381918071508e-05, "loss": 2.1242, "step": 10268500 }, { "epoch": 29.72, "learning_rate": 3.5143095533067804e-05, "loss": 2.1179, "step": 10269000 }, { "epoch": 29.73, "learning_rate": 3.5142371885420526e-05, "loss": 2.0993, "step": 10269500 }, { "epoch": 29.73, "learning_rate": 3.514164968506855e-05, "loss": 2.0976, "step": 10270000 }, { "epoch": 29.73, "learning_rate": 3.514092603742127e-05, "loss": 2.1208, "step": 10270500 }, { "epoch": 29.73, "learning_rate": 3.514020238977399e-05, "loss": 2.118, "step": 10271000 }, { "epoch": 29.73, "learning_rate": 3.5139478742126715e-05, "loss": 2.116, "step": 10271500 }, { "epoch": 29.73, "learning_rate": 3.513875509447944e-05, "loss": 2.1124, "step": 10272000 }, { "epoch": 29.73, "learning_rate": 3.513803144683216e-05, "loss": 2.0891, "step": 10272500 }, { "epoch": 29.74, "learning_rate": 3.513730779918489e-05, "loss": 2.1068, "step": 10273000 }, { "epoch": 29.74, "learning_rate": 3.513658415153761e-05, "loss": 2.1024, "step": 10273500 }, { "epoch": 29.74, "learning_rate": 3.513586050389033e-05, "loss": 2.1281, "step": 10274000 }, { "epoch": 29.74, "learning_rate": 3.513513830353835e-05, "loss": 2.1109, "step": 10274500 }, { "epoch": 29.74, "learning_rate": 3.513441465589107e-05, "loss": 2.0959, "step": 10275000 }, { "epoch": 29.74, "learning_rate": 3.51336910082438e-05, "loss": 2.1353, "step": 10275500 }, { "epoch": 29.74, "learning_rate": 3.5132968807891815e-05, "loss": 2.141, "step": 10276000 }, { "epoch": 29.75, "learning_rate": 3.513224516024454e-05, "loss": 2.1089, "step": 10276500 }, { "epoch": 29.75, "learning_rate": 3.513152151259726e-05, "loss": 2.0886, "step": 10277000 }, { "epoch": 29.75, "learning_rate": 3.513079786494998e-05, "loss": 2.1127, "step": 10277500 }, { "epoch": 29.75, "learning_rate": 3.5130074217302704e-05, "loss": 2.1377, "step": 10278000 }, { "epoch": 29.75, "learning_rate": 3.5129350569655427e-05, "loss": 2.1022, "step": 10278500 }, { "epoch": 29.75, "learning_rate": 3.512862692200815e-05, "loss": 2.1141, "step": 10279000 }, { "epoch": 29.75, "learning_rate": 3.512790327436087e-05, "loss": 2.1152, "step": 10279500 }, { "epoch": 29.76, "learning_rate": 3.51271796267136e-05, "loss": 2.1037, "step": 10280000 }, { "epoch": 29.76, "learning_rate": 3.512645742636162e-05, "loss": 2.0929, "step": 10280500 }, { "epoch": 29.76, "learning_rate": 3.5125733778714345e-05, "loss": 2.109, "step": 10281000 }, { "epoch": 29.76, "learning_rate": 3.512501013106707e-05, "loss": 2.1228, "step": 10281500 }, { "epoch": 29.76, "learning_rate": 3.512428648341979e-05, "loss": 2.1201, "step": 10282000 }, { "epoch": 29.76, "learning_rate": 3.512356283577251e-05, "loss": 2.1225, "step": 10282500 }, { "epoch": 29.77, "learning_rate": 3.512284208271582e-05, "loss": 2.1054, "step": 10283000 }, { "epoch": 29.77, "learning_rate": 3.512211843506855e-05, "loss": 2.1151, "step": 10283500 }, { "epoch": 29.77, "learning_rate": 3.512139478742127e-05, "loss": 2.1429, "step": 10284000 }, { "epoch": 29.77, "learning_rate": 3.512067258706929e-05, "loss": 2.1193, "step": 10284500 }, { "epoch": 29.77, "learning_rate": 3.511994893942201e-05, "loss": 2.0916, "step": 10285000 }, { "epoch": 29.77, "learning_rate": 3.511922529177473e-05, "loss": 2.11, "step": 10285500 }, { "epoch": 29.77, "learning_rate": 3.5118501644127454e-05, "loss": 2.1206, "step": 10286000 }, { "epoch": 29.78, "learning_rate": 3.5117777996480176e-05, "loss": 2.1103, "step": 10286500 }, { "epoch": 29.78, "learning_rate": 3.51170543488329e-05, "loss": 2.1012, "step": 10287000 }, { "epoch": 29.78, "learning_rate": 3.511633070118562e-05, "loss": 2.0877, "step": 10287500 }, { "epoch": 29.78, "learning_rate": 3.511560705353835e-05, "loss": 2.0958, "step": 10288000 }, { "epoch": 29.78, "learning_rate": 3.511488485318637e-05, "loss": 2.0903, "step": 10288500 }, { "epoch": 29.78, "learning_rate": 3.5114161205539094e-05, "loss": 2.1312, "step": 10289000 }, { "epoch": 29.78, "learning_rate": 3.5113437557891816e-05, "loss": 2.1131, "step": 10289500 }, { "epoch": 29.79, "learning_rate": 3.511271391024454e-05, "loss": 2.127, "step": 10290000 }, { "epoch": 29.79, "learning_rate": 3.511199026259726e-05, "loss": 2.0806, "step": 10290500 }, { "epoch": 29.79, "learning_rate": 3.511126661494998e-05, "loss": 2.1315, "step": 10291000 }, { "epoch": 29.79, "learning_rate": 3.5110542967302705e-05, "loss": 2.1237, "step": 10291500 }, { "epoch": 29.79, "learning_rate": 3.510981931965543e-05, "loss": 2.1159, "step": 10292000 }, { "epoch": 29.79, "learning_rate": 3.510909567200815e-05, "loss": 2.0942, "step": 10292500 }, { "epoch": 29.79, "learning_rate": 3.510837202436087e-05, "loss": 2.119, "step": 10293000 }, { "epoch": 29.8, "learning_rate": 3.51076483767136e-05, "loss": 2.1045, "step": 10293500 }, { "epoch": 29.8, "learning_rate": 3.5106926176361616e-05, "loss": 2.1205, "step": 10294000 }, { "epoch": 29.8, "learning_rate": 3.510620252871434e-05, "loss": 2.0996, "step": 10294500 }, { "epoch": 29.8, "learning_rate": 3.510547888106706e-05, "loss": 2.121, "step": 10295000 }, { "epoch": 29.8, "learning_rate": 3.510475523341979e-05, "loss": 2.0856, "step": 10295500 }, { "epoch": 29.8, "learning_rate": 3.510403158577251e-05, "loss": 2.1039, "step": 10296000 }, { "epoch": 29.8, "learning_rate": 3.510330938542053e-05, "loss": 2.1253, "step": 10296500 }, { "epoch": 29.81, "learning_rate": 3.510258573777325e-05, "loss": 2.1182, "step": 10297000 }, { "epoch": 29.81, "learning_rate": 3.510186209012597e-05, "loss": 2.1173, "step": 10297500 }, { "epoch": 29.81, "learning_rate": 3.51011384424787e-05, "loss": 2.1053, "step": 10298000 }, { "epoch": 29.81, "learning_rate": 3.510041479483142e-05, "loss": 2.124, "step": 10298500 }, { "epoch": 29.81, "learning_rate": 3.5099691147184146e-05, "loss": 2.0907, "step": 10299000 }, { "epoch": 29.81, "learning_rate": 3.509896749953687e-05, "loss": 2.1057, "step": 10299500 }, { "epoch": 29.81, "learning_rate": 3.509824385188959e-05, "loss": 2.1031, "step": 10300000 }, { "epoch": 29.82, "learning_rate": 3.509752020424231e-05, "loss": 2.1287, "step": 10300500 }, { "epoch": 29.82, "learning_rate": 3.5096796556595034e-05, "loss": 2.093, "step": 10301000 }, { "epoch": 29.82, "learning_rate": 3.509607435624305e-05, "loss": 2.0915, "step": 10301500 }, { "epoch": 29.82, "learning_rate": 3.509535070859577e-05, "loss": 2.1291, "step": 10302000 }, { "epoch": 29.82, "learning_rate": 3.50946270609485e-05, "loss": 2.0945, "step": 10302500 }, { "epoch": 29.82, "learning_rate": 3.5093903413301224e-05, "loss": 2.1464, "step": 10303000 }, { "epoch": 29.82, "learning_rate": 3.509317976565395e-05, "loss": 2.0889, "step": 10303500 }, { "epoch": 29.83, "learning_rate": 3.5092456118006675e-05, "loss": 2.1135, "step": 10304000 }, { "epoch": 29.83, "learning_rate": 3.509173391765469e-05, "loss": 2.0937, "step": 10304500 }, { "epoch": 29.83, "learning_rate": 3.509101027000741e-05, "loss": 2.0971, "step": 10305000 }, { "epoch": 29.83, "learning_rate": 3.5090286622360135e-05, "loss": 2.0857, "step": 10305500 }, { "epoch": 29.83, "learning_rate": 3.508956297471286e-05, "loss": 2.1109, "step": 10306000 }, { "epoch": 29.83, "learning_rate": 3.508883932706558e-05, "loss": 2.0777, "step": 10306500 }, { "epoch": 29.83, "learning_rate": 3.50881156794183e-05, "loss": 2.1212, "step": 10307000 }, { "epoch": 29.84, "learning_rate": 3.5087392031771024e-05, "loss": 2.1044, "step": 10307500 }, { "epoch": 29.84, "learning_rate": 3.5086669831419046e-05, "loss": 2.1279, "step": 10308000 }, { "epoch": 29.84, "learning_rate": 3.508594618377177e-05, "loss": 2.114, "step": 10308500 }, { "epoch": 29.84, "learning_rate": 3.508522253612449e-05, "loss": 2.1412, "step": 10309000 }, { "epoch": 29.84, "learning_rate": 3.508449888847721e-05, "loss": 2.1124, "step": 10309500 }, { "epoch": 29.84, "learning_rate": 3.508377668812523e-05, "loss": 2.1197, "step": 10310000 }, { "epoch": 29.84, "learning_rate": 3.508305304047796e-05, "loss": 2.1028, "step": 10310500 }, { "epoch": 29.85, "learning_rate": 3.508233084012598e-05, "loss": 2.1179, "step": 10311000 }, { "epoch": 29.85, "learning_rate": 3.50816071924787e-05, "loss": 2.1142, "step": 10311500 }, { "epoch": 29.85, "learning_rate": 3.5080883544831424e-05, "loss": 2.1107, "step": 10312000 }, { "epoch": 29.85, "learning_rate": 3.5080159897184146e-05, "loss": 2.0965, "step": 10312500 }, { "epoch": 29.85, "learning_rate": 3.507943624953687e-05, "loss": 2.1237, "step": 10313000 }, { "epoch": 29.85, "learning_rate": 3.507871260188959e-05, "loss": 2.1081, "step": 10313500 }, { "epoch": 29.85, "learning_rate": 3.507798895424231e-05, "loss": 2.0986, "step": 10314000 }, { "epoch": 29.86, "learning_rate": 3.5077265306595035e-05, "loss": 2.1146, "step": 10314500 }, { "epoch": 29.86, "learning_rate": 3.507654310624305e-05, "loss": 2.0997, "step": 10315000 }, { "epoch": 29.86, "learning_rate": 3.507582090589107e-05, "loss": 2.0968, "step": 10315500 }, { "epoch": 29.86, "learning_rate": 3.5075097258243795e-05, "loss": 2.1059, "step": 10316000 }, { "epoch": 29.86, "learning_rate": 3.507437361059652e-05, "loss": 2.1141, "step": 10316500 }, { "epoch": 29.86, "learning_rate": 3.507364996294924e-05, "loss": 2.13, "step": 10317000 }, { "epoch": 29.86, "learning_rate": 3.507292631530196e-05, "loss": 2.126, "step": 10317500 }, { "epoch": 29.87, "learning_rate": 3.507220266765469e-05, "loss": 2.1293, "step": 10318000 }, { "epoch": 29.87, "learning_rate": 3.507147902000741e-05, "loss": 2.1024, "step": 10318500 }, { "epoch": 29.87, "learning_rate": 3.5070755372360136e-05, "loss": 2.0927, "step": 10319000 }, { "epoch": 29.87, "learning_rate": 3.507003317200815e-05, "loss": 2.1222, "step": 10319500 }, { "epoch": 29.87, "learning_rate": 3.506930952436088e-05, "loss": 2.1126, "step": 10320000 }, { "epoch": 29.87, "learning_rate": 3.50685858767136e-05, "loss": 2.124, "step": 10320500 }, { "epoch": 29.88, "learning_rate": 3.5067862229066325e-05, "loss": 2.1235, "step": 10321000 }, { "epoch": 29.88, "learning_rate": 3.506713858141905e-05, "loss": 2.0973, "step": 10321500 }, { "epoch": 29.88, "learning_rate": 3.506641493377177e-05, "loss": 2.1164, "step": 10322000 }, { "epoch": 29.88, "learning_rate": 3.5065692733419785e-05, "loss": 2.1189, "step": 10322500 }, { "epoch": 29.88, "learning_rate": 3.50649705330678e-05, "loss": 2.095, "step": 10323000 }, { "epoch": 29.88, "learning_rate": 3.506424688542053e-05, "loss": 2.1269, "step": 10323500 }, { "epoch": 29.88, "learning_rate": 3.506352323777325e-05, "loss": 2.107, "step": 10324000 }, { "epoch": 29.89, "learning_rate": 3.5062799590125974e-05, "loss": 2.0958, "step": 10324500 }, { "epoch": 29.89, "learning_rate": 3.5062075942478696e-05, "loss": 2.1201, "step": 10325000 }, { "epoch": 29.89, "learning_rate": 3.5061352294831425e-05, "loss": 2.1084, "step": 10325500 }, { "epoch": 29.89, "learning_rate": 3.506062864718415e-05, "loss": 2.1035, "step": 10326000 }, { "epoch": 29.89, "learning_rate": 3.505990499953687e-05, "loss": 2.1009, "step": 10326500 }, { "epoch": 29.89, "learning_rate": 3.505918135188959e-05, "loss": 2.0851, "step": 10327000 }, { "epoch": 29.89, "learning_rate": 3.5058457704242314e-05, "loss": 2.0958, "step": 10327500 }, { "epoch": 29.9, "learning_rate": 3.5057734056595036e-05, "loss": 2.0774, "step": 10328000 }, { "epoch": 29.9, "learning_rate": 3.505701040894776e-05, "loss": 2.1328, "step": 10328500 }, { "epoch": 29.9, "learning_rate": 3.505628820859578e-05, "loss": 2.1031, "step": 10329000 }, { "epoch": 29.9, "learning_rate": 3.5055566008243796e-05, "loss": 2.1249, "step": 10329500 }, { "epoch": 29.9, "learning_rate": 3.505484236059652e-05, "loss": 2.0982, "step": 10330000 }, { "epoch": 29.9, "learning_rate": 3.505411871294924e-05, "loss": 2.0988, "step": 10330500 }, { "epoch": 29.9, "learning_rate": 3.505339506530196e-05, "loss": 2.1075, "step": 10331000 }, { "epoch": 29.91, "learning_rate": 3.5052671417654685e-05, "loss": 2.1253, "step": 10331500 }, { "epoch": 29.91, "learning_rate": 3.505194777000741e-05, "loss": 2.1128, "step": 10332000 }, { "epoch": 29.91, "learning_rate": 3.505122412236013e-05, "loss": 2.1027, "step": 10332500 }, { "epoch": 29.91, "learning_rate": 3.505050047471286e-05, "loss": 2.1191, "step": 10333000 }, { "epoch": 29.91, "learning_rate": 3.504977682706558e-05, "loss": 2.0969, "step": 10333500 }, { "epoch": 29.91, "learning_rate": 3.50490531794183e-05, "loss": 2.11, "step": 10334000 }, { "epoch": 29.91, "learning_rate": 3.504832953177103e-05, "loss": 2.1086, "step": 10334500 }, { "epoch": 29.92, "learning_rate": 3.5047605884123754e-05, "loss": 2.0911, "step": 10335000 }, { "epoch": 29.92, "learning_rate": 3.5046882236476477e-05, "loss": 2.1363, "step": 10335500 }, { "epoch": 29.92, "learning_rate": 3.50461585888292e-05, "loss": 2.1077, "step": 10336000 }, { "epoch": 29.92, "learning_rate": 3.504543783577251e-05, "loss": 2.1242, "step": 10336500 }, { "epoch": 29.92, "learning_rate": 3.504471418812523e-05, "loss": 2.1258, "step": 10337000 }, { "epoch": 29.92, "learning_rate": 3.504399054047796e-05, "loss": 2.1157, "step": 10337500 }, { "epoch": 29.92, "learning_rate": 3.504326689283068e-05, "loss": 2.1037, "step": 10338000 }, { "epoch": 29.93, "learning_rate": 3.50425432451834e-05, "loss": 2.1157, "step": 10338500 }, { "epoch": 29.93, "learning_rate": 3.5041819597536126e-05, "loss": 2.1323, "step": 10339000 }, { "epoch": 29.93, "learning_rate": 3.504109594988885e-05, "loss": 2.1242, "step": 10339500 }, { "epoch": 29.93, "learning_rate": 3.504037230224158e-05, "loss": 2.1248, "step": 10340000 }, { "epoch": 29.93, "learning_rate": 3.50396486545943e-05, "loss": 2.1205, "step": 10340500 }, { "epoch": 29.93, "learning_rate": 3.503892500694702e-05, "loss": 2.1261, "step": 10341000 }, { "epoch": 29.93, "learning_rate": 3.503820425389033e-05, "loss": 2.1112, "step": 10341500 }, { "epoch": 29.94, "learning_rate": 3.503748060624306e-05, "loss": 2.1174, "step": 10342000 }, { "epoch": 29.94, "learning_rate": 3.503675695859578e-05, "loss": 2.1267, "step": 10342500 }, { "epoch": 29.94, "learning_rate": 3.5036033310948504e-05, "loss": 2.1154, "step": 10343000 }, { "epoch": 29.94, "learning_rate": 3.5035309663301226e-05, "loss": 2.1113, "step": 10343500 }, { "epoch": 29.94, "learning_rate": 3.503458601565395e-05, "loss": 2.0955, "step": 10344000 }, { "epoch": 29.94, "learning_rate": 3.503386236800667e-05, "loss": 2.1247, "step": 10344500 }, { "epoch": 29.94, "learning_rate": 3.5033140167654686e-05, "loss": 2.0924, "step": 10345000 }, { "epoch": 29.95, "learning_rate": 3.503241796730271e-05, "loss": 2.1218, "step": 10345500 }, { "epoch": 29.95, "learning_rate": 3.503169431965543e-05, "loss": 2.1285, "step": 10346000 }, { "epoch": 29.95, "learning_rate": 3.503097067200815e-05, "loss": 2.1402, "step": 10346500 }, { "epoch": 29.95, "learning_rate": 3.5030247024360875e-05, "loss": 2.1073, "step": 10347000 }, { "epoch": 29.95, "learning_rate": 3.50295233767136e-05, "loss": 2.1025, "step": 10347500 }, { "epoch": 29.95, "learning_rate": 3.5028799729066326e-05, "loss": 2.0933, "step": 10348000 }, { "epoch": 29.95, "learning_rate": 3.502807752871434e-05, "loss": 2.1174, "step": 10348500 }, { "epoch": 29.96, "learning_rate": 3.5027353881067064e-05, "loss": 2.1067, "step": 10349000 }, { "epoch": 29.96, "learning_rate": 3.5026630233419786e-05, "loss": 2.1269, "step": 10349500 }, { "epoch": 29.96, "learning_rate": 3.502590658577251e-05, "loss": 2.1089, "step": 10350000 }, { "epoch": 29.96, "learning_rate": 3.502518293812523e-05, "loss": 2.1373, "step": 10350500 }, { "epoch": 29.96, "learning_rate": 3.502445929047796e-05, "loss": 2.1092, "step": 10351000 }, { "epoch": 29.96, "learning_rate": 3.502373564283068e-05, "loss": 2.1041, "step": 10351500 }, { "epoch": 29.96, "learning_rate": 3.5023011995183404e-05, "loss": 2.1282, "step": 10352000 }, { "epoch": 29.97, "learning_rate": 3.5022288347536126e-05, "loss": 2.1508, "step": 10352500 }, { "epoch": 29.97, "learning_rate": 3.502156469988885e-05, "loss": 2.1082, "step": 10353000 }, { "epoch": 29.97, "learning_rate": 3.502084105224157e-05, "loss": 2.1203, "step": 10353500 }, { "epoch": 29.97, "learning_rate": 3.502011740459429e-05, "loss": 2.125, "step": 10354000 }, { "epoch": 29.97, "learning_rate": 3.5019393756947015e-05, "loss": 2.1222, "step": 10354500 }, { "epoch": 29.97, "learning_rate": 3.5018670109299744e-05, "loss": 2.1277, "step": 10355000 }, { "epoch": 29.97, "learning_rate": 3.5017946461652467e-05, "loss": 2.1167, "step": 10355500 }, { "epoch": 29.98, "learning_rate": 3.501722281400519e-05, "loss": 2.1318, "step": 10356000 }, { "epoch": 29.98, "learning_rate": 3.50165035082438e-05, "loss": 2.1085, "step": 10356500 }, { "epoch": 29.98, "learning_rate": 3.501577986059652e-05, "loss": 2.1203, "step": 10357000 }, { "epoch": 29.98, "learning_rate": 3.501505621294924e-05, "loss": 2.1128, "step": 10357500 }, { "epoch": 29.98, "learning_rate": 3.5014332565301964e-05, "loss": 2.1036, "step": 10358000 }, { "epoch": 29.98, "learning_rate": 3.501360891765469e-05, "loss": 2.1303, "step": 10358500 }, { "epoch": 29.99, "learning_rate": 3.501288527000741e-05, "loss": 2.1141, "step": 10359000 }, { "epoch": 29.99, "learning_rate": 3.501216162236013e-05, "loss": 2.1066, "step": 10359500 }, { "epoch": 29.99, "learning_rate": 3.501143797471286e-05, "loss": 2.1166, "step": 10360000 }, { "epoch": 29.99, "learning_rate": 3.501071432706558e-05, "loss": 2.0972, "step": 10360500 }, { "epoch": 29.99, "learning_rate": 3.50099921267136e-05, "loss": 2.1028, "step": 10361000 }, { "epoch": 29.99, "learning_rate": 3.500926847906632e-05, "loss": 2.1232, "step": 10361500 }, { "epoch": 29.99, "learning_rate": 3.500854483141904e-05, "loss": 2.1076, "step": 10362000 }, { "epoch": 30.0, "learning_rate": 3.5007821183771765e-05, "loss": 2.1259, "step": 10362500 }, { "epoch": 30.0, "learning_rate": 3.5007097536124494e-05, "loss": 2.1214, "step": 10363000 }, { "epoch": 30.0, "learning_rate": 3.5006373888477216e-05, "loss": 2.1293, "step": 10363500 }, { "epoch": 30.0, "learning_rate": 3.500565024082994e-05, "loss": 2.0928, "step": 10364000 }, { "epoch": 30.0, "eval_accuracy": 0.6665779107038252, "eval_accuracy_mlm": 0.6310435936946435, "eval_accuracy_nsp": 0.8573464776596866, "eval_loss": 2.1807310581207275, "eval_runtime": 331.5713, "eval_samples_per_second": 1316.115, "eval_steps_per_second": 54.839, "step": 10364160 }, { "epoch": 30.0, "learning_rate": 3.500492804047796e-05, "loss": 2.1213, "step": 10364500 }, { "epoch": 30.0, "learning_rate": 3.500420439283068e-05, "loss": 2.1103, "step": 10365000 }, { "epoch": 30.0, "learning_rate": 3.50034821924787e-05, "loss": 2.1067, "step": 10365500 }, { "epoch": 30.01, "learning_rate": 3.500276143942201e-05, "loss": 2.0826, "step": 10366000 }, { "epoch": 30.01, "learning_rate": 3.5002037791774736e-05, "loss": 2.0969, "step": 10366500 }, { "epoch": 30.01, "learning_rate": 3.500131414412746e-05, "loss": 2.0776, "step": 10367000 }, { "epoch": 30.01, "learning_rate": 3.500059049648018e-05, "loss": 2.0948, "step": 10367500 }, { "epoch": 30.01, "learning_rate": 3.49998668488329e-05, "loss": 2.0853, "step": 10368000 }, { "epoch": 30.01, "learning_rate": 3.4999143201185625e-05, "loss": 2.0972, "step": 10368500 }, { "epoch": 30.01, "learning_rate": 3.499841955353835e-05, "loss": 2.0717, "step": 10369000 }, { "epoch": 30.02, "learning_rate": 3.499769590589107e-05, "loss": 2.0894, "step": 10369500 }, { "epoch": 30.02, "learning_rate": 3.499697225824379e-05, "loss": 2.0816, "step": 10370000 }, { "epoch": 30.02, "learning_rate": 3.499624861059652e-05, "loss": 2.0836, "step": 10370500 }, { "epoch": 30.02, "learning_rate": 3.499552496294924e-05, "loss": 2.0927, "step": 10371000 }, { "epoch": 30.02, "learning_rate": 3.4994801315301965e-05, "loss": 2.1031, "step": 10371500 }, { "epoch": 30.02, "learning_rate": 3.499407766765469e-05, "loss": 2.1029, "step": 10372000 }, { "epoch": 30.02, "learning_rate": 3.499335546730271e-05, "loss": 2.1119, "step": 10372500 }, { "epoch": 30.03, "learning_rate": 3.499263181965543e-05, "loss": 2.0929, "step": 10373000 }, { "epoch": 30.03, "learning_rate": 3.4991908172008154e-05, "loss": 2.0962, "step": 10373500 }, { "epoch": 30.03, "learning_rate": 3.4991184524360877e-05, "loss": 2.0846, "step": 10374000 }, { "epoch": 30.03, "learning_rate": 3.499046232400889e-05, "loss": 2.1082, "step": 10374500 }, { "epoch": 30.03, "learning_rate": 3.4989738676361614e-05, "loss": 2.1004, "step": 10375000 }, { "epoch": 30.03, "learning_rate": 3.4989015028714337e-05, "loss": 2.0722, "step": 10375500 }, { "epoch": 30.03, "learning_rate": 3.498829138106706e-05, "loss": 2.0787, "step": 10376000 }, { "epoch": 30.04, "learning_rate": 3.498756773341979e-05, "loss": 2.1036, "step": 10376500 }, { "epoch": 30.04, "learning_rate": 3.49868455330678e-05, "loss": 2.1034, "step": 10377000 }, { "epoch": 30.04, "learning_rate": 3.498612333271582e-05, "loss": 2.0852, "step": 10377500 }, { "epoch": 30.04, "learning_rate": 3.498539968506854e-05, "loss": 2.1011, "step": 10378000 }, { "epoch": 30.04, "learning_rate": 3.498467603742127e-05, "loss": 2.0806, "step": 10378500 }, { "epoch": 30.04, "learning_rate": 3.498395238977399e-05, "loss": 2.0907, "step": 10379000 }, { "epoch": 30.04, "learning_rate": 3.4983228742126715e-05, "loss": 2.0767, "step": 10379500 }, { "epoch": 30.05, "learning_rate": 3.498250509447944e-05, "loss": 2.0895, "step": 10380000 }, { "epoch": 30.05, "learning_rate": 3.4981781446832166e-05, "loss": 2.1007, "step": 10380500 }, { "epoch": 30.05, "learning_rate": 3.498105779918489e-05, "loss": 2.089, "step": 10381000 }, { "epoch": 30.05, "learning_rate": 3.498033415153761e-05, "loss": 2.0967, "step": 10381500 }, { "epoch": 30.05, "learning_rate": 3.4979611951185626e-05, "loss": 2.065, "step": 10382000 }, { "epoch": 30.05, "learning_rate": 3.497888830353835e-05, "loss": 2.0805, "step": 10382500 }, { "epoch": 30.05, "learning_rate": 3.497816465589107e-05, "loss": 2.0907, "step": 10383000 }, { "epoch": 30.06, "learning_rate": 3.497744100824379e-05, "loss": 2.0715, "step": 10383500 }, { "epoch": 30.06, "learning_rate": 3.4976717360596515e-05, "loss": 2.0941, "step": 10384000 }, { "epoch": 30.06, "learning_rate": 3.497599516024454e-05, "loss": 2.0916, "step": 10384500 }, { "epoch": 30.06, "learning_rate": 3.497527151259726e-05, "loss": 2.1088, "step": 10385000 }, { "epoch": 30.06, "learning_rate": 3.497454786494999e-05, "loss": 2.097, "step": 10385500 }, { "epoch": 30.06, "learning_rate": 3.497382421730271e-05, "loss": 2.1012, "step": 10386000 }, { "epoch": 30.06, "learning_rate": 3.497310056965543e-05, "loss": 2.0997, "step": 10386500 }, { "epoch": 30.07, "learning_rate": 3.497237836930345e-05, "loss": 2.1141, "step": 10387000 }, { "epoch": 30.07, "learning_rate": 3.497165472165617e-05, "loss": 2.0816, "step": 10387500 }, { "epoch": 30.07, "learning_rate": 3.497093107400889e-05, "loss": 2.1172, "step": 10388000 }, { "epoch": 30.07, "learning_rate": 3.4970207426361615e-05, "loss": 2.1078, "step": 10388500 }, { "epoch": 30.07, "learning_rate": 3.496948377871434e-05, "loss": 2.0878, "step": 10389000 }, { "epoch": 30.07, "learning_rate": 3.4968760131067066e-05, "loss": 2.0848, "step": 10389500 }, { "epoch": 30.07, "learning_rate": 3.496803793071508e-05, "loss": 2.1086, "step": 10390000 }, { "epoch": 30.08, "learning_rate": 3.4967314283067804e-05, "loss": 2.1157, "step": 10390500 }, { "epoch": 30.08, "learning_rate": 3.4966590635420526e-05, "loss": 2.0851, "step": 10391000 }, { "epoch": 30.08, "learning_rate": 3.496586698777325e-05, "loss": 2.0881, "step": 10391500 }, { "epoch": 30.08, "learning_rate": 3.496514334012597e-05, "loss": 2.0887, "step": 10392000 }, { "epoch": 30.08, "learning_rate": 3.4964421139773986e-05, "loss": 2.0919, "step": 10392500 }, { "epoch": 30.08, "learning_rate": 3.4963697492126715e-05, "loss": 2.097, "step": 10393000 }, { "epoch": 30.08, "learning_rate": 3.496297384447944e-05, "loss": 2.0882, "step": 10393500 }, { "epoch": 30.09, "learning_rate": 3.496225019683217e-05, "loss": 2.1106, "step": 10394000 }, { "epoch": 30.09, "learning_rate": 3.4961529443775476e-05, "loss": 2.0996, "step": 10394500 }, { "epoch": 30.09, "learning_rate": 3.49608057961282e-05, "loss": 2.0901, "step": 10395000 }, { "epoch": 30.09, "learning_rate": 3.496008214848092e-05, "loss": 2.0748, "step": 10395500 }, { "epoch": 30.09, "learning_rate": 3.495935850083364e-05, "loss": 2.1166, "step": 10396000 }, { "epoch": 30.09, "learning_rate": 3.4958634853186364e-05, "loss": 2.0924, "step": 10396500 }, { "epoch": 30.1, "learning_rate": 3.495791120553909e-05, "loss": 2.0888, "step": 10397000 }, { "epoch": 30.1, "learning_rate": 3.4957187557891816e-05, "loss": 2.1078, "step": 10397500 }, { "epoch": 30.1, "learning_rate": 3.495646391024454e-05, "loss": 2.0979, "step": 10398000 }, { "epoch": 30.1, "learning_rate": 3.495574026259726e-05, "loss": 2.1075, "step": 10398500 }, { "epoch": 30.1, "learning_rate": 3.495501661494998e-05, "loss": 2.0849, "step": 10399000 }, { "epoch": 30.1, "learning_rate": 3.4954292967302705e-05, "loss": 2.0983, "step": 10399500 }, { "epoch": 30.1, "learning_rate": 3.495356931965543e-05, "loss": 2.1227, "step": 10400000 }, { "epoch": 30.11, "learning_rate": 3.4952845672008156e-05, "loss": 2.102, "step": 10400500 }, { "epoch": 30.11, "learning_rate": 3.495212202436088e-05, "loss": 2.1033, "step": 10401000 }, { "epoch": 30.11, "learning_rate": 3.49513983767136e-05, "loss": 2.0828, "step": 10401500 }, { "epoch": 30.11, "learning_rate": 3.495067472906632e-05, "loss": 2.0795, "step": 10402000 }, { "epoch": 30.11, "learning_rate": 3.494995252871434e-05, "loss": 2.0932, "step": 10402500 }, { "epoch": 30.11, "learning_rate": 3.494922888106707e-05, "loss": 2.0807, "step": 10403000 }, { "epoch": 30.11, "learning_rate": 3.494850523341979e-05, "loss": 2.1094, "step": 10403500 }, { "epoch": 30.12, "learning_rate": 3.494778158577251e-05, "loss": 2.0948, "step": 10404000 }, { "epoch": 30.12, "learning_rate": 3.494705938542053e-05, "loss": 2.0983, "step": 10404500 }, { "epoch": 30.12, "learning_rate": 3.494633573777325e-05, "loss": 2.1104, "step": 10405000 }, { "epoch": 30.12, "learning_rate": 3.494561209012597e-05, "loss": 2.077, "step": 10405500 }, { "epoch": 30.12, "learning_rate": 3.4944888442478694e-05, "loss": 2.0896, "step": 10406000 }, { "epoch": 30.12, "learning_rate": 3.4944164794831416e-05, "loss": 2.1105, "step": 10406500 }, { "epoch": 30.12, "learning_rate": 3.494344114718414e-05, "loss": 2.0885, "step": 10407000 }, { "epoch": 30.13, "learning_rate": 3.494271749953687e-05, "loss": 2.101, "step": 10407500 }, { "epoch": 30.13, "learning_rate": 3.494199385188959e-05, "loss": 2.0973, "step": 10408000 }, { "epoch": 30.13, "learning_rate": 3.494127165153761e-05, "loss": 2.0951, "step": 10408500 }, { "epoch": 30.13, "learning_rate": 3.4940548003890334e-05, "loss": 2.0993, "step": 10409000 }, { "epoch": 30.13, "learning_rate": 3.4939824356243056e-05, "loss": 2.1071, "step": 10409500 }, { "epoch": 30.13, "learning_rate": 3.493910070859578e-05, "loss": 2.1001, "step": 10410000 }, { "epoch": 30.13, "learning_rate": 3.49383770609485e-05, "loss": 2.1157, "step": 10410500 }, { "epoch": 30.14, "learning_rate": 3.493765341330122e-05, "loss": 2.0842, "step": 10411000 }, { "epoch": 30.14, "learning_rate": 3.4936929765653945e-05, "loss": 2.1065, "step": 10411500 }, { "epoch": 30.14, "learning_rate": 3.493620611800667e-05, "loss": 2.0963, "step": 10412000 }, { "epoch": 30.14, "learning_rate": 3.493548247035939e-05, "loss": 2.1174, "step": 10412500 }, { "epoch": 30.14, "learning_rate": 3.493476027000741e-05, "loss": 2.0945, "step": 10413000 }, { "epoch": 30.14, "learning_rate": 3.4934036622360134e-05, "loss": 2.0761, "step": 10413500 }, { "epoch": 30.14, "learning_rate": 3.4933312974712857e-05, "loss": 2.0952, "step": 10414000 }, { "epoch": 30.15, "learning_rate": 3.493258932706558e-05, "loss": 2.096, "step": 10414500 }, { "epoch": 30.15, "learning_rate": 3.493186567941831e-05, "loss": 2.0889, "step": 10415000 }, { "epoch": 30.15, "learning_rate": 3.4931143479066323e-05, "loss": 2.0994, "step": 10415500 }, { "epoch": 30.15, "learning_rate": 3.4930421278714346e-05, "loss": 2.0818, "step": 10416000 }, { "epoch": 30.15, "learning_rate": 3.492969763106707e-05, "loss": 2.0852, "step": 10416500 }, { "epoch": 30.15, "learning_rate": 3.492897398341979e-05, "loss": 2.1043, "step": 10417000 }, { "epoch": 30.15, "learning_rate": 3.4928251783067806e-05, "loss": 2.1028, "step": 10417500 }, { "epoch": 30.16, "learning_rate": 3.492752813542053e-05, "loss": 2.0783, "step": 10418000 }, { "epoch": 30.16, "learning_rate": 3.492680448777325e-05, "loss": 2.0817, "step": 10418500 }, { "epoch": 30.16, "learning_rate": 3.492608084012597e-05, "loss": 2.1086, "step": 10419000 }, { "epoch": 30.16, "learning_rate": 3.492536008706929e-05, "loss": 2.0912, "step": 10419500 }, { "epoch": 30.16, "learning_rate": 3.492463643942201e-05, "loss": 2.0897, "step": 10420000 }, { "epoch": 30.16, "learning_rate": 3.492391279177473e-05, "loss": 2.1039, "step": 10420500 }, { "epoch": 30.16, "learning_rate": 3.4923189144127455e-05, "loss": 2.1048, "step": 10421000 }, { "epoch": 30.17, "learning_rate": 3.492246549648018e-05, "loss": 2.0901, "step": 10421500 }, { "epoch": 30.17, "learning_rate": 3.49217418488329e-05, "loss": 2.0923, "step": 10422000 }, { "epoch": 30.17, "learning_rate": 3.492101820118562e-05, "loss": 2.0534, "step": 10422500 }, { "epoch": 30.17, "learning_rate": 3.492029455353835e-05, "loss": 2.098, "step": 10423000 }, { "epoch": 30.17, "learning_rate": 3.4919572353186366e-05, "loss": 2.0857, "step": 10423500 }, { "epoch": 30.17, "learning_rate": 3.4918848705539095e-05, "loss": 2.0862, "step": 10424000 }, { "epoch": 30.17, "learning_rate": 3.491812505789182e-05, "loss": 2.0934, "step": 10424500 }, { "epoch": 30.18, "learning_rate": 3.491740141024454e-05, "loss": 2.112, "step": 10425000 }, { "epoch": 30.18, "learning_rate": 3.491667776259726e-05, "loss": 2.0991, "step": 10425500 }, { "epoch": 30.18, "learning_rate": 3.4915954114949984e-05, "loss": 2.099, "step": 10426000 }, { "epoch": 30.18, "learning_rate": 3.4915230467302706e-05, "loss": 2.0994, "step": 10426500 }, { "epoch": 30.18, "learning_rate": 3.491450826695072e-05, "loss": 2.1091, "step": 10427000 }, { "epoch": 30.18, "learning_rate": 3.4913784619303444e-05, "loss": 2.1217, "step": 10427500 }, { "epoch": 30.18, "learning_rate": 3.4913060971656166e-05, "loss": 2.102, "step": 10428000 }, { "epoch": 30.19, "learning_rate": 3.4912337324008895e-05, "loss": 2.1201, "step": 10428500 }, { "epoch": 30.19, "learning_rate": 3.491161367636162e-05, "loss": 2.097, "step": 10429000 }, { "epoch": 30.19, "learning_rate": 3.491089147600963e-05, "loss": 2.1024, "step": 10429500 }, { "epoch": 30.19, "learning_rate": 3.4910167828362355e-05, "loss": 2.0938, "step": 10430000 }, { "epoch": 30.19, "learning_rate": 3.4909444180715084e-05, "loss": 2.094, "step": 10430500 }, { "epoch": 30.19, "learning_rate": 3.4908720533067807e-05, "loss": 2.1125, "step": 10431000 }, { "epoch": 30.19, "learning_rate": 3.490799688542053e-05, "loss": 2.1181, "step": 10431500 }, { "epoch": 30.2, "learning_rate": 3.490727323777325e-05, "loss": 2.0849, "step": 10432000 }, { "epoch": 30.2, "learning_rate": 3.490655103742127e-05, "loss": 2.0887, "step": 10432500 }, { "epoch": 30.2, "learning_rate": 3.4905827389773996e-05, "loss": 2.084, "step": 10433000 }, { "epoch": 30.2, "learning_rate": 3.490510374212672e-05, "loss": 2.0454, "step": 10433500 }, { "epoch": 30.2, "learning_rate": 3.490438154177473e-05, "loss": 2.1311, "step": 10434000 }, { "epoch": 30.2, "learning_rate": 3.4903657894127456e-05, "loss": 2.118, "step": 10434500 }, { "epoch": 30.21, "learning_rate": 3.490293424648018e-05, "loss": 2.0895, "step": 10435000 }, { "epoch": 30.21, "learning_rate": 3.49022105988329e-05, "loss": 2.0945, "step": 10435500 }, { "epoch": 30.21, "learning_rate": 3.490148695118562e-05, "loss": 2.0768, "step": 10436000 }, { "epoch": 30.21, "learning_rate": 3.490076619812894e-05, "loss": 2.1068, "step": 10436500 }, { "epoch": 30.21, "learning_rate": 3.490004255048166e-05, "loss": 2.1096, "step": 10437000 }, { "epoch": 30.21, "learning_rate": 3.489931890283438e-05, "loss": 2.1142, "step": 10437500 }, { "epoch": 30.21, "learning_rate": 3.4898595255187105e-05, "loss": 2.1015, "step": 10438000 }, { "epoch": 30.22, "learning_rate": 3.489787305483513e-05, "loss": 2.0993, "step": 10438500 }, { "epoch": 30.22, "learning_rate": 3.489714940718785e-05, "loss": 2.0991, "step": 10439000 }, { "epoch": 30.22, "learning_rate": 3.489642575954057e-05, "loss": 2.0732, "step": 10439500 }, { "epoch": 30.22, "learning_rate": 3.4895702111893294e-05, "loss": 2.091, "step": 10440000 }, { "epoch": 30.22, "learning_rate": 3.489497846424602e-05, "loss": 2.0875, "step": 10440500 }, { "epoch": 30.22, "learning_rate": 3.4894254816598745e-05, "loss": 2.0889, "step": 10441000 }, { "epoch": 30.22, "learning_rate": 3.489353116895147e-05, "loss": 2.1032, "step": 10441500 }, { "epoch": 30.23, "learning_rate": 3.489280752130419e-05, "loss": 2.0874, "step": 10442000 }, { "epoch": 30.23, "learning_rate": 3.489208387365691e-05, "loss": 2.0921, "step": 10442500 }, { "epoch": 30.23, "learning_rate": 3.4891360226009634e-05, "loss": 2.0944, "step": 10443000 }, { "epoch": 30.23, "learning_rate": 3.4890636578362356e-05, "loss": 2.0889, "step": 10443500 }, { "epoch": 30.23, "learning_rate": 3.488991293071508e-05, "loss": 2.1125, "step": 10444000 }, { "epoch": 30.23, "learning_rate": 3.48891892830678e-05, "loss": 2.1107, "step": 10444500 }, { "epoch": 30.23, "learning_rate": 3.4888468530011116e-05, "loss": 2.135, "step": 10445000 }, { "epoch": 30.24, "learning_rate": 3.488774488236384e-05, "loss": 2.1006, "step": 10445500 }, { "epoch": 30.24, "learning_rate": 3.488702268201186e-05, "loss": 2.0979, "step": 10446000 }, { "epoch": 30.24, "learning_rate": 3.4886300481659876e-05, "loss": 2.1041, "step": 10446500 }, { "epoch": 30.24, "learning_rate": 3.48855768340126e-05, "loss": 2.1155, "step": 10447000 }, { "epoch": 30.24, "learning_rate": 3.488485318636532e-05, "loss": 2.1172, "step": 10447500 }, { "epoch": 30.24, "learning_rate": 3.488412953871805e-05, "loss": 2.0934, "step": 10448000 }, { "epoch": 30.24, "learning_rate": 3.488340589107077e-05, "loss": 2.1162, "step": 10448500 }, { "epoch": 30.25, "learning_rate": 3.4882682243423494e-05, "loss": 2.1004, "step": 10449000 }, { "epoch": 30.25, "learning_rate": 3.4881958595776217e-05, "loss": 2.1148, "step": 10449500 }, { "epoch": 30.25, "learning_rate": 3.488123494812894e-05, "loss": 2.101, "step": 10450000 }, { "epoch": 30.25, "learning_rate": 3.488051130048166e-05, "loss": 2.0839, "step": 10450500 }, { "epoch": 30.25, "learning_rate": 3.487978765283438e-05, "loss": 2.1271, "step": 10451000 }, { "epoch": 30.25, "learning_rate": 3.4879064005187105e-05, "loss": 2.1042, "step": 10451500 }, { "epoch": 30.25, "learning_rate": 3.487834035753983e-05, "loss": 2.0907, "step": 10452000 }, { "epoch": 30.26, "learning_rate": 3.487761815718785e-05, "loss": 2.0969, "step": 10452500 }, { "epoch": 30.26, "learning_rate": 3.487689450954057e-05, "loss": 2.0737, "step": 10453000 }, { "epoch": 30.26, "learning_rate": 3.48761708618933e-05, "loss": 2.0969, "step": 10453500 }, { "epoch": 30.26, "learning_rate": 3.4875447214246023e-05, "loss": 2.0856, "step": 10454000 }, { "epoch": 30.26, "learning_rate": 3.4874723566598746e-05, "loss": 2.1022, "step": 10454500 }, { "epoch": 30.26, "learning_rate": 3.487399991895147e-05, "loss": 2.0988, "step": 10455000 }, { "epoch": 30.26, "learning_rate": 3.487327627130419e-05, "loss": 2.1063, "step": 10455500 }, { "epoch": 30.27, "learning_rate": 3.487255262365691e-05, "loss": 2.0833, "step": 10456000 }, { "epoch": 30.27, "learning_rate": 3.4871828976009635e-05, "loss": 2.1004, "step": 10456500 }, { "epoch": 30.27, "learning_rate": 3.487110532836236e-05, "loss": 2.1153, "step": 10457000 }, { "epoch": 30.27, "learning_rate": 3.487038168071508e-05, "loss": 2.0634, "step": 10457500 }, { "epoch": 30.27, "learning_rate": 3.48696580330678e-05, "loss": 2.0819, "step": 10458000 }, { "epoch": 30.27, "learning_rate": 3.4868934385420524e-05, "loss": 2.0988, "step": 10458500 }, { "epoch": 30.27, "learning_rate": 3.4868210737773246e-05, "loss": 2.0891, "step": 10459000 }, { "epoch": 30.28, "learning_rate": 3.4867487090125975e-05, "loss": 2.1027, "step": 10459500 }, { "epoch": 30.28, "learning_rate": 3.48667634424787e-05, "loss": 2.089, "step": 10460000 }, { "epoch": 30.28, "learning_rate": 3.4866039794831426e-05, "loss": 2.0912, "step": 10460500 }, { "epoch": 30.28, "learning_rate": 3.486531759447944e-05, "loss": 2.0854, "step": 10461000 }, { "epoch": 30.28, "learning_rate": 3.4864593946832164e-05, "loss": 2.0956, "step": 10461500 }, { "epoch": 30.28, "learning_rate": 3.486387174648018e-05, "loss": 2.106, "step": 10462000 }, { "epoch": 30.28, "learning_rate": 3.48631480988329e-05, "loss": 2.0862, "step": 10462500 }, { "epoch": 30.29, "learning_rate": 3.4862424451185624e-05, "loss": 2.1113, "step": 10463000 }, { "epoch": 30.29, "learning_rate": 3.486170080353835e-05, "loss": 2.1022, "step": 10463500 }, { "epoch": 30.29, "learning_rate": 3.4860977155891075e-05, "loss": 2.0718, "step": 10464000 }, { "epoch": 30.29, "learning_rate": 3.48602535082438e-05, "loss": 2.1125, "step": 10464500 }, { "epoch": 30.29, "learning_rate": 3.485952986059652e-05, "loss": 2.117, "step": 10465000 }, { "epoch": 30.29, "learning_rate": 3.485880621294924e-05, "loss": 2.0922, "step": 10465500 }, { "epoch": 30.29, "learning_rate": 3.4858082565301964e-05, "loss": 2.1053, "step": 10466000 }, { "epoch": 30.3, "learning_rate": 3.485736036494998e-05, "loss": 2.0858, "step": 10466500 }, { "epoch": 30.3, "learning_rate": 3.48566367173027e-05, "loss": 2.098, "step": 10467000 }, { "epoch": 30.3, "learning_rate": 3.4855913069655424e-05, "loss": 2.0941, "step": 10467500 }, { "epoch": 30.3, "learning_rate": 3.485518942200815e-05, "loss": 2.1012, "step": 10468000 }, { "epoch": 30.3, "learning_rate": 3.4854465774360875e-05, "loss": 2.1077, "step": 10468500 }, { "epoch": 30.3, "learning_rate": 3.4853742126713604e-05, "loss": 2.0932, "step": 10469000 }, { "epoch": 30.3, "learning_rate": 3.4853018479066327e-05, "loss": 2.0795, "step": 10469500 }, { "epoch": 30.31, "learning_rate": 3.485229483141905e-05, "loss": 2.099, "step": 10470000 }, { "epoch": 30.31, "learning_rate": 3.485157118377177e-05, "loss": 2.1142, "step": 10470500 }, { "epoch": 30.31, "learning_rate": 3.485084753612449e-05, "loss": 2.089, "step": 10471000 }, { "epoch": 30.31, "learning_rate": 3.4850123888477216e-05, "loss": 2.1337, "step": 10471500 }, { "epoch": 30.31, "learning_rate": 3.484940168812523e-05, "loss": 2.1182, "step": 10472000 }, { "epoch": 30.31, "learning_rate": 3.484867804047795e-05, "loss": 2.0872, "step": 10472500 }, { "epoch": 30.32, "learning_rate": 3.4847955840125976e-05, "loss": 2.0921, "step": 10473000 }, { "epoch": 30.32, "learning_rate": 3.484723363977399e-05, "loss": 2.103, "step": 10473500 }, { "epoch": 30.32, "learning_rate": 3.4846509992126713e-05, "loss": 2.1102, "step": 10474000 }, { "epoch": 30.32, "learning_rate": 3.4845786344479436e-05, "loss": 2.1045, "step": 10474500 }, { "epoch": 30.32, "learning_rate": 3.484506269683216e-05, "loss": 2.1252, "step": 10475000 }, { "epoch": 30.32, "learning_rate": 3.484433904918489e-05, "loss": 2.1201, "step": 10475500 }, { "epoch": 30.32, "learning_rate": 3.484361540153761e-05, "loss": 2.0907, "step": 10476000 }, { "epoch": 30.33, "learning_rate": 3.484289175389033e-05, "loss": 2.0748, "step": 10476500 }, { "epoch": 30.33, "learning_rate": 3.4842169553538354e-05, "loss": 2.126, "step": 10477000 }, { "epoch": 30.33, "learning_rate": 3.4841445905891076e-05, "loss": 2.0855, "step": 10477500 }, { "epoch": 30.33, "learning_rate": 3.48407222582438e-05, "loss": 2.0942, "step": 10478000 }, { "epoch": 30.33, "learning_rate": 3.483999861059652e-05, "loss": 2.1122, "step": 10478500 }, { "epoch": 30.33, "learning_rate": 3.483927496294924e-05, "loss": 2.0802, "step": 10479000 }, { "epoch": 30.33, "learning_rate": 3.4838551315301965e-05, "loss": 2.1032, "step": 10479500 }, { "epoch": 30.34, "learning_rate": 3.483782766765469e-05, "loss": 2.0793, "step": 10480000 }, { "epoch": 30.34, "learning_rate": 3.483710402000741e-05, "loss": 2.0851, "step": 10480500 }, { "epoch": 30.34, "learning_rate": 3.483638037236013e-05, "loss": 2.108, "step": 10481000 }, { "epoch": 30.34, "learning_rate": 3.483565961930345e-05, "loss": 2.1147, "step": 10481500 }, { "epoch": 30.34, "learning_rate": 3.483493741895146e-05, "loss": 2.1067, "step": 10482000 }, { "epoch": 30.34, "learning_rate": 3.4834213771304185e-05, "loss": 2.1116, "step": 10482500 }, { "epoch": 30.34, "learning_rate": 3.48334915709522e-05, "loss": 2.0866, "step": 10483000 }, { "epoch": 30.35, "learning_rate": 3.483276792330493e-05, "loss": 2.1022, "step": 10483500 }, { "epoch": 30.35, "learning_rate": 3.483204427565765e-05, "loss": 2.1074, "step": 10484000 }, { "epoch": 30.35, "learning_rate": 3.483132062801038e-05, "loss": 2.0932, "step": 10484500 }, { "epoch": 30.35, "learning_rate": 3.48305969803631e-05, "loss": 2.1031, "step": 10485000 }, { "epoch": 30.35, "learning_rate": 3.4829873332715825e-05, "loss": 2.0863, "step": 10485500 }, { "epoch": 30.35, "learning_rate": 3.482914968506855e-05, "loss": 2.0826, "step": 10486000 }, { "epoch": 30.35, "learning_rate": 3.482842603742127e-05, "loss": 2.1108, "step": 10486500 }, { "epoch": 30.36, "learning_rate": 3.482770238977399e-05, "loss": 2.1278, "step": 10487000 }, { "epoch": 30.36, "learning_rate": 3.482698018942201e-05, "loss": 2.1054, "step": 10487500 }, { "epoch": 30.36, "learning_rate": 3.482625654177473e-05, "loss": 2.1043, "step": 10488000 }, { "epoch": 30.36, "learning_rate": 3.482553289412745e-05, "loss": 2.0743, "step": 10488500 }, { "epoch": 30.36, "learning_rate": 3.482480924648018e-05, "loss": 2.1235, "step": 10489000 }, { "epoch": 30.36, "learning_rate": 3.4824087046128197e-05, "loss": 2.0896, "step": 10489500 }, { "epoch": 30.36, "learning_rate": 3.482336339848092e-05, "loss": 2.0856, "step": 10490000 }, { "epoch": 30.37, "learning_rate": 3.482263975083364e-05, "loss": 2.1063, "step": 10490500 }, { "epoch": 30.37, "learning_rate": 3.482191610318637e-05, "loss": 2.1214, "step": 10491000 }, { "epoch": 30.37, "learning_rate": 3.482119245553909e-05, "loss": 2.1202, "step": 10491500 }, { "epoch": 30.37, "learning_rate": 3.4820468807891815e-05, "loss": 2.098, "step": 10492000 }, { "epoch": 30.37, "learning_rate": 3.481974516024454e-05, "loss": 2.0809, "step": 10492500 }, { "epoch": 30.37, "learning_rate": 3.481902295989255e-05, "loss": 2.0923, "step": 10493000 }, { "epoch": 30.37, "learning_rate": 3.4818300759540575e-05, "loss": 2.082, "step": 10493500 }, { "epoch": 30.38, "learning_rate": 3.48175771118933e-05, "loss": 2.096, "step": 10494000 }, { "epoch": 30.38, "learning_rate": 3.481685346424602e-05, "loss": 2.1176, "step": 10494500 }, { "epoch": 30.38, "learning_rate": 3.481612981659874e-05, "loss": 2.1086, "step": 10495000 }, { "epoch": 30.38, "learning_rate": 3.4815406168951464e-05, "loss": 2.1244, "step": 10495500 }, { "epoch": 30.38, "learning_rate": 3.4814682521304186e-05, "loss": 2.0735, "step": 10496000 }, { "epoch": 30.38, "learning_rate": 3.481395887365691e-05, "loss": 2.1281, "step": 10496500 }, { "epoch": 30.38, "learning_rate": 3.481323522600963e-05, "loss": 2.0921, "step": 10497000 }, { "epoch": 30.39, "learning_rate": 3.481251302565765e-05, "loss": 2.1079, "step": 10497500 }, { "epoch": 30.39, "learning_rate": 3.4811789378010375e-05, "loss": 2.1079, "step": 10498000 }, { "epoch": 30.39, "learning_rate": 3.4811065730363104e-05, "loss": 2.1152, "step": 10498500 }, { "epoch": 30.39, "learning_rate": 3.4810342082715826e-05, "loss": 2.1016, "step": 10499000 }, { "epoch": 30.39, "learning_rate": 3.480961843506855e-05, "loss": 2.1077, "step": 10499500 }, { "epoch": 30.39, "learning_rate": 3.480889478742127e-05, "loss": 2.1101, "step": 10500000 }, { "epoch": 30.39, "learning_rate": 3.480817113977399e-05, "loss": 2.0933, "step": 10500500 }, { "epoch": 30.4, "learning_rate": 3.4807447492126715e-05, "loss": 2.1145, "step": 10501000 }, { "epoch": 30.4, "learning_rate": 3.480672384447944e-05, "loss": 2.0921, "step": 10501500 }, { "epoch": 30.4, "learning_rate": 3.480600019683216e-05, "loss": 2.1182, "step": 10502000 }, { "epoch": 30.4, "learning_rate": 3.480527654918488e-05, "loss": 2.1088, "step": 10502500 }, { "epoch": 30.4, "learning_rate": 3.4804552901537604e-05, "loss": 2.1015, "step": 10503000 }, { "epoch": 30.4, "learning_rate": 3.480382925389033e-05, "loss": 2.1247, "step": 10503500 }, { "epoch": 30.4, "learning_rate": 3.480310705353835e-05, "loss": 2.0996, "step": 10504000 }, { "epoch": 30.41, "learning_rate": 3.480238340589107e-05, "loss": 2.1378, "step": 10504500 }, { "epoch": 30.41, "learning_rate": 3.480165975824379e-05, "loss": 2.1126, "step": 10505000 }, { "epoch": 30.41, "learning_rate": 3.4800937557891815e-05, "loss": 2.0946, "step": 10505500 }, { "epoch": 30.41, "learning_rate": 3.480021391024454e-05, "loss": 2.1013, "step": 10506000 }, { "epoch": 30.41, "learning_rate": 3.479949026259726e-05, "loss": 2.1005, "step": 10506500 }, { "epoch": 30.41, "learning_rate": 3.479876661494998e-05, "loss": 2.0826, "step": 10507000 }, { "epoch": 30.41, "learning_rate": 3.4798042967302704e-05, "loss": 2.1098, "step": 10507500 }, { "epoch": 30.42, "learning_rate": 3.479731931965543e-05, "loss": 2.121, "step": 10508000 }, { "epoch": 30.42, "learning_rate": 3.4796595672008155e-05, "loss": 2.1286, "step": 10508500 }, { "epoch": 30.42, "learning_rate": 3.479587202436088e-05, "loss": 2.0955, "step": 10509000 }, { "epoch": 30.42, "learning_rate": 3.47951483767136e-05, "loss": 2.1016, "step": 10509500 }, { "epoch": 30.42, "learning_rate": 3.4794426176361616e-05, "loss": 2.1107, "step": 10510000 }, { "epoch": 30.42, "learning_rate": 3.479370252871434e-05, "loss": 2.0876, "step": 10510500 }, { "epoch": 30.43, "learning_rate": 3.479297888106706e-05, "loss": 2.0942, "step": 10511000 }, { "epoch": 30.43, "learning_rate": 3.479225523341978e-05, "loss": 2.1061, "step": 10511500 }, { "epoch": 30.43, "learning_rate": 3.4791531585772504e-05, "loss": 2.1005, "step": 10512000 }, { "epoch": 30.43, "learning_rate": 3.4790807938125233e-05, "loss": 2.1172, "step": 10512500 }, { "epoch": 30.43, "learning_rate": 3.4790084290477956e-05, "loss": 2.0962, "step": 10513000 }, { "epoch": 30.43, "learning_rate": 3.4789360642830685e-05, "loss": 2.0949, "step": 10513500 }, { "epoch": 30.43, "learning_rate": 3.478863699518341e-05, "loss": 2.0859, "step": 10514000 }, { "epoch": 30.44, "learning_rate": 3.478791334753613e-05, "loss": 2.0992, "step": 10514500 }, { "epoch": 30.44, "learning_rate": 3.478718969988885e-05, "loss": 2.0895, "step": 10515000 }, { "epoch": 30.44, "learning_rate": 3.478646749953687e-05, "loss": 2.1089, "step": 10515500 }, { "epoch": 30.44, "learning_rate": 3.478574385188959e-05, "loss": 2.1144, "step": 10516000 }, { "epoch": 30.44, "learning_rate": 3.478502020424231e-05, "loss": 2.101, "step": 10516500 }, { "epoch": 30.44, "learning_rate": 3.4784296556595034e-05, "loss": 2.1223, "step": 10517000 }, { "epoch": 30.44, "learning_rate": 3.4783572908947756e-05, "loss": 2.1121, "step": 10517500 }, { "epoch": 30.45, "learning_rate": 3.4782849261300485e-05, "loss": 2.1003, "step": 10518000 }, { "epoch": 30.45, "learning_rate": 3.478212561365321e-05, "loss": 2.0867, "step": 10518500 }, { "epoch": 30.45, "learning_rate": 3.478140196600593e-05, "loss": 2.1225, "step": 10519000 }, { "epoch": 30.45, "learning_rate": 3.4780679765653945e-05, "loss": 2.1012, "step": 10519500 }, { "epoch": 30.45, "learning_rate": 3.4779956118006674e-05, "loss": 2.085, "step": 10520000 }, { "epoch": 30.45, "learning_rate": 3.4779232470359396e-05, "loss": 2.1006, "step": 10520500 }, { "epoch": 30.45, "learning_rate": 3.477850882271212e-05, "loss": 2.1184, "step": 10521000 }, { "epoch": 30.46, "learning_rate": 3.4777786622360134e-05, "loss": 2.0995, "step": 10521500 }, { "epoch": 30.46, "learning_rate": 3.4777062974712856e-05, "loss": 2.0993, "step": 10522000 }, { "epoch": 30.46, "learning_rate": 3.4776339327065585e-05, "loss": 2.1256, "step": 10522500 }, { "epoch": 30.46, "learning_rate": 3.477561567941831e-05, "loss": 2.1224, "step": 10523000 }, { "epoch": 30.46, "learning_rate": 3.477489347906632e-05, "loss": 2.1225, "step": 10523500 }, { "epoch": 30.46, "learning_rate": 3.4774169831419045e-05, "loss": 2.1086, "step": 10524000 }, { "epoch": 30.46, "learning_rate": 3.477344618377177e-05, "loss": 2.0947, "step": 10524500 }, { "epoch": 30.47, "learning_rate": 3.477272253612449e-05, "loss": 2.0866, "step": 10525000 }, { "epoch": 30.47, "learning_rate": 3.477200033577251e-05, "loss": 2.1321, "step": 10525500 }, { "epoch": 30.47, "learning_rate": 3.4771276688125234e-05, "loss": 2.1213, "step": 10526000 }, { "epoch": 30.47, "learning_rate": 3.4770553040477956e-05, "loss": 2.0891, "step": 10526500 }, { "epoch": 30.47, "learning_rate": 3.476982939283068e-05, "loss": 2.1095, "step": 10527000 }, { "epoch": 30.47, "learning_rate": 3.476910574518341e-05, "loss": 2.0887, "step": 10527500 }, { "epoch": 30.47, "learning_rate": 3.476838209753613e-05, "loss": 2.1086, "step": 10528000 }, { "epoch": 30.48, "learning_rate": 3.476765844988885e-05, "loss": 2.1144, "step": 10528500 }, { "epoch": 30.48, "learning_rate": 3.4766934802241574e-05, "loss": 2.0794, "step": 10529000 }, { "epoch": 30.48, "learning_rate": 3.47662111545943e-05, "loss": 2.1258, "step": 10529500 }, { "epoch": 30.48, "learning_rate": 3.476548895424231e-05, "loss": 2.1027, "step": 10530000 }, { "epoch": 30.48, "learning_rate": 3.4764765306595034e-05, "loss": 2.0945, "step": 10530500 }, { "epoch": 30.48, "learning_rate": 3.476404310624306e-05, "loss": 2.1142, "step": 10531000 }, { "epoch": 30.48, "learning_rate": 3.476331945859578e-05, "loss": 2.0891, "step": 10531500 }, { "epoch": 30.49, "learning_rate": 3.47625958109485e-05, "loss": 2.0964, "step": 10532000 }, { "epoch": 30.49, "learning_rate": 3.4761872163301223e-05, "loss": 2.1022, "step": 10532500 }, { "epoch": 30.49, "learning_rate": 3.476114996294924e-05, "loss": 2.0993, "step": 10533000 }, { "epoch": 30.49, "learning_rate": 3.476042631530196e-05, "loss": 2.0903, "step": 10533500 }, { "epoch": 30.49, "learning_rate": 3.4759702667654684e-05, "loss": 2.1145, "step": 10534000 }, { "epoch": 30.49, "learning_rate": 3.475897902000741e-05, "loss": 2.1025, "step": 10534500 }, { "epoch": 30.49, "learning_rate": 3.4758255372360135e-05, "loss": 2.0829, "step": 10535000 }, { "epoch": 30.5, "learning_rate": 3.4757531724712864e-05, "loss": 2.0857, "step": 10535500 }, { "epoch": 30.5, "learning_rate": 3.4756808077065586e-05, "loss": 2.0852, "step": 10536000 }, { "epoch": 30.5, "learning_rate": 3.475608442941831e-05, "loss": 2.1172, "step": 10536500 }, { "epoch": 30.5, "learning_rate": 3.475536078177103e-05, "loss": 2.088, "step": 10537000 }, { "epoch": 30.5, "learning_rate": 3.475463713412375e-05, "loss": 2.1164, "step": 10537500 }, { "epoch": 30.5, "learning_rate": 3.4753913486476475e-05, "loss": 2.108, "step": 10538000 }, { "epoch": 30.5, "learning_rate": 3.47531898388292e-05, "loss": 2.1157, "step": 10538500 }, { "epoch": 30.51, "learning_rate": 3.475246619118192e-05, "loss": 2.0807, "step": 10539000 }, { "epoch": 30.51, "learning_rate": 3.475174254353464e-05, "loss": 2.0747, "step": 10539500 }, { "epoch": 30.51, "learning_rate": 3.4751018895887364e-05, "loss": 2.1153, "step": 10540000 }, { "epoch": 30.51, "learning_rate": 3.4750295248240086e-05, "loss": 2.1244, "step": 10540500 }, { "epoch": 30.51, "learning_rate": 3.474957304788811e-05, "loss": 2.0827, "step": 10541000 }, { "epoch": 30.51, "learning_rate": 3.474884940024083e-05, "loss": 2.0851, "step": 10541500 }, { "epoch": 30.51, "learning_rate": 3.474812575259355e-05, "loss": 2.0935, "step": 10542000 }, { "epoch": 30.52, "learning_rate": 3.474740210494628e-05, "loss": 2.1113, "step": 10542500 }, { "epoch": 30.52, "learning_rate": 3.4746678457299004e-05, "loss": 2.1156, "step": 10543000 }, { "epoch": 30.52, "learning_rate": 3.474595770424231e-05, "loss": 2.1006, "step": 10543500 }, { "epoch": 30.52, "learning_rate": 3.4745234056595035e-05, "loss": 2.0893, "step": 10544000 }, { "epoch": 30.52, "learning_rate": 3.474451185624306e-05, "loss": 2.108, "step": 10544500 }, { "epoch": 30.52, "learning_rate": 3.474378820859578e-05, "loss": 2.1079, "step": 10545000 }, { "epoch": 30.52, "learning_rate": 3.47430645609485e-05, "loss": 2.1155, "step": 10545500 }, { "epoch": 30.53, "learning_rate": 3.4742340913301224e-05, "loss": 2.1003, "step": 10546000 }, { "epoch": 30.53, "learning_rate": 3.4741617265653947e-05, "loss": 2.1159, "step": 10546500 }, { "epoch": 30.53, "learning_rate": 3.474089506530196e-05, "loss": 2.1369, "step": 10547000 }, { "epoch": 30.53, "learning_rate": 3.474017141765469e-05, "loss": 2.0783, "step": 10547500 }, { "epoch": 30.53, "learning_rate": 3.473944777000741e-05, "loss": 2.1329, "step": 10548000 }, { "epoch": 30.53, "learning_rate": 3.4738724122360136e-05, "loss": 2.1144, "step": 10548500 }, { "epoch": 30.54, "learning_rate": 3.473800047471286e-05, "loss": 2.1095, "step": 10549000 }, { "epoch": 30.54, "learning_rate": 3.473727682706558e-05, "loss": 2.1005, "step": 10549500 }, { "epoch": 30.54, "learning_rate": 3.473655317941831e-05, "loss": 2.1084, "step": 10550000 }, { "epoch": 30.54, "learning_rate": 3.4735830979066325e-05, "loss": 2.0824, "step": 10550500 }, { "epoch": 30.54, "learning_rate": 3.473511022600964e-05, "loss": 2.1026, "step": 10551000 }, { "epoch": 30.54, "learning_rate": 3.473438657836236e-05, "loss": 2.0889, "step": 10551500 }, { "epoch": 30.54, "learning_rate": 3.4733662930715085e-05, "loss": 2.1154, "step": 10552000 }, { "epoch": 30.55, "learning_rate": 3.473293928306781e-05, "loss": 2.0945, "step": 10552500 }, { "epoch": 30.55, "learning_rate": 3.473221563542053e-05, "loss": 2.1108, "step": 10553000 }, { "epoch": 30.55, "learning_rate": 3.473149198777325e-05, "loss": 2.0928, "step": 10553500 }, { "epoch": 30.55, "learning_rate": 3.4730768340125974e-05, "loss": 2.1428, "step": 10554000 }, { "epoch": 30.55, "learning_rate": 3.4730044692478696e-05, "loss": 2.103, "step": 10554500 }, { "epoch": 30.55, "learning_rate": 3.472932104483142e-05, "loss": 2.1319, "step": 10555000 }, { "epoch": 30.55, "learning_rate": 3.472859739718414e-05, "loss": 2.1045, "step": 10555500 }, { "epoch": 30.56, "learning_rate": 3.472787374953686e-05, "loss": 2.1106, "step": 10556000 }, { "epoch": 30.56, "learning_rate": 3.4727151549184885e-05, "loss": 2.1103, "step": 10556500 }, { "epoch": 30.56, "learning_rate": 3.472642790153761e-05, "loss": 2.1271, "step": 10557000 }, { "epoch": 30.56, "learning_rate": 3.472570425389033e-05, "loss": 2.1034, "step": 10557500 }, { "epoch": 30.56, "learning_rate": 3.472498060624306e-05, "loss": 2.088, "step": 10558000 }, { "epoch": 30.56, "learning_rate": 3.472425695859578e-05, "loss": 2.1319, "step": 10558500 }, { "epoch": 30.56, "learning_rate": 3.47235333109485e-05, "loss": 2.1103, "step": 10559000 }, { "epoch": 30.57, "learning_rate": 3.472281111059652e-05, "loss": 2.1127, "step": 10559500 }, { "epoch": 30.57, "learning_rate": 3.472208746294924e-05, "loss": 2.1161, "step": 10560000 }, { "epoch": 30.57, "learning_rate": 3.472136381530196e-05, "loss": 2.1091, "step": 10560500 }, { "epoch": 30.57, "learning_rate": 3.472064016765469e-05, "loss": 2.0902, "step": 10561000 }, { "epoch": 30.57, "learning_rate": 3.4719916520007414e-05, "loss": 2.104, "step": 10561500 }, { "epoch": 30.57, "learning_rate": 3.4719192872360136e-05, "loss": 2.1171, "step": 10562000 }, { "epoch": 30.57, "learning_rate": 3.471846922471286e-05, "loss": 2.0987, "step": 10562500 }, { "epoch": 30.58, "learning_rate": 3.4717747024360874e-05, "loss": 2.097, "step": 10563000 }, { "epoch": 30.58, "learning_rate": 3.4717023376713596e-05, "loss": 2.0914, "step": 10563500 }, { "epoch": 30.58, "learning_rate": 3.471629972906632e-05, "loss": 2.113, "step": 10564000 }, { "epoch": 30.58, "learning_rate": 3.471557608141904e-05, "loss": 2.1005, "step": 10564500 }, { "epoch": 30.58, "learning_rate": 3.471485243377176e-05, "loss": 2.1241, "step": 10565000 }, { "epoch": 30.58, "learning_rate": 3.471412878612449e-05, "loss": 2.0977, "step": 10565500 }, { "epoch": 30.58, "learning_rate": 3.4713405138477214e-05, "loss": 2.0874, "step": 10566000 }, { "epoch": 30.59, "learning_rate": 3.471268149082994e-05, "loss": 2.0743, "step": 10566500 }, { "epoch": 30.59, "learning_rate": 3.4711957843182666e-05, "loss": 2.079, "step": 10567000 }, { "epoch": 30.59, "learning_rate": 3.471123564283068e-05, "loss": 2.1048, "step": 10567500 }, { "epoch": 30.59, "learning_rate": 3.47105119951834e-05, "loss": 2.0906, "step": 10568000 }, { "epoch": 30.59, "learning_rate": 3.470978979483142e-05, "loss": 2.0914, "step": 10568500 }, { "epoch": 30.59, "learning_rate": 3.470906614718414e-05, "loss": 2.1006, "step": 10569000 }, { "epoch": 30.59, "learning_rate": 3.470834249953686e-05, "loss": 2.1233, "step": 10569500 }, { "epoch": 30.6, "learning_rate": 3.470761885188959e-05, "loss": 2.093, "step": 10570000 }, { "epoch": 30.6, "learning_rate": 3.4706895204242315e-05, "loss": 2.1286, "step": 10570500 }, { "epoch": 30.6, "learning_rate": 3.470617155659504e-05, "loss": 2.1026, "step": 10571000 }, { "epoch": 30.6, "learning_rate": 3.470544790894776e-05, "loss": 2.0967, "step": 10571500 }, { "epoch": 30.6, "learning_rate": 3.470472426130048e-05, "loss": 2.0947, "step": 10572000 }, { "epoch": 30.6, "learning_rate": 3.470400061365321e-05, "loss": 2.107, "step": 10572500 }, { "epoch": 30.6, "learning_rate": 3.470327696600593e-05, "loss": 2.0974, "step": 10573000 }, { "epoch": 30.61, "learning_rate": 3.4702553318358655e-05, "loss": 2.0987, "step": 10573500 }, { "epoch": 30.61, "learning_rate": 3.470183111800667e-05, "loss": 2.1058, "step": 10574000 }, { "epoch": 30.61, "learning_rate": 3.470110747035939e-05, "loss": 2.0925, "step": 10574500 }, { "epoch": 30.61, "learning_rate": 3.4700383822712115e-05, "loss": 2.1235, "step": 10575000 }, { "epoch": 30.61, "learning_rate": 3.4699660175064844e-05, "loss": 2.0879, "step": 10575500 }, { "epoch": 30.61, "learning_rate": 3.4698936527417566e-05, "loss": 2.1142, "step": 10576000 }, { "epoch": 30.61, "learning_rate": 3.469821287977029e-05, "loss": 2.0795, "step": 10576500 }, { "epoch": 30.62, "learning_rate": 3.469748923212301e-05, "loss": 2.1155, "step": 10577000 }, { "epoch": 30.62, "learning_rate": 3.469676558447573e-05, "loss": 2.1153, "step": 10577500 }, { "epoch": 30.62, "learning_rate": 3.469604338412375e-05, "loss": 2.1202, "step": 10578000 }, { "epoch": 30.62, "learning_rate": 3.469532118377177e-05, "loss": 2.1204, "step": 10578500 }, { "epoch": 30.62, "learning_rate": 3.469459753612449e-05, "loss": 2.1243, "step": 10579000 }, { "epoch": 30.62, "learning_rate": 3.4693873888477215e-05, "loss": 2.112, "step": 10579500 }, { "epoch": 30.62, "learning_rate": 3.4693150240829944e-05, "loss": 2.1113, "step": 10580000 }, { "epoch": 30.63, "learning_rate": 3.4692426593182666e-05, "loss": 2.0985, "step": 10580500 }, { "epoch": 30.63, "learning_rate": 3.469170294553539e-05, "loss": 2.1281, "step": 10581000 }, { "epoch": 30.63, "learning_rate": 3.469097929788811e-05, "loss": 2.0971, "step": 10581500 }, { "epoch": 30.63, "learning_rate": 3.469025565024083e-05, "loss": 2.1048, "step": 10582000 }, { "epoch": 30.63, "learning_rate": 3.4689532002593555e-05, "loss": 2.1022, "step": 10582500 }, { "epoch": 30.63, "learning_rate": 3.468880835494628e-05, "loss": 2.1156, "step": 10583000 }, { "epoch": 30.63, "learning_rate": 3.4688084707299e-05, "loss": 2.0974, "step": 10583500 }, { "epoch": 30.64, "learning_rate": 3.468736105965172e-05, "loss": 2.1066, "step": 10584000 }, { "epoch": 30.64, "learning_rate": 3.4686637412004444e-05, "loss": 2.0973, "step": 10584500 }, { "epoch": 30.64, "learning_rate": 3.4685915211652467e-05, "loss": 2.1149, "step": 10585000 }, { "epoch": 30.64, "learning_rate": 3.468519301130048e-05, "loss": 2.0976, "step": 10585500 }, { "epoch": 30.64, "learning_rate": 3.4684469363653204e-05, "loss": 2.1073, "step": 10586000 }, { "epoch": 30.64, "learning_rate": 3.4683745716005927e-05, "loss": 2.1206, "step": 10586500 }, { "epoch": 30.65, "learning_rate": 3.468302206835865e-05, "loss": 2.1014, "step": 10587000 }, { "epoch": 30.65, "learning_rate": 3.468229842071138e-05, "loss": 2.1026, "step": 10587500 }, { "epoch": 30.65, "learning_rate": 3.46815747730641e-05, "loss": 2.0665, "step": 10588000 }, { "epoch": 30.65, "learning_rate": 3.468085112541682e-05, "loss": 2.0805, "step": 10588500 }, { "epoch": 30.65, "learning_rate": 3.4680127477769545e-05, "loss": 2.1147, "step": 10589000 }, { "epoch": 30.65, "learning_rate": 3.467940383012227e-05, "loss": 2.0928, "step": 10589500 }, { "epoch": 30.65, "learning_rate": 3.467868162977029e-05, "loss": 2.1106, "step": 10590000 }, { "epoch": 30.66, "learning_rate": 3.467795798212301e-05, "loss": 2.099, "step": 10590500 }, { "epoch": 30.66, "learning_rate": 3.4677234334475734e-05, "loss": 2.1048, "step": 10591000 }, { "epoch": 30.66, "learning_rate": 3.4676510686828456e-05, "loss": 2.0968, "step": 10591500 }, { "epoch": 30.66, "learning_rate": 3.467578848647647e-05, "loss": 2.1245, "step": 10592000 }, { "epoch": 30.66, "learning_rate": 3.4675066286124494e-05, "loss": 2.107, "step": 10592500 }, { "epoch": 30.66, "learning_rate": 3.4674342638477216e-05, "loss": 2.0908, "step": 10593000 }, { "epoch": 30.66, "learning_rate": 3.467362043812523e-05, "loss": 2.0927, "step": 10593500 }, { "epoch": 30.67, "learning_rate": 3.4672896790477954e-05, "loss": 2.1311, "step": 10594000 }, { "epoch": 30.67, "learning_rate": 3.4672173142830676e-05, "loss": 2.1037, "step": 10594500 }, { "epoch": 30.67, "learning_rate": 3.4671449495183405e-05, "loss": 2.123, "step": 10595000 }, { "epoch": 30.67, "learning_rate": 3.467072729483142e-05, "loss": 2.1299, "step": 10595500 }, { "epoch": 30.67, "learning_rate": 3.467000364718414e-05, "loss": 2.0969, "step": 10596000 }, { "epoch": 30.67, "learning_rate": 3.466927999953687e-05, "loss": 2.1337, "step": 10596500 }, { "epoch": 30.67, "learning_rate": 3.4668556351889594e-05, "loss": 2.1021, "step": 10597000 }, { "epoch": 30.68, "learning_rate": 3.4667832704242316e-05, "loss": 2.1204, "step": 10597500 }, { "epoch": 30.68, "learning_rate": 3.466710905659504e-05, "loss": 2.1056, "step": 10598000 }, { "epoch": 30.68, "learning_rate": 3.466638540894776e-05, "loss": 2.1403, "step": 10598500 }, { "epoch": 30.68, "learning_rate": 3.466566176130048e-05, "loss": 2.1092, "step": 10599000 }, { "epoch": 30.68, "learning_rate": 3.46649395609485e-05, "loss": 2.1259, "step": 10599500 }, { "epoch": 30.68, "learning_rate": 3.466421591330122e-05, "loss": 2.1083, "step": 10600000 }, { "epoch": 30.68, "learning_rate": 3.466349226565394e-05, "loss": 2.1092, "step": 10600500 }, { "epoch": 30.69, "learning_rate": 3.466276861800667e-05, "loss": 2.1048, "step": 10601000 }, { "epoch": 30.69, "learning_rate": 3.4662044970359394e-05, "loss": 2.1048, "step": 10601500 }, { "epoch": 30.69, "learning_rate": 3.466132277000741e-05, "loss": 2.1221, "step": 10602000 }, { "epoch": 30.69, "learning_rate": 3.466059912236013e-05, "loss": 2.081, "step": 10602500 }, { "epoch": 30.69, "learning_rate": 3.465987547471286e-05, "loss": 2.1052, "step": 10603000 }, { "epoch": 30.69, "learning_rate": 3.465915182706558e-05, "loss": 2.107, "step": 10603500 }, { "epoch": 30.69, "learning_rate": 3.4658428179418305e-05, "loss": 2.0856, "step": 10604000 }, { "epoch": 30.7, "learning_rate": 3.465770453177103e-05, "loss": 2.115, "step": 10604500 }, { "epoch": 30.7, "learning_rate": 3.465698088412375e-05, "loss": 2.1242, "step": 10605000 }, { "epoch": 30.7, "learning_rate": 3.465625723647647e-05, "loss": 2.1285, "step": 10605500 }, { "epoch": 30.7, "learning_rate": 3.4655535036124494e-05, "loss": 2.1191, "step": 10606000 }, { "epoch": 30.7, "learning_rate": 3.465481138847722e-05, "loss": 2.1342, "step": 10606500 }, { "epoch": 30.7, "learning_rate": 3.465408774082994e-05, "loss": 2.0741, "step": 10607000 }, { "epoch": 30.7, "learning_rate": 3.465336409318266e-05, "loss": 2.1249, "step": 10607500 }, { "epoch": 30.71, "learning_rate": 3.4652640445535383e-05, "loss": 2.1406, "step": 10608000 }, { "epoch": 30.71, "learning_rate": 3.4651916797888106e-05, "loss": 2.1068, "step": 10608500 }, { "epoch": 30.71, "learning_rate": 3.465119315024083e-05, "loss": 2.112, "step": 10609000 }, { "epoch": 30.71, "learning_rate": 3.465047094988885e-05, "loss": 2.0934, "step": 10609500 }, { "epoch": 30.71, "learning_rate": 3.464974730224157e-05, "loss": 2.1061, "step": 10610000 }, { "epoch": 30.71, "learning_rate": 3.46490236545943e-05, "loss": 2.0968, "step": 10610500 }, { "epoch": 30.71, "learning_rate": 3.464830145424232e-05, "loss": 2.1406, "step": 10611000 }, { "epoch": 30.72, "learning_rate": 3.464757780659504e-05, "loss": 2.1136, "step": 10611500 }, { "epoch": 30.72, "learning_rate": 3.464685415894776e-05, "loss": 2.1141, "step": 10612000 }, { "epoch": 30.72, "learning_rate": 3.4646130511300484e-05, "loss": 2.125, "step": 10612500 }, { "epoch": 30.72, "learning_rate": 3.46454083109485e-05, "loss": 2.094, "step": 10613000 }, { "epoch": 30.72, "learning_rate": 3.464468466330122e-05, "loss": 2.1236, "step": 10613500 }, { "epoch": 30.72, "learning_rate": 3.464396101565395e-05, "loss": 2.1084, "step": 10614000 }, { "epoch": 30.72, "learning_rate": 3.464323736800667e-05, "loss": 2.108, "step": 10614500 }, { "epoch": 30.73, "learning_rate": 3.4642513720359395e-05, "loss": 2.1089, "step": 10615000 }, { "epoch": 30.73, "learning_rate": 3.464179007271212e-05, "loss": 2.126, "step": 10615500 }, { "epoch": 30.73, "learning_rate": 3.464106642506484e-05, "loss": 2.0948, "step": 10616000 }, { "epoch": 30.73, "learning_rate": 3.464034277741756e-05, "loss": 2.1059, "step": 10616500 }, { "epoch": 30.73, "learning_rate": 3.4639619129770284e-05, "loss": 2.1235, "step": 10617000 }, { "epoch": 30.73, "learning_rate": 3.4638896929418306e-05, "loss": 2.0988, "step": 10617500 }, { "epoch": 30.73, "learning_rate": 3.463817472906632e-05, "loss": 2.1331, "step": 10618000 }, { "epoch": 30.74, "learning_rate": 3.463745108141905e-05, "loss": 2.1103, "step": 10618500 }, { "epoch": 30.74, "learning_rate": 3.4636728881067066e-05, "loss": 2.1302, "step": 10619000 }, { "epoch": 30.74, "learning_rate": 3.463600523341979e-05, "loss": 2.1352, "step": 10619500 }, { "epoch": 30.74, "learning_rate": 3.463528158577251e-05, "loss": 2.134, "step": 10620000 }, { "epoch": 30.74, "learning_rate": 3.463455793812523e-05, "loss": 2.0998, "step": 10620500 }, { "epoch": 30.74, "learning_rate": 3.4633834290477955e-05, "loss": 2.0873, "step": 10621000 }, { "epoch": 30.74, "learning_rate": 3.463311064283068e-05, "loss": 2.1095, "step": 10621500 }, { "epoch": 30.75, "learning_rate": 3.46323869951834e-05, "loss": 2.0785, "step": 10622000 }, { "epoch": 30.75, "learning_rate": 3.463166334753612e-05, "loss": 2.1043, "step": 10622500 }, { "epoch": 30.75, "learning_rate": 3.4630941147184144e-05, "loss": 2.1015, "step": 10623000 }, { "epoch": 30.75, "learning_rate": 3.4630217499536867e-05, "loss": 2.1199, "step": 10623500 }, { "epoch": 30.75, "learning_rate": 3.462949385188959e-05, "loss": 2.0892, "step": 10624000 }, { "epoch": 30.75, "learning_rate": 3.462877020424231e-05, "loss": 2.1227, "step": 10624500 }, { "epoch": 30.76, "learning_rate": 3.462804655659504e-05, "loss": 2.1063, "step": 10625000 }, { "epoch": 30.76, "learning_rate": 3.462732290894776e-05, "loss": 2.1008, "step": 10625500 }, { "epoch": 30.76, "learning_rate": 3.4626599261300484e-05, "loss": 2.1084, "step": 10626000 }, { "epoch": 30.76, "learning_rate": 3.462587561365321e-05, "loss": 2.082, "step": 10626500 }, { "epoch": 30.76, "learning_rate": 3.462515341330122e-05, "loss": 2.1205, "step": 10627000 }, { "epoch": 30.76, "learning_rate": 3.4624431212949245e-05, "loss": 2.0917, "step": 10627500 }, { "epoch": 30.76, "learning_rate": 3.462370756530197e-05, "loss": 2.0926, "step": 10628000 }, { "epoch": 30.77, "learning_rate": 3.462298391765469e-05, "loss": 2.1074, "step": 10628500 }, { "epoch": 30.77, "learning_rate": 3.462226027000741e-05, "loss": 2.1121, "step": 10629000 }, { "epoch": 30.77, "learning_rate": 3.4621536622360134e-05, "loss": 2.0819, "step": 10629500 }, { "epoch": 30.77, "learning_rate": 3.4620812974712856e-05, "loss": 2.1125, "step": 10630000 }, { "epoch": 30.77, "learning_rate": 3.462008932706558e-05, "loss": 2.1056, "step": 10630500 }, { "epoch": 30.77, "learning_rate": 3.46193656794183e-05, "loss": 2.1028, "step": 10631000 }, { "epoch": 30.77, "learning_rate": 3.461864203177102e-05, "loss": 2.1043, "step": 10631500 }, { "epoch": 30.78, "learning_rate": 3.461791838412375e-05, "loss": 2.0865, "step": 10632000 }, { "epoch": 30.78, "learning_rate": 3.461719618377177e-05, "loss": 2.1143, "step": 10632500 }, { "epoch": 30.78, "learning_rate": 3.4616472536124496e-05, "loss": 2.0973, "step": 10633000 }, { "epoch": 30.78, "learning_rate": 3.461574888847722e-05, "loss": 2.1167, "step": 10633500 }, { "epoch": 30.78, "learning_rate": 3.461502524082994e-05, "loss": 2.0848, "step": 10634000 }, { "epoch": 30.78, "learning_rate": 3.461430159318266e-05, "loss": 2.1045, "step": 10634500 }, { "epoch": 30.78, "learning_rate": 3.4613577945535385e-05, "loss": 2.1214, "step": 10635000 }, { "epoch": 30.79, "learning_rate": 3.461285429788811e-05, "loss": 2.1033, "step": 10635500 }, { "epoch": 30.79, "learning_rate": 3.461213065024083e-05, "loss": 2.1035, "step": 10636000 }, { "epoch": 30.79, "learning_rate": 3.461140844988885e-05, "loss": 2.0896, "step": 10636500 }, { "epoch": 30.79, "learning_rate": 3.4610684802241574e-05, "loss": 2.1127, "step": 10637000 }, { "epoch": 30.79, "learning_rate": 3.4609961154594296e-05, "loss": 2.1077, "step": 10637500 }, { "epoch": 30.79, "learning_rate": 3.460923750694702e-05, "loss": 2.1448, "step": 10638000 }, { "epoch": 30.79, "learning_rate": 3.4608515306595034e-05, "loss": 2.1095, "step": 10638500 }, { "epoch": 30.8, "learning_rate": 3.4607791658947756e-05, "loss": 2.0712, "step": 10639000 }, { "epoch": 30.8, "learning_rate": 3.460706801130048e-05, "loss": 2.1032, "step": 10639500 }, { "epoch": 30.8, "learning_rate": 3.460634436365321e-05, "loss": 2.1296, "step": 10640000 }, { "epoch": 30.8, "learning_rate": 3.460562071600593e-05, "loss": 2.1002, "step": 10640500 }, { "epoch": 30.8, "learning_rate": 3.460489851565395e-05, "loss": 2.1017, "step": 10641000 }, { "epoch": 30.8, "learning_rate": 3.4604174868006674e-05, "loss": 2.1172, "step": 10641500 }, { "epoch": 30.8, "learning_rate": 3.4603451220359397e-05, "loss": 2.1023, "step": 10642000 }, { "epoch": 30.81, "learning_rate": 3.460272902000741e-05, "loss": 2.1137, "step": 10642500 }, { "epoch": 30.81, "learning_rate": 3.4602005372360134e-05, "loss": 2.1161, "step": 10643000 }, { "epoch": 30.81, "learning_rate": 3.460128317200815e-05, "loss": 2.1348, "step": 10643500 }, { "epoch": 30.81, "learning_rate": 3.460055952436088e-05, "loss": 2.1076, "step": 10644000 }, { "epoch": 30.81, "learning_rate": 3.45998358767136e-05, "loss": 2.1262, "step": 10644500 }, { "epoch": 30.81, "learning_rate": 3.459911222906632e-05, "loss": 2.0831, "step": 10645000 }, { "epoch": 30.81, "learning_rate": 3.4598388581419046e-05, "loss": 2.1251, "step": 10645500 }, { "epoch": 30.82, "learning_rate": 3.459766493377177e-05, "loss": 2.1163, "step": 10646000 }, { "epoch": 30.82, "learning_rate": 3.459694128612449e-05, "loss": 2.1153, "step": 10646500 }, { "epoch": 30.82, "learning_rate": 3.459621763847721e-05, "loss": 2.0995, "step": 10647000 }, { "epoch": 30.82, "learning_rate": 3.459549399082994e-05, "loss": 2.1295, "step": 10647500 }, { "epoch": 30.82, "learning_rate": 3.4594770343182664e-05, "loss": 2.0819, "step": 10648000 }, { "epoch": 30.82, "learning_rate": 3.4594046695535386e-05, "loss": 2.1078, "step": 10648500 }, { "epoch": 30.82, "learning_rate": 3.459332304788811e-05, "loss": 2.1318, "step": 10649000 }, { "epoch": 30.83, "learning_rate": 3.459259940024083e-05, "loss": 2.1269, "step": 10649500 }, { "epoch": 30.83, "learning_rate": 3.459187719988885e-05, "loss": 2.1018, "step": 10650000 }, { "epoch": 30.83, "learning_rate": 3.4591153552241575e-05, "loss": 2.1007, "step": 10650500 }, { "epoch": 30.83, "learning_rate": 3.45904299045943e-05, "loss": 2.1133, "step": 10651000 }, { "epoch": 30.83, "learning_rate": 3.458970625694702e-05, "loss": 2.1134, "step": 10651500 }, { "epoch": 30.83, "learning_rate": 3.4588984056595035e-05, "loss": 2.1079, "step": 10652000 }, { "epoch": 30.83, "learning_rate": 3.458826185624306e-05, "loss": 2.0908, "step": 10652500 }, { "epoch": 30.84, "learning_rate": 3.458753820859578e-05, "loss": 2.0961, "step": 10653000 }, { "epoch": 30.84, "learning_rate": 3.45868145609485e-05, "loss": 2.1317, "step": 10653500 }, { "epoch": 30.84, "learning_rate": 3.4586090913301224e-05, "loss": 2.0974, "step": 10654000 }, { "epoch": 30.84, "learning_rate": 3.4585367265653946e-05, "loss": 2.1093, "step": 10654500 }, { "epoch": 30.84, "learning_rate": 3.4584643618006675e-05, "loss": 2.0993, "step": 10655000 }, { "epoch": 30.84, "learning_rate": 3.45839199703594e-05, "loss": 2.1435, "step": 10655500 }, { "epoch": 30.84, "learning_rate": 3.458319777000741e-05, "loss": 2.101, "step": 10656000 }, { "epoch": 30.85, "learning_rate": 3.458247556965543e-05, "loss": 2.0932, "step": 10656500 }, { "epoch": 30.85, "learning_rate": 3.458175192200816e-05, "loss": 2.1, "step": 10657000 }, { "epoch": 30.85, "learning_rate": 3.458102827436088e-05, "loss": 2.0907, "step": 10657500 }, { "epoch": 30.85, "learning_rate": 3.45803046267136e-05, "loss": 2.115, "step": 10658000 }, { "epoch": 30.85, "learning_rate": 3.4579580979066324e-05, "loss": 2.1094, "step": 10658500 }, { "epoch": 30.85, "learning_rate": 3.4578857331419046e-05, "loss": 2.1002, "step": 10659000 }, { "epoch": 30.85, "learning_rate": 3.457813368377177e-05, "loss": 2.0962, "step": 10659500 }, { "epoch": 30.86, "learning_rate": 3.457741003612449e-05, "loss": 2.095, "step": 10660000 }, { "epoch": 30.86, "learning_rate": 3.457668638847721e-05, "loss": 2.1143, "step": 10660500 }, { "epoch": 30.86, "learning_rate": 3.4575962740829935e-05, "loss": 2.1077, "step": 10661000 }, { "epoch": 30.86, "learning_rate": 3.457523909318266e-05, "loss": 2.1018, "step": 10661500 }, { "epoch": 30.86, "learning_rate": 3.457451689283068e-05, "loss": 2.087, "step": 10662000 }, { "epoch": 30.86, "learning_rate": 3.4573794692478695e-05, "loss": 2.1108, "step": 10662500 }, { "epoch": 30.87, "learning_rate": 3.4573071044831424e-05, "loss": 2.1193, "step": 10663000 }, { "epoch": 30.87, "learning_rate": 3.457234739718415e-05, "loss": 2.1091, "step": 10663500 }, { "epoch": 30.87, "learning_rate": 3.457162374953687e-05, "loss": 2.1125, "step": 10664000 }, { "epoch": 30.87, "learning_rate": 3.457090010188959e-05, "loss": 2.0936, "step": 10664500 }, { "epoch": 30.87, "learning_rate": 3.4570176454242313e-05, "loss": 2.1075, "step": 10665000 }, { "epoch": 30.87, "learning_rate": 3.4569452806595036e-05, "loss": 2.1165, "step": 10665500 }, { "epoch": 30.87, "learning_rate": 3.456872915894776e-05, "loss": 2.1046, "step": 10666000 }, { "epoch": 30.88, "learning_rate": 3.456800551130048e-05, "loss": 2.1345, "step": 10666500 }, { "epoch": 30.88, "learning_rate": 3.456728186365321e-05, "loss": 2.132, "step": 10667000 }, { "epoch": 30.88, "learning_rate": 3.456655821600593e-05, "loss": 2.1084, "step": 10667500 }, { "epoch": 30.88, "learning_rate": 3.456583601565395e-05, "loss": 2.0939, "step": 10668000 }, { "epoch": 30.88, "learning_rate": 3.456511236800667e-05, "loss": 2.099, "step": 10668500 }, { "epoch": 30.88, "learning_rate": 3.456438872035939e-05, "loss": 2.103, "step": 10669000 }, { "epoch": 30.88, "learning_rate": 3.4563665072712114e-05, "loss": 2.1018, "step": 10669500 }, { "epoch": 30.89, "learning_rate": 3.456294142506484e-05, "loss": 2.1097, "step": 10670000 }, { "epoch": 30.89, "learning_rate": 3.456221922471286e-05, "loss": 2.1088, "step": 10670500 }, { "epoch": 30.89, "learning_rate": 3.456149557706558e-05, "loss": 2.1186, "step": 10671000 }, { "epoch": 30.89, "learning_rate": 3.456077192941831e-05, "loss": 2.1183, "step": 10671500 }, { "epoch": 30.89, "learning_rate": 3.456004828177103e-05, "loss": 2.0985, "step": 10672000 }, { "epoch": 30.89, "learning_rate": 3.4559324634123754e-05, "loss": 2.0993, "step": 10672500 }, { "epoch": 30.89, "learning_rate": 3.4558600986476476e-05, "loss": 2.1145, "step": 10673000 }, { "epoch": 30.9, "learning_rate": 3.45578773388292e-05, "loss": 2.1013, "step": 10673500 }, { "epoch": 30.9, "learning_rate": 3.455715369118192e-05, "loss": 2.1034, "step": 10674000 }, { "epoch": 30.9, "learning_rate": 3.4556431490829936e-05, "loss": 2.0987, "step": 10674500 }, { "epoch": 30.9, "learning_rate": 3.455570784318266e-05, "loss": 2.1138, "step": 10675000 }, { "epoch": 30.9, "learning_rate": 3.455498419553538e-05, "loss": 2.0997, "step": 10675500 }, { "epoch": 30.9, "learning_rate": 3.455426054788811e-05, "loss": 2.1045, "step": 10676000 }, { "epoch": 30.9, "learning_rate": 3.455353690024083e-05, "loss": 2.1221, "step": 10676500 }, { "epoch": 30.91, "learning_rate": 3.4552813252593554e-05, "loss": 2.1028, "step": 10677000 }, { "epoch": 30.91, "learning_rate": 3.455208960494628e-05, "loss": 2.0933, "step": 10677500 }, { "epoch": 30.91, "learning_rate": 3.4551365957299005e-05, "loss": 2.0702, "step": 10678000 }, { "epoch": 30.91, "learning_rate": 3.455064230965173e-05, "loss": 2.123, "step": 10678500 }, { "epoch": 30.91, "learning_rate": 3.454991866200445e-05, "loss": 2.1145, "step": 10679000 }, { "epoch": 30.91, "learning_rate": 3.454919501435717e-05, "loss": 2.1137, "step": 10679500 }, { "epoch": 30.91, "learning_rate": 3.4548471366709894e-05, "loss": 2.1112, "step": 10680000 }, { "epoch": 30.92, "learning_rate": 3.454775061365321e-05, "loss": 2.0965, "step": 10680500 }, { "epoch": 30.92, "learning_rate": 3.454702696600593e-05, "loss": 2.1289, "step": 10681000 }, { "epoch": 30.92, "learning_rate": 3.4546303318358654e-05, "loss": 2.0981, "step": 10681500 }, { "epoch": 30.92, "learning_rate": 3.4545579670711377e-05, "loss": 2.1094, "step": 10682000 }, { "epoch": 30.92, "learning_rate": 3.45448560230641e-05, "loss": 2.1064, "step": 10682500 }, { "epoch": 30.92, "learning_rate": 3.4544133822712114e-05, "loss": 2.0846, "step": 10683000 }, { "epoch": 30.92, "learning_rate": 3.454341017506484e-05, "loss": 2.145, "step": 10683500 }, { "epoch": 30.93, "learning_rate": 3.454268652741756e-05, "loss": 2.1017, "step": 10684000 }, { "epoch": 30.93, "learning_rate": 3.454196287977028e-05, "loss": 2.1485, "step": 10684500 }, { "epoch": 30.93, "learning_rate": 3.454123923212301e-05, "loss": 2.0905, "step": 10685000 }, { "epoch": 30.93, "learning_rate": 3.454051703177103e-05, "loss": 2.1336, "step": 10685500 }, { "epoch": 30.93, "learning_rate": 3.4539793384123755e-05, "loss": 2.0857, "step": 10686000 }, { "epoch": 30.93, "learning_rate": 3.453907118377177e-05, "loss": 2.1163, "step": 10686500 }, { "epoch": 30.93, "learning_rate": 3.453834753612449e-05, "loss": 2.1486, "step": 10687000 }, { "epoch": 30.94, "learning_rate": 3.453762533577251e-05, "loss": 2.108, "step": 10687500 }, { "epoch": 30.94, "learning_rate": 3.453690168812524e-05, "loss": 2.1128, "step": 10688000 }, { "epoch": 30.94, "learning_rate": 3.453617804047796e-05, "loss": 2.0781, "step": 10688500 }, { "epoch": 30.94, "learning_rate": 3.453545439283068e-05, "loss": 2.0972, "step": 10689000 }, { "epoch": 30.94, "learning_rate": 3.4534730745183404e-05, "loss": 2.1034, "step": 10689500 }, { "epoch": 30.94, "learning_rate": 3.4534007097536126e-05, "loss": 2.1139, "step": 10690000 }, { "epoch": 30.94, "learning_rate": 3.453328344988885e-05, "loss": 2.1235, "step": 10690500 }, { "epoch": 30.95, "learning_rate": 3.453255980224157e-05, "loss": 2.113, "step": 10691000 }, { "epoch": 30.95, "learning_rate": 3.453183615459429e-05, "loss": 2.109, "step": 10691500 }, { "epoch": 30.95, "learning_rate": 3.4531112506947015e-05, "loss": 2.1155, "step": 10692000 }, { "epoch": 30.95, "learning_rate": 3.4530388859299744e-05, "loss": 2.1178, "step": 10692500 }, { "epoch": 30.95, "learning_rate": 3.4529665211652466e-05, "loss": 2.0883, "step": 10693000 }, { "epoch": 30.95, "learning_rate": 3.452894301130049e-05, "loss": 2.0839, "step": 10693500 }, { "epoch": 30.95, "learning_rate": 3.452821936365321e-05, "loss": 2.109, "step": 10694000 }, { "epoch": 30.96, "learning_rate": 3.452749571600593e-05, "loss": 2.0958, "step": 10694500 }, { "epoch": 30.96, "learning_rate": 3.452677351565395e-05, "loss": 2.0982, "step": 10695000 }, { "epoch": 30.96, "learning_rate": 3.452604986800667e-05, "loss": 2.1025, "step": 10695500 }, { "epoch": 30.96, "learning_rate": 3.4525327667654686e-05, "loss": 2.0891, "step": 10696000 }, { "epoch": 30.96, "learning_rate": 3.452460402000741e-05, "loss": 2.0991, "step": 10696500 }, { "epoch": 30.96, "learning_rate": 3.452388181965543e-05, "loss": 2.1349, "step": 10697000 }, { "epoch": 30.96, "learning_rate": 3.452315817200815e-05, "loss": 2.1267, "step": 10697500 }, { "epoch": 30.97, "learning_rate": 3.4522434524360875e-05, "loss": 2.1275, "step": 10698000 }, { "epoch": 30.97, "learning_rate": 3.45217108767136e-05, "loss": 2.1063, "step": 10698500 }, { "epoch": 30.97, "learning_rate": 3.452098722906632e-05, "loss": 2.1058, "step": 10699000 }, { "epoch": 30.97, "learning_rate": 3.452026358141904e-05, "loss": 2.1145, "step": 10699500 }, { "epoch": 30.97, "learning_rate": 3.4519539933771764e-05, "loss": 2.1079, "step": 10700000 }, { "epoch": 30.97, "learning_rate": 3.451881628612449e-05, "loss": 2.1016, "step": 10700500 }, { "epoch": 30.98, "learning_rate": 3.4518092638477215e-05, "loss": 2.0703, "step": 10701000 }, { "epoch": 30.98, "learning_rate": 3.451736899082994e-05, "loss": 2.1236, "step": 10701500 }, { "epoch": 30.98, "learning_rate": 3.451664534318266e-05, "loss": 2.1111, "step": 10702000 }, { "epoch": 30.98, "learning_rate": 3.451592314283068e-05, "loss": 2.1126, "step": 10702500 }, { "epoch": 30.98, "learning_rate": 3.45152009424787e-05, "loss": 2.1096, "step": 10703000 }, { "epoch": 30.98, "learning_rate": 3.451447729483142e-05, "loss": 2.1062, "step": 10703500 }, { "epoch": 30.98, "learning_rate": 3.451375364718414e-05, "loss": 2.0982, "step": 10704000 }, { "epoch": 30.99, "learning_rate": 3.4513029999536865e-05, "loss": 2.0822, "step": 10704500 }, { "epoch": 30.99, "learning_rate": 3.451230635188959e-05, "loss": 2.1035, "step": 10705000 }, { "epoch": 30.99, "learning_rate": 3.451158415153761e-05, "loss": 2.1218, "step": 10705500 }, { "epoch": 30.99, "learning_rate": 3.451086050389033e-05, "loss": 2.1137, "step": 10706000 }, { "epoch": 30.99, "learning_rate": 3.4510136856243054e-05, "loss": 2.1128, "step": 10706500 }, { "epoch": 30.99, "learning_rate": 3.4509413208595776e-05, "loss": 2.1017, "step": 10707000 }, { "epoch": 30.99, "learning_rate": 3.45086895609485e-05, "loss": 2.0891, "step": 10707500 }, { "epoch": 31.0, "learning_rate": 3.450796591330123e-05, "loss": 2.1031, "step": 10708000 }, { "epoch": 31.0, "learning_rate": 3.450724226565395e-05, "loss": 2.0928, "step": 10708500 }, { "epoch": 31.0, "learning_rate": 3.4506520065301965e-05, "loss": 2.1086, "step": 10709000 }, { "epoch": 31.0, "learning_rate": 3.450579641765469e-05, "loss": 2.0887, "step": 10709500 }, { "epoch": 31.0, "eval_accuracy": 0.66679647998167, "eval_accuracy_mlm": 0.6318043767726271, "eval_accuracy_nsp": 0.8543835045120605, "eval_loss": 2.1734020709991455, "eval_runtime": 331.1869, "eval_samples_per_second": 1317.643, "eval_steps_per_second": 54.903, "step": 10709632 }, { "epoch": 31.0, "learning_rate": 3.4505072770007416e-05, "loss": 2.0638, "step": 10710000 }, { "epoch": 31.0, "learning_rate": 3.450434912236014e-05, "loss": 2.075, "step": 10710500 }, { "epoch": 31.0, "learning_rate": 3.450362547471286e-05, "loss": 2.0787, "step": 10711000 }, { "epoch": 31.01, "learning_rate": 3.450290182706558e-05, "loss": 2.0977, "step": 10711500 }, { "epoch": 31.01, "learning_rate": 3.4502178179418305e-05, "loss": 2.0766, "step": 10712000 }, { "epoch": 31.01, "learning_rate": 3.450145453177103e-05, "loss": 2.071, "step": 10712500 }, { "epoch": 31.01, "learning_rate": 3.450073088412375e-05, "loss": 2.0904, "step": 10713000 }, { "epoch": 31.01, "learning_rate": 3.450000723647647e-05, "loss": 2.0882, "step": 10713500 }, { "epoch": 31.01, "learning_rate": 3.449928648341979e-05, "loss": 2.0788, "step": 10714000 }, { "epoch": 31.01, "learning_rate": 3.449856283577251e-05, "loss": 2.1167, "step": 10714500 }, { "epoch": 31.02, "learning_rate": 3.449783918812523e-05, "loss": 2.0884, "step": 10715000 }, { "epoch": 31.02, "learning_rate": 3.449711554047796e-05, "loss": 2.0807, "step": 10715500 }, { "epoch": 31.02, "learning_rate": 3.449639189283068e-05, "loss": 2.0777, "step": 10716000 }, { "epoch": 31.02, "learning_rate": 3.4495668245183405e-05, "loss": 2.0795, "step": 10716500 }, { "epoch": 31.02, "learning_rate": 3.449494459753613e-05, "loss": 2.0772, "step": 10717000 }, { "epoch": 31.02, "learning_rate": 3.449422094988885e-05, "loss": 2.0969, "step": 10717500 }, { "epoch": 31.02, "learning_rate": 3.449349730224157e-05, "loss": 2.1089, "step": 10718000 }, { "epoch": 31.03, "learning_rate": 3.4492773654594294e-05, "loss": 2.0837, "step": 10718500 }, { "epoch": 31.03, "learning_rate": 3.4492050006947016e-05, "loss": 2.0876, "step": 10719000 }, { "epoch": 31.03, "learning_rate": 3.449132635929974e-05, "loss": 2.0689, "step": 10719500 }, { "epoch": 31.03, "learning_rate": 3.449060415894776e-05, "loss": 2.0878, "step": 10720000 }, { "epoch": 31.03, "learning_rate": 3.448988051130048e-05, "loss": 2.0833, "step": 10720500 }, { "epoch": 31.03, "learning_rate": 3.4489156863653206e-05, "loss": 2.12, "step": 10721000 }, { "epoch": 31.03, "learning_rate": 3.448843321600593e-05, "loss": 2.0591, "step": 10721500 }, { "epoch": 31.04, "learning_rate": 3.448770956835865e-05, "loss": 2.0979, "step": 10722000 }, { "epoch": 31.04, "learning_rate": 3.448698736800667e-05, "loss": 2.0614, "step": 10722500 }, { "epoch": 31.04, "learning_rate": 3.4486263720359395e-05, "loss": 2.0765, "step": 10723000 }, { "epoch": 31.04, "learning_rate": 3.448554152000742e-05, "loss": 2.0922, "step": 10723500 }, { "epoch": 31.04, "learning_rate": 3.448481787236014e-05, "loss": 2.0879, "step": 10724000 }, { "epoch": 31.04, "learning_rate": 3.448409422471286e-05, "loss": 2.0897, "step": 10724500 }, { "epoch": 31.04, "learning_rate": 3.4483370577065584e-05, "loss": 2.1274, "step": 10725000 }, { "epoch": 31.05, "learning_rate": 3.4482646929418306e-05, "loss": 2.068, "step": 10725500 }, { "epoch": 31.05, "learning_rate": 3.448192472906632e-05, "loss": 2.0966, "step": 10726000 }, { "epoch": 31.05, "learning_rate": 3.448120252871434e-05, "loss": 2.1057, "step": 10726500 }, { "epoch": 31.05, "learning_rate": 3.4480478881067066e-05, "loss": 2.0975, "step": 10727000 }, { "epoch": 31.05, "learning_rate": 3.447975523341979e-05, "loss": 2.1057, "step": 10727500 }, { "epoch": 31.05, "learning_rate": 3.447903158577251e-05, "loss": 2.0921, "step": 10728000 }, { "epoch": 31.05, "learning_rate": 3.447830793812523e-05, "loss": 2.071, "step": 10728500 }, { "epoch": 31.06, "learning_rate": 3.4477584290477955e-05, "loss": 2.086, "step": 10729000 }, { "epoch": 31.06, "learning_rate": 3.447686064283068e-05, "loss": 2.0777, "step": 10729500 }, { "epoch": 31.06, "learning_rate": 3.4476136995183406e-05, "loss": 2.1007, "step": 10730000 }, { "epoch": 31.06, "learning_rate": 3.447541334753613e-05, "loss": 2.0912, "step": 10730500 }, { "epoch": 31.06, "learning_rate": 3.447468969988885e-05, "loss": 2.0892, "step": 10731000 }, { "epoch": 31.06, "learning_rate": 3.447396605224157e-05, "loss": 2.0664, "step": 10731500 }, { "epoch": 31.06, "learning_rate": 3.447324385188959e-05, "loss": 2.1078, "step": 10732000 }, { "epoch": 31.07, "learning_rate": 3.447252020424232e-05, "loss": 2.0718, "step": 10732500 }, { "epoch": 31.07, "learning_rate": 3.447179655659504e-05, "loss": 2.1038, "step": 10733000 }, { "epoch": 31.07, "learning_rate": 3.447107290894776e-05, "loss": 2.0838, "step": 10733500 }, { "epoch": 31.07, "learning_rate": 3.447035070859578e-05, "loss": 2.1088, "step": 10734000 }, { "epoch": 31.07, "learning_rate": 3.44696270609485e-05, "loss": 2.0947, "step": 10734500 }, { "epoch": 31.07, "learning_rate": 3.446890341330122e-05, "loss": 2.0899, "step": 10735000 }, { "epoch": 31.07, "learning_rate": 3.4468179765653944e-05, "loss": 2.0798, "step": 10735500 }, { "epoch": 31.08, "learning_rate": 3.4467456118006666e-05, "loss": 2.1049, "step": 10736000 }, { "epoch": 31.08, "learning_rate": 3.446673391765469e-05, "loss": 2.073, "step": 10736500 }, { "epoch": 31.08, "learning_rate": 3.446601027000741e-05, "loss": 2.0686, "step": 10737000 }, { "epoch": 31.08, "learning_rate": 3.446528662236013e-05, "loss": 2.0981, "step": 10737500 }, { "epoch": 31.08, "learning_rate": 3.446456297471286e-05, "loss": 2.0835, "step": 10738000 }, { "epoch": 31.08, "learning_rate": 3.4463839327065584e-05, "loss": 2.0774, "step": 10738500 }, { "epoch": 31.09, "learning_rate": 3.4463115679418307e-05, "loss": 2.0779, "step": 10739000 }, { "epoch": 31.09, "learning_rate": 3.446239203177103e-05, "loss": 2.0736, "step": 10739500 }, { "epoch": 31.09, "learning_rate": 3.446166838412375e-05, "loss": 2.0892, "step": 10740000 }, { "epoch": 31.09, "learning_rate": 3.446094618377177e-05, "loss": 2.0819, "step": 10740500 }, { "epoch": 31.09, "learning_rate": 3.4460222536124496e-05, "loss": 2.0993, "step": 10741000 }, { "epoch": 31.09, "learning_rate": 3.445950033577251e-05, "loss": 2.0968, "step": 10741500 }, { "epoch": 31.09, "learning_rate": 3.4458776688125233e-05, "loss": 2.0609, "step": 10742000 }, { "epoch": 31.1, "learning_rate": 3.4458053040477956e-05, "loss": 2.0779, "step": 10742500 }, { "epoch": 31.1, "learning_rate": 3.445732939283068e-05, "loss": 2.1065, "step": 10743000 }, { "epoch": 31.1, "learning_rate": 3.44566057451834e-05, "loss": 2.1202, "step": 10743500 }, { "epoch": 31.1, "learning_rate": 3.445588209753612e-05, "loss": 2.0803, "step": 10744000 }, { "epoch": 31.1, "learning_rate": 3.4455158449888845e-05, "loss": 2.092, "step": 10744500 }, { "epoch": 31.1, "learning_rate": 3.4454434802241574e-05, "loss": 2.1197, "step": 10745000 }, { "epoch": 31.1, "learning_rate": 3.4453711154594296e-05, "loss": 2.1096, "step": 10745500 }, { "epoch": 31.11, "learning_rate": 3.445298750694702e-05, "loss": 2.0993, "step": 10746000 }, { "epoch": 31.11, "learning_rate": 3.445226385929975e-05, "loss": 2.0928, "step": 10746500 }, { "epoch": 31.11, "learning_rate": 3.445154021165247e-05, "loss": 2.1068, "step": 10747000 }, { "epoch": 31.11, "learning_rate": 3.445081656400519e-05, "loss": 2.1097, "step": 10747500 }, { "epoch": 31.11, "learning_rate": 3.4450092916357914e-05, "loss": 2.098, "step": 10748000 }, { "epoch": 31.11, "learning_rate": 3.4449369268710636e-05, "loss": 2.0802, "step": 10748500 }, { "epoch": 31.11, "learning_rate": 3.444864706835865e-05, "loss": 2.0878, "step": 10749000 }, { "epoch": 31.12, "learning_rate": 3.444792486800667e-05, "loss": 2.0926, "step": 10749500 }, { "epoch": 31.12, "learning_rate": 3.4447201220359396e-05, "loss": 2.0935, "step": 10750000 }, { "epoch": 31.12, "learning_rate": 3.444647757271212e-05, "loss": 2.095, "step": 10750500 }, { "epoch": 31.12, "learning_rate": 3.444575392506484e-05, "loss": 2.1155, "step": 10751000 }, { "epoch": 31.12, "learning_rate": 3.444503027741756e-05, "loss": 2.1019, "step": 10751500 }, { "epoch": 31.12, "learning_rate": 3.444430807706558e-05, "loss": 2.0898, "step": 10752000 }, { "epoch": 31.12, "learning_rate": 3.444358442941831e-05, "loss": 2.0855, "step": 10752500 }, { "epoch": 31.13, "learning_rate": 3.444286078177103e-05, "loss": 2.1153, "step": 10753000 }, { "epoch": 31.13, "learning_rate": 3.444213713412375e-05, "loss": 2.1018, "step": 10753500 }, { "epoch": 31.13, "learning_rate": 3.444141493377177e-05, "loss": 2.1045, "step": 10754000 }, { "epoch": 31.13, "learning_rate": 3.4440691286124496e-05, "loss": 2.0989, "step": 10754500 }, { "epoch": 31.13, "learning_rate": 3.443996763847722e-05, "loss": 2.0887, "step": 10755000 }, { "epoch": 31.13, "learning_rate": 3.443924688542053e-05, "loss": 2.0815, "step": 10755500 }, { "epoch": 31.13, "learning_rate": 3.443852323777325e-05, "loss": 2.0994, "step": 10756000 }, { "epoch": 31.14, "learning_rate": 3.443779959012597e-05, "loss": 2.0949, "step": 10756500 }, { "epoch": 31.14, "learning_rate": 3.4437075942478694e-05, "loss": 2.1008, "step": 10757000 }, { "epoch": 31.14, "learning_rate": 3.443635229483142e-05, "loss": 2.0858, "step": 10757500 }, { "epoch": 31.14, "learning_rate": 3.4435628647184145e-05, "loss": 2.0785, "step": 10758000 }, { "epoch": 31.14, "learning_rate": 3.443490499953687e-05, "loss": 2.0872, "step": 10758500 }, { "epoch": 31.14, "learning_rate": 3.443418135188959e-05, "loss": 2.102, "step": 10759000 }, { "epoch": 31.14, "learning_rate": 3.443345770424231e-05, "loss": 2.0882, "step": 10759500 }, { "epoch": 31.15, "learning_rate": 3.443273405659504e-05, "loss": 2.0813, "step": 10760000 }, { "epoch": 31.15, "learning_rate": 3.4432010408947763e-05, "loss": 2.1039, "step": 10760500 }, { "epoch": 31.15, "learning_rate": 3.4431286761300486e-05, "loss": 2.0822, "step": 10761000 }, { "epoch": 31.15, "learning_rate": 3.443056311365321e-05, "loss": 2.1055, "step": 10761500 }, { "epoch": 31.15, "learning_rate": 3.4429840913301223e-05, "loss": 2.0929, "step": 10762000 }, { "epoch": 31.15, "learning_rate": 3.4429117265653946e-05, "loss": 2.0828, "step": 10762500 }, { "epoch": 31.15, "learning_rate": 3.442839361800667e-05, "loss": 2.0714, "step": 10763000 }, { "epoch": 31.16, "learning_rate": 3.44276699703594e-05, "loss": 2.1033, "step": 10763500 }, { "epoch": 31.16, "learning_rate": 3.442694632271212e-05, "loss": 2.0889, "step": 10764000 }, { "epoch": 31.16, "learning_rate": 3.442622267506484e-05, "loss": 2.096, "step": 10764500 }, { "epoch": 31.16, "learning_rate": 3.4425499027417564e-05, "loss": 2.0881, "step": 10765000 }, { "epoch": 31.16, "learning_rate": 3.4424775379770286e-05, "loss": 2.0817, "step": 10765500 }, { "epoch": 31.16, "learning_rate": 3.442405173212301e-05, "loss": 2.0926, "step": 10766000 }, { "epoch": 31.16, "learning_rate": 3.4423329531771024e-05, "loss": 2.0763, "step": 10766500 }, { "epoch": 31.17, "learning_rate": 3.4422605884123746e-05, "loss": 2.0771, "step": 10767000 }, { "epoch": 31.17, "learning_rate": 3.4421882236476475e-05, "loss": 2.1136, "step": 10767500 }, { "epoch": 31.17, "learning_rate": 3.44211585888292e-05, "loss": 2.0859, "step": 10768000 }, { "epoch": 31.17, "learning_rate": 3.442043494118192e-05, "loss": 2.1017, "step": 10768500 }, { "epoch": 31.17, "learning_rate": 3.441971274082994e-05, "loss": 2.0705, "step": 10769000 }, { "epoch": 31.17, "learning_rate": 3.4418989093182664e-05, "loss": 2.083, "step": 10769500 }, { "epoch": 31.17, "learning_rate": 3.441826689283068e-05, "loss": 2.0818, "step": 10770000 }, { "epoch": 31.18, "learning_rate": 3.44175432451834e-05, "loss": 2.088, "step": 10770500 }, { "epoch": 31.18, "learning_rate": 3.4416819597536124e-05, "loss": 2.0957, "step": 10771000 }, { "epoch": 31.18, "learning_rate": 3.4416095949888846e-05, "loss": 2.0825, "step": 10771500 }, { "epoch": 31.18, "learning_rate": 3.4415372302241575e-05, "loss": 2.095, "step": 10772000 }, { "epoch": 31.18, "learning_rate": 3.44146486545943e-05, "loss": 2.0682, "step": 10772500 }, { "epoch": 31.18, "learning_rate": 3.441392645424231e-05, "loss": 2.086, "step": 10773000 }, { "epoch": 31.18, "learning_rate": 3.4413202806595035e-05, "loss": 2.1061, "step": 10773500 }, { "epoch": 31.19, "learning_rate": 3.441247915894776e-05, "loss": 2.1071, "step": 10774000 }, { "epoch": 31.19, "learning_rate": 3.441175695859577e-05, "loss": 2.1062, "step": 10774500 }, { "epoch": 31.19, "learning_rate": 3.4411033310948495e-05, "loss": 2.1015, "step": 10775000 }, { "epoch": 31.19, "learning_rate": 3.4410309663301224e-05, "loss": 2.1257, "step": 10775500 }, { "epoch": 31.19, "learning_rate": 3.4409586015653946e-05, "loss": 2.1254, "step": 10776000 }, { "epoch": 31.19, "learning_rate": 3.4408862368006675e-05, "loss": 2.0726, "step": 10776500 }, { "epoch": 31.2, "learning_rate": 3.44081387203594e-05, "loss": 2.0901, "step": 10777000 }, { "epoch": 31.2, "learning_rate": 3.440741652000741e-05, "loss": 2.1135, "step": 10777500 }, { "epoch": 31.2, "learning_rate": 3.4406692872360136e-05, "loss": 2.0998, "step": 10778000 }, { "epoch": 31.2, "learning_rate": 3.440596922471286e-05, "loss": 2.0861, "step": 10778500 }, { "epoch": 31.2, "learning_rate": 3.440524557706558e-05, "loss": 2.0851, "step": 10779000 }, { "epoch": 31.2, "learning_rate": 3.44045219294183e-05, "loss": 2.1217, "step": 10779500 }, { "epoch": 31.2, "learning_rate": 3.4403798281771024e-05, "loss": 2.0857, "step": 10780000 }, { "epoch": 31.21, "learning_rate": 3.440307463412375e-05, "loss": 2.098, "step": 10780500 }, { "epoch": 31.21, "learning_rate": 3.4402350986476476e-05, "loss": 2.0865, "step": 10781000 }, { "epoch": 31.21, "learning_rate": 3.44016273388292e-05, "loss": 2.1236, "step": 10781500 }, { "epoch": 31.21, "learning_rate": 3.440090369118192e-05, "loss": 2.0979, "step": 10782000 }, { "epoch": 31.21, "learning_rate": 3.440018004353465e-05, "loss": 2.0919, "step": 10782500 }, { "epoch": 31.21, "learning_rate": 3.439945639588737e-05, "loss": 2.0959, "step": 10783000 }, { "epoch": 31.21, "learning_rate": 3.439873419553539e-05, "loss": 2.1093, "step": 10783500 }, { "epoch": 31.22, "learning_rate": 3.439801054788811e-05, "loss": 2.1078, "step": 10784000 }, { "epoch": 31.22, "learning_rate": 3.439728690024083e-05, "loss": 2.0734, "step": 10784500 }, { "epoch": 31.22, "learning_rate": 3.4396563252593554e-05, "loss": 2.1036, "step": 10785000 }, { "epoch": 31.22, "learning_rate": 3.4395839604946276e-05, "loss": 2.1082, "step": 10785500 }, { "epoch": 31.22, "learning_rate": 3.4395115957299e-05, "loss": 2.1076, "step": 10786000 }, { "epoch": 31.22, "learning_rate": 3.439439230965173e-05, "loss": 2.0776, "step": 10786500 }, { "epoch": 31.22, "learning_rate": 3.439366866200445e-05, "loss": 2.1019, "step": 10787000 }, { "epoch": 31.23, "learning_rate": 3.439294501435717e-05, "loss": 2.0676, "step": 10787500 }, { "epoch": 31.23, "learning_rate": 3.439222281400519e-05, "loss": 2.103, "step": 10788000 }, { "epoch": 31.23, "learning_rate": 3.439149916635791e-05, "loss": 2.0785, "step": 10788500 }, { "epoch": 31.23, "learning_rate": 3.439077551871063e-05, "loss": 2.0874, "step": 10789000 }, { "epoch": 31.23, "learning_rate": 3.4390051871063354e-05, "loss": 2.0858, "step": 10789500 }, { "epoch": 31.23, "learning_rate": 3.438932822341608e-05, "loss": 2.0758, "step": 10790000 }, { "epoch": 31.23, "learning_rate": 3.43886060230641e-05, "loss": 2.0788, "step": 10790500 }, { "epoch": 31.24, "learning_rate": 3.438788237541683e-05, "loss": 2.0967, "step": 10791000 }, { "epoch": 31.24, "learning_rate": 3.438715872776955e-05, "loss": 2.1101, "step": 10791500 }, { "epoch": 31.24, "learning_rate": 3.438643508012227e-05, "loss": 2.1064, "step": 10792000 }, { "epoch": 31.24, "learning_rate": 3.438571432706558e-05, "loss": 2.0906, "step": 10792500 }, { "epoch": 31.24, "learning_rate": 3.43849906794183e-05, "loss": 2.1126, "step": 10793000 }, { "epoch": 31.24, "learning_rate": 3.4384267031771025e-05, "loss": 2.1057, "step": 10793500 }, { "epoch": 31.24, "learning_rate": 3.438354338412375e-05, "loss": 2.1015, "step": 10794000 }, { "epoch": 31.25, "learning_rate": 3.4382819736476476e-05, "loss": 2.0912, "step": 10794500 }, { "epoch": 31.25, "learning_rate": 3.43820960888292e-05, "loss": 2.0883, "step": 10795000 }, { "epoch": 31.25, "learning_rate": 3.438137244118192e-05, "loss": 2.0857, "step": 10795500 }, { "epoch": 31.25, "learning_rate": 3.438064879353464e-05, "loss": 2.0828, "step": 10796000 }, { "epoch": 31.25, "learning_rate": 3.4379925145887365e-05, "loss": 2.0939, "step": 10796500 }, { "epoch": 31.25, "learning_rate": 3.437920149824009e-05, "loss": 2.1193, "step": 10797000 }, { "epoch": 31.25, "learning_rate": 3.437847785059282e-05, "loss": 2.0771, "step": 10797500 }, { "epoch": 31.26, "learning_rate": 3.437775420294554e-05, "loss": 2.1036, "step": 10798000 }, { "epoch": 31.26, "learning_rate": 3.437703055529826e-05, "loss": 2.119, "step": 10798500 }, { "epoch": 31.26, "learning_rate": 3.437630835494628e-05, "loss": 2.0783, "step": 10799000 }, { "epoch": 31.26, "learning_rate": 3.43755861545943e-05, "loss": 2.1315, "step": 10799500 }, { "epoch": 31.26, "learning_rate": 3.4374863954242315e-05, "loss": 2.0895, "step": 10800000 }, { "epoch": 31.26, "learning_rate": 3.437414030659504e-05, "loss": 2.095, "step": 10800500 }, { "epoch": 31.26, "learning_rate": 3.437341665894776e-05, "loss": 2.0913, "step": 10801000 }, { "epoch": 31.27, "learning_rate": 3.437269301130048e-05, "loss": 2.0927, "step": 10801500 }, { "epoch": 31.27, "learning_rate": 3.4371969363653204e-05, "loss": 2.0789, "step": 10802000 }, { "epoch": 31.27, "learning_rate": 3.4371245716005926e-05, "loss": 2.1138, "step": 10802500 }, { "epoch": 31.27, "learning_rate": 3.4370522068358655e-05, "loss": 2.0896, "step": 10803000 }, { "epoch": 31.27, "learning_rate": 3.436979842071138e-05, "loss": 2.1014, "step": 10803500 }, { "epoch": 31.27, "learning_rate": 3.436907622035939e-05, "loss": 2.1004, "step": 10804000 }, { "epoch": 31.27, "learning_rate": 3.4368352572712115e-05, "loss": 2.0893, "step": 10804500 }, { "epoch": 31.28, "learning_rate": 3.4367628925064844e-05, "loss": 2.1175, "step": 10805000 }, { "epoch": 31.28, "learning_rate": 3.436690672471286e-05, "loss": 2.1006, "step": 10805500 }, { "epoch": 31.28, "learning_rate": 3.436618307706558e-05, "loss": 2.0857, "step": 10806000 }, { "epoch": 31.28, "learning_rate": 3.4365459429418304e-05, "loss": 2.1031, "step": 10806500 }, { "epoch": 31.28, "learning_rate": 3.4364735781771026e-05, "loss": 2.0897, "step": 10807000 }, { "epoch": 31.28, "learning_rate": 3.4364012134123755e-05, "loss": 2.0926, "step": 10807500 }, { "epoch": 31.28, "learning_rate": 3.436328848647648e-05, "loss": 2.0875, "step": 10808000 }, { "epoch": 31.29, "learning_rate": 3.43625648388292e-05, "loss": 2.0746, "step": 10808500 }, { "epoch": 31.29, "learning_rate": 3.4361842638477215e-05, "loss": 2.0896, "step": 10809000 }, { "epoch": 31.29, "learning_rate": 3.436111899082994e-05, "loss": 2.0824, "step": 10809500 }, { "epoch": 31.29, "learning_rate": 3.436039534318266e-05, "loss": 2.1134, "step": 10810000 }, { "epoch": 31.29, "learning_rate": 3.435967169553538e-05, "loss": 2.1017, "step": 10810500 }, { "epoch": 31.29, "learning_rate": 3.4358948047888104e-05, "loss": 2.0751, "step": 10811000 }, { "epoch": 31.29, "learning_rate": 3.4358224400240826e-05, "loss": 2.0961, "step": 10811500 }, { "epoch": 31.3, "learning_rate": 3.4357500752593555e-05, "loss": 2.1102, "step": 10812000 }, { "epoch": 31.3, "learning_rate": 3.435677710494628e-05, "loss": 2.0795, "step": 10812500 }, { "epoch": 31.3, "learning_rate": 3.4356053457299007e-05, "loss": 2.0851, "step": 10813000 }, { "epoch": 31.3, "learning_rate": 3.435532980965173e-05, "loss": 2.0845, "step": 10813500 }, { "epoch": 31.3, "learning_rate": 3.435460616200445e-05, "loss": 2.0845, "step": 10814000 }, { "epoch": 31.3, "learning_rate": 3.435388251435717e-05, "loss": 2.0991, "step": 10814500 }, { "epoch": 31.3, "learning_rate": 3.435316031400519e-05, "loss": 2.1168, "step": 10815000 }, { "epoch": 31.31, "learning_rate": 3.435243666635791e-05, "loss": 2.098, "step": 10815500 }, { "epoch": 31.31, "learning_rate": 3.4351714466005927e-05, "loss": 2.1273, "step": 10816000 }, { "epoch": 31.31, "learning_rate": 3.435099226565395e-05, "loss": 2.1069, "step": 10816500 }, { "epoch": 31.31, "learning_rate": 3.435026861800667e-05, "loss": 2.09, "step": 10817000 }, { "epoch": 31.31, "learning_rate": 3.434954497035939e-05, "loss": 2.1015, "step": 10817500 }, { "epoch": 31.31, "learning_rate": 3.4348821322712116e-05, "loss": 2.0699, "step": 10818000 }, { "epoch": 31.32, "learning_rate": 3.434809767506484e-05, "loss": 2.0949, "step": 10818500 }, { "epoch": 31.32, "learning_rate": 3.434737547471285e-05, "loss": 2.1007, "step": 10819000 }, { "epoch": 31.32, "learning_rate": 3.434665182706558e-05, "loss": 2.0969, "step": 10819500 }, { "epoch": 31.32, "learning_rate": 3.4345928179418305e-05, "loss": 2.0821, "step": 10820000 }, { "epoch": 31.32, "learning_rate": 3.4345204531771034e-05, "loss": 2.108, "step": 10820500 }, { "epoch": 31.32, "learning_rate": 3.4344480884123756e-05, "loss": 2.1132, "step": 10821000 }, { "epoch": 31.32, "learning_rate": 3.434375723647648e-05, "loss": 2.1031, "step": 10821500 }, { "epoch": 31.33, "learning_rate": 3.43430335888292e-05, "loss": 2.0709, "step": 10822000 }, { "epoch": 31.33, "learning_rate": 3.434230994118192e-05, "loss": 2.0799, "step": 10822500 }, { "epoch": 31.33, "learning_rate": 3.4341586293534645e-05, "loss": 2.1011, "step": 10823000 }, { "epoch": 31.33, "learning_rate": 3.434086409318266e-05, "loss": 2.1128, "step": 10823500 }, { "epoch": 31.33, "learning_rate": 3.434014044553538e-05, "loss": 2.0982, "step": 10824000 }, { "epoch": 31.33, "learning_rate": 3.4339416797888105e-05, "loss": 2.1034, "step": 10824500 }, { "epoch": 31.33, "learning_rate": 3.4338693150240834e-05, "loss": 2.0936, "step": 10825000 }, { "epoch": 31.34, "learning_rate": 3.4337969502593556e-05, "loss": 2.1077, "step": 10825500 }, { "epoch": 31.34, "learning_rate": 3.433724730224157e-05, "loss": 2.1107, "step": 10826000 }, { "epoch": 31.34, "learning_rate": 3.4336523654594294e-05, "loss": 2.1106, "step": 10826500 }, { "epoch": 31.34, "learning_rate": 3.4335800006947016e-05, "loss": 2.1106, "step": 10827000 }, { "epoch": 31.34, "learning_rate": 3.4335076359299745e-05, "loss": 2.0784, "step": 10827500 }, { "epoch": 31.34, "learning_rate": 3.433435415894776e-05, "loss": 2.1091, "step": 10828000 }, { "epoch": 31.34, "learning_rate": 3.433363051130048e-05, "loss": 2.1297, "step": 10828500 }, { "epoch": 31.35, "learning_rate": 3.4332908310948505e-05, "loss": 2.1143, "step": 10829000 }, { "epoch": 31.35, "learning_rate": 3.433218466330123e-05, "loss": 2.0899, "step": 10829500 }, { "epoch": 31.35, "learning_rate": 3.433146101565395e-05, "loss": 2.1223, "step": 10830000 }, { "epoch": 31.35, "learning_rate": 3.433073736800667e-05, "loss": 2.0839, "step": 10830500 }, { "epoch": 31.35, "learning_rate": 3.4330013720359394e-05, "loss": 2.0801, "step": 10831000 }, { "epoch": 31.35, "learning_rate": 3.432929152000741e-05, "loss": 2.0864, "step": 10831500 }, { "epoch": 31.35, "learning_rate": 3.432856787236013e-05, "loss": 2.0825, "step": 10832000 }, { "epoch": 31.36, "learning_rate": 3.4327845672008154e-05, "loss": 2.1311, "step": 10832500 }, { "epoch": 31.36, "learning_rate": 3.4327122024360876e-05, "loss": 2.0945, "step": 10833000 }, { "epoch": 31.36, "learning_rate": 3.432639982400889e-05, "loss": 2.0941, "step": 10833500 }, { "epoch": 31.36, "learning_rate": 3.4325676176361614e-05, "loss": 2.1079, "step": 10834000 }, { "epoch": 31.36, "learning_rate": 3.4324952528714337e-05, "loss": 2.055, "step": 10834500 }, { "epoch": 31.36, "learning_rate": 3.432422888106706e-05, "loss": 2.1063, "step": 10835000 }, { "epoch": 31.36, "learning_rate": 3.432350523341979e-05, "loss": 2.0908, "step": 10835500 }, { "epoch": 31.37, "learning_rate": 3.432278158577251e-05, "loss": 2.1023, "step": 10836000 }, { "epoch": 31.37, "learning_rate": 3.432205793812523e-05, "loss": 2.0847, "step": 10836500 }, { "epoch": 31.37, "learning_rate": 3.4321334290477954e-05, "loss": 2.0946, "step": 10837000 }, { "epoch": 31.37, "learning_rate": 3.4320610642830683e-05, "loss": 2.0747, "step": 10837500 }, { "epoch": 31.37, "learning_rate": 3.4319886995183406e-05, "loss": 2.1078, "step": 10838000 }, { "epoch": 31.37, "learning_rate": 3.431916334753613e-05, "loss": 2.0835, "step": 10838500 }, { "epoch": 31.37, "learning_rate": 3.431843969988885e-05, "loss": 2.1133, "step": 10839000 }, { "epoch": 31.38, "learning_rate": 3.431771605224157e-05, "loss": 2.1159, "step": 10839500 }, { "epoch": 31.38, "learning_rate": 3.4316992404594295e-05, "loss": 2.0959, "step": 10840000 }, { "epoch": 31.38, "learning_rate": 3.431627020424231e-05, "loss": 2.1028, "step": 10840500 }, { "epoch": 31.38, "learning_rate": 3.431554655659503e-05, "loss": 2.0914, "step": 10841000 }, { "epoch": 31.38, "learning_rate": 3.4314822908947755e-05, "loss": 2.0882, "step": 10841500 }, { "epoch": 31.38, "learning_rate": 3.4314099261300484e-05, "loss": 2.1068, "step": 10842000 }, { "epoch": 31.38, "learning_rate": 3.4313375613653206e-05, "loss": 2.1187, "step": 10842500 }, { "epoch": 31.39, "learning_rate": 3.4312651966005935e-05, "loss": 2.0559, "step": 10843000 }, { "epoch": 31.39, "learning_rate": 3.431192831835866e-05, "loss": 2.0938, "step": 10843500 }, { "epoch": 31.39, "learning_rate": 3.431120611800667e-05, "loss": 2.1121, "step": 10844000 }, { "epoch": 31.39, "learning_rate": 3.4310482470359395e-05, "loss": 2.1055, "step": 10844500 }, { "epoch": 31.39, "learning_rate": 3.430975882271212e-05, "loss": 2.0949, "step": 10845000 }, { "epoch": 31.39, "learning_rate": 3.430903517506484e-05, "loss": 2.1139, "step": 10845500 }, { "epoch": 31.39, "learning_rate": 3.430831152741756e-05, "loss": 2.1157, "step": 10846000 }, { "epoch": 31.4, "learning_rate": 3.4307587879770284e-05, "loss": 2.1047, "step": 10846500 }, { "epoch": 31.4, "learning_rate": 3.4306865679418306e-05, "loss": 2.0747, "step": 10847000 }, { "epoch": 31.4, "learning_rate": 3.430614347906632e-05, "loss": 2.1091, "step": 10847500 }, { "epoch": 31.4, "learning_rate": 3.4305419831419044e-05, "loss": 2.0779, "step": 10848000 }, { "epoch": 31.4, "learning_rate": 3.4304696183771766e-05, "loss": 2.0986, "step": 10848500 }, { "epoch": 31.4, "learning_rate": 3.430397253612449e-05, "loss": 2.0735, "step": 10849000 }, { "epoch": 31.4, "learning_rate": 3.430325033577251e-05, "loss": 2.1085, "step": 10849500 }, { "epoch": 31.41, "learning_rate": 3.430252668812523e-05, "loss": 2.0823, "step": 10850000 }, { "epoch": 31.41, "learning_rate": 3.430180304047796e-05, "loss": 2.0913, "step": 10850500 }, { "epoch": 31.41, "learning_rate": 3.4301079392830684e-05, "loss": 2.0983, "step": 10851000 }, { "epoch": 31.41, "learning_rate": 3.4300355745183406e-05, "loss": 2.0837, "step": 10851500 }, { "epoch": 31.41, "learning_rate": 3.429963209753613e-05, "loss": 2.0772, "step": 10852000 }, { "epoch": 31.41, "learning_rate": 3.429890844988885e-05, "loss": 2.1056, "step": 10852500 }, { "epoch": 31.41, "learning_rate": 3.429818480224157e-05, "loss": 2.0794, "step": 10853000 }, { "epoch": 31.42, "learning_rate": 3.429746260188959e-05, "loss": 2.0961, "step": 10853500 }, { "epoch": 31.42, "learning_rate": 3.429673895424231e-05, "loss": 2.0784, "step": 10854000 }, { "epoch": 31.42, "learning_rate": 3.429601530659503e-05, "loss": 2.1127, "step": 10854500 }, { "epoch": 31.42, "learning_rate": 3.4295293106243056e-05, "loss": 2.0786, "step": 10855000 }, { "epoch": 31.42, "learning_rate": 3.429456945859578e-05, "loss": 2.0987, "step": 10855500 }, { "epoch": 31.42, "learning_rate": 3.42938458109485e-05, "loss": 2.0833, "step": 10856000 }, { "epoch": 31.43, "learning_rate": 3.429312216330122e-05, "loss": 2.1303, "step": 10856500 }, { "epoch": 31.43, "learning_rate": 3.4292398515653944e-05, "loss": 2.1016, "step": 10857000 }, { "epoch": 31.43, "learning_rate": 3.4291674868006673e-05, "loss": 2.0897, "step": 10857500 }, { "epoch": 31.43, "learning_rate": 3.4290951220359396e-05, "loss": 2.0975, "step": 10858000 }, { "epoch": 31.43, "learning_rate": 3.429022902000741e-05, "loss": 2.0983, "step": 10858500 }, { "epoch": 31.43, "learning_rate": 3.4289505372360134e-05, "loss": 2.1078, "step": 10859000 }, { "epoch": 31.43, "learning_rate": 3.428878172471286e-05, "loss": 2.0993, "step": 10859500 }, { "epoch": 31.44, "learning_rate": 3.4288058077065585e-05, "loss": 2.1001, "step": 10860000 }, { "epoch": 31.44, "learning_rate": 3.428733442941831e-05, "loss": 2.0968, "step": 10860500 }, { "epoch": 31.44, "learning_rate": 3.428661078177103e-05, "loss": 2.1032, "step": 10861000 }, { "epoch": 31.44, "learning_rate": 3.428588713412375e-05, "loss": 2.0934, "step": 10861500 }, { "epoch": 31.44, "learning_rate": 3.4285163486476474e-05, "loss": 2.096, "step": 10862000 }, { "epoch": 31.44, "learning_rate": 3.4284439838829196e-05, "loss": 2.0808, "step": 10862500 }, { "epoch": 31.44, "learning_rate": 3.428371619118192e-05, "loss": 2.0996, "step": 10863000 }, { "epoch": 31.45, "learning_rate": 3.428299254353464e-05, "loss": 2.0976, "step": 10863500 }, { "epoch": 31.45, "learning_rate": 3.428226889588736e-05, "loss": 2.0983, "step": 10864000 }, { "epoch": 31.45, "learning_rate": 3.4281545248240085e-05, "loss": 2.1039, "step": 10864500 }, { "epoch": 31.45, "learning_rate": 3.4280821600592814e-05, "loss": 2.0984, "step": 10865000 }, { "epoch": 31.45, "learning_rate": 3.4280097952945536e-05, "loss": 2.0732, "step": 10865500 }, { "epoch": 31.45, "learning_rate": 3.427937575259356e-05, "loss": 2.0955, "step": 10866000 }, { "epoch": 31.45, "learning_rate": 3.427865210494628e-05, "loss": 2.0859, "step": 10866500 }, { "epoch": 31.46, "learning_rate": 3.4277928457299e-05, "loss": 2.1031, "step": 10867000 }, { "epoch": 31.46, "learning_rate": 3.4277204809651725e-05, "loss": 2.1112, "step": 10867500 }, { "epoch": 31.46, "learning_rate": 3.427648260929974e-05, "loss": 2.1046, "step": 10868000 }, { "epoch": 31.46, "learning_rate": 3.427575896165246e-05, "loss": 2.1204, "step": 10868500 }, { "epoch": 31.46, "learning_rate": 3.4275035314005185e-05, "loss": 2.0923, "step": 10869000 }, { "epoch": 31.46, "learning_rate": 3.427431311365321e-05, "loss": 2.1067, "step": 10869500 }, { "epoch": 31.46, "learning_rate": 3.427358946600593e-05, "loss": 2.0891, "step": 10870000 }, { "epoch": 31.47, "learning_rate": 3.427286581835865e-05, "loss": 2.1113, "step": 10870500 }, { "epoch": 31.47, "learning_rate": 3.4272142170711374e-05, "loss": 2.0872, "step": 10871000 }, { "epoch": 31.47, "learning_rate": 3.4271418523064096e-05, "loss": 2.0944, "step": 10871500 }, { "epoch": 31.47, "learning_rate": 3.427069632271211e-05, "loss": 2.0745, "step": 10872000 }, { "epoch": 31.47, "learning_rate": 3.426997267506484e-05, "loss": 2.0872, "step": 10872500 }, { "epoch": 31.47, "learning_rate": 3.426924902741756e-05, "loss": 2.0983, "step": 10873000 }, { "epoch": 31.47, "learning_rate": 3.4268525379770285e-05, "loss": 2.1221, "step": 10873500 }, { "epoch": 31.48, "learning_rate": 3.4267801732123014e-05, "loss": 2.1001, "step": 10874000 }, { "epoch": 31.48, "learning_rate": 3.426707808447574e-05, "loss": 2.0905, "step": 10874500 }, { "epoch": 31.48, "learning_rate": 3.426635588412375e-05, "loss": 2.087, "step": 10875000 }, { "epoch": 31.48, "learning_rate": 3.4265632236476474e-05, "loss": 2.1269, "step": 10875500 }, { "epoch": 31.48, "learning_rate": 3.42649085888292e-05, "loss": 2.1054, "step": 10876000 }, { "epoch": 31.48, "learning_rate": 3.426418494118192e-05, "loss": 2.0945, "step": 10876500 }, { "epoch": 31.48, "learning_rate": 3.426346274082994e-05, "loss": 2.0911, "step": 10877000 }, { "epoch": 31.49, "learning_rate": 3.4262739093182664e-05, "loss": 2.0731, "step": 10877500 }, { "epoch": 31.49, "learning_rate": 3.4262015445535386e-05, "loss": 2.096, "step": 10878000 }, { "epoch": 31.49, "learning_rate": 3.426129179788811e-05, "loss": 2.094, "step": 10878500 }, { "epoch": 31.49, "learning_rate": 3.426056815024083e-05, "loss": 2.0833, "step": 10879000 }, { "epoch": 31.49, "learning_rate": 3.425984450259355e-05, "loss": 2.1098, "step": 10879500 }, { "epoch": 31.49, "learning_rate": 3.425912085494628e-05, "loss": 2.1133, "step": 10880000 }, { "epoch": 31.49, "learning_rate": 3.4258397207299004e-05, "loss": 2.1041, "step": 10880500 }, { "epoch": 31.5, "learning_rate": 3.4257673559651726e-05, "loss": 2.1054, "step": 10881000 }, { "epoch": 31.5, "learning_rate": 3.425695135929974e-05, "loss": 2.0711, "step": 10881500 }, { "epoch": 31.5, "learning_rate": 3.4256227711652464e-05, "loss": 2.0978, "step": 10882000 }, { "epoch": 31.5, "learning_rate": 3.4255505511300486e-05, "loss": 2.0898, "step": 10882500 }, { "epoch": 31.5, "learning_rate": 3.425478186365321e-05, "loss": 2.1039, "step": 10883000 }, { "epoch": 31.5, "learning_rate": 3.425405821600593e-05, "loss": 2.068, "step": 10883500 }, { "epoch": 31.5, "learning_rate": 3.425333456835865e-05, "loss": 2.0865, "step": 10884000 }, { "epoch": 31.51, "learning_rate": 3.4252610920711375e-05, "loss": 2.0796, "step": 10884500 }, { "epoch": 31.51, "learning_rate": 3.42518872730641e-05, "loss": 2.084, "step": 10885000 }, { "epoch": 31.51, "learning_rate": 3.425116362541682e-05, "loss": 2.0894, "step": 10885500 }, { "epoch": 31.51, "learning_rate": 3.425043997776954e-05, "loss": 2.0892, "step": 10886000 }, { "epoch": 31.51, "learning_rate": 3.4249716330122264e-05, "loss": 2.1096, "step": 10886500 }, { "epoch": 31.51, "learning_rate": 3.424899268247499e-05, "loss": 2.0915, "step": 10887000 }, { "epoch": 31.51, "learning_rate": 3.4248270482123015e-05, "loss": 2.0995, "step": 10887500 }, { "epoch": 31.52, "learning_rate": 3.424754683447574e-05, "loss": 2.11, "step": 10888000 }, { "epoch": 31.52, "learning_rate": 3.424682318682846e-05, "loss": 2.0688, "step": 10888500 }, { "epoch": 31.52, "learning_rate": 3.424609953918118e-05, "loss": 2.093, "step": 10889000 }, { "epoch": 31.52, "learning_rate": 3.4245375891533904e-05, "loss": 2.1091, "step": 10889500 }, { "epoch": 31.52, "learning_rate": 3.4244652243886626e-05, "loss": 2.1233, "step": 10890000 }, { "epoch": 31.52, "learning_rate": 3.424392859623935e-05, "loss": 2.0905, "step": 10890500 }, { "epoch": 31.52, "learning_rate": 3.424320494859207e-05, "loss": 2.0978, "step": 10891000 }, { "epoch": 31.53, "learning_rate": 3.424248130094479e-05, "loss": 2.0914, "step": 10891500 }, { "epoch": 31.53, "learning_rate": 3.4241757653297515e-05, "loss": 2.0883, "step": 10892000 }, { "epoch": 31.53, "learning_rate": 3.4241034005650244e-05, "loss": 2.0893, "step": 10892500 }, { "epoch": 31.53, "learning_rate": 3.424031325259355e-05, "loss": 2.1158, "step": 10893000 }, { "epoch": 31.53, "learning_rate": 3.4239589604946275e-05, "loss": 2.09, "step": 10893500 }, { "epoch": 31.53, "learning_rate": 3.4238865957299e-05, "loss": 2.1035, "step": 10894000 }, { "epoch": 31.54, "learning_rate": 3.423814375694701e-05, "loss": 2.0978, "step": 10894500 }, { "epoch": 31.54, "learning_rate": 3.423742010929974e-05, "loss": 2.1041, "step": 10895000 }, { "epoch": 31.54, "learning_rate": 3.4236696461652465e-05, "loss": 2.1077, "step": 10895500 }, { "epoch": 31.54, "learning_rate": 3.4235972814005194e-05, "loss": 2.0975, "step": 10896000 }, { "epoch": 31.54, "learning_rate": 3.4235249166357916e-05, "loss": 2.0728, "step": 10896500 }, { "epoch": 31.54, "learning_rate": 3.423452551871064e-05, "loss": 2.0961, "step": 10897000 }, { "epoch": 31.54, "learning_rate": 3.423380187106336e-05, "loss": 2.0974, "step": 10897500 }, { "epoch": 31.55, "learning_rate": 3.423307822341608e-05, "loss": 2.0854, "step": 10898000 }, { "epoch": 31.55, "learning_rate": 3.4232354575768805e-05, "loss": 2.0985, "step": 10898500 }, { "epoch": 31.55, "learning_rate": 3.423163092812153e-05, "loss": 2.0783, "step": 10899000 }, { "epoch": 31.55, "learning_rate": 3.423090728047425e-05, "loss": 2.1012, "step": 10899500 }, { "epoch": 31.55, "learning_rate": 3.423018363282697e-05, "loss": 2.0991, "step": 10900000 }, { "epoch": 31.55, "learning_rate": 3.4229459985179694e-05, "loss": 2.0948, "step": 10900500 }, { "epoch": 31.55, "learning_rate": 3.4228736337532416e-05, "loss": 2.0835, "step": 10901000 }, { "epoch": 31.56, "learning_rate": 3.4228012689885145e-05, "loss": 2.1164, "step": 10901500 }, { "epoch": 31.56, "learning_rate": 3.422728904223787e-05, "loss": 2.0969, "step": 10902000 }, { "epoch": 31.56, "learning_rate": 3.4226565394590596e-05, "loss": 2.0923, "step": 10902500 }, { "epoch": 31.56, "learning_rate": 3.422584319423861e-05, "loss": 2.0715, "step": 10903000 }, { "epoch": 31.56, "learning_rate": 3.4225119546591334e-05, "loss": 2.1102, "step": 10903500 }, { "epoch": 31.56, "learning_rate": 3.4224395898944056e-05, "loss": 2.1148, "step": 10904000 }, { "epoch": 31.56, "learning_rate": 3.422367369859207e-05, "loss": 2.1002, "step": 10904500 }, { "epoch": 31.57, "learning_rate": 3.4222950050944794e-05, "loss": 2.1048, "step": 10905000 }, { "epoch": 31.57, "learning_rate": 3.4222226403297516e-05, "loss": 2.1031, "step": 10905500 }, { "epoch": 31.57, "learning_rate": 3.422150420294554e-05, "loss": 2.0951, "step": 10906000 }, { "epoch": 31.57, "learning_rate": 3.422078055529826e-05, "loss": 2.0905, "step": 10906500 }, { "epoch": 31.57, "learning_rate": 3.4220058354946276e-05, "loss": 2.1179, "step": 10907000 }, { "epoch": 31.57, "learning_rate": 3.4219334707299e-05, "loss": 2.1117, "step": 10907500 }, { "epoch": 31.57, "learning_rate": 3.421861105965172e-05, "loss": 2.0828, "step": 10908000 }, { "epoch": 31.58, "learning_rate": 3.421788741200444e-05, "loss": 2.1007, "step": 10908500 }, { "epoch": 31.58, "learning_rate": 3.4217163764357165e-05, "loss": 2.1162, "step": 10909000 }, { "epoch": 31.58, "learning_rate": 3.4216440116709894e-05, "loss": 2.1099, "step": 10909500 }, { "epoch": 31.58, "learning_rate": 3.4215716469062616e-05, "loss": 2.1086, "step": 10910000 }, { "epoch": 31.58, "learning_rate": 3.4214992821415345e-05, "loss": 2.0836, "step": 10910500 }, { "epoch": 31.58, "learning_rate": 3.421426917376807e-05, "loss": 2.0936, "step": 10911000 }, { "epoch": 31.58, "learning_rate": 3.421354552612079e-05, "loss": 2.0824, "step": 10911500 }, { "epoch": 31.59, "learning_rate": 3.421282187847351e-05, "loss": 2.0822, "step": 10912000 }, { "epoch": 31.59, "learning_rate": 3.4212098230826234e-05, "loss": 2.1023, "step": 10912500 }, { "epoch": 31.59, "learning_rate": 3.421137458317896e-05, "loss": 2.0839, "step": 10913000 }, { "epoch": 31.59, "learning_rate": 3.421065093553168e-05, "loss": 2.1395, "step": 10913500 }, { "epoch": 31.59, "learning_rate": 3.42099272878844e-05, "loss": 2.1137, "step": 10914000 }, { "epoch": 31.59, "learning_rate": 3.420920364023712e-05, "loss": 2.1097, "step": 10914500 }, { "epoch": 31.59, "learning_rate": 3.4208479992589846e-05, "loss": 2.1132, "step": 10915000 }, { "epoch": 31.6, "learning_rate": 3.420775634494257e-05, "loss": 2.1244, "step": 10915500 }, { "epoch": 31.6, "learning_rate": 3.42070326972953e-05, "loss": 2.0953, "step": 10916000 }, { "epoch": 31.6, "learning_rate": 3.420630904964802e-05, "loss": 2.0949, "step": 10916500 }, { "epoch": 31.6, "learning_rate": 3.420558540200075e-05, "loss": 2.1194, "step": 10917000 }, { "epoch": 31.6, "learning_rate": 3.4204863201648764e-05, "loss": 2.0949, "step": 10917500 }, { "epoch": 31.6, "learning_rate": 3.4204139554001486e-05, "loss": 2.1254, "step": 10918000 }, { "epoch": 31.6, "learning_rate": 3.42034173536495e-05, "loss": 2.0932, "step": 10918500 }, { "epoch": 31.61, "learning_rate": 3.4202693706002224e-05, "loss": 2.0915, "step": 10919000 }, { "epoch": 31.61, "learning_rate": 3.4201970058354946e-05, "loss": 2.0977, "step": 10919500 }, { "epoch": 31.61, "learning_rate": 3.420124785800297e-05, "loss": 2.1169, "step": 10920000 }, { "epoch": 31.61, "learning_rate": 3.4200525657650984e-05, "loss": 2.1015, "step": 10920500 }, { "epoch": 31.61, "learning_rate": 3.4199802010003706e-05, "loss": 2.1039, "step": 10921000 }, { "epoch": 31.61, "learning_rate": 3.419907836235643e-05, "loss": 2.0963, "step": 10921500 }, { "epoch": 31.61, "learning_rate": 3.419835471470915e-05, "loss": 2.0729, "step": 10922000 }, { "epoch": 31.62, "learning_rate": 3.419763106706187e-05, "loss": 2.1114, "step": 10922500 }, { "epoch": 31.62, "learning_rate": 3.4196907419414595e-05, "loss": 2.1, "step": 10923000 }, { "epoch": 31.62, "learning_rate": 3.4196183771767324e-05, "loss": 2.0952, "step": 10923500 }, { "epoch": 31.62, "learning_rate": 3.4195460124120046e-05, "loss": 2.1091, "step": 10924000 }, { "epoch": 31.62, "learning_rate": 3.419473792376807e-05, "loss": 2.1133, "step": 10924500 }, { "epoch": 31.62, "learning_rate": 3.4194015723416084e-05, "loss": 2.0939, "step": 10925000 }, { "epoch": 31.62, "learning_rate": 3.4193292075768806e-05, "loss": 2.098, "step": 10925500 }, { "epoch": 31.63, "learning_rate": 3.419256842812153e-05, "loss": 2.0894, "step": 10926000 }, { "epoch": 31.63, "learning_rate": 3.419184478047425e-05, "loss": 2.0987, "step": 10926500 }, { "epoch": 31.63, "learning_rate": 3.419112113282697e-05, "loss": 2.0984, "step": 10927000 }, { "epoch": 31.63, "learning_rate": 3.4190397485179695e-05, "loss": 2.0829, "step": 10927500 }, { "epoch": 31.63, "learning_rate": 3.4189673837532424e-05, "loss": 2.0904, "step": 10928000 }, { "epoch": 31.63, "learning_rate": 3.4188950189885146e-05, "loss": 2.1089, "step": 10928500 }, { "epoch": 31.63, "learning_rate": 3.418822654223787e-05, "loss": 2.0907, "step": 10929000 }, { "epoch": 31.64, "learning_rate": 3.418750289459059e-05, "loss": 2.0969, "step": 10929500 }, { "epoch": 31.64, "learning_rate": 3.418677924694331e-05, "loss": 2.0721, "step": 10930000 }, { "epoch": 31.64, "learning_rate": 3.4186055599296035e-05, "loss": 2.0954, "step": 10930500 }, { "epoch": 31.64, "learning_rate": 3.418533195164876e-05, "loss": 2.1008, "step": 10931000 }, { "epoch": 31.64, "learning_rate": 3.418460830400149e-05, "loss": 2.0946, "step": 10931500 }, { "epoch": 31.64, "learning_rate": 3.41838861036495e-05, "loss": 2.1146, "step": 10932000 }, { "epoch": 31.65, "learning_rate": 3.4183162456002224e-05, "loss": 2.0996, "step": 10932500 }, { "epoch": 31.65, "learning_rate": 3.418243880835495e-05, "loss": 2.1003, "step": 10933000 }, { "epoch": 31.65, "learning_rate": 3.4181715160707676e-05, "loss": 2.0839, "step": 10933500 }, { "epoch": 31.65, "learning_rate": 3.41809915130604e-05, "loss": 2.0881, "step": 10934000 }, { "epoch": 31.65, "learning_rate": 3.418026786541312e-05, "loss": 2.1211, "step": 10934500 }, { "epoch": 31.65, "learning_rate": 3.417954421776584e-05, "loss": 2.082, "step": 10935000 }, { "epoch": 31.65, "learning_rate": 3.417882201741386e-05, "loss": 2.1041, "step": 10935500 }, { "epoch": 31.66, "learning_rate": 3.4178099817061873e-05, "loss": 2.0947, "step": 10936000 }, { "epoch": 31.66, "learning_rate": 3.4177376169414596e-05, "loss": 2.0974, "step": 10936500 }, { "epoch": 31.66, "learning_rate": 3.4176652521767325e-05, "loss": 2.112, "step": 10937000 }, { "epoch": 31.66, "learning_rate": 3.417592887412005e-05, "loss": 2.1184, "step": 10937500 }, { "epoch": 31.66, "learning_rate": 3.417520522647277e-05, "loss": 2.1018, "step": 10938000 }, { "epoch": 31.66, "learning_rate": 3.4174483026120785e-05, "loss": 2.1195, "step": 10938500 }, { "epoch": 31.66, "learning_rate": 3.417375937847351e-05, "loss": 2.1054, "step": 10939000 }, { "epoch": 31.67, "learning_rate": 3.4173035730826236e-05, "loss": 2.0946, "step": 10939500 }, { "epoch": 31.67, "learning_rate": 3.417231353047425e-05, "loss": 2.1097, "step": 10940000 }, { "epoch": 31.67, "learning_rate": 3.4171589882826974e-05, "loss": 2.096, "step": 10940500 }, { "epoch": 31.67, "learning_rate": 3.4170866235179696e-05, "loss": 2.1072, "step": 10941000 }, { "epoch": 31.67, "learning_rate": 3.4170142587532425e-05, "loss": 2.1039, "step": 10941500 }, { "epoch": 31.67, "learning_rate": 3.416941893988515e-05, "loss": 2.089, "step": 10942000 }, { "epoch": 31.67, "learning_rate": 3.416869529223787e-05, "loss": 2.0889, "step": 10942500 }, { "epoch": 31.68, "learning_rate": 3.416797164459059e-05, "loss": 2.0931, "step": 10943000 }, { "epoch": 31.68, "learning_rate": 3.4167247996943314e-05, "loss": 2.1067, "step": 10943500 }, { "epoch": 31.68, "learning_rate": 3.4166524349296036e-05, "loss": 2.1005, "step": 10944000 }, { "epoch": 31.68, "learning_rate": 3.416580070164876e-05, "loss": 2.0979, "step": 10944500 }, { "epoch": 31.68, "learning_rate": 3.416507705400148e-05, "loss": 2.1032, "step": 10945000 }, { "epoch": 31.68, "learning_rate": 3.4164356300944796e-05, "loss": 2.0937, "step": 10945500 }, { "epoch": 31.68, "learning_rate": 3.416363265329752e-05, "loss": 2.1034, "step": 10946000 }, { "epoch": 31.69, "learning_rate": 3.416290900565024e-05, "loss": 2.134, "step": 10946500 }, { "epoch": 31.69, "learning_rate": 3.416218535800297e-05, "loss": 2.0973, "step": 10947000 }, { "epoch": 31.69, "learning_rate": 3.416146171035569e-05, "loss": 2.0949, "step": 10947500 }, { "epoch": 31.69, "learning_rate": 3.4160738062708414e-05, "loss": 2.0857, "step": 10948000 }, { "epoch": 31.69, "learning_rate": 3.4160014415061136e-05, "loss": 2.1004, "step": 10948500 }, { "epoch": 31.69, "learning_rate": 3.415929076741386e-05, "loss": 2.1149, "step": 10949000 }, { "epoch": 31.69, "learning_rate": 3.415856711976658e-05, "loss": 2.0953, "step": 10949500 }, { "epoch": 31.7, "learning_rate": 3.41578434721193e-05, "loss": 2.1168, "step": 10950000 }, { "epoch": 31.7, "learning_rate": 3.4157121271767326e-05, "loss": 2.1018, "step": 10950500 }, { "epoch": 31.7, "learning_rate": 3.415639762412005e-05, "loss": 2.1135, "step": 10951000 }, { "epoch": 31.7, "learning_rate": 3.415567397647277e-05, "loss": 2.1014, "step": 10951500 }, { "epoch": 31.7, "learning_rate": 3.415495032882549e-05, "loss": 2.0938, "step": 10952000 }, { "epoch": 31.7, "learning_rate": 3.4154226681178214e-05, "loss": 2.1251, "step": 10952500 }, { "epoch": 31.7, "learning_rate": 3.415350303353094e-05, "loss": 2.0813, "step": 10953000 }, { "epoch": 31.71, "learning_rate": 3.415277938588366e-05, "loss": 2.1092, "step": 10953500 }, { "epoch": 31.71, "learning_rate": 3.415205573823639e-05, "loss": 2.0988, "step": 10954000 }, { "epoch": 31.71, "learning_rate": 3.415133209058911e-05, "loss": 2.0968, "step": 10954500 }, { "epoch": 31.71, "learning_rate": 3.415060844294183e-05, "loss": 2.0729, "step": 10955000 }, { "epoch": 31.71, "learning_rate": 3.4149886242589855e-05, "loss": 2.1287, "step": 10955500 }, { "epoch": 31.71, "learning_rate": 3.414916404223787e-05, "loss": 2.0745, "step": 10956000 }, { "epoch": 31.71, "learning_rate": 3.4148441841885886e-05, "loss": 2.1094, "step": 10956500 }, { "epoch": 31.72, "learning_rate": 3.414771819423861e-05, "loss": 2.124, "step": 10957000 }, { "epoch": 31.72, "learning_rate": 3.414699454659133e-05, "loss": 2.1024, "step": 10957500 }, { "epoch": 31.72, "learning_rate": 3.414627089894405e-05, "loss": 2.0991, "step": 10958000 }, { "epoch": 31.72, "learning_rate": 3.4145547251296775e-05, "loss": 2.1159, "step": 10958500 }, { "epoch": 31.72, "learning_rate": 3.4144823603649504e-05, "loss": 2.1087, "step": 10959000 }, { "epoch": 31.72, "learning_rate": 3.4144099956002226e-05, "loss": 2.0965, "step": 10959500 }, { "epoch": 31.72, "learning_rate": 3.414337775565024e-05, "loss": 2.1053, "step": 10960000 }, { "epoch": 31.73, "learning_rate": 3.4142654108002964e-05, "loss": 2.0802, "step": 10960500 }, { "epoch": 31.73, "learning_rate": 3.4141930460355686e-05, "loss": 2.1218, "step": 10961000 }, { "epoch": 31.73, "learning_rate": 3.414120681270841e-05, "loss": 2.0974, "step": 10961500 }, { "epoch": 31.73, "learning_rate": 3.414048316506114e-05, "loss": 2.1067, "step": 10962000 }, { "epoch": 31.73, "learning_rate": 3.413975951741386e-05, "loss": 2.0764, "step": 10962500 }, { "epoch": 31.73, "learning_rate": 3.4139037317061875e-05, "loss": 2.1022, "step": 10963000 }, { "epoch": 31.73, "learning_rate": 3.4138313669414604e-05, "loss": 2.1155, "step": 10963500 }, { "epoch": 31.74, "learning_rate": 3.4137590021767326e-05, "loss": 2.104, "step": 10964000 }, { "epoch": 31.74, "learning_rate": 3.413686637412005e-05, "loss": 2.1135, "step": 10964500 }, { "epoch": 31.74, "learning_rate": 3.413614272647277e-05, "loss": 2.0734, "step": 10965000 }, { "epoch": 31.74, "learning_rate": 3.413541907882549e-05, "loss": 2.0836, "step": 10965500 }, { "epoch": 31.74, "learning_rate": 3.4134695431178215e-05, "loss": 2.085, "step": 10966000 }, { "epoch": 31.74, "learning_rate": 3.413397178353094e-05, "loss": 2.0995, "step": 10966500 }, { "epoch": 31.74, "learning_rate": 3.413324813588366e-05, "loss": 2.0803, "step": 10967000 }, { "epoch": 31.75, "learning_rate": 3.4132525935531675e-05, "loss": 2.1066, "step": 10967500 }, { "epoch": 31.75, "learning_rate": 3.4131802287884404e-05, "loss": 2.1115, "step": 10968000 }, { "epoch": 31.75, "learning_rate": 3.4131078640237127e-05, "loss": 2.0802, "step": 10968500 }, { "epoch": 31.75, "learning_rate": 3.4130354992589856e-05, "loss": 2.0897, "step": 10969000 }, { "epoch": 31.75, "learning_rate": 3.412963279223787e-05, "loss": 2.0987, "step": 10969500 }, { "epoch": 31.75, "learning_rate": 3.412890914459059e-05, "loss": 2.0865, "step": 10970000 }, { "epoch": 31.76, "learning_rate": 3.4128185496943316e-05, "loss": 2.073, "step": 10970500 }, { "epoch": 31.76, "learning_rate": 3.412746184929604e-05, "loss": 2.0925, "step": 10971000 }, { "epoch": 31.76, "learning_rate": 3.412673820164876e-05, "loss": 2.0883, "step": 10971500 }, { "epoch": 31.76, "learning_rate": 3.4126016001296776e-05, "loss": 2.1024, "step": 10972000 }, { "epoch": 31.76, "learning_rate": 3.4125292353649505e-05, "loss": 2.1092, "step": 10972500 }, { "epoch": 31.76, "learning_rate": 3.412456870600223e-05, "loss": 2.0778, "step": 10973000 }, { "epoch": 31.76, "learning_rate": 3.412384505835495e-05, "loss": 2.0979, "step": 10973500 }, { "epoch": 31.77, "learning_rate": 3.412312141070767e-05, "loss": 2.0916, "step": 10974000 }, { "epoch": 31.77, "learning_rate": 3.4122397763060394e-05, "loss": 2.1182, "step": 10974500 }, { "epoch": 31.77, "learning_rate": 3.4121674115413116e-05, "loss": 2.1062, "step": 10975000 }, { "epoch": 31.77, "learning_rate": 3.412095191506113e-05, "loss": 2.0791, "step": 10975500 }, { "epoch": 31.77, "learning_rate": 3.4120228267413854e-05, "loss": 2.1213, "step": 10976000 }, { "epoch": 31.77, "learning_rate": 3.4119504619766576e-05, "loss": 2.1095, "step": 10976500 }, { "epoch": 31.77, "learning_rate": 3.4118780972119305e-05, "loss": 2.0942, "step": 10977000 }, { "epoch": 31.78, "learning_rate": 3.411805732447203e-05, "loss": 2.0898, "step": 10977500 }, { "epoch": 31.78, "learning_rate": 3.4117333676824756e-05, "loss": 2.1023, "step": 10978000 }, { "epoch": 31.78, "learning_rate": 3.411661002917748e-05, "loss": 2.0885, "step": 10978500 }, { "epoch": 31.78, "learning_rate": 3.41158863815302e-05, "loss": 2.1062, "step": 10979000 }, { "epoch": 31.78, "learning_rate": 3.4115164181178216e-05, "loss": 2.0876, "step": 10979500 }, { "epoch": 31.78, "learning_rate": 3.411444053353094e-05, "loss": 2.1045, "step": 10980000 }, { "epoch": 31.78, "learning_rate": 3.411371688588366e-05, "loss": 2.1202, "step": 10980500 }, { "epoch": 31.79, "learning_rate": 3.411299323823638e-05, "loss": 2.0842, "step": 10981000 }, { "epoch": 31.79, "learning_rate": 3.4112269590589105e-05, "loss": 2.1279, "step": 10981500 }, { "epoch": 31.79, "learning_rate": 3.411154739023713e-05, "loss": 2.1003, "step": 10982000 }, { "epoch": 31.79, "learning_rate": 3.411082374258985e-05, "loss": 2.112, "step": 10982500 }, { "epoch": 31.79, "learning_rate": 3.411010009494257e-05, "loss": 2.0932, "step": 10983000 }, { "epoch": 31.79, "learning_rate": 3.410937789459059e-05, "loss": 2.0937, "step": 10983500 }, { "epoch": 31.79, "learning_rate": 3.410865424694331e-05, "loss": 2.0832, "step": 10984000 }, { "epoch": 31.8, "learning_rate": 3.410793059929604e-05, "loss": 2.0915, "step": 10984500 }, { "epoch": 31.8, "learning_rate": 3.410720695164876e-05, "loss": 2.1044, "step": 10985000 }, { "epoch": 31.8, "learning_rate": 3.410648330400148e-05, "loss": 2.1193, "step": 10985500 }, { "epoch": 31.8, "learning_rate": 3.4105761103649505e-05, "loss": 2.0999, "step": 10986000 }, { "epoch": 31.8, "learning_rate": 3.410503745600223e-05, "loss": 2.1316, "step": 10986500 }, { "epoch": 31.8, "learning_rate": 3.410431380835495e-05, "loss": 2.0909, "step": 10987000 }, { "epoch": 31.8, "learning_rate": 3.410359016070767e-05, "loss": 2.103, "step": 10987500 }, { "epoch": 31.81, "learning_rate": 3.4102866513060394e-05, "loss": 2.0997, "step": 10988000 }, { "epoch": 31.81, "learning_rate": 3.410214431270841e-05, "loss": 2.123, "step": 10988500 }, { "epoch": 31.81, "learning_rate": 3.410142066506113e-05, "loss": 2.1264, "step": 10989000 }, { "epoch": 31.81, "learning_rate": 3.4100697017413854e-05, "loss": 2.0962, "step": 10989500 }, { "epoch": 31.81, "learning_rate": 3.409997336976658e-05, "loss": 2.1111, "step": 10990000 }, { "epoch": 31.81, "learning_rate": 3.4099249722119306e-05, "loss": 2.1121, "step": 10990500 }, { "epoch": 31.81, "learning_rate": 3.409852752176732e-05, "loss": 2.0882, "step": 10991000 }, { "epoch": 31.82, "learning_rate": 3.409780387412004e-05, "loss": 2.1222, "step": 10991500 }, { "epoch": 31.82, "learning_rate": 3.409708022647277e-05, "loss": 2.1022, "step": 10992000 }, { "epoch": 31.82, "learning_rate": 3.4096356578825495e-05, "loss": 2.1035, "step": 10992500 }, { "epoch": 31.82, "learning_rate": 3.409563293117822e-05, "loss": 2.1012, "step": 10993000 }, { "epoch": 31.82, "learning_rate": 3.409490928353094e-05, "loss": 2.0714, "step": 10993500 }, { "epoch": 31.82, "learning_rate": 3.409418563588366e-05, "loss": 2.0741, "step": 10994000 }, { "epoch": 31.82, "learning_rate": 3.4093461988236384e-05, "loss": 2.0775, "step": 10994500 }, { "epoch": 31.83, "learning_rate": 3.4092739787884406e-05, "loss": 2.1194, "step": 10995000 }, { "epoch": 31.83, "learning_rate": 3.409201758753242e-05, "loss": 2.0808, "step": 10995500 }, { "epoch": 31.83, "learning_rate": 3.4091293939885144e-05, "loss": 2.1009, "step": 10996000 }, { "epoch": 31.83, "learning_rate": 3.4090570292237866e-05, "loss": 2.1154, "step": 10996500 }, { "epoch": 31.83, "learning_rate": 3.408984664459059e-05, "loss": 2.1056, "step": 10997000 }, { "epoch": 31.83, "learning_rate": 3.408912299694331e-05, "loss": 2.106, "step": 10997500 }, { "epoch": 31.83, "learning_rate": 3.408840079659133e-05, "loss": 2.0857, "step": 10998000 }, { "epoch": 31.84, "learning_rate": 3.408767859623935e-05, "loss": 2.1097, "step": 10998500 }, { "epoch": 31.84, "learning_rate": 3.408695494859207e-05, "loss": 2.0926, "step": 10999000 }, { "epoch": 31.84, "learning_rate": 3.40862313009448e-05, "loss": 2.0938, "step": 10999500 }, { "epoch": 31.84, "learning_rate": 3.408550765329752e-05, "loss": 2.0924, "step": 11000000 }, { "epoch": 31.84, "learning_rate": 3.408478545294554e-05, "loss": 2.0892, "step": 11000500 }, { "epoch": 31.84, "learning_rate": 3.408406180529826e-05, "loss": 2.1199, "step": 11001000 }, { "epoch": 31.84, "learning_rate": 3.408333815765098e-05, "loss": 2.0798, "step": 11001500 }, { "epoch": 31.85, "learning_rate": 3.408261451000371e-05, "loss": 2.1098, "step": 11002000 }, { "epoch": 31.85, "learning_rate": 3.408189086235643e-05, "loss": 2.0651, "step": 11002500 }, { "epoch": 31.85, "learning_rate": 3.4081167214709155e-05, "loss": 2.0822, "step": 11003000 }, { "epoch": 31.85, "learning_rate": 3.408044356706188e-05, "loss": 2.0953, "step": 11003500 }, { "epoch": 31.85, "learning_rate": 3.40797199194146e-05, "loss": 2.0927, "step": 11004000 }, { "epoch": 31.85, "learning_rate": 3.407899627176732e-05, "loss": 2.1134, "step": 11004500 }, { "epoch": 31.85, "learning_rate": 3.4078272624120044e-05, "loss": 2.0864, "step": 11005000 }, { "epoch": 31.86, "learning_rate": 3.4077548976472766e-05, "loss": 2.0868, "step": 11005500 }, { "epoch": 31.86, "learning_rate": 3.407682532882549e-05, "loss": 2.1155, "step": 11006000 }, { "epoch": 31.86, "learning_rate": 3.407610168117822e-05, "loss": 2.1002, "step": 11006500 }, { "epoch": 31.86, "learning_rate": 3.407537803353094e-05, "loss": 2.1142, "step": 11007000 }, { "epoch": 31.86, "learning_rate": 3.407465583317896e-05, "loss": 2.0898, "step": 11007500 }, { "epoch": 31.86, "learning_rate": 3.4073932185531684e-05, "loss": 2.1062, "step": 11008000 }, { "epoch": 31.87, "learning_rate": 3.407320853788441e-05, "loss": 2.0961, "step": 11008500 }, { "epoch": 31.87, "learning_rate": 3.407248489023713e-05, "loss": 2.1142, "step": 11009000 }, { "epoch": 31.87, "learning_rate": 3.407176124258985e-05, "loss": 2.0889, "step": 11009500 }, { "epoch": 31.87, "learning_rate": 3.407103759494257e-05, "loss": 2.0843, "step": 11010000 }, { "epoch": 31.87, "learning_rate": 3.4070313947295296e-05, "loss": 2.0904, "step": 11010500 }, { "epoch": 31.87, "learning_rate": 3.406959319423861e-05, "loss": 2.0881, "step": 11011000 }, { "epoch": 31.87, "learning_rate": 3.4068869546591333e-05, "loss": 2.0938, "step": 11011500 }, { "epoch": 31.88, "learning_rate": 3.4068145898944056e-05, "loss": 2.0809, "step": 11012000 }, { "epoch": 31.88, "learning_rate": 3.406742225129678e-05, "loss": 2.0756, "step": 11012500 }, { "epoch": 31.88, "learning_rate": 3.4066700050944794e-05, "loss": 2.1003, "step": 11013000 }, { "epoch": 31.88, "learning_rate": 3.4065976403297516e-05, "loss": 2.1023, "step": 11013500 }, { "epoch": 31.88, "learning_rate": 3.406525275565024e-05, "loss": 2.1022, "step": 11014000 }, { "epoch": 31.88, "learning_rate": 3.406452910800297e-05, "loss": 2.1127, "step": 11014500 }, { "epoch": 31.88, "learning_rate": 3.406380546035569e-05, "loss": 2.1, "step": 11015000 }, { "epoch": 31.89, "learning_rate": 3.406308326000371e-05, "loss": 2.0899, "step": 11015500 }, { "epoch": 31.89, "learning_rate": 3.4062359612356434e-05, "loss": 2.1049, "step": 11016000 }, { "epoch": 31.89, "learning_rate": 3.4061635964709156e-05, "loss": 2.123, "step": 11016500 }, { "epoch": 31.89, "learning_rate": 3.406091231706188e-05, "loss": 2.1166, "step": 11017000 }, { "epoch": 31.89, "learning_rate": 3.40601886694146e-05, "loss": 2.1091, "step": 11017500 }, { "epoch": 31.89, "learning_rate": 3.405946502176732e-05, "loss": 2.1262, "step": 11018000 }, { "epoch": 31.89, "learning_rate": 3.4058741374120045e-05, "loss": 2.1127, "step": 11018500 }, { "epoch": 31.9, "learning_rate": 3.405801772647277e-05, "loss": 2.1186, "step": 11019000 }, { "epoch": 31.9, "learning_rate": 3.405729552612078e-05, "loss": 2.1037, "step": 11019500 }, { "epoch": 31.9, "learning_rate": 3.405657187847351e-05, "loss": 2.0914, "step": 11020000 }, { "epoch": 31.9, "learning_rate": 3.4055848230826234e-05, "loss": 2.1114, "step": 11020500 }, { "epoch": 31.9, "learning_rate": 3.4055124583178956e-05, "loss": 2.1115, "step": 11021000 }, { "epoch": 31.9, "learning_rate": 3.405440093553168e-05, "loss": 2.1175, "step": 11021500 }, { "epoch": 31.9, "learning_rate": 3.405367728788441e-05, "loss": 2.0948, "step": 11022000 }, { "epoch": 31.91, "learning_rate": 3.405295508753242e-05, "loss": 2.0919, "step": 11022500 }, { "epoch": 31.91, "learning_rate": 3.405223288718044e-05, "loss": 2.1249, "step": 11023000 }, { "epoch": 31.91, "learning_rate": 3.405150923953316e-05, "loss": 2.0901, "step": 11023500 }, { "epoch": 31.91, "learning_rate": 3.405078559188589e-05, "loss": 2.083, "step": 11024000 }, { "epoch": 31.91, "learning_rate": 3.405006194423861e-05, "loss": 2.0982, "step": 11024500 }, { "epoch": 31.91, "learning_rate": 3.404933974388663e-05, "loss": 2.1035, "step": 11025000 }, { "epoch": 31.91, "learning_rate": 3.404861754353464e-05, "loss": 2.0904, "step": 11025500 }, { "epoch": 31.92, "learning_rate": 3.4047893895887365e-05, "loss": 2.0962, "step": 11026000 }, { "epoch": 31.92, "learning_rate": 3.404717024824009e-05, "loss": 2.1016, "step": 11026500 }, { "epoch": 31.92, "learning_rate": 3.404644660059281e-05, "loss": 2.1133, "step": 11027000 }, { "epoch": 31.92, "learning_rate": 3.404572295294554e-05, "loss": 2.1082, "step": 11027500 }, { "epoch": 31.92, "learning_rate": 3.404499930529826e-05, "loss": 2.1004, "step": 11028000 }, { "epoch": 31.92, "learning_rate": 3.404427565765098e-05, "loss": 2.0961, "step": 11028500 }, { "epoch": 31.92, "learning_rate": 3.4043552010003706e-05, "loss": 2.1086, "step": 11029000 }, { "epoch": 31.93, "learning_rate": 3.4042828362356435e-05, "loss": 2.1021, "step": 11029500 }, { "epoch": 31.93, "learning_rate": 3.404210471470916e-05, "loss": 2.1187, "step": 11030000 }, { "epoch": 31.93, "learning_rate": 3.404138106706188e-05, "loss": 2.0988, "step": 11030500 }, { "epoch": 31.93, "learning_rate": 3.40406574194146e-05, "loss": 2.1115, "step": 11031000 }, { "epoch": 31.93, "learning_rate": 3.4039933771767324e-05, "loss": 2.0857, "step": 11031500 }, { "epoch": 31.93, "learning_rate": 3.4039210124120046e-05, "loss": 2.1237, "step": 11032000 }, { "epoch": 31.93, "learning_rate": 3.403848792376806e-05, "loss": 2.1112, "step": 11032500 }, { "epoch": 31.94, "learning_rate": 3.403776427612079e-05, "loss": 2.1057, "step": 11033000 }, { "epoch": 31.94, "learning_rate": 3.403704062847351e-05, "loss": 2.0866, "step": 11033500 }, { "epoch": 31.94, "learning_rate": 3.4036316980826235e-05, "loss": 2.1, "step": 11034000 }, { "epoch": 31.94, "learning_rate": 3.403559333317896e-05, "loss": 2.0885, "step": 11034500 }, { "epoch": 31.94, "learning_rate": 3.403486968553168e-05, "loss": 2.0845, "step": 11035000 }, { "epoch": 31.94, "learning_rate": 3.40341460378844e-05, "loss": 2.0994, "step": 11035500 }, { "epoch": 31.94, "learning_rate": 3.4033422390237124e-05, "loss": 2.0806, "step": 11036000 }, { "epoch": 31.95, "learning_rate": 3.403270018988514e-05, "loss": 2.1238, "step": 11036500 }, { "epoch": 31.95, "learning_rate": 3.403197654223787e-05, "loss": 2.1197, "step": 11037000 }, { "epoch": 31.95, "learning_rate": 3.403125289459059e-05, "loss": 2.1138, "step": 11037500 }, { "epoch": 31.95, "learning_rate": 3.403052924694331e-05, "loss": 2.1471, "step": 11038000 }, { "epoch": 31.95, "learning_rate": 3.4029807046591335e-05, "loss": 2.0917, "step": 11038500 }, { "epoch": 31.95, "learning_rate": 3.402908339894406e-05, "loss": 2.0972, "step": 11039000 }, { "epoch": 31.95, "learning_rate": 3.402836119859207e-05, "loss": 2.095, "step": 11039500 }, { "epoch": 31.96, "learning_rate": 3.4027637550944795e-05, "loss": 2.1102, "step": 11040000 }, { "epoch": 31.96, "learning_rate": 3.402691390329752e-05, "loss": 2.1143, "step": 11040500 }, { "epoch": 31.96, "learning_rate": 3.402619025565024e-05, "loss": 2.0817, "step": 11041000 }, { "epoch": 31.96, "learning_rate": 3.402546660800296e-05, "loss": 2.0853, "step": 11041500 }, { "epoch": 31.96, "learning_rate": 3.402474296035569e-05, "loss": 2.1062, "step": 11042000 }, { "epoch": 31.96, "learning_rate": 3.402401931270841e-05, "loss": 2.092, "step": 11042500 }, { "epoch": 31.96, "learning_rate": 3.4023295665061135e-05, "loss": 2.1142, "step": 11043000 }, { "epoch": 31.97, "learning_rate": 3.402257201741386e-05, "loss": 2.0706, "step": 11043500 }, { "epoch": 31.97, "learning_rate": 3.402184836976658e-05, "loss": 2.1074, "step": 11044000 }, { "epoch": 31.97, "learning_rate": 3.402112472211931e-05, "loss": 2.0848, "step": 11044500 }, { "epoch": 31.97, "learning_rate": 3.402040107447203e-05, "loss": 2.0974, "step": 11045000 }, { "epoch": 31.97, "learning_rate": 3.401967742682475e-05, "loss": 2.0988, "step": 11045500 }, { "epoch": 31.97, "learning_rate": 3.4018953779177475e-05, "loss": 2.0857, "step": 11046000 }, { "epoch": 31.98, "learning_rate": 3.40182301315302e-05, "loss": 2.1264, "step": 11046500 }, { "epoch": 31.98, "learning_rate": 3.401750937847351e-05, "loss": 2.0891, "step": 11047000 }, { "epoch": 31.98, "learning_rate": 3.401678717812153e-05, "loss": 2.1024, "step": 11047500 }, { "epoch": 31.98, "learning_rate": 3.401606353047425e-05, "loss": 2.0933, "step": 11048000 }, { "epoch": 31.98, "learning_rate": 3.401533988282697e-05, "loss": 2.1329, "step": 11048500 }, { "epoch": 31.98, "learning_rate": 3.4014616235179696e-05, "loss": 2.0897, "step": 11049000 }, { "epoch": 31.98, "learning_rate": 3.401389258753242e-05, "loss": 2.1008, "step": 11049500 }, { "epoch": 31.99, "learning_rate": 3.401317038718044e-05, "loss": 2.1168, "step": 11050000 }, { "epoch": 31.99, "learning_rate": 3.401244673953316e-05, "loss": 2.0799, "step": 11050500 }, { "epoch": 31.99, "learning_rate": 3.4011723091885885e-05, "loss": 2.1242, "step": 11051000 }, { "epoch": 31.99, "learning_rate": 3.401099944423861e-05, "loss": 2.1073, "step": 11051500 }, { "epoch": 31.99, "learning_rate": 3.4010275796591336e-05, "loss": 2.108, "step": 11052000 }, { "epoch": 31.99, "learning_rate": 3.400955214894406e-05, "loss": 2.0969, "step": 11052500 }, { "epoch": 31.99, "learning_rate": 3.400882850129678e-05, "loss": 2.1011, "step": 11053000 }, { "epoch": 32.0, "learning_rate": 3.40081048536495e-05, "loss": 2.1123, "step": 11053500 }, { "epoch": 32.0, "learning_rate": 3.4007381206002225e-05, "loss": 2.1004, "step": 11054000 }, { "epoch": 32.0, "learning_rate": 3.400665755835495e-05, "loss": 2.1224, "step": 11054500 }, { "epoch": 32.0, "learning_rate": 3.400593391070767e-05, "loss": 2.0872, "step": 11055000 }, { "epoch": 32.0, "eval_accuracy": 0.6675262658987756, "eval_accuracy_mlm": 0.6324042771749461, "eval_accuracy_nsp": 0.8559600903787015, "eval_loss": 2.1762564182281494, "eval_runtime": 331.7013, "eval_samples_per_second": 1315.599, "eval_steps_per_second": 54.817, "step": 11055104 }, { "epoch": 32.0, "learning_rate": 3.400521171035569e-05, "loss": 2.0846, "step": 11055500 }, { "epoch": 32.0, "learning_rate": 3.4004488062708414e-05, "loss": 2.06, "step": 11056000 }, { "epoch": 32.0, "learning_rate": 3.400376586235643e-05, "loss": 2.0646, "step": 11056500 }, { "epoch": 32.01, "learning_rate": 3.400304221470915e-05, "loss": 2.0927, "step": 11057000 }, { "epoch": 32.01, "learning_rate": 3.4002318567061874e-05, "loss": 2.0445, "step": 11057500 }, { "epoch": 32.01, "learning_rate": 3.4001594919414596e-05, "loss": 2.0489, "step": 11058000 }, { "epoch": 32.01, "learning_rate": 3.400087271906262e-05, "loss": 2.0669, "step": 11058500 }, { "epoch": 32.01, "learning_rate": 3.400014907141534e-05, "loss": 2.0876, "step": 11059000 }, { "epoch": 32.01, "learning_rate": 3.399942542376807e-05, "loss": 2.099, "step": 11059500 }, { "epoch": 32.01, "learning_rate": 3.399870177612079e-05, "loss": 2.0681, "step": 11060000 }, { "epoch": 32.02, "learning_rate": 3.3997978128473514e-05, "loss": 2.076, "step": 11060500 }, { "epoch": 32.02, "learning_rate": 3.3997254480826236e-05, "loss": 2.0616, "step": 11061000 }, { "epoch": 32.02, "learning_rate": 3.399653083317896e-05, "loss": 2.0765, "step": 11061500 }, { "epoch": 32.02, "learning_rate": 3.399580718553168e-05, "loss": 2.0781, "step": 11062000 }, { "epoch": 32.02, "learning_rate": 3.39950835378844e-05, "loss": 2.0885, "step": 11062500 }, { "epoch": 32.02, "learning_rate": 3.3994359890237125e-05, "loss": 2.0804, "step": 11063000 }, { "epoch": 32.02, "learning_rate": 3.399363624258985e-05, "loss": 2.0707, "step": 11063500 }, { "epoch": 32.03, "learning_rate": 3.399291259494257e-05, "loss": 2.0815, "step": 11064000 }, { "epoch": 32.03, "learning_rate": 3.399218894729529e-05, "loss": 2.0873, "step": 11064500 }, { "epoch": 32.03, "learning_rate": 3.3991465299648014e-05, "loss": 2.0757, "step": 11065000 }, { "epoch": 32.03, "learning_rate": 3.399074165200074e-05, "loss": 2.0726, "step": 11065500 }, { "epoch": 32.03, "learning_rate": 3.399001945164876e-05, "loss": 2.0832, "step": 11066000 }, { "epoch": 32.03, "learning_rate": 3.398929580400149e-05, "loss": 2.087, "step": 11066500 }, { "epoch": 32.03, "learning_rate": 3.398857215635421e-05, "loss": 2.0782, "step": 11067000 }, { "epoch": 32.04, "learning_rate": 3.398784850870693e-05, "loss": 2.0785, "step": 11067500 }, { "epoch": 32.04, "learning_rate": 3.3987124861059655e-05, "loss": 2.0597, "step": 11068000 }, { "epoch": 32.04, "learning_rate": 3.398640121341238e-05, "loss": 2.0627, "step": 11068500 }, { "epoch": 32.04, "learning_rate": 3.398567901306039e-05, "loss": 2.0933, "step": 11069000 }, { "epoch": 32.04, "learning_rate": 3.398495536541312e-05, "loss": 2.0995, "step": 11069500 }, { "epoch": 32.04, "learning_rate": 3.3984231717765844e-05, "loss": 2.0712, "step": 11070000 }, { "epoch": 32.04, "learning_rate": 3.398350951741386e-05, "loss": 2.0735, "step": 11070500 }, { "epoch": 32.05, "learning_rate": 3.398278586976658e-05, "loss": 2.0865, "step": 11071000 }, { "epoch": 32.05, "learning_rate": 3.3982062222119304e-05, "loss": 2.0783, "step": 11071500 }, { "epoch": 32.05, "learning_rate": 3.3981338574472026e-05, "loss": 2.0867, "step": 11072000 }, { "epoch": 32.05, "learning_rate": 3.398061492682475e-05, "loss": 2.0735, "step": 11072500 }, { "epoch": 32.05, "learning_rate": 3.397989272647277e-05, "loss": 2.107, "step": 11073000 }, { "epoch": 32.05, "learning_rate": 3.397916907882549e-05, "loss": 2.0847, "step": 11073500 }, { "epoch": 32.05, "learning_rate": 3.397844543117822e-05, "loss": 2.0888, "step": 11074000 }, { "epoch": 32.06, "learning_rate": 3.3977721783530944e-05, "loss": 2.0778, "step": 11074500 }, { "epoch": 32.06, "learning_rate": 3.3976998135883666e-05, "loss": 2.0654, "step": 11075000 }, { "epoch": 32.06, "learning_rate": 3.397627448823639e-05, "loss": 2.0775, "step": 11075500 }, { "epoch": 32.06, "learning_rate": 3.397555084058911e-05, "loss": 2.0729, "step": 11076000 }, { "epoch": 32.06, "learning_rate": 3.397482719294183e-05, "loss": 2.0838, "step": 11076500 }, { "epoch": 32.06, "learning_rate": 3.3974103545294555e-05, "loss": 2.1062, "step": 11077000 }, { "epoch": 32.06, "learning_rate": 3.397337989764728e-05, "loss": 2.0761, "step": 11077500 }, { "epoch": 32.07, "learning_rate": 3.397265625e-05, "loss": 2.0998, "step": 11078000 }, { "epoch": 32.07, "learning_rate": 3.397193260235272e-05, "loss": 2.0944, "step": 11078500 }, { "epoch": 32.07, "learning_rate": 3.3971210402000744e-05, "loss": 2.1006, "step": 11079000 }, { "epoch": 32.07, "learning_rate": 3.3970486754353466e-05, "loss": 2.1022, "step": 11079500 }, { "epoch": 32.07, "learning_rate": 3.396976310670619e-05, "loss": 2.088, "step": 11080000 }, { "epoch": 32.07, "learning_rate": 3.396903945905891e-05, "loss": 2.0724, "step": 11080500 }, { "epoch": 32.07, "learning_rate": 3.396831581141163e-05, "loss": 2.1133, "step": 11081000 }, { "epoch": 32.08, "learning_rate": 3.3967593611059655e-05, "loss": 2.0864, "step": 11081500 }, { "epoch": 32.08, "learning_rate": 3.396686996341238e-05, "loss": 2.0808, "step": 11082000 }, { "epoch": 32.08, "learning_rate": 3.39661463157651e-05, "loss": 2.0805, "step": 11082500 }, { "epoch": 32.08, "learning_rate": 3.396542266811782e-05, "loss": 2.0874, "step": 11083000 }, { "epoch": 32.08, "learning_rate": 3.3964699020470544e-05, "loss": 2.077, "step": 11083500 }, { "epoch": 32.08, "learning_rate": 3.396397537282327e-05, "loss": 2.0846, "step": 11084000 }, { "epoch": 32.09, "learning_rate": 3.3963251725175995e-05, "loss": 2.1005, "step": 11084500 }, { "epoch": 32.09, "learning_rate": 3.396252807752872e-05, "loss": 2.0891, "step": 11085000 }, { "epoch": 32.09, "learning_rate": 3.396180442988144e-05, "loss": 2.0949, "step": 11085500 }, { "epoch": 32.09, "learning_rate": 3.396108078223416e-05, "loss": 2.0748, "step": 11086000 }, { "epoch": 32.09, "learning_rate": 3.396035858188218e-05, "loss": 2.0744, "step": 11086500 }, { "epoch": 32.09, "learning_rate": 3.39596349342349e-05, "loss": 2.0722, "step": 11087000 }, { "epoch": 32.09, "learning_rate": 3.395891273388292e-05, "loss": 2.0697, "step": 11087500 }, { "epoch": 32.1, "learning_rate": 3.3958189086235645e-05, "loss": 2.0847, "step": 11088000 }, { "epoch": 32.1, "learning_rate": 3.395746688588366e-05, "loss": 2.0969, "step": 11088500 }, { "epoch": 32.1, "learning_rate": 3.395674323823639e-05, "loss": 2.0733, "step": 11089000 }, { "epoch": 32.1, "learning_rate": 3.395601959058911e-05, "loss": 2.0939, "step": 11089500 }, { "epoch": 32.1, "learning_rate": 3.3955295942941834e-05, "loss": 2.1077, "step": 11090000 }, { "epoch": 32.1, "learning_rate": 3.3954572295294556e-05, "loss": 2.1059, "step": 11090500 }, { "epoch": 32.1, "learning_rate": 3.395384864764728e-05, "loss": 2.0811, "step": 11091000 }, { "epoch": 32.11, "learning_rate": 3.3953125e-05, "loss": 2.0783, "step": 11091500 }, { "epoch": 32.11, "learning_rate": 3.395240135235272e-05, "loss": 2.085, "step": 11092000 }, { "epoch": 32.11, "learning_rate": 3.3951677704705445e-05, "loss": 2.0935, "step": 11092500 }, { "epoch": 32.11, "learning_rate": 3.3950954057058174e-05, "loss": 2.1206, "step": 11093000 }, { "epoch": 32.11, "learning_rate": 3.3950230409410896e-05, "loss": 2.0831, "step": 11093500 }, { "epoch": 32.11, "learning_rate": 3.394950820905891e-05, "loss": 2.0824, "step": 11094000 }, { "epoch": 32.11, "learning_rate": 3.3948784561411634e-05, "loss": 2.0653, "step": 11094500 }, { "epoch": 32.12, "learning_rate": 3.3948060913764356e-05, "loss": 2.0849, "step": 11095000 }, { "epoch": 32.12, "learning_rate": 3.394733871341237e-05, "loss": 2.0787, "step": 11095500 }, { "epoch": 32.12, "learning_rate": 3.39466150657651e-05, "loss": 2.0752, "step": 11096000 }, { "epoch": 32.12, "learning_rate": 3.394589141811782e-05, "loss": 2.0753, "step": 11096500 }, { "epoch": 32.12, "learning_rate": 3.3945167770470545e-05, "loss": 2.0889, "step": 11097000 }, { "epoch": 32.12, "learning_rate": 3.3944444122823274e-05, "loss": 2.0537, "step": 11097500 }, { "epoch": 32.12, "learning_rate": 3.3943720475175996e-05, "loss": 2.1074, "step": 11098000 }, { "epoch": 32.13, "learning_rate": 3.394299682752872e-05, "loss": 2.0719, "step": 11098500 }, { "epoch": 32.13, "learning_rate": 3.394227317988144e-05, "loss": 2.089, "step": 11099000 }, { "epoch": 32.13, "learning_rate": 3.394154953223416e-05, "loss": 2.0875, "step": 11099500 }, { "epoch": 32.13, "learning_rate": 3.394082733188218e-05, "loss": 2.0743, "step": 11100000 }, { "epoch": 32.13, "learning_rate": 3.39401036842349e-05, "loss": 2.0679, "step": 11100500 }, { "epoch": 32.13, "learning_rate": 3.393938003658762e-05, "loss": 2.0899, "step": 11101000 }, { "epoch": 32.13, "learning_rate": 3.3938656388940345e-05, "loss": 2.0655, "step": 11101500 }, { "epoch": 32.14, "learning_rate": 3.3937932741293074e-05, "loss": 2.0916, "step": 11102000 }, { "epoch": 32.14, "learning_rate": 3.3937209093645796e-05, "loss": 2.0972, "step": 11102500 }, { "epoch": 32.14, "learning_rate": 3.393648544599852e-05, "loss": 2.1027, "step": 11103000 }, { "epoch": 32.14, "learning_rate": 3.393576179835125e-05, "loss": 2.0755, "step": 11103500 }, { "epoch": 32.14, "learning_rate": 3.393503959799926e-05, "loss": 2.0763, "step": 11104000 }, { "epoch": 32.14, "learning_rate": 3.3934315950351986e-05, "loss": 2.0968, "step": 11104500 }, { "epoch": 32.14, "learning_rate": 3.393359230270471e-05, "loss": 2.092, "step": 11105000 }, { "epoch": 32.15, "learning_rate": 3.393286865505743e-05, "loss": 2.1001, "step": 11105500 }, { "epoch": 32.15, "learning_rate": 3.393214645470545e-05, "loss": 2.0733, "step": 11106000 }, { "epoch": 32.15, "learning_rate": 3.3931422807058175e-05, "loss": 2.1094, "step": 11106500 }, { "epoch": 32.15, "learning_rate": 3.39306991594109e-05, "loss": 2.083, "step": 11107000 }, { "epoch": 32.15, "learning_rate": 3.392997551176362e-05, "loss": 2.0913, "step": 11107500 }, { "epoch": 32.15, "learning_rate": 3.392925186411634e-05, "loss": 2.0907, "step": 11108000 }, { "epoch": 32.15, "learning_rate": 3.392852966376436e-05, "loss": 2.07, "step": 11108500 }, { "epoch": 32.16, "learning_rate": 3.392780601611708e-05, "loss": 2.0948, "step": 11109000 }, { "epoch": 32.16, "learning_rate": 3.39270823684698e-05, "loss": 2.0947, "step": 11109500 }, { "epoch": 32.16, "learning_rate": 3.3926358720822524e-05, "loss": 2.1131, "step": 11110000 }, { "epoch": 32.16, "learning_rate": 3.392563507317525e-05, "loss": 2.0729, "step": 11110500 }, { "epoch": 32.16, "learning_rate": 3.3924911425527975e-05, "loss": 2.0791, "step": 11111000 }, { "epoch": 32.16, "learning_rate": 3.3924187777880704e-05, "loss": 2.1045, "step": 11111500 }, { "epoch": 32.16, "learning_rate": 3.392346557752872e-05, "loss": 2.1057, "step": 11112000 }, { "epoch": 32.17, "learning_rate": 3.392274771906262e-05, "loss": 2.0983, "step": 11112500 }, { "epoch": 32.17, "learning_rate": 3.3922024071415344e-05, "loss": 2.0947, "step": 11113000 }, { "epoch": 32.17, "learning_rate": 3.3921300423768066e-05, "loss": 2.0808, "step": 11113500 }, { "epoch": 32.17, "learning_rate": 3.392057677612079e-05, "loss": 2.1142, "step": 11114000 }, { "epoch": 32.17, "learning_rate": 3.391985312847351e-05, "loss": 2.081, "step": 11114500 }, { "epoch": 32.17, "learning_rate": 3.391912948082623e-05, "loss": 2.091, "step": 11115000 }, { "epoch": 32.17, "learning_rate": 3.3918405833178955e-05, "loss": 2.0748, "step": 11115500 }, { "epoch": 32.18, "learning_rate": 3.391768218553168e-05, "loss": 2.0833, "step": 11116000 }, { "epoch": 32.18, "learning_rate": 3.39169585378844e-05, "loss": 2.0951, "step": 11116500 }, { "epoch": 32.18, "learning_rate": 3.391623489023713e-05, "loss": 2.0947, "step": 11117000 }, { "epoch": 32.18, "learning_rate": 3.391551124258985e-05, "loss": 2.0941, "step": 11117500 }, { "epoch": 32.18, "learning_rate": 3.391478759494257e-05, "loss": 2.0664, "step": 11118000 }, { "epoch": 32.18, "learning_rate": 3.3914063947295295e-05, "loss": 2.084, "step": 11118500 }, { "epoch": 32.18, "learning_rate": 3.3913340299648024e-05, "loss": 2.0732, "step": 11119000 }, { "epoch": 32.19, "learning_rate": 3.3912616652000746e-05, "loss": 2.0726, "step": 11119500 }, { "epoch": 32.19, "learning_rate": 3.391189300435347e-05, "loss": 2.0911, "step": 11120000 }, { "epoch": 32.19, "learning_rate": 3.391116935670619e-05, "loss": 2.1006, "step": 11120500 }, { "epoch": 32.19, "learning_rate": 3.391044570905891e-05, "loss": 2.0632, "step": 11121000 }, { "epoch": 32.19, "learning_rate": 3.3909722061411635e-05, "loss": 2.1067, "step": 11121500 }, { "epoch": 32.19, "learning_rate": 3.390899841376436e-05, "loss": 2.0869, "step": 11122000 }, { "epoch": 32.2, "learning_rate": 3.390827476611708e-05, "loss": 2.1037, "step": 11122500 }, { "epoch": 32.2, "learning_rate": 3.39075511184698e-05, "loss": 2.0864, "step": 11123000 }, { "epoch": 32.2, "learning_rate": 3.3906827470822524e-05, "loss": 2.078, "step": 11123500 }, { "epoch": 32.2, "learning_rate": 3.390610382317525e-05, "loss": 2.0904, "step": 11124000 }, { "epoch": 32.2, "learning_rate": 3.3905380175527976e-05, "loss": 2.0825, "step": 11124500 }, { "epoch": 32.2, "learning_rate": 3.39046565278807e-05, "loss": 2.0744, "step": 11125000 }, { "epoch": 32.2, "learning_rate": 3.390393432752871e-05, "loss": 2.0716, "step": 11125500 }, { "epoch": 32.21, "learning_rate": 3.390321067988144e-05, "loss": 2.0882, "step": 11126000 }, { "epoch": 32.21, "learning_rate": 3.3902487032234165e-05, "loss": 2.0557, "step": 11126500 }, { "epoch": 32.21, "learning_rate": 3.390176338458689e-05, "loss": 2.0811, "step": 11127000 }, { "epoch": 32.21, "learning_rate": 3.390103973693961e-05, "loss": 2.0752, "step": 11127500 }, { "epoch": 32.21, "learning_rate": 3.390031608929233e-05, "loss": 2.069, "step": 11128000 }, { "epoch": 32.21, "learning_rate": 3.3899592441645054e-05, "loss": 2.1071, "step": 11128500 }, { "epoch": 32.21, "learning_rate": 3.3898868793997776e-05, "loss": 2.0685, "step": 11129000 }, { "epoch": 32.22, "learning_rate": 3.3898145146350505e-05, "loss": 2.0597, "step": 11129500 }, { "epoch": 32.22, "learning_rate": 3.389742294599852e-05, "loss": 2.0599, "step": 11130000 }, { "epoch": 32.22, "learning_rate": 3.389669929835124e-05, "loss": 2.0682, "step": 11130500 }, { "epoch": 32.22, "learning_rate": 3.3895975650703965e-05, "loss": 2.076, "step": 11131000 }, { "epoch": 32.22, "learning_rate": 3.389525200305669e-05, "loss": 2.0684, "step": 11131500 }, { "epoch": 32.22, "learning_rate": 3.389452835540941e-05, "loss": 2.052, "step": 11132000 }, { "epoch": 32.22, "learning_rate": 3.3893806155057425e-05, "loss": 2.0908, "step": 11132500 }, { "epoch": 32.23, "learning_rate": 3.3893082507410154e-05, "loss": 2.0819, "step": 11133000 }, { "epoch": 32.23, "learning_rate": 3.3892358859762876e-05, "loss": 2.1044, "step": 11133500 }, { "epoch": 32.23, "learning_rate": 3.38916366594109e-05, "loss": 2.0985, "step": 11134000 }, { "epoch": 32.23, "learning_rate": 3.389091301176362e-05, "loss": 2.047, "step": 11134500 }, { "epoch": 32.23, "learning_rate": 3.389018936411634e-05, "loss": 2.0924, "step": 11135000 }, { "epoch": 32.23, "learning_rate": 3.3889465716469065e-05, "loss": 2.0566, "step": 11135500 }, { "epoch": 32.23, "learning_rate": 3.388874206882179e-05, "loss": 2.0814, "step": 11136000 }, { "epoch": 32.24, "learning_rate": 3.388801842117451e-05, "loss": 2.0559, "step": 11136500 }, { "epoch": 32.24, "learning_rate": 3.388729477352723e-05, "loss": 2.0783, "step": 11137000 }, { "epoch": 32.24, "learning_rate": 3.3886572573175254e-05, "loss": 2.0934, "step": 11137500 }, { "epoch": 32.24, "learning_rate": 3.388585037282327e-05, "loss": 2.0781, "step": 11138000 }, { "epoch": 32.24, "learning_rate": 3.388512672517599e-05, "loss": 2.0807, "step": 11138500 }, { "epoch": 32.24, "learning_rate": 3.3884403077528714e-05, "loss": 2.1042, "step": 11139000 }, { "epoch": 32.24, "learning_rate": 3.3883679429881436e-05, "loss": 2.0761, "step": 11139500 }, { "epoch": 32.25, "learning_rate": 3.388295578223416e-05, "loss": 2.0859, "step": 11140000 }, { "epoch": 32.25, "learning_rate": 3.388223213458688e-05, "loss": 2.1075, "step": 11140500 }, { "epoch": 32.25, "learning_rate": 3.388150848693961e-05, "loss": 2.1082, "step": 11141000 }, { "epoch": 32.25, "learning_rate": 3.388078628658763e-05, "loss": 2.0654, "step": 11141500 }, { "epoch": 32.25, "learning_rate": 3.3880062638940354e-05, "loss": 2.0954, "step": 11142000 }, { "epoch": 32.25, "learning_rate": 3.387933899129308e-05, "loss": 2.0707, "step": 11142500 }, { "epoch": 32.25, "learning_rate": 3.38786153436458e-05, "loss": 2.0878, "step": 11143000 }, { "epoch": 32.26, "learning_rate": 3.387789169599852e-05, "loss": 2.0615, "step": 11143500 }, { "epoch": 32.26, "learning_rate": 3.387716804835124e-05, "loss": 2.1077, "step": 11144000 }, { "epoch": 32.26, "learning_rate": 3.3876444400703966e-05, "loss": 2.0768, "step": 11144500 }, { "epoch": 32.26, "learning_rate": 3.387572075305669e-05, "loss": 2.0653, "step": 11145000 }, { "epoch": 32.26, "learning_rate": 3.387499710540941e-05, "loss": 2.0721, "step": 11145500 }, { "epoch": 32.26, "learning_rate": 3.387427345776213e-05, "loss": 2.0938, "step": 11146000 }, { "epoch": 32.26, "learning_rate": 3.3873551257410155e-05, "loss": 2.0851, "step": 11146500 }, { "epoch": 32.27, "learning_rate": 3.387282760976288e-05, "loss": 2.0681, "step": 11147000 }, { "epoch": 32.27, "learning_rate": 3.38721039621156e-05, "loss": 2.0779, "step": 11147500 }, { "epoch": 32.27, "learning_rate": 3.387138031446832e-05, "loss": 2.0749, "step": 11148000 }, { "epoch": 32.27, "learning_rate": 3.387065666682105e-05, "loss": 2.0871, "step": 11148500 }, { "epoch": 32.27, "learning_rate": 3.386993301917377e-05, "loss": 2.0828, "step": 11149000 }, { "epoch": 32.27, "learning_rate": 3.3869209371526495e-05, "loss": 2.0798, "step": 11149500 }, { "epoch": 32.27, "learning_rate": 3.386848717117451e-05, "loss": 2.0705, "step": 11150000 }, { "epoch": 32.28, "learning_rate": 3.386776352352723e-05, "loss": 2.095, "step": 11150500 }, { "epoch": 32.28, "learning_rate": 3.3867039875879955e-05, "loss": 2.0804, "step": 11151000 }, { "epoch": 32.28, "learning_rate": 3.3866316228232684e-05, "loss": 2.0664, "step": 11151500 }, { "epoch": 32.28, "learning_rate": 3.386559547517599e-05, "loss": 2.0525, "step": 11152000 }, { "epoch": 32.28, "learning_rate": 3.3864871827528715e-05, "loss": 2.0952, "step": 11152500 }, { "epoch": 32.28, "learning_rate": 3.386414817988144e-05, "loss": 2.081, "step": 11153000 }, { "epoch": 32.28, "learning_rate": 3.386342453223416e-05, "loss": 2.0729, "step": 11153500 }, { "epoch": 32.29, "learning_rate": 3.386270088458688e-05, "loss": 2.0992, "step": 11154000 }, { "epoch": 32.29, "learning_rate": 3.3861977236939604e-05, "loss": 2.0812, "step": 11154500 }, { "epoch": 32.29, "learning_rate": 3.386125648388292e-05, "loss": 2.0853, "step": 11155000 }, { "epoch": 32.29, "learning_rate": 3.386053283623564e-05, "loss": 2.0781, "step": 11155500 }, { "epoch": 32.29, "learning_rate": 3.3859809188588364e-05, "loss": 2.0671, "step": 11156000 }, { "epoch": 32.29, "learning_rate": 3.385908554094109e-05, "loss": 2.0945, "step": 11156500 }, { "epoch": 32.29, "learning_rate": 3.3858361893293815e-05, "loss": 2.0844, "step": 11157000 }, { "epoch": 32.3, "learning_rate": 3.385763969294183e-05, "loss": 2.0966, "step": 11157500 }, { "epoch": 32.3, "learning_rate": 3.385691604529456e-05, "loss": 2.0792, "step": 11158000 }, { "epoch": 32.3, "learning_rate": 3.385619239764728e-05, "loss": 2.0907, "step": 11158500 }, { "epoch": 32.3, "learning_rate": 3.3855468750000004e-05, "loss": 2.0776, "step": 11159000 }, { "epoch": 32.3, "learning_rate": 3.3854745102352726e-05, "loss": 2.0798, "step": 11159500 }, { "epoch": 32.3, "learning_rate": 3.385402145470545e-05, "loss": 2.0812, "step": 11160000 }, { "epoch": 32.31, "learning_rate": 3.385329780705817e-05, "loss": 2.094, "step": 11160500 }, { "epoch": 32.31, "learning_rate": 3.385257415941089e-05, "loss": 2.0936, "step": 11161000 }, { "epoch": 32.31, "learning_rate": 3.3851850511763615e-05, "loss": 2.09, "step": 11161500 }, { "epoch": 32.31, "learning_rate": 3.385112686411634e-05, "loss": 2.0672, "step": 11162000 }, { "epoch": 32.31, "learning_rate": 3.385040466376436e-05, "loss": 2.0852, "step": 11162500 }, { "epoch": 32.31, "learning_rate": 3.384968101611708e-05, "loss": 2.0829, "step": 11163000 }, { "epoch": 32.31, "learning_rate": 3.38489588157651e-05, "loss": 2.0884, "step": 11163500 }, { "epoch": 32.32, "learning_rate": 3.384823516811783e-05, "loss": 2.0782, "step": 11164000 }, { "epoch": 32.32, "learning_rate": 3.384751152047055e-05, "loss": 2.0956, "step": 11164500 }, { "epoch": 32.32, "learning_rate": 3.384678787282327e-05, "loss": 2.084, "step": 11165000 }, { "epoch": 32.32, "learning_rate": 3.3846064225175993e-05, "loss": 2.08, "step": 11165500 }, { "epoch": 32.32, "learning_rate": 3.384534347211931e-05, "loss": 2.0759, "step": 11166000 }, { "epoch": 32.32, "learning_rate": 3.384461982447203e-05, "loss": 2.1057, "step": 11166500 }, { "epoch": 32.32, "learning_rate": 3.384389762412005e-05, "loss": 2.1112, "step": 11167000 }, { "epoch": 32.33, "learning_rate": 3.384317397647277e-05, "loss": 2.0893, "step": 11167500 }, { "epoch": 32.33, "learning_rate": 3.384245032882549e-05, "loss": 2.1149, "step": 11168000 }, { "epoch": 32.33, "learning_rate": 3.3841726681178214e-05, "loss": 2.0912, "step": 11168500 }, { "epoch": 32.33, "learning_rate": 3.3841003033530936e-05, "loss": 2.0752, "step": 11169000 }, { "epoch": 32.33, "learning_rate": 3.384027938588366e-05, "loss": 2.0858, "step": 11169500 }, { "epoch": 32.33, "learning_rate": 3.383955573823638e-05, "loss": 2.089, "step": 11170000 }, { "epoch": 32.33, "learning_rate": 3.383883209058911e-05, "loss": 2.0805, "step": 11170500 }, { "epoch": 32.34, "learning_rate": 3.383810844294183e-05, "loss": 2.0929, "step": 11171000 }, { "epoch": 32.34, "learning_rate": 3.383738479529456e-05, "loss": 2.0887, "step": 11171500 }, { "epoch": 32.34, "learning_rate": 3.383666114764728e-05, "loss": 2.1026, "step": 11172000 }, { "epoch": 32.34, "learning_rate": 3.3835937500000005e-05, "loss": 2.0894, "step": 11172500 }, { "epoch": 32.34, "learning_rate": 3.383521385235273e-05, "loss": 2.0812, "step": 11173000 }, { "epoch": 32.34, "learning_rate": 3.383449165200074e-05, "loss": 2.0684, "step": 11173500 }, { "epoch": 32.34, "learning_rate": 3.3833768004353465e-05, "loss": 2.0899, "step": 11174000 }, { "epoch": 32.35, "learning_rate": 3.383304435670619e-05, "loss": 2.064, "step": 11174500 }, { "epoch": 32.35, "learning_rate": 3.383232070905891e-05, "loss": 2.079, "step": 11175000 }, { "epoch": 32.35, "learning_rate": 3.383159706141163e-05, "loss": 2.0886, "step": 11175500 }, { "epoch": 32.35, "learning_rate": 3.383087341376436e-05, "loss": 2.1082, "step": 11176000 }, { "epoch": 32.35, "learning_rate": 3.383014976611708e-05, "loss": 2.0656, "step": 11176500 }, { "epoch": 32.35, "learning_rate": 3.3829426118469805e-05, "loss": 2.0977, "step": 11177000 }, { "epoch": 32.35, "learning_rate": 3.382870247082253e-05, "loss": 2.0813, "step": 11177500 }, { "epoch": 32.36, "learning_rate": 3.382797882317525e-05, "loss": 2.1031, "step": 11178000 }, { "epoch": 32.36, "learning_rate": 3.3827256622823265e-05, "loss": 2.0898, "step": 11178500 }, { "epoch": 32.36, "learning_rate": 3.3826532975175994e-05, "loss": 2.0997, "step": 11179000 }, { "epoch": 32.36, "learning_rate": 3.3825809327528717e-05, "loss": 2.0862, "step": 11179500 }, { "epoch": 32.36, "learning_rate": 3.382508567988144e-05, "loss": 2.1061, "step": 11180000 }, { "epoch": 32.36, "learning_rate": 3.382436203223416e-05, "loss": 2.0902, "step": 11180500 }, { "epoch": 32.36, "learning_rate": 3.382363838458688e-05, "loss": 2.0885, "step": 11181000 }, { "epoch": 32.37, "learning_rate": 3.3822916184234906e-05, "loss": 2.0977, "step": 11181500 }, { "epoch": 32.37, "learning_rate": 3.382219253658763e-05, "loss": 2.0862, "step": 11182000 }, { "epoch": 32.37, "learning_rate": 3.382146888894035e-05, "loss": 2.0839, "step": 11182500 }, { "epoch": 32.37, "learning_rate": 3.382074524129307e-05, "loss": 2.0732, "step": 11183000 }, { "epoch": 32.37, "learning_rate": 3.3820021593645794e-05, "loss": 2.079, "step": 11183500 }, { "epoch": 32.37, "learning_rate": 3.381929794599852e-05, "loss": 2.1104, "step": 11184000 }, { "epoch": 32.37, "learning_rate": 3.381857429835124e-05, "loss": 2.0937, "step": 11184500 }, { "epoch": 32.38, "learning_rate": 3.381785065070396e-05, "loss": 2.0813, "step": 11185000 }, { "epoch": 32.38, "learning_rate": 3.3817127003056683e-05, "loss": 2.0905, "step": 11185500 }, { "epoch": 32.38, "learning_rate": 3.381640335540941e-05, "loss": 2.1026, "step": 11186000 }, { "epoch": 32.38, "learning_rate": 3.3815679707762135e-05, "loss": 2.0749, "step": 11186500 }, { "epoch": 32.38, "learning_rate": 3.381495750741016e-05, "loss": 2.1167, "step": 11187000 }, { "epoch": 32.38, "learning_rate": 3.381423385976288e-05, "loss": 2.0801, "step": 11187500 }, { "epoch": 32.38, "learning_rate": 3.38135102121156e-05, "loss": 2.0857, "step": 11188000 }, { "epoch": 32.39, "learning_rate": 3.3812786564468324e-05, "loss": 2.1024, "step": 11188500 }, { "epoch": 32.39, "learning_rate": 3.3812062916821046e-05, "loss": 2.0947, "step": 11189000 }, { "epoch": 32.39, "learning_rate": 3.381133926917377e-05, "loss": 2.0816, "step": 11189500 }, { "epoch": 32.39, "learning_rate": 3.381061562152649e-05, "loss": 2.0785, "step": 11190000 }, { "epoch": 32.39, "learning_rate": 3.380989197387921e-05, "loss": 2.0753, "step": 11190500 }, { "epoch": 32.39, "learning_rate": 3.3809168326231935e-05, "loss": 2.0976, "step": 11191000 }, { "epoch": 32.39, "learning_rate": 3.3808444678584664e-05, "loss": 2.0756, "step": 11191500 }, { "epoch": 32.4, "learning_rate": 3.380772247823268e-05, "loss": 2.0869, "step": 11192000 }, { "epoch": 32.4, "learning_rate": 3.38069988305854e-05, "loss": 2.0933, "step": 11192500 }, { "epoch": 32.4, "learning_rate": 3.3806275182938124e-05, "loss": 2.09, "step": 11193000 }, { "epoch": 32.4, "learning_rate": 3.380555442988144e-05, "loss": 2.0881, "step": 11193500 }, { "epoch": 32.4, "learning_rate": 3.380483078223416e-05, "loss": 2.1068, "step": 11194000 }, { "epoch": 32.4, "learning_rate": 3.380410713458689e-05, "loss": 2.0821, "step": 11194500 }, { "epoch": 32.4, "learning_rate": 3.380338348693961e-05, "loss": 2.1017, "step": 11195000 }, { "epoch": 32.41, "learning_rate": 3.3802659839292335e-05, "loss": 2.0759, "step": 11195500 }, { "epoch": 32.41, "learning_rate": 3.380193619164506e-05, "loss": 2.072, "step": 11196000 }, { "epoch": 32.41, "learning_rate": 3.380121254399778e-05, "loss": 2.1036, "step": 11196500 }, { "epoch": 32.41, "learning_rate": 3.38004888963505e-05, "loss": 2.1077, "step": 11197000 }, { "epoch": 32.41, "learning_rate": 3.3799765248703224e-05, "loss": 2.0965, "step": 11197500 }, { "epoch": 32.41, "learning_rate": 3.3799041601055946e-05, "loss": 2.0795, "step": 11198000 }, { "epoch": 32.42, "learning_rate": 3.379831795340867e-05, "loss": 2.0776, "step": 11198500 }, { "epoch": 32.42, "learning_rate": 3.379759430576139e-05, "loss": 2.0629, "step": 11199000 }, { "epoch": 32.42, "learning_rate": 3.379687065811411e-05, "loss": 2.0885, "step": 11199500 }, { "epoch": 32.42, "learning_rate": 3.379614990505743e-05, "loss": 2.0822, "step": 11200000 }, { "epoch": 32.42, "learning_rate": 3.379542625741015e-05, "loss": 2.0746, "step": 11200500 }, { "epoch": 32.42, "learning_rate": 3.379470260976288e-05, "loss": 2.0891, "step": 11201000 }, { "epoch": 32.42, "learning_rate": 3.37939789621156e-05, "loss": 2.0929, "step": 11201500 }, { "epoch": 32.43, "learning_rate": 3.3793255314468324e-05, "loss": 2.0688, "step": 11202000 }, { "epoch": 32.43, "learning_rate": 3.379253166682105e-05, "loss": 2.0744, "step": 11202500 }, { "epoch": 32.43, "learning_rate": 3.379180801917377e-05, "loss": 2.1042, "step": 11203000 }, { "epoch": 32.43, "learning_rate": 3.379108437152649e-05, "loss": 2.0733, "step": 11203500 }, { "epoch": 32.43, "learning_rate": 3.3790362171174514e-05, "loss": 2.0711, "step": 11204000 }, { "epoch": 32.43, "learning_rate": 3.378963997082253e-05, "loss": 2.0879, "step": 11204500 }, { "epoch": 32.43, "learning_rate": 3.378891632317525e-05, "loss": 2.0799, "step": 11205000 }, { "epoch": 32.44, "learning_rate": 3.3788192675527974e-05, "loss": 2.0805, "step": 11205500 }, { "epoch": 32.44, "learning_rate": 3.3787469027880696e-05, "loss": 2.0987, "step": 11206000 }, { "epoch": 32.44, "learning_rate": 3.378674682752871e-05, "loss": 2.1066, "step": 11206500 }, { "epoch": 32.44, "learning_rate": 3.378602317988144e-05, "loss": 2.0684, "step": 11207000 }, { "epoch": 32.44, "learning_rate": 3.3785300979529456e-05, "loss": 2.0823, "step": 11207500 }, { "epoch": 32.44, "learning_rate": 3.378457733188218e-05, "loss": 2.1032, "step": 11208000 }, { "epoch": 32.44, "learning_rate": 3.37838536842349e-05, "loss": 2.0986, "step": 11208500 }, { "epoch": 32.45, "learning_rate": 3.378313003658763e-05, "loss": 2.0898, "step": 11209000 }, { "epoch": 32.45, "learning_rate": 3.378240638894035e-05, "loss": 2.0918, "step": 11209500 }, { "epoch": 32.45, "learning_rate": 3.3781682741293074e-05, "loss": 2.0964, "step": 11210000 }, { "epoch": 32.45, "learning_rate": 3.3780959093645796e-05, "loss": 2.0627, "step": 11210500 }, { "epoch": 32.45, "learning_rate": 3.378023544599852e-05, "loss": 2.1028, "step": 11211000 }, { "epoch": 32.45, "learning_rate": 3.377951179835124e-05, "loss": 2.0859, "step": 11211500 }, { "epoch": 32.45, "learning_rate": 3.377878815070396e-05, "loss": 2.0824, "step": 11212000 }, { "epoch": 32.46, "learning_rate": 3.377806450305669e-05, "loss": 2.0965, "step": 11212500 }, { "epoch": 32.46, "learning_rate": 3.3777340855409414e-05, "loss": 2.0746, "step": 11213000 }, { "epoch": 32.46, "learning_rate": 3.3776617207762136e-05, "loss": 2.0992, "step": 11213500 }, { "epoch": 32.46, "learning_rate": 3.377589500741015e-05, "loss": 2.1114, "step": 11214000 }, { "epoch": 32.46, "learning_rate": 3.3775171359762874e-05, "loss": 2.1001, "step": 11214500 }, { "epoch": 32.46, "learning_rate": 3.377444915941089e-05, "loss": 2.0893, "step": 11215000 }, { "epoch": 32.46, "learning_rate": 3.377372551176362e-05, "loss": 2.0945, "step": 11215500 }, { "epoch": 32.47, "learning_rate": 3.377300186411634e-05, "loss": 2.0785, "step": 11216000 }, { "epoch": 32.47, "learning_rate": 3.377227821646907e-05, "loss": 2.0726, "step": 11216500 }, { "epoch": 32.47, "learning_rate": 3.377155456882179e-05, "loss": 2.0993, "step": 11217000 }, { "epoch": 32.47, "learning_rate": 3.377083236846981e-05, "loss": 2.1135, "step": 11217500 }, { "epoch": 32.47, "learning_rate": 3.377010872082253e-05, "loss": 2.0802, "step": 11218000 }, { "epoch": 32.47, "learning_rate": 3.376938507317525e-05, "loss": 2.1042, "step": 11218500 }, { "epoch": 32.47, "learning_rate": 3.3768661425527974e-05, "loss": 2.0902, "step": 11219000 }, { "epoch": 32.48, "learning_rate": 3.3767937777880697e-05, "loss": 2.0698, "step": 11219500 }, { "epoch": 32.48, "learning_rate": 3.376721557752872e-05, "loss": 2.0865, "step": 11220000 }, { "epoch": 32.48, "learning_rate": 3.376649192988144e-05, "loss": 2.0949, "step": 11220500 }, { "epoch": 32.48, "learning_rate": 3.376576828223416e-05, "loss": 2.094, "step": 11221000 }, { "epoch": 32.48, "learning_rate": 3.3765044634586886e-05, "loss": 2.1094, "step": 11221500 }, { "epoch": 32.48, "learning_rate": 3.37643224342349e-05, "loss": 2.0853, "step": 11222000 }, { "epoch": 32.48, "learning_rate": 3.3763598786587623e-05, "loss": 2.1028, "step": 11222500 }, { "epoch": 32.49, "learning_rate": 3.3762875138940346e-05, "loss": 2.0865, "step": 11223000 }, { "epoch": 32.49, "learning_rate": 3.3762151491293075e-05, "loss": 2.0944, "step": 11223500 }, { "epoch": 32.49, "learning_rate": 3.37614278436458e-05, "loss": 2.0765, "step": 11224000 }, { "epoch": 32.49, "learning_rate": 3.376070419599852e-05, "loss": 2.0894, "step": 11224500 }, { "epoch": 32.49, "learning_rate": 3.375998054835124e-05, "loss": 2.0821, "step": 11225000 }, { "epoch": 32.49, "learning_rate": 3.375925690070397e-05, "loss": 2.093, "step": 11225500 }, { "epoch": 32.49, "learning_rate": 3.375853325305669e-05, "loss": 2.084, "step": 11226000 }, { "epoch": 32.5, "learning_rate": 3.375781105270471e-05, "loss": 2.0619, "step": 11226500 }, { "epoch": 32.5, "learning_rate": 3.375708740505743e-05, "loss": 2.101, "step": 11227000 }, { "epoch": 32.5, "learning_rate": 3.375636375741015e-05, "loss": 2.1095, "step": 11227500 }, { "epoch": 32.5, "learning_rate": 3.375564155705817e-05, "loss": 2.0583, "step": 11228000 }, { "epoch": 32.5, "learning_rate": 3.375491790941089e-05, "loss": 2.0993, "step": 11228500 }, { "epoch": 32.5, "learning_rate": 3.375419426176362e-05, "loss": 2.1048, "step": 11229000 }, { "epoch": 32.5, "learning_rate": 3.375347061411634e-05, "loss": 2.1003, "step": 11229500 }, { "epoch": 32.51, "learning_rate": 3.3752746966469064e-05, "loss": 2.0843, "step": 11230000 }, { "epoch": 32.51, "learning_rate": 3.3752023318821786e-05, "loss": 2.0868, "step": 11230500 }, { "epoch": 32.51, "learning_rate": 3.375130111846981e-05, "loss": 2.0789, "step": 11231000 }, { "epoch": 32.51, "learning_rate": 3.375057747082253e-05, "loss": 2.084, "step": 11231500 }, { "epoch": 32.51, "learning_rate": 3.374985382317525e-05, "loss": 2.0965, "step": 11232000 }, { "epoch": 32.51, "learning_rate": 3.3749130175527975e-05, "loss": 2.1112, "step": 11232500 }, { "epoch": 32.51, "learning_rate": 3.37484065278807e-05, "loss": 2.0792, "step": 11233000 }, { "epoch": 32.52, "learning_rate": 3.374768288023342e-05, "loss": 2.0668, "step": 11233500 }, { "epoch": 32.52, "learning_rate": 3.374695923258614e-05, "loss": 2.083, "step": 11234000 }, { "epoch": 32.52, "learning_rate": 3.374623558493887e-05, "loss": 2.0988, "step": 11234500 }, { "epoch": 32.52, "learning_rate": 3.374551193729159e-05, "loss": 2.1102, "step": 11235000 }, { "epoch": 32.52, "learning_rate": 3.3744788289644315e-05, "loss": 2.09, "step": 11235500 }, { "epoch": 32.52, "learning_rate": 3.374406464199704e-05, "loss": 2.1049, "step": 11236000 }, { "epoch": 32.53, "learning_rate": 3.374334244164505e-05, "loss": 2.097, "step": 11236500 }, { "epoch": 32.53, "learning_rate": 3.374262024129307e-05, "loss": 2.0907, "step": 11237000 }, { "epoch": 32.53, "learning_rate": 3.374189659364579e-05, "loss": 2.102, "step": 11237500 }, { "epoch": 32.53, "learning_rate": 3.374117294599852e-05, "loss": 2.0797, "step": 11238000 }, { "epoch": 32.53, "learning_rate": 3.374044929835124e-05, "loss": 2.093, "step": 11238500 }, { "epoch": 32.53, "learning_rate": 3.373972565070397e-05, "loss": 2.0878, "step": 11239000 }, { "epoch": 32.53, "learning_rate": 3.373900200305669e-05, "loss": 2.092, "step": 11239500 }, { "epoch": 32.54, "learning_rate": 3.373827980270471e-05, "loss": 2.0813, "step": 11240000 }, { "epoch": 32.54, "learning_rate": 3.373755615505743e-05, "loss": 2.0877, "step": 11240500 }, { "epoch": 32.54, "learning_rate": 3.373683395470545e-05, "loss": 2.106, "step": 11241000 }, { "epoch": 32.54, "learning_rate": 3.373611030705817e-05, "loss": 2.0805, "step": 11241500 }, { "epoch": 32.54, "learning_rate": 3.37353866594109e-05, "loss": 2.0982, "step": 11242000 }, { "epoch": 32.54, "learning_rate": 3.373466301176362e-05, "loss": 2.088, "step": 11242500 }, { "epoch": 32.54, "learning_rate": 3.3733940811411636e-05, "loss": 2.1118, "step": 11243000 }, { "epoch": 32.55, "learning_rate": 3.373321716376436e-05, "loss": 2.0854, "step": 11243500 }, { "epoch": 32.55, "learning_rate": 3.373249351611708e-05, "loss": 2.12, "step": 11244000 }, { "epoch": 32.55, "learning_rate": 3.37317698684698e-05, "loss": 2.102, "step": 11244500 }, { "epoch": 32.55, "learning_rate": 3.3731046220822525e-05, "loss": 2.085, "step": 11245000 }, { "epoch": 32.55, "learning_rate": 3.373032257317525e-05, "loss": 2.0741, "step": 11245500 }, { "epoch": 32.55, "learning_rate": 3.3729598925527976e-05, "loss": 2.1125, "step": 11246000 }, { "epoch": 32.55, "learning_rate": 3.37288752778807e-05, "loss": 2.0879, "step": 11246500 }, { "epoch": 32.56, "learning_rate": 3.372815307752872e-05, "loss": 2.0923, "step": 11247000 }, { "epoch": 32.56, "learning_rate": 3.372742942988144e-05, "loss": 2.0812, "step": 11247500 }, { "epoch": 32.56, "learning_rate": 3.3726705782234165e-05, "loss": 2.103, "step": 11248000 }, { "epoch": 32.56, "learning_rate": 3.372598213458689e-05, "loss": 2.0896, "step": 11248500 }, { "epoch": 32.56, "learning_rate": 3.372525848693961e-05, "loss": 2.112, "step": 11249000 }, { "epoch": 32.56, "learning_rate": 3.372453483929233e-05, "loss": 2.1155, "step": 11249500 }, { "epoch": 32.56, "learning_rate": 3.3723811191645054e-05, "loss": 2.1146, "step": 11250000 }, { "epoch": 32.57, "learning_rate": 3.3723087543997776e-05, "loss": 2.1001, "step": 11250500 }, { "epoch": 32.57, "learning_rate": 3.37223638963505e-05, "loss": 2.1008, "step": 11251000 }, { "epoch": 32.57, "learning_rate": 3.372164024870322e-05, "loss": 2.1154, "step": 11251500 }, { "epoch": 32.57, "learning_rate": 3.372091804835124e-05, "loss": 2.073, "step": 11252000 }, { "epoch": 32.57, "learning_rate": 3.3720194400703965e-05, "loss": 2.073, "step": 11252500 }, { "epoch": 32.57, "learning_rate": 3.371947075305669e-05, "loss": 2.0811, "step": 11253000 }, { "epoch": 32.57, "learning_rate": 3.3718747105409416e-05, "loss": 2.1081, "step": 11253500 }, { "epoch": 32.58, "learning_rate": 3.371802345776214e-05, "loss": 2.1001, "step": 11254000 }, { "epoch": 32.58, "learning_rate": 3.371729981011486e-05, "loss": 2.0837, "step": 11254500 }, { "epoch": 32.58, "learning_rate": 3.371657616246758e-05, "loss": 2.0763, "step": 11255000 }, { "epoch": 32.58, "learning_rate": 3.37158539621156e-05, "loss": 2.0844, "step": 11255500 }, { "epoch": 32.58, "learning_rate": 3.371513031446832e-05, "loss": 2.1067, "step": 11256000 }, { "epoch": 32.58, "learning_rate": 3.371440666682105e-05, "loss": 2.0847, "step": 11256500 }, { "epoch": 32.58, "learning_rate": 3.371368301917377e-05, "loss": 2.1134, "step": 11257000 }, { "epoch": 32.59, "learning_rate": 3.371296081882179e-05, "loss": 2.0919, "step": 11257500 }, { "epoch": 32.59, "learning_rate": 3.371223717117451e-05, "loss": 2.0988, "step": 11258000 }, { "epoch": 32.59, "learning_rate": 3.371151352352723e-05, "loss": 2.0723, "step": 11258500 }, { "epoch": 32.59, "learning_rate": 3.3710789875879954e-05, "loss": 2.1005, "step": 11259000 }, { "epoch": 32.59, "learning_rate": 3.371006622823268e-05, "loss": 2.0693, "step": 11259500 }, { "epoch": 32.59, "learning_rate": 3.37093425805854e-05, "loss": 2.0951, "step": 11260000 }, { "epoch": 32.59, "learning_rate": 3.370862038023342e-05, "loss": 2.0944, "step": 11260500 }, { "epoch": 32.6, "learning_rate": 3.3707898179881444e-05, "loss": 2.115, "step": 11261000 }, { "epoch": 32.6, "learning_rate": 3.3707174532234166e-05, "loss": 2.071, "step": 11261500 }, { "epoch": 32.6, "learning_rate": 3.370645088458689e-05, "loss": 2.0731, "step": 11262000 }, { "epoch": 32.6, "learning_rate": 3.370572723693961e-05, "loss": 2.0894, "step": 11262500 }, { "epoch": 32.6, "learning_rate": 3.370500358929233e-05, "loss": 2.1161, "step": 11263000 }, { "epoch": 32.6, "learning_rate": 3.3704279941645055e-05, "loss": 2.0792, "step": 11263500 }, { "epoch": 32.6, "learning_rate": 3.370355629399778e-05, "loss": 2.1103, "step": 11264000 }, { "epoch": 32.61, "learning_rate": 3.37028326463505e-05, "loss": 2.0601, "step": 11264500 }, { "epoch": 32.61, "learning_rate": 3.370210899870322e-05, "loss": 2.0947, "step": 11265000 }, { "epoch": 32.61, "learning_rate": 3.370138535105595e-05, "loss": 2.0628, "step": 11265500 }, { "epoch": 32.61, "learning_rate": 3.370066170340867e-05, "loss": 2.0947, "step": 11266000 }, { "epoch": 32.61, "learning_rate": 3.3699938055761395e-05, "loss": 2.0868, "step": 11266500 }, { "epoch": 32.61, "learning_rate": 3.3699217302704704e-05, "loss": 2.0957, "step": 11267000 }, { "epoch": 32.61, "learning_rate": 3.3698493655057426e-05, "loss": 2.1232, "step": 11267500 }, { "epoch": 32.62, "learning_rate": 3.369777145470545e-05, "loss": 2.0901, "step": 11268000 }, { "epoch": 32.62, "learning_rate": 3.369704780705817e-05, "loss": 2.1072, "step": 11268500 }, { "epoch": 32.62, "learning_rate": 3.36963241594109e-05, "loss": 2.084, "step": 11269000 }, { "epoch": 32.62, "learning_rate": 3.369560051176362e-05, "loss": 2.0901, "step": 11269500 }, { "epoch": 32.62, "learning_rate": 3.3694876864116344e-05, "loss": 2.1101, "step": 11270000 }, { "epoch": 32.62, "learning_rate": 3.369415466376436e-05, "loss": 2.0799, "step": 11270500 }, { "epoch": 32.62, "learning_rate": 3.369343101611708e-05, "loss": 2.1083, "step": 11271000 }, { "epoch": 32.63, "learning_rate": 3.3692707368469804e-05, "loss": 2.0771, "step": 11271500 }, { "epoch": 32.63, "learning_rate": 3.3691985168117826e-05, "loss": 2.1, "step": 11272000 }, { "epoch": 32.63, "learning_rate": 3.369126152047055e-05, "loss": 2.0937, "step": 11272500 }, { "epoch": 32.63, "learning_rate": 3.369053787282327e-05, "loss": 2.0851, "step": 11273000 }, { "epoch": 32.63, "learning_rate": 3.368981422517599e-05, "loss": 2.0988, "step": 11273500 }, { "epoch": 32.63, "learning_rate": 3.368909202482401e-05, "loss": 2.1007, "step": 11274000 }, { "epoch": 32.64, "learning_rate": 3.368836837717673e-05, "loss": 2.1077, "step": 11274500 }, { "epoch": 32.64, "learning_rate": 3.368764472952945e-05, "loss": 2.0934, "step": 11275000 }, { "epoch": 32.64, "learning_rate": 3.3686921081882175e-05, "loss": 2.0773, "step": 11275500 }, { "epoch": 32.64, "learning_rate": 3.36861974342349e-05, "loss": 2.0915, "step": 11276000 }, { "epoch": 32.64, "learning_rate": 3.3685473786587627e-05, "loss": 2.108, "step": 11276500 }, { "epoch": 32.64, "learning_rate": 3.368475013894035e-05, "loss": 2.0966, "step": 11277000 }, { "epoch": 32.64, "learning_rate": 3.368402649129308e-05, "loss": 2.0984, "step": 11277500 }, { "epoch": 32.65, "learning_rate": 3.36833028436458e-05, "loss": 2.0917, "step": 11278000 }, { "epoch": 32.65, "learning_rate": 3.368257919599852e-05, "loss": 2.0909, "step": 11278500 }, { "epoch": 32.65, "learning_rate": 3.3681855548351245e-05, "loss": 2.1118, "step": 11279000 }, { "epoch": 32.65, "learning_rate": 3.368113190070397e-05, "loss": 2.1053, "step": 11279500 }, { "epoch": 32.65, "learning_rate": 3.368040825305669e-05, "loss": 2.1065, "step": 11280000 }, { "epoch": 32.65, "learning_rate": 3.367968460540941e-05, "loss": 2.1134, "step": 11280500 }, { "epoch": 32.65, "learning_rate": 3.3678960957762133e-05, "loss": 2.0757, "step": 11281000 }, { "epoch": 32.66, "learning_rate": 3.367823875741015e-05, "loss": 2.0862, "step": 11281500 }, { "epoch": 32.66, "learning_rate": 3.367751510976288e-05, "loss": 2.113, "step": 11282000 }, { "epoch": 32.66, "learning_rate": 3.36767914621156e-05, "loss": 2.1142, "step": 11282500 }, { "epoch": 32.66, "learning_rate": 3.367606781446832e-05, "loss": 2.105, "step": 11283000 }, { "epoch": 32.66, "learning_rate": 3.367534416682105e-05, "loss": 2.1001, "step": 11283500 }, { "epoch": 32.66, "learning_rate": 3.3674620519173774e-05, "loss": 2.084, "step": 11284000 }, { "epoch": 32.66, "learning_rate": 3.3673896871526496e-05, "loss": 2.0848, "step": 11284500 }, { "epoch": 32.67, "learning_rate": 3.367317467117451e-05, "loss": 2.0942, "step": 11285000 }, { "epoch": 32.67, "learning_rate": 3.3672451023527234e-05, "loss": 2.0953, "step": 11285500 }, { "epoch": 32.67, "learning_rate": 3.3671727375879956e-05, "loss": 2.0793, "step": 11286000 }, { "epoch": 32.67, "learning_rate": 3.367100372823268e-05, "loss": 2.0688, "step": 11286500 }, { "epoch": 32.67, "learning_rate": 3.36702800805854e-05, "loss": 2.0824, "step": 11287000 }, { "epoch": 32.67, "learning_rate": 3.366955643293813e-05, "loss": 2.0989, "step": 11287500 }, { "epoch": 32.67, "learning_rate": 3.366883278529085e-05, "loss": 2.085, "step": 11288000 }, { "epoch": 32.68, "learning_rate": 3.3668109137643574e-05, "loss": 2.1048, "step": 11288500 }, { "epoch": 32.68, "learning_rate": 3.3667385489996296e-05, "loss": 2.0837, "step": 11289000 }, { "epoch": 32.68, "learning_rate": 3.366666328964431e-05, "loss": 2.1116, "step": 11289500 }, { "epoch": 32.68, "learning_rate": 3.3665939641997034e-05, "loss": 2.0819, "step": 11290000 }, { "epoch": 32.68, "learning_rate": 3.3665215994349756e-05, "loss": 2.0911, "step": 11290500 }, { "epoch": 32.68, "learning_rate": 3.3664492346702485e-05, "loss": 2.0735, "step": 11291000 }, { "epoch": 32.68, "learning_rate": 3.366376869905521e-05, "loss": 2.1221, "step": 11291500 }, { "epoch": 32.69, "learning_rate": 3.366304649870323e-05, "loss": 2.0917, "step": 11292000 }, { "epoch": 32.69, "learning_rate": 3.3662324298351245e-05, "loss": 2.0987, "step": 11292500 }, { "epoch": 32.69, "learning_rate": 3.366160065070397e-05, "loss": 2.0973, "step": 11293000 }, { "epoch": 32.69, "learning_rate": 3.366087700305669e-05, "loss": 2.1181, "step": 11293500 }, { "epoch": 32.69, "learning_rate": 3.366015335540941e-05, "loss": 2.1046, "step": 11294000 }, { "epoch": 32.69, "learning_rate": 3.365943115505743e-05, "loss": 2.1022, "step": 11294500 }, { "epoch": 32.69, "learning_rate": 3.3658707507410157e-05, "loss": 2.0805, "step": 11295000 }, { "epoch": 32.7, "learning_rate": 3.365798385976288e-05, "loss": 2.1143, "step": 11295500 }, { "epoch": 32.7, "learning_rate": 3.36572602121156e-05, "loss": 2.1168, "step": 11296000 }, { "epoch": 32.7, "learning_rate": 3.365653656446832e-05, "loss": 2.0981, "step": 11296500 }, { "epoch": 32.7, "learning_rate": 3.365581436411634e-05, "loss": 2.1093, "step": 11297000 }, { "epoch": 32.7, "learning_rate": 3.3655092163764354e-05, "loss": 2.0904, "step": 11297500 }, { "epoch": 32.7, "learning_rate": 3.365436851611708e-05, "loss": 2.1018, "step": 11298000 }, { "epoch": 32.7, "learning_rate": 3.3653644868469806e-05, "loss": 2.1027, "step": 11298500 }, { "epoch": 32.71, "learning_rate": 3.365292122082253e-05, "loss": 2.0853, "step": 11299000 }, { "epoch": 32.71, "learning_rate": 3.365219757317526e-05, "loss": 2.1083, "step": 11299500 }, { "epoch": 32.71, "learning_rate": 3.365147392552798e-05, "loss": 2.0685, "step": 11300000 }, { "epoch": 32.71, "learning_rate": 3.36507502778807e-05, "loss": 2.1188, "step": 11300500 }, { "epoch": 32.71, "learning_rate": 3.3650026630233424e-05, "loss": 2.084, "step": 11301000 }, { "epoch": 32.71, "learning_rate": 3.3649302982586146e-05, "loss": 2.0781, "step": 11301500 }, { "epoch": 32.71, "learning_rate": 3.364857933493887e-05, "loss": 2.1031, "step": 11302000 }, { "epoch": 32.72, "learning_rate": 3.364785568729159e-05, "loss": 2.1133, "step": 11302500 }, { "epoch": 32.72, "learning_rate": 3.364713203964431e-05, "loss": 2.0922, "step": 11303000 }, { "epoch": 32.72, "learning_rate": 3.3646408391997035e-05, "loss": 2.0739, "step": 11303500 }, { "epoch": 32.72, "learning_rate": 3.364568474434976e-05, "loss": 2.1121, "step": 11304000 }, { "epoch": 32.72, "learning_rate": 3.364496109670248e-05, "loss": 2.0926, "step": 11304500 }, { "epoch": 32.72, "learning_rate": 3.36442374490552e-05, "loss": 2.1013, "step": 11305000 }, { "epoch": 32.72, "learning_rate": 3.364351380140793e-05, "loss": 2.1293, "step": 11305500 }, { "epoch": 32.73, "learning_rate": 3.3642793048351246e-05, "loss": 2.12, "step": 11306000 }, { "epoch": 32.73, "learning_rate": 3.364207084799926e-05, "loss": 2.081, "step": 11306500 }, { "epoch": 32.73, "learning_rate": 3.3641347200351984e-05, "loss": 2.0862, "step": 11307000 }, { "epoch": 32.73, "learning_rate": 3.3640623552704706e-05, "loss": 2.1169, "step": 11307500 }, { "epoch": 32.73, "learning_rate": 3.363990135235273e-05, "loss": 2.0968, "step": 11308000 }, { "epoch": 32.73, "learning_rate": 3.363917770470545e-05, "loss": 2.1026, "step": 11308500 }, { "epoch": 32.73, "learning_rate": 3.363845405705817e-05, "loss": 2.0812, "step": 11309000 }, { "epoch": 32.74, "learning_rate": 3.3637730409410895e-05, "loss": 2.1379, "step": 11309500 }, { "epoch": 32.74, "learning_rate": 3.363700676176362e-05, "loss": 2.0791, "step": 11310000 }, { "epoch": 32.74, "learning_rate": 3.363628311411634e-05, "loss": 2.0931, "step": 11310500 }, { "epoch": 32.74, "learning_rate": 3.363555946646906e-05, "loss": 2.098, "step": 11311000 }, { "epoch": 32.74, "learning_rate": 3.363483726611708e-05, "loss": 2.084, "step": 11311500 }, { "epoch": 32.74, "learning_rate": 3.3634113618469806e-05, "loss": 2.0872, "step": 11312000 }, { "epoch": 32.75, "learning_rate": 3.363338997082253e-05, "loss": 2.0726, "step": 11312500 }, { "epoch": 32.75, "learning_rate": 3.363266632317525e-05, "loss": 2.1144, "step": 11313000 }, { "epoch": 32.75, "learning_rate": 3.363194267552798e-05, "loss": 2.0936, "step": 11313500 }, { "epoch": 32.75, "learning_rate": 3.3631220475175995e-05, "loss": 2.089, "step": 11314000 }, { "epoch": 32.75, "learning_rate": 3.363049682752872e-05, "loss": 2.0971, "step": 11314500 }, { "epoch": 32.75, "learning_rate": 3.362977317988144e-05, "loss": 2.0992, "step": 11315000 }, { "epoch": 32.75, "learning_rate": 3.3629052426824756e-05, "loss": 2.1014, "step": 11315500 }, { "epoch": 32.76, "learning_rate": 3.362832877917748e-05, "loss": 2.0929, "step": 11316000 }, { "epoch": 32.76, "learning_rate": 3.36276051315302e-05, "loss": 2.1029, "step": 11316500 }, { "epoch": 32.76, "learning_rate": 3.362688148388292e-05, "loss": 2.0849, "step": 11317000 }, { "epoch": 32.76, "learning_rate": 3.3626157836235645e-05, "loss": 2.102, "step": 11317500 }, { "epoch": 32.76, "learning_rate": 3.362543418858837e-05, "loss": 2.0979, "step": 11318000 }, { "epoch": 32.76, "learning_rate": 3.362471054094109e-05, "loss": 2.089, "step": 11318500 }, { "epoch": 32.76, "learning_rate": 3.362398689329381e-05, "loss": 2.0959, "step": 11319000 }, { "epoch": 32.77, "learning_rate": 3.3623263245646533e-05, "loss": 2.0978, "step": 11319500 }, { "epoch": 32.77, "learning_rate": 3.3622539597999256e-05, "loss": 2.0868, "step": 11320000 }, { "epoch": 32.77, "learning_rate": 3.3621815950351985e-05, "loss": 2.1081, "step": 11320500 }, { "epoch": 32.77, "learning_rate": 3.362109230270471e-05, "loss": 2.0985, "step": 11321000 }, { "epoch": 32.77, "learning_rate": 3.3620368655057436e-05, "loss": 2.095, "step": 11321500 }, { "epoch": 32.77, "learning_rate": 3.361964500741016e-05, "loss": 2.1272, "step": 11322000 }, { "epoch": 32.77, "learning_rate": 3.361892135976288e-05, "loss": 2.0827, "step": 11322500 }, { "epoch": 32.78, "learning_rate": 3.36181977121156e-05, "loss": 2.079, "step": 11323000 }, { "epoch": 32.78, "learning_rate": 3.361747551176362e-05, "loss": 2.1054, "step": 11323500 }, { "epoch": 32.78, "learning_rate": 3.361675186411634e-05, "loss": 2.0962, "step": 11324000 }, { "epoch": 32.78, "learning_rate": 3.361602821646906e-05, "loss": 2.1041, "step": 11324500 }, { "epoch": 32.78, "learning_rate": 3.3615304568821785e-05, "loss": 2.0852, "step": 11325000 }, { "epoch": 32.78, "learning_rate": 3.361458092117451e-05, "loss": 2.106, "step": 11325500 }, { "epoch": 32.78, "learning_rate": 3.3613857273527236e-05, "loss": 2.0935, "step": 11326000 }, { "epoch": 32.79, "learning_rate": 3.361313362587996e-05, "loss": 2.076, "step": 11326500 }, { "epoch": 32.79, "learning_rate": 3.361240997823268e-05, "loss": 2.0947, "step": 11327000 }, { "epoch": 32.79, "learning_rate": 3.36116863305854e-05, "loss": 2.0925, "step": 11327500 }, { "epoch": 32.79, "learning_rate": 3.3610962682938125e-05, "loss": 2.0888, "step": 11328000 }, { "epoch": 32.79, "learning_rate": 3.3610239035290854e-05, "loss": 2.0735, "step": 11328500 }, { "epoch": 32.79, "learning_rate": 3.360951683493887e-05, "loss": 2.0968, "step": 11329000 }, { "epoch": 32.79, "learning_rate": 3.360879318729159e-05, "loss": 2.0888, "step": 11329500 }, { "epoch": 32.8, "learning_rate": 3.3608069539644314e-05, "loss": 2.0904, "step": 11330000 }, { "epoch": 32.8, "learning_rate": 3.3607345891997036e-05, "loss": 2.1117, "step": 11330500 }, { "epoch": 32.8, "learning_rate": 3.360662369164506e-05, "loss": 2.0867, "step": 11331000 }, { "epoch": 32.8, "learning_rate": 3.360590004399778e-05, "loss": 2.1024, "step": 11331500 }, { "epoch": 32.8, "learning_rate": 3.36051763963505e-05, "loss": 2.0573, "step": 11332000 }, { "epoch": 32.8, "learning_rate": 3.3604452748703225e-05, "loss": 2.0718, "step": 11332500 }, { "epoch": 32.8, "learning_rate": 3.360373054835124e-05, "loss": 2.0827, "step": 11333000 }, { "epoch": 32.81, "learning_rate": 3.360300690070396e-05, "loss": 2.062, "step": 11333500 }, { "epoch": 32.81, "learning_rate": 3.3602283253056685e-05, "loss": 2.1088, "step": 11334000 }, { "epoch": 32.81, "learning_rate": 3.360155960540941e-05, "loss": 2.0983, "step": 11334500 }, { "epoch": 32.81, "learning_rate": 3.360083595776214e-05, "loss": 2.1146, "step": 11335000 }, { "epoch": 32.81, "learning_rate": 3.360011231011486e-05, "loss": 2.0939, "step": 11335500 }, { "epoch": 32.81, "learning_rate": 3.359938866246759e-05, "loss": 2.0852, "step": 11336000 }, { "epoch": 32.81, "learning_rate": 3.359866501482031e-05, "loss": 2.1146, "step": 11336500 }, { "epoch": 32.82, "learning_rate": 3.359794136717303e-05, "loss": 2.0949, "step": 11337000 }, { "epoch": 32.82, "learning_rate": 3.3597217719525755e-05, "loss": 2.0887, "step": 11337500 }, { "epoch": 32.82, "learning_rate": 3.359649407187848e-05, "loss": 2.0961, "step": 11338000 }, { "epoch": 32.82, "learning_rate": 3.359577187152649e-05, "loss": 2.1032, "step": 11338500 }, { "epoch": 32.82, "learning_rate": 3.3595048223879215e-05, "loss": 2.1102, "step": 11339000 }, { "epoch": 32.82, "learning_rate": 3.359432457623194e-05, "loss": 2.1022, "step": 11339500 }, { "epoch": 32.82, "learning_rate": 3.359360092858466e-05, "loss": 2.1093, "step": 11340000 }, { "epoch": 32.83, "learning_rate": 3.359287728093739e-05, "loss": 2.0867, "step": 11340500 }, { "epoch": 32.83, "learning_rate": 3.359215363329011e-05, "loss": 2.0863, "step": 11341000 }, { "epoch": 32.83, "learning_rate": 3.3591431432938126e-05, "loss": 2.1054, "step": 11341500 }, { "epoch": 32.83, "learning_rate": 3.359070778529085e-05, "loss": 2.0876, "step": 11342000 }, { "epoch": 32.83, "learning_rate": 3.358998413764357e-05, "loss": 2.0917, "step": 11342500 }, { "epoch": 32.83, "learning_rate": 3.35892604899963e-05, "loss": 2.0925, "step": 11343000 }, { "epoch": 32.83, "learning_rate": 3.358853684234902e-05, "loss": 2.109, "step": 11343500 }, { "epoch": 32.84, "learning_rate": 3.3587813194701744e-05, "loss": 2.1015, "step": 11344000 }, { "epoch": 32.84, "learning_rate": 3.3587089547054466e-05, "loss": 2.1005, "step": 11344500 }, { "epoch": 32.84, "learning_rate": 3.358636589940719e-05, "loss": 2.0668, "step": 11345000 }, { "epoch": 32.84, "learning_rate": 3.358564369905521e-05, "loss": 2.1128, "step": 11345500 }, { "epoch": 32.84, "learning_rate": 3.358492005140793e-05, "loss": 2.1104, "step": 11346000 }, { "epoch": 32.84, "learning_rate": 3.3584196403760655e-05, "loss": 2.0704, "step": 11346500 }, { "epoch": 32.84, "learning_rate": 3.358347275611338e-05, "loss": 2.1089, "step": 11347000 }, { "epoch": 32.85, "learning_rate": 3.35827491084661e-05, "loss": 2.1091, "step": 11347500 }, { "epoch": 32.85, "learning_rate": 3.358202546081882e-05, "loss": 2.0904, "step": 11348000 }, { "epoch": 32.85, "learning_rate": 3.3581301813171544e-05, "loss": 2.1002, "step": 11348500 }, { "epoch": 32.85, "learning_rate": 3.3580578165524266e-05, "loss": 2.0919, "step": 11349000 }, { "epoch": 32.85, "learning_rate": 3.357985451787699e-05, "loss": 2.1037, "step": 11349500 }, { "epoch": 32.85, "learning_rate": 3.357913087022971e-05, "loss": 2.0985, "step": 11350000 }, { "epoch": 32.86, "learning_rate": 3.357840866987774e-05, "loss": 2.097, "step": 11350500 }, { "epoch": 32.86, "learning_rate": 3.3577686469525755e-05, "loss": 2.1152, "step": 11351000 }, { "epoch": 32.86, "learning_rate": 3.357696282187848e-05, "loss": 2.1039, "step": 11351500 }, { "epoch": 32.86, "learning_rate": 3.35762391742312e-05, "loss": 2.0926, "step": 11352000 }, { "epoch": 32.86, "learning_rate": 3.357551552658392e-05, "loss": 2.0684, "step": 11352500 }, { "epoch": 32.86, "learning_rate": 3.3574791878936644e-05, "loss": 2.0872, "step": 11353000 }, { "epoch": 32.86, "learning_rate": 3.3574068231289367e-05, "loss": 2.0898, "step": 11353500 }, { "epoch": 32.87, "learning_rate": 3.357334458364209e-05, "loss": 2.0997, "step": 11354000 }, { "epoch": 32.87, "learning_rate": 3.357262093599481e-05, "loss": 2.1043, "step": 11354500 }, { "epoch": 32.87, "learning_rate": 3.357189728834754e-05, "loss": 2.1159, "step": 11355000 }, { "epoch": 32.87, "learning_rate": 3.357117364070026e-05, "loss": 2.0984, "step": 11355500 }, { "epoch": 32.87, "learning_rate": 3.3570449993052984e-05, "loss": 2.1179, "step": 11356000 }, { "epoch": 32.87, "learning_rate": 3.356972634540571e-05, "loss": 2.1214, "step": 11356500 }, { "epoch": 32.87, "learning_rate": 3.356900414505372e-05, "loss": 2.0721, "step": 11357000 }, { "epoch": 32.88, "learning_rate": 3.3568280497406445e-05, "loss": 2.1199, "step": 11357500 }, { "epoch": 32.88, "learning_rate": 3.3567556849759174e-05, "loss": 2.0843, "step": 11358000 }, { "epoch": 32.88, "learning_rate": 3.3566833202111896e-05, "loss": 2.0991, "step": 11358500 }, { "epoch": 32.88, "learning_rate": 3.356610955446462e-05, "loss": 2.1026, "step": 11359000 }, { "epoch": 32.88, "learning_rate": 3.356538590681734e-05, "loss": 2.0788, "step": 11359500 }, { "epoch": 32.88, "learning_rate": 3.356466370646536e-05, "loss": 2.1061, "step": 11360000 }, { "epoch": 32.88, "learning_rate": 3.3563940058818085e-05, "loss": 2.0739, "step": 11360500 }, { "epoch": 32.89, "learning_rate": 3.356321641117081e-05, "loss": 2.0938, "step": 11361000 }, { "epoch": 32.89, "learning_rate": 3.356249276352353e-05, "loss": 2.1041, "step": 11361500 }, { "epoch": 32.89, "learning_rate": 3.356177201046684e-05, "loss": 2.096, "step": 11362000 }, { "epoch": 32.89, "learning_rate": 3.356104836281957e-05, "loss": 2.0736, "step": 11362500 }, { "epoch": 32.89, "learning_rate": 3.356032471517229e-05, "loss": 2.0807, "step": 11363000 }, { "epoch": 32.89, "learning_rate": 3.355960106752501e-05, "loss": 2.087, "step": 11363500 }, { "epoch": 32.89, "learning_rate": 3.355887886717303e-05, "loss": 2.1138, "step": 11364000 }, { "epoch": 32.9, "learning_rate": 3.355815521952575e-05, "loss": 2.1079, "step": 11364500 }, { "epoch": 32.9, "learning_rate": 3.355743157187847e-05, "loss": 2.083, "step": 11365000 }, { "epoch": 32.9, "learning_rate": 3.35567079242312e-05, "loss": 2.1071, "step": 11365500 }, { "epoch": 32.9, "learning_rate": 3.3555985723879216e-05, "loss": 2.1076, "step": 11366000 }, { "epoch": 32.9, "learning_rate": 3.355526207623194e-05, "loss": 2.084, "step": 11366500 }, { "epoch": 32.9, "learning_rate": 3.355453842858467e-05, "loss": 2.0993, "step": 11367000 }, { "epoch": 32.9, "learning_rate": 3.355381478093739e-05, "loss": 2.1086, "step": 11367500 }, { "epoch": 32.91, "learning_rate": 3.355309113329011e-05, "loss": 2.0906, "step": 11368000 }, { "epoch": 32.91, "learning_rate": 3.3552367485642834e-05, "loss": 2.1024, "step": 11368500 }, { "epoch": 32.91, "learning_rate": 3.3551643837995556e-05, "loss": 2.0882, "step": 11369000 }, { "epoch": 32.91, "learning_rate": 3.355092019034828e-05, "loss": 2.0854, "step": 11369500 }, { "epoch": 32.91, "learning_rate": 3.3550197989996294e-05, "loss": 2.0926, "step": 11370000 }, { "epoch": 32.91, "learning_rate": 3.3549474342349016e-05, "loss": 2.1039, "step": 11370500 }, { "epoch": 32.91, "learning_rate": 3.354875069470174e-05, "loss": 2.1137, "step": 11371000 }, { "epoch": 32.92, "learning_rate": 3.354802849434976e-05, "loss": 2.0968, "step": 11371500 }, { "epoch": 32.92, "learning_rate": 3.354730484670248e-05, "loss": 2.0875, "step": 11372000 }, { "epoch": 32.92, "learning_rate": 3.3546581199055205e-05, "loss": 2.0789, "step": 11372500 }, { "epoch": 32.92, "learning_rate": 3.354585899870322e-05, "loss": 2.1142, "step": 11373000 }, { "epoch": 32.92, "learning_rate": 3.354513535105595e-05, "loss": 2.0667, "step": 11373500 }, { "epoch": 32.92, "learning_rate": 3.354441170340867e-05, "loss": 2.1118, "step": 11374000 }, { "epoch": 32.92, "learning_rate": 3.3543688055761394e-05, "loss": 2.0949, "step": 11374500 }, { "epoch": 32.93, "learning_rate": 3.354296440811412e-05, "loss": 2.1049, "step": 11375000 }, { "epoch": 32.93, "learning_rate": 3.354224076046684e-05, "loss": 2.1126, "step": 11375500 }, { "epoch": 32.93, "learning_rate": 3.354151711281957e-05, "loss": 2.1137, "step": 11376000 }, { "epoch": 32.93, "learning_rate": 3.354079346517229e-05, "loss": 2.0933, "step": 11376500 }, { "epoch": 32.93, "learning_rate": 3.354006981752501e-05, "loss": 2.0885, "step": 11377000 }, { "epoch": 32.93, "learning_rate": 3.3539346169877735e-05, "loss": 2.0898, "step": 11377500 }, { "epoch": 32.93, "learning_rate": 3.353862252223046e-05, "loss": 2.1123, "step": 11378000 }, { "epoch": 32.94, "learning_rate": 3.353789887458318e-05, "loss": 2.0843, "step": 11378500 }, { "epoch": 32.94, "learning_rate": 3.35371752269359e-05, "loss": 2.0734, "step": 11379000 }, { "epoch": 32.94, "learning_rate": 3.3536451579288624e-05, "loss": 2.1219, "step": 11379500 }, { "epoch": 32.94, "learning_rate": 3.3535727931641346e-05, "loss": 2.1021, "step": 11380000 }, { "epoch": 32.94, "learning_rate": 3.3535004283994075e-05, "loss": 2.1185, "step": 11380500 }, { "epoch": 32.94, "learning_rate": 3.353428208364209e-05, "loss": 2.0792, "step": 11381000 }, { "epoch": 32.94, "learning_rate": 3.353355988329011e-05, "loss": 2.0908, "step": 11381500 }, { "epoch": 32.95, "learning_rate": 3.3532836235642835e-05, "loss": 2.0868, "step": 11382000 }, { "epoch": 32.95, "learning_rate": 3.353211403529085e-05, "loss": 2.0985, "step": 11382500 }, { "epoch": 32.95, "learning_rate": 3.353139038764357e-05, "loss": 2.0991, "step": 11383000 }, { "epoch": 32.95, "learning_rate": 3.3530666739996295e-05, "loss": 2.1102, "step": 11383500 }, { "epoch": 32.95, "learning_rate": 3.352994309234902e-05, "loss": 2.0819, "step": 11384000 }, { "epoch": 32.95, "learning_rate": 3.352921944470174e-05, "loss": 2.1002, "step": 11384500 }, { "epoch": 32.95, "learning_rate": 3.352849579705447e-05, "loss": 2.1269, "step": 11385000 }, { "epoch": 32.96, "learning_rate": 3.3527773596702484e-05, "loss": 2.0805, "step": 11385500 }, { "epoch": 32.96, "learning_rate": 3.3527049949055206e-05, "loss": 2.0919, "step": 11386000 }, { "epoch": 32.96, "learning_rate": 3.352632630140793e-05, "loss": 2.0928, "step": 11386500 }, { "epoch": 32.96, "learning_rate": 3.352560265376065e-05, "loss": 2.0823, "step": 11387000 }, { "epoch": 32.96, "learning_rate": 3.352487900611337e-05, "loss": 2.0988, "step": 11387500 }, { "epoch": 32.96, "learning_rate": 3.35241553584661e-05, "loss": 2.0692, "step": 11388000 }, { "epoch": 32.97, "learning_rate": 3.3523431710818824e-05, "loss": 2.0975, "step": 11388500 }, { "epoch": 32.97, "learning_rate": 3.3522709510466847e-05, "loss": 2.0655, "step": 11389000 }, { "epoch": 32.97, "learning_rate": 3.352198731011486e-05, "loss": 2.0752, "step": 11389500 }, { "epoch": 32.97, "learning_rate": 3.3521263662467584e-05, "loss": 2.1022, "step": 11390000 }, { "epoch": 32.97, "learning_rate": 3.3520540014820307e-05, "loss": 2.0926, "step": 11390500 }, { "epoch": 32.97, "learning_rate": 3.351981636717303e-05, "loss": 2.0916, "step": 11391000 }, { "epoch": 32.97, "learning_rate": 3.3519094166821044e-05, "loss": 2.0954, "step": 11391500 }, { "epoch": 32.98, "learning_rate": 3.3518370519173767e-05, "loss": 2.0929, "step": 11392000 }, { "epoch": 32.98, "learning_rate": 3.3517646871526496e-05, "loss": 2.1118, "step": 11392500 }, { "epoch": 32.98, "learning_rate": 3.351692322387922e-05, "loss": 2.0894, "step": 11393000 }, { "epoch": 32.98, "learning_rate": 3.351619957623194e-05, "loss": 2.0921, "step": 11393500 }, { "epoch": 32.98, "learning_rate": 3.351547592858466e-05, "loss": 2.0946, "step": 11394000 }, { "epoch": 32.98, "learning_rate": 3.351475372823268e-05, "loss": 2.1136, "step": 11394500 }, { "epoch": 32.98, "learning_rate": 3.351403152788069e-05, "loss": 2.1199, "step": 11395000 }, { "epoch": 32.99, "learning_rate": 3.3513307880233416e-05, "loss": 2.0861, "step": 11395500 }, { "epoch": 32.99, "learning_rate": 3.3512584232586145e-05, "loss": 2.1246, "step": 11396000 }, { "epoch": 32.99, "learning_rate": 3.351186203223417e-05, "loss": 2.0875, "step": 11396500 }, { "epoch": 32.99, "learning_rate": 3.351113838458689e-05, "loss": 2.1086, "step": 11397000 }, { "epoch": 32.99, "learning_rate": 3.351041473693961e-05, "loss": 2.0919, "step": 11397500 }, { "epoch": 32.99, "learning_rate": 3.3509691089292334e-05, "loss": 2.0804, "step": 11398000 }, { "epoch": 32.99, "learning_rate": 3.3508967441645056e-05, "loss": 2.1205, "step": 11398500 }, { "epoch": 33.0, "learning_rate": 3.350824379399778e-05, "loss": 2.0986, "step": 11399000 }, { "epoch": 33.0, "learning_rate": 3.35075201463505e-05, "loss": 2.093, "step": 11399500 }, { "epoch": 33.0, "learning_rate": 3.350679649870322e-05, "loss": 2.0906, "step": 11400000 }, { "epoch": 33.0, "learning_rate": 3.3506072851055945e-05, "loss": 2.1081, "step": 11400500 }, { "epoch": 33.0, "eval_accuracy": 0.6677652966372922, "eval_accuracy_mlm": 0.6326864891368785, "eval_accuracy_nsp": 0.8559715481248252, "eval_loss": 2.179738759994507, "eval_runtime": 331.543, "eval_samples_per_second": 1316.227, "eval_steps_per_second": 54.844, "step": 11400576 }, { "epoch": 33.0, "learning_rate": 3.350534920340867e-05, "loss": 2.0683, "step": 11401000 }, { "epoch": 33.0, "learning_rate": 3.3504625555761396e-05, "loss": 2.0833, "step": 11401500 }, { "epoch": 33.0, "learning_rate": 3.350390190811412e-05, "loss": 2.0711, "step": 11402000 }, { "epoch": 33.01, "learning_rate": 3.350317826046684e-05, "loss": 2.1027, "step": 11402500 }, { "epoch": 33.01, "learning_rate": 3.350245461281957e-05, "loss": 2.0772, "step": 11403000 }, { "epoch": 33.01, "learning_rate": 3.350173096517229e-05, "loss": 2.0842, "step": 11403500 }, { "epoch": 33.01, "learning_rate": 3.350100876482031e-05, "loss": 2.0676, "step": 11404000 }, { "epoch": 33.01, "learning_rate": 3.350028511717303e-05, "loss": 2.0813, "step": 11404500 }, { "epoch": 33.01, "learning_rate": 3.349956146952575e-05, "loss": 2.0817, "step": 11405000 }, { "epoch": 33.01, "learning_rate": 3.3498837821878474e-05, "loss": 2.0761, "step": 11405500 }, { "epoch": 33.02, "learning_rate": 3.3498114174231196e-05, "loss": 2.0555, "step": 11406000 }, { "epoch": 33.02, "learning_rate": 3.349739052658392e-05, "loss": 2.0629, "step": 11406500 }, { "epoch": 33.02, "learning_rate": 3.349666687893665e-05, "loss": 2.113, "step": 11407000 }, { "epoch": 33.02, "learning_rate": 3.349594323128937e-05, "loss": 2.0806, "step": 11407500 }, { "epoch": 33.02, "learning_rate": 3.3495221030937385e-05, "loss": 2.0568, "step": 11408000 }, { "epoch": 33.02, "learning_rate": 3.349449738329011e-05, "loss": 2.0908, "step": 11408500 }, { "epoch": 33.02, "learning_rate": 3.349377373564283e-05, "loss": 2.0872, "step": 11409000 }, { "epoch": 33.03, "learning_rate": 3.349305008799555e-05, "loss": 2.0867, "step": 11409500 }, { "epoch": 33.03, "learning_rate": 3.349232788764357e-05, "loss": 2.0412, "step": 11410000 }, { "epoch": 33.03, "learning_rate": 3.3491604239996297e-05, "loss": 2.0786, "step": 11410500 }, { "epoch": 33.03, "learning_rate": 3.349088059234902e-05, "loss": 2.0591, "step": 11411000 }, { "epoch": 33.03, "learning_rate": 3.349015694470175e-05, "loss": 2.1039, "step": 11411500 }, { "epoch": 33.03, "learning_rate": 3.348943329705447e-05, "loss": 2.0768, "step": 11412000 }, { "epoch": 33.03, "learning_rate": 3.348870964940719e-05, "loss": 2.092, "step": 11412500 }, { "epoch": 33.04, "learning_rate": 3.3487986001759915e-05, "loss": 2.0586, "step": 11413000 }, { "epoch": 33.04, "learning_rate": 3.348726235411264e-05, "loss": 2.0882, "step": 11413500 }, { "epoch": 33.04, "learning_rate": 3.348653870646536e-05, "loss": 2.0505, "step": 11414000 }, { "epoch": 33.04, "learning_rate": 3.348581505881808e-05, "loss": 2.054, "step": 11414500 }, { "epoch": 33.04, "learning_rate": 3.3485091411170803e-05, "loss": 2.1065, "step": 11415000 }, { "epoch": 33.04, "learning_rate": 3.348436921081882e-05, "loss": 2.0608, "step": 11415500 }, { "epoch": 33.04, "learning_rate": 3.348364556317155e-05, "loss": 2.0652, "step": 11416000 }, { "epoch": 33.05, "learning_rate": 3.348292191552427e-05, "loss": 2.0784, "step": 11416500 }, { "epoch": 33.05, "learning_rate": 3.348219826787699e-05, "loss": 2.0764, "step": 11417000 }, { "epoch": 33.05, "learning_rate": 3.348147606752501e-05, "loss": 2.1013, "step": 11417500 }, { "epoch": 33.05, "learning_rate": 3.348075241987774e-05, "loss": 2.0917, "step": 11418000 }, { "epoch": 33.05, "learning_rate": 3.348003021952575e-05, "loss": 2.0621, "step": 11418500 }, { "epoch": 33.05, "learning_rate": 3.3479306571878475e-05, "loss": 2.1033, "step": 11419000 }, { "epoch": 33.05, "learning_rate": 3.34785829242312e-05, "loss": 2.0716, "step": 11419500 }, { "epoch": 33.06, "learning_rate": 3.3477859276583926e-05, "loss": 2.0767, "step": 11420000 }, { "epoch": 33.06, "learning_rate": 3.347713562893665e-05, "loss": 2.0628, "step": 11420500 }, { "epoch": 33.06, "learning_rate": 3.3476413428584664e-05, "loss": 2.1032, "step": 11421000 }, { "epoch": 33.06, "learning_rate": 3.3475689780937386e-05, "loss": 2.0615, "step": 11421500 }, { "epoch": 33.06, "learning_rate": 3.347496613329011e-05, "loss": 2.0781, "step": 11422000 }, { "epoch": 33.06, "learning_rate": 3.347424248564283e-05, "loss": 2.0958, "step": 11422500 }, { "epoch": 33.06, "learning_rate": 3.347351883799555e-05, "loss": 2.061, "step": 11423000 }, { "epoch": 33.07, "learning_rate": 3.3472795190348275e-05, "loss": 2.0641, "step": 11423500 }, { "epoch": 33.07, "learning_rate": 3.34720729899963e-05, "loss": 2.0751, "step": 11424000 }, { "epoch": 33.07, "learning_rate": 3.347135078964431e-05, "loss": 2.0642, "step": 11424500 }, { "epoch": 33.07, "learning_rate": 3.3470627141997035e-05, "loss": 2.0978, "step": 11425000 }, { "epoch": 33.07, "learning_rate": 3.346990349434976e-05, "loss": 2.078, "step": 11425500 }, { "epoch": 33.07, "learning_rate": 3.3469179846702486e-05, "loss": 2.0577, "step": 11426000 }, { "epoch": 33.08, "learning_rate": 3.346845619905521e-05, "loss": 2.0678, "step": 11426500 }, { "epoch": 33.08, "learning_rate": 3.346773255140793e-05, "loss": 2.0769, "step": 11427000 }, { "epoch": 33.08, "learning_rate": 3.346700890376065e-05, "loss": 2.0774, "step": 11427500 }, { "epoch": 33.08, "learning_rate": 3.3466285256113375e-05, "loss": 2.0919, "step": 11428000 }, { "epoch": 33.08, "learning_rate": 3.34655616084661e-05, "loss": 2.0641, "step": 11428500 }, { "epoch": 33.08, "learning_rate": 3.346483940811412e-05, "loss": 2.0819, "step": 11429000 }, { "epoch": 33.08, "learning_rate": 3.346411576046684e-05, "loss": 2.0918, "step": 11429500 }, { "epoch": 33.09, "learning_rate": 3.3463392112819564e-05, "loss": 2.0683, "step": 11430000 }, { "epoch": 33.09, "learning_rate": 3.3462668465172287e-05, "loss": 2.0706, "step": 11430500 }, { "epoch": 33.09, "learning_rate": 3.346194481752501e-05, "loss": 2.0698, "step": 11431000 }, { "epoch": 33.09, "learning_rate": 3.346122116987773e-05, "loss": 2.0849, "step": 11431500 }, { "epoch": 33.09, "learning_rate": 3.346049752223045e-05, "loss": 2.0735, "step": 11432000 }, { "epoch": 33.09, "learning_rate": 3.3459773874583176e-05, "loss": 2.1006, "step": 11432500 }, { "epoch": 33.09, "learning_rate": 3.3459050226935905e-05, "loss": 2.0963, "step": 11433000 }, { "epoch": 33.1, "learning_rate": 3.345832802658393e-05, "loss": 2.072, "step": 11433500 }, { "epoch": 33.1, "learning_rate": 3.345760437893665e-05, "loss": 2.0803, "step": 11434000 }, { "epoch": 33.1, "learning_rate": 3.345688073128937e-05, "loss": 2.0648, "step": 11434500 }, { "epoch": 33.1, "learning_rate": 3.345615853093739e-05, "loss": 2.0695, "step": 11435000 }, { "epoch": 33.1, "learning_rate": 3.34554363305854e-05, "loss": 2.0677, "step": 11435500 }, { "epoch": 33.1, "learning_rate": 3.3454712682938125e-05, "loss": 2.0911, "step": 11436000 }, { "epoch": 33.1, "learning_rate": 3.345398903529085e-05, "loss": 2.0878, "step": 11436500 }, { "epoch": 33.11, "learning_rate": 3.3453265387643576e-05, "loss": 2.0665, "step": 11437000 }, { "epoch": 33.11, "learning_rate": 3.34525417399963e-05, "loss": 2.0942, "step": 11437500 }, { "epoch": 33.11, "learning_rate": 3.345181809234902e-05, "loss": 2.0719, "step": 11438000 }, { "epoch": 33.11, "learning_rate": 3.345109444470174e-05, "loss": 2.088, "step": 11438500 }, { "epoch": 33.11, "learning_rate": 3.3450370797054465e-05, "loss": 2.0724, "step": 11439000 }, { "epoch": 33.11, "learning_rate": 3.344964714940719e-05, "loss": 2.0826, "step": 11439500 }, { "epoch": 33.11, "learning_rate": 3.344892350175991e-05, "loss": 2.0807, "step": 11440000 }, { "epoch": 33.12, "learning_rate": 3.344819985411264e-05, "loss": 2.0644, "step": 11440500 }, { "epoch": 33.12, "learning_rate": 3.344747620646536e-05, "loss": 2.0844, "step": 11441000 }, { "epoch": 33.12, "learning_rate": 3.344675255881808e-05, "loss": 2.0972, "step": 11441500 }, { "epoch": 33.12, "learning_rate": 3.3446028911170805e-05, "loss": 2.0783, "step": 11442000 }, { "epoch": 33.12, "learning_rate": 3.344530526352353e-05, "loss": 2.0699, "step": 11442500 }, { "epoch": 33.12, "learning_rate": 3.344458161587625e-05, "loss": 2.0467, "step": 11443000 }, { "epoch": 33.12, "learning_rate": 3.344385941552427e-05, "loss": 2.055, "step": 11443500 }, { "epoch": 33.13, "learning_rate": 3.344313721517229e-05, "loss": 2.0756, "step": 11444000 }, { "epoch": 33.13, "learning_rate": 3.344241356752501e-05, "loss": 2.1077, "step": 11444500 }, { "epoch": 33.13, "learning_rate": 3.344168991987773e-05, "loss": 2.0813, "step": 11445000 }, { "epoch": 33.13, "learning_rate": 3.3440966272230454e-05, "loss": 2.0583, "step": 11445500 }, { "epoch": 33.13, "learning_rate": 3.3440242624583176e-05, "loss": 2.0638, "step": 11446000 }, { "epoch": 33.13, "learning_rate": 3.34395189769359e-05, "loss": 2.0927, "step": 11446500 }, { "epoch": 33.13, "learning_rate": 3.343879532928863e-05, "loss": 2.06, "step": 11447000 }, { "epoch": 33.14, "learning_rate": 3.343807168164135e-05, "loss": 2.07, "step": 11447500 }, { "epoch": 33.14, "learning_rate": 3.343734948128937e-05, "loss": 2.0928, "step": 11448000 }, { "epoch": 33.14, "learning_rate": 3.3436625833642094e-05, "loss": 2.0777, "step": 11448500 }, { "epoch": 33.14, "learning_rate": 3.3435902185994817e-05, "loss": 2.0747, "step": 11449000 }, { "epoch": 33.14, "learning_rate": 3.343517998564283e-05, "loss": 2.096, "step": 11449500 }, { "epoch": 33.14, "learning_rate": 3.3434456337995554e-05, "loss": 2.0788, "step": 11450000 }, { "epoch": 33.14, "learning_rate": 3.343373269034828e-05, "loss": 2.0896, "step": 11450500 }, { "epoch": 33.15, "learning_rate": 3.3433009042701006e-05, "loss": 2.0989, "step": 11451000 }, { "epoch": 33.15, "learning_rate": 3.343228684234902e-05, "loss": 2.0513, "step": 11451500 }, { "epoch": 33.15, "learning_rate": 3.343156464199704e-05, "loss": 2.0647, "step": 11452000 }, { "epoch": 33.15, "learning_rate": 3.343084099434976e-05, "loss": 2.0896, "step": 11452500 }, { "epoch": 33.15, "learning_rate": 3.343011734670248e-05, "loss": 2.0622, "step": 11453000 }, { "epoch": 33.15, "learning_rate": 3.3429393699055203e-05, "loss": 2.0936, "step": 11453500 }, { "epoch": 33.15, "learning_rate": 3.3428670051407926e-05, "loss": 2.123, "step": 11454000 }, { "epoch": 33.16, "learning_rate": 3.3427946403760655e-05, "loss": 2.093, "step": 11454500 }, { "epoch": 33.16, "learning_rate": 3.342722275611338e-05, "loss": 2.0935, "step": 11455000 }, { "epoch": 33.16, "learning_rate": 3.3426499108466106e-05, "loss": 2.0646, "step": 11455500 }, { "epoch": 33.16, "learning_rate": 3.342577690811412e-05, "loss": 2.0842, "step": 11456000 }, { "epoch": 33.16, "learning_rate": 3.3425053260466844e-05, "loss": 2.0923, "step": 11456500 }, { "epoch": 33.16, "learning_rate": 3.3424329612819566e-05, "loss": 2.0735, "step": 11457000 }, { "epoch": 33.16, "learning_rate": 3.342360596517229e-05, "loss": 2.0636, "step": 11457500 }, { "epoch": 33.17, "learning_rate": 3.342288231752501e-05, "loss": 2.0603, "step": 11458000 }, { "epoch": 33.17, "learning_rate": 3.342215866987773e-05, "loss": 2.0746, "step": 11458500 }, { "epoch": 33.17, "learning_rate": 3.3421435022230455e-05, "loss": 2.0655, "step": 11459000 }, { "epoch": 33.17, "learning_rate": 3.342071137458318e-05, "loss": 2.0839, "step": 11459500 }, { "epoch": 33.17, "learning_rate": 3.34199891742312e-05, "loss": 2.0877, "step": 11460000 }, { "epoch": 33.17, "learning_rate": 3.341926552658392e-05, "loss": 2.0788, "step": 11460500 }, { "epoch": 33.17, "learning_rate": 3.3418541878936644e-05, "loss": 2.086, "step": 11461000 }, { "epoch": 33.18, "learning_rate": 3.3417818231289366e-05, "loss": 2.1039, "step": 11461500 }, { "epoch": 33.18, "learning_rate": 3.341709603093738e-05, "loss": 2.0772, "step": 11462000 }, { "epoch": 33.18, "learning_rate": 3.3416372383290104e-05, "loss": 2.081, "step": 11462500 }, { "epoch": 33.18, "learning_rate": 3.341564873564283e-05, "loss": 2.0633, "step": 11463000 }, { "epoch": 33.18, "learning_rate": 3.3414925087995555e-05, "loss": 2.0848, "step": 11463500 }, { "epoch": 33.18, "learning_rate": 3.341420144034828e-05, "loss": 2.0765, "step": 11464000 }, { "epoch": 33.19, "learning_rate": 3.3413477792701006e-05, "loss": 2.079, "step": 11464500 }, { "epoch": 33.19, "learning_rate": 3.341275414505373e-05, "loss": 2.0342, "step": 11465000 }, { "epoch": 33.19, "learning_rate": 3.341203049740645e-05, "loss": 2.0952, "step": 11465500 }, { "epoch": 33.19, "learning_rate": 3.341130684975917e-05, "loss": 2.0717, "step": 11466000 }, { "epoch": 33.19, "learning_rate": 3.3410583202111895e-05, "loss": 2.0851, "step": 11466500 }, { "epoch": 33.19, "learning_rate": 3.340985955446462e-05, "loss": 2.0834, "step": 11467000 }, { "epoch": 33.19, "learning_rate": 3.340913590681734e-05, "loss": 2.0978, "step": 11467500 }, { "epoch": 33.2, "learning_rate": 3.340841225917006e-05, "loss": 2.0766, "step": 11468000 }, { "epoch": 33.2, "learning_rate": 3.3407688611522784e-05, "loss": 2.077, "step": 11468500 }, { "epoch": 33.2, "learning_rate": 3.3406964963875507e-05, "loss": 2.0923, "step": 11469000 }, { "epoch": 33.2, "learning_rate": 3.340624131622823e-05, "loss": 2.0699, "step": 11469500 }, { "epoch": 33.2, "learning_rate": 3.340551911587626e-05, "loss": 2.0861, "step": 11470000 }, { "epoch": 33.2, "learning_rate": 3.340479546822898e-05, "loss": 2.0851, "step": 11470500 }, { "epoch": 33.2, "learning_rate": 3.34040718205817e-05, "loss": 2.0693, "step": 11471000 }, { "epoch": 33.21, "learning_rate": 3.340334962022972e-05, "loss": 2.0764, "step": 11471500 }, { "epoch": 33.21, "learning_rate": 3.340262597258244e-05, "loss": 2.0567, "step": 11472000 }, { "epoch": 33.21, "learning_rate": 3.340190232493516e-05, "loss": 2.0875, "step": 11472500 }, { "epoch": 33.21, "learning_rate": 3.3401178677287885e-05, "loss": 2.0741, "step": 11473000 }, { "epoch": 33.21, "learning_rate": 3.340045502964061e-05, "loss": 2.0924, "step": 11473500 }, { "epoch": 33.21, "learning_rate": 3.339973282928863e-05, "loss": 2.084, "step": 11474000 }, { "epoch": 33.21, "learning_rate": 3.339900918164135e-05, "loss": 2.0798, "step": 11474500 }, { "epoch": 33.22, "learning_rate": 3.3398285533994074e-05, "loss": 2.0763, "step": 11475000 }, { "epoch": 33.22, "learning_rate": 3.3397561886346796e-05, "loss": 2.1009, "step": 11475500 }, { "epoch": 33.22, "learning_rate": 3.339683823869952e-05, "loss": 2.0899, "step": 11476000 }, { "epoch": 33.22, "learning_rate": 3.3396116038347534e-05, "loss": 2.0763, "step": 11476500 }, { "epoch": 33.22, "learning_rate": 3.3395392390700256e-05, "loss": 2.0953, "step": 11477000 }, { "epoch": 33.22, "learning_rate": 3.339466874305298e-05, "loss": 2.0838, "step": 11477500 }, { "epoch": 33.22, "learning_rate": 3.339394509540571e-05, "loss": 2.0802, "step": 11478000 }, { "epoch": 33.23, "learning_rate": 3.339322144775843e-05, "loss": 2.0884, "step": 11478500 }, { "epoch": 33.23, "learning_rate": 3.339249780011116e-05, "loss": 2.081, "step": 11479000 }, { "epoch": 33.23, "learning_rate": 3.3391775599759174e-05, "loss": 2.077, "step": 11479500 }, { "epoch": 33.23, "learning_rate": 3.3391051952111896e-05, "loss": 2.0723, "step": 11480000 }, { "epoch": 33.23, "learning_rate": 3.339032830446462e-05, "loss": 2.0806, "step": 11480500 }, { "epoch": 33.23, "learning_rate": 3.338960465681734e-05, "loss": 2.0627, "step": 11481000 }, { "epoch": 33.23, "learning_rate": 3.338888100917006e-05, "loss": 2.1063, "step": 11481500 }, { "epoch": 33.24, "learning_rate": 3.3388157361522785e-05, "loss": 2.0832, "step": 11482000 }, { "epoch": 33.24, "learning_rate": 3.338743371387551e-05, "loss": 2.0894, "step": 11482500 }, { "epoch": 33.24, "learning_rate": 3.338671006622823e-05, "loss": 2.0948, "step": 11483000 }, { "epoch": 33.24, "learning_rate": 3.338598786587625e-05, "loss": 2.0817, "step": 11483500 }, { "epoch": 33.24, "learning_rate": 3.3385264218228974e-05, "loss": 2.1093, "step": 11484000 }, { "epoch": 33.24, "learning_rate": 3.3384540570581696e-05, "loss": 2.1173, "step": 11484500 }, { "epoch": 33.24, "learning_rate": 3.338381837022971e-05, "loss": 2.0922, "step": 11485000 }, { "epoch": 33.25, "learning_rate": 3.338309472258244e-05, "loss": 2.0476, "step": 11485500 }, { "epoch": 33.25, "learning_rate": 3.338237107493516e-05, "loss": 2.0984, "step": 11486000 }, { "epoch": 33.25, "learning_rate": 3.3381647427287885e-05, "loss": 2.1033, "step": 11486500 }, { "epoch": 33.25, "learning_rate": 3.338092522693591e-05, "loss": 2.0707, "step": 11487000 }, { "epoch": 33.25, "learning_rate": 3.338020157928863e-05, "loss": 2.0766, "step": 11487500 }, { "epoch": 33.25, "learning_rate": 3.337947793164135e-05, "loss": 2.0936, "step": 11488000 }, { "epoch": 33.25, "learning_rate": 3.3378754283994074e-05, "loss": 2.0636, "step": 11488500 }, { "epoch": 33.26, "learning_rate": 3.337803208364209e-05, "loss": 2.0943, "step": 11489000 }, { "epoch": 33.26, "learning_rate": 3.337730843599481e-05, "loss": 2.0804, "step": 11489500 }, { "epoch": 33.26, "learning_rate": 3.3376586235642835e-05, "loss": 2.0611, "step": 11490000 }, { "epoch": 33.26, "learning_rate": 3.337586258799556e-05, "loss": 2.0696, "step": 11490500 }, { "epoch": 33.26, "learning_rate": 3.337514038764357e-05, "loss": 2.0771, "step": 11491000 }, { "epoch": 33.26, "learning_rate": 3.3374416739996295e-05, "loss": 2.0766, "step": 11491500 }, { "epoch": 33.26, "learning_rate": 3.337369309234902e-05, "loss": 2.0777, "step": 11492000 }, { "epoch": 33.27, "learning_rate": 3.337296944470174e-05, "loss": 2.095, "step": 11492500 }, { "epoch": 33.27, "learning_rate": 3.337224579705447e-05, "loss": 2.0916, "step": 11493000 }, { "epoch": 33.27, "learning_rate": 3.337152214940719e-05, "loss": 2.0849, "step": 11493500 }, { "epoch": 33.27, "learning_rate": 3.337079850175991e-05, "loss": 2.0765, "step": 11494000 }, { "epoch": 33.27, "learning_rate": 3.3370074854112635e-05, "loss": 2.076, "step": 11494500 }, { "epoch": 33.27, "learning_rate": 3.336935120646536e-05, "loss": 2.0734, "step": 11495000 }, { "epoch": 33.27, "learning_rate": 3.3368627558818086e-05, "loss": 2.0969, "step": 11495500 }, { "epoch": 33.28, "learning_rate": 3.336790391117081e-05, "loss": 2.0708, "step": 11496000 }, { "epoch": 33.28, "learning_rate": 3.336718026352353e-05, "loss": 2.0842, "step": 11496500 }, { "epoch": 33.28, "learning_rate": 3.336645661587625e-05, "loss": 2.0752, "step": 11497000 }, { "epoch": 33.28, "learning_rate": 3.336573441552427e-05, "loss": 2.0887, "step": 11497500 }, { "epoch": 33.28, "learning_rate": 3.336501076787699e-05, "loss": 2.0941, "step": 11498000 }, { "epoch": 33.28, "learning_rate": 3.336428712022971e-05, "loss": 2.0907, "step": 11498500 }, { "epoch": 33.28, "learning_rate": 3.3363563472582435e-05, "loss": 2.0685, "step": 11499000 }, { "epoch": 33.29, "learning_rate": 3.336283982493516e-05, "loss": 2.0836, "step": 11499500 }, { "epoch": 33.29, "learning_rate": 3.3362116177287886e-05, "loss": 2.1096, "step": 11500000 }, { "epoch": 33.29, "learning_rate": 3.336139252964061e-05, "loss": 2.0907, "step": 11500500 }, { "epoch": 33.29, "learning_rate": 3.336066888199334e-05, "loss": 2.1103, "step": 11501000 }, { "epoch": 33.29, "learning_rate": 3.335994523434606e-05, "loss": 2.0673, "step": 11501500 }, { "epoch": 33.29, "learning_rate": 3.335922158669878e-05, "loss": 2.0736, "step": 11502000 }, { "epoch": 33.3, "learning_rate": 3.3358497939051504e-05, "loss": 2.0711, "step": 11502500 }, { "epoch": 33.3, "learning_rate": 3.335777573869952e-05, "loss": 2.1109, "step": 11503000 }, { "epoch": 33.3, "learning_rate": 3.335705209105224e-05, "loss": 2.0901, "step": 11503500 }, { "epoch": 33.3, "learning_rate": 3.3356328443404964e-05, "loss": 2.0703, "step": 11504000 }, { "epoch": 33.3, "learning_rate": 3.3355604795757686e-05, "loss": 2.0939, "step": 11504500 }, { "epoch": 33.3, "learning_rate": 3.335488114811041e-05, "loss": 2.0705, "step": 11505000 }, { "epoch": 33.3, "learning_rate": 3.335415750046314e-05, "loss": 2.1021, "step": 11505500 }, { "epoch": 33.31, "learning_rate": 3.335343530011115e-05, "loss": 2.0933, "step": 11506000 }, { "epoch": 33.31, "learning_rate": 3.335271309975917e-05, "loss": 2.0714, "step": 11506500 }, { "epoch": 33.31, "learning_rate": 3.335198945211189e-05, "loss": 2.0749, "step": 11507000 }, { "epoch": 33.31, "learning_rate": 3.335126580446461e-05, "loss": 2.095, "step": 11507500 }, { "epoch": 33.31, "learning_rate": 3.335054215681734e-05, "loss": 2.0777, "step": 11508000 }, { "epoch": 33.31, "learning_rate": 3.3349818509170064e-05, "loss": 2.0662, "step": 11508500 }, { "epoch": 33.31, "learning_rate": 3.334909486152279e-05, "loss": 2.0801, "step": 11509000 }, { "epoch": 33.32, "learning_rate": 3.334837121387551e-05, "loss": 2.0928, "step": 11509500 }, { "epoch": 33.32, "learning_rate": 3.334764756622824e-05, "loss": 2.0737, "step": 11510000 }, { "epoch": 33.32, "learning_rate": 3.334692391858096e-05, "loss": 2.0943, "step": 11510500 }, { "epoch": 33.32, "learning_rate": 3.334620027093368e-05, "loss": 2.0996, "step": 11511000 }, { "epoch": 33.32, "learning_rate": 3.3345476623286405e-05, "loss": 2.0876, "step": 11511500 }, { "epoch": 33.32, "learning_rate": 3.334475297563913e-05, "loss": 2.0753, "step": 11512000 }, { "epoch": 33.32, "learning_rate": 3.334402932799185e-05, "loss": 2.0833, "step": 11512500 }, { "epoch": 33.33, "learning_rate": 3.334330568034457e-05, "loss": 2.0791, "step": 11513000 }, { "epoch": 33.33, "learning_rate": 3.3342582032697294e-05, "loss": 2.0813, "step": 11513500 }, { "epoch": 33.33, "learning_rate": 3.334185983234531e-05, "loss": 2.0702, "step": 11514000 }, { "epoch": 33.33, "learning_rate": 3.334113618469804e-05, "loss": 2.0834, "step": 11514500 }, { "epoch": 33.33, "learning_rate": 3.334041253705076e-05, "loss": 2.063, "step": 11515000 }, { "epoch": 33.33, "learning_rate": 3.333968888940349e-05, "loss": 2.0778, "step": 11515500 }, { "epoch": 33.33, "learning_rate": 3.333896524175621e-05, "loss": 2.0858, "step": 11516000 }, { "epoch": 33.34, "learning_rate": 3.3338241594108934e-05, "loss": 2.0836, "step": 11516500 }, { "epoch": 33.34, "learning_rate": 3.3337517946461656e-05, "loss": 2.094, "step": 11517000 }, { "epoch": 33.34, "learning_rate": 3.333679574610967e-05, "loss": 2.1021, "step": 11517500 }, { "epoch": 33.34, "learning_rate": 3.333607354575769e-05, "loss": 2.0933, "step": 11518000 }, { "epoch": 33.34, "learning_rate": 3.3335349898110416e-05, "loss": 2.1025, "step": 11518500 }, { "epoch": 33.34, "learning_rate": 3.333462769775843e-05, "loss": 2.098, "step": 11519000 }, { "epoch": 33.34, "learning_rate": 3.3333904050111154e-05, "loss": 2.0757, "step": 11519500 }, { "epoch": 33.35, "learning_rate": 3.3333180402463876e-05, "loss": 2.0699, "step": 11520000 }, { "epoch": 33.35, "learning_rate": 3.33324567548166e-05, "loss": 2.1122, "step": 11520500 }, { "epoch": 33.35, "learning_rate": 3.333173310716932e-05, "loss": 2.0884, "step": 11521000 }, { "epoch": 33.35, "learning_rate": 3.333100945952204e-05, "loss": 2.0961, "step": 11521500 }, { "epoch": 33.35, "learning_rate": 3.3330285811874765e-05, "loss": 2.075, "step": 11522000 }, { "epoch": 33.35, "learning_rate": 3.3329562164227494e-05, "loss": 2.0751, "step": 11522500 }, { "epoch": 33.35, "learning_rate": 3.3328838516580216e-05, "loss": 2.0973, "step": 11523000 }, { "epoch": 33.36, "learning_rate": 3.332811631622824e-05, "loss": 2.0818, "step": 11523500 }, { "epoch": 33.36, "learning_rate": 3.3327394115876254e-05, "loss": 2.074, "step": 11524000 }, { "epoch": 33.36, "learning_rate": 3.332667191552427e-05, "loss": 2.0958, "step": 11524500 }, { "epoch": 33.36, "learning_rate": 3.332594826787699e-05, "loss": 2.0992, "step": 11525000 }, { "epoch": 33.36, "learning_rate": 3.3325224620229714e-05, "loss": 2.0839, "step": 11525500 }, { "epoch": 33.36, "learning_rate": 3.3324500972582437e-05, "loss": 2.0944, "step": 11526000 }, { "epoch": 33.36, "learning_rate": 3.3323777324935166e-05, "loss": 2.0777, "step": 11526500 }, { "epoch": 33.37, "learning_rate": 3.332305367728789e-05, "loss": 2.1044, "step": 11527000 }, { "epoch": 33.37, "learning_rate": 3.332233002964061e-05, "loss": 2.0879, "step": 11527500 }, { "epoch": 33.37, "learning_rate": 3.332160638199333e-05, "loss": 2.0903, "step": 11528000 }, { "epoch": 33.37, "learning_rate": 3.3320882734346054e-05, "loss": 2.095, "step": 11528500 }, { "epoch": 33.37, "learning_rate": 3.332016053399407e-05, "loss": 2.0844, "step": 11529000 }, { "epoch": 33.37, "learning_rate": 3.331943688634679e-05, "loss": 2.0696, "step": 11529500 }, { "epoch": 33.37, "learning_rate": 3.331871323869952e-05, "loss": 2.0752, "step": 11530000 }, { "epoch": 33.38, "learning_rate": 3.3317989591052244e-05, "loss": 2.0717, "step": 11530500 }, { "epoch": 33.38, "learning_rate": 3.3317265943404966e-05, "loss": 2.0931, "step": 11531000 }, { "epoch": 33.38, "learning_rate": 3.331654229575769e-05, "loss": 2.079, "step": 11531500 }, { "epoch": 33.38, "learning_rate": 3.331581864811042e-05, "loss": 2.083, "step": 11532000 }, { "epoch": 33.38, "learning_rate": 3.331509500046314e-05, "loss": 2.0682, "step": 11532500 }, { "epoch": 33.38, "learning_rate": 3.331437135281586e-05, "loss": 2.0844, "step": 11533000 }, { "epoch": 33.38, "learning_rate": 3.3313647705168584e-05, "loss": 2.0643, "step": 11533500 }, { "epoch": 33.39, "learning_rate": 3.33129255048166e-05, "loss": 2.0775, "step": 11534000 }, { "epoch": 33.39, "learning_rate": 3.331220185716932e-05, "loss": 2.0757, "step": 11534500 }, { "epoch": 33.39, "learning_rate": 3.3311478209522044e-05, "loss": 2.1054, "step": 11535000 }, { "epoch": 33.39, "learning_rate": 3.3310754561874766e-05, "loss": 2.0905, "step": 11535500 }, { "epoch": 33.39, "learning_rate": 3.331003091422749e-05, "loss": 2.0975, "step": 11536000 }, { "epoch": 33.39, "learning_rate": 3.330930726658022e-05, "loss": 2.0661, "step": 11536500 }, { "epoch": 33.39, "learning_rate": 3.330858361893294e-05, "loss": 2.0998, "step": 11537000 }, { "epoch": 33.4, "learning_rate": 3.330785997128567e-05, "loss": 2.0972, "step": 11537500 }, { "epoch": 33.4, "learning_rate": 3.330713921822898e-05, "loss": 2.1132, "step": 11538000 }, { "epoch": 33.4, "learning_rate": 3.330641846517229e-05, "loss": 2.098, "step": 11538500 }, { "epoch": 33.4, "learning_rate": 3.3305694817525015e-05, "loss": 2.0933, "step": 11539000 }, { "epoch": 33.4, "learning_rate": 3.330497116987774e-05, "loss": 2.0741, "step": 11539500 }, { "epoch": 33.4, "learning_rate": 3.330424752223046e-05, "loss": 2.0794, "step": 11540000 }, { "epoch": 33.41, "learning_rate": 3.330352387458318e-05, "loss": 2.0816, "step": 11540500 }, { "epoch": 33.41, "learning_rate": 3.3302800226935904e-05, "loss": 2.068, "step": 11541000 }, { "epoch": 33.41, "learning_rate": 3.3302076579288626e-05, "loss": 2.0877, "step": 11541500 }, { "epoch": 33.41, "learning_rate": 3.330135293164135e-05, "loss": 2.0897, "step": 11542000 }, { "epoch": 33.41, "learning_rate": 3.330062928399407e-05, "loss": 2.0861, "step": 11542500 }, { "epoch": 33.41, "learning_rate": 3.329990563634679e-05, "loss": 2.0585, "step": 11543000 }, { "epoch": 33.41, "learning_rate": 3.3299181988699515e-05, "loss": 2.0959, "step": 11543500 }, { "epoch": 33.42, "learning_rate": 3.3298458341052244e-05, "loss": 2.1084, "step": 11544000 }, { "epoch": 33.42, "learning_rate": 3.329773614070026e-05, "loss": 2.0686, "step": 11544500 }, { "epoch": 33.42, "learning_rate": 3.329701249305298e-05, "loss": 2.0929, "step": 11545000 }, { "epoch": 33.42, "learning_rate": 3.329628884540571e-05, "loss": 2.0968, "step": 11545500 }, { "epoch": 33.42, "learning_rate": 3.329556519775843e-05, "loss": 2.0719, "step": 11546000 }, { "epoch": 33.42, "learning_rate": 3.3294841550111156e-05, "loss": 2.068, "step": 11546500 }, { "epoch": 33.42, "learning_rate": 3.329411790246388e-05, "loss": 2.1199, "step": 11547000 }, { "epoch": 33.43, "learning_rate": 3.32933942548166e-05, "loss": 2.0854, "step": 11547500 }, { "epoch": 33.43, "learning_rate": 3.329267060716932e-05, "loss": 2.078, "step": 11548000 }, { "epoch": 33.43, "learning_rate": 3.3291946959522044e-05, "loss": 2.0751, "step": 11548500 }, { "epoch": 33.43, "learning_rate": 3.329122331187477e-05, "loss": 2.0871, "step": 11549000 }, { "epoch": 33.43, "learning_rate": 3.3290499664227496e-05, "loss": 2.0945, "step": 11549500 }, { "epoch": 33.43, "learning_rate": 3.328977601658022e-05, "loss": 2.1027, "step": 11550000 }, { "epoch": 33.43, "learning_rate": 3.328905236893294e-05, "loss": 2.0926, "step": 11550500 }, { "epoch": 33.44, "learning_rate": 3.328832872128566e-05, "loss": 2.1086, "step": 11551000 }, { "epoch": 33.44, "learning_rate": 3.328760652093368e-05, "loss": 2.0981, "step": 11551500 }, { "epoch": 33.44, "learning_rate": 3.32868828732864e-05, "loss": 2.0928, "step": 11552000 }, { "epoch": 33.44, "learning_rate": 3.328616067293442e-05, "loss": 2.0993, "step": 11552500 }, { "epoch": 33.44, "learning_rate": 3.3285437025287145e-05, "loss": 2.0687, "step": 11553000 }, { "epoch": 33.44, "learning_rate": 3.328471337763987e-05, "loss": 2.0729, "step": 11553500 }, { "epoch": 33.44, "learning_rate": 3.328399117728789e-05, "loss": 2.0749, "step": 11554000 }, { "epoch": 33.45, "learning_rate": 3.328326752964061e-05, "loss": 2.1239, "step": 11554500 }, { "epoch": 33.45, "learning_rate": 3.3282543881993334e-05, "loss": 2.0652, "step": 11555000 }, { "epoch": 33.45, "learning_rate": 3.3281820234346056e-05, "loss": 2.0885, "step": 11555500 }, { "epoch": 33.45, "learning_rate": 3.328109658669878e-05, "loss": 2.0694, "step": 11556000 }, { "epoch": 33.45, "learning_rate": 3.32803729390515e-05, "loss": 2.1112, "step": 11556500 }, { "epoch": 33.45, "learning_rate": 3.327964929140422e-05, "loss": 2.0858, "step": 11557000 }, { "epoch": 33.45, "learning_rate": 3.3278925643756945e-05, "loss": 2.0997, "step": 11557500 }, { "epoch": 33.46, "learning_rate": 3.327820199610967e-05, "loss": 2.0658, "step": 11558000 }, { "epoch": 33.46, "learning_rate": 3.3277478348462396e-05, "loss": 2.0909, "step": 11558500 }, { "epoch": 33.46, "learning_rate": 3.327675614811041e-05, "loss": 2.0986, "step": 11559000 }, { "epoch": 33.46, "learning_rate": 3.3276032500463134e-05, "loss": 2.0684, "step": 11559500 }, { "epoch": 33.46, "learning_rate": 3.3275310300111156e-05, "loss": 2.089, "step": 11560000 }, { "epoch": 33.46, "learning_rate": 3.327458665246388e-05, "loss": 2.0624, "step": 11560500 }, { "epoch": 33.46, "learning_rate": 3.32738630048166e-05, "loss": 2.0653, "step": 11561000 }, { "epoch": 33.47, "learning_rate": 3.327313935716932e-05, "loss": 2.0674, "step": 11561500 }, { "epoch": 33.47, "learning_rate": 3.3272415709522045e-05, "loss": 2.0904, "step": 11562000 }, { "epoch": 33.47, "learning_rate": 3.327169206187477e-05, "loss": 2.0747, "step": 11562500 }, { "epoch": 33.47, "learning_rate": 3.3270968414227497e-05, "loss": 2.1005, "step": 11563000 }, { "epoch": 33.47, "learning_rate": 3.327024476658022e-05, "loss": 2.1081, "step": 11563500 }, { "epoch": 33.47, "learning_rate": 3.326952111893294e-05, "loss": 2.0867, "step": 11564000 }, { "epoch": 33.47, "learning_rate": 3.3268798918580957e-05, "loss": 2.1002, "step": 11564500 }, { "epoch": 33.48, "learning_rate": 3.326807527093368e-05, "loss": 2.0904, "step": 11565000 }, { "epoch": 33.48, "learning_rate": 3.32673516232864e-05, "loss": 2.0602, "step": 11565500 }, { "epoch": 33.48, "learning_rate": 3.326662797563912e-05, "loss": 2.0829, "step": 11566000 }, { "epoch": 33.48, "learning_rate": 3.3265904327991845e-05, "loss": 2.1092, "step": 11566500 }, { "epoch": 33.48, "learning_rate": 3.326518068034457e-05, "loss": 2.0531, "step": 11567000 }, { "epoch": 33.48, "learning_rate": 3.32644584799926e-05, "loss": 2.0869, "step": 11567500 }, { "epoch": 33.48, "learning_rate": 3.326373483234532e-05, "loss": 2.0923, "step": 11568000 }, { "epoch": 33.49, "learning_rate": 3.3263012631993335e-05, "loss": 2.0881, "step": 11568500 }, { "epoch": 33.49, "learning_rate": 3.326228898434606e-05, "loss": 2.0997, "step": 11569000 }, { "epoch": 33.49, "learning_rate": 3.326156533669878e-05, "loss": 2.0881, "step": 11569500 }, { "epoch": 33.49, "learning_rate": 3.32608416890515e-05, "loss": 2.1037, "step": 11570000 }, { "epoch": 33.49, "learning_rate": 3.3260118041404224e-05, "loss": 2.0884, "step": 11570500 }, { "epoch": 33.49, "learning_rate": 3.3259394393756946e-05, "loss": 2.0825, "step": 11571000 }, { "epoch": 33.49, "learning_rate": 3.325867219340497e-05, "loss": 2.0976, "step": 11571500 }, { "epoch": 33.5, "learning_rate": 3.325794854575769e-05, "loss": 2.0872, "step": 11572000 }, { "epoch": 33.5, "learning_rate": 3.325722489811041e-05, "loss": 2.0889, "step": 11572500 }, { "epoch": 33.5, "learning_rate": 3.3256501250463135e-05, "loss": 2.092, "step": 11573000 }, { "epoch": 33.5, "learning_rate": 3.325577760281586e-05, "loss": 2.0732, "step": 11573500 }, { "epoch": 33.5, "learning_rate": 3.325505540246387e-05, "loss": 2.1021, "step": 11574000 }, { "epoch": 33.5, "learning_rate": 3.3254331754816595e-05, "loss": 2.0922, "step": 11574500 }, { "epoch": 33.5, "learning_rate": 3.3253608107169324e-05, "loss": 2.1004, "step": 11575000 }, { "epoch": 33.51, "learning_rate": 3.3252884459522046e-05, "loss": 2.1022, "step": 11575500 }, { "epoch": 33.51, "learning_rate": 3.325216225917007e-05, "loss": 2.0685, "step": 11576000 }, { "epoch": 33.51, "learning_rate": 3.325143861152279e-05, "loss": 2.0806, "step": 11576500 }, { "epoch": 33.51, "learning_rate": 3.325071496387551e-05, "loss": 2.0952, "step": 11577000 }, { "epoch": 33.51, "learning_rate": 3.3249991316228235e-05, "loss": 2.0781, "step": 11577500 }, { "epoch": 33.51, "learning_rate": 3.324926766858096e-05, "loss": 2.0673, "step": 11578000 }, { "epoch": 33.52, "learning_rate": 3.324854402093368e-05, "loss": 2.1041, "step": 11578500 }, { "epoch": 33.52, "learning_rate": 3.32478203732864e-05, "loss": 2.0995, "step": 11579000 }, { "epoch": 33.52, "learning_rate": 3.3247096725639124e-05, "loss": 2.098, "step": 11579500 }, { "epoch": 33.52, "learning_rate": 3.3246373077991846e-05, "loss": 2.0748, "step": 11580000 }, { "epoch": 33.52, "learning_rate": 3.3245649430344575e-05, "loss": 2.0739, "step": 11580500 }, { "epoch": 33.52, "learning_rate": 3.324492722999259e-05, "loss": 2.0889, "step": 11581000 }, { "epoch": 33.52, "learning_rate": 3.324420358234531e-05, "loss": 2.1168, "step": 11581500 }, { "epoch": 33.53, "learning_rate": 3.3243479934698035e-05, "loss": 2.0992, "step": 11582000 }, { "epoch": 33.53, "learning_rate": 3.3242756287050764e-05, "loss": 2.1035, "step": 11582500 }, { "epoch": 33.53, "learning_rate": 3.3242032639403487e-05, "loss": 2.0847, "step": 11583000 }, { "epoch": 33.53, "learning_rate": 3.32413104390515e-05, "loss": 2.0796, "step": 11583500 }, { "epoch": 33.53, "learning_rate": 3.3240588238699524e-05, "loss": 2.0792, "step": 11584000 }, { "epoch": 33.53, "learning_rate": 3.323986459105225e-05, "loss": 2.1101, "step": 11584500 }, { "epoch": 33.53, "learning_rate": 3.323914094340497e-05, "loss": 2.094, "step": 11585000 }, { "epoch": 33.54, "learning_rate": 3.3238418743052984e-05, "loss": 2.0644, "step": 11585500 }, { "epoch": 33.54, "learning_rate": 3.323769509540571e-05, "loss": 2.0967, "step": 11586000 }, { "epoch": 33.54, "learning_rate": 3.323697144775843e-05, "loss": 2.1022, "step": 11586500 }, { "epoch": 33.54, "learning_rate": 3.323624780011115e-05, "loss": 2.0837, "step": 11587000 }, { "epoch": 33.54, "learning_rate": 3.3235524152463873e-05, "loss": 2.1079, "step": 11587500 }, { "epoch": 33.54, "learning_rate": 3.3234800504816596e-05, "loss": 2.0808, "step": 11588000 }, { "epoch": 33.54, "learning_rate": 3.3234076857169325e-05, "loss": 2.0879, "step": 11588500 }, { "epoch": 33.55, "learning_rate": 3.323335320952205e-05, "loss": 2.084, "step": 11589000 }, { "epoch": 33.55, "learning_rate": 3.323262956187477e-05, "loss": 2.0853, "step": 11589500 }, { "epoch": 33.55, "learning_rate": 3.323190736152279e-05, "loss": 2.0783, "step": 11590000 }, { "epoch": 33.55, "learning_rate": 3.3231183713875514e-05, "loss": 2.0802, "step": 11590500 }, { "epoch": 33.55, "learning_rate": 3.3230460066228236e-05, "loss": 2.09, "step": 11591000 }, { "epoch": 33.55, "learning_rate": 3.322973641858096e-05, "loss": 2.0863, "step": 11591500 }, { "epoch": 33.55, "learning_rate": 3.322901277093368e-05, "loss": 2.0967, "step": 11592000 }, { "epoch": 33.56, "learning_rate": 3.32282905705817e-05, "loss": 2.0886, "step": 11592500 }, { "epoch": 33.56, "learning_rate": 3.3227566922934425e-05, "loss": 2.085, "step": 11593000 }, { "epoch": 33.56, "learning_rate": 3.322684327528715e-05, "loss": 2.063, "step": 11593500 }, { "epoch": 33.56, "learning_rate": 3.322611962763987e-05, "loss": 2.0998, "step": 11594000 }, { "epoch": 33.56, "learning_rate": 3.322539597999259e-05, "loss": 2.0888, "step": 11594500 }, { "epoch": 33.56, "learning_rate": 3.322467377964061e-05, "loss": 2.0835, "step": 11595000 }, { "epoch": 33.56, "learning_rate": 3.322395013199333e-05, "loss": 2.0818, "step": 11595500 }, { "epoch": 33.57, "learning_rate": 3.322322648434605e-05, "loss": 2.1062, "step": 11596000 }, { "epoch": 33.57, "learning_rate": 3.3222504283994074e-05, "loss": 2.0759, "step": 11596500 }, { "epoch": 33.57, "learning_rate": 3.3221780636346796e-05, "loss": 2.0884, "step": 11597000 }, { "epoch": 33.57, "learning_rate": 3.3221056988699525e-05, "loss": 2.1077, "step": 11597500 }, { "epoch": 33.57, "learning_rate": 3.322033334105225e-05, "loss": 2.1144, "step": 11598000 }, { "epoch": 33.57, "learning_rate": 3.321960969340497e-05, "loss": 2.0901, "step": 11598500 }, { "epoch": 33.57, "learning_rate": 3.321888604575769e-05, "loss": 2.0746, "step": 11599000 }, { "epoch": 33.58, "learning_rate": 3.3218162398110414e-05, "loss": 2.0872, "step": 11599500 }, { "epoch": 33.58, "learning_rate": 3.3217438750463136e-05, "loss": 2.1001, "step": 11600000 }, { "epoch": 33.58, "learning_rate": 3.321671510281586e-05, "loss": 2.1071, "step": 11600500 }, { "epoch": 33.58, "learning_rate": 3.321599145516858e-05, "loss": 2.089, "step": 11601000 }, { "epoch": 33.58, "learning_rate": 3.32152678075213e-05, "loss": 2.1002, "step": 11601500 }, { "epoch": 33.58, "learning_rate": 3.3214544159874025e-05, "loss": 2.1014, "step": 11602000 }, { "epoch": 33.58, "learning_rate": 3.321382195952205e-05, "loss": 2.0694, "step": 11602500 }, { "epoch": 33.59, "learning_rate": 3.321309831187477e-05, "loss": 2.0773, "step": 11603000 }, { "epoch": 33.59, "learning_rate": 3.321237466422749e-05, "loss": 2.0994, "step": 11603500 }, { "epoch": 33.59, "learning_rate": 3.321165246387551e-05, "loss": 2.0688, "step": 11604000 }, { "epoch": 33.59, "learning_rate": 3.321092881622823e-05, "loss": 2.0688, "step": 11604500 }, { "epoch": 33.59, "learning_rate": 3.321020516858096e-05, "loss": 2.086, "step": 11605000 }, { "epoch": 33.59, "learning_rate": 3.320948152093368e-05, "loss": 2.1103, "step": 11605500 }, { "epoch": 33.59, "learning_rate": 3.3208757873286403e-05, "loss": 2.093, "step": 11606000 }, { "epoch": 33.6, "learning_rate": 3.3208034225639126e-05, "loss": 2.0946, "step": 11606500 }, { "epoch": 33.6, "learning_rate": 3.3207310577991855e-05, "loss": 2.0923, "step": 11607000 }, { "epoch": 33.6, "learning_rate": 3.320658693034458e-05, "loss": 2.0866, "step": 11607500 }, { "epoch": 33.6, "learning_rate": 3.32058632826973e-05, "loss": 2.1166, "step": 11608000 }, { "epoch": 33.6, "learning_rate": 3.3205141082345315e-05, "loss": 2.1265, "step": 11608500 }, { "epoch": 33.6, "learning_rate": 3.320441743469804e-05, "loss": 2.0866, "step": 11609000 }, { "epoch": 33.6, "learning_rate": 3.320369378705076e-05, "loss": 2.078, "step": 11609500 }, { "epoch": 33.61, "learning_rate": 3.320297013940348e-05, "loss": 2.0856, "step": 11610000 }, { "epoch": 33.61, "learning_rate": 3.3202246491756204e-05, "loss": 2.0681, "step": 11610500 }, { "epoch": 33.61, "learning_rate": 3.3201522844108926e-05, "loss": 2.0941, "step": 11611000 }, { "epoch": 33.61, "learning_rate": 3.3200799196461655e-05, "loss": 2.0878, "step": 11611500 }, { "epoch": 33.61, "learning_rate": 3.320007554881438e-05, "loss": 2.0854, "step": 11612000 }, { "epoch": 33.61, "learning_rate": 3.31993533484624e-05, "loss": 2.0848, "step": 11612500 }, { "epoch": 33.61, "learning_rate": 3.319862970081512e-05, "loss": 2.0728, "step": 11613000 }, { "epoch": 33.62, "learning_rate": 3.3197906053167844e-05, "loss": 2.071, "step": 11613500 }, { "epoch": 33.62, "learning_rate": 3.3197182405520566e-05, "loss": 2.0997, "step": 11614000 }, { "epoch": 33.62, "learning_rate": 3.319646020516858e-05, "loss": 2.0955, "step": 11614500 }, { "epoch": 33.62, "learning_rate": 3.3195738004816604e-05, "loss": 2.0833, "step": 11615000 }, { "epoch": 33.62, "learning_rate": 3.3195014357169326e-05, "loss": 2.0796, "step": 11615500 }, { "epoch": 33.62, "learning_rate": 3.319429070952205e-05, "loss": 2.0919, "step": 11616000 }, { "epoch": 33.63, "learning_rate": 3.319356706187477e-05, "loss": 2.0931, "step": 11616500 }, { "epoch": 33.63, "learning_rate": 3.319284341422749e-05, "loss": 2.0923, "step": 11617000 }, { "epoch": 33.63, "learning_rate": 3.3192119766580215e-05, "loss": 2.0906, "step": 11617500 }, { "epoch": 33.63, "learning_rate": 3.319139611893294e-05, "loss": 2.0889, "step": 11618000 }, { "epoch": 33.63, "learning_rate": 3.319067247128566e-05, "loss": 2.0902, "step": 11618500 }, { "epoch": 33.63, "learning_rate": 3.318994882363838e-05, "loss": 2.095, "step": 11619000 }, { "epoch": 33.63, "learning_rate": 3.3189226623286404e-05, "loss": 2.1011, "step": 11619500 }, { "epoch": 33.64, "learning_rate": 3.3188502975639126e-05, "loss": 2.0994, "step": 11620000 }, { "epoch": 33.64, "learning_rate": 3.318778222258244e-05, "loss": 2.0852, "step": 11620500 }, { "epoch": 33.64, "learning_rate": 3.3187058574935164e-05, "loss": 2.0961, "step": 11621000 }, { "epoch": 33.64, "learning_rate": 3.318633637458318e-05, "loss": 2.0978, "step": 11621500 }, { "epoch": 33.64, "learning_rate": 3.31856127269359e-05, "loss": 2.0926, "step": 11622000 }, { "epoch": 33.64, "learning_rate": 3.318488907928863e-05, "loss": 2.1234, "step": 11622500 }, { "epoch": 33.64, "learning_rate": 3.318416543164135e-05, "loss": 2.0914, "step": 11623000 }, { "epoch": 33.65, "learning_rate": 3.3183441783994076e-05, "loss": 2.0934, "step": 11623500 }, { "epoch": 33.65, "learning_rate": 3.31827181363468e-05, "loss": 2.0761, "step": 11624000 }, { "epoch": 33.65, "learning_rate": 3.318199593599481e-05, "loss": 2.1006, "step": 11624500 }, { "epoch": 33.65, "learning_rate": 3.3181272288347536e-05, "loss": 2.0661, "step": 11625000 }, { "epoch": 33.65, "learning_rate": 3.318055008799555e-05, "loss": 2.0856, "step": 11625500 }, { "epoch": 33.65, "learning_rate": 3.317982644034828e-05, "loss": 2.0716, "step": 11626000 }, { "epoch": 33.65, "learning_rate": 3.3179102792701e-05, "loss": 2.0958, "step": 11626500 }, { "epoch": 33.66, "learning_rate": 3.3178379145053725e-05, "loss": 2.1009, "step": 11627000 }, { "epoch": 33.66, "learning_rate": 3.317765549740645e-05, "loss": 2.093, "step": 11627500 }, { "epoch": 33.66, "learning_rate": 3.3176931849759176e-05, "loss": 2.0871, "step": 11628000 }, { "epoch": 33.66, "learning_rate": 3.317620964940719e-05, "loss": 2.0731, "step": 11628500 }, { "epoch": 33.66, "learning_rate": 3.3175486001759914e-05, "loss": 2.1009, "step": 11629000 }, { "epoch": 33.66, "learning_rate": 3.3174762354112636e-05, "loss": 2.0928, "step": 11629500 }, { "epoch": 33.66, "learning_rate": 3.317403870646536e-05, "loss": 2.099, "step": 11630000 }, { "epoch": 33.67, "learning_rate": 3.317331505881808e-05, "loss": 2.0959, "step": 11630500 }, { "epoch": 33.67, "learning_rate": 3.31725914111708e-05, "loss": 2.079, "step": 11631000 }, { "epoch": 33.67, "learning_rate": 3.317186776352353e-05, "loss": 2.0774, "step": 11631500 }, { "epoch": 33.67, "learning_rate": 3.3171144115876254e-05, "loss": 2.1021, "step": 11632000 }, { "epoch": 33.67, "learning_rate": 3.317042191552427e-05, "loss": 2.0902, "step": 11632500 }, { "epoch": 33.67, "learning_rate": 3.316969826787699e-05, "loss": 2.0684, "step": 11633000 }, { "epoch": 33.67, "learning_rate": 3.3168974620229714e-05, "loss": 2.1003, "step": 11633500 }, { "epoch": 33.68, "learning_rate": 3.3168250972582436e-05, "loss": 2.092, "step": 11634000 }, { "epoch": 33.68, "learning_rate": 3.316752732493516e-05, "loss": 2.0773, "step": 11634500 }, { "epoch": 33.68, "learning_rate": 3.316680367728789e-05, "loss": 2.0833, "step": 11635000 }, { "epoch": 33.68, "learning_rate": 3.316608002964061e-05, "loss": 2.1025, "step": 11635500 }, { "epoch": 33.68, "learning_rate": 3.316535638199333e-05, "loss": 2.0918, "step": 11636000 }, { "epoch": 33.68, "learning_rate": 3.3164632734346054e-05, "loss": 2.085, "step": 11636500 }, { "epoch": 33.68, "learning_rate": 3.316390908669878e-05, "loss": 2.0753, "step": 11637000 }, { "epoch": 33.69, "learning_rate": 3.3163185439051505e-05, "loss": 2.0601, "step": 11637500 }, { "epoch": 33.69, "learning_rate": 3.316246179140423e-05, "loss": 2.0716, "step": 11638000 }, { "epoch": 33.69, "learning_rate": 3.316173814375695e-05, "loss": 2.0851, "step": 11638500 }, { "epoch": 33.69, "learning_rate": 3.316101449610967e-05, "loss": 2.0798, "step": 11639000 }, { "epoch": 33.69, "learning_rate": 3.316029229575769e-05, "loss": 2.0774, "step": 11639500 }, { "epoch": 33.69, "learning_rate": 3.315956864811041e-05, "loss": 2.0796, "step": 11640000 }, { "epoch": 33.69, "learning_rate": 3.315884500046313e-05, "loss": 2.0668, "step": 11640500 }, { "epoch": 33.7, "learning_rate": 3.3158121352815854e-05, "loss": 2.0986, "step": 11641000 }, { "epoch": 33.7, "learning_rate": 3.3157399152463877e-05, "loss": 2.1004, "step": 11641500 }, { "epoch": 33.7, "learning_rate": 3.31566755048166e-05, "loss": 2.0798, "step": 11642000 }, { "epoch": 33.7, "learning_rate": 3.315595185716933e-05, "loss": 2.0764, "step": 11642500 }, { "epoch": 33.7, "learning_rate": 3.315522820952205e-05, "loss": 2.1072, "step": 11643000 }, { "epoch": 33.7, "learning_rate": 3.315450456187477e-05, "loss": 2.0855, "step": 11643500 }, { "epoch": 33.7, "learning_rate": 3.3153780914227495e-05, "loss": 2.0839, "step": 11644000 }, { "epoch": 33.71, "learning_rate": 3.315305726658022e-05, "loss": 2.0819, "step": 11644500 }, { "epoch": 33.71, "learning_rate": 3.315233361893294e-05, "loss": 2.07, "step": 11645000 }, { "epoch": 33.71, "learning_rate": 3.315160997128566e-05, "loss": 2.0754, "step": 11645500 }, { "epoch": 33.71, "learning_rate": 3.3150886323638383e-05, "loss": 2.0952, "step": 11646000 }, { "epoch": 33.71, "learning_rate": 3.3150162675991106e-05, "loss": 2.1035, "step": 11646500 }, { "epoch": 33.71, "learning_rate": 3.314944047563913e-05, "loss": 2.0864, "step": 11647000 }, { "epoch": 33.71, "learning_rate": 3.3148718275287144e-05, "loss": 2.0822, "step": 11647500 }, { "epoch": 33.72, "learning_rate": 3.3147994627639866e-05, "loss": 2.0955, "step": 11648000 }, { "epoch": 33.72, "learning_rate": 3.314727242728788e-05, "loss": 2.0962, "step": 11648500 }, { "epoch": 33.72, "learning_rate": 3.314654877964061e-05, "loss": 2.0907, "step": 11649000 }, { "epoch": 33.72, "learning_rate": 3.314582513199333e-05, "loss": 2.0844, "step": 11649500 }, { "epoch": 33.72, "learning_rate": 3.314510148434606e-05, "loss": 2.0706, "step": 11650000 }, { "epoch": 33.72, "learning_rate": 3.3144377836698784e-05, "loss": 2.1027, "step": 11650500 }, { "epoch": 33.72, "learning_rate": 3.3143654189051506e-05, "loss": 2.0854, "step": 11651000 }, { "epoch": 33.73, "learning_rate": 3.314293054140423e-05, "loss": 2.1021, "step": 11651500 }, { "epoch": 33.73, "learning_rate": 3.314220689375695e-05, "loss": 2.1129, "step": 11652000 }, { "epoch": 33.73, "learning_rate": 3.3141484693404966e-05, "loss": 2.1121, "step": 11652500 }, { "epoch": 33.73, "learning_rate": 3.314076104575769e-05, "loss": 2.0973, "step": 11653000 }, { "epoch": 33.73, "learning_rate": 3.314003739811041e-05, "loss": 2.0778, "step": 11653500 }, { "epoch": 33.73, "learning_rate": 3.313931375046313e-05, "loss": 2.1001, "step": 11654000 }, { "epoch": 33.74, "learning_rate": 3.313859010281586e-05, "loss": 2.1015, "step": 11654500 }, { "epoch": 33.74, "learning_rate": 3.3137866455168584e-05, "loss": 2.0898, "step": 11655000 }, { "epoch": 33.74, "learning_rate": 3.3137142807521306e-05, "loss": 2.0829, "step": 11655500 }, { "epoch": 33.74, "learning_rate": 3.313641915987403e-05, "loss": 2.0735, "step": 11656000 }, { "epoch": 33.74, "learning_rate": 3.313569551222675e-05, "loss": 2.0993, "step": 11656500 }, { "epoch": 33.74, "learning_rate": 3.313497186457948e-05, "loss": 2.0967, "step": 11657000 }, { "epoch": 33.74, "learning_rate": 3.31342482169322e-05, "loss": 2.0873, "step": 11657500 }, { "epoch": 33.75, "learning_rate": 3.3133524569284924e-05, "loss": 2.0707, "step": 11658000 }, { "epoch": 33.75, "learning_rate": 3.3132800921637646e-05, "loss": 2.0912, "step": 11658500 }, { "epoch": 33.75, "learning_rate": 3.313207872128566e-05, "loss": 2.0852, "step": 11659000 }, { "epoch": 33.75, "learning_rate": 3.3131355073638384e-05, "loss": 2.1049, "step": 11659500 }, { "epoch": 33.75, "learning_rate": 3.313063142599111e-05, "loss": 2.0779, "step": 11660000 }, { "epoch": 33.75, "learning_rate": 3.3129907778343836e-05, "loss": 2.1034, "step": 11660500 }, { "epoch": 33.75, "learning_rate": 3.312918413069656e-05, "loss": 2.1041, "step": 11661000 }, { "epoch": 33.76, "learning_rate": 3.312846193034457e-05, "loss": 2.0867, "step": 11661500 }, { "epoch": 33.76, "learning_rate": 3.3127738282697296e-05, "loss": 2.0956, "step": 11662000 }, { "epoch": 33.76, "learning_rate": 3.312701608234531e-05, "loss": 2.1141, "step": 11662500 }, { "epoch": 33.76, "learning_rate": 3.312629243469803e-05, "loss": 2.0966, "step": 11663000 }, { "epoch": 33.76, "learning_rate": 3.312556878705076e-05, "loss": 2.0547, "step": 11663500 }, { "epoch": 33.76, "learning_rate": 3.312484803399407e-05, "loss": 2.0933, "step": 11664000 }, { "epoch": 33.76, "learning_rate": 3.3124124386346793e-05, "loss": 2.0734, "step": 11664500 }, { "epoch": 33.77, "learning_rate": 3.312340073869952e-05, "loss": 2.0764, "step": 11665000 }, { "epoch": 33.77, "learning_rate": 3.3122677091052245e-05, "loss": 2.0909, "step": 11665500 }, { "epoch": 33.77, "learning_rate": 3.312195344340497e-05, "loss": 2.0643, "step": 11666000 }, { "epoch": 33.77, "learning_rate": 3.312122979575769e-05, "loss": 2.0907, "step": 11666500 }, { "epoch": 33.77, "learning_rate": 3.312050614811041e-05, "loss": 2.0981, "step": 11667000 }, { "epoch": 33.77, "learning_rate": 3.3119782500463134e-05, "loss": 2.0756, "step": 11667500 }, { "epoch": 33.77, "learning_rate": 3.3119060300111156e-05, "loss": 2.0727, "step": 11668000 }, { "epoch": 33.78, "learning_rate": 3.311833665246388e-05, "loss": 2.0736, "step": 11668500 }, { "epoch": 33.78, "learning_rate": 3.31176130048166e-05, "loss": 2.0759, "step": 11669000 }, { "epoch": 33.78, "learning_rate": 3.311688935716932e-05, "loss": 2.0822, "step": 11669500 }, { "epoch": 33.78, "learning_rate": 3.3116165709522045e-05, "loss": 2.0938, "step": 11670000 }, { "epoch": 33.78, "learning_rate": 3.311544206187477e-05, "loss": 2.0871, "step": 11670500 }, { "epoch": 33.78, "learning_rate": 3.311471841422749e-05, "loss": 2.114, "step": 11671000 }, { "epoch": 33.78, "learning_rate": 3.311399476658021e-05, "loss": 2.0847, "step": 11671500 }, { "epoch": 33.79, "learning_rate": 3.3113271118932934e-05, "loss": 2.0483, "step": 11672000 }, { "epoch": 33.79, "learning_rate": 3.311254747128566e-05, "loss": 2.0913, "step": 11672500 }, { "epoch": 33.79, "learning_rate": 3.3111823823638385e-05, "loss": 2.0688, "step": 11673000 }, { "epoch": 33.79, "learning_rate": 3.311110162328641e-05, "loss": 2.0822, "step": 11673500 }, { "epoch": 33.79, "learning_rate": 3.311037797563913e-05, "loss": 2.0927, "step": 11674000 }, { "epoch": 33.79, "learning_rate": 3.310965432799185e-05, "loss": 2.0776, "step": 11674500 }, { "epoch": 33.79, "learning_rate": 3.3108930680344574e-05, "loss": 2.1098, "step": 11675000 }, { "epoch": 33.8, "learning_rate": 3.310820847999259e-05, "loss": 2.0925, "step": 11675500 }, { "epoch": 33.8, "learning_rate": 3.310748483234531e-05, "loss": 2.0847, "step": 11676000 }, { "epoch": 33.8, "learning_rate": 3.3106761184698034e-05, "loss": 2.0652, "step": 11676500 }, { "epoch": 33.8, "learning_rate": 3.310603753705076e-05, "loss": 2.1004, "step": 11677000 }, { "epoch": 33.8, "learning_rate": 3.3105313889403485e-05, "loss": 2.0999, "step": 11677500 }, { "epoch": 33.8, "learning_rate": 3.310459024175621e-05, "loss": 2.1025, "step": 11678000 }, { "epoch": 33.8, "learning_rate": 3.310386659410893e-05, "loss": 2.0901, "step": 11678500 }, { "epoch": 33.81, "learning_rate": 3.310314294646165e-05, "loss": 2.0812, "step": 11679000 }, { "epoch": 33.81, "learning_rate": 3.310241929881438e-05, "loss": 2.0678, "step": 11679500 }, { "epoch": 33.81, "learning_rate": 3.31016956511671e-05, "loss": 2.0775, "step": 11680000 }, { "epoch": 33.81, "learning_rate": 3.3100972003519826e-05, "loss": 2.0689, "step": 11680500 }, { "epoch": 33.81, "learning_rate": 3.310024835587255e-05, "loss": 2.0798, "step": 11681000 }, { "epoch": 33.81, "learning_rate": 3.309952470822527e-05, "loss": 2.0749, "step": 11681500 }, { "epoch": 33.81, "learning_rate": 3.309880106057799e-05, "loss": 2.0686, "step": 11682000 }, { "epoch": 33.82, "learning_rate": 3.3098078860226015e-05, "loss": 2.0719, "step": 11682500 }, { "epoch": 33.82, "learning_rate": 3.309735521257874e-05, "loss": 2.0871, "step": 11683000 }, { "epoch": 33.82, "learning_rate": 3.309663156493146e-05, "loss": 2.0917, "step": 11683500 }, { "epoch": 33.82, "learning_rate": 3.309590791728418e-05, "loss": 2.0887, "step": 11684000 }, { "epoch": 33.82, "learning_rate": 3.3095184269636904e-05, "loss": 2.0879, "step": 11684500 }, { "epoch": 33.82, "learning_rate": 3.3094460621989626e-05, "loss": 2.0852, "step": 11685000 }, { "epoch": 33.82, "learning_rate": 3.309373842163764e-05, "loss": 2.0985, "step": 11685500 }, { "epoch": 33.83, "learning_rate": 3.3093014773990364e-05, "loss": 2.0871, "step": 11686000 }, { "epoch": 33.83, "learning_rate": 3.3092292573638386e-05, "loss": 2.1035, "step": 11686500 }, { "epoch": 33.83, "learning_rate": 3.3091568925991115e-05, "loss": 2.0803, "step": 11687000 }, { "epoch": 33.83, "learning_rate": 3.309084527834384e-05, "loss": 2.0848, "step": 11687500 }, { "epoch": 33.83, "learning_rate": 3.309012307799185e-05, "loss": 2.0928, "step": 11688000 }, { "epoch": 33.83, "learning_rate": 3.3089399430344575e-05, "loss": 2.1, "step": 11688500 }, { "epoch": 33.83, "learning_rate": 3.30886757826973e-05, "loss": 2.0821, "step": 11689000 }, { "epoch": 33.84, "learning_rate": 3.308795213505002e-05, "loss": 2.0917, "step": 11689500 }, { "epoch": 33.84, "learning_rate": 3.308722848740274e-05, "loss": 2.0952, "step": 11690000 }, { "epoch": 33.84, "learning_rate": 3.3086504839755464e-05, "loss": 2.0686, "step": 11690500 }, { "epoch": 33.84, "learning_rate": 3.308578119210819e-05, "loss": 2.0799, "step": 11691000 }, { "epoch": 33.84, "learning_rate": 3.3085057544460915e-05, "loss": 2.0828, "step": 11691500 }, { "epoch": 33.84, "learning_rate": 3.308433389681364e-05, "loss": 2.0852, "step": 11692000 }, { "epoch": 33.85, "learning_rate": 3.308361024916636e-05, "loss": 2.1174, "step": 11692500 }, { "epoch": 33.85, "learning_rate": 3.308288660151908e-05, "loss": 2.0695, "step": 11693000 }, { "epoch": 33.85, "learning_rate": 3.3082162953871804e-05, "loss": 2.0851, "step": 11693500 }, { "epoch": 33.85, "learning_rate": 3.3081439306224526e-05, "loss": 2.0796, "step": 11694000 }, { "epoch": 33.85, "learning_rate": 3.308071710587255e-05, "loss": 2.1108, "step": 11694500 }, { "epoch": 33.85, "learning_rate": 3.307999345822527e-05, "loss": 2.111, "step": 11695000 }, { "epoch": 33.85, "learning_rate": 3.307926981057799e-05, "loss": 2.0707, "step": 11695500 }, { "epoch": 33.86, "learning_rate": 3.3078547610226015e-05, "loss": 2.1109, "step": 11696000 }, { "epoch": 33.86, "learning_rate": 3.307782396257874e-05, "loss": 2.0798, "step": 11696500 }, { "epoch": 33.86, "learning_rate": 3.307710031493146e-05, "loss": 2.0527, "step": 11697000 }, { "epoch": 33.86, "learning_rate": 3.307637666728418e-05, "loss": 2.0901, "step": 11697500 }, { "epoch": 33.86, "learning_rate": 3.3075653019636904e-05, "loss": 2.096, "step": 11698000 }, { "epoch": 33.86, "learning_rate": 3.3074929371989627e-05, "loss": 2.0759, "step": 11698500 }, { "epoch": 33.86, "learning_rate": 3.307420572434235e-05, "loss": 2.1082, "step": 11699000 }, { "epoch": 33.87, "learning_rate": 3.307348207669507e-05, "loss": 2.074, "step": 11699500 }, { "epoch": 33.87, "learning_rate": 3.307275987634309e-05, "loss": 2.0813, "step": 11700000 }, { "epoch": 33.87, "learning_rate": 3.3072036228695816e-05, "loss": 2.0708, "step": 11700500 }, { "epoch": 33.87, "learning_rate": 3.307131258104854e-05, "loss": 2.0742, "step": 11701000 }, { "epoch": 33.87, "learning_rate": 3.307058893340126e-05, "loss": 2.056, "step": 11701500 }, { "epoch": 33.87, "learning_rate": 3.306986528575399e-05, "loss": 2.099, "step": 11702000 }, { "epoch": 33.87, "learning_rate": 3.306914163810671e-05, "loss": 2.067, "step": 11702500 }, { "epoch": 33.88, "learning_rate": 3.306841943775473e-05, "loss": 2.0963, "step": 11703000 }, { "epoch": 33.88, "learning_rate": 3.306769579010745e-05, "loss": 2.0731, "step": 11703500 }, { "epoch": 33.88, "learning_rate": 3.306697214246017e-05, "loss": 2.0767, "step": 11704000 }, { "epoch": 33.88, "learning_rate": 3.3066249942108194e-05, "loss": 2.0569, "step": 11704500 }, { "epoch": 33.88, "learning_rate": 3.3065526294460916e-05, "loss": 2.09, "step": 11705000 }, { "epoch": 33.88, "learning_rate": 3.306480264681364e-05, "loss": 2.1249, "step": 11705500 }, { "epoch": 33.88, "learning_rate": 3.306407899916636e-05, "loss": 2.0881, "step": 11706000 }, { "epoch": 33.89, "learning_rate": 3.3063356798814376e-05, "loss": 2.1119, "step": 11706500 }, { "epoch": 33.89, "learning_rate": 3.30626331511671e-05, "loss": 2.1004, "step": 11707000 }, { "epoch": 33.89, "learning_rate": 3.306190950351982e-05, "loss": 2.0924, "step": 11707500 }, { "epoch": 33.89, "learning_rate": 3.306118585587254e-05, "loss": 2.1048, "step": 11708000 }, { "epoch": 33.89, "learning_rate": 3.3060462208225265e-05, "loss": 2.0896, "step": 11708500 }, { "epoch": 33.89, "learning_rate": 3.3059738560577994e-05, "loss": 2.0899, "step": 11709000 }, { "epoch": 33.89, "learning_rate": 3.3059014912930716e-05, "loss": 2.0724, "step": 11709500 }, { "epoch": 33.9, "learning_rate": 3.3058291265283445e-05, "loss": 2.0603, "step": 11710000 }, { "epoch": 33.9, "learning_rate": 3.305756906493146e-05, "loss": 2.0818, "step": 11710500 }, { "epoch": 33.9, "learning_rate": 3.305684541728418e-05, "loss": 2.0761, "step": 11711000 }, { "epoch": 33.9, "learning_rate": 3.3056121769636905e-05, "loss": 2.0831, "step": 11711500 }, { "epoch": 33.9, "learning_rate": 3.305539812198963e-05, "loss": 2.077, "step": 11712000 }, { "epoch": 33.9, "learning_rate": 3.305467447434235e-05, "loss": 2.0693, "step": 11712500 }, { "epoch": 33.9, "learning_rate": 3.3053952273990365e-05, "loss": 2.0949, "step": 11713000 }, { "epoch": 33.91, "learning_rate": 3.3053228626343094e-05, "loss": 2.0816, "step": 11713500 }, { "epoch": 33.91, "learning_rate": 3.305250642599111e-05, "loss": 2.0977, "step": 11714000 }, { "epoch": 33.91, "learning_rate": 3.305178277834383e-05, "loss": 2.0706, "step": 11714500 }, { "epoch": 33.91, "learning_rate": 3.305106057799185e-05, "loss": 2.0847, "step": 11715000 }, { "epoch": 33.91, "learning_rate": 3.305033693034457e-05, "loss": 2.0959, "step": 11715500 }, { "epoch": 33.91, "learning_rate": 3.304961328269729e-05, "loss": 2.0836, "step": 11716000 }, { "epoch": 33.91, "learning_rate": 3.3048891082345314e-05, "loss": 2.0753, "step": 11716500 }, { "epoch": 33.92, "learning_rate": 3.3048167434698037e-05, "loss": 2.0853, "step": 11717000 }, { "epoch": 33.92, "learning_rate": 3.3047443787050766e-05, "loss": 2.0998, "step": 11717500 }, { "epoch": 33.92, "learning_rate": 3.304672013940349e-05, "loss": 2.0762, "step": 11718000 }, { "epoch": 33.92, "learning_rate": 3.304599649175621e-05, "loss": 2.1111, "step": 11718500 }, { "epoch": 33.92, "learning_rate": 3.304527284410893e-05, "loss": 2.1008, "step": 11719000 }, { "epoch": 33.92, "learning_rate": 3.3044549196461654e-05, "loss": 2.1127, "step": 11719500 }, { "epoch": 33.92, "learning_rate": 3.304382554881438e-05, "loss": 2.1209, "step": 11720000 }, { "epoch": 33.93, "learning_rate": 3.30431019011671e-05, "loss": 2.0972, "step": 11720500 }, { "epoch": 33.93, "learning_rate": 3.304237825351982e-05, "loss": 2.0906, "step": 11721000 }, { "epoch": 33.93, "learning_rate": 3.304165460587254e-05, "loss": 2.0788, "step": 11721500 }, { "epoch": 33.93, "learning_rate": 3.304093095822527e-05, "loss": 2.0565, "step": 11722000 }, { "epoch": 33.93, "learning_rate": 3.3040207310577995e-05, "loss": 2.0848, "step": 11722500 }, { "epoch": 33.93, "learning_rate": 3.303948366293072e-05, "loss": 2.0761, "step": 11723000 }, { "epoch": 33.93, "learning_rate": 3.303876001528344e-05, "loss": 2.073, "step": 11723500 }, { "epoch": 33.94, "learning_rate": 3.303803636763616e-05, "loss": 2.0886, "step": 11724000 }, { "epoch": 33.94, "learning_rate": 3.3037314167284184e-05, "loss": 2.1171, "step": 11724500 }, { "epoch": 33.94, "learning_rate": 3.3036590519636906e-05, "loss": 2.0833, "step": 11725000 }, { "epoch": 33.94, "learning_rate": 3.303586687198963e-05, "loss": 2.0907, "step": 11725500 }, { "epoch": 33.94, "learning_rate": 3.303514322434235e-05, "loss": 2.088, "step": 11726000 }, { "epoch": 33.94, "learning_rate": 3.303441957669507e-05, "loss": 2.0699, "step": 11726500 }, { "epoch": 33.94, "learning_rate": 3.3033695929047795e-05, "loss": 2.0844, "step": 11727000 }, { "epoch": 33.95, "learning_rate": 3.3032972281400524e-05, "loss": 2.0925, "step": 11727500 }, { "epoch": 33.95, "learning_rate": 3.3032248633753246e-05, "loss": 2.0913, "step": 11728000 }, { "epoch": 33.95, "learning_rate": 3.303152498610597e-05, "loss": 2.0799, "step": 11728500 }, { "epoch": 33.95, "learning_rate": 3.303080133845869e-05, "loss": 2.0879, "step": 11729000 }, { "epoch": 33.95, "learning_rate": 3.303007769081141e-05, "loss": 2.1103, "step": 11729500 }, { "epoch": 33.95, "learning_rate": 3.3029354043164135e-05, "loss": 2.0887, "step": 11730000 }, { "epoch": 33.95, "learning_rate": 3.302863184281215e-05, "loss": 2.0939, "step": 11730500 }, { "epoch": 33.96, "learning_rate": 3.302790819516487e-05, "loss": 2.0801, "step": 11731000 }, { "epoch": 33.96, "learning_rate": 3.30271845475176e-05, "loss": 2.0742, "step": 11731500 }, { "epoch": 33.96, "learning_rate": 3.3026460899870324e-05, "loss": 2.0848, "step": 11732000 }, { "epoch": 33.96, "learning_rate": 3.3025738699518346e-05, "loss": 2.1011, "step": 11732500 }, { "epoch": 33.96, "learning_rate": 3.302501505187107e-05, "loss": 2.075, "step": 11733000 }, { "epoch": 33.96, "learning_rate": 3.3024292851519084e-05, "loss": 2.0803, "step": 11733500 }, { "epoch": 33.97, "learning_rate": 3.3023569203871806e-05, "loss": 2.101, "step": 11734000 }, { "epoch": 33.97, "learning_rate": 3.302284700351982e-05, "loss": 2.0901, "step": 11734500 }, { "epoch": 33.97, "learning_rate": 3.3022123355872544e-05, "loss": 2.0943, "step": 11735000 }, { "epoch": 33.97, "learning_rate": 3.302139970822527e-05, "loss": 2.0945, "step": 11735500 }, { "epoch": 33.97, "learning_rate": 3.3020676060577995e-05, "loss": 2.0736, "step": 11736000 }, { "epoch": 33.97, "learning_rate": 3.301995386022601e-05, "loss": 2.0864, "step": 11736500 }, { "epoch": 33.97, "learning_rate": 3.301923021257873e-05, "loss": 2.0809, "step": 11737000 }, { "epoch": 33.98, "learning_rate": 3.3018506564931455e-05, "loss": 2.0885, "step": 11737500 }, { "epoch": 33.98, "learning_rate": 3.301778291728418e-05, "loss": 2.1041, "step": 11738000 }, { "epoch": 33.98, "learning_rate": 3.301706071693219e-05, "loss": 2.0821, "step": 11738500 }, { "epoch": 33.98, "learning_rate": 3.301633706928492e-05, "loss": 2.0999, "step": 11739000 }, { "epoch": 33.98, "learning_rate": 3.3015613421637644e-05, "loss": 2.1192, "step": 11739500 }, { "epoch": 33.98, "learning_rate": 3.3014889773990373e-05, "loss": 2.067, "step": 11740000 }, { "epoch": 33.98, "learning_rate": 3.3014166126343096e-05, "loss": 2.0791, "step": 11740500 }, { "epoch": 33.99, "learning_rate": 3.301344247869582e-05, "loss": 2.1023, "step": 11741000 }, { "epoch": 33.99, "learning_rate": 3.301271883104854e-05, "loss": 2.0997, "step": 11741500 }, { "epoch": 33.99, "learning_rate": 3.301199518340126e-05, "loss": 2.0968, "step": 11742000 }, { "epoch": 33.99, "learning_rate": 3.3011271535753985e-05, "loss": 2.1097, "step": 11742500 }, { "epoch": 33.99, "learning_rate": 3.301054788810671e-05, "loss": 2.0738, "step": 11743000 }, { "epoch": 33.99, "learning_rate": 3.300982424045943e-05, "loss": 2.075, "step": 11743500 }, { "epoch": 33.99, "learning_rate": 3.300910059281215e-05, "loss": 2.0988, "step": 11744000 }, { "epoch": 34.0, "learning_rate": 3.3008376945164874e-05, "loss": 2.104, "step": 11744500 }, { "epoch": 34.0, "learning_rate": 3.3007654744812896e-05, "loss": 2.0784, "step": 11745000 }, { "epoch": 34.0, "learning_rate": 3.300693109716562e-05, "loss": 2.0744, "step": 11745500 }, { "epoch": 34.0, "learning_rate": 3.300620744951834e-05, "loss": 2.1208, "step": 11746000 }, { "epoch": 34.0, "eval_accuracy": 0.6683752018054113, "eval_accuracy_mlm": 0.6330868903676227, "eval_accuracy_nsp": 0.8576833353957276, "eval_loss": 2.1848316192626953, "eval_runtime": 331.8363, "eval_samples_per_second": 1315.064, "eval_steps_per_second": 54.795, "step": 11746048 }, { "epoch": 34.0, "learning_rate": 3.300548380187107e-05, "loss": 2.0541, "step": 11746500 }, { "epoch": 34.0, "learning_rate": 3.3004761601519085e-05, "loss": 2.0843, "step": 11747000 }, { "epoch": 34.0, "learning_rate": 3.300403795387181e-05, "loss": 2.0417, "step": 11747500 }, { "epoch": 34.01, "learning_rate": 3.300331430622453e-05, "loss": 2.0662, "step": 11748000 }, { "epoch": 34.01, "learning_rate": 3.300259065857725e-05, "loss": 2.0647, "step": 11748500 }, { "epoch": 34.01, "learning_rate": 3.3001867010929974e-05, "loss": 2.052, "step": 11749000 }, { "epoch": 34.01, "learning_rate": 3.3001143363282696e-05, "loss": 2.0667, "step": 11749500 }, { "epoch": 34.01, "learning_rate": 3.300042116293072e-05, "loss": 2.0503, "step": 11750000 }, { "epoch": 34.01, "learning_rate": 3.299969751528344e-05, "loss": 2.0598, "step": 11750500 }, { "epoch": 34.01, "learning_rate": 3.2998975314931456e-05, "loss": 2.0656, "step": 11751000 }, { "epoch": 34.02, "learning_rate": 3.299825166728418e-05, "loss": 2.0474, "step": 11751500 }, { "epoch": 34.02, "learning_rate": 3.29975280196369e-05, "loss": 2.0609, "step": 11752000 }, { "epoch": 34.02, "learning_rate": 3.299680581928492e-05, "loss": 2.0618, "step": 11752500 }, { "epoch": 34.02, "learning_rate": 3.2996082171637645e-05, "loss": 2.0827, "step": 11753000 }, { "epoch": 34.02, "learning_rate": 3.299535852399037e-05, "loss": 2.0515, "step": 11753500 }, { "epoch": 34.02, "learning_rate": 3.299463487634309e-05, "loss": 2.0554, "step": 11754000 }, { "epoch": 34.02, "learning_rate": 3.299391122869582e-05, "loss": 2.0759, "step": 11754500 }, { "epoch": 34.03, "learning_rate": 3.299318758104854e-05, "loss": 2.0613, "step": 11755000 }, { "epoch": 34.03, "learning_rate": 3.299246393340126e-05, "loss": 2.0762, "step": 11755500 }, { "epoch": 34.03, "learning_rate": 3.2991740285753985e-05, "loss": 2.0491, "step": 11756000 }, { "epoch": 34.03, "learning_rate": 3.299101663810671e-05, "loss": 2.0709, "step": 11756500 }, { "epoch": 34.03, "learning_rate": 3.299029443775472e-05, "loss": 2.0752, "step": 11757000 }, { "epoch": 34.03, "learning_rate": 3.298957079010745e-05, "loss": 2.063, "step": 11757500 }, { "epoch": 34.03, "learning_rate": 3.2988847142460174e-05, "loss": 2.0705, "step": 11758000 }, { "epoch": 34.04, "learning_rate": 3.29881234948129e-05, "loss": 2.0852, "step": 11758500 }, { "epoch": 34.04, "learning_rate": 3.298740129446091e-05, "loss": 2.0856, "step": 11759000 }, { "epoch": 34.04, "learning_rate": 3.2986677646813635e-05, "loss": 2.073, "step": 11759500 }, { "epoch": 34.04, "learning_rate": 3.298595689375695e-05, "loss": 2.0644, "step": 11760000 }, { "epoch": 34.04, "learning_rate": 3.298523324610967e-05, "loss": 2.061, "step": 11760500 }, { "epoch": 34.04, "learning_rate": 3.2984509598462395e-05, "loss": 2.0538, "step": 11761000 }, { "epoch": 34.04, "learning_rate": 3.298378595081512e-05, "loss": 2.0657, "step": 11761500 }, { "epoch": 34.05, "learning_rate": 3.2983062303167846e-05, "loss": 2.0939, "step": 11762000 }, { "epoch": 34.05, "learning_rate": 3.298233865552057e-05, "loss": 2.0421, "step": 11762500 }, { "epoch": 34.05, "learning_rate": 3.298161500787329e-05, "loss": 2.0746, "step": 11763000 }, { "epoch": 34.05, "learning_rate": 3.298089136022601e-05, "loss": 2.0625, "step": 11763500 }, { "epoch": 34.05, "learning_rate": 3.298016915987403e-05, "loss": 2.0915, "step": 11764000 }, { "epoch": 34.05, "learning_rate": 3.297944551222675e-05, "loss": 2.0715, "step": 11764500 }, { "epoch": 34.05, "learning_rate": 3.297872186457948e-05, "loss": 2.0651, "step": 11765000 }, { "epoch": 34.06, "learning_rate": 3.29779982169322e-05, "loss": 2.1096, "step": 11765500 }, { "epoch": 34.06, "learning_rate": 3.2977274569284924e-05, "loss": 2.0695, "step": 11766000 }, { "epoch": 34.06, "learning_rate": 3.2976550921637646e-05, "loss": 2.0827, "step": 11766500 }, { "epoch": 34.06, "learning_rate": 3.297582727399037e-05, "loss": 2.0595, "step": 11767000 }, { "epoch": 34.06, "learning_rate": 3.297510362634309e-05, "loss": 2.0558, "step": 11767500 }, { "epoch": 34.06, "learning_rate": 3.297437997869581e-05, "loss": 2.0537, "step": 11768000 }, { "epoch": 34.06, "learning_rate": 3.297365777834383e-05, "loss": 2.0665, "step": 11768500 }, { "epoch": 34.07, "learning_rate": 3.297293413069655e-05, "loss": 2.0891, "step": 11769000 }, { "epoch": 34.07, "learning_rate": 3.297221048304928e-05, "loss": 2.0659, "step": 11769500 }, { "epoch": 34.07, "learning_rate": 3.2971486835402e-05, "loss": 2.0969, "step": 11770000 }, { "epoch": 34.07, "learning_rate": 3.2970763187754724e-05, "loss": 2.0883, "step": 11770500 }, { "epoch": 34.07, "learning_rate": 3.297004243469804e-05, "loss": 2.0772, "step": 11771000 }, { "epoch": 34.07, "learning_rate": 3.296931878705076e-05, "loss": 2.0877, "step": 11771500 }, { "epoch": 34.08, "learning_rate": 3.2968595139403484e-05, "loss": 2.0845, "step": 11772000 }, { "epoch": 34.08, "learning_rate": 3.2967871491756206e-05, "loss": 2.0875, "step": 11772500 }, { "epoch": 34.08, "learning_rate": 3.296714784410893e-05, "loss": 2.0819, "step": 11773000 }, { "epoch": 34.08, "learning_rate": 3.296642419646165e-05, "loss": 2.0609, "step": 11773500 }, { "epoch": 34.08, "learning_rate": 3.296570054881438e-05, "loss": 2.0783, "step": 11774000 }, { "epoch": 34.08, "learning_rate": 3.29649769011671e-05, "loss": 2.084, "step": 11774500 }, { "epoch": 34.08, "learning_rate": 3.2964253253519824e-05, "loss": 2.0782, "step": 11775000 }, { "epoch": 34.09, "learning_rate": 3.2963529605872547e-05, "loss": 2.0791, "step": 11775500 }, { "epoch": 34.09, "learning_rate": 3.296280740552056e-05, "loss": 2.058, "step": 11776000 }, { "epoch": 34.09, "learning_rate": 3.2962083757873284e-05, "loss": 2.0687, "step": 11776500 }, { "epoch": 34.09, "learning_rate": 3.296136011022601e-05, "loss": 2.0633, "step": 11777000 }, { "epoch": 34.09, "learning_rate": 3.2960636462578736e-05, "loss": 2.0861, "step": 11777500 }, { "epoch": 34.09, "learning_rate": 3.295991281493146e-05, "loss": 2.0464, "step": 11778000 }, { "epoch": 34.09, "learning_rate": 3.295918916728418e-05, "loss": 2.0552, "step": 11778500 }, { "epoch": 34.1, "learning_rate": 3.29584655196369e-05, "loss": 2.0559, "step": 11779000 }, { "epoch": 34.1, "learning_rate": 3.295774187198963e-05, "loss": 2.0849, "step": 11779500 }, { "epoch": 34.1, "learning_rate": 3.2957018224342354e-05, "loss": 2.0898, "step": 11780000 }, { "epoch": 34.1, "learning_rate": 3.2956294576695076e-05, "loss": 2.0519, "step": 11780500 }, { "epoch": 34.1, "learning_rate": 3.29555709290478e-05, "loss": 2.071, "step": 11781000 }, { "epoch": 34.1, "learning_rate": 3.295484728140052e-05, "loss": 2.0886, "step": 11781500 }, { "epoch": 34.1, "learning_rate": 3.295412363375324e-05, "loss": 2.0841, "step": 11782000 }, { "epoch": 34.11, "learning_rate": 3.2953399986105965e-05, "loss": 2.0711, "step": 11782500 }, { "epoch": 34.11, "learning_rate": 3.295267633845869e-05, "loss": 2.084, "step": 11783000 }, { "epoch": 34.11, "learning_rate": 3.295195269081141e-05, "loss": 2.0786, "step": 11783500 }, { "epoch": 34.11, "learning_rate": 3.295122904316414e-05, "loss": 2.0444, "step": 11784000 }, { "epoch": 34.11, "learning_rate": 3.2950506842812154e-05, "loss": 2.0636, "step": 11784500 }, { "epoch": 34.11, "learning_rate": 3.2949784642460176e-05, "loss": 2.068, "step": 11785000 }, { "epoch": 34.11, "learning_rate": 3.294906244210819e-05, "loss": 2.0591, "step": 11785500 }, { "epoch": 34.12, "learning_rate": 3.2948338794460914e-05, "loss": 2.0663, "step": 11786000 }, { "epoch": 34.12, "learning_rate": 3.2947615146813636e-05, "loss": 2.059, "step": 11786500 }, { "epoch": 34.12, "learning_rate": 3.294689294646165e-05, "loss": 2.0956, "step": 11787000 }, { "epoch": 34.12, "learning_rate": 3.294616929881438e-05, "loss": 2.078, "step": 11787500 }, { "epoch": 34.12, "learning_rate": 3.29454456511671e-05, "loss": 2.0863, "step": 11788000 }, { "epoch": 34.12, "learning_rate": 3.2944722003519825e-05, "loss": 2.0534, "step": 11788500 }, { "epoch": 34.12, "learning_rate": 3.294399835587255e-05, "loss": 2.0767, "step": 11789000 }, { "epoch": 34.13, "learning_rate": 3.294327470822527e-05, "loss": 2.0804, "step": 11789500 }, { "epoch": 34.13, "learning_rate": 3.294255106057799e-05, "loss": 2.0808, "step": 11790000 }, { "epoch": 34.13, "learning_rate": 3.2941827412930714e-05, "loss": 2.0672, "step": 11790500 }, { "epoch": 34.13, "learning_rate": 3.2941103765283436e-05, "loss": 2.0873, "step": 11791000 }, { "epoch": 34.13, "learning_rate": 3.294038011763616e-05, "loss": 2.0539, "step": 11791500 }, { "epoch": 34.13, "learning_rate": 3.293965646998889e-05, "loss": 2.0846, "step": 11792000 }, { "epoch": 34.13, "learning_rate": 3.293893282234161e-05, "loss": 2.0529, "step": 11792500 }, { "epoch": 34.14, "learning_rate": 3.293820917469433e-05, "loss": 2.071, "step": 11793000 }, { "epoch": 34.14, "learning_rate": 3.2937486974342354e-05, "loss": 2.0645, "step": 11793500 }, { "epoch": 34.14, "learning_rate": 3.2936763326695077e-05, "loss": 2.0782, "step": 11794000 }, { "epoch": 34.14, "learning_rate": 3.29360396790478e-05, "loss": 2.0544, "step": 11794500 }, { "epoch": 34.14, "learning_rate": 3.293531603140052e-05, "loss": 2.0782, "step": 11795000 }, { "epoch": 34.14, "learning_rate": 3.293459238375324e-05, "loss": 2.0772, "step": 11795500 }, { "epoch": 34.14, "learning_rate": 3.2933868736105966e-05, "loss": 2.0607, "step": 11796000 }, { "epoch": 34.15, "learning_rate": 3.293314508845869e-05, "loss": 2.082, "step": 11796500 }, { "epoch": 34.15, "learning_rate": 3.29324228881067e-05, "loss": 2.0909, "step": 11797000 }, { "epoch": 34.15, "learning_rate": 3.2931700687754726e-05, "loss": 2.0745, "step": 11797500 }, { "epoch": 34.15, "learning_rate": 3.293097704010745e-05, "loss": 2.0617, "step": 11798000 }, { "epoch": 34.15, "learning_rate": 3.293025339246017e-05, "loss": 2.0706, "step": 11798500 }, { "epoch": 34.15, "learning_rate": 3.292952974481289e-05, "loss": 2.0894, "step": 11799000 }, { "epoch": 34.15, "learning_rate": 3.292880609716562e-05, "loss": 2.0677, "step": 11799500 }, { "epoch": 34.16, "learning_rate": 3.292808389681364e-05, "loss": 2.0815, "step": 11800000 }, { "epoch": 34.16, "learning_rate": 3.292736024916636e-05, "loss": 2.09, "step": 11800500 }, { "epoch": 34.16, "learning_rate": 3.292663660151908e-05, "loss": 2.0706, "step": 11801000 }, { "epoch": 34.16, "learning_rate": 3.2925912953871804e-05, "loss": 2.0848, "step": 11801500 }, { "epoch": 34.16, "learning_rate": 3.292518930622453e-05, "loss": 2.0687, "step": 11802000 }, { "epoch": 34.16, "learning_rate": 3.2924465658577255e-05, "loss": 2.0518, "step": 11802500 }, { "epoch": 34.16, "learning_rate": 3.292374345822527e-05, "loss": 2.0762, "step": 11803000 }, { "epoch": 34.17, "learning_rate": 3.292301981057799e-05, "loss": 2.0949, "step": 11803500 }, { "epoch": 34.17, "learning_rate": 3.2922296162930715e-05, "loss": 2.0569, "step": 11804000 }, { "epoch": 34.17, "learning_rate": 3.292157251528344e-05, "loss": 2.0685, "step": 11804500 }, { "epoch": 34.17, "learning_rate": 3.292085031493146e-05, "loss": 2.0883, "step": 11805000 }, { "epoch": 34.17, "learning_rate": 3.292012666728418e-05, "loss": 2.0779, "step": 11805500 }, { "epoch": 34.17, "learning_rate": 3.2919403019636904e-05, "loss": 2.096, "step": 11806000 }, { "epoch": 34.17, "learning_rate": 3.2918679371989626e-05, "loss": 2.0712, "step": 11806500 }, { "epoch": 34.18, "learning_rate": 3.2917955724342355e-05, "loss": 2.0623, "step": 11807000 }, { "epoch": 34.18, "learning_rate": 3.291723207669508e-05, "loss": 2.0802, "step": 11807500 }, { "epoch": 34.18, "learning_rate": 3.29165084290478e-05, "loss": 2.0674, "step": 11808000 }, { "epoch": 34.18, "learning_rate": 3.291578478140052e-05, "loss": 2.0715, "step": 11808500 }, { "epoch": 34.18, "learning_rate": 3.291506258104854e-05, "loss": 2.0689, "step": 11809000 }, { "epoch": 34.18, "learning_rate": 3.291433893340126e-05, "loss": 2.0656, "step": 11809500 }, { "epoch": 34.19, "learning_rate": 3.291361528575398e-05, "loss": 2.0631, "step": 11810000 }, { "epoch": 34.19, "learning_rate": 3.291289163810671e-05, "loss": 2.0952, "step": 11810500 }, { "epoch": 34.19, "learning_rate": 3.291216799045943e-05, "loss": 2.0691, "step": 11811000 }, { "epoch": 34.19, "learning_rate": 3.2911444342812155e-05, "loss": 2.093, "step": 11811500 }, { "epoch": 34.19, "learning_rate": 3.291072069516488e-05, "loss": 2.0667, "step": 11812000 }, { "epoch": 34.19, "learning_rate": 3.29099970475176e-05, "loss": 2.0783, "step": 11812500 }, { "epoch": 34.19, "learning_rate": 3.290927339987032e-05, "loss": 2.0768, "step": 11813000 }, { "epoch": 34.2, "learning_rate": 3.2908549752223044e-05, "loss": 2.0774, "step": 11813500 }, { "epoch": 34.2, "learning_rate": 3.290782610457577e-05, "loss": 2.0762, "step": 11814000 }, { "epoch": 34.2, "learning_rate": 3.2907102456928496e-05, "loss": 2.0663, "step": 11814500 }, { "epoch": 34.2, "learning_rate": 3.290637880928122e-05, "loss": 2.0947, "step": 11815000 }, { "epoch": 34.2, "learning_rate": 3.290565516163394e-05, "loss": 2.092, "step": 11815500 }, { "epoch": 34.2, "learning_rate": 3.290493151398666e-05, "loss": 2.0701, "step": 11816000 }, { "epoch": 34.2, "learning_rate": 3.2904209313634685e-05, "loss": 2.0864, "step": 11816500 }, { "epoch": 34.21, "learning_rate": 3.29034871132827e-05, "loss": 2.0738, "step": 11817000 }, { "epoch": 34.21, "learning_rate": 3.290276346563542e-05, "loss": 2.0635, "step": 11817500 }, { "epoch": 34.21, "learning_rate": 3.2902039817988145e-05, "loss": 2.0787, "step": 11818000 }, { "epoch": 34.21, "learning_rate": 3.290131617034087e-05, "loss": 2.0751, "step": 11818500 }, { "epoch": 34.21, "learning_rate": 3.290059252269359e-05, "loss": 2.0808, "step": 11819000 }, { "epoch": 34.21, "learning_rate": 3.289986887504631e-05, "loss": 2.0526, "step": 11819500 }, { "epoch": 34.21, "learning_rate": 3.2899145227399033e-05, "loss": 2.0808, "step": 11820000 }, { "epoch": 34.22, "learning_rate": 3.289842157975176e-05, "loss": 2.0637, "step": 11820500 }, { "epoch": 34.22, "learning_rate": 3.2897697932104485e-05, "loss": 2.0806, "step": 11821000 }, { "epoch": 34.22, "learning_rate": 3.2896974284457214e-05, "loss": 2.0874, "step": 11821500 }, { "epoch": 34.22, "learning_rate": 3.2896250636809936e-05, "loss": 2.079, "step": 11822000 }, { "epoch": 34.22, "learning_rate": 3.289552843645795e-05, "loss": 2.0667, "step": 11822500 }, { "epoch": 34.22, "learning_rate": 3.2894804788810674e-05, "loss": 2.0665, "step": 11823000 }, { "epoch": 34.22, "learning_rate": 3.2894081141163396e-05, "loss": 2.0862, "step": 11823500 }, { "epoch": 34.23, "learning_rate": 3.289335749351612e-05, "loss": 2.0858, "step": 11824000 }, { "epoch": 34.23, "learning_rate": 3.289263384586884e-05, "loss": 2.0813, "step": 11824500 }, { "epoch": 34.23, "learning_rate": 3.289191019822156e-05, "loss": 2.0796, "step": 11825000 }, { "epoch": 34.23, "learning_rate": 3.2891186550574285e-05, "loss": 2.0635, "step": 11825500 }, { "epoch": 34.23, "learning_rate": 3.2890462902927014e-05, "loss": 2.101, "step": 11826000 }, { "epoch": 34.23, "learning_rate": 3.2889739255279736e-05, "loss": 2.0824, "step": 11826500 }, { "epoch": 34.23, "learning_rate": 3.288901705492775e-05, "loss": 2.0819, "step": 11827000 }, { "epoch": 34.24, "learning_rate": 3.2888293407280474e-05, "loss": 2.0891, "step": 11827500 }, { "epoch": 34.24, "learning_rate": 3.2887569759633196e-05, "loss": 2.0645, "step": 11828000 }, { "epoch": 34.24, "learning_rate": 3.2886846111985925e-05, "loss": 2.0446, "step": 11828500 }, { "epoch": 34.24, "learning_rate": 3.288612391163394e-05, "loss": 2.0595, "step": 11829000 }, { "epoch": 34.24, "learning_rate": 3.288540026398666e-05, "loss": 2.0683, "step": 11829500 }, { "epoch": 34.24, "learning_rate": 3.2884676616339385e-05, "loss": 2.0903, "step": 11830000 }, { "epoch": 34.24, "learning_rate": 3.2883952968692114e-05, "loss": 2.0702, "step": 11830500 }, { "epoch": 34.25, "learning_rate": 3.2883229321044836e-05, "loss": 2.0579, "step": 11831000 }, { "epoch": 34.25, "learning_rate": 3.288250567339756e-05, "loss": 2.0784, "step": 11831500 }, { "epoch": 34.25, "learning_rate": 3.288178202575028e-05, "loss": 2.0906, "step": 11832000 }, { "epoch": 34.25, "learning_rate": 3.2881058378103e-05, "loss": 2.0933, "step": 11832500 }, { "epoch": 34.25, "learning_rate": 3.2880334730455725e-05, "loss": 2.0592, "step": 11833000 }, { "epoch": 34.25, "learning_rate": 3.287961108280845e-05, "loss": 2.0663, "step": 11833500 }, { "epoch": 34.25, "learning_rate": 3.287888743516117e-05, "loss": 2.0624, "step": 11834000 }, { "epoch": 34.26, "learning_rate": 3.2878165234809185e-05, "loss": 2.0725, "step": 11834500 }, { "epoch": 34.26, "learning_rate": 3.2877441587161914e-05, "loss": 2.071, "step": 11835000 }, { "epoch": 34.26, "learning_rate": 3.287671793951464e-05, "loss": 2.0681, "step": 11835500 }, { "epoch": 34.26, "learning_rate": 3.2875994291867366e-05, "loss": 2.0825, "step": 11836000 }, { "epoch": 34.26, "learning_rate": 3.287527209151538e-05, "loss": 2.0762, "step": 11836500 }, { "epoch": 34.26, "learning_rate": 3.28745498911634e-05, "loss": 2.0685, "step": 11837000 }, { "epoch": 34.26, "learning_rate": 3.287382624351612e-05, "loss": 2.0911, "step": 11837500 }, { "epoch": 34.27, "learning_rate": 3.287310259586884e-05, "loss": 2.0727, "step": 11838000 }, { "epoch": 34.27, "learning_rate": 3.2872378948221564e-05, "loss": 2.0694, "step": 11838500 }, { "epoch": 34.27, "learning_rate": 3.2871656747869586e-05, "loss": 2.0561, "step": 11839000 }, { "epoch": 34.27, "learning_rate": 3.287093310022231e-05, "loss": 2.0549, "step": 11839500 }, { "epoch": 34.27, "learning_rate": 3.287020945257503e-05, "loss": 2.0601, "step": 11840000 }, { "epoch": 34.27, "learning_rate": 3.286948580492775e-05, "loss": 2.0514, "step": 11840500 }, { "epoch": 34.27, "learning_rate": 3.286876360457577e-05, "loss": 2.0579, "step": 11841000 }, { "epoch": 34.28, "learning_rate": 3.286803995692849e-05, "loss": 2.0559, "step": 11841500 }, { "epoch": 34.28, "learning_rate": 3.286731630928121e-05, "loss": 2.0964, "step": 11842000 }, { "epoch": 34.28, "learning_rate": 3.2866592661633935e-05, "loss": 2.0831, "step": 11842500 }, { "epoch": 34.28, "learning_rate": 3.2865869013986664e-05, "loss": 2.0662, "step": 11843000 }, { "epoch": 34.28, "learning_rate": 3.2865145366339386e-05, "loss": 2.056, "step": 11843500 }, { "epoch": 34.28, "learning_rate": 3.2864421718692115e-05, "loss": 2.0835, "step": 11844000 }, { "epoch": 34.28, "learning_rate": 3.286369807104484e-05, "loss": 2.0764, "step": 11844500 }, { "epoch": 34.29, "learning_rate": 3.286297442339756e-05, "loss": 2.0726, "step": 11845000 }, { "epoch": 34.29, "learning_rate": 3.286225077575028e-05, "loss": 2.0781, "step": 11845500 }, { "epoch": 34.29, "learning_rate": 3.2861527128103004e-05, "loss": 2.0948, "step": 11846000 }, { "epoch": 34.29, "learning_rate": 3.2860803480455726e-05, "loss": 2.0848, "step": 11846500 }, { "epoch": 34.29, "learning_rate": 3.286007983280845e-05, "loss": 2.0764, "step": 11847000 }, { "epoch": 34.29, "learning_rate": 3.285935618516117e-05, "loss": 2.0622, "step": 11847500 }, { "epoch": 34.3, "learning_rate": 3.2858633984809186e-05, "loss": 2.0842, "step": 11848000 }, { "epoch": 34.3, "learning_rate": 3.2857910337161915e-05, "loss": 2.0715, "step": 11848500 }, { "epoch": 34.3, "learning_rate": 3.285718668951464e-05, "loss": 2.078, "step": 11849000 }, { "epoch": 34.3, "learning_rate": 3.285646304186736e-05, "loss": 2.0526, "step": 11849500 }, { "epoch": 34.3, "learning_rate": 3.285573939422008e-05, "loss": 2.0827, "step": 11850000 }, { "epoch": 34.3, "learning_rate": 3.2855015746572804e-05, "loss": 2.069, "step": 11850500 }, { "epoch": 34.3, "learning_rate": 3.285429209892553e-05, "loss": 2.0583, "step": 11851000 }, { "epoch": 34.31, "learning_rate": 3.2853568451278255e-05, "loss": 2.072, "step": 11851500 }, { "epoch": 34.31, "learning_rate": 3.285284625092627e-05, "loss": 2.0703, "step": 11852000 }, { "epoch": 34.31, "learning_rate": 3.285212405057429e-05, "loss": 2.0796, "step": 11852500 }, { "epoch": 34.31, "learning_rate": 3.2851400402927016e-05, "loss": 2.0784, "step": 11853000 }, { "epoch": 34.31, "learning_rate": 3.285067675527974e-05, "loss": 2.0633, "step": 11853500 }, { "epoch": 34.31, "learning_rate": 3.284995310763246e-05, "loss": 2.0706, "step": 11854000 }, { "epoch": 34.31, "learning_rate": 3.284922945998518e-05, "loss": 2.0692, "step": 11854500 }, { "epoch": 34.32, "learning_rate": 3.2848505812337904e-05, "loss": 2.0722, "step": 11855000 }, { "epoch": 34.32, "learning_rate": 3.284778216469063e-05, "loss": 2.0788, "step": 11855500 }, { "epoch": 34.32, "learning_rate": 3.284705996433864e-05, "loss": 2.0685, "step": 11856000 }, { "epoch": 34.32, "learning_rate": 3.2846336316691364e-05, "loss": 2.0481, "step": 11856500 }, { "epoch": 34.32, "learning_rate": 3.2845612669044094e-05, "loss": 2.0759, "step": 11857000 }, { "epoch": 34.32, "learning_rate": 3.284489046869211e-05, "loss": 2.1118, "step": 11857500 }, { "epoch": 34.32, "learning_rate": 3.284416682104483e-05, "loss": 2.0922, "step": 11858000 }, { "epoch": 34.33, "learning_rate": 3.284344317339756e-05, "loss": 2.1051, "step": 11858500 }, { "epoch": 34.33, "learning_rate": 3.284271952575028e-05, "loss": 2.0855, "step": 11859000 }, { "epoch": 34.33, "learning_rate": 3.2841995878103005e-05, "loss": 2.0725, "step": 11859500 }, { "epoch": 34.33, "learning_rate": 3.284127367775102e-05, "loss": 2.0398, "step": 11860000 }, { "epoch": 34.33, "learning_rate": 3.284055003010374e-05, "loss": 2.0646, "step": 11860500 }, { "epoch": 34.33, "learning_rate": 3.2839826382456465e-05, "loss": 2.0676, "step": 11861000 }, { "epoch": 34.33, "learning_rate": 3.2839102734809194e-05, "loss": 2.0883, "step": 11861500 }, { "epoch": 34.34, "learning_rate": 3.2838379087161916e-05, "loss": 2.0984, "step": 11862000 }, { "epoch": 34.34, "learning_rate": 3.283765543951464e-05, "loss": 2.088, "step": 11862500 }, { "epoch": 34.34, "learning_rate": 3.283693179186736e-05, "loss": 2.0904, "step": 11863000 }, { "epoch": 34.34, "learning_rate": 3.283620814422008e-05, "loss": 2.0876, "step": 11863500 }, { "epoch": 34.34, "learning_rate": 3.2835484496572805e-05, "loss": 2.0788, "step": 11864000 }, { "epoch": 34.34, "learning_rate": 3.283476229622082e-05, "loss": 2.0978, "step": 11864500 }, { "epoch": 34.34, "learning_rate": 3.283403864857354e-05, "loss": 2.0833, "step": 11865000 }, { "epoch": 34.35, "learning_rate": 3.2833315000926265e-05, "loss": 2.0943, "step": 11865500 }, { "epoch": 34.35, "learning_rate": 3.2832591353278994e-05, "loss": 2.0586, "step": 11866000 }, { "epoch": 34.35, "learning_rate": 3.283187060022231e-05, "loss": 2.0882, "step": 11866500 }, { "epoch": 34.35, "learning_rate": 3.283114695257503e-05, "loss": 2.1042, "step": 11867000 }, { "epoch": 34.35, "learning_rate": 3.2830423304927754e-05, "loss": 2.0841, "step": 11867500 }, { "epoch": 34.35, "learning_rate": 3.2829699657280476e-05, "loss": 2.056, "step": 11868000 }, { "epoch": 34.35, "learning_rate": 3.28289760096332e-05, "loss": 2.0748, "step": 11868500 }, { "epoch": 34.36, "learning_rate": 3.282825236198592e-05, "loss": 2.065, "step": 11869000 }, { "epoch": 34.36, "learning_rate": 3.282753016163394e-05, "loss": 2.0835, "step": 11869500 }, { "epoch": 34.36, "learning_rate": 3.2826806513986665e-05, "loss": 2.0824, "step": 11870000 }, { "epoch": 34.36, "learning_rate": 3.282608286633939e-05, "loss": 2.0795, "step": 11870500 }, { "epoch": 34.36, "learning_rate": 3.282535921869211e-05, "loss": 2.0756, "step": 11871000 }, { "epoch": 34.36, "learning_rate": 3.282463557104483e-05, "loss": 2.0781, "step": 11871500 }, { "epoch": 34.36, "learning_rate": 3.2823911923397554e-05, "loss": 2.0535, "step": 11872000 }, { "epoch": 34.37, "learning_rate": 3.2823188275750277e-05, "loss": 2.0538, "step": 11872500 }, { "epoch": 34.37, "learning_rate": 3.2822464628103e-05, "loss": 2.0771, "step": 11873000 }, { "epoch": 34.37, "learning_rate": 3.282174098045573e-05, "loss": 2.098, "step": 11873500 }, { "epoch": 34.37, "learning_rate": 3.282101733280845e-05, "loss": 2.0624, "step": 11874000 }, { "epoch": 34.37, "learning_rate": 3.282029368516117e-05, "loss": 2.079, "step": 11874500 }, { "epoch": 34.37, "learning_rate": 3.2819570037513895e-05, "loss": 2.0846, "step": 11875000 }, { "epoch": 34.37, "learning_rate": 3.281884638986662e-05, "loss": 2.049, "step": 11875500 }, { "epoch": 34.38, "learning_rate": 3.2818122742219346e-05, "loss": 2.073, "step": 11876000 }, { "epoch": 34.38, "learning_rate": 3.281740054186736e-05, "loss": 2.0808, "step": 11876500 }, { "epoch": 34.38, "learning_rate": 3.2816676894220084e-05, "loss": 2.0761, "step": 11877000 }, { "epoch": 34.38, "learning_rate": 3.2815953246572806e-05, "loss": 2.0803, "step": 11877500 }, { "epoch": 34.38, "learning_rate": 3.281523104622082e-05, "loss": 2.0762, "step": 11878000 }, { "epoch": 34.38, "learning_rate": 3.2814507398573544e-05, "loss": 2.0462, "step": 11878500 }, { "epoch": 34.38, "learning_rate": 3.2813783750926266e-05, "loss": 2.0695, "step": 11879000 }, { "epoch": 34.39, "learning_rate": 3.2813060103278995e-05, "loss": 2.0565, "step": 11879500 }, { "epoch": 34.39, "learning_rate": 3.281233645563172e-05, "loss": 2.0948, "step": 11880000 }, { "epoch": 34.39, "learning_rate": 3.281161425527973e-05, "loss": 2.0787, "step": 11880500 }, { "epoch": 34.39, "learning_rate": 3.281089060763246e-05, "loss": 2.0626, "step": 11881000 }, { "epoch": 34.39, "learning_rate": 3.2810166959985184e-05, "loss": 2.0615, "step": 11881500 }, { "epoch": 34.39, "learning_rate": 3.2809443312337906e-05, "loss": 2.0861, "step": 11882000 }, { "epoch": 34.39, "learning_rate": 3.280871966469063e-05, "loss": 2.0907, "step": 11882500 }, { "epoch": 34.4, "learning_rate": 3.280799601704335e-05, "loss": 2.0611, "step": 11883000 }, { "epoch": 34.4, "learning_rate": 3.280727236939607e-05, "loss": 2.0667, "step": 11883500 }, { "epoch": 34.4, "learning_rate": 3.2806548721748795e-05, "loss": 2.0649, "step": 11884000 }, { "epoch": 34.4, "learning_rate": 3.280582507410152e-05, "loss": 2.0867, "step": 11884500 }, { "epoch": 34.4, "learning_rate": 3.2805101426454246e-05, "loss": 2.0984, "step": 11885000 }, { "epoch": 34.4, "learning_rate": 3.280437922610226e-05, "loss": 2.0933, "step": 11885500 }, { "epoch": 34.41, "learning_rate": 3.2803655578454984e-05, "loss": 2.0601, "step": 11886000 }, { "epoch": 34.41, "learning_rate": 3.2802933378103e-05, "loss": 2.0586, "step": 11886500 }, { "epoch": 34.41, "learning_rate": 3.280220973045572e-05, "loss": 2.0949, "step": 11887000 }, { "epoch": 34.41, "learning_rate": 3.2801486082808444e-05, "loss": 2.0407, "step": 11887500 }, { "epoch": 34.41, "learning_rate": 3.280076243516117e-05, "loss": 2.0776, "step": 11888000 }, { "epoch": 34.41, "learning_rate": 3.2800040234809195e-05, "loss": 2.0552, "step": 11888500 }, { "epoch": 34.41, "learning_rate": 3.279931658716192e-05, "loss": 2.0498, "step": 11889000 }, { "epoch": 34.42, "learning_rate": 3.279859438680993e-05, "loss": 2.086, "step": 11889500 }, { "epoch": 34.42, "learning_rate": 3.2797870739162655e-05, "loss": 2.0788, "step": 11890000 }, { "epoch": 34.42, "learning_rate": 3.279714709151538e-05, "loss": 2.0848, "step": 11890500 }, { "epoch": 34.42, "learning_rate": 3.27964234438681e-05, "loss": 2.0765, "step": 11891000 }, { "epoch": 34.42, "learning_rate": 3.279569979622082e-05, "loss": 2.1022, "step": 11891500 }, { "epoch": 34.42, "learning_rate": 3.2794976148573544e-05, "loss": 2.0715, "step": 11892000 }, { "epoch": 34.42, "learning_rate": 3.279425250092627e-05, "loss": 2.074, "step": 11892500 }, { "epoch": 34.43, "learning_rate": 3.2793528853278996e-05, "loss": 2.074, "step": 11893000 }, { "epoch": 34.43, "learning_rate": 3.279280665292701e-05, "loss": 2.0818, "step": 11893500 }, { "epoch": 34.43, "learning_rate": 3.279208300527973e-05, "loss": 2.1109, "step": 11894000 }, { "epoch": 34.43, "learning_rate": 3.2791359357632456e-05, "loss": 2.0853, "step": 11894500 }, { "epoch": 34.43, "learning_rate": 3.279063570998518e-05, "loss": 2.0809, "step": 11895000 }, { "epoch": 34.43, "learning_rate": 3.27899120623379e-05, "loss": 2.0612, "step": 11895500 }, { "epoch": 34.43, "learning_rate": 3.2789191309281216e-05, "loss": 2.0567, "step": 11896000 }, { "epoch": 34.44, "learning_rate": 3.2788467661633945e-05, "loss": 2.0724, "step": 11896500 }, { "epoch": 34.44, "learning_rate": 3.278774401398667e-05, "loss": 2.0594, "step": 11897000 }, { "epoch": 34.44, "learning_rate": 3.278702036633939e-05, "loss": 2.0726, "step": 11897500 }, { "epoch": 34.44, "learning_rate": 3.278629671869211e-05, "loss": 2.0808, "step": 11898000 }, { "epoch": 34.44, "learning_rate": 3.2785573071044834e-05, "loss": 2.0963, "step": 11898500 }, { "epoch": 34.44, "learning_rate": 3.2784849423397556e-05, "loss": 2.0972, "step": 11899000 }, { "epoch": 34.44, "learning_rate": 3.278412577575028e-05, "loss": 2.07, "step": 11899500 }, { "epoch": 34.45, "learning_rate": 3.2783402128103e-05, "loss": 2.0636, "step": 11900000 }, { "epoch": 34.45, "learning_rate": 3.278267848045572e-05, "loss": 2.0752, "step": 11900500 }, { "epoch": 34.45, "learning_rate": 3.2781954832808445e-05, "loss": 2.0842, "step": 11901000 }, { "epoch": 34.45, "learning_rate": 3.2781231185161174e-05, "loss": 2.0751, "step": 11901500 }, { "epoch": 34.45, "learning_rate": 3.2780507537513896e-05, "loss": 2.0788, "step": 11902000 }, { "epoch": 34.45, "learning_rate": 3.277978388986662e-05, "loss": 2.1004, "step": 11902500 }, { "epoch": 34.45, "learning_rate": 3.277906024221934e-05, "loss": 2.0559, "step": 11903000 }, { "epoch": 34.46, "learning_rate": 3.277833659457207e-05, "loss": 2.0879, "step": 11903500 }, { "epoch": 34.46, "learning_rate": 3.2777614394220085e-05, "loss": 2.0761, "step": 11904000 }, { "epoch": 34.46, "learning_rate": 3.277689074657281e-05, "loss": 2.0718, "step": 11904500 }, { "epoch": 34.46, "learning_rate": 3.277616709892553e-05, "loss": 2.0913, "step": 11905000 }, { "epoch": 34.46, "learning_rate": 3.277544345127825e-05, "loss": 2.077, "step": 11905500 }, { "epoch": 34.46, "learning_rate": 3.2774719803630974e-05, "loss": 2.0781, "step": 11906000 }, { "epoch": 34.46, "learning_rate": 3.2773996155983696e-05, "loss": 2.0837, "step": 11906500 }, { "epoch": 34.47, "learning_rate": 3.2773272508336425e-05, "loss": 2.078, "step": 11907000 }, { "epoch": 34.47, "learning_rate": 3.277254886068915e-05, "loss": 2.0896, "step": 11907500 }, { "epoch": 34.47, "learning_rate": 3.277182521304187e-05, "loss": 2.0808, "step": 11908000 }, { "epoch": 34.47, "learning_rate": 3.2771103012689885e-05, "loss": 2.0781, "step": 11908500 }, { "epoch": 34.47, "learning_rate": 3.27703808123379e-05, "loss": 2.0693, "step": 11909000 }, { "epoch": 34.47, "learning_rate": 3.276965716469062e-05, "loss": 2.0693, "step": 11909500 }, { "epoch": 34.47, "learning_rate": 3.2768933517043345e-05, "loss": 2.11, "step": 11910000 }, { "epoch": 34.48, "learning_rate": 3.2768209869396074e-05, "loss": 2.0712, "step": 11910500 }, { "epoch": 34.48, "learning_rate": 3.2767486221748797e-05, "loss": 2.0934, "step": 11911000 }, { "epoch": 34.48, "learning_rate": 3.276676402139682e-05, "loss": 2.0813, "step": 11911500 }, { "epoch": 34.48, "learning_rate": 3.276604037374954e-05, "loss": 2.0885, "step": 11912000 }, { "epoch": 34.48, "learning_rate": 3.2765316726102263e-05, "loss": 2.0887, "step": 11912500 }, { "epoch": 34.48, "learning_rate": 3.2764593078454986e-05, "loss": 2.0613, "step": 11913000 }, { "epoch": 34.48, "learning_rate": 3.276386943080771e-05, "loss": 2.0774, "step": 11913500 }, { "epoch": 34.49, "learning_rate": 3.2763147230455723e-05, "loss": 2.092, "step": 11914000 }, { "epoch": 34.49, "learning_rate": 3.276242358280845e-05, "loss": 2.061, "step": 11914500 }, { "epoch": 34.49, "learning_rate": 3.2761699935161175e-05, "loss": 2.0815, "step": 11915000 }, { "epoch": 34.49, "learning_rate": 3.27609762875139e-05, "loss": 2.0528, "step": 11915500 }, { "epoch": 34.49, "learning_rate": 3.276025408716191e-05, "loss": 2.1093, "step": 11916000 }, { "epoch": 34.49, "learning_rate": 3.2759530439514635e-05, "loss": 2.0673, "step": 11916500 }, { "epoch": 34.49, "learning_rate": 3.275880679186736e-05, "loss": 2.0648, "step": 11917000 }, { "epoch": 34.5, "learning_rate": 3.275808314422008e-05, "loss": 2.0772, "step": 11917500 }, { "epoch": 34.5, "learning_rate": 3.27573594965728e-05, "loss": 2.0927, "step": 11918000 }, { "epoch": 34.5, "learning_rate": 3.275663584892553e-05, "loss": 2.0706, "step": 11918500 }, { "epoch": 34.5, "learning_rate": 3.275591220127825e-05, "loss": 2.0917, "step": 11919000 }, { "epoch": 34.5, "learning_rate": 3.2755188553630975e-05, "loss": 2.077, "step": 11919500 }, { "epoch": 34.5, "learning_rate": 3.2754464905983704e-05, "loss": 2.0827, "step": 11920000 }, { "epoch": 34.5, "learning_rate": 3.275374270563172e-05, "loss": 2.068, "step": 11920500 }, { "epoch": 34.51, "learning_rate": 3.2753020505279735e-05, "loss": 2.077, "step": 11921000 }, { "epoch": 34.51, "learning_rate": 3.275229685763246e-05, "loss": 2.1051, "step": 11921500 }, { "epoch": 34.51, "learning_rate": 3.275157320998518e-05, "loss": 2.0502, "step": 11922000 }, { "epoch": 34.51, "learning_rate": 3.27508510096332e-05, "loss": 2.0981, "step": 11922500 }, { "epoch": 34.51, "learning_rate": 3.2750127361985924e-05, "loss": 2.0931, "step": 11923000 }, { "epoch": 34.51, "learning_rate": 3.2749403714338646e-05, "loss": 2.1017, "step": 11923500 }, { "epoch": 34.52, "learning_rate": 3.274868006669137e-05, "loss": 2.0654, "step": 11924000 }, { "epoch": 34.52, "learning_rate": 3.274795641904409e-05, "loss": 2.0828, "step": 11924500 }, { "epoch": 34.52, "learning_rate": 3.274723277139681e-05, "loss": 2.0747, "step": 11925000 }, { "epoch": 34.52, "learning_rate": 3.2746509123749535e-05, "loss": 2.1031, "step": 11925500 }, { "epoch": 34.52, "learning_rate": 3.2745785476102264e-05, "loss": 2.0889, "step": 11926000 }, { "epoch": 34.52, "learning_rate": 3.274506327575028e-05, "loss": 2.0766, "step": 11926500 }, { "epoch": 34.52, "learning_rate": 3.2744339628103e-05, "loss": 2.0603, "step": 11927000 }, { "epoch": 34.53, "learning_rate": 3.2743615980455724e-05, "loss": 2.0838, "step": 11927500 }, { "epoch": 34.53, "learning_rate": 3.274289233280845e-05, "loss": 2.0849, "step": 11928000 }, { "epoch": 34.53, "learning_rate": 3.2742168685161175e-05, "loss": 2.0595, "step": 11928500 }, { "epoch": 34.53, "learning_rate": 3.27414450375139e-05, "loss": 2.0846, "step": 11929000 }, { "epoch": 34.53, "learning_rate": 3.274072138986662e-05, "loss": 2.0898, "step": 11929500 }, { "epoch": 34.53, "learning_rate": 3.273999774221934e-05, "loss": 2.0833, "step": 11930000 }, { "epoch": 34.53, "learning_rate": 3.2739274094572064e-05, "loss": 2.079, "step": 11930500 }, { "epoch": 34.54, "learning_rate": 3.273855044692479e-05, "loss": 2.0859, "step": 11931000 }, { "epoch": 34.54, "learning_rate": 3.27378282465728e-05, "loss": 2.0545, "step": 11931500 }, { "epoch": 34.54, "learning_rate": 3.2737104598925524e-05, "loss": 2.087, "step": 11932000 }, { "epoch": 34.54, "learning_rate": 3.2736380951278253e-05, "loss": 2.087, "step": 11932500 }, { "epoch": 34.54, "learning_rate": 3.2735657303630976e-05, "loss": 2.0946, "step": 11933000 }, { "epoch": 34.54, "learning_rate": 3.2734933655983705e-05, "loss": 2.0676, "step": 11933500 }, { "epoch": 34.54, "learning_rate": 3.273421145563172e-05, "loss": 2.1048, "step": 11934000 }, { "epoch": 34.55, "learning_rate": 3.273348780798444e-05, "loss": 2.0739, "step": 11934500 }, { "epoch": 34.55, "learning_rate": 3.273276705492775e-05, "loss": 2.0723, "step": 11935000 }, { "epoch": 34.55, "learning_rate": 3.273204340728048e-05, "loss": 2.0772, "step": 11935500 }, { "epoch": 34.55, "learning_rate": 3.2731321206928496e-05, "loss": 2.0949, "step": 11936000 }, { "epoch": 34.55, "learning_rate": 3.273059755928122e-05, "loss": 2.0744, "step": 11936500 }, { "epoch": 34.55, "learning_rate": 3.272987391163394e-05, "loss": 2.0634, "step": 11937000 }, { "epoch": 34.55, "learning_rate": 3.272915026398666e-05, "loss": 2.0971, "step": 11937500 }, { "epoch": 34.56, "learning_rate": 3.2728426616339385e-05, "loss": 2.0943, "step": 11938000 }, { "epoch": 34.56, "learning_rate": 3.272770296869211e-05, "loss": 2.0614, "step": 11938500 }, { "epoch": 34.56, "learning_rate": 3.272697932104483e-05, "loss": 2.087, "step": 11939000 }, { "epoch": 34.56, "learning_rate": 3.272625567339755e-05, "loss": 2.1045, "step": 11939500 }, { "epoch": 34.56, "learning_rate": 3.272553202575028e-05, "loss": 2.0836, "step": 11940000 }, { "epoch": 34.56, "learning_rate": 3.2724808378103e-05, "loss": 2.0468, "step": 11940500 }, { "epoch": 34.56, "learning_rate": 3.272408473045573e-05, "loss": 2.0786, "step": 11941000 }, { "epoch": 34.57, "learning_rate": 3.2723361082808454e-05, "loss": 2.076, "step": 11941500 }, { "epoch": 34.57, "learning_rate": 3.2722637435161176e-05, "loss": 2.0663, "step": 11942000 }, { "epoch": 34.57, "learning_rate": 3.27219137875139e-05, "loss": 2.0839, "step": 11942500 }, { "epoch": 34.57, "learning_rate": 3.2721191587161914e-05, "loss": 2.0755, "step": 11943000 }, { "epoch": 34.57, "learning_rate": 3.2720467939514636e-05, "loss": 2.0853, "step": 11943500 }, { "epoch": 34.57, "learning_rate": 3.271974429186736e-05, "loss": 2.0611, "step": 11944000 }, { "epoch": 34.57, "learning_rate": 3.271902209151538e-05, "loss": 2.0878, "step": 11944500 }, { "epoch": 34.58, "learning_rate": 3.27182984438681e-05, "loss": 2.0829, "step": 11945000 }, { "epoch": 34.58, "learning_rate": 3.2717574796220825e-05, "loss": 2.0937, "step": 11945500 }, { "epoch": 34.58, "learning_rate": 3.271685114857355e-05, "loss": 2.0874, "step": 11946000 }, { "epoch": 34.58, "learning_rate": 3.271612750092627e-05, "loss": 2.0916, "step": 11946500 }, { "epoch": 34.58, "learning_rate": 3.271540385327899e-05, "loss": 2.0858, "step": 11947000 }, { "epoch": 34.58, "learning_rate": 3.2714680205631714e-05, "loss": 2.0838, "step": 11947500 }, { "epoch": 34.58, "learning_rate": 3.2713956557984436e-05, "loss": 2.0866, "step": 11948000 }, { "epoch": 34.59, "learning_rate": 3.2713232910337165e-05, "loss": 2.0602, "step": 11948500 }, { "epoch": 34.59, "learning_rate": 3.271250926268989e-05, "loss": 2.0881, "step": 11949000 }, { "epoch": 34.59, "learning_rate": 3.271178561504261e-05, "loss": 2.0691, "step": 11949500 }, { "epoch": 34.59, "learning_rate": 3.271106196739533e-05, "loss": 2.0895, "step": 11950000 }, { "epoch": 34.59, "learning_rate": 3.2710338319748054e-05, "loss": 2.0833, "step": 11950500 }, { "epoch": 34.59, "learning_rate": 3.270961611939608e-05, "loss": 2.0772, "step": 11951000 }, { "epoch": 34.59, "learning_rate": 3.27088924717488e-05, "loss": 2.1088, "step": 11951500 }, { "epoch": 34.6, "learning_rate": 3.270816882410152e-05, "loss": 2.0685, "step": 11952000 }, { "epoch": 34.6, "learning_rate": 3.2707445176454243e-05, "loss": 2.0982, "step": 11952500 }, { "epoch": 34.6, "learning_rate": 3.2706721528806966e-05, "loss": 2.0671, "step": 11953000 }, { "epoch": 34.6, "learning_rate": 3.270599788115969e-05, "loss": 2.0903, "step": 11953500 }, { "epoch": 34.6, "learning_rate": 3.270527423351241e-05, "loss": 2.0947, "step": 11954000 }, { "epoch": 34.6, "learning_rate": 3.270455058586513e-05, "loss": 2.0933, "step": 11954500 }, { "epoch": 34.6, "learning_rate": 3.2703826938217855e-05, "loss": 2.1156, "step": 11955000 }, { "epoch": 34.61, "learning_rate": 3.2703104737865884e-05, "loss": 2.1087, "step": 11955500 }, { "epoch": 34.61, "learning_rate": 3.2702381090218606e-05, "loss": 2.0669, "step": 11956000 }, { "epoch": 34.61, "learning_rate": 3.270165888986662e-05, "loss": 2.0991, "step": 11956500 }, { "epoch": 34.61, "learning_rate": 3.2700935242219344e-05, "loss": 2.0677, "step": 11957000 }, { "epoch": 34.61, "learning_rate": 3.2700211594572066e-05, "loss": 2.0821, "step": 11957500 }, { "epoch": 34.61, "learning_rate": 3.269948794692479e-05, "loss": 2.0859, "step": 11958000 }, { "epoch": 34.61, "learning_rate": 3.269876429927751e-05, "loss": 2.0726, "step": 11958500 }, { "epoch": 34.62, "learning_rate": 3.269804065163023e-05, "loss": 2.0995, "step": 11959000 }, { "epoch": 34.62, "learning_rate": 3.2697317003982955e-05, "loss": 2.0438, "step": 11959500 }, { "epoch": 34.62, "learning_rate": 3.2696593356335684e-05, "loss": 2.0668, "step": 11960000 }, { "epoch": 34.62, "learning_rate": 3.2695869708688406e-05, "loss": 2.082, "step": 11960500 }, { "epoch": 34.62, "learning_rate": 3.269514750833642e-05, "loss": 2.0803, "step": 11961000 }, { "epoch": 34.62, "learning_rate": 3.269442530798444e-05, "loss": 2.0648, "step": 11961500 }, { "epoch": 34.63, "learning_rate": 3.269370166033716e-05, "loss": 2.0954, "step": 11962000 }, { "epoch": 34.63, "learning_rate": 3.269297801268988e-05, "loss": 2.0851, "step": 11962500 }, { "epoch": 34.63, "learning_rate": 3.2692254365042604e-05, "loss": 2.0841, "step": 11963000 }, { "epoch": 34.63, "learning_rate": 3.269153216469063e-05, "loss": 2.09, "step": 11963500 }, { "epoch": 34.63, "learning_rate": 3.269080996433865e-05, "loss": 2.0971, "step": 11964000 }, { "epoch": 34.63, "learning_rate": 3.269008631669137e-05, "loss": 2.0969, "step": 11964500 }, { "epoch": 34.63, "learning_rate": 3.268936266904409e-05, "loss": 2.0825, "step": 11965000 }, { "epoch": 34.64, "learning_rate": 3.2688639021396815e-05, "loss": 2.0841, "step": 11965500 }, { "epoch": 34.64, "learning_rate": 3.268791537374954e-05, "loss": 2.0821, "step": 11966000 }, { "epoch": 34.64, "learning_rate": 3.268719317339756e-05, "loss": 2.0811, "step": 11966500 }, { "epoch": 34.64, "learning_rate": 3.268646952575028e-05, "loss": 2.0811, "step": 11967000 }, { "epoch": 34.64, "learning_rate": 3.26857473253983e-05, "loss": 2.0651, "step": 11967500 }, { "epoch": 34.64, "learning_rate": 3.268502367775102e-05, "loss": 2.0607, "step": 11968000 }, { "epoch": 34.64, "learning_rate": 3.268430003010374e-05, "loss": 2.0755, "step": 11968500 }, { "epoch": 34.65, "learning_rate": 3.2683576382456464e-05, "loss": 2.0628, "step": 11969000 }, { "epoch": 34.65, "learning_rate": 3.268285273480919e-05, "loss": 2.1, "step": 11969500 }, { "epoch": 34.65, "learning_rate": 3.268212908716191e-05, "loss": 2.0861, "step": 11970000 }, { "epoch": 34.65, "learning_rate": 3.268140543951463e-05, "loss": 2.0819, "step": 11970500 }, { "epoch": 34.65, "learning_rate": 3.268068179186736e-05, "loss": 2.0896, "step": 11971000 }, { "epoch": 34.65, "learning_rate": 3.267995814422008e-05, "loss": 2.1143, "step": 11971500 }, { "epoch": 34.65, "learning_rate": 3.2679235943868105e-05, "loss": 2.0821, "step": 11972000 }, { "epoch": 34.66, "learning_rate": 3.267851229622083e-05, "loss": 2.0787, "step": 11972500 }, { "epoch": 34.66, "learning_rate": 3.267778864857355e-05, "loss": 2.1069, "step": 11973000 }, { "epoch": 34.66, "learning_rate": 3.267706500092627e-05, "loss": 2.0839, "step": 11973500 }, { "epoch": 34.66, "learning_rate": 3.2676341353278994e-05, "loss": 2.0626, "step": 11974000 }, { "epoch": 34.66, "learning_rate": 3.2675617705631716e-05, "loss": 2.0934, "step": 11974500 }, { "epoch": 34.66, "learning_rate": 3.267489405798444e-05, "loss": 2.093, "step": 11975000 }, { "epoch": 34.66, "learning_rate": 3.267417041033716e-05, "loss": 2.0745, "step": 11975500 }, { "epoch": 34.67, "learning_rate": 3.267344676268988e-05, "loss": 2.0733, "step": 11976000 }, { "epoch": 34.67, "learning_rate": 3.267272311504261e-05, "loss": 2.0769, "step": 11976500 }, { "epoch": 34.67, "learning_rate": 3.2671999467395334e-05, "loss": 2.0726, "step": 11977000 }, { "epoch": 34.67, "learning_rate": 3.267127726704335e-05, "loss": 2.073, "step": 11977500 }, { "epoch": 34.67, "learning_rate": 3.267055361939607e-05, "loss": 2.1022, "step": 11978000 }, { "epoch": 34.67, "learning_rate": 3.26698299717488e-05, "loss": 2.102, "step": 11978500 }, { "epoch": 34.67, "learning_rate": 3.266910632410152e-05, "loss": 2.0613, "step": 11979000 }, { "epoch": 34.68, "learning_rate": 3.2668382676454245e-05, "loss": 2.0785, "step": 11979500 }, { "epoch": 34.68, "learning_rate": 3.266765902880697e-05, "loss": 2.0822, "step": 11980000 }, { "epoch": 34.68, "learning_rate": 3.266693682845498e-05, "loss": 2.0708, "step": 11980500 }, { "epoch": 34.68, "learning_rate": 3.266621318080771e-05, "loss": 2.0768, "step": 11981000 }, { "epoch": 34.68, "learning_rate": 3.2665489533160434e-05, "loss": 2.0807, "step": 11981500 }, { "epoch": 34.68, "learning_rate": 3.2664765885513156e-05, "loss": 2.0663, "step": 11982000 }, { "epoch": 34.68, "learning_rate": 3.266404223786588e-05, "loss": 2.0749, "step": 11982500 }, { "epoch": 34.69, "learning_rate": 3.2663320037513894e-05, "loss": 2.0892, "step": 11983000 }, { "epoch": 34.69, "learning_rate": 3.2662596389866616e-05, "loss": 2.0901, "step": 11983500 }, { "epoch": 34.69, "learning_rate": 3.266187418951463e-05, "loss": 2.0713, "step": 11984000 }, { "epoch": 34.69, "learning_rate": 3.266115054186736e-05, "loss": 2.0984, "step": 11984500 }, { "epoch": 34.69, "learning_rate": 3.2660428341515376e-05, "loss": 2.0754, "step": 11985000 }, { "epoch": 34.69, "learning_rate": 3.26597046938681e-05, "loss": 2.0672, "step": 11985500 }, { "epoch": 34.69, "learning_rate": 3.265898104622083e-05, "loss": 2.073, "step": 11986000 }, { "epoch": 34.7, "learning_rate": 3.265825739857355e-05, "loss": 2.1204, "step": 11986500 }, { "epoch": 34.7, "learning_rate": 3.265753375092627e-05, "loss": 2.0937, "step": 11987000 }, { "epoch": 34.7, "learning_rate": 3.2656810103278994e-05, "loss": 2.0778, "step": 11987500 }, { "epoch": 34.7, "learning_rate": 3.265608645563172e-05, "loss": 2.0797, "step": 11988000 }, { "epoch": 34.7, "learning_rate": 3.265536280798444e-05, "loss": 2.0784, "step": 11988500 }, { "epoch": 34.7, "learning_rate": 3.265463916033716e-05, "loss": 2.0834, "step": 11989000 }, { "epoch": 34.7, "learning_rate": 3.265391551268988e-05, "loss": 2.094, "step": 11989500 }, { "epoch": 34.71, "learning_rate": 3.265319186504261e-05, "loss": 2.0934, "step": 11990000 }, { "epoch": 34.71, "learning_rate": 3.2652468217395335e-05, "loss": 2.0778, "step": 11990500 }, { "epoch": 34.71, "learning_rate": 3.265174456974806e-05, "loss": 2.1027, "step": 11991000 }, { "epoch": 34.71, "learning_rate": 3.265102092210078e-05, "loss": 2.0873, "step": 11991500 }, { "epoch": 34.71, "learning_rate": 3.26502972744535e-05, "loss": 2.0908, "step": 11992000 }, { "epoch": 34.71, "learning_rate": 3.2649573626806224e-05, "loss": 2.0577, "step": 11992500 }, { "epoch": 34.71, "learning_rate": 3.264885142645424e-05, "loss": 2.08, "step": 11993000 }, { "epoch": 34.72, "learning_rate": 3.264812777880697e-05, "loss": 2.0687, "step": 11993500 }, { "epoch": 34.72, "learning_rate": 3.264740413115969e-05, "loss": 2.0808, "step": 11994000 }, { "epoch": 34.72, "learning_rate": 3.264668048351241e-05, "loss": 2.0877, "step": 11994500 }, { "epoch": 34.72, "learning_rate": 3.264595973045573e-05, "loss": 2.0892, "step": 11995000 }, { "epoch": 34.72, "learning_rate": 3.264523608280845e-05, "loss": 2.0852, "step": 11995500 }, { "epoch": 34.72, "learning_rate": 3.264451243516117e-05, "loss": 2.0792, "step": 11996000 }, { "epoch": 34.72, "learning_rate": 3.2643788787513895e-05, "loss": 2.1024, "step": 11996500 }, { "epoch": 34.73, "learning_rate": 3.264306513986662e-05, "loss": 2.074, "step": 11997000 }, { "epoch": 34.73, "learning_rate": 3.264234149221934e-05, "loss": 2.0853, "step": 11997500 }, { "epoch": 34.73, "learning_rate": 3.264161929186736e-05, "loss": 2.0843, "step": 11998000 }, { "epoch": 34.73, "learning_rate": 3.2640895644220084e-05, "loss": 2.0797, "step": 11998500 }, { "epoch": 34.73, "learning_rate": 3.2640171996572806e-05, "loss": 2.0811, "step": 11999000 }, { "epoch": 34.73, "learning_rate": 3.263944834892553e-05, "loss": 2.0893, "step": 11999500 }, { "epoch": 34.74, "learning_rate": 3.263872470127825e-05, "loss": 2.0723, "step": 12000000 }, { "epoch": 34.74, "learning_rate": 3.263800105363097e-05, "loss": 2.0815, "step": 12000500 }, { "epoch": 34.74, "learning_rate": 3.26372774059837e-05, "loss": 2.0744, "step": 12001000 }, { "epoch": 34.74, "learning_rate": 3.2636553758336424e-05, "loss": 2.1033, "step": 12001500 }, { "epoch": 34.74, "learning_rate": 3.2635830110689146e-05, "loss": 2.0763, "step": 12002000 }, { "epoch": 34.74, "learning_rate": 3.263510646304187e-05, "loss": 2.087, "step": 12002500 }, { "epoch": 34.74, "learning_rate": 3.263438281539459e-05, "loss": 2.1124, "step": 12003000 }, { "epoch": 34.75, "learning_rate": 3.2633662062337906e-05, "loss": 2.0633, "step": 12003500 }, { "epoch": 34.75, "learning_rate": 3.263293841469063e-05, "loss": 2.1015, "step": 12004000 }, { "epoch": 34.75, "learning_rate": 3.263221476704335e-05, "loss": 2.0766, "step": 12004500 }, { "epoch": 34.75, "learning_rate": 3.263149111939607e-05, "loss": 2.0924, "step": 12005000 }, { "epoch": 34.75, "learning_rate": 3.2630767471748795e-05, "loss": 2.1032, "step": 12005500 }, { "epoch": 34.75, "learning_rate": 3.263004527139681e-05, "loss": 2.0861, "step": 12006000 }, { "epoch": 34.75, "learning_rate": 3.262932162374954e-05, "loss": 2.1026, "step": 12006500 }, { "epoch": 34.76, "learning_rate": 3.262859797610226e-05, "loss": 2.1052, "step": 12007000 }, { "epoch": 34.76, "learning_rate": 3.2627874328454984e-05, "loss": 2.0769, "step": 12007500 }, { "epoch": 34.76, "learning_rate": 3.2627152128103e-05, "loss": 2.0665, "step": 12008000 }, { "epoch": 34.76, "learning_rate": 3.262642848045573e-05, "loss": 2.0785, "step": 12008500 }, { "epoch": 34.76, "learning_rate": 3.262570483280845e-05, "loss": 2.0918, "step": 12009000 }, { "epoch": 34.76, "learning_rate": 3.2624981185161173e-05, "loss": 2.0614, "step": 12009500 }, { "epoch": 34.76, "learning_rate": 3.2624257537513896e-05, "loss": 2.0881, "step": 12010000 }, { "epoch": 34.77, "learning_rate": 3.262353388986662e-05, "loss": 2.0819, "step": 12010500 }, { "epoch": 34.77, "learning_rate": 3.262281024221934e-05, "loss": 2.0733, "step": 12011000 }, { "epoch": 34.77, "learning_rate": 3.262208659457206e-05, "loss": 2.0676, "step": 12011500 }, { "epoch": 34.77, "learning_rate": 3.262136294692479e-05, "loss": 2.0768, "step": 12012000 }, { "epoch": 34.77, "learning_rate": 3.2620639299277514e-05, "loss": 2.0678, "step": 12012500 }, { "epoch": 34.77, "learning_rate": 3.2619915651630236e-05, "loss": 2.082, "step": 12013000 }, { "epoch": 34.77, "learning_rate": 3.261919345127825e-05, "loss": 2.0705, "step": 12013500 }, { "epoch": 34.78, "learning_rate": 3.2618469803630974e-05, "loss": 2.0806, "step": 12014000 }, { "epoch": 34.78, "learning_rate": 3.2617746155983696e-05, "loss": 2.0724, "step": 12014500 }, { "epoch": 34.78, "learning_rate": 3.261702250833642e-05, "loss": 2.0845, "step": 12015000 }, { "epoch": 34.78, "learning_rate": 3.2616301755279734e-05, "loss": 2.0712, "step": 12015500 }, { "epoch": 34.78, "learning_rate": 3.261557810763246e-05, "loss": 2.0783, "step": 12016000 }, { "epoch": 34.78, "learning_rate": 3.2614854459985185e-05, "loss": 2.0938, "step": 12016500 }, { "epoch": 34.78, "learning_rate": 3.261413081233791e-05, "loss": 2.0853, "step": 12017000 }, { "epoch": 34.79, "learning_rate": 3.261340716469063e-05, "loss": 2.0955, "step": 12017500 }, { "epoch": 34.79, "learning_rate": 3.261268351704335e-05, "loss": 2.0813, "step": 12018000 }, { "epoch": 34.79, "learning_rate": 3.261196131669137e-05, "loss": 2.0991, "step": 12018500 }, { "epoch": 34.79, "learning_rate": 3.261123766904409e-05, "loss": 2.0716, "step": 12019000 }, { "epoch": 34.79, "learning_rate": 3.261051402139682e-05, "loss": 2.0892, "step": 12019500 }, { "epoch": 34.79, "learning_rate": 3.260979037374954e-05, "loss": 2.0958, "step": 12020000 }, { "epoch": 34.79, "learning_rate": 3.260906672610226e-05, "loss": 2.0905, "step": 12020500 }, { "epoch": 34.8, "learning_rate": 3.2608343078454985e-05, "loss": 2.0892, "step": 12021000 }, { "epoch": 34.8, "learning_rate": 3.260761943080771e-05, "loss": 2.096, "step": 12021500 }, { "epoch": 34.8, "learning_rate": 3.260689578316043e-05, "loss": 2.0891, "step": 12022000 }, { "epoch": 34.8, "learning_rate": 3.2606173582808445e-05, "loss": 2.0639, "step": 12022500 }, { "epoch": 34.8, "learning_rate": 3.260544993516117e-05, "loss": 2.084, "step": 12023000 }, { "epoch": 34.8, "learning_rate": 3.2604726287513896e-05, "loss": 2.0998, "step": 12023500 }, { "epoch": 34.8, "learning_rate": 3.260400408716192e-05, "loss": 2.0922, "step": 12024000 }, { "epoch": 34.81, "learning_rate": 3.260328043951464e-05, "loss": 2.0703, "step": 12024500 }, { "epoch": 34.81, "learning_rate": 3.260255679186736e-05, "loss": 2.0885, "step": 12025000 }, { "epoch": 34.81, "learning_rate": 3.2601833144220086e-05, "loss": 2.0647, "step": 12025500 }, { "epoch": 34.81, "learning_rate": 3.260110949657281e-05, "loss": 2.0588, "step": 12026000 }, { "epoch": 34.81, "learning_rate": 3.260038584892553e-05, "loss": 2.0893, "step": 12026500 }, { "epoch": 34.81, "learning_rate": 3.259966220127825e-05, "loss": 2.1022, "step": 12027000 }, { "epoch": 34.81, "learning_rate": 3.2598938553630974e-05, "loss": 2.0795, "step": 12027500 }, { "epoch": 34.82, "learning_rate": 3.25982149059837e-05, "loss": 2.0918, "step": 12028000 }, { "epoch": 34.82, "learning_rate": 3.259749125833642e-05, "loss": 2.1076, "step": 12028500 }, { "epoch": 34.82, "learning_rate": 3.259676761068914e-05, "loss": 2.0761, "step": 12029000 }, { "epoch": 34.82, "learning_rate": 3.259604396304187e-05, "loss": 2.101, "step": 12029500 }, { "epoch": 34.82, "learning_rate": 3.259532031539459e-05, "loss": 2.0778, "step": 12030000 }, { "epoch": 34.82, "learning_rate": 3.259459666774732e-05, "loss": 2.0602, "step": 12030500 }, { "epoch": 34.82, "learning_rate": 3.2593873020100044e-05, "loss": 2.0911, "step": 12031000 }, { "epoch": 34.83, "learning_rate": 3.2593149372452766e-05, "loss": 2.0719, "step": 12031500 }, { "epoch": 34.83, "learning_rate": 3.259242572480549e-05, "loss": 2.073, "step": 12032000 }, { "epoch": 34.83, "learning_rate": 3.25917049717488e-05, "loss": 2.1019, "step": 12032500 }, { "epoch": 34.83, "learning_rate": 3.259098132410152e-05, "loss": 2.0765, "step": 12033000 }, { "epoch": 34.83, "learning_rate": 3.259025767645424e-05, "loss": 2.0886, "step": 12033500 }, { "epoch": 34.83, "learning_rate": 3.258953402880697e-05, "loss": 2.1079, "step": 12034000 }, { "epoch": 34.83, "learning_rate": 3.258881038115969e-05, "loss": 2.0948, "step": 12034500 }, { "epoch": 34.84, "learning_rate": 3.2588086733512415e-05, "loss": 2.0956, "step": 12035000 }, { "epoch": 34.84, "learning_rate": 3.258736308586514e-05, "loss": 2.081, "step": 12035500 }, { "epoch": 34.84, "learning_rate": 3.258663943821786e-05, "loss": 2.0815, "step": 12036000 }, { "epoch": 34.84, "learning_rate": 3.258591579057058e-05, "loss": 2.0742, "step": 12036500 }, { "epoch": 34.84, "learning_rate": 3.258519503751389e-05, "loss": 2.0836, "step": 12037000 }, { "epoch": 34.84, "learning_rate": 3.258447138986662e-05, "loss": 2.062, "step": 12037500 }, { "epoch": 34.85, "learning_rate": 3.258374774221934e-05, "loss": 2.0807, "step": 12038000 }, { "epoch": 34.85, "learning_rate": 3.258302409457207e-05, "loss": 2.0817, "step": 12038500 }, { "epoch": 34.85, "learning_rate": 3.258230044692479e-05, "loss": 2.0669, "step": 12039000 }, { "epoch": 34.85, "learning_rate": 3.2581576799277515e-05, "loss": 2.074, "step": 12039500 }, { "epoch": 34.85, "learning_rate": 3.258085315163024e-05, "loss": 2.0799, "step": 12040000 }, { "epoch": 34.85, "learning_rate": 3.258012950398296e-05, "loss": 2.0888, "step": 12040500 }, { "epoch": 34.85, "learning_rate": 3.257940585633568e-05, "loss": 2.0654, "step": 12041000 }, { "epoch": 34.86, "learning_rate": 3.2578682208688404e-05, "loss": 2.0959, "step": 12041500 }, { "epoch": 34.86, "learning_rate": 3.2577958561041126e-05, "loss": 2.0891, "step": 12042000 }, { "epoch": 34.86, "learning_rate": 3.257723491339385e-05, "loss": 2.0797, "step": 12042500 }, { "epoch": 34.86, "learning_rate": 3.257651271304187e-05, "loss": 2.0764, "step": 12043000 }, { "epoch": 34.86, "learning_rate": 3.257578906539459e-05, "loss": 2.0892, "step": 12043500 }, { "epoch": 34.86, "learning_rate": 3.2575065417747315e-05, "loss": 2.0833, "step": 12044000 }, { "epoch": 34.86, "learning_rate": 3.2574344664690624e-05, "loss": 2.0869, "step": 12044500 }, { "epoch": 34.87, "learning_rate": 3.2573621017043347e-05, "loss": 2.0733, "step": 12045000 }, { "epoch": 34.87, "learning_rate": 3.257289736939607e-05, "loss": 2.089, "step": 12045500 }, { "epoch": 34.87, "learning_rate": 3.25721737217488e-05, "loss": 2.0676, "step": 12046000 }, { "epoch": 34.87, "learning_rate": 3.257145007410152e-05, "loss": 2.0774, "step": 12046500 }, { "epoch": 34.87, "learning_rate": 3.257072642645424e-05, "loss": 2.0773, "step": 12047000 }, { "epoch": 34.87, "learning_rate": 3.257000277880697e-05, "loss": 2.083, "step": 12047500 }, { "epoch": 34.87, "learning_rate": 3.2569279131159693e-05, "loss": 2.071, "step": 12048000 }, { "epoch": 34.88, "learning_rate": 3.256855693080771e-05, "loss": 2.0971, "step": 12048500 }, { "epoch": 34.88, "learning_rate": 3.256783328316043e-05, "loss": 2.0888, "step": 12049000 }, { "epoch": 34.88, "learning_rate": 3.2567109635513154e-05, "loss": 2.0955, "step": 12049500 }, { "epoch": 34.88, "learning_rate": 3.2566385987865876e-05, "loss": 2.0779, "step": 12050000 }, { "epoch": 34.88, "learning_rate": 3.25656637875139e-05, "loss": 2.0785, "step": 12050500 }, { "epoch": 34.88, "learning_rate": 3.256494013986662e-05, "loss": 2.074, "step": 12051000 }, { "epoch": 34.88, "learning_rate": 3.256421649221934e-05, "loss": 2.0935, "step": 12051500 }, { "epoch": 34.89, "learning_rate": 3.2563492844572065e-05, "loss": 2.0813, "step": 12052000 }, { "epoch": 34.89, "learning_rate": 3.256276919692479e-05, "loss": 2.0955, "step": 12052500 }, { "epoch": 34.89, "learning_rate": 3.2562045549277516e-05, "loss": 2.073, "step": 12053000 }, { "epoch": 34.89, "learning_rate": 3.256132190163024e-05, "loss": 2.0923, "step": 12053500 }, { "epoch": 34.89, "learning_rate": 3.2560599701278254e-05, "loss": 2.0972, "step": 12054000 }, { "epoch": 34.89, "learning_rate": 3.2559876053630976e-05, "loss": 2.0733, "step": 12054500 }, { "epoch": 34.89, "learning_rate": 3.25591524059837e-05, "loss": 2.0845, "step": 12055000 }, { "epoch": 34.9, "learning_rate": 3.255842875833642e-05, "loss": 2.0692, "step": 12055500 }, { "epoch": 34.9, "learning_rate": 3.255770511068915e-05, "loss": 2.0942, "step": 12056000 }, { "epoch": 34.9, "learning_rate": 3.255698146304187e-05, "loss": 2.0874, "step": 12056500 }, { "epoch": 34.9, "learning_rate": 3.2556257815394594e-05, "loss": 2.0938, "step": 12057000 }, { "epoch": 34.9, "learning_rate": 3.255553561504261e-05, "loss": 2.067, "step": 12057500 }, { "epoch": 34.9, "learning_rate": 3.255481196739533e-05, "loss": 2.0708, "step": 12058000 }, { "epoch": 34.9, "learning_rate": 3.2554088319748054e-05, "loss": 2.0572, "step": 12058500 }, { "epoch": 34.91, "learning_rate": 3.2553364672100776e-05, "loss": 2.0899, "step": 12059000 }, { "epoch": 34.91, "learning_rate": 3.25526410244535e-05, "loss": 2.0797, "step": 12059500 }, { "epoch": 34.91, "learning_rate": 3.255191737680622e-05, "loss": 2.0753, "step": 12060000 }, { "epoch": 34.91, "learning_rate": 3.255119372915895e-05, "loss": 2.0831, "step": 12060500 }, { "epoch": 34.91, "learning_rate": 3.255047008151167e-05, "loss": 2.0678, "step": 12061000 }, { "epoch": 34.91, "learning_rate": 3.2549747881159694e-05, "loss": 2.0799, "step": 12061500 }, { "epoch": 34.91, "learning_rate": 3.2549024233512417e-05, "loss": 2.0813, "step": 12062000 }, { "epoch": 34.92, "learning_rate": 3.254830058586514e-05, "loss": 2.1055, "step": 12062500 }, { "epoch": 34.92, "learning_rate": 3.254757693821786e-05, "loss": 2.0941, "step": 12063000 }, { "epoch": 34.92, "learning_rate": 3.254685329057058e-05, "loss": 2.0584, "step": 12063500 }, { "epoch": 34.92, "learning_rate": 3.25461310902186e-05, "loss": 2.0878, "step": 12064000 }, { "epoch": 34.92, "learning_rate": 3.254540888986662e-05, "loss": 2.1005, "step": 12064500 }, { "epoch": 34.92, "learning_rate": 3.254468524221934e-05, "loss": 2.0908, "step": 12065000 }, { "epoch": 34.92, "learning_rate": 3.2543961594572066e-05, "loss": 2.0991, "step": 12065500 }, { "epoch": 34.93, "learning_rate": 3.254323794692479e-05, "loss": 2.0596, "step": 12066000 }, { "epoch": 34.93, "learning_rate": 3.254251429927751e-05, "loss": 2.0855, "step": 12066500 }, { "epoch": 34.93, "learning_rate": 3.254179065163023e-05, "loss": 2.078, "step": 12067000 }, { "epoch": 34.93, "learning_rate": 3.2541067003982955e-05, "loss": 2.088, "step": 12067500 }, { "epoch": 34.93, "learning_rate": 3.2540343356335684e-05, "loss": 2.1078, "step": 12068000 }, { "epoch": 34.93, "learning_rate": 3.25396211559837e-05, "loss": 2.0772, "step": 12068500 }, { "epoch": 34.93, "learning_rate": 3.253889750833642e-05, "loss": 2.0777, "step": 12069000 }, { "epoch": 34.94, "learning_rate": 3.253817386068915e-05, "loss": 2.0688, "step": 12069500 }, { "epoch": 34.94, "learning_rate": 3.253745021304187e-05, "loss": 2.0972, "step": 12070000 }, { "epoch": 34.94, "learning_rate": 3.2536726565394595e-05, "loss": 2.1017, "step": 12070500 }, { "epoch": 34.94, "learning_rate": 3.253600291774732e-05, "loss": 2.0907, "step": 12071000 }, { "epoch": 34.94, "learning_rate": 3.253528071739533e-05, "loss": 2.1142, "step": 12071500 }, { "epoch": 34.94, "learning_rate": 3.2534557069748055e-05, "loss": 2.0709, "step": 12072000 }, { "epoch": 34.94, "learning_rate": 3.253383342210078e-05, "loss": 2.0945, "step": 12072500 }, { "epoch": 34.95, "learning_rate": 3.25331097744535e-05, "loss": 2.0985, "step": 12073000 }, { "epoch": 34.95, "learning_rate": 3.253238612680622e-05, "loss": 2.0889, "step": 12073500 }, { "epoch": 34.95, "learning_rate": 3.253166247915895e-05, "loss": 2.0878, "step": 12074000 }, { "epoch": 34.95, "learning_rate": 3.253093883151167e-05, "loss": 2.0581, "step": 12074500 }, { "epoch": 34.95, "learning_rate": 3.2530215183864395e-05, "loss": 2.0765, "step": 12075000 }, { "epoch": 34.95, "learning_rate": 3.2529491536217124e-05, "loss": 2.1122, "step": 12075500 }, { "epoch": 34.96, "learning_rate": 3.252876933586514e-05, "loss": 2.0669, "step": 12076000 }, { "epoch": 34.96, "learning_rate": 3.252804568821786e-05, "loss": 2.0982, "step": 12076500 }, { "epoch": 34.96, "learning_rate": 3.2527322040570584e-05, "loss": 2.0815, "step": 12077000 }, { "epoch": 34.96, "learning_rate": 3.2526598392923306e-05, "loss": 2.1089, "step": 12077500 }, { "epoch": 34.96, "learning_rate": 3.252587619257132e-05, "loss": 2.0947, "step": 12078000 }, { "epoch": 34.96, "learning_rate": 3.252515254492405e-05, "loss": 2.071, "step": 12078500 }, { "epoch": 34.96, "learning_rate": 3.252442889727677e-05, "loss": 2.0594, "step": 12079000 }, { "epoch": 34.97, "learning_rate": 3.2523705249629495e-05, "loss": 2.081, "step": 12079500 }, { "epoch": 34.97, "learning_rate": 3.252298160198222e-05, "loss": 2.0784, "step": 12080000 }, { "epoch": 34.97, "learning_rate": 3.252225795433494e-05, "loss": 2.0592, "step": 12080500 }, { "epoch": 34.97, "learning_rate": 3.252153430668766e-05, "loss": 2.0773, "step": 12081000 }, { "epoch": 34.97, "learning_rate": 3.2520810659040384e-05, "loss": 2.091, "step": 12081500 }, { "epoch": 34.97, "learning_rate": 3.2520087011393106e-05, "loss": 2.0784, "step": 12082000 }, { "epoch": 34.97, "learning_rate": 3.251936336374583e-05, "loss": 2.0763, "step": 12082500 }, { "epoch": 34.98, "learning_rate": 3.251863971609856e-05, "loss": 2.0959, "step": 12083000 }, { "epoch": 34.98, "learning_rate": 3.251791751574657e-05, "loss": 2.0677, "step": 12083500 }, { "epoch": 34.98, "learning_rate": 3.2517195315394596e-05, "loss": 2.1041, "step": 12084000 }, { "epoch": 34.98, "learning_rate": 3.251647166774732e-05, "loss": 2.0712, "step": 12084500 }, { "epoch": 34.98, "learning_rate": 3.251574802010004e-05, "loss": 2.0801, "step": 12085000 }, { "epoch": 34.98, "learning_rate": 3.251502437245276e-05, "loss": 2.0483, "step": 12085500 }, { "epoch": 34.98, "learning_rate": 3.2514300724805485e-05, "loss": 2.0741, "step": 12086000 }, { "epoch": 34.99, "learning_rate": 3.251357707715821e-05, "loss": 2.0703, "step": 12086500 }, { "epoch": 34.99, "learning_rate": 3.251285342951093e-05, "loss": 2.095, "step": 12087000 }, { "epoch": 34.99, "learning_rate": 3.251212978186365e-05, "loss": 2.0847, "step": 12087500 }, { "epoch": 34.99, "learning_rate": 3.2511406134216373e-05, "loss": 2.0831, "step": 12088000 }, { "epoch": 34.99, "learning_rate": 3.25106824865691e-05, "loss": 2.1114, "step": 12088500 }, { "epoch": 34.99, "learning_rate": 3.2509958838921825e-05, "loss": 2.0647, "step": 12089000 }, { "epoch": 34.99, "learning_rate": 3.250923663856984e-05, "loss": 2.0886, "step": 12089500 }, { "epoch": 35.0, "learning_rate": 3.250851299092256e-05, "loss": 2.0871, "step": 12090000 }, { "epoch": 35.0, "learning_rate": 3.250778934327529e-05, "loss": 2.087, "step": 12090500 }, { "epoch": 35.0, "learning_rate": 3.2507065695628014e-05, "loss": 2.0969, "step": 12091000 }, { "epoch": 35.0, "learning_rate": 3.250634349527603e-05, "loss": 2.0697, "step": 12091500 }, { "epoch": 35.0, "eval_accuracy": 0.6687623409562915, "eval_accuracy_mlm": 0.6335090121750351, "eval_accuracy_nsp": 0.8578345776445624, "eval_loss": 2.168201446533203, "eval_runtime": 331.5723, "eval_samples_per_second": 1316.111, "eval_steps_per_second": 54.839, "step": 12091520 }, { "epoch": 35.0, "learning_rate": 3.250561984762875e-05, "loss": 2.0743, "step": 12092000 }, { "epoch": 35.0, "learning_rate": 3.250489619998148e-05, "loss": 2.0658, "step": 12092500 }, { "epoch": 35.0, "learning_rate": 3.25041725523342e-05, "loss": 2.0524, "step": 12093000 }, { "epoch": 35.01, "learning_rate": 3.2503448904686925e-05, "loss": 2.052, "step": 12093500 }, { "epoch": 35.01, "learning_rate": 3.250272525703965e-05, "loss": 2.0333, "step": 12094000 }, { "epoch": 35.01, "learning_rate": 3.250200160939237e-05, "loss": 2.0897, "step": 12094500 }, { "epoch": 35.01, "learning_rate": 3.250127796174509e-05, "loss": 2.0747, "step": 12095000 }, { "epoch": 35.01, "learning_rate": 3.2500554314097814e-05, "loss": 2.066, "step": 12095500 }, { "epoch": 35.01, "learning_rate": 3.249983211374583e-05, "loss": 2.0713, "step": 12096000 }, { "epoch": 35.01, "learning_rate": 3.249910991339385e-05, "loss": 2.0707, "step": 12096500 }, { "epoch": 35.02, "learning_rate": 3.2498386265746574e-05, "loss": 2.0554, "step": 12097000 }, { "epoch": 35.02, "learning_rate": 3.249766406539459e-05, "loss": 2.0761, "step": 12097500 }, { "epoch": 35.02, "learning_rate": 3.249694041774732e-05, "loss": 2.0655, "step": 12098000 }, { "epoch": 35.02, "learning_rate": 3.249621966469063e-05, "loss": 2.0674, "step": 12098500 }, { "epoch": 35.02, "learning_rate": 3.2495496017043356e-05, "loss": 2.0642, "step": 12099000 }, { "epoch": 35.02, "learning_rate": 3.249477236939608e-05, "loss": 2.0858, "step": 12099500 }, { "epoch": 35.02, "learning_rate": 3.24940487217488e-05, "loss": 2.0681, "step": 12100000 }, { "epoch": 35.03, "learning_rate": 3.2493326521396817e-05, "loss": 2.0769, "step": 12100500 }, { "epoch": 35.03, "learning_rate": 3.249260287374954e-05, "loss": 2.0707, "step": 12101000 }, { "epoch": 35.03, "learning_rate": 3.249187922610226e-05, "loss": 2.0592, "step": 12101500 }, { "epoch": 35.03, "learning_rate": 3.249115557845498e-05, "loss": 2.0667, "step": 12102000 }, { "epoch": 35.03, "learning_rate": 3.2490431930807705e-05, "loss": 2.0766, "step": 12102500 }, { "epoch": 35.03, "learning_rate": 3.248970828316043e-05, "loss": 2.0546, "step": 12103000 }, { "epoch": 35.03, "learning_rate": 3.248898463551316e-05, "loss": 2.0661, "step": 12103500 }, { "epoch": 35.04, "learning_rate": 3.248826098786588e-05, "loss": 2.0761, "step": 12104000 }, { "epoch": 35.04, "learning_rate": 3.24875373402186e-05, "loss": 2.0575, "step": 12104500 }, { "epoch": 35.04, "learning_rate": 3.2486813692571323e-05, "loss": 2.0716, "step": 12105000 }, { "epoch": 35.04, "learning_rate": 3.248609149221934e-05, "loss": 2.0664, "step": 12105500 }, { "epoch": 35.04, "learning_rate": 3.248536784457207e-05, "loss": 2.0647, "step": 12106000 }, { "epoch": 35.04, "learning_rate": 3.248464419692479e-05, "loss": 2.0736, "step": 12106500 }, { "epoch": 35.04, "learning_rate": 3.248392054927751e-05, "loss": 2.0873, "step": 12107000 }, { "epoch": 35.05, "learning_rate": 3.2483196901630235e-05, "loss": 2.056, "step": 12107500 }, { "epoch": 35.05, "learning_rate": 3.248247325398296e-05, "loss": 2.0707, "step": 12108000 }, { "epoch": 35.05, "learning_rate": 3.248174960633568e-05, "loss": 2.0668, "step": 12108500 }, { "epoch": 35.05, "learning_rate": 3.24810274059837e-05, "loss": 2.0785, "step": 12109000 }, { "epoch": 35.05, "learning_rate": 3.2480303758336424e-05, "loss": 2.047, "step": 12109500 }, { "epoch": 35.05, "learning_rate": 3.2479580110689146e-05, "loss": 2.0746, "step": 12110000 }, { "epoch": 35.05, "learning_rate": 3.247885646304187e-05, "loss": 2.0683, "step": 12110500 }, { "epoch": 35.06, "learning_rate": 3.247813281539459e-05, "loss": 2.048, "step": 12111000 }, { "epoch": 35.06, "learning_rate": 3.247740916774731e-05, "loss": 2.07, "step": 12111500 }, { "epoch": 35.06, "learning_rate": 3.2476685520100035e-05, "loss": 2.0647, "step": 12112000 }, { "epoch": 35.06, "learning_rate": 3.247596187245276e-05, "loss": 2.0839, "step": 12112500 }, { "epoch": 35.06, "learning_rate": 3.2475238224805486e-05, "loss": 2.0798, "step": 12113000 }, { "epoch": 35.06, "learning_rate": 3.247451457715821e-05, "loss": 2.0349, "step": 12113500 }, { "epoch": 35.07, "learning_rate": 3.247379092951093e-05, "loss": 2.0658, "step": 12114000 }, { "epoch": 35.07, "learning_rate": 3.247306728186365e-05, "loss": 2.074, "step": 12114500 }, { "epoch": 35.07, "learning_rate": 3.247234363421638e-05, "loss": 2.0921, "step": 12115000 }, { "epoch": 35.07, "learning_rate": 3.2471619986569104e-05, "loss": 2.0795, "step": 12115500 }, { "epoch": 35.07, "learning_rate": 3.2470896338921826e-05, "loss": 2.0553, "step": 12116000 }, { "epoch": 35.07, "learning_rate": 3.247017269127455e-05, "loss": 2.101, "step": 12116500 }, { "epoch": 35.07, "learning_rate": 3.246944904362727e-05, "loss": 2.0659, "step": 12117000 }, { "epoch": 35.08, "learning_rate": 3.2468726843275286e-05, "loss": 2.0565, "step": 12117500 }, { "epoch": 35.08, "learning_rate": 3.246800319562801e-05, "loss": 2.0969, "step": 12118000 }, { "epoch": 35.08, "learning_rate": 3.246728099527603e-05, "loss": 2.0751, "step": 12118500 }, { "epoch": 35.08, "learning_rate": 3.246655734762875e-05, "loss": 2.0692, "step": 12119000 }, { "epoch": 35.08, "learning_rate": 3.2465833699981475e-05, "loss": 2.0561, "step": 12119500 }, { "epoch": 35.08, "learning_rate": 3.24651100523342e-05, "loss": 2.0483, "step": 12120000 }, { "epoch": 35.08, "learning_rate": 3.2464386404686927e-05, "loss": 2.0668, "step": 12120500 }, { "epoch": 35.09, "learning_rate": 3.2463665651630235e-05, "loss": 2.0676, "step": 12121000 }, { "epoch": 35.09, "learning_rate": 3.246294200398296e-05, "loss": 2.0721, "step": 12121500 }, { "epoch": 35.09, "learning_rate": 3.246221835633568e-05, "loss": 2.0589, "step": 12122000 }, { "epoch": 35.09, "learning_rate": 3.246149470868841e-05, "loss": 2.0807, "step": 12122500 }, { "epoch": 35.09, "learning_rate": 3.246077106104113e-05, "loss": 2.0684, "step": 12123000 }, { "epoch": 35.09, "learning_rate": 3.2460047413393853e-05, "loss": 2.0988, "step": 12123500 }, { "epoch": 35.09, "learning_rate": 3.2459323765746576e-05, "loss": 2.0418, "step": 12124000 }, { "epoch": 35.1, "learning_rate": 3.24586001180993e-05, "loss": 2.0782, "step": 12124500 }, { "epoch": 35.1, "learning_rate": 3.245787647045202e-05, "loss": 2.0636, "step": 12125000 }, { "epoch": 35.1, "learning_rate": 3.2457154270100036e-05, "loss": 2.0665, "step": 12125500 }, { "epoch": 35.1, "learning_rate": 3.245643062245276e-05, "loss": 2.0548, "step": 12126000 }, { "epoch": 35.1, "learning_rate": 3.245570697480548e-05, "loss": 2.0844, "step": 12126500 }, { "epoch": 35.1, "learning_rate": 3.245498332715821e-05, "loss": 2.0619, "step": 12127000 }, { "epoch": 35.1, "learning_rate": 3.2454261126806225e-05, "loss": 2.0653, "step": 12127500 }, { "epoch": 35.11, "learning_rate": 3.2453537479158954e-05, "loss": 2.0521, "step": 12128000 }, { "epoch": 35.11, "learning_rate": 3.245281672610226e-05, "loss": 2.0764, "step": 12128500 }, { "epoch": 35.11, "learning_rate": 3.2452093078454985e-05, "loss": 2.0577, "step": 12129000 }, { "epoch": 35.11, "learning_rate": 3.245136943080771e-05, "loss": 2.0606, "step": 12129500 }, { "epoch": 35.11, "learning_rate": 3.245064867775102e-05, "loss": 2.0783, "step": 12130000 }, { "epoch": 35.11, "learning_rate": 3.2449925030103745e-05, "loss": 2.0718, "step": 12130500 }, { "epoch": 35.11, "learning_rate": 3.244920138245647e-05, "loss": 2.0621, "step": 12131000 }, { "epoch": 35.12, "learning_rate": 3.244847773480919e-05, "loss": 2.0813, "step": 12131500 }, { "epoch": 35.12, "learning_rate": 3.244775408716191e-05, "loss": 2.0704, "step": 12132000 }, { "epoch": 35.12, "learning_rate": 3.2447030439514634e-05, "loss": 2.076, "step": 12132500 }, { "epoch": 35.12, "learning_rate": 3.2446306791867356e-05, "loss": 2.0878, "step": 12133000 }, { "epoch": 35.12, "learning_rate": 3.2445583144220085e-05, "loss": 2.0542, "step": 12133500 }, { "epoch": 35.12, "learning_rate": 3.244485949657281e-05, "loss": 2.089, "step": 12134000 }, { "epoch": 35.12, "learning_rate": 3.244413729622082e-05, "loss": 2.059, "step": 12134500 }, { "epoch": 35.13, "learning_rate": 3.2443413648573545e-05, "loss": 2.0647, "step": 12135000 }, { "epoch": 35.13, "learning_rate": 3.244269000092627e-05, "loss": 2.0436, "step": 12135500 }, { "epoch": 35.13, "learning_rate": 3.2441966353278996e-05, "loss": 2.0623, "step": 12136000 }, { "epoch": 35.13, "learning_rate": 3.244124270563172e-05, "loss": 2.0721, "step": 12136500 }, { "epoch": 35.13, "learning_rate": 3.2440520505279734e-05, "loss": 2.0778, "step": 12137000 }, { "epoch": 35.13, "learning_rate": 3.2439796857632456e-05, "loss": 2.0532, "step": 12137500 }, { "epoch": 35.13, "learning_rate": 3.2439073209985185e-05, "loss": 2.0734, "step": 12138000 }, { "epoch": 35.14, "learning_rate": 3.243834956233791e-05, "loss": 2.0808, "step": 12138500 }, { "epoch": 35.14, "learning_rate": 3.243762591469063e-05, "loss": 2.0473, "step": 12139000 }, { "epoch": 35.14, "learning_rate": 3.243690226704335e-05, "loss": 2.0713, "step": 12139500 }, { "epoch": 35.14, "learning_rate": 3.2436178619396074e-05, "loss": 2.0456, "step": 12140000 }, { "epoch": 35.14, "learning_rate": 3.2435454971748797e-05, "loss": 2.0842, "step": 12140500 }, { "epoch": 35.14, "learning_rate": 3.243473132410152e-05, "loss": 2.0818, "step": 12141000 }, { "epoch": 35.14, "learning_rate": 3.243400767645424e-05, "loss": 2.0641, "step": 12141500 }, { "epoch": 35.15, "learning_rate": 3.243328402880696e-05, "loss": 2.0575, "step": 12142000 }, { "epoch": 35.15, "learning_rate": 3.2432560381159686e-05, "loss": 2.0435, "step": 12142500 }, { "epoch": 35.15, "learning_rate": 3.2431836733512415e-05, "loss": 2.0739, "step": 12143000 }, { "epoch": 35.15, "learning_rate": 3.243111308586514e-05, "loss": 2.0605, "step": 12143500 }, { "epoch": 35.15, "learning_rate": 3.243038943821786e-05, "loss": 2.0506, "step": 12144000 }, { "epoch": 35.15, "learning_rate": 3.242966579057059e-05, "loss": 2.0518, "step": 12144500 }, { "epoch": 35.15, "learning_rate": 3.2428943590218604e-05, "loss": 2.0576, "step": 12145000 }, { "epoch": 35.16, "learning_rate": 3.2428219942571326e-05, "loss": 2.0692, "step": 12145500 }, { "epoch": 35.16, "learning_rate": 3.242749629492405e-05, "loss": 2.0478, "step": 12146000 }, { "epoch": 35.16, "learning_rate": 3.242677264727677e-05, "loss": 2.0601, "step": 12146500 }, { "epoch": 35.16, "learning_rate": 3.2426050446924786e-05, "loss": 2.0757, "step": 12147000 }, { "epoch": 35.16, "learning_rate": 3.242532679927751e-05, "loss": 2.0788, "step": 12147500 }, { "epoch": 35.16, "learning_rate": 3.242460315163024e-05, "loss": 2.0752, "step": 12148000 }, { "epoch": 35.16, "learning_rate": 3.242387950398296e-05, "loss": 2.0685, "step": 12148500 }, { "epoch": 35.17, "learning_rate": 3.242315585633568e-05, "loss": 2.0691, "step": 12149000 }, { "epoch": 35.17, "learning_rate": 3.2422432208688404e-05, "loss": 2.0572, "step": 12149500 }, { "epoch": 35.17, "learning_rate": 3.2421708561041126e-05, "loss": 2.0729, "step": 12150000 }, { "epoch": 35.17, "learning_rate": 3.2420984913393855e-05, "loss": 2.0654, "step": 12150500 }, { "epoch": 35.17, "learning_rate": 3.242026126574658e-05, "loss": 2.0406, "step": 12151000 }, { "epoch": 35.17, "learning_rate": 3.241953906539459e-05, "loss": 2.0694, "step": 12151500 }, { "epoch": 35.18, "learning_rate": 3.2418815417747315e-05, "loss": 2.0702, "step": 12152000 }, { "epoch": 35.18, "learning_rate": 3.241809177010004e-05, "loss": 2.0556, "step": 12152500 }, { "epoch": 35.18, "learning_rate": 3.241736812245276e-05, "loss": 2.0581, "step": 12153000 }, { "epoch": 35.18, "learning_rate": 3.241664447480549e-05, "loss": 2.0782, "step": 12153500 }, { "epoch": 35.18, "learning_rate": 3.241592082715821e-05, "loss": 2.0882, "step": 12154000 }, { "epoch": 35.18, "learning_rate": 3.241519717951093e-05, "loss": 2.0635, "step": 12154500 }, { "epoch": 35.18, "learning_rate": 3.2414473531863655e-05, "loss": 2.0851, "step": 12155000 }, { "epoch": 35.19, "learning_rate": 3.241374988421638e-05, "loss": 2.0636, "step": 12155500 }, { "epoch": 35.19, "learning_rate": 3.241302768386439e-05, "loss": 2.0619, "step": 12156000 }, { "epoch": 35.19, "learning_rate": 3.2412304036217115e-05, "loss": 2.0762, "step": 12156500 }, { "epoch": 35.19, "learning_rate": 3.241158038856984e-05, "loss": 2.0818, "step": 12157000 }, { "epoch": 35.19, "learning_rate": 3.241085674092256e-05, "loss": 2.0783, "step": 12157500 }, { "epoch": 35.19, "learning_rate": 3.241013309327529e-05, "loss": 2.0624, "step": 12158000 }, { "epoch": 35.19, "learning_rate": 3.240941089292331e-05, "loss": 2.0692, "step": 12158500 }, { "epoch": 35.2, "learning_rate": 3.240868724527603e-05, "loss": 2.0675, "step": 12159000 }, { "epoch": 35.2, "learning_rate": 3.240796504492405e-05, "loss": 2.0897, "step": 12159500 }, { "epoch": 35.2, "learning_rate": 3.240724139727677e-05, "loss": 2.0824, "step": 12160000 }, { "epoch": 35.2, "learning_rate": 3.240651774962949e-05, "loss": 2.0612, "step": 12160500 }, { "epoch": 35.2, "learning_rate": 3.2405794101982216e-05, "loss": 2.0581, "step": 12161000 }, { "epoch": 35.2, "learning_rate": 3.240507045433494e-05, "loss": 2.0456, "step": 12161500 }, { "epoch": 35.2, "learning_rate": 3.240434680668766e-05, "loss": 2.0833, "step": 12162000 }, { "epoch": 35.21, "learning_rate": 3.240362315904039e-05, "loss": 2.0646, "step": 12162500 }, { "epoch": 35.21, "learning_rate": 3.2402900958688405e-05, "loss": 2.0602, "step": 12163000 }, { "epoch": 35.21, "learning_rate": 3.240217731104113e-05, "loss": 2.068, "step": 12163500 }, { "epoch": 35.21, "learning_rate": 3.240145366339385e-05, "loss": 2.0979, "step": 12164000 }, { "epoch": 35.21, "learning_rate": 3.240073001574657e-05, "loss": 2.0633, "step": 12164500 }, { "epoch": 35.21, "learning_rate": 3.240000781539459e-05, "loss": 2.076, "step": 12165000 }, { "epoch": 35.21, "learning_rate": 3.2399284167747316e-05, "loss": 2.0641, "step": 12165500 }, { "epoch": 35.22, "learning_rate": 3.239856052010004e-05, "loss": 2.075, "step": 12166000 }, { "epoch": 35.22, "learning_rate": 3.239783687245277e-05, "loss": 2.0591, "step": 12166500 }, { "epoch": 35.22, "learning_rate": 3.239711322480549e-05, "loss": 2.0698, "step": 12167000 }, { "epoch": 35.22, "learning_rate": 3.239638957715821e-05, "loss": 2.073, "step": 12167500 }, { "epoch": 35.22, "learning_rate": 3.2395665929510934e-05, "loss": 2.0493, "step": 12168000 }, { "epoch": 35.22, "learning_rate": 3.239494372915895e-05, "loss": 2.0493, "step": 12168500 }, { "epoch": 35.22, "learning_rate": 3.239422008151167e-05, "loss": 2.0375, "step": 12169000 }, { "epoch": 35.23, "learning_rate": 3.2393496433864394e-05, "loss": 2.0633, "step": 12169500 }, { "epoch": 35.23, "learning_rate": 3.2392774233512416e-05, "loss": 2.0761, "step": 12170000 }, { "epoch": 35.23, "learning_rate": 3.239205203316043e-05, "loss": 2.0731, "step": 12170500 }, { "epoch": 35.23, "learning_rate": 3.2391328385513154e-05, "loss": 2.0812, "step": 12171000 }, { "epoch": 35.23, "learning_rate": 3.2390604737865876e-05, "loss": 2.0638, "step": 12171500 }, { "epoch": 35.23, "learning_rate": 3.23898810902186e-05, "loss": 2.09, "step": 12172000 }, { "epoch": 35.23, "learning_rate": 3.238915744257132e-05, "loss": 2.0494, "step": 12172500 }, { "epoch": 35.24, "learning_rate": 3.238843379492405e-05, "loss": 2.0893, "step": 12173000 }, { "epoch": 35.24, "learning_rate": 3.238771014727677e-05, "loss": 2.0811, "step": 12173500 }, { "epoch": 35.24, "learning_rate": 3.2386986499629494e-05, "loss": 2.0592, "step": 12174000 }, { "epoch": 35.24, "learning_rate": 3.2386262851982216e-05, "loss": 2.0701, "step": 12174500 }, { "epoch": 35.24, "learning_rate": 3.238553920433494e-05, "loss": 2.0845, "step": 12175000 }, { "epoch": 35.24, "learning_rate": 3.238481555668767e-05, "loss": 2.0674, "step": 12175500 }, { "epoch": 35.24, "learning_rate": 3.238409190904039e-05, "loss": 2.0488, "step": 12176000 }, { "epoch": 35.25, "learning_rate": 3.238336826139311e-05, "loss": 2.0668, "step": 12176500 }, { "epoch": 35.25, "learning_rate": 3.238264606104113e-05, "loss": 2.078, "step": 12177000 }, { "epoch": 35.25, "learning_rate": 3.238192241339385e-05, "loss": 2.0522, "step": 12177500 }, { "epoch": 35.25, "learning_rate": 3.2381200213041865e-05, "loss": 2.0394, "step": 12178000 }, { "epoch": 35.25, "learning_rate": 3.238047656539459e-05, "loss": 2.0748, "step": 12178500 }, { "epoch": 35.25, "learning_rate": 3.2379752917747317e-05, "loss": 2.0557, "step": 12179000 }, { "epoch": 35.25, "learning_rate": 3.237902927010004e-05, "loss": 2.081, "step": 12179500 }, { "epoch": 35.26, "learning_rate": 3.237830562245276e-05, "loss": 2.0678, "step": 12180000 }, { "epoch": 35.26, "learning_rate": 3.237758197480549e-05, "loss": 2.0674, "step": 12180500 }, { "epoch": 35.26, "learning_rate": 3.237685832715821e-05, "loss": 2.0892, "step": 12181000 }, { "epoch": 35.26, "learning_rate": 3.2376134679510935e-05, "loss": 2.0745, "step": 12181500 }, { "epoch": 35.26, "learning_rate": 3.237541103186366e-05, "loss": 2.0829, "step": 12182000 }, { "epoch": 35.26, "learning_rate": 3.237468738421638e-05, "loss": 2.0612, "step": 12182500 }, { "epoch": 35.26, "learning_rate": 3.23739637365691e-05, "loss": 2.0788, "step": 12183000 }, { "epoch": 35.27, "learning_rate": 3.2373240088921823e-05, "loss": 2.0533, "step": 12183500 }, { "epoch": 35.27, "learning_rate": 3.2372516441274546e-05, "loss": 2.0654, "step": 12184000 }, { "epoch": 35.27, "learning_rate": 3.237179279362727e-05, "loss": 2.0455, "step": 12184500 }, { "epoch": 35.27, "learning_rate": 3.237106914597999e-05, "loss": 2.0516, "step": 12185000 }, { "epoch": 35.27, "learning_rate": 3.237034549833272e-05, "loss": 2.0756, "step": 12185500 }, { "epoch": 35.27, "learning_rate": 3.2369623297980735e-05, "loss": 2.0543, "step": 12186000 }, { "epoch": 35.27, "learning_rate": 3.236889965033346e-05, "loss": 2.0646, "step": 12186500 }, { "epoch": 35.28, "learning_rate": 3.236817600268618e-05, "loss": 2.074, "step": 12187000 }, { "epoch": 35.28, "learning_rate": 3.2367453802334195e-05, "loss": 2.0673, "step": 12187500 }, { "epoch": 35.28, "learning_rate": 3.2366730154686924e-05, "loss": 2.0652, "step": 12188000 }, { "epoch": 35.28, "learning_rate": 3.2366006507039646e-05, "loss": 2.0729, "step": 12188500 }, { "epoch": 35.28, "learning_rate": 3.236528430668767e-05, "loss": 2.0495, "step": 12189000 }, { "epoch": 35.28, "learning_rate": 3.236456065904039e-05, "loss": 2.0616, "step": 12189500 }, { "epoch": 35.29, "learning_rate": 3.236383701139311e-05, "loss": 2.0432, "step": 12190000 }, { "epoch": 35.29, "learning_rate": 3.2363113363745835e-05, "loss": 2.0543, "step": 12190500 }, { "epoch": 35.29, "learning_rate": 3.236238971609856e-05, "loss": 2.0598, "step": 12191000 }, { "epoch": 35.29, "learning_rate": 3.236166606845128e-05, "loss": 2.0285, "step": 12191500 }, { "epoch": 35.29, "learning_rate": 3.2360942420804e-05, "loss": 2.0781, "step": 12192000 }, { "epoch": 35.29, "learning_rate": 3.236022022045202e-05, "loss": 2.0553, "step": 12192500 }, { "epoch": 35.29, "learning_rate": 3.235949657280474e-05, "loss": 2.1, "step": 12193000 }, { "epoch": 35.3, "learning_rate": 3.235877292515747e-05, "loss": 2.0696, "step": 12193500 }, { "epoch": 35.3, "learning_rate": 3.235804927751019e-05, "loss": 2.0732, "step": 12194000 }, { "epoch": 35.3, "learning_rate": 3.235732562986291e-05, "loss": 2.0585, "step": 12194500 }, { "epoch": 35.3, "learning_rate": 3.235660198221564e-05, "loss": 2.078, "step": 12195000 }, { "epoch": 35.3, "learning_rate": 3.2355878334568364e-05, "loss": 2.0572, "step": 12195500 }, { "epoch": 35.3, "learning_rate": 3.2355154686921086e-05, "loss": 2.0912, "step": 12196000 }, { "epoch": 35.3, "learning_rate": 3.235443103927381e-05, "loss": 2.0968, "step": 12196500 }, { "epoch": 35.31, "learning_rate": 3.2353708838921824e-05, "loss": 2.0851, "step": 12197000 }, { "epoch": 35.31, "learning_rate": 3.2352986638569847e-05, "loss": 2.0791, "step": 12197500 }, { "epoch": 35.31, "learning_rate": 3.235226299092257e-05, "loss": 2.0744, "step": 12198000 }, { "epoch": 35.31, "learning_rate": 3.235153934327529e-05, "loss": 2.0541, "step": 12198500 }, { "epoch": 35.31, "learning_rate": 3.235081569562801e-05, "loss": 2.0565, "step": 12199000 }, { "epoch": 35.31, "learning_rate": 3.235009349527603e-05, "loss": 2.0762, "step": 12199500 }, { "epoch": 35.31, "learning_rate": 3.234936984762875e-05, "loss": 2.0674, "step": 12200000 }, { "epoch": 35.32, "learning_rate": 3.234864619998147e-05, "loss": 2.0827, "step": 12200500 }, { "epoch": 35.32, "learning_rate": 3.2347922552334196e-05, "loss": 2.0736, "step": 12201000 }, { "epoch": 35.32, "learning_rate": 3.234720035198222e-05, "loss": 2.0705, "step": 12201500 }, { "epoch": 35.32, "learning_rate": 3.234647670433494e-05, "loss": 2.0769, "step": 12202000 }, { "epoch": 35.32, "learning_rate": 3.234575305668766e-05, "loss": 2.0783, "step": 12202500 }, { "epoch": 35.32, "learning_rate": 3.234502940904039e-05, "loss": 2.0699, "step": 12203000 }, { "epoch": 35.32, "learning_rate": 3.2344305761393114e-05, "loss": 2.0631, "step": 12203500 }, { "epoch": 35.33, "learning_rate": 3.234358500833642e-05, "loss": 2.0913, "step": 12204000 }, { "epoch": 35.33, "learning_rate": 3.2342861360689145e-05, "loss": 2.0568, "step": 12204500 }, { "epoch": 35.33, "learning_rate": 3.234213771304187e-05, "loss": 2.0799, "step": 12205000 }, { "epoch": 35.33, "learning_rate": 3.2341414065394596e-05, "loss": 2.0778, "step": 12205500 }, { "epoch": 35.33, "learning_rate": 3.234069041774732e-05, "loss": 2.0746, "step": 12206000 }, { "epoch": 35.33, "learning_rate": 3.233996677010004e-05, "loss": 2.088, "step": 12206500 }, { "epoch": 35.33, "learning_rate": 3.2339244569748056e-05, "loss": 2.0757, "step": 12207000 }, { "epoch": 35.34, "learning_rate": 3.233852092210078e-05, "loss": 2.0521, "step": 12207500 }, { "epoch": 35.34, "learning_rate": 3.23377972744535e-05, "loss": 2.0711, "step": 12208000 }, { "epoch": 35.34, "learning_rate": 3.233707362680622e-05, "loss": 2.0797, "step": 12208500 }, { "epoch": 35.34, "learning_rate": 3.2336349979158945e-05, "loss": 2.0569, "step": 12209000 }, { "epoch": 35.34, "learning_rate": 3.233562633151167e-05, "loss": 2.0608, "step": 12209500 }, { "epoch": 35.34, "learning_rate": 3.2334902683864396e-05, "loss": 2.0726, "step": 12210000 }, { "epoch": 35.34, "learning_rate": 3.233417903621712e-05, "loss": 2.037, "step": 12210500 }, { "epoch": 35.35, "learning_rate": 3.233345538856985e-05, "loss": 2.0704, "step": 12211000 }, { "epoch": 35.35, "learning_rate": 3.233273174092257e-05, "loss": 2.0634, "step": 12211500 }, { "epoch": 35.35, "learning_rate": 3.233200809327529e-05, "loss": 2.0502, "step": 12212000 }, { "epoch": 35.35, "learning_rate": 3.2331284445628014e-05, "loss": 2.1014, "step": 12212500 }, { "epoch": 35.35, "learning_rate": 3.2330560797980736e-05, "loss": 2.0574, "step": 12213000 }, { "epoch": 35.35, "learning_rate": 3.232983715033346e-05, "loss": 2.0642, "step": 12213500 }, { "epoch": 35.35, "learning_rate": 3.232911350268618e-05, "loss": 2.0668, "step": 12214000 }, { "epoch": 35.36, "learning_rate": 3.23283898550389e-05, "loss": 2.063, "step": 12214500 }, { "epoch": 35.36, "learning_rate": 3.232766910198222e-05, "loss": 2.0976, "step": 12215000 }, { "epoch": 35.36, "learning_rate": 3.232694545433494e-05, "loss": 2.0702, "step": 12215500 }, { "epoch": 35.36, "learning_rate": 3.2326223253982956e-05, "loss": 2.0849, "step": 12216000 }, { "epoch": 35.36, "learning_rate": 3.232549960633568e-05, "loss": 2.0877, "step": 12216500 }, { "epoch": 35.36, "learning_rate": 3.23247759586884e-05, "loss": 2.0614, "step": 12217000 }, { "epoch": 35.36, "learning_rate": 3.232405231104112e-05, "loss": 2.0972, "step": 12217500 }, { "epoch": 35.37, "learning_rate": 3.232332866339385e-05, "loss": 2.1018, "step": 12218000 }, { "epoch": 35.37, "learning_rate": 3.2322605015746574e-05, "loss": 2.033, "step": 12218500 }, { "epoch": 35.37, "learning_rate": 3.23218828153946e-05, "loss": 2.0856, "step": 12219000 }, { "epoch": 35.37, "learning_rate": 3.232115916774732e-05, "loss": 2.0785, "step": 12219500 }, { "epoch": 35.37, "learning_rate": 3.232043552010004e-05, "loss": 2.0803, "step": 12220000 }, { "epoch": 35.37, "learning_rate": 3.2319711872452763e-05, "loss": 2.0695, "step": 12220500 }, { "epoch": 35.37, "learning_rate": 3.2318988224805486e-05, "loss": 2.0613, "step": 12221000 }, { "epoch": 35.38, "learning_rate": 3.231826457715821e-05, "loss": 2.0619, "step": 12221500 }, { "epoch": 35.38, "learning_rate": 3.231754092951093e-05, "loss": 2.0543, "step": 12222000 }, { "epoch": 35.38, "learning_rate": 3.2316818729158946e-05, "loss": 2.0491, "step": 12222500 }, { "epoch": 35.38, "learning_rate": 3.2316095081511675e-05, "loss": 2.072, "step": 12223000 }, { "epoch": 35.38, "learning_rate": 3.23153714338644e-05, "loss": 2.0871, "step": 12223500 }, { "epoch": 35.38, "learning_rate": 3.231464778621712e-05, "loss": 2.0799, "step": 12224000 }, { "epoch": 35.38, "learning_rate": 3.231392413856984e-05, "loss": 2.0892, "step": 12224500 }, { "epoch": 35.39, "learning_rate": 3.2313200490922564e-05, "loss": 2.0758, "step": 12225000 }, { "epoch": 35.39, "learning_rate": 3.2312478290570586e-05, "loss": 2.0857, "step": 12225500 }, { "epoch": 35.39, "learning_rate": 3.231175464292331e-05, "loss": 2.0655, "step": 12226000 }, { "epoch": 35.39, "learning_rate": 3.231103099527603e-05, "loss": 2.0632, "step": 12226500 }, { "epoch": 35.39, "learning_rate": 3.231030734762875e-05, "loss": 2.0652, "step": 12227000 }, { "epoch": 35.39, "learning_rate": 3.2309583699981475e-05, "loss": 2.0707, "step": 12227500 }, { "epoch": 35.4, "learning_rate": 3.23088600523342e-05, "loss": 2.0817, "step": 12228000 }, { "epoch": 35.4, "learning_rate": 3.2308136404686926e-05, "loss": 2.0837, "step": 12228500 }, { "epoch": 35.4, "learning_rate": 3.230741275703965e-05, "loss": 2.0564, "step": 12229000 }, { "epoch": 35.4, "learning_rate": 3.230668910939237e-05, "loss": 2.0624, "step": 12229500 }, { "epoch": 35.4, "learning_rate": 3.230596546174509e-05, "loss": 2.0808, "step": 12230000 }, { "epoch": 35.4, "learning_rate": 3.2305241814097815e-05, "loss": 2.0792, "step": 12230500 }, { "epoch": 35.4, "learning_rate": 3.230451961374583e-05, "loss": 2.0936, "step": 12231000 }, { "epoch": 35.41, "learning_rate": 3.230379596609855e-05, "loss": 2.0718, "step": 12231500 }, { "epoch": 35.41, "learning_rate": 3.2303072318451275e-05, "loss": 2.0643, "step": 12232000 }, { "epoch": 35.41, "learning_rate": 3.2302348670804004e-05, "loss": 2.1005, "step": 12232500 }, { "epoch": 35.41, "learning_rate": 3.2301625023156726e-05, "loss": 2.062, "step": 12233000 }, { "epoch": 35.41, "learning_rate": 3.230090282280475e-05, "loss": 2.0647, "step": 12233500 }, { "epoch": 35.41, "learning_rate": 3.230017917515747e-05, "loss": 2.0661, "step": 12234000 }, { "epoch": 35.41, "learning_rate": 3.229945552751019e-05, "loss": 2.0802, "step": 12234500 }, { "epoch": 35.42, "learning_rate": 3.2298731879862915e-05, "loss": 2.0832, "step": 12235000 }, { "epoch": 35.42, "learning_rate": 3.229800967951093e-05, "loss": 2.0676, "step": 12235500 }, { "epoch": 35.42, "learning_rate": 3.229728603186365e-05, "loss": 2.0479, "step": 12236000 }, { "epoch": 35.42, "learning_rate": 3.2296562384216375e-05, "loss": 2.0753, "step": 12236500 }, { "epoch": 35.42, "learning_rate": 3.22958387365691e-05, "loss": 2.0754, "step": 12237000 }, { "epoch": 35.42, "learning_rate": 3.229511508892183e-05, "loss": 2.0603, "step": 12237500 }, { "epoch": 35.42, "learning_rate": 3.229439144127455e-05, "loss": 2.0779, "step": 12238000 }, { "epoch": 35.43, "learning_rate": 3.229366779362727e-05, "loss": 2.0482, "step": 12238500 }, { "epoch": 35.43, "learning_rate": 3.229294414597999e-05, "loss": 2.0655, "step": 12239000 }, { "epoch": 35.43, "learning_rate": 3.229222194562801e-05, "loss": 2.0695, "step": 12239500 }, { "epoch": 35.43, "learning_rate": 3.229149829798074e-05, "loss": 2.1075, "step": 12240000 }, { "epoch": 35.43, "learning_rate": 3.229077465033346e-05, "loss": 2.0964, "step": 12240500 }, { "epoch": 35.43, "learning_rate": 3.229005100268618e-05, "loss": 2.0552, "step": 12241000 }, { "epoch": 35.43, "learning_rate": 3.2289327355038905e-05, "loss": 2.0803, "step": 12241500 }, { "epoch": 35.44, "learning_rate": 3.228860370739163e-05, "loss": 2.0712, "step": 12242000 }, { "epoch": 35.44, "learning_rate": 3.228788005974435e-05, "loss": 2.0827, "step": 12242500 }, { "epoch": 35.44, "learning_rate": 3.228715641209708e-05, "loss": 2.0821, "step": 12243000 }, { "epoch": 35.44, "learning_rate": 3.22864327644498e-05, "loss": 2.0582, "step": 12243500 }, { "epoch": 35.44, "learning_rate": 3.228570911680252e-05, "loss": 2.0728, "step": 12244000 }, { "epoch": 35.44, "learning_rate": 3.228498691645054e-05, "loss": 2.0619, "step": 12244500 }, { "epoch": 35.44, "learning_rate": 3.228426326880326e-05, "loss": 2.0783, "step": 12245000 }, { "epoch": 35.45, "learning_rate": 3.228353962115598e-05, "loss": 2.056, "step": 12245500 }, { "epoch": 35.45, "learning_rate": 3.2282815973508705e-05, "loss": 2.099, "step": 12246000 }, { "epoch": 35.45, "learning_rate": 3.228209232586143e-05, "loss": 2.0765, "step": 12246500 }, { "epoch": 35.45, "learning_rate": 3.228136867821415e-05, "loss": 2.0844, "step": 12247000 }, { "epoch": 35.45, "learning_rate": 3.228064503056688e-05, "loss": 2.0828, "step": 12247500 }, { "epoch": 35.45, "learning_rate": 3.22799213829196e-05, "loss": 2.0511, "step": 12248000 }, { "epoch": 35.45, "learning_rate": 3.227919918256762e-05, "loss": 2.0754, "step": 12248500 }, { "epoch": 35.46, "learning_rate": 3.2278475534920345e-05, "loss": 2.0722, "step": 12249000 }, { "epoch": 35.46, "learning_rate": 3.227775188727307e-05, "loss": 2.0702, "step": 12249500 }, { "epoch": 35.46, "learning_rate": 3.227702823962579e-05, "loss": 2.0926, "step": 12250000 }, { "epoch": 35.46, "learning_rate": 3.2276306039273805e-05, "loss": 2.0378, "step": 12250500 }, { "epoch": 35.46, "learning_rate": 3.227558239162653e-05, "loss": 2.0549, "step": 12251000 }, { "epoch": 35.46, "learning_rate": 3.227486019127455e-05, "loss": 2.048, "step": 12251500 }, { "epoch": 35.46, "learning_rate": 3.227413654362727e-05, "loss": 2.093, "step": 12252000 }, { "epoch": 35.47, "learning_rate": 3.227341434327529e-05, "loss": 2.0532, "step": 12252500 }, { "epoch": 35.47, "learning_rate": 3.227269069562801e-05, "loss": 2.064, "step": 12253000 }, { "epoch": 35.47, "learning_rate": 3.227196704798073e-05, "loss": 2.0835, "step": 12253500 }, { "epoch": 35.47, "learning_rate": 3.2271243400333454e-05, "loss": 2.0598, "step": 12254000 }, { "epoch": 35.47, "learning_rate": 3.2270519752686176e-05, "loss": 2.0532, "step": 12254500 }, { "epoch": 35.47, "learning_rate": 3.2269796105038905e-05, "loss": 2.0699, "step": 12255000 }, { "epoch": 35.47, "learning_rate": 3.226907245739163e-05, "loss": 2.0665, "step": 12255500 }, { "epoch": 35.48, "learning_rate": 3.226834880974435e-05, "loss": 2.0671, "step": 12256000 }, { "epoch": 35.48, "learning_rate": 3.226762660939237e-05, "loss": 2.0706, "step": 12256500 }, { "epoch": 35.48, "learning_rate": 3.2266902961745094e-05, "loss": 2.0848, "step": 12257000 }, { "epoch": 35.48, "learning_rate": 3.226617931409782e-05, "loss": 2.0727, "step": 12257500 }, { "epoch": 35.48, "learning_rate": 3.226545566645054e-05, "loss": 2.0737, "step": 12258000 }, { "epoch": 35.48, "learning_rate": 3.226473201880326e-05, "loss": 2.0648, "step": 12258500 }, { "epoch": 35.48, "learning_rate": 3.226400837115598e-05, "loss": 2.082, "step": 12259000 }, { "epoch": 35.49, "learning_rate": 3.2263284723508706e-05, "loss": 2.0811, "step": 12259500 }, { "epoch": 35.49, "learning_rate": 3.226256252315673e-05, "loss": 2.0753, "step": 12260000 }, { "epoch": 35.49, "learning_rate": 3.226183887550945e-05, "loss": 2.0901, "step": 12260500 }, { "epoch": 35.49, "learning_rate": 3.226111522786217e-05, "loss": 2.0706, "step": 12261000 }, { "epoch": 35.49, "learning_rate": 3.2260391580214895e-05, "loss": 2.0491, "step": 12261500 }, { "epoch": 35.49, "learning_rate": 3.225966793256762e-05, "loss": 2.0731, "step": 12262000 }, { "epoch": 35.49, "learning_rate": 3.2258944284920346e-05, "loss": 2.0785, "step": 12262500 }, { "epoch": 35.5, "learning_rate": 3.225822063727307e-05, "loss": 2.0675, "step": 12263000 }, { "epoch": 35.5, "learning_rate": 3.2257498436921084e-05, "loss": 2.0879, "step": 12263500 }, { "epoch": 35.5, "learning_rate": 3.2256774789273806e-05, "loss": 2.0721, "step": 12264000 }, { "epoch": 35.5, "learning_rate": 3.225605114162653e-05, "loss": 2.0678, "step": 12264500 }, { "epoch": 35.5, "learning_rate": 3.225532749397926e-05, "loss": 2.0605, "step": 12265000 }, { "epoch": 35.5, "learning_rate": 3.225460384633198e-05, "loss": 2.0596, "step": 12265500 }, { "epoch": 35.51, "learning_rate": 3.22538801986847e-05, "loss": 2.076, "step": 12266000 }, { "epoch": 35.51, "learning_rate": 3.2253156551037424e-05, "loss": 2.0753, "step": 12266500 }, { "epoch": 35.51, "learning_rate": 3.2252432903390146e-05, "loss": 2.048, "step": 12267000 }, { "epoch": 35.51, "learning_rate": 3.225170925574287e-05, "loss": 2.0902, "step": 12267500 }, { "epoch": 35.51, "learning_rate": 3.225098560809559e-05, "loss": 2.0803, "step": 12268000 }, { "epoch": 35.51, "learning_rate": 3.225026196044831e-05, "loss": 2.0709, "step": 12268500 }, { "epoch": 35.51, "learning_rate": 3.224953976009633e-05, "loss": 2.0868, "step": 12269000 }, { "epoch": 35.52, "learning_rate": 3.224881611244906e-05, "loss": 2.0854, "step": 12269500 }, { "epoch": 35.52, "learning_rate": 3.224809246480178e-05, "loss": 2.0749, "step": 12270000 }, { "epoch": 35.52, "learning_rate": 3.22473702644498e-05, "loss": 2.0713, "step": 12270500 }, { "epoch": 35.52, "learning_rate": 3.2246646616802524e-05, "loss": 2.0581, "step": 12271000 }, { "epoch": 35.52, "learning_rate": 3.2245922969155246e-05, "loss": 2.0842, "step": 12271500 }, { "epoch": 35.52, "learning_rate": 3.224519932150797e-05, "loss": 2.0859, "step": 12272000 }, { "epoch": 35.52, "learning_rate": 3.224447567386069e-05, "loss": 2.0401, "step": 12272500 }, { "epoch": 35.53, "learning_rate": 3.224375202621341e-05, "loss": 2.0788, "step": 12273000 }, { "epoch": 35.53, "learning_rate": 3.2243028378566135e-05, "loss": 2.0872, "step": 12273500 }, { "epoch": 35.53, "learning_rate": 3.224230473091886e-05, "loss": 2.0843, "step": 12274000 }, { "epoch": 35.53, "learning_rate": 3.224158108327158e-05, "loss": 2.075, "step": 12274500 }, { "epoch": 35.53, "learning_rate": 3.22408588829196e-05, "loss": 2.0868, "step": 12275000 }, { "epoch": 35.53, "learning_rate": 3.2240135235272324e-05, "loss": 2.0877, "step": 12275500 }, { "epoch": 35.53, "learning_rate": 3.2239411587625047e-05, "loss": 2.0911, "step": 12276000 }, { "epoch": 35.54, "learning_rate": 3.223868938727306e-05, "loss": 2.085, "step": 12276500 }, { "epoch": 35.54, "learning_rate": 3.2237965739625784e-05, "loss": 2.1145, "step": 12277000 }, { "epoch": 35.54, "learning_rate": 3.2237242091978513e-05, "loss": 2.0601, "step": 12277500 }, { "epoch": 35.54, "learning_rate": 3.2236518444331236e-05, "loss": 2.0697, "step": 12278000 }, { "epoch": 35.54, "learning_rate": 3.223579624397926e-05, "loss": 2.0831, "step": 12278500 }, { "epoch": 35.54, "learning_rate": 3.223507259633198e-05, "loss": 2.0835, "step": 12279000 }, { "epoch": 35.54, "learning_rate": 3.22343489486847e-05, "loss": 2.0755, "step": 12279500 }, { "epoch": 35.55, "learning_rate": 3.2233625301037425e-05, "loss": 2.0889, "step": 12280000 }, { "epoch": 35.55, "learning_rate": 3.223290165339015e-05, "loss": 2.072, "step": 12280500 }, { "epoch": 35.55, "learning_rate": 3.223217800574287e-05, "loss": 2.0646, "step": 12281000 }, { "epoch": 35.55, "learning_rate": 3.223145435809559e-05, "loss": 2.0966, "step": 12281500 }, { "epoch": 35.55, "learning_rate": 3.2230730710448314e-05, "loss": 2.0992, "step": 12282000 }, { "epoch": 35.55, "learning_rate": 3.2230007062801036e-05, "loss": 2.0593, "step": 12282500 }, { "epoch": 35.55, "learning_rate": 3.222928341515376e-05, "loss": 2.069, "step": 12283000 }, { "epoch": 35.56, "learning_rate": 3.222856121480178e-05, "loss": 2.0557, "step": 12283500 }, { "epoch": 35.56, "learning_rate": 3.22278375671545e-05, "loss": 2.0895, "step": 12284000 }, { "epoch": 35.56, "learning_rate": 3.222711391950723e-05, "loss": 2.0716, "step": 12284500 }, { "epoch": 35.56, "learning_rate": 3.2226390271859954e-05, "loss": 2.0772, "step": 12285000 }, { "epoch": 35.56, "learning_rate": 3.2225666624212676e-05, "loss": 2.0702, "step": 12285500 }, { "epoch": 35.56, "learning_rate": 3.22249429765654e-05, "loss": 2.0564, "step": 12286000 }, { "epoch": 35.56, "learning_rate": 3.222421932891812e-05, "loss": 2.0736, "step": 12286500 }, { "epoch": 35.57, "learning_rate": 3.2223497128566136e-05, "loss": 2.0926, "step": 12287000 }, { "epoch": 35.57, "learning_rate": 3.222277348091886e-05, "loss": 2.0628, "step": 12287500 }, { "epoch": 35.57, "learning_rate": 3.222204983327158e-05, "loss": 2.0888, "step": 12288000 }, { "epoch": 35.57, "learning_rate": 3.222132618562431e-05, "loss": 2.0615, "step": 12288500 }, { "epoch": 35.57, "learning_rate": 3.222060253797703e-05, "loss": 2.0765, "step": 12289000 }, { "epoch": 35.57, "learning_rate": 3.2219878890329754e-05, "loss": 2.0754, "step": 12289500 }, { "epoch": 35.57, "learning_rate": 3.2219155242682476e-05, "loss": 2.0803, "step": 12290000 }, { "epoch": 35.58, "learning_rate": 3.22184315950352e-05, "loss": 2.0889, "step": 12290500 }, { "epoch": 35.58, "learning_rate": 3.221770794738792e-05, "loss": 2.069, "step": 12291000 }, { "epoch": 35.58, "learning_rate": 3.221698429974064e-05, "loss": 2.0823, "step": 12291500 }, { "epoch": 35.58, "learning_rate": 3.221626065209337e-05, "loss": 2.0598, "step": 12292000 }, { "epoch": 35.58, "learning_rate": 3.221553845174139e-05, "loss": 2.0813, "step": 12292500 }, { "epoch": 35.58, "learning_rate": 3.221481480409411e-05, "loss": 2.0689, "step": 12293000 }, { "epoch": 35.58, "learning_rate": 3.221409115644683e-05, "loss": 2.0732, "step": 12293500 }, { "epoch": 35.59, "learning_rate": 3.221336750879956e-05, "loss": 2.0603, "step": 12294000 }, { "epoch": 35.59, "learning_rate": 3.221264386115228e-05, "loss": 2.0729, "step": 12294500 }, { "epoch": 35.59, "learning_rate": 3.22119216608003e-05, "loss": 2.072, "step": 12295000 }, { "epoch": 35.59, "learning_rate": 3.221119801315302e-05, "loss": 2.0608, "step": 12295500 }, { "epoch": 35.59, "learning_rate": 3.221047436550574e-05, "loss": 2.0845, "step": 12296000 }, { "epoch": 35.59, "learning_rate": 3.2209750717858466e-05, "loss": 2.061, "step": 12296500 }, { "epoch": 35.59, "learning_rate": 3.220902707021119e-05, "loss": 2.0877, "step": 12297000 }, { "epoch": 35.6, "learning_rate": 3.220830342256391e-05, "loss": 2.0725, "step": 12297500 }, { "epoch": 35.6, "learning_rate": 3.220757977491663e-05, "loss": 2.0828, "step": 12298000 }, { "epoch": 35.6, "learning_rate": 3.220685612726936e-05, "loss": 2.058, "step": 12298500 }, { "epoch": 35.6, "learning_rate": 3.2206132479622083e-05, "loss": 2.0852, "step": 12299000 }, { "epoch": 35.6, "learning_rate": 3.220540883197481e-05, "loss": 2.036, "step": 12299500 }, { "epoch": 35.6, "learning_rate": 3.2204685184327535e-05, "loss": 2.0623, "step": 12300000 }, { "epoch": 35.6, "learning_rate": 3.220396298397555e-05, "loss": 2.0668, "step": 12300500 }, { "epoch": 35.61, "learning_rate": 3.220323933632827e-05, "loss": 2.0614, "step": 12301000 }, { "epoch": 35.61, "learning_rate": 3.2202515688680995e-05, "loss": 2.0814, "step": 12301500 }, { "epoch": 35.61, "learning_rate": 3.220179204103372e-05, "loss": 2.0684, "step": 12302000 }, { "epoch": 35.61, "learning_rate": 3.220106839338644e-05, "loss": 2.078, "step": 12302500 }, { "epoch": 35.61, "learning_rate": 3.220034474573916e-05, "loss": 2.0873, "step": 12303000 }, { "epoch": 35.61, "learning_rate": 3.2199621098091884e-05, "loss": 2.0888, "step": 12303500 }, { "epoch": 35.62, "learning_rate": 3.219889745044461e-05, "loss": 2.0935, "step": 12304000 }, { "epoch": 35.62, "learning_rate": 3.219817525009263e-05, "loss": 2.0724, "step": 12304500 }, { "epoch": 35.62, "learning_rate": 3.2197453049740644e-05, "loss": 2.0775, "step": 12305000 }, { "epoch": 35.62, "learning_rate": 3.219673084938866e-05, "loss": 2.0745, "step": 12305500 }, { "epoch": 35.62, "learning_rate": 3.219600720174138e-05, "loss": 2.0632, "step": 12306000 }, { "epoch": 35.62, "learning_rate": 3.219528355409411e-05, "loss": 2.0734, "step": 12306500 }, { "epoch": 35.62, "learning_rate": 3.219455990644683e-05, "loss": 2.0734, "step": 12307000 }, { "epoch": 35.63, "learning_rate": 3.219383625879956e-05, "loss": 2.0821, "step": 12307500 }, { "epoch": 35.63, "learning_rate": 3.2193112611152284e-05, "loss": 2.0717, "step": 12308000 }, { "epoch": 35.63, "learning_rate": 3.2192388963505006e-05, "loss": 2.0798, "step": 12308500 }, { "epoch": 35.63, "learning_rate": 3.219166531585773e-05, "loss": 2.0824, "step": 12309000 }, { "epoch": 35.63, "learning_rate": 3.219094166821045e-05, "loss": 2.0678, "step": 12309500 }, { "epoch": 35.63, "learning_rate": 3.219021802056317e-05, "loss": 2.0835, "step": 12310000 }, { "epoch": 35.63, "learning_rate": 3.2189494372915895e-05, "loss": 2.0984, "step": 12310500 }, { "epoch": 35.64, "learning_rate": 3.218877072526862e-05, "loss": 2.0645, "step": 12311000 }, { "epoch": 35.64, "learning_rate": 3.218804852491663e-05, "loss": 2.0843, "step": 12311500 }, { "epoch": 35.64, "learning_rate": 3.2187326324564655e-05, "loss": 2.0683, "step": 12312000 }, { "epoch": 35.64, "learning_rate": 3.218660267691738e-05, "loss": 2.0697, "step": 12312500 }, { "epoch": 35.64, "learning_rate": 3.21858790292701e-05, "loss": 2.0849, "step": 12313000 }, { "epoch": 35.64, "learning_rate": 3.218515538162282e-05, "loss": 2.0767, "step": 12313500 }, { "epoch": 35.64, "learning_rate": 3.218443173397555e-05, "loss": 2.0676, "step": 12314000 }, { "epoch": 35.65, "learning_rate": 3.218370808632827e-05, "loss": 2.0795, "step": 12314500 }, { "epoch": 35.65, "learning_rate": 3.218298588597629e-05, "loss": 2.0858, "step": 12315000 }, { "epoch": 35.65, "learning_rate": 3.218226223832901e-05, "loss": 2.0429, "step": 12315500 }, { "epoch": 35.65, "learning_rate": 3.218153859068174e-05, "loss": 2.0856, "step": 12316000 }, { "epoch": 35.65, "learning_rate": 3.218081494303446e-05, "loss": 2.0565, "step": 12316500 }, { "epoch": 35.65, "learning_rate": 3.218009274268248e-05, "loss": 2.0792, "step": 12317000 }, { "epoch": 35.65, "learning_rate": 3.2179370542330493e-05, "loss": 2.0675, "step": 12317500 }, { "epoch": 35.66, "learning_rate": 3.2178646894683216e-05, "loss": 2.0846, "step": 12318000 }, { "epoch": 35.66, "learning_rate": 3.217792324703594e-05, "loss": 2.0871, "step": 12318500 }, { "epoch": 35.66, "learning_rate": 3.217719959938866e-05, "loss": 2.0927, "step": 12319000 }, { "epoch": 35.66, "learning_rate": 3.217647595174139e-05, "loss": 2.0976, "step": 12319500 }, { "epoch": 35.66, "learning_rate": 3.217575230409411e-05, "loss": 2.069, "step": 12320000 }, { "epoch": 35.66, "learning_rate": 3.2175028656446834e-05, "loss": 2.069, "step": 12320500 }, { "epoch": 35.66, "learning_rate": 3.2174305008799556e-05, "loss": 2.0692, "step": 12321000 }, { "epoch": 35.67, "learning_rate": 3.217358136115228e-05, "loss": 2.0808, "step": 12321500 }, { "epoch": 35.67, "learning_rate": 3.217285771350501e-05, "loss": 2.0601, "step": 12322000 }, { "epoch": 35.67, "learning_rate": 3.217213406585773e-05, "loss": 2.0905, "step": 12322500 }, { "epoch": 35.67, "learning_rate": 3.217141041821045e-05, "loss": 2.0784, "step": 12323000 }, { "epoch": 35.67, "learning_rate": 3.2170686770563174e-05, "loss": 2.0639, "step": 12323500 }, { "epoch": 35.67, "learning_rate": 3.216996457021119e-05, "loss": 2.0925, "step": 12324000 }, { "epoch": 35.67, "learning_rate": 3.216924236985921e-05, "loss": 2.0599, "step": 12324500 }, { "epoch": 35.68, "learning_rate": 3.2168518722211934e-05, "loss": 2.0615, "step": 12325000 }, { "epoch": 35.68, "learning_rate": 3.2167795074564656e-05, "loss": 2.0804, "step": 12325500 }, { "epoch": 35.68, "learning_rate": 3.216707142691738e-05, "loss": 2.0625, "step": 12326000 }, { "epoch": 35.68, "learning_rate": 3.21663477792701e-05, "loss": 2.08, "step": 12326500 }, { "epoch": 35.68, "learning_rate": 3.216562413162282e-05, "loss": 2.0676, "step": 12327000 }, { "epoch": 35.68, "learning_rate": 3.2164900483975545e-05, "loss": 2.0551, "step": 12327500 }, { "epoch": 35.68, "learning_rate": 3.216417683632827e-05, "loss": 2.0876, "step": 12328000 }, { "epoch": 35.69, "learning_rate": 3.216345318868099e-05, "loss": 2.0757, "step": 12328500 }, { "epoch": 35.69, "learning_rate": 3.216272954103372e-05, "loss": 2.0543, "step": 12329000 }, { "epoch": 35.69, "learning_rate": 3.216200734068174e-05, "loss": 2.0827, "step": 12329500 }, { "epoch": 35.69, "learning_rate": 3.216128369303446e-05, "loss": 2.0829, "step": 12330000 }, { "epoch": 35.69, "learning_rate": 3.216056149268248e-05, "loss": 2.0717, "step": 12330500 }, { "epoch": 35.69, "learning_rate": 3.21598378450352e-05, "loss": 2.0608, "step": 12331000 }, { "epoch": 35.69, "learning_rate": 3.215911419738792e-05, "loss": 2.0463, "step": 12331500 }, { "epoch": 35.7, "learning_rate": 3.2158390549740645e-05, "loss": 2.0652, "step": 12332000 }, { "epoch": 35.7, "learning_rate": 3.215766690209337e-05, "loss": 2.0926, "step": 12332500 }, { "epoch": 35.7, "learning_rate": 3.215694325444609e-05, "loss": 2.0827, "step": 12333000 }, { "epoch": 35.7, "learning_rate": 3.215621960679881e-05, "loss": 2.0611, "step": 12333500 }, { "epoch": 35.7, "learning_rate": 3.215549595915154e-05, "loss": 2.0753, "step": 12334000 }, { "epoch": 35.7, "learning_rate": 3.215477375879956e-05, "loss": 2.0613, "step": 12334500 }, { "epoch": 35.7, "learning_rate": 3.215405011115228e-05, "loss": 2.0682, "step": 12335000 }, { "epoch": 35.71, "learning_rate": 3.2153326463505e-05, "loss": 2.0723, "step": 12335500 }, { "epoch": 35.71, "learning_rate": 3.215260281585772e-05, "loss": 2.0738, "step": 12336000 }, { "epoch": 35.71, "learning_rate": 3.215188061550574e-05, "loss": 2.0801, "step": 12336500 }, { "epoch": 35.71, "learning_rate": 3.215115696785847e-05, "loss": 2.0988, "step": 12337000 }, { "epoch": 35.71, "learning_rate": 3.215043332021119e-05, "loss": 2.0863, "step": 12337500 }, { "epoch": 35.71, "learning_rate": 3.214970967256391e-05, "loss": 2.0766, "step": 12338000 }, { "epoch": 35.71, "learning_rate": 3.2148987472211935e-05, "loss": 2.057, "step": 12338500 }, { "epoch": 35.72, "learning_rate": 3.214826382456466e-05, "loss": 2.0669, "step": 12339000 }, { "epoch": 35.72, "learning_rate": 3.214754017691738e-05, "loss": 2.0566, "step": 12339500 }, { "epoch": 35.72, "learning_rate": 3.2146817976565395e-05, "loss": 2.07, "step": 12340000 }, { "epoch": 35.72, "learning_rate": 3.214609432891812e-05, "loss": 2.0699, "step": 12340500 }, { "epoch": 35.72, "learning_rate": 3.214537068127084e-05, "loss": 2.0725, "step": 12341000 }, { "epoch": 35.72, "learning_rate": 3.214464703362357e-05, "loss": 2.0654, "step": 12341500 }, { "epoch": 35.73, "learning_rate": 3.214392338597629e-05, "loss": 2.095, "step": 12342000 }, { "epoch": 35.73, "learning_rate": 3.214319973832901e-05, "loss": 2.0897, "step": 12342500 }, { "epoch": 35.73, "learning_rate": 3.2142476090681735e-05, "loss": 2.0639, "step": 12343000 }, { "epoch": 35.73, "learning_rate": 3.214175244303446e-05, "loss": 2.0747, "step": 12343500 }, { "epoch": 35.73, "learning_rate": 3.214103024268247e-05, "loss": 2.0985, "step": 12344000 }, { "epoch": 35.73, "learning_rate": 3.21403065950352e-05, "loss": 2.0821, "step": 12344500 }, { "epoch": 35.73, "learning_rate": 3.2139582947387924e-05, "loss": 2.0772, "step": 12345000 }, { "epoch": 35.74, "learning_rate": 3.2138859299740646e-05, "loss": 2.0793, "step": 12345500 }, { "epoch": 35.74, "learning_rate": 3.213813565209337e-05, "loss": 2.0757, "step": 12346000 }, { "epoch": 35.74, "learning_rate": 3.213741345174139e-05, "loss": 2.0579, "step": 12346500 }, { "epoch": 35.74, "learning_rate": 3.2136691251389406e-05, "loss": 2.0839, "step": 12347000 }, { "epoch": 35.74, "learning_rate": 3.213596760374213e-05, "loss": 2.0958, "step": 12347500 }, { "epoch": 35.74, "learning_rate": 3.213524395609485e-05, "loss": 2.0923, "step": 12348000 }, { "epoch": 35.74, "learning_rate": 3.213452030844757e-05, "loss": 2.072, "step": 12348500 }, { "epoch": 35.75, "learning_rate": 3.2133796660800295e-05, "loss": 2.0561, "step": 12349000 }, { "epoch": 35.75, "learning_rate": 3.213307301315302e-05, "loss": 2.1017, "step": 12349500 }, { "epoch": 35.75, "learning_rate": 3.213234936550574e-05, "loss": 2.0697, "step": 12350000 }, { "epoch": 35.75, "learning_rate": 3.213162571785847e-05, "loss": 2.0788, "step": 12350500 }, { "epoch": 35.75, "learning_rate": 3.213090207021119e-05, "loss": 2.0646, "step": 12351000 }, { "epoch": 35.75, "learning_rate": 3.213017842256392e-05, "loss": 2.1037, "step": 12351500 }, { "epoch": 35.75, "learning_rate": 3.212945477491664e-05, "loss": 2.0851, "step": 12352000 }, { "epoch": 35.76, "learning_rate": 3.2128731127269364e-05, "loss": 2.0882, "step": 12352500 }, { "epoch": 35.76, "learning_rate": 3.212800747962209e-05, "loss": 2.1017, "step": 12353000 }, { "epoch": 35.76, "learning_rate": 3.212728383197481e-05, "loss": 2.0967, "step": 12353500 }, { "epoch": 35.76, "learning_rate": 3.2126561631622824e-05, "loss": 2.0773, "step": 12354000 }, { "epoch": 35.76, "learning_rate": 3.212583798397555e-05, "loss": 2.0672, "step": 12354500 }, { "epoch": 35.76, "learning_rate": 3.212511433632827e-05, "loss": 2.1051, "step": 12355000 }, { "epoch": 35.76, "learning_rate": 3.212439068868099e-05, "loss": 2.0756, "step": 12355500 }, { "epoch": 35.77, "learning_rate": 3.212366704103372e-05, "loss": 2.0727, "step": 12356000 }, { "epoch": 35.77, "learning_rate": 3.212294339338644e-05, "loss": 2.0646, "step": 12356500 }, { "epoch": 35.77, "learning_rate": 3.212222119303446e-05, "loss": 2.0542, "step": 12357000 }, { "epoch": 35.77, "learning_rate": 3.212149754538718e-05, "loss": 2.0827, "step": 12357500 }, { "epoch": 35.77, "learning_rate": 3.2120775345035196e-05, "loss": 2.0811, "step": 12358000 }, { "epoch": 35.77, "learning_rate": 3.212005169738792e-05, "loss": 2.0549, "step": 12358500 }, { "epoch": 35.77, "learning_rate": 3.211932804974064e-05, "loss": 2.0801, "step": 12359000 }, { "epoch": 35.78, "learning_rate": 3.211860440209337e-05, "loss": 2.0765, "step": 12359500 }, { "epoch": 35.78, "learning_rate": 3.211788220174139e-05, "loss": 2.0914, "step": 12360000 }, { "epoch": 35.78, "learning_rate": 3.2117158554094114e-05, "loss": 2.0775, "step": 12360500 }, { "epoch": 35.78, "learning_rate": 3.2116434906446836e-05, "loss": 2.0919, "step": 12361000 }, { "epoch": 35.78, "learning_rate": 3.211571125879956e-05, "loss": 2.0749, "step": 12361500 }, { "epoch": 35.78, "learning_rate": 3.2114989058447574e-05, "loss": 2.069, "step": 12362000 }, { "epoch": 35.78, "learning_rate": 3.2114265410800296e-05, "loss": 2.0814, "step": 12362500 }, { "epoch": 35.79, "learning_rate": 3.211354176315302e-05, "loss": 2.0694, "step": 12363000 }, { "epoch": 35.79, "learning_rate": 3.211281811550575e-05, "loss": 2.08, "step": 12363500 }, { "epoch": 35.79, "learning_rate": 3.211209446785847e-05, "loss": 2.0777, "step": 12364000 }, { "epoch": 35.79, "learning_rate": 3.211137082021119e-05, "loss": 2.0801, "step": 12364500 }, { "epoch": 35.79, "learning_rate": 3.2110647172563914e-05, "loss": 2.0677, "step": 12365000 }, { "epoch": 35.79, "learning_rate": 3.2109923524916636e-05, "loss": 2.0672, "step": 12365500 }, { "epoch": 35.79, "learning_rate": 3.210919987726936e-05, "loss": 2.0767, "step": 12366000 }, { "epoch": 35.8, "learning_rate": 3.210847622962209e-05, "loss": 2.0748, "step": 12366500 }, { "epoch": 35.8, "learning_rate": 3.210775258197481e-05, "loss": 2.0755, "step": 12367000 }, { "epoch": 35.8, "learning_rate": 3.210702893432753e-05, "loss": 2.0837, "step": 12367500 }, { "epoch": 35.8, "learning_rate": 3.2106305286680254e-05, "loss": 2.0788, "step": 12368000 }, { "epoch": 35.8, "learning_rate": 3.2105581639032976e-05, "loss": 2.0958, "step": 12368500 }, { "epoch": 35.8, "learning_rate": 3.21048579913857e-05, "loss": 2.0801, "step": 12369000 }, { "epoch": 35.8, "learning_rate": 3.210413434373842e-05, "loss": 2.074, "step": 12369500 }, { "epoch": 35.81, "learning_rate": 3.210341069609114e-05, "loss": 2.068, "step": 12370000 }, { "epoch": 35.81, "learning_rate": 3.2102688495739165e-05, "loss": 2.0894, "step": 12370500 }, { "epoch": 35.81, "learning_rate": 3.210196484809189e-05, "loss": 2.0845, "step": 12371000 }, { "epoch": 35.81, "learning_rate": 3.210124120044461e-05, "loss": 2.0939, "step": 12371500 }, { "epoch": 35.81, "learning_rate": 3.210051755279733e-05, "loss": 2.0645, "step": 12372000 }, { "epoch": 35.81, "learning_rate": 3.2099793905150054e-05, "loss": 2.0725, "step": 12372500 }, { "epoch": 35.81, "learning_rate": 3.2099070257502777e-05, "loss": 2.0645, "step": 12373000 }, { "epoch": 35.82, "learning_rate": 3.2098346609855506e-05, "loss": 2.0696, "step": 12373500 }, { "epoch": 35.82, "learning_rate": 3.209762440950352e-05, "loss": 2.0794, "step": 12374000 }, { "epoch": 35.82, "learning_rate": 3.2096902209151543e-05, "loss": 2.0687, "step": 12374500 }, { "epoch": 35.82, "learning_rate": 3.2096178561504266e-05, "loss": 2.0684, "step": 12375000 }, { "epoch": 35.82, "learning_rate": 3.209545491385699e-05, "loss": 2.1013, "step": 12375500 }, { "epoch": 35.82, "learning_rate": 3.209473126620971e-05, "loss": 2.0805, "step": 12376000 }, { "epoch": 35.82, "learning_rate": 3.209400761856243e-05, "loss": 2.0652, "step": 12376500 }, { "epoch": 35.83, "learning_rate": 3.2093283970915155e-05, "loss": 2.078, "step": 12377000 }, { "epoch": 35.83, "learning_rate": 3.209256032326788e-05, "loss": 2.0767, "step": 12377500 }, { "epoch": 35.83, "learning_rate": 3.20918366756206e-05, "loss": 2.0718, "step": 12378000 }, { "epoch": 35.83, "learning_rate": 3.209111302797332e-05, "loss": 2.0854, "step": 12378500 }, { "epoch": 35.83, "learning_rate": 3.2090390827621344e-05, "loss": 2.0548, "step": 12379000 }, { "epoch": 35.83, "learning_rate": 3.2089667179974066e-05, "loss": 2.0751, "step": 12379500 }, { "epoch": 35.84, "learning_rate": 3.208894353232679e-05, "loss": 2.0785, "step": 12380000 }, { "epoch": 35.84, "learning_rate": 3.208821988467951e-05, "loss": 2.0649, "step": 12380500 }, { "epoch": 35.84, "learning_rate": 3.2087497684327526e-05, "loss": 2.06, "step": 12381000 }, { "epoch": 35.84, "learning_rate": 3.2086774036680255e-05, "loss": 2.0807, "step": 12381500 }, { "epoch": 35.84, "learning_rate": 3.208605038903298e-05, "loss": 2.0539, "step": 12382000 }, { "epoch": 35.84, "learning_rate": 3.20853267413857e-05, "loss": 2.0846, "step": 12382500 }, { "epoch": 35.84, "learning_rate": 3.2084605988329015e-05, "loss": 2.0738, "step": 12383000 }, { "epoch": 35.85, "learning_rate": 3.208388234068174e-05, "loss": 2.0586, "step": 12383500 }, { "epoch": 35.85, "learning_rate": 3.208315869303446e-05, "loss": 2.0694, "step": 12384000 }, { "epoch": 35.85, "learning_rate": 3.208243504538718e-05, "loss": 2.0706, "step": 12384500 }, { "epoch": 35.85, "learning_rate": 3.2081711397739904e-05, "loss": 2.0649, "step": 12385000 }, { "epoch": 35.85, "learning_rate": 3.2080987750092626e-05, "loss": 2.0862, "step": 12385500 }, { "epoch": 35.85, "learning_rate": 3.208026410244535e-05, "loss": 2.07, "step": 12386000 }, { "epoch": 35.85, "learning_rate": 3.207954045479807e-05, "loss": 2.0759, "step": 12386500 }, { "epoch": 35.86, "learning_rate": 3.20788168071508e-05, "loss": 2.0763, "step": 12387000 }, { "epoch": 35.86, "learning_rate": 3.207809605409411e-05, "loss": 2.0803, "step": 12387500 }, { "epoch": 35.86, "learning_rate": 3.2077373853742124e-05, "loss": 2.0679, "step": 12388000 }, { "epoch": 35.86, "learning_rate": 3.2076650206094846e-05, "loss": 2.0741, "step": 12388500 }, { "epoch": 35.86, "learning_rate": 3.2075926558447575e-05, "loss": 2.1108, "step": 12389000 }, { "epoch": 35.86, "learning_rate": 3.20752029108003e-05, "loss": 2.0748, "step": 12389500 }, { "epoch": 35.86, "learning_rate": 3.207447926315303e-05, "loss": 2.0829, "step": 12390000 }, { "epoch": 35.87, "learning_rate": 3.207375706280104e-05, "loss": 2.0733, "step": 12390500 }, { "epoch": 35.87, "learning_rate": 3.2073033415153764e-05, "loss": 2.0505, "step": 12391000 }, { "epoch": 35.87, "learning_rate": 3.207230976750649e-05, "loss": 2.0956, "step": 12391500 }, { "epoch": 35.87, "learning_rate": 3.207158611985921e-05, "loss": 2.0625, "step": 12392000 }, { "epoch": 35.87, "learning_rate": 3.207086247221193e-05, "loss": 2.1094, "step": 12392500 }, { "epoch": 35.87, "learning_rate": 3.207013882456465e-05, "loss": 2.0948, "step": 12393000 }, { "epoch": 35.87, "learning_rate": 3.2069415176917376e-05, "loss": 2.0757, "step": 12393500 }, { "epoch": 35.88, "learning_rate": 3.20686915292701e-05, "loss": 2.0879, "step": 12394000 }, { "epoch": 35.88, "learning_rate": 3.206796932891812e-05, "loss": 2.083, "step": 12394500 }, { "epoch": 35.88, "learning_rate": 3.206724568127084e-05, "loss": 2.0923, "step": 12395000 }, { "epoch": 35.88, "learning_rate": 3.2066522033623565e-05, "loss": 2.0571, "step": 12395500 }, { "epoch": 35.88, "learning_rate": 3.206579838597629e-05, "loss": 2.0994, "step": 12396000 }, { "epoch": 35.88, "learning_rate": 3.206507473832901e-05, "loss": 2.0584, "step": 12396500 }, { "epoch": 35.88, "learning_rate": 3.206435109068174e-05, "loss": 2.0532, "step": 12397000 }, { "epoch": 35.89, "learning_rate": 3.206362744303446e-05, "loss": 2.0969, "step": 12397500 }, { "epoch": 35.89, "learning_rate": 3.2062905242682476e-05, "loss": 2.0907, "step": 12398000 }, { "epoch": 35.89, "learning_rate": 3.20621815950352e-05, "loss": 2.0594, "step": 12398500 }, { "epoch": 35.89, "learning_rate": 3.206145794738793e-05, "loss": 2.0663, "step": 12399000 }, { "epoch": 35.89, "learning_rate": 3.206073429974065e-05, "loss": 2.076, "step": 12399500 }, { "epoch": 35.89, "learning_rate": 3.206001065209337e-05, "loss": 2.0678, "step": 12400000 }, { "epoch": 35.89, "learning_rate": 3.2059287004446094e-05, "loss": 2.0838, "step": 12400500 }, { "epoch": 35.9, "learning_rate": 3.2058563356798816e-05, "loss": 2.065, "step": 12401000 }, { "epoch": 35.9, "learning_rate": 3.205784115644683e-05, "loss": 2.0945, "step": 12401500 }, { "epoch": 35.9, "learning_rate": 3.2057117508799554e-05, "loss": 2.0806, "step": 12402000 }, { "epoch": 35.9, "learning_rate": 3.2056393861152276e-05, "loss": 2.0867, "step": 12402500 }, { "epoch": 35.9, "learning_rate": 3.2055670213505e-05, "loss": 2.1111, "step": 12403000 }, { "epoch": 35.9, "learning_rate": 3.205494656585773e-05, "loss": 2.0866, "step": 12403500 }, { "epoch": 35.9, "learning_rate": 3.205422291821045e-05, "loss": 2.0932, "step": 12404000 }, { "epoch": 35.91, "learning_rate": 3.205349927056318e-05, "loss": 2.0781, "step": 12404500 }, { "epoch": 35.91, "learning_rate": 3.20527756229159e-05, "loss": 2.094, "step": 12405000 }, { "epoch": 35.91, "learning_rate": 3.205205197526862e-05, "loss": 2.0925, "step": 12405500 }, { "epoch": 35.91, "learning_rate": 3.2051328327621345e-05, "loss": 2.0854, "step": 12406000 }, { "epoch": 35.91, "learning_rate": 3.205060467997407e-05, "loss": 2.0633, "step": 12406500 }, { "epoch": 35.91, "learning_rate": 3.204988247962208e-05, "loss": 2.0532, "step": 12407000 }, { "epoch": 35.91, "learning_rate": 3.2049158831974805e-05, "loss": 2.0783, "step": 12407500 }, { "epoch": 35.92, "learning_rate": 3.204843518432753e-05, "loss": 2.0743, "step": 12408000 }, { "epoch": 35.92, "learning_rate": 3.204771298397555e-05, "loss": 2.0825, "step": 12408500 }, { "epoch": 35.92, "learning_rate": 3.204698933632827e-05, "loss": 2.0698, "step": 12409000 }, { "epoch": 35.92, "learning_rate": 3.2046265688680994e-05, "loss": 2.0744, "step": 12409500 }, { "epoch": 35.92, "learning_rate": 3.2045542041033717e-05, "loss": 2.081, "step": 12410000 }, { "epoch": 35.92, "learning_rate": 3.204481839338644e-05, "loss": 2.0975, "step": 12410500 }, { "epoch": 35.92, "learning_rate": 3.204409474573916e-05, "loss": 2.0849, "step": 12411000 }, { "epoch": 35.93, "learning_rate": 3.204337254538718e-05, "loss": 2.085, "step": 12411500 }, { "epoch": 35.93, "learning_rate": 3.2042648897739906e-05, "loss": 2.0817, "step": 12412000 }, { "epoch": 35.93, "learning_rate": 3.204192525009263e-05, "loss": 2.0916, "step": 12412500 }, { "epoch": 35.93, "learning_rate": 3.204120160244535e-05, "loss": 2.0827, "step": 12413000 }, { "epoch": 35.93, "learning_rate": 3.204047795479808e-05, "loss": 2.0756, "step": 12413500 }, { "epoch": 35.93, "learning_rate": 3.20397543071508e-05, "loss": 2.0696, "step": 12414000 }, { "epoch": 35.93, "learning_rate": 3.2039030659503524e-05, "loss": 2.0673, "step": 12414500 }, { "epoch": 35.94, "learning_rate": 3.203830845915154e-05, "loss": 2.0982, "step": 12415000 }, { "epoch": 35.94, "learning_rate": 3.203758481150426e-05, "loss": 2.0743, "step": 12415500 }, { "epoch": 35.94, "learning_rate": 3.2036861163856984e-05, "loss": 2.0681, "step": 12416000 }, { "epoch": 35.94, "learning_rate": 3.2036137516209706e-05, "loss": 2.084, "step": 12416500 }, { "epoch": 35.94, "learning_rate": 3.203541386856243e-05, "loss": 2.0789, "step": 12417000 }, { "epoch": 35.94, "learning_rate": 3.203469166821045e-05, "loss": 2.0807, "step": 12417500 }, { "epoch": 35.95, "learning_rate": 3.203396802056317e-05, "loss": 2.0525, "step": 12418000 }, { "epoch": 35.95, "learning_rate": 3.2033244372915895e-05, "loss": 2.0452, "step": 12418500 }, { "epoch": 35.95, "learning_rate": 3.2032520725268624e-05, "loss": 2.0895, "step": 12419000 }, { "epoch": 35.95, "learning_rate": 3.2031797077621346e-05, "loss": 2.0621, "step": 12419500 }, { "epoch": 35.95, "learning_rate": 3.203107342997407e-05, "loss": 2.0865, "step": 12420000 }, { "epoch": 35.95, "learning_rate": 3.203034978232679e-05, "loss": 2.049, "step": 12420500 }, { "epoch": 35.95, "learning_rate": 3.202962613467951e-05, "loss": 2.0984, "step": 12421000 }, { "epoch": 35.96, "learning_rate": 3.202890393432753e-05, "loss": 2.0844, "step": 12421500 }, { "epoch": 35.96, "learning_rate": 3.202818028668025e-05, "loss": 2.0891, "step": 12422000 }, { "epoch": 35.96, "learning_rate": 3.202745663903298e-05, "loss": 2.0873, "step": 12422500 }, { "epoch": 35.96, "learning_rate": 3.2026734438680995e-05, "loss": 2.0764, "step": 12423000 }, { "epoch": 35.96, "learning_rate": 3.202601079103372e-05, "loss": 2.0731, "step": 12423500 }, { "epoch": 35.96, "learning_rate": 3.202528714338644e-05, "loss": 2.0592, "step": 12424000 }, { "epoch": 35.96, "learning_rate": 3.202456349573916e-05, "loss": 2.0657, "step": 12424500 }, { "epoch": 35.97, "learning_rate": 3.2023839848091884e-05, "loss": 2.0917, "step": 12425000 }, { "epoch": 35.97, "learning_rate": 3.2023117647739906e-05, "loss": 2.08, "step": 12425500 }, { "epoch": 35.97, "learning_rate": 3.202239400009263e-05, "loss": 2.0762, "step": 12426000 }, { "epoch": 35.97, "learning_rate": 3.202167035244536e-05, "loss": 2.0708, "step": 12426500 }, { "epoch": 35.97, "learning_rate": 3.202094670479808e-05, "loss": 2.0826, "step": 12427000 }, { "epoch": 35.97, "learning_rate": 3.20202230571508e-05, "loss": 2.0832, "step": 12427500 }, { "epoch": 35.97, "learning_rate": 3.2019499409503524e-05, "loss": 2.0948, "step": 12428000 }, { "epoch": 35.98, "learning_rate": 3.201877720915154e-05, "loss": 2.0785, "step": 12428500 }, { "epoch": 35.98, "learning_rate": 3.201805356150426e-05, "loss": 2.0773, "step": 12429000 }, { "epoch": 35.98, "learning_rate": 3.2017329913856984e-05, "loss": 2.0607, "step": 12429500 }, { "epoch": 35.98, "learning_rate": 3.2016606266209707e-05, "loss": 2.1062, "step": 12430000 }, { "epoch": 35.98, "learning_rate": 3.201588261856243e-05, "loss": 2.0751, "step": 12430500 }, { "epoch": 35.98, "learning_rate": 3.201515897091516e-05, "loss": 2.0714, "step": 12431000 }, { "epoch": 35.98, "learning_rate": 3.201443532326788e-05, "loss": 2.0867, "step": 12431500 }, { "epoch": 35.99, "learning_rate": 3.20137116756206e-05, "loss": 2.0759, "step": 12432000 }, { "epoch": 35.99, "learning_rate": 3.2012988027973325e-05, "loss": 2.0983, "step": 12432500 }, { "epoch": 35.99, "learning_rate": 3.201226438032605e-05, "loss": 2.0554, "step": 12433000 }, { "epoch": 35.99, "learning_rate": 3.2011540732678776e-05, "loss": 2.0906, "step": 12433500 }, { "epoch": 35.99, "learning_rate": 3.201081853232679e-05, "loss": 2.0784, "step": 12434000 }, { "epoch": 35.99, "learning_rate": 3.2010094884679514e-05, "loss": 2.068, "step": 12434500 }, { "epoch": 35.99, "learning_rate": 3.2009371237032236e-05, "loss": 2.06, "step": 12435000 }, { "epoch": 36.0, "learning_rate": 3.200864903668026e-05, "loss": 2.0647, "step": 12435500 }, { "epoch": 36.0, "learning_rate": 3.200792538903298e-05, "loss": 2.0761, "step": 12436000 }, { "epoch": 36.0, "learning_rate": 3.2007203188680996e-05, "loss": 2.0755, "step": 12436500 }, { "epoch": 36.0, "eval_accuracy": 0.6696317492810138, "eval_accuracy_mlm": 0.6346618176717469, "eval_accuracy_nsp": 0.8570164945713199, "eval_loss": 2.170750141143799, "eval_runtime": 331.4091, "eval_samples_per_second": 1316.759, "eval_steps_per_second": 54.866, "step": 12436992 }, { "epoch": 36.0, "learning_rate": 3.200647954103372e-05, "loss": 2.0835, "step": 12437000 }, { "epoch": 36.0, "learning_rate": 3.200575589338644e-05, "loss": 2.0695, "step": 12437500 }, { "epoch": 36.0, "learning_rate": 3.200503224573916e-05, "loss": 2.0772, "step": 12438000 }, { "epoch": 36.0, "learning_rate": 3.2004308598091885e-05, "loss": 2.0507, "step": 12438500 }, { "epoch": 36.01, "learning_rate": 3.200358495044461e-05, "loss": 2.0681, "step": 12439000 }, { "epoch": 36.01, "learning_rate": 3.200286130279733e-05, "loss": 2.0525, "step": 12439500 }, { "epoch": 36.01, "learning_rate": 3.200213765515006e-05, "loss": 2.0395, "step": 12440000 }, { "epoch": 36.01, "learning_rate": 3.200141400750278e-05, "loss": 2.0736, "step": 12440500 }, { "epoch": 36.01, "learning_rate": 3.200069035985551e-05, "loss": 2.0698, "step": 12441000 }, { "epoch": 36.01, "learning_rate": 3.199996671220823e-05, "loss": 2.0789, "step": 12441500 }, { "epoch": 36.01, "learning_rate": 3.199924595915154e-05, "loss": 2.0589, "step": 12442000 }, { "epoch": 36.02, "learning_rate": 3.199852231150426e-05, "loss": 2.0533, "step": 12442500 }, { "epoch": 36.02, "learning_rate": 3.1997798663856985e-05, "loss": 2.0502, "step": 12443000 }, { "epoch": 36.02, "learning_rate": 3.199707501620971e-05, "loss": 2.0702, "step": 12443500 }, { "epoch": 36.02, "learning_rate": 3.199635136856243e-05, "loss": 2.0126, "step": 12444000 }, { "epoch": 36.02, "learning_rate": 3.199562772091516e-05, "loss": 2.0865, "step": 12444500 }, { "epoch": 36.02, "learning_rate": 3.199490407326788e-05, "loss": 2.0577, "step": 12445000 }, { "epoch": 36.02, "learning_rate": 3.19941804256206e-05, "loss": 2.0676, "step": 12445500 }, { "epoch": 36.03, "learning_rate": 3.1993456777973325e-05, "loss": 2.0435, "step": 12446000 }, { "epoch": 36.03, "learning_rate": 3.199273313032605e-05, "loss": 2.0495, "step": 12446500 }, { "epoch": 36.03, "learning_rate": 3.199200948267877e-05, "loss": 2.0509, "step": 12447000 }, { "epoch": 36.03, "learning_rate": 3.199128583503149e-05, "loss": 2.0273, "step": 12447500 }, { "epoch": 36.03, "learning_rate": 3.1990562187384214e-05, "loss": 2.0517, "step": 12448000 }, { "epoch": 36.03, "learning_rate": 3.198983853973694e-05, "loss": 2.0624, "step": 12448500 }, { "epoch": 36.03, "learning_rate": 3.1989114892089666e-05, "loss": 2.0471, "step": 12449000 }, { "epoch": 36.04, "learning_rate": 3.198839269173768e-05, "loss": 2.0762, "step": 12449500 }, { "epoch": 36.04, "learning_rate": 3.198766904409041e-05, "loss": 2.0488, "step": 12450000 }, { "epoch": 36.04, "learning_rate": 3.1986946843738426e-05, "loss": 2.0747, "step": 12450500 }, { "epoch": 36.04, "learning_rate": 3.198622464338644e-05, "loss": 2.059, "step": 12451000 }, { "epoch": 36.04, "learning_rate": 3.1985500995739163e-05, "loss": 2.0505, "step": 12451500 }, { "epoch": 36.04, "learning_rate": 3.1984777348091886e-05, "loss": 2.0622, "step": 12452000 }, { "epoch": 36.04, "learning_rate": 3.198405370044461e-05, "loss": 2.0598, "step": 12452500 }, { "epoch": 36.05, "learning_rate": 3.198333005279733e-05, "loss": 2.0451, "step": 12453000 }, { "epoch": 36.05, "learning_rate": 3.198260640515006e-05, "loss": 2.0541, "step": 12453500 }, { "epoch": 36.05, "learning_rate": 3.198188275750278e-05, "loss": 2.0547, "step": 12454000 }, { "epoch": 36.05, "learning_rate": 3.1981159109855504e-05, "loss": 2.0726, "step": 12454500 }, { "epoch": 36.05, "learning_rate": 3.1980435462208226e-05, "loss": 2.0486, "step": 12455000 }, { "epoch": 36.05, "learning_rate": 3.197971181456095e-05, "loss": 2.0548, "step": 12455500 }, { "epoch": 36.06, "learning_rate": 3.197898816691368e-05, "loss": 2.0418, "step": 12456000 }, { "epoch": 36.06, "learning_rate": 3.19782645192664e-05, "loss": 2.0776, "step": 12456500 }, { "epoch": 36.06, "learning_rate": 3.197754087161912e-05, "loss": 2.0645, "step": 12457000 }, { "epoch": 36.06, "learning_rate": 3.1976817223971844e-05, "loss": 2.0543, "step": 12457500 }, { "epoch": 36.06, "learning_rate": 3.1976093576324566e-05, "loss": 2.0534, "step": 12458000 }, { "epoch": 36.06, "learning_rate": 3.197536992867729e-05, "loss": 2.0317, "step": 12458500 }, { "epoch": 36.06, "learning_rate": 3.197464772832531e-05, "loss": 2.0799, "step": 12459000 }, { "epoch": 36.07, "learning_rate": 3.197392408067803e-05, "loss": 2.0623, "step": 12459500 }, { "epoch": 36.07, "learning_rate": 3.197320188032605e-05, "loss": 2.0328, "step": 12460000 }, { "epoch": 36.07, "learning_rate": 3.197247823267877e-05, "loss": 2.0482, "step": 12460500 }, { "epoch": 36.07, "learning_rate": 3.197175458503149e-05, "loss": 2.0596, "step": 12461000 }, { "epoch": 36.07, "learning_rate": 3.1971030937384215e-05, "loss": 2.0633, "step": 12461500 }, { "epoch": 36.07, "learning_rate": 3.197030728973694e-05, "loss": 2.0215, "step": 12462000 }, { "epoch": 36.07, "learning_rate": 3.196958364208966e-05, "loss": 2.0716, "step": 12462500 }, { "epoch": 36.08, "learning_rate": 3.196885999444238e-05, "loss": 2.0564, "step": 12463000 }, { "epoch": 36.08, "learning_rate": 3.196813779409041e-05, "loss": 2.065, "step": 12463500 }, { "epoch": 36.08, "learning_rate": 3.196741414644313e-05, "loss": 2.0426, "step": 12464000 }, { "epoch": 36.08, "learning_rate": 3.1966690498795855e-05, "loss": 2.0379, "step": 12464500 }, { "epoch": 36.08, "learning_rate": 3.196596685114858e-05, "loss": 2.075, "step": 12465000 }, { "epoch": 36.08, "learning_rate": 3.19652432035013e-05, "loss": 2.0741, "step": 12465500 }, { "epoch": 36.08, "learning_rate": 3.196451955585402e-05, "loss": 2.0446, "step": 12466000 }, { "epoch": 36.09, "learning_rate": 3.196379735550204e-05, "loss": 2.0782, "step": 12466500 }, { "epoch": 36.09, "learning_rate": 3.196307370785476e-05, "loss": 2.0721, "step": 12467000 }, { "epoch": 36.09, "learning_rate": 3.196235006020749e-05, "loss": 2.09, "step": 12467500 }, { "epoch": 36.09, "learning_rate": 3.196162641256021e-05, "loss": 2.0738, "step": 12468000 }, { "epoch": 36.09, "learning_rate": 3.196090276491293e-05, "loss": 2.051, "step": 12468500 }, { "epoch": 36.09, "learning_rate": 3.1960179117265656e-05, "loss": 2.0632, "step": 12469000 }, { "epoch": 36.09, "learning_rate": 3.195945691691367e-05, "loss": 2.0712, "step": 12469500 }, { "epoch": 36.1, "learning_rate": 3.195873326926639e-05, "loss": 2.0805, "step": 12470000 }, { "epoch": 36.1, "learning_rate": 3.1958009621619116e-05, "loss": 2.0623, "step": 12470500 }, { "epoch": 36.1, "learning_rate": 3.1957285973971845e-05, "loss": 2.0721, "step": 12471000 }, { "epoch": 36.1, "learning_rate": 3.195656377361986e-05, "loss": 2.0635, "step": 12471500 }, { "epoch": 36.1, "learning_rate": 3.195584012597259e-05, "loss": 2.0499, "step": 12472000 }, { "epoch": 36.1, "learning_rate": 3.195511647832531e-05, "loss": 2.0377, "step": 12472500 }, { "epoch": 36.1, "learning_rate": 3.1954392830678034e-05, "loss": 2.0515, "step": 12473000 }, { "epoch": 36.11, "learning_rate": 3.1953669183030756e-05, "loss": 2.0584, "step": 12473500 }, { "epoch": 36.11, "learning_rate": 3.195294553538348e-05, "loss": 2.099, "step": 12474000 }, { "epoch": 36.11, "learning_rate": 3.19522218877362e-05, "loss": 2.0449, "step": 12474500 }, { "epoch": 36.11, "learning_rate": 3.195149824008892e-05, "loss": 2.0751, "step": 12475000 }, { "epoch": 36.11, "learning_rate": 3.1950774592441645e-05, "loss": 2.0738, "step": 12475500 }, { "epoch": 36.11, "learning_rate": 3.195005239208966e-05, "loss": 2.0558, "step": 12476000 }, { "epoch": 36.11, "learning_rate": 3.194932874444239e-05, "loss": 2.0682, "step": 12476500 }, { "epoch": 36.12, "learning_rate": 3.194860509679511e-05, "loss": 2.0579, "step": 12477000 }, { "epoch": 36.12, "learning_rate": 3.1947881449147834e-05, "loss": 2.0891, "step": 12477500 }, { "epoch": 36.12, "learning_rate": 3.194715924879585e-05, "loss": 2.0719, "step": 12478000 }, { "epoch": 36.12, "learning_rate": 3.194643560114858e-05, "loss": 2.066, "step": 12478500 }, { "epoch": 36.12, "learning_rate": 3.1945713400796594e-05, "loss": 2.0694, "step": 12479000 }, { "epoch": 36.12, "learning_rate": 3.1944989753149316e-05, "loss": 2.0415, "step": 12479500 }, { "epoch": 36.12, "learning_rate": 3.194426610550204e-05, "loss": 2.0602, "step": 12480000 }, { "epoch": 36.13, "learning_rate": 3.194354245785476e-05, "loss": 2.0712, "step": 12480500 }, { "epoch": 36.13, "learning_rate": 3.194281881020749e-05, "loss": 2.0438, "step": 12481000 }, { "epoch": 36.13, "learning_rate": 3.194209516256021e-05, "loss": 2.0441, "step": 12481500 }, { "epoch": 36.13, "learning_rate": 3.1941371514912934e-05, "loss": 2.0848, "step": 12482000 }, { "epoch": 36.13, "learning_rate": 3.1940647867265656e-05, "loss": 2.0767, "step": 12482500 }, { "epoch": 36.13, "learning_rate": 3.193992566691367e-05, "loss": 2.0787, "step": 12483000 }, { "epoch": 36.13, "learning_rate": 3.1939202019266394e-05, "loss": 2.0593, "step": 12483500 }, { "epoch": 36.14, "learning_rate": 3.1938478371619116e-05, "loss": 2.0529, "step": 12484000 }, { "epoch": 36.14, "learning_rate": 3.193775472397184e-05, "loss": 2.0689, "step": 12484500 }, { "epoch": 36.14, "learning_rate": 3.193703107632456e-05, "loss": 2.068, "step": 12485000 }, { "epoch": 36.14, "learning_rate": 3.193630887597258e-05, "loss": 2.0508, "step": 12485500 }, { "epoch": 36.14, "learning_rate": 3.193558522832531e-05, "loss": 2.065, "step": 12486000 }, { "epoch": 36.14, "learning_rate": 3.1934861580678034e-05, "loss": 2.0539, "step": 12486500 }, { "epoch": 36.14, "learning_rate": 3.193413793303076e-05, "loss": 2.0388, "step": 12487000 }, { "epoch": 36.15, "learning_rate": 3.193341428538348e-05, "loss": 2.0539, "step": 12487500 }, { "epoch": 36.15, "learning_rate": 3.19326906377362e-05, "loss": 2.0467, "step": 12488000 }, { "epoch": 36.15, "learning_rate": 3.193196699008892e-05, "loss": 2.0355, "step": 12488500 }, { "epoch": 36.15, "learning_rate": 3.1931243342441646e-05, "loss": 2.0612, "step": 12489000 }, { "epoch": 36.15, "learning_rate": 3.193051969479437e-05, "loss": 2.0495, "step": 12489500 }, { "epoch": 36.15, "learning_rate": 3.192979604714709e-05, "loss": 2.0295, "step": 12490000 }, { "epoch": 36.15, "learning_rate": 3.192907239949981e-05, "loss": 2.0481, "step": 12490500 }, { "epoch": 36.16, "learning_rate": 3.1928350199147835e-05, "loss": 2.0497, "step": 12491000 }, { "epoch": 36.16, "learning_rate": 3.192762655150056e-05, "loss": 2.0675, "step": 12491500 }, { "epoch": 36.16, "learning_rate": 3.192690290385328e-05, "loss": 2.0984, "step": 12492000 }, { "epoch": 36.16, "learning_rate": 3.1926179256206e-05, "loss": 2.0355, "step": 12492500 }, { "epoch": 36.16, "learning_rate": 3.192545560855873e-05, "loss": 2.0512, "step": 12493000 }, { "epoch": 36.16, "learning_rate": 3.1924733408206746e-05, "loss": 2.0467, "step": 12493500 }, { "epoch": 36.17, "learning_rate": 3.192400976055947e-05, "loss": 2.0507, "step": 12494000 }, { "epoch": 36.17, "learning_rate": 3.192328611291219e-05, "loss": 2.0522, "step": 12494500 }, { "epoch": 36.17, "learning_rate": 3.192256246526491e-05, "loss": 2.0341, "step": 12495000 }, { "epoch": 36.17, "learning_rate": 3.1921840264912935e-05, "loss": 2.078, "step": 12495500 }, { "epoch": 36.17, "learning_rate": 3.192111661726566e-05, "loss": 2.0741, "step": 12496000 }, { "epoch": 36.17, "learning_rate": 3.192039296961838e-05, "loss": 2.06, "step": 12496500 }, { "epoch": 36.17, "learning_rate": 3.19196693219711e-05, "loss": 2.0527, "step": 12497000 }, { "epoch": 36.18, "learning_rate": 3.1918945674323824e-05, "loss": 2.0608, "step": 12497500 }, { "epoch": 36.18, "learning_rate": 3.1918222026676546e-05, "loss": 2.0688, "step": 12498000 }, { "epoch": 36.18, "learning_rate": 3.191749837902927e-05, "loss": 2.0853, "step": 12498500 }, { "epoch": 36.18, "learning_rate": 3.1916777625972584e-05, "loss": 2.0431, "step": 12499000 }, { "epoch": 36.18, "learning_rate": 3.1916053978325306e-05, "loss": 2.0694, "step": 12499500 }, { "epoch": 36.18, "learning_rate": 3.191533033067803e-05, "loss": 2.0751, "step": 12500000 }, { "epoch": 36.18, "learning_rate": 3.191460668303075e-05, "loss": 2.0665, "step": 12500500 }, { "epoch": 36.19, "learning_rate": 3.191388303538348e-05, "loss": 2.0632, "step": 12501000 }, { "epoch": 36.19, "learning_rate": 3.19131593877362e-05, "loss": 2.0771, "step": 12501500 }, { "epoch": 36.19, "learning_rate": 3.1912435740088924e-05, "loss": 2.0455, "step": 12502000 }, { "epoch": 36.19, "learning_rate": 3.191171353973694e-05, "loss": 2.0751, "step": 12502500 }, { "epoch": 36.19, "learning_rate": 3.191098989208967e-05, "loss": 2.0598, "step": 12503000 }, { "epoch": 36.19, "learning_rate": 3.191026624444239e-05, "loss": 2.0535, "step": 12503500 }, { "epoch": 36.19, "learning_rate": 3.190954259679511e-05, "loss": 2.0529, "step": 12504000 }, { "epoch": 36.2, "learning_rate": 3.1908818949147835e-05, "loss": 2.0678, "step": 12504500 }, { "epoch": 36.2, "learning_rate": 3.190809530150056e-05, "loss": 2.0713, "step": 12505000 }, { "epoch": 36.2, "learning_rate": 3.190737165385328e-05, "loss": 2.0488, "step": 12505500 }, { "epoch": 36.2, "learning_rate": 3.1906648006206e-05, "loss": 2.0585, "step": 12506000 }, { "epoch": 36.2, "learning_rate": 3.190592580585402e-05, "loss": 2.0721, "step": 12506500 }, { "epoch": 36.2, "learning_rate": 3.190520215820674e-05, "loss": 2.0435, "step": 12507000 }, { "epoch": 36.2, "learning_rate": 3.190447851055947e-05, "loss": 2.05, "step": 12507500 }, { "epoch": 36.21, "learning_rate": 3.190375486291219e-05, "loss": 2.0783, "step": 12508000 }, { "epoch": 36.21, "learning_rate": 3.190303121526492e-05, "loss": 2.0287, "step": 12508500 }, { "epoch": 36.21, "learning_rate": 3.190231046220823e-05, "loss": 2.0612, "step": 12509000 }, { "epoch": 36.21, "learning_rate": 3.190158681456095e-05, "loss": 2.0493, "step": 12509500 }, { "epoch": 36.21, "learning_rate": 3.1900863166913673e-05, "loss": 2.0914, "step": 12510000 }, { "epoch": 36.21, "learning_rate": 3.1900139519266396e-05, "loss": 2.0358, "step": 12510500 }, { "epoch": 36.21, "learning_rate": 3.189941587161912e-05, "loss": 2.0578, "step": 12511000 }, { "epoch": 36.22, "learning_rate": 3.189869222397184e-05, "loss": 2.056, "step": 12511500 }, { "epoch": 36.22, "learning_rate": 3.189796857632457e-05, "loss": 2.067, "step": 12512000 }, { "epoch": 36.22, "learning_rate": 3.189724492867729e-05, "loss": 2.08, "step": 12512500 }, { "epoch": 36.22, "learning_rate": 3.189652272832531e-05, "loss": 2.0568, "step": 12513000 }, { "epoch": 36.22, "learning_rate": 3.189580052797332e-05, "loss": 2.0492, "step": 12513500 }, { "epoch": 36.22, "learning_rate": 3.1895076880326045e-05, "loss": 2.0488, "step": 12514000 }, { "epoch": 36.22, "learning_rate": 3.189435323267877e-05, "loss": 2.069, "step": 12514500 }, { "epoch": 36.23, "learning_rate": 3.189362958503149e-05, "loss": 2.0782, "step": 12515000 }, { "epoch": 36.23, "learning_rate": 3.189290593738422e-05, "loss": 2.0644, "step": 12515500 }, { "epoch": 36.23, "learning_rate": 3.1892183737032234e-05, "loss": 2.0795, "step": 12516000 }, { "epoch": 36.23, "learning_rate": 3.189146008938496e-05, "loss": 2.0847, "step": 12516500 }, { "epoch": 36.23, "learning_rate": 3.189073788903298e-05, "loss": 2.0753, "step": 12517000 }, { "epoch": 36.23, "learning_rate": 3.18900142413857e-05, "loss": 2.0615, "step": 12517500 }, { "epoch": 36.23, "learning_rate": 3.188929059373842e-05, "loss": 2.0735, "step": 12518000 }, { "epoch": 36.24, "learning_rate": 3.1888566946091145e-05, "loss": 2.0576, "step": 12518500 }, { "epoch": 36.24, "learning_rate": 3.188784329844387e-05, "loss": 2.0499, "step": 12519000 }, { "epoch": 36.24, "learning_rate": 3.1887119650796596e-05, "loss": 2.0407, "step": 12519500 }, { "epoch": 36.24, "learning_rate": 3.188639600314932e-05, "loss": 2.0644, "step": 12520000 }, { "epoch": 36.24, "learning_rate": 3.188567235550204e-05, "loss": 2.0722, "step": 12520500 }, { "epoch": 36.24, "learning_rate": 3.188494870785476e-05, "loss": 2.0538, "step": 12521000 }, { "epoch": 36.24, "learning_rate": 3.1884225060207485e-05, "loss": 2.0547, "step": 12521500 }, { "epoch": 36.25, "learning_rate": 3.188350141256021e-05, "loss": 2.0647, "step": 12522000 }, { "epoch": 36.25, "learning_rate": 3.188277776491293e-05, "loss": 2.064, "step": 12522500 }, { "epoch": 36.25, "learning_rate": 3.188205411726565e-05, "loss": 2.0364, "step": 12523000 }, { "epoch": 36.25, "learning_rate": 3.188133046961838e-05, "loss": 2.0598, "step": 12523500 }, { "epoch": 36.25, "learning_rate": 3.18806068219711e-05, "loss": 2.0673, "step": 12524000 }, { "epoch": 36.25, "learning_rate": 3.1879883174323825e-05, "loss": 2.0524, "step": 12524500 }, { "epoch": 36.25, "learning_rate": 3.187915952667655e-05, "loss": 2.0609, "step": 12525000 }, { "epoch": 36.26, "learning_rate": 3.187843732632457e-05, "loss": 2.0602, "step": 12525500 }, { "epoch": 36.26, "learning_rate": 3.187771367867729e-05, "loss": 2.0545, "step": 12526000 }, { "epoch": 36.26, "learning_rate": 3.187699147832531e-05, "loss": 2.0826, "step": 12526500 }, { "epoch": 36.26, "learning_rate": 3.187626783067803e-05, "loss": 2.0555, "step": 12527000 }, { "epoch": 36.26, "learning_rate": 3.187554418303075e-05, "loss": 2.0643, "step": 12527500 }, { "epoch": 36.26, "learning_rate": 3.1874820535383474e-05, "loss": 2.0474, "step": 12528000 }, { "epoch": 36.26, "learning_rate": 3.18740968877362e-05, "loss": 2.0573, "step": 12528500 }, { "epoch": 36.27, "learning_rate": 3.187337324008892e-05, "loss": 2.0887, "step": 12529000 }, { "epoch": 36.27, "learning_rate": 3.187264959244165e-05, "loss": 2.0556, "step": 12529500 }, { "epoch": 36.27, "learning_rate": 3.187192594479437e-05, "loss": 2.0809, "step": 12530000 }, { "epoch": 36.27, "learning_rate": 3.187120229714709e-05, "loss": 2.0732, "step": 12530500 }, { "epoch": 36.27, "learning_rate": 3.1870480096795115e-05, "loss": 2.0548, "step": 12531000 }, { "epoch": 36.27, "learning_rate": 3.186975789644313e-05, "loss": 2.081, "step": 12531500 }, { "epoch": 36.28, "learning_rate": 3.186903424879585e-05, "loss": 2.054, "step": 12532000 }, { "epoch": 36.28, "learning_rate": 3.1868310601148575e-05, "loss": 2.0834, "step": 12532500 }, { "epoch": 36.28, "learning_rate": 3.18675869535013e-05, "loss": 2.0576, "step": 12533000 }, { "epoch": 36.28, "learning_rate": 3.186686330585402e-05, "loss": 2.0484, "step": 12533500 }, { "epoch": 36.28, "learning_rate": 3.186614110550204e-05, "loss": 2.0445, "step": 12534000 }, { "epoch": 36.28, "learning_rate": 3.1865417457854764e-05, "loss": 2.0685, "step": 12534500 }, { "epoch": 36.28, "learning_rate": 3.1864693810207486e-05, "loss": 2.0674, "step": 12535000 }, { "epoch": 36.29, "learning_rate": 3.186397016256021e-05, "loss": 2.078, "step": 12535500 }, { "epoch": 36.29, "learning_rate": 3.186324651491293e-05, "loss": 2.0461, "step": 12536000 }, { "epoch": 36.29, "learning_rate": 3.1862524314560946e-05, "loss": 2.082, "step": 12536500 }, { "epoch": 36.29, "learning_rate": 3.186180066691367e-05, "loss": 2.0629, "step": 12537000 }, { "epoch": 36.29, "learning_rate": 3.18610770192664e-05, "loss": 2.0693, "step": 12537500 }, { "epoch": 36.29, "learning_rate": 3.186035337161912e-05, "loss": 2.0524, "step": 12538000 }, { "epoch": 36.29, "learning_rate": 3.185962972397185e-05, "loss": 2.0835, "step": 12538500 }, { "epoch": 36.3, "learning_rate": 3.1858907523619864e-05, "loss": 2.0698, "step": 12539000 }, { "epoch": 36.3, "learning_rate": 3.1858183875972586e-05, "loss": 2.0731, "step": 12539500 }, { "epoch": 36.3, "learning_rate": 3.185746022832531e-05, "loss": 2.0697, "step": 12540000 }, { "epoch": 36.3, "learning_rate": 3.185673658067803e-05, "loss": 2.0506, "step": 12540500 }, { "epoch": 36.3, "learning_rate": 3.185601293303075e-05, "loss": 2.0571, "step": 12541000 }, { "epoch": 36.3, "learning_rate": 3.1855289285383475e-05, "loss": 2.0708, "step": 12541500 }, { "epoch": 36.3, "learning_rate": 3.18545656377362e-05, "loss": 2.0597, "step": 12542000 }, { "epoch": 36.31, "learning_rate": 3.185384343738422e-05, "loss": 2.0473, "step": 12542500 }, { "epoch": 36.31, "learning_rate": 3.185311978973694e-05, "loss": 2.0741, "step": 12543000 }, { "epoch": 36.31, "learning_rate": 3.1852396142089664e-05, "loss": 2.0739, "step": 12543500 }, { "epoch": 36.31, "learning_rate": 3.1851672494442387e-05, "loss": 2.058, "step": 12544000 }, { "epoch": 36.31, "learning_rate": 3.185094884679511e-05, "loss": 2.0827, "step": 12544500 }, { "epoch": 36.31, "learning_rate": 3.1850226646443124e-05, "loss": 2.0693, "step": 12545000 }, { "epoch": 36.31, "learning_rate": 3.1849502998795847e-05, "loss": 2.0824, "step": 12545500 }, { "epoch": 36.32, "learning_rate": 3.1848779351148576e-05, "loss": 2.0731, "step": 12546000 }, { "epoch": 36.32, "learning_rate": 3.18480557035013e-05, "loss": 2.0742, "step": 12546500 }, { "epoch": 36.32, "learning_rate": 3.184733350314932e-05, "loss": 2.0738, "step": 12547000 }, { "epoch": 36.32, "learning_rate": 3.184660985550204e-05, "loss": 2.0807, "step": 12547500 }, { "epoch": 36.32, "learning_rate": 3.1845886207854765e-05, "loss": 2.0725, "step": 12548000 }, { "epoch": 36.32, "learning_rate": 3.184516256020749e-05, "loss": 2.0557, "step": 12548500 }, { "epoch": 36.32, "learning_rate": 3.18444403598555e-05, "loss": 2.0655, "step": 12549000 }, { "epoch": 36.33, "learning_rate": 3.1843716712208225e-05, "loss": 2.0949, "step": 12549500 }, { "epoch": 36.33, "learning_rate": 3.184299306456095e-05, "loss": 2.0792, "step": 12550000 }, { "epoch": 36.33, "learning_rate": 3.1842269416913676e-05, "loss": 2.0602, "step": 12550500 }, { "epoch": 36.33, "learning_rate": 3.18415457692664e-05, "loss": 2.0678, "step": 12551000 }, { "epoch": 36.33, "learning_rate": 3.184082212161912e-05, "loss": 2.0731, "step": 12551500 }, { "epoch": 36.33, "learning_rate": 3.184009847397184e-05, "loss": 2.0799, "step": 12552000 }, { "epoch": 36.33, "learning_rate": 3.1839374826324565e-05, "loss": 2.0524, "step": 12552500 }, { "epoch": 36.34, "learning_rate": 3.183865117867729e-05, "loss": 2.0685, "step": 12553000 }, { "epoch": 36.34, "learning_rate": 3.1837927531030016e-05, "loss": 2.0711, "step": 12553500 }, { "epoch": 36.34, "learning_rate": 3.183720533067803e-05, "loss": 2.0388, "step": 12554000 }, { "epoch": 36.34, "learning_rate": 3.1836481683030754e-05, "loss": 2.0613, "step": 12554500 }, { "epoch": 36.34, "learning_rate": 3.1835758035383476e-05, "loss": 2.0601, "step": 12555000 }, { "epoch": 36.34, "learning_rate": 3.18350343877362e-05, "loss": 2.0549, "step": 12555500 }, { "epoch": 36.34, "learning_rate": 3.183431218738422e-05, "loss": 2.067, "step": 12556000 }, { "epoch": 36.35, "learning_rate": 3.1833589987032236e-05, "loss": 2.0604, "step": 12556500 }, { "epoch": 36.35, "learning_rate": 3.183286633938496e-05, "loss": 2.0924, "step": 12557000 }, { "epoch": 36.35, "learning_rate": 3.1832144139032974e-05, "loss": 2.0479, "step": 12557500 }, { "epoch": 36.35, "learning_rate": 3.1831420491385696e-05, "loss": 2.0861, "step": 12558000 }, { "epoch": 36.35, "learning_rate": 3.1830696843738425e-05, "loss": 2.0665, "step": 12558500 }, { "epoch": 36.35, "learning_rate": 3.182997319609115e-05, "loss": 2.07, "step": 12559000 }, { "epoch": 36.35, "learning_rate": 3.182924954844387e-05, "loss": 2.0512, "step": 12559500 }, { "epoch": 36.36, "learning_rate": 3.182852590079659e-05, "loss": 2.0528, "step": 12560000 }, { "epoch": 36.36, "learning_rate": 3.1827802253149314e-05, "loss": 2.0628, "step": 12560500 }, { "epoch": 36.36, "learning_rate": 3.182707860550204e-05, "loss": 2.0573, "step": 12561000 }, { "epoch": 36.36, "learning_rate": 3.1826354957854765e-05, "loss": 2.0932, "step": 12561500 }, { "epoch": 36.36, "learning_rate": 3.182563131020749e-05, "loss": 2.067, "step": 12562000 }, { "epoch": 36.36, "learning_rate": 3.182490766256021e-05, "loss": 2.0702, "step": 12562500 }, { "epoch": 36.36, "learning_rate": 3.182418401491293e-05, "loss": 2.0744, "step": 12563000 }, { "epoch": 36.37, "learning_rate": 3.1823460367265654e-05, "loss": 2.0666, "step": 12563500 }, { "epoch": 36.37, "learning_rate": 3.182273816691368e-05, "loss": 2.0565, "step": 12564000 }, { "epoch": 36.37, "learning_rate": 3.18220145192664e-05, "loss": 2.0499, "step": 12564500 }, { "epoch": 36.37, "learning_rate": 3.182129087161912e-05, "loss": 2.0701, "step": 12565000 }, { "epoch": 36.37, "learning_rate": 3.182056722397184e-05, "loss": 2.0617, "step": 12565500 }, { "epoch": 36.37, "learning_rate": 3.1819843576324566e-05, "loss": 2.0708, "step": 12566000 }, { "epoch": 36.37, "learning_rate": 3.181911992867729e-05, "loss": 2.0718, "step": 12566500 }, { "epoch": 36.38, "learning_rate": 3.18183977283253e-05, "loss": 2.0884, "step": 12567000 }, { "epoch": 36.38, "learning_rate": 3.1817674080678026e-05, "loss": 2.0645, "step": 12567500 }, { "epoch": 36.38, "learning_rate": 3.181695043303075e-05, "loss": 2.0532, "step": 12568000 }, { "epoch": 36.38, "learning_rate": 3.181622678538348e-05, "loss": 2.0727, "step": 12568500 }, { "epoch": 36.38, "learning_rate": 3.18155031377362e-05, "loss": 2.0658, "step": 12569000 }, { "epoch": 36.38, "learning_rate": 3.181477949008893e-05, "loss": 2.053, "step": 12569500 }, { "epoch": 36.39, "learning_rate": 3.181405584244165e-05, "loss": 2.0572, "step": 12570000 }, { "epoch": 36.39, "learning_rate": 3.181333219479437e-05, "loss": 2.0814, "step": 12570500 }, { "epoch": 36.39, "learning_rate": 3.1812608547147095e-05, "loss": 2.035, "step": 12571000 }, { "epoch": 36.39, "learning_rate": 3.181188634679511e-05, "loss": 2.0591, "step": 12571500 }, { "epoch": 36.39, "learning_rate": 3.1811164146443126e-05, "loss": 2.0613, "step": 12572000 }, { "epoch": 36.39, "learning_rate": 3.181044049879585e-05, "loss": 2.058, "step": 12572500 }, { "epoch": 36.39, "learning_rate": 3.180971685114858e-05, "loss": 2.0695, "step": 12573000 }, { "epoch": 36.4, "learning_rate": 3.18089932035013e-05, "loss": 2.1201, "step": 12573500 }, { "epoch": 36.4, "learning_rate": 3.180826955585402e-05, "loss": 2.0596, "step": 12574000 }, { "epoch": 36.4, "learning_rate": 3.180754735550204e-05, "loss": 2.0749, "step": 12574500 }, { "epoch": 36.4, "learning_rate": 3.180682370785476e-05, "loss": 2.0636, "step": 12575000 }, { "epoch": 36.4, "learning_rate": 3.180610006020748e-05, "loss": 2.0635, "step": 12575500 }, { "epoch": 36.4, "learning_rate": 3.180537641256021e-05, "loss": 2.0504, "step": 12576000 }, { "epoch": 36.4, "learning_rate": 3.180465276491293e-05, "loss": 2.0673, "step": 12576500 }, { "epoch": 36.41, "learning_rate": 3.1803929117265655e-05, "loss": 2.0859, "step": 12577000 }, { "epoch": 36.41, "learning_rate": 3.180320546961838e-05, "loss": 2.0704, "step": 12577500 }, { "epoch": 36.41, "learning_rate": 3.18024832692664e-05, "loss": 2.0792, "step": 12578000 }, { "epoch": 36.41, "learning_rate": 3.180175962161912e-05, "loss": 2.0506, "step": 12578500 }, { "epoch": 36.41, "learning_rate": 3.180103742126714e-05, "loss": 2.0946, "step": 12579000 }, { "epoch": 36.41, "learning_rate": 3.180031377361986e-05, "loss": 2.0607, "step": 12579500 }, { "epoch": 36.41, "learning_rate": 3.179959012597258e-05, "loss": 2.0874, "step": 12580000 }, { "epoch": 36.42, "learning_rate": 3.1798866478325304e-05, "loss": 2.042, "step": 12580500 }, { "epoch": 36.42, "learning_rate": 3.1798142830678026e-05, "loss": 2.0639, "step": 12581000 }, { "epoch": 36.42, "learning_rate": 3.1797419183030755e-05, "loss": 2.0736, "step": 12581500 }, { "epoch": 36.42, "learning_rate": 3.179669553538348e-05, "loss": 2.0681, "step": 12582000 }, { "epoch": 36.42, "learning_rate": 3.17959718877362e-05, "loss": 2.0664, "step": 12582500 }, { "epoch": 36.42, "learning_rate": 3.179524824008892e-05, "loss": 2.0801, "step": 12583000 }, { "epoch": 36.42, "learning_rate": 3.179452459244165e-05, "loss": 2.0522, "step": 12583500 }, { "epoch": 36.43, "learning_rate": 3.179380094479437e-05, "loss": 2.0806, "step": 12584000 }, { "epoch": 36.43, "learning_rate": 3.1793077297147096e-05, "loss": 2.0561, "step": 12584500 }, { "epoch": 36.43, "learning_rate": 3.179235364949982e-05, "loss": 2.0504, "step": 12585000 }, { "epoch": 36.43, "learning_rate": 3.1791631449147833e-05, "loss": 2.0542, "step": 12585500 }, { "epoch": 36.43, "learning_rate": 3.1790907801500556e-05, "loss": 2.0479, "step": 12586000 }, { "epoch": 36.43, "learning_rate": 3.179018415385328e-05, "loss": 2.0858, "step": 12586500 }, { "epoch": 36.43, "learning_rate": 3.178946050620601e-05, "loss": 2.0499, "step": 12587000 }, { "epoch": 36.44, "learning_rate": 3.178873685855873e-05, "loss": 2.0841, "step": 12587500 }, { "epoch": 36.44, "learning_rate": 3.178801321091145e-05, "loss": 2.0682, "step": 12588000 }, { "epoch": 36.44, "learning_rate": 3.1787289563264174e-05, "loss": 2.0732, "step": 12588500 }, { "epoch": 36.44, "learning_rate": 3.1786565915616896e-05, "loss": 2.0696, "step": 12589000 }, { "epoch": 36.44, "learning_rate": 3.178584371526491e-05, "loss": 2.073, "step": 12589500 }, { "epoch": 36.44, "learning_rate": 3.1785120067617634e-05, "loss": 2.0417, "step": 12590000 }, { "epoch": 36.44, "learning_rate": 3.178439641997036e-05, "loss": 2.0772, "step": 12590500 }, { "epoch": 36.45, "learning_rate": 3.1783672772323085e-05, "loss": 2.0733, "step": 12591000 }, { "epoch": 36.45, "learning_rate": 3.178295057197111e-05, "loss": 2.0526, "step": 12591500 }, { "epoch": 36.45, "learning_rate": 3.178222692432383e-05, "loss": 2.0773, "step": 12592000 }, { "epoch": 36.45, "learning_rate": 3.178150327667655e-05, "loss": 2.0613, "step": 12592500 }, { "epoch": 36.45, "learning_rate": 3.1780779629029274e-05, "loss": 2.0955, "step": 12593000 }, { "epoch": 36.45, "learning_rate": 3.178005742867729e-05, "loss": 2.0782, "step": 12593500 }, { "epoch": 36.45, "learning_rate": 3.177933378103001e-05, "loss": 2.0618, "step": 12594000 }, { "epoch": 36.46, "learning_rate": 3.177861158067803e-05, "loss": 2.051, "step": 12594500 }, { "epoch": 36.46, "learning_rate": 3.177788938032605e-05, "loss": 2.051, "step": 12595000 }, { "epoch": 36.46, "learning_rate": 3.177716573267877e-05, "loss": 2.0485, "step": 12595500 }, { "epoch": 36.46, "learning_rate": 3.1776442085031494e-05, "loss": 2.0578, "step": 12596000 }, { "epoch": 36.46, "learning_rate": 3.1775718437384216e-05, "loss": 2.0655, "step": 12596500 }, { "epoch": 36.46, "learning_rate": 3.177499478973694e-05, "loss": 2.0603, "step": 12597000 }, { "epoch": 36.46, "learning_rate": 3.177427114208966e-05, "loss": 2.0762, "step": 12597500 }, { "epoch": 36.47, "learning_rate": 3.177354749444238e-05, "loss": 2.0693, "step": 12598000 }, { "epoch": 36.47, "learning_rate": 3.177282384679511e-05, "loss": 2.0849, "step": 12598500 }, { "epoch": 36.47, "learning_rate": 3.1772100199147834e-05, "loss": 2.0501, "step": 12599000 }, { "epoch": 36.47, "learning_rate": 3.1771376551500556e-05, "loss": 2.0663, "step": 12599500 }, { "epoch": 36.47, "learning_rate": 3.177065290385328e-05, "loss": 2.0592, "step": 12600000 }, { "epoch": 36.47, "learning_rate": 3.176992925620601e-05, "loss": 2.0939, "step": 12600500 }, { "epoch": 36.47, "learning_rate": 3.176920560855873e-05, "loss": 2.063, "step": 12601000 }, { "epoch": 36.48, "learning_rate": 3.176848196091145e-05, "loss": 2.056, "step": 12601500 }, { "epoch": 36.48, "learning_rate": 3.176775976055947e-05, "loss": 2.0547, "step": 12602000 }, { "epoch": 36.48, "learning_rate": 3.176703611291219e-05, "loss": 2.0505, "step": 12602500 }, { "epoch": 36.48, "learning_rate": 3.176631246526491e-05, "loss": 2.0755, "step": 12603000 }, { "epoch": 36.48, "learning_rate": 3.1765588817617634e-05, "loss": 2.0679, "step": 12603500 }, { "epoch": 36.48, "learning_rate": 3.176486661726566e-05, "loss": 2.0531, "step": 12604000 }, { "epoch": 36.48, "learning_rate": 3.176414296961838e-05, "loss": 2.0535, "step": 12604500 }, { "epoch": 36.49, "learning_rate": 3.17634193219711e-05, "loss": 2.0584, "step": 12605000 }, { "epoch": 36.49, "learning_rate": 3.1762695674323823e-05, "loss": 2.0739, "step": 12605500 }, { "epoch": 36.49, "learning_rate": 3.176197202667655e-05, "loss": 2.059, "step": 12606000 }, { "epoch": 36.49, "learning_rate": 3.1761248379029275e-05, "loss": 2.0676, "step": 12606500 }, { "epoch": 36.49, "learning_rate": 3.1760524731382e-05, "loss": 2.0699, "step": 12607000 }, { "epoch": 36.49, "learning_rate": 3.175980253103001e-05, "loss": 2.0665, "step": 12607500 }, { "epoch": 36.49, "learning_rate": 3.1759078883382735e-05, "loss": 2.0838, "step": 12608000 }, { "epoch": 36.5, "learning_rate": 3.175835523573546e-05, "loss": 2.0707, "step": 12608500 }, { "epoch": 36.5, "learning_rate": 3.175763158808818e-05, "loss": 2.0458, "step": 12609000 }, { "epoch": 36.5, "learning_rate": 3.175690794044091e-05, "loss": 2.0703, "step": 12609500 }, { "epoch": 36.5, "learning_rate": 3.1756185740088924e-05, "loss": 2.0733, "step": 12610000 }, { "epoch": 36.5, "learning_rate": 3.1755462092441646e-05, "loss": 2.0484, "step": 12610500 }, { "epoch": 36.5, "learning_rate": 3.175473844479437e-05, "loss": 2.0336, "step": 12611000 }, { "epoch": 36.51, "learning_rate": 3.175401479714709e-05, "loss": 2.062, "step": 12611500 }, { "epoch": 36.51, "learning_rate": 3.175329114949981e-05, "loss": 2.0709, "step": 12612000 }, { "epoch": 36.51, "learning_rate": 3.1752567501852535e-05, "loss": 2.0859, "step": 12612500 }, { "epoch": 36.51, "learning_rate": 3.1751843854205264e-05, "loss": 2.0608, "step": 12613000 }, { "epoch": 36.51, "learning_rate": 3.1751120206557986e-05, "loss": 2.0506, "step": 12613500 }, { "epoch": 36.51, "learning_rate": 3.175039655891071e-05, "loss": 2.0451, "step": 12614000 }, { "epoch": 36.51, "learning_rate": 3.174967291126343e-05, "loss": 2.0358, "step": 12614500 }, { "epoch": 36.52, "learning_rate": 3.174895071091145e-05, "loss": 2.0668, "step": 12615000 }, { "epoch": 36.52, "learning_rate": 3.1748227063264175e-05, "loss": 2.0542, "step": 12615500 }, { "epoch": 36.52, "learning_rate": 3.174750486291219e-05, "loss": 2.0573, "step": 12616000 }, { "epoch": 36.52, "learning_rate": 3.174678121526491e-05, "loss": 2.0668, "step": 12616500 }, { "epoch": 36.52, "learning_rate": 3.1746057567617635e-05, "loss": 2.0519, "step": 12617000 }, { "epoch": 36.52, "learning_rate": 3.174533391997036e-05, "loss": 2.0695, "step": 12617500 }, { "epoch": 36.52, "learning_rate": 3.1744610272323086e-05, "loss": 2.0605, "step": 12618000 }, { "epoch": 36.53, "learning_rate": 3.174388662467581e-05, "loss": 2.0551, "step": 12618500 }, { "epoch": 36.53, "learning_rate": 3.174316297702853e-05, "loss": 2.0726, "step": 12619000 }, { "epoch": 36.53, "learning_rate": 3.174243932938125e-05, "loss": 2.0758, "step": 12619500 }, { "epoch": 36.53, "learning_rate": 3.174171712902927e-05, "loss": 2.0716, "step": 12620000 }, { "epoch": 36.53, "learning_rate": 3.1740993481382e-05, "loss": 2.0688, "step": 12620500 }, { "epoch": 36.53, "learning_rate": 3.174026983373472e-05, "loss": 2.052, "step": 12621000 }, { "epoch": 36.53, "learning_rate": 3.173954618608744e-05, "loss": 2.05, "step": 12621500 }, { "epoch": 36.54, "learning_rate": 3.173882398573546e-05, "loss": 2.0581, "step": 12622000 }, { "epoch": 36.54, "learning_rate": 3.173810033808819e-05, "loss": 2.0521, "step": 12622500 }, { "epoch": 36.54, "learning_rate": 3.173737669044091e-05, "loss": 2.0562, "step": 12623000 }, { "epoch": 36.54, "learning_rate": 3.173665304279363e-05, "loss": 2.0772, "step": 12623500 }, { "epoch": 36.54, "learning_rate": 3.1735929395146353e-05, "loss": 2.0666, "step": 12624000 }, { "epoch": 36.54, "learning_rate": 3.1735205747499076e-05, "loss": 2.0754, "step": 12624500 }, { "epoch": 36.54, "learning_rate": 3.17344820998518e-05, "loss": 2.0739, "step": 12625000 }, { "epoch": 36.55, "learning_rate": 3.1733759899499813e-05, "loss": 2.0664, "step": 12625500 }, { "epoch": 36.55, "learning_rate": 3.1733036251852536e-05, "loss": 2.0523, "step": 12626000 }, { "epoch": 36.55, "learning_rate": 3.173231260420526e-05, "loss": 2.0627, "step": 12626500 }, { "epoch": 36.55, "learning_rate": 3.173159040385328e-05, "loss": 2.0682, "step": 12627000 }, { "epoch": 36.55, "learning_rate": 3.1730866756206e-05, "loss": 2.0834, "step": 12627500 }, { "epoch": 36.55, "learning_rate": 3.173014310855873e-05, "loss": 2.0681, "step": 12628000 }, { "epoch": 36.55, "learning_rate": 3.1729419460911454e-05, "loss": 2.0664, "step": 12628500 }, { "epoch": 36.56, "learning_rate": 3.172869726055947e-05, "loss": 2.0528, "step": 12629000 }, { "epoch": 36.56, "learning_rate": 3.172797361291219e-05, "loss": 2.0527, "step": 12629500 }, { "epoch": 36.56, "learning_rate": 3.1727249965264914e-05, "loss": 2.0463, "step": 12630000 }, { "epoch": 36.56, "learning_rate": 3.1726526317617636e-05, "loss": 2.0894, "step": 12630500 }, { "epoch": 36.56, "learning_rate": 3.172580411726566e-05, "loss": 2.0656, "step": 12631000 }, { "epoch": 36.56, "learning_rate": 3.172508046961838e-05, "loss": 2.0634, "step": 12631500 }, { "epoch": 36.56, "learning_rate": 3.17243568219711e-05, "loss": 2.0582, "step": 12632000 }, { "epoch": 36.57, "learning_rate": 3.1723633174323825e-05, "loss": 2.0577, "step": 12632500 }, { "epoch": 36.57, "learning_rate": 3.172291097397184e-05, "loss": 2.0799, "step": 12633000 }, { "epoch": 36.57, "learning_rate": 3.172218732632456e-05, "loss": 2.0579, "step": 12633500 }, { "epoch": 36.57, "learning_rate": 3.1721463678677285e-05, "loss": 2.0683, "step": 12634000 }, { "epoch": 36.57, "learning_rate": 3.1720740031030014e-05, "loss": 2.0944, "step": 12634500 }, { "epoch": 36.57, "learning_rate": 3.1720016383382736e-05, "loss": 2.0434, "step": 12635000 }, { "epoch": 36.57, "learning_rate": 3.171929273573546e-05, "loss": 2.056, "step": 12635500 }, { "epoch": 36.58, "learning_rate": 3.171856908808819e-05, "loss": 2.0588, "step": 12636000 }, { "epoch": 36.58, "learning_rate": 3.17178468877362e-05, "loss": 2.0727, "step": 12636500 }, { "epoch": 36.58, "learning_rate": 3.1717123240088925e-05, "loss": 2.0743, "step": 12637000 }, { "epoch": 36.58, "learning_rate": 3.171639959244165e-05, "loss": 2.0527, "step": 12637500 }, { "epoch": 36.58, "learning_rate": 3.171567594479437e-05, "loss": 2.0651, "step": 12638000 }, { "epoch": 36.58, "learning_rate": 3.171495229714709e-05, "loss": 2.0901, "step": 12638500 }, { "epoch": 36.58, "learning_rate": 3.1714230096795114e-05, "loss": 2.0665, "step": 12639000 }, { "epoch": 36.59, "learning_rate": 3.1713506449147837e-05, "loss": 2.0571, "step": 12639500 }, { "epoch": 36.59, "learning_rate": 3.171278280150056e-05, "loss": 2.0418, "step": 12640000 }, { "epoch": 36.59, "learning_rate": 3.171205915385328e-05, "loss": 2.0524, "step": 12640500 }, { "epoch": 36.59, "learning_rate": 3.1711335506206e-05, "loss": 2.0844, "step": 12641000 }, { "epoch": 36.59, "learning_rate": 3.1710611858558726e-05, "loss": 2.0399, "step": 12641500 }, { "epoch": 36.59, "learning_rate": 3.170988821091145e-05, "loss": 2.0801, "step": 12642000 }, { "epoch": 36.59, "learning_rate": 3.170916456326417e-05, "loss": 2.0806, "step": 12642500 }, { "epoch": 36.6, "learning_rate": 3.17084409156169e-05, "loss": 2.0713, "step": 12643000 }, { "epoch": 36.6, "learning_rate": 3.170771726796962e-05, "loss": 2.0473, "step": 12643500 }, { "epoch": 36.6, "learning_rate": 3.170699506761764e-05, "loss": 2.0453, "step": 12644000 }, { "epoch": 36.6, "learning_rate": 3.1706271419970366e-05, "loss": 2.063, "step": 12644500 }, { "epoch": 36.6, "learning_rate": 3.170554777232309e-05, "loss": 2.0704, "step": 12645000 }, { "epoch": 36.6, "learning_rate": 3.170482412467581e-05, "loss": 2.0672, "step": 12645500 }, { "epoch": 36.6, "learning_rate": 3.1704101924323826e-05, "loss": 2.0567, "step": 12646000 }, { "epoch": 36.61, "learning_rate": 3.170337972397184e-05, "loss": 2.0655, "step": 12646500 }, { "epoch": 36.61, "learning_rate": 3.1702656076324564e-05, "loss": 2.0744, "step": 12647000 }, { "epoch": 36.61, "learning_rate": 3.1701932428677286e-05, "loss": 2.0839, "step": 12647500 }, { "epoch": 36.61, "learning_rate": 3.1701208781030015e-05, "loss": 2.0484, "step": 12648000 }, { "epoch": 36.61, "learning_rate": 3.170048658067803e-05, "loss": 2.0819, "step": 12648500 }, { "epoch": 36.61, "learning_rate": 3.169976293303075e-05, "loss": 2.0769, "step": 12649000 }, { "epoch": 36.62, "learning_rate": 3.1699039285383475e-05, "loss": 2.0761, "step": 12649500 }, { "epoch": 36.62, "learning_rate": 3.16983156377362e-05, "loss": 2.0794, "step": 12650000 }, { "epoch": 36.62, "learning_rate": 3.169759199008892e-05, "loss": 2.0877, "step": 12650500 }, { "epoch": 36.62, "learning_rate": 3.169686834244165e-05, "loss": 2.0623, "step": 12651000 }, { "epoch": 36.62, "learning_rate": 3.169614469479437e-05, "loss": 2.0667, "step": 12651500 }, { "epoch": 36.62, "learning_rate": 3.169542104714709e-05, "loss": 2.0622, "step": 12652000 }, { "epoch": 36.62, "learning_rate": 3.1694697399499815e-05, "loss": 2.0746, "step": 12652500 }, { "epoch": 36.63, "learning_rate": 3.169397375185254e-05, "loss": 2.0573, "step": 12653000 }, { "epoch": 36.63, "learning_rate": 3.1693250104205266e-05, "loss": 2.0526, "step": 12653500 }, { "epoch": 36.63, "learning_rate": 3.169252645655799e-05, "loss": 2.0641, "step": 12654000 }, { "epoch": 36.63, "learning_rate": 3.169180280891071e-05, "loss": 2.0551, "step": 12654500 }, { "epoch": 36.63, "learning_rate": 3.169107916126343e-05, "loss": 2.0704, "step": 12655000 }, { "epoch": 36.63, "learning_rate": 3.1690355513616155e-05, "loss": 2.067, "step": 12655500 }, { "epoch": 36.63, "learning_rate": 3.168963186596888e-05, "loss": 2.0509, "step": 12656000 }, { "epoch": 36.64, "learning_rate": 3.168890966561689e-05, "loss": 2.0597, "step": 12656500 }, { "epoch": 36.64, "learning_rate": 3.1688186017969615e-05, "loss": 2.0444, "step": 12657000 }, { "epoch": 36.64, "learning_rate": 3.168746237032234e-05, "loss": 2.0595, "step": 12657500 }, { "epoch": 36.64, "learning_rate": 3.1686738722675066e-05, "loss": 2.0894, "step": 12658000 }, { "epoch": 36.64, "learning_rate": 3.168601507502779e-05, "loss": 2.0656, "step": 12658500 }, { "epoch": 36.64, "learning_rate": 3.168529287467581e-05, "loss": 2.0933, "step": 12659000 }, { "epoch": 36.64, "learning_rate": 3.1684570674323827e-05, "loss": 2.0664, "step": 12659500 }, { "epoch": 36.65, "learning_rate": 3.168384702667655e-05, "loss": 2.0482, "step": 12660000 }, { "epoch": 36.65, "learning_rate": 3.1683124826324564e-05, "loss": 2.0539, "step": 12660500 }, { "epoch": 36.65, "learning_rate": 3.168240262597259e-05, "loss": 2.0585, "step": 12661000 }, { "epoch": 36.65, "learning_rate": 3.168167897832531e-05, "loss": 2.0746, "step": 12661500 }, { "epoch": 36.65, "learning_rate": 3.168095533067803e-05, "loss": 2.0833, "step": 12662000 }, { "epoch": 36.65, "learning_rate": 3.1680231683030753e-05, "loss": 2.0802, "step": 12662500 }, { "epoch": 36.65, "learning_rate": 3.1679508035383476e-05, "loss": 2.0608, "step": 12663000 }, { "epoch": 36.66, "learning_rate": 3.16787843877362e-05, "loss": 2.0587, "step": 12663500 }, { "epoch": 36.66, "learning_rate": 3.167806074008892e-05, "loss": 2.0797, "step": 12664000 }, { "epoch": 36.66, "learning_rate": 3.167733709244164e-05, "loss": 2.082, "step": 12664500 }, { "epoch": 36.66, "learning_rate": 3.1676613444794365e-05, "loss": 2.0864, "step": 12665000 }, { "epoch": 36.66, "learning_rate": 3.167589124444239e-05, "loss": 2.0772, "step": 12665500 }, { "epoch": 36.66, "learning_rate": 3.1675167596795116e-05, "loss": 2.067, "step": 12666000 }, { "epoch": 36.66, "learning_rate": 3.167444394914784e-05, "loss": 2.0584, "step": 12666500 }, { "epoch": 36.67, "learning_rate": 3.167372030150056e-05, "loss": 2.0672, "step": 12667000 }, { "epoch": 36.67, "learning_rate": 3.167299665385328e-05, "loss": 2.0786, "step": 12667500 }, { "epoch": 36.67, "learning_rate": 3.1672273006206005e-05, "loss": 2.0602, "step": 12668000 }, { "epoch": 36.67, "learning_rate": 3.167155080585402e-05, "loss": 2.0625, "step": 12668500 }, { "epoch": 36.67, "learning_rate": 3.167082860550204e-05, "loss": 2.0763, "step": 12669000 }, { "epoch": 36.67, "learning_rate": 3.1670104957854765e-05, "loss": 2.1129, "step": 12669500 }, { "epoch": 36.67, "learning_rate": 3.166938131020749e-05, "loss": 2.074, "step": 12670000 }, { "epoch": 36.68, "learning_rate": 3.166865766256021e-05, "loss": 2.0682, "step": 12670500 }, { "epoch": 36.68, "learning_rate": 3.166793401491293e-05, "loss": 2.0589, "step": 12671000 }, { "epoch": 36.68, "learning_rate": 3.1667210367265654e-05, "loss": 2.0605, "step": 12671500 }, { "epoch": 36.68, "learning_rate": 3.1666486719618376e-05, "loss": 2.0575, "step": 12672000 }, { "epoch": 36.68, "learning_rate": 3.16657630719711e-05, "loss": 2.0631, "step": 12672500 }, { "epoch": 36.68, "learning_rate": 3.166503942432382e-05, "loss": 2.067, "step": 12673000 }, { "epoch": 36.68, "learning_rate": 3.166431722397184e-05, "loss": 2.0627, "step": 12673500 }, { "epoch": 36.69, "learning_rate": 3.1663593576324565e-05, "loss": 2.0724, "step": 12674000 }, { "epoch": 36.69, "learning_rate": 3.1662869928677294e-05, "loss": 2.0721, "step": 12674500 }, { "epoch": 36.69, "learning_rate": 3.1662146281030016e-05, "loss": 2.0515, "step": 12675000 }, { "epoch": 36.69, "learning_rate": 3.166142263338274e-05, "loss": 2.074, "step": 12675500 }, { "epoch": 36.69, "learning_rate": 3.166069898573546e-05, "loss": 2.0665, "step": 12676000 }, { "epoch": 36.69, "learning_rate": 3.165997533808818e-05, "loss": 2.0734, "step": 12676500 }, { "epoch": 36.69, "learning_rate": 3.1659251690440905e-05, "loss": 2.0627, "step": 12677000 }, { "epoch": 36.7, "learning_rate": 3.165852804279363e-05, "loss": 2.0897, "step": 12677500 }, { "epoch": 36.7, "learning_rate": 3.165780584244164e-05, "loss": 2.0763, "step": 12678000 }, { "epoch": 36.7, "learning_rate": 3.1657082194794365e-05, "loss": 2.0686, "step": 12678500 }, { "epoch": 36.7, "learning_rate": 3.1656358547147094e-05, "loss": 2.0831, "step": 12679000 }, { "epoch": 36.7, "learning_rate": 3.165563634679511e-05, "loss": 2.0902, "step": 12679500 }, { "epoch": 36.7, "learning_rate": 3.165491269914783e-05, "loss": 2.0512, "step": 12680000 }, { "epoch": 36.7, "learning_rate": 3.1654189051500554e-05, "loss": 2.068, "step": 12680500 }, { "epoch": 36.71, "learning_rate": 3.1653465403853283e-05, "loss": 2.0822, "step": 12681000 }, { "epoch": 36.71, "learning_rate": 3.1652741756206006e-05, "loss": 2.0591, "step": 12681500 }, { "epoch": 36.71, "learning_rate": 3.165201810855873e-05, "loss": 2.0488, "step": 12682000 }, { "epoch": 36.71, "learning_rate": 3.165129446091145e-05, "loss": 2.0934, "step": 12682500 }, { "epoch": 36.71, "learning_rate": 3.165057081326417e-05, "loss": 2.0733, "step": 12683000 }, { "epoch": 36.71, "learning_rate": 3.1649848612912195e-05, "loss": 2.0722, "step": 12683500 }, { "epoch": 36.71, "learning_rate": 3.164912496526492e-05, "loss": 2.0554, "step": 12684000 }, { "epoch": 36.72, "learning_rate": 3.164840131761764e-05, "loss": 2.0537, "step": 12684500 }, { "epoch": 36.72, "learning_rate": 3.164767766997036e-05, "loss": 2.0735, "step": 12685000 }, { "epoch": 36.72, "learning_rate": 3.1646954022323084e-05, "loss": 2.0683, "step": 12685500 }, { "epoch": 36.72, "learning_rate": 3.1646230374675806e-05, "loss": 2.0938, "step": 12686000 }, { "epoch": 36.72, "learning_rate": 3.164550817432382e-05, "loss": 2.0483, "step": 12686500 }, { "epoch": 36.72, "learning_rate": 3.1644784526676544e-05, "loss": 2.0652, "step": 12687000 }, { "epoch": 36.73, "learning_rate": 3.1644060879029266e-05, "loss": 2.0851, "step": 12687500 }, { "epoch": 36.73, "learning_rate": 3.1643337231381995e-05, "loss": 2.0602, "step": 12688000 }, { "epoch": 36.73, "learning_rate": 3.164261647832531e-05, "loss": 2.0715, "step": 12688500 }, { "epoch": 36.73, "learning_rate": 3.164189283067803e-05, "loss": 2.0778, "step": 12689000 }, { "epoch": 36.73, "learning_rate": 3.164117063032605e-05, "loss": 2.0604, "step": 12689500 }, { "epoch": 36.73, "learning_rate": 3.164044698267877e-05, "loss": 2.055, "step": 12690000 }, { "epoch": 36.73, "learning_rate": 3.163972333503149e-05, "loss": 2.0502, "step": 12690500 }, { "epoch": 36.74, "learning_rate": 3.163899968738422e-05, "loss": 2.0592, "step": 12691000 }, { "epoch": 36.74, "learning_rate": 3.1638276039736944e-05, "loss": 2.0644, "step": 12691500 }, { "epoch": 36.74, "learning_rate": 3.1637552392089666e-05, "loss": 2.098, "step": 12692000 }, { "epoch": 36.74, "learning_rate": 3.163682874444239e-05, "loss": 2.0938, "step": 12692500 }, { "epoch": 36.74, "learning_rate": 3.163610509679511e-05, "loss": 2.0596, "step": 12693000 }, { "epoch": 36.74, "learning_rate": 3.1635382896443126e-05, "loss": 2.096, "step": 12693500 }, { "epoch": 36.74, "learning_rate": 3.163465924879585e-05, "loss": 2.0509, "step": 12694000 }, { "epoch": 36.75, "learning_rate": 3.163393560114857e-05, "loss": 2.0958, "step": 12694500 }, { "epoch": 36.75, "learning_rate": 3.163321195350129e-05, "loss": 2.0719, "step": 12695000 }, { "epoch": 36.75, "learning_rate": 3.163248830585402e-05, "loss": 2.0543, "step": 12695500 }, { "epoch": 36.75, "learning_rate": 3.1631766105502044e-05, "loss": 2.0775, "step": 12696000 }, { "epoch": 36.75, "learning_rate": 3.163104390515006e-05, "loss": 2.0577, "step": 12696500 }, { "epoch": 36.75, "learning_rate": 3.163032025750278e-05, "loss": 2.0538, "step": 12697000 }, { "epoch": 36.75, "learning_rate": 3.1629596609855504e-05, "loss": 2.0926, "step": 12697500 }, { "epoch": 36.76, "learning_rate": 3.1628872962208227e-05, "loss": 2.0751, "step": 12698000 }, { "epoch": 36.76, "learning_rate": 3.162815076185625e-05, "loss": 2.0637, "step": 12698500 }, { "epoch": 36.76, "learning_rate": 3.162742711420897e-05, "loss": 2.0587, "step": 12699000 }, { "epoch": 36.76, "learning_rate": 3.1626703466561693e-05, "loss": 2.0477, "step": 12699500 }, { "epoch": 36.76, "learning_rate": 3.1625979818914416e-05, "loss": 2.1098, "step": 12700000 }, { "epoch": 36.76, "learning_rate": 3.162525617126714e-05, "loss": 2.0774, "step": 12700500 }, { "epoch": 36.76, "learning_rate": 3.1624533970915153e-05, "loss": 2.0933, "step": 12701000 }, { "epoch": 36.77, "learning_rate": 3.1623810323267876e-05, "loss": 2.0576, "step": 12701500 }, { "epoch": 36.77, "learning_rate": 3.16230866756206e-05, "loss": 2.0706, "step": 12702000 }, { "epoch": 36.77, "learning_rate": 3.162236302797332e-05, "loss": 2.0675, "step": 12702500 }, { "epoch": 36.77, "learning_rate": 3.162163938032605e-05, "loss": 2.0766, "step": 12703000 }, { "epoch": 36.77, "learning_rate": 3.162091573267877e-05, "loss": 2.052, "step": 12703500 }, { "epoch": 36.77, "learning_rate": 3.1620193532326794e-05, "loss": 2.0706, "step": 12704000 }, { "epoch": 36.77, "learning_rate": 3.1619469884679516e-05, "loss": 2.0435, "step": 12704500 }, { "epoch": 36.78, "learning_rate": 3.161874623703224e-05, "loss": 2.088, "step": 12705000 }, { "epoch": 36.78, "learning_rate": 3.161802258938496e-05, "loss": 2.0638, "step": 12705500 }, { "epoch": 36.78, "learning_rate": 3.161729894173768e-05, "loss": 2.0671, "step": 12706000 }, { "epoch": 36.78, "learning_rate": 3.1616575294090405e-05, "loss": 2.0562, "step": 12706500 }, { "epoch": 36.78, "learning_rate": 3.161585164644313e-05, "loss": 2.0838, "step": 12707000 }, { "epoch": 36.78, "learning_rate": 3.161512799879585e-05, "loss": 2.0816, "step": 12707500 }, { "epoch": 36.78, "learning_rate": 3.161440435114857e-05, "loss": 2.0669, "step": 12708000 }, { "epoch": 36.79, "learning_rate": 3.1613680703501294e-05, "loss": 2.0566, "step": 12708500 }, { "epoch": 36.79, "learning_rate": 3.161295705585402e-05, "loss": 2.0692, "step": 12709000 }, { "epoch": 36.79, "learning_rate": 3.1612233408206745e-05, "loss": 2.048, "step": 12709500 }, { "epoch": 36.79, "learning_rate": 3.161151120785476e-05, "loss": 2.0985, "step": 12710000 }, { "epoch": 36.79, "learning_rate": 3.1610789007502776e-05, "loss": 2.0501, "step": 12710500 }, { "epoch": 36.79, "learning_rate": 3.1610065359855505e-05, "loss": 2.0688, "step": 12711000 }, { "epoch": 36.79, "learning_rate": 3.160934315950352e-05, "loss": 2.0719, "step": 12711500 }, { "epoch": 36.8, "learning_rate": 3.160861951185625e-05, "loss": 2.0785, "step": 12712000 }, { "epoch": 36.8, "learning_rate": 3.160789586420897e-05, "loss": 2.0934, "step": 12712500 }, { "epoch": 36.8, "learning_rate": 3.1607172216561694e-05, "loss": 2.0412, "step": 12713000 }, { "epoch": 36.8, "learning_rate": 3.1606448568914416e-05, "loss": 2.0508, "step": 12713500 }, { "epoch": 36.8, "learning_rate": 3.160572492126714e-05, "loss": 2.0788, "step": 12714000 }, { "epoch": 36.8, "learning_rate": 3.160500127361986e-05, "loss": 2.0706, "step": 12714500 }, { "epoch": 36.8, "learning_rate": 3.160427762597258e-05, "loss": 2.0825, "step": 12715000 }, { "epoch": 36.81, "learning_rate": 3.1603553978325305e-05, "loss": 2.0819, "step": 12715500 }, { "epoch": 36.81, "learning_rate": 3.160283033067803e-05, "loss": 2.0483, "step": 12716000 }, { "epoch": 36.81, "learning_rate": 3.160210668303075e-05, "loss": 2.0716, "step": 12716500 }, { "epoch": 36.81, "learning_rate": 3.160138303538347e-05, "loss": 2.0915, "step": 12717000 }, { "epoch": 36.81, "learning_rate": 3.16006593877362e-05, "loss": 2.0612, "step": 12717500 }, { "epoch": 36.81, "learning_rate": 3.159993718738422e-05, "loss": 2.0707, "step": 12718000 }, { "epoch": 36.81, "learning_rate": 3.1599213539736946e-05, "loss": 2.0501, "step": 12718500 }, { "epoch": 36.82, "learning_rate": 3.159848989208967e-05, "loss": 2.066, "step": 12719000 }, { "epoch": 36.82, "learning_rate": 3.159776624444239e-05, "loss": 2.0651, "step": 12719500 }, { "epoch": 36.82, "learning_rate": 3.159704259679511e-05, "loss": 2.0551, "step": 12720000 }, { "epoch": 36.82, "learning_rate": 3.1596318949147835e-05, "loss": 2.106, "step": 12720500 }, { "epoch": 36.82, "learning_rate": 3.159559530150056e-05, "loss": 2.0619, "step": 12721000 }, { "epoch": 36.82, "learning_rate": 3.159487310114857e-05, "loss": 2.1086, "step": 12721500 }, { "epoch": 36.82, "learning_rate": 3.1594150900796595e-05, "loss": 2.0686, "step": 12722000 }, { "epoch": 36.83, "learning_rate": 3.159342725314932e-05, "loss": 2.0675, "step": 12722500 }, { "epoch": 36.83, "learning_rate": 3.159270360550204e-05, "loss": 2.0759, "step": 12723000 }, { "epoch": 36.83, "learning_rate": 3.159197995785476e-05, "loss": 2.0939, "step": 12723500 }, { "epoch": 36.83, "learning_rate": 3.1591256310207484e-05, "loss": 2.0779, "step": 12724000 }, { "epoch": 36.83, "learning_rate": 3.1590532662560206e-05, "loss": 2.0706, "step": 12724500 }, { "epoch": 36.83, "learning_rate": 3.158980901491293e-05, "loss": 2.0687, "step": 12725000 }, { "epoch": 36.84, "learning_rate": 3.158908536726565e-05, "loss": 2.0627, "step": 12725500 }, { "epoch": 36.84, "learning_rate": 3.158836171961838e-05, "loss": 2.0562, "step": 12726000 }, { "epoch": 36.84, "learning_rate": 3.15876395192664e-05, "loss": 2.0708, "step": 12726500 }, { "epoch": 36.84, "learning_rate": 3.1586915871619124e-05, "loss": 2.08, "step": 12727000 }, { "epoch": 36.84, "learning_rate": 3.1586192223971846e-05, "loss": 2.0824, "step": 12727500 }, { "epoch": 36.84, "learning_rate": 3.158546857632457e-05, "loss": 2.0634, "step": 12728000 }, { "epoch": 36.84, "learning_rate": 3.1584746375972584e-05, "loss": 2.0767, "step": 12728500 }, { "epoch": 36.85, "learning_rate": 3.15840241756206e-05, "loss": 2.0821, "step": 12729000 }, { "epoch": 36.85, "learning_rate": 3.158330052797333e-05, "loss": 2.0752, "step": 12729500 }, { "epoch": 36.85, "learning_rate": 3.1582578327621344e-05, "loss": 2.0675, "step": 12730000 }, { "epoch": 36.85, "learning_rate": 3.1581854679974066e-05, "loss": 2.0592, "step": 12730500 }, { "epoch": 36.85, "learning_rate": 3.158113103232679e-05, "loss": 2.0781, "step": 12731000 }, { "epoch": 36.85, "learning_rate": 3.158040738467951e-05, "loss": 2.055, "step": 12731500 }, { "epoch": 36.85, "learning_rate": 3.157968373703223e-05, "loss": 2.0775, "step": 12732000 }, { "epoch": 36.86, "learning_rate": 3.1578960089384955e-05, "loss": 2.0791, "step": 12732500 }, { "epoch": 36.86, "learning_rate": 3.157823644173768e-05, "loss": 2.0744, "step": 12733000 }, { "epoch": 36.86, "learning_rate": 3.1577512794090406e-05, "loss": 2.0881, "step": 12733500 }, { "epoch": 36.86, "learning_rate": 3.157678914644313e-05, "loss": 2.0961, "step": 12734000 }, { "epoch": 36.86, "learning_rate": 3.157606549879585e-05, "loss": 2.0858, "step": 12734500 }, { "epoch": 36.86, "learning_rate": 3.157534185114858e-05, "loss": 2.0725, "step": 12735000 }, { "epoch": 36.86, "learning_rate": 3.15746182035013e-05, "loss": 2.073, "step": 12735500 }, { "epoch": 36.87, "learning_rate": 3.1573894555854024e-05, "loss": 2.0817, "step": 12736000 }, { "epoch": 36.87, "learning_rate": 3.157317090820675e-05, "loss": 2.0584, "step": 12736500 }, { "epoch": 36.87, "learning_rate": 3.157244726055947e-05, "loss": 2.0604, "step": 12737000 }, { "epoch": 36.87, "learning_rate": 3.1571725060207484e-05, "loss": 2.0869, "step": 12737500 }, { "epoch": 36.87, "learning_rate": 3.157100141256021e-05, "loss": 2.0552, "step": 12738000 }, { "epoch": 36.87, "learning_rate": 3.157027776491293e-05, "loss": 2.0627, "step": 12738500 }, { "epoch": 36.87, "learning_rate": 3.156955411726565e-05, "loss": 2.037, "step": 12739000 }, { "epoch": 36.88, "learning_rate": 3.156883046961838e-05, "loss": 2.0663, "step": 12739500 }, { "epoch": 36.88, "learning_rate": 3.1568108269266396e-05, "loss": 2.0965, "step": 12740000 }, { "epoch": 36.88, "learning_rate": 3.156738462161912e-05, "loss": 2.0379, "step": 12740500 }, { "epoch": 36.88, "learning_rate": 3.156666097397185e-05, "loss": 2.0607, "step": 12741000 }, { "epoch": 36.88, "learning_rate": 3.156593732632457e-05, "loss": 2.0599, "step": 12741500 }, { "epoch": 36.88, "learning_rate": 3.156521367867729e-05, "loss": 2.0616, "step": 12742000 }, { "epoch": 36.88, "learning_rate": 3.1564490031030014e-05, "loss": 2.0908, "step": 12742500 }, { "epoch": 36.89, "learning_rate": 3.1563766383382736e-05, "loss": 2.054, "step": 12743000 }, { "epoch": 36.89, "learning_rate": 3.156304273573546e-05, "loss": 2.0677, "step": 12743500 }, { "epoch": 36.89, "learning_rate": 3.156231908808818e-05, "loss": 2.0786, "step": 12744000 }, { "epoch": 36.89, "learning_rate": 3.15615954404409e-05, "loss": 2.1018, "step": 12744500 }, { "epoch": 36.89, "learning_rate": 3.1560871792793625e-05, "loss": 2.0509, "step": 12745000 }, { "epoch": 36.89, "learning_rate": 3.1560148145146354e-05, "loss": 2.0691, "step": 12745500 }, { "epoch": 36.89, "learning_rate": 3.1559424497499076e-05, "loss": 2.0662, "step": 12746000 }, { "epoch": 36.9, "learning_rate": 3.155870229714709e-05, "loss": 2.0704, "step": 12746500 }, { "epoch": 36.9, "learning_rate": 3.1557978649499814e-05, "loss": 2.0782, "step": 12747000 }, { "epoch": 36.9, "learning_rate": 3.1557255001852536e-05, "loss": 2.069, "step": 12747500 }, { "epoch": 36.9, "learning_rate": 3.1556531354205265e-05, "loss": 2.11, "step": 12748000 }, { "epoch": 36.9, "learning_rate": 3.155580915385328e-05, "loss": 2.0636, "step": 12748500 }, { "epoch": 36.9, "learning_rate": 3.1555085506206e-05, "loss": 2.0516, "step": 12749000 }, { "epoch": 36.9, "learning_rate": 3.155436185855873e-05, "loss": 2.0641, "step": 12749500 }, { "epoch": 36.91, "learning_rate": 3.1553638210911454e-05, "loss": 2.0642, "step": 12750000 }, { "epoch": 36.91, "learning_rate": 3.155291601055947e-05, "loss": 2.0694, "step": 12750500 }, { "epoch": 36.91, "learning_rate": 3.155219236291219e-05, "loss": 2.049, "step": 12751000 }, { "epoch": 36.91, "learning_rate": 3.1551468715264914e-05, "loss": 2.0668, "step": 12751500 }, { "epoch": 36.91, "learning_rate": 3.1550745067617636e-05, "loss": 2.0779, "step": 12752000 }, { "epoch": 36.91, "learning_rate": 3.155002141997036e-05, "loss": 2.0885, "step": 12752500 }, { "epoch": 36.91, "learning_rate": 3.154929921961838e-05, "loss": 2.0349, "step": 12753000 }, { "epoch": 36.92, "learning_rate": 3.15485755719711e-05, "loss": 2.0686, "step": 12753500 }, { "epoch": 36.92, "learning_rate": 3.154785337161912e-05, "loss": 2.0688, "step": 12754000 }, { "epoch": 36.92, "learning_rate": 3.1547131171267134e-05, "loss": 2.0775, "step": 12754500 }, { "epoch": 36.92, "learning_rate": 3.1546407523619857e-05, "loss": 2.0653, "step": 12755000 }, { "epoch": 36.92, "learning_rate": 3.154568387597258e-05, "loss": 2.0572, "step": 12755500 }, { "epoch": 36.92, "learning_rate": 3.154496022832531e-05, "loss": 2.0532, "step": 12756000 }, { "epoch": 36.92, "learning_rate": 3.154423658067803e-05, "loss": 2.0693, "step": 12756500 }, { "epoch": 36.93, "learning_rate": 3.154351293303075e-05, "loss": 2.0566, "step": 12757000 }, { "epoch": 36.93, "learning_rate": 3.154278928538348e-05, "loss": 2.068, "step": 12757500 }, { "epoch": 36.93, "learning_rate": 3.1542065637736203e-05, "loss": 2.0328, "step": 12758000 }, { "epoch": 36.93, "learning_rate": 3.1541341990088926e-05, "loss": 2.0955, "step": 12758500 }, { "epoch": 36.93, "learning_rate": 3.154061834244165e-05, "loss": 2.0796, "step": 12759000 }, { "epoch": 36.93, "learning_rate": 3.153989469479437e-05, "loss": 2.0867, "step": 12759500 }, { "epoch": 36.93, "learning_rate": 3.153917104714709e-05, "loss": 2.0888, "step": 12760000 }, { "epoch": 36.94, "learning_rate": 3.1538447399499815e-05, "loss": 2.0613, "step": 12760500 }, { "epoch": 36.94, "learning_rate": 3.153772375185254e-05, "loss": 2.0667, "step": 12761000 }, { "epoch": 36.94, "learning_rate": 3.153700010420526e-05, "loss": 2.0723, "step": 12761500 }, { "epoch": 36.94, "learning_rate": 3.153627645655798e-05, "loss": 2.0533, "step": 12762000 }, { "epoch": 36.94, "learning_rate": 3.1535552808910704e-05, "loss": 2.0645, "step": 12762500 }, { "epoch": 36.94, "learning_rate": 3.153482916126343e-05, "loss": 2.0566, "step": 12763000 }, { "epoch": 36.95, "learning_rate": 3.1534106960911455e-05, "loss": 2.0647, "step": 12763500 }, { "epoch": 36.95, "learning_rate": 3.1533386207854764e-05, "loss": 2.0874, "step": 12764000 }, { "epoch": 36.95, "learning_rate": 3.1532662560207486e-05, "loss": 2.0741, "step": 12764500 }, { "epoch": 36.95, "learning_rate": 3.153193891256021e-05, "loss": 2.0715, "step": 12765000 }, { "epoch": 36.95, "learning_rate": 3.153121526491293e-05, "loss": 2.0716, "step": 12765500 }, { "epoch": 36.95, "learning_rate": 3.153049161726566e-05, "loss": 2.0678, "step": 12766000 }, { "epoch": 36.95, "learning_rate": 3.152976796961838e-05, "loss": 2.0927, "step": 12766500 }, { "epoch": 36.96, "learning_rate": 3.1529044321971104e-05, "loss": 2.0689, "step": 12767000 }, { "epoch": 36.96, "learning_rate": 3.1528320674323826e-05, "loss": 2.0645, "step": 12767500 }, { "epoch": 36.96, "learning_rate": 3.1527599921267135e-05, "loss": 2.0771, "step": 12768000 }, { "epoch": 36.96, "learning_rate": 3.152687627361986e-05, "loss": 2.032, "step": 12768500 }, { "epoch": 36.96, "learning_rate": 3.152615262597258e-05, "loss": 2.0926, "step": 12769000 }, { "epoch": 36.96, "learning_rate": 3.152542897832531e-05, "loss": 2.0896, "step": 12769500 }, { "epoch": 36.96, "learning_rate": 3.152470533067803e-05, "loss": 2.0903, "step": 12770000 }, { "epoch": 36.97, "learning_rate": 3.152398168303075e-05, "loss": 2.0673, "step": 12770500 }, { "epoch": 36.97, "learning_rate": 3.1523259482678775e-05, "loss": 2.0557, "step": 12771000 }, { "epoch": 36.97, "learning_rate": 3.15225358350315e-05, "loss": 2.072, "step": 12771500 }, { "epoch": 36.97, "learning_rate": 3.152181363467951e-05, "loss": 2.0714, "step": 12772000 }, { "epoch": 36.97, "learning_rate": 3.1521089987032235e-05, "loss": 2.0792, "step": 12772500 }, { "epoch": 36.97, "learning_rate": 3.152036633938496e-05, "loss": 2.0703, "step": 12773000 }, { "epoch": 36.97, "learning_rate": 3.151964269173768e-05, "loss": 2.0501, "step": 12773500 }, { "epoch": 36.98, "learning_rate": 3.151891904409041e-05, "loss": 2.0818, "step": 12774000 }, { "epoch": 36.98, "learning_rate": 3.151819539644313e-05, "loss": 2.108, "step": 12774500 }, { "epoch": 36.98, "learning_rate": 3.151747319609115e-05, "loss": 2.0914, "step": 12775000 }, { "epoch": 36.98, "learning_rate": 3.151674954844387e-05, "loss": 2.0618, "step": 12775500 }, { "epoch": 36.98, "learning_rate": 3.151602590079659e-05, "loss": 2.0905, "step": 12776000 }, { "epoch": 36.98, "learning_rate": 3.151530225314931e-05, "loss": 2.0843, "step": 12776500 }, { "epoch": 36.98, "learning_rate": 3.1514578605502036e-05, "loss": 2.0828, "step": 12777000 }, { "epoch": 36.99, "learning_rate": 3.151385495785476e-05, "loss": 2.0532, "step": 12777500 }, { "epoch": 36.99, "learning_rate": 3.151313131020748e-05, "loss": 2.0684, "step": 12778000 }, { "epoch": 36.99, "learning_rate": 3.151240766256021e-05, "loss": 2.0754, "step": 12778500 }, { "epoch": 36.99, "learning_rate": 3.151168401491293e-05, "loss": 2.0595, "step": 12779000 }, { "epoch": 36.99, "learning_rate": 3.151096036726566e-05, "loss": 2.0687, "step": 12779500 }, { "epoch": 36.99, "learning_rate": 3.151023671961838e-05, "loss": 2.0687, "step": 12780000 }, { "epoch": 36.99, "learning_rate": 3.15095145192664e-05, "loss": 2.0732, "step": 12780500 }, { "epoch": 37.0, "learning_rate": 3.150879087161912e-05, "loss": 2.1001, "step": 12781000 }, { "epoch": 37.0, "learning_rate": 3.1508068671267136e-05, "loss": 2.0559, "step": 12781500 }, { "epoch": 37.0, "learning_rate": 3.150734502361986e-05, "loss": 2.0639, "step": 12782000 }, { "epoch": 37.0, "eval_accuracy": 0.6695824526693803, "eval_accuracy_mlm": 0.6348121816080246, "eval_accuracy_nsp": 0.8559096762957565, "eval_loss": 2.1710124015808105, "eval_runtime": 331.6656, "eval_samples_per_second": 1315.741, "eval_steps_per_second": 54.823, "step": 12782464 }, { "epoch": 37.0, "learning_rate": 3.150662137597258e-05, "loss": 2.0733, "step": 12782500 }, { "epoch": 37.0, "learning_rate": 3.150589772832531e-05, "loss": 2.0443, "step": 12783000 }, { "epoch": 37.0, "learning_rate": 3.150517408067803e-05, "loss": 2.0477, "step": 12783500 }, { "epoch": 37.0, "learning_rate": 3.1504450433030754e-05, "loss": 2.0526, "step": 12784000 }, { "epoch": 37.01, "learning_rate": 3.1503726785383476e-05, "loss": 2.0618, "step": 12784500 }, { "epoch": 37.01, "learning_rate": 3.15030031377362e-05, "loss": 2.0671, "step": 12785000 }, { "epoch": 37.01, "learning_rate": 3.150227949008892e-05, "loss": 2.0591, "step": 12785500 }, { "epoch": 37.01, "learning_rate": 3.150155584244165e-05, "loss": 2.0296, "step": 12786000 }, { "epoch": 37.01, "learning_rate": 3.150083219479437e-05, "loss": 2.075, "step": 12786500 }, { "epoch": 37.01, "learning_rate": 3.1500108547147094e-05, "loss": 2.0501, "step": 12787000 }, { "epoch": 37.01, "learning_rate": 3.1499384899499816e-05, "loss": 2.0698, "step": 12787500 }, { "epoch": 37.02, "learning_rate": 3.149866125185254e-05, "loss": 2.057, "step": 12788000 }, { "epoch": 37.02, "learning_rate": 3.149793905150056e-05, "loss": 2.0676, "step": 12788500 }, { "epoch": 37.02, "learning_rate": 3.149721540385328e-05, "loss": 2.0529, "step": 12789000 }, { "epoch": 37.02, "learning_rate": 3.1496491756206005e-05, "loss": 2.0617, "step": 12789500 }, { "epoch": 37.02, "learning_rate": 3.149576810855873e-05, "loss": 2.0428, "step": 12790000 }, { "epoch": 37.02, "learning_rate": 3.149504590820674e-05, "loss": 2.0492, "step": 12790500 }, { "epoch": 37.02, "learning_rate": 3.1494322260559465e-05, "loss": 2.0331, "step": 12791000 }, { "epoch": 37.03, "learning_rate": 3.149360006020749e-05, "loss": 2.0371, "step": 12791500 }, { "epoch": 37.03, "learning_rate": 3.149287641256021e-05, "loss": 2.0314, "step": 12792000 }, { "epoch": 37.03, "learning_rate": 3.149215276491293e-05, "loss": 2.0745, "step": 12792500 }, { "epoch": 37.03, "learning_rate": 3.1491429117265654e-05, "loss": 2.0652, "step": 12793000 }, { "epoch": 37.03, "learning_rate": 3.149070546961838e-05, "loss": 2.0504, "step": 12793500 }, { "epoch": 37.03, "learning_rate": 3.14899832692664e-05, "loss": 2.0866, "step": 12794000 }, { "epoch": 37.03, "learning_rate": 3.148925962161912e-05, "loss": 2.0443, "step": 12794500 }, { "epoch": 37.04, "learning_rate": 3.148853597397184e-05, "loss": 2.0399, "step": 12795000 }, { "epoch": 37.04, "learning_rate": 3.1487812326324566e-05, "loss": 2.0583, "step": 12795500 }, { "epoch": 37.04, "learning_rate": 3.148708867867729e-05, "loss": 2.0441, "step": 12796000 }, { "epoch": 37.04, "learning_rate": 3.148636503103001e-05, "loss": 2.061, "step": 12796500 }, { "epoch": 37.04, "learning_rate": 3.148564138338274e-05, "loss": 2.0219, "step": 12797000 }, { "epoch": 37.04, "learning_rate": 3.1484919183030755e-05, "loss": 2.0553, "step": 12797500 }, { "epoch": 37.04, "learning_rate": 3.148419553538348e-05, "loss": 2.0748, "step": 12798000 }, { "epoch": 37.05, "learning_rate": 3.14834718877362e-05, "loss": 2.0494, "step": 12798500 }, { "epoch": 37.05, "learning_rate": 3.148274824008892e-05, "loss": 2.0528, "step": 12799000 }, { "epoch": 37.05, "learning_rate": 3.1482024592441644e-05, "loss": 2.0329, "step": 12799500 }, { "epoch": 37.05, "learning_rate": 3.1481300944794366e-05, "loss": 2.0571, "step": 12800000 }, { "epoch": 37.05, "learning_rate": 3.148057729714709e-05, "loss": 2.0598, "step": 12800500 }, { "epoch": 37.05, "learning_rate": 3.147985364949982e-05, "loss": 2.0591, "step": 12801000 }, { "epoch": 37.06, "learning_rate": 3.147913000185254e-05, "loss": 2.0435, "step": 12801500 }, { "epoch": 37.06, "learning_rate": 3.147840780150056e-05, "loss": 2.0701, "step": 12802000 }, { "epoch": 37.06, "learning_rate": 3.1477684153853284e-05, "loss": 2.0745, "step": 12802500 }, { "epoch": 37.06, "learning_rate": 3.1476960506206006e-05, "loss": 2.0636, "step": 12803000 }, { "epoch": 37.06, "learning_rate": 3.147623685855873e-05, "loss": 2.0603, "step": 12803500 }, { "epoch": 37.06, "learning_rate": 3.147551321091145e-05, "loss": 2.0392, "step": 12804000 }, { "epoch": 37.06, "learning_rate": 3.1474791010559466e-05, "loss": 2.0594, "step": 12804500 }, { "epoch": 37.07, "learning_rate": 3.147406736291219e-05, "loss": 2.0611, "step": 12805000 }, { "epoch": 37.07, "learning_rate": 3.147334371526491e-05, "loss": 2.0382, "step": 12805500 }, { "epoch": 37.07, "learning_rate": 3.147262006761764e-05, "loss": 2.027, "step": 12806000 }, { "epoch": 37.07, "learning_rate": 3.1471897867265655e-05, "loss": 2.0509, "step": 12806500 }, { "epoch": 37.07, "learning_rate": 3.147117421961838e-05, "loss": 2.0347, "step": 12807000 }, { "epoch": 37.07, "learning_rate": 3.147045201926639e-05, "loss": 2.05, "step": 12807500 }, { "epoch": 37.07, "learning_rate": 3.1469728371619115e-05, "loss": 2.0598, "step": 12808000 }, { "epoch": 37.08, "learning_rate": 3.1469004723971844e-05, "loss": 2.056, "step": 12808500 }, { "epoch": 37.08, "learning_rate": 3.146828252361986e-05, "loss": 2.0344, "step": 12809000 }, { "epoch": 37.08, "learning_rate": 3.146755887597259e-05, "loss": 2.0352, "step": 12809500 }, { "epoch": 37.08, "learning_rate": 3.146683522832531e-05, "loss": 2.0678, "step": 12810000 }, { "epoch": 37.08, "learning_rate": 3.146611158067803e-05, "loss": 2.0444, "step": 12810500 }, { "epoch": 37.08, "learning_rate": 3.1465387933030755e-05, "loss": 2.0615, "step": 12811000 }, { "epoch": 37.08, "learning_rate": 3.146466428538348e-05, "loss": 2.0288, "step": 12811500 }, { "epoch": 37.09, "learning_rate": 3.14639406377362e-05, "loss": 2.0388, "step": 12812000 }, { "epoch": 37.09, "learning_rate": 3.146321699008892e-05, "loss": 2.0561, "step": 12812500 }, { "epoch": 37.09, "learning_rate": 3.1462493342441644e-05, "loss": 2.0349, "step": 12813000 }, { "epoch": 37.09, "learning_rate": 3.1461769694794367e-05, "loss": 2.0255, "step": 12813500 }, { "epoch": 37.09, "learning_rate": 3.146104604714709e-05, "loss": 2.0367, "step": 12814000 }, { "epoch": 37.09, "learning_rate": 3.146032239949981e-05, "loss": 2.0356, "step": 12814500 }, { "epoch": 37.09, "learning_rate": 3.145960019914783e-05, "loss": 2.0546, "step": 12815000 }, { "epoch": 37.1, "learning_rate": 3.1458876551500556e-05, "loss": 2.068, "step": 12815500 }, { "epoch": 37.1, "learning_rate": 3.1458152903853285e-05, "loss": 2.0404, "step": 12816000 }, { "epoch": 37.1, "learning_rate": 3.145742925620601e-05, "loss": 2.0273, "step": 12816500 }, { "epoch": 37.1, "learning_rate": 3.145670560855873e-05, "loss": 2.0524, "step": 12817000 }, { "epoch": 37.1, "learning_rate": 3.1455983408206745e-05, "loss": 2.0588, "step": 12817500 }, { "epoch": 37.1, "learning_rate": 3.145525976055947e-05, "loss": 2.0481, "step": 12818000 }, { "epoch": 37.1, "learning_rate": 3.145453611291219e-05, "loss": 2.0685, "step": 12818500 }, { "epoch": 37.11, "learning_rate": 3.145381246526491e-05, "loss": 2.0495, "step": 12819000 }, { "epoch": 37.11, "learning_rate": 3.145308881761764e-05, "loss": 2.0588, "step": 12819500 }, { "epoch": 37.11, "learning_rate": 3.145236516997036e-05, "loss": 2.0462, "step": 12820000 }, { "epoch": 37.11, "learning_rate": 3.1451641522323085e-05, "loss": 2.0389, "step": 12820500 }, { "epoch": 37.11, "learning_rate": 3.145091787467581e-05, "loss": 2.0441, "step": 12821000 }, { "epoch": 37.11, "learning_rate": 3.145019422702853e-05, "loss": 2.036, "step": 12821500 }, { "epoch": 37.11, "learning_rate": 3.1449472026676545e-05, "loss": 2.0632, "step": 12822000 }, { "epoch": 37.12, "learning_rate": 3.144874837902927e-05, "loss": 2.0653, "step": 12822500 }, { "epoch": 37.12, "learning_rate": 3.1448024731381996e-05, "loss": 2.044, "step": 12823000 }, { "epoch": 37.12, "learning_rate": 3.144730108373472e-05, "loss": 2.049, "step": 12823500 }, { "epoch": 37.12, "learning_rate": 3.144657743608744e-05, "loss": 2.0516, "step": 12824000 }, { "epoch": 37.12, "learning_rate": 3.144585378844016e-05, "loss": 2.0379, "step": 12824500 }, { "epoch": 37.12, "learning_rate": 3.144513014079289e-05, "loss": 2.0465, "step": 12825000 }, { "epoch": 37.12, "learning_rate": 3.1444406493145614e-05, "loss": 2.0307, "step": 12825500 }, { "epoch": 37.13, "learning_rate": 3.1443682845498336e-05, "loss": 2.0547, "step": 12826000 }, { "epoch": 37.13, "learning_rate": 3.1442962092441645e-05, "loss": 2.0525, "step": 12826500 }, { "epoch": 37.13, "learning_rate": 3.144223844479437e-05, "loss": 2.0422, "step": 12827000 }, { "epoch": 37.13, "learning_rate": 3.144151479714709e-05, "loss": 2.0376, "step": 12827500 }, { "epoch": 37.13, "learning_rate": 3.144079259679511e-05, "loss": 2.0835, "step": 12828000 }, { "epoch": 37.13, "learning_rate": 3.1440068949147834e-05, "loss": 2.0259, "step": 12828500 }, { "epoch": 37.13, "learning_rate": 3.143934674879585e-05, "loss": 2.0629, "step": 12829000 }, { "epoch": 37.14, "learning_rate": 3.143862310114857e-05, "loss": 2.053, "step": 12829500 }, { "epoch": 37.14, "learning_rate": 3.143790090079659e-05, "loss": 2.0567, "step": 12830000 }, { "epoch": 37.14, "learning_rate": 3.1437177253149317e-05, "loss": 2.0595, "step": 12830500 }, { "epoch": 37.14, "learning_rate": 3.143645360550204e-05, "loss": 2.0538, "step": 12831000 }, { "epoch": 37.14, "learning_rate": 3.143572995785477e-05, "loss": 2.0499, "step": 12831500 }, { "epoch": 37.14, "learning_rate": 3.143500631020749e-05, "loss": 2.0701, "step": 12832000 }, { "epoch": 37.14, "learning_rate": 3.143428266256021e-05, "loss": 2.0489, "step": 12832500 }, { "epoch": 37.15, "learning_rate": 3.1433559014912934e-05, "loss": 2.0404, "step": 12833000 }, { "epoch": 37.15, "learning_rate": 3.143283536726566e-05, "loss": 2.0546, "step": 12833500 }, { "epoch": 37.15, "learning_rate": 3.143211171961838e-05, "loss": 2.0572, "step": 12834000 }, { "epoch": 37.15, "learning_rate": 3.14313880719711e-05, "loss": 2.0383, "step": 12834500 }, { "epoch": 37.15, "learning_rate": 3.1430664424323823e-05, "loss": 2.0599, "step": 12835000 }, { "epoch": 37.15, "learning_rate": 3.1429940776676546e-05, "loss": 2.0561, "step": 12835500 }, { "epoch": 37.15, "learning_rate": 3.142921712902927e-05, "loss": 2.0625, "step": 12836000 }, { "epoch": 37.16, "learning_rate": 3.142849348138199e-05, "loss": 2.0657, "step": 12836500 }, { "epoch": 37.16, "learning_rate": 3.142776983373472e-05, "loss": 2.0629, "step": 12837000 }, { "epoch": 37.16, "learning_rate": 3.142704618608744e-05, "loss": 2.0451, "step": 12837500 }, { "epoch": 37.16, "learning_rate": 3.142632253844017e-05, "loss": 2.0775, "step": 12838000 }, { "epoch": 37.16, "learning_rate": 3.1425600338088186e-05, "loss": 2.0539, "step": 12838500 }, { "epoch": 37.16, "learning_rate": 3.142487669044091e-05, "loss": 2.0889, "step": 12839000 }, { "epoch": 37.17, "learning_rate": 3.142415304279363e-05, "loss": 2.0352, "step": 12839500 }, { "epoch": 37.17, "learning_rate": 3.142342939514635e-05, "loss": 2.0453, "step": 12840000 }, { "epoch": 37.17, "learning_rate": 3.1422705747499075e-05, "loss": 2.061, "step": 12840500 }, { "epoch": 37.17, "learning_rate": 3.14219820998518e-05, "loss": 2.0602, "step": 12841000 }, { "epoch": 37.17, "learning_rate": 3.142125845220452e-05, "loss": 2.0656, "step": 12841500 }, { "epoch": 37.17, "learning_rate": 3.142053480455724e-05, "loss": 2.0397, "step": 12842000 }, { "epoch": 37.17, "learning_rate": 3.1419812604205264e-05, "loss": 2.0388, "step": 12842500 }, { "epoch": 37.18, "learning_rate": 3.141909040385328e-05, "loss": 2.0363, "step": 12843000 }, { "epoch": 37.18, "learning_rate": 3.1418368203501295e-05, "loss": 2.054, "step": 12843500 }, { "epoch": 37.18, "learning_rate": 3.141764600314932e-05, "loss": 2.0775, "step": 12844000 }, { "epoch": 37.18, "learning_rate": 3.141692235550204e-05, "loss": 2.0386, "step": 12844500 }, { "epoch": 37.18, "learning_rate": 3.141619870785476e-05, "loss": 2.0678, "step": 12845000 }, { "epoch": 37.18, "learning_rate": 3.1415475060207484e-05, "loss": 2.041, "step": 12845500 }, { "epoch": 37.18, "learning_rate": 3.141475141256021e-05, "loss": 2.0508, "step": 12846000 }, { "epoch": 37.19, "learning_rate": 3.1414027764912935e-05, "loss": 2.0469, "step": 12846500 }, { "epoch": 37.19, "learning_rate": 3.141330411726566e-05, "loss": 2.0586, "step": 12847000 }, { "epoch": 37.19, "learning_rate": 3.141258046961838e-05, "loss": 2.0609, "step": 12847500 }, { "epoch": 37.19, "learning_rate": 3.14118568219711e-05, "loss": 2.0562, "step": 12848000 }, { "epoch": 37.19, "learning_rate": 3.1411133174323824e-05, "loss": 2.0672, "step": 12848500 }, { "epoch": 37.19, "learning_rate": 3.1410409526676546e-05, "loss": 2.0222, "step": 12849000 }, { "epoch": 37.19, "learning_rate": 3.140968587902927e-05, "loss": 2.0456, "step": 12849500 }, { "epoch": 37.2, "learning_rate": 3.1408965125972584e-05, "loss": 2.0156, "step": 12850000 }, { "epoch": 37.2, "learning_rate": 3.1408241478325307e-05, "loss": 2.0696, "step": 12850500 }, { "epoch": 37.2, "learning_rate": 3.140751783067803e-05, "loss": 2.0446, "step": 12851000 }, { "epoch": 37.2, "learning_rate": 3.140679418303075e-05, "loss": 2.0562, "step": 12851500 }, { "epoch": 37.2, "learning_rate": 3.140607053538347e-05, "loss": 2.0444, "step": 12852000 }, { "epoch": 37.2, "learning_rate": 3.1405346887736195e-05, "loss": 2.0644, "step": 12852500 }, { "epoch": 37.2, "learning_rate": 3.140462324008892e-05, "loss": 2.0397, "step": 12853000 }, { "epoch": 37.21, "learning_rate": 3.140389959244165e-05, "loss": 2.0498, "step": 12853500 }, { "epoch": 37.21, "learning_rate": 3.140317594479437e-05, "loss": 2.0677, "step": 12854000 }, { "epoch": 37.21, "learning_rate": 3.14024522971471e-05, "loss": 2.0474, "step": 12854500 }, { "epoch": 37.21, "learning_rate": 3.1401730096795114e-05, "loss": 2.0337, "step": 12855000 }, { "epoch": 37.21, "learning_rate": 3.1401006449147836e-05, "loss": 2.0331, "step": 12855500 }, { "epoch": 37.21, "learning_rate": 3.140028424879585e-05, "loss": 2.0735, "step": 12856000 }, { "epoch": 37.21, "learning_rate": 3.139956204844387e-05, "loss": 2.0488, "step": 12856500 }, { "epoch": 37.22, "learning_rate": 3.1398838400796596e-05, "loss": 2.0594, "step": 12857000 }, { "epoch": 37.22, "learning_rate": 3.139811475314932e-05, "loss": 2.0403, "step": 12857500 }, { "epoch": 37.22, "learning_rate": 3.139739110550204e-05, "loss": 2.053, "step": 12858000 }, { "epoch": 37.22, "learning_rate": 3.139666745785476e-05, "loss": 2.0542, "step": 12858500 }, { "epoch": 37.22, "learning_rate": 3.1395943810207485e-05, "loss": 2.0581, "step": 12859000 }, { "epoch": 37.22, "learning_rate": 3.13952216098555e-05, "loss": 2.027, "step": 12859500 }, { "epoch": 37.22, "learning_rate": 3.139449796220822e-05, "loss": 2.0397, "step": 12860000 }, { "epoch": 37.23, "learning_rate": 3.1393775761856245e-05, "loss": 2.0382, "step": 12860500 }, { "epoch": 37.23, "learning_rate": 3.139305211420897e-05, "loss": 2.0236, "step": 12861000 }, { "epoch": 37.23, "learning_rate": 3.1392328466561696e-05, "loss": 2.0529, "step": 12861500 }, { "epoch": 37.23, "learning_rate": 3.139160481891442e-05, "loss": 2.0593, "step": 12862000 }, { "epoch": 37.23, "learning_rate": 3.139088117126714e-05, "loss": 2.0597, "step": 12862500 }, { "epoch": 37.23, "learning_rate": 3.139015752361986e-05, "loss": 2.0431, "step": 12863000 }, { "epoch": 37.23, "learning_rate": 3.1389433875972585e-05, "loss": 2.0572, "step": 12863500 }, { "epoch": 37.24, "learning_rate": 3.138871022832531e-05, "loss": 2.0598, "step": 12864000 }, { "epoch": 37.24, "learning_rate": 3.138798658067803e-05, "loss": 2.0615, "step": 12864500 }, { "epoch": 37.24, "learning_rate": 3.138726293303075e-05, "loss": 2.0538, "step": 12865000 }, { "epoch": 37.24, "learning_rate": 3.1386539285383474e-05, "loss": 2.0478, "step": 12865500 }, { "epoch": 37.24, "learning_rate": 3.1385815637736196e-05, "loss": 2.0511, "step": 12866000 }, { "epoch": 37.24, "learning_rate": 3.138509199008892e-05, "loss": 2.0737, "step": 12866500 }, { "epoch": 37.24, "learning_rate": 3.138436978973694e-05, "loss": 2.0632, "step": 12867000 }, { "epoch": 37.25, "learning_rate": 3.138364614208966e-05, "loss": 2.0562, "step": 12867500 }, { "epoch": 37.25, "learning_rate": 3.1382922494442385e-05, "loss": 2.0515, "step": 12868000 }, { "epoch": 37.25, "learning_rate": 3.1382198846795114e-05, "loss": 2.0701, "step": 12868500 }, { "epoch": 37.25, "learning_rate": 3.1381475199147837e-05, "loss": 2.0683, "step": 12869000 }, { "epoch": 37.25, "learning_rate": 3.138075299879585e-05, "loss": 2.0691, "step": 12869500 }, { "epoch": 37.25, "learning_rate": 3.1380029351148574e-05, "loss": 2.0651, "step": 12870000 }, { "epoch": 37.25, "learning_rate": 3.13793071507966e-05, "loss": 2.0578, "step": 12870500 }, { "epoch": 37.26, "learning_rate": 3.137858350314932e-05, "loss": 2.0752, "step": 12871000 }, { "epoch": 37.26, "learning_rate": 3.137785985550204e-05, "loss": 2.0624, "step": 12871500 }, { "epoch": 37.26, "learning_rate": 3.137713620785476e-05, "loss": 2.0585, "step": 12872000 }, { "epoch": 37.26, "learning_rate": 3.1376412560207486e-05, "loss": 2.0778, "step": 12872500 }, { "epoch": 37.26, "learning_rate": 3.137568891256021e-05, "loss": 2.0747, "step": 12873000 }, { "epoch": 37.26, "learning_rate": 3.137496526491293e-05, "loss": 2.0462, "step": 12873500 }, { "epoch": 37.26, "learning_rate": 3.1374243064560946e-05, "loss": 2.0707, "step": 12874000 }, { "epoch": 37.27, "learning_rate": 3.1373519416913675e-05, "loss": 2.0582, "step": 12874500 }, { "epoch": 37.27, "learning_rate": 3.13727957692664e-05, "loss": 2.0685, "step": 12875000 }, { "epoch": 37.27, "learning_rate": 3.137207212161912e-05, "loss": 2.0375, "step": 12875500 }, { "epoch": 37.27, "learning_rate": 3.137134847397185e-05, "loss": 2.0823, "step": 12876000 }, { "epoch": 37.27, "learning_rate": 3.137062482632457e-05, "loss": 2.0763, "step": 12876500 }, { "epoch": 37.27, "learning_rate": 3.136990117867729e-05, "loss": 2.0604, "step": 12877000 }, { "epoch": 37.28, "learning_rate": 3.1369177531030015e-05, "loss": 2.061, "step": 12877500 }, { "epoch": 37.28, "learning_rate": 3.136845388338274e-05, "loss": 2.0807, "step": 12878000 }, { "epoch": 37.28, "learning_rate": 3.136773023573546e-05, "loss": 2.0394, "step": 12878500 }, { "epoch": 37.28, "learning_rate": 3.136700658808818e-05, "loss": 2.0419, "step": 12879000 }, { "epoch": 37.28, "learning_rate": 3.1366282940440904e-05, "loss": 2.045, "step": 12879500 }, { "epoch": 37.28, "learning_rate": 3.1365559292793626e-05, "loss": 2.0743, "step": 12880000 }, { "epoch": 37.28, "learning_rate": 3.136483564514635e-05, "loss": 2.0666, "step": 12880500 }, { "epoch": 37.29, "learning_rate": 3.136411199749907e-05, "loss": 2.0324, "step": 12881000 }, { "epoch": 37.29, "learning_rate": 3.136338979714709e-05, "loss": 2.0815, "step": 12881500 }, { "epoch": 37.29, "learning_rate": 3.1362666149499815e-05, "loss": 2.0679, "step": 12882000 }, { "epoch": 37.29, "learning_rate": 3.136194394914783e-05, "loss": 2.0357, "step": 12882500 }, { "epoch": 37.29, "learning_rate": 3.136122030150055e-05, "loss": 2.0576, "step": 12883000 }, { "epoch": 37.29, "learning_rate": 3.136049665385328e-05, "loss": 2.0622, "step": 12883500 }, { "epoch": 37.29, "learning_rate": 3.1359773006206004e-05, "loss": 2.0647, "step": 12884000 }, { "epoch": 37.3, "learning_rate": 3.1359049358558726e-05, "loss": 2.0793, "step": 12884500 }, { "epoch": 37.3, "learning_rate": 3.135832715820675e-05, "loss": 2.0534, "step": 12885000 }, { "epoch": 37.3, "learning_rate": 3.135760351055947e-05, "loss": 2.0466, "step": 12885500 }, { "epoch": 37.3, "learning_rate": 3.135687986291219e-05, "loss": 2.0615, "step": 12886000 }, { "epoch": 37.3, "learning_rate": 3.1356156215264915e-05, "loss": 2.0571, "step": 12886500 }, { "epoch": 37.3, "learning_rate": 3.135543256761764e-05, "loss": 2.0773, "step": 12887000 }, { "epoch": 37.3, "learning_rate": 3.135470891997036e-05, "loss": 2.0507, "step": 12887500 }, { "epoch": 37.31, "learning_rate": 3.135398527232308e-05, "loss": 2.0288, "step": 12888000 }, { "epoch": 37.31, "learning_rate": 3.1353261624675804e-05, "loss": 2.0455, "step": 12888500 }, { "epoch": 37.31, "learning_rate": 3.1352537977028526e-05, "loss": 2.0661, "step": 12889000 }, { "epoch": 37.31, "learning_rate": 3.135181432938125e-05, "loss": 2.0501, "step": 12889500 }, { "epoch": 37.31, "learning_rate": 3.135109068173398e-05, "loss": 2.0811, "step": 12890000 }, { "epoch": 37.31, "learning_rate": 3.13503670340867e-05, "loss": 2.045, "step": 12890500 }, { "epoch": 37.31, "learning_rate": 3.134964483373472e-05, "loss": 2.0582, "step": 12891000 }, { "epoch": 37.32, "learning_rate": 3.1348921186087445e-05, "loss": 2.0642, "step": 12891500 }, { "epoch": 37.32, "learning_rate": 3.134819753844017e-05, "loss": 2.061, "step": 12892000 }, { "epoch": 37.32, "learning_rate": 3.134747389079289e-05, "loss": 2.0654, "step": 12892500 }, { "epoch": 37.32, "learning_rate": 3.134675024314561e-05, "loss": 2.0475, "step": 12893000 }, { "epoch": 37.32, "learning_rate": 3.1346026595498333e-05, "loss": 2.0303, "step": 12893500 }, { "epoch": 37.32, "learning_rate": 3.134530439514635e-05, "loss": 2.0547, "step": 12894000 }, { "epoch": 37.32, "learning_rate": 3.134458074749908e-05, "loss": 2.0492, "step": 12894500 }, { "epoch": 37.33, "learning_rate": 3.13438570998518e-05, "loss": 2.0527, "step": 12895000 }, { "epoch": 37.33, "learning_rate": 3.1343134899499816e-05, "loss": 2.0729, "step": 12895500 }, { "epoch": 37.33, "learning_rate": 3.134241125185254e-05, "loss": 2.0566, "step": 12896000 }, { "epoch": 37.33, "learning_rate": 3.1341689051500554e-05, "loss": 2.0602, "step": 12896500 }, { "epoch": 37.33, "learning_rate": 3.1340965403853276e-05, "loss": 2.0541, "step": 12897000 }, { "epoch": 37.33, "learning_rate": 3.1340241756206e-05, "loss": 2.0517, "step": 12897500 }, { "epoch": 37.33, "learning_rate": 3.133951810855873e-05, "loss": 2.0539, "step": 12898000 }, { "epoch": 37.34, "learning_rate": 3.133879446091145e-05, "loss": 2.0384, "step": 12898500 }, { "epoch": 37.34, "learning_rate": 3.133807081326418e-05, "loss": 2.0552, "step": 12899000 }, { "epoch": 37.34, "learning_rate": 3.13373471656169e-05, "loss": 2.062, "step": 12899500 }, { "epoch": 37.34, "learning_rate": 3.133662351796962e-05, "loss": 2.0334, "step": 12900000 }, { "epoch": 37.34, "learning_rate": 3.1335899870322345e-05, "loss": 2.0572, "step": 12900500 }, { "epoch": 37.34, "learning_rate": 3.133517622267507e-05, "loss": 2.0613, "step": 12901000 }, { "epoch": 37.34, "learning_rate": 3.133445402232308e-05, "loss": 2.0381, "step": 12901500 }, { "epoch": 37.35, "learning_rate": 3.1333730374675805e-05, "loss": 2.0464, "step": 12902000 }, { "epoch": 37.35, "learning_rate": 3.133300672702853e-05, "loss": 2.0224, "step": 12902500 }, { "epoch": 37.35, "learning_rate": 3.133228307938125e-05, "loss": 2.029, "step": 12903000 }, { "epoch": 37.35, "learning_rate": 3.133155943173398e-05, "loss": 2.05, "step": 12903500 }, { "epoch": 37.35, "learning_rate": 3.1330837231381994e-05, "loss": 2.0893, "step": 12904000 }, { "epoch": 37.35, "learning_rate": 3.1330113583734716e-05, "loss": 2.0721, "step": 12904500 }, { "epoch": 37.35, "learning_rate": 3.132939138338273e-05, "loss": 2.0575, "step": 12905000 }, { "epoch": 37.36, "learning_rate": 3.1328667735735454e-05, "loss": 2.0882, "step": 12905500 }, { "epoch": 37.36, "learning_rate": 3.132794408808818e-05, "loss": 2.0648, "step": 12906000 }, { "epoch": 37.36, "learning_rate": 3.1327220440440905e-05, "loss": 2.0655, "step": 12906500 }, { "epoch": 37.36, "learning_rate": 3.132649824008893e-05, "loss": 2.0813, "step": 12907000 }, { "epoch": 37.36, "learning_rate": 3.132577459244165e-05, "loss": 2.0641, "step": 12907500 }, { "epoch": 37.36, "learning_rate": 3.132505094479437e-05, "loss": 2.0575, "step": 12908000 }, { "epoch": 37.36, "learning_rate": 3.1324327297147094e-05, "loss": 2.0594, "step": 12908500 }, { "epoch": 37.37, "learning_rate": 3.1323603649499817e-05, "loss": 2.0893, "step": 12909000 }, { "epoch": 37.37, "learning_rate": 3.132288000185254e-05, "loss": 2.0487, "step": 12909500 }, { "epoch": 37.37, "learning_rate": 3.132215635420526e-05, "loss": 2.0791, "step": 12910000 }, { "epoch": 37.37, "learning_rate": 3.132143270655798e-05, "loss": 2.0554, "step": 12910500 }, { "epoch": 37.37, "learning_rate": 3.1320709058910706e-05, "loss": 2.0522, "step": 12911000 }, { "epoch": 37.37, "learning_rate": 3.131998541126343e-05, "loss": 2.0863, "step": 12911500 }, { "epoch": 37.37, "learning_rate": 3.131926176361615e-05, "loss": 2.0742, "step": 12912000 }, { "epoch": 37.38, "learning_rate": 3.131853811596888e-05, "loss": 2.0438, "step": 12912500 }, { "epoch": 37.38, "learning_rate": 3.131781736291219e-05, "loss": 2.0639, "step": 12913000 }, { "epoch": 37.38, "learning_rate": 3.131709371526492e-05, "loss": 2.0705, "step": 12913500 }, { "epoch": 37.38, "learning_rate": 3.131637006761764e-05, "loss": 2.053, "step": 12914000 }, { "epoch": 37.38, "learning_rate": 3.131564641997036e-05, "loss": 2.0696, "step": 12914500 }, { "epoch": 37.38, "learning_rate": 3.1314922772323084e-05, "loss": 2.0758, "step": 12915000 }, { "epoch": 37.39, "learning_rate": 3.1314199124675806e-05, "loss": 2.0698, "step": 12915500 }, { "epoch": 37.39, "learning_rate": 3.131347547702853e-05, "loss": 2.0601, "step": 12916000 }, { "epoch": 37.39, "learning_rate": 3.131275182938126e-05, "loss": 2.0536, "step": 12916500 }, { "epoch": 37.39, "learning_rate": 3.131202818173398e-05, "loss": 2.0587, "step": 12917000 }, { "epoch": 37.39, "learning_rate": 3.13113045340867e-05, "loss": 2.0739, "step": 12917500 }, { "epoch": 37.39, "learning_rate": 3.131058233373472e-05, "loss": 2.0908, "step": 12918000 }, { "epoch": 37.39, "learning_rate": 3.130986013338273e-05, "loss": 2.0574, "step": 12918500 }, { "epoch": 37.4, "learning_rate": 3.1309136485735455e-05, "loss": 2.077, "step": 12919000 }, { "epoch": 37.4, "learning_rate": 3.130841283808818e-05, "loss": 2.0645, "step": 12919500 }, { "epoch": 37.4, "learning_rate": 3.1307689190440906e-05, "loss": 2.0395, "step": 12920000 }, { "epoch": 37.4, "learning_rate": 3.130696554279363e-05, "loss": 2.0849, "step": 12920500 }, { "epoch": 37.4, "learning_rate": 3.130624189514636e-05, "loss": 2.0505, "step": 12921000 }, { "epoch": 37.4, "learning_rate": 3.130551969479437e-05, "loss": 2.0597, "step": 12921500 }, { "epoch": 37.4, "learning_rate": 3.1304796047147095e-05, "loss": 2.0682, "step": 12922000 }, { "epoch": 37.41, "learning_rate": 3.130407239949982e-05, "loss": 2.0476, "step": 12922500 }, { "epoch": 37.41, "learning_rate": 3.130334875185254e-05, "loss": 2.0432, "step": 12923000 }, { "epoch": 37.41, "learning_rate": 3.130262510420526e-05, "loss": 2.0715, "step": 12923500 }, { "epoch": 37.41, "learning_rate": 3.1301901456557984e-05, "loss": 2.0566, "step": 12924000 }, { "epoch": 37.41, "learning_rate": 3.1301179256206006e-05, "loss": 2.068, "step": 12924500 }, { "epoch": 37.41, "learning_rate": 3.130045560855873e-05, "loss": 2.0501, "step": 12925000 }, { "epoch": 37.41, "learning_rate": 3.129973196091145e-05, "loss": 2.0386, "step": 12925500 }, { "epoch": 37.42, "learning_rate": 3.129900831326417e-05, "loss": 2.0582, "step": 12926000 }, { "epoch": 37.42, "learning_rate": 3.129828611291219e-05, "loss": 2.0804, "step": 12926500 }, { "epoch": 37.42, "learning_rate": 3.129756246526491e-05, "loss": 2.0418, "step": 12927000 }, { "epoch": 37.42, "learning_rate": 3.129684026491293e-05, "loss": 2.0449, "step": 12927500 }, { "epoch": 37.42, "learning_rate": 3.129611806456095e-05, "loss": 2.04, "step": 12928000 }, { "epoch": 37.42, "learning_rate": 3.129539441691368e-05, "loss": 2.0667, "step": 12928500 }, { "epoch": 37.42, "learning_rate": 3.12946707692664e-05, "loss": 2.0602, "step": 12929000 }, { "epoch": 37.43, "learning_rate": 3.129394712161912e-05, "loss": 2.0692, "step": 12929500 }, { "epoch": 37.43, "learning_rate": 3.1293223473971845e-05, "loss": 2.0568, "step": 12930000 }, { "epoch": 37.43, "learning_rate": 3.129250127361986e-05, "loss": 2.0477, "step": 12930500 }, { "epoch": 37.43, "learning_rate": 3.129177762597258e-05, "loss": 2.0329, "step": 12931000 }, { "epoch": 37.43, "learning_rate": 3.1291053978325305e-05, "loss": 2.0705, "step": 12931500 }, { "epoch": 37.43, "learning_rate": 3.1290330330678034e-05, "loss": 2.0621, "step": 12932000 }, { "epoch": 37.43, "learning_rate": 3.1289606683030756e-05, "loss": 2.0655, "step": 12932500 }, { "epoch": 37.44, "learning_rate": 3.128888303538348e-05, "loss": 2.0733, "step": 12933000 }, { "epoch": 37.44, "learning_rate": 3.12881593877362e-05, "loss": 2.0578, "step": 12933500 }, { "epoch": 37.44, "learning_rate": 3.128743574008892e-05, "loss": 2.0701, "step": 12934000 }, { "epoch": 37.44, "learning_rate": 3.1286712092441645e-05, "loss": 2.0704, "step": 12934500 }, { "epoch": 37.44, "learning_rate": 3.128598844479437e-05, "loss": 2.0413, "step": 12935000 }, { "epoch": 37.44, "learning_rate": 3.128526479714709e-05, "loss": 2.0599, "step": 12935500 }, { "epoch": 37.44, "learning_rate": 3.128454114949982e-05, "loss": 2.0868, "step": 12936000 }, { "epoch": 37.45, "learning_rate": 3.1283818949147834e-05, "loss": 2.0592, "step": 12936500 }, { "epoch": 37.45, "learning_rate": 3.1283095301500556e-05, "loss": 2.0408, "step": 12937000 }, { "epoch": 37.45, "learning_rate": 3.1282371653853285e-05, "loss": 2.0478, "step": 12937500 }, { "epoch": 37.45, "learning_rate": 3.128164800620601e-05, "loss": 2.0526, "step": 12938000 }, { "epoch": 37.45, "learning_rate": 3.128092435855873e-05, "loss": 2.0834, "step": 12938500 }, { "epoch": 37.45, "learning_rate": 3.1280202158206745e-05, "loss": 2.0592, "step": 12939000 }, { "epoch": 37.45, "learning_rate": 3.127947851055947e-05, "loss": 2.0545, "step": 12939500 }, { "epoch": 37.46, "learning_rate": 3.127875486291219e-05, "loss": 2.0677, "step": 12940000 }, { "epoch": 37.46, "learning_rate": 3.127803121526491e-05, "loss": 2.0807, "step": 12940500 }, { "epoch": 37.46, "learning_rate": 3.1277309014912934e-05, "loss": 2.0602, "step": 12941000 }, { "epoch": 37.46, "learning_rate": 3.1276585367265656e-05, "loss": 2.0448, "step": 12941500 }, { "epoch": 37.46, "learning_rate": 3.127586171961838e-05, "loss": 2.0497, "step": 12942000 }, { "epoch": 37.46, "learning_rate": 3.1275139519266394e-05, "loss": 2.0583, "step": 12942500 }, { "epoch": 37.46, "learning_rate": 3.1274415871619116e-05, "loss": 2.0252, "step": 12943000 }, { "epoch": 37.47, "learning_rate": 3.1273692223971845e-05, "loss": 2.0688, "step": 12943500 }, { "epoch": 37.47, "learning_rate": 3.127296857632457e-05, "loss": 2.0565, "step": 12944000 }, { "epoch": 37.47, "learning_rate": 3.127224492867729e-05, "loss": 2.0713, "step": 12944500 }, { "epoch": 37.47, "learning_rate": 3.127152128103001e-05, "loss": 2.066, "step": 12945000 }, { "epoch": 37.47, "learning_rate": 3.1270797633382734e-05, "loss": 2.0758, "step": 12945500 }, { "epoch": 37.47, "learning_rate": 3.1270073985735456e-05, "loss": 2.0659, "step": 12946000 }, { "epoch": 37.47, "learning_rate": 3.1269350338088185e-05, "loss": 2.0594, "step": 12946500 }, { "epoch": 37.48, "learning_rate": 3.12686281377362e-05, "loss": 2.0371, "step": 12947000 }, { "epoch": 37.48, "learning_rate": 3.126790449008892e-05, "loss": 2.0485, "step": 12947500 }, { "epoch": 37.48, "learning_rate": 3.1267180842441646e-05, "loss": 2.0671, "step": 12948000 }, { "epoch": 37.48, "learning_rate": 3.126645864208966e-05, "loss": 2.0744, "step": 12948500 }, { "epoch": 37.48, "learning_rate": 3.126573499444238e-05, "loss": 2.0475, "step": 12949000 }, { "epoch": 37.48, "learning_rate": 3.1265011346795106e-05, "loss": 2.0244, "step": 12949500 }, { "epoch": 37.48, "learning_rate": 3.126428914644313e-05, "loss": 2.0682, "step": 12950000 }, { "epoch": 37.49, "learning_rate": 3.126356549879585e-05, "loss": 2.0616, "step": 12950500 }, { "epoch": 37.49, "learning_rate": 3.126284185114858e-05, "loss": 2.0552, "step": 12951000 }, { "epoch": 37.49, "learning_rate": 3.1262119650796595e-05, "loss": 2.0635, "step": 12951500 }, { "epoch": 37.49, "learning_rate": 3.126139600314932e-05, "loss": 2.0865, "step": 12952000 }, { "epoch": 37.49, "learning_rate": 3.126067235550204e-05, "loss": 2.0461, "step": 12952500 }, { "epoch": 37.49, "learning_rate": 3.125995015515006e-05, "loss": 2.0444, "step": 12953000 }, { "epoch": 37.5, "learning_rate": 3.1259226507502784e-05, "loss": 2.0739, "step": 12953500 }, { "epoch": 37.5, "learning_rate": 3.1258502859855506e-05, "loss": 2.025, "step": 12954000 }, { "epoch": 37.5, "learning_rate": 3.125777921220823e-05, "loss": 2.0528, "step": 12954500 }, { "epoch": 37.5, "learning_rate": 3.1257057011856244e-05, "loss": 2.0711, "step": 12955000 }, { "epoch": 37.5, "learning_rate": 3.1256333364208966e-05, "loss": 2.0575, "step": 12955500 }, { "epoch": 37.5, "learning_rate": 3.125560971656169e-05, "loss": 2.053, "step": 12956000 }, { "epoch": 37.5, "learning_rate": 3.125488606891441e-05, "loss": 2.0497, "step": 12956500 }, { "epoch": 37.51, "learning_rate": 3.125416242126713e-05, "loss": 2.0575, "step": 12957000 }, { "epoch": 37.51, "learning_rate": 3.125343877361986e-05, "loss": 2.0728, "step": 12957500 }, { "epoch": 37.51, "learning_rate": 3.1252715125972584e-05, "loss": 2.0736, "step": 12958000 }, { "epoch": 37.51, "learning_rate": 3.125199147832531e-05, "loss": 2.0505, "step": 12958500 }, { "epoch": 37.51, "learning_rate": 3.1251267830678035e-05, "loss": 2.0499, "step": 12959000 }, { "epoch": 37.51, "learning_rate": 3.125054418303076e-05, "loss": 2.0535, "step": 12959500 }, { "epoch": 37.51, "learning_rate": 3.124982053538348e-05, "loss": 2.0633, "step": 12960000 }, { "epoch": 37.52, "learning_rate": 3.12490968877362e-05, "loss": 2.0805, "step": 12960500 }, { "epoch": 37.52, "learning_rate": 3.1248373240088924e-05, "loss": 2.037, "step": 12961000 }, { "epoch": 37.52, "learning_rate": 3.1247649592441646e-05, "loss": 2.0465, "step": 12961500 }, { "epoch": 37.52, "learning_rate": 3.124692594479437e-05, "loss": 2.039, "step": 12962000 }, { "epoch": 37.52, "learning_rate": 3.124620229714709e-05, "loss": 2.0435, "step": 12962500 }, { "epoch": 37.52, "learning_rate": 3.124547864949981e-05, "loss": 2.0831, "step": 12963000 }, { "epoch": 37.52, "learning_rate": 3.1244756449147835e-05, "loss": 2.096, "step": 12963500 }, { "epoch": 37.53, "learning_rate": 3.124403280150056e-05, "loss": 2.0609, "step": 12964000 }, { "epoch": 37.53, "learning_rate": 3.124330915385328e-05, "loss": 2.057, "step": 12964500 }, { "epoch": 37.53, "learning_rate": 3.1242585506206e-05, "loss": 2.0515, "step": 12965000 }, { "epoch": 37.53, "learning_rate": 3.124186185855873e-05, "loss": 2.0721, "step": 12965500 }, { "epoch": 37.53, "learning_rate": 3.124113821091145e-05, "loss": 2.0519, "step": 12966000 }, { "epoch": 37.53, "learning_rate": 3.1240414563264176e-05, "loss": 2.0615, "step": 12966500 }, { "epoch": 37.53, "learning_rate": 3.12396909156169e-05, "loss": 2.0499, "step": 12967000 }, { "epoch": 37.54, "learning_rate": 3.1238970162560213e-05, "loss": 2.064, "step": 12967500 }, { "epoch": 37.54, "learning_rate": 3.1238246514912936e-05, "loss": 2.0364, "step": 12968000 }, { "epoch": 37.54, "learning_rate": 3.123752286726566e-05, "loss": 2.0707, "step": 12968500 }, { "epoch": 37.54, "learning_rate": 3.123679921961838e-05, "loss": 2.0493, "step": 12969000 }, { "epoch": 37.54, "learning_rate": 3.12360755719711e-05, "loss": 2.0695, "step": 12969500 }, { "epoch": 37.54, "learning_rate": 3.1235351924323825e-05, "loss": 2.0385, "step": 12970000 }, { "epoch": 37.54, "learning_rate": 3.123462827667655e-05, "loss": 2.0549, "step": 12970500 }, { "epoch": 37.55, "learning_rate": 3.123390462902927e-05, "loss": 2.0601, "step": 12971000 }, { "epoch": 37.55, "learning_rate": 3.123318098138199e-05, "loss": 2.0542, "step": 12971500 }, { "epoch": 37.55, "learning_rate": 3.1232458781030014e-05, "loss": 2.0738, "step": 12972000 }, { "epoch": 37.55, "learning_rate": 3.1231735133382736e-05, "loss": 2.0707, "step": 12972500 }, { "epoch": 37.55, "learning_rate": 3.123101148573546e-05, "loss": 2.069, "step": 12973000 }, { "epoch": 37.55, "learning_rate": 3.123028783808819e-05, "loss": 2.0603, "step": 12973500 }, { "epoch": 37.55, "learning_rate": 3.12295656377362e-05, "loss": 2.0437, "step": 12974000 }, { "epoch": 37.56, "learning_rate": 3.1228841990088925e-05, "loss": 2.0795, "step": 12974500 }, { "epoch": 37.56, "learning_rate": 3.122811834244165e-05, "loss": 2.0322, "step": 12975000 }, { "epoch": 37.56, "learning_rate": 3.122739469479437e-05, "loss": 2.0521, "step": 12975500 }, { "epoch": 37.56, "learning_rate": 3.122667104714709e-05, "loss": 2.0425, "step": 12976000 }, { "epoch": 37.56, "learning_rate": 3.1225948846795114e-05, "loss": 2.0516, "step": 12976500 }, { "epoch": 37.56, "learning_rate": 3.1225225199147836e-05, "loss": 2.0721, "step": 12977000 }, { "epoch": 37.56, "learning_rate": 3.122450155150056e-05, "loss": 2.0462, "step": 12977500 }, { "epoch": 37.57, "learning_rate": 3.122377790385328e-05, "loss": 2.0781, "step": 12978000 }, { "epoch": 37.57, "learning_rate": 3.1223054256206e-05, "loss": 2.0781, "step": 12978500 }, { "epoch": 37.57, "learning_rate": 3.1222330608558725e-05, "loss": 2.0595, "step": 12979000 }, { "epoch": 37.57, "learning_rate": 3.122160696091145e-05, "loss": 2.0636, "step": 12979500 }, { "epoch": 37.57, "learning_rate": 3.122088331326417e-05, "loss": 2.0642, "step": 12980000 }, { "epoch": 37.57, "learning_rate": 3.12201596656169e-05, "loss": 2.0513, "step": 12980500 }, { "epoch": 37.57, "learning_rate": 3.1219437465264914e-05, "loss": 2.0417, "step": 12981000 }, { "epoch": 37.58, "learning_rate": 3.1218713817617636e-05, "loss": 2.0468, "step": 12981500 }, { "epoch": 37.58, "learning_rate": 3.1217990169970365e-05, "loss": 2.0397, "step": 12982000 }, { "epoch": 37.58, "learning_rate": 3.121726796961838e-05, "loss": 2.0677, "step": 12982500 }, { "epoch": 37.58, "learning_rate": 3.12165443219711e-05, "loss": 2.088, "step": 12983000 }, { "epoch": 37.58, "learning_rate": 3.1215820674323825e-05, "loss": 2.0451, "step": 12983500 }, { "epoch": 37.58, "learning_rate": 3.121509702667655e-05, "loss": 2.0528, "step": 12984000 }, { "epoch": 37.58, "learning_rate": 3.121437337902927e-05, "loss": 2.0534, "step": 12984500 }, { "epoch": 37.59, "learning_rate": 3.121364973138199e-05, "loss": 2.0491, "step": 12985000 }, { "epoch": 37.59, "learning_rate": 3.1212926083734714e-05, "loss": 2.0669, "step": 12985500 }, { "epoch": 37.59, "learning_rate": 3.1212202436087437e-05, "loss": 2.0712, "step": 12986000 }, { "epoch": 37.59, "learning_rate": 3.121148023573546e-05, "loss": 2.0734, "step": 12986500 }, { "epoch": 37.59, "learning_rate": 3.1210758035383474e-05, "loss": 2.0759, "step": 12987000 }, { "epoch": 37.59, "learning_rate": 3.12100343877362e-05, "loss": 2.0645, "step": 12987500 }, { "epoch": 37.59, "learning_rate": 3.120931074008892e-05, "loss": 2.0643, "step": 12988000 }, { "epoch": 37.6, "learning_rate": 3.120858709244165e-05, "loss": 2.0725, "step": 12988500 }, { "epoch": 37.6, "learning_rate": 3.120786344479437e-05, "loss": 2.0572, "step": 12989000 }, { "epoch": 37.6, "learning_rate": 3.120713979714709e-05, "loss": 2.0993, "step": 12989500 }, { "epoch": 37.6, "learning_rate": 3.1206416149499815e-05, "loss": 2.0685, "step": 12990000 }, { "epoch": 37.6, "learning_rate": 3.1205692501852544e-05, "loss": 2.0974, "step": 12990500 }, { "epoch": 37.6, "learning_rate": 3.120497030150056e-05, "loss": 2.0513, "step": 12991000 }, { "epoch": 37.61, "learning_rate": 3.1204248101148575e-05, "loss": 2.0729, "step": 12991500 }, { "epoch": 37.61, "learning_rate": 3.12035244535013e-05, "loss": 2.0614, "step": 12992000 }, { "epoch": 37.61, "learning_rate": 3.120280080585402e-05, "loss": 2.0464, "step": 12992500 }, { "epoch": 37.61, "learning_rate": 3.120207715820674e-05, "loss": 2.0649, "step": 12993000 }, { "epoch": 37.61, "learning_rate": 3.1201353510559464e-05, "loss": 2.0765, "step": 12993500 }, { "epoch": 37.61, "learning_rate": 3.120062986291219e-05, "loss": 2.0598, "step": 12994000 }, { "epoch": 37.61, "learning_rate": 3.1199906215264915e-05, "loss": 2.0928, "step": 12994500 }, { "epoch": 37.62, "learning_rate": 3.119918401491293e-05, "loss": 2.0184, "step": 12995000 }, { "epoch": 37.62, "learning_rate": 3.1198461814560946e-05, "loss": 2.0607, "step": 12995500 }, { "epoch": 37.62, "learning_rate": 3.119773961420897e-05, "loss": 2.0571, "step": 12996000 }, { "epoch": 37.62, "learning_rate": 3.119701596656169e-05, "loss": 2.0721, "step": 12996500 }, { "epoch": 37.62, "learning_rate": 3.119629231891442e-05, "loss": 2.0784, "step": 12997000 }, { "epoch": 37.62, "learning_rate": 3.119556867126714e-05, "loss": 2.067, "step": 12997500 }, { "epoch": 37.62, "learning_rate": 3.119484647091516e-05, "loss": 2.079, "step": 12998000 }, { "epoch": 37.63, "learning_rate": 3.119412282326788e-05, "loss": 2.0682, "step": 12998500 }, { "epoch": 37.63, "learning_rate": 3.11933991756206e-05, "loss": 2.0483, "step": 12999000 }, { "epoch": 37.63, "learning_rate": 3.1192675527973324e-05, "loss": 2.0239, "step": 12999500 }, { "epoch": 37.63, "learning_rate": 3.1191951880326046e-05, "loss": 2.072, "step": 13000000 }, { "epoch": 37.63, "learning_rate": 3.119122823267877e-05, "loss": 2.0434, "step": 13000500 }, { "epoch": 37.63, "learning_rate": 3.119050458503149e-05, "loss": 2.0682, "step": 13001000 }, { "epoch": 37.63, "learning_rate": 3.118978093738422e-05, "loss": 2.0724, "step": 13001500 }, { "epoch": 37.64, "learning_rate": 3.118905728973694e-05, "loss": 2.0547, "step": 13002000 }, { "epoch": 37.64, "learning_rate": 3.1188333642089664e-05, "loss": 2.0732, "step": 13002500 }, { "epoch": 37.64, "learning_rate": 3.1187609994442386e-05, "loss": 2.0517, "step": 13003000 }, { "epoch": 37.64, "learning_rate": 3.1186886346795115e-05, "loss": 2.0773, "step": 13003500 }, { "epoch": 37.64, "learning_rate": 3.118616269914784e-05, "loss": 2.0505, "step": 13004000 }, { "epoch": 37.64, "learning_rate": 3.118543905150056e-05, "loss": 2.0826, "step": 13004500 }, { "epoch": 37.64, "learning_rate": 3.1184716851148576e-05, "loss": 2.0816, "step": 13005000 }, { "epoch": 37.65, "learning_rate": 3.11839932035013e-05, "loss": 2.0768, "step": 13005500 }, { "epoch": 37.65, "learning_rate": 3.118326955585402e-05, "loss": 2.0453, "step": 13006000 }, { "epoch": 37.65, "learning_rate": 3.118254590820674e-05, "loss": 2.0612, "step": 13006500 }, { "epoch": 37.65, "learning_rate": 3.118182226055947e-05, "loss": 2.0591, "step": 13007000 }, { "epoch": 37.65, "learning_rate": 3.1181098612912193e-05, "loss": 2.0704, "step": 13007500 }, { "epoch": 37.65, "learning_rate": 3.1180374965264916e-05, "loss": 2.0676, "step": 13008000 }, { "epoch": 37.65, "learning_rate": 3.117965131761764e-05, "loss": 2.0484, "step": 13008500 }, { "epoch": 37.66, "learning_rate": 3.117892766997036e-05, "loss": 2.0778, "step": 13009000 }, { "epoch": 37.66, "learning_rate": 3.117820402232308e-05, "loss": 2.0581, "step": 13009500 }, { "epoch": 37.66, "learning_rate": 3.11774818219711e-05, "loss": 2.0618, "step": 13010000 }, { "epoch": 37.66, "learning_rate": 3.117675817432382e-05, "loss": 2.0848, "step": 13010500 }, { "epoch": 37.66, "learning_rate": 3.117603452667655e-05, "loss": 2.084, "step": 13011000 }, { "epoch": 37.66, "learning_rate": 3.117531087902927e-05, "loss": 2.0389, "step": 13011500 }, { "epoch": 37.66, "learning_rate": 3.1174587231381994e-05, "loss": 2.0614, "step": 13012000 }, { "epoch": 37.67, "learning_rate": 3.1173863583734716e-05, "loss": 2.0797, "step": 13012500 }, { "epoch": 37.67, "learning_rate": 3.1173139936087445e-05, "loss": 2.055, "step": 13013000 }, { "epoch": 37.67, "learning_rate": 3.117241628844017e-05, "loss": 2.0407, "step": 13013500 }, { "epoch": 37.67, "learning_rate": 3.117169264079289e-05, "loss": 2.0679, "step": 13014000 }, { "epoch": 37.67, "learning_rate": 3.117096899314561e-05, "loss": 2.0674, "step": 13014500 }, { "epoch": 37.67, "learning_rate": 3.1170245345498334e-05, "loss": 2.0371, "step": 13015000 }, { "epoch": 37.67, "learning_rate": 3.1169521697851056e-05, "loss": 2.0629, "step": 13015500 }, { "epoch": 37.68, "learning_rate": 3.116879805020378e-05, "loss": 2.0723, "step": 13016000 }, { "epoch": 37.68, "learning_rate": 3.11680744025565e-05, "loss": 2.0338, "step": 13016500 }, { "epoch": 37.68, "learning_rate": 3.116735075490922e-05, "loss": 2.0541, "step": 13017000 }, { "epoch": 37.68, "learning_rate": 3.116662710726195e-05, "loss": 2.0581, "step": 13017500 }, { "epoch": 37.68, "learning_rate": 3.116590635420527e-05, "loss": 2.0729, "step": 13018000 }, { "epoch": 37.68, "learning_rate": 3.116518270655799e-05, "loss": 2.0544, "step": 13018500 }, { "epoch": 37.68, "learning_rate": 3.116445905891071e-05, "loss": 2.0492, "step": 13019000 }, { "epoch": 37.69, "learning_rate": 3.1163735411263434e-05, "loss": 2.0652, "step": 13019500 }, { "epoch": 37.69, "learning_rate": 3.1163011763616156e-05, "loss": 2.0614, "step": 13020000 }, { "epoch": 37.69, "learning_rate": 3.116228956326417e-05, "loss": 2.0669, "step": 13020500 }, { "epoch": 37.69, "learning_rate": 3.1161565915616894e-05, "loss": 2.0642, "step": 13021000 }, { "epoch": 37.69, "learning_rate": 3.116084226796962e-05, "loss": 2.0572, "step": 13021500 }, { "epoch": 37.69, "learning_rate": 3.1160118620322345e-05, "loss": 2.046, "step": 13022000 }, { "epoch": 37.69, "learning_rate": 3.115939497267507e-05, "loss": 2.0786, "step": 13022500 }, { "epoch": 37.7, "learning_rate": 3.115867132502779e-05, "loss": 2.0744, "step": 13023000 }, { "epoch": 37.7, "learning_rate": 3.1157949124675805e-05, "loss": 2.0651, "step": 13023500 }, { "epoch": 37.7, "learning_rate": 3.115722547702853e-05, "loss": 2.0604, "step": 13024000 }, { "epoch": 37.7, "learning_rate": 3.115650182938125e-05, "loss": 2.068, "step": 13024500 }, { "epoch": 37.7, "learning_rate": 3.115577818173397e-05, "loss": 2.0445, "step": 13025000 }, { "epoch": 37.7, "learning_rate": 3.11550545340867e-05, "loss": 2.0803, "step": 13025500 }, { "epoch": 37.7, "learning_rate": 3.115433088643942e-05, "loss": 2.0443, "step": 13026000 }, { "epoch": 37.71, "learning_rate": 3.1153607238792146e-05, "loss": 2.0782, "step": 13026500 }, { "epoch": 37.71, "learning_rate": 3.1152883591144875e-05, "loss": 2.0559, "step": 13027000 }, { "epoch": 37.71, "learning_rate": 3.11521599434976e-05, "loss": 2.068, "step": 13027500 }, { "epoch": 37.71, "learning_rate": 3.115143774314561e-05, "loss": 2.0674, "step": 13028000 }, { "epoch": 37.71, "learning_rate": 3.1150714095498335e-05, "loss": 2.0528, "step": 13028500 }, { "epoch": 37.71, "learning_rate": 3.114999044785106e-05, "loss": 2.0493, "step": 13029000 }, { "epoch": 37.72, "learning_rate": 3.114926680020378e-05, "loss": 2.0774, "step": 13029500 }, { "epoch": 37.72, "learning_rate": 3.11485431525565e-05, "loss": 2.0528, "step": 13030000 }, { "epoch": 37.72, "learning_rate": 3.1147820952204524e-05, "loss": 2.0843, "step": 13030500 }, { "epoch": 37.72, "learning_rate": 3.114710019914783e-05, "loss": 2.074, "step": 13031000 }, { "epoch": 37.72, "learning_rate": 3.1146376551500555e-05, "loss": 2.0806, "step": 13031500 }, { "epoch": 37.72, "learning_rate": 3.114565290385328e-05, "loss": 2.0679, "step": 13032000 }, { "epoch": 37.72, "learning_rate": 3.1144929256206e-05, "loss": 2.0684, "step": 13032500 }, { "epoch": 37.73, "learning_rate": 3.114420705585402e-05, "loss": 2.0601, "step": 13033000 }, { "epoch": 37.73, "learning_rate": 3.114348340820675e-05, "loss": 2.0453, "step": 13033500 }, { "epoch": 37.73, "learning_rate": 3.1142761207854766e-05, "loss": 2.0596, "step": 13034000 }, { "epoch": 37.73, "learning_rate": 3.114203756020749e-05, "loss": 2.0577, "step": 13034500 }, { "epoch": 37.73, "learning_rate": 3.1141315359855504e-05, "loss": 2.0553, "step": 13035000 }, { "epoch": 37.73, "learning_rate": 3.1140591712208226e-05, "loss": 2.0602, "step": 13035500 }, { "epoch": 37.73, "learning_rate": 3.113986806456095e-05, "loss": 2.0892, "step": 13036000 }, { "epoch": 37.74, "learning_rate": 3.113914441691367e-05, "loss": 2.0753, "step": 13036500 }, { "epoch": 37.74, "learning_rate": 3.11384207692664e-05, "loss": 2.0805, "step": 13037000 }, { "epoch": 37.74, "learning_rate": 3.113769712161912e-05, "loss": 2.0836, "step": 13037500 }, { "epoch": 37.74, "learning_rate": 3.1136973473971844e-05, "loss": 2.0609, "step": 13038000 }, { "epoch": 37.74, "learning_rate": 3.1136249826324566e-05, "loss": 2.0647, "step": 13038500 }, { "epoch": 37.74, "learning_rate": 3.113552617867729e-05, "loss": 2.0672, "step": 13039000 }, { "epoch": 37.74, "learning_rate": 3.113480253103001e-05, "loss": 2.0502, "step": 13039500 }, { "epoch": 37.75, "learning_rate": 3.113407888338273e-05, "loss": 2.0691, "step": 13040000 }, { "epoch": 37.75, "learning_rate": 3.1133355235735455e-05, "loss": 2.0763, "step": 13040500 }, { "epoch": 37.75, "learning_rate": 3.113263303538348e-05, "loss": 2.0577, "step": 13041000 }, { "epoch": 37.75, "learning_rate": 3.11319093877362e-05, "loss": 2.0579, "step": 13041500 }, { "epoch": 37.75, "learning_rate": 3.113118574008892e-05, "loss": 2.064, "step": 13042000 }, { "epoch": 37.75, "learning_rate": 3.113046209244165e-05, "loss": 2.0682, "step": 13042500 }, { "epoch": 37.75, "learning_rate": 3.112973844479437e-05, "loss": 2.0798, "step": 13043000 }, { "epoch": 37.76, "learning_rate": 3.1129014797147096e-05, "loss": 2.0705, "step": 13043500 }, { "epoch": 37.76, "learning_rate": 3.112829114949982e-05, "loss": 2.0457, "step": 13044000 }, { "epoch": 37.76, "learning_rate": 3.112756750185254e-05, "loss": 2.05, "step": 13044500 }, { "epoch": 37.76, "learning_rate": 3.112684385420526e-05, "loss": 2.0612, "step": 13045000 }, { "epoch": 37.76, "learning_rate": 3.112612165385328e-05, "loss": 2.0359, "step": 13045500 }, { "epoch": 37.76, "learning_rate": 3.1125398006206e-05, "loss": 2.0482, "step": 13046000 }, { "epoch": 37.76, "learning_rate": 3.112467435855872e-05, "loss": 2.087, "step": 13046500 }, { "epoch": 37.77, "learning_rate": 3.112395071091145e-05, "loss": 2.0763, "step": 13047000 }, { "epoch": 37.77, "learning_rate": 3.1123227063264174e-05, "loss": 2.0689, "step": 13047500 }, { "epoch": 37.77, "learning_rate": 3.11225034156169e-05, "loss": 2.0445, "step": 13048000 }, { "epoch": 37.77, "learning_rate": 3.1121779767969625e-05, "loss": 2.0461, "step": 13048500 }, { "epoch": 37.77, "learning_rate": 3.112105612032235e-05, "loss": 2.0613, "step": 13049000 }, { "epoch": 37.77, "learning_rate": 3.112033391997036e-05, "loss": 2.0532, "step": 13049500 }, { "epoch": 37.77, "learning_rate": 3.1119610272323085e-05, "loss": 2.0528, "step": 13050000 }, { "epoch": 37.78, "learning_rate": 3.111888662467581e-05, "loss": 2.0489, "step": 13050500 }, { "epoch": 37.78, "learning_rate": 3.111816442432382e-05, "loss": 2.0627, "step": 13051000 }, { "epoch": 37.78, "learning_rate": 3.111744077667655e-05, "loss": 2.0741, "step": 13051500 }, { "epoch": 37.78, "learning_rate": 3.1116717129029274e-05, "loss": 2.0807, "step": 13052000 }, { "epoch": 37.78, "learning_rate": 3.1115993481381996e-05, "loss": 2.0447, "step": 13052500 }, { "epoch": 37.78, "learning_rate": 3.111526983373472e-05, "loss": 2.0637, "step": 13053000 }, { "epoch": 37.78, "learning_rate": 3.111454618608744e-05, "loss": 2.0359, "step": 13053500 }, { "epoch": 37.79, "learning_rate": 3.111382253844016e-05, "loss": 2.0793, "step": 13054000 }, { "epoch": 37.79, "learning_rate": 3.1113098890792885e-05, "loss": 2.0547, "step": 13054500 }, { "epoch": 37.79, "learning_rate": 3.111237524314561e-05, "loss": 2.0743, "step": 13055000 }, { "epoch": 37.79, "learning_rate": 3.1111651595498336e-05, "loss": 2.0598, "step": 13055500 }, { "epoch": 37.79, "learning_rate": 3.111092794785106e-05, "loss": 2.0667, "step": 13056000 }, { "epoch": 37.79, "learning_rate": 3.111020430020378e-05, "loss": 2.0702, "step": 13056500 }, { "epoch": 37.79, "learning_rate": 3.11094820998518e-05, "loss": 2.0662, "step": 13057000 }, { "epoch": 37.8, "learning_rate": 3.1108758452204525e-05, "loss": 2.0537, "step": 13057500 }, { "epoch": 37.8, "learning_rate": 3.110803480455725e-05, "loss": 2.0857, "step": 13058000 }, { "epoch": 37.8, "learning_rate": 3.110731260420526e-05, "loss": 2.0733, "step": 13058500 }, { "epoch": 37.8, "learning_rate": 3.1106588956557985e-05, "loss": 2.0607, "step": 13059000 }, { "epoch": 37.8, "learning_rate": 3.110586530891071e-05, "loss": 2.0756, "step": 13059500 }, { "epoch": 37.8, "learning_rate": 3.110514166126343e-05, "loss": 2.039, "step": 13060000 }, { "epoch": 37.8, "learning_rate": 3.110441801361615e-05, "loss": 2.0757, "step": 13060500 }, { "epoch": 37.81, "learning_rate": 3.1103694365968874e-05, "loss": 2.0881, "step": 13061000 }, { "epoch": 37.81, "learning_rate": 3.11029707183216e-05, "loss": 2.0675, "step": 13061500 }, { "epoch": 37.81, "learning_rate": 3.1102247070674325e-05, "loss": 2.0592, "step": 13062000 }, { "epoch": 37.81, "learning_rate": 3.110152342302705e-05, "loss": 2.0589, "step": 13062500 }, { "epoch": 37.81, "learning_rate": 3.110080122267507e-05, "loss": 2.0908, "step": 13063000 }, { "epoch": 37.81, "learning_rate": 3.110007757502779e-05, "loss": 2.0524, "step": 13063500 }, { "epoch": 37.81, "learning_rate": 3.1099353927380514e-05, "loss": 2.0789, "step": 13064000 }, { "epoch": 37.82, "learning_rate": 3.109863027973324e-05, "loss": 2.089, "step": 13064500 }, { "epoch": 37.82, "learning_rate": 3.109790807938125e-05, "loss": 2.0471, "step": 13065000 }, { "epoch": 37.82, "learning_rate": 3.1097184431733975e-05, "loss": 2.0857, "step": 13065500 }, { "epoch": 37.82, "learning_rate": 3.1096460784086704e-05, "loss": 2.0523, "step": 13066000 }, { "epoch": 37.82, "learning_rate": 3.1095737136439426e-05, "loss": 2.0631, "step": 13066500 }, { "epoch": 37.82, "learning_rate": 3.109501348879215e-05, "loss": 2.0495, "step": 13067000 }, { "epoch": 37.83, "learning_rate": 3.109428984114487e-05, "loss": 2.0886, "step": 13067500 }, { "epoch": 37.83, "learning_rate": 3.109356619349759e-05, "loss": 2.0564, "step": 13068000 }, { "epoch": 37.83, "learning_rate": 3.1092842545850315e-05, "loss": 2.0927, "step": 13068500 }, { "epoch": 37.83, "learning_rate": 3.109211889820304e-05, "loss": 2.0418, "step": 13069000 }, { "epoch": 37.83, "learning_rate": 3.109139525055576e-05, "loss": 2.0646, "step": 13069500 }, { "epoch": 37.83, "learning_rate": 3.109067160290849e-05, "loss": 2.0707, "step": 13070000 }, { "epoch": 37.83, "learning_rate": 3.1089949402556504e-05, "loss": 2.0763, "step": 13070500 }, { "epoch": 37.84, "learning_rate": 3.1089227202204526e-05, "loss": 2.0855, "step": 13071000 }, { "epoch": 37.84, "learning_rate": 3.108850355455725e-05, "loss": 2.0744, "step": 13071500 }, { "epoch": 37.84, "learning_rate": 3.108777990690997e-05, "loss": 2.0608, "step": 13072000 }, { "epoch": 37.84, "learning_rate": 3.108705625926269e-05, "loss": 2.0469, "step": 13072500 }, { "epoch": 37.84, "learning_rate": 3.108633405891071e-05, "loss": 2.0781, "step": 13073000 }, { "epoch": 37.84, "learning_rate": 3.108561041126343e-05, "loss": 2.0749, "step": 13073500 }, { "epoch": 37.84, "learning_rate": 3.108488676361615e-05, "loss": 2.0825, "step": 13074000 }, { "epoch": 37.85, "learning_rate": 3.108416311596888e-05, "loss": 2.0708, "step": 13074500 }, { "epoch": 37.85, "learning_rate": 3.1083439468321604e-05, "loss": 2.0712, "step": 13075000 }, { "epoch": 37.85, "learning_rate": 3.108271726796962e-05, "loss": 2.0474, "step": 13075500 }, { "epoch": 37.85, "learning_rate": 3.108199362032234e-05, "loss": 2.0654, "step": 13076000 }, { "epoch": 37.85, "learning_rate": 3.1081269972675064e-05, "loss": 2.0559, "step": 13076500 }, { "epoch": 37.85, "learning_rate": 3.1080546325027786e-05, "loss": 2.0528, "step": 13077000 }, { "epoch": 37.85, "learning_rate": 3.107982267738051e-05, "loss": 2.0515, "step": 13077500 }, { "epoch": 37.86, "learning_rate": 3.107909902973324e-05, "loss": 2.0816, "step": 13078000 }, { "epoch": 37.86, "learning_rate": 3.107837538208596e-05, "loss": 2.0847, "step": 13078500 }, { "epoch": 37.86, "learning_rate": 3.107765173443868e-05, "loss": 2.072, "step": 13079000 }, { "epoch": 37.86, "learning_rate": 3.1076928086791404e-05, "loss": 2.0768, "step": 13079500 }, { "epoch": 37.86, "learning_rate": 3.107620733373472e-05, "loss": 2.0708, "step": 13080000 }, { "epoch": 37.86, "learning_rate": 3.107548368608744e-05, "loss": 2.0646, "step": 13080500 }, { "epoch": 37.86, "learning_rate": 3.1074760038440164e-05, "loss": 2.0765, "step": 13081000 }, { "epoch": 37.87, "learning_rate": 3.1074036390792887e-05, "loss": 2.0481, "step": 13081500 }, { "epoch": 37.87, "learning_rate": 3.107331274314561e-05, "loss": 2.0584, "step": 13082000 }, { "epoch": 37.87, "learning_rate": 3.107258909549833e-05, "loss": 2.0704, "step": 13082500 }, { "epoch": 37.87, "learning_rate": 3.107186544785105e-05, "loss": 2.04, "step": 13083000 }, { "epoch": 37.87, "learning_rate": 3.107114180020378e-05, "loss": 2.0568, "step": 13083500 }, { "epoch": 37.87, "learning_rate": 3.1070418152556505e-05, "loss": 2.0814, "step": 13084000 }, { "epoch": 37.87, "learning_rate": 3.106969450490923e-05, "loss": 2.0688, "step": 13084500 }, { "epoch": 37.88, "learning_rate": 3.1068970857261956e-05, "loss": 2.0679, "step": 13085000 }, { "epoch": 37.88, "learning_rate": 3.106824720961468e-05, "loss": 2.0804, "step": 13085500 }, { "epoch": 37.88, "learning_rate": 3.10675235619674e-05, "loss": 2.074, "step": 13086000 }, { "epoch": 37.88, "learning_rate": 3.106679991432012e-05, "loss": 2.073, "step": 13086500 }, { "epoch": 37.88, "learning_rate": 3.1066076266672845e-05, "loss": 2.062, "step": 13087000 }, { "epoch": 37.88, "learning_rate": 3.106535261902557e-05, "loss": 2.0791, "step": 13087500 }, { "epoch": 37.88, "learning_rate": 3.106462897137829e-05, "loss": 2.0748, "step": 13088000 }, { "epoch": 37.89, "learning_rate": 3.106390532373101e-05, "loss": 2.068, "step": 13088500 }, { "epoch": 37.89, "learning_rate": 3.1063183123379034e-05, "loss": 2.0415, "step": 13089000 }, { "epoch": 37.89, "learning_rate": 3.1062459475731756e-05, "loss": 2.073, "step": 13089500 }, { "epoch": 37.89, "learning_rate": 3.106173582808448e-05, "loss": 2.0688, "step": 13090000 }, { "epoch": 37.89, "learning_rate": 3.1061013627732494e-05, "loss": 2.0263, "step": 13090500 }, { "epoch": 37.89, "learning_rate": 3.1060289980085216e-05, "loss": 2.0741, "step": 13091000 }, { "epoch": 37.89, "learning_rate": 3.105956633243794e-05, "loss": 2.0441, "step": 13091500 }, { "epoch": 37.9, "learning_rate": 3.105884268479066e-05, "loss": 2.0585, "step": 13092000 }, { "epoch": 37.9, "learning_rate": 3.105811903714339e-05, "loss": 2.0773, "step": 13092500 }, { "epoch": 37.9, "learning_rate": 3.105739538949611e-05, "loss": 2.1005, "step": 13093000 }, { "epoch": 37.9, "learning_rate": 3.1056671741848834e-05, "loss": 2.0548, "step": 13093500 }, { "epoch": 37.9, "learning_rate": 3.1055948094201556e-05, "loss": 2.0559, "step": 13094000 }, { "epoch": 37.9, "learning_rate": 3.1055224446554285e-05, "loss": 2.0522, "step": 13094500 }, { "epoch": 37.9, "learning_rate": 3.10545022462023e-05, "loss": 2.0576, "step": 13095000 }, { "epoch": 37.91, "learning_rate": 3.105377859855502e-05, "loss": 2.0774, "step": 13095500 }, { "epoch": 37.91, "learning_rate": 3.105305639820304e-05, "loss": 2.0454, "step": 13096000 }, { "epoch": 37.91, "learning_rate": 3.105233275055576e-05, "loss": 2.0919, "step": 13096500 }, { "epoch": 37.91, "learning_rate": 3.105160910290848e-05, "loss": 2.0861, "step": 13097000 }, { "epoch": 37.91, "learning_rate": 3.1050885455261205e-05, "loss": 2.0446, "step": 13097500 }, { "epoch": 37.91, "learning_rate": 3.1050161807613934e-05, "loss": 2.0686, "step": 13098000 }, { "epoch": 37.91, "learning_rate": 3.104943960726195e-05, "loss": 2.0686, "step": 13098500 }, { "epoch": 37.92, "learning_rate": 3.104871595961467e-05, "loss": 2.0757, "step": 13099000 }, { "epoch": 37.92, "learning_rate": 3.104799375926269e-05, "loss": 2.063, "step": 13099500 }, { "epoch": 37.92, "learning_rate": 3.104727011161541e-05, "loss": 2.0573, "step": 13100000 }, { "epoch": 37.92, "learning_rate": 3.104654646396814e-05, "loss": 2.0646, "step": 13100500 }, { "epoch": 37.92, "learning_rate": 3.104582281632086e-05, "loss": 2.0649, "step": 13101000 }, { "epoch": 37.92, "learning_rate": 3.104509916867358e-05, "loss": 2.0574, "step": 13101500 }, { "epoch": 37.92, "learning_rate": 3.1044375521026306e-05, "loss": 2.0667, "step": 13102000 }, { "epoch": 37.93, "learning_rate": 3.104365332067433e-05, "loss": 2.031, "step": 13102500 }, { "epoch": 37.93, "learning_rate": 3.104292967302705e-05, "loss": 2.0743, "step": 13103000 }, { "epoch": 37.93, "learning_rate": 3.104220602537977e-05, "loss": 2.0723, "step": 13103500 }, { "epoch": 37.93, "learning_rate": 3.1041482377732495e-05, "loss": 2.0787, "step": 13104000 }, { "epoch": 37.93, "learning_rate": 3.104075873008522e-05, "loss": 2.0749, "step": 13104500 }, { "epoch": 37.93, "learning_rate": 3.104003508243794e-05, "loss": 2.0696, "step": 13105000 }, { "epoch": 37.94, "learning_rate": 3.103931143479066e-05, "loss": 2.0562, "step": 13105500 }, { "epoch": 37.94, "learning_rate": 3.1038587787143383e-05, "loss": 2.0592, "step": 13106000 }, { "epoch": 37.94, "learning_rate": 3.1037865586791406e-05, "loss": 2.0658, "step": 13106500 }, { "epoch": 37.94, "learning_rate": 3.103714193914413e-05, "loss": 2.0603, "step": 13107000 }, { "epoch": 37.94, "learning_rate": 3.103641829149686e-05, "loss": 2.0739, "step": 13107500 }, { "epoch": 37.94, "learning_rate": 3.103569464384958e-05, "loss": 2.0598, "step": 13108000 }, { "epoch": 37.94, "learning_rate": 3.10349709962023e-05, "loss": 2.0548, "step": 13108500 }, { "epoch": 37.95, "learning_rate": 3.1034247348555024e-05, "loss": 2.0726, "step": 13109000 }, { "epoch": 37.95, "learning_rate": 3.103352514820304e-05, "loss": 2.052, "step": 13109500 }, { "epoch": 37.95, "learning_rate": 3.103280150055576e-05, "loss": 2.0629, "step": 13110000 }, { "epoch": 37.95, "learning_rate": 3.1032077852908484e-05, "loss": 2.0712, "step": 13110500 }, { "epoch": 37.95, "learning_rate": 3.1031355652556506e-05, "loss": 2.0754, "step": 13111000 }, { "epoch": 37.95, "learning_rate": 3.103063200490923e-05, "loss": 2.0396, "step": 13111500 }, { "epoch": 37.95, "learning_rate": 3.1029909804557244e-05, "loss": 2.0422, "step": 13112000 }, { "epoch": 37.96, "learning_rate": 3.1029186156909966e-05, "loss": 2.0361, "step": 13112500 }, { "epoch": 37.96, "learning_rate": 3.102846250926269e-05, "loss": 2.0856, "step": 13113000 }, { "epoch": 37.96, "learning_rate": 3.102773886161541e-05, "loss": 2.0545, "step": 13113500 }, { "epoch": 37.96, "learning_rate": 3.102701521396813e-05, "loss": 2.0788, "step": 13114000 }, { "epoch": 37.96, "learning_rate": 3.102629156632086e-05, "loss": 2.0677, "step": 13114500 }, { "epoch": 37.96, "learning_rate": 3.102556936596888e-05, "loss": 2.0607, "step": 13115000 }, { "epoch": 37.96, "learning_rate": 3.1024845718321606e-05, "loss": 2.0728, "step": 13115500 }, { "epoch": 37.97, "learning_rate": 3.102412207067433e-05, "loss": 2.0563, "step": 13116000 }, { "epoch": 37.97, "learning_rate": 3.102339842302705e-05, "loss": 2.0363, "step": 13116500 }, { "epoch": 37.97, "learning_rate": 3.1022676222675066e-05, "loss": 2.0633, "step": 13117000 }, { "epoch": 37.97, "learning_rate": 3.102195257502779e-05, "loss": 2.0755, "step": 13117500 }, { "epoch": 37.97, "learning_rate": 3.102122892738051e-05, "loss": 2.0742, "step": 13118000 }, { "epoch": 37.97, "learning_rate": 3.102050527973323e-05, "loss": 2.0685, "step": 13118500 }, { "epoch": 37.97, "learning_rate": 3.101978163208596e-05, "loss": 2.0674, "step": 13119000 }, { "epoch": 37.98, "learning_rate": 3.1019057984438684e-05, "loss": 2.0524, "step": 13119500 }, { "epoch": 37.98, "learning_rate": 3.10183357840867e-05, "loss": 2.0875, "step": 13120000 }, { "epoch": 37.98, "learning_rate": 3.101761213643942e-05, "loss": 2.0385, "step": 13120500 }, { "epoch": 37.98, "learning_rate": 3.1016888488792144e-05, "loss": 2.0654, "step": 13121000 }, { "epoch": 37.98, "learning_rate": 3.101616484114487e-05, "loss": 2.0788, "step": 13121500 }, { "epoch": 37.98, "learning_rate": 3.101544119349759e-05, "loss": 2.063, "step": 13122000 }, { "epoch": 37.98, "learning_rate": 3.101471754585032e-05, "loss": 2.0616, "step": 13122500 }, { "epoch": 37.99, "learning_rate": 3.101399389820304e-05, "loss": 2.0683, "step": 13123000 }, { "epoch": 37.99, "learning_rate": 3.101327025055576e-05, "loss": 2.056, "step": 13123500 }, { "epoch": 37.99, "learning_rate": 3.1012546602908485e-05, "loss": 2.0578, "step": 13124000 }, { "epoch": 37.99, "learning_rate": 3.101182440255651e-05, "loss": 2.0557, "step": 13124500 }, { "epoch": 37.99, "learning_rate": 3.101110075490923e-05, "loss": 2.0658, "step": 13125000 }, { "epoch": 37.99, "learning_rate": 3.101037710726195e-05, "loss": 2.0562, "step": 13125500 }, { "epoch": 37.99, "learning_rate": 3.1009653459614674e-05, "loss": 2.0643, "step": 13126000 }, { "epoch": 38.0, "learning_rate": 3.1008929811967396e-05, "loss": 2.0631, "step": 13126500 }, { "epoch": 38.0, "learning_rate": 3.100820761161541e-05, "loss": 2.0552, "step": 13127000 }, { "epoch": 38.0, "learning_rate": 3.1007483963968134e-05, "loss": 2.068, "step": 13127500 }, { "epoch": 38.0, "eval_accuracy": 0.6700079034069664, "eval_accuracy_mlm": 0.6351199675846458, "eval_accuracy_nsp": 0.857055450908141, "eval_loss": 2.176142454147339, "eval_runtime": 331.6481, "eval_samples_per_second": 1315.81, "eval_steps_per_second": 54.826, "step": 13127936 }, { "epoch": 38.0, "learning_rate": 3.100676031632086e-05, "loss": 2.0675, "step": 13128000 }, { "epoch": 38.0, "learning_rate": 3.1006036668673585e-05, "loss": 2.0444, "step": 13128500 }, { "epoch": 38.0, "learning_rate": 3.100531302102631e-05, "loss": 2.0409, "step": 13129000 }, { "epoch": 38.0, "learning_rate": 3.100459082067432e-05, "loss": 2.0686, "step": 13129500 }, { "epoch": 38.01, "learning_rate": 3.1003867173027045e-05, "loss": 2.0486, "step": 13130000 }, { "epoch": 38.01, "learning_rate": 3.1003143525379774e-05, "loss": 2.0344, "step": 13130500 }, { "epoch": 38.01, "learning_rate": 3.1002419877732496e-05, "loss": 2.0368, "step": 13131000 }, { "epoch": 38.01, "learning_rate": 3.100169623008522e-05, "loss": 2.0158, "step": 13131500 }, { "epoch": 38.01, "learning_rate": 3.100097258243794e-05, "loss": 2.0411, "step": 13132000 }, { "epoch": 38.01, "learning_rate": 3.100025038208596e-05, "loss": 2.0327, "step": 13132500 }, { "epoch": 38.01, "learning_rate": 3.0999526734438685e-05, "loss": 2.0615, "step": 13133000 }, { "epoch": 38.02, "learning_rate": 3.099880308679141e-05, "loss": 2.03, "step": 13133500 }, { "epoch": 38.02, "learning_rate": 3.099807943914413e-05, "loss": 2.0321, "step": 13134000 }, { "epoch": 38.02, "learning_rate": 3.099735579149685e-05, "loss": 2.0642, "step": 13134500 }, { "epoch": 38.02, "learning_rate": 3.0996632143849574e-05, "loss": 2.0562, "step": 13135000 }, { "epoch": 38.02, "learning_rate": 3.0995908496202296e-05, "loss": 2.0545, "step": 13135500 }, { "epoch": 38.02, "learning_rate": 3.099518629585031e-05, "loss": 2.038, "step": 13136000 }, { "epoch": 38.02, "learning_rate": 3.099446264820304e-05, "loss": 2.0516, "step": 13136500 }, { "epoch": 38.03, "learning_rate": 3.099373900055576e-05, "loss": 2.0788, "step": 13137000 }, { "epoch": 38.03, "learning_rate": 3.099301535290849e-05, "loss": 2.0427, "step": 13137500 }, { "epoch": 38.03, "learning_rate": 3.099229315255651e-05, "loss": 2.0485, "step": 13138000 }, { "epoch": 38.03, "learning_rate": 3.099156950490923e-05, "loss": 2.0627, "step": 13138500 }, { "epoch": 38.03, "learning_rate": 3.099084585726195e-05, "loss": 2.0255, "step": 13139000 }, { "epoch": 38.03, "learning_rate": 3.0990122209614674e-05, "loss": 2.0424, "step": 13139500 }, { "epoch": 38.03, "learning_rate": 3.09893985619674e-05, "loss": 2.039, "step": 13140000 }, { "epoch": 38.04, "learning_rate": 3.098867491432012e-05, "loss": 2.0338, "step": 13140500 }, { "epoch": 38.04, "learning_rate": 3.098795126667284e-05, "loss": 2.051, "step": 13141000 }, { "epoch": 38.04, "learning_rate": 3.098722761902556e-05, "loss": 2.0356, "step": 13141500 }, { "epoch": 38.04, "learning_rate": 3.098650397137829e-05, "loss": 2.0686, "step": 13142000 }, { "epoch": 38.04, "learning_rate": 3.0985780323731015e-05, "loss": 2.0545, "step": 13142500 }, { "epoch": 38.04, "learning_rate": 3.098505667608374e-05, "loss": 2.0543, "step": 13143000 }, { "epoch": 38.05, "learning_rate": 3.098433302843646e-05, "loss": 2.0651, "step": 13143500 }, { "epoch": 38.05, "learning_rate": 3.098360938078918e-05, "loss": 2.0674, "step": 13144000 }, { "epoch": 38.05, "learning_rate": 3.0982885733141904e-05, "loss": 2.0535, "step": 13144500 }, { "epoch": 38.05, "learning_rate": 3.098216208549463e-05, "loss": 2.0751, "step": 13145000 }, { "epoch": 38.05, "learning_rate": 3.098143988514265e-05, "loss": 2.0583, "step": 13145500 }, { "epoch": 38.05, "learning_rate": 3.098071623749537e-05, "loss": 2.0376, "step": 13146000 }, { "epoch": 38.05, "learning_rate": 3.097999258984809e-05, "loss": 2.0326, "step": 13146500 }, { "epoch": 38.06, "learning_rate": 3.0979268942200815e-05, "loss": 2.0381, "step": 13147000 }, { "epoch": 38.06, "learning_rate": 3.097854529455354e-05, "loss": 2.0569, "step": 13147500 }, { "epoch": 38.06, "learning_rate": 3.0977821646906266e-05, "loss": 2.0758, "step": 13148000 }, { "epoch": 38.06, "learning_rate": 3.097709799925899e-05, "loss": 2.0258, "step": 13148500 }, { "epoch": 38.06, "learning_rate": 3.097637435161171e-05, "loss": 2.076, "step": 13149000 }, { "epoch": 38.06, "learning_rate": 3.097565070396443e-05, "loss": 2.0588, "step": 13149500 }, { "epoch": 38.06, "learning_rate": 3.0974927056317155e-05, "loss": 2.0564, "step": 13150000 }, { "epoch": 38.07, "learning_rate": 3.097420485596517e-05, "loss": 2.0389, "step": 13150500 }, { "epoch": 38.07, "learning_rate": 3.097348265561319e-05, "loss": 2.0427, "step": 13151000 }, { "epoch": 38.07, "learning_rate": 3.0972759007965915e-05, "loss": 2.0358, "step": 13151500 }, { "epoch": 38.07, "learning_rate": 3.097203536031864e-05, "loss": 2.0283, "step": 13152000 }, { "epoch": 38.07, "learning_rate": 3.097131315996666e-05, "loss": 2.0518, "step": 13152500 }, { "epoch": 38.07, "learning_rate": 3.097058951231938e-05, "loss": 2.0432, "step": 13153000 }, { "epoch": 38.07, "learning_rate": 3.0969865864672104e-05, "loss": 2.0553, "step": 13153500 }, { "epoch": 38.08, "learning_rate": 3.0969142217024826e-05, "loss": 2.0477, "step": 13154000 }, { "epoch": 38.08, "learning_rate": 3.096842001667284e-05, "loss": 2.0603, "step": 13154500 }, { "epoch": 38.08, "learning_rate": 3.0967696369025564e-05, "loss": 2.0692, "step": 13155000 }, { "epoch": 38.08, "learning_rate": 3.096697272137829e-05, "loss": 2.0532, "step": 13155500 }, { "epoch": 38.08, "learning_rate": 3.0966249073731015e-05, "loss": 2.0524, "step": 13156000 }, { "epoch": 38.08, "learning_rate": 3.096552542608374e-05, "loss": 2.0634, "step": 13156500 }, { "epoch": 38.08, "learning_rate": 3.096480177843646e-05, "loss": 2.0622, "step": 13157000 }, { "epoch": 38.09, "learning_rate": 3.0964079578084475e-05, "loss": 2.0458, "step": 13157500 }, { "epoch": 38.09, "learning_rate": 3.09633559304372e-05, "loss": 2.0406, "step": 13158000 }, { "epoch": 38.09, "learning_rate": 3.096263228278992e-05, "loss": 2.0522, "step": 13158500 }, { "epoch": 38.09, "learning_rate": 3.096190863514264e-05, "loss": 2.0351, "step": 13159000 }, { "epoch": 38.09, "learning_rate": 3.0961184987495364e-05, "loss": 2.0601, "step": 13159500 }, { "epoch": 38.09, "learning_rate": 3.096046133984809e-05, "loss": 2.0637, "step": 13160000 }, { "epoch": 38.09, "learning_rate": 3.0959737692200816e-05, "loss": 2.0692, "step": 13160500 }, { "epoch": 38.1, "learning_rate": 3.0959014044553545e-05, "loss": 2.039, "step": 13161000 }, { "epoch": 38.1, "learning_rate": 3.095829039690627e-05, "loss": 2.041, "step": 13161500 }, { "epoch": 38.1, "learning_rate": 3.095756674925899e-05, "loss": 2.0246, "step": 13162000 }, { "epoch": 38.1, "learning_rate": 3.095684310161171e-05, "loss": 2.0709, "step": 13162500 }, { "epoch": 38.1, "learning_rate": 3.095612090125973e-05, "loss": 2.0565, "step": 13163000 }, { "epoch": 38.1, "learning_rate": 3.095539725361245e-05, "loss": 2.0677, "step": 13163500 }, { "epoch": 38.1, "learning_rate": 3.095467360596517e-05, "loss": 2.0613, "step": 13164000 }, { "epoch": 38.11, "learning_rate": 3.0953949958317894e-05, "loss": 2.0654, "step": 13164500 }, { "epoch": 38.11, "learning_rate": 3.0953226310670616e-05, "loss": 2.0533, "step": 13165000 }, { "epoch": 38.11, "learning_rate": 3.0952502663023345e-05, "loss": 2.0604, "step": 13165500 }, { "epoch": 38.11, "learning_rate": 3.095177901537607e-05, "loss": 2.0331, "step": 13166000 }, { "epoch": 38.11, "learning_rate": 3.095105536772879e-05, "loss": 2.0613, "step": 13166500 }, { "epoch": 38.11, "learning_rate": 3.095033172008152e-05, "loss": 2.0579, "step": 13167000 }, { "epoch": 38.11, "learning_rate": 3.0949609519729534e-05, "loss": 2.0455, "step": 13167500 }, { "epoch": 38.12, "learning_rate": 3.0948885872082256e-05, "loss": 2.0396, "step": 13168000 }, { "epoch": 38.12, "learning_rate": 3.094816222443498e-05, "loss": 2.071, "step": 13168500 }, { "epoch": 38.12, "learning_rate": 3.09474385767877e-05, "loss": 2.0295, "step": 13169000 }, { "epoch": 38.12, "learning_rate": 3.094671492914042e-05, "loss": 2.0484, "step": 13169500 }, { "epoch": 38.12, "learning_rate": 3.0945991281493145e-05, "loss": 2.0289, "step": 13170000 }, { "epoch": 38.12, "learning_rate": 3.094526763384587e-05, "loss": 2.0656, "step": 13170500 }, { "epoch": 38.12, "learning_rate": 3.0944543986198596e-05, "loss": 2.0672, "step": 13171000 }, { "epoch": 38.13, "learning_rate": 3.094382178584661e-05, "loss": 2.04, "step": 13171500 }, { "epoch": 38.13, "learning_rate": 3.0943098138199334e-05, "loss": 2.0497, "step": 13172000 }, { "epoch": 38.13, "learning_rate": 3.0942374490552056e-05, "loss": 2.0488, "step": 13172500 }, { "epoch": 38.13, "learning_rate": 3.094165084290478e-05, "loss": 2.0531, "step": 13173000 }, { "epoch": 38.13, "learning_rate": 3.0940928642552794e-05, "loss": 2.0341, "step": 13173500 }, { "epoch": 38.13, "learning_rate": 3.0940206442200816e-05, "loss": 2.0768, "step": 13174000 }, { "epoch": 38.13, "learning_rate": 3.0939482794553545e-05, "loss": 2.0569, "step": 13174500 }, { "epoch": 38.14, "learning_rate": 3.093875914690627e-05, "loss": 2.0517, "step": 13175000 }, { "epoch": 38.14, "learning_rate": 3.093803549925899e-05, "loss": 2.0544, "step": 13175500 }, { "epoch": 38.14, "learning_rate": 3.093731185161171e-05, "loss": 2.0465, "step": 13176000 }, { "epoch": 38.14, "learning_rate": 3.0936588203964434e-05, "loss": 2.0513, "step": 13176500 }, { "epoch": 38.14, "learning_rate": 3.0935864556317157e-05, "loss": 2.0594, "step": 13177000 }, { "epoch": 38.14, "learning_rate": 3.093514090866988e-05, "loss": 2.0627, "step": 13177500 }, { "epoch": 38.14, "learning_rate": 3.09344172610226e-05, "loss": 2.0459, "step": 13178000 }, { "epoch": 38.15, "learning_rate": 3.093369361337532e-05, "loss": 2.0642, "step": 13178500 }, { "epoch": 38.15, "learning_rate": 3.0932969965728045e-05, "loss": 2.05, "step": 13179000 }, { "epoch": 38.15, "learning_rate": 3.093224631808077e-05, "loss": 2.0168, "step": 13179500 }, { "epoch": 38.15, "learning_rate": 3.09315226704335e-05, "loss": 2.0754, "step": 13180000 }, { "epoch": 38.15, "learning_rate": 3.093079902278622e-05, "loss": 2.0535, "step": 13180500 }, { "epoch": 38.15, "learning_rate": 3.0930076822434235e-05, "loss": 2.0654, "step": 13181000 }, { "epoch": 38.16, "learning_rate": 3.092935317478696e-05, "loss": 2.0595, "step": 13181500 }, { "epoch": 38.16, "learning_rate": 3.0928629527139686e-05, "loss": 2.0687, "step": 13182000 }, { "epoch": 38.16, "learning_rate": 3.092790587949241e-05, "loss": 2.0496, "step": 13182500 }, { "epoch": 38.16, "learning_rate": 3.0927183679140424e-05, "loss": 2.0673, "step": 13183000 }, { "epoch": 38.16, "learning_rate": 3.0926461478788446e-05, "loss": 2.036, "step": 13183500 }, { "epoch": 38.16, "learning_rate": 3.092573783114117e-05, "loss": 2.0326, "step": 13184000 }, { "epoch": 38.16, "learning_rate": 3.092501418349389e-05, "loss": 2.0425, "step": 13184500 }, { "epoch": 38.17, "learning_rate": 3.092429053584661e-05, "loss": 2.0667, "step": 13185000 }, { "epoch": 38.17, "learning_rate": 3.0923566888199335e-05, "loss": 2.0627, "step": 13185500 }, { "epoch": 38.17, "learning_rate": 3.092284324055206e-05, "loss": 2.0512, "step": 13186000 }, { "epoch": 38.17, "learning_rate": 3.092211959290478e-05, "loss": 2.0288, "step": 13186500 }, { "epoch": 38.17, "learning_rate": 3.09213959452575e-05, "loss": 2.0557, "step": 13187000 }, { "epoch": 38.17, "learning_rate": 3.0920672297610224e-05, "loss": 2.0422, "step": 13187500 }, { "epoch": 38.17, "learning_rate": 3.0919948649962946e-05, "loss": 2.0551, "step": 13188000 }, { "epoch": 38.18, "learning_rate": 3.091922500231567e-05, "loss": 2.0567, "step": 13188500 }, { "epoch": 38.18, "learning_rate": 3.091850280196369e-05, "loss": 2.0354, "step": 13189000 }, { "epoch": 38.18, "learning_rate": 3.091777915431642e-05, "loss": 2.0652, "step": 13189500 }, { "epoch": 38.18, "learning_rate": 3.0917056953964435e-05, "loss": 2.0318, "step": 13190000 }, { "epoch": 38.18, "learning_rate": 3.091633330631716e-05, "loss": 2.0602, "step": 13190500 }, { "epoch": 38.18, "learning_rate": 3.091560965866988e-05, "loss": 2.063, "step": 13191000 }, { "epoch": 38.18, "learning_rate": 3.09148860110226e-05, "loss": 2.0532, "step": 13191500 }, { "epoch": 38.19, "learning_rate": 3.0914162363375324e-05, "loss": 2.0853, "step": 13192000 }, { "epoch": 38.19, "learning_rate": 3.0913440163023346e-05, "loss": 2.0645, "step": 13192500 }, { "epoch": 38.19, "learning_rate": 3.091271796267136e-05, "loss": 2.0321, "step": 13193000 }, { "epoch": 38.19, "learning_rate": 3.0911994315024084e-05, "loss": 2.0609, "step": 13193500 }, { "epoch": 38.19, "learning_rate": 3.0911270667376806e-05, "loss": 2.0393, "step": 13194000 }, { "epoch": 38.19, "learning_rate": 3.091054701972953e-05, "loss": 2.0503, "step": 13194500 }, { "epoch": 38.19, "learning_rate": 3.090982337208225e-05, "loss": 2.0394, "step": 13195000 }, { "epoch": 38.2, "learning_rate": 3.090909972443497e-05, "loss": 2.0653, "step": 13195500 }, { "epoch": 38.2, "learning_rate": 3.0908377524082995e-05, "loss": 2.0732, "step": 13196000 }, { "epoch": 38.2, "learning_rate": 3.090765387643572e-05, "loss": 2.0613, "step": 13196500 }, { "epoch": 38.2, "learning_rate": 3.090693022878845e-05, "loss": 2.0329, "step": 13197000 }, { "epoch": 38.2, "learning_rate": 3.090620802843646e-05, "loss": 2.0427, "step": 13197500 }, { "epoch": 38.2, "learning_rate": 3.0905484380789184e-05, "loss": 2.0611, "step": 13198000 }, { "epoch": 38.2, "learning_rate": 3.090476073314191e-05, "loss": 2.064, "step": 13198500 }, { "epoch": 38.21, "learning_rate": 3.090403708549463e-05, "loss": 2.032, "step": 13199000 }, { "epoch": 38.21, "learning_rate": 3.090331343784735e-05, "loss": 2.027, "step": 13199500 }, { "epoch": 38.21, "learning_rate": 3.0902589790200073e-05, "loss": 2.0515, "step": 13200000 }, { "epoch": 38.21, "learning_rate": 3.0901866142552796e-05, "loss": 2.0472, "step": 13200500 }, { "epoch": 38.21, "learning_rate": 3.0901142494905525e-05, "loss": 2.0527, "step": 13201000 }, { "epoch": 38.21, "learning_rate": 3.090041884725825e-05, "loss": 2.0758, "step": 13201500 }, { "epoch": 38.21, "learning_rate": 3.0899698094201556e-05, "loss": 2.0322, "step": 13202000 }, { "epoch": 38.22, "learning_rate": 3.089897444655428e-05, "loss": 2.085, "step": 13202500 }, { "epoch": 38.22, "learning_rate": 3.0898250798907e-05, "loss": 2.0562, "step": 13203000 }, { "epoch": 38.22, "learning_rate": 3.089752715125972e-05, "loss": 2.0361, "step": 13203500 }, { "epoch": 38.22, "learning_rate": 3.089680350361245e-05, "loss": 2.0541, "step": 13204000 }, { "epoch": 38.22, "learning_rate": 3.0896079855965174e-05, "loss": 2.0667, "step": 13204500 }, { "epoch": 38.22, "learning_rate": 3.08953562083179e-05, "loss": 2.0458, "step": 13205000 }, { "epoch": 38.22, "learning_rate": 3.0894632560670625e-05, "loss": 2.0516, "step": 13205500 }, { "epoch": 38.23, "learning_rate": 3.089390891302335e-05, "loss": 2.0569, "step": 13206000 }, { "epoch": 38.23, "learning_rate": 3.089318526537607e-05, "loss": 2.0643, "step": 13206500 }, { "epoch": 38.23, "learning_rate": 3.089246161772879e-05, "loss": 2.0314, "step": 13207000 }, { "epoch": 38.23, "learning_rate": 3.0891737970081514e-05, "loss": 2.0496, "step": 13207500 }, { "epoch": 38.23, "learning_rate": 3.0891014322434236e-05, "loss": 2.0572, "step": 13208000 }, { "epoch": 38.23, "learning_rate": 3.089029067478696e-05, "loss": 2.0461, "step": 13208500 }, { "epoch": 38.23, "learning_rate": 3.088956702713968e-05, "loss": 2.0688, "step": 13209000 }, { "epoch": 38.24, "learning_rate": 3.08888433794924e-05, "loss": 2.0458, "step": 13209500 }, { "epoch": 38.24, "learning_rate": 3.0888119731845125e-05, "loss": 2.0616, "step": 13210000 }, { "epoch": 38.24, "learning_rate": 3.088739897878844e-05, "loss": 2.0691, "step": 13210500 }, { "epoch": 38.24, "learning_rate": 3.088667533114116e-05, "loss": 2.0724, "step": 13211000 }, { "epoch": 38.24, "learning_rate": 3.0885951683493885e-05, "loss": 2.0821, "step": 13211500 }, { "epoch": 38.24, "learning_rate": 3.08852294831419e-05, "loss": 2.0733, "step": 13212000 }, { "epoch": 38.24, "learning_rate": 3.088450583549463e-05, "loss": 2.0852, "step": 13212500 }, { "epoch": 38.25, "learning_rate": 3.088378218784735e-05, "loss": 2.0623, "step": 13213000 }, { "epoch": 38.25, "learning_rate": 3.0883059987495374e-05, "loss": 2.062, "step": 13213500 }, { "epoch": 38.25, "learning_rate": 3.0882336339848097e-05, "loss": 2.0368, "step": 13214000 }, { "epoch": 38.25, "learning_rate": 3.088161269220082e-05, "loss": 2.0602, "step": 13214500 }, { "epoch": 38.25, "learning_rate": 3.088088904455354e-05, "loss": 2.0582, "step": 13215000 }, { "epoch": 38.25, "learning_rate": 3.088016539690626e-05, "loss": 2.0544, "step": 13215500 }, { "epoch": 38.25, "learning_rate": 3.0879441749258985e-05, "loss": 2.0493, "step": 13216000 }, { "epoch": 38.26, "learning_rate": 3.087871810161171e-05, "loss": 2.0494, "step": 13216500 }, { "epoch": 38.26, "learning_rate": 3.087799445396443e-05, "loss": 2.0367, "step": 13217000 }, { "epoch": 38.26, "learning_rate": 3.087727080631715e-05, "loss": 2.0437, "step": 13217500 }, { "epoch": 38.26, "learning_rate": 3.0876547158669874e-05, "loss": 2.0385, "step": 13218000 }, { "epoch": 38.26, "learning_rate": 3.08758249583179e-05, "loss": 2.0361, "step": 13218500 }, { "epoch": 38.26, "learning_rate": 3.087510131067062e-05, "loss": 2.0354, "step": 13219000 }, { "epoch": 38.27, "learning_rate": 3.087437766302335e-05, "loss": 2.0591, "step": 13219500 }, { "epoch": 38.27, "learning_rate": 3.0873655462671364e-05, "loss": 2.0355, "step": 13220000 }, { "epoch": 38.27, "learning_rate": 3.0872931815024086e-05, "loss": 2.0462, "step": 13220500 }, { "epoch": 38.27, "learning_rate": 3.087220816737681e-05, "loss": 2.0587, "step": 13221000 }, { "epoch": 38.27, "learning_rate": 3.087148451972953e-05, "loss": 2.0351, "step": 13221500 }, { "epoch": 38.27, "learning_rate": 3.087076087208225e-05, "loss": 2.0438, "step": 13222000 }, { "epoch": 38.27, "learning_rate": 3.0870037224434975e-05, "loss": 2.0852, "step": 13222500 }, { "epoch": 38.28, "learning_rate": 3.0869313576787704e-05, "loss": 2.064, "step": 13223000 }, { "epoch": 38.28, "learning_rate": 3.0868589929140426e-05, "loss": 2.0496, "step": 13223500 }, { "epoch": 38.28, "learning_rate": 3.086786628149315e-05, "loss": 2.063, "step": 13224000 }, { "epoch": 38.28, "learning_rate": 3.086714263384587e-05, "loss": 2.0544, "step": 13224500 }, { "epoch": 38.28, "learning_rate": 3.0866420433493886e-05, "loss": 2.0686, "step": 13225000 }, { "epoch": 38.28, "learning_rate": 3.086569678584661e-05, "loss": 2.0512, "step": 13225500 }, { "epoch": 38.28, "learning_rate": 3.086497313819933e-05, "loss": 2.0555, "step": 13226000 }, { "epoch": 38.29, "learning_rate": 3.086424949055205e-05, "loss": 2.0608, "step": 13226500 }, { "epoch": 38.29, "learning_rate": 3.086352584290478e-05, "loss": 2.0683, "step": 13227000 }, { "epoch": 38.29, "learning_rate": 3.0862803642552804e-05, "loss": 2.0672, "step": 13227500 }, { "epoch": 38.29, "learning_rate": 3.0862079994905526e-05, "loss": 2.0568, "step": 13228000 }, { "epoch": 38.29, "learning_rate": 3.086135634725825e-05, "loss": 2.0552, "step": 13228500 }, { "epoch": 38.29, "learning_rate": 3.086063269961097e-05, "loss": 2.0666, "step": 13229000 }, { "epoch": 38.29, "learning_rate": 3.085990905196369e-05, "loss": 2.0701, "step": 13229500 }, { "epoch": 38.3, "learning_rate": 3.0859185404316415e-05, "loss": 2.0415, "step": 13230000 }, { "epoch": 38.3, "learning_rate": 3.085846175666914e-05, "loss": 2.0781, "step": 13230500 }, { "epoch": 38.3, "learning_rate": 3.085773810902186e-05, "loss": 2.0327, "step": 13231000 }, { "epoch": 38.3, "learning_rate": 3.085701446137458e-05, "loss": 2.0478, "step": 13231500 }, { "epoch": 38.3, "learning_rate": 3.0856292261022604e-05, "loss": 2.0515, "step": 13232000 }, { "epoch": 38.3, "learning_rate": 3.0855568613375326e-05, "loss": 2.0453, "step": 13232500 }, { "epoch": 38.3, "learning_rate": 3.085484496572805e-05, "loss": 2.0562, "step": 13233000 }, { "epoch": 38.31, "learning_rate": 3.085412131808077e-05, "loss": 2.0693, "step": 13233500 }, { "epoch": 38.31, "learning_rate": 3.085339767043349e-05, "loss": 2.05, "step": 13234000 }, { "epoch": 38.31, "learning_rate": 3.0852675470081515e-05, "loss": 2.0636, "step": 13234500 }, { "epoch": 38.31, "learning_rate": 3.085195182243424e-05, "loss": 2.0556, "step": 13235000 }, { "epoch": 38.31, "learning_rate": 3.085122817478696e-05, "loss": 2.0486, "step": 13235500 }, { "epoch": 38.31, "learning_rate": 3.085050597443498e-05, "loss": 2.0879, "step": 13236000 }, { "epoch": 38.31, "learning_rate": 3.0849782326787704e-05, "loss": 2.0543, "step": 13236500 }, { "epoch": 38.32, "learning_rate": 3.084905867914043e-05, "loss": 2.0437, "step": 13237000 }, { "epoch": 38.32, "learning_rate": 3.084833503149315e-05, "loss": 2.0697, "step": 13237500 }, { "epoch": 38.32, "learning_rate": 3.084761138384587e-05, "loss": 2.0279, "step": 13238000 }, { "epoch": 38.32, "learning_rate": 3.0846887736198593e-05, "loss": 2.0518, "step": 13238500 }, { "epoch": 38.32, "learning_rate": 3.0846164088551316e-05, "loss": 2.0708, "step": 13239000 }, { "epoch": 38.32, "learning_rate": 3.084544044090404e-05, "loss": 2.0497, "step": 13239500 }, { "epoch": 38.32, "learning_rate": 3.0844718240552053e-05, "loss": 2.0397, "step": 13240000 }, { "epoch": 38.33, "learning_rate": 3.084399459290478e-05, "loss": 2.0493, "step": 13240500 }, { "epoch": 38.33, "learning_rate": 3.0843270945257505e-05, "loss": 2.0672, "step": 13241000 }, { "epoch": 38.33, "learning_rate": 3.084254729761023e-05, "loss": 2.0457, "step": 13241500 }, { "epoch": 38.33, "learning_rate": 3.0841823649962956e-05, "loss": 2.0482, "step": 13242000 }, { "epoch": 38.33, "learning_rate": 3.084110000231568e-05, "loss": 2.0619, "step": 13242500 }, { "epoch": 38.33, "learning_rate": 3.08403763546684e-05, "loss": 2.0684, "step": 13243000 }, { "epoch": 38.33, "learning_rate": 3.083965270702112e-05, "loss": 2.0272, "step": 13243500 }, { "epoch": 38.34, "learning_rate": 3.0838929059373845e-05, "loss": 2.0715, "step": 13244000 }, { "epoch": 38.34, "learning_rate": 3.083820541172657e-05, "loss": 2.0691, "step": 13244500 }, { "epoch": 38.34, "learning_rate": 3.083748176407929e-05, "loss": 2.0417, "step": 13245000 }, { "epoch": 38.34, "learning_rate": 3.0836759563727305e-05, "loss": 2.0654, "step": 13245500 }, { "epoch": 38.34, "learning_rate": 3.083603591608003e-05, "loss": 2.0686, "step": 13246000 }, { "epoch": 38.34, "learning_rate": 3.083531371572805e-05, "loss": 2.0601, "step": 13246500 }, { "epoch": 38.34, "learning_rate": 3.083459006808077e-05, "loss": 2.0734, "step": 13247000 }, { "epoch": 38.35, "learning_rate": 3.0833866420433494e-05, "loss": 2.0459, "step": 13247500 }, { "epoch": 38.35, "learning_rate": 3.0833142772786216e-05, "loss": 2.0708, "step": 13248000 }, { "epoch": 38.35, "learning_rate": 3.083242057243423e-05, "loss": 2.0681, "step": 13248500 }, { "epoch": 38.35, "learning_rate": 3.0831696924786954e-05, "loss": 2.0499, "step": 13249000 }, { "epoch": 38.35, "learning_rate": 3.083097327713968e-05, "loss": 2.035, "step": 13249500 }, { "epoch": 38.35, "learning_rate": 3.0830249629492405e-05, "loss": 2.0272, "step": 13250000 }, { "epoch": 38.35, "learning_rate": 3.0829525981845134e-05, "loss": 2.0333, "step": 13250500 }, { "epoch": 38.36, "learning_rate": 3.0828802334197856e-05, "loss": 2.0619, "step": 13251000 }, { "epoch": 38.36, "learning_rate": 3.082807868655058e-05, "loss": 2.0427, "step": 13251500 }, { "epoch": 38.36, "learning_rate": 3.0827356486198594e-05, "loss": 2.0532, "step": 13252000 }, { "epoch": 38.36, "learning_rate": 3.0826632838551316e-05, "loss": 2.0505, "step": 13252500 }, { "epoch": 38.36, "learning_rate": 3.082590919090404e-05, "loss": 2.0603, "step": 13253000 }, { "epoch": 38.36, "learning_rate": 3.0825188437847354e-05, "loss": 2.0683, "step": 13253500 }, { "epoch": 38.36, "learning_rate": 3.0824464790200077e-05, "loss": 2.0352, "step": 13254000 }, { "epoch": 38.37, "learning_rate": 3.08237411425528e-05, "loss": 2.049, "step": 13254500 }, { "epoch": 38.37, "learning_rate": 3.082301749490552e-05, "loss": 2.052, "step": 13255000 }, { "epoch": 38.37, "learning_rate": 3.082229384725824e-05, "loss": 2.0657, "step": 13255500 }, { "epoch": 38.37, "learning_rate": 3.0821570199610966e-05, "loss": 2.0643, "step": 13256000 }, { "epoch": 38.37, "learning_rate": 3.082084655196369e-05, "loss": 2.0702, "step": 13256500 }, { "epoch": 38.37, "learning_rate": 3.082012290431642e-05, "loss": 2.0555, "step": 13257000 }, { "epoch": 38.38, "learning_rate": 3.081939925666914e-05, "loss": 2.0724, "step": 13257500 }, { "epoch": 38.38, "learning_rate": 3.081867560902186e-05, "loss": 2.0567, "step": 13258000 }, { "epoch": 38.38, "learning_rate": 3.0817951961374583e-05, "loss": 2.0504, "step": 13258500 }, { "epoch": 38.38, "learning_rate": 3.0817228313727306e-05, "loss": 2.0539, "step": 13259000 }, { "epoch": 38.38, "learning_rate": 3.0816504666080035e-05, "loss": 2.0519, "step": 13259500 }, { "epoch": 38.38, "learning_rate": 3.081578101843276e-05, "loss": 2.0304, "step": 13260000 }, { "epoch": 38.38, "learning_rate": 3.081505737078548e-05, "loss": 2.0753, "step": 13260500 }, { "epoch": 38.39, "learning_rate": 3.08143337231382e-05, "loss": 2.0772, "step": 13261000 }, { "epoch": 38.39, "learning_rate": 3.0813610075490924e-05, "loss": 2.0915, "step": 13261500 }, { "epoch": 38.39, "learning_rate": 3.0812886427843646e-05, "loss": 2.0526, "step": 13262000 }, { "epoch": 38.39, "learning_rate": 3.081216278019637e-05, "loss": 2.0552, "step": 13262500 }, { "epoch": 38.39, "learning_rate": 3.081143913254909e-05, "loss": 2.0381, "step": 13263000 }, { "epoch": 38.39, "learning_rate": 3.081071548490181e-05, "loss": 2.0515, "step": 13263500 }, { "epoch": 38.39, "learning_rate": 3.080999183725454e-05, "loss": 2.0301, "step": 13264000 }, { "epoch": 38.4, "learning_rate": 3.080926963690256e-05, "loss": 2.0425, "step": 13264500 }, { "epoch": 38.4, "learning_rate": 3.0808545989255286e-05, "loss": 2.0313, "step": 13265000 }, { "epoch": 38.4, "learning_rate": 3.080782234160801e-05, "loss": 2.0483, "step": 13265500 }, { "epoch": 38.4, "learning_rate": 3.080709869396073e-05, "loss": 2.0788, "step": 13266000 }, { "epoch": 38.4, "learning_rate": 3.080637504631345e-05, "loss": 2.0464, "step": 13266500 }, { "epoch": 38.4, "learning_rate": 3.0805651398666175e-05, "loss": 2.0619, "step": 13267000 }, { "epoch": 38.4, "learning_rate": 3.08049277510189e-05, "loss": 2.0564, "step": 13267500 }, { "epoch": 38.41, "learning_rate": 3.080420410337162e-05, "loss": 2.0287, "step": 13268000 }, { "epoch": 38.41, "learning_rate": 3.080348045572434e-05, "loss": 2.0649, "step": 13268500 }, { "epoch": 38.41, "learning_rate": 3.080275970266766e-05, "loss": 2.0546, "step": 13269000 }, { "epoch": 38.41, "learning_rate": 3.080203605502038e-05, "loss": 2.0475, "step": 13269500 }, { "epoch": 38.41, "learning_rate": 3.08013124073731e-05, "loss": 2.0733, "step": 13270000 }, { "epoch": 38.41, "learning_rate": 3.0800588759725824e-05, "loss": 2.0732, "step": 13270500 }, { "epoch": 38.41, "learning_rate": 3.0799865112078546e-05, "loss": 2.0585, "step": 13271000 }, { "epoch": 38.42, "learning_rate": 3.079914435902186e-05, "loss": 2.0621, "step": 13271500 }, { "epoch": 38.42, "learning_rate": 3.0798420711374584e-05, "loss": 2.0497, "step": 13272000 }, { "epoch": 38.42, "learning_rate": 3.079769706372731e-05, "loss": 2.0607, "step": 13272500 }, { "epoch": 38.42, "learning_rate": 3.0796973416080035e-05, "loss": 2.0391, "step": 13273000 }, { "epoch": 38.42, "learning_rate": 3.079624976843276e-05, "loss": 2.08, "step": 13273500 }, { "epoch": 38.42, "learning_rate": 3.079552612078548e-05, "loss": 2.0456, "step": 13274000 }, { "epoch": 38.42, "learning_rate": 3.07948024731382e-05, "loss": 2.0453, "step": 13274500 }, { "epoch": 38.43, "learning_rate": 3.0794078825490924e-05, "loss": 2.0495, "step": 13275000 }, { "epoch": 38.43, "learning_rate": 3.079335662513894e-05, "loss": 2.0519, "step": 13275500 }, { "epoch": 38.43, "learning_rate": 3.079263297749166e-05, "loss": 2.0603, "step": 13276000 }, { "epoch": 38.43, "learning_rate": 3.0791910777139685e-05, "loss": 2.0596, "step": 13276500 }, { "epoch": 38.43, "learning_rate": 3.079118712949241e-05, "loss": 2.0757, "step": 13277000 }, { "epoch": 38.43, "learning_rate": 3.079046348184513e-05, "loss": 2.0742, "step": 13277500 }, { "epoch": 38.43, "learning_rate": 3.078973983419785e-05, "loss": 2.0715, "step": 13278000 }, { "epoch": 38.44, "learning_rate": 3.0789016186550573e-05, "loss": 2.0789, "step": 13278500 }, { "epoch": 38.44, "learning_rate": 3.07882925389033e-05, "loss": 2.0439, "step": 13279000 }, { "epoch": 38.44, "learning_rate": 3.0787568891256025e-05, "loss": 2.0533, "step": 13279500 }, { "epoch": 38.44, "learning_rate": 3.078684524360875e-05, "loss": 2.0731, "step": 13280000 }, { "epoch": 38.44, "learning_rate": 3.078612159596147e-05, "loss": 2.0751, "step": 13280500 }, { "epoch": 38.44, "learning_rate": 3.078539794831419e-05, "loss": 2.0428, "step": 13281000 }, { "epoch": 38.44, "learning_rate": 3.0784674300666914e-05, "loss": 2.0797, "step": 13281500 }, { "epoch": 38.45, "learning_rate": 3.0783950653019636e-05, "loss": 2.08, "step": 13282000 }, { "epoch": 38.45, "learning_rate": 3.078322845266766e-05, "loss": 2.0331, "step": 13282500 }, { "epoch": 38.45, "learning_rate": 3.078250480502038e-05, "loss": 2.0424, "step": 13283000 }, { "epoch": 38.45, "learning_rate": 3.07817811573731e-05, "loss": 2.0764, "step": 13283500 }, { "epoch": 38.45, "learning_rate": 3.078105895702112e-05, "loss": 2.064, "step": 13284000 }, { "epoch": 38.45, "learning_rate": 3.078033530937384e-05, "loss": 2.069, "step": 13284500 }, { "epoch": 38.45, "learning_rate": 3.077961166172656e-05, "loss": 2.0596, "step": 13285000 }, { "epoch": 38.46, "learning_rate": 3.0778888014079285e-05, "loss": 2.0456, "step": 13285500 }, { "epoch": 38.46, "learning_rate": 3.0778164366432014e-05, "loss": 2.0444, "step": 13286000 }, { "epoch": 38.46, "learning_rate": 3.0777440718784736e-05, "loss": 2.0559, "step": 13286500 }, { "epoch": 38.46, "learning_rate": 3.0776717071137465e-05, "loss": 2.0447, "step": 13287000 }, { "epoch": 38.46, "learning_rate": 3.077599342349019e-05, "loss": 2.0494, "step": 13287500 }, { "epoch": 38.46, "learning_rate": 3.0775272670433496e-05, "loss": 2.0521, "step": 13288000 }, { "epoch": 38.46, "learning_rate": 3.077454902278622e-05, "loss": 2.0433, "step": 13288500 }, { "epoch": 38.47, "learning_rate": 3.077382537513894e-05, "loss": 2.0397, "step": 13289000 }, { "epoch": 38.47, "learning_rate": 3.077310172749166e-05, "loss": 2.0406, "step": 13289500 }, { "epoch": 38.47, "learning_rate": 3.0772378079844385e-05, "loss": 2.0451, "step": 13290000 }, { "epoch": 38.47, "learning_rate": 3.077165587949241e-05, "loss": 2.0591, "step": 13290500 }, { "epoch": 38.47, "learning_rate": 3.077093223184513e-05, "loss": 2.049, "step": 13291000 }, { "epoch": 38.47, "learning_rate": 3.077020858419785e-05, "loss": 2.0776, "step": 13291500 }, { "epoch": 38.47, "learning_rate": 3.0769484936550574e-05, "loss": 2.0652, "step": 13292000 }, { "epoch": 38.48, "learning_rate": 3.0768761288903297e-05, "loss": 2.0747, "step": 13292500 }, { "epoch": 38.48, "learning_rate": 3.076803764125602e-05, "loss": 2.056, "step": 13293000 }, { "epoch": 38.48, "learning_rate": 3.076731399360874e-05, "loss": 2.0539, "step": 13293500 }, { "epoch": 38.48, "learning_rate": 3.076659034596147e-05, "loss": 2.05, "step": 13294000 }, { "epoch": 38.48, "learning_rate": 3.0765868145609486e-05, "loss": 2.0765, "step": 13294500 }, { "epoch": 38.48, "learning_rate": 3.0765144497962215e-05, "loss": 2.0531, "step": 13295000 }, { "epoch": 38.49, "learning_rate": 3.076442085031494e-05, "loss": 2.0293, "step": 13295500 }, { "epoch": 38.49, "learning_rate": 3.076369720266766e-05, "loss": 2.0571, "step": 13296000 }, { "epoch": 38.49, "learning_rate": 3.076297355502038e-05, "loss": 2.0532, "step": 13296500 }, { "epoch": 38.49, "learning_rate": 3.0762249907373103e-05, "loss": 2.0564, "step": 13297000 }, { "epoch": 38.49, "learning_rate": 3.076152770702112e-05, "loss": 2.0338, "step": 13297500 }, { "epoch": 38.49, "learning_rate": 3.076080405937384e-05, "loss": 2.0317, "step": 13298000 }, { "epoch": 38.49, "learning_rate": 3.0760080411726564e-05, "loss": 2.052, "step": 13298500 }, { "epoch": 38.5, "learning_rate": 3.0759356764079286e-05, "loss": 2.0607, "step": 13299000 }, { "epoch": 38.5, "learning_rate": 3.0758633116432015e-05, "loss": 2.0648, "step": 13299500 }, { "epoch": 38.5, "learning_rate": 3.075790946878474e-05, "loss": 2.0747, "step": 13300000 }, { "epoch": 38.5, "learning_rate": 3.075718726843275e-05, "loss": 2.0699, "step": 13300500 }, { "epoch": 38.5, "learning_rate": 3.0756463620785475e-05, "loss": 2.0846, "step": 13301000 }, { "epoch": 38.5, "learning_rate": 3.0755739973138204e-05, "loss": 2.059, "step": 13301500 }, { "epoch": 38.5, "learning_rate": 3.0755016325490926e-05, "loss": 2.09, "step": 13302000 }, { "epoch": 38.51, "learning_rate": 3.075429267784365e-05, "loss": 2.0614, "step": 13302500 }, { "epoch": 38.51, "learning_rate": 3.075356903019637e-05, "loss": 2.0398, "step": 13303000 }, { "epoch": 38.51, "learning_rate": 3.075284538254909e-05, "loss": 2.0585, "step": 13303500 }, { "epoch": 38.51, "learning_rate": 3.0752121734901815e-05, "loss": 2.0643, "step": 13304000 }, { "epoch": 38.51, "learning_rate": 3.075139808725454e-05, "loss": 2.0609, "step": 13304500 }, { "epoch": 38.51, "learning_rate": 3.0750674439607266e-05, "loss": 2.0429, "step": 13305000 }, { "epoch": 38.51, "learning_rate": 3.074995079195999e-05, "loss": 2.0607, "step": 13305500 }, { "epoch": 38.52, "learning_rate": 3.0749228591608004e-05, "loss": 2.0784, "step": 13306000 }, { "epoch": 38.52, "learning_rate": 3.0748504943960726e-05, "loss": 2.0561, "step": 13306500 }, { "epoch": 38.52, "learning_rate": 3.074778129631345e-05, "loss": 2.0799, "step": 13307000 }, { "epoch": 38.52, "learning_rate": 3.074705764866617e-05, "loss": 2.0671, "step": 13307500 }, { "epoch": 38.52, "learning_rate": 3.074633400101889e-05, "loss": 2.06, "step": 13308000 }, { "epoch": 38.52, "learning_rate": 3.074561035337162e-05, "loss": 2.0535, "step": 13308500 }, { "epoch": 38.52, "learning_rate": 3.074488815301964e-05, "loss": 2.0417, "step": 13309000 }, { "epoch": 38.53, "learning_rate": 3.0744164505372367e-05, "loss": 2.0841, "step": 13309500 }, { "epoch": 38.53, "learning_rate": 3.074344085772509e-05, "loss": 2.0737, "step": 13310000 }, { "epoch": 38.53, "learning_rate": 3.074271721007781e-05, "loss": 2.0857, "step": 13310500 }, { "epoch": 38.53, "learning_rate": 3.074199356243053e-05, "loss": 2.0534, "step": 13311000 }, { "epoch": 38.53, "learning_rate": 3.074127136207855e-05, "loss": 2.0517, "step": 13311500 }, { "epoch": 38.53, "learning_rate": 3.074054771443127e-05, "loss": 2.0632, "step": 13312000 }, { "epoch": 38.53, "learning_rate": 3.073982406678399e-05, "loss": 2.0564, "step": 13312500 }, { "epoch": 38.54, "learning_rate": 3.0739100419136715e-05, "loss": 2.0527, "step": 13313000 }, { "epoch": 38.54, "learning_rate": 3.073837677148944e-05, "loss": 2.0491, "step": 13313500 }, { "epoch": 38.54, "learning_rate": 3.073765312384217e-05, "loss": 2.0512, "step": 13314000 }, { "epoch": 38.54, "learning_rate": 3.073692947619489e-05, "loss": 2.044, "step": 13314500 }, { "epoch": 38.54, "learning_rate": 3.0736207275842904e-05, "loss": 2.0395, "step": 13315000 }, { "epoch": 38.54, "learning_rate": 3.073548507549092e-05, "loss": 2.0561, "step": 13315500 }, { "epoch": 38.54, "learning_rate": 3.073476287513894e-05, "loss": 2.0429, "step": 13316000 }, { "epoch": 38.55, "learning_rate": 3.0734039227491665e-05, "loss": 2.0684, "step": 13316500 }, { "epoch": 38.55, "learning_rate": 3.0733315579844394e-05, "loss": 2.0406, "step": 13317000 }, { "epoch": 38.55, "learning_rate": 3.0732591932197116e-05, "loss": 2.058, "step": 13317500 }, { "epoch": 38.55, "learning_rate": 3.073186828454984e-05, "loss": 2.056, "step": 13318000 }, { "epoch": 38.55, "learning_rate": 3.073114463690256e-05, "loss": 2.059, "step": 13318500 }, { "epoch": 38.55, "learning_rate": 3.0730422436550576e-05, "loss": 2.0307, "step": 13319000 }, { "epoch": 38.55, "learning_rate": 3.07296987889033e-05, "loss": 2.0742, "step": 13319500 }, { "epoch": 38.56, "learning_rate": 3.072897514125602e-05, "loss": 2.0551, "step": 13320000 }, { "epoch": 38.56, "learning_rate": 3.072825149360874e-05, "loss": 2.0496, "step": 13320500 }, { "epoch": 38.56, "learning_rate": 3.0727527845961465e-05, "loss": 2.0521, "step": 13321000 }, { "epoch": 38.56, "learning_rate": 3.0726804198314194e-05, "loss": 2.0481, "step": 13321500 }, { "epoch": 38.56, "learning_rate": 3.0726080550666916e-05, "loss": 2.0861, "step": 13322000 }, { "epoch": 38.56, "learning_rate": 3.072535835031493e-05, "loss": 2.0653, "step": 13322500 }, { "epoch": 38.56, "learning_rate": 3.0724634702667654e-05, "loss": 2.0816, "step": 13323000 }, { "epoch": 38.57, "learning_rate": 3.0723911055020376e-05, "loss": 2.0799, "step": 13323500 }, { "epoch": 38.57, "learning_rate": 3.0723187407373105e-05, "loss": 2.0555, "step": 13324000 }, { "epoch": 38.57, "learning_rate": 3.072246520702112e-05, "loss": 2.0456, "step": 13324500 }, { "epoch": 38.57, "learning_rate": 3.072174155937384e-05, "loss": 2.052, "step": 13325000 }, { "epoch": 38.57, "learning_rate": 3.0721017911726565e-05, "loss": 2.0454, "step": 13325500 }, { "epoch": 38.57, "learning_rate": 3.0720294264079294e-05, "loss": 2.068, "step": 13326000 }, { "epoch": 38.57, "learning_rate": 3.0719570616432016e-05, "loss": 2.0844, "step": 13326500 }, { "epoch": 38.58, "learning_rate": 3.071884696878474e-05, "loss": 2.075, "step": 13327000 }, { "epoch": 38.58, "learning_rate": 3.071812332113746e-05, "loss": 2.0452, "step": 13327500 }, { "epoch": 38.58, "learning_rate": 3.071739967349018e-05, "loss": 2.042, "step": 13328000 }, { "epoch": 38.58, "learning_rate": 3.0716676025842905e-05, "loss": 2.0775, "step": 13328500 }, { "epoch": 38.58, "learning_rate": 3.071595382549092e-05, "loss": 2.0476, "step": 13329000 }, { "epoch": 38.58, "learning_rate": 3.071523017784364e-05, "loss": 2.0604, "step": 13329500 }, { "epoch": 38.58, "learning_rate": 3.0714506530196365e-05, "loss": 2.0683, "step": 13330000 }, { "epoch": 38.59, "learning_rate": 3.0713782882549094e-05, "loss": 2.0844, "step": 13330500 }, { "epoch": 38.59, "learning_rate": 3.0713059234901817e-05, "loss": 2.0661, "step": 13331000 }, { "epoch": 38.59, "learning_rate": 3.0712335587254546e-05, "loss": 2.0787, "step": 13331500 }, { "epoch": 38.59, "learning_rate": 3.071161338690256e-05, "loss": 2.0716, "step": 13332000 }, { "epoch": 38.59, "learning_rate": 3.071089118655058e-05, "loss": 2.0437, "step": 13332500 }, { "epoch": 38.59, "learning_rate": 3.07101675389033e-05, "loss": 2.0636, "step": 13333000 }, { "epoch": 38.6, "learning_rate": 3.070944389125602e-05, "loss": 2.0619, "step": 13333500 }, { "epoch": 38.6, "learning_rate": 3.070872024360874e-05, "loss": 2.0577, "step": 13334000 }, { "epoch": 38.6, "learning_rate": 3.070799659596147e-05, "loss": 2.0594, "step": 13334500 }, { "epoch": 38.6, "learning_rate": 3.0707272948314195e-05, "loss": 2.0463, "step": 13335000 }, { "epoch": 38.6, "learning_rate": 3.070654930066692e-05, "loss": 2.0843, "step": 13335500 }, { "epoch": 38.6, "learning_rate": 3.070582565301964e-05, "loss": 2.0713, "step": 13336000 }, { "epoch": 38.6, "learning_rate": 3.070510200537236e-05, "loss": 2.0505, "step": 13336500 }, { "epoch": 38.61, "learning_rate": 3.0704378357725084e-05, "loss": 2.0519, "step": 13337000 }, { "epoch": 38.61, "learning_rate": 3.0703654710077806e-05, "loss": 2.0589, "step": 13337500 }, { "epoch": 38.61, "learning_rate": 3.070293106243053e-05, "loss": 2.0445, "step": 13338000 }, { "epoch": 38.61, "learning_rate": 3.070220741478326e-05, "loss": 2.0442, "step": 13338500 }, { "epoch": 38.61, "learning_rate": 3.070148376713598e-05, "loss": 2.0602, "step": 13339000 }, { "epoch": 38.61, "learning_rate": 3.07007601194887e-05, "loss": 2.068, "step": 13339500 }, { "epoch": 38.61, "learning_rate": 3.0700036471841424e-05, "loss": 2.0673, "step": 13340000 }, { "epoch": 38.62, "learning_rate": 3.0699314271489446e-05, "loss": 2.0824, "step": 13340500 }, { "epoch": 38.62, "learning_rate": 3.069859062384217e-05, "loss": 2.0991, "step": 13341000 }, { "epoch": 38.62, "learning_rate": 3.069786697619489e-05, "loss": 2.0638, "step": 13341500 }, { "epoch": 38.62, "learning_rate": 3.069714332854761e-05, "loss": 2.0481, "step": 13342000 }, { "epoch": 38.62, "learning_rate": 3.0696419680900335e-05, "loss": 2.0697, "step": 13342500 }, { "epoch": 38.62, "learning_rate": 3.069569603325306e-05, "loss": 2.0409, "step": 13343000 }, { "epoch": 38.62, "learning_rate": 3.069497238560578e-05, "loss": 2.0568, "step": 13343500 }, { "epoch": 38.63, "learning_rate": 3.06942487379585e-05, "loss": 2.0711, "step": 13344000 }, { "epoch": 38.63, "learning_rate": 3.0693525090311224e-05, "loss": 2.0633, "step": 13344500 }, { "epoch": 38.63, "learning_rate": 3.0692801442663946e-05, "loss": 2.0586, "step": 13345000 }, { "epoch": 38.63, "learning_rate": 3.069207779501667e-05, "loss": 2.0658, "step": 13345500 }, { "epoch": 38.63, "learning_rate": 3.06913555946647e-05, "loss": 2.0636, "step": 13346000 }, { "epoch": 38.63, "learning_rate": 3.069063194701742e-05, "loss": 2.0782, "step": 13346500 }, { "epoch": 38.63, "learning_rate": 3.068990829937014e-05, "loss": 2.0658, "step": 13347000 }, { "epoch": 38.64, "learning_rate": 3.0689184651722864e-05, "loss": 2.0522, "step": 13347500 }, { "epoch": 38.64, "learning_rate": 3.0688461004075586e-05, "loss": 2.0667, "step": 13348000 }, { "epoch": 38.64, "learning_rate": 3.068773735642831e-05, "loss": 2.0709, "step": 13348500 }, { "epoch": 38.64, "learning_rate": 3.068701370878103e-05, "loss": 2.0724, "step": 13349000 }, { "epoch": 38.64, "learning_rate": 3.068629006113375e-05, "loss": 2.0489, "step": 13349500 }, { "epoch": 38.64, "learning_rate": 3.068556786078177e-05, "loss": 2.0258, "step": 13350000 }, { "epoch": 38.64, "learning_rate": 3.068484566042979e-05, "loss": 2.072, "step": 13350500 }, { "epoch": 38.65, "learning_rate": 3.068412201278251e-05, "loss": 2.0442, "step": 13351000 }, { "epoch": 38.65, "learning_rate": 3.0683398365135235e-05, "loss": 2.08, "step": 13351500 }, { "epoch": 38.65, "learning_rate": 3.068267471748796e-05, "loss": 2.0274, "step": 13352000 }, { "epoch": 38.65, "learning_rate": 3.068195106984068e-05, "loss": 2.0581, "step": 13352500 }, { "epoch": 38.65, "learning_rate": 3.06812274221934e-05, "loss": 2.0515, "step": 13353000 }, { "epoch": 38.65, "learning_rate": 3.0680505221841425e-05, "loss": 2.0643, "step": 13353500 }, { "epoch": 38.65, "learning_rate": 3.067978157419415e-05, "loss": 2.0556, "step": 13354000 }, { "epoch": 38.66, "learning_rate": 3.0679057926546876e-05, "loss": 2.0379, "step": 13354500 }, { "epoch": 38.66, "learning_rate": 3.06783342788996e-05, "loss": 2.0551, "step": 13355000 }, { "epoch": 38.66, "learning_rate": 3.067761063125232e-05, "loss": 2.0355, "step": 13355500 }, { "epoch": 38.66, "learning_rate": 3.067688698360504e-05, "loss": 2.0577, "step": 13356000 }, { "epoch": 38.66, "learning_rate": 3.0676163335957765e-05, "loss": 2.0598, "step": 13356500 }, { "epoch": 38.66, "learning_rate": 3.067543968831049e-05, "loss": 2.0515, "step": 13357000 }, { "epoch": 38.66, "learning_rate": 3.067471604066321e-05, "loss": 2.0491, "step": 13357500 }, { "epoch": 38.67, "learning_rate": 3.0673993840311225e-05, "loss": 2.0634, "step": 13358000 }, { "epoch": 38.67, "learning_rate": 3.067327163995925e-05, "loss": 2.0648, "step": 13358500 }, { "epoch": 38.67, "learning_rate": 3.067254799231197e-05, "loss": 2.0815, "step": 13359000 }, { "epoch": 38.67, "learning_rate": 3.067182434466469e-05, "loss": 2.065, "step": 13359500 }, { "epoch": 38.67, "learning_rate": 3.0671100697017414e-05, "loss": 2.0336, "step": 13360000 }, { "epoch": 38.67, "learning_rate": 3.067037849666543e-05, "loss": 2.0702, "step": 13360500 }, { "epoch": 38.67, "learning_rate": 3.066965484901816e-05, "loss": 2.0538, "step": 13361000 }, { "epoch": 38.68, "learning_rate": 3.066893120137088e-05, "loss": 2.0539, "step": 13361500 }, { "epoch": 38.68, "learning_rate": 3.06682075537236e-05, "loss": 2.0563, "step": 13362000 }, { "epoch": 38.68, "learning_rate": 3.0667483906076325e-05, "loss": 2.0446, "step": 13362500 }, { "epoch": 38.68, "learning_rate": 3.066676025842905e-05, "loss": 2.0498, "step": 13363000 }, { "epoch": 38.68, "learning_rate": 3.0666036610781776e-05, "loss": 2.0612, "step": 13363500 }, { "epoch": 38.68, "learning_rate": 3.06653129631345e-05, "loss": 2.0389, "step": 13364000 }, { "epoch": 38.68, "learning_rate": 3.066458931548722e-05, "loss": 2.0357, "step": 13364500 }, { "epoch": 38.69, "learning_rate": 3.066386566783994e-05, "loss": 2.0613, "step": 13365000 }, { "epoch": 38.69, "learning_rate": 3.066314491478325e-05, "loss": 2.0441, "step": 13365500 }, { "epoch": 38.69, "learning_rate": 3.0662421267135974e-05, "loss": 2.073, "step": 13366000 }, { "epoch": 38.69, "learning_rate": 3.0661697619488696e-05, "loss": 2.052, "step": 13366500 }, { "epoch": 38.69, "learning_rate": 3.0660973971841425e-05, "loss": 2.0683, "step": 13367000 }, { "epoch": 38.69, "learning_rate": 3.066025032419415e-05, "loss": 2.0435, "step": 13367500 }, { "epoch": 38.69, "learning_rate": 3.065952667654687e-05, "loss": 2.0527, "step": 13368000 }, { "epoch": 38.7, "learning_rate": 3.065880447619489e-05, "loss": 2.0305, "step": 13368500 }, { "epoch": 38.7, "learning_rate": 3.0658080828547614e-05, "loss": 2.0688, "step": 13369000 }, { "epoch": 38.7, "learning_rate": 3.0657357180900337e-05, "loss": 2.0767, "step": 13369500 }, { "epoch": 38.7, "learning_rate": 3.065663498054835e-05, "loss": 2.0627, "step": 13370000 }, { "epoch": 38.7, "learning_rate": 3.0655911332901074e-05, "loss": 2.0416, "step": 13370500 }, { "epoch": 38.7, "learning_rate": 3.06551876852538e-05, "loss": 2.0779, "step": 13371000 }, { "epoch": 38.71, "learning_rate": 3.0654464037606526e-05, "loss": 2.0519, "step": 13371500 }, { "epoch": 38.71, "learning_rate": 3.065374038995925e-05, "loss": 2.0567, "step": 13372000 }, { "epoch": 38.71, "learning_rate": 3.065301674231197e-05, "loss": 2.066, "step": 13372500 }, { "epoch": 38.71, "learning_rate": 3.065229309466469e-05, "loss": 2.0439, "step": 13373000 }, { "epoch": 38.71, "learning_rate": 3.0651569447017415e-05, "loss": 2.0605, "step": 13373500 }, { "epoch": 38.71, "learning_rate": 3.065084579937014e-05, "loss": 2.047, "step": 13374000 }, { "epoch": 38.71, "learning_rate": 3.065012215172286e-05, "loss": 2.0521, "step": 13374500 }, { "epoch": 38.72, "learning_rate": 3.064939850407558e-05, "loss": 2.0632, "step": 13375000 }, { "epoch": 38.72, "learning_rate": 3.0648676303723604e-05, "loss": 2.0725, "step": 13375500 }, { "epoch": 38.72, "learning_rate": 3.0647952656076326e-05, "loss": 2.0763, "step": 13376000 }, { "epoch": 38.72, "learning_rate": 3.064722900842905e-05, "loss": 2.0506, "step": 13376500 }, { "epoch": 38.72, "learning_rate": 3.064650536078178e-05, "loss": 2.0585, "step": 13377000 }, { "epoch": 38.72, "learning_rate": 3.06457817131345e-05, "loss": 2.032, "step": 13377500 }, { "epoch": 38.72, "learning_rate": 3.064505806548722e-05, "loss": 2.0603, "step": 13378000 }, { "epoch": 38.73, "learning_rate": 3.0644334417839944e-05, "loss": 2.0777, "step": 13378500 }, { "epoch": 38.73, "learning_rate": 3.0643610770192666e-05, "loss": 2.0752, "step": 13379000 }, { "epoch": 38.73, "learning_rate": 3.064288712254539e-05, "loss": 2.0585, "step": 13379500 }, { "epoch": 38.73, "learning_rate": 3.0642164922193404e-05, "loss": 2.0523, "step": 13380000 }, { "epoch": 38.73, "learning_rate": 3.0641441274546126e-05, "loss": 2.0607, "step": 13380500 }, { "epoch": 38.73, "learning_rate": 3.064071762689885e-05, "loss": 2.0686, "step": 13381000 }, { "epoch": 38.73, "learning_rate": 3.063999397925158e-05, "loss": 2.0713, "step": 13381500 }, { "epoch": 38.74, "learning_rate": 3.06392703316043e-05, "loss": 2.0735, "step": 13382000 }, { "epoch": 38.74, "learning_rate": 3.063854668395702e-05, "loss": 2.066, "step": 13382500 }, { "epoch": 38.74, "learning_rate": 3.063782303630975e-05, "loss": 2.0782, "step": 13383000 }, { "epoch": 38.74, "learning_rate": 3.0637100835957766e-05, "loss": 2.0647, "step": 13383500 }, { "epoch": 38.74, "learning_rate": 3.063637718831049e-05, "loss": 2.0673, "step": 13384000 }, { "epoch": 38.74, "learning_rate": 3.063565354066321e-05, "loss": 2.0723, "step": 13384500 }, { "epoch": 38.74, "learning_rate": 3.063492989301593e-05, "loss": 2.0579, "step": 13385000 }, { "epoch": 38.75, "learning_rate": 3.0634206245368655e-05, "loss": 2.0686, "step": 13385500 }, { "epoch": 38.75, "learning_rate": 3.063348259772138e-05, "loss": 2.0435, "step": 13386000 }, { "epoch": 38.75, "learning_rate": 3.06327589500741e-05, "loss": 2.0718, "step": 13386500 }, { "epoch": 38.75, "learning_rate": 3.063203530242683e-05, "loss": 2.0637, "step": 13387000 }, { "epoch": 38.75, "learning_rate": 3.0631313102074844e-05, "loss": 2.0764, "step": 13387500 }, { "epoch": 38.75, "learning_rate": 3.063059234901815e-05, "loss": 2.0644, "step": 13388000 }, { "epoch": 38.75, "learning_rate": 3.0629868701370875e-05, "loss": 2.0564, "step": 13388500 }, { "epoch": 38.76, "learning_rate": 3.06291465010189e-05, "loss": 2.0602, "step": 13389000 }, { "epoch": 38.76, "learning_rate": 3.062842285337162e-05, "loss": 2.0628, "step": 13389500 }, { "epoch": 38.76, "learning_rate": 3.062769920572434e-05, "loss": 2.0628, "step": 13390000 }, { "epoch": 38.76, "learning_rate": 3.0626975558077064e-05, "loss": 2.0665, "step": 13390500 }, { "epoch": 38.76, "learning_rate": 3.0626251910429793e-05, "loss": 2.067, "step": 13391000 }, { "epoch": 38.76, "learning_rate": 3.0625528262782516e-05, "loss": 2.0661, "step": 13391500 }, { "epoch": 38.76, "learning_rate": 3.062480461513524e-05, "loss": 2.0535, "step": 13392000 }, { "epoch": 38.77, "learning_rate": 3.062408096748796e-05, "loss": 2.0737, "step": 13392500 }, { "epoch": 38.77, "learning_rate": 3.062335731984068e-05, "loss": 2.1037, "step": 13393000 }, { "epoch": 38.77, "learning_rate": 3.0622633672193405e-05, "loss": 2.0591, "step": 13393500 }, { "epoch": 38.77, "learning_rate": 3.062191002454613e-05, "loss": 2.0623, "step": 13394000 }, { "epoch": 38.77, "learning_rate": 3.0621186376898856e-05, "loss": 2.0556, "step": 13394500 }, { "epoch": 38.77, "learning_rate": 3.062046272925158e-05, "loss": 2.0579, "step": 13395000 }, { "epoch": 38.77, "learning_rate": 3.06197390816043e-05, "loss": 2.0707, "step": 13395500 }, { "epoch": 38.78, "learning_rate": 3.061901543395702e-05, "loss": 2.0692, "step": 13396000 }, { "epoch": 38.78, "learning_rate": 3.0618291786309745e-05, "loss": 2.0577, "step": 13396500 }, { "epoch": 38.78, "learning_rate": 3.061756813866247e-05, "loss": 2.0556, "step": 13397000 }, { "epoch": 38.78, "learning_rate": 3.061684449101519e-05, "loss": 2.0594, "step": 13397500 }, { "epoch": 38.78, "learning_rate": 3.061612084336792e-05, "loss": 2.0434, "step": 13398000 }, { "epoch": 38.78, "learning_rate": 3.061539719572064e-05, "loss": 2.0713, "step": 13398500 }, { "epoch": 38.78, "learning_rate": 3.0614674995368656e-05, "loss": 2.0428, "step": 13399000 }, { "epoch": 38.79, "learning_rate": 3.061395279501668e-05, "loss": 2.0674, "step": 13399500 }, { "epoch": 38.79, "learning_rate": 3.06132291473694e-05, "loss": 2.0491, "step": 13400000 }, { "epoch": 38.79, "learning_rate": 3.0612506947017416e-05, "loss": 2.0754, "step": 13400500 }, { "epoch": 38.79, "learning_rate": 3.061178329937014e-05, "loss": 2.046, "step": 13401000 }, { "epoch": 38.79, "learning_rate": 3.061105965172286e-05, "loss": 2.0644, "step": 13401500 }, { "epoch": 38.79, "learning_rate": 3.061033600407558e-05, "loss": 2.0762, "step": 13402000 }, { "epoch": 38.79, "learning_rate": 3.0609612356428305e-05, "loss": 2.0738, "step": 13402500 }, { "epoch": 38.8, "learning_rate": 3.060888870878103e-05, "loss": 2.0544, "step": 13403000 }, { "epoch": 38.8, "learning_rate": 3.0608165061133756e-05, "loss": 2.0888, "step": 13403500 }, { "epoch": 38.8, "learning_rate": 3.060744141348648e-05, "loss": 2.0842, "step": 13404000 }, { "epoch": 38.8, "learning_rate": 3.0606719213134494e-05, "loss": 2.0739, "step": 13404500 }, { "epoch": 38.8, "learning_rate": 3.0605995565487216e-05, "loss": 2.0733, "step": 13405000 }, { "epoch": 38.8, "learning_rate": 3.060527191783994e-05, "loss": 2.0834, "step": 13405500 }, { "epoch": 38.8, "learning_rate": 3.060454827019267e-05, "loss": 2.0404, "step": 13406000 }, { "epoch": 38.81, "learning_rate": 3.060382462254539e-05, "loss": 2.061, "step": 13406500 }, { "epoch": 38.81, "learning_rate": 3.060310097489811e-05, "loss": 2.0695, "step": 13407000 }, { "epoch": 38.81, "learning_rate": 3.0602377327250834e-05, "loss": 2.0769, "step": 13407500 }, { "epoch": 38.81, "learning_rate": 3.0601653679603557e-05, "loss": 2.055, "step": 13408000 }, { "epoch": 38.81, "learning_rate": 3.060093003195628e-05, "loss": 2.065, "step": 13408500 }, { "epoch": 38.81, "learning_rate": 3.060020638430901e-05, "loss": 2.0527, "step": 13409000 }, { "epoch": 38.82, "learning_rate": 3.059948273666173e-05, "loss": 2.0642, "step": 13409500 }, { "epoch": 38.82, "learning_rate": 3.0598760536309746e-05, "loss": 2.0539, "step": 13410000 }, { "epoch": 38.82, "learning_rate": 3.059803688866247e-05, "loss": 2.0639, "step": 13410500 }, { "epoch": 38.82, "learning_rate": 3.059731324101519e-05, "loss": 2.0612, "step": 13411000 }, { "epoch": 38.82, "learning_rate": 3.0596591040663206e-05, "loss": 2.0314, "step": 13411500 }, { "epoch": 38.82, "learning_rate": 3.059586739301593e-05, "loss": 2.0619, "step": 13412000 }, { "epoch": 38.82, "learning_rate": 3.059514374536866e-05, "loss": 2.0707, "step": 13412500 }, { "epoch": 38.83, "learning_rate": 3.059442009772138e-05, "loss": 2.0615, "step": 13413000 }, { "epoch": 38.83, "learning_rate": 3.059369645007411e-05, "loss": 2.0606, "step": 13413500 }, { "epoch": 38.83, "learning_rate": 3.059297280242683e-05, "loss": 2.0612, "step": 13414000 }, { "epoch": 38.83, "learning_rate": 3.059224915477955e-05, "loss": 2.0758, "step": 13414500 }, { "epoch": 38.83, "learning_rate": 3.0591525507132275e-05, "loss": 2.0699, "step": 13415000 }, { "epoch": 38.83, "learning_rate": 3.059080330678029e-05, "loss": 2.06, "step": 13415500 }, { "epoch": 38.83, "learning_rate": 3.059007965913301e-05, "loss": 2.0431, "step": 13416000 }, { "epoch": 38.84, "learning_rate": 3.0589357458781035e-05, "loss": 2.0779, "step": 13416500 }, { "epoch": 38.84, "learning_rate": 3.058863381113376e-05, "loss": 2.0723, "step": 13417000 }, { "epoch": 38.84, "learning_rate": 3.058791016348648e-05, "loss": 2.0556, "step": 13417500 }, { "epoch": 38.84, "learning_rate": 3.05871865158392e-05, "loss": 2.0642, "step": 13418000 }, { "epoch": 38.84, "learning_rate": 3.0586462868191924e-05, "loss": 2.0555, "step": 13418500 }, { "epoch": 38.84, "learning_rate": 3.0585739220544646e-05, "loss": 2.0786, "step": 13419000 }, { "epoch": 38.84, "learning_rate": 3.058501702019266e-05, "loss": 2.0398, "step": 13419500 }, { "epoch": 38.85, "learning_rate": 3.0584293372545384e-05, "loss": 2.0641, "step": 13420000 }, { "epoch": 38.85, "learning_rate": 3.058356972489811e-05, "loss": 2.0487, "step": 13420500 }, { "epoch": 38.85, "learning_rate": 3.0582846077250835e-05, "loss": 2.0748, "step": 13421000 }, { "epoch": 38.85, "learning_rate": 3.058212387689886e-05, "loss": 2.0593, "step": 13421500 }, { "epoch": 38.85, "learning_rate": 3.058140022925158e-05, "loss": 2.0644, "step": 13422000 }, { "epoch": 38.85, "learning_rate": 3.0580678028899595e-05, "loss": 2.0489, "step": 13422500 }, { "epoch": 38.85, "learning_rate": 3.057995438125232e-05, "loss": 2.0609, "step": 13423000 }, { "epoch": 38.86, "learning_rate": 3.057923073360504e-05, "loss": 2.0646, "step": 13423500 }, { "epoch": 38.86, "learning_rate": 3.0578508533253055e-05, "loss": 2.0458, "step": 13424000 }, { "epoch": 38.86, "learning_rate": 3.0577784885605784e-05, "loss": 2.0459, "step": 13424500 }, { "epoch": 38.86, "learning_rate": 3.0577061237958506e-05, "loss": 2.0729, "step": 13425000 }, { "epoch": 38.86, "learning_rate": 3.057633759031123e-05, "loss": 2.0748, "step": 13425500 }, { "epoch": 38.86, "learning_rate": 3.057561394266395e-05, "loss": 2.0495, "step": 13426000 }, { "epoch": 38.86, "learning_rate": 3.057489029501667e-05, "loss": 2.0574, "step": 13426500 }, { "epoch": 38.87, "learning_rate": 3.0574166647369395e-05, "loss": 2.0593, "step": 13427000 }, { "epoch": 38.87, "learning_rate": 3.057344299972212e-05, "loss": 2.0513, "step": 13427500 }, { "epoch": 38.87, "learning_rate": 3.057271935207485e-05, "loss": 2.0602, "step": 13428000 }, { "epoch": 38.87, "learning_rate": 3.057199570442757e-05, "loss": 2.1, "step": 13428500 }, { "epoch": 38.87, "learning_rate": 3.057127205678029e-05, "loss": 2.0673, "step": 13429000 }, { "epoch": 38.87, "learning_rate": 3.057054985642831e-05, "loss": 2.0414, "step": 13429500 }, { "epoch": 38.87, "learning_rate": 3.0569826208781036e-05, "loss": 2.0775, "step": 13430000 }, { "epoch": 38.88, "learning_rate": 3.056910400842905e-05, "loss": 2.0351, "step": 13430500 }, { "epoch": 38.88, "learning_rate": 3.0568380360781773e-05, "loss": 2.0714, "step": 13431000 }, { "epoch": 38.88, "learning_rate": 3.0567656713134496e-05, "loss": 2.0858, "step": 13431500 }, { "epoch": 38.88, "learning_rate": 3.056693306548722e-05, "loss": 2.0657, "step": 13432000 }, { "epoch": 38.88, "learning_rate": 3.056620941783994e-05, "loss": 2.0344, "step": 13432500 }, { "epoch": 38.88, "learning_rate": 3.056548577019266e-05, "loss": 2.0396, "step": 13433000 }, { "epoch": 38.88, "learning_rate": 3.0564762122545385e-05, "loss": 2.0637, "step": 13433500 }, { "epoch": 38.89, "learning_rate": 3.056403992219341e-05, "loss": 2.0691, "step": 13434000 }, { "epoch": 38.89, "learning_rate": 3.056331627454613e-05, "loss": 2.0481, "step": 13434500 }, { "epoch": 38.89, "learning_rate": 3.056259262689885e-05, "loss": 2.068, "step": 13435000 }, { "epoch": 38.89, "learning_rate": 3.056186897925158e-05, "loss": 2.0777, "step": 13435500 }, { "epoch": 38.89, "learning_rate": 3.05611453316043e-05, "loss": 2.0628, "step": 13436000 }, { "epoch": 38.89, "learning_rate": 3.0560421683957025e-05, "loss": 2.0459, "step": 13436500 }, { "epoch": 38.89, "learning_rate": 3.055969803630975e-05, "loss": 2.0672, "step": 13437000 }, { "epoch": 38.9, "learning_rate": 3.055897438866247e-05, "loss": 2.0674, "step": 13437500 }, { "epoch": 38.9, "learning_rate": 3.055825074101519e-05, "loss": 2.0755, "step": 13438000 }, { "epoch": 38.9, "learning_rate": 3.0557527093367914e-05, "loss": 2.0703, "step": 13438500 }, { "epoch": 38.9, "learning_rate": 3.0556803445720636e-05, "loss": 2.0767, "step": 13439000 }, { "epoch": 38.9, "learning_rate": 3.055607979807336e-05, "loss": 2.0619, "step": 13439500 }, { "epoch": 38.9, "learning_rate": 3.055535615042609e-05, "loss": 2.058, "step": 13440000 }, { "epoch": 38.9, "learning_rate": 3.055463250277881e-05, "loss": 2.0372, "step": 13440500 }, { "epoch": 38.91, "learning_rate": 3.055390885513153e-05, "loss": 2.0554, "step": 13441000 }, { "epoch": 38.91, "learning_rate": 3.0553185207484254e-05, "loss": 2.0502, "step": 13441500 }, { "epoch": 38.91, "learning_rate": 3.0552461559836976e-05, "loss": 2.066, "step": 13442000 }, { "epoch": 38.91, "learning_rate": 3.055173935948499e-05, "loss": 2.0376, "step": 13442500 }, { "epoch": 38.91, "learning_rate": 3.055101571183772e-05, "loss": 2.0557, "step": 13443000 }, { "epoch": 38.91, "learning_rate": 3.055029206419044e-05, "loss": 2.0859, "step": 13443500 }, { "epoch": 38.91, "learning_rate": 3.0549568416543165e-05, "loss": 2.0752, "step": 13444000 }, { "epoch": 38.92, "learning_rate": 3.054884476889589e-05, "loss": 2.0856, "step": 13444500 }, { "epoch": 38.92, "learning_rate": 3.054812112124861e-05, "loss": 2.0807, "step": 13445000 }, { "epoch": 38.92, "learning_rate": 3.054739892089663e-05, "loss": 2.0709, "step": 13445500 }, { "epoch": 38.92, "learning_rate": 3.0546675273249354e-05, "loss": 2.0738, "step": 13446000 }, { "epoch": 38.92, "learning_rate": 3.054595307289737e-05, "loss": 2.0543, "step": 13446500 }, { "epoch": 38.92, "learning_rate": 3.054522942525009e-05, "loss": 2.0774, "step": 13447000 }, { "epoch": 38.93, "learning_rate": 3.0544505777602814e-05, "loss": 2.0466, "step": 13447500 }, { "epoch": 38.93, "learning_rate": 3.0543782129955537e-05, "loss": 2.0429, "step": 13448000 }, { "epoch": 38.93, "learning_rate": 3.054305848230826e-05, "loss": 2.0772, "step": 13448500 }, { "epoch": 38.93, "learning_rate": 3.054233483466099e-05, "loss": 2.0737, "step": 13449000 }, { "epoch": 38.93, "learning_rate": 3.054161118701371e-05, "loss": 2.0603, "step": 13449500 }, { "epoch": 38.93, "learning_rate": 3.054088753936644e-05, "loss": 2.0852, "step": 13450000 }, { "epoch": 38.93, "learning_rate": 3.054016389171916e-05, "loss": 2.0691, "step": 13450500 }, { "epoch": 38.94, "learning_rate": 3.0539440244071884e-05, "loss": 2.0429, "step": 13451000 }, { "epoch": 38.94, "learning_rate": 3.05387180437199e-05, "loss": 2.0535, "step": 13451500 }, { "epoch": 38.94, "learning_rate": 3.053799439607262e-05, "loss": 2.0606, "step": 13452000 }, { "epoch": 38.94, "learning_rate": 3.0537270748425344e-05, "loss": 2.0684, "step": 13452500 }, { "epoch": 38.94, "learning_rate": 3.0536547100778066e-05, "loss": 2.0523, "step": 13453000 }, { "epoch": 38.94, "learning_rate": 3.053582490042609e-05, "loss": 2.0418, "step": 13453500 }, { "epoch": 38.94, "learning_rate": 3.0535102700074104e-05, "loss": 2.0794, "step": 13454000 }, { "epoch": 38.95, "learning_rate": 3.0534379052426826e-05, "loss": 2.0526, "step": 13454500 }, { "epoch": 38.95, "learning_rate": 3.053365540477955e-05, "loss": 2.0535, "step": 13455000 }, { "epoch": 38.95, "learning_rate": 3.053293175713227e-05, "loss": 2.0471, "step": 13455500 }, { "epoch": 38.95, "learning_rate": 3.053220810948499e-05, "loss": 2.0699, "step": 13456000 }, { "epoch": 38.95, "learning_rate": 3.0531484461837715e-05, "loss": 2.0878, "step": 13456500 }, { "epoch": 38.95, "learning_rate": 3.053076081419044e-05, "loss": 2.0693, "step": 13457000 }, { "epoch": 38.95, "learning_rate": 3.0530037166543166e-05, "loss": 2.0726, "step": 13457500 }, { "epoch": 38.96, "learning_rate": 3.052931496619119e-05, "loss": 2.056, "step": 13458000 }, { "epoch": 38.96, "learning_rate": 3.052859131854391e-05, "loss": 2.0871, "step": 13458500 }, { "epoch": 38.96, "learning_rate": 3.0527869118191926e-05, "loss": 2.0695, "step": 13459000 }, { "epoch": 38.96, "learning_rate": 3.052714547054465e-05, "loss": 2.0558, "step": 13459500 }, { "epoch": 38.96, "learning_rate": 3.052642182289737e-05, "loss": 2.0603, "step": 13460000 }, { "epoch": 38.96, "learning_rate": 3.052569817525009e-05, "loss": 2.0449, "step": 13460500 }, { "epoch": 38.96, "learning_rate": 3.0524974527602815e-05, "loss": 2.0627, "step": 13461000 }, { "epoch": 38.97, "learning_rate": 3.052425087995554e-05, "loss": 2.0594, "step": 13461500 }, { "epoch": 38.97, "learning_rate": 3.052352867960356e-05, "loss": 2.0689, "step": 13462000 }, { "epoch": 38.97, "learning_rate": 3.0522806479251575e-05, "loss": 2.083, "step": 13462500 }, { "epoch": 38.97, "learning_rate": 3.05220828316043e-05, "loss": 2.0561, "step": 13463000 }, { "epoch": 38.97, "learning_rate": 3.052135918395702e-05, "loss": 2.0645, "step": 13463500 }, { "epoch": 38.97, "learning_rate": 3.052063553630974e-05, "loss": 2.0761, "step": 13464000 }, { "epoch": 38.97, "learning_rate": 3.0519911888662464e-05, "loss": 2.0684, "step": 13464500 }, { "epoch": 38.98, "learning_rate": 3.0519188241015186e-05, "loss": 2.0678, "step": 13465000 }, { "epoch": 38.98, "learning_rate": 3.0518464593367915e-05, "loss": 2.0667, "step": 13465500 }, { "epoch": 38.98, "learning_rate": 3.051774094572064e-05, "loss": 2.0659, "step": 13466000 }, { "epoch": 38.98, "learning_rate": 3.0517017298073363e-05, "loss": 2.0566, "step": 13466500 }, { "epoch": 38.98, "learning_rate": 3.0516293650426086e-05, "loss": 2.0433, "step": 13467000 }, { "epoch": 38.98, "learning_rate": 3.0515571450074104e-05, "loss": 2.0649, "step": 13467500 }, { "epoch": 38.98, "learning_rate": 3.0514847802426827e-05, "loss": 2.0619, "step": 13468000 }, { "epoch": 38.99, "learning_rate": 3.051412415477955e-05, "loss": 2.0805, "step": 13468500 }, { "epoch": 38.99, "learning_rate": 3.0513401954427568e-05, "loss": 2.065, "step": 13469000 }, { "epoch": 38.99, "learning_rate": 3.051267830678029e-05, "loss": 2.0444, "step": 13469500 }, { "epoch": 38.99, "learning_rate": 3.0511954659133012e-05, "loss": 2.0311, "step": 13470000 }, { "epoch": 38.99, "learning_rate": 3.0511231011485735e-05, "loss": 2.069, "step": 13470500 }, { "epoch": 38.99, "learning_rate": 3.051050736383846e-05, "loss": 2.0379, "step": 13471000 }, { "epoch": 38.99, "learning_rate": 3.0509785163486476e-05, "loss": 2.0784, "step": 13471500 }, { "epoch": 39.0, "learning_rate": 3.0509061515839198e-05, "loss": 2.074, "step": 13472000 }, { "epoch": 39.0, "learning_rate": 3.050833786819192e-05, "loss": 2.0628, "step": 13472500 }, { "epoch": 39.0, "learning_rate": 3.050761566783994e-05, "loss": 2.0457, "step": 13473000 }, { "epoch": 39.0, "eval_accuracy": 0.670671881118002, "eval_accuracy_mlm": 0.6356850853404965, "eval_accuracy_nsp": 0.8582585142511446, "eval_loss": 2.165445327758789, "eval_runtime": 331.7981, "eval_samples_per_second": 1315.216, "eval_steps_per_second": 54.801, "step": 13473408 }, { "epoch": 39.0, "learning_rate": 3.0506892020192668e-05, "loss": 2.067, "step": 13473500 }, { "epoch": 39.0, "learning_rate": 3.050616837254539e-05, "loss": 2.0359, "step": 13474000 }, { "epoch": 39.0, "learning_rate": 3.0505444724898113e-05, "loss": 2.0454, "step": 13474500 }, { "epoch": 39.0, "learning_rate": 3.0504721077250835e-05, "loss": 2.0357, "step": 13475000 }, { "epoch": 39.01, "learning_rate": 3.050399742960356e-05, "loss": 2.0514, "step": 13475500 }, { "epoch": 39.01, "learning_rate": 3.0503273781956283e-05, "loss": 2.0459, "step": 13476000 }, { "epoch": 39.01, "learning_rate": 3.0502550134309005e-05, "loss": 2.0284, "step": 13476500 }, { "epoch": 39.01, "learning_rate": 3.0501826486661727e-05, "loss": 2.0664, "step": 13477000 }, { "epoch": 39.01, "learning_rate": 3.050110283901445e-05, "loss": 2.0234, "step": 13477500 }, { "epoch": 39.01, "learning_rate": 3.050037919136717e-05, "loss": 2.0301, "step": 13478000 }, { "epoch": 39.01, "learning_rate": 3.0499655543719897e-05, "loss": 2.0341, "step": 13478500 }, { "epoch": 39.02, "learning_rate": 3.049893189607262e-05, "loss": 2.0509, "step": 13479000 }, { "epoch": 39.02, "learning_rate": 3.0498208248425342e-05, "loss": 2.0225, "step": 13479500 }, { "epoch": 39.02, "learning_rate": 3.049748604807336e-05, "loss": 2.0345, "step": 13480000 }, { "epoch": 39.02, "learning_rate": 3.0496762400426086e-05, "loss": 2.0305, "step": 13480500 }, { "epoch": 39.02, "learning_rate": 3.0496038752778812e-05, "loss": 2.0365, "step": 13481000 }, { "epoch": 39.02, "learning_rate": 3.0495315105131534e-05, "loss": 2.049, "step": 13481500 }, { "epoch": 39.02, "learning_rate": 3.0494591457484256e-05, "loss": 2.0357, "step": 13482000 }, { "epoch": 39.03, "learning_rate": 3.049386780983698e-05, "loss": 2.0503, "step": 13482500 }, { "epoch": 39.03, "learning_rate": 3.04931441621897e-05, "loss": 2.0263, "step": 13483000 }, { "epoch": 39.03, "learning_rate": 3.0492420514542423e-05, "loss": 2.0413, "step": 13483500 }, { "epoch": 39.03, "learning_rate": 3.0491698314190442e-05, "loss": 2.0307, "step": 13484000 }, { "epoch": 39.03, "learning_rate": 3.049097611383846e-05, "loss": 2.0425, "step": 13484500 }, { "epoch": 39.03, "learning_rate": 3.0490252466191183e-05, "loss": 2.0416, "step": 13485000 }, { "epoch": 39.04, "learning_rate": 3.0489528818543905e-05, "loss": 2.0431, "step": 13485500 }, { "epoch": 39.04, "learning_rate": 3.0488805170896628e-05, "loss": 2.0321, "step": 13486000 }, { "epoch": 39.04, "learning_rate": 3.048808152324935e-05, "loss": 2.0438, "step": 13486500 }, { "epoch": 39.04, "learning_rate": 3.0487357875602076e-05, "loss": 2.0413, "step": 13487000 }, { "epoch": 39.04, "learning_rate": 3.048663567525009e-05, "loss": 2.0389, "step": 13487500 }, { "epoch": 39.04, "learning_rate": 3.048591202760282e-05, "loss": 2.0395, "step": 13488000 }, { "epoch": 39.04, "learning_rate": 3.0485188379955542e-05, "loss": 2.0539, "step": 13488500 }, { "epoch": 39.05, "learning_rate": 3.0484464732308265e-05, "loss": 2.0372, "step": 13489000 }, { "epoch": 39.05, "learning_rate": 3.0483741084660987e-05, "loss": 2.0397, "step": 13489500 }, { "epoch": 39.05, "learning_rate": 3.0483017437013712e-05, "loss": 2.0421, "step": 13490000 }, { "epoch": 39.05, "learning_rate": 3.0482293789366435e-05, "loss": 2.0502, "step": 13490500 }, { "epoch": 39.05, "learning_rate": 3.0481570141719157e-05, "loss": 2.0402, "step": 13491000 }, { "epoch": 39.05, "learning_rate": 3.0480847941367176e-05, "loss": 2.0459, "step": 13491500 }, { "epoch": 39.05, "learning_rate": 3.0480124293719898e-05, "loss": 2.0406, "step": 13492000 }, { "epoch": 39.06, "learning_rate": 3.0479402093367914e-05, "loss": 2.0685, "step": 13492500 }, { "epoch": 39.06, "learning_rate": 3.047867844572064e-05, "loss": 2.037, "step": 13493000 }, { "epoch": 39.06, "learning_rate": 3.047795479807336e-05, "loss": 2.0194, "step": 13493500 }, { "epoch": 39.06, "learning_rate": 3.0477231150426084e-05, "loss": 2.0433, "step": 13494000 }, { "epoch": 39.06, "learning_rate": 3.0476507502778806e-05, "loss": 2.0437, "step": 13494500 }, { "epoch": 39.06, "learning_rate": 3.0475785302426825e-05, "loss": 2.0862, "step": 13495000 }, { "epoch": 39.06, "learning_rate": 3.047506165477955e-05, "loss": 2.0431, "step": 13495500 }, { "epoch": 39.07, "learning_rate": 3.047433945442757e-05, "loss": 2.0625, "step": 13496000 }, { "epoch": 39.07, "learning_rate": 3.0473615806780292e-05, "loss": 2.0451, "step": 13496500 }, { "epoch": 39.07, "learning_rate": 3.0472892159133014e-05, "loss": 2.0657, "step": 13497000 }, { "epoch": 39.07, "learning_rate": 3.047216851148574e-05, "loss": 2.062, "step": 13497500 }, { "epoch": 39.07, "learning_rate": 3.0471444863838462e-05, "loss": 2.0422, "step": 13498000 }, { "epoch": 39.07, "learning_rate": 3.0470721216191184e-05, "loss": 2.038, "step": 13498500 }, { "epoch": 39.07, "learning_rate": 3.0469997568543906e-05, "loss": 2.0319, "step": 13499000 }, { "epoch": 39.08, "learning_rate": 3.0469275368191925e-05, "loss": 2.0661, "step": 13499500 }, { "epoch": 39.08, "learning_rate": 3.0468551720544647e-05, "loss": 2.0323, "step": 13500000 }, { "epoch": 39.08, "learning_rate": 3.046782807289737e-05, "loss": 2.0137, "step": 13500500 }, { "epoch": 39.08, "learning_rate": 3.0467104425250092e-05, "loss": 2.0332, "step": 13501000 }, { "epoch": 39.08, "learning_rate": 3.0466380777602814e-05, "loss": 2.0544, "step": 13501500 }, { "epoch": 39.08, "learning_rate": 3.046565712995554e-05, "loss": 2.0399, "step": 13502000 }, { "epoch": 39.08, "learning_rate": 3.0464933482308262e-05, "loss": 2.029, "step": 13502500 }, { "epoch": 39.09, "learning_rate": 3.046420983466099e-05, "loss": 2.0543, "step": 13503000 }, { "epoch": 39.09, "learning_rate": 3.0463486187013713e-05, "loss": 2.0439, "step": 13503500 }, { "epoch": 39.09, "learning_rate": 3.0462762539366435e-05, "loss": 2.0558, "step": 13504000 }, { "epoch": 39.09, "learning_rate": 3.0462038891719158e-05, "loss": 2.0375, "step": 13504500 }, { "epoch": 39.09, "learning_rate": 3.0461316691367177e-05, "loss": 2.0737, "step": 13505000 }, { "epoch": 39.09, "learning_rate": 3.04605930437199e-05, "loss": 2.0258, "step": 13505500 }, { "epoch": 39.09, "learning_rate": 3.045986939607262e-05, "loss": 2.0499, "step": 13506000 }, { "epoch": 39.1, "learning_rate": 3.0459145748425343e-05, "loss": 2.0614, "step": 13506500 }, { "epoch": 39.1, "learning_rate": 3.0458422100778066e-05, "loss": 2.0475, "step": 13507000 }, { "epoch": 39.1, "learning_rate": 3.045769845313079e-05, "loss": 2.0333, "step": 13507500 }, { "epoch": 39.1, "learning_rate": 3.0456976252778807e-05, "loss": 2.0399, "step": 13508000 }, { "epoch": 39.1, "learning_rate": 3.0456254052426826e-05, "loss": 2.0643, "step": 13508500 }, { "epoch": 39.1, "learning_rate": 3.0455530404779548e-05, "loss": 2.0295, "step": 13509000 }, { "epoch": 39.1, "learning_rate": 3.045480675713227e-05, "loss": 2.0685, "step": 13509500 }, { "epoch": 39.11, "learning_rate": 3.0454083109484992e-05, "loss": 2.0386, "step": 13510000 }, { "epoch": 39.11, "learning_rate": 3.0453360909133015e-05, "loss": 2.0454, "step": 13510500 }, { "epoch": 39.11, "learning_rate": 3.045263726148574e-05, "loss": 2.0556, "step": 13511000 }, { "epoch": 39.11, "learning_rate": 3.0451913613838463e-05, "loss": 2.0348, "step": 13511500 }, { "epoch": 39.11, "learning_rate": 3.0451189966191185e-05, "loss": 2.0393, "step": 13512000 }, { "epoch": 39.11, "learning_rate": 3.0450466318543907e-05, "loss": 2.0387, "step": 13512500 }, { "epoch": 39.11, "learning_rate": 3.044974267089663e-05, "loss": 2.06, "step": 13513000 }, { "epoch": 39.12, "learning_rate": 3.0449019023249355e-05, "loss": 2.0253, "step": 13513500 }, { "epoch": 39.12, "learning_rate": 3.0448295375602077e-05, "loss": 2.0646, "step": 13514000 }, { "epoch": 39.12, "learning_rate": 3.04475717279548e-05, "loss": 2.0578, "step": 13514500 }, { "epoch": 39.12, "learning_rate": 3.044684808030752e-05, "loss": 2.031, "step": 13515000 }, { "epoch": 39.12, "learning_rate": 3.0446124432660244e-05, "loss": 2.0369, "step": 13515500 }, { "epoch": 39.12, "learning_rate": 3.0445400785012966e-05, "loss": 2.0339, "step": 13516000 }, { "epoch": 39.12, "learning_rate": 3.0444677137365692e-05, "loss": 2.0486, "step": 13516500 }, { "epoch": 39.13, "learning_rate": 3.0443954937013707e-05, "loss": 2.0413, "step": 13517000 }, { "epoch": 39.13, "learning_rate": 3.0443232736661726e-05, "loss": 2.0709, "step": 13517500 }, { "epoch": 39.13, "learning_rate": 3.044251053630975e-05, "loss": 2.0424, "step": 13518000 }, { "epoch": 39.13, "learning_rate": 3.044178688866247e-05, "loss": 2.0397, "step": 13518500 }, { "epoch": 39.13, "learning_rate": 3.0441063241015193e-05, "loss": 2.0279, "step": 13519000 }, { "epoch": 39.13, "learning_rate": 3.044033959336792e-05, "loss": 2.0272, "step": 13519500 }, { "epoch": 39.13, "learning_rate": 3.043961594572064e-05, "loss": 2.0383, "step": 13520000 }, { "epoch": 39.14, "learning_rate": 3.0438892298073363e-05, "loss": 2.0444, "step": 13520500 }, { "epoch": 39.14, "learning_rate": 3.0438168650426085e-05, "loss": 2.0415, "step": 13521000 }, { "epoch": 39.14, "learning_rate": 3.0437445002778808e-05, "loss": 2.0605, "step": 13521500 }, { "epoch": 39.14, "learning_rate": 3.043672135513153e-05, "loss": 2.0253, "step": 13522000 }, { "epoch": 39.14, "learning_rate": 3.0435997707484255e-05, "loss": 2.0229, "step": 13522500 }, { "epoch": 39.14, "learning_rate": 3.0435274059836978e-05, "loss": 2.0311, "step": 13523000 }, { "epoch": 39.14, "learning_rate": 3.04345504121897e-05, "loss": 2.0445, "step": 13523500 }, { "epoch": 39.15, "learning_rate": 3.0433826764542422e-05, "loss": 2.0631, "step": 13524000 }, { "epoch": 39.15, "learning_rate": 3.0433103116895144e-05, "loss": 2.036, "step": 13524500 }, { "epoch": 39.15, "learning_rate": 3.0432379469247873e-05, "loss": 2.0316, "step": 13525000 }, { "epoch": 39.15, "learning_rate": 3.0431655821600596e-05, "loss": 2.0671, "step": 13525500 }, { "epoch": 39.15, "learning_rate": 3.0430933621248615e-05, "loss": 2.0393, "step": 13526000 }, { "epoch": 39.15, "learning_rate": 3.0430209973601337e-05, "loss": 2.0367, "step": 13526500 }, { "epoch": 39.16, "learning_rate": 3.042948922054465e-05, "loss": 2.0505, "step": 13527000 }, { "epoch": 39.16, "learning_rate": 3.042876557289737e-05, "loss": 2.0367, "step": 13527500 }, { "epoch": 39.16, "learning_rate": 3.042804337254539e-05, "loss": 2.0679, "step": 13528000 }, { "epoch": 39.16, "learning_rate": 3.0427319724898112e-05, "loss": 2.0318, "step": 13528500 }, { "epoch": 39.16, "learning_rate": 3.0426596077250835e-05, "loss": 2.0507, "step": 13529000 }, { "epoch": 39.16, "learning_rate": 3.0425872429603557e-05, "loss": 2.0602, "step": 13529500 }, { "epoch": 39.16, "learning_rate": 3.042514878195628e-05, "loss": 2.0265, "step": 13530000 }, { "epoch": 39.17, "learning_rate": 3.0424426581604298e-05, "loss": 2.0723, "step": 13530500 }, { "epoch": 39.17, "learning_rate": 3.042370293395702e-05, "loss": 2.0363, "step": 13531000 }, { "epoch": 39.17, "learning_rate": 3.0422979286309743e-05, "loss": 2.0536, "step": 13531500 }, { "epoch": 39.17, "learning_rate": 3.0422255638662468e-05, "loss": 2.0248, "step": 13532000 }, { "epoch": 39.17, "learning_rate": 3.042153199101519e-05, "loss": 2.0305, "step": 13532500 }, { "epoch": 39.17, "learning_rate": 3.042080834336792e-05, "loss": 2.0487, "step": 13533000 }, { "epoch": 39.17, "learning_rate": 3.042008469572064e-05, "loss": 2.0231, "step": 13533500 }, { "epoch": 39.18, "learning_rate": 3.0419361048073364e-05, "loss": 2.0644, "step": 13534000 }, { "epoch": 39.18, "learning_rate": 3.0418637400426086e-05, "loss": 2.0479, "step": 13534500 }, { "epoch": 39.18, "learning_rate": 3.041791375277881e-05, "loss": 2.0669, "step": 13535000 }, { "epoch": 39.18, "learning_rate": 3.041719010513153e-05, "loss": 2.0484, "step": 13535500 }, { "epoch": 39.18, "learning_rate": 3.041646790477955e-05, "loss": 2.0485, "step": 13536000 }, { "epoch": 39.18, "learning_rate": 3.0415744257132272e-05, "loss": 2.0443, "step": 13536500 }, { "epoch": 39.18, "learning_rate": 3.0415020609484994e-05, "loss": 2.0371, "step": 13537000 }, { "epoch": 39.19, "learning_rate": 3.041429696183772e-05, "loss": 2.0536, "step": 13537500 }, { "epoch": 39.19, "learning_rate": 3.0413573314190442e-05, "loss": 2.0338, "step": 13538000 }, { "epoch": 39.19, "learning_rate": 3.0412849666543164e-05, "loss": 2.045, "step": 13538500 }, { "epoch": 39.19, "learning_rate": 3.0412127466191183e-05, "loss": 2.0274, "step": 13539000 }, { "epoch": 39.19, "learning_rate": 3.0411403818543905e-05, "loss": 2.0597, "step": 13539500 }, { "epoch": 39.19, "learning_rate": 3.0410680170896627e-05, "loss": 2.0434, "step": 13540000 }, { "epoch": 39.19, "learning_rate": 3.0409956523249356e-05, "loss": 2.0411, "step": 13540500 }, { "epoch": 39.2, "learning_rate": 3.040923287560208e-05, "loss": 2.0563, "step": 13541000 }, { "epoch": 39.2, "learning_rate": 3.04085092279548e-05, "loss": 2.0552, "step": 13541500 }, { "epoch": 39.2, "learning_rate": 3.0407785580307523e-05, "loss": 2.0558, "step": 13542000 }, { "epoch": 39.2, "learning_rate": 3.0407061932660245e-05, "loss": 2.0372, "step": 13542500 }, { "epoch": 39.2, "learning_rate": 3.040633828501297e-05, "loss": 2.0438, "step": 13543000 }, { "epoch": 39.2, "learning_rate": 3.0405614637365693e-05, "loss": 2.0439, "step": 13543500 }, { "epoch": 39.2, "learning_rate": 3.0404890989718416e-05, "loss": 2.0479, "step": 13544000 }, { "epoch": 39.21, "learning_rate": 3.0404168789366434e-05, "loss": 2.0549, "step": 13544500 }, { "epoch": 39.21, "learning_rate": 3.040344658901445e-05, "loss": 2.0674, "step": 13545000 }, { "epoch": 39.21, "learning_rate": 3.040272438866247e-05, "loss": 2.0384, "step": 13545500 }, { "epoch": 39.21, "learning_rate": 3.040200074101519e-05, "loss": 2.0668, "step": 13546000 }, { "epoch": 39.21, "learning_rate": 3.0401278540663207e-05, "loss": 2.0592, "step": 13546500 }, { "epoch": 39.21, "learning_rate": 3.0400554893015932e-05, "loss": 2.0595, "step": 13547000 }, { "epoch": 39.21, "learning_rate": 3.0399831245368655e-05, "loss": 2.0357, "step": 13547500 }, { "epoch": 39.22, "learning_rate": 3.0399107597721384e-05, "loss": 2.0719, "step": 13548000 }, { "epoch": 39.22, "learning_rate": 3.0398383950074106e-05, "loss": 2.0378, "step": 13548500 }, { "epoch": 39.22, "learning_rate": 3.0397660302426828e-05, "loss": 2.0603, "step": 13549000 }, { "epoch": 39.22, "learning_rate": 3.039693665477955e-05, "loss": 2.0602, "step": 13549500 }, { "epoch": 39.22, "learning_rate": 3.0396213007132273e-05, "loss": 2.0576, "step": 13550000 }, { "epoch": 39.22, "learning_rate": 3.0395489359484998e-05, "loss": 2.0485, "step": 13550500 }, { "epoch": 39.22, "learning_rate": 3.039476571183772e-05, "loss": 2.052, "step": 13551000 }, { "epoch": 39.23, "learning_rate": 3.0394042064190443e-05, "loss": 2.0601, "step": 13551500 }, { "epoch": 39.23, "learning_rate": 3.0393319863838458e-05, "loss": 2.0523, "step": 13552000 }, { "epoch": 39.23, "learning_rate": 3.0392596216191184e-05, "loss": 2.0463, "step": 13552500 }, { "epoch": 39.23, "learning_rate": 3.0391872568543906e-05, "loss": 2.0385, "step": 13553000 }, { "epoch": 39.23, "learning_rate": 3.0391148920896628e-05, "loss": 2.0348, "step": 13553500 }, { "epoch": 39.23, "learning_rate": 3.039042527324935e-05, "loss": 2.0455, "step": 13554000 }, { "epoch": 39.23, "learning_rate": 3.0389701625602073e-05, "loss": 2.0388, "step": 13554500 }, { "epoch": 39.24, "learning_rate": 3.0388977977954802e-05, "loss": 2.0338, "step": 13555000 }, { "epoch": 39.24, "learning_rate": 3.0388254330307524e-05, "loss": 2.0377, "step": 13555500 }, { "epoch": 39.24, "learning_rate": 3.038753068266025e-05, "loss": 2.0357, "step": 13556000 }, { "epoch": 39.24, "learning_rate": 3.0386807035012972e-05, "loss": 2.0475, "step": 13556500 }, { "epoch": 39.24, "learning_rate": 3.0386083387365694e-05, "loss": 2.0504, "step": 13557000 }, { "epoch": 39.24, "learning_rate": 3.0385359739718416e-05, "loss": 2.0359, "step": 13557500 }, { "epoch": 39.24, "learning_rate": 3.038463609207114e-05, "loss": 2.055, "step": 13558000 }, { "epoch": 39.25, "learning_rate": 3.0383913891719157e-05, "loss": 2.0547, "step": 13558500 }, { "epoch": 39.25, "learning_rate": 3.038319024407188e-05, "loss": 2.045, "step": 13559000 }, { "epoch": 39.25, "learning_rate": 3.03824680437199e-05, "loss": 2.0613, "step": 13559500 }, { "epoch": 39.25, "learning_rate": 3.038174439607262e-05, "loss": 2.0347, "step": 13560000 }, { "epoch": 39.25, "learning_rate": 3.0381020748425343e-05, "loss": 2.0447, "step": 13560500 }, { "epoch": 39.25, "learning_rate": 3.0380297100778065e-05, "loss": 2.0493, "step": 13561000 }, { "epoch": 39.25, "learning_rate": 3.0379573453130788e-05, "loss": 2.0416, "step": 13561500 }, { "epoch": 39.26, "learning_rate": 3.037884980548351e-05, "loss": 2.0547, "step": 13562000 }, { "epoch": 39.26, "learning_rate": 3.037812615783624e-05, "loss": 2.0542, "step": 13562500 }, { "epoch": 39.26, "learning_rate": 3.0377403957484258e-05, "loss": 2.0555, "step": 13563000 }, { "epoch": 39.26, "learning_rate": 3.037668030983698e-05, "loss": 2.0608, "step": 13563500 }, { "epoch": 39.26, "learning_rate": 3.0375956662189702e-05, "loss": 2.0355, "step": 13564000 }, { "epoch": 39.26, "learning_rate": 3.0375233014542424e-05, "loss": 2.0527, "step": 13564500 }, { "epoch": 39.27, "learning_rate": 3.037450936689515e-05, "loss": 2.0336, "step": 13565000 }, { "epoch": 39.27, "learning_rate": 3.0373785719247872e-05, "loss": 2.0376, "step": 13565500 }, { "epoch": 39.27, "learning_rate": 3.0373062071600595e-05, "loss": 2.0461, "step": 13566000 }, { "epoch": 39.27, "learning_rate": 3.0372338423953317e-05, "loss": 2.0568, "step": 13566500 }, { "epoch": 39.27, "learning_rate": 3.037161477630604e-05, "loss": 2.0285, "step": 13567000 }, { "epoch": 39.27, "learning_rate": 3.037089112865876e-05, "loss": 2.0646, "step": 13567500 }, { "epoch": 39.27, "learning_rate": 3.0370167481011487e-05, "loss": 2.035, "step": 13568000 }, { "epoch": 39.28, "learning_rate": 3.036944383336421e-05, "loss": 2.0536, "step": 13568500 }, { "epoch": 39.28, "learning_rate": 3.0368721633012225e-05, "loss": 2.0171, "step": 13569000 }, { "epoch": 39.28, "learning_rate": 3.036799798536495e-05, "loss": 2.0602, "step": 13569500 }, { "epoch": 39.28, "learning_rate": 3.0367274337717676e-05, "loss": 2.0534, "step": 13570000 }, { "epoch": 39.28, "learning_rate": 3.03665506900704e-05, "loss": 2.0448, "step": 13570500 }, { "epoch": 39.28, "learning_rate": 3.0365827042423124e-05, "loss": 2.0476, "step": 13571000 }, { "epoch": 39.28, "learning_rate": 3.0365103394775846e-05, "loss": 2.0513, "step": 13571500 }, { "epoch": 39.29, "learning_rate": 3.036438119442386e-05, "loss": 2.0524, "step": 13572000 }, { "epoch": 39.29, "learning_rate": 3.0363657546776587e-05, "loss": 2.0585, "step": 13572500 }, { "epoch": 39.29, "learning_rate": 3.036293389912931e-05, "loss": 2.0389, "step": 13573000 }, { "epoch": 39.29, "learning_rate": 3.036221025148203e-05, "loss": 2.0696, "step": 13573500 }, { "epoch": 39.29, "learning_rate": 3.0361486603834754e-05, "loss": 2.0436, "step": 13574000 }, { "epoch": 39.29, "learning_rate": 3.0360765850778066e-05, "loss": 2.0545, "step": 13574500 }, { "epoch": 39.29, "learning_rate": 3.036004220313079e-05, "loss": 2.0642, "step": 13575000 }, { "epoch": 39.3, "learning_rate": 3.0359318555483514e-05, "loss": 2.0497, "step": 13575500 }, { "epoch": 39.3, "learning_rate": 3.0358594907836236e-05, "loss": 2.0778, "step": 13576000 }, { "epoch": 39.3, "learning_rate": 3.035787126018896e-05, "loss": 2.0471, "step": 13576500 }, { "epoch": 39.3, "learning_rate": 3.035714761254168e-05, "loss": 2.0699, "step": 13577000 }, { "epoch": 39.3, "learning_rate": 3.035642396489441e-05, "loss": 2.048, "step": 13577500 }, { "epoch": 39.3, "learning_rate": 3.0355701764542425e-05, "loss": 2.0522, "step": 13578000 }, { "epoch": 39.3, "learning_rate": 3.035497811689515e-05, "loss": 2.0436, "step": 13578500 }, { "epoch": 39.31, "learning_rate": 3.0354254469247873e-05, "loss": 2.0499, "step": 13579000 }, { "epoch": 39.31, "learning_rate": 3.0353530821600595e-05, "loss": 2.0429, "step": 13579500 }, { "epoch": 39.31, "learning_rate": 3.0352807173953318e-05, "loss": 2.0255, "step": 13580000 }, { "epoch": 39.31, "learning_rate": 3.035208352630604e-05, "loss": 2.049, "step": 13580500 }, { "epoch": 39.31, "learning_rate": 3.035136132595406e-05, "loss": 2.0651, "step": 13581000 }, { "epoch": 39.31, "learning_rate": 3.035063767830678e-05, "loss": 2.0712, "step": 13581500 }, { "epoch": 39.31, "learning_rate": 3.0349914030659503e-05, "loss": 2.0539, "step": 13582000 }, { "epoch": 39.32, "learning_rate": 3.0349190383012225e-05, "loss": 2.0669, "step": 13582500 }, { "epoch": 39.32, "learning_rate": 3.034846673536495e-05, "loss": 2.0493, "step": 13583000 }, { "epoch": 39.32, "learning_rate": 3.0347744535012967e-05, "loss": 2.0554, "step": 13583500 }, { "epoch": 39.32, "learning_rate": 3.034702088736569e-05, "loss": 2.0468, "step": 13584000 }, { "epoch": 39.32, "learning_rate": 3.0346297239718415e-05, "loss": 2.0488, "step": 13584500 }, { "epoch": 39.32, "learning_rate": 3.034557359207114e-05, "loss": 2.0582, "step": 13585000 }, { "epoch": 39.32, "learning_rate": 3.034485139171916e-05, "loss": 2.0764, "step": 13585500 }, { "epoch": 39.33, "learning_rate": 3.034412774407188e-05, "loss": 2.0536, "step": 13586000 }, { "epoch": 39.33, "learning_rate": 3.0343404096424604e-05, "loss": 2.0244, "step": 13586500 }, { "epoch": 39.33, "learning_rate": 3.034268044877733e-05, "loss": 2.0689, "step": 13587000 }, { "epoch": 39.33, "learning_rate": 3.034195680113005e-05, "loss": 2.0549, "step": 13587500 }, { "epoch": 39.33, "learning_rate": 3.0341233153482774e-05, "loss": 2.0379, "step": 13588000 }, { "epoch": 39.33, "learning_rate": 3.0340509505835496e-05, "loss": 2.0471, "step": 13588500 }, { "epoch": 39.33, "learning_rate": 3.0339787305483515e-05, "loss": 2.0456, "step": 13589000 }, { "epoch": 39.34, "learning_rate": 3.0339063657836237e-05, "loss": 2.0388, "step": 13589500 }, { "epoch": 39.34, "learning_rate": 3.033834001018896e-05, "loss": 2.0565, "step": 13590000 }, { "epoch": 39.34, "learning_rate": 3.033761636254168e-05, "loss": 2.0467, "step": 13590500 }, { "epoch": 39.34, "learning_rate": 3.0336892714894404e-05, "loss": 2.0282, "step": 13591000 }, { "epoch": 39.34, "learning_rate": 3.033616906724713e-05, "loss": 2.0444, "step": 13591500 }, { "epoch": 39.34, "learning_rate": 3.033544541959985e-05, "loss": 2.0598, "step": 13592000 }, { "epoch": 39.34, "learning_rate": 3.0334721771952577e-05, "loss": 2.0655, "step": 13592500 }, { "epoch": 39.35, "learning_rate": 3.0333998124305303e-05, "loss": 2.0385, "step": 13593000 }, { "epoch": 39.35, "learning_rate": 3.033327592395332e-05, "loss": 2.0475, "step": 13593500 }, { "epoch": 39.35, "learning_rate": 3.033255227630604e-05, "loss": 2.071, "step": 13594000 }, { "epoch": 39.35, "learning_rate": 3.033183007595406e-05, "loss": 2.0423, "step": 13594500 }, { "epoch": 39.35, "learning_rate": 3.0331106428306782e-05, "loss": 2.0534, "step": 13595000 }, { "epoch": 39.35, "learning_rate": 3.0330382780659504e-05, "loss": 2.0352, "step": 13595500 }, { "epoch": 39.35, "learning_rate": 3.032965913301223e-05, "loss": 2.0534, "step": 13596000 }, { "epoch": 39.36, "learning_rate": 3.0328935485364952e-05, "loss": 2.053, "step": 13596500 }, { "epoch": 39.36, "learning_rate": 3.0328211837717674e-05, "loss": 2.0586, "step": 13597000 }, { "epoch": 39.36, "learning_rate": 3.0327488190070396e-05, "loss": 2.0464, "step": 13597500 }, { "epoch": 39.36, "learning_rate": 3.032676454242312e-05, "loss": 2.0724, "step": 13598000 }, { "epoch": 39.36, "learning_rate": 3.0326042342071138e-05, "loss": 2.0686, "step": 13598500 }, { "epoch": 39.36, "learning_rate": 3.032531869442386e-05, "loss": 2.0552, "step": 13599000 }, { "epoch": 39.36, "learning_rate": 3.0324595046776582e-05, "loss": 2.0301, "step": 13599500 }, { "epoch": 39.37, "learning_rate": 3.032387139912931e-05, "loss": 2.042, "step": 13600000 }, { "epoch": 39.37, "learning_rate": 3.0323147751482033e-05, "loss": 2.0598, "step": 13600500 }, { "epoch": 39.37, "learning_rate": 3.0322425551130052e-05, "loss": 2.033, "step": 13601000 }, { "epoch": 39.37, "learning_rate": 3.0321703350778068e-05, "loss": 2.0295, "step": 13601500 }, { "epoch": 39.37, "learning_rate": 3.0320979703130793e-05, "loss": 2.0363, "step": 13602000 }, { "epoch": 39.37, "learning_rate": 3.0320256055483516e-05, "loss": 2.0476, "step": 13602500 }, { "epoch": 39.38, "learning_rate": 3.0319532407836238e-05, "loss": 2.0525, "step": 13603000 }, { "epoch": 39.38, "learning_rate": 3.031880876018896e-05, "loss": 2.0729, "step": 13603500 }, { "epoch": 39.38, "learning_rate": 3.0318085112541682e-05, "loss": 2.0565, "step": 13604000 }, { "epoch": 39.38, "learning_rate": 3.0317361464894405e-05, "loss": 2.0689, "step": 13604500 }, { "epoch": 39.38, "learning_rate": 3.031663781724713e-05, "loss": 2.0834, "step": 13605000 }, { "epoch": 39.38, "learning_rate": 3.0315914169599852e-05, "loss": 2.0377, "step": 13605500 }, { "epoch": 39.38, "learning_rate": 3.0315191969247868e-05, "loss": 2.0466, "step": 13606000 }, { "epoch": 39.39, "learning_rate": 3.0314468321600594e-05, "loss": 2.0518, "step": 13606500 }, { "epoch": 39.39, "learning_rate": 3.0313744673953316e-05, "loss": 2.0336, "step": 13607000 }, { "epoch": 39.39, "learning_rate": 3.0313021026306045e-05, "loss": 2.0384, "step": 13607500 }, { "epoch": 39.39, "learning_rate": 3.031229882595406e-05, "loss": 2.0586, "step": 13608000 }, { "epoch": 39.39, "learning_rate": 3.0311575178306783e-05, "loss": 2.066, "step": 13608500 }, { "epoch": 39.39, "learning_rate": 3.0310851530659505e-05, "loss": 2.0432, "step": 13609000 }, { "epoch": 39.39, "learning_rate": 3.031012788301223e-05, "loss": 2.0442, "step": 13609500 }, { "epoch": 39.4, "learning_rate": 3.0309404235364953e-05, "loss": 2.0527, "step": 13610000 }, { "epoch": 39.4, "learning_rate": 3.0308682035012968e-05, "loss": 2.0526, "step": 13610500 }, { "epoch": 39.4, "learning_rate": 3.0307958387365694e-05, "loss": 2.0414, "step": 13611000 }, { "epoch": 39.4, "learning_rate": 3.0307234739718416e-05, "loss": 2.0252, "step": 13611500 }, { "epoch": 39.4, "learning_rate": 3.030651109207114e-05, "loss": 2.0355, "step": 13612000 }, { "epoch": 39.4, "learning_rate": 3.030578744442386e-05, "loss": 2.0518, "step": 13612500 }, { "epoch": 39.4, "learning_rate": 3.0305063796776583e-05, "loss": 2.0556, "step": 13613000 }, { "epoch": 39.41, "learning_rate": 3.0304341596424602e-05, "loss": 2.0421, "step": 13613500 }, { "epoch": 39.41, "learning_rate": 3.0303617948777324e-05, "loss": 2.0582, "step": 13614000 }, { "epoch": 39.41, "learning_rate": 3.0302894301130046e-05, "loss": 2.0427, "step": 13614500 }, { "epoch": 39.41, "learning_rate": 3.0302170653482775e-05, "loss": 2.0524, "step": 13615000 }, { "epoch": 39.41, "learning_rate": 3.0301447005835497e-05, "loss": 2.0416, "step": 13615500 }, { "epoch": 39.41, "learning_rate": 3.0300724805483516e-05, "loss": 2.0515, "step": 13616000 }, { "epoch": 39.41, "learning_rate": 3.030000115783624e-05, "loss": 2.0326, "step": 13616500 }, { "epoch": 39.42, "learning_rate": 3.029927751018896e-05, "loss": 2.0558, "step": 13617000 }, { "epoch": 39.42, "learning_rate": 3.0298553862541683e-05, "loss": 2.0582, "step": 13617500 }, { "epoch": 39.42, "learning_rate": 3.0297831662189702e-05, "loss": 2.0616, "step": 13618000 }, { "epoch": 39.42, "learning_rate": 3.0297108014542424e-05, "loss": 2.0451, "step": 13618500 }, { "epoch": 39.42, "learning_rate": 3.0296384366895147e-05, "loss": 2.0612, "step": 13619000 }, { "epoch": 39.42, "learning_rate": 3.029566071924787e-05, "loss": 2.0445, "step": 13619500 }, { "epoch": 39.42, "learning_rate": 3.0294937071600594e-05, "loss": 2.0662, "step": 13620000 }, { "epoch": 39.43, "learning_rate": 3.0294213423953317e-05, "loss": 2.0599, "step": 13620500 }, { "epoch": 39.43, "learning_rate": 3.029348977630604e-05, "loss": 2.0414, "step": 13621000 }, { "epoch": 39.43, "learning_rate": 3.029276612865876e-05, "loss": 2.0573, "step": 13621500 }, { "epoch": 39.43, "learning_rate": 3.0292042481011483e-05, "loss": 2.0239, "step": 13622000 }, { "epoch": 39.43, "learning_rate": 3.0291318833364212e-05, "loss": 2.043, "step": 13622500 }, { "epoch": 39.43, "learning_rate": 3.0290598080307525e-05, "loss": 2.0685, "step": 13623000 }, { "epoch": 39.43, "learning_rate": 3.0289874432660247e-05, "loss": 2.0441, "step": 13623500 }, { "epoch": 39.44, "learning_rate": 3.0289150785012972e-05, "loss": 2.0487, "step": 13624000 }, { "epoch": 39.44, "learning_rate": 3.0288427137365695e-05, "loss": 2.0342, "step": 13624500 }, { "epoch": 39.44, "learning_rate": 3.028770493701371e-05, "loss": 2.0286, "step": 13625000 }, { "epoch": 39.44, "learning_rate": 3.0286981289366432e-05, "loss": 2.0082, "step": 13625500 }, { "epoch": 39.44, "learning_rate": 3.0286257641719158e-05, "loss": 2.022, "step": 13626000 }, { "epoch": 39.44, "learning_rate": 3.028553399407188e-05, "loss": 2.0388, "step": 13626500 }, { "epoch": 39.44, "learning_rate": 3.0284810346424603e-05, "loss": 2.0221, "step": 13627000 }, { "epoch": 39.45, "learning_rate": 3.0284086698777325e-05, "loss": 2.0518, "step": 13627500 }, { "epoch": 39.45, "learning_rate": 3.0283363051130047e-05, "loss": 2.0633, "step": 13628000 }, { "epoch": 39.45, "learning_rate": 3.0282639403482773e-05, "loss": 2.029, "step": 13628500 }, { "epoch": 39.45, "learning_rate": 3.0281917203130788e-05, "loss": 2.0316, "step": 13629000 }, { "epoch": 39.45, "learning_rate": 3.028119355548351e-05, "loss": 2.0457, "step": 13629500 }, { "epoch": 39.45, "learning_rate": 3.028046990783624e-05, "loss": 2.0704, "step": 13630000 }, { "epoch": 39.45, "learning_rate": 3.027974626018896e-05, "loss": 2.0682, "step": 13630500 }, { "epoch": 39.46, "learning_rate": 3.0279022612541684e-05, "loss": 2.0457, "step": 13631000 }, { "epoch": 39.46, "learning_rate": 3.0278301859484996e-05, "loss": 2.0317, "step": 13631500 }, { "epoch": 39.46, "learning_rate": 3.0277578211837722e-05, "loss": 2.0449, "step": 13632000 }, { "epoch": 39.46, "learning_rate": 3.0276854564190444e-05, "loss": 2.0408, "step": 13632500 }, { "epoch": 39.46, "learning_rate": 3.0276130916543166e-05, "loss": 2.0607, "step": 13633000 }, { "epoch": 39.46, "learning_rate": 3.0275408716191185e-05, "loss": 2.0793, "step": 13633500 }, { "epoch": 39.46, "learning_rate": 3.0274685068543907e-05, "loss": 2.0515, "step": 13634000 }, { "epoch": 39.47, "learning_rate": 3.027396142089663e-05, "loss": 2.0483, "step": 13634500 }, { "epoch": 39.47, "learning_rate": 3.0273237773249352e-05, "loss": 2.0518, "step": 13635000 }, { "epoch": 39.47, "learning_rate": 3.0272514125602074e-05, "loss": 2.0527, "step": 13635500 }, { "epoch": 39.47, "learning_rate": 3.0271790477954796e-05, "loss": 2.0736, "step": 13636000 }, { "epoch": 39.47, "learning_rate": 3.0271066830307522e-05, "loss": 2.0561, "step": 13636500 }, { "epoch": 39.47, "learning_rate": 3.0270343182660244e-05, "loss": 2.0623, "step": 13637000 }, { "epoch": 39.47, "learning_rate": 3.026962098230826e-05, "loss": 2.0547, "step": 13637500 }, { "epoch": 39.48, "learning_rate": 3.026889733466099e-05, "loss": 2.0223, "step": 13638000 }, { "epoch": 39.48, "learning_rate": 3.026817368701371e-05, "loss": 2.0437, "step": 13638500 }, { "epoch": 39.48, "learning_rate": 3.0267450039366437e-05, "loss": 2.0585, "step": 13639000 }, { "epoch": 39.48, "learning_rate": 3.026672639171916e-05, "loss": 2.0743, "step": 13639500 }, { "epoch": 39.48, "learning_rate": 3.026600274407188e-05, "loss": 2.0545, "step": 13640000 }, { "epoch": 39.48, "learning_rate": 3.0265280543719897e-05, "loss": 2.0243, "step": 13640500 }, { "epoch": 39.49, "learning_rate": 3.0264556896072622e-05, "loss": 2.0414, "step": 13641000 }, { "epoch": 39.49, "learning_rate": 3.0263833248425345e-05, "loss": 2.0474, "step": 13641500 }, { "epoch": 39.49, "learning_rate": 3.0263109600778067e-05, "loss": 2.0397, "step": 13642000 }, { "epoch": 39.49, "learning_rate": 3.026238595313079e-05, "loss": 2.045, "step": 13642500 }, { "epoch": 39.49, "learning_rate": 3.026166230548351e-05, "loss": 2.0526, "step": 13643000 }, { "epoch": 39.49, "learning_rate": 3.0260938657836237e-05, "loss": 2.0616, "step": 13643500 }, { "epoch": 39.49, "learning_rate": 3.026021501018896e-05, "loss": 2.0591, "step": 13644000 }, { "epoch": 39.5, "learning_rate": 3.025949136254168e-05, "loss": 2.0612, "step": 13644500 }, { "epoch": 39.5, "learning_rate": 3.025876771489441e-05, "loss": 2.0501, "step": 13645000 }, { "epoch": 39.5, "learning_rate": 3.0258044067247133e-05, "loss": 2.0457, "step": 13645500 }, { "epoch": 39.5, "learning_rate": 3.0257321866895148e-05, "loss": 2.0516, "step": 13646000 }, { "epoch": 39.5, "learning_rate": 3.0256598219247874e-05, "loss": 2.0463, "step": 13646500 }, { "epoch": 39.5, "learning_rate": 3.0255874571600596e-05, "loss": 2.0763, "step": 13647000 }, { "epoch": 39.5, "learning_rate": 3.0255150923953318e-05, "loss": 2.0634, "step": 13647500 }, { "epoch": 39.51, "learning_rate": 3.025442727630604e-05, "loss": 2.0368, "step": 13648000 }, { "epoch": 39.51, "learning_rate": 3.0253703628658763e-05, "loss": 2.0761, "step": 13648500 }, { "epoch": 39.51, "learning_rate": 3.0252979981011488e-05, "loss": 2.0394, "step": 13649000 }, { "epoch": 39.51, "learning_rate": 3.025225633336421e-05, "loss": 2.0557, "step": 13649500 }, { "epoch": 39.51, "learning_rate": 3.0251534133012226e-05, "loss": 2.0434, "step": 13650000 }, { "epoch": 39.51, "learning_rate": 3.025081337995554e-05, "loss": 2.0481, "step": 13650500 }, { "epoch": 39.51, "learning_rate": 3.025008973230826e-05, "loss": 2.0571, "step": 13651000 }, { "epoch": 39.52, "learning_rate": 3.0249366084660986e-05, "loss": 2.0574, "step": 13651500 }, { "epoch": 39.52, "learning_rate": 3.024864243701371e-05, "loss": 2.0454, "step": 13652000 }, { "epoch": 39.52, "learning_rate": 3.0247920236661724e-05, "loss": 2.0788, "step": 13652500 }, { "epoch": 39.52, "learning_rate": 3.0247196589014453e-05, "loss": 2.0495, "step": 13653000 }, { "epoch": 39.52, "learning_rate": 3.0246472941367175e-05, "loss": 2.0395, "step": 13653500 }, { "epoch": 39.52, "learning_rate": 3.02457492937199e-05, "loss": 2.0243, "step": 13654000 }, { "epoch": 39.52, "learning_rate": 3.0245025646072623e-05, "loss": 2.0571, "step": 13654500 }, { "epoch": 39.53, "learning_rate": 3.0244301998425345e-05, "loss": 2.0565, "step": 13655000 }, { "epoch": 39.53, "learning_rate": 3.0243578350778068e-05, "loss": 2.0479, "step": 13655500 }, { "epoch": 39.53, "learning_rate": 3.024285470313079e-05, "loss": 2.0359, "step": 13656000 }, { "epoch": 39.53, "learning_rate": 3.0242131055483512e-05, "loss": 2.0444, "step": 13656500 }, { "epoch": 39.53, "learning_rate": 3.0241407407836238e-05, "loss": 2.0549, "step": 13657000 }, { "epoch": 39.53, "learning_rate": 3.024068376018896e-05, "loss": 2.0387, "step": 13657500 }, { "epoch": 39.53, "learning_rate": 3.0239960112541682e-05, "loss": 2.0752, "step": 13658000 }, { "epoch": 39.54, "learning_rate": 3.02392379121897e-05, "loss": 2.0787, "step": 13658500 }, { "epoch": 39.54, "learning_rate": 3.0238514264542423e-05, "loss": 2.0489, "step": 13659000 }, { "epoch": 39.54, "learning_rate": 3.0237790616895146e-05, "loss": 2.0379, "step": 13659500 }, { "epoch": 39.54, "learning_rate": 3.0237066969247875e-05, "loss": 2.0568, "step": 13660000 }, { "epoch": 39.54, "learning_rate": 3.0236343321600597e-05, "loss": 2.0679, "step": 13660500 }, { "epoch": 39.54, "learning_rate": 3.023561967395332e-05, "loss": 2.0428, "step": 13661000 }, { "epoch": 39.54, "learning_rate": 3.023489602630604e-05, "loss": 2.0529, "step": 13661500 }, { "epoch": 39.55, "learning_rate": 3.0234172378658763e-05, "loss": 2.0355, "step": 13662000 }, { "epoch": 39.55, "learning_rate": 3.0233450178306782e-05, "loss": 2.0726, "step": 13662500 }, { "epoch": 39.55, "learning_rate": 3.0232726530659505e-05, "loss": 2.0441, "step": 13663000 }, { "epoch": 39.55, "learning_rate": 3.0232002883012227e-05, "loss": 2.0284, "step": 13663500 }, { "epoch": 39.55, "learning_rate": 3.0231279235364952e-05, "loss": 2.0535, "step": 13664000 }, { "epoch": 39.55, "learning_rate": 3.0230557035012968e-05, "loss": 2.048, "step": 13664500 }, { "epoch": 39.55, "learning_rate": 3.022983338736569e-05, "loss": 2.0665, "step": 13665000 }, { "epoch": 39.56, "learning_rate": 3.0229109739718413e-05, "loss": 2.0609, "step": 13665500 }, { "epoch": 39.56, "learning_rate": 3.022838753936643e-05, "loss": 2.0495, "step": 13666000 }, { "epoch": 39.56, "learning_rate": 3.0227663891719154e-05, "loss": 2.0404, "step": 13666500 }, { "epoch": 39.56, "learning_rate": 3.0226940244071876e-05, "loss": 2.0411, "step": 13667000 }, { "epoch": 39.56, "learning_rate": 3.0226216596424605e-05, "loss": 2.0258, "step": 13667500 }, { "epoch": 39.56, "learning_rate": 3.0225492948777327e-05, "loss": 2.0681, "step": 13668000 }, { "epoch": 39.56, "learning_rate": 3.0224769301130053e-05, "loss": 2.0536, "step": 13668500 }, { "epoch": 39.57, "learning_rate": 3.0224045653482775e-05, "loss": 2.0712, "step": 13669000 }, { "epoch": 39.57, "learning_rate": 3.0223322005835497e-05, "loss": 2.0581, "step": 13669500 }, { "epoch": 39.57, "learning_rate": 3.022259835818822e-05, "loss": 2.0685, "step": 13670000 }, { "epoch": 39.57, "learning_rate": 3.022187615783624e-05, "loss": 2.0571, "step": 13670500 }, { "epoch": 39.57, "learning_rate": 3.022115251018896e-05, "loss": 2.0593, "step": 13671000 }, { "epoch": 39.57, "learning_rate": 3.0220428862541683e-05, "loss": 2.0456, "step": 13671500 }, { "epoch": 39.57, "learning_rate": 3.0219705214894405e-05, "loss": 2.0521, "step": 13672000 }, { "epoch": 39.58, "learning_rate": 3.0218981567247127e-05, "loss": 2.0733, "step": 13672500 }, { "epoch": 39.58, "learning_rate": 3.0218257919599853e-05, "loss": 2.0551, "step": 13673000 }, { "epoch": 39.58, "learning_rate": 3.0217534271952575e-05, "loss": 2.0261, "step": 13673500 }, { "epoch": 39.58, "learning_rate": 3.0216810624305297e-05, "loss": 2.0587, "step": 13674000 }, { "epoch": 39.58, "learning_rate": 3.0216086976658026e-05, "loss": 2.0436, "step": 13674500 }, { "epoch": 39.58, "learning_rate": 3.021536332901075e-05, "loss": 2.0973, "step": 13675000 }, { "epoch": 39.58, "learning_rate": 3.0214641128658768e-05, "loss": 2.0221, "step": 13675500 }, { "epoch": 39.59, "learning_rate": 3.021391748101149e-05, "loss": 2.07, "step": 13676000 }, { "epoch": 39.59, "learning_rate": 3.0213193833364212e-05, "loss": 2.0494, "step": 13676500 }, { "epoch": 39.59, "learning_rate": 3.0212470185716934e-05, "loss": 2.0657, "step": 13677000 }, { "epoch": 39.59, "learning_rate": 3.0211746538069657e-05, "loss": 2.0572, "step": 13677500 }, { "epoch": 39.59, "learning_rate": 3.021102289042238e-05, "loss": 2.0581, "step": 13678000 }, { "epoch": 39.59, "learning_rate": 3.0210299242775104e-05, "loss": 2.0645, "step": 13678500 }, { "epoch": 39.6, "learning_rate": 3.0209575595127827e-05, "loss": 2.0328, "step": 13679000 }, { "epoch": 39.6, "learning_rate": 3.020885194748055e-05, "loss": 2.0698, "step": 13679500 }, { "epoch": 39.6, "learning_rate": 3.0208129747128568e-05, "loss": 2.0627, "step": 13680000 }, { "epoch": 39.6, "learning_rate": 3.0207407546776583e-05, "loss": 2.055, "step": 13680500 }, { "epoch": 39.6, "learning_rate": 3.0206683899129306e-05, "loss": 2.0599, "step": 13681000 }, { "epoch": 39.6, "learning_rate": 3.0205960251482028e-05, "loss": 2.0349, "step": 13681500 }, { "epoch": 39.6, "learning_rate": 3.0205238051130047e-05, "loss": 2.0645, "step": 13682000 }, { "epoch": 39.61, "learning_rate": 3.0204514403482776e-05, "loss": 2.0492, "step": 13682500 }, { "epoch": 39.61, "learning_rate": 3.020379220313079e-05, "loss": 2.0427, "step": 13683000 }, { "epoch": 39.61, "learning_rate": 3.0203068555483517e-05, "loss": 2.0413, "step": 13683500 }, { "epoch": 39.61, "learning_rate": 3.020234490783624e-05, "loss": 2.0575, "step": 13684000 }, { "epoch": 39.61, "learning_rate": 3.020162126018896e-05, "loss": 2.0501, "step": 13684500 }, { "epoch": 39.61, "learning_rate": 3.020089905983698e-05, "loss": 2.0483, "step": 13685000 }, { "epoch": 39.61, "learning_rate": 3.0200175412189703e-05, "loss": 2.0336, "step": 13685500 }, { "epoch": 39.62, "learning_rate": 3.0199451764542425e-05, "loss": 2.0262, "step": 13686000 }, { "epoch": 39.62, "learning_rate": 3.0198728116895147e-05, "loss": 2.0406, "step": 13686500 }, { "epoch": 39.62, "learning_rate": 3.019800446924787e-05, "loss": 2.0149, "step": 13687000 }, { "epoch": 39.62, "learning_rate": 3.019728082160059e-05, "loss": 2.0811, "step": 13687500 }, { "epoch": 39.62, "learning_rate": 3.0196557173953317e-05, "loss": 2.0493, "step": 13688000 }, { "epoch": 39.62, "learning_rate": 3.019583352630604e-05, "loss": 2.0499, "step": 13688500 }, { "epoch": 39.62, "learning_rate": 3.019510987865876e-05, "loss": 2.0718, "step": 13689000 }, { "epoch": 39.63, "learning_rate": 3.0194386231011484e-05, "loss": 2.0569, "step": 13689500 }, { "epoch": 39.63, "learning_rate": 3.0193662583364213e-05, "loss": 2.0568, "step": 13690000 }, { "epoch": 39.63, "learning_rate": 3.0192938935716935e-05, "loss": 2.0452, "step": 13690500 }, { "epoch": 39.63, "learning_rate": 3.0192216735364954e-05, "loss": 2.0453, "step": 13691000 }, { "epoch": 39.63, "learning_rate": 3.019149453501297e-05, "loss": 2.0511, "step": 13691500 }, { "epoch": 39.63, "learning_rate": 3.0190770887365695e-05, "loss": 2.0402, "step": 13692000 }, { "epoch": 39.63, "learning_rate": 3.019004868701371e-05, "loss": 2.0575, "step": 13692500 }, { "epoch": 39.64, "learning_rate": 3.0189325039366433e-05, "loss": 2.0482, "step": 13693000 }, { "epoch": 39.64, "learning_rate": 3.0188601391719155e-05, "loss": 2.0625, "step": 13693500 }, { "epoch": 39.64, "learning_rate": 3.018787774407188e-05, "loss": 2.0696, "step": 13694000 }, { "epoch": 39.64, "learning_rate": 3.0187154096424603e-05, "loss": 2.0476, "step": 13694500 }, { "epoch": 39.64, "learning_rate": 3.0186430448777325e-05, "loss": 2.07, "step": 13695000 }, { "epoch": 39.64, "learning_rate": 3.0185706801130048e-05, "loss": 2.0452, "step": 13695500 }, { "epoch": 39.64, "learning_rate": 3.018498315348277e-05, "loss": 2.0586, "step": 13696000 }, { "epoch": 39.65, "learning_rate": 3.0184259505835495e-05, "loss": 2.0492, "step": 13696500 }, { "epoch": 39.65, "learning_rate": 3.0183535858188218e-05, "loss": 2.0514, "step": 13697000 }, { "epoch": 39.65, "learning_rate": 3.0182812210540943e-05, "loss": 2.0562, "step": 13697500 }, { "epoch": 39.65, "learning_rate": 3.018208856289367e-05, "loss": 2.0712, "step": 13698000 }, { "epoch": 39.65, "learning_rate": 3.0181366362541684e-05, "loss": 2.0622, "step": 13698500 }, { "epoch": 39.65, "learning_rate": 3.0180644162189703e-05, "loss": 2.0478, "step": 13699000 }, { "epoch": 39.65, "learning_rate": 3.0179920514542426e-05, "loss": 2.0524, "step": 13699500 }, { "epoch": 39.66, "learning_rate": 3.0179196866895148e-05, "loss": 2.0614, "step": 13700000 }, { "epoch": 39.66, "learning_rate": 3.0178474666543167e-05, "loss": 2.0506, "step": 13700500 }, { "epoch": 39.66, "learning_rate": 3.017775101889589e-05, "loss": 2.0362, "step": 13701000 }, { "epoch": 39.66, "learning_rate": 3.017702737124861e-05, "loss": 2.0628, "step": 13701500 }, { "epoch": 39.66, "learning_rate": 3.017630517089663e-05, "loss": 2.0461, "step": 13702000 }, { "epoch": 39.66, "learning_rate": 3.0175581523249352e-05, "loss": 2.0537, "step": 13702500 }, { "epoch": 39.66, "learning_rate": 3.0174857875602075e-05, "loss": 2.0636, "step": 13703000 }, { "epoch": 39.67, "learning_rate": 3.0174134227954797e-05, "loss": 2.063, "step": 13703500 }, { "epoch": 39.67, "learning_rate": 3.017341058030752e-05, "loss": 2.0412, "step": 13704000 }, { "epoch": 39.67, "learning_rate": 3.0172686932660245e-05, "loss": 2.0472, "step": 13704500 }, { "epoch": 39.67, "learning_rate": 3.017196328501297e-05, "loss": 2.0674, "step": 13705000 }, { "epoch": 39.67, "learning_rate": 3.0171239637365696e-05, "loss": 2.0362, "step": 13705500 }, { "epoch": 39.67, "learning_rate": 3.0170515989718418e-05, "loss": 2.0616, "step": 13706000 }, { "epoch": 39.67, "learning_rate": 3.016979234207114e-05, "loss": 2.0448, "step": 13706500 }, { "epoch": 39.68, "learning_rate": 3.0169068694423863e-05, "loss": 2.0565, "step": 13707000 }, { "epoch": 39.68, "learning_rate": 3.0168345046776585e-05, "loss": 2.0572, "step": 13707500 }, { "epoch": 39.68, "learning_rate": 3.0167621399129307e-05, "loss": 2.0461, "step": 13708000 }, { "epoch": 39.68, "learning_rate": 3.0166897751482033e-05, "loss": 2.0424, "step": 13708500 }, { "epoch": 39.68, "learning_rate": 3.0166174103834755e-05, "loss": 2.0469, "step": 13709000 }, { "epoch": 39.68, "learning_rate": 3.0165450456187477e-05, "loss": 2.0424, "step": 13709500 }, { "epoch": 39.68, "learning_rate": 3.01647268085402e-05, "loss": 2.0461, "step": 13710000 }, { "epoch": 39.69, "learning_rate": 3.016400460818822e-05, "loss": 2.0473, "step": 13710500 }, { "epoch": 39.69, "learning_rate": 3.0163282407836234e-05, "loss": 2.0533, "step": 13711000 }, { "epoch": 39.69, "learning_rate": 3.016255876018896e-05, "loss": 2.0687, "step": 13711500 }, { "epoch": 39.69, "learning_rate": 3.0161835112541682e-05, "loss": 2.0482, "step": 13712000 }, { "epoch": 39.69, "learning_rate": 3.016111146489441e-05, "loss": 2.0454, "step": 13712500 }, { "epoch": 39.69, "learning_rate": 3.0160389264542426e-05, "loss": 2.0473, "step": 13713000 }, { "epoch": 39.69, "learning_rate": 3.015966561689515e-05, "loss": 2.0696, "step": 13713500 }, { "epoch": 39.7, "learning_rate": 3.0158943416543168e-05, "loss": 2.0453, "step": 13714000 }, { "epoch": 39.7, "learning_rate": 3.015821976889589e-05, "loss": 2.0533, "step": 13714500 }, { "epoch": 39.7, "learning_rate": 3.0157496121248612e-05, "loss": 2.0645, "step": 13715000 }, { "epoch": 39.7, "learning_rate": 3.0156772473601334e-05, "loss": 2.0718, "step": 13715500 }, { "epoch": 39.7, "learning_rate": 3.015604882595406e-05, "loss": 2.0466, "step": 13716000 }, { "epoch": 39.7, "learning_rate": 3.0155325178306782e-05, "loss": 2.0337, "step": 13716500 }, { "epoch": 39.71, "learning_rate": 3.0154601530659504e-05, "loss": 2.0442, "step": 13717000 }, { "epoch": 39.71, "learning_rate": 3.0153877883012227e-05, "loss": 2.0722, "step": 13717500 }, { "epoch": 39.71, "learning_rate": 3.015315423536495e-05, "loss": 2.0482, "step": 13718000 }, { "epoch": 39.71, "learning_rate": 3.0152432035012968e-05, "loss": 2.0397, "step": 13718500 }, { "epoch": 39.71, "learning_rate": 3.015170838736569e-05, "loss": 2.049, "step": 13719000 }, { "epoch": 39.71, "learning_rate": 3.0150984739718412e-05, "loss": 2.0454, "step": 13719500 }, { "epoch": 39.71, "learning_rate": 3.015026109207114e-05, "loss": 2.0836, "step": 13720000 }, { "epoch": 39.72, "learning_rate": 3.014953889171916e-05, "loss": 2.057, "step": 13720500 }, { "epoch": 39.72, "learning_rate": 3.0148815244071883e-05, "loss": 2.0813, "step": 13721000 }, { "epoch": 39.72, "learning_rate": 3.0148091596424605e-05, "loss": 2.0463, "step": 13721500 }, { "epoch": 39.72, "learning_rate": 3.0147367948777327e-05, "loss": 2.0317, "step": 13722000 }, { "epoch": 39.72, "learning_rate": 3.014664430113005e-05, "loss": 2.0703, "step": 13722500 }, { "epoch": 39.72, "learning_rate": 3.0145920653482775e-05, "loss": 2.0424, "step": 13723000 }, { "epoch": 39.72, "learning_rate": 3.0145197005835497e-05, "loss": 2.0494, "step": 13723500 }, { "epoch": 39.73, "learning_rate": 3.014447335818822e-05, "loss": 2.0561, "step": 13724000 }, { "epoch": 39.73, "learning_rate": 3.014374971054094e-05, "loss": 2.0504, "step": 13724500 }, { "epoch": 39.73, "learning_rate": 3.0143026062893664e-05, "loss": 2.0603, "step": 13725000 }, { "epoch": 39.73, "learning_rate": 3.0142302415246386e-05, "loss": 2.0517, "step": 13725500 }, { "epoch": 39.73, "learning_rate": 3.014157876759911e-05, "loss": 2.066, "step": 13726000 }, { "epoch": 39.73, "learning_rate": 3.0140855119951834e-05, "loss": 2.0545, "step": 13726500 }, { "epoch": 39.73, "learning_rate": 3.0140131472304563e-05, "loss": 2.0828, "step": 13727000 }, { "epoch": 39.74, "learning_rate": 3.0139407824657285e-05, "loss": 2.0596, "step": 13727500 }, { "epoch": 39.74, "learning_rate": 3.01386856243053e-05, "loss": 2.0505, "step": 13728000 }, { "epoch": 39.74, "learning_rate": 3.0137961976658023e-05, "loss": 2.045, "step": 13728500 }, { "epoch": 39.74, "learning_rate": 3.013723832901075e-05, "loss": 2.0467, "step": 13729000 }, { "epoch": 39.74, "learning_rate": 3.013651468136347e-05, "loss": 2.0589, "step": 13729500 }, { "epoch": 39.74, "learning_rate": 3.0135791033716193e-05, "loss": 2.0485, "step": 13730000 }, { "epoch": 39.74, "learning_rate": 3.0135068833364212e-05, "loss": 2.0648, "step": 13730500 }, { "epoch": 39.75, "learning_rate": 3.0134345185716934e-05, "loss": 2.0425, "step": 13731000 }, { "epoch": 39.75, "learning_rate": 3.0133621538069656e-05, "loss": 2.0805, "step": 13731500 }, { "epoch": 39.75, "learning_rate": 3.013289789042238e-05, "loss": 2.0398, "step": 13732000 }, { "epoch": 39.75, "learning_rate": 3.01321742427751e-05, "loss": 2.0479, "step": 13732500 }, { "epoch": 39.75, "learning_rate": 3.0131450595127823e-05, "loss": 2.0431, "step": 13733000 }, { "epoch": 39.75, "learning_rate": 3.0130729842071135e-05, "loss": 2.0371, "step": 13733500 }, { "epoch": 39.75, "learning_rate": 3.013000619442386e-05, "loss": 2.0449, "step": 13734000 }, { "epoch": 39.76, "learning_rate": 3.0129282546776583e-05, "loss": 2.0439, "step": 13734500 }, { "epoch": 39.76, "learning_rate": 3.0128558899129312e-05, "loss": 2.0607, "step": 13735000 }, { "epoch": 39.76, "learning_rate": 3.0127835251482034e-05, "loss": 2.0745, "step": 13735500 }, { "epoch": 39.76, "learning_rate": 3.0127111603834757e-05, "loss": 2.0807, "step": 13736000 }, { "epoch": 39.76, "learning_rate": 3.012638795618748e-05, "loss": 2.049, "step": 13736500 }, { "epoch": 39.76, "learning_rate": 3.0125665755835498e-05, "loss": 2.0525, "step": 13737000 }, { "epoch": 39.76, "learning_rate": 3.012494210818822e-05, "loss": 2.0582, "step": 13737500 }, { "epoch": 39.77, "learning_rate": 3.0124218460540942e-05, "loss": 2.0532, "step": 13738000 }, { "epoch": 39.77, "learning_rate": 3.0123494812893665e-05, "loss": 2.0567, "step": 13738500 }, { "epoch": 39.77, "learning_rate": 3.0122771165246387e-05, "loss": 2.0426, "step": 13739000 }, { "epoch": 39.77, "learning_rate": 3.0122047517599112e-05, "loss": 2.0665, "step": 13739500 }, { "epoch": 39.77, "learning_rate": 3.0121325317247128e-05, "loss": 2.0633, "step": 13740000 }, { "epoch": 39.77, "learning_rate": 3.012060166959985e-05, "loss": 2.0289, "step": 13740500 }, { "epoch": 39.77, "learning_rate": 3.0119878021952576e-05, "loss": 2.0387, "step": 13741000 }, { "epoch": 39.78, "learning_rate": 3.011915582160059e-05, "loss": 2.0418, "step": 13741500 }, { "epoch": 39.78, "learning_rate": 3.0118432173953314e-05, "loss": 2.041, "step": 13742000 }, { "epoch": 39.78, "learning_rate": 3.0117708526306043e-05, "loss": 2.0174, "step": 13742500 }, { "epoch": 39.78, "learning_rate": 3.0116984878658765e-05, "loss": 2.0437, "step": 13743000 }, { "epoch": 39.78, "learning_rate": 3.011626123101149e-05, "loss": 2.0643, "step": 13743500 }, { "epoch": 39.78, "learning_rate": 3.0115537583364213e-05, "loss": 2.0686, "step": 13744000 }, { "epoch": 39.78, "learning_rate": 3.0114813935716935e-05, "loss": 2.0469, "step": 13744500 }, { "epoch": 39.79, "learning_rate": 3.011409173536495e-05, "loss": 2.0761, "step": 13745000 }, { "epoch": 39.79, "learning_rate": 3.0113368087717676e-05, "loss": 2.0775, "step": 13745500 }, { "epoch": 39.79, "learning_rate": 3.01126444400704e-05, "loss": 2.0508, "step": 13746000 }, { "epoch": 39.79, "learning_rate": 3.011192079242312e-05, "loss": 2.0474, "step": 13746500 }, { "epoch": 39.79, "learning_rate": 3.0111197144775843e-05, "loss": 2.0491, "step": 13747000 }, { "epoch": 39.79, "learning_rate": 3.0110473497128565e-05, "loss": 2.0442, "step": 13747500 }, { "epoch": 39.79, "learning_rate": 3.010974984948129e-05, "loss": 2.0625, "step": 13748000 }, { "epoch": 39.8, "learning_rate": 3.0109026201834013e-05, "loss": 2.0459, "step": 13748500 }, { "epoch": 39.8, "learning_rate": 3.010830400148203e-05, "loss": 2.0337, "step": 13749000 }, { "epoch": 39.8, "learning_rate": 3.010758035383475e-05, "loss": 2.0451, "step": 13749500 }, { "epoch": 39.8, "learning_rate": 3.010685670618748e-05, "loss": 2.0486, "step": 13750000 }, { "epoch": 39.8, "learning_rate": 3.0106133058540202e-05, "loss": 2.0596, "step": 13750500 }, { "epoch": 39.8, "learning_rate": 3.0105409410892928e-05, "loss": 2.0416, "step": 13751000 }, { "epoch": 39.8, "learning_rate": 3.010468576324565e-05, "loss": 2.0561, "step": 13751500 }, { "epoch": 39.81, "learning_rate": 3.0103962115598372e-05, "loss": 2.0574, "step": 13752000 }, { "epoch": 39.81, "learning_rate": 3.0103238467951094e-05, "loss": 2.0529, "step": 13752500 }, { "epoch": 39.81, "learning_rate": 3.0102514820303816e-05, "loss": 2.0485, "step": 13753000 }, { "epoch": 39.81, "learning_rate": 3.0101792619951835e-05, "loss": 2.0502, "step": 13753500 }, { "epoch": 39.81, "learning_rate": 3.0101068972304558e-05, "loss": 2.0579, "step": 13754000 }, { "epoch": 39.81, "learning_rate": 3.010034532465728e-05, "loss": 2.0877, "step": 13754500 }, { "epoch": 39.82, "learning_rate": 3.0099621677010002e-05, "loss": 2.0587, "step": 13755000 }, { "epoch": 39.82, "learning_rate": 3.009889947665802e-05, "loss": 2.075, "step": 13755500 }, { "epoch": 39.82, "learning_rate": 3.0098175829010743e-05, "loss": 2.0603, "step": 13756000 }, { "epoch": 39.82, "learning_rate": 3.0097452181363466e-05, "loss": 2.0598, "step": 13756500 }, { "epoch": 39.82, "learning_rate": 3.0096728533716195e-05, "loss": 2.0419, "step": 13757000 }, { "epoch": 39.82, "learning_rate": 3.0096004886068917e-05, "loss": 2.0328, "step": 13757500 }, { "epoch": 39.82, "learning_rate": 3.0095281238421642e-05, "loss": 2.0643, "step": 13758000 }, { "epoch": 39.83, "learning_rate": 3.0094557590774365e-05, "loss": 2.0506, "step": 13758500 }, { "epoch": 39.83, "learning_rate": 3.0093833943127087e-05, "loss": 2.0588, "step": 13759000 }, { "epoch": 39.83, "learning_rate": 3.009311029547981e-05, "loss": 2.037, "step": 13759500 }, { "epoch": 39.83, "learning_rate": 3.009238664783253e-05, "loss": 2.0431, "step": 13760000 }, { "epoch": 39.83, "learning_rate": 3.0091663000185254e-05, "loss": 2.0282, "step": 13760500 }, { "epoch": 39.83, "learning_rate": 3.009093935253798e-05, "loss": 2.052, "step": 13761000 }, { "epoch": 39.83, "learning_rate": 3.0090217152185995e-05, "loss": 2.0635, "step": 13761500 }, { "epoch": 39.84, "learning_rate": 3.0089494951834014e-05, "loss": 2.078, "step": 13762000 }, { "epoch": 39.84, "learning_rate": 3.0088771304186736e-05, "loss": 2.0675, "step": 13762500 }, { "epoch": 39.84, "learning_rate": 3.0088047656539458e-05, "loss": 2.0581, "step": 13763000 }, { "epoch": 39.84, "learning_rate": 3.008732400889218e-05, "loss": 2.0869, "step": 13763500 }, { "epoch": 39.84, "learning_rate": 3.0086600361244906e-05, "loss": 2.0296, "step": 13764000 }, { "epoch": 39.84, "learning_rate": 3.008587671359763e-05, "loss": 2.0463, "step": 13764500 }, { "epoch": 39.84, "learning_rate": 3.0085153065950354e-05, "loss": 2.0711, "step": 13765000 }, { "epoch": 39.85, "learning_rate": 3.008442941830308e-05, "loss": 2.0339, "step": 13765500 }, { "epoch": 39.85, "learning_rate": 3.0083707217951095e-05, "loss": 2.0474, "step": 13766000 }, { "epoch": 39.85, "learning_rate": 3.0082983570303817e-05, "loss": 2.0473, "step": 13766500 }, { "epoch": 39.85, "learning_rate": 3.0082259922656543e-05, "loss": 2.0689, "step": 13767000 }, { "epoch": 39.85, "learning_rate": 3.0081536275009265e-05, "loss": 2.0479, "step": 13767500 }, { "epoch": 39.85, "learning_rate": 3.0080812627361987e-05, "loss": 2.0298, "step": 13768000 }, { "epoch": 39.85, "learning_rate": 3.008008897971471e-05, "loss": 2.0623, "step": 13768500 }, { "epoch": 39.86, "learning_rate": 3.0079365332067432e-05, "loss": 2.0611, "step": 13769000 }, { "epoch": 39.86, "learning_rate": 3.0078641684420154e-05, "loss": 2.0763, "step": 13769500 }, { "epoch": 39.86, "learning_rate": 3.0077919484068173e-05, "loss": 2.0557, "step": 13770000 }, { "epoch": 39.86, "learning_rate": 3.0077197283716192e-05, "loss": 2.0324, "step": 13770500 }, { "epoch": 39.86, "learning_rate": 3.0076473636068914e-05, "loss": 2.0462, "step": 13771000 }, { "epoch": 39.86, "learning_rate": 3.0075749988421636e-05, "loss": 2.0435, "step": 13771500 }, { "epoch": 39.86, "learning_rate": 3.0075026340774365e-05, "loss": 2.0594, "step": 13772000 }, { "epoch": 39.87, "learning_rate": 3.0074302693127088e-05, "loss": 2.0472, "step": 13772500 }, { "epoch": 39.87, "learning_rate": 3.007357904547981e-05, "loss": 2.0491, "step": 13773000 }, { "epoch": 39.87, "learning_rate": 3.007285684512783e-05, "loss": 2.06, "step": 13773500 }, { "epoch": 39.87, "learning_rate": 3.007213319748055e-05, "loss": 2.0596, "step": 13774000 }, { "epoch": 39.87, "learning_rate": 3.0071409549833273e-05, "loss": 2.0684, "step": 13774500 }, { "epoch": 39.87, "learning_rate": 3.0070685902185996e-05, "loss": 2.0829, "step": 13775000 }, { "epoch": 39.87, "learning_rate": 3.0069962254538718e-05, "loss": 2.0599, "step": 13775500 }, { "epoch": 39.88, "learning_rate": 3.0069238606891443e-05, "loss": 2.0512, "step": 13776000 }, { "epoch": 39.88, "learning_rate": 3.006851640653946e-05, "loss": 2.0426, "step": 13776500 }, { "epoch": 39.88, "learning_rate": 3.006779275889218e-05, "loss": 2.034, "step": 13777000 }, { "epoch": 39.88, "learning_rate": 3.0067069111244907e-05, "loss": 2.0528, "step": 13777500 }, { "epoch": 39.88, "learning_rate": 3.006634546359763e-05, "loss": 2.0671, "step": 13778000 }, { "epoch": 39.88, "learning_rate": 3.0065623263245645e-05, "loss": 2.0662, "step": 13778500 }, { "epoch": 39.88, "learning_rate": 3.006489961559837e-05, "loss": 2.0341, "step": 13779000 }, { "epoch": 39.89, "learning_rate": 3.0064175967951096e-05, "loss": 2.0566, "step": 13779500 }, { "epoch": 39.89, "learning_rate": 3.006345232030382e-05, "loss": 2.0755, "step": 13780000 }, { "epoch": 39.89, "learning_rate": 3.0062730119951837e-05, "loss": 2.0419, "step": 13780500 }, { "epoch": 39.89, "learning_rate": 3.006200647230456e-05, "loss": 2.0668, "step": 13781000 }, { "epoch": 39.89, "learning_rate": 3.006128282465728e-05, "loss": 2.0723, "step": 13781500 }, { "epoch": 39.89, "learning_rate": 3.0060559177010007e-05, "loss": 2.0432, "step": 13782000 }, { "epoch": 39.89, "learning_rate": 3.005983552936273e-05, "loss": 2.0508, "step": 13782500 }, { "epoch": 39.9, "learning_rate": 3.005911188171545e-05, "loss": 2.0564, "step": 13783000 }, { "epoch": 39.9, "learning_rate": 3.005838968136347e-05, "loss": 2.0695, "step": 13783500 }, { "epoch": 39.9, "learning_rate": 3.0057666033716193e-05, "loss": 2.0339, "step": 13784000 }, { "epoch": 39.9, "learning_rate": 3.0056942386068915e-05, "loss": 2.0665, "step": 13784500 }, { "epoch": 39.9, "learning_rate": 3.0056218738421637e-05, "loss": 2.0369, "step": 13785000 }, { "epoch": 39.9, "learning_rate": 3.005549509077436e-05, "loss": 2.081, "step": 13785500 }, { "epoch": 39.9, "learning_rate": 3.0054771443127082e-05, "loss": 2.0483, "step": 13786000 }, { "epoch": 39.91, "learning_rate": 3.00540492427751e-05, "loss": 2.0534, "step": 13786500 }, { "epoch": 39.91, "learning_rate": 3.005332559512783e-05, "loss": 2.0654, "step": 13787000 }, { "epoch": 39.91, "learning_rate": 3.0052601947480552e-05, "loss": 2.056, "step": 13787500 }, { "epoch": 39.91, "learning_rate": 3.0051878299833274e-05, "loss": 2.0607, "step": 13788000 }, { "epoch": 39.91, "learning_rate": 3.0051154652185996e-05, "loss": 2.0395, "step": 13788500 }, { "epoch": 39.91, "learning_rate": 3.0050431004538722e-05, "loss": 2.057, "step": 13789000 }, { "epoch": 39.91, "learning_rate": 3.0049707356891444e-05, "loss": 2.056, "step": 13789500 }, { "epoch": 39.92, "learning_rate": 3.0048983709244166e-05, "loss": 2.0287, "step": 13790000 }, { "epoch": 39.92, "learning_rate": 3.0048261508892185e-05, "loss": 2.0396, "step": 13790500 }, { "epoch": 39.92, "learning_rate": 3.0047537861244908e-05, "loss": 2.0423, "step": 13791000 }, { "epoch": 39.92, "learning_rate": 3.004681421359763e-05, "loss": 2.0727, "step": 13791500 }, { "epoch": 39.92, "learning_rate": 3.0046090565950352e-05, "loss": 2.031, "step": 13792000 }, { "epoch": 39.92, "learning_rate": 3.0045366918303074e-05, "loss": 2.0786, "step": 13792500 }, { "epoch": 39.93, "learning_rate": 3.0044643270655797e-05, "loss": 2.0731, "step": 13793000 }, { "epoch": 39.93, "learning_rate": 3.0043921070303815e-05, "loss": 2.0495, "step": 13793500 }, { "epoch": 39.93, "learning_rate": 3.0043197422656538e-05, "loss": 2.0576, "step": 13794000 }, { "epoch": 39.93, "learning_rate": 3.004247522230456e-05, "loss": 2.0412, "step": 13794500 }, { "epoch": 39.93, "learning_rate": 3.0041751574657286e-05, "loss": 2.0529, "step": 13795000 }, { "epoch": 39.93, "learning_rate": 3.0041027927010008e-05, "loss": 2.0499, "step": 13795500 }, { "epoch": 39.93, "learning_rate": 3.004030427936273e-05, "loss": 2.0686, "step": 13796000 }, { "epoch": 39.94, "learning_rate": 3.0039580631715452e-05, "loss": 2.0635, "step": 13796500 }, { "epoch": 39.94, "learning_rate": 3.0038856984068175e-05, "loss": 2.0713, "step": 13797000 }, { "epoch": 39.94, "learning_rate": 3.0038133336420897e-05, "loss": 2.048, "step": 13797500 }, { "epoch": 39.94, "learning_rate": 3.0037409688773622e-05, "loss": 2.0541, "step": 13798000 }, { "epoch": 39.94, "learning_rate": 3.0036687488421638e-05, "loss": 2.0624, "step": 13798500 }, { "epoch": 39.94, "learning_rate": 3.003596384077436e-05, "loss": 2.0601, "step": 13799000 }, { "epoch": 39.94, "learning_rate": 3.0035240193127086e-05, "loss": 2.0536, "step": 13799500 }, { "epoch": 39.95, "learning_rate": 3.00345179927751e-05, "loss": 2.0662, "step": 13800000 }, { "epoch": 39.95, "learning_rate": 3.0033794345127824e-05, "loss": 2.0678, "step": 13800500 }, { "epoch": 39.95, "learning_rate": 3.0033070697480546e-05, "loss": 2.0624, "step": 13801000 }, { "epoch": 39.95, "learning_rate": 3.003234704983327e-05, "loss": 2.0618, "step": 13801500 }, { "epoch": 39.95, "learning_rate": 3.0031623402185997e-05, "loss": 2.0309, "step": 13802000 }, { "epoch": 39.95, "learning_rate": 3.0030899754538723e-05, "loss": 2.0441, "step": 13802500 }, { "epoch": 39.95, "learning_rate": 3.0030176106891445e-05, "loss": 2.0305, "step": 13803000 }, { "epoch": 39.96, "learning_rate": 3.0029452459244167e-05, "loss": 2.0778, "step": 13803500 }, { "epoch": 39.96, "learning_rate": 3.002872881159689e-05, "loss": 2.0494, "step": 13804000 }, { "epoch": 39.96, "learning_rate": 3.0028005163949612e-05, "loss": 2.025, "step": 13804500 }, { "epoch": 39.96, "learning_rate": 3.002728296359763e-05, "loss": 2.0497, "step": 13805000 }, { "epoch": 39.96, "learning_rate": 3.0026559315950353e-05, "loss": 2.0632, "step": 13805500 }, { "epoch": 39.96, "learning_rate": 3.0025837115598372e-05, "loss": 2.0593, "step": 13806000 }, { "epoch": 39.96, "learning_rate": 3.0025113467951094e-05, "loss": 2.0818, "step": 13806500 }, { "epoch": 39.97, "learning_rate": 3.0024389820303816e-05, "loss": 2.0342, "step": 13807000 }, { "epoch": 39.97, "learning_rate": 3.0023667619951835e-05, "loss": 2.0495, "step": 13807500 }, { "epoch": 39.97, "learning_rate": 3.0022943972304557e-05, "loss": 2.0523, "step": 13808000 }, { "epoch": 39.97, "learning_rate": 3.002222032465728e-05, "loss": 2.0411, "step": 13808500 }, { "epoch": 39.97, "learning_rate": 3.0021496677010002e-05, "loss": 2.0529, "step": 13809000 }, { "epoch": 39.97, "learning_rate": 3.002077302936273e-05, "loss": 2.047, "step": 13809500 }, { "epoch": 39.97, "learning_rate": 3.0020049381715453e-05, "loss": 2.036, "step": 13810000 }, { "epoch": 39.98, "learning_rate": 3.0019325734068175e-05, "loss": 2.0671, "step": 13810500 }, { "epoch": 39.98, "learning_rate": 3.00186020864209e-05, "loss": 2.0604, "step": 13811000 }, { "epoch": 39.98, "learning_rate": 3.0017878438773623e-05, "loss": 2.0671, "step": 13811500 }, { "epoch": 39.98, "learning_rate": 3.0017154791126345e-05, "loss": 2.0569, "step": 13812000 }, { "epoch": 39.98, "learning_rate": 3.001643259077436e-05, "loss": 2.0469, "step": 13812500 }, { "epoch": 39.98, "learning_rate": 3.001571039042238e-05, "loss": 2.0436, "step": 13813000 }, { "epoch": 39.98, "learning_rate": 3.0014986742775102e-05, "loss": 2.0437, "step": 13813500 }, { "epoch": 39.99, "learning_rate": 3.0014263095127824e-05, "loss": 2.0407, "step": 13814000 }, { "epoch": 39.99, "learning_rate": 3.001353944748055e-05, "loss": 2.0631, "step": 13814500 }, { "epoch": 39.99, "learning_rate": 3.0012815799833272e-05, "loss": 2.0607, "step": 13815000 }, { "epoch": 39.99, "learning_rate": 3.0012092152185995e-05, "loss": 2.0498, "step": 13815500 }, { "epoch": 39.99, "learning_rate": 3.0011368504538717e-05, "loss": 2.062, "step": 13816000 }, { "epoch": 39.99, "learning_rate": 3.001064485689144e-05, "loss": 2.0513, "step": 13816500 }, { "epoch": 39.99, "learning_rate": 3.0009922656539465e-05, "loss": 2.0367, "step": 13817000 }, { "epoch": 40.0, "learning_rate": 3.0009199008892187e-05, "loss": 2.0887, "step": 13817500 }, { "epoch": 40.0, "learning_rate": 3.000847536124491e-05, "loss": 2.0401, "step": 13818000 }, { "epoch": 40.0, "learning_rate": 3.000775171359763e-05, "loss": 2.0678, "step": 13818500 }, { "epoch": 40.0, "eval_accuracy": 0.6710043969127615, "eval_accuracy_mlm": 0.6363494969737239, "eval_accuracy_nsp": 0.8568583776748109, "eval_loss": 2.165112018585205, "eval_runtime": 331.1724, "eval_samples_per_second": 1317.7, "eval_steps_per_second": 54.905, "step": 13818880 }, { "epoch": 40.0, "learning_rate": 3.0007028065950354e-05, "loss": 2.0527, "step": 13819000 }, { "epoch": 40.0, "learning_rate": 3.0006304418303076e-05, "loss": 2.0217, "step": 13819500 }, { "epoch": 40.0, "learning_rate": 3.00055807706558e-05, "loss": 2.0087, "step": 13820000 }, { "epoch": 40.0, "learning_rate": 3.0004857123008524e-05, "loss": 2.0424, "step": 13820500 }, { "epoch": 40.01, "learning_rate": 3.0004133475361246e-05, "loss": 2.0629, "step": 13821000 }, { "epoch": 40.01, "learning_rate": 3.0003409827713968e-05, "loss": 2.0474, "step": 13821500 }, { "epoch": 40.01, "learning_rate": 3.000268618006669e-05, "loss": 2.0426, "step": 13822000 }, { "epoch": 40.01, "learning_rate": 3.0001962532419413e-05, "loss": 2.0398, "step": 13822500 }, { "epoch": 40.01, "learning_rate": 3.000123888477214e-05, "loss": 2.0361, "step": 13823000 }, { "epoch": 40.01, "learning_rate": 3.0000516684420154e-05, "loss": 2.0254, "step": 13823500 }, { "epoch": 40.01, "learning_rate": 2.9999793036772883e-05, "loss": 2.0243, "step": 13824000 }, { "epoch": 40.02, "learning_rate": 2.9999069389125605e-05, "loss": 2.0572, "step": 13824500 }, { "epoch": 40.02, "learning_rate": 2.9998345741478327e-05, "loss": 2.0565, "step": 13825000 }, { "epoch": 40.02, "learning_rate": 2.9997623541126346e-05, "loss": 2.0421, "step": 13825500 }, { "epoch": 40.02, "learning_rate": 2.999689989347907e-05, "loss": 2.0447, "step": 13826000 }, { "epoch": 40.02, "learning_rate": 2.999617624583179e-05, "loss": 2.0275, "step": 13826500 }, { "epoch": 40.02, "learning_rate": 2.9995452598184516e-05, "loss": 2.0414, "step": 13827000 }, { "epoch": 40.02, "learning_rate": 2.999472895053724e-05, "loss": 2.0275, "step": 13827500 }, { "epoch": 40.03, "learning_rate": 2.999400530288996e-05, "loss": 2.0679, "step": 13828000 }, { "epoch": 40.03, "learning_rate": 2.9993283102537976e-05, "loss": 2.0333, "step": 13828500 }, { "epoch": 40.03, "learning_rate": 2.9992559454890702e-05, "loss": 2.0289, "step": 13829000 }, { "epoch": 40.03, "learning_rate": 2.9991835807243424e-05, "loss": 2.0394, "step": 13829500 }, { "epoch": 40.03, "learning_rate": 2.9991112159596146e-05, "loss": 2.0503, "step": 13830000 }, { "epoch": 40.03, "learning_rate": 2.999038851194887e-05, "loss": 2.0426, "step": 13830500 }, { "epoch": 40.04, "learning_rate": 2.998966486430159e-05, "loss": 2.0397, "step": 13831000 }, { "epoch": 40.04, "learning_rate": 2.9988942663949617e-05, "loss": 2.0423, "step": 13831500 }, { "epoch": 40.04, "learning_rate": 2.998821901630234e-05, "loss": 2.0345, "step": 13832000 }, { "epoch": 40.04, "learning_rate": 2.998749536865506e-05, "loss": 2.04, "step": 13832500 }, { "epoch": 40.04, "learning_rate": 2.9986771721007783e-05, "loss": 2.051, "step": 13833000 }, { "epoch": 40.04, "learning_rate": 2.9986048073360506e-05, "loss": 2.0393, "step": 13833500 }, { "epoch": 40.04, "learning_rate": 2.9985324425713228e-05, "loss": 2.0331, "step": 13834000 }, { "epoch": 40.05, "learning_rate": 2.9984600778065953e-05, "loss": 2.0446, "step": 13834500 }, { "epoch": 40.05, "learning_rate": 2.9983877130418676e-05, "loss": 2.0422, "step": 13835000 }, { "epoch": 40.05, "learning_rate": 2.9983153482771398e-05, "loss": 2.0339, "step": 13835500 }, { "epoch": 40.05, "learning_rate": 2.998242983512412e-05, "loss": 2.0355, "step": 13836000 }, { "epoch": 40.05, "learning_rate": 2.9981706187476842e-05, "loss": 2.0386, "step": 13836500 }, { "epoch": 40.05, "learning_rate": 2.9980985434420155e-05, "loss": 2.0141, "step": 13837000 }, { "epoch": 40.05, "learning_rate": 2.9980261786772877e-05, "loss": 2.0345, "step": 13837500 }, { "epoch": 40.06, "learning_rate": 2.9979538139125603e-05, "loss": 2.0358, "step": 13838000 }, { "epoch": 40.06, "learning_rate": 2.9978814491478325e-05, "loss": 2.0317, "step": 13838500 }, { "epoch": 40.06, "learning_rate": 2.9978090843831054e-05, "loss": 2.0342, "step": 13839000 }, { "epoch": 40.06, "learning_rate": 2.9977367196183776e-05, "loss": 2.0342, "step": 13839500 }, { "epoch": 40.06, "learning_rate": 2.9976643548536498e-05, "loss": 2.0355, "step": 13840000 }, { "epoch": 40.06, "learning_rate": 2.9975921348184517e-05, "loss": 2.025, "step": 13840500 }, { "epoch": 40.06, "learning_rate": 2.997519770053724e-05, "loss": 2.0298, "step": 13841000 }, { "epoch": 40.07, "learning_rate": 2.997447405288996e-05, "loss": 2.0214, "step": 13841500 }, { "epoch": 40.07, "learning_rate": 2.9973750405242684e-05, "loss": 2.0446, "step": 13842000 }, { "epoch": 40.07, "learning_rate": 2.9973026757595406e-05, "loss": 2.0383, "step": 13842500 }, { "epoch": 40.07, "learning_rate": 2.997230310994813e-05, "loss": 2.0285, "step": 13843000 }, { "epoch": 40.07, "learning_rate": 2.9971579462300854e-05, "loss": 2.0569, "step": 13843500 }, { "epoch": 40.07, "learning_rate": 2.9970855814653576e-05, "loss": 2.0497, "step": 13844000 }, { "epoch": 40.07, "learning_rate": 2.9970133614301592e-05, "loss": 2.0273, "step": 13844500 }, { "epoch": 40.08, "learning_rate": 2.9969409966654317e-05, "loss": 2.0547, "step": 13845000 }, { "epoch": 40.08, "learning_rate": 2.996868631900704e-05, "loss": 2.0409, "step": 13845500 }, { "epoch": 40.08, "learning_rate": 2.9967962671359762e-05, "loss": 2.0245, "step": 13846000 }, { "epoch": 40.08, "learning_rate": 2.996723902371249e-05, "loss": 2.0499, "step": 13846500 }, { "epoch": 40.08, "learning_rate": 2.9966516823360506e-05, "loss": 2.0109, "step": 13847000 }, { "epoch": 40.08, "learning_rate": 2.9965794623008525e-05, "loss": 2.0434, "step": 13847500 }, { "epoch": 40.08, "learning_rate": 2.9965070975361248e-05, "loss": 2.036, "step": 13848000 }, { "epoch": 40.09, "learning_rate": 2.996434732771397e-05, "loss": 2.0232, "step": 13848500 }, { "epoch": 40.09, "learning_rate": 2.9963623680066692e-05, "loss": 2.0497, "step": 13849000 }, { "epoch": 40.09, "learning_rate": 2.9962900032419418e-05, "loss": 2.041, "step": 13849500 }, { "epoch": 40.09, "learning_rate": 2.996217638477214e-05, "loss": 2.038, "step": 13850000 }, { "epoch": 40.09, "learning_rate": 2.9961452737124862e-05, "loss": 2.0471, "step": 13850500 }, { "epoch": 40.09, "learning_rate": 2.9960729089477584e-05, "loss": 2.0379, "step": 13851000 }, { "epoch": 40.09, "learning_rate": 2.9960005441830307e-05, "loss": 2.0328, "step": 13851500 }, { "epoch": 40.1, "learning_rate": 2.9959283241478326e-05, "loss": 2.0325, "step": 13852000 }, { "epoch": 40.1, "learning_rate": 2.9958559593831048e-05, "loss": 2.0528, "step": 13852500 }, { "epoch": 40.1, "learning_rate": 2.995783594618377e-05, "loss": 2.0208, "step": 13853000 }, { "epoch": 40.1, "learning_rate": 2.9957112298536492e-05, "loss": 2.0328, "step": 13853500 }, { "epoch": 40.1, "learning_rate": 2.995638865088922e-05, "loss": 2.0698, "step": 13854000 }, { "epoch": 40.1, "learning_rate": 2.9955665003241943e-05, "loss": 2.0506, "step": 13854500 }, { "epoch": 40.1, "learning_rate": 2.9954942802889962e-05, "loss": 2.0308, "step": 13855000 }, { "epoch": 40.11, "learning_rate": 2.9954219155242685e-05, "loss": 2.0576, "step": 13855500 }, { "epoch": 40.11, "learning_rate": 2.9953495507595407e-05, "loss": 2.0622, "step": 13856000 }, { "epoch": 40.11, "learning_rate": 2.9952771859948133e-05, "loss": 2.0524, "step": 13856500 }, { "epoch": 40.11, "learning_rate": 2.9952048212300855e-05, "loss": 2.04, "step": 13857000 }, { "epoch": 40.11, "learning_rate": 2.9951324564653577e-05, "loss": 2.0509, "step": 13857500 }, { "epoch": 40.11, "learning_rate": 2.99506009170063e-05, "loss": 2.0165, "step": 13858000 }, { "epoch": 40.11, "learning_rate": 2.994987726935902e-05, "loss": 2.0342, "step": 13858500 }, { "epoch": 40.12, "learning_rate": 2.9949153621711744e-05, "loss": 2.0416, "step": 13859000 }, { "epoch": 40.12, "learning_rate": 2.994842997406447e-05, "loss": 2.069, "step": 13859500 }, { "epoch": 40.12, "learning_rate": 2.994770632641719e-05, "loss": 2.0275, "step": 13860000 }, { "epoch": 40.12, "learning_rate": 2.9946982678769914e-05, "loss": 2.0371, "step": 13860500 }, { "epoch": 40.12, "learning_rate": 2.9946259031122643e-05, "loss": 2.039, "step": 13861000 }, { "epoch": 40.12, "learning_rate": 2.994553683077066e-05, "loss": 2.0111, "step": 13861500 }, { "epoch": 40.12, "learning_rate": 2.9944813183123384e-05, "loss": 2.0319, "step": 13862000 }, { "epoch": 40.13, "learning_rate": 2.9944089535476106e-05, "loss": 2.0485, "step": 13862500 }, { "epoch": 40.13, "learning_rate": 2.994336588782883e-05, "loss": 2.0401, "step": 13863000 }, { "epoch": 40.13, "learning_rate": 2.9942643687476844e-05, "loss": 2.0641, "step": 13863500 }, { "epoch": 40.13, "learning_rate": 2.9941921487124863e-05, "loss": 2.033, "step": 13864000 }, { "epoch": 40.13, "learning_rate": 2.9941197839477585e-05, "loss": 2.033, "step": 13864500 }, { "epoch": 40.13, "learning_rate": 2.9940474191830307e-05, "loss": 2.0291, "step": 13865000 }, { "epoch": 40.13, "learning_rate": 2.9939750544183033e-05, "loss": 2.0603, "step": 13865500 }, { "epoch": 40.14, "learning_rate": 2.9939026896535755e-05, "loss": 2.0244, "step": 13866000 }, { "epoch": 40.14, "learning_rate": 2.9938303248888477e-05, "loss": 2.0186, "step": 13866500 }, { "epoch": 40.14, "learning_rate": 2.99375796012412e-05, "loss": 2.0392, "step": 13867000 }, { "epoch": 40.14, "learning_rate": 2.9936855953593922e-05, "loss": 2.0516, "step": 13867500 }, { "epoch": 40.14, "learning_rate": 2.9936132305946644e-05, "loss": 2.0324, "step": 13868000 }, { "epoch": 40.14, "learning_rate": 2.9935410105594663e-05, "loss": 2.0576, "step": 13868500 }, { "epoch": 40.15, "learning_rate": 2.9934686457947392e-05, "loss": 2.0459, "step": 13869000 }, { "epoch": 40.15, "learning_rate": 2.9933962810300114e-05, "loss": 2.0327, "step": 13869500 }, { "epoch": 40.15, "learning_rate": 2.9933239162652837e-05, "loss": 2.0082, "step": 13870000 }, { "epoch": 40.15, "learning_rate": 2.993251551500556e-05, "loss": 2.0471, "step": 13870500 }, { "epoch": 40.15, "learning_rate": 2.9931791867358284e-05, "loss": 2.0438, "step": 13871000 }, { "epoch": 40.15, "learning_rate": 2.9931068219711007e-05, "loss": 2.0501, "step": 13871500 }, { "epoch": 40.15, "learning_rate": 2.993034457206373e-05, "loss": 2.0472, "step": 13872000 }, { "epoch": 40.16, "learning_rate": 2.992962092441645e-05, "loss": 2.0366, "step": 13872500 }, { "epoch": 40.16, "learning_rate": 2.9928897276769173e-05, "loss": 2.0474, "step": 13873000 }, { "epoch": 40.16, "learning_rate": 2.9928173629121896e-05, "loss": 2.042, "step": 13873500 }, { "epoch": 40.16, "learning_rate": 2.9927451428769915e-05, "loss": 2.043, "step": 13874000 }, { "epoch": 40.16, "learning_rate": 2.9926727781122637e-05, "loss": 2.0434, "step": 13874500 }, { "epoch": 40.16, "learning_rate": 2.992600413347536e-05, "loss": 2.0715, "step": 13875000 }, { "epoch": 40.16, "learning_rate": 2.9925280485828085e-05, "loss": 2.0344, "step": 13875500 }, { "epoch": 40.17, "learning_rate": 2.9924558285476107e-05, "loss": 2.0285, "step": 13876000 }, { "epoch": 40.17, "learning_rate": 2.992383463782883e-05, "loss": 2.0485, "step": 13876500 }, { "epoch": 40.17, "learning_rate": 2.992311099018155e-05, "loss": 2.0417, "step": 13877000 }, { "epoch": 40.17, "learning_rate": 2.9922387342534274e-05, "loss": 2.0342, "step": 13877500 }, { "epoch": 40.17, "learning_rate": 2.9921663694887e-05, "loss": 2.0303, "step": 13878000 }, { "epoch": 40.17, "learning_rate": 2.992094004723972e-05, "loss": 2.0356, "step": 13878500 }, { "epoch": 40.17, "learning_rate": 2.9920216399592444e-05, "loss": 2.0473, "step": 13879000 }, { "epoch": 40.18, "learning_rate": 2.991949419924046e-05, "loss": 2.052, "step": 13879500 }, { "epoch": 40.18, "learning_rate": 2.9918770551593185e-05, "loss": 2.0492, "step": 13880000 }, { "epoch": 40.18, "learning_rate": 2.9918046903945907e-05, "loss": 2.0321, "step": 13880500 }, { "epoch": 40.18, "learning_rate": 2.991732325629863e-05, "loss": 2.0342, "step": 13881000 }, { "epoch": 40.18, "learning_rate": 2.991659960865135e-05, "loss": 2.0518, "step": 13881500 }, { "epoch": 40.18, "learning_rate": 2.991587740829937e-05, "loss": 2.0505, "step": 13882000 }, { "epoch": 40.18, "learning_rate": 2.9915153760652093e-05, "loss": 2.0375, "step": 13882500 }, { "epoch": 40.19, "learning_rate": 2.9914431560300112e-05, "loss": 2.049, "step": 13883000 }, { "epoch": 40.19, "learning_rate": 2.9913707912652837e-05, "loss": 2.047, "step": 13883500 }, { "epoch": 40.19, "learning_rate": 2.9912985712300856e-05, "loss": 2.0709, "step": 13884000 }, { "epoch": 40.19, "learning_rate": 2.991226206465358e-05, "loss": 2.0439, "step": 13884500 }, { "epoch": 40.19, "learning_rate": 2.99115384170063e-05, "loss": 2.0545, "step": 13885000 }, { "epoch": 40.19, "learning_rate": 2.991081621665432e-05, "loss": 2.0536, "step": 13885500 }, { "epoch": 40.19, "learning_rate": 2.9910092569007042e-05, "loss": 2.0365, "step": 13886000 }, { "epoch": 40.2, "learning_rate": 2.990937036865506e-05, "loss": 2.0235, "step": 13886500 }, { "epoch": 40.2, "learning_rate": 2.9908646721007783e-05, "loss": 2.0291, "step": 13887000 }, { "epoch": 40.2, "learning_rate": 2.9907923073360505e-05, "loss": 2.0407, "step": 13887500 }, { "epoch": 40.2, "learning_rate": 2.9907199425713228e-05, "loss": 2.031, "step": 13888000 }, { "epoch": 40.2, "learning_rate": 2.990647577806595e-05, "loss": 2.0425, "step": 13888500 }, { "epoch": 40.2, "learning_rate": 2.9905752130418675e-05, "loss": 2.0378, "step": 13889000 }, { "epoch": 40.2, "learning_rate": 2.9905028482771398e-05, "loss": 2.0221, "step": 13889500 }, { "epoch": 40.21, "learning_rate": 2.990430483512412e-05, "loss": 2.0064, "step": 13890000 }, { "epoch": 40.21, "learning_rate": 2.9903581187476842e-05, "loss": 2.0707, "step": 13890500 }, { "epoch": 40.21, "learning_rate": 2.9902857539829564e-05, "loss": 2.037, "step": 13891000 }, { "epoch": 40.21, "learning_rate": 2.9902133892182293e-05, "loss": 2.054, "step": 13891500 }, { "epoch": 40.21, "learning_rate": 2.9901410244535016e-05, "loss": 2.0303, "step": 13892000 }, { "epoch": 40.21, "learning_rate": 2.9900686596887738e-05, "loss": 2.042, "step": 13892500 }, { "epoch": 40.21, "learning_rate": 2.9899962949240464e-05, "loss": 2.0267, "step": 13893000 }, { "epoch": 40.22, "learning_rate": 2.9899239301593186e-05, "loss": 2.0286, "step": 13893500 }, { "epoch": 40.22, "learning_rate": 2.98985171012412e-05, "loss": 2.025, "step": 13894000 }, { "epoch": 40.22, "learning_rate": 2.9897793453593927e-05, "loss": 2.0414, "step": 13894500 }, { "epoch": 40.22, "learning_rate": 2.989706980594665e-05, "loss": 2.0383, "step": 13895000 }, { "epoch": 40.22, "learning_rate": 2.989634615829937e-05, "loss": 2.0415, "step": 13895500 }, { "epoch": 40.22, "learning_rate": 2.9895622510652094e-05, "loss": 2.023, "step": 13896000 }, { "epoch": 40.22, "learning_rate": 2.9894898863004816e-05, "loss": 2.0276, "step": 13896500 }, { "epoch": 40.23, "learning_rate": 2.9894175215357538e-05, "loss": 2.0097, "step": 13897000 }, { "epoch": 40.23, "learning_rate": 2.9893451567710264e-05, "loss": 2.0544, "step": 13897500 }, { "epoch": 40.23, "learning_rate": 2.989272936735828e-05, "loss": 2.0377, "step": 13898000 }, { "epoch": 40.23, "learning_rate": 2.9892005719711008e-05, "loss": 2.0261, "step": 13898500 }, { "epoch": 40.23, "learning_rate": 2.9891283519359027e-05, "loss": 2.0386, "step": 13899000 }, { "epoch": 40.23, "learning_rate": 2.9890561319007043e-05, "loss": 2.0374, "step": 13899500 }, { "epoch": 40.23, "learning_rate": 2.9889837671359765e-05, "loss": 2.0386, "step": 13900000 }, { "epoch": 40.24, "learning_rate": 2.9889114023712487e-05, "loss": 2.0265, "step": 13900500 }, { "epoch": 40.24, "learning_rate": 2.9888390376065213e-05, "loss": 2.0637, "step": 13901000 }, { "epoch": 40.24, "learning_rate": 2.9887666728417935e-05, "loss": 2.0485, "step": 13901500 }, { "epoch": 40.24, "learning_rate": 2.9886943080770657e-05, "loss": 2.0324, "step": 13902000 }, { "epoch": 40.24, "learning_rate": 2.9886220880418676e-05, "loss": 2.0385, "step": 13902500 }, { "epoch": 40.24, "learning_rate": 2.98854972327714e-05, "loss": 2.0355, "step": 13903000 }, { "epoch": 40.24, "learning_rate": 2.988477358512412e-05, "loss": 2.0687, "step": 13903500 }, { "epoch": 40.25, "learning_rate": 2.988405138477214e-05, "loss": 2.0489, "step": 13904000 }, { "epoch": 40.25, "learning_rate": 2.9883327737124862e-05, "loss": 2.074, "step": 13904500 }, { "epoch": 40.25, "learning_rate": 2.9882604089477584e-05, "loss": 2.0613, "step": 13905000 }, { "epoch": 40.25, "learning_rate": 2.9881880441830306e-05, "loss": 2.0467, "step": 13905500 }, { "epoch": 40.25, "learning_rate": 2.988115679418303e-05, "loss": 2.04, "step": 13906000 }, { "epoch": 40.25, "learning_rate": 2.9880433146535758e-05, "loss": 2.0118, "step": 13906500 }, { "epoch": 40.26, "learning_rate": 2.9879710946183777e-05, "loss": 2.0266, "step": 13907000 }, { "epoch": 40.26, "learning_rate": 2.98789872985365e-05, "loss": 2.0602, "step": 13907500 }, { "epoch": 40.26, "learning_rate": 2.987826365088922e-05, "loss": 2.0247, "step": 13908000 }, { "epoch": 40.26, "learning_rate": 2.9877540003241943e-05, "loss": 2.0423, "step": 13908500 }, { "epoch": 40.26, "learning_rate": 2.9876816355594666e-05, "loss": 2.0727, "step": 13909000 }, { "epoch": 40.26, "learning_rate": 2.987609270794739e-05, "loss": 2.0259, "step": 13909500 }, { "epoch": 40.26, "learning_rate": 2.9875369060300113e-05, "loss": 2.015, "step": 13910000 }, { "epoch": 40.27, "learning_rate": 2.9874645412652836e-05, "loss": 2.0258, "step": 13910500 }, { "epoch": 40.27, "learning_rate": 2.9873921765005558e-05, "loss": 2.0549, "step": 13911000 }, { "epoch": 40.27, "learning_rate": 2.987319811735828e-05, "loss": 2.0636, "step": 13911500 }, { "epoch": 40.27, "learning_rate": 2.9872474469711002e-05, "loss": 2.0249, "step": 13912000 }, { "epoch": 40.27, "learning_rate": 2.9871750822063728e-05, "loss": 2.0515, "step": 13912500 }, { "epoch": 40.27, "learning_rate": 2.987102717441645e-05, "loss": 2.0199, "step": 13913000 }, { "epoch": 40.27, "learning_rate": 2.987030352676918e-05, "loss": 2.0123, "step": 13913500 }, { "epoch": 40.28, "learning_rate": 2.98695798791219e-05, "loss": 2.038, "step": 13914000 }, { "epoch": 40.28, "learning_rate": 2.9868856231474624e-05, "loss": 2.0682, "step": 13914500 }, { "epoch": 40.28, "learning_rate": 2.9868134031122643e-05, "loss": 2.0304, "step": 13915000 }, { "epoch": 40.28, "learning_rate": 2.9867410383475365e-05, "loss": 2.0442, "step": 13915500 }, { "epoch": 40.28, "learning_rate": 2.9866686735828087e-05, "loss": 2.0657, "step": 13916000 }, { "epoch": 40.28, "learning_rate": 2.986596308818081e-05, "loss": 2.0661, "step": 13916500 }, { "epoch": 40.28, "learning_rate": 2.986523944053353e-05, "loss": 2.0723, "step": 13917000 }, { "epoch": 40.29, "learning_rate": 2.9864515792886254e-05, "loss": 2.0416, "step": 13917500 }, { "epoch": 40.29, "learning_rate": 2.986379214523898e-05, "loss": 2.0222, "step": 13918000 }, { "epoch": 40.29, "learning_rate": 2.98630684975917e-05, "loss": 2.0725, "step": 13918500 }, { "epoch": 40.29, "learning_rate": 2.9862346297239717e-05, "loss": 2.0287, "step": 13919000 }, { "epoch": 40.29, "learning_rate": 2.9861624096887736e-05, "loss": 2.0503, "step": 13919500 }, { "epoch": 40.29, "learning_rate": 2.986090044924046e-05, "loss": 2.0433, "step": 13920000 }, { "epoch": 40.29, "learning_rate": 2.986017680159318e-05, "loss": 2.0344, "step": 13920500 }, { "epoch": 40.3, "learning_rate": 2.985945315394591e-05, "loss": 2.0511, "step": 13921000 }, { "epoch": 40.3, "learning_rate": 2.9858729506298632e-05, "loss": 2.0581, "step": 13921500 }, { "epoch": 40.3, "learning_rate": 2.985800730594665e-05, "loss": 2.0444, "step": 13922000 }, { "epoch": 40.3, "learning_rate": 2.9857283658299373e-05, "loss": 2.0541, "step": 13922500 }, { "epoch": 40.3, "learning_rate": 2.9856560010652095e-05, "loss": 2.0319, "step": 13923000 }, { "epoch": 40.3, "learning_rate": 2.9855836363004817e-05, "loss": 2.0564, "step": 13923500 }, { "epoch": 40.3, "learning_rate": 2.9855112715357543e-05, "loss": 2.0537, "step": 13924000 }, { "epoch": 40.31, "learning_rate": 2.9854389067710265e-05, "loss": 2.0292, "step": 13924500 }, { "epoch": 40.31, "learning_rate": 2.9853665420062988e-05, "loss": 2.0693, "step": 13925000 }, { "epoch": 40.31, "learning_rate": 2.985294177241571e-05, "loss": 2.0272, "step": 13925500 }, { "epoch": 40.31, "learning_rate": 2.9852218124768432e-05, "loss": 2.0714, "step": 13926000 }, { "epoch": 40.31, "learning_rate": 2.9851494477121154e-05, "loss": 2.0556, "step": 13926500 }, { "epoch": 40.31, "learning_rate": 2.985077082947388e-05, "loss": 2.0304, "step": 13927000 }, { "epoch": 40.31, "learning_rate": 2.9850047181826602e-05, "loss": 2.027, "step": 13927500 }, { "epoch": 40.32, "learning_rate": 2.984932353417933e-05, "loss": 2.0654, "step": 13928000 }, { "epoch": 40.32, "learning_rate": 2.9848601333827347e-05, "loss": 2.0487, "step": 13928500 }, { "epoch": 40.32, "learning_rate": 2.984787768618007e-05, "loss": 2.0268, "step": 13929000 }, { "epoch": 40.32, "learning_rate": 2.9847154038532795e-05, "loss": 2.0437, "step": 13929500 }, { "epoch": 40.32, "learning_rate": 2.9846430390885517e-05, "loss": 2.042, "step": 13930000 }, { "epoch": 40.32, "learning_rate": 2.9845708190533532e-05, "loss": 2.0542, "step": 13930500 }, { "epoch": 40.32, "learning_rate": 2.9844984542886255e-05, "loss": 2.0526, "step": 13931000 }, { "epoch": 40.33, "learning_rate": 2.984426089523898e-05, "loss": 2.0337, "step": 13931500 }, { "epoch": 40.33, "learning_rate": 2.9843537247591702e-05, "loss": 2.0452, "step": 13932000 }, { "epoch": 40.33, "learning_rate": 2.9842813599944425e-05, "loss": 2.0771, "step": 13932500 }, { "epoch": 40.33, "learning_rate": 2.9842089952297147e-05, "loss": 2.0429, "step": 13933000 }, { "epoch": 40.33, "learning_rate": 2.984136630464987e-05, "loss": 2.0298, "step": 13933500 }, { "epoch": 40.33, "learning_rate": 2.9840642657002595e-05, "loss": 2.021, "step": 13934000 }, { "epoch": 40.33, "learning_rate": 2.9839921903945907e-05, "loss": 2.0244, "step": 13934500 }, { "epoch": 40.34, "learning_rate": 2.9839199703593923e-05, "loss": 2.06, "step": 13935000 }, { "epoch": 40.34, "learning_rate": 2.983847750324194e-05, "loss": 2.0331, "step": 13935500 }, { "epoch": 40.34, "learning_rate": 2.9837753855594664e-05, "loss": 2.0379, "step": 13936000 }, { "epoch": 40.34, "learning_rate": 2.9837030207947393e-05, "loss": 2.0385, "step": 13936500 }, { "epoch": 40.34, "learning_rate": 2.9836306560300115e-05, "loss": 2.0331, "step": 13937000 }, { "epoch": 40.34, "learning_rate": 2.9835582912652837e-05, "loss": 2.0577, "step": 13937500 }, { "epoch": 40.34, "learning_rate": 2.983485926500556e-05, "loss": 2.0307, "step": 13938000 }, { "epoch": 40.35, "learning_rate": 2.983413561735828e-05, "loss": 2.0575, "step": 13938500 }, { "epoch": 40.35, "learning_rate": 2.9833411969711007e-05, "loss": 2.0526, "step": 13939000 }, { "epoch": 40.35, "learning_rate": 2.9832689769359023e-05, "loss": 2.0715, "step": 13939500 }, { "epoch": 40.35, "learning_rate": 2.9831966121711745e-05, "loss": 2.0389, "step": 13940000 }, { "epoch": 40.35, "learning_rate": 2.983124247406447e-05, "loss": 2.0475, "step": 13940500 }, { "epoch": 40.35, "learning_rate": 2.9830518826417193e-05, "loss": 2.0631, "step": 13941000 }, { "epoch": 40.35, "learning_rate": 2.9829795178769915e-05, "loss": 2.0371, "step": 13941500 }, { "epoch": 40.36, "learning_rate": 2.9829071531122637e-05, "loss": 2.0099, "step": 13942000 }, { "epoch": 40.36, "learning_rate": 2.982834788347536e-05, "loss": 2.0584, "step": 13942500 }, { "epoch": 40.36, "learning_rate": 2.9827624235828082e-05, "loss": 2.0585, "step": 13943000 }, { "epoch": 40.36, "learning_rate": 2.982690058818081e-05, "loss": 2.048, "step": 13943500 }, { "epoch": 40.36, "learning_rate": 2.9826176940533533e-05, "loss": 2.0291, "step": 13944000 }, { "epoch": 40.36, "learning_rate": 2.9825454740181552e-05, "loss": 2.0598, "step": 13944500 }, { "epoch": 40.37, "learning_rate": 2.9824731092534274e-05, "loss": 2.0483, "step": 13945000 }, { "epoch": 40.37, "learning_rate": 2.9824007444886997e-05, "loss": 2.0732, "step": 13945500 }, { "epoch": 40.37, "learning_rate": 2.9823283797239722e-05, "loss": 2.038, "step": 13946000 }, { "epoch": 40.37, "learning_rate": 2.9822560149592444e-05, "loss": 2.0328, "step": 13946500 }, { "epoch": 40.37, "learning_rate": 2.9821836501945167e-05, "loss": 2.0457, "step": 13947000 }, { "epoch": 40.37, "learning_rate": 2.982111285429789e-05, "loss": 2.05, "step": 13947500 }, { "epoch": 40.37, "learning_rate": 2.982038920665061e-05, "loss": 2.0555, "step": 13948000 }, { "epoch": 40.38, "learning_rate": 2.981966700629863e-05, "loss": 2.0617, "step": 13948500 }, { "epoch": 40.38, "learning_rate": 2.9818946253241942e-05, "loss": 2.0463, "step": 13949000 }, { "epoch": 40.38, "learning_rate": 2.9818222605594665e-05, "loss": 2.0264, "step": 13949500 }, { "epoch": 40.38, "learning_rate": 2.9817498957947387e-05, "loss": 2.0268, "step": 13950000 }, { "epoch": 40.38, "learning_rate": 2.981677531030011e-05, "loss": 2.0428, "step": 13950500 }, { "epoch": 40.38, "learning_rate": 2.9816053109948128e-05, "loss": 2.0563, "step": 13951000 }, { "epoch": 40.38, "learning_rate": 2.9815329462300857e-05, "loss": 2.0295, "step": 13951500 }, { "epoch": 40.39, "learning_rate": 2.981460581465358e-05, "loss": 2.0654, "step": 13952000 }, { "epoch": 40.39, "learning_rate": 2.98138821670063e-05, "loss": 2.0157, "step": 13952500 }, { "epoch": 40.39, "learning_rate": 2.9813158519359024e-05, "loss": 2.0392, "step": 13953000 }, { "epoch": 40.39, "learning_rate": 2.9812434871711746e-05, "loss": 2.0251, "step": 13953500 }, { "epoch": 40.39, "learning_rate": 2.981171122406447e-05, "loss": 2.0592, "step": 13954000 }, { "epoch": 40.39, "learning_rate": 2.9810987576417194e-05, "loss": 2.0356, "step": 13954500 }, { "epoch": 40.39, "learning_rate": 2.9810263928769916e-05, "loss": 2.0408, "step": 13955000 }, { "epoch": 40.4, "learning_rate": 2.9809543175713228e-05, "loss": 2.0325, "step": 13955500 }, { "epoch": 40.4, "learning_rate": 2.980881952806595e-05, "loss": 2.0526, "step": 13956000 }, { "epoch": 40.4, "learning_rate": 2.9808095880418673e-05, "loss": 2.0308, "step": 13956500 }, { "epoch": 40.4, "learning_rate": 2.98073722327714e-05, "loss": 2.0262, "step": 13957000 }, { "epoch": 40.4, "learning_rate": 2.980664858512412e-05, "loss": 2.0554, "step": 13957500 }, { "epoch": 40.4, "learning_rate": 2.9805924937476843e-05, "loss": 2.067, "step": 13958000 }, { "epoch": 40.4, "learning_rate": 2.9805201289829565e-05, "loss": 2.0664, "step": 13958500 }, { "epoch": 40.41, "learning_rate": 2.9804477642182294e-05, "loss": 2.0358, "step": 13959000 }, { "epoch": 40.41, "learning_rate": 2.980375544183031e-05, "loss": 2.0623, "step": 13959500 }, { "epoch": 40.41, "learning_rate": 2.9803031794183035e-05, "loss": 2.0526, "step": 13960000 }, { "epoch": 40.41, "learning_rate": 2.9802308146535757e-05, "loss": 2.0836, "step": 13960500 }, { "epoch": 40.41, "learning_rate": 2.980158449888848e-05, "loss": 2.0455, "step": 13961000 }, { "epoch": 40.41, "learning_rate": 2.9800860851241202e-05, "loss": 2.0537, "step": 13961500 }, { "epoch": 40.41, "learning_rate": 2.980013865088922e-05, "loss": 2.0446, "step": 13962000 }, { "epoch": 40.42, "learning_rate": 2.9799415003241943e-05, "loss": 2.0412, "step": 13962500 }, { "epoch": 40.42, "learning_rate": 2.9798691355594665e-05, "loss": 2.0579, "step": 13963000 }, { "epoch": 40.42, "learning_rate": 2.9797967707947388e-05, "loss": 2.0298, "step": 13963500 }, { "epoch": 40.42, "learning_rate": 2.979724406030011e-05, "loss": 2.0462, "step": 13964000 }, { "epoch": 40.42, "learning_rate": 2.9796520412652835e-05, "loss": 2.0449, "step": 13964500 }, { "epoch": 40.42, "learning_rate": 2.9795796765005558e-05, "loss": 2.054, "step": 13965000 }, { "epoch": 40.42, "learning_rate": 2.979507311735828e-05, "loss": 2.0595, "step": 13965500 }, { "epoch": 40.43, "learning_rate": 2.97943509170063e-05, "loss": 2.0575, "step": 13966000 }, { "epoch": 40.43, "learning_rate": 2.9793627269359024e-05, "loss": 2.0447, "step": 13966500 }, { "epoch": 40.43, "learning_rate": 2.9792905069007043e-05, "loss": 2.0386, "step": 13967000 }, { "epoch": 40.43, "learning_rate": 2.9792181421359766e-05, "loss": 2.0732, "step": 13967500 }, { "epoch": 40.43, "learning_rate": 2.9791457773712488e-05, "loss": 2.0549, "step": 13968000 }, { "epoch": 40.43, "learning_rate": 2.979073412606521e-05, "loss": 2.0669, "step": 13968500 }, { "epoch": 40.43, "learning_rate": 2.9790010478417936e-05, "loss": 2.0271, "step": 13969000 }, { "epoch": 40.44, "learning_rate": 2.9789286830770658e-05, "loss": 2.0438, "step": 13969500 }, { "epoch": 40.44, "learning_rate": 2.978856318312338e-05, "loss": 2.0318, "step": 13970000 }, { "epoch": 40.44, "learning_rate": 2.9787839535476102e-05, "loss": 2.0196, "step": 13970500 }, { "epoch": 40.44, "learning_rate": 2.9787115887828825e-05, "loss": 2.0363, "step": 13971000 }, { "epoch": 40.44, "learning_rate": 2.9786393687476844e-05, "loss": 2.0441, "step": 13971500 }, { "epoch": 40.44, "learning_rate": 2.9785670039829566e-05, "loss": 2.0416, "step": 13972000 }, { "epoch": 40.44, "learning_rate": 2.9784947839477585e-05, "loss": 2.048, "step": 13972500 }, { "epoch": 40.45, "learning_rate": 2.9784224191830307e-05, "loss": 2.042, "step": 13973000 }, { "epoch": 40.45, "learning_rate": 2.978350054418303e-05, "loss": 2.0584, "step": 13973500 }, { "epoch": 40.45, "learning_rate": 2.9782776896535758e-05, "loss": 2.0529, "step": 13974000 }, { "epoch": 40.45, "learning_rate": 2.978205324888848e-05, "loss": 2.0206, "step": 13974500 }, { "epoch": 40.45, "learning_rate": 2.9781329601241203e-05, "loss": 2.0499, "step": 13975000 }, { "epoch": 40.45, "learning_rate": 2.9780605953593925e-05, "loss": 2.0436, "step": 13975500 }, { "epoch": 40.45, "learning_rate": 2.9779885200537237e-05, "loss": 2.0175, "step": 13976000 }, { "epoch": 40.46, "learning_rate": 2.9779161552889963e-05, "loss": 2.0379, "step": 13976500 }, { "epoch": 40.46, "learning_rate": 2.9778437905242685e-05, "loss": 2.0604, "step": 13977000 }, { "epoch": 40.46, "learning_rate": 2.9777714257595407e-05, "loss": 2.0216, "step": 13977500 }, { "epoch": 40.46, "learning_rate": 2.977699060994813e-05, "loss": 2.0444, "step": 13978000 }, { "epoch": 40.46, "learning_rate": 2.9776266962300852e-05, "loss": 2.0671, "step": 13978500 }, { "epoch": 40.46, "learning_rate": 2.9775543314653574e-05, "loss": 2.0441, "step": 13979000 }, { "epoch": 40.46, "learning_rate": 2.97748196670063e-05, "loss": 2.0365, "step": 13979500 }, { "epoch": 40.47, "learning_rate": 2.9774096019359022e-05, "loss": 2.0489, "step": 13980000 }, { "epoch": 40.47, "learning_rate": 2.9773373819007037e-05, "loss": 2.0215, "step": 13980500 }, { "epoch": 40.47, "learning_rate": 2.9772650171359763e-05, "loss": 2.0469, "step": 13981000 }, { "epoch": 40.47, "learning_rate": 2.977192652371249e-05, "loss": 2.0352, "step": 13981500 }, { "epoch": 40.47, "learning_rate": 2.9771202876065214e-05, "loss": 2.043, "step": 13982000 }, { "epoch": 40.47, "learning_rate": 2.9770479228417937e-05, "loss": 2.0704, "step": 13982500 }, { "epoch": 40.48, "learning_rate": 2.9769757028065952e-05, "loss": 2.0766, "step": 13983000 }, { "epoch": 40.48, "learning_rate": 2.9769033380418678e-05, "loss": 2.0489, "step": 13983500 }, { "epoch": 40.48, "learning_rate": 2.97683097327714e-05, "loss": 2.0573, "step": 13984000 }, { "epoch": 40.48, "learning_rate": 2.9767586085124122e-05, "loss": 2.0459, "step": 13984500 }, { "epoch": 40.48, "learning_rate": 2.9766862437476844e-05, "loss": 2.0254, "step": 13985000 }, { "epoch": 40.48, "learning_rate": 2.9766138789829567e-05, "loss": 2.0549, "step": 13985500 }, { "epoch": 40.48, "learning_rate": 2.9765416589477586e-05, "loss": 2.0611, "step": 13986000 }, { "epoch": 40.49, "learning_rate": 2.9764692941830308e-05, "loss": 2.01, "step": 13986500 }, { "epoch": 40.49, "learning_rate": 2.976396929418303e-05, "loss": 2.0598, "step": 13987000 }, { "epoch": 40.49, "learning_rate": 2.9763245646535752e-05, "loss": 2.0153, "step": 13987500 }, { "epoch": 40.49, "learning_rate": 2.9762521998888478e-05, "loss": 2.0586, "step": 13988000 }, { "epoch": 40.49, "learning_rate": 2.9761798351241204e-05, "loss": 2.0063, "step": 13988500 }, { "epoch": 40.49, "learning_rate": 2.976107470359393e-05, "loss": 2.0323, "step": 13989000 }, { "epoch": 40.49, "learning_rate": 2.976035105594665e-05, "loss": 2.0748, "step": 13989500 }, { "epoch": 40.5, "learning_rate": 2.9759627408299374e-05, "loss": 2.0757, "step": 13990000 }, { "epoch": 40.5, "learning_rate": 2.9758903760652096e-05, "loss": 2.0394, "step": 13990500 }, { "epoch": 40.5, "learning_rate": 2.9758183007595408e-05, "loss": 2.0208, "step": 13991000 }, { "epoch": 40.5, "learning_rate": 2.975745935994813e-05, "loss": 2.0359, "step": 13991500 }, { "epoch": 40.5, "learning_rate": 2.975673715959615e-05, "loss": 2.0459, "step": 13992000 }, { "epoch": 40.5, "learning_rate": 2.975601351194887e-05, "loss": 2.045, "step": 13992500 }, { "epoch": 40.5, "learning_rate": 2.9755289864301594e-05, "loss": 2.0209, "step": 13993000 }, { "epoch": 40.51, "learning_rate": 2.9754566216654316e-05, "loss": 2.0294, "step": 13993500 }, { "epoch": 40.51, "learning_rate": 2.9753844016302335e-05, "loss": 2.0396, "step": 13994000 }, { "epoch": 40.51, "learning_rate": 2.9753120368655057e-05, "loss": 2.0392, "step": 13994500 }, { "epoch": 40.51, "learning_rate": 2.975239672100778e-05, "loss": 2.0594, "step": 13995000 }, { "epoch": 40.51, "learning_rate": 2.97516730733605e-05, "loss": 2.0442, "step": 13995500 }, { "epoch": 40.51, "learning_rate": 2.975095087300852e-05, "loss": 2.052, "step": 13996000 }, { "epoch": 40.51, "learning_rate": 2.975022722536125e-05, "loss": 2.0684, "step": 13996500 }, { "epoch": 40.52, "learning_rate": 2.9749503577713972e-05, "loss": 2.0623, "step": 13997000 }, { "epoch": 40.52, "learning_rate": 2.9748779930066694e-05, "loss": 2.042, "step": 13997500 }, { "epoch": 40.52, "learning_rate": 2.9748056282419416e-05, "loss": 2.0645, "step": 13998000 }, { "epoch": 40.52, "learning_rate": 2.9747334082067435e-05, "loss": 2.0455, "step": 13998500 }, { "epoch": 40.52, "learning_rate": 2.9746610434420157e-05, "loss": 2.0267, "step": 13999000 }, { "epoch": 40.52, "learning_rate": 2.974588678677288e-05, "loss": 2.0496, "step": 13999500 }, { "epoch": 40.52, "learning_rate": 2.9745163139125602e-05, "loss": 2.0741, "step": 14000000 }, { "epoch": 40.53, "learning_rate": 2.9744439491478328e-05, "loss": 2.0363, "step": 14000500 }, { "epoch": 40.53, "learning_rate": 2.974371584383105e-05, "loss": 2.029, "step": 14001000 }, { "epoch": 40.53, "learning_rate": 2.9742992196183772e-05, "loss": 2.0327, "step": 14001500 }, { "epoch": 40.53, "learning_rate": 2.9742268548536494e-05, "loss": 2.0394, "step": 14002000 }, { "epoch": 40.53, "learning_rate": 2.9741544900889216e-05, "loss": 2.0473, "step": 14002500 }, { "epoch": 40.53, "learning_rate": 2.9740821253241942e-05, "loss": 2.0338, "step": 14003000 }, { "epoch": 40.53, "learning_rate": 2.9740097605594664e-05, "loss": 2.0442, "step": 14003500 }, { "epoch": 40.54, "learning_rate": 2.9739373957947393e-05, "loss": 2.0365, "step": 14004000 }, { "epoch": 40.54, "learning_rate": 2.9738650310300116e-05, "loss": 2.0585, "step": 14004500 }, { "epoch": 40.54, "learning_rate": 2.9737926662652838e-05, "loss": 2.0641, "step": 14005000 }, { "epoch": 40.54, "learning_rate": 2.973720301500556e-05, "loss": 2.0603, "step": 14005500 }, { "epoch": 40.54, "learning_rate": 2.9736479367358282e-05, "loss": 2.0569, "step": 14006000 }, { "epoch": 40.54, "learning_rate": 2.97357571670063e-05, "loss": 2.0345, "step": 14006500 }, { "epoch": 40.54, "learning_rate": 2.9735033519359023e-05, "loss": 2.056, "step": 14007000 }, { "epoch": 40.55, "learning_rate": 2.9734309871711746e-05, "loss": 2.0445, "step": 14007500 }, { "epoch": 40.55, "learning_rate": 2.9733586224064468e-05, "loss": 2.0576, "step": 14008000 }, { "epoch": 40.55, "learning_rate": 2.9732864023712487e-05, "loss": 2.0562, "step": 14008500 }, { "epoch": 40.55, "learning_rate": 2.973214037606521e-05, "loss": 2.0564, "step": 14009000 }, { "epoch": 40.55, "learning_rate": 2.973141672841793e-05, "loss": 2.0282, "step": 14009500 }, { "epoch": 40.55, "learning_rate": 2.9730693080770654e-05, "loss": 2.0679, "step": 14010000 }, { "epoch": 40.55, "learning_rate": 2.972996943312338e-05, "loss": 2.0298, "step": 14010500 }, { "epoch": 40.56, "learning_rate": 2.9729245785476105e-05, "loss": 2.0443, "step": 14011000 }, { "epoch": 40.56, "learning_rate": 2.972852213782883e-05, "loss": 2.0548, "step": 14011500 }, { "epoch": 40.56, "learning_rate": 2.9727798490181553e-05, "loss": 2.0426, "step": 14012000 }, { "epoch": 40.56, "learning_rate": 2.9727076289829568e-05, "loss": 2.0422, "step": 14012500 }, { "epoch": 40.56, "learning_rate": 2.9726352642182294e-05, "loss": 2.0387, "step": 14013000 }, { "epoch": 40.56, "learning_rate": 2.9725628994535016e-05, "loss": 2.0321, "step": 14013500 }, { "epoch": 40.56, "learning_rate": 2.9724905346887738e-05, "loss": 2.0528, "step": 14014000 }, { "epoch": 40.57, "learning_rate": 2.9724183146535757e-05, "loss": 2.0263, "step": 14014500 }, { "epoch": 40.57, "learning_rate": 2.972345949888848e-05, "loss": 2.0317, "step": 14015000 }, { "epoch": 40.57, "learning_rate": 2.9722735851241202e-05, "loss": 2.0558, "step": 14015500 }, { "epoch": 40.57, "learning_rate": 2.9722012203593924e-05, "loss": 2.0401, "step": 14016000 }, { "epoch": 40.57, "learning_rate": 2.9721288555946646e-05, "loss": 2.05, "step": 14016500 }, { "epoch": 40.57, "learning_rate": 2.972056490829937e-05, "loss": 2.0573, "step": 14017000 }, { "epoch": 40.57, "learning_rate": 2.9719841260652094e-05, "loss": 2.06, "step": 14017500 }, { "epoch": 40.58, "learning_rate": 2.9719117613004816e-05, "loss": 2.0499, "step": 14018000 }, { "epoch": 40.58, "learning_rate": 2.9718393965357545e-05, "loss": 2.021, "step": 14018500 }, { "epoch": 40.58, "learning_rate": 2.9717670317710268e-05, "loss": 2.0429, "step": 14019000 }, { "epoch": 40.58, "learning_rate": 2.971694667006299e-05, "loss": 2.0546, "step": 14019500 }, { "epoch": 40.58, "learning_rate": 2.9716223022415712e-05, "loss": 2.0468, "step": 14020000 }, { "epoch": 40.58, "learning_rate": 2.9715499374768434e-05, "loss": 2.0425, "step": 14020500 }, { "epoch": 40.59, "learning_rate": 2.9714778621711746e-05, "loss": 2.0451, "step": 14021000 }, { "epoch": 40.59, "learning_rate": 2.971405497406447e-05, "loss": 2.048, "step": 14021500 }, { "epoch": 40.59, "learning_rate": 2.9713331326417194e-05, "loss": 2.0745, "step": 14022000 }, { "epoch": 40.59, "learning_rate": 2.9712607678769917e-05, "loss": 2.0266, "step": 14022500 }, { "epoch": 40.59, "learning_rate": 2.9711885478417932e-05, "loss": 2.0462, "step": 14023000 }, { "epoch": 40.59, "learning_rate": 2.971116327806595e-05, "loss": 2.0622, "step": 14023500 }, { "epoch": 40.59, "learning_rate": 2.9710439630418673e-05, "loss": 2.0327, "step": 14024000 }, { "epoch": 40.6, "learning_rate": 2.9709715982771396e-05, "loss": 2.024, "step": 14024500 }, { "epoch": 40.6, "learning_rate": 2.970899233512412e-05, "loss": 2.0547, "step": 14025000 }, { "epoch": 40.6, "learning_rate": 2.9708268687476843e-05, "loss": 2.0554, "step": 14025500 }, { "epoch": 40.6, "learning_rate": 2.9707545039829566e-05, "loss": 2.0507, "step": 14026000 }, { "epoch": 40.6, "learning_rate": 2.9706821392182295e-05, "loss": 2.063, "step": 14026500 }, { "epoch": 40.6, "learning_rate": 2.9706097744535017e-05, "loss": 2.0482, "step": 14027000 }, { "epoch": 40.6, "learning_rate": 2.970537409688774e-05, "loss": 2.0671, "step": 14027500 }, { "epoch": 40.61, "learning_rate": 2.970465044924046e-05, "loss": 2.0479, "step": 14028000 }, { "epoch": 40.61, "learning_rate": 2.9703926801593184e-05, "loss": 2.0436, "step": 14028500 }, { "epoch": 40.61, "learning_rate": 2.970320315394591e-05, "loss": 2.043, "step": 14029000 }, { "epoch": 40.61, "learning_rate": 2.970247950629863e-05, "loss": 2.0464, "step": 14029500 }, { "epoch": 40.61, "learning_rate": 2.9701755858651354e-05, "loss": 2.0383, "step": 14030000 }, { "epoch": 40.61, "learning_rate": 2.9701032211004076e-05, "loss": 2.0351, "step": 14030500 }, { "epoch": 40.61, "learning_rate": 2.9700310010652095e-05, "loss": 2.0619, "step": 14031000 }, { "epoch": 40.62, "learning_rate": 2.9699586363004817e-05, "loss": 2.0234, "step": 14031500 }, { "epoch": 40.62, "learning_rate": 2.969886271535754e-05, "loss": 2.0278, "step": 14032000 }, { "epoch": 40.62, "learning_rate": 2.969813906771026e-05, "loss": 2.0545, "step": 14032500 }, { "epoch": 40.62, "learning_rate": 2.969741686735828e-05, "loss": 2.0754, "step": 14033000 }, { "epoch": 40.62, "learning_rate": 2.969669321971101e-05, "loss": 2.0523, "step": 14033500 }, { "epoch": 40.62, "learning_rate": 2.9695969572063732e-05, "loss": 2.0125, "step": 14034000 }, { "epoch": 40.62, "learning_rate": 2.9695245924416454e-05, "loss": 2.0728, "step": 14034500 }, { "epoch": 40.63, "learning_rate": 2.9694522276769176e-05, "loss": 2.0304, "step": 14035000 }, { "epoch": 40.63, "learning_rate": 2.96937986291219e-05, "loss": 2.0506, "step": 14035500 }, { "epoch": 40.63, "learning_rate": 2.969307498147462e-05, "loss": 2.0487, "step": 14036000 }, { "epoch": 40.63, "learning_rate": 2.969235278112264e-05, "loss": 2.0337, "step": 14036500 }, { "epoch": 40.63, "learning_rate": 2.9691629133475362e-05, "loss": 2.0223, "step": 14037000 }, { "epoch": 40.63, "learning_rate": 2.9690905485828084e-05, "loss": 2.0578, "step": 14037500 }, { "epoch": 40.63, "learning_rate": 2.9690183285476103e-05, "loss": 2.0284, "step": 14038000 }, { "epoch": 40.64, "learning_rate": 2.9689459637828825e-05, "loss": 2.0679, "step": 14038500 }, { "epoch": 40.64, "learning_rate": 2.9688737437476844e-05, "loss": 2.0469, "step": 14039000 }, { "epoch": 40.64, "learning_rate": 2.9688013789829566e-05, "loss": 2.0193, "step": 14039500 }, { "epoch": 40.64, "learning_rate": 2.968729014218229e-05, "loss": 2.0289, "step": 14040000 }, { "epoch": 40.64, "learning_rate": 2.968656649453501e-05, "loss": 2.0474, "step": 14040500 }, { "epoch": 40.64, "learning_rate": 2.968584284688774e-05, "loss": 2.032, "step": 14041000 }, { "epoch": 40.64, "learning_rate": 2.9685119199240462e-05, "loss": 2.0353, "step": 14041500 }, { "epoch": 40.65, "learning_rate": 2.9684395551593184e-05, "loss": 2.0526, "step": 14042000 }, { "epoch": 40.65, "learning_rate": 2.968367190394591e-05, "loss": 2.0272, "step": 14042500 }, { "epoch": 40.65, "learning_rate": 2.9682948256298632e-05, "loss": 2.035, "step": 14043000 }, { "epoch": 40.65, "learning_rate": 2.9682224608651354e-05, "loss": 2.0506, "step": 14043500 }, { "epoch": 40.65, "learning_rate": 2.9681500961004077e-05, "loss": 2.0517, "step": 14044000 }, { "epoch": 40.65, "learning_rate": 2.96807773133568e-05, "loss": 2.0279, "step": 14044500 }, { "epoch": 40.65, "learning_rate": 2.9680053665709525e-05, "loss": 2.0385, "step": 14045000 }, { "epoch": 40.66, "learning_rate": 2.9679330018062247e-05, "loss": 2.0264, "step": 14045500 }, { "epoch": 40.66, "learning_rate": 2.967860637041497e-05, "loss": 2.0319, "step": 14046000 }, { "epoch": 40.66, "learning_rate": 2.967788272276769e-05, "loss": 2.0586, "step": 14046500 }, { "epoch": 40.66, "learning_rate": 2.9677159075120413e-05, "loss": 2.0285, "step": 14047000 }, { "epoch": 40.66, "learning_rate": 2.9676435427473136e-05, "loss": 2.0387, "step": 14047500 }, { "epoch": 40.66, "learning_rate": 2.9675711779825865e-05, "loss": 2.0254, "step": 14048000 }, { "epoch": 40.66, "learning_rate": 2.9674988132178587e-05, "loss": 2.0423, "step": 14048500 }, { "epoch": 40.67, "learning_rate": 2.9674264484531313e-05, "loss": 2.0463, "step": 14049000 }, { "epoch": 40.67, "learning_rate": 2.9673542284179328e-05, "loss": 2.0424, "step": 14049500 }, { "epoch": 40.67, "learning_rate": 2.967281863653205e-05, "loss": 2.0359, "step": 14050000 }, { "epoch": 40.67, "learning_rate": 2.9672094988884776e-05, "loss": 2.0436, "step": 14050500 }, { "epoch": 40.67, "learning_rate": 2.967137278853279e-05, "loss": 2.0444, "step": 14051000 }, { "epoch": 40.67, "learning_rate": 2.967065058818081e-05, "loss": 2.051, "step": 14051500 }, { "epoch": 40.67, "learning_rate": 2.9669926940533533e-05, "loss": 2.0443, "step": 14052000 }, { "epoch": 40.68, "learning_rate": 2.9669203292886255e-05, "loss": 2.0351, "step": 14052500 }, { "epoch": 40.68, "learning_rate": 2.9668479645238977e-05, "loss": 2.0211, "step": 14053000 }, { "epoch": 40.68, "learning_rate": 2.96677559975917e-05, "loss": 2.0748, "step": 14053500 }, { "epoch": 40.68, "learning_rate": 2.966703379723972e-05, "loss": 2.0538, "step": 14054000 }, { "epoch": 40.68, "learning_rate": 2.966631014959244e-05, "loss": 2.0576, "step": 14054500 }, { "epoch": 40.68, "learning_rate": 2.9665586501945163e-05, "loss": 2.0637, "step": 14055000 }, { "epoch": 40.68, "learning_rate": 2.966486285429789e-05, "loss": 2.0454, "step": 14055500 }, { "epoch": 40.69, "learning_rate": 2.966414065394591e-05, "loss": 2.0467, "step": 14056000 }, { "epoch": 40.69, "learning_rate": 2.9663417006298633e-05, "loss": 2.0467, "step": 14056500 }, { "epoch": 40.69, "learning_rate": 2.9662693358651355e-05, "loss": 2.0642, "step": 14057000 }, { "epoch": 40.69, "learning_rate": 2.9661969711004077e-05, "loss": 2.0279, "step": 14057500 }, { "epoch": 40.69, "learning_rate": 2.96612460633568e-05, "loss": 2.0448, "step": 14058000 }, { "epoch": 40.69, "learning_rate": 2.9660522415709525e-05, "loss": 2.0465, "step": 14058500 }, { "epoch": 40.7, "learning_rate": 2.9659798768062248e-05, "loss": 2.0448, "step": 14059000 }, { "epoch": 40.7, "learning_rate": 2.965907512041497e-05, "loss": 2.0165, "step": 14059500 }, { "epoch": 40.7, "learning_rate": 2.9658351472767692e-05, "loss": 2.0575, "step": 14060000 }, { "epoch": 40.7, "learning_rate": 2.9657627825120414e-05, "loss": 2.0519, "step": 14060500 }, { "epoch": 40.7, "learning_rate": 2.965690417747314e-05, "loss": 2.0714, "step": 14061000 }, { "epoch": 40.7, "learning_rate": 2.9656180529825862e-05, "loss": 2.0379, "step": 14061500 }, { "epoch": 40.7, "learning_rate": 2.9655458329473878e-05, "loss": 2.0492, "step": 14062000 }, { "epoch": 40.71, "learning_rate": 2.96547346818266e-05, "loss": 2.053, "step": 14062500 }, { "epoch": 40.71, "learning_rate": 2.965401248147462e-05, "loss": 2.0404, "step": 14063000 }, { "epoch": 40.71, "learning_rate": 2.9653288833827348e-05, "loss": 2.0571, "step": 14063500 }, { "epoch": 40.71, "learning_rate": 2.965256518618007e-05, "loss": 2.0379, "step": 14064000 }, { "epoch": 40.71, "learning_rate": 2.9651841538532792e-05, "loss": 2.0691, "step": 14064500 }, { "epoch": 40.71, "learning_rate": 2.9651117890885515e-05, "loss": 2.0315, "step": 14065000 }, { "epoch": 40.71, "learning_rate": 2.965039424323824e-05, "loss": 2.0466, "step": 14065500 }, { "epoch": 40.72, "learning_rate": 2.9649670595590962e-05, "loss": 2.0403, "step": 14066000 }, { "epoch": 40.72, "learning_rate": 2.9648946947943685e-05, "loss": 2.036, "step": 14066500 }, { "epoch": 40.72, "learning_rate": 2.96482247475917e-05, "loss": 2.0498, "step": 14067000 }, { "epoch": 40.72, "learning_rate": 2.964750254723972e-05, "loss": 2.0575, "step": 14067500 }, { "epoch": 40.72, "learning_rate": 2.964677889959244e-05, "loss": 2.0489, "step": 14068000 }, { "epoch": 40.72, "learning_rate": 2.964605669924046e-05, "loss": 2.0426, "step": 14068500 }, { "epoch": 40.72, "learning_rate": 2.9645333051593183e-05, "loss": 2.0405, "step": 14069000 }, { "epoch": 40.73, "learning_rate": 2.9644609403945905e-05, "loss": 2.0524, "step": 14069500 }, { "epoch": 40.73, "learning_rate": 2.9643885756298627e-05, "loss": 2.0804, "step": 14070000 }, { "epoch": 40.73, "learning_rate": 2.9643163555946646e-05, "loss": 2.0334, "step": 14070500 }, { "epoch": 40.73, "learning_rate": 2.9642439908299375e-05, "loss": 2.0239, "step": 14071000 }, { "epoch": 40.73, "learning_rate": 2.9641716260652097e-05, "loss": 2.0504, "step": 14071500 }, { "epoch": 40.73, "learning_rate": 2.964099261300482e-05, "loss": 2.062, "step": 14072000 }, { "epoch": 40.73, "learning_rate": 2.964026896535754e-05, "loss": 2.0547, "step": 14072500 }, { "epoch": 40.74, "learning_rate": 2.9639545317710264e-05, "loss": 2.0475, "step": 14073000 }, { "epoch": 40.74, "learning_rate": 2.963882167006299e-05, "loss": 2.0513, "step": 14073500 }, { "epoch": 40.74, "learning_rate": 2.9638098022415712e-05, "loss": 2.0499, "step": 14074000 }, { "epoch": 40.74, "learning_rate": 2.9637374374768434e-05, "loss": 2.0577, "step": 14074500 }, { "epoch": 40.74, "learning_rate": 2.9636650727121156e-05, "loss": 2.045, "step": 14075000 }, { "epoch": 40.74, "learning_rate": 2.963592707947388e-05, "loss": 2.0581, "step": 14075500 }, { "epoch": 40.74, "learning_rate": 2.9635203431826604e-05, "loss": 2.069, "step": 14076000 }, { "epoch": 40.75, "learning_rate": 2.963448123147462e-05, "loss": 2.0699, "step": 14076500 }, { "epoch": 40.75, "learning_rate": 2.9633757583827342e-05, "loss": 2.0711, "step": 14077000 }, { "epoch": 40.75, "learning_rate": 2.9633033936180064e-05, "loss": 2.0521, "step": 14077500 }, { "epoch": 40.75, "learning_rate": 2.9632310288532793e-05, "loss": 2.0655, "step": 14078000 }, { "epoch": 40.75, "learning_rate": 2.9631586640885515e-05, "loss": 2.0796, "step": 14078500 }, { "epoch": 40.75, "learning_rate": 2.963086299323824e-05, "loss": 2.0628, "step": 14079000 }, { "epoch": 40.75, "learning_rate": 2.9630139345590963e-05, "loss": 2.0505, "step": 14079500 }, { "epoch": 40.76, "learning_rate": 2.9629418592534275e-05, "loss": 2.0605, "step": 14080000 }, { "epoch": 40.76, "learning_rate": 2.9628694944886998e-05, "loss": 2.048, "step": 14080500 }, { "epoch": 40.76, "learning_rate": 2.962797129723972e-05, "loss": 2.0432, "step": 14081000 }, { "epoch": 40.76, "learning_rate": 2.9627247649592442e-05, "loss": 2.0549, "step": 14081500 }, { "epoch": 40.76, "learning_rate": 2.9626524001945168e-05, "loss": 2.0621, "step": 14082000 }, { "epoch": 40.76, "learning_rate": 2.962580035429789e-05, "loss": 2.0308, "step": 14082500 }, { "epoch": 40.76, "learning_rate": 2.9625076706650612e-05, "loss": 2.0725, "step": 14083000 }, { "epoch": 40.77, "learning_rate": 2.9624353059003334e-05, "loss": 2.0208, "step": 14083500 }, { "epoch": 40.77, "learning_rate": 2.9623629411356057e-05, "loss": 2.0373, "step": 14084000 }, { "epoch": 40.77, "learning_rate": 2.9622907211004076e-05, "loss": 2.0555, "step": 14084500 }, { "epoch": 40.77, "learning_rate": 2.9622183563356798e-05, "loss": 2.0586, "step": 14085000 }, { "epoch": 40.77, "learning_rate": 2.962145991570952e-05, "loss": 2.0437, "step": 14085500 }, { "epoch": 40.77, "learning_rate": 2.962073626806225e-05, "loss": 2.0363, "step": 14086000 }, { "epoch": 40.77, "learning_rate": 2.9620014067710268e-05, "loss": 2.0438, "step": 14086500 }, { "epoch": 40.78, "learning_rate": 2.961929042006299e-05, "loss": 2.0464, "step": 14087000 }, { "epoch": 40.78, "learning_rate": 2.9618566772415713e-05, "loss": 2.0301, "step": 14087500 }, { "epoch": 40.78, "learning_rate": 2.9617843124768435e-05, "loss": 2.0426, "step": 14088000 }, { "epoch": 40.78, "learning_rate": 2.9617119477121157e-05, "loss": 2.0534, "step": 14088500 }, { "epoch": 40.78, "learning_rate": 2.961639582947388e-05, "loss": 2.0567, "step": 14089000 }, { "epoch": 40.78, "learning_rate": 2.9615672181826605e-05, "loss": 2.0575, "step": 14089500 }, { "epoch": 40.78, "learning_rate": 2.9614948534179327e-05, "loss": 2.0414, "step": 14090000 }, { "epoch": 40.79, "learning_rate": 2.961422488653205e-05, "loss": 2.0587, "step": 14090500 }, { "epoch": 40.79, "learning_rate": 2.9613502686180068e-05, "loss": 2.0404, "step": 14091000 }, { "epoch": 40.79, "learning_rate": 2.9612780485828084e-05, "loss": 2.0387, "step": 14091500 }, { "epoch": 40.79, "learning_rate": 2.9612056838180806e-05, "loss": 2.0542, "step": 14092000 }, { "epoch": 40.79, "learning_rate": 2.9611334637828825e-05, "loss": 2.0223, "step": 14092500 }, { "epoch": 40.79, "learning_rate": 2.9610610990181547e-05, "loss": 2.0649, "step": 14093000 }, { "epoch": 40.79, "learning_rate": 2.9609887342534276e-05, "loss": 2.0597, "step": 14093500 }, { "epoch": 40.8, "learning_rate": 2.9609163694887e-05, "loss": 2.0329, "step": 14094000 }, { "epoch": 40.8, "learning_rate": 2.960844004723972e-05, "loss": 2.0739, "step": 14094500 }, { "epoch": 40.8, "learning_rate": 2.9607716399592443e-05, "loss": 2.0397, "step": 14095000 }, { "epoch": 40.8, "learning_rate": 2.960699275194517e-05, "loss": 2.057, "step": 14095500 }, { "epoch": 40.8, "learning_rate": 2.960626910429789e-05, "loss": 2.0483, "step": 14096000 }, { "epoch": 40.8, "learning_rate": 2.9605545456650613e-05, "loss": 2.0298, "step": 14096500 }, { "epoch": 40.81, "learning_rate": 2.9604824703593925e-05, "loss": 2.0395, "step": 14097000 }, { "epoch": 40.81, "learning_rate": 2.9604101055946648e-05, "loss": 2.0567, "step": 14097500 }, { "epoch": 40.81, "learning_rate": 2.960337740829937e-05, "loss": 2.0565, "step": 14098000 }, { "epoch": 40.81, "learning_rate": 2.960265520794739e-05, "loss": 2.0472, "step": 14098500 }, { "epoch": 40.81, "learning_rate": 2.960193156030011e-05, "loss": 2.0413, "step": 14099000 }, { "epoch": 40.81, "learning_rate": 2.9601207912652833e-05, "loss": 2.0543, "step": 14099500 }, { "epoch": 40.81, "learning_rate": 2.9600484265005555e-05, "loss": 2.0845, "step": 14100000 }, { "epoch": 40.82, "learning_rate": 2.959976061735828e-05, "loss": 2.0457, "step": 14100500 }, { "epoch": 40.82, "learning_rate": 2.9599036969711007e-05, "loss": 2.0392, "step": 14101000 }, { "epoch": 40.82, "learning_rate": 2.9598313322063732e-05, "loss": 2.0181, "step": 14101500 }, { "epoch": 40.82, "learning_rate": 2.9597591121711748e-05, "loss": 2.0732, "step": 14102000 }, { "epoch": 40.82, "learning_rate": 2.959686747406447e-05, "loss": 2.0423, "step": 14102500 }, { "epoch": 40.82, "learning_rate": 2.9596143826417196e-05, "loss": 2.0432, "step": 14103000 }, { "epoch": 40.82, "learning_rate": 2.9595420178769918e-05, "loss": 2.0636, "step": 14103500 }, { "epoch": 40.83, "learning_rate": 2.9594697978417934e-05, "loss": 2.0525, "step": 14104000 }, { "epoch": 40.83, "learning_rate": 2.9593974330770656e-05, "loss": 2.0494, "step": 14104500 }, { "epoch": 40.83, "learning_rate": 2.959325068312338e-05, "loss": 2.0917, "step": 14105000 }, { "epoch": 40.83, "learning_rate": 2.9592527035476104e-05, "loss": 2.0462, "step": 14105500 }, { "epoch": 40.83, "learning_rate": 2.9591803387828826e-05, "loss": 2.0611, "step": 14106000 }, { "epoch": 40.83, "learning_rate": 2.9591079740181548e-05, "loss": 2.0611, "step": 14106500 }, { "epoch": 40.83, "learning_rate": 2.9590357539829567e-05, "loss": 2.0633, "step": 14107000 }, { "epoch": 40.84, "learning_rate": 2.9589635339477583e-05, "loss": 2.0379, "step": 14107500 }, { "epoch": 40.84, "learning_rate": 2.9588911691830308e-05, "loss": 2.0683, "step": 14108000 }, { "epoch": 40.84, "learning_rate": 2.958818804418303e-05, "loss": 2.0338, "step": 14108500 }, { "epoch": 40.84, "learning_rate": 2.958746439653576e-05, "loss": 2.0505, "step": 14109000 }, { "epoch": 40.84, "learning_rate": 2.958674074888848e-05, "loss": 2.0403, "step": 14109500 }, { "epoch": 40.84, "learning_rate": 2.9586017101241204e-05, "loss": 2.068, "step": 14110000 }, { "epoch": 40.84, "learning_rate": 2.9585293453593926e-05, "loss": 2.0503, "step": 14110500 }, { "epoch": 40.85, "learning_rate": 2.958456980594665e-05, "loss": 2.0686, "step": 14111000 }, { "epoch": 40.85, "learning_rate": 2.958384615829937e-05, "loss": 2.0834, "step": 14111500 }, { "epoch": 40.85, "learning_rate": 2.9583122510652096e-05, "loss": 2.051, "step": 14112000 }, { "epoch": 40.85, "learning_rate": 2.958239886300482e-05, "loss": 2.0634, "step": 14112500 }, { "epoch": 40.85, "learning_rate": 2.958167521535754e-05, "loss": 2.0398, "step": 14113000 }, { "epoch": 40.85, "learning_rate": 2.9580951567710263e-05, "loss": 2.038, "step": 14113500 }, { "epoch": 40.85, "learning_rate": 2.9580230814653575e-05, "loss": 2.0825, "step": 14114000 }, { "epoch": 40.86, "learning_rate": 2.9579507167006297e-05, "loss": 2.0483, "step": 14114500 }, { "epoch": 40.86, "learning_rate": 2.957878351935902e-05, "loss": 2.0476, "step": 14115000 }, { "epoch": 40.86, "learning_rate": 2.9578059871711745e-05, "loss": 2.0346, "step": 14115500 }, { "epoch": 40.86, "learning_rate": 2.957733622406447e-05, "loss": 2.0678, "step": 14116000 }, { "epoch": 40.86, "learning_rate": 2.9576612576417197e-05, "loss": 2.057, "step": 14116500 }, { "epoch": 40.86, "learning_rate": 2.957588892876992e-05, "loss": 2.0747, "step": 14117000 }, { "epoch": 40.86, "learning_rate": 2.9575166728417934e-05, "loss": 2.0443, "step": 14117500 }, { "epoch": 40.87, "learning_rate": 2.957444308077066e-05, "loss": 2.0506, "step": 14118000 }, { "epoch": 40.87, "learning_rate": 2.9573719433123382e-05, "loss": 2.0502, "step": 14118500 }, { "epoch": 40.87, "learning_rate": 2.9572995785476104e-05, "loss": 2.0598, "step": 14119000 }, { "epoch": 40.87, "learning_rate": 2.9572272137828827e-05, "loss": 2.0255, "step": 14119500 }, { "epoch": 40.87, "learning_rate": 2.957154849018155e-05, "loss": 2.0674, "step": 14120000 }, { "epoch": 40.87, "learning_rate": 2.957082484253427e-05, "loss": 2.0184, "step": 14120500 }, { "epoch": 40.87, "learning_rate": 2.9570101194886997e-05, "loss": 2.0621, "step": 14121000 }, { "epoch": 40.88, "learning_rate": 2.956937754723972e-05, "loss": 2.0409, "step": 14121500 }, { "epoch": 40.88, "learning_rate": 2.9568658241478325e-05, "loss": 2.0433, "step": 14122000 }, { "epoch": 40.88, "learning_rate": 2.9567934593831047e-05, "loss": 2.0701, "step": 14122500 }, { "epoch": 40.88, "learning_rate": 2.9567210946183772e-05, "loss": 2.0428, "step": 14123000 }, { "epoch": 40.88, "learning_rate": 2.9566488745831788e-05, "loss": 2.0908, "step": 14123500 }, { "epoch": 40.88, "learning_rate": 2.9565766545479807e-05, "loss": 2.0541, "step": 14124000 }, { "epoch": 40.88, "learning_rate": 2.9565042897832536e-05, "loss": 2.0566, "step": 14124500 }, { "epoch": 40.89, "learning_rate": 2.9564319250185258e-05, "loss": 2.0541, "step": 14125000 }, { "epoch": 40.89, "learning_rate": 2.956359560253798e-05, "loss": 2.0548, "step": 14125500 }, { "epoch": 40.89, "learning_rate": 2.9562871954890703e-05, "loss": 2.0532, "step": 14126000 }, { "epoch": 40.89, "learning_rate": 2.9562148307243425e-05, "loss": 2.0386, "step": 14126500 }, { "epoch": 40.89, "learning_rate": 2.9561424659596147e-05, "loss": 2.0751, "step": 14127000 }, { "epoch": 40.89, "learning_rate": 2.9560701011948873e-05, "loss": 2.0571, "step": 14127500 }, { "epoch": 40.89, "learning_rate": 2.9559977364301595e-05, "loss": 2.0598, "step": 14128000 }, { "epoch": 40.9, "learning_rate": 2.9559253716654317e-05, "loss": 2.0355, "step": 14128500 }, { "epoch": 40.9, "learning_rate": 2.955853006900704e-05, "loss": 2.0536, "step": 14129000 }, { "epoch": 40.9, "learning_rate": 2.955780642135976e-05, "loss": 2.0491, "step": 14129500 }, { "epoch": 40.9, "learning_rate": 2.9557082773712487e-05, "loss": 2.0396, "step": 14130000 }, { "epoch": 40.9, "learning_rate": 2.955635912606521e-05, "loss": 2.0537, "step": 14130500 }, { "epoch": 40.9, "learning_rate": 2.9555635478417932e-05, "loss": 2.067, "step": 14131000 }, { "epoch": 40.9, "learning_rate": 2.955491183077066e-05, "loss": 2.0439, "step": 14131500 }, { "epoch": 40.91, "learning_rate": 2.9554188183123383e-05, "loss": 2.0379, "step": 14132000 }, { "epoch": 40.91, "learning_rate": 2.9553464535476105e-05, "loss": 2.0475, "step": 14132500 }, { "epoch": 40.91, "learning_rate": 2.9552740887828827e-05, "loss": 2.0662, "step": 14133000 }, { "epoch": 40.91, "learning_rate": 2.955201724018155e-05, "loss": 2.0478, "step": 14133500 }, { "epoch": 40.91, "learning_rate": 2.9551293592534275e-05, "loss": 2.0467, "step": 14134000 }, { "epoch": 40.91, "learning_rate": 2.9550569944886998e-05, "loss": 2.0524, "step": 14134500 }, { "epoch": 40.92, "learning_rate": 2.9549847744535013e-05, "loss": 2.0786, "step": 14135000 }, { "epoch": 40.92, "learning_rate": 2.9549124096887735e-05, "loss": 2.0659, "step": 14135500 }, { "epoch": 40.92, "learning_rate": 2.954840044924046e-05, "loss": 2.0617, "step": 14136000 }, { "epoch": 40.92, "learning_rate": 2.9547676801593183e-05, "loss": 2.0655, "step": 14136500 }, { "epoch": 40.92, "learning_rate": 2.95469546012412e-05, "loss": 2.05, "step": 14137000 }, { "epoch": 40.92, "learning_rate": 2.9546230953593924e-05, "loss": 2.035, "step": 14137500 }, { "epoch": 40.92, "learning_rate": 2.9545507305946647e-05, "loss": 2.0333, "step": 14138000 }, { "epoch": 40.93, "learning_rate": 2.9544783658299376e-05, "loss": 2.0425, "step": 14138500 }, { "epoch": 40.93, "learning_rate": 2.9544060010652098e-05, "loss": 2.0759, "step": 14139000 }, { "epoch": 40.93, "learning_rate": 2.954333636300482e-05, "loss": 2.0426, "step": 14139500 }, { "epoch": 40.93, "learning_rate": 2.9542612715357542e-05, "loss": 2.0702, "step": 14140000 }, { "epoch": 40.93, "learning_rate": 2.9541889067710265e-05, "loss": 2.0498, "step": 14140500 }, { "epoch": 40.93, "learning_rate": 2.9541165420062987e-05, "loss": 2.0564, "step": 14141000 }, { "epoch": 40.93, "learning_rate": 2.9540443219711006e-05, "loss": 2.0458, "step": 14141500 }, { "epoch": 40.94, "learning_rate": 2.9539721019359025e-05, "loss": 2.0467, "step": 14142000 }, { "epoch": 40.94, "learning_rate": 2.9538997371711747e-05, "loss": 2.0302, "step": 14142500 }, { "epoch": 40.94, "learning_rate": 2.9538275171359762e-05, "loss": 2.0612, "step": 14143000 }, { "epoch": 40.94, "learning_rate": 2.9537551523712488e-05, "loss": 2.024, "step": 14143500 }, { "epoch": 40.94, "learning_rate": 2.953682787606521e-05, "loss": 2.034, "step": 14144000 }, { "epoch": 40.94, "learning_rate": 2.9536105675713226e-05, "loss": 2.0255, "step": 14144500 }, { "epoch": 40.94, "learning_rate": 2.953538202806595e-05, "loss": 2.0718, "step": 14145000 }, { "epoch": 40.95, "learning_rate": 2.9534658380418674e-05, "loss": 2.0298, "step": 14145500 }, { "epoch": 40.95, "learning_rate": 2.9533934732771396e-05, "loss": 2.0536, "step": 14146000 }, { "epoch": 40.95, "learning_rate": 2.9533211085124125e-05, "loss": 2.0415, "step": 14146500 }, { "epoch": 40.95, "learning_rate": 2.9532487437476847e-05, "loss": 2.0468, "step": 14147000 }, { "epoch": 40.95, "learning_rate": 2.953176378982957e-05, "loss": 2.0629, "step": 14147500 }, { "epoch": 40.95, "learning_rate": 2.953104014218229e-05, "loss": 2.043, "step": 14148000 }, { "epoch": 40.95, "learning_rate": 2.9530316494535014e-05, "loss": 2.0571, "step": 14148500 }, { "epoch": 40.96, "learning_rate": 2.952959284688774e-05, "loss": 2.0432, "step": 14149000 }, { "epoch": 40.96, "learning_rate": 2.9528869199240462e-05, "loss": 2.0612, "step": 14149500 }, { "epoch": 40.96, "learning_rate": 2.9528146998888477e-05, "loss": 2.0465, "step": 14150000 }, { "epoch": 40.96, "learning_rate": 2.9527424798536496e-05, "loss": 2.0516, "step": 14150500 }, { "epoch": 40.96, "learning_rate": 2.952670115088922e-05, "loss": 2.0598, "step": 14151000 }, { "epoch": 40.96, "learning_rate": 2.952597750324194e-05, "loss": 2.0683, "step": 14151500 }, { "epoch": 40.96, "learning_rate": 2.9525253855594663e-05, "loss": 2.0518, "step": 14152000 }, { "epoch": 40.97, "learning_rate": 2.952453020794739e-05, "loss": 2.0671, "step": 14152500 }, { "epoch": 40.97, "learning_rate": 2.952380656030011e-05, "loss": 2.0462, "step": 14153000 }, { "epoch": 40.97, "learning_rate": 2.952308291265284e-05, "loss": 2.0364, "step": 14153500 }, { "epoch": 40.97, "learning_rate": 2.9522359265005562e-05, "loss": 2.0587, "step": 14154000 }, { "epoch": 40.97, "learning_rate": 2.9521637064653578e-05, "loss": 2.0386, "step": 14154500 }, { "epoch": 40.97, "learning_rate": 2.9520914864301597e-05, "loss": 2.0444, "step": 14155000 }, { "epoch": 40.97, "learning_rate": 2.952019121665432e-05, "loss": 2.0791, "step": 14155500 }, { "epoch": 40.98, "learning_rate": 2.951946756900704e-05, "loss": 2.0472, "step": 14156000 }, { "epoch": 40.98, "learning_rate": 2.9518743921359767e-05, "loss": 2.0553, "step": 14156500 }, { "epoch": 40.98, "learning_rate": 2.951802027371249e-05, "loss": 2.0551, "step": 14157000 }, { "epoch": 40.98, "learning_rate": 2.951729662606521e-05, "loss": 2.0383, "step": 14157500 }, { "epoch": 40.98, "learning_rate": 2.9516574425713227e-05, "loss": 2.0629, "step": 14158000 }, { "epoch": 40.98, "learning_rate": 2.9515850778065952e-05, "loss": 2.0662, "step": 14158500 }, { "epoch": 40.98, "learning_rate": 2.9515127130418674e-05, "loss": 2.0357, "step": 14159000 }, { "epoch": 40.99, "learning_rate": 2.9514403482771397e-05, "loss": 2.0244, "step": 14159500 }, { "epoch": 40.99, "learning_rate": 2.951367983512412e-05, "loss": 2.0499, "step": 14160000 }, { "epoch": 40.99, "learning_rate": 2.951295618747684e-05, "loss": 2.0675, "step": 14160500 }, { "epoch": 40.99, "learning_rate": 2.9512232539829567e-05, "loss": 2.0503, "step": 14161000 }, { "epoch": 40.99, "learning_rate": 2.9511508892182292e-05, "loss": 2.065, "step": 14161500 }, { "epoch": 40.99, "learning_rate": 2.9510788139125605e-05, "loss": 2.0618, "step": 14162000 }, { "epoch": 40.99, "learning_rate": 2.9510065938773624e-05, "loss": 2.0629, "step": 14162500 }, { "epoch": 41.0, "learning_rate": 2.9509342291126346e-05, "loss": 2.0609, "step": 14163000 }, { "epoch": 41.0, "learning_rate": 2.9508618643479068e-05, "loss": 2.0239, "step": 14163500 }, { "epoch": 41.0, "learning_rate": 2.950789499583179e-05, "loss": 2.0627, "step": 14164000 }, { "epoch": 41.0, "eval_accuracy": 0.6716259497639389, "eval_accuracy_mlm": 0.6370840410644276, "eval_accuracy_nsp": 0.8568652523224851, "eval_loss": 2.169929027557373, "eval_runtime": 331.2679, "eval_samples_per_second": 1317.32, "eval_steps_per_second": 54.889, "step": 14164352 }, { "epoch": 41.0, "learning_rate": 2.9507171348184516e-05, "loss": 2.0252, "step": 14164500 }, { "epoch": 41.0, "learning_rate": 2.9506447700537238e-05, "loss": 2.0292, "step": 14165000 }, { "epoch": 41.0, "learning_rate": 2.950572405288996e-05, "loss": 2.0558, "step": 14165500 }, { "epoch": 41.0, "learning_rate": 2.9505000405242683e-05, "loss": 2.0259, "step": 14166000 }, { "epoch": 41.01, "learning_rate": 2.9504276757595405e-05, "loss": 2.0301, "step": 14166500 }, { "epoch": 41.01, "learning_rate": 2.950355310994813e-05, "loss": 2.038, "step": 14167000 }, { "epoch": 41.01, "learning_rate": 2.9502829462300853e-05, "loss": 2.0093, "step": 14167500 }, { "epoch": 41.01, "learning_rate": 2.9502105814653575e-05, "loss": 2.0305, "step": 14168000 }, { "epoch": 41.01, "learning_rate": 2.9501382167006297e-05, "loss": 2.0267, "step": 14168500 }, { "epoch": 41.01, "learning_rate": 2.9500658519359026e-05, "loss": 2.0314, "step": 14169000 }, { "epoch": 41.01, "learning_rate": 2.949993487171175e-05, "loss": 2.0196, "step": 14169500 }, { "epoch": 41.02, "learning_rate": 2.949921122406447e-05, "loss": 2.0248, "step": 14170000 }, { "epoch": 41.02, "learning_rate": 2.9498487576417193e-05, "loss": 2.0372, "step": 14170500 }, { "epoch": 41.02, "learning_rate": 2.9497766823360505e-05, "loss": 2.0416, "step": 14171000 }, { "epoch": 41.02, "learning_rate": 2.949704317571323e-05, "loss": 2.0442, "step": 14171500 }, { "epoch": 41.02, "learning_rate": 2.9496319528065953e-05, "loss": 2.0304, "step": 14172000 }, { "epoch": 41.02, "learning_rate": 2.9495595880418675e-05, "loss": 2.047, "step": 14172500 }, { "epoch": 41.03, "learning_rate": 2.949487368006669e-05, "loss": 2.0282, "step": 14173000 }, { "epoch": 41.03, "learning_rate": 2.9494150032419416e-05, "loss": 2.0355, "step": 14173500 }, { "epoch": 41.03, "learning_rate": 2.949342638477214e-05, "loss": 2.0174, "step": 14174000 }, { "epoch": 41.03, "learning_rate": 2.949270273712486e-05, "loss": 1.9953, "step": 14174500 }, { "epoch": 41.03, "learning_rate": 2.9491979089477583e-05, "loss": 2.0373, "step": 14175000 }, { "epoch": 41.03, "learning_rate": 2.9491255441830305e-05, "loss": 2.0278, "step": 14175500 }, { "epoch": 41.03, "learning_rate": 2.949053179418303e-05, "loss": 2.0333, "step": 14176000 }, { "epoch": 41.04, "learning_rate": 2.9489808146535757e-05, "loss": 2.0023, "step": 14176500 }, { "epoch": 41.04, "learning_rate": 2.9489084498888482e-05, "loss": 1.9903, "step": 14177000 }, { "epoch": 41.04, "learning_rate": 2.9488360851241204e-05, "loss": 2.0444, "step": 14177500 }, { "epoch": 41.04, "learning_rate": 2.9487637203593927e-05, "loss": 2.0268, "step": 14178000 }, { "epoch": 41.04, "learning_rate": 2.948691355594665e-05, "loss": 2.0275, "step": 14178500 }, { "epoch": 41.04, "learning_rate": 2.948618990829937e-05, "loss": 2.0297, "step": 14179000 }, { "epoch": 41.04, "learning_rate": 2.9485466260652093e-05, "loss": 2.0383, "step": 14179500 }, { "epoch": 41.05, "learning_rate": 2.948474261300482e-05, "loss": 2.0256, "step": 14180000 }, { "epoch": 41.05, "learning_rate": 2.948401896535754e-05, "loss": 2.0112, "step": 14180500 }, { "epoch": 41.05, "learning_rate": 2.9483295317710264e-05, "loss": 2.0475, "step": 14181000 }, { "epoch": 41.05, "learning_rate": 2.9482573117358282e-05, "loss": 2.0332, "step": 14181500 }, { "epoch": 41.05, "learning_rate": 2.9481849469711005e-05, "loss": 2.0527, "step": 14182000 }, { "epoch": 41.05, "learning_rate": 2.9481125822063727e-05, "loss": 2.0443, "step": 14182500 }, { "epoch": 41.05, "learning_rate": 2.948040217441645e-05, "loss": 2.0133, "step": 14183000 }, { "epoch": 41.06, "learning_rate": 2.9479678526769178e-05, "loss": 2.0414, "step": 14183500 }, { "epoch": 41.06, "learning_rate": 2.94789548791219e-05, "loss": 2.0291, "step": 14184000 }, { "epoch": 41.06, "learning_rate": 2.947823267876992e-05, "loss": 2.0167, "step": 14184500 }, { "epoch": 41.06, "learning_rate": 2.947750903112264e-05, "loss": 2.0188, "step": 14185000 }, { "epoch": 41.06, "learning_rate": 2.9476785383475364e-05, "loss": 2.0144, "step": 14185500 }, { "epoch": 41.06, "learning_rate": 2.9476061735828086e-05, "loss": 2.0406, "step": 14186000 }, { "epoch": 41.06, "learning_rate": 2.9475338088180808e-05, "loss": 2.0518, "step": 14186500 }, { "epoch": 41.07, "learning_rate": 2.9474614440533534e-05, "loss": 2.0462, "step": 14187000 }, { "epoch": 41.07, "learning_rate": 2.947389224018155e-05, "loss": 2.0521, "step": 14187500 }, { "epoch": 41.07, "learning_rate": 2.947316859253427e-05, "loss": 2.0598, "step": 14188000 }, { "epoch": 41.07, "learning_rate": 2.9472444944886994e-05, "loss": 2.0191, "step": 14188500 }, { "epoch": 41.07, "learning_rate": 2.947172129723972e-05, "loss": 2.0168, "step": 14189000 }, { "epoch": 41.07, "learning_rate": 2.9471000544183032e-05, "loss": 2.0318, "step": 14189500 }, { "epoch": 41.07, "learning_rate": 2.9470276896535754e-05, "loss": 2.0274, "step": 14190000 }, { "epoch": 41.08, "learning_rate": 2.9469553248888476e-05, "loss": 2.0667, "step": 14190500 }, { "epoch": 41.08, "learning_rate": 2.9468829601241205e-05, "loss": 2.0326, "step": 14191000 }, { "epoch": 41.08, "learning_rate": 2.9468105953593928e-05, "loss": 2.0261, "step": 14191500 }, { "epoch": 41.08, "learning_rate": 2.946738230594665e-05, "loss": 2.0441, "step": 14192000 }, { "epoch": 41.08, "learning_rate": 2.9466658658299372e-05, "loss": 2.0387, "step": 14192500 }, { "epoch": 41.08, "learning_rate": 2.9465935010652098e-05, "loss": 2.0509, "step": 14193000 }, { "epoch": 41.08, "learning_rate": 2.9465212810300113e-05, "loss": 2.0244, "step": 14193500 }, { "epoch": 41.09, "learning_rate": 2.9464489162652835e-05, "loss": 2.0403, "step": 14194000 }, { "epoch": 41.09, "learning_rate": 2.9463765515005558e-05, "loss": 2.0152, "step": 14194500 }, { "epoch": 41.09, "learning_rate": 2.9463041867358283e-05, "loss": 2.0724, "step": 14195000 }, { "epoch": 41.09, "learning_rate": 2.9462318219711005e-05, "loss": 2.0321, "step": 14195500 }, { "epoch": 41.09, "learning_rate": 2.9461594572063728e-05, "loss": 2.0344, "step": 14196000 }, { "epoch": 41.09, "learning_rate": 2.946087092441645e-05, "loss": 2.0308, "step": 14196500 }, { "epoch": 41.09, "learning_rate": 2.9460147276769172e-05, "loss": 2.0229, "step": 14197000 }, { "epoch": 41.1, "learning_rate": 2.9459423629121898e-05, "loss": 2.0373, "step": 14197500 }, { "epoch": 41.1, "learning_rate": 2.945869998147462e-05, "loss": 2.038, "step": 14198000 }, { "epoch": 41.1, "learning_rate": 2.9457979228417932e-05, "loss": 2.0205, "step": 14198500 }, { "epoch": 41.1, "learning_rate": 2.9457258475361248e-05, "loss": 2.054, "step": 14199000 }, { "epoch": 41.1, "learning_rate": 2.9456534827713974e-05, "loss": 2.0354, "step": 14199500 }, { "epoch": 41.1, "learning_rate": 2.9455811180066696e-05, "loss": 2.0404, "step": 14200000 }, { "epoch": 41.1, "learning_rate": 2.9455087532419418e-05, "loss": 2.0229, "step": 14200500 }, { "epoch": 41.11, "learning_rate": 2.945436388477214e-05, "loss": 2.0462, "step": 14201000 }, { "epoch": 41.11, "learning_rate": 2.9453640237124863e-05, "loss": 2.0287, "step": 14201500 }, { "epoch": 41.11, "learning_rate": 2.9452916589477585e-05, "loss": 2.0327, "step": 14202000 }, { "epoch": 41.11, "learning_rate": 2.945219294183031e-05, "loss": 2.0494, "step": 14202500 }, { "epoch": 41.11, "learning_rate": 2.9451470741478326e-05, "loss": 2.0361, "step": 14203000 }, { "epoch": 41.11, "learning_rate": 2.9450747093831048e-05, "loss": 2.0342, "step": 14203500 }, { "epoch": 41.11, "learning_rate": 2.945002344618377e-05, "loss": 2.0383, "step": 14204000 }, { "epoch": 41.12, "learning_rate": 2.9449299798536496e-05, "loss": 2.0197, "step": 14204500 }, { "epoch": 41.12, "learning_rate": 2.9448576150889218e-05, "loss": 2.0289, "step": 14205000 }, { "epoch": 41.12, "learning_rate": 2.944785250324194e-05, "loss": 2.0386, "step": 14205500 }, { "epoch": 41.12, "learning_rate": 2.9447128855594663e-05, "loss": 2.0362, "step": 14206000 }, { "epoch": 41.12, "learning_rate": 2.9446405207947392e-05, "loss": 2.0308, "step": 14206500 }, { "epoch": 41.12, "learning_rate": 2.9445681560300114e-05, "loss": 2.0423, "step": 14207000 }, { "epoch": 41.12, "learning_rate": 2.9444957912652836e-05, "loss": 2.0322, "step": 14207500 }, { "epoch": 41.13, "learning_rate": 2.9444235712300855e-05, "loss": 2.0588, "step": 14208000 }, { "epoch": 41.13, "learning_rate": 2.9443512064653577e-05, "loss": 2.0297, "step": 14208500 }, { "epoch": 41.13, "learning_rate": 2.94427884170063e-05, "loss": 2.0157, "step": 14209000 }, { "epoch": 41.13, "learning_rate": 2.9442064769359022e-05, "loss": 2.0294, "step": 14209500 }, { "epoch": 41.13, "learning_rate": 2.9441341121711747e-05, "loss": 2.0437, "step": 14210000 }, { "epoch": 41.13, "learning_rate": 2.944061747406447e-05, "loss": 2.0391, "step": 14210500 }, { "epoch": 41.14, "learning_rate": 2.9439893826417192e-05, "loss": 2.0404, "step": 14211000 }, { "epoch": 41.14, "learning_rate": 2.9439170178769914e-05, "loss": 2.0221, "step": 14211500 }, { "epoch": 41.14, "learning_rate": 2.9438446531122636e-05, "loss": 2.0401, "step": 14212000 }, { "epoch": 41.14, "learning_rate": 2.9437724330770655e-05, "loss": 2.0364, "step": 14212500 }, { "epoch": 41.14, "learning_rate": 2.9437000683123378e-05, "loss": 2.0488, "step": 14213000 }, { "epoch": 41.14, "learning_rate": 2.9436277035476107e-05, "loss": 2.0412, "step": 14213500 }, { "epoch": 41.14, "learning_rate": 2.943555338782883e-05, "loss": 2.0482, "step": 14214000 }, { "epoch": 41.15, "learning_rate": 2.9434831187476848e-05, "loss": 2.0316, "step": 14214500 }, { "epoch": 41.15, "learning_rate": 2.943410753982957e-05, "loss": 1.9984, "step": 14215000 }, { "epoch": 41.15, "learning_rate": 2.9433383892182292e-05, "loss": 2.0581, "step": 14215500 }, { "epoch": 41.15, "learning_rate": 2.943266169183031e-05, "loss": 2.0289, "step": 14216000 }, { "epoch": 41.15, "learning_rate": 2.9431938044183033e-05, "loss": 2.0222, "step": 14216500 }, { "epoch": 41.15, "learning_rate": 2.9431214396535756e-05, "loss": 2.0472, "step": 14217000 }, { "epoch": 41.15, "learning_rate": 2.9430490748888478e-05, "loss": 2.0593, "step": 14217500 }, { "epoch": 41.16, "learning_rate": 2.94297671012412e-05, "loss": 2.0209, "step": 14218000 }, { "epoch": 41.16, "learning_rate": 2.9429043453593926e-05, "loss": 2.038, "step": 14218500 }, { "epoch": 41.16, "learning_rate": 2.9428319805946648e-05, "loss": 2.0427, "step": 14219000 }, { "epoch": 41.16, "learning_rate": 2.9427597605594664e-05, "loss": 2.0403, "step": 14219500 }, { "epoch": 41.16, "learning_rate": 2.9426873957947386e-05, "loss": 2.0303, "step": 14220000 }, { "epoch": 41.16, "learning_rate": 2.942615031030011e-05, "loss": 2.0412, "step": 14220500 }, { "epoch": 41.16, "learning_rate": 2.9425426662652837e-05, "loss": 2.0537, "step": 14221000 }, { "epoch": 41.17, "learning_rate": 2.9424703015005563e-05, "loss": 2.031, "step": 14221500 }, { "epoch": 41.17, "learning_rate": 2.9423979367358285e-05, "loss": 2.0252, "step": 14222000 }, { "epoch": 41.17, "learning_rate": 2.9423255719711007e-05, "loss": 2.0444, "step": 14222500 }, { "epoch": 41.17, "learning_rate": 2.942253207206373e-05, "loss": 2.0232, "step": 14223000 }, { "epoch": 41.17, "learning_rate": 2.9421809871711748e-05, "loss": 2.0544, "step": 14223500 }, { "epoch": 41.17, "learning_rate": 2.942108622406447e-05, "loss": 2.0487, "step": 14224000 }, { "epoch": 41.17, "learning_rate": 2.9420362576417193e-05, "loss": 2.035, "step": 14224500 }, { "epoch": 41.18, "learning_rate": 2.9419638928769915e-05, "loss": 2.0461, "step": 14225000 }, { "epoch": 41.18, "learning_rate": 2.9418919623008524e-05, "loss": 2.0592, "step": 14225500 }, { "epoch": 41.18, "learning_rate": 2.9418195975361246e-05, "loss": 2.0388, "step": 14226000 }, { "epoch": 41.18, "learning_rate": 2.941747232771397e-05, "loss": 2.0365, "step": 14226500 }, { "epoch": 41.18, "learning_rate": 2.941674868006669e-05, "loss": 2.0408, "step": 14227000 }, { "epoch": 41.18, "learning_rate": 2.941602647971471e-05, "loss": 1.998, "step": 14227500 }, { "epoch": 41.18, "learning_rate": 2.9415302832067432e-05, "loss": 2.0579, "step": 14228000 }, { "epoch": 41.19, "learning_rate": 2.9414579184420154e-05, "loss": 2.0452, "step": 14228500 }, { "epoch": 41.19, "learning_rate": 2.9413855536772883e-05, "loss": 2.0192, "step": 14229000 }, { "epoch": 41.19, "learning_rate": 2.9413131889125605e-05, "loss": 2.0282, "step": 14229500 }, { "epoch": 41.19, "learning_rate": 2.9412409688773624e-05, "loss": 2.0577, "step": 14230000 }, { "epoch": 41.19, "learning_rate": 2.9411686041126346e-05, "loss": 2.0425, "step": 14230500 }, { "epoch": 41.19, "learning_rate": 2.941096239347907e-05, "loss": 2.0099, "step": 14231000 }, { "epoch": 41.19, "learning_rate": 2.941023874583179e-05, "loss": 2.0373, "step": 14231500 }, { "epoch": 41.2, "learning_rate": 2.9409515098184513e-05, "loss": 2.0134, "step": 14232000 }, { "epoch": 41.2, "learning_rate": 2.940879145053724e-05, "loss": 2.0508, "step": 14232500 }, { "epoch": 41.2, "learning_rate": 2.940806780288996e-05, "loss": 2.0442, "step": 14233000 }, { "epoch": 41.2, "learning_rate": 2.9407344155242683e-05, "loss": 2.0356, "step": 14233500 }, { "epoch": 41.2, "learning_rate": 2.9406620507595405e-05, "loss": 2.0477, "step": 14234000 }, { "epoch": 41.2, "learning_rate": 2.9405896859948128e-05, "loss": 2.0242, "step": 14234500 }, { "epoch": 41.2, "learning_rate": 2.9405173212300853e-05, "loss": 2.0227, "step": 14235000 }, { "epoch": 41.21, "learning_rate": 2.9404449564653576e-05, "loss": 2.057, "step": 14235500 }, { "epoch": 41.21, "learning_rate": 2.9403725917006298e-05, "loss": 2.0369, "step": 14236000 }, { "epoch": 41.21, "learning_rate": 2.940300371665432e-05, "loss": 2.0459, "step": 14236500 }, { "epoch": 41.21, "learning_rate": 2.9402280069007042e-05, "loss": 2.0356, "step": 14237000 }, { "epoch": 41.21, "learning_rate": 2.9401556421359765e-05, "loss": 2.0512, "step": 14237500 }, { "epoch": 41.21, "learning_rate": 2.940083277371249e-05, "loss": 2.0497, "step": 14238000 }, { "epoch": 41.21, "learning_rate": 2.9400109126065212e-05, "loss": 2.0373, "step": 14238500 }, { "epoch": 41.22, "learning_rate": 2.9399385478417935e-05, "loss": 2.0159, "step": 14239000 }, { "epoch": 41.22, "learning_rate": 2.9398661830770657e-05, "loss": 2.0199, "step": 14239500 }, { "epoch": 41.22, "learning_rate": 2.939793818312338e-05, "loss": 2.0493, "step": 14240000 }, { "epoch": 41.22, "learning_rate": 2.93972145354761e-05, "loss": 2.0372, "step": 14240500 }, { "epoch": 41.22, "learning_rate": 2.9396490887828827e-05, "loss": 2.0532, "step": 14241000 }, { "epoch": 41.22, "learning_rate": 2.939577013477214e-05, "loss": 2.0465, "step": 14241500 }, { "epoch": 41.22, "learning_rate": 2.939504648712486e-05, "loss": 2.0258, "step": 14242000 }, { "epoch": 41.23, "learning_rate": 2.9394322839477584e-05, "loss": 2.0057, "step": 14242500 }, { "epoch": 41.23, "learning_rate": 2.9393599191830306e-05, "loss": 2.0447, "step": 14243000 }, { "epoch": 41.23, "learning_rate": 2.9392875544183028e-05, "loss": 2.0533, "step": 14243500 }, { "epoch": 41.23, "learning_rate": 2.9392151896535757e-05, "loss": 2.006, "step": 14244000 }, { "epoch": 41.23, "learning_rate": 2.939142824888848e-05, "loss": 2.0251, "step": 14244500 }, { "epoch": 41.23, "learning_rate": 2.9390704601241205e-05, "loss": 2.0238, "step": 14245000 }, { "epoch": 41.23, "learning_rate": 2.9389980953593927e-05, "loss": 2.0268, "step": 14245500 }, { "epoch": 41.24, "learning_rate": 2.938925730594665e-05, "loss": 2.0337, "step": 14246000 }, { "epoch": 41.24, "learning_rate": 2.9388533658299372e-05, "loss": 2.0455, "step": 14246500 }, { "epoch": 41.24, "learning_rate": 2.9387812905242684e-05, "loss": 2.0372, "step": 14247000 }, { "epoch": 41.24, "learning_rate": 2.9387089257595406e-05, "loss": 2.0379, "step": 14247500 }, { "epoch": 41.24, "learning_rate": 2.938636560994813e-05, "loss": 2.0414, "step": 14248000 }, { "epoch": 41.24, "learning_rate": 2.9385641962300854e-05, "loss": 2.0365, "step": 14248500 }, { "epoch": 41.25, "learning_rate": 2.9384918314653576e-05, "loss": 2.0531, "step": 14249000 }, { "epoch": 41.25, "learning_rate": 2.93841946670063e-05, "loss": 2.0188, "step": 14249500 }, { "epoch": 41.25, "learning_rate": 2.938347101935902e-05, "loss": 2.0293, "step": 14250000 }, { "epoch": 41.25, "learning_rate": 2.9382747371711743e-05, "loss": 2.0461, "step": 14250500 }, { "epoch": 41.25, "learning_rate": 2.9382023724064472e-05, "loss": 2.0478, "step": 14251000 }, { "epoch": 41.25, "learning_rate": 2.938130152371249e-05, "loss": 2.024, "step": 14251500 }, { "epoch": 41.25, "learning_rate": 2.9380577876065213e-05, "loss": 2.04, "step": 14252000 }, { "epoch": 41.26, "learning_rate": 2.9379854228417935e-05, "loss": 2.0208, "step": 14252500 }, { "epoch": 41.26, "learning_rate": 2.9379130580770658e-05, "loss": 2.0497, "step": 14253000 }, { "epoch": 41.26, "learning_rate": 2.937840693312338e-05, "loss": 2.0463, "step": 14253500 }, { "epoch": 41.26, "learning_rate": 2.9377683285476106e-05, "loss": 2.0468, "step": 14254000 }, { "epoch": 41.26, "learning_rate": 2.9376959637828828e-05, "loss": 2.013, "step": 14254500 }, { "epoch": 41.26, "learning_rate": 2.937623599018155e-05, "loss": 2.0199, "step": 14255000 }, { "epoch": 41.26, "learning_rate": 2.9375512342534272e-05, "loss": 2.037, "step": 14255500 }, { "epoch": 41.27, "learning_rate": 2.937479014218229e-05, "loss": 2.0414, "step": 14256000 }, { "epoch": 41.27, "learning_rate": 2.9374066494535013e-05, "loss": 2.0209, "step": 14256500 }, { "epoch": 41.27, "learning_rate": 2.9373342846887736e-05, "loss": 2.0469, "step": 14257000 }, { "epoch": 41.27, "learning_rate": 2.9372619199240458e-05, "loss": 2.0345, "step": 14257500 }, { "epoch": 41.27, "learning_rate": 2.9371896998888477e-05, "loss": 2.0496, "step": 14258000 }, { "epoch": 41.27, "learning_rate": 2.9371173351241206e-05, "loss": 2.0342, "step": 14258500 }, { "epoch": 41.27, "learning_rate": 2.9370449703593928e-05, "loss": 2.0495, "step": 14259000 }, { "epoch": 41.28, "learning_rate": 2.936972605594665e-05, "loss": 2.0525, "step": 14259500 }, { "epoch": 41.28, "learning_rate": 2.936900385559467e-05, "loss": 2.0451, "step": 14260000 }, { "epoch": 41.28, "learning_rate": 2.9368281655242685e-05, "loss": 2.0404, "step": 14260500 }, { "epoch": 41.28, "learning_rate": 2.9367558007595407e-05, "loss": 2.0594, "step": 14261000 }, { "epoch": 41.28, "learning_rate": 2.9366834359948133e-05, "loss": 2.027, "step": 14261500 }, { "epoch": 41.28, "learning_rate": 2.9366110712300855e-05, "loss": 2.0096, "step": 14262000 }, { "epoch": 41.28, "learning_rate": 2.9365387064653577e-05, "loss": 2.0484, "step": 14262500 }, { "epoch": 41.29, "learning_rate": 2.9364664864301593e-05, "loss": 2.0154, "step": 14263000 }, { "epoch": 41.29, "learning_rate": 2.936394121665432e-05, "loss": 2.0449, "step": 14263500 }, { "epoch": 41.29, "learning_rate": 2.936321756900704e-05, "loss": 2.0524, "step": 14264000 }, { "epoch": 41.29, "learning_rate": 2.9362493921359763e-05, "loss": 2.0547, "step": 14264500 }, { "epoch": 41.29, "learning_rate": 2.9361770273712485e-05, "loss": 2.0217, "step": 14265000 }, { "epoch": 41.29, "learning_rate": 2.9361048073360504e-05, "loss": 2.0209, "step": 14265500 }, { "epoch": 41.29, "learning_rate": 2.9360324425713226e-05, "loss": 2.0519, "step": 14266000 }, { "epoch": 41.3, "learning_rate": 2.9359600778065955e-05, "loss": 2.0369, "step": 14266500 }, { "epoch": 41.3, "learning_rate": 2.935887857771397e-05, "loss": 2.0461, "step": 14267000 }, { "epoch": 41.3, "learning_rate": 2.9358154930066696e-05, "loss": 2.0258, "step": 14267500 }, { "epoch": 41.3, "learning_rate": 2.935743128241942e-05, "loss": 2.0282, "step": 14268000 }, { "epoch": 41.3, "learning_rate": 2.935670763477214e-05, "loss": 2.0376, "step": 14268500 }, { "epoch": 41.3, "learning_rate": 2.9355983987124863e-05, "loss": 2.0437, "step": 14269000 }, { "epoch": 41.3, "learning_rate": 2.9355260339477585e-05, "loss": 2.0486, "step": 14269500 }, { "epoch": 41.31, "learning_rate": 2.9354536691830308e-05, "loss": 2.0384, "step": 14270000 }, { "epoch": 41.31, "learning_rate": 2.9353813044183033e-05, "loss": 2.0348, "step": 14270500 }, { "epoch": 41.31, "learning_rate": 2.9353089396535755e-05, "loss": 2.0374, "step": 14271000 }, { "epoch": 41.31, "learning_rate": 2.9352365748888478e-05, "loss": 2.0285, "step": 14271500 }, { "epoch": 41.31, "learning_rate": 2.9351643548536493e-05, "loss": 2.0374, "step": 14272000 }, { "epoch": 41.31, "learning_rate": 2.935091990088922e-05, "loss": 2.0249, "step": 14272500 }, { "epoch": 41.31, "learning_rate": 2.935019625324194e-05, "loss": 2.0331, "step": 14273000 }, { "epoch": 41.32, "learning_rate": 2.9349472605594663e-05, "loss": 2.0448, "step": 14273500 }, { "epoch": 41.32, "learning_rate": 2.9348748957947392e-05, "loss": 2.026, "step": 14274000 }, { "epoch": 41.32, "learning_rate": 2.9348025310300115e-05, "loss": 2.0271, "step": 14274500 }, { "epoch": 41.32, "learning_rate": 2.9347303109948133e-05, "loss": 2.0428, "step": 14275000 }, { "epoch": 41.32, "learning_rate": 2.9346579462300856e-05, "loss": 2.037, "step": 14275500 }, { "epoch": 41.32, "learning_rate": 2.9345855814653578e-05, "loss": 2.0638, "step": 14276000 }, { "epoch": 41.32, "learning_rate": 2.93451321670063e-05, "loss": 2.0037, "step": 14276500 }, { "epoch": 41.33, "learning_rate": 2.9344408519359022e-05, "loss": 2.0691, "step": 14277000 }, { "epoch": 41.33, "learning_rate": 2.9343684871711745e-05, "loss": 2.0482, "step": 14277500 }, { "epoch": 41.33, "learning_rate": 2.934296122406447e-05, "loss": 2.0496, "step": 14278000 }, { "epoch": 41.33, "learning_rate": 2.9342237576417193e-05, "loss": 2.0805, "step": 14278500 }, { "epoch": 41.33, "learning_rate": 2.9341515376065208e-05, "loss": 2.0443, "step": 14279000 }, { "epoch": 41.33, "learning_rate": 2.9340791728417934e-05, "loss": 2.0266, "step": 14279500 }, { "epoch": 41.33, "learning_rate": 2.9340068080770656e-05, "loss": 2.0351, "step": 14280000 }, { "epoch": 41.34, "learning_rate": 2.933934588041867e-05, "loss": 2.0433, "step": 14280500 }, { "epoch": 41.34, "learning_rate": 2.9338622232771397e-05, "loss": 2.0438, "step": 14281000 }, { "epoch": 41.34, "learning_rate": 2.933790003241942e-05, "loss": 2.0319, "step": 14281500 }, { "epoch": 41.34, "learning_rate": 2.933717638477214e-05, "loss": 2.0304, "step": 14282000 }, { "epoch": 41.34, "learning_rate": 2.9336452737124864e-05, "loss": 2.0494, "step": 14282500 }, { "epoch": 41.34, "learning_rate": 2.9335729089477586e-05, "loss": 2.0238, "step": 14283000 }, { "epoch": 41.34, "learning_rate": 2.933500544183031e-05, "loss": 2.0537, "step": 14283500 }, { "epoch": 41.35, "learning_rate": 2.9334281794183034e-05, "loss": 2.0578, "step": 14284000 }, { "epoch": 41.35, "learning_rate": 2.9333558146535756e-05, "loss": 2.0221, "step": 14284500 }, { "epoch": 41.35, "learning_rate": 2.933283449888848e-05, "loss": 2.0348, "step": 14285000 }, { "epoch": 41.35, "learning_rate": 2.93321108512412e-05, "loss": 2.0488, "step": 14285500 }, { "epoch": 41.35, "learning_rate": 2.9331387203593923e-05, "loss": 2.0376, "step": 14286000 }, { "epoch": 41.35, "learning_rate": 2.933066355594665e-05, "loss": 2.035, "step": 14286500 }, { "epoch": 41.36, "learning_rate": 2.932993990829937e-05, "loss": 2.0295, "step": 14287000 }, { "epoch": 41.36, "learning_rate": 2.9329217707947386e-05, "loss": 2.0351, "step": 14287500 }, { "epoch": 41.36, "learning_rate": 2.932849406030011e-05, "loss": 2.0455, "step": 14288000 }, { "epoch": 41.36, "learning_rate": 2.9327770412652838e-05, "loss": 2.0319, "step": 14288500 }, { "epoch": 41.36, "learning_rate": 2.932704676500556e-05, "loss": 2.0595, "step": 14289000 }, { "epoch": 41.36, "learning_rate": 2.932632456465358e-05, "loss": 2.0605, "step": 14289500 }, { "epoch": 41.36, "learning_rate": 2.93256009170063e-05, "loss": 2.0645, "step": 14290000 }, { "epoch": 41.37, "learning_rate": 2.9324877269359023e-05, "loss": 2.0317, "step": 14290500 }, { "epoch": 41.37, "learning_rate": 2.932415362171175e-05, "loss": 2.0304, "step": 14291000 }, { "epoch": 41.37, "learning_rate": 2.932342997406447e-05, "loss": 2.0435, "step": 14291500 }, { "epoch": 41.37, "learning_rate": 2.9322706326417193e-05, "loss": 2.0313, "step": 14292000 }, { "epoch": 41.37, "learning_rate": 2.9321982678769916e-05, "loss": 2.023, "step": 14292500 }, { "epoch": 41.37, "learning_rate": 2.9321259031122638e-05, "loss": 2.0247, "step": 14293000 }, { "epoch": 41.37, "learning_rate": 2.932053538347536e-05, "loss": 2.0413, "step": 14293500 }, { "epoch": 41.38, "learning_rate": 2.9319811735828086e-05, "loss": 2.0438, "step": 14294000 }, { "epoch": 41.38, "learning_rate": 2.9319088088180808e-05, "loss": 2.0204, "step": 14294500 }, { "epoch": 41.38, "learning_rate": 2.931836444053353e-05, "loss": 2.033, "step": 14295000 }, { "epoch": 41.38, "learning_rate": 2.9317640792886252e-05, "loss": 2.0498, "step": 14295500 }, { "epoch": 41.38, "learning_rate": 2.931691714523898e-05, "loss": 2.0341, "step": 14296000 }, { "epoch": 41.38, "learning_rate": 2.9316193497591704e-05, "loss": 2.0471, "step": 14296500 }, { "epoch": 41.38, "learning_rate": 2.9315471297239723e-05, "loss": 2.0409, "step": 14297000 }, { "epoch": 41.39, "learning_rate": 2.9314749096887738e-05, "loss": 2.0417, "step": 14297500 }, { "epoch": 41.39, "learning_rate": 2.9314026896535757e-05, "loss": 2.0365, "step": 14298000 }, { "epoch": 41.39, "learning_rate": 2.931330324888848e-05, "loss": 2.0633, "step": 14298500 }, { "epoch": 41.39, "learning_rate": 2.9312581048536498e-05, "loss": 2.04, "step": 14299000 }, { "epoch": 41.39, "learning_rate": 2.931185740088922e-05, "loss": 2.0437, "step": 14299500 }, { "epoch": 41.39, "learning_rate": 2.9311133753241943e-05, "loss": 2.0411, "step": 14300000 }, { "epoch": 41.39, "learning_rate": 2.9310410105594665e-05, "loss": 2.0459, "step": 14300500 }, { "epoch": 41.4, "learning_rate": 2.9309686457947387e-05, "loss": 2.0326, "step": 14301000 }, { "epoch": 41.4, "learning_rate": 2.9308962810300113e-05, "loss": 2.0201, "step": 14301500 }, { "epoch": 41.4, "learning_rate": 2.9308239162652835e-05, "loss": 2.0523, "step": 14302000 }, { "epoch": 41.4, "learning_rate": 2.9307515515005557e-05, "loss": 2.0617, "step": 14302500 }, { "epoch": 41.4, "learning_rate": 2.930679186735828e-05, "loss": 2.0404, "step": 14303000 }, { "epoch": 41.4, "learning_rate": 2.930606821971101e-05, "loss": 2.0448, "step": 14303500 }, { "epoch": 41.4, "learning_rate": 2.930534457206373e-05, "loss": 2.0318, "step": 14304000 }, { "epoch": 41.41, "learning_rate": 2.9304620924416453e-05, "loss": 2.0434, "step": 14304500 }, { "epoch": 41.41, "learning_rate": 2.9303898724064472e-05, "loss": 2.0243, "step": 14305000 }, { "epoch": 41.41, "learning_rate": 2.9303175076417194e-05, "loss": 2.0184, "step": 14305500 }, { "epoch": 41.41, "learning_rate": 2.9302451428769916e-05, "loss": 2.0571, "step": 14306000 }, { "epoch": 41.41, "learning_rate": 2.930172778112264e-05, "loss": 2.047, "step": 14306500 }, { "epoch": 41.41, "learning_rate": 2.9301004133475364e-05, "loss": 2.0488, "step": 14307000 }, { "epoch": 41.41, "learning_rate": 2.930028193312338e-05, "loss": 2.0491, "step": 14307500 }, { "epoch": 41.42, "learning_rate": 2.9299558285476102e-05, "loss": 2.0362, "step": 14308000 }, { "epoch": 41.42, "learning_rate": 2.9298834637828824e-05, "loss": 2.0454, "step": 14308500 }, { "epoch": 41.42, "learning_rate": 2.929811099018155e-05, "loss": 2.0211, "step": 14309000 }, { "epoch": 41.42, "learning_rate": 2.9297388789829565e-05, "loss": 2.0288, "step": 14309500 }, { "epoch": 41.42, "learning_rate": 2.9296665142182288e-05, "loss": 2.0461, "step": 14310000 }, { "epoch": 41.42, "learning_rate": 2.9295941494535013e-05, "loss": 2.0501, "step": 14310500 }, { "epoch": 41.42, "learning_rate": 2.929521784688774e-05, "loss": 2.053, "step": 14311000 }, { "epoch": 41.43, "learning_rate": 2.9294494199240464e-05, "loss": 2.0349, "step": 14311500 }, { "epoch": 41.43, "learning_rate": 2.9293770551593187e-05, "loss": 2.0163, "step": 14312000 }, { "epoch": 41.43, "learning_rate": 2.929304690394591e-05, "loss": 2.0398, "step": 14312500 }, { "epoch": 41.43, "learning_rate": 2.929232325629863e-05, "loss": 2.0515, "step": 14313000 }, { "epoch": 41.43, "learning_rate": 2.9291602503241943e-05, "loss": 2.0653, "step": 14313500 }, { "epoch": 41.43, "learning_rate": 2.9290878855594666e-05, "loss": 2.0257, "step": 14314000 }, { "epoch": 41.43, "learning_rate": 2.9290155207947388e-05, "loss": 2.053, "step": 14314500 }, { "epoch": 41.44, "learning_rate": 2.9289431560300114e-05, "loss": 2.0509, "step": 14315000 }, { "epoch": 41.44, "learning_rate": 2.9288707912652836e-05, "loss": 2.0454, "step": 14315500 }, { "epoch": 41.44, "learning_rate": 2.9287984265005558e-05, "loss": 2.0403, "step": 14316000 }, { "epoch": 41.44, "learning_rate": 2.928726061735828e-05, "loss": 2.0402, "step": 14316500 }, { "epoch": 41.44, "learning_rate": 2.92865384170063e-05, "loss": 2.0563, "step": 14317000 }, { "epoch": 41.44, "learning_rate": 2.928581476935902e-05, "loss": 2.047, "step": 14317500 }, { "epoch": 41.44, "learning_rate": 2.9285091121711744e-05, "loss": 2.0438, "step": 14318000 }, { "epoch": 41.45, "learning_rate": 2.9284367474064473e-05, "loss": 2.0282, "step": 14318500 }, { "epoch": 41.45, "learning_rate": 2.9283643826417195e-05, "loss": 2.037, "step": 14319000 }, { "epoch": 41.45, "learning_rate": 2.9282921626065214e-05, "loss": 2.0395, "step": 14319500 }, { "epoch": 41.45, "learning_rate": 2.9282197978417936e-05, "loss": 2.0538, "step": 14320000 }, { "epoch": 41.45, "learning_rate": 2.9281474330770658e-05, "loss": 2.0411, "step": 14320500 }, { "epoch": 41.45, "learning_rate": 2.928075068312338e-05, "loss": 2.0349, "step": 14321000 }, { "epoch": 41.45, "learning_rate": 2.92800284827714e-05, "loss": 2.0534, "step": 14321500 }, { "epoch": 41.46, "learning_rate": 2.9279304835124122e-05, "loss": 2.05, "step": 14322000 }, { "epoch": 41.46, "learning_rate": 2.9278581187476844e-05, "loss": 2.0603, "step": 14322500 }, { "epoch": 41.46, "learning_rate": 2.9277857539829566e-05, "loss": 2.028, "step": 14323000 }, { "epoch": 41.46, "learning_rate": 2.9277133892182292e-05, "loss": 2.0426, "step": 14323500 }, { "epoch": 41.46, "learning_rate": 2.9276410244535014e-05, "loss": 2.0443, "step": 14324000 }, { "epoch": 41.46, "learning_rate": 2.9275686596887736e-05, "loss": 2.0083, "step": 14324500 }, { "epoch": 41.47, "learning_rate": 2.927496294924046e-05, "loss": 2.0339, "step": 14325000 }, { "epoch": 41.47, "learning_rate": 2.927423930159318e-05, "loss": 2.0641, "step": 14325500 }, { "epoch": 41.47, "learning_rate": 2.927351565394591e-05, "loss": 2.0424, "step": 14326000 }, { "epoch": 41.47, "learning_rate": 2.9272792006298632e-05, "loss": 2.0721, "step": 14326500 }, { "epoch": 41.47, "learning_rate": 2.9272068358651354e-05, "loss": 2.0216, "step": 14327000 }, { "epoch": 41.47, "learning_rate": 2.927134471100408e-05, "loss": 2.0538, "step": 14327500 }, { "epoch": 41.47, "learning_rate": 2.9270621063356802e-05, "loss": 2.0325, "step": 14328000 }, { "epoch": 41.48, "learning_rate": 2.9269898863004818e-05, "loss": 2.0353, "step": 14328500 }, { "epoch": 41.48, "learning_rate": 2.9269176662652837e-05, "loss": 2.0513, "step": 14329000 }, { "epoch": 41.48, "learning_rate": 2.926845301500556e-05, "loss": 2.0435, "step": 14329500 }, { "epoch": 41.48, "learning_rate": 2.926772936735828e-05, "loss": 2.0423, "step": 14330000 }, { "epoch": 41.48, "learning_rate": 2.9267005719711003e-05, "loss": 2.0119, "step": 14330500 }, { "epoch": 41.48, "learning_rate": 2.9266283519359022e-05, "loss": 2.0307, "step": 14331000 }, { "epoch": 41.48, "learning_rate": 2.9265559871711744e-05, "loss": 2.0332, "step": 14331500 }, { "epoch": 41.49, "learning_rate": 2.9264836224064467e-05, "loss": 2.0231, "step": 14332000 }, { "epoch": 41.49, "learning_rate": 2.9264112576417192e-05, "loss": 2.0426, "step": 14332500 }, { "epoch": 41.49, "learning_rate": 2.9263388928769915e-05, "loss": 2.0437, "step": 14333000 }, { "epoch": 41.49, "learning_rate": 2.9262665281122644e-05, "loss": 2.0333, "step": 14333500 }, { "epoch": 41.49, "learning_rate": 2.9261941633475366e-05, "loss": 2.016, "step": 14334000 }, { "epoch": 41.49, "learning_rate": 2.926121943312338e-05, "loss": 2.0469, "step": 14334500 }, { "epoch": 41.49, "learning_rate": 2.92604972327714e-05, "loss": 2.0355, "step": 14335000 }, { "epoch": 41.5, "learning_rate": 2.9259773585124123e-05, "loss": 2.0485, "step": 14335500 }, { "epoch": 41.5, "learning_rate": 2.925905138477214e-05, "loss": 2.049, "step": 14336000 }, { "epoch": 41.5, "learning_rate": 2.9258327737124864e-05, "loss": 2.0482, "step": 14336500 }, { "epoch": 41.5, "learning_rate": 2.9257604089477586e-05, "loss": 2.0294, "step": 14337000 }, { "epoch": 41.5, "learning_rate": 2.9256880441830308e-05, "loss": 2.0452, "step": 14337500 }, { "epoch": 41.5, "learning_rate": 2.925615679418303e-05, "loss": 2.0598, "step": 14338000 }, { "epoch": 41.5, "learning_rate": 2.9255433146535756e-05, "loss": 2.0569, "step": 14338500 }, { "epoch": 41.51, "learning_rate": 2.9254709498888478e-05, "loss": 2.0297, "step": 14339000 }, { "epoch": 41.51, "learning_rate": 2.92539858512412e-05, "loss": 2.0164, "step": 14339500 }, { "epoch": 41.51, "learning_rate": 2.9253262203593923e-05, "loss": 2.052, "step": 14340000 }, { "epoch": 41.51, "learning_rate": 2.9252538555946645e-05, "loss": 2.032, "step": 14340500 }, { "epoch": 41.51, "learning_rate": 2.9251814908299374e-05, "loss": 2.0693, "step": 14341000 }, { "epoch": 41.51, "learning_rate": 2.9251091260652096e-05, "loss": 2.0412, "step": 14341500 }, { "epoch": 41.51, "learning_rate": 2.925036761300482e-05, "loss": 2.0666, "step": 14342000 }, { "epoch": 41.52, "learning_rate": 2.9249643965357544e-05, "loss": 2.0396, "step": 14342500 }, { "epoch": 41.52, "learning_rate": 2.9248920317710266e-05, "loss": 2.0485, "step": 14343000 }, { "epoch": 41.52, "learning_rate": 2.924819667006299e-05, "loss": 2.0361, "step": 14343500 }, { "epoch": 41.52, "learning_rate": 2.9247474469711007e-05, "loss": 2.0338, "step": 14344000 }, { "epoch": 41.52, "learning_rate": 2.924675082206373e-05, "loss": 2.0413, "step": 14344500 }, { "epoch": 41.52, "learning_rate": 2.9246027174416452e-05, "loss": 2.0466, "step": 14345000 }, { "epoch": 41.52, "learning_rate": 2.9245304974064467e-05, "loss": 2.0298, "step": 14345500 }, { "epoch": 41.53, "learning_rate": 2.9244581326417193e-05, "loss": 2.0153, "step": 14346000 }, { "epoch": 41.53, "learning_rate": 2.9243857678769915e-05, "loss": 2.0309, "step": 14346500 }, { "epoch": 41.53, "learning_rate": 2.9243134031122638e-05, "loss": 2.0422, "step": 14347000 }, { "epoch": 41.53, "learning_rate": 2.924241038347536e-05, "loss": 2.0332, "step": 14347500 }, { "epoch": 41.53, "learning_rate": 2.9241686735828082e-05, "loss": 2.0348, "step": 14348000 }, { "epoch": 41.53, "learning_rate": 2.924096308818081e-05, "loss": 2.0491, "step": 14348500 }, { "epoch": 41.53, "learning_rate": 2.9240239440533533e-05, "loss": 2.0771, "step": 14349000 }, { "epoch": 41.54, "learning_rate": 2.923951579288626e-05, "loss": 2.0481, "step": 14349500 }, { "epoch": 41.54, "learning_rate": 2.923879214523898e-05, "loss": 2.0323, "step": 14350000 }, { "epoch": 41.54, "learning_rate": 2.9238068497591703e-05, "loss": 2.0176, "step": 14350500 }, { "epoch": 41.54, "learning_rate": 2.9237344849944426e-05, "loss": 2.0472, "step": 14351000 }, { "epoch": 41.54, "learning_rate": 2.9236621202297148e-05, "loss": 2.0433, "step": 14351500 }, { "epoch": 41.54, "learning_rate": 2.923589755464987e-05, "loss": 2.0395, "step": 14352000 }, { "epoch": 41.54, "learning_rate": 2.9235173907002596e-05, "loss": 2.0821, "step": 14352500 }, { "epoch": 41.55, "learning_rate": 2.9234453153945908e-05, "loss": 2.0117, "step": 14353000 }, { "epoch": 41.55, "learning_rate": 2.923372950629863e-05, "loss": 2.0426, "step": 14353500 }, { "epoch": 41.55, "learning_rate": 2.9233005858651352e-05, "loss": 2.0344, "step": 14354000 }, { "epoch": 41.55, "learning_rate": 2.9232282211004075e-05, "loss": 2.0576, "step": 14354500 }, { "epoch": 41.55, "learning_rate": 2.9231558563356797e-05, "loss": 2.045, "step": 14355000 }, { "epoch": 41.55, "learning_rate": 2.923083491570952e-05, "loss": 2.0237, "step": 14355500 }, { "epoch": 41.55, "learning_rate": 2.9230111268062248e-05, "loss": 2.0346, "step": 14356000 }, { "epoch": 41.56, "learning_rate": 2.922938762041497e-05, "loss": 2.0149, "step": 14356500 }, { "epoch": 41.56, "learning_rate": 2.9228663972767696e-05, "loss": 2.0313, "step": 14357000 }, { "epoch": 41.56, "learning_rate": 2.922794177241571e-05, "loss": 2.0446, "step": 14357500 }, { "epoch": 41.56, "learning_rate": 2.9227218124768434e-05, "loss": 2.0393, "step": 14358000 }, { "epoch": 41.56, "learning_rate": 2.9226495924416453e-05, "loss": 2.0295, "step": 14358500 }, { "epoch": 41.56, "learning_rate": 2.922577372406447e-05, "loss": 2.0366, "step": 14359000 }, { "epoch": 41.56, "learning_rate": 2.9225050076417194e-05, "loss": 2.0443, "step": 14359500 }, { "epoch": 41.57, "learning_rate": 2.9224326428769916e-05, "loss": 2.042, "step": 14360000 }, { "epoch": 41.57, "learning_rate": 2.922360278112264e-05, "loss": 2.0538, "step": 14360500 }, { "epoch": 41.57, "learning_rate": 2.922287913347536e-05, "loss": 2.0299, "step": 14361000 }, { "epoch": 41.57, "learning_rate": 2.922215693312338e-05, "loss": 2.0487, "step": 14361500 }, { "epoch": 41.57, "learning_rate": 2.9221433285476102e-05, "loss": 2.0313, "step": 14362000 }, { "epoch": 41.57, "learning_rate": 2.9220709637828824e-05, "loss": 2.0435, "step": 14362500 }, { "epoch": 41.58, "learning_rate": 2.9219985990181546e-05, "loss": 2.023, "step": 14363000 }, { "epoch": 41.58, "learning_rate": 2.9219262342534275e-05, "loss": 2.0393, "step": 14363500 }, { "epoch": 41.58, "learning_rate": 2.9218538694886997e-05, "loss": 2.052, "step": 14364000 }, { "epoch": 41.58, "learning_rate": 2.9217815047239723e-05, "loss": 2.0229, "step": 14364500 }, { "epoch": 41.58, "learning_rate": 2.9217091399592445e-05, "loss": 2.0286, "step": 14365000 }, { "epoch": 41.58, "learning_rate": 2.9216367751945168e-05, "loss": 2.0452, "step": 14365500 }, { "epoch": 41.58, "learning_rate": 2.921564410429789e-05, "loss": 2.0486, "step": 14366000 }, { "epoch": 41.59, "learning_rate": 2.921492190394591e-05, "loss": 2.038, "step": 14366500 }, { "epoch": 41.59, "learning_rate": 2.921419825629863e-05, "loss": 2.0712, "step": 14367000 }, { "epoch": 41.59, "learning_rate": 2.9213474608651353e-05, "loss": 2.0336, "step": 14367500 }, { "epoch": 41.59, "learning_rate": 2.9212750961004075e-05, "loss": 2.0522, "step": 14368000 }, { "epoch": 41.59, "learning_rate": 2.9212028760652094e-05, "loss": 2.034, "step": 14368500 }, { "epoch": 41.59, "learning_rate": 2.9211305113004817e-05, "loss": 2.0417, "step": 14369000 }, { "epoch": 41.59, "learning_rate": 2.921058146535754e-05, "loss": 2.048, "step": 14369500 }, { "epoch": 41.6, "learning_rate": 2.920985781771026e-05, "loss": 2.046, "step": 14370000 }, { "epoch": 41.6, "learning_rate": 2.9209134170062987e-05, "loss": 2.0638, "step": 14370500 }, { "epoch": 41.6, "learning_rate": 2.9208410522415712e-05, "loss": 2.0396, "step": 14371000 }, { "epoch": 41.6, "learning_rate": 2.920768832206373e-05, "loss": 2.0366, "step": 14371500 }, { "epoch": 41.6, "learning_rate": 2.9206964674416454e-05, "loss": 2.0301, "step": 14372000 }, { "epoch": 41.6, "learning_rate": 2.9206241026769176e-05, "loss": 2.0225, "step": 14372500 }, { "epoch": 41.6, "learning_rate": 2.9205517379121898e-05, "loss": 2.049, "step": 14373000 }, { "epoch": 41.61, "learning_rate": 2.9204793731474624e-05, "loss": 2.0448, "step": 14373500 }, { "epoch": 41.61, "learning_rate": 2.9204070083827346e-05, "loss": 2.0232, "step": 14374000 }, { "epoch": 41.61, "learning_rate": 2.9203346436180068e-05, "loss": 2.0291, "step": 14374500 }, { "epoch": 41.61, "learning_rate": 2.920262278853279e-05, "loss": 2.0343, "step": 14375000 }, { "epoch": 41.61, "learning_rate": 2.9201899140885513e-05, "loss": 2.0466, "step": 14375500 }, { "epoch": 41.61, "learning_rate": 2.9201175493238235e-05, "loss": 2.0384, "step": 14376000 }, { "epoch": 41.61, "learning_rate": 2.920045184559096e-05, "loss": 2.0427, "step": 14376500 }, { "epoch": 41.62, "learning_rate": 2.9199728197943683e-05, "loss": 2.0529, "step": 14377000 }, { "epoch": 41.62, "learning_rate": 2.9199004550296405e-05, "loss": 2.0511, "step": 14377500 }, { "epoch": 41.62, "learning_rate": 2.9198280902649134e-05, "loss": 2.0504, "step": 14378000 }, { "epoch": 41.62, "learning_rate": 2.9197557255001856e-05, "loss": 2.03, "step": 14378500 }, { "epoch": 41.62, "learning_rate": 2.9196835054649875e-05, "loss": 2.0433, "step": 14379000 }, { "epoch": 41.62, "learning_rate": 2.9196111407002597e-05, "loss": 2.0251, "step": 14379500 }, { "epoch": 41.62, "learning_rate": 2.9195389206650613e-05, "loss": 2.0443, "step": 14380000 }, { "epoch": 41.63, "learning_rate": 2.919466555900334e-05, "loss": 2.0494, "step": 14380500 }, { "epoch": 41.63, "learning_rate": 2.9193943358651354e-05, "loss": 2.0707, "step": 14381000 }, { "epoch": 41.63, "learning_rate": 2.9193221158299373e-05, "loss": 2.0576, "step": 14381500 }, { "epoch": 41.63, "learning_rate": 2.919249895794739e-05, "loss": 2.041, "step": 14382000 }, { "epoch": 41.63, "learning_rate": 2.919177531030011e-05, "loss": 2.0208, "step": 14382500 }, { "epoch": 41.63, "learning_rate": 2.919105310994813e-05, "loss": 2.0707, "step": 14383000 }, { "epoch": 41.63, "learning_rate": 2.9190329462300852e-05, "loss": 2.0685, "step": 14383500 }, { "epoch": 41.64, "learning_rate": 2.9189605814653574e-05, "loss": 2.0405, "step": 14384000 }, { "epoch": 41.64, "learning_rate": 2.91888821670063e-05, "loss": 2.0455, "step": 14384500 }, { "epoch": 41.64, "learning_rate": 2.9188158519359022e-05, "loss": 2.0608, "step": 14385000 }, { "epoch": 41.64, "learning_rate": 2.9187434871711744e-05, "loss": 2.0474, "step": 14385500 }, { "epoch": 41.64, "learning_rate": 2.9186711224064473e-05, "loss": 2.0477, "step": 14386000 }, { "epoch": 41.64, "learning_rate": 2.9185987576417195e-05, "loss": 2.0497, "step": 14386500 }, { "epoch": 41.64, "learning_rate": 2.9185263928769918e-05, "loss": 2.0444, "step": 14387000 }, { "epoch": 41.65, "learning_rate": 2.918454028112264e-05, "loss": 2.042, "step": 14387500 }, { "epoch": 41.65, "learning_rate": 2.9183816633475362e-05, "loss": 2.0377, "step": 14388000 }, { "epoch": 41.65, "learning_rate": 2.9183092985828088e-05, "loss": 2.0568, "step": 14388500 }, { "epoch": 41.65, "learning_rate": 2.918236933818081e-05, "loss": 2.0508, "step": 14389000 }, { "epoch": 41.65, "learning_rate": 2.9181645690533532e-05, "loss": 2.0382, "step": 14389500 }, { "epoch": 41.65, "learning_rate": 2.9180922042886255e-05, "loss": 2.0491, "step": 14390000 }, { "epoch": 41.65, "learning_rate": 2.9180198395238977e-05, "loss": 2.0302, "step": 14390500 }, { "epoch": 41.66, "learning_rate": 2.9179476194886996e-05, "loss": 2.0605, "step": 14391000 }, { "epoch": 41.66, "learning_rate": 2.9178752547239718e-05, "loss": 2.0509, "step": 14391500 }, { "epoch": 41.66, "learning_rate": 2.917802889959244e-05, "loss": 2.0567, "step": 14392000 }, { "epoch": 41.66, "learning_rate": 2.9177305251945162e-05, "loss": 2.0385, "step": 14392500 }, { "epoch": 41.66, "learning_rate": 2.9176581604297888e-05, "loss": 2.0492, "step": 14393000 }, { "epoch": 41.66, "learning_rate": 2.9175857956650614e-05, "loss": 2.045, "step": 14393500 }, { "epoch": 41.66, "learning_rate": 2.917513430900334e-05, "loss": 2.0298, "step": 14394000 }, { "epoch": 41.67, "learning_rate": 2.917441066135606e-05, "loss": 2.0332, "step": 14394500 }, { "epoch": 41.67, "learning_rate": 2.9173687013708784e-05, "loss": 2.0468, "step": 14395000 }, { "epoch": 41.67, "learning_rate": 2.9172963366061506e-05, "loss": 2.0502, "step": 14395500 }, { "epoch": 41.67, "learning_rate": 2.9172239718414228e-05, "loss": 2.0475, "step": 14396000 }, { "epoch": 41.67, "learning_rate": 2.9171516070766954e-05, "loss": 2.064, "step": 14396500 }, { "epoch": 41.67, "learning_rate": 2.917079387041497e-05, "loss": 2.037, "step": 14397000 }, { "epoch": 41.67, "learning_rate": 2.917007022276769e-05, "loss": 2.0329, "step": 14397500 }, { "epoch": 41.68, "learning_rate": 2.9169346575120414e-05, "loss": 2.0361, "step": 14398000 }, { "epoch": 41.68, "learning_rate": 2.9168624374768433e-05, "loss": 2.0466, "step": 14398500 }, { "epoch": 41.68, "learning_rate": 2.9167902174416452e-05, "loss": 2.043, "step": 14399000 }, { "epoch": 41.68, "learning_rate": 2.9167178526769174e-05, "loss": 2.0241, "step": 14399500 }, { "epoch": 41.68, "learning_rate": 2.9166454879121896e-05, "loss": 2.046, "step": 14400000 }, { "epoch": 41.68, "learning_rate": 2.916573123147462e-05, "loss": 2.0254, "step": 14400500 }, { "epoch": 41.68, "learning_rate": 2.9165007583827347e-05, "loss": 2.0496, "step": 14401000 }, { "epoch": 41.69, "learning_rate": 2.9164285383475366e-05, "loss": 2.0644, "step": 14401500 }, { "epoch": 41.69, "learning_rate": 2.916356173582809e-05, "loss": 2.0286, "step": 14402000 }, { "epoch": 41.69, "learning_rate": 2.916283808818081e-05, "loss": 2.0649, "step": 14402500 }, { "epoch": 41.69, "learning_rate": 2.9162114440533533e-05, "loss": 2.03, "step": 14403000 }, { "epoch": 41.69, "learning_rate": 2.9161392240181552e-05, "loss": 2.0385, "step": 14403500 }, { "epoch": 41.69, "learning_rate": 2.9160668592534274e-05, "loss": 2.0321, "step": 14404000 }, { "epoch": 41.7, "learning_rate": 2.9159944944886996e-05, "loss": 2.0474, "step": 14404500 }, { "epoch": 41.7, "learning_rate": 2.915922129723972e-05, "loss": 2.0311, "step": 14405000 }, { "epoch": 41.7, "learning_rate": 2.9158499096887738e-05, "loss": 2.0371, "step": 14405500 }, { "epoch": 41.7, "learning_rate": 2.915777544924046e-05, "loss": 2.0624, "step": 14406000 }, { "epoch": 41.7, "learning_rate": 2.9157051801593182e-05, "loss": 2.0633, "step": 14406500 }, { "epoch": 41.7, "learning_rate": 2.9156328153945904e-05, "loss": 2.0479, "step": 14407000 }, { "epoch": 41.7, "learning_rate": 2.9155604506298627e-05, "loss": 2.0549, "step": 14407500 }, { "epoch": 41.71, "learning_rate": 2.9154880858651352e-05, "loss": 2.04, "step": 14408000 }, { "epoch": 41.71, "learning_rate": 2.9154158658299375e-05, "loss": 2.0239, "step": 14408500 }, { "epoch": 41.71, "learning_rate": 2.9153435010652097e-05, "loss": 2.0485, "step": 14409000 }, { "epoch": 41.71, "learning_rate": 2.915271136300482e-05, "loss": 2.0478, "step": 14409500 }, { "epoch": 41.71, "learning_rate": 2.915198771535754e-05, "loss": 2.0402, "step": 14410000 }, { "epoch": 41.71, "learning_rate": 2.9151264067710267e-05, "loss": 2.0445, "step": 14410500 }, { "epoch": 41.71, "learning_rate": 2.915054042006299e-05, "loss": 2.0272, "step": 14411000 }, { "epoch": 41.72, "learning_rate": 2.914981677241571e-05, "loss": 2.0544, "step": 14411500 }, { "epoch": 41.72, "learning_rate": 2.9149093124768434e-05, "loss": 2.0309, "step": 14412000 }, { "epoch": 41.72, "learning_rate": 2.9148369477121156e-05, "loss": 2.0496, "step": 14412500 }, { "epoch": 41.72, "learning_rate": 2.9147645829473878e-05, "loss": 2.0293, "step": 14413000 }, { "epoch": 41.72, "learning_rate": 2.9146922181826604e-05, "loss": 2.0508, "step": 14413500 }, { "epoch": 41.72, "learning_rate": 2.9146198534179326e-05, "loss": 2.0166, "step": 14414000 }, { "epoch": 41.72, "learning_rate": 2.9145474886532048e-05, "loss": 2.031, "step": 14414500 }, { "epoch": 41.73, "learning_rate": 2.914475123888477e-05, "loss": 2.0304, "step": 14415000 }, { "epoch": 41.73, "learning_rate": 2.91440275912375e-05, "loss": 2.0527, "step": 14415500 }, { "epoch": 41.73, "learning_rate": 2.914330683818081e-05, "loss": 2.0389, "step": 14416000 }, { "epoch": 41.73, "learning_rate": 2.9142583190533534e-05, "loss": 2.0494, "step": 14416500 }, { "epoch": 41.73, "learning_rate": 2.9141859542886256e-05, "loss": 2.063, "step": 14417000 }, { "epoch": 41.73, "learning_rate": 2.9141135895238982e-05, "loss": 2.042, "step": 14417500 }, { "epoch": 41.73, "learning_rate": 2.9140412247591704e-05, "loss": 2.0597, "step": 14418000 }, { "epoch": 41.74, "learning_rate": 2.913969004723972e-05, "loss": 2.0675, "step": 14418500 }, { "epoch": 41.74, "learning_rate": 2.9138966399592442e-05, "loss": 2.0672, "step": 14419000 }, { "epoch": 41.74, "learning_rate": 2.913824419924046e-05, "loss": 2.054, "step": 14419500 }, { "epoch": 41.74, "learning_rate": 2.9137520551593183e-05, "loss": 2.0417, "step": 14420000 }, { "epoch": 41.74, "learning_rate": 2.9136796903945905e-05, "loss": 2.0501, "step": 14420500 }, { "epoch": 41.74, "learning_rate": 2.9136074703593924e-05, "loss": 2.0391, "step": 14421000 }, { "epoch": 41.74, "learning_rate": 2.9135351055946646e-05, "loss": 2.0595, "step": 14421500 }, { "epoch": 41.75, "learning_rate": 2.913462740829937e-05, "loss": 2.0199, "step": 14422000 }, { "epoch": 41.75, "learning_rate": 2.9133903760652094e-05, "loss": 2.0432, "step": 14422500 }, { "epoch": 41.75, "learning_rate": 2.9133180113004816e-05, "loss": 2.0248, "step": 14423000 }, { "epoch": 41.75, "learning_rate": 2.9132456465357545e-05, "loss": 2.0428, "step": 14423500 }, { "epoch": 41.75, "learning_rate": 2.9131732817710268e-05, "loss": 2.0607, "step": 14424000 }, { "epoch": 41.75, "learning_rate": 2.913100917006299e-05, "loss": 2.0614, "step": 14424500 }, { "epoch": 41.75, "learning_rate": 2.9130285522415712e-05, "loss": 2.0605, "step": 14425000 }, { "epoch": 41.76, "learning_rate": 2.9129561874768434e-05, "loss": 2.0613, "step": 14425500 }, { "epoch": 41.76, "learning_rate": 2.9128838227121157e-05, "loss": 2.0324, "step": 14426000 }, { "epoch": 41.76, "learning_rate": 2.9128116026769176e-05, "loss": 2.0536, "step": 14426500 }, { "epoch": 41.76, "learning_rate": 2.9127392379121898e-05, "loss": 2.035, "step": 14427000 }, { "epoch": 41.76, "learning_rate": 2.912666873147462e-05, "loss": 2.0611, "step": 14427500 }, { "epoch": 41.76, "learning_rate": 2.9125945083827346e-05, "loss": 2.0334, "step": 14428000 }, { "epoch": 41.76, "learning_rate": 2.9125221436180068e-05, "loss": 2.0496, "step": 14428500 }, { "epoch": 41.77, "learning_rate": 2.912449778853279e-05, "loss": 2.0146, "step": 14429000 }, { "epoch": 41.77, "learning_rate": 2.9123774140885512e-05, "loss": 2.039, "step": 14429500 }, { "epoch": 41.77, "learning_rate": 2.912305194053353e-05, "loss": 2.0408, "step": 14430000 }, { "epoch": 41.77, "learning_rate": 2.9122328292886254e-05, "loss": 2.0621, "step": 14430500 }, { "epoch": 41.77, "learning_rate": 2.9121604645238983e-05, "loss": 2.0409, "step": 14431000 }, { "epoch": 41.77, "learning_rate": 2.9120880997591705e-05, "loss": 2.0371, "step": 14431500 }, { "epoch": 41.77, "learning_rate": 2.9120157349944427e-05, "loss": 2.0556, "step": 14432000 }, { "epoch": 41.78, "learning_rate": 2.911943370229715e-05, "loss": 2.044, "step": 14432500 }, { "epoch": 41.78, "learning_rate": 2.911871005464987e-05, "loss": 2.0509, "step": 14433000 }, { "epoch": 41.78, "learning_rate": 2.9117986407002597e-05, "loss": 2.0443, "step": 14433500 }, { "epoch": 41.78, "learning_rate": 2.911726275935532e-05, "loss": 2.0389, "step": 14434000 }, { "epoch": 41.78, "learning_rate": 2.911653911170804e-05, "loss": 2.0561, "step": 14434500 }, { "epoch": 41.78, "learning_rate": 2.9115815464060764e-05, "loss": 2.045, "step": 14435000 }, { "epoch": 41.78, "learning_rate": 2.9115091816413486e-05, "loss": 2.0532, "step": 14435500 }, { "epoch": 41.79, "learning_rate": 2.9114368168766208e-05, "loss": 2.0249, "step": 14436000 }, { "epoch": 41.79, "learning_rate": 2.9113644521118934e-05, "loss": 2.0307, "step": 14436500 }, { "epoch": 41.79, "learning_rate": 2.911292232076695e-05, "loss": 2.057, "step": 14437000 }, { "epoch": 41.79, "learning_rate": 2.911220012041497e-05, "loss": 2.0323, "step": 14437500 }, { "epoch": 41.79, "learning_rate": 2.9111477920062984e-05, "loss": 2.047, "step": 14438000 }, { "epoch": 41.79, "learning_rate": 2.9110754272415713e-05, "loss": 2.0587, "step": 14438500 }, { "epoch": 41.79, "learning_rate": 2.9110030624768435e-05, "loss": 2.0428, "step": 14439000 }, { "epoch": 41.8, "learning_rate": 2.9109306977121157e-05, "loss": 2.044, "step": 14439500 }, { "epoch": 41.8, "learning_rate": 2.9108583329473883e-05, "loss": 2.0539, "step": 14440000 }, { "epoch": 41.8, "learning_rate": 2.9107859681826605e-05, "loss": 2.0343, "step": 14440500 }, { "epoch": 41.8, "learning_rate": 2.9107136034179327e-05, "loss": 2.0326, "step": 14441000 }, { "epoch": 41.8, "learning_rate": 2.910641238653205e-05, "loss": 2.0231, "step": 14441500 }, { "epoch": 41.8, "learning_rate": 2.9105688738884772e-05, "loss": 2.049, "step": 14442000 }, { "epoch": 41.81, "learning_rate": 2.9104965091237498e-05, "loss": 2.0557, "step": 14442500 }, { "epoch": 41.81, "learning_rate": 2.910424144359022e-05, "loss": 2.0543, "step": 14443000 }, { "epoch": 41.81, "learning_rate": 2.9103517795942942e-05, "loss": 2.0575, "step": 14443500 }, { "epoch": 41.81, "learning_rate": 2.9102794148295664e-05, "loss": 1.9995, "step": 14444000 }, { "epoch": 41.81, "learning_rate": 2.9102070500648387e-05, "loss": 2.0222, "step": 14444500 }, { "epoch": 41.81, "learning_rate": 2.910134685300111e-05, "loss": 2.0571, "step": 14445000 }, { "epoch": 41.81, "learning_rate": 2.9100624652649134e-05, "loss": 2.0443, "step": 14445500 }, { "epoch": 41.82, "learning_rate": 2.9099901005001857e-05, "loss": 2.0475, "step": 14446000 }, { "epoch": 41.82, "learning_rate": 2.909917735735458e-05, "loss": 2.054, "step": 14446500 }, { "epoch": 41.82, "learning_rate": 2.90984537097073e-05, "loss": 2.0255, "step": 14447000 }, { "epoch": 41.82, "learning_rate": 2.9097732956650613e-05, "loss": 2.0404, "step": 14447500 }, { "epoch": 41.82, "learning_rate": 2.9097009309003336e-05, "loss": 2.0728, "step": 14448000 }, { "epoch": 41.82, "learning_rate": 2.909628566135606e-05, "loss": 2.0632, "step": 14448500 }, { "epoch": 41.82, "learning_rate": 2.9095562013708784e-05, "loss": 2.0364, "step": 14449000 }, { "epoch": 41.83, "learning_rate": 2.9094838366061506e-05, "loss": 2.047, "step": 14449500 }, { "epoch": 41.83, "learning_rate": 2.9094114718414228e-05, "loss": 2.0376, "step": 14450000 }, { "epoch": 41.83, "learning_rate": 2.909339107076695e-05, "loss": 2.0364, "step": 14450500 }, { "epoch": 41.83, "learning_rate": 2.9092667423119672e-05, "loss": 2.0375, "step": 14451000 }, { "epoch": 41.83, "learning_rate": 2.9091943775472398e-05, "loss": 2.0328, "step": 14451500 }, { "epoch": 41.83, "learning_rate": 2.909122012782512e-05, "loss": 2.0566, "step": 14452000 }, { "epoch": 41.83, "learning_rate": 2.9090496480177843e-05, "loss": 2.0477, "step": 14452500 }, { "epoch": 41.84, "learning_rate": 2.908977283253057e-05, "loss": 2.0344, "step": 14453000 }, { "epoch": 41.84, "learning_rate": 2.9089050632178587e-05, "loss": 2.0672, "step": 14453500 }, { "epoch": 41.84, "learning_rate": 2.9088326984531313e-05, "loss": 2.0296, "step": 14454000 }, { "epoch": 41.84, "learning_rate": 2.9087604784179328e-05, "loss": 2.055, "step": 14454500 }, { "epoch": 41.84, "learning_rate": 2.908688113653205e-05, "loss": 2.0451, "step": 14455000 }, { "epoch": 41.84, "learning_rate": 2.9086157488884773e-05, "loss": 2.0475, "step": 14455500 }, { "epoch": 41.84, "learning_rate": 2.908543528853279e-05, "loss": 2.0197, "step": 14456000 }, { "epoch": 41.85, "learning_rate": 2.9084711640885514e-05, "loss": 2.041, "step": 14456500 }, { "epoch": 41.85, "learning_rate": 2.9083987993238236e-05, "loss": 2.0467, "step": 14457000 }, { "epoch": 41.85, "learning_rate": 2.9083264345590962e-05, "loss": 2.0368, "step": 14457500 }, { "epoch": 41.85, "learning_rate": 2.9082540697943684e-05, "loss": 2.0287, "step": 14458000 }, { "epoch": 41.85, "learning_rate": 2.9081817050296406e-05, "loss": 2.051, "step": 14458500 }, { "epoch": 41.85, "learning_rate": 2.908109340264913e-05, "loss": 2.0712, "step": 14459000 }, { "epoch": 41.85, "learning_rate": 2.908036975500185e-05, "loss": 2.036, "step": 14459500 }, { "epoch": 41.86, "learning_rate": 2.9079646107354573e-05, "loss": 2.0571, "step": 14460000 }, { "epoch": 41.86, "learning_rate": 2.9078922459707302e-05, "loss": 2.0648, "step": 14460500 }, { "epoch": 41.86, "learning_rate": 2.907820025935532e-05, "loss": 2.0495, "step": 14461000 }, { "epoch": 41.86, "learning_rate": 2.9077476611708043e-05, "loss": 2.0468, "step": 14461500 }, { "epoch": 41.86, "learning_rate": 2.9076752964060765e-05, "loss": 2.0356, "step": 14462000 }, { "epoch": 41.86, "learning_rate": 2.9076029316413488e-05, "loss": 2.048, "step": 14462500 }, { "epoch": 41.86, "learning_rate": 2.9075305668766213e-05, "loss": 2.0691, "step": 14463000 }, { "epoch": 41.87, "learning_rate": 2.9074582021118935e-05, "loss": 2.0339, "step": 14463500 }, { "epoch": 41.87, "learning_rate": 2.9073858373471658e-05, "loss": 2.0461, "step": 14464000 }, { "epoch": 41.87, "learning_rate": 2.9073136173119677e-05, "loss": 2.0147, "step": 14464500 }, { "epoch": 41.87, "learning_rate": 2.90724125254724e-05, "loss": 2.0492, "step": 14465000 }, { "epoch": 41.87, "learning_rate": 2.907168887782512e-05, "loss": 2.0388, "step": 14465500 }, { "epoch": 41.87, "learning_rate": 2.9070965230177843e-05, "loss": 2.0586, "step": 14466000 }, { "epoch": 41.87, "learning_rate": 2.9070241582530566e-05, "loss": 2.0627, "step": 14466500 }, { "epoch": 41.88, "learning_rate": 2.9069517934883288e-05, "loss": 2.0375, "step": 14467000 }, { "epoch": 41.88, "learning_rate": 2.9068794287236017e-05, "loss": 2.0392, "step": 14467500 }, { "epoch": 41.88, "learning_rate": 2.906807063958874e-05, "loss": 2.0473, "step": 14468000 }, { "epoch": 41.88, "learning_rate": 2.9067346991941465e-05, "loss": 2.0469, "step": 14468500 }, { "epoch": 41.88, "learning_rate": 2.906662479158948e-05, "loss": 2.0266, "step": 14469000 }, { "epoch": 41.88, "learning_rate": 2.90659025912375e-05, "loss": 2.0563, "step": 14469500 }, { "epoch": 41.88, "learning_rate": 2.906517894359022e-05, "loss": 2.0259, "step": 14470000 }, { "epoch": 41.89, "learning_rate": 2.9064455295942944e-05, "loss": 2.069, "step": 14470500 }, { "epoch": 41.89, "learning_rate": 2.9063731648295666e-05, "loss": 2.0168, "step": 14471000 }, { "epoch": 41.89, "learning_rate": 2.9063008000648388e-05, "loss": 2.0383, "step": 14471500 }, { "epoch": 41.89, "learning_rate": 2.9062284353001114e-05, "loss": 2.0454, "step": 14472000 }, { "epoch": 41.89, "learning_rate": 2.9061560705353836e-05, "loss": 2.0553, "step": 14472500 }, { "epoch": 41.89, "learning_rate": 2.9060839952297148e-05, "loss": 2.0451, "step": 14473000 }, { "epoch": 41.89, "learning_rate": 2.906011630464987e-05, "loss": 2.0569, "step": 14473500 }, { "epoch": 41.9, "learning_rate": 2.9059392657002593e-05, "loss": 2.0374, "step": 14474000 }, { "epoch": 41.9, "learning_rate": 2.9058669009355315e-05, "loss": 2.0778, "step": 14474500 }, { "epoch": 41.9, "learning_rate": 2.9057945361708037e-05, "loss": 2.0174, "step": 14475000 }, { "epoch": 41.9, "learning_rate": 2.9057221714060766e-05, "loss": 2.0471, "step": 14475500 }, { "epoch": 41.9, "learning_rate": 2.905649806641349e-05, "loss": 2.0417, "step": 14476000 }, { "epoch": 41.9, "learning_rate": 2.9055774418766214e-05, "loss": 2.0438, "step": 14476500 }, { "epoch": 41.9, "learning_rate": 2.9055050771118936e-05, "loss": 2.0261, "step": 14477000 }, { "epoch": 41.91, "learning_rate": 2.905432712347166e-05, "loss": 2.0288, "step": 14477500 }, { "epoch": 41.91, "learning_rate": 2.905360347582438e-05, "loss": 2.0476, "step": 14478000 }, { "epoch": 41.91, "learning_rate": 2.9052879828177103e-05, "loss": 2.052, "step": 14478500 }, { "epoch": 41.91, "learning_rate": 2.905215618052983e-05, "loss": 2.0482, "step": 14479000 }, { "epoch": 41.91, "learning_rate": 2.905143253288255e-05, "loss": 2.0501, "step": 14479500 }, { "epoch": 41.91, "learning_rate": 2.9050708885235273e-05, "loss": 2.0558, "step": 14480000 }, { "epoch": 41.92, "learning_rate": 2.9049985237587995e-05, "loss": 2.0617, "step": 14480500 }, { "epoch": 41.92, "learning_rate": 2.9049261589940718e-05, "loss": 2.0456, "step": 14481000 }, { "epoch": 41.92, "learning_rate": 2.904853794229344e-05, "loss": 2.0503, "step": 14481500 }, { "epoch": 41.92, "learning_rate": 2.9047814294646165e-05, "loss": 2.0383, "step": 14482000 }, { "epoch": 41.92, "learning_rate": 2.904709064699889e-05, "loss": 2.0524, "step": 14482500 }, { "epoch": 41.92, "learning_rate": 2.9046369893942203e-05, "loss": 2.0432, "step": 14483000 }, { "epoch": 41.92, "learning_rate": 2.9045647693590222e-05, "loss": 2.043, "step": 14483500 }, { "epoch": 41.93, "learning_rate": 2.9044924045942944e-05, "loss": 2.07, "step": 14484000 }, { "epoch": 41.93, "learning_rate": 2.9044200398295667e-05, "loss": 2.0551, "step": 14484500 }, { "epoch": 41.93, "learning_rate": 2.9043476750648392e-05, "loss": 2.0634, "step": 14485000 }, { "epoch": 41.93, "learning_rate": 2.9042753103001115e-05, "loss": 2.0289, "step": 14485500 }, { "epoch": 41.93, "learning_rate": 2.904203090264913e-05, "loss": 2.0245, "step": 14486000 }, { "epoch": 41.93, "learning_rate": 2.9041307255001852e-05, "loss": 2.0494, "step": 14486500 }, { "epoch": 41.93, "learning_rate": 2.9040583607354578e-05, "loss": 2.0464, "step": 14487000 }, { "epoch": 41.94, "learning_rate": 2.90398599597073e-05, "loss": 2.0466, "step": 14487500 }, { "epoch": 41.94, "learning_rate": 2.9039136312060022e-05, "loss": 2.0492, "step": 14488000 }, { "epoch": 41.94, "learning_rate": 2.903841411170804e-05, "loss": 2.0447, "step": 14488500 }, { "epoch": 41.94, "learning_rate": 2.9037690464060764e-05, "loss": 2.0701, "step": 14489000 }, { "epoch": 41.94, "learning_rate": 2.9036966816413486e-05, "loss": 2.0494, "step": 14489500 }, { "epoch": 41.94, "learning_rate": 2.9036243168766208e-05, "loss": 2.0491, "step": 14490000 }, { "epoch": 41.94, "learning_rate": 2.9035519521118937e-05, "loss": 2.0516, "step": 14490500 }, { "epoch": 41.95, "learning_rate": 2.903479587347166e-05, "loss": 2.0454, "step": 14491000 }, { "epoch": 41.95, "learning_rate": 2.903407222582438e-05, "loss": 2.0609, "step": 14491500 }, { "epoch": 41.95, "learning_rate": 2.90333500254724e-05, "loss": 2.047, "step": 14492000 }, { "epoch": 41.95, "learning_rate": 2.9032626377825123e-05, "loss": 2.0528, "step": 14492500 }, { "epoch": 41.95, "learning_rate": 2.9031902730177845e-05, "loss": 2.0383, "step": 14493000 }, { "epoch": 41.95, "learning_rate": 2.9031180529825864e-05, "loss": 2.0586, "step": 14493500 }, { "epoch": 41.95, "learning_rate": 2.9030456882178586e-05, "loss": 2.0486, "step": 14494000 }, { "epoch": 41.96, "learning_rate": 2.902973323453131e-05, "loss": 2.043, "step": 14494500 }, { "epoch": 41.96, "learning_rate": 2.902900958688403e-05, "loss": 2.0269, "step": 14495000 }, { "epoch": 41.96, "learning_rate": 2.902828738653205e-05, "loss": 2.0268, "step": 14495500 }, { "epoch": 41.96, "learning_rate": 2.9027563738884772e-05, "loss": 2.0649, "step": 14496000 }, { "epoch": 41.96, "learning_rate": 2.9026840091237494e-05, "loss": 2.0446, "step": 14496500 }, { "epoch": 41.96, "learning_rate": 2.9026116443590216e-05, "loss": 2.0596, "step": 14497000 }, { "epoch": 41.96, "learning_rate": 2.9025392795942942e-05, "loss": 2.02, "step": 14497500 }, { "epoch": 41.97, "learning_rate": 2.9024669148295667e-05, "loss": 2.0333, "step": 14498000 }, { "epoch": 41.97, "learning_rate": 2.9023945500648393e-05, "loss": 2.0338, "step": 14498500 }, { "epoch": 41.97, "learning_rate": 2.9023221853001115e-05, "loss": 2.0514, "step": 14499000 }, { "epoch": 41.97, "learning_rate": 2.9022498205353838e-05, "loss": 2.0496, "step": 14499500 }, { "epoch": 41.97, "learning_rate": 2.902177455770656e-05, "loss": 2.046, "step": 14500000 }, { "epoch": 41.97, "learning_rate": 2.9021050910059282e-05, "loss": 2.0668, "step": 14500500 }, { "epoch": 41.97, "learning_rate": 2.9020327262412008e-05, "loss": 2.0152, "step": 14501000 }, { "epoch": 41.98, "learning_rate": 2.901960361476473e-05, "loss": 2.0179, "step": 14501500 }, { "epoch": 41.98, "learning_rate": 2.9018881414412745e-05, "loss": 2.0482, "step": 14502000 }, { "epoch": 41.98, "learning_rate": 2.9018157766765468e-05, "loss": 2.0322, "step": 14502500 }, { "epoch": 41.98, "learning_rate": 2.9017434119118193e-05, "loss": 2.05, "step": 14503000 }, { "epoch": 41.98, "learning_rate": 2.901671191876621e-05, "loss": 2.0248, "step": 14503500 }, { "epoch": 41.98, "learning_rate": 2.901598827111893e-05, "loss": 2.0466, "step": 14504000 }, { "epoch": 41.98, "learning_rate": 2.9015264623471657e-05, "loss": 2.0162, "step": 14504500 }, { "epoch": 41.99, "learning_rate": 2.901454097582438e-05, "loss": 2.0288, "step": 14505000 }, { "epoch": 41.99, "learning_rate": 2.9013817328177108e-05, "loss": 2.0321, "step": 14505500 }, { "epoch": 41.99, "learning_rate": 2.901309368052983e-05, "loss": 2.0285, "step": 14506000 }, { "epoch": 41.99, "learning_rate": 2.9012370032882552e-05, "loss": 2.0527, "step": 14506500 }, { "epoch": 41.99, "learning_rate": 2.9011646385235275e-05, "loss": 2.0253, "step": 14507000 }, { "epoch": 41.99, "learning_rate": 2.9010922737587997e-05, "loss": 2.0485, "step": 14507500 }, { "epoch": 41.99, "learning_rate": 2.901019908994072e-05, "loss": 2.0659, "step": 14508000 }, { "epoch": 42.0, "learning_rate": 2.9009475442293445e-05, "loss": 2.0598, "step": 14508500 }, { "epoch": 42.0, "learning_rate": 2.9008751794646167e-05, "loss": 2.0648, "step": 14509000 }, { "epoch": 42.0, "learning_rate": 2.900802814699889e-05, "loss": 2.0386, "step": 14509500 }, { "epoch": 42.0, "eval_accuracy": 0.67138533193208, "eval_accuracy_mlm": 0.6369597842213577, "eval_accuracy_nsp": 0.8559967551662977, "eval_loss": 2.166109323501587, "eval_runtime": 331.1807, "eval_samples_per_second": 1317.667, "eval_steps_per_second": 54.904, "step": 14509824 }, { "epoch": 42.0, "learning_rate": 2.90073073939422e-05, "loss": 2.0616, "step": 14510000 }, { "epoch": 42.0, "learning_rate": 2.9006583746294924e-05, "loss": 2.0531, "step": 14510500 }, { "epoch": 42.0, "learning_rate": 2.9005862993238236e-05, "loss": 2.0052, "step": 14511000 }, { "epoch": 42.0, "learning_rate": 2.9005139345590958e-05, "loss": 2.0104, "step": 14511500 }, { "epoch": 42.01, "learning_rate": 2.900441569794368e-05, "loss": 2.0179, "step": 14512000 }, { "epoch": 42.01, "learning_rate": 2.9003692050296406e-05, "loss": 2.0305, "step": 14512500 }, { "epoch": 42.01, "learning_rate": 2.900296840264913e-05, "loss": 2.0107, "step": 14513000 }, { "epoch": 42.01, "learning_rate": 2.9002244755001857e-05, "loss": 2.0175, "step": 14513500 }, { "epoch": 42.01, "learning_rate": 2.900152110735458e-05, "loss": 2.0106, "step": 14514000 }, { "epoch": 42.01, "learning_rate": 2.9000798907002595e-05, "loss": 2.0327, "step": 14514500 }, { "epoch": 42.01, "learning_rate": 2.900007525935532e-05, "loss": 2.0215, "step": 14515000 }, { "epoch": 42.02, "learning_rate": 2.8999351611708043e-05, "loss": 2.0025, "step": 14515500 }, { "epoch": 42.02, "learning_rate": 2.8998627964060765e-05, "loss": 2.0331, "step": 14516000 }, { "epoch": 42.02, "learning_rate": 2.8997904316413487e-05, "loss": 2.0377, "step": 14516500 }, { "epoch": 42.02, "learning_rate": 2.899718066876621e-05, "loss": 2.016, "step": 14517000 }, { "epoch": 42.02, "learning_rate": 2.8996457021118932e-05, "loss": 2.031, "step": 14517500 }, { "epoch": 42.02, "learning_rate": 2.8995733373471657e-05, "loss": 2.0532, "step": 14518000 }, { "epoch": 42.03, "learning_rate": 2.899500972582438e-05, "loss": 2.0345, "step": 14518500 }, { "epoch": 42.03, "learning_rate": 2.8994286078177102e-05, "loss": 2.0238, "step": 14519000 }, { "epoch": 42.03, "learning_rate": 2.899356387782512e-05, "loss": 2.0127, "step": 14519500 }, { "epoch": 42.03, "learning_rate": 2.8992840230177843e-05, "loss": 2.0367, "step": 14520000 }, { "epoch": 42.03, "learning_rate": 2.8992116582530572e-05, "loss": 2.0445, "step": 14520500 }, { "epoch": 42.03, "learning_rate": 2.8991392934883294e-05, "loss": 2.0297, "step": 14521000 }, { "epoch": 42.03, "learning_rate": 2.8990669287236017e-05, "loss": 2.0226, "step": 14521500 }, { "epoch": 42.04, "learning_rate": 2.898994563958874e-05, "loss": 2.0137, "step": 14522000 }, { "epoch": 42.04, "learning_rate": 2.898922199194146e-05, "loss": 2.0168, "step": 14522500 }, { "epoch": 42.04, "learning_rate": 2.898849979158948e-05, "loss": 2.0072, "step": 14523000 }, { "epoch": 42.04, "learning_rate": 2.8987776143942202e-05, "loss": 2.0148, "step": 14523500 }, { "epoch": 42.04, "learning_rate": 2.8987052496294924e-05, "loss": 2.0187, "step": 14524000 }, { "epoch": 42.04, "learning_rate": 2.8986328848647647e-05, "loss": 2.029, "step": 14524500 }, { "epoch": 42.04, "learning_rate": 2.8985605201000372e-05, "loss": 2.0371, "step": 14525000 }, { "epoch": 42.05, "learning_rate": 2.8984881553353095e-05, "loss": 2.0135, "step": 14525500 }, { "epoch": 42.05, "learning_rate": 2.898415935300111e-05, "loss": 2.0311, "step": 14526000 }, { "epoch": 42.05, "learning_rate": 2.8983435705353836e-05, "loss": 2.0066, "step": 14526500 }, { "epoch": 42.05, "learning_rate": 2.8982712057706558e-05, "loss": 2.0221, "step": 14527000 }, { "epoch": 42.05, "learning_rate": 2.8981988410059287e-05, "loss": 2.0123, "step": 14527500 }, { "epoch": 42.05, "learning_rate": 2.898126476241201e-05, "loss": 2.0428, "step": 14528000 }, { "epoch": 42.05, "learning_rate": 2.898054111476473e-05, "loss": 2.0391, "step": 14528500 }, { "epoch": 42.06, "learning_rate": 2.8979817467117454e-05, "loss": 2.0325, "step": 14529000 }, { "epoch": 42.06, "learning_rate": 2.8979093819470176e-05, "loss": 2.054, "step": 14529500 }, { "epoch": 42.06, "learning_rate": 2.8978370171822898e-05, "loss": 2.0372, "step": 14530000 }, { "epoch": 42.06, "learning_rate": 2.8977646524175624e-05, "loss": 2.0119, "step": 14530500 }, { "epoch": 42.06, "learning_rate": 2.8976922876528346e-05, "loss": 2.0362, "step": 14531000 }, { "epoch": 42.06, "learning_rate": 2.8976199228881068e-05, "loss": 2.0255, "step": 14531500 }, { "epoch": 42.06, "learning_rate": 2.8975477028529087e-05, "loss": 1.9955, "step": 14532000 }, { "epoch": 42.07, "learning_rate": 2.897475338088181e-05, "loss": 2.0328, "step": 14532500 }, { "epoch": 42.07, "learning_rate": 2.897402973323453e-05, "loss": 2.0371, "step": 14533000 }, { "epoch": 42.07, "learning_rate": 2.8973306085587254e-05, "loss": 2.0096, "step": 14533500 }, { "epoch": 42.07, "learning_rate": 2.8972582437939976e-05, "loss": 2.0362, "step": 14534000 }, { "epoch": 42.07, "learning_rate": 2.89718587902927e-05, "loss": 2.0386, "step": 14534500 }, { "epoch": 42.07, "learning_rate": 2.8971136589940724e-05, "loss": 2.0148, "step": 14535000 }, { "epoch": 42.07, "learning_rate": 2.897041438958874e-05, "loss": 2.031, "step": 14535500 }, { "epoch": 42.08, "learning_rate": 2.8969690741941462e-05, "loss": 2.0479, "step": 14536000 }, { "epoch": 42.08, "learning_rate": 2.8968967094294187e-05, "loss": 2.0273, "step": 14536500 }, { "epoch": 42.08, "learning_rate": 2.896824344664691e-05, "loss": 2.0247, "step": 14537000 }, { "epoch": 42.08, "learning_rate": 2.8967519798999632e-05, "loss": 2.0239, "step": 14537500 }, { "epoch": 42.08, "learning_rate": 2.8966797598647648e-05, "loss": 2.0116, "step": 14538000 }, { "epoch": 42.08, "learning_rate": 2.8966073951000373e-05, "loss": 2.02, "step": 14538500 }, { "epoch": 42.08, "learning_rate": 2.8965350303353095e-05, "loss": 2.0286, "step": 14539000 }, { "epoch": 42.09, "learning_rate": 2.8964626655705818e-05, "loss": 2.0144, "step": 14539500 }, { "epoch": 42.09, "learning_rate": 2.896390300805854e-05, "loss": 2.0283, "step": 14540000 }, { "epoch": 42.09, "learning_rate": 2.8963179360411262e-05, "loss": 2.044, "step": 14540500 }, { "epoch": 42.09, "learning_rate": 2.8962455712763988e-05, "loss": 2.0191, "step": 14541000 }, { "epoch": 42.09, "learning_rate": 2.896173206511671e-05, "loss": 2.019, "step": 14541500 }, { "epoch": 42.09, "learning_rate": 2.8961009864764725e-05, "loss": 2.0043, "step": 14542000 }, { "epoch": 42.09, "learning_rate": 2.8960286217117454e-05, "loss": 2.016, "step": 14542500 }, { "epoch": 42.1, "learning_rate": 2.8959564016765473e-05, "loss": 2.0118, "step": 14543000 }, { "epoch": 42.1, "learning_rate": 2.8958840369118196e-05, "loss": 2.0224, "step": 14543500 }, { "epoch": 42.1, "learning_rate": 2.8958116721470918e-05, "loss": 2.0456, "step": 14544000 }, { "epoch": 42.1, "learning_rate": 2.895739307382364e-05, "loss": 2.0371, "step": 14544500 }, { "epoch": 42.1, "learning_rate": 2.8956669426176362e-05, "loss": 1.9996, "step": 14545000 }, { "epoch": 42.1, "learning_rate": 2.895594722582438e-05, "loss": 2.0171, "step": 14545500 }, { "epoch": 42.1, "learning_rate": 2.8955223578177104e-05, "loss": 2.0375, "step": 14546000 }, { "epoch": 42.11, "learning_rate": 2.8954499930529826e-05, "loss": 2.0182, "step": 14546500 }, { "epoch": 42.11, "learning_rate": 2.8953777730177845e-05, "loss": 2.0285, "step": 14547000 }, { "epoch": 42.11, "learning_rate": 2.8953054082530567e-05, "loss": 2.0496, "step": 14547500 }, { "epoch": 42.11, "learning_rate": 2.895233043488329e-05, "loss": 2.0249, "step": 14548000 }, { "epoch": 42.11, "learning_rate": 2.895160678723601e-05, "loss": 2.0212, "step": 14548500 }, { "epoch": 42.11, "learning_rate": 2.8950883139588737e-05, "loss": 2.055, "step": 14549000 }, { "epoch": 42.11, "learning_rate": 2.8950160939236753e-05, "loss": 2.0435, "step": 14549500 }, { "epoch": 42.12, "learning_rate": 2.8949437291589475e-05, "loss": 2.0248, "step": 14550000 }, { "epoch": 42.12, "learning_rate": 2.8948713643942204e-05, "loss": 2.0364, "step": 14550500 }, { "epoch": 42.12, "learning_rate": 2.8947989996294926e-05, "loss": 2.0316, "step": 14551000 }, { "epoch": 42.12, "learning_rate": 2.894726634864765e-05, "loss": 2.0169, "step": 14551500 }, { "epoch": 42.12, "learning_rate": 2.8946542701000374e-05, "loss": 2.0268, "step": 14552000 }, { "epoch": 42.12, "learning_rate": 2.8945819053353096e-05, "loss": 2.0256, "step": 14552500 }, { "epoch": 42.12, "learning_rate": 2.894509540570582e-05, "loss": 2.0215, "step": 14553000 }, { "epoch": 42.13, "learning_rate": 2.894437175805854e-05, "loss": 2.0051, "step": 14553500 }, { "epoch": 42.13, "learning_rate": 2.8943648110411263e-05, "loss": 2.0326, "step": 14554000 }, { "epoch": 42.13, "learning_rate": 2.894292446276399e-05, "loss": 2.0293, "step": 14554500 }, { "epoch": 42.13, "learning_rate": 2.894220081511671e-05, "loss": 2.0201, "step": 14555000 }, { "epoch": 42.13, "learning_rate": 2.8941477167469433e-05, "loss": 2.0175, "step": 14555500 }, { "epoch": 42.13, "learning_rate": 2.8940754967117452e-05, "loss": 2.0304, "step": 14556000 }, { "epoch": 42.14, "learning_rate": 2.8940031319470174e-05, "loss": 2.0125, "step": 14556500 }, { "epoch": 42.14, "learning_rate": 2.8939307671822896e-05, "loss": 2.0398, "step": 14557000 }, { "epoch": 42.14, "learning_rate": 2.8938584024175625e-05, "loss": 2.0403, "step": 14557500 }, { "epoch": 42.14, "learning_rate": 2.8937860376528348e-05, "loss": 2.032, "step": 14558000 }, { "epoch": 42.14, "learning_rate": 2.8937138176176367e-05, "loss": 2.0372, "step": 14558500 }, { "epoch": 42.14, "learning_rate": 2.893641452852909e-05, "loss": 2.0376, "step": 14559000 }, { "epoch": 42.14, "learning_rate": 2.893569088088181e-05, "loss": 2.05, "step": 14559500 }, { "epoch": 42.15, "learning_rate": 2.8934968680529827e-05, "loss": 2.0376, "step": 14560000 }, { "epoch": 42.15, "learning_rate": 2.8934245032882552e-05, "loss": 2.0204, "step": 14560500 }, { "epoch": 42.15, "learning_rate": 2.8933521385235274e-05, "loss": 2.0459, "step": 14561000 }, { "epoch": 42.15, "learning_rate": 2.893279918488329e-05, "loss": 2.0417, "step": 14561500 }, { "epoch": 42.15, "learning_rate": 2.8932075537236016e-05, "loss": 2.0172, "step": 14562000 }, { "epoch": 42.15, "learning_rate": 2.8931351889588738e-05, "loss": 2.0186, "step": 14562500 }, { "epoch": 42.15, "learning_rate": 2.893062824194146e-05, "loss": 2.0129, "step": 14563000 }, { "epoch": 42.16, "learning_rate": 2.8929904594294182e-05, "loss": 2.0572, "step": 14563500 }, { "epoch": 42.16, "learning_rate": 2.8929180946646905e-05, "loss": 2.0532, "step": 14564000 }, { "epoch": 42.16, "learning_rate": 2.8928457298999627e-05, "loss": 2.0247, "step": 14564500 }, { "epoch": 42.16, "learning_rate": 2.8927733651352356e-05, "loss": 2.0218, "step": 14565000 }, { "epoch": 42.16, "learning_rate": 2.8927010003705078e-05, "loss": 2.0175, "step": 14565500 }, { "epoch": 42.16, "learning_rate": 2.8926286356057804e-05, "loss": 2.0425, "step": 14566000 }, { "epoch": 42.16, "learning_rate": 2.8925562708410526e-05, "loss": 2.0121, "step": 14566500 }, { "epoch": 42.17, "learning_rate": 2.8924839060763248e-05, "loss": 2.0439, "step": 14567000 }, { "epoch": 42.17, "learning_rate": 2.892411541311597e-05, "loss": 2.0145, "step": 14567500 }, { "epoch": 42.17, "learning_rate": 2.8923391765468693e-05, "loss": 2.0306, "step": 14568000 }, { "epoch": 42.17, "learning_rate": 2.8922668117821418e-05, "loss": 2.0074, "step": 14568500 }, { "epoch": 42.17, "learning_rate": 2.8921945917469434e-05, "loss": 2.0232, "step": 14569000 }, { "epoch": 42.17, "learning_rate": 2.8921222269822156e-05, "loss": 2.0061, "step": 14569500 }, { "epoch": 42.17, "learning_rate": 2.8920498622174878e-05, "loss": 2.0149, "step": 14570000 }, { "epoch": 42.18, "learning_rate": 2.8919776421822897e-05, "loss": 2.0109, "step": 14570500 }, { "epoch": 42.18, "learning_rate": 2.891905277417562e-05, "loss": 2.0483, "step": 14571000 }, { "epoch": 42.18, "learning_rate": 2.891832912652834e-05, "loss": 2.0157, "step": 14571500 }, { "epoch": 42.18, "learning_rate": 2.8917605478881067e-05, "loss": 2.0475, "step": 14572000 }, { "epoch": 42.18, "learning_rate": 2.8916881831233793e-05, "loss": 2.0168, "step": 14572500 }, { "epoch": 42.18, "learning_rate": 2.891615818358652e-05, "loss": 2.0433, "step": 14573000 }, { "epoch": 42.18, "learning_rate": 2.891543453593924e-05, "loss": 2.0381, "step": 14573500 }, { "epoch": 42.19, "learning_rate": 2.8914710888291963e-05, "loss": 2.0413, "step": 14574000 }, { "epoch": 42.19, "learning_rate": 2.8913987240644685e-05, "loss": 2.0472, "step": 14574500 }, { "epoch": 42.19, "learning_rate": 2.8913265040292704e-05, "loss": 2.0397, "step": 14575000 }, { "epoch": 42.19, "learning_rate": 2.8912541392645426e-05, "loss": 2.0539, "step": 14575500 }, { "epoch": 42.19, "learning_rate": 2.8911819192293442e-05, "loss": 2.0282, "step": 14576000 }, { "epoch": 42.19, "learning_rate": 2.891109699194146e-05, "loss": 2.0371, "step": 14576500 }, { "epoch": 42.19, "learning_rate": 2.8910373344294183e-05, "loss": 2.0107, "step": 14577000 }, { "epoch": 42.2, "learning_rate": 2.8909649696646905e-05, "loss": 2.0172, "step": 14577500 }, { "epoch": 42.2, "learning_rate": 2.890892604899963e-05, "loss": 2.0064, "step": 14578000 }, { "epoch": 42.2, "learning_rate": 2.8908202401352353e-05, "loss": 2.0421, "step": 14578500 }, { "epoch": 42.2, "learning_rate": 2.8907478753705075e-05, "loss": 2.0333, "step": 14579000 }, { "epoch": 42.2, "learning_rate": 2.8906755106057798e-05, "loss": 2.0206, "step": 14579500 }, { "epoch": 42.2, "learning_rate": 2.8906031458410527e-05, "loss": 2.0132, "step": 14580000 }, { "epoch": 42.2, "learning_rate": 2.890530781076325e-05, "loss": 2.0151, "step": 14580500 }, { "epoch": 42.21, "learning_rate": 2.890458416311597e-05, "loss": 2.0154, "step": 14581000 }, { "epoch": 42.21, "learning_rate": 2.8903860515468693e-05, "loss": 2.0375, "step": 14581500 }, { "epoch": 42.21, "learning_rate": 2.890313686782142e-05, "loss": 2.0176, "step": 14582000 }, { "epoch": 42.21, "learning_rate": 2.890241322017414e-05, "loss": 2.0368, "step": 14582500 }, { "epoch": 42.21, "learning_rate": 2.8901691019822157e-05, "loss": 2.0381, "step": 14583000 }, { "epoch": 42.21, "learning_rate": 2.8900968819470176e-05, "loss": 2.0342, "step": 14583500 }, { "epoch": 42.21, "learning_rate": 2.8900245171822898e-05, "loss": 2.0247, "step": 14584000 }, { "epoch": 42.22, "learning_rate": 2.8899522971470917e-05, "loss": 2.0353, "step": 14584500 }, { "epoch": 42.22, "learning_rate": 2.889879932382364e-05, "loss": 2.0347, "step": 14585000 }, { "epoch": 42.22, "learning_rate": 2.889807567617636e-05, "loss": 2.0215, "step": 14585500 }, { "epoch": 42.22, "learning_rate": 2.8897352028529084e-05, "loss": 2.0529, "step": 14586000 }, { "epoch": 42.22, "learning_rate": 2.8896628380881806e-05, "loss": 2.0232, "step": 14586500 }, { "epoch": 42.22, "learning_rate": 2.889590473323453e-05, "loss": 2.0138, "step": 14587000 }, { "epoch": 42.22, "learning_rate": 2.8895181085587257e-05, "loss": 2.0291, "step": 14587500 }, { "epoch": 42.23, "learning_rate": 2.8894457437939983e-05, "loss": 2.0303, "step": 14588000 }, { "epoch": 42.23, "learning_rate": 2.8893733790292705e-05, "loss": 2.0334, "step": 14588500 }, { "epoch": 42.23, "learning_rate": 2.8893010142645427e-05, "loss": 2.0518, "step": 14589000 }, { "epoch": 42.23, "learning_rate": 2.889228649499815e-05, "loss": 2.0404, "step": 14589500 }, { "epoch": 42.23, "learning_rate": 2.889156284735087e-05, "loss": 2.0278, "step": 14590000 }, { "epoch": 42.23, "learning_rate": 2.8890839199703594e-05, "loss": 2.0285, "step": 14590500 }, { "epoch": 42.23, "learning_rate": 2.889011555205632e-05, "loss": 2.0256, "step": 14591000 }, { "epoch": 42.24, "learning_rate": 2.8889391904409042e-05, "loss": 2.0372, "step": 14591500 }, { "epoch": 42.24, "learning_rate": 2.8888669704057057e-05, "loss": 2.0415, "step": 14592000 }, { "epoch": 42.24, "learning_rate": 2.8887947503705076e-05, "loss": 2.0073, "step": 14592500 }, { "epoch": 42.24, "learning_rate": 2.88872238560578e-05, "loss": 2.0372, "step": 14593000 }, { "epoch": 42.24, "learning_rate": 2.8886501655705817e-05, "loss": 2.0291, "step": 14593500 }, { "epoch": 42.24, "learning_rate": 2.888577800805854e-05, "loss": 2.0486, "step": 14594000 }, { "epoch": 42.25, "learning_rate": 2.8885054360411262e-05, "loss": 2.0247, "step": 14594500 }, { "epoch": 42.25, "learning_rate": 2.888433071276399e-05, "loss": 2.0197, "step": 14595000 }, { "epoch": 42.25, "learning_rate": 2.8883607065116713e-05, "loss": 2.019, "step": 14595500 }, { "epoch": 42.25, "learning_rate": 2.8882883417469435e-05, "loss": 2.011, "step": 14596000 }, { "epoch": 42.25, "learning_rate": 2.8882159769822158e-05, "loss": 2.0147, "step": 14596500 }, { "epoch": 42.25, "learning_rate": 2.8881436122174883e-05, "loss": 2.0478, "step": 14597000 }, { "epoch": 42.25, "learning_rate": 2.88807139218229e-05, "loss": 2.0582, "step": 14597500 }, { "epoch": 42.26, "learning_rate": 2.887999027417562e-05, "loss": 2.043, "step": 14598000 }, { "epoch": 42.26, "learning_rate": 2.8879266626528347e-05, "loss": 2.0323, "step": 14598500 }, { "epoch": 42.26, "learning_rate": 2.887854297888107e-05, "loss": 2.0241, "step": 14599000 }, { "epoch": 42.26, "learning_rate": 2.887781933123379e-05, "loss": 2.0218, "step": 14599500 }, { "epoch": 42.26, "learning_rate": 2.8877095683586513e-05, "loss": 2.0479, "step": 14600000 }, { "epoch": 42.26, "learning_rate": 2.8876372035939236e-05, "loss": 2.0562, "step": 14600500 }, { "epoch": 42.26, "learning_rate": 2.8875648388291958e-05, "loss": 2.0333, "step": 14601000 }, { "epoch": 42.27, "learning_rate": 2.8874924740644683e-05, "loss": 2.0431, "step": 14601500 }, { "epoch": 42.27, "learning_rate": 2.887420109299741e-05, "loss": 2.0481, "step": 14602000 }, { "epoch": 42.27, "learning_rate": 2.8873477445350135e-05, "loss": 2.0207, "step": 14602500 }, { "epoch": 42.27, "learning_rate": 2.8872753797702857e-05, "loss": 2.0206, "step": 14603000 }, { "epoch": 42.27, "learning_rate": 2.887203015005558e-05, "loss": 2.0478, "step": 14603500 }, { "epoch": 42.27, "learning_rate": 2.88713065024083e-05, "loss": 2.0345, "step": 14604000 }, { "epoch": 42.27, "learning_rate": 2.887058430205632e-05, "loss": 2.0399, "step": 14604500 }, { "epoch": 42.28, "learning_rate": 2.8869860654409043e-05, "loss": 2.016, "step": 14605000 }, { "epoch": 42.28, "learning_rate": 2.8869137006761765e-05, "loss": 2.0391, "step": 14605500 }, { "epoch": 42.28, "learning_rate": 2.8868413359114487e-05, "loss": 2.0145, "step": 14606000 }, { "epoch": 42.28, "learning_rate": 2.886768971146721e-05, "loss": 2.0103, "step": 14606500 }, { "epoch": 42.28, "learning_rate": 2.8866966063819935e-05, "loss": 2.0311, "step": 14607000 }, { "epoch": 42.28, "learning_rate": 2.8866242416172657e-05, "loss": 2.0282, "step": 14607500 }, { "epoch": 42.28, "learning_rate": 2.886551876852538e-05, "loss": 2.0024, "step": 14608000 }, { "epoch": 42.29, "learning_rate": 2.88647951208781e-05, "loss": 2.0325, "step": 14608500 }, { "epoch": 42.29, "learning_rate": 2.886407292052612e-05, "loss": 2.0257, "step": 14609000 }, { "epoch": 42.29, "learning_rate": 2.8863350720174143e-05, "loss": 2.0526, "step": 14609500 }, { "epoch": 42.29, "learning_rate": 2.8862628519822162e-05, "loss": 2.0654, "step": 14610000 }, { "epoch": 42.29, "learning_rate": 2.8861904872174884e-05, "loss": 2.0396, "step": 14610500 }, { "epoch": 42.29, "learning_rate": 2.8861181224527606e-05, "loss": 2.034, "step": 14611000 }, { "epoch": 42.29, "learning_rate": 2.886045757688033e-05, "loss": 2.0542, "step": 14611500 }, { "epoch": 42.3, "learning_rate": 2.885973392923305e-05, "loss": 2.0368, "step": 14612000 }, { "epoch": 42.3, "learning_rate": 2.8859010281585773e-05, "loss": 2.0409, "step": 14612500 }, { "epoch": 42.3, "learning_rate": 2.88582866339385e-05, "loss": 2.0308, "step": 14613000 }, { "epoch": 42.3, "learning_rate": 2.885756298629122e-05, "loss": 2.0325, "step": 14613500 }, { "epoch": 42.3, "learning_rate": 2.8856839338643943e-05, "loss": 2.015, "step": 14614000 }, { "epoch": 42.3, "learning_rate": 2.8856117138291962e-05, "loss": 2.0039, "step": 14614500 }, { "epoch": 42.3, "learning_rate": 2.8855394937939978e-05, "loss": 2.0391, "step": 14615000 }, { "epoch": 42.31, "learning_rate": 2.88546712902927e-05, "loss": 2.0429, "step": 14615500 }, { "epoch": 42.31, "learning_rate": 2.8853947642645422e-05, "loss": 2.0443, "step": 14616000 }, { "epoch": 42.31, "learning_rate": 2.8853223994998148e-05, "loss": 2.034, "step": 14616500 }, { "epoch": 42.31, "learning_rate": 2.8852500347350873e-05, "loss": 2.0329, "step": 14617000 }, { "epoch": 42.31, "learning_rate": 2.8851778146998892e-05, "loss": 2.0302, "step": 14617500 }, { "epoch": 42.31, "learning_rate": 2.8851054499351614e-05, "loss": 2.0164, "step": 14618000 }, { "epoch": 42.31, "learning_rate": 2.8850330851704337e-05, "loss": 2.0242, "step": 14618500 }, { "epoch": 42.32, "learning_rate": 2.8849607204057062e-05, "loss": 2.036, "step": 14619000 }, { "epoch": 42.32, "learning_rate": 2.8848883556409784e-05, "loss": 2.0345, "step": 14619500 }, { "epoch": 42.32, "learning_rate": 2.8848159908762507e-05, "loss": 2.0367, "step": 14620000 }, { "epoch": 42.32, "learning_rate": 2.884743626111523e-05, "loss": 2.0217, "step": 14620500 }, { "epoch": 42.32, "learning_rate": 2.8846714060763248e-05, "loss": 2.0322, "step": 14621000 }, { "epoch": 42.32, "learning_rate": 2.884599041311597e-05, "loss": 2.0047, "step": 14621500 }, { "epoch": 42.32, "learning_rate": 2.8845266765468692e-05, "loss": 2.0437, "step": 14622000 }, { "epoch": 42.33, "learning_rate": 2.8844543117821415e-05, "loss": 2.0325, "step": 14622500 }, { "epoch": 42.33, "learning_rate": 2.8843819470174137e-05, "loss": 2.0025, "step": 14623000 }, { "epoch": 42.33, "learning_rate": 2.8843095822526862e-05, "loss": 2.0427, "step": 14623500 }, { "epoch": 42.33, "learning_rate": 2.8842372174879585e-05, "loss": 2.0531, "step": 14624000 }, { "epoch": 42.33, "learning_rate": 2.8841648527232314e-05, "loss": 2.0363, "step": 14624500 }, { "epoch": 42.33, "learning_rate": 2.8840924879585036e-05, "loss": 2.0558, "step": 14625000 }, { "epoch": 42.33, "learning_rate": 2.884020267923305e-05, "loss": 2.0515, "step": 14625500 }, { "epoch": 42.34, "learning_rate": 2.8839479031585777e-05, "loss": 2.0355, "step": 14626000 }, { "epoch": 42.34, "learning_rate": 2.88387553839385e-05, "loss": 2.0275, "step": 14626500 }, { "epoch": 42.34, "learning_rate": 2.883803173629122e-05, "loss": 2.0335, "step": 14627000 }, { "epoch": 42.34, "learning_rate": 2.8837309535939237e-05, "loss": 2.0268, "step": 14627500 }, { "epoch": 42.34, "learning_rate": 2.8836585888291963e-05, "loss": 2.0286, "step": 14628000 }, { "epoch": 42.34, "learning_rate": 2.8835862240644685e-05, "loss": 2.0371, "step": 14628500 }, { "epoch": 42.34, "learning_rate": 2.8835138592997407e-05, "loss": 2.0327, "step": 14629000 }, { "epoch": 42.35, "learning_rate": 2.883441494535013e-05, "loss": 2.0415, "step": 14629500 }, { "epoch": 42.35, "learning_rate": 2.883369129770285e-05, "loss": 2.0355, "step": 14630000 }, { "epoch": 42.35, "learning_rate": 2.8832967650055577e-05, "loss": 2.0367, "step": 14630500 }, { "epoch": 42.35, "learning_rate": 2.88322440024083e-05, "loss": 2.0233, "step": 14631000 }, { "epoch": 42.35, "learning_rate": 2.8831520354761022e-05, "loss": 2.0414, "step": 14631500 }, { "epoch": 42.35, "learning_rate": 2.883079670711375e-05, "loss": 2.0284, "step": 14632000 }, { "epoch": 42.36, "learning_rate": 2.8830073059466473e-05, "loss": 2.0409, "step": 14632500 }, { "epoch": 42.36, "learning_rate": 2.8829349411819195e-05, "loss": 2.0436, "step": 14633000 }, { "epoch": 42.36, "learning_rate": 2.8828625764171917e-05, "loss": 2.035, "step": 14633500 }, { "epoch": 42.36, "learning_rate": 2.882790211652464e-05, "loss": 2.0391, "step": 14634000 }, { "epoch": 42.36, "learning_rate": 2.8827181363467952e-05, "loss": 2.0495, "step": 14634500 }, { "epoch": 42.36, "learning_rate": 2.8826457715820678e-05, "loss": 2.0459, "step": 14635000 }, { "epoch": 42.36, "learning_rate": 2.88257340681734e-05, "loss": 2.0351, "step": 14635500 }, { "epoch": 42.37, "learning_rate": 2.8825010420526122e-05, "loss": 2.0164, "step": 14636000 }, { "epoch": 42.37, "learning_rate": 2.8824286772878844e-05, "loss": 2.0443, "step": 14636500 }, { "epoch": 42.37, "learning_rate": 2.8823564572526863e-05, "loss": 2.0393, "step": 14637000 }, { "epoch": 42.37, "learning_rate": 2.882284237217488e-05, "loss": 2.0353, "step": 14637500 }, { "epoch": 42.37, "learning_rate": 2.88221187245276e-05, "loss": 2.0414, "step": 14638000 }, { "epoch": 42.37, "learning_rate": 2.8821395076880327e-05, "loss": 2.0463, "step": 14638500 }, { "epoch": 42.37, "learning_rate": 2.882067142923305e-05, "loss": 2.0378, "step": 14639000 }, { "epoch": 42.38, "learning_rate": 2.8819947781585778e-05, "loss": 2.055, "step": 14639500 }, { "epoch": 42.38, "learning_rate": 2.88192241339385e-05, "loss": 2.0415, "step": 14640000 }, { "epoch": 42.38, "learning_rate": 2.8818500486291222e-05, "loss": 2.0418, "step": 14640500 }, { "epoch": 42.38, "learning_rate": 2.8817776838643945e-05, "loss": 2.0335, "step": 14641000 }, { "epoch": 42.38, "learning_rate": 2.8817053190996667e-05, "loss": 2.0252, "step": 14641500 }, { "epoch": 42.38, "learning_rate": 2.881632954334939e-05, "loss": 2.0314, "step": 14642000 }, { "epoch": 42.38, "learning_rate": 2.8815607342997408e-05, "loss": 2.0345, "step": 14642500 }, { "epoch": 42.39, "learning_rate": 2.881488369535013e-05, "loss": 2.0313, "step": 14643000 }, { "epoch": 42.39, "learning_rate": 2.881416149499815e-05, "loss": 2.0212, "step": 14643500 }, { "epoch": 42.39, "learning_rate": 2.8813439294646165e-05, "loss": 2.0339, "step": 14644000 }, { "epoch": 42.39, "learning_rate": 2.881271564699889e-05, "loss": 2.0343, "step": 14644500 }, { "epoch": 42.39, "learning_rate": 2.8811991999351613e-05, "loss": 2.0495, "step": 14645000 }, { "epoch": 42.39, "learning_rate": 2.8811268351704335e-05, "loss": 2.0556, "step": 14645500 }, { "epoch": 42.39, "learning_rate": 2.8810544704057057e-05, "loss": 2.048, "step": 14646000 }, { "epoch": 42.4, "learning_rate": 2.880982105640978e-05, "loss": 2.0214, "step": 14646500 }, { "epoch": 42.4, "learning_rate": 2.880909740876251e-05, "loss": 2.0202, "step": 14647000 }, { "epoch": 42.4, "learning_rate": 2.880837376111523e-05, "loss": 2.0386, "step": 14647500 }, { "epoch": 42.4, "learning_rate": 2.8807650113467953e-05, "loss": 2.0342, "step": 14648000 }, { "epoch": 42.4, "learning_rate": 2.880692646582068e-05, "loss": 2.0299, "step": 14648500 }, { "epoch": 42.4, "learning_rate": 2.88062028181734e-05, "loss": 2.0389, "step": 14649000 }, { "epoch": 42.4, "learning_rate": 2.8805479170526123e-05, "loss": 2.04, "step": 14649500 }, { "epoch": 42.41, "learning_rate": 2.8804755522878845e-05, "loss": 2.0533, "step": 14650000 }, { "epoch": 42.41, "learning_rate": 2.8804034769822157e-05, "loss": 2.0359, "step": 14650500 }, { "epoch": 42.41, "learning_rate": 2.880331112217488e-05, "loss": 2.027, "step": 14651000 }, { "epoch": 42.41, "learning_rate": 2.8802587474527605e-05, "loss": 2.0216, "step": 14651500 }, { "epoch": 42.41, "learning_rate": 2.8801863826880327e-05, "loss": 2.0488, "step": 14652000 }, { "epoch": 42.41, "learning_rate": 2.880114017923305e-05, "loss": 2.025, "step": 14652500 }, { "epoch": 42.41, "learning_rate": 2.8800416531585772e-05, "loss": 2.0462, "step": 14653000 }, { "epoch": 42.42, "learning_rate": 2.8799692883938494e-05, "loss": 2.0393, "step": 14653500 }, { "epoch": 42.42, "learning_rate": 2.8798969236291216e-05, "loss": 2.0244, "step": 14654000 }, { "epoch": 42.42, "learning_rate": 2.8798247035939242e-05, "loss": 2.031, "step": 14654500 }, { "epoch": 42.42, "learning_rate": 2.8797523388291964e-05, "loss": 2.0585, "step": 14655000 }, { "epoch": 42.42, "learning_rate": 2.8796799740644687e-05, "loss": 2.0137, "step": 14655500 }, { "epoch": 42.42, "learning_rate": 2.879607609299741e-05, "loss": 2.0618, "step": 14656000 }, { "epoch": 42.42, "learning_rate": 2.879535244535013e-05, "loss": 2.0288, "step": 14656500 }, { "epoch": 42.43, "learning_rate": 2.8794628797702857e-05, "loss": 2.0301, "step": 14657000 }, { "epoch": 42.43, "learning_rate": 2.879390515005558e-05, "loss": 2.0464, "step": 14657500 }, { "epoch": 42.43, "learning_rate": 2.8793182949703594e-05, "loss": 2.0286, "step": 14658000 }, { "epoch": 42.43, "learning_rate": 2.8792459302056317e-05, "loss": 2.0266, "step": 14658500 }, { "epoch": 42.43, "learning_rate": 2.8791735654409042e-05, "loss": 2.0288, "step": 14659000 }, { "epoch": 42.43, "learning_rate": 2.8791013454057058e-05, "loss": 2.0289, "step": 14659500 }, { "epoch": 42.43, "learning_rate": 2.8790291253705077e-05, "loss": 2.0432, "step": 14660000 }, { "epoch": 42.44, "learning_rate": 2.87895676060578e-05, "loss": 2.0491, "step": 14660500 }, { "epoch": 42.44, "learning_rate": 2.878884395841052e-05, "loss": 2.029, "step": 14661000 }, { "epoch": 42.44, "learning_rate": 2.8788120310763244e-05, "loss": 2.0458, "step": 14661500 }, { "epoch": 42.44, "learning_rate": 2.878739666311597e-05, "loss": 2.0419, "step": 14662000 }, { "epoch": 42.44, "learning_rate": 2.8786673015468695e-05, "loss": 2.0454, "step": 14662500 }, { "epoch": 42.44, "learning_rate": 2.878594936782142e-05, "loss": 2.0443, "step": 14663000 }, { "epoch": 42.44, "learning_rate": 2.8785225720174143e-05, "loss": 2.033, "step": 14663500 }, { "epoch": 42.45, "learning_rate": 2.8784502072526865e-05, "loss": 2.0488, "step": 14664000 }, { "epoch": 42.45, "learning_rate": 2.878377987217488e-05, "loss": 2.0515, "step": 14664500 }, { "epoch": 42.45, "learning_rate": 2.8783056224527606e-05, "loss": 2.028, "step": 14665000 }, { "epoch": 42.45, "learning_rate": 2.8782332576880328e-05, "loss": 2.0513, "step": 14665500 }, { "epoch": 42.45, "learning_rate": 2.878160892923305e-05, "loss": 2.0313, "step": 14666000 }, { "epoch": 42.45, "learning_rate": 2.8780885281585773e-05, "loss": 2.044, "step": 14666500 }, { "epoch": 42.45, "learning_rate": 2.8780161633938495e-05, "loss": 2.0136, "step": 14667000 }, { "epoch": 42.46, "learning_rate": 2.877943798629122e-05, "loss": 2.0236, "step": 14667500 }, { "epoch": 42.46, "learning_rate": 2.8778714338643943e-05, "loss": 2.0343, "step": 14668000 }, { "epoch": 42.46, "learning_rate": 2.8777990690996665e-05, "loss": 2.0179, "step": 14668500 }, { "epoch": 42.46, "learning_rate": 2.877726849064468e-05, "loss": 2.0196, "step": 14669000 }, { "epoch": 42.46, "learning_rate": 2.877654484299741e-05, "loss": 2.0302, "step": 14669500 }, { "epoch": 42.46, "learning_rate": 2.8775821195350132e-05, "loss": 2.0326, "step": 14670000 }, { "epoch": 42.47, "learning_rate": 2.8775097547702857e-05, "loss": 2.0464, "step": 14670500 }, { "epoch": 42.47, "learning_rate": 2.8774375347350873e-05, "loss": 2.0277, "step": 14671000 }, { "epoch": 42.47, "learning_rate": 2.8773651699703595e-05, "loss": 2.0477, "step": 14671500 }, { "epoch": 42.47, "learning_rate": 2.877292805205632e-05, "loss": 2.0489, "step": 14672000 }, { "epoch": 42.47, "learning_rate": 2.8772204404409043e-05, "loss": 2.044, "step": 14672500 }, { "epoch": 42.47, "learning_rate": 2.8771480756761765e-05, "loss": 2.0357, "step": 14673000 }, { "epoch": 42.47, "learning_rate": 2.8770757109114488e-05, "loss": 2.0248, "step": 14673500 }, { "epoch": 42.48, "learning_rate": 2.877003346146721e-05, "loss": 2.0478, "step": 14674000 }, { "epoch": 42.48, "learning_rate": 2.8769309813819932e-05, "loss": 2.0463, "step": 14674500 }, { "epoch": 42.48, "learning_rate": 2.8768586166172658e-05, "loss": 2.0189, "step": 14675000 }, { "epoch": 42.48, "learning_rate": 2.8767863965820673e-05, "loss": 2.0427, "step": 14675500 }, { "epoch": 42.48, "learning_rate": 2.8767140318173395e-05, "loss": 2.0283, "step": 14676000 }, { "epoch": 42.48, "learning_rate": 2.876641667052612e-05, "loss": 2.0448, "step": 14676500 }, { "epoch": 42.48, "learning_rate": 2.8765693022878847e-05, "loss": 2.0218, "step": 14677000 }, { "epoch": 42.49, "learning_rate": 2.8764969375231572e-05, "loss": 2.0406, "step": 14677500 }, { "epoch": 42.49, "learning_rate": 2.8764248622174885e-05, "loss": 2.0623, "step": 14678000 }, { "epoch": 42.49, "learning_rate": 2.8763524974527607e-05, "loss": 2.0276, "step": 14678500 }, { "epoch": 42.49, "learning_rate": 2.876280132688033e-05, "loss": 2.0386, "step": 14679000 }, { "epoch": 42.49, "learning_rate": 2.876207767923305e-05, "loss": 2.0367, "step": 14679500 }, { "epoch": 42.49, "learning_rate": 2.8761354031585774e-05, "loss": 2.0411, "step": 14680000 }, { "epoch": 42.49, "learning_rate": 2.8760630383938496e-05, "loss": 2.0414, "step": 14680500 }, { "epoch": 42.5, "learning_rate": 2.875990673629122e-05, "loss": 2.0271, "step": 14681000 }, { "epoch": 42.5, "learning_rate": 2.8759183088643944e-05, "loss": 2.0272, "step": 14681500 }, { "epoch": 42.5, "learning_rate": 2.8758459440996666e-05, "loss": 2.0495, "step": 14682000 }, { "epoch": 42.5, "learning_rate": 2.8757735793349388e-05, "loss": 2.0531, "step": 14682500 }, { "epoch": 42.5, "learning_rate": 2.8757013592997407e-05, "loss": 2.0354, "step": 14683000 }, { "epoch": 42.5, "learning_rate": 2.8756291392645423e-05, "loss": 2.0573, "step": 14683500 }, { "epoch": 42.5, "learning_rate": 2.8755567744998145e-05, "loss": 2.0308, "step": 14684000 }, { "epoch": 42.51, "learning_rate": 2.8754845544646164e-05, "loss": 2.0451, "step": 14684500 }, { "epoch": 42.51, "learning_rate": 2.8754121896998893e-05, "loss": 2.0096, "step": 14685000 }, { "epoch": 42.51, "learning_rate": 2.8753398249351615e-05, "loss": 2.0246, "step": 14685500 }, { "epoch": 42.51, "learning_rate": 2.8752674601704337e-05, "loss": 2.031, "step": 14686000 }, { "epoch": 42.51, "learning_rate": 2.875195095405706e-05, "loss": 2.0396, "step": 14686500 }, { "epoch": 42.51, "learning_rate": 2.8751227306409785e-05, "loss": 2.0351, "step": 14687000 }, { "epoch": 42.51, "learning_rate": 2.8750503658762507e-05, "loss": 2.0189, "step": 14687500 }, { "epoch": 42.52, "learning_rate": 2.874978001111523e-05, "loss": 2.0447, "step": 14688000 }, { "epoch": 42.52, "learning_rate": 2.8749056363467952e-05, "loss": 2.0286, "step": 14688500 }, { "epoch": 42.52, "learning_rate": 2.8748332715820674e-05, "loss": 2.0601, "step": 14689000 }, { "epoch": 42.52, "learning_rate": 2.8747609068173396e-05, "loss": 2.0403, "step": 14689500 }, { "epoch": 42.52, "learning_rate": 2.8746885420526122e-05, "loss": 2.0476, "step": 14690000 }, { "epoch": 42.52, "learning_rate": 2.8746163220174137e-05, "loss": 2.0105, "step": 14690500 }, { "epoch": 42.52, "learning_rate": 2.874543957252686e-05, "loss": 2.0411, "step": 14691000 }, { "epoch": 42.53, "learning_rate": 2.8744715924879585e-05, "loss": 2.0457, "step": 14691500 }, { "epoch": 42.53, "learning_rate": 2.874399227723231e-05, "loss": 2.0389, "step": 14692000 }, { "epoch": 42.53, "learning_rate": 2.8743268629585037e-05, "loss": 2.0476, "step": 14692500 }, { "epoch": 42.53, "learning_rate": 2.8742546429233052e-05, "loss": 2.0201, "step": 14693000 }, { "epoch": 42.53, "learning_rate": 2.8741822781585774e-05, "loss": 2.0304, "step": 14693500 }, { "epoch": 42.53, "learning_rate": 2.87410991339385e-05, "loss": 2.0682, "step": 14694000 }, { "epoch": 42.53, "learning_rate": 2.8740375486291222e-05, "loss": 2.0234, "step": 14694500 }, { "epoch": 42.54, "learning_rate": 2.8739651838643944e-05, "loss": 2.0245, "step": 14695000 }, { "epoch": 42.54, "learning_rate": 2.873892963829196e-05, "loss": 2.0185, "step": 14695500 }, { "epoch": 42.54, "learning_rate": 2.8738205990644686e-05, "loss": 2.0563, "step": 14696000 }, { "epoch": 42.54, "learning_rate": 2.8737482342997408e-05, "loss": 2.0714, "step": 14696500 }, { "epoch": 42.54, "learning_rate": 2.873675869535013e-05, "loss": 2.0623, "step": 14697000 }, { "epoch": 42.54, "learning_rate": 2.8736035047702852e-05, "loss": 2.0446, "step": 14697500 }, { "epoch": 42.54, "learning_rate": 2.8735311400055575e-05, "loss": 2.0317, "step": 14698000 }, { "epoch": 42.55, "learning_rate": 2.8734589199703593e-05, "loss": 2.0283, "step": 14698500 }, { "epoch": 42.55, "learning_rate": 2.8733866999351612e-05, "loss": 2.0212, "step": 14699000 }, { "epoch": 42.55, "learning_rate": 2.8733143351704335e-05, "loss": 2.0186, "step": 14699500 }, { "epoch": 42.55, "learning_rate": 2.8732419704057064e-05, "loss": 2.0581, "step": 14700000 }, { "epoch": 42.55, "learning_rate": 2.8731696056409786e-05, "loss": 2.0295, "step": 14700500 }, { "epoch": 42.55, "learning_rate": 2.8730972408762508e-05, "loss": 2.0377, "step": 14701000 }, { "epoch": 42.55, "learning_rate": 2.873024876111523e-05, "loss": 2.027, "step": 14701500 }, { "epoch": 42.56, "learning_rate": 2.8729525113467953e-05, "loss": 2.0458, "step": 14702000 }, { "epoch": 42.56, "learning_rate": 2.8728801465820675e-05, "loss": 2.0227, "step": 14702500 }, { "epoch": 42.56, "learning_rate": 2.87280778181734e-05, "loss": 2.0217, "step": 14703000 }, { "epoch": 42.56, "learning_rate": 2.8727354170526123e-05, "loss": 2.0183, "step": 14703500 }, { "epoch": 42.56, "learning_rate": 2.8726630522878845e-05, "loss": 2.0339, "step": 14704000 }, { "epoch": 42.56, "learning_rate": 2.8725906875231567e-05, "loss": 2.0355, "step": 14704500 }, { "epoch": 42.56, "learning_rate": 2.8725184674879586e-05, "loss": 2.0395, "step": 14705000 }, { "epoch": 42.57, "learning_rate": 2.8724461027232308e-05, "loss": 2.044, "step": 14705500 }, { "epoch": 42.57, "learning_rate": 2.872373737958503e-05, "loss": 2.0308, "step": 14706000 }, { "epoch": 42.57, "learning_rate": 2.8723013731937753e-05, "loss": 2.0492, "step": 14706500 }, { "epoch": 42.57, "learning_rate": 2.8722291531585775e-05, "loss": 2.037, "step": 14707000 }, { "epoch": 42.57, "learning_rate": 2.87215678839385e-05, "loss": 2.0308, "step": 14707500 }, { "epoch": 42.57, "learning_rate": 2.8720844236291223e-05, "loss": 2.0623, "step": 14708000 }, { "epoch": 42.58, "learning_rate": 2.8720120588643945e-05, "loss": 2.0578, "step": 14708500 }, { "epoch": 42.58, "learning_rate": 2.8719396940996667e-05, "loss": 2.0394, "step": 14709000 }, { "epoch": 42.58, "learning_rate": 2.871867329334939e-05, "loss": 2.0309, "step": 14709500 }, { "epoch": 42.58, "learning_rate": 2.8717949645702112e-05, "loss": 2.0533, "step": 14710000 }, { "epoch": 42.58, "learning_rate": 2.8717225998054838e-05, "loss": 2.0589, "step": 14710500 }, { "epoch": 42.58, "learning_rate": 2.8716506692293443e-05, "loss": 2.0296, "step": 14711000 }, { "epoch": 42.58, "learning_rate": 2.8715783044646165e-05, "loss": 2.0117, "step": 14711500 }, { "epoch": 42.59, "learning_rate": 2.8715059396998888e-05, "loss": 2.0301, "step": 14712000 }, { "epoch": 42.59, "learning_rate": 2.8714335749351613e-05, "loss": 2.0237, "step": 14712500 }, { "epoch": 42.59, "learning_rate": 2.8713612101704335e-05, "loss": 2.0251, "step": 14713000 }, { "epoch": 42.59, "learning_rate": 2.871288990135235e-05, "loss": 2.0421, "step": 14713500 }, { "epoch": 42.59, "learning_rate": 2.8712166253705077e-05, "loss": 2.0351, "step": 14714000 }, { "epoch": 42.59, "learning_rate": 2.87114426060578e-05, "loss": 2.0385, "step": 14714500 }, { "epoch": 42.59, "learning_rate": 2.8710718958410528e-05, "loss": 2.0338, "step": 14715000 }, { "epoch": 42.6, "learning_rate": 2.870999531076325e-05, "loss": 2.0358, "step": 14715500 }, { "epoch": 42.6, "learning_rate": 2.8709271663115972e-05, "loss": 2.0437, "step": 14716000 }, { "epoch": 42.6, "learning_rate": 2.8708548015468695e-05, "loss": 2.0272, "step": 14716500 }, { "epoch": 42.6, "learning_rate": 2.8707824367821417e-05, "loss": 2.0232, "step": 14717000 }, { "epoch": 42.6, "learning_rate": 2.870710072017414e-05, "loss": 2.0487, "step": 14717500 }, { "epoch": 42.6, "learning_rate": 2.8706377072526865e-05, "loss": 2.0618, "step": 14718000 }, { "epoch": 42.6, "learning_rate": 2.8705653424879587e-05, "loss": 2.0352, "step": 14718500 }, { "epoch": 42.61, "learning_rate": 2.8704931224527602e-05, "loss": 2.0438, "step": 14719000 }, { "epoch": 42.61, "learning_rate": 2.8704207576880328e-05, "loss": 2.0094, "step": 14719500 }, { "epoch": 42.61, "learning_rate": 2.870348392923305e-05, "loss": 2.0237, "step": 14720000 }, { "epoch": 42.61, "learning_rate": 2.8702760281585773e-05, "loss": 2.0432, "step": 14720500 }, { "epoch": 42.61, "learning_rate": 2.8702036633938495e-05, "loss": 2.0322, "step": 14721000 }, { "epoch": 42.61, "learning_rate": 2.8701312986291217e-05, "loss": 2.0471, "step": 14721500 }, { "epoch": 42.61, "learning_rate": 2.870059078593924e-05, "loss": 2.0273, "step": 14722000 }, { "epoch": 42.62, "learning_rate": 2.8699867138291965e-05, "loss": 2.0034, "step": 14722500 }, { "epoch": 42.62, "learning_rate": 2.869914493793998e-05, "loss": 2.0394, "step": 14723000 }, { "epoch": 42.62, "learning_rate": 2.8698421290292703e-05, "loss": 2.0204, "step": 14723500 }, { "epoch": 42.62, "learning_rate": 2.869769764264543e-05, "loss": 2.0293, "step": 14724000 }, { "epoch": 42.62, "learning_rate": 2.8696975442293444e-05, "loss": 2.0396, "step": 14724500 }, { "epoch": 42.62, "learning_rate": 2.8696251794646166e-05, "loss": 2.0298, "step": 14725000 }, { "epoch": 42.62, "learning_rate": 2.8695528146998892e-05, "loss": 2.0333, "step": 14725500 }, { "epoch": 42.63, "learning_rate": 2.8694807393942204e-05, "loss": 2.0326, "step": 14726000 }, { "epoch": 42.63, "learning_rate": 2.8694083746294926e-05, "loss": 2.0328, "step": 14726500 }, { "epoch": 42.63, "learning_rate": 2.869336009864765e-05, "loss": 2.0247, "step": 14727000 }, { "epoch": 42.63, "learning_rate": 2.869263645100037e-05, "loss": 2.0399, "step": 14727500 }, { "epoch": 42.63, "learning_rate": 2.8691912803353093e-05, "loss": 2.0059, "step": 14728000 }, { "epoch": 42.63, "learning_rate": 2.8691189155705815e-05, "loss": 2.0222, "step": 14728500 }, { "epoch": 42.63, "learning_rate": 2.869046550805854e-05, "loss": 2.0298, "step": 14729000 }, { "epoch": 42.64, "learning_rate": 2.8689741860411263e-05, "loss": 2.0265, "step": 14729500 }, { "epoch": 42.64, "learning_rate": 2.8689018212763992e-05, "loss": 2.047, "step": 14730000 }, { "epoch": 42.64, "learning_rate": 2.8688294565116714e-05, "loss": 2.0352, "step": 14730500 }, { "epoch": 42.64, "learning_rate": 2.8687570917469437e-05, "loss": 2.0344, "step": 14731000 }, { "epoch": 42.64, "learning_rate": 2.868684726982216e-05, "loss": 2.0417, "step": 14731500 }, { "epoch": 42.64, "learning_rate": 2.868612362217488e-05, "loss": 2.0161, "step": 14732000 }, { "epoch": 42.64, "learning_rate": 2.8685399974527603e-05, "loss": 2.0566, "step": 14732500 }, { "epoch": 42.65, "learning_rate": 2.868467632688033e-05, "loss": 2.0454, "step": 14733000 }, { "epoch": 42.65, "learning_rate": 2.868395267923305e-05, "loss": 2.0402, "step": 14733500 }, { "epoch": 42.65, "learning_rate": 2.8683229031585773e-05, "loss": 2.0364, "step": 14734000 }, { "epoch": 42.65, "learning_rate": 2.8682505383938496e-05, "loss": 2.0379, "step": 14734500 }, { "epoch": 42.65, "learning_rate": 2.8681781736291218e-05, "loss": 2.0342, "step": 14735000 }, { "epoch": 42.65, "learning_rate": 2.8681058088643943e-05, "loss": 2.0393, "step": 14735500 }, { "epoch": 42.65, "learning_rate": 2.868033588829196e-05, "loss": 2.0343, "step": 14736000 }, { "epoch": 42.66, "learning_rate": 2.867961224064468e-05, "loss": 2.0532, "step": 14736500 }, { "epoch": 42.66, "learning_rate": 2.867888859299741e-05, "loss": 2.0343, "step": 14737000 }, { "epoch": 42.66, "learning_rate": 2.8678164945350132e-05, "loss": 2.0334, "step": 14737500 }, { "epoch": 42.66, "learning_rate": 2.8677441297702855e-05, "loss": 2.0439, "step": 14738000 }, { "epoch": 42.66, "learning_rate": 2.867671765005558e-05, "loss": 2.0133, "step": 14738500 }, { "epoch": 42.66, "learning_rate": 2.8675994002408303e-05, "loss": 2.0312, "step": 14739000 }, { "epoch": 42.66, "learning_rate": 2.8675271802056318e-05, "loss": 2.0118, "step": 14739500 }, { "epoch": 42.67, "learning_rate": 2.8674548154409044e-05, "loss": 2.0325, "step": 14740000 }, { "epoch": 42.67, "learning_rate": 2.8673824506761766e-05, "loss": 2.0285, "step": 14740500 }, { "epoch": 42.67, "learning_rate": 2.8673100859114488e-05, "loss": 2.0624, "step": 14741000 }, { "epoch": 42.67, "learning_rate": 2.867237721146721e-05, "loss": 2.0547, "step": 14741500 }, { "epoch": 42.67, "learning_rate": 2.8671653563819933e-05, "loss": 2.0437, "step": 14742000 }, { "epoch": 42.67, "learning_rate": 2.8670929916172655e-05, "loss": 2.0325, "step": 14742500 }, { "epoch": 42.67, "learning_rate": 2.867020626852538e-05, "loss": 2.0387, "step": 14743000 }, { "epoch": 42.68, "learning_rate": 2.8669482620878103e-05, "loss": 2.0219, "step": 14743500 }, { "epoch": 42.68, "learning_rate": 2.8668758973230832e-05, "loss": 2.0147, "step": 14744000 }, { "epoch": 42.68, "learning_rate": 2.8668035325583554e-05, "loss": 2.0429, "step": 14744500 }, { "epoch": 42.68, "learning_rate": 2.8667311677936276e-05, "loss": 2.013, "step": 14745000 }, { "epoch": 42.68, "learning_rate": 2.8666589477584295e-05, "loss": 2.0398, "step": 14745500 }, { "epoch": 42.68, "learning_rate": 2.8665865829937017e-05, "loss": 2.0473, "step": 14746000 }, { "epoch": 42.69, "learning_rate": 2.866514218228974e-05, "loss": 2.0283, "step": 14746500 }, { "epoch": 42.69, "learning_rate": 2.8664418534642462e-05, "loss": 2.0295, "step": 14747000 }, { "epoch": 42.69, "learning_rate": 2.8663694886995184e-05, "loss": 2.0503, "step": 14747500 }, { "epoch": 42.69, "learning_rate": 2.8662972686643203e-05, "loss": 2.0238, "step": 14748000 }, { "epoch": 42.69, "learning_rate": 2.8662249038995925e-05, "loss": 2.0445, "step": 14748500 }, { "epoch": 42.69, "learning_rate": 2.8661525391348647e-05, "loss": 2.0396, "step": 14749000 }, { "epoch": 42.69, "learning_rate": 2.8660803190996666e-05, "loss": 2.0341, "step": 14749500 }, { "epoch": 42.7, "learning_rate": 2.866007954334939e-05, "loss": 2.0215, "step": 14750000 }, { "epoch": 42.7, "learning_rate": 2.865935589570211e-05, "loss": 2.0485, "step": 14750500 }, { "epoch": 42.7, "learning_rate": 2.8658632248054833e-05, "loss": 2.0246, "step": 14751000 }, { "epoch": 42.7, "learning_rate": 2.8657910047702852e-05, "loss": 2.0378, "step": 14751500 }, { "epoch": 42.7, "learning_rate": 2.865718640005558e-05, "loss": 2.0523, "step": 14752000 }, { "epoch": 42.7, "learning_rate": 2.8656462752408303e-05, "loss": 2.04, "step": 14752500 }, { "epoch": 42.7, "learning_rate": 2.8655739104761026e-05, "loss": 2.0473, "step": 14753000 }, { "epoch": 42.71, "learning_rate": 2.8655015457113748e-05, "loss": 2.062, "step": 14753500 }, { "epoch": 42.71, "learning_rate": 2.865429180946647e-05, "loss": 2.0307, "step": 14754000 }, { "epoch": 42.71, "learning_rate": 2.865356960911449e-05, "loss": 2.0329, "step": 14754500 }, { "epoch": 42.71, "learning_rate": 2.865284596146721e-05, "loss": 2.0439, "step": 14755000 }, { "epoch": 42.71, "learning_rate": 2.8652122313819933e-05, "loss": 2.0568, "step": 14755500 }, { "epoch": 42.71, "learning_rate": 2.865139866617266e-05, "loss": 2.0531, "step": 14756000 }, { "epoch": 42.71, "learning_rate": 2.8650676465820675e-05, "loss": 2.0499, "step": 14756500 }, { "epoch": 42.72, "learning_rate": 2.8649954265468694e-05, "loss": 2.034, "step": 14757000 }, { "epoch": 42.72, "learning_rate": 2.8649230617821416e-05, "loss": 2.0372, "step": 14757500 }, { "epoch": 42.72, "learning_rate": 2.8648506970174138e-05, "loss": 2.0424, "step": 14758000 }, { "epoch": 42.72, "learning_rate": 2.864778332252686e-05, "loss": 2.0537, "step": 14758500 }, { "epoch": 42.72, "learning_rate": 2.8647059674879582e-05, "loss": 2.0529, "step": 14759000 }, { "epoch": 42.72, "learning_rate": 2.864633602723231e-05, "loss": 2.0538, "step": 14759500 }, { "epoch": 42.72, "learning_rate": 2.8645612379585034e-05, "loss": 2.0154, "step": 14760000 }, { "epoch": 42.73, "learning_rate": 2.864488873193776e-05, "loss": 2.027, "step": 14760500 }, { "epoch": 42.73, "learning_rate": 2.864416508429048e-05, "loss": 2.0483, "step": 14761000 }, { "epoch": 42.73, "learning_rate": 2.8643441436643204e-05, "loss": 2.0278, "step": 14761500 }, { "epoch": 42.73, "learning_rate": 2.8642717788995926e-05, "loss": 2.0291, "step": 14762000 }, { "epoch": 42.73, "learning_rate": 2.8641994141348648e-05, "loss": 2.0253, "step": 14762500 }, { "epoch": 42.73, "learning_rate": 2.864127049370137e-05, "loss": 2.0359, "step": 14763000 }, { "epoch": 42.73, "learning_rate": 2.8640546846054096e-05, "loss": 2.0286, "step": 14763500 }, { "epoch": 42.74, "learning_rate": 2.863982319840682e-05, "loss": 2.0496, "step": 14764000 }, { "epoch": 42.74, "learning_rate": 2.863909955075954e-05, "loss": 2.0372, "step": 14764500 }, { "epoch": 42.74, "learning_rate": 2.8638375903112263e-05, "loss": 2.0578, "step": 14765000 }, { "epoch": 42.74, "learning_rate": 2.8637653702760282e-05, "loss": 2.0509, "step": 14765500 }, { "epoch": 42.74, "learning_rate": 2.8636930055113004e-05, "loss": 2.0279, "step": 14766000 }, { "epoch": 42.74, "learning_rate": 2.8636206407465733e-05, "loss": 2.0211, "step": 14766500 }, { "epoch": 42.74, "learning_rate": 2.8635482759818455e-05, "loss": 2.0431, "step": 14767000 }, { "epoch": 42.75, "learning_rate": 2.8634759112171177e-05, "loss": 2.0261, "step": 14767500 }, { "epoch": 42.75, "learning_rate": 2.86340354645239e-05, "loss": 2.0408, "step": 14768000 }, { "epoch": 42.75, "learning_rate": 2.863331326417192e-05, "loss": 2.0227, "step": 14768500 }, { "epoch": 42.75, "learning_rate": 2.863258961652464e-05, "loss": 2.0395, "step": 14769000 }, { "epoch": 42.75, "learning_rate": 2.8631865968877363e-05, "loss": 2.0295, "step": 14769500 }, { "epoch": 42.75, "learning_rate": 2.8631142321230085e-05, "loss": 2.0598, "step": 14770000 }, { "epoch": 42.75, "learning_rate": 2.863041867358281e-05, "loss": 2.0476, "step": 14770500 }, { "epoch": 42.76, "learning_rate": 2.8629695025935533e-05, "loss": 2.035, "step": 14771000 }, { "epoch": 42.76, "learning_rate": 2.8628971378288255e-05, "loss": 2.0286, "step": 14771500 }, { "epoch": 42.76, "learning_rate": 2.8628247730640978e-05, "loss": 2.0395, "step": 14772000 }, { "epoch": 42.76, "learning_rate": 2.8627525530288997e-05, "loss": 2.008, "step": 14772500 }, { "epoch": 42.76, "learning_rate": 2.8626803329937012e-05, "loss": 2.0721, "step": 14773000 }, { "epoch": 42.76, "learning_rate": 2.862608112958503e-05, "loss": 2.0472, "step": 14773500 }, { "epoch": 42.76, "learning_rate": 2.8625357481937753e-05, "loss": 2.0181, "step": 14774000 }, { "epoch": 42.77, "learning_rate": 2.8624633834290482e-05, "loss": 2.018, "step": 14774500 }, { "epoch": 42.77, "learning_rate": 2.8623910186643205e-05, "loss": 2.0046, "step": 14775000 }, { "epoch": 42.77, "learning_rate": 2.8623186538995927e-05, "loss": 2.051, "step": 14775500 }, { "epoch": 42.77, "learning_rate": 2.862246289134865e-05, "loss": 2.0264, "step": 14776000 }, { "epoch": 42.77, "learning_rate": 2.8621739243701375e-05, "loss": 2.0404, "step": 14776500 }, { "epoch": 42.77, "learning_rate": 2.862101704334939e-05, "loss": 2.0325, "step": 14777000 }, { "epoch": 42.77, "learning_rate": 2.8620293395702112e-05, "loss": 2.032, "step": 14777500 }, { "epoch": 42.78, "learning_rate": 2.8619569748054835e-05, "loss": 2.0436, "step": 14778000 }, { "epoch": 42.78, "learning_rate": 2.861884610040756e-05, "loss": 2.0264, "step": 14778500 }, { "epoch": 42.78, "learning_rate": 2.8618122452760283e-05, "loss": 2.0338, "step": 14779000 }, { "epoch": 42.78, "learning_rate": 2.8617398805113005e-05, "loss": 2.0256, "step": 14779500 }, { "epoch": 42.78, "learning_rate": 2.8616675157465727e-05, "loss": 2.0288, "step": 14780000 }, { "epoch": 42.78, "learning_rate": 2.861595150981845e-05, "loss": 2.0235, "step": 14780500 }, { "epoch": 42.78, "learning_rate": 2.8615229309466468e-05, "loss": 2.0277, "step": 14781000 }, { "epoch": 42.79, "learning_rate": 2.861450566181919e-05, "loss": 2.0378, "step": 14781500 }, { "epoch": 42.79, "learning_rate": 2.861378201417192e-05, "loss": 2.0344, "step": 14782000 }, { "epoch": 42.79, "learning_rate": 2.861305981381994e-05, "loss": 2.0348, "step": 14782500 }, { "epoch": 42.79, "learning_rate": 2.861233616617266e-05, "loss": 2.0642, "step": 14783000 }, { "epoch": 42.79, "learning_rate": 2.8611612518525383e-05, "loss": 2.031, "step": 14783500 }, { "epoch": 42.79, "learning_rate": 2.8610888870878105e-05, "loss": 2.0345, "step": 14784000 }, { "epoch": 42.8, "learning_rate": 2.8610165223230827e-05, "loss": 2.0334, "step": 14784500 }, { "epoch": 42.8, "learning_rate": 2.8609443022878846e-05, "loss": 2.0439, "step": 14785000 }, { "epoch": 42.8, "learning_rate": 2.860871937523157e-05, "loss": 2.0406, "step": 14785500 }, { "epoch": 42.8, "learning_rate": 2.860799572758429e-05, "loss": 2.0188, "step": 14786000 }, { "epoch": 42.8, "learning_rate": 2.8607272079937013e-05, "loss": 2.0352, "step": 14786500 }, { "epoch": 42.8, "learning_rate": 2.860654843228974e-05, "loss": 2.0322, "step": 14787000 }, { "epoch": 42.8, "learning_rate": 2.860582478464246e-05, "loss": 2.0461, "step": 14787500 }, { "epoch": 42.81, "learning_rate": 2.8605102584290476e-05, "loss": 2.0528, "step": 14788000 }, { "epoch": 42.81, "learning_rate": 2.86043789366432e-05, "loss": 2.0286, "step": 14788500 }, { "epoch": 42.81, "learning_rate": 2.8603655288995924e-05, "loss": 2.0538, "step": 14789000 }, { "epoch": 42.81, "learning_rate": 2.860293164134865e-05, "loss": 2.0155, "step": 14789500 }, { "epoch": 42.81, "learning_rate": 2.8602207993701375e-05, "loss": 2.0313, "step": 14790000 }, { "epoch": 42.81, "learning_rate": 2.8601484346054098e-05, "loss": 2.0611, "step": 14790500 }, { "epoch": 42.81, "learning_rate": 2.860076069840682e-05, "loss": 2.0324, "step": 14791000 }, { "epoch": 42.82, "learning_rate": 2.8600037050759542e-05, "loss": 2.0359, "step": 14791500 }, { "epoch": 42.82, "learning_rate": 2.8599313403112264e-05, "loss": 2.035, "step": 14792000 }, { "epoch": 42.82, "learning_rate": 2.8598591202760283e-05, "loss": 2.029, "step": 14792500 }, { "epoch": 42.82, "learning_rate": 2.8597867555113006e-05, "loss": 2.0588, "step": 14793000 }, { "epoch": 42.82, "learning_rate": 2.8597143907465728e-05, "loss": 2.0417, "step": 14793500 }, { "epoch": 42.82, "learning_rate": 2.859642025981845e-05, "loss": 2.0302, "step": 14794000 }, { "epoch": 42.82, "learning_rate": 2.859569805946647e-05, "loss": 2.0162, "step": 14794500 }, { "epoch": 42.83, "learning_rate": 2.859497441181919e-05, "loss": 2.0265, "step": 14795000 }, { "epoch": 42.83, "learning_rate": 2.8594250764171913e-05, "loss": 2.018, "step": 14795500 }, { "epoch": 42.83, "learning_rate": 2.859352711652464e-05, "loss": 2.0289, "step": 14796000 }, { "epoch": 42.83, "learning_rate": 2.8592803468877365e-05, "loss": 2.0401, "step": 14796500 }, { "epoch": 42.83, "learning_rate": 2.8592081268525384e-05, "loss": 2.0481, "step": 14797000 }, { "epoch": 42.83, "learning_rate": 2.8591357620878106e-05, "loss": 2.0327, "step": 14797500 }, { "epoch": 42.83, "learning_rate": 2.8590633973230828e-05, "loss": 2.0431, "step": 14798000 }, { "epoch": 42.84, "learning_rate": 2.8589910325583554e-05, "loss": 2.041, "step": 14798500 }, { "epoch": 42.84, "learning_rate": 2.858918812523157e-05, "loss": 2.0356, "step": 14799000 }, { "epoch": 42.84, "learning_rate": 2.8588465924879588e-05, "loss": 2.054, "step": 14799500 }, { "epoch": 42.84, "learning_rate": 2.858774227723231e-05, "loss": 2.045, "step": 14800000 }, { "epoch": 42.84, "learning_rate": 2.8587018629585033e-05, "loss": 2.0497, "step": 14800500 }, { "epoch": 42.84, "learning_rate": 2.8586294981937755e-05, "loss": 2.0328, "step": 14801000 }, { "epoch": 42.84, "learning_rate": 2.8585571334290477e-05, "loss": 2.043, "step": 14801500 }, { "epoch": 42.85, "learning_rate": 2.8584847686643203e-05, "loss": 2.0367, "step": 14802000 }, { "epoch": 42.85, "learning_rate": 2.8584124038995925e-05, "loss": 2.0283, "step": 14802500 }, { "epoch": 42.85, "learning_rate": 2.8583400391348647e-05, "loss": 2.0355, "step": 14803000 }, { "epoch": 42.85, "learning_rate": 2.858267674370137e-05, "loss": 2.0412, "step": 14803500 }, { "epoch": 42.85, "learning_rate": 2.85819530960541e-05, "loss": 2.0415, "step": 14804000 }, { "epoch": 42.85, "learning_rate": 2.858122944840682e-05, "loss": 2.0253, "step": 14804500 }, { "epoch": 42.85, "learning_rate": 2.8580505800759543e-05, "loss": 2.0263, "step": 14805000 }, { "epoch": 42.86, "learning_rate": 2.8579783600407562e-05, "loss": 2.0416, "step": 14805500 }, { "epoch": 42.86, "learning_rate": 2.8579059952760284e-05, "loss": 2.0587, "step": 14806000 }, { "epoch": 42.86, "learning_rate": 2.8578336305113006e-05, "loss": 2.0294, "step": 14806500 }, { "epoch": 42.86, "learning_rate": 2.857761265746573e-05, "loss": 2.0317, "step": 14807000 }, { "epoch": 42.86, "learning_rate": 2.8576889009818454e-05, "loss": 2.0413, "step": 14807500 }, { "epoch": 42.86, "learning_rate": 2.8576165362171176e-05, "loss": 2.046, "step": 14808000 }, { "epoch": 42.86, "learning_rate": 2.85754417145239e-05, "loss": 2.0395, "step": 14808500 }, { "epoch": 42.87, "learning_rate": 2.857471806687662e-05, "loss": 2.0305, "step": 14809000 }, { "epoch": 42.87, "learning_rate": 2.8573994419229343e-05, "loss": 2.0306, "step": 14809500 }, { "epoch": 42.87, "learning_rate": 2.8573272218877362e-05, "loss": 2.0431, "step": 14810000 }, { "epoch": 42.87, "learning_rate": 2.8572550018525378e-05, "loss": 2.0377, "step": 14810500 }, { "epoch": 42.87, "learning_rate": 2.8571826370878103e-05, "loss": 2.0484, "step": 14811000 }, { "epoch": 42.87, "learning_rate": 2.857110272323083e-05, "loss": 2.0508, "step": 14811500 }, { "epoch": 42.87, "learning_rate": 2.8570379075583555e-05, "loss": 2.0346, "step": 14812000 }, { "epoch": 42.88, "learning_rate": 2.8569655427936277e-05, "loss": 2.0552, "step": 14812500 }, { "epoch": 42.88, "learning_rate": 2.8568931780289e-05, "loss": 2.0223, "step": 14813000 }, { "epoch": 42.88, "learning_rate": 2.856820813264172e-05, "loss": 2.0244, "step": 14813500 }, { "epoch": 42.88, "learning_rate": 2.856748593228974e-05, "loss": 2.021, "step": 14814000 }, { "epoch": 42.88, "learning_rate": 2.8566763731937756e-05, "loss": 2.0628, "step": 14814500 }, { "epoch": 42.88, "learning_rate": 2.8566040084290478e-05, "loss": 2.0513, "step": 14815000 }, { "epoch": 42.88, "learning_rate": 2.8565316436643204e-05, "loss": 2.0458, "step": 14815500 }, { "epoch": 42.89, "learning_rate": 2.8564592788995926e-05, "loss": 2.0509, "step": 14816000 }, { "epoch": 42.89, "learning_rate": 2.856387058864394e-05, "loss": 2.0436, "step": 14816500 }, { "epoch": 42.89, "learning_rate": 2.8563146940996667e-05, "loss": 2.0256, "step": 14817000 }, { "epoch": 42.89, "learning_rate": 2.856242329334939e-05, "loss": 2.0493, "step": 14817500 }, { "epoch": 42.89, "learning_rate": 2.856169964570211e-05, "loss": 2.0357, "step": 14818000 }, { "epoch": 42.89, "learning_rate": 2.856097744535013e-05, "loss": 2.0506, "step": 14818500 }, { "epoch": 42.89, "learning_rate": 2.8560255244998146e-05, "loss": 2.0339, "step": 14819000 }, { "epoch": 42.9, "learning_rate": 2.8559531597350875e-05, "loss": 2.0675, "step": 14819500 }, { "epoch": 42.9, "learning_rate": 2.8558807949703597e-05, "loss": 2.0389, "step": 14820000 }, { "epoch": 42.9, "learning_rate": 2.855808430205632e-05, "loss": 2.053, "step": 14820500 }, { "epoch": 42.9, "learning_rate": 2.855736065440904e-05, "loss": 2.0194, "step": 14821000 }, { "epoch": 42.9, "learning_rate": 2.8556637006761767e-05, "loss": 2.053, "step": 14821500 }, { "epoch": 42.9, "learning_rate": 2.855591335911449e-05, "loss": 2.0289, "step": 14822000 }, { "epoch": 42.91, "learning_rate": 2.8555189711467212e-05, "loss": 2.027, "step": 14822500 }, { "epoch": 42.91, "learning_rate": 2.8554466063819934e-05, "loss": 2.0382, "step": 14823000 }, { "epoch": 42.91, "learning_rate": 2.8553742416172656e-05, "loss": 2.0103, "step": 14823500 }, { "epoch": 42.91, "learning_rate": 2.8553020215820675e-05, "loss": 2.0305, "step": 14824000 }, { "epoch": 42.91, "learning_rate": 2.8552296568173397e-05, "loss": 2.0363, "step": 14824500 }, { "epoch": 42.91, "learning_rate": 2.855157292052612e-05, "loss": 2.0475, "step": 14825000 }, { "epoch": 42.91, "learning_rate": 2.8550849272878842e-05, "loss": 2.0249, "step": 14825500 }, { "epoch": 42.92, "learning_rate": 2.8550125625231568e-05, "loss": 2.039, "step": 14826000 }, { "epoch": 42.92, "learning_rate": 2.854940197758429e-05, "loss": 2.0425, "step": 14826500 }, { "epoch": 42.92, "learning_rate": 2.854867832993702e-05, "loss": 2.055, "step": 14827000 }, { "epoch": 42.92, "learning_rate": 2.854795468228974e-05, "loss": 2.0296, "step": 14827500 }, { "epoch": 42.92, "learning_rate": 2.8547231034642463e-05, "loss": 2.0321, "step": 14828000 }, { "epoch": 42.92, "learning_rate": 2.8546507386995185e-05, "loss": 2.0342, "step": 14828500 }, { "epoch": 42.92, "learning_rate": 2.8545783739347908e-05, "loss": 2.0303, "step": 14829000 }, { "epoch": 42.93, "learning_rate": 2.8545060091700633e-05, "loss": 2.0395, "step": 14829500 }, { "epoch": 42.93, "learning_rate": 2.854433789134865e-05, "loss": 2.04, "step": 14830000 }, { "epoch": 42.93, "learning_rate": 2.8543615690996668e-05, "loss": 2.0258, "step": 14830500 }, { "epoch": 42.93, "learning_rate": 2.854289204334939e-05, "loss": 2.0443, "step": 14831000 }, { "epoch": 42.93, "learning_rate": 2.8542168395702112e-05, "loss": 2.0127, "step": 14831500 }, { "epoch": 42.93, "learning_rate": 2.8541444748054835e-05, "loss": 2.0293, "step": 14832000 }, { "epoch": 42.93, "learning_rate": 2.8540721100407557e-05, "loss": 2.0316, "step": 14832500 }, { "epoch": 42.94, "learning_rate": 2.8539997452760282e-05, "loss": 2.0376, "step": 14833000 }, { "epoch": 42.94, "learning_rate": 2.8539273805113005e-05, "loss": 2.0284, "step": 14833500 }, { "epoch": 42.94, "learning_rate": 2.8538550157465734e-05, "loss": 2.0308, "step": 14834000 }, { "epoch": 42.94, "learning_rate": 2.8537826509818456e-05, "loss": 2.0398, "step": 14834500 }, { "epoch": 42.94, "learning_rate": 2.853710430946647e-05, "loss": 2.0466, "step": 14835000 }, { "epoch": 42.94, "learning_rate": 2.8536380661819197e-05, "loss": 2.0646, "step": 14835500 }, { "epoch": 42.94, "learning_rate": 2.853565701417192e-05, "loss": 2.0643, "step": 14836000 }, { "epoch": 42.95, "learning_rate": 2.853493336652464e-05, "loss": 2.034, "step": 14836500 }, { "epoch": 42.95, "learning_rate": 2.8534209718877364e-05, "loss": 2.0379, "step": 14837000 }, { "epoch": 42.95, "learning_rate": 2.8533486071230086e-05, "loss": 2.0302, "step": 14837500 }, { "epoch": 42.95, "learning_rate": 2.8532762423582808e-05, "loss": 2.0351, "step": 14838000 }, { "epoch": 42.95, "learning_rate": 2.8532038775935534e-05, "loss": 2.0524, "step": 14838500 }, { "epoch": 42.95, "learning_rate": 2.853131657558355e-05, "loss": 2.065, "step": 14839000 }, { "epoch": 42.95, "learning_rate": 2.853059292793627e-05, "loss": 2.0477, "step": 14839500 }, { "epoch": 42.96, "learning_rate": 2.852987072758429e-05, "loss": 2.0367, "step": 14840000 }, { "epoch": 42.96, "learning_rate": 2.852914852723231e-05, "loss": 2.0352, "step": 14840500 }, { "epoch": 42.96, "learning_rate": 2.8528424879585032e-05, "loss": 2.0255, "step": 14841000 }, { "epoch": 42.96, "learning_rate": 2.8527701231937754e-05, "loss": 2.022, "step": 14841500 }, { "epoch": 42.96, "learning_rate": 2.8526977584290483e-05, "loss": 2.0591, "step": 14842000 }, { "epoch": 42.96, "learning_rate": 2.8526253936643205e-05, "loss": 2.023, "step": 14842500 }, { "epoch": 42.96, "learning_rate": 2.8525530288995927e-05, "loss": 2.0592, "step": 14843000 }, { "epoch": 42.97, "learning_rate": 2.852480664134865e-05, "loss": 2.0111, "step": 14843500 }, { "epoch": 42.97, "learning_rate": 2.8524082993701372e-05, "loss": 2.0723, "step": 14844000 }, { "epoch": 42.97, "learning_rate": 2.8523359346054098e-05, "loss": 2.0519, "step": 14844500 }, { "epoch": 42.97, "learning_rate": 2.852263569840682e-05, "loss": 2.0577, "step": 14845000 }, { "epoch": 42.97, "learning_rate": 2.8521913498054835e-05, "loss": 2.0255, "step": 14845500 }, { "epoch": 42.97, "learning_rate": 2.8521189850407558e-05, "loss": 2.0372, "step": 14846000 }, { "epoch": 42.97, "learning_rate": 2.8520466202760283e-05, "loss": 2.0559, "step": 14846500 }, { "epoch": 42.98, "learning_rate": 2.8519742555113005e-05, "loss": 2.0478, "step": 14847000 }, { "epoch": 42.98, "learning_rate": 2.851902035476102e-05, "loss": 2.0353, "step": 14847500 }, { "epoch": 42.98, "learning_rate": 2.8518296707113747e-05, "loss": 2.0451, "step": 14848000 }, { "epoch": 42.98, "learning_rate": 2.851757305946647e-05, "loss": 2.0482, "step": 14848500 }, { "epoch": 42.98, "learning_rate": 2.8516850859114484e-05, "loss": 2.0535, "step": 14849000 }, { "epoch": 42.98, "learning_rate": 2.851612865876251e-05, "loss": 2.0497, "step": 14849500 }, { "epoch": 42.98, "learning_rate": 2.8515405011115232e-05, "loss": 2.0619, "step": 14850000 }, { "epoch": 42.99, "learning_rate": 2.8514681363467955e-05, "loss": 2.0135, "step": 14850500 }, { "epoch": 42.99, "learning_rate": 2.8513957715820677e-05, "loss": 2.018, "step": 14851000 }, { "epoch": 42.99, "learning_rate": 2.85132340681734e-05, "loss": 2.0373, "step": 14851500 }, { "epoch": 42.99, "learning_rate": 2.8512511867821418e-05, "loss": 2.0609, "step": 14852000 }, { "epoch": 42.99, "learning_rate": 2.851178822017414e-05, "loss": 2.0517, "step": 14852500 }, { "epoch": 42.99, "learning_rate": 2.8511064572526862e-05, "loss": 2.026, "step": 14853000 }, { "epoch": 42.99, "learning_rate": 2.8510340924879585e-05, "loss": 2.0487, "step": 14853500 }, { "epoch": 43.0, "learning_rate": 2.850961727723231e-05, "loss": 2.023, "step": 14854000 }, { "epoch": 43.0, "learning_rate": 2.8508893629585033e-05, "loss": 2.0351, "step": 14854500 }, { "epoch": 43.0, "learning_rate": 2.8508169981937755e-05, "loss": 2.0147, "step": 14855000 }, { "epoch": 43.0, "eval_accuracy": 0.6710644659307677, "eval_accuracy_mlm": 0.6362523003108089, "eval_accuracy_nsp": 0.8578139537015395, "eval_loss": 2.1682963371276855, "eval_runtime": 331.1057, "eval_samples_per_second": 1317.966, "eval_steps_per_second": 54.916, "step": 14855296 }, { "epoch": 43.0, "learning_rate": 2.8507446334290477e-05, "loss": 2.0307, "step": 14855500 }, { "epoch": 43.0, "learning_rate": 2.85067226866432e-05, "loss": 2.0209, "step": 14856000 }, { "epoch": 43.0, "learning_rate": 2.850599903899592e-05, "loss": 1.9957, "step": 14856500 }, { "epoch": 43.0, "learning_rate": 2.850527539134865e-05, "loss": 2.016, "step": 14857000 }, { "epoch": 43.01, "learning_rate": 2.8504551743701373e-05, "loss": 2.006, "step": 14857500 }, { "epoch": 43.01, "learning_rate": 2.850382954334939e-05, "loss": 2.0374, "step": 14858000 }, { "epoch": 43.01, "learning_rate": 2.8503105895702114e-05, "loss": 2.0148, "step": 14858500 }, { "epoch": 43.01, "learning_rate": 2.8502382248054836e-05, "loss": 2.0184, "step": 14859000 }, { "epoch": 43.01, "learning_rate": 2.8501658600407562e-05, "loss": 2.0393, "step": 14859500 }, { "epoch": 43.01, "learning_rate": 2.8500934952760284e-05, "loss": 2.0317, "step": 14860000 }, { "epoch": 43.02, "learning_rate": 2.8500211305113006e-05, "loss": 2.0428, "step": 14860500 }, { "epoch": 43.02, "learning_rate": 2.849948765746573e-05, "loss": 2.0015, "step": 14861000 }, { "epoch": 43.02, "learning_rate": 2.849876400981845e-05, "loss": 2.0086, "step": 14861500 }, { "epoch": 43.02, "learning_rate": 2.8498040362171173e-05, "loss": 2.0092, "step": 14862000 }, { "epoch": 43.02, "learning_rate": 2.8497318161819192e-05, "loss": 2.011, "step": 14862500 }, { "epoch": 43.02, "learning_rate": 2.8496594514171914e-05, "loss": 2.0157, "step": 14863000 }, { "epoch": 43.02, "learning_rate": 2.8495870866524636e-05, "loss": 2.0127, "step": 14863500 }, { "epoch": 43.03, "learning_rate": 2.8495148666172655e-05, "loss": 2.0225, "step": 14864000 }, { "epoch": 43.03, "learning_rate": 2.8494425018525384e-05, "loss": 2.0169, "step": 14864500 }, { "epoch": 43.03, "learning_rate": 2.8493701370878106e-05, "loss": 2.0258, "step": 14865000 }, { "epoch": 43.03, "learning_rate": 2.849297772323083e-05, "loss": 2.0225, "step": 14865500 }, { "epoch": 43.03, "learning_rate": 2.849225407558355e-05, "loss": 2.0173, "step": 14866000 }, { "epoch": 43.03, "learning_rate": 2.8491530427936277e-05, "loss": 2.0302, "step": 14866500 }, { "epoch": 43.03, "learning_rate": 2.8490806780289e-05, "loss": 2.0347, "step": 14867000 }, { "epoch": 43.04, "learning_rate": 2.849008313264172e-05, "loss": 2.0324, "step": 14867500 }, { "epoch": 43.04, "learning_rate": 2.8489360932289737e-05, "loss": 1.9943, "step": 14868000 }, { "epoch": 43.04, "learning_rate": 2.8488637284642462e-05, "loss": 2.0056, "step": 14868500 }, { "epoch": 43.04, "learning_rate": 2.8487913636995184e-05, "loss": 2.0512, "step": 14869000 }, { "epoch": 43.04, "learning_rate": 2.8487189989347907e-05, "loss": 2.02, "step": 14869500 }, { "epoch": 43.04, "learning_rate": 2.848646634170063e-05, "loss": 2.0295, "step": 14870000 }, { "epoch": 43.04, "learning_rate": 2.848574269405335e-05, "loss": 2.012, "step": 14870500 }, { "epoch": 43.05, "learning_rate": 2.848502049370137e-05, "loss": 2.0118, "step": 14871000 }, { "epoch": 43.05, "learning_rate": 2.848429829334939e-05, "loss": 2.0138, "step": 14871500 }, { "epoch": 43.05, "learning_rate": 2.8483574645702115e-05, "loss": 2.0165, "step": 14872000 }, { "epoch": 43.05, "learning_rate": 2.8482850998054837e-05, "loss": 2.0458, "step": 14872500 }, { "epoch": 43.05, "learning_rate": 2.8482127350407563e-05, "loss": 2.0071, "step": 14873000 }, { "epoch": 43.05, "learning_rate": 2.8481403702760285e-05, "loss": 2.0164, "step": 14873500 }, { "epoch": 43.05, "learning_rate": 2.8480680055113007e-05, "loss": 2.0145, "step": 14874000 }, { "epoch": 43.06, "learning_rate": 2.847995640746573e-05, "loss": 2.0147, "step": 14874500 }, { "epoch": 43.06, "learning_rate": 2.8479234207113748e-05, "loss": 2.0382, "step": 14875000 }, { "epoch": 43.06, "learning_rate": 2.847851055946647e-05, "loss": 2.0137, "step": 14875500 }, { "epoch": 43.06, "learning_rate": 2.8477786911819193e-05, "loss": 2.0113, "step": 14876000 }, { "epoch": 43.06, "learning_rate": 2.8477063264171915e-05, "loss": 2.014, "step": 14876500 }, { "epoch": 43.06, "learning_rate": 2.8476339616524637e-05, "loss": 2.0193, "step": 14877000 }, { "epoch": 43.06, "learning_rate": 2.8475617416172656e-05, "loss": 2.034, "step": 14877500 }, { "epoch": 43.07, "learning_rate": 2.8474893768525378e-05, "loss": 2.0088, "step": 14878000 }, { "epoch": 43.07, "learning_rate": 2.84741701208781e-05, "loss": 2.0212, "step": 14878500 }, { "epoch": 43.07, "learning_rate": 2.847344647323083e-05, "loss": 2.0289, "step": 14879000 }, { "epoch": 43.07, "learning_rate": 2.847272427287885e-05, "loss": 2.0146, "step": 14879500 }, { "epoch": 43.07, "learning_rate": 2.8472002072526864e-05, "loss": 2.0272, "step": 14880000 }, { "epoch": 43.07, "learning_rate": 2.847127842487959e-05, "loss": 2.0264, "step": 14880500 }, { "epoch": 43.07, "learning_rate": 2.8470554777232312e-05, "loss": 2.0123, "step": 14881000 }, { "epoch": 43.08, "learning_rate": 2.8469831129585034e-05, "loss": 2.0062, "step": 14881500 }, { "epoch": 43.08, "learning_rate": 2.8469107481937756e-05, "loss": 2.013, "step": 14882000 }, { "epoch": 43.08, "learning_rate": 2.8468385281585775e-05, "loss": 2.0219, "step": 14882500 }, { "epoch": 43.08, "learning_rate": 2.8467661633938498e-05, "loss": 2.0216, "step": 14883000 }, { "epoch": 43.08, "learning_rate": 2.846693798629122e-05, "loss": 2.0076, "step": 14883500 }, { "epoch": 43.08, "learning_rate": 2.8466214338643942e-05, "loss": 1.9989, "step": 14884000 }, { "epoch": 43.08, "learning_rate": 2.846549213829196e-05, "loss": 2.0148, "step": 14884500 }, { "epoch": 43.09, "learning_rate": 2.8464771385235273e-05, "loss": 2.0065, "step": 14885000 }, { "epoch": 43.09, "learning_rate": 2.8464047737587995e-05, "loss": 2.0281, "step": 14885500 }, { "epoch": 43.09, "learning_rate": 2.8463324089940718e-05, "loss": 2.0402, "step": 14886000 }, { "epoch": 43.09, "learning_rate": 2.846260044229344e-05, "loss": 2.0392, "step": 14886500 }, { "epoch": 43.09, "learning_rate": 2.8461876794646166e-05, "loss": 2.037, "step": 14887000 }, { "epoch": 43.09, "learning_rate": 2.846115314699889e-05, "loss": 2.0052, "step": 14887500 }, { "epoch": 43.09, "learning_rate": 2.8460429499351617e-05, "loss": 2.0263, "step": 14888000 }, { "epoch": 43.1, "learning_rate": 2.845970585170434e-05, "loss": 2.0344, "step": 14888500 }, { "epoch": 43.1, "learning_rate": 2.845898220405706e-05, "loss": 2.0221, "step": 14889000 }, { "epoch": 43.1, "learning_rate": 2.8458258556409783e-05, "loss": 2.0314, "step": 14889500 }, { "epoch": 43.1, "learning_rate": 2.8457534908762506e-05, "loss": 2.0191, "step": 14890000 }, { "epoch": 43.1, "learning_rate": 2.8456811261115228e-05, "loss": 2.0214, "step": 14890500 }, { "epoch": 43.1, "learning_rate": 2.8456087613467954e-05, "loss": 2.0253, "step": 14891000 }, { "epoch": 43.1, "learning_rate": 2.8455363965820676e-05, "loss": 2.0355, "step": 14891500 }, { "epoch": 43.11, "learning_rate": 2.8454640318173398e-05, "loss": 2.0139, "step": 14892000 }, { "epoch": 43.11, "learning_rate": 2.8453918117821417e-05, "loss": 2.0323, "step": 14892500 }, { "epoch": 43.11, "learning_rate": 2.845319447017414e-05, "loss": 2.0263, "step": 14893000 }, { "epoch": 43.11, "learning_rate": 2.845247082252686e-05, "loss": 2.0279, "step": 14893500 }, { "epoch": 43.11, "learning_rate": 2.8451747174879584e-05, "loss": 2.015, "step": 14894000 }, { "epoch": 43.11, "learning_rate": 2.8451023527232313e-05, "loss": 2.0321, "step": 14894500 }, { "epoch": 43.11, "learning_rate": 2.8450299879585035e-05, "loss": 2.0075, "step": 14895000 }, { "epoch": 43.12, "learning_rate": 2.8449576231937757e-05, "loss": 2.0199, "step": 14895500 }, { "epoch": 43.12, "learning_rate": 2.844885258429048e-05, "loss": 2.0372, "step": 14896000 }, { "epoch": 43.12, "learning_rate": 2.8448128936643205e-05, "loss": 1.9983, "step": 14896500 }, { "epoch": 43.12, "learning_rate": 2.8447405288995927e-05, "loss": 2.0201, "step": 14897000 }, { "epoch": 43.12, "learning_rate": 2.844668164134865e-05, "loss": 2.0251, "step": 14897500 }, { "epoch": 43.12, "learning_rate": 2.844595799370137e-05, "loss": 2.0249, "step": 14898000 }, { "epoch": 43.13, "learning_rate": 2.8445234346054094e-05, "loss": 2.0146, "step": 14898500 }, { "epoch": 43.13, "learning_rate": 2.8444513592997406e-05, "loss": 2.0266, "step": 14899000 }, { "epoch": 43.13, "learning_rate": 2.844378994535013e-05, "loss": 2.043, "step": 14899500 }, { "epoch": 43.13, "learning_rate": 2.8443067744998147e-05, "loss": 2.0273, "step": 14900000 }, { "epoch": 43.13, "learning_rate": 2.844234409735087e-05, "loss": 2.0508, "step": 14900500 }, { "epoch": 43.13, "learning_rate": 2.8441620449703592e-05, "loss": 2.0411, "step": 14901000 }, { "epoch": 43.13, "learning_rate": 2.8440896802056317e-05, "loss": 2.0321, "step": 14901500 }, { "epoch": 43.14, "learning_rate": 2.8440173154409043e-05, "loss": 2.0107, "step": 14902000 }, { "epoch": 43.14, "learning_rate": 2.843944950676177e-05, "loss": 2.0338, "step": 14902500 }, { "epoch": 43.14, "learning_rate": 2.8438727306409784e-05, "loss": 2.0342, "step": 14903000 }, { "epoch": 43.14, "learning_rate": 2.8438003658762506e-05, "loss": 1.9996, "step": 14903500 }, { "epoch": 43.14, "learning_rate": 2.8437280011115232e-05, "loss": 2.0191, "step": 14904000 }, { "epoch": 43.14, "learning_rate": 2.8436556363467954e-05, "loss": 2.0065, "step": 14904500 }, { "epoch": 43.14, "learning_rate": 2.843583416311597e-05, "loss": 2.0259, "step": 14905000 }, { "epoch": 43.15, "learning_rate": 2.843511196276399e-05, "loss": 2.039, "step": 14905500 }, { "epoch": 43.15, "learning_rate": 2.843438831511671e-05, "loss": 2.0137, "step": 14906000 }, { "epoch": 43.15, "learning_rate": 2.843366611476473e-05, "loss": 2.0159, "step": 14906500 }, { "epoch": 43.15, "learning_rate": 2.8432942467117452e-05, "loss": 2.0372, "step": 14907000 }, { "epoch": 43.15, "learning_rate": 2.8432218819470174e-05, "loss": 2.0331, "step": 14907500 }, { "epoch": 43.15, "learning_rate": 2.8431495171822897e-05, "loss": 2.0214, "step": 14908000 }, { "epoch": 43.15, "learning_rate": 2.843077152417562e-05, "loss": 2.0344, "step": 14908500 }, { "epoch": 43.16, "learning_rate": 2.8430047876528345e-05, "loss": 2.0102, "step": 14909000 }, { "epoch": 43.16, "learning_rate": 2.8429324228881067e-05, "loss": 2.0324, "step": 14909500 }, { "epoch": 43.16, "learning_rate": 2.8428600581233792e-05, "loss": 2.0216, "step": 14910000 }, { "epoch": 43.16, "learning_rate": 2.8427876933586518e-05, "loss": 2.0348, "step": 14910500 }, { "epoch": 43.16, "learning_rate": 2.842715328593924e-05, "loss": 2.0115, "step": 14911000 }, { "epoch": 43.16, "learning_rate": 2.8426429638291963e-05, "loss": 2.0229, "step": 14911500 }, { "epoch": 43.16, "learning_rate": 2.8425705990644685e-05, "loss": 2.018, "step": 14912000 }, { "epoch": 43.17, "learning_rate": 2.8424982342997407e-05, "loss": 2.0154, "step": 14912500 }, { "epoch": 43.17, "learning_rate": 2.8424258695350133e-05, "loss": 2.0404, "step": 14913000 }, { "epoch": 43.17, "learning_rate": 2.8423535047702855e-05, "loss": 2.0305, "step": 14913500 }, { "epoch": 43.17, "learning_rate": 2.842281284735087e-05, "loss": 2.027, "step": 14914000 }, { "epoch": 43.17, "learning_rate": 2.8422089199703593e-05, "loss": 2.0372, "step": 14914500 }, { "epoch": 43.17, "learning_rate": 2.842136699935161e-05, "loss": 2.0119, "step": 14915000 }, { "epoch": 43.17, "learning_rate": 2.8420643351704334e-05, "loss": 2.0044, "step": 14915500 }, { "epoch": 43.18, "learning_rate": 2.8419919704057056e-05, "loss": 2.003, "step": 14916000 }, { "epoch": 43.18, "learning_rate": 2.841919605640978e-05, "loss": 2.0246, "step": 14916500 }, { "epoch": 43.18, "learning_rate": 2.8418472408762507e-05, "loss": 2.0254, "step": 14917000 }, { "epoch": 43.18, "learning_rate": 2.8417748761115233e-05, "loss": 2.0299, "step": 14917500 }, { "epoch": 43.18, "learning_rate": 2.8417025113467955e-05, "loss": 2.0323, "step": 14918000 }, { "epoch": 43.18, "learning_rate": 2.841630291311597e-05, "loss": 2.0376, "step": 14918500 }, { "epoch": 43.18, "learning_rate": 2.8415579265468696e-05, "loss": 2.0335, "step": 14919000 }, { "epoch": 43.19, "learning_rate": 2.841485561782142e-05, "loss": 2.0162, "step": 14919500 }, { "epoch": 43.19, "learning_rate": 2.841413197017414e-05, "loss": 2.0419, "step": 14920000 }, { "epoch": 43.19, "learning_rate": 2.8413408322526863e-05, "loss": 2.0342, "step": 14920500 }, { "epoch": 43.19, "learning_rate": 2.8412684674879585e-05, "loss": 2.027, "step": 14921000 }, { "epoch": 43.19, "learning_rate": 2.8411961027232307e-05, "loss": 2.0122, "step": 14921500 }, { "epoch": 43.19, "learning_rate": 2.8411237379585033e-05, "loss": 2.0283, "step": 14922000 }, { "epoch": 43.19, "learning_rate": 2.8410513731937755e-05, "loss": 2.0356, "step": 14922500 }, { "epoch": 43.2, "learning_rate": 2.8409790084290478e-05, "loss": 2.0297, "step": 14923000 }, { "epoch": 43.2, "learning_rate": 2.8409067883938497e-05, "loss": 2.043, "step": 14923500 }, { "epoch": 43.2, "learning_rate": 2.840834423629122e-05, "loss": 2.0283, "step": 14924000 }, { "epoch": 43.2, "learning_rate": 2.8407620588643948e-05, "loss": 2.0028, "step": 14924500 }, { "epoch": 43.2, "learning_rate": 2.840689694099667e-05, "loss": 2.0442, "step": 14925000 }, { "epoch": 43.2, "learning_rate": 2.8406173293349392e-05, "loss": 2.0567, "step": 14925500 }, { "epoch": 43.2, "learning_rate": 2.8405451092997408e-05, "loss": 2.0207, "step": 14926000 }, { "epoch": 43.21, "learning_rate": 2.8404727445350133e-05, "loss": 2.0199, "step": 14926500 }, { "epoch": 43.21, "learning_rate": 2.8404006692293446e-05, "loss": 2.0339, "step": 14927000 }, { "epoch": 43.21, "learning_rate": 2.8403283044646168e-05, "loss": 2.0325, "step": 14927500 }, { "epoch": 43.21, "learning_rate": 2.840255939699889e-05, "loss": 2.0167, "step": 14928000 }, { "epoch": 43.21, "learning_rate": 2.8401835749351612e-05, "loss": 2.0508, "step": 14928500 }, { "epoch": 43.21, "learning_rate": 2.8401112101704335e-05, "loss": 2.0213, "step": 14929000 }, { "epoch": 43.21, "learning_rate": 2.840038845405706e-05, "loss": 2.0097, "step": 14929500 }, { "epoch": 43.22, "learning_rate": 2.8399664806409782e-05, "loss": 2.0307, "step": 14930000 }, { "epoch": 43.22, "learning_rate": 2.8398941158762505e-05, "loss": 2.0142, "step": 14930500 }, { "epoch": 43.22, "learning_rate": 2.8398217511115227e-05, "loss": 2.0263, "step": 14931000 }, { "epoch": 43.22, "learning_rate": 2.839749386346795e-05, "loss": 2.0288, "step": 14931500 }, { "epoch": 43.22, "learning_rate": 2.8396770215820678e-05, "loss": 2.028, "step": 14932000 }, { "epoch": 43.22, "learning_rate": 2.83960465681734e-05, "loss": 2.0179, "step": 14932500 }, { "epoch": 43.22, "learning_rate": 2.8395322920526123e-05, "loss": 2.0553, "step": 14933000 }, { "epoch": 43.23, "learning_rate": 2.8394599272878848e-05, "loss": 2.0168, "step": 14933500 }, { "epoch": 43.23, "learning_rate": 2.8393877072526864e-05, "loss": 2.0229, "step": 14934000 }, { "epoch": 43.23, "learning_rate": 2.8393153424879586e-05, "loss": 2.013, "step": 14934500 }, { "epoch": 43.23, "learning_rate": 2.839242977723231e-05, "loss": 2.015, "step": 14935000 }, { "epoch": 43.23, "learning_rate": 2.8391706129585034e-05, "loss": 2.0255, "step": 14935500 }, { "epoch": 43.23, "learning_rate": 2.839098392923305e-05, "loss": 2.0342, "step": 14936000 }, { "epoch": 43.24, "learning_rate": 2.839026028158577e-05, "loss": 2.03, "step": 14936500 }, { "epoch": 43.24, "learning_rate": 2.8389536633938497e-05, "loss": 1.9976, "step": 14937000 }, { "epoch": 43.24, "learning_rate": 2.838881298629122e-05, "loss": 2.0311, "step": 14937500 }, { "epoch": 43.24, "learning_rate": 2.8388089338643942e-05, "loss": 2.0553, "step": 14938000 }, { "epoch": 43.24, "learning_rate": 2.8387365690996664e-05, "loss": 2.0093, "step": 14938500 }, { "epoch": 43.24, "learning_rate": 2.8386642043349386e-05, "loss": 2.0278, "step": 14939000 }, { "epoch": 43.24, "learning_rate": 2.8385919842997412e-05, "loss": 2.0185, "step": 14939500 }, { "epoch": 43.25, "learning_rate": 2.8385196195350134e-05, "loss": 2.0079, "step": 14940000 }, { "epoch": 43.25, "learning_rate": 2.8384472547702856e-05, "loss": 2.0378, "step": 14940500 }, { "epoch": 43.25, "learning_rate": 2.8383750347350875e-05, "loss": 2.0261, "step": 14941000 }, { "epoch": 43.25, "learning_rate": 2.8383026699703598e-05, "loss": 2.044, "step": 14941500 }, { "epoch": 43.25, "learning_rate": 2.838230305205632e-05, "loss": 2.0383, "step": 14942000 }, { "epoch": 43.25, "learning_rate": 2.8381579404409042e-05, "loss": 2.0318, "step": 14942500 }, { "epoch": 43.25, "learning_rate": 2.8380855756761764e-05, "loss": 2.0335, "step": 14943000 }, { "epoch": 43.26, "learning_rate": 2.8380132109114487e-05, "loss": 2.0266, "step": 14943500 }, { "epoch": 43.26, "learning_rate": 2.8379408461467212e-05, "loss": 2.04, "step": 14944000 }, { "epoch": 43.26, "learning_rate": 2.8378684813819934e-05, "loss": 2.0296, "step": 14944500 }, { "epoch": 43.26, "learning_rate": 2.8377961166172657e-05, "loss": 2.0401, "step": 14945000 }, { "epoch": 43.26, "learning_rate": 2.837723751852538e-05, "loss": 2.0266, "step": 14945500 }, { "epoch": 43.26, "learning_rate": 2.83765138708781e-05, "loss": 2.0192, "step": 14946000 }, { "epoch": 43.26, "learning_rate": 2.837579022323083e-05, "loss": 2.0373, "step": 14946500 }, { "epoch": 43.27, "learning_rate": 2.8375066575583552e-05, "loss": 2.0167, "step": 14947000 }, { "epoch": 43.27, "learning_rate": 2.8374342927936275e-05, "loss": 1.9926, "step": 14947500 }, { "epoch": 43.27, "learning_rate": 2.8373619280289e-05, "loss": 2.0195, "step": 14948000 }, { "epoch": 43.27, "learning_rate": 2.8372895632641722e-05, "loss": 2.0332, "step": 14948500 }, { "epoch": 43.27, "learning_rate": 2.8372171984994445e-05, "loss": 2.013, "step": 14949000 }, { "epoch": 43.27, "learning_rate": 2.8371449784642464e-05, "loss": 2.0185, "step": 14949500 }, { "epoch": 43.27, "learning_rate": 2.837072758429048e-05, "loss": 2.0307, "step": 14950000 }, { "epoch": 43.28, "learning_rate": 2.83700039366432e-05, "loss": 2.0297, "step": 14950500 }, { "epoch": 43.28, "learning_rate": 2.8369280288995924e-05, "loss": 2.0336, "step": 14951000 }, { "epoch": 43.28, "learning_rate": 2.836855664134865e-05, "loss": 2.03, "step": 14951500 }, { "epoch": 43.28, "learning_rate": 2.836783299370137e-05, "loss": 2.0196, "step": 14952000 }, { "epoch": 43.28, "learning_rate": 2.8367110793349387e-05, "loss": 2.0138, "step": 14952500 }, { "epoch": 43.28, "learning_rate": 2.8366387145702113e-05, "loss": 2.015, "step": 14953000 }, { "epoch": 43.28, "learning_rate": 2.8365663498054835e-05, "loss": 2.0273, "step": 14953500 }, { "epoch": 43.29, "learning_rate": 2.8364939850407557e-05, "loss": 2.0188, "step": 14954000 }, { "epoch": 43.29, "learning_rate": 2.8364216202760286e-05, "loss": 2.0304, "step": 14954500 }, { "epoch": 43.29, "learning_rate": 2.83634940024083e-05, "loss": 2.0231, "step": 14955000 }, { "epoch": 43.29, "learning_rate": 2.8362770354761027e-05, "loss": 2.0216, "step": 14955500 }, { "epoch": 43.29, "learning_rate": 2.8362048154409043e-05, "loss": 2.0151, "step": 14956000 }, { "epoch": 43.29, "learning_rate": 2.8361324506761765e-05, "loss": 2.0515, "step": 14956500 }, { "epoch": 43.29, "learning_rate": 2.8360600859114487e-05, "loss": 2.0114, "step": 14957000 }, { "epoch": 43.3, "learning_rate": 2.8359877211467213e-05, "loss": 2.0415, "step": 14957500 }, { "epoch": 43.3, "learning_rate": 2.8359153563819935e-05, "loss": 2.0304, "step": 14958000 }, { "epoch": 43.3, "learning_rate": 2.8358429916172657e-05, "loss": 2.0338, "step": 14958500 }, { "epoch": 43.3, "learning_rate": 2.8357707715820676e-05, "loss": 2.0177, "step": 14959000 }, { "epoch": 43.3, "learning_rate": 2.83569840681734e-05, "loss": 2.0489, "step": 14959500 }, { "epoch": 43.3, "learning_rate": 2.835626042052612e-05, "loss": 2.029, "step": 14960000 }, { "epoch": 43.3, "learning_rate": 2.8355536772878843e-05, "loss": 2.0315, "step": 14960500 }, { "epoch": 43.31, "learning_rate": 2.8354813125231565e-05, "loss": 2.0452, "step": 14961000 }, { "epoch": 43.31, "learning_rate": 2.8354089477584288e-05, "loss": 2.0276, "step": 14961500 }, { "epoch": 43.31, "learning_rate": 2.8353365829937017e-05, "loss": 2.0429, "step": 14962000 }, { "epoch": 43.31, "learning_rate": 2.835264218228974e-05, "loss": 2.0471, "step": 14962500 }, { "epoch": 43.31, "learning_rate": 2.8351918534642464e-05, "loss": 2.0265, "step": 14963000 }, { "epoch": 43.31, "learning_rate": 2.8351194886995187e-05, "loss": 2.043, "step": 14963500 }, { "epoch": 43.31, "learning_rate": 2.8350472686643202e-05, "loss": 2.0203, "step": 14964000 }, { "epoch": 43.32, "learning_rate": 2.8349749038995928e-05, "loss": 2.0141, "step": 14964500 }, { "epoch": 43.32, "learning_rate": 2.8349026838643943e-05, "loss": 2.0428, "step": 14965000 }, { "epoch": 43.32, "learning_rate": 2.8348303190996666e-05, "loss": 2.0516, "step": 14965500 }, { "epoch": 43.32, "learning_rate": 2.834757954334939e-05, "loss": 1.998, "step": 14966000 }, { "epoch": 43.32, "learning_rate": 2.8346855895702113e-05, "loss": 2.0207, "step": 14966500 }, { "epoch": 43.32, "learning_rate": 2.8346132248054836e-05, "loss": 2.026, "step": 14967000 }, { "epoch": 43.32, "learning_rate": 2.8345408600407558e-05, "loss": 2.0376, "step": 14967500 }, { "epoch": 43.33, "learning_rate": 2.834468495276028e-05, "loss": 2.0188, "step": 14968000 }, { "epoch": 43.33, "learning_rate": 2.8343961305113002e-05, "loss": 2.0453, "step": 14968500 }, { "epoch": 43.33, "learning_rate": 2.834323765746573e-05, "loss": 2.0241, "step": 14969000 }, { "epoch": 43.33, "learning_rate": 2.8342516904409044e-05, "loss": 2.0369, "step": 14969500 }, { "epoch": 43.33, "learning_rate": 2.8341793256761766e-05, "loss": 2.0389, "step": 14970000 }, { "epoch": 43.33, "learning_rate": 2.834106960911449e-05, "loss": 2.0179, "step": 14970500 }, { "epoch": 43.33, "learning_rate": 2.8340345961467214e-05, "loss": 2.0545, "step": 14971000 }, { "epoch": 43.34, "learning_rate": 2.8339622313819936e-05, "loss": 2.0432, "step": 14971500 }, { "epoch": 43.34, "learning_rate": 2.8338898666172658e-05, "loss": 2.0278, "step": 14972000 }, { "epoch": 43.34, "learning_rate": 2.833817501852538e-05, "loss": 2.0335, "step": 14972500 }, { "epoch": 43.34, "learning_rate": 2.8337451370878103e-05, "loss": 2.0357, "step": 14973000 }, { "epoch": 43.34, "learning_rate": 2.833672917052612e-05, "loss": 2.0292, "step": 14973500 }, { "epoch": 43.34, "learning_rate": 2.833600697017414e-05, "loss": 2.0166, "step": 14974000 }, { "epoch": 43.35, "learning_rate": 2.8335283322526863e-05, "loss": 2.019, "step": 14974500 }, { "epoch": 43.35, "learning_rate": 2.833456112217488e-05, "loss": 2.0353, "step": 14975000 }, { "epoch": 43.35, "learning_rate": 2.8333837474527604e-05, "loss": 2.0309, "step": 14975500 }, { "epoch": 43.35, "learning_rate": 2.8333113826880326e-05, "loss": 2.0358, "step": 14976000 }, { "epoch": 43.35, "learning_rate": 2.833239017923305e-05, "loss": 2.0329, "step": 14976500 }, { "epoch": 43.35, "learning_rate": 2.8331666531585777e-05, "loss": 2.0374, "step": 14977000 }, { "epoch": 43.35, "learning_rate": 2.83309428839385e-05, "loss": 2.0121, "step": 14977500 }, { "epoch": 43.36, "learning_rate": 2.8330219236291222e-05, "loss": 2.0294, "step": 14978000 }, { "epoch": 43.36, "learning_rate": 2.8329495588643944e-05, "loss": 2.0138, "step": 14978500 }, { "epoch": 43.36, "learning_rate": 2.8328771940996666e-05, "loss": 2.0253, "step": 14979000 }, { "epoch": 43.36, "learning_rate": 2.8328048293349392e-05, "loss": 2.0091, "step": 14979500 }, { "epoch": 43.36, "learning_rate": 2.8327324645702114e-05, "loss": 2.0151, "step": 14980000 }, { "epoch": 43.36, "learning_rate": 2.832660244535013e-05, "loss": 2.0451, "step": 14980500 }, { "epoch": 43.36, "learning_rate": 2.8325878797702855e-05, "loss": 2.0238, "step": 14981000 }, { "epoch": 43.37, "learning_rate": 2.8325155150055578e-05, "loss": 2.0587, "step": 14981500 }, { "epoch": 43.37, "learning_rate": 2.83244315024083e-05, "loss": 2.0362, "step": 14982000 }, { "epoch": 43.37, "learning_rate": 2.8323707854761022e-05, "loss": 2.0388, "step": 14982500 }, { "epoch": 43.37, "learning_rate": 2.8322984207113744e-05, "loss": 2.029, "step": 14983000 }, { "epoch": 43.37, "learning_rate": 2.8322260559466467e-05, "loss": 2.0281, "step": 14983500 }, { "epoch": 43.37, "learning_rate": 2.8321536911819192e-05, "loss": 2.0476, "step": 14984000 }, { "epoch": 43.37, "learning_rate": 2.8320814711467215e-05, "loss": 2.0211, "step": 14984500 }, { "epoch": 43.38, "learning_rate": 2.8320091063819937e-05, "loss": 2.0336, "step": 14985000 }, { "epoch": 43.38, "learning_rate": 2.831936741617266e-05, "loss": 1.9926, "step": 14985500 }, { "epoch": 43.38, "learning_rate": 2.831864376852538e-05, "loss": 2.0468, "step": 14986000 }, { "epoch": 43.38, "learning_rate": 2.8317920120878107e-05, "loss": 2.0081, "step": 14986500 }, { "epoch": 43.38, "learning_rate": 2.831719647323083e-05, "loss": 2.0501, "step": 14987000 }, { "epoch": 43.38, "learning_rate": 2.831647282558355e-05, "loss": 2.0215, "step": 14987500 }, { "epoch": 43.38, "learning_rate": 2.8315749177936274e-05, "loss": 2.0425, "step": 14988000 }, { "epoch": 43.39, "learning_rate": 2.8315025530288996e-05, "loss": 2.0181, "step": 14988500 }, { "epoch": 43.39, "learning_rate": 2.8314301882641718e-05, "loss": 2.0245, "step": 14989000 }, { "epoch": 43.39, "learning_rate": 2.8313578234994444e-05, "loss": 2.0511, "step": 14989500 }, { "epoch": 43.39, "learning_rate": 2.831285603464246e-05, "loss": 2.0325, "step": 14990000 }, { "epoch": 43.39, "learning_rate": 2.831213238699518e-05, "loss": 2.0364, "step": 14990500 }, { "epoch": 43.39, "learning_rate": 2.8311408739347907e-05, "loss": 2.013, "step": 14991000 }, { "epoch": 43.39, "learning_rate": 2.8310685091700633e-05, "loss": 2.0237, "step": 14991500 }, { "epoch": 43.4, "learning_rate": 2.830996289134865e-05, "loss": 2.0257, "step": 14992000 }, { "epoch": 43.4, "learning_rate": 2.8309239243701374e-05, "loss": 2.0229, "step": 14992500 }, { "epoch": 43.4, "learning_rate": 2.8308515596054096e-05, "loss": 2.0335, "step": 14993000 }, { "epoch": 43.4, "learning_rate": 2.830779194840682e-05, "loss": 2.0097, "step": 14993500 }, { "epoch": 43.4, "learning_rate": 2.8307068300759544e-05, "loss": 2.0122, "step": 14994000 }, { "epoch": 43.4, "learning_rate": 2.8306344653112266e-05, "loss": 2.0308, "step": 14994500 }, { "epoch": 43.4, "learning_rate": 2.830562100546499e-05, "loss": 2.0191, "step": 14995000 }, { "epoch": 43.41, "learning_rate": 2.830489735781771e-05, "loss": 2.0177, "step": 14995500 }, { "epoch": 43.41, "learning_rate": 2.8304173710170433e-05, "loss": 2.0276, "step": 14996000 }, { "epoch": 43.41, "learning_rate": 2.830345006252316e-05, "loss": 2.05, "step": 14996500 }, { "epoch": 43.41, "learning_rate": 2.830272641487588e-05, "loss": 2.0345, "step": 14997000 }, { "epoch": 43.41, "learning_rate": 2.8302002767228603e-05, "loss": 2.0426, "step": 14997500 }, { "epoch": 43.41, "learning_rate": 2.8301279119581325e-05, "loss": 2.0081, "step": 14998000 }, { "epoch": 43.41, "learning_rate": 2.8300555471934054e-05, "loss": 2.0217, "step": 14998500 }, { "epoch": 43.42, "learning_rate": 2.829983327158207e-05, "loss": 2.0242, "step": 14999000 }, { "epoch": 43.42, "learning_rate": 2.8299109623934795e-05, "loss": 2.0004, "step": 14999500 }, { "epoch": 43.42, "learning_rate": 2.8298385976287518e-05, "loss": 2.0542, "step": 15000000 }, { "epoch": 43.42, "learning_rate": 2.829766232864024e-05, "loss": 2.0309, "step": 15000500 }, { "epoch": 43.42, "learning_rate": 2.8296938680992962e-05, "loss": 2.0235, "step": 15001000 }, { "epoch": 43.42, "learning_rate": 2.8296215033345684e-05, "loss": 2.0369, "step": 15001500 }, { "epoch": 43.42, "learning_rate": 2.829549138569841e-05, "loss": 2.0267, "step": 15002000 }, { "epoch": 43.43, "learning_rate": 2.8294767738051132e-05, "loss": 2.0253, "step": 15002500 }, { "epoch": 43.43, "learning_rate": 2.8294046984994444e-05, "loss": 2.024, "step": 15003000 }, { "epoch": 43.43, "learning_rate": 2.8293323337347167e-05, "loss": 2.006, "step": 15003500 }, { "epoch": 43.43, "learning_rate": 2.8292601136995182e-05, "loss": 2.0438, "step": 15004000 }, { "epoch": 43.43, "learning_rate": 2.8291877489347908e-05, "loss": 2.0485, "step": 15004500 }, { "epoch": 43.43, "learning_rate": 2.829115384170063e-05, "loss": 2.0128, "step": 15005000 }, { "epoch": 43.43, "learning_rate": 2.8290430194053352e-05, "loss": 2.0366, "step": 15005500 }, { "epoch": 43.44, "learning_rate": 2.8289706546406075e-05, "loss": 2.0276, "step": 15006000 }, { "epoch": 43.44, "learning_rate": 2.8288982898758804e-05, "loss": 2.0379, "step": 15006500 }, { "epoch": 43.44, "learning_rate": 2.8288259251111526e-05, "loss": 2.0282, "step": 15007000 }, { "epoch": 43.44, "learning_rate": 2.8287535603464248e-05, "loss": 2.0414, "step": 15007500 }, { "epoch": 43.44, "learning_rate": 2.8286813403112267e-05, "loss": 2.0239, "step": 15008000 }, { "epoch": 43.44, "learning_rate": 2.828608975546499e-05, "loss": 2.0484, "step": 15008500 }, { "epoch": 43.44, "learning_rate": 2.828536610781771e-05, "loss": 2.0034, "step": 15009000 }, { "epoch": 43.45, "learning_rate": 2.8284642460170434e-05, "loss": 2.041, "step": 15009500 }, { "epoch": 43.45, "learning_rate": 2.828391881252316e-05, "loss": 2.0261, "step": 15010000 }, { "epoch": 43.45, "learning_rate": 2.828319516487588e-05, "loss": 2.0496, "step": 15010500 }, { "epoch": 43.45, "learning_rate": 2.8282471517228604e-05, "loss": 2.0061, "step": 15011000 }, { "epoch": 43.45, "learning_rate": 2.8281747869581326e-05, "loss": 2.0369, "step": 15011500 }, { "epoch": 43.45, "learning_rate": 2.8281025669229345e-05, "loss": 2.039, "step": 15012000 }, { "epoch": 43.46, "learning_rate": 2.8280302021582067e-05, "loss": 2.0491, "step": 15012500 }, { "epoch": 43.46, "learning_rate": 2.827957837393479e-05, "loss": 2.0543, "step": 15013000 }, { "epoch": 43.46, "learning_rate": 2.827885472628751e-05, "loss": 2.0441, "step": 15013500 }, { "epoch": 43.46, "learning_rate": 2.827813107864024e-05, "loss": 2.0385, "step": 15014000 }, { "epoch": 43.46, "learning_rate": 2.827740887828826e-05, "loss": 2.033, "step": 15014500 }, { "epoch": 43.46, "learning_rate": 2.8276686677936275e-05, "loss": 2.0203, "step": 15015000 }, { "epoch": 43.46, "learning_rate": 2.8275963030288997e-05, "loss": 2.0468, "step": 15015500 }, { "epoch": 43.47, "learning_rate": 2.8275239382641723e-05, "loss": 2.0277, "step": 15016000 }, { "epoch": 43.47, "learning_rate": 2.8274515734994445e-05, "loss": 2.0225, "step": 15016500 }, { "epoch": 43.47, "learning_rate": 2.8273792087347167e-05, "loss": 2.0267, "step": 15017000 }, { "epoch": 43.47, "learning_rate": 2.827306843969989e-05, "loss": 2.0276, "step": 15017500 }, { "epoch": 43.47, "learning_rate": 2.8272344792052612e-05, "loss": 1.9969, "step": 15018000 }, { "epoch": 43.47, "learning_rate": 2.827162259170063e-05, "loss": 2.0192, "step": 15018500 }, { "epoch": 43.47, "learning_rate": 2.8270898944053353e-05, "loss": 2.0524, "step": 15019000 }, { "epoch": 43.48, "learning_rate": 2.8270175296406075e-05, "loss": 2.0219, "step": 15019500 }, { "epoch": 43.48, "learning_rate": 2.8269451648758798e-05, "loss": 2.0262, "step": 15020000 }, { "epoch": 43.48, "learning_rate": 2.8268729448406817e-05, "loss": 2.0181, "step": 15020500 }, { "epoch": 43.48, "learning_rate": 2.826800580075954e-05, "loss": 2.0494, "step": 15021000 }, { "epoch": 43.48, "learning_rate": 2.8267282153112268e-05, "loss": 2.0354, "step": 15021500 }, { "epoch": 43.48, "learning_rate": 2.826655850546499e-05, "loss": 2.0429, "step": 15022000 }, { "epoch": 43.48, "learning_rate": 2.826583630511301e-05, "loss": 2.0401, "step": 15022500 }, { "epoch": 43.49, "learning_rate": 2.826511265746573e-05, "loss": 2.0528, "step": 15023000 }, { "epoch": 43.49, "learning_rate": 2.8264389009818453e-05, "loss": 2.0227, "step": 15023500 }, { "epoch": 43.49, "learning_rate": 2.8263665362171176e-05, "loss": 2.008, "step": 15024000 }, { "epoch": 43.49, "learning_rate": 2.8262941714523898e-05, "loss": 2.0312, "step": 15024500 }, { "epoch": 43.49, "learning_rate": 2.8262219514171917e-05, "loss": 2.0423, "step": 15025000 }, { "epoch": 43.49, "learning_rate": 2.826149586652464e-05, "loss": 2.0193, "step": 15025500 }, { "epoch": 43.49, "learning_rate": 2.826077221887736e-05, "loss": 2.0396, "step": 15026000 }, { "epoch": 43.5, "learning_rate": 2.8260048571230087e-05, "loss": 2.0217, "step": 15026500 }, { "epoch": 43.5, "learning_rate": 2.825932492358281e-05, "loss": 2.0319, "step": 15027000 }, { "epoch": 43.5, "learning_rate": 2.825860127593553e-05, "loss": 2.0378, "step": 15027500 }, { "epoch": 43.5, "learning_rate": 2.8257877628288254e-05, "loss": 2.0307, "step": 15028000 }, { "epoch": 43.5, "learning_rate": 2.8257153980640976e-05, "loss": 2.0375, "step": 15028500 }, { "epoch": 43.5, "learning_rate": 2.8256430332993705e-05, "loss": 2.036, "step": 15029000 }, { "epoch": 43.5, "learning_rate": 2.8255708132641724e-05, "loss": 2.0496, "step": 15029500 }, { "epoch": 43.51, "learning_rate": 2.8254984484994446e-05, "loss": 2.0254, "step": 15030000 }, { "epoch": 43.51, "learning_rate": 2.8254260837347168e-05, "loss": 2.0424, "step": 15030500 }, { "epoch": 43.51, "learning_rate": 2.825353718969989e-05, "loss": 2.0329, "step": 15031000 }, { "epoch": 43.51, "learning_rate": 2.8252813542052613e-05, "loss": 2.0062, "step": 15031500 }, { "epoch": 43.51, "learning_rate": 2.825208989440534e-05, "loss": 2.0122, "step": 15032000 }, { "epoch": 43.51, "learning_rate": 2.825136624675806e-05, "loss": 2.0331, "step": 15032500 }, { "epoch": 43.51, "learning_rate": 2.8250642599110783e-05, "loss": 2.0542, "step": 15033000 }, { "epoch": 43.52, "learning_rate": 2.8249918951463505e-05, "loss": 2.0294, "step": 15033500 }, { "epoch": 43.52, "learning_rate": 2.8249195303816227e-05, "loss": 2.0435, "step": 15034000 }, { "epoch": 43.52, "learning_rate": 2.8248473103464246e-05, "loss": 2.0227, "step": 15034500 }, { "epoch": 43.52, "learning_rate": 2.824774945581697e-05, "loss": 2.0251, "step": 15035000 }, { "epoch": 43.52, "learning_rate": 2.8247027255464987e-05, "loss": 2.0489, "step": 15035500 }, { "epoch": 43.52, "learning_rate": 2.824630360781771e-05, "loss": 2.0261, "step": 15036000 }, { "epoch": 43.52, "learning_rate": 2.824557996017044e-05, "loss": 2.0326, "step": 15036500 }, { "epoch": 43.53, "learning_rate": 2.824485631252316e-05, "loss": 2.0403, "step": 15037000 }, { "epoch": 43.53, "learning_rate": 2.8244135559466473e-05, "loss": 2.0003, "step": 15037500 }, { "epoch": 43.53, "learning_rate": 2.8243411911819195e-05, "loss": 2.0232, "step": 15038000 }, { "epoch": 43.53, "learning_rate": 2.8242688264171918e-05, "loss": 2.038, "step": 15038500 }, { "epoch": 43.53, "learning_rate": 2.824196461652464e-05, "loss": 2.0088, "step": 15039000 }, { "epoch": 43.53, "learning_rate": 2.8241240968877365e-05, "loss": 2.0455, "step": 15039500 }, { "epoch": 43.53, "learning_rate": 2.8240517321230088e-05, "loss": 2.0294, "step": 15040000 }, { "epoch": 43.54, "learning_rate": 2.823979367358281e-05, "loss": 2.0483, "step": 15040500 }, { "epoch": 43.54, "learning_rate": 2.8239070025935532e-05, "loss": 2.0074, "step": 15041000 }, { "epoch": 43.54, "learning_rate": 2.823834782558355e-05, "loss": 2.0408, "step": 15041500 }, { "epoch": 43.54, "learning_rate": 2.8237624177936273e-05, "loss": 2.047, "step": 15042000 }, { "epoch": 43.54, "learning_rate": 2.8236900530288996e-05, "loss": 2.0383, "step": 15042500 }, { "epoch": 43.54, "learning_rate": 2.8236176882641718e-05, "loss": 2.0419, "step": 15043000 }, { "epoch": 43.54, "learning_rate": 2.823545323499444e-05, "loss": 2.0434, "step": 15043500 }, { "epoch": 43.55, "learning_rate": 2.823472958734717e-05, "loss": 2.0169, "step": 15044000 }, { "epoch": 43.55, "learning_rate": 2.823400593969989e-05, "loss": 2.0357, "step": 15044500 }, { "epoch": 43.55, "learning_rate": 2.8233282292052614e-05, "loss": 2.0164, "step": 15045000 }, { "epoch": 43.55, "learning_rate": 2.823255864440534e-05, "loss": 2.0462, "step": 15045500 }, { "epoch": 43.55, "learning_rate": 2.823183499675806e-05, "loss": 2.0379, "step": 15046000 }, { "epoch": 43.55, "learning_rate": 2.8231112796406077e-05, "loss": 2.0354, "step": 15046500 }, { "epoch": 43.55, "learning_rate": 2.8230389148758803e-05, "loss": 2.0352, "step": 15047000 }, { "epoch": 43.56, "learning_rate": 2.8229666948406818e-05, "loss": 2.0224, "step": 15047500 }, { "epoch": 43.56, "learning_rate": 2.822894330075954e-05, "loss": 2.0276, "step": 15048000 }, { "epoch": 43.56, "learning_rate": 2.822822110040756e-05, "loss": 2.0223, "step": 15048500 }, { "epoch": 43.56, "learning_rate": 2.822749745276028e-05, "loss": 2.0201, "step": 15049000 }, { "epoch": 43.56, "learning_rate": 2.8226773805113004e-05, "loss": 2.0327, "step": 15049500 }, { "epoch": 43.56, "learning_rate": 2.8226050157465726e-05, "loss": 2.0433, "step": 15050000 }, { "epoch": 43.57, "learning_rate": 2.822532650981845e-05, "loss": 2.0407, "step": 15050500 }, { "epoch": 43.57, "learning_rate": 2.8224602862171174e-05, "loss": 2.0401, "step": 15051000 }, { "epoch": 43.57, "learning_rate": 2.8223879214523903e-05, "loss": 2.0475, "step": 15051500 }, { "epoch": 43.57, "learning_rate": 2.822315701417192e-05, "loss": 2.0167, "step": 15052000 }, { "epoch": 43.57, "learning_rate": 2.822243336652464e-05, "loss": 2.0189, "step": 15052500 }, { "epoch": 43.57, "learning_rate": 2.8221709718877366e-05, "loss": 2.0269, "step": 15053000 }, { "epoch": 43.57, "learning_rate": 2.822098607123009e-05, "loss": 2.0438, "step": 15053500 }, { "epoch": 43.58, "learning_rate": 2.8220263870878104e-05, "loss": 2.0271, "step": 15054000 }, { "epoch": 43.58, "learning_rate": 2.821954022323083e-05, "loss": 2.018, "step": 15054500 }, { "epoch": 43.58, "learning_rate": 2.8218816575583552e-05, "loss": 2.044, "step": 15055000 }, { "epoch": 43.58, "learning_rate": 2.8218092927936274e-05, "loss": 2.0521, "step": 15055500 }, { "epoch": 43.58, "learning_rate": 2.8217369280288996e-05, "loss": 2.0363, "step": 15056000 }, { "epoch": 43.58, "learning_rate": 2.821664563264172e-05, "loss": 2.0399, "step": 15056500 }, { "epoch": 43.58, "learning_rate": 2.821592198499444e-05, "loss": 2.0435, "step": 15057000 }, { "epoch": 43.59, "learning_rate": 2.821519978464246e-05, "loss": 2.0359, "step": 15057500 }, { "epoch": 43.59, "learning_rate": 2.8214476136995182e-05, "loss": 2.0209, "step": 15058000 }, { "epoch": 43.59, "learning_rate": 2.8213752489347904e-05, "loss": 2.0252, "step": 15058500 }, { "epoch": 43.59, "learning_rate": 2.8213028841700633e-05, "loss": 2.0409, "step": 15059000 }, { "epoch": 43.59, "learning_rate": 2.8212305194053356e-05, "loss": 2.0084, "step": 15059500 }, { "epoch": 43.59, "learning_rate": 2.821158154640608e-05, "loss": 2.0585, "step": 15060000 }, { "epoch": 43.59, "learning_rate": 2.8210857898758803e-05, "loss": 2.0224, "step": 15060500 }, { "epoch": 43.6, "learning_rate": 2.8210134251111526e-05, "loss": 2.0256, "step": 15061000 }, { "epoch": 43.6, "learning_rate": 2.8209410603464248e-05, "loss": 2.0399, "step": 15061500 }, { "epoch": 43.6, "learning_rate": 2.820868695581697e-05, "loss": 2.0261, "step": 15062000 }, { "epoch": 43.6, "learning_rate": 2.820796475546499e-05, "loss": 2.0453, "step": 15062500 }, { "epoch": 43.6, "learning_rate": 2.8207242555113005e-05, "loss": 2.0382, "step": 15063000 }, { "epoch": 43.6, "learning_rate": 2.820651890746573e-05, "loss": 2.0518, "step": 15063500 }, { "epoch": 43.6, "learning_rate": 2.8205795259818452e-05, "loss": 2.026, "step": 15064000 }, { "epoch": 43.61, "learning_rate": 2.8205071612171175e-05, "loss": 1.9932, "step": 15064500 }, { "epoch": 43.61, "learning_rate": 2.8204347964523897e-05, "loss": 2.033, "step": 15065000 }, { "epoch": 43.61, "learning_rate": 2.8203625764171916e-05, "loss": 2.024, "step": 15065500 }, { "epoch": 43.61, "learning_rate": 2.8202902116524638e-05, "loss": 2.0513, "step": 15066000 }, { "epoch": 43.61, "learning_rate": 2.8202178468877367e-05, "loss": 2.0246, "step": 15066500 }, { "epoch": 43.61, "learning_rate": 2.820145482123009e-05, "loss": 2.0356, "step": 15067000 }, { "epoch": 43.61, "learning_rate": 2.820073117358281e-05, "loss": 2.0437, "step": 15067500 }, { "epoch": 43.62, "learning_rate": 2.8200007525935534e-05, "loss": 2.0235, "step": 15068000 }, { "epoch": 43.62, "learning_rate": 2.8199283878288256e-05, "loss": 2.0539, "step": 15068500 }, { "epoch": 43.62, "learning_rate": 2.819856023064098e-05, "loss": 2.0495, "step": 15069000 }, { "epoch": 43.62, "learning_rate": 2.8197836582993704e-05, "loss": 2.027, "step": 15069500 }, { "epoch": 43.62, "learning_rate": 2.8197112935346426e-05, "loss": 2.0282, "step": 15070000 }, { "epoch": 43.62, "learning_rate": 2.8196390734994445e-05, "loss": 2.0344, "step": 15070500 }, { "epoch": 43.62, "learning_rate": 2.819566853464246e-05, "loss": 2.025, "step": 15071000 }, { "epoch": 43.63, "learning_rate": 2.8194944886995183e-05, "loss": 2.0292, "step": 15071500 }, { "epoch": 43.63, "learning_rate": 2.8194221239347905e-05, "loss": 2.0355, "step": 15072000 }, { "epoch": 43.63, "learning_rate": 2.819349759170063e-05, "loss": 2.0628, "step": 15072500 }, { "epoch": 43.63, "learning_rate": 2.8192773944053353e-05, "loss": 2.0503, "step": 15073000 }, { "epoch": 43.63, "learning_rate": 2.819205174370137e-05, "loss": 2.0328, "step": 15073500 }, { "epoch": 43.63, "learning_rate": 2.8191328096054097e-05, "loss": 2.0537, "step": 15074000 }, { "epoch": 43.63, "learning_rate": 2.819060444840682e-05, "loss": 2.0285, "step": 15074500 }, { "epoch": 43.64, "learning_rate": 2.818988224805484e-05, "loss": 2.0202, "step": 15075000 }, { "epoch": 43.64, "learning_rate": 2.818915860040756e-05, "loss": 2.0138, "step": 15075500 }, { "epoch": 43.64, "learning_rate": 2.8188434952760283e-05, "loss": 2.0269, "step": 15076000 }, { "epoch": 43.64, "learning_rate": 2.8187711305113005e-05, "loss": 2.0538, "step": 15076500 }, { "epoch": 43.64, "learning_rate": 2.818698765746573e-05, "loss": 2.0247, "step": 15077000 }, { "epoch": 43.64, "learning_rate": 2.8186264009818453e-05, "loss": 2.0187, "step": 15077500 }, { "epoch": 43.64, "learning_rate": 2.8185540362171175e-05, "loss": 2.0431, "step": 15078000 }, { "epoch": 43.65, "learning_rate": 2.8184818161819194e-05, "loss": 2.0263, "step": 15078500 }, { "epoch": 43.65, "learning_rate": 2.8184094514171917e-05, "loss": 2.0304, "step": 15079000 }, { "epoch": 43.65, "learning_rate": 2.818337086652464e-05, "loss": 2.0089, "step": 15079500 }, { "epoch": 43.65, "learning_rate": 2.818264721887736e-05, "loss": 2.0579, "step": 15080000 }, { "epoch": 43.65, "learning_rate": 2.8181923571230083e-05, "loss": 2.0367, "step": 15080500 }, { "epoch": 43.65, "learning_rate": 2.8181201370878102e-05, "loss": 2.0391, "step": 15081000 }, { "epoch": 43.65, "learning_rate": 2.818047772323083e-05, "loss": 2.0426, "step": 15081500 }, { "epoch": 43.66, "learning_rate": 2.8179754075583554e-05, "loss": 2.029, "step": 15082000 }, { "epoch": 43.66, "learning_rate": 2.8179030427936276e-05, "loss": 2.03, "step": 15082500 }, { "epoch": 43.66, "learning_rate": 2.8178306780288998e-05, "loss": 2.0299, "step": 15083000 }, { "epoch": 43.66, "learning_rate": 2.8177584579937017e-05, "loss": 2.0359, "step": 15083500 }, { "epoch": 43.66, "learning_rate": 2.8176862379585032e-05, "loss": 2.0356, "step": 15084000 }, { "epoch": 43.66, "learning_rate": 2.8176138731937758e-05, "loss": 2.0175, "step": 15084500 }, { "epoch": 43.66, "learning_rate": 2.817541508429048e-05, "loss": 2.0323, "step": 15085000 }, { "epoch": 43.67, "learning_rate": 2.8174692883938496e-05, "loss": 2.0351, "step": 15085500 }, { "epoch": 43.67, "learning_rate": 2.817396923629122e-05, "loss": 2.021, "step": 15086000 }, { "epoch": 43.67, "learning_rate": 2.8173245588643944e-05, "loss": 2.0638, "step": 15086500 }, { "epoch": 43.67, "learning_rate": 2.8172521940996666e-05, "loss": 2.0319, "step": 15087000 }, { "epoch": 43.67, "learning_rate": 2.8171798293349388e-05, "loss": 2.0264, "step": 15087500 }, { "epoch": 43.67, "learning_rate": 2.817107464570211e-05, "loss": 2.0438, "step": 15088000 }, { "epoch": 43.68, "learning_rate": 2.8170350998054833e-05, "loss": 2.0523, "step": 15088500 }, { "epoch": 43.68, "learning_rate": 2.816962735040756e-05, "loss": 2.0165, "step": 15089000 }, { "epoch": 43.68, "learning_rate": 2.8168903702760284e-05, "loss": 2.0181, "step": 15089500 }, { "epoch": 43.68, "learning_rate": 2.816818005511301e-05, "loss": 2.0273, "step": 15090000 }, { "epoch": 43.68, "learning_rate": 2.8167456407465732e-05, "loss": 2.0118, "step": 15090500 }, { "epoch": 43.68, "learning_rate": 2.8166732759818454e-05, "loss": 2.0334, "step": 15091000 }, { "epoch": 43.68, "learning_rate": 2.8166009112171176e-05, "loss": 2.047, "step": 15091500 }, { "epoch": 43.69, "learning_rate": 2.81652854645239e-05, "loss": 2.0423, "step": 15092000 }, { "epoch": 43.69, "learning_rate": 2.8164563264171917e-05, "loss": 2.0326, "step": 15092500 }, { "epoch": 43.69, "learning_rate": 2.816383961652464e-05, "loss": 2.0407, "step": 15093000 }, { "epoch": 43.69, "learning_rate": 2.8163115968877362e-05, "loss": 2.0319, "step": 15093500 }, { "epoch": 43.69, "learning_rate": 2.8162392321230084e-05, "loss": 2.0084, "step": 15094000 }, { "epoch": 43.69, "learning_rate": 2.816166867358281e-05, "loss": 2.025, "step": 15094500 }, { "epoch": 43.69, "learning_rate": 2.8160945025935532e-05, "loss": 2.028, "step": 15095000 }, { "epoch": 43.7, "learning_rate": 2.8160221378288254e-05, "loss": 2.0224, "step": 15095500 }, { "epoch": 43.7, "learning_rate": 2.8159497730640976e-05, "loss": 2.0219, "step": 15096000 }, { "epoch": 43.7, "learning_rate": 2.8158774082993705e-05, "loss": 2.0216, "step": 15096500 }, { "epoch": 43.7, "learning_rate": 2.8158050435346428e-05, "loss": 2.0243, "step": 15097000 }, { "epoch": 43.7, "learning_rate": 2.815732678769915e-05, "loss": 2.0342, "step": 15097500 }, { "epoch": 43.7, "learning_rate": 2.8156603140051872e-05, "loss": 2.053, "step": 15098000 }, { "epoch": 43.7, "learning_rate": 2.8155879492404598e-05, "loss": 2.0282, "step": 15098500 }, { "epoch": 43.71, "learning_rate": 2.815515873934791e-05, "loss": 2.0433, "step": 15099000 }, { "epoch": 43.71, "learning_rate": 2.8154435091700632e-05, "loss": 2.0381, "step": 15099500 }, { "epoch": 43.71, "learning_rate": 2.8153711444053355e-05, "loss": 2.0397, "step": 15100000 }, { "epoch": 43.71, "learning_rate": 2.8152987796406077e-05, "loss": 2.033, "step": 15100500 }, { "epoch": 43.71, "learning_rate": 2.81522641487588e-05, "loss": 2.0363, "step": 15101000 }, { "epoch": 43.71, "learning_rate": 2.8151540501111525e-05, "loss": 2.0633, "step": 15101500 }, { "epoch": 43.71, "learning_rate": 2.8150816853464247e-05, "loss": 2.0244, "step": 15102000 }, { "epoch": 43.72, "learning_rate": 2.815009320581697e-05, "loss": 2.0343, "step": 15102500 }, { "epoch": 43.72, "learning_rate": 2.814936955816969e-05, "loss": 2.0198, "step": 15103000 }, { "epoch": 43.72, "learning_rate": 2.8148645910522414e-05, "loss": 2.0551, "step": 15103500 }, { "epoch": 43.72, "learning_rate": 2.8147922262875143e-05, "loss": 2.0374, "step": 15104000 }, { "epoch": 43.72, "learning_rate": 2.8147198615227865e-05, "loss": 2.0142, "step": 15104500 }, { "epoch": 43.72, "learning_rate": 2.8146474967580587e-05, "loss": 2.0017, "step": 15105000 }, { "epoch": 43.72, "learning_rate": 2.8145751319933313e-05, "loss": 2.0345, "step": 15105500 }, { "epoch": 43.73, "learning_rate": 2.8145030566876625e-05, "loss": 2.0095, "step": 15106000 }, { "epoch": 43.73, "learning_rate": 2.814430836652464e-05, "loss": 2.0361, "step": 15106500 }, { "epoch": 43.73, "learning_rate": 2.8143584718877363e-05, "loss": 2.0414, "step": 15107000 }, { "epoch": 43.73, "learning_rate": 2.814286107123009e-05, "loss": 2.0457, "step": 15107500 }, { "epoch": 43.73, "learning_rate": 2.814213742358281e-05, "loss": 2.0439, "step": 15108000 }, { "epoch": 43.73, "learning_rate": 2.8141413775935533e-05, "loss": 2.0197, "step": 15108500 }, { "epoch": 43.73, "learning_rate": 2.8140690128288255e-05, "loss": 2.0572, "step": 15109000 }, { "epoch": 43.74, "learning_rate": 2.8139966480640977e-05, "loss": 2.047, "step": 15109500 }, { "epoch": 43.74, "learning_rate": 2.81392428329937e-05, "loss": 2.0403, "step": 15110000 }, { "epoch": 43.74, "learning_rate": 2.8138519185346425e-05, "loss": 2.0316, "step": 15110500 }, { "epoch": 43.74, "learning_rate": 2.813779698499444e-05, "loss": 2.0391, "step": 15111000 }, { "epoch": 43.74, "learning_rate": 2.813707333734717e-05, "loss": 2.0498, "step": 15111500 }, { "epoch": 43.74, "learning_rate": 2.8136349689699892e-05, "loss": 2.0171, "step": 15112000 }, { "epoch": 43.74, "learning_rate": 2.8135626042052614e-05, "loss": 2.0297, "step": 15112500 }, { "epoch": 43.75, "learning_rate": 2.8134902394405336e-05, "loss": 2.0336, "step": 15113000 }, { "epoch": 43.75, "learning_rate": 2.8134180194053355e-05, "loss": 2.0219, "step": 15113500 }, { "epoch": 43.75, "learning_rate": 2.8133456546406078e-05, "loss": 2.0175, "step": 15114000 }, { "epoch": 43.75, "learning_rate": 2.81327328987588e-05, "loss": 2.0199, "step": 15114500 }, { "epoch": 43.75, "learning_rate": 2.8132009251111525e-05, "loss": 2.0674, "step": 15115000 }, { "epoch": 43.75, "learning_rate": 2.813128705075954e-05, "loss": 2.0267, "step": 15115500 }, { "epoch": 43.75, "learning_rate": 2.8130563403112263e-05, "loss": 2.0573, "step": 15116000 }, { "epoch": 43.76, "learning_rate": 2.812983975546499e-05, "loss": 2.0374, "step": 15116500 }, { "epoch": 43.76, "learning_rate": 2.8129117555113004e-05, "loss": 2.0292, "step": 15117000 }, { "epoch": 43.76, "learning_rate": 2.8128395354761023e-05, "loss": 2.0451, "step": 15117500 }, { "epoch": 43.76, "learning_rate": 2.8127671707113746e-05, "loss": 2.032, "step": 15118000 }, { "epoch": 43.76, "learning_rate": 2.8126948059466468e-05, "loss": 2.0396, "step": 15118500 }, { "epoch": 43.76, "learning_rate": 2.812622441181919e-05, "loss": 2.0288, "step": 15119000 }, { "epoch": 43.76, "learning_rate": 2.812550076417192e-05, "loss": 2.0416, "step": 15119500 }, { "epoch": 43.77, "learning_rate": 2.812477711652464e-05, "loss": 2.0376, "step": 15120000 }, { "epoch": 43.77, "learning_rate": 2.8124053468877363e-05, "loss": 2.0349, "step": 15120500 }, { "epoch": 43.77, "learning_rate": 2.8123331268525382e-05, "loss": 2.0422, "step": 15121000 }, { "epoch": 43.77, "learning_rate": 2.8122607620878105e-05, "loss": 2.0406, "step": 15121500 }, { "epoch": 43.77, "learning_rate": 2.8121883973230827e-05, "loss": 2.0152, "step": 15122000 }, { "epoch": 43.77, "learning_rate": 2.8121160325583553e-05, "loss": 2.0261, "step": 15122500 }, { "epoch": 43.77, "learning_rate": 2.8120436677936275e-05, "loss": 2.0409, "step": 15123000 }, { "epoch": 43.78, "learning_rate": 2.8119713030288997e-05, "loss": 2.0277, "step": 15123500 }, { "epoch": 43.78, "learning_rate": 2.811898938264172e-05, "loss": 2.0189, "step": 15124000 }, { "epoch": 43.78, "learning_rate": 2.811826573499444e-05, "loss": 2.0423, "step": 15124500 }, { "epoch": 43.78, "learning_rate": 2.8117542087347164e-05, "loss": 2.0456, "step": 15125000 }, { "epoch": 43.78, "learning_rate": 2.811681843969989e-05, "loss": 2.0281, "step": 15125500 }, { "epoch": 43.78, "learning_rate": 2.811609479205261e-05, "loss": 2.0565, "step": 15126000 }, { "epoch": 43.79, "learning_rate": 2.811537114440534e-05, "loss": 2.0452, "step": 15126500 }, { "epoch": 43.79, "learning_rate": 2.8114647496758063e-05, "loss": 2.0615, "step": 15127000 }, { "epoch": 43.79, "learning_rate": 2.811392529640608e-05, "loss": 2.0412, "step": 15127500 }, { "epoch": 43.79, "learning_rate": 2.8113203096054097e-05, "loss": 2.0276, "step": 15128000 }, { "epoch": 43.79, "learning_rate": 2.811247944840682e-05, "loss": 2.0316, "step": 15128500 }, { "epoch": 43.79, "learning_rate": 2.8111755800759542e-05, "loss": 2.0318, "step": 15129000 }, { "epoch": 43.79, "learning_rate": 2.8111032153112264e-05, "loss": 2.0193, "step": 15129500 }, { "epoch": 43.8, "learning_rate": 2.8110309952760283e-05, "loss": 2.0202, "step": 15130000 }, { "epoch": 43.8, "learning_rate": 2.8109586305113005e-05, "loss": 2.0339, "step": 15130500 }, { "epoch": 43.8, "learning_rate": 2.8108862657465727e-05, "loss": 2.0292, "step": 15131000 }, { "epoch": 43.8, "learning_rate": 2.8108139009818453e-05, "loss": 2.0344, "step": 15131500 }, { "epoch": 43.8, "learning_rate": 2.8107415362171175e-05, "loss": 2.0295, "step": 15132000 }, { "epoch": 43.8, "learning_rate": 2.8106691714523897e-05, "loss": 2.0423, "step": 15132500 }, { "epoch": 43.8, "learning_rate": 2.8105969514171916e-05, "loss": 2.0419, "step": 15133000 }, { "epoch": 43.81, "learning_rate": 2.810524586652464e-05, "loss": 2.027, "step": 15133500 }, { "epoch": 43.81, "learning_rate": 2.8104522218877368e-05, "loss": 2.0171, "step": 15134000 }, { "epoch": 43.81, "learning_rate": 2.810379857123009e-05, "loss": 2.0303, "step": 15134500 }, { "epoch": 43.81, "learning_rate": 2.8103074923582812e-05, "loss": 2.0271, "step": 15135000 }, { "epoch": 43.81, "learning_rate": 2.8102351275935534e-05, "loss": 2.0598, "step": 15135500 }, { "epoch": 43.81, "learning_rate": 2.8101627628288257e-05, "loss": 2.0433, "step": 15136000 }, { "epoch": 43.81, "learning_rate": 2.810090398064098e-05, "loss": 2.0301, "step": 15136500 }, { "epoch": 43.82, "learning_rate": 2.8100180332993704e-05, "loss": 2.0237, "step": 15137000 }, { "epoch": 43.82, "learning_rate": 2.8099456685346427e-05, "loss": 2.0314, "step": 15137500 }, { "epoch": 43.82, "learning_rate": 2.809873303769915e-05, "loss": 2.0209, "step": 15138000 }, { "epoch": 43.82, "learning_rate": 2.809800939005187e-05, "loss": 2.0353, "step": 15138500 }, { "epoch": 43.82, "learning_rate": 2.8097285742404593e-05, "loss": 2.0583, "step": 15139000 }, { "epoch": 43.82, "learning_rate": 2.8096562094757316e-05, "loss": 2.0239, "step": 15139500 }, { "epoch": 43.82, "learning_rate": 2.8095839894405335e-05, "loss": 2.0382, "step": 15140000 }, { "epoch": 43.83, "learning_rate": 2.8095116246758057e-05, "loss": 2.0346, "step": 15140500 }, { "epoch": 43.83, "learning_rate": 2.809439259911078e-05, "loss": 2.042, "step": 15141000 }, { "epoch": 43.83, "learning_rate": 2.8093668951463508e-05, "loss": 2.042, "step": 15141500 }, { "epoch": 43.83, "learning_rate": 2.809294530381623e-05, "loss": 2.0402, "step": 15142000 }, { "epoch": 43.83, "learning_rate": 2.8092221656168956e-05, "loss": 2.0253, "step": 15142500 }, { "epoch": 43.83, "learning_rate": 2.809149945581697e-05, "loss": 2.0506, "step": 15143000 }, { "epoch": 43.83, "learning_rate": 2.8090775808169694e-05, "loss": 2.0312, "step": 15143500 }, { "epoch": 43.84, "learning_rate": 2.8090052160522416e-05, "loss": 2.0245, "step": 15144000 }, { "epoch": 43.84, "learning_rate": 2.808932851287514e-05, "loss": 2.0177, "step": 15144500 }, { "epoch": 43.84, "learning_rate": 2.8088604865227864e-05, "loss": 2.035, "step": 15145000 }, { "epoch": 43.84, "learning_rate": 2.8087881217580586e-05, "loss": 2.0341, "step": 15145500 }, { "epoch": 43.84, "learning_rate": 2.8087159017228605e-05, "loss": 2.0433, "step": 15146000 }, { "epoch": 43.84, "learning_rate": 2.8086435369581327e-05, "loss": 2.0471, "step": 15146500 }, { "epoch": 43.84, "learning_rate": 2.8085713169229343e-05, "loss": 2.0364, "step": 15147000 }, { "epoch": 43.85, "learning_rate": 2.808498952158207e-05, "loss": 2.0443, "step": 15147500 }, { "epoch": 43.85, "learning_rate": 2.808426587393479e-05, "loss": 2.005, "step": 15148000 }, { "epoch": 43.85, "learning_rate": 2.8083542226287513e-05, "loss": 2.0191, "step": 15148500 }, { "epoch": 43.85, "learning_rate": 2.8082818578640242e-05, "loss": 2.0452, "step": 15149000 }, { "epoch": 43.85, "learning_rate": 2.8082096378288257e-05, "loss": 2.0174, "step": 15149500 }, { "epoch": 43.85, "learning_rate": 2.8081374177936276e-05, "loss": 2.025, "step": 15150000 }, { "epoch": 43.85, "learning_rate": 2.8080650530289e-05, "loss": 2.0373, "step": 15150500 }, { "epoch": 43.86, "learning_rate": 2.807992688264172e-05, "loss": 2.0291, "step": 15151000 }, { "epoch": 43.86, "learning_rate": 2.8079203234994443e-05, "loss": 2.0261, "step": 15151500 }, { "epoch": 43.86, "learning_rate": 2.807847958734717e-05, "loss": 2.0458, "step": 15152000 }, { "epoch": 43.86, "learning_rate": 2.807775593969989e-05, "loss": 2.0115, "step": 15152500 }, { "epoch": 43.86, "learning_rate": 2.8077032292052613e-05, "loss": 2.0399, "step": 15153000 }, { "epoch": 43.86, "learning_rate": 2.8076308644405335e-05, "loss": 2.0631, "step": 15153500 }, { "epoch": 43.86, "learning_rate": 2.8075584996758058e-05, "loss": 2.0195, "step": 15154000 }, { "epoch": 43.87, "learning_rate": 2.807486134911078e-05, "loss": 2.0189, "step": 15154500 }, { "epoch": 43.87, "learning_rate": 2.8074137701463505e-05, "loss": 2.0117, "step": 15155000 }, { "epoch": 43.87, "learning_rate": 2.8073414053816228e-05, "loss": 2.0555, "step": 15155500 }, { "epoch": 43.87, "learning_rate": 2.8072691853464243e-05, "loss": 2.0427, "step": 15156000 }, { "epoch": 43.87, "learning_rate": 2.8071968205816972e-05, "loss": 2.0264, "step": 15156500 }, { "epoch": 43.87, "learning_rate": 2.8071244558169694e-05, "loss": 2.0139, "step": 15157000 }, { "epoch": 43.87, "learning_rate": 2.8070522357817713e-05, "loss": 2.0325, "step": 15157500 }, { "epoch": 43.88, "learning_rate": 2.8069800157465732e-05, "loss": 2.0187, "step": 15158000 }, { "epoch": 43.88, "learning_rate": 2.8069076509818455e-05, "loss": 2.0081, "step": 15158500 }, { "epoch": 43.88, "learning_rate": 2.8068352862171177e-05, "loss": 2.0222, "step": 15159000 }, { "epoch": 43.88, "learning_rate": 2.80676292145239e-05, "loss": 2.0217, "step": 15159500 }, { "epoch": 43.88, "learning_rate": 2.806690556687662e-05, "loss": 2.0284, "step": 15160000 }, { "epoch": 43.88, "learning_rate": 2.8066181919229344e-05, "loss": 2.0332, "step": 15160500 }, { "epoch": 43.88, "learning_rate": 2.806545827158207e-05, "loss": 2.0356, "step": 15161000 }, { "epoch": 43.89, "learning_rate": 2.806473462393479e-05, "loss": 2.0326, "step": 15161500 }, { "epoch": 43.89, "learning_rate": 2.8064010976287514e-05, "loss": 2.0578, "step": 15162000 }, { "epoch": 43.89, "learning_rate": 2.8063287328640236e-05, "loss": 2.032, "step": 15162500 }, { "epoch": 43.89, "learning_rate": 2.8062563680992958e-05, "loss": 2.0408, "step": 15163000 }, { "epoch": 43.89, "learning_rate": 2.8061840033345687e-05, "loss": 2.0631, "step": 15163500 }, { "epoch": 43.89, "learning_rate": 2.806111638569841e-05, "loss": 2.0167, "step": 15164000 }, { "epoch": 43.9, "learning_rate": 2.8060392738051135e-05, "loss": 2.0508, "step": 15164500 }, { "epoch": 43.9, "learning_rate": 2.8059669090403857e-05, "loss": 2.023, "step": 15165000 }, { "epoch": 43.9, "learning_rate": 2.805894544275658e-05, "loss": 2.0441, "step": 15165500 }, { "epoch": 43.9, "learning_rate": 2.80582217951093e-05, "loss": 2.0011, "step": 15166000 }, { "epoch": 43.9, "learning_rate": 2.805749959475732e-05, "loss": 2.0103, "step": 15166500 }, { "epoch": 43.9, "learning_rate": 2.8056777394405336e-05, "loss": 2.0437, "step": 15167000 }, { "epoch": 43.9, "learning_rate": 2.8056055194053355e-05, "loss": 2.0204, "step": 15167500 }, { "epoch": 43.91, "learning_rate": 2.8055331546406077e-05, "loss": 2.0041, "step": 15168000 }, { "epoch": 43.91, "learning_rate": 2.80546078987588e-05, "loss": 2.0411, "step": 15168500 }, { "epoch": 43.91, "learning_rate": 2.8053884251111522e-05, "loss": 2.0392, "step": 15169000 }, { "epoch": 43.91, "learning_rate": 2.8053160603464247e-05, "loss": 2.0306, "step": 15169500 }, { "epoch": 43.91, "learning_rate": 2.805243695581697e-05, "loss": 2.0198, "step": 15170000 }, { "epoch": 43.91, "learning_rate": 2.8051714755464985e-05, "loss": 2.0352, "step": 15170500 }, { "epoch": 43.91, "learning_rate": 2.8050991107817707e-05, "loss": 2.0524, "step": 15171000 }, { "epoch": 43.92, "learning_rate": 2.8050267460170436e-05, "loss": 2.039, "step": 15171500 }, { "epoch": 43.92, "learning_rate": 2.804954381252316e-05, "loss": 2.0261, "step": 15172000 }, { "epoch": 43.92, "learning_rate": 2.8048820164875884e-05, "loss": 2.0425, "step": 15172500 }, { "epoch": 43.92, "learning_rate": 2.8048096517228607e-05, "loss": 2.0188, "step": 15173000 }, { "epoch": 43.92, "learning_rate": 2.804737286958133e-05, "loss": 2.0114, "step": 15173500 }, { "epoch": 43.92, "learning_rate": 2.804664922193405e-05, "loss": 2.0284, "step": 15174000 }, { "epoch": 43.92, "learning_rate": 2.8045925574286773e-05, "loss": 2.0212, "step": 15174500 }, { "epoch": 43.93, "learning_rate": 2.80452019266395e-05, "loss": 2.0319, "step": 15175000 }, { "epoch": 43.93, "learning_rate": 2.804447827899222e-05, "loss": 2.0428, "step": 15175500 }, { "epoch": 43.93, "learning_rate": 2.8043754631344943e-05, "loss": 2.023, "step": 15176000 }, { "epoch": 43.93, "learning_rate": 2.804303243099296e-05, "loss": 2.0372, "step": 15176500 }, { "epoch": 43.93, "learning_rate": 2.8042308783345685e-05, "loss": 2.0247, "step": 15177000 }, { "epoch": 43.93, "learning_rate": 2.8041585135698407e-05, "loss": 2.0239, "step": 15177500 }, { "epoch": 43.93, "learning_rate": 2.804086148805113e-05, "loss": 2.0422, "step": 15178000 }, { "epoch": 43.94, "learning_rate": 2.8040137840403858e-05, "loss": 2.02, "step": 15178500 }, { "epoch": 43.94, "learning_rate": 2.803941419275658e-05, "loss": 2.0406, "step": 15179000 }, { "epoch": 43.94, "learning_rate": 2.8038690545109302e-05, "loss": 2.0377, "step": 15179500 }, { "epoch": 43.94, "learning_rate": 2.8037966897462025e-05, "loss": 2.0315, "step": 15180000 }, { "epoch": 43.94, "learning_rate": 2.8037244697110044e-05, "loss": 2.0404, "step": 15180500 }, { "epoch": 43.94, "learning_rate": 2.8036521049462766e-05, "loss": 2.0224, "step": 15181000 }, { "epoch": 43.94, "learning_rate": 2.8035797401815488e-05, "loss": 2.0298, "step": 15181500 }, { "epoch": 43.95, "learning_rate": 2.803507375416821e-05, "loss": 2.0401, "step": 15182000 }, { "epoch": 43.95, "learning_rate": 2.8034350106520936e-05, "loss": 2.0314, "step": 15182500 }, { "epoch": 43.95, "learning_rate": 2.803362790616895e-05, "loss": 2.0432, "step": 15183000 }, { "epoch": 43.95, "learning_rate": 2.8032904258521674e-05, "loss": 2.0257, "step": 15183500 }, { "epoch": 43.95, "learning_rate": 2.80321806108744e-05, "loss": 2.0317, "step": 15184000 }, { "epoch": 43.95, "learning_rate": 2.803145696322712e-05, "loss": 2.045, "step": 15184500 }, { "epoch": 43.95, "learning_rate": 2.8030734762875137e-05, "loss": 2.0361, "step": 15185000 }, { "epoch": 43.96, "learning_rate": 2.8030012562523156e-05, "loss": 2.055, "step": 15185500 }, { "epoch": 43.96, "learning_rate": 2.802928891487588e-05, "loss": 2.0447, "step": 15186000 }, { "epoch": 43.96, "learning_rate": 2.8028565267228607e-05, "loss": 2.0436, "step": 15186500 }, { "epoch": 43.96, "learning_rate": 2.802784161958133e-05, "loss": 2.0449, "step": 15187000 }, { "epoch": 43.96, "learning_rate": 2.8027117971934052e-05, "loss": 2.0231, "step": 15187500 }, { "epoch": 43.96, "learning_rate": 2.8026394324286774e-05, "loss": 2.0372, "step": 15188000 }, { "epoch": 43.96, "learning_rate": 2.80256706766395e-05, "loss": 2.0146, "step": 15188500 }, { "epoch": 43.97, "learning_rate": 2.8024947028992222e-05, "loss": 2.0484, "step": 15189000 }, { "epoch": 43.97, "learning_rate": 2.8024223381344944e-05, "loss": 2.0244, "step": 15189500 }, { "epoch": 43.97, "learning_rate": 2.8023499733697666e-05, "loss": 2.0208, "step": 15190000 }, { "epoch": 43.97, "learning_rate": 2.8022777533345685e-05, "loss": 2.0356, "step": 15190500 }, { "epoch": 43.97, "learning_rate": 2.80220553329937e-05, "loss": 2.0564, "step": 15191000 }, { "epoch": 43.97, "learning_rate": 2.8021331685346423e-05, "loss": 2.0381, "step": 15191500 }, { "epoch": 43.97, "learning_rate": 2.802060803769915e-05, "loss": 2.0386, "step": 15192000 }, { "epoch": 43.98, "learning_rate": 2.801988439005187e-05, "loss": 2.0258, "step": 15192500 }, { "epoch": 43.98, "learning_rate": 2.8019160742404593e-05, "loss": 2.0454, "step": 15193000 }, { "epoch": 43.98, "learning_rate": 2.8018437094757322e-05, "loss": 2.0328, "step": 15193500 }, { "epoch": 43.98, "learning_rate": 2.8017713447110044e-05, "loss": 2.0444, "step": 15194000 }, { "epoch": 43.98, "learning_rate": 2.8016991246758063e-05, "loss": 2.0149, "step": 15194500 }, { "epoch": 43.98, "learning_rate": 2.8016267599110786e-05, "loss": 2.0361, "step": 15195000 }, { "epoch": 43.98, "learning_rate": 2.8015543951463508e-05, "loss": 2.0131, "step": 15195500 }, { "epoch": 43.99, "learning_rate": 2.8014821751111527e-05, "loss": 2.0619, "step": 15196000 }, { "epoch": 43.99, "learning_rate": 2.8014099550759542e-05, "loss": 2.0229, "step": 15196500 }, { "epoch": 43.99, "learning_rate": 2.8013375903112265e-05, "loss": 2.0374, "step": 15197000 }, { "epoch": 43.99, "learning_rate": 2.8012652255464987e-05, "loss": 2.0544, "step": 15197500 }, { "epoch": 43.99, "learning_rate": 2.8011930055113006e-05, "loss": 2.0401, "step": 15198000 }, { "epoch": 43.99, "learning_rate": 2.8011206407465728e-05, "loss": 2.0413, "step": 15198500 }, { "epoch": 43.99, "learning_rate": 2.801048275981845e-05, "loss": 2.0313, "step": 15199000 }, { "epoch": 44.0, "learning_rate": 2.8009759112171176e-05, "loss": 2.0427, "step": 15199500 }, { "epoch": 44.0, "learning_rate": 2.8009035464523898e-05, "loss": 2.0291, "step": 15200000 }, { "epoch": 44.0, "learning_rate": 2.800831181687662e-05, "loss": 2.0537, "step": 15200500 }, { "epoch": 44.0, "eval_accuracy": 0.6721790831878672, "eval_accuracy_mlm": 0.6378787277283956, "eval_accuracy_nsp": 0.856310697410091, "eval_loss": 2.162269353866577, "eval_runtime": 330.7589, "eval_samples_per_second": 1319.348, "eval_steps_per_second": 54.974, "step": 15200768 }, { "epoch": 44.0, "learning_rate": 2.8007588169229343e-05, "loss": 2.0462, "step": 15201000 }, { "epoch": 44.0, "learning_rate": 2.800686452158207e-05, "loss": 1.9841, "step": 15201500 }, { "epoch": 44.0, "learning_rate": 2.8006140873934794e-05, "loss": 2.0125, "step": 15202000 }, { "epoch": 44.01, "learning_rate": 2.8005417226287516e-05, "loss": 2.021, "step": 15202500 }, { "epoch": 44.01, "learning_rate": 2.8004693578640238e-05, "loss": 2.021, "step": 15203000 }, { "epoch": 44.01, "learning_rate": 2.8003969930992964e-05, "loss": 1.9987, "step": 15203500 }, { "epoch": 44.01, "learning_rate": 2.8003246283345686e-05, "loss": 2.012, "step": 15204000 }, { "epoch": 44.01, "learning_rate": 2.800252263569841e-05, "loss": 2.021, "step": 15204500 }, { "epoch": 44.01, "learning_rate": 2.800179898805113e-05, "loss": 2.0301, "step": 15205000 }, { "epoch": 44.01, "learning_rate": 2.8001075340403853e-05, "loss": 2.0177, "step": 15205500 }, { "epoch": 44.02, "learning_rate": 2.800035169275658e-05, "loss": 2.0283, "step": 15206000 }, { "epoch": 44.02, "learning_rate": 2.79996280451093e-05, "loss": 2.0221, "step": 15206500 }, { "epoch": 44.02, "learning_rate": 2.7998904397462023e-05, "loss": 2.0286, "step": 15207000 }, { "epoch": 44.02, "learning_rate": 2.7998180749814745e-05, "loss": 2.0311, "step": 15207500 }, { "epoch": 44.02, "learning_rate": 2.7997458549462764e-05, "loss": 2.0269, "step": 15208000 }, { "epoch": 44.02, "learning_rate": 2.799673634911078e-05, "loss": 2.0087, "step": 15208500 }, { "epoch": 44.02, "learning_rate": 2.799601270146351e-05, "loss": 2.0074, "step": 15209000 }, { "epoch": 44.03, "learning_rate": 2.799528905381623e-05, "loss": 2.0182, "step": 15209500 }, { "epoch": 44.03, "learning_rate": 2.7994565406168953e-05, "loss": 2.0089, "step": 15210000 }, { "epoch": 44.03, "learning_rate": 2.799384175852168e-05, "loss": 2.0228, "step": 15210500 }, { "epoch": 44.03, "learning_rate": 2.79931181108744e-05, "loss": 2.0175, "step": 15211000 }, { "epoch": 44.03, "learning_rate": 2.7992395910522417e-05, "loss": 2.0327, "step": 15211500 }, { "epoch": 44.03, "learning_rate": 2.799167226287514e-05, "loss": 2.038, "step": 15212000 }, { "epoch": 44.03, "learning_rate": 2.7990948615227864e-05, "loss": 2.0184, "step": 15212500 }, { "epoch": 44.04, "learning_rate": 2.799022641487588e-05, "loss": 2.0191, "step": 15213000 }, { "epoch": 44.04, "learning_rate": 2.7989502767228602e-05, "loss": 2.0406, "step": 15213500 }, { "epoch": 44.04, "learning_rate": 2.7988779119581328e-05, "loss": 2.0026, "step": 15214000 }, { "epoch": 44.04, "learning_rate": 2.798805547193405e-05, "loss": 2.0131, "step": 15214500 }, { "epoch": 44.04, "learning_rate": 2.7987331824286772e-05, "loss": 1.9986, "step": 15215000 }, { "epoch": 44.04, "learning_rate": 2.7986608176639494e-05, "loss": 2.0458, "step": 15215500 }, { "epoch": 44.04, "learning_rate": 2.7985884528992223e-05, "loss": 2.0487, "step": 15216000 }, { "epoch": 44.05, "learning_rate": 2.7985160881344946e-05, "loss": 2.0105, "step": 15216500 }, { "epoch": 44.05, "learning_rate": 2.7984437233697668e-05, "loss": 2.0341, "step": 15217000 }, { "epoch": 44.05, "learning_rate": 2.7983715033345687e-05, "loss": 2.0077, "step": 15217500 }, { "epoch": 44.05, "learning_rate": 2.798299138569841e-05, "loss": 2.013, "step": 15218000 }, { "epoch": 44.05, "learning_rate": 2.798226773805113e-05, "loss": 2.0405, "step": 15218500 }, { "epoch": 44.05, "learning_rate": 2.7981544090403854e-05, "loss": 2.0105, "step": 15219000 }, { "epoch": 44.05, "learning_rate": 2.798082044275658e-05, "loss": 2.0219, "step": 15219500 }, { "epoch": 44.06, "learning_rate": 2.79800967951093e-05, "loss": 2.0175, "step": 15220000 }, { "epoch": 44.06, "learning_rate": 2.7979373147462024e-05, "loss": 2.0051, "step": 15220500 }, { "epoch": 44.06, "learning_rate": 2.7978650947110043e-05, "loss": 2.0242, "step": 15221000 }, { "epoch": 44.06, "learning_rate": 2.7977927299462765e-05, "loss": 2.0493, "step": 15221500 }, { "epoch": 44.06, "learning_rate": 2.7977203651815487e-05, "loss": 2.0055, "step": 15222000 }, { "epoch": 44.06, "learning_rate": 2.797648000416821e-05, "loss": 2.0175, "step": 15222500 }, { "epoch": 44.06, "learning_rate": 2.797575635652093e-05, "loss": 2.0179, "step": 15223000 }, { "epoch": 44.07, "learning_rate": 2.797503270887366e-05, "loss": 2.0057, "step": 15223500 }, { "epoch": 44.07, "learning_rate": 2.7974309061226383e-05, "loss": 2.0358, "step": 15224000 }, { "epoch": 44.07, "learning_rate": 2.7973585413579105e-05, "loss": 2.0064, "step": 15224500 }, { "epoch": 44.07, "learning_rate": 2.797286176593183e-05, "loss": 1.9895, "step": 15225000 }, { "epoch": 44.07, "learning_rate": 2.7972138118284553e-05, "loss": 2.0263, "step": 15225500 }, { "epoch": 44.07, "learning_rate": 2.7971414470637275e-05, "loss": 2.0012, "step": 15226000 }, { "epoch": 44.07, "learning_rate": 2.7970690822989997e-05, "loss": 2.0143, "step": 15226500 }, { "epoch": 44.08, "learning_rate": 2.796996717534272e-05, "loss": 1.9842, "step": 15227000 }, { "epoch": 44.08, "learning_rate": 2.7969243527695442e-05, "loss": 1.9813, "step": 15227500 }, { "epoch": 44.08, "learning_rate": 2.796852132734346e-05, "loss": 2.0244, "step": 15228000 }, { "epoch": 44.08, "learning_rate": 2.7967797679696183e-05, "loss": 2.0318, "step": 15228500 }, { "epoch": 44.08, "learning_rate": 2.7967075479344202e-05, "loss": 2.0235, "step": 15229000 }, { "epoch": 44.08, "learning_rate": 2.7966351831696924e-05, "loss": 2.0087, "step": 15229500 }, { "epoch": 44.08, "learning_rate": 2.7965628184049646e-05, "loss": 2.0067, "step": 15230000 }, { "epoch": 44.09, "learning_rate": 2.796490453640237e-05, "loss": 2.0404, "step": 15230500 }, { "epoch": 44.09, "learning_rate": 2.7964182336050394e-05, "loss": 2.0222, "step": 15231000 }, { "epoch": 44.09, "learning_rate": 2.7963458688403117e-05, "loss": 2.033, "step": 15231500 }, { "epoch": 44.09, "learning_rate": 2.796273504075584e-05, "loss": 2.0447, "step": 15232000 }, { "epoch": 44.09, "learning_rate": 2.796201139310856e-05, "loss": 2.0161, "step": 15232500 }, { "epoch": 44.09, "learning_rate": 2.7961287745461283e-05, "loss": 2.0099, "step": 15233000 }, { "epoch": 44.09, "learning_rate": 2.7960564097814006e-05, "loss": 2.0051, "step": 15233500 }, { "epoch": 44.1, "learning_rate": 2.795984045016673e-05, "loss": 2.0276, "step": 15234000 }, { "epoch": 44.1, "learning_rate": 2.7959116802519453e-05, "loss": 1.9939, "step": 15234500 }, { "epoch": 44.1, "learning_rate": 2.7958393154872176e-05, "loss": 2.0317, "step": 15235000 }, { "epoch": 44.1, "learning_rate": 2.7957670954520195e-05, "loss": 2.0083, "step": 15235500 }, { "epoch": 44.1, "learning_rate": 2.7956947306872917e-05, "loss": 1.9789, "step": 15236000 }, { "epoch": 44.1, "learning_rate": 2.7956225106520932e-05, "loss": 2.0415, "step": 15236500 }, { "epoch": 44.1, "learning_rate": 2.7955501458873658e-05, "loss": 2.0125, "step": 15237000 }, { "epoch": 44.11, "learning_rate": 2.795477781122638e-05, "loss": 2.0434, "step": 15237500 }, { "epoch": 44.11, "learning_rate": 2.7954054163579102e-05, "loss": 2.0453, "step": 15238000 }, { "epoch": 44.11, "learning_rate": 2.795333051593183e-05, "loss": 2.0381, "step": 15238500 }, { "epoch": 44.11, "learning_rate": 2.7952606868284554e-05, "loss": 2.0252, "step": 15239000 }, { "epoch": 44.11, "learning_rate": 2.7951883220637276e-05, "loss": 2.0146, "step": 15239500 }, { "epoch": 44.11, "learning_rate": 2.7951159572989998e-05, "loss": 2.0108, "step": 15240000 }, { "epoch": 44.12, "learning_rate": 2.795043592534272e-05, "loss": 2.0463, "step": 15240500 }, { "epoch": 44.12, "learning_rate": 2.7949712277695446e-05, "loss": 2.0133, "step": 15241000 }, { "epoch": 44.12, "learning_rate": 2.794899007734346e-05, "loss": 2.0249, "step": 15241500 }, { "epoch": 44.12, "learning_rate": 2.7948266429696184e-05, "loss": 1.9912, "step": 15242000 }, { "epoch": 44.12, "learning_rate": 2.7947544229344203e-05, "loss": 2.0128, "step": 15242500 }, { "epoch": 44.12, "learning_rate": 2.7946820581696925e-05, "loss": 2.0264, "step": 15243000 }, { "epoch": 44.12, "learning_rate": 2.7946096934049647e-05, "loss": 2.0107, "step": 15243500 }, { "epoch": 44.13, "learning_rate": 2.794537328640237e-05, "loss": 2.0288, "step": 15244000 }, { "epoch": 44.13, "learning_rate": 2.7944652533345682e-05, "loss": 2.0201, "step": 15244500 }, { "epoch": 44.13, "learning_rate": 2.7943928885698407e-05, "loss": 2.0198, "step": 15245000 }, { "epoch": 44.13, "learning_rate": 2.794320523805113e-05, "loss": 2.0175, "step": 15245500 }, { "epoch": 44.13, "learning_rate": 2.794248159040386e-05, "loss": 2.0258, "step": 15246000 }, { "epoch": 44.13, "learning_rate": 2.7941759390051874e-05, "loss": 2.0183, "step": 15246500 }, { "epoch": 44.13, "learning_rate": 2.7941035742404596e-05, "loss": 2.0374, "step": 15247000 }, { "epoch": 44.14, "learning_rate": 2.7940312094757322e-05, "loss": 2.0271, "step": 15247500 }, { "epoch": 44.14, "learning_rate": 2.7939588447110044e-05, "loss": 2.0027, "step": 15248000 }, { "epoch": 44.14, "learning_rate": 2.7938864799462766e-05, "loss": 2.0275, "step": 15248500 }, { "epoch": 44.14, "learning_rate": 2.793814115181549e-05, "loss": 2.0126, "step": 15249000 }, { "epoch": 44.14, "learning_rate": 2.793741750416821e-05, "loss": 2.0178, "step": 15249500 }, { "epoch": 44.14, "learning_rate": 2.793669530381623e-05, "loss": 2.0265, "step": 15250000 }, { "epoch": 44.14, "learning_rate": 2.7935971656168952e-05, "loss": 2.0051, "step": 15250500 }, { "epoch": 44.15, "learning_rate": 2.7935248008521674e-05, "loss": 2.0153, "step": 15251000 }, { "epoch": 44.15, "learning_rate": 2.7934524360874397e-05, "loss": 2.0367, "step": 15251500 }, { "epoch": 44.15, "learning_rate": 2.7933800713227122e-05, "loss": 2.0009, "step": 15252000 }, { "epoch": 44.15, "learning_rate": 2.7933077065579844e-05, "loss": 2.0183, "step": 15252500 }, { "epoch": 44.15, "learning_rate": 2.7932353417932567e-05, "loss": 2.0243, "step": 15253000 }, { "epoch": 44.15, "learning_rate": 2.7931629770285296e-05, "loss": 2.0292, "step": 15253500 }, { "epoch": 44.15, "learning_rate": 2.7930906122638018e-05, "loss": 2.0268, "step": 15254000 }, { "epoch": 44.16, "learning_rate": 2.793018247499074e-05, "loss": 2.0233, "step": 15254500 }, { "epoch": 44.16, "learning_rate": 2.7929458827343462e-05, "loss": 1.9949, "step": 15255000 }, { "epoch": 44.16, "learning_rate": 2.7928735179696185e-05, "loss": 2.0169, "step": 15255500 }, { "epoch": 44.16, "learning_rate": 2.792801153204891e-05, "loss": 2.0106, "step": 15256000 }, { "epoch": 44.16, "learning_rate": 2.7927287884401632e-05, "loss": 2.016, "step": 15256500 }, { "epoch": 44.16, "learning_rate": 2.7926564236754355e-05, "loss": 2.0158, "step": 15257000 }, { "epoch": 44.16, "learning_rate": 2.7925840589107077e-05, "loss": 2.0099, "step": 15257500 }, { "epoch": 44.17, "learning_rate": 2.7925118388755096e-05, "loss": 2.0234, "step": 15258000 }, { "epoch": 44.17, "learning_rate": 2.7924394741107818e-05, "loss": 2.0335, "step": 15258500 }, { "epoch": 44.17, "learning_rate": 2.792367109346054e-05, "loss": 2.0166, "step": 15259000 }, { "epoch": 44.17, "learning_rate": 2.7922947445813263e-05, "loss": 2.033, "step": 15259500 }, { "epoch": 44.17, "learning_rate": 2.7922223798165985e-05, "loss": 2.0284, "step": 15260000 }, { "epoch": 44.17, "learning_rate": 2.7921500150518714e-05, "loss": 2.0372, "step": 15260500 }, { "epoch": 44.17, "learning_rate": 2.7920776502871436e-05, "loss": 2.0268, "step": 15261000 }, { "epoch": 44.18, "learning_rate": 2.792005285522416e-05, "loss": 2.0131, "step": 15261500 }, { "epoch": 44.18, "learning_rate": 2.7919329207576884e-05, "loss": 2.03, "step": 15262000 }, { "epoch": 44.18, "learning_rate": 2.7918605559929606e-05, "loss": 2.0144, "step": 15262500 }, { "epoch": 44.18, "learning_rate": 2.7917883359577625e-05, "loss": 2.0196, "step": 15263000 }, { "epoch": 44.18, "learning_rate": 2.7917159711930347e-05, "loss": 2.017, "step": 15263500 }, { "epoch": 44.18, "learning_rate": 2.791643606428307e-05, "loss": 2.0145, "step": 15264000 }, { "epoch": 44.18, "learning_rate": 2.7915712416635792e-05, "loss": 2.0081, "step": 15264500 }, { "epoch": 44.19, "learning_rate": 2.7914988768988514e-05, "loss": 2.025, "step": 15265000 }, { "epoch": 44.19, "learning_rate": 2.7914265121341236e-05, "loss": 2.0249, "step": 15265500 }, { "epoch": 44.19, "learning_rate": 2.7913541473693962e-05, "loss": 2.0134, "step": 15266000 }, { "epoch": 44.19, "learning_rate": 2.7912819273341977e-05, "loss": 2.0154, "step": 15266500 }, { "epoch": 44.19, "learning_rate": 2.79120956256947e-05, "loss": 2.0316, "step": 15267000 }, { "epoch": 44.19, "learning_rate": 2.791137342534272e-05, "loss": 2.0092, "step": 15267500 }, { "epoch": 44.19, "learning_rate": 2.7910649777695448e-05, "loss": 2.04, "step": 15268000 }, { "epoch": 44.2, "learning_rate": 2.790992613004817e-05, "loss": 2.0049, "step": 15268500 }, { "epoch": 44.2, "learning_rate": 2.7909202482400892e-05, "loss": 2.0114, "step": 15269000 }, { "epoch": 44.2, "learning_rate": 2.7908478834753614e-05, "loss": 2.0479, "step": 15269500 }, { "epoch": 44.2, "learning_rate": 2.7907755187106337e-05, "loss": 2.0251, "step": 15270000 }, { "epoch": 44.2, "learning_rate": 2.7907031539459062e-05, "loss": 2.0042, "step": 15270500 }, { "epoch": 44.2, "learning_rate": 2.7906307891811784e-05, "loss": 2.0176, "step": 15271000 }, { "epoch": 44.2, "learning_rate": 2.7905584244164507e-05, "loss": 2.0235, "step": 15271500 }, { "epoch": 44.21, "learning_rate": 2.790486059651723e-05, "loss": 2.0126, "step": 15272000 }, { "epoch": 44.21, "learning_rate": 2.7904138396165248e-05, "loss": 2.0186, "step": 15272500 }, { "epoch": 44.21, "learning_rate": 2.790341474851797e-05, "loss": 2.0316, "step": 15273000 }, { "epoch": 44.21, "learning_rate": 2.7902691100870692e-05, "loss": 2.0198, "step": 15273500 }, { "epoch": 44.21, "learning_rate": 2.7901967453223415e-05, "loss": 2.0195, "step": 15274000 }, { "epoch": 44.21, "learning_rate": 2.7901245252871433e-05, "loss": 2.0159, "step": 15274500 }, { "epoch": 44.21, "learning_rate": 2.7900521605224156e-05, "loss": 2.0059, "step": 15275000 }, { "epoch": 44.22, "learning_rate": 2.7899797957576885e-05, "loss": 2.0086, "step": 15275500 }, { "epoch": 44.22, "learning_rate": 2.7899074309929607e-05, "loss": 2.0225, "step": 15276000 }, { "epoch": 44.22, "learning_rate": 2.7898352109577626e-05, "loss": 2.0312, "step": 15276500 }, { "epoch": 44.22, "learning_rate": 2.7897628461930348e-05, "loss": 2.0394, "step": 15277000 }, { "epoch": 44.22, "learning_rate": 2.7896906261578364e-05, "loss": 2.0265, "step": 15277500 }, { "epoch": 44.22, "learning_rate": 2.789618261393109e-05, "loss": 2.0142, "step": 15278000 }, { "epoch": 44.23, "learning_rate": 2.789545896628381e-05, "loss": 2.0287, "step": 15278500 }, { "epoch": 44.23, "learning_rate": 2.7894735318636534e-05, "loss": 2.0255, "step": 15279000 }, { "epoch": 44.23, "learning_rate": 2.789401311828455e-05, "loss": 2.0195, "step": 15279500 }, { "epoch": 44.23, "learning_rate": 2.7893289470637275e-05, "loss": 2.027, "step": 15280000 }, { "epoch": 44.23, "learning_rate": 2.7892565822989997e-05, "loss": 1.9916, "step": 15280500 }, { "epoch": 44.23, "learning_rate": 2.789184217534272e-05, "loss": 2.064, "step": 15281000 }, { "epoch": 44.23, "learning_rate": 2.789111852769544e-05, "loss": 2.0183, "step": 15281500 }, { "epoch": 44.24, "learning_rate": 2.7890394880048164e-05, "loss": 2.0155, "step": 15282000 }, { "epoch": 44.24, "learning_rate": 2.7889672679696183e-05, "loss": 2.04, "step": 15282500 }, { "epoch": 44.24, "learning_rate": 2.7888949032048912e-05, "loss": 2.0014, "step": 15283000 }, { "epoch": 44.24, "learning_rate": 2.7888225384401634e-05, "loss": 2.0259, "step": 15283500 }, { "epoch": 44.24, "learning_rate": 2.7887501736754356e-05, "loss": 2.0313, "step": 15284000 }, { "epoch": 44.24, "learning_rate": 2.788677808910708e-05, "loss": 2.0194, "step": 15284500 }, { "epoch": 44.24, "learning_rate": 2.78860544414598e-05, "loss": 2.0364, "step": 15285000 }, { "epoch": 44.25, "learning_rate": 2.788533224110782e-05, "loss": 2.0165, "step": 15285500 }, { "epoch": 44.25, "learning_rate": 2.7884608593460542e-05, "loss": 2.032, "step": 15286000 }, { "epoch": 44.25, "learning_rate": 2.7883884945813264e-05, "loss": 2.026, "step": 15286500 }, { "epoch": 44.25, "learning_rate": 2.788316129816599e-05, "loss": 2.0183, "step": 15287000 }, { "epoch": 44.25, "learning_rate": 2.7882439097814005e-05, "loss": 2.024, "step": 15287500 }, { "epoch": 44.25, "learning_rate": 2.7881715450166728e-05, "loss": 2.027, "step": 15288000 }, { "epoch": 44.25, "learning_rate": 2.7880991802519453e-05, "loss": 2.017, "step": 15288500 }, { "epoch": 44.26, "learning_rate": 2.7880268154872175e-05, "loss": 2.0175, "step": 15289000 }, { "epoch": 44.26, "learning_rate": 2.7879544507224898e-05, "loss": 2.0359, "step": 15289500 }, { "epoch": 44.26, "learning_rate": 2.7878822306872913e-05, "loss": 2.0056, "step": 15290000 }, { "epoch": 44.26, "learning_rate": 2.7878100106520932e-05, "loss": 2.0117, "step": 15290500 }, { "epoch": 44.26, "learning_rate": 2.787737645887366e-05, "loss": 2.0052, "step": 15291000 }, { "epoch": 44.26, "learning_rate": 2.7876654258521677e-05, "loss": 2.0074, "step": 15291500 }, { "epoch": 44.26, "learning_rate": 2.7875930610874402e-05, "loss": 2.0325, "step": 15292000 }, { "epoch": 44.27, "learning_rate": 2.7875206963227125e-05, "loss": 2.0248, "step": 15292500 }, { "epoch": 44.27, "learning_rate": 2.7874483315579847e-05, "loss": 2.0315, "step": 15293000 }, { "epoch": 44.27, "learning_rate": 2.787375966793257e-05, "loss": 2.0347, "step": 15293500 }, { "epoch": 44.27, "learning_rate": 2.787303602028529e-05, "loss": 2.0309, "step": 15294000 }, { "epoch": 44.27, "learning_rate": 2.7872312372638017e-05, "loss": 2.0448, "step": 15294500 }, { "epoch": 44.27, "learning_rate": 2.787158872499074e-05, "loss": 2.0297, "step": 15295000 }, { "epoch": 44.27, "learning_rate": 2.787086507734346e-05, "loss": 2.0033, "step": 15295500 }, { "epoch": 44.28, "learning_rate": 2.7870141429696184e-05, "loss": 2.037, "step": 15296000 }, { "epoch": 44.28, "learning_rate": 2.7869419229344203e-05, "loss": 2.0066, "step": 15296500 }, { "epoch": 44.28, "learning_rate": 2.7868695581696925e-05, "loss": 2.041, "step": 15297000 }, { "epoch": 44.28, "learning_rate": 2.7867971934049647e-05, "loss": 2.028, "step": 15297500 }, { "epoch": 44.28, "learning_rate": 2.786724828640237e-05, "loss": 2.043, "step": 15298000 }, { "epoch": 44.28, "learning_rate": 2.7866524638755098e-05, "loss": 2.042, "step": 15298500 }, { "epoch": 44.28, "learning_rate": 2.7865802438403117e-05, "loss": 2.004, "step": 15299000 }, { "epoch": 44.29, "learning_rate": 2.786507879075584e-05, "loss": 2.0251, "step": 15299500 }, { "epoch": 44.29, "learning_rate": 2.786435514310856e-05, "loss": 2.0135, "step": 15300000 }, { "epoch": 44.29, "learning_rate": 2.7863631495461284e-05, "loss": 1.998, "step": 15300500 }, { "epoch": 44.29, "learning_rate": 2.7862907847814006e-05, "loss": 2.0288, "step": 15301000 }, { "epoch": 44.29, "learning_rate": 2.786218420016673e-05, "loss": 2.0382, "step": 15301500 }, { "epoch": 44.29, "learning_rate": 2.7861460552519454e-05, "loss": 2.0279, "step": 15302000 }, { "epoch": 44.29, "learning_rate": 2.7860736904872176e-05, "loss": 2.0104, "step": 15302500 }, { "epoch": 44.3, "learning_rate": 2.78600132572249e-05, "loss": 2.0155, "step": 15303000 }, { "epoch": 44.3, "learning_rate": 2.785928960957762e-05, "loss": 2.029, "step": 15303500 }, { "epoch": 44.3, "learning_rate": 2.7858565961930343e-05, "loss": 2.0046, "step": 15304000 }, { "epoch": 44.3, "learning_rate": 2.785784231428307e-05, "loss": 2.0643, "step": 15304500 }, { "epoch": 44.3, "learning_rate": 2.7857120113931084e-05, "loss": 2.0255, "step": 15305000 }, { "epoch": 44.3, "learning_rate": 2.7856396466283813e-05, "loss": 2.0147, "step": 15305500 }, { "epoch": 44.3, "learning_rate": 2.7855672818636535e-05, "loss": 2.0143, "step": 15306000 }, { "epoch": 44.31, "learning_rate": 2.7854949170989258e-05, "loss": 2.0128, "step": 15306500 }, { "epoch": 44.31, "learning_rate": 2.785422552334198e-05, "loss": 1.999, "step": 15307000 }, { "epoch": 44.31, "learning_rate": 2.7853501875694705e-05, "loss": 2.0249, "step": 15307500 }, { "epoch": 44.31, "learning_rate": 2.7852778228047428e-05, "loss": 2.0329, "step": 15308000 }, { "epoch": 44.31, "learning_rate": 2.7852056027695443e-05, "loss": 2.0117, "step": 15308500 }, { "epoch": 44.31, "learning_rate": 2.785133238004817e-05, "loss": 2.0302, "step": 15309000 }, { "epoch": 44.31, "learning_rate": 2.785060873240089e-05, "loss": 2.0107, "step": 15309500 }, { "epoch": 44.32, "learning_rate": 2.7849885084753613e-05, "loss": 2.0314, "step": 15310000 }, { "epoch": 44.32, "learning_rate": 2.7849161437106336e-05, "loss": 2.0131, "step": 15310500 }, { "epoch": 44.32, "learning_rate": 2.7848437789459058e-05, "loss": 2.0086, "step": 15311000 }, { "epoch": 44.32, "learning_rate": 2.7847715589107077e-05, "loss": 2.0241, "step": 15311500 }, { "epoch": 44.32, "learning_rate": 2.78469919414598e-05, "loss": 2.0221, "step": 15312000 }, { "epoch": 44.32, "learning_rate": 2.784626829381252e-05, "loss": 2.032, "step": 15312500 }, { "epoch": 44.32, "learning_rate": 2.784554464616525e-05, "loss": 2.0186, "step": 15313000 }, { "epoch": 44.33, "learning_rate": 2.7844820998517972e-05, "loss": 2.0299, "step": 15313500 }, { "epoch": 44.33, "learning_rate": 2.7844097350870695e-05, "loss": 2.0375, "step": 15314000 }, { "epoch": 44.33, "learning_rate": 2.784337370322342e-05, "loss": 2.0207, "step": 15314500 }, { "epoch": 44.33, "learning_rate": 2.7842650055576143e-05, "loss": 2.0224, "step": 15315000 }, { "epoch": 44.33, "learning_rate": 2.7841927855224158e-05, "loss": 2.0187, "step": 15315500 }, { "epoch": 44.33, "learning_rate": 2.7841205654872177e-05, "loss": 2.0115, "step": 15316000 }, { "epoch": 44.33, "learning_rate": 2.78404820072249e-05, "loss": 2.0243, "step": 15316500 }, { "epoch": 44.34, "learning_rate": 2.783975835957762e-05, "loss": 2.0285, "step": 15317000 }, { "epoch": 44.34, "learning_rate": 2.7839034711930344e-05, "loss": 2.0122, "step": 15317500 }, { "epoch": 44.34, "learning_rate": 2.783831106428307e-05, "loss": 2.0449, "step": 15318000 }, { "epoch": 44.34, "learning_rate": 2.783758741663579e-05, "loss": 2.0147, "step": 15318500 }, { "epoch": 44.34, "learning_rate": 2.7836863768988514e-05, "loss": 2.0313, "step": 15319000 }, { "epoch": 44.34, "learning_rate": 2.7836140121341236e-05, "loss": 2.0144, "step": 15319500 }, { "epoch": 44.35, "learning_rate": 2.7835417920989255e-05, "loss": 2.0423, "step": 15320000 }, { "epoch": 44.35, "learning_rate": 2.7834694273341984e-05, "loss": 2.0124, "step": 15320500 }, { "epoch": 44.35, "learning_rate": 2.783397207299e-05, "loss": 2.0298, "step": 15321000 }, { "epoch": 44.35, "learning_rate": 2.7833248425342722e-05, "loss": 2.035, "step": 15321500 }, { "epoch": 44.35, "learning_rate": 2.7832524777695444e-05, "loss": 2.0091, "step": 15322000 }, { "epoch": 44.35, "learning_rate": 2.7831802577343463e-05, "loss": 2.0194, "step": 15322500 }, { "epoch": 44.35, "learning_rate": 2.7831080376991482e-05, "loss": 2.0239, "step": 15323000 }, { "epoch": 44.36, "learning_rate": 2.7830356729344204e-05, "loss": 2.0222, "step": 15323500 }, { "epoch": 44.36, "learning_rate": 2.7829633081696926e-05, "loss": 2.0538, "step": 15324000 }, { "epoch": 44.36, "learning_rate": 2.782890943404965e-05, "loss": 2.0065, "step": 15324500 }, { "epoch": 44.36, "learning_rate": 2.782818578640237e-05, "loss": 2.0098, "step": 15325000 }, { "epoch": 44.36, "learning_rate": 2.782746358605039e-05, "loss": 2.0524, "step": 15325500 }, { "epoch": 44.36, "learning_rate": 2.7826739938403112e-05, "loss": 2.0207, "step": 15326000 }, { "epoch": 44.36, "learning_rate": 2.7826016290755834e-05, "loss": 1.9872, "step": 15326500 }, { "epoch": 44.37, "learning_rate": 2.7825292643108556e-05, "loss": 2.0234, "step": 15327000 }, { "epoch": 44.37, "learning_rate": 2.7824568995461282e-05, "loss": 1.9998, "step": 15327500 }, { "epoch": 44.37, "learning_rate": 2.7823846795109298e-05, "loss": 2.0049, "step": 15328000 }, { "epoch": 44.37, "learning_rate": 2.7823123147462027e-05, "loss": 2.0349, "step": 15328500 }, { "epoch": 44.37, "learning_rate": 2.782239949981475e-05, "loss": 2.0068, "step": 15329000 }, { "epoch": 44.37, "learning_rate": 2.782167585216747e-05, "loss": 2.0199, "step": 15329500 }, { "epoch": 44.37, "learning_rate": 2.7820952204520197e-05, "loss": 2.0386, "step": 15330000 }, { "epoch": 44.38, "learning_rate": 2.782022855687292e-05, "loss": 2.0121, "step": 15330500 }, { "epoch": 44.38, "learning_rate": 2.781950490922564e-05, "loss": 2.0246, "step": 15331000 }, { "epoch": 44.38, "learning_rate": 2.7818781261578363e-05, "loss": 2.0267, "step": 15331500 }, { "epoch": 44.38, "learning_rate": 2.7818057613931086e-05, "loss": 2.0044, "step": 15332000 }, { "epoch": 44.38, "learning_rate": 2.7817335413579105e-05, "loss": 2.0221, "step": 15332500 }, { "epoch": 44.38, "learning_rate": 2.7816611765931827e-05, "loss": 2.0098, "step": 15333000 }, { "epoch": 44.38, "learning_rate": 2.781588811828455e-05, "loss": 2.0247, "step": 15333500 }, { "epoch": 44.39, "learning_rate": 2.781516447063727e-05, "loss": 2.0177, "step": 15334000 }, { "epoch": 44.39, "learning_rate": 2.7814440822989997e-05, "loss": 2.0078, "step": 15334500 }, { "epoch": 44.39, "learning_rate": 2.781371717534272e-05, "loss": 2.0085, "step": 15335000 }, { "epoch": 44.39, "learning_rate": 2.7812993527695448e-05, "loss": 2.0265, "step": 15335500 }, { "epoch": 44.39, "learning_rate": 2.781226988004817e-05, "loss": 2.0375, "step": 15336000 }, { "epoch": 44.39, "learning_rate": 2.7811546232400893e-05, "loss": 2.025, "step": 15336500 }, { "epoch": 44.39, "learning_rate": 2.7810822584753615e-05, "loss": 2.0382, "step": 15337000 }, { "epoch": 44.4, "learning_rate": 2.7810098937106337e-05, "loss": 2.0318, "step": 15337500 }, { "epoch": 44.4, "learning_rate": 2.780937528945906e-05, "loss": 2.0034, "step": 15338000 }, { "epoch": 44.4, "learning_rate": 2.7808651641811785e-05, "loss": 1.9948, "step": 15338500 }, { "epoch": 44.4, "learning_rate": 2.7807927994164507e-05, "loss": 2.0067, "step": 15339000 }, { "epoch": 44.4, "learning_rate": 2.780720434651723e-05, "loss": 2.0151, "step": 15339500 }, { "epoch": 44.4, "learning_rate": 2.780648069886995e-05, "loss": 2.0058, "step": 15340000 }, { "epoch": 44.4, "learning_rate": 2.7805757051222674e-05, "loss": 1.9883, "step": 15340500 }, { "epoch": 44.41, "learning_rate": 2.78050334035754e-05, "loss": 2.04, "step": 15341000 }, { "epoch": 44.41, "learning_rate": 2.7804309755928122e-05, "loss": 2.0062, "step": 15341500 }, { "epoch": 44.41, "learning_rate": 2.7803587555576137e-05, "loss": 2.0356, "step": 15342000 }, { "epoch": 44.41, "learning_rate": 2.7802863907928866e-05, "loss": 2.0251, "step": 15342500 }, { "epoch": 44.41, "learning_rate": 2.780214026028159e-05, "loss": 2.0356, "step": 15343000 }, { "epoch": 44.41, "learning_rate": 2.780141661263431e-05, "loss": 2.0173, "step": 15343500 }, { "epoch": 44.41, "learning_rate": 2.7800692964987036e-05, "loss": 2.0264, "step": 15344000 }, { "epoch": 44.42, "learning_rate": 2.779996931733976e-05, "loss": 2.037, "step": 15344500 }, { "epoch": 44.42, "learning_rate": 2.779924566969248e-05, "loss": 2.0251, "step": 15345000 }, { "epoch": 44.42, "learning_rate": 2.77985234693405e-05, "loss": 2.0296, "step": 15345500 }, { "epoch": 44.42, "learning_rate": 2.7797802716283812e-05, "loss": 2.0287, "step": 15346000 }, { "epoch": 44.42, "learning_rate": 2.7797079068636534e-05, "loss": 2.0435, "step": 15346500 }, { "epoch": 44.42, "learning_rate": 2.7796355420989257e-05, "loss": 2.0207, "step": 15347000 }, { "epoch": 44.42, "learning_rate": 2.779563177334198e-05, "loss": 2.0247, "step": 15347500 }, { "epoch": 44.43, "learning_rate": 2.77949081256947e-05, "loss": 2.0232, "step": 15348000 }, { "epoch": 44.43, "learning_rate": 2.7794184478047423e-05, "loss": 2.0163, "step": 15348500 }, { "epoch": 44.43, "learning_rate": 2.779346083040015e-05, "loss": 2.0282, "step": 15349000 }, { "epoch": 44.43, "learning_rate": 2.779273718275287e-05, "loss": 2.0148, "step": 15349500 }, { "epoch": 44.43, "learning_rate": 2.7792013535105593e-05, "loss": 2.0152, "step": 15350000 }, { "epoch": 44.43, "learning_rate": 2.7791289887458322e-05, "loss": 2.0364, "step": 15350500 }, { "epoch": 44.43, "learning_rate": 2.7790566239811045e-05, "loss": 2.0257, "step": 15351000 }, { "epoch": 44.44, "learning_rate": 2.7789842592163767e-05, "loss": 2.0317, "step": 15351500 }, { "epoch": 44.44, "learning_rate": 2.778911894451649e-05, "loss": 2.0397, "step": 15352000 }, { "epoch": 44.44, "learning_rate": 2.7788396744164508e-05, "loss": 2.0236, "step": 15352500 }, { "epoch": 44.44, "learning_rate": 2.778767309651723e-05, "loss": 2.022, "step": 15353000 }, { "epoch": 44.44, "learning_rate": 2.7786949448869952e-05, "loss": 2.0098, "step": 15353500 }, { "epoch": 44.44, "learning_rate": 2.7786225801222675e-05, "loss": 2.0197, "step": 15354000 }, { "epoch": 44.44, "learning_rate": 2.7785503600870694e-05, "loss": 2.0173, "step": 15354500 }, { "epoch": 44.45, "learning_rate": 2.7784781400518713e-05, "loss": 2.0226, "step": 15355000 }, { "epoch": 44.45, "learning_rate": 2.7784057752871435e-05, "loss": 2.0129, "step": 15355500 }, { "epoch": 44.45, "learning_rate": 2.7783334105224157e-05, "loss": 2.0396, "step": 15356000 }, { "epoch": 44.45, "learning_rate": 2.778261045757688e-05, "loss": 2.0172, "step": 15356500 }, { "epoch": 44.45, "learning_rate": 2.77818868099296e-05, "loss": 2.0165, "step": 15357000 }, { "epoch": 44.45, "learning_rate": 2.7781163162282324e-05, "loss": 2.0269, "step": 15357500 }, { "epoch": 44.46, "learning_rate": 2.778044096193035e-05, "loss": 2.0306, "step": 15358000 }, { "epoch": 44.46, "learning_rate": 2.7779717314283072e-05, "loss": 2.0379, "step": 15358500 }, { "epoch": 44.46, "learning_rate": 2.7778993666635794e-05, "loss": 2.0098, "step": 15359000 }, { "epoch": 44.46, "learning_rate": 2.7778270018988516e-05, "loss": 2.0068, "step": 15359500 }, { "epoch": 44.46, "learning_rate": 2.777754637134124e-05, "loss": 2.034, "step": 15360000 }, { "epoch": 44.46, "learning_rate": 2.7776822723693964e-05, "loss": 2.0434, "step": 15360500 }, { "epoch": 44.46, "learning_rate": 2.7776099076046686e-05, "loss": 2.0208, "step": 15361000 }, { "epoch": 44.47, "learning_rate": 2.777537542839941e-05, "loss": 2.0284, "step": 15361500 }, { "epoch": 44.47, "learning_rate": 2.777465178075213e-05, "loss": 2.0276, "step": 15362000 }, { "epoch": 44.47, "learning_rate": 2.777392958040015e-05, "loss": 2.0211, "step": 15362500 }, { "epoch": 44.47, "learning_rate": 2.7773205932752872e-05, "loss": 2.0469, "step": 15363000 }, { "epoch": 44.47, "learning_rate": 2.7772482285105594e-05, "loss": 2.0327, "step": 15363500 }, { "epoch": 44.47, "learning_rate": 2.7771760084753613e-05, "loss": 2.0076, "step": 15364000 }, { "epoch": 44.47, "learning_rate": 2.777103788440163e-05, "loss": 2.0418, "step": 15364500 }, { "epoch": 44.48, "learning_rate": 2.777031423675435e-05, "loss": 2.0387, "step": 15365000 }, { "epoch": 44.48, "learning_rate": 2.776959058910708e-05, "loss": 2.0352, "step": 15365500 }, { "epoch": 44.48, "learning_rate": 2.7768866941459802e-05, "loss": 2.0342, "step": 15366000 }, { "epoch": 44.48, "learning_rate": 2.7768143293812528e-05, "loss": 2.0271, "step": 15366500 }, { "epoch": 44.48, "learning_rate": 2.776741964616525e-05, "loss": 2.0325, "step": 15367000 }, { "epoch": 44.48, "learning_rate": 2.7766695998517972e-05, "loss": 2.0051, "step": 15367500 }, { "epoch": 44.48, "learning_rate": 2.7765972350870694e-05, "loss": 2.0193, "step": 15368000 }, { "epoch": 44.49, "learning_rate": 2.7765248703223417e-05, "loss": 2.0475, "step": 15368500 }, { "epoch": 44.49, "learning_rate": 2.776452505557614e-05, "loss": 2.0041, "step": 15369000 }, { "epoch": 44.49, "learning_rate": 2.7763802855224158e-05, "loss": 2.0245, "step": 15369500 }, { "epoch": 44.49, "learning_rate": 2.776307920757688e-05, "loss": 2.0492, "step": 15370000 }, { "epoch": 44.49, "learning_rate": 2.7762355559929602e-05, "loss": 2.0266, "step": 15370500 }, { "epoch": 44.49, "learning_rate": 2.7761631912282328e-05, "loss": 2.0276, "step": 15371000 }, { "epoch": 44.49, "learning_rate": 2.776090826463505e-05, "loss": 2.0181, "step": 15371500 }, { "epoch": 44.5, "learning_rate": 2.7760186064283066e-05, "loss": 2.0176, "step": 15372000 }, { "epoch": 44.5, "learning_rate": 2.775946241663579e-05, "loss": 2.0343, "step": 15372500 }, { "epoch": 44.5, "learning_rate": 2.7758738768988517e-05, "loss": 2.0079, "step": 15373000 }, { "epoch": 44.5, "learning_rate": 2.7758015121341243e-05, "loss": 2.0467, "step": 15373500 }, { "epoch": 44.5, "learning_rate": 2.7757291473693965e-05, "loss": 2.0438, "step": 15374000 }, { "epoch": 44.5, "learning_rate": 2.7756567826046687e-05, "loss": 2.0245, "step": 15374500 }, { "epoch": 44.5, "learning_rate": 2.775584417839941e-05, "loss": 2.0256, "step": 15375000 }, { "epoch": 44.51, "learning_rate": 2.7755121978047428e-05, "loss": 2.0467, "step": 15375500 }, { "epoch": 44.51, "learning_rate": 2.775439833040015e-05, "loss": 2.0109, "step": 15376000 }, { "epoch": 44.51, "learning_rate": 2.7753674682752873e-05, "loss": 2.0127, "step": 15376500 }, { "epoch": 44.51, "learning_rate": 2.7752951035105595e-05, "loss": 2.0171, "step": 15377000 }, { "epoch": 44.51, "learning_rate": 2.7752227387458317e-05, "loss": 2.0383, "step": 15377500 }, { "epoch": 44.51, "learning_rate": 2.7751503739811043e-05, "loss": 2.0601, "step": 15378000 }, { "epoch": 44.51, "learning_rate": 2.7750780092163765e-05, "loss": 2.0324, "step": 15378500 }, { "epoch": 44.52, "learning_rate": 2.7750056444516487e-05, "loss": 1.9837, "step": 15379000 }, { "epoch": 44.52, "learning_rate": 2.774933279686921e-05, "loss": 2.0369, "step": 15379500 }, { "epoch": 44.52, "learning_rate": 2.774861059651723e-05, "loss": 2.016, "step": 15380000 }, { "epoch": 44.52, "learning_rate": 2.774788839616525e-05, "loss": 2.0227, "step": 15380500 }, { "epoch": 44.52, "learning_rate": 2.7747164748517973e-05, "loss": 1.993, "step": 15381000 }, { "epoch": 44.52, "learning_rate": 2.7746441100870695e-05, "loss": 2.0341, "step": 15381500 }, { "epoch": 44.52, "learning_rate": 2.7745717453223417e-05, "loss": 2.0057, "step": 15382000 }, { "epoch": 44.53, "learning_rate": 2.7744993805576143e-05, "loss": 2.0314, "step": 15382500 }, { "epoch": 44.53, "learning_rate": 2.7744270157928865e-05, "loss": 2.0241, "step": 15383000 }, { "epoch": 44.53, "learning_rate": 2.7743546510281588e-05, "loss": 2.0218, "step": 15383500 }, { "epoch": 44.53, "learning_rate": 2.774282286263431e-05, "loss": 2.0287, "step": 15384000 }, { "epoch": 44.53, "learning_rate": 2.7742099214987032e-05, "loss": 2.0239, "step": 15384500 }, { "epoch": 44.53, "learning_rate": 2.774137701463505e-05, "loss": 2.0351, "step": 15385000 }, { "epoch": 44.53, "learning_rate": 2.7740656261578363e-05, "loss": 2.0405, "step": 15385500 }, { "epoch": 44.54, "learning_rate": 2.7739932613931085e-05, "loss": 2.0294, "step": 15386000 }, { "epoch": 44.54, "learning_rate": 2.7739210413579104e-05, "loss": 2.0316, "step": 15386500 }, { "epoch": 44.54, "learning_rate": 2.7738486765931827e-05, "loss": 2.0271, "step": 15387000 }, { "epoch": 44.54, "learning_rate": 2.773776311828455e-05, "loss": 2.0034, "step": 15387500 }, { "epoch": 44.54, "learning_rate": 2.7737039470637278e-05, "loss": 2.0186, "step": 15388000 }, { "epoch": 44.54, "learning_rate": 2.773631582299e-05, "loss": 2.025, "step": 15388500 }, { "epoch": 44.54, "learning_rate": 2.7735592175342722e-05, "loss": 2.0371, "step": 15389000 }, { "epoch": 44.55, "learning_rate": 2.7734868527695445e-05, "loss": 2.0261, "step": 15389500 }, { "epoch": 44.55, "learning_rate": 2.7734144880048167e-05, "loss": 2.0086, "step": 15390000 }, { "epoch": 44.55, "learning_rate": 2.7733421232400892e-05, "loss": 2.0366, "step": 15390500 }, { "epoch": 44.55, "learning_rate": 2.7732697584753615e-05, "loss": 2.0196, "step": 15391000 }, { "epoch": 44.55, "learning_rate": 2.7731973937106337e-05, "loss": 2.0405, "step": 15391500 }, { "epoch": 44.55, "learning_rate": 2.773125028945906e-05, "loss": 2.0009, "step": 15392000 }, { "epoch": 44.55, "learning_rate": 2.773052664181178e-05, "loss": 2.0245, "step": 15392500 }, { "epoch": 44.56, "learning_rate": 2.7729802994164507e-05, "loss": 2.0314, "step": 15393000 }, { "epoch": 44.56, "learning_rate": 2.772907934651723e-05, "loss": 2.0416, "step": 15393500 }, { "epoch": 44.56, "learning_rate": 2.772835569886995e-05, "loss": 2.0238, "step": 15394000 }, { "epoch": 44.56, "learning_rate": 2.7727632051222674e-05, "loss": 2.0244, "step": 15394500 }, { "epoch": 44.56, "learning_rate": 2.7726909850870693e-05, "loss": 2.0237, "step": 15395000 }, { "epoch": 44.56, "learning_rate": 2.7726187650518715e-05, "loss": 2.0221, "step": 15395500 }, { "epoch": 44.57, "learning_rate": 2.7725464002871437e-05, "loss": 2.0026, "step": 15396000 }, { "epoch": 44.57, "learning_rate": 2.772474035522416e-05, "loss": 2.0574, "step": 15396500 }, { "epoch": 44.57, "learning_rate": 2.7724016707576882e-05, "loss": 2.0411, "step": 15397000 }, { "epoch": 44.57, "learning_rate": 2.7723293059929607e-05, "loss": 2.0346, "step": 15397500 }, { "epoch": 44.57, "learning_rate": 2.772256941228233e-05, "loss": 2.0221, "step": 15398000 }, { "epoch": 44.57, "learning_rate": 2.7721845764635052e-05, "loss": 2.0388, "step": 15398500 }, { "epoch": 44.57, "learning_rate": 2.772112356428307e-05, "loss": 2.042, "step": 15399000 }, { "epoch": 44.58, "learning_rate": 2.7720399916635793e-05, "loss": 2.0408, "step": 15399500 }, { "epoch": 44.58, "learning_rate": 2.7719676268988515e-05, "loss": 2.0275, "step": 15400000 }, { "epoch": 44.58, "learning_rate": 2.7718952621341237e-05, "loss": 2.0247, "step": 15400500 }, { "epoch": 44.58, "learning_rate": 2.771822897369396e-05, "loss": 2.0378, "step": 15401000 }, { "epoch": 44.58, "learning_rate": 2.7717505326046682e-05, "loss": 2.0399, "step": 15401500 }, { "epoch": 44.58, "learning_rate": 2.7716781678399408e-05, "loss": 2.0303, "step": 15402000 }, { "epoch": 44.58, "learning_rate": 2.7716058030752133e-05, "loss": 2.0014, "step": 15402500 }, { "epoch": 44.59, "learning_rate": 2.771533438310486e-05, "loss": 2.0562, "step": 15403000 }, { "epoch": 44.59, "learning_rate": 2.771461073545758e-05, "loss": 2.0335, "step": 15403500 }, { "epoch": 44.59, "learning_rate": 2.7713887087810303e-05, "loss": 2.0232, "step": 15404000 }, { "epoch": 44.59, "learning_rate": 2.7713164887458322e-05, "loss": 2.0321, "step": 15404500 }, { "epoch": 44.59, "learning_rate": 2.7712442687106338e-05, "loss": 2.0064, "step": 15405000 }, { "epoch": 44.59, "learning_rate": 2.771171903945906e-05, "loss": 2.007, "step": 15405500 }, { "epoch": 44.59, "learning_rate": 2.7710995391811782e-05, "loss": 2.0206, "step": 15406000 }, { "epoch": 44.6, "learning_rate": 2.7710271744164508e-05, "loss": 2.0148, "step": 15406500 }, { "epoch": 44.6, "learning_rate": 2.770954809651723e-05, "loss": 2.0216, "step": 15407000 }, { "epoch": 44.6, "learning_rate": 2.7708824448869952e-05, "loss": 2.0292, "step": 15407500 }, { "epoch": 44.6, "learning_rate": 2.7708100801222675e-05, "loss": 2.0118, "step": 15408000 }, { "epoch": 44.6, "learning_rate": 2.7707377153575397e-05, "loss": 2.0229, "step": 15408500 }, { "epoch": 44.6, "learning_rate": 2.770665640051871e-05, "loss": 2.0192, "step": 15409000 }, { "epoch": 44.6, "learning_rate": 2.7705932752871435e-05, "loss": 2.0167, "step": 15409500 }, { "epoch": 44.61, "learning_rate": 2.7705209105224157e-05, "loss": 2.0329, "step": 15410000 }, { "epoch": 44.61, "learning_rate": 2.7704485457576882e-05, "loss": 2.0161, "step": 15410500 }, { "epoch": 44.61, "learning_rate": 2.7703761809929608e-05, "loss": 2.0203, "step": 15411000 }, { "epoch": 44.61, "learning_rate": 2.7703039609577624e-05, "loss": 2.0215, "step": 15411500 }, { "epoch": 44.61, "learning_rate": 2.7702315961930346e-05, "loss": 2.0136, "step": 15412000 }, { "epoch": 44.61, "learning_rate": 2.770159231428307e-05, "loss": 2.0249, "step": 15412500 }, { "epoch": 44.61, "learning_rate": 2.7700868666635794e-05, "loss": 2.0392, "step": 15413000 }, { "epoch": 44.62, "learning_rate": 2.7700145018988516e-05, "loss": 2.0239, "step": 15413500 }, { "epoch": 44.62, "learning_rate": 2.7699422818636535e-05, "loss": 2.0215, "step": 15414000 }, { "epoch": 44.62, "learning_rate": 2.7698699170989257e-05, "loss": 2.0367, "step": 15414500 }, { "epoch": 44.62, "learning_rate": 2.769797552334198e-05, "loss": 2.0367, "step": 15415000 }, { "epoch": 44.62, "learning_rate": 2.76972518756947e-05, "loss": 2.0218, "step": 15415500 }, { "epoch": 44.62, "learning_rate": 2.7696528228047424e-05, "loss": 1.998, "step": 15416000 }, { "epoch": 44.62, "learning_rate": 2.7695804580400146e-05, "loss": 2.0639, "step": 15416500 }, { "epoch": 44.63, "learning_rate": 2.7695080932752872e-05, "loss": 2.0026, "step": 15417000 }, { "epoch": 44.63, "learning_rate": 2.7694358732400887e-05, "loss": 2.043, "step": 15417500 }, { "epoch": 44.63, "learning_rate": 2.7693635084753616e-05, "loss": 2.0346, "step": 15418000 }, { "epoch": 44.63, "learning_rate": 2.769291143710634e-05, "loss": 2.0413, "step": 15418500 }, { "epoch": 44.63, "learning_rate": 2.769218778945906e-05, "loss": 2.0251, "step": 15419000 }, { "epoch": 44.63, "learning_rate": 2.7691464141811786e-05, "loss": 2.0379, "step": 15419500 }, { "epoch": 44.63, "learning_rate": 2.769074049416451e-05, "loss": 2.013, "step": 15420000 }, { "epoch": 44.64, "learning_rate": 2.769001684651723e-05, "loss": 2.0257, "step": 15420500 }, { "epoch": 44.64, "learning_rate": 2.7689294646165246e-05, "loss": 2.0228, "step": 15421000 }, { "epoch": 44.64, "learning_rate": 2.7688570998517972e-05, "loss": 2.0316, "step": 15421500 }, { "epoch": 44.64, "learning_rate": 2.7687847350870694e-05, "loss": 2.03, "step": 15422000 }, { "epoch": 44.64, "learning_rate": 2.7687123703223416e-05, "loss": 2.0258, "step": 15422500 }, { "epoch": 44.64, "learning_rate": 2.768640005557614e-05, "loss": 2.0258, "step": 15423000 }, { "epoch": 44.64, "learning_rate": 2.768567640792886e-05, "loss": 2.022, "step": 15423500 }, { "epoch": 44.65, "learning_rate": 2.7684952760281587e-05, "loss": 2.0118, "step": 15424000 }, { "epoch": 44.65, "learning_rate": 2.7684230559929602e-05, "loss": 2.0348, "step": 15424500 }, { "epoch": 44.65, "learning_rate": 2.7683506912282324e-05, "loss": 2.0062, "step": 15425000 }, { "epoch": 44.65, "learning_rate": 2.7682783264635053e-05, "loss": 2.0211, "step": 15425500 }, { "epoch": 44.65, "learning_rate": 2.7682059616987776e-05, "loss": 2.0404, "step": 15426000 }, { "epoch": 44.65, "learning_rate": 2.7681335969340498e-05, "loss": 2.0398, "step": 15426500 }, { "epoch": 44.65, "learning_rate": 2.7680612321693223e-05, "loss": 2.0363, "step": 15427000 }, { "epoch": 44.66, "learning_rate": 2.7679888674045946e-05, "loss": 2.0263, "step": 15427500 }, { "epoch": 44.66, "learning_rate": 2.7679165026398668e-05, "loss": 2.0629, "step": 15428000 }, { "epoch": 44.66, "learning_rate": 2.767844137875139e-05, "loss": 2.035, "step": 15428500 }, { "epoch": 44.66, "learning_rate": 2.7677717731104112e-05, "loss": 2.0238, "step": 15429000 }, { "epoch": 44.66, "learning_rate": 2.767699553075213e-05, "loss": 2.0454, "step": 15429500 }, { "epoch": 44.66, "learning_rate": 2.7676271883104854e-05, "loss": 2.0443, "step": 15430000 }, { "epoch": 44.66, "learning_rate": 2.7675548235457576e-05, "loss": 2.0205, "step": 15430500 }, { "epoch": 44.67, "learning_rate": 2.7674824587810298e-05, "loss": 2.0248, "step": 15431000 }, { "epoch": 44.67, "learning_rate": 2.7674100940163024e-05, "loss": 2.0287, "step": 15431500 }, { "epoch": 44.67, "learning_rate": 2.767337873981104e-05, "loss": 2.0165, "step": 15432000 }, { "epoch": 44.67, "learning_rate": 2.7672655092163768e-05, "loss": 2.0239, "step": 15432500 }, { "epoch": 44.67, "learning_rate": 2.767193144451649e-05, "loss": 2.0353, "step": 15433000 }, { "epoch": 44.67, "learning_rate": 2.7671207796869213e-05, "loss": 2.044, "step": 15433500 }, { "epoch": 44.68, "learning_rate": 2.767048414922194e-05, "loss": 2.0304, "step": 15434000 }, { "epoch": 44.68, "learning_rate": 2.766976050157466e-05, "loss": 2.0372, "step": 15434500 }, { "epoch": 44.68, "learning_rate": 2.7669036853927383e-05, "loss": 2.0035, "step": 15435000 }, { "epoch": 44.68, "learning_rate": 2.7668316100870695e-05, "loss": 2.018, "step": 15435500 }, { "epoch": 44.68, "learning_rate": 2.7667592453223417e-05, "loss": 2.0405, "step": 15436000 }, { "epoch": 44.68, "learning_rate": 2.766686880557614e-05, "loss": 2.018, "step": 15436500 }, { "epoch": 44.68, "learning_rate": 2.7666145157928862e-05, "loss": 2.0275, "step": 15437000 }, { "epoch": 44.69, "learning_rate": 2.7665421510281587e-05, "loss": 2.0375, "step": 15437500 }, { "epoch": 44.69, "learning_rate": 2.766469786263431e-05, "loss": 2.0209, "step": 15438000 }, { "epoch": 44.69, "learning_rate": 2.7663977109577622e-05, "loss": 2.0195, "step": 15438500 }, { "epoch": 44.69, "learning_rate": 2.7663253461930344e-05, "loss": 2.0324, "step": 15439000 }, { "epoch": 44.69, "learning_rate": 2.7662529814283066e-05, "loss": 2.0263, "step": 15439500 }, { "epoch": 44.69, "learning_rate": 2.766180616663579e-05, "loss": 2.0231, "step": 15440000 }, { "epoch": 44.69, "learning_rate": 2.7661082518988518e-05, "loss": 2.0347, "step": 15440500 }, { "epoch": 44.7, "learning_rate": 2.766035887134124e-05, "loss": 2.0447, "step": 15441000 }, { "epoch": 44.7, "learning_rate": 2.7659635223693965e-05, "loss": 2.0205, "step": 15441500 }, { "epoch": 44.7, "learning_rate": 2.7658911576046688e-05, "loss": 2.0461, "step": 15442000 }, { "epoch": 44.7, "learning_rate": 2.765818792839941e-05, "loss": 2.0199, "step": 15442500 }, { "epoch": 44.7, "learning_rate": 2.7657464280752132e-05, "loss": 2.0215, "step": 15443000 }, { "epoch": 44.7, "learning_rate": 2.7656740633104854e-05, "loss": 2.0443, "step": 15443500 }, { "epoch": 44.7, "learning_rate": 2.7656016985457577e-05, "loss": 2.0124, "step": 15444000 }, { "epoch": 44.71, "learning_rate": 2.7655294785105596e-05, "loss": 2.0371, "step": 15444500 }, { "epoch": 44.71, "learning_rate": 2.7654572584753614e-05, "loss": 2.0312, "step": 15445000 }, { "epoch": 44.71, "learning_rate": 2.7653848937106337e-05, "loss": 2.0482, "step": 15445500 }, { "epoch": 44.71, "learning_rate": 2.765312528945906e-05, "loss": 2.0246, "step": 15446000 }, { "epoch": 44.71, "learning_rate": 2.765240164181178e-05, "loss": 2.0189, "step": 15446500 }, { "epoch": 44.71, "learning_rate": 2.7651677994164503e-05, "loss": 2.0163, "step": 15447000 }, { "epoch": 44.71, "learning_rate": 2.7650954346517226e-05, "loss": 2.0187, "step": 15447500 }, { "epoch": 44.72, "learning_rate": 2.7650230698869955e-05, "loss": 2.0338, "step": 15448000 }, { "epoch": 44.72, "learning_rate": 2.7649508498517974e-05, "loss": 2.0202, "step": 15448500 }, { "epoch": 44.72, "learning_rate": 2.7648784850870696e-05, "loss": 2.0456, "step": 15449000 }, { "epoch": 44.72, "learning_rate": 2.7648061203223418e-05, "loss": 2.0529, "step": 15449500 }, { "epoch": 44.72, "learning_rate": 2.7647339002871437e-05, "loss": 2.0123, "step": 15450000 }, { "epoch": 44.72, "learning_rate": 2.764661535522416e-05, "loss": 2.0485, "step": 15450500 }, { "epoch": 44.72, "learning_rate": 2.764589170757688e-05, "loss": 2.0424, "step": 15451000 }, { "epoch": 44.73, "learning_rate": 2.7645168059929604e-05, "loss": 2.0491, "step": 15451500 }, { "epoch": 44.73, "learning_rate": 2.7644444412282326e-05, "loss": 2.0359, "step": 15452000 }, { "epoch": 44.73, "learning_rate": 2.764372076463505e-05, "loss": 2.0311, "step": 15452500 }, { "epoch": 44.73, "learning_rate": 2.7642997116987774e-05, "loss": 2.0259, "step": 15453000 }, { "epoch": 44.73, "learning_rate": 2.7642273469340496e-05, "loss": 2.0095, "step": 15453500 }, { "epoch": 44.73, "learning_rate": 2.7641549821693218e-05, "loss": 2.035, "step": 15454000 }, { "epoch": 44.73, "learning_rate": 2.764082617404594e-05, "loss": 2.0398, "step": 15454500 }, { "epoch": 44.74, "learning_rate": 2.764010397369396e-05, "loss": 2.0426, "step": 15455000 }, { "epoch": 44.74, "learning_rate": 2.763938032604669e-05, "loss": 2.0299, "step": 15455500 }, { "epoch": 44.74, "learning_rate": 2.7638658125694704e-05, "loss": 2.0198, "step": 15456000 }, { "epoch": 44.74, "learning_rate": 2.7637935925342723e-05, "loss": 2.0222, "step": 15456500 }, { "epoch": 44.74, "learning_rate": 2.7637212277695445e-05, "loss": 2.0297, "step": 15457000 }, { "epoch": 44.74, "learning_rate": 2.7636488630048167e-05, "loss": 2.0313, "step": 15457500 }, { "epoch": 44.74, "learning_rate": 2.763576498240089e-05, "loss": 2.0074, "step": 15458000 }, { "epoch": 44.75, "learning_rate": 2.7635041334753615e-05, "loss": 2.0038, "step": 15458500 }, { "epoch": 44.75, "learning_rate": 2.7634317687106338e-05, "loss": 2.0247, "step": 15459000 }, { "epoch": 44.75, "learning_rate": 2.763359403945906e-05, "loss": 2.0414, "step": 15459500 }, { "epoch": 44.75, "learning_rate": 2.7632870391811782e-05, "loss": 2.0553, "step": 15460000 }, { "epoch": 44.75, "learning_rate": 2.7632146744164504e-05, "loss": 2.0353, "step": 15460500 }, { "epoch": 44.75, "learning_rate": 2.763142309651723e-05, "loss": 2.0196, "step": 15461000 }, { "epoch": 44.75, "learning_rate": 2.7630699448869952e-05, "loss": 2.0052, "step": 15461500 }, { "epoch": 44.76, "learning_rate": 2.7629975801222674e-05, "loss": 2.0196, "step": 15462000 }, { "epoch": 44.76, "learning_rate": 2.7629252153575403e-05, "loss": 2.019, "step": 15462500 }, { "epoch": 44.76, "learning_rate": 2.7628528505928126e-05, "loss": 2.0101, "step": 15463000 }, { "epoch": 44.76, "learning_rate": 2.762780630557614e-05, "loss": 2.0068, "step": 15463500 }, { "epoch": 44.76, "learning_rate": 2.7627082657928867e-05, "loss": 2.0058, "step": 15464000 }, { "epoch": 44.76, "learning_rate": 2.762635901028159e-05, "loss": 2.0491, "step": 15464500 }, { "epoch": 44.76, "learning_rate": 2.7625636809929605e-05, "loss": 2.0103, "step": 15465000 }, { "epoch": 44.77, "learning_rate": 2.762491316228233e-05, "loss": 2.0383, "step": 15465500 }, { "epoch": 44.77, "learning_rate": 2.7624189514635052e-05, "loss": 2.0198, "step": 15466000 }, { "epoch": 44.77, "learning_rate": 2.7623465866987775e-05, "loss": 2.0101, "step": 15466500 }, { "epoch": 44.77, "learning_rate": 2.7622742219340497e-05, "loss": 2.0484, "step": 15467000 }, { "epoch": 44.77, "learning_rate": 2.762201857169322e-05, "loss": 2.0137, "step": 15467500 }, { "epoch": 44.77, "learning_rate": 2.762129492404594e-05, "loss": 2.0236, "step": 15468000 }, { "epoch": 44.77, "learning_rate": 2.7620571276398667e-05, "loss": 2.0018, "step": 15468500 }, { "epoch": 44.78, "learning_rate": 2.7619849076046682e-05, "loss": 2.0407, "step": 15469000 }, { "epoch": 44.78, "learning_rate": 2.7619125428399405e-05, "loss": 2.0385, "step": 15469500 }, { "epoch": 44.78, "learning_rate": 2.7618401780752134e-05, "loss": 2.0361, "step": 15470000 }, { "epoch": 44.78, "learning_rate": 2.7617678133104856e-05, "loss": 2.0309, "step": 15470500 }, { "epoch": 44.78, "learning_rate": 2.761695448545758e-05, "loss": 2.0372, "step": 15471000 }, { "epoch": 44.78, "learning_rate": 2.7616230837810304e-05, "loss": 2.0315, "step": 15471500 }, { "epoch": 44.79, "learning_rate": 2.7615507190163026e-05, "loss": 2.0329, "step": 15472000 }, { "epoch": 44.79, "learning_rate": 2.7614784989811045e-05, "loss": 2.0215, "step": 15472500 }, { "epoch": 44.79, "learning_rate": 2.7614061342163767e-05, "loss": 2.0274, "step": 15473000 }, { "epoch": 44.79, "learning_rate": 2.761333769451649e-05, "loss": 2.0461, "step": 15473500 }, { "epoch": 44.79, "learning_rate": 2.7612614046869212e-05, "loss": 2.0222, "step": 15474000 }, { "epoch": 44.79, "learning_rate": 2.7611890399221934e-05, "loss": 2.0471, "step": 15474500 }, { "epoch": 44.79, "learning_rate": 2.7611166751574656e-05, "loss": 2.0223, "step": 15475000 }, { "epoch": 44.8, "learning_rate": 2.7610443103927382e-05, "loss": 2.0331, "step": 15475500 }, { "epoch": 44.8, "learning_rate": 2.7609719456280104e-05, "loss": 2.0324, "step": 15476000 }, { "epoch": 44.8, "learning_rate": 2.7608995808632826e-05, "loss": 2.055, "step": 15476500 }, { "epoch": 44.8, "learning_rate": 2.760827216098555e-05, "loss": 2.0263, "step": 15477000 }, { "epoch": 44.8, "learning_rate": 2.7607548513338277e-05, "loss": 2.0295, "step": 15477500 }, { "epoch": 44.8, "learning_rate": 2.7606824865691e-05, "loss": 2.0535, "step": 15478000 }, { "epoch": 44.8, "learning_rate": 2.7606101218043722e-05, "loss": 2.0511, "step": 15478500 }, { "epoch": 44.81, "learning_rate": 2.7605377570396444e-05, "loss": 2.0141, "step": 15479000 }, { "epoch": 44.81, "learning_rate": 2.7604655370044463e-05, "loss": 2.0528, "step": 15479500 }, { "epoch": 44.81, "learning_rate": 2.7603931722397185e-05, "loss": 2.0352, "step": 15480000 }, { "epoch": 44.81, "learning_rate": 2.7603208074749908e-05, "loss": 2.056, "step": 15480500 }, { "epoch": 44.81, "learning_rate": 2.7602484427102633e-05, "loss": 1.9979, "step": 15481000 }, { "epoch": 44.81, "learning_rate": 2.7601760779455355e-05, "loss": 2.031, "step": 15481500 }, { "epoch": 44.81, "learning_rate": 2.7601037131808078e-05, "loss": 2.049, "step": 15482000 }, { "epoch": 44.82, "learning_rate": 2.76003134841608e-05, "loss": 2.0241, "step": 15482500 }, { "epoch": 44.82, "learning_rate": 2.759959128380882e-05, "loss": 2.0455, "step": 15483000 }, { "epoch": 44.82, "learning_rate": 2.7598869083456834e-05, "loss": 2.0156, "step": 15483500 }, { "epoch": 44.82, "learning_rate": 2.7598145435809557e-05, "loss": 2.0403, "step": 15484000 }, { "epoch": 44.82, "learning_rate": 2.7597421788162282e-05, "loss": 2.0354, "step": 15484500 }, { "epoch": 44.82, "learning_rate": 2.7596698140515008e-05, "loss": 2.0274, "step": 15485000 }, { "epoch": 44.82, "learning_rate": 2.7595974492867734e-05, "loss": 2.0358, "step": 15485500 }, { "epoch": 44.83, "learning_rate": 2.759525229251575e-05, "loss": 2.0495, "step": 15486000 }, { "epoch": 44.83, "learning_rate": 2.759452864486847e-05, "loss": 2.025, "step": 15486500 }, { "epoch": 44.83, "learning_rate": 2.7593804997221197e-05, "loss": 2.0457, "step": 15487000 }, { "epoch": 44.83, "learning_rate": 2.759308134957392e-05, "loss": 2.0156, "step": 15487500 }, { "epoch": 44.83, "learning_rate": 2.759235770192664e-05, "loss": 2.0305, "step": 15488000 }, { "epoch": 44.83, "learning_rate": 2.7591635501574657e-05, "loss": 2.0482, "step": 15488500 }, { "epoch": 44.83, "learning_rate": 2.7590913301222676e-05, "loss": 2.0281, "step": 15489000 }, { "epoch": 44.84, "learning_rate": 2.7590189653575398e-05, "loss": 2.0263, "step": 15489500 }, { "epoch": 44.84, "learning_rate": 2.758946600592812e-05, "loss": 2.0127, "step": 15490000 }, { "epoch": 44.84, "learning_rate": 2.7588742358280846e-05, "loss": 2.0218, "step": 15490500 }, { "epoch": 44.84, "learning_rate": 2.758802015792886e-05, "loss": 2.0342, "step": 15491000 }, { "epoch": 44.84, "learning_rate": 2.758729795757688e-05, "loss": 2.0455, "step": 15491500 }, { "epoch": 44.84, "learning_rate": 2.7586574309929603e-05, "loss": 2.0225, "step": 15492000 }, { "epoch": 44.84, "learning_rate": 2.7585850662282325e-05, "loss": 2.0415, "step": 15492500 }, { "epoch": 44.85, "learning_rate": 2.7585127014635054e-05, "loss": 2.0251, "step": 15493000 }, { "epoch": 44.85, "learning_rate": 2.7584403366987776e-05, "loss": 2.0209, "step": 15493500 }, { "epoch": 44.85, "learning_rate": 2.75836797193405e-05, "loss": 2.0288, "step": 15494000 }, { "epoch": 44.85, "learning_rate": 2.758295607169322e-05, "loss": 2.0172, "step": 15494500 }, { "epoch": 44.85, "learning_rate": 2.7582232424045946e-05, "loss": 2.0465, "step": 15495000 }, { "epoch": 44.85, "learning_rate": 2.758150877639867e-05, "loss": 2.0263, "step": 15495500 }, { "epoch": 44.85, "learning_rate": 2.758078512875139e-05, "loss": 2.0159, "step": 15496000 }, { "epoch": 44.86, "learning_rate": 2.7580061481104113e-05, "loss": 2.0166, "step": 15496500 }, { "epoch": 44.86, "learning_rate": 2.7579337833456835e-05, "loss": 2.0321, "step": 15497000 }, { "epoch": 44.86, "learning_rate": 2.757861418580956e-05, "loss": 2.0175, "step": 15497500 }, { "epoch": 44.86, "learning_rate": 2.7577890538162283e-05, "loss": 2.0429, "step": 15498000 }, { "epoch": 44.86, "learning_rate": 2.75771683378103e-05, "loss": 2.0521, "step": 15498500 }, { "epoch": 44.86, "learning_rate": 2.757644469016302e-05, "loss": 2.0464, "step": 15499000 }, { "epoch": 44.86, "learning_rate": 2.7575721042515746e-05, "loss": 2.0361, "step": 15499500 }, { "epoch": 44.87, "learning_rate": 2.7574997394868472e-05, "loss": 2.0311, "step": 15500000 }, { "epoch": 44.87, "learning_rate": 2.7574273747221198e-05, "loss": 2.0271, "step": 15500500 }, { "epoch": 44.87, "learning_rate": 2.757355009957392e-05, "loss": 2.0149, "step": 15501000 }, { "epoch": 44.87, "learning_rate": 2.7572827899221936e-05, "loss": 2.0109, "step": 15501500 }, { "epoch": 44.87, "learning_rate": 2.757210425157466e-05, "loss": 2.0104, "step": 15502000 }, { "epoch": 44.87, "learning_rate": 2.7571380603927383e-05, "loss": 2.0234, "step": 15502500 }, { "epoch": 44.87, "learning_rate": 2.7570656956280106e-05, "loss": 2.0164, "step": 15503000 }, { "epoch": 44.88, "learning_rate": 2.7569933308632828e-05, "loss": 2.0387, "step": 15503500 }, { "epoch": 44.88, "learning_rate": 2.756920966098555e-05, "loss": 2.0503, "step": 15504000 }, { "epoch": 44.88, "learning_rate": 2.7568486013338272e-05, "loss": 2.0469, "step": 15504500 }, { "epoch": 44.88, "learning_rate": 2.7567762365690998e-05, "loss": 2.0243, "step": 15505000 }, { "epoch": 44.88, "learning_rate": 2.7567040165339013e-05, "loss": 2.0374, "step": 15505500 }, { "epoch": 44.88, "learning_rate": 2.7566316517691736e-05, "loss": 2.0444, "step": 15506000 }, { "epoch": 44.88, "learning_rate": 2.756559287004446e-05, "loss": 2.0414, "step": 15506500 }, { "epoch": 44.89, "learning_rate": 2.7564869222397184e-05, "loss": 2.0328, "step": 15507000 }, { "epoch": 44.89, "learning_rate": 2.7564147022045206e-05, "loss": 2.0101, "step": 15507500 }, { "epoch": 44.89, "learning_rate": 2.7563423374397928e-05, "loss": 2.0328, "step": 15508000 }, { "epoch": 44.89, "learning_rate": 2.756269972675065e-05, "loss": 2.0595, "step": 15508500 }, { "epoch": 44.89, "learning_rate": 2.7561976079103376e-05, "loss": 2.0125, "step": 15509000 }, { "epoch": 44.89, "learning_rate": 2.7561252431456098e-05, "loss": 2.0431, "step": 15509500 }, { "epoch": 44.9, "learning_rate": 2.756052878380882e-05, "loss": 2.0386, "step": 15510000 }, { "epoch": 44.9, "learning_rate": 2.7559806583456836e-05, "loss": 2.03, "step": 15510500 }, { "epoch": 44.9, "learning_rate": 2.755908293580956e-05, "loss": 2.034, "step": 15511000 }, { "epoch": 44.9, "learning_rate": 2.7558359288162284e-05, "loss": 2.0363, "step": 15511500 }, { "epoch": 44.9, "learning_rate": 2.7557635640515006e-05, "loss": 2.051, "step": 15512000 }, { "epoch": 44.9, "learning_rate": 2.755691199286773e-05, "loss": 2.0279, "step": 15512500 }, { "epoch": 44.9, "learning_rate": 2.755618834522045e-05, "loss": 2.0261, "step": 15513000 }, { "epoch": 44.91, "learning_rate": 2.7555464697573176e-05, "loss": 2.0487, "step": 15513500 }, { "epoch": 44.91, "learning_rate": 2.75547410499259e-05, "loss": 2.0519, "step": 15514000 }, { "epoch": 44.91, "learning_rate": 2.7554017402278624e-05, "loss": 2.0128, "step": 15514500 }, { "epoch": 44.91, "learning_rate": 2.755329375463135e-05, "loss": 2.0384, "step": 15515000 }, { "epoch": 44.91, "learning_rate": 2.7552571554279365e-05, "loss": 2.0343, "step": 15515500 }, { "epoch": 44.91, "learning_rate": 2.7551847906632087e-05, "loss": 2.0423, "step": 15516000 }, { "epoch": 44.91, "learning_rate": 2.7551125706280106e-05, "loss": 2.0418, "step": 15516500 }, { "epoch": 44.92, "learning_rate": 2.755040205863283e-05, "loss": 2.0341, "step": 15517000 }, { "epoch": 44.92, "learning_rate": 2.754967841098555e-05, "loss": 2.037, "step": 15517500 }, { "epoch": 44.92, "learning_rate": 2.7548954763338276e-05, "loss": 2.0146, "step": 15518000 }, { "epoch": 44.92, "learning_rate": 2.754823401028159e-05, "loss": 2.0301, "step": 15518500 }, { "epoch": 44.92, "learning_rate": 2.754751036263431e-05, "loss": 2.0652, "step": 15519000 }, { "epoch": 44.92, "learning_rate": 2.7546786714987033e-05, "loss": 2.0482, "step": 15519500 }, { "epoch": 44.92, "learning_rate": 2.7546063067339755e-05, "loss": 2.0185, "step": 15520000 }, { "epoch": 44.93, "learning_rate": 2.7545339419692478e-05, "loss": 2.0284, "step": 15520500 }, { "epoch": 44.93, "learning_rate": 2.75446157720452e-05, "loss": 2.0114, "step": 15521000 }, { "epoch": 44.93, "learning_rate": 2.7543892124397926e-05, "loss": 2.0166, "step": 15521500 }, { "epoch": 44.93, "learning_rate": 2.7543168476750648e-05, "loss": 2.0452, "step": 15522000 }, { "epoch": 44.93, "learning_rate": 2.7542444829103377e-05, "loss": 2.0083, "step": 15522500 }, { "epoch": 44.93, "learning_rate": 2.7541722628751392e-05, "loss": 2.0219, "step": 15523000 }, { "epoch": 44.93, "learning_rate": 2.7540998981104115e-05, "loss": 2.0275, "step": 15523500 }, { "epoch": 44.94, "learning_rate": 2.754027533345684e-05, "loss": 2.0433, "step": 15524000 }, { "epoch": 44.94, "learning_rate": 2.7539551685809562e-05, "loss": 2.0314, "step": 15524500 }, { "epoch": 44.94, "learning_rate": 2.7538828038162285e-05, "loss": 2.0086, "step": 15525000 }, { "epoch": 44.94, "learning_rate": 2.75381058378103e-05, "loss": 2.0364, "step": 15525500 }, { "epoch": 44.94, "learning_rate": 2.7537382190163026e-05, "loss": 2.0164, "step": 15526000 }, { "epoch": 44.94, "learning_rate": 2.7536658542515748e-05, "loss": 2.043, "step": 15526500 }, { "epoch": 44.94, "learning_rate": 2.753593489486847e-05, "loss": 2.0474, "step": 15527000 }, { "epoch": 44.95, "learning_rate": 2.7535211247221193e-05, "loss": 2.0275, "step": 15527500 }, { "epoch": 44.95, "learning_rate": 2.753448904686921e-05, "loss": 2.0265, "step": 15528000 }, { "epoch": 44.95, "learning_rate": 2.7533765399221934e-05, "loss": 2.0344, "step": 15528500 }, { "epoch": 44.95, "learning_rate": 2.7533041751574656e-05, "loss": 2.043, "step": 15529000 }, { "epoch": 44.95, "learning_rate": 2.7532318103927378e-05, "loss": 2.0273, "step": 15529500 }, { "epoch": 44.95, "learning_rate": 2.7531594456280107e-05, "loss": 2.0377, "step": 15530000 }, { "epoch": 44.95, "learning_rate": 2.7530872255928126e-05, "loss": 2.0448, "step": 15530500 }, { "epoch": 44.96, "learning_rate": 2.753014860828085e-05, "loss": 2.0396, "step": 15531000 }, { "epoch": 44.96, "learning_rate": 2.752942496063357e-05, "loss": 2.0393, "step": 15531500 }, { "epoch": 44.96, "learning_rate": 2.752870276028159e-05, "loss": 1.9955, "step": 15532000 }, { "epoch": 44.96, "learning_rate": 2.7527979112634312e-05, "loss": 2.0444, "step": 15532500 }, { "epoch": 44.96, "learning_rate": 2.7527255464987034e-05, "loss": 2.0264, "step": 15533000 }, { "epoch": 44.96, "learning_rate": 2.7526531817339756e-05, "loss": 2.0382, "step": 15533500 }, { "epoch": 44.96, "learning_rate": 2.752580816969248e-05, "loss": 2.0423, "step": 15534000 }, { "epoch": 44.97, "learning_rate": 2.7525084522045204e-05, "loss": 2.024, "step": 15534500 }, { "epoch": 44.97, "learning_rate": 2.752436232169322e-05, "loss": 2.0256, "step": 15535000 }, { "epoch": 44.97, "learning_rate": 2.7523638674045942e-05, "loss": 2.0522, "step": 15535500 }, { "epoch": 44.97, "learning_rate": 2.7522915026398664e-05, "loss": 2.0165, "step": 15536000 }, { "epoch": 44.97, "learning_rate": 2.752219137875139e-05, "loss": 2.0283, "step": 15536500 }, { "epoch": 44.97, "learning_rate": 2.7521467731104112e-05, "loss": 2.024, "step": 15537000 }, { "epoch": 44.97, "learning_rate": 2.752074408345684e-05, "loss": 2.0311, "step": 15537500 }, { "epoch": 44.98, "learning_rate": 2.7520020435809563e-05, "loss": 2.0281, "step": 15538000 }, { "epoch": 44.98, "learning_rate": 2.751929823545758e-05, "loss": 2.0424, "step": 15538500 }, { "epoch": 44.98, "learning_rate": 2.7518574587810304e-05, "loss": 2.0312, "step": 15539000 }, { "epoch": 44.98, "learning_rate": 2.7517850940163027e-05, "loss": 2.0063, "step": 15539500 }, { "epoch": 44.98, "learning_rate": 2.751712729251575e-05, "loss": 2.0413, "step": 15540000 }, { "epoch": 44.98, "learning_rate": 2.751640364486847e-05, "loss": 2.034, "step": 15540500 }, { "epoch": 44.98, "learning_rate": 2.7515679997221193e-05, "loss": 2.0246, "step": 15541000 }, { "epoch": 44.99, "learning_rate": 2.7514956349573916e-05, "loss": 2.0195, "step": 15541500 }, { "epoch": 44.99, "learning_rate": 2.751423270192664e-05, "loss": 2.0227, "step": 15542000 }, { "epoch": 44.99, "learning_rate": 2.7513509054279363e-05, "loss": 2.0369, "step": 15542500 }, { "epoch": 44.99, "learning_rate": 2.7512785406632086e-05, "loss": 2.0123, "step": 15543000 }, { "epoch": 44.99, "learning_rate": 2.7512061758984808e-05, "loss": 2.0278, "step": 15543500 }, { "epoch": 44.99, "learning_rate": 2.7511339558632827e-05, "loss": 2.0491, "step": 15544000 }, { "epoch": 44.99, "learning_rate": 2.751061591098555e-05, "loss": 2.0339, "step": 15544500 }, { "epoch": 45.0, "learning_rate": 2.7509892263338278e-05, "loss": 2.0192, "step": 15545000 }, { "epoch": 45.0, "learning_rate": 2.7509168615691e-05, "loss": 2.0317, "step": 15545500 }, { "epoch": 45.0, "learning_rate": 2.7508444968043723e-05, "loss": 2.0252, "step": 15546000 }, { "epoch": 45.0, "eval_accuracy": 0.6716350750480929, "eval_accuracy_mlm": 0.6368857422767499, "eval_accuracy_nsp": 0.8580935227069613, "eval_loss": 2.1569485664367676, "eval_runtime": 332.1327, "eval_samples_per_second": 1313.89, "eval_steps_per_second": 54.746, "step": 15546240 }, { "epoch": 45.0, "learning_rate": 2.7507721320396445e-05, "loss": 2.0416, "step": 15546500 }, { "epoch": 45.0, "learning_rate": 2.7506997672749167e-05, "loss": 2.0123, "step": 15547000 }, { "epoch": 45.0, "learning_rate": 2.7506274025101893e-05, "loss": 1.9978, "step": 15547500 }, { "epoch": 45.01, "learning_rate": 2.7505550377454615e-05, "loss": 1.9899, "step": 15548000 }, { "epoch": 45.01, "learning_rate": 2.7504826729807337e-05, "loss": 1.9903, "step": 15548500 }, { "epoch": 45.01, "learning_rate": 2.750410308216006e-05, "loss": 2.0151, "step": 15549000 }, { "epoch": 45.01, "learning_rate": 2.750337943451278e-05, "loss": 2.0027, "step": 15549500 }, { "epoch": 45.01, "learning_rate": 2.7502655786865504e-05, "loss": 2.0242, "step": 15550000 }, { "epoch": 45.01, "learning_rate": 2.7501933586513523e-05, "loss": 2.016, "step": 15550500 }, { "epoch": 45.01, "learning_rate": 2.7501209938866245e-05, "loss": 2.0307, "step": 15551000 }, { "epoch": 45.02, "learning_rate": 2.7500486291218967e-05, "loss": 2.0081, "step": 15551500 }, { "epoch": 45.02, "learning_rate": 2.7499762643571696e-05, "loss": 2.0161, "step": 15552000 }, { "epoch": 45.02, "learning_rate": 2.7499043337810305e-05, "loss": 2.0023, "step": 15552500 }, { "epoch": 45.02, "learning_rate": 2.7498319690163027e-05, "loss": 2.0236, "step": 15553000 }, { "epoch": 45.02, "learning_rate": 2.749759604251575e-05, "loss": 2.023, "step": 15553500 }, { "epoch": 45.02, "learning_rate": 2.7496872394868472e-05, "loss": 1.9733, "step": 15554000 }, { "epoch": 45.02, "learning_rate": 2.7496148747221194e-05, "loss": 2.0304, "step": 15554500 }, { "epoch": 45.03, "learning_rate": 2.749542509957392e-05, "loss": 2.0318, "step": 15555000 }, { "epoch": 45.03, "learning_rate": 2.7494701451926642e-05, "loss": 1.9963, "step": 15555500 }, { "epoch": 45.03, "learning_rate": 2.7493977804279364e-05, "loss": 2.0187, "step": 15556000 }, { "epoch": 45.03, "learning_rate": 2.7493254156632086e-05, "loss": 2.0072, "step": 15556500 }, { "epoch": 45.03, "learning_rate": 2.74925334035754e-05, "loss": 1.9833, "step": 15557000 }, { "epoch": 45.03, "learning_rate": 2.749180975592812e-05, "loss": 2.0103, "step": 15557500 }, { "epoch": 45.03, "learning_rate": 2.7491086108280843e-05, "loss": 2.0384, "step": 15558000 }, { "epoch": 45.04, "learning_rate": 2.749036246063357e-05, "loss": 2.0396, "step": 15558500 }, { "epoch": 45.04, "learning_rate": 2.748963881298629e-05, "loss": 1.9975, "step": 15559000 }, { "epoch": 45.04, "learning_rate": 2.7488915165339013e-05, "loss": 2.0022, "step": 15559500 }, { "epoch": 45.04, "learning_rate": 2.7488191517691742e-05, "loss": 2.0316, "step": 15560000 }, { "epoch": 45.04, "learning_rate": 2.7487467870044465e-05, "loss": 2.0042, "step": 15560500 }, { "epoch": 45.04, "learning_rate": 2.7486744222397187e-05, "loss": 2.0254, "step": 15561000 }, { "epoch": 45.04, "learning_rate": 2.748602057474991e-05, "loss": 2.0384, "step": 15561500 }, { "epoch": 45.05, "learning_rate": 2.748529692710263e-05, "loss": 1.9898, "step": 15562000 }, { "epoch": 45.05, "learning_rate": 2.7484573279455357e-05, "loss": 2.0317, "step": 15562500 }, { "epoch": 45.05, "learning_rate": 2.7483851079103372e-05, "loss": 2.0257, "step": 15563000 }, { "epoch": 45.05, "learning_rate": 2.7483127431456095e-05, "loss": 2.0482, "step": 15563500 }, { "epoch": 45.05, "learning_rate": 2.748240378380882e-05, "loss": 2.026, "step": 15564000 }, { "epoch": 45.05, "learning_rate": 2.7481681583456836e-05, "loss": 1.9973, "step": 15564500 }, { "epoch": 45.05, "learning_rate": 2.7480957935809558e-05, "loss": 2.0174, "step": 15565000 }, { "epoch": 45.06, "learning_rate": 2.7480234288162284e-05, "loss": 2.0223, "step": 15565500 }, { "epoch": 45.06, "learning_rate": 2.7479510640515006e-05, "loss": 2.0229, "step": 15566000 }, { "epoch": 45.06, "learning_rate": 2.7478786992867728e-05, "loss": 2.0166, "step": 15566500 }, { "epoch": 45.06, "learning_rate": 2.7478064792515744e-05, "loss": 1.9885, "step": 15567000 }, { "epoch": 45.06, "learning_rate": 2.747734259216377e-05, "loss": 2.0033, "step": 15567500 }, { "epoch": 45.06, "learning_rate": 2.747661894451649e-05, "loss": 1.9823, "step": 15568000 }, { "epoch": 45.06, "learning_rate": 2.7475895296869214e-05, "loss": 2.0091, "step": 15568500 }, { "epoch": 45.07, "learning_rate": 2.7475171649221936e-05, "loss": 2.0008, "step": 15569000 }, { "epoch": 45.07, "learning_rate": 2.747444800157466e-05, "loss": 2.0032, "step": 15569500 }, { "epoch": 45.07, "learning_rate": 2.7473724353927384e-05, "loss": 2.0298, "step": 15570000 }, { "epoch": 45.07, "learning_rate": 2.7473000706280106e-05, "loss": 2.0195, "step": 15570500 }, { "epoch": 45.07, "learning_rate": 2.747227705863283e-05, "loss": 2.0196, "step": 15571000 }, { "epoch": 45.07, "learning_rate": 2.747155341098555e-05, "loss": 2.0206, "step": 15571500 }, { "epoch": 45.07, "learning_rate": 2.7470832657928863e-05, "loss": 2.016, "step": 15572000 }, { "epoch": 45.08, "learning_rate": 2.7470109010281585e-05, "loss": 1.9954, "step": 15572500 }, { "epoch": 45.08, "learning_rate": 2.7469385362634307e-05, "loss": 2.0537, "step": 15573000 }, { "epoch": 45.08, "learning_rate": 2.7468661714987033e-05, "loss": 2.0015, "step": 15573500 }, { "epoch": 45.08, "learning_rate": 2.7467938067339755e-05, "loss": 2.0174, "step": 15574000 }, { "epoch": 45.08, "learning_rate": 2.7467214419692477e-05, "loss": 1.9911, "step": 15574500 }, { "epoch": 45.08, "learning_rate": 2.7466490772045206e-05, "loss": 2.0224, "step": 15575000 }, { "epoch": 45.08, "learning_rate": 2.746576712439793e-05, "loss": 2.0114, "step": 15575500 }, { "epoch": 45.09, "learning_rate": 2.746504637134124e-05, "loss": 2.0323, "step": 15576000 }, { "epoch": 45.09, "learning_rate": 2.7464322723693963e-05, "loss": 2.0221, "step": 15576500 }, { "epoch": 45.09, "learning_rate": 2.7463599076046685e-05, "loss": 2.0279, "step": 15577000 }, { "epoch": 45.09, "learning_rate": 2.746287542839941e-05, "loss": 2.0124, "step": 15577500 }, { "epoch": 45.09, "learning_rate": 2.7462151780752133e-05, "loss": 2.0071, "step": 15578000 }, { "epoch": 45.09, "learning_rate": 2.7461428133104856e-05, "loss": 2.0155, "step": 15578500 }, { "epoch": 45.09, "learning_rate": 2.746070593275287e-05, "loss": 2.0243, "step": 15579000 }, { "epoch": 45.1, "learning_rate": 2.7459982285105597e-05, "loss": 1.9902, "step": 15579500 }, { "epoch": 45.1, "learning_rate": 2.7459260084753612e-05, "loss": 2.0213, "step": 15580000 }, { "epoch": 45.1, "learning_rate": 2.7458536437106335e-05, "loss": 2.0354, "step": 15580500 }, { "epoch": 45.1, "learning_rate": 2.745781278945906e-05, "loss": 1.9913, "step": 15581000 }, { "epoch": 45.1, "learning_rate": 2.7457090589107076e-05, "loss": 2.0354, "step": 15581500 }, { "epoch": 45.1, "learning_rate": 2.7456366941459798e-05, "loss": 2.0346, "step": 15582000 }, { "epoch": 45.1, "learning_rate": 2.7455643293812524e-05, "loss": 2.0215, "step": 15582500 }, { "epoch": 45.11, "learning_rate": 2.745491964616525e-05, "loss": 1.9874, "step": 15583000 }, { "epoch": 45.11, "learning_rate": 2.745419599851797e-05, "loss": 1.9985, "step": 15583500 }, { "epoch": 45.11, "learning_rate": 2.7453472350870697e-05, "loss": 2.0036, "step": 15584000 }, { "epoch": 45.11, "learning_rate": 2.745274870322342e-05, "loss": 1.9983, "step": 15584500 }, { "epoch": 45.11, "learning_rate": 2.745202505557614e-05, "loss": 1.9991, "step": 15585000 }, { "epoch": 45.11, "learning_rate": 2.7451301407928864e-05, "loss": 2.0198, "step": 15585500 }, { "epoch": 45.12, "learning_rate": 2.7450577760281586e-05, "loss": 1.9981, "step": 15586000 }, { "epoch": 45.12, "learning_rate": 2.744985411263431e-05, "loss": 2.0175, "step": 15586500 }, { "epoch": 45.12, "learning_rate": 2.7449130464987034e-05, "loss": 2.0045, "step": 15587000 }, { "epoch": 45.12, "learning_rate": 2.7448406817339756e-05, "loss": 2.0014, "step": 15587500 }, { "epoch": 45.12, "learning_rate": 2.7447683169692478e-05, "loss": 2.0598, "step": 15588000 }, { "epoch": 45.12, "learning_rate": 2.74469595220452e-05, "loss": 2.015, "step": 15588500 }, { "epoch": 45.12, "learning_rate": 2.7446235874397923e-05, "loss": 1.9943, "step": 15589000 }, { "epoch": 45.13, "learning_rate": 2.744551222675065e-05, "loss": 2.016, "step": 15589500 }, { "epoch": 45.13, "learning_rate": 2.744479002639867e-05, "loss": 2.0049, "step": 15590000 }, { "epoch": 45.13, "learning_rate": 2.7444066378751393e-05, "loss": 2.0221, "step": 15590500 }, { "epoch": 45.13, "learning_rate": 2.7443342731104115e-05, "loss": 1.9948, "step": 15591000 }, { "epoch": 45.13, "learning_rate": 2.7442619083456837e-05, "loss": 2.0225, "step": 15591500 }, { "epoch": 45.13, "learning_rate": 2.7441896883104856e-05, "loss": 2.0264, "step": 15592000 }, { "epoch": 45.13, "learning_rate": 2.744117323545758e-05, "loss": 2.0116, "step": 15592500 }, { "epoch": 45.14, "learning_rate": 2.744045248240089e-05, "loss": 2.0177, "step": 15593000 }, { "epoch": 45.14, "learning_rate": 2.7439728834753613e-05, "loss": 2.0192, "step": 15593500 }, { "epoch": 45.14, "learning_rate": 2.7439005187106335e-05, "loss": 2.0326, "step": 15594000 }, { "epoch": 45.14, "learning_rate": 2.743828153945906e-05, "loss": 2.0417, "step": 15594500 }, { "epoch": 45.14, "learning_rate": 2.7437557891811783e-05, "loss": 2.0436, "step": 15595000 }, { "epoch": 45.14, "learning_rate": 2.7436834244164505e-05, "loss": 1.9975, "step": 15595500 }, { "epoch": 45.14, "learning_rate": 2.7436110596517228e-05, "loss": 2.0196, "step": 15596000 }, { "epoch": 45.15, "learning_rate": 2.743538694886995e-05, "loss": 2.0186, "step": 15596500 }, { "epoch": 45.15, "learning_rate": 2.7434663301222675e-05, "loss": 1.9983, "step": 15597000 }, { "epoch": 45.15, "learning_rate": 2.74339396535754e-05, "loss": 2.0021, "step": 15597500 }, { "epoch": 45.15, "learning_rate": 2.7433218900518713e-05, "loss": 2.0101, "step": 15598000 }, { "epoch": 45.15, "learning_rate": 2.743249525287144e-05, "loss": 1.9964, "step": 15598500 }, { "epoch": 45.15, "learning_rate": 2.743177160522416e-05, "loss": 2.0136, "step": 15599000 }, { "epoch": 45.15, "learning_rate": 2.7431047957576883e-05, "loss": 2.0049, "step": 15599500 }, { "epoch": 45.16, "learning_rate": 2.7430324309929606e-05, "loss": 2.0326, "step": 15600000 }, { "epoch": 45.16, "learning_rate": 2.7429600662282328e-05, "loss": 1.9939, "step": 15600500 }, { "epoch": 45.16, "learning_rate": 2.742887701463505e-05, "loss": 2.0278, "step": 15601000 }, { "epoch": 45.16, "learning_rate": 2.7428153366987776e-05, "loss": 2.0067, "step": 15601500 }, { "epoch": 45.16, "learning_rate": 2.7427429719340498e-05, "loss": 2.0199, "step": 15602000 }, { "epoch": 45.16, "learning_rate": 2.742670607169322e-05, "loss": 2.0145, "step": 15602500 }, { "epoch": 45.16, "learning_rate": 2.7425982424045942e-05, "loss": 2.0046, "step": 15603000 }, { "epoch": 45.17, "learning_rate": 2.742526022369396e-05, "loss": 2.025, "step": 15603500 }, { "epoch": 45.17, "learning_rate": 2.7424536576046684e-05, "loss": 2.0459, "step": 15604000 }, { "epoch": 45.17, "learning_rate": 2.7423812928399406e-05, "loss": 2.0333, "step": 15604500 }, { "epoch": 45.17, "learning_rate": 2.7423089280752135e-05, "loss": 2.0395, "step": 15605000 }, { "epoch": 45.17, "learning_rate": 2.7422365633104857e-05, "loss": 2.0247, "step": 15605500 }, { "epoch": 45.17, "learning_rate": 2.742164198545758e-05, "loss": 2.0154, "step": 15606000 }, { "epoch": 45.17, "learning_rate": 2.74209197851056e-05, "loss": 2.022, "step": 15606500 }, { "epoch": 45.18, "learning_rate": 2.742019613745832e-05, "loss": 2.017, "step": 15607000 }, { "epoch": 45.18, "learning_rate": 2.7419472489811043e-05, "loss": 2.04, "step": 15607500 }, { "epoch": 45.18, "learning_rate": 2.7418748842163765e-05, "loss": 2.0086, "step": 15608000 }, { "epoch": 45.18, "learning_rate": 2.7418026641811784e-05, "loss": 1.9932, "step": 15608500 }, { "epoch": 45.18, "learning_rate": 2.7417302994164506e-05, "loss": 1.9986, "step": 15609000 }, { "epoch": 45.18, "learning_rate": 2.741657934651723e-05, "loss": 2.0325, "step": 15609500 }, { "epoch": 45.18, "learning_rate": 2.741585569886995e-05, "loss": 2.0273, "step": 15610000 }, { "epoch": 45.19, "learning_rate": 2.7415132051222676e-05, "loss": 2.0133, "step": 15610500 }, { "epoch": 45.19, "learning_rate": 2.74144084035754e-05, "loss": 2.0477, "step": 15611000 }, { "epoch": 45.19, "learning_rate": 2.741368475592812e-05, "loss": 2.028, "step": 15611500 }, { "epoch": 45.19, "learning_rate": 2.7412961108280843e-05, "loss": 2.0257, "step": 15612000 }, { "epoch": 45.19, "learning_rate": 2.7412237460633572e-05, "loss": 2.0436, "step": 15612500 }, { "epoch": 45.19, "learning_rate": 2.7411513812986294e-05, "loss": 2.0117, "step": 15613000 }, { "epoch": 45.19, "learning_rate": 2.7410790165339016e-05, "loss": 2.021, "step": 15613500 }, { "epoch": 45.2, "learning_rate": 2.741006651769174e-05, "loss": 2.0414, "step": 15614000 }, { "epoch": 45.2, "learning_rate": 2.7409342870044464e-05, "loss": 2.0162, "step": 15614500 }, { "epoch": 45.2, "learning_rate": 2.7408619222397187e-05, "loss": 2.0162, "step": 15615000 }, { "epoch": 45.2, "learning_rate": 2.7407897022045202e-05, "loss": 2.0276, "step": 15615500 }, { "epoch": 45.2, "learning_rate": 2.7407173374397928e-05, "loss": 2.0175, "step": 15616000 }, { "epoch": 45.2, "learning_rate": 2.7406451174045943e-05, "loss": 2.0036, "step": 15616500 }, { "epoch": 45.2, "learning_rate": 2.7405727526398666e-05, "loss": 2.0167, "step": 15617000 }, { "epoch": 45.21, "learning_rate": 2.7405005326046684e-05, "loss": 2.0254, "step": 15617500 }, { "epoch": 45.21, "learning_rate": 2.7404281678399407e-05, "loss": 2.0143, "step": 15618000 }, { "epoch": 45.21, "learning_rate": 2.740355803075213e-05, "loss": 2.023, "step": 15618500 }, { "epoch": 45.21, "learning_rate": 2.740283438310485e-05, "loss": 2.0118, "step": 15619000 }, { "epoch": 45.21, "learning_rate": 2.7402110735457577e-05, "loss": 2.0206, "step": 15619500 }, { "epoch": 45.21, "learning_rate": 2.7401387087810302e-05, "loss": 2.0226, "step": 15620000 }, { "epoch": 45.21, "learning_rate": 2.740066488745832e-05, "loss": 2.0097, "step": 15620500 }, { "epoch": 45.22, "learning_rate": 2.7399941239811044e-05, "loss": 2.0143, "step": 15621000 }, { "epoch": 45.22, "learning_rate": 2.7399217592163766e-05, "loss": 2.0037, "step": 15621500 }, { "epoch": 45.22, "learning_rate": 2.739849394451649e-05, "loss": 2.0159, "step": 15622000 }, { "epoch": 45.22, "learning_rate": 2.7397770296869214e-05, "loss": 1.9923, "step": 15622500 }, { "epoch": 45.22, "learning_rate": 2.7397046649221936e-05, "loss": 2.0072, "step": 15623000 }, { "epoch": 45.22, "learning_rate": 2.7396323001574658e-05, "loss": 2.0296, "step": 15623500 }, { "epoch": 45.23, "learning_rate": 2.739559935392738e-05, "loss": 2.02, "step": 15624000 }, { "epoch": 45.23, "learning_rate": 2.7394875706280103e-05, "loss": 2.0278, "step": 15624500 }, { "epoch": 45.23, "learning_rate": 2.7394152058632828e-05, "loss": 2.0321, "step": 15625000 }, { "epoch": 45.23, "learning_rate": 2.739342841098555e-05, "loss": 2.0246, "step": 15625500 }, { "epoch": 45.23, "learning_rate": 2.7392704763338273e-05, "loss": 2.016, "step": 15626000 }, { "epoch": 45.23, "learning_rate": 2.7391981115690995e-05, "loss": 2.0077, "step": 15626500 }, { "epoch": 45.23, "learning_rate": 2.7391257468043724e-05, "loss": 1.9938, "step": 15627000 }, { "epoch": 45.24, "learning_rate": 2.7390533820396446e-05, "loss": 2.0229, "step": 15627500 }, { "epoch": 45.24, "learning_rate": 2.7389811620044465e-05, "loss": 2.0213, "step": 15628000 }, { "epoch": 45.24, "learning_rate": 2.7389087972397187e-05, "loss": 2.0134, "step": 15628500 }, { "epoch": 45.24, "learning_rate": 2.738836432474991e-05, "loss": 2.0396, "step": 15629000 }, { "epoch": 45.24, "learning_rate": 2.7387640677102632e-05, "loss": 2.0354, "step": 15629500 }, { "epoch": 45.24, "learning_rate": 2.7386917029455354e-05, "loss": 2.0319, "step": 15630000 }, { "epoch": 45.24, "learning_rate": 2.738619338180808e-05, "loss": 2.005, "step": 15630500 }, { "epoch": 45.25, "learning_rate": 2.7385471181456095e-05, "loss": 1.9977, "step": 15631000 }, { "epoch": 45.25, "learning_rate": 2.7384747533808817e-05, "loss": 2.0271, "step": 15631500 }, { "epoch": 45.25, "learning_rate": 2.7384023886161543e-05, "loss": 2.0272, "step": 15632000 }, { "epoch": 45.25, "learning_rate": 2.738330168580956e-05, "loss": 2.0139, "step": 15632500 }, { "epoch": 45.25, "learning_rate": 2.738257803816228e-05, "loss": 2.018, "step": 15633000 }, { "epoch": 45.25, "learning_rate": 2.7381854390515006e-05, "loss": 2.0043, "step": 15633500 }, { "epoch": 45.25, "learning_rate": 2.738113074286773e-05, "loss": 2.0291, "step": 15634000 }, { "epoch": 45.26, "learning_rate": 2.7380407095220458e-05, "loss": 2.0099, "step": 15634500 }, { "epoch": 45.26, "learning_rate": 2.737968344757318e-05, "loss": 2.0053, "step": 15635000 }, { "epoch": 45.26, "learning_rate": 2.7378959799925902e-05, "loss": 2.0116, "step": 15635500 }, { "epoch": 45.26, "learning_rate": 2.7378237599573918e-05, "loss": 2.0162, "step": 15636000 }, { "epoch": 45.26, "learning_rate": 2.7377513951926643e-05, "loss": 2.045, "step": 15636500 }, { "epoch": 45.26, "learning_rate": 2.7376790304279366e-05, "loss": 2.0209, "step": 15637000 }, { "epoch": 45.26, "learning_rate": 2.7376066656632088e-05, "loss": 2.0087, "step": 15637500 }, { "epoch": 45.27, "learning_rate": 2.737534300898481e-05, "loss": 2.0295, "step": 15638000 }, { "epoch": 45.27, "learning_rate": 2.7374619361337532e-05, "loss": 2.0238, "step": 15638500 }, { "epoch": 45.27, "learning_rate": 2.737389716098555e-05, "loss": 2.0139, "step": 15639000 }, { "epoch": 45.27, "learning_rate": 2.7373173513338273e-05, "loss": 2.0092, "step": 15639500 }, { "epoch": 45.27, "learning_rate": 2.7372449865690996e-05, "loss": 2.02, "step": 15640000 }, { "epoch": 45.27, "learning_rate": 2.7371727665339015e-05, "loss": 2.0168, "step": 15640500 }, { "epoch": 45.27, "learning_rate": 2.737100546498703e-05, "loss": 2.0296, "step": 15641000 }, { "epoch": 45.28, "learning_rate": 2.7370281817339756e-05, "loss": 2.0312, "step": 15641500 }, { "epoch": 45.28, "learning_rate": 2.7369558169692478e-05, "loss": 2.014, "step": 15642000 }, { "epoch": 45.28, "learning_rate": 2.7368834522045207e-05, "loss": 2.0096, "step": 15642500 }, { "epoch": 45.28, "learning_rate": 2.736811087439793e-05, "loss": 2.0293, "step": 15643000 }, { "epoch": 45.28, "learning_rate": 2.736738722675065e-05, "loss": 2.035, "step": 15643500 }, { "epoch": 45.28, "learning_rate": 2.7366663579103374e-05, "loss": 2.0193, "step": 15644000 }, { "epoch": 45.28, "learning_rate": 2.7365939931456096e-05, "loss": 2.019, "step": 15644500 }, { "epoch": 45.29, "learning_rate": 2.736521628380882e-05, "loss": 2.0325, "step": 15645000 }, { "epoch": 45.29, "learning_rate": 2.7364492636161544e-05, "loss": 2.0206, "step": 15645500 }, { "epoch": 45.29, "learning_rate": 2.7363768988514266e-05, "loss": 2.0244, "step": 15646000 }, { "epoch": 45.29, "learning_rate": 2.736304534086699e-05, "loss": 2.0211, "step": 15646500 }, { "epoch": 45.29, "learning_rate": 2.736232169321971e-05, "loss": 2.0241, "step": 15647000 }, { "epoch": 45.29, "learning_rate": 2.7361598045572433e-05, "loss": 2.0299, "step": 15647500 }, { "epoch": 45.29, "learning_rate": 2.7360875845220452e-05, "loss": 2.0352, "step": 15648000 }, { "epoch": 45.3, "learning_rate": 2.7360152197573174e-05, "loss": 2.0294, "step": 15648500 }, { "epoch": 45.3, "learning_rate": 2.7359428549925896e-05, "loss": 2.0284, "step": 15649000 }, { "epoch": 45.3, "learning_rate": 2.7358704902278625e-05, "loss": 2.0483, "step": 15649500 }, { "epoch": 45.3, "learning_rate": 2.7357981254631347e-05, "loss": 2.0568, "step": 15650000 }, { "epoch": 45.3, "learning_rate": 2.7357259054279366e-05, "loss": 2.009, "step": 15650500 }, { "epoch": 45.3, "learning_rate": 2.7356536853927382e-05, "loss": 2.0088, "step": 15651000 }, { "epoch": 45.3, "learning_rate": 2.7355813206280108e-05, "loss": 2.0012, "step": 15651500 }, { "epoch": 45.31, "learning_rate": 2.735508955863283e-05, "loss": 2.02, "step": 15652000 }, { "epoch": 45.31, "learning_rate": 2.7354365910985552e-05, "loss": 2.0241, "step": 15652500 }, { "epoch": 45.31, "learning_rate": 2.7353642263338274e-05, "loss": 1.9945, "step": 15653000 }, { "epoch": 45.31, "learning_rate": 2.7352918615690997e-05, "loss": 2.0044, "step": 15653500 }, { "epoch": 45.31, "learning_rate": 2.7352194968043722e-05, "loss": 2.0149, "step": 15654000 }, { "epoch": 45.31, "learning_rate": 2.7351472767691738e-05, "loss": 2.0211, "step": 15654500 }, { "epoch": 45.31, "learning_rate": 2.735074912004446e-05, "loss": 2.0077, "step": 15655000 }, { "epoch": 45.32, "learning_rate": 2.7350025472397182e-05, "loss": 2.0258, "step": 15655500 }, { "epoch": 45.32, "learning_rate": 2.7349301824749908e-05, "loss": 2.0338, "step": 15656000 }, { "epoch": 45.32, "learning_rate": 2.734857817710263e-05, "loss": 2.0149, "step": 15656500 }, { "epoch": 45.32, "learning_rate": 2.734785452945536e-05, "loss": 2.0337, "step": 15657000 }, { "epoch": 45.32, "learning_rate": 2.734713088180808e-05, "loss": 2.0113, "step": 15657500 }, { "epoch": 45.32, "learning_rate": 2.7346407234160803e-05, "loss": 1.996, "step": 15658000 }, { "epoch": 45.32, "learning_rate": 2.7345683586513526e-05, "loss": 2.0085, "step": 15658500 }, { "epoch": 45.33, "learning_rate": 2.7344959938866248e-05, "loss": 2.0119, "step": 15659000 }, { "epoch": 45.33, "learning_rate": 2.7344236291218974e-05, "loss": 2.0201, "step": 15659500 }, { "epoch": 45.33, "learning_rate": 2.7343512643571696e-05, "loss": 2.0195, "step": 15660000 }, { "epoch": 45.33, "learning_rate": 2.7342788995924418e-05, "loss": 1.9933, "step": 15660500 }, { "epoch": 45.33, "learning_rate": 2.7342066795572434e-05, "loss": 2.014, "step": 15661000 }, { "epoch": 45.33, "learning_rate": 2.7341344595220453e-05, "loss": 2.0142, "step": 15661500 }, { "epoch": 45.34, "learning_rate": 2.7340620947573175e-05, "loss": 2.023, "step": 15662000 }, { "epoch": 45.34, "learning_rate": 2.7339897299925897e-05, "loss": 2.0049, "step": 15662500 }, { "epoch": 45.34, "learning_rate": 2.7339173652278623e-05, "loss": 2.0042, "step": 15663000 }, { "epoch": 45.34, "learning_rate": 2.7338450004631345e-05, "loss": 2.0484, "step": 15663500 }, { "epoch": 45.34, "learning_rate": 2.7337726356984067e-05, "loss": 2.023, "step": 15664000 }, { "epoch": 45.34, "learning_rate": 2.7337002709336796e-05, "loss": 2.0012, "step": 15664500 }, { "epoch": 45.34, "learning_rate": 2.733627906168952e-05, "loss": 2.0242, "step": 15665000 }, { "epoch": 45.35, "learning_rate": 2.733555541404224e-05, "loss": 2.0104, "step": 15665500 }, { "epoch": 45.35, "learning_rate": 2.7334831766394963e-05, "loss": 2.0062, "step": 15666000 }, { "epoch": 45.35, "learning_rate": 2.7334108118747685e-05, "loss": 2.0249, "step": 15666500 }, { "epoch": 45.35, "learning_rate": 2.7333385918395704e-05, "loss": 2.0142, "step": 15667000 }, { "epoch": 45.35, "learning_rate": 2.7332663718043723e-05, "loss": 2.0427, "step": 15667500 }, { "epoch": 45.35, "learning_rate": 2.7331940070396445e-05, "loss": 2.012, "step": 15668000 }, { "epoch": 45.35, "learning_rate": 2.7331216422749167e-05, "loss": 2.0141, "step": 15668500 }, { "epoch": 45.36, "learning_rate": 2.733049277510189e-05, "loss": 2.0267, "step": 15669000 }, { "epoch": 45.36, "learning_rate": 2.7329769127454612e-05, "loss": 2.0411, "step": 15669500 }, { "epoch": 45.36, "learning_rate": 2.7329045479807337e-05, "loss": 2.0182, "step": 15670000 }, { "epoch": 45.36, "learning_rate": 2.732832183216006e-05, "loss": 1.9914, "step": 15670500 }, { "epoch": 45.36, "learning_rate": 2.7327599631808075e-05, "loss": 2.034, "step": 15671000 }, { "epoch": 45.36, "learning_rate": 2.7326875984160798e-05, "loss": 1.9993, "step": 15671500 }, { "epoch": 45.36, "learning_rate": 2.7326152336513527e-05, "loss": 2.0332, "step": 15672000 }, { "epoch": 45.37, "learning_rate": 2.7325430136161545e-05, "loss": 2.0269, "step": 15672500 }, { "epoch": 45.37, "learning_rate": 2.7324706488514268e-05, "loss": 2.0238, "step": 15673000 }, { "epoch": 45.37, "learning_rate": 2.732398284086699e-05, "loss": 2.0179, "step": 15673500 }, { "epoch": 45.37, "learning_rate": 2.7323259193219712e-05, "loss": 2.0051, "step": 15674000 }, { "epoch": 45.37, "learning_rate": 2.732253699286773e-05, "loss": 2.036, "step": 15674500 }, { "epoch": 45.37, "learning_rate": 2.7321813345220453e-05, "loss": 2.0339, "step": 15675000 }, { "epoch": 45.37, "learning_rate": 2.7321089697573176e-05, "loss": 2.0465, "step": 15675500 }, { "epoch": 45.38, "learning_rate": 2.73203660499259e-05, "loss": 2.0262, "step": 15676000 }, { "epoch": 45.38, "learning_rate": 2.7319642402278623e-05, "loss": 2.0218, "step": 15676500 }, { "epoch": 45.38, "learning_rate": 2.7318918754631346e-05, "loss": 2.0297, "step": 15677000 }, { "epoch": 45.38, "learning_rate": 2.7318195106984068e-05, "loss": 2.0409, "step": 15677500 }, { "epoch": 45.38, "learning_rate": 2.731747145933679e-05, "loss": 2.0333, "step": 15678000 }, { "epoch": 45.38, "learning_rate": 2.7316747811689512e-05, "loss": 2.0318, "step": 15678500 }, { "epoch": 45.38, "learning_rate": 2.731602561133753e-05, "loss": 2.0131, "step": 15679000 }, { "epoch": 45.39, "learning_rate": 2.731530196369026e-05, "loss": 1.9895, "step": 15679500 }, { "epoch": 45.39, "learning_rate": 2.7314578316042983e-05, "loss": 2.0303, "step": 15680000 }, { "epoch": 45.39, "learning_rate": 2.7313854668395705e-05, "loss": 2.0146, "step": 15680500 }, { "epoch": 45.39, "learning_rate": 2.7313132468043724e-05, "loss": 2.0344, "step": 15681000 }, { "epoch": 45.39, "learning_rate": 2.7312408820396446e-05, "loss": 2.0317, "step": 15681500 }, { "epoch": 45.39, "learning_rate": 2.7311685172749168e-05, "loss": 1.9903, "step": 15682000 }, { "epoch": 45.39, "learning_rate": 2.731096152510189e-05, "loss": 2.008, "step": 15682500 }, { "epoch": 45.4, "learning_rate": 2.7310237877454613e-05, "loss": 2.0159, "step": 15683000 }, { "epoch": 45.4, "learning_rate": 2.7309514229807338e-05, "loss": 2.0214, "step": 15683500 }, { "epoch": 45.4, "learning_rate": 2.730879058216006e-05, "loss": 2.0328, "step": 15684000 }, { "epoch": 45.4, "learning_rate": 2.7308066934512783e-05, "loss": 2.0455, "step": 15684500 }, { "epoch": 45.4, "learning_rate": 2.7307344734160802e-05, "loss": 2.0417, "step": 15685000 }, { "epoch": 45.4, "learning_rate": 2.7306621086513524e-05, "loss": 2.0043, "step": 15685500 }, { "epoch": 45.4, "learning_rate": 2.7305897438866246e-05, "loss": 2.0195, "step": 15686000 }, { "epoch": 45.41, "learning_rate": 2.730517379121897e-05, "loss": 2.0138, "step": 15686500 }, { "epoch": 45.41, "learning_rate": 2.7304450143571697e-05, "loss": 2.023, "step": 15687000 }, { "epoch": 45.41, "learning_rate": 2.730372649592442e-05, "loss": 2.0381, "step": 15687500 }, { "epoch": 45.41, "learning_rate": 2.730300429557244e-05, "loss": 2.0223, "step": 15688000 }, { "epoch": 45.41, "learning_rate": 2.730228064792516e-05, "loss": 2.0203, "step": 15688500 }, { "epoch": 45.41, "learning_rate": 2.7301557000277883e-05, "loss": 2.019, "step": 15689000 }, { "epoch": 45.41, "learning_rate": 2.7300833352630605e-05, "loss": 2.001, "step": 15689500 }, { "epoch": 45.42, "learning_rate": 2.7300111152278624e-05, "loss": 2.0316, "step": 15690000 }, { "epoch": 45.42, "learning_rate": 2.7299387504631346e-05, "loss": 2.0367, "step": 15690500 }, { "epoch": 45.42, "learning_rate": 2.729866385698407e-05, "loss": 2.0258, "step": 15691000 }, { "epoch": 45.42, "learning_rate": 2.729794020933679e-05, "loss": 2.0095, "step": 15691500 }, { "epoch": 45.42, "learning_rate": 2.7297216561689513e-05, "loss": 2.0261, "step": 15692000 }, { "epoch": 45.42, "learning_rate": 2.729649291404224e-05, "loss": 2.0092, "step": 15692500 }, { "epoch": 45.42, "learning_rate": 2.729576926639496e-05, "loss": 2.033, "step": 15693000 }, { "epoch": 45.43, "learning_rate": 2.7295045618747683e-05, "loss": 2.0441, "step": 15693500 }, { "epoch": 45.43, "learning_rate": 2.7294321971100405e-05, "loss": 2.0519, "step": 15694000 }, { "epoch": 45.43, "learning_rate": 2.7293598323453134e-05, "loss": 2.037, "step": 15694500 }, { "epoch": 45.43, "learning_rate": 2.7292876123101153e-05, "loss": 2.0221, "step": 15695000 }, { "epoch": 45.43, "learning_rate": 2.7292152475453876e-05, "loss": 2.0529, "step": 15695500 }, { "epoch": 45.43, "learning_rate": 2.7291428827806598e-05, "loss": 2.0316, "step": 15696000 }, { "epoch": 45.43, "learning_rate": 2.729070518015932e-05, "loss": 2.0325, "step": 15696500 }, { "epoch": 45.44, "learning_rate": 2.7289981532512042e-05, "loss": 2.0256, "step": 15697000 }, { "epoch": 45.44, "learning_rate": 2.728925933216006e-05, "loss": 2.0127, "step": 15697500 }, { "epoch": 45.44, "learning_rate": 2.7288535684512784e-05, "loss": 2.0209, "step": 15698000 }, { "epoch": 45.44, "learning_rate": 2.7287813484160802e-05, "loss": 2.0193, "step": 15698500 }, { "epoch": 45.44, "learning_rate": 2.7287089836513525e-05, "loss": 2.0299, "step": 15699000 }, { "epoch": 45.44, "learning_rate": 2.7286366188866247e-05, "loss": 2.0158, "step": 15699500 }, { "epoch": 45.45, "learning_rate": 2.728564254121897e-05, "loss": 1.9998, "step": 15700000 }, { "epoch": 45.45, "learning_rate": 2.728491889357169e-05, "loss": 2.0068, "step": 15700500 }, { "epoch": 45.45, "learning_rate": 2.7284195245924417e-05, "loss": 2.0274, "step": 15701000 }, { "epoch": 45.45, "learning_rate": 2.728347159827714e-05, "loss": 2.0021, "step": 15701500 }, { "epoch": 45.45, "learning_rate": 2.7282747950629868e-05, "loss": 2.0482, "step": 15702000 }, { "epoch": 45.45, "learning_rate": 2.728202430298259e-05, "loss": 2.0158, "step": 15702500 }, { "epoch": 45.45, "learning_rate": 2.7281302102630606e-05, "loss": 2.0078, "step": 15703000 }, { "epoch": 45.46, "learning_rate": 2.728057845498333e-05, "loss": 2.0231, "step": 15703500 }, { "epoch": 45.46, "learning_rate": 2.7279854807336054e-05, "loss": 2.0153, "step": 15704000 }, { "epoch": 45.46, "learning_rate": 2.7279131159688776e-05, "loss": 2.0264, "step": 15704500 }, { "epoch": 45.46, "learning_rate": 2.7278408959336792e-05, "loss": 2.029, "step": 15705000 }, { "epoch": 45.46, "learning_rate": 2.7277685311689517e-05, "loss": 2.0362, "step": 15705500 }, { "epoch": 45.46, "learning_rate": 2.727696166404224e-05, "loss": 2.0339, "step": 15706000 }, { "epoch": 45.46, "learning_rate": 2.7276239463690255e-05, "loss": 2.0305, "step": 15706500 }, { "epoch": 45.47, "learning_rate": 2.727551581604298e-05, "loss": 2.0241, "step": 15707000 }, { "epoch": 45.47, "learning_rate": 2.7274792168395703e-05, "loss": 2.0242, "step": 15707500 }, { "epoch": 45.47, "learning_rate": 2.7274068520748425e-05, "loss": 2.023, "step": 15708000 }, { "epoch": 45.47, "learning_rate": 2.7273344873101147e-05, "loss": 1.9901, "step": 15708500 }, { "epoch": 45.47, "learning_rate": 2.727262122545387e-05, "loss": 2.0261, "step": 15709000 }, { "epoch": 45.47, "learning_rate": 2.72718975778066e-05, "loss": 2.0336, "step": 15709500 }, { "epoch": 45.47, "learning_rate": 2.727117393015932e-05, "loss": 2.0033, "step": 15710000 }, { "epoch": 45.48, "learning_rate": 2.727045172980734e-05, "loss": 2.054, "step": 15710500 }, { "epoch": 45.48, "learning_rate": 2.7269728082160062e-05, "loss": 2.0548, "step": 15711000 }, { "epoch": 45.48, "learning_rate": 2.7269004434512784e-05, "loss": 2.0318, "step": 15711500 }, { "epoch": 45.48, "learning_rate": 2.7268280786865507e-05, "loss": 2.0351, "step": 15712000 }, { "epoch": 45.48, "learning_rate": 2.7267557139218232e-05, "loss": 2.0408, "step": 15712500 }, { "epoch": 45.48, "learning_rate": 2.7266833491570954e-05, "loss": 2.0184, "step": 15713000 }, { "epoch": 45.48, "learning_rate": 2.7266109843923677e-05, "loss": 1.9892, "step": 15713500 }, { "epoch": 45.49, "learning_rate": 2.72653861962764e-05, "loss": 2.0121, "step": 15714000 }, { "epoch": 45.49, "learning_rate": 2.7264663995924418e-05, "loss": 2.0276, "step": 15714500 }, { "epoch": 45.49, "learning_rate": 2.726394034827714e-05, "loss": 2.0345, "step": 15715000 }, { "epoch": 45.49, "learning_rate": 2.7263216700629862e-05, "loss": 2.0451, "step": 15715500 }, { "epoch": 45.49, "learning_rate": 2.7262493052982585e-05, "loss": 2.0007, "step": 15716000 }, { "epoch": 45.49, "learning_rate": 2.7261769405335314e-05, "loss": 2.0116, "step": 15716500 }, { "epoch": 45.49, "learning_rate": 2.7261045757688036e-05, "loss": 2.0366, "step": 15717000 }, { "epoch": 45.5, "learning_rate": 2.7260322110040758e-05, "loss": 2.0168, "step": 15717500 }, { "epoch": 45.5, "learning_rate": 2.725959846239348e-05, "loss": 2.0157, "step": 15718000 }, { "epoch": 45.5, "learning_rate": 2.7258874814746206e-05, "loss": 2.0235, "step": 15718500 }, { "epoch": 45.5, "learning_rate": 2.7258151167098928e-05, "loss": 2.0082, "step": 15719000 }, { "epoch": 45.5, "learning_rate": 2.725742751945165e-05, "loss": 2.0206, "step": 15719500 }, { "epoch": 45.5, "learning_rate": 2.7256706766394963e-05, "loss": 2.014, "step": 15720000 }, { "epoch": 45.5, "learning_rate": 2.7255983118747685e-05, "loss": 2.026, "step": 15720500 }, { "epoch": 45.51, "learning_rate": 2.7255259471100407e-05, "loss": 2.0324, "step": 15721000 }, { "epoch": 45.51, "learning_rate": 2.7254535823453133e-05, "loss": 2.0481, "step": 15721500 }, { "epoch": 45.51, "learning_rate": 2.7253812175805855e-05, "loss": 2.0191, "step": 15722000 }, { "epoch": 45.51, "learning_rate": 2.7253088528158577e-05, "loss": 2.0258, "step": 15722500 }, { "epoch": 45.51, "learning_rate": 2.72523648805113e-05, "loss": 2.0458, "step": 15723000 }, { "epoch": 45.51, "learning_rate": 2.725164123286402e-05, "loss": 2.0284, "step": 15723500 }, { "epoch": 45.51, "learning_rate": 2.725091758521675e-05, "loss": 2.0261, "step": 15724000 }, { "epoch": 45.52, "learning_rate": 2.7250193937569473e-05, "loss": 2.0263, "step": 15724500 }, { "epoch": 45.52, "learning_rate": 2.7249471737217492e-05, "loss": 2.0065, "step": 15725000 }, { "epoch": 45.52, "learning_rate": 2.7248748089570214e-05, "loss": 2.0252, "step": 15725500 }, { "epoch": 45.52, "learning_rate": 2.7248024441922936e-05, "loss": 2.0141, "step": 15726000 }, { "epoch": 45.52, "learning_rate": 2.7247302241570955e-05, "loss": 2.0328, "step": 15726500 }, { "epoch": 45.52, "learning_rate": 2.7246578593923677e-05, "loss": 2.0038, "step": 15727000 }, { "epoch": 45.52, "learning_rate": 2.72458549462764e-05, "loss": 2.012, "step": 15727500 }, { "epoch": 45.53, "learning_rate": 2.7245131298629122e-05, "loss": 2.043, "step": 15728000 }, { "epoch": 45.53, "learning_rate": 2.7244407650981844e-05, "loss": 2.0314, "step": 15728500 }, { "epoch": 45.53, "learning_rate": 2.7243686897925156e-05, "loss": 2.0219, "step": 15729000 }, { "epoch": 45.53, "learning_rate": 2.7242963250277882e-05, "loss": 2.0184, "step": 15729500 }, { "epoch": 45.53, "learning_rate": 2.7242239602630604e-05, "loss": 2.0085, "step": 15730000 }, { "epoch": 45.53, "learning_rate": 2.7241515954983327e-05, "loss": 2.0125, "step": 15730500 }, { "epoch": 45.53, "learning_rate": 2.724079230733605e-05, "loss": 1.9994, "step": 15731000 }, { "epoch": 45.54, "learning_rate": 2.724006865968877e-05, "loss": 2.0331, "step": 15731500 }, { "epoch": 45.54, "learning_rate": 2.72393450120415e-05, "loss": 2.0227, "step": 15732000 }, { "epoch": 45.54, "learning_rate": 2.723862281168952e-05, "loss": 2.0151, "step": 15732500 }, { "epoch": 45.54, "learning_rate": 2.723789916404224e-05, "loss": 2.0129, "step": 15733000 }, { "epoch": 45.54, "learning_rate": 2.7237175516394963e-05, "loss": 2.0116, "step": 15733500 }, { "epoch": 45.54, "learning_rate": 2.7236451868747686e-05, "loss": 2.0155, "step": 15734000 }, { "epoch": 45.54, "learning_rate": 2.7235729668395705e-05, "loss": 2.0022, "step": 15734500 }, { "epoch": 45.55, "learning_rate": 2.7235006020748427e-05, "loss": 2.0189, "step": 15735000 }, { "epoch": 45.55, "learning_rate": 2.723428237310115e-05, "loss": 2.0149, "step": 15735500 }, { "epoch": 45.55, "learning_rate": 2.723355872545387e-05, "loss": 2.0296, "step": 15736000 }, { "epoch": 45.55, "learning_rate": 2.7232835077806597e-05, "loss": 2.0212, "step": 15736500 }, { "epoch": 45.55, "learning_rate": 2.723211143015932e-05, "loss": 2.0276, "step": 15737000 }, { "epoch": 45.55, "learning_rate": 2.723138778251204e-05, "loss": 2.0181, "step": 15737500 }, { "epoch": 45.56, "learning_rate": 2.7230664134864764e-05, "loss": 2.033, "step": 15738000 }, { "epoch": 45.56, "learning_rate": 2.7229941934512783e-05, "loss": 2.0281, "step": 15738500 }, { "epoch": 45.56, "learning_rate": 2.7229218286865505e-05, "loss": 2.0259, "step": 15739000 }, { "epoch": 45.56, "learning_rate": 2.7228496086513527e-05, "loss": 2.0274, "step": 15739500 }, { "epoch": 45.56, "learning_rate": 2.722777243886625e-05, "loss": 2.034, "step": 15740000 }, { "epoch": 45.56, "learning_rate": 2.722704879121897e-05, "loss": 2.0406, "step": 15740500 }, { "epoch": 45.56, "learning_rate": 2.7226325143571697e-05, "loss": 2.035, "step": 15741000 }, { "epoch": 45.57, "learning_rate": 2.722560149592442e-05, "loss": 2.0317, "step": 15741500 }, { "epoch": 45.57, "learning_rate": 2.722487784827714e-05, "loss": 2.0047, "step": 15742000 }, { "epoch": 45.57, "learning_rate": 2.7224154200629864e-05, "loss": 2.0259, "step": 15742500 }, { "epoch": 45.57, "learning_rate": 2.7223430552982586e-05, "loss": 2.05, "step": 15743000 }, { "epoch": 45.57, "learning_rate": 2.7222706905335312e-05, "loss": 2.0305, "step": 15743500 }, { "epoch": 45.57, "learning_rate": 2.7221983257688034e-05, "loss": 2.0518, "step": 15744000 }, { "epoch": 45.57, "learning_rate": 2.722126105733605e-05, "loss": 2.0372, "step": 15744500 }, { "epoch": 45.58, "learning_rate": 2.722053885698407e-05, "loss": 2.0415, "step": 15745000 }, { "epoch": 45.58, "learning_rate": 2.721981520933679e-05, "loss": 2.0083, "step": 15745500 }, { "epoch": 45.58, "learning_rate": 2.7219091561689513e-05, "loss": 2.0333, "step": 15746000 }, { "epoch": 45.58, "learning_rate": 2.7218367914042235e-05, "loss": 2.0164, "step": 15746500 }, { "epoch": 45.58, "learning_rate": 2.7217644266394964e-05, "loss": 2.0265, "step": 15747000 }, { "epoch": 45.58, "learning_rate": 2.7216920618747686e-05, "loss": 2.047, "step": 15747500 }, { "epoch": 45.58, "learning_rate": 2.7216198418395705e-05, "loss": 2.0437, "step": 15748000 }, { "epoch": 45.59, "learning_rate": 2.7215474770748428e-05, "loss": 1.987, "step": 15748500 }, { "epoch": 45.59, "learning_rate": 2.721475112310115e-05, "loss": 2.017, "step": 15749000 }, { "epoch": 45.59, "learning_rate": 2.7214027475453872e-05, "loss": 2.0209, "step": 15749500 }, { "epoch": 45.59, "learning_rate": 2.7213303827806598e-05, "loss": 2.0105, "step": 15750000 }, { "epoch": 45.59, "learning_rate": 2.721258018015932e-05, "loss": 2.023, "step": 15750500 }, { "epoch": 45.59, "learning_rate": 2.7211856532512042e-05, "loss": 2.0091, "step": 15751000 }, { "epoch": 45.59, "learning_rate": 2.7211132884864764e-05, "loss": 2.018, "step": 15751500 }, { "epoch": 45.6, "learning_rate": 2.7210410684512783e-05, "loss": 2.0312, "step": 15752000 }, { "epoch": 45.6, "learning_rate": 2.72096884841608e-05, "loss": 2.0301, "step": 15752500 }, { "epoch": 45.6, "learning_rate": 2.7208964836513525e-05, "loss": 2.0205, "step": 15753000 }, { "epoch": 45.6, "learning_rate": 2.7208241188866247e-05, "loss": 2.0194, "step": 15753500 }, { "epoch": 45.6, "learning_rate": 2.720751754121897e-05, "loss": 2.0216, "step": 15754000 }, { "epoch": 45.6, "learning_rate": 2.7206793893571698e-05, "loss": 2.014, "step": 15754500 }, { "epoch": 45.6, "learning_rate": 2.720607024592442e-05, "loss": 2.0088, "step": 15755000 }, { "epoch": 45.61, "learning_rate": 2.7205346598277142e-05, "loss": 2.0369, "step": 15755500 }, { "epoch": 45.61, "learning_rate": 2.7204622950629865e-05, "loss": 2.0458, "step": 15756000 }, { "epoch": 45.61, "learning_rate": 2.7203899302982587e-05, "loss": 2.0498, "step": 15756500 }, { "epoch": 45.61, "learning_rate": 2.7203175655335313e-05, "loss": 2.0213, "step": 15757000 }, { "epoch": 45.61, "learning_rate": 2.7202452007688035e-05, "loss": 2.0289, "step": 15757500 }, { "epoch": 45.61, "learning_rate": 2.7201728360040757e-05, "loss": 2.0153, "step": 15758000 }, { "epoch": 45.61, "learning_rate": 2.720100471239348e-05, "loss": 2.0121, "step": 15758500 }, { "epoch": 45.62, "learning_rate": 2.72002810647462e-05, "loss": 2.0423, "step": 15759000 }, { "epoch": 45.62, "learning_rate": 2.719955886439422e-05, "loss": 2.0082, "step": 15759500 }, { "epoch": 45.62, "learning_rate": 2.7198835216746943e-05, "loss": 2.0169, "step": 15760000 }, { "epoch": 45.62, "learning_rate": 2.7198111569099665e-05, "loss": 2.0596, "step": 15760500 }, { "epoch": 45.62, "learning_rate": 2.7197387921452387e-05, "loss": 2.0341, "step": 15761000 }, { "epoch": 45.62, "learning_rate": 2.7196664273805116e-05, "loss": 2.0148, "step": 15761500 }, { "epoch": 45.62, "learning_rate": 2.719594062615784e-05, "loss": 2.054, "step": 15762000 }, { "epoch": 45.63, "learning_rate": 2.7195216978510564e-05, "loss": 2.0297, "step": 15762500 }, { "epoch": 45.63, "learning_rate": 2.7194493330863286e-05, "loss": 2.0386, "step": 15763000 }, { "epoch": 45.63, "learning_rate": 2.7193771130511302e-05, "loss": 2.0307, "step": 15763500 }, { "epoch": 45.63, "learning_rate": 2.7193047482864027e-05, "loss": 2.0172, "step": 15764000 }, { "epoch": 45.63, "learning_rate": 2.719232383521675e-05, "loss": 2.0297, "step": 15764500 }, { "epoch": 45.63, "learning_rate": 2.7191601634864765e-05, "loss": 2.0455, "step": 15765000 }, { "epoch": 45.63, "learning_rate": 2.7190877987217487e-05, "loss": 2.0332, "step": 15765500 }, { "epoch": 45.64, "learning_rate": 2.7190154339570213e-05, "loss": 2.0202, "step": 15766000 }, { "epoch": 45.64, "learning_rate": 2.718943213921823e-05, "loss": 2.0601, "step": 15766500 }, { "epoch": 45.64, "learning_rate": 2.718870849157095e-05, "loss": 2.011, "step": 15767000 }, { "epoch": 45.64, "learning_rate": 2.7187984843923676e-05, "loss": 2.0248, "step": 15767500 }, { "epoch": 45.64, "learning_rate": 2.7187262643571692e-05, "loss": 2.0157, "step": 15768000 }, { "epoch": 45.64, "learning_rate": 2.7186538995924414e-05, "loss": 2.0207, "step": 15768500 }, { "epoch": 45.64, "learning_rate": 2.718581534827714e-05, "loss": 2.0122, "step": 15769000 }, { "epoch": 45.65, "learning_rate": 2.7185091700629865e-05, "loss": 2.0231, "step": 15769500 }, { "epoch": 45.65, "learning_rate": 2.718436805298259e-05, "loss": 2.0317, "step": 15770000 }, { "epoch": 45.65, "learning_rate": 2.7183644405335313e-05, "loss": 2.0345, "step": 15770500 }, { "epoch": 45.65, "learning_rate": 2.7182920757688036e-05, "loss": 2.0475, "step": 15771000 }, { "epoch": 45.65, "learning_rate": 2.7182197110040758e-05, "loss": 2.0016, "step": 15771500 }, { "epoch": 45.65, "learning_rate": 2.718147346239348e-05, "loss": 2.0258, "step": 15772000 }, { "epoch": 45.65, "learning_rate": 2.7180749814746202e-05, "loss": 1.9978, "step": 15772500 }, { "epoch": 45.66, "learning_rate": 2.7180026167098928e-05, "loss": 2.0245, "step": 15773000 }, { "epoch": 45.66, "learning_rate": 2.717930251945165e-05, "loss": 2.044, "step": 15773500 }, { "epoch": 45.66, "learning_rate": 2.7178578871804372e-05, "loss": 2.0124, "step": 15774000 }, { "epoch": 45.66, "learning_rate": 2.7177855224157095e-05, "loss": 2.019, "step": 15774500 }, { "epoch": 45.66, "learning_rate": 2.7177131576509817e-05, "loss": 2.0329, "step": 15775000 }, { "epoch": 45.66, "learning_rate": 2.717640792886254e-05, "loss": 2.0141, "step": 15775500 }, { "epoch": 45.67, "learning_rate": 2.7175685728510558e-05, "loss": 2.0107, "step": 15776000 }, { "epoch": 45.67, "learning_rate": 2.7174962080863287e-05, "loss": 2.0254, "step": 15776500 }, { "epoch": 45.67, "learning_rate": 2.7174239880511303e-05, "loss": 2.0264, "step": 15777000 }, { "epoch": 45.67, "learning_rate": 2.7173516232864028e-05, "loss": 2.0222, "step": 15777500 }, { "epoch": 45.67, "learning_rate": 2.717279258521675e-05, "loss": 2.0257, "step": 15778000 }, { "epoch": 45.67, "learning_rate": 2.7172070384864766e-05, "loss": 2.0201, "step": 15778500 }, { "epoch": 45.67, "learning_rate": 2.717134673721749e-05, "loss": 2.0274, "step": 15779000 }, { "epoch": 45.68, "learning_rate": 2.7170623089570214e-05, "loss": 2.0202, "step": 15779500 }, { "epoch": 45.68, "learning_rate": 2.7169899441922936e-05, "loss": 2.034, "step": 15780000 }, { "epoch": 45.68, "learning_rate": 2.716917579427566e-05, "loss": 2.02, "step": 15780500 }, { "epoch": 45.68, "learning_rate": 2.716845214662838e-05, "loss": 2.0153, "step": 15781000 }, { "epoch": 45.68, "learning_rate": 2.7167728498981103e-05, "loss": 2.0303, "step": 15781500 }, { "epoch": 45.68, "learning_rate": 2.716700485133383e-05, "loss": 2.0227, "step": 15782000 }, { "epoch": 45.68, "learning_rate": 2.716628120368655e-05, "loss": 2.0077, "step": 15782500 }, { "epoch": 45.69, "learning_rate": 2.7165557556039273e-05, "loss": 2.0318, "step": 15783000 }, { "epoch": 45.69, "learning_rate": 2.7164835355687292e-05, "loss": 2.0304, "step": 15783500 }, { "epoch": 45.69, "learning_rate": 2.7164111708040017e-05, "loss": 2.0475, "step": 15784000 }, { "epoch": 45.69, "learning_rate": 2.7163388060392743e-05, "loss": 2.006, "step": 15784500 }, { "epoch": 45.69, "learning_rate": 2.7162664412745465e-05, "loss": 2.0276, "step": 15785000 }, { "epoch": 45.69, "learning_rate": 2.716194221239348e-05, "loss": 2.016, "step": 15785500 }, { "epoch": 45.69, "learning_rate": 2.7161218564746203e-05, "loss": 2.0252, "step": 15786000 }, { "epoch": 45.7, "learning_rate": 2.716049491709893e-05, "loss": 2.0418, "step": 15786500 }, { "epoch": 45.7, "learning_rate": 2.715977126945165e-05, "loss": 2.0366, "step": 15787000 }, { "epoch": 45.7, "learning_rate": 2.7159047621804373e-05, "loss": 2.0328, "step": 15787500 }, { "epoch": 45.7, "learning_rate": 2.7158323974157095e-05, "loss": 2.0385, "step": 15788000 }, { "epoch": 45.7, "learning_rate": 2.7157600326509818e-05, "loss": 2.0388, "step": 15788500 }, { "epoch": 45.7, "learning_rate": 2.7156876678862543e-05, "loss": 2.0334, "step": 15789000 }, { "epoch": 45.7, "learning_rate": 2.715615447851056e-05, "loss": 2.0311, "step": 15789500 }, { "epoch": 45.71, "learning_rate": 2.715543083086328e-05, "loss": 2.0264, "step": 15790000 }, { "epoch": 45.71, "learning_rate": 2.7154707183216003e-05, "loss": 2.011, "step": 15790500 }, { "epoch": 45.71, "learning_rate": 2.715398353556873e-05, "loss": 2.0081, "step": 15791000 }, { "epoch": 45.71, "learning_rate": 2.7153259887921455e-05, "loss": 2.032, "step": 15791500 }, { "epoch": 45.71, "learning_rate": 2.7152537687569473e-05, "loss": 2.0077, "step": 15792000 }, { "epoch": 45.71, "learning_rate": 2.7151815487217492e-05, "loss": 2.0461, "step": 15792500 }, { "epoch": 45.71, "learning_rate": 2.7151091839570215e-05, "loss": 2.0412, "step": 15793000 }, { "epoch": 45.72, "learning_rate": 2.7150368191922937e-05, "loss": 2.023, "step": 15793500 }, { "epoch": 45.72, "learning_rate": 2.714964454427566e-05, "loss": 2.042, "step": 15794000 }, { "epoch": 45.72, "learning_rate": 2.714892089662838e-05, "loss": 2.0213, "step": 15794500 }, { "epoch": 45.72, "learning_rate": 2.7148197248981107e-05, "loss": 2.0548, "step": 15795000 }, { "epoch": 45.72, "learning_rate": 2.714747360133383e-05, "loss": 2.029, "step": 15795500 }, { "epoch": 45.72, "learning_rate": 2.714674995368655e-05, "loss": 2.0255, "step": 15796000 }, { "epoch": 45.72, "learning_rate": 2.7146027753334567e-05, "loss": 2.0278, "step": 15796500 }, { "epoch": 45.73, "learning_rate": 2.7145304105687293e-05, "loss": 2.0335, "step": 15797000 }, { "epoch": 45.73, "learning_rate": 2.7144580458040015e-05, "loss": 2.0184, "step": 15797500 }, { "epoch": 45.73, "learning_rate": 2.7143856810392737e-05, "loss": 2.0302, "step": 15798000 }, { "epoch": 45.73, "learning_rate": 2.714313316274546e-05, "loss": 2.0164, "step": 15798500 }, { "epoch": 45.73, "learning_rate": 2.714240951509819e-05, "loss": 2.0343, "step": 15799000 }, { "epoch": 45.73, "learning_rate": 2.714168586745091e-05, "loss": 2.0279, "step": 15799500 }, { "epoch": 45.73, "learning_rate": 2.7140962219803633e-05, "loss": 2.0217, "step": 15800000 }, { "epoch": 45.74, "learning_rate": 2.7140240019451652e-05, "loss": 2.0338, "step": 15800500 }, { "epoch": 45.74, "learning_rate": 2.713951781909967e-05, "loss": 2.0157, "step": 15801000 }, { "epoch": 45.74, "learning_rate": 2.7138794171452393e-05, "loss": 2.0413, "step": 15801500 }, { "epoch": 45.74, "learning_rate": 2.713807197110041e-05, "loss": 2.0432, "step": 15802000 }, { "epoch": 45.74, "learning_rate": 2.713734832345313e-05, "loss": 2.0343, "step": 15802500 }, { "epoch": 45.74, "learning_rate": 2.7136624675805856e-05, "loss": 2.016, "step": 15803000 }, { "epoch": 45.74, "learning_rate": 2.713590102815858e-05, "loss": 2.0364, "step": 15803500 }, { "epoch": 45.75, "learning_rate": 2.71351773805113e-05, "loss": 2.0388, "step": 15804000 }, { "epoch": 45.75, "learning_rate": 2.7134453732864023e-05, "loss": 2.0059, "step": 15804500 }, { "epoch": 45.75, "learning_rate": 2.7133731532512042e-05, "loss": 2.0422, "step": 15805000 }, { "epoch": 45.75, "learning_rate": 2.7133007884864764e-05, "loss": 2.0059, "step": 15805500 }, { "epoch": 45.75, "learning_rate": 2.7132284237217486e-05, "loss": 2.0362, "step": 15806000 }, { "epoch": 45.75, "learning_rate": 2.7131560589570215e-05, "loss": 2.0143, "step": 15806500 }, { "epoch": 45.75, "learning_rate": 2.7130839836513528e-05, "loss": 2.0054, "step": 15807000 }, { "epoch": 45.76, "learning_rate": 2.713011618886625e-05, "loss": 2.0567, "step": 15807500 }, { "epoch": 45.76, "learning_rate": 2.7129392541218972e-05, "loss": 2.0125, "step": 15808000 }, { "epoch": 45.76, "learning_rate": 2.7128668893571694e-05, "loss": 2.049, "step": 15808500 }, { "epoch": 45.76, "learning_rate": 2.712794524592442e-05, "loss": 2.0025, "step": 15809000 }, { "epoch": 45.76, "learning_rate": 2.7127221598277142e-05, "loss": 2.033, "step": 15809500 }, { "epoch": 45.76, "learning_rate": 2.7126497950629864e-05, "loss": 2.0085, "step": 15810000 }, { "epoch": 45.76, "learning_rate": 2.7125774302982587e-05, "loss": 2.0193, "step": 15810500 }, { "epoch": 45.77, "learning_rate": 2.712505065533531e-05, "loss": 2.0263, "step": 15811000 }, { "epoch": 45.77, "learning_rate": 2.7124327007688035e-05, "loss": 2.0286, "step": 15811500 }, { "epoch": 45.77, "learning_rate": 2.7123603360040757e-05, "loss": 2.0276, "step": 15812000 }, { "epoch": 45.77, "learning_rate": 2.712287971239348e-05, "loss": 2.0491, "step": 15812500 }, { "epoch": 45.77, "learning_rate": 2.71221560647462e-05, "loss": 2.0364, "step": 15813000 }, { "epoch": 45.77, "learning_rate": 2.7121432417098924e-05, "loss": 2.0487, "step": 15813500 }, { "epoch": 45.78, "learning_rate": 2.7120708769451653e-05, "loss": 2.0417, "step": 15814000 }, { "epoch": 45.78, "learning_rate": 2.7119985121804375e-05, "loss": 2.0279, "step": 15814500 }, { "epoch": 45.78, "learning_rate": 2.7119261474157097e-05, "loss": 2.0206, "step": 15815000 }, { "epoch": 45.78, "learning_rate": 2.7118537826509823e-05, "loss": 2.0369, "step": 15815500 }, { "epoch": 45.78, "learning_rate": 2.7117814178862545e-05, "loss": 2.0275, "step": 15816000 }, { "epoch": 45.78, "learning_rate": 2.7117090531215267e-05, "loss": 2.017, "step": 15816500 }, { "epoch": 45.78, "learning_rate": 2.711636688356799e-05, "loss": 2.0494, "step": 15817000 }, { "epoch": 45.79, "learning_rate": 2.711564323592071e-05, "loss": 2.0405, "step": 15817500 }, { "epoch": 45.79, "learning_rate": 2.711492103556873e-05, "loss": 2.0324, "step": 15818000 }, { "epoch": 45.79, "learning_rate": 2.7114197387921453e-05, "loss": 2.0427, "step": 15818500 }, { "epoch": 45.79, "learning_rate": 2.7113473740274175e-05, "loss": 2.0188, "step": 15819000 }, { "epoch": 45.79, "learning_rate": 2.7112750092626897e-05, "loss": 2.0234, "step": 15819500 }, { "epoch": 45.79, "learning_rate": 2.7112026444979623e-05, "loss": 2.0519, "step": 15820000 }, { "epoch": 45.79, "learning_rate": 2.711130424462764e-05, "loss": 2.024, "step": 15820500 }, { "epoch": 45.8, "learning_rate": 2.711058059698036e-05, "loss": 2.0285, "step": 15821000 }, { "epoch": 45.8, "learning_rate": 2.710985694933309e-05, "loss": 2.0311, "step": 15821500 }, { "epoch": 45.8, "learning_rate": 2.7109133301685812e-05, "loss": 2.0303, "step": 15822000 }, { "epoch": 45.8, "learning_rate": 2.710841110133383e-05, "loss": 2.0245, "step": 15822500 }, { "epoch": 45.8, "learning_rate": 2.7107687453686553e-05, "loss": 2.013, "step": 15823000 }, { "epoch": 45.8, "learning_rate": 2.7106965253334572e-05, "loss": 2.0419, "step": 15823500 }, { "epoch": 45.8, "learning_rate": 2.7106241605687294e-05, "loss": 2.0237, "step": 15824000 }, { "epoch": 45.81, "learning_rate": 2.7105517958040016e-05, "loss": 1.9835, "step": 15824500 }, { "epoch": 45.81, "learning_rate": 2.710479431039274e-05, "loss": 2.0439, "step": 15825000 }, { "epoch": 45.81, "learning_rate": 2.710407066274546e-05, "loss": 2.0395, "step": 15825500 }, { "epoch": 45.81, "learning_rate": 2.7103347015098187e-05, "loss": 2.041, "step": 15826000 }, { "epoch": 45.81, "learning_rate": 2.7102624814746202e-05, "loss": 2.0132, "step": 15826500 }, { "epoch": 45.81, "learning_rate": 2.7101901167098924e-05, "loss": 2.0173, "step": 15827000 }, { "epoch": 45.81, "learning_rate": 2.7101177519451647e-05, "loss": 2.0405, "step": 15827500 }, { "epoch": 45.82, "learning_rate": 2.7100453871804372e-05, "loss": 2.0335, "step": 15828000 }, { "epoch": 45.82, "learning_rate": 2.7099730224157094e-05, "loss": 2.0294, "step": 15828500 }, { "epoch": 45.82, "learning_rate": 2.7099006576509823e-05, "loss": 2.0333, "step": 15829000 }, { "epoch": 45.82, "learning_rate": 2.7098282928862546e-05, "loss": 2.0319, "step": 15829500 }, { "epoch": 45.82, "learning_rate": 2.7097559281215268e-05, "loss": 2.0249, "step": 15830000 }, { "epoch": 45.82, "learning_rate": 2.709683563356799e-05, "loss": 2.0293, "step": 15830500 }, { "epoch": 45.82, "learning_rate": 2.7096111985920712e-05, "loss": 2.036, "step": 15831000 }, { "epoch": 45.83, "learning_rate": 2.7095388338273438e-05, "loss": 2.048, "step": 15831500 }, { "epoch": 45.83, "learning_rate": 2.709466469062616e-05, "loss": 2.0318, "step": 15832000 }, { "epoch": 45.83, "learning_rate": 2.7093943937569472e-05, "loss": 2.0185, "step": 15832500 }, { "epoch": 45.83, "learning_rate": 2.7093220289922195e-05, "loss": 2.0232, "step": 15833000 }, { "epoch": 45.83, "learning_rate": 2.7092496642274917e-05, "loss": 2.0241, "step": 15833500 }, { "epoch": 45.83, "learning_rate": 2.709177299462764e-05, "loss": 2.0398, "step": 15834000 }, { "epoch": 45.83, "learning_rate": 2.709104934698036e-05, "loss": 1.9963, "step": 15834500 }, { "epoch": 45.84, "learning_rate": 2.7090325699333087e-05, "loss": 2.0288, "step": 15835000 }, { "epoch": 45.84, "learning_rate": 2.708960205168581e-05, "loss": 2.0099, "step": 15835500 }, { "epoch": 45.84, "learning_rate": 2.7088878404038538e-05, "loss": 2.0276, "step": 15836000 }, { "epoch": 45.84, "learning_rate": 2.7088156203686554e-05, "loss": 2.0532, "step": 15836500 }, { "epoch": 45.84, "learning_rate": 2.7087432556039276e-05, "loss": 2.0514, "step": 15837000 }, { "epoch": 45.84, "learning_rate": 2.7086708908392e-05, "loss": 2.0098, "step": 15837500 }, { "epoch": 45.84, "learning_rate": 2.7085985260744724e-05, "loss": 2.0243, "step": 15838000 }, { "epoch": 45.85, "learning_rate": 2.7085261613097446e-05, "loss": 2.0201, "step": 15838500 }, { "epoch": 45.85, "learning_rate": 2.708453796545017e-05, "loss": 2.0177, "step": 15839000 }, { "epoch": 45.85, "learning_rate": 2.708381431780289e-05, "loss": 2.0482, "step": 15839500 }, { "epoch": 45.85, "learning_rate": 2.7083090670155613e-05, "loss": 2.0139, "step": 15840000 }, { "epoch": 45.85, "learning_rate": 2.7082368469803632e-05, "loss": 2.027, "step": 15840500 }, { "epoch": 45.85, "learning_rate": 2.7081644822156354e-05, "loss": 2.0169, "step": 15841000 }, { "epoch": 45.85, "learning_rate": 2.7080921174509076e-05, "loss": 2.0378, "step": 15841500 }, { "epoch": 45.86, "learning_rate": 2.7080198974157095e-05, "loss": 2.0375, "step": 15842000 }, { "epoch": 45.86, "learning_rate": 2.7079475326509817e-05, "loss": 2.0083, "step": 15842500 }, { "epoch": 45.86, "learning_rate": 2.707875167886254e-05, "loss": 2.0235, "step": 15843000 }, { "epoch": 45.86, "learning_rate": 2.7078028031215262e-05, "loss": 2.0512, "step": 15843500 }, { "epoch": 45.86, "learning_rate": 2.707730438356799e-05, "loss": 2.0053, "step": 15844000 }, { "epoch": 45.86, "learning_rate": 2.7076580735920713e-05, "loss": 2.034, "step": 15844500 }, { "epoch": 45.86, "learning_rate": 2.707585708827344e-05, "loss": 2.021, "step": 15845000 }, { "epoch": 45.87, "learning_rate": 2.707513344062616e-05, "loss": 2.0365, "step": 15845500 }, { "epoch": 45.87, "learning_rate": 2.7074409792978883e-05, "loss": 2.0238, "step": 15846000 }, { "epoch": 45.87, "learning_rate": 2.7073686145331605e-05, "loss": 2.0365, "step": 15846500 }, { "epoch": 45.87, "learning_rate": 2.7072963944979624e-05, "loss": 2.0183, "step": 15847000 }, { "epoch": 45.87, "learning_rate": 2.7072240297332347e-05, "loss": 2.0268, "step": 15847500 }, { "epoch": 45.87, "learning_rate": 2.7071518096980366e-05, "loss": 2.0096, "step": 15848000 }, { "epoch": 45.87, "learning_rate": 2.7070794449333088e-05, "loss": 2.0496, "step": 15848500 }, { "epoch": 45.88, "learning_rate": 2.707007080168581e-05, "loss": 2.0309, "step": 15849000 }, { "epoch": 45.88, "learning_rate": 2.7069347154038532e-05, "loss": 2.0186, "step": 15849500 }, { "epoch": 45.88, "learning_rate": 2.7068623506391255e-05, "loss": 2.041, "step": 15850000 }, { "epoch": 45.88, "learning_rate": 2.7067899858743977e-05, "loss": 2.0364, "step": 15850500 }, { "epoch": 45.88, "learning_rate": 2.7067176211096706e-05, "loss": 2.0244, "step": 15851000 }, { "epoch": 45.88, "learning_rate": 2.7066452563449428e-05, "loss": 1.9888, "step": 15851500 }, { "epoch": 45.89, "learning_rate": 2.7065728915802154e-05, "loss": 2.0394, "step": 15852000 }, { "epoch": 45.89, "learning_rate": 2.7065005268154876e-05, "loss": 2.0155, "step": 15852500 }, { "epoch": 45.89, "learning_rate": 2.706428306780289e-05, "loss": 2.0015, "step": 15853000 }, { "epoch": 45.89, "learning_rate": 2.706356086745091e-05, "loss": 2.0178, "step": 15853500 }, { "epoch": 45.89, "learning_rate": 2.7062837219803633e-05, "loss": 2.0217, "step": 15854000 }, { "epoch": 45.89, "learning_rate": 2.7062113572156355e-05, "loss": 2.0196, "step": 15854500 }, { "epoch": 45.89, "learning_rate": 2.7061389924509077e-05, "loss": 2.0111, "step": 15855000 }, { "epoch": 45.9, "learning_rate": 2.7060666276861803e-05, "loss": 2.0334, "step": 15855500 }, { "epoch": 45.9, "learning_rate": 2.7059942629214525e-05, "loss": 2.0317, "step": 15856000 }, { "epoch": 45.9, "learning_rate": 2.7059218981567247e-05, "loss": 2.0198, "step": 15856500 }, { "epoch": 45.9, "learning_rate": 2.705849533391997e-05, "loss": 2.0246, "step": 15857000 }, { "epoch": 45.9, "learning_rate": 2.705777168627269e-05, "loss": 2.0468, "step": 15857500 }, { "epoch": 45.9, "learning_rate": 2.7057048038625414e-05, "loss": 2.0407, "step": 15858000 }, { "epoch": 45.9, "learning_rate": 2.7056324390978143e-05, "loss": 2.0446, "step": 15858500 }, { "epoch": 45.91, "learning_rate": 2.7055602190626162e-05, "loss": 2.0012, "step": 15859000 }, { "epoch": 45.91, "learning_rate": 2.7054879990274177e-05, "loss": 2.0082, "step": 15859500 }, { "epoch": 45.91, "learning_rate": 2.7054156342626903e-05, "loss": 2.0313, "step": 15860000 }, { "epoch": 45.91, "learning_rate": 2.7053432694979625e-05, "loss": 2.0259, "step": 15860500 }, { "epoch": 45.91, "learning_rate": 2.7052709047332347e-05, "loss": 2.0078, "step": 15861000 }, { "epoch": 45.91, "learning_rate": 2.7051986846980366e-05, "loss": 2.0017, "step": 15861500 }, { "epoch": 45.91, "learning_rate": 2.705126319933309e-05, "loss": 2.0401, "step": 15862000 }, { "epoch": 45.92, "learning_rate": 2.705053955168581e-05, "loss": 2.0483, "step": 15862500 }, { "epoch": 45.92, "learning_rate": 2.7049815904038533e-05, "loss": 2.0086, "step": 15863000 }, { "epoch": 45.92, "learning_rate": 2.7049092256391255e-05, "loss": 2.0355, "step": 15863500 }, { "epoch": 45.92, "learning_rate": 2.7048368608743978e-05, "loss": 2.0418, "step": 15864000 }, { "epoch": 45.92, "learning_rate": 2.7047646408391996e-05, "loss": 2.0066, "step": 15864500 }, { "epoch": 45.92, "learning_rate": 2.704692276074472e-05, "loss": 2.0252, "step": 15865000 }, { "epoch": 45.92, "learning_rate": 2.704619911309744e-05, "loss": 2.0377, "step": 15865500 }, { "epoch": 45.93, "learning_rate": 2.704547546545017e-05, "loss": 2.0401, "step": 15866000 }, { "epoch": 45.93, "learning_rate": 2.7044751817802892e-05, "loss": 2.0403, "step": 15866500 }, { "epoch": 45.93, "learning_rate": 2.704402961745091e-05, "loss": 2.0631, "step": 15867000 }, { "epoch": 45.93, "learning_rate": 2.7043305969803633e-05, "loss": 2.0179, "step": 15867500 }, { "epoch": 45.93, "learning_rate": 2.7042582322156356e-05, "loss": 2.0401, "step": 15868000 }, { "epoch": 45.93, "learning_rate": 2.704185867450908e-05, "loss": 2.0209, "step": 15868500 }, { "epoch": 45.93, "learning_rate": 2.7041135026861803e-05, "loss": 2.0095, "step": 15869000 }, { "epoch": 45.94, "learning_rate": 2.7040411379214526e-05, "loss": 2.0352, "step": 15869500 }, { "epoch": 45.94, "learning_rate": 2.7039687731567248e-05, "loss": 2.0303, "step": 15870000 }, { "epoch": 45.94, "learning_rate": 2.703896408391997e-05, "loss": 2.0351, "step": 15870500 }, { "epoch": 45.94, "learning_rate": 2.7038240436272692e-05, "loss": 2.0017, "step": 15871000 }, { "epoch": 45.94, "learning_rate": 2.7037516788625418e-05, "loss": 2.0409, "step": 15871500 }, { "epoch": 45.94, "learning_rate": 2.703679314097814e-05, "loss": 2.0493, "step": 15872000 }, { "epoch": 45.94, "learning_rate": 2.7036069493330862e-05, "loss": 2.0444, "step": 15872500 }, { "epoch": 45.95, "learning_rate": 2.7035345845683585e-05, "loss": 2.0073, "step": 15873000 }, { "epoch": 45.95, "learning_rate": 2.7034623645331607e-05, "loss": 2.0462, "step": 15873500 }, { "epoch": 45.95, "learning_rate": 2.7033901444979626e-05, "loss": 2.0324, "step": 15874000 }, { "epoch": 45.95, "learning_rate": 2.7033177797332348e-05, "loss": 2.0257, "step": 15874500 }, { "epoch": 45.95, "learning_rate": 2.703245414968507e-05, "loss": 2.0121, "step": 15875000 }, { "epoch": 45.95, "learning_rate": 2.7031730502037793e-05, "loss": 2.0352, "step": 15875500 }, { "epoch": 45.95, "learning_rate": 2.703100685439052e-05, "loss": 2.0278, "step": 15876000 }, { "epoch": 45.96, "learning_rate": 2.7030284654038534e-05, "loss": 2.0306, "step": 15876500 }, { "epoch": 45.96, "learning_rate": 2.7029562453686553e-05, "loss": 2.0268, "step": 15877000 }, { "epoch": 45.96, "learning_rate": 2.702884314792516e-05, "loss": 2.0432, "step": 15877500 }, { "epoch": 45.96, "learning_rate": 2.702811950027788e-05, "loss": 2.0415, "step": 15878000 }, { "epoch": 45.96, "learning_rate": 2.7027395852630606e-05, "loss": 2.046, "step": 15878500 }, { "epoch": 45.96, "learning_rate": 2.702667220498333e-05, "loss": 2.0302, "step": 15879000 }, { "epoch": 45.96, "learning_rate": 2.702594855733605e-05, "loss": 2.0348, "step": 15879500 }, { "epoch": 45.97, "learning_rate": 2.7025224909688773e-05, "loss": 2.022, "step": 15880000 }, { "epoch": 45.97, "learning_rate": 2.7024501262041495e-05, "loss": 2.0384, "step": 15880500 }, { "epoch": 45.97, "learning_rate": 2.7023777614394217e-05, "loss": 2.0358, "step": 15881000 }, { "epoch": 45.97, "learning_rate": 2.7023055414042236e-05, "loss": 2.0211, "step": 15881500 }, { "epoch": 45.97, "learning_rate": 2.7022331766394965e-05, "loss": 2.0233, "step": 15882000 }, { "epoch": 45.97, "learning_rate": 2.7021608118747688e-05, "loss": 2.0589, "step": 15882500 }, { "epoch": 45.97, "learning_rate": 2.702088447110041e-05, "loss": 2.0349, "step": 15883000 }, { "epoch": 45.98, "learning_rate": 2.7020160823453132e-05, "loss": 2.0187, "step": 15883500 }, { "epoch": 45.98, "learning_rate": 2.7019437175805858e-05, "loss": 2.0231, "step": 15884000 }, { "epoch": 45.98, "learning_rate": 2.701871352815858e-05, "loss": 2.0444, "step": 15884500 }, { "epoch": 45.98, "learning_rate": 2.7017989880511302e-05, "loss": 2.0273, "step": 15885000 }, { "epoch": 45.98, "learning_rate": 2.7017266232864024e-05, "loss": 2.0156, "step": 15885500 }, { "epoch": 45.98, "learning_rate": 2.7016542585216747e-05, "loss": 1.9957, "step": 15886000 }, { "epoch": 45.98, "learning_rate": 2.701581893756947e-05, "loss": 2.0297, "step": 15886500 }, { "epoch": 45.99, "learning_rate": 2.7015095289922194e-05, "loss": 2.0135, "step": 15887000 }, { "epoch": 45.99, "learning_rate": 2.7014371642274917e-05, "loss": 2.0436, "step": 15887500 }, { "epoch": 45.99, "learning_rate": 2.701364799462764e-05, "loss": 2.009, "step": 15888000 }, { "epoch": 45.99, "learning_rate": 2.701292434698036e-05, "loss": 2.0197, "step": 15888500 }, { "epoch": 45.99, "learning_rate": 2.701220069933309e-05, "loss": 2.0243, "step": 15889000 }, { "epoch": 45.99, "learning_rate": 2.7011477051685812e-05, "loss": 2.0323, "step": 15889500 }, { "epoch": 46.0, "learning_rate": 2.701075485133383e-05, "loss": 2.0312, "step": 15890000 }, { "epoch": 46.0, "learning_rate": 2.7010032650981847e-05, "loss": 2.0394, "step": 15890500 }, { "epoch": 46.0, "learning_rate": 2.700930900333457e-05, "loss": 2.0192, "step": 15891000 }, { "epoch": 46.0, "learning_rate": 2.7008585355687295e-05, "loss": 2.0276, "step": 15891500 }, { "epoch": 46.0, "eval_accuracy": 0.6718168801260671, "eval_accuracy_mlm": 0.6375680241316996, "eval_accuracy_nsp": 0.8556094833473118, "eval_loss": 2.1730716228485107, "eval_runtime": 331.3659, "eval_samples_per_second": 1316.931, "eval_steps_per_second": 54.873, "step": 15891712 }, { "epoch": 46.0, "learning_rate": 2.7007861708040017e-05, "loss": 1.9985, "step": 15892000 }, { "epoch": 46.0, "learning_rate": 2.700713806039274e-05, "loss": 2.0023, "step": 15892500 }, { "epoch": 46.0, "learning_rate": 2.700641441274546e-05, "loss": 2.035, "step": 15893000 }, { "epoch": 46.01, "learning_rate": 2.7005690765098184e-05, "loss": 1.9903, "step": 15893500 }, { "epoch": 46.01, "learning_rate": 2.700496711745091e-05, "loss": 1.9988, "step": 15894000 }, { "epoch": 46.01, "learning_rate": 2.700424346980363e-05, "loss": 1.9995, "step": 15894500 }, { "epoch": 46.01, "learning_rate": 2.7003519822156354e-05, "loss": 1.9948, "step": 15895000 }, { "epoch": 46.01, "learning_rate": 2.7002796174509076e-05, "loss": 1.9956, "step": 15895500 }, { "epoch": 46.01, "learning_rate": 2.7002072526861805e-05, "loss": 1.993, "step": 15896000 }, { "epoch": 46.01, "learning_rate": 2.7001348879214527e-05, "loss": 2.01, "step": 15896500 }, { "epoch": 46.02, "learning_rate": 2.700062523156725e-05, "loss": 2.0034, "step": 15897000 }, { "epoch": 46.02, "learning_rate": 2.6999901583919972e-05, "loss": 2.0214, "step": 15897500 }, { "epoch": 46.02, "learning_rate": 2.6999177936272697e-05, "loss": 2.0176, "step": 15898000 }, { "epoch": 46.02, "learning_rate": 2.699845428862542e-05, "loss": 2.0045, "step": 15898500 }, { "epoch": 46.02, "learning_rate": 2.6997732088273435e-05, "loss": 1.9933, "step": 15899000 }, { "epoch": 46.02, "learning_rate": 2.699700844062616e-05, "loss": 1.9905, "step": 15899500 }, { "epoch": 46.02, "learning_rate": 2.6996284792978883e-05, "loss": 2.0168, "step": 15900000 }, { "epoch": 46.03, "learning_rate": 2.69955625926269e-05, "loss": 2.0133, "step": 15900500 }, { "epoch": 46.03, "learning_rate": 2.6994840392274918e-05, "loss": 2.0365, "step": 15901000 }, { "epoch": 46.03, "learning_rate": 2.699411674462764e-05, "loss": 2.0242, "step": 15901500 }, { "epoch": 46.03, "learning_rate": 2.6993393096980362e-05, "loss": 2.0142, "step": 15902000 }, { "epoch": 46.03, "learning_rate": 2.699267089662838e-05, "loss": 2.0126, "step": 15902500 }, { "epoch": 46.03, "learning_rate": 2.6991948696276396e-05, "loss": 1.9939, "step": 15903000 }, { "epoch": 46.03, "learning_rate": 2.6991225048629122e-05, "loss": 2.0054, "step": 15903500 }, { "epoch": 46.04, "learning_rate": 2.6990501400981848e-05, "loss": 1.9977, "step": 15904000 }, { "epoch": 46.04, "learning_rate": 2.6989777753334573e-05, "loss": 2.0106, "step": 15904500 }, { "epoch": 46.04, "learning_rate": 2.6989054105687296e-05, "loss": 2.0104, "step": 15905000 }, { "epoch": 46.04, "learning_rate": 2.6988330458040018e-05, "loss": 2.0162, "step": 15905500 }, { "epoch": 46.04, "learning_rate": 2.698760681039274e-05, "loss": 2.0092, "step": 15906000 }, { "epoch": 46.04, "learning_rate": 2.6986883162745462e-05, "loss": 2.0157, "step": 15906500 }, { "epoch": 46.04, "learning_rate": 2.6986159515098185e-05, "loss": 2.0011, "step": 15907000 }, { "epoch": 46.05, "learning_rate": 2.698543586745091e-05, "loss": 2.0162, "step": 15907500 }, { "epoch": 46.05, "learning_rate": 2.6984712219803632e-05, "loss": 2.0117, "step": 15908000 }, { "epoch": 46.05, "learning_rate": 2.6983988572156355e-05, "loss": 2.0073, "step": 15908500 }, { "epoch": 46.05, "learning_rate": 2.6983266371804374e-05, "loss": 2.0058, "step": 15909000 }, { "epoch": 46.05, "learning_rate": 2.6982542724157096e-05, "loss": 1.9911, "step": 15909500 }, { "epoch": 46.05, "learning_rate": 2.6981819076509818e-05, "loss": 1.9951, "step": 15910000 }, { "epoch": 46.05, "learning_rate": 2.6981096876157837e-05, "loss": 2.0266, "step": 15910500 }, { "epoch": 46.06, "learning_rate": 2.698037322851056e-05, "loss": 2.0092, "step": 15911000 }, { "epoch": 46.06, "learning_rate": 2.6979649580863288e-05, "loss": 2.0122, "step": 15911500 }, { "epoch": 46.06, "learning_rate": 2.697892593321601e-05, "loss": 2.0094, "step": 15912000 }, { "epoch": 46.06, "learning_rate": 2.6978202285568733e-05, "loss": 2.0108, "step": 15912500 }, { "epoch": 46.06, "learning_rate": 2.6977478637921455e-05, "loss": 2.0112, "step": 15913000 }, { "epoch": 46.06, "learning_rate": 2.6976754990274177e-05, "loss": 1.9968, "step": 15913500 }, { "epoch": 46.06, "learning_rate": 2.69760313426269e-05, "loss": 1.9882, "step": 15914000 }, { "epoch": 46.07, "learning_rate": 2.6975307694979625e-05, "loss": 2.0421, "step": 15914500 }, { "epoch": 46.07, "learning_rate": 2.6974584047332347e-05, "loss": 2.0322, "step": 15915000 }, { "epoch": 46.07, "learning_rate": 2.697386039968507e-05, "loss": 2.0063, "step": 15915500 }, { "epoch": 46.07, "learning_rate": 2.6973136752037792e-05, "loss": 1.9937, "step": 15916000 }, { "epoch": 46.07, "learning_rate": 2.6972413104390514e-05, "loss": 2.0137, "step": 15916500 }, { "epoch": 46.07, "learning_rate": 2.6971689456743236e-05, "loss": 2.0021, "step": 15917000 }, { "epoch": 46.07, "learning_rate": 2.6970965809095962e-05, "loss": 2.0094, "step": 15917500 }, { "epoch": 46.08, "learning_rate": 2.6970242161448684e-05, "loss": 2.0142, "step": 15918000 }, { "epoch": 46.08, "learning_rate": 2.6969519961096706e-05, "loss": 2.0127, "step": 15918500 }, { "epoch": 46.08, "learning_rate": 2.6968797760744725e-05, "loss": 2.0056, "step": 15919000 }, { "epoch": 46.08, "learning_rate": 2.6968074113097448e-05, "loss": 2.038, "step": 15919500 }, { "epoch": 46.08, "learning_rate": 2.696735046545017e-05, "loss": 2.0184, "step": 15920000 }, { "epoch": 46.08, "learning_rate": 2.6966626817802892e-05, "loss": 2.0286, "step": 15920500 }, { "epoch": 46.08, "learning_rate": 2.696590461745091e-05, "loss": 2.0169, "step": 15921000 }, { "epoch": 46.09, "learning_rate": 2.6965180969803633e-05, "loss": 2.0142, "step": 15921500 }, { "epoch": 46.09, "learning_rate": 2.6964457322156355e-05, "loss": 1.9985, "step": 15922000 }, { "epoch": 46.09, "learning_rate": 2.6963733674509078e-05, "loss": 2.0167, "step": 15922500 }, { "epoch": 46.09, "learning_rate": 2.69630100268618e-05, "loss": 2.0377, "step": 15923000 }, { "epoch": 46.09, "learning_rate": 2.6962286379214525e-05, "loss": 2.0031, "step": 15923500 }, { "epoch": 46.09, "learning_rate": 2.6961562731567248e-05, "loss": 2.0127, "step": 15924000 }, { "epoch": 46.09, "learning_rate": 2.6960840531215263e-05, "loss": 2.0001, "step": 15924500 }, { "epoch": 46.1, "learning_rate": 2.696011688356799e-05, "loss": 2.0058, "step": 15925000 }, { "epoch": 46.1, "learning_rate": 2.695939323592071e-05, "loss": 2.0104, "step": 15925500 }, { "epoch": 46.1, "learning_rate": 2.695866958827344e-05, "loss": 2.0137, "step": 15926000 }, { "epoch": 46.1, "learning_rate": 2.6957945940626162e-05, "loss": 2.0023, "step": 15926500 }, { "epoch": 46.1, "learning_rate": 2.6957222292978885e-05, "loss": 2.0173, "step": 15927000 }, { "epoch": 46.1, "learning_rate": 2.6956498645331607e-05, "loss": 2.0341, "step": 15927500 }, { "epoch": 46.11, "learning_rate": 2.695577499768433e-05, "loss": 1.9917, "step": 15928000 }, { "epoch": 46.11, "learning_rate": 2.695505135003705e-05, "loss": 2.0276, "step": 15928500 }, { "epoch": 46.11, "learning_rate": 2.6954327702389777e-05, "loss": 2.0088, "step": 15929000 }, { "epoch": 46.11, "learning_rate": 2.69536040547425e-05, "loss": 2.0115, "step": 15929500 }, { "epoch": 46.11, "learning_rate": 2.6952881854390515e-05, "loss": 2.0278, "step": 15930000 }, { "epoch": 46.11, "learning_rate": 2.6952159654038534e-05, "loss": 2.0032, "step": 15930500 }, { "epoch": 46.11, "learning_rate": 2.6951436006391256e-05, "loss": 2.0107, "step": 15931000 }, { "epoch": 46.12, "learning_rate": 2.6950712358743978e-05, "loss": 2.0136, "step": 15931500 }, { "epoch": 46.12, "learning_rate": 2.69499887110967e-05, "loss": 2.0139, "step": 15932000 }, { "epoch": 46.12, "learning_rate": 2.6949265063449426e-05, "loss": 2.0094, "step": 15932500 }, { "epoch": 46.12, "learning_rate": 2.6948541415802148e-05, "loss": 2.0302, "step": 15933000 }, { "epoch": 46.12, "learning_rate": 2.6947817768154877e-05, "loss": 2.0268, "step": 15933500 }, { "epoch": 46.12, "learning_rate": 2.69470941205076e-05, "loss": 2.0146, "step": 15934000 }, { "epoch": 46.12, "learning_rate": 2.6946371920155615e-05, "loss": 2.0107, "step": 15934500 }, { "epoch": 46.13, "learning_rate": 2.694564827250834e-05, "loss": 2.0306, "step": 15935000 }, { "epoch": 46.13, "learning_rate": 2.6944924624861063e-05, "loss": 2.0149, "step": 15935500 }, { "epoch": 46.13, "learning_rate": 2.6944200977213785e-05, "loss": 2.0089, "step": 15936000 }, { "epoch": 46.13, "learning_rate": 2.6943478776861804e-05, "loss": 2.0092, "step": 15936500 }, { "epoch": 46.13, "learning_rate": 2.694275657650982e-05, "loss": 2.03, "step": 15937000 }, { "epoch": 46.13, "learning_rate": 2.6942032928862542e-05, "loss": 1.9878, "step": 15937500 }, { "epoch": 46.13, "learning_rate": 2.6941309281215264e-05, "loss": 2.003, "step": 15938000 }, { "epoch": 46.14, "learning_rate": 2.6940587080863283e-05, "loss": 2.0201, "step": 15938500 }, { "epoch": 46.14, "learning_rate": 2.6939863433216005e-05, "loss": 2.0296, "step": 15939000 }, { "epoch": 46.14, "learning_rate": 2.6939139785568727e-05, "loss": 2.0137, "step": 15939500 }, { "epoch": 46.14, "learning_rate": 2.6938416137921453e-05, "loss": 2.0063, "step": 15940000 }, { "epoch": 46.14, "learning_rate": 2.6937692490274175e-05, "loss": 2.0145, "step": 15940500 }, { "epoch": 46.14, "learning_rate": 2.6936968842626904e-05, "loss": 2.0143, "step": 15941000 }, { "epoch": 46.14, "learning_rate": 2.6936245194979627e-05, "loss": 1.9999, "step": 15941500 }, { "epoch": 46.15, "learning_rate": 2.693552154733235e-05, "loss": 2.0374, "step": 15942000 }, { "epoch": 46.15, "learning_rate": 2.693479789968507e-05, "loss": 2.0146, "step": 15942500 }, { "epoch": 46.15, "learning_rate": 2.6934074252037793e-05, "loss": 2.0187, "step": 15943000 }, { "epoch": 46.15, "learning_rate": 2.6933350604390516e-05, "loss": 2.0102, "step": 15943500 }, { "epoch": 46.15, "learning_rate": 2.693262695674324e-05, "loss": 1.9936, "step": 15944000 }, { "epoch": 46.15, "learning_rate": 2.6931903309095963e-05, "loss": 2.0111, "step": 15944500 }, { "epoch": 46.15, "learning_rate": 2.6931179661448686e-05, "loss": 2.0024, "step": 15945000 }, { "epoch": 46.16, "learning_rate": 2.6930457461096705e-05, "loss": 2.021, "step": 15945500 }, { "epoch": 46.16, "learning_rate": 2.6929733813449427e-05, "loss": 2.0228, "step": 15946000 }, { "epoch": 46.16, "learning_rate": 2.6929011613097442e-05, "loss": 2.0188, "step": 15946500 }, { "epoch": 46.16, "learning_rate": 2.6928287965450168e-05, "loss": 2.0219, "step": 15947000 }, { "epoch": 46.16, "learning_rate": 2.692756431780289e-05, "loss": 2.005, "step": 15947500 }, { "epoch": 46.16, "learning_rate": 2.6926840670155612e-05, "loss": 2.0074, "step": 15948000 }, { "epoch": 46.16, "learning_rate": 2.6926118469803628e-05, "loss": 2.0188, "step": 15948500 }, { "epoch": 46.17, "learning_rate": 2.6925394822156357e-05, "loss": 2.0114, "step": 15949000 }, { "epoch": 46.17, "learning_rate": 2.692467117450908e-05, "loss": 2.006, "step": 15949500 }, { "epoch": 46.17, "learning_rate": 2.6923947526861805e-05, "loss": 1.9915, "step": 15950000 }, { "epoch": 46.17, "learning_rate": 2.6923223879214527e-05, "loss": 2.0103, "step": 15950500 }, { "epoch": 46.17, "learning_rate": 2.6922501678862543e-05, "loss": 1.9995, "step": 15951000 }, { "epoch": 46.17, "learning_rate": 2.6921778031215268e-05, "loss": 1.9913, "step": 15951500 }, { "epoch": 46.17, "learning_rate": 2.692105438356799e-05, "loss": 2.0216, "step": 15952000 }, { "epoch": 46.18, "learning_rate": 2.6920330735920713e-05, "loss": 1.9647, "step": 15952500 }, { "epoch": 46.18, "learning_rate": 2.6919607088273435e-05, "loss": 2.0086, "step": 15953000 }, { "epoch": 46.18, "learning_rate": 2.6918883440626157e-05, "loss": 2.0057, "step": 15953500 }, { "epoch": 46.18, "learning_rate": 2.691815979297888e-05, "loss": 2.0096, "step": 15954000 }, { "epoch": 46.18, "learning_rate": 2.6917436145331605e-05, "loss": 2.0168, "step": 15954500 }, { "epoch": 46.18, "learning_rate": 2.6916712497684327e-05, "loss": 2.0318, "step": 15955000 }, { "epoch": 46.18, "learning_rate": 2.691598885003705e-05, "loss": 2.0257, "step": 15955500 }, { "epoch": 46.19, "learning_rate": 2.691526520238978e-05, "loss": 2.0384, "step": 15956000 }, { "epoch": 46.19, "learning_rate": 2.69145415547425e-05, "loss": 2.0052, "step": 15956500 }, { "epoch": 46.19, "learning_rate": 2.6913817907095223e-05, "loss": 2.005, "step": 15957000 }, { "epoch": 46.19, "learning_rate": 2.6913097154038535e-05, "loss": 1.9957, "step": 15957500 }, { "epoch": 46.19, "learning_rate": 2.6912374953686554e-05, "loss": 2.0122, "step": 15958000 }, { "epoch": 46.19, "learning_rate": 2.6911651306039276e-05, "loss": 2.0174, "step": 15958500 }, { "epoch": 46.19, "learning_rate": 2.6910927658392e-05, "loss": 1.9995, "step": 15959000 }, { "epoch": 46.2, "learning_rate": 2.691020401074472e-05, "loss": 2.0227, "step": 15959500 }, { "epoch": 46.2, "learning_rate": 2.6909480363097443e-05, "loss": 2.007, "step": 15960000 }, { "epoch": 46.2, "learning_rate": 2.690875671545017e-05, "loss": 2.0094, "step": 15960500 }, { "epoch": 46.2, "learning_rate": 2.690803306780289e-05, "loss": 2.0037, "step": 15961000 }, { "epoch": 46.2, "learning_rate": 2.6907309420155613e-05, "loss": 2.0274, "step": 15961500 }, { "epoch": 46.2, "learning_rate": 2.6906585772508335e-05, "loss": 2.0305, "step": 15962000 }, { "epoch": 46.2, "learning_rate": 2.6905863572156354e-05, "loss": 2.0074, "step": 15962500 }, { "epoch": 46.21, "learning_rate": 2.690514137180437e-05, "loss": 2.0111, "step": 15963000 }, { "epoch": 46.21, "learning_rate": 2.6904417724157092e-05, "loss": 2.0154, "step": 15963500 }, { "epoch": 46.21, "learning_rate": 2.690369407650982e-05, "loss": 2.0065, "step": 15964000 }, { "epoch": 46.21, "learning_rate": 2.6902970428862543e-05, "loss": 2.0041, "step": 15964500 }, { "epoch": 46.21, "learning_rate": 2.690224678121527e-05, "loss": 2.0229, "step": 15965000 }, { "epoch": 46.21, "learning_rate": 2.6901524580863285e-05, "loss": 2.0535, "step": 15965500 }, { "epoch": 46.22, "learning_rate": 2.6900800933216007e-05, "loss": 2.0124, "step": 15966000 }, { "epoch": 46.22, "learning_rate": 2.6900077285568732e-05, "loss": 2.0179, "step": 15966500 }, { "epoch": 46.22, "learning_rate": 2.6899353637921455e-05, "loss": 2.0049, "step": 15967000 }, { "epoch": 46.22, "learning_rate": 2.6898629990274177e-05, "loss": 2.0434, "step": 15967500 }, { "epoch": 46.22, "learning_rate": 2.6897907789922196e-05, "loss": 2.0348, "step": 15968000 }, { "epoch": 46.22, "learning_rate": 2.6897184142274918e-05, "loss": 2.0111, "step": 15968500 }, { "epoch": 46.22, "learning_rate": 2.6896461941922934e-05, "loss": 2.0153, "step": 15969000 }, { "epoch": 46.23, "learning_rate": 2.6895738294275656e-05, "loss": 2.0025, "step": 15969500 }, { "epoch": 46.23, "learning_rate": 2.689501464662838e-05, "loss": 2.0005, "step": 15970000 }, { "epoch": 46.23, "learning_rate": 2.6894290998981104e-05, "loss": 1.997, "step": 15970500 }, { "epoch": 46.23, "learning_rate": 2.6893567351333826e-05, "loss": 2.0179, "step": 15971000 }, { "epoch": 46.23, "learning_rate": 2.6892843703686555e-05, "loss": 2.0, "step": 15971500 }, { "epoch": 46.23, "learning_rate": 2.6892120056039277e-05, "loss": 2.0334, "step": 15972000 }, { "epoch": 46.23, "learning_rate": 2.6891396408392e-05, "loss": 2.0032, "step": 15972500 }, { "epoch": 46.24, "learning_rate": 2.6890672760744722e-05, "loss": 2.0126, "step": 15973000 }, { "epoch": 46.24, "learning_rate": 2.6889949113097447e-05, "loss": 2.0198, "step": 15973500 }, { "epoch": 46.24, "learning_rate": 2.688922546545017e-05, "loss": 2.0013, "step": 15974000 }, { "epoch": 46.24, "learning_rate": 2.6888501817802892e-05, "loss": 2.0048, "step": 15974500 }, { "epoch": 46.24, "learning_rate": 2.6887781064746204e-05, "loss": 2.0135, "step": 15975000 }, { "epoch": 46.24, "learning_rate": 2.6887057417098926e-05, "loss": 2.0265, "step": 15975500 }, { "epoch": 46.24, "learning_rate": 2.688633376945165e-05, "loss": 2.0002, "step": 15976000 }, { "epoch": 46.25, "learning_rate": 2.688561012180437e-05, "loss": 2.0234, "step": 15976500 }, { "epoch": 46.25, "learning_rate": 2.688488792145239e-05, "loss": 2.0172, "step": 15977000 }, { "epoch": 46.25, "learning_rate": 2.6884164273805112e-05, "loss": 2.0148, "step": 15977500 }, { "epoch": 46.25, "learning_rate": 2.6883440626157834e-05, "loss": 2.0063, "step": 15978000 }, { "epoch": 46.25, "learning_rate": 2.6882718425805853e-05, "loss": 2.0123, "step": 15978500 }, { "epoch": 46.25, "learning_rate": 2.6881994778158582e-05, "loss": 2.0326, "step": 15979000 }, { "epoch": 46.25, "learning_rate": 2.6881271130511304e-05, "loss": 1.9967, "step": 15979500 }, { "epoch": 46.26, "learning_rate": 2.6880547482864027e-05, "loss": 2.0239, "step": 15980000 }, { "epoch": 46.26, "learning_rate": 2.687982383521675e-05, "loss": 2.0271, "step": 15980500 }, { "epoch": 46.26, "learning_rate": 2.687910018756947e-05, "loss": 2.0217, "step": 15981000 }, { "epoch": 46.26, "learning_rate": 2.6878376539922197e-05, "loss": 2.015, "step": 15981500 }, { "epoch": 46.26, "learning_rate": 2.687765289227492e-05, "loss": 2.0208, "step": 15982000 }, { "epoch": 46.26, "learning_rate": 2.687692924462764e-05, "loss": 2.0178, "step": 15982500 }, { "epoch": 46.26, "learning_rate": 2.6876205596980363e-05, "loss": 1.9887, "step": 15983000 }, { "epoch": 46.27, "learning_rate": 2.6875481949333086e-05, "loss": 2.0085, "step": 15983500 }, { "epoch": 46.27, "learning_rate": 2.687475830168581e-05, "loss": 2.0137, "step": 15984000 }, { "epoch": 46.27, "learning_rate": 2.6874034654038533e-05, "loss": 2.0159, "step": 15984500 }, { "epoch": 46.27, "learning_rate": 2.6873311006391256e-05, "loss": 2.0105, "step": 15985000 }, { "epoch": 46.27, "learning_rate": 2.6872587358743978e-05, "loss": 2.0174, "step": 15985500 }, { "epoch": 46.27, "learning_rate": 2.6871865158391997e-05, "loss": 2.0261, "step": 15986000 }, { "epoch": 46.27, "learning_rate": 2.6871141510744723e-05, "loss": 2.0242, "step": 15986500 }, { "epoch": 46.28, "learning_rate": 2.687041931039274e-05, "loss": 2.0059, "step": 15987000 }, { "epoch": 46.28, "learning_rate": 2.6869695662745464e-05, "loss": 2.0145, "step": 15987500 }, { "epoch": 46.28, "learning_rate": 2.6868972015098186e-05, "loss": 2.0448, "step": 15988000 }, { "epoch": 46.28, "learning_rate": 2.686824836745091e-05, "loss": 2.0079, "step": 15988500 }, { "epoch": 46.28, "learning_rate": 2.6867524719803634e-05, "loss": 2.0397, "step": 15989000 }, { "epoch": 46.28, "learning_rate": 2.686680251945165e-05, "loss": 2.0275, "step": 15989500 }, { "epoch": 46.28, "learning_rate": 2.686607887180437e-05, "loss": 2.0108, "step": 15990000 }, { "epoch": 46.29, "learning_rate": 2.6865355224157097e-05, "loss": 2.0304, "step": 15990500 }, { "epoch": 46.29, "learning_rate": 2.686463157650982e-05, "loss": 2.0043, "step": 15991000 }, { "epoch": 46.29, "learning_rate": 2.6863909376157835e-05, "loss": 1.9993, "step": 15991500 }, { "epoch": 46.29, "learning_rate": 2.686318572851056e-05, "loss": 1.9863, "step": 15992000 }, { "epoch": 46.29, "learning_rate": 2.6862462080863283e-05, "loss": 2.0138, "step": 15992500 }, { "epoch": 46.29, "learning_rate": 2.6861738433216005e-05, "loss": 1.989, "step": 15993000 }, { "epoch": 46.29, "learning_rate": 2.6861014785568727e-05, "loss": 2.0312, "step": 15993500 }, { "epoch": 46.3, "learning_rate": 2.6860291137921456e-05, "loss": 1.9926, "step": 15994000 }, { "epoch": 46.3, "learning_rate": 2.685956749027418e-05, "loss": 2.0391, "step": 15994500 }, { "epoch": 46.3, "learning_rate": 2.68588438426269e-05, "loss": 2.0125, "step": 15995000 }, { "epoch": 46.3, "learning_rate": 2.6858120194979623e-05, "loss": 2.0152, "step": 15995500 }, { "epoch": 46.3, "learning_rate": 2.685739654733235e-05, "loss": 2.0086, "step": 15996000 }, { "epoch": 46.3, "learning_rate": 2.6856674346980364e-05, "loss": 1.9981, "step": 15996500 }, { "epoch": 46.3, "learning_rate": 2.6855952146628383e-05, "loss": 2.0041, "step": 15997000 }, { "epoch": 46.31, "learning_rate": 2.6855228498981105e-05, "loss": 2.024, "step": 15997500 }, { "epoch": 46.31, "learning_rate": 2.6854504851333828e-05, "loss": 1.9922, "step": 15998000 }, { "epoch": 46.31, "learning_rate": 2.685378120368655e-05, "loss": 2.0332, "step": 15998500 }, { "epoch": 46.31, "learning_rate": 2.685305900333457e-05, "loss": 2.006, "step": 15999000 }, { "epoch": 46.31, "learning_rate": 2.685233535568729e-05, "loss": 1.9998, "step": 15999500 }, { "epoch": 46.31, "learning_rate": 2.6851611708040013e-05, "loss": 2.0511, "step": 16000000 }, { "epoch": 46.31, "learning_rate": 2.6850888060392735e-05, "loss": 2.0407, "step": 16000500 }, { "epoch": 46.32, "learning_rate": 2.685016441274546e-05, "loss": 2.0295, "step": 16001000 }, { "epoch": 46.32, "learning_rate": 2.6849440765098187e-05, "loss": 2.0244, "step": 16001500 }, { "epoch": 46.32, "learning_rate": 2.6848717117450912e-05, "loss": 2.01, "step": 16002000 }, { "epoch": 46.32, "learning_rate": 2.6847993469803635e-05, "loss": 2.0331, "step": 16002500 }, { "epoch": 46.32, "learning_rate": 2.6847269822156357e-05, "loss": 1.9945, "step": 16003000 }, { "epoch": 46.32, "learning_rate": 2.684654617450908e-05, "loss": 2.0342, "step": 16003500 }, { "epoch": 46.33, "learning_rate": 2.68458225268618e-05, "loss": 2.0183, "step": 16004000 }, { "epoch": 46.33, "learning_rate": 2.684510032650982e-05, "loss": 2.0128, "step": 16004500 }, { "epoch": 46.33, "learning_rate": 2.684437812615784e-05, "loss": 2.0004, "step": 16005000 }, { "epoch": 46.33, "learning_rate": 2.684365447851056e-05, "loss": 2.0425, "step": 16005500 }, { "epoch": 46.33, "learning_rate": 2.6842930830863284e-05, "loss": 2.0194, "step": 16006000 }, { "epoch": 46.33, "learning_rate": 2.6842207183216006e-05, "loss": 2.0424, "step": 16006500 }, { "epoch": 46.33, "learning_rate": 2.6841483535568728e-05, "loss": 2.0077, "step": 16007000 }, { "epoch": 46.34, "learning_rate": 2.684075988792145e-05, "loss": 2.0282, "step": 16007500 }, { "epoch": 46.34, "learning_rate": 2.6840036240274176e-05, "loss": 2.0077, "step": 16008000 }, { "epoch": 46.34, "learning_rate": 2.68393125926269e-05, "loss": 2.0077, "step": 16008500 }, { "epoch": 46.34, "learning_rate": 2.6838588944979627e-05, "loss": 2.0142, "step": 16009000 }, { "epoch": 46.34, "learning_rate": 2.683786529733235e-05, "loss": 2.0272, "step": 16009500 }, { "epoch": 46.34, "learning_rate": 2.683714164968507e-05, "loss": 2.0041, "step": 16010000 }, { "epoch": 46.34, "learning_rate": 2.6836418002037794e-05, "loss": 1.9908, "step": 16010500 }, { "epoch": 46.35, "learning_rate": 2.6835695801685813e-05, "loss": 2.0176, "step": 16011000 }, { "epoch": 46.35, "learning_rate": 2.6834972154038535e-05, "loss": 2.0233, "step": 16011500 }, { "epoch": 46.35, "learning_rate": 2.6834248506391257e-05, "loss": 2.0154, "step": 16012000 }, { "epoch": 46.35, "learning_rate": 2.683352485874398e-05, "loss": 2.023, "step": 16012500 }, { "epoch": 46.35, "learning_rate": 2.6832801211096702e-05, "loss": 2.0283, "step": 16013000 }, { "epoch": 46.35, "learning_rate": 2.6832077563449427e-05, "loss": 2.0041, "step": 16013500 }, { "epoch": 46.35, "learning_rate": 2.683135391580215e-05, "loss": 1.9918, "step": 16014000 }, { "epoch": 46.36, "learning_rate": 2.6830630268154872e-05, "loss": 2.0165, "step": 16014500 }, { "epoch": 46.36, "learning_rate": 2.6829906620507594e-05, "loss": 2.0054, "step": 16015000 }, { "epoch": 46.36, "learning_rate": 2.6829182972860316e-05, "loss": 2.0215, "step": 16015500 }, { "epoch": 46.36, "learning_rate": 2.6828459325213045e-05, "loss": 2.0009, "step": 16016000 }, { "epoch": 46.36, "learning_rate": 2.6827735677565768e-05, "loss": 2.0304, "step": 16016500 }, { "epoch": 46.36, "learning_rate": 2.682701202991849e-05, "loss": 2.023, "step": 16017000 }, { "epoch": 46.36, "learning_rate": 2.6826288382271215e-05, "loss": 2.022, "step": 16017500 }, { "epoch": 46.37, "learning_rate": 2.682556618191923e-05, "loss": 2.0334, "step": 16018000 }, { "epoch": 46.37, "learning_rate": 2.6824842534271953e-05, "loss": 2.0024, "step": 16018500 }, { "epoch": 46.37, "learning_rate": 2.682411888662468e-05, "loss": 2.0219, "step": 16019000 }, { "epoch": 46.37, "learning_rate": 2.68233952389774e-05, "loss": 2.0204, "step": 16019500 }, { "epoch": 46.37, "learning_rate": 2.6822671591330123e-05, "loss": 1.9871, "step": 16020000 }, { "epoch": 46.37, "learning_rate": 2.6821947943682846e-05, "loss": 2.0036, "step": 16020500 }, { "epoch": 46.37, "learning_rate": 2.6821224296035568e-05, "loss": 2.0164, "step": 16021000 }, { "epoch": 46.38, "learning_rate": 2.682050064838829e-05, "loss": 2.0123, "step": 16021500 }, { "epoch": 46.38, "learning_rate": 2.6819777000741016e-05, "loss": 2.0025, "step": 16022000 }, { "epoch": 46.38, "learning_rate": 2.6819053353093738e-05, "loss": 2.0383, "step": 16022500 }, { "epoch": 46.38, "learning_rate": 2.6818329705446467e-05, "loss": 2.0121, "step": 16023000 }, { "epoch": 46.38, "learning_rate": 2.681760605779919e-05, "loss": 2.0066, "step": 16023500 }, { "epoch": 46.38, "learning_rate": 2.681688241015191e-05, "loss": 2.0085, "step": 16024000 }, { "epoch": 46.38, "learning_rate": 2.6816158762504634e-05, "loss": 2.0258, "step": 16024500 }, { "epoch": 46.39, "learning_rate": 2.6815436562152653e-05, "loss": 1.9954, "step": 16025000 }, { "epoch": 46.39, "learning_rate": 2.6814712914505375e-05, "loss": 1.9894, "step": 16025500 }, { "epoch": 46.39, "learning_rate": 2.681399071415339e-05, "loss": 2.0518, "step": 16026000 }, { "epoch": 46.39, "learning_rate": 2.6813267066506116e-05, "loss": 1.9916, "step": 16026500 }, { "epoch": 46.39, "learning_rate": 2.6812543418858838e-05, "loss": 2.0222, "step": 16027000 }, { "epoch": 46.39, "learning_rate": 2.681181977121156e-05, "loss": 2.0279, "step": 16027500 }, { "epoch": 46.39, "learning_rate": 2.681109757085958e-05, "loss": 2.0129, "step": 16028000 }, { "epoch": 46.4, "learning_rate": 2.6810375370507595e-05, "loss": 2.0234, "step": 16028500 }, { "epoch": 46.4, "learning_rate": 2.6809651722860317e-05, "loss": 2.0283, "step": 16029000 }, { "epoch": 46.4, "learning_rate": 2.6808928075213043e-05, "loss": 2.0239, "step": 16029500 }, { "epoch": 46.4, "learning_rate": 2.6808204427565765e-05, "loss": 2.0145, "step": 16030000 }, { "epoch": 46.4, "learning_rate": 2.6807480779918494e-05, "loss": 2.0193, "step": 16030500 }, { "epoch": 46.4, "learning_rate": 2.6806757132271216e-05, "loss": 2.0226, "step": 16031000 }, { "epoch": 46.4, "learning_rate": 2.680603348462394e-05, "loss": 2.0186, "step": 16031500 }, { "epoch": 46.41, "learning_rate": 2.6805311284271954e-05, "loss": 2.0179, "step": 16032000 }, { "epoch": 46.41, "learning_rate": 2.680458763662468e-05, "loss": 2.0103, "step": 16032500 }, { "epoch": 46.41, "learning_rate": 2.6803865436272695e-05, "loss": 2.0116, "step": 16033000 }, { "epoch": 46.41, "learning_rate": 2.6803143235920714e-05, "loss": 2.024, "step": 16033500 }, { "epoch": 46.41, "learning_rate": 2.6802419588273436e-05, "loss": 2.0299, "step": 16034000 }, { "epoch": 46.41, "learning_rate": 2.6801697387921455e-05, "loss": 2.0438, "step": 16034500 }, { "epoch": 46.41, "learning_rate": 2.6800973740274178e-05, "loss": 2.027, "step": 16035000 }, { "epoch": 46.42, "learning_rate": 2.68002500926269e-05, "loss": 2.0263, "step": 16035500 }, { "epoch": 46.42, "learning_rate": 2.6799526444979622e-05, "loss": 2.0323, "step": 16036000 }, { "epoch": 46.42, "learning_rate": 2.6798802797332344e-05, "loss": 1.9995, "step": 16036500 }, { "epoch": 46.42, "learning_rate": 2.6798079149685066e-05, "loss": 2.0279, "step": 16037000 }, { "epoch": 46.42, "learning_rate": 2.6797355502037792e-05, "loss": 1.979, "step": 16037500 }, { "epoch": 46.42, "learning_rate": 2.6796631854390514e-05, "loss": 2.0211, "step": 16038000 }, { "epoch": 46.42, "learning_rate": 2.6795908206743243e-05, "loss": 2.0112, "step": 16038500 }, { "epoch": 46.43, "learning_rate": 2.679518600639126e-05, "loss": 2.0282, "step": 16039000 }, { "epoch": 46.43, "learning_rate": 2.679446235874398e-05, "loss": 2.0115, "step": 16039500 }, { "epoch": 46.43, "learning_rate": 2.6793738711096707e-05, "loss": 2.0398, "step": 16040000 }, { "epoch": 46.43, "learning_rate": 2.679301506344943e-05, "loss": 2.0148, "step": 16040500 }, { "epoch": 46.43, "learning_rate": 2.679229141580215e-05, "loss": 1.9981, "step": 16041000 }, { "epoch": 46.43, "learning_rate": 2.6791567768154873e-05, "loss": 2.0014, "step": 16041500 }, { "epoch": 46.44, "learning_rate": 2.6790844120507596e-05, "loss": 2.0178, "step": 16042000 }, { "epoch": 46.44, "learning_rate": 2.6790120472860318e-05, "loss": 2.0154, "step": 16042500 }, { "epoch": 46.44, "learning_rate": 2.6789396825213044e-05, "loss": 2.0105, "step": 16043000 }, { "epoch": 46.44, "learning_rate": 2.6788673177565766e-05, "loss": 2.0089, "step": 16043500 }, { "epoch": 46.44, "learning_rate": 2.6787949529918488e-05, "loss": 2.0169, "step": 16044000 }, { "epoch": 46.44, "learning_rate": 2.678722588227121e-05, "loss": 2.0115, "step": 16044500 }, { "epoch": 46.44, "learning_rate": 2.6786502234623932e-05, "loss": 2.0155, "step": 16045000 }, { "epoch": 46.45, "learning_rate": 2.678577858697666e-05, "loss": 2.0263, "step": 16045500 }, { "epoch": 46.45, "learning_rate": 2.6785054939329384e-05, "loss": 2.0096, "step": 16046000 }, { "epoch": 46.45, "learning_rate": 2.678433129168211e-05, "loss": 2.0232, "step": 16046500 }, { "epoch": 46.45, "learning_rate": 2.678360764403483e-05, "loss": 2.0139, "step": 16047000 }, { "epoch": 46.45, "learning_rate": 2.6782885443682847e-05, "loss": 2.0018, "step": 16047500 }, { "epoch": 46.45, "learning_rate": 2.678216179603557e-05, "loss": 2.0288, "step": 16048000 }, { "epoch": 46.45, "learning_rate": 2.6781438148388295e-05, "loss": 2.0217, "step": 16048500 }, { "epoch": 46.46, "learning_rate": 2.678071594803631e-05, "loss": 2.0036, "step": 16049000 }, { "epoch": 46.46, "learning_rate": 2.6779992300389033e-05, "loss": 2.0338, "step": 16049500 }, { "epoch": 46.46, "learning_rate": 2.677926865274176e-05, "loss": 2.0212, "step": 16050000 }, { "epoch": 46.46, "learning_rate": 2.677854500509448e-05, "loss": 2.0053, "step": 16050500 }, { "epoch": 46.46, "learning_rate": 2.6777821357447203e-05, "loss": 2.0179, "step": 16051000 }, { "epoch": 46.46, "learning_rate": 2.6777099157095222e-05, "loss": 2.018, "step": 16051500 }, { "epoch": 46.46, "learning_rate": 2.6776375509447944e-05, "loss": 2.0057, "step": 16052000 }, { "epoch": 46.47, "learning_rate": 2.6775651861800666e-05, "loss": 2.0345, "step": 16052500 }, { "epoch": 46.47, "learning_rate": 2.677493110874398e-05, "loss": 2.0015, "step": 16053000 }, { "epoch": 46.47, "learning_rate": 2.6774207461096708e-05, "loss": 2.0263, "step": 16053500 }, { "epoch": 46.47, "learning_rate": 2.677348381344943e-05, "loss": 2.0269, "step": 16054000 }, { "epoch": 46.47, "learning_rate": 2.6772760165802152e-05, "loss": 2.0121, "step": 16054500 }, { "epoch": 46.47, "learning_rate": 2.6772036518154874e-05, "loss": 2.0224, "step": 16055000 }, { "epoch": 46.47, "learning_rate": 2.6771312870507596e-05, "loss": 2.011, "step": 16055500 }, { "epoch": 46.48, "learning_rate": 2.6770589222860322e-05, "loss": 2.0278, "step": 16056000 }, { "epoch": 46.48, "learning_rate": 2.6769865575213044e-05, "loss": 2.0198, "step": 16056500 }, { "epoch": 46.48, "learning_rate": 2.6769141927565767e-05, "loss": 2.0276, "step": 16057000 }, { "epoch": 46.48, "learning_rate": 2.676841827991849e-05, "loss": 2.0137, "step": 16057500 }, { "epoch": 46.48, "learning_rate": 2.676769463227121e-05, "loss": 2.0012, "step": 16058000 }, { "epoch": 46.48, "learning_rate": 2.6766970984623933e-05, "loss": 2.0175, "step": 16058500 }, { "epoch": 46.48, "learning_rate": 2.6766248784271952e-05, "loss": 1.9908, "step": 16059000 }, { "epoch": 46.49, "learning_rate": 2.676552658391997e-05, "loss": 2.0202, "step": 16059500 }, { "epoch": 46.49, "learning_rate": 2.6764802936272693e-05, "loss": 2.0213, "step": 16060000 }, { "epoch": 46.49, "learning_rate": 2.6764079288625416e-05, "loss": 2.0156, "step": 16060500 }, { "epoch": 46.49, "learning_rate": 2.6763355640978145e-05, "loss": 2.0071, "step": 16061000 }, { "epoch": 46.49, "learning_rate": 2.6762631993330867e-05, "loss": 2.0159, "step": 16061500 }, { "epoch": 46.49, "learning_rate": 2.676190834568359e-05, "loss": 2.0041, "step": 16062000 }, { "epoch": 46.49, "learning_rate": 2.676118469803631e-05, "loss": 2.0298, "step": 16062500 }, { "epoch": 46.5, "learning_rate": 2.6760461050389034e-05, "loss": 2.0211, "step": 16063000 }, { "epoch": 46.5, "learning_rate": 2.6759738850037053e-05, "loss": 2.0162, "step": 16063500 }, { "epoch": 46.5, "learning_rate": 2.6759015202389775e-05, "loss": 2.0047, "step": 16064000 }, { "epoch": 46.5, "learning_rate": 2.6758291554742497e-05, "loss": 2.0116, "step": 16064500 }, { "epoch": 46.5, "learning_rate": 2.6757567907095223e-05, "loss": 1.9965, "step": 16065000 }, { "epoch": 46.5, "learning_rate": 2.6756844259447945e-05, "loss": 2.0051, "step": 16065500 }, { "epoch": 46.5, "learning_rate": 2.6756120611800667e-05, "loss": 2.0377, "step": 16066000 }, { "epoch": 46.51, "learning_rate": 2.675539696415339e-05, "loss": 2.0138, "step": 16066500 }, { "epoch": 46.51, "learning_rate": 2.675467331650611e-05, "loss": 2.025, "step": 16067000 }, { "epoch": 46.51, "learning_rate": 2.6753949668858834e-05, "loss": 2.0229, "step": 16067500 }, { "epoch": 46.51, "learning_rate": 2.6753226021211563e-05, "loss": 2.011, "step": 16068000 }, { "epoch": 46.51, "learning_rate": 2.6752502373564285e-05, "loss": 2.0031, "step": 16068500 }, { "epoch": 46.51, "learning_rate": 2.675177872591701e-05, "loss": 2.0457, "step": 16069000 }, { "epoch": 46.51, "learning_rate": 2.6751055078269733e-05, "loss": 2.0275, "step": 16069500 }, { "epoch": 46.52, "learning_rate": 2.6750331430622455e-05, "loss": 2.0298, "step": 16070000 }, { "epoch": 46.52, "learning_rate": 2.6749607782975177e-05, "loss": 2.0342, "step": 16070500 }, { "epoch": 46.52, "learning_rate": 2.6748885582623196e-05, "loss": 2.0385, "step": 16071000 }, { "epoch": 46.52, "learning_rate": 2.674816193497592e-05, "loss": 2.0278, "step": 16071500 }, { "epoch": 46.52, "learning_rate": 2.674743828732864e-05, "loss": 2.0265, "step": 16072000 }, { "epoch": 46.52, "learning_rate": 2.6746717534271953e-05, "loss": 2.0052, "step": 16072500 }, { "epoch": 46.52, "learning_rate": 2.6745995333919972e-05, "loss": 2.008, "step": 16073000 }, { "epoch": 46.53, "learning_rate": 2.6745271686272694e-05, "loss": 2.0119, "step": 16073500 }, { "epoch": 46.53, "learning_rate": 2.6744548038625416e-05, "loss": 2.0243, "step": 16074000 }, { "epoch": 46.53, "learning_rate": 2.674382439097814e-05, "loss": 2.0259, "step": 16074500 }, { "epoch": 46.53, "learning_rate": 2.674310074333086e-05, "loss": 2.0047, "step": 16075000 }, { "epoch": 46.53, "learning_rate": 2.6742377095683586e-05, "loss": 1.9962, "step": 16075500 }, { "epoch": 46.53, "learning_rate": 2.6741653448036312e-05, "loss": 2.0038, "step": 16076000 }, { "epoch": 46.53, "learning_rate": 2.6740929800389038e-05, "loss": 2.0027, "step": 16076500 }, { "epoch": 46.54, "learning_rate": 2.6740207600037053e-05, "loss": 2.0114, "step": 16077000 }, { "epoch": 46.54, "learning_rate": 2.6739483952389776e-05, "loss": 2.0488, "step": 16077500 }, { "epoch": 46.54, "learning_rate": 2.67387603047425e-05, "loss": 2.0176, "step": 16078000 }, { "epoch": 46.54, "learning_rate": 2.6738036657095223e-05, "loss": 1.9989, "step": 16078500 }, { "epoch": 46.54, "learning_rate": 2.6737313009447946e-05, "loss": 2.0229, "step": 16079000 }, { "epoch": 46.54, "learning_rate": 2.6736589361800668e-05, "loss": 1.9998, "step": 16079500 }, { "epoch": 46.55, "learning_rate": 2.6735867161448687e-05, "loss": 1.9984, "step": 16080000 }, { "epoch": 46.55, "learning_rate": 2.673514351380141e-05, "loss": 2.0102, "step": 16080500 }, { "epoch": 46.55, "learning_rate": 2.673441986615413e-05, "loss": 2.0248, "step": 16081000 }, { "epoch": 46.55, "learning_rate": 2.6733696218506853e-05, "loss": 2.0283, "step": 16081500 }, { "epoch": 46.55, "learning_rate": 2.6732972570859576e-05, "loss": 2.0455, "step": 16082000 }, { "epoch": 46.55, "learning_rate": 2.67322489232123e-05, "loss": 2.0105, "step": 16082500 }, { "epoch": 46.55, "learning_rate": 2.6731525275565027e-05, "loss": 2.0236, "step": 16083000 }, { "epoch": 46.56, "learning_rate": 2.673080162791775e-05, "loss": 2.0298, "step": 16083500 }, { "epoch": 46.56, "learning_rate": 2.6730077980270475e-05, "loss": 2.0033, "step": 16084000 }, { "epoch": 46.56, "learning_rate": 2.6729354332623197e-05, "loss": 2.0202, "step": 16084500 }, { "epoch": 46.56, "learning_rate": 2.672863068497592e-05, "loss": 2.0119, "step": 16085000 }, { "epoch": 46.56, "learning_rate": 2.672790703732864e-05, "loss": 2.0056, "step": 16085500 }, { "epoch": 46.56, "learning_rate": 2.672718483697666e-05, "loss": 2.0057, "step": 16086000 }, { "epoch": 46.56, "learning_rate": 2.6726462636624676e-05, "loss": 2.0129, "step": 16086500 }, { "epoch": 46.57, "learning_rate": 2.67257389889774e-05, "loss": 2.0222, "step": 16087000 }, { "epoch": 46.57, "learning_rate": 2.6725016788625417e-05, "loss": 2.0139, "step": 16087500 }, { "epoch": 46.57, "learning_rate": 2.672429314097814e-05, "loss": 2.0104, "step": 16088000 }, { "epoch": 46.57, "learning_rate": 2.672356949333086e-05, "loss": 2.0298, "step": 16088500 }, { "epoch": 46.57, "learning_rate": 2.6722845845683587e-05, "loss": 2.0233, "step": 16089000 }, { "epoch": 46.57, "learning_rate": 2.672212219803631e-05, "loss": 2.0126, "step": 16089500 }, { "epoch": 46.57, "learning_rate": 2.6721398550389032e-05, "loss": 2.024, "step": 16090000 }, { "epoch": 46.58, "learning_rate": 2.672067635003705e-05, "loss": 2.0376, "step": 16090500 }, { "epoch": 46.58, "learning_rate": 2.6719952702389776e-05, "loss": 2.0003, "step": 16091000 }, { "epoch": 46.58, "learning_rate": 2.6719229054742502e-05, "loss": 2.0412, "step": 16091500 }, { "epoch": 46.58, "learning_rate": 2.6718505407095224e-05, "loss": 2.0183, "step": 16092000 }, { "epoch": 46.58, "learning_rate": 2.6717781759447946e-05, "loss": 2.0097, "step": 16092500 }, { "epoch": 46.58, "learning_rate": 2.671705811180067e-05, "loss": 2.0024, "step": 16093000 }, { "epoch": 46.58, "learning_rate": 2.6716335911448688e-05, "loss": 2.0237, "step": 16093500 }, { "epoch": 46.59, "learning_rate": 2.671561226380141e-05, "loss": 2.0217, "step": 16094000 }, { "epoch": 46.59, "learning_rate": 2.6714888616154132e-05, "loss": 2.0187, "step": 16094500 }, { "epoch": 46.59, "learning_rate": 2.671416641580215e-05, "loss": 2.0303, "step": 16095000 }, { "epoch": 46.59, "learning_rate": 2.6713442768154873e-05, "loss": 2.0096, "step": 16095500 }, { "epoch": 46.59, "learning_rate": 2.6712719120507595e-05, "loss": 2.0278, "step": 16096000 }, { "epoch": 46.59, "learning_rate": 2.6711995472860318e-05, "loss": 2.0357, "step": 16096500 }, { "epoch": 46.59, "learning_rate": 2.671127182521304e-05, "loss": 2.0281, "step": 16097000 }, { "epoch": 46.6, "learning_rate": 2.671054962486106e-05, "loss": 2.0228, "step": 16097500 }, { "epoch": 46.6, "learning_rate": 2.670982597721378e-05, "loss": 2.012, "step": 16098000 }, { "epoch": 46.6, "learning_rate": 2.670910232956651e-05, "loss": 2.0267, "step": 16098500 }, { "epoch": 46.6, "learning_rate": 2.6708378681919232e-05, "loss": 2.027, "step": 16099000 }, { "epoch": 46.6, "learning_rate": 2.6707655034271955e-05, "loss": 1.9977, "step": 16099500 }, { "epoch": 46.6, "learning_rate": 2.6706931386624677e-05, "loss": 2.0142, "step": 16100000 }, { "epoch": 46.6, "learning_rate": 2.6706207738977402e-05, "loss": 2.0088, "step": 16100500 }, { "epoch": 46.61, "learning_rate": 2.6705484091330125e-05, "loss": 2.0238, "step": 16101000 }, { "epoch": 46.61, "learning_rate": 2.6704760443682847e-05, "loss": 2.0004, "step": 16101500 }, { "epoch": 46.61, "learning_rate": 2.670403679603557e-05, "loss": 2.0295, "step": 16102000 }, { "epoch": 46.61, "learning_rate": 2.670331314838829e-05, "loss": 1.9974, "step": 16102500 }, { "epoch": 46.61, "learning_rate": 2.6702589500741017e-05, "loss": 2.0426, "step": 16103000 }, { "epoch": 46.61, "learning_rate": 2.670186585309374e-05, "loss": 1.9883, "step": 16103500 }, { "epoch": 46.61, "learning_rate": 2.670114220544646e-05, "loss": 1.9998, "step": 16104000 }, { "epoch": 46.62, "learning_rate": 2.6700418557799184e-05, "loss": 2.0267, "step": 16104500 }, { "epoch": 46.62, "learning_rate": 2.6699696357447203e-05, "loss": 2.0051, "step": 16105000 }, { "epoch": 46.62, "learning_rate": 2.6698972709799928e-05, "loss": 2.0326, "step": 16105500 }, { "epoch": 46.62, "learning_rate": 2.6698249062152654e-05, "loss": 2.0083, "step": 16106000 }, { "epoch": 46.62, "learning_rate": 2.6697525414505376e-05, "loss": 2.011, "step": 16106500 }, { "epoch": 46.62, "learning_rate": 2.66968017668581e-05, "loss": 2.0041, "step": 16107000 }, { "epoch": 46.62, "learning_rate": 2.6696079566506117e-05, "loss": 2.0098, "step": 16107500 }, { "epoch": 46.63, "learning_rate": 2.669535591885884e-05, "loss": 2.0193, "step": 16108000 }, { "epoch": 46.63, "learning_rate": 2.6694632271211562e-05, "loss": 2.0233, "step": 16108500 }, { "epoch": 46.63, "learning_rate": 2.6693908623564284e-05, "loss": 2.0108, "step": 16109000 }, { "epoch": 46.63, "learning_rate": 2.6693184975917006e-05, "loss": 2.0363, "step": 16109500 }, { "epoch": 46.63, "learning_rate": 2.669246132826973e-05, "loss": 2.0229, "step": 16110000 }, { "epoch": 46.63, "learning_rate": 2.6691737680622454e-05, "loss": 2.0101, "step": 16110500 }, { "epoch": 46.63, "learning_rate": 2.669101548027047e-05, "loss": 2.0221, "step": 16111000 }, { "epoch": 46.64, "learning_rate": 2.6690291832623192e-05, "loss": 2.0343, "step": 16111500 }, { "epoch": 46.64, "learning_rate": 2.6689568184975917e-05, "loss": 2.0007, "step": 16112000 }, { "epoch": 46.64, "learning_rate": 2.668884453732864e-05, "loss": 2.0102, "step": 16112500 }, { "epoch": 46.64, "learning_rate": 2.668812088968137e-05, "loss": 2.0127, "step": 16113000 }, { "epoch": 46.64, "learning_rate": 2.668739724203409e-05, "loss": 2.0235, "step": 16113500 }, { "epoch": 46.64, "learning_rate": 2.6686675041682107e-05, "loss": 1.9965, "step": 16114000 }, { "epoch": 46.64, "learning_rate": 2.6685951394034832e-05, "loss": 2.0218, "step": 16114500 }, { "epoch": 46.65, "learning_rate": 2.6685227746387554e-05, "loss": 2.0315, "step": 16115000 }, { "epoch": 46.65, "learning_rate": 2.6684504098740277e-05, "loss": 2.0364, "step": 16115500 }, { "epoch": 46.65, "learning_rate": 2.6683780451093e-05, "loss": 2.0128, "step": 16116000 }, { "epoch": 46.65, "learning_rate": 2.668305680344572e-05, "loss": 2.0241, "step": 16116500 }, { "epoch": 46.65, "learning_rate": 2.668233460309374e-05, "loss": 2.0025, "step": 16117000 }, { "epoch": 46.65, "learning_rate": 2.6681610955446462e-05, "loss": 2.0173, "step": 16117500 }, { "epoch": 46.66, "learning_rate": 2.668088875509448e-05, "loss": 1.9961, "step": 16118000 }, { "epoch": 46.66, "learning_rate": 2.6680165107447203e-05, "loss": 2.0198, "step": 16118500 }, { "epoch": 46.66, "learning_rate": 2.6679441459799926e-05, "loss": 2.018, "step": 16119000 }, { "epoch": 46.66, "learning_rate": 2.6678717812152648e-05, "loss": 2.0136, "step": 16119500 }, { "epoch": 46.66, "learning_rate": 2.667799416450537e-05, "loss": 2.0081, "step": 16120000 }, { "epoch": 46.66, "learning_rate": 2.66772705168581e-05, "loss": 2.0129, "step": 16120500 }, { "epoch": 46.66, "learning_rate": 2.667654686921082e-05, "loss": 1.999, "step": 16121000 }, { "epoch": 46.67, "learning_rate": 2.6675823221563544e-05, "loss": 2.0203, "step": 16121500 }, { "epoch": 46.67, "learning_rate": 2.6675101021211563e-05, "loss": 2.0109, "step": 16122000 }, { "epoch": 46.67, "learning_rate": 2.6674377373564285e-05, "loss": 2.0431, "step": 16122500 }, { "epoch": 46.67, "learning_rate": 2.6673653725917007e-05, "loss": 2.0023, "step": 16123000 }, { "epoch": 46.67, "learning_rate": 2.6672930078269733e-05, "loss": 2.0003, "step": 16123500 }, { "epoch": 46.67, "learning_rate": 2.6672206430622455e-05, "loss": 2.0329, "step": 16124000 }, { "epoch": 46.67, "learning_rate": 2.6671482782975177e-05, "loss": 2.0103, "step": 16124500 }, { "epoch": 46.68, "learning_rate": 2.66707591353279e-05, "loss": 2.0266, "step": 16125000 }, { "epoch": 46.68, "learning_rate": 2.667003548768062e-05, "loss": 2.0291, "step": 16125500 }, { "epoch": 46.68, "learning_rate": 2.666931328732864e-05, "loss": 1.9999, "step": 16126000 }, { "epoch": 46.68, "learning_rate": 2.6668589639681363e-05, "loss": 2.0435, "step": 16126500 }, { "epoch": 46.68, "learning_rate": 2.6667865992034085e-05, "loss": 2.0041, "step": 16127000 }, { "epoch": 46.68, "learning_rate": 2.6667142344386807e-05, "loss": 2.0314, "step": 16127500 }, { "epoch": 46.68, "learning_rate": 2.6666418696739536e-05, "loss": 2.0175, "step": 16128000 }, { "epoch": 46.69, "learning_rate": 2.666569504909226e-05, "loss": 2.0316, "step": 16128500 }, { "epoch": 46.69, "learning_rate": 2.6664971401444984e-05, "loss": 2.0211, "step": 16129000 }, { "epoch": 46.69, "learning_rate": 2.6664247753797706e-05, "loss": 2.0142, "step": 16129500 }, { "epoch": 46.69, "learning_rate": 2.6663525553445722e-05, "loss": 2.0231, "step": 16130000 }, { "epoch": 46.69, "learning_rate": 2.6662801905798444e-05, "loss": 1.999, "step": 16130500 }, { "epoch": 46.69, "learning_rate": 2.666207825815117e-05, "loss": 2.0161, "step": 16131000 }, { "epoch": 46.69, "learning_rate": 2.6661354610503892e-05, "loss": 2.0431, "step": 16131500 }, { "epoch": 46.7, "learning_rate": 2.6660632410151908e-05, "loss": 2.041, "step": 16132000 }, { "epoch": 46.7, "learning_rate": 2.6659908762504633e-05, "loss": 2.0007, "step": 16132500 }, { "epoch": 46.7, "learning_rate": 2.665918656215265e-05, "loss": 2.0428, "step": 16133000 }, { "epoch": 46.7, "learning_rate": 2.665846291450537e-05, "loss": 2.0174, "step": 16133500 }, { "epoch": 46.7, "learning_rate": 2.6657739266858097e-05, "loss": 2.0265, "step": 16134000 }, { "epoch": 46.7, "learning_rate": 2.665701561921082e-05, "loss": 1.9991, "step": 16134500 }, { "epoch": 46.7, "learning_rate": 2.665629197156354e-05, "loss": 2.0102, "step": 16135000 }, { "epoch": 46.71, "learning_rate": 2.665556832391627e-05, "loss": 2.0031, "step": 16135500 }, { "epoch": 46.71, "learning_rate": 2.6654844676268992e-05, "loss": 2.0305, "step": 16136000 }, { "epoch": 46.71, "learning_rate": 2.6654121028621715e-05, "loss": 2.0038, "step": 16136500 }, { "epoch": 46.71, "learning_rate": 2.6653397380974437e-05, "loss": 2.0287, "step": 16137000 }, { "epoch": 46.71, "learning_rate": 2.6652675180622456e-05, "loss": 2.0168, "step": 16137500 }, { "epoch": 46.71, "learning_rate": 2.6651951532975178e-05, "loss": 2.0342, "step": 16138000 }, { "epoch": 46.71, "learning_rate": 2.66512278853279e-05, "loss": 2.0241, "step": 16138500 }, { "epoch": 46.72, "learning_rate": 2.6650504237680622e-05, "loss": 2.0284, "step": 16139000 }, { "epoch": 46.72, "learning_rate": 2.6649780590033348e-05, "loss": 2.025, "step": 16139500 }, { "epoch": 46.72, "learning_rate": 2.6649058389681364e-05, "loss": 2.0278, "step": 16140000 }, { "epoch": 46.72, "learning_rate": 2.6648336189329383e-05, "loss": 2.0396, "step": 16140500 }, { "epoch": 46.72, "learning_rate": 2.6647612541682105e-05, "loss": 2.0032, "step": 16141000 }, { "epoch": 46.72, "learning_rate": 2.6646888894034827e-05, "loss": 2.0042, "step": 16141500 }, { "epoch": 46.72, "learning_rate": 2.664616524638755e-05, "loss": 2.0307, "step": 16142000 }, { "epoch": 46.73, "learning_rate": 2.6645443046035568e-05, "loss": 2.0051, "step": 16142500 }, { "epoch": 46.73, "learning_rate": 2.6644720845683584e-05, "loss": 2.0013, "step": 16143000 }, { "epoch": 46.73, "learning_rate": 2.6643997198036313e-05, "loss": 2.0118, "step": 16143500 }, { "epoch": 46.73, "learning_rate": 2.6643273550389035e-05, "loss": 2.0153, "step": 16144000 }, { "epoch": 46.73, "learning_rate": 2.664254990274176e-05, "loss": 2.0094, "step": 16144500 }, { "epoch": 46.73, "learning_rate": 2.6641826255094483e-05, "loss": 2.01, "step": 16145000 }, { "epoch": 46.73, "learning_rate": 2.6641102607447205e-05, "loss": 2.0137, "step": 16145500 }, { "epoch": 46.74, "learning_rate": 2.6640378959799927e-05, "loss": 2.02, "step": 16146000 }, { "epoch": 46.74, "learning_rate": 2.663965531215265e-05, "loss": 2.0233, "step": 16146500 }, { "epoch": 46.74, "learning_rate": 2.6638931664505372e-05, "loss": 2.0197, "step": 16147000 }, { "epoch": 46.74, "learning_rate": 2.663820946415339e-05, "loss": 2.0135, "step": 16147500 }, { "epoch": 46.74, "learning_rate": 2.6637485816506113e-05, "loss": 2.0608, "step": 16148000 }, { "epoch": 46.74, "learning_rate": 2.6636762168858835e-05, "loss": 2.0296, "step": 16148500 }, { "epoch": 46.74, "learning_rate": 2.663603852121156e-05, "loss": 2.0118, "step": 16149000 }, { "epoch": 46.75, "learning_rate": 2.6635314873564283e-05, "loss": 2.014, "step": 16149500 }, { "epoch": 46.75, "learning_rate": 2.6634591225917005e-05, "loss": 2.0394, "step": 16150000 }, { "epoch": 46.75, "learning_rate": 2.6633867578269734e-05, "loss": 2.002, "step": 16150500 }, { "epoch": 46.75, "learning_rate": 2.663314537791775e-05, "loss": 1.9866, "step": 16151000 }, { "epoch": 46.75, "learning_rate": 2.6632421730270472e-05, "loss": 2.024, "step": 16151500 }, { "epoch": 46.75, "learning_rate": 2.6631698082623198e-05, "loss": 2.0262, "step": 16152000 }, { "epoch": 46.75, "learning_rate": 2.663097443497592e-05, "loss": 2.0409, "step": 16152500 }, { "epoch": 46.76, "learning_rate": 2.6630252234623935e-05, "loss": 2.0236, "step": 16153000 }, { "epoch": 46.76, "learning_rate": 2.662952858697666e-05, "loss": 2.0089, "step": 16153500 }, { "epoch": 46.76, "learning_rate": 2.6628804939329383e-05, "loss": 2.0156, "step": 16154000 }, { "epoch": 46.76, "learning_rate": 2.6628081291682106e-05, "loss": 2.0134, "step": 16154500 }, { "epoch": 46.76, "learning_rate": 2.6627359091330124e-05, "loss": 2.0546, "step": 16155000 }, { "epoch": 46.76, "learning_rate": 2.6626635443682847e-05, "loss": 2.0335, "step": 16155500 }, { "epoch": 46.77, "learning_rate": 2.662591179603557e-05, "loss": 2.0257, "step": 16156000 }, { "epoch": 46.77, "learning_rate": 2.6625189595683584e-05, "loss": 2.0221, "step": 16156500 }, { "epoch": 46.77, "learning_rate": 2.662446594803631e-05, "loss": 1.9984, "step": 16157000 }, { "epoch": 46.77, "learning_rate": 2.6623742300389032e-05, "loss": 2.0454, "step": 16157500 }, { "epoch": 46.77, "learning_rate": 2.662301865274176e-05, "loss": 2.0263, "step": 16158000 }, { "epoch": 46.77, "learning_rate": 2.6622295005094484e-05, "loss": 2.0292, "step": 16158500 }, { "epoch": 46.77, "learning_rate": 2.6621571357447206e-05, "loss": 2.0551, "step": 16159000 }, { "epoch": 46.78, "learning_rate": 2.6620847709799928e-05, "loss": 2.0193, "step": 16159500 }, { "epoch": 46.78, "learning_rate": 2.662012695674324e-05, "loss": 2.0207, "step": 16160000 }, { "epoch": 46.78, "learning_rate": 2.6619403309095963e-05, "loss": 2.0165, "step": 16160500 }, { "epoch": 46.78, "learning_rate": 2.6618679661448688e-05, "loss": 1.9945, "step": 16161000 }, { "epoch": 46.78, "learning_rate": 2.661795601380141e-05, "loss": 2.0121, "step": 16161500 }, { "epoch": 46.78, "learning_rate": 2.6617232366154133e-05, "loss": 2.0368, "step": 16162000 }, { "epoch": 46.78, "learning_rate": 2.6616508718506855e-05, "loss": 2.0504, "step": 16162500 }, { "epoch": 46.79, "learning_rate": 2.6615785070859577e-05, "loss": 2.0282, "step": 16163000 }, { "epoch": 46.79, "learning_rate": 2.66150614232123e-05, "loss": 2.0324, "step": 16163500 }, { "epoch": 46.79, "learning_rate": 2.6614337775565025e-05, "loss": 2.0044, "step": 16164000 }, { "epoch": 46.79, "learning_rate": 2.6613614127917747e-05, "loss": 2.0123, "step": 16164500 }, { "epoch": 46.79, "learning_rate": 2.661289048027047e-05, "loss": 2.0198, "step": 16165000 }, { "epoch": 46.79, "learning_rate": 2.66121668326232e-05, "loss": 2.0013, "step": 16165500 }, { "epoch": 46.79, "learning_rate": 2.661144318497592e-05, "loss": 2.0412, "step": 16166000 }, { "epoch": 46.8, "learning_rate": 2.661072098462394e-05, "loss": 2.0281, "step": 16166500 }, { "epoch": 46.8, "learning_rate": 2.6609998784271955e-05, "loss": 1.9957, "step": 16167000 }, { "epoch": 46.8, "learning_rate": 2.6609275136624677e-05, "loss": 1.9999, "step": 16167500 }, { "epoch": 46.8, "learning_rate": 2.66085514889774e-05, "loss": 1.9973, "step": 16168000 }, { "epoch": 46.8, "learning_rate": 2.6607827841330125e-05, "loss": 2.0115, "step": 16168500 }, { "epoch": 46.8, "learning_rate": 2.6607104193682848e-05, "loss": 2.0239, "step": 16169000 }, { "epoch": 46.8, "learning_rate": 2.660638054603557e-05, "loss": 2.0118, "step": 16169500 }, { "epoch": 46.81, "learning_rate": 2.6605656898388292e-05, "loss": 2.0099, "step": 16170000 }, { "epoch": 46.81, "learning_rate": 2.6604933250741014e-05, "loss": 2.0065, "step": 16170500 }, { "epoch": 46.81, "learning_rate": 2.660420960309374e-05, "loss": 2.0138, "step": 16171000 }, { "epoch": 46.81, "learning_rate": 2.6603485955446462e-05, "loss": 2.0115, "step": 16171500 }, { "epoch": 46.81, "learning_rate": 2.6602762307799184e-05, "loss": 2.0273, "step": 16172000 }, { "epoch": 46.81, "learning_rate": 2.6602038660151907e-05, "loss": 2.0362, "step": 16172500 }, { "epoch": 46.81, "learning_rate": 2.660131645979993e-05, "loss": 2.0397, "step": 16173000 }, { "epoch": 46.82, "learning_rate": 2.660059281215265e-05, "loss": 2.0096, "step": 16173500 }, { "epoch": 46.82, "learning_rate": 2.6599869164505377e-05, "loss": 2.0123, "step": 16174000 }, { "epoch": 46.82, "learning_rate": 2.65991455168581e-05, "loss": 1.9935, "step": 16174500 }, { "epoch": 46.82, "learning_rate": 2.6598423316506115e-05, "loss": 2.0065, "step": 16175000 }, { "epoch": 46.82, "learning_rate": 2.659769966885884e-05, "loss": 2.0295, "step": 16175500 }, { "epoch": 46.82, "learning_rate": 2.6596977468506856e-05, "loss": 1.9913, "step": 16176000 }, { "epoch": 46.82, "learning_rate": 2.6596253820859578e-05, "loss": 2.0194, "step": 16176500 }, { "epoch": 46.83, "learning_rate": 2.6595530173212304e-05, "loss": 2.0305, "step": 16177000 }, { "epoch": 46.83, "learning_rate": 2.6594806525565026e-05, "loss": 2.0338, "step": 16177500 }, { "epoch": 46.83, "learning_rate": 2.659408432521304e-05, "loss": 2.0305, "step": 16178000 }, { "epoch": 46.83, "learning_rate": 2.6593360677565764e-05, "loss": 2.01, "step": 16178500 }, { "epoch": 46.83, "learning_rate": 2.659263702991849e-05, "loss": 2.0462, "step": 16179000 }, { "epoch": 46.83, "learning_rate": 2.659191338227121e-05, "loss": 1.9937, "step": 16179500 }, { "epoch": 46.83, "learning_rate": 2.6591189734623934e-05, "loss": 2.0395, "step": 16180000 }, { "epoch": 46.84, "learning_rate": 2.6590466086976663e-05, "loss": 2.0129, "step": 16180500 }, { "epoch": 46.84, "learning_rate": 2.6589742439329385e-05, "loss": 2.0166, "step": 16181000 }, { "epoch": 46.84, "learning_rate": 2.6589018791682107e-05, "loss": 2.0012, "step": 16181500 }, { "epoch": 46.84, "learning_rate": 2.658829514403483e-05, "loss": 2.0459, "step": 16182000 }, { "epoch": 46.84, "learning_rate": 2.6587572943682848e-05, "loss": 2.0319, "step": 16182500 }, { "epoch": 46.84, "learning_rate": 2.658684929603557e-05, "loss": 2.0199, "step": 16183000 }, { "epoch": 46.84, "learning_rate": 2.6586125648388293e-05, "loss": 2.0261, "step": 16183500 }, { "epoch": 46.85, "learning_rate": 2.6585403448036312e-05, "loss": 2.0057, "step": 16184000 }, { "epoch": 46.85, "learning_rate": 2.6584679800389034e-05, "loss": 2.0306, "step": 16184500 }, { "epoch": 46.85, "learning_rate": 2.6583956152741756e-05, "loss": 2.0344, "step": 16185000 }, { "epoch": 46.85, "learning_rate": 2.658323250509448e-05, "loss": 2.019, "step": 16185500 }, { "epoch": 46.85, "learning_rate": 2.6582508857447204e-05, "loss": 2.0428, "step": 16186000 }, { "epoch": 46.85, "learning_rate": 2.6581785209799926e-05, "loss": 2.0176, "step": 16186500 }, { "epoch": 46.85, "learning_rate": 2.658106156215265e-05, "loss": 2.0474, "step": 16187000 }, { "epoch": 46.86, "learning_rate": 2.658033791450537e-05, "loss": 2.0284, "step": 16187500 }, { "epoch": 46.86, "learning_rate": 2.65796142668581e-05, "loss": 2.0192, "step": 16188000 }, { "epoch": 46.86, "learning_rate": 2.6578890619210822e-05, "loss": 2.023, "step": 16188500 }, { "epoch": 46.86, "learning_rate": 2.6578166971563544e-05, "loss": 2.0093, "step": 16189000 }, { "epoch": 46.86, "learning_rate": 2.6577443323916266e-05, "loss": 2.0157, "step": 16189500 }, { "epoch": 46.86, "learning_rate": 2.6576719676268992e-05, "loss": 1.9902, "step": 16190000 }, { "epoch": 46.86, "learning_rate": 2.6575998923212304e-05, "loss": 2.0011, "step": 16190500 }, { "epoch": 46.87, "learning_rate": 2.6575275275565027e-05, "loss": 2.01, "step": 16191000 }, { "epoch": 46.87, "learning_rate": 2.657455162791775e-05, "loss": 2.0154, "step": 16191500 }, { "epoch": 46.87, "learning_rate": 2.657382798027047e-05, "loss": 2.0055, "step": 16192000 }, { "epoch": 46.87, "learning_rate": 2.6573104332623193e-05, "loss": 2.0125, "step": 16192500 }, { "epoch": 46.87, "learning_rate": 2.6572382132271212e-05, "loss": 2.0261, "step": 16193000 }, { "epoch": 46.87, "learning_rate": 2.6571658484623934e-05, "loss": 1.9999, "step": 16193500 }, { "epoch": 46.88, "learning_rate": 2.6570934836976657e-05, "loss": 2.0306, "step": 16194000 }, { "epoch": 46.88, "learning_rate": 2.6570212636624676e-05, "loss": 2.0389, "step": 16194500 }, { "epoch": 46.88, "learning_rate": 2.6569488988977398e-05, "loss": 2.022, "step": 16195000 }, { "epoch": 46.88, "learning_rate": 2.6568765341330127e-05, "loss": 1.9915, "step": 16195500 }, { "epoch": 46.88, "learning_rate": 2.656804169368285e-05, "loss": 2.0139, "step": 16196000 }, { "epoch": 46.88, "learning_rate": 2.656731804603557e-05, "loss": 2.013, "step": 16196500 }, { "epoch": 46.88, "learning_rate": 2.6566594398388294e-05, "loss": 2.0115, "step": 16197000 }, { "epoch": 46.89, "learning_rate": 2.6565872198036313e-05, "loss": 1.9834, "step": 16197500 }, { "epoch": 46.89, "learning_rate": 2.6565148550389035e-05, "loss": 2.0116, "step": 16198000 }, { "epoch": 46.89, "learning_rate": 2.6564424902741757e-05, "loss": 2.0067, "step": 16198500 }, { "epoch": 46.89, "learning_rate": 2.656370125509448e-05, "loss": 2.0466, "step": 16199000 }, { "epoch": 46.89, "learning_rate": 2.6562979054742498e-05, "loss": 2.0388, "step": 16199500 }, { "epoch": 46.89, "learning_rate": 2.656225540709522e-05, "loss": 2.0394, "step": 16200000 }, { "epoch": 46.89, "learning_rate": 2.6561531759447943e-05, "loss": 2.0123, "step": 16200500 }, { "epoch": 46.9, "learning_rate": 2.6560808111800668e-05, "loss": 2.0032, "step": 16201000 }, { "epoch": 46.9, "learning_rate": 2.656008446415339e-05, "loss": 2.0047, "step": 16201500 }, { "epoch": 46.9, "learning_rate": 2.6559360816506113e-05, "loss": 2.0289, "step": 16202000 }, { "epoch": 46.9, "learning_rate": 2.6558637168858835e-05, "loss": 2.0293, "step": 16202500 }, { "epoch": 46.9, "learning_rate": 2.6557913521211564e-05, "loss": 2.0212, "step": 16203000 }, { "epoch": 46.9, "learning_rate": 2.6557189873564286e-05, "loss": 2.0385, "step": 16203500 }, { "epoch": 46.9, "learning_rate": 2.655646622591701e-05, "loss": 2.0195, "step": 16204000 }, { "epoch": 46.91, "learning_rate": 2.655574257826973e-05, "loss": 2.0249, "step": 16204500 }, { "epoch": 46.91, "learning_rate": 2.6555018930622456e-05, "loss": 2.019, "step": 16205000 }, { "epoch": 46.91, "learning_rate": 2.655429528297518e-05, "loss": 2.0348, "step": 16205500 }, { "epoch": 46.91, "learning_rate": 2.65535716353279e-05, "loss": 2.0277, "step": 16206000 }, { "epoch": 46.91, "learning_rate": 2.655284943497592e-05, "loss": 2.0257, "step": 16206500 }, { "epoch": 46.91, "learning_rate": 2.6552125787328642e-05, "loss": 2.0328, "step": 16207000 }, { "epoch": 46.91, "learning_rate": 2.6551402139681364e-05, "loss": 2.0506, "step": 16207500 }, { "epoch": 46.92, "learning_rate": 2.6550678492034086e-05, "loss": 2.0613, "step": 16208000 }, { "epoch": 46.92, "learning_rate": 2.654995484438681e-05, "loss": 1.9983, "step": 16208500 }, { "epoch": 46.92, "learning_rate": 2.654923119673953e-05, "loss": 2.0021, "step": 16209000 }, { "epoch": 46.92, "learning_rate": 2.6548507549092256e-05, "loss": 2.0177, "step": 16209500 }, { "epoch": 46.92, "learning_rate": 2.6547783901444982e-05, "loss": 2.0078, "step": 16210000 }, { "epoch": 46.92, "learning_rate": 2.6547060253797708e-05, "loss": 2.0201, "step": 16210500 }, { "epoch": 46.92, "learning_rate": 2.6546338053445723e-05, "loss": 2.0336, "step": 16211000 }, { "epoch": 46.93, "learning_rate": 2.6545614405798446e-05, "loss": 2.0209, "step": 16211500 }, { "epoch": 46.93, "learning_rate": 2.654489075815117e-05, "loss": 2.0285, "step": 16212000 }, { "epoch": 46.93, "learning_rate": 2.6544167110503893e-05, "loss": 2.0313, "step": 16212500 }, { "epoch": 46.93, "learning_rate": 2.6543443462856616e-05, "loss": 2.0029, "step": 16213000 }, { "epoch": 46.93, "learning_rate": 2.6542722709799928e-05, "loss": 2.0234, "step": 16213500 }, { "epoch": 46.93, "learning_rate": 2.654199906215265e-05, "loss": 2.0117, "step": 16214000 }, { "epoch": 46.93, "learning_rate": 2.6541275414505372e-05, "loss": 2.0235, "step": 16214500 }, { "epoch": 46.94, "learning_rate": 2.6540551766858095e-05, "loss": 2.0301, "step": 16215000 }, { "epoch": 46.94, "learning_rate": 2.653982811921082e-05, "loss": 2.0358, "step": 16215500 }, { "epoch": 46.94, "learning_rate": 2.6539104471563542e-05, "loss": 2.0192, "step": 16216000 }, { "epoch": 46.94, "learning_rate": 2.6538380823916265e-05, "loss": 2.0148, "step": 16216500 }, { "epoch": 46.94, "learning_rate": 2.6537657176268987e-05, "loss": 2.0015, "step": 16217000 }, { "epoch": 46.94, "learning_rate": 2.6536934975917006e-05, "loss": 2.026, "step": 16217500 }, { "epoch": 46.94, "learning_rate": 2.6536211328269735e-05, "loss": 2.0174, "step": 16218000 }, { "epoch": 46.95, "learning_rate": 2.6535487680622457e-05, "loss": 2.0093, "step": 16218500 }, { "epoch": 46.95, "learning_rate": 2.653476403297518e-05, "loss": 2.0194, "step": 16219000 }, { "epoch": 46.95, "learning_rate": 2.65340403853279e-05, "loss": 2.0544, "step": 16219500 }, { "epoch": 46.95, "learning_rate": 2.6533316737680624e-05, "loss": 2.0305, "step": 16220000 }, { "epoch": 46.95, "learning_rate": 2.6532593090033346e-05, "loss": 2.0287, "step": 16220500 }, { "epoch": 46.95, "learning_rate": 2.653186944238607e-05, "loss": 2.029, "step": 16221000 }, { "epoch": 46.95, "learning_rate": 2.6531145794738794e-05, "loss": 2.0467, "step": 16221500 }, { "epoch": 46.96, "learning_rate": 2.6530422147091516e-05, "loss": 2.0261, "step": 16222000 }, { "epoch": 46.96, "learning_rate": 2.6529699946739535e-05, "loss": 2.0302, "step": 16222500 }, { "epoch": 46.96, "learning_rate": 2.6528976299092257e-05, "loss": 2.0492, "step": 16223000 }, { "epoch": 46.96, "learning_rate": 2.652825265144498e-05, "loss": 2.0154, "step": 16223500 }, { "epoch": 46.96, "learning_rate": 2.6527529003797702e-05, "loss": 1.9939, "step": 16224000 }, { "epoch": 46.96, "learning_rate": 2.6526805356150424e-05, "loss": 2.0316, "step": 16224500 }, { "epoch": 46.96, "learning_rate": 2.6526083155798443e-05, "loss": 2.0216, "step": 16225000 }, { "epoch": 46.97, "learning_rate": 2.6525359508151172e-05, "loss": 1.9904, "step": 16225500 }, { "epoch": 46.97, "learning_rate": 2.6524635860503894e-05, "loss": 2.0194, "step": 16226000 }, { "epoch": 46.97, "learning_rate": 2.6523912212856616e-05, "loss": 2.024, "step": 16226500 }, { "epoch": 46.97, "learning_rate": 2.652318856520934e-05, "loss": 2.0107, "step": 16227000 }, { "epoch": 46.97, "learning_rate": 2.652246491756206e-05, "loss": 2.0238, "step": 16227500 }, { "epoch": 46.97, "learning_rate": 2.6521741269914786e-05, "loss": 2.0366, "step": 16228000 }, { "epoch": 46.97, "learning_rate": 2.65210205168581e-05, "loss": 2.0183, "step": 16228500 }, { "epoch": 46.98, "learning_rate": 2.652029686921082e-05, "loss": 2.0229, "step": 16229000 }, { "epoch": 46.98, "learning_rate": 2.6519573221563543e-05, "loss": 2.0183, "step": 16229500 }, { "epoch": 46.98, "learning_rate": 2.6518849573916265e-05, "loss": 2.0177, "step": 16230000 }, { "epoch": 46.98, "learning_rate": 2.6518125926268988e-05, "loss": 2.031, "step": 16230500 }, { "epoch": 46.98, "learning_rate": 2.651740227862171e-05, "loss": 2.0306, "step": 16231000 }, { "epoch": 46.98, "learning_rate": 2.6516678630974436e-05, "loss": 2.0178, "step": 16231500 }, { "epoch": 46.98, "learning_rate": 2.651595643062245e-05, "loss": 2.0021, "step": 16232000 }, { "epoch": 46.99, "learning_rate": 2.6515232782975173e-05, "loss": 2.0334, "step": 16232500 }, { "epoch": 46.99, "learning_rate": 2.6514509135327902e-05, "loss": 2.0101, "step": 16233000 }, { "epoch": 46.99, "learning_rate": 2.6513785487680625e-05, "loss": 2.0134, "step": 16233500 }, { "epoch": 46.99, "learning_rate": 2.651306184003335e-05, "loss": 2.0177, "step": 16234000 }, { "epoch": 46.99, "learning_rate": 2.6512338192386072e-05, "loss": 2.0161, "step": 16234500 }, { "epoch": 46.99, "learning_rate": 2.6511614544738795e-05, "loss": 2.0203, "step": 16235000 }, { "epoch": 47.0, "learning_rate": 2.6510890897091517e-05, "loss": 2.0128, "step": 16235500 }, { "epoch": 47.0, "learning_rate": 2.6510168696739536e-05, "loss": 2.0218, "step": 16236000 }, { "epoch": 47.0, "learning_rate": 2.650944649638755e-05, "loss": 2.0444, "step": 16236500 }, { "epoch": 47.0, "learning_rate": 2.6508722848740274e-05, "loss": 2.0357, "step": 16237000 }, { "epoch": 47.0, "eval_accuracy": 0.672088574466875, "eval_accuracy_mlm": 0.6375967336898031, "eval_accuracy_nsp": 0.8571585706232555, "eval_loss": 2.169609785079956, "eval_runtime": 331.6707, "eval_samples_per_second": 1315.721, "eval_steps_per_second": 54.822, "step": 16237184 }, { "epoch": 47.0, "learning_rate": 2.6507999201093e-05, "loss": 2.0167, "step": 16237500 }, { "epoch": 47.0, "learning_rate": 2.650727555344572e-05, "loss": 2.0055, "step": 16238000 }, { "epoch": 47.0, "learning_rate": 2.6506553353093737e-05, "loss": 2.028, "step": 16238500 }, { "epoch": 47.01, "learning_rate": 2.6505829705446463e-05, "loss": 1.9944, "step": 16239000 }, { "epoch": 47.01, "learning_rate": 2.6505106057799185e-05, "loss": 2.0081, "step": 16239500 }, { "epoch": 47.01, "learning_rate": 2.6504382410151907e-05, "loss": 2.0141, "step": 16240000 }, { "epoch": 47.01, "learning_rate": 2.6503658762504636e-05, "loss": 1.9945, "step": 16240500 }, { "epoch": 47.01, "learning_rate": 2.650293511485736e-05, "loss": 1.9809, "step": 16241000 }, { "epoch": 47.01, "learning_rate": 2.650221146721008e-05, "loss": 2.0042, "step": 16241500 }, { "epoch": 47.01, "learning_rate": 2.6501487819562803e-05, "loss": 1.9931, "step": 16242000 }, { "epoch": 47.02, "learning_rate": 2.6500764171915525e-05, "loss": 1.9765, "step": 16242500 }, { "epoch": 47.02, "learning_rate": 2.650004052426825e-05, "loss": 1.9911, "step": 16243000 }, { "epoch": 47.02, "learning_rate": 2.6499316876620973e-05, "loss": 1.9891, "step": 16243500 }, { "epoch": 47.02, "learning_rate": 2.6498593228973695e-05, "loss": 2.0185, "step": 16244000 }, { "epoch": 47.02, "learning_rate": 2.6497871028621714e-05, "loss": 2.0038, "step": 16244500 }, { "epoch": 47.02, "learning_rate": 2.649714882826973e-05, "loss": 1.9875, "step": 16245000 }, { "epoch": 47.02, "learning_rate": 2.6496425180622452e-05, "loss": 1.9972, "step": 16245500 }, { "epoch": 47.03, "learning_rate": 2.6495701532975174e-05, "loss": 2.0023, "step": 16246000 }, { "epoch": 47.03, "learning_rate": 2.64949778853279e-05, "loss": 1.9806, "step": 16246500 }, { "epoch": 47.03, "learning_rate": 2.6494254237680622e-05, "loss": 1.9785, "step": 16247000 }, { "epoch": 47.03, "learning_rate": 2.6493532037328638e-05, "loss": 1.9971, "step": 16247500 }, { "epoch": 47.03, "learning_rate": 2.6492808389681367e-05, "loss": 1.9984, "step": 16248000 }, { "epoch": 47.03, "learning_rate": 2.649208474203409e-05, "loss": 2.0143, "step": 16248500 }, { "epoch": 47.03, "learning_rate": 2.6491361094386814e-05, "loss": 2.0059, "step": 16249000 }, { "epoch": 47.04, "learning_rate": 2.6490637446739537e-05, "loss": 2.0115, "step": 16249500 }, { "epoch": 47.04, "learning_rate": 2.648991379909226e-05, "loss": 1.9961, "step": 16250000 }, { "epoch": 47.04, "learning_rate": 2.6489191598740278e-05, "loss": 1.9944, "step": 16250500 }, { "epoch": 47.04, "learning_rate": 2.6488467951093e-05, "loss": 2.0279, "step": 16251000 }, { "epoch": 47.04, "learning_rate": 2.6487744303445722e-05, "loss": 1.9967, "step": 16251500 }, { "epoch": 47.04, "learning_rate": 2.6487020655798445e-05, "loss": 2.01, "step": 16252000 }, { "epoch": 47.04, "learning_rate": 2.6486297008151167e-05, "loss": 1.9854, "step": 16252500 }, { "epoch": 47.05, "learning_rate": 2.648557336050389e-05, "loss": 2.0177, "step": 16253000 }, { "epoch": 47.05, "learning_rate": 2.6484851160151908e-05, "loss": 1.9834, "step": 16253500 }, { "epoch": 47.05, "learning_rate": 2.648412751250463e-05, "loss": 2.0051, "step": 16254000 }, { "epoch": 47.05, "learning_rate": 2.6483403864857352e-05, "loss": 2.0042, "step": 16254500 }, { "epoch": 47.05, "learning_rate": 2.6482680217210075e-05, "loss": 2.0084, "step": 16255000 }, { "epoch": 47.05, "learning_rate": 2.64819580168581e-05, "loss": 2.0143, "step": 16255500 }, { "epoch": 47.05, "learning_rate": 2.6481234369210823e-05, "loss": 2.0037, "step": 16256000 }, { "epoch": 47.06, "learning_rate": 2.6480510721563545e-05, "loss": 1.9933, "step": 16256500 }, { "epoch": 47.06, "learning_rate": 2.6479787073916267e-05, "loss": 1.9866, "step": 16257000 }, { "epoch": 47.06, "learning_rate": 2.647906342626899e-05, "loss": 2.0103, "step": 16257500 }, { "epoch": 47.06, "learning_rate": 2.6478339778621715e-05, "loss": 1.9992, "step": 16258000 }, { "epoch": 47.06, "learning_rate": 2.6477619025565027e-05, "loss": 2.0001, "step": 16258500 }, { "epoch": 47.06, "learning_rate": 2.647689537791775e-05, "loss": 1.9887, "step": 16259000 }, { "epoch": 47.06, "learning_rate": 2.647617173027047e-05, "loss": 2.0086, "step": 16259500 }, { "epoch": 47.07, "learning_rate": 2.6475448082623194e-05, "loss": 1.9814, "step": 16260000 }, { "epoch": 47.07, "learning_rate": 2.6474724434975916e-05, "loss": 2.0141, "step": 16260500 }, { "epoch": 47.07, "learning_rate": 2.6474002234623935e-05, "loss": 1.9882, "step": 16261000 }, { "epoch": 47.07, "learning_rate": 2.6473278586976657e-05, "loss": 2.0177, "step": 16261500 }, { "epoch": 47.07, "learning_rate": 2.647255493932938e-05, "loss": 2.0047, "step": 16262000 }, { "epoch": 47.07, "learning_rate": 2.6471831291682102e-05, "loss": 2.0122, "step": 16262500 }, { "epoch": 47.07, "learning_rate": 2.647110764403483e-05, "loss": 1.9908, "step": 16263000 }, { "epoch": 47.08, "learning_rate": 2.6470383996387553e-05, "loss": 1.9781, "step": 16263500 }, { "epoch": 47.08, "learning_rate": 2.646966034874028e-05, "loss": 2.0095, "step": 16264000 }, { "epoch": 47.08, "learning_rate": 2.6468936701093e-05, "loss": 2.0017, "step": 16264500 }, { "epoch": 47.08, "learning_rate": 2.6468213053445723e-05, "loss": 2.0256, "step": 16265000 }, { "epoch": 47.08, "learning_rate": 2.6467490853093742e-05, "loss": 2.0126, "step": 16265500 }, { "epoch": 47.08, "learning_rate": 2.6466767205446464e-05, "loss": 1.987, "step": 16266000 }, { "epoch": 47.08, "learning_rate": 2.646604500509448e-05, "loss": 1.9914, "step": 16266500 }, { "epoch": 47.09, "learning_rate": 2.6465321357447202e-05, "loss": 1.9869, "step": 16267000 }, { "epoch": 47.09, "learning_rate": 2.6464597709799928e-05, "loss": 2.0148, "step": 16267500 }, { "epoch": 47.09, "learning_rate": 2.646387406215265e-05, "loss": 1.9782, "step": 16268000 }, { "epoch": 47.09, "learning_rate": 2.6463150414505372e-05, "loss": 2.0241, "step": 16268500 }, { "epoch": 47.09, "learning_rate": 2.6462426766858094e-05, "loss": 1.9896, "step": 16269000 }, { "epoch": 47.09, "learning_rate": 2.6461703119210817e-05, "loss": 2.0107, "step": 16269500 }, { "epoch": 47.09, "learning_rate": 2.646098236615413e-05, "loss": 2.0062, "step": 16270000 }, { "epoch": 47.1, "learning_rate": 2.6460258718506854e-05, "loss": 2.0045, "step": 16270500 }, { "epoch": 47.1, "learning_rate": 2.645953507085958e-05, "loss": 2.0029, "step": 16271000 }, { "epoch": 47.1, "learning_rate": 2.6458811423212306e-05, "loss": 2.0221, "step": 16271500 }, { "epoch": 47.1, "learning_rate": 2.645808922286032e-05, "loss": 1.9869, "step": 16272000 }, { "epoch": 47.1, "learning_rate": 2.645736702250834e-05, "loss": 2.0297, "step": 16272500 }, { "epoch": 47.1, "learning_rate": 2.6456643374861062e-05, "loss": 2.0038, "step": 16273000 }, { "epoch": 47.11, "learning_rate": 2.6455919727213785e-05, "loss": 2.0056, "step": 16273500 }, { "epoch": 47.11, "learning_rate": 2.6455196079566507e-05, "loss": 2.0079, "step": 16274000 }, { "epoch": 47.11, "learning_rate": 2.645447243191923e-05, "loss": 1.9754, "step": 16274500 }, { "epoch": 47.11, "learning_rate": 2.6453748784271955e-05, "loss": 1.9992, "step": 16275000 }, { "epoch": 47.11, "learning_rate": 2.6453025136624677e-05, "loss": 1.9686, "step": 16275500 }, { "epoch": 47.11, "learning_rate": 2.64523014889774e-05, "loss": 2.0061, "step": 16276000 }, { "epoch": 47.11, "learning_rate": 2.645157784133012e-05, "loss": 1.9908, "step": 16276500 }, { "epoch": 47.12, "learning_rate": 2.6450854193682844e-05, "loss": 1.9912, "step": 16277000 }, { "epoch": 47.12, "learning_rate": 2.6450130546035566e-05, "loss": 1.9964, "step": 16277500 }, { "epoch": 47.12, "learning_rate": 2.6449406898388295e-05, "loss": 2.0016, "step": 16278000 }, { "epoch": 47.12, "learning_rate": 2.6448683250741017e-05, "loss": 2.0154, "step": 16278500 }, { "epoch": 47.12, "learning_rate": 2.6447959603093743e-05, "loss": 2.0106, "step": 16279000 }, { "epoch": 47.12, "learning_rate": 2.6447235955446465e-05, "loss": 1.9801, "step": 16279500 }, { "epoch": 47.12, "learning_rate": 2.644651375509448e-05, "loss": 2.0191, "step": 16280000 }, { "epoch": 47.13, "learning_rate": 2.6445790107447206e-05, "loss": 1.9984, "step": 16280500 }, { "epoch": 47.13, "learning_rate": 2.6445067907095222e-05, "loss": 1.9883, "step": 16281000 }, { "epoch": 47.13, "learning_rate": 2.6444344259447944e-05, "loss": 2.0342, "step": 16281500 }, { "epoch": 47.13, "learning_rate": 2.644362061180067e-05, "loss": 2.0042, "step": 16282000 }, { "epoch": 47.13, "learning_rate": 2.6442896964153392e-05, "loss": 1.989, "step": 16282500 }, { "epoch": 47.13, "learning_rate": 2.6442173316506114e-05, "loss": 2.0144, "step": 16283000 }, { "epoch": 47.13, "learning_rate": 2.6441449668858836e-05, "loss": 2.0145, "step": 16283500 }, { "epoch": 47.14, "learning_rate": 2.644072602121156e-05, "loss": 1.9841, "step": 16284000 }, { "epoch": 47.14, "learning_rate": 2.644000237356428e-05, "loss": 2.0091, "step": 16284500 }, { "epoch": 47.14, "learning_rate": 2.6439281620507593e-05, "loss": 2.0015, "step": 16285000 }, { "epoch": 47.14, "learning_rate": 2.643855797286032e-05, "loss": 2.0131, "step": 16285500 }, { "epoch": 47.14, "learning_rate": 2.6437834325213044e-05, "loss": 1.9874, "step": 16286000 }, { "epoch": 47.14, "learning_rate": 2.643711067756577e-05, "loss": 2.049, "step": 16286500 }, { "epoch": 47.14, "learning_rate": 2.6436387029918492e-05, "loss": 2.0233, "step": 16287000 }, { "epoch": 47.15, "learning_rate": 2.6435663382271214e-05, "loss": 1.9926, "step": 16287500 }, { "epoch": 47.15, "learning_rate": 2.6434939734623937e-05, "loss": 1.9898, "step": 16288000 }, { "epoch": 47.15, "learning_rate": 2.643421608697666e-05, "loss": 2.0219, "step": 16288500 }, { "epoch": 47.15, "learning_rate": 2.643349243932938e-05, "loss": 2.0217, "step": 16289000 }, { "epoch": 47.15, "learning_rate": 2.6432768791682107e-05, "loss": 1.9784, "step": 16289500 }, { "epoch": 47.15, "learning_rate": 2.643204514403483e-05, "loss": 2.0124, "step": 16290000 }, { "epoch": 47.15, "learning_rate": 2.643132149638755e-05, "loss": 2.007, "step": 16290500 }, { "epoch": 47.16, "learning_rate": 2.643059929603557e-05, "loss": 1.9855, "step": 16291000 }, { "epoch": 47.16, "learning_rate": 2.6429875648388292e-05, "loss": 2.0095, "step": 16291500 }, { "epoch": 47.16, "learning_rate": 2.6429152000741015e-05, "loss": 2.017, "step": 16292000 }, { "epoch": 47.16, "learning_rate": 2.6428428353093737e-05, "loss": 1.9794, "step": 16292500 }, { "epoch": 47.16, "learning_rate": 2.6427704705446466e-05, "loss": 2.0038, "step": 16293000 }, { "epoch": 47.16, "learning_rate": 2.642698250509448e-05, "loss": 2.0104, "step": 16293500 }, { "epoch": 47.16, "learning_rate": 2.64262603047425e-05, "loss": 1.9943, "step": 16294000 }, { "epoch": 47.17, "learning_rate": 2.6425536657095223e-05, "loss": 2.0119, "step": 16294500 }, { "epoch": 47.17, "learning_rate": 2.642481445674324e-05, "loss": 2.0061, "step": 16295000 }, { "epoch": 47.17, "learning_rate": 2.6424092256391257e-05, "loss": 2.0041, "step": 16295500 }, { "epoch": 47.17, "learning_rate": 2.6423368608743983e-05, "loss": 2.0168, "step": 16296000 }, { "epoch": 47.17, "learning_rate": 2.6422644961096705e-05, "loss": 1.983, "step": 16296500 }, { "epoch": 47.17, "learning_rate": 2.6421921313449427e-05, "loss": 2.0104, "step": 16297000 }, { "epoch": 47.17, "learning_rate": 2.6421199113097446e-05, "loss": 1.9985, "step": 16297500 }, { "epoch": 47.18, "learning_rate": 2.642047546545017e-05, "loss": 1.9915, "step": 16298000 }, { "epoch": 47.18, "learning_rate": 2.641975181780289e-05, "loss": 2.0136, "step": 16298500 }, { "epoch": 47.18, "learning_rate": 2.6419028170155613e-05, "loss": 2.0119, "step": 16299000 }, { "epoch": 47.18, "learning_rate": 2.6418304522508335e-05, "loss": 2.0058, "step": 16299500 }, { "epoch": 47.18, "learning_rate": 2.6417582322156354e-05, "loss": 2.0085, "step": 16300000 }, { "epoch": 47.18, "learning_rate": 2.6416858674509076e-05, "loss": 2.0056, "step": 16300500 }, { "epoch": 47.18, "learning_rate": 2.6416135026861805e-05, "loss": 2.0153, "step": 16301000 }, { "epoch": 47.19, "learning_rate": 2.6415411379214527e-05, "loss": 2.0066, "step": 16301500 }, { "epoch": 47.19, "learning_rate": 2.641468773156725e-05, "loss": 1.9834, "step": 16302000 }, { "epoch": 47.19, "learning_rate": 2.6413964083919972e-05, "loss": 2.0004, "step": 16302500 }, { "epoch": 47.19, "learning_rate": 2.6413240436272698e-05, "loss": 2.0015, "step": 16303000 }, { "epoch": 47.19, "learning_rate": 2.641251678862542e-05, "loss": 2.0234, "step": 16303500 }, { "epoch": 47.19, "learning_rate": 2.6411793140978142e-05, "loss": 2.0069, "step": 16304000 }, { "epoch": 47.19, "learning_rate": 2.6411069493330864e-05, "loss": 1.9965, "step": 16304500 }, { "epoch": 47.2, "learning_rate": 2.6410345845683586e-05, "loss": 1.9851, "step": 16305000 }, { "epoch": 47.2, "learning_rate": 2.640962219803631e-05, "loss": 2.0069, "step": 16305500 }, { "epoch": 47.2, "learning_rate": 2.6408898550389034e-05, "loss": 1.9979, "step": 16306000 }, { "epoch": 47.2, "learning_rate": 2.6408174902741757e-05, "loss": 2.0327, "step": 16306500 }, { "epoch": 47.2, "learning_rate": 2.6407452702389772e-05, "loss": 2.001, "step": 16307000 }, { "epoch": 47.2, "learning_rate": 2.6406729054742498e-05, "loss": 2.0299, "step": 16307500 }, { "epoch": 47.2, "learning_rate": 2.640600540709522e-05, "loss": 1.9893, "step": 16308000 }, { "epoch": 47.21, "learning_rate": 2.640528175944795e-05, "loss": 2.0143, "step": 16308500 }, { "epoch": 47.21, "learning_rate": 2.640455811180067e-05, "loss": 2.0184, "step": 16309000 }, { "epoch": 47.21, "learning_rate": 2.6403834464153393e-05, "loss": 1.9848, "step": 16309500 }, { "epoch": 47.21, "learning_rate": 2.6403110816506116e-05, "loss": 2.0062, "step": 16310000 }, { "epoch": 47.21, "learning_rate": 2.6402387168858838e-05, "loss": 1.9871, "step": 16310500 }, { "epoch": 47.21, "learning_rate": 2.6401664968506857e-05, "loss": 1.9977, "step": 16311000 }, { "epoch": 47.22, "learning_rate": 2.640094132085958e-05, "loss": 2.0173, "step": 16311500 }, { "epoch": 47.22, "learning_rate": 2.64002176732123e-05, "loss": 2.0125, "step": 16312000 }, { "epoch": 47.22, "learning_rate": 2.6399494025565024e-05, "loss": 2.013, "step": 16312500 }, { "epoch": 47.22, "learning_rate": 2.639877037791775e-05, "loss": 1.9997, "step": 16313000 }, { "epoch": 47.22, "learning_rate": 2.639804673027047e-05, "loss": 2.0101, "step": 16313500 }, { "epoch": 47.22, "learning_rate": 2.6397323082623194e-05, "loss": 1.9984, "step": 16314000 }, { "epoch": 47.22, "learning_rate": 2.6396599434975916e-05, "loss": 2.0268, "step": 16314500 }, { "epoch": 47.23, "learning_rate": 2.6395875787328638e-05, "loss": 2.0094, "step": 16315000 }, { "epoch": 47.23, "learning_rate": 2.6395152139681367e-05, "loss": 2.0193, "step": 16315500 }, { "epoch": 47.23, "learning_rate": 2.639442849203409e-05, "loss": 2.0044, "step": 16316000 }, { "epoch": 47.23, "learning_rate": 2.6393706291682108e-05, "loss": 2.0221, "step": 16316500 }, { "epoch": 47.23, "learning_rate": 2.6392984091330124e-05, "loss": 2.0269, "step": 16317000 }, { "epoch": 47.23, "learning_rate": 2.639226044368285e-05, "loss": 2.0133, "step": 16317500 }, { "epoch": 47.23, "learning_rate": 2.6391536796035572e-05, "loss": 1.9977, "step": 16318000 }, { "epoch": 47.24, "learning_rate": 2.6390813148388294e-05, "loss": 2.0048, "step": 16318500 }, { "epoch": 47.24, "learning_rate": 2.6390089500741016e-05, "loss": 1.9914, "step": 16319000 }, { "epoch": 47.24, "learning_rate": 2.6389367300389035e-05, "loss": 2.0082, "step": 16319500 }, { "epoch": 47.24, "learning_rate": 2.6388643652741757e-05, "loss": 1.9893, "step": 16320000 }, { "epoch": 47.24, "learning_rate": 2.638792000509448e-05, "loss": 1.99, "step": 16320500 }, { "epoch": 47.24, "learning_rate": 2.6387196357447202e-05, "loss": 2.0123, "step": 16321000 }, { "epoch": 47.24, "learning_rate": 2.6386472709799924e-05, "loss": 2.004, "step": 16321500 }, { "epoch": 47.25, "learning_rate": 2.638574906215265e-05, "loss": 1.9895, "step": 16322000 }, { "epoch": 47.25, "learning_rate": 2.6385025414505372e-05, "loss": 2.0231, "step": 16322500 }, { "epoch": 47.25, "learning_rate": 2.63843017668581e-05, "loss": 2.0211, "step": 16323000 }, { "epoch": 47.25, "learning_rate": 2.6383578119210823e-05, "loss": 2.0197, "step": 16323500 }, { "epoch": 47.25, "learning_rate": 2.6382854471563545e-05, "loss": 2.0166, "step": 16324000 }, { "epoch": 47.25, "learning_rate": 2.6382130823916268e-05, "loss": 2.0057, "step": 16324500 }, { "epoch": 47.25, "learning_rate": 2.638140717626899e-05, "loss": 1.9987, "step": 16325000 }, { "epoch": 47.26, "learning_rate": 2.6380683528621712e-05, "loss": 1.987, "step": 16325500 }, { "epoch": 47.26, "learning_rate": 2.6379959880974438e-05, "loss": 1.9817, "step": 16326000 }, { "epoch": 47.26, "learning_rate": 2.637923623332716e-05, "loss": 1.9871, "step": 16326500 }, { "epoch": 47.26, "learning_rate": 2.6378512585679882e-05, "loss": 2.0226, "step": 16327000 }, { "epoch": 47.26, "learning_rate": 2.63777903853279e-05, "loss": 2.0067, "step": 16327500 }, { "epoch": 47.26, "learning_rate": 2.6377068184975917e-05, "loss": 1.9891, "step": 16328000 }, { "epoch": 47.26, "learning_rate": 2.637634453732864e-05, "loss": 2.0269, "step": 16328500 }, { "epoch": 47.27, "learning_rate": 2.637562088968136e-05, "loss": 2.0247, "step": 16329000 }, { "epoch": 47.27, "learning_rate": 2.6374897242034087e-05, "loss": 2.0083, "step": 16329500 }, { "epoch": 47.27, "learning_rate": 2.637417359438681e-05, "loss": 2.0155, "step": 16330000 }, { "epoch": 47.27, "learning_rate": 2.6373449946739538e-05, "loss": 2.0135, "step": 16330500 }, { "epoch": 47.27, "learning_rate": 2.637272629909226e-05, "loss": 1.9891, "step": 16331000 }, { "epoch": 47.27, "learning_rate": 2.6372002651444982e-05, "loss": 2.0146, "step": 16331500 }, { "epoch": 47.27, "learning_rate": 2.6371279003797705e-05, "loss": 2.0242, "step": 16332000 }, { "epoch": 47.28, "learning_rate": 2.6370555356150427e-05, "loss": 2.0138, "step": 16332500 }, { "epoch": 47.28, "learning_rate": 2.6369833155798446e-05, "loss": 2.0022, "step": 16333000 }, { "epoch": 47.28, "learning_rate": 2.6369109508151168e-05, "loss": 2.0029, "step": 16333500 }, { "epoch": 47.28, "learning_rate": 2.6368387307799187e-05, "loss": 1.9876, "step": 16334000 }, { "epoch": 47.28, "learning_rate": 2.6367665107447203e-05, "loss": 2.0318, "step": 16334500 }, { "epoch": 47.28, "learning_rate": 2.6366941459799925e-05, "loss": 2.0194, "step": 16335000 }, { "epoch": 47.28, "learning_rate": 2.636621781215265e-05, "loss": 2.0375, "step": 16335500 }, { "epoch": 47.29, "learning_rate": 2.6365494164505373e-05, "loss": 1.9954, "step": 16336000 }, { "epoch": 47.29, "learning_rate": 2.6364770516858095e-05, "loss": 2.0106, "step": 16336500 }, { "epoch": 47.29, "learning_rate": 2.6364046869210817e-05, "loss": 1.9903, "step": 16337000 }, { "epoch": 47.29, "learning_rate": 2.636332322156354e-05, "loss": 2.004, "step": 16337500 }, { "epoch": 47.29, "learning_rate": 2.636259957391627e-05, "loss": 2.0019, "step": 16338000 }, { "epoch": 47.29, "learning_rate": 2.636187592626899e-05, "loss": 1.9963, "step": 16338500 }, { "epoch": 47.29, "learning_rate": 2.6361152278621716e-05, "loss": 2.0387, "step": 16339000 }, { "epoch": 47.3, "learning_rate": 2.636042863097444e-05, "loss": 1.9958, "step": 16339500 }, { "epoch": 47.3, "learning_rate": 2.6359706430622454e-05, "loss": 2.0255, "step": 16340000 }, { "epoch": 47.3, "learning_rate": 2.6358982782975176e-05, "loss": 1.9951, "step": 16340500 }, { "epoch": 47.3, "learning_rate": 2.6358259135327902e-05, "loss": 2.0139, "step": 16341000 }, { "epoch": 47.3, "learning_rate": 2.6357536934975917e-05, "loss": 2.018, "step": 16341500 }, { "epoch": 47.3, "learning_rate": 2.635681328732864e-05, "loss": 2.0176, "step": 16342000 }, { "epoch": 47.3, "learning_rate": 2.6356089639681365e-05, "loss": 1.993, "step": 16342500 }, { "epoch": 47.31, "learning_rate": 2.6355365992034088e-05, "loss": 2.0066, "step": 16343000 }, { "epoch": 47.31, "learning_rate": 2.6354643791682103e-05, "loss": 1.9846, "step": 16343500 }, { "epoch": 47.31, "learning_rate": 2.635392014403483e-05, "loss": 2.0079, "step": 16344000 }, { "epoch": 47.31, "learning_rate": 2.635319649638755e-05, "loss": 2.0094, "step": 16344500 }, { "epoch": 47.31, "learning_rate": 2.6352472848740273e-05, "loss": 1.9937, "step": 16345000 }, { "epoch": 47.31, "learning_rate": 2.6351749201093002e-05, "loss": 2.0138, "step": 16345500 }, { "epoch": 47.31, "learning_rate": 2.6351027000741018e-05, "loss": 1.9858, "step": 16346000 }, { "epoch": 47.32, "learning_rate": 2.635030335309374e-05, "loss": 1.9942, "step": 16346500 }, { "epoch": 47.32, "learning_rate": 2.6349579705446466e-05, "loss": 1.9924, "step": 16347000 }, { "epoch": 47.32, "learning_rate": 2.6348856057799188e-05, "loss": 1.9803, "step": 16347500 }, { "epoch": 47.32, "learning_rate": 2.634813241015191e-05, "loss": 2.0046, "step": 16348000 }, { "epoch": 47.32, "learning_rate": 2.6347408762504632e-05, "loss": 2.0043, "step": 16348500 }, { "epoch": 47.32, "learning_rate": 2.634668656215265e-05, "loss": 2.0243, "step": 16349000 }, { "epoch": 47.33, "learning_rate": 2.6345962914505374e-05, "loss": 2.0111, "step": 16349500 }, { "epoch": 47.33, "learning_rate": 2.6345239266858096e-05, "loss": 2.0081, "step": 16350000 }, { "epoch": 47.33, "learning_rate": 2.6344515619210818e-05, "loss": 1.9986, "step": 16350500 }, { "epoch": 47.33, "learning_rate": 2.634379197156354e-05, "loss": 2.0231, "step": 16351000 }, { "epoch": 47.33, "learning_rate": 2.6343068323916266e-05, "loss": 2.0094, "step": 16351500 }, { "epoch": 47.33, "learning_rate": 2.6342344676268988e-05, "loss": 2.0184, "step": 16352000 }, { "epoch": 47.33, "learning_rate": 2.6341621028621717e-05, "loss": 1.9829, "step": 16352500 }, { "epoch": 47.34, "learning_rate": 2.6340898828269733e-05, "loss": 2.0146, "step": 16353000 }, { "epoch": 47.34, "learning_rate": 2.6340175180622455e-05, "loss": 2.0034, "step": 16353500 }, { "epoch": 47.34, "learning_rate": 2.633945153297518e-05, "loss": 2.0014, "step": 16354000 }, { "epoch": 47.34, "learning_rate": 2.6338727885327903e-05, "loss": 2.0199, "step": 16354500 }, { "epoch": 47.34, "learning_rate": 2.6338005684975918e-05, "loss": 2.0218, "step": 16355000 }, { "epoch": 47.34, "learning_rate": 2.633728203732864e-05, "loss": 2.0042, "step": 16355500 }, { "epoch": 47.34, "learning_rate": 2.6336558389681366e-05, "loss": 1.9926, "step": 16356000 }, { "epoch": 47.35, "learning_rate": 2.633583474203409e-05, "loss": 1.9974, "step": 16356500 }, { "epoch": 47.35, "learning_rate": 2.633511109438681e-05, "loss": 2.008, "step": 16357000 }, { "epoch": 47.35, "learning_rate": 2.633438889403483e-05, "loss": 2.0049, "step": 16357500 }, { "epoch": 47.35, "learning_rate": 2.6333665246387552e-05, "loss": 2.0176, "step": 16358000 }, { "epoch": 47.35, "learning_rate": 2.6332941598740274e-05, "loss": 2.0239, "step": 16358500 }, { "epoch": 47.35, "learning_rate": 2.6332217951092996e-05, "loss": 2.0147, "step": 16359000 }, { "epoch": 47.35, "learning_rate": 2.6331495750741015e-05, "loss": 2.0387, "step": 16359500 }, { "epoch": 47.36, "learning_rate": 2.6330772103093737e-05, "loss": 2.0079, "step": 16360000 }, { "epoch": 47.36, "learning_rate": 2.6330048455446466e-05, "loss": 2.0159, "step": 16360500 }, { "epoch": 47.36, "learning_rate": 2.632932480779919e-05, "loss": 2.037, "step": 16361000 }, { "epoch": 47.36, "learning_rate": 2.6328602607447204e-05, "loss": 2.0206, "step": 16361500 }, { "epoch": 47.36, "learning_rate": 2.632787895979993e-05, "loss": 2.0081, "step": 16362000 }, { "epoch": 47.36, "learning_rate": 2.6327155312152652e-05, "loss": 2.0134, "step": 16362500 }, { "epoch": 47.36, "learning_rate": 2.6326431664505374e-05, "loss": 2.0279, "step": 16363000 }, { "epoch": 47.37, "learning_rate": 2.6325708016858097e-05, "loss": 2.0289, "step": 16363500 }, { "epoch": 47.37, "learning_rate": 2.632498436921082e-05, "loss": 2.0225, "step": 16364000 }, { "epoch": 47.37, "learning_rate": 2.6324260721563544e-05, "loss": 2.0154, "step": 16364500 }, { "epoch": 47.37, "learning_rate": 2.6323537073916267e-05, "loss": 2.0336, "step": 16365000 }, { "epoch": 47.37, "learning_rate": 2.632281342626899e-05, "loss": 2.003, "step": 16365500 }, { "epoch": 47.37, "learning_rate": 2.632208977862171e-05, "loss": 1.9761, "step": 16366000 }, { "epoch": 47.37, "learning_rate": 2.632136757826973e-05, "loss": 1.9819, "step": 16366500 }, { "epoch": 47.38, "learning_rate": 2.6320643930622452e-05, "loss": 2.0061, "step": 16367000 }, { "epoch": 47.38, "learning_rate": 2.6319920282975175e-05, "loss": 2.0131, "step": 16367500 }, { "epoch": 47.38, "learning_rate": 2.6319196635327904e-05, "loss": 1.9965, "step": 16368000 }, { "epoch": 47.38, "learning_rate": 2.6318472987680626e-05, "loss": 2.0203, "step": 16368500 }, { "epoch": 47.38, "learning_rate": 2.6317750787328645e-05, "loss": 2.0057, "step": 16369000 }, { "epoch": 47.38, "learning_rate": 2.6317027139681367e-05, "loss": 2.0288, "step": 16369500 }, { "epoch": 47.38, "learning_rate": 2.631630349203409e-05, "loss": 2.0132, "step": 16370000 }, { "epoch": 47.39, "learning_rate": 2.631557984438681e-05, "loss": 2.0267, "step": 16370500 }, { "epoch": 47.39, "learning_rate": 2.6314856196739534e-05, "loss": 2.0316, "step": 16371000 }, { "epoch": 47.39, "learning_rate": 2.6314132549092256e-05, "loss": 1.997, "step": 16371500 }, { "epoch": 47.39, "learning_rate": 2.631340890144498e-05, "loss": 1.9998, "step": 16372000 }, { "epoch": 47.39, "learning_rate": 2.6312685253797704e-05, "loss": 2.0143, "step": 16372500 }, { "epoch": 47.39, "learning_rate": 2.6311964500741016e-05, "loss": 2.0033, "step": 16373000 }, { "epoch": 47.39, "learning_rate": 2.6311240853093738e-05, "loss": 2.0248, "step": 16373500 }, { "epoch": 47.4, "learning_rate": 2.631051720544646e-05, "loss": 1.9824, "step": 16374000 }, { "epoch": 47.4, "learning_rate": 2.630979500509448e-05, "loss": 2.0423, "step": 16374500 }, { "epoch": 47.4, "learning_rate": 2.63090713574472e-05, "loss": 2.0303, "step": 16375000 }, { "epoch": 47.4, "learning_rate": 2.630834770979993e-05, "loss": 1.9959, "step": 16375500 }, { "epoch": 47.4, "learning_rate": 2.6307624062152653e-05, "loss": 2.0308, "step": 16376000 }, { "epoch": 47.4, "learning_rate": 2.6306900414505375e-05, "loss": 2.02, "step": 16376500 }, { "epoch": 47.4, "learning_rate": 2.6306176766858097e-05, "loss": 1.9993, "step": 16377000 }, { "epoch": 47.41, "learning_rate": 2.630545311921082e-05, "loss": 2.012, "step": 16377500 }, { "epoch": 47.41, "learning_rate": 2.6304729471563545e-05, "loss": 1.9946, "step": 16378000 }, { "epoch": 47.41, "learning_rate": 2.6304005823916267e-05, "loss": 2.0324, "step": 16378500 }, { "epoch": 47.41, "learning_rate": 2.630328217626899e-05, "loss": 1.9805, "step": 16379000 }, { "epoch": 47.41, "learning_rate": 2.6302558528621712e-05, "loss": 1.9956, "step": 16379500 }, { "epoch": 47.41, "learning_rate": 2.6301834880974434e-05, "loss": 2.0126, "step": 16380000 }, { "epoch": 47.41, "learning_rate": 2.630111123332716e-05, "loss": 1.9921, "step": 16380500 }, { "epoch": 47.42, "learning_rate": 2.6300389032975175e-05, "loss": 2.0083, "step": 16381000 }, { "epoch": 47.42, "learning_rate": 2.6299665385327898e-05, "loss": 2.0235, "step": 16381500 }, { "epoch": 47.42, "learning_rate": 2.6298943184975916e-05, "loss": 2.0389, "step": 16382000 }, { "epoch": 47.42, "learning_rate": 2.629821953732864e-05, "loss": 2.0282, "step": 16382500 }, { "epoch": 47.42, "learning_rate": 2.6297495889681368e-05, "loss": 1.9951, "step": 16383000 }, { "epoch": 47.42, "learning_rate": 2.629677224203409e-05, "loss": 2.0261, "step": 16383500 }, { "epoch": 47.42, "learning_rate": 2.6296048594386812e-05, "loss": 2.0066, "step": 16384000 }, { "epoch": 47.43, "learning_rate": 2.629532639403483e-05, "loss": 2.0181, "step": 16384500 }, { "epoch": 47.43, "learning_rate": 2.6294602746387553e-05, "loss": 2.0007, "step": 16385000 }, { "epoch": 47.43, "learning_rate": 2.6293879098740276e-05, "loss": 2.0155, "step": 16385500 }, { "epoch": 47.43, "learning_rate": 2.6293155451092998e-05, "loss": 2.002, "step": 16386000 }, { "epoch": 47.43, "learning_rate": 2.6292431803445723e-05, "loss": 2.0183, "step": 16386500 }, { "epoch": 47.43, "learning_rate": 2.6291708155798446e-05, "loss": 2.0276, "step": 16387000 }, { "epoch": 47.44, "learning_rate": 2.6290984508151168e-05, "loss": 2.0376, "step": 16387500 }, { "epoch": 47.44, "learning_rate": 2.629026086050389e-05, "loss": 2.0428, "step": 16388000 }, { "epoch": 47.44, "learning_rate": 2.6289537212856612e-05, "loss": 1.9948, "step": 16388500 }, { "epoch": 47.44, "learning_rate": 2.628881501250463e-05, "loss": 2.0283, "step": 16389000 }, { "epoch": 47.44, "learning_rate": 2.6288091364857354e-05, "loss": 2.0138, "step": 16389500 }, { "epoch": 47.44, "learning_rate": 2.6287367717210076e-05, "loss": 2.0037, "step": 16390000 }, { "epoch": 47.44, "learning_rate": 2.6286645516858098e-05, "loss": 2.0117, "step": 16390500 }, { "epoch": 47.45, "learning_rate": 2.6285921869210824e-05, "loss": 2.0153, "step": 16391000 }, { "epoch": 47.45, "learning_rate": 2.6285198221563546e-05, "loss": 1.996, "step": 16391500 }, { "epoch": 47.45, "learning_rate": 2.6284474573916268e-05, "loss": 2.0256, "step": 16392000 }, { "epoch": 47.45, "learning_rate": 2.628375092626899e-05, "loss": 2.0133, "step": 16392500 }, { "epoch": 47.45, "learning_rate": 2.6283027278621713e-05, "loss": 1.9783, "step": 16393000 }, { "epoch": 47.45, "learning_rate": 2.6282303630974435e-05, "loss": 2.0093, "step": 16393500 }, { "epoch": 47.45, "learning_rate": 2.6281581430622454e-05, "loss": 2.0034, "step": 16394000 }, { "epoch": 47.46, "learning_rate": 2.6280857782975176e-05, "loss": 2.0135, "step": 16394500 }, { "epoch": 47.46, "learning_rate": 2.62801341353279e-05, "loss": 2.0092, "step": 16395000 }, { "epoch": 47.46, "learning_rate": 2.6279410487680624e-05, "loss": 2.0232, "step": 16395500 }, { "epoch": 47.46, "learning_rate": 2.6278686840033346e-05, "loss": 2.0079, "step": 16396000 }, { "epoch": 47.46, "learning_rate": 2.6277964639681362e-05, "loss": 2.0002, "step": 16396500 }, { "epoch": 47.46, "learning_rate": 2.6277240992034084e-05, "loss": 2.0299, "step": 16397000 }, { "epoch": 47.46, "learning_rate": 2.627651734438681e-05, "loss": 2.0163, "step": 16397500 }, { "epoch": 47.47, "learning_rate": 2.6275793696739535e-05, "loss": 2.0058, "step": 16398000 }, { "epoch": 47.47, "learning_rate": 2.6275071496387554e-05, "loss": 1.9934, "step": 16398500 }, { "epoch": 47.47, "learning_rate": 2.6274347848740276e-05, "loss": 2.0191, "step": 16399000 }, { "epoch": 47.47, "learning_rate": 2.6273625648388295e-05, "loss": 2.0023, "step": 16399500 }, { "epoch": 47.47, "learning_rate": 2.6272902000741018e-05, "loss": 2.0137, "step": 16400000 }, { "epoch": 47.47, "learning_rate": 2.627217835309374e-05, "loss": 2.0005, "step": 16400500 }, { "epoch": 47.47, "learning_rate": 2.6271454705446462e-05, "loss": 1.9948, "step": 16401000 }, { "epoch": 47.48, "learning_rate": 2.6270731057799188e-05, "loss": 2.0085, "step": 16401500 }, { "epoch": 47.48, "learning_rate": 2.627000741015191e-05, "loss": 2.0158, "step": 16402000 }, { "epoch": 47.48, "learning_rate": 2.6269283762504632e-05, "loss": 2.0282, "step": 16402500 }, { "epoch": 47.48, "learning_rate": 2.6268560114857354e-05, "loss": 2.0144, "step": 16403000 }, { "epoch": 47.48, "learning_rate": 2.6267836467210077e-05, "loss": 2.002, "step": 16403500 }, { "epoch": 47.48, "learning_rate": 2.62671128195628e-05, "loss": 2.0173, "step": 16404000 }, { "epoch": 47.48, "learning_rate": 2.6266390619210818e-05, "loss": 2.0076, "step": 16404500 }, { "epoch": 47.49, "learning_rate": 2.626566697156354e-05, "loss": 1.9791, "step": 16405000 }, { "epoch": 47.49, "learning_rate": 2.626494332391627e-05, "loss": 1.9964, "step": 16405500 }, { "epoch": 47.49, "learning_rate": 2.626421967626899e-05, "loss": 2.0081, "step": 16406000 }, { "epoch": 47.49, "learning_rate": 2.626349747591701e-05, "loss": 2.002, "step": 16406500 }, { "epoch": 47.49, "learning_rate": 2.6262773828269732e-05, "loss": 2.0224, "step": 16407000 }, { "epoch": 47.49, "learning_rate": 2.6262050180622455e-05, "loss": 2.023, "step": 16407500 }, { "epoch": 47.49, "learning_rate": 2.6261326532975177e-05, "loss": 2.017, "step": 16408000 }, { "epoch": 47.5, "learning_rate": 2.62606028853279e-05, "loss": 2.019, "step": 16408500 }, { "epoch": 47.5, "learning_rate": 2.6259880684975918e-05, "loss": 2.0249, "step": 16409000 }, { "epoch": 47.5, "learning_rate": 2.625915703732864e-05, "loss": 2.0192, "step": 16409500 }, { "epoch": 47.5, "learning_rate": 2.625843483697666e-05, "loss": 2.0133, "step": 16410000 }, { "epoch": 47.5, "learning_rate": 2.625771118932938e-05, "loss": 2.0151, "step": 16410500 }, { "epoch": 47.5, "learning_rate": 2.6256987541682104e-05, "loss": 2.0081, "step": 16411000 }, { "epoch": 47.5, "learning_rate": 2.6256263894034826e-05, "loss": 1.9954, "step": 16411500 }, { "epoch": 47.51, "learning_rate": 2.625554024638755e-05, "loss": 2.0016, "step": 16412000 }, { "epoch": 47.51, "learning_rate": 2.6254816598740274e-05, "loss": 2.0156, "step": 16412500 }, { "epoch": 47.51, "learning_rate": 2.6254092951093003e-05, "loss": 1.9923, "step": 16413000 }, { "epoch": 47.51, "learning_rate": 2.6253369303445725e-05, "loss": 1.9882, "step": 16413500 }, { "epoch": 47.51, "learning_rate": 2.6252645655798447e-05, "loss": 2.0145, "step": 16414000 }, { "epoch": 47.51, "learning_rate": 2.625192200815117e-05, "loss": 2.0059, "step": 16414500 }, { "epoch": 47.51, "learning_rate": 2.6251198360503892e-05, "loss": 1.9963, "step": 16415000 }, { "epoch": 47.52, "learning_rate": 2.625047616015191e-05, "loss": 2.0113, "step": 16415500 }, { "epoch": 47.52, "learning_rate": 2.6249752512504633e-05, "loss": 2.0239, "step": 16416000 }, { "epoch": 47.52, "learning_rate": 2.6249028864857355e-05, "loss": 2.0054, "step": 16416500 }, { "epoch": 47.52, "learning_rate": 2.6248305217210077e-05, "loss": 2.0156, "step": 16417000 }, { "epoch": 47.52, "learning_rate": 2.6247581569562803e-05, "loss": 1.9949, "step": 16417500 }, { "epoch": 47.52, "learning_rate": 2.6246857921915525e-05, "loss": 2.0006, "step": 16418000 }, { "epoch": 47.52, "learning_rate": 2.624613572156354e-05, "loss": 2.0106, "step": 16418500 }, { "epoch": 47.53, "learning_rate": 2.6245412073916263e-05, "loss": 2.0399, "step": 16419000 }, { "epoch": 47.53, "learning_rate": 2.624468842626899e-05, "loss": 2.0177, "step": 16419500 }, { "epoch": 47.53, "learning_rate": 2.6243964778621714e-05, "loss": 2.0142, "step": 16420000 }, { "epoch": 47.53, "learning_rate": 2.624324113097444e-05, "loss": 2.0021, "step": 16420500 }, { "epoch": 47.53, "learning_rate": 2.6242517483327162e-05, "loss": 2.0072, "step": 16421000 }, { "epoch": 47.53, "learning_rate": 2.6241793835679884e-05, "loss": 2.0204, "step": 16421500 }, { "epoch": 47.53, "learning_rate": 2.6241071635327903e-05, "loss": 1.9889, "step": 16422000 }, { "epoch": 47.54, "learning_rate": 2.6240347987680626e-05, "loss": 2.0089, "step": 16422500 }, { "epoch": 47.54, "learning_rate": 2.6239624340033348e-05, "loss": 1.9755, "step": 16423000 }, { "epoch": 47.54, "learning_rate": 2.623890069238607e-05, "loss": 2.013, "step": 16423500 }, { "epoch": 47.54, "learning_rate": 2.6238177044738792e-05, "loss": 2.02, "step": 16424000 }, { "epoch": 47.54, "learning_rate": 2.6237453397091514e-05, "loss": 2.0034, "step": 16424500 }, { "epoch": 47.54, "learning_rate": 2.623672974944424e-05, "loss": 2.0141, "step": 16425000 }, { "epoch": 47.55, "learning_rate": 2.6236007549092256e-05, "loss": 1.993, "step": 16425500 }, { "epoch": 47.55, "learning_rate": 2.6235283901444978e-05, "loss": 2.0122, "step": 16426000 }, { "epoch": 47.55, "learning_rate": 2.6234560253797704e-05, "loss": 2.0107, "step": 16426500 }, { "epoch": 47.55, "learning_rate": 2.6233836606150426e-05, "loss": 2.0276, "step": 16427000 }, { "epoch": 47.55, "learning_rate": 2.6233112958503155e-05, "loss": 2.0161, "step": 16427500 }, { "epoch": 47.55, "learning_rate": 2.6232389310855877e-05, "loss": 1.9924, "step": 16428000 }, { "epoch": 47.55, "learning_rate": 2.62316656632086e-05, "loss": 2.0359, "step": 16428500 }, { "epoch": 47.56, "learning_rate": 2.623094201556132e-05, "loss": 1.9987, "step": 16429000 }, { "epoch": 47.56, "learning_rate": 2.6230218367914044e-05, "loss": 2.0006, "step": 16429500 }, { "epoch": 47.56, "learning_rate": 2.6229494720266766e-05, "loss": 2.0022, "step": 16430000 }, { "epoch": 47.56, "learning_rate": 2.6228772519914785e-05, "loss": 2.0208, "step": 16430500 }, { "epoch": 47.56, "learning_rate": 2.6228048872267507e-05, "loss": 2.0285, "step": 16431000 }, { "epoch": 47.56, "learning_rate": 2.622732522462023e-05, "loss": 2.0319, "step": 16431500 }, { "epoch": 47.56, "learning_rate": 2.622660447156354e-05, "loss": 2.0046, "step": 16432000 }, { "epoch": 47.57, "learning_rate": 2.6225880823916267e-05, "loss": 2.0144, "step": 16432500 }, { "epoch": 47.57, "learning_rate": 2.622515717626899e-05, "loss": 2.0066, "step": 16433000 }, { "epoch": 47.57, "learning_rate": 2.622443352862171e-05, "loss": 1.9969, "step": 16433500 }, { "epoch": 47.57, "learning_rate": 2.6223709880974434e-05, "loss": 1.9998, "step": 16434000 }, { "epoch": 47.57, "learning_rate": 2.6222987680622453e-05, "loss": 1.9973, "step": 16434500 }, { "epoch": 47.57, "learning_rate": 2.6222264032975175e-05, "loss": 2.0095, "step": 16435000 }, { "epoch": 47.57, "learning_rate": 2.6221540385327904e-05, "loss": 2.0239, "step": 16435500 }, { "epoch": 47.58, "learning_rate": 2.6220816737680626e-05, "loss": 2.0055, "step": 16436000 }, { "epoch": 47.58, "learning_rate": 2.6220094537328642e-05, "loss": 2.0246, "step": 16436500 }, { "epoch": 47.58, "learning_rate": 2.6219370889681368e-05, "loss": 2.0227, "step": 16437000 }, { "epoch": 47.58, "learning_rate": 2.621864724203409e-05, "loss": 2.007, "step": 16437500 }, { "epoch": 47.58, "learning_rate": 2.6217923594386812e-05, "loss": 2.0323, "step": 16438000 }, { "epoch": 47.58, "learning_rate": 2.6217199946739534e-05, "loss": 2.0138, "step": 16438500 }, { "epoch": 47.58, "learning_rate": 2.6216476299092256e-05, "loss": 2.0164, "step": 16439000 }, { "epoch": 47.59, "learning_rate": 2.621575265144498e-05, "loss": 2.0208, "step": 16439500 }, { "epoch": 47.59, "learning_rate": 2.6215030451092998e-05, "loss": 2.0453, "step": 16440000 }, { "epoch": 47.59, "learning_rate": 2.621430680344572e-05, "loss": 2.0222, "step": 16440500 }, { "epoch": 47.59, "learning_rate": 2.621358460309374e-05, "loss": 2.0007, "step": 16441000 }, { "epoch": 47.59, "learning_rate": 2.621286095544646e-05, "loss": 2.0023, "step": 16441500 }, { "epoch": 47.59, "learning_rate": 2.6212137307799183e-05, "loss": 1.9812, "step": 16442000 }, { "epoch": 47.59, "learning_rate": 2.6211413660151906e-05, "loss": 2.032, "step": 16442500 }, { "epoch": 47.6, "learning_rate": 2.6210690012504635e-05, "loss": 2.0153, "step": 16443000 }, { "epoch": 47.6, "learning_rate": 2.6209966364857357e-05, "loss": 2.0308, "step": 16443500 }, { "epoch": 47.6, "learning_rate": 2.6209242717210082e-05, "loss": 2.009, "step": 16444000 }, { "epoch": 47.6, "learning_rate": 2.6208519069562805e-05, "loss": 2.0077, "step": 16444500 }, { "epoch": 47.6, "learning_rate": 2.620779686921082e-05, "loss": 2.015, "step": 16445000 }, { "epoch": 47.6, "learning_rate": 2.6207073221563542e-05, "loss": 2.0279, "step": 16445500 }, { "epoch": 47.6, "learning_rate": 2.620635102121156e-05, "loss": 2.0074, "step": 16446000 }, { "epoch": 47.61, "learning_rate": 2.620562882085958e-05, "loss": 2.0173, "step": 16446500 }, { "epoch": 47.61, "learning_rate": 2.6204905173212303e-05, "loss": 2.0228, "step": 16447000 }, { "epoch": 47.61, "learning_rate": 2.6204181525565025e-05, "loss": 2.0211, "step": 16447500 }, { "epoch": 47.61, "learning_rate": 2.6203457877917747e-05, "loss": 2.0023, "step": 16448000 }, { "epoch": 47.61, "learning_rate": 2.620273423027047e-05, "loss": 2.0191, "step": 16448500 }, { "epoch": 47.61, "learning_rate": 2.6202012029918488e-05, "loss": 2.0132, "step": 16449000 }, { "epoch": 47.61, "learning_rate": 2.620128838227121e-05, "loss": 2.0186, "step": 16449500 }, { "epoch": 47.62, "learning_rate": 2.6200564734623933e-05, "loss": 2.0093, "step": 16450000 }, { "epoch": 47.62, "learning_rate": 2.619984108697666e-05, "loss": 1.9901, "step": 16450500 }, { "epoch": 47.62, "learning_rate": 2.6199117439329384e-05, "loss": 2.0089, "step": 16451000 }, { "epoch": 47.62, "learning_rate": 2.6198393791682106e-05, "loss": 2.0226, "step": 16451500 }, { "epoch": 47.62, "learning_rate": 2.6197670144034832e-05, "loss": 2.0172, "step": 16452000 }, { "epoch": 47.62, "learning_rate": 2.6196946496387554e-05, "loss": 2.005, "step": 16452500 }, { "epoch": 47.62, "learning_rate": 2.6196222848740276e-05, "loss": 2.0184, "step": 16453000 }, { "epoch": 47.63, "learning_rate": 2.6195499201093e-05, "loss": 2.0096, "step": 16453500 }, { "epoch": 47.63, "learning_rate": 2.619477555344572e-05, "loss": 2.0126, "step": 16454000 }, { "epoch": 47.63, "learning_rate": 2.6194051905798446e-05, "loss": 2.0163, "step": 16454500 }, { "epoch": 47.63, "learning_rate": 2.619332825815117e-05, "loss": 2.0134, "step": 16455000 }, { "epoch": 47.63, "learning_rate": 2.619260461050389e-05, "loss": 2.0084, "step": 16455500 }, { "epoch": 47.63, "learning_rate": 2.6191880962856613e-05, "loss": 1.9979, "step": 16456000 }, { "epoch": 47.63, "learning_rate": 2.6191157315209335e-05, "loss": 2.0206, "step": 16456500 }, { "epoch": 47.64, "learning_rate": 2.6190435114857354e-05, "loss": 2.0128, "step": 16457000 }, { "epoch": 47.64, "learning_rate": 2.6189711467210076e-05, "loss": 2.0239, "step": 16457500 }, { "epoch": 47.64, "learning_rate": 2.6188987819562805e-05, "loss": 2.0077, "step": 16458000 }, { "epoch": 47.64, "learning_rate": 2.6188264171915528e-05, "loss": 2.0276, "step": 16458500 }, { "epoch": 47.64, "learning_rate": 2.618754052426825e-05, "loss": 2.0049, "step": 16459000 }, { "epoch": 47.64, "learning_rate": 2.6186816876620972e-05, "loss": 2.0126, "step": 16459500 }, { "epoch": 47.64, "learning_rate": 2.6186093228973694e-05, "loss": 2.0108, "step": 16460000 }, { "epoch": 47.65, "learning_rate": 2.618536958132642e-05, "loss": 2.0261, "step": 16460500 }, { "epoch": 47.65, "learning_rate": 2.6184645933679142e-05, "loss": 2.0298, "step": 16461000 }, { "epoch": 47.65, "learning_rate": 2.6183923733327158e-05, "loss": 2.0395, "step": 16461500 }, { "epoch": 47.65, "learning_rate": 2.6183200085679883e-05, "loss": 2.0235, "step": 16462000 }, { "epoch": 47.65, "learning_rate": 2.6182476438032606e-05, "loss": 2.0274, "step": 16462500 }, { "epoch": 47.65, "learning_rate": 2.6181752790385328e-05, "loss": 1.9983, "step": 16463000 }, { "epoch": 47.66, "learning_rate": 2.618102914273805e-05, "loss": 2.0386, "step": 16463500 }, { "epoch": 47.66, "learning_rate": 2.6180305495090772e-05, "loss": 1.9961, "step": 16464000 }, { "epoch": 47.66, "learning_rate": 2.6179581847443495e-05, "loss": 2.0145, "step": 16464500 }, { "epoch": 47.66, "learning_rate": 2.617885964709152e-05, "loss": 2.0203, "step": 16465000 }, { "epoch": 47.66, "learning_rate": 2.6178135999444242e-05, "loss": 2.0087, "step": 16465500 }, { "epoch": 47.66, "learning_rate": 2.6177412351796965e-05, "loss": 1.9872, "step": 16466000 }, { "epoch": 47.66, "learning_rate": 2.6176690151444984e-05, "loss": 2.0184, "step": 16466500 }, { "epoch": 47.67, "learning_rate": 2.6175966503797706e-05, "loss": 2.0363, "step": 16467000 }, { "epoch": 47.67, "learning_rate": 2.6175242856150428e-05, "loss": 2.0197, "step": 16467500 }, { "epoch": 47.67, "learning_rate": 2.617451920850315e-05, "loss": 2.0042, "step": 16468000 }, { "epoch": 47.67, "learning_rate": 2.6173795560855873e-05, "loss": 2.0245, "step": 16468500 }, { "epoch": 47.67, "learning_rate": 2.6173071913208598e-05, "loss": 2.0087, "step": 16469000 }, { "epoch": 47.67, "learning_rate": 2.6172349712856614e-05, "loss": 2.0033, "step": 16469500 }, { "epoch": 47.67, "learning_rate": 2.6171626065209336e-05, "loss": 1.9989, "step": 16470000 }, { "epoch": 47.68, "learning_rate": 2.6170902417562058e-05, "loss": 2.0101, "step": 16470500 }, { "epoch": 47.68, "learning_rate": 2.6170178769914784e-05, "loss": 2.0037, "step": 16471000 }, { "epoch": 47.68, "learning_rate": 2.6169455122267506e-05, "loss": 1.9838, "step": 16471500 }, { "epoch": 47.68, "learning_rate": 2.616873147462023e-05, "loss": 2.0089, "step": 16472000 }, { "epoch": 47.68, "learning_rate": 2.6168007826972957e-05, "loss": 2.0172, "step": 16472500 }, { "epoch": 47.68, "learning_rate": 2.616728417932568e-05, "loss": 2.0054, "step": 16473000 }, { "epoch": 47.68, "learning_rate": 2.6166560531678402e-05, "loss": 2.0122, "step": 16473500 }, { "epoch": 47.69, "learning_rate": 2.6165836884031124e-05, "loss": 1.9878, "step": 16474000 }, { "epoch": 47.69, "learning_rate": 2.6165114683679143e-05, "loss": 2.0214, "step": 16474500 }, { "epoch": 47.69, "learning_rate": 2.6164391036031865e-05, "loss": 2.0222, "step": 16475000 }, { "epoch": 47.69, "learning_rate": 2.6163667388384587e-05, "loss": 2.0353, "step": 16475500 }, { "epoch": 47.69, "learning_rate": 2.616294374073731e-05, "loss": 2.0336, "step": 16476000 }, { "epoch": 47.69, "learning_rate": 2.6162220093090035e-05, "loss": 2.0132, "step": 16476500 }, { "epoch": 47.69, "learning_rate": 2.6161496445442758e-05, "loss": 2.0079, "step": 16477000 }, { "epoch": 47.7, "learning_rate": 2.616077279779548e-05, "loss": 1.9968, "step": 16477500 }, { "epoch": 47.7, "learning_rate": 2.6160049150148202e-05, "loss": 2.0178, "step": 16478000 }, { "epoch": 47.7, "learning_rate": 2.6159325502500924e-05, "loss": 2.0029, "step": 16478500 }, { "epoch": 47.7, "learning_rate": 2.615860185485365e-05, "loss": 2.0024, "step": 16479000 }, { "epoch": 47.7, "learning_rate": 2.6157878207206375e-05, "loss": 2.0351, "step": 16479500 }, { "epoch": 47.7, "learning_rate": 2.61571545595591e-05, "loss": 2.0048, "step": 16480000 }, { "epoch": 47.7, "learning_rate": 2.6156430911911823e-05, "loss": 1.9937, "step": 16480500 }, { "epoch": 47.71, "learning_rate": 2.615570871155984e-05, "loss": 2.0192, "step": 16481000 }, { "epoch": 47.71, "learning_rate": 2.615498506391256e-05, "loss": 2.007, "step": 16481500 }, { "epoch": 47.71, "learning_rate": 2.615426286356058e-05, "loss": 2.024, "step": 16482000 }, { "epoch": 47.71, "learning_rate": 2.6153539215913302e-05, "loss": 2.0016, "step": 16482500 }, { "epoch": 47.71, "learning_rate": 2.6152815568266025e-05, "loss": 2.0195, "step": 16483000 }, { "epoch": 47.71, "learning_rate": 2.615209192061875e-05, "loss": 2.0247, "step": 16483500 }, { "epoch": 47.71, "learning_rate": 2.6151368272971472e-05, "loss": 2.0028, "step": 16484000 }, { "epoch": 47.72, "learning_rate": 2.6150644625324195e-05, "loss": 2.031, "step": 16484500 }, { "epoch": 47.72, "learning_rate": 2.6149920977676917e-05, "loss": 2.0452, "step": 16485000 }, { "epoch": 47.72, "learning_rate": 2.6149198777324936e-05, "loss": 2.01, "step": 16485500 }, { "epoch": 47.72, "learning_rate": 2.6148475129677658e-05, "loss": 2.0249, "step": 16486000 }, { "epoch": 47.72, "learning_rate": 2.614775148203038e-05, "loss": 2.0292, "step": 16486500 }, { "epoch": 47.72, "learning_rate": 2.614702783438311e-05, "loss": 2.0207, "step": 16487000 }, { "epoch": 47.72, "learning_rate": 2.614630418673583e-05, "loss": 2.0141, "step": 16487500 }, { "epoch": 47.73, "learning_rate": 2.6145580539088554e-05, "loss": 2.019, "step": 16488000 }, { "epoch": 47.73, "learning_rate": 2.6144856891441276e-05, "loss": 2.0158, "step": 16488500 }, { "epoch": 47.73, "learning_rate": 2.6144133243794e-05, "loss": 2.0228, "step": 16489000 }, { "epoch": 47.73, "learning_rate": 2.6143409596146724e-05, "loss": 2.0079, "step": 16489500 }, { "epoch": 47.73, "learning_rate": 2.6142685948499446e-05, "loss": 2.0218, "step": 16490000 }, { "epoch": 47.73, "learning_rate": 2.6141962300852168e-05, "loss": 2.0142, "step": 16490500 }, { "epoch": 47.73, "learning_rate": 2.614123865320489e-05, "loss": 2.021, "step": 16491000 }, { "epoch": 47.74, "learning_rate": 2.6140515005557613e-05, "loss": 2.0316, "step": 16491500 }, { "epoch": 47.74, "learning_rate": 2.6139792805205632e-05, "loss": 2.0112, "step": 16492000 }, { "epoch": 47.74, "learning_rate": 2.6139072052148944e-05, "loss": 2.0289, "step": 16492500 }, { "epoch": 47.74, "learning_rate": 2.613835274638755e-05, "loss": 2.0284, "step": 16493000 }, { "epoch": 47.74, "learning_rate": 2.6137629098740275e-05, "loss": 2.0181, "step": 16493500 }, { "epoch": 47.74, "learning_rate": 2.6136905451092997e-05, "loss": 2.0128, "step": 16494000 }, { "epoch": 47.74, "learning_rate": 2.613618180344572e-05, "loss": 2.0045, "step": 16494500 }, { "epoch": 47.75, "learning_rate": 2.6135458155798442e-05, "loss": 2.0055, "step": 16495000 }, { "epoch": 47.75, "learning_rate": 2.613473450815117e-05, "loss": 2.0012, "step": 16495500 }, { "epoch": 47.75, "learning_rate": 2.6134010860503893e-05, "loss": 2.0211, "step": 16496000 }, { "epoch": 47.75, "learning_rate": 2.6133291554742502e-05, "loss": 2.046, "step": 16496500 }, { "epoch": 47.75, "learning_rate": 2.6132567907095224e-05, "loss": 2.0706, "step": 16497000 }, { "epoch": 47.75, "learning_rate": 2.6131844259447947e-05, "loss": 2.0104, "step": 16497500 }, { "epoch": 47.75, "learning_rate": 2.613112061180067e-05, "loss": 2.02, "step": 16498000 }, { "epoch": 47.76, "learning_rate": 2.613039696415339e-05, "loss": 2.0103, "step": 16498500 }, { "epoch": 47.76, "learning_rate": 2.6129673316506113e-05, "loss": 2.0073, "step": 16499000 }, { "epoch": 47.76, "learning_rate": 2.612894966885884e-05, "loss": 2.0256, "step": 16499500 }, { "epoch": 47.76, "learning_rate": 2.612822602121156e-05, "loss": 2.0203, "step": 16500000 }, { "epoch": 47.76, "learning_rate": 2.6127502373564283e-05, "loss": 2.0095, "step": 16500500 }, { "epoch": 47.76, "learning_rate": 2.6126778725917006e-05, "loss": 2.011, "step": 16501000 }, { "epoch": 47.77, "learning_rate": 2.6126055078269728e-05, "loss": 2.0105, "step": 16501500 }, { "epoch": 47.77, "learning_rate": 2.612533143062245e-05, "loss": 2.0139, "step": 16502000 }, { "epoch": 47.77, "learning_rate": 2.6124607782975176e-05, "loss": 2.0296, "step": 16502500 }, { "epoch": 47.77, "learning_rate": 2.61238841353279e-05, "loss": 2.0275, "step": 16503000 }, { "epoch": 47.77, "learning_rate": 2.6123160487680627e-05, "loss": 1.9994, "step": 16503500 }, { "epoch": 47.77, "learning_rate": 2.612243684003335e-05, "loss": 2.0088, "step": 16504000 }, { "epoch": 47.77, "learning_rate": 2.612171319238607e-05, "loss": 2.024, "step": 16504500 }, { "epoch": 47.78, "learning_rate": 2.6120989544738794e-05, "loss": 2.0437, "step": 16505000 }, { "epoch": 47.78, "learning_rate": 2.6120265897091516e-05, "loss": 2.0048, "step": 16505500 }, { "epoch": 47.78, "learning_rate": 2.6119543696739535e-05, "loss": 2.0066, "step": 16506000 }, { "epoch": 47.78, "learning_rate": 2.6118820049092257e-05, "loss": 2.0295, "step": 16506500 }, { "epoch": 47.78, "learning_rate": 2.611809640144498e-05, "loss": 2.0129, "step": 16507000 }, { "epoch": 47.78, "learning_rate": 2.61173727537977e-05, "loss": 2.0167, "step": 16507500 }, { "epoch": 47.78, "learning_rate": 2.6116649106150427e-05, "loss": 2.0313, "step": 16508000 }, { "epoch": 47.79, "learning_rate": 2.611592545850315e-05, "loss": 1.9929, "step": 16508500 }, { "epoch": 47.79, "learning_rate": 2.611520181085587e-05, "loss": 2.0028, "step": 16509000 }, { "epoch": 47.79, "learning_rate": 2.6114478163208594e-05, "loss": 2.0331, "step": 16509500 }, { "epoch": 47.79, "learning_rate": 2.6113754515561323e-05, "loss": 2.0007, "step": 16510000 }, { "epoch": 47.79, "learning_rate": 2.6113030867914045e-05, "loss": 2.0001, "step": 16510500 }, { "epoch": 47.79, "learning_rate": 2.6112307220266767e-05, "loss": 2.015, "step": 16511000 }, { "epoch": 47.79, "learning_rate": 2.6111583572619493e-05, "loss": 2.0053, "step": 16511500 }, { "epoch": 47.8, "learning_rate": 2.6110859924972215e-05, "loss": 1.9972, "step": 16512000 }, { "epoch": 47.8, "learning_rate": 2.6110136277324937e-05, "loss": 1.9995, "step": 16512500 }, { "epoch": 47.8, "learning_rate": 2.610941262967766e-05, "loss": 2.0346, "step": 16513000 }, { "epoch": 47.8, "learning_rate": 2.6108688982030382e-05, "loss": 2.0162, "step": 16513500 }, { "epoch": 47.8, "learning_rate": 2.6107968228973694e-05, "loss": 2.0156, "step": 16514000 }, { "epoch": 47.8, "learning_rate": 2.6107246028621713e-05, "loss": 2.0085, "step": 16514500 }, { "epoch": 47.8, "learning_rate": 2.6106522380974435e-05, "loss": 2.0156, "step": 16515000 }, { "epoch": 47.81, "learning_rate": 2.6105798733327158e-05, "loss": 2.0203, "step": 16515500 }, { "epoch": 47.81, "learning_rate": 2.610507508567988e-05, "loss": 2.0291, "step": 16516000 }, { "epoch": 47.81, "learning_rate": 2.6104351438032605e-05, "loss": 2.0198, "step": 16516500 }, { "epoch": 47.81, "learning_rate": 2.6103627790385328e-05, "loss": 1.9924, "step": 16517000 }, { "epoch": 47.81, "learning_rate": 2.6102904142738057e-05, "loss": 2.0169, "step": 16517500 }, { "epoch": 47.81, "learning_rate": 2.610218049509078e-05, "loss": 2.0096, "step": 16518000 }, { "epoch": 47.81, "learning_rate": 2.61014568474435e-05, "loss": 2.0314, "step": 16518500 }, { "epoch": 47.82, "learning_rate": 2.6100733199796223e-05, "loss": 2.011, "step": 16519000 }, { "epoch": 47.82, "learning_rate": 2.6100009552148946e-05, "loss": 2.0148, "step": 16519500 }, { "epoch": 47.82, "learning_rate": 2.6099285904501668e-05, "loss": 2.0172, "step": 16520000 }, { "epoch": 47.82, "learning_rate": 2.6098562256854393e-05, "loss": 2.0628, "step": 16520500 }, { "epoch": 47.82, "learning_rate": 2.6097838609207116e-05, "loss": 2.0224, "step": 16521000 }, { "epoch": 47.82, "learning_rate": 2.6097114961559838e-05, "loss": 2.0335, "step": 16521500 }, { "epoch": 47.82, "learning_rate": 2.6096392761207857e-05, "loss": 2.021, "step": 16522000 }, { "epoch": 47.83, "learning_rate": 2.609566911356058e-05, "loss": 2.0101, "step": 16522500 }, { "epoch": 47.83, "learning_rate": 2.60949454659133e-05, "loss": 2.0287, "step": 16523000 }, { "epoch": 47.83, "learning_rate": 2.6094223265561317e-05, "loss": 2.0177, "step": 16523500 }, { "epoch": 47.83, "learning_rate": 2.6093499617914042e-05, "loss": 2.0136, "step": 16524000 }, { "epoch": 47.83, "learning_rate": 2.6092775970266765e-05, "loss": 2.0332, "step": 16524500 }, { "epoch": 47.83, "learning_rate": 2.6092052322619494e-05, "loss": 2.0135, "step": 16525000 }, { "epoch": 47.83, "learning_rate": 2.6091328674972216e-05, "loss": 2.0016, "step": 16525500 }, { "epoch": 47.84, "learning_rate": 2.609060647462023e-05, "loss": 2.0189, "step": 16526000 }, { "epoch": 47.84, "learning_rate": 2.608988427426825e-05, "loss": 2.016, "step": 16526500 }, { "epoch": 47.84, "learning_rate": 2.6089160626620973e-05, "loss": 2.0256, "step": 16527000 }, { "epoch": 47.84, "learning_rate": 2.6088436978973695e-05, "loss": 1.9924, "step": 16527500 }, { "epoch": 47.84, "learning_rate": 2.6087713331326417e-05, "loss": 2.0231, "step": 16528000 }, { "epoch": 47.84, "learning_rate": 2.6086989683679143e-05, "loss": 1.9967, "step": 16528500 }, { "epoch": 47.84, "learning_rate": 2.6086266036031865e-05, "loss": 2.0209, "step": 16529000 }, { "epoch": 47.85, "learning_rate": 2.6085542388384587e-05, "loss": 2.0127, "step": 16529500 }, { "epoch": 47.85, "learning_rate": 2.608481874073731e-05, "loss": 2.0141, "step": 16530000 }, { "epoch": 47.85, "learning_rate": 2.6084095093090032e-05, "loss": 2.0303, "step": 16530500 }, { "epoch": 47.85, "learning_rate": 2.6083371445442757e-05, "loss": 1.9991, "step": 16531000 }, { "epoch": 47.85, "learning_rate": 2.608264779779548e-05, "loss": 2.0296, "step": 16531500 }, { "epoch": 47.85, "learning_rate": 2.608192415014821e-05, "loss": 2.0148, "step": 16532000 }, { "epoch": 47.85, "learning_rate": 2.608120050250093e-05, "loss": 2.0237, "step": 16532500 }, { "epoch": 47.86, "learning_rate": 2.6080476854853653e-05, "loss": 1.985, "step": 16533000 }, { "epoch": 47.86, "learning_rate": 2.6079753207206375e-05, "loss": 1.9955, "step": 16533500 }, { "epoch": 47.86, "learning_rate": 2.6079029559559098e-05, "loss": 1.9841, "step": 16534000 }, { "epoch": 47.86, "learning_rate": 2.6078307359207116e-05, "loss": 2.0266, "step": 16534500 }, { "epoch": 47.86, "learning_rate": 2.6077585158855132e-05, "loss": 2.0053, "step": 16535000 }, { "epoch": 47.86, "learning_rate": 2.6076861511207858e-05, "loss": 1.9952, "step": 16535500 }, { "epoch": 47.86, "learning_rate": 2.607613786356058e-05, "loss": 2.0188, "step": 16536000 }, { "epoch": 47.87, "learning_rate": 2.6075414215913302e-05, "loss": 2.0231, "step": 16536500 }, { "epoch": 47.87, "learning_rate": 2.6074690568266024e-05, "loss": 2.0253, "step": 16537000 }, { "epoch": 47.87, "learning_rate": 2.6073966920618747e-05, "loss": 2.0285, "step": 16537500 }, { "epoch": 47.87, "learning_rate": 2.607324327297147e-05, "loss": 2.0218, "step": 16538000 }, { "epoch": 47.87, "learning_rate": 2.6072519625324194e-05, "loss": 2.0481, "step": 16538500 }, { "epoch": 47.87, "learning_rate": 2.6071795977676917e-05, "loss": 1.9992, "step": 16539000 }, { "epoch": 47.88, "learning_rate": 2.607107377732494e-05, "loss": 1.9858, "step": 16539500 }, { "epoch": 47.88, "learning_rate": 2.607035012967766e-05, "loss": 2.015, "step": 16540000 }, { "epoch": 47.88, "learning_rate": 2.6069626482030383e-05, "loss": 2.0117, "step": 16540500 }, { "epoch": 47.88, "learning_rate": 2.606890283438311e-05, "loss": 2.0371, "step": 16541000 }, { "epoch": 47.88, "learning_rate": 2.606817918673583e-05, "loss": 2.0309, "step": 16541500 }, { "epoch": 47.88, "learning_rate": 2.6067456986383847e-05, "loss": 2.0118, "step": 16542000 }, { "epoch": 47.88, "learning_rate": 2.606673623332716e-05, "loss": 2.0005, "step": 16542500 }, { "epoch": 47.89, "learning_rate": 2.6066012585679885e-05, "loss": 1.9945, "step": 16543000 }, { "epoch": 47.89, "learning_rate": 2.6065288938032607e-05, "loss": 2.0138, "step": 16543500 }, { "epoch": 47.89, "learning_rate": 2.606456529038533e-05, "loss": 2.0057, "step": 16544000 }, { "epoch": 47.89, "learning_rate": 2.6063843090033345e-05, "loss": 2.0058, "step": 16544500 }, { "epoch": 47.89, "learning_rate": 2.606311944238607e-05, "loss": 2.019, "step": 16545000 }, { "epoch": 47.89, "learning_rate": 2.6062395794738793e-05, "loss": 2.0154, "step": 16545500 }, { "epoch": 47.89, "learning_rate": 2.6061672147091515e-05, "loss": 2.04, "step": 16546000 }, { "epoch": 47.9, "learning_rate": 2.6060948499444237e-05, "loss": 2.0019, "step": 16546500 }, { "epoch": 47.9, "learning_rate": 2.6060226299092256e-05, "loss": 2.0092, "step": 16547000 }, { "epoch": 47.9, "learning_rate": 2.6059502651444985e-05, "loss": 2.0384, "step": 16547500 }, { "epoch": 47.9, "learning_rate": 2.6058779003797707e-05, "loss": 2.005, "step": 16548000 }, { "epoch": 47.9, "learning_rate": 2.605805535615043e-05, "loss": 2.0226, "step": 16548500 }, { "epoch": 47.9, "learning_rate": 2.605733315579845e-05, "loss": 2.0074, "step": 16549000 }, { "epoch": 47.9, "learning_rate": 2.605660950815117e-05, "loss": 1.9913, "step": 16549500 }, { "epoch": 47.91, "learning_rate": 2.6055885860503893e-05, "loss": 2.0255, "step": 16550000 }, { "epoch": 47.91, "learning_rate": 2.6055162212856615e-05, "loss": 2.0296, "step": 16550500 }, { "epoch": 47.91, "learning_rate": 2.6054438565209337e-05, "loss": 2.0274, "step": 16551000 }, { "epoch": 47.91, "learning_rate": 2.605371491756206e-05, "loss": 2.0208, "step": 16551500 }, { "epoch": 47.91, "learning_rate": 2.6052991269914785e-05, "loss": 2.0285, "step": 16552000 }, { "epoch": 47.91, "learning_rate": 2.6052267622267507e-05, "loss": 1.9884, "step": 16552500 }, { "epoch": 47.91, "learning_rate": 2.605154397462023e-05, "loss": 2.0162, "step": 16553000 }, { "epoch": 47.92, "learning_rate": 2.6050820326972952e-05, "loss": 2.0141, "step": 16553500 }, { "epoch": 47.92, "learning_rate": 2.6050096679325674e-05, "loss": 1.9977, "step": 16554000 }, { "epoch": 47.92, "learning_rate": 2.6049373031678396e-05, "loss": 2.0303, "step": 16554500 }, { "epoch": 47.92, "learning_rate": 2.6048650831326422e-05, "loss": 2.0048, "step": 16555000 }, { "epoch": 47.92, "learning_rate": 2.6047927183679144e-05, "loss": 2.0163, "step": 16555500 }, { "epoch": 47.92, "learning_rate": 2.6047203536031867e-05, "loss": 2.0164, "step": 16556000 }, { "epoch": 47.92, "learning_rate": 2.604647988838459e-05, "loss": 2.0243, "step": 16556500 }, { "epoch": 47.93, "learning_rate": 2.6045757688032608e-05, "loss": 2.0285, "step": 16557000 }, { "epoch": 47.93, "learning_rate": 2.604503404038533e-05, "loss": 2.0346, "step": 16557500 }, { "epoch": 47.93, "learning_rate": 2.6044310392738052e-05, "loss": 2.0388, "step": 16558000 }, { "epoch": 47.93, "learning_rate": 2.604358819238607e-05, "loss": 2.0052, "step": 16558500 }, { "epoch": 47.93, "learning_rate": 2.6042864544738793e-05, "loss": 2.02, "step": 16559000 }, { "epoch": 47.93, "learning_rate": 2.6042140897091516e-05, "loss": 2.0247, "step": 16559500 }, { "epoch": 47.93, "learning_rate": 2.6041417249444238e-05, "loss": 2.0152, "step": 16560000 }, { "epoch": 47.94, "learning_rate": 2.604069360179696e-05, "loss": 2.0407, "step": 16560500 }, { "epoch": 47.94, "learning_rate": 2.603997140144498e-05, "loss": 1.9922, "step": 16561000 }, { "epoch": 47.94, "learning_rate": 2.60392477537977e-05, "loss": 2.02, "step": 16561500 }, { "epoch": 47.94, "learning_rate": 2.6038524106150424e-05, "loss": 2.0239, "step": 16562000 }, { "epoch": 47.94, "learning_rate": 2.6037800458503153e-05, "loss": 2.0137, "step": 16562500 }, { "epoch": 47.94, "learning_rate": 2.6037076810855875e-05, "loss": 2.0181, "step": 16563000 }, { "epoch": 47.94, "learning_rate": 2.60363531632086e-05, "loss": 2.0154, "step": 16563500 }, { "epoch": 47.95, "learning_rate": 2.6035629515561323e-05, "loss": 2.0037, "step": 16564000 }, { "epoch": 47.95, "learning_rate": 2.6034905867914045e-05, "loss": 2.0053, "step": 16564500 }, { "epoch": 47.95, "learning_rate": 2.6034182220266767e-05, "loss": 2.0177, "step": 16565000 }, { "epoch": 47.95, "learning_rate": 2.603345857261949e-05, "loss": 2.0047, "step": 16565500 }, { "epoch": 47.95, "learning_rate": 2.603273492497221e-05, "loss": 2.0063, "step": 16566000 }, { "epoch": 47.95, "learning_rate": 2.6032011277324937e-05, "loss": 2.005, "step": 16566500 }, { "epoch": 47.95, "learning_rate": 2.603128762967766e-05, "loss": 2.0106, "step": 16567000 }, { "epoch": 47.96, "learning_rate": 2.6030565429325675e-05, "loss": 2.0004, "step": 16567500 }, { "epoch": 47.96, "learning_rate": 2.60298417816784e-05, "loss": 1.9813, "step": 16568000 }, { "epoch": 47.96, "learning_rate": 2.6029118134031123e-05, "loss": 2.0217, "step": 16568500 }, { "epoch": 47.96, "learning_rate": 2.6028394486383845e-05, "loss": 2.0139, "step": 16569000 }, { "epoch": 47.96, "learning_rate": 2.602767228603186e-05, "loss": 2.0038, "step": 16569500 }, { "epoch": 47.96, "learning_rate": 2.602694863838459e-05, "loss": 2.0123, "step": 16570000 }, { "epoch": 47.96, "learning_rate": 2.602622643803261e-05, "loss": 2.0323, "step": 16570500 }, { "epoch": 47.97, "learning_rate": 2.602550279038533e-05, "loss": 2.0187, "step": 16571000 }, { "epoch": 47.97, "learning_rate": 2.602478059003335e-05, "loss": 2.0152, "step": 16571500 }, { "epoch": 47.97, "learning_rate": 2.6024056942386072e-05, "loss": 1.9897, "step": 16572000 }, { "epoch": 47.97, "learning_rate": 2.6023333294738794e-05, "loss": 2.0136, "step": 16572500 }, { "epoch": 47.97, "learning_rate": 2.6022609647091516e-05, "loss": 2.0093, "step": 16573000 }, { "epoch": 47.97, "learning_rate": 2.602188599944424e-05, "loss": 1.9944, "step": 16573500 }, { "epoch": 47.97, "learning_rate": 2.6021162351796964e-05, "loss": 2.0217, "step": 16574000 }, { "epoch": 47.98, "learning_rate": 2.6020438704149687e-05, "loss": 2.0159, "step": 16574500 }, { "epoch": 47.98, "learning_rate": 2.601971505650241e-05, "loss": 2.0156, "step": 16575000 }, { "epoch": 47.98, "learning_rate": 2.601899140885513e-05, "loss": 2.0112, "step": 16575500 }, { "epoch": 47.98, "learning_rate": 2.601826920850315e-05, "loss": 2.0098, "step": 16576000 }, { "epoch": 47.98, "learning_rate": 2.6017545560855872e-05, "loss": 2.0277, "step": 16576500 }, { "epoch": 47.98, "learning_rate": 2.6016821913208594e-05, "loss": 2.0026, "step": 16577000 }, { "epoch": 47.99, "learning_rate": 2.6016098265561323e-05, "loss": 2.015, "step": 16577500 }, { "epoch": 47.99, "learning_rate": 2.6015374617914046e-05, "loss": 2.033, "step": 16578000 }, { "epoch": 47.99, "learning_rate": 2.6014650970266768e-05, "loss": 2.0095, "step": 16578500 }, { "epoch": 47.99, "learning_rate": 2.601392732261949e-05, "loss": 2.0216, "step": 16579000 }, { "epoch": 47.99, "learning_rate": 2.601320512226751e-05, "loss": 2.0012, "step": 16579500 }, { "epoch": 47.99, "learning_rate": 2.601248147462023e-05, "loss": 1.9971, "step": 16580000 }, { "epoch": 47.99, "learning_rate": 2.6011757826972954e-05, "loss": 2.0165, "step": 16580500 }, { "epoch": 48.0, "learning_rate": 2.6011034179325676e-05, "loss": 2.0261, "step": 16581000 }, { "epoch": 48.0, "learning_rate": 2.60103105316784e-05, "loss": 2.0208, "step": 16581500 }, { "epoch": 48.0, "learning_rate": 2.6009586884031124e-05, "loss": 1.9938, "step": 16582000 }, { "epoch": 48.0, "learning_rate": 2.6008863236383846e-05, "loss": 2.0093, "step": 16582500 }, { "epoch": 48.0, "eval_accuracy": 0.6727647036358356, "eval_accuracy_mlm": 0.6387673823676272, "eval_accuracy_nsp": 0.8549999312535232, "eval_loss": 2.163412570953369, "eval_runtime": 331.6025, "eval_samples_per_second": 1315.991, "eval_steps_per_second": 54.834, "step": 16582656 }, { "epoch": 48.0, "learning_rate": 2.6008139588736568e-05, "loss": 2.0014, "step": 16583000 }, { "epoch": 48.0, "learning_rate": 2.600741594108929e-05, "loss": 1.9942, "step": 16583500 }, { "epoch": 48.0, "learning_rate": 2.6006692293442016e-05, "loss": 1.9808, "step": 16584000 }, { "epoch": 48.01, "learning_rate": 2.600596864579474e-05, "loss": 1.9759, "step": 16584500 }, { "epoch": 48.01, "learning_rate": 2.6005244998147467e-05, "loss": 2.0071, "step": 16585000 }, { "epoch": 48.01, "learning_rate": 2.600452135050019e-05, "loss": 2.0084, "step": 16585500 }, { "epoch": 48.01, "learning_rate": 2.6003799150148205e-05, "loss": 2.0025, "step": 16586000 }, { "epoch": 48.01, "learning_rate": 2.6003078397091517e-05, "loss": 1.9974, "step": 16586500 }, { "epoch": 48.01, "learning_rate": 2.600235474944424e-05, "loss": 1.9839, "step": 16587000 }, { "epoch": 48.01, "learning_rate": 2.6001631101796965e-05, "loss": 1.978, "step": 16587500 }, { "epoch": 48.02, "learning_rate": 2.6000907454149687e-05, "loss": 2.0232, "step": 16588000 }, { "epoch": 48.02, "learning_rate": 2.600018380650241e-05, "loss": 2.0117, "step": 16588500 }, { "epoch": 48.02, "learning_rate": 2.5999463053445722e-05, "loss": 1.9927, "step": 16589000 }, { "epoch": 48.02, "learning_rate": 2.599874085309374e-05, "loss": 2.0049, "step": 16589500 }, { "epoch": 48.02, "learning_rate": 2.5998017205446463e-05, "loss": 1.9852, "step": 16590000 }, { "epoch": 48.02, "learning_rate": 2.5997293557799185e-05, "loss": 1.9818, "step": 16590500 }, { "epoch": 48.02, "learning_rate": 2.5996569910151907e-05, "loss": 2.023, "step": 16591000 }, { "epoch": 48.03, "learning_rate": 2.599584626250463e-05, "loss": 1.9989, "step": 16591500 }, { "epoch": 48.03, "learning_rate": 2.5995122614857352e-05, "loss": 1.9887, "step": 16592000 }, { "epoch": 48.03, "learning_rate": 2.5994398967210078e-05, "loss": 1.9811, "step": 16592500 }, { "epoch": 48.03, "learning_rate": 2.5993675319562803e-05, "loss": 1.9907, "step": 16593000 }, { "epoch": 48.03, "learning_rate": 2.599295167191553e-05, "loss": 1.9865, "step": 16593500 }, { "epoch": 48.03, "learning_rate": 2.5992229471563544e-05, "loss": 1.9976, "step": 16594000 }, { "epoch": 48.03, "learning_rate": 2.5991505823916267e-05, "loss": 2.0073, "step": 16594500 }, { "epoch": 48.04, "learning_rate": 2.5990782176268992e-05, "loss": 1.9724, "step": 16595000 }, { "epoch": 48.04, "learning_rate": 2.5990058528621714e-05, "loss": 2.0018, "step": 16595500 }, { "epoch": 48.04, "learning_rate": 2.5989334880974437e-05, "loss": 2.0171, "step": 16596000 }, { "epoch": 48.04, "learning_rate": 2.598861123332716e-05, "loss": 1.9986, "step": 16596500 }, { "epoch": 48.04, "learning_rate": 2.598788758567988e-05, "loss": 2.0129, "step": 16597000 }, { "epoch": 48.04, "learning_rate": 2.5987163938032603e-05, "loss": 1.9889, "step": 16597500 }, { "epoch": 48.04, "learning_rate": 2.598644029038533e-05, "loss": 1.9941, "step": 16598000 }, { "epoch": 48.05, "learning_rate": 2.598571664273805e-05, "loss": 2.0047, "step": 16598500 }, { "epoch": 48.05, "learning_rate": 2.5984992995090773e-05, "loss": 2.0055, "step": 16599000 }, { "epoch": 48.05, "learning_rate": 2.5984269347443496e-05, "loss": 1.9859, "step": 16599500 }, { "epoch": 48.05, "learning_rate": 2.5983547147091518e-05, "loss": 2.0024, "step": 16600000 }, { "epoch": 48.05, "learning_rate": 2.5982823499444244e-05, "loss": 1.9872, "step": 16600500 }, { "epoch": 48.05, "learning_rate": 2.5982099851796966e-05, "loss": 1.9964, "step": 16601000 }, { "epoch": 48.05, "learning_rate": 2.5981376204149688e-05, "loss": 2.001, "step": 16601500 }, { "epoch": 48.06, "learning_rate": 2.5980654003797704e-05, "loss": 1.9917, "step": 16602000 }, { "epoch": 48.06, "learning_rate": 2.597993035615043e-05, "loss": 1.9906, "step": 16602500 }, { "epoch": 48.06, "learning_rate": 2.597920670850315e-05, "loss": 1.9932, "step": 16603000 }, { "epoch": 48.06, "learning_rate": 2.5978483060855874e-05, "loss": 2.0174, "step": 16603500 }, { "epoch": 48.06, "learning_rate": 2.5977759413208596e-05, "loss": 1.9911, "step": 16604000 }, { "epoch": 48.06, "learning_rate": 2.5977035765561318e-05, "loss": 2.0098, "step": 16604500 }, { "epoch": 48.06, "learning_rate": 2.5976312117914044e-05, "loss": 1.9951, "step": 16605000 }, { "epoch": 48.07, "learning_rate": 2.597558991756206e-05, "loss": 1.9922, "step": 16605500 }, { "epoch": 48.07, "learning_rate": 2.597486626991478e-05, "loss": 2.0194, "step": 16606000 }, { "epoch": 48.07, "learning_rate": 2.5974142622267504e-05, "loss": 1.9991, "step": 16606500 }, { "epoch": 48.07, "learning_rate": 2.597341897462023e-05, "loss": 1.9729, "step": 16607000 }, { "epoch": 48.07, "learning_rate": 2.5972695326972955e-05, "loss": 1.9586, "step": 16607500 }, { "epoch": 48.07, "learning_rate": 2.5971973126620974e-05, "loss": 1.9924, "step": 16608000 }, { "epoch": 48.07, "learning_rate": 2.5971249478973696e-05, "loss": 2.0101, "step": 16608500 }, { "epoch": 48.08, "learning_rate": 2.597052583132642e-05, "loss": 1.9957, "step": 16609000 }, { "epoch": 48.08, "learning_rate": 2.5969802183679144e-05, "loss": 2.0081, "step": 16609500 }, { "epoch": 48.08, "learning_rate": 2.5969078536031866e-05, "loss": 1.9904, "step": 16610000 }, { "epoch": 48.08, "learning_rate": 2.596835488838459e-05, "loss": 2.0035, "step": 16610500 }, { "epoch": 48.08, "learning_rate": 2.5967632688032608e-05, "loss": 1.9966, "step": 16611000 }, { "epoch": 48.08, "learning_rate": 2.5966910487680623e-05, "loss": 1.9789, "step": 16611500 }, { "epoch": 48.08, "learning_rate": 2.5966186840033345e-05, "loss": 2.0032, "step": 16612000 }, { "epoch": 48.09, "learning_rate": 2.5965464639681364e-05, "loss": 1.9877, "step": 16612500 }, { "epoch": 48.09, "learning_rate": 2.5964740992034087e-05, "loss": 2.0065, "step": 16613000 }, { "epoch": 48.09, "learning_rate": 2.596401734438681e-05, "loss": 2.023, "step": 16613500 }, { "epoch": 48.09, "learning_rate": 2.596329369673953e-05, "loss": 1.9961, "step": 16614000 }, { "epoch": 48.09, "learning_rate": 2.5962570049092257e-05, "loss": 1.9986, "step": 16614500 }, { "epoch": 48.09, "learning_rate": 2.5961846401444982e-05, "loss": 1.9982, "step": 16615000 }, { "epoch": 48.1, "learning_rate": 2.5961122753797708e-05, "loss": 1.9836, "step": 16615500 }, { "epoch": 48.1, "learning_rate": 2.596039910615043e-05, "loss": 1.9785, "step": 16616000 }, { "epoch": 48.1, "learning_rate": 2.5959675458503152e-05, "loss": 1.9943, "step": 16616500 }, { "epoch": 48.1, "learning_rate": 2.595895325815117e-05, "loss": 1.995, "step": 16617000 }, { "epoch": 48.1, "learning_rate": 2.5958229610503894e-05, "loss": 2.0007, "step": 16617500 }, { "epoch": 48.1, "learning_rate": 2.5957505962856616e-05, "loss": 2.0049, "step": 16618000 }, { "epoch": 48.1, "learning_rate": 2.5956782315209338e-05, "loss": 1.9961, "step": 16618500 }, { "epoch": 48.11, "learning_rate": 2.595605866756206e-05, "loss": 1.9886, "step": 16619000 }, { "epoch": 48.11, "learning_rate": 2.5955335019914782e-05, "loss": 1.9742, "step": 16619500 }, { "epoch": 48.11, "learning_rate": 2.5954611372267508e-05, "loss": 1.9964, "step": 16620000 }, { "epoch": 48.11, "learning_rate": 2.5953889171915524e-05, "loss": 2.0015, "step": 16620500 }, { "epoch": 48.11, "learning_rate": 2.5953165524268246e-05, "loss": 2.001, "step": 16621000 }, { "epoch": 48.11, "learning_rate": 2.595244187662097e-05, "loss": 1.9848, "step": 16621500 }, { "epoch": 48.11, "learning_rate": 2.5951718228973694e-05, "loss": 2.004, "step": 16622000 }, { "epoch": 48.12, "learning_rate": 2.595099458132642e-05, "loss": 2.001, "step": 16622500 }, { "epoch": 48.12, "learning_rate": 2.5950270933679145e-05, "loss": 1.9664, "step": 16623000 }, { "epoch": 48.12, "learning_rate": 2.5949547286031867e-05, "loss": 1.9905, "step": 16623500 }, { "epoch": 48.12, "learning_rate": 2.594882363838459e-05, "loss": 2.006, "step": 16624000 }, { "epoch": 48.12, "learning_rate": 2.594810143803261e-05, "loss": 1.9986, "step": 16624500 }, { "epoch": 48.12, "learning_rate": 2.594737779038533e-05, "loss": 2.0104, "step": 16625000 }, { "epoch": 48.12, "learning_rate": 2.5946654142738053e-05, "loss": 2.001, "step": 16625500 }, { "epoch": 48.13, "learning_rate": 2.5945930495090775e-05, "loss": 1.98, "step": 16626000 }, { "epoch": 48.13, "learning_rate": 2.5945206847443497e-05, "loss": 1.985, "step": 16626500 }, { "epoch": 48.13, "learning_rate": 2.5944484647091516e-05, "loss": 1.993, "step": 16627000 }, { "epoch": 48.13, "learning_rate": 2.594376099944424e-05, "loss": 1.9892, "step": 16627500 }, { "epoch": 48.13, "learning_rate": 2.594303735179696e-05, "loss": 1.9778, "step": 16628000 }, { "epoch": 48.13, "learning_rate": 2.5942313704149683e-05, "loss": 2.0139, "step": 16628500 }, { "epoch": 48.13, "learning_rate": 2.594159005650241e-05, "loss": 1.9955, "step": 16629000 }, { "epoch": 48.14, "learning_rate": 2.594086640885513e-05, "loss": 2.0036, "step": 16629500 }, { "epoch": 48.14, "learning_rate": 2.594014276120786e-05, "loss": 2.0082, "step": 16630000 }, { "epoch": 48.14, "learning_rate": 2.5939419113560582e-05, "loss": 1.9896, "step": 16630500 }, { "epoch": 48.14, "learning_rate": 2.5938695465913304e-05, "loss": 2.0129, "step": 16631000 }, { "epoch": 48.14, "learning_rate": 2.5937971818266027e-05, "loss": 1.9901, "step": 16631500 }, { "epoch": 48.14, "learning_rate": 2.593724817061875e-05, "loss": 1.9911, "step": 16632000 }, { "epoch": 48.14, "learning_rate": 2.593652452297147e-05, "loss": 1.9994, "step": 16632500 }, { "epoch": 48.15, "learning_rate": 2.5935800875324197e-05, "loss": 1.9901, "step": 16633000 }, { "epoch": 48.15, "learning_rate": 2.5935078674972212e-05, "loss": 2.0107, "step": 16633500 }, { "epoch": 48.15, "learning_rate": 2.5934355027324934e-05, "loss": 1.9941, "step": 16634000 }, { "epoch": 48.15, "learning_rate": 2.5933632826972953e-05, "loss": 1.9971, "step": 16634500 }, { "epoch": 48.15, "learning_rate": 2.5932909179325676e-05, "loss": 2.0302, "step": 16635000 }, { "epoch": 48.15, "learning_rate": 2.5932186978973695e-05, "loss": 2.0021, "step": 16635500 }, { "epoch": 48.15, "learning_rate": 2.5931463331326417e-05, "loss": 2.0054, "step": 16636000 }, { "epoch": 48.16, "learning_rate": 2.593073968367914e-05, "loss": 2.0086, "step": 16636500 }, { "epoch": 48.16, "learning_rate": 2.593001603603186e-05, "loss": 2.0114, "step": 16637000 }, { "epoch": 48.16, "learning_rate": 2.592929238838459e-05, "loss": 1.9852, "step": 16637500 }, { "epoch": 48.16, "learning_rate": 2.5928568740737312e-05, "loss": 1.9982, "step": 16638000 }, { "epoch": 48.16, "learning_rate": 2.592784654038533e-05, "loss": 1.9838, "step": 16638500 }, { "epoch": 48.16, "learning_rate": 2.5927122892738054e-05, "loss": 1.9771, "step": 16639000 }, { "epoch": 48.16, "learning_rate": 2.5926400692386073e-05, "loss": 1.9954, "step": 16639500 }, { "epoch": 48.17, "learning_rate": 2.5925677044738795e-05, "loss": 1.9898, "step": 16640000 }, { "epoch": 48.17, "learning_rate": 2.5924953397091517e-05, "loss": 1.9983, "step": 16640500 }, { "epoch": 48.17, "learning_rate": 2.592422974944424e-05, "loss": 1.9834, "step": 16641000 }, { "epoch": 48.17, "learning_rate": 2.592350610179696e-05, "loss": 2.025, "step": 16641500 }, { "epoch": 48.17, "learning_rate": 2.5922782454149687e-05, "loss": 2.0247, "step": 16642000 }, { "epoch": 48.17, "learning_rate": 2.592205880650241e-05, "loss": 2.0048, "step": 16642500 }, { "epoch": 48.17, "learning_rate": 2.592133515885513e-05, "loss": 2.0096, "step": 16643000 }, { "epoch": 48.18, "learning_rate": 2.5920611511207854e-05, "loss": 2.0137, "step": 16643500 }, { "epoch": 48.18, "learning_rate": 2.5919887863560576e-05, "loss": 2.038, "step": 16644000 }, { "epoch": 48.18, "learning_rate": 2.5919164215913305e-05, "loss": 2.0313, "step": 16644500 }, { "epoch": 48.18, "learning_rate": 2.5918440568266027e-05, "loss": 2.0196, "step": 16645000 }, { "epoch": 48.18, "learning_rate": 2.5917718367914046e-05, "loss": 1.9965, "step": 16645500 }, { "epoch": 48.18, "learning_rate": 2.591699472026677e-05, "loss": 2.0092, "step": 16646000 }, { "epoch": 48.18, "learning_rate": 2.591627107261949e-05, "loss": 2.0292, "step": 16646500 }, { "epoch": 48.19, "learning_rate": 2.591554887226751e-05, "loss": 1.9765, "step": 16647000 }, { "epoch": 48.19, "learning_rate": 2.5914825224620232e-05, "loss": 2.0156, "step": 16647500 }, { "epoch": 48.19, "learning_rate": 2.5914101576972954e-05, "loss": 2.0085, "step": 16648000 }, { "epoch": 48.19, "learning_rate": 2.5913377929325676e-05, "loss": 1.9953, "step": 16648500 }, { "epoch": 48.19, "learning_rate": 2.59126542816784e-05, "loss": 2.0102, "step": 16649000 }, { "epoch": 48.19, "learning_rate": 2.5911932081326418e-05, "loss": 2.004, "step": 16649500 }, { "epoch": 48.19, "learning_rate": 2.591120843367914e-05, "loss": 2.0075, "step": 16650000 }, { "epoch": 48.2, "learning_rate": 2.5910484786031862e-05, "loss": 1.999, "step": 16650500 }, { "epoch": 48.2, "learning_rate": 2.5909761138384588e-05, "loss": 1.9961, "step": 16651000 }, { "epoch": 48.2, "learning_rate": 2.590903749073731e-05, "loss": 1.9949, "step": 16651500 }, { "epoch": 48.2, "learning_rate": 2.5908313843090032e-05, "loss": 2.017, "step": 16652000 }, { "epoch": 48.2, "learning_rate": 2.5907591642738054e-05, "loss": 2.012, "step": 16652500 }, { "epoch": 48.2, "learning_rate": 2.5906867995090777e-05, "loss": 2.0146, "step": 16653000 }, { "epoch": 48.21, "learning_rate": 2.5906144347443502e-05, "loss": 2.0002, "step": 16653500 }, { "epoch": 48.21, "learning_rate": 2.5905420699796225e-05, "loss": 2.021, "step": 16654000 }, { "epoch": 48.21, "learning_rate": 2.5904697052148947e-05, "loss": 2.0023, "step": 16654500 }, { "epoch": 48.21, "learning_rate": 2.590397340450167e-05, "loss": 1.9919, "step": 16655000 }, { "epoch": 48.21, "learning_rate": 2.590324975685439e-05, "loss": 1.9787, "step": 16655500 }, { "epoch": 48.21, "learning_rate": 2.5902526109207113e-05, "loss": 2.0071, "step": 16656000 }, { "epoch": 48.21, "learning_rate": 2.590180246155984e-05, "loss": 2.0246, "step": 16656500 }, { "epoch": 48.22, "learning_rate": 2.590108170850315e-05, "loss": 2.0095, "step": 16657000 }, { "epoch": 48.22, "learning_rate": 2.5900358060855874e-05, "loss": 2.0032, "step": 16657500 }, { "epoch": 48.22, "learning_rate": 2.5899634413208596e-05, "loss": 2.0007, "step": 16658000 }, { "epoch": 48.22, "learning_rate": 2.5898910765561318e-05, "loss": 2.004, "step": 16658500 }, { "epoch": 48.22, "learning_rate": 2.589818711791404e-05, "loss": 2.038, "step": 16659000 }, { "epoch": 48.22, "learning_rate": 2.5897463470266763e-05, "loss": 1.9832, "step": 16659500 }, { "epoch": 48.22, "learning_rate": 2.589673982261949e-05, "loss": 2.0024, "step": 16660000 }, { "epoch": 48.23, "learning_rate": 2.5896016174972214e-05, "loss": 2.0035, "step": 16660500 }, { "epoch": 48.23, "learning_rate": 2.589529252732494e-05, "loss": 2.0222, "step": 16661000 }, { "epoch": 48.23, "learning_rate": 2.5894570326972955e-05, "loss": 1.9875, "step": 16661500 }, { "epoch": 48.23, "learning_rate": 2.5893846679325677e-05, "loss": 2.008, "step": 16662000 }, { "epoch": 48.23, "learning_rate": 2.5893123031678403e-05, "loss": 1.9869, "step": 16662500 }, { "epoch": 48.23, "learning_rate": 2.5892399384031125e-05, "loss": 1.9839, "step": 16663000 }, { "epoch": 48.23, "learning_rate": 2.589167718367914e-05, "loss": 2.0055, "step": 16663500 }, { "epoch": 48.24, "learning_rate": 2.589095498332716e-05, "loss": 1.9752, "step": 16664000 }, { "epoch": 48.24, "learning_rate": 2.5890231335679882e-05, "loss": 1.9866, "step": 16664500 }, { "epoch": 48.24, "learning_rate": 2.58895091353279e-05, "loss": 2.0118, "step": 16665000 }, { "epoch": 48.24, "learning_rate": 2.5888785487680623e-05, "loss": 2.0068, "step": 16665500 }, { "epoch": 48.24, "learning_rate": 2.5888061840033345e-05, "loss": 1.9997, "step": 16666000 }, { "epoch": 48.24, "learning_rate": 2.5887338192386067e-05, "loss": 1.9943, "step": 16666500 }, { "epoch": 48.24, "learning_rate": 2.5886615992034086e-05, "loss": 1.9984, "step": 16667000 }, { "epoch": 48.25, "learning_rate": 2.588589234438681e-05, "loss": 1.9911, "step": 16667500 }, { "epoch": 48.25, "learning_rate": 2.5885168696739538e-05, "loss": 1.9836, "step": 16668000 }, { "epoch": 48.25, "learning_rate": 2.588444504909226e-05, "loss": 1.9949, "step": 16668500 }, { "epoch": 48.25, "learning_rate": 2.5883721401444982e-05, "loss": 1.9899, "step": 16669000 }, { "epoch": 48.25, "learning_rate": 2.5882999201093e-05, "loss": 1.9809, "step": 16669500 }, { "epoch": 48.25, "learning_rate": 2.5882275553445723e-05, "loss": 1.9928, "step": 16670000 }, { "epoch": 48.25, "learning_rate": 2.5881551905798445e-05, "loss": 1.9952, "step": 16670500 }, { "epoch": 48.26, "learning_rate": 2.5880828258151168e-05, "loss": 2.0224, "step": 16671000 }, { "epoch": 48.26, "learning_rate": 2.588010461050389e-05, "loss": 2.03, "step": 16671500 }, { "epoch": 48.26, "learning_rate": 2.5879380962856616e-05, "loss": 1.9937, "step": 16672000 }, { "epoch": 48.26, "learning_rate": 2.5878657315209338e-05, "loss": 1.9901, "step": 16672500 }, { "epoch": 48.26, "learning_rate": 2.587793366756206e-05, "loss": 2.0094, "step": 16673000 }, { "epoch": 48.26, "learning_rate": 2.5877210019914782e-05, "loss": 2.025, "step": 16673500 }, { "epoch": 48.26, "learning_rate": 2.5876486372267504e-05, "loss": 1.9978, "step": 16674000 }, { "epoch": 48.27, "learning_rate": 2.5875762724620227e-05, "loss": 2.0098, "step": 16674500 }, { "epoch": 48.27, "learning_rate": 2.5875039076972956e-05, "loss": 2.0082, "step": 16675000 }, { "epoch": 48.27, "learning_rate": 2.5874315429325678e-05, "loss": 2.0003, "step": 16675500 }, { "epoch": 48.27, "learning_rate": 2.5873591781678404e-05, "loss": 1.995, "step": 16676000 }, { "epoch": 48.27, "learning_rate": 2.5872868134031126e-05, "loss": 2.0023, "step": 16676500 }, { "epoch": 48.27, "learning_rate": 2.587214593367914e-05, "loss": 2.0175, "step": 16677000 }, { "epoch": 48.27, "learning_rate": 2.5871422286031867e-05, "loss": 2.0287, "step": 16677500 }, { "epoch": 48.28, "learning_rate": 2.587069863838459e-05, "loss": 2.0133, "step": 16678000 }, { "epoch": 48.28, "learning_rate": 2.5869976438032605e-05, "loss": 1.9973, "step": 16678500 }, { "epoch": 48.28, "learning_rate": 2.5869254237680624e-05, "loss": 1.9893, "step": 16679000 }, { "epoch": 48.28, "learning_rate": 2.5868532037328643e-05, "loss": 1.9957, "step": 16679500 }, { "epoch": 48.28, "learning_rate": 2.5867808389681365e-05, "loss": 2.0128, "step": 16680000 }, { "epoch": 48.28, "learning_rate": 2.586708618932938e-05, "loss": 1.9991, "step": 16680500 }, { "epoch": 48.28, "learning_rate": 2.5866362541682103e-05, "loss": 1.9993, "step": 16681000 }, { "epoch": 48.29, "learning_rate": 2.586563889403483e-05, "loss": 2.0171, "step": 16681500 }, { "epoch": 48.29, "learning_rate": 2.586491524638755e-05, "loss": 2.0086, "step": 16682000 }, { "epoch": 48.29, "learning_rate": 2.5864191598740273e-05, "loss": 1.9928, "step": 16682500 }, { "epoch": 48.29, "learning_rate": 2.5863467951093002e-05, "loss": 1.9985, "step": 16683000 }, { "epoch": 48.29, "learning_rate": 2.5862744303445724e-05, "loss": 1.9936, "step": 16683500 }, { "epoch": 48.29, "learning_rate": 2.5862020655798446e-05, "loss": 1.9914, "step": 16684000 }, { "epoch": 48.29, "learning_rate": 2.586129700815117e-05, "loss": 1.9803, "step": 16684500 }, { "epoch": 48.3, "learning_rate": 2.5860573360503894e-05, "loss": 2.0307, "step": 16685000 }, { "epoch": 48.3, "learning_rate": 2.5859849712856616e-05, "loss": 1.9882, "step": 16685500 }, { "epoch": 48.3, "learning_rate": 2.585912606520934e-05, "loss": 1.996, "step": 16686000 }, { "epoch": 48.3, "learning_rate": 2.585840241756206e-05, "loss": 2.0015, "step": 16686500 }, { "epoch": 48.3, "learning_rate": 2.5857678769914783e-05, "loss": 1.9958, "step": 16687000 }, { "epoch": 48.3, "learning_rate": 2.5856955122267505e-05, "loss": 1.9996, "step": 16687500 }, { "epoch": 48.3, "learning_rate": 2.585623147462023e-05, "loss": 2.0227, "step": 16688000 }, { "epoch": 48.31, "learning_rate": 2.5855509274268246e-05, "loss": 2.013, "step": 16688500 }, { "epoch": 48.31, "learning_rate": 2.585478562662097e-05, "loss": 2.007, "step": 16689000 }, { "epoch": 48.31, "learning_rate": 2.5854061978973694e-05, "loss": 1.9998, "step": 16689500 }, { "epoch": 48.31, "learning_rate": 2.585333833132642e-05, "loss": 2.0022, "step": 16690000 }, { "epoch": 48.31, "learning_rate": 2.5852614683679146e-05, "loss": 2.0229, "step": 16690500 }, { "epoch": 48.31, "learning_rate": 2.5851891036031868e-05, "loss": 2.0104, "step": 16691000 }, { "epoch": 48.32, "learning_rate": 2.585116738838459e-05, "loss": 1.9947, "step": 16691500 }, { "epoch": 48.32, "learning_rate": 2.5850443740737312e-05, "loss": 2.0037, "step": 16692000 }, { "epoch": 48.32, "learning_rate": 2.5849720093090034e-05, "loss": 2.0058, "step": 16692500 }, { "epoch": 48.32, "learning_rate": 2.5848996445442757e-05, "loss": 2.0015, "step": 16693000 }, { "epoch": 48.32, "learning_rate": 2.5848274245090776e-05, "loss": 2.0149, "step": 16693500 }, { "epoch": 48.32, "learning_rate": 2.5847550597443498e-05, "loss": 2.0304, "step": 16694000 }, { "epoch": 48.32, "learning_rate": 2.584682694979622e-05, "loss": 1.965, "step": 16694500 }, { "epoch": 48.33, "learning_rate": 2.5846103302148942e-05, "loss": 2.0143, "step": 16695000 }, { "epoch": 48.33, "learning_rate": 2.5845379654501668e-05, "loss": 2.0073, "step": 16695500 }, { "epoch": 48.33, "learning_rate": 2.584465890144498e-05, "loss": 1.9647, "step": 16696000 }, { "epoch": 48.33, "learning_rate": 2.5843935253797702e-05, "loss": 2.0124, "step": 16696500 }, { "epoch": 48.33, "learning_rate": 2.5843211606150425e-05, "loss": 2.0209, "step": 16697000 }, { "epoch": 48.33, "learning_rate": 2.5842487958503154e-05, "loss": 2.0258, "step": 16697500 }, { "epoch": 48.33, "learning_rate": 2.5841764310855876e-05, "loss": 2.0202, "step": 16698000 }, { "epoch": 48.34, "learning_rate": 2.5841040663208598e-05, "loss": 2.0139, "step": 16698500 }, { "epoch": 48.34, "learning_rate": 2.5840318462856617e-05, "loss": 2.0052, "step": 16699000 }, { "epoch": 48.34, "learning_rate": 2.583959481520934e-05, "loss": 2.0385, "step": 16699500 }, { "epoch": 48.34, "learning_rate": 2.583887116756206e-05, "loss": 2.0097, "step": 16700000 }, { "epoch": 48.34, "learning_rate": 2.5838147519914784e-05, "loss": 2.0107, "step": 16700500 }, { "epoch": 48.34, "learning_rate": 2.5837423872267506e-05, "loss": 2.0102, "step": 16701000 }, { "epoch": 48.34, "learning_rate": 2.583670022462023e-05, "loss": 1.9968, "step": 16701500 }, { "epoch": 48.35, "learning_rate": 2.5835976576972954e-05, "loss": 1.9946, "step": 16702000 }, { "epoch": 48.35, "learning_rate": 2.583525437662097e-05, "loss": 2.0321, "step": 16702500 }, { "epoch": 48.35, "learning_rate": 2.5834530728973695e-05, "loss": 2.0107, "step": 16703000 }, { "epoch": 48.35, "learning_rate": 2.583380852862171e-05, "loss": 1.9749, "step": 16703500 }, { "epoch": 48.35, "learning_rate": 2.5833084880974433e-05, "loss": 2.002, "step": 16704000 }, { "epoch": 48.35, "learning_rate": 2.583236123332716e-05, "loss": 2.0037, "step": 16704500 }, { "epoch": 48.35, "learning_rate": 2.5831637585679884e-05, "loss": 1.9901, "step": 16705000 }, { "epoch": 48.36, "learning_rate": 2.583091393803261e-05, "loss": 1.9874, "step": 16705500 }, { "epoch": 48.36, "learning_rate": 2.5830190290385332e-05, "loss": 1.9729, "step": 16706000 }, { "epoch": 48.36, "learning_rate": 2.5829466642738054e-05, "loss": 1.9712, "step": 16706500 }, { "epoch": 48.36, "learning_rate": 2.5828742995090776e-05, "loss": 2.0083, "step": 16707000 }, { "epoch": 48.36, "learning_rate": 2.58280193474435e-05, "loss": 1.9939, "step": 16707500 }, { "epoch": 48.36, "learning_rate": 2.582729569979622e-05, "loss": 2.0024, "step": 16708000 }, { "epoch": 48.36, "learning_rate": 2.5826572052148947e-05, "loss": 2.0001, "step": 16708500 }, { "epoch": 48.37, "learning_rate": 2.582584840450167e-05, "loss": 1.9976, "step": 16709000 }, { "epoch": 48.37, "learning_rate": 2.5825126204149684e-05, "loss": 2.0158, "step": 16709500 }, { "epoch": 48.37, "learning_rate": 2.582440255650241e-05, "loss": 2.0078, "step": 16710000 }, { "epoch": 48.37, "learning_rate": 2.5823678908855132e-05, "loss": 2.0015, "step": 16710500 }, { "epoch": 48.37, "learning_rate": 2.5822955261207854e-05, "loss": 1.999, "step": 16711000 }, { "epoch": 48.37, "learning_rate": 2.582223306085587e-05, "loss": 1.9943, "step": 16711500 }, { "epoch": 48.37, "learning_rate": 2.5821509413208596e-05, "loss": 2.0174, "step": 16712000 }, { "epoch": 48.38, "learning_rate": 2.582078576556132e-05, "loss": 1.9871, "step": 16712500 }, { "epoch": 48.38, "learning_rate": 2.5820062117914047e-05, "loss": 2.0, "step": 16713000 }, { "epoch": 48.38, "learning_rate": 2.581933847026677e-05, "loss": 2.0, "step": 16713500 }, { "epoch": 48.38, "learning_rate": 2.581861482261949e-05, "loss": 2.013, "step": 16714000 }, { "epoch": 48.38, "learning_rate": 2.5817891174972214e-05, "loss": 1.9961, "step": 16714500 }, { "epoch": 48.38, "learning_rate": 2.5817167527324936e-05, "loss": 1.9917, "step": 16715000 }, { "epoch": 48.38, "learning_rate": 2.581644387967766e-05, "loss": 2.0037, "step": 16715500 }, { "epoch": 48.39, "learning_rate": 2.5815723126620974e-05, "loss": 1.9941, "step": 16716000 }, { "epoch": 48.39, "learning_rate": 2.5814999478973696e-05, "loss": 2.0215, "step": 16716500 }, { "epoch": 48.39, "learning_rate": 2.5814275831326418e-05, "loss": 2.0146, "step": 16717000 }, { "epoch": 48.39, "learning_rate": 2.581355218367914e-05, "loss": 2.0165, "step": 16717500 }, { "epoch": 48.39, "learning_rate": 2.5812828536031863e-05, "loss": 1.9953, "step": 16718000 }, { "epoch": 48.39, "learning_rate": 2.5812104888384585e-05, "loss": 1.9853, "step": 16718500 }, { "epoch": 48.39, "learning_rate": 2.581138124073731e-05, "loss": 1.9875, "step": 16719000 }, { "epoch": 48.4, "learning_rate": 2.5810657593090033e-05, "loss": 2.007, "step": 16719500 }, { "epoch": 48.4, "learning_rate": 2.5809935392738055e-05, "loss": 2.0141, "step": 16720000 }, { "epoch": 48.4, "learning_rate": 2.5809213192386074e-05, "loss": 2.0066, "step": 16720500 }, { "epoch": 48.4, "learning_rate": 2.580849099203409e-05, "loss": 2.0141, "step": 16721000 }, { "epoch": 48.4, "learning_rate": 2.5807767344386812e-05, "loss": 2.0044, "step": 16721500 }, { "epoch": 48.4, "learning_rate": 2.5807043696739537e-05, "loss": 2.009, "step": 16722000 }, { "epoch": 48.4, "learning_rate": 2.580632004909226e-05, "loss": 1.9849, "step": 16722500 }, { "epoch": 48.41, "learning_rate": 2.5805596401444982e-05, "loss": 2.0019, "step": 16723000 }, { "epoch": 48.41, "learning_rate": 2.5804872753797704e-05, "loss": 2.022, "step": 16723500 }, { "epoch": 48.41, "learning_rate": 2.5804149106150426e-05, "loss": 1.9962, "step": 16724000 }, { "epoch": 48.41, "learning_rate": 2.580342545850315e-05, "loss": 1.983, "step": 16724500 }, { "epoch": 48.41, "learning_rate": 2.5802701810855874e-05, "loss": 2.0245, "step": 16725000 }, { "epoch": 48.41, "learning_rate": 2.5801978163208596e-05, "loss": 2.0045, "step": 16725500 }, { "epoch": 48.41, "learning_rate": 2.580125451556132e-05, "loss": 2.009, "step": 16726000 }, { "epoch": 48.42, "learning_rate": 2.580053086791404e-05, "loss": 2.0197, "step": 16726500 }, { "epoch": 48.42, "learning_rate": 2.579980866756206e-05, "loss": 2.0221, "step": 16727000 }, { "epoch": 48.42, "learning_rate": 2.5799085019914785e-05, "loss": 1.9979, "step": 16727500 }, { "epoch": 48.42, "learning_rate": 2.579836137226751e-05, "loss": 2.006, "step": 16728000 }, { "epoch": 48.42, "learning_rate": 2.5797639171915527e-05, "loss": 1.9862, "step": 16728500 }, { "epoch": 48.42, "learning_rate": 2.579691552426825e-05, "loss": 2.0002, "step": 16729000 }, { "epoch": 48.43, "learning_rate": 2.5796191876620974e-05, "loss": 2.0182, "step": 16729500 }, { "epoch": 48.43, "learning_rate": 2.5795468228973697e-05, "loss": 1.9796, "step": 16730000 }, { "epoch": 48.43, "learning_rate": 2.579474458132642e-05, "loss": 1.9946, "step": 16730500 }, { "epoch": 48.43, "learning_rate": 2.579402093367914e-05, "loss": 1.9918, "step": 16731000 }, { "epoch": 48.43, "learning_rate": 2.579329873332716e-05, "loss": 2.0049, "step": 16731500 }, { "epoch": 48.43, "learning_rate": 2.5792575085679882e-05, "loss": 2.008, "step": 16732000 }, { "epoch": 48.43, "learning_rate": 2.5791851438032605e-05, "loss": 1.9648, "step": 16732500 }, { "epoch": 48.44, "learning_rate": 2.5791127790385327e-05, "loss": 2.0067, "step": 16733000 }, { "epoch": 48.44, "learning_rate": 2.579040414273805e-05, "loss": 2.0199, "step": 16733500 }, { "epoch": 48.44, "learning_rate": 2.5789681942386068e-05, "loss": 1.997, "step": 16734000 }, { "epoch": 48.44, "learning_rate": 2.578895829473879e-05, "loss": 1.9986, "step": 16734500 }, { "epoch": 48.44, "learning_rate": 2.578823464709152e-05, "loss": 2.019, "step": 16735000 }, { "epoch": 48.44, "learning_rate": 2.578751099944424e-05, "loss": 2.0242, "step": 16735500 }, { "epoch": 48.44, "learning_rate": 2.578678879909226e-05, "loss": 2.0098, "step": 16736000 }, { "epoch": 48.45, "learning_rate": 2.5786066598740276e-05, "loss": 2.0182, "step": 16736500 }, { "epoch": 48.45, "learning_rate": 2.5785342951093e-05, "loss": 1.9957, "step": 16737000 }, { "epoch": 48.45, "learning_rate": 2.5784619303445724e-05, "loss": 2.0195, "step": 16737500 }, { "epoch": 48.45, "learning_rate": 2.5783895655798446e-05, "loss": 2.0245, "step": 16738000 }, { "epoch": 48.45, "learning_rate": 2.5783172008151168e-05, "loss": 2.0029, "step": 16738500 }, { "epoch": 48.45, "learning_rate": 2.5782449807799187e-05, "loss": 1.9981, "step": 16739000 }, { "epoch": 48.45, "learning_rate": 2.5781727607447203e-05, "loss": 1.965, "step": 16739500 }, { "epoch": 48.46, "learning_rate": 2.5781003959799925e-05, "loss": 1.9813, "step": 16740000 }, { "epoch": 48.46, "learning_rate": 2.5780283206743237e-05, "loss": 2.0129, "step": 16740500 }, { "epoch": 48.46, "learning_rate": 2.5779559559095963e-05, "loss": 1.9986, "step": 16741000 }, { "epoch": 48.46, "learning_rate": 2.5778835911448685e-05, "loss": 2.0496, "step": 16741500 }, { "epoch": 48.46, "learning_rate": 2.5778112263801407e-05, "loss": 2.0098, "step": 16742000 }, { "epoch": 48.46, "learning_rate": 2.577738861615413e-05, "loss": 1.9919, "step": 16742500 }, { "epoch": 48.46, "learning_rate": 2.5776664968506852e-05, "loss": 2.006, "step": 16743000 }, { "epoch": 48.47, "learning_rate": 2.577594132085958e-05, "loss": 2.0262, "step": 16743500 }, { "epoch": 48.47, "learning_rate": 2.5775217673212303e-05, "loss": 2.0073, "step": 16744000 }, { "epoch": 48.47, "learning_rate": 2.5774494025565025e-05, "loss": 2.0056, "step": 16744500 }, { "epoch": 48.47, "learning_rate": 2.577377037791775e-05, "loss": 1.9859, "step": 16745000 }, { "epoch": 48.47, "learning_rate": 2.5773046730270473e-05, "loss": 1.9906, "step": 16745500 }, { "epoch": 48.47, "learning_rate": 2.5772323082623195e-05, "loss": 2.0001, "step": 16746000 }, { "epoch": 48.47, "learning_rate": 2.5771599434975918e-05, "loss": 1.9944, "step": 16746500 }, { "epoch": 48.48, "learning_rate": 2.577087578732864e-05, "loss": 1.9843, "step": 16747000 }, { "epoch": 48.48, "learning_rate": 2.5770152139681365e-05, "loss": 1.9976, "step": 16747500 }, { "epoch": 48.48, "learning_rate": 2.5769428492034088e-05, "loss": 2.0087, "step": 16748000 }, { "epoch": 48.48, "learning_rate": 2.576870484438681e-05, "loss": 2.0075, "step": 16748500 }, { "epoch": 48.48, "learning_rate": 2.5767981196739532e-05, "loss": 2.0239, "step": 16749000 }, { "epoch": 48.48, "learning_rate": 2.5767257549092254e-05, "loss": 1.9882, "step": 16749500 }, { "epoch": 48.48, "learning_rate": 2.5766533901444983e-05, "loss": 1.9916, "step": 16750000 }, { "epoch": 48.49, "learning_rate": 2.5765810253797706e-05, "loss": 2.0029, "step": 16750500 }, { "epoch": 48.49, "learning_rate": 2.5765086606150428e-05, "loss": 2.02, "step": 16751000 }, { "epoch": 48.49, "learning_rate": 2.5764364405798447e-05, "loss": 1.9968, "step": 16751500 }, { "epoch": 48.49, "learning_rate": 2.576364075815117e-05, "loss": 2.0275, "step": 16752000 }, { "epoch": 48.49, "learning_rate": 2.576291711050389e-05, "loss": 2.0104, "step": 16752500 }, { "epoch": 48.49, "learning_rate": 2.576219491015191e-05, "loss": 2.0001, "step": 16753000 }, { "epoch": 48.49, "learning_rate": 2.5761471262504632e-05, "loss": 1.9949, "step": 16753500 }, { "epoch": 48.5, "learning_rate": 2.5760747614857355e-05, "loss": 2.0205, "step": 16754000 }, { "epoch": 48.5, "learning_rate": 2.5760023967210077e-05, "loss": 2.0184, "step": 16754500 }, { "epoch": 48.5, "learning_rate": 2.5759300319562803e-05, "loss": 2.0009, "step": 16755000 }, { "epoch": 48.5, "learning_rate": 2.5758576671915525e-05, "loss": 2.023, "step": 16755500 }, { "epoch": 48.5, "learning_rate": 2.5757853024268247e-05, "loss": 2.0174, "step": 16756000 }, { "epoch": 48.5, "learning_rate": 2.5757130823916266e-05, "loss": 1.9904, "step": 16756500 }, { "epoch": 48.5, "learning_rate": 2.5756407176268988e-05, "loss": 2.0112, "step": 16757000 }, { "epoch": 48.51, "learning_rate": 2.5755683528621717e-05, "loss": 2.0065, "step": 16757500 }, { "epoch": 48.51, "learning_rate": 2.575495988097444e-05, "loss": 2.0, "step": 16758000 }, { "epoch": 48.51, "learning_rate": 2.5754236233327162e-05, "loss": 2.0099, "step": 16758500 }, { "epoch": 48.51, "learning_rate": 2.5753512585679884e-05, "loss": 2.0144, "step": 16759000 }, { "epoch": 48.51, "learning_rate": 2.5752788938032606e-05, "loss": 2.0073, "step": 16759500 }, { "epoch": 48.51, "learning_rate": 2.575206529038533e-05, "loss": 2.0142, "step": 16760000 }, { "epoch": 48.51, "learning_rate": 2.5751341642738054e-05, "loss": 1.9875, "step": 16760500 }, { "epoch": 48.52, "learning_rate": 2.5750617995090776e-05, "loss": 2.0232, "step": 16761000 }, { "epoch": 48.52, "learning_rate": 2.57498943474435e-05, "loss": 2.0138, "step": 16761500 }, { "epoch": 48.52, "learning_rate": 2.574917069979622e-05, "loss": 1.9913, "step": 16762000 }, { "epoch": 48.52, "learning_rate": 2.5748447052148943e-05, "loss": 2.017, "step": 16762500 }, { "epoch": 48.52, "learning_rate": 2.5747724851796962e-05, "loss": 1.998, "step": 16763000 }, { "epoch": 48.52, "learning_rate": 2.5747001204149684e-05, "loss": 2.0099, "step": 16763500 }, { "epoch": 48.52, "learning_rate": 2.5746277556502406e-05, "loss": 2.0068, "step": 16764000 }, { "epoch": 48.53, "learning_rate": 2.5745555356150425e-05, "loss": 2.0, "step": 16764500 }, { "epoch": 48.53, "learning_rate": 2.5744831708503154e-05, "loss": 1.9768, "step": 16765000 }, { "epoch": 48.53, "learning_rate": 2.5744108060855877e-05, "loss": 2.0019, "step": 16765500 }, { "epoch": 48.53, "learning_rate": 2.57433844132086e-05, "loss": 1.9932, "step": 16766000 }, { "epoch": 48.53, "learning_rate": 2.574266076556132e-05, "loss": 2.0094, "step": 16766500 }, { "epoch": 48.53, "learning_rate": 2.5741937117914043e-05, "loss": 1.9716, "step": 16767000 }, { "epoch": 48.54, "learning_rate": 2.574121347026677e-05, "loss": 1.9952, "step": 16767500 }, { "epoch": 48.54, "learning_rate": 2.574048982261949e-05, "loss": 2.0013, "step": 16768000 }, { "epoch": 48.54, "learning_rate": 2.5739766174972213e-05, "loss": 1.9878, "step": 16768500 }, { "epoch": 48.54, "learning_rate": 2.573904397462023e-05, "loss": 2.0099, "step": 16769000 }, { "epoch": 48.54, "learning_rate": 2.5738320326972955e-05, "loss": 2.0119, "step": 16769500 }, { "epoch": 48.54, "learning_rate": 2.5737596679325677e-05, "loss": 2.0169, "step": 16770000 }, { "epoch": 48.54, "learning_rate": 2.57368730316784e-05, "loss": 2.0052, "step": 16770500 }, { "epoch": 48.55, "learning_rate": 2.5736150831326418e-05, "loss": 2.002, "step": 16771000 }, { "epoch": 48.55, "learning_rate": 2.573542718367914e-05, "loss": 2.0171, "step": 16771500 }, { "epoch": 48.55, "learning_rate": 2.5734703536031862e-05, "loss": 1.9949, "step": 16772000 }, { "epoch": 48.55, "learning_rate": 2.573397988838459e-05, "loss": 2.0089, "step": 16772500 }, { "epoch": 48.55, "learning_rate": 2.5733256240737314e-05, "loss": 1.9781, "step": 16773000 }, { "epoch": 48.55, "learning_rate": 2.5732532593090036e-05, "loss": 1.9991, "step": 16773500 }, { "epoch": 48.55, "learning_rate": 2.5731808945442758e-05, "loss": 2.0106, "step": 16774000 }, { "epoch": 48.56, "learning_rate": 2.573108529779548e-05, "loss": 2.0231, "step": 16774500 }, { "epoch": 48.56, "learning_rate": 2.5730361650148206e-05, "loss": 2.0078, "step": 16775000 }, { "epoch": 48.56, "learning_rate": 2.572963944979622e-05, "loss": 2.0067, "step": 16775500 }, { "epoch": 48.56, "learning_rate": 2.5728915802148944e-05, "loss": 1.9822, "step": 16776000 }, { "epoch": 48.56, "learning_rate": 2.572819215450167e-05, "loss": 1.9801, "step": 16776500 }, { "epoch": 48.56, "learning_rate": 2.572746850685439e-05, "loss": 1.9775, "step": 16777000 }, { "epoch": 48.56, "learning_rate": 2.5726744859207114e-05, "loss": 2.0277, "step": 16777500 }, { "epoch": 48.57, "learning_rate": 2.5726021211559836e-05, "loss": 2.0165, "step": 16778000 }, { "epoch": 48.57, "learning_rate": 2.5725297563912558e-05, "loss": 2.0233, "step": 16778500 }, { "epoch": 48.57, "learning_rate": 2.572457391626528e-05, "loss": 2.0123, "step": 16779000 }, { "epoch": 48.57, "learning_rate": 2.572385026861801e-05, "loss": 2.011, "step": 16779500 }, { "epoch": 48.57, "learning_rate": 2.572312806826603e-05, "loss": 2.0098, "step": 16780000 }, { "epoch": 48.57, "learning_rate": 2.572240442061875e-05, "loss": 1.9799, "step": 16780500 }, { "epoch": 48.57, "learning_rate": 2.5721680772971473e-05, "loss": 2.0018, "step": 16781000 }, { "epoch": 48.58, "learning_rate": 2.5720957125324195e-05, "loss": 2.0031, "step": 16781500 }, { "epoch": 48.58, "learning_rate": 2.5720234924972214e-05, "loss": 1.9881, "step": 16782000 }, { "epoch": 48.58, "learning_rate": 2.5719511277324936e-05, "loss": 2.0033, "step": 16782500 }, { "epoch": 48.58, "learning_rate": 2.571878762967766e-05, "loss": 2.0, "step": 16783000 }, { "epoch": 48.58, "learning_rate": 2.5718063982030384e-05, "loss": 2.0015, "step": 16783500 }, { "epoch": 48.58, "learning_rate": 2.5717340334383106e-05, "loss": 2.0114, "step": 16784000 }, { "epoch": 48.58, "learning_rate": 2.571661668673583e-05, "loss": 1.9899, "step": 16784500 }, { "epoch": 48.59, "learning_rate": 2.5715894486383844e-05, "loss": 2.0012, "step": 16785000 }, { "epoch": 48.59, "learning_rate": 2.5715172286031863e-05, "loss": 2.0242, "step": 16785500 }, { "epoch": 48.59, "learning_rate": 2.5714448638384585e-05, "loss": 2.0145, "step": 16786000 }, { "epoch": 48.59, "learning_rate": 2.5713724990737308e-05, "loss": 2.0088, "step": 16786500 }, { "epoch": 48.59, "learning_rate": 2.5713001343090033e-05, "loss": 1.9923, "step": 16787000 }, { "epoch": 48.59, "learning_rate": 2.571227769544276e-05, "loss": 2.0, "step": 16787500 }, { "epoch": 48.59, "learning_rate": 2.5711554047795485e-05, "loss": 2.0125, "step": 16788000 }, { "epoch": 48.6, "learning_rate": 2.5710830400148207e-05, "loss": 1.9762, "step": 16788500 }, { "epoch": 48.6, "learning_rate": 2.571010675250093e-05, "loss": 1.9761, "step": 16789000 }, { "epoch": 48.6, "learning_rate": 2.570938310485365e-05, "loss": 1.996, "step": 16789500 }, { "epoch": 48.6, "learning_rate": 2.5708659457206373e-05, "loss": 2.0195, "step": 16790000 }, { "epoch": 48.6, "learning_rate": 2.5707935809559096e-05, "loss": 2.0096, "step": 16790500 }, { "epoch": 48.6, "learning_rate": 2.5707213609207115e-05, "loss": 2.0252, "step": 16791000 }, { "epoch": 48.6, "learning_rate": 2.5706489961559837e-05, "loss": 1.9999, "step": 16791500 }, { "epoch": 48.61, "learning_rate": 2.5705767761207856e-05, "loss": 2.0091, "step": 16792000 }, { "epoch": 48.61, "learning_rate": 2.5705044113560578e-05, "loss": 2.0097, "step": 16792500 }, { "epoch": 48.61, "learning_rate": 2.57043204659133e-05, "loss": 2.0297, "step": 16793000 }, { "epoch": 48.61, "learning_rate": 2.5703596818266023e-05, "loss": 2.0132, "step": 16793500 }, { "epoch": 48.61, "learning_rate": 2.5702873170618748e-05, "loss": 2.0052, "step": 16794000 }, { "epoch": 48.61, "learning_rate": 2.5702149522971474e-05, "loss": 1.9871, "step": 16794500 }, { "epoch": 48.61, "learning_rate": 2.5701425875324196e-05, "loss": 2.0176, "step": 16795000 }, { "epoch": 48.62, "learning_rate": 2.5700703674972215e-05, "loss": 2.0051, "step": 16795500 }, { "epoch": 48.62, "learning_rate": 2.5699980027324937e-05, "loss": 2.0092, "step": 16796000 }, { "epoch": 48.62, "learning_rate": 2.569925637967766e-05, "loss": 2.0238, "step": 16796500 }, { "epoch": 48.62, "learning_rate": 2.5698532732030385e-05, "loss": 2.0, "step": 16797000 }, { "epoch": 48.62, "learning_rate": 2.5697809084383107e-05, "loss": 2.0012, "step": 16797500 }, { "epoch": 48.62, "learning_rate": 2.569708543673583e-05, "loss": 2.0019, "step": 16798000 }, { "epoch": 48.62, "learning_rate": 2.5696361789088552e-05, "loss": 2.0118, "step": 16798500 }, { "epoch": 48.63, "learning_rate": 2.5695638141441274e-05, "loss": 2.0287, "step": 16799000 }, { "epoch": 48.63, "learning_rate": 2.5694914493793996e-05, "loss": 1.9933, "step": 16799500 }, { "epoch": 48.63, "learning_rate": 2.5694190846146722e-05, "loss": 2.031, "step": 16800000 }, { "epoch": 48.63, "learning_rate": 2.5693468645794737e-05, "loss": 1.9776, "step": 16800500 }, { "epoch": 48.63, "learning_rate": 2.569274499814746e-05, "loss": 2.0006, "step": 16801000 }, { "epoch": 48.63, "learning_rate": 2.5692021350500185e-05, "loss": 1.9855, "step": 16801500 }, { "epoch": 48.63, "learning_rate": 2.569129770285291e-05, "loss": 2.0015, "step": 16802000 }, { "epoch": 48.64, "learning_rate": 2.5690574055205636e-05, "loss": 2.0005, "step": 16802500 }, { "epoch": 48.64, "learning_rate": 2.568985040755836e-05, "loss": 2.0133, "step": 16803000 }, { "epoch": 48.64, "learning_rate": 2.568912675991108e-05, "loss": 2.0298, "step": 16803500 }, { "epoch": 48.64, "learning_rate": 2.5688403112263803e-05, "loss": 2.0079, "step": 16804000 }, { "epoch": 48.64, "learning_rate": 2.5687679464616525e-05, "loss": 2.0175, "step": 16804500 }, { "epoch": 48.64, "learning_rate": 2.5686955816969248e-05, "loss": 2.0043, "step": 16805000 }, { "epoch": 48.65, "learning_rate": 2.5686233616617267e-05, "loss": 2.0022, "step": 16805500 }, { "epoch": 48.65, "learning_rate": 2.568550996896999e-05, "loss": 1.9834, "step": 16806000 }, { "epoch": 48.65, "learning_rate": 2.568478632132271e-05, "loss": 2.0137, "step": 16806500 }, { "epoch": 48.65, "learning_rate": 2.5684062673675437e-05, "loss": 2.0126, "step": 16807000 }, { "epoch": 48.65, "learning_rate": 2.5683340473323452e-05, "loss": 2.0101, "step": 16807500 }, { "epoch": 48.65, "learning_rate": 2.5682616825676174e-05, "loss": 1.9949, "step": 16808000 }, { "epoch": 48.65, "learning_rate": 2.56818931780289e-05, "loss": 1.9983, "step": 16808500 }, { "epoch": 48.66, "learning_rate": 2.5681169530381622e-05, "loss": 2.0048, "step": 16809000 }, { "epoch": 48.66, "learning_rate": 2.568044588273435e-05, "loss": 2.0114, "step": 16809500 }, { "epoch": 48.66, "learning_rate": 2.5679723682382367e-05, "loss": 2.0291, "step": 16810000 }, { "epoch": 48.66, "learning_rate": 2.567900003473509e-05, "loss": 2.0099, "step": 16810500 }, { "epoch": 48.66, "learning_rate": 2.567827638708781e-05, "loss": 2.0072, "step": 16811000 }, { "epoch": 48.66, "learning_rate": 2.5677552739440537e-05, "loss": 1.9863, "step": 16811500 }, { "epoch": 48.66, "learning_rate": 2.567683198638385e-05, "loss": 2.0041, "step": 16812000 }, { "epoch": 48.67, "learning_rate": 2.567610833873657e-05, "loss": 2.0274, "step": 16812500 }, { "epoch": 48.67, "learning_rate": 2.5675384691089294e-05, "loss": 2.0356, "step": 16813000 }, { "epoch": 48.67, "learning_rate": 2.5674661043442016e-05, "loss": 2.0194, "step": 16813500 }, { "epoch": 48.67, "learning_rate": 2.5673937395794738e-05, "loss": 1.9996, "step": 16814000 }, { "epoch": 48.67, "learning_rate": 2.5673213748147464e-05, "loss": 2.0075, "step": 16814500 }, { "epoch": 48.67, "learning_rate": 2.5672490100500186e-05, "loss": 1.9908, "step": 16815000 }, { "epoch": 48.67, "learning_rate": 2.5671766452852908e-05, "loss": 2.0157, "step": 16815500 }, { "epoch": 48.68, "learning_rate": 2.567104280520563e-05, "loss": 2.0048, "step": 16816000 }, { "epoch": 48.68, "learning_rate": 2.5670319157558353e-05, "loss": 2.0133, "step": 16816500 }, { "epoch": 48.68, "learning_rate": 2.5669596957206375e-05, "loss": 1.9784, "step": 16817000 }, { "epoch": 48.68, "learning_rate": 2.56688733095591e-05, "loss": 2.0334, "step": 16817500 }, { "epoch": 48.68, "learning_rate": 2.5668149661911823e-05, "loss": 2.0061, "step": 16818000 }, { "epoch": 48.68, "learning_rate": 2.5667426014264545e-05, "loss": 2.0, "step": 16818500 }, { "epoch": 48.68, "learning_rate": 2.5666703813912564e-05, "loss": 2.0135, "step": 16819000 }, { "epoch": 48.69, "learning_rate": 2.5665980166265286e-05, "loss": 2.0175, "step": 16819500 }, { "epoch": 48.69, "learning_rate": 2.566525651861801e-05, "loss": 2.0022, "step": 16820000 }, { "epoch": 48.69, "learning_rate": 2.566453287097073e-05, "loss": 2.0018, "step": 16820500 }, { "epoch": 48.69, "learning_rate": 2.5663809223323453e-05, "loss": 2.0024, "step": 16821000 }, { "epoch": 48.69, "learning_rate": 2.5663085575676175e-05, "loss": 2.0269, "step": 16821500 }, { "epoch": 48.69, "learning_rate": 2.56623619280289e-05, "loss": 2.0171, "step": 16822000 }, { "epoch": 48.69, "learning_rate": 2.5661638280381623e-05, "loss": 2.0256, "step": 16822500 }, { "epoch": 48.7, "learning_rate": 2.5660914632734345e-05, "loss": 2.003, "step": 16823000 }, { "epoch": 48.7, "learning_rate": 2.5660190985087068e-05, "loss": 2.0353, "step": 16823500 }, { "epoch": 48.7, "learning_rate": 2.5659468784735087e-05, "loss": 2.0252, "step": 16824000 }, { "epoch": 48.7, "learning_rate": 2.565874658438311e-05, "loss": 2.0003, "step": 16824500 }, { "epoch": 48.7, "learning_rate": 2.565802293673583e-05, "loss": 2.0194, "step": 16825000 }, { "epoch": 48.7, "learning_rate": 2.5657299289088553e-05, "loss": 2.0106, "step": 16825500 }, { "epoch": 48.7, "learning_rate": 2.5656575641441276e-05, "loss": 1.9922, "step": 16826000 }, { "epoch": 48.71, "learning_rate": 2.5655851993794e-05, "loss": 2.0035, "step": 16826500 }, { "epoch": 48.71, "learning_rate": 2.5655128346146723e-05, "loss": 2.0022, "step": 16827000 }, { "epoch": 48.71, "learning_rate": 2.5654404698499446e-05, "loss": 1.9765, "step": 16827500 }, { "epoch": 48.71, "learning_rate": 2.5653681050852168e-05, "loss": 1.9938, "step": 16828000 }, { "epoch": 48.71, "learning_rate": 2.565295740320489e-05, "loss": 2.033, "step": 16828500 }, { "epoch": 48.71, "learning_rate": 2.5652233755557616e-05, "loss": 2.0159, "step": 16829000 }, { "epoch": 48.71, "learning_rate": 2.5651510107910338e-05, "loss": 2.0112, "step": 16829500 }, { "epoch": 48.72, "learning_rate": 2.565078646026306e-05, "loss": 1.996, "step": 16830000 }, { "epoch": 48.72, "learning_rate": 2.5650064259911076e-05, "loss": 2.0182, "step": 16830500 }, { "epoch": 48.72, "learning_rate": 2.56493406122638e-05, "loss": 2.009, "step": 16831000 }, { "epoch": 48.72, "learning_rate": 2.5648616964616527e-05, "loss": 2.0171, "step": 16831500 }, { "epoch": 48.72, "learning_rate": 2.5647893316969253e-05, "loss": 2.017, "step": 16832000 }, { "epoch": 48.72, "learning_rate": 2.5647169669321975e-05, "loss": 2.008, "step": 16832500 }, { "epoch": 48.72, "learning_rate": 2.5646446021674697e-05, "loss": 2.028, "step": 16833000 }, { "epoch": 48.73, "learning_rate": 2.564572237402742e-05, "loss": 1.9953, "step": 16833500 }, { "epoch": 48.73, "learning_rate": 2.5645000173675438e-05, "loss": 2.0202, "step": 16834000 }, { "epoch": 48.73, "learning_rate": 2.5644277973323454e-05, "loss": 1.9846, "step": 16834500 }, { "epoch": 48.73, "learning_rate": 2.564355432567618e-05, "loss": 2.0013, "step": 16835000 }, { "epoch": 48.73, "learning_rate": 2.56428306780289e-05, "loss": 2.0106, "step": 16835500 }, { "epoch": 48.73, "learning_rate": 2.5642107030381624e-05, "loss": 2.0306, "step": 16836000 }, { "epoch": 48.73, "learning_rate": 2.5641383382734346e-05, "loss": 1.9933, "step": 16836500 }, { "epoch": 48.74, "learning_rate": 2.5640661182382365e-05, "loss": 2.0074, "step": 16837000 }, { "epoch": 48.74, "learning_rate": 2.5639937534735087e-05, "loss": 1.9811, "step": 16837500 }, { "epoch": 48.74, "learning_rate": 2.563921388708781e-05, "loss": 2.0128, "step": 16838000 }, { "epoch": 48.74, "learning_rate": 2.5638490239440532e-05, "loss": 1.9941, "step": 16838500 }, { "epoch": 48.74, "learning_rate": 2.5637766591793254e-05, "loss": 2.0116, "step": 16839000 }, { "epoch": 48.74, "learning_rate": 2.563704439144128e-05, "loss": 1.994, "step": 16839500 }, { "epoch": 48.74, "learning_rate": 2.5636320743794002e-05, "loss": 2.018, "step": 16840000 }, { "epoch": 48.75, "learning_rate": 2.5635597096146724e-05, "loss": 2.0006, "step": 16840500 }, { "epoch": 48.75, "learning_rate": 2.5634873448499446e-05, "loss": 2.0145, "step": 16841000 }, { "epoch": 48.75, "learning_rate": 2.563414980085217e-05, "loss": 1.9948, "step": 16841500 }, { "epoch": 48.75, "learning_rate": 2.563342615320489e-05, "loss": 1.9876, "step": 16842000 }, { "epoch": 48.75, "learning_rate": 2.5632702505557617e-05, "loss": 1.9881, "step": 16842500 }, { "epoch": 48.75, "learning_rate": 2.563197885791034e-05, "loss": 1.9876, "step": 16843000 }, { "epoch": 48.76, "learning_rate": 2.563125521026306e-05, "loss": 1.9932, "step": 16843500 }, { "epoch": 48.76, "learning_rate": 2.563053300991108e-05, "loss": 2.0133, "step": 16844000 }, { "epoch": 48.76, "learning_rate": 2.5629810809559095e-05, "loss": 1.9963, "step": 16844500 }, { "epoch": 48.76, "learning_rate": 2.5629087161911818e-05, "loss": 2.0256, "step": 16845000 }, { "epoch": 48.76, "learning_rate": 2.5628363514264543e-05, "loss": 1.9973, "step": 16845500 }, { "epoch": 48.76, "learning_rate": 2.5627639866617266e-05, "loss": 1.9948, "step": 16846000 }, { "epoch": 48.76, "learning_rate": 2.5626916218969988e-05, "loss": 2.0122, "step": 16846500 }, { "epoch": 48.77, "learning_rate": 2.5626192571322717e-05, "loss": 2.0, "step": 16847000 }, { "epoch": 48.77, "learning_rate": 2.562546892367544e-05, "loss": 1.9917, "step": 16847500 }, { "epoch": 48.77, "learning_rate": 2.562474527602816e-05, "loss": 1.9957, "step": 16848000 }, { "epoch": 48.77, "learning_rate": 2.562402307567618e-05, "loss": 2.0301, "step": 16848500 }, { "epoch": 48.77, "learning_rate": 2.5623299428028902e-05, "loss": 2.0051, "step": 16849000 }, { "epoch": 48.77, "learning_rate": 2.5622575780381625e-05, "loss": 2.0151, "step": 16849500 }, { "epoch": 48.77, "learning_rate": 2.5621852132734347e-05, "loss": 2.0145, "step": 16850000 }, { "epoch": 48.78, "learning_rate": 2.562112848508707e-05, "loss": 1.9969, "step": 16850500 }, { "epoch": 48.78, "learning_rate": 2.5620404837439795e-05, "loss": 2.046, "step": 16851000 }, { "epoch": 48.78, "learning_rate": 2.5619681189792517e-05, "loss": 1.9962, "step": 16851500 }, { "epoch": 48.78, "learning_rate": 2.561895754214524e-05, "loss": 2.0181, "step": 16852000 }, { "epoch": 48.78, "learning_rate": 2.561823389449796e-05, "loss": 1.9811, "step": 16852500 }, { "epoch": 48.78, "learning_rate": 2.561751169414598e-05, "loss": 2.0166, "step": 16853000 }, { "epoch": 48.78, "learning_rate": 2.5616788046498703e-05, "loss": 2.0103, "step": 16853500 }, { "epoch": 48.79, "learning_rate": 2.561606439885143e-05, "loss": 2.0077, "step": 16854000 }, { "epoch": 48.79, "learning_rate": 2.5615340751204154e-05, "loss": 2.0167, "step": 16854500 }, { "epoch": 48.79, "learning_rate": 2.5614617103556876e-05, "loss": 2.0144, "step": 16855000 }, { "epoch": 48.79, "learning_rate": 2.56138934559096e-05, "loss": 2.0124, "step": 16855500 }, { "epoch": 48.79, "learning_rate": 2.561316980826232e-05, "loss": 2.0032, "step": 16856000 }, { "epoch": 48.79, "learning_rate": 2.5612446160615046e-05, "loss": 2.0143, "step": 16856500 }, { "epoch": 48.79, "learning_rate": 2.561172251296777e-05, "loss": 1.9995, "step": 16857000 }, { "epoch": 48.8, "learning_rate": 2.561099886532049e-05, "loss": 1.9902, "step": 16857500 }, { "epoch": 48.8, "learning_rate": 2.5610275217673213e-05, "loss": 2.0225, "step": 16858000 }, { "epoch": 48.8, "learning_rate": 2.5609553017321232e-05, "loss": 2.0195, "step": 16858500 }, { "epoch": 48.8, "learning_rate": 2.5608829369673954e-05, "loss": 2.0143, "step": 16859000 }, { "epoch": 48.8, "learning_rate": 2.5608105722026676e-05, "loss": 2.0249, "step": 16859500 }, { "epoch": 48.8, "learning_rate": 2.56073820743794e-05, "loss": 1.9892, "step": 16860000 }, { "epoch": 48.8, "learning_rate": 2.560665842673212e-05, "loss": 2.0274, "step": 16860500 }, { "epoch": 48.81, "learning_rate": 2.560593622638014e-05, "loss": 2.0126, "step": 16861000 }, { "epoch": 48.81, "learning_rate": 2.560521257873287e-05, "loss": 2.0107, "step": 16861500 }, { "epoch": 48.81, "learning_rate": 2.5604490378380884e-05, "loss": 2.0158, "step": 16862000 }, { "epoch": 48.81, "learning_rate": 2.5603766730733607e-05, "loss": 2.0168, "step": 16862500 }, { "epoch": 48.81, "learning_rate": 2.5603043083086332e-05, "loss": 2.0173, "step": 16863000 }, { "epoch": 48.81, "learning_rate": 2.5602319435439054e-05, "loss": 2.0022, "step": 16863500 }, { "epoch": 48.81, "learning_rate": 2.5601595787791777e-05, "loss": 2.0068, "step": 16864000 }, { "epoch": 48.82, "learning_rate": 2.56008721401445e-05, "loss": 1.9868, "step": 16864500 }, { "epoch": 48.82, "learning_rate": 2.560014849249722e-05, "loss": 2.0371, "step": 16865000 }, { "epoch": 48.82, "learning_rate": 2.5599424844849947e-05, "loss": 1.9955, "step": 16865500 }, { "epoch": 48.82, "learning_rate": 2.5598702644497962e-05, "loss": 2.0107, "step": 16866000 }, { "epoch": 48.82, "learning_rate": 2.5597978996850685e-05, "loss": 1.9992, "step": 16866500 }, { "epoch": 48.82, "learning_rate": 2.5597256796498703e-05, "loss": 1.9925, "step": 16867000 }, { "epoch": 48.82, "learning_rate": 2.5596533148851426e-05, "loss": 2.0232, "step": 16867500 }, { "epoch": 48.83, "learning_rate": 2.5595812395794738e-05, "loss": 1.9998, "step": 16868000 }, { "epoch": 48.83, "learning_rate": 2.559508874814746e-05, "loss": 1.9897, "step": 16868500 }, { "epoch": 48.83, "learning_rate": 2.5594365100500182e-05, "loss": 2.0227, "step": 16869000 }, { "epoch": 48.83, "learning_rate": 2.559364145285291e-05, "loss": 2.0256, "step": 16869500 }, { "epoch": 48.83, "learning_rate": 2.5592917805205634e-05, "loss": 1.9978, "step": 16870000 }, { "epoch": 48.83, "learning_rate": 2.559219415755836e-05, "loss": 1.9822, "step": 16870500 }, { "epoch": 48.83, "learning_rate": 2.559147050991108e-05, "loss": 1.9981, "step": 16871000 }, { "epoch": 48.84, "learning_rate": 2.5590746862263804e-05, "loss": 2.0265, "step": 16871500 }, { "epoch": 48.84, "learning_rate": 2.5590023214616526e-05, "loss": 2.0046, "step": 16872000 }, { "epoch": 48.84, "learning_rate": 2.5589299566969248e-05, "loss": 2.0273, "step": 16872500 }, { "epoch": 48.84, "learning_rate": 2.558857591932197e-05, "loss": 1.9811, "step": 16873000 }, { "epoch": 48.84, "learning_rate": 2.558785371896999e-05, "loss": 2.0019, "step": 16873500 }, { "epoch": 48.84, "learning_rate": 2.558713007132271e-05, "loss": 2.0083, "step": 16874000 }, { "epoch": 48.84, "learning_rate": 2.5586406423675434e-05, "loss": 2.0176, "step": 16874500 }, { "epoch": 48.85, "learning_rate": 2.558568277602816e-05, "loss": 1.9994, "step": 16875000 }, { "epoch": 48.85, "learning_rate": 2.5584959128380882e-05, "loss": 2.0093, "step": 16875500 }, { "epoch": 48.85, "learning_rate": 2.5584235480733604e-05, "loss": 2.0042, "step": 16876000 }, { "epoch": 48.85, "learning_rate": 2.5583513280381623e-05, "loss": 2.0035, "step": 16876500 }, { "epoch": 48.85, "learning_rate": 2.558278963273435e-05, "loss": 2.0205, "step": 16877000 }, { "epoch": 48.85, "learning_rate": 2.5582067432382367e-05, "loss": 1.9956, "step": 16877500 }, { "epoch": 48.85, "learning_rate": 2.558134378473509e-05, "loss": 2.0141, "step": 16878000 }, { "epoch": 48.86, "learning_rate": 2.5580620137087812e-05, "loss": 1.9835, "step": 16878500 }, { "epoch": 48.86, "learning_rate": 2.5579896489440534e-05, "loss": 2.0065, "step": 16879000 }, { "epoch": 48.86, "learning_rate": 2.557917284179326e-05, "loss": 2.0093, "step": 16879500 }, { "epoch": 48.86, "learning_rate": 2.5578449194145982e-05, "loss": 1.9926, "step": 16880000 }, { "epoch": 48.86, "learning_rate": 2.5577725546498704e-05, "loss": 2.0302, "step": 16880500 }, { "epoch": 48.86, "learning_rate": 2.5577001898851426e-05, "loss": 2.0076, "step": 16881000 }, { "epoch": 48.87, "learning_rate": 2.557627825120415e-05, "loss": 2.0017, "step": 16881500 }, { "epoch": 48.87, "learning_rate": 2.5575554603556874e-05, "loss": 2.0053, "step": 16882000 }, { "epoch": 48.87, "learning_rate": 2.5574830955909597e-05, "loss": 2.0146, "step": 16882500 }, { "epoch": 48.87, "learning_rate": 2.557410730826232e-05, "loss": 2.0007, "step": 16883000 }, { "epoch": 48.87, "learning_rate": 2.557338366061504e-05, "loss": 2.0123, "step": 16883500 }, { "epoch": 48.87, "learning_rate": 2.557266001296777e-05, "loss": 2.0226, "step": 16884000 }, { "epoch": 48.87, "learning_rate": 2.5571936365320492e-05, "loss": 2.021, "step": 16884500 }, { "epoch": 48.88, "learning_rate": 2.557121416496851e-05, "loss": 2.0155, "step": 16885000 }, { "epoch": 48.88, "learning_rate": 2.5570490517321233e-05, "loss": 2.0114, "step": 16885500 }, { "epoch": 48.88, "learning_rate": 2.5569766869673956e-05, "loss": 2.0014, "step": 16886000 }, { "epoch": 48.88, "learning_rate": 2.5569043222026678e-05, "loss": 2.0481, "step": 16886500 }, { "epoch": 48.88, "learning_rate": 2.55683195743794e-05, "loss": 2.0068, "step": 16887000 }, { "epoch": 48.88, "learning_rate": 2.5567595926732126e-05, "loss": 2.0048, "step": 16887500 }, { "epoch": 48.88, "learning_rate": 2.5566872279084848e-05, "loss": 2.0309, "step": 16888000 }, { "epoch": 48.89, "learning_rate": 2.556614863143757e-05, "loss": 2.0193, "step": 16888500 }, { "epoch": 48.89, "learning_rate": 2.5565424983790292e-05, "loss": 1.9915, "step": 16889000 }, { "epoch": 48.89, "learning_rate": 2.5564701336143015e-05, "loss": 2.003, "step": 16889500 }, { "epoch": 48.89, "learning_rate": 2.5563977688495737e-05, "loss": 2.0065, "step": 16890000 }, { "epoch": 48.89, "learning_rate": 2.5563255488143756e-05, "loss": 2.0234, "step": 16890500 }, { "epoch": 48.89, "learning_rate": 2.5562533287791775e-05, "loss": 2.0213, "step": 16891000 }, { "epoch": 48.89, "learning_rate": 2.55618096401445e-05, "loss": 2.0038, "step": 16891500 }, { "epoch": 48.9, "learning_rate": 2.5561085992497226e-05, "loss": 1.9897, "step": 16892000 }, { "epoch": 48.9, "learning_rate": 2.556036234484995e-05, "loss": 2.0247, "step": 16892500 }, { "epoch": 48.9, "learning_rate": 2.555963869720267e-05, "loss": 2.0249, "step": 16893000 }, { "epoch": 48.9, "learning_rate": 2.5558915049555393e-05, "loss": 2.0164, "step": 16893500 }, { "epoch": 48.9, "learning_rate": 2.5558191401908115e-05, "loss": 2.0373, "step": 16894000 }, { "epoch": 48.9, "learning_rate": 2.5557467754260837e-05, "loss": 1.9702, "step": 16894500 }, { "epoch": 48.9, "learning_rate": 2.5556744106613563e-05, "loss": 2.0071, "step": 16895000 }, { "epoch": 48.91, "learning_rate": 2.5556020458966285e-05, "loss": 2.0097, "step": 16895500 }, { "epoch": 48.91, "learning_rate": 2.55552982586143e-05, "loss": 2.0178, "step": 16896000 }, { "epoch": 48.91, "learning_rate": 2.5554574610967026e-05, "loss": 2.0201, "step": 16896500 }, { "epoch": 48.91, "learning_rate": 2.555385096331975e-05, "loss": 2.0243, "step": 16897000 }, { "epoch": 48.91, "learning_rate": 2.5553128762967764e-05, "loss": 1.9959, "step": 16897500 }, { "epoch": 48.91, "learning_rate": 2.5552405115320486e-05, "loss": 1.9942, "step": 16898000 }, { "epoch": 48.91, "learning_rate": 2.5551681467673212e-05, "loss": 1.9882, "step": 16898500 }, { "epoch": 48.92, "learning_rate": 2.5550957820025938e-05, "loss": 2.0003, "step": 16899000 }, { "epoch": 48.92, "learning_rate": 2.5550234172378663e-05, "loss": 2.0188, "step": 16899500 }, { "epoch": 48.92, "learning_rate": 2.5549510524731385e-05, "loss": 2.0184, "step": 16900000 }, { "epoch": 48.92, "learning_rate": 2.5548786877084108e-05, "loss": 2.0209, "step": 16900500 }, { "epoch": 48.92, "learning_rate": 2.5548064676732127e-05, "loss": 2.0243, "step": 16901000 }, { "epoch": 48.92, "learning_rate": 2.554734102908485e-05, "loss": 2.0065, "step": 16901500 }, { "epoch": 48.92, "learning_rate": 2.554661738143757e-05, "loss": 1.9999, "step": 16902000 }, { "epoch": 48.93, "learning_rate": 2.5545893733790293e-05, "loss": 1.9929, "step": 16902500 }, { "epoch": 48.93, "learning_rate": 2.5545170086143016e-05, "loss": 1.9811, "step": 16903000 }, { "epoch": 48.93, "learning_rate": 2.5544446438495738e-05, "loss": 2.0097, "step": 16903500 }, { "epoch": 48.93, "learning_rate": 2.5543722790848463e-05, "loss": 1.9956, "step": 16904000 }, { "epoch": 48.93, "learning_rate": 2.5542999143201186e-05, "loss": 2.0033, "step": 16904500 }, { "epoch": 48.93, "learning_rate": 2.5542275495553908e-05, "loss": 1.9942, "step": 16905000 }, { "epoch": 48.93, "learning_rate": 2.5541553295201927e-05, "loss": 2.0237, "step": 16905500 }, { "epoch": 48.94, "learning_rate": 2.5540829647554652e-05, "loss": 2.0123, "step": 16906000 }, { "epoch": 48.94, "learning_rate": 2.5540105999907378e-05, "loss": 2.0101, "step": 16906500 }, { "epoch": 48.94, "learning_rate": 2.55393823522601e-05, "loss": 1.9978, "step": 16907000 }, { "epoch": 48.94, "learning_rate": 2.5538658704612822e-05, "loss": 2.0109, "step": 16907500 }, { "epoch": 48.94, "learning_rate": 2.553793650426084e-05, "loss": 2.0134, "step": 16908000 }, { "epoch": 48.94, "learning_rate": 2.5537212856613564e-05, "loss": 2.0114, "step": 16908500 }, { "epoch": 48.94, "learning_rate": 2.5536489208966286e-05, "loss": 2.0242, "step": 16909000 }, { "epoch": 48.95, "learning_rate": 2.5535765561319008e-05, "loss": 2.001, "step": 16909500 }, { "epoch": 48.95, "learning_rate": 2.553504191367173e-05, "loss": 1.9845, "step": 16910000 }, { "epoch": 48.95, "learning_rate": 2.5534318266024453e-05, "loss": 1.9943, "step": 16910500 }, { "epoch": 48.95, "learning_rate": 2.5533594618377178e-05, "loss": 2.0097, "step": 16911000 }, { "epoch": 48.95, "learning_rate": 2.5532872418025194e-05, "loss": 1.9663, "step": 16911500 }, { "epoch": 48.95, "learning_rate": 2.5532148770377916e-05, "loss": 2.0319, "step": 16912000 }, { "epoch": 48.95, "learning_rate": 2.553142512273064e-05, "loss": 2.0267, "step": 16912500 }, { "epoch": 48.96, "learning_rate": 2.5530701475083364e-05, "loss": 2.0273, "step": 16913000 }, { "epoch": 48.96, "learning_rate": 2.5529977827436093e-05, "loss": 2.0224, "step": 16913500 }, { "epoch": 48.96, "learning_rate": 2.5529254179788815e-05, "loss": 2.006, "step": 16914000 }, { "epoch": 48.96, "learning_rate": 2.5528530532141537e-05, "loss": 2.0354, "step": 16914500 }, { "epoch": 48.96, "learning_rate": 2.552780688449426e-05, "loss": 2.0276, "step": 16915000 }, { "epoch": 48.96, "learning_rate": 2.552708468414228e-05, "loss": 2.014, "step": 16915500 }, { "epoch": 48.96, "learning_rate": 2.5526365378380884e-05, "loss": 2.0149, "step": 16916000 }, { "epoch": 48.97, "learning_rate": 2.5525641730733606e-05, "loss": 2.0238, "step": 16916500 }, { "epoch": 48.97, "learning_rate": 2.552491808308633e-05, "loss": 2.0276, "step": 16917000 }, { "epoch": 48.97, "learning_rate": 2.5524194435439054e-05, "loss": 2.0197, "step": 16917500 }, { "epoch": 48.97, "learning_rate": 2.552347223508707e-05, "loss": 2.0218, "step": 16918000 }, { "epoch": 48.97, "learning_rate": 2.5522748587439792e-05, "loss": 1.9908, "step": 16918500 }, { "epoch": 48.97, "learning_rate": 2.5522024939792518e-05, "loss": 2.013, "step": 16919000 }, { "epoch": 48.98, "learning_rate": 2.552130129214524e-05, "loss": 2.0257, "step": 16919500 }, { "epoch": 48.98, "learning_rate": 2.5520577644497962e-05, "loss": 2.0084, "step": 16920000 }, { "epoch": 48.98, "learning_rate": 2.5519853996850684e-05, "loss": 1.9764, "step": 16920500 }, { "epoch": 48.98, "learning_rate": 2.5519131796498703e-05, "loss": 1.9922, "step": 16921000 }, { "epoch": 48.98, "learning_rate": 2.551840814885143e-05, "loss": 2.0049, "step": 16921500 }, { "epoch": 48.98, "learning_rate": 2.5517684501204154e-05, "loss": 2.0233, "step": 16922000 }, { "epoch": 48.98, "learning_rate": 2.5516960853556877e-05, "loss": 1.9986, "step": 16922500 }, { "epoch": 48.99, "learning_rate": 2.55162372059096e-05, "loss": 1.9991, "step": 16923000 }, { "epoch": 48.99, "learning_rate": 2.551551355826232e-05, "loss": 2.0083, "step": 16923500 }, { "epoch": 48.99, "learning_rate": 2.5514789910615043e-05, "loss": 1.9907, "step": 16924000 }, { "epoch": 48.99, "learning_rate": 2.551406626296777e-05, "loss": 1.9852, "step": 16924500 }, { "epoch": 48.99, "learning_rate": 2.551334261532049e-05, "loss": 2.0225, "step": 16925000 }, { "epoch": 48.99, "learning_rate": 2.5512618967673214e-05, "loss": 2.0156, "step": 16925500 }, { "epoch": 48.99, "learning_rate": 2.5511895320025936e-05, "loss": 2.0281, "step": 16926000 }, { "epoch": 49.0, "learning_rate": 2.5511173119673955e-05, "loss": 2.0107, "step": 16926500 }, { "epoch": 49.0, "learning_rate": 2.5510449472026677e-05, "loss": 1.9908, "step": 16927000 }, { "epoch": 49.0, "learning_rate": 2.55097258243794e-05, "loss": 1.989, "step": 16927500 }, { "epoch": 49.0, "learning_rate": 2.5509003624027418e-05, "loss": 2.0041, "step": 16928000 }, { "epoch": 49.0, "eval_accuracy": 0.6736439941361908, "eval_accuracy_mlm": 0.6394893188323623, "eval_accuracy_nsp": 0.8566796368352788, "eval_loss": 2.1714398860931396, "eval_runtime": 331.7519, "eval_samples_per_second": 1315.399, "eval_steps_per_second": 54.809, "step": 16928128 }, { "epoch": 49.0, "learning_rate": 2.550827997638014e-05, "loss": 1.9782, "step": 16928500 }, { "epoch": 49.0, "learning_rate": 2.550755632873287e-05, "loss": 1.994, "step": 16929000 }, { "epoch": 49.0, "learning_rate": 2.550683268108559e-05, "loss": 1.9906, "step": 16929500 }, { "epoch": 49.01, "learning_rate": 2.5506109033438314e-05, "loss": 2.0034, "step": 16930000 }, { "epoch": 49.01, "learning_rate": 2.5505385385791036e-05, "loss": 2.0016, "step": 16930500 }, { "epoch": 49.01, "learning_rate": 2.5504661738143758e-05, "loss": 1.9844, "step": 16931000 }, { "epoch": 49.01, "learning_rate": 2.550393809049648e-05, "loss": 1.965, "step": 16931500 }, { "epoch": 49.01, "learning_rate": 2.5503214442849206e-05, "loss": 2.0007, "step": 16932000 }, { "epoch": 49.01, "learning_rate": 2.550249079520193e-05, "loss": 1.9915, "step": 16932500 }, { "epoch": 49.01, "learning_rate": 2.550176714755465e-05, "loss": 1.9724, "step": 16933000 }, { "epoch": 49.02, "learning_rate": 2.550104494720267e-05, "loss": 2.0185, "step": 16933500 }, { "epoch": 49.02, "learning_rate": 2.5500321299555392e-05, "loss": 1.975, "step": 16934000 }, { "epoch": 49.02, "learning_rate": 2.5499597651908114e-05, "loss": 1.9651, "step": 16934500 }, { "epoch": 49.02, "learning_rate": 2.5498874004260836e-05, "loss": 2.0111, "step": 16935000 }, { "epoch": 49.02, "learning_rate": 2.549815035661356e-05, "loss": 2.013, "step": 16935500 }, { "epoch": 49.02, "learning_rate": 2.5497426708966287e-05, "loss": 1.9855, "step": 16936000 }, { "epoch": 49.02, "learning_rate": 2.549670306131901e-05, "loss": 1.9776, "step": 16936500 }, { "epoch": 49.03, "learning_rate": 2.5495979413671732e-05, "loss": 1.981, "step": 16937000 }, { "epoch": 49.03, "learning_rate": 2.5495255766024458e-05, "loss": 1.9619, "step": 16937500 }, { "epoch": 49.03, "learning_rate": 2.5494533565672473e-05, "loss": 1.9775, "step": 16938000 }, { "epoch": 49.03, "learning_rate": 2.5493809918025195e-05, "loss": 1.9593, "step": 16938500 }, { "epoch": 49.03, "learning_rate": 2.549308627037792e-05, "loss": 1.9754, "step": 16939000 }, { "epoch": 49.03, "learning_rate": 2.5492362622730643e-05, "loss": 1.9623, "step": 16939500 }, { "epoch": 49.03, "learning_rate": 2.5491638975083365e-05, "loss": 1.9801, "step": 16940000 }, { "epoch": 49.04, "learning_rate": 2.549091677473138e-05, "loss": 1.9867, "step": 16940500 }, { "epoch": 49.04, "learning_rate": 2.5490193127084107e-05, "loss": 1.9734, "step": 16941000 }, { "epoch": 49.04, "learning_rate": 2.548946947943683e-05, "loss": 1.9963, "step": 16941500 }, { "epoch": 49.04, "learning_rate": 2.548874583178955e-05, "loss": 1.9766, "step": 16942000 }, { "epoch": 49.04, "learning_rate": 2.5488022184142273e-05, "loss": 2.0071, "step": 16942500 }, { "epoch": 49.04, "learning_rate": 2.5487299983790292e-05, "loss": 1.9846, "step": 16943000 }, { "epoch": 49.04, "learning_rate": 2.548657633614302e-05, "loss": 2.032, "step": 16943500 }, { "epoch": 49.05, "learning_rate": 2.5485852688495744e-05, "loss": 2.0034, "step": 16944000 }, { "epoch": 49.05, "learning_rate": 2.5485129040848466e-05, "loss": 1.9925, "step": 16944500 }, { "epoch": 49.05, "learning_rate": 2.5484405393201188e-05, "loss": 1.9841, "step": 16945000 }, { "epoch": 49.05, "learning_rate": 2.548368174555391e-05, "loss": 1.9762, "step": 16945500 }, { "epoch": 49.05, "learning_rate": 2.5482958097906632e-05, "loss": 1.998, "step": 16946000 }, { "epoch": 49.05, "learning_rate": 2.548223589755465e-05, "loss": 1.9723, "step": 16946500 }, { "epoch": 49.05, "learning_rate": 2.5481512249907374e-05, "loss": 2.0047, "step": 16947000 }, { "epoch": 49.06, "learning_rate": 2.5480788602260096e-05, "loss": 1.9964, "step": 16947500 }, { "epoch": 49.06, "learning_rate": 2.548006495461282e-05, "loss": 2.0056, "step": 16948000 }, { "epoch": 49.06, "learning_rate": 2.5479341306965544e-05, "loss": 2.0033, "step": 16948500 }, { "epoch": 49.06, "learning_rate": 2.5478617659318266e-05, "loss": 2.0076, "step": 16949000 }, { "epoch": 49.06, "learning_rate": 2.5477894011670988e-05, "loss": 2.0378, "step": 16949500 }, { "epoch": 49.06, "learning_rate": 2.5477171811319007e-05, "loss": 1.9785, "step": 16950000 }, { "epoch": 49.06, "learning_rate": 2.547644816367173e-05, "loss": 1.9846, "step": 16950500 }, { "epoch": 49.07, "learning_rate": 2.547572451602446e-05, "loss": 2.0016, "step": 16951000 }, { "epoch": 49.07, "learning_rate": 2.5475002315672474e-05, "loss": 2.0012, "step": 16951500 }, { "epoch": 49.07, "learning_rate": 2.5474278668025196e-05, "loss": 1.9876, "step": 16952000 }, { "epoch": 49.07, "learning_rate": 2.5473556467673215e-05, "loss": 1.9862, "step": 16952500 }, { "epoch": 49.07, "learning_rate": 2.5472832820025937e-05, "loss": 1.9668, "step": 16953000 }, { "epoch": 49.07, "learning_rate": 2.547210917237866e-05, "loss": 1.9762, "step": 16953500 }, { "epoch": 49.07, "learning_rate": 2.5471385524731385e-05, "loss": 1.9725, "step": 16954000 }, { "epoch": 49.08, "learning_rate": 2.5470661877084107e-05, "loss": 1.9848, "step": 16954500 }, { "epoch": 49.08, "learning_rate": 2.546993822943683e-05, "loss": 2.0021, "step": 16955000 }, { "epoch": 49.08, "learning_rate": 2.546921602908485e-05, "loss": 2.0018, "step": 16955500 }, { "epoch": 49.08, "learning_rate": 2.546849238143757e-05, "loss": 2.01, "step": 16956000 }, { "epoch": 49.08, "learning_rate": 2.5467768733790293e-05, "loss": 1.992, "step": 16956500 }, { "epoch": 49.08, "learning_rate": 2.5467045086143015e-05, "loss": 1.9692, "step": 16957000 }, { "epoch": 49.09, "learning_rate": 2.5466321438495738e-05, "loss": 1.9939, "step": 16957500 }, { "epoch": 49.09, "learning_rate": 2.546559779084846e-05, "loss": 2.0019, "step": 16958000 }, { "epoch": 49.09, "learning_rate": 2.546487414320119e-05, "loss": 1.9474, "step": 16958500 }, { "epoch": 49.09, "learning_rate": 2.546415049555391e-05, "loss": 2.003, "step": 16959000 }, { "epoch": 49.09, "learning_rate": 2.5463426847906637e-05, "loss": 1.9765, "step": 16959500 }, { "epoch": 49.09, "learning_rate": 2.546270320025936e-05, "loss": 2.0026, "step": 16960000 }, { "epoch": 49.09, "learning_rate": 2.546197955261208e-05, "loss": 1.9805, "step": 16960500 }, { "epoch": 49.1, "learning_rate": 2.5461255904964803e-05, "loss": 1.9997, "step": 16961000 }, { "epoch": 49.1, "learning_rate": 2.5460532257317526e-05, "loss": 2.001, "step": 16961500 }, { "epoch": 49.1, "learning_rate": 2.5459808609670248e-05, "loss": 2.0146, "step": 16962000 }, { "epoch": 49.1, "learning_rate": 2.5459084962022973e-05, "loss": 2.0055, "step": 16962500 }, { "epoch": 49.1, "learning_rate": 2.545836276167099e-05, "loss": 2.0152, "step": 16963000 }, { "epoch": 49.1, "learning_rate": 2.545763911402371e-05, "loss": 2.0095, "step": 16963500 }, { "epoch": 49.1, "learning_rate": 2.5456915466376437e-05, "loss": 1.9861, "step": 16964000 }, { "epoch": 49.11, "learning_rate": 2.545619181872916e-05, "loss": 2.0048, "step": 16964500 }, { "epoch": 49.11, "learning_rate": 2.545546817108188e-05, "loss": 1.9813, "step": 16965000 }, { "epoch": 49.11, "learning_rate": 2.545474452343461e-05, "loss": 2.0109, "step": 16965500 }, { "epoch": 49.11, "learning_rate": 2.5454020875787333e-05, "loss": 2.012, "step": 16966000 }, { "epoch": 49.11, "learning_rate": 2.5453298675435348e-05, "loss": 1.9951, "step": 16966500 }, { "epoch": 49.11, "learning_rate": 2.5452575027788074e-05, "loss": 1.9888, "step": 16967000 }, { "epoch": 49.11, "learning_rate": 2.5451851380140796e-05, "loss": 1.9866, "step": 16967500 }, { "epoch": 49.12, "learning_rate": 2.5451127732493518e-05, "loss": 1.9844, "step": 16968000 }, { "epoch": 49.12, "learning_rate": 2.545040408484624e-05, "loss": 1.9843, "step": 16968500 }, { "epoch": 49.12, "learning_rate": 2.5449680437198963e-05, "loss": 2.0029, "step": 16969000 }, { "epoch": 49.12, "learning_rate": 2.5448956789551688e-05, "loss": 1.9713, "step": 16969500 }, { "epoch": 49.12, "learning_rate": 2.5448234589199704e-05, "loss": 2.0069, "step": 16970000 }, { "epoch": 49.12, "learning_rate": 2.5447510941552426e-05, "loss": 2.0149, "step": 16970500 }, { "epoch": 49.12, "learning_rate": 2.5446787293905148e-05, "loss": 2.0192, "step": 16971000 }, { "epoch": 49.13, "learning_rate": 2.5446063646257874e-05, "loss": 2.0014, "step": 16971500 }, { "epoch": 49.13, "learning_rate": 2.544534144590589e-05, "loss": 1.9724, "step": 16972000 }, { "epoch": 49.13, "learning_rate": 2.5444617798258612e-05, "loss": 2.0048, "step": 16972500 }, { "epoch": 49.13, "learning_rate": 2.544389559790663e-05, "loss": 1.9768, "step": 16973000 }, { "epoch": 49.13, "learning_rate": 2.544317195025936e-05, "loss": 1.9847, "step": 16973500 }, { "epoch": 49.13, "learning_rate": 2.5442448302612082e-05, "loss": 2.001, "step": 16974000 }, { "epoch": 49.13, "learning_rate": 2.5441724654964804e-05, "loss": 1.9997, "step": 16974500 }, { "epoch": 49.14, "learning_rate": 2.5441001007317526e-05, "loss": 1.9746, "step": 16975000 }, { "epoch": 49.14, "learning_rate": 2.5440277359670252e-05, "loss": 1.9913, "step": 16975500 }, { "epoch": 49.14, "learning_rate": 2.5439553712022974e-05, "loss": 1.9872, "step": 16976000 }, { "epoch": 49.14, "learning_rate": 2.5438830064375696e-05, "loss": 2.0002, "step": 16976500 }, { "epoch": 49.14, "learning_rate": 2.543810641672842e-05, "loss": 1.9737, "step": 16977000 }, { "epoch": 49.14, "learning_rate": 2.543738276908114e-05, "loss": 1.9835, "step": 16977500 }, { "epoch": 49.14, "learning_rate": 2.5436659121433863e-05, "loss": 2.0001, "step": 16978000 }, { "epoch": 49.15, "learning_rate": 2.5435938368377175e-05, "loss": 1.9786, "step": 16978500 }, { "epoch": 49.15, "learning_rate": 2.54352147207299e-05, "loss": 1.9967, "step": 16979000 }, { "epoch": 49.15, "learning_rate": 2.5434491073082623e-05, "loss": 1.9991, "step": 16979500 }, { "epoch": 49.15, "learning_rate": 2.5433767425435346e-05, "loss": 2.0106, "step": 16980000 }, { "epoch": 49.15, "learning_rate": 2.5433043777788068e-05, "loss": 1.9797, "step": 16980500 }, { "epoch": 49.15, "learning_rate": 2.5432320130140797e-05, "loss": 1.9991, "step": 16981000 }, { "epoch": 49.15, "learning_rate": 2.5431597929788816e-05, "loss": 1.9766, "step": 16981500 }, { "epoch": 49.16, "learning_rate": 2.5430874282141538e-05, "loss": 2.002, "step": 16982000 }, { "epoch": 49.16, "learning_rate": 2.543015063449426e-05, "loss": 2.0145, "step": 16982500 }, { "epoch": 49.16, "learning_rate": 2.5429426986846982e-05, "loss": 1.9925, "step": 16983000 }, { "epoch": 49.16, "learning_rate": 2.5428703339199705e-05, "loss": 2.0015, "step": 16983500 }, { "epoch": 49.16, "learning_rate": 2.5427979691552427e-05, "loss": 2.001, "step": 16984000 }, { "epoch": 49.16, "learning_rate": 2.5427256043905152e-05, "loss": 2.0143, "step": 16984500 }, { "epoch": 49.16, "learning_rate": 2.5426533843553168e-05, "loss": 1.9803, "step": 16985000 }, { "epoch": 49.17, "learning_rate": 2.542581019590589e-05, "loss": 1.9949, "step": 16985500 }, { "epoch": 49.17, "learning_rate": 2.5425086548258616e-05, "loss": 1.9897, "step": 16986000 }, { "epoch": 49.17, "learning_rate": 2.5424362900611338e-05, "loss": 1.9959, "step": 16986500 }, { "epoch": 49.17, "learning_rate": 2.542363925296406e-05, "loss": 1.9987, "step": 16987000 }, { "epoch": 49.17, "learning_rate": 2.5422915605316783e-05, "loss": 2.0033, "step": 16987500 }, { "epoch": 49.17, "learning_rate": 2.54221934049648e-05, "loss": 1.9775, "step": 16988000 }, { "epoch": 49.17, "learning_rate": 2.5421469757317527e-05, "loss": 1.9811, "step": 16988500 }, { "epoch": 49.18, "learning_rate": 2.5420746109670253e-05, "loss": 1.9894, "step": 16989000 }, { "epoch": 49.18, "learning_rate": 2.5420022462022975e-05, "loss": 1.9761, "step": 16989500 }, { "epoch": 49.18, "learning_rate": 2.5419298814375697e-05, "loss": 2.0026, "step": 16990000 }, { "epoch": 49.18, "learning_rate": 2.541857516672842e-05, "loss": 1.9803, "step": 16990500 }, { "epoch": 49.18, "learning_rate": 2.5417851519081142e-05, "loss": 1.9874, "step": 16991000 }, { "epoch": 49.18, "learning_rate": 2.5417127871433867e-05, "loss": 1.9894, "step": 16991500 }, { "epoch": 49.18, "learning_rate": 2.541640422378659e-05, "loss": 1.9988, "step": 16992000 }, { "epoch": 49.19, "learning_rate": 2.5415682023434605e-05, "loss": 2.0012, "step": 16992500 }, { "epoch": 49.19, "learning_rate": 2.5414958375787327e-05, "loss": 2.0146, "step": 16993000 }, { "epoch": 49.19, "learning_rate": 2.5414236175435346e-05, "loss": 2.0129, "step": 16993500 }, { "epoch": 49.19, "learning_rate": 2.541351252778807e-05, "loss": 1.9853, "step": 16994000 }, { "epoch": 49.19, "learning_rate": 2.541278888014079e-05, "loss": 1.9931, "step": 16994500 }, { "epoch": 49.19, "learning_rate": 2.5412065232493516e-05, "loss": 1.98, "step": 16995000 }, { "epoch": 49.2, "learning_rate": 2.5411341584846242e-05, "loss": 1.9911, "step": 16995500 }, { "epoch": 49.2, "learning_rate": 2.5410617937198968e-05, "loss": 1.9718, "step": 16996000 }, { "epoch": 49.2, "learning_rate": 2.540989428955169e-05, "loss": 2.001, "step": 16996500 }, { "epoch": 49.2, "learning_rate": 2.5409170641904412e-05, "loss": 2.0017, "step": 16997000 }, { "epoch": 49.2, "learning_rate": 2.5408446994257134e-05, "loss": 1.9938, "step": 16997500 }, { "epoch": 49.2, "learning_rate": 2.5407723346609857e-05, "loss": 2.0015, "step": 16998000 }, { "epoch": 49.2, "learning_rate": 2.5407001146257876e-05, "loss": 2.0, "step": 16998500 }, { "epoch": 49.21, "learning_rate": 2.5406277498610598e-05, "loss": 2.0171, "step": 16999000 }, { "epoch": 49.21, "learning_rate": 2.540555385096332e-05, "loss": 1.978, "step": 16999500 }, { "epoch": 49.21, "learning_rate": 2.5404830203316042e-05, "loss": 1.9999, "step": 17000000 }, { "epoch": 49.21, "learning_rate": 2.5404106555668768e-05, "loss": 1.987, "step": 17000500 }, { "epoch": 49.21, "learning_rate": 2.540338290802149e-05, "loss": 1.9858, "step": 17001000 }, { "epoch": 49.21, "learning_rate": 2.5402660707669506e-05, "loss": 2.0057, "step": 17001500 }, { "epoch": 49.21, "learning_rate": 2.5401937060022228e-05, "loss": 2.0059, "step": 17002000 }, { "epoch": 49.22, "learning_rate": 2.5401213412374953e-05, "loss": 2.0109, "step": 17002500 }, { "epoch": 49.22, "learning_rate": 2.540048976472768e-05, "loss": 2.0087, "step": 17003000 }, { "epoch": 49.22, "learning_rate": 2.5399767564375698e-05, "loss": 1.9982, "step": 17003500 }, { "epoch": 49.22, "learning_rate": 2.539904391672842e-05, "loss": 1.9856, "step": 17004000 }, { "epoch": 49.22, "learning_rate": 2.5398320269081143e-05, "loss": 1.9869, "step": 17004500 }, { "epoch": 49.22, "learning_rate": 2.5397596621433868e-05, "loss": 1.9751, "step": 17005000 }, { "epoch": 49.22, "learning_rate": 2.539687297378659e-05, "loss": 1.9991, "step": 17005500 }, { "epoch": 49.23, "learning_rate": 2.5396149326139313e-05, "loss": 2.0184, "step": 17006000 }, { "epoch": 49.23, "learning_rate": 2.5395425678492035e-05, "loss": 1.9958, "step": 17006500 }, { "epoch": 49.23, "learning_rate": 2.5394702030844757e-05, "loss": 1.9887, "step": 17007000 }, { "epoch": 49.23, "learning_rate": 2.539397838319748e-05, "loss": 2.016, "step": 17007500 }, { "epoch": 49.23, "learning_rate": 2.5393254735550205e-05, "loss": 1.9994, "step": 17008000 }, { "epoch": 49.23, "learning_rate": 2.5392531087902927e-05, "loss": 2.004, "step": 17008500 }, { "epoch": 49.23, "learning_rate": 2.5391808887550943e-05, "loss": 1.9745, "step": 17009000 }, { "epoch": 49.24, "learning_rate": 2.539108668719896e-05, "loss": 1.991, "step": 17009500 }, { "epoch": 49.24, "learning_rate": 2.5390363039551684e-05, "loss": 1.9807, "step": 17010000 }, { "epoch": 49.24, "learning_rate": 2.5389639391904413e-05, "loss": 2.0207, "step": 17010500 }, { "epoch": 49.24, "learning_rate": 2.5388915744257135e-05, "loss": 2.025, "step": 17011000 }, { "epoch": 49.24, "learning_rate": 2.5388192096609857e-05, "loss": 2.0078, "step": 17011500 }, { "epoch": 49.24, "learning_rate": 2.5387468448962583e-05, "loss": 1.9998, "step": 17012000 }, { "epoch": 49.24, "learning_rate": 2.5386744801315305e-05, "loss": 1.9966, "step": 17012500 }, { "epoch": 49.25, "learning_rate": 2.5386021153668027e-05, "loss": 1.9742, "step": 17013000 }, { "epoch": 49.25, "learning_rate": 2.5385298953316043e-05, "loss": 1.9769, "step": 17013500 }, { "epoch": 49.25, "learning_rate": 2.538457530566877e-05, "loss": 1.9933, "step": 17014000 }, { "epoch": 49.25, "learning_rate": 2.538385165802149e-05, "loss": 1.9838, "step": 17014500 }, { "epoch": 49.25, "learning_rate": 2.5383129457669506e-05, "loss": 2.0115, "step": 17015000 }, { "epoch": 49.25, "learning_rate": 2.5382405810022232e-05, "loss": 1.9943, "step": 17015500 }, { "epoch": 49.25, "learning_rate": 2.5381682162374954e-05, "loss": 1.9974, "step": 17016000 }, { "epoch": 49.26, "learning_rate": 2.5380958514727677e-05, "loss": 2.0184, "step": 17016500 }, { "epoch": 49.26, "learning_rate": 2.53802348670804e-05, "loss": 1.9884, "step": 17017000 }, { "epoch": 49.26, "learning_rate": 2.537951121943312e-05, "loss": 1.9876, "step": 17017500 }, { "epoch": 49.26, "learning_rate": 2.537878757178585e-05, "loss": 2.002, "step": 17018000 }, { "epoch": 49.26, "learning_rate": 2.5378063924138572e-05, "loss": 2.0026, "step": 17018500 }, { "epoch": 49.26, "learning_rate": 2.5377340276491294e-05, "loss": 1.9863, "step": 17019000 }, { "epoch": 49.26, "learning_rate": 2.537661662884402e-05, "loss": 1.9871, "step": 17019500 }, { "epoch": 49.27, "learning_rate": 2.5375892981196742e-05, "loss": 1.9942, "step": 17020000 }, { "epoch": 49.27, "learning_rate": 2.5375169333549465e-05, "loss": 2.0147, "step": 17020500 }, { "epoch": 49.27, "learning_rate": 2.5374445685902187e-05, "loss": 1.9802, "step": 17021000 }, { "epoch": 49.27, "learning_rate": 2.537372203825491e-05, "loss": 1.9684, "step": 17021500 }, { "epoch": 49.27, "learning_rate": 2.5372999837902928e-05, "loss": 1.9884, "step": 17022000 }, { "epoch": 49.27, "learning_rate": 2.537227619025565e-05, "loss": 1.9845, "step": 17022500 }, { "epoch": 49.27, "learning_rate": 2.537155398990367e-05, "loss": 1.9777, "step": 17023000 }, { "epoch": 49.28, "learning_rate": 2.537083034225639e-05, "loss": 1.9924, "step": 17023500 }, { "epoch": 49.28, "learning_rate": 2.5370106694609114e-05, "loss": 2.0218, "step": 17024000 }, { "epoch": 49.28, "learning_rate": 2.5369383046961836e-05, "loss": 2.0135, "step": 17024500 }, { "epoch": 49.28, "learning_rate": 2.5368659399314565e-05, "loss": 2.0057, "step": 17025000 }, { "epoch": 49.28, "learning_rate": 2.5367935751667287e-05, "loss": 2.0018, "step": 17025500 }, { "epoch": 49.28, "learning_rate": 2.536721210402001e-05, "loss": 2.0086, "step": 17026000 }, { "epoch": 49.28, "learning_rate": 2.5366488456372735e-05, "loss": 2.0072, "step": 17026500 }, { "epoch": 49.29, "learning_rate": 2.5365764808725457e-05, "loss": 1.9951, "step": 17027000 }, { "epoch": 49.29, "learning_rate": 2.536504116107818e-05, "loss": 2.0334, "step": 17027500 }, { "epoch": 49.29, "learning_rate": 2.5364318960726195e-05, "loss": 2.0056, "step": 17028000 }, { "epoch": 49.29, "learning_rate": 2.536359531307892e-05, "loss": 1.9953, "step": 17028500 }, { "epoch": 49.29, "learning_rate": 2.5362873112726936e-05, "loss": 2.0111, "step": 17029000 }, { "epoch": 49.29, "learning_rate": 2.536214946507966e-05, "loss": 1.9915, "step": 17029500 }, { "epoch": 49.29, "learning_rate": 2.5361425817432384e-05, "loss": 2.0007, "step": 17030000 }, { "epoch": 49.3, "learning_rate": 2.5360702169785106e-05, "loss": 2.0196, "step": 17030500 }, { "epoch": 49.3, "learning_rate": 2.535997852213783e-05, "loss": 2.0158, "step": 17031000 }, { "epoch": 49.3, "learning_rate": 2.535925487449055e-05, "loss": 1.9766, "step": 17031500 }, { "epoch": 49.3, "learning_rate": 2.5358531226843273e-05, "loss": 1.988, "step": 17032000 }, { "epoch": 49.3, "learning_rate": 2.5357809026491292e-05, "loss": 2.0046, "step": 17032500 }, { "epoch": 49.3, "learning_rate": 2.535708537884402e-05, "loss": 1.9674, "step": 17033000 }, { "epoch": 49.31, "learning_rate": 2.5356361731196743e-05, "loss": 1.9983, "step": 17033500 }, { "epoch": 49.31, "learning_rate": 2.5355638083549465e-05, "loss": 2.0105, "step": 17034000 }, { "epoch": 49.31, "learning_rate": 2.5354914435902188e-05, "loss": 2.011, "step": 17034500 }, { "epoch": 49.31, "learning_rate": 2.535419078825491e-05, "loss": 1.9966, "step": 17035000 }, { "epoch": 49.31, "learning_rate": 2.5353467140607635e-05, "loss": 1.9979, "step": 17035500 }, { "epoch": 49.31, "learning_rate": 2.5352743492960358e-05, "loss": 1.9819, "step": 17036000 }, { "epoch": 49.31, "learning_rate": 2.535201984531308e-05, "loss": 1.9765, "step": 17036500 }, { "epoch": 49.32, "learning_rate": 2.5351296197665802e-05, "loss": 1.9823, "step": 17037000 }, { "epoch": 49.32, "learning_rate": 2.535057399731382e-05, "loss": 1.9992, "step": 17037500 }, { "epoch": 49.32, "learning_rate": 2.5349850349666543e-05, "loss": 1.9907, "step": 17038000 }, { "epoch": 49.32, "learning_rate": 2.534912814931456e-05, "loss": 2.0134, "step": 17038500 }, { "epoch": 49.32, "learning_rate": 2.5348404501667284e-05, "loss": 1.9793, "step": 17039000 }, { "epoch": 49.32, "learning_rate": 2.53476823013153e-05, "loss": 1.9995, "step": 17039500 }, { "epoch": 49.32, "learning_rate": 2.5346958653668022e-05, "loss": 2.0073, "step": 17040000 }, { "epoch": 49.33, "learning_rate": 2.534623500602075e-05, "loss": 1.9914, "step": 17040500 }, { "epoch": 49.33, "learning_rate": 2.5345511358373474e-05, "loss": 1.9896, "step": 17041000 }, { "epoch": 49.33, "learning_rate": 2.53447877107262e-05, "loss": 2.0144, "step": 17041500 }, { "epoch": 49.33, "learning_rate": 2.534406406307892e-05, "loss": 2.0124, "step": 17042000 }, { "epoch": 49.33, "learning_rate": 2.5343340415431644e-05, "loss": 2.0135, "step": 17042500 }, { "epoch": 49.33, "learning_rate": 2.5342616767784366e-05, "loss": 2.0084, "step": 17043000 }, { "epoch": 49.33, "learning_rate": 2.5341893120137088e-05, "loss": 2.0038, "step": 17043500 }, { "epoch": 49.34, "learning_rate": 2.5341170919785107e-05, "loss": 1.9882, "step": 17044000 }, { "epoch": 49.34, "learning_rate": 2.534044727213783e-05, "loss": 1.9871, "step": 17044500 }, { "epoch": 49.34, "learning_rate": 2.533972362449055e-05, "loss": 1.9878, "step": 17045000 }, { "epoch": 49.34, "learning_rate": 2.5338999976843274e-05, "loss": 2.0148, "step": 17045500 }, { "epoch": 49.34, "learning_rate": 2.5338276329196e-05, "loss": 1.9808, "step": 17046000 }, { "epoch": 49.34, "learning_rate": 2.533755268154872e-05, "loss": 1.9965, "step": 17046500 }, { "epoch": 49.34, "learning_rate": 2.5336829033901444e-05, "loss": 1.9995, "step": 17047000 }, { "epoch": 49.35, "learning_rate": 2.5336106833549466e-05, "loss": 2.0137, "step": 17047500 }, { "epoch": 49.35, "learning_rate": 2.533538318590219e-05, "loss": 2.0017, "step": 17048000 }, { "epoch": 49.35, "learning_rate": 2.5334659538254914e-05, "loss": 1.9902, "step": 17048500 }, { "epoch": 49.35, "learning_rate": 2.5333935890607636e-05, "loss": 2.0068, "step": 17049000 }, { "epoch": 49.35, "learning_rate": 2.533321224296036e-05, "loss": 2.0103, "step": 17049500 }, { "epoch": 49.35, "learning_rate": 2.533248859531308e-05, "loss": 2.0107, "step": 17050000 }, { "epoch": 49.35, "learning_rate": 2.5331764947665803e-05, "loss": 2.0139, "step": 17050500 }, { "epoch": 49.36, "learning_rate": 2.5331042747313822e-05, "loss": 2.0124, "step": 17051000 }, { "epoch": 49.36, "learning_rate": 2.5330319099666544e-05, "loss": 2.0031, "step": 17051500 }, { "epoch": 49.36, "learning_rate": 2.5329595452019266e-05, "loss": 2.0019, "step": 17052000 }, { "epoch": 49.36, "learning_rate": 2.532887180437199e-05, "loss": 1.9875, "step": 17052500 }, { "epoch": 49.36, "learning_rate": 2.5328148156724714e-05, "loss": 1.9992, "step": 17053000 }, { "epoch": 49.36, "learning_rate": 2.5327424509077436e-05, "loss": 1.9908, "step": 17053500 }, { "epoch": 49.36, "learning_rate": 2.532670086143016e-05, "loss": 2.0141, "step": 17054000 }, { "epoch": 49.37, "learning_rate": 2.532597721378288e-05, "loss": 2.0042, "step": 17054500 }, { "epoch": 49.37, "learning_rate": 2.5325255013430903e-05, "loss": 1.9969, "step": 17055000 }, { "epoch": 49.37, "learning_rate": 2.5324532813078922e-05, "loss": 2.0081, "step": 17055500 }, { "epoch": 49.37, "learning_rate": 2.5323809165431644e-05, "loss": 1.9859, "step": 17056000 }, { "epoch": 49.37, "learning_rate": 2.5323085517784367e-05, "loss": 2.0096, "step": 17056500 }, { "epoch": 49.37, "learning_rate": 2.532236187013709e-05, "loss": 2.0114, "step": 17057000 }, { "epoch": 49.37, "learning_rate": 2.5321638222489814e-05, "loss": 2.0047, "step": 17057500 }, { "epoch": 49.38, "learning_rate": 2.5320914574842537e-05, "loss": 1.9712, "step": 17058000 }, { "epoch": 49.38, "learning_rate": 2.532019092719526e-05, "loss": 2.0086, "step": 17058500 }, { "epoch": 49.38, "learning_rate": 2.531946727954798e-05, "loss": 1.998, "step": 17059000 }, { "epoch": 49.38, "learning_rate": 2.5318745079196e-05, "loss": 2.0056, "step": 17059500 }, { "epoch": 49.38, "learning_rate": 2.5318021431548722e-05, "loss": 2.0157, "step": 17060000 }, { "epoch": 49.38, "learning_rate": 2.5317297783901445e-05, "loss": 2.0048, "step": 17060500 }, { "epoch": 49.38, "learning_rate": 2.5316574136254167e-05, "loss": 1.9836, "step": 17061000 }, { "epoch": 49.39, "learning_rate": 2.531585048860689e-05, "loss": 1.9898, "step": 17061500 }, { "epoch": 49.39, "learning_rate": 2.5315126840959615e-05, "loss": 2.0078, "step": 17062000 }, { "epoch": 49.39, "learning_rate": 2.531440319331234e-05, "loss": 2.0107, "step": 17062500 }, { "epoch": 49.39, "learning_rate": 2.5313679545665066e-05, "loss": 2.0103, "step": 17063000 }, { "epoch": 49.39, "learning_rate": 2.531295734531308e-05, "loss": 1.9947, "step": 17063500 }, { "epoch": 49.39, "learning_rate": 2.53122351449611e-05, "loss": 1.9724, "step": 17064000 }, { "epoch": 49.39, "learning_rate": 2.5311511497313823e-05, "loss": 1.9911, "step": 17064500 }, { "epoch": 49.4, "learning_rate": 2.5310787849666545e-05, "loss": 1.9932, "step": 17065000 }, { "epoch": 49.4, "learning_rate": 2.5310064202019267e-05, "loss": 1.9863, "step": 17065500 }, { "epoch": 49.4, "learning_rate": 2.5309342001667286e-05, "loss": 2.0376, "step": 17066000 }, { "epoch": 49.4, "learning_rate": 2.53086198013153e-05, "loss": 1.9961, "step": 17066500 }, { "epoch": 49.4, "learning_rate": 2.5307896153668027e-05, "loss": 1.9949, "step": 17067000 }, { "epoch": 49.4, "learning_rate": 2.530717250602075e-05, "loss": 2.0141, "step": 17067500 }, { "epoch": 49.4, "learning_rate": 2.5306448858373472e-05, "loss": 2.029, "step": 17068000 }, { "epoch": 49.41, "learning_rate": 2.5305725210726194e-05, "loss": 1.9913, "step": 17068500 }, { "epoch": 49.41, "learning_rate": 2.5305001563078916e-05, "loss": 1.9645, "step": 17069000 }, { "epoch": 49.41, "learning_rate": 2.530427791543164e-05, "loss": 1.9895, "step": 17069500 }, { "epoch": 49.41, "learning_rate": 2.5303554267784367e-05, "loss": 2.0046, "step": 17070000 }, { "epoch": 49.41, "learning_rate": 2.5302832067432386e-05, "loss": 2.0095, "step": 17070500 }, { "epoch": 49.41, "learning_rate": 2.530210841978511e-05, "loss": 2.0035, "step": 17071000 }, { "epoch": 49.42, "learning_rate": 2.530138477213783e-05, "loss": 1.9779, "step": 17071500 }, { "epoch": 49.42, "learning_rate": 2.530066257178585e-05, "loss": 1.9904, "step": 17072000 }, { "epoch": 49.42, "learning_rate": 2.5299938924138572e-05, "loss": 2.0064, "step": 17072500 }, { "epoch": 49.42, "learning_rate": 2.5299215276491294e-05, "loss": 1.9821, "step": 17073000 }, { "epoch": 49.42, "learning_rate": 2.5298491628844016e-05, "loss": 2.0477, "step": 17073500 }, { "epoch": 49.42, "learning_rate": 2.5297767981196742e-05, "loss": 1.9921, "step": 17074000 }, { "epoch": 49.42, "learning_rate": 2.5297044333549464e-05, "loss": 2.0153, "step": 17074500 }, { "epoch": 49.43, "learning_rate": 2.5296320685902187e-05, "loss": 1.979, "step": 17075000 }, { "epoch": 49.43, "learning_rate": 2.529559703825491e-05, "loss": 1.9973, "step": 17075500 }, { "epoch": 49.43, "learning_rate": 2.529487339060763e-05, "loss": 1.9974, "step": 17076000 }, { "epoch": 49.43, "learning_rate": 2.5294149742960353e-05, "loss": 2.0052, "step": 17076500 }, { "epoch": 49.43, "learning_rate": 2.529342609531308e-05, "loss": 1.9979, "step": 17077000 }, { "epoch": 49.43, "learning_rate": 2.5292702447665805e-05, "loss": 2.0116, "step": 17077500 }, { "epoch": 49.43, "learning_rate": 2.5291980247313823e-05, "loss": 1.9885, "step": 17078000 }, { "epoch": 49.44, "learning_rate": 2.5291256599666546e-05, "loss": 1.9838, "step": 17078500 }, { "epoch": 49.44, "learning_rate": 2.5290532952019268e-05, "loss": 2.0033, "step": 17079000 }, { "epoch": 49.44, "learning_rate": 2.5289809304371994e-05, "loss": 1.9949, "step": 17079500 }, { "epoch": 49.44, "learning_rate": 2.5289085656724716e-05, "loss": 2.0188, "step": 17080000 }, { "epoch": 49.44, "learning_rate": 2.5288362009077438e-05, "loss": 2.0108, "step": 17080500 }, { "epoch": 49.44, "learning_rate": 2.528763836143016e-05, "loss": 2.0071, "step": 17081000 }, { "epoch": 49.44, "learning_rate": 2.5286914713782882e-05, "loss": 1.9814, "step": 17081500 }, { "epoch": 49.45, "learning_rate": 2.52861925134309e-05, "loss": 1.9911, "step": 17082000 }, { "epoch": 49.45, "learning_rate": 2.5285470313078917e-05, "loss": 2.0101, "step": 17082500 }, { "epoch": 49.45, "learning_rate": 2.5284746665431643e-05, "loss": 1.992, "step": 17083000 }, { "epoch": 49.45, "learning_rate": 2.5284023017784365e-05, "loss": 1.9969, "step": 17083500 }, { "epoch": 49.45, "learning_rate": 2.5283299370137087e-05, "loss": 2.0026, "step": 17084000 }, { "epoch": 49.45, "learning_rate": 2.528257572248981e-05, "loss": 2.0298, "step": 17084500 }, { "epoch": 49.45, "learning_rate": 2.528185207484254e-05, "loss": 1.9949, "step": 17085000 }, { "epoch": 49.46, "learning_rate": 2.528112842719526e-05, "loss": 1.9894, "step": 17085500 }, { "epoch": 49.46, "learning_rate": 2.5280404779547983e-05, "loss": 1.9827, "step": 17086000 }, { "epoch": 49.46, "learning_rate": 2.5279681131900705e-05, "loss": 1.9812, "step": 17086500 }, { "epoch": 49.46, "learning_rate": 2.527895748425343e-05, "loss": 2.019, "step": 17087000 }, { "epoch": 49.46, "learning_rate": 2.5278233836606153e-05, "loss": 2.0025, "step": 17087500 }, { "epoch": 49.46, "learning_rate": 2.5277510188958875e-05, "loss": 2.0236, "step": 17088000 }, { "epoch": 49.46, "learning_rate": 2.5276787988606894e-05, "loss": 1.9979, "step": 17088500 }, { "epoch": 49.47, "learning_rate": 2.5276064340959616e-05, "loss": 2.0137, "step": 17089000 }, { "epoch": 49.47, "learning_rate": 2.5275342140607632e-05, "loss": 1.9983, "step": 17089500 }, { "epoch": 49.47, "learning_rate": 2.5274618492960357e-05, "loss": 2.0185, "step": 17090000 }, { "epoch": 49.47, "learning_rate": 2.527389484531308e-05, "loss": 1.9768, "step": 17090500 }, { "epoch": 49.47, "learning_rate": 2.5273171197665802e-05, "loss": 2.0123, "step": 17091000 }, { "epoch": 49.47, "learning_rate": 2.5272447550018524e-05, "loss": 2.0137, "step": 17091500 }, { "epoch": 49.47, "learning_rate": 2.5271723902371246e-05, "loss": 2.0162, "step": 17092000 }, { "epoch": 49.48, "learning_rate": 2.5271000254723975e-05, "loss": 1.9708, "step": 17092500 }, { "epoch": 49.48, "learning_rate": 2.5270276607076698e-05, "loss": 1.9997, "step": 17093000 }, { "epoch": 49.48, "learning_rate": 2.526955295942942e-05, "loss": 2.0134, "step": 17093500 }, { "epoch": 49.48, "learning_rate": 2.5268829311782145e-05, "loss": 1.9853, "step": 17094000 }, { "epoch": 49.48, "learning_rate": 2.5268105664134868e-05, "loss": 1.9868, "step": 17094500 }, { "epoch": 49.48, "learning_rate": 2.5267383463782883e-05, "loss": 1.9923, "step": 17095000 }, { "epoch": 49.48, "learning_rate": 2.5266659816135606e-05, "loss": 2.0049, "step": 17095500 }, { "epoch": 49.49, "learning_rate": 2.5265937615783624e-05, "loss": 1.9896, "step": 17096000 }, { "epoch": 49.49, "learning_rate": 2.5265213968136347e-05, "loss": 1.9926, "step": 17096500 }, { "epoch": 49.49, "learning_rate": 2.526449032048907e-05, "loss": 2.0012, "step": 17097000 }, { "epoch": 49.49, "learning_rate": 2.5263766672841795e-05, "loss": 2.0224, "step": 17097500 }, { "epoch": 49.49, "learning_rate": 2.5263045919785107e-05, "loss": 2.0277, "step": 17098000 }, { "epoch": 49.49, "learning_rate": 2.526232227213783e-05, "loss": 2.0084, "step": 17098500 }, { "epoch": 49.49, "learning_rate": 2.5261600071785845e-05, "loss": 2.0042, "step": 17099000 }, { "epoch": 49.5, "learning_rate": 2.526087642413857e-05, "loss": 1.9889, "step": 17099500 }, { "epoch": 49.5, "learning_rate": 2.5260152776491292e-05, "loss": 2.0, "step": 17100000 }, { "epoch": 49.5, "learning_rate": 2.525942912884402e-05, "loss": 1.9969, "step": 17100500 }, { "epoch": 49.5, "learning_rate": 2.5258705481196744e-05, "loss": 2.0208, "step": 17101000 }, { "epoch": 49.5, "learning_rate": 2.5257981833549466e-05, "loss": 1.9853, "step": 17101500 }, { "epoch": 49.5, "learning_rate": 2.5257258185902188e-05, "loss": 1.9803, "step": 17102000 }, { "epoch": 49.5, "learning_rate": 2.525653453825491e-05, "loss": 1.9966, "step": 17102500 }, { "epoch": 49.51, "learning_rate": 2.5255810890607633e-05, "loss": 2.0139, "step": 17103000 }, { "epoch": 49.51, "learning_rate": 2.5255087242960358e-05, "loss": 2.0193, "step": 17103500 }, { "epoch": 49.51, "learning_rate": 2.525436359531308e-05, "loss": 2.018, "step": 17104000 }, { "epoch": 49.51, "learning_rate": 2.5253639947665803e-05, "loss": 1.9754, "step": 17104500 }, { "epoch": 49.51, "learning_rate": 2.5252916300018525e-05, "loss": 1.975, "step": 17105000 }, { "epoch": 49.51, "learning_rate": 2.5252192652371247e-05, "loss": 2.0106, "step": 17105500 }, { "epoch": 49.51, "learning_rate": 2.5251470452019266e-05, "loss": 2.0064, "step": 17106000 }, { "epoch": 49.52, "learning_rate": 2.525074825166728e-05, "loss": 2.0203, "step": 17106500 }, { "epoch": 49.52, "learning_rate": 2.5250024604020007e-05, "loss": 2.0046, "step": 17107000 }, { "epoch": 49.52, "learning_rate": 2.5249300956372733e-05, "loss": 2.0135, "step": 17107500 }, { "epoch": 49.52, "learning_rate": 2.524857730872546e-05, "loss": 2.0038, "step": 17108000 }, { "epoch": 49.52, "learning_rate": 2.524785366107818e-05, "loss": 1.995, "step": 17108500 }, { "epoch": 49.52, "learning_rate": 2.5247131460726196e-05, "loss": 1.9867, "step": 17109000 }, { "epoch": 49.52, "learning_rate": 2.5246409260374215e-05, "loss": 1.9985, "step": 17109500 }, { "epoch": 49.53, "learning_rate": 2.5245685612726938e-05, "loss": 2.0139, "step": 17110000 }, { "epoch": 49.53, "learning_rate": 2.524496196507966e-05, "loss": 1.9932, "step": 17110500 }, { "epoch": 49.53, "learning_rate": 2.5244238317432385e-05, "loss": 2.0113, "step": 17111000 }, { "epoch": 49.53, "learning_rate": 2.5243514669785108e-05, "loss": 2.0036, "step": 17111500 }, { "epoch": 49.53, "learning_rate": 2.524279102213783e-05, "loss": 2.0059, "step": 17112000 }, { "epoch": 49.53, "learning_rate": 2.5242068821785845e-05, "loss": 2.001, "step": 17112500 }, { "epoch": 49.54, "learning_rate": 2.524134517413857e-05, "loss": 2.0134, "step": 17113000 }, { "epoch": 49.54, "learning_rate": 2.5240622973786587e-05, "loss": 1.9815, "step": 17113500 }, { "epoch": 49.54, "learning_rate": 2.523989932613931e-05, "loss": 2.0046, "step": 17114000 }, { "epoch": 49.54, "learning_rate": 2.5239175678492034e-05, "loss": 1.9996, "step": 17114500 }, { "epoch": 49.54, "learning_rate": 2.5238452030844757e-05, "loss": 1.9902, "step": 17115000 }, { "epoch": 49.54, "learning_rate": 2.5237728383197486e-05, "loss": 2.0029, "step": 17115500 }, { "epoch": 49.54, "learning_rate": 2.5237004735550208e-05, "loss": 2.0206, "step": 17116000 }, { "epoch": 49.55, "learning_rate": 2.523628108790293e-05, "loss": 2.0135, "step": 17116500 }, { "epoch": 49.55, "learning_rate": 2.5235557440255652e-05, "loss": 2.0353, "step": 17117000 }, { "epoch": 49.55, "learning_rate": 2.5234833792608375e-05, "loss": 1.9984, "step": 17117500 }, { "epoch": 49.55, "learning_rate": 2.5234110144961097e-05, "loss": 1.9927, "step": 17118000 }, { "epoch": 49.55, "learning_rate": 2.5233386497313822e-05, "loss": 2.0047, "step": 17118500 }, { "epoch": 49.55, "learning_rate": 2.5232662849666545e-05, "loss": 2.0135, "step": 17119000 }, { "epoch": 49.55, "learning_rate": 2.5231939202019267e-05, "loss": 1.996, "step": 17119500 }, { "epoch": 49.56, "learning_rate": 2.523121555437199e-05, "loss": 2.0119, "step": 17120000 }, { "epoch": 49.56, "learning_rate": 2.523049190672471e-05, "loss": 2.0009, "step": 17120500 }, { "epoch": 49.56, "learning_rate": 2.5229768259077437e-05, "loss": 2.0177, "step": 17121000 }, { "epoch": 49.56, "learning_rate": 2.522904461143016e-05, "loss": 2.0089, "step": 17121500 }, { "epoch": 49.56, "learning_rate": 2.522832385837347e-05, "loss": 1.9958, "step": 17122000 }, { "epoch": 49.56, "learning_rate": 2.52276002107262e-05, "loss": 2.009, "step": 17122500 }, { "epoch": 49.56, "learning_rate": 2.5226876563078923e-05, "loss": 2.007, "step": 17123000 }, { "epoch": 49.57, "learning_rate": 2.5226152915431645e-05, "loss": 2.0198, "step": 17123500 }, { "epoch": 49.57, "learning_rate": 2.5225429267784367e-05, "loss": 2.0102, "step": 17124000 }, { "epoch": 49.57, "learning_rate": 2.522470562013709e-05, "loss": 2.0018, "step": 17124500 }, { "epoch": 49.57, "learning_rate": 2.522398197248981e-05, "loss": 2.0111, "step": 17125000 }, { "epoch": 49.57, "learning_rate": 2.5223258324842537e-05, "loss": 2.0134, "step": 17125500 }, { "epoch": 49.57, "learning_rate": 2.522253467719526e-05, "loss": 1.9996, "step": 17126000 }, { "epoch": 49.57, "learning_rate": 2.5221811029547982e-05, "loss": 1.9889, "step": 17126500 }, { "epoch": 49.58, "learning_rate": 2.5221087381900704e-05, "loss": 1.9895, "step": 17127000 }, { "epoch": 49.58, "learning_rate": 2.5220365181548723e-05, "loss": 2.003, "step": 17127500 }, { "epoch": 49.58, "learning_rate": 2.5219641533901445e-05, "loss": 2.0164, "step": 17128000 }, { "epoch": 49.58, "learning_rate": 2.5218917886254167e-05, "loss": 2.0049, "step": 17128500 }, { "epoch": 49.58, "learning_rate": 2.521819423860689e-05, "loss": 1.9764, "step": 17129000 }, { "epoch": 49.58, "learning_rate": 2.5217470590959612e-05, "loss": 1.9928, "step": 17129500 }, { "epoch": 49.58, "learning_rate": 2.521674694331234e-05, "loss": 1.9901, "step": 17130000 }, { "epoch": 49.59, "learning_rate": 2.5216023295665063e-05, "loss": 2.0011, "step": 17130500 }, { "epoch": 49.59, "learning_rate": 2.521529964801779e-05, "loss": 2.0113, "step": 17131000 }, { "epoch": 49.59, "learning_rate": 2.521457600037051e-05, "loss": 1.9957, "step": 17131500 }, { "epoch": 49.59, "learning_rate": 2.5213852352723233e-05, "loss": 2.0171, "step": 17132000 }, { "epoch": 49.59, "learning_rate": 2.5213128705075955e-05, "loss": 2.0022, "step": 17132500 }, { "epoch": 49.59, "learning_rate": 2.5212406504723974e-05, "loss": 1.9972, "step": 17133000 }, { "epoch": 49.59, "learning_rate": 2.5211682857076697e-05, "loss": 2.0201, "step": 17133500 }, { "epoch": 49.6, "learning_rate": 2.5210960656724712e-05, "loss": 1.9908, "step": 17134000 }, { "epoch": 49.6, "learning_rate": 2.5210237009077438e-05, "loss": 2.0076, "step": 17134500 }, { "epoch": 49.6, "learning_rate": 2.520951336143016e-05, "loss": 2.0088, "step": 17135000 }, { "epoch": 49.6, "learning_rate": 2.5208791161078176e-05, "loss": 1.9947, "step": 17135500 }, { "epoch": 49.6, "learning_rate": 2.52080675134309e-05, "loss": 1.9786, "step": 17136000 }, { "epoch": 49.6, "learning_rate": 2.5207343865783623e-05, "loss": 1.982, "step": 17136500 }, { "epoch": 49.6, "learning_rate": 2.5206620218136346e-05, "loss": 2.0174, "step": 17137000 }, { "epoch": 49.61, "learning_rate": 2.5205896570489075e-05, "loss": 1.992, "step": 17137500 }, { "epoch": 49.61, "learning_rate": 2.5205172922841797e-05, "loss": 2.0168, "step": 17138000 }, { "epoch": 49.61, "learning_rate": 2.520444927519452e-05, "loss": 2.0113, "step": 17138500 }, { "epoch": 49.61, "learning_rate": 2.5203727074842538e-05, "loss": 1.9923, "step": 17139000 }, { "epoch": 49.61, "learning_rate": 2.520300342719526e-05, "loss": 1.9978, "step": 17139500 }, { "epoch": 49.61, "learning_rate": 2.5202279779547983e-05, "loss": 2.0138, "step": 17140000 }, { "epoch": 49.61, "learning_rate": 2.5201556131900705e-05, "loss": 2.0056, "step": 17140500 }, { "epoch": 49.62, "learning_rate": 2.5200832484253427e-05, "loss": 2.0002, "step": 17141000 }, { "epoch": 49.62, "learning_rate": 2.5200108836606153e-05, "loss": 2.0206, "step": 17141500 }, { "epoch": 49.62, "learning_rate": 2.5199385188958875e-05, "loss": 1.9865, "step": 17142000 }, { "epoch": 49.62, "learning_rate": 2.519866298860689e-05, "loss": 2.0136, "step": 17142500 }, { "epoch": 49.62, "learning_rate": 2.5197939340959613e-05, "loss": 1.9972, "step": 17143000 }, { "epoch": 49.62, "learning_rate": 2.519721714060763e-05, "loss": 1.9926, "step": 17143500 }, { "epoch": 49.62, "learning_rate": 2.5196493492960354e-05, "loss": 2.0165, "step": 17144000 }, { "epoch": 49.63, "learning_rate": 2.5195769845313076e-05, "loss": 1.993, "step": 17144500 }, { "epoch": 49.63, "learning_rate": 2.5195046197665805e-05, "loss": 2.0149, "step": 17145000 }, { "epoch": 49.63, "learning_rate": 2.5194322550018527e-05, "loss": 2.006, "step": 17145500 }, { "epoch": 49.63, "learning_rate": 2.5193598902371253e-05, "loss": 2.0373, "step": 17146000 }, { "epoch": 49.63, "learning_rate": 2.5192875254723975e-05, "loss": 2.0059, "step": 17146500 }, { "epoch": 49.63, "learning_rate": 2.5192151607076697e-05, "loss": 2.0046, "step": 17147000 }, { "epoch": 49.63, "learning_rate": 2.5191429406724716e-05, "loss": 2.0093, "step": 17147500 }, { "epoch": 49.64, "learning_rate": 2.519070575907744e-05, "loss": 2.0185, "step": 17148000 }, { "epoch": 49.64, "learning_rate": 2.518998211143016e-05, "loss": 2.0074, "step": 17148500 }, { "epoch": 49.64, "learning_rate": 2.5189258463782883e-05, "loss": 1.9889, "step": 17149000 }, { "epoch": 49.64, "learning_rate": 2.5188534816135605e-05, "loss": 1.9943, "step": 17149500 }, { "epoch": 49.64, "learning_rate": 2.5187811168488328e-05, "loss": 2.0241, "step": 17150000 }, { "epoch": 49.64, "learning_rate": 2.5187087520841053e-05, "loss": 1.9832, "step": 17150500 }, { "epoch": 49.65, "learning_rate": 2.5186363873193775e-05, "loss": 2.0023, "step": 17151000 }, { "epoch": 49.65, "learning_rate": 2.5185640225546498e-05, "loss": 2.0074, "step": 17151500 }, { "epoch": 49.65, "learning_rate": 2.5184916577899227e-05, "loss": 1.9895, "step": 17152000 }, { "epoch": 49.65, "learning_rate": 2.518419293025195e-05, "loss": 2.0149, "step": 17152500 }, { "epoch": 49.65, "learning_rate": 2.518346928260467e-05, "loss": 1.9795, "step": 17153000 }, { "epoch": 49.65, "learning_rate": 2.518274708225269e-05, "loss": 1.9969, "step": 17153500 }, { "epoch": 49.65, "learning_rate": 2.5182023434605412e-05, "loss": 2.0187, "step": 17154000 }, { "epoch": 49.66, "learning_rate": 2.5181299786958135e-05, "loss": 1.9854, "step": 17154500 }, { "epoch": 49.66, "learning_rate": 2.5180576139310857e-05, "loss": 1.9972, "step": 17155000 }, { "epoch": 49.66, "learning_rate": 2.5179853938958876e-05, "loss": 2.0164, "step": 17155500 }, { "epoch": 49.66, "learning_rate": 2.5179130291311598e-05, "loss": 1.9999, "step": 17156000 }, { "epoch": 49.66, "learning_rate": 2.5178408090959617e-05, "loss": 2.0081, "step": 17156500 }, { "epoch": 49.66, "learning_rate": 2.517768444331234e-05, "loss": 2.0098, "step": 17157000 }, { "epoch": 49.66, "learning_rate": 2.517696079566506e-05, "loss": 1.9937, "step": 17157500 }, { "epoch": 49.67, "learning_rate": 2.5176237148017784e-05, "loss": 2.005, "step": 17158000 }, { "epoch": 49.67, "learning_rate": 2.5175514947665803e-05, "loss": 1.995, "step": 17158500 }, { "epoch": 49.67, "learning_rate": 2.5174791300018525e-05, "loss": 2.0228, "step": 17159000 }, { "epoch": 49.67, "learning_rate": 2.5174067652371247e-05, "loss": 2.012, "step": 17159500 }, { "epoch": 49.67, "learning_rate": 2.5173344004723976e-05, "loss": 2.0139, "step": 17160000 }, { "epoch": 49.67, "learning_rate": 2.517262180437199e-05, "loss": 2.012, "step": 17160500 }, { "epoch": 49.67, "learning_rate": 2.5171898156724717e-05, "loss": 2.011, "step": 17161000 }, { "epoch": 49.68, "learning_rate": 2.5171175956372733e-05, "loss": 2.0123, "step": 17161500 }, { "epoch": 49.68, "learning_rate": 2.5170452308725455e-05, "loss": 1.9957, "step": 17162000 }, { "epoch": 49.68, "learning_rate": 2.516972866107818e-05, "loss": 2.0077, "step": 17162500 }, { "epoch": 49.68, "learning_rate": 2.5169005013430903e-05, "loss": 2.0187, "step": 17163000 }, { "epoch": 49.68, "learning_rate": 2.516828281307892e-05, "loss": 2.0028, "step": 17163500 }, { "epoch": 49.68, "learning_rate": 2.516755916543164e-05, "loss": 2.0191, "step": 17164000 }, { "epoch": 49.68, "learning_rate": 2.5166835517784366e-05, "loss": 2.0195, "step": 17164500 }, { "epoch": 49.69, "learning_rate": 2.516611187013709e-05, "loss": 2.0122, "step": 17165000 }, { "epoch": 49.69, "learning_rate": 2.516538822248981e-05, "loss": 1.9988, "step": 17165500 }, { "epoch": 49.69, "learning_rate": 2.5164664574842533e-05, "loss": 2.0053, "step": 17166000 }, { "epoch": 49.69, "learning_rate": 2.5163940927195255e-05, "loss": 2.0224, "step": 17166500 }, { "epoch": 49.69, "learning_rate": 2.5163218726843274e-05, "loss": 2.0096, "step": 17167000 }, { "epoch": 49.69, "learning_rate": 2.5162495079196003e-05, "loss": 2.0325, "step": 17167500 }, { "epoch": 49.69, "learning_rate": 2.5161771431548725e-05, "loss": 2.0172, "step": 17168000 }, { "epoch": 49.7, "learning_rate": 2.5161047783901448e-05, "loss": 1.9684, "step": 17168500 }, { "epoch": 49.7, "learning_rate": 2.516032413625417e-05, "loss": 2.0319, "step": 17169000 }, { "epoch": 49.7, "learning_rate": 2.5159600488606892e-05, "loss": 1.9945, "step": 17169500 }, { "epoch": 49.7, "learning_rate": 2.5158876840959618e-05, "loss": 2.0173, "step": 17170000 }, { "epoch": 49.7, "learning_rate": 2.515815319331234e-05, "loss": 2.0298, "step": 17170500 }, { "epoch": 49.7, "learning_rate": 2.5157429545665062e-05, "loss": 1.9965, "step": 17171000 }, { "epoch": 49.7, "learning_rate": 2.5156705898017784e-05, "loss": 2.0051, "step": 17171500 }, { "epoch": 49.71, "learning_rate": 2.5155982250370507e-05, "loss": 1.9961, "step": 17172000 }, { "epoch": 49.71, "learning_rate": 2.5155258602723232e-05, "loss": 1.997, "step": 17172500 }, { "epoch": 49.71, "learning_rate": 2.5154534955075954e-05, "loss": 2.0052, "step": 17173000 }, { "epoch": 49.71, "learning_rate": 2.5153814202019267e-05, "loss": 2.0135, "step": 17173500 }, { "epoch": 49.71, "learning_rate": 2.515309055437199e-05, "loss": 1.9972, "step": 17174000 }, { "epoch": 49.71, "learning_rate": 2.515236690672471e-05, "loss": 2.0242, "step": 17174500 }, { "epoch": 49.71, "learning_rate": 2.515164325907744e-05, "loss": 1.995, "step": 17175000 }, { "epoch": 49.72, "learning_rate": 2.5150919611430162e-05, "loss": 2.0093, "step": 17175500 }, { "epoch": 49.72, "learning_rate": 2.515019741107818e-05, "loss": 1.9937, "step": 17176000 }, { "epoch": 49.72, "learning_rate": 2.5149473763430904e-05, "loss": 2.0161, "step": 17176500 }, { "epoch": 49.72, "learning_rate": 2.5148750115783626e-05, "loss": 2.0111, "step": 17177000 }, { "epoch": 49.72, "learning_rate": 2.5148026468136348e-05, "loss": 2.0036, "step": 17177500 }, { "epoch": 49.72, "learning_rate": 2.514730282048907e-05, "loss": 2.0191, "step": 17178000 }, { "epoch": 49.72, "learning_rate": 2.5146579172841796e-05, "loss": 2.0152, "step": 17178500 }, { "epoch": 49.73, "learning_rate": 2.5145855525194518e-05, "loss": 1.9918, "step": 17179000 }, { "epoch": 49.73, "learning_rate": 2.514513187754724e-05, "loss": 2.0073, "step": 17179500 }, { "epoch": 49.73, "learning_rate": 2.5144408229899963e-05, "loss": 2.0184, "step": 17180000 }, { "epoch": 49.73, "learning_rate": 2.5143684582252685e-05, "loss": 2.0309, "step": 17180500 }, { "epoch": 49.73, "learning_rate": 2.5142960934605407e-05, "loss": 2.0242, "step": 17181000 }, { "epoch": 49.73, "learning_rate": 2.5142237286958133e-05, "loss": 1.9942, "step": 17181500 }, { "epoch": 49.73, "learning_rate": 2.514151363931086e-05, "loss": 1.9888, "step": 17182000 }, { "epoch": 49.74, "learning_rate": 2.5140789991663584e-05, "loss": 1.9844, "step": 17182500 }, { "epoch": 49.74, "learning_rate": 2.5140066344016306e-05, "loss": 1.9805, "step": 17183000 }, { "epoch": 49.74, "learning_rate": 2.5139344143664322e-05, "loss": 2.0171, "step": 17183500 }, { "epoch": 49.74, "learning_rate": 2.5138623390607634e-05, "loss": 1.9793, "step": 17184000 }, { "epoch": 49.74, "learning_rate": 2.513789974296036e-05, "loss": 1.9992, "step": 17184500 }, { "epoch": 49.74, "learning_rate": 2.5137177542608375e-05, "loss": 1.9955, "step": 17185000 }, { "epoch": 49.74, "learning_rate": 2.5136455342256394e-05, "loss": 2.0044, "step": 17185500 }, { "epoch": 49.75, "learning_rate": 2.5135731694609116e-05, "loss": 1.9941, "step": 17186000 }, { "epoch": 49.75, "learning_rate": 2.513500804696184e-05, "loss": 2.0118, "step": 17186500 }, { "epoch": 49.75, "learning_rate": 2.513428439931456e-05, "loss": 2.017, "step": 17187000 }, { "epoch": 49.75, "learning_rate": 2.5133560751667283e-05, "loss": 1.9888, "step": 17187500 }, { "epoch": 49.75, "learning_rate": 2.513283710402001e-05, "loss": 2.0233, "step": 17188000 }, { "epoch": 49.75, "learning_rate": 2.513211345637273e-05, "loss": 2.008, "step": 17188500 }, { "epoch": 49.76, "learning_rate": 2.5131389808725453e-05, "loss": 2.0155, "step": 17189000 }, { "epoch": 49.76, "learning_rate": 2.5130666161078175e-05, "loss": 1.991, "step": 17189500 }, { "epoch": 49.76, "learning_rate": 2.5129942513430904e-05, "loss": 1.9857, "step": 17190000 }, { "epoch": 49.76, "learning_rate": 2.5129218865783627e-05, "loss": 2.0267, "step": 17190500 }, { "epoch": 49.76, "learning_rate": 2.512849521813635e-05, "loss": 2.006, "step": 17191000 }, { "epoch": 49.76, "learning_rate": 2.512777157048907e-05, "loss": 2.0314, "step": 17191500 }, { "epoch": 49.76, "learning_rate": 2.5127047922841797e-05, "loss": 1.9939, "step": 17192000 }, { "epoch": 49.77, "learning_rate": 2.512632427519452e-05, "loss": 2.0021, "step": 17192500 }, { "epoch": 49.77, "learning_rate": 2.5125602074842535e-05, "loss": 1.9968, "step": 17193000 }, { "epoch": 49.77, "learning_rate": 2.512487842719526e-05, "loss": 2.0245, "step": 17193500 }, { "epoch": 49.77, "learning_rate": 2.5124154779547982e-05, "loss": 2.0078, "step": 17194000 }, { "epoch": 49.77, "learning_rate": 2.5123431131900705e-05, "loss": 1.9716, "step": 17194500 }, { "epoch": 49.77, "learning_rate": 2.5122707484253427e-05, "loss": 2.0013, "step": 17195000 }, { "epoch": 49.77, "learning_rate": 2.512198383660615e-05, "loss": 2.0056, "step": 17195500 }, { "epoch": 49.78, "learning_rate": 2.512126018895887e-05, "loss": 1.9977, "step": 17196000 }, { "epoch": 49.78, "learning_rate": 2.5120536541311597e-05, "loss": 1.9895, "step": 17196500 }, { "epoch": 49.78, "learning_rate": 2.5119812893664323e-05, "loss": 2.0022, "step": 17197000 }, { "epoch": 49.78, "learning_rate": 2.511909069331234e-05, "loss": 1.9957, "step": 17197500 }, { "epoch": 49.78, "learning_rate": 2.5118367045665064e-05, "loss": 2.0019, "step": 17198000 }, { "epoch": 49.78, "learning_rate": 2.5117643398017786e-05, "loss": 2.0101, "step": 17198500 }, { "epoch": 49.78, "learning_rate": 2.511691975037051e-05, "loss": 2.0117, "step": 17199000 }, { "epoch": 49.79, "learning_rate": 2.5116197550018527e-05, "loss": 1.9904, "step": 17199500 }, { "epoch": 49.79, "learning_rate": 2.511547390237125e-05, "loss": 2.0189, "step": 17200000 }, { "epoch": 49.79, "learning_rate": 2.511475025472397e-05, "loss": 1.9909, "step": 17200500 }, { "epoch": 49.79, "learning_rate": 2.5114026607076697e-05, "loss": 1.9984, "step": 17201000 }, { "epoch": 49.79, "learning_rate": 2.511330295942942e-05, "loss": 1.9783, "step": 17201500 }, { "epoch": 49.79, "learning_rate": 2.511257931178214e-05, "loss": 1.9991, "step": 17202000 }, { "epoch": 49.79, "learning_rate": 2.5111855664134864e-05, "loss": 2.0123, "step": 17202500 }, { "epoch": 49.8, "learning_rate": 2.5111132016487586e-05, "loss": 2.0075, "step": 17203000 }, { "epoch": 49.8, "learning_rate": 2.5110408368840312e-05, "loss": 2.01, "step": 17203500 }, { "epoch": 49.8, "learning_rate": 2.5109686168488327e-05, "loss": 1.9995, "step": 17204000 }, { "epoch": 49.8, "learning_rate": 2.5108963968136346e-05, "loss": 1.9837, "step": 17204500 }, { "epoch": 49.8, "learning_rate": 2.510824176778437e-05, "loss": 2.0044, "step": 17205000 }, { "epoch": 49.8, "learning_rate": 2.510751812013709e-05, "loss": 2.0081, "step": 17205500 }, { "epoch": 49.8, "learning_rate": 2.5106794472489813e-05, "loss": 1.9916, "step": 17206000 }, { "epoch": 49.81, "learning_rate": 2.5106070824842535e-05, "loss": 1.9999, "step": 17206500 }, { "epoch": 49.81, "learning_rate": 2.5105348624490554e-05, "loss": 2.0008, "step": 17207000 }, { "epoch": 49.81, "learning_rate": 2.5104624976843276e-05, "loss": 1.9969, "step": 17207500 }, { "epoch": 49.81, "learning_rate": 2.5103901329196e-05, "loss": 2.0141, "step": 17208000 }, { "epoch": 49.81, "learning_rate": 2.5103177681548724e-05, "loss": 2.0204, "step": 17208500 }, { "epoch": 49.81, "learning_rate": 2.5102454033901447e-05, "loss": 2.0074, "step": 17209000 }, { "epoch": 49.81, "learning_rate": 2.510173038625417e-05, "loss": 2.0277, "step": 17209500 }, { "epoch": 49.82, "learning_rate": 2.510100673860689e-05, "loss": 2.0103, "step": 17210000 }, { "epoch": 49.82, "learning_rate": 2.5100283090959613e-05, "loss": 1.989, "step": 17210500 }, { "epoch": 49.82, "learning_rate": 2.5099560890607632e-05, "loss": 1.9838, "step": 17211000 }, { "epoch": 49.82, "learning_rate": 2.5098837242960354e-05, "loss": 1.9962, "step": 17211500 }, { "epoch": 49.82, "learning_rate": 2.5098113595313077e-05, "loss": 2.0084, "step": 17212000 }, { "epoch": 49.82, "learning_rate": 2.5097389947665806e-05, "loss": 2.0097, "step": 17212500 }, { "epoch": 49.82, "learning_rate": 2.5096666300018528e-05, "loss": 2.008, "step": 17213000 }, { "epoch": 49.83, "learning_rate": 2.509594265237125e-05, "loss": 1.9742, "step": 17213500 }, { "epoch": 49.83, "learning_rate": 2.5095219004723976e-05, "loss": 2.0039, "step": 17214000 }, { "epoch": 49.83, "learning_rate": 2.5094495357076698e-05, "loss": 2.0155, "step": 17214500 }, { "epoch": 49.83, "learning_rate": 2.509377170942942e-05, "loss": 1.9983, "step": 17215000 }, { "epoch": 49.83, "learning_rate": 2.509304950907744e-05, "loss": 1.9882, "step": 17215500 }, { "epoch": 49.83, "learning_rate": 2.509232586143016e-05, "loss": 1.9893, "step": 17216000 }, { "epoch": 49.83, "learning_rate": 2.5091602213782884e-05, "loss": 1.9917, "step": 17216500 }, { "epoch": 49.84, "learning_rate": 2.5090878566135606e-05, "loss": 2.0152, "step": 17217000 }, { "epoch": 49.84, "learning_rate": 2.5090156365783625e-05, "loss": 2.0168, "step": 17217500 }, { "epoch": 49.84, "learning_rate": 2.508943416543164e-05, "loss": 2.0151, "step": 17218000 }, { "epoch": 49.84, "learning_rate": 2.5088710517784363e-05, "loss": 2.0269, "step": 17218500 }, { "epoch": 49.84, "learning_rate": 2.508798831743238e-05, "loss": 2.0095, "step": 17219000 }, { "epoch": 49.84, "learning_rate": 2.5087264669785104e-05, "loss": 2.0184, "step": 17219500 }, { "epoch": 49.84, "learning_rate": 2.5086541022137833e-05, "loss": 2.0028, "step": 17220000 }, { "epoch": 49.85, "learning_rate": 2.5085817374490555e-05, "loss": 2.0164, "step": 17220500 }, { "epoch": 49.85, "learning_rate": 2.5085093726843277e-05, "loss": 1.9868, "step": 17221000 }, { "epoch": 49.85, "learning_rate": 2.5084371526491296e-05, "loss": 2.0377, "step": 17221500 }, { "epoch": 49.85, "learning_rate": 2.508364787884402e-05, "loss": 1.9783, "step": 17222000 }, { "epoch": 49.85, "learning_rate": 2.508292423119674e-05, "loss": 2.0146, "step": 17222500 }, { "epoch": 49.85, "learning_rate": 2.5082200583549463e-05, "loss": 2.0298, "step": 17223000 }, { "epoch": 49.85, "learning_rate": 2.508147693590219e-05, "loss": 2.0146, "step": 17223500 }, { "epoch": 49.86, "learning_rate": 2.508075328825491e-05, "loss": 1.9954, "step": 17224000 }, { "epoch": 49.86, "learning_rate": 2.5080029640607633e-05, "loss": 2.0177, "step": 17224500 }, { "epoch": 49.86, "learning_rate": 2.5079305992960355e-05, "loss": 2.0136, "step": 17225000 }, { "epoch": 49.86, "learning_rate": 2.5078582345313077e-05, "loss": 2.0046, "step": 17225500 }, { "epoch": 49.86, "learning_rate": 2.5077858697665803e-05, "loss": 2.0043, "step": 17226000 }, { "epoch": 49.86, "learning_rate": 2.5077135050018525e-05, "loss": 2.0186, "step": 17226500 }, { "epoch": 49.87, "learning_rate": 2.5076411402371248e-05, "loss": 2.006, "step": 17227000 }, { "epoch": 49.87, "learning_rate": 2.5075687754723977e-05, "loss": 1.9879, "step": 17227500 }, { "epoch": 49.87, "learning_rate": 2.50749641070767e-05, "loss": 1.9947, "step": 17228000 }, { "epoch": 49.87, "learning_rate": 2.5074241906724714e-05, "loss": 1.9746, "step": 17228500 }, { "epoch": 49.87, "learning_rate": 2.507351825907744e-05, "loss": 2.0195, "step": 17229000 }, { "epoch": 49.87, "learning_rate": 2.5072794611430162e-05, "loss": 2.023, "step": 17229500 }, { "epoch": 49.87, "learning_rate": 2.5072070963782884e-05, "loss": 2.0226, "step": 17230000 }, { "epoch": 49.88, "learning_rate": 2.5071347316135607e-05, "loss": 2.0182, "step": 17230500 }, { "epoch": 49.88, "learning_rate": 2.507062366848833e-05, "loss": 2.0009, "step": 17231000 }, { "epoch": 49.88, "learning_rate": 2.5069901468136348e-05, "loss": 1.9983, "step": 17231500 }, { "epoch": 49.88, "learning_rate": 2.506917782048907e-05, "loss": 2.0023, "step": 17232000 }, { "epoch": 49.88, "learning_rate": 2.5068454172841792e-05, "loss": 2.0088, "step": 17232500 }, { "epoch": 49.88, "learning_rate": 2.5067730525194515e-05, "loss": 2.0243, "step": 17233000 }, { "epoch": 49.88, "learning_rate": 2.506700687754724e-05, "loss": 2.01, "step": 17233500 }, { "epoch": 49.89, "learning_rate": 2.5066284677195256e-05, "loss": 2.0227, "step": 17234000 }, { "epoch": 49.89, "learning_rate": 2.5065561029547978e-05, "loss": 2.0083, "step": 17234500 }, { "epoch": 49.89, "learning_rate": 2.5064837381900707e-05, "loss": 2.0073, "step": 17235000 }, { "epoch": 49.89, "learning_rate": 2.506411373425343e-05, "loss": 2.0123, "step": 17235500 }, { "epoch": 49.89, "learning_rate": 2.5063390086606155e-05, "loss": 1.985, "step": 17236000 }, { "epoch": 49.89, "learning_rate": 2.5062666438958877e-05, "loss": 2.0157, "step": 17236500 }, { "epoch": 49.89, "learning_rate": 2.50619427913116e-05, "loss": 2.0047, "step": 17237000 }, { "epoch": 49.9, "learning_rate": 2.506121914366432e-05, "loss": 2.0147, "step": 17237500 }, { "epoch": 49.9, "learning_rate": 2.5060495496017044e-05, "loss": 1.9979, "step": 17238000 }, { "epoch": 49.9, "learning_rate": 2.5059771848369766e-05, "loss": 2.0031, "step": 17238500 }, { "epoch": 49.9, "learning_rate": 2.505904820072249e-05, "loss": 1.9938, "step": 17239000 }, { "epoch": 49.9, "learning_rate": 2.5058324553075214e-05, "loss": 1.9909, "step": 17239500 }, { "epoch": 49.9, "learning_rate": 2.505760235272323e-05, "loss": 2.0003, "step": 17240000 }, { "epoch": 49.9, "learning_rate": 2.5056878705075955e-05, "loss": 2.0262, "step": 17240500 }, { "epoch": 49.91, "learning_rate": 2.5056155057428677e-05, "loss": 1.9928, "step": 17241000 }, { "epoch": 49.91, "learning_rate": 2.50554314097814e-05, "loss": 2.0057, "step": 17241500 }, { "epoch": 49.91, "learning_rate": 2.505470776213413e-05, "loss": 2.0238, "step": 17242000 }, { "epoch": 49.91, "learning_rate": 2.505398411448685e-05, "loss": 1.9817, "step": 17242500 }, { "epoch": 49.91, "learning_rate": 2.5053260466839573e-05, "loss": 2.0066, "step": 17243000 }, { "epoch": 49.91, "learning_rate": 2.5052536819192295e-05, "loss": 2.0363, "step": 17243500 }, { "epoch": 49.91, "learning_rate": 2.5051814618840314e-05, "loss": 2.0045, "step": 17244000 }, { "epoch": 49.92, "learning_rate": 2.505109241848833e-05, "loss": 2.0202, "step": 17244500 }, { "epoch": 49.92, "learning_rate": 2.5050368770841055e-05, "loss": 1.9989, "step": 17245000 }, { "epoch": 49.92, "learning_rate": 2.5049645123193778e-05, "loss": 1.9885, "step": 17245500 }, { "epoch": 49.92, "learning_rate": 2.50489214755465e-05, "loss": 2.0192, "step": 17246000 }, { "epoch": 49.92, "learning_rate": 2.5048197827899222e-05, "loss": 1.9959, "step": 17246500 }, { "epoch": 49.92, "learning_rate": 2.5047474180251944e-05, "loss": 1.9762, "step": 17247000 }, { "epoch": 49.92, "learning_rate": 2.5046750532604667e-05, "loss": 2.0121, "step": 17247500 }, { "epoch": 49.93, "learning_rate": 2.5046028332252685e-05, "loss": 1.9769, "step": 17248000 }, { "epoch": 49.93, "learning_rate": 2.5045304684605408e-05, "loss": 2.0101, "step": 17248500 }, { "epoch": 49.93, "learning_rate": 2.504458103695813e-05, "loss": 1.9803, "step": 17249000 }, { "epoch": 49.93, "learning_rate": 2.504385738931086e-05, "loss": 2.022, "step": 17249500 }, { "epoch": 49.93, "learning_rate": 2.504313374166358e-05, "loss": 1.9777, "step": 17250000 }, { "epoch": 49.93, "learning_rate": 2.5042410094016307e-05, "loss": 1.9609, "step": 17250500 }, { "epoch": 49.93, "learning_rate": 2.504168644636903e-05, "loss": 2.0106, "step": 17251000 }, { "epoch": 49.94, "learning_rate": 2.5040964246017045e-05, "loss": 1.9936, "step": 17251500 }, { "epoch": 49.94, "learning_rate": 2.504024059836977e-05, "loss": 1.996, "step": 17252000 }, { "epoch": 49.94, "learning_rate": 2.5039516950722492e-05, "loss": 2.0196, "step": 17252500 }, { "epoch": 49.94, "learning_rate": 2.5038793303075215e-05, "loss": 2.0026, "step": 17253000 }, { "epoch": 49.94, "learning_rate": 2.503807110272323e-05, "loss": 2.0257, "step": 17253500 }, { "epoch": 49.94, "learning_rate": 2.5037347455075956e-05, "loss": 1.9888, "step": 17254000 }, { "epoch": 49.94, "learning_rate": 2.5036623807428678e-05, "loss": 2.017, "step": 17254500 }, { "epoch": 49.95, "learning_rate": 2.50359001597814e-05, "loss": 2.0108, "step": 17255000 }, { "epoch": 49.95, "learning_rate": 2.5035176512134123e-05, "loss": 2.0257, "step": 17255500 }, { "epoch": 49.95, "learning_rate": 2.5034452864486845e-05, "loss": 1.9923, "step": 17256000 }, { "epoch": 49.95, "learning_rate": 2.503372921683957e-05, "loss": 2.015, "step": 17256500 }, { "epoch": 49.95, "learning_rate": 2.5033005569192296e-05, "loss": 1.9953, "step": 17257000 }, { "epoch": 49.95, "learning_rate": 2.5032281921545018e-05, "loss": 1.9949, "step": 17257500 }, { "epoch": 49.95, "learning_rate": 2.5031559721193037e-05, "loss": 1.9776, "step": 17258000 }, { "epoch": 49.96, "learning_rate": 2.503083607354576e-05, "loss": 2.0111, "step": 17258500 }, { "epoch": 49.96, "learning_rate": 2.503011242589848e-05, "loss": 1.9954, "step": 17259000 }, { "epoch": 49.96, "learning_rate": 2.5029388778251207e-05, "loss": 1.9916, "step": 17259500 }, { "epoch": 49.96, "learning_rate": 2.502866802519452e-05, "loss": 2.0306, "step": 17260000 }, { "epoch": 49.96, "learning_rate": 2.5027944377547242e-05, "loss": 2.0086, "step": 17260500 }, { "epoch": 49.96, "learning_rate": 2.5027220729899964e-05, "loss": 2.0263, "step": 17261000 }, { "epoch": 49.96, "learning_rate": 2.5026497082252686e-05, "loss": 2.004, "step": 17261500 }, { "epoch": 49.97, "learning_rate": 2.502577343460541e-05, "loss": 2.0082, "step": 17262000 }, { "epoch": 49.97, "learning_rate": 2.502504978695813e-05, "loss": 2.0068, "step": 17262500 }, { "epoch": 49.97, "learning_rate": 2.5024326139310856e-05, "loss": 2.0072, "step": 17263000 }, { "epoch": 49.97, "learning_rate": 2.502360538625417e-05, "loss": 2.0177, "step": 17263500 }, { "epoch": 49.97, "learning_rate": 2.502288173860689e-05, "loss": 1.9981, "step": 17264000 }, { "epoch": 49.97, "learning_rate": 2.5022159538254906e-05, "loss": 2.0084, "step": 17264500 }, { "epoch": 49.98, "learning_rate": 2.5021435890607635e-05, "loss": 1.9905, "step": 17265000 }, { "epoch": 49.98, "learning_rate": 2.5020712242960358e-05, "loss": 1.9823, "step": 17265500 }, { "epoch": 49.98, "learning_rate": 2.5019988595313083e-05, "loss": 1.9878, "step": 17266000 }, { "epoch": 49.98, "learning_rate": 2.5019264947665805e-05, "loss": 2.0191, "step": 17266500 }, { "epoch": 49.98, "learning_rate": 2.5018541300018528e-05, "loss": 2.0064, "step": 17267000 }, { "epoch": 49.98, "learning_rate": 2.501781765237125e-05, "loss": 2.0192, "step": 17267500 }, { "epoch": 49.98, "learning_rate": 2.5017094004723972e-05, "loss": 1.9878, "step": 17268000 }, { "epoch": 49.99, "learning_rate": 2.5016370357076694e-05, "loss": 1.999, "step": 17268500 }, { "epoch": 49.99, "learning_rate": 2.501564670942942e-05, "loss": 1.9902, "step": 17269000 }, { "epoch": 49.99, "learning_rate": 2.5014923061782142e-05, "loss": 2.0018, "step": 17269500 }, { "epoch": 49.99, "learning_rate": 2.5014200861430158e-05, "loss": 2.0427, "step": 17270000 }, { "epoch": 49.99, "learning_rate": 2.5013477213782883e-05, "loss": 2.0052, "step": 17270500 }, { "epoch": 49.99, "learning_rate": 2.5012753566135606e-05, "loss": 2.0067, "step": 17271000 }, { "epoch": 49.99, "learning_rate": 2.5012029918488328e-05, "loss": 2.0015, "step": 17271500 }, { "epoch": 50.0, "learning_rate": 2.5011306270841057e-05, "loss": 2.0068, "step": 17272000 }, { "epoch": 50.0, "learning_rate": 2.501058262319378e-05, "loss": 1.9937, "step": 17272500 }, { "epoch": 50.0, "learning_rate": 2.50098589755465e-05, "loss": 2.0069, "step": 17273000 }, { "epoch": 50.0, "learning_rate": 2.5009135327899224e-05, "loss": 1.9959, "step": 17273500 }, { "epoch": 50.0, "eval_accuracy": 0.6738095040996602, "eval_accuracy_mlm": 0.6396376624402491, "eval_accuracy_nsp": 0.8572044016077509, "eval_loss": 2.1667933464050293, "eval_runtime": 331.9451, "eval_samples_per_second": 1314.633, "eval_steps_per_second": 54.777, "step": 17273600 }, { "epoch": 50.0, "learning_rate": 2.5008411680251946e-05, "loss": 1.9799, "step": 17274000 }, { "epoch": 50.0, "learning_rate": 2.500768803260467e-05, "loss": 1.9725, "step": 17274500 }, { "epoch": 50.0, "learning_rate": 2.5006964384957394e-05, "loss": 1.9746, "step": 17275000 }, { "epoch": 50.01, "learning_rate": 2.5006240737310116e-05, "loss": 2.0099, "step": 17275500 }, { "epoch": 50.01, "learning_rate": 2.5005517089662838e-05, "loss": 1.9809, "step": 17276000 }, { "epoch": 50.01, "learning_rate": 2.500479344201556e-05, "loss": 1.9807, "step": 17276500 }, { "epoch": 50.01, "learning_rate": 2.5004069794368286e-05, "loss": 1.9837, "step": 17277000 }, { "epoch": 50.01, "learning_rate": 2.5003346146721008e-05, "loss": 1.9915, "step": 17277500 }, { "epoch": 50.01, "learning_rate": 2.500262249907373e-05, "loss": 2.0015, "step": 17278000 }, { "epoch": 50.01, "learning_rate": 2.5001898851426453e-05, "loss": 1.9679, "step": 17278500 }, { "epoch": 50.02, "learning_rate": 2.500117665107447e-05, "loss": 1.9543, "step": 17279000 }, { "epoch": 50.02, "learning_rate": 2.5000453003427197e-05, "loss": 1.973, "step": 17279500 }, { "epoch": 50.02, "learning_rate": 2.499972935577992e-05, "loss": 2.0089, "step": 17280000 }, { "epoch": 50.02, "learning_rate": 2.4999005708132642e-05, "loss": 1.9815, "step": 17280500 }, { "epoch": 50.02, "learning_rate": 2.499828350778066e-05, "loss": 1.9694, "step": 17281000 }, { "epoch": 50.02, "learning_rate": 2.4997559860133386e-05, "loss": 2.0003, "step": 17281500 }, { "epoch": 50.02, "learning_rate": 2.499683621248611e-05, "loss": 1.9927, "step": 17282000 }, { "epoch": 50.03, "learning_rate": 2.499611256483883e-05, "loss": 1.9785, "step": 17282500 }, { "epoch": 50.03, "learning_rate": 2.4995388917191553e-05, "loss": 2.0075, "step": 17283000 }, { "epoch": 50.03, "learning_rate": 2.4994665269544275e-05, "loss": 1.9742, "step": 17283500 }, { "epoch": 50.03, "learning_rate": 2.4993943069192294e-05, "loss": 1.9941, "step": 17284000 }, { "epoch": 50.03, "learning_rate": 2.4993219421545016e-05, "loss": 1.9981, "step": 17284500 }, { "epoch": 50.03, "learning_rate": 2.4992495773897742e-05, "loss": 1.9847, "step": 17285000 }, { "epoch": 50.03, "learning_rate": 2.4991772126250464e-05, "loss": 1.9996, "step": 17285500 }, { "epoch": 50.04, "learning_rate": 2.4991048478603187e-05, "loss": 1.9901, "step": 17286000 }, { "epoch": 50.04, "learning_rate": 2.4990324830955912e-05, "loss": 1.9742, "step": 17286500 }, { "epoch": 50.04, "learning_rate": 2.4989601183308634e-05, "loss": 1.9722, "step": 17287000 }, { "epoch": 50.04, "learning_rate": 2.4988877535661357e-05, "loss": 1.9862, "step": 17287500 }, { "epoch": 50.04, "learning_rate": 2.498815388801408e-05, "loss": 1.9744, "step": 17288000 }, { "epoch": 50.04, "learning_rate": 2.498743313495739e-05, "loss": 1.9983, "step": 17288500 }, { "epoch": 50.04, "learning_rate": 2.4986709487310117e-05, "loss": 1.9922, "step": 17289000 }, { "epoch": 50.05, "learning_rate": 2.4985987286958136e-05, "loss": 2.0188, "step": 17289500 }, { "epoch": 50.05, "learning_rate": 2.4985263639310858e-05, "loss": 2.0132, "step": 17290000 }, { "epoch": 50.05, "learning_rate": 2.498453999166358e-05, "loss": 1.965, "step": 17290500 }, { "epoch": 50.05, "learning_rate": 2.4983816344016302e-05, "loss": 1.9674, "step": 17291000 }, { "epoch": 50.05, "learning_rate": 2.4983092696369025e-05, "loss": 1.9928, "step": 17291500 }, { "epoch": 50.05, "learning_rate": 2.498236904872175e-05, "loss": 1.9651, "step": 17292000 }, { "epoch": 50.05, "learning_rate": 2.4981645401074476e-05, "loss": 2.0029, "step": 17292500 }, { "epoch": 50.06, "learning_rate": 2.4980921753427198e-05, "loss": 1.9575, "step": 17293000 }, { "epoch": 50.06, "learning_rate": 2.4980199553075214e-05, "loss": 1.989, "step": 17293500 }, { "epoch": 50.06, "learning_rate": 2.4979475905427936e-05, "loss": 1.9848, "step": 17294000 }, { "epoch": 50.06, "learning_rate": 2.4978753705075955e-05, "loss": 1.9803, "step": 17294500 }, { "epoch": 50.06, "learning_rate": 2.4978030057428677e-05, "loss": 1.988, "step": 17295000 }, { "epoch": 50.06, "learning_rate": 2.49773064097814e-05, "loss": 1.9715, "step": 17295500 }, { "epoch": 50.06, "learning_rate": 2.4976582762134125e-05, "loss": 2.0055, "step": 17296000 }, { "epoch": 50.07, "learning_rate": 2.497585911448685e-05, "loss": 2.0013, "step": 17296500 }, { "epoch": 50.07, "learning_rate": 2.4975135466839573e-05, "loss": 1.9806, "step": 17297000 }, { "epoch": 50.07, "learning_rate": 2.4974411819192295e-05, "loss": 2.0164, "step": 17297500 }, { "epoch": 50.07, "learning_rate": 2.4973688171545017e-05, "loss": 1.9793, "step": 17298000 }, { "epoch": 50.07, "learning_rate": 2.497296452389774e-05, "loss": 1.9851, "step": 17298500 }, { "epoch": 50.07, "learning_rate": 2.4972240876250462e-05, "loss": 1.9913, "step": 17299000 }, { "epoch": 50.07, "learning_rate": 2.4971517228603187e-05, "loss": 1.9892, "step": 17299500 }, { "epoch": 50.08, "learning_rate": 2.4970793580955913e-05, "loss": 1.9841, "step": 17300000 }, { "epoch": 50.08, "learning_rate": 2.4970069933308635e-05, "loss": 1.9887, "step": 17300500 }, { "epoch": 50.08, "learning_rate": 2.4969346285661357e-05, "loss": 1.961, "step": 17301000 }, { "epoch": 50.08, "learning_rate": 2.496862263801408e-05, "loss": 1.9903, "step": 17301500 }, { "epoch": 50.08, "learning_rate": 2.4967898990366802e-05, "loss": 1.9844, "step": 17302000 }, { "epoch": 50.08, "learning_rate": 2.4967175342719528e-05, "loss": 1.9927, "step": 17302500 }, { "epoch": 50.09, "learning_rate": 2.496645169507225e-05, "loss": 2.0015, "step": 17303000 }, { "epoch": 50.09, "learning_rate": 2.496572949472027e-05, "loss": 1.9786, "step": 17303500 }, { "epoch": 50.09, "learning_rate": 2.496500584707299e-05, "loss": 1.9586, "step": 17304000 }, { "epoch": 50.09, "learning_rate": 2.4964282199425713e-05, "loss": 1.9639, "step": 17304500 }, { "epoch": 50.09, "learning_rate": 2.496355855177844e-05, "loss": 1.9983, "step": 17305000 }, { "epoch": 50.09, "learning_rate": 2.4962836351426454e-05, "loss": 1.9841, "step": 17305500 }, { "epoch": 50.09, "learning_rate": 2.4962112703779177e-05, "loss": 2.013, "step": 17306000 }, { "epoch": 50.1, "learning_rate": 2.4961389056131902e-05, "loss": 1.9718, "step": 17306500 }, { "epoch": 50.1, "learning_rate": 2.4960666855779918e-05, "loss": 1.9945, "step": 17307000 }, { "epoch": 50.1, "learning_rate": 2.4959943208132643e-05, "loss": 2.0052, "step": 17307500 }, { "epoch": 50.1, "learning_rate": 2.4959221007780662e-05, "loss": 1.9896, "step": 17308000 }, { "epoch": 50.1, "learning_rate": 2.4958497360133385e-05, "loss": 2.0044, "step": 17308500 }, { "epoch": 50.1, "learning_rate": 2.4957773712486107e-05, "loss": 1.9897, "step": 17309000 }, { "epoch": 50.1, "learning_rate": 2.4957051512134126e-05, "loss": 1.9821, "step": 17309500 }, { "epoch": 50.11, "learning_rate": 2.4956327864486848e-05, "loss": 1.996, "step": 17310000 }, { "epoch": 50.11, "learning_rate": 2.495560421683957e-05, "loss": 1.9854, "step": 17310500 }, { "epoch": 50.11, "learning_rate": 2.4954880569192296e-05, "loss": 2.0065, "step": 17311000 }, { "epoch": 50.11, "learning_rate": 2.4954156921545018e-05, "loss": 2.0063, "step": 17311500 }, { "epoch": 50.11, "learning_rate": 2.495343327389774e-05, "loss": 1.97, "step": 17312000 }, { "epoch": 50.11, "learning_rate": 2.4952709626250466e-05, "loss": 1.9786, "step": 17312500 }, { "epoch": 50.11, "learning_rate": 2.4951985978603188e-05, "loss": 2.0034, "step": 17313000 }, { "epoch": 50.12, "learning_rate": 2.4951263778251204e-05, "loss": 1.9928, "step": 17313500 }, { "epoch": 50.12, "learning_rate": 2.495054013060393e-05, "loss": 1.994, "step": 17314000 }, { "epoch": 50.12, "learning_rate": 2.494981648295665e-05, "loss": 1.9931, "step": 17314500 }, { "epoch": 50.12, "learning_rate": 2.4949092835309377e-05, "loss": 1.9751, "step": 17315000 }, { "epoch": 50.12, "learning_rate": 2.49483691876621e-05, "loss": 1.9952, "step": 17315500 }, { "epoch": 50.12, "learning_rate": 2.4947646987310115e-05, "loss": 2.0058, "step": 17316000 }, { "epoch": 50.12, "learning_rate": 2.494692333966284e-05, "loss": 1.9842, "step": 17316500 }, { "epoch": 50.13, "learning_rate": 2.4946199692015563e-05, "loss": 2.0002, "step": 17317000 }, { "epoch": 50.13, "learning_rate": 2.4945476044368285e-05, "loss": 1.9815, "step": 17317500 }, { "epoch": 50.13, "learning_rate": 2.4944752396721007e-05, "loss": 1.9977, "step": 17318000 }, { "epoch": 50.13, "learning_rate": 2.4944028749073733e-05, "loss": 1.9983, "step": 17318500 }, { "epoch": 50.13, "learning_rate": 2.4943305101426455e-05, "loss": 1.9859, "step": 17319000 }, { "epoch": 50.13, "learning_rate": 2.494258145377918e-05, "loss": 1.9796, "step": 17319500 }, { "epoch": 50.13, "learning_rate": 2.4941857806131903e-05, "loss": 1.9982, "step": 17320000 }, { "epoch": 50.14, "learning_rate": 2.494113560577992e-05, "loss": 1.9927, "step": 17320500 }, { "epoch": 50.14, "learning_rate": 2.494041195813264e-05, "loss": 1.9698, "step": 17321000 }, { "epoch": 50.14, "learning_rate": 2.493968975778066e-05, "loss": 1.979, "step": 17321500 }, { "epoch": 50.14, "learning_rate": 2.4938966110133382e-05, "loss": 1.9691, "step": 17322000 }, { "epoch": 50.14, "learning_rate": 2.4938242462486108e-05, "loss": 1.9898, "step": 17322500 }, { "epoch": 50.14, "learning_rate": 2.493751881483883e-05, "loss": 1.9711, "step": 17323000 }, { "epoch": 50.14, "learning_rate": 2.493679661448685e-05, "loss": 1.993, "step": 17323500 }, { "epoch": 50.15, "learning_rate": 2.493607296683957e-05, "loss": 1.9847, "step": 17324000 }, { "epoch": 50.15, "learning_rate": 2.4935349319192293e-05, "loss": 1.9873, "step": 17324500 }, { "epoch": 50.15, "learning_rate": 2.493462567154502e-05, "loss": 1.998, "step": 17325000 }, { "epoch": 50.15, "learning_rate": 2.493390202389774e-05, "loss": 1.9979, "step": 17325500 }, { "epoch": 50.15, "learning_rate": 2.4933178376250467e-05, "loss": 2.009, "step": 17326000 }, { "epoch": 50.15, "learning_rate": 2.493245472860319e-05, "loss": 2.0006, "step": 17326500 }, { "epoch": 50.15, "learning_rate": 2.493173108095591e-05, "loss": 1.9862, "step": 17327000 }, { "epoch": 50.16, "learning_rate": 2.4931007433308633e-05, "loss": 1.9925, "step": 17327500 }, { "epoch": 50.16, "learning_rate": 2.4930283785661356e-05, "loss": 2.0133, "step": 17328000 }, { "epoch": 50.16, "learning_rate": 2.492956013801408e-05, "loss": 1.9986, "step": 17328500 }, { "epoch": 50.16, "learning_rate": 2.4928836490366803e-05, "loss": 1.9809, "step": 17329000 }, { "epoch": 50.16, "learning_rate": 2.492811284271953e-05, "loss": 1.9832, "step": 17329500 }, { "epoch": 50.16, "learning_rate": 2.492738919507225e-05, "loss": 1.9843, "step": 17330000 }, { "epoch": 50.16, "learning_rate": 2.4926665547424974e-05, "loss": 1.9872, "step": 17330500 }, { "epoch": 50.17, "learning_rate": 2.4925943347072993e-05, "loss": 1.9958, "step": 17331000 }, { "epoch": 50.17, "learning_rate": 2.4925219699425715e-05, "loss": 1.9842, "step": 17331500 }, { "epoch": 50.17, "learning_rate": 2.4924496051778437e-05, "loss": 1.9749, "step": 17332000 }, { "epoch": 50.17, "learning_rate": 2.492377240413116e-05, "loss": 2.0018, "step": 17332500 }, { "epoch": 50.17, "learning_rate": 2.4923048756483885e-05, "loss": 2.0097, "step": 17333000 }, { "epoch": 50.17, "learning_rate": 2.4922325108836607e-05, "loss": 1.99, "step": 17333500 }, { "epoch": 50.17, "learning_rate": 2.4921601461189333e-05, "loss": 2.0028, "step": 17334000 }, { "epoch": 50.18, "learning_rate": 2.4920879260837348e-05, "loss": 1.9941, "step": 17334500 }, { "epoch": 50.18, "learning_rate": 2.492015561319007e-05, "loss": 1.9876, "step": 17335000 }, { "epoch": 50.18, "learning_rate": 2.4919431965542793e-05, "loss": 1.9999, "step": 17335500 }, { "epoch": 50.18, "learning_rate": 2.491870831789552e-05, "loss": 2.0008, "step": 17336000 }, { "epoch": 50.18, "learning_rate": 2.491798467024824e-05, "loss": 1.9921, "step": 17336500 }, { "epoch": 50.18, "learning_rate": 2.4917261022600966e-05, "loss": 2.0039, "step": 17337000 }, { "epoch": 50.18, "learning_rate": 2.4916538822248982e-05, "loss": 2.0004, "step": 17337500 }, { "epoch": 50.19, "learning_rate": 2.4915815174601707e-05, "loss": 1.997, "step": 17338000 }, { "epoch": 50.19, "learning_rate": 2.491509152695443e-05, "loss": 1.9888, "step": 17338500 }, { "epoch": 50.19, "learning_rate": 2.4914367879307152e-05, "loss": 1.9952, "step": 17339000 }, { "epoch": 50.19, "learning_rate": 2.4913644231659874e-05, "loss": 1.9574, "step": 17339500 }, { "epoch": 50.19, "learning_rate": 2.4912922031307893e-05, "loss": 1.9946, "step": 17340000 }, { "epoch": 50.19, "learning_rate": 2.4912198383660615e-05, "loss": 2.0033, "step": 17340500 }, { "epoch": 50.2, "learning_rate": 2.491147473601334e-05, "loss": 1.9669, "step": 17341000 }, { "epoch": 50.2, "learning_rate": 2.4910751088366063e-05, "loss": 1.9864, "step": 17341500 }, { "epoch": 50.2, "learning_rate": 2.4910027440718785e-05, "loss": 1.9639, "step": 17342000 }, { "epoch": 50.2, "learning_rate": 2.4909305240366804e-05, "loss": 1.9888, "step": 17342500 }, { "epoch": 50.2, "learning_rate": 2.4908581592719527e-05, "loss": 1.9918, "step": 17343000 }, { "epoch": 50.2, "learning_rate": 2.490785794507225e-05, "loss": 1.9835, "step": 17343500 }, { "epoch": 50.2, "learning_rate": 2.490713429742497e-05, "loss": 1.9576, "step": 17344000 }, { "epoch": 50.21, "learning_rate": 2.4906410649777697e-05, "loss": 1.9886, "step": 17344500 }, { "epoch": 50.21, "learning_rate": 2.4905687002130422e-05, "loss": 1.9862, "step": 17345000 }, { "epoch": 50.21, "learning_rate": 2.4904963354483144e-05, "loss": 1.9952, "step": 17345500 }, { "epoch": 50.21, "learning_rate": 2.4904239706835867e-05, "loss": 1.9939, "step": 17346000 }, { "epoch": 50.21, "learning_rate": 2.490351605918859e-05, "loss": 2.0038, "step": 17346500 }, { "epoch": 50.21, "learning_rate": 2.490279241154131e-05, "loss": 1.9934, "step": 17347000 }, { "epoch": 50.21, "learning_rate": 2.4902068763894033e-05, "loss": 1.992, "step": 17347500 }, { "epoch": 50.22, "learning_rate": 2.490134511624676e-05, "loss": 1.9906, "step": 17348000 }, { "epoch": 50.22, "learning_rate": 2.4900621468599485e-05, "loss": 2.0035, "step": 17348500 }, { "epoch": 50.22, "learning_rate": 2.4899897820952207e-05, "loss": 1.9983, "step": 17349000 }, { "epoch": 50.22, "learning_rate": 2.489917417330493e-05, "loss": 2.007, "step": 17349500 }, { "epoch": 50.22, "learning_rate": 2.489845052565765e-05, "loss": 1.9917, "step": 17350000 }, { "epoch": 50.22, "learning_rate": 2.4897729772600964e-05, "loss": 1.9712, "step": 17350500 }, { "epoch": 50.22, "learning_rate": 2.4897007572248983e-05, "loss": 1.9944, "step": 17351000 }, { "epoch": 50.23, "learning_rate": 2.4896283924601705e-05, "loss": 1.9806, "step": 17351500 }, { "epoch": 50.23, "learning_rate": 2.489556027695443e-05, "loss": 2.0172, "step": 17352000 }, { "epoch": 50.23, "learning_rate": 2.4894836629307153e-05, "loss": 2.0283, "step": 17352500 }, { "epoch": 50.23, "learning_rate": 2.489411442895517e-05, "loss": 2.0039, "step": 17353000 }, { "epoch": 50.23, "learning_rate": 2.4893390781307894e-05, "loss": 1.9857, "step": 17353500 }, { "epoch": 50.23, "learning_rate": 2.4892667133660616e-05, "loss": 1.9803, "step": 17354000 }, { "epoch": 50.23, "learning_rate": 2.4891943486013338e-05, "loss": 2.0028, "step": 17354500 }, { "epoch": 50.24, "learning_rate": 2.489121983836606e-05, "loss": 2.0014, "step": 17355000 }, { "epoch": 50.24, "learning_rate": 2.4890496190718786e-05, "loss": 1.9846, "step": 17355500 }, { "epoch": 50.24, "learning_rate": 2.4889772543071512e-05, "loss": 1.993, "step": 17356000 }, { "epoch": 50.24, "learning_rate": 2.4889048895424234e-05, "loss": 1.9771, "step": 17356500 }, { "epoch": 50.24, "learning_rate": 2.4888325247776956e-05, "loss": 1.9963, "step": 17357000 }, { "epoch": 50.24, "learning_rate": 2.488760160012968e-05, "loss": 1.9877, "step": 17357500 }, { "epoch": 50.24, "learning_rate": 2.48868779524824e-05, "loss": 1.9948, "step": 17358000 }, { "epoch": 50.25, "learning_rate": 2.4886154304835123e-05, "loss": 1.9968, "step": 17358500 }, { "epoch": 50.25, "learning_rate": 2.488543065718785e-05, "loss": 1.9894, "step": 17359000 }, { "epoch": 50.25, "learning_rate": 2.4884707009540574e-05, "loss": 2.0012, "step": 17359500 }, { "epoch": 50.25, "learning_rate": 2.4883983361893296e-05, "loss": 2.0122, "step": 17360000 }, { "epoch": 50.25, "learning_rate": 2.4883261161541312e-05, "loss": 1.9951, "step": 17360500 }, { "epoch": 50.25, "learning_rate": 2.4882537513894034e-05, "loss": 1.9909, "step": 17361000 }, { "epoch": 50.25, "learning_rate": 2.488181386624676e-05, "loss": 1.972, "step": 17361500 }, { "epoch": 50.26, "learning_rate": 2.4881090218599482e-05, "loss": 1.9815, "step": 17362000 }, { "epoch": 50.26, "learning_rate": 2.4880368018247498e-05, "loss": 1.9966, "step": 17362500 }, { "epoch": 50.26, "learning_rate": 2.4879644370600223e-05, "loss": 2.0009, "step": 17363000 }, { "epoch": 50.26, "learning_rate": 2.487892072295295e-05, "loss": 1.9903, "step": 17363500 }, { "epoch": 50.26, "learning_rate": 2.487819707530567e-05, "loss": 1.9868, "step": 17364000 }, { "epoch": 50.26, "learning_rate": 2.4877474874953687e-05, "loss": 1.9914, "step": 17364500 }, { "epoch": 50.26, "learning_rate": 2.4876751227306412e-05, "loss": 1.9963, "step": 17365000 }, { "epoch": 50.27, "learning_rate": 2.4876027579659134e-05, "loss": 1.9841, "step": 17365500 }, { "epoch": 50.27, "learning_rate": 2.487530537930715e-05, "loss": 1.9952, "step": 17366000 }, { "epoch": 50.27, "learning_rate": 2.4874581731659872e-05, "loss": 2.0083, "step": 17366500 }, { "epoch": 50.27, "learning_rate": 2.4873858084012598e-05, "loss": 1.9895, "step": 17367000 }, { "epoch": 50.27, "learning_rate": 2.4873134436365324e-05, "loss": 2.0331, "step": 17367500 }, { "epoch": 50.27, "learning_rate": 2.4872410788718046e-05, "loss": 1.9786, "step": 17368000 }, { "epoch": 50.27, "learning_rate": 2.4871690035661358e-05, "loss": 1.9931, "step": 17368500 }, { "epoch": 50.28, "learning_rate": 2.487096638801408e-05, "loss": 1.9892, "step": 17369000 }, { "epoch": 50.28, "learning_rate": 2.4870242740366802e-05, "loss": 1.9965, "step": 17369500 }, { "epoch": 50.28, "learning_rate": 2.4869519092719525e-05, "loss": 1.9833, "step": 17370000 }, { "epoch": 50.28, "learning_rate": 2.486879544507225e-05, "loss": 1.9753, "step": 17370500 }, { "epoch": 50.28, "learning_rate": 2.4868071797424976e-05, "loss": 1.9702, "step": 17371000 }, { "epoch": 50.28, "learning_rate": 2.486734959707299e-05, "loss": 2.0033, "step": 17371500 }, { "epoch": 50.28, "learning_rate": 2.4866625949425714e-05, "loss": 1.9789, "step": 17372000 }, { "epoch": 50.29, "learning_rate": 2.4865902301778436e-05, "loss": 1.9754, "step": 17372500 }, { "epoch": 50.29, "learning_rate": 2.486517865413116e-05, "loss": 1.9817, "step": 17373000 }, { "epoch": 50.29, "learning_rate": 2.4864455006483884e-05, "loss": 1.99, "step": 17373500 }, { "epoch": 50.29, "learning_rate": 2.4863734253427196e-05, "loss": 1.9934, "step": 17374000 }, { "epoch": 50.29, "learning_rate": 2.486301060577992e-05, "loss": 1.9881, "step": 17374500 }, { "epoch": 50.29, "learning_rate": 2.4862286958132644e-05, "loss": 1.9784, "step": 17375000 }, { "epoch": 50.29, "learning_rate": 2.4861563310485366e-05, "loss": 2.0021, "step": 17375500 }, { "epoch": 50.3, "learning_rate": 2.486083966283809e-05, "loss": 1.9886, "step": 17376000 }, { "epoch": 50.3, "learning_rate": 2.4860116015190814e-05, "loss": 1.9914, "step": 17376500 }, { "epoch": 50.3, "learning_rate": 2.4859392367543536e-05, "loss": 1.972, "step": 17377000 }, { "epoch": 50.3, "learning_rate": 2.485866871989626e-05, "loss": 1.9976, "step": 17377500 }, { "epoch": 50.3, "learning_rate": 2.485794507224898e-05, "loss": 1.9722, "step": 17378000 }, { "epoch": 50.3, "learning_rate": 2.4857221424601706e-05, "loss": 1.9912, "step": 17378500 }, { "epoch": 50.31, "learning_rate": 2.485649777695443e-05, "loss": 1.9858, "step": 17379000 }, { "epoch": 50.31, "learning_rate": 2.485577412930715e-05, "loss": 1.9874, "step": 17379500 }, { "epoch": 50.31, "learning_rate": 2.4855050481659876e-05, "loss": 2.0062, "step": 17380000 }, { "epoch": 50.31, "learning_rate": 2.48543268340126e-05, "loss": 1.9686, "step": 17380500 }, { "epoch": 50.31, "learning_rate": 2.4853604633660614e-05, "loss": 1.9991, "step": 17381000 }, { "epoch": 50.31, "learning_rate": 2.485288098601334e-05, "loss": 2.0058, "step": 17381500 }, { "epoch": 50.31, "learning_rate": 2.485215878566136e-05, "loss": 1.974, "step": 17382000 }, { "epoch": 50.32, "learning_rate": 2.485143513801408e-05, "loss": 1.9929, "step": 17382500 }, { "epoch": 50.32, "learning_rate": 2.4850711490366803e-05, "loss": 1.9743, "step": 17383000 }, { "epoch": 50.32, "learning_rate": 2.4849987842719526e-05, "loss": 1.981, "step": 17383500 }, { "epoch": 50.32, "learning_rate": 2.484926419507225e-05, "loss": 1.9751, "step": 17384000 }, { "epoch": 50.32, "learning_rate": 2.4848540547424973e-05, "loss": 1.9955, "step": 17384500 }, { "epoch": 50.32, "learning_rate": 2.4847816899777696e-05, "loss": 1.9925, "step": 17385000 }, { "epoch": 50.32, "learning_rate": 2.484709325213042e-05, "loss": 2.0191, "step": 17385500 }, { "epoch": 50.33, "learning_rate": 2.4846369604483143e-05, "loss": 1.9942, "step": 17386000 }, { "epoch": 50.33, "learning_rate": 2.4845647404131162e-05, "loss": 1.9974, "step": 17386500 }, { "epoch": 50.33, "learning_rate": 2.4844923756483885e-05, "loss": 2.0031, "step": 17387000 }, { "epoch": 50.33, "learning_rate": 2.4844201556131904e-05, "loss": 1.9792, "step": 17387500 }, { "epoch": 50.33, "learning_rate": 2.4843477908484626e-05, "loss": 2.0051, "step": 17388000 }, { "epoch": 50.33, "learning_rate": 2.4842754260837348e-05, "loss": 1.9902, "step": 17388500 }, { "epoch": 50.33, "learning_rate": 2.484203061319007e-05, "loss": 2.0088, "step": 17389000 }, { "epoch": 50.34, "learning_rate": 2.4841306965542796e-05, "loss": 1.9952, "step": 17389500 }, { "epoch": 50.34, "learning_rate": 2.4840583317895518e-05, "loss": 1.9923, "step": 17390000 }, { "epoch": 50.34, "learning_rate": 2.483985967024824e-05, "loss": 1.9908, "step": 17390500 }, { "epoch": 50.34, "learning_rate": 2.4839136022600966e-05, "loss": 1.9692, "step": 17391000 }, { "epoch": 50.34, "learning_rate": 2.4838412374953688e-05, "loss": 1.9869, "step": 17391500 }, { "epoch": 50.34, "learning_rate": 2.483768872730641e-05, "loss": 2.0331, "step": 17392000 }, { "epoch": 50.34, "learning_rate": 2.4836965079659133e-05, "loss": 1.9672, "step": 17392500 }, { "epoch": 50.35, "learning_rate": 2.483624143201186e-05, "loss": 1.9638, "step": 17393000 }, { "epoch": 50.35, "learning_rate": 2.4835519231659877e-05, "loss": 1.991, "step": 17393500 }, { "epoch": 50.35, "learning_rate": 2.48347955840126e-05, "loss": 2.0156, "step": 17394000 }, { "epoch": 50.35, "learning_rate": 2.4834071936365322e-05, "loss": 1.9986, "step": 17394500 }, { "epoch": 50.35, "learning_rate": 2.4833348288718044e-05, "loss": 2.0033, "step": 17395000 }, { "epoch": 50.35, "learning_rate": 2.4832624641070766e-05, "loss": 2.0009, "step": 17395500 }, { "epoch": 50.35, "learning_rate": 2.4831900993423492e-05, "loss": 2.0046, "step": 17396000 }, { "epoch": 50.36, "learning_rate": 2.4831177345776217e-05, "loss": 2.0006, "step": 17396500 }, { "epoch": 50.36, "learning_rate": 2.483045369812894e-05, "loss": 1.9968, "step": 17397000 }, { "epoch": 50.36, "learning_rate": 2.4829731497776955e-05, "loss": 1.9734, "step": 17397500 }, { "epoch": 50.36, "learning_rate": 2.4829007850129677e-05, "loss": 1.9864, "step": 17398000 }, { "epoch": 50.36, "learning_rate": 2.4828284202482403e-05, "loss": 2.0261, "step": 17398500 }, { "epoch": 50.36, "learning_rate": 2.4827560554835125e-05, "loss": 2.0025, "step": 17399000 }, { "epoch": 50.36, "learning_rate": 2.482683835448314e-05, "loss": 1.9791, "step": 17399500 }, { "epoch": 50.37, "learning_rate": 2.4826114706835866e-05, "loss": 2.0009, "step": 17400000 }, { "epoch": 50.37, "learning_rate": 2.4825391059188592e-05, "loss": 2.0112, "step": 17400500 }, { "epoch": 50.37, "learning_rate": 2.4824667411541314e-05, "loss": 1.9896, "step": 17401000 }, { "epoch": 50.37, "learning_rate": 2.4823943763894037e-05, "loss": 2.0145, "step": 17401500 }, { "epoch": 50.37, "learning_rate": 2.4823221563542056e-05, "loss": 2.0149, "step": 17402000 }, { "epoch": 50.37, "learning_rate": 2.4822500810485368e-05, "loss": 2.0097, "step": 17402500 }, { "epoch": 50.37, "learning_rate": 2.482177716283809e-05, "loss": 1.9965, "step": 17403000 }, { "epoch": 50.38, "learning_rate": 2.4821053515190812e-05, "loss": 1.9762, "step": 17403500 }, { "epoch": 50.38, "learning_rate": 2.4820329867543534e-05, "loss": 1.9651, "step": 17404000 }, { "epoch": 50.38, "learning_rate": 2.481960621989626e-05, "loss": 1.9967, "step": 17404500 }, { "epoch": 50.38, "learning_rate": 2.4818882572248982e-05, "loss": 1.9935, "step": 17405000 }, { "epoch": 50.38, "learning_rate": 2.4818158924601705e-05, "loss": 2.0024, "step": 17405500 }, { "epoch": 50.38, "learning_rate": 2.481743527695443e-05, "loss": 1.9951, "step": 17406000 }, { "epoch": 50.38, "learning_rate": 2.4816711629307152e-05, "loss": 2.0151, "step": 17406500 }, { "epoch": 50.39, "learning_rate": 2.4815989428955168e-05, "loss": 1.9846, "step": 17407000 }, { "epoch": 50.39, "learning_rate": 2.4815265781307894e-05, "loss": 1.988, "step": 17407500 }, { "epoch": 50.39, "learning_rate": 2.4814542133660616e-05, "loss": 2.0073, "step": 17408000 }, { "epoch": 50.39, "learning_rate": 2.481381848601334e-05, "loss": 1.9632, "step": 17408500 }, { "epoch": 50.39, "learning_rate": 2.4813094838366064e-05, "loss": 1.9713, "step": 17409000 }, { "epoch": 50.39, "learning_rate": 2.4812371190718786e-05, "loss": 1.9914, "step": 17409500 }, { "epoch": 50.39, "learning_rate": 2.4811648990366805e-05, "loss": 1.9628, "step": 17410000 }, { "epoch": 50.4, "learning_rate": 2.4810925342719527e-05, "loss": 2.0066, "step": 17410500 }, { "epoch": 50.4, "learning_rate": 2.481020169507225e-05, "loss": 1.9854, "step": 17411000 }, { "epoch": 50.4, "learning_rate": 2.480947804742497e-05, "loss": 1.9945, "step": 17411500 }, { "epoch": 50.4, "learning_rate": 2.4808754399777697e-05, "loss": 1.9988, "step": 17412000 }, { "epoch": 50.4, "learning_rate": 2.480803364672101e-05, "loss": 1.9925, "step": 17412500 }, { "epoch": 50.4, "learning_rate": 2.480730999907373e-05, "loss": 2.0012, "step": 17413000 }, { "epoch": 50.4, "learning_rate": 2.4806586351426457e-05, "loss": 1.9747, "step": 17413500 }, { "epoch": 50.41, "learning_rate": 2.480586270377918e-05, "loss": 2.0006, "step": 17414000 }, { "epoch": 50.41, "learning_rate": 2.4805139056131902e-05, "loss": 1.9911, "step": 17414500 }, { "epoch": 50.41, "learning_rate": 2.4804415408484624e-05, "loss": 1.9953, "step": 17415000 }, { "epoch": 50.41, "learning_rate": 2.4803691760837346e-05, "loss": 2.001, "step": 17415500 }, { "epoch": 50.41, "learning_rate": 2.4802968113190072e-05, "loss": 1.9964, "step": 17416000 }, { "epoch": 50.41, "learning_rate": 2.480224591283809e-05, "loss": 1.9944, "step": 17416500 }, { "epoch": 50.42, "learning_rate": 2.4801522265190813e-05, "loss": 1.9922, "step": 17417000 }, { "epoch": 50.42, "learning_rate": 2.4800798617543535e-05, "loss": 1.9922, "step": 17417500 }, { "epoch": 50.42, "learning_rate": 2.4800074969896258e-05, "loss": 1.9827, "step": 17418000 }, { "epoch": 50.42, "learning_rate": 2.4799351322248983e-05, "loss": 1.9929, "step": 17418500 }, { "epoch": 50.42, "learning_rate": 2.4798627674601705e-05, "loss": 1.9796, "step": 17419000 }, { "epoch": 50.42, "learning_rate": 2.479790402695443e-05, "loss": 1.9813, "step": 17419500 }, { "epoch": 50.42, "learning_rate": 2.4797181826602447e-05, "loss": 2.0144, "step": 17420000 }, { "epoch": 50.43, "learning_rate": 2.479645817895517e-05, "loss": 1.9745, "step": 17420500 }, { "epoch": 50.43, "learning_rate": 2.4795734531307894e-05, "loss": 2.0111, "step": 17421000 }, { "epoch": 50.43, "learning_rate": 2.4795010883660617e-05, "loss": 1.9904, "step": 17421500 }, { "epoch": 50.43, "learning_rate": 2.479428723601334e-05, "loss": 1.9971, "step": 17422000 }, { "epoch": 50.43, "learning_rate": 2.479356358836606e-05, "loss": 1.9603, "step": 17422500 }, { "epoch": 50.43, "learning_rate": 2.4792839940718787e-05, "loss": 2.0042, "step": 17423000 }, { "epoch": 50.43, "learning_rate": 2.479211629307151e-05, "loss": 2.0064, "step": 17423500 }, { "epoch": 50.44, "learning_rate": 2.4791392645424235e-05, "loss": 1.9844, "step": 17424000 }, { "epoch": 50.44, "learning_rate": 2.4790668997776957e-05, "loss": 1.9765, "step": 17424500 }, { "epoch": 50.44, "learning_rate": 2.478994535012968e-05, "loss": 1.9938, "step": 17425000 }, { "epoch": 50.44, "learning_rate": 2.4789223149777695e-05, "loss": 1.982, "step": 17425500 }, { "epoch": 50.44, "learning_rate": 2.478849950213042e-05, "loss": 1.9949, "step": 17426000 }, { "epoch": 50.44, "learning_rate": 2.4787775854483142e-05, "loss": 2.0092, "step": 17426500 }, { "epoch": 50.44, "learning_rate": 2.4787052206835868e-05, "loss": 2.0001, "step": 17427000 }, { "epoch": 50.45, "learning_rate": 2.478632855918859e-05, "loss": 1.9664, "step": 17427500 }, { "epoch": 50.45, "learning_rate": 2.4785604911541313e-05, "loss": 2.0078, "step": 17428000 }, { "epoch": 50.45, "learning_rate": 2.4784881263894035e-05, "loss": 1.9871, "step": 17428500 }, { "epoch": 50.45, "learning_rate": 2.4784157616246757e-05, "loss": 1.9873, "step": 17429000 }, { "epoch": 50.45, "learning_rate": 2.4783435415894776e-05, "loss": 1.9728, "step": 17429500 }, { "epoch": 50.45, "learning_rate": 2.4782711768247498e-05, "loss": 1.9873, "step": 17430000 }, { "epoch": 50.45, "learning_rate": 2.4781988120600224e-05, "loss": 1.9805, "step": 17430500 }, { "epoch": 50.46, "learning_rate": 2.4781264472952946e-05, "loss": 1.9878, "step": 17431000 }, { "epoch": 50.46, "learning_rate": 2.478054082530567e-05, "loss": 1.9835, "step": 17431500 }, { "epoch": 50.46, "learning_rate": 2.4779818624953687e-05, "loss": 1.984, "step": 17432000 }, { "epoch": 50.46, "learning_rate": 2.477909497730641e-05, "loss": 1.9968, "step": 17432500 }, { "epoch": 50.46, "learning_rate": 2.4778371329659135e-05, "loss": 2.0091, "step": 17433000 }, { "epoch": 50.46, "learning_rate": 2.4777647682011857e-05, "loss": 1.976, "step": 17433500 }, { "epoch": 50.46, "learning_rate": 2.4776925481659873e-05, "loss": 2.0087, "step": 17434000 }, { "epoch": 50.47, "learning_rate": 2.47762018340126e-05, "loss": 1.9791, "step": 17434500 }, { "epoch": 50.47, "learning_rate": 2.477547818636532e-05, "loss": 1.9951, "step": 17435000 }, { "epoch": 50.47, "learning_rate": 2.4774754538718046e-05, "loss": 2.0234, "step": 17435500 }, { "epoch": 50.47, "learning_rate": 2.4774032338366062e-05, "loss": 2.0091, "step": 17436000 }, { "epoch": 50.47, "learning_rate": 2.477331013801408e-05, "loss": 1.9887, "step": 17436500 }, { "epoch": 50.47, "learning_rate": 2.4772586490366803e-05, "loss": 1.9927, "step": 17437000 }, { "epoch": 50.47, "learning_rate": 2.4771862842719525e-05, "loss": 1.9992, "step": 17437500 }, { "epoch": 50.48, "learning_rate": 2.4771139195072248e-05, "loss": 2.0106, "step": 17438000 }, { "epoch": 50.48, "learning_rate": 2.4770415547424973e-05, "loss": 1.9927, "step": 17438500 }, { "epoch": 50.48, "learning_rate": 2.4769694794368285e-05, "loss": 2.0006, "step": 17439000 }, { "epoch": 50.48, "learning_rate": 2.4768972594016304e-05, "loss": 2.0174, "step": 17439500 }, { "epoch": 50.48, "learning_rate": 2.4768250393664323e-05, "loss": 1.9971, "step": 17440000 }, { "epoch": 50.48, "learning_rate": 2.4767526746017046e-05, "loss": 1.9879, "step": 17440500 }, { "epoch": 50.48, "learning_rate": 2.4766803098369768e-05, "loss": 1.9943, "step": 17441000 }, { "epoch": 50.49, "learning_rate": 2.476607945072249e-05, "loss": 1.9902, "step": 17441500 }, { "epoch": 50.49, "learning_rate": 2.4765355803075212e-05, "loss": 2.0023, "step": 17442000 }, { "epoch": 50.49, "learning_rate": 2.4764632155427938e-05, "loss": 1.9824, "step": 17442500 }, { "epoch": 50.49, "learning_rate": 2.476390850778066e-05, "loss": 1.985, "step": 17443000 }, { "epoch": 50.49, "learning_rate": 2.4763184860133386e-05, "loss": 2.016, "step": 17443500 }, { "epoch": 50.49, "learning_rate": 2.4762461212486108e-05, "loss": 2.0225, "step": 17444000 }, { "epoch": 50.49, "learning_rate": 2.476173756483883e-05, "loss": 1.9983, "step": 17444500 }, { "epoch": 50.5, "learning_rate": 2.4761013917191552e-05, "loss": 1.9952, "step": 17445000 }, { "epoch": 50.5, "learning_rate": 2.4760290269544275e-05, "loss": 2.0148, "step": 17445500 }, { "epoch": 50.5, "learning_rate": 2.4759566621897e-05, "loss": 1.9865, "step": 17446000 }, { "epoch": 50.5, "learning_rate": 2.4758842974249723e-05, "loss": 2.0025, "step": 17446500 }, { "epoch": 50.5, "learning_rate": 2.4758119326602448e-05, "loss": 2.0154, "step": 17447000 }, { "epoch": 50.5, "learning_rate": 2.475739567895517e-05, "loss": 1.9968, "step": 17447500 }, { "epoch": 50.5, "learning_rate": 2.4756672031307893e-05, "loss": 1.9791, "step": 17448000 }, { "epoch": 50.51, "learning_rate": 2.4755948383660615e-05, "loss": 1.9893, "step": 17448500 }, { "epoch": 50.51, "learning_rate": 2.4755226183308634e-05, "loss": 2.0045, "step": 17449000 }, { "epoch": 50.51, "learning_rate": 2.475450253566136e-05, "loss": 2.0072, "step": 17449500 }, { "epoch": 50.51, "learning_rate": 2.4753780335309375e-05, "loss": 1.9908, "step": 17450000 }, { "epoch": 50.51, "learning_rate": 2.47530566876621e-05, "loss": 1.9955, "step": 17450500 }, { "epoch": 50.51, "learning_rate": 2.4752333040014823e-05, "loss": 1.9845, "step": 17451000 }, { "epoch": 50.51, "learning_rate": 2.4751609392367545e-05, "loss": 2.0091, "step": 17451500 }, { "epoch": 50.52, "learning_rate": 2.4750885744720267e-05, "loss": 2.0017, "step": 17452000 }, { "epoch": 50.52, "learning_rate": 2.475016209707299e-05, "loss": 1.9792, "step": 17452500 }, { "epoch": 50.52, "learning_rate": 2.4749438449425712e-05, "loss": 2.0255, "step": 17453000 }, { "epoch": 50.52, "learning_rate": 2.4748714801778437e-05, "loss": 1.9892, "step": 17453500 }, { "epoch": 50.52, "learning_rate": 2.4747991154131163e-05, "loss": 2.0207, "step": 17454000 }, { "epoch": 50.52, "learning_rate": 2.4747267506483885e-05, "loss": 1.9995, "step": 17454500 }, { "epoch": 50.53, "learning_rate": 2.4746543858836607e-05, "loss": 1.9952, "step": 17455000 }, { "epoch": 50.53, "learning_rate": 2.474582021118933e-05, "loss": 1.9979, "step": 17455500 }, { "epoch": 50.53, "learning_rate": 2.4745096563542052e-05, "loss": 1.9859, "step": 17456000 }, { "epoch": 50.53, "learning_rate": 2.4744372915894774e-05, "loss": 1.9939, "step": 17456500 }, { "epoch": 50.53, "learning_rate": 2.474365216283809e-05, "loss": 1.9905, "step": 17457000 }, { "epoch": 50.53, "learning_rate": 2.4742928515190812e-05, "loss": 1.9933, "step": 17457500 }, { "epoch": 50.53, "learning_rate": 2.4742204867543538e-05, "loss": 2.001, "step": 17458000 }, { "epoch": 50.54, "learning_rate": 2.474148121989626e-05, "loss": 1.9928, "step": 17458500 }, { "epoch": 50.54, "learning_rate": 2.4740757572248982e-05, "loss": 1.9765, "step": 17459000 }, { "epoch": 50.54, "learning_rate": 2.4740033924601704e-05, "loss": 2.002, "step": 17459500 }, { "epoch": 50.54, "learning_rate": 2.4739310276954427e-05, "loss": 1.9911, "step": 17460000 }, { "epoch": 50.54, "learning_rate": 2.4738586629307152e-05, "loss": 1.9832, "step": 17460500 }, { "epoch": 50.54, "learning_rate": 2.4737862981659874e-05, "loss": 2.0018, "step": 17461000 }, { "epoch": 50.54, "learning_rate": 2.47371393340126e-05, "loss": 1.9946, "step": 17461500 }, { "epoch": 50.55, "learning_rate": 2.4736415686365322e-05, "loss": 1.9628, "step": 17462000 }, { "epoch": 50.55, "learning_rate": 2.4735692038718045e-05, "loss": 1.9984, "step": 17462500 }, { "epoch": 50.55, "learning_rate": 2.4734969838366063e-05, "loss": 2.0152, "step": 17463000 }, { "epoch": 50.55, "learning_rate": 2.4734246190718786e-05, "loss": 1.9665, "step": 17463500 }, { "epoch": 50.55, "learning_rate": 2.4733522543071508e-05, "loss": 1.9968, "step": 17464000 }, { "epoch": 50.55, "learning_rate": 2.4732800342719527e-05, "loss": 2.0006, "step": 17464500 }, { "epoch": 50.55, "learning_rate": 2.4732076695072253e-05, "loss": 1.9942, "step": 17465000 }, { "epoch": 50.56, "learning_rate": 2.4731353047424975e-05, "loss": 2.0033, "step": 17465500 }, { "epoch": 50.56, "learning_rate": 2.4730629399777697e-05, "loss": 1.9885, "step": 17466000 }, { "epoch": 50.56, "learning_rate": 2.472990575213042e-05, "loss": 1.9931, "step": 17466500 }, { "epoch": 50.56, "learning_rate": 2.4729183551778438e-05, "loss": 2.0058, "step": 17467000 }, { "epoch": 50.56, "learning_rate": 2.472845990413116e-05, "loss": 2.009, "step": 17467500 }, { "epoch": 50.56, "learning_rate": 2.4727736256483886e-05, "loss": 1.9898, "step": 17468000 }, { "epoch": 50.56, "learning_rate": 2.4727012608836608e-05, "loss": 2.0043, "step": 17468500 }, { "epoch": 50.57, "learning_rate": 2.472628896118933e-05, "loss": 1.9885, "step": 17469000 }, { "epoch": 50.57, "learning_rate": 2.472556676083735e-05, "loss": 2.0047, "step": 17469500 }, { "epoch": 50.57, "learning_rate": 2.472484311319007e-05, "loss": 2.007, "step": 17470000 }, { "epoch": 50.57, "learning_rate": 2.4724119465542794e-05, "loss": 1.9803, "step": 17470500 }, { "epoch": 50.57, "learning_rate": 2.4723395817895516e-05, "loss": 1.9892, "step": 17471000 }, { "epoch": 50.57, "learning_rate": 2.472267217024824e-05, "loss": 1.9896, "step": 17471500 }, { "epoch": 50.57, "learning_rate": 2.472195141719155e-05, "loss": 1.9931, "step": 17472000 }, { "epoch": 50.58, "learning_rate": 2.4721227769544276e-05, "loss": 1.9777, "step": 17472500 }, { "epoch": 50.58, "learning_rate": 2.4720504121897002e-05, "loss": 1.97, "step": 17473000 }, { "epoch": 50.58, "learning_rate": 2.4719780474249724e-05, "loss": 1.9771, "step": 17473500 }, { "epoch": 50.58, "learning_rate": 2.4719056826602446e-05, "loss": 1.994, "step": 17474000 }, { "epoch": 50.58, "learning_rate": 2.471833317895517e-05, "loss": 1.9899, "step": 17474500 }, { "epoch": 50.58, "learning_rate": 2.471760953130789e-05, "loss": 1.9945, "step": 17475000 }, { "epoch": 50.58, "learning_rate": 2.4716885883660616e-05, "loss": 1.9872, "step": 17475500 }, { "epoch": 50.59, "learning_rate": 2.4716162236013342e-05, "loss": 1.9854, "step": 17476000 }, { "epoch": 50.59, "learning_rate": 2.4715438588366064e-05, "loss": 2.0142, "step": 17476500 }, { "epoch": 50.59, "learning_rate": 2.4714714940718787e-05, "loss": 2.0106, "step": 17477000 }, { "epoch": 50.59, "learning_rate": 2.471399129307151e-05, "loss": 2.0082, "step": 17477500 }, { "epoch": 50.59, "learning_rate": 2.471326764542423e-05, "loss": 1.9982, "step": 17478000 }, { "epoch": 50.59, "learning_rate": 2.4712543997776953e-05, "loss": 1.9827, "step": 17478500 }, { "epoch": 50.59, "learning_rate": 2.4711823244720265e-05, "loss": 1.998, "step": 17479000 }, { "epoch": 50.6, "learning_rate": 2.471109959707299e-05, "loss": 1.9942, "step": 17479500 }, { "epoch": 50.6, "learning_rate": 2.4710375949425717e-05, "loss": 1.9935, "step": 17480000 }, { "epoch": 50.6, "learning_rate": 2.470965230177844e-05, "loss": 2.0113, "step": 17480500 }, { "epoch": 50.6, "learning_rate": 2.470892865413116e-05, "loss": 1.9715, "step": 17481000 }, { "epoch": 50.6, "learning_rate": 2.4708205006483883e-05, "loss": 2.0057, "step": 17481500 }, { "epoch": 50.6, "learning_rate": 2.4707481358836606e-05, "loss": 1.9884, "step": 17482000 }, { "epoch": 50.6, "learning_rate": 2.4706757711189328e-05, "loss": 1.9807, "step": 17482500 }, { "epoch": 50.61, "learning_rate": 2.4706034063542054e-05, "loss": 1.9627, "step": 17483000 }, { "epoch": 50.61, "learning_rate": 2.4705311863190072e-05, "loss": 2.0058, "step": 17483500 }, { "epoch": 50.61, "learning_rate": 2.4704588215542795e-05, "loss": 2.0196, "step": 17484000 }, { "epoch": 50.61, "learning_rate": 2.4703864567895517e-05, "loss": 1.9879, "step": 17484500 }, { "epoch": 50.61, "learning_rate": 2.4703140920248243e-05, "loss": 1.984, "step": 17485000 }, { "epoch": 50.61, "learning_rate": 2.4702417272600965e-05, "loss": 1.9933, "step": 17485500 }, { "epoch": 50.61, "learning_rate": 2.470169507224898e-05, "loss": 1.9807, "step": 17486000 }, { "epoch": 50.62, "learning_rate": 2.4700971424601706e-05, "loss": 1.9985, "step": 17486500 }, { "epoch": 50.62, "learning_rate": 2.470024777695443e-05, "loss": 2.0027, "step": 17487000 }, { "epoch": 50.62, "learning_rate": 2.4699524129307154e-05, "loss": 1.9925, "step": 17487500 }, { "epoch": 50.62, "learning_rate": 2.4698800481659876e-05, "loss": 1.9926, "step": 17488000 }, { "epoch": 50.62, "learning_rate": 2.469807828130789e-05, "loss": 2.0012, "step": 17488500 }, { "epoch": 50.62, "learning_rate": 2.469735608095591e-05, "loss": 1.9997, "step": 17489000 }, { "epoch": 50.62, "learning_rate": 2.4696632433308633e-05, "loss": 2.0088, "step": 17489500 }, { "epoch": 50.63, "learning_rate": 2.4695908785661355e-05, "loss": 1.9827, "step": 17490000 }, { "epoch": 50.63, "learning_rate": 2.469518513801408e-05, "loss": 2.0116, "step": 17490500 }, { "epoch": 50.63, "learning_rate": 2.4694461490366806e-05, "loss": 1.9905, "step": 17491000 }, { "epoch": 50.63, "learning_rate": 2.469373784271953e-05, "loss": 2.0062, "step": 17491500 }, { "epoch": 50.63, "learning_rate": 2.469301419507225e-05, "loss": 1.9968, "step": 17492000 }, { "epoch": 50.63, "learning_rate": 2.4692290547424973e-05, "loss": 2.0161, "step": 17492500 }, { "epoch": 50.64, "learning_rate": 2.4691566899777695e-05, "loss": 1.9858, "step": 17493000 }, { "epoch": 50.64, "learning_rate": 2.4690846146721007e-05, "loss": 2.0133, "step": 17493500 }, { "epoch": 50.64, "learning_rate": 2.469012249907373e-05, "loss": 2.0, "step": 17494000 }, { "epoch": 50.64, "learning_rate": 2.4689398851426455e-05, "loss": 1.9779, "step": 17494500 }, { "epoch": 50.64, "learning_rate": 2.468867520377918e-05, "loss": 1.9836, "step": 17495000 }, { "epoch": 50.64, "learning_rate": 2.4687951556131903e-05, "loss": 2.0027, "step": 17495500 }, { "epoch": 50.64, "learning_rate": 2.4687227908484625e-05, "loss": 1.9955, "step": 17496000 }, { "epoch": 50.65, "learning_rate": 2.4686504260837348e-05, "loss": 2.0085, "step": 17496500 }, { "epoch": 50.65, "learning_rate": 2.4685782060485367e-05, "loss": 2.0006, "step": 17497000 }, { "epoch": 50.65, "learning_rate": 2.468505841283809e-05, "loss": 2.0057, "step": 17497500 }, { "epoch": 50.65, "learning_rate": 2.468433476519081e-05, "loss": 1.9994, "step": 17498000 }, { "epoch": 50.65, "learning_rate": 2.4683611117543537e-05, "loss": 2.0058, "step": 17498500 }, { "epoch": 50.65, "learning_rate": 2.468288746989626e-05, "loss": 1.9991, "step": 17499000 }, { "epoch": 50.65, "learning_rate": 2.468216382224898e-05, "loss": 2.0089, "step": 17499500 }, { "epoch": 50.66, "learning_rate": 2.4681440174601707e-05, "loss": 1.9931, "step": 17500000 }, { "epoch": 50.66, "learning_rate": 2.4680717974249722e-05, "loss": 2.0089, "step": 17500500 }, { "epoch": 50.66, "learning_rate": 2.4679994326602445e-05, "loss": 2.0205, "step": 17501000 }, { "epoch": 50.66, "learning_rate": 2.467927067895517e-05, "loss": 1.9868, "step": 17501500 }, { "epoch": 50.66, "learning_rate": 2.4678547031307896e-05, "loss": 1.9947, "step": 17502000 }, { "epoch": 50.66, "learning_rate": 2.4677823383660618e-05, "loss": 2.0023, "step": 17502500 }, { "epoch": 50.66, "learning_rate": 2.467709973601334e-05, "loss": 2.0296, "step": 17503000 }, { "epoch": 50.67, "learning_rate": 2.4676377535661356e-05, "loss": 1.9838, "step": 17503500 }, { "epoch": 50.67, "learning_rate": 2.467565388801408e-05, "loss": 1.9932, "step": 17504000 }, { "epoch": 50.67, "learning_rate": 2.4674930240366804e-05, "loss": 1.9686, "step": 17504500 }, { "epoch": 50.67, "learning_rate": 2.4674206592719526e-05, "loss": 2.0352, "step": 17505000 }, { "epoch": 50.67, "learning_rate": 2.4673484392367545e-05, "loss": 1.99, "step": 17505500 }, { "epoch": 50.67, "learning_rate": 2.4672762192015564e-05, "loss": 2.0125, "step": 17506000 }, { "epoch": 50.67, "learning_rate": 2.4672038544368286e-05, "loss": 1.9879, "step": 17506500 }, { "epoch": 50.68, "learning_rate": 2.4671314896721008e-05, "loss": 1.977, "step": 17507000 }, { "epoch": 50.68, "learning_rate": 2.4670591249073734e-05, "loss": 2.0234, "step": 17507500 }, { "epoch": 50.68, "learning_rate": 2.4669867601426456e-05, "loss": 2.0016, "step": 17508000 }, { "epoch": 50.68, "learning_rate": 2.466914395377918e-05, "loss": 1.9841, "step": 17508500 }, { "epoch": 50.68, "learning_rate": 2.4668421753427194e-05, "loss": 1.9496, "step": 17509000 }, { "epoch": 50.68, "learning_rate": 2.466769810577992e-05, "loss": 1.9724, "step": 17509500 }, { "epoch": 50.68, "learning_rate": 2.4666974458132645e-05, "loss": 2.005, "step": 17510000 }, { "epoch": 50.69, "learning_rate": 2.466625225778066e-05, "loss": 1.9969, "step": 17510500 }, { "epoch": 50.69, "learning_rate": 2.466553005742868e-05, "loss": 2.0198, "step": 17511000 }, { "epoch": 50.69, "learning_rate": 2.4664806409781402e-05, "loss": 1.9979, "step": 17511500 }, { "epoch": 50.69, "learning_rate": 2.4664082762134124e-05, "loss": 2.0014, "step": 17512000 }, { "epoch": 50.69, "learning_rate": 2.4663359114486846e-05, "loss": 1.9969, "step": 17512500 }, { "epoch": 50.69, "learning_rate": 2.4662635466839572e-05, "loss": 1.9972, "step": 17513000 }, { "epoch": 50.69, "learning_rate": 2.4661911819192298e-05, "loss": 1.984, "step": 17513500 }, { "epoch": 50.7, "learning_rate": 2.466118817154502e-05, "loss": 1.9894, "step": 17514000 }, { "epoch": 50.7, "learning_rate": 2.4660464523897742e-05, "loss": 1.9843, "step": 17514500 }, { "epoch": 50.7, "learning_rate": 2.4659740876250464e-05, "loss": 1.9978, "step": 17515000 }, { "epoch": 50.7, "learning_rate": 2.4659017228603187e-05, "loss": 1.9942, "step": 17515500 }, { "epoch": 50.7, "learning_rate": 2.465829358095591e-05, "loss": 2.0056, "step": 17516000 }, { "epoch": 50.7, "learning_rate": 2.4657569933308634e-05, "loss": 2.0077, "step": 17516500 }, { "epoch": 50.7, "learning_rate": 2.465684628566136e-05, "loss": 2.0216, "step": 17517000 }, { "epoch": 50.71, "learning_rate": 2.4656124085309376e-05, "loss": 2.0021, "step": 17517500 }, { "epoch": 50.71, "learning_rate": 2.4655400437662098e-05, "loss": 1.9934, "step": 17518000 }, { "epoch": 50.71, "learning_rate": 2.4654678237310117e-05, "loss": 2.0218, "step": 17518500 }, { "epoch": 50.71, "learning_rate": 2.465395458966284e-05, "loss": 2.0018, "step": 17519000 }, { "epoch": 50.71, "learning_rate": 2.465323094201556e-05, "loss": 1.9822, "step": 17519500 }, { "epoch": 50.71, "learning_rate": 2.4652507294368283e-05, "loss": 1.9795, "step": 17520000 }, { "epoch": 50.71, "learning_rate": 2.465178364672101e-05, "loss": 2.0216, "step": 17520500 }, { "epoch": 50.72, "learning_rate": 2.4651059999073735e-05, "loss": 1.9912, "step": 17521000 }, { "epoch": 50.72, "learning_rate": 2.465033779872175e-05, "loss": 1.9999, "step": 17521500 }, { "epoch": 50.72, "learning_rate": 2.4649614151074472e-05, "loss": 2.004, "step": 17522000 }, { "epoch": 50.72, "learning_rate": 2.4648890503427198e-05, "loss": 1.9926, "step": 17522500 }, { "epoch": 50.72, "learning_rate": 2.464816685577992e-05, "loss": 2.0017, "step": 17523000 }, { "epoch": 50.72, "learning_rate": 2.4647444655427936e-05, "loss": 2.0184, "step": 17523500 }, { "epoch": 50.72, "learning_rate": 2.464672100778066e-05, "loss": 2.0029, "step": 17524000 }, { "epoch": 50.73, "learning_rate": 2.4645997360133384e-05, "loss": 2.0148, "step": 17524500 }, { "epoch": 50.73, "learning_rate": 2.464527371248611e-05, "loss": 2.0069, "step": 17525000 }, { "epoch": 50.73, "learning_rate": 2.4644551512134125e-05, "loss": 1.9907, "step": 17525500 }, { "epoch": 50.73, "learning_rate": 2.4643827864486847e-05, "loss": 2.0446, "step": 17526000 }, { "epoch": 50.73, "learning_rate": 2.4643104216839573e-05, "loss": 1.9987, "step": 17526500 }, { "epoch": 50.73, "learning_rate": 2.4642380569192295e-05, "loss": 2.0077, "step": 17527000 }, { "epoch": 50.73, "learning_rate": 2.4641656921545017e-05, "loss": 2.0126, "step": 17527500 }, { "epoch": 50.74, "learning_rate": 2.464093327389774e-05, "loss": 2.0036, "step": 17528000 }, { "epoch": 50.74, "learning_rate": 2.4640209626250465e-05, "loss": 1.9733, "step": 17528500 }, { "epoch": 50.74, "learning_rate": 2.4639485978603187e-05, "loss": 1.9847, "step": 17529000 }, { "epoch": 50.74, "learning_rate": 2.4638762330955913e-05, "loss": 1.9902, "step": 17529500 }, { "epoch": 50.74, "learning_rate": 2.4638038683308635e-05, "loss": 2.0017, "step": 17530000 }, { "epoch": 50.74, "learning_rate": 2.4637315035661357e-05, "loss": 1.9753, "step": 17530500 }, { "epoch": 50.75, "learning_rate": 2.463659138801408e-05, "loss": 1.9906, "step": 17531000 }, { "epoch": 50.75, "learning_rate": 2.46358691876621e-05, "loss": 1.9932, "step": 17531500 }, { "epoch": 50.75, "learning_rate": 2.463514554001482e-05, "loss": 1.9649, "step": 17532000 }, { "epoch": 50.75, "learning_rate": 2.4634421892367546e-05, "loss": 2.0006, "step": 17532500 }, { "epoch": 50.75, "learning_rate": 2.463369824472027e-05, "loss": 1.9979, "step": 17533000 }, { "epoch": 50.75, "learning_rate": 2.463297459707299e-05, "loss": 2.0129, "step": 17533500 }, { "epoch": 50.75, "learning_rate": 2.4632250949425713e-05, "loss": 1.9912, "step": 17534000 }, { "epoch": 50.76, "learning_rate": 2.4631527301778435e-05, "loss": 1.991, "step": 17534500 }, { "epoch": 50.76, "learning_rate": 2.463080365413116e-05, "loss": 1.9867, "step": 17535000 }, { "epoch": 50.76, "learning_rate": 2.4630080006483887e-05, "loss": 2.0029, "step": 17535500 }, { "epoch": 50.76, "learning_rate": 2.462935635883661e-05, "loss": 2.0012, "step": 17536000 }, { "epoch": 50.76, "learning_rate": 2.4628634158484624e-05, "loss": 1.9926, "step": 17536500 }, { "epoch": 50.76, "learning_rate": 2.462791051083735e-05, "loss": 2.0055, "step": 17537000 }, { "epoch": 50.76, "learning_rate": 2.4627186863190072e-05, "loss": 2.0242, "step": 17537500 }, { "epoch": 50.77, "learning_rate": 2.4626463215542794e-05, "loss": 1.988, "step": 17538000 }, { "epoch": 50.77, "learning_rate": 2.4625739567895517e-05, "loss": 2.0085, "step": 17538500 }, { "epoch": 50.77, "learning_rate": 2.4625017367543536e-05, "loss": 2.0176, "step": 17539000 }, { "epoch": 50.77, "learning_rate": 2.462429516719155e-05, "loss": 2.0005, "step": 17539500 }, { "epoch": 50.77, "learning_rate": 2.4623571519544277e-05, "loss": 1.9808, "step": 17540000 }, { "epoch": 50.77, "learning_rate": 2.4622847871897e-05, "loss": 1.9914, "step": 17540500 }, { "epoch": 50.77, "learning_rate": 2.4622124224249725e-05, "loss": 2.0064, "step": 17541000 }, { "epoch": 50.78, "learning_rate": 2.4621400576602447e-05, "loss": 1.9927, "step": 17541500 }, { "epoch": 50.78, "learning_rate": 2.462067692895517e-05, "loss": 2.0006, "step": 17542000 }, { "epoch": 50.78, "learning_rate": 2.461995328130789e-05, "loss": 1.9874, "step": 17542500 }, { "epoch": 50.78, "learning_rate": 2.4619229633660614e-05, "loss": 2.0076, "step": 17543000 }, { "epoch": 50.78, "learning_rate": 2.4618507433308636e-05, "loss": 1.9975, "step": 17543500 }, { "epoch": 50.78, "learning_rate": 2.4617783785661358e-05, "loss": 1.9847, "step": 17544000 }, { "epoch": 50.78, "learning_rate": 2.4617061585309377e-05, "loss": 2.0011, "step": 17544500 }, { "epoch": 50.79, "learning_rate": 2.46163379376621e-05, "loss": 1.9955, "step": 17545000 }, { "epoch": 50.79, "learning_rate": 2.461561429001482e-05, "loss": 1.9921, "step": 17545500 }, { "epoch": 50.79, "learning_rate": 2.4614890642367544e-05, "loss": 2.0053, "step": 17546000 }, { "epoch": 50.79, "learning_rate": 2.4614166994720266e-05, "loss": 1.9896, "step": 17546500 }, { "epoch": 50.79, "learning_rate": 2.4613444794368285e-05, "loss": 2.0138, "step": 17547000 }, { "epoch": 50.79, "learning_rate": 2.461272114672101e-05, "loss": 1.9994, "step": 17547500 }, { "epoch": 50.79, "learning_rate": 2.4611997499073733e-05, "loss": 2.0149, "step": 17548000 }, { "epoch": 50.8, "learning_rate": 2.4611273851426455e-05, "loss": 2.0131, "step": 17548500 }, { "epoch": 50.8, "learning_rate": 2.4610550203779177e-05, "loss": 1.9967, "step": 17549000 }, { "epoch": 50.8, "learning_rate": 2.4609826556131903e-05, "loss": 2.0265, "step": 17549500 }, { "epoch": 50.8, "learning_rate": 2.4609102908484625e-05, "loss": 1.9856, "step": 17550000 }, { "epoch": 50.8, "learning_rate": 2.460838070813264e-05, "loss": 2.001, "step": 17550500 }, { "epoch": 50.8, "learning_rate": 2.4607657060485366e-05, "loss": 1.994, "step": 17551000 }, { "epoch": 50.8, "learning_rate": 2.460693341283809e-05, "loss": 1.9973, "step": 17551500 }, { "epoch": 50.81, "learning_rate": 2.4606211212486108e-05, "loss": 1.983, "step": 17552000 }, { "epoch": 50.81, "learning_rate": 2.460548756483883e-05, "loss": 1.978, "step": 17552500 }, { "epoch": 50.81, "learning_rate": 2.4604763917191552e-05, "loss": 2.0205, "step": 17553000 }, { "epoch": 50.81, "learning_rate": 2.4604040269544278e-05, "loss": 1.9772, "step": 17553500 }, { "epoch": 50.81, "learning_rate": 2.4603316621897e-05, "loss": 1.9826, "step": 17554000 }, { "epoch": 50.81, "learning_rate": 2.4602592974249725e-05, "loss": 2.0124, "step": 17554500 }, { "epoch": 50.81, "learning_rate": 2.4601869326602448e-05, "loss": 1.9836, "step": 17555000 }, { "epoch": 50.82, "learning_rate": 2.460114567895517e-05, "loss": 1.9941, "step": 17555500 }, { "epoch": 50.82, "learning_rate": 2.4600422031307892e-05, "loss": 2.0124, "step": 17556000 }, { "epoch": 50.82, "learning_rate": 2.459969983095591e-05, "loss": 2.0018, "step": 17556500 }, { "epoch": 50.82, "learning_rate": 2.4598976183308633e-05, "loss": 2.0123, "step": 17557000 }, { "epoch": 50.82, "learning_rate": 2.4598252535661356e-05, "loss": 2.023, "step": 17557500 }, { "epoch": 50.82, "learning_rate": 2.4597528888014078e-05, "loss": 1.9853, "step": 17558000 }, { "epoch": 50.82, "learning_rate": 2.4596805240366803e-05, "loss": 2.0241, "step": 17558500 }, { "epoch": 50.83, "learning_rate": 2.459608159271953e-05, "loss": 1.983, "step": 17559000 }, { "epoch": 50.83, "learning_rate": 2.459535794507225e-05, "loss": 2.0119, "step": 17559500 }, { "epoch": 50.83, "learning_rate": 2.4594634297424974e-05, "loss": 1.994, "step": 17560000 }, { "epoch": 50.83, "learning_rate": 2.4593910649777696e-05, "loss": 1.9932, "step": 17560500 }, { "epoch": 50.83, "learning_rate": 2.4593187002130418e-05, "loss": 1.9749, "step": 17561000 }, { "epoch": 50.83, "learning_rate": 2.4592464801778437e-05, "loss": 1.9964, "step": 17561500 }, { "epoch": 50.83, "learning_rate": 2.4591741154131163e-05, "loss": 1.9923, "step": 17562000 }, { "epoch": 50.84, "learning_rate": 2.4591017506483885e-05, "loss": 1.9847, "step": 17562500 }, { "epoch": 50.84, "learning_rate": 2.4590293858836607e-05, "loss": 1.9737, "step": 17563000 }, { "epoch": 50.84, "learning_rate": 2.458957021118933e-05, "loss": 1.9936, "step": 17563500 }, { "epoch": 50.84, "learning_rate": 2.4588846563542055e-05, "loss": 2.0146, "step": 17564000 }, { "epoch": 50.84, "learning_rate": 2.4588122915894777e-05, "loss": 1.9861, "step": 17564500 }, { "epoch": 50.84, "learning_rate": 2.4587400715542793e-05, "loss": 2.015, "step": 17565000 }, { "epoch": 50.84, "learning_rate": 2.4586679962486105e-05, "loss": 2.0106, "step": 17565500 }, { "epoch": 50.85, "learning_rate": 2.4585957762134124e-05, "loss": 1.9886, "step": 17566000 }, { "epoch": 50.85, "learning_rate": 2.458523411448685e-05, "loss": 2.0031, "step": 17566500 }, { "epoch": 50.85, "learning_rate": 2.4584510466839572e-05, "loss": 2.0058, "step": 17567000 }, { "epoch": 50.85, "learning_rate": 2.4583786819192294e-05, "loss": 1.9887, "step": 17567500 }, { "epoch": 50.85, "learning_rate": 2.4583063171545016e-05, "loss": 2.0092, "step": 17568000 }, { "epoch": 50.85, "learning_rate": 2.4582339523897742e-05, "loss": 1.9855, "step": 17568500 }, { "epoch": 50.86, "learning_rate": 2.4581615876250464e-05, "loss": 1.9857, "step": 17569000 }, { "epoch": 50.86, "learning_rate": 2.4580892228603186e-05, "loss": 1.995, "step": 17569500 }, { "epoch": 50.86, "learning_rate": 2.4580168580955912e-05, "loss": 2.0041, "step": 17570000 }, { "epoch": 50.86, "learning_rate": 2.4579444933308634e-05, "loss": 2.0195, "step": 17570500 }, { "epoch": 50.86, "learning_rate": 2.4578721285661356e-05, "loss": 1.9652, "step": 17571000 }, { "epoch": 50.86, "learning_rate": 2.457799763801408e-05, "loss": 1.9895, "step": 17571500 }, { "epoch": 50.86, "learning_rate": 2.4577273990366804e-05, "loss": 1.9836, "step": 17572000 }, { "epoch": 50.87, "learning_rate": 2.4576550342719526e-05, "loss": 2.0032, "step": 17572500 }, { "epoch": 50.87, "learning_rate": 2.457582669507225e-05, "loss": 1.9937, "step": 17573000 }, { "epoch": 50.87, "learning_rate": 2.4575104494720268e-05, "loss": 2.0251, "step": 17573500 }, { "epoch": 50.87, "learning_rate": 2.4574382294368287e-05, "loss": 2.0081, "step": 17574000 }, { "epoch": 50.87, "learning_rate": 2.4573660094016306e-05, "loss": 2.0161, "step": 17574500 }, { "epoch": 50.87, "learning_rate": 2.4572936446369028e-05, "loss": 1.9937, "step": 17575000 }, { "epoch": 50.87, "learning_rate": 2.457221279872175e-05, "loss": 2.0087, "step": 17575500 }, { "epoch": 50.88, "learning_rate": 2.4571489151074472e-05, "loss": 2.0195, "step": 17576000 }, { "epoch": 50.88, "learning_rate": 2.457076695072249e-05, "loss": 2.0174, "step": 17576500 }, { "epoch": 50.88, "learning_rate": 2.4570043303075213e-05, "loss": 2.0127, "step": 17577000 }, { "epoch": 50.88, "learning_rate": 2.456931965542794e-05, "loss": 1.9987, "step": 17577500 }, { "epoch": 50.88, "learning_rate": 2.456859600778066e-05, "loss": 2.0063, "step": 17578000 }, { "epoch": 50.88, "learning_rate": 2.4567872360133384e-05, "loss": 2.0057, "step": 17578500 }, { "epoch": 50.88, "learning_rate": 2.4567148712486106e-05, "loss": 1.9862, "step": 17579000 }, { "epoch": 50.89, "learning_rate": 2.456642506483883e-05, "loss": 1.9999, "step": 17579500 }, { "epoch": 50.89, "learning_rate": 2.4565701417191554e-05, "loss": 2.0094, "step": 17580000 }, { "epoch": 50.89, "learning_rate": 2.4564977769544276e-05, "loss": 2.0148, "step": 17580500 }, { "epoch": 50.89, "learning_rate": 2.4564254121897e-05, "loss": 1.9963, "step": 17581000 }, { "epoch": 50.89, "learning_rate": 2.456353192154502e-05, "loss": 2.0104, "step": 17581500 }, { "epoch": 50.89, "learning_rate": 2.4562808273897743e-05, "loss": 2.0072, "step": 17582000 }, { "epoch": 50.89, "learning_rate": 2.4562084626250465e-05, "loss": 2.0132, "step": 17582500 }, { "epoch": 50.9, "learning_rate": 2.4561360978603187e-05, "loss": 2.0153, "step": 17583000 }, { "epoch": 50.9, "learning_rate": 2.456063733095591e-05, "loss": 2.0013, "step": 17583500 }, { "epoch": 50.9, "learning_rate": 2.455991368330863e-05, "loss": 1.9907, "step": 17584000 }, { "epoch": 50.9, "learning_rate": 2.4559190035661357e-05, "loss": 2.0022, "step": 17584500 }, { "epoch": 50.9, "learning_rate": 2.4558466388014083e-05, "loss": 1.9937, "step": 17585000 }, { "epoch": 50.9, "learning_rate": 2.4557742740366805e-05, "loss": 2.0182, "step": 17585500 }, { "epoch": 50.9, "learning_rate": 2.4557019092719527e-05, "loss": 2.0162, "step": 17586000 }, { "epoch": 50.91, "learning_rate": 2.455629544507225e-05, "loss": 1.9928, "step": 17586500 }, { "epoch": 50.91, "learning_rate": 2.4555571797424972e-05, "loss": 2.0108, "step": 17587000 }, { "epoch": 50.91, "learning_rate": 2.455484959707299e-05, "loss": 2.0005, "step": 17587500 }, { "epoch": 50.91, "learning_rate": 2.4554127396721006e-05, "loss": 1.99, "step": 17588000 }, { "epoch": 50.91, "learning_rate": 2.4553403749073732e-05, "loss": 2.0026, "step": 17588500 }, { "epoch": 50.91, "learning_rate": 2.4552680101426457e-05, "loss": 2.0008, "step": 17589000 }, { "epoch": 50.91, "learning_rate": 2.455195645377918e-05, "loss": 1.9975, "step": 17589500 }, { "epoch": 50.92, "learning_rate": 2.4551232806131902e-05, "loss": 2.0039, "step": 17590000 }, { "epoch": 50.92, "learning_rate": 2.455051060577992e-05, "loss": 2.0302, "step": 17590500 }, { "epoch": 50.92, "learning_rate": 2.4549788405427936e-05, "loss": 2.0174, "step": 17591000 }, { "epoch": 50.92, "learning_rate": 2.454906475778066e-05, "loss": 2.0144, "step": 17591500 }, { "epoch": 50.92, "learning_rate": 2.4548341110133384e-05, "loss": 1.9996, "step": 17592000 }, { "epoch": 50.92, "learning_rate": 2.454761746248611e-05, "loss": 2.0079, "step": 17592500 }, { "epoch": 50.92, "learning_rate": 2.4546893814838832e-05, "loss": 1.9956, "step": 17593000 }, { "epoch": 50.93, "learning_rate": 2.4546170167191554e-05, "loss": 2.0057, "step": 17593500 }, { "epoch": 50.93, "learning_rate": 2.4545446519544277e-05, "loss": 2.0034, "step": 17594000 }, { "epoch": 50.93, "learning_rate": 2.4544722871897e-05, "loss": 2.0071, "step": 17594500 }, { "epoch": 50.93, "learning_rate": 2.454399922424972e-05, "loss": 2.007, "step": 17595000 }, { "epoch": 50.93, "learning_rate": 2.4543275576602447e-05, "loss": 1.9867, "step": 17595500 }, { "epoch": 50.93, "learning_rate": 2.4542551928955172e-05, "loss": 2.0045, "step": 17596000 }, { "epoch": 50.93, "learning_rate": 2.4541829728603188e-05, "loss": 2.0055, "step": 17596500 }, { "epoch": 50.94, "learning_rate": 2.454110608095591e-05, "loss": 1.9932, "step": 17597000 }, { "epoch": 50.94, "learning_rate": 2.4540382433308636e-05, "loss": 1.9901, "step": 17597500 }, { "epoch": 50.94, "learning_rate": 2.4539658785661358e-05, "loss": 2.0109, "step": 17598000 }, { "epoch": 50.94, "learning_rate": 2.4538936585309374e-05, "loss": 1.9969, "step": 17598500 }, { "epoch": 50.94, "learning_rate": 2.4538214384957392e-05, "loss": 2.0186, "step": 17599000 }, { "epoch": 50.94, "learning_rate": 2.4537490737310115e-05, "loss": 2.0254, "step": 17599500 }, { "epoch": 50.94, "learning_rate": 2.453676708966284e-05, "loss": 2.0075, "step": 17600000 }, { "epoch": 50.95, "learning_rate": 2.4536043442015563e-05, "loss": 1.981, "step": 17600500 }, { "epoch": 50.95, "learning_rate": 2.4535319794368285e-05, "loss": 1.997, "step": 17601000 }, { "epoch": 50.95, "learning_rate": 2.453459614672101e-05, "loss": 1.9839, "step": 17601500 }, { "epoch": 50.95, "learning_rate": 2.4533872499073733e-05, "loss": 1.9963, "step": 17602000 }, { "epoch": 50.95, "learning_rate": 2.4533148851426455e-05, "loss": 1.987, "step": 17602500 }, { "epoch": 50.95, "learning_rate": 2.4532425203779177e-05, "loss": 1.9847, "step": 17603000 }, { "epoch": 50.95, "learning_rate": 2.4531701556131903e-05, "loss": 2.0164, "step": 17603500 }, { "epoch": 50.96, "learning_rate": 2.4530980803075215e-05, "loss": 1.9937, "step": 17604000 }, { "epoch": 50.96, "learning_rate": 2.4530257155427937e-05, "loss": 2.0042, "step": 17604500 }, { "epoch": 50.96, "learning_rate": 2.452953350778066e-05, "loss": 1.9596, "step": 17605000 }, { "epoch": 50.96, "learning_rate": 2.4528809860133385e-05, "loss": 2.0091, "step": 17605500 }, { "epoch": 50.96, "learning_rate": 2.4528086212486107e-05, "loss": 2.0024, "step": 17606000 }, { "epoch": 50.96, "learning_rate": 2.452736256483883e-05, "loss": 2.0047, "step": 17606500 }, { "epoch": 50.97, "learning_rate": 2.4526638917191552e-05, "loss": 2.0183, "step": 17607000 }, { "epoch": 50.97, "learning_rate": 2.4525915269544277e-05, "loss": 2.0066, "step": 17607500 }, { "epoch": 50.97, "learning_rate": 2.4525193069192296e-05, "loss": 2.0064, "step": 17608000 }, { "epoch": 50.97, "learning_rate": 2.452446942154502e-05, "loss": 2.0116, "step": 17608500 }, { "epoch": 50.97, "learning_rate": 2.452374577389774e-05, "loss": 1.9679, "step": 17609000 }, { "epoch": 50.97, "learning_rate": 2.4523022126250463e-05, "loss": 1.9919, "step": 17609500 }, { "epoch": 50.97, "learning_rate": 2.4522298478603185e-05, "loss": 2.0007, "step": 17610000 }, { "epoch": 50.98, "learning_rate": 2.452157483095591e-05, "loss": 1.9881, "step": 17610500 }, { "epoch": 50.98, "learning_rate": 2.4520851183308637e-05, "loss": 1.9894, "step": 17611000 }, { "epoch": 50.98, "learning_rate": 2.452012753566136e-05, "loss": 1.997, "step": 17611500 }, { "epoch": 50.98, "learning_rate": 2.451940388801408e-05, "loss": 1.9965, "step": 17612000 }, { "epoch": 50.98, "learning_rate": 2.4518680240366803e-05, "loss": 1.9939, "step": 17612500 }, { "epoch": 50.98, "learning_rate": 2.4517956592719525e-05, "loss": 1.9864, "step": 17613000 }, { "epoch": 50.98, "learning_rate": 2.4517232945072248e-05, "loss": 1.9933, "step": 17613500 }, { "epoch": 50.99, "learning_rate": 2.4516509297424973e-05, "loss": 2.0052, "step": 17614000 }, { "epoch": 50.99, "learning_rate": 2.45157856497777e-05, "loss": 2.0182, "step": 17614500 }, { "epoch": 50.99, "learning_rate": 2.451506200213042e-05, "loss": 2.0023, "step": 17615000 }, { "epoch": 50.99, "learning_rate": 2.4514339801778437e-05, "loss": 1.99, "step": 17615500 }, { "epoch": 50.99, "learning_rate": 2.4513616154131162e-05, "loss": 1.9817, "step": 17616000 }, { "epoch": 50.99, "learning_rate": 2.4512893953779178e-05, "loss": 2.0061, "step": 17616500 }, { "epoch": 50.99, "learning_rate": 2.45121703061319e-05, "loss": 1.9763, "step": 17617000 }, { "epoch": 51.0, "learning_rate": 2.4511446658484626e-05, "loss": 2.0107, "step": 17617500 }, { "epoch": 51.0, "learning_rate": 2.4510723010837348e-05, "loss": 1.9947, "step": 17618000 }, { "epoch": 51.0, "learning_rate": 2.4509999363190074e-05, "loss": 1.9891, "step": 17618500 }, { "epoch": 51.0, "learning_rate": 2.4509275715542796e-05, "loss": 1.9701, "step": 17619000 }, { "epoch": 51.0, "eval_accuracy": 0.6732738776787731, "eval_accuracy_mlm": 0.6393918868388159, "eval_accuracy_nsp": 0.8549586833674774, "eval_loss": 2.174137830734253, "eval_runtime": 331.935, "eval_samples_per_second": 1314.673, "eval_steps_per_second": 54.779, "step": 17619072 }, { "epoch": 51.0, "learning_rate": 2.4508552067895518e-05, "loss": 1.9949, "step": 17619500 }, { "epoch": 51.0, "learning_rate": 2.450782842024824e-05, "loss": 1.9747, "step": 17620000 }, { "epoch": 51.0, "learning_rate": 2.4507104772600963e-05, "loss": 1.9979, "step": 17620500 }, { "epoch": 51.01, "learning_rate": 2.4506381124953688e-05, "loss": 1.9735, "step": 17621000 }, { "epoch": 51.01, "learning_rate": 2.4505658924601704e-05, "loss": 1.9927, "step": 17621500 }, { "epoch": 51.01, "learning_rate": 2.450493527695443e-05, "loss": 1.9978, "step": 17622000 }, { "epoch": 51.01, "learning_rate": 2.450421162930715e-05, "loss": 1.9624, "step": 17622500 }, { "epoch": 51.01, "learning_rate": 2.4503487981659877e-05, "loss": 2.0011, "step": 17623000 }, { "epoch": 51.01, "learning_rate": 2.45027643340126e-05, "loss": 1.9851, "step": 17623500 }, { "epoch": 51.01, "learning_rate": 2.4502040686365322e-05, "loss": 1.9784, "step": 17624000 }, { "epoch": 51.02, "learning_rate": 2.4501317038718044e-05, "loss": 1.9831, "step": 17624500 }, { "epoch": 51.02, "learning_rate": 2.4500596285661356e-05, "loss": 1.9701, "step": 17625000 }, { "epoch": 51.02, "learning_rate": 2.449987263801408e-05, "loss": 1.9709, "step": 17625500 }, { "epoch": 51.02, "learning_rate": 2.4499148990366804e-05, "loss": 1.9816, "step": 17626000 }, { "epoch": 51.02, "learning_rate": 2.4498425342719526e-05, "loss": 1.9544, "step": 17626500 }, { "epoch": 51.02, "learning_rate": 2.4497701695072252e-05, "loss": 1.9795, "step": 17627000 }, { "epoch": 51.02, "learning_rate": 2.4496978047424974e-05, "loss": 1.967, "step": 17627500 }, { "epoch": 51.03, "learning_rate": 2.4496254399777696e-05, "loss": 1.992, "step": 17628000 }, { "epoch": 51.03, "learning_rate": 2.449553075213042e-05, "loss": 1.9772, "step": 17628500 }, { "epoch": 51.03, "learning_rate": 2.4494808551778438e-05, "loss": 1.9825, "step": 17629000 }, { "epoch": 51.03, "learning_rate": 2.4494084904131163e-05, "loss": 1.9858, "step": 17629500 }, { "epoch": 51.03, "learning_rate": 2.4493361256483885e-05, "loss": 1.9852, "step": 17630000 }, { "epoch": 51.03, "learning_rate": 2.4492637608836608e-05, "loss": 1.9825, "step": 17630500 }, { "epoch": 51.03, "learning_rate": 2.449191396118933e-05, "loss": 1.9753, "step": 17631000 }, { "epoch": 51.04, "learning_rate": 2.4491190313542052e-05, "loss": 2.0039, "step": 17631500 }, { "epoch": 51.04, "learning_rate": 2.4490466665894778e-05, "loss": 1.9847, "step": 17632000 }, { "epoch": 51.04, "learning_rate": 2.44897430182475e-05, "loss": 1.9872, "step": 17632500 }, { "epoch": 51.04, "learning_rate": 2.4489019370600226e-05, "loss": 1.9923, "step": 17633000 }, { "epoch": 51.04, "learning_rate": 2.4488295722952948e-05, "loss": 1.9524, "step": 17633500 }, { "epoch": 51.04, "learning_rate": 2.4487573522600963e-05, "loss": 1.9673, "step": 17634000 }, { "epoch": 51.04, "learning_rate": 2.448684987495369e-05, "loss": 1.9886, "step": 17634500 }, { "epoch": 51.05, "learning_rate": 2.448612622730641e-05, "loss": 1.9651, "step": 17635000 }, { "epoch": 51.05, "learning_rate": 2.4485402579659133e-05, "loss": 1.9806, "step": 17635500 }, { "epoch": 51.05, "learning_rate": 2.4484678932011856e-05, "loss": 1.9819, "step": 17636000 }, { "epoch": 51.05, "learning_rate": 2.448395528436458e-05, "loss": 2.0005, "step": 17636500 }, { "epoch": 51.05, "learning_rate": 2.4483231636717304e-05, "loss": 1.9665, "step": 17637000 }, { "epoch": 51.05, "learning_rate": 2.448250798907003e-05, "loss": 1.9687, "step": 17637500 }, { "epoch": 51.05, "learning_rate": 2.448178434142275e-05, "loss": 1.9729, "step": 17638000 }, { "epoch": 51.06, "learning_rate": 2.4481062141070767e-05, "loss": 1.9739, "step": 17638500 }, { "epoch": 51.06, "learning_rate": 2.448033849342349e-05, "loss": 2.0044, "step": 17639000 }, { "epoch": 51.06, "learning_rate": 2.4479614845776215e-05, "loss": 1.975, "step": 17639500 }, { "epoch": 51.06, "learning_rate": 2.4478891198128937e-05, "loss": 1.984, "step": 17640000 }, { "epoch": 51.06, "learning_rate": 2.4478167550481663e-05, "loss": 1.9874, "step": 17640500 }, { "epoch": 51.06, "learning_rate": 2.4477443902834385e-05, "loss": 2.0056, "step": 17641000 }, { "epoch": 51.06, "learning_rate": 2.4476720255187107e-05, "loss": 1.984, "step": 17641500 }, { "epoch": 51.07, "learning_rate": 2.447599660753983e-05, "loss": 1.9732, "step": 17642000 }, { "epoch": 51.07, "learning_rate": 2.4475272959892555e-05, "loss": 1.9892, "step": 17642500 }, { "epoch": 51.07, "learning_rate": 2.4474552206835867e-05, "loss": 1.9649, "step": 17643000 }, { "epoch": 51.07, "learning_rate": 2.447382855918859e-05, "loss": 1.9791, "step": 17643500 }, { "epoch": 51.07, "learning_rate": 2.4473104911541315e-05, "loss": 1.9715, "step": 17644000 }, { "epoch": 51.07, "learning_rate": 2.4472381263894037e-05, "loss": 1.9926, "step": 17644500 }, { "epoch": 51.08, "learning_rate": 2.447165761624676e-05, "loss": 2.0049, "step": 17645000 }, { "epoch": 51.08, "learning_rate": 2.4470933968599482e-05, "loss": 2.0015, "step": 17645500 }, { "epoch": 51.08, "learning_rate": 2.4470210320952204e-05, "loss": 1.9711, "step": 17646000 }, { "epoch": 51.08, "learning_rate": 2.446948667330493e-05, "loss": 1.9765, "step": 17646500 }, { "epoch": 51.08, "learning_rate": 2.4468763025657652e-05, "loss": 1.9781, "step": 17647000 }, { "epoch": 51.08, "learning_rate": 2.4468039378010378e-05, "loss": 1.981, "step": 17647500 }, { "epoch": 51.08, "learning_rate": 2.44673157303631e-05, "loss": 1.9917, "step": 17648000 }, { "epoch": 51.09, "learning_rate": 2.446659353001112e-05, "loss": 1.9621, "step": 17648500 }, { "epoch": 51.09, "learning_rate": 2.446586988236384e-05, "loss": 1.9682, "step": 17649000 }, { "epoch": 51.09, "learning_rate": 2.4465147682011856e-05, "loss": 1.9912, "step": 17649500 }, { "epoch": 51.09, "learning_rate": 2.446442403436458e-05, "loss": 1.9995, "step": 17650000 }, { "epoch": 51.09, "learning_rate": 2.4463700386717304e-05, "loss": 1.9759, "step": 17650500 }, { "epoch": 51.09, "learning_rate": 2.4462976739070027e-05, "loss": 1.9904, "step": 17651000 }, { "epoch": 51.09, "learning_rate": 2.4462253091422752e-05, "loss": 1.9909, "step": 17651500 }, { "epoch": 51.1, "learning_rate": 2.4461530891070768e-05, "loss": 1.9844, "step": 17652000 }, { "epoch": 51.1, "learning_rate": 2.4460807243423493e-05, "loss": 1.9696, "step": 17652500 }, { "epoch": 51.1, "learning_rate": 2.4460083595776216e-05, "loss": 1.9827, "step": 17653000 }, { "epoch": 51.1, "learning_rate": 2.445936139542423e-05, "loss": 1.995, "step": 17653500 }, { "epoch": 51.1, "learning_rate": 2.4458637747776957e-05, "loss": 1.9909, "step": 17654000 }, { "epoch": 51.1, "learning_rate": 2.445791410012968e-05, "loss": 1.9711, "step": 17654500 }, { "epoch": 51.1, "learning_rate": 2.44571904524824e-05, "loss": 1.9805, "step": 17655000 }, { "epoch": 51.11, "learning_rate": 2.445646825213042e-05, "loss": 1.994, "step": 17655500 }, { "epoch": 51.11, "learning_rate": 2.4455744604483142e-05, "loss": 1.9783, "step": 17656000 }, { "epoch": 51.11, "learning_rate": 2.445502240413116e-05, "loss": 1.984, "step": 17656500 }, { "epoch": 51.11, "learning_rate": 2.4454298756483884e-05, "loss": 1.9605, "step": 17657000 }, { "epoch": 51.11, "learning_rate": 2.4453575108836606e-05, "loss": 1.9961, "step": 17657500 }, { "epoch": 51.11, "learning_rate": 2.445285146118933e-05, "loss": 1.9765, "step": 17658000 }, { "epoch": 51.11, "learning_rate": 2.4452127813542054e-05, "loss": 1.9907, "step": 17658500 }, { "epoch": 51.12, "learning_rate": 2.4451404165894776e-05, "loss": 2.0014, "step": 17659000 }, { "epoch": 51.12, "learning_rate": 2.44506805182475e-05, "loss": 1.9877, "step": 17659500 }, { "epoch": 51.12, "learning_rate": 2.4449956870600224e-05, "loss": 1.9856, "step": 17660000 }, { "epoch": 51.12, "learning_rate": 2.4449233222952946e-05, "loss": 1.9659, "step": 17660500 }, { "epoch": 51.12, "learning_rate": 2.4448509575305668e-05, "loss": 1.9546, "step": 17661000 }, { "epoch": 51.12, "learning_rate": 2.4447785927658394e-05, "loss": 2.0012, "step": 17661500 }, { "epoch": 51.12, "learning_rate": 2.4447062280011116e-05, "loss": 1.9893, "step": 17662000 }, { "epoch": 51.13, "learning_rate": 2.444633863236384e-05, "loss": 1.9668, "step": 17662500 }, { "epoch": 51.13, "learning_rate": 2.4445614984716564e-05, "loss": 1.9966, "step": 17663000 }, { "epoch": 51.13, "learning_rate": 2.4444891337069286e-05, "loss": 1.987, "step": 17663500 }, { "epoch": 51.13, "learning_rate": 2.444416768942201e-05, "loss": 1.9552, "step": 17664000 }, { "epoch": 51.13, "learning_rate": 2.444344404177473e-05, "loss": 1.964, "step": 17664500 }, { "epoch": 51.13, "learning_rate": 2.4442723288718043e-05, "loss": 1.9699, "step": 17665000 }, { "epoch": 51.13, "learning_rate": 2.4442001088366062e-05, "loss": 2.0116, "step": 17665500 }, { "epoch": 51.14, "learning_rate": 2.4441277440718784e-05, "loss": 1.987, "step": 17666000 }, { "epoch": 51.14, "learning_rate": 2.4440553793071506e-05, "loss": 1.9843, "step": 17666500 }, { "epoch": 51.14, "learning_rate": 2.4439830145424232e-05, "loss": 1.9891, "step": 17667000 }, { "epoch": 51.14, "learning_rate": 2.4439106497776958e-05, "loss": 2.0022, "step": 17667500 }, { "epoch": 51.14, "learning_rate": 2.443838285012968e-05, "loss": 1.9888, "step": 17668000 }, { "epoch": 51.14, "learning_rate": 2.4437659202482402e-05, "loss": 2.0083, "step": 17668500 }, { "epoch": 51.14, "learning_rate": 2.443693700213042e-05, "loss": 1.9891, "step": 17669000 }, { "epoch": 51.15, "learning_rate": 2.4436213354483143e-05, "loss": 1.9905, "step": 17669500 }, { "epoch": 51.15, "learning_rate": 2.4435489706835865e-05, "loss": 1.9767, "step": 17670000 }, { "epoch": 51.15, "learning_rate": 2.443476605918859e-05, "loss": 2.0048, "step": 17670500 }, { "epoch": 51.15, "learning_rate": 2.4434042411541313e-05, "loss": 1.9986, "step": 17671000 }, { "epoch": 51.15, "learning_rate": 2.4433318763894036e-05, "loss": 1.9624, "step": 17671500 }, { "epoch": 51.15, "learning_rate": 2.4432596563542054e-05, "loss": 1.9885, "step": 17672000 }, { "epoch": 51.15, "learning_rate": 2.4431872915894777e-05, "loss": 1.9892, "step": 17672500 }, { "epoch": 51.16, "learning_rate": 2.44311492682475e-05, "loss": 1.9868, "step": 17673000 }, { "epoch": 51.16, "learning_rate": 2.443042562060022e-05, "loss": 1.9986, "step": 17673500 }, { "epoch": 51.16, "learning_rate": 2.4429701972952947e-05, "loss": 1.9867, "step": 17674000 }, { "epoch": 51.16, "learning_rate": 2.4428978325305672e-05, "loss": 1.988, "step": 17674500 }, { "epoch": 51.16, "learning_rate": 2.4428256124953688e-05, "loss": 1.9801, "step": 17675000 }, { "epoch": 51.16, "learning_rate": 2.442753247730641e-05, "loss": 1.951, "step": 17675500 }, { "epoch": 51.16, "learning_rate": 2.4426808829659132e-05, "loss": 1.977, "step": 17676000 }, { "epoch": 51.17, "learning_rate": 2.4426085182011858e-05, "loss": 2.0077, "step": 17676500 }, { "epoch": 51.17, "learning_rate": 2.442536153436458e-05, "loss": 1.9678, "step": 17677000 }, { "epoch": 51.17, "learning_rate": 2.4424637886717303e-05, "loss": 1.9969, "step": 17677500 }, { "epoch": 51.17, "learning_rate": 2.4423914239070028e-05, "loss": 2.0112, "step": 17678000 }, { "epoch": 51.17, "learning_rate": 2.442319059142275e-05, "loss": 1.9616, "step": 17678500 }, { "epoch": 51.17, "learning_rate": 2.442246839107077e-05, "loss": 2.0111, "step": 17679000 }, { "epoch": 51.17, "learning_rate": 2.442174474342349e-05, "loss": 2.0043, "step": 17679500 }, { "epoch": 51.18, "learning_rate": 2.4421021095776214e-05, "loss": 1.9938, "step": 17680000 }, { "epoch": 51.18, "learning_rate": 2.4420298895424233e-05, "loss": 1.9844, "step": 17680500 }, { "epoch": 51.18, "learning_rate": 2.4419575247776955e-05, "loss": 2.0036, "step": 17681000 }, { "epoch": 51.18, "learning_rate": 2.441885304742497e-05, "loss": 2.0096, "step": 17681500 }, { "epoch": 51.18, "learning_rate": 2.4418129399777696e-05, "loss": 1.9985, "step": 17682000 }, { "epoch": 51.18, "learning_rate": 2.4417405752130422e-05, "loss": 1.9815, "step": 17682500 }, { "epoch": 51.19, "learning_rate": 2.4416682104483144e-05, "loss": 1.9834, "step": 17683000 }, { "epoch": 51.19, "learning_rate": 2.4415958456835866e-05, "loss": 1.9797, "step": 17683500 }, { "epoch": 51.19, "learning_rate": 2.441523480918859e-05, "loss": 1.9725, "step": 17684000 }, { "epoch": 51.19, "learning_rate": 2.441451116154131e-05, "loss": 1.9793, "step": 17684500 }, { "epoch": 51.19, "learning_rate": 2.4413787513894036e-05, "loss": 1.9821, "step": 17685000 }, { "epoch": 51.19, "learning_rate": 2.4413063866246762e-05, "loss": 1.9998, "step": 17685500 }, { "epoch": 51.19, "learning_rate": 2.4412340218599484e-05, "loss": 1.9707, "step": 17686000 }, { "epoch": 51.2, "learning_rate": 2.4411616570952206e-05, "loss": 1.9765, "step": 17686500 }, { "epoch": 51.2, "learning_rate": 2.441089292330493e-05, "loss": 1.9816, "step": 17687000 }, { "epoch": 51.2, "learning_rate": 2.441016927565765e-05, "loss": 1.98, "step": 17687500 }, { "epoch": 51.2, "learning_rate": 2.4409445628010373e-05, "loss": 1.9581, "step": 17688000 }, { "epoch": 51.2, "learning_rate": 2.44087219803631e-05, "loss": 1.9771, "step": 17688500 }, { "epoch": 51.2, "learning_rate": 2.4407998332715824e-05, "loss": 2.0041, "step": 17689000 }, { "epoch": 51.2, "learning_rate": 2.4407274685068547e-05, "loss": 1.9989, "step": 17689500 }, { "epoch": 51.21, "learning_rate": 2.4406552484716562e-05, "loss": 1.994, "step": 17690000 }, { "epoch": 51.21, "learning_rate": 2.440583028436458e-05, "loss": 1.9754, "step": 17690500 }, { "epoch": 51.21, "learning_rate": 2.4405106636717303e-05, "loss": 1.9891, "step": 17691000 }, { "epoch": 51.21, "learning_rate": 2.4404382989070026e-05, "loss": 2.0084, "step": 17691500 }, { "epoch": 51.21, "learning_rate": 2.4403659341422748e-05, "loss": 1.9854, "step": 17692000 }, { "epoch": 51.21, "learning_rate": 2.4402935693775473e-05, "loss": 2.0098, "step": 17692500 }, { "epoch": 51.21, "learning_rate": 2.44022120461282e-05, "loss": 1.9977, "step": 17693000 }, { "epoch": 51.22, "learning_rate": 2.440148839848092e-05, "loss": 1.9687, "step": 17693500 }, { "epoch": 51.22, "learning_rate": 2.4400764750833644e-05, "loss": 1.9837, "step": 17694000 }, { "epoch": 51.22, "learning_rate": 2.4400041103186366e-05, "loss": 2.0025, "step": 17694500 }, { "epoch": 51.22, "learning_rate": 2.4399317455539088e-05, "loss": 1.986, "step": 17695000 }, { "epoch": 51.22, "learning_rate": 2.4398593807891814e-05, "loss": 1.9921, "step": 17695500 }, { "epoch": 51.22, "learning_rate": 2.4397870160244536e-05, "loss": 1.9585, "step": 17696000 }, { "epoch": 51.22, "learning_rate": 2.4397147959892555e-05, "loss": 1.9823, "step": 17696500 }, { "epoch": 51.23, "learning_rate": 2.4396424312245277e-05, "loss": 1.9956, "step": 17697000 }, { "epoch": 51.23, "learning_rate": 2.4395700664598e-05, "loss": 1.9764, "step": 17697500 }, { "epoch": 51.23, "learning_rate": 2.4394978464246018e-05, "loss": 2.0184, "step": 17698000 }, { "epoch": 51.23, "learning_rate": 2.439425481659874e-05, "loss": 1.9898, "step": 17698500 }, { "epoch": 51.23, "learning_rate": 2.4393531168951463e-05, "loss": 1.9678, "step": 17699000 }, { "epoch": 51.23, "learning_rate": 2.4392807521304188e-05, "loss": 1.9788, "step": 17699500 }, { "epoch": 51.23, "learning_rate": 2.4392083873656914e-05, "loss": 1.983, "step": 17700000 }, { "epoch": 51.24, "learning_rate": 2.4391360226009636e-05, "loss": 1.9922, "step": 17700500 }, { "epoch": 51.24, "learning_rate": 2.439063657836236e-05, "loss": 1.9816, "step": 17701000 }, { "epoch": 51.24, "learning_rate": 2.438991293071508e-05, "loss": 1.9732, "step": 17701500 }, { "epoch": 51.24, "learning_rate": 2.4389189283067803e-05, "loss": 1.9948, "step": 17702000 }, { "epoch": 51.24, "learning_rate": 2.4388465635420525e-05, "loss": 1.9982, "step": 17702500 }, { "epoch": 51.24, "learning_rate": 2.438774198777325e-05, "loss": 2.0041, "step": 17703000 }, { "epoch": 51.24, "learning_rate": 2.4387018340125976e-05, "loss": 1.9898, "step": 17703500 }, { "epoch": 51.25, "learning_rate": 2.4386296139773992e-05, "loss": 1.9815, "step": 17704000 }, { "epoch": 51.25, "learning_rate": 2.4385572492126714e-05, "loss": 1.98, "step": 17704500 }, { "epoch": 51.25, "learning_rate": 2.438484884447944e-05, "loss": 1.9783, "step": 17705000 }, { "epoch": 51.25, "learning_rate": 2.4384126644127455e-05, "loss": 1.9731, "step": 17705500 }, { "epoch": 51.25, "learning_rate": 2.4383402996480178e-05, "loss": 1.9765, "step": 17706000 }, { "epoch": 51.25, "learning_rate": 2.43826793488329e-05, "loss": 1.9773, "step": 17706500 }, { "epoch": 51.25, "learning_rate": 2.4381955701185625e-05, "loss": 1.9804, "step": 17707000 }, { "epoch": 51.26, "learning_rate": 2.438123205353835e-05, "loss": 1.9992, "step": 17707500 }, { "epoch": 51.26, "learning_rate": 2.4380508405891073e-05, "loss": 1.9816, "step": 17708000 }, { "epoch": 51.26, "learning_rate": 2.437978620553909e-05, "loss": 1.9938, "step": 17708500 }, { "epoch": 51.26, "learning_rate": 2.4379062557891814e-05, "loss": 1.9932, "step": 17709000 }, { "epoch": 51.26, "learning_rate": 2.437834035753983e-05, "loss": 2.0002, "step": 17709500 }, { "epoch": 51.26, "learning_rate": 2.4377616709892552e-05, "loss": 1.9774, "step": 17710000 }, { "epoch": 51.26, "learning_rate": 2.4376893062245278e-05, "loss": 2.0023, "step": 17710500 }, { "epoch": 51.27, "learning_rate": 2.4376169414598e-05, "loss": 1.9971, "step": 17711000 }, { "epoch": 51.27, "learning_rate": 2.4375445766950726e-05, "loss": 2.0062, "step": 17711500 }, { "epoch": 51.27, "learning_rate": 2.4374722119303448e-05, "loss": 1.9993, "step": 17712000 }, { "epoch": 51.27, "learning_rate": 2.437399847165617e-05, "loss": 1.993, "step": 17712500 }, { "epoch": 51.27, "learning_rate": 2.4373274824008892e-05, "loss": 1.9738, "step": 17713000 }, { "epoch": 51.27, "learning_rate": 2.4372551176361615e-05, "loss": 1.9924, "step": 17713500 }, { "epoch": 51.27, "learning_rate": 2.437182752871434e-05, "loss": 2.0012, "step": 17714000 }, { "epoch": 51.28, "learning_rate": 2.4371103881067062e-05, "loss": 1.9822, "step": 17714500 }, { "epoch": 51.28, "learning_rate": 2.4370380233419788e-05, "loss": 1.9882, "step": 17715000 }, { "epoch": 51.28, "learning_rate": 2.436965658577251e-05, "loss": 2.0018, "step": 17715500 }, { "epoch": 51.28, "learning_rate": 2.436893438542053e-05, "loss": 1.9936, "step": 17716000 }, { "epoch": 51.28, "learning_rate": 2.4368212185068545e-05, "loss": 1.9701, "step": 17716500 }, { "epoch": 51.28, "learning_rate": 2.4367488537421267e-05, "loss": 1.9812, "step": 17717000 }, { "epoch": 51.28, "learning_rate": 2.436676488977399e-05, "loss": 1.9801, "step": 17717500 }, { "epoch": 51.29, "learning_rate": 2.4366041242126715e-05, "loss": 1.9927, "step": 17718000 }, { "epoch": 51.29, "learning_rate": 2.436531759447944e-05, "loss": 2.0044, "step": 17718500 }, { "epoch": 51.29, "learning_rate": 2.4364595394127456e-05, "loss": 1.9808, "step": 17719000 }, { "epoch": 51.29, "learning_rate": 2.436387174648018e-05, "loss": 2.0154, "step": 17719500 }, { "epoch": 51.29, "learning_rate": 2.4363149546128197e-05, "loss": 1.9955, "step": 17720000 }, { "epoch": 51.29, "learning_rate": 2.436242589848092e-05, "loss": 1.9788, "step": 17720500 }, { "epoch": 51.3, "learning_rate": 2.436170369812894e-05, "loss": 1.9964, "step": 17721000 }, { "epoch": 51.3, "learning_rate": 2.436098005048166e-05, "loss": 1.9775, "step": 17721500 }, { "epoch": 51.3, "learning_rate": 2.4360256402834383e-05, "loss": 1.9969, "step": 17722000 }, { "epoch": 51.3, "learning_rate": 2.435953275518711e-05, "loss": 1.9881, "step": 17722500 }, { "epoch": 51.3, "learning_rate": 2.435880910753983e-05, "loss": 1.9744, "step": 17723000 }, { "epoch": 51.3, "learning_rate": 2.4358085459892553e-05, "loss": 1.9911, "step": 17723500 }, { "epoch": 51.3, "learning_rate": 2.435736181224528e-05, "loss": 1.9726, "step": 17724000 }, { "epoch": 51.31, "learning_rate": 2.4356638164598e-05, "loss": 1.9767, "step": 17724500 }, { "epoch": 51.31, "learning_rate": 2.4355914516950723e-05, "loss": 1.9795, "step": 17725000 }, { "epoch": 51.31, "learning_rate": 2.4355192316598742e-05, "loss": 1.9973, "step": 17725500 }, { "epoch": 51.31, "learning_rate": 2.4354468668951464e-05, "loss": 1.982, "step": 17726000 }, { "epoch": 51.31, "learning_rate": 2.435374502130419e-05, "loss": 1.9871, "step": 17726500 }, { "epoch": 51.31, "learning_rate": 2.4353021373656912e-05, "loss": 1.9876, "step": 17727000 }, { "epoch": 51.31, "learning_rate": 2.4352297726009634e-05, "loss": 2.001, "step": 17727500 }, { "epoch": 51.32, "learning_rate": 2.4351574078362357e-05, "loss": 2.0063, "step": 17728000 }, { "epoch": 51.32, "learning_rate": 2.435085043071508e-05, "loss": 1.9989, "step": 17728500 }, { "epoch": 51.32, "learning_rate": 2.4350126783067804e-05, "loss": 1.9569, "step": 17729000 }, { "epoch": 51.32, "learning_rate": 2.434940458271582e-05, "loss": 1.9762, "step": 17729500 }, { "epoch": 51.32, "learning_rate": 2.4348680935068546e-05, "loss": 2.0073, "step": 17730000 }, { "epoch": 51.32, "learning_rate": 2.4347957287421268e-05, "loss": 1.9871, "step": 17730500 }, { "epoch": 51.32, "learning_rate": 2.4347233639773993e-05, "loss": 1.9912, "step": 17731000 }, { "epoch": 51.33, "learning_rate": 2.4346509992126716e-05, "loss": 2.0098, "step": 17731500 }, { "epoch": 51.33, "learning_rate": 2.4345786344479438e-05, "loss": 1.9744, "step": 17732000 }, { "epoch": 51.33, "learning_rate": 2.434506269683216e-05, "loss": 1.9985, "step": 17732500 }, { "epoch": 51.33, "learning_rate": 2.4344339049184882e-05, "loss": 1.9773, "step": 17733000 }, { "epoch": 51.33, "learning_rate": 2.4343616848832905e-05, "loss": 1.9958, "step": 17733500 }, { "epoch": 51.33, "learning_rate": 2.4342893201185627e-05, "loss": 1.9777, "step": 17734000 }, { "epoch": 51.33, "learning_rate": 2.434216955353835e-05, "loss": 1.9991, "step": 17734500 }, { "epoch": 51.34, "learning_rate": 2.4341447353186368e-05, "loss": 2.0075, "step": 17735000 }, { "epoch": 51.34, "learning_rate": 2.434072370553909e-05, "loss": 1.9932, "step": 17735500 }, { "epoch": 51.34, "learning_rate": 2.4340001505187106e-05, "loss": 1.982, "step": 17736000 }, { "epoch": 51.34, "learning_rate": 2.433927785753983e-05, "loss": 1.9842, "step": 17736500 }, { "epoch": 51.34, "learning_rate": 2.4338554209892554e-05, "loss": 1.9956, "step": 17737000 }, { "epoch": 51.34, "learning_rate": 2.433783056224528e-05, "loss": 1.9695, "step": 17737500 }, { "epoch": 51.34, "learning_rate": 2.4337106914598e-05, "loss": 1.9812, "step": 17738000 }, { "epoch": 51.35, "learning_rate": 2.4336383266950724e-05, "loss": 2.002, "step": 17738500 }, { "epoch": 51.35, "learning_rate": 2.4335659619303446e-05, "loss": 1.9895, "step": 17739000 }, { "epoch": 51.35, "learning_rate": 2.433493597165617e-05, "loss": 1.9969, "step": 17739500 }, { "epoch": 51.35, "learning_rate": 2.4334212324008894e-05, "loss": 1.9887, "step": 17740000 }, { "epoch": 51.35, "learning_rate": 2.4333488676361616e-05, "loss": 1.9793, "step": 17740500 }, { "epoch": 51.35, "learning_rate": 2.4332766476009632e-05, "loss": 2.0083, "step": 17741000 }, { "epoch": 51.35, "learning_rate": 2.4332042828362357e-05, "loss": 2.0087, "step": 17741500 }, { "epoch": 51.36, "learning_rate": 2.4331319180715083e-05, "loss": 2.0044, "step": 17742000 }, { "epoch": 51.36, "learning_rate": 2.4330595533067805e-05, "loss": 2.0007, "step": 17742500 }, { "epoch": 51.36, "learning_rate": 2.432987333271582e-05, "loss": 1.9892, "step": 17743000 }, { "epoch": 51.36, "learning_rate": 2.4329149685068543e-05, "loss": 2.0041, "step": 17743500 }, { "epoch": 51.36, "learning_rate": 2.432842603742127e-05, "loss": 2.0129, "step": 17744000 }, { "epoch": 51.36, "learning_rate": 2.432770238977399e-05, "loss": 1.9825, "step": 17744500 }, { "epoch": 51.36, "learning_rate": 2.4326978742126716e-05, "loss": 1.991, "step": 17745000 }, { "epoch": 51.37, "learning_rate": 2.432625509447944e-05, "loss": 2.0086, "step": 17745500 }, { "epoch": 51.37, "learning_rate": 2.432553434142275e-05, "loss": 1.9807, "step": 17746000 }, { "epoch": 51.37, "learning_rate": 2.4324813588366063e-05, "loss": 2.0073, "step": 17746500 }, { "epoch": 51.37, "learning_rate": 2.4324089940718785e-05, "loss": 2.0274, "step": 17747000 }, { "epoch": 51.37, "learning_rate": 2.4323366293071508e-05, "loss": 2.0009, "step": 17747500 }, { "epoch": 51.37, "learning_rate": 2.4322642645424233e-05, "loss": 1.9813, "step": 17748000 }, { "epoch": 51.37, "learning_rate": 2.4321918997776956e-05, "loss": 1.992, "step": 17748500 }, { "epoch": 51.38, "learning_rate": 2.432119535012968e-05, "loss": 1.9664, "step": 17749000 }, { "epoch": 51.38, "learning_rate": 2.4320471702482403e-05, "loss": 1.9831, "step": 17749500 }, { "epoch": 51.38, "learning_rate": 2.4319748054835126e-05, "loss": 1.9782, "step": 17750000 }, { "epoch": 51.38, "learning_rate": 2.4319024407187848e-05, "loss": 1.9753, "step": 17750500 }, { "epoch": 51.38, "learning_rate": 2.431830075954057e-05, "loss": 1.9954, "step": 17751000 }, { "epoch": 51.38, "learning_rate": 2.4317577111893296e-05, "loss": 1.9961, "step": 17751500 }, { "epoch": 51.38, "learning_rate": 2.431685491154131e-05, "loss": 1.9791, "step": 17752000 }, { "epoch": 51.39, "learning_rate": 2.4316131263894034e-05, "loss": 1.9557, "step": 17752500 }, { "epoch": 51.39, "learning_rate": 2.431540761624676e-05, "loss": 1.9933, "step": 17753000 }, { "epoch": 51.39, "learning_rate": 2.4314683968599485e-05, "loss": 1.9728, "step": 17753500 }, { "epoch": 51.39, "learning_rate": 2.4313960320952207e-05, "loss": 1.9737, "step": 17754000 }, { "epoch": 51.39, "learning_rate": 2.431323667330493e-05, "loss": 2.006, "step": 17754500 }, { "epoch": 51.39, "learning_rate": 2.431251302565765e-05, "loss": 1.9818, "step": 17755000 }, { "epoch": 51.39, "learning_rate": 2.4311789378010374e-05, "loss": 2.0053, "step": 17755500 }, { "epoch": 51.4, "learning_rate": 2.4311065730363096e-05, "loss": 1.9741, "step": 17756000 }, { "epoch": 51.4, "learning_rate": 2.431034208271582e-05, "loss": 2.0018, "step": 17756500 }, { "epoch": 51.4, "learning_rate": 2.430961988236384e-05, "loss": 1.9887, "step": 17757000 }, { "epoch": 51.4, "learning_rate": 2.4308896234716563e-05, "loss": 1.9965, "step": 17757500 }, { "epoch": 51.4, "learning_rate": 2.4308172587069285e-05, "loss": 1.9966, "step": 17758000 }, { "epoch": 51.4, "learning_rate": 2.430744893942201e-05, "loss": 2.0037, "step": 17758500 }, { "epoch": 51.41, "learning_rate": 2.4306725291774733e-05, "loss": 2.0014, "step": 17759000 }, { "epoch": 51.41, "learning_rate": 2.4306001644127455e-05, "loss": 1.9779, "step": 17759500 }, { "epoch": 51.41, "learning_rate": 2.430527799648018e-05, "loss": 1.9963, "step": 17760000 }, { "epoch": 51.41, "learning_rate": 2.4304554348832903e-05, "loss": 1.9884, "step": 17760500 }, { "epoch": 51.41, "learning_rate": 2.4303832148480922e-05, "loss": 2.0144, "step": 17761000 }, { "epoch": 51.41, "learning_rate": 2.4303108500833644e-05, "loss": 2.0232, "step": 17761500 }, { "epoch": 51.41, "learning_rate": 2.430238630048166e-05, "loss": 1.9989, "step": 17762000 }, { "epoch": 51.42, "learning_rate": 2.4301662652834385e-05, "loss": 1.9807, "step": 17762500 }, { "epoch": 51.42, "learning_rate": 2.4300939005187108e-05, "loss": 2.0134, "step": 17763000 }, { "epoch": 51.42, "learning_rate": 2.430021535753983e-05, "loss": 1.9752, "step": 17763500 }, { "epoch": 51.42, "learning_rate": 2.4299491709892555e-05, "loss": 1.979, "step": 17764000 }, { "epoch": 51.42, "learning_rate": 2.4298768062245278e-05, "loss": 1.9745, "step": 17764500 }, { "epoch": 51.42, "learning_rate": 2.4298045861893297e-05, "loss": 1.9798, "step": 17765000 }, { "epoch": 51.42, "learning_rate": 2.429732221424602e-05, "loss": 1.9645, "step": 17765500 }, { "epoch": 51.43, "learning_rate": 2.429659856659874e-05, "loss": 1.9786, "step": 17766000 }, { "epoch": 51.43, "learning_rate": 2.4295874918951463e-05, "loss": 2.0019, "step": 17766500 }, { "epoch": 51.43, "learning_rate": 2.4295151271304185e-05, "loss": 1.9875, "step": 17767000 }, { "epoch": 51.43, "learning_rate": 2.429442762365691e-05, "loss": 1.9936, "step": 17767500 }, { "epoch": 51.43, "learning_rate": 2.4293703976009637e-05, "loss": 1.9964, "step": 17768000 }, { "epoch": 51.43, "learning_rate": 2.429298032836236e-05, "loss": 2.006, "step": 17768500 }, { "epoch": 51.43, "learning_rate": 2.429225668071508e-05, "loss": 1.9675, "step": 17769000 }, { "epoch": 51.44, "learning_rate": 2.4291533033067803e-05, "loss": 1.9867, "step": 17769500 }, { "epoch": 51.44, "learning_rate": 2.4290810832715822e-05, "loss": 2.0065, "step": 17770000 }, { "epoch": 51.44, "learning_rate": 2.4290088632363838e-05, "loss": 1.971, "step": 17770500 }, { "epoch": 51.44, "learning_rate": 2.428936498471656e-05, "loss": 2.0023, "step": 17771000 }, { "epoch": 51.44, "learning_rate": 2.4288641337069286e-05, "loss": 2.0077, "step": 17771500 }, { "epoch": 51.44, "learning_rate": 2.428791768942201e-05, "loss": 1.9849, "step": 17772000 }, { "epoch": 51.44, "learning_rate": 2.4287194041774734e-05, "loss": 1.981, "step": 17772500 }, { "epoch": 51.45, "learning_rate": 2.4286470394127456e-05, "loss": 2.0022, "step": 17773000 }, { "epoch": 51.45, "learning_rate": 2.4285746746480178e-05, "loss": 1.9873, "step": 17773500 }, { "epoch": 51.45, "learning_rate": 2.42850230988329e-05, "loss": 1.9903, "step": 17774000 }, { "epoch": 51.45, "learning_rate": 2.4284299451185623e-05, "loss": 1.9959, "step": 17774500 }, { "epoch": 51.45, "learning_rate": 2.4283575803538348e-05, "loss": 1.994, "step": 17775000 }, { "epoch": 51.45, "learning_rate": 2.4282852155891074e-05, "loss": 2.0203, "step": 17775500 }, { "epoch": 51.45, "learning_rate": 2.4282128508243796e-05, "loss": 1.9736, "step": 17776000 }, { "epoch": 51.46, "learning_rate": 2.428140630789181e-05, "loss": 1.9931, "step": 17776500 }, { "epoch": 51.46, "learning_rate": 2.4280682660244537e-05, "loss": 1.9829, "step": 17777000 }, { "epoch": 51.46, "learning_rate": 2.4279960459892553e-05, "loss": 1.9807, "step": 17777500 }, { "epoch": 51.46, "learning_rate": 2.4279236812245275e-05, "loss": 2.0, "step": 17778000 }, { "epoch": 51.46, "learning_rate": 2.4278514611893294e-05, "loss": 1.9775, "step": 17778500 }, { "epoch": 51.46, "learning_rate": 2.427779096424602e-05, "loss": 1.997, "step": 17779000 }, { "epoch": 51.46, "learning_rate": 2.4277067316598742e-05, "loss": 1.9909, "step": 17779500 }, { "epoch": 51.47, "learning_rate": 2.4276343668951464e-05, "loss": 2.0104, "step": 17780000 }, { "epoch": 51.47, "learning_rate": 2.4275620021304186e-05, "loss": 1.9832, "step": 17780500 }, { "epoch": 51.47, "learning_rate": 2.4274896373656912e-05, "loss": 1.9882, "step": 17781000 }, { "epoch": 51.47, "learning_rate": 2.4274172726009634e-05, "loss": 1.9843, "step": 17781500 }, { "epoch": 51.47, "learning_rate": 2.4273449078362356e-05, "loss": 1.9743, "step": 17782000 }, { "epoch": 51.47, "learning_rate": 2.4272725430715082e-05, "loss": 1.9883, "step": 17782500 }, { "epoch": 51.47, "learning_rate": 2.4272001783067804e-05, "loss": 1.9852, "step": 17783000 }, { "epoch": 51.48, "learning_rate": 2.4271278135420526e-05, "loss": 1.9665, "step": 17783500 }, { "epoch": 51.48, "learning_rate": 2.4270554487773252e-05, "loss": 1.9924, "step": 17784000 }, { "epoch": 51.48, "learning_rate": 2.4269830840125974e-05, "loss": 1.9915, "step": 17784500 }, { "epoch": 51.48, "learning_rate": 2.426910863977399e-05, "loss": 1.9815, "step": 17785000 }, { "epoch": 51.48, "learning_rate": 2.4268384992126712e-05, "loss": 1.9967, "step": 17785500 }, { "epoch": 51.48, "learning_rate": 2.4267661344479438e-05, "loss": 1.9839, "step": 17786000 }, { "epoch": 51.48, "learning_rate": 2.4266937696832163e-05, "loss": 1.992, "step": 17786500 }, { "epoch": 51.49, "learning_rate": 2.4266214049184886e-05, "loss": 1.9891, "step": 17787000 }, { "epoch": 51.49, "learning_rate": 2.42654918488329e-05, "loss": 1.9864, "step": 17787500 }, { "epoch": 51.49, "learning_rate": 2.4264768201185627e-05, "loss": 1.9949, "step": 17788000 }, { "epoch": 51.49, "learning_rate": 2.426404455353835e-05, "loss": 1.9861, "step": 17788500 }, { "epoch": 51.49, "learning_rate": 2.426332090589107e-05, "loss": 1.9972, "step": 17789000 }, { "epoch": 51.49, "learning_rate": 2.426259870553909e-05, "loss": 1.979, "step": 17789500 }, { "epoch": 51.49, "learning_rate": 2.426187650518711e-05, "loss": 2.0037, "step": 17790000 }, { "epoch": 51.5, "learning_rate": 2.426115285753983e-05, "loss": 2.0009, "step": 17790500 }, { "epoch": 51.5, "learning_rate": 2.4260429209892554e-05, "loss": 1.9909, "step": 17791000 }, { "epoch": 51.5, "learning_rate": 2.4259707009540573e-05, "loss": 1.9745, "step": 17791500 }, { "epoch": 51.5, "learning_rate": 2.4258983361893295e-05, "loss": 1.9729, "step": 17792000 }, { "epoch": 51.5, "learning_rate": 2.4258259714246017e-05, "loss": 1.9919, "step": 17792500 }, { "epoch": 51.5, "learning_rate": 2.425753606659874e-05, "loss": 1.984, "step": 17793000 }, { "epoch": 51.5, "learning_rate": 2.4256812418951465e-05, "loss": 1.9853, "step": 17793500 }, { "epoch": 51.51, "learning_rate": 2.425608877130419e-05, "loss": 1.9979, "step": 17794000 }, { "epoch": 51.51, "learning_rate": 2.4255366570952206e-05, "loss": 2.0018, "step": 17794500 }, { "epoch": 51.51, "learning_rate": 2.4254642923304928e-05, "loss": 2.0158, "step": 17795000 }, { "epoch": 51.51, "learning_rate": 2.4253920722952947e-05, "loss": 1.9974, "step": 17795500 }, { "epoch": 51.51, "learning_rate": 2.425319707530567e-05, "loss": 1.9711, "step": 17796000 }, { "epoch": 51.51, "learning_rate": 2.425247342765839e-05, "loss": 1.9994, "step": 17796500 }, { "epoch": 51.52, "learning_rate": 2.4251749780011114e-05, "loss": 1.9739, "step": 17797000 }, { "epoch": 51.52, "learning_rate": 2.425102613236384e-05, "loss": 1.9888, "step": 17797500 }, { "epoch": 51.52, "learning_rate": 2.4250302484716565e-05, "loss": 1.999, "step": 17798000 }, { "epoch": 51.52, "learning_rate": 2.4249578837069287e-05, "loss": 1.9923, "step": 17798500 }, { "epoch": 51.52, "learning_rate": 2.424885518942201e-05, "loss": 1.9993, "step": 17799000 }, { "epoch": 51.52, "learning_rate": 2.4248131541774732e-05, "loss": 2.0076, "step": 17799500 }, { "epoch": 51.52, "learning_rate": 2.4247407894127454e-05, "loss": 1.9969, "step": 17800000 }, { "epoch": 51.53, "learning_rate": 2.4246684246480176e-05, "loss": 2.008, "step": 17800500 }, { "epoch": 51.53, "learning_rate": 2.4245960598832902e-05, "loss": 1.969, "step": 17801000 }, { "epoch": 51.53, "learning_rate": 2.4245236951185628e-05, "loss": 2.01, "step": 17801500 }, { "epoch": 51.53, "learning_rate": 2.424451330353835e-05, "loss": 1.9812, "step": 17802000 }, { "epoch": 51.53, "learning_rate": 2.4243789655891072e-05, "loss": 2.0148, "step": 17802500 }, { "epoch": 51.53, "learning_rate": 2.4243066008243794e-05, "loss": 1.994, "step": 17803000 }, { "epoch": 51.53, "learning_rate": 2.4242343807891813e-05, "loss": 1.987, "step": 17803500 }, { "epoch": 51.54, "learning_rate": 2.424162160753983e-05, "loss": 2.0125, "step": 17804000 }, { "epoch": 51.54, "learning_rate": 2.4240897959892554e-05, "loss": 2.0064, "step": 17804500 }, { "epoch": 51.54, "learning_rate": 2.424017431224528e-05, "loss": 1.9766, "step": 17805000 }, { "epoch": 51.54, "learning_rate": 2.4239452111893296e-05, "loss": 1.9909, "step": 17805500 }, { "epoch": 51.54, "learning_rate": 2.4238728464246018e-05, "loss": 1.9909, "step": 17806000 }, { "epoch": 51.54, "learning_rate": 2.423800481659874e-05, "loss": 2.0074, "step": 17806500 }, { "epoch": 51.54, "learning_rate": 2.4237281168951466e-05, "loss": 1.9958, "step": 17807000 }, { "epoch": 51.55, "learning_rate": 2.4236557521304188e-05, "loss": 1.995, "step": 17807500 }, { "epoch": 51.55, "learning_rate": 2.423583387365691e-05, "loss": 1.982, "step": 17808000 }, { "epoch": 51.55, "learning_rate": 2.4235110226009632e-05, "loss": 1.9904, "step": 17808500 }, { "epoch": 51.55, "learning_rate": 2.4234386578362358e-05, "loss": 1.9952, "step": 17809000 }, { "epoch": 51.55, "learning_rate": 2.423366293071508e-05, "loss": 1.9846, "step": 17809500 }, { "epoch": 51.55, "learning_rate": 2.4232939283067806e-05, "loss": 2.0042, "step": 17810000 }, { "epoch": 51.55, "learning_rate": 2.4232215635420528e-05, "loss": 1.9878, "step": 17810500 }, { "epoch": 51.56, "learning_rate": 2.423149198777325e-05, "loss": 1.9853, "step": 17811000 }, { "epoch": 51.56, "learning_rate": 2.4230768340125973e-05, "loss": 1.9883, "step": 17811500 }, { "epoch": 51.56, "learning_rate": 2.4230044692478698e-05, "loss": 1.9842, "step": 17812000 }, { "epoch": 51.56, "learning_rate": 2.422932104483142e-05, "loss": 2.0059, "step": 17812500 }, { "epoch": 51.56, "learning_rate": 2.4228597397184143e-05, "loss": 1.9831, "step": 17813000 }, { "epoch": 51.56, "learning_rate": 2.422787519683216e-05, "loss": 2.0047, "step": 17813500 }, { "epoch": 51.56, "learning_rate": 2.4227151549184884e-05, "loss": 2.0043, "step": 17814000 }, { "epoch": 51.57, "learning_rate": 2.4226429348832903e-05, "loss": 2.002, "step": 17814500 }, { "epoch": 51.57, "learning_rate": 2.4225705701185625e-05, "loss": 1.9772, "step": 17815000 }, { "epoch": 51.57, "learning_rate": 2.4224983500833644e-05, "loss": 1.9984, "step": 17815500 }, { "epoch": 51.57, "learning_rate": 2.4224259853186366e-05, "loss": 1.9742, "step": 17816000 }, { "epoch": 51.57, "learning_rate": 2.4223537652834385e-05, "loss": 1.9888, "step": 17816500 }, { "epoch": 51.57, "learning_rate": 2.4222814005187107e-05, "loss": 2.0104, "step": 17817000 }, { "epoch": 51.57, "learning_rate": 2.422209035753983e-05, "loss": 1.9822, "step": 17817500 }, { "epoch": 51.58, "learning_rate": 2.4221366709892555e-05, "loss": 1.9954, "step": 17818000 }, { "epoch": 51.58, "learning_rate": 2.4220643062245277e-05, "loss": 1.9771, "step": 17818500 }, { "epoch": 51.58, "learning_rate": 2.4219919414598e-05, "loss": 2.004, "step": 17819000 }, { "epoch": 51.58, "learning_rate": 2.4219195766950722e-05, "loss": 2.0019, "step": 17819500 }, { "epoch": 51.58, "learning_rate": 2.4218472119303447e-05, "loss": 1.9826, "step": 17820000 }, { "epoch": 51.58, "learning_rate": 2.421774847165617e-05, "loss": 1.9801, "step": 17820500 }, { "epoch": 51.58, "learning_rate": 2.4217024824008895e-05, "loss": 1.994, "step": 17821000 }, { "epoch": 51.59, "learning_rate": 2.4216301176361618e-05, "loss": 2.0118, "step": 17821500 }, { "epoch": 51.59, "learning_rate": 2.421557752871434e-05, "loss": 1.975, "step": 17822000 }, { "epoch": 51.59, "learning_rate": 2.4214855328362355e-05, "loss": 1.9759, "step": 17822500 }, { "epoch": 51.59, "learning_rate": 2.421413168071508e-05, "loss": 1.9796, "step": 17823000 }, { "epoch": 51.59, "learning_rate": 2.4213408033067807e-05, "loss": 1.9897, "step": 17823500 }, { "epoch": 51.59, "learning_rate": 2.421268438542053e-05, "loss": 1.9582, "step": 17824000 }, { "epoch": 51.59, "learning_rate": 2.421196073777325e-05, "loss": 1.9928, "step": 17824500 }, { "epoch": 51.6, "learning_rate": 2.4211237090125973e-05, "loss": 2.009, "step": 17825000 }, { "epoch": 51.6, "learning_rate": 2.4210513442478696e-05, "loss": 1.9929, "step": 17825500 }, { "epoch": 51.6, "learning_rate": 2.420978979483142e-05, "loss": 1.9976, "step": 17826000 }, { "epoch": 51.6, "learning_rate": 2.4209066147184143e-05, "loss": 1.9962, "step": 17826500 }, { "epoch": 51.6, "learning_rate": 2.420834394683216e-05, "loss": 2.0236, "step": 17827000 }, { "epoch": 51.6, "learning_rate": 2.4207620299184885e-05, "loss": 1.9801, "step": 17827500 }, { "epoch": 51.6, "learning_rate": 2.4206898098832904e-05, "loss": 1.9704, "step": 17828000 }, { "epoch": 51.61, "learning_rate": 2.4206174451185626e-05, "loss": 1.9973, "step": 17828500 }, { "epoch": 51.61, "learning_rate": 2.4205450803538348e-05, "loss": 1.9994, "step": 17829000 }, { "epoch": 51.61, "learning_rate": 2.420472715589107e-05, "loss": 1.9647, "step": 17829500 }, { "epoch": 51.61, "learning_rate": 2.420400495553909e-05, "loss": 1.9988, "step": 17830000 }, { "epoch": 51.61, "learning_rate": 2.4203282755187108e-05, "loss": 1.9832, "step": 17830500 }, { "epoch": 51.61, "learning_rate": 2.420255910753983e-05, "loss": 1.9955, "step": 17831000 }, { "epoch": 51.61, "learning_rate": 2.4201835459892556e-05, "loss": 1.9977, "step": 17831500 }, { "epoch": 51.62, "learning_rate": 2.4201111812245278e-05, "loss": 1.9805, "step": 17832000 }, { "epoch": 51.62, "learning_rate": 2.4200388164598e-05, "loss": 2.0149, "step": 17832500 }, { "epoch": 51.62, "learning_rate": 2.419966596424602e-05, "loss": 1.9917, "step": 17833000 }, { "epoch": 51.62, "learning_rate": 2.419894231659874e-05, "loss": 2.0017, "step": 17833500 }, { "epoch": 51.62, "learning_rate": 2.4198218668951464e-05, "loss": 1.9985, "step": 17834000 }, { "epoch": 51.62, "learning_rate": 2.4197495021304186e-05, "loss": 1.9916, "step": 17834500 }, { "epoch": 51.63, "learning_rate": 2.4196771373656912e-05, "loss": 2.0182, "step": 17835000 }, { "epoch": 51.63, "learning_rate": 2.4196047726009634e-05, "loss": 1.9854, "step": 17835500 }, { "epoch": 51.63, "learning_rate": 2.419532407836236e-05, "loss": 1.9664, "step": 17836000 }, { "epoch": 51.63, "learning_rate": 2.4194601878010375e-05, "loss": 2.0408, "step": 17836500 }, { "epoch": 51.63, "learning_rate": 2.4193878230363097e-05, "loss": 1.9784, "step": 17837000 }, { "epoch": 51.63, "learning_rate": 2.4193156030011116e-05, "loss": 1.9788, "step": 17837500 }, { "epoch": 51.63, "learning_rate": 2.419243238236384e-05, "loss": 1.9918, "step": 17838000 }, { "epoch": 51.64, "learning_rate": 2.419170873471656e-05, "loss": 1.9885, "step": 17838500 }, { "epoch": 51.64, "learning_rate": 2.4190985087069286e-05, "loss": 1.9747, "step": 17839000 }, { "epoch": 51.64, "learning_rate": 2.4190262886717305e-05, "loss": 2.0008, "step": 17839500 }, { "epoch": 51.64, "learning_rate": 2.4189539239070028e-05, "loss": 2.0073, "step": 17840000 }, { "epoch": 51.64, "learning_rate": 2.418881559142275e-05, "loss": 1.9941, "step": 17840500 }, { "epoch": 51.64, "learning_rate": 2.4188091943775472e-05, "loss": 1.9926, "step": 17841000 }, { "epoch": 51.64, "learning_rate": 2.4187368296128198e-05, "loss": 1.9846, "step": 17841500 }, { "epoch": 51.65, "learning_rate": 2.418664464848092e-05, "loss": 2.0055, "step": 17842000 }, { "epoch": 51.65, "learning_rate": 2.4185921000833645e-05, "loss": 1.9837, "step": 17842500 }, { "epoch": 51.65, "learning_rate": 2.4185197353186368e-05, "loss": 2.0073, "step": 17843000 }, { "epoch": 51.65, "learning_rate": 2.418447370553909e-05, "loss": 2.0092, "step": 17843500 }, { "epoch": 51.65, "learning_rate": 2.4183750057891812e-05, "loss": 2.0056, "step": 17844000 }, { "epoch": 51.65, "learning_rate": 2.4183026410244534e-05, "loss": 1.9885, "step": 17844500 }, { "epoch": 51.65, "learning_rate": 2.418230276259726e-05, "loss": 1.9733, "step": 17845000 }, { "epoch": 51.66, "learning_rate": 2.4181579114949982e-05, "loss": 1.9846, "step": 17845500 }, { "epoch": 51.66, "learning_rate": 2.4180855467302708e-05, "loss": 1.9828, "step": 17846000 }, { "epoch": 51.66, "learning_rate": 2.418013181965543e-05, "loss": 1.9824, "step": 17846500 }, { "epoch": 51.66, "learning_rate": 2.417940961930345e-05, "loss": 1.9769, "step": 17847000 }, { "epoch": 51.66, "learning_rate": 2.417868597165617e-05, "loss": 1.9933, "step": 17847500 }, { "epoch": 51.66, "learning_rate": 2.4177962324008894e-05, "loss": 1.9941, "step": 17848000 }, { "epoch": 51.66, "learning_rate": 2.4177238676361616e-05, "loss": 1.9797, "step": 17848500 }, { "epoch": 51.67, "learning_rate": 2.4176515028714338e-05, "loss": 1.9926, "step": 17849000 }, { "epoch": 51.67, "learning_rate": 2.417579138106706e-05, "loss": 2.0036, "step": 17849500 }, { "epoch": 51.67, "learning_rate": 2.4175067733419786e-05, "loss": 1.9825, "step": 17850000 }, { "epoch": 51.67, "learning_rate": 2.417434408577251e-05, "loss": 2.0047, "step": 17850500 }, { "epoch": 51.67, "learning_rate": 2.4173623332715824e-05, "loss": 2.0039, "step": 17851000 }, { "epoch": 51.67, "learning_rate": 2.4172899685068546e-05, "loss": 2.0111, "step": 17851500 }, { "epoch": 51.67, "learning_rate": 2.4172176037421268e-05, "loss": 2.007, "step": 17852000 }, { "epoch": 51.68, "learning_rate": 2.417145238977399e-05, "loss": 1.994, "step": 17852500 }, { "epoch": 51.68, "learning_rate": 2.4170728742126713e-05, "loss": 2.0155, "step": 17853000 }, { "epoch": 51.68, "learning_rate": 2.417000509447944e-05, "loss": 1.9994, "step": 17853500 }, { "epoch": 51.68, "learning_rate": 2.416928144683216e-05, "loss": 1.9968, "step": 17854000 }, { "epoch": 51.68, "learning_rate": 2.4168557799184886e-05, "loss": 2.0092, "step": 17854500 }, { "epoch": 51.68, "learning_rate": 2.416783415153761e-05, "loss": 1.9643, "step": 17855000 }, { "epoch": 51.68, "learning_rate": 2.416711050389033e-05, "loss": 2.0068, "step": 17855500 }, { "epoch": 51.69, "learning_rate": 2.4166386856243053e-05, "loss": 1.9979, "step": 17856000 }, { "epoch": 51.69, "learning_rate": 2.4165663208595775e-05, "loss": 2.0112, "step": 17856500 }, { "epoch": 51.69, "learning_rate": 2.41649395609485e-05, "loss": 2.0116, "step": 17857000 }, { "epoch": 51.69, "learning_rate": 2.4164215913301226e-05, "loss": 1.9697, "step": 17857500 }, { "epoch": 51.69, "learning_rate": 2.4163493712949242e-05, "loss": 1.9926, "step": 17858000 }, { "epoch": 51.69, "learning_rate": 2.4162770065301964e-05, "loss": 1.9828, "step": 17858500 }, { "epoch": 51.69, "learning_rate": 2.4162046417654686e-05, "loss": 1.9876, "step": 17859000 }, { "epoch": 51.7, "learning_rate": 2.4161322770007412e-05, "loss": 1.9986, "step": 17859500 }, { "epoch": 51.7, "learning_rate": 2.4160599122360134e-05, "loss": 1.9905, "step": 17860000 }, { "epoch": 51.7, "learning_rate": 2.4159875474712856e-05, "loss": 2.0239, "step": 17860500 }, { "epoch": 51.7, "learning_rate": 2.4159153274360875e-05, "loss": 1.993, "step": 17861000 }, { "epoch": 51.7, "learning_rate": 2.41584296267136e-05, "loss": 1.9816, "step": 17861500 }, { "epoch": 51.7, "learning_rate": 2.4157705979066323e-05, "loss": 1.9968, "step": 17862000 }, { "epoch": 51.7, "learning_rate": 2.4156982331419045e-05, "loss": 1.9858, "step": 17862500 }, { "epoch": 51.71, "learning_rate": 2.4156258683771768e-05, "loss": 1.9657, "step": 17863000 }, { "epoch": 51.71, "learning_rate": 2.415553503612449e-05, "loss": 1.9779, "step": 17863500 }, { "epoch": 51.71, "learning_rate": 2.415481283577251e-05, "loss": 1.9815, "step": 17864000 }, { "epoch": 51.71, "learning_rate": 2.4154089188125235e-05, "loss": 2.0093, "step": 17864500 }, { "epoch": 51.71, "learning_rate": 2.415336698777325e-05, "loss": 2.0089, "step": 17865000 }, { "epoch": 51.71, "learning_rate": 2.4152643340125976e-05, "loss": 1.9634, "step": 17865500 }, { "epoch": 51.71, "learning_rate": 2.4151919692478698e-05, "loss": 2.0003, "step": 17866000 }, { "epoch": 51.72, "learning_rate": 2.415119604483142e-05, "loss": 2.0055, "step": 17866500 }, { "epoch": 51.72, "learning_rate": 2.4150472397184142e-05, "loss": 1.9846, "step": 17867000 }, { "epoch": 51.72, "learning_rate": 2.4149748749536865e-05, "loss": 2.0077, "step": 17867500 }, { "epoch": 51.72, "learning_rate": 2.4149025101889587e-05, "loss": 1.9702, "step": 17868000 }, { "epoch": 51.72, "learning_rate": 2.4148301454242312e-05, "loss": 2.0114, "step": 17868500 }, { "epoch": 51.72, "learning_rate": 2.4147577806595038e-05, "loss": 1.9846, "step": 17869000 }, { "epoch": 51.72, "learning_rate": 2.414685415894776e-05, "loss": 1.9885, "step": 17869500 }, { "epoch": 51.73, "learning_rate": 2.4146130511300483e-05, "loss": 1.9884, "step": 17870000 }, { "epoch": 51.73, "learning_rate": 2.4145406863653205e-05, "loss": 1.9826, "step": 17870500 }, { "epoch": 51.73, "learning_rate": 2.4144684663301224e-05, "loss": 1.9927, "step": 17871000 }, { "epoch": 51.73, "learning_rate": 2.4143961015653946e-05, "loss": 1.9944, "step": 17871500 }, { "epoch": 51.73, "learning_rate": 2.4143238815301965e-05, "loss": 2.0158, "step": 17872000 }, { "epoch": 51.73, "learning_rate": 2.414251516765469e-05, "loss": 1.9885, "step": 17872500 }, { "epoch": 51.74, "learning_rate": 2.4141792967302706e-05, "loss": 1.962, "step": 17873000 }, { "epoch": 51.74, "learning_rate": 2.414106931965543e-05, "loss": 1.9864, "step": 17873500 }, { "epoch": 51.74, "learning_rate": 2.414034567200815e-05, "loss": 1.9758, "step": 17874000 }, { "epoch": 51.74, "learning_rate": 2.4139622024360876e-05, "loss": 1.9842, "step": 17874500 }, { "epoch": 51.74, "learning_rate": 2.41388983767136e-05, "loss": 1.9943, "step": 17875000 }, { "epoch": 51.74, "learning_rate": 2.413817472906632e-05, "loss": 2.0157, "step": 17875500 }, { "epoch": 51.74, "learning_rate": 2.4137451081419046e-05, "loss": 1.9879, "step": 17876000 }, { "epoch": 51.75, "learning_rate": 2.413672743377177e-05, "loss": 2.0002, "step": 17876500 }, { "epoch": 51.75, "learning_rate": 2.413600378612449e-05, "loss": 1.9943, "step": 17877000 }, { "epoch": 51.75, "learning_rate": 2.413528158577251e-05, "loss": 1.9848, "step": 17877500 }, { "epoch": 51.75, "learning_rate": 2.4134557938125232e-05, "loss": 1.9746, "step": 17878000 }, { "epoch": 51.75, "learning_rate": 2.4133834290477954e-05, "loss": 2.009, "step": 17878500 }, { "epoch": 51.75, "learning_rate": 2.4133110642830676e-05, "loss": 2.0211, "step": 17879000 }, { "epoch": 51.75, "learning_rate": 2.4132386995183402e-05, "loss": 1.9892, "step": 17879500 }, { "epoch": 51.76, "learning_rate": 2.4131663347536128e-05, "loss": 1.9654, "step": 17880000 }, { "epoch": 51.76, "learning_rate": 2.4130941147184143e-05, "loss": 2.0012, "step": 17880500 }, { "epoch": 51.76, "learning_rate": 2.4130217499536865e-05, "loss": 1.9971, "step": 17881000 }, { "epoch": 51.76, "learning_rate": 2.412949385188959e-05, "loss": 1.9865, "step": 17881500 }, { "epoch": 51.76, "learning_rate": 2.4128770204242313e-05, "loss": 2.0005, "step": 17882000 }, { "epoch": 51.76, "learning_rate": 2.4128046556595036e-05, "loss": 2.0104, "step": 17882500 }, { "epoch": 51.76, "learning_rate": 2.412732290894776e-05, "loss": 2.015, "step": 17883000 }, { "epoch": 51.77, "learning_rate": 2.412660070859578e-05, "loss": 2.0063, "step": 17883500 }, { "epoch": 51.77, "learning_rate": 2.4125878508243796e-05, "loss": 1.9833, "step": 17884000 }, { "epoch": 51.77, "learning_rate": 2.4125154860596518e-05, "loss": 1.985, "step": 17884500 }, { "epoch": 51.77, "learning_rate": 2.412443121294924e-05, "loss": 1.9923, "step": 17885000 }, { "epoch": 51.77, "learning_rate": 2.4123707565301966e-05, "loss": 1.9756, "step": 17885500 }, { "epoch": 51.77, "learning_rate": 2.4122983917654688e-05, "loss": 1.9833, "step": 17886000 }, { "epoch": 51.77, "learning_rate": 2.412226027000741e-05, "loss": 1.9929, "step": 17886500 }, { "epoch": 51.78, "learning_rate": 2.412153806965543e-05, "loss": 1.9785, "step": 17887000 }, { "epoch": 51.78, "learning_rate": 2.4120814422008155e-05, "loss": 2.0035, "step": 17887500 }, { "epoch": 51.78, "learning_rate": 2.412009222165617e-05, "loss": 1.97, "step": 17888000 }, { "epoch": 51.78, "learning_rate": 2.4119368574008893e-05, "loss": 1.9909, "step": 17888500 }, { "epoch": 51.78, "learning_rate": 2.4118644926361618e-05, "loss": 1.9951, "step": 17889000 }, { "epoch": 51.78, "learning_rate": 2.411792127871434e-05, "loss": 1.9855, "step": 17889500 }, { "epoch": 51.78, "learning_rate": 2.4117197631067063e-05, "loss": 1.9831, "step": 17890000 }, { "epoch": 51.79, "learning_rate": 2.4116473983419785e-05, "loss": 1.999, "step": 17890500 }, { "epoch": 51.79, "learning_rate": 2.411575033577251e-05, "loss": 1.9675, "step": 17891000 }, { "epoch": 51.79, "learning_rate": 2.4115026688125233e-05, "loss": 1.9946, "step": 17891500 }, { "epoch": 51.79, "learning_rate": 2.4114303040477955e-05, "loss": 1.9948, "step": 17892000 }, { "epoch": 51.79, "learning_rate": 2.411357939283068e-05, "loss": 1.9847, "step": 17892500 }, { "epoch": 51.79, "learning_rate": 2.4112855745183403e-05, "loss": 2.0055, "step": 17893000 }, { "epoch": 51.79, "learning_rate": 2.4112132097536125e-05, "loss": 2.0054, "step": 17893500 }, { "epoch": 51.8, "learning_rate": 2.4111408449888847e-05, "loss": 2.0047, "step": 17894000 }, { "epoch": 51.8, "learning_rate": 2.411068624953687e-05, "loss": 2.0053, "step": 17894500 }, { "epoch": 51.8, "learning_rate": 2.4109964049184885e-05, "loss": 1.9783, "step": 17895000 }, { "epoch": 51.8, "learning_rate": 2.4109240401537607e-05, "loss": 1.9767, "step": 17895500 }, { "epoch": 51.8, "learning_rate": 2.410851675389033e-05, "loss": 1.9955, "step": 17896000 }, { "epoch": 51.8, "learning_rate": 2.4107793106243055e-05, "loss": 1.9758, "step": 17896500 }, { "epoch": 51.8, "learning_rate": 2.4107069458595777e-05, "loss": 2.0083, "step": 17897000 }, { "epoch": 51.81, "learning_rate": 2.41063458109485e-05, "loss": 1.9777, "step": 17897500 }, { "epoch": 51.81, "learning_rate": 2.4105622163301222e-05, "loss": 2.0355, "step": 17898000 }, { "epoch": 51.81, "learning_rate": 2.4104898515653948e-05, "loss": 1.9953, "step": 17898500 }, { "epoch": 51.81, "learning_rate": 2.410417486800667e-05, "loss": 1.999, "step": 17899000 }, { "epoch": 51.81, "learning_rate": 2.4103451220359392e-05, "loss": 2.0195, "step": 17899500 }, { "epoch": 51.81, "learning_rate": 2.410272902000741e-05, "loss": 1.9829, "step": 17900000 }, { "epoch": 51.81, "learning_rate": 2.4102005372360133e-05, "loss": 1.9678, "step": 17900500 }, { "epoch": 51.82, "learning_rate": 2.4101281724712855e-05, "loss": 1.9793, "step": 17901000 }, { "epoch": 51.82, "learning_rate": 2.4100559524360874e-05, "loss": 2.0003, "step": 17901500 }, { "epoch": 51.82, "learning_rate": 2.40998358767136e-05, "loss": 1.987, "step": 17902000 }, { "epoch": 51.82, "learning_rate": 2.4099112229066322e-05, "loss": 2.0019, "step": 17902500 }, { "epoch": 51.82, "learning_rate": 2.409839002871434e-05, "loss": 1.9471, "step": 17903000 }, { "epoch": 51.82, "learning_rate": 2.4097666381067063e-05, "loss": 1.9942, "step": 17903500 }, { "epoch": 51.82, "learning_rate": 2.4096942733419786e-05, "loss": 2.0126, "step": 17904000 }, { "epoch": 51.83, "learning_rate": 2.4096219085772508e-05, "loss": 1.9953, "step": 17904500 }, { "epoch": 51.83, "learning_rate": 2.409549543812523e-05, "loss": 1.992, "step": 17905000 }, { "epoch": 51.83, "learning_rate": 2.4094771790477956e-05, "loss": 1.9968, "step": 17905500 }, { "epoch": 51.83, "learning_rate": 2.4094049590125975e-05, "loss": 1.9949, "step": 17906000 }, { "epoch": 51.83, "learning_rate": 2.4093325942478697e-05, "loss": 1.9983, "step": 17906500 }, { "epoch": 51.83, "learning_rate": 2.409260229483142e-05, "loss": 1.9753, "step": 17907000 }, { "epoch": 51.83, "learning_rate": 2.4091878647184145e-05, "loss": 1.9995, "step": 17907500 }, { "epoch": 51.84, "learning_rate": 2.4091154999536867e-05, "loss": 1.9909, "step": 17908000 }, { "epoch": 51.84, "learning_rate": 2.4090432799184883e-05, "loss": 1.9879, "step": 17908500 }, { "epoch": 51.84, "learning_rate": 2.40897105988329e-05, "loss": 2.0082, "step": 17909000 }, { "epoch": 51.84, "learning_rate": 2.4088986951185624e-05, "loss": 1.9986, "step": 17909500 }, { "epoch": 51.84, "learning_rate": 2.408826330353835e-05, "loss": 2.014, "step": 17910000 }, { "epoch": 51.84, "learning_rate": 2.408753965589107e-05, "loss": 1.988, "step": 17910500 }, { "epoch": 51.85, "learning_rate": 2.4086816008243794e-05, "loss": 2.0019, "step": 17911000 }, { "epoch": 51.85, "learning_rate": 2.408609236059652e-05, "loss": 1.972, "step": 17911500 }, { "epoch": 51.85, "learning_rate": 2.4085368712949242e-05, "loss": 1.9818, "step": 17912000 }, { "epoch": 51.85, "learning_rate": 2.4084645065301964e-05, "loss": 2.0015, "step": 17912500 }, { "epoch": 51.85, "learning_rate": 2.4083921417654686e-05, "loss": 2.0172, "step": 17913000 }, { "epoch": 51.85, "learning_rate": 2.4083197770007412e-05, "loss": 2.0059, "step": 17913500 }, { "epoch": 51.85, "learning_rate": 2.4082474122360134e-05, "loss": 1.9894, "step": 17914000 }, { "epoch": 51.86, "learning_rate": 2.408175047471286e-05, "loss": 1.9931, "step": 17914500 }, { "epoch": 51.86, "learning_rate": 2.4081026827065582e-05, "loss": 2.0055, "step": 17915000 }, { "epoch": 51.86, "learning_rate": 2.4080303179418304e-05, "loss": 1.9908, "step": 17915500 }, { "epoch": 51.86, "learning_rate": 2.4079579531771026e-05, "loss": 1.9894, "step": 17916000 }, { "epoch": 51.86, "learning_rate": 2.407885588412375e-05, "loss": 2.0038, "step": 17916500 }, { "epoch": 51.86, "learning_rate": 2.4078132236476474e-05, "loss": 1.9793, "step": 17917000 }, { "epoch": 51.86, "learning_rate": 2.4077408588829196e-05, "loss": 1.982, "step": 17917500 }, { "epoch": 51.87, "learning_rate": 2.4076684941181922e-05, "loss": 2.0041, "step": 17918000 }, { "epoch": 51.87, "learning_rate": 2.4075961293534644e-05, "loss": 1.9675, "step": 17918500 }, { "epoch": 51.87, "learning_rate": 2.407523909318266e-05, "loss": 1.9917, "step": 17919000 }, { "epoch": 51.87, "learning_rate": 2.4074515445535385e-05, "loss": 2.0117, "step": 17919500 }, { "epoch": 51.87, "learning_rate": 2.4073794692478698e-05, "loss": 1.9785, "step": 17920000 }, { "epoch": 51.87, "learning_rate": 2.407307104483142e-05, "loss": 1.9916, "step": 17920500 }, { "epoch": 51.87, "learning_rate": 2.4072347397184146e-05, "loss": 2.0303, "step": 17921000 }, { "epoch": 51.88, "learning_rate": 2.4071623749536868e-05, "loss": 1.9962, "step": 17921500 }, { "epoch": 51.88, "learning_rate": 2.407090010188959e-05, "loss": 1.9977, "step": 17922000 }, { "epoch": 51.88, "learning_rate": 2.4070176454242312e-05, "loss": 1.9996, "step": 17922500 }, { "epoch": 51.88, "learning_rate": 2.4069452806595035e-05, "loss": 1.9966, "step": 17923000 }, { "epoch": 51.88, "learning_rate": 2.4068730606243053e-05, "loss": 2.0157, "step": 17923500 }, { "epoch": 51.88, "learning_rate": 2.4068006958595776e-05, "loss": 2.0132, "step": 17924000 }, { "epoch": 51.88, "learning_rate": 2.4067284758243795e-05, "loss": 1.9959, "step": 17924500 }, { "epoch": 51.89, "learning_rate": 2.406656111059652e-05, "loss": 2.0228, "step": 17925000 }, { "epoch": 51.89, "learning_rate": 2.4065837462949242e-05, "loss": 1.9954, "step": 17925500 }, { "epoch": 51.89, "learning_rate": 2.4065113815301965e-05, "loss": 1.9962, "step": 17926000 }, { "epoch": 51.89, "learning_rate": 2.4064390167654687e-05, "loss": 1.9922, "step": 17926500 }, { "epoch": 51.89, "learning_rate": 2.406366652000741e-05, "loss": 2.0128, "step": 17927000 }, { "epoch": 51.89, "learning_rate": 2.4062942872360135e-05, "loss": 1.9784, "step": 17927500 }, { "epoch": 51.89, "learning_rate": 2.406222067200815e-05, "loss": 1.9981, "step": 17928000 }, { "epoch": 51.9, "learning_rate": 2.4061497024360876e-05, "loss": 2.0066, "step": 17928500 }, { "epoch": 51.9, "learning_rate": 2.4060773376713598e-05, "loss": 1.9799, "step": 17929000 }, { "epoch": 51.9, "learning_rate": 2.4060049729066324e-05, "loss": 1.9834, "step": 17929500 }, { "epoch": 51.9, "learning_rate": 2.4059326081419046e-05, "loss": 1.9965, "step": 17930000 }, { "epoch": 51.9, "learning_rate": 2.405860243377177e-05, "loss": 1.9857, "step": 17930500 }, { "epoch": 51.9, "learning_rate": 2.405787878612449e-05, "loss": 1.989, "step": 17931000 }, { "epoch": 51.9, "learning_rate": 2.4057155138477213e-05, "loss": 1.9924, "step": 17931500 }, { "epoch": 51.91, "learning_rate": 2.405643149082994e-05, "loss": 1.9879, "step": 17932000 }, { "epoch": 51.91, "learning_rate": 2.4055709290477957e-05, "loss": 1.9851, "step": 17932500 }, { "epoch": 51.91, "learning_rate": 2.405498564283068e-05, "loss": 1.9867, "step": 17933000 }, { "epoch": 51.91, "learning_rate": 2.40542634424787e-05, "loss": 1.9812, "step": 17933500 }, { "epoch": 51.91, "learning_rate": 2.405353979483142e-05, "loss": 1.9995, "step": 17934000 }, { "epoch": 51.91, "learning_rate": 2.4052816147184143e-05, "loss": 1.9925, "step": 17934500 }, { "epoch": 51.91, "learning_rate": 2.4052092499536865e-05, "loss": 2.0056, "step": 17935000 }, { "epoch": 51.92, "learning_rate": 2.4051368851889587e-05, "loss": 2.0094, "step": 17935500 }, { "epoch": 51.92, "learning_rate": 2.4050645204242313e-05, "loss": 1.9854, "step": 17936000 }, { "epoch": 51.92, "learning_rate": 2.4049921556595035e-05, "loss": 1.9946, "step": 17936500 }, { "epoch": 51.92, "learning_rate": 2.404919790894776e-05, "loss": 1.9916, "step": 17937000 }, { "epoch": 51.92, "learning_rate": 2.4048474261300483e-05, "loss": 1.9931, "step": 17937500 }, { "epoch": 51.92, "learning_rate": 2.4047753508243795e-05, "loss": 2.0235, "step": 17938000 }, { "epoch": 51.92, "learning_rate": 2.4047029860596518e-05, "loss": 2.0159, "step": 17938500 }, { "epoch": 51.93, "learning_rate": 2.404630621294924e-05, "loss": 2.0153, "step": 17939000 }, { "epoch": 51.93, "learning_rate": 2.4045582565301966e-05, "loss": 1.9665, "step": 17939500 }, { "epoch": 51.93, "learning_rate": 2.4044858917654688e-05, "loss": 2.0157, "step": 17940000 }, { "epoch": 51.93, "learning_rate": 2.4044135270007413e-05, "loss": 1.9814, "step": 17940500 }, { "epoch": 51.93, "learning_rate": 2.4043411622360136e-05, "loss": 1.9936, "step": 17941000 }, { "epoch": 51.93, "learning_rate": 2.4042687974712858e-05, "loss": 1.9912, "step": 17941500 }, { "epoch": 51.93, "learning_rate": 2.404196432706558e-05, "loss": 1.9819, "step": 17942000 }, { "epoch": 51.94, "learning_rate": 2.4041240679418302e-05, "loss": 1.997, "step": 17942500 }, { "epoch": 51.94, "learning_rate": 2.4040517031771028e-05, "loss": 2.0217, "step": 17943000 }, { "epoch": 51.94, "learning_rate": 2.4039794831419047e-05, "loss": 2.0166, "step": 17943500 }, { "epoch": 51.94, "learning_rate": 2.403907118377177e-05, "loss": 1.9938, "step": 17944000 }, { "epoch": 51.94, "learning_rate": 2.403834753612449e-05, "loss": 1.9768, "step": 17944500 }, { "epoch": 51.94, "learning_rate": 2.403762533577251e-05, "loss": 1.9886, "step": 17945000 }, { "epoch": 51.94, "learning_rate": 2.4036903135420526e-05, "loss": 1.9923, "step": 17945500 }, { "epoch": 51.95, "learning_rate": 2.403617948777325e-05, "loss": 2.0024, "step": 17946000 }, { "epoch": 51.95, "learning_rate": 2.4035455840125974e-05, "loss": 1.9859, "step": 17946500 }, { "epoch": 51.95, "learning_rate": 2.40347321924787e-05, "loss": 1.9866, "step": 17947000 }, { "epoch": 51.95, "learning_rate": 2.403400854483142e-05, "loss": 1.9836, "step": 17947500 }, { "epoch": 51.95, "learning_rate": 2.4033284897184144e-05, "loss": 1.9876, "step": 17948000 }, { "epoch": 51.95, "learning_rate": 2.4032562696832163e-05, "loss": 1.9725, "step": 17948500 }, { "epoch": 51.96, "learning_rate": 2.4031839049184885e-05, "loss": 1.9994, "step": 17949000 }, { "epoch": 51.96, "learning_rate": 2.4031115401537607e-05, "loss": 1.9762, "step": 17949500 }, { "epoch": 51.96, "learning_rate": 2.403039175389033e-05, "loss": 2.0033, "step": 17950000 }, { "epoch": 51.96, "learning_rate": 2.402966810624305e-05, "loss": 1.9961, "step": 17950500 }, { "epoch": 51.96, "learning_rate": 2.4028944458595777e-05, "loss": 2.0289, "step": 17951000 }, { "epoch": 51.96, "learning_rate": 2.4028220810948503e-05, "loss": 1.9946, "step": 17951500 }, { "epoch": 51.96, "learning_rate": 2.4027497163301225e-05, "loss": 1.9834, "step": 17952000 }, { "epoch": 51.97, "learning_rate": 2.4026773515653947e-05, "loss": 1.9938, "step": 17952500 }, { "epoch": 51.97, "learning_rate": 2.402604986800667e-05, "loss": 2.005, "step": 17953000 }, { "epoch": 51.97, "learning_rate": 2.4025326220359392e-05, "loss": 2.0028, "step": 17953500 }, { "epoch": 51.97, "learning_rate": 2.4024602572712114e-05, "loss": 2.0058, "step": 17954000 }, { "epoch": 51.97, "learning_rate": 2.402387892506484e-05, "loss": 1.9653, "step": 17954500 }, { "epoch": 51.97, "learning_rate": 2.402315672471286e-05, "loss": 2.0039, "step": 17955000 }, { "epoch": 51.97, "learning_rate": 2.402243307706558e-05, "loss": 2.0044, "step": 17955500 }, { "epoch": 51.98, "learning_rate": 2.4021709429418303e-05, "loss": 2.0221, "step": 17956000 }, { "epoch": 51.98, "learning_rate": 2.402098578177103e-05, "loss": 1.9855, "step": 17956500 }, { "epoch": 51.98, "learning_rate": 2.402026213412375e-05, "loss": 2.014, "step": 17957000 }, { "epoch": 51.98, "learning_rate": 2.4019538486476473e-05, "loss": 1.9889, "step": 17957500 }, { "epoch": 51.98, "learning_rate": 2.40188148388292e-05, "loss": 2.0233, "step": 17958000 }, { "epoch": 51.98, "learning_rate": 2.401809408577251e-05, "loss": 2.002, "step": 17958500 }, { "epoch": 51.98, "learning_rate": 2.4017371885420527e-05, "loss": 1.9845, "step": 17959000 }, { "epoch": 51.99, "learning_rate": 2.4016649685068546e-05, "loss": 2.0269, "step": 17959500 }, { "epoch": 51.99, "learning_rate": 2.4015926037421268e-05, "loss": 2.0001, "step": 17960000 }, { "epoch": 51.99, "learning_rate": 2.401520238977399e-05, "loss": 1.9809, "step": 17960500 }, { "epoch": 51.99, "learning_rate": 2.4014478742126716e-05, "loss": 2.0207, "step": 17961000 }, { "epoch": 51.99, "learning_rate": 2.401375654177473e-05, "loss": 2.0085, "step": 17961500 }, { "epoch": 51.99, "learning_rate": 2.4013032894127453e-05, "loss": 1.9825, "step": 17962000 }, { "epoch": 51.99, "learning_rate": 2.401230924648018e-05, "loss": 2.0076, "step": 17962500 }, { "epoch": 52.0, "learning_rate": 2.4011585598832905e-05, "loss": 2.0011, "step": 17963000 }, { "epoch": 52.0, "learning_rate": 2.4010861951185627e-05, "loss": 1.9912, "step": 17963500 }, { "epoch": 52.0, "learning_rate": 2.401013830353835e-05, "loss": 2.023, "step": 17964000 }, { "epoch": 52.0, "learning_rate": 2.400941465589107e-05, "loss": 2.0113, "step": 17964500 }, { "epoch": 52.0, "eval_accuracy": 0.6733575656076577, "eval_accuracy_mlm": 0.6398535085439098, "eval_accuracy_nsp": 0.8529352454020065, "eval_loss": 2.1604368686676025, "eval_runtime": 331.8083, "eval_samples_per_second": 1315.175, "eval_steps_per_second": 54.8, "step": 17964544 }, { "epoch": 52.0, "learning_rate": 2.4008691008243794e-05, "loss": 1.9692, "step": 17965000 }, { "epoch": 52.0, "learning_rate": 2.4007967360596516e-05, "loss": 1.9905, "step": 17965500 }, { "epoch": 52.0, "learning_rate": 2.400724371294924e-05, "loss": 1.9939, "step": 17966000 }, { "epoch": 52.01, "learning_rate": 2.400652151259726e-05, "loss": 1.9971, "step": 17966500 }, { "epoch": 52.01, "learning_rate": 2.4005797864949983e-05, "loss": 1.9688, "step": 17967000 }, { "epoch": 52.01, "learning_rate": 2.4005074217302705e-05, "loss": 1.9806, "step": 17967500 }, { "epoch": 52.01, "learning_rate": 2.4004350569655427e-05, "loss": 1.9852, "step": 17968000 }, { "epoch": 52.01, "learning_rate": 2.4003626922008153e-05, "loss": 1.9637, "step": 17968500 }, { "epoch": 52.01, "learning_rate": 2.400290472165617e-05, "loss": 1.9664, "step": 17969000 }, { "epoch": 52.01, "learning_rate": 2.400218107400889e-05, "loss": 1.9848, "step": 17969500 }, { "epoch": 52.02, "learning_rate": 2.4001457426361616e-05, "loss": 1.9547, "step": 17970000 }, { "epoch": 52.02, "learning_rate": 2.4000733778714342e-05, "loss": 1.9873, "step": 17970500 }, { "epoch": 52.02, "learning_rate": 2.4000011578362357e-05, "loss": 1.9764, "step": 17971000 }, { "epoch": 52.02, "learning_rate": 2.399928793071508e-05, "loss": 1.9571, "step": 17971500 }, { "epoch": 52.02, "learning_rate": 2.3998564283067805e-05, "loss": 1.9932, "step": 17972000 }, { "epoch": 52.02, "learning_rate": 2.3997840635420527e-05, "loss": 2.0068, "step": 17972500 }, { "epoch": 52.02, "learning_rate": 2.399711698777325e-05, "loss": 1.9844, "step": 17973000 }, { "epoch": 52.03, "learning_rate": 2.3996393340125975e-05, "loss": 1.9721, "step": 17973500 }, { "epoch": 52.03, "learning_rate": 2.3995669692478698e-05, "loss": 1.9976, "step": 17974000 }, { "epoch": 52.03, "learning_rate": 2.399494604483142e-05, "loss": 1.9776, "step": 17974500 }, { "epoch": 52.03, "learning_rate": 2.399422384447944e-05, "loss": 1.9746, "step": 17975000 }, { "epoch": 52.03, "learning_rate": 2.399350019683216e-05, "loss": 1.9654, "step": 17975500 }, { "epoch": 52.03, "learning_rate": 2.3992776549184883e-05, "loss": 1.9853, "step": 17976000 }, { "epoch": 52.03, "learning_rate": 2.3992052901537605e-05, "loss": 1.9811, "step": 17976500 }, { "epoch": 52.04, "learning_rate": 2.399132925389033e-05, "loss": 1.9706, "step": 17977000 }, { "epoch": 52.04, "learning_rate": 2.3990605606243057e-05, "loss": 1.9728, "step": 17977500 }, { "epoch": 52.04, "learning_rate": 2.398988195859578e-05, "loss": 1.9912, "step": 17978000 }, { "epoch": 52.04, "learning_rate": 2.39891583109485e-05, "loss": 1.9626, "step": 17978500 }, { "epoch": 52.04, "learning_rate": 2.3988434663301223e-05, "loss": 1.9793, "step": 17979000 }, { "epoch": 52.04, "learning_rate": 2.3987711015653946e-05, "loss": 1.9737, "step": 17979500 }, { "epoch": 52.04, "learning_rate": 2.3986987368006668e-05, "loss": 1.9903, "step": 17980000 }, { "epoch": 52.05, "learning_rate": 2.3986265167654687e-05, "loss": 1.9732, "step": 17980500 }, { "epoch": 52.05, "learning_rate": 2.3985541520007412e-05, "loss": 1.9862, "step": 17981000 }, { "epoch": 52.05, "learning_rate": 2.3984817872360135e-05, "loss": 1.9599, "step": 17981500 }, { "epoch": 52.05, "learning_rate": 2.3984094224712857e-05, "loss": 1.9863, "step": 17982000 }, { "epoch": 52.05, "learning_rate": 2.3983370577065582e-05, "loss": 1.9879, "step": 17982500 }, { "epoch": 52.05, "learning_rate": 2.3982648376713598e-05, "loss": 1.9867, "step": 17983000 }, { "epoch": 52.05, "learning_rate": 2.398192472906632e-05, "loss": 2.0022, "step": 17983500 }, { "epoch": 52.06, "learning_rate": 2.3981201081419042e-05, "loss": 1.9627, "step": 17984000 }, { "epoch": 52.06, "learning_rate": 2.3980477433771768e-05, "loss": 1.9772, "step": 17984500 }, { "epoch": 52.06, "learning_rate": 2.3979755233419787e-05, "loss": 1.9667, "step": 17985000 }, { "epoch": 52.06, "learning_rate": 2.397903158577251e-05, "loss": 1.9771, "step": 17985500 }, { "epoch": 52.06, "learning_rate": 2.397830793812523e-05, "loss": 1.9553, "step": 17986000 }, { "epoch": 52.06, "learning_rate": 2.3977584290477957e-05, "loss": 1.9609, "step": 17986500 }, { "epoch": 52.07, "learning_rate": 2.397686064283068e-05, "loss": 1.9716, "step": 17987000 }, { "epoch": 52.07, "learning_rate": 2.39761369951834e-05, "loss": 1.9825, "step": 17987500 }, { "epoch": 52.07, "learning_rate": 2.3975413347536127e-05, "loss": 1.9891, "step": 17988000 }, { "epoch": 52.07, "learning_rate": 2.397468969988885e-05, "loss": 1.9685, "step": 17988500 }, { "epoch": 52.07, "learning_rate": 2.3973966052241572e-05, "loss": 1.9624, "step": 17989000 }, { "epoch": 52.07, "learning_rate": 2.3973242404594294e-05, "loss": 1.97, "step": 17989500 }, { "epoch": 52.07, "learning_rate": 2.397251875694702e-05, "loss": 1.9468, "step": 17990000 }, { "epoch": 52.08, "learning_rate": 2.3971795109299742e-05, "loss": 1.9825, "step": 17990500 }, { "epoch": 52.08, "learning_rate": 2.3971071461652464e-05, "loss": 1.9772, "step": 17991000 }, { "epoch": 52.08, "learning_rate": 2.397034781400519e-05, "loss": 1.9851, "step": 17991500 }, { "epoch": 52.08, "learning_rate": 2.396962561365321e-05, "loss": 1.9617, "step": 17992000 }, { "epoch": 52.08, "learning_rate": 2.396890196600593e-05, "loss": 1.9892, "step": 17992500 }, { "epoch": 52.08, "learning_rate": 2.3968178318358653e-05, "loss": 1.9438, "step": 17993000 }, { "epoch": 52.08, "learning_rate": 2.3967454670711375e-05, "loss": 1.9785, "step": 17993500 }, { "epoch": 52.09, "learning_rate": 2.3966733917654688e-05, "loss": 1.9942, "step": 17994000 }, { "epoch": 52.09, "learning_rate": 2.396601027000741e-05, "loss": 1.9652, "step": 17994500 }, { "epoch": 52.09, "learning_rate": 2.396528806965543e-05, "loss": 1.9555, "step": 17995000 }, { "epoch": 52.09, "learning_rate": 2.396456442200815e-05, "loss": 1.9545, "step": 17995500 }, { "epoch": 52.09, "learning_rate": 2.3963840774360877e-05, "loss": 1.9717, "step": 17996000 }, { "epoch": 52.09, "learning_rate": 2.39631171267136e-05, "loss": 1.9879, "step": 17996500 }, { "epoch": 52.09, "learning_rate": 2.396239347906632e-05, "loss": 1.9426, "step": 17997000 }, { "epoch": 52.1, "learning_rate": 2.3961669831419047e-05, "loss": 1.9578, "step": 17997500 }, { "epoch": 52.1, "learning_rate": 2.396094618377177e-05, "loss": 1.9701, "step": 17998000 }, { "epoch": 52.1, "learning_rate": 2.396022253612449e-05, "loss": 2.0016, "step": 17998500 }, { "epoch": 52.1, "learning_rate": 2.395950033577251e-05, "loss": 1.9863, "step": 17999000 }, { "epoch": 52.1, "learning_rate": 2.3958776688125236e-05, "loss": 1.9981, "step": 17999500 }, { "epoch": 52.1, "learning_rate": 2.3958053040477958e-05, "loss": 1.9824, "step": 18000000 }, { "epoch": 52.1, "learning_rate": 2.395732939283068e-05, "loss": 1.9898, "step": 18000500 }, { "epoch": 52.11, "learning_rate": 2.3956605745183402e-05, "loss": 1.9961, "step": 18001000 }, { "epoch": 52.11, "learning_rate": 2.3955882097536125e-05, "loss": 1.9645, "step": 18001500 }, { "epoch": 52.11, "learning_rate": 2.3955158449888847e-05, "loss": 1.9396, "step": 18002000 }, { "epoch": 52.11, "learning_rate": 2.3954434802241572e-05, "loss": 1.9814, "step": 18002500 }, { "epoch": 52.11, "learning_rate": 2.3953711154594298e-05, "loss": 1.9939, "step": 18003000 }, { "epoch": 52.11, "learning_rate": 2.3952988954242314e-05, "loss": 1.9722, "step": 18003500 }, { "epoch": 52.11, "learning_rate": 2.3952265306595036e-05, "loss": 1.9796, "step": 18004000 }, { "epoch": 52.12, "learning_rate": 2.3951541658947758e-05, "loss": 1.9788, "step": 18004500 }, { "epoch": 52.12, "learning_rate": 2.3950818011300484e-05, "loss": 1.9689, "step": 18005000 }, { "epoch": 52.12, "learning_rate": 2.3950094363653206e-05, "loss": 1.9648, "step": 18005500 }, { "epoch": 52.12, "learning_rate": 2.3949370716005928e-05, "loss": 1.9719, "step": 18006000 }, { "epoch": 52.12, "learning_rate": 2.3948648515653947e-05, "loss": 1.9751, "step": 18006500 }, { "epoch": 52.12, "learning_rate": 2.3947924868006673e-05, "loss": 1.9792, "step": 18007000 }, { "epoch": 52.12, "learning_rate": 2.3947201220359395e-05, "loss": 1.9894, "step": 18007500 }, { "epoch": 52.13, "learning_rate": 2.3946477572712117e-05, "loss": 1.982, "step": 18008000 }, { "epoch": 52.13, "learning_rate": 2.394575392506484e-05, "loss": 1.9997, "step": 18008500 }, { "epoch": 52.13, "learning_rate": 2.3945030277417562e-05, "loss": 1.982, "step": 18009000 }, { "epoch": 52.13, "learning_rate": 2.3944306629770284e-05, "loss": 1.9933, "step": 18009500 }, { "epoch": 52.13, "learning_rate": 2.394358298212301e-05, "loss": 1.963, "step": 18010000 }, { "epoch": 52.13, "learning_rate": 2.394286078177103e-05, "loss": 1.9744, "step": 18010500 }, { "epoch": 52.13, "learning_rate": 2.394213713412375e-05, "loss": 1.9675, "step": 18011000 }, { "epoch": 52.14, "learning_rate": 2.3941413486476473e-05, "loss": 1.979, "step": 18011500 }, { "epoch": 52.14, "learning_rate": 2.39406898388292e-05, "loss": 1.9752, "step": 18012000 }, { "epoch": 52.14, "learning_rate": 2.393996619118192e-05, "loss": 1.9485, "step": 18012500 }, { "epoch": 52.14, "learning_rate": 2.3939243990829936e-05, "loss": 1.9757, "step": 18013000 }, { "epoch": 52.14, "learning_rate": 2.3938520343182662e-05, "loss": 1.9781, "step": 18013500 }, { "epoch": 52.14, "learning_rate": 2.3937798142830678e-05, "loss": 1.9736, "step": 18014000 }, { "epoch": 52.14, "learning_rate": 2.3937074495183403e-05, "loss": 1.9807, "step": 18014500 }, { "epoch": 52.15, "learning_rate": 2.3936350847536125e-05, "loss": 1.9986, "step": 18015000 }, { "epoch": 52.15, "learning_rate": 2.3935627199888848e-05, "loss": 1.9802, "step": 18015500 }, { "epoch": 52.15, "learning_rate": 2.3934903552241573e-05, "loss": 1.9634, "step": 18016000 }, { "epoch": 52.15, "learning_rate": 2.3934179904594296e-05, "loss": 2.0083, "step": 18016500 }, { "epoch": 52.15, "learning_rate": 2.3933456256947018e-05, "loss": 1.9671, "step": 18017000 }, { "epoch": 52.15, "learning_rate": 2.393273260929974e-05, "loss": 1.9985, "step": 18017500 }, { "epoch": 52.15, "learning_rate": 2.3932008961652466e-05, "loss": 1.9947, "step": 18018000 }, { "epoch": 52.16, "learning_rate": 2.3931286761300485e-05, "loss": 1.9817, "step": 18018500 }, { "epoch": 52.16, "learning_rate": 2.3930563113653207e-05, "loss": 1.9745, "step": 18019000 }, { "epoch": 52.16, "learning_rate": 2.392984236059652e-05, "loss": 1.9691, "step": 18019500 }, { "epoch": 52.16, "learning_rate": 2.392911871294924e-05, "loss": 1.9734, "step": 18020000 }, { "epoch": 52.16, "learning_rate": 2.3928395065301964e-05, "loss": 1.9939, "step": 18020500 }, { "epoch": 52.16, "learning_rate": 2.3927671417654686e-05, "loss": 1.9912, "step": 18021000 }, { "epoch": 52.16, "learning_rate": 2.392694777000741e-05, "loss": 2.0022, "step": 18021500 }, { "epoch": 52.17, "learning_rate": 2.3926224122360137e-05, "loss": 1.9792, "step": 18022000 }, { "epoch": 52.17, "learning_rate": 2.392550047471286e-05, "loss": 1.9674, "step": 18022500 }, { "epoch": 52.17, "learning_rate": 2.392477682706558e-05, "loss": 1.9699, "step": 18023000 }, { "epoch": 52.17, "learning_rate": 2.3924053179418304e-05, "loss": 1.9815, "step": 18023500 }, { "epoch": 52.17, "learning_rate": 2.3923330979066323e-05, "loss": 1.9926, "step": 18024000 }, { "epoch": 52.17, "learning_rate": 2.3922607331419045e-05, "loss": 2.0032, "step": 18024500 }, { "epoch": 52.17, "learning_rate": 2.3921883683771767e-05, "loss": 1.9983, "step": 18025000 }, { "epoch": 52.18, "learning_rate": 2.3921160036124493e-05, "loss": 1.9787, "step": 18025500 }, { "epoch": 52.18, "learning_rate": 2.392043783577251e-05, "loss": 1.9775, "step": 18026000 }, { "epoch": 52.18, "learning_rate": 2.3919714188125234e-05, "loss": 1.9896, "step": 18026500 }, { "epoch": 52.18, "learning_rate": 2.3918990540477956e-05, "loss": 1.9954, "step": 18027000 }, { "epoch": 52.18, "learning_rate": 2.391826689283068e-05, "loss": 1.9906, "step": 18027500 }, { "epoch": 52.18, "learning_rate": 2.39175432451834e-05, "loss": 1.9733, "step": 18028000 }, { "epoch": 52.19, "learning_rate": 2.3916819597536126e-05, "loss": 1.9725, "step": 18028500 }, { "epoch": 52.19, "learning_rate": 2.391609594988885e-05, "loss": 1.9827, "step": 18029000 }, { "epoch": 52.19, "learning_rate": 2.3915372302241574e-05, "loss": 1.9928, "step": 18029500 }, { "epoch": 52.19, "learning_rate": 2.391465010188959e-05, "loss": 1.9937, "step": 18030000 }, { "epoch": 52.19, "learning_rate": 2.3913926454242315e-05, "loss": 1.9943, "step": 18030500 }, { "epoch": 52.19, "learning_rate": 2.3913202806595037e-05, "loss": 1.994, "step": 18031000 }, { "epoch": 52.19, "learning_rate": 2.391247915894776e-05, "loss": 1.9545, "step": 18031500 }, { "epoch": 52.2, "learning_rate": 2.3911755511300482e-05, "loss": 1.9868, "step": 18032000 }, { "epoch": 52.2, "learning_rate": 2.3911031863653204e-05, "loss": 1.9876, "step": 18032500 }, { "epoch": 52.2, "learning_rate": 2.3910309663301223e-05, "loss": 1.9849, "step": 18033000 }, { "epoch": 52.2, "learning_rate": 2.390958601565395e-05, "loss": 1.9906, "step": 18033500 }, { "epoch": 52.2, "learning_rate": 2.390886236800667e-05, "loss": 1.9954, "step": 18034000 }, { "epoch": 52.2, "learning_rate": 2.3908138720359393e-05, "loss": 1.9781, "step": 18034500 }, { "epoch": 52.2, "learning_rate": 2.3907415072712115e-05, "loss": 1.9885, "step": 18035000 }, { "epoch": 52.21, "learning_rate": 2.3906691425064838e-05, "loss": 1.9517, "step": 18035500 }, { "epoch": 52.21, "learning_rate": 2.3905969224712857e-05, "loss": 1.9748, "step": 18036000 }, { "epoch": 52.21, "learning_rate": 2.390524557706558e-05, "loss": 1.9728, "step": 18036500 }, { "epoch": 52.21, "learning_rate": 2.3904521929418304e-05, "loss": 1.9622, "step": 18037000 }, { "epoch": 52.21, "learning_rate": 2.3903798281771027e-05, "loss": 1.9897, "step": 18037500 }, { "epoch": 52.21, "learning_rate": 2.3903074634123752e-05, "loss": 1.9903, "step": 18038000 }, { "epoch": 52.21, "learning_rate": 2.3902352433771768e-05, "loss": 1.961, "step": 18038500 }, { "epoch": 52.22, "learning_rate": 2.390162878612449e-05, "loss": 1.9802, "step": 18039000 }, { "epoch": 52.22, "learning_rate": 2.3900905138477216e-05, "loss": 1.9555, "step": 18039500 }, { "epoch": 52.22, "learning_rate": 2.3900181490829938e-05, "loss": 1.9787, "step": 18040000 }, { "epoch": 52.22, "learning_rate": 2.3899457843182664e-05, "loss": 1.9865, "step": 18040500 }, { "epoch": 52.22, "learning_rate": 2.3898734195535386e-05, "loss": 1.9977, "step": 18041000 }, { "epoch": 52.22, "learning_rate": 2.3898010547888108e-05, "loss": 1.9654, "step": 18041500 }, { "epoch": 52.22, "learning_rate": 2.389728690024083e-05, "loss": 1.9967, "step": 18042000 }, { "epoch": 52.23, "learning_rate": 2.389656469988885e-05, "loss": 1.9846, "step": 18042500 }, { "epoch": 52.23, "learning_rate": 2.389584105224157e-05, "loss": 1.995, "step": 18043000 }, { "epoch": 52.23, "learning_rate": 2.3895117404594294e-05, "loss": 1.974, "step": 18043500 }, { "epoch": 52.23, "learning_rate": 2.3894395204242313e-05, "loss": 2.0012, "step": 18044000 }, { "epoch": 52.23, "learning_rate": 2.3893671556595038e-05, "loss": 1.9901, "step": 18044500 }, { "epoch": 52.23, "learning_rate": 2.389294790894776e-05, "loss": 1.9788, "step": 18045000 }, { "epoch": 52.23, "learning_rate": 2.3892224261300483e-05, "loss": 1.973, "step": 18045500 }, { "epoch": 52.24, "learning_rate": 2.3891500613653205e-05, "loss": 1.9799, "step": 18046000 }, { "epoch": 52.24, "learning_rate": 2.3890776966005927e-05, "loss": 1.9983, "step": 18046500 }, { "epoch": 52.24, "learning_rate": 2.3890053318358653e-05, "loss": 2.0009, "step": 18047000 }, { "epoch": 52.24, "learning_rate": 2.3889329670711375e-05, "loss": 1.9982, "step": 18047500 }, { "epoch": 52.24, "learning_rate": 2.38886060230641e-05, "loss": 1.9898, "step": 18048000 }, { "epoch": 52.24, "learning_rate": 2.3887882375416823e-05, "loss": 1.9818, "step": 18048500 }, { "epoch": 52.24, "learning_rate": 2.3887160175064842e-05, "loss": 1.9552, "step": 18049000 }, { "epoch": 52.25, "learning_rate": 2.3886436527417564e-05, "loss": 1.9755, "step": 18049500 }, { "epoch": 52.25, "learning_rate": 2.3885712879770286e-05, "loss": 1.977, "step": 18050000 }, { "epoch": 52.25, "learning_rate": 2.3884990679418305e-05, "loss": 1.9645, "step": 18050500 }, { "epoch": 52.25, "learning_rate": 2.3884267031771028e-05, "loss": 1.9909, "step": 18051000 }, { "epoch": 52.25, "learning_rate": 2.388354338412375e-05, "loss": 1.9592, "step": 18051500 }, { "epoch": 52.25, "learning_rate": 2.3882819736476475e-05, "loss": 1.9839, "step": 18052000 }, { "epoch": 52.25, "learning_rate": 2.3882096088829198e-05, "loss": 1.9772, "step": 18052500 }, { "epoch": 52.26, "learning_rate": 2.388137244118192e-05, "loss": 1.9695, "step": 18053000 }, { "epoch": 52.26, "learning_rate": 2.3880648793534642e-05, "loss": 1.987, "step": 18053500 }, { "epoch": 52.26, "learning_rate": 2.3879925145887368e-05, "loss": 1.9771, "step": 18054000 }, { "epoch": 52.26, "learning_rate": 2.3879202945535383e-05, "loss": 2.0041, "step": 18054500 }, { "epoch": 52.26, "learning_rate": 2.3878480745183402e-05, "loss": 1.9763, "step": 18055000 }, { "epoch": 52.26, "learning_rate": 2.3877757097536128e-05, "loss": 1.9817, "step": 18055500 }, { "epoch": 52.26, "learning_rate": 2.387703344988885e-05, "loss": 1.9894, "step": 18056000 }, { "epoch": 52.27, "learning_rate": 2.3876309802241572e-05, "loss": 1.97, "step": 18056500 }, { "epoch": 52.27, "learning_rate": 2.3875586154594295e-05, "loss": 1.9891, "step": 18057000 }, { "epoch": 52.27, "learning_rate": 2.3874862506947017e-05, "loss": 2.0176, "step": 18057500 }, { "epoch": 52.27, "learning_rate": 2.3874138859299742e-05, "loss": 1.9646, "step": 18058000 }, { "epoch": 52.27, "learning_rate": 2.3873415211652465e-05, "loss": 1.9566, "step": 18058500 }, { "epoch": 52.27, "learning_rate": 2.387269156400519e-05, "loss": 1.9841, "step": 18059000 }, { "epoch": 52.27, "learning_rate": 2.3871969363653206e-05, "loss": 1.9879, "step": 18059500 }, { "epoch": 52.28, "learning_rate": 2.387124571600593e-05, "loss": 1.9915, "step": 18060000 }, { "epoch": 52.28, "learning_rate": 2.3870522068358654e-05, "loss": 1.9888, "step": 18060500 }, { "epoch": 52.28, "learning_rate": 2.3869798420711376e-05, "loss": 1.997, "step": 18061000 }, { "epoch": 52.28, "learning_rate": 2.3869074773064098e-05, "loss": 1.977, "step": 18061500 }, { "epoch": 52.28, "learning_rate": 2.386835112541682e-05, "loss": 1.9671, "step": 18062000 }, { "epoch": 52.28, "learning_rate": 2.3867627477769543e-05, "loss": 1.9852, "step": 18062500 }, { "epoch": 52.28, "learning_rate": 2.3866903830122268e-05, "loss": 1.9899, "step": 18063000 }, { "epoch": 52.29, "learning_rate": 2.3866180182474994e-05, "loss": 1.9639, "step": 18063500 }, { "epoch": 52.29, "learning_rate": 2.3865456534827716e-05, "loss": 1.982, "step": 18064000 }, { "epoch": 52.29, "learning_rate": 2.386473433447573e-05, "loss": 1.9786, "step": 18064500 }, { "epoch": 52.29, "learning_rate": 2.3864010686828457e-05, "loss": 2.0186, "step": 18065000 }, { "epoch": 52.29, "learning_rate": 2.3863288486476473e-05, "loss": 1.9776, "step": 18065500 }, { "epoch": 52.29, "learning_rate": 2.3862564838829195e-05, "loss": 1.9692, "step": 18066000 }, { "epoch": 52.3, "learning_rate": 2.386184119118192e-05, "loss": 1.999, "step": 18066500 }, { "epoch": 52.3, "learning_rate": 2.3861117543534643e-05, "loss": 1.9687, "step": 18067000 }, { "epoch": 52.3, "learning_rate": 2.386039389588737e-05, "loss": 1.9796, "step": 18067500 }, { "epoch": 52.3, "learning_rate": 2.385967024824009e-05, "loss": 1.9619, "step": 18068000 }, { "epoch": 52.3, "learning_rate": 2.3858946600592813e-05, "loss": 1.985, "step": 18068500 }, { "epoch": 52.3, "learning_rate": 2.3858224400240832e-05, "loss": 1.9723, "step": 18069000 }, { "epoch": 52.3, "learning_rate": 2.3857500752593554e-05, "loss": 1.9798, "step": 18069500 }, { "epoch": 52.31, "learning_rate": 2.3856777104946276e-05, "loss": 1.9675, "step": 18070000 }, { "epoch": 52.31, "learning_rate": 2.3856053457299002e-05, "loss": 2.004, "step": 18070500 }, { "epoch": 52.31, "learning_rate": 2.3855329809651724e-05, "loss": 2.0104, "step": 18071000 }, { "epoch": 52.31, "learning_rate": 2.3854606162004446e-05, "loss": 1.9757, "step": 18071500 }, { "epoch": 52.31, "learning_rate": 2.385388251435717e-05, "loss": 1.982, "step": 18072000 }, { "epoch": 52.31, "learning_rate": 2.3853160314005188e-05, "loss": 1.971, "step": 18072500 }, { "epoch": 52.31, "learning_rate": 2.385243666635791e-05, "loss": 1.963, "step": 18073000 }, { "epoch": 52.32, "learning_rate": 2.3851713018710632e-05, "loss": 1.9797, "step": 18073500 }, { "epoch": 52.32, "learning_rate": 2.3850989371063358e-05, "loss": 1.9998, "step": 18074000 }, { "epoch": 52.32, "learning_rate": 2.3850265723416083e-05, "loss": 1.9903, "step": 18074500 }, { "epoch": 52.32, "learning_rate": 2.38495435230641e-05, "loss": 2.0157, "step": 18075000 }, { "epoch": 52.32, "learning_rate": 2.384881987541682e-05, "loss": 1.9821, "step": 18075500 }, { "epoch": 52.32, "learning_rate": 2.3848096227769547e-05, "loss": 1.9624, "step": 18076000 }, { "epoch": 52.32, "learning_rate": 2.384737258012227e-05, "loss": 1.965, "step": 18076500 }, { "epoch": 52.33, "learning_rate": 2.3846650379770285e-05, "loss": 1.97, "step": 18077000 }, { "epoch": 52.33, "learning_rate": 2.3845926732123007e-05, "loss": 1.9774, "step": 18077500 }, { "epoch": 52.33, "learning_rate": 2.3845203084475732e-05, "loss": 2.0155, "step": 18078000 }, { "epoch": 52.33, "learning_rate": 2.3844479436828458e-05, "loss": 1.9798, "step": 18078500 }, { "epoch": 52.33, "learning_rate": 2.384375578918118e-05, "loss": 1.974, "step": 18079000 }, { "epoch": 52.33, "learning_rate": 2.3843032141533902e-05, "loss": 1.9777, "step": 18079500 }, { "epoch": 52.33, "learning_rate": 2.3842308493886625e-05, "loss": 1.9685, "step": 18080000 }, { "epoch": 52.34, "learning_rate": 2.3841584846239347e-05, "loss": 1.9861, "step": 18080500 }, { "epoch": 52.34, "learning_rate": 2.3840861198592073e-05, "loss": 1.9942, "step": 18081000 }, { "epoch": 52.34, "learning_rate": 2.384013899824009e-05, "loss": 1.9685, "step": 18081500 }, { "epoch": 52.34, "learning_rate": 2.383941679788811e-05, "loss": 1.9631, "step": 18082000 }, { "epoch": 52.34, "learning_rate": 2.3838693150240833e-05, "loss": 1.9576, "step": 18082500 }, { "epoch": 52.34, "learning_rate": 2.3837970949888848e-05, "loss": 1.991, "step": 18083000 }, { "epoch": 52.34, "learning_rate": 2.383724730224157e-05, "loss": 1.9437, "step": 18083500 }, { "epoch": 52.35, "learning_rate": 2.3836523654594296e-05, "loss": 1.9881, "step": 18084000 }, { "epoch": 52.35, "learning_rate": 2.383580000694702e-05, "loss": 1.9571, "step": 18084500 }, { "epoch": 52.35, "learning_rate": 2.383507635929974e-05, "loss": 1.9763, "step": 18085000 }, { "epoch": 52.35, "learning_rate": 2.3834352711652466e-05, "loss": 1.9689, "step": 18085500 }, { "epoch": 52.35, "learning_rate": 2.383362906400519e-05, "loss": 1.9709, "step": 18086000 }, { "epoch": 52.35, "learning_rate": 2.383290541635791e-05, "loss": 2.0078, "step": 18086500 }, { "epoch": 52.35, "learning_rate": 2.3832181768710636e-05, "loss": 1.9897, "step": 18087000 }, { "epoch": 52.36, "learning_rate": 2.383145812106336e-05, "loss": 1.9485, "step": 18087500 }, { "epoch": 52.36, "learning_rate": 2.383073447341608e-05, "loss": 1.9912, "step": 18088000 }, { "epoch": 52.36, "learning_rate": 2.3830010825768803e-05, "loss": 1.9617, "step": 18088500 }, { "epoch": 52.36, "learning_rate": 2.382928717812153e-05, "loss": 1.9687, "step": 18089000 }, { "epoch": 52.36, "learning_rate": 2.382856353047425e-05, "loss": 1.9946, "step": 18089500 }, { "epoch": 52.36, "learning_rate": 2.3827842777417563e-05, "loss": 1.9755, "step": 18090000 }, { "epoch": 52.36, "learning_rate": 2.3827120577065582e-05, "loss": 1.9881, "step": 18090500 }, { "epoch": 52.37, "learning_rate": 2.3826396929418304e-05, "loss": 1.9606, "step": 18091000 }, { "epoch": 52.37, "learning_rate": 2.3825673281771027e-05, "loss": 1.976, "step": 18091500 }, { "epoch": 52.37, "learning_rate": 2.382494963412375e-05, "loss": 1.981, "step": 18092000 }, { "epoch": 52.37, "learning_rate": 2.3824225986476474e-05, "loss": 1.9838, "step": 18092500 }, { "epoch": 52.37, "learning_rate": 2.3823503786124493e-05, "loss": 2.0, "step": 18093000 }, { "epoch": 52.37, "learning_rate": 2.3822780138477216e-05, "loss": 1.9737, "step": 18093500 }, { "epoch": 52.37, "learning_rate": 2.3822056490829938e-05, "loss": 1.9739, "step": 18094000 }, { "epoch": 52.38, "learning_rate": 2.382133284318266e-05, "loss": 1.9733, "step": 18094500 }, { "epoch": 52.38, "learning_rate": 2.3820609195535386e-05, "loss": 1.9953, "step": 18095000 }, { "epoch": 52.38, "learning_rate": 2.3819885547888108e-05, "loss": 1.9883, "step": 18095500 }, { "epoch": 52.38, "learning_rate": 2.381916190024083e-05, "loss": 1.9996, "step": 18096000 }, { "epoch": 52.38, "learning_rate": 2.3818438252593556e-05, "loss": 1.9924, "step": 18096500 }, { "epoch": 52.38, "learning_rate": 2.3817714604946278e-05, "loss": 2.0069, "step": 18097000 }, { "epoch": 52.38, "learning_rate": 2.3816992404594297e-05, "loss": 1.9831, "step": 18097500 }, { "epoch": 52.39, "learning_rate": 2.381626875694702e-05, "loss": 1.9977, "step": 18098000 }, { "epoch": 52.39, "learning_rate": 2.381554510929974e-05, "loss": 2.0033, "step": 18098500 }, { "epoch": 52.39, "learning_rate": 2.3814821461652464e-05, "loss": 2.0067, "step": 18099000 }, { "epoch": 52.39, "learning_rate": 2.3814097814005186e-05, "loss": 1.976, "step": 18099500 }, { "epoch": 52.39, "learning_rate": 2.3813375613653205e-05, "loss": 1.9901, "step": 18100000 }, { "epoch": 52.39, "learning_rate": 2.381265196600593e-05, "loss": 1.9907, "step": 18100500 }, { "epoch": 52.39, "learning_rate": 2.3811928318358653e-05, "loss": 1.9774, "step": 18101000 }, { "epoch": 52.4, "learning_rate": 2.3811204670711375e-05, "loss": 1.9582, "step": 18101500 }, { "epoch": 52.4, "learning_rate": 2.38104810230641e-05, "loss": 1.9709, "step": 18102000 }, { "epoch": 52.4, "learning_rate": 2.3809757375416823e-05, "loss": 1.9551, "step": 18102500 }, { "epoch": 52.4, "learning_rate": 2.3809033727769545e-05, "loss": 1.9756, "step": 18103000 }, { "epoch": 52.4, "learning_rate": 2.380831152741756e-05, "loss": 1.9859, "step": 18103500 }, { "epoch": 52.4, "learning_rate": 2.3807587879770286e-05, "loss": 2.0019, "step": 18104000 }, { "epoch": 52.41, "learning_rate": 2.3806864232123012e-05, "loss": 1.9959, "step": 18104500 }, { "epoch": 52.41, "learning_rate": 2.3806140584475734e-05, "loss": 2.0052, "step": 18105000 }, { "epoch": 52.41, "learning_rate": 2.380541838412375e-05, "loss": 1.9965, "step": 18105500 }, { "epoch": 52.41, "learning_rate": 2.3804694736476475e-05, "loss": 1.9705, "step": 18106000 }, { "epoch": 52.41, "learning_rate": 2.3803971088829197e-05, "loss": 1.9775, "step": 18106500 }, { "epoch": 52.41, "learning_rate": 2.380324744118192e-05, "loss": 1.9821, "step": 18107000 }, { "epoch": 52.41, "learning_rate": 2.3802523793534642e-05, "loss": 1.973, "step": 18107500 }, { "epoch": 52.42, "learning_rate": 2.3801800145887367e-05, "loss": 1.9659, "step": 18108000 }, { "epoch": 52.42, "learning_rate": 2.380107649824009e-05, "loss": 1.9809, "step": 18108500 }, { "epoch": 52.42, "learning_rate": 2.3800352850592812e-05, "loss": 1.9954, "step": 18109000 }, { "epoch": 52.42, "learning_rate": 2.3799629202945538e-05, "loss": 1.9856, "step": 18109500 }, { "epoch": 52.42, "learning_rate": 2.379890555529826e-05, "loss": 1.9658, "step": 18110000 }, { "epoch": 52.42, "learning_rate": 2.3798183354946275e-05, "loss": 1.9811, "step": 18110500 }, { "epoch": 52.42, "learning_rate": 2.3797459707299e-05, "loss": 1.9779, "step": 18111000 }, { "epoch": 52.43, "learning_rate": 2.3796737506947017e-05, "loss": 1.9919, "step": 18111500 }, { "epoch": 52.43, "learning_rate": 2.3796013859299742e-05, "loss": 1.9956, "step": 18112000 }, { "epoch": 52.43, "learning_rate": 2.379529165894776e-05, "loss": 1.9872, "step": 18112500 }, { "epoch": 52.43, "learning_rate": 2.3794568011300483e-05, "loss": 1.9893, "step": 18113000 }, { "epoch": 52.43, "learning_rate": 2.3793844363653206e-05, "loss": 2.0038, "step": 18113500 }, { "epoch": 52.43, "learning_rate": 2.3793120716005928e-05, "loss": 1.9872, "step": 18114000 }, { "epoch": 52.43, "learning_rate": 2.379239706835865e-05, "loss": 1.9656, "step": 18114500 }, { "epoch": 52.44, "learning_rate": 2.3791673420711376e-05, "loss": 1.9759, "step": 18115000 }, { "epoch": 52.44, "learning_rate": 2.37909497730641e-05, "loss": 2.0076, "step": 18115500 }, { "epoch": 52.44, "learning_rate": 2.3790226125416824e-05, "loss": 1.99, "step": 18116000 }, { "epoch": 52.44, "learning_rate": 2.3789502477769546e-05, "loss": 1.9873, "step": 18116500 }, { "epoch": 52.44, "learning_rate": 2.3788778830122268e-05, "loss": 1.9654, "step": 18117000 }, { "epoch": 52.44, "learning_rate": 2.378805518247499e-05, "loss": 1.9775, "step": 18117500 }, { "epoch": 52.44, "learning_rate": 2.378733298212301e-05, "loss": 1.9913, "step": 18118000 }, { "epoch": 52.45, "learning_rate": 2.378660933447573e-05, "loss": 1.9865, "step": 18118500 }, { "epoch": 52.45, "learning_rate": 2.378588713412375e-05, "loss": 2.0073, "step": 18119000 }, { "epoch": 52.45, "learning_rate": 2.3785163486476476e-05, "loss": 1.9688, "step": 18119500 }, { "epoch": 52.45, "learning_rate": 2.3784439838829198e-05, "loss": 1.9861, "step": 18120000 }, { "epoch": 52.45, "learning_rate": 2.378371619118192e-05, "loss": 1.9777, "step": 18120500 }, { "epoch": 52.45, "learning_rate": 2.3782992543534643e-05, "loss": 1.9892, "step": 18121000 }, { "epoch": 52.45, "learning_rate": 2.3782268895887365e-05, "loss": 2.0024, "step": 18121500 }, { "epoch": 52.46, "learning_rate": 2.378154524824009e-05, "loss": 1.9682, "step": 18122000 }, { "epoch": 52.46, "learning_rate": 2.3780821600592813e-05, "loss": 2.0042, "step": 18122500 }, { "epoch": 52.46, "learning_rate": 2.3780099400240832e-05, "loss": 1.9976, "step": 18123000 }, { "epoch": 52.46, "learning_rate": 2.3779375752593554e-05, "loss": 1.9853, "step": 18123500 }, { "epoch": 52.46, "learning_rate": 2.377865210494628e-05, "loss": 1.9827, "step": 18124000 }, { "epoch": 52.46, "learning_rate": 2.3777928457299002e-05, "loss": 1.9653, "step": 18124500 }, { "epoch": 52.46, "learning_rate": 2.3777206256947017e-05, "loss": 1.9998, "step": 18125000 }, { "epoch": 52.47, "learning_rate": 2.377648260929974e-05, "loss": 1.9868, "step": 18125500 }, { "epoch": 52.47, "learning_rate": 2.3775758961652465e-05, "loss": 1.9881, "step": 18126000 }, { "epoch": 52.47, "learning_rate": 2.377503531400519e-05, "loss": 1.9896, "step": 18126500 }, { "epoch": 52.47, "learning_rate": 2.3774311666357913e-05, "loss": 2.0116, "step": 18127000 }, { "epoch": 52.47, "learning_rate": 2.3773588018710635e-05, "loss": 1.9975, "step": 18127500 }, { "epoch": 52.47, "learning_rate": 2.3772864371063358e-05, "loss": 1.9742, "step": 18128000 }, { "epoch": 52.47, "learning_rate": 2.377214072341608e-05, "loss": 1.975, "step": 18128500 }, { "epoch": 52.48, "learning_rate": 2.3771417075768805e-05, "loss": 1.9949, "step": 18129000 }, { "epoch": 52.48, "learning_rate": 2.3770693428121528e-05, "loss": 1.9546, "step": 18129500 }, { "epoch": 52.48, "learning_rate": 2.3769971227769543e-05, "loss": 1.9932, "step": 18130000 }, { "epoch": 52.48, "learning_rate": 2.376924758012227e-05, "loss": 1.983, "step": 18130500 }, { "epoch": 52.48, "learning_rate": 2.376852393247499e-05, "loss": 1.9796, "step": 18131000 }, { "epoch": 52.48, "learning_rate": 2.3767800284827717e-05, "loss": 1.9496, "step": 18131500 }, { "epoch": 52.48, "learning_rate": 2.376707663718044e-05, "loss": 2.002, "step": 18132000 }, { "epoch": 52.49, "learning_rate": 2.3766354436828454e-05, "loss": 1.975, "step": 18132500 }, { "epoch": 52.49, "learning_rate": 2.376563078918118e-05, "loss": 2.0127, "step": 18133000 }, { "epoch": 52.49, "learning_rate": 2.3764907141533902e-05, "loss": 1.9646, "step": 18133500 }, { "epoch": 52.49, "learning_rate": 2.3764183493886628e-05, "loss": 2.0027, "step": 18134000 }, { "epoch": 52.49, "learning_rate": 2.376345984623935e-05, "loss": 1.9769, "step": 18134500 }, { "epoch": 52.49, "learning_rate": 2.3762739093182662e-05, "loss": 1.9946, "step": 18135000 }, { "epoch": 52.49, "learning_rate": 2.3762015445535385e-05, "loss": 2.0056, "step": 18135500 }, { "epoch": 52.5, "learning_rate": 2.3761291797888107e-05, "loss": 1.9789, "step": 18136000 }, { "epoch": 52.5, "learning_rate": 2.376056815024083e-05, "loss": 1.9758, "step": 18136500 }, { "epoch": 52.5, "learning_rate": 2.3759845949888848e-05, "loss": 2.0221, "step": 18137000 }, { "epoch": 52.5, "learning_rate": 2.375912230224157e-05, "loss": 1.9856, "step": 18137500 }, { "epoch": 52.5, "learning_rate": 2.3758398654594296e-05, "loss": 1.9954, "step": 18138000 }, { "epoch": 52.5, "learning_rate": 2.3757675006947018e-05, "loss": 1.9747, "step": 18138500 }, { "epoch": 52.5, "learning_rate": 2.3756951359299744e-05, "loss": 1.9795, "step": 18139000 }, { "epoch": 52.51, "learning_rate": 2.3756227711652466e-05, "loss": 2.0, "step": 18139500 }, { "epoch": 52.51, "learning_rate": 2.3755504064005188e-05, "loss": 1.9969, "step": 18140000 }, { "epoch": 52.51, "learning_rate": 2.375478041635791e-05, "loss": 1.959, "step": 18140500 }, { "epoch": 52.51, "learning_rate": 2.3754056768710633e-05, "loss": 1.9748, "step": 18141000 }, { "epoch": 52.51, "learning_rate": 2.375333312106336e-05, "loss": 1.9973, "step": 18141500 }, { "epoch": 52.51, "learning_rate": 2.3752610920711377e-05, "loss": 2.0078, "step": 18142000 }, { "epoch": 52.52, "learning_rate": 2.37518872730641e-05, "loss": 1.9835, "step": 18142500 }, { "epoch": 52.52, "learning_rate": 2.3751163625416822e-05, "loss": 2.001, "step": 18143000 }, { "epoch": 52.52, "learning_rate": 2.3750439977769544e-05, "loss": 1.9884, "step": 18143500 }, { "epoch": 52.52, "learning_rate": 2.374971633012227e-05, "loss": 1.9853, "step": 18144000 }, { "epoch": 52.52, "learning_rate": 2.3748992682474992e-05, "loss": 1.975, "step": 18144500 }, { "epoch": 52.52, "learning_rate": 2.3748269034827717e-05, "loss": 1.9599, "step": 18145000 }, { "epoch": 52.52, "learning_rate": 2.374754538718044e-05, "loss": 1.9782, "step": 18145500 }, { "epoch": 52.53, "learning_rate": 2.3746821739533162e-05, "loss": 1.9505, "step": 18146000 }, { "epoch": 52.53, "learning_rate": 2.374609953918118e-05, "loss": 1.9755, "step": 18146500 }, { "epoch": 52.53, "learning_rate": 2.3745375891533903e-05, "loss": 1.9865, "step": 18147000 }, { "epoch": 52.53, "learning_rate": 2.374465369118192e-05, "loss": 1.9845, "step": 18147500 }, { "epoch": 52.53, "learning_rate": 2.3743930043534644e-05, "loss": 2.0075, "step": 18148000 }, { "epoch": 52.53, "learning_rate": 2.3743206395887366e-05, "loss": 2.0044, "step": 18148500 }, { "epoch": 52.53, "learning_rate": 2.3742482748240092e-05, "loss": 1.9862, "step": 18149000 }, { "epoch": 52.54, "learning_rate": 2.3741759100592814e-05, "loss": 1.9829, "step": 18149500 }, { "epoch": 52.54, "learning_rate": 2.3741035452945537e-05, "loss": 1.9721, "step": 18150000 }, { "epoch": 52.54, "learning_rate": 2.374031180529826e-05, "loss": 1.9807, "step": 18150500 }, { "epoch": 52.54, "learning_rate": 2.373958815765098e-05, "loss": 1.9901, "step": 18151000 }, { "epoch": 52.54, "learning_rate": 2.3738864510003707e-05, "loss": 2.0054, "step": 18151500 }, { "epoch": 52.54, "learning_rate": 2.3738142309651722e-05, "loss": 1.9619, "step": 18152000 }, { "epoch": 52.54, "learning_rate": 2.3737418662004444e-05, "loss": 1.9912, "step": 18152500 }, { "epoch": 52.55, "learning_rate": 2.373669501435717e-05, "loss": 1.9777, "step": 18153000 }, { "epoch": 52.55, "learning_rate": 2.373597281400519e-05, "loss": 1.9983, "step": 18153500 }, { "epoch": 52.55, "learning_rate": 2.3735250613653208e-05, "loss": 1.9925, "step": 18154000 }, { "epoch": 52.55, "learning_rate": 2.373452696600593e-05, "loss": 1.9858, "step": 18154500 }, { "epoch": 52.55, "learning_rate": 2.3733803318358652e-05, "loss": 1.9591, "step": 18155000 }, { "epoch": 52.55, "learning_rate": 2.3733079670711375e-05, "loss": 1.9848, "step": 18155500 }, { "epoch": 52.55, "learning_rate": 2.3732357470359394e-05, "loss": 1.9797, "step": 18156000 }, { "epoch": 52.56, "learning_rate": 2.3731633822712116e-05, "loss": 1.9873, "step": 18156500 }, { "epoch": 52.56, "learning_rate": 2.373091017506484e-05, "loss": 2.0087, "step": 18157000 }, { "epoch": 52.56, "learning_rate": 2.3730186527417564e-05, "loss": 1.9949, "step": 18157500 }, { "epoch": 52.56, "learning_rate": 2.3729462879770286e-05, "loss": 2.0076, "step": 18158000 }, { "epoch": 52.56, "learning_rate": 2.3728740679418305e-05, "loss": 1.983, "step": 18158500 }, { "epoch": 52.56, "learning_rate": 2.3728017031771027e-05, "loss": 1.9864, "step": 18159000 }, { "epoch": 52.56, "learning_rate": 2.372729338412375e-05, "loss": 1.9743, "step": 18159500 }, { "epoch": 52.57, "learning_rate": 2.372656973647647e-05, "loss": 1.9651, "step": 18160000 }, { "epoch": 52.57, "learning_rate": 2.3725846088829197e-05, "loss": 1.9737, "step": 18160500 }, { "epoch": 52.57, "learning_rate": 2.3725122441181923e-05, "loss": 1.9794, "step": 18161000 }, { "epoch": 52.57, "learning_rate": 2.3724398793534645e-05, "loss": 1.9985, "step": 18161500 }, { "epoch": 52.57, "learning_rate": 2.3723675145887367e-05, "loss": 2.0152, "step": 18162000 }, { "epoch": 52.57, "learning_rate": 2.372295149824009e-05, "loss": 1.9824, "step": 18162500 }, { "epoch": 52.57, "learning_rate": 2.372222929788811e-05, "loss": 1.9772, "step": 18163000 }, { "epoch": 52.58, "learning_rate": 2.372150565024083e-05, "loss": 1.986, "step": 18163500 }, { "epoch": 52.58, "learning_rate": 2.3720782002593556e-05, "loss": 1.9662, "step": 18164000 }, { "epoch": 52.58, "learning_rate": 2.372005835494628e-05, "loss": 1.9913, "step": 18164500 }, { "epoch": 52.58, "learning_rate": 2.3719334707299e-05, "loss": 1.9947, "step": 18165000 }, { "epoch": 52.58, "learning_rate": 2.3718611059651723e-05, "loss": 2.0097, "step": 18165500 }, { "epoch": 52.58, "learning_rate": 2.371788741200445e-05, "loss": 2.0117, "step": 18166000 }, { "epoch": 52.58, "learning_rate": 2.3717165211652464e-05, "loss": 1.9698, "step": 18166500 }, { "epoch": 52.59, "learning_rate": 2.3716441564005186e-05, "loss": 1.9988, "step": 18167000 }, { "epoch": 52.59, "learning_rate": 2.37157208109485e-05, "loss": 1.9892, "step": 18167500 }, { "epoch": 52.59, "learning_rate": 2.371499716330122e-05, "loss": 1.9689, "step": 18168000 }, { "epoch": 52.59, "learning_rate": 2.3714276410244537e-05, "loss": 1.9737, "step": 18168500 }, { "epoch": 52.59, "learning_rate": 2.371355276259726e-05, "loss": 1.9523, "step": 18169000 }, { "epoch": 52.59, "learning_rate": 2.3712829114949984e-05, "loss": 1.9994, "step": 18169500 }, { "epoch": 52.59, "learning_rate": 2.3712105467302707e-05, "loss": 1.9846, "step": 18170000 }, { "epoch": 52.6, "learning_rate": 2.371138181965543e-05, "loss": 1.9861, "step": 18170500 }, { "epoch": 52.6, "learning_rate": 2.371065817200815e-05, "loss": 1.9867, "step": 18171000 }, { "epoch": 52.6, "learning_rate": 2.3709934524360873e-05, "loss": 1.978, "step": 18171500 }, { "epoch": 52.6, "learning_rate": 2.37092108767136e-05, "loss": 1.9812, "step": 18172000 }, { "epoch": 52.6, "learning_rate": 2.370848722906632e-05, "loss": 1.9888, "step": 18172500 }, { "epoch": 52.6, "learning_rate": 2.3707763581419047e-05, "loss": 1.9854, "step": 18173000 }, { "epoch": 52.6, "learning_rate": 2.370703993377177e-05, "loss": 2.007, "step": 18173500 }, { "epoch": 52.61, "learning_rate": 2.370631628612449e-05, "loss": 1.9832, "step": 18174000 }, { "epoch": 52.61, "learning_rate": 2.3705592638477214e-05, "loss": 2.0097, "step": 18174500 }, { "epoch": 52.61, "learning_rate": 2.3704868990829936e-05, "loss": 1.9766, "step": 18175000 }, { "epoch": 52.61, "learning_rate": 2.370414534318266e-05, "loss": 1.9771, "step": 18175500 }, { "epoch": 52.61, "learning_rate": 2.3703421695535387e-05, "loss": 1.9813, "step": 18176000 }, { "epoch": 52.61, "learning_rate": 2.370269804788811e-05, "loss": 1.9941, "step": 18176500 }, { "epoch": 52.61, "learning_rate": 2.370197440024083e-05, "loss": 1.9996, "step": 18177000 }, { "epoch": 52.62, "learning_rate": 2.3701250752593554e-05, "loss": 1.9735, "step": 18177500 }, { "epoch": 52.62, "learning_rate": 2.3700527104946276e-05, "loss": 1.9971, "step": 18178000 }, { "epoch": 52.62, "learning_rate": 2.3699804904594295e-05, "loss": 1.9805, "step": 18178500 }, { "epoch": 52.62, "learning_rate": 2.3699081256947017e-05, "loss": 1.9661, "step": 18179000 }, { "epoch": 52.62, "learning_rate": 2.3698357609299743e-05, "loss": 1.9794, "step": 18179500 }, { "epoch": 52.62, "learning_rate": 2.3697633961652465e-05, "loss": 1.9762, "step": 18180000 }, { "epoch": 52.63, "learning_rate": 2.3696910314005187e-05, "loss": 1.9873, "step": 18180500 }, { "epoch": 52.63, "learning_rate": 2.3696188113653206e-05, "loss": 1.9888, "step": 18181000 }, { "epoch": 52.63, "learning_rate": 2.3695465913301225e-05, "loss": 1.9833, "step": 18181500 }, { "epoch": 52.63, "learning_rate": 2.3694742265653947e-05, "loss": 2.005, "step": 18182000 }, { "epoch": 52.63, "learning_rate": 2.369401861800667e-05, "loss": 1.9956, "step": 18182500 }, { "epoch": 52.63, "learning_rate": 2.3693294970359395e-05, "loss": 1.9885, "step": 18183000 }, { "epoch": 52.63, "learning_rate": 2.3692571322712117e-05, "loss": 1.9835, "step": 18183500 }, { "epoch": 52.64, "learning_rate": 2.369184767506484e-05, "loss": 1.9963, "step": 18184000 }, { "epoch": 52.64, "learning_rate": 2.3691124027417562e-05, "loss": 1.9772, "step": 18184500 }, { "epoch": 52.64, "learning_rate": 2.3690400379770288e-05, "loss": 1.9938, "step": 18185000 }, { "epoch": 52.64, "learning_rate": 2.368967673212301e-05, "loss": 1.98, "step": 18185500 }, { "epoch": 52.64, "learning_rate": 2.3688953084475732e-05, "loss": 1.9914, "step": 18186000 }, { "epoch": 52.64, "learning_rate": 2.3688229436828458e-05, "loss": 1.9881, "step": 18186500 }, { "epoch": 52.64, "learning_rate": 2.368750578918118e-05, "loss": 1.9795, "step": 18187000 }, { "epoch": 52.65, "learning_rate": 2.3686782141533902e-05, "loss": 1.9757, "step": 18187500 }, { "epoch": 52.65, "learning_rate": 2.3686058493886624e-05, "loss": 1.9994, "step": 18188000 }, { "epoch": 52.65, "learning_rate": 2.3685336293534643e-05, "loss": 1.9695, "step": 18188500 }, { "epoch": 52.65, "learning_rate": 2.3684612645887365e-05, "loss": 1.991, "step": 18189000 }, { "epoch": 52.65, "learning_rate": 2.3683888998240088e-05, "loss": 2.0035, "step": 18189500 }, { "epoch": 52.65, "learning_rate": 2.3683165350592813e-05, "loss": 2.0033, "step": 18190000 }, { "epoch": 52.65, "learning_rate": 2.3682443150240832e-05, "loss": 2.0015, "step": 18190500 }, { "epoch": 52.66, "learning_rate": 2.3681719502593555e-05, "loss": 1.9803, "step": 18191000 }, { "epoch": 52.66, "learning_rate": 2.3680995854946277e-05, "loss": 1.9924, "step": 18191500 }, { "epoch": 52.66, "learning_rate": 2.3680272207299002e-05, "loss": 1.9948, "step": 18192000 }, { "epoch": 52.66, "learning_rate": 2.3679550006947018e-05, "loss": 1.989, "step": 18192500 }, { "epoch": 52.66, "learning_rate": 2.367882635929974e-05, "loss": 1.9873, "step": 18193000 }, { "epoch": 52.66, "learning_rate": 2.3678102711652462e-05, "loss": 1.9913, "step": 18193500 }, { "epoch": 52.66, "learning_rate": 2.3677379064005188e-05, "loss": 1.9914, "step": 18194000 }, { "epoch": 52.67, "learning_rate": 2.3676655416357914e-05, "loss": 1.972, "step": 18194500 }, { "epoch": 52.67, "learning_rate": 2.3675931768710636e-05, "loss": 1.9465, "step": 18195000 }, { "epoch": 52.67, "learning_rate": 2.3675208121063358e-05, "loss": 1.9882, "step": 18195500 }, { "epoch": 52.67, "learning_rate": 2.3674485920711377e-05, "loss": 1.993, "step": 18196000 }, { "epoch": 52.67, "learning_rate": 2.36737622730641e-05, "loss": 1.9943, "step": 18196500 }, { "epoch": 52.67, "learning_rate": 2.367303862541682e-05, "loss": 1.9831, "step": 18197000 }, { "epoch": 52.67, "learning_rate": 2.3672314977769544e-05, "loss": 1.965, "step": 18197500 }, { "epoch": 52.68, "learning_rate": 2.367159133012227e-05, "loss": 1.977, "step": 18198000 }, { "epoch": 52.68, "learning_rate": 2.367086768247499e-05, "loss": 2.003, "step": 18198500 }, { "epoch": 52.68, "learning_rate": 2.3670144034827714e-05, "loss": 1.992, "step": 18199000 }, { "epoch": 52.68, "learning_rate": 2.366942038718044e-05, "loss": 1.9695, "step": 18199500 }, { "epoch": 52.68, "learning_rate": 2.3668698186828455e-05, "loss": 1.9952, "step": 18200000 }, { "epoch": 52.68, "learning_rate": 2.3667975986476474e-05, "loss": 1.984, "step": 18200500 }, { "epoch": 52.68, "learning_rate": 2.3667252338829196e-05, "loss": 2.0022, "step": 18201000 }, { "epoch": 52.69, "learning_rate": 2.3666528691181922e-05, "loss": 1.9897, "step": 18201500 }, { "epoch": 52.69, "learning_rate": 2.3665805043534644e-05, "loss": 1.9922, "step": 18202000 }, { "epoch": 52.69, "learning_rate": 2.3665081395887366e-05, "loss": 1.9758, "step": 18202500 }, { "epoch": 52.69, "learning_rate": 2.3664359195535385e-05, "loss": 1.9796, "step": 18203000 }, { "epoch": 52.69, "learning_rate": 2.3663635547888107e-05, "loss": 1.9809, "step": 18203500 }, { "epoch": 52.69, "learning_rate": 2.366291190024083e-05, "loss": 1.9912, "step": 18204000 }, { "epoch": 52.69, "learning_rate": 2.3662188252593552e-05, "loss": 1.9762, "step": 18204500 }, { "epoch": 52.7, "learning_rate": 2.3661464604946278e-05, "loss": 1.9723, "step": 18205000 }, { "epoch": 52.7, "learning_rate": 2.3660740957299003e-05, "loss": 1.9849, "step": 18205500 }, { "epoch": 52.7, "learning_rate": 2.3660017309651725e-05, "loss": 1.9894, "step": 18206000 }, { "epoch": 52.7, "learning_rate": 2.3659293662004448e-05, "loss": 1.9849, "step": 18206500 }, { "epoch": 52.7, "learning_rate": 2.365857001435717e-05, "loss": 1.9855, "step": 18207000 }, { "epoch": 52.7, "learning_rate": 2.3657846366709892e-05, "loss": 2.0027, "step": 18207500 }, { "epoch": 52.7, "learning_rate": 2.365712416635791e-05, "loss": 1.9922, "step": 18208000 }, { "epoch": 52.71, "learning_rate": 2.3656400518710633e-05, "loss": 1.977, "step": 18208500 }, { "epoch": 52.71, "learning_rate": 2.365567687106336e-05, "loss": 2.0176, "step": 18209000 }, { "epoch": 52.71, "learning_rate": 2.365495322341608e-05, "loss": 1.9971, "step": 18209500 }, { "epoch": 52.71, "learning_rate": 2.3654229575768803e-05, "loss": 1.9646, "step": 18210000 }, { "epoch": 52.71, "learning_rate": 2.365350592812153e-05, "loss": 1.9931, "step": 18210500 }, { "epoch": 52.71, "learning_rate": 2.365278228047425e-05, "loss": 1.9599, "step": 18211000 }, { "epoch": 52.71, "learning_rate": 2.3652060080122267e-05, "loss": 1.9888, "step": 18211500 }, { "epoch": 52.72, "learning_rate": 2.3651336432474992e-05, "loss": 2.0018, "step": 18212000 }, { "epoch": 52.72, "learning_rate": 2.3650612784827718e-05, "loss": 1.9736, "step": 18212500 }, { "epoch": 52.72, "learning_rate": 2.364988913718044e-05, "loss": 1.979, "step": 18213000 }, { "epoch": 52.72, "learning_rate": 2.3649165489533163e-05, "loss": 1.9911, "step": 18213500 }, { "epoch": 52.72, "learning_rate": 2.3648443289181178e-05, "loss": 1.9808, "step": 18214000 }, { "epoch": 52.72, "learning_rate": 2.3647719641533904e-05, "loss": 1.9882, "step": 18214500 }, { "epoch": 52.72, "learning_rate": 2.3646995993886626e-05, "loss": 1.991, "step": 18215000 }, { "epoch": 52.73, "learning_rate": 2.3646272346239348e-05, "loss": 1.9908, "step": 18215500 }, { "epoch": 52.73, "learning_rate": 2.364554869859207e-05, "loss": 1.9651, "step": 18216000 }, { "epoch": 52.73, "learning_rate": 2.3644825050944796e-05, "loss": 1.9919, "step": 18216500 }, { "epoch": 52.73, "learning_rate": 2.3644101403297518e-05, "loss": 1.9826, "step": 18217000 }, { "epoch": 52.73, "learning_rate": 2.3643377755650244e-05, "loss": 1.9823, "step": 18217500 }, { "epoch": 52.73, "learning_rate": 2.3642654108002966e-05, "loss": 2.0025, "step": 18218000 }, { "epoch": 52.74, "learning_rate": 2.364193046035569e-05, "loss": 1.9856, "step": 18218500 }, { "epoch": 52.74, "learning_rate": 2.3641208260003704e-05, "loss": 2.0056, "step": 18219000 }, { "epoch": 52.74, "learning_rate": 2.364048461235643e-05, "loss": 1.9995, "step": 18219500 }, { "epoch": 52.74, "learning_rate": 2.3639760964709155e-05, "loss": 1.9842, "step": 18220000 }, { "epoch": 52.74, "learning_rate": 2.3639037317061877e-05, "loss": 1.9964, "step": 18220500 }, { "epoch": 52.74, "learning_rate": 2.36383136694146e-05, "loss": 1.9838, "step": 18221000 }, { "epoch": 52.74, "learning_rate": 2.3637590021767322e-05, "loss": 2.0006, "step": 18221500 }, { "epoch": 52.75, "learning_rate": 2.3636866374120044e-05, "loss": 1.9851, "step": 18222000 }, { "epoch": 52.75, "learning_rate": 2.363614272647277e-05, "loss": 1.9995, "step": 18222500 }, { "epoch": 52.75, "learning_rate": 2.3635419078825492e-05, "loss": 1.9743, "step": 18223000 }, { "epoch": 52.75, "learning_rate": 2.3634695431178218e-05, "loss": 1.9903, "step": 18223500 }, { "epoch": 52.75, "learning_rate": 2.3633973230826233e-05, "loss": 1.9792, "step": 18224000 }, { "epoch": 52.75, "learning_rate": 2.3633251030474252e-05, "loss": 1.9752, "step": 18224500 }, { "epoch": 52.75, "learning_rate": 2.3632527382826974e-05, "loss": 1.9994, "step": 18225000 }, { "epoch": 52.76, "learning_rate": 2.3631803735179696e-05, "loss": 1.9949, "step": 18225500 }, { "epoch": 52.76, "learning_rate": 2.363108008753242e-05, "loss": 2.0131, "step": 18226000 }, { "epoch": 52.76, "learning_rate": 2.3630357887180438e-05, "loss": 1.9948, "step": 18226500 }, { "epoch": 52.76, "learning_rate": 2.362963423953316e-05, "loss": 1.9893, "step": 18227000 }, { "epoch": 52.76, "learning_rate": 2.3628910591885886e-05, "loss": 2.0062, "step": 18227500 }, { "epoch": 52.76, "learning_rate": 2.3628186944238608e-05, "loss": 2.0181, "step": 18228000 }, { "epoch": 52.76, "learning_rate": 2.3627463296591333e-05, "loss": 2.0045, "step": 18228500 }, { "epoch": 52.77, "learning_rate": 2.3626739648944056e-05, "loss": 1.9821, "step": 18229000 }, { "epoch": 52.77, "learning_rate": 2.3626016001296778e-05, "loss": 2.0189, "step": 18229500 }, { "epoch": 52.77, "learning_rate": 2.3625293800944793e-05, "loss": 1.9761, "step": 18230000 }, { "epoch": 52.77, "learning_rate": 2.362457015329752e-05, "loss": 2.0094, "step": 18230500 }, { "epoch": 52.77, "learning_rate": 2.362384650565024e-05, "loss": 2.0122, "step": 18231000 }, { "epoch": 52.77, "learning_rate": 2.3623122858002967e-05, "loss": 1.9952, "step": 18231500 }, { "epoch": 52.77, "learning_rate": 2.362239921035569e-05, "loss": 1.9979, "step": 18232000 }, { "epoch": 52.78, "learning_rate": 2.362167556270841e-05, "loss": 2.0159, "step": 18232500 }, { "epoch": 52.78, "learning_rate": 2.362095336235643e-05, "loss": 1.9939, "step": 18233000 }, { "epoch": 52.78, "learning_rate": 2.3620229714709153e-05, "loss": 1.9933, "step": 18233500 }, { "epoch": 52.78, "learning_rate": 2.361950751435717e-05, "loss": 1.9963, "step": 18234000 }, { "epoch": 52.78, "learning_rate": 2.3618783866709894e-05, "loss": 1.9646, "step": 18234500 }, { "epoch": 52.78, "learning_rate": 2.361806021906262e-05, "loss": 2.0081, "step": 18235000 }, { "epoch": 52.78, "learning_rate": 2.361733657141534e-05, "loss": 1.9844, "step": 18235500 }, { "epoch": 52.79, "learning_rate": 2.3616612923768064e-05, "loss": 1.9763, "step": 18236000 }, { "epoch": 52.79, "learning_rate": 2.3615889276120786e-05, "loss": 1.9816, "step": 18236500 }, { "epoch": 52.79, "learning_rate": 2.3615165628473508e-05, "loss": 1.9912, "step": 18237000 }, { "epoch": 52.79, "learning_rate": 2.3614441980826234e-05, "loss": 1.9906, "step": 18237500 }, { "epoch": 52.79, "learning_rate": 2.3613718333178956e-05, "loss": 1.9821, "step": 18238000 }, { "epoch": 52.79, "learning_rate": 2.361299613282697e-05, "loss": 1.9635, "step": 18238500 }, { "epoch": 52.79, "learning_rate": 2.3612272485179697e-05, "loss": 1.9677, "step": 18239000 }, { "epoch": 52.8, "learning_rate": 2.3611550284827716e-05, "loss": 2.0071, "step": 18239500 }, { "epoch": 52.8, "learning_rate": 2.361082663718044e-05, "loss": 1.9959, "step": 18240000 }, { "epoch": 52.8, "learning_rate": 2.361010298953316e-05, "loss": 2.0013, "step": 18240500 }, { "epoch": 52.8, "learning_rate": 2.3609379341885883e-05, "loss": 1.9707, "step": 18241000 }, { "epoch": 52.8, "learning_rate": 2.360865569423861e-05, "loss": 1.9912, "step": 18241500 }, { "epoch": 52.8, "learning_rate": 2.3607933493886624e-05, "loss": 2.0034, "step": 18242000 }, { "epoch": 52.8, "learning_rate": 2.360720984623935e-05, "loss": 2.0121, "step": 18242500 }, { "epoch": 52.81, "learning_rate": 2.3606486198592072e-05, "loss": 1.9741, "step": 18243000 }, { "epoch": 52.81, "learning_rate": 2.3605762550944798e-05, "loss": 2.0113, "step": 18243500 }, { "epoch": 52.81, "learning_rate": 2.360503890329752e-05, "loss": 1.978, "step": 18244000 }, { "epoch": 52.81, "learning_rate": 2.3604315255650242e-05, "loss": 1.9861, "step": 18244500 }, { "epoch": 52.81, "learning_rate": 2.3603591608002964e-05, "loss": 1.9698, "step": 18245000 }, { "epoch": 52.81, "learning_rate": 2.3602867960355687e-05, "loss": 1.9739, "step": 18245500 }, { "epoch": 52.81, "learning_rate": 2.3602144312708412e-05, "loss": 1.9856, "step": 18246000 }, { "epoch": 52.82, "learning_rate": 2.3601420665061134e-05, "loss": 1.9794, "step": 18246500 }, { "epoch": 52.82, "learning_rate": 2.360069701741386e-05, "loss": 2.001, "step": 18247000 }, { "epoch": 52.82, "learning_rate": 2.3599973369766582e-05, "loss": 1.9956, "step": 18247500 }, { "epoch": 52.82, "learning_rate": 2.3599249722119304e-05, "loss": 2.004, "step": 18248000 }, { "epoch": 52.82, "learning_rate": 2.3598527521767323e-05, "loss": 1.9926, "step": 18248500 }, { "epoch": 52.82, "learning_rate": 2.359780532141534e-05, "loss": 1.9767, "step": 18249000 }, { "epoch": 52.82, "learning_rate": 2.359708167376806e-05, "loss": 1.9839, "step": 18249500 }, { "epoch": 52.83, "learning_rate": 2.3596358026120787e-05, "loss": 1.9812, "step": 18250000 }, { "epoch": 52.83, "learning_rate": 2.359563437847351e-05, "loss": 1.9955, "step": 18250500 }, { "epoch": 52.83, "learning_rate": 2.3594910730826235e-05, "loss": 1.9962, "step": 18251000 }, { "epoch": 52.83, "learning_rate": 2.3594187083178957e-05, "loss": 1.9726, "step": 18251500 }, { "epoch": 52.83, "learning_rate": 2.359346343553168e-05, "loss": 1.9905, "step": 18252000 }, { "epoch": 52.83, "learning_rate": 2.35927397878844e-05, "loss": 1.9918, "step": 18252500 }, { "epoch": 52.83, "learning_rate": 2.3592016140237124e-05, "loss": 1.9786, "step": 18253000 }, { "epoch": 52.84, "learning_rate": 2.359129249258985e-05, "loss": 1.9833, "step": 18253500 }, { "epoch": 52.84, "learning_rate": 2.3590570292237868e-05, "loss": 1.9884, "step": 18254000 }, { "epoch": 52.84, "learning_rate": 2.3589848091885887e-05, "loss": 1.9587, "step": 18254500 }, { "epoch": 52.84, "learning_rate": 2.358912444423861e-05, "loss": 1.9892, "step": 18255000 }, { "epoch": 52.84, "learning_rate": 2.358840079659133e-05, "loss": 2.0008, "step": 18255500 }, { "epoch": 52.84, "learning_rate": 2.3587677148944054e-05, "loss": 2.0007, "step": 18256000 }, { "epoch": 52.85, "learning_rate": 2.3586953501296776e-05, "loss": 1.9978, "step": 18256500 }, { "epoch": 52.85, "learning_rate": 2.3586231300944795e-05, "loss": 1.9722, "step": 18257000 }, { "epoch": 52.85, "learning_rate": 2.358550765329752e-05, "loss": 1.9828, "step": 18257500 }, { "epoch": 52.85, "learning_rate": 2.3584784005650243e-05, "loss": 1.9584, "step": 18258000 }, { "epoch": 52.85, "learning_rate": 2.3584061805298262e-05, "loss": 1.985, "step": 18258500 }, { "epoch": 52.85, "learning_rate": 2.3583338157650984e-05, "loss": 1.9892, "step": 18259000 }, { "epoch": 52.85, "learning_rate": 2.3582614510003706e-05, "loss": 1.9924, "step": 18259500 }, { "epoch": 52.86, "learning_rate": 2.358189086235643e-05, "loss": 1.9816, "step": 18260000 }, { "epoch": 52.86, "learning_rate": 2.358116721470915e-05, "loss": 1.9822, "step": 18260500 }, { "epoch": 52.86, "learning_rate": 2.3580443567061873e-05, "loss": 1.9948, "step": 18261000 }, { "epoch": 52.86, "learning_rate": 2.3579721366709895e-05, "loss": 1.9811, "step": 18261500 }, { "epoch": 52.86, "learning_rate": 2.3578997719062618e-05, "loss": 1.9984, "step": 18262000 }, { "epoch": 52.86, "learning_rate": 2.357827407141534e-05, "loss": 1.979, "step": 18262500 }, { "epoch": 52.86, "learning_rate": 2.3577550423768062e-05, "loss": 1.9996, "step": 18263000 }, { "epoch": 52.87, "learning_rate": 2.3576826776120788e-05, "loss": 1.9734, "step": 18263500 }, { "epoch": 52.87, "learning_rate": 2.3576104575768803e-05, "loss": 2.0015, "step": 18264000 }, { "epoch": 52.87, "learning_rate": 2.3575380928121525e-05, "loss": 1.9883, "step": 18264500 }, { "epoch": 52.87, "learning_rate": 2.357465728047425e-05, "loss": 2.0078, "step": 18265000 }, { "epoch": 52.87, "learning_rate": 2.3573933632826977e-05, "loss": 1.9834, "step": 18265500 }, { "epoch": 52.87, "learning_rate": 2.35732099851797e-05, "loss": 1.9849, "step": 18266000 }, { "epoch": 52.87, "learning_rate": 2.357248633753242e-05, "loss": 1.998, "step": 18266500 }, { "epoch": 52.88, "learning_rate": 2.3571762689885143e-05, "loss": 2.0245, "step": 18267000 }, { "epoch": 52.88, "learning_rate": 2.3571039042237866e-05, "loss": 1.9922, "step": 18267500 }, { "epoch": 52.88, "learning_rate": 2.3570316841885885e-05, "loss": 1.9956, "step": 18268000 }, { "epoch": 52.88, "learning_rate": 2.35695946415339e-05, "loss": 1.9774, "step": 18268500 }, { "epoch": 52.88, "learning_rate": 2.3568870993886626e-05, "loss": 1.9705, "step": 18269000 }, { "epoch": 52.88, "learning_rate": 2.356814734623935e-05, "loss": 1.971, "step": 18269500 }, { "epoch": 52.88, "learning_rate": 2.3567423698592074e-05, "loss": 2.001, "step": 18270000 }, { "epoch": 52.89, "learning_rate": 2.3566700050944796e-05, "loss": 1.9887, "step": 18270500 }, { "epoch": 52.89, "learning_rate": 2.3565976403297518e-05, "loss": 1.9915, "step": 18271000 }, { "epoch": 52.89, "learning_rate": 2.356525275565024e-05, "loss": 1.9883, "step": 18271500 }, { "epoch": 52.89, "learning_rate": 2.3564529108002962e-05, "loss": 1.9604, "step": 18272000 }, { "epoch": 52.89, "learning_rate": 2.3563805460355688e-05, "loss": 1.9885, "step": 18272500 }, { "epoch": 52.89, "learning_rate": 2.3563083260003707e-05, "loss": 1.9725, "step": 18273000 }, { "epoch": 52.89, "learning_rate": 2.3562361059651726e-05, "loss": 1.97, "step": 18273500 }, { "epoch": 52.9, "learning_rate": 2.3561637412004448e-05, "loss": 1.964, "step": 18274000 }, { "epoch": 52.9, "learning_rate": 2.3560915211652464e-05, "loss": 1.9852, "step": 18274500 }, { "epoch": 52.9, "learning_rate": 2.356019156400519e-05, "loss": 1.9889, "step": 18275000 }, { "epoch": 52.9, "learning_rate": 2.355946791635791e-05, "loss": 1.9944, "step": 18275500 }, { "epoch": 52.9, "learning_rate": 2.3558744268710634e-05, "loss": 2.0001, "step": 18276000 }, { "epoch": 52.9, "learning_rate": 2.355802062106336e-05, "loss": 1.9889, "step": 18276500 }, { "epoch": 52.9, "learning_rate": 2.3557296973416082e-05, "loss": 1.9936, "step": 18277000 }, { "epoch": 52.91, "learning_rate": 2.3556573325768804e-05, "loss": 1.998, "step": 18277500 }, { "epoch": 52.91, "learning_rate": 2.3555849678121526e-05, "loss": 1.9929, "step": 18278000 }, { "epoch": 52.91, "learning_rate": 2.3555126030474252e-05, "loss": 2.006, "step": 18278500 }, { "epoch": 52.91, "learning_rate": 2.3554402382826974e-05, "loss": 2.0108, "step": 18279000 }, { "epoch": 52.91, "learning_rate": 2.3553678735179696e-05, "loss": 1.9952, "step": 18279500 }, { "epoch": 52.91, "learning_rate": 2.3552955087532422e-05, "loss": 1.9757, "step": 18280000 }, { "epoch": 52.91, "learning_rate": 2.3552231439885144e-05, "loss": 1.9685, "step": 18280500 }, { "epoch": 52.92, "learning_rate": 2.3551507792237866e-05, "loss": 2.0039, "step": 18281000 }, { "epoch": 52.92, "learning_rate": 2.355078414459059e-05, "loss": 1.9669, "step": 18281500 }, { "epoch": 52.92, "learning_rate": 2.3550061944238608e-05, "loss": 1.9867, "step": 18282000 }, { "epoch": 52.92, "learning_rate": 2.354933829659133e-05, "loss": 1.9897, "step": 18282500 }, { "epoch": 52.92, "learning_rate": 2.3548614648944052e-05, "loss": 1.9782, "step": 18283000 }, { "epoch": 52.92, "learning_rate": 2.354789244859207e-05, "loss": 1.9843, "step": 18283500 }, { "epoch": 52.92, "learning_rate": 2.3547168800944797e-05, "loss": 1.9993, "step": 18284000 }, { "epoch": 52.93, "learning_rate": 2.354644515329752e-05, "loss": 1.9871, "step": 18284500 }, { "epoch": 52.93, "learning_rate": 2.354572150565024e-05, "loss": 1.9835, "step": 18285000 }, { "epoch": 52.93, "learning_rate": 2.3544997858002967e-05, "loss": 2.0049, "step": 18285500 }, { "epoch": 52.93, "learning_rate": 2.354427421035569e-05, "loss": 1.9921, "step": 18286000 }, { "epoch": 52.93, "learning_rate": 2.354355056270841e-05, "loss": 2.0152, "step": 18286500 }, { "epoch": 52.93, "learning_rate": 2.3542826915061133e-05, "loss": 1.9995, "step": 18287000 }, { "epoch": 52.93, "learning_rate": 2.354210326741386e-05, "loss": 2.014, "step": 18287500 }, { "epoch": 52.94, "learning_rate": 2.354137961976658e-05, "loss": 1.9721, "step": 18288000 }, { "epoch": 52.94, "learning_rate": 2.3540655972119303e-05, "loss": 1.992, "step": 18288500 }, { "epoch": 52.94, "learning_rate": 2.353993232447203e-05, "loss": 1.9793, "step": 18289000 }, { "epoch": 52.94, "learning_rate": 2.353920867682475e-05, "loss": 1.9817, "step": 18289500 }, { "epoch": 52.94, "learning_rate": 2.3538487923768064e-05, "loss": 2.0064, "step": 18290000 }, { "epoch": 52.94, "learning_rate": 2.3537764276120786e-05, "loss": 1.9911, "step": 18290500 }, { "epoch": 52.94, "learning_rate": 2.3537042075768805e-05, "loss": 1.9856, "step": 18291000 }, { "epoch": 52.95, "learning_rate": 2.353631842812153e-05, "loss": 1.9809, "step": 18291500 }, { "epoch": 52.95, "learning_rate": 2.3535594780474253e-05, "loss": 1.9833, "step": 18292000 }, { "epoch": 52.95, "learning_rate": 2.3534871132826975e-05, "loss": 1.9816, "step": 18292500 }, { "epoch": 52.95, "learning_rate": 2.3534147485179697e-05, "loss": 1.97, "step": 18293000 }, { "epoch": 52.95, "learning_rate": 2.353342383753242e-05, "loss": 1.9985, "step": 18293500 }, { "epoch": 52.95, "learning_rate": 2.353270018988514e-05, "loss": 1.9854, "step": 18294000 }, { "epoch": 52.96, "learning_rate": 2.3531976542237867e-05, "loss": 1.97, "step": 18294500 }, { "epoch": 52.96, "learning_rate": 2.3531252894590593e-05, "loss": 2.0074, "step": 18295000 }, { "epoch": 52.96, "learning_rate": 2.353053069423861e-05, "loss": 1.987, "step": 18295500 }, { "epoch": 52.96, "learning_rate": 2.352980704659133e-05, "loss": 1.9777, "step": 18296000 }, { "epoch": 52.96, "learning_rate": 2.3529083398944056e-05, "loss": 1.9863, "step": 18296500 }, { "epoch": 52.96, "learning_rate": 2.352835975129678e-05, "loss": 1.9882, "step": 18297000 }, { "epoch": 52.96, "learning_rate": 2.35276361036495e-05, "loss": 1.9782, "step": 18297500 }, { "epoch": 52.97, "learning_rate": 2.3526913903297516e-05, "loss": 1.98, "step": 18298000 }, { "epoch": 52.97, "learning_rate": 2.3526190255650242e-05, "loss": 1.9988, "step": 18298500 }, { "epoch": 52.97, "learning_rate": 2.352546805529826e-05, "loss": 1.9843, "step": 18299000 }, { "epoch": 52.97, "learning_rate": 2.3524744407650983e-05, "loss": 1.9878, "step": 18299500 }, { "epoch": 52.97, "learning_rate": 2.3524020760003705e-05, "loss": 1.9845, "step": 18300000 }, { "epoch": 52.97, "learning_rate": 2.352329711235643e-05, "loss": 1.9577, "step": 18300500 }, { "epoch": 52.97, "learning_rate": 2.3522573464709153e-05, "loss": 1.9844, "step": 18301000 }, { "epoch": 52.98, "learning_rate": 2.3521849817061875e-05, "loss": 1.9695, "step": 18301500 }, { "epoch": 52.98, "learning_rate": 2.3521126169414598e-05, "loss": 1.9866, "step": 18302000 }, { "epoch": 52.98, "learning_rate": 2.3520402521767323e-05, "loss": 1.9785, "step": 18302500 }, { "epoch": 52.98, "learning_rate": 2.3519678874120045e-05, "loss": 1.9861, "step": 18303000 }, { "epoch": 52.98, "learning_rate": 2.3518955226472768e-05, "loss": 1.9747, "step": 18303500 }, { "epoch": 52.98, "learning_rate": 2.3518231578825493e-05, "loss": 1.9817, "step": 18304000 }, { "epoch": 52.98, "learning_rate": 2.3517507931178216e-05, "loss": 1.9984, "step": 18304500 }, { "epoch": 52.99, "learning_rate": 2.3516784283530938e-05, "loss": 1.9903, "step": 18305000 }, { "epoch": 52.99, "learning_rate": 2.351606353047425e-05, "loss": 2.0017, "step": 18305500 }, { "epoch": 52.99, "learning_rate": 2.3515339882826972e-05, "loss": 1.9828, "step": 18306000 }, { "epoch": 52.99, "learning_rate": 2.3514617682474995e-05, "loss": 1.9829, "step": 18306500 }, { "epoch": 52.99, "learning_rate": 2.3513894034827717e-05, "loss": 1.9766, "step": 18307000 }, { "epoch": 52.99, "learning_rate": 2.351317038718044e-05, "loss": 1.9841, "step": 18307500 }, { "epoch": 52.99, "learning_rate": 2.351244673953316e-05, "loss": 1.9948, "step": 18308000 }, { "epoch": 53.0, "learning_rate": 2.3511723091885884e-05, "loss": 1.9655, "step": 18308500 }, { "epoch": 53.0, "learning_rate": 2.3510999444238606e-05, "loss": 1.9785, "step": 18309000 }, { "epoch": 53.0, "learning_rate": 2.351027579659133e-05, "loss": 1.9925, "step": 18309500 }, { "epoch": 53.0, "learning_rate": 2.3509552148944057e-05, "loss": 2.0027, "step": 18310000 }, { "epoch": 53.0, "eval_accuracy": 0.6746968742115707, "eval_accuracy_mlm": 0.6409910673456639, "eval_accuracy_nsp": 0.8555957340519632, "eval_loss": 2.1695713996887207, "eval_runtime": 331.4336, "eval_samples_per_second": 1316.662, "eval_steps_per_second": 54.862, "step": 18310016 }, { "epoch": 53.0, "learning_rate": 2.350883139588737e-05, "loss": 1.9548, "step": 18310500 }, { "epoch": 53.0, "learning_rate": 2.350810774824009e-05, "loss": 1.9557, "step": 18311000 }, { "epoch": 53.0, "learning_rate": 2.3507384100592814e-05, "loss": 1.9956, "step": 18311500 }, { "epoch": 53.01, "learning_rate": 2.3506660452945536e-05, "loss": 1.9563, "step": 18312000 }, { "epoch": 53.01, "learning_rate": 2.3505936805298258e-05, "loss": 1.9515, "step": 18312500 }, { "epoch": 53.01, "learning_rate": 2.3505214604946277e-05, "loss": 1.9616, "step": 18313000 }, { "epoch": 53.01, "learning_rate": 2.3504490957299e-05, "loss": 1.9612, "step": 18313500 }, { "epoch": 53.01, "learning_rate": 2.3503767309651725e-05, "loss": 1.9614, "step": 18314000 }, { "epoch": 53.01, "learning_rate": 2.3503043662004447e-05, "loss": 1.9658, "step": 18314500 }, { "epoch": 53.01, "learning_rate": 2.350232001435717e-05, "loss": 1.9725, "step": 18315000 }, { "epoch": 53.02, "learning_rate": 2.350159781400519e-05, "loss": 1.9633, "step": 18315500 }, { "epoch": 53.02, "learning_rate": 2.350087416635791e-05, "loss": 1.9664, "step": 18316000 }, { "epoch": 53.02, "learning_rate": 2.3500150518710633e-05, "loss": 1.9499, "step": 18316500 }, { "epoch": 53.02, "learning_rate": 2.349942687106336e-05, "loss": 1.9625, "step": 18317000 }, { "epoch": 53.02, "learning_rate": 2.349870322341608e-05, "loss": 1.9761, "step": 18317500 }, { "epoch": 53.02, "learning_rate": 2.3497979575768806e-05, "loss": 1.9814, "step": 18318000 }, { "epoch": 53.02, "learning_rate": 2.3497257375416822e-05, "loss": 1.9775, "step": 18318500 }, { "epoch": 53.03, "learning_rate": 2.3496533727769544e-05, "loss": 1.9671, "step": 18319000 }, { "epoch": 53.03, "learning_rate": 2.349581008012227e-05, "loss": 1.9823, "step": 18319500 }, { "epoch": 53.03, "learning_rate": 2.3495086432474992e-05, "loss": 1.971, "step": 18320000 }, { "epoch": 53.03, "learning_rate": 2.3494362784827714e-05, "loss": 1.9586, "step": 18320500 }, { "epoch": 53.03, "learning_rate": 2.3493640584475733e-05, "loss": 1.9435, "step": 18321000 }, { "epoch": 53.03, "learning_rate": 2.349291693682846e-05, "loss": 1.9558, "step": 18321500 }, { "epoch": 53.03, "learning_rate": 2.349219328918118e-05, "loss": 1.9561, "step": 18322000 }, { "epoch": 53.04, "learning_rate": 2.3491471088829197e-05, "loss": 1.9759, "step": 18322500 }, { "epoch": 53.04, "learning_rate": 2.3490747441181922e-05, "loss": 1.9912, "step": 18323000 }, { "epoch": 53.04, "learning_rate": 2.3490025240829938e-05, "loss": 1.9873, "step": 18323500 }, { "epoch": 53.04, "learning_rate": 2.348930159318266e-05, "loss": 1.9878, "step": 18324000 }, { "epoch": 53.04, "learning_rate": 2.3488577945535382e-05, "loss": 1.978, "step": 18324500 }, { "epoch": 53.04, "learning_rate": 2.3487854297888108e-05, "loss": 1.9788, "step": 18325000 }, { "epoch": 53.04, "learning_rate": 2.3487130650240833e-05, "loss": 1.965, "step": 18325500 }, { "epoch": 53.05, "learning_rate": 2.3486407002593556e-05, "loss": 1.9709, "step": 18326000 }, { "epoch": 53.05, "learning_rate": 2.3485683354946278e-05, "loss": 1.9794, "step": 18326500 }, { "epoch": 53.05, "learning_rate": 2.3484959707299e-05, "loss": 1.98, "step": 18327000 }, { "epoch": 53.05, "learning_rate": 2.3484236059651722e-05, "loss": 1.9707, "step": 18327500 }, { "epoch": 53.05, "learning_rate": 2.348351385929974e-05, "loss": 1.9835, "step": 18328000 }, { "epoch": 53.05, "learning_rate": 2.3482790211652464e-05, "loss": 1.9757, "step": 18328500 }, { "epoch": 53.05, "learning_rate": 2.348206656400519e-05, "loss": 1.9857, "step": 18329000 }, { "epoch": 53.06, "learning_rate": 2.348134291635791e-05, "loss": 1.992, "step": 18329500 }, { "epoch": 53.06, "learning_rate": 2.3480619268710634e-05, "loss": 1.993, "step": 18330000 }, { "epoch": 53.06, "learning_rate": 2.347989562106336e-05, "loss": 1.9924, "step": 18330500 }, { "epoch": 53.06, "learning_rate": 2.347917197341608e-05, "loss": 1.9681, "step": 18331000 }, { "epoch": 53.06, "learning_rate": 2.3478448325768804e-05, "loss": 1.994, "step": 18331500 }, { "epoch": 53.06, "learning_rate": 2.3477724678121526e-05, "loss": 1.9921, "step": 18332000 }, { "epoch": 53.07, "learning_rate": 2.347700103047425e-05, "loss": 2.0165, "step": 18332500 }, { "epoch": 53.07, "learning_rate": 2.3476277382826974e-05, "loss": 1.9823, "step": 18333000 }, { "epoch": 53.07, "learning_rate": 2.34755537351797e-05, "loss": 1.99, "step": 18333500 }, { "epoch": 53.07, "learning_rate": 2.3474830087532422e-05, "loss": 1.9676, "step": 18334000 }, { "epoch": 53.07, "learning_rate": 2.3474106439885144e-05, "loss": 1.9527, "step": 18334500 }, { "epoch": 53.07, "learning_rate": 2.3473382792237866e-05, "loss": 1.9565, "step": 18335000 }, { "epoch": 53.07, "learning_rate": 2.347265914459059e-05, "loss": 1.9731, "step": 18335500 }, { "epoch": 53.08, "learning_rate": 2.3471935496943314e-05, "loss": 1.9716, "step": 18336000 }, { "epoch": 53.08, "learning_rate": 2.3471211849296036e-05, "loss": 1.9778, "step": 18336500 }, { "epoch": 53.08, "learning_rate": 2.3470488201648762e-05, "loss": 1.9644, "step": 18337000 }, { "epoch": 53.08, "learning_rate": 2.3469764554001484e-05, "loss": 1.9678, "step": 18337500 }, { "epoch": 53.08, "learning_rate": 2.3469040906354206e-05, "loss": 1.9846, "step": 18338000 }, { "epoch": 53.08, "learning_rate": 2.346831725870693e-05, "loss": 1.9898, "step": 18338500 }, { "epoch": 53.08, "learning_rate": 2.3467595058354948e-05, "loss": 1.9637, "step": 18339000 }, { "epoch": 53.09, "learning_rate": 2.3466872858002963e-05, "loss": 1.9881, "step": 18339500 }, { "epoch": 53.09, "learning_rate": 2.346614921035569e-05, "loss": 1.9952, "step": 18340000 }, { "epoch": 53.09, "learning_rate": 2.3465427010003708e-05, "loss": 1.9858, "step": 18340500 }, { "epoch": 53.09, "learning_rate": 2.346470336235643e-05, "loss": 1.9811, "step": 18341000 }, { "epoch": 53.09, "learning_rate": 2.3463979714709152e-05, "loss": 1.9666, "step": 18341500 }, { "epoch": 53.09, "learning_rate": 2.3463256067061874e-05, "loss": 1.978, "step": 18342000 }, { "epoch": 53.09, "learning_rate": 2.34625324194146e-05, "loss": 1.9883, "step": 18342500 }, { "epoch": 53.1, "learning_rate": 2.3461808771767322e-05, "loss": 1.9784, "step": 18343000 }, { "epoch": 53.1, "learning_rate": 2.3461085124120048e-05, "loss": 1.9491, "step": 18343500 }, { "epoch": 53.1, "learning_rate": 2.346036147647277e-05, "loss": 1.978, "step": 18344000 }, { "epoch": 53.1, "learning_rate": 2.3459637828825492e-05, "loss": 1.9529, "step": 18344500 }, { "epoch": 53.1, "learning_rate": 2.3458914181178215e-05, "loss": 1.9526, "step": 18345000 }, { "epoch": 53.1, "learning_rate": 2.3458191980826233e-05, "loss": 1.9672, "step": 18345500 }, { "epoch": 53.1, "learning_rate": 2.3457468333178956e-05, "loss": 1.9791, "step": 18346000 }, { "epoch": 53.11, "learning_rate": 2.3456744685531678e-05, "loss": 1.9821, "step": 18346500 }, { "epoch": 53.11, "learning_rate": 2.34560210378844e-05, "loss": 1.9907, "step": 18347000 }, { "epoch": 53.11, "learning_rate": 2.3455298837532423e-05, "loss": 1.9697, "step": 18347500 }, { "epoch": 53.11, "learning_rate": 2.3454575189885145e-05, "loss": 1.9728, "step": 18348000 }, { "epoch": 53.11, "learning_rate": 2.3453852989533164e-05, "loss": 1.9698, "step": 18348500 }, { "epoch": 53.11, "learning_rate": 2.3453129341885886e-05, "loss": 1.9711, "step": 18349000 }, { "epoch": 53.11, "learning_rate": 2.3452405694238608e-05, "loss": 1.968, "step": 18349500 }, { "epoch": 53.12, "learning_rate": 2.345168204659133e-05, "loss": 1.9703, "step": 18350000 }, { "epoch": 53.12, "learning_rate": 2.3450958398944053e-05, "loss": 1.9864, "step": 18350500 }, { "epoch": 53.12, "learning_rate": 2.3450234751296778e-05, "loss": 1.9726, "step": 18351000 }, { "epoch": 53.12, "learning_rate": 2.34495111036495e-05, "loss": 1.9773, "step": 18351500 }, { "epoch": 53.12, "learning_rate": 2.3448787456002226e-05, "loss": 1.9539, "step": 18352000 }, { "epoch": 53.12, "learning_rate": 2.344806525565024e-05, "loss": 1.976, "step": 18352500 }, { "epoch": 53.12, "learning_rate": 2.3447341608002964e-05, "loss": 1.9656, "step": 18353000 }, { "epoch": 53.13, "learning_rate": 2.344661796035569e-05, "loss": 1.9687, "step": 18353500 }, { "epoch": 53.13, "learning_rate": 2.3445894312708412e-05, "loss": 1.9729, "step": 18354000 }, { "epoch": 53.13, "learning_rate": 2.3445172112356427e-05, "loss": 1.9917, "step": 18354500 }, { "epoch": 53.13, "learning_rate": 2.3444448464709153e-05, "loss": 1.9772, "step": 18355000 }, { "epoch": 53.13, "learning_rate": 2.3443724817061875e-05, "loss": 1.9611, "step": 18355500 }, { "epoch": 53.13, "learning_rate": 2.34430011694146e-05, "loss": 1.9705, "step": 18356000 }, { "epoch": 53.13, "learning_rate": 2.3442277521767323e-05, "loss": 1.9705, "step": 18356500 }, { "epoch": 53.14, "learning_rate": 2.3441553874120045e-05, "loss": 1.9749, "step": 18357000 }, { "epoch": 53.14, "learning_rate": 2.3440831673768064e-05, "loss": 1.9827, "step": 18357500 }, { "epoch": 53.14, "learning_rate": 2.3440108026120786e-05, "loss": 1.9769, "step": 18358000 }, { "epoch": 53.14, "learning_rate": 2.3439384378473512e-05, "loss": 1.9698, "step": 18358500 }, { "epoch": 53.14, "learning_rate": 2.3438662178121528e-05, "loss": 1.9826, "step": 18359000 }, { "epoch": 53.14, "learning_rate": 2.3437938530474253e-05, "loss": 1.9752, "step": 18359500 }, { "epoch": 53.14, "learning_rate": 2.3437214882826975e-05, "loss": 1.9703, "step": 18360000 }, { "epoch": 53.15, "learning_rate": 2.3436491235179698e-05, "loss": 1.9934, "step": 18360500 }, { "epoch": 53.15, "learning_rate": 2.343576758753242e-05, "loss": 1.9745, "step": 18361000 }, { "epoch": 53.15, "learning_rate": 2.3435043939885142e-05, "loss": 1.9748, "step": 18361500 }, { "epoch": 53.15, "learning_rate": 2.3434320292237864e-05, "loss": 1.983, "step": 18362000 }, { "epoch": 53.15, "learning_rate": 2.343359664459059e-05, "loss": 1.9575, "step": 18362500 }, { "epoch": 53.15, "learning_rate": 2.3432872996943316e-05, "loss": 1.9709, "step": 18363000 }, { "epoch": 53.15, "learning_rate": 2.3432149349296038e-05, "loss": 1.9815, "step": 18363500 }, { "epoch": 53.16, "learning_rate": 2.343142570164876e-05, "loss": 1.9796, "step": 18364000 }, { "epoch": 53.16, "learning_rate": 2.343070350129678e-05, "loss": 1.9495, "step": 18364500 }, { "epoch": 53.16, "learning_rate": 2.34299798536495e-05, "loss": 1.9663, "step": 18365000 }, { "epoch": 53.16, "learning_rate": 2.3429257653297517e-05, "loss": 1.9747, "step": 18365500 }, { "epoch": 53.16, "learning_rate": 2.342853400565024e-05, "loss": 1.9441, "step": 18366000 }, { "epoch": 53.16, "learning_rate": 2.3427810358002965e-05, "loss": 1.9691, "step": 18366500 }, { "epoch": 53.16, "learning_rate": 2.342708671035569e-05, "loss": 1.9848, "step": 18367000 }, { "epoch": 53.17, "learning_rate": 2.3426363062708413e-05, "loss": 1.9609, "step": 18367500 }, { "epoch": 53.17, "learning_rate": 2.3425639415061135e-05, "loss": 1.9566, "step": 18368000 }, { "epoch": 53.17, "learning_rate": 2.3424915767413857e-05, "loss": 1.971, "step": 18368500 }, { "epoch": 53.17, "learning_rate": 2.3424193567061876e-05, "loss": 1.9779, "step": 18369000 }, { "epoch": 53.17, "learning_rate": 2.3423469919414598e-05, "loss": 1.9782, "step": 18369500 }, { "epoch": 53.17, "learning_rate": 2.3422746271767324e-05, "loss": 1.971, "step": 18370000 }, { "epoch": 53.18, "learning_rate": 2.3422022624120046e-05, "loss": 1.9711, "step": 18370500 }, { "epoch": 53.18, "learning_rate": 2.3421298976472768e-05, "loss": 1.9585, "step": 18371000 }, { "epoch": 53.18, "learning_rate": 2.342057532882549e-05, "loss": 1.9929, "step": 18371500 }, { "epoch": 53.18, "learning_rate": 2.3419851681178216e-05, "loss": 1.9728, "step": 18372000 }, { "epoch": 53.18, "learning_rate": 2.341912803353094e-05, "loss": 1.9831, "step": 18372500 }, { "epoch": 53.18, "learning_rate": 2.3418405833178954e-05, "loss": 2.0014, "step": 18373000 }, { "epoch": 53.18, "learning_rate": 2.341768218553168e-05, "loss": 1.9673, "step": 18373500 }, { "epoch": 53.19, "learning_rate": 2.3416958537884405e-05, "loss": 1.9837, "step": 18374000 }, { "epoch": 53.19, "learning_rate": 2.3416234890237127e-05, "loss": 1.9887, "step": 18374500 }, { "epoch": 53.19, "learning_rate": 2.341551124258985e-05, "loss": 1.979, "step": 18375000 }, { "epoch": 53.19, "learning_rate": 2.3414789042237865e-05, "loss": 1.9924, "step": 18375500 }, { "epoch": 53.19, "learning_rate": 2.341406539459059e-05, "loss": 1.9692, "step": 18376000 }, { "epoch": 53.19, "learning_rate": 2.3413341746943313e-05, "loss": 1.9963, "step": 18376500 }, { "epoch": 53.19, "learning_rate": 2.3412618099296035e-05, "loss": 1.9979, "step": 18377000 }, { "epoch": 53.2, "learning_rate": 2.341189445164876e-05, "loss": 1.9719, "step": 18377500 }, { "epoch": 53.2, "learning_rate": 2.3411170804001483e-05, "loss": 1.9927, "step": 18378000 }, { "epoch": 53.2, "learning_rate": 2.3410447156354205e-05, "loss": 1.9947, "step": 18378500 }, { "epoch": 53.2, "learning_rate": 2.340972350870693e-05, "loss": 1.9782, "step": 18379000 }, { "epoch": 53.2, "learning_rate": 2.3408999861059653e-05, "loss": 1.9762, "step": 18379500 }, { "epoch": 53.2, "learning_rate": 2.3408276213412375e-05, "loss": 2.0053, "step": 18380000 }, { "epoch": 53.2, "learning_rate": 2.3407552565765098e-05, "loss": 1.9753, "step": 18380500 }, { "epoch": 53.21, "learning_rate": 2.3406828918117823e-05, "loss": 1.9823, "step": 18381000 }, { "epoch": 53.21, "learning_rate": 2.3406105270470546e-05, "loss": 1.952, "step": 18381500 }, { "epoch": 53.21, "learning_rate": 2.3405383070118564e-05, "loss": 1.9826, "step": 18382000 }, { "epoch": 53.21, "learning_rate": 2.3404659422471287e-05, "loss": 1.9764, "step": 18382500 }, { "epoch": 53.21, "learning_rate": 2.3403937222119306e-05, "loss": 1.9394, "step": 18383000 }, { "epoch": 53.21, "learning_rate": 2.3403213574472028e-05, "loss": 1.9628, "step": 18383500 }, { "epoch": 53.21, "learning_rate": 2.340248992682475e-05, "loss": 1.9629, "step": 18384000 }, { "epoch": 53.22, "learning_rate": 2.3401766279177476e-05, "loss": 1.9631, "step": 18384500 }, { "epoch": 53.22, "learning_rate": 2.3401042631530198e-05, "loss": 1.9983, "step": 18385000 }, { "epoch": 53.22, "learning_rate": 2.340031898388292e-05, "loss": 1.9672, "step": 18385500 }, { "epoch": 53.22, "learning_rate": 2.339959678353094e-05, "loss": 1.9705, "step": 18386000 }, { "epoch": 53.22, "learning_rate": 2.339887313588366e-05, "loss": 1.9753, "step": 18386500 }, { "epoch": 53.22, "learning_rate": 2.339815093553168e-05, "loss": 1.9699, "step": 18387000 }, { "epoch": 53.22, "learning_rate": 2.3397428735179696e-05, "loss": 1.9934, "step": 18387500 }, { "epoch": 53.23, "learning_rate": 2.3396705087532418e-05, "loss": 1.9805, "step": 18388000 }, { "epoch": 53.23, "learning_rate": 2.3395981439885144e-05, "loss": 1.9694, "step": 18388500 }, { "epoch": 53.23, "learning_rate": 2.339525779223787e-05, "loss": 1.9764, "step": 18389000 }, { "epoch": 53.23, "learning_rate": 2.3394535591885885e-05, "loss": 1.9851, "step": 18389500 }, { "epoch": 53.23, "learning_rate": 2.3393811944238607e-05, "loss": 1.9718, "step": 18390000 }, { "epoch": 53.23, "learning_rate": 2.3393088296591333e-05, "loss": 1.9777, "step": 18390500 }, { "epoch": 53.23, "learning_rate": 2.3392364648944055e-05, "loss": 1.9766, "step": 18391000 }, { "epoch": 53.24, "learning_rate": 2.3391641001296777e-05, "loss": 1.9806, "step": 18391500 }, { "epoch": 53.24, "learning_rate": 2.33909173536495e-05, "loss": 1.9801, "step": 18392000 }, { "epoch": 53.24, "learning_rate": 2.339019515329752e-05, "loss": 1.9833, "step": 18392500 }, { "epoch": 53.24, "learning_rate": 2.3389471505650244e-05, "loss": 1.989, "step": 18393000 }, { "epoch": 53.24, "learning_rate": 2.3388747858002966e-05, "loss": 1.9707, "step": 18393500 }, { "epoch": 53.24, "learning_rate": 2.338802421035569e-05, "loss": 1.9727, "step": 18394000 }, { "epoch": 53.24, "learning_rate": 2.338730056270841e-05, "loss": 1.9914, "step": 18394500 }, { "epoch": 53.25, "learning_rate": 2.3386576915061133e-05, "loss": 1.9756, "step": 18395000 }, { "epoch": 53.25, "learning_rate": 2.338585326741386e-05, "loss": 1.9794, "step": 18395500 }, { "epoch": 53.25, "learning_rate": 2.3385129619766584e-05, "loss": 1.9773, "step": 18396000 }, { "epoch": 53.25, "learning_rate": 2.3384405972119306e-05, "loss": 1.9603, "step": 18396500 }, { "epoch": 53.25, "learning_rate": 2.338368232447203e-05, "loss": 1.9892, "step": 18397000 }, { "epoch": 53.25, "learning_rate": 2.338295867682475e-05, "loss": 1.9733, "step": 18397500 }, { "epoch": 53.25, "learning_rate": 2.3382235029177473e-05, "loss": 1.9859, "step": 18398000 }, { "epoch": 53.26, "learning_rate": 2.3381511381530195e-05, "loss": 1.9926, "step": 18398500 }, { "epoch": 53.26, "learning_rate": 2.338078773388292e-05, "loss": 1.9821, "step": 18399000 }, { "epoch": 53.26, "learning_rate": 2.3380064086235647e-05, "loss": 1.9747, "step": 18399500 }, { "epoch": 53.26, "learning_rate": 2.3379341885883662e-05, "loss": 2.0066, "step": 18400000 }, { "epoch": 53.26, "learning_rate": 2.3378618238236384e-05, "loss": 1.9795, "step": 18400500 }, { "epoch": 53.26, "learning_rate": 2.337789459058911e-05, "loss": 1.9719, "step": 18401000 }, { "epoch": 53.26, "learning_rate": 2.3377172390237126e-05, "loss": 1.9516, "step": 18401500 }, { "epoch": 53.27, "learning_rate": 2.3376448742589848e-05, "loss": 1.9491, "step": 18402000 }, { "epoch": 53.27, "learning_rate": 2.337572509494257e-05, "loss": 1.9756, "step": 18402500 }, { "epoch": 53.27, "learning_rate": 2.3375001447295296e-05, "loss": 1.9819, "step": 18403000 }, { "epoch": 53.27, "learning_rate": 2.3374279246943315e-05, "loss": 1.9875, "step": 18403500 }, { "epoch": 53.27, "learning_rate": 2.3373555599296037e-05, "loss": 1.9439, "step": 18404000 }, { "epoch": 53.27, "learning_rate": 2.3372833398944056e-05, "loss": 1.9757, "step": 18404500 }, { "epoch": 53.27, "learning_rate": 2.3372109751296778e-05, "loss": 1.9757, "step": 18405000 }, { "epoch": 53.28, "learning_rate": 2.33713861036495e-05, "loss": 1.9674, "step": 18405500 }, { "epoch": 53.28, "learning_rate": 2.3370662456002223e-05, "loss": 1.9897, "step": 18406000 }, { "epoch": 53.28, "learning_rate": 2.3369938808354945e-05, "loss": 1.9969, "step": 18406500 }, { "epoch": 53.28, "learning_rate": 2.336921516070767e-05, "loss": 1.9783, "step": 18407000 }, { "epoch": 53.28, "learning_rate": 2.3368491513060396e-05, "loss": 1.9876, "step": 18407500 }, { "epoch": 53.28, "learning_rate": 2.3367767865413118e-05, "loss": 1.9844, "step": 18408000 }, { "epoch": 53.29, "learning_rate": 2.336704421776584e-05, "loss": 1.9819, "step": 18408500 }, { "epoch": 53.29, "learning_rate": 2.3366320570118563e-05, "loss": 1.9838, "step": 18409000 }, { "epoch": 53.29, "learning_rate": 2.3365596922471285e-05, "loss": 1.9757, "step": 18409500 }, { "epoch": 53.29, "learning_rate": 2.336487327482401e-05, "loss": 1.9818, "step": 18410000 }, { "epoch": 53.29, "learning_rate": 2.3364149627176736e-05, "loss": 1.9932, "step": 18410500 }, { "epoch": 53.29, "learning_rate": 2.336342597952946e-05, "loss": 1.97, "step": 18411000 }, { "epoch": 53.29, "learning_rate": 2.3362706673768064e-05, "loss": 1.9723, "step": 18411500 }, { "epoch": 53.3, "learning_rate": 2.3361984473416083e-05, "loss": 1.9707, "step": 18412000 }, { "epoch": 53.3, "learning_rate": 2.3361260825768805e-05, "loss": 2.0069, "step": 18412500 }, { "epoch": 53.3, "learning_rate": 2.3360537178121527e-05, "loss": 1.9682, "step": 18413000 }, { "epoch": 53.3, "learning_rate": 2.335981353047425e-05, "loss": 1.9825, "step": 18413500 }, { "epoch": 53.3, "learning_rate": 2.335909133012227e-05, "loss": 1.9934, "step": 18414000 }, { "epoch": 53.3, "learning_rate": 2.335836768247499e-05, "loss": 1.9847, "step": 18414500 }, { "epoch": 53.3, "learning_rate": 2.3357644034827716e-05, "loss": 1.9581, "step": 18415000 }, { "epoch": 53.31, "learning_rate": 2.335692038718044e-05, "loss": 2.0025, "step": 18415500 }, { "epoch": 53.31, "learning_rate": 2.335619673953316e-05, "loss": 1.9699, "step": 18416000 }, { "epoch": 53.31, "learning_rate": 2.3355473091885887e-05, "loss": 1.9908, "step": 18416500 }, { "epoch": 53.31, "learning_rate": 2.335474944423861e-05, "loss": 1.9383, "step": 18417000 }, { "epoch": 53.31, "learning_rate": 2.335402579659133e-05, "loss": 1.978, "step": 18417500 }, { "epoch": 53.31, "learning_rate": 2.3353302148944053e-05, "loss": 1.982, "step": 18418000 }, { "epoch": 53.31, "learning_rate": 2.335257850129678e-05, "loss": 1.9866, "step": 18418500 }, { "epoch": 53.32, "learning_rate": 2.33518548536495e-05, "loss": 1.9607, "step": 18419000 }, { "epoch": 53.32, "learning_rate": 2.3351131206002223e-05, "loss": 1.9845, "step": 18419500 }, { "epoch": 53.32, "learning_rate": 2.335040755835495e-05, "loss": 1.9874, "step": 18420000 }, { "epoch": 53.32, "learning_rate": 2.334968391070767e-05, "loss": 1.9703, "step": 18420500 }, { "epoch": 53.32, "learning_rate": 2.3348960263060393e-05, "loss": 1.9927, "step": 18421000 }, { "epoch": 53.32, "learning_rate": 2.3348236615413116e-05, "loss": 1.9578, "step": 18421500 }, { "epoch": 53.32, "learning_rate": 2.334751296776584e-05, "loss": 1.9611, "step": 18422000 }, { "epoch": 53.33, "learning_rate": 2.3346789320118563e-05, "loss": 1.9647, "step": 18422500 }, { "epoch": 53.33, "learning_rate": 2.3346065672471286e-05, "loss": 1.9888, "step": 18423000 }, { "epoch": 53.33, "learning_rate": 2.3345343472119305e-05, "loss": 1.9642, "step": 18423500 }, { "epoch": 53.33, "learning_rate": 2.3344619824472027e-05, "loss": 1.9623, "step": 18424000 }, { "epoch": 53.33, "learning_rate": 2.334389617682475e-05, "loss": 1.9927, "step": 18424500 }, { "epoch": 53.33, "learning_rate": 2.3343173976472768e-05, "loss": 1.9782, "step": 18425000 }, { "epoch": 53.33, "learning_rate": 2.334245032882549e-05, "loss": 1.9881, "step": 18425500 }, { "epoch": 53.34, "learning_rate": 2.3341726681178216e-05, "loss": 1.9551, "step": 18426000 }, { "epoch": 53.34, "learning_rate": 2.3341003033530938e-05, "loss": 2.0059, "step": 18426500 }, { "epoch": 53.34, "learning_rate": 2.3340279385883664e-05, "loss": 1.9952, "step": 18427000 }, { "epoch": 53.34, "learning_rate": 2.3339555738236386e-05, "loss": 2.0009, "step": 18427500 }, { "epoch": 53.34, "learning_rate": 2.3338832090589108e-05, "loss": 1.9534, "step": 18428000 }, { "epoch": 53.34, "learning_rate": 2.333810844294183e-05, "loss": 1.9939, "step": 18428500 }, { "epoch": 53.34, "learning_rate": 2.3337384795294553e-05, "loss": 1.9976, "step": 18429000 }, { "epoch": 53.35, "learning_rate": 2.333666114764728e-05, "loss": 1.9733, "step": 18429500 }, { "epoch": 53.35, "learning_rate": 2.33359375e-05, "loss": 1.9653, "step": 18430000 }, { "epoch": 53.35, "learning_rate": 2.3335213852352726e-05, "loss": 2.0065, "step": 18430500 }, { "epoch": 53.35, "learning_rate": 2.333449020470545e-05, "loss": 1.9642, "step": 18431000 }, { "epoch": 53.35, "learning_rate": 2.3333768004353464e-05, "loss": 1.983, "step": 18431500 }, { "epoch": 53.35, "learning_rate": 2.333304435670619e-05, "loss": 1.9783, "step": 18432000 }, { "epoch": 53.35, "learning_rate": 2.3332322156354205e-05, "loss": 1.9638, "step": 18432500 }, { "epoch": 53.36, "learning_rate": 2.3331599956002224e-05, "loss": 1.9849, "step": 18433000 }, { "epoch": 53.36, "learning_rate": 2.333087630835495e-05, "loss": 1.9796, "step": 18433500 }, { "epoch": 53.36, "learning_rate": 2.3330152660707672e-05, "loss": 1.9485, "step": 18434000 }, { "epoch": 53.36, "learning_rate": 2.3329429013060394e-05, "loss": 1.9978, "step": 18434500 }, { "epoch": 53.36, "learning_rate": 2.3328705365413116e-05, "loss": 1.975, "step": 18435000 }, { "epoch": 53.36, "learning_rate": 2.332798171776584e-05, "loss": 1.9611, "step": 18435500 }, { "epoch": 53.36, "learning_rate": 2.3327258070118564e-05, "loss": 1.985, "step": 18436000 }, { "epoch": 53.37, "learning_rate": 2.3326534422471287e-05, "loss": 1.9762, "step": 18436500 }, { "epoch": 53.37, "learning_rate": 2.3325810774824012e-05, "loss": 1.9919, "step": 18437000 }, { "epoch": 53.37, "learning_rate": 2.3325087127176734e-05, "loss": 1.9502, "step": 18437500 }, { "epoch": 53.37, "learning_rate": 2.3324363479529457e-05, "loss": 1.9571, "step": 18438000 }, { "epoch": 53.37, "learning_rate": 2.332363983188218e-05, "loss": 1.9962, "step": 18438500 }, { "epoch": 53.37, "learning_rate": 2.33229161842349e-05, "loss": 1.9914, "step": 18439000 }, { "epoch": 53.37, "learning_rate": 2.332219398388292e-05, "loss": 1.9753, "step": 18439500 }, { "epoch": 53.38, "learning_rate": 2.3321470336235642e-05, "loss": 1.9779, "step": 18440000 }, { "epoch": 53.38, "learning_rate": 2.3320746688588368e-05, "loss": 1.9748, "step": 18440500 }, { "epoch": 53.38, "learning_rate": 2.332002304094109e-05, "loss": 1.9978, "step": 18441000 }, { "epoch": 53.38, "learning_rate": 2.3319299393293816e-05, "loss": 1.9798, "step": 18441500 }, { "epoch": 53.38, "learning_rate": 2.3318575745646538e-05, "loss": 1.9471, "step": 18442000 }, { "epoch": 53.38, "learning_rate": 2.3317853545294554e-05, "loss": 1.987, "step": 18442500 }, { "epoch": 53.38, "learning_rate": 2.3317129897647276e-05, "loss": 1.9823, "step": 18443000 }, { "epoch": 53.39, "learning_rate": 2.3316407697295295e-05, "loss": 1.9729, "step": 18443500 }, { "epoch": 53.39, "learning_rate": 2.3315684049648017e-05, "loss": 2.0337, "step": 18444000 }, { "epoch": 53.39, "learning_rate": 2.3314960402000743e-05, "loss": 1.9763, "step": 18444500 }, { "epoch": 53.39, "learning_rate": 2.3314236754353465e-05, "loss": 1.9969, "step": 18445000 }, { "epoch": 53.39, "learning_rate": 2.331351310670619e-05, "loss": 1.9718, "step": 18445500 }, { "epoch": 53.39, "learning_rate": 2.3312789459058913e-05, "loss": 1.9568, "step": 18446000 }, { "epoch": 53.4, "learning_rate": 2.3312065811411635e-05, "loss": 1.9875, "step": 18446500 }, { "epoch": 53.4, "learning_rate": 2.3311343611059654e-05, "loss": 1.9727, "step": 18447000 }, { "epoch": 53.4, "learning_rate": 2.3310619963412376e-05, "loss": 1.9693, "step": 18447500 }, { "epoch": 53.4, "learning_rate": 2.3309896315765098e-05, "loss": 1.9814, "step": 18448000 }, { "epoch": 53.4, "learning_rate": 2.3309172668117824e-05, "loss": 1.9998, "step": 18448500 }, { "epoch": 53.4, "learning_rate": 2.3308449020470546e-05, "loss": 1.9844, "step": 18449000 }, { "epoch": 53.4, "learning_rate": 2.330772537282327e-05, "loss": 1.9677, "step": 18449500 }, { "epoch": 53.41, "learning_rate": 2.330700172517599e-05, "loss": 1.9794, "step": 18450000 }, { "epoch": 53.41, "learning_rate": 2.3306278077528716e-05, "loss": 1.9908, "step": 18450500 }, { "epoch": 53.41, "learning_rate": 2.330555442988144e-05, "loss": 1.9769, "step": 18451000 }, { "epoch": 53.41, "learning_rate": 2.3304830782234164e-05, "loss": 1.9681, "step": 18451500 }, { "epoch": 53.41, "learning_rate": 2.330410858188218e-05, "loss": 1.9856, "step": 18452000 }, { "epoch": 53.41, "learning_rate": 2.3303384934234905e-05, "loss": 1.9811, "step": 18452500 }, { "epoch": 53.41, "learning_rate": 2.330266273388292e-05, "loss": 1.9631, "step": 18453000 }, { "epoch": 53.42, "learning_rate": 2.3301939086235643e-05, "loss": 1.966, "step": 18453500 }, { "epoch": 53.42, "learning_rate": 2.3301215438588365e-05, "loss": 1.9759, "step": 18454000 }, { "epoch": 53.42, "learning_rate": 2.330049179094109e-05, "loss": 1.9687, "step": 18454500 }, { "epoch": 53.42, "learning_rate": 2.3299768143293813e-05, "loss": 1.9942, "step": 18455000 }, { "epoch": 53.42, "learning_rate": 2.329904449564654e-05, "loss": 1.9725, "step": 18455500 }, { "epoch": 53.42, "learning_rate": 2.329832084799926e-05, "loss": 1.9803, "step": 18456000 }, { "epoch": 53.42, "learning_rate": 2.329759864764728e-05, "loss": 1.9791, "step": 18456500 }, { "epoch": 53.43, "learning_rate": 2.3296875000000002e-05, "loss": 2.0035, "step": 18457000 }, { "epoch": 53.43, "learning_rate": 2.3296151352352724e-05, "loss": 1.982, "step": 18457500 }, { "epoch": 53.43, "learning_rate": 2.3295427704705447e-05, "loss": 1.9957, "step": 18458000 }, { "epoch": 53.43, "learning_rate": 2.329470405705817e-05, "loss": 1.9552, "step": 18458500 }, { "epoch": 53.43, "learning_rate": 2.3293981856706188e-05, "loss": 1.9938, "step": 18459000 }, { "epoch": 53.43, "learning_rate": 2.3293258209058913e-05, "loss": 1.9578, "step": 18459500 }, { "epoch": 53.43, "learning_rate": 2.3292534561411636e-05, "loss": 1.9722, "step": 18460000 }, { "epoch": 53.44, "learning_rate": 2.3291810913764358e-05, "loss": 1.9793, "step": 18460500 }, { "epoch": 53.44, "learning_rate": 2.329109016070767e-05, "loss": 1.9929, "step": 18461000 }, { "epoch": 53.44, "learning_rate": 2.3290366513060392e-05, "loss": 1.9878, "step": 18461500 }, { "epoch": 53.44, "learning_rate": 2.3289642865413118e-05, "loss": 1.9858, "step": 18462000 }, { "epoch": 53.44, "learning_rate": 2.328891921776584e-05, "loss": 1.9877, "step": 18462500 }, { "epoch": 53.44, "learning_rate": 2.3288195570118562e-05, "loss": 2.0034, "step": 18463000 }, { "epoch": 53.44, "learning_rate": 2.3287471922471288e-05, "loss": 1.9943, "step": 18463500 }, { "epoch": 53.45, "learning_rate": 2.328674827482401e-05, "loss": 1.9697, "step": 18464000 }, { "epoch": 53.45, "learning_rate": 2.3286024627176733e-05, "loss": 1.951, "step": 18464500 }, { "epoch": 53.45, "learning_rate": 2.3285300979529455e-05, "loss": 1.9854, "step": 18465000 }, { "epoch": 53.45, "learning_rate": 2.328457733188218e-05, "loss": 1.975, "step": 18465500 }, { "epoch": 53.45, "learning_rate": 2.3283853684234903e-05, "loss": 1.9879, "step": 18466000 }, { "epoch": 53.45, "learning_rate": 2.3283130036587625e-05, "loss": 1.9848, "step": 18466500 }, { "epoch": 53.45, "learning_rate": 2.328240638894035e-05, "loss": 1.9851, "step": 18467000 }, { "epoch": 53.46, "learning_rate": 2.3281682741293073e-05, "loss": 1.9775, "step": 18467500 }, { "epoch": 53.46, "learning_rate": 2.3280959093645795e-05, "loss": 1.9885, "step": 18468000 }, { "epoch": 53.46, "learning_rate": 2.328023544599852e-05, "loss": 1.9779, "step": 18468500 }, { "epoch": 53.46, "learning_rate": 2.3279513245646536e-05, "loss": 1.9786, "step": 18469000 }, { "epoch": 53.46, "learning_rate": 2.327878959799926e-05, "loss": 1.9662, "step": 18469500 }, { "epoch": 53.46, "learning_rate": 2.327806595035198e-05, "loss": 1.965, "step": 18470000 }, { "epoch": 53.46, "learning_rate": 2.3277343750000003e-05, "loss": 1.9621, "step": 18470500 }, { "epoch": 53.47, "learning_rate": 2.3276620102352725e-05, "loss": 1.9629, "step": 18471000 }, { "epoch": 53.47, "learning_rate": 2.3275897902000744e-05, "loss": 1.9814, "step": 18471500 }, { "epoch": 53.47, "learning_rate": 2.3275174254353466e-05, "loss": 1.9877, "step": 18472000 }, { "epoch": 53.47, "learning_rate": 2.327445060670619e-05, "loss": 1.96, "step": 18472500 }, { "epoch": 53.47, "learning_rate": 2.3273728406354208e-05, "loss": 1.9924, "step": 18473000 }, { "epoch": 53.47, "learning_rate": 2.327300475870693e-05, "loss": 1.957, "step": 18473500 }, { "epoch": 53.47, "learning_rate": 2.3272281111059652e-05, "loss": 1.968, "step": 18474000 }, { "epoch": 53.48, "learning_rate": 2.3271557463412378e-05, "loss": 1.9908, "step": 18474500 }, { "epoch": 53.48, "learning_rate": 2.32708338157651e-05, "loss": 1.9816, "step": 18475000 }, { "epoch": 53.48, "learning_rate": 2.3270110168117822e-05, "loss": 2.0062, "step": 18475500 }, { "epoch": 53.48, "learning_rate": 2.326938796776584e-05, "loss": 1.9813, "step": 18476000 }, { "epoch": 53.48, "learning_rate": 2.3268664320118563e-05, "loss": 1.981, "step": 18476500 }, { "epoch": 53.48, "learning_rate": 2.3267940672471286e-05, "loss": 1.982, "step": 18477000 }, { "epoch": 53.48, "learning_rate": 2.3267218472119304e-05, "loss": 2.0051, "step": 18477500 }, { "epoch": 53.49, "learning_rate": 2.3266494824472027e-05, "loss": 1.9916, "step": 18478000 }, { "epoch": 53.49, "learning_rate": 2.3265771176824752e-05, "loss": 1.9678, "step": 18478500 }, { "epoch": 53.49, "learning_rate": 2.3265047529177475e-05, "loss": 1.9993, "step": 18479000 }, { "epoch": 53.49, "learning_rate": 2.3264323881530197e-05, "loss": 1.9872, "step": 18479500 }, { "epoch": 53.49, "learning_rate": 2.326360023388292e-05, "loss": 1.9807, "step": 18480000 }, { "epoch": 53.49, "learning_rate": 2.3262876586235645e-05, "loss": 1.9553, "step": 18480500 }, { "epoch": 53.49, "learning_rate": 2.3262152938588367e-05, "loss": 2.0118, "step": 18481000 }, { "epoch": 53.5, "learning_rate": 2.326142929094109e-05, "loss": 1.986, "step": 18481500 }, { "epoch": 53.5, "learning_rate": 2.3260705643293815e-05, "loss": 1.9413, "step": 18482000 }, { "epoch": 53.5, "learning_rate": 2.3259981995646537e-05, "loss": 1.9627, "step": 18482500 }, { "epoch": 53.5, "learning_rate": 2.325925834799926e-05, "loss": 1.9992, "step": 18483000 }, { "epoch": 53.5, "learning_rate": 2.3258534700351985e-05, "loss": 1.9868, "step": 18483500 }, { "epoch": 53.5, "learning_rate": 2.3257811052704707e-05, "loss": 1.9832, "step": 18484000 }, { "epoch": 53.51, "learning_rate": 2.325708740505743e-05, "loss": 1.9902, "step": 18484500 }, { "epoch": 53.51, "learning_rate": 2.325636375741015e-05, "loss": 2.0056, "step": 18485000 }, { "epoch": 53.51, "learning_rate": 2.3255640109762877e-05, "loss": 1.988, "step": 18485500 }, { "epoch": 53.51, "learning_rate": 2.32549164621156e-05, "loss": 1.9642, "step": 18486000 }, { "epoch": 53.51, "learning_rate": 2.3254194261763618e-05, "loss": 1.9864, "step": 18486500 }, { "epoch": 53.51, "learning_rate": 2.325347061411634e-05, "loss": 1.9899, "step": 18487000 }, { "epoch": 53.51, "learning_rate": 2.3252746966469063e-05, "loss": 1.9892, "step": 18487500 }, { "epoch": 53.52, "learning_rate": 2.3252023318821785e-05, "loss": 1.9697, "step": 18488000 }, { "epoch": 53.52, "learning_rate": 2.3251301118469804e-05, "loss": 1.9875, "step": 18488500 }, { "epoch": 53.52, "learning_rate": 2.325057747082253e-05, "loss": 1.9764, "step": 18489000 }, { "epoch": 53.52, "learning_rate": 2.3249853823175252e-05, "loss": 1.9739, "step": 18489500 }, { "epoch": 53.52, "learning_rate": 2.3249130175527974e-05, "loss": 1.9968, "step": 18490000 }, { "epoch": 53.52, "learning_rate": 2.3248406527880696e-05, "loss": 1.9841, "step": 18490500 }, { "epoch": 53.52, "learning_rate": 2.3247682880233422e-05, "loss": 1.9549, "step": 18491000 }, { "epoch": 53.53, "learning_rate": 2.3246959232586144e-05, "loss": 1.987, "step": 18491500 }, { "epoch": 53.53, "learning_rate": 2.3246235584938866e-05, "loss": 1.9805, "step": 18492000 }, { "epoch": 53.53, "learning_rate": 2.3245511937291592e-05, "loss": 1.9845, "step": 18492500 }, { "epoch": 53.53, "learning_rate": 2.3244788289644314e-05, "loss": 1.9691, "step": 18493000 }, { "epoch": 53.53, "learning_rate": 2.3244064641997036e-05, "loss": 1.9839, "step": 18493500 }, { "epoch": 53.53, "learning_rate": 2.3243340994349762e-05, "loss": 1.9792, "step": 18494000 }, { "epoch": 53.53, "learning_rate": 2.3242617346702484e-05, "loss": 1.9867, "step": 18494500 }, { "epoch": 53.54, "learning_rate": 2.3241893699055207e-05, "loss": 1.9947, "step": 18495000 }, { "epoch": 53.54, "learning_rate": 2.324117294599852e-05, "loss": 1.9906, "step": 18495500 }, { "epoch": 53.54, "learning_rate": 2.3240450745646534e-05, "loss": 1.9823, "step": 18496000 }, { "epoch": 53.54, "learning_rate": 2.323972709799926e-05, "loss": 1.9686, "step": 18496500 }, { "epoch": 53.54, "learning_rate": 2.3239003450351986e-05, "loss": 1.996, "step": 18497000 }, { "epoch": 53.54, "learning_rate": 2.3238279802704708e-05, "loss": 1.9769, "step": 18497500 }, { "epoch": 53.54, "learning_rate": 2.323755615505743e-05, "loss": 1.9934, "step": 18498000 }, { "epoch": 53.55, "learning_rate": 2.3236832507410152e-05, "loss": 1.9702, "step": 18498500 }, { "epoch": 53.55, "learning_rate": 2.3236108859762875e-05, "loss": 2.0041, "step": 18499000 }, { "epoch": 53.55, "learning_rate": 2.3235386659410893e-05, "loss": 1.9955, "step": 18499500 }, { "epoch": 53.55, "learning_rate": 2.3234663011763616e-05, "loss": 1.9684, "step": 18500000 }, { "epoch": 53.55, "learning_rate": 2.323393936411634e-05, "loss": 1.9927, "step": 18500500 }, { "epoch": 53.55, "learning_rate": 2.3233215716469064e-05, "loss": 1.9685, "step": 18501000 }, { "epoch": 53.55, "learning_rate": 2.3232492068821786e-05, "loss": 1.9769, "step": 18501500 }, { "epoch": 53.56, "learning_rate": 2.323176842117451e-05, "loss": 1.9935, "step": 18502000 }, { "epoch": 53.56, "learning_rate": 2.3231044773527234e-05, "loss": 1.9984, "step": 18502500 }, { "epoch": 53.56, "learning_rate": 2.3230321125879956e-05, "loss": 1.98, "step": 18503000 }, { "epoch": 53.56, "learning_rate": 2.3229600372823268e-05, "loss": 1.9898, "step": 18503500 }, { "epoch": 53.56, "learning_rate": 2.322887672517599e-05, "loss": 1.9866, "step": 18504000 }, { "epoch": 53.56, "learning_rate": 2.3228153077528716e-05, "loss": 1.9736, "step": 18504500 }, { "epoch": 53.56, "learning_rate": 2.3227429429881438e-05, "loss": 2.0032, "step": 18505000 }, { "epoch": 53.57, "learning_rate": 2.322670578223416e-05, "loss": 1.9548, "step": 18505500 }, { "epoch": 53.57, "learning_rate": 2.3225982134586886e-05, "loss": 1.9731, "step": 18506000 }, { "epoch": 53.57, "learning_rate": 2.322525848693961e-05, "loss": 1.9914, "step": 18506500 }, { "epoch": 53.57, "learning_rate": 2.322453483929233e-05, "loss": 1.9964, "step": 18507000 }, { "epoch": 53.57, "learning_rate": 2.3223811191645053e-05, "loss": 1.9671, "step": 18507500 }, { "epoch": 53.57, "learning_rate": 2.322308754399778e-05, "loss": 1.9707, "step": 18508000 }, { "epoch": 53.57, "learning_rate": 2.32223638963505e-05, "loss": 1.987, "step": 18508500 }, { "epoch": 53.58, "learning_rate": 2.3221640248703226e-05, "loss": 1.9718, "step": 18509000 }, { "epoch": 53.58, "learning_rate": 2.322091660105595e-05, "loss": 1.9906, "step": 18509500 }, { "epoch": 53.58, "learning_rate": 2.3220194400703964e-05, "loss": 1.9867, "step": 18510000 }, { "epoch": 53.58, "learning_rate": 2.3219470753056686e-05, "loss": 1.9614, "step": 18510500 }, { "epoch": 53.58, "learning_rate": 2.3218747105409412e-05, "loss": 1.9701, "step": 18511000 }, { "epoch": 53.58, "learning_rate": 2.3218023457762138e-05, "loss": 1.9888, "step": 18511500 }, { "epoch": 53.58, "learning_rate": 2.321729981011486e-05, "loss": 1.9751, "step": 18512000 }, { "epoch": 53.59, "learning_rate": 2.3216576162467582e-05, "loss": 2.0022, "step": 18512500 }, { "epoch": 53.59, "learning_rate": 2.3215852514820304e-05, "loss": 1.9527, "step": 18513000 }, { "epoch": 53.59, "learning_rate": 2.3215128867173026e-05, "loss": 1.9689, "step": 18513500 }, { "epoch": 53.59, "learning_rate": 2.3214405219525752e-05, "loss": 1.9839, "step": 18514000 }, { "epoch": 53.59, "learning_rate": 2.3213683019173768e-05, "loss": 1.9843, "step": 18514500 }, { "epoch": 53.59, "learning_rate": 2.3212959371526493e-05, "loss": 1.9995, "step": 18515000 }, { "epoch": 53.59, "learning_rate": 2.3212235723879216e-05, "loss": 1.9718, "step": 18515500 }, { "epoch": 53.6, "learning_rate": 2.3211512076231938e-05, "loss": 1.9767, "step": 18516000 }, { "epoch": 53.6, "learning_rate": 2.3210788428584663e-05, "loss": 1.9747, "step": 18516500 }, { "epoch": 53.6, "learning_rate": 2.3210064780937386e-05, "loss": 1.9977, "step": 18517000 }, { "epoch": 53.6, "learning_rate": 2.3209341133290108e-05, "loss": 1.9844, "step": 18517500 }, { "epoch": 53.6, "learning_rate": 2.320861748564283e-05, "loss": 1.9881, "step": 18518000 }, { "epoch": 53.6, "learning_rate": 2.3207893837995556e-05, "loss": 1.9758, "step": 18518500 }, { "epoch": 53.6, "learning_rate": 2.3207170190348278e-05, "loss": 1.9508, "step": 18519000 }, { "epoch": 53.61, "learning_rate": 2.3206446542701004e-05, "loss": 1.9746, "step": 18519500 }, { "epoch": 53.61, "learning_rate": 2.320572434234902e-05, "loss": 1.9793, "step": 18520000 }, { "epoch": 53.61, "learning_rate": 2.320500069470174e-05, "loss": 1.9919, "step": 18520500 }, { "epoch": 53.61, "learning_rate": 2.320427849434976e-05, "loss": 1.9814, "step": 18521000 }, { "epoch": 53.61, "learning_rate": 2.3203554846702483e-05, "loss": 1.963, "step": 18521500 }, { "epoch": 53.61, "learning_rate": 2.3202831199055205e-05, "loss": 1.9746, "step": 18522000 }, { "epoch": 53.62, "learning_rate": 2.320210755140793e-05, "loss": 1.9763, "step": 18522500 }, { "epoch": 53.62, "learning_rate": 2.3201383903760653e-05, "loss": 2.0192, "step": 18523000 }, { "epoch": 53.62, "learning_rate": 2.3200660256113378e-05, "loss": 1.9946, "step": 18523500 }, { "epoch": 53.62, "learning_rate": 2.3199938055761394e-05, "loss": 1.9686, "step": 18524000 }, { "epoch": 53.62, "learning_rate": 2.3199214408114116e-05, "loss": 1.9759, "step": 18524500 }, { "epoch": 53.62, "learning_rate": 2.319849076046684e-05, "loss": 1.9838, "step": 18525000 }, { "epoch": 53.62, "learning_rate": 2.3197767112819564e-05, "loss": 1.9859, "step": 18525500 }, { "epoch": 53.63, "learning_rate": 2.319704346517229e-05, "loss": 1.9734, "step": 18526000 }, { "epoch": 53.63, "learning_rate": 2.3196321264820305e-05, "loss": 1.982, "step": 18526500 }, { "epoch": 53.63, "learning_rate": 2.3195597617173027e-05, "loss": 1.9903, "step": 18527000 }, { "epoch": 53.63, "learning_rate": 2.3194873969525753e-05, "loss": 1.9936, "step": 18527500 }, { "epoch": 53.63, "learning_rate": 2.3194150321878475e-05, "loss": 1.9803, "step": 18528000 }, { "epoch": 53.63, "learning_rate": 2.3193426674231197e-05, "loss": 1.9865, "step": 18528500 }, { "epoch": 53.63, "learning_rate": 2.319270302658392e-05, "loss": 2.0077, "step": 18529000 }, { "epoch": 53.64, "learning_rate": 2.3191979378936642e-05, "loss": 2.0014, "step": 18529500 }, { "epoch": 53.64, "learning_rate": 2.3191255731289367e-05, "loss": 1.9753, "step": 18530000 }, { "epoch": 53.64, "learning_rate": 2.3190532083642093e-05, "loss": 1.9789, "step": 18530500 }, { "epoch": 53.64, "learning_rate": 2.318980988329011e-05, "loss": 1.9624, "step": 18531000 }, { "epoch": 53.64, "learning_rate": 2.3189090577528718e-05, "loss": 2.0131, "step": 18531500 }, { "epoch": 53.64, "learning_rate": 2.318836692988144e-05, "loss": 1.9562, "step": 18532000 }, { "epoch": 53.64, "learning_rate": 2.3187643282234162e-05, "loss": 1.9951, "step": 18532500 }, { "epoch": 53.65, "learning_rate": 2.3186919634586884e-05, "loss": 1.9821, "step": 18533000 }, { "epoch": 53.65, "learning_rate": 2.3186195986939607e-05, "loss": 1.9783, "step": 18533500 }, { "epoch": 53.65, "learning_rate": 2.3185472339292332e-05, "loss": 1.9695, "step": 18534000 }, { "epoch": 53.65, "learning_rate": 2.3184748691645054e-05, "loss": 1.9832, "step": 18534500 }, { "epoch": 53.65, "learning_rate": 2.318402504399778e-05, "loss": 1.9784, "step": 18535000 }, { "epoch": 53.65, "learning_rate": 2.3183301396350502e-05, "loss": 1.9656, "step": 18535500 }, { "epoch": 53.65, "learning_rate": 2.3182577748703224e-05, "loss": 1.9775, "step": 18536000 }, { "epoch": 53.66, "learning_rate": 2.3181854101055947e-05, "loss": 1.9756, "step": 18536500 }, { "epoch": 53.66, "learning_rate": 2.318113045340867e-05, "loss": 1.9804, "step": 18537000 }, { "epoch": 53.66, "learning_rate": 2.3180406805761395e-05, "loss": 1.9846, "step": 18537500 }, { "epoch": 53.66, "learning_rate": 2.3179684605409414e-05, "loss": 1.9858, "step": 18538000 }, { "epoch": 53.66, "learning_rate": 2.3178960957762136e-05, "loss": 1.9643, "step": 18538500 }, { "epoch": 53.66, "learning_rate": 2.3178237310114858e-05, "loss": 1.9848, "step": 18539000 }, { "epoch": 53.66, "learning_rate": 2.317751366246758e-05, "loss": 1.9886, "step": 18539500 }, { "epoch": 53.67, "learning_rate": 2.31767914621156e-05, "loss": 1.9631, "step": 18540000 }, { "epoch": 53.67, "learning_rate": 2.3176069261763618e-05, "loss": 1.995, "step": 18540500 }, { "epoch": 53.67, "learning_rate": 2.317534561411634e-05, "loss": 1.9861, "step": 18541000 }, { "epoch": 53.67, "learning_rate": 2.3174621966469066e-05, "loss": 2.0095, "step": 18541500 }, { "epoch": 53.67, "learning_rate": 2.3173898318821788e-05, "loss": 1.9944, "step": 18542000 }, { "epoch": 53.67, "learning_rate": 2.317317467117451e-05, "loss": 1.9909, "step": 18542500 }, { "epoch": 53.67, "learning_rate": 2.317245247082253e-05, "loss": 1.9692, "step": 18543000 }, { "epoch": 53.68, "learning_rate": 2.317172882317525e-05, "loss": 1.9828, "step": 18543500 }, { "epoch": 53.68, "learning_rate": 2.3171005175527974e-05, "loss": 1.9616, "step": 18544000 }, { "epoch": 53.68, "learning_rate": 2.3170281527880696e-05, "loss": 1.9805, "step": 18544500 }, { "epoch": 53.68, "learning_rate": 2.3169557880233418e-05, "loss": 1.9968, "step": 18545000 }, { "epoch": 53.68, "learning_rate": 2.3168834232586144e-05, "loss": 1.9827, "step": 18545500 }, { "epoch": 53.68, "learning_rate": 2.316811058493887e-05, "loss": 1.9793, "step": 18546000 }, { "epoch": 53.68, "learning_rate": 2.3167386937291592e-05, "loss": 1.98, "step": 18546500 }, { "epoch": 53.69, "learning_rate": 2.3166663289644314e-05, "loss": 1.981, "step": 18547000 }, { "epoch": 53.69, "learning_rate": 2.3165939641997036e-05, "loss": 2.0054, "step": 18547500 }, { "epoch": 53.69, "learning_rate": 2.3165217441645055e-05, "loss": 2.0095, "step": 18548000 }, { "epoch": 53.69, "learning_rate": 2.3164493793997777e-05, "loss": 1.9885, "step": 18548500 }, { "epoch": 53.69, "learning_rate": 2.3163770146350503e-05, "loss": 1.9925, "step": 18549000 }, { "epoch": 53.69, "learning_rate": 2.316304794599852e-05, "loss": 1.9851, "step": 18549500 }, { "epoch": 53.69, "learning_rate": 2.3162324298351244e-05, "loss": 1.9664, "step": 18550000 }, { "epoch": 53.7, "learning_rate": 2.3161600650703966e-05, "loss": 1.9891, "step": 18550500 }, { "epoch": 53.7, "learning_rate": 2.316087700305669e-05, "loss": 1.9781, "step": 18551000 }, { "epoch": 53.7, "learning_rate": 2.316015335540941e-05, "loss": 1.9658, "step": 18551500 }, { "epoch": 53.7, "learning_rate": 2.3159429707762133e-05, "loss": 1.9536, "step": 18552000 }, { "epoch": 53.7, "learning_rate": 2.315870606011486e-05, "loss": 1.9932, "step": 18552500 }, { "epoch": 53.7, "learning_rate": 2.315798241246758e-05, "loss": 2.0032, "step": 18553000 }, { "epoch": 53.7, "learning_rate": 2.31572602121156e-05, "loss": 1.9765, "step": 18553500 }, { "epoch": 53.71, "learning_rate": 2.3156536564468322e-05, "loss": 1.9893, "step": 18554000 }, { "epoch": 53.71, "learning_rate": 2.3155812916821044e-05, "loss": 1.9852, "step": 18554500 }, { "epoch": 53.71, "learning_rate": 2.315508926917377e-05, "loss": 1.9894, "step": 18555000 }, { "epoch": 53.71, "learning_rate": 2.3154365621526492e-05, "loss": 1.9962, "step": 18555500 }, { "epoch": 53.71, "learning_rate": 2.3153641973879215e-05, "loss": 1.9938, "step": 18556000 }, { "epoch": 53.71, "learning_rate": 2.315291832623194e-05, "loss": 1.974, "step": 18556500 }, { "epoch": 53.71, "learning_rate": 2.315219612587996e-05, "loss": 1.9922, "step": 18557000 }, { "epoch": 53.72, "learning_rate": 2.3151473925527975e-05, "loss": 1.9615, "step": 18557500 }, { "epoch": 53.72, "learning_rate": 2.3150750277880697e-05, "loss": 1.9832, "step": 18558000 }, { "epoch": 53.72, "learning_rate": 2.315002663023342e-05, "loss": 1.9815, "step": 18558500 }, { "epoch": 53.72, "learning_rate": 2.3149302982586145e-05, "loss": 1.9772, "step": 18559000 }, { "epoch": 53.72, "learning_rate": 2.3148579334938867e-05, "loss": 1.9732, "step": 18559500 }, { "epoch": 53.72, "learning_rate": 2.3147855687291593e-05, "loss": 1.9781, "step": 18560000 }, { "epoch": 53.73, "learning_rate": 2.3147132039644315e-05, "loss": 1.9769, "step": 18560500 }, { "epoch": 53.73, "learning_rate": 2.3146408391997037e-05, "loss": 1.9796, "step": 18561000 }, { "epoch": 53.73, "learning_rate": 2.314568474434976e-05, "loss": 1.9901, "step": 18561500 }, { "epoch": 53.73, "learning_rate": 2.3144961096702485e-05, "loss": 1.9933, "step": 18562000 }, { "epoch": 53.73, "learning_rate": 2.3144237449055207e-05, "loss": 2.0076, "step": 18562500 }, { "epoch": 53.73, "learning_rate": 2.314351380140793e-05, "loss": 1.9545, "step": 18563000 }, { "epoch": 53.73, "learning_rate": 2.3142790153760655e-05, "loss": 1.9834, "step": 18563500 }, { "epoch": 53.74, "learning_rate": 2.314206795340867e-05, "loss": 1.9618, "step": 18564000 }, { "epoch": 53.74, "learning_rate": 2.3141344305761396e-05, "loss": 1.9764, "step": 18564500 }, { "epoch": 53.74, "learning_rate": 2.314062065811412e-05, "loss": 1.9795, "step": 18565000 }, { "epoch": 53.74, "learning_rate": 2.313989701046684e-05, "loss": 1.9741, "step": 18565500 }, { "epoch": 53.74, "learning_rate": 2.313917481011486e-05, "loss": 1.9776, "step": 18566000 }, { "epoch": 53.74, "learning_rate": 2.3138451162467582e-05, "loss": 1.9981, "step": 18566500 }, { "epoch": 53.74, "learning_rate": 2.3137727514820304e-05, "loss": 1.9971, "step": 18567000 }, { "epoch": 53.75, "learning_rate": 2.313700386717303e-05, "loss": 1.9761, "step": 18567500 }, { "epoch": 53.75, "learning_rate": 2.3136280219525752e-05, "loss": 1.9973, "step": 18568000 }, { "epoch": 53.75, "learning_rate": 2.313555801917377e-05, "loss": 1.9885, "step": 18568500 }, { "epoch": 53.75, "learning_rate": 2.3134834371526493e-05, "loss": 1.9616, "step": 18569000 }, { "epoch": 53.75, "learning_rate": 2.3134110723879215e-05, "loss": 1.9812, "step": 18569500 }, { "epoch": 53.75, "learning_rate": 2.3133387076231938e-05, "loss": 1.9851, "step": 18570000 }, { "epoch": 53.75, "learning_rate": 2.313266342858466e-05, "loss": 1.9853, "step": 18570500 }, { "epoch": 53.76, "learning_rate": 2.3131939780937385e-05, "loss": 1.9838, "step": 18571000 }, { "epoch": 53.76, "learning_rate": 2.313121613329011e-05, "loss": 2.0016, "step": 18571500 }, { "epoch": 53.76, "learning_rate": 2.3130492485642833e-05, "loss": 1.9983, "step": 18572000 }, { "epoch": 53.76, "learning_rate": 2.3129768837995555e-05, "loss": 1.9862, "step": 18572500 }, { "epoch": 53.76, "learning_rate": 2.3129048084938868e-05, "loss": 1.9626, "step": 18573000 }, { "epoch": 53.76, "learning_rate": 2.312832443729159e-05, "loss": 1.9602, "step": 18573500 }, { "epoch": 53.76, "learning_rate": 2.312760223693961e-05, "loss": 1.9766, "step": 18574000 }, { "epoch": 53.77, "learning_rate": 2.312687858929233e-05, "loss": 1.9783, "step": 18574500 }, { "epoch": 53.77, "learning_rate": 2.3126154941645053e-05, "loss": 1.9648, "step": 18575000 }, { "epoch": 53.77, "learning_rate": 2.312543129399778e-05, "loss": 1.9704, "step": 18575500 }, { "epoch": 53.77, "learning_rate": 2.31247076463505e-05, "loss": 1.9966, "step": 18576000 }, { "epoch": 53.77, "learning_rate": 2.3123983998703223e-05, "loss": 2.0068, "step": 18576500 }, { "epoch": 53.77, "learning_rate": 2.312326035105595e-05, "loss": 1.9816, "step": 18577000 }, { "epoch": 53.77, "learning_rate": 2.312253670340867e-05, "loss": 1.9781, "step": 18577500 }, { "epoch": 53.78, "learning_rate": 2.3121813055761394e-05, "loss": 1.9711, "step": 18578000 }, { "epoch": 53.78, "learning_rate": 2.312108940811412e-05, "loss": 1.9773, "step": 18578500 }, { "epoch": 53.78, "learning_rate": 2.3120367207762135e-05, "loss": 1.9914, "step": 18579000 }, { "epoch": 53.78, "learning_rate": 2.3119645007410154e-05, "loss": 1.9976, "step": 18579500 }, { "epoch": 53.78, "learning_rate": 2.3118921359762876e-05, "loss": 1.9868, "step": 18580000 }, { "epoch": 53.78, "learning_rate": 2.3118197712115598e-05, "loss": 1.9853, "step": 18580500 }, { "epoch": 53.78, "learning_rate": 2.3117474064468324e-05, "loss": 1.9839, "step": 18581000 }, { "epoch": 53.79, "learning_rate": 2.3116750416821046e-05, "loss": 1.9685, "step": 18581500 }, { "epoch": 53.79, "learning_rate": 2.3116026769173768e-05, "loss": 1.9671, "step": 18582000 }, { "epoch": 53.79, "learning_rate": 2.3115303121526494e-05, "loss": 1.9713, "step": 18582500 }, { "epoch": 53.79, "learning_rate": 2.3114580921174513e-05, "loss": 2.0246, "step": 18583000 }, { "epoch": 53.79, "learning_rate": 2.3113857273527235e-05, "loss": 1.9872, "step": 18583500 }, { "epoch": 53.79, "learning_rate": 2.3113133625879957e-05, "loss": 1.9721, "step": 18584000 }, { "epoch": 53.79, "learning_rate": 2.311240997823268e-05, "loss": 1.9725, "step": 18584500 }, { "epoch": 53.8, "learning_rate": 2.3111686330585402e-05, "loss": 1.9603, "step": 18585000 }, { "epoch": 53.8, "learning_rate": 2.3110962682938124e-05, "loss": 1.9736, "step": 18585500 }, { "epoch": 53.8, "learning_rate": 2.311023903529085e-05, "loss": 1.9872, "step": 18586000 }, { "epoch": 53.8, "learning_rate": 2.3109515387643575e-05, "loss": 1.965, "step": 18586500 }, { "epoch": 53.8, "learning_rate": 2.3108791739996297e-05, "loss": 1.9736, "step": 18587000 }, { "epoch": 53.8, "learning_rate": 2.3108069539644313e-05, "loss": 1.9737, "step": 18587500 }, { "epoch": 53.8, "learning_rate": 2.310734589199704e-05, "loss": 1.9819, "step": 18588000 }, { "epoch": 53.81, "learning_rate": 2.310662224434976e-05, "loss": 1.9605, "step": 18588500 }, { "epoch": 53.81, "learning_rate": 2.3105900043997776e-05, "loss": 1.9803, "step": 18589000 }, { "epoch": 53.81, "learning_rate": 2.31051763963505e-05, "loss": 1.9787, "step": 18589500 }, { "epoch": 53.81, "learning_rate": 2.3104452748703224e-05, "loss": 1.9768, "step": 18590000 }, { "epoch": 53.81, "learning_rate": 2.310372910105595e-05, "loss": 1.9723, "step": 18590500 }, { "epoch": 53.81, "learning_rate": 2.3103005453408672e-05, "loss": 1.9877, "step": 18591000 }, { "epoch": 53.81, "learning_rate": 2.3102281805761394e-05, "loss": 1.982, "step": 18591500 }, { "epoch": 53.82, "learning_rate": 2.3101558158114117e-05, "loss": 1.9823, "step": 18592000 }, { "epoch": 53.82, "learning_rate": 2.310083451046684e-05, "loss": 1.9687, "step": 18592500 }, { "epoch": 53.82, "learning_rate": 2.3100110862819564e-05, "loss": 1.9737, "step": 18593000 }, { "epoch": 53.82, "learning_rate": 2.309938721517229e-05, "loss": 1.974, "step": 18593500 }, { "epoch": 53.82, "learning_rate": 2.3098663567525012e-05, "loss": 1.9682, "step": 18594000 }, { "epoch": 53.82, "learning_rate": 2.3097939919877735e-05, "loss": 1.9722, "step": 18594500 }, { "epoch": 53.82, "learning_rate": 2.309721771952575e-05, "loss": 2.0019, "step": 18595000 }, { "epoch": 53.83, "learning_rate": 2.3096494071878476e-05, "loss": 1.9915, "step": 18595500 }, { "epoch": 53.83, "learning_rate": 2.3095770424231198e-05, "loss": 1.9927, "step": 18596000 }, { "epoch": 53.83, "learning_rate": 2.309504677658392e-05, "loss": 1.9811, "step": 18596500 }, { "epoch": 53.83, "learning_rate": 2.3094323128936642e-05, "loss": 1.9671, "step": 18597000 }, { "epoch": 53.83, "learning_rate": 2.3093599481289368e-05, "loss": 1.9942, "step": 18597500 }, { "epoch": 53.83, "learning_rate": 2.309287583364209e-05, "loss": 1.9792, "step": 18598000 }, { "epoch": 53.84, "learning_rate": 2.309215363329011e-05, "loss": 1.993, "step": 18598500 }, { "epoch": 53.84, "learning_rate": 2.3091431432938128e-05, "loss": 1.9648, "step": 18599000 }, { "epoch": 53.84, "learning_rate": 2.309070778529085e-05, "loss": 1.9998, "step": 18599500 }, { "epoch": 53.84, "learning_rate": 2.3089984137643573e-05, "loss": 1.9957, "step": 18600000 }, { "epoch": 53.84, "learning_rate": 2.3089260489996295e-05, "loss": 1.9859, "step": 18600500 }, { "epoch": 53.84, "learning_rate": 2.308853684234902e-05, "loss": 1.9957, "step": 18601000 }, { "epoch": 53.84, "learning_rate": 2.3087813194701743e-05, "loss": 1.9848, "step": 18601500 }, { "epoch": 53.85, "learning_rate": 2.3087089547054465e-05, "loss": 1.9688, "step": 18602000 }, { "epoch": 53.85, "learning_rate": 2.308636589940719e-05, "loss": 1.9789, "step": 18602500 }, { "epoch": 53.85, "learning_rate": 2.3085642251759913e-05, "loss": 2.0074, "step": 18603000 }, { "epoch": 53.85, "learning_rate": 2.3084918604112635e-05, "loss": 1.9633, "step": 18603500 }, { "epoch": 53.85, "learning_rate": 2.3084194956465357e-05, "loss": 1.9842, "step": 18604000 }, { "epoch": 53.85, "learning_rate": 2.3083471308818083e-05, "loss": 1.9528, "step": 18604500 }, { "epoch": 53.85, "learning_rate": 2.3082747661170805e-05, "loss": 1.9576, "step": 18605000 }, { "epoch": 53.86, "learning_rate": 2.3082025460818824e-05, "loss": 1.9821, "step": 18605500 }, { "epoch": 53.86, "learning_rate": 2.308130326046684e-05, "loss": 1.9856, "step": 18606000 }, { "epoch": 53.86, "learning_rate": 2.308058106011486e-05, "loss": 1.9824, "step": 18606500 }, { "epoch": 53.86, "learning_rate": 2.307985741246758e-05, "loss": 1.9942, "step": 18607000 }, { "epoch": 53.86, "learning_rate": 2.3079133764820303e-05, "loss": 1.9804, "step": 18607500 }, { "epoch": 53.86, "learning_rate": 2.307841011717303e-05, "loss": 1.9873, "step": 18608000 }, { "epoch": 53.86, "learning_rate": 2.3077686469525754e-05, "loss": 1.9831, "step": 18608500 }, { "epoch": 53.87, "learning_rate": 2.3076962821878477e-05, "loss": 1.9942, "step": 18609000 }, { "epoch": 53.87, "learning_rate": 2.30762391742312e-05, "loss": 1.9978, "step": 18609500 }, { "epoch": 53.87, "learning_rate": 2.307551552658392e-05, "loss": 1.9988, "step": 18610000 }, { "epoch": 53.87, "learning_rate": 2.3074794773527233e-05, "loss": 1.9962, "step": 18610500 }, { "epoch": 53.87, "learning_rate": 2.3074071125879955e-05, "loss": 1.9646, "step": 18611000 }, { "epoch": 53.87, "learning_rate": 2.3073347478232678e-05, "loss": 1.9864, "step": 18611500 }, { "epoch": 53.87, "learning_rate": 2.3072623830585403e-05, "loss": 2.0023, "step": 18612000 }, { "epoch": 53.88, "learning_rate": 2.307190018293813e-05, "loss": 1.9507, "step": 18612500 }, { "epoch": 53.88, "learning_rate": 2.307117653529085e-05, "loss": 2.0081, "step": 18613000 }, { "epoch": 53.88, "learning_rate": 2.3070454334938867e-05, "loss": 1.969, "step": 18613500 }, { "epoch": 53.88, "learning_rate": 2.3069730687291592e-05, "loss": 1.9804, "step": 18614000 }, { "epoch": 53.88, "learning_rate": 2.3069007039644315e-05, "loss": 1.999, "step": 18614500 }, { "epoch": 53.88, "learning_rate": 2.3068283391997037e-05, "loss": 1.9796, "step": 18615000 }, { "epoch": 53.88, "learning_rate": 2.306755974434976e-05, "loss": 1.9745, "step": 18615500 }, { "epoch": 53.89, "learning_rate": 2.306683609670248e-05, "loss": 1.97, "step": 18616000 }, { "epoch": 53.89, "learning_rate": 2.3066112449055207e-05, "loss": 1.9902, "step": 18616500 }, { "epoch": 53.89, "learning_rate": 2.306538880140793e-05, "loss": 1.985, "step": 18617000 }, { "epoch": 53.89, "learning_rate": 2.3064665153760655e-05, "loss": 1.9857, "step": 18617500 }, { "epoch": 53.89, "learning_rate": 2.3063941506113377e-05, "loss": 1.9919, "step": 18618000 }, { "epoch": 53.89, "learning_rate": 2.3063219305761393e-05, "loss": 2.0152, "step": 18618500 }, { "epoch": 53.89, "learning_rate": 2.306249710540941e-05, "loss": 1.9883, "step": 18619000 }, { "epoch": 53.9, "learning_rate": 2.306177490505743e-05, "loss": 1.9685, "step": 18619500 }, { "epoch": 53.9, "learning_rate": 2.3061051257410153e-05, "loss": 1.9622, "step": 18620000 }, { "epoch": 53.9, "learning_rate": 2.3060327609762878e-05, "loss": 1.99, "step": 18620500 }, { "epoch": 53.9, "learning_rate": 2.30596039621156e-05, "loss": 1.9843, "step": 18621000 }, { "epoch": 53.9, "learning_rate": 2.3058880314468323e-05, "loss": 1.9861, "step": 18621500 }, { "epoch": 53.9, "learning_rate": 2.3058156666821045e-05, "loss": 1.9717, "step": 18622000 }, { "epoch": 53.9, "learning_rate": 2.3057433019173767e-05, "loss": 1.9743, "step": 18622500 }, { "epoch": 53.91, "learning_rate": 2.3056709371526493e-05, "loss": 1.9953, "step": 18623000 }, { "epoch": 53.91, "learning_rate": 2.3055985723879215e-05, "loss": 1.9987, "step": 18623500 }, { "epoch": 53.91, "learning_rate": 2.3055263523527234e-05, "loss": 1.9885, "step": 18624000 }, { "epoch": 53.91, "learning_rate": 2.3054541323175253e-05, "loss": 1.9826, "step": 18624500 }, { "epoch": 53.91, "learning_rate": 2.3053817675527975e-05, "loss": 2.0006, "step": 18625000 }, { "epoch": 53.91, "learning_rate": 2.3053094027880697e-05, "loss": 1.9751, "step": 18625500 }, { "epoch": 53.91, "learning_rate": 2.305237038023342e-05, "loss": 1.9808, "step": 18626000 }, { "epoch": 53.92, "learning_rate": 2.3051646732586142e-05, "loss": 1.9922, "step": 18626500 }, { "epoch": 53.92, "learning_rate": 2.3050923084938868e-05, "loss": 1.9823, "step": 18627000 }, { "epoch": 53.92, "learning_rate": 2.3050200884586883e-05, "loss": 1.9734, "step": 18627500 }, { "epoch": 53.92, "learning_rate": 2.304947723693961e-05, "loss": 1.9958, "step": 18628000 }, { "epoch": 53.92, "learning_rate": 2.304875358929233e-05, "loss": 1.9657, "step": 18628500 }, { "epoch": 53.92, "learning_rate": 2.3048029941645057e-05, "loss": 1.9613, "step": 18629000 }, { "epoch": 53.92, "learning_rate": 2.304730629399778e-05, "loss": 2.0197, "step": 18629500 }, { "epoch": 53.93, "learning_rate": 2.30465826463505e-05, "loss": 1.9939, "step": 18630000 }, { "epoch": 53.93, "learning_rate": 2.3045858998703223e-05, "loss": 1.9585, "step": 18630500 }, { "epoch": 53.93, "learning_rate": 2.3045135351055946e-05, "loss": 1.9737, "step": 18631000 }, { "epoch": 53.93, "learning_rate": 2.304441170340867e-05, "loss": 1.9689, "step": 18631500 }, { "epoch": 53.93, "learning_rate": 2.3043688055761393e-05, "loss": 1.972, "step": 18632000 }, { "epoch": 53.93, "learning_rate": 2.304296440811412e-05, "loss": 1.9721, "step": 18632500 }, { "epoch": 53.93, "learning_rate": 2.304224076046684e-05, "loss": 1.9969, "step": 18633000 }, { "epoch": 53.94, "learning_rate": 2.3041518560114857e-05, "loss": 2.0207, "step": 18633500 }, { "epoch": 53.94, "learning_rate": 2.3040794912467582e-05, "loss": 1.9884, "step": 18634000 }, { "epoch": 53.94, "learning_rate": 2.3040071264820305e-05, "loss": 1.9912, "step": 18634500 }, { "epoch": 53.94, "learning_rate": 2.3039349064468324e-05, "loss": 2.0088, "step": 18635000 }, { "epoch": 53.94, "learning_rate": 2.3038625416821046e-05, "loss": 1.972, "step": 18635500 }, { "epoch": 53.94, "learning_rate": 2.303790176917377e-05, "loss": 1.9748, "step": 18636000 }, { "epoch": 53.95, "learning_rate": 2.3037179568821787e-05, "loss": 1.9882, "step": 18636500 }, { "epoch": 53.95, "learning_rate": 2.303645592117451e-05, "loss": 1.9838, "step": 18637000 }, { "epoch": 53.95, "learning_rate": 2.303573227352723e-05, "loss": 2.0027, "step": 18637500 }, { "epoch": 53.95, "learning_rate": 2.3035008625879957e-05, "loss": 1.9771, "step": 18638000 }, { "epoch": 53.95, "learning_rate": 2.303428497823268e-05, "loss": 1.9936, "step": 18638500 }, { "epoch": 53.95, "learning_rate": 2.3033561330585405e-05, "loss": 1.9601, "step": 18639000 }, { "epoch": 53.95, "learning_rate": 2.3032837682938127e-05, "loss": 1.975, "step": 18639500 }, { "epoch": 53.96, "learning_rate": 2.303211403529085e-05, "loss": 2.005, "step": 18640000 }, { "epoch": 53.96, "learning_rate": 2.303139183493887e-05, "loss": 1.9746, "step": 18640500 }, { "epoch": 53.96, "learning_rate": 2.303066818729159e-05, "loss": 1.9831, "step": 18641000 }, { "epoch": 53.96, "learning_rate": 2.3029944539644313e-05, "loss": 2.0014, "step": 18641500 }, { "epoch": 53.96, "learning_rate": 2.3029220891997035e-05, "loss": 1.9682, "step": 18642000 }, { "epoch": 53.96, "learning_rate": 2.302849724434976e-05, "loss": 1.9895, "step": 18642500 }, { "epoch": 53.96, "learning_rate": 2.302777504399778e-05, "loss": 1.9823, "step": 18643000 }, { "epoch": 53.97, "learning_rate": 2.3027051396350502e-05, "loss": 1.9855, "step": 18643500 }, { "epoch": 53.97, "learning_rate": 2.3026327748703224e-05, "loss": 1.9806, "step": 18644000 }, { "epoch": 53.97, "learning_rate": 2.3025604101055946e-05, "loss": 2.0, "step": 18644500 }, { "epoch": 53.97, "learning_rate": 2.3024880453408672e-05, "loss": 1.9613, "step": 18645000 }, { "epoch": 53.97, "learning_rate": 2.3024156805761394e-05, "loss": 1.9813, "step": 18645500 }, { "epoch": 53.97, "learning_rate": 2.302343315811412e-05, "loss": 1.9873, "step": 18646000 }, { "epoch": 53.97, "learning_rate": 2.3022709510466842e-05, "loss": 1.9753, "step": 18646500 }, { "epoch": 53.98, "learning_rate": 2.3021987310114858e-05, "loss": 1.9924, "step": 18647000 }, { "epoch": 53.98, "learning_rate": 2.3021263662467583e-05, "loss": 1.9912, "step": 18647500 }, { "epoch": 53.98, "learning_rate": 2.3020540014820305e-05, "loss": 1.9881, "step": 18648000 }, { "epoch": 53.98, "learning_rate": 2.3019816367173028e-05, "loss": 2.0051, "step": 18648500 }, { "epoch": 53.98, "learning_rate": 2.301909271952575e-05, "loss": 1.9798, "step": 18649000 }, { "epoch": 53.98, "learning_rate": 2.3018369071878472e-05, "loss": 1.9722, "step": 18649500 }, { "epoch": 53.98, "learning_rate": 2.3017645424231198e-05, "loss": 1.9945, "step": 18650000 }, { "epoch": 53.99, "learning_rate": 2.3016921776583923e-05, "loss": 1.9944, "step": 18650500 }, { "epoch": 53.99, "learning_rate": 2.3016198128936646e-05, "loss": 1.9719, "step": 18651000 }, { "epoch": 53.99, "learning_rate": 2.3015474481289368e-05, "loss": 2.0136, "step": 18651500 }, { "epoch": 53.99, "learning_rate": 2.301475083364209e-05, "loss": 1.9766, "step": 18652000 }, { "epoch": 53.99, "learning_rate": 2.3014027185994812e-05, "loss": 1.9841, "step": 18652500 }, { "epoch": 53.99, "learning_rate": 2.3013303538347535e-05, "loss": 1.9878, "step": 18653000 }, { "epoch": 53.99, "learning_rate": 2.3012581337995557e-05, "loss": 1.9761, "step": 18653500 }, { "epoch": 54.0, "learning_rate": 2.301185769034828e-05, "loss": 1.9663, "step": 18654000 }, { "epoch": 54.0, "learning_rate": 2.3011134042701e-05, "loss": 1.9654, "step": 18654500 }, { "epoch": 54.0, "learning_rate": 2.3010410395053724e-05, "loss": 1.99, "step": 18655000 }, { "epoch": 54.0, "eval_accuracy": 0.6748646316641627, "eval_accuracy_mlm": 0.6411324675857366, "eval_accuracy_nsp": 0.8557813495391694, "eval_loss": 2.1620771884918213, "eval_runtime": 331.6379, "eval_samples_per_second": 1315.851, "eval_steps_per_second": 54.828, "step": 18655488 }, { "epoch": 54.0, "learning_rate": 2.300968674740645e-05, "loss": 1.9749, "step": 18655500 }, { "epoch": 54.0, "learning_rate": 2.3008964547054465e-05, "loss": 1.989, "step": 18656000 }, { "epoch": 54.0, "learning_rate": 2.3008240899407187e-05, "loss": 1.9611, "step": 18656500 }, { "epoch": 54.0, "learning_rate": 2.300751725175991e-05, "loss": 1.9425, "step": 18657000 }, { "epoch": 54.01, "learning_rate": 2.3006793604112635e-05, "loss": 1.9418, "step": 18657500 }, { "epoch": 54.01, "learning_rate": 2.300606995646536e-05, "loss": 1.974, "step": 18658000 }, { "epoch": 54.01, "learning_rate": 2.3005346308818083e-05, "loss": 1.96, "step": 18658500 }, { "epoch": 54.01, "learning_rate": 2.3004624108466098e-05, "loss": 1.9959, "step": 18659000 }, { "epoch": 54.01, "learning_rate": 2.3003900460818824e-05, "loss": 1.9524, "step": 18659500 }, { "epoch": 54.01, "learning_rate": 2.300317826046684e-05, "loss": 1.9542, "step": 18660000 }, { "epoch": 54.01, "learning_rate": 2.300245461281956e-05, "loss": 1.9436, "step": 18660500 }, { "epoch": 54.02, "learning_rate": 2.3001730965172287e-05, "loss": 1.937, "step": 18661000 }, { "epoch": 54.02, "learning_rate": 2.3001007317525013e-05, "loss": 1.978, "step": 18661500 }, { "epoch": 54.02, "learning_rate": 2.3000283669877735e-05, "loss": 1.9311, "step": 18662000 }, { "epoch": 54.02, "learning_rate": 2.2999560022230457e-05, "loss": 1.961, "step": 18662500 }, { "epoch": 54.02, "learning_rate": 2.2998837821878473e-05, "loss": 1.9807, "step": 18663000 }, { "epoch": 54.02, "learning_rate": 2.29981141742312e-05, "loss": 1.9532, "step": 18663500 }, { "epoch": 54.02, "learning_rate": 2.299739052658392e-05, "loss": 1.9303, "step": 18664000 }, { "epoch": 54.03, "learning_rate": 2.2996666878936643e-05, "loss": 1.9717, "step": 18664500 }, { "epoch": 54.03, "learning_rate": 2.299594323128937e-05, "loss": 1.9747, "step": 18665000 }, { "epoch": 54.03, "learning_rate": 2.2995221030937388e-05, "loss": 1.9615, "step": 18665500 }, { "epoch": 54.03, "learning_rate": 2.299449738329011e-05, "loss": 1.9499, "step": 18666000 }, { "epoch": 54.03, "learning_rate": 2.2993775182938125e-05, "loss": 1.9542, "step": 18666500 }, { "epoch": 54.03, "learning_rate": 2.299305153529085e-05, "loss": 1.9658, "step": 18667000 }, { "epoch": 54.03, "learning_rate": 2.2992327887643573e-05, "loss": 1.9685, "step": 18667500 }, { "epoch": 54.04, "learning_rate": 2.2991604239996295e-05, "loss": 1.959, "step": 18668000 }, { "epoch": 54.04, "learning_rate": 2.299088059234902e-05, "loss": 1.9664, "step": 18668500 }, { "epoch": 54.04, "learning_rate": 2.2990156944701743e-05, "loss": 1.9978, "step": 18669000 }, { "epoch": 54.04, "learning_rate": 2.2989433297054466e-05, "loss": 1.9838, "step": 18669500 }, { "epoch": 54.04, "learning_rate": 2.2988711096702484e-05, "loss": 1.9701, "step": 18670000 }, { "epoch": 54.04, "learning_rate": 2.2987987449055207e-05, "loss": 1.9517, "step": 18670500 }, { "epoch": 54.04, "learning_rate": 2.298726380140793e-05, "loss": 1.9762, "step": 18671000 }, { "epoch": 54.05, "learning_rate": 2.298654015376065e-05, "loss": 1.9523, "step": 18671500 }, { "epoch": 54.05, "learning_rate": 2.2985816506113377e-05, "loss": 1.9649, "step": 18672000 }, { "epoch": 54.05, "learning_rate": 2.29850928584661e-05, "loss": 1.9701, "step": 18672500 }, { "epoch": 54.05, "learning_rate": 2.2984369210818825e-05, "loss": 1.9505, "step": 18673000 }, { "epoch": 54.05, "learning_rate": 2.2983645563171547e-05, "loss": 1.9537, "step": 18673500 }, { "epoch": 54.05, "learning_rate": 2.298292191552427e-05, "loss": 1.9605, "step": 18674000 }, { "epoch": 54.06, "learning_rate": 2.298219826787699e-05, "loss": 1.9759, "step": 18674500 }, { "epoch": 54.06, "learning_rate": 2.2981474620229714e-05, "loss": 1.9464, "step": 18675000 }, { "epoch": 54.06, "learning_rate": 2.2980752419877733e-05, "loss": 1.9613, "step": 18675500 }, { "epoch": 54.06, "learning_rate": 2.2980028772230458e-05, "loss": 1.9844, "step": 18676000 }, { "epoch": 54.06, "learning_rate": 2.297930512458318e-05, "loss": 1.9785, "step": 18676500 }, { "epoch": 54.06, "learning_rate": 2.2978581476935903e-05, "loss": 1.9745, "step": 18677000 }, { "epoch": 54.06, "learning_rate": 2.2977857829288625e-05, "loss": 1.977, "step": 18677500 }, { "epoch": 54.07, "learning_rate": 2.297713418164135e-05, "loss": 1.9492, "step": 18678000 }, { "epoch": 54.07, "learning_rate": 2.2976411981289366e-05, "loss": 1.9386, "step": 18678500 }, { "epoch": 54.07, "learning_rate": 2.2975689780937385e-05, "loss": 1.9818, "step": 18679000 }, { "epoch": 54.07, "learning_rate": 2.2974966133290107e-05, "loss": 1.9845, "step": 18679500 }, { "epoch": 54.07, "learning_rate": 2.2974242485642833e-05, "loss": 1.9707, "step": 18680000 }, { "epoch": 54.07, "learning_rate": 2.2973518837995555e-05, "loss": 1.9861, "step": 18680500 }, { "epoch": 54.07, "learning_rate": 2.2972795190348277e-05, "loss": 1.972, "step": 18681000 }, { "epoch": 54.08, "learning_rate": 2.2972071542701003e-05, "loss": 1.9857, "step": 18681500 }, { "epoch": 54.08, "learning_rate": 2.2971347895053725e-05, "loss": 1.997, "step": 18682000 }, { "epoch": 54.08, "learning_rate": 2.2970624247406447e-05, "loss": 1.9697, "step": 18682500 }, { "epoch": 54.08, "learning_rate": 2.296990059975917e-05, "loss": 1.9562, "step": 18683000 }, { "epoch": 54.08, "learning_rate": 2.2969176952111895e-05, "loss": 1.9566, "step": 18683500 }, { "epoch": 54.08, "learning_rate": 2.2968454751759914e-05, "loss": 1.9511, "step": 18684000 }, { "epoch": 54.08, "learning_rate": 2.2967731104112636e-05, "loss": 1.9855, "step": 18684500 }, { "epoch": 54.09, "learning_rate": 2.296700745646536e-05, "loss": 1.9849, "step": 18685000 }, { "epoch": 54.09, "learning_rate": 2.296628380881808e-05, "loss": 1.9792, "step": 18685500 }, { "epoch": 54.09, "learning_rate": 2.2965560161170803e-05, "loss": 1.9504, "step": 18686000 }, { "epoch": 54.09, "learning_rate": 2.296483651352353e-05, "loss": 1.9719, "step": 18686500 }, { "epoch": 54.09, "learning_rate": 2.2964112865876254e-05, "loss": 1.9783, "step": 18687000 }, { "epoch": 54.09, "learning_rate": 2.2963389218228977e-05, "loss": 1.9833, "step": 18687500 }, { "epoch": 54.09, "learning_rate": 2.29626655705817e-05, "loss": 1.9791, "step": 18688000 }, { "epoch": 54.1, "learning_rate": 2.2961943370229714e-05, "loss": 1.9724, "step": 18688500 }, { "epoch": 54.1, "learning_rate": 2.296121972258244e-05, "loss": 1.9606, "step": 18689000 }, { "epoch": 54.1, "learning_rate": 2.2960497522230456e-05, "loss": 1.95, "step": 18689500 }, { "epoch": 54.1, "learning_rate": 2.2959773874583178e-05, "loss": 1.9931, "step": 18690000 }, { "epoch": 54.1, "learning_rate": 2.2959050226935903e-05, "loss": 1.9732, "step": 18690500 }, { "epoch": 54.1, "learning_rate": 2.295832657928863e-05, "loss": 1.9623, "step": 18691000 }, { "epoch": 54.1, "learning_rate": 2.295760293164135e-05, "loss": 1.9422, "step": 18691500 }, { "epoch": 54.11, "learning_rate": 2.2956880731289367e-05, "loss": 1.9926, "step": 18692000 }, { "epoch": 54.11, "learning_rate": 2.2956157083642092e-05, "loss": 1.9801, "step": 18692500 }, { "epoch": 54.11, "learning_rate": 2.2955433435994815e-05, "loss": 1.9738, "step": 18693000 }, { "epoch": 54.11, "learning_rate": 2.2954709788347537e-05, "loss": 1.9774, "step": 18693500 }, { "epoch": 54.11, "learning_rate": 2.2953987587995552e-05, "loss": 1.971, "step": 18694000 }, { "epoch": 54.11, "learning_rate": 2.2953263940348278e-05, "loss": 1.9697, "step": 18694500 }, { "epoch": 54.11, "learning_rate": 2.2952540292701004e-05, "loss": 1.9626, "step": 18695000 }, { "epoch": 54.12, "learning_rate": 2.2951816645053726e-05, "loss": 1.9507, "step": 18695500 }, { "epoch": 54.12, "learning_rate": 2.2951092997406448e-05, "loss": 1.9681, "step": 18696000 }, { "epoch": 54.12, "learning_rate": 2.295036934975917e-05, "loss": 1.9865, "step": 18696500 }, { "epoch": 54.12, "learning_rate": 2.2949645702111893e-05, "loss": 1.9749, "step": 18697000 }, { "epoch": 54.12, "learning_rate": 2.2948922054464618e-05, "loss": 1.9684, "step": 18697500 }, { "epoch": 54.12, "learning_rate": 2.2948198406817344e-05, "loss": 1.9572, "step": 18698000 }, { "epoch": 54.12, "learning_rate": 2.2947474759170066e-05, "loss": 1.9998, "step": 18698500 }, { "epoch": 54.13, "learning_rate": 2.294675111152279e-05, "loss": 1.9766, "step": 18699000 }, { "epoch": 54.13, "learning_rate": 2.294602746387551e-05, "loss": 1.9736, "step": 18699500 }, { "epoch": 54.13, "learning_rate": 2.2945303816228233e-05, "loss": 1.9768, "step": 18700000 }, { "epoch": 54.13, "learning_rate": 2.2944581615876252e-05, "loss": 1.9658, "step": 18700500 }, { "epoch": 54.13, "learning_rate": 2.2943857968228974e-05, "loss": 1.9635, "step": 18701000 }, { "epoch": 54.13, "learning_rate": 2.2943135767876993e-05, "loss": 1.9801, "step": 18701500 }, { "epoch": 54.13, "learning_rate": 2.294241212022972e-05, "loss": 1.9434, "step": 18702000 }, { "epoch": 54.14, "learning_rate": 2.294168847258244e-05, "loss": 1.965, "step": 18702500 }, { "epoch": 54.14, "learning_rate": 2.2940964824935163e-05, "loss": 1.9571, "step": 18703000 }, { "epoch": 54.14, "learning_rate": 2.2940241177287885e-05, "loss": 1.9681, "step": 18703500 }, { "epoch": 54.14, "learning_rate": 2.2939518976935904e-05, "loss": 1.9707, "step": 18704000 }, { "epoch": 54.14, "learning_rate": 2.293879677658392e-05, "loss": 1.9979, "step": 18704500 }, { "epoch": 54.14, "learning_rate": 2.2938073128936642e-05, "loss": 1.9697, "step": 18705000 }, { "epoch": 54.14, "learning_rate": 2.2937349481289368e-05, "loss": 1.9726, "step": 18705500 }, { "epoch": 54.15, "learning_rate": 2.2936625833642093e-05, "loss": 1.982, "step": 18706000 }, { "epoch": 54.15, "learning_rate": 2.2935902185994815e-05, "loss": 1.9816, "step": 18706500 }, { "epoch": 54.15, "learning_rate": 2.2935178538347538e-05, "loss": 1.9581, "step": 18707000 }, { "epoch": 54.15, "learning_rate": 2.293445489070026e-05, "loss": 1.9768, "step": 18707500 }, { "epoch": 54.15, "learning_rate": 2.2933731243052982e-05, "loss": 1.9745, "step": 18708000 }, { "epoch": 54.15, "learning_rate": 2.2933007595405704e-05, "loss": 1.9767, "step": 18708500 }, { "epoch": 54.15, "learning_rate": 2.293228394775843e-05, "loss": 1.9503, "step": 18709000 }, { "epoch": 54.16, "learning_rate": 2.2931560300111156e-05, "loss": 1.953, "step": 18709500 }, { "epoch": 54.16, "learning_rate": 2.2930836652463878e-05, "loss": 1.9805, "step": 18710000 }, { "epoch": 54.16, "learning_rate": 2.29301130048166e-05, "loss": 1.9731, "step": 18710500 }, { "epoch": 54.16, "learning_rate": 2.2929389357169322e-05, "loss": 1.9517, "step": 18711000 }, { "epoch": 54.16, "learning_rate": 2.292866715681734e-05, "loss": 1.9836, "step": 18711500 }, { "epoch": 54.16, "learning_rate": 2.2927943509170064e-05, "loss": 1.9705, "step": 18712000 }, { "epoch": 54.17, "learning_rate": 2.2927219861522786e-05, "loss": 1.9788, "step": 18712500 }, { "epoch": 54.17, "learning_rate": 2.292649621387551e-05, "loss": 1.9633, "step": 18713000 }, { "epoch": 54.17, "learning_rate": 2.292577401352353e-05, "loss": 1.9693, "step": 18713500 }, { "epoch": 54.17, "learning_rate": 2.2925050365876253e-05, "loss": 1.9745, "step": 18714000 }, { "epoch": 54.17, "learning_rate": 2.2924326718228975e-05, "loss": 1.9628, "step": 18714500 }, { "epoch": 54.17, "learning_rate": 2.2923603070581697e-05, "loss": 1.9419, "step": 18715000 }, { "epoch": 54.17, "learning_rate": 2.292288231752501e-05, "loss": 1.9785, "step": 18715500 }, { "epoch": 54.18, "learning_rate": 2.292215866987773e-05, "loss": 1.9875, "step": 18716000 }, { "epoch": 54.18, "learning_rate": 2.2921435022230457e-05, "loss": 1.971, "step": 18716500 }, { "epoch": 54.18, "learning_rate": 2.2920711374583183e-05, "loss": 1.9516, "step": 18717000 }, { "epoch": 54.18, "learning_rate": 2.2919987726935905e-05, "loss": 1.9766, "step": 18717500 }, { "epoch": 54.18, "learning_rate": 2.291926552658392e-05, "loss": 1.9938, "step": 18718000 }, { "epoch": 54.18, "learning_rate": 2.291854332623194e-05, "loss": 1.9883, "step": 18718500 }, { "epoch": 54.18, "learning_rate": 2.2917819678584662e-05, "loss": 1.9766, "step": 18719000 }, { "epoch": 54.19, "learning_rate": 2.2917096030937384e-05, "loss": 1.9667, "step": 18719500 }, { "epoch": 54.19, "learning_rate": 2.2916372383290106e-05, "loss": 1.9927, "step": 18720000 }, { "epoch": 54.19, "learning_rate": 2.2915648735642832e-05, "loss": 1.9991, "step": 18720500 }, { "epoch": 54.19, "learning_rate": 2.2914925087995557e-05, "loss": 1.98, "step": 18721000 }, { "epoch": 54.19, "learning_rate": 2.291420144034828e-05, "loss": 1.9589, "step": 18721500 }, { "epoch": 54.19, "learning_rate": 2.2913477792701002e-05, "loss": 1.9911, "step": 18722000 }, { "epoch": 54.19, "learning_rate": 2.2912754145053724e-05, "loss": 1.9764, "step": 18722500 }, { "epoch": 54.2, "learning_rate": 2.2912030497406446e-05, "loss": 1.9558, "step": 18723000 }, { "epoch": 54.2, "learning_rate": 2.2911308297054465e-05, "loss": 1.9424, "step": 18723500 }, { "epoch": 54.2, "learning_rate": 2.2910584649407188e-05, "loss": 1.9339, "step": 18724000 }, { "epoch": 54.2, "learning_rate": 2.2909862449055207e-05, "loss": 1.9473, "step": 18724500 }, { "epoch": 54.2, "learning_rate": 2.2909138801407932e-05, "loss": 1.9723, "step": 18725000 }, { "epoch": 54.2, "learning_rate": 2.2908416601055948e-05, "loss": 1.9499, "step": 18725500 }, { "epoch": 54.2, "learning_rate": 2.290769295340867e-05, "loss": 1.9573, "step": 18726000 }, { "epoch": 54.21, "learning_rate": 2.2906969305761396e-05, "loss": 1.9441, "step": 18726500 }, { "epoch": 54.21, "learning_rate": 2.2906245658114118e-05, "loss": 1.9648, "step": 18727000 }, { "epoch": 54.21, "learning_rate": 2.290552201046684e-05, "loss": 1.9646, "step": 18727500 }, { "epoch": 54.21, "learning_rate": 2.2904798362819562e-05, "loss": 1.9873, "step": 18728000 }, { "epoch": 54.21, "learning_rate": 2.2904074715172288e-05, "loss": 1.9569, "step": 18728500 }, { "epoch": 54.21, "learning_rate": 2.290335106752501e-05, "loss": 1.9871, "step": 18729000 }, { "epoch": 54.21, "learning_rate": 2.2902627419877736e-05, "loss": 1.9619, "step": 18729500 }, { "epoch": 54.22, "learning_rate": 2.290190521952575e-05, "loss": 1.9702, "step": 18730000 }, { "epoch": 54.22, "learning_rate": 2.2901181571878474e-05, "loss": 1.9613, "step": 18730500 }, { "epoch": 54.22, "learning_rate": 2.2900457924231196e-05, "loss": 1.9746, "step": 18731000 }, { "epoch": 54.22, "learning_rate": 2.289973427658392e-05, "loss": 1.9629, "step": 18731500 }, { "epoch": 54.22, "learning_rate": 2.2899010628936644e-05, "loss": 1.9776, "step": 18732000 }, { "epoch": 54.22, "learning_rate": 2.289828698128937e-05, "loss": 1.9721, "step": 18732500 }, { "epoch": 54.22, "learning_rate": 2.289756333364209e-05, "loss": 1.9817, "step": 18733000 }, { "epoch": 54.23, "learning_rate": 2.2896839685994814e-05, "loss": 1.9654, "step": 18733500 }, { "epoch": 54.23, "learning_rate": 2.2896116038347536e-05, "loss": 1.9739, "step": 18734000 }, { "epoch": 54.23, "learning_rate": 2.2895393837995555e-05, "loss": 1.9686, "step": 18734500 }, { "epoch": 54.23, "learning_rate": 2.2894670190348277e-05, "loss": 1.9627, "step": 18735000 }, { "epoch": 54.23, "learning_rate": 2.2893946542701e-05, "loss": 1.9513, "step": 18735500 }, { "epoch": 54.23, "learning_rate": 2.289322434234902e-05, "loss": 1.952, "step": 18736000 }, { "epoch": 54.23, "learning_rate": 2.2892500694701744e-05, "loss": 1.965, "step": 18736500 }, { "epoch": 54.24, "learning_rate": 2.2891777047054466e-05, "loss": 1.9408, "step": 18737000 }, { "epoch": 54.24, "learning_rate": 2.289105339940719e-05, "loss": 1.97, "step": 18737500 }, { "epoch": 54.24, "learning_rate": 2.289032975175991e-05, "loss": 1.9741, "step": 18738000 }, { "epoch": 54.24, "learning_rate": 2.288960755140793e-05, "loss": 1.9617, "step": 18738500 }, { "epoch": 54.24, "learning_rate": 2.2888883903760652e-05, "loss": 1.993, "step": 18739000 }, { "epoch": 54.24, "learning_rate": 2.2888160256113374e-05, "loss": 1.99, "step": 18739500 }, { "epoch": 54.24, "learning_rate": 2.28874366084661e-05, "loss": 1.9614, "step": 18740000 }, { "epoch": 54.25, "learning_rate": 2.2886712960818822e-05, "loss": 1.9825, "step": 18740500 }, { "epoch": 54.25, "learning_rate": 2.2885989313171547e-05, "loss": 1.9864, "step": 18741000 }, { "epoch": 54.25, "learning_rate": 2.288526566552427e-05, "loss": 1.9846, "step": 18741500 }, { "epoch": 54.25, "learning_rate": 2.2884542017876992e-05, "loss": 1.9706, "step": 18742000 }, { "epoch": 54.25, "learning_rate": 2.288381981752501e-05, "loss": 1.9504, "step": 18742500 }, { "epoch": 54.25, "learning_rate": 2.2883096169877733e-05, "loss": 1.9685, "step": 18743000 }, { "epoch": 54.25, "learning_rate": 2.288237252223046e-05, "loss": 1.9769, "step": 18743500 }, { "epoch": 54.26, "learning_rate": 2.288164887458318e-05, "loss": 1.9631, "step": 18744000 }, { "epoch": 54.26, "learning_rate": 2.2880925226935903e-05, "loss": 1.9563, "step": 18744500 }, { "epoch": 54.26, "learning_rate": 2.2880201579288625e-05, "loss": 1.9756, "step": 18745000 }, { "epoch": 54.26, "learning_rate": 2.2879477931641348e-05, "loss": 1.9615, "step": 18745500 }, { "epoch": 54.26, "learning_rate": 2.2878754283994073e-05, "loss": 1.9712, "step": 18746000 }, { "epoch": 54.26, "learning_rate": 2.287803208364209e-05, "loss": 1.9825, "step": 18746500 }, { "epoch": 54.26, "learning_rate": 2.2877308435994814e-05, "loss": 1.9642, "step": 18747000 }, { "epoch": 54.27, "learning_rate": 2.2876584788347537e-05, "loss": 1.9806, "step": 18747500 }, { "epoch": 54.27, "learning_rate": 2.2875862587995556e-05, "loss": 1.9813, "step": 18748000 }, { "epoch": 54.27, "learning_rate": 2.2875138940348278e-05, "loss": 1.9832, "step": 18748500 }, { "epoch": 54.27, "learning_rate": 2.2874415292701e-05, "loss": 1.9737, "step": 18749000 }, { "epoch": 54.27, "learning_rate": 2.2873691645053726e-05, "loss": 1.9814, "step": 18749500 }, { "epoch": 54.27, "learning_rate": 2.2872967997406448e-05, "loss": 1.9765, "step": 18750000 }, { "epoch": 54.28, "learning_rate": 2.287224434975917e-05, "loss": 2.0037, "step": 18750500 }, { "epoch": 54.28, "learning_rate": 2.2871520702111896e-05, "loss": 1.979, "step": 18751000 }, { "epoch": 54.28, "learning_rate": 2.287079850175991e-05, "loss": 1.9738, "step": 18751500 }, { "epoch": 54.28, "learning_rate": 2.2870074854112637e-05, "loss": 1.9666, "step": 18752000 }, { "epoch": 54.28, "learning_rate": 2.286935120646536e-05, "loss": 1.9738, "step": 18752500 }, { "epoch": 54.28, "learning_rate": 2.2868629006113375e-05, "loss": 1.9733, "step": 18753000 }, { "epoch": 54.28, "learning_rate": 2.28679053584661e-05, "loss": 1.9713, "step": 18753500 }, { "epoch": 54.29, "learning_rate": 2.2867181710818823e-05, "loss": 1.9857, "step": 18754000 }, { "epoch": 54.29, "learning_rate": 2.2866458063171548e-05, "loss": 1.9733, "step": 18754500 }, { "epoch": 54.29, "learning_rate": 2.286573441552427e-05, "loss": 2.0092, "step": 18755000 }, { "epoch": 54.29, "learning_rate": 2.286501221517229e-05, "loss": 1.9798, "step": 18755500 }, { "epoch": 54.29, "learning_rate": 2.286428856752501e-05, "loss": 1.9647, "step": 18756000 }, { "epoch": 54.29, "learning_rate": 2.2863564919877734e-05, "loss": 1.971, "step": 18756500 }, { "epoch": 54.29, "learning_rate": 2.2862841272230456e-05, "loss": 1.9652, "step": 18757000 }, { "epoch": 54.3, "learning_rate": 2.286211762458318e-05, "loss": 1.9859, "step": 18757500 }, { "epoch": 54.3, "learning_rate": 2.28613939769359e-05, "loss": 1.9703, "step": 18758000 }, { "epoch": 54.3, "learning_rate": 2.2860671776583923e-05, "loss": 1.9667, "step": 18758500 }, { "epoch": 54.3, "learning_rate": 2.2859948128936645e-05, "loss": 1.9545, "step": 18759000 }, { "epoch": 54.3, "learning_rate": 2.2859224481289367e-05, "loss": 1.9872, "step": 18759500 }, { "epoch": 54.3, "learning_rate": 2.285850083364209e-05, "loss": 1.9827, "step": 18760000 }, { "epoch": 54.3, "learning_rate": 2.285777863329011e-05, "loss": 1.9698, "step": 18760500 }, { "epoch": 54.31, "learning_rate": 2.285705498564283e-05, "loss": 1.9914, "step": 18761000 }, { "epoch": 54.31, "learning_rate": 2.2856331337995553e-05, "loss": 1.9836, "step": 18761500 }, { "epoch": 54.31, "learning_rate": 2.2855607690348275e-05, "loss": 1.9744, "step": 18762000 }, { "epoch": 54.31, "learning_rate": 2.2854884042701e-05, "loss": 1.9856, "step": 18762500 }, { "epoch": 54.31, "learning_rate": 2.285416184234902e-05, "loss": 1.9496, "step": 18763000 }, { "epoch": 54.31, "learning_rate": 2.2853438194701742e-05, "loss": 2.0061, "step": 18763500 }, { "epoch": 54.31, "learning_rate": 2.2852714547054464e-05, "loss": 1.9877, "step": 18764000 }, { "epoch": 54.32, "learning_rate": 2.285199089940719e-05, "loss": 1.9656, "step": 18764500 }, { "epoch": 54.32, "learning_rate": 2.2851267251759912e-05, "loss": 1.9677, "step": 18765000 }, { "epoch": 54.32, "learning_rate": 2.2850543604112634e-05, "loss": 1.9717, "step": 18765500 }, { "epoch": 54.32, "learning_rate": 2.284981995646536e-05, "loss": 1.9945, "step": 18766000 }, { "epoch": 54.32, "learning_rate": 2.2849096308818082e-05, "loss": 1.9732, "step": 18766500 }, { "epoch": 54.32, "learning_rate": 2.2848372661170805e-05, "loss": 1.9514, "step": 18767000 }, { "epoch": 54.32, "learning_rate": 2.2847649013523527e-05, "loss": 1.9702, "step": 18767500 }, { "epoch": 54.33, "learning_rate": 2.2846925365876252e-05, "loss": 2.014, "step": 18768000 }, { "epoch": 54.33, "learning_rate": 2.2846201718228975e-05, "loss": 1.9648, "step": 18768500 }, { "epoch": 54.33, "learning_rate": 2.2845478070581697e-05, "loss": 1.9746, "step": 18769000 }, { "epoch": 54.33, "learning_rate": 2.284475731752501e-05, "loss": 1.9629, "step": 18769500 }, { "epoch": 54.33, "learning_rate": 2.2844033669877735e-05, "loss": 1.9798, "step": 18770000 }, { "epoch": 54.33, "learning_rate": 2.2843310022230457e-05, "loss": 1.9898, "step": 18770500 }, { "epoch": 54.33, "learning_rate": 2.2842587821878476e-05, "loss": 1.9612, "step": 18771000 }, { "epoch": 54.34, "learning_rate": 2.2841864174231198e-05, "loss": 1.9806, "step": 18771500 }, { "epoch": 54.34, "learning_rate": 2.284114052658392e-05, "loss": 1.9766, "step": 18772000 }, { "epoch": 54.34, "learning_rate": 2.2840416878936643e-05, "loss": 2.0022, "step": 18772500 }, { "epoch": 54.34, "learning_rate": 2.2839693231289365e-05, "loss": 1.9966, "step": 18773000 }, { "epoch": 54.34, "learning_rate": 2.283896958364209e-05, "loss": 1.9809, "step": 18773500 }, { "epoch": 54.34, "learning_rate": 2.2838245935994816e-05, "loss": 1.9697, "step": 18774000 }, { "epoch": 54.34, "learning_rate": 2.2837522288347538e-05, "loss": 1.9675, "step": 18774500 }, { "epoch": 54.35, "learning_rate": 2.2836800087995554e-05, "loss": 1.9566, "step": 18775000 }, { "epoch": 54.35, "learning_rate": 2.283607644034828e-05, "loss": 1.9554, "step": 18775500 }, { "epoch": 54.35, "learning_rate": 2.2835352792701002e-05, "loss": 1.9611, "step": 18776000 }, { "epoch": 54.35, "learning_rate": 2.2834629145053724e-05, "loss": 1.9823, "step": 18776500 }, { "epoch": 54.35, "learning_rate": 2.283390549740645e-05, "loss": 1.9743, "step": 18777000 }, { "epoch": 54.35, "learning_rate": 2.2833181849759172e-05, "loss": 1.9487, "step": 18777500 }, { "epoch": 54.35, "learning_rate": 2.2832461096702484e-05, "loss": 1.9819, "step": 18778000 }, { "epoch": 54.36, "learning_rate": 2.2831737449055206e-05, "loss": 1.9609, "step": 18778500 }, { "epoch": 54.36, "learning_rate": 2.283101380140793e-05, "loss": 1.9679, "step": 18779000 }, { "epoch": 54.36, "learning_rate": 2.2830290153760654e-05, "loss": 1.984, "step": 18779500 }, { "epoch": 54.36, "learning_rate": 2.2829566506113376e-05, "loss": 1.9903, "step": 18780000 }, { "epoch": 54.36, "learning_rate": 2.28288428584661e-05, "loss": 1.962, "step": 18780500 }, { "epoch": 54.36, "learning_rate": 2.2828119210818824e-05, "loss": 1.9751, "step": 18781000 }, { "epoch": 54.36, "learning_rate": 2.2827395563171546e-05, "loss": 1.9589, "step": 18781500 }, { "epoch": 54.37, "learning_rate": 2.282667191552427e-05, "loss": 1.9688, "step": 18782000 }, { "epoch": 54.37, "learning_rate": 2.282594826787699e-05, "loss": 1.9845, "step": 18782500 }, { "epoch": 54.37, "learning_rate": 2.2825224620229717e-05, "loss": 1.9986, "step": 18783000 }, { "epoch": 54.37, "learning_rate": 2.282450097258244e-05, "loss": 1.9815, "step": 18783500 }, { "epoch": 54.37, "learning_rate": 2.282377732493516e-05, "loss": 1.9611, "step": 18784000 }, { "epoch": 54.37, "learning_rate": 2.2823053677287887e-05, "loss": 1.9622, "step": 18784500 }, { "epoch": 54.37, "learning_rate": 2.28223329242312e-05, "loss": 1.9862, "step": 18785000 }, { "epoch": 54.38, "learning_rate": 2.282160927658392e-05, "loss": 1.9727, "step": 18785500 }, { "epoch": 54.38, "learning_rate": 2.2820885628936643e-05, "loss": 1.9855, "step": 18786000 }, { "epoch": 54.38, "learning_rate": 2.282016198128937e-05, "loss": 1.9647, "step": 18786500 }, { "epoch": 54.38, "learning_rate": 2.281943833364209e-05, "loss": 1.981, "step": 18787000 }, { "epoch": 54.38, "learning_rate": 2.2818714685994813e-05, "loss": 1.9714, "step": 18787500 }, { "epoch": 54.38, "learning_rate": 2.2817991038347536e-05, "loss": 1.9717, "step": 18788000 }, { "epoch": 54.39, "learning_rate": 2.281726739070026e-05, "loss": 1.9502, "step": 18788500 }, { "epoch": 54.39, "learning_rate": 2.2816543743052984e-05, "loss": 1.9694, "step": 18789000 }, { "epoch": 54.39, "learning_rate": 2.2815820095405706e-05, "loss": 1.979, "step": 18789500 }, { "epoch": 54.39, "learning_rate": 2.281509644775843e-05, "loss": 1.9813, "step": 18790000 }, { "epoch": 54.39, "learning_rate": 2.2814374247406447e-05, "loss": 1.9739, "step": 18790500 }, { "epoch": 54.39, "learning_rate": 2.281365059975917e-05, "loss": 1.9836, "step": 18791000 }, { "epoch": 54.39, "learning_rate": 2.2812926952111895e-05, "loss": 1.9708, "step": 18791500 }, { "epoch": 54.4, "learning_rate": 2.281220330446462e-05, "loss": 2.0082, "step": 18792000 }, { "epoch": 54.4, "learning_rate": 2.2811479656817343e-05, "loss": 1.9833, "step": 18792500 }, { "epoch": 54.4, "learning_rate": 2.2810756009170065e-05, "loss": 1.9575, "step": 18793000 }, { "epoch": 54.4, "learning_rate": 2.2810032361522787e-05, "loss": 1.9952, "step": 18793500 }, { "epoch": 54.4, "learning_rate": 2.280930871387551e-05, "loss": 1.9716, "step": 18794000 }, { "epoch": 54.4, "learning_rate": 2.280858651352353e-05, "loss": 1.9765, "step": 18794500 }, { "epoch": 54.4, "learning_rate": 2.280786286587625e-05, "loss": 1.9636, "step": 18795000 }, { "epoch": 54.41, "learning_rate": 2.2807139218228976e-05, "loss": 1.9598, "step": 18795500 }, { "epoch": 54.41, "learning_rate": 2.28064155705817e-05, "loss": 1.9703, "step": 18796000 }, { "epoch": 54.41, "learning_rate": 2.280569192293442e-05, "loss": 2.0163, "step": 18796500 }, { "epoch": 54.41, "learning_rate": 2.280496972258244e-05, "loss": 1.989, "step": 18797000 }, { "epoch": 54.41, "learning_rate": 2.2804246074935162e-05, "loss": 1.9651, "step": 18797500 }, { "epoch": 54.41, "learning_rate": 2.280352387458318e-05, "loss": 2.0077, "step": 18798000 }, { "epoch": 54.41, "learning_rate": 2.2802800226935903e-05, "loss": 1.9874, "step": 18798500 }, { "epoch": 54.42, "learning_rate": 2.2802076579288625e-05, "loss": 1.9593, "step": 18799000 }, { "epoch": 54.42, "learning_rate": 2.280135293164135e-05, "loss": 1.9531, "step": 18799500 }, { "epoch": 54.42, "learning_rate": 2.2800629283994073e-05, "loss": 1.9872, "step": 18800000 }, { "epoch": 54.42, "learning_rate": 2.2799907083642092e-05, "loss": 1.9739, "step": 18800500 }, { "epoch": 54.42, "learning_rate": 2.2799183435994814e-05, "loss": 1.99, "step": 18801000 }, { "epoch": 54.42, "learning_rate": 2.2798459788347537e-05, "loss": 1.9718, "step": 18801500 }, { "epoch": 54.42, "learning_rate": 2.279773614070026e-05, "loss": 1.9671, "step": 18802000 }, { "epoch": 54.43, "learning_rate": 2.2797012493052984e-05, "loss": 1.9764, "step": 18802500 }, { "epoch": 54.43, "learning_rate": 2.2796288845405707e-05, "loss": 1.9854, "step": 18803000 }, { "epoch": 54.43, "learning_rate": 2.2795565197758432e-05, "loss": 1.9693, "step": 18803500 }, { "epoch": 54.43, "learning_rate": 2.2794841550111154e-05, "loss": 1.9773, "step": 18804000 }, { "epoch": 54.43, "learning_rate": 2.2794117902463877e-05, "loss": 1.9767, "step": 18804500 }, { "epoch": 54.43, "learning_rate": 2.27933942548166e-05, "loss": 1.9716, "step": 18805000 }, { "epoch": 54.43, "learning_rate": 2.279267060716932e-05, "loss": 1.9722, "step": 18805500 }, { "epoch": 54.44, "learning_rate": 2.279194840681734e-05, "loss": 1.9802, "step": 18806000 }, { "epoch": 54.44, "learning_rate": 2.2791224759170062e-05, "loss": 1.9821, "step": 18806500 }, { "epoch": 54.44, "learning_rate": 2.2790501111522788e-05, "loss": 1.9768, "step": 18807000 }, { "epoch": 54.44, "learning_rate": 2.278977746387551e-05, "loss": 1.9722, "step": 18807500 }, { "epoch": 54.44, "learning_rate": 2.2789053816228232e-05, "loss": 1.9805, "step": 18808000 }, { "epoch": 54.44, "learning_rate": 2.2788330168580958e-05, "loss": 1.9882, "step": 18808500 }, { "epoch": 54.44, "learning_rate": 2.2787607968228974e-05, "loss": 1.982, "step": 18809000 }, { "epoch": 54.45, "learning_rate": 2.2786885767876993e-05, "loss": 1.957, "step": 18809500 }, { "epoch": 54.45, "learning_rate": 2.2786162120229715e-05, "loss": 1.9722, "step": 18810000 }, { "epoch": 54.45, "learning_rate": 2.2785438472582437e-05, "loss": 1.9907, "step": 18810500 }, { "epoch": 54.45, "learning_rate": 2.2784714824935163e-05, "loss": 1.973, "step": 18811000 }, { "epoch": 54.45, "learning_rate": 2.2783991177287885e-05, "loss": 1.9756, "step": 18811500 }, { "epoch": 54.45, "learning_rate": 2.278326752964061e-05, "loss": 1.9753, "step": 18812000 }, { "epoch": 54.45, "learning_rate": 2.2782543881993333e-05, "loss": 1.9572, "step": 18812500 }, { "epoch": 54.46, "learning_rate": 2.2781820234346055e-05, "loss": 1.9593, "step": 18813000 }, { "epoch": 54.46, "learning_rate": 2.2781096586698777e-05, "loss": 1.9895, "step": 18813500 }, { "epoch": 54.46, "learning_rate": 2.27803729390515e-05, "loss": 1.9602, "step": 18814000 }, { "epoch": 54.46, "learning_rate": 2.2779650738699522e-05, "loss": 1.9769, "step": 18814500 }, { "epoch": 54.46, "learning_rate": 2.2778927091052244e-05, "loss": 1.9509, "step": 18815000 }, { "epoch": 54.46, "learning_rate": 2.2778203443404966e-05, "loss": 1.9792, "step": 18815500 }, { "epoch": 54.46, "learning_rate": 2.2777481243052985e-05, "loss": 1.9973, "step": 18816000 }, { "epoch": 54.47, "learning_rate": 2.2776757595405707e-05, "loss": 1.9753, "step": 18816500 }, { "epoch": 54.47, "learning_rate": 2.2776035395053723e-05, "loss": 1.9918, "step": 18817000 }, { "epoch": 54.47, "learning_rate": 2.277531174740645e-05, "loss": 1.9593, "step": 18817500 }, { "epoch": 54.47, "learning_rate": 2.277458809975917e-05, "loss": 1.9867, "step": 18818000 }, { "epoch": 54.47, "learning_rate": 2.277386589940719e-05, "loss": 1.9606, "step": 18818500 }, { "epoch": 54.47, "learning_rate": 2.2773142251759912e-05, "loss": 1.9677, "step": 18819000 }, { "epoch": 54.47, "learning_rate": 2.2772418604112634e-05, "loss": 1.9882, "step": 18819500 }, { "epoch": 54.48, "learning_rate": 2.277169495646536e-05, "loss": 1.9798, "step": 18820000 }, { "epoch": 54.48, "learning_rate": 2.2770971308818082e-05, "loss": 1.9557, "step": 18820500 }, { "epoch": 54.48, "learning_rate": 2.2770247661170804e-05, "loss": 1.9491, "step": 18821000 }, { "epoch": 54.48, "learning_rate": 2.2769524013523527e-05, "loss": 1.9597, "step": 18821500 }, { "epoch": 54.48, "learning_rate": 2.276880326046684e-05, "loss": 1.9551, "step": 18822000 }, { "epoch": 54.48, "learning_rate": 2.2768079612819564e-05, "loss": 1.968, "step": 18822500 }, { "epoch": 54.48, "learning_rate": 2.2767355965172287e-05, "loss": 1.9663, "step": 18823000 }, { "epoch": 54.49, "learning_rate": 2.2766632317525012e-05, "loss": 1.9767, "step": 18823500 }, { "epoch": 54.49, "learning_rate": 2.2765908669877735e-05, "loss": 1.9947, "step": 18824000 }, { "epoch": 54.49, "learning_rate": 2.2765185022230457e-05, "loss": 1.9667, "step": 18824500 }, { "epoch": 54.49, "learning_rate": 2.276446137458318e-05, "loss": 2.0037, "step": 18825000 }, { "epoch": 54.49, "learning_rate": 2.27637377269359e-05, "loss": 1.9692, "step": 18825500 }, { "epoch": 54.49, "learning_rate": 2.2763014079288627e-05, "loss": 2.0036, "step": 18826000 }, { "epoch": 54.5, "learning_rate": 2.276229043164135e-05, "loss": 1.9733, "step": 18826500 }, { "epoch": 54.5, "learning_rate": 2.2761566783994075e-05, "loss": 1.9659, "step": 18827000 }, { "epoch": 54.5, "learning_rate": 2.2760843136346797e-05, "loss": 1.9832, "step": 18827500 }, { "epoch": 54.5, "learning_rate": 2.2760120935994812e-05, "loss": 1.9753, "step": 18828000 }, { "epoch": 54.5, "learning_rate": 2.2759397288347538e-05, "loss": 1.9723, "step": 18828500 }, { "epoch": 54.5, "learning_rate": 2.275867364070026e-05, "loss": 1.9541, "step": 18829000 }, { "epoch": 54.5, "learning_rate": 2.2757949993052986e-05, "loss": 1.9681, "step": 18829500 }, { "epoch": 54.51, "learning_rate": 2.2757226345405708e-05, "loss": 1.9696, "step": 18830000 }, { "epoch": 54.51, "learning_rate": 2.275650269775843e-05, "loss": 1.9721, "step": 18830500 }, { "epoch": 54.51, "learning_rate": 2.2755779050111153e-05, "loss": 1.9879, "step": 18831000 }, { "epoch": 54.51, "learning_rate": 2.2755055402463875e-05, "loss": 1.9813, "step": 18831500 }, { "epoch": 54.51, "learning_rate": 2.27543317548166e-05, "loss": 1.9786, "step": 18832000 }, { "epoch": 54.51, "learning_rate": 2.2753609554464616e-05, "loss": 1.9695, "step": 18832500 }, { "epoch": 54.51, "learning_rate": 2.275288590681734e-05, "loss": 1.9771, "step": 18833000 }, { "epoch": 54.52, "learning_rate": 2.2752162259170064e-05, "loss": 1.9779, "step": 18833500 }, { "epoch": 54.52, "learning_rate": 2.275143861152279e-05, "loss": 1.9705, "step": 18834000 }, { "epoch": 54.52, "learning_rate": 2.2750716411170805e-05, "loss": 1.9772, "step": 18834500 }, { "epoch": 54.52, "learning_rate": 2.2749992763523527e-05, "loss": 2.0091, "step": 18835000 }, { "epoch": 54.52, "learning_rate": 2.274926911587625e-05, "loss": 1.9673, "step": 18835500 }, { "epoch": 54.52, "learning_rate": 2.2748545468228975e-05, "loss": 1.977, "step": 18836000 }, { "epoch": 54.52, "learning_rate": 2.2747821820581697e-05, "loss": 1.9754, "step": 18836500 }, { "epoch": 54.53, "learning_rate": 2.2747098172934423e-05, "loss": 1.9777, "step": 18837000 }, { "epoch": 54.53, "learning_rate": 2.2746374525287145e-05, "loss": 1.9761, "step": 18837500 }, { "epoch": 54.53, "learning_rate": 2.2745650877639868e-05, "loss": 1.9524, "step": 18838000 }, { "epoch": 54.53, "learning_rate": 2.274492722999259e-05, "loss": 1.9863, "step": 18838500 }, { "epoch": 54.53, "learning_rate": 2.2744203582345315e-05, "loss": 1.9753, "step": 18839000 }, { "epoch": 54.53, "learning_rate": 2.274348138199333e-05, "loss": 1.9948, "step": 18839500 }, { "epoch": 54.53, "learning_rate": 2.2742757734346053e-05, "loss": 1.9794, "step": 18840000 }, { "epoch": 54.54, "learning_rate": 2.274203408669878e-05, "loss": 1.9957, "step": 18840500 }, { "epoch": 54.54, "learning_rate": 2.27413104390515e-05, "loss": 1.9792, "step": 18841000 }, { "epoch": 54.54, "learning_rate": 2.274058823869952e-05, "loss": 1.9606, "step": 18841500 }, { "epoch": 54.54, "learning_rate": 2.2739864591052242e-05, "loss": 1.9841, "step": 18842000 }, { "epoch": 54.54, "learning_rate": 2.2739140943404964e-05, "loss": 1.982, "step": 18842500 }, { "epoch": 54.54, "learning_rate": 2.273841729575769e-05, "loss": 1.9773, "step": 18843000 }, { "epoch": 54.54, "learning_rate": 2.2737693648110412e-05, "loss": 1.9969, "step": 18843500 }, { "epoch": 54.55, "learning_rate": 2.2736970000463138e-05, "loss": 1.9938, "step": 18844000 }, { "epoch": 54.55, "learning_rate": 2.273624635281586e-05, "loss": 1.9569, "step": 18844500 }, { "epoch": 54.55, "learning_rate": 2.2735522705168582e-05, "loss": 1.9808, "step": 18845000 }, { "epoch": 54.55, "learning_rate": 2.2734799057521305e-05, "loss": 1.9926, "step": 18845500 }, { "epoch": 54.55, "learning_rate": 2.2734075409874027e-05, "loss": 1.9801, "step": 18846000 }, { "epoch": 54.55, "learning_rate": 2.2733351762226752e-05, "loss": 1.9816, "step": 18846500 }, { "epoch": 54.55, "learning_rate": 2.2732628114579475e-05, "loss": 1.9655, "step": 18847000 }, { "epoch": 54.56, "learning_rate": 2.27319044669322e-05, "loss": 1.9599, "step": 18847500 }, { "epoch": 54.56, "learning_rate": 2.2731182266580216e-05, "loss": 2.0045, "step": 18848000 }, { "epoch": 54.56, "learning_rate": 2.273045861893294e-05, "loss": 1.9767, "step": 18848500 }, { "epoch": 54.56, "learning_rate": 2.2729734971285664e-05, "loss": 1.9851, "step": 18849000 }, { "epoch": 54.56, "learning_rate": 2.272901277093368e-05, "loss": 1.9903, "step": 18849500 }, { "epoch": 54.56, "learning_rate": 2.27282891232864e-05, "loss": 1.9729, "step": 18850000 }, { "epoch": 54.56, "learning_rate": 2.2727565475639127e-05, "loss": 1.9847, "step": 18850500 }, { "epoch": 54.57, "learning_rate": 2.272684182799185e-05, "loss": 1.9739, "step": 18851000 }, { "epoch": 54.57, "learning_rate": 2.2726118180344575e-05, "loss": 1.9667, "step": 18851500 }, { "epoch": 54.57, "learning_rate": 2.2725394532697297e-05, "loss": 1.9966, "step": 18852000 }, { "epoch": 54.57, "learning_rate": 2.272467088505002e-05, "loss": 1.9951, "step": 18852500 }, { "epoch": 54.57, "learning_rate": 2.272394723740274e-05, "loss": 1.9841, "step": 18853000 }, { "epoch": 54.57, "learning_rate": 2.2723223589755467e-05, "loss": 1.9635, "step": 18853500 }, { "epoch": 54.57, "learning_rate": 2.272249994210819e-05, "loss": 1.979, "step": 18854000 }, { "epoch": 54.58, "learning_rate": 2.2721776294460912e-05, "loss": 1.9848, "step": 18854500 }, { "epoch": 54.58, "learning_rate": 2.2721054094108927e-05, "loss": 1.9787, "step": 18855000 }, { "epoch": 54.58, "learning_rate": 2.2720330446461653e-05, "loss": 1.9809, "step": 18855500 }, { "epoch": 54.58, "learning_rate": 2.2719608246109672e-05, "loss": 1.9729, "step": 18856000 }, { "epoch": 54.58, "learning_rate": 2.271888604575769e-05, "loss": 1.9706, "step": 18856500 }, { "epoch": 54.58, "learning_rate": 2.2718162398110413e-05, "loss": 1.9862, "step": 18857000 }, { "epoch": 54.58, "learning_rate": 2.2717438750463135e-05, "loss": 1.9911, "step": 18857500 }, { "epoch": 54.59, "learning_rate": 2.2716715102815858e-05, "loss": 1.9775, "step": 18858000 }, { "epoch": 54.59, "learning_rate": 2.271599145516858e-05, "loss": 1.9816, "step": 18858500 }, { "epoch": 54.59, "learning_rate": 2.2715267807521305e-05, "loss": 1.9758, "step": 18859000 }, { "epoch": 54.59, "learning_rate": 2.271454415987403e-05, "loss": 1.9844, "step": 18859500 }, { "epoch": 54.59, "learning_rate": 2.2713820512226753e-05, "loss": 1.9767, "step": 18860000 }, { "epoch": 54.59, "learning_rate": 2.271309831187477e-05, "loss": 1.9708, "step": 18860500 }, { "epoch": 54.59, "learning_rate": 2.271237466422749e-05, "loss": 1.9753, "step": 18861000 }, { "epoch": 54.6, "learning_rate": 2.2711651016580217e-05, "loss": 1.9807, "step": 18861500 }, { "epoch": 54.6, "learning_rate": 2.271092736893294e-05, "loss": 1.9655, "step": 18862000 }, { "epoch": 54.6, "learning_rate": 2.271020372128566e-05, "loss": 1.968, "step": 18862500 }, { "epoch": 54.6, "learning_rate": 2.2709480073638387e-05, "loss": 2.0054, "step": 18863000 }, { "epoch": 54.6, "learning_rate": 2.270875642599111e-05, "loss": 1.977, "step": 18863500 }, { "epoch": 54.6, "learning_rate": 2.270803277834383e-05, "loss": 1.9667, "step": 18864000 }, { "epoch": 54.61, "learning_rate": 2.2707309130696557e-05, "loss": 1.9847, "step": 18864500 }, { "epoch": 54.61, "learning_rate": 2.270658548304928e-05, "loss": 1.9844, "step": 18865000 }, { "epoch": 54.61, "learning_rate": 2.2705861835402e-05, "loss": 1.9684, "step": 18865500 }, { "epoch": 54.61, "learning_rate": 2.2705138187754724e-05, "loss": 1.9905, "step": 18866000 }, { "epoch": 54.61, "learning_rate": 2.270441454010745e-05, "loss": 2.0043, "step": 18866500 }, { "epoch": 54.61, "learning_rate": 2.2703692339755468e-05, "loss": 1.9819, "step": 18867000 }, { "epoch": 54.61, "learning_rate": 2.270296869210819e-05, "loss": 1.9978, "step": 18867500 }, { "epoch": 54.62, "learning_rate": 2.2702245044460913e-05, "loss": 1.9842, "step": 18868000 }, { "epoch": 54.62, "learning_rate": 2.2701521396813635e-05, "loss": 1.9674, "step": 18868500 }, { "epoch": 54.62, "learning_rate": 2.2700797749166357e-05, "loss": 1.9545, "step": 18869000 }, { "epoch": 54.62, "learning_rate": 2.270007699610967e-05, "loss": 1.9814, "step": 18869500 }, { "epoch": 54.62, "learning_rate": 2.2699353348462395e-05, "loss": 1.9788, "step": 18870000 }, { "epoch": 54.62, "learning_rate": 2.269862970081512e-05, "loss": 1.9726, "step": 18870500 }, { "epoch": 54.62, "learning_rate": 2.2697906053167843e-05, "loss": 1.9757, "step": 18871000 }, { "epoch": 54.63, "learning_rate": 2.2697182405520565e-05, "loss": 1.9896, "step": 18871500 }, { "epoch": 54.63, "learning_rate": 2.2696458757873287e-05, "loss": 1.9852, "step": 18872000 }, { "epoch": 54.63, "learning_rate": 2.269573511022601e-05, "loss": 1.9503, "step": 18872500 }, { "epoch": 54.63, "learning_rate": 2.2695011462578732e-05, "loss": 1.9563, "step": 18873000 }, { "epoch": 54.63, "learning_rate": 2.2694287814931457e-05, "loss": 1.9793, "step": 18873500 }, { "epoch": 54.63, "learning_rate": 2.269356706187477e-05, "loss": 1.983, "step": 18874000 }, { "epoch": 54.63, "learning_rate": 2.2692843414227495e-05, "loss": 1.986, "step": 18874500 }, { "epoch": 54.64, "learning_rate": 2.2692119766580217e-05, "loss": 1.9546, "step": 18875000 }, { "epoch": 54.64, "learning_rate": 2.269139611893294e-05, "loss": 1.9928, "step": 18875500 }, { "epoch": 54.64, "learning_rate": 2.2690673918580955e-05, "loss": 2.0007, "step": 18876000 }, { "epoch": 54.64, "learning_rate": 2.268995027093368e-05, "loss": 1.9879, "step": 18876500 }, { "epoch": 54.64, "learning_rate": 2.2689226623286403e-05, "loss": 2.0036, "step": 18877000 }, { "epoch": 54.64, "learning_rate": 2.2688502975639125e-05, "loss": 1.9953, "step": 18877500 }, { "epoch": 54.64, "learning_rate": 2.268777932799185e-05, "loss": 1.952, "step": 18878000 }, { "epoch": 54.65, "learning_rate": 2.2687055680344573e-05, "loss": 1.9839, "step": 18878500 }, { "epoch": 54.65, "learning_rate": 2.2686333479992592e-05, "loss": 1.9732, "step": 18879000 }, { "epoch": 54.65, "learning_rate": 2.2685609832345314e-05, "loss": 1.9825, "step": 18879500 }, { "epoch": 54.65, "learning_rate": 2.2684886184698037e-05, "loss": 1.9801, "step": 18880000 }, { "epoch": 54.65, "learning_rate": 2.268416253705076e-05, "loss": 1.9784, "step": 18880500 }, { "epoch": 54.65, "learning_rate": 2.268343888940348e-05, "loss": 1.9803, "step": 18881000 }, { "epoch": 54.65, "learning_rate": 2.2682715241756207e-05, "loss": 1.9585, "step": 18881500 }, { "epoch": 54.66, "learning_rate": 2.2681993041404226e-05, "loss": 1.9704, "step": 18882000 }, { "epoch": 54.66, "learning_rate": 2.2681269393756948e-05, "loss": 1.9587, "step": 18882500 }, { "epoch": 54.66, "learning_rate": 2.268054574610967e-05, "loss": 1.9841, "step": 18883000 }, { "epoch": 54.66, "learning_rate": 2.2679822098462396e-05, "loss": 1.9699, "step": 18883500 }, { "epoch": 54.66, "learning_rate": 2.267909989811041e-05, "loss": 1.9629, "step": 18884000 }, { "epoch": 54.66, "learning_rate": 2.2678376250463134e-05, "loss": 2.0018, "step": 18884500 }, { "epoch": 54.66, "learning_rate": 2.267765260281586e-05, "loss": 1.9661, "step": 18885000 }, { "epoch": 54.67, "learning_rate": 2.2676930402463878e-05, "loss": 1.9577, "step": 18885500 }, { "epoch": 54.67, "learning_rate": 2.26762067548166e-05, "loss": 1.9687, "step": 18886000 }, { "epoch": 54.67, "learning_rate": 2.2675483107169323e-05, "loss": 1.9835, "step": 18886500 }, { "epoch": 54.67, "learning_rate": 2.2674759459522045e-05, "loss": 1.9978, "step": 18887000 }, { "epoch": 54.67, "learning_rate": 2.267403581187477e-05, "loss": 1.9689, "step": 18887500 }, { "epoch": 54.67, "learning_rate": 2.2673312164227493e-05, "loss": 1.9651, "step": 18888000 }, { "epoch": 54.67, "learning_rate": 2.2672588516580215e-05, "loss": 1.9781, "step": 18888500 }, { "epoch": 54.68, "learning_rate": 2.267186486893294e-05, "loss": 1.993, "step": 18889000 }, { "epoch": 54.68, "learning_rate": 2.2671141221285663e-05, "loss": 1.9713, "step": 18889500 }, { "epoch": 54.68, "learning_rate": 2.267041902093368e-05, "loss": 1.9956, "step": 18890000 }, { "epoch": 54.68, "learning_rate": 2.2669695373286404e-05, "loss": 1.9646, "step": 18890500 }, { "epoch": 54.68, "learning_rate": 2.2668971725639126e-05, "loss": 1.9546, "step": 18891000 }, { "epoch": 54.68, "learning_rate": 2.266824807799185e-05, "loss": 1.9825, "step": 18891500 }, { "epoch": 54.68, "learning_rate": 2.266752443034457e-05, "loss": 1.9745, "step": 18892000 }, { "epoch": 54.69, "learning_rate": 2.2666800782697296e-05, "loss": 1.9907, "step": 18892500 }, { "epoch": 54.69, "learning_rate": 2.2666077135050022e-05, "loss": 2.0153, "step": 18893000 }, { "epoch": 54.69, "learning_rate": 2.2665353487402744e-05, "loss": 1.9599, "step": 18893500 }, { "epoch": 54.69, "learning_rate": 2.2664629839755466e-05, "loss": 1.9572, "step": 18894000 }, { "epoch": 54.69, "learning_rate": 2.266390619210819e-05, "loss": 1.9818, "step": 18894500 }, { "epoch": 54.69, "learning_rate": 2.266318254446091e-05, "loss": 1.9523, "step": 18895000 }, { "epoch": 54.69, "learning_rate": 2.2662458896813636e-05, "loss": 1.9822, "step": 18895500 }, { "epoch": 54.7, "learning_rate": 2.2661736696461652e-05, "loss": 1.9755, "step": 18896000 }, { "epoch": 54.7, "learning_rate": 2.2661013048814378e-05, "loss": 1.9655, "step": 18896500 }, { "epoch": 54.7, "learning_rate": 2.26602894011671e-05, "loss": 1.9839, "step": 18897000 }, { "epoch": 54.7, "learning_rate": 2.2659565753519822e-05, "loss": 2.0034, "step": 18897500 }, { "epoch": 54.7, "learning_rate": 2.2658842105872548e-05, "loss": 1.9785, "step": 18898000 }, { "epoch": 54.7, "learning_rate": 2.265811845822527e-05, "loss": 1.9865, "step": 18898500 }, { "epoch": 54.7, "learning_rate": 2.2657394810577992e-05, "loss": 1.9756, "step": 18899000 }, { "epoch": 54.71, "learning_rate": 2.2656671162930714e-05, "loss": 1.9504, "step": 18899500 }, { "epoch": 54.71, "learning_rate": 2.265594751528344e-05, "loss": 1.9772, "step": 18900000 }, { "epoch": 54.71, "learning_rate": 2.265522531493146e-05, "loss": 1.9625, "step": 18900500 }, { "epoch": 54.71, "learning_rate": 2.2654503114579474e-05, "loss": 1.9815, "step": 18901000 }, { "epoch": 54.71, "learning_rate": 2.26537794669322e-05, "loss": 1.9765, "step": 18901500 }, { "epoch": 54.71, "learning_rate": 2.2653057266580216e-05, "loss": 1.9778, "step": 18902000 }, { "epoch": 54.71, "learning_rate": 2.2652333618932938e-05, "loss": 1.9899, "step": 18902500 }, { "epoch": 54.72, "learning_rate": 2.265160997128566e-05, "loss": 1.9784, "step": 18903000 }, { "epoch": 54.72, "learning_rate": 2.2650886323638386e-05, "loss": 1.9802, "step": 18903500 }, { "epoch": 54.72, "learning_rate": 2.265016267599111e-05, "loss": 1.9956, "step": 18904000 }, { "epoch": 54.72, "learning_rate": 2.2649439028343834e-05, "loss": 1.9596, "step": 18904500 }, { "epoch": 54.72, "learning_rate": 2.2648715380696556e-05, "loss": 1.9515, "step": 18905000 }, { "epoch": 54.72, "learning_rate": 2.2647991733049278e-05, "loss": 1.9447, "step": 18905500 }, { "epoch": 54.73, "learning_rate": 2.2647269532697297e-05, "loss": 1.9735, "step": 18906000 }, { "epoch": 54.73, "learning_rate": 2.264654588505002e-05, "loss": 1.9792, "step": 18906500 }, { "epoch": 54.73, "learning_rate": 2.264582223740274e-05, "loss": 1.9763, "step": 18907000 }, { "epoch": 54.73, "learning_rate": 2.2645098589755467e-05, "loss": 1.9556, "step": 18907500 }, { "epoch": 54.73, "learning_rate": 2.264437494210819e-05, "loss": 1.9985, "step": 18908000 }, { "epoch": 54.73, "learning_rate": 2.264365129446091e-05, "loss": 1.9762, "step": 18908500 }, { "epoch": 54.73, "learning_rate": 2.2642927646813637e-05, "loss": 1.9732, "step": 18909000 }, { "epoch": 54.74, "learning_rate": 2.264220399916636e-05, "loss": 1.9867, "step": 18909500 }, { "epoch": 54.74, "learning_rate": 2.264148035151908e-05, "loss": 1.9887, "step": 18910000 }, { "epoch": 54.74, "learning_rate": 2.2640756703871804e-05, "loss": 1.9516, "step": 18910500 }, { "epoch": 54.74, "learning_rate": 2.2640034503519823e-05, "loss": 1.9892, "step": 18911000 }, { "epoch": 54.74, "learning_rate": 2.263931085587255e-05, "loss": 1.9857, "step": 18911500 }, { "epoch": 54.74, "learning_rate": 2.263858720822527e-05, "loss": 1.969, "step": 18912000 }, { "epoch": 54.74, "learning_rate": 2.2637863560577993e-05, "loss": 1.9554, "step": 18912500 }, { "epoch": 54.75, "learning_rate": 2.2637139912930715e-05, "loss": 1.9735, "step": 18913000 }, { "epoch": 54.75, "learning_rate": 2.2636417712578734e-05, "loss": 1.9639, "step": 18913500 }, { "epoch": 54.75, "learning_rate": 2.263569551222675e-05, "loss": 1.9598, "step": 18914000 }, { "epoch": 54.75, "learning_rate": 2.2634971864579475e-05, "loss": 1.9813, "step": 18914500 }, { "epoch": 54.75, "learning_rate": 2.26342482169322e-05, "loss": 1.9873, "step": 18915000 }, { "epoch": 54.75, "learning_rate": 2.2633524569284923e-05, "loss": 1.9858, "step": 18915500 }, { "epoch": 54.75, "learning_rate": 2.2632800921637645e-05, "loss": 1.9723, "step": 18916000 }, { "epoch": 54.76, "learning_rate": 2.2632078721285664e-05, "loss": 1.9665, "step": 18916500 }, { "epoch": 54.76, "learning_rate": 2.2631355073638387e-05, "loss": 1.9783, "step": 18917000 }, { "epoch": 54.76, "learning_rate": 2.263063142599111e-05, "loss": 1.9871, "step": 18917500 }, { "epoch": 54.76, "learning_rate": 2.262990777834383e-05, "loss": 1.9891, "step": 18918000 }, { "epoch": 54.76, "learning_rate": 2.2629184130696553e-05, "loss": 1.9803, "step": 18918500 }, { "epoch": 54.76, "learning_rate": 2.262846048304928e-05, "loss": 2.0, "step": 18919000 }, { "epoch": 54.76, "learning_rate": 2.2627736835402e-05, "loss": 1.979, "step": 18919500 }, { "epoch": 54.77, "learning_rate": 2.2627013187754727e-05, "loss": 1.9829, "step": 18920000 }, { "epoch": 54.77, "learning_rate": 2.2626290987402742e-05, "loss": 2.0102, "step": 18920500 }, { "epoch": 54.77, "learning_rate": 2.2625567339755465e-05, "loss": 1.9744, "step": 18921000 }, { "epoch": 54.77, "learning_rate": 2.262484369210819e-05, "loss": 1.9715, "step": 18921500 }, { "epoch": 54.77, "learning_rate": 2.2624120044460912e-05, "loss": 1.9876, "step": 18922000 }, { "epoch": 54.77, "learning_rate": 2.2623396396813638e-05, "loss": 1.9793, "step": 18922500 }, { "epoch": 54.77, "learning_rate": 2.2622674196461654e-05, "loss": 1.9734, "step": 18923000 }, { "epoch": 54.78, "learning_rate": 2.2621951996109672e-05, "loss": 1.9987, "step": 18923500 }, { "epoch": 54.78, "learning_rate": 2.2621228348462395e-05, "loss": 1.9641, "step": 18924000 }, { "epoch": 54.78, "learning_rate": 2.2620504700815117e-05, "loss": 1.9924, "step": 18924500 }, { "epoch": 54.78, "learning_rate": 2.261978105316784e-05, "loss": 1.9418, "step": 18925000 }, { "epoch": 54.78, "learning_rate": 2.2619057405520565e-05, "loss": 1.9773, "step": 18925500 }, { "epoch": 54.78, "learning_rate": 2.2618333757873287e-05, "loss": 1.9881, "step": 18926000 }, { "epoch": 54.78, "learning_rate": 2.2617610110226013e-05, "loss": 1.9953, "step": 18926500 }, { "epoch": 54.79, "learning_rate": 2.2616887909874028e-05, "loss": 1.9799, "step": 18927000 }, { "epoch": 54.79, "learning_rate": 2.2616164262226754e-05, "loss": 1.9595, "step": 18927500 }, { "epoch": 54.79, "learning_rate": 2.2615440614579476e-05, "loss": 1.9764, "step": 18928000 }, { "epoch": 54.79, "learning_rate": 2.2614716966932198e-05, "loss": 1.9922, "step": 18928500 }, { "epoch": 54.79, "learning_rate": 2.261399331928492e-05, "loss": 2.0055, "step": 18929000 }, { "epoch": 54.79, "learning_rate": 2.2613269671637643e-05, "loss": 1.961, "step": 18929500 }, { "epoch": 54.79, "learning_rate": 2.261254602399037e-05, "loss": 1.9854, "step": 18930000 }, { "epoch": 54.8, "learning_rate": 2.2611823823638387e-05, "loss": 1.9589, "step": 18930500 }, { "epoch": 54.8, "learning_rate": 2.261110017599111e-05, "loss": 1.9541, "step": 18931000 }, { "epoch": 54.8, "learning_rate": 2.2610376528343832e-05, "loss": 1.9726, "step": 18931500 }, { "epoch": 54.8, "learning_rate": 2.260965432799185e-05, "loss": 1.985, "step": 18932000 }, { "epoch": 54.8, "learning_rate": 2.2608930680344573e-05, "loss": 1.9919, "step": 18932500 }, { "epoch": 54.8, "learning_rate": 2.2608207032697295e-05, "loss": 1.9767, "step": 18933000 }, { "epoch": 54.8, "learning_rate": 2.2607483385050017e-05, "loss": 1.987, "step": 18933500 }, { "epoch": 54.81, "learning_rate": 2.2606759737402743e-05, "loss": 1.9802, "step": 18934000 }, { "epoch": 54.81, "learning_rate": 2.2606036089755465e-05, "loss": 1.988, "step": 18934500 }, { "epoch": 54.81, "learning_rate": 2.260531244210819e-05, "loss": 1.9636, "step": 18935000 }, { "epoch": 54.81, "learning_rate": 2.2604588794460913e-05, "loss": 1.9779, "step": 18935500 }, { "epoch": 54.81, "learning_rate": 2.2603865146813635e-05, "loss": 1.9671, "step": 18936000 }, { "epoch": 54.81, "learning_rate": 2.2603142946461654e-05, "loss": 1.9855, "step": 18936500 }, { "epoch": 54.81, "learning_rate": 2.2602419298814377e-05, "loss": 1.9661, "step": 18937000 }, { "epoch": 54.82, "learning_rate": 2.2601695651167102e-05, "loss": 1.9661, "step": 18937500 }, { "epoch": 54.82, "learning_rate": 2.2600972003519824e-05, "loss": 1.9707, "step": 18938000 }, { "epoch": 54.82, "learning_rate": 2.2600248355872547e-05, "loss": 1.9668, "step": 18938500 }, { "epoch": 54.82, "learning_rate": 2.259952470822527e-05, "loss": 1.9632, "step": 18939000 }, { "epoch": 54.82, "learning_rate": 2.259880106057799e-05, "loss": 1.975, "step": 18939500 }, { "epoch": 54.82, "learning_rate": 2.2598077412930717e-05, "loss": 1.9678, "step": 18940000 }, { "epoch": 54.82, "learning_rate": 2.259735376528344e-05, "loss": 1.9831, "step": 18940500 }, { "epoch": 54.83, "learning_rate": 2.2596631564931455e-05, "loss": 1.981, "step": 18941000 }, { "epoch": 54.83, "learning_rate": 2.259590791728418e-05, "loss": 1.9578, "step": 18941500 }, { "epoch": 54.83, "learning_rate": 2.25951857169322e-05, "loss": 1.9823, "step": 18942000 }, { "epoch": 54.83, "learning_rate": 2.259446206928492e-05, "loss": 1.97, "step": 18942500 }, { "epoch": 54.83, "learning_rate": 2.2593738421637644e-05, "loss": 1.9995, "step": 18943000 }, { "epoch": 54.83, "learning_rate": 2.2593014773990366e-05, "loss": 1.9942, "step": 18943500 }, { "epoch": 54.84, "learning_rate": 2.259229112634309e-05, "loss": 1.9967, "step": 18944000 }, { "epoch": 54.84, "learning_rate": 2.2591567478695814e-05, "loss": 2.0123, "step": 18944500 }, { "epoch": 54.84, "learning_rate": 2.259084383104854e-05, "loss": 1.9889, "step": 18945000 }, { "epoch": 54.84, "learning_rate": 2.259012018340126e-05, "loss": 1.9731, "step": 18945500 }, { "epoch": 54.84, "learning_rate": 2.258939798304928e-05, "loss": 1.9694, "step": 18946000 }, { "epoch": 54.84, "learning_rate": 2.2588674335402003e-05, "loss": 1.9958, "step": 18946500 }, { "epoch": 54.84, "learning_rate": 2.2587950687754725e-05, "loss": 1.974, "step": 18947000 }, { "epoch": 54.85, "learning_rate": 2.2587227040107447e-05, "loss": 1.9844, "step": 18947500 }, { "epoch": 54.85, "learning_rate": 2.258650339246017e-05, "loss": 1.9764, "step": 18948000 }, { "epoch": 54.85, "learning_rate": 2.258578119210819e-05, "loss": 2.0131, "step": 18948500 }, { "epoch": 54.85, "learning_rate": 2.2585057544460914e-05, "loss": 2.0031, "step": 18949000 }, { "epoch": 54.85, "learning_rate": 2.258433534410893e-05, "loss": 1.9913, "step": 18949500 }, { "epoch": 54.85, "learning_rate": 2.2583611696461655e-05, "loss": 1.9595, "step": 18950000 }, { "epoch": 54.85, "learning_rate": 2.2582888048814377e-05, "loss": 1.9821, "step": 18950500 }, { "epoch": 54.86, "learning_rate": 2.25821644011671e-05, "loss": 1.992, "step": 18951000 }, { "epoch": 54.86, "learning_rate": 2.258144220081512e-05, "loss": 1.9925, "step": 18951500 }, { "epoch": 54.86, "learning_rate": 2.258071855316784e-05, "loss": 1.9709, "step": 18952000 }, { "epoch": 54.86, "learning_rate": 2.2579996352815856e-05, "loss": 1.9742, "step": 18952500 }, { "epoch": 54.86, "learning_rate": 2.2579272705168582e-05, "loss": 1.9736, "step": 18953000 }, { "epoch": 54.86, "learning_rate": 2.2578549057521308e-05, "loss": 1.9907, "step": 18953500 }, { "epoch": 54.86, "learning_rate": 2.257782540987403e-05, "loss": 1.9948, "step": 18954000 }, { "epoch": 54.87, "learning_rate": 2.2577101762226752e-05, "loss": 1.9753, "step": 18954500 }, { "epoch": 54.87, "learning_rate": 2.2576378114579474e-05, "loss": 1.9949, "step": 18955000 }, { "epoch": 54.87, "learning_rate": 2.2575654466932197e-05, "loss": 1.9862, "step": 18955500 }, { "epoch": 54.87, "learning_rate": 2.257493081928492e-05, "loss": 1.9844, "step": 18956000 }, { "epoch": 54.87, "learning_rate": 2.2574207171637644e-05, "loss": 1.9718, "step": 18956500 }, { "epoch": 54.87, "learning_rate": 2.257348352399037e-05, "loss": 1.9728, "step": 18957000 }, { "epoch": 54.87, "learning_rate": 2.2572759876343092e-05, "loss": 1.9761, "step": 18957500 }, { "epoch": 54.88, "learning_rate": 2.2572036228695814e-05, "loss": 1.9553, "step": 18958000 }, { "epoch": 54.88, "learning_rate": 2.2571312581048537e-05, "loss": 1.995, "step": 18958500 }, { "epoch": 54.88, "learning_rate": 2.257058893340126e-05, "loss": 1.9737, "step": 18959000 }, { "epoch": 54.88, "learning_rate": 2.256986528575398e-05, "loss": 1.9735, "step": 18959500 }, { "epoch": 54.88, "learning_rate": 2.2569141638106707e-05, "loss": 1.9794, "step": 18960000 }, { "epoch": 54.88, "learning_rate": 2.2568417990459432e-05, "loss": 1.995, "step": 18960500 }, { "epoch": 54.88, "learning_rate": 2.2567694342812155e-05, "loss": 1.9698, "step": 18961000 }, { "epoch": 54.89, "learning_rate": 2.2566970695164877e-05, "loss": 1.9667, "step": 18961500 }, { "epoch": 54.89, "learning_rate": 2.2566248494812896e-05, "loss": 1.9762, "step": 18962000 }, { "epoch": 54.89, "learning_rate": 2.2565524847165618e-05, "loss": 1.9478, "step": 18962500 }, { "epoch": 54.89, "learning_rate": 2.256480119951834e-05, "loss": 1.9827, "step": 18963000 }, { "epoch": 54.89, "learning_rate": 2.256407899916636e-05, "loss": 1.9781, "step": 18963500 }, { "epoch": 54.89, "learning_rate": 2.2563356798814378e-05, "loss": 1.9633, "step": 18964000 }, { "epoch": 54.89, "learning_rate": 2.25626331511671e-05, "loss": 1.9737, "step": 18964500 }, { "epoch": 54.9, "learning_rate": 2.2561909503519823e-05, "loss": 1.9487, "step": 18965000 }, { "epoch": 54.9, "learning_rate": 2.2561185855872545e-05, "loss": 1.9604, "step": 18965500 }, { "epoch": 54.9, "learning_rate": 2.256046220822527e-05, "loss": 1.9663, "step": 18966000 }, { "epoch": 54.9, "learning_rate": 2.2559738560577993e-05, "loss": 1.9778, "step": 18966500 }, { "epoch": 54.9, "learning_rate": 2.2559014912930715e-05, "loss": 2.0022, "step": 18967000 }, { "epoch": 54.9, "learning_rate": 2.255829126528344e-05, "loss": 1.9485, "step": 18967500 }, { "epoch": 54.9, "learning_rate": 2.2557567617636163e-05, "loss": 2.0108, "step": 18968000 }, { "epoch": 54.91, "learning_rate": 2.2556843969988885e-05, "loss": 1.9713, "step": 18968500 }, { "epoch": 54.91, "learning_rate": 2.255612032234161e-05, "loss": 1.9625, "step": 18969000 }, { "epoch": 54.91, "learning_rate": 2.2555396674694333e-05, "loss": 1.9692, "step": 18969500 }, { "epoch": 54.91, "learning_rate": 2.2554673027047055e-05, "loss": 1.9605, "step": 18970000 }, { "epoch": 54.91, "learning_rate": 2.2553949379399777e-05, "loss": 1.9754, "step": 18970500 }, { "epoch": 54.91, "learning_rate": 2.2553227179047796e-05, "loss": 1.9764, "step": 18971000 }, { "epoch": 54.91, "learning_rate": 2.2552503531400522e-05, "loss": 1.9546, "step": 18971500 }, { "epoch": 54.92, "learning_rate": 2.2551779883753244e-05, "loss": 1.9851, "step": 18972000 }, { "epoch": 54.92, "learning_rate": 2.2551056236105966e-05, "loss": 1.9793, "step": 18972500 }, { "epoch": 54.92, "learning_rate": 2.255033258845869e-05, "loss": 1.9667, "step": 18973000 }, { "epoch": 54.92, "learning_rate": 2.2549610388106708e-05, "loss": 1.9652, "step": 18973500 }, { "epoch": 54.92, "learning_rate": 2.2548888187754723e-05, "loss": 2.0086, "step": 18974000 }, { "epoch": 54.92, "learning_rate": 2.254816454010745e-05, "loss": 1.9601, "step": 18974500 }, { "epoch": 54.92, "learning_rate": 2.254744089246017e-05, "loss": 1.9926, "step": 18975000 }, { "epoch": 54.93, "learning_rate": 2.2546717244812897e-05, "loss": 1.9611, "step": 18975500 }, { "epoch": 54.93, "learning_rate": 2.254599359716562e-05, "loss": 2.0004, "step": 18976000 }, { "epoch": 54.93, "learning_rate": 2.2545271396813634e-05, "loss": 1.9718, "step": 18976500 }, { "epoch": 54.93, "learning_rate": 2.2544549196461653e-05, "loss": 1.9805, "step": 18977000 }, { "epoch": 54.93, "learning_rate": 2.2543825548814376e-05, "loss": 1.9932, "step": 18977500 }, { "epoch": 54.93, "learning_rate": 2.2543101901167098e-05, "loss": 1.9551, "step": 18978000 }, { "epoch": 54.93, "learning_rate": 2.2542378253519823e-05, "loss": 1.9901, "step": 18978500 }, { "epoch": 54.94, "learning_rate": 2.254165460587255e-05, "loss": 1.9553, "step": 18979000 }, { "epoch": 54.94, "learning_rate": 2.254093095822527e-05, "loss": 1.968, "step": 18979500 }, { "epoch": 54.94, "learning_rate": 2.2540207310577994e-05, "loss": 1.9908, "step": 18980000 }, { "epoch": 54.94, "learning_rate": 2.2539483662930716e-05, "loss": 1.9721, "step": 18980500 }, { "epoch": 54.94, "learning_rate": 2.2538760015283438e-05, "loss": 1.9765, "step": 18981000 }, { "epoch": 54.94, "learning_rate": 2.253803636763616e-05, "loss": 1.981, "step": 18981500 }, { "epoch": 54.95, "learning_rate": 2.2537315614579472e-05, "loss": 1.9878, "step": 18982000 }, { "epoch": 54.95, "learning_rate": 2.2536591966932198e-05, "loss": 1.9873, "step": 18982500 }, { "epoch": 54.95, "learning_rate": 2.2535868319284924e-05, "loss": 1.9721, "step": 18983000 }, { "epoch": 54.95, "learning_rate": 2.2535144671637646e-05, "loss": 1.9661, "step": 18983500 }, { "epoch": 54.95, "learning_rate": 2.2534421023990368e-05, "loss": 1.9908, "step": 18984000 }, { "epoch": 54.95, "learning_rate": 2.253369737634309e-05, "loss": 1.9468, "step": 18984500 }, { "epoch": 54.95, "learning_rate": 2.2532973728695813e-05, "loss": 1.9689, "step": 18985000 }, { "epoch": 54.96, "learning_rate": 2.2532250081048535e-05, "loss": 1.9997, "step": 18985500 }, { "epoch": 54.96, "learning_rate": 2.253152643340126e-05, "loss": 1.9755, "step": 18986000 }, { "epoch": 54.96, "learning_rate": 2.2530802785753986e-05, "loss": 1.9962, "step": 18986500 }, { "epoch": 54.96, "learning_rate": 2.253007913810671e-05, "loss": 1.9723, "step": 18987000 }, { "epoch": 54.96, "learning_rate": 2.252935549045943e-05, "loss": 1.9976, "step": 18987500 }, { "epoch": 54.96, "learning_rate": 2.2528631842812153e-05, "loss": 1.9666, "step": 18988000 }, { "epoch": 54.96, "learning_rate": 2.2527908195164875e-05, "loss": 1.9699, "step": 18988500 }, { "epoch": 54.97, "learning_rate": 2.25271845475176e-05, "loss": 1.958, "step": 18989000 }, { "epoch": 54.97, "learning_rate": 2.2526460899870326e-05, "loss": 1.9638, "step": 18989500 }, { "epoch": 54.97, "learning_rate": 2.252573725222305e-05, "loss": 1.9873, "step": 18990000 }, { "epoch": 54.97, "learning_rate": 2.2525015051871064e-05, "loss": 1.9781, "step": 18990500 }, { "epoch": 54.97, "learning_rate": 2.2524291404223786e-05, "loss": 1.9915, "step": 18991000 }, { "epoch": 54.97, "learning_rate": 2.2523567756576512e-05, "loss": 1.9867, "step": 18991500 }, { "epoch": 54.97, "learning_rate": 2.2522844108929234e-05, "loss": 1.9549, "step": 18992000 }, { "epoch": 54.98, "learning_rate": 2.2522120461281956e-05, "loss": 1.9818, "step": 18992500 }, { "epoch": 54.98, "learning_rate": 2.2521398260929975e-05, "loss": 1.9874, "step": 18993000 }, { "epoch": 54.98, "learning_rate": 2.25206746132827e-05, "loss": 1.9825, "step": 18993500 }, { "epoch": 54.98, "learning_rate": 2.2519950965635423e-05, "loss": 1.9738, "step": 18994000 }, { "epoch": 54.98, "learning_rate": 2.2519227317988145e-05, "loss": 1.996, "step": 18994500 }, { "epoch": 54.98, "learning_rate": 2.2518503670340868e-05, "loss": 1.9433, "step": 18995000 }, { "epoch": 54.98, "learning_rate": 2.251778002269359e-05, "loss": 1.9928, "step": 18995500 }, { "epoch": 54.99, "learning_rate": 2.251705782234161e-05, "loss": 1.9601, "step": 18996000 }, { "epoch": 54.99, "learning_rate": 2.251633417469433e-05, "loss": 1.9539, "step": 18996500 }, { "epoch": 54.99, "learning_rate": 2.2515610527047057e-05, "loss": 2.0001, "step": 18997000 }, { "epoch": 54.99, "learning_rate": 2.2514888326695076e-05, "loss": 2.0158, "step": 18997500 }, { "epoch": 54.99, "learning_rate": 2.2514164679047798e-05, "loss": 1.9984, "step": 18998000 }, { "epoch": 54.99, "learning_rate": 2.251344103140052e-05, "loss": 1.9686, "step": 18998500 }, { "epoch": 54.99, "learning_rate": 2.2512717383753242e-05, "loss": 1.9777, "step": 18999000 }, { "epoch": 55.0, "learning_rate": 2.2511993736105965e-05, "loss": 1.973, "step": 18999500 }, { "epoch": 55.0, "learning_rate": 2.251127008845869e-05, "loss": 1.9756, "step": 19000000 }, { "epoch": 55.0, "learning_rate": 2.2510546440811412e-05, "loss": 1.9994, "step": 19000500 }, { "epoch": 55.0, "eval_accuracy": 0.6754768735668595, "eval_accuracy_mlm": 0.6420146190389152, "eval_accuracy_nsp": 0.8550320129426701, "eval_loss": 2.161105155944824, "eval_runtime": 331.5839, "eval_samples_per_second": 1316.065, "eval_steps_per_second": 54.837, "step": 19000960 }, { "epoch": 55.0, "learning_rate": 2.250982424045943e-05, "loss": 1.9806, "step": 19001000 }, { "epoch": 55.0, "learning_rate": 2.250910204010745e-05, "loss": 1.9872, "step": 19001500 }, { "epoch": 55.0, "learning_rate": 2.2508378392460173e-05, "loss": 1.9389, "step": 19002000 }, { "epoch": 55.0, "learning_rate": 2.2507654744812895e-05, "loss": 1.967, "step": 19002500 }, { "epoch": 55.01, "learning_rate": 2.2506931097165617e-05, "loss": 1.9646, "step": 19003000 }, { "epoch": 55.01, "learning_rate": 2.250620744951834e-05, "loss": 1.9836, "step": 19003500 }, { "epoch": 55.01, "learning_rate": 2.2505483801871065e-05, "loss": 1.9732, "step": 19004000 }, { "epoch": 55.01, "learning_rate": 2.250476015422379e-05, "loss": 1.9739, "step": 19004500 }, { "epoch": 55.01, "learning_rate": 2.2504036506576513e-05, "loss": 1.9462, "step": 19005000 }, { "epoch": 55.01, "learning_rate": 2.2503312858929235e-05, "loss": 1.9426, "step": 19005500 }, { "epoch": 55.01, "learning_rate": 2.2502590658577254e-05, "loss": 1.96, "step": 19006000 }, { "epoch": 55.02, "learning_rate": 2.2501867010929976e-05, "loss": 1.9779, "step": 19006500 }, { "epoch": 55.02, "learning_rate": 2.25011433632827e-05, "loss": 1.9652, "step": 19007000 }, { "epoch": 55.02, "learning_rate": 2.250041971563542e-05, "loss": 1.969, "step": 19007500 }, { "epoch": 55.02, "learning_rate": 2.2499696067988143e-05, "loss": 1.9475, "step": 19008000 }, { "epoch": 55.02, "learning_rate": 2.249897242034087e-05, "loss": 1.9695, "step": 19008500 }, { "epoch": 55.02, "learning_rate": 2.249824877269359e-05, "loss": 1.9568, "step": 19009000 }, { "epoch": 55.02, "learning_rate": 2.2497525125046316e-05, "loss": 1.9672, "step": 19009500 }, { "epoch": 55.03, "learning_rate": 2.249680147739904e-05, "loss": 1.9606, "step": 19010000 }, { "epoch": 55.03, "learning_rate": 2.249607782975176e-05, "loss": 1.9504, "step": 19010500 }, { "epoch": 55.03, "learning_rate": 2.2495355629399776e-05, "loss": 1.9571, "step": 19011000 }, { "epoch": 55.03, "learning_rate": 2.2494631981752502e-05, "loss": 1.9563, "step": 19011500 }, { "epoch": 55.03, "learning_rate": 2.2493909781400518e-05, "loss": 1.9638, "step": 19012000 }, { "epoch": 55.03, "learning_rate": 2.249318758104854e-05, "loss": 1.9832, "step": 19012500 }, { "epoch": 55.03, "learning_rate": 2.2492463933401262e-05, "loss": 1.9491, "step": 19013000 }, { "epoch": 55.04, "learning_rate": 2.2491740285753984e-05, "loss": 1.9554, "step": 19013500 }, { "epoch": 55.04, "learning_rate": 2.2491016638106707e-05, "loss": 1.9477, "step": 19014000 }, { "epoch": 55.04, "learning_rate": 2.2490294437754726e-05, "loss": 1.9484, "step": 19014500 }, { "epoch": 55.04, "learning_rate": 2.2489570790107448e-05, "loss": 1.9825, "step": 19015000 }, { "epoch": 55.04, "learning_rate": 2.248884714246017e-05, "loss": 1.9757, "step": 19015500 }, { "epoch": 55.04, "learning_rate": 2.2488123494812896e-05, "loss": 1.9589, "step": 19016000 }, { "epoch": 55.04, "learning_rate": 2.2487399847165618e-05, "loss": 1.9615, "step": 19016500 }, { "epoch": 55.05, "learning_rate": 2.248667619951834e-05, "loss": 1.9585, "step": 19017000 }, { "epoch": 55.05, "learning_rate": 2.248595399916636e-05, "loss": 1.9467, "step": 19017500 }, { "epoch": 55.05, "learning_rate": 2.2485231798814378e-05, "loss": 1.9542, "step": 19018000 }, { "epoch": 55.05, "learning_rate": 2.24845081511671e-05, "loss": 1.9399, "step": 19018500 }, { "epoch": 55.05, "learning_rate": 2.2483784503519822e-05, "loss": 1.9588, "step": 19019000 }, { "epoch": 55.05, "learning_rate": 2.248306230316784e-05, "loss": 1.964, "step": 19019500 }, { "epoch": 55.06, "learning_rate": 2.2482338655520567e-05, "loss": 1.9653, "step": 19020000 }, { "epoch": 55.06, "learning_rate": 2.248161500787329e-05, "loss": 1.9701, "step": 19020500 }, { "epoch": 55.06, "learning_rate": 2.248089136022601e-05, "loss": 1.9589, "step": 19021000 }, { "epoch": 55.06, "learning_rate": 2.2480167712578734e-05, "loss": 1.953, "step": 19021500 }, { "epoch": 55.06, "learning_rate": 2.2479444064931456e-05, "loss": 1.9579, "step": 19022000 }, { "epoch": 55.06, "learning_rate": 2.2478720417284178e-05, "loss": 1.9843, "step": 19022500 }, { "epoch": 55.06, "learning_rate": 2.2477996769636904e-05, "loss": 1.9589, "step": 19023000 }, { "epoch": 55.07, "learning_rate": 2.247727312198963e-05, "loss": 1.995, "step": 19023500 }, { "epoch": 55.07, "learning_rate": 2.247654947434235e-05, "loss": 1.966, "step": 19024000 }, { "epoch": 55.07, "learning_rate": 2.2475828721285664e-05, "loss": 1.9648, "step": 19024500 }, { "epoch": 55.07, "learning_rate": 2.2475105073638386e-05, "loss": 1.975, "step": 19025000 }, { "epoch": 55.07, "learning_rate": 2.247438142599111e-05, "loss": 1.9534, "step": 19025500 }, { "epoch": 55.07, "learning_rate": 2.247365777834383e-05, "loss": 1.9678, "step": 19026000 }, { "epoch": 55.07, "learning_rate": 2.2472934130696556e-05, "loss": 1.947, "step": 19026500 }, { "epoch": 55.08, "learning_rate": 2.247221048304928e-05, "loss": 1.9623, "step": 19027000 }, { "epoch": 55.08, "learning_rate": 2.2471486835402004e-05, "loss": 1.9635, "step": 19027500 }, { "epoch": 55.08, "learning_rate": 2.2470763187754726e-05, "loss": 1.9532, "step": 19028000 }, { "epoch": 55.08, "learning_rate": 2.247003954010745e-05, "loss": 1.9695, "step": 19028500 }, { "epoch": 55.08, "learning_rate": 2.246931589246017e-05, "loss": 1.9828, "step": 19029000 }, { "epoch": 55.08, "learning_rate": 2.2468592244812893e-05, "loss": 1.9591, "step": 19029500 }, { "epoch": 55.08, "learning_rate": 2.246786859716562e-05, "loss": 1.951, "step": 19030000 }, { "epoch": 55.09, "learning_rate": 2.246714494951834e-05, "loss": 1.9544, "step": 19030500 }, { "epoch": 55.09, "learning_rate": 2.2466421301871066e-05, "loss": 1.9464, "step": 19031000 }, { "epoch": 55.09, "learning_rate": 2.246569765422379e-05, "loss": 1.9733, "step": 19031500 }, { "epoch": 55.09, "learning_rate": 2.2464975453871808e-05, "loss": 1.9509, "step": 19032000 }, { "epoch": 55.09, "learning_rate": 2.246425180622453e-05, "loss": 1.9658, "step": 19032500 }, { "epoch": 55.09, "learning_rate": 2.2463528158577252e-05, "loss": 1.9487, "step": 19033000 }, { "epoch": 55.09, "learning_rate": 2.2462804510929974e-05, "loss": 1.9707, "step": 19033500 }, { "epoch": 55.1, "learning_rate": 2.2462080863282697e-05, "loss": 1.9896, "step": 19034000 }, { "epoch": 55.1, "learning_rate": 2.2461357215635422e-05, "loss": 1.9576, "step": 19034500 }, { "epoch": 55.1, "learning_rate": 2.2460633567988144e-05, "loss": 1.9836, "step": 19035000 }, { "epoch": 55.1, "learning_rate": 2.245990992034087e-05, "loss": 1.9598, "step": 19035500 }, { "epoch": 55.1, "learning_rate": 2.2459186272693592e-05, "loss": 1.9674, "step": 19036000 }, { "epoch": 55.1, "learning_rate": 2.2458462625046315e-05, "loss": 1.9553, "step": 19036500 }, { "epoch": 55.1, "learning_rate": 2.2457738977399037e-05, "loss": 1.9525, "step": 19037000 }, { "epoch": 55.11, "learning_rate": 2.2457016777047056e-05, "loss": 1.9524, "step": 19037500 }, { "epoch": 55.11, "learning_rate": 2.2456293129399778e-05, "loss": 1.9732, "step": 19038000 }, { "epoch": 55.11, "learning_rate": 2.2455569481752504e-05, "loss": 1.9799, "step": 19038500 }, { "epoch": 55.11, "learning_rate": 2.2454845834105226e-05, "loss": 1.9679, "step": 19039000 }, { "epoch": 55.11, "learning_rate": 2.2454123633753245e-05, "loss": 1.9477, "step": 19039500 }, { "epoch": 55.11, "learning_rate": 2.2453399986105967e-05, "loss": 1.9637, "step": 19040000 }, { "epoch": 55.11, "learning_rate": 2.245267633845869e-05, "loss": 1.9647, "step": 19040500 }, { "epoch": 55.12, "learning_rate": 2.245195269081141e-05, "loss": 1.9747, "step": 19041000 }, { "epoch": 55.12, "learning_rate": 2.2451231937754724e-05, "loss": 1.9668, "step": 19041500 }, { "epoch": 55.12, "learning_rate": 2.2450508290107446e-05, "loss": 1.9817, "step": 19042000 }, { "epoch": 55.12, "learning_rate": 2.244978464246017e-05, "loss": 1.9467, "step": 19042500 }, { "epoch": 55.12, "learning_rate": 2.2449060994812894e-05, "loss": 1.9447, "step": 19043000 }, { "epoch": 55.12, "learning_rate": 2.244833734716562e-05, "loss": 1.9744, "step": 19043500 }, { "epoch": 55.12, "learning_rate": 2.244761369951834e-05, "loss": 1.9751, "step": 19044000 }, { "epoch": 55.13, "learning_rate": 2.2446890051871064e-05, "loss": 1.9461, "step": 19044500 }, { "epoch": 55.13, "learning_rate": 2.2446166404223786e-05, "loss": 1.9908, "step": 19045000 }, { "epoch": 55.13, "learning_rate": 2.244544275657651e-05, "loss": 1.9503, "step": 19045500 }, { "epoch": 55.13, "learning_rate": 2.2444719108929234e-05, "loss": 1.9547, "step": 19046000 }, { "epoch": 55.13, "learning_rate": 2.244399546128196e-05, "loss": 1.9534, "step": 19046500 }, { "epoch": 55.13, "learning_rate": 2.2443271813634682e-05, "loss": 1.9649, "step": 19047000 }, { "epoch": 55.13, "learning_rate": 2.2442549613282697e-05, "loss": 1.962, "step": 19047500 }, { "epoch": 55.14, "learning_rate": 2.244182596563542e-05, "loss": 1.9584, "step": 19048000 }, { "epoch": 55.14, "learning_rate": 2.2441102317988145e-05, "loss": 1.9455, "step": 19048500 }, { "epoch": 55.14, "learning_rate": 2.2440378670340867e-05, "loss": 1.9522, "step": 19049000 }, { "epoch": 55.14, "learning_rate": 2.2439655022693593e-05, "loss": 1.9648, "step": 19049500 }, { "epoch": 55.14, "learning_rate": 2.2438931375046315e-05, "loss": 1.9687, "step": 19050000 }, { "epoch": 55.14, "learning_rate": 2.2438209174694334e-05, "loss": 1.9537, "step": 19050500 }, { "epoch": 55.14, "learning_rate": 2.2437485527047057e-05, "loss": 1.9459, "step": 19051000 }, { "epoch": 55.15, "learning_rate": 2.243676187939978e-05, "loss": 1.9512, "step": 19051500 }, { "epoch": 55.15, "learning_rate": 2.24360382317525e-05, "loss": 1.9842, "step": 19052000 }, { "epoch": 55.15, "learning_rate": 2.243531603140052e-05, "loss": 1.955, "step": 19052500 }, { "epoch": 55.15, "learning_rate": 2.2434592383753242e-05, "loss": 1.9827, "step": 19053000 }, { "epoch": 55.15, "learning_rate": 2.2433868736105968e-05, "loss": 1.9712, "step": 19053500 }, { "epoch": 55.15, "learning_rate": 2.243314508845869e-05, "loss": 1.9563, "step": 19054000 }, { "epoch": 55.15, "learning_rate": 2.2432421440811412e-05, "loss": 1.9502, "step": 19054500 }, { "epoch": 55.16, "learning_rate": 2.2431697793164134e-05, "loss": 1.9708, "step": 19055000 }, { "epoch": 55.16, "learning_rate": 2.2430975592812153e-05, "loss": 1.9648, "step": 19055500 }, { "epoch": 55.16, "learning_rate": 2.2430251945164876e-05, "loss": 1.9667, "step": 19056000 }, { "epoch": 55.16, "learning_rate": 2.2429528297517598e-05, "loss": 1.957, "step": 19056500 }, { "epoch": 55.16, "learning_rate": 2.2428804649870324e-05, "loss": 1.9744, "step": 19057000 }, { "epoch": 55.16, "learning_rate": 2.242808100222305e-05, "loss": 1.9822, "step": 19057500 }, { "epoch": 55.17, "learning_rate": 2.242735735457577e-05, "loss": 1.9728, "step": 19058000 }, { "epoch": 55.17, "learning_rate": 2.2426633706928494e-05, "loss": 1.9505, "step": 19058500 }, { "epoch": 55.17, "learning_rate": 2.2425910059281216e-05, "loss": 1.9778, "step": 19059000 }, { "epoch": 55.17, "learning_rate": 2.2425186411633938e-05, "loss": 1.9612, "step": 19059500 }, { "epoch": 55.17, "learning_rate": 2.242446276398666e-05, "loss": 1.9779, "step": 19060000 }, { "epoch": 55.17, "learning_rate": 2.2423739116339386e-05, "loss": 1.952, "step": 19060500 }, { "epoch": 55.17, "learning_rate": 2.2423016915987405e-05, "loss": 1.9618, "step": 19061000 }, { "epoch": 55.18, "learning_rate": 2.2422293268340127e-05, "loss": 1.9835, "step": 19061500 }, { "epoch": 55.18, "learning_rate": 2.242156962069285e-05, "loss": 1.9266, "step": 19062000 }, { "epoch": 55.18, "learning_rate": 2.2420845973045575e-05, "loss": 1.9409, "step": 19062500 }, { "epoch": 55.18, "learning_rate": 2.2420122325398297e-05, "loss": 1.9348, "step": 19063000 }, { "epoch": 55.18, "learning_rate": 2.241939867775102e-05, "loss": 1.961, "step": 19063500 }, { "epoch": 55.18, "learning_rate": 2.241867503010374e-05, "loss": 1.9643, "step": 19064000 }, { "epoch": 55.18, "learning_rate": 2.2417954277047057e-05, "loss": 1.9744, "step": 19064500 }, { "epoch": 55.19, "learning_rate": 2.241723062939978e-05, "loss": 1.9677, "step": 19065000 }, { "epoch": 55.19, "learning_rate": 2.2416506981752502e-05, "loss": 1.975, "step": 19065500 }, { "epoch": 55.19, "learning_rate": 2.241578478140052e-05, "loss": 1.9648, "step": 19066000 }, { "epoch": 55.19, "learning_rate": 2.2415061133753243e-05, "loss": 1.9618, "step": 19066500 }, { "epoch": 55.19, "learning_rate": 2.2414337486105965e-05, "loss": 1.976, "step": 19067000 }, { "epoch": 55.19, "learning_rate": 2.2413613838458687e-05, "loss": 1.9616, "step": 19067500 }, { "epoch": 55.19, "learning_rate": 2.2412890190811413e-05, "loss": 1.9815, "step": 19068000 }, { "epoch": 55.2, "learning_rate": 2.241216654316414e-05, "loss": 1.9621, "step": 19068500 }, { "epoch": 55.2, "learning_rate": 2.241144289551686e-05, "loss": 1.9482, "step": 19069000 }, { "epoch": 55.2, "learning_rate": 2.2410719247869583e-05, "loss": 1.9917, "step": 19069500 }, { "epoch": 55.2, "learning_rate": 2.2409995600222305e-05, "loss": 1.9904, "step": 19070000 }, { "epoch": 55.2, "learning_rate": 2.2409271952575028e-05, "loss": 1.9615, "step": 19070500 }, { "epoch": 55.2, "learning_rate": 2.240854830492775e-05, "loss": 1.9535, "step": 19071000 }, { "epoch": 55.2, "learning_rate": 2.2407824657280475e-05, "loss": 1.9555, "step": 19071500 }, { "epoch": 55.21, "learning_rate": 2.2407102456928494e-05, "loss": 1.9396, "step": 19072000 }, { "epoch": 55.21, "learning_rate": 2.2406378809281217e-05, "loss": 1.9565, "step": 19072500 }, { "epoch": 55.21, "learning_rate": 2.2405656608929236e-05, "loss": 1.9896, "step": 19073000 }, { "epoch": 55.21, "learning_rate": 2.2404932961281958e-05, "loss": 1.954, "step": 19073500 }, { "epoch": 55.21, "learning_rate": 2.240420931363468e-05, "loss": 1.9671, "step": 19074000 }, { "epoch": 55.21, "learning_rate": 2.2403485665987402e-05, "loss": 1.9521, "step": 19074500 }, { "epoch": 55.21, "learning_rate": 2.2402762018340125e-05, "loss": 1.9579, "step": 19075000 }, { "epoch": 55.22, "learning_rate": 2.240203837069285e-05, "loss": 1.9554, "step": 19075500 }, { "epoch": 55.22, "learning_rate": 2.2401314723045576e-05, "loss": 1.9505, "step": 19076000 }, { "epoch": 55.22, "learning_rate": 2.2400591075398298e-05, "loss": 1.984, "step": 19076500 }, { "epoch": 55.22, "learning_rate": 2.2399868875046314e-05, "loss": 1.9782, "step": 19077000 }, { "epoch": 55.22, "learning_rate": 2.239914522739904e-05, "loss": 1.9474, "step": 19077500 }, { "epoch": 55.22, "learning_rate": 2.239842157975176e-05, "loss": 1.9574, "step": 19078000 }, { "epoch": 55.22, "learning_rate": 2.2397697932104484e-05, "loss": 1.9522, "step": 19078500 }, { "epoch": 55.23, "learning_rate": 2.2396974284457206e-05, "loss": 1.992, "step": 19079000 }, { "epoch": 55.23, "learning_rate": 2.239625063680993e-05, "loss": 1.9965, "step": 19079500 }, { "epoch": 55.23, "learning_rate": 2.2395526989162654e-05, "loss": 1.9663, "step": 19080000 }, { "epoch": 55.23, "learning_rate": 2.2394803341515376e-05, "loss": 1.9925, "step": 19080500 }, { "epoch": 55.23, "learning_rate": 2.2394082588458688e-05, "loss": 1.966, "step": 19081000 }, { "epoch": 55.23, "learning_rate": 2.2393358940811414e-05, "loss": 1.9628, "step": 19081500 }, { "epoch": 55.23, "learning_rate": 2.2392635293164136e-05, "loss": 1.9562, "step": 19082000 }, { "epoch": 55.24, "learning_rate": 2.2391911645516858e-05, "loss": 1.9507, "step": 19082500 }, { "epoch": 55.24, "learning_rate": 2.2391187997869584e-05, "loss": 1.9614, "step": 19083000 }, { "epoch": 55.24, "learning_rate": 2.2390464350222306e-05, "loss": 2.0017, "step": 19083500 }, { "epoch": 55.24, "learning_rate": 2.2389742149870325e-05, "loss": 1.9414, "step": 19084000 }, { "epoch": 55.24, "learning_rate": 2.2389018502223047e-05, "loss": 1.9752, "step": 19084500 }, { "epoch": 55.24, "learning_rate": 2.238829485457577e-05, "loss": 1.9895, "step": 19085000 }, { "epoch": 55.24, "learning_rate": 2.2387571206928492e-05, "loss": 1.9748, "step": 19085500 }, { "epoch": 55.25, "learning_rate": 2.2386847559281214e-05, "loss": 1.945, "step": 19086000 }, { "epoch": 55.25, "learning_rate": 2.238612391163394e-05, "loss": 1.9707, "step": 19086500 }, { "epoch": 55.25, "learning_rate": 2.2385400263986665e-05, "loss": 1.9716, "step": 19087000 }, { "epoch": 55.25, "learning_rate": 2.2384676616339388e-05, "loss": 1.9921, "step": 19087500 }, { "epoch": 55.25, "learning_rate": 2.238395296869211e-05, "loss": 1.9445, "step": 19088000 }, { "epoch": 55.25, "learning_rate": 2.238323076834013e-05, "loss": 1.9601, "step": 19088500 }, { "epoch": 55.25, "learning_rate": 2.2382508567988144e-05, "loss": 1.9405, "step": 19089000 }, { "epoch": 55.26, "learning_rate": 2.2381784920340866e-05, "loss": 1.981, "step": 19089500 }, { "epoch": 55.26, "learning_rate": 2.238106127269359e-05, "loss": 1.9534, "step": 19090000 }, { "epoch": 55.26, "learning_rate": 2.2380337625046314e-05, "loss": 1.9784, "step": 19090500 }, { "epoch": 55.26, "learning_rate": 2.237961397739904e-05, "loss": 1.9714, "step": 19091000 }, { "epoch": 55.26, "learning_rate": 2.2378890329751762e-05, "loss": 1.9903, "step": 19091500 }, { "epoch": 55.26, "learning_rate": 2.2378166682104484e-05, "loss": 1.9657, "step": 19092000 }, { "epoch": 55.26, "learning_rate": 2.2377443034457207e-05, "loss": 1.9694, "step": 19092500 }, { "epoch": 55.27, "learning_rate": 2.2376720834105226e-05, "loss": 1.97, "step": 19093000 }, { "epoch": 55.27, "learning_rate": 2.237599863375324e-05, "loss": 1.9678, "step": 19093500 }, { "epoch": 55.27, "learning_rate": 2.237527643340126e-05, "loss": 1.9531, "step": 19094000 }, { "epoch": 55.27, "learning_rate": 2.2374552785753982e-05, "loss": 1.9856, "step": 19094500 }, { "epoch": 55.27, "learning_rate": 2.2373829138106708e-05, "loss": 1.9798, "step": 19095000 }, { "epoch": 55.27, "learning_rate": 2.237310549045943e-05, "loss": 1.9827, "step": 19095500 }, { "epoch": 55.28, "learning_rate": 2.2372381842812152e-05, "loss": 1.9654, "step": 19096000 }, { "epoch": 55.28, "learning_rate": 2.2371658195164878e-05, "loss": 1.9438, "step": 19096500 }, { "epoch": 55.28, "learning_rate": 2.2370935994812894e-05, "loss": 1.9653, "step": 19097000 }, { "epoch": 55.28, "learning_rate": 2.2370212347165616e-05, "loss": 1.9811, "step": 19097500 }, { "epoch": 55.28, "learning_rate": 2.236948869951834e-05, "loss": 1.9664, "step": 19098000 }, { "epoch": 55.28, "learning_rate": 2.2368765051871067e-05, "loss": 1.9678, "step": 19098500 }, { "epoch": 55.28, "learning_rate": 2.236804140422379e-05, "loss": 1.9659, "step": 19099000 }, { "epoch": 55.29, "learning_rate": 2.236731775657651e-05, "loss": 1.9693, "step": 19099500 }, { "epoch": 55.29, "learning_rate": 2.2366594108929234e-05, "loss": 1.9692, "step": 19100000 }, { "epoch": 55.29, "learning_rate": 2.2365871908577253e-05, "loss": 1.9882, "step": 19100500 }, { "epoch": 55.29, "learning_rate": 2.2365148260929975e-05, "loss": 1.9691, "step": 19101000 }, { "epoch": 55.29, "learning_rate": 2.2364424613282697e-05, "loss": 1.9966, "step": 19101500 }, { "epoch": 55.29, "learning_rate": 2.2363700965635423e-05, "loss": 1.9714, "step": 19102000 }, { "epoch": 55.29, "learning_rate": 2.2362977317988145e-05, "loss": 1.963, "step": 19102500 }, { "epoch": 55.3, "learning_rate": 2.2362253670340867e-05, "loss": 1.9778, "step": 19103000 }, { "epoch": 55.3, "learning_rate": 2.2361530022693593e-05, "loss": 1.9709, "step": 19103500 }, { "epoch": 55.3, "learning_rate": 2.2360806375046315e-05, "loss": 1.9737, "step": 19104000 }, { "epoch": 55.3, "learning_rate": 2.236008417469433e-05, "loss": 1.963, "step": 19104500 }, { "epoch": 55.3, "learning_rate": 2.235936197434235e-05, "loss": 1.9613, "step": 19105000 }, { "epoch": 55.3, "learning_rate": 2.2358638326695072e-05, "loss": 1.9469, "step": 19105500 }, { "epoch": 55.3, "learning_rate": 2.2357914679047797e-05, "loss": 1.9806, "step": 19106000 }, { "epoch": 55.31, "learning_rate": 2.235719103140052e-05, "loss": 1.9517, "step": 19106500 }, { "epoch": 55.31, "learning_rate": 2.235646883104854e-05, "loss": 1.9829, "step": 19107000 }, { "epoch": 55.31, "learning_rate": 2.235574518340126e-05, "loss": 1.9848, "step": 19107500 }, { "epoch": 55.31, "learning_rate": 2.2355021535753983e-05, "loss": 1.9532, "step": 19108000 }, { "epoch": 55.31, "learning_rate": 2.2354297888106705e-05, "loss": 1.9775, "step": 19108500 }, { "epoch": 55.31, "learning_rate": 2.235357424045943e-05, "loss": 1.9629, "step": 19109000 }, { "epoch": 55.31, "learning_rate": 2.2352850592812157e-05, "loss": 1.9729, "step": 19109500 }, { "epoch": 55.32, "learning_rate": 2.235212694516488e-05, "loss": 1.9542, "step": 19110000 }, { "epoch": 55.32, "learning_rate": 2.2351404744812894e-05, "loss": 1.9674, "step": 19110500 }, { "epoch": 55.32, "learning_rate": 2.2350681097165617e-05, "loss": 1.9627, "step": 19111000 }, { "epoch": 55.32, "learning_rate": 2.2349957449518342e-05, "loss": 2.0051, "step": 19111500 }, { "epoch": 55.32, "learning_rate": 2.2349233801871064e-05, "loss": 1.9904, "step": 19112000 }, { "epoch": 55.32, "learning_rate": 2.2348510154223787e-05, "loss": 1.9515, "step": 19112500 }, { "epoch": 55.32, "learning_rate": 2.23477894011671e-05, "loss": 1.951, "step": 19113000 }, { "epoch": 55.33, "learning_rate": 2.234706575351982e-05, "loss": 1.9725, "step": 19113500 }, { "epoch": 55.33, "learning_rate": 2.2346342105872547e-05, "loss": 1.9871, "step": 19114000 }, { "epoch": 55.33, "learning_rate": 2.234561845822527e-05, "loss": 1.9456, "step": 19114500 }, { "epoch": 55.33, "learning_rate": 2.2344894810577995e-05, "loss": 1.9585, "step": 19115000 }, { "epoch": 55.33, "learning_rate": 2.2344171162930717e-05, "loss": 1.9569, "step": 19115500 }, { "epoch": 55.33, "learning_rate": 2.234344751528344e-05, "loss": 1.9788, "step": 19116000 }, { "epoch": 55.33, "learning_rate": 2.234272386763616e-05, "loss": 1.9691, "step": 19116500 }, { "epoch": 55.34, "learning_rate": 2.2342000219988884e-05, "loss": 1.9483, "step": 19117000 }, { "epoch": 55.34, "learning_rate": 2.234127657234161e-05, "loss": 1.9686, "step": 19117500 }, { "epoch": 55.34, "learning_rate": 2.234055292469433e-05, "loss": 1.9753, "step": 19118000 }, { "epoch": 55.34, "learning_rate": 2.2339829277047057e-05, "loss": 1.9617, "step": 19118500 }, { "epoch": 55.34, "learning_rate": 2.233910562939978e-05, "loss": 1.97, "step": 19119000 }, { "epoch": 55.34, "learning_rate": 2.2338383429047795e-05, "loss": 1.9826, "step": 19119500 }, { "epoch": 55.34, "learning_rate": 2.2337661228695814e-05, "loss": 1.955, "step": 19120000 }, { "epoch": 55.35, "learning_rate": 2.2336937581048536e-05, "loss": 1.9608, "step": 19120500 }, { "epoch": 55.35, "learning_rate": 2.233621393340126e-05, "loss": 1.9763, "step": 19121000 }, { "epoch": 55.35, "learning_rate": 2.2335490285753984e-05, "loss": 1.9254, "step": 19121500 }, { "epoch": 55.35, "learning_rate": 2.2334768085402003e-05, "loss": 1.9796, "step": 19122000 }, { "epoch": 55.35, "learning_rate": 2.2334044437754725e-05, "loss": 1.999, "step": 19122500 }, { "epoch": 55.35, "learning_rate": 2.2333320790107447e-05, "loss": 1.9921, "step": 19123000 }, { "epoch": 55.35, "learning_rate": 2.233259714246017e-05, "loss": 1.9694, "step": 19123500 }, { "epoch": 55.36, "learning_rate": 2.2331873494812895e-05, "loss": 1.9533, "step": 19124000 }, { "epoch": 55.36, "learning_rate": 2.2331149847165617e-05, "loss": 1.9675, "step": 19124500 }, { "epoch": 55.36, "learning_rate": 2.2330426199518343e-05, "loss": 1.9748, "step": 19125000 }, { "epoch": 55.36, "learning_rate": 2.2329702551871065e-05, "loss": 1.9685, "step": 19125500 }, { "epoch": 55.36, "learning_rate": 2.2328980351519084e-05, "loss": 1.9715, "step": 19126000 }, { "epoch": 55.36, "learning_rate": 2.2328256703871806e-05, "loss": 1.9981, "step": 19126500 }, { "epoch": 55.36, "learning_rate": 2.232753305622453e-05, "loss": 1.9877, "step": 19127000 }, { "epoch": 55.37, "learning_rate": 2.232680940857725e-05, "loss": 1.9674, "step": 19127500 }, { "epoch": 55.37, "learning_rate": 2.2326085760929973e-05, "loss": 1.9577, "step": 19128000 }, { "epoch": 55.37, "learning_rate": 2.23253621132827e-05, "loss": 2.005, "step": 19128500 }, { "epoch": 55.37, "learning_rate": 2.2324639912930718e-05, "loss": 1.9778, "step": 19129000 }, { "epoch": 55.37, "learning_rate": 2.232391626528344e-05, "loss": 1.9715, "step": 19129500 }, { "epoch": 55.37, "learning_rate": 2.2323192617636162e-05, "loss": 1.9619, "step": 19130000 }, { "epoch": 55.37, "learning_rate": 2.2322468969988884e-05, "loss": 1.9745, "step": 19130500 }, { "epoch": 55.38, "learning_rate": 2.232174532234161e-05, "loss": 1.9737, "step": 19131000 }, { "epoch": 55.38, "learning_rate": 2.2321021674694332e-05, "loss": 1.9717, "step": 19131500 }, { "epoch": 55.38, "learning_rate": 2.2320298027047058e-05, "loss": 1.9764, "step": 19132000 }, { "epoch": 55.38, "learning_rate": 2.231957437939978e-05, "loss": 1.9552, "step": 19132500 }, { "epoch": 55.38, "learning_rate": 2.2318850731752502e-05, "loss": 1.9635, "step": 19133000 }, { "epoch": 55.38, "learning_rate": 2.2318127084105225e-05, "loss": 1.9531, "step": 19133500 }, { "epoch": 55.39, "learning_rate": 2.2317403436457947e-05, "loss": 1.9886, "step": 19134000 }, { "epoch": 55.39, "learning_rate": 2.2316679788810672e-05, "loss": 1.9496, "step": 19134500 }, { "epoch": 55.39, "learning_rate": 2.2315957588458688e-05, "loss": 1.9717, "step": 19135000 }, { "epoch": 55.39, "learning_rate": 2.231523394081141e-05, "loss": 1.9658, "step": 19135500 }, { "epoch": 55.39, "learning_rate": 2.2314510293164136e-05, "loss": 1.966, "step": 19136000 }, { "epoch": 55.39, "learning_rate": 2.231378664551686e-05, "loss": 2.011, "step": 19136500 }, { "epoch": 55.39, "learning_rate": 2.2313062997869584e-05, "loss": 1.9665, "step": 19137000 }, { "epoch": 55.4, "learning_rate": 2.2312339350222306e-05, "loss": 1.9842, "step": 19137500 }, { "epoch": 55.4, "learning_rate": 2.2311618597165618e-05, "loss": 1.9702, "step": 19138000 }, { "epoch": 55.4, "learning_rate": 2.2310896396813634e-05, "loss": 1.9519, "step": 19138500 }, { "epoch": 55.4, "learning_rate": 2.231017274916636e-05, "loss": 1.9801, "step": 19139000 }, { "epoch": 55.4, "learning_rate": 2.230944910151908e-05, "loss": 1.9716, "step": 19139500 }, { "epoch": 55.4, "learning_rate": 2.2308725453871807e-05, "loss": 1.9662, "step": 19140000 }, { "epoch": 55.4, "learning_rate": 2.2308003253519823e-05, "loss": 1.9525, "step": 19140500 }, { "epoch": 55.41, "learning_rate": 2.230727960587255e-05, "loss": 1.9328, "step": 19141000 }, { "epoch": 55.41, "learning_rate": 2.230655595822527e-05, "loss": 1.971, "step": 19141500 }, { "epoch": 55.41, "learning_rate": 2.2305832310577993e-05, "loss": 1.9589, "step": 19142000 }, { "epoch": 55.41, "learning_rate": 2.2305108662930715e-05, "loss": 1.9762, "step": 19142500 }, { "epoch": 55.41, "learning_rate": 2.2304385015283437e-05, "loss": 1.9798, "step": 19143000 }, { "epoch": 55.41, "learning_rate": 2.2303662814931456e-05, "loss": 1.9882, "step": 19143500 }, { "epoch": 55.41, "learning_rate": 2.2302939167284182e-05, "loss": 1.9632, "step": 19144000 }, { "epoch": 55.42, "learning_rate": 2.2302215519636904e-05, "loss": 1.9577, "step": 19144500 }, { "epoch": 55.42, "learning_rate": 2.2301493319284923e-05, "loss": 1.9831, "step": 19145000 }, { "epoch": 55.42, "learning_rate": 2.2300769671637645e-05, "loss": 1.9851, "step": 19145500 }, { "epoch": 55.42, "learning_rate": 2.2300046023990368e-05, "loss": 1.9521, "step": 19146000 }, { "epoch": 55.42, "learning_rate": 2.229932237634309e-05, "loss": 1.9861, "step": 19146500 }, { "epoch": 55.42, "learning_rate": 2.2298598728695812e-05, "loss": 1.9621, "step": 19147000 }, { "epoch": 55.42, "learning_rate": 2.2297875081048538e-05, "loss": 1.979, "step": 19147500 }, { "epoch": 55.43, "learning_rate": 2.229715143340126e-05, "loss": 1.9594, "step": 19148000 }, { "epoch": 55.43, "learning_rate": 2.2296427785753986e-05, "loss": 1.9741, "step": 19148500 }, { "epoch": 55.43, "learning_rate": 2.2295704138106708e-05, "loss": 1.9681, "step": 19149000 }, { "epoch": 55.43, "learning_rate": 2.229498049045943e-05, "loss": 1.9802, "step": 19149500 }, { "epoch": 55.43, "learning_rate": 2.229425829010745e-05, "loss": 1.9597, "step": 19150000 }, { "epoch": 55.43, "learning_rate": 2.229353464246017e-05, "loss": 1.9792, "step": 19150500 }, { "epoch": 55.43, "learning_rate": 2.2292810994812897e-05, "loss": 1.9746, "step": 19151000 }, { "epoch": 55.44, "learning_rate": 2.229208734716562e-05, "loss": 1.9711, "step": 19151500 }, { "epoch": 55.44, "learning_rate": 2.229136369951834e-05, "loss": 1.9759, "step": 19152000 }, { "epoch": 55.44, "learning_rate": 2.229064149916636e-05, "loss": 1.9741, "step": 19152500 }, { "epoch": 55.44, "learning_rate": 2.2289917851519082e-05, "loss": 1.9543, "step": 19153000 }, { "epoch": 55.44, "learning_rate": 2.2289194203871805e-05, "loss": 1.9704, "step": 19153500 }, { "epoch": 55.44, "learning_rate": 2.2288470556224527e-05, "loss": 1.9674, "step": 19154000 }, { "epoch": 55.44, "learning_rate": 2.228774690857725e-05, "loss": 1.9699, "step": 19154500 }, { "epoch": 55.45, "learning_rate": 2.2287023260929975e-05, "loss": 1.9622, "step": 19155000 }, { "epoch": 55.45, "learning_rate": 2.22862996132827e-05, "loss": 1.9614, "step": 19155500 }, { "epoch": 55.45, "learning_rate": 2.2285575965635423e-05, "loss": 1.9735, "step": 19156000 }, { "epoch": 55.45, "learning_rate": 2.2284852317988145e-05, "loss": 1.9586, "step": 19156500 }, { "epoch": 55.45, "learning_rate": 2.2284128670340867e-05, "loss": 1.9812, "step": 19157000 }, { "epoch": 55.45, "learning_rate": 2.228340502269359e-05, "loss": 1.9681, "step": 19157500 }, { "epoch": 55.45, "learning_rate": 2.228268137504631e-05, "loss": 1.9734, "step": 19158000 }, { "epoch": 55.46, "learning_rate": 2.2281959174694334e-05, "loss": 1.9999, "step": 19158500 }, { "epoch": 55.46, "learning_rate": 2.2281235527047056e-05, "loss": 1.9656, "step": 19159000 }, { "epoch": 55.46, "learning_rate": 2.228051187939978e-05, "loss": 1.9678, "step": 19159500 }, { "epoch": 55.46, "learning_rate": 2.22797882317525e-05, "loss": 1.9605, "step": 19160000 }, { "epoch": 55.46, "learning_rate": 2.2279064584105226e-05, "loss": 1.9665, "step": 19160500 }, { "epoch": 55.46, "learning_rate": 2.227834093645795e-05, "loss": 1.9725, "step": 19161000 }, { "epoch": 55.46, "learning_rate": 2.227761728881067e-05, "loss": 1.9716, "step": 19161500 }, { "epoch": 55.47, "learning_rate": 2.227689508845869e-05, "loss": 1.991, "step": 19162000 }, { "epoch": 55.47, "learning_rate": 2.2276171440811415e-05, "loss": 1.9843, "step": 19162500 }, { "epoch": 55.47, "learning_rate": 2.227544924045943e-05, "loss": 1.9714, "step": 19163000 }, { "epoch": 55.47, "learning_rate": 2.2274725592812153e-05, "loss": 1.9708, "step": 19163500 }, { "epoch": 55.47, "learning_rate": 2.2274001945164875e-05, "loss": 1.9525, "step": 19164000 }, { "epoch": 55.47, "learning_rate": 2.22732782975176e-05, "loss": 1.9675, "step": 19164500 }, { "epoch": 55.47, "learning_rate": 2.2272554649870323e-05, "loss": 1.9911, "step": 19165000 }, { "epoch": 55.48, "learning_rate": 2.2271831002223045e-05, "loss": 1.9776, "step": 19165500 }, { "epoch": 55.48, "learning_rate": 2.227110735457577e-05, "loss": 1.9788, "step": 19166000 }, { "epoch": 55.48, "learning_rate": 2.2270383706928493e-05, "loss": 1.9908, "step": 19166500 }, { "epoch": 55.48, "learning_rate": 2.2269660059281215e-05, "loss": 1.9539, "step": 19167000 }, { "epoch": 55.48, "learning_rate": 2.226893641163394e-05, "loss": 1.967, "step": 19167500 }, { "epoch": 55.48, "learning_rate": 2.2268214211281957e-05, "loss": 1.9919, "step": 19168000 }, { "epoch": 55.48, "learning_rate": 2.2267492010929976e-05, "loss": 1.9673, "step": 19168500 }, { "epoch": 55.49, "learning_rate": 2.226676981057799e-05, "loss": 1.9523, "step": 19169000 }, { "epoch": 55.49, "learning_rate": 2.2266046162930713e-05, "loss": 1.9853, "step": 19169500 }, { "epoch": 55.49, "learning_rate": 2.226532251528344e-05, "loss": 1.9496, "step": 19170000 }, { "epoch": 55.49, "learning_rate": 2.2264600314931458e-05, "loss": 1.9665, "step": 19170500 }, { "epoch": 55.49, "learning_rate": 2.226387666728418e-05, "loss": 1.9872, "step": 19171000 }, { "epoch": 55.49, "learning_rate": 2.2263153019636902e-05, "loss": 1.9616, "step": 19171500 }, { "epoch": 55.5, "learning_rate": 2.2262429371989628e-05, "loss": 1.9834, "step": 19172000 }, { "epoch": 55.5, "learning_rate": 2.226170572434235e-05, "loss": 1.9698, "step": 19172500 }, { "epoch": 55.5, "learning_rate": 2.2260982076695072e-05, "loss": 1.9507, "step": 19173000 }, { "epoch": 55.5, "learning_rate": 2.2260258429047798e-05, "loss": 1.9624, "step": 19173500 }, { "epoch": 55.5, "learning_rate": 2.225953478140052e-05, "loss": 1.9772, "step": 19174000 }, { "epoch": 55.5, "learning_rate": 2.225881258104854e-05, "loss": 1.9515, "step": 19174500 }, { "epoch": 55.5, "learning_rate": 2.225808893340126e-05, "loss": 1.9832, "step": 19175000 }, { "epoch": 55.51, "learning_rate": 2.2257365285753984e-05, "loss": 1.9881, "step": 19175500 }, { "epoch": 55.51, "learning_rate": 2.2256641638106706e-05, "loss": 1.9797, "step": 19176000 }, { "epoch": 55.51, "learning_rate": 2.2255917990459428e-05, "loss": 1.9531, "step": 19176500 }, { "epoch": 55.51, "learning_rate": 2.2255194342812154e-05, "loss": 1.9484, "step": 19177000 }, { "epoch": 55.51, "learning_rate": 2.225447069516488e-05, "loss": 1.9694, "step": 19177500 }, { "epoch": 55.51, "learning_rate": 2.2253749942108192e-05, "loss": 1.9642, "step": 19178000 }, { "epoch": 55.51, "learning_rate": 2.2253026294460914e-05, "loss": 1.9838, "step": 19178500 }, { "epoch": 55.52, "learning_rate": 2.2252302646813636e-05, "loss": 1.953, "step": 19179000 }, { "epoch": 55.52, "learning_rate": 2.225157899916636e-05, "loss": 1.9897, "step": 19179500 }, { "epoch": 55.52, "learning_rate": 2.2250856798814377e-05, "loss": 1.9698, "step": 19180000 }, { "epoch": 55.52, "learning_rate": 2.22501331511671e-05, "loss": 1.9551, "step": 19180500 }, { "epoch": 55.52, "learning_rate": 2.2249409503519822e-05, "loss": 1.9529, "step": 19181000 }, { "epoch": 55.52, "learning_rate": 2.2248685855872547e-05, "loss": 1.9917, "step": 19181500 }, { "epoch": 55.52, "learning_rate": 2.224796220822527e-05, "loss": 1.9372, "step": 19182000 }, { "epoch": 55.53, "learning_rate": 2.2247238560577992e-05, "loss": 1.972, "step": 19182500 }, { "epoch": 55.53, "learning_rate": 2.2246514912930718e-05, "loss": 1.9881, "step": 19183000 }, { "epoch": 55.53, "learning_rate": 2.224579126528344e-05, "loss": 1.9931, "step": 19183500 }, { "epoch": 55.53, "learning_rate": 2.2245067617636162e-05, "loss": 1.9481, "step": 19184000 }, { "epoch": 55.53, "learning_rate": 2.2244343969988884e-05, "loss": 1.9564, "step": 19184500 }, { "epoch": 55.53, "learning_rate": 2.224362032234161e-05, "loss": 1.9781, "step": 19185000 }, { "epoch": 55.53, "learning_rate": 2.2242896674694332e-05, "loss": 1.9885, "step": 19185500 }, { "epoch": 55.54, "learning_rate": 2.2242173027047054e-05, "loss": 1.9958, "step": 19186000 }, { "epoch": 55.54, "learning_rate": 2.224144937939978e-05, "loss": 1.9826, "step": 19186500 }, { "epoch": 55.54, "learning_rate": 2.2240725731752502e-05, "loss": 1.9864, "step": 19187000 }, { "epoch": 55.54, "learning_rate": 2.2240002084105224e-05, "loss": 1.9659, "step": 19187500 }, { "epoch": 55.54, "learning_rate": 2.223927843645795e-05, "loss": 1.9596, "step": 19188000 }, { "epoch": 55.54, "learning_rate": 2.223855623610597e-05, "loss": 1.9772, "step": 19188500 }, { "epoch": 55.54, "learning_rate": 2.2237834035753985e-05, "loss": 1.9786, "step": 19189000 }, { "epoch": 55.55, "learning_rate": 2.2237110388106707e-05, "loss": 2.0, "step": 19189500 }, { "epoch": 55.55, "learning_rate": 2.223638674045943e-05, "loss": 1.9451, "step": 19190000 }, { "epoch": 55.55, "learning_rate": 2.2235663092812155e-05, "loss": 1.9878, "step": 19190500 }, { "epoch": 55.55, "learning_rate": 2.2234939445164877e-05, "loss": 1.9642, "step": 19191000 }, { "epoch": 55.55, "learning_rate": 2.22342157975176e-05, "loss": 1.9613, "step": 19191500 }, { "epoch": 55.55, "learning_rate": 2.2233492149870325e-05, "loss": 1.9537, "step": 19192000 }, { "epoch": 55.55, "learning_rate": 2.2232771396813637e-05, "loss": 1.9756, "step": 19192500 }, { "epoch": 55.56, "learning_rate": 2.223204774916636e-05, "loss": 1.9688, "step": 19193000 }, { "epoch": 55.56, "learning_rate": 2.223132410151908e-05, "loss": 1.9654, "step": 19193500 }, { "epoch": 55.56, "learning_rate": 2.2230600453871807e-05, "loss": 1.9769, "step": 19194000 }, { "epoch": 55.56, "learning_rate": 2.222987680622453e-05, "loss": 1.9699, "step": 19194500 }, { "epoch": 55.56, "learning_rate": 2.222915315857725e-05, "loss": 1.9761, "step": 19195000 }, { "epoch": 55.56, "learning_rate": 2.2228429510929974e-05, "loss": 1.9728, "step": 19195500 }, { "epoch": 55.56, "learning_rate": 2.22277058632827e-05, "loss": 1.9707, "step": 19196000 }, { "epoch": 55.57, "learning_rate": 2.222698221563542e-05, "loss": 1.964, "step": 19196500 }, { "epoch": 55.57, "learning_rate": 2.2226261462578734e-05, "loss": 1.9599, "step": 19197000 }, { "epoch": 55.57, "learning_rate": 2.2225537814931456e-05, "loss": 1.9505, "step": 19197500 }, { "epoch": 55.57, "learning_rate": 2.2224814167284182e-05, "loss": 1.9927, "step": 19198000 }, { "epoch": 55.57, "learning_rate": 2.2224090519636904e-05, "loss": 1.9644, "step": 19198500 }, { "epoch": 55.57, "learning_rate": 2.2223366871989626e-05, "loss": 1.9925, "step": 19199000 }, { "epoch": 55.57, "learning_rate": 2.222264322434235e-05, "loss": 1.9408, "step": 19199500 }, { "epoch": 55.58, "learning_rate": 2.2221919576695074e-05, "loss": 1.981, "step": 19200000 }, { "epoch": 55.58, "learning_rate": 2.2221195929047796e-05, "loss": 1.9765, "step": 19200500 }, { "epoch": 55.58, "learning_rate": 2.2220473728695815e-05, "loss": 1.9599, "step": 19201000 }, { "epoch": 55.58, "learning_rate": 2.2219750081048537e-05, "loss": 1.944, "step": 19201500 }, { "epoch": 55.58, "learning_rate": 2.221902643340126e-05, "loss": 1.981, "step": 19202000 }, { "epoch": 55.58, "learning_rate": 2.2218302785753982e-05, "loss": 1.9578, "step": 19202500 }, { "epoch": 55.58, "learning_rate": 2.2217579138106708e-05, "loss": 1.9875, "step": 19203000 }, { "epoch": 55.59, "learning_rate": 2.2216855490459433e-05, "loss": 2.0035, "step": 19203500 }, { "epoch": 55.59, "learning_rate": 2.2216131842812155e-05, "loss": 1.9774, "step": 19204000 }, { "epoch": 55.59, "learning_rate": 2.2215408195164878e-05, "loss": 1.9562, "step": 19204500 }, { "epoch": 55.59, "learning_rate": 2.22146845475176e-05, "loss": 1.976, "step": 19205000 }, { "epoch": 55.59, "learning_rate": 2.2213960899870322e-05, "loss": 1.9784, "step": 19205500 }, { "epoch": 55.59, "learning_rate": 2.221323869951834e-05, "loss": 1.9634, "step": 19206000 }, { "epoch": 55.59, "learning_rate": 2.2212516499166357e-05, "loss": 1.9742, "step": 19206500 }, { "epoch": 55.6, "learning_rate": 2.2211792851519082e-05, "loss": 1.9518, "step": 19207000 }, { "epoch": 55.6, "learning_rate": 2.2211069203871808e-05, "loss": 1.9576, "step": 19207500 }, { "epoch": 55.6, "learning_rate": 2.221034555622453e-05, "loss": 1.931, "step": 19208000 }, { "epoch": 55.6, "learning_rate": 2.2209621908577252e-05, "loss": 1.9501, "step": 19208500 }, { "epoch": 55.6, "learning_rate": 2.220889970822527e-05, "loss": 1.9702, "step": 19209000 }, { "epoch": 55.6, "learning_rate": 2.2208176060577993e-05, "loss": 1.9813, "step": 19209500 }, { "epoch": 55.61, "learning_rate": 2.2207452412930716e-05, "loss": 1.9579, "step": 19210000 }, { "epoch": 55.61, "learning_rate": 2.2206728765283438e-05, "loss": 1.9628, "step": 19210500 }, { "epoch": 55.61, "learning_rate": 2.2206005117636164e-05, "loss": 1.9694, "step": 19211000 }, { "epoch": 55.61, "learning_rate": 2.2205281469988886e-05, "loss": 1.9811, "step": 19211500 }, { "epoch": 55.61, "learning_rate": 2.2204557822341608e-05, "loss": 1.9544, "step": 19212000 }, { "epoch": 55.61, "learning_rate": 2.2203834174694334e-05, "loss": 1.9718, "step": 19212500 }, { "epoch": 55.61, "learning_rate": 2.2203110527047056e-05, "loss": 1.9901, "step": 19213000 }, { "epoch": 55.62, "learning_rate": 2.2202386879399778e-05, "loss": 1.9665, "step": 19213500 }, { "epoch": 55.62, "learning_rate": 2.22016632317525e-05, "loss": 1.9769, "step": 19214000 }, { "epoch": 55.62, "learning_rate": 2.2200939584105226e-05, "loss": 1.9847, "step": 19214500 }, { "epoch": 55.62, "learning_rate": 2.2200215936457948e-05, "loss": 1.9539, "step": 19215000 }, { "epoch": 55.62, "learning_rate": 2.219949228881067e-05, "loss": 1.9736, "step": 19215500 }, { "epoch": 55.62, "learning_rate": 2.2198768641163396e-05, "loss": 1.9697, "step": 19216000 }, { "epoch": 55.62, "learning_rate": 2.219804644081141e-05, "loss": 1.9696, "step": 19216500 }, { "epoch": 55.63, "learning_rate": 2.2197322793164134e-05, "loss": 1.9756, "step": 19217000 }, { "epoch": 55.63, "learning_rate": 2.2196600592812153e-05, "loss": 1.9698, "step": 19217500 }, { "epoch": 55.63, "learning_rate": 2.2195876945164875e-05, "loss": 1.9561, "step": 19218000 }, { "epoch": 55.63, "learning_rate": 2.21951532975176e-05, "loss": 1.9648, "step": 19218500 }, { "epoch": 55.63, "learning_rate": 2.2194429649870323e-05, "loss": 1.9708, "step": 19219000 }, { "epoch": 55.63, "learning_rate": 2.219370600222305e-05, "loss": 1.9712, "step": 19219500 }, { "epoch": 55.63, "learning_rate": 2.219298235457577e-05, "loss": 1.974, "step": 19220000 }, { "epoch": 55.64, "learning_rate": 2.2192260154223786e-05, "loss": 1.9675, "step": 19220500 }, { "epoch": 55.64, "learning_rate": 2.219153650657651e-05, "loss": 1.9709, "step": 19221000 }, { "epoch": 55.64, "learning_rate": 2.2190812858929234e-05, "loss": 1.9767, "step": 19221500 }, { "epoch": 55.64, "learning_rate": 2.219008921128196e-05, "loss": 1.9769, "step": 19222000 }, { "epoch": 55.64, "learning_rate": 2.2189365563634682e-05, "loss": 1.968, "step": 19222500 }, { "epoch": 55.64, "learning_rate": 2.2188641915987404e-05, "loss": 1.9737, "step": 19223000 }, { "epoch": 55.64, "learning_rate": 2.2187918268340126e-05, "loss": 1.9645, "step": 19223500 }, { "epoch": 55.65, "learning_rate": 2.218719462069285e-05, "loss": 1.9678, "step": 19224000 }, { "epoch": 55.65, "learning_rate": 2.2186472420340868e-05, "loss": 2.0016, "step": 19224500 }, { "epoch": 55.65, "learning_rate": 2.218574877269359e-05, "loss": 1.9635, "step": 19225000 }, { "epoch": 55.65, "learning_rate": 2.2185025125046312e-05, "loss": 1.9921, "step": 19225500 }, { "epoch": 55.65, "learning_rate": 2.2184301477399038e-05, "loss": 1.9699, "step": 19226000 }, { "epoch": 55.65, "learning_rate": 2.218357782975176e-05, "loss": 1.9933, "step": 19226500 }, { "epoch": 55.65, "learning_rate": 2.2182854182104486e-05, "loss": 1.9863, "step": 19227000 }, { "epoch": 55.66, "learning_rate": 2.2182130534457208e-05, "loss": 1.9668, "step": 19227500 }, { "epoch": 55.66, "learning_rate": 2.218140688680993e-05, "loss": 2.0179, "step": 19228000 }, { "epoch": 55.66, "learning_rate": 2.2180683239162652e-05, "loss": 1.9674, "step": 19228500 }, { "epoch": 55.66, "learning_rate": 2.217996103881067e-05, "loss": 1.9657, "step": 19229000 }, { "epoch": 55.66, "learning_rate": 2.2179237391163397e-05, "loss": 2.0042, "step": 19229500 }, { "epoch": 55.66, "learning_rate": 2.217851374351612e-05, "loss": 1.9898, "step": 19230000 }, { "epoch": 55.66, "learning_rate": 2.217779009586884e-05, "loss": 1.9862, "step": 19230500 }, { "epoch": 55.67, "learning_rate": 2.2177066448221564e-05, "loss": 1.9672, "step": 19231000 }, { "epoch": 55.67, "learning_rate": 2.2176344247869583e-05, "loss": 1.9539, "step": 19231500 }, { "epoch": 55.67, "learning_rate": 2.2175620600222305e-05, "loss": 1.9895, "step": 19232000 }, { "epoch": 55.67, "learning_rate": 2.2174896952575027e-05, "loss": 1.9724, "step": 19232500 }, { "epoch": 55.67, "learning_rate": 2.2174173304927753e-05, "loss": 1.9628, "step": 19233000 }, { "epoch": 55.67, "learning_rate": 2.217345110457577e-05, "loss": 1.9613, "step": 19233500 }, { "epoch": 55.67, "learning_rate": 2.2172727456928494e-05, "loss": 1.9611, "step": 19234000 }, { "epoch": 55.68, "learning_rate": 2.2172003809281216e-05, "loss": 1.9654, "step": 19234500 }, { "epoch": 55.68, "learning_rate": 2.2171280161633938e-05, "loss": 1.9459, "step": 19235000 }, { "epoch": 55.68, "learning_rate": 2.2170557961281957e-05, "loss": 1.9587, "step": 19235500 }, { "epoch": 55.68, "learning_rate": 2.216983431363468e-05, "loss": 1.9408, "step": 19236000 }, { "epoch": 55.68, "learning_rate": 2.21691106659874e-05, "loss": 1.9651, "step": 19236500 }, { "epoch": 55.68, "learning_rate": 2.2168387018340127e-05, "loss": 1.9767, "step": 19237000 }, { "epoch": 55.68, "learning_rate": 2.216766337069285e-05, "loss": 1.962, "step": 19237500 }, { "epoch": 55.69, "learning_rate": 2.216694117034087e-05, "loss": 1.9853, "step": 19238000 }, { "epoch": 55.69, "learning_rate": 2.216621752269359e-05, "loss": 1.9753, "step": 19238500 }, { "epoch": 55.69, "learning_rate": 2.216549532234161e-05, "loss": 1.9981, "step": 19239000 }, { "epoch": 55.69, "learning_rate": 2.2164771674694332e-05, "loss": 1.9844, "step": 19239500 }, { "epoch": 55.69, "learning_rate": 2.2164048027047054e-05, "loss": 1.9816, "step": 19240000 }, { "epoch": 55.69, "learning_rate": 2.2163325826695073e-05, "loss": 1.9725, "step": 19240500 }, { "epoch": 55.69, "learning_rate": 2.21626021790478e-05, "loss": 1.9583, "step": 19241000 }, { "epoch": 55.7, "learning_rate": 2.216187853140052e-05, "loss": 2.0042, "step": 19241500 }, { "epoch": 55.7, "learning_rate": 2.2161154883753243e-05, "loss": 1.9533, "step": 19242000 }, { "epoch": 55.7, "learning_rate": 2.2160431236105965e-05, "loss": 1.9512, "step": 19242500 }, { "epoch": 55.7, "learning_rate": 2.2159707588458688e-05, "loss": 1.9887, "step": 19243000 }, { "epoch": 55.7, "learning_rate": 2.2158983940811413e-05, "loss": 1.962, "step": 19243500 }, { "epoch": 55.7, "learning_rate": 2.2158260293164135e-05, "loss": 1.9693, "step": 19244000 }, { "epoch": 55.7, "learning_rate": 2.2157538092812154e-05, "loss": 1.9691, "step": 19244500 }, { "epoch": 55.71, "learning_rate": 2.2156814445164877e-05, "loss": 1.9954, "step": 19245000 }, { "epoch": 55.71, "learning_rate": 2.2156092244812896e-05, "loss": 1.9551, "step": 19245500 }, { "epoch": 55.71, "learning_rate": 2.2155368597165618e-05, "loss": 1.9794, "step": 19246000 }, { "epoch": 55.71, "learning_rate": 2.215464494951834e-05, "loss": 1.9833, "step": 19246500 }, { "epoch": 55.71, "learning_rate": 2.2153921301871062e-05, "loss": 1.9619, "step": 19247000 }, { "epoch": 55.71, "learning_rate": 2.2153197654223788e-05, "loss": 1.9624, "step": 19247500 }, { "epoch": 55.72, "learning_rate": 2.215247400657651e-05, "loss": 1.978, "step": 19248000 }, { "epoch": 55.72, "learning_rate": 2.2151750358929236e-05, "loss": 1.9643, "step": 19248500 }, { "epoch": 55.72, "learning_rate": 2.2151026711281958e-05, "loss": 1.9695, "step": 19249000 }, { "epoch": 55.72, "learning_rate": 2.215030306363468e-05, "loss": 1.9683, "step": 19249500 }, { "epoch": 55.72, "learning_rate": 2.2149579415987402e-05, "loss": 1.9747, "step": 19250000 }, { "epoch": 55.72, "learning_rate": 2.214885721563542e-05, "loss": 1.9733, "step": 19250500 }, { "epoch": 55.72, "learning_rate": 2.2148133567988144e-05, "loss": 2.0025, "step": 19251000 }, { "epoch": 55.73, "learning_rate": 2.2147409920340866e-05, "loss": 1.9558, "step": 19251500 }, { "epoch": 55.73, "learning_rate": 2.214668627269359e-05, "loss": 1.986, "step": 19252000 }, { "epoch": 55.73, "learning_rate": 2.2145962625046314e-05, "loss": 1.9705, "step": 19252500 }, { "epoch": 55.73, "learning_rate": 2.214523897739904e-05, "loss": 1.9785, "step": 19253000 }, { "epoch": 55.73, "learning_rate": 2.214451822434235e-05, "loss": 1.9751, "step": 19253500 }, { "epoch": 55.73, "learning_rate": 2.2143794576695074e-05, "loss": 1.971, "step": 19254000 }, { "epoch": 55.73, "learning_rate": 2.2143070929047796e-05, "loss": 2.01, "step": 19254500 }, { "epoch": 55.74, "learning_rate": 2.214234728140052e-05, "loss": 1.9996, "step": 19255000 }, { "epoch": 55.74, "learning_rate": 2.214162363375324e-05, "loss": 1.9545, "step": 19255500 }, { "epoch": 55.74, "learning_rate": 2.2140899986105966e-05, "loss": 1.9642, "step": 19256000 }, { "epoch": 55.74, "learning_rate": 2.2140176338458692e-05, "loss": 1.9928, "step": 19256500 }, { "epoch": 55.74, "learning_rate": 2.2139452690811414e-05, "loss": 1.9499, "step": 19257000 }, { "epoch": 55.74, "learning_rate": 2.2138729043164136e-05, "loss": 1.9655, "step": 19257500 }, { "epoch": 55.74, "learning_rate": 2.213800539551686e-05, "loss": 1.9843, "step": 19258000 }, { "epoch": 55.75, "learning_rate": 2.213728174786958e-05, "loss": 1.9739, "step": 19258500 }, { "epoch": 55.75, "learning_rate": 2.21365595475176e-05, "loss": 1.9626, "step": 19259000 }, { "epoch": 55.75, "learning_rate": 2.2135835899870325e-05, "loss": 1.9899, "step": 19259500 }, { "epoch": 55.75, "learning_rate": 2.2135112252223048e-05, "loss": 1.9683, "step": 19260000 }, { "epoch": 55.75, "learning_rate": 2.213438860457577e-05, "loss": 1.9399, "step": 19260500 }, { "epoch": 55.75, "learning_rate": 2.213366640422379e-05, "loss": 1.9627, "step": 19261000 }, { "epoch": 55.75, "learning_rate": 2.213294275657651e-05, "loss": 1.9692, "step": 19261500 }, { "epoch": 55.76, "learning_rate": 2.2132219108929233e-05, "loss": 1.9768, "step": 19262000 }, { "epoch": 55.76, "learning_rate": 2.2131496908577252e-05, "loss": 1.9876, "step": 19262500 }, { "epoch": 55.76, "learning_rate": 2.2130773260929974e-05, "loss": 1.971, "step": 19263000 }, { "epoch": 55.76, "learning_rate": 2.21300496132827e-05, "loss": 2.0019, "step": 19263500 }, { "epoch": 55.76, "learning_rate": 2.2129325965635422e-05, "loss": 1.9609, "step": 19264000 }, { "epoch": 55.76, "learning_rate": 2.2128602317988144e-05, "loss": 1.9617, "step": 19264500 }, { "epoch": 55.76, "learning_rate": 2.2127878670340867e-05, "loss": 1.9924, "step": 19265000 }, { "epoch": 55.77, "learning_rate": 2.2127155022693592e-05, "loss": 1.9556, "step": 19265500 }, { "epoch": 55.77, "learning_rate": 2.2126431375046315e-05, "loss": 1.9518, "step": 19266000 }, { "epoch": 55.77, "learning_rate": 2.212570917469433e-05, "loss": 1.952, "step": 19266500 }, { "epoch": 55.77, "learning_rate": 2.2124985527047056e-05, "loss": 1.9779, "step": 19267000 }, { "epoch": 55.77, "learning_rate": 2.212426187939978e-05, "loss": 1.945, "step": 19267500 }, { "epoch": 55.77, "learning_rate": 2.2123538231752504e-05, "loss": 1.9528, "step": 19268000 }, { "epoch": 55.77, "learning_rate": 2.2122814584105226e-05, "loss": 1.9722, "step": 19268500 }, { "epoch": 55.78, "learning_rate": 2.212209238375324e-05, "loss": 1.984, "step": 19269000 }, { "epoch": 55.78, "learning_rate": 2.2121368736105967e-05, "loss": 1.9632, "step": 19269500 }, { "epoch": 55.78, "learning_rate": 2.212064508845869e-05, "loss": 1.9717, "step": 19270000 }, { "epoch": 55.78, "learning_rate": 2.211992144081141e-05, "loss": 1.9749, "step": 19270500 }, { "epoch": 55.78, "learning_rate": 2.2119197793164137e-05, "loss": 1.9556, "step": 19271000 }, { "epoch": 55.78, "learning_rate": 2.211847414551686e-05, "loss": 1.978, "step": 19271500 }, { "epoch": 55.78, "learning_rate": 2.211775049786958e-05, "loss": 1.9489, "step": 19272000 }, { "epoch": 55.79, "learning_rate": 2.2117026850222307e-05, "loss": 1.9922, "step": 19272500 }, { "epoch": 55.79, "learning_rate": 2.211630320257503e-05, "loss": 1.9945, "step": 19273000 }, { "epoch": 55.79, "learning_rate": 2.211557955492775e-05, "loss": 1.9898, "step": 19273500 }, { "epoch": 55.79, "learning_rate": 2.2114857354575767e-05, "loss": 1.9661, "step": 19274000 }, { "epoch": 55.79, "learning_rate": 2.2114133706928493e-05, "loss": 1.9753, "step": 19274500 }, { "epoch": 55.79, "learning_rate": 2.211341005928122e-05, "loss": 1.9609, "step": 19275000 }, { "epoch": 55.79, "learning_rate": 2.211268641163394e-05, "loss": 1.9783, "step": 19275500 }, { "epoch": 55.8, "learning_rate": 2.2111964211281956e-05, "loss": 1.9642, "step": 19276000 }, { "epoch": 55.8, "learning_rate": 2.2111240563634682e-05, "loss": 1.9697, "step": 19276500 }, { "epoch": 55.8, "learning_rate": 2.2110516915987404e-05, "loss": 1.9621, "step": 19277000 }, { "epoch": 55.8, "learning_rate": 2.210979471563542e-05, "loss": 1.9908, "step": 19277500 }, { "epoch": 55.8, "learning_rate": 2.2109071067988145e-05, "loss": 1.9974, "step": 19278000 }, { "epoch": 55.8, "learning_rate": 2.2108347420340867e-05, "loss": 1.9722, "step": 19278500 }, { "epoch": 55.8, "learning_rate": 2.2107623772693593e-05, "loss": 1.9666, "step": 19279000 }, { "epoch": 55.81, "learning_rate": 2.2106900125046315e-05, "loss": 1.9808, "step": 19279500 }, { "epoch": 55.81, "learning_rate": 2.210617792469433e-05, "loss": 1.9946, "step": 19280000 }, { "epoch": 55.81, "learning_rate": 2.2105454277047056e-05, "loss": 1.988, "step": 19280500 }, { "epoch": 55.81, "learning_rate": 2.210473062939978e-05, "loss": 1.9514, "step": 19281000 }, { "epoch": 55.81, "learning_rate": 2.21040069817525e-05, "loss": 1.9654, "step": 19281500 }, { "epoch": 55.81, "learning_rate": 2.2103283334105227e-05, "loss": 1.97, "step": 19282000 }, { "epoch": 55.81, "learning_rate": 2.2102561133753246e-05, "loss": 1.9823, "step": 19282500 }, { "epoch": 55.82, "learning_rate": 2.210183893340126e-05, "loss": 1.9317, "step": 19283000 }, { "epoch": 55.82, "learning_rate": 2.2101115285753983e-05, "loss": 1.9624, "step": 19283500 }, { "epoch": 55.82, "learning_rate": 2.2100391638106706e-05, "loss": 1.9901, "step": 19284000 }, { "epoch": 55.82, "learning_rate": 2.209966799045943e-05, "loss": 1.9803, "step": 19284500 }, { "epoch": 55.82, "learning_rate": 2.2098944342812153e-05, "loss": 1.9722, "step": 19285000 }, { "epoch": 55.82, "learning_rate": 2.2098220695164876e-05, "loss": 1.9793, "step": 19285500 }, { "epoch": 55.83, "learning_rate": 2.20974970475176e-05, "loss": 1.9595, "step": 19286000 }, { "epoch": 55.83, "learning_rate": 2.209677484716562e-05, "loss": 1.9758, "step": 19286500 }, { "epoch": 55.83, "learning_rate": 2.2096051199518342e-05, "loss": 1.9591, "step": 19287000 }, { "epoch": 55.83, "learning_rate": 2.2095327551871065e-05, "loss": 1.9679, "step": 19287500 }, { "epoch": 55.83, "learning_rate": 2.2094603904223787e-05, "loss": 1.9953, "step": 19288000 }, { "epoch": 55.83, "learning_rate": 2.2093881703871806e-05, "loss": 1.9685, "step": 19288500 }, { "epoch": 55.83, "learning_rate": 2.2093158056224528e-05, "loss": 1.97, "step": 19289000 }, { "epoch": 55.84, "learning_rate": 2.209243440857725e-05, "loss": 1.9684, "step": 19289500 }, { "epoch": 55.84, "learning_rate": 2.2091710760929976e-05, "loss": 1.9762, "step": 19290000 }, { "epoch": 55.84, "learning_rate": 2.2090987113282698e-05, "loss": 1.9743, "step": 19290500 }, { "epoch": 55.84, "learning_rate": 2.209026346563542e-05, "loss": 2.0042, "step": 19291000 }, { "epoch": 55.84, "learning_rate": 2.2089539817988146e-05, "loss": 1.9617, "step": 19291500 }, { "epoch": 55.84, "learning_rate": 2.2088816170340868e-05, "loss": 1.9708, "step": 19292000 }, { "epoch": 55.84, "learning_rate": 2.208809252269359e-05, "loss": 1.98, "step": 19292500 }, { "epoch": 55.85, "learning_rate": 2.2087368875046313e-05, "loss": 1.9524, "step": 19293000 }, { "epoch": 55.85, "learning_rate": 2.208664522739904e-05, "loss": 1.9921, "step": 19293500 }, { "epoch": 55.85, "learning_rate": 2.208592157975176e-05, "loss": 1.9902, "step": 19294000 }, { "epoch": 55.85, "learning_rate": 2.208519937939978e-05, "loss": 1.9496, "step": 19294500 }, { "epoch": 55.85, "learning_rate": 2.2084475731752502e-05, "loss": 1.9512, "step": 19295000 }, { "epoch": 55.85, "learning_rate": 2.2083752084105224e-05, "loss": 1.9751, "step": 19295500 }, { "epoch": 55.85, "learning_rate": 2.2083028436457946e-05, "loss": 1.99, "step": 19296000 }, { "epoch": 55.86, "learning_rate": 2.2082304788810672e-05, "loss": 1.9771, "step": 19296500 }, { "epoch": 55.86, "learning_rate": 2.2081581141163397e-05, "loss": 1.9683, "step": 19297000 }, { "epoch": 55.86, "learning_rate": 2.208085749351612e-05, "loss": 1.9831, "step": 19297500 }, { "epoch": 55.86, "learning_rate": 2.2080133845868842e-05, "loss": 1.9806, "step": 19298000 }, { "epoch": 55.86, "learning_rate": 2.2079410198221564e-05, "loss": 1.9811, "step": 19298500 }, { "epoch": 55.86, "learning_rate": 2.2078686550574286e-05, "loss": 1.9603, "step": 19299000 }, { "epoch": 55.86, "learning_rate": 2.207796290292701e-05, "loss": 1.9821, "step": 19299500 }, { "epoch": 55.87, "learning_rate": 2.2077240702575028e-05, "loss": 2.0051, "step": 19300000 }, { "epoch": 55.87, "learning_rate": 2.2076517054927753e-05, "loss": 1.962, "step": 19300500 }, { "epoch": 55.87, "learning_rate": 2.2075794854575772e-05, "loss": 1.9943, "step": 19301000 }, { "epoch": 55.87, "learning_rate": 2.2075071206928494e-05, "loss": 1.9714, "step": 19301500 }, { "epoch": 55.87, "learning_rate": 2.2074347559281217e-05, "loss": 1.9989, "step": 19302000 }, { "epoch": 55.87, "learning_rate": 2.207362391163394e-05, "loss": 1.9922, "step": 19302500 }, { "epoch": 55.87, "learning_rate": 2.2072901711281958e-05, "loss": 1.9631, "step": 19303000 }, { "epoch": 55.88, "learning_rate": 2.207217806363468e-05, "loss": 1.9736, "step": 19303500 }, { "epoch": 55.88, "learning_rate": 2.2071454415987402e-05, "loss": 1.9685, "step": 19304000 }, { "epoch": 55.88, "learning_rate": 2.2070730768340128e-05, "loss": 1.9666, "step": 19304500 }, { "epoch": 55.88, "learning_rate": 2.2070008567988147e-05, "loss": 1.9677, "step": 19305000 }, { "epoch": 55.88, "learning_rate": 2.206928492034087e-05, "loss": 1.9771, "step": 19305500 }, { "epoch": 55.88, "learning_rate": 2.206856127269359e-05, "loss": 1.9355, "step": 19306000 }, { "epoch": 55.88, "learning_rate": 2.2067837625046314e-05, "loss": 1.9794, "step": 19306500 }, { "epoch": 55.89, "learning_rate": 2.2067113977399036e-05, "loss": 1.9751, "step": 19307000 }, { "epoch": 55.89, "learning_rate": 2.206639032975176e-05, "loss": 1.9352, "step": 19307500 }, { "epoch": 55.89, "learning_rate": 2.2065666682104487e-05, "loss": 1.9953, "step": 19308000 }, { "epoch": 55.89, "learning_rate": 2.206494303445721e-05, "loss": 1.9688, "step": 19308500 }, { "epoch": 55.89, "learning_rate": 2.2064220834105225e-05, "loss": 1.9594, "step": 19309000 }, { "epoch": 55.89, "learning_rate": 2.206349718645795e-05, "loss": 1.9612, "step": 19309500 }, { "epoch": 55.89, "learning_rate": 2.2062773538810673e-05, "loss": 1.9503, "step": 19310000 }, { "epoch": 55.9, "learning_rate": 2.2062049891163395e-05, "loss": 1.9692, "step": 19310500 }, { "epoch": 55.9, "learning_rate": 2.2061326243516117e-05, "loss": 1.966, "step": 19311000 }, { "epoch": 55.9, "learning_rate": 2.206060259586884e-05, "loss": 1.9761, "step": 19311500 }, { "epoch": 55.9, "learning_rate": 2.2059878948221565e-05, "loss": 1.9775, "step": 19312000 }, { "epoch": 55.9, "learning_rate": 2.2059155300574287e-05, "loss": 1.9615, "step": 19312500 }, { "epoch": 55.9, "learning_rate": 2.2058431652927013e-05, "loss": 1.9589, "step": 19313000 }, { "epoch": 55.9, "learning_rate": 2.2057708005279735e-05, "loss": 1.9781, "step": 19313500 }, { "epoch": 55.91, "learning_rate": 2.2056984357632457e-05, "loss": 1.9627, "step": 19314000 }, { "epoch": 55.91, "learning_rate": 2.2056262157280473e-05, "loss": 1.9945, "step": 19314500 }, { "epoch": 55.91, "learning_rate": 2.2055539956928492e-05, "loss": 1.9583, "step": 19315000 }, { "epoch": 55.91, "learning_rate": 2.2054816309281217e-05, "loss": 1.97, "step": 19315500 }, { "epoch": 55.91, "learning_rate": 2.205409266163394e-05, "loss": 1.9735, "step": 19316000 }, { "epoch": 55.91, "learning_rate": 2.2053369013986662e-05, "loss": 1.9548, "step": 19316500 }, { "epoch": 55.91, "learning_rate": 2.2052645366339387e-05, "loss": 1.9682, "step": 19317000 }, { "epoch": 55.92, "learning_rate": 2.205192171869211e-05, "loss": 1.9606, "step": 19317500 }, { "epoch": 55.92, "learning_rate": 2.2051198071044832e-05, "loss": 1.9875, "step": 19318000 }, { "epoch": 55.92, "learning_rate": 2.2050474423397554e-05, "loss": 1.9607, "step": 19318500 }, { "epoch": 55.92, "learning_rate": 2.204975077575028e-05, "loss": 1.9703, "step": 19319000 }, { "epoch": 55.92, "learning_rate": 2.2049027128103002e-05, "loss": 1.9728, "step": 19319500 }, { "epoch": 55.92, "learning_rate": 2.2048303480455724e-05, "loss": 2.0002, "step": 19320000 }, { "epoch": 55.92, "learning_rate": 2.204757983280845e-05, "loss": 1.9444, "step": 19320500 }, { "epoch": 55.93, "learning_rate": 2.2046856185161172e-05, "loss": 1.9407, "step": 19321000 }, { "epoch": 55.93, "learning_rate": 2.2046133984809188e-05, "loss": 1.9571, "step": 19321500 }, { "epoch": 55.93, "learning_rate": 2.2045410337161913e-05, "loss": 1.9901, "step": 19322000 }, { "epoch": 55.93, "learning_rate": 2.2044686689514636e-05, "loss": 1.9768, "step": 19322500 }, { "epoch": 55.93, "learning_rate": 2.204396304186736e-05, "loss": 1.9879, "step": 19323000 }, { "epoch": 55.93, "learning_rate": 2.2043240841515377e-05, "loss": 1.9906, "step": 19323500 }, { "epoch": 55.94, "learning_rate": 2.2042517193868102e-05, "loss": 1.9731, "step": 19324000 }, { "epoch": 55.94, "learning_rate": 2.2041793546220825e-05, "loss": 2.0012, "step": 19324500 }, { "epoch": 55.94, "learning_rate": 2.2041069898573547e-05, "loss": 1.9789, "step": 19325000 }, { "epoch": 55.94, "learning_rate": 2.204034625092627e-05, "loss": 1.9804, "step": 19325500 }, { "epoch": 55.94, "learning_rate": 2.203962260327899e-05, "loss": 1.9711, "step": 19326000 }, { "epoch": 55.94, "learning_rate": 2.2038898955631717e-05, "loss": 1.949, "step": 19326500 }, { "epoch": 55.94, "learning_rate": 2.203817530798444e-05, "loss": 1.9857, "step": 19327000 }, { "epoch": 55.95, "learning_rate": 2.2037453107632458e-05, "loss": 1.974, "step": 19327500 }, { "epoch": 55.95, "learning_rate": 2.203672945998518e-05, "loss": 1.9673, "step": 19328000 }, { "epoch": 55.95, "learning_rate": 2.2036005812337903e-05, "loss": 1.9629, "step": 19328500 }, { "epoch": 55.95, "learning_rate": 2.2035282164690628e-05, "loss": 1.985, "step": 19329000 }, { "epoch": 55.95, "learning_rate": 2.203455851704335e-05, "loss": 1.9895, "step": 19329500 }, { "epoch": 55.95, "learning_rate": 2.2033836316691366e-05, "loss": 1.9902, "step": 19330000 }, { "epoch": 55.95, "learning_rate": 2.2033114116339388e-05, "loss": 1.9659, "step": 19330500 }, { "epoch": 55.96, "learning_rate": 2.203239046869211e-05, "loss": 1.977, "step": 19331000 }, { "epoch": 55.96, "learning_rate": 2.2031666821044833e-05, "loss": 1.9891, "step": 19331500 }, { "epoch": 55.96, "learning_rate": 2.2030943173397555e-05, "loss": 1.9624, "step": 19332000 }, { "epoch": 55.96, "learning_rate": 2.2030220973045574e-05, "loss": 1.9585, "step": 19332500 }, { "epoch": 55.96, "learning_rate": 2.2029497325398296e-05, "loss": 1.9748, "step": 19333000 }, { "epoch": 55.96, "learning_rate": 2.2028775125046315e-05, "loss": 1.9653, "step": 19333500 }, { "epoch": 55.96, "learning_rate": 2.2028051477399037e-05, "loss": 1.9973, "step": 19334000 }, { "epoch": 55.97, "learning_rate": 2.2027327829751763e-05, "loss": 1.969, "step": 19334500 }, { "epoch": 55.97, "learning_rate": 2.2026604182104485e-05, "loss": 1.9617, "step": 19335000 }, { "epoch": 55.97, "learning_rate": 2.2025880534457207e-05, "loss": 1.9862, "step": 19335500 }, { "epoch": 55.97, "learning_rate": 2.202515688680993e-05, "loss": 1.982, "step": 19336000 }, { "epoch": 55.97, "learning_rate": 2.2024433239162652e-05, "loss": 1.968, "step": 19336500 }, { "epoch": 55.97, "learning_rate": 2.202371103881067e-05, "loss": 1.9727, "step": 19337000 }, { "epoch": 55.97, "learning_rate": 2.2022987391163393e-05, "loss": 1.9719, "step": 19337500 }, { "epoch": 55.98, "learning_rate": 2.202226374351612e-05, "loss": 1.9769, "step": 19338000 }, { "epoch": 55.98, "learning_rate": 2.202154009586884e-05, "loss": 1.96, "step": 19338500 }, { "epoch": 55.98, "learning_rate": 2.2020816448221567e-05, "loss": 1.9648, "step": 19339000 }, { "epoch": 55.98, "learning_rate": 2.202009280057429e-05, "loss": 1.988, "step": 19339500 }, { "epoch": 55.98, "learning_rate": 2.201936915292701e-05, "loss": 1.9792, "step": 19340000 }, { "epoch": 55.98, "learning_rate": 2.2018645505279733e-05, "loss": 1.9794, "step": 19340500 }, { "epoch": 55.98, "learning_rate": 2.2017921857632455e-05, "loss": 1.9952, "step": 19341000 }, { "epoch": 55.99, "learning_rate": 2.2017199657280474e-05, "loss": 1.9716, "step": 19341500 }, { "epoch": 55.99, "learning_rate": 2.20164760096332e-05, "loss": 1.9693, "step": 19342000 }, { "epoch": 55.99, "learning_rate": 2.2015752361985922e-05, "loss": 1.9581, "step": 19342500 }, { "epoch": 55.99, "learning_rate": 2.2015028714338645e-05, "loss": 1.9645, "step": 19343000 }, { "epoch": 55.99, "learning_rate": 2.2014305066691367e-05, "loss": 1.9989, "step": 19343500 }, { "epoch": 55.99, "learning_rate": 2.2013581419044092e-05, "loss": 1.9718, "step": 19344000 }, { "epoch": 55.99, "learning_rate": 2.2012859218692108e-05, "loss": 1.9909, "step": 19344500 }, { "epoch": 56.0, "learning_rate": 2.201213557104483e-05, "loss": 1.9929, "step": 19345000 }, { "epoch": 56.0, "learning_rate": 2.2011411923397556e-05, "loss": 1.9667, "step": 19345500 }, { "epoch": 56.0, "learning_rate": 2.2010688275750278e-05, "loss": 1.9712, "step": 19346000 }, { "epoch": 56.0, "eval_accuracy": 0.6760111246694773, "eval_accuracy_mlm": 0.6422216699057036, "eval_accuracy_nsp": 0.8570142030220951, "eval_loss": 2.155001163482666, "eval_runtime": 331.9519, "eval_samples_per_second": 1314.606, "eval_steps_per_second": 54.776, "step": 19346432 }, { "epoch": 56.0, "learning_rate": 2.2009966075398297e-05, "loss": 1.9831, "step": 19346500 }, { "epoch": 56.0, "learning_rate": 2.2009243875046316e-05, "loss": 1.9425, "step": 19347000 }, { "epoch": 56.0, "learning_rate": 2.2008520227399038e-05, "loss": 1.953, "step": 19347500 }, { "epoch": 56.0, "learning_rate": 2.200779657975176e-05, "loss": 1.9466, "step": 19348000 }, { "epoch": 56.01, "learning_rate": 2.2007072932104483e-05, "loss": 1.9602, "step": 19348500 }, { "epoch": 56.01, "learning_rate": 2.20063507317525e-05, "loss": 1.947, "step": 19349000 }, { "epoch": 56.01, "learning_rate": 2.2005627084105227e-05, "loss": 1.9583, "step": 19349500 }, { "epoch": 56.01, "learning_rate": 2.200490343645795e-05, "loss": 1.9722, "step": 19350000 }, { "epoch": 56.01, "learning_rate": 2.200417978881067e-05, "loss": 1.9281, "step": 19350500 }, { "epoch": 56.01, "learning_rate": 2.200345758845869e-05, "loss": 1.9633, "step": 19351000 }, { "epoch": 56.01, "learning_rate": 2.2002733940811413e-05, "loss": 1.938, "step": 19351500 }, { "epoch": 56.02, "learning_rate": 2.2002010293164135e-05, "loss": 1.9654, "step": 19352000 }, { "epoch": 56.02, "learning_rate": 2.2001286645516857e-05, "loss": 1.9775, "step": 19352500 }, { "epoch": 56.02, "learning_rate": 2.2000562997869583e-05, "loss": 1.9549, "step": 19353000 }, { "epoch": 56.02, "learning_rate": 2.1999839350222305e-05, "loss": 1.964, "step": 19353500 }, { "epoch": 56.02, "learning_rate": 2.199911570257503e-05, "loss": 1.953, "step": 19354000 }, { "epoch": 56.02, "learning_rate": 2.1998393502223046e-05, "loss": 1.9697, "step": 19354500 }, { "epoch": 56.02, "learning_rate": 2.199766985457577e-05, "loss": 1.96, "step": 19355000 }, { "epoch": 56.03, "learning_rate": 2.1996946206928494e-05, "loss": 1.9548, "step": 19355500 }, { "epoch": 56.03, "learning_rate": 2.1996222559281216e-05, "loss": 1.931, "step": 19356000 }, { "epoch": 56.03, "learning_rate": 2.199549891163394e-05, "loss": 1.9517, "step": 19356500 }, { "epoch": 56.03, "learning_rate": 2.1994775263986664e-05, "loss": 1.9388, "step": 19357000 }, { "epoch": 56.03, "learning_rate": 2.1994051616339386e-05, "loss": 1.9672, "step": 19357500 }, { "epoch": 56.03, "learning_rate": 2.199332796869211e-05, "loss": 1.983, "step": 19358000 }, { "epoch": 56.03, "learning_rate": 2.1992605768340128e-05, "loss": 1.9552, "step": 19358500 }, { "epoch": 56.04, "learning_rate": 2.199188212069285e-05, "loss": 1.9521, "step": 19359000 }, { "epoch": 56.04, "learning_rate": 2.1991158473045572e-05, "loss": 1.9437, "step": 19359500 }, { "epoch": 56.04, "learning_rate": 2.1990434825398294e-05, "loss": 1.9451, "step": 19360000 }, { "epoch": 56.04, "learning_rate": 2.198971117775102e-05, "loss": 1.9409, "step": 19360500 }, { "epoch": 56.04, "learning_rate": 2.1988987530103746e-05, "loss": 1.9525, "step": 19361000 }, { "epoch": 56.04, "learning_rate": 2.198826532975176e-05, "loss": 1.9648, "step": 19361500 }, { "epoch": 56.05, "learning_rate": 2.1987541682104483e-05, "loss": 1.9468, "step": 19362000 }, { "epoch": 56.05, "learning_rate": 2.1986818034457206e-05, "loss": 1.967, "step": 19362500 }, { "epoch": 56.05, "learning_rate": 2.198609438680993e-05, "loss": 1.9682, "step": 19363000 }, { "epoch": 56.05, "learning_rate": 2.1985370739162653e-05, "loss": 1.9514, "step": 19363500 }, { "epoch": 56.05, "learning_rate": 2.198464709151538e-05, "loss": 1.9606, "step": 19364000 }, { "epoch": 56.05, "learning_rate": 2.19839234438681e-05, "loss": 1.9584, "step": 19364500 }, { "epoch": 56.05, "learning_rate": 2.1983199796220824e-05, "loss": 1.9355, "step": 19365000 }, { "epoch": 56.06, "learning_rate": 2.1982477595868843e-05, "loss": 1.9518, "step": 19365500 }, { "epoch": 56.06, "learning_rate": 2.1981753948221565e-05, "loss": 1.9823, "step": 19366000 }, { "epoch": 56.06, "learning_rate": 2.1981030300574287e-05, "loss": 1.9429, "step": 19366500 }, { "epoch": 56.06, "learning_rate": 2.1980308100222306e-05, "loss": 1.9619, "step": 19367000 }, { "epoch": 56.06, "learning_rate": 2.1979584452575028e-05, "loss": 1.9374, "step": 19367500 }, { "epoch": 56.06, "learning_rate": 2.1978860804927754e-05, "loss": 1.9503, "step": 19368000 }, { "epoch": 56.06, "learning_rate": 2.1978137157280476e-05, "loss": 1.9645, "step": 19368500 }, { "epoch": 56.07, "learning_rate": 2.1977414956928495e-05, "loss": 1.9417, "step": 19369000 }, { "epoch": 56.07, "learning_rate": 2.1976691309281217e-05, "loss": 1.9599, "step": 19369500 }, { "epoch": 56.07, "learning_rate": 2.197596766163394e-05, "loss": 1.9747, "step": 19370000 }, { "epoch": 56.07, "learning_rate": 2.197524401398666e-05, "loss": 1.9359, "step": 19370500 }, { "epoch": 56.07, "learning_rate": 2.1974520366339384e-05, "loss": 1.9546, "step": 19371000 }, { "epoch": 56.07, "learning_rate": 2.197379671869211e-05, "loss": 1.9521, "step": 19371500 }, { "epoch": 56.07, "learning_rate": 2.1973073071044835e-05, "loss": 1.9357, "step": 19372000 }, { "epoch": 56.08, "learning_rate": 2.1972349423397557e-05, "loss": 1.9646, "step": 19372500 }, { "epoch": 56.08, "learning_rate": 2.1971627223045573e-05, "loss": 1.987, "step": 19373000 }, { "epoch": 56.08, "learning_rate": 2.1970905022693592e-05, "loss": 1.9396, "step": 19373500 }, { "epoch": 56.08, "learning_rate": 2.1970181375046314e-05, "loss": 2.0014, "step": 19374000 }, { "epoch": 56.08, "learning_rate": 2.1969457727399036e-05, "loss": 1.9722, "step": 19374500 }, { "epoch": 56.08, "learning_rate": 2.196873407975176e-05, "loss": 1.9612, "step": 19375000 }, { "epoch": 56.08, "learning_rate": 2.1968010432104484e-05, "loss": 1.974, "step": 19375500 }, { "epoch": 56.09, "learning_rate": 2.196728678445721e-05, "loss": 1.9733, "step": 19376000 }, { "epoch": 56.09, "learning_rate": 2.1966564584105225e-05, "loss": 1.9367, "step": 19376500 }, { "epoch": 56.09, "learning_rate": 2.1965842383753244e-05, "loss": 1.9752, "step": 19377000 }, { "epoch": 56.09, "learning_rate": 2.1965118736105967e-05, "loss": 1.9618, "step": 19377500 }, { "epoch": 56.09, "learning_rate": 2.196439508845869e-05, "loss": 1.9661, "step": 19378000 }, { "epoch": 56.09, "learning_rate": 2.196367144081141e-05, "loss": 1.9782, "step": 19378500 }, { "epoch": 56.09, "learning_rate": 2.1962947793164133e-05, "loss": 1.9581, "step": 19379000 }, { "epoch": 56.1, "learning_rate": 2.196222414551686e-05, "loss": 1.9674, "step": 19379500 }, { "epoch": 56.1, "learning_rate": 2.1961500497869584e-05, "loss": 1.9614, "step": 19380000 }, { "epoch": 56.1, "learning_rate": 2.19607782975176e-05, "loss": 1.9471, "step": 19380500 }, { "epoch": 56.1, "learning_rate": 2.1960054649870322e-05, "loss": 1.9664, "step": 19381000 }, { "epoch": 56.1, "learning_rate": 2.1959331002223048e-05, "loss": 1.9748, "step": 19381500 }, { "epoch": 56.1, "learning_rate": 2.195860735457577e-05, "loss": 1.9631, "step": 19382000 }, { "epoch": 56.1, "learning_rate": 2.1957883706928492e-05, "loss": 1.9568, "step": 19382500 }, { "epoch": 56.11, "learning_rate": 2.1957160059281218e-05, "loss": 1.9484, "step": 19383000 }, { "epoch": 56.11, "learning_rate": 2.195643641163394e-05, "loss": 1.935, "step": 19383500 }, { "epoch": 56.11, "learning_rate": 2.1955712763986662e-05, "loss": 1.9515, "step": 19384000 }, { "epoch": 56.11, "learning_rate": 2.1954989116339385e-05, "loss": 1.9509, "step": 19384500 }, { "epoch": 56.11, "learning_rate": 2.195426546869211e-05, "loss": 1.9469, "step": 19385000 }, { "epoch": 56.11, "learning_rate": 2.1953541821044833e-05, "loss": 1.9665, "step": 19385500 }, { "epoch": 56.11, "learning_rate": 2.1952818173397555e-05, "loss": 1.9645, "step": 19386000 }, { "epoch": 56.12, "learning_rate": 2.195209452575028e-05, "loss": 1.96, "step": 19386500 }, { "epoch": 56.12, "learning_rate": 2.1951370878103003e-05, "loss": 1.9637, "step": 19387000 }, { "epoch": 56.12, "learning_rate": 2.195064867775102e-05, "loss": 1.9833, "step": 19387500 }, { "epoch": 56.12, "learning_rate": 2.1949925030103744e-05, "loss": 1.9664, "step": 19388000 }, { "epoch": 56.12, "learning_rate": 2.1949201382456466e-05, "loss": 1.9648, "step": 19388500 }, { "epoch": 56.12, "learning_rate": 2.1948477734809188e-05, "loss": 1.969, "step": 19389000 }, { "epoch": 56.12, "learning_rate": 2.194775408716191e-05, "loss": 1.9506, "step": 19389500 }, { "epoch": 56.13, "learning_rate": 2.1947030439514636e-05, "loss": 1.9424, "step": 19390000 }, { "epoch": 56.13, "learning_rate": 2.1946306791867362e-05, "loss": 1.9759, "step": 19390500 }, { "epoch": 56.13, "learning_rate": 2.1945583144220084e-05, "loss": 1.9766, "step": 19391000 }, { "epoch": 56.13, "learning_rate": 2.1944859496572806e-05, "loss": 1.9547, "step": 19391500 }, { "epoch": 56.13, "learning_rate": 2.194413584892553e-05, "loss": 1.9721, "step": 19392000 }, { "epoch": 56.13, "learning_rate": 2.194341220127825e-05, "loss": 1.9593, "step": 19392500 }, { "epoch": 56.13, "learning_rate": 2.1942688553630973e-05, "loss": 1.9522, "step": 19393000 }, { "epoch": 56.14, "learning_rate": 2.19419649059837e-05, "loss": 1.9712, "step": 19393500 }, { "epoch": 56.14, "learning_rate": 2.1941241258336424e-05, "loss": 1.9429, "step": 19394000 }, { "epoch": 56.14, "learning_rate": 2.194051905798444e-05, "loss": 1.958, "step": 19394500 }, { "epoch": 56.14, "learning_rate": 2.1939795410337162e-05, "loss": 1.94, "step": 19395000 }, { "epoch": 56.14, "learning_rate": 2.1939071762689888e-05, "loss": 1.9557, "step": 19395500 }, { "epoch": 56.14, "learning_rate": 2.193834811504261e-05, "loss": 1.949, "step": 19396000 }, { "epoch": 56.14, "learning_rate": 2.1937625914690625e-05, "loss": 1.946, "step": 19396500 }, { "epoch": 56.15, "learning_rate": 2.193690226704335e-05, "loss": 1.9743, "step": 19397000 }, { "epoch": 56.15, "learning_rate": 2.1936180066691367e-05, "loss": 1.9549, "step": 19397500 }, { "epoch": 56.15, "learning_rate": 2.1935456419044092e-05, "loss": 1.9558, "step": 19398000 }, { "epoch": 56.15, "learning_rate": 2.1934732771396814e-05, "loss": 1.9757, "step": 19398500 }, { "epoch": 56.15, "learning_rate": 2.1934009123749537e-05, "loss": 1.9587, "step": 19399000 }, { "epoch": 56.15, "learning_rate": 2.1933285476102262e-05, "loss": 1.9562, "step": 19399500 }, { "epoch": 56.16, "learning_rate": 2.1932561828454984e-05, "loss": 1.9692, "step": 19400000 }, { "epoch": 56.16, "learning_rate": 2.1931839628103e-05, "loss": 1.9862, "step": 19400500 }, { "epoch": 56.16, "learning_rate": 2.1931115980455726e-05, "loss": 1.9588, "step": 19401000 }, { "epoch": 56.16, "learning_rate": 2.193039233280845e-05, "loss": 1.959, "step": 19401500 }, { "epoch": 56.16, "learning_rate": 2.1929668685161174e-05, "loss": 1.9553, "step": 19402000 }, { "epoch": 56.16, "learning_rate": 2.1928945037513896e-05, "loss": 1.9625, "step": 19402500 }, { "epoch": 56.16, "learning_rate": 2.1928221389866618e-05, "loss": 1.9714, "step": 19403000 }, { "epoch": 56.17, "learning_rate": 2.192749774221934e-05, "loss": 1.9696, "step": 19403500 }, { "epoch": 56.17, "learning_rate": 2.192677554186736e-05, "loss": 1.9568, "step": 19404000 }, { "epoch": 56.17, "learning_rate": 2.192605189422008e-05, "loss": 1.9652, "step": 19404500 }, { "epoch": 56.17, "learning_rate": 2.1925328246572807e-05, "loss": 1.944, "step": 19405000 }, { "epoch": 56.17, "learning_rate": 2.192460459892553e-05, "loss": 1.974, "step": 19405500 }, { "epoch": 56.17, "learning_rate": 2.192388095127825e-05, "loss": 1.9775, "step": 19406000 }, { "epoch": 56.17, "learning_rate": 2.1923157303630977e-05, "loss": 1.951, "step": 19406500 }, { "epoch": 56.18, "learning_rate": 2.19224336559837e-05, "loss": 1.9589, "step": 19407000 }, { "epoch": 56.18, "learning_rate": 2.192171000833642e-05, "loss": 1.9567, "step": 19407500 }, { "epoch": 56.18, "learning_rate": 2.192098780798444e-05, "loss": 1.9587, "step": 19408000 }, { "epoch": 56.18, "learning_rate": 2.1920264160337163e-05, "loss": 1.9649, "step": 19408500 }, { "epoch": 56.18, "learning_rate": 2.191954051268989e-05, "loss": 1.9587, "step": 19409000 }, { "epoch": 56.18, "learning_rate": 2.191881686504261e-05, "loss": 1.9575, "step": 19409500 }, { "epoch": 56.18, "learning_rate": 2.1918094664690626e-05, "loss": 1.9492, "step": 19410000 }, { "epoch": 56.19, "learning_rate": 2.1917371017043352e-05, "loss": 1.9466, "step": 19410500 }, { "epoch": 56.19, "learning_rate": 2.1916648816691367e-05, "loss": 1.9927, "step": 19411000 }, { "epoch": 56.19, "learning_rate": 2.191592516904409e-05, "loss": 1.9742, "step": 19411500 }, { "epoch": 56.19, "learning_rate": 2.1915201521396815e-05, "loss": 1.9518, "step": 19412000 }, { "epoch": 56.19, "learning_rate": 2.1914477873749537e-05, "loss": 1.9662, "step": 19412500 }, { "epoch": 56.19, "learning_rate": 2.1913754226102263e-05, "loss": 1.9689, "step": 19413000 }, { "epoch": 56.19, "learning_rate": 2.1913030578454985e-05, "loss": 1.9532, "step": 19413500 }, { "epoch": 56.2, "learning_rate": 2.1912306930807708e-05, "loss": 1.9562, "step": 19414000 }, { "epoch": 56.2, "learning_rate": 2.191158328316043e-05, "loss": 1.9713, "step": 19414500 }, { "epoch": 56.2, "learning_rate": 2.1910859635513152e-05, "loss": 1.9379, "step": 19415000 }, { "epoch": 56.2, "learning_rate": 2.1910135987865878e-05, "loss": 1.9343, "step": 19415500 }, { "epoch": 56.2, "learning_rate": 2.1909413787513893e-05, "loss": 1.9692, "step": 19416000 }, { "epoch": 56.2, "learning_rate": 2.190869013986662e-05, "loss": 1.9345, "step": 19416500 }, { "epoch": 56.2, "learning_rate": 2.190796649221934e-05, "loss": 1.9385, "step": 19417000 }, { "epoch": 56.21, "learning_rate": 2.1907242844572067e-05, "loss": 1.9707, "step": 19417500 }, { "epoch": 56.21, "learning_rate": 2.190651919692479e-05, "loss": 1.9572, "step": 19418000 }, { "epoch": 56.21, "learning_rate": 2.190579554927751e-05, "loss": 1.9677, "step": 19418500 }, { "epoch": 56.21, "learning_rate": 2.1905071901630233e-05, "loss": 1.9485, "step": 19419000 }, { "epoch": 56.21, "learning_rate": 2.1904348253982956e-05, "loss": 1.9556, "step": 19419500 }, { "epoch": 56.21, "learning_rate": 2.1903626053630978e-05, "loss": 1.9368, "step": 19420000 }, { "epoch": 56.21, "learning_rate": 2.19029024059837e-05, "loss": 1.9755, "step": 19420500 }, { "epoch": 56.22, "learning_rate": 2.1902178758336422e-05, "loss": 1.959, "step": 19421000 }, { "epoch": 56.22, "learning_rate": 2.190145655798444e-05, "loss": 1.9558, "step": 19421500 }, { "epoch": 56.22, "learning_rate": 2.1900732910337164e-05, "loss": 1.9839, "step": 19422000 }, { "epoch": 56.22, "learning_rate": 2.1900009262689886e-05, "loss": 1.9413, "step": 19422500 }, { "epoch": 56.22, "learning_rate": 2.1899285615042608e-05, "loss": 1.94, "step": 19423000 }, { "epoch": 56.22, "learning_rate": 2.189856196739533e-05, "loss": 1.9779, "step": 19423500 }, { "epoch": 56.22, "learning_rate": 2.1897839767043353e-05, "loss": 1.9544, "step": 19424000 }, { "epoch": 56.23, "learning_rate": 2.1897117566691368e-05, "loss": 1.9438, "step": 19424500 }, { "epoch": 56.23, "learning_rate": 2.189639391904409e-05, "loss": 1.9468, "step": 19425000 }, { "epoch": 56.23, "learning_rate": 2.1895670271396816e-05, "loss": 1.9744, "step": 19425500 }, { "epoch": 56.23, "learning_rate": 2.1894946623749538e-05, "loss": 1.9535, "step": 19426000 }, { "epoch": 56.23, "learning_rate": 2.189422297610226e-05, "loss": 1.9538, "step": 19426500 }, { "epoch": 56.23, "learning_rate": 2.1893499328454983e-05, "loss": 1.9787, "step": 19427000 }, { "epoch": 56.23, "learning_rate": 2.189277568080771e-05, "loss": 1.9611, "step": 19427500 }, { "epoch": 56.24, "learning_rate": 2.189205203316043e-05, "loss": 1.9607, "step": 19428000 }, { "epoch": 56.24, "learning_rate": 2.1891328385513156e-05, "loss": 1.9552, "step": 19428500 }, { "epoch": 56.24, "learning_rate": 2.189060473786588e-05, "loss": 1.9486, "step": 19429000 }, { "epoch": 56.24, "learning_rate": 2.18898810902186e-05, "loss": 1.9545, "step": 19429500 }, { "epoch": 56.24, "learning_rate": 2.1889157442571323e-05, "loss": 1.9537, "step": 19430000 }, { "epoch": 56.24, "learning_rate": 2.1888433794924045e-05, "loss": 1.9523, "step": 19430500 }, { "epoch": 56.24, "learning_rate": 2.1887711594572064e-05, "loss": 1.9506, "step": 19431000 }, { "epoch": 56.25, "learning_rate": 2.188698794692479e-05, "loss": 1.9461, "step": 19431500 }, { "epoch": 56.25, "learning_rate": 2.1886264299277512e-05, "loss": 1.9764, "step": 19432000 }, { "epoch": 56.25, "learning_rate": 2.1885540651630234e-05, "loss": 1.9835, "step": 19432500 }, { "epoch": 56.25, "learning_rate": 2.1884817003982956e-05, "loss": 1.9384, "step": 19433000 }, { "epoch": 56.25, "learning_rate": 2.1884093356335682e-05, "loss": 1.9502, "step": 19433500 }, { "epoch": 56.25, "learning_rate": 2.1883369708688404e-05, "loss": 1.9572, "step": 19434000 }, { "epoch": 56.25, "learning_rate": 2.1882646061041126e-05, "loss": 1.9607, "step": 19434500 }, { "epoch": 56.26, "learning_rate": 2.1881922413393852e-05, "loss": 1.9853, "step": 19435000 }, { "epoch": 56.26, "learning_rate": 2.1881198765746574e-05, "loss": 1.9229, "step": 19435500 }, { "epoch": 56.26, "learning_rate": 2.1880476565394593e-05, "loss": 1.996, "step": 19436000 }, { "epoch": 56.26, "learning_rate": 2.187975436504261e-05, "loss": 1.9372, "step": 19436500 }, { "epoch": 56.26, "learning_rate": 2.187903071739533e-05, "loss": 1.9417, "step": 19437000 }, { "epoch": 56.26, "learning_rate": 2.187830851704335e-05, "loss": 1.9654, "step": 19437500 }, { "epoch": 56.27, "learning_rate": 2.1877584869396072e-05, "loss": 1.9656, "step": 19438000 }, { "epoch": 56.27, "learning_rate": 2.1876861221748794e-05, "loss": 1.9649, "step": 19438500 }, { "epoch": 56.27, "learning_rate": 2.187613757410152e-05, "loss": 1.9391, "step": 19439000 }, { "epoch": 56.27, "learning_rate": 2.1875413926454246e-05, "loss": 1.9451, "step": 19439500 }, { "epoch": 56.27, "learning_rate": 2.1874690278806968e-05, "loss": 1.9627, "step": 19440000 }, { "epoch": 56.27, "learning_rate": 2.1873968078454983e-05, "loss": 1.9544, "step": 19440500 }, { "epoch": 56.27, "learning_rate": 2.1873245878103002e-05, "loss": 1.9757, "step": 19441000 }, { "epoch": 56.28, "learning_rate": 2.1872522230455725e-05, "loss": 1.9785, "step": 19441500 }, { "epoch": 56.28, "learning_rate": 2.1871798582808447e-05, "loss": 1.9589, "step": 19442000 }, { "epoch": 56.28, "learning_rate": 2.1871074935161173e-05, "loss": 1.9502, "step": 19442500 }, { "epoch": 56.28, "learning_rate": 2.187035273480919e-05, "loss": 1.9486, "step": 19443000 }, { "epoch": 56.28, "learning_rate": 2.1869629087161914e-05, "loss": 1.9696, "step": 19443500 }, { "epoch": 56.28, "learning_rate": 2.1868905439514636e-05, "loss": 1.9572, "step": 19444000 }, { "epoch": 56.28, "learning_rate": 2.1868181791867358e-05, "loss": 1.9478, "step": 19444500 }, { "epoch": 56.29, "learning_rate": 2.1867458144220084e-05, "loss": 1.938, "step": 19445000 }, { "epoch": 56.29, "learning_rate": 2.1866734496572806e-05, "loss": 1.9682, "step": 19445500 }, { "epoch": 56.29, "learning_rate": 2.1866010848925528e-05, "loss": 1.9598, "step": 19446000 }, { "epoch": 56.29, "learning_rate": 2.1865287201278254e-05, "loss": 1.9499, "step": 19446500 }, { "epoch": 56.29, "learning_rate": 2.1864563553630976e-05, "loss": 1.9582, "step": 19447000 }, { "epoch": 56.29, "learning_rate": 2.18638399059837e-05, "loss": 1.977, "step": 19447500 }, { "epoch": 56.29, "learning_rate": 2.186311625833642e-05, "loss": 1.9693, "step": 19448000 }, { "epoch": 56.3, "learning_rate": 2.1862392610689146e-05, "loss": 1.9568, "step": 19448500 }, { "epoch": 56.3, "learning_rate": 2.186166896304187e-05, "loss": 1.9657, "step": 19449000 }, { "epoch": 56.3, "learning_rate": 2.1860946762689884e-05, "loss": 1.9443, "step": 19449500 }, { "epoch": 56.3, "learning_rate": 2.1860224562337903e-05, "loss": 1.9452, "step": 19450000 }, { "epoch": 56.3, "learning_rate": 2.185950091469063e-05, "loss": 1.9728, "step": 19450500 }, { "epoch": 56.3, "learning_rate": 2.185877726704335e-05, "loss": 1.9524, "step": 19451000 }, { "epoch": 56.3, "learning_rate": 2.1858053619396073e-05, "loss": 1.9745, "step": 19451500 }, { "epoch": 56.31, "learning_rate": 2.1857329971748795e-05, "loss": 1.9619, "step": 19452000 }, { "epoch": 56.31, "learning_rate": 2.185660632410152e-05, "loss": 1.9769, "step": 19452500 }, { "epoch": 56.31, "learning_rate": 2.1855882676454243e-05, "loss": 1.9634, "step": 19453000 }, { "epoch": 56.31, "learning_rate": 2.185515902880697e-05, "loss": 1.9797, "step": 19453500 }, { "epoch": 56.31, "learning_rate": 2.185443538115969e-05, "loss": 1.9526, "step": 19454000 }, { "epoch": 56.31, "learning_rate": 2.1853711733512413e-05, "loss": 1.9567, "step": 19454500 }, { "epoch": 56.31, "learning_rate": 2.1852989533160432e-05, "loss": 1.9715, "step": 19455000 }, { "epoch": 56.32, "learning_rate": 2.1852265885513154e-05, "loss": 1.9491, "step": 19455500 }, { "epoch": 56.32, "learning_rate": 2.1851542237865877e-05, "loss": 1.9527, "step": 19456000 }, { "epoch": 56.32, "learning_rate": 2.1850820037513896e-05, "loss": 1.9775, "step": 19456500 }, { "epoch": 56.32, "learning_rate": 2.1850096389866618e-05, "loss": 1.953, "step": 19457000 }, { "epoch": 56.32, "learning_rate": 2.1849372742219343e-05, "loss": 1.9632, "step": 19457500 }, { "epoch": 56.32, "learning_rate": 2.1848649094572066e-05, "loss": 1.9428, "step": 19458000 }, { "epoch": 56.32, "learning_rate": 2.1847925446924788e-05, "loss": 1.9642, "step": 19458500 }, { "epoch": 56.33, "learning_rate": 2.184720179927751e-05, "loss": 1.989, "step": 19459000 }, { "epoch": 56.33, "learning_rate": 2.1846478151630236e-05, "loss": 1.9518, "step": 19459500 }, { "epoch": 56.33, "learning_rate": 2.1845754503982958e-05, "loss": 1.9825, "step": 19460000 }, { "epoch": 56.33, "learning_rate": 2.184503085633568e-05, "loss": 1.9622, "step": 19460500 }, { "epoch": 56.33, "learning_rate": 2.1844308655983696e-05, "loss": 1.96, "step": 19461000 }, { "epoch": 56.33, "learning_rate": 2.184358500833642e-05, "loss": 1.9404, "step": 19461500 }, { "epoch": 56.33, "learning_rate": 2.1842861360689147e-05, "loss": 1.9777, "step": 19462000 }, { "epoch": 56.34, "learning_rate": 2.1842139160337163e-05, "loss": 1.9839, "step": 19462500 }, { "epoch": 56.34, "learning_rate": 2.1841415512689885e-05, "loss": 1.9685, "step": 19463000 }, { "epoch": 56.34, "learning_rate": 2.184069186504261e-05, "loss": 1.9757, "step": 19463500 }, { "epoch": 56.34, "learning_rate": 2.1839968217395333e-05, "loss": 1.9793, "step": 19464000 }, { "epoch": 56.34, "learning_rate": 2.1839244569748055e-05, "loss": 1.9721, "step": 19464500 }, { "epoch": 56.34, "learning_rate": 2.183852092210078e-05, "loss": 1.9572, "step": 19465000 }, { "epoch": 56.34, "learning_rate": 2.1837797274453503e-05, "loss": 1.9414, "step": 19465500 }, { "epoch": 56.35, "learning_rate": 2.1837073626806225e-05, "loss": 1.9384, "step": 19466000 }, { "epoch": 56.35, "learning_rate": 2.1836349979158947e-05, "loss": 1.9497, "step": 19466500 }, { "epoch": 56.35, "learning_rate": 2.1835626331511673e-05, "loss": 1.9577, "step": 19467000 }, { "epoch": 56.35, "learning_rate": 2.1834902683864395e-05, "loss": 1.9583, "step": 19467500 }, { "epoch": 56.35, "learning_rate": 2.1834179036217117e-05, "loss": 1.953, "step": 19468000 }, { "epoch": 56.35, "learning_rate": 2.1833455388569843e-05, "loss": 1.9791, "step": 19468500 }, { "epoch": 56.35, "learning_rate": 2.1832733188217862e-05, "loss": 1.9562, "step": 19469000 }, { "epoch": 56.36, "learning_rate": 2.1832009540570584e-05, "loss": 1.975, "step": 19469500 }, { "epoch": 56.36, "learning_rate": 2.18312873402186e-05, "loss": 1.9485, "step": 19470000 }, { "epoch": 56.36, "learning_rate": 2.1830563692571325e-05, "loss": 1.9521, "step": 19470500 }, { "epoch": 56.36, "learning_rate": 2.1829840044924047e-05, "loss": 1.9538, "step": 19471000 }, { "epoch": 56.36, "learning_rate": 2.182911639727677e-05, "loss": 1.959, "step": 19471500 }, { "epoch": 56.36, "learning_rate": 2.1828392749629492e-05, "loss": 1.9574, "step": 19472000 }, { "epoch": 56.36, "learning_rate": 2.1827669101982218e-05, "loss": 1.9554, "step": 19472500 }, { "epoch": 56.37, "learning_rate": 2.1826946901630237e-05, "loss": 1.9575, "step": 19473000 }, { "epoch": 56.37, "learning_rate": 2.1826224701278252e-05, "loss": 1.9494, "step": 19473500 }, { "epoch": 56.37, "learning_rate": 2.1825501053630974e-05, "loss": 1.9362, "step": 19474000 }, { "epoch": 56.37, "learning_rate": 2.18247774059837e-05, "loss": 1.9768, "step": 19474500 }, { "epoch": 56.37, "learning_rate": 2.1824053758336422e-05, "loss": 1.9462, "step": 19475000 }, { "epoch": 56.37, "learning_rate": 2.1823330110689144e-05, "loss": 1.9456, "step": 19475500 }, { "epoch": 56.38, "learning_rate": 2.182260646304187e-05, "loss": 1.9612, "step": 19476000 }, { "epoch": 56.38, "learning_rate": 2.1821882815394592e-05, "loss": 1.9441, "step": 19476500 }, { "epoch": 56.38, "learning_rate": 2.1821159167747314e-05, "loss": 1.9724, "step": 19477000 }, { "epoch": 56.38, "learning_rate": 2.1820435520100037e-05, "loss": 1.963, "step": 19477500 }, { "epoch": 56.38, "learning_rate": 2.1819711872452762e-05, "loss": 1.9662, "step": 19478000 }, { "epoch": 56.38, "learning_rate": 2.1818988224805485e-05, "loss": 1.9509, "step": 19478500 }, { "epoch": 56.38, "learning_rate": 2.1818264577158207e-05, "loss": 1.9514, "step": 19479000 }, { "epoch": 56.39, "learning_rate": 2.1817542376806226e-05, "loss": 1.9434, "step": 19479500 }, { "epoch": 56.39, "learning_rate": 2.181681872915895e-05, "loss": 1.9664, "step": 19480000 }, { "epoch": 56.39, "learning_rate": 2.1816096528806967e-05, "loss": 1.9677, "step": 19480500 }, { "epoch": 56.39, "learning_rate": 2.181537288115969e-05, "loss": 1.98, "step": 19481000 }, { "epoch": 56.39, "learning_rate": 2.181464923351241e-05, "loss": 1.9451, "step": 19481500 }, { "epoch": 56.39, "learning_rate": 2.1813925585865137e-05, "loss": 1.9783, "step": 19482000 }, { "epoch": 56.39, "learning_rate": 2.181320193821786e-05, "loss": 1.9271, "step": 19482500 }, { "epoch": 56.4, "learning_rate": 2.181247829057058e-05, "loss": 1.969, "step": 19483000 }, { "epoch": 56.4, "learning_rate": 2.18117560902186e-05, "loss": 1.9516, "step": 19483500 }, { "epoch": 56.4, "learning_rate": 2.1811032442571326e-05, "loss": 1.9584, "step": 19484000 }, { "epoch": 56.4, "learning_rate": 2.1810308794924048e-05, "loss": 1.951, "step": 19484500 }, { "epoch": 56.4, "learning_rate": 2.180958514727677e-05, "loss": 1.9454, "step": 19485000 }, { "epoch": 56.4, "learning_rate": 2.1808861499629493e-05, "loss": 1.9623, "step": 19485500 }, { "epoch": 56.4, "learning_rate": 2.1808137851982215e-05, "loss": 1.965, "step": 19486000 }, { "epoch": 56.41, "learning_rate": 2.1807414204334937e-05, "loss": 1.9667, "step": 19486500 }, { "epoch": 56.41, "learning_rate": 2.1806690556687663e-05, "loss": 1.9701, "step": 19487000 }, { "epoch": 56.41, "learning_rate": 2.180596690904039e-05, "loss": 1.9742, "step": 19487500 }, { "epoch": 56.41, "learning_rate": 2.180524326139311e-05, "loss": 1.9599, "step": 19488000 }, { "epoch": 56.41, "learning_rate": 2.1804519613745833e-05, "loss": 1.9469, "step": 19488500 }, { "epoch": 56.41, "learning_rate": 2.1803795966098555e-05, "loss": 1.9584, "step": 19489000 }, { "epoch": 56.41, "learning_rate": 2.1803073765746574e-05, "loss": 1.9631, "step": 19489500 }, { "epoch": 56.42, "learning_rate": 2.1802350118099296e-05, "loss": 1.9903, "step": 19490000 }, { "epoch": 56.42, "learning_rate": 2.180162647045202e-05, "loss": 1.9666, "step": 19490500 }, { "epoch": 56.42, "learning_rate": 2.180090427010004e-05, "loss": 1.9591, "step": 19491000 }, { "epoch": 56.42, "learning_rate": 2.1800180622452763e-05, "loss": 1.9772, "step": 19491500 }, { "epoch": 56.42, "learning_rate": 2.1799456974805485e-05, "loss": 1.9617, "step": 19492000 }, { "epoch": 56.42, "learning_rate": 2.1798733327158208e-05, "loss": 1.9978, "step": 19492500 }, { "epoch": 56.42, "learning_rate": 2.179800967951093e-05, "loss": 1.9514, "step": 19493000 }, { "epoch": 56.43, "learning_rate": 2.1797286031863652e-05, "loss": 1.9589, "step": 19493500 }, { "epoch": 56.43, "learning_rate": 2.1796562384216378e-05, "loss": 1.962, "step": 19494000 }, { "epoch": 56.43, "learning_rate": 2.1795838736569103e-05, "loss": 1.9705, "step": 19494500 }, { "epoch": 56.43, "learning_rate": 2.1795115088921826e-05, "loss": 1.9747, "step": 19495000 }, { "epoch": 56.43, "learning_rate": 2.179439288856984e-05, "loss": 1.9458, "step": 19495500 }, { "epoch": 56.43, "learning_rate": 2.1793669240922567e-05, "loss": 1.9647, "step": 19496000 }, { "epoch": 56.43, "learning_rate": 2.179294559327529e-05, "loss": 1.9747, "step": 19496500 }, { "epoch": 56.44, "learning_rate": 2.179222194562801e-05, "loss": 1.9595, "step": 19497000 }, { "epoch": 56.44, "learning_rate": 2.1791498297980733e-05, "loss": 1.9751, "step": 19497500 }, { "epoch": 56.44, "learning_rate": 2.179077465033346e-05, "loss": 1.9913, "step": 19498000 }, { "epoch": 56.44, "learning_rate": 2.1790052449981478e-05, "loss": 1.9576, "step": 19498500 }, { "epoch": 56.44, "learning_rate": 2.17893288023342e-05, "loss": 1.9665, "step": 19499000 }, { "epoch": 56.44, "learning_rate": 2.1788605154686922e-05, "loss": 1.9482, "step": 19499500 }, { "epoch": 56.44, "learning_rate": 2.178788295433494e-05, "loss": 1.9574, "step": 19500000 }, { "epoch": 56.45, "learning_rate": 2.1787159306687664e-05, "loss": 1.9675, "step": 19500500 }, { "epoch": 56.45, "learning_rate": 2.1786435659040386e-05, "loss": 1.9761, "step": 19501000 }, { "epoch": 56.45, "learning_rate": 2.1785712011393108e-05, "loss": 1.9557, "step": 19501500 }, { "epoch": 56.45, "learning_rate": 2.1784988363745834e-05, "loss": 1.9765, "step": 19502000 }, { "epoch": 56.45, "learning_rate": 2.1784264716098556e-05, "loss": 1.9692, "step": 19502500 }, { "epoch": 56.45, "learning_rate": 2.1783541068451278e-05, "loss": 1.9537, "step": 19503000 }, { "epoch": 56.45, "learning_rate": 2.1782817420804004e-05, "loss": 1.9706, "step": 19503500 }, { "epoch": 56.46, "learning_rate": 2.178209522045202e-05, "loss": 1.9747, "step": 19504000 }, { "epoch": 56.46, "learning_rate": 2.178137157280474e-05, "loss": 1.9688, "step": 19504500 }, { "epoch": 56.46, "learning_rate": 2.1780647925157467e-05, "loss": 2.0037, "step": 19505000 }, { "epoch": 56.46, "learning_rate": 2.1779925724805483e-05, "loss": 1.9535, "step": 19505500 }, { "epoch": 56.46, "learning_rate": 2.177920207715821e-05, "loss": 1.9549, "step": 19506000 }, { "epoch": 56.46, "learning_rate": 2.177847842951093e-05, "loss": 1.9746, "step": 19506500 }, { "epoch": 56.46, "learning_rate": 2.1777754781863656e-05, "loss": 1.9607, "step": 19507000 }, { "epoch": 56.47, "learning_rate": 2.177703113421638e-05, "loss": 1.9827, "step": 19507500 }, { "epoch": 56.47, "learning_rate": 2.17763074865691e-05, "loss": 1.9656, "step": 19508000 }, { "epoch": 56.47, "learning_rate": 2.1775583838921823e-05, "loss": 1.9584, "step": 19508500 }, { "epoch": 56.47, "learning_rate": 2.1774860191274545e-05, "loss": 1.9855, "step": 19509000 }, { "epoch": 56.47, "learning_rate": 2.177413654362727e-05, "loss": 1.9694, "step": 19509500 }, { "epoch": 56.47, "learning_rate": 2.1773412895979993e-05, "loss": 1.955, "step": 19510000 }, { "epoch": 56.47, "learning_rate": 2.177268924833272e-05, "loss": 1.973, "step": 19510500 }, { "epoch": 56.48, "learning_rate": 2.177196560068544e-05, "loss": 1.9514, "step": 19511000 }, { "epoch": 56.48, "learning_rate": 2.1771243400333456e-05, "loss": 1.9532, "step": 19511500 }, { "epoch": 56.48, "learning_rate": 2.1770521199981475e-05, "loss": 1.9757, "step": 19512000 }, { "epoch": 56.48, "learning_rate": 2.1769797552334198e-05, "loss": 1.9614, "step": 19512500 }, { "epoch": 56.48, "learning_rate": 2.176907390468692e-05, "loss": 1.952, "step": 19513000 }, { "epoch": 56.48, "learning_rate": 2.1768350257039645e-05, "loss": 1.9384, "step": 19513500 }, { "epoch": 56.49, "learning_rate": 2.1767626609392368e-05, "loss": 1.9741, "step": 19514000 }, { "epoch": 56.49, "learning_rate": 2.1766902961745093e-05, "loss": 1.9701, "step": 19514500 }, { "epoch": 56.49, "learning_rate": 2.1766179314097816e-05, "loss": 1.9615, "step": 19515000 }, { "epoch": 56.49, "learning_rate": 2.176545711374583e-05, "loss": 1.9527, "step": 19515500 }, { "epoch": 56.49, "learning_rate": 2.1764733466098557e-05, "loss": 1.9619, "step": 19516000 }, { "epoch": 56.49, "learning_rate": 2.176400981845128e-05, "loss": 1.9659, "step": 19516500 }, { "epoch": 56.49, "learning_rate": 2.1763286170804005e-05, "loss": 1.9538, "step": 19517000 }, { "epoch": 56.5, "learning_rate": 2.1762562523156727e-05, "loss": 1.9963, "step": 19517500 }, { "epoch": 56.5, "learning_rate": 2.176183887550945e-05, "loss": 1.9385, "step": 19518000 }, { "epoch": 56.5, "learning_rate": 2.176111522786217e-05, "loss": 1.96, "step": 19518500 }, { "epoch": 56.5, "learning_rate": 2.1760391580214894e-05, "loss": 1.9426, "step": 19519000 }, { "epoch": 56.5, "learning_rate": 2.1759669379862912e-05, "loss": 1.956, "step": 19519500 }, { "epoch": 56.5, "learning_rate": 2.175894717951093e-05, "loss": 1.9538, "step": 19520000 }, { "epoch": 56.5, "learning_rate": 2.1758224979158947e-05, "loss": 1.9802, "step": 19520500 }, { "epoch": 56.51, "learning_rate": 2.1757501331511673e-05, "loss": 1.946, "step": 19521000 }, { "epoch": 56.51, "learning_rate": 2.1756777683864395e-05, "loss": 1.961, "step": 19521500 }, { "epoch": 56.51, "learning_rate": 2.175605403621712e-05, "loss": 1.9763, "step": 19522000 }, { "epoch": 56.51, "learning_rate": 2.1755330388569843e-05, "loss": 1.984, "step": 19522500 }, { "epoch": 56.51, "learning_rate": 2.1754606740922565e-05, "loss": 1.9752, "step": 19523000 }, { "epoch": 56.51, "learning_rate": 2.1753883093275287e-05, "loss": 1.9515, "step": 19523500 }, { "epoch": 56.51, "learning_rate": 2.175315944562801e-05, "loss": 1.9864, "step": 19524000 }, { "epoch": 56.52, "learning_rate": 2.1752435797980735e-05, "loss": 1.9673, "step": 19524500 }, { "epoch": 56.52, "learning_rate": 2.1751712150333457e-05, "loss": 1.9852, "step": 19525000 }, { "epoch": 56.52, "learning_rate": 2.1750988502686183e-05, "loss": 1.9637, "step": 19525500 }, { "epoch": 56.52, "learning_rate": 2.1750264855038905e-05, "loss": 1.9646, "step": 19526000 }, { "epoch": 56.52, "learning_rate": 2.1749541207391627e-05, "loss": 1.9938, "step": 19526500 }, { "epoch": 56.52, "learning_rate": 2.1748819007039646e-05, "loss": 1.9633, "step": 19527000 }, { "epoch": 56.52, "learning_rate": 2.174809535939237e-05, "loss": 1.9901, "step": 19527500 }, { "epoch": 56.53, "learning_rate": 2.1747371711745094e-05, "loss": 1.9657, "step": 19528000 }, { "epoch": 56.53, "learning_rate": 2.1746648064097816e-05, "loss": 1.984, "step": 19528500 }, { "epoch": 56.53, "learning_rate": 2.174592441645054e-05, "loss": 1.9659, "step": 19529000 }, { "epoch": 56.53, "learning_rate": 2.174520076880326e-05, "loss": 1.96, "step": 19529500 }, { "epoch": 56.53, "learning_rate": 2.1744477121155983e-05, "loss": 1.9616, "step": 19530000 }, { "epoch": 56.53, "learning_rate": 2.174375347350871e-05, "loss": 1.9323, "step": 19530500 }, { "epoch": 56.53, "learning_rate": 2.174302982586143e-05, "loss": 1.955, "step": 19531000 }, { "epoch": 56.54, "learning_rate": 2.1742309072804743e-05, "loss": 1.9706, "step": 19531500 }, { "epoch": 56.54, "learning_rate": 2.174158542515747e-05, "loss": 1.9721, "step": 19532000 }, { "epoch": 56.54, "learning_rate": 2.1740863224805484e-05, "loss": 1.9608, "step": 19532500 }, { "epoch": 56.54, "learning_rate": 2.174013957715821e-05, "loss": 1.9433, "step": 19533000 }, { "epoch": 56.54, "learning_rate": 2.1739415929510932e-05, "loss": 1.9564, "step": 19533500 }, { "epoch": 56.54, "learning_rate": 2.1738692281863654e-05, "loss": 1.9846, "step": 19534000 }, { "epoch": 56.54, "learning_rate": 2.1737968634216377e-05, "loss": 1.9571, "step": 19534500 }, { "epoch": 56.55, "learning_rate": 2.17372449865691e-05, "loss": 1.9383, "step": 19535000 }, { "epoch": 56.55, "learning_rate": 2.1736521338921825e-05, "loss": 1.9733, "step": 19535500 }, { "epoch": 56.55, "learning_rate": 2.1735797691274547e-05, "loss": 1.9553, "step": 19536000 }, { "epoch": 56.55, "learning_rate": 2.1735074043627272e-05, "loss": 1.9618, "step": 19536500 }, { "epoch": 56.55, "learning_rate": 2.1734350395979995e-05, "loss": 1.9754, "step": 19537000 }, { "epoch": 56.55, "learning_rate": 2.173362819562801e-05, "loss": 1.9582, "step": 19537500 }, { "epoch": 56.55, "learning_rate": 2.1732904547980736e-05, "loss": 1.9621, "step": 19538000 }, { "epoch": 56.56, "learning_rate": 2.1732180900333458e-05, "loss": 1.9749, "step": 19538500 }, { "epoch": 56.56, "learning_rate": 2.173145725268618e-05, "loss": 1.9758, "step": 19539000 }, { "epoch": 56.56, "learning_rate": 2.17307350523342e-05, "loss": 1.9638, "step": 19539500 }, { "epoch": 56.56, "learning_rate": 2.173001140468692e-05, "loss": 1.9547, "step": 19540000 }, { "epoch": 56.56, "learning_rate": 2.1729287757039647e-05, "loss": 1.9686, "step": 19540500 }, { "epoch": 56.56, "learning_rate": 2.172856410939237e-05, "loss": 1.9894, "step": 19541000 }, { "epoch": 56.56, "learning_rate": 2.172784046174509e-05, "loss": 1.9523, "step": 19541500 }, { "epoch": 56.57, "learning_rate": 2.1727116814097814e-05, "loss": 1.9627, "step": 19542000 }, { "epoch": 56.57, "learning_rate": 2.1726396061041126e-05, "loss": 1.9781, "step": 19542500 }, { "epoch": 56.57, "learning_rate": 2.1725672413393848e-05, "loss": 1.9493, "step": 19543000 }, { "epoch": 56.57, "learning_rate": 2.1724948765746574e-05, "loss": 1.9739, "step": 19543500 }, { "epoch": 56.57, "learning_rate": 2.17242251180993e-05, "loss": 1.9441, "step": 19544000 }, { "epoch": 56.57, "learning_rate": 2.1723501470452022e-05, "loss": 1.9658, "step": 19544500 }, { "epoch": 56.57, "learning_rate": 2.1722777822804744e-05, "loss": 1.9591, "step": 19545000 }, { "epoch": 56.58, "learning_rate": 2.172205562245276e-05, "loss": 1.9811, "step": 19545500 }, { "epoch": 56.58, "learning_rate": 2.1721331974805485e-05, "loss": 1.9441, "step": 19546000 }, { "epoch": 56.58, "learning_rate": 2.1720608327158207e-05, "loss": 1.9683, "step": 19546500 }, { "epoch": 56.58, "learning_rate": 2.1719884679510933e-05, "loss": 1.9501, "step": 19547000 }, { "epoch": 56.58, "learning_rate": 2.1719161031863655e-05, "loss": 1.9634, "step": 19547500 }, { "epoch": 56.58, "learning_rate": 2.1718437384216377e-05, "loss": 1.9604, "step": 19548000 }, { "epoch": 56.58, "learning_rate": 2.1717715183864396e-05, "loss": 1.9641, "step": 19548500 }, { "epoch": 56.59, "learning_rate": 2.171699153621712e-05, "loss": 1.9767, "step": 19549000 }, { "epoch": 56.59, "learning_rate": 2.171626788856984e-05, "loss": 1.9408, "step": 19549500 }, { "epoch": 56.59, "learning_rate": 2.1715544240922563e-05, "loss": 1.9645, "step": 19550000 }, { "epoch": 56.59, "learning_rate": 2.1714822040570582e-05, "loss": 1.9519, "step": 19550500 }, { "epoch": 56.59, "learning_rate": 2.1714098392923308e-05, "loss": 1.9431, "step": 19551000 }, { "epoch": 56.59, "learning_rate": 2.171337474527603e-05, "loss": 1.9453, "step": 19551500 }, { "epoch": 56.6, "learning_rate": 2.1712651097628752e-05, "loss": 1.96, "step": 19552000 }, { "epoch": 56.6, "learning_rate": 2.1711927449981474e-05, "loss": 1.9692, "step": 19552500 }, { "epoch": 56.6, "learning_rate": 2.17112038023342e-05, "loss": 1.9421, "step": 19553000 }, { "epoch": 56.6, "learning_rate": 2.1710480154686922e-05, "loss": 1.9616, "step": 19553500 }, { "epoch": 56.6, "learning_rate": 2.1709756507039644e-05, "loss": 1.9577, "step": 19554000 }, { "epoch": 56.6, "learning_rate": 2.170903285939237e-05, "loss": 1.999, "step": 19554500 }, { "epoch": 56.6, "learning_rate": 2.1708309211745092e-05, "loss": 1.951, "step": 19555000 }, { "epoch": 56.61, "learning_rate": 2.1707585564097815e-05, "loss": 1.9699, "step": 19555500 }, { "epoch": 56.61, "learning_rate": 2.1706861916450537e-05, "loss": 1.9742, "step": 19556000 }, { "epoch": 56.61, "learning_rate": 2.1706138268803262e-05, "loss": 1.9591, "step": 19556500 }, { "epoch": 56.61, "learning_rate": 2.1705414621155985e-05, "loss": 1.9609, "step": 19557000 }, { "epoch": 56.61, "learning_rate": 2.1704692420804e-05, "loss": 1.9421, "step": 19557500 }, { "epoch": 56.61, "learning_rate": 2.1703968773156726e-05, "loss": 1.9604, "step": 19558000 }, { "epoch": 56.61, "learning_rate": 2.170324512550945e-05, "loss": 1.9562, "step": 19558500 }, { "epoch": 56.62, "learning_rate": 2.1702521477862174e-05, "loss": 1.9531, "step": 19559000 }, { "epoch": 56.62, "learning_rate": 2.1701797830214896e-05, "loss": 1.9705, "step": 19559500 }, { "epoch": 56.62, "learning_rate": 2.170107562986291e-05, "loss": 1.9877, "step": 19560000 }, { "epoch": 56.62, "learning_rate": 2.1700351982215637e-05, "loss": 1.9784, "step": 19560500 }, { "epoch": 56.62, "learning_rate": 2.169962833456836e-05, "loss": 1.9802, "step": 19561000 }, { "epoch": 56.62, "learning_rate": 2.169890468692108e-05, "loss": 1.9876, "step": 19561500 }, { "epoch": 56.62, "learning_rate": 2.1698181039273807e-05, "loss": 1.9396, "step": 19562000 }, { "epoch": 56.63, "learning_rate": 2.169745739162653e-05, "loss": 1.9755, "step": 19562500 }, { "epoch": 56.63, "learning_rate": 2.169673519127455e-05, "loss": 1.9666, "step": 19563000 }, { "epoch": 56.63, "learning_rate": 2.169601154362727e-05, "loss": 1.9597, "step": 19563500 }, { "epoch": 56.63, "learning_rate": 2.1695287895979993e-05, "loss": 1.9616, "step": 19564000 }, { "epoch": 56.63, "learning_rate": 2.1694564248332715e-05, "loss": 1.9583, "step": 19564500 }, { "epoch": 56.63, "learning_rate": 2.1693840600685437e-05, "loss": 1.9627, "step": 19565000 }, { "epoch": 56.63, "learning_rate": 2.1693116953038163e-05, "loss": 1.9424, "step": 19565500 }, { "epoch": 56.64, "learning_rate": 2.169239330539089e-05, "loss": 1.9684, "step": 19566000 }, { "epoch": 56.64, "learning_rate": 2.169166965774361e-05, "loss": 1.963, "step": 19566500 }, { "epoch": 56.64, "learning_rate": 2.1690947457391626e-05, "loss": 1.9817, "step": 19567000 }, { "epoch": 56.64, "learning_rate": 2.1690223809744352e-05, "loss": 1.9232, "step": 19567500 }, { "epoch": 56.64, "learning_rate": 2.1689500162097074e-05, "loss": 1.9583, "step": 19568000 }, { "epoch": 56.64, "learning_rate": 2.1688776514449796e-05, "loss": 1.9473, "step": 19568500 }, { "epoch": 56.64, "learning_rate": 2.1688054314097815e-05, "loss": 1.9817, "step": 19569000 }, { "epoch": 56.65, "learning_rate": 2.168733066645054e-05, "loss": 1.9466, "step": 19569500 }, { "epoch": 56.65, "learning_rate": 2.1686607018803263e-05, "loss": 1.9812, "step": 19570000 }, { "epoch": 56.65, "learning_rate": 2.1685883371155985e-05, "loss": 1.9565, "step": 19570500 }, { "epoch": 56.65, "learning_rate": 2.1685159723508708e-05, "loss": 1.9494, "step": 19571000 }, { "epoch": 56.65, "learning_rate": 2.1684437523156727e-05, "loss": 1.9936, "step": 19571500 }, { "epoch": 56.65, "learning_rate": 2.1683715322804742e-05, "loss": 1.9859, "step": 19572000 }, { "epoch": 56.65, "learning_rate": 2.1682991675157464e-05, "loss": 1.947, "step": 19572500 }, { "epoch": 56.66, "learning_rate": 2.168226802751019e-05, "loss": 1.9835, "step": 19573000 }, { "epoch": 56.66, "learning_rate": 2.1681544379862916e-05, "loss": 1.9452, "step": 19573500 }, { "epoch": 56.66, "learning_rate": 2.1680820732215638e-05, "loss": 1.9639, "step": 19574000 }, { "epoch": 56.66, "learning_rate": 2.168009708456836e-05, "loss": 1.9737, "step": 19574500 }, { "epoch": 56.66, "learning_rate": 2.1679373436921082e-05, "loss": 1.9689, "step": 19575000 }, { "epoch": 56.66, "learning_rate": 2.1678649789273805e-05, "loss": 1.9437, "step": 19575500 }, { "epoch": 56.66, "learning_rate": 2.1677927588921824e-05, "loss": 1.9667, "step": 19576000 }, { "epoch": 56.67, "learning_rate": 2.1677203941274546e-05, "loss": 1.9592, "step": 19576500 }, { "epoch": 56.67, "learning_rate": 2.167648029362727e-05, "loss": 1.9729, "step": 19577000 }, { "epoch": 56.67, "learning_rate": 2.1675756645979994e-05, "loss": 1.9647, "step": 19577500 }, { "epoch": 56.67, "learning_rate": 2.1675032998332716e-05, "loss": 1.949, "step": 19578000 }, { "epoch": 56.67, "learning_rate": 2.167430935068544e-05, "loss": 1.9583, "step": 19578500 }, { "epoch": 56.67, "learning_rate": 2.1673585703038164e-05, "loss": 1.9791, "step": 19579000 }, { "epoch": 56.67, "learning_rate": 2.1672862055390886e-05, "loss": 1.9835, "step": 19579500 }, { "epoch": 56.68, "learning_rate": 2.1672138407743608e-05, "loss": 1.9565, "step": 19580000 }, { "epoch": 56.68, "learning_rate": 2.1671414760096334e-05, "loss": 1.996, "step": 19580500 }, { "epoch": 56.68, "learning_rate": 2.1670691112449056e-05, "loss": 1.9642, "step": 19581000 }, { "epoch": 56.68, "learning_rate": 2.1669968912097075e-05, "loss": 1.9712, "step": 19581500 }, { "epoch": 56.68, "learning_rate": 2.166924671174509e-05, "loss": 1.9704, "step": 19582000 }, { "epoch": 56.68, "learning_rate": 2.1668523064097816e-05, "loss": 1.9702, "step": 19582500 }, { "epoch": 56.68, "learning_rate": 2.166779941645054e-05, "loss": 1.9773, "step": 19583000 }, { "epoch": 56.69, "learning_rate": 2.166707576880326e-05, "loss": 1.9663, "step": 19583500 }, { "epoch": 56.69, "learning_rate": 2.166635356845128e-05, "loss": 1.97, "step": 19584000 }, { "epoch": 56.69, "learning_rate": 2.16656313680993e-05, "loss": 1.9693, "step": 19584500 }, { "epoch": 56.69, "learning_rate": 2.166490772045202e-05, "loss": 1.9759, "step": 19585000 }, { "epoch": 56.69, "learning_rate": 2.1664184072804743e-05, "loss": 1.9802, "step": 19585500 }, { "epoch": 56.69, "learning_rate": 2.1663460425157465e-05, "loss": 1.9853, "step": 19586000 }, { "epoch": 56.69, "learning_rate": 2.166273677751019e-05, "loss": 1.9624, "step": 19586500 }, { "epoch": 56.7, "learning_rate": 2.1662013129862913e-05, "loss": 1.9783, "step": 19587000 }, { "epoch": 56.7, "learning_rate": 2.1661289482215635e-05, "loss": 1.9611, "step": 19587500 }, { "epoch": 56.7, "learning_rate": 2.166056583456836e-05, "loss": 1.9719, "step": 19588000 }, { "epoch": 56.7, "learning_rate": 2.1659842186921083e-05, "loss": 1.9477, "step": 19588500 }, { "epoch": 56.7, "learning_rate": 2.1659118539273805e-05, "loss": 1.9462, "step": 19589000 }, { "epoch": 56.7, "learning_rate": 2.1658396338921824e-05, "loss": 1.9791, "step": 19589500 }, { "epoch": 56.71, "learning_rate": 2.1657672691274547e-05, "loss": 1.9708, "step": 19590000 }, { "epoch": 56.71, "learning_rate": 2.1656950490922566e-05, "loss": 1.9485, "step": 19590500 }, { "epoch": 56.71, "learning_rate": 2.1656226843275288e-05, "loss": 1.9679, "step": 19591000 }, { "epoch": 56.71, "learning_rate": 2.165550319562801e-05, "loss": 1.973, "step": 19591500 }, { "epoch": 56.71, "learning_rate": 2.1654779547980736e-05, "loss": 1.9795, "step": 19592000 }, { "epoch": 56.71, "learning_rate": 2.1654055900333458e-05, "loss": 1.9678, "step": 19592500 }, { "epoch": 56.71, "learning_rate": 2.165333225268618e-05, "loss": 1.9546, "step": 19593000 }, { "epoch": 56.72, "learning_rate": 2.16526100523342e-05, "loss": 1.9478, "step": 19593500 }, { "epoch": 56.72, "learning_rate": 2.165188640468692e-05, "loss": 1.9738, "step": 19594000 }, { "epoch": 56.72, "learning_rate": 2.1651162757039643e-05, "loss": 1.9854, "step": 19594500 }, { "epoch": 56.72, "learning_rate": 2.165043910939237e-05, "loss": 1.9496, "step": 19595000 }, { "epoch": 56.72, "learning_rate": 2.1649715461745095e-05, "loss": 1.9479, "step": 19595500 }, { "epoch": 56.72, "learning_rate": 2.1648991814097817e-05, "loss": 1.9655, "step": 19596000 }, { "epoch": 56.72, "learning_rate": 2.164826816645054e-05, "loss": 1.9627, "step": 19596500 }, { "epoch": 56.73, "learning_rate": 2.164754451880326e-05, "loss": 1.9507, "step": 19597000 }, { "epoch": 56.73, "learning_rate": 2.1646820871155984e-05, "loss": 1.9623, "step": 19597500 }, { "epoch": 56.73, "learning_rate": 2.1646097223508706e-05, "loss": 1.9745, "step": 19598000 }, { "epoch": 56.73, "learning_rate": 2.164537357586143e-05, "loss": 1.9535, "step": 19598500 }, { "epoch": 56.73, "learning_rate": 2.1644649928214157e-05, "loss": 1.9725, "step": 19599000 }, { "epoch": 56.73, "learning_rate": 2.164392628056688e-05, "loss": 1.9421, "step": 19599500 }, { "epoch": 56.73, "learning_rate": 2.164320552751019e-05, "loss": 1.9616, "step": 19600000 }, { "epoch": 56.74, "learning_rate": 2.1642481879862914e-05, "loss": 1.9441, "step": 19600500 }, { "epoch": 56.74, "learning_rate": 2.1641758232215636e-05, "loss": 1.9535, "step": 19601000 }, { "epoch": 56.74, "learning_rate": 2.1641036031863655e-05, "loss": 1.9688, "step": 19601500 }, { "epoch": 56.74, "learning_rate": 2.1640312384216377e-05, "loss": 1.9548, "step": 19602000 }, { "epoch": 56.74, "learning_rate": 2.16395887365691e-05, "loss": 1.9623, "step": 19602500 }, { "epoch": 56.74, "learning_rate": 2.163886653621712e-05, "loss": 1.9804, "step": 19603000 }, { "epoch": 56.74, "learning_rate": 2.1638142888569844e-05, "loss": 1.9564, "step": 19603500 }, { "epoch": 56.75, "learning_rate": 2.1637419240922566e-05, "loss": 1.9593, "step": 19604000 }, { "epoch": 56.75, "learning_rate": 2.163669559327529e-05, "loss": 1.9601, "step": 19604500 }, { "epoch": 56.75, "learning_rate": 2.163597194562801e-05, "loss": 1.9593, "step": 19605000 }, { "epoch": 56.75, "learning_rate": 2.1635248297980733e-05, "loss": 1.9812, "step": 19605500 }, { "epoch": 56.75, "learning_rate": 2.1634526097628752e-05, "loss": 1.9767, "step": 19606000 }, { "epoch": 56.75, "learning_rate": 2.1633802449981474e-05, "loss": 1.9852, "step": 19606500 }, { "epoch": 56.75, "learning_rate": 2.16330788023342e-05, "loss": 1.9489, "step": 19607000 }, { "epoch": 56.76, "learning_rate": 2.1632355154686922e-05, "loss": 1.9537, "step": 19607500 }, { "epoch": 56.76, "learning_rate": 2.1631631507039644e-05, "loss": 1.9589, "step": 19608000 }, { "epoch": 56.76, "learning_rate": 2.1630909306687663e-05, "loss": 1.9645, "step": 19608500 }, { "epoch": 56.76, "learning_rate": 2.1630185659040385e-05, "loss": 1.9843, "step": 19609000 }, { "epoch": 56.76, "learning_rate": 2.1629462011393108e-05, "loss": 1.9551, "step": 19609500 }, { "epoch": 56.76, "learning_rate": 2.1628738363745833e-05, "loss": 1.9733, "step": 19610000 }, { "epoch": 56.76, "learning_rate": 2.1628014716098556e-05, "loss": 1.9675, "step": 19610500 }, { "epoch": 56.77, "learning_rate": 2.162729106845128e-05, "loss": 1.9801, "step": 19611000 }, { "epoch": 56.77, "learning_rate": 2.1626567420804003e-05, "loss": 1.9728, "step": 19611500 }, { "epoch": 56.77, "learning_rate": 2.1625845220452022e-05, "loss": 1.9836, "step": 19612000 }, { "epoch": 56.77, "learning_rate": 2.1625123020100038e-05, "loss": 1.9514, "step": 19612500 }, { "epoch": 56.77, "learning_rate": 2.162439937245276e-05, "loss": 1.9732, "step": 19613000 }, { "epoch": 56.77, "learning_rate": 2.1623675724805482e-05, "loss": 1.9761, "step": 19613500 }, { "epoch": 56.77, "learning_rate": 2.1622952077158208e-05, "loss": 1.9617, "step": 19614000 }, { "epoch": 56.78, "learning_rate": 2.1622228429510934e-05, "loss": 1.982, "step": 19614500 }, { "epoch": 56.78, "learning_rate": 2.1621504781863656e-05, "loss": 1.956, "step": 19615000 }, { "epoch": 56.78, "learning_rate": 2.1620781134216378e-05, "loss": 1.9666, "step": 19615500 }, { "epoch": 56.78, "learning_rate": 2.16200574865691e-05, "loss": 1.966, "step": 19616000 }, { "epoch": 56.78, "learning_rate": 2.161933528621712e-05, "loss": 1.9663, "step": 19616500 }, { "epoch": 56.78, "learning_rate": 2.161861163856984e-05, "loss": 1.9762, "step": 19617000 }, { "epoch": 56.78, "learning_rate": 2.1617887990922564e-05, "loss": 1.9689, "step": 19617500 }, { "epoch": 56.79, "learning_rate": 2.1617164343275286e-05, "loss": 1.972, "step": 19618000 }, { "epoch": 56.79, "learning_rate": 2.161644069562801e-05, "loss": 1.9811, "step": 19618500 }, { "epoch": 56.79, "learning_rate": 2.1615717047980734e-05, "loss": 1.9938, "step": 19619000 }, { "epoch": 56.79, "learning_rate": 2.1614994847628753e-05, "loss": 1.9598, "step": 19619500 }, { "epoch": 56.79, "learning_rate": 2.1614271199981475e-05, "loss": 1.9559, "step": 19620000 }, { "epoch": 56.79, "learning_rate": 2.1613547552334197e-05, "loss": 1.9538, "step": 19620500 }, { "epoch": 56.79, "learning_rate": 2.1612823904686923e-05, "loss": 1.9417, "step": 19621000 }, { "epoch": 56.8, "learning_rate": 2.1612100257039645e-05, "loss": 1.9832, "step": 19621500 }, { "epoch": 56.8, "learning_rate": 2.161137660939237e-05, "loss": 1.9684, "step": 19622000 }, { "epoch": 56.8, "learning_rate": 2.1610654409040386e-05, "loss": 1.9461, "step": 19622500 }, { "epoch": 56.8, "learning_rate": 2.160993076139311e-05, "loss": 1.9817, "step": 19623000 }, { "epoch": 56.8, "learning_rate": 2.1609207113745834e-05, "loss": 1.9616, "step": 19623500 }, { "epoch": 56.8, "learning_rate": 2.1608483466098556e-05, "loss": 1.9839, "step": 19624000 }, { "epoch": 56.8, "learning_rate": 2.160775981845128e-05, "loss": 1.9638, "step": 19624500 }, { "epoch": 56.81, "learning_rate": 2.1607036170804e-05, "loss": 1.9631, "step": 19625000 }, { "epoch": 56.81, "learning_rate": 2.1606312523156726e-05, "loss": 1.9483, "step": 19625500 }, { "epoch": 56.81, "learning_rate": 2.160558887550945e-05, "loss": 1.9391, "step": 19626000 }, { "epoch": 56.81, "learning_rate": 2.1604865227862174e-05, "loss": 1.9661, "step": 19626500 }, { "epoch": 56.81, "learning_rate": 2.1604141580214897e-05, "loss": 1.9768, "step": 19627000 }, { "epoch": 56.81, "learning_rate": 2.160341793256762e-05, "loss": 1.9541, "step": 19627500 }, { "epoch": 56.82, "learning_rate": 2.160269428492034e-05, "loss": 1.9827, "step": 19628000 }, { "epoch": 56.82, "learning_rate": 2.1601970637273063e-05, "loss": 1.9915, "step": 19628500 }, { "epoch": 56.82, "learning_rate": 2.1601248436921082e-05, "loss": 1.9656, "step": 19629000 }, { "epoch": 56.82, "learning_rate": 2.1600524789273808e-05, "loss": 1.961, "step": 19629500 }, { "epoch": 56.82, "learning_rate": 2.159980114162653e-05, "loss": 1.958, "step": 19630000 }, { "epoch": 56.82, "learning_rate": 2.1599077493979252e-05, "loss": 1.9883, "step": 19630500 }, { "epoch": 56.82, "learning_rate": 2.1598353846331974e-05, "loss": 1.9683, "step": 19631000 }, { "epoch": 56.83, "learning_rate": 2.15976301986847e-05, "loss": 1.9795, "step": 19631500 }, { "epoch": 56.83, "learning_rate": 2.1596906551037422e-05, "loss": 1.9859, "step": 19632000 }, { "epoch": 56.83, "learning_rate": 2.1596182903390145e-05, "loss": 1.9696, "step": 19632500 }, { "epoch": 56.83, "learning_rate": 2.159545925574287e-05, "loss": 1.982, "step": 19633000 }, { "epoch": 56.83, "learning_rate": 2.1594737055390886e-05, "loss": 1.9465, "step": 19633500 }, { "epoch": 56.83, "learning_rate": 2.159401340774361e-05, "loss": 1.9697, "step": 19634000 }, { "epoch": 56.83, "learning_rate": 2.1593291207391627e-05, "loss": 1.9841, "step": 19634500 }, { "epoch": 56.84, "learning_rate": 2.1592569007039646e-05, "loss": 1.9683, "step": 19635000 }, { "epoch": 56.84, "learning_rate": 2.1591845359392368e-05, "loss": 1.9764, "step": 19635500 }, { "epoch": 56.84, "learning_rate": 2.159112171174509e-05, "loss": 1.9724, "step": 19636000 }, { "epoch": 56.84, "learning_rate": 2.1590398064097813e-05, "loss": 1.9535, "step": 19636500 }, { "epoch": 56.84, "learning_rate": 2.1589674416450538e-05, "loss": 1.9707, "step": 19637000 }, { "epoch": 56.84, "learning_rate": 2.1588950768803264e-05, "loss": 1.9496, "step": 19637500 }, { "epoch": 56.84, "learning_rate": 2.1588227121155986e-05, "loss": 1.9384, "step": 19638000 }, { "epoch": 56.85, "learning_rate": 2.1587504920804e-05, "loss": 1.9661, "step": 19638500 }, { "epoch": 56.85, "learning_rate": 2.1586781273156724e-05, "loss": 1.9421, "step": 19639000 }, { "epoch": 56.85, "learning_rate": 2.158605762550945e-05, "loss": 1.9665, "step": 19639500 }, { "epoch": 56.85, "learning_rate": 2.158533397786217e-05, "loss": 1.9631, "step": 19640000 }, { "epoch": 56.85, "learning_rate": 2.1584610330214897e-05, "loss": 1.9525, "step": 19640500 }, { "epoch": 56.85, "learning_rate": 2.158388668256762e-05, "loss": 1.9611, "step": 19641000 }, { "epoch": 56.85, "learning_rate": 2.1583163034920342e-05, "loss": 1.9642, "step": 19641500 }, { "epoch": 56.86, "learning_rate": 2.1582439387273064e-05, "loss": 1.9614, "step": 19642000 }, { "epoch": 56.86, "learning_rate": 2.158171573962579e-05, "loss": 1.9788, "step": 19642500 }, { "epoch": 56.86, "learning_rate": 2.1580992091978512e-05, "loss": 1.9642, "step": 19643000 }, { "epoch": 56.86, "learning_rate": 2.1580268444331234e-05, "loss": 1.9552, "step": 19643500 }, { "epoch": 56.86, "learning_rate": 2.157954479668396e-05, "loss": 1.9702, "step": 19644000 }, { "epoch": 56.86, "learning_rate": 2.1578822596331975e-05, "loss": 1.9677, "step": 19644500 }, { "epoch": 56.86, "learning_rate": 2.15780989486847e-05, "loss": 1.9631, "step": 19645000 }, { "epoch": 56.87, "learning_rate": 2.1577375301037423e-05, "loss": 1.9803, "step": 19645500 }, { "epoch": 56.87, "learning_rate": 2.157665310068544e-05, "loss": 1.9615, "step": 19646000 }, { "epoch": 56.87, "learning_rate": 2.1575930900333458e-05, "loss": 1.9631, "step": 19646500 }, { "epoch": 56.87, "learning_rate": 2.1575208699981477e-05, "loss": 1.9578, "step": 19647000 }, { "epoch": 56.87, "learning_rate": 2.15744850523342e-05, "loss": 1.9515, "step": 19647500 }, { "epoch": 56.87, "learning_rate": 2.157376140468692e-05, "loss": 1.976, "step": 19648000 }, { "epoch": 56.87, "learning_rate": 2.1573037757039647e-05, "loss": 1.9958, "step": 19648500 }, { "epoch": 56.88, "learning_rate": 2.157231410939237e-05, "loss": 1.9684, "step": 19649000 }, { "epoch": 56.88, "learning_rate": 2.157159046174509e-05, "loss": 1.948, "step": 19649500 }, { "epoch": 56.88, "learning_rate": 2.1570866814097813e-05, "loss": 1.9849, "step": 19650000 }, { "epoch": 56.88, "learning_rate": 2.157014316645054e-05, "loss": 1.9866, "step": 19650500 }, { "epoch": 56.88, "learning_rate": 2.156941951880326e-05, "loss": 1.9624, "step": 19651000 }, { "epoch": 56.88, "learning_rate": 2.1568695871155987e-05, "loss": 1.9847, "step": 19651500 }, { "epoch": 56.88, "learning_rate": 2.156797222350871e-05, "loss": 1.9736, "step": 19652000 }, { "epoch": 56.89, "learning_rate": 2.156724857586143e-05, "loss": 1.9606, "step": 19652500 }, { "epoch": 56.89, "learning_rate": 2.1566524928214154e-05, "loss": 1.9559, "step": 19653000 }, { "epoch": 56.89, "learning_rate": 2.1565802727862172e-05, "loss": 1.9505, "step": 19653500 }, { "epoch": 56.89, "learning_rate": 2.1565079080214895e-05, "loss": 1.971, "step": 19654000 }, { "epoch": 56.89, "learning_rate": 2.1564355432567617e-05, "loss": 1.9673, "step": 19654500 }, { "epoch": 56.89, "learning_rate": 2.156363178492034e-05, "loss": 1.9632, "step": 19655000 }, { "epoch": 56.89, "learning_rate": 2.1562908137273065e-05, "loss": 1.9599, "step": 19655500 }, { "epoch": 56.9, "learning_rate": 2.1562185936921084e-05, "loss": 1.9625, "step": 19656000 }, { "epoch": 56.9, "learning_rate": 2.1561462289273806e-05, "loss": 1.9913, "step": 19656500 }, { "epoch": 56.9, "learning_rate": 2.1560738641626528e-05, "loss": 1.981, "step": 19657000 }, { "epoch": 56.9, "learning_rate": 2.1560016441274547e-05, "loss": 1.9743, "step": 19657500 }, { "epoch": 56.9, "learning_rate": 2.155929279362727e-05, "loss": 1.9534, "step": 19658000 }, { "epoch": 56.9, "learning_rate": 2.155856914597999e-05, "loss": 1.966, "step": 19658500 }, { "epoch": 56.9, "learning_rate": 2.1557845498332714e-05, "loss": 1.9804, "step": 19659000 }, { "epoch": 56.91, "learning_rate": 2.155712185068544e-05, "loss": 1.9401, "step": 19659500 }, { "epoch": 56.91, "learning_rate": 2.1556398203038165e-05, "loss": 1.9734, "step": 19660000 }, { "epoch": 56.91, "learning_rate": 2.1555674555390887e-05, "loss": 1.9583, "step": 19660500 }, { "epoch": 56.91, "learning_rate": 2.155495090774361e-05, "loss": 1.964, "step": 19661000 }, { "epoch": 56.91, "learning_rate": 2.1554227260096332e-05, "loss": 1.9666, "step": 19661500 }, { "epoch": 56.91, "learning_rate": 2.1553503612449054e-05, "loss": 1.976, "step": 19662000 }, { "epoch": 56.91, "learning_rate": 2.155277996480178e-05, "loss": 1.9587, "step": 19662500 }, { "epoch": 56.92, "learning_rate": 2.15520577644498e-05, "loss": 1.9517, "step": 19663000 }, { "epoch": 56.92, "learning_rate": 2.155133411680252e-05, "loss": 1.9782, "step": 19663500 }, { "epoch": 56.92, "learning_rate": 2.1550610469155243e-05, "loss": 1.9648, "step": 19664000 }, { "epoch": 56.92, "learning_rate": 2.1549886821507965e-05, "loss": 1.9659, "step": 19664500 }, { "epoch": 56.92, "learning_rate": 2.1549164621155984e-05, "loss": 1.9844, "step": 19665000 }, { "epoch": 56.92, "learning_rate": 2.1548440973508706e-05, "loss": 1.9753, "step": 19665500 }, { "epoch": 56.93, "learning_rate": 2.154771732586143e-05, "loss": 1.9553, "step": 19666000 }, { "epoch": 56.93, "learning_rate": 2.1546993678214154e-05, "loss": 2.0003, "step": 19666500 }, { "epoch": 56.93, "learning_rate": 2.154627003056688e-05, "loss": 1.9837, "step": 19667000 }, { "epoch": 56.93, "learning_rate": 2.1545546382919602e-05, "loss": 1.9762, "step": 19667500 }, { "epoch": 56.93, "learning_rate": 2.1544824182567618e-05, "loss": 1.9743, "step": 19668000 }, { "epoch": 56.93, "learning_rate": 2.1544100534920343e-05, "loss": 1.9557, "step": 19668500 }, { "epoch": 56.93, "learning_rate": 2.1543376887273066e-05, "loss": 1.9737, "step": 19669000 }, { "epoch": 56.94, "learning_rate": 2.1542653239625788e-05, "loss": 1.9606, "step": 19669500 }, { "epoch": 56.94, "learning_rate": 2.154192959197851e-05, "loss": 1.9955, "step": 19670000 }, { "epoch": 56.94, "learning_rate": 2.1541205944331236e-05, "loss": 1.9554, "step": 19670500 }, { "epoch": 56.94, "learning_rate": 2.1540482296683958e-05, "loss": 1.973, "step": 19671000 }, { "epoch": 56.94, "learning_rate": 2.153975864903668e-05, "loss": 1.9724, "step": 19671500 }, { "epoch": 56.94, "learning_rate": 2.1539035001389406e-05, "loss": 1.9744, "step": 19672000 }, { "epoch": 56.94, "learning_rate": 2.1538311353742128e-05, "loss": 1.9584, "step": 19672500 }, { "epoch": 56.95, "learning_rate": 2.153758770609485e-05, "loss": 1.9721, "step": 19673000 }, { "epoch": 56.95, "learning_rate": 2.1536864058447572e-05, "loss": 1.9791, "step": 19673500 }, { "epoch": 56.95, "learning_rate": 2.1536140410800298e-05, "loss": 1.9572, "step": 19674000 }, { "epoch": 56.95, "learning_rate": 2.153541676315302e-05, "loss": 1.9361, "step": 19674500 }, { "epoch": 56.95, "learning_rate": 2.1534693115505743e-05, "loss": 1.9845, "step": 19675000 }, { "epoch": 56.95, "learning_rate": 2.1533969467858468e-05, "loss": 1.9507, "step": 19675500 }, { "epoch": 56.95, "learning_rate": 2.1533247267506484e-05, "loss": 1.9672, "step": 19676000 }, { "epoch": 56.96, "learning_rate": 2.1532523619859206e-05, "loss": 1.987, "step": 19676500 }, { "epoch": 56.96, "learning_rate": 2.153179997221193e-05, "loss": 1.9599, "step": 19677000 }, { "epoch": 56.96, "learning_rate": 2.153107777185995e-05, "loss": 1.9523, "step": 19677500 }, { "epoch": 56.96, "learning_rate": 2.1530354124212673e-05, "loss": 1.9628, "step": 19678000 }, { "epoch": 56.96, "learning_rate": 2.1529630476565395e-05, "loss": 1.985, "step": 19678500 }, { "epoch": 56.96, "learning_rate": 2.1528906828918117e-05, "loss": 1.95, "step": 19679000 }, { "epoch": 56.96, "learning_rate": 2.1528184628566136e-05, "loss": 1.9915, "step": 19679500 }, { "epoch": 56.97, "learning_rate": 2.152746098091886e-05, "loss": 1.9632, "step": 19680000 }, { "epoch": 56.97, "learning_rate": 2.152673733327158e-05, "loss": 1.9815, "step": 19680500 }, { "epoch": 56.97, "learning_rate": 2.1526013685624306e-05, "loss": 1.9794, "step": 19681000 }, { "epoch": 56.97, "learning_rate": 2.1525291485272325e-05, "loss": 1.9876, "step": 19681500 }, { "epoch": 56.97, "learning_rate": 2.1524567837625047e-05, "loss": 1.9702, "step": 19682000 }, { "epoch": 56.97, "learning_rate": 2.152384418997777e-05, "loss": 1.9883, "step": 19682500 }, { "epoch": 56.97, "learning_rate": 2.152312198962579e-05, "loss": 1.9543, "step": 19683000 }, { "epoch": 56.98, "learning_rate": 2.152239834197851e-05, "loss": 1.969, "step": 19683500 }, { "epoch": 56.98, "learning_rate": 2.1521674694331233e-05, "loss": 1.9501, "step": 19684000 }, { "epoch": 56.98, "learning_rate": 2.1520951046683955e-05, "loss": 1.979, "step": 19684500 }, { "epoch": 56.98, "learning_rate": 2.152022739903668e-05, "loss": 1.9686, "step": 19685000 }, { "epoch": 56.98, "learning_rate": 2.15195051986847e-05, "loss": 1.9717, "step": 19685500 }, { "epoch": 56.98, "learning_rate": 2.1518781551037422e-05, "loss": 1.9929, "step": 19686000 }, { "epoch": 56.98, "learning_rate": 2.1518057903390144e-05, "loss": 1.9657, "step": 19686500 }, { "epoch": 56.99, "learning_rate": 2.151733425574287e-05, "loss": 1.949, "step": 19687000 }, { "epoch": 56.99, "learning_rate": 2.1516610608095592e-05, "loss": 1.9635, "step": 19687500 }, { "epoch": 56.99, "learning_rate": 2.1515886960448314e-05, "loss": 1.9742, "step": 19688000 }, { "epoch": 56.99, "learning_rate": 2.1515163312801037e-05, "loss": 1.953, "step": 19688500 }, { "epoch": 56.99, "learning_rate": 2.1514439665153762e-05, "loss": 1.9405, "step": 19689000 }, { "epoch": 56.99, "learning_rate": 2.1513716017506485e-05, "loss": 1.9907, "step": 19689500 }, { "epoch": 56.99, "learning_rate": 2.1512992369859207e-05, "loss": 1.9667, "step": 19690000 }, { "epoch": 57.0, "learning_rate": 2.1512268722211932e-05, "loss": 1.9593, "step": 19690500 }, { "epoch": 57.0, "learning_rate": 2.1511545074564655e-05, "loss": 1.9784, "step": 19691000 }, { "epoch": 57.0, "learning_rate": 2.1510821426917377e-05, "loss": 1.9584, "step": 19691500 }, { "epoch": 57.0, "eval_accuracy": 0.6764066632297837, "eval_accuracy_mlm": 0.6428924696020535, "eval_accuracy_nsp": 0.8560700847414904, "eval_loss": 2.166233777999878, "eval_runtime": 331.9117, "eval_samples_per_second": 1314.765, "eval_steps_per_second": 54.783, "step": 19691904 }, { "epoch": 57.0, "learning_rate": 2.1510099226565396e-05, "loss": 1.9737, "step": 19692000 }, { "epoch": 57.0, "learning_rate": 2.150937557891812e-05, "loss": 1.9697, "step": 19692500 }, { "epoch": 57.0, "learning_rate": 2.1508651931270844e-05, "loss": 1.9555, "step": 19693000 }, { "epoch": 57.0, "learning_rate": 2.150792973091886e-05, "loss": 1.9462, "step": 19693500 }, { "epoch": 57.01, "learning_rate": 2.1507206083271585e-05, "loss": 1.9601, "step": 19694000 }, { "epoch": 57.01, "learning_rate": 2.1506482435624307e-05, "loss": 1.9598, "step": 19694500 }, { "epoch": 57.01, "learning_rate": 2.150575878797703e-05, "loss": 1.9496, "step": 19695000 }, { "epoch": 57.01, "learning_rate": 2.150503514032975e-05, "loss": 1.9817, "step": 19695500 }, { "epoch": 57.01, "learning_rate": 2.1504311492682477e-05, "loss": 1.9445, "step": 19696000 }, { "epoch": 57.01, "learning_rate": 2.15035878450352e-05, "loss": 1.9501, "step": 19696500 }, { "epoch": 57.01, "learning_rate": 2.150286419738792e-05, "loss": 1.9462, "step": 19697000 }, { "epoch": 57.02, "learning_rate": 2.150214199703594e-05, "loss": 1.9322, "step": 19697500 }, { "epoch": 57.02, "learning_rate": 2.1501418349388663e-05, "loss": 1.9825, "step": 19698000 }, { "epoch": 57.02, "learning_rate": 2.1500694701741385e-05, "loss": 1.9342, "step": 19698500 }, { "epoch": 57.02, "learning_rate": 2.149997105409411e-05, "loss": 1.9475, "step": 19699000 }, { "epoch": 57.02, "learning_rate": 2.1499247406446833e-05, "loss": 1.9187, "step": 19699500 }, { "epoch": 57.02, "learning_rate": 2.149852375879956e-05, "loss": 1.9676, "step": 19700000 }, { "epoch": 57.02, "learning_rate": 2.1497801558447574e-05, "loss": 1.9836, "step": 19700500 }, { "epoch": 57.03, "learning_rate": 2.1497077910800296e-05, "loss": 1.9237, "step": 19701000 }, { "epoch": 57.03, "learning_rate": 2.149635715774361e-05, "loss": 1.9256, "step": 19701500 }, { "epoch": 57.03, "learning_rate": 2.1495633510096334e-05, "loss": 1.9687, "step": 19702000 }, { "epoch": 57.03, "learning_rate": 2.1494909862449056e-05, "loss": 1.9474, "step": 19702500 }, { "epoch": 57.03, "learning_rate": 2.149418621480178e-05, "loss": 1.9543, "step": 19703000 }, { "epoch": 57.03, "learning_rate": 2.14934625671545e-05, "loss": 1.9065, "step": 19703500 }, { "epoch": 57.04, "learning_rate": 2.1492738919507227e-05, "loss": 1.9547, "step": 19704000 }, { "epoch": 57.04, "learning_rate": 2.149201527185995e-05, "loss": 1.9595, "step": 19704500 }, { "epoch": 57.04, "learning_rate": 2.1491291624212674e-05, "loss": 1.9643, "step": 19705000 }, { "epoch": 57.04, "learning_rate": 2.1490567976565397e-05, "loss": 1.9671, "step": 19705500 }, { "epoch": 57.04, "learning_rate": 2.148984432891812e-05, "loss": 1.9669, "step": 19706000 }, { "epoch": 57.04, "learning_rate": 2.148912068127084e-05, "loss": 1.9643, "step": 19706500 }, { "epoch": 57.04, "learning_rate": 2.1488397033623563e-05, "loss": 1.9543, "step": 19707000 }, { "epoch": 57.05, "learning_rate": 2.1487674833271586e-05, "loss": 1.963, "step": 19707500 }, { "epoch": 57.05, "learning_rate": 2.1486951185624308e-05, "loss": 1.9328, "step": 19708000 }, { "epoch": 57.05, "learning_rate": 2.148622753797703e-05, "loss": 1.971, "step": 19708500 }, { "epoch": 57.05, "learning_rate": 2.1485503890329752e-05, "loss": 1.957, "step": 19709000 }, { "epoch": 57.05, "learning_rate": 2.1484780242682475e-05, "loss": 1.9477, "step": 19709500 }, { "epoch": 57.05, "learning_rate": 2.14840565950352e-05, "loss": 1.9622, "step": 19710000 }, { "epoch": 57.05, "learning_rate": 2.1483332947387922e-05, "loss": 1.9549, "step": 19710500 }, { "epoch": 57.06, "learning_rate": 2.1482610747035938e-05, "loss": 1.9372, "step": 19711000 }, { "epoch": 57.06, "learning_rate": 2.1481887099388664e-05, "loss": 1.9336, "step": 19711500 }, { "epoch": 57.06, "learning_rate": 2.1481163451741386e-05, "loss": 1.9321, "step": 19712000 }, { "epoch": 57.06, "learning_rate": 2.148043980409411e-05, "loss": 1.9475, "step": 19712500 }, { "epoch": 57.06, "learning_rate": 2.1479716156446834e-05, "loss": 1.9703, "step": 19713000 }, { "epoch": 57.06, "learning_rate": 2.1478992508799556e-05, "loss": 1.9622, "step": 19713500 }, { "epoch": 57.06, "learning_rate": 2.1478268861152278e-05, "loss": 1.9357, "step": 19714000 }, { "epoch": 57.07, "learning_rate": 2.1477545213505004e-05, "loss": 1.9427, "step": 19714500 }, { "epoch": 57.07, "learning_rate": 2.1476821565857726e-05, "loss": 1.9726, "step": 19715000 }, { "epoch": 57.07, "learning_rate": 2.1476097918210448e-05, "loss": 1.9291, "step": 19715500 }, { "epoch": 57.07, "learning_rate": 2.1475375717858467e-05, "loss": 1.9686, "step": 19716000 }, { "epoch": 57.07, "learning_rate": 2.147465207021119e-05, "loss": 1.9627, "step": 19716500 }, { "epoch": 57.07, "learning_rate": 2.147392842256391e-05, "loss": 1.9598, "step": 19717000 }, { "epoch": 57.07, "learning_rate": 2.1473204774916637e-05, "loss": 1.9804, "step": 19717500 }, { "epoch": 57.08, "learning_rate": 2.147248112726936e-05, "loss": 1.9423, "step": 19718000 }, { "epoch": 57.08, "learning_rate": 2.1471760374212672e-05, "loss": 1.9469, "step": 19718500 }, { "epoch": 57.08, "learning_rate": 2.1471036726565397e-05, "loss": 1.9656, "step": 19719000 }, { "epoch": 57.08, "learning_rate": 2.1470314526213413e-05, "loss": 1.9548, "step": 19719500 }, { "epoch": 57.08, "learning_rate": 2.146959087856614e-05, "loss": 1.9448, "step": 19720000 }, { "epoch": 57.08, "learning_rate": 2.146886723091886e-05, "loss": 1.9403, "step": 19720500 }, { "epoch": 57.08, "learning_rate": 2.1468143583271583e-05, "loss": 1.9558, "step": 19721000 }, { "epoch": 57.09, "learning_rate": 2.1467419935624305e-05, "loss": 1.955, "step": 19721500 }, { "epoch": 57.09, "learning_rate": 2.1466696287977028e-05, "loss": 1.9367, "step": 19722000 }, { "epoch": 57.09, "learning_rate": 2.1465972640329753e-05, "loss": 1.9489, "step": 19722500 }, { "epoch": 57.09, "learning_rate": 2.1465248992682475e-05, "loss": 1.935, "step": 19723000 }, { "epoch": 57.09, "learning_rate": 2.1464526792330494e-05, "loss": 1.9602, "step": 19723500 }, { "epoch": 57.09, "learning_rate": 2.1463803144683217e-05, "loss": 1.9601, "step": 19724000 }, { "epoch": 57.09, "learning_rate": 2.146307949703594e-05, "loss": 1.9496, "step": 19724500 }, { "epoch": 57.1, "learning_rate": 2.1462355849388664e-05, "loss": 1.9541, "step": 19725000 }, { "epoch": 57.1, "learning_rate": 2.1461632201741387e-05, "loss": 1.9418, "step": 19725500 }, { "epoch": 57.1, "learning_rate": 2.1460910001389402e-05, "loss": 1.9731, "step": 19726000 }, { "epoch": 57.1, "learning_rate": 2.1460186353742128e-05, "loss": 1.9493, "step": 19726500 }, { "epoch": 57.1, "learning_rate": 2.145946270609485e-05, "loss": 1.9662, "step": 19727000 }, { "epoch": 57.1, "learning_rate": 2.1458739058447576e-05, "loss": 1.9501, "step": 19727500 }, { "epoch": 57.1, "learning_rate": 2.145801685809559e-05, "loss": 1.9657, "step": 19728000 }, { "epoch": 57.11, "learning_rate": 2.1457293210448313e-05, "loss": 1.9531, "step": 19728500 }, { "epoch": 57.11, "learning_rate": 2.145656956280104e-05, "loss": 1.9828, "step": 19729000 }, { "epoch": 57.11, "learning_rate": 2.145584591515376e-05, "loss": 1.9449, "step": 19729500 }, { "epoch": 57.11, "learning_rate": 2.1455122267506487e-05, "loss": 1.971, "step": 19730000 }, { "epoch": 57.11, "learning_rate": 2.145439861985921e-05, "loss": 1.9665, "step": 19730500 }, { "epoch": 57.11, "learning_rate": 2.145367497221193e-05, "loss": 1.963, "step": 19731000 }, { "epoch": 57.11, "learning_rate": 2.1452951324564654e-05, "loss": 1.9337, "step": 19731500 }, { "epoch": 57.12, "learning_rate": 2.1452227676917376e-05, "loss": 1.9466, "step": 19732000 }, { "epoch": 57.12, "learning_rate": 2.14515040292701e-05, "loss": 1.9525, "step": 19732500 }, { "epoch": 57.12, "learning_rate": 2.1450781828918117e-05, "loss": 1.9539, "step": 19733000 }, { "epoch": 57.12, "learning_rate": 2.1450058181270843e-05, "loss": 1.9659, "step": 19733500 }, { "epoch": 57.12, "learning_rate": 2.144933598091886e-05, "loss": 1.9598, "step": 19734000 }, { "epoch": 57.12, "learning_rate": 2.1448612333271584e-05, "loss": 1.9683, "step": 19734500 }, { "epoch": 57.12, "learning_rate": 2.1447888685624306e-05, "loss": 1.9468, "step": 19735000 }, { "epoch": 57.13, "learning_rate": 2.144716503797703e-05, "loss": 1.9474, "step": 19735500 }, { "epoch": 57.13, "learning_rate": 2.1446441390329754e-05, "loss": 1.9648, "step": 19736000 }, { "epoch": 57.13, "learning_rate": 2.144571918997777e-05, "loss": 1.9377, "step": 19736500 }, { "epoch": 57.13, "learning_rate": 2.1444995542330492e-05, "loss": 1.9758, "step": 19737000 }, { "epoch": 57.13, "learning_rate": 2.1444271894683217e-05, "loss": 1.9498, "step": 19737500 }, { "epoch": 57.13, "learning_rate": 2.144354824703594e-05, "loss": 1.9327, "step": 19738000 }, { "epoch": 57.13, "learning_rate": 2.1442824599388665e-05, "loss": 1.9401, "step": 19738500 }, { "epoch": 57.14, "learning_rate": 2.144210239903668e-05, "loss": 1.9361, "step": 19739000 }, { "epoch": 57.14, "learning_rate": 2.1441378751389403e-05, "loss": 1.9593, "step": 19739500 }, { "epoch": 57.14, "learning_rate": 2.1440656551037422e-05, "loss": 1.964, "step": 19740000 }, { "epoch": 57.14, "learning_rate": 2.1439932903390144e-05, "loss": 1.9345, "step": 19740500 }, { "epoch": 57.14, "learning_rate": 2.1439210703038163e-05, "loss": 1.9596, "step": 19741000 }, { "epoch": 57.14, "learning_rate": 2.143848705539089e-05, "loss": 1.9451, "step": 19741500 }, { "epoch": 57.15, "learning_rate": 2.143776340774361e-05, "loss": 1.9658, "step": 19742000 }, { "epoch": 57.15, "learning_rate": 2.1437039760096333e-05, "loss": 1.9373, "step": 19742500 }, { "epoch": 57.15, "learning_rate": 2.1436316112449055e-05, "loss": 1.9361, "step": 19743000 }, { "epoch": 57.15, "learning_rate": 2.1435592464801778e-05, "loss": 1.9661, "step": 19743500 }, { "epoch": 57.15, "learning_rate": 2.1434868817154503e-05, "loss": 1.9849, "step": 19744000 }, { "epoch": 57.15, "learning_rate": 2.1434145169507226e-05, "loss": 1.938, "step": 19744500 }, { "epoch": 57.15, "learning_rate": 2.143342152185995e-05, "loss": 1.9264, "step": 19745000 }, { "epoch": 57.16, "learning_rate": 2.1432697874212673e-05, "loss": 1.9563, "step": 19745500 }, { "epoch": 57.16, "learning_rate": 2.1431974226565396e-05, "loss": 1.9528, "step": 19746000 }, { "epoch": 57.16, "learning_rate": 2.1431250578918118e-05, "loss": 1.9728, "step": 19746500 }, { "epoch": 57.16, "learning_rate": 2.143052693127084e-05, "loss": 1.9444, "step": 19747000 }, { "epoch": 57.16, "learning_rate": 2.1429803283623566e-05, "loss": 1.9606, "step": 19747500 }, { "epoch": 57.16, "learning_rate": 2.1429079635976288e-05, "loss": 1.9634, "step": 19748000 }, { "epoch": 57.16, "learning_rate": 2.1428357435624303e-05, "loss": 1.9331, "step": 19748500 }, { "epoch": 57.17, "learning_rate": 2.142763378797703e-05, "loss": 1.9711, "step": 19749000 }, { "epoch": 57.17, "learning_rate": 2.1426910140329755e-05, "loss": 1.9593, "step": 19749500 }, { "epoch": 57.17, "learning_rate": 2.1426186492682477e-05, "loss": 1.9574, "step": 19750000 }, { "epoch": 57.17, "learning_rate": 2.14254628450352e-05, "loss": 1.957, "step": 19750500 }, { "epoch": 57.17, "learning_rate": 2.1424740644683218e-05, "loss": 1.9436, "step": 19751000 }, { "epoch": 57.17, "learning_rate": 2.1424018444331234e-05, "loss": 1.9631, "step": 19751500 }, { "epoch": 57.17, "learning_rate": 2.1423294796683956e-05, "loss": 1.9408, "step": 19752000 }, { "epoch": 57.18, "learning_rate": 2.142257114903668e-05, "loss": 1.9415, "step": 19752500 }, { "epoch": 57.18, "learning_rate": 2.1421847501389404e-05, "loss": 1.9568, "step": 19753000 }, { "epoch": 57.18, "learning_rate": 2.142112385374213e-05, "loss": 1.9674, "step": 19753500 }, { "epoch": 57.18, "learning_rate": 2.142040020609485e-05, "loss": 1.9618, "step": 19754000 }, { "epoch": 57.18, "learning_rate": 2.1419676558447574e-05, "loss": 1.952, "step": 19754500 }, { "epoch": 57.18, "learning_rate": 2.1418952910800296e-05, "loss": 1.9549, "step": 19755000 }, { "epoch": 57.18, "learning_rate": 2.141822926315302e-05, "loss": 1.9581, "step": 19755500 }, { "epoch": 57.19, "learning_rate": 2.1417505615505744e-05, "loss": 1.9635, "step": 19756000 }, { "epoch": 57.19, "learning_rate": 2.141678196785847e-05, "loss": 1.9483, "step": 19756500 }, { "epoch": 57.19, "learning_rate": 2.1416058320211192e-05, "loss": 1.9716, "step": 19757000 }, { "epoch": 57.19, "learning_rate": 2.1415334672563914e-05, "loss": 1.9463, "step": 19757500 }, { "epoch": 57.19, "learning_rate": 2.1414611024916636e-05, "loss": 1.9547, "step": 19758000 }, { "epoch": 57.19, "learning_rate": 2.1413888824564655e-05, "loss": 1.9518, "step": 19758500 }, { "epoch": 57.19, "learning_rate": 2.141316662421267e-05, "loss": 1.967, "step": 19759000 }, { "epoch": 57.2, "learning_rate": 2.1412442976565393e-05, "loss": 1.9507, "step": 19759500 }, { "epoch": 57.2, "learning_rate": 2.141171932891812e-05, "loss": 1.9626, "step": 19760000 }, { "epoch": 57.2, "learning_rate": 2.1410995681270844e-05, "loss": 1.9543, "step": 19760500 }, { "epoch": 57.2, "learning_rate": 2.1410272033623566e-05, "loss": 1.9688, "step": 19761000 }, { "epoch": 57.2, "learning_rate": 2.1409549833271582e-05, "loss": 1.9466, "step": 19761500 }, { "epoch": 57.2, "learning_rate": 2.14088276329196e-05, "loss": 1.949, "step": 19762000 }, { "epoch": 57.2, "learning_rate": 2.1408103985272323e-05, "loss": 1.9629, "step": 19762500 }, { "epoch": 57.21, "learning_rate": 2.1407380337625045e-05, "loss": 1.9606, "step": 19763000 }, { "epoch": 57.21, "learning_rate": 2.1406656689977768e-05, "loss": 1.9438, "step": 19763500 }, { "epoch": 57.21, "learning_rate": 2.1405933042330493e-05, "loss": 1.9493, "step": 19764000 }, { "epoch": 57.21, "learning_rate": 2.140520939468322e-05, "loss": 1.974, "step": 19764500 }, { "epoch": 57.21, "learning_rate": 2.140448574703594e-05, "loss": 1.9745, "step": 19765000 }, { "epoch": 57.21, "learning_rate": 2.1403763546683957e-05, "loss": 1.9416, "step": 19765500 }, { "epoch": 57.21, "learning_rate": 2.1403041346331976e-05, "loss": 1.9349, "step": 19766000 }, { "epoch": 57.22, "learning_rate": 2.1402317698684698e-05, "loss": 1.9434, "step": 19766500 }, { "epoch": 57.22, "learning_rate": 2.140159405103742e-05, "loss": 1.9495, "step": 19767000 }, { "epoch": 57.22, "learning_rate": 2.1400870403390146e-05, "loss": 1.9646, "step": 19767500 }, { "epoch": 57.22, "learning_rate": 2.140014675574287e-05, "loss": 1.9622, "step": 19768000 }, { "epoch": 57.22, "learning_rate": 2.1399423108095594e-05, "loss": 1.9786, "step": 19768500 }, { "epoch": 57.22, "learning_rate": 2.1398699460448316e-05, "loss": 1.963, "step": 19769000 }, { "epoch": 57.22, "learning_rate": 2.1397975812801038e-05, "loss": 1.9716, "step": 19769500 }, { "epoch": 57.23, "learning_rate": 2.139725216515376e-05, "loss": 1.9503, "step": 19770000 }, { "epoch": 57.23, "learning_rate": 2.1396528517506483e-05, "loss": 1.9769, "step": 19770500 }, { "epoch": 57.23, "learning_rate": 2.13958063171545e-05, "loss": 1.9487, "step": 19771000 }, { "epoch": 57.23, "learning_rate": 2.1395082669507227e-05, "loss": 1.9827, "step": 19771500 }, { "epoch": 57.23, "learning_rate": 2.139435902185995e-05, "loss": 1.9532, "step": 19772000 }, { "epoch": 57.23, "learning_rate": 2.139363537421267e-05, "loss": 1.9581, "step": 19772500 }, { "epoch": 57.23, "learning_rate": 2.1392911726565397e-05, "loss": 1.9602, "step": 19773000 }, { "epoch": 57.24, "learning_rate": 2.139218807891812e-05, "loss": 1.9741, "step": 19773500 }, { "epoch": 57.24, "learning_rate": 2.139146443127084e-05, "loss": 1.9686, "step": 19774000 }, { "epoch": 57.24, "learning_rate": 2.1390740783623564e-05, "loss": 1.9353, "step": 19774500 }, { "epoch": 57.24, "learning_rate": 2.139001713597629e-05, "loss": 1.959, "step": 19775000 }, { "epoch": 57.24, "learning_rate": 2.1389293488329012e-05, "loss": 1.964, "step": 19775500 }, { "epoch": 57.24, "learning_rate": 2.138857128797703e-05, "loss": 1.9587, "step": 19776000 }, { "epoch": 57.24, "learning_rate": 2.1387847640329753e-05, "loss": 1.9707, "step": 19776500 }, { "epoch": 57.25, "learning_rate": 2.1387123992682475e-05, "loss": 1.9609, "step": 19777000 }, { "epoch": 57.25, "learning_rate": 2.1386400345035197e-05, "loss": 1.9425, "step": 19777500 }, { "epoch": 57.25, "learning_rate": 2.1385676697387923e-05, "loss": 1.9757, "step": 19778000 }, { "epoch": 57.25, "learning_rate": 2.138495304974065e-05, "loss": 1.9741, "step": 19778500 }, { "epoch": 57.25, "learning_rate": 2.1384230849388664e-05, "loss": 1.971, "step": 19779000 }, { "epoch": 57.25, "learning_rate": 2.1383507201741386e-05, "loss": 1.9623, "step": 19779500 }, { "epoch": 57.26, "learning_rate": 2.138278355409411e-05, "loss": 1.9754, "step": 19780000 }, { "epoch": 57.26, "learning_rate": 2.1382059906446834e-05, "loss": 1.9516, "step": 19780500 }, { "epoch": 57.26, "learning_rate": 2.1381336258799557e-05, "loss": 1.9529, "step": 19781000 }, { "epoch": 57.26, "learning_rate": 2.138061261115228e-05, "loss": 1.9603, "step": 19781500 }, { "epoch": 57.26, "learning_rate": 2.1379888963505004e-05, "loss": 1.9657, "step": 19782000 }, { "epoch": 57.26, "learning_rate": 2.1379165315857727e-05, "loss": 1.9526, "step": 19782500 }, { "epoch": 57.26, "learning_rate": 2.137844166821045e-05, "loss": 1.9525, "step": 19783000 }, { "epoch": 57.27, "learning_rate": 2.137772091515376e-05, "loss": 1.952, "step": 19783500 }, { "epoch": 57.27, "learning_rate": 2.1376997267506483e-05, "loss": 1.9671, "step": 19784000 }, { "epoch": 57.27, "learning_rate": 2.137627361985921e-05, "loss": 1.9515, "step": 19784500 }, { "epoch": 57.27, "learning_rate": 2.137554997221193e-05, "loss": 1.9538, "step": 19785000 }, { "epoch": 57.27, "learning_rate": 2.1374826324564653e-05, "loss": 1.976, "step": 19785500 }, { "epoch": 57.27, "learning_rate": 2.137410267691738e-05, "loss": 1.9589, "step": 19786000 }, { "epoch": 57.27, "learning_rate": 2.13733790292701e-05, "loss": 1.9411, "step": 19786500 }, { "epoch": 57.28, "learning_rate": 2.1372655381622824e-05, "loss": 1.9813, "step": 19787000 }, { "epoch": 57.28, "learning_rate": 2.137193173397555e-05, "loss": 1.9522, "step": 19787500 }, { "epoch": 57.28, "learning_rate": 2.1371209533623565e-05, "loss": 1.9503, "step": 19788000 }, { "epoch": 57.28, "learning_rate": 2.1370485885976287e-05, "loss": 1.9275, "step": 19788500 }, { "epoch": 57.28, "learning_rate": 2.136976223832901e-05, "loss": 1.9423, "step": 19789000 }, { "epoch": 57.28, "learning_rate": 2.1369038590681735e-05, "loss": 1.9591, "step": 19789500 }, { "epoch": 57.28, "learning_rate": 2.1368317837625047e-05, "loss": 1.9517, "step": 19790000 }, { "epoch": 57.29, "learning_rate": 2.1367594189977773e-05, "loss": 1.9491, "step": 19790500 }, { "epoch": 57.29, "learning_rate": 2.1366870542330495e-05, "loss": 1.948, "step": 19791000 }, { "epoch": 57.29, "learning_rate": 2.1366146894683217e-05, "loss": 1.9519, "step": 19791500 }, { "epoch": 57.29, "learning_rate": 2.136542324703594e-05, "loss": 1.9701, "step": 19792000 }, { "epoch": 57.29, "learning_rate": 2.136469959938866e-05, "loss": 1.9565, "step": 19792500 }, { "epoch": 57.29, "learning_rate": 2.136397739903668e-05, "loss": 1.9463, "step": 19793000 }, { "epoch": 57.29, "learning_rate": 2.1363253751389403e-05, "loss": 1.9298, "step": 19793500 }, { "epoch": 57.3, "learning_rate": 2.136253010374213e-05, "loss": 1.9587, "step": 19794000 }, { "epoch": 57.3, "learning_rate": 2.136180645609485e-05, "loss": 1.9681, "step": 19794500 }, { "epoch": 57.3, "learning_rate": 2.1361082808447573e-05, "loss": 1.9853, "step": 19795000 }, { "epoch": 57.3, "learning_rate": 2.13603591608003e-05, "loss": 1.9567, "step": 19795500 }, { "epoch": 57.3, "learning_rate": 2.1359636960448314e-05, "loss": 1.9499, "step": 19796000 }, { "epoch": 57.3, "learning_rate": 2.1358913312801036e-05, "loss": 1.9789, "step": 19796500 }, { "epoch": 57.3, "learning_rate": 2.1358189665153762e-05, "loss": 1.9675, "step": 19797000 }, { "epoch": 57.31, "learning_rate": 2.1357466017506488e-05, "loss": 1.9409, "step": 19797500 }, { "epoch": 57.31, "learning_rate": 2.1356743817154503e-05, "loss": 1.9398, "step": 19798000 }, { "epoch": 57.31, "learning_rate": 2.1356020169507225e-05, "loss": 1.9579, "step": 19798500 }, { "epoch": 57.31, "learning_rate": 2.135529652185995e-05, "loss": 1.9517, "step": 19799000 }, { "epoch": 57.31, "learning_rate": 2.1354574321507966e-05, "loss": 1.9684, "step": 19799500 }, { "epoch": 57.31, "learning_rate": 2.135385067386069e-05, "loss": 1.9568, "step": 19800000 }, { "epoch": 57.31, "learning_rate": 2.135312702621341e-05, "loss": 1.9409, "step": 19800500 }, { "epoch": 57.32, "learning_rate": 2.1352403378566137e-05, "loss": 1.9484, "step": 19801000 }, { "epoch": 57.32, "learning_rate": 2.1351679730918862e-05, "loss": 1.9627, "step": 19801500 }, { "epoch": 57.32, "learning_rate": 2.1350956083271584e-05, "loss": 1.959, "step": 19802000 }, { "epoch": 57.32, "learning_rate": 2.1350232435624307e-05, "loss": 1.959, "step": 19802500 }, { "epoch": 57.32, "learning_rate": 2.134950878797703e-05, "loss": 1.9662, "step": 19803000 }, { "epoch": 57.32, "learning_rate": 2.134878514032975e-05, "loss": 1.9552, "step": 19803500 }, { "epoch": 57.32, "learning_rate": 2.134806293997777e-05, "loss": 1.9576, "step": 19804000 }, { "epoch": 57.33, "learning_rate": 2.134734073962579e-05, "loss": 1.9727, "step": 19804500 }, { "epoch": 57.33, "learning_rate": 2.134661709197851e-05, "loss": 1.9428, "step": 19805000 }, { "epoch": 57.33, "learning_rate": 2.1345893444331237e-05, "loss": 1.9771, "step": 19805500 }, { "epoch": 57.33, "learning_rate": 2.134516979668396e-05, "loss": 1.9404, "step": 19806000 }, { "epoch": 57.33, "learning_rate": 2.134444614903668e-05, "loss": 1.9657, "step": 19806500 }, { "epoch": 57.33, "learning_rate": 2.13437239486847e-05, "loss": 1.9265, "step": 19807000 }, { "epoch": 57.33, "learning_rate": 2.1343000301037423e-05, "loss": 1.9487, "step": 19807500 }, { "epoch": 57.34, "learning_rate": 2.1342276653390145e-05, "loss": 1.9411, "step": 19808000 }, { "epoch": 57.34, "learning_rate": 2.1341553005742867e-05, "loss": 1.9782, "step": 19808500 }, { "epoch": 57.34, "learning_rate": 2.1340829358095593e-05, "loss": 1.9532, "step": 19809000 }, { "epoch": 57.34, "learning_rate": 2.1340105710448315e-05, "loss": 1.9512, "step": 19809500 }, { "epoch": 57.34, "learning_rate": 2.1339383510096334e-05, "loss": 1.9803, "step": 19810000 }, { "epoch": 57.34, "learning_rate": 2.1338659862449056e-05, "loss": 1.9378, "step": 19810500 }, { "epoch": 57.34, "learning_rate": 2.1337936214801778e-05, "loss": 1.9344, "step": 19811000 }, { "epoch": 57.35, "learning_rate": 2.13372125671545e-05, "loss": 1.9352, "step": 19811500 }, { "epoch": 57.35, "learning_rate": 2.1336488919507226e-05, "loss": 1.9559, "step": 19812000 }, { "epoch": 57.35, "learning_rate": 2.1335765271859952e-05, "loss": 1.9754, "step": 19812500 }, { "epoch": 57.35, "learning_rate": 2.1335041624212674e-05, "loss": 1.9375, "step": 19813000 }, { "epoch": 57.35, "learning_rate": 2.133431942386069e-05, "loss": 1.9474, "step": 19813500 }, { "epoch": 57.35, "learning_rate": 2.1333595776213415e-05, "loss": 1.9267, "step": 19814000 }, { "epoch": 57.35, "learning_rate": 2.133287357586143e-05, "loss": 1.9443, "step": 19814500 }, { "epoch": 57.36, "learning_rate": 2.1332149928214153e-05, "loss": 1.9431, "step": 19815000 }, { "epoch": 57.36, "learning_rate": 2.133142628056688e-05, "loss": 1.9689, "step": 19815500 }, { "epoch": 57.36, "learning_rate": 2.13307026329196e-05, "loss": 1.9719, "step": 19816000 }, { "epoch": 57.36, "learning_rate": 2.1329978985272326e-05, "loss": 1.955, "step": 19816500 }, { "epoch": 57.36, "learning_rate": 2.132925533762505e-05, "loss": 1.9399, "step": 19817000 }, { "epoch": 57.36, "learning_rate": 2.1328533137273064e-05, "loss": 1.964, "step": 19817500 }, { "epoch": 57.36, "learning_rate": 2.132780948962579e-05, "loss": 1.9794, "step": 19818000 }, { "epoch": 57.37, "learning_rate": 2.1327085841978512e-05, "loss": 1.9703, "step": 19818500 }, { "epoch": 57.37, "learning_rate": 2.1326362194331234e-05, "loss": 1.9401, "step": 19819000 }, { "epoch": 57.37, "learning_rate": 2.1325638546683957e-05, "loss": 1.967, "step": 19819500 }, { "epoch": 57.37, "learning_rate": 2.1324914899036682e-05, "loss": 1.9667, "step": 19820000 }, { "epoch": 57.37, "learning_rate": 2.1324191251389404e-05, "loss": 1.9595, "step": 19820500 }, { "epoch": 57.37, "learning_rate": 2.1323469051037423e-05, "loss": 1.9764, "step": 19821000 }, { "epoch": 57.38, "learning_rate": 2.1322745403390146e-05, "loss": 1.9579, "step": 19821500 }, { "epoch": 57.38, "learning_rate": 2.1322021755742868e-05, "loss": 1.9469, "step": 19822000 }, { "epoch": 57.38, "learning_rate": 2.132129810809559e-05, "loss": 1.9619, "step": 19822500 }, { "epoch": 57.38, "learning_rate": 2.132057590774361e-05, "loss": 1.9498, "step": 19823000 }, { "epoch": 57.38, "learning_rate": 2.131985226009633e-05, "loss": 1.9767, "step": 19823500 }, { "epoch": 57.38, "learning_rate": 2.1319128612449057e-05, "loss": 1.9533, "step": 19824000 }, { "epoch": 57.38, "learning_rate": 2.131840496480178e-05, "loss": 1.9631, "step": 19824500 }, { "epoch": 57.39, "learning_rate": 2.1317681317154505e-05, "loss": 1.9616, "step": 19825000 }, { "epoch": 57.39, "learning_rate": 2.131695911680252e-05, "loss": 1.962, "step": 19825500 }, { "epoch": 57.39, "learning_rate": 2.1316235469155242e-05, "loss": 1.9401, "step": 19826000 }, { "epoch": 57.39, "learning_rate": 2.1315511821507965e-05, "loss": 1.9676, "step": 19826500 }, { "epoch": 57.39, "learning_rate": 2.131478817386069e-05, "loss": 1.9684, "step": 19827000 }, { "epoch": 57.39, "learning_rate": 2.1314064526213416e-05, "loss": 1.9607, "step": 19827500 }, { "epoch": 57.39, "learning_rate": 2.1313340878566138e-05, "loss": 1.9839, "step": 19828000 }, { "epoch": 57.4, "learning_rate": 2.131261723091886e-05, "loss": 1.9703, "step": 19828500 }, { "epoch": 57.4, "learning_rate": 2.1311893583271583e-05, "loss": 1.9395, "step": 19829000 }, { "epoch": 57.4, "learning_rate": 2.1311169935624305e-05, "loss": 1.9428, "step": 19829500 }, { "epoch": 57.4, "learning_rate": 2.131044628797703e-05, "loss": 1.9351, "step": 19830000 }, { "epoch": 57.4, "learning_rate": 2.1309722640329753e-05, "loss": 1.984, "step": 19830500 }, { "epoch": 57.4, "learning_rate": 2.130899899268248e-05, "loss": 1.9647, "step": 19831000 }, { "epoch": 57.4, "learning_rate": 2.13082753450352e-05, "loss": 1.9535, "step": 19831500 }, { "epoch": 57.41, "learning_rate": 2.1307551697387923e-05, "loss": 1.9651, "step": 19832000 }, { "epoch": 57.41, "learning_rate": 2.1306828049740645e-05, "loss": 1.9326, "step": 19832500 }, { "epoch": 57.41, "learning_rate": 2.1306104402093367e-05, "loss": 1.963, "step": 19833000 }, { "epoch": 57.41, "learning_rate": 2.1305380754446093e-05, "loss": 1.9621, "step": 19833500 }, { "epoch": 57.41, "learning_rate": 2.1304657106798815e-05, "loss": 1.9707, "step": 19834000 }, { "epoch": 57.41, "learning_rate": 2.130393490644683e-05, "loss": 1.9607, "step": 19834500 }, { "epoch": 57.41, "learning_rate": 2.1303211258799556e-05, "loss": 1.9453, "step": 19835000 }, { "epoch": 57.42, "learning_rate": 2.1302487611152282e-05, "loss": 1.9538, "step": 19835500 }, { "epoch": 57.42, "learning_rate": 2.1301765410800297e-05, "loss": 1.9387, "step": 19836000 }, { "epoch": 57.42, "learning_rate": 2.130104176315302e-05, "loss": 1.9556, "step": 19836500 }, { "epoch": 57.42, "learning_rate": 2.1300318115505742e-05, "loss": 1.9629, "step": 19837000 }, { "epoch": 57.42, "learning_rate": 2.129959591515376e-05, "loss": 1.995, "step": 19837500 }, { "epoch": 57.42, "learning_rate": 2.1298872267506483e-05, "loss": 1.9498, "step": 19838000 }, { "epoch": 57.42, "learning_rate": 2.129814861985921e-05, "loss": 1.9662, "step": 19838500 }, { "epoch": 57.43, "learning_rate": 2.129742497221193e-05, "loss": 1.9455, "step": 19839000 }, { "epoch": 57.43, "learning_rate": 2.1296701324564657e-05, "loss": 1.9674, "step": 19839500 }, { "epoch": 57.43, "learning_rate": 2.129597767691738e-05, "loss": 1.9617, "step": 19840000 }, { "epoch": 57.43, "learning_rate": 2.12952540292701e-05, "loss": 1.9521, "step": 19840500 }, { "epoch": 57.43, "learning_rate": 2.1294530381622823e-05, "loss": 1.9322, "step": 19841000 }, { "epoch": 57.43, "learning_rate": 2.1293808181270842e-05, "loss": 1.966, "step": 19841500 }, { "epoch": 57.43, "learning_rate": 2.1293084533623564e-05, "loss": 1.9536, "step": 19842000 }, { "epoch": 57.44, "learning_rate": 2.1292362333271583e-05, "loss": 1.9731, "step": 19842500 }, { "epoch": 57.44, "learning_rate": 2.1291638685624306e-05, "loss": 1.9523, "step": 19843000 }, { "epoch": 57.44, "learning_rate": 2.129091503797703e-05, "loss": 1.9508, "step": 19843500 }, { "epoch": 57.44, "learning_rate": 2.1290191390329754e-05, "loss": 1.9524, "step": 19844000 }, { "epoch": 57.44, "learning_rate": 2.1289467742682476e-05, "loss": 1.9561, "step": 19844500 }, { "epoch": 57.44, "learning_rate": 2.1288744095035198e-05, "loss": 1.978, "step": 19845000 }, { "epoch": 57.44, "learning_rate": 2.128802044738792e-05, "loss": 1.9472, "step": 19845500 }, { "epoch": 57.45, "learning_rate": 2.1287296799740646e-05, "loss": 1.9513, "step": 19846000 }, { "epoch": 57.45, "learning_rate": 2.128657315209337e-05, "loss": 1.9577, "step": 19846500 }, { "epoch": 57.45, "learning_rate": 2.1285850951741387e-05, "loss": 1.9712, "step": 19847000 }, { "epoch": 57.45, "learning_rate": 2.128512730409411e-05, "loss": 1.9863, "step": 19847500 }, { "epoch": 57.45, "learning_rate": 2.128440365644683e-05, "loss": 1.9522, "step": 19848000 }, { "epoch": 57.45, "learning_rate": 2.1283680008799557e-05, "loss": 1.9557, "step": 19848500 }, { "epoch": 57.45, "learning_rate": 2.128295636115228e-05, "loss": 1.9677, "step": 19849000 }, { "epoch": 57.46, "learning_rate": 2.1282232713505005e-05, "loss": 1.9736, "step": 19849500 }, { "epoch": 57.46, "learning_rate": 2.1281509065857727e-05, "loss": 1.9749, "step": 19850000 }, { "epoch": 57.46, "learning_rate": 2.128078541821045e-05, "loss": 1.9483, "step": 19850500 }, { "epoch": 57.46, "learning_rate": 2.128006321785847e-05, "loss": 1.9539, "step": 19851000 }, { "epoch": 57.46, "learning_rate": 2.1279341017506484e-05, "loss": 1.9658, "step": 19851500 }, { "epoch": 57.46, "learning_rate": 2.1278617369859206e-05, "loss": 1.9287, "step": 19852000 }, { "epoch": 57.46, "learning_rate": 2.1277893722211932e-05, "loss": 1.9462, "step": 19852500 }, { "epoch": 57.47, "learning_rate": 2.1277170074564654e-05, "loss": 1.9679, "step": 19853000 }, { "epoch": 57.47, "learning_rate": 2.127644642691738e-05, "loss": 1.9637, "step": 19853500 }, { "epoch": 57.47, "learning_rate": 2.1275722779270102e-05, "loss": 1.9512, "step": 19854000 }, { "epoch": 57.47, "learning_rate": 2.127500057891812e-05, "loss": 1.9583, "step": 19854500 }, { "epoch": 57.47, "learning_rate": 2.1274276931270843e-05, "loss": 1.9738, "step": 19855000 }, { "epoch": 57.47, "learning_rate": 2.1273553283623565e-05, "loss": 1.9565, "step": 19855500 }, { "epoch": 57.47, "learning_rate": 2.1272829635976288e-05, "loss": 1.966, "step": 19856000 }, { "epoch": 57.48, "learning_rate": 2.127210598832901e-05, "loss": 1.9642, "step": 19856500 }, { "epoch": 57.48, "learning_rate": 2.1271382340681732e-05, "loss": 1.9657, "step": 19857000 }, { "epoch": 57.48, "learning_rate": 2.1270658693034458e-05, "loss": 1.9619, "step": 19857500 }, { "epoch": 57.48, "learning_rate": 2.1269935045387183e-05, "loss": 1.9582, "step": 19858000 }, { "epoch": 57.48, "learning_rate": 2.1269211397739905e-05, "loss": 1.964, "step": 19858500 }, { "epoch": 57.48, "learning_rate": 2.126848919738792e-05, "loss": 1.9455, "step": 19859000 }, { "epoch": 57.49, "learning_rate": 2.1267765549740647e-05, "loss": 1.9461, "step": 19859500 }, { "epoch": 57.49, "learning_rate": 2.1267043349388662e-05, "loss": 1.9756, "step": 19860000 }, { "epoch": 57.49, "learning_rate": 2.1266319701741384e-05, "loss": 1.9466, "step": 19860500 }, { "epoch": 57.49, "learning_rate": 2.126559605409411e-05, "loss": 1.9761, "step": 19861000 }, { "epoch": 57.49, "learning_rate": 2.1264872406446836e-05, "loss": 1.9465, "step": 19861500 }, { "epoch": 57.49, "learning_rate": 2.1264148758799558e-05, "loss": 1.9317, "step": 19862000 }, { "epoch": 57.49, "learning_rate": 2.126342511115228e-05, "loss": 1.9709, "step": 19862500 }, { "epoch": 57.5, "learning_rate": 2.1262701463505002e-05, "loss": 1.9505, "step": 19863000 }, { "epoch": 57.5, "learning_rate": 2.1261977815857725e-05, "loss": 1.9404, "step": 19863500 }, { "epoch": 57.5, "learning_rate": 2.1261254168210447e-05, "loss": 1.9366, "step": 19864000 }, { "epoch": 57.5, "learning_rate": 2.1260530520563172e-05, "loss": 1.9612, "step": 19864500 }, { "epoch": 57.5, "learning_rate": 2.1259806872915898e-05, "loss": 1.9559, "step": 19865000 }, { "epoch": 57.5, "learning_rate": 2.1259084672563914e-05, "loss": 1.9551, "step": 19865500 }, { "epoch": 57.5, "learning_rate": 2.1258361024916636e-05, "loss": 1.9597, "step": 19866000 }, { "epoch": 57.51, "learning_rate": 2.125763737726936e-05, "loss": 1.9395, "step": 19866500 }, { "epoch": 57.51, "learning_rate": 2.1256915176917377e-05, "loss": 1.9903, "step": 19867000 }, { "epoch": 57.51, "learning_rate": 2.12561915292701e-05, "loss": 1.9688, "step": 19867500 }, { "epoch": 57.51, "learning_rate": 2.125546788162282e-05, "loss": 1.9467, "step": 19868000 }, { "epoch": 57.51, "learning_rate": 2.1254744233975547e-05, "loss": 1.9372, "step": 19868500 }, { "epoch": 57.51, "learning_rate": 2.1254020586328273e-05, "loss": 1.9373, "step": 19869000 }, { "epoch": 57.51, "learning_rate": 2.1253296938680995e-05, "loss": 1.9501, "step": 19869500 }, { "epoch": 57.52, "learning_rate": 2.1252573291033717e-05, "loss": 1.9541, "step": 19870000 }, { "epoch": 57.52, "learning_rate": 2.1251851090681736e-05, "loss": 1.9338, "step": 19870500 }, { "epoch": 57.52, "learning_rate": 2.125112744303446e-05, "loss": 1.9398, "step": 19871000 }, { "epoch": 57.52, "learning_rate": 2.125040379538718e-05, "loss": 1.9688, "step": 19871500 }, { "epoch": 57.52, "learning_rate": 2.1249680147739906e-05, "loss": 1.9699, "step": 19872000 }, { "epoch": 57.52, "learning_rate": 2.124895650009263e-05, "loss": 1.9604, "step": 19872500 }, { "epoch": 57.52, "learning_rate": 2.1248234299740647e-05, "loss": 1.9434, "step": 19873000 }, { "epoch": 57.53, "learning_rate": 2.124751065209337e-05, "loss": 1.9415, "step": 19873500 }, { "epoch": 57.53, "learning_rate": 2.1246788451741385e-05, "loss": 1.9706, "step": 19874000 }, { "epoch": 57.53, "learning_rate": 2.124606480409411e-05, "loss": 1.9858, "step": 19874500 }, { "epoch": 57.53, "learning_rate": 2.1245341156446833e-05, "loss": 1.9766, "step": 19875000 }, { "epoch": 57.53, "learning_rate": 2.1244617508799555e-05, "loss": 1.9625, "step": 19875500 }, { "epoch": 57.53, "learning_rate": 2.1243895308447574e-05, "loss": 1.962, "step": 19876000 }, { "epoch": 57.53, "learning_rate": 2.12431716608003e-05, "loss": 1.9693, "step": 19876500 }, { "epoch": 57.54, "learning_rate": 2.1242448013153022e-05, "loss": 1.9482, "step": 19877000 }, { "epoch": 57.54, "learning_rate": 2.1241724365505744e-05, "loss": 1.9654, "step": 19877500 }, { "epoch": 57.54, "learning_rate": 2.1241002165153763e-05, "loss": 1.946, "step": 19878000 }, { "epoch": 57.54, "learning_rate": 2.1240278517506486e-05, "loss": 1.9605, "step": 19878500 }, { "epoch": 57.54, "learning_rate": 2.1239554869859208e-05, "loss": 1.9599, "step": 19879000 }, { "epoch": 57.54, "learning_rate": 2.123883122221193e-05, "loss": 1.9658, "step": 19879500 }, { "epoch": 57.54, "learning_rate": 2.1238107574564656e-05, "loss": 1.9496, "step": 19880000 }, { "epoch": 57.55, "learning_rate": 2.1237383926917378e-05, "loss": 1.9289, "step": 19880500 }, { "epoch": 57.55, "learning_rate": 2.12366602792701e-05, "loss": 1.9731, "step": 19881000 }, { "epoch": 57.55, "learning_rate": 2.123593807891812e-05, "loss": 1.9794, "step": 19881500 }, { "epoch": 57.55, "learning_rate": 2.123521443127084e-05, "loss": 1.9687, "step": 19882000 }, { "epoch": 57.55, "learning_rate": 2.123449223091886e-05, "loss": 1.9579, "step": 19882500 }, { "epoch": 57.55, "learning_rate": 2.1233768583271582e-05, "loss": 1.9705, "step": 19883000 }, { "epoch": 57.55, "learning_rate": 2.1233044935624305e-05, "loss": 1.9593, "step": 19883500 }, { "epoch": 57.56, "learning_rate": 2.123232128797703e-05, "loss": 1.954, "step": 19884000 }, { "epoch": 57.56, "learning_rate": 2.1231597640329753e-05, "loss": 1.9392, "step": 19884500 }, { "epoch": 57.56, "learning_rate": 2.1230873992682475e-05, "loss": 1.9656, "step": 19885000 }, { "epoch": 57.56, "learning_rate": 2.12301503450352e-05, "loss": 1.9653, "step": 19885500 }, { "epoch": 57.56, "learning_rate": 2.1229426697387923e-05, "loss": 1.9715, "step": 19886000 }, { "epoch": 57.56, "learning_rate": 2.1228703049740645e-05, "loss": 1.9451, "step": 19886500 }, { "epoch": 57.56, "learning_rate": 2.1227979402093367e-05, "loss": 1.9661, "step": 19887000 }, { "epoch": 57.57, "learning_rate": 2.1227255754446093e-05, "loss": 1.9684, "step": 19887500 }, { "epoch": 57.57, "learning_rate": 2.1226532106798815e-05, "loss": 1.9555, "step": 19888000 }, { "epoch": 57.57, "learning_rate": 2.1225808459151537e-05, "loss": 1.9639, "step": 19888500 }, { "epoch": 57.57, "learning_rate": 2.1225084811504263e-05, "loss": 1.9546, "step": 19889000 }, { "epoch": 57.57, "learning_rate": 2.122436261115228e-05, "loss": 1.9457, "step": 19889500 }, { "epoch": 57.57, "learning_rate": 2.1223638963505e-05, "loss": 1.9495, "step": 19890000 }, { "epoch": 57.57, "learning_rate": 2.122291676315302e-05, "loss": 1.9593, "step": 19890500 }, { "epoch": 57.58, "learning_rate": 2.122219456280104e-05, "loss": 1.9497, "step": 19891000 }, { "epoch": 57.58, "learning_rate": 2.1221470915153764e-05, "loss": 1.9716, "step": 19891500 }, { "epoch": 57.58, "learning_rate": 2.1220747267506486e-05, "loss": 1.9618, "step": 19892000 }, { "epoch": 57.58, "learning_rate": 2.122002361985921e-05, "loss": 1.9667, "step": 19892500 }, { "epoch": 57.58, "learning_rate": 2.121929997221193e-05, "loss": 1.9629, "step": 19893000 }, { "epoch": 57.58, "learning_rate": 2.1218576324564653e-05, "loss": 1.964, "step": 19893500 }, { "epoch": 57.58, "learning_rate": 2.1217852676917375e-05, "loss": 1.9712, "step": 19894000 }, { "epoch": 57.59, "learning_rate": 2.12171290292701e-05, "loss": 1.9445, "step": 19894500 }, { "epoch": 57.59, "learning_rate": 2.1216405381622826e-05, "loss": 1.9284, "step": 19895000 }, { "epoch": 57.59, "learning_rate": 2.121568173397555e-05, "loss": 1.9629, "step": 19895500 }, { "epoch": 57.59, "learning_rate": 2.121495808632827e-05, "loss": 1.9904, "step": 19896000 }, { "epoch": 57.59, "learning_rate": 2.1214234438680993e-05, "loss": 1.9861, "step": 19896500 }, { "epoch": 57.59, "learning_rate": 2.1213510791033715e-05, "loss": 1.967, "step": 19897000 }, { "epoch": 57.6, "learning_rate": 2.121278714338644e-05, "loss": 1.9946, "step": 19897500 }, { "epoch": 57.6, "learning_rate": 2.1212064943034457e-05, "loss": 1.9671, "step": 19898000 }, { "epoch": 57.6, "learning_rate": 2.1211341295387182e-05, "loss": 1.9619, "step": 19898500 }, { "epoch": 57.6, "learning_rate": 2.1210617647739904e-05, "loss": 1.9653, "step": 19899000 }, { "epoch": 57.6, "learning_rate": 2.1209896894683217e-05, "loss": 1.9587, "step": 19899500 }, { "epoch": 57.6, "learning_rate": 2.120917324703594e-05, "loss": 1.9448, "step": 19900000 }, { "epoch": 57.6, "learning_rate": 2.1208449599388665e-05, "loss": 1.9584, "step": 19900500 }, { "epoch": 57.61, "learning_rate": 2.1207725951741387e-05, "loss": 1.9578, "step": 19901000 }, { "epoch": 57.61, "learning_rate": 2.120700230409411e-05, "loss": 1.9543, "step": 19901500 }, { "epoch": 57.61, "learning_rate": 2.120627865644683e-05, "loss": 1.9571, "step": 19902000 }, { "epoch": 57.61, "learning_rate": 2.1205555008799557e-05, "loss": 1.9666, "step": 19902500 }, { "epoch": 57.61, "learning_rate": 2.120483136115228e-05, "loss": 1.9622, "step": 19903000 }, { "epoch": 57.61, "learning_rate": 2.1204107713505005e-05, "loss": 1.9154, "step": 19903500 }, { "epoch": 57.61, "learning_rate": 2.1203384065857727e-05, "loss": 1.9603, "step": 19904000 }, { "epoch": 57.62, "learning_rate": 2.120266041821045e-05, "loss": 1.9656, "step": 19904500 }, { "epoch": 57.62, "learning_rate": 2.1201938217858465e-05, "loss": 1.9622, "step": 19905000 }, { "epoch": 57.62, "learning_rate": 2.120121457021119e-05, "loss": 1.9464, "step": 19905500 }, { "epoch": 57.62, "learning_rate": 2.1200490922563916e-05, "loss": 1.9568, "step": 19906000 }, { "epoch": 57.62, "learning_rate": 2.119976872221193e-05, "loss": 1.9552, "step": 19906500 }, { "epoch": 57.62, "learning_rate": 2.1199045074564654e-05, "loss": 1.9571, "step": 19907000 }, { "epoch": 57.62, "learning_rate": 2.119832142691738e-05, "loss": 1.9699, "step": 19907500 }, { "epoch": 57.63, "learning_rate": 2.11975977792701e-05, "loss": 1.9798, "step": 19908000 }, { "epoch": 57.63, "learning_rate": 2.1196874131622824e-05, "loss": 1.9592, "step": 19908500 }, { "epoch": 57.63, "learning_rate": 2.1196150483975546e-05, "loss": 1.9668, "step": 19909000 }, { "epoch": 57.63, "learning_rate": 2.1195428283623565e-05, "loss": 1.9733, "step": 19909500 }, { "epoch": 57.63, "learning_rate": 2.119470463597629e-05, "loss": 1.955, "step": 19910000 }, { "epoch": 57.63, "learning_rate": 2.1193980988329013e-05, "loss": 1.9724, "step": 19910500 }, { "epoch": 57.63, "learning_rate": 2.1193257340681735e-05, "loss": 1.9653, "step": 19911000 }, { "epoch": 57.64, "learning_rate": 2.1192533693034457e-05, "loss": 1.9464, "step": 19911500 }, { "epoch": 57.64, "learning_rate": 2.119181004538718e-05, "loss": 1.9243, "step": 19912000 }, { "epoch": 57.64, "learning_rate": 2.1191086397739905e-05, "loss": 1.9631, "step": 19912500 }, { "epoch": 57.64, "learning_rate": 2.1190362750092627e-05, "loss": 1.9624, "step": 19913000 }, { "epoch": 57.64, "learning_rate": 2.1189639102445353e-05, "loss": 1.971, "step": 19913500 }, { "epoch": 57.64, "learning_rate": 2.1188915454798075e-05, "loss": 1.9665, "step": 19914000 }, { "epoch": 57.64, "learning_rate": 2.1188191807150798e-05, "loss": 1.9473, "step": 19914500 }, { "epoch": 57.65, "learning_rate": 2.118746815950352e-05, "loss": 1.954, "step": 19915000 }, { "epoch": 57.65, "learning_rate": 2.1186744511856242e-05, "loss": 1.9689, "step": 19915500 }, { "epoch": 57.65, "learning_rate": 2.1186020864208968e-05, "loss": 1.9518, "step": 19916000 }, { "epoch": 57.65, "learning_rate": 2.118529721656169e-05, "loss": 1.9413, "step": 19916500 }, { "epoch": 57.65, "learning_rate": 2.1184573568914416e-05, "loss": 1.9559, "step": 19917000 }, { "epoch": 57.65, "learning_rate": 2.1183849921267138e-05, "loss": 1.982, "step": 19917500 }, { "epoch": 57.65, "learning_rate": 2.1183127720915157e-05, "loss": 1.9617, "step": 19918000 }, { "epoch": 57.66, "learning_rate": 2.1182405520563172e-05, "loss": 1.9661, "step": 19918500 }, { "epoch": 57.66, "learning_rate": 2.118168332021119e-05, "loss": 1.972, "step": 19919000 }, { "epoch": 57.66, "learning_rate": 2.1180959672563913e-05, "loss": 1.9558, "step": 19919500 }, { "epoch": 57.66, "learning_rate": 2.1180236024916636e-05, "loss": 1.9745, "step": 19920000 }, { "epoch": 57.66, "learning_rate": 2.1179512377269358e-05, "loss": 1.9621, "step": 19920500 }, { "epoch": 57.66, "learning_rate": 2.1178788729622084e-05, "loss": 1.9785, "step": 19921000 }, { "epoch": 57.66, "learning_rate": 2.1178065081974806e-05, "loss": 1.9823, "step": 19921500 }, { "epoch": 57.67, "learning_rate": 2.117734143432753e-05, "loss": 1.9511, "step": 19922000 }, { "epoch": 57.67, "learning_rate": 2.1176617786680254e-05, "loss": 1.9704, "step": 19922500 }, { "epoch": 57.67, "learning_rate": 2.1175894139032976e-05, "loss": 1.9337, "step": 19923000 }, { "epoch": 57.67, "learning_rate": 2.1175171938680995e-05, "loss": 1.9653, "step": 19923500 }, { "epoch": 57.67, "learning_rate": 2.1174448291033717e-05, "loss": 1.9702, "step": 19924000 }, { "epoch": 57.67, "learning_rate": 2.1173724643386443e-05, "loss": 1.9776, "step": 19924500 }, { "epoch": 57.67, "learning_rate": 2.1173000995739165e-05, "loss": 1.9904, "step": 19925000 }, { "epoch": 57.68, "learning_rate": 2.1172277348091887e-05, "loss": 1.9552, "step": 19925500 }, { "epoch": 57.68, "learning_rate": 2.117155370044461e-05, "loss": 1.9677, "step": 19926000 }, { "epoch": 57.68, "learning_rate": 2.117083005279733e-05, "loss": 1.9749, "step": 19926500 }, { "epoch": 57.68, "learning_rate": 2.1170106405150057e-05, "loss": 1.9658, "step": 19927000 }, { "epoch": 57.68, "learning_rate": 2.116938565209337e-05, "loss": 1.9675, "step": 19927500 }, { "epoch": 57.68, "learning_rate": 2.1168662004446092e-05, "loss": 1.9814, "step": 19928000 }, { "epoch": 57.68, "learning_rate": 2.1167938356798817e-05, "loss": 1.9813, "step": 19928500 }, { "epoch": 57.69, "learning_rate": 2.116721470915154e-05, "loss": 1.9537, "step": 19929000 }, { "epoch": 57.69, "learning_rate": 2.1166491061504262e-05, "loss": 1.9777, "step": 19929500 }, { "epoch": 57.69, "learning_rate": 2.1165767413856984e-05, "loss": 1.9821, "step": 19930000 }, { "epoch": 57.69, "learning_rate": 2.1165045213505003e-05, "loss": 1.9759, "step": 19930500 }, { "epoch": 57.69, "learning_rate": 2.116432301315302e-05, "loss": 1.9728, "step": 19931000 }, { "epoch": 57.69, "learning_rate": 2.1163599365505744e-05, "loss": 1.9607, "step": 19931500 }, { "epoch": 57.69, "learning_rate": 2.1162875717858466e-05, "loss": 1.9651, "step": 19932000 }, { "epoch": 57.7, "learning_rate": 2.1162152070211192e-05, "loss": 1.9585, "step": 19932500 }, { "epoch": 57.7, "learning_rate": 2.1161428422563914e-05, "loss": 1.9594, "step": 19933000 }, { "epoch": 57.7, "learning_rate": 2.1160706222211933e-05, "loss": 2.0025, "step": 19933500 }, { "epoch": 57.7, "learning_rate": 2.1159982574564655e-05, "loss": 1.9695, "step": 19934000 }, { "epoch": 57.7, "learning_rate": 2.1159258926917378e-05, "loss": 1.9557, "step": 19934500 }, { "epoch": 57.7, "learning_rate": 2.11585352792701e-05, "loss": 1.9759, "step": 19935000 }, { "epoch": 57.71, "learning_rate": 2.1157811631622822e-05, "loss": 1.9569, "step": 19935500 }, { "epoch": 57.71, "learning_rate": 2.1157087983975548e-05, "loss": 1.9579, "step": 19936000 }, { "epoch": 57.71, "learning_rate": 2.115636433632827e-05, "loss": 1.9714, "step": 19936500 }, { "epoch": 57.71, "learning_rate": 2.1155640688680996e-05, "loss": 1.9923, "step": 19937000 }, { "epoch": 57.71, "learning_rate": 2.1154917041033718e-05, "loss": 2.0006, "step": 19937500 }, { "epoch": 57.71, "learning_rate": 2.115419339338644e-05, "loss": 1.9866, "step": 19938000 }, { "epoch": 57.71, "learning_rate": 2.1153469745739162e-05, "loss": 1.9405, "step": 19938500 }, { "epoch": 57.72, "learning_rate": 2.1152746098091885e-05, "loss": 1.9563, "step": 19939000 }, { "epoch": 57.72, "learning_rate": 2.115202245044461e-05, "loss": 1.9628, "step": 19939500 }, { "epoch": 57.72, "learning_rate": 2.115130025009263e-05, "loss": 1.9866, "step": 19940000 }, { "epoch": 57.72, "learning_rate": 2.115057660244535e-05, "loss": 1.9511, "step": 19940500 }, { "epoch": 57.72, "learning_rate": 2.1149852954798074e-05, "loss": 1.9593, "step": 19941000 }, { "epoch": 57.72, "learning_rate": 2.1149129307150796e-05, "loss": 1.9588, "step": 19941500 }, { "epoch": 57.72, "learning_rate": 2.114840565950352e-05, "loss": 1.9959, "step": 19942000 }, { "epoch": 57.73, "learning_rate": 2.1147682011856244e-05, "loss": 1.9874, "step": 19942500 }, { "epoch": 57.73, "learning_rate": 2.114695981150426e-05, "loss": 1.9618, "step": 19943000 }, { "epoch": 57.73, "learning_rate": 2.114623905844757e-05, "loss": 1.9668, "step": 19943500 }, { "epoch": 57.73, "learning_rate": 2.1145515410800297e-05, "loss": 1.9525, "step": 19944000 }, { "epoch": 57.73, "learning_rate": 2.1144791763153023e-05, "loss": 1.967, "step": 19944500 }, { "epoch": 57.73, "learning_rate": 2.1144069562801038e-05, "loss": 1.9709, "step": 19945000 }, { "epoch": 57.73, "learning_rate": 2.114334591515376e-05, "loss": 1.9409, "step": 19945500 }, { "epoch": 57.74, "learning_rate": 2.1142622267506486e-05, "loss": 1.9676, "step": 19946000 }, { "epoch": 57.74, "learning_rate": 2.114189861985921e-05, "loss": 1.9579, "step": 19946500 }, { "epoch": 57.74, "learning_rate": 2.114117497221193e-05, "loss": 1.9647, "step": 19947000 }, { "epoch": 57.74, "learning_rate": 2.1140451324564656e-05, "loss": 1.9754, "step": 19947500 }, { "epoch": 57.74, "learning_rate": 2.113972767691738e-05, "loss": 1.9559, "step": 19948000 }, { "epoch": 57.74, "learning_rate": 2.11390040292701e-05, "loss": 1.964, "step": 19948500 }, { "epoch": 57.74, "learning_rate": 2.1138280381622823e-05, "loss": 1.9683, "step": 19949000 }, { "epoch": 57.75, "learning_rate": 2.113755673397555e-05, "loss": 1.9667, "step": 19949500 }, { "epoch": 57.75, "learning_rate": 2.1136834533623564e-05, "loss": 1.9533, "step": 19950000 }, { "epoch": 57.75, "learning_rate": 2.1136110885976286e-05, "loss": 1.9748, "step": 19950500 }, { "epoch": 57.75, "learning_rate": 2.1135387238329012e-05, "loss": 1.946, "step": 19951000 }, { "epoch": 57.75, "learning_rate": 2.1134663590681734e-05, "loss": 1.9407, "step": 19951500 }, { "epoch": 57.75, "learning_rate": 2.113393994303446e-05, "loss": 1.9488, "step": 19952000 }, { "epoch": 57.75, "learning_rate": 2.1133216295387182e-05, "loss": 1.9609, "step": 19952500 }, { "epoch": 57.76, "learning_rate": 2.1132492647739904e-05, "loss": 1.9669, "step": 19953000 }, { "epoch": 57.76, "learning_rate": 2.1131769000092626e-05, "loss": 1.9952, "step": 19953500 }, { "epoch": 57.76, "learning_rate": 2.113104535244535e-05, "loss": 1.9416, "step": 19954000 }, { "epoch": 57.76, "learning_rate": 2.1130321704798074e-05, "loss": 1.9689, "step": 19954500 }, { "epoch": 57.76, "learning_rate": 2.11295980571508e-05, "loss": 1.9576, "step": 19955000 }, { "epoch": 57.76, "learning_rate": 2.1128874409503522e-05, "loss": 1.9607, "step": 19955500 }, { "epoch": 57.76, "learning_rate": 2.1128150761856244e-05, "loss": 1.9532, "step": 19956000 }, { "epoch": 57.77, "learning_rate": 2.1127427114208967e-05, "loss": 1.9478, "step": 19956500 }, { "epoch": 57.77, "learning_rate": 2.112670346656169e-05, "loss": 1.987, "step": 19957000 }, { "epoch": 57.77, "learning_rate": 2.112597981891441e-05, "loss": 1.9671, "step": 19957500 }, { "epoch": 57.77, "learning_rate": 2.1125256171267137e-05, "loss": 1.9663, "step": 19958000 }, { "epoch": 57.77, "learning_rate": 2.1124533970915156e-05, "loss": 1.9587, "step": 19958500 }, { "epoch": 57.77, "learning_rate": 2.1123810323267878e-05, "loss": 1.9774, "step": 19959000 }, { "epoch": 57.77, "learning_rate": 2.112308957021119e-05, "loss": 1.967, "step": 19959500 }, { "epoch": 57.78, "learning_rate": 2.1122365922563912e-05, "loss": 1.9666, "step": 19960000 }, { "epoch": 57.78, "learning_rate": 2.1121642274916638e-05, "loss": 1.9921, "step": 19960500 }, { "epoch": 57.78, "learning_rate": 2.112091862726936e-05, "loss": 1.9687, "step": 19961000 }, { "epoch": 57.78, "learning_rate": 2.1120194979622083e-05, "loss": 1.9573, "step": 19961500 }, { "epoch": 57.78, "learning_rate": 2.1119472779270098e-05, "loss": 1.9537, "step": 19962000 }, { "epoch": 57.78, "learning_rate": 2.1118749131622824e-05, "loss": 1.97, "step": 19962500 }, { "epoch": 57.78, "learning_rate": 2.1118026931270843e-05, "loss": 1.965, "step": 19963000 }, { "epoch": 57.79, "learning_rate": 2.1117306178214155e-05, "loss": 1.9656, "step": 19963500 }, { "epoch": 57.79, "learning_rate": 2.1116582530566877e-05, "loss": 1.9641, "step": 19964000 }, { "epoch": 57.79, "learning_rate": 2.1115860330214896e-05, "loss": 1.9862, "step": 19964500 }, { "epoch": 57.79, "learning_rate": 2.111513668256762e-05, "loss": 1.954, "step": 19965000 }, { "epoch": 57.79, "learning_rate": 2.111441303492034e-05, "loss": 1.9682, "step": 19965500 }, { "epoch": 57.79, "learning_rate": 2.1113689387273063e-05, "loss": 1.9617, "step": 19966000 }, { "epoch": 57.79, "learning_rate": 2.111296573962579e-05, "loss": 1.9631, "step": 19966500 }, { "epoch": 57.8, "learning_rate": 2.1112242091978514e-05, "loss": 1.959, "step": 19967000 }, { "epoch": 57.8, "learning_rate": 2.1111518444331236e-05, "loss": 1.9609, "step": 19967500 }, { "epoch": 57.8, "learning_rate": 2.111079479668396e-05, "loss": 1.9591, "step": 19968000 }, { "epoch": 57.8, "learning_rate": 2.111007114903668e-05, "loss": 1.9736, "step": 19968500 }, { "epoch": 57.8, "learning_rate": 2.1109347501389403e-05, "loss": 1.9568, "step": 19969000 }, { "epoch": 57.8, "learning_rate": 2.1108623853742125e-05, "loss": 1.9657, "step": 19969500 }, { "epoch": 57.8, "learning_rate": 2.110790020609485e-05, "loss": 1.9744, "step": 19970000 }, { "epoch": 57.81, "learning_rate": 2.1107176558447576e-05, "loss": 1.9527, "step": 19970500 }, { "epoch": 57.81, "learning_rate": 2.11064529108003e-05, "loss": 1.9875, "step": 19971000 }, { "epoch": 57.81, "learning_rate": 2.110572926315302e-05, "loss": 1.9795, "step": 19971500 }, { "epoch": 57.81, "learning_rate": 2.1105005615505743e-05, "loss": 1.9676, "step": 19972000 }, { "epoch": 57.81, "learning_rate": 2.1104281967858465e-05, "loss": 1.9447, "step": 19972500 }, { "epoch": 57.81, "learning_rate": 2.1103558320211188e-05, "loss": 1.9585, "step": 19973000 }, { "epoch": 57.82, "learning_rate": 2.110283611985921e-05, "loss": 1.9598, "step": 19973500 }, { "epoch": 57.82, "learning_rate": 2.1102112472211932e-05, "loss": 1.9608, "step": 19974000 }, { "epoch": 57.82, "learning_rate": 2.110139027185995e-05, "loss": 1.9577, "step": 19974500 }, { "epoch": 57.82, "learning_rate": 2.1100666624212673e-05, "loss": 1.989, "step": 19975000 }, { "epoch": 57.82, "learning_rate": 2.1099942976565396e-05, "loss": 1.9726, "step": 19975500 }, { "epoch": 57.82, "learning_rate": 2.1099219328918118e-05, "loss": 1.9818, "step": 19976000 }, { "epoch": 57.82, "learning_rate": 2.109849568127084e-05, "loss": 1.9596, "step": 19976500 }, { "epoch": 57.83, "learning_rate": 2.1097772033623566e-05, "loss": 1.9781, "step": 19977000 }, { "epoch": 57.83, "learning_rate": 2.109704838597629e-05, "loss": 1.9587, "step": 19977500 }, { "epoch": 57.83, "learning_rate": 2.1096324738329014e-05, "loss": 1.9693, "step": 19978000 }, { "epoch": 57.83, "learning_rate": 2.109560253797703e-05, "loss": 1.969, "step": 19978500 }, { "epoch": 57.83, "learning_rate": 2.109487889032975e-05, "loss": 1.9657, "step": 19979000 }, { "epoch": 57.83, "learning_rate": 2.1094155242682477e-05, "loss": 1.9685, "step": 19979500 }, { "epoch": 57.83, "learning_rate": 2.10934315950352e-05, "loss": 1.9537, "step": 19980000 }, { "epoch": 57.84, "learning_rate": 2.109270794738792e-05, "loss": 1.9505, "step": 19980500 }, { "epoch": 57.84, "learning_rate": 2.1091984299740647e-05, "loss": 1.9875, "step": 19981000 }, { "epoch": 57.84, "learning_rate": 2.109126065209337e-05, "loss": 1.9757, "step": 19981500 }, { "epoch": 57.84, "learning_rate": 2.109053700444609e-05, "loss": 1.9678, "step": 19982000 }, { "epoch": 57.84, "learning_rate": 2.1089813356798817e-05, "loss": 1.9902, "step": 19982500 }, { "epoch": 57.84, "learning_rate": 2.108908970915154e-05, "loss": 1.956, "step": 19983000 }, { "epoch": 57.84, "learning_rate": 2.108836606150426e-05, "loss": 1.9486, "step": 19983500 }, { "epoch": 57.85, "learning_rate": 2.1087642413856984e-05, "loss": 1.9535, "step": 19984000 }, { "epoch": 57.85, "learning_rate": 2.1086920213505003e-05, "loss": 1.9682, "step": 19984500 }, { "epoch": 57.85, "learning_rate": 2.108619656585773e-05, "loss": 1.9611, "step": 19985000 }, { "epoch": 57.85, "learning_rate": 2.108547291821045e-05, "loss": 1.9468, "step": 19985500 }, { "epoch": 57.85, "learning_rate": 2.1084749270563173e-05, "loss": 1.9689, "step": 19986000 }, { "epoch": 57.85, "learning_rate": 2.1084025622915895e-05, "loss": 1.9674, "step": 19986500 }, { "epoch": 57.85, "learning_rate": 2.1083301975268617e-05, "loss": 1.9879, "step": 19987000 }, { "epoch": 57.86, "learning_rate": 2.108257832762134e-05, "loss": 1.9773, "step": 19987500 }, { "epoch": 57.86, "learning_rate": 2.1081854679974065e-05, "loss": 1.9425, "step": 19988000 }, { "epoch": 57.86, "learning_rate": 2.108113103232679e-05, "loss": 1.9602, "step": 19988500 }, { "epoch": 57.86, "learning_rate": 2.1080408831974806e-05, "loss": 1.9556, "step": 19989000 }, { "epoch": 57.86, "learning_rate": 2.1079686631622825e-05, "loss": 1.9719, "step": 19989500 }, { "epoch": 57.86, "learning_rate": 2.1078962983975548e-05, "loss": 1.9588, "step": 19990000 }, { "epoch": 57.86, "learning_rate": 2.107823933632827e-05, "loss": 1.981, "step": 19990500 }, { "epoch": 57.87, "learning_rate": 2.1077515688680992e-05, "loss": 1.9864, "step": 19991000 }, { "epoch": 57.87, "learning_rate": 2.1076792041033718e-05, "loss": 1.9525, "step": 19991500 }, { "epoch": 57.87, "learning_rate": 2.1076068393386443e-05, "loss": 1.9566, "step": 19992000 }, { "epoch": 57.87, "learning_rate": 2.1075344745739165e-05, "loss": 1.9269, "step": 19992500 }, { "epoch": 57.87, "learning_rate": 2.1074621098091888e-05, "loss": 1.9688, "step": 19993000 }, { "epoch": 57.87, "learning_rate": 2.107389745044461e-05, "loss": 1.9473, "step": 19993500 }, { "epoch": 57.87, "learning_rate": 2.1073173802797332e-05, "loss": 1.9643, "step": 19994000 }, { "epoch": 57.88, "learning_rate": 2.1072450155150054e-05, "loss": 1.9613, "step": 19994500 }, { "epoch": 57.88, "learning_rate": 2.107172650750278e-05, "loss": 1.9751, "step": 19995000 }, { "epoch": 57.88, "learning_rate": 2.1071002859855506e-05, "loss": 1.9751, "step": 19995500 }, { "epoch": 57.88, "learning_rate": 2.1070282106798818e-05, "loss": 1.9593, "step": 19996000 }, { "epoch": 57.88, "learning_rate": 2.106955845915154e-05, "loss": 1.9593, "step": 19996500 }, { "epoch": 57.88, "learning_rate": 2.1068834811504262e-05, "loss": 1.9877, "step": 19997000 }, { "epoch": 57.88, "learning_rate": 2.1068111163856985e-05, "loss": 1.966, "step": 19997500 }, { "epoch": 57.89, "learning_rate": 2.1067388963505004e-05, "loss": 1.9641, "step": 19998000 }, { "epoch": 57.89, "learning_rate": 2.1066665315857726e-05, "loss": 1.9598, "step": 19998500 }, { "epoch": 57.89, "learning_rate": 2.1065941668210448e-05, "loss": 1.9751, "step": 19999000 }, { "epoch": 57.89, "learning_rate": 2.1065218020563174e-05, "loss": 1.9557, "step": 19999500 }, { "epoch": 57.89, "learning_rate": 2.1064494372915896e-05, "loss": 1.9762, "step": 20000000 }, { "epoch": 57.89, "learning_rate": 2.1063770725268618e-05, "loss": 1.9719, "step": 20000500 }, { "epoch": 57.89, "learning_rate": 2.1063047077621344e-05, "loss": 1.9548, "step": 20001000 }, { "epoch": 57.9, "learning_rate": 2.1062323429974066e-05, "loss": 1.941, "step": 20001500 }, { "epoch": 57.9, "learning_rate": 2.1061599782326788e-05, "loss": 1.9545, "step": 20002000 }, { "epoch": 57.9, "learning_rate": 2.1060877581974807e-05, "loss": 1.9847, "step": 20002500 }, { "epoch": 57.9, "learning_rate": 2.1060155381622823e-05, "loss": 1.9563, "step": 20003000 }, { "epoch": 57.9, "learning_rate": 2.105943173397555e-05, "loss": 1.9785, "step": 20003500 }, { "epoch": 57.9, "learning_rate": 2.105870808632827e-05, "loss": 1.965, "step": 20004000 }, { "epoch": 57.9, "learning_rate": 2.1057984438680993e-05, "loss": 1.9668, "step": 20004500 }, { "epoch": 57.91, "learning_rate": 2.105726079103372e-05, "loss": 1.954, "step": 20005000 }, { "epoch": 57.91, "learning_rate": 2.105653714338644e-05, "loss": 1.9704, "step": 20005500 }, { "epoch": 57.91, "learning_rate": 2.1055813495739163e-05, "loss": 1.9697, "step": 20006000 }, { "epoch": 57.91, "learning_rate": 2.1055089848091885e-05, "loss": 1.9478, "step": 20006500 }, { "epoch": 57.91, "learning_rate": 2.105436620044461e-05, "loss": 1.9644, "step": 20007000 }, { "epoch": 57.91, "learning_rate": 2.1053642552797333e-05, "loss": 1.9774, "step": 20007500 }, { "epoch": 57.91, "learning_rate": 2.105291890515006e-05, "loss": 1.9703, "step": 20008000 }, { "epoch": 57.92, "learning_rate": 2.105219525750278e-05, "loss": 1.9901, "step": 20008500 }, { "epoch": 57.92, "learning_rate": 2.1051473057150796e-05, "loss": 1.9731, "step": 20009000 }, { "epoch": 57.92, "learning_rate": 2.1050750856798815e-05, "loss": 1.9601, "step": 20009500 }, { "epoch": 57.92, "learning_rate": 2.1050027209151538e-05, "loss": 1.9563, "step": 20010000 }, { "epoch": 57.92, "learning_rate": 2.104930356150426e-05, "loss": 1.9911, "step": 20010500 }, { "epoch": 57.92, "learning_rate": 2.1048579913856985e-05, "loss": 1.9503, "step": 20011000 }, { "epoch": 57.93, "learning_rate": 2.1047856266209708e-05, "loss": 1.9525, "step": 20011500 }, { "epoch": 57.93, "learning_rate": 2.1047132618562433e-05, "loss": 1.9734, "step": 20012000 }, { "epoch": 57.93, "learning_rate": 2.1046408970915155e-05, "loss": 1.971, "step": 20012500 }, { "epoch": 57.93, "learning_rate": 2.104568677056317e-05, "loss": 1.9778, "step": 20013000 }, { "epoch": 57.93, "learning_rate": 2.1044963122915897e-05, "loss": 1.9742, "step": 20013500 }, { "epoch": 57.93, "learning_rate": 2.104423947526862e-05, "loss": 1.9502, "step": 20014000 }, { "epoch": 57.93, "learning_rate": 2.1043515827621345e-05, "loss": 1.951, "step": 20014500 }, { "epoch": 57.94, "learning_rate": 2.104279362726936e-05, "loss": 1.9754, "step": 20015000 }, { "epoch": 57.94, "learning_rate": 2.1042069979622082e-05, "loss": 1.9484, "step": 20015500 }, { "epoch": 57.94, "learning_rate": 2.1041346331974808e-05, "loss": 1.9554, "step": 20016000 }, { "epoch": 57.94, "learning_rate": 2.104062268432753e-05, "loss": 1.9466, "step": 20016500 }, { "epoch": 57.94, "learning_rate": 2.1039899036680252e-05, "loss": 1.9806, "step": 20017000 }, { "epoch": 57.94, "learning_rate": 2.1039175389032975e-05, "loss": 1.9551, "step": 20017500 }, { "epoch": 57.94, "learning_rate": 2.10384517413857e-05, "loss": 1.9886, "step": 20018000 }, { "epoch": 57.95, "learning_rate": 2.1037728093738422e-05, "loss": 1.9568, "step": 20018500 }, { "epoch": 57.95, "learning_rate": 2.1037004446091145e-05, "loss": 1.9626, "step": 20019000 }, { "epoch": 57.95, "learning_rate": 2.103628079844387e-05, "loss": 1.9658, "step": 20019500 }, { "epoch": 57.95, "learning_rate": 2.1035557150796593e-05, "loss": 1.951, "step": 20020000 }, { "epoch": 57.95, "learning_rate": 2.1034833503149315e-05, "loss": 1.9926, "step": 20020500 }, { "epoch": 57.95, "learning_rate": 2.1034109855502037e-05, "loss": 1.974, "step": 20021000 }, { "epoch": 57.95, "learning_rate": 2.1033387655150056e-05, "loss": 1.9644, "step": 20021500 }, { "epoch": 57.96, "learning_rate": 2.1032665454798075e-05, "loss": 1.9588, "step": 20022000 }, { "epoch": 57.96, "learning_rate": 2.1031941807150797e-05, "loss": 1.987, "step": 20022500 }, { "epoch": 57.96, "learning_rate": 2.1031218159503523e-05, "loss": 1.9317, "step": 20023000 }, { "epoch": 57.96, "learning_rate": 2.1030494511856245e-05, "loss": 1.9692, "step": 20023500 }, { "epoch": 57.96, "learning_rate": 2.1029770864208967e-05, "loss": 1.9744, "step": 20024000 }, { "epoch": 57.96, "learning_rate": 2.102904721656169e-05, "loss": 1.9559, "step": 20024500 }, { "epoch": 57.96, "learning_rate": 2.1028323568914412e-05, "loss": 1.9539, "step": 20025000 }, { "epoch": 57.97, "learning_rate": 2.1027601368562434e-05, "loss": 1.9591, "step": 20025500 }, { "epoch": 57.97, "learning_rate": 2.102687916821045e-05, "loss": 1.9876, "step": 20026000 }, { "epoch": 57.97, "learning_rate": 2.1026155520563172e-05, "loss": 1.9682, "step": 20026500 }, { "epoch": 57.97, "learning_rate": 2.1025431872915897e-05, "loss": 1.962, "step": 20027000 }, { "epoch": 57.97, "learning_rate": 2.102470822526862e-05, "loss": 1.9751, "step": 20027500 }, { "epoch": 57.97, "learning_rate": 2.1023984577621342e-05, "loss": 1.9698, "step": 20028000 }, { "epoch": 57.97, "learning_rate": 2.1023260929974064e-05, "loss": 1.9724, "step": 20028500 }, { "epoch": 57.98, "learning_rate": 2.1022537282326786e-05, "loss": 1.9752, "step": 20029000 }, { "epoch": 57.98, "learning_rate": 2.1021813634679512e-05, "loss": 1.945, "step": 20029500 }, { "epoch": 57.98, "learning_rate": 2.1021089987032234e-05, "loss": 1.9908, "step": 20030000 }, { "epoch": 57.98, "learning_rate": 2.102036633938496e-05, "loss": 1.9757, "step": 20030500 }, { "epoch": 57.98, "learning_rate": 2.1019644139032975e-05, "loss": 1.9814, "step": 20031000 }, { "epoch": 57.98, "learning_rate": 2.1018920491385698e-05, "loss": 1.9823, "step": 20031500 }, { "epoch": 57.98, "learning_rate": 2.1018198291033717e-05, "loss": 1.9496, "step": 20032000 }, { "epoch": 57.99, "learning_rate": 2.101747464338644e-05, "loss": 1.9681, "step": 20032500 }, { "epoch": 57.99, "learning_rate": 2.101675099573916e-05, "loss": 1.9887, "step": 20033000 }, { "epoch": 57.99, "learning_rate": 2.1016027348091887e-05, "loss": 1.9705, "step": 20033500 }, { "epoch": 57.99, "learning_rate": 2.1015303700444612e-05, "loss": 1.9888, "step": 20034000 }, { "epoch": 57.99, "learning_rate": 2.1014580052797335e-05, "loss": 1.966, "step": 20034500 }, { "epoch": 57.99, "learning_rate": 2.1013856405150057e-05, "loss": 1.9558, "step": 20035000 }, { "epoch": 57.99, "learning_rate": 2.101313275750278e-05, "loss": 1.9812, "step": 20035500 }, { "epoch": 58.0, "learning_rate": 2.10124091098555e-05, "loss": 1.968, "step": 20036000 }, { "epoch": 58.0, "learning_rate": 2.1011685462208227e-05, "loss": 1.9581, "step": 20036500 }, { "epoch": 58.0, "learning_rate": 2.1010963261856246e-05, "loss": 1.9541, "step": 20037000 }, { "epoch": 58.0, "eval_accuracy": 0.6768148279968789, "eval_accuracy_mlm": 0.6435821900945138, "eval_accuracy_nsp": 0.8549815988597251, "eval_loss": 2.161170721054077, "eval_runtime": 332.2544, "eval_samples_per_second": 1313.409, "eval_steps_per_second": 54.726, "step": 20037376 }, { "epoch": 58.0, "learning_rate": 2.1010239614208968e-05, "loss": 1.9466, "step": 20037500 }, { "epoch": 58.0, "learning_rate": 2.1009520308447574e-05, "loss": 1.923, "step": 20038000 }, { "epoch": 58.0, "learning_rate": 2.10087966608003e-05, "loss": 1.9453, "step": 20038500 }, { "epoch": 58.0, "learning_rate": 2.100807301315302e-05, "loss": 1.9405, "step": 20039000 }, { "epoch": 58.01, "learning_rate": 2.1007349365505744e-05, "loss": 1.9429, "step": 20039500 }, { "epoch": 58.01, "learning_rate": 2.1006625717858466e-05, "loss": 1.9558, "step": 20040000 }, { "epoch": 58.01, "learning_rate": 2.1005902070211188e-05, "loss": 1.9251, "step": 20040500 }, { "epoch": 58.01, "learning_rate": 2.1005178422563914e-05, "loss": 1.9413, "step": 20041000 }, { "epoch": 58.01, "learning_rate": 2.1004454774916636e-05, "loss": 1.9536, "step": 20041500 }, { "epoch": 58.01, "learning_rate": 2.100373112726936e-05, "loss": 1.9315, "step": 20042000 }, { "epoch": 58.01, "learning_rate": 2.1003007479622084e-05, "loss": 1.9507, "step": 20042500 }, { "epoch": 58.02, "learning_rate": 2.1002283831974806e-05, "loss": 1.9219, "step": 20043000 }, { "epoch": 58.02, "learning_rate": 2.100156018432753e-05, "loss": 1.9073, "step": 20043500 }, { "epoch": 58.02, "learning_rate": 2.100083653668025e-05, "loss": 1.9858, "step": 20044000 }, { "epoch": 58.02, "learning_rate": 2.1000112889032976e-05, "loss": 1.9526, "step": 20044500 }, { "epoch": 58.02, "learning_rate": 2.0999389241385702e-05, "loss": 1.9469, "step": 20045000 }, { "epoch": 58.02, "learning_rate": 2.0998667041033717e-05, "loss": 1.9319, "step": 20045500 }, { "epoch": 58.02, "learning_rate": 2.099794339338644e-05, "loss": 1.9208, "step": 20046000 }, { "epoch": 58.03, "learning_rate": 2.0997219745739162e-05, "loss": 1.9513, "step": 20046500 }, { "epoch": 58.03, "learning_rate": 2.099649754538718e-05, "loss": 1.9387, "step": 20047000 }, { "epoch": 58.03, "learning_rate": 2.0995773897739903e-05, "loss": 1.9501, "step": 20047500 }, { "epoch": 58.03, "learning_rate": 2.0995050250092625e-05, "loss": 1.9281, "step": 20048000 }, { "epoch": 58.03, "learning_rate": 2.099432660244535e-05, "loss": 1.9667, "step": 20048500 }, { "epoch": 58.03, "learning_rate": 2.0993602954798077e-05, "loss": 1.9531, "step": 20049000 }, { "epoch": 58.04, "learning_rate": 2.09928793071508e-05, "loss": 1.9427, "step": 20049500 }, { "epoch": 58.04, "learning_rate": 2.099215565950352e-05, "loss": 1.9487, "step": 20050000 }, { "epoch": 58.04, "learning_rate": 2.0991432011856243e-05, "loss": 1.9504, "step": 20050500 }, { "epoch": 58.04, "learning_rate": 2.0990708364208965e-05, "loss": 1.9451, "step": 20051000 }, { "epoch": 58.04, "learning_rate": 2.0989984716561688e-05, "loss": 1.9576, "step": 20051500 }, { "epoch": 58.04, "learning_rate": 2.0989261068914413e-05, "loss": 1.9416, "step": 20052000 }, { "epoch": 58.04, "learning_rate": 2.098853742126714e-05, "loss": 1.933, "step": 20052500 }, { "epoch": 58.05, "learning_rate": 2.098781377361986e-05, "loss": 1.9273, "step": 20053000 }, { "epoch": 58.05, "learning_rate": 2.0987091573267877e-05, "loss": 1.938, "step": 20053500 }, { "epoch": 58.05, "learning_rate": 2.0986367925620602e-05, "loss": 1.9291, "step": 20054000 }, { "epoch": 58.05, "learning_rate": 2.0985644277973325e-05, "loss": 1.9634, "step": 20054500 }, { "epoch": 58.05, "learning_rate": 2.0984920630326047e-05, "loss": 1.945, "step": 20055000 }, { "epoch": 58.05, "learning_rate": 2.0984198429974066e-05, "loss": 1.9418, "step": 20055500 }, { "epoch": 58.05, "learning_rate": 2.0983474782326788e-05, "loss": 1.9454, "step": 20056000 }, { "epoch": 58.06, "learning_rate": 2.0982751134679514e-05, "loss": 1.9819, "step": 20056500 }, { "epoch": 58.06, "learning_rate": 2.0982027487032236e-05, "loss": 1.9666, "step": 20057000 }, { "epoch": 58.06, "learning_rate": 2.0981303839384958e-05, "loss": 1.9393, "step": 20057500 }, { "epoch": 58.06, "learning_rate": 2.098058019173768e-05, "loss": 1.9587, "step": 20058000 }, { "epoch": 58.06, "learning_rate": 2.0979856544090403e-05, "loss": 1.9365, "step": 20058500 }, { "epoch": 58.06, "learning_rate": 2.0979132896443128e-05, "loss": 1.9501, "step": 20059000 }, { "epoch": 58.06, "learning_rate": 2.0978410696091147e-05, "loss": 1.9515, "step": 20059500 }, { "epoch": 58.07, "learning_rate": 2.097768704844387e-05, "loss": 1.9175, "step": 20060000 }, { "epoch": 58.07, "learning_rate": 2.097696340079659e-05, "loss": 1.9313, "step": 20060500 }, { "epoch": 58.07, "learning_rate": 2.097624120044461e-05, "loss": 1.9715, "step": 20061000 }, { "epoch": 58.07, "learning_rate": 2.0975517552797333e-05, "loss": 1.9559, "step": 20061500 }, { "epoch": 58.07, "learning_rate": 2.0974793905150055e-05, "loss": 1.9455, "step": 20062000 }, { "epoch": 58.07, "learning_rate": 2.0974070257502777e-05, "loss": 1.9482, "step": 20062500 }, { "epoch": 58.07, "learning_rate": 2.0973346609855503e-05, "loss": 1.9805, "step": 20063000 }, { "epoch": 58.08, "learning_rate": 2.097262296220823e-05, "loss": 1.9625, "step": 20063500 }, { "epoch": 58.08, "learning_rate": 2.097189931456095e-05, "loss": 1.9367, "step": 20064000 }, { "epoch": 58.08, "learning_rate": 2.0971175666913673e-05, "loss": 1.9569, "step": 20064500 }, { "epoch": 58.08, "learning_rate": 2.0970452019266395e-05, "loss": 1.9367, "step": 20065000 }, { "epoch": 58.08, "learning_rate": 2.0969728371619117e-05, "loss": 1.9408, "step": 20065500 }, { "epoch": 58.08, "learning_rate": 2.0969006171267136e-05, "loss": 1.9539, "step": 20066000 }, { "epoch": 58.08, "learning_rate": 2.0968282523619862e-05, "loss": 1.929, "step": 20066500 }, { "epoch": 58.09, "learning_rate": 2.0967558875972584e-05, "loss": 1.9431, "step": 20067000 }, { "epoch": 58.09, "learning_rate": 2.0966835228325306e-05, "loss": 1.9357, "step": 20067500 }, { "epoch": 58.09, "learning_rate": 2.0966113027973325e-05, "loss": 1.9687, "step": 20068000 }, { "epoch": 58.09, "learning_rate": 2.0965389380326048e-05, "loss": 1.9497, "step": 20068500 }, { "epoch": 58.09, "learning_rate": 2.096466573267877e-05, "loss": 1.9726, "step": 20069000 }, { "epoch": 58.09, "learning_rate": 2.0963942085031492e-05, "loss": 1.9785, "step": 20069500 }, { "epoch": 58.09, "learning_rate": 2.0963218437384218e-05, "loss": 1.9488, "step": 20070000 }, { "epoch": 58.1, "learning_rate": 2.0962494789736943e-05, "loss": 1.9687, "step": 20070500 }, { "epoch": 58.1, "learning_rate": 2.0961771142089666e-05, "loss": 1.9458, "step": 20071000 }, { "epoch": 58.1, "learning_rate": 2.0961047494442388e-05, "loss": 1.961, "step": 20071500 }, { "epoch": 58.1, "learning_rate": 2.096032384679511e-05, "loss": 1.9437, "step": 20072000 }, { "epoch": 58.1, "learning_rate": 2.095960164644313e-05, "loss": 1.9292, "step": 20072500 }, { "epoch": 58.1, "learning_rate": 2.0958879446091145e-05, "loss": 1.9406, "step": 20073000 }, { "epoch": 58.1, "learning_rate": 2.0958157245739163e-05, "loss": 1.9592, "step": 20073500 }, { "epoch": 58.11, "learning_rate": 2.0957433598091886e-05, "loss": 1.945, "step": 20074000 }, { "epoch": 58.11, "learning_rate": 2.095670995044461e-05, "loss": 1.9634, "step": 20074500 }, { "epoch": 58.11, "learning_rate": 2.0955986302797334e-05, "loss": 1.9575, "step": 20075000 }, { "epoch": 58.11, "learning_rate": 2.0955262655150056e-05, "loss": 1.9302, "step": 20075500 }, { "epoch": 58.11, "learning_rate": 2.095453900750278e-05, "loss": 1.9618, "step": 20076000 }, { "epoch": 58.11, "learning_rate": 2.0953815359855504e-05, "loss": 1.9495, "step": 20076500 }, { "epoch": 58.11, "learning_rate": 2.0953091712208226e-05, "loss": 1.9402, "step": 20077000 }, { "epoch": 58.12, "learning_rate": 2.0952368064560948e-05, "loss": 1.9402, "step": 20077500 }, { "epoch": 58.12, "learning_rate": 2.0951644416913674e-05, "loss": 1.9412, "step": 20078000 }, { "epoch": 58.12, "learning_rate": 2.0950920769266396e-05, "loss": 1.9584, "step": 20078500 }, { "epoch": 58.12, "learning_rate": 2.0950197121619118e-05, "loss": 1.9509, "step": 20079000 }, { "epoch": 58.12, "learning_rate": 2.0949474921267137e-05, "loss": 1.9666, "step": 20079500 }, { "epoch": 58.12, "learning_rate": 2.094875127361986e-05, "loss": 1.9573, "step": 20080000 }, { "epoch": 58.12, "learning_rate": 2.094802762597258e-05, "loss": 1.9536, "step": 20080500 }, { "epoch": 58.13, "learning_rate": 2.09473054256206e-05, "loss": 1.9362, "step": 20081000 }, { "epoch": 58.13, "learning_rate": 2.0946581777973323e-05, "loss": 1.9528, "step": 20081500 }, { "epoch": 58.13, "learning_rate": 2.094585813032605e-05, "loss": 1.9624, "step": 20082000 }, { "epoch": 58.13, "learning_rate": 2.094513448267877e-05, "loss": 1.972, "step": 20082500 }, { "epoch": 58.13, "learning_rate": 2.0944410835031493e-05, "loss": 1.9463, "step": 20083000 }, { "epoch": 58.13, "learning_rate": 2.0943688634679512e-05, "loss": 1.9431, "step": 20083500 }, { "epoch": 58.13, "learning_rate": 2.0942964987032234e-05, "loss": 1.9464, "step": 20084000 }, { "epoch": 58.14, "learning_rate": 2.0942242786680253e-05, "loss": 1.9631, "step": 20084500 }, { "epoch": 58.14, "learning_rate": 2.0941519139032975e-05, "loss": 1.9498, "step": 20085000 }, { "epoch": 58.14, "learning_rate": 2.09407954913857e-05, "loss": 1.9484, "step": 20085500 }, { "epoch": 58.14, "learning_rate": 2.0940071843738423e-05, "loss": 1.9203, "step": 20086000 }, { "epoch": 58.14, "learning_rate": 2.0939348196091145e-05, "loss": 1.933, "step": 20086500 }, { "epoch": 58.14, "learning_rate": 2.0938625995739164e-05, "loss": 1.9238, "step": 20087000 }, { "epoch": 58.15, "learning_rate": 2.0937902348091886e-05, "loss": 1.9487, "step": 20087500 }, { "epoch": 58.15, "learning_rate": 2.093717870044461e-05, "loss": 1.9413, "step": 20088000 }, { "epoch": 58.15, "learning_rate": 2.093645505279733e-05, "loss": 1.9495, "step": 20088500 }, { "epoch": 58.15, "learning_rate": 2.0935731405150057e-05, "loss": 1.9487, "step": 20089000 }, { "epoch": 58.15, "learning_rate": 2.0935007757502782e-05, "loss": 1.957, "step": 20089500 }, { "epoch": 58.15, "learning_rate": 2.0934284109855504e-05, "loss": 1.9503, "step": 20090000 }, { "epoch": 58.15, "learning_rate": 2.0933560462208227e-05, "loss": 1.9597, "step": 20090500 }, { "epoch": 58.16, "learning_rate": 2.0932838261856246e-05, "loss": 1.9676, "step": 20091000 }, { "epoch": 58.16, "learning_rate": 2.0932114614208968e-05, "loss": 1.9425, "step": 20091500 }, { "epoch": 58.16, "learning_rate": 2.0931392413856983e-05, "loss": 1.9225, "step": 20092000 }, { "epoch": 58.16, "learning_rate": 2.0930668766209706e-05, "loss": 1.9783, "step": 20092500 }, { "epoch": 58.16, "learning_rate": 2.092994511856243e-05, "loss": 1.943, "step": 20093000 }, { "epoch": 58.16, "learning_rate": 2.0929221470915157e-05, "loss": 1.9642, "step": 20093500 }, { "epoch": 58.16, "learning_rate": 2.092849782326788e-05, "loss": 1.9329, "step": 20094000 }, { "epoch": 58.17, "learning_rate": 2.09277741756206e-05, "loss": 1.9493, "step": 20094500 }, { "epoch": 58.17, "learning_rate": 2.0927050527973324e-05, "loss": 1.9423, "step": 20095000 }, { "epoch": 58.17, "learning_rate": 2.0926326880326046e-05, "loss": 1.9514, "step": 20095500 }, { "epoch": 58.17, "learning_rate": 2.092560323267877e-05, "loss": 1.9319, "step": 20096000 }, { "epoch": 58.17, "learning_rate": 2.0924879585031497e-05, "loss": 1.9518, "step": 20096500 }, { "epoch": 58.17, "learning_rate": 2.092415593738422e-05, "loss": 1.9422, "step": 20097000 }, { "epoch": 58.17, "learning_rate": 2.092343228973694e-05, "loss": 1.9457, "step": 20097500 }, { "epoch": 58.18, "learning_rate": 2.0922708642089664e-05, "loss": 1.9639, "step": 20098000 }, { "epoch": 58.18, "learning_rate": 2.0921986441737683e-05, "loss": 1.9403, "step": 20098500 }, { "epoch": 58.18, "learning_rate": 2.0921262794090405e-05, "loss": 1.9481, "step": 20099000 }, { "epoch": 58.18, "learning_rate": 2.0920539146443127e-05, "loss": 1.9679, "step": 20099500 }, { "epoch": 58.18, "learning_rate": 2.091981549879585e-05, "loss": 1.9556, "step": 20100000 }, { "epoch": 58.18, "learning_rate": 2.0919093298443872e-05, "loss": 1.955, "step": 20100500 }, { "epoch": 58.18, "learning_rate": 2.0918369650796594e-05, "loss": 1.9502, "step": 20101000 }, { "epoch": 58.19, "learning_rate": 2.0917646003149316e-05, "loss": 1.9502, "step": 20101500 }, { "epoch": 58.19, "learning_rate": 2.0916923802797335e-05, "loss": 1.9516, "step": 20102000 }, { "epoch": 58.19, "learning_rate": 2.0916200155150057e-05, "loss": 1.9671, "step": 20102500 }, { "epoch": 58.19, "learning_rate": 2.091547650750278e-05, "loss": 1.9331, "step": 20103000 }, { "epoch": 58.19, "learning_rate": 2.0914754307150795e-05, "loss": 1.9707, "step": 20103500 }, { "epoch": 58.19, "learning_rate": 2.091403065950352e-05, "loss": 1.9536, "step": 20104000 }, { "epoch": 58.19, "learning_rate": 2.0913307011856246e-05, "loss": 1.939, "step": 20104500 }, { "epoch": 58.2, "learning_rate": 2.091258336420897e-05, "loss": 1.9889, "step": 20105000 }, { "epoch": 58.2, "learning_rate": 2.091185971656169e-05, "loss": 1.9611, "step": 20105500 }, { "epoch": 58.2, "learning_rate": 2.0911136068914413e-05, "loss": 1.9507, "step": 20106000 }, { "epoch": 58.2, "learning_rate": 2.0910412421267135e-05, "loss": 1.9548, "step": 20106500 }, { "epoch": 58.2, "learning_rate": 2.0909690220915154e-05, "loss": 1.9535, "step": 20107000 }, { "epoch": 58.2, "learning_rate": 2.0908966573267877e-05, "loss": 1.9493, "step": 20107500 }, { "epoch": 58.2, "learning_rate": 2.0908242925620602e-05, "loss": 1.9411, "step": 20108000 }, { "epoch": 58.21, "learning_rate": 2.0907519277973324e-05, "loss": 1.9552, "step": 20108500 }, { "epoch": 58.21, "learning_rate": 2.0906795630326047e-05, "loss": 1.9482, "step": 20109000 }, { "epoch": 58.21, "learning_rate": 2.0906073429974066e-05, "loss": 1.9463, "step": 20109500 }, { "epoch": 58.21, "learning_rate": 2.0905349782326788e-05, "loss": 1.9617, "step": 20110000 }, { "epoch": 58.21, "learning_rate": 2.090462613467951e-05, "loss": 1.9305, "step": 20110500 }, { "epoch": 58.21, "learning_rate": 2.0903902487032236e-05, "loss": 1.9801, "step": 20111000 }, { "epoch": 58.21, "learning_rate": 2.0903178839384958e-05, "loss": 1.9664, "step": 20111500 }, { "epoch": 58.22, "learning_rate": 2.0902455191737683e-05, "loss": 1.9487, "step": 20112000 }, { "epoch": 58.22, "learning_rate": 2.0901731544090406e-05, "loss": 1.9637, "step": 20112500 }, { "epoch": 58.22, "learning_rate": 2.0901007896443128e-05, "loss": 1.945, "step": 20113000 }, { "epoch": 58.22, "learning_rate": 2.090028424879585e-05, "loss": 1.9478, "step": 20113500 }, { "epoch": 58.22, "learning_rate": 2.0899560601148572e-05, "loss": 1.9591, "step": 20114000 }, { "epoch": 58.22, "learning_rate": 2.089883840079659e-05, "loss": 1.9653, "step": 20114500 }, { "epoch": 58.22, "learning_rate": 2.0898114753149314e-05, "loss": 1.9626, "step": 20115000 }, { "epoch": 58.23, "learning_rate": 2.089739110550204e-05, "loss": 1.9296, "step": 20115500 }, { "epoch": 58.23, "learning_rate": 2.089666745785476e-05, "loss": 1.9642, "step": 20116000 }, { "epoch": 58.23, "learning_rate": 2.0895943810207487e-05, "loss": 1.9734, "step": 20116500 }, { "epoch": 58.23, "learning_rate": 2.089522016256021e-05, "loss": 1.9383, "step": 20117000 }, { "epoch": 58.23, "learning_rate": 2.089449651491293e-05, "loss": 1.9666, "step": 20117500 }, { "epoch": 58.23, "learning_rate": 2.0893772867265654e-05, "loss": 1.9575, "step": 20118000 }, { "epoch": 58.23, "learning_rate": 2.0893050666913673e-05, "loss": 1.943, "step": 20118500 }, { "epoch": 58.24, "learning_rate": 2.08923270192664e-05, "loss": 1.9397, "step": 20119000 }, { "epoch": 58.24, "learning_rate": 2.089160337161912e-05, "loss": 1.9535, "step": 20119500 }, { "epoch": 58.24, "learning_rate": 2.0890879723971843e-05, "loss": 1.9455, "step": 20120000 }, { "epoch": 58.24, "learning_rate": 2.0890157523619862e-05, "loss": 1.9538, "step": 20120500 }, { "epoch": 58.24, "learning_rate": 2.0889433875972584e-05, "loss": 1.9429, "step": 20121000 }, { "epoch": 58.24, "learning_rate": 2.0888710228325306e-05, "loss": 1.9751, "step": 20121500 }, { "epoch": 58.24, "learning_rate": 2.088798658067803e-05, "loss": 1.9561, "step": 20122000 }, { "epoch": 58.25, "learning_rate": 2.088726293303075e-05, "loss": 1.9714, "step": 20122500 }, { "epoch": 58.25, "learning_rate": 2.0886539285383476e-05, "loss": 1.9916, "step": 20123000 }, { "epoch": 58.25, "learning_rate": 2.0885817085031495e-05, "loss": 1.9506, "step": 20123500 }, { "epoch": 58.25, "learning_rate": 2.0885093437384217e-05, "loss": 1.9493, "step": 20124000 }, { "epoch": 58.25, "learning_rate": 2.0884371237032236e-05, "loss": 1.9616, "step": 20124500 }, { "epoch": 58.25, "learning_rate": 2.0883649036680252e-05, "loss": 1.9537, "step": 20125000 }, { "epoch": 58.26, "learning_rate": 2.0882925389032974e-05, "loss": 1.9377, "step": 20125500 }, { "epoch": 58.26, "learning_rate": 2.08822017413857e-05, "loss": 1.9953, "step": 20126000 }, { "epoch": 58.26, "learning_rate": 2.0881478093738422e-05, "loss": 1.9819, "step": 20126500 }, { "epoch": 58.26, "learning_rate": 2.0880754446091148e-05, "loss": 1.9556, "step": 20127000 }, { "epoch": 58.26, "learning_rate": 2.088003079844387e-05, "loss": 1.9485, "step": 20127500 }, { "epoch": 58.26, "learning_rate": 2.0879307150796592e-05, "loss": 1.9512, "step": 20128000 }, { "epoch": 58.26, "learning_rate": 2.0878583503149314e-05, "loss": 1.9305, "step": 20128500 }, { "epoch": 58.27, "learning_rate": 2.0877859855502037e-05, "loss": 1.9554, "step": 20129000 }, { "epoch": 58.27, "learning_rate": 2.0877136207854762e-05, "loss": 1.9511, "step": 20129500 }, { "epoch": 58.27, "learning_rate": 2.0876412560207484e-05, "loss": 1.9422, "step": 20130000 }, { "epoch": 58.27, "learning_rate": 2.0875690359855503e-05, "loss": 1.9493, "step": 20130500 }, { "epoch": 58.27, "learning_rate": 2.0874966712208226e-05, "loss": 1.9492, "step": 20131000 }, { "epoch": 58.27, "learning_rate": 2.087424306456095e-05, "loss": 1.9517, "step": 20131500 }, { "epoch": 58.27, "learning_rate": 2.0873519416913674e-05, "loss": 1.9604, "step": 20132000 }, { "epoch": 58.28, "learning_rate": 2.0872795769266396e-05, "loss": 1.9577, "step": 20132500 }, { "epoch": 58.28, "learning_rate": 2.0872072121619118e-05, "loss": 1.9585, "step": 20133000 }, { "epoch": 58.28, "learning_rate": 2.087134847397184e-05, "loss": 1.9407, "step": 20133500 }, { "epoch": 58.28, "learning_rate": 2.0870624826324566e-05, "loss": 1.9521, "step": 20134000 }, { "epoch": 58.28, "learning_rate": 2.0869902625972585e-05, "loss": 1.972, "step": 20134500 }, { "epoch": 58.28, "learning_rate": 2.0869178978325307e-05, "loss": 1.9464, "step": 20135000 }, { "epoch": 58.28, "learning_rate": 2.086845533067803e-05, "loss": 1.9442, "step": 20135500 }, { "epoch": 58.29, "learning_rate": 2.086773168303075e-05, "loss": 1.9533, "step": 20136000 }, { "epoch": 58.29, "learning_rate": 2.086700948267877e-05, "loss": 1.9582, "step": 20136500 }, { "epoch": 58.29, "learning_rate": 2.0866285835031493e-05, "loss": 1.9603, "step": 20137000 }, { "epoch": 58.29, "learning_rate": 2.0865562187384215e-05, "loss": 1.9491, "step": 20137500 }, { "epoch": 58.29, "learning_rate": 2.086483853973694e-05, "loss": 1.9428, "step": 20138000 }, { "epoch": 58.29, "learning_rate": 2.0864114892089666e-05, "loss": 1.9488, "step": 20138500 }, { "epoch": 58.29, "learning_rate": 2.086339124444239e-05, "loss": 1.9725, "step": 20139000 }, { "epoch": 58.3, "learning_rate": 2.086266759679511e-05, "loss": 1.9408, "step": 20139500 }, { "epoch": 58.3, "learning_rate": 2.0861943949147833e-05, "loss": 1.948, "step": 20140000 }, { "epoch": 58.3, "learning_rate": 2.0861221748795852e-05, "loss": 1.9447, "step": 20140500 }, { "epoch": 58.3, "learning_rate": 2.0860498101148574e-05, "loss": 1.9211, "step": 20141000 }, { "epoch": 58.3, "learning_rate": 2.08597744535013e-05, "loss": 1.956, "step": 20141500 }, { "epoch": 58.3, "learning_rate": 2.0859050805854022e-05, "loss": 1.9478, "step": 20142000 }, { "epoch": 58.3, "learning_rate": 2.0858327158206744e-05, "loss": 1.92, "step": 20142500 }, { "epoch": 58.31, "learning_rate": 2.0857603510559466e-05, "loss": 1.9693, "step": 20143000 }, { "epoch": 58.31, "learning_rate": 2.0856881310207485e-05, "loss": 1.9998, "step": 20143500 }, { "epoch": 58.31, "learning_rate": 2.0856157662560208e-05, "loss": 1.9555, "step": 20144000 }, { "epoch": 58.31, "learning_rate": 2.085543401491293e-05, "loss": 1.9447, "step": 20144500 }, { "epoch": 58.31, "learning_rate": 2.0854710367265655e-05, "loss": 1.9755, "step": 20145000 }, { "epoch": 58.31, "learning_rate": 2.0853986719618378e-05, "loss": 1.9789, "step": 20145500 }, { "epoch": 58.31, "learning_rate": 2.0853263071971103e-05, "loss": 1.9683, "step": 20146000 }, { "epoch": 58.32, "learning_rate": 2.0852539424323825e-05, "loss": 1.9993, "step": 20146500 }, { "epoch": 58.32, "learning_rate": 2.085181722397184e-05, "loss": 1.9432, "step": 20147000 }, { "epoch": 58.32, "learning_rate": 2.0851093576324567e-05, "loss": 1.9549, "step": 20147500 }, { "epoch": 58.32, "learning_rate": 2.085036992867729e-05, "loss": 1.9573, "step": 20148000 }, { "epoch": 58.32, "learning_rate": 2.084964628103001e-05, "loss": 1.9547, "step": 20148500 }, { "epoch": 58.32, "learning_rate": 2.0848922633382737e-05, "loss": 1.9802, "step": 20149000 }, { "epoch": 58.32, "learning_rate": 2.0848200433030756e-05, "loss": 1.9423, "step": 20149500 }, { "epoch": 58.33, "learning_rate": 2.0847476785383478e-05, "loss": 1.9566, "step": 20150000 }, { "epoch": 58.33, "learning_rate": 2.08467531377362e-05, "loss": 1.9421, "step": 20150500 }, { "epoch": 58.33, "learning_rate": 2.0846029490088922e-05, "loss": 1.9574, "step": 20151000 }, { "epoch": 58.33, "learning_rate": 2.0845305842441645e-05, "loss": 1.9485, "step": 20151500 }, { "epoch": 58.33, "learning_rate": 2.0844582194794367e-05, "loss": 1.9567, "step": 20152000 }, { "epoch": 58.33, "learning_rate": 2.0843858547147092e-05, "loss": 1.9743, "step": 20152500 }, { "epoch": 58.33, "learning_rate": 2.0843134899499818e-05, "loss": 1.9677, "step": 20153000 }, { "epoch": 58.34, "learning_rate": 2.084241125185254e-05, "loss": 1.9665, "step": 20153500 }, { "epoch": 58.34, "learning_rate": 2.0841687604205263e-05, "loss": 1.9451, "step": 20154000 }, { "epoch": 58.34, "learning_rate": 2.0840963956557985e-05, "loss": 1.9459, "step": 20154500 }, { "epoch": 58.34, "learning_rate": 2.0840240308910707e-05, "loss": 1.9464, "step": 20155000 }, { "epoch": 58.34, "learning_rate": 2.0839518108558726e-05, "loss": 1.9538, "step": 20155500 }, { "epoch": 58.34, "learning_rate": 2.083879446091145e-05, "loss": 1.9662, "step": 20156000 }, { "epoch": 58.34, "learning_rate": 2.0838070813264174e-05, "loss": 1.9547, "step": 20156500 }, { "epoch": 58.35, "learning_rate": 2.0837347165616896e-05, "loss": 1.9581, "step": 20157000 }, { "epoch": 58.35, "learning_rate": 2.0836623517969618e-05, "loss": 1.9401, "step": 20157500 }, { "epoch": 58.35, "learning_rate": 2.0835899870322344e-05, "loss": 1.9359, "step": 20158000 }, { "epoch": 58.35, "learning_rate": 2.083517766997036e-05, "loss": 1.9505, "step": 20158500 }, { "epoch": 58.35, "learning_rate": 2.0834454022323082e-05, "loss": 1.9607, "step": 20159000 }, { "epoch": 58.35, "learning_rate": 2.0833730374675804e-05, "loss": 1.9797, "step": 20159500 }, { "epoch": 58.35, "learning_rate": 2.083300672702853e-05, "loss": 1.9693, "step": 20160000 }, { "epoch": 58.36, "learning_rate": 2.083228452667655e-05, "loss": 1.9591, "step": 20160500 }, { "epoch": 58.36, "learning_rate": 2.083156087902927e-05, "loss": 1.9565, "step": 20161000 }, { "epoch": 58.36, "learning_rate": 2.0830837231381993e-05, "loss": 1.9478, "step": 20161500 }, { "epoch": 58.36, "learning_rate": 2.083011358373472e-05, "loss": 1.9256, "step": 20162000 }, { "epoch": 58.36, "learning_rate": 2.082938993608744e-05, "loss": 1.9396, "step": 20162500 }, { "epoch": 58.36, "learning_rate": 2.0828666288440163e-05, "loss": 1.98, "step": 20163000 }, { "epoch": 58.37, "learning_rate": 2.082794264079289e-05, "loss": 1.9623, "step": 20163500 }, { "epoch": 58.37, "learning_rate": 2.0827220440440908e-05, "loss": 1.9628, "step": 20164000 }, { "epoch": 58.37, "learning_rate": 2.082649679279363e-05, "loss": 1.931, "step": 20164500 }, { "epoch": 58.37, "learning_rate": 2.0825773145146352e-05, "loss": 1.9573, "step": 20165000 }, { "epoch": 58.37, "learning_rate": 2.0825050944794368e-05, "loss": 1.9494, "step": 20165500 }, { "epoch": 58.37, "learning_rate": 2.0824327297147093e-05, "loss": 1.934, "step": 20166000 }, { "epoch": 58.37, "learning_rate": 2.0823603649499815e-05, "loss": 1.9515, "step": 20166500 }, { "epoch": 58.38, "learning_rate": 2.0822880001852538e-05, "loss": 1.9609, "step": 20167000 }, { "epoch": 58.38, "learning_rate": 2.0822156354205263e-05, "loss": 1.9297, "step": 20167500 }, { "epoch": 58.38, "learning_rate": 2.0821434153853282e-05, "loss": 1.942, "step": 20168000 }, { "epoch": 58.38, "learning_rate": 2.0820710506206005e-05, "loss": 1.9562, "step": 20168500 }, { "epoch": 58.38, "learning_rate": 2.0819986858558727e-05, "loss": 1.9667, "step": 20169000 }, { "epoch": 58.38, "learning_rate": 2.081926321091145e-05, "loss": 1.9511, "step": 20169500 }, { "epoch": 58.38, "learning_rate": 2.081853956326417e-05, "loss": 1.9687, "step": 20170000 }, { "epoch": 58.39, "learning_rate": 2.0817815915616893e-05, "loss": 1.9632, "step": 20170500 }, { "epoch": 58.39, "learning_rate": 2.081709226796962e-05, "loss": 1.9483, "step": 20171000 }, { "epoch": 58.39, "learning_rate": 2.0816368620322345e-05, "loss": 1.9271, "step": 20171500 }, { "epoch": 58.39, "learning_rate": 2.0815644972675067e-05, "loss": 1.9864, "step": 20172000 }, { "epoch": 58.39, "learning_rate": 2.081492421961838e-05, "loss": 1.9382, "step": 20172500 }, { "epoch": 58.39, "learning_rate": 2.08142005719711e-05, "loss": 1.9285, "step": 20173000 }, { "epoch": 58.39, "learning_rate": 2.0813476924323824e-05, "loss": 1.9505, "step": 20173500 }, { "epoch": 58.4, "learning_rate": 2.0812753276676546e-05, "loss": 1.9495, "step": 20174000 }, { "epoch": 58.4, "learning_rate": 2.081202962902927e-05, "loss": 1.959, "step": 20174500 }, { "epoch": 58.4, "learning_rate": 2.0811305981381997e-05, "loss": 1.9737, "step": 20175000 }, { "epoch": 58.4, "learning_rate": 2.081058233373472e-05, "loss": 1.9541, "step": 20175500 }, { "epoch": 58.4, "learning_rate": 2.080985868608744e-05, "loss": 1.9759, "step": 20176000 }, { "epoch": 58.4, "learning_rate": 2.0809135038440164e-05, "loss": 1.9648, "step": 20176500 }, { "epoch": 58.4, "learning_rate": 2.0808411390792886e-05, "loss": 1.9587, "step": 20177000 }, { "epoch": 58.41, "learning_rate": 2.080768774314561e-05, "loss": 1.969, "step": 20177500 }, { "epoch": 58.41, "learning_rate": 2.0806964095498334e-05, "loss": 1.9648, "step": 20178000 }, { "epoch": 58.41, "learning_rate": 2.080624044785106e-05, "loss": 1.9694, "step": 20178500 }, { "epoch": 58.41, "learning_rate": 2.0805518247499075e-05, "loss": 1.9447, "step": 20179000 }, { "epoch": 58.41, "learning_rate": 2.0804796047147094e-05, "loss": 1.9518, "step": 20179500 }, { "epoch": 58.41, "learning_rate": 2.0804072399499816e-05, "loss": 1.9536, "step": 20180000 }, { "epoch": 58.41, "learning_rate": 2.080334875185254e-05, "loss": 1.9334, "step": 20180500 }, { "epoch": 58.42, "learning_rate": 2.0802626551500557e-05, "loss": 1.9574, "step": 20181000 }, { "epoch": 58.42, "learning_rate": 2.080190290385328e-05, "loss": 1.9584, "step": 20181500 }, { "epoch": 58.42, "learning_rate": 2.0801179256206002e-05, "loss": 1.9515, "step": 20182000 }, { "epoch": 58.42, "learning_rate": 2.0800455608558728e-05, "loss": 1.9677, "step": 20182500 }, { "epoch": 58.42, "learning_rate": 2.079973196091145e-05, "loss": 1.9505, "step": 20183000 }, { "epoch": 58.42, "learning_rate": 2.079900976055947e-05, "loss": 1.9584, "step": 20183500 }, { "epoch": 58.42, "learning_rate": 2.079828611291219e-05, "loss": 1.9457, "step": 20184000 }, { "epoch": 58.43, "learning_rate": 2.0797562465264913e-05, "loss": 1.9404, "step": 20184500 }, { "epoch": 58.43, "learning_rate": 2.0796838817617635e-05, "loss": 1.9727, "step": 20185000 }, { "epoch": 58.43, "learning_rate": 2.0796115169970358e-05, "loss": 1.9615, "step": 20185500 }, { "epoch": 58.43, "learning_rate": 2.0795391522323083e-05, "loss": 1.9403, "step": 20186000 }, { "epoch": 58.43, "learning_rate": 2.079466787467581e-05, "loss": 1.9741, "step": 20186500 }, { "epoch": 58.43, "learning_rate": 2.079394422702853e-05, "loss": 1.9636, "step": 20187000 }, { "epoch": 58.43, "learning_rate": 2.0793222026676547e-05, "loss": 1.9442, "step": 20187500 }, { "epoch": 58.44, "learning_rate": 2.0792498379029272e-05, "loss": 1.9396, "step": 20188000 }, { "epoch": 58.44, "learning_rate": 2.0791774731381995e-05, "loss": 1.9693, "step": 20188500 }, { "epoch": 58.44, "learning_rate": 2.0791051083734717e-05, "loss": 1.9685, "step": 20189000 }, { "epoch": 58.44, "learning_rate": 2.079032743608744e-05, "loss": 1.9689, "step": 20189500 }, { "epoch": 58.44, "learning_rate": 2.078960523573546e-05, "loss": 1.9764, "step": 20190000 }, { "epoch": 58.44, "learning_rate": 2.0788881588088184e-05, "loss": 1.9411, "step": 20190500 }, { "epoch": 58.44, "learning_rate": 2.0788157940440906e-05, "loss": 1.9715, "step": 20191000 }, { "epoch": 58.45, "learning_rate": 2.0787434292793628e-05, "loss": 1.972, "step": 20191500 }, { "epoch": 58.45, "learning_rate": 2.078671064514635e-05, "loss": 1.9898, "step": 20192000 }, { "epoch": 58.45, "learning_rate": 2.0785986997499073e-05, "loss": 1.9422, "step": 20192500 }, { "epoch": 58.45, "learning_rate": 2.0785263349851798e-05, "loss": 1.956, "step": 20193000 }, { "epoch": 58.45, "learning_rate": 2.0784539702204524e-05, "loss": 1.9542, "step": 20193500 }, { "epoch": 58.45, "learning_rate": 2.0783816054557246e-05, "loss": 1.9565, "step": 20194000 }, { "epoch": 58.45, "learning_rate": 2.0783092406909968e-05, "loss": 1.9592, "step": 20194500 }, { "epoch": 58.46, "learning_rate": 2.0782370206557987e-05, "loss": 1.9749, "step": 20195000 }, { "epoch": 58.46, "learning_rate": 2.078164655891071e-05, "loss": 1.9478, "step": 20195500 }, { "epoch": 58.46, "learning_rate": 2.078092291126343e-05, "loss": 1.9252, "step": 20196000 }, { "epoch": 58.46, "learning_rate": 2.0780199263616154e-05, "loss": 1.9227, "step": 20196500 }, { "epoch": 58.46, "learning_rate": 2.0779477063264173e-05, "loss": 1.9556, "step": 20197000 }, { "epoch": 58.46, "learning_rate": 2.07787534156169e-05, "loss": 1.9589, "step": 20197500 }, { "epoch": 58.46, "learning_rate": 2.077802976796962e-05, "loss": 1.9515, "step": 20198000 }, { "epoch": 58.47, "learning_rate": 2.0777306120322343e-05, "loss": 1.9522, "step": 20198500 }, { "epoch": 58.47, "learning_rate": 2.0776582472675065e-05, "loss": 1.9496, "step": 20199000 }, { "epoch": 58.47, "learning_rate": 2.0775858825027787e-05, "loss": 1.9362, "step": 20199500 }, { "epoch": 58.47, "learning_rate": 2.0775135177380513e-05, "loss": 1.9341, "step": 20200000 }, { "epoch": 58.47, "learning_rate": 2.0774411529733235e-05, "loss": 1.9461, "step": 20200500 }, { "epoch": 58.47, "learning_rate": 2.077368788208596e-05, "loss": 1.9678, "step": 20201000 }, { "epoch": 58.48, "learning_rate": 2.0772964234438683e-05, "loss": 1.932, "step": 20201500 }, { "epoch": 58.48, "learning_rate": 2.07722420340867e-05, "loss": 1.9575, "step": 20202000 }, { "epoch": 58.48, "learning_rate": 2.0771518386439424e-05, "loss": 1.9688, "step": 20202500 }, { "epoch": 58.48, "learning_rate": 2.0770794738792146e-05, "loss": 1.9436, "step": 20203000 }, { "epoch": 58.48, "learning_rate": 2.077007109114487e-05, "loss": 1.9683, "step": 20203500 }, { "epoch": 58.48, "learning_rate": 2.076934744349759e-05, "loss": 1.9444, "step": 20204000 }, { "epoch": 58.48, "learning_rate": 2.0768623795850317e-05, "loss": 1.9396, "step": 20204500 }, { "epoch": 58.49, "learning_rate": 2.076790014820304e-05, "loss": 1.9478, "step": 20205000 }, { "epoch": 58.49, "learning_rate": 2.0767176500555764e-05, "loss": 1.9535, "step": 20205500 }, { "epoch": 58.49, "learning_rate": 2.0766455747499077e-05, "loss": 1.9622, "step": 20206000 }, { "epoch": 58.49, "learning_rate": 2.07657320998518e-05, "loss": 1.9601, "step": 20206500 }, { "epoch": 58.49, "learning_rate": 2.076500845220452e-05, "loss": 1.9923, "step": 20207000 }, { "epoch": 58.49, "learning_rate": 2.0764284804557243e-05, "loss": 1.9399, "step": 20207500 }, { "epoch": 58.49, "learning_rate": 2.0763561156909966e-05, "loss": 1.9767, "step": 20208000 }, { "epoch": 58.5, "learning_rate": 2.076283750926269e-05, "loss": 1.9457, "step": 20208500 }, { "epoch": 58.5, "learning_rate": 2.0762113861615413e-05, "loss": 1.9285, "step": 20209000 }, { "epoch": 58.5, "learning_rate": 2.076139021396814e-05, "loss": 1.9557, "step": 20209500 }, { "epoch": 58.5, "learning_rate": 2.076066656632086e-05, "loss": 1.986, "step": 20210000 }, { "epoch": 58.5, "learning_rate": 2.0759944365968877e-05, "loss": 1.9399, "step": 20210500 }, { "epoch": 58.5, "learning_rate": 2.0759220718321603e-05, "loss": 1.9591, "step": 20211000 }, { "epoch": 58.5, "learning_rate": 2.0758499965264915e-05, "loss": 1.9679, "step": 20211500 }, { "epoch": 58.51, "learning_rate": 2.0757776317617637e-05, "loss": 1.969, "step": 20212000 }, { "epoch": 58.51, "learning_rate": 2.0757052669970363e-05, "loss": 1.9617, "step": 20212500 }, { "epoch": 58.51, "learning_rate": 2.0756329022323085e-05, "loss": 1.9461, "step": 20213000 }, { "epoch": 58.51, "learning_rate": 2.0755605374675807e-05, "loss": 1.9518, "step": 20213500 }, { "epoch": 58.51, "learning_rate": 2.075488172702853e-05, "loss": 1.9872, "step": 20214000 }, { "epoch": 58.51, "learning_rate": 2.075415807938125e-05, "loss": 1.9346, "step": 20214500 }, { "epoch": 58.51, "learning_rate": 2.0753434431733977e-05, "loss": 1.9446, "step": 20215000 }, { "epoch": 58.52, "learning_rate": 2.0752712231381993e-05, "loss": 1.9577, "step": 20215500 }, { "epoch": 58.52, "learning_rate": 2.075198858373472e-05, "loss": 1.9706, "step": 20216000 }, { "epoch": 58.52, "learning_rate": 2.075126493608744e-05, "loss": 1.9714, "step": 20216500 }, { "epoch": 58.52, "learning_rate": 2.075054273573546e-05, "loss": 1.9501, "step": 20217000 }, { "epoch": 58.52, "learning_rate": 2.074982053538348e-05, "loss": 1.9665, "step": 20217500 }, { "epoch": 58.52, "learning_rate": 2.07490968877362e-05, "loss": 1.9478, "step": 20218000 }, { "epoch": 58.52, "learning_rate": 2.0748373240088923e-05, "loss": 1.9275, "step": 20218500 }, { "epoch": 58.53, "learning_rate": 2.0747649592441645e-05, "loss": 1.9729, "step": 20219000 }, { "epoch": 58.53, "learning_rate": 2.0746925944794367e-05, "loss": 1.9515, "step": 20219500 }, { "epoch": 58.53, "learning_rate": 2.0746202297147093e-05, "loss": 1.9265, "step": 20220000 }, { "epoch": 58.53, "learning_rate": 2.0745478649499815e-05, "loss": 1.9707, "step": 20220500 }, { "epoch": 58.53, "learning_rate": 2.074475500185254e-05, "loss": 1.9705, "step": 20221000 }, { "epoch": 58.53, "learning_rate": 2.0744031354205263e-05, "loss": 1.9365, "step": 20221500 }, { "epoch": 58.53, "learning_rate": 2.0743307706557985e-05, "loss": 1.9621, "step": 20222000 }, { "epoch": 58.54, "learning_rate": 2.0742584058910708e-05, "loss": 1.9622, "step": 20222500 }, { "epoch": 58.54, "learning_rate": 2.074186041126343e-05, "loss": 1.9596, "step": 20223000 }, { "epoch": 58.54, "learning_rate": 2.0741136763616155e-05, "loss": 1.9569, "step": 20223500 }, { "epoch": 58.54, "learning_rate": 2.0740413115968878e-05, "loss": 1.9286, "step": 20224000 }, { "epoch": 58.54, "learning_rate": 2.0739689468321603e-05, "loss": 1.9216, "step": 20224500 }, { "epoch": 58.54, "learning_rate": 2.0738965820674326e-05, "loss": 1.9636, "step": 20225000 }, { "epoch": 58.54, "learning_rate": 2.0738242173027048e-05, "loss": 1.9481, "step": 20225500 }, { "epoch": 58.55, "learning_rate": 2.073751852537977e-05, "loss": 1.9491, "step": 20226000 }, { "epoch": 58.55, "learning_rate": 2.073679632502779e-05, "loss": 1.962, "step": 20226500 }, { "epoch": 58.55, "learning_rate": 2.0736072677380515e-05, "loss": 1.9241, "step": 20227000 }, { "epoch": 58.55, "learning_rate": 2.0735349029733237e-05, "loss": 1.9502, "step": 20227500 }, { "epoch": 58.55, "learning_rate": 2.073462538208596e-05, "loss": 1.9661, "step": 20228000 }, { "epoch": 58.55, "learning_rate": 2.0733903181733978e-05, "loss": 1.9553, "step": 20228500 }, { "epoch": 58.55, "learning_rate": 2.07331795340867e-05, "loss": 1.9625, "step": 20229000 }, { "epoch": 58.56, "learning_rate": 2.0732455886439422e-05, "loss": 1.9471, "step": 20229500 }, { "epoch": 58.56, "learning_rate": 2.0731732238792145e-05, "loss": 1.9708, "step": 20230000 }, { "epoch": 58.56, "learning_rate": 2.0731008591144867e-05, "loss": 1.9424, "step": 20230500 }, { "epoch": 58.56, "learning_rate": 2.0730284943497593e-05, "loss": 1.9732, "step": 20231000 }, { "epoch": 58.56, "learning_rate": 2.072956274314561e-05, "loss": 1.962, "step": 20231500 }, { "epoch": 58.56, "learning_rate": 2.0728839095498334e-05, "loss": 1.9379, "step": 20232000 }, { "epoch": 58.56, "learning_rate": 2.0728115447851056e-05, "loss": 1.9663, "step": 20232500 }, { "epoch": 58.57, "learning_rate": 2.0727391800203778e-05, "loss": 1.9528, "step": 20233000 }, { "epoch": 58.57, "learning_rate": 2.0726668152556504e-05, "loss": 1.9629, "step": 20233500 }, { "epoch": 58.57, "learning_rate": 2.0725944504909226e-05, "loss": 1.9465, "step": 20234000 }, { "epoch": 58.57, "learning_rate": 2.072522085726195e-05, "loss": 1.9669, "step": 20234500 }, { "epoch": 58.57, "learning_rate": 2.0724497209614674e-05, "loss": 1.953, "step": 20235000 }, { "epoch": 58.57, "learning_rate": 2.0723776456557986e-05, "loss": 1.9531, "step": 20235500 }, { "epoch": 58.57, "learning_rate": 2.072305280891071e-05, "loss": 1.9795, "step": 20236000 }, { "epoch": 58.58, "learning_rate": 2.072232916126343e-05, "loss": 1.9461, "step": 20236500 }, { "epoch": 58.58, "learning_rate": 2.0721605513616156e-05, "loss": 1.9677, "step": 20237000 }, { "epoch": 58.58, "learning_rate": 2.072088186596888e-05, "loss": 1.9702, "step": 20237500 }, { "epoch": 58.58, "learning_rate": 2.0720159665616894e-05, "loss": 1.9802, "step": 20238000 }, { "epoch": 58.58, "learning_rate": 2.071943601796962e-05, "loss": 1.9635, "step": 20238500 }, { "epoch": 58.58, "learning_rate": 2.0718712370322342e-05, "loss": 1.9694, "step": 20239000 }, { "epoch": 58.59, "learning_rate": 2.0717988722675068e-05, "loss": 1.9674, "step": 20239500 }, { "epoch": 58.59, "learning_rate": 2.071726507502779e-05, "loss": 1.9321, "step": 20240000 }, { "epoch": 58.59, "learning_rate": 2.0716541427380512e-05, "loss": 1.9527, "step": 20240500 }, { "epoch": 58.59, "learning_rate": 2.0715817779733234e-05, "loss": 1.9583, "step": 20241000 }, { "epoch": 58.59, "learning_rate": 2.0715095579381253e-05, "loss": 1.9695, "step": 20241500 }, { "epoch": 58.59, "learning_rate": 2.0714371931733975e-05, "loss": 1.9531, "step": 20242000 }, { "epoch": 58.59, "learning_rate": 2.07136482840867e-05, "loss": 1.9641, "step": 20242500 }, { "epoch": 58.6, "learning_rate": 2.0712924636439423e-05, "loss": 1.9649, "step": 20243000 }, { "epoch": 58.6, "learning_rate": 2.0712200988792145e-05, "loss": 1.9695, "step": 20243500 }, { "epoch": 58.6, "learning_rate": 2.0711477341144868e-05, "loss": 1.9512, "step": 20244000 }, { "epoch": 58.6, "learning_rate": 2.0710755140792887e-05, "loss": 1.9818, "step": 20244500 }, { "epoch": 58.6, "learning_rate": 2.0710032940440906e-05, "loss": 1.9621, "step": 20245000 }, { "epoch": 58.6, "learning_rate": 2.0709309292793628e-05, "loss": 1.9413, "step": 20245500 }, { "epoch": 58.6, "learning_rate": 2.0708585645146353e-05, "loss": 1.9686, "step": 20246000 }, { "epoch": 58.61, "learning_rate": 2.0707861997499076e-05, "loss": 1.9582, "step": 20246500 }, { "epoch": 58.61, "learning_rate": 2.0707139797147095e-05, "loss": 1.9545, "step": 20247000 }, { "epoch": 58.61, "learning_rate": 2.0706416149499817e-05, "loss": 1.9621, "step": 20247500 }, { "epoch": 58.61, "learning_rate": 2.070569250185254e-05, "loss": 1.9679, "step": 20248000 }, { "epoch": 58.61, "learning_rate": 2.070496885420526e-05, "loss": 1.9754, "step": 20248500 }, { "epoch": 58.61, "learning_rate": 2.0704245206557984e-05, "loss": 1.9661, "step": 20249000 }, { "epoch": 58.61, "learning_rate": 2.0703521558910706e-05, "loss": 1.9584, "step": 20249500 }, { "epoch": 58.62, "learning_rate": 2.070279791126343e-05, "loss": 1.9483, "step": 20250000 }, { "epoch": 58.62, "learning_rate": 2.070207571091145e-05, "loss": 1.9697, "step": 20250500 }, { "epoch": 58.62, "learning_rate": 2.0701352063264173e-05, "loss": 1.9657, "step": 20251000 }, { "epoch": 58.62, "learning_rate": 2.070062986291219e-05, "loss": 1.9276, "step": 20251500 }, { "epoch": 58.62, "learning_rate": 2.0699906215264914e-05, "loss": 1.9603, "step": 20252000 }, { "epoch": 58.62, "learning_rate": 2.0699182567617636e-05, "loss": 1.9361, "step": 20252500 }, { "epoch": 58.62, "learning_rate": 2.0698458919970358e-05, "loss": 1.9757, "step": 20253000 }, { "epoch": 58.63, "learning_rate": 2.0697735272323084e-05, "loss": 1.9547, "step": 20253500 }, { "epoch": 58.63, "learning_rate": 2.0697011624675806e-05, "loss": 1.9737, "step": 20254000 }, { "epoch": 58.63, "learning_rate": 2.0696287977028532e-05, "loss": 1.9571, "step": 20254500 }, { "epoch": 58.63, "learning_rate": 2.0695564329381254e-05, "loss": 1.9499, "step": 20255000 }, { "epoch": 58.63, "learning_rate": 2.0694840681733976e-05, "loss": 1.9648, "step": 20255500 }, { "epoch": 58.63, "learning_rate": 2.06941170340867e-05, "loss": 1.9602, "step": 20256000 }, { "epoch": 58.63, "learning_rate": 2.069339338643942e-05, "loss": 1.9781, "step": 20256500 }, { "epoch": 58.64, "learning_rate": 2.0692669738792146e-05, "loss": 1.9497, "step": 20257000 }, { "epoch": 58.64, "learning_rate": 2.0691946091144872e-05, "loss": 1.9296, "step": 20257500 }, { "epoch": 58.64, "learning_rate": 2.0691222443497594e-05, "loss": 1.9537, "step": 20258000 }, { "epoch": 58.64, "learning_rate": 2.0690498795850316e-05, "loss": 1.9722, "step": 20258500 }, { "epoch": 58.64, "learning_rate": 2.068977514820304e-05, "loss": 1.9544, "step": 20259000 }, { "epoch": 58.64, "learning_rate": 2.068905150055576e-05, "loss": 1.9638, "step": 20259500 }, { "epoch": 58.64, "learning_rate": 2.0688327852908483e-05, "loss": 1.957, "step": 20260000 }, { "epoch": 58.65, "learning_rate": 2.0687605652556502e-05, "loss": 1.9563, "step": 20260500 }, { "epoch": 58.65, "learning_rate": 2.0686882004909228e-05, "loss": 1.9536, "step": 20261000 }, { "epoch": 58.65, "learning_rate": 2.068615835726195e-05, "loss": 1.9561, "step": 20261500 }, { "epoch": 58.65, "learning_rate": 2.0685434709614672e-05, "loss": 1.9626, "step": 20262000 }, { "epoch": 58.65, "learning_rate": 2.0684711061967398e-05, "loss": 1.9595, "step": 20262500 }, { "epoch": 58.65, "learning_rate": 2.068398741432012e-05, "loss": 1.975, "step": 20263000 }, { "epoch": 58.65, "learning_rate": 2.0683263766672842e-05, "loss": 1.9614, "step": 20263500 }, { "epoch": 58.66, "learning_rate": 2.0682541566320858e-05, "loss": 1.9418, "step": 20264000 }, { "epoch": 58.66, "learning_rate": 2.0681817918673583e-05, "loss": 1.9546, "step": 20264500 }, { "epoch": 58.66, "learning_rate": 2.068109427102631e-05, "loss": 1.982, "step": 20265000 }, { "epoch": 58.66, "learning_rate": 2.0680372070674325e-05, "loss": 1.9773, "step": 20265500 }, { "epoch": 58.66, "learning_rate": 2.0679648423027047e-05, "loss": 1.9391, "step": 20266000 }, { "epoch": 58.66, "learning_rate": 2.0678924775379772e-05, "loss": 1.955, "step": 20266500 }, { "epoch": 58.66, "learning_rate": 2.0678201127732495e-05, "loss": 1.9487, "step": 20267000 }, { "epoch": 58.67, "learning_rate": 2.0677477480085217e-05, "loss": 1.9309, "step": 20267500 }, { "epoch": 58.67, "learning_rate": 2.0676753832437942e-05, "loss": 1.9632, "step": 20268000 }, { "epoch": 58.67, "learning_rate": 2.0676030184790665e-05, "loss": 1.9609, "step": 20268500 }, { "epoch": 58.67, "learning_rate": 2.0675306537143387e-05, "loss": 1.95, "step": 20269000 }, { "epoch": 58.67, "learning_rate": 2.067458288949611e-05, "loss": 1.9598, "step": 20269500 }, { "epoch": 58.67, "learning_rate": 2.0673859241848835e-05, "loss": 1.9562, "step": 20270000 }, { "epoch": 58.67, "learning_rate": 2.067313704149685e-05, "loss": 1.9787, "step": 20270500 }, { "epoch": 58.68, "learning_rate": 2.0672413393849573e-05, "loss": 1.9356, "step": 20271000 }, { "epoch": 58.68, "learning_rate": 2.0671689746202298e-05, "loss": 1.9542, "step": 20271500 }, { "epoch": 58.68, "learning_rate": 2.0670966098555024e-05, "loss": 1.9598, "step": 20272000 }, { "epoch": 58.68, "learning_rate": 2.0670242450907746e-05, "loss": 1.9611, "step": 20272500 }, { "epoch": 58.68, "learning_rate": 2.066951880326047e-05, "loss": 1.9836, "step": 20273000 }, { "epoch": 58.68, "learning_rate": 2.066879515561319e-05, "loss": 1.9674, "step": 20273500 }, { "epoch": 58.68, "learning_rate": 2.066807295526121e-05, "loss": 1.9463, "step": 20274000 }, { "epoch": 58.69, "learning_rate": 2.0667350754909225e-05, "loss": 1.9602, "step": 20274500 }, { "epoch": 58.69, "learning_rate": 2.0666627107261947e-05, "loss": 1.9343, "step": 20275000 }, { "epoch": 58.69, "learning_rate": 2.0665903459614673e-05, "loss": 1.9513, "step": 20275500 }, { "epoch": 58.69, "learning_rate": 2.06651798119674e-05, "loss": 1.951, "step": 20276000 }, { "epoch": 58.69, "learning_rate": 2.066445616432012e-05, "loss": 1.9665, "step": 20276500 }, { "epoch": 58.69, "learning_rate": 2.0663732516672843e-05, "loss": 1.95, "step": 20277000 }, { "epoch": 58.7, "learning_rate": 2.0663008869025565e-05, "loss": 1.9545, "step": 20277500 }, { "epoch": 58.7, "learning_rate": 2.0662285221378287e-05, "loss": 1.9468, "step": 20278000 }, { "epoch": 58.7, "learning_rate": 2.0661561573731013e-05, "loss": 1.9628, "step": 20278500 }, { "epoch": 58.7, "learning_rate": 2.066083937337903e-05, "loss": 1.9652, "step": 20279000 }, { "epoch": 58.7, "learning_rate": 2.0660115725731754e-05, "loss": 1.9666, "step": 20279500 }, { "epoch": 58.7, "learning_rate": 2.0659392078084476e-05, "loss": 1.9473, "step": 20280000 }, { "epoch": 58.7, "learning_rate": 2.06586684304372e-05, "loss": 1.9485, "step": 20280500 }, { "epoch": 58.71, "learning_rate": 2.0657944782789924e-05, "loss": 1.9297, "step": 20281000 }, { "epoch": 58.71, "learning_rate": 2.065722258243794e-05, "loss": 1.959, "step": 20281500 }, { "epoch": 58.71, "learning_rate": 2.0656498934790662e-05, "loss": 1.9686, "step": 20282000 }, { "epoch": 58.71, "learning_rate": 2.0655775287143388e-05, "loss": 1.9639, "step": 20282500 }, { "epoch": 58.71, "learning_rate": 2.0655051639496113e-05, "loss": 1.9463, "step": 20283000 }, { "epoch": 58.71, "learning_rate": 2.0654327991848836e-05, "loss": 1.9527, "step": 20283500 }, { "epoch": 58.71, "learning_rate": 2.0653604344201558e-05, "loss": 1.9605, "step": 20284000 }, { "epoch": 58.72, "learning_rate": 2.065288069655428e-05, "loss": 1.9502, "step": 20284500 }, { "epoch": 58.72, "learning_rate": 2.0652157048907002e-05, "loss": 1.9299, "step": 20285000 }, { "epoch": 58.72, "learning_rate": 2.0651433401259725e-05, "loss": 1.9507, "step": 20285500 }, { "epoch": 58.72, "learning_rate": 2.065070975361245e-05, "loss": 1.9602, "step": 20286000 }, { "epoch": 58.72, "learning_rate": 2.0649986105965176e-05, "loss": 1.9447, "step": 20286500 }, { "epoch": 58.72, "learning_rate": 2.0649262458317898e-05, "loss": 1.9501, "step": 20287000 }, { "epoch": 58.72, "learning_rate": 2.064853881067062e-05, "loss": 1.9573, "step": 20287500 }, { "epoch": 58.73, "learning_rate": 2.064781661031864e-05, "loss": 1.9614, "step": 20288000 }, { "epoch": 58.73, "learning_rate": 2.064709296267136e-05, "loss": 1.9666, "step": 20288500 }, { "epoch": 58.73, "learning_rate": 2.0646369315024084e-05, "loss": 1.9401, "step": 20289000 }, { "epoch": 58.73, "learning_rate": 2.0645645667376806e-05, "loss": 1.9718, "step": 20289500 }, { "epoch": 58.73, "learning_rate": 2.0644924914320118e-05, "loss": 1.9769, "step": 20290000 }, { "epoch": 58.73, "learning_rate": 2.0644201266672844e-05, "loss": 1.9587, "step": 20290500 }, { "epoch": 58.73, "learning_rate": 2.0643477619025566e-05, "loss": 1.9676, "step": 20291000 }, { "epoch": 58.74, "learning_rate": 2.0642753971378288e-05, "loss": 1.9488, "step": 20291500 }, { "epoch": 58.74, "learning_rate": 2.0642030323731014e-05, "loss": 1.9656, "step": 20292000 }, { "epoch": 58.74, "learning_rate": 2.0641306676083736e-05, "loss": 1.9621, "step": 20292500 }, { "epoch": 58.74, "learning_rate": 2.064058302843646e-05, "loss": 1.9494, "step": 20293000 }, { "epoch": 58.74, "learning_rate": 2.063985938078918e-05, "loss": 1.959, "step": 20293500 }, { "epoch": 58.74, "learning_rate": 2.0639135733141906e-05, "loss": 1.9411, "step": 20294000 }, { "epoch": 58.74, "learning_rate": 2.0638413532789925e-05, "loss": 1.9729, "step": 20294500 }, { "epoch": 58.75, "learning_rate": 2.0637689885142647e-05, "loss": 1.9627, "step": 20295000 }, { "epoch": 58.75, "learning_rate": 2.0636967684790663e-05, "loss": 1.9403, "step": 20295500 }, { "epoch": 58.75, "learning_rate": 2.063624403714339e-05, "loss": 1.933, "step": 20296000 }, { "epoch": 58.75, "learning_rate": 2.063552038949611e-05, "loss": 1.9507, "step": 20296500 }, { "epoch": 58.75, "learning_rate": 2.0634796741848833e-05, "loss": 1.9682, "step": 20297000 }, { "epoch": 58.75, "learning_rate": 2.0634074541496852e-05, "loss": 1.9749, "step": 20297500 }, { "epoch": 58.75, "learning_rate": 2.0633350893849578e-05, "loss": 1.9664, "step": 20298000 }, { "epoch": 58.76, "learning_rate": 2.0632628693497593e-05, "loss": 1.9928, "step": 20298500 }, { "epoch": 58.76, "learning_rate": 2.0631905045850315e-05, "loss": 1.9544, "step": 20299000 }, { "epoch": 58.76, "learning_rate": 2.063118139820304e-05, "loss": 1.9494, "step": 20299500 }, { "epoch": 58.76, "learning_rate": 2.0630457750555763e-05, "loss": 1.9848, "step": 20300000 }, { "epoch": 58.76, "learning_rate": 2.062973555020378e-05, "loss": 1.9637, "step": 20300500 }, { "epoch": 58.76, "learning_rate": 2.06290119025565e-05, "loss": 1.9499, "step": 20301000 }, { "epoch": 58.76, "learning_rate": 2.0628288254909227e-05, "loss": 1.967, "step": 20301500 }, { "epoch": 58.77, "learning_rate": 2.0627564607261952e-05, "loss": 1.9479, "step": 20302000 }, { "epoch": 58.77, "learning_rate": 2.0626840959614674e-05, "loss": 1.9665, "step": 20302500 }, { "epoch": 58.77, "learning_rate": 2.062611875926269e-05, "loss": 1.9642, "step": 20303000 }, { "epoch": 58.77, "learning_rate": 2.0625395111615416e-05, "loss": 1.9336, "step": 20303500 }, { "epoch": 58.77, "learning_rate": 2.0624671463968138e-05, "loss": 1.9625, "step": 20304000 }, { "epoch": 58.77, "learning_rate": 2.062394781632086e-05, "loss": 1.9875, "step": 20304500 }, { "epoch": 58.77, "learning_rate": 2.0623224168673582e-05, "loss": 1.9581, "step": 20305000 }, { "epoch": 58.78, "learning_rate": 2.06225019683216e-05, "loss": 1.9785, "step": 20305500 }, { "epoch": 58.78, "learning_rate": 2.0621778320674327e-05, "loss": 1.9256, "step": 20306000 }, { "epoch": 58.78, "learning_rate": 2.062105467302705e-05, "loss": 1.957, "step": 20306500 }, { "epoch": 58.78, "learning_rate": 2.062033102537977e-05, "loss": 1.9579, "step": 20307000 }, { "epoch": 58.78, "learning_rate": 2.0619607377732494e-05, "loss": 1.9504, "step": 20307500 }, { "epoch": 58.78, "learning_rate": 2.0618883730085216e-05, "loss": 1.9569, "step": 20308000 }, { "epoch": 58.78, "learning_rate": 2.061816008243794e-05, "loss": 1.9466, "step": 20308500 }, { "epoch": 58.79, "learning_rate": 2.0617436434790664e-05, "loss": 1.9495, "step": 20309000 }, { "epoch": 58.79, "learning_rate": 2.061671278714339e-05, "loss": 1.9319, "step": 20309500 }, { "epoch": 58.79, "learning_rate": 2.061598913949611e-05, "loss": 1.9586, "step": 20310000 }, { "epoch": 58.79, "learning_rate": 2.0615265491848834e-05, "loss": 1.9602, "step": 20310500 }, { "epoch": 58.79, "learning_rate": 2.0614543291496853e-05, "loss": 1.9682, "step": 20311000 }, { "epoch": 58.79, "learning_rate": 2.0613819643849575e-05, "loss": 1.957, "step": 20311500 }, { "epoch": 58.79, "learning_rate": 2.0613095996202297e-05, "loss": 1.9739, "step": 20312000 }, { "epoch": 58.8, "learning_rate": 2.061237234855502e-05, "loss": 1.9931, "step": 20312500 }, { "epoch": 58.8, "learning_rate": 2.0611648700907745e-05, "loss": 1.9516, "step": 20313000 }, { "epoch": 58.8, "learning_rate": 2.0610926500555764e-05, "loss": 1.9573, "step": 20313500 }, { "epoch": 58.8, "learning_rate": 2.0610202852908486e-05, "loss": 1.9913, "step": 20314000 }, { "epoch": 58.8, "learning_rate": 2.060947920526121e-05, "loss": 1.9618, "step": 20314500 }, { "epoch": 58.8, "learning_rate": 2.060875555761393e-05, "loss": 1.9588, "step": 20315000 }, { "epoch": 58.81, "learning_rate": 2.0608031909966656e-05, "loss": 1.9803, "step": 20315500 }, { "epoch": 58.81, "learning_rate": 2.060730826231938e-05, "loss": 1.9528, "step": 20316000 }, { "epoch": 58.81, "learning_rate": 2.0606584614672104e-05, "loss": 1.9407, "step": 20316500 }, { "epoch": 58.81, "learning_rate": 2.0605860967024826e-05, "loss": 1.9593, "step": 20317000 }, { "epoch": 58.81, "learning_rate": 2.060513731937755e-05, "loss": 1.9481, "step": 20317500 }, { "epoch": 58.81, "learning_rate": 2.0604415119025568e-05, "loss": 1.9643, "step": 20318000 }, { "epoch": 58.81, "learning_rate": 2.0603692918673583e-05, "loss": 1.9461, "step": 20318500 }, { "epoch": 58.82, "learning_rate": 2.0602969271026305e-05, "loss": 1.9778, "step": 20319000 }, { "epoch": 58.82, "learning_rate": 2.060224562337903e-05, "loss": 1.9482, "step": 20319500 }, { "epoch": 58.82, "learning_rate": 2.0601521975731753e-05, "loss": 1.9398, "step": 20320000 }, { "epoch": 58.82, "learning_rate": 2.060079832808448e-05, "loss": 1.9587, "step": 20320500 }, { "epoch": 58.82, "learning_rate": 2.06000746804372e-05, "loss": 1.9717, "step": 20321000 }, { "epoch": 58.82, "learning_rate": 2.0599351032789923e-05, "loss": 1.9706, "step": 20321500 }, { "epoch": 58.82, "learning_rate": 2.0598627385142646e-05, "loss": 1.979, "step": 20322000 }, { "epoch": 58.83, "learning_rate": 2.0597905184790665e-05, "loss": 1.9781, "step": 20322500 }, { "epoch": 58.83, "learning_rate": 2.0597181537143387e-05, "loss": 1.9524, "step": 20323000 }, { "epoch": 58.83, "learning_rate": 2.059645788949611e-05, "loss": 1.9632, "step": 20323500 }, { "epoch": 58.83, "learning_rate": 2.0595735689144128e-05, "loss": 1.9922, "step": 20324000 }, { "epoch": 58.83, "learning_rate": 2.0595012041496854e-05, "loss": 1.9493, "step": 20324500 }, { "epoch": 58.83, "learning_rate": 2.0594288393849576e-05, "loss": 1.9764, "step": 20325000 }, { "epoch": 58.83, "learning_rate": 2.0593564746202298e-05, "loss": 1.9598, "step": 20325500 }, { "epoch": 58.84, "learning_rate": 2.059284109855502e-05, "loss": 1.9669, "step": 20326000 }, { "epoch": 58.84, "learning_rate": 2.0592117450907742e-05, "loss": 1.9716, "step": 20326500 }, { "epoch": 58.84, "learning_rate": 2.0591393803260468e-05, "loss": 1.9839, "step": 20327000 }, { "epoch": 58.84, "learning_rate": 2.059067015561319e-05, "loss": 1.9399, "step": 20327500 }, { "epoch": 58.84, "learning_rate": 2.058994795526121e-05, "loss": 1.9581, "step": 20328000 }, { "epoch": 58.84, "learning_rate": 2.0589225754909228e-05, "loss": 1.955, "step": 20328500 }, { "epoch": 58.84, "learning_rate": 2.058850210726195e-05, "loss": 1.9469, "step": 20329000 }, { "epoch": 58.85, "learning_rate": 2.0587778459614673e-05, "loss": 1.9583, "step": 20329500 }, { "epoch": 58.85, "learning_rate": 2.0587054811967395e-05, "loss": 1.9547, "step": 20330000 }, { "epoch": 58.85, "learning_rate": 2.0586334058910707e-05, "loss": 1.9404, "step": 20330500 }, { "epoch": 58.85, "learning_rate": 2.0585610411263433e-05, "loss": 1.9648, "step": 20331000 }, { "epoch": 58.85, "learning_rate": 2.0584886763616155e-05, "loss": 1.9721, "step": 20331500 }, { "epoch": 58.85, "learning_rate": 2.058416311596888e-05, "loss": 1.9401, "step": 20332000 }, { "epoch": 58.85, "learning_rate": 2.0583439468321603e-05, "loss": 1.9602, "step": 20332500 }, { "epoch": 58.86, "learning_rate": 2.0582715820674325e-05, "loss": 1.9817, "step": 20333000 }, { "epoch": 58.86, "learning_rate": 2.0581992173027047e-05, "loss": 1.9475, "step": 20333500 }, { "epoch": 58.86, "learning_rate": 2.0581269972675066e-05, "loss": 1.9787, "step": 20334000 }, { "epoch": 58.86, "learning_rate": 2.058054632502779e-05, "loss": 1.9469, "step": 20334500 }, { "epoch": 58.86, "learning_rate": 2.057982267738051e-05, "loss": 1.9748, "step": 20335000 }, { "epoch": 58.86, "learning_rate": 2.0579099029733233e-05, "loss": 1.9572, "step": 20335500 }, { "epoch": 58.86, "learning_rate": 2.057837538208596e-05, "loss": 1.9479, "step": 20336000 }, { "epoch": 58.87, "learning_rate": 2.0577651734438684e-05, "loss": 1.9661, "step": 20336500 }, { "epoch": 58.87, "learning_rate": 2.0576928086791406e-05, "loss": 1.95, "step": 20337000 }, { "epoch": 58.87, "learning_rate": 2.057620443914413e-05, "loss": 1.9622, "step": 20337500 }, { "epoch": 58.87, "learning_rate": 2.057548079149685e-05, "loss": 1.9649, "step": 20338000 }, { "epoch": 58.87, "learning_rate": 2.0574757143849573e-05, "loss": 1.9558, "step": 20338500 }, { "epoch": 58.87, "learning_rate": 2.0574033496202295e-05, "loss": 1.9363, "step": 20339000 }, { "epoch": 58.87, "learning_rate": 2.057330984855502e-05, "loss": 1.9329, "step": 20339500 }, { "epoch": 58.88, "learning_rate": 2.057258764820304e-05, "loss": 1.9449, "step": 20340000 }, { "epoch": 58.88, "learning_rate": 2.0571864000555762e-05, "loss": 1.975, "step": 20340500 }, { "epoch": 58.88, "learning_rate": 2.0571140352908484e-05, "loss": 1.933, "step": 20341000 }, { "epoch": 58.88, "learning_rate": 2.057041670526121e-05, "loss": 1.9475, "step": 20341500 }, { "epoch": 58.88, "learning_rate": 2.0569694504909226e-05, "loss": 1.9633, "step": 20342000 }, { "epoch": 58.88, "learning_rate": 2.0568970857261948e-05, "loss": 1.9448, "step": 20342500 }, { "epoch": 58.88, "learning_rate": 2.0568247209614673e-05, "loss": 1.9562, "step": 20343000 }, { "epoch": 58.89, "learning_rate": 2.0567523561967396e-05, "loss": 1.9359, "step": 20343500 }, { "epoch": 58.89, "learning_rate": 2.056679991432012e-05, "loss": 1.9454, "step": 20344000 }, { "epoch": 58.89, "learning_rate": 2.0566076266672844e-05, "loss": 1.9517, "step": 20344500 }, { "epoch": 58.89, "learning_rate": 2.0565352619025566e-05, "loss": 1.9343, "step": 20345000 }, { "epoch": 58.89, "learning_rate": 2.0564628971378288e-05, "loss": 1.9431, "step": 20345500 }, { "epoch": 58.89, "learning_rate": 2.0563906771026307e-05, "loss": 1.9612, "step": 20346000 }, { "epoch": 58.89, "learning_rate": 2.056318312337903e-05, "loss": 1.9561, "step": 20346500 }, { "epoch": 58.9, "learning_rate": 2.0562459475731755e-05, "loss": 1.965, "step": 20347000 }, { "epoch": 58.9, "learning_rate": 2.0561735828084477e-05, "loss": 1.9628, "step": 20347500 }, { "epoch": 58.9, "learning_rate": 2.05610121804372e-05, "loss": 1.9578, "step": 20348000 }, { "epoch": 58.9, "learning_rate": 2.0560289980085218e-05, "loss": 1.9666, "step": 20348500 }, { "epoch": 58.9, "learning_rate": 2.055956633243794e-05, "loss": 1.937, "step": 20349000 }, { "epoch": 58.9, "learning_rate": 2.0558842684790663e-05, "loss": 1.9754, "step": 20349500 }, { "epoch": 58.9, "learning_rate": 2.0558119037143385e-05, "loss": 1.9389, "step": 20350000 }, { "epoch": 58.91, "learning_rate": 2.055739538949611e-05, "loss": 1.9511, "step": 20350500 }, { "epoch": 58.91, "learning_rate": 2.0556671741848836e-05, "loss": 1.9652, "step": 20351000 }, { "epoch": 58.91, "learning_rate": 2.0555949541496852e-05, "loss": 1.9605, "step": 20351500 }, { "epoch": 58.91, "learning_rate": 2.0555225893849574e-05, "loss": 1.964, "step": 20352000 }, { "epoch": 58.91, "learning_rate": 2.0554502246202296e-05, "loss": 1.941, "step": 20352500 }, { "epoch": 58.91, "learning_rate": 2.0553778598555022e-05, "loss": 1.9493, "step": 20353000 }, { "epoch": 58.92, "learning_rate": 2.0553054950907744e-05, "loss": 1.9605, "step": 20353500 }, { "epoch": 58.92, "learning_rate": 2.055233130326047e-05, "loss": 1.9504, "step": 20354000 }, { "epoch": 58.92, "learning_rate": 2.0551607655613192e-05, "loss": 1.9508, "step": 20354500 }, { "epoch": 58.92, "learning_rate": 2.0550884007965914e-05, "loss": 1.9476, "step": 20355000 }, { "epoch": 58.92, "learning_rate": 2.0550161807613933e-05, "loss": 1.9557, "step": 20355500 }, { "epoch": 58.92, "learning_rate": 2.0549438159966655e-05, "loss": 1.9543, "step": 20356000 }, { "epoch": 58.92, "learning_rate": 2.0548715959614674e-05, "loss": 1.966, "step": 20356500 }, { "epoch": 58.93, "learning_rate": 2.0547992311967397e-05, "loss": 1.9588, "step": 20357000 }, { "epoch": 58.93, "learning_rate": 2.054726866432012e-05, "loss": 1.9583, "step": 20357500 }, { "epoch": 58.93, "learning_rate": 2.0546545016672844e-05, "loss": 1.9796, "step": 20358000 }, { "epoch": 58.93, "learning_rate": 2.0545821369025567e-05, "loss": 1.9543, "step": 20358500 }, { "epoch": 58.93, "learning_rate": 2.054509772137829e-05, "loss": 1.9392, "step": 20359000 }, { "epoch": 58.93, "learning_rate": 2.054437407373101e-05, "loss": 1.9423, "step": 20359500 }, { "epoch": 58.93, "learning_rate": 2.0543650426083737e-05, "loss": 2.0011, "step": 20360000 }, { "epoch": 58.94, "learning_rate": 2.0542928225731752e-05, "loss": 1.9607, "step": 20360500 }, { "epoch": 58.94, "learning_rate": 2.0542204578084474e-05, "loss": 1.9706, "step": 20361000 }, { "epoch": 58.94, "learning_rate": 2.0541482377732493e-05, "loss": 1.9519, "step": 20361500 }, { "epoch": 58.94, "learning_rate": 2.054075873008522e-05, "loss": 1.9593, "step": 20362000 }, { "epoch": 58.94, "learning_rate": 2.054003508243794e-05, "loss": 1.9501, "step": 20362500 }, { "epoch": 58.94, "learning_rate": 2.053931288208596e-05, "loss": 1.9836, "step": 20363000 }, { "epoch": 58.94, "learning_rate": 2.0538589234438682e-05, "loss": 1.9673, "step": 20363500 }, { "epoch": 58.95, "learning_rate": 2.0537865586791405e-05, "loss": 1.9734, "step": 20364000 }, { "epoch": 58.95, "learning_rate": 2.0537141939144127e-05, "loss": 1.9381, "step": 20364500 }, { "epoch": 58.95, "learning_rate": 2.0536419738792146e-05, "loss": 1.9612, "step": 20365000 }, { "epoch": 58.95, "learning_rate": 2.0535696091144868e-05, "loss": 1.9585, "step": 20365500 }, { "epoch": 58.95, "learning_rate": 2.0534972443497594e-05, "loss": 1.9699, "step": 20366000 }, { "epoch": 58.95, "learning_rate": 2.0534248795850316e-05, "loss": 1.9627, "step": 20366500 }, { "epoch": 58.95, "learning_rate": 2.0533526595498335e-05, "loss": 1.9586, "step": 20367000 }, { "epoch": 58.96, "learning_rate": 2.0532802947851057e-05, "loss": 1.9635, "step": 20367500 }, { "epoch": 58.96, "learning_rate": 2.0532080747499076e-05, "loss": 1.9493, "step": 20368000 }, { "epoch": 58.96, "learning_rate": 2.05313570998518e-05, "loss": 1.9367, "step": 20368500 }, { "epoch": 58.96, "learning_rate": 2.053063345220452e-05, "loss": 1.9641, "step": 20369000 }, { "epoch": 58.96, "learning_rate": 2.0529909804557246e-05, "loss": 1.9711, "step": 20369500 }, { "epoch": 58.96, "learning_rate": 2.052918615690997e-05, "loss": 1.977, "step": 20370000 }, { "epoch": 58.96, "learning_rate": 2.052846250926269e-05, "loss": 1.9572, "step": 20370500 }, { "epoch": 58.97, "learning_rate": 2.052774030891071e-05, "loss": 1.9676, "step": 20371000 }, { "epoch": 58.97, "learning_rate": 2.0527016661263432e-05, "loss": 1.9606, "step": 20371500 }, { "epoch": 58.97, "learning_rate": 2.0526293013616154e-05, "loss": 1.9497, "step": 20372000 }, { "epoch": 58.97, "learning_rate": 2.0525569365968876e-05, "loss": 1.9623, "step": 20372500 }, { "epoch": 58.97, "learning_rate": 2.0524845718321602e-05, "loss": 1.978, "step": 20373000 }, { "epoch": 58.97, "learning_rate": 2.052412351796962e-05, "loss": 1.9524, "step": 20373500 }, { "epoch": 58.97, "learning_rate": 2.0523399870322343e-05, "loss": 1.9383, "step": 20374000 }, { "epoch": 58.98, "learning_rate": 2.0522676222675065e-05, "loss": 1.9599, "step": 20374500 }, { "epoch": 58.98, "learning_rate": 2.0521952575027788e-05, "loss": 1.9951, "step": 20375000 }, { "epoch": 58.98, "learning_rate": 2.0521228927380513e-05, "loss": 1.9568, "step": 20375500 }, { "epoch": 58.98, "learning_rate": 2.0520505279733235e-05, "loss": 1.9543, "step": 20376000 }, { "epoch": 58.98, "learning_rate": 2.0519781632085958e-05, "loss": 1.945, "step": 20376500 }, { "epoch": 58.98, "learning_rate": 2.0519057984438683e-05, "loss": 1.9668, "step": 20377000 }, { "epoch": 58.98, "learning_rate": 2.0518334336791405e-05, "loss": 1.9473, "step": 20377500 }, { "epoch": 58.99, "learning_rate": 2.0517610689144128e-05, "loss": 1.9654, "step": 20378000 }, { "epoch": 58.99, "learning_rate": 2.0516887041496853e-05, "loss": 1.9528, "step": 20378500 }, { "epoch": 58.99, "learning_rate": 2.0516163393849576e-05, "loss": 1.9764, "step": 20379000 }, { "epoch": 58.99, "learning_rate": 2.0515439746202298e-05, "loss": 1.9597, "step": 20379500 }, { "epoch": 58.99, "learning_rate": 2.051471609855502e-05, "loss": 1.9549, "step": 20380000 }, { "epoch": 58.99, "learning_rate": 2.0513992450907746e-05, "loss": 1.913, "step": 20380500 }, { "epoch": 58.99, "learning_rate": 2.0513270250555765e-05, "loss": 1.9601, "step": 20381000 }, { "epoch": 59.0, "learning_rate": 2.0512546602908487e-05, "loss": 1.9474, "step": 20381500 }, { "epoch": 59.0, "learning_rate": 2.051182295526121e-05, "loss": 1.9347, "step": 20382000 }, { "epoch": 59.0, "learning_rate": 2.051109930761393e-05, "loss": 1.9616, "step": 20382500 }, { "epoch": 59.0, "eval_accuracy": 0.6762973060311479, "eval_accuracy_mlm": 0.6429612446253726, "eval_accuracy_nsp": 0.8552428354713487, "eval_loss": 2.1759543418884277, "eval_runtime": 331.5891, "eval_samples_per_second": 1316.045, "eval_steps_per_second": 54.836, "step": 20382848 }, { "epoch": 59.0, "learning_rate": 2.0510375659966654e-05, "loss": 1.9577, "step": 20383000 }, { "epoch": 59.0, "learning_rate": 2.050965201231938e-05, "loss": 1.9604, "step": 20383500 }, { "epoch": 59.0, "learning_rate": 2.05089283646721e-05, "loss": 1.9313, "step": 20384000 }, { "epoch": 59.0, "learning_rate": 2.050820616432012e-05, "loss": 1.9366, "step": 20384500 }, { "epoch": 59.01, "learning_rate": 2.0507482516672843e-05, "loss": 1.9608, "step": 20385000 }, { "epoch": 59.01, "learning_rate": 2.0506758869025565e-05, "loss": 1.9263, "step": 20385500 }, { "epoch": 59.01, "learning_rate": 2.050603522137829e-05, "loss": 1.9303, "step": 20386000 }, { "epoch": 59.01, "learning_rate": 2.0505311573731013e-05, "loss": 1.9689, "step": 20386500 }, { "epoch": 59.01, "learning_rate": 2.0504587926083735e-05, "loss": 1.9523, "step": 20387000 }, { "epoch": 59.01, "learning_rate": 2.0503865725731754e-05, "loss": 1.9189, "step": 20387500 }, { "epoch": 59.01, "learning_rate": 2.050314207808448e-05, "loss": 1.9391, "step": 20388000 }, { "epoch": 59.02, "learning_rate": 2.0502418430437202e-05, "loss": 1.9374, "step": 20388500 }, { "epoch": 59.02, "learning_rate": 2.0501696230085217e-05, "loss": 1.9482, "step": 20389000 }, { "epoch": 59.02, "learning_rate": 2.050097258243794e-05, "loss": 1.9405, "step": 20389500 }, { "epoch": 59.02, "learning_rate": 2.0500248934790665e-05, "loss": 1.9344, "step": 20390000 }, { "epoch": 59.02, "learning_rate": 2.0499525287143387e-05, "loss": 1.9323, "step": 20390500 }, { "epoch": 59.02, "learning_rate": 2.049880163949611e-05, "loss": 1.9639, "step": 20391000 }, { "epoch": 59.03, "learning_rate": 2.0498077991848835e-05, "loss": 1.9495, "step": 20391500 }, { "epoch": 59.03, "learning_rate": 2.0497354344201557e-05, "loss": 1.948, "step": 20392000 }, { "epoch": 59.03, "learning_rate": 2.049663069655428e-05, "loss": 1.9315, "step": 20392500 }, { "epoch": 59.03, "learning_rate": 2.0495907048907005e-05, "loss": 1.9473, "step": 20393000 }, { "epoch": 59.03, "learning_rate": 2.0495183401259728e-05, "loss": 1.9319, "step": 20393500 }, { "epoch": 59.03, "learning_rate": 2.049445975361245e-05, "loss": 1.9524, "step": 20394000 }, { "epoch": 59.03, "learning_rate": 2.0493736105965172e-05, "loss": 1.9315, "step": 20394500 }, { "epoch": 59.04, "learning_rate": 2.0493012458317898e-05, "loss": 1.9416, "step": 20395000 }, { "epoch": 59.04, "learning_rate": 2.049228881067062e-05, "loss": 1.9262, "step": 20395500 }, { "epoch": 59.04, "learning_rate": 2.0491565163023342e-05, "loss": 1.9373, "step": 20396000 }, { "epoch": 59.04, "learning_rate": 2.049084296267136e-05, "loss": 1.9445, "step": 20396500 }, { "epoch": 59.04, "learning_rate": 2.0490119315024083e-05, "loss": 1.9467, "step": 20397000 }, { "epoch": 59.04, "learning_rate": 2.0489395667376805e-05, "loss": 1.9296, "step": 20397500 }, { "epoch": 59.04, "learning_rate": 2.0488673467024824e-05, "loss": 1.9437, "step": 20398000 }, { "epoch": 59.05, "learning_rate": 2.0487949819377547e-05, "loss": 1.9305, "step": 20398500 }, { "epoch": 59.05, "learning_rate": 2.0487226171730272e-05, "loss": 1.9498, "step": 20399000 }, { "epoch": 59.05, "learning_rate": 2.0486502524082995e-05, "loss": 1.9505, "step": 20399500 }, { "epoch": 59.05, "learning_rate": 2.0485778876435717e-05, "loss": 1.9368, "step": 20400000 }, { "epoch": 59.05, "learning_rate": 2.0485056676083736e-05, "loss": 1.9414, "step": 20400500 }, { "epoch": 59.05, "learning_rate": 2.0484333028436458e-05, "loss": 1.946, "step": 20401000 }, { "epoch": 59.05, "learning_rate": 2.048360938078918e-05, "loss": 1.9513, "step": 20401500 }, { "epoch": 59.06, "learning_rate": 2.0482885733141906e-05, "loss": 1.9274, "step": 20402000 }, { "epoch": 59.06, "learning_rate": 2.048216208549463e-05, "loss": 1.9329, "step": 20402500 }, { "epoch": 59.06, "learning_rate": 2.0481438437847354e-05, "loss": 1.9886, "step": 20403000 }, { "epoch": 59.06, "learning_rate": 2.0480714790200076e-05, "loss": 1.9792, "step": 20403500 }, { "epoch": 59.06, "learning_rate": 2.0479991142552798e-05, "loss": 1.9375, "step": 20404000 }, { "epoch": 59.06, "learning_rate": 2.0479268942200817e-05, "loss": 1.9591, "step": 20404500 }, { "epoch": 59.06, "learning_rate": 2.0478546741848833e-05, "loss": 1.9119, "step": 20405000 }, { "epoch": 59.07, "learning_rate": 2.0477823094201555e-05, "loss": 1.9413, "step": 20405500 }, { "epoch": 59.07, "learning_rate": 2.047709944655428e-05, "loss": 1.9445, "step": 20406000 }, { "epoch": 59.07, "learning_rate": 2.0476375798907006e-05, "loss": 1.9583, "step": 20406500 }, { "epoch": 59.07, "learning_rate": 2.047565215125973e-05, "loss": 1.9356, "step": 20407000 }, { "epoch": 59.07, "learning_rate": 2.047492850361245e-05, "loss": 1.9111, "step": 20407500 }, { "epoch": 59.07, "learning_rate": 2.0474204855965173e-05, "loss": 1.9347, "step": 20408000 }, { "epoch": 59.07, "learning_rate": 2.0473481208317895e-05, "loss": 1.9255, "step": 20408500 }, { "epoch": 59.08, "learning_rate": 2.047275756067062e-05, "loss": 1.9525, "step": 20409000 }, { "epoch": 59.08, "learning_rate": 2.0472036807613933e-05, "loss": 1.9425, "step": 20409500 }, { "epoch": 59.08, "learning_rate": 2.047131460726195e-05, "loss": 1.9396, "step": 20410000 }, { "epoch": 59.08, "learning_rate": 2.0470590959614674e-05, "loss": 1.9461, "step": 20410500 }, { "epoch": 59.08, "learning_rate": 2.0469867311967396e-05, "loss": 1.9432, "step": 20411000 }, { "epoch": 59.08, "learning_rate": 2.046914366432012e-05, "loss": 1.9371, "step": 20411500 }, { "epoch": 59.08, "learning_rate": 2.0468420016672844e-05, "loss": 1.9559, "step": 20412000 }, { "epoch": 59.09, "learning_rate": 2.0467696369025566e-05, "loss": 1.933, "step": 20412500 }, { "epoch": 59.09, "learning_rate": 2.046697272137829e-05, "loss": 1.947, "step": 20413000 }, { "epoch": 59.09, "learning_rate": 2.046624907373101e-05, "loss": 1.9564, "step": 20413500 }, { "epoch": 59.09, "learning_rate": 2.0465525426083736e-05, "loss": 1.9404, "step": 20414000 }, { "epoch": 59.09, "learning_rate": 2.046480177843646e-05, "loss": 1.9361, "step": 20414500 }, { "epoch": 59.09, "learning_rate": 2.0464078130789184e-05, "loss": 1.9474, "step": 20415000 }, { "epoch": 59.09, "learning_rate": 2.0463354483141907e-05, "loss": 1.9334, "step": 20415500 }, { "epoch": 59.1, "learning_rate": 2.0462632282789922e-05, "loss": 1.9334, "step": 20416000 }, { "epoch": 59.1, "learning_rate": 2.0461908635142644e-05, "loss": 1.9485, "step": 20416500 }, { "epoch": 59.1, "learning_rate": 2.046118498749537e-05, "loss": 1.9242, "step": 20417000 }, { "epoch": 59.1, "learning_rate": 2.0460461339848092e-05, "loss": 1.9248, "step": 20417500 }, { "epoch": 59.1, "learning_rate": 2.0459737692200818e-05, "loss": 1.9382, "step": 20418000 }, { "epoch": 59.1, "learning_rate": 2.045901404455354e-05, "loss": 1.9198, "step": 20418500 }, { "epoch": 59.1, "learning_rate": 2.045829184420156e-05, "loss": 1.9529, "step": 20419000 }, { "epoch": 59.11, "learning_rate": 2.0457569643849575e-05, "loss": 1.937, "step": 20419500 }, { "epoch": 59.11, "learning_rate": 2.0456845996202297e-05, "loss": 1.9305, "step": 20420000 }, { "epoch": 59.11, "learning_rate": 2.0456123795850316e-05, "loss": 1.9356, "step": 20420500 }, { "epoch": 59.11, "learning_rate": 2.0455400148203038e-05, "loss": 1.9118, "step": 20421000 }, { "epoch": 59.11, "learning_rate": 2.045467650055576e-05, "loss": 1.9437, "step": 20421500 }, { "epoch": 59.11, "learning_rate": 2.0453952852908486e-05, "loss": 1.9433, "step": 20422000 }, { "epoch": 59.11, "learning_rate": 2.0453230652556505e-05, "loss": 1.944, "step": 20422500 }, { "epoch": 59.12, "learning_rate": 2.0452507004909227e-05, "loss": 1.9242, "step": 20423000 }, { "epoch": 59.12, "learning_rate": 2.0451784804557246e-05, "loss": 1.9702, "step": 20423500 }, { "epoch": 59.12, "learning_rate": 2.0451061156909968e-05, "loss": 1.9244, "step": 20424000 }, { "epoch": 59.12, "learning_rate": 2.045033750926269e-05, "loss": 1.9526, "step": 20424500 }, { "epoch": 59.12, "learning_rate": 2.0449613861615413e-05, "loss": 1.924, "step": 20425000 }, { "epoch": 59.12, "learning_rate": 2.0448890213968135e-05, "loss": 1.95, "step": 20425500 }, { "epoch": 59.12, "learning_rate": 2.044816656632086e-05, "loss": 1.9505, "step": 20426000 }, { "epoch": 59.13, "learning_rate": 2.0447442918673583e-05, "loss": 1.931, "step": 20426500 }, { "epoch": 59.13, "learning_rate": 2.044671927102631e-05, "loss": 1.9585, "step": 20427000 }, { "epoch": 59.13, "learning_rate": 2.044599562337903e-05, "loss": 1.9326, "step": 20427500 }, { "epoch": 59.13, "learning_rate": 2.0445273423027046e-05, "loss": 1.9445, "step": 20428000 }, { "epoch": 59.13, "learning_rate": 2.0444549775379772e-05, "loss": 1.9555, "step": 20428500 }, { "epoch": 59.13, "learning_rate": 2.0443826127732494e-05, "loss": 1.9233, "step": 20429000 }, { "epoch": 59.14, "learning_rate": 2.044310248008522e-05, "loss": 1.9617, "step": 20429500 }, { "epoch": 59.14, "learning_rate": 2.0442378832437942e-05, "loss": 1.9487, "step": 20430000 }, { "epoch": 59.14, "learning_rate": 2.0441655184790664e-05, "loss": 1.9397, "step": 20430500 }, { "epoch": 59.14, "learning_rate": 2.0440931537143386e-05, "loss": 1.9457, "step": 20431000 }, { "epoch": 59.14, "learning_rate": 2.044020788949611e-05, "loss": 1.9323, "step": 20431500 }, { "epoch": 59.14, "learning_rate": 2.0439484241848834e-05, "loss": 1.9447, "step": 20432000 }, { "epoch": 59.14, "learning_rate": 2.0438760594201556e-05, "loss": 1.9445, "step": 20432500 }, { "epoch": 59.15, "learning_rate": 2.0438036946554282e-05, "loss": 1.9288, "step": 20433000 }, { "epoch": 59.15, "learning_rate": 2.0437313298907004e-05, "loss": 1.9596, "step": 20433500 }, { "epoch": 59.15, "learning_rate": 2.0436589651259727e-05, "loss": 1.9437, "step": 20434000 }, { "epoch": 59.15, "learning_rate": 2.043586600361245e-05, "loss": 1.9683, "step": 20434500 }, { "epoch": 59.15, "learning_rate": 2.0435142355965174e-05, "loss": 1.9551, "step": 20435000 }, { "epoch": 59.15, "learning_rate": 2.0434418708317897e-05, "loss": 1.9545, "step": 20435500 }, { "epoch": 59.15, "learning_rate": 2.043369506067062e-05, "loss": 1.944, "step": 20436000 }, { "epoch": 59.16, "learning_rate": 2.0432972860318638e-05, "loss": 1.9439, "step": 20436500 }, { "epoch": 59.16, "learning_rate": 2.043224921267136e-05, "loss": 1.9537, "step": 20437000 }, { "epoch": 59.16, "learning_rate": 2.0431525565024086e-05, "loss": 1.9416, "step": 20437500 }, { "epoch": 59.16, "learning_rate": 2.0430801917376808e-05, "loss": 1.9543, "step": 20438000 }, { "epoch": 59.16, "learning_rate": 2.043007826972953e-05, "loss": 1.9525, "step": 20438500 }, { "epoch": 59.16, "learning_rate": 2.042935606937755e-05, "loss": 1.949, "step": 20439000 }, { "epoch": 59.16, "learning_rate": 2.042863242173027e-05, "loss": 1.9393, "step": 20439500 }, { "epoch": 59.17, "learning_rate": 2.0427908774082994e-05, "loss": 1.934, "step": 20440000 }, { "epoch": 59.17, "learning_rate": 2.0427186573731012e-05, "loss": 1.9411, "step": 20440500 }, { "epoch": 59.17, "learning_rate": 2.0426462926083738e-05, "loss": 1.9383, "step": 20441000 }, { "epoch": 59.17, "learning_rate": 2.042573927843646e-05, "loss": 1.9485, "step": 20441500 }, { "epoch": 59.17, "learning_rate": 2.0425015630789183e-05, "loss": 1.9448, "step": 20442000 }, { "epoch": 59.17, "learning_rate": 2.0424293430437198e-05, "loss": 1.9524, "step": 20442500 }, { "epoch": 59.17, "learning_rate": 2.0423569782789924e-05, "loss": 1.941, "step": 20443000 }, { "epoch": 59.18, "learning_rate": 2.0422846135142646e-05, "loss": 1.948, "step": 20443500 }, { "epoch": 59.18, "learning_rate": 2.042212248749537e-05, "loss": 1.9416, "step": 20444000 }, { "epoch": 59.18, "learning_rate": 2.0421398839848094e-05, "loss": 1.9579, "step": 20444500 }, { "epoch": 59.18, "learning_rate": 2.0420675192200816e-05, "loss": 1.9436, "step": 20445000 }, { "epoch": 59.18, "learning_rate": 2.0419951544553538e-05, "loss": 1.9426, "step": 20445500 }, { "epoch": 59.18, "learning_rate": 2.0419227896906264e-05, "loss": 1.9377, "step": 20446000 }, { "epoch": 59.18, "learning_rate": 2.0418504249258986e-05, "loss": 1.921, "step": 20446500 }, { "epoch": 59.19, "learning_rate": 2.0417782048907002e-05, "loss": 1.9812, "step": 20447000 }, { "epoch": 59.19, "learning_rate": 2.0417058401259724e-05, "loss": 1.937, "step": 20447500 }, { "epoch": 59.19, "learning_rate": 2.041633475361245e-05, "loss": 1.9674, "step": 20448000 }, { "epoch": 59.19, "learning_rate": 2.041561255326047e-05, "loss": 1.9281, "step": 20448500 }, { "epoch": 59.19, "learning_rate": 2.041488890561319e-05, "loss": 1.9181, "step": 20449000 }, { "epoch": 59.19, "learning_rate": 2.0414165257965913e-05, "loss": 1.9242, "step": 20449500 }, { "epoch": 59.19, "learning_rate": 2.041344161031864e-05, "loss": 1.942, "step": 20450000 }, { "epoch": 59.2, "learning_rate": 2.041271796267136e-05, "loss": 1.9376, "step": 20450500 }, { "epoch": 59.2, "learning_rate": 2.0411995762319376e-05, "loss": 1.9469, "step": 20451000 }, { "epoch": 59.2, "learning_rate": 2.0411272114672102e-05, "loss": 1.9366, "step": 20451500 }, { "epoch": 59.2, "learning_rate": 2.0410548467024824e-05, "loss": 1.93, "step": 20452000 }, { "epoch": 59.2, "learning_rate": 2.040982481937755e-05, "loss": 1.9672, "step": 20452500 }, { "epoch": 59.2, "learning_rate": 2.0409101171730272e-05, "loss": 1.9611, "step": 20453000 }, { "epoch": 59.2, "learning_rate": 2.0408377524082994e-05, "loss": 1.9665, "step": 20453500 }, { "epoch": 59.21, "learning_rate": 2.0407653876435717e-05, "loss": 1.9447, "step": 20454000 }, { "epoch": 59.21, "learning_rate": 2.0406931676083735e-05, "loss": 1.9573, "step": 20454500 }, { "epoch": 59.21, "learning_rate": 2.0406208028436458e-05, "loss": 1.9599, "step": 20455000 }, { "epoch": 59.21, "learning_rate": 2.0405484380789183e-05, "loss": 1.9383, "step": 20455500 }, { "epoch": 59.21, "learning_rate": 2.0404762180437202e-05, "loss": 1.9701, "step": 20456000 }, { "epoch": 59.21, "learning_rate": 2.0404038532789925e-05, "loss": 1.9467, "step": 20456500 }, { "epoch": 59.21, "learning_rate": 2.0403314885142647e-05, "loss": 1.9386, "step": 20457000 }, { "epoch": 59.22, "learning_rate": 2.040259123749537e-05, "loss": 1.9211, "step": 20457500 }, { "epoch": 59.22, "learning_rate": 2.040186758984809e-05, "loss": 1.9412, "step": 20458000 }, { "epoch": 59.22, "learning_rate": 2.0401143942200813e-05, "loss": 1.9791, "step": 20458500 }, { "epoch": 59.22, "learning_rate": 2.040042029455354e-05, "loss": 1.933, "step": 20459000 }, { "epoch": 59.22, "learning_rate": 2.0399696646906265e-05, "loss": 1.9135, "step": 20459500 }, { "epoch": 59.22, "learning_rate": 2.0398972999258987e-05, "loss": 1.9473, "step": 20460000 }, { "epoch": 59.22, "learning_rate": 2.0398250798907002e-05, "loss": 1.9434, "step": 20460500 }, { "epoch": 59.23, "learning_rate": 2.0397527151259728e-05, "loss": 1.9512, "step": 20461000 }, { "epoch": 59.23, "learning_rate": 2.039680350361245e-05, "loss": 1.9558, "step": 20461500 }, { "epoch": 59.23, "learning_rate": 2.0396079855965173e-05, "loss": 1.9429, "step": 20462000 }, { "epoch": 59.23, "learning_rate": 2.0395357655613188e-05, "loss": 1.9577, "step": 20462500 }, { "epoch": 59.23, "learning_rate": 2.039463545526121e-05, "loss": 1.9545, "step": 20463000 }, { "epoch": 59.23, "learning_rate": 2.0393911807613933e-05, "loss": 1.9705, "step": 20463500 }, { "epoch": 59.23, "learning_rate": 2.0393188159966655e-05, "loss": 1.9492, "step": 20464000 }, { "epoch": 59.24, "learning_rate": 2.0392464512319377e-05, "loss": 1.9353, "step": 20464500 }, { "epoch": 59.24, "learning_rate": 2.0391740864672103e-05, "loss": 1.9307, "step": 20465000 }, { "epoch": 59.24, "learning_rate": 2.0391017217024825e-05, "loss": 1.9201, "step": 20465500 }, { "epoch": 59.24, "learning_rate": 2.0390293569377547e-05, "loss": 1.933, "step": 20466000 }, { "epoch": 59.24, "learning_rate": 2.0389569921730273e-05, "loss": 1.9583, "step": 20466500 }, { "epoch": 59.24, "learning_rate": 2.0388846274082995e-05, "loss": 1.9454, "step": 20467000 }, { "epoch": 59.25, "learning_rate": 2.0388122626435717e-05, "loss": 1.9306, "step": 20467500 }, { "epoch": 59.25, "learning_rate": 2.038739897878844e-05, "loss": 1.9425, "step": 20468000 }, { "epoch": 59.25, "learning_rate": 2.0386675331141165e-05, "loss": 1.9672, "step": 20468500 }, { "epoch": 59.25, "learning_rate": 2.0385954578084477e-05, "loss": 1.9618, "step": 20469000 }, { "epoch": 59.25, "learning_rate": 2.03852309304372e-05, "loss": 1.9238, "step": 20469500 }, { "epoch": 59.25, "learning_rate": 2.0384507282789922e-05, "loss": 1.9581, "step": 20470000 }, { "epoch": 59.25, "learning_rate": 2.038378508243794e-05, "loss": 1.964, "step": 20470500 }, { "epoch": 59.26, "learning_rate": 2.0383061434790667e-05, "loss": 1.9465, "step": 20471000 }, { "epoch": 59.26, "learning_rate": 2.038233778714339e-05, "loss": 1.9204, "step": 20471500 }, { "epoch": 59.26, "learning_rate": 2.038161413949611e-05, "loss": 1.9324, "step": 20472000 }, { "epoch": 59.26, "learning_rate": 2.0380890491848833e-05, "loss": 1.9447, "step": 20472500 }, { "epoch": 59.26, "learning_rate": 2.0380166844201555e-05, "loss": 1.98, "step": 20473000 }, { "epoch": 59.26, "learning_rate": 2.0379443196554278e-05, "loss": 1.933, "step": 20473500 }, { "epoch": 59.26, "learning_rate": 2.0378720996202297e-05, "loss": 1.9362, "step": 20474000 }, { "epoch": 59.27, "learning_rate": 2.0377997348555022e-05, "loss": 1.9437, "step": 20474500 }, { "epoch": 59.27, "learning_rate": 2.0377273700907744e-05, "loss": 1.9417, "step": 20475000 }, { "epoch": 59.27, "learning_rate": 2.0376550053260467e-05, "loss": 1.9497, "step": 20475500 }, { "epoch": 59.27, "learning_rate": 2.0375826405613192e-05, "loss": 1.9452, "step": 20476000 }, { "epoch": 59.27, "learning_rate": 2.0375102757965915e-05, "loss": 1.9509, "step": 20476500 }, { "epoch": 59.27, "learning_rate": 2.0374379110318637e-05, "loss": 1.9432, "step": 20477000 }, { "epoch": 59.27, "learning_rate": 2.0373656909966656e-05, "loss": 1.9689, "step": 20477500 }, { "epoch": 59.28, "learning_rate": 2.0372934709614675e-05, "loss": 1.9451, "step": 20478000 }, { "epoch": 59.28, "learning_rate": 2.0372211061967397e-05, "loss": 1.9514, "step": 20478500 }, { "epoch": 59.28, "learning_rate": 2.037148741432012e-05, "loss": 1.9524, "step": 20479000 }, { "epoch": 59.28, "learning_rate": 2.037076376667284e-05, "loss": 1.9481, "step": 20479500 }, { "epoch": 59.28, "learning_rate": 2.0370040119025567e-05, "loss": 1.9593, "step": 20480000 }, { "epoch": 59.28, "learning_rate": 2.036931647137829e-05, "loss": 1.9291, "step": 20480500 }, { "epoch": 59.28, "learning_rate": 2.036859282373101e-05, "loss": 1.9405, "step": 20481000 }, { "epoch": 59.29, "learning_rate": 2.036787062337903e-05, "loss": 1.9469, "step": 20481500 }, { "epoch": 59.29, "learning_rate": 2.0367146975731756e-05, "loss": 1.9578, "step": 20482000 }, { "epoch": 59.29, "learning_rate": 2.0366423328084478e-05, "loss": 1.941, "step": 20482500 }, { "epoch": 59.29, "learning_rate": 2.03656996804372e-05, "loss": 1.9395, "step": 20483000 }, { "epoch": 59.29, "learning_rate": 2.0364976032789923e-05, "loss": 1.9275, "step": 20483500 }, { "epoch": 59.29, "learning_rate": 2.0364252385142645e-05, "loss": 1.9682, "step": 20484000 }, { "epoch": 59.29, "learning_rate": 2.0363528737495367e-05, "loss": 1.9437, "step": 20484500 }, { "epoch": 59.3, "learning_rate": 2.0362805089848093e-05, "loss": 1.9202, "step": 20485000 }, { "epoch": 59.3, "learning_rate": 2.036208144220082e-05, "loss": 1.9527, "step": 20485500 }, { "epoch": 59.3, "learning_rate": 2.0361359241848834e-05, "loss": 1.9678, "step": 20486000 }, { "epoch": 59.3, "learning_rate": 2.0360635594201556e-05, "loss": 1.9614, "step": 20486500 }, { "epoch": 59.3, "learning_rate": 2.0359911946554282e-05, "loss": 1.9431, "step": 20487000 }, { "epoch": 59.3, "learning_rate": 2.0359188298907004e-05, "loss": 1.9387, "step": 20487500 }, { "epoch": 59.3, "learning_rate": 2.0358464651259726e-05, "loss": 1.9369, "step": 20488000 }, { "epoch": 59.31, "learning_rate": 2.035774100361245e-05, "loss": 1.9777, "step": 20488500 }, { "epoch": 59.31, "learning_rate": 2.0357017355965174e-05, "loss": 1.9364, "step": 20489000 }, { "epoch": 59.31, "learning_rate": 2.0356293708317896e-05, "loss": 1.9056, "step": 20489500 }, { "epoch": 59.31, "learning_rate": 2.035557006067062e-05, "loss": 1.9532, "step": 20490000 }, { "epoch": 59.31, "learning_rate": 2.0354846413023344e-05, "loss": 1.9651, "step": 20490500 }, { "epoch": 59.31, "learning_rate": 2.0354122765376066e-05, "loss": 1.9147, "step": 20491000 }, { "epoch": 59.31, "learning_rate": 2.0353400565024082e-05, "loss": 1.9729, "step": 20491500 }, { "epoch": 59.32, "learning_rate": 2.0352676917376808e-05, "loss": 1.9477, "step": 20492000 }, { "epoch": 59.32, "learning_rate": 2.0351953269729533e-05, "loss": 1.9687, "step": 20492500 }, { "epoch": 59.32, "learning_rate": 2.0351229622082256e-05, "loss": 1.9777, "step": 20493000 }, { "epoch": 59.32, "learning_rate": 2.0350505974434978e-05, "loss": 1.9366, "step": 20493500 }, { "epoch": 59.32, "learning_rate": 2.03497823267877e-05, "loss": 1.9433, "step": 20494000 }, { "epoch": 59.32, "learning_rate": 2.0349058679140422e-05, "loss": 1.9351, "step": 20494500 }, { "epoch": 59.32, "learning_rate": 2.0348335031493144e-05, "loss": 1.947, "step": 20495000 }, { "epoch": 59.33, "learning_rate": 2.034761138384587e-05, "loss": 1.9532, "step": 20495500 }, { "epoch": 59.33, "learning_rate": 2.0346887736198596e-05, "loss": 1.9546, "step": 20496000 }, { "epoch": 59.33, "learning_rate": 2.034616553584661e-05, "loss": 1.9563, "step": 20496500 }, { "epoch": 59.33, "learning_rate": 2.0345441888199333e-05, "loss": 1.9512, "step": 20497000 }, { "epoch": 59.33, "learning_rate": 2.034471824055206e-05, "loss": 1.9614, "step": 20497500 }, { "epoch": 59.33, "learning_rate": 2.034399459290478e-05, "loss": 1.9432, "step": 20498000 }, { "epoch": 59.33, "learning_rate": 2.0343272392552797e-05, "loss": 1.9458, "step": 20498500 }, { "epoch": 59.34, "learning_rate": 2.034254874490552e-05, "loss": 1.9296, "step": 20499000 }, { "epoch": 59.34, "learning_rate": 2.0341825097258245e-05, "loss": 1.9303, "step": 20499500 }, { "epoch": 59.34, "learning_rate": 2.034110144961097e-05, "loss": 1.9532, "step": 20500000 }, { "epoch": 59.34, "learning_rate": 2.0340377801963693e-05, "loss": 1.9562, "step": 20500500 }, { "epoch": 59.34, "learning_rate": 2.0339654154316415e-05, "loss": 1.9557, "step": 20501000 }, { "epoch": 59.34, "learning_rate": 2.0338933401259727e-05, "loss": 1.9248, "step": 20501500 }, { "epoch": 59.34, "learning_rate": 2.033820975361245e-05, "loss": 1.9494, "step": 20502000 }, { "epoch": 59.35, "learning_rate": 2.033748610596517e-05, "loss": 1.9483, "step": 20502500 }, { "epoch": 59.35, "learning_rate": 2.0336762458317897e-05, "loss": 1.9541, "step": 20503000 }, { "epoch": 59.35, "learning_rate": 2.0336040257965913e-05, "loss": 1.9324, "step": 20503500 }, { "epoch": 59.35, "learning_rate": 2.033531661031864e-05, "loss": 1.9499, "step": 20504000 }, { "epoch": 59.35, "learning_rate": 2.033459296267136e-05, "loss": 1.9553, "step": 20504500 }, { "epoch": 59.35, "learning_rate": 2.0333869315024083e-05, "loss": 1.9577, "step": 20505000 }, { "epoch": 59.36, "learning_rate": 2.033314566737681e-05, "loss": 1.9534, "step": 20505500 }, { "epoch": 59.36, "learning_rate": 2.0332423467024824e-05, "loss": 1.9615, "step": 20506000 }, { "epoch": 59.36, "learning_rate": 2.0331699819377546e-05, "loss": 1.9406, "step": 20506500 }, { "epoch": 59.36, "learning_rate": 2.0330976171730272e-05, "loss": 1.9378, "step": 20507000 }, { "epoch": 59.36, "learning_rate": 2.0330253971378287e-05, "loss": 1.9337, "step": 20507500 }, { "epoch": 59.36, "learning_rate": 2.0329530323731013e-05, "loss": 1.9549, "step": 20508000 }, { "epoch": 59.36, "learning_rate": 2.0328806676083735e-05, "loss": 1.9737, "step": 20508500 }, { "epoch": 59.37, "learning_rate": 2.032808302843646e-05, "loss": 1.9561, "step": 20509000 }, { "epoch": 59.37, "learning_rate": 2.0327359380789183e-05, "loss": 1.9456, "step": 20509500 }, { "epoch": 59.37, "learning_rate": 2.0326635733141905e-05, "loss": 1.971, "step": 20510000 }, { "epoch": 59.37, "learning_rate": 2.0325912085494628e-05, "loss": 1.9451, "step": 20510500 }, { "epoch": 59.37, "learning_rate": 2.0325189885142647e-05, "loss": 1.9742, "step": 20511000 }, { "epoch": 59.37, "learning_rate": 2.0324466237495372e-05, "loss": 1.9315, "step": 20511500 }, { "epoch": 59.37, "learning_rate": 2.0323742589848094e-05, "loss": 1.9636, "step": 20512000 }, { "epoch": 59.38, "learning_rate": 2.0323018942200817e-05, "loss": 1.9387, "step": 20512500 }, { "epoch": 59.38, "learning_rate": 2.032229529455354e-05, "loss": 1.958, "step": 20513000 }, { "epoch": 59.38, "learning_rate": 2.032157164690626e-05, "loss": 1.9696, "step": 20513500 }, { "epoch": 59.38, "learning_rate": 2.0320847999258987e-05, "loss": 1.9375, "step": 20514000 }, { "epoch": 59.38, "learning_rate": 2.032012435161171e-05, "loss": 1.9312, "step": 20514500 }, { "epoch": 59.38, "learning_rate": 2.0319400703964435e-05, "loss": 1.9653, "step": 20515000 }, { "epoch": 59.38, "learning_rate": 2.0318677056317157e-05, "loss": 1.9503, "step": 20515500 }, { "epoch": 59.39, "learning_rate": 2.0317954855965172e-05, "loss": 1.9782, "step": 20516000 }, { "epoch": 59.39, "learning_rate": 2.0317231208317898e-05, "loss": 1.9573, "step": 20516500 }, { "epoch": 59.39, "learning_rate": 2.031650756067062e-05, "loss": 1.9232, "step": 20517000 }, { "epoch": 59.39, "learning_rate": 2.0315783913023342e-05, "loss": 1.9356, "step": 20517500 }, { "epoch": 59.39, "learning_rate": 2.0315060265376065e-05, "loss": 1.9466, "step": 20518000 }, { "epoch": 59.39, "learning_rate": 2.0314336617728787e-05, "loss": 1.9541, "step": 20518500 }, { "epoch": 59.39, "learning_rate": 2.0313612970081513e-05, "loss": 1.9341, "step": 20519000 }, { "epoch": 59.4, "learning_rate": 2.0312889322434235e-05, "loss": 1.9301, "step": 20519500 }, { "epoch": 59.4, "learning_rate": 2.031216567478696e-05, "loss": 1.9586, "step": 20520000 }, { "epoch": 59.4, "learning_rate": 2.0311443474434976e-05, "loss": 1.9546, "step": 20520500 }, { "epoch": 59.4, "learning_rate": 2.0310719826787698e-05, "loss": 1.9527, "step": 20521000 }, { "epoch": 59.4, "learning_rate": 2.0309997626435717e-05, "loss": 1.9447, "step": 20521500 }, { "epoch": 59.4, "learning_rate": 2.030927397878844e-05, "loss": 1.9156, "step": 20522000 }, { "epoch": 59.4, "learning_rate": 2.030855177843646e-05, "loss": 1.9533, "step": 20522500 }, { "epoch": 59.41, "learning_rate": 2.0307828130789184e-05, "loss": 1.9427, "step": 20523000 }, { "epoch": 59.41, "learning_rate": 2.03071059304372e-05, "loss": 1.958, "step": 20523500 }, { "epoch": 59.41, "learning_rate": 2.0306382282789925e-05, "loss": 1.9728, "step": 20524000 }, { "epoch": 59.41, "learning_rate": 2.0305658635142647e-05, "loss": 1.9553, "step": 20524500 }, { "epoch": 59.41, "learning_rate": 2.0304936434790663e-05, "loss": 1.9558, "step": 20525000 }, { "epoch": 59.41, "learning_rate": 2.0304212787143385e-05, "loss": 1.9477, "step": 20525500 }, { "epoch": 59.41, "learning_rate": 2.0303490586791404e-05, "loss": 1.9424, "step": 20526000 }, { "epoch": 59.42, "learning_rate": 2.0302766939144126e-05, "loss": 1.9548, "step": 20526500 }, { "epoch": 59.42, "learning_rate": 2.0302043291496852e-05, "loss": 1.9645, "step": 20527000 }, { "epoch": 59.42, "learning_rate": 2.0301319643849574e-05, "loss": 1.957, "step": 20527500 }, { "epoch": 59.42, "learning_rate": 2.03005959962023e-05, "loss": 1.9518, "step": 20528000 }, { "epoch": 59.42, "learning_rate": 2.0299872348555022e-05, "loss": 1.941, "step": 20528500 }, { "epoch": 59.42, "learning_rate": 2.0299148700907744e-05, "loss": 1.9133, "step": 20529000 }, { "epoch": 59.42, "learning_rate": 2.0298425053260466e-05, "loss": 1.9206, "step": 20529500 }, { "epoch": 59.43, "learning_rate": 2.029770140561319e-05, "loss": 1.9586, "step": 20530000 }, { "epoch": 59.43, "learning_rate": 2.0296977757965914e-05, "loss": 1.9389, "step": 20530500 }, { "epoch": 59.43, "learning_rate": 2.0296254110318637e-05, "loss": 1.9578, "step": 20531000 }, { "epoch": 59.43, "learning_rate": 2.0295530462671362e-05, "loss": 1.9651, "step": 20531500 }, { "epoch": 59.43, "learning_rate": 2.0294806815024084e-05, "loss": 1.9768, "step": 20532000 }, { "epoch": 59.43, "learning_rate": 2.0294083167376807e-05, "loss": 1.9836, "step": 20532500 }, { "epoch": 59.43, "learning_rate": 2.029335951972953e-05, "loss": 1.9611, "step": 20533000 }, { "epoch": 59.44, "learning_rate": 2.029263587208225e-05, "loss": 1.9409, "step": 20533500 }, { "epoch": 59.44, "learning_rate": 2.0291912224434977e-05, "loss": 1.9391, "step": 20534000 }, { "epoch": 59.44, "learning_rate": 2.0291188576787702e-05, "loss": 1.9385, "step": 20534500 }, { "epoch": 59.44, "learning_rate": 2.0290464929140425e-05, "loss": 1.9554, "step": 20535000 }, { "epoch": 59.44, "learning_rate": 2.028974272878844e-05, "loss": 1.9316, "step": 20535500 }, { "epoch": 59.44, "learning_rate": 2.0289019081141162e-05, "loss": 1.9516, "step": 20536000 }, { "epoch": 59.44, "learning_rate": 2.0288295433493888e-05, "loss": 1.9271, "step": 20536500 }, { "epoch": 59.45, "learning_rate": 2.028757178584661e-05, "loss": 1.9506, "step": 20537000 }, { "epoch": 59.45, "learning_rate": 2.0286848138199336e-05, "loss": 1.949, "step": 20537500 }, { "epoch": 59.45, "learning_rate": 2.0286124490552058e-05, "loss": 1.9364, "step": 20538000 }, { "epoch": 59.45, "learning_rate": 2.028540084290478e-05, "loss": 1.9353, "step": 20538500 }, { "epoch": 59.45, "learning_rate": 2.02846786425528e-05, "loss": 1.9636, "step": 20539000 }, { "epoch": 59.45, "learning_rate": 2.028395499490552e-05, "loss": 1.9422, "step": 20539500 }, { "epoch": 59.45, "learning_rate": 2.0283231347258244e-05, "loss": 1.9517, "step": 20540000 }, { "epoch": 59.46, "learning_rate": 2.0282507699610966e-05, "loss": 1.9646, "step": 20540500 }, { "epoch": 59.46, "learning_rate": 2.028178405196369e-05, "loss": 1.9292, "step": 20541000 }, { "epoch": 59.46, "learning_rate": 2.028106185161171e-05, "loss": 1.9621, "step": 20541500 }, { "epoch": 59.46, "learning_rate": 2.0280338203964433e-05, "loss": 1.9508, "step": 20542000 }, { "epoch": 59.46, "learning_rate": 2.0279614556317155e-05, "loss": 1.9544, "step": 20542500 }, { "epoch": 59.46, "learning_rate": 2.0278890908669877e-05, "loss": 1.9425, "step": 20543000 }, { "epoch": 59.47, "learning_rate": 2.0278167261022603e-05, "loss": 1.9665, "step": 20543500 }, { "epoch": 59.47, "learning_rate": 2.0277443613375325e-05, "loss": 1.9559, "step": 20544000 }, { "epoch": 59.47, "learning_rate": 2.0276719965728047e-05, "loss": 1.9508, "step": 20544500 }, { "epoch": 59.47, "learning_rate": 2.0275996318080773e-05, "loss": 1.9318, "step": 20545000 }, { "epoch": 59.47, "learning_rate": 2.0275272670433495e-05, "loss": 1.9516, "step": 20545500 }, { "epoch": 59.47, "learning_rate": 2.0274549022786217e-05, "loss": 1.9628, "step": 20546000 }, { "epoch": 59.47, "learning_rate": 2.027382537513894e-05, "loss": 1.9444, "step": 20546500 }, { "epoch": 59.48, "learning_rate": 2.0273101727491665e-05, "loss": 1.9552, "step": 20547000 }, { "epoch": 59.48, "learning_rate": 2.0272378079844388e-05, "loss": 1.9265, "step": 20547500 }, { "epoch": 59.48, "learning_rate": 2.0271655879492403e-05, "loss": 1.9705, "step": 20548000 }, { "epoch": 59.48, "learning_rate": 2.0270933679140422e-05, "loss": 1.9423, "step": 20548500 }, { "epoch": 59.48, "learning_rate": 2.0270210031493148e-05, "loss": 1.9668, "step": 20549000 }, { "epoch": 59.48, "learning_rate": 2.026948638384587e-05, "loss": 1.9466, "step": 20549500 }, { "epoch": 59.48, "learning_rate": 2.0268762736198592e-05, "loss": 1.9365, "step": 20550000 }, { "epoch": 59.49, "learning_rate": 2.0268039088551318e-05, "loss": 1.9578, "step": 20550500 }, { "epoch": 59.49, "learning_rate": 2.026731544090404e-05, "loss": 1.9354, "step": 20551000 }, { "epoch": 59.49, "learning_rate": 2.0266591793256762e-05, "loss": 1.9372, "step": 20551500 }, { "epoch": 59.49, "learning_rate": 2.0265868145609488e-05, "loss": 1.934, "step": 20552000 }, { "epoch": 59.49, "learning_rate": 2.0265145945257503e-05, "loss": 1.946, "step": 20552500 }, { "epoch": 59.49, "learning_rate": 2.026442229761023e-05, "loss": 1.984, "step": 20553000 }, { "epoch": 59.49, "learning_rate": 2.026369864996295e-05, "loss": 1.9425, "step": 20553500 }, { "epoch": 59.5, "learning_rate": 2.0262976449610967e-05, "loss": 1.9714, "step": 20554000 }, { "epoch": 59.5, "learning_rate": 2.0262252801963692e-05, "loss": 1.9412, "step": 20554500 }, { "epoch": 59.5, "learning_rate": 2.0261529154316415e-05, "loss": 1.9533, "step": 20555000 }, { "epoch": 59.5, "learning_rate": 2.026080695396443e-05, "loss": 1.9223, "step": 20555500 }, { "epoch": 59.5, "learning_rate": 2.0260083306317152e-05, "loss": 1.9411, "step": 20556000 }, { "epoch": 59.5, "learning_rate": 2.0259359658669878e-05, "loss": 1.945, "step": 20556500 }, { "epoch": 59.5, "learning_rate": 2.0258636011022604e-05, "loss": 1.9559, "step": 20557000 }, { "epoch": 59.51, "learning_rate": 2.0257912363375326e-05, "loss": 1.9162, "step": 20557500 }, { "epoch": 59.51, "learning_rate": 2.0257188715728048e-05, "loss": 1.9515, "step": 20558000 }, { "epoch": 59.51, "learning_rate": 2.0256466515376067e-05, "loss": 1.9467, "step": 20558500 }, { "epoch": 59.51, "learning_rate": 2.025574286772879e-05, "loss": 1.9441, "step": 20559000 }, { "epoch": 59.51, "learning_rate": 2.025501922008151e-05, "loss": 1.9548, "step": 20559500 }, { "epoch": 59.51, "learning_rate": 2.0254295572434237e-05, "loss": 1.9503, "step": 20560000 }, { "epoch": 59.51, "learning_rate": 2.025357192478696e-05, "loss": 1.9464, "step": 20560500 }, { "epoch": 59.52, "learning_rate": 2.025284827713968e-05, "loss": 1.9472, "step": 20561000 }, { "epoch": 59.52, "learning_rate": 2.02521260767877e-05, "loss": 1.9851, "step": 20561500 }, { "epoch": 59.52, "learning_rate": 2.0251402429140423e-05, "loss": 1.9359, "step": 20562000 }, { "epoch": 59.52, "learning_rate": 2.0250678781493145e-05, "loss": 1.938, "step": 20562500 }, { "epoch": 59.52, "learning_rate": 2.0249955133845867e-05, "loss": 1.9395, "step": 20563000 }, { "epoch": 59.52, "learning_rate": 2.0249231486198593e-05, "loss": 1.9541, "step": 20563500 }, { "epoch": 59.52, "learning_rate": 2.024850783855132e-05, "loss": 1.9584, "step": 20564000 }, { "epoch": 59.53, "learning_rate": 2.024778419090404e-05, "loss": 1.9466, "step": 20564500 }, { "epoch": 59.53, "learning_rate": 2.0247060543256763e-05, "loss": 1.9667, "step": 20565000 }, { "epoch": 59.53, "learning_rate": 2.0246336895609485e-05, "loss": 1.9445, "step": 20565500 }, { "epoch": 59.53, "learning_rate": 2.0245613247962207e-05, "loss": 1.9666, "step": 20566000 }, { "epoch": 59.53, "learning_rate": 2.024488960031493e-05, "loss": 1.9563, "step": 20566500 }, { "epoch": 59.53, "learning_rate": 2.0244165952667655e-05, "loss": 1.9497, "step": 20567000 }, { "epoch": 59.53, "learning_rate": 2.024344230502038e-05, "loss": 1.9447, "step": 20567500 }, { "epoch": 59.54, "learning_rate": 2.0242721551963693e-05, "loss": 1.9324, "step": 20568000 }, { "epoch": 59.54, "learning_rate": 2.0241997904316415e-05, "loss": 1.9541, "step": 20568500 }, { "epoch": 59.54, "learning_rate": 2.0241274256669138e-05, "loss": 1.9654, "step": 20569000 }, { "epoch": 59.54, "learning_rate": 2.024055060902186e-05, "loss": 1.9329, "step": 20569500 }, { "epoch": 59.54, "learning_rate": 2.0239826961374582e-05, "loss": 1.9426, "step": 20570000 }, { "epoch": 59.54, "learning_rate": 2.0239103313727308e-05, "loss": 1.9552, "step": 20570500 }, { "epoch": 59.54, "learning_rate": 2.0238379666080033e-05, "loss": 1.9342, "step": 20571000 }, { "epoch": 59.55, "learning_rate": 2.023765746572805e-05, "loss": 1.9628, "step": 20571500 }, { "epoch": 59.55, "learning_rate": 2.023693381808077e-05, "loss": 1.9506, "step": 20572000 }, { "epoch": 59.55, "learning_rate": 2.023621161772879e-05, "loss": 1.9486, "step": 20572500 }, { "epoch": 59.55, "learning_rate": 2.0235487970081512e-05, "loss": 1.9623, "step": 20573000 }, { "epoch": 59.55, "learning_rate": 2.0234764322434235e-05, "loss": 1.9083, "step": 20573500 }, { "epoch": 59.55, "learning_rate": 2.0234040674786957e-05, "loss": 1.9569, "step": 20574000 }, { "epoch": 59.55, "learning_rate": 2.0233317027139682e-05, "loss": 1.9516, "step": 20574500 }, { "epoch": 59.56, "learning_rate": 2.0232593379492408e-05, "loss": 1.9538, "step": 20575000 }, { "epoch": 59.56, "learning_rate": 2.023186973184513e-05, "loss": 1.9467, "step": 20575500 }, { "epoch": 59.56, "learning_rate": 2.0231146084197853e-05, "loss": 1.9619, "step": 20576000 }, { "epoch": 59.56, "learning_rate": 2.023042388384587e-05, "loss": 1.9571, "step": 20576500 }, { "epoch": 59.56, "learning_rate": 2.0229700236198594e-05, "loss": 1.9489, "step": 20577000 }, { "epoch": 59.56, "learning_rate": 2.0228976588551316e-05, "loss": 1.9581, "step": 20577500 }, { "epoch": 59.56, "learning_rate": 2.0228252940904038e-05, "loss": 1.9274, "step": 20578000 }, { "epoch": 59.57, "learning_rate": 2.0227529293256764e-05, "loss": 1.9256, "step": 20578500 }, { "epoch": 59.57, "learning_rate": 2.0226807092904783e-05, "loss": 1.9495, "step": 20579000 }, { "epoch": 59.57, "learning_rate": 2.0226083445257505e-05, "loss": 1.9727, "step": 20579500 }, { "epoch": 59.57, "learning_rate": 2.0225359797610227e-05, "loss": 1.9563, "step": 20580000 }, { "epoch": 59.57, "learning_rate": 2.022463614996295e-05, "loss": 1.9613, "step": 20580500 }, { "epoch": 59.57, "learning_rate": 2.022391250231567e-05, "loss": 1.9656, "step": 20581000 }, { "epoch": 59.58, "learning_rate": 2.0223188854668397e-05, "loss": 1.9658, "step": 20581500 }, { "epoch": 59.58, "learning_rate": 2.0222465207021123e-05, "loss": 1.9689, "step": 20582000 }, { "epoch": 59.58, "learning_rate": 2.0221741559373845e-05, "loss": 1.94, "step": 20582500 }, { "epoch": 59.58, "learning_rate": 2.0221017911726567e-05, "loss": 1.9239, "step": 20583000 }, { "epoch": 59.58, "learning_rate": 2.0220295711374583e-05, "loss": 1.9377, "step": 20583500 }, { "epoch": 59.58, "learning_rate": 2.021957206372731e-05, "loss": 1.9321, "step": 20584000 }, { "epoch": 59.58, "learning_rate": 2.021884841608003e-05, "loss": 1.9648, "step": 20584500 }, { "epoch": 59.59, "learning_rate": 2.0218124768432753e-05, "loss": 1.9519, "step": 20585000 }, { "epoch": 59.59, "learning_rate": 2.0217401120785475e-05, "loss": 1.9405, "step": 20585500 }, { "epoch": 59.59, "learning_rate": 2.02166774731382e-05, "loss": 1.9228, "step": 20586000 }, { "epoch": 59.59, "learning_rate": 2.0215953825490923e-05, "loss": 1.9665, "step": 20586500 }, { "epoch": 59.59, "learning_rate": 2.0215231625138942e-05, "loss": 1.9481, "step": 20587000 }, { "epoch": 59.59, "learning_rate": 2.0214507977491664e-05, "loss": 1.9434, "step": 20587500 }, { "epoch": 59.59, "learning_rate": 2.0213784329844387e-05, "loss": 1.9494, "step": 20588000 }, { "epoch": 59.6, "learning_rate": 2.021306068219711e-05, "loss": 1.947, "step": 20588500 }, { "epoch": 59.6, "learning_rate": 2.0212337034549834e-05, "loss": 1.9655, "step": 20589000 }, { "epoch": 59.6, "learning_rate": 2.0211616281493147e-05, "loss": 1.966, "step": 20589500 }, { "epoch": 59.6, "learning_rate": 2.0210892633845872e-05, "loss": 1.9741, "step": 20590000 }, { "epoch": 59.6, "learning_rate": 2.0210168986198595e-05, "loss": 1.9504, "step": 20590500 }, { "epoch": 59.6, "learning_rate": 2.0209445338551317e-05, "loss": 1.9574, "step": 20591000 }, { "epoch": 59.6, "learning_rate": 2.020872169090404e-05, "loss": 1.96, "step": 20591500 }, { "epoch": 59.61, "learning_rate": 2.020799804325676e-05, "loss": 1.9413, "step": 20592000 }, { "epoch": 59.61, "learning_rate": 2.0207274395609483e-05, "loss": 1.9664, "step": 20592500 }, { "epoch": 59.61, "learning_rate": 2.020655074796221e-05, "loss": 1.9282, "step": 20593000 }, { "epoch": 59.61, "learning_rate": 2.0205827100314935e-05, "loss": 1.9522, "step": 20593500 }, { "epoch": 59.61, "learning_rate": 2.0205103452667657e-05, "loss": 1.9455, "step": 20594000 }, { "epoch": 59.61, "learning_rate": 2.0204381252315672e-05, "loss": 1.9316, "step": 20594500 }, { "epoch": 59.61, "learning_rate": 2.0203657604668398e-05, "loss": 1.9308, "step": 20595000 }, { "epoch": 59.62, "learning_rate": 2.0202935404316414e-05, "loss": 1.956, "step": 20595500 }, { "epoch": 59.62, "learning_rate": 2.0202211756669136e-05, "loss": 1.9358, "step": 20596000 }, { "epoch": 59.62, "learning_rate": 2.020148810902186e-05, "loss": 1.9492, "step": 20596500 }, { "epoch": 59.62, "learning_rate": 2.0200764461374584e-05, "loss": 1.9447, "step": 20597000 }, { "epoch": 59.62, "learning_rate": 2.020004081372731e-05, "loss": 1.9515, "step": 20597500 }, { "epoch": 59.62, "learning_rate": 2.019931716608003e-05, "loss": 1.9871, "step": 20598000 }, { "epoch": 59.62, "learning_rate": 2.0198593518432754e-05, "loss": 1.9468, "step": 20598500 }, { "epoch": 59.63, "learning_rate": 2.0197869870785476e-05, "loss": 1.9477, "step": 20599000 }, { "epoch": 59.63, "learning_rate": 2.0197146223138198e-05, "loss": 1.9516, "step": 20599500 }, { "epoch": 59.63, "learning_rate": 2.0196422575490924e-05, "loss": 1.9661, "step": 20600000 }, { "epoch": 59.63, "learning_rate": 2.019569892784365e-05, "loss": 1.9385, "step": 20600500 }, { "epoch": 59.63, "learning_rate": 2.0194975280196372e-05, "loss": 1.9522, "step": 20601000 }, { "epoch": 59.63, "learning_rate": 2.0194253079844387e-05, "loss": 1.9369, "step": 20601500 }, { "epoch": 59.63, "learning_rate": 2.0193529432197113e-05, "loss": 1.9437, "step": 20602000 }, { "epoch": 59.64, "learning_rate": 2.0192805784549835e-05, "loss": 1.945, "step": 20602500 }, { "epoch": 59.64, "learning_rate": 2.0192082136902557e-05, "loss": 1.954, "step": 20603000 }, { "epoch": 59.64, "learning_rate": 2.0191359936550573e-05, "loss": 1.9449, "step": 20603500 }, { "epoch": 59.64, "learning_rate": 2.0190637736198592e-05, "loss": 1.9439, "step": 20604000 }, { "epoch": 59.64, "learning_rate": 2.0189914088551314e-05, "loss": 1.96, "step": 20604500 }, { "epoch": 59.64, "learning_rate": 2.018919044090404e-05, "loss": 1.9476, "step": 20605000 }, { "epoch": 59.64, "learning_rate": 2.0188466793256762e-05, "loss": 1.9387, "step": 20605500 }, { "epoch": 59.65, "learning_rate": 2.018774459290478e-05, "loss": 1.9846, "step": 20606000 }, { "epoch": 59.65, "learning_rate": 2.0187020945257503e-05, "loss": 1.9464, "step": 20606500 }, { "epoch": 59.65, "learning_rate": 2.0186298744905522e-05, "loss": 1.9401, "step": 20607000 }, { "epoch": 59.65, "learning_rate": 2.0185575097258244e-05, "loss": 1.945, "step": 20607500 }, { "epoch": 59.65, "learning_rate": 2.0184851449610967e-05, "loss": 1.9407, "step": 20608000 }, { "epoch": 59.65, "learning_rate": 2.0184127801963692e-05, "loss": 1.9394, "step": 20608500 }, { "epoch": 59.65, "learning_rate": 2.0183404154316414e-05, "loss": 1.9665, "step": 20609000 }, { "epoch": 59.66, "learning_rate": 2.0182680506669137e-05, "loss": 1.9667, "step": 20609500 }, { "epoch": 59.66, "learning_rate": 2.0181958306317156e-05, "loss": 1.9514, "step": 20610000 }, { "epoch": 59.66, "learning_rate": 2.0181234658669878e-05, "loss": 1.9599, "step": 20610500 }, { "epoch": 59.66, "learning_rate": 2.01805110110226e-05, "loss": 1.9336, "step": 20611000 }, { "epoch": 59.66, "learning_rate": 2.0179787363375326e-05, "loss": 1.9738, "step": 20611500 }, { "epoch": 59.66, "learning_rate": 2.017906516302334e-05, "loss": 1.9252, "step": 20612000 }, { "epoch": 59.66, "learning_rate": 2.0178341515376067e-05, "loss": 1.9707, "step": 20612500 }, { "epoch": 59.67, "learning_rate": 2.017761786772879e-05, "loss": 1.9543, "step": 20613000 }, { "epoch": 59.67, "learning_rate": 2.0176894220081515e-05, "loss": 1.9663, "step": 20613500 }, { "epoch": 59.67, "learning_rate": 2.0176170572434237e-05, "loss": 1.9505, "step": 20614000 }, { "epoch": 59.67, "learning_rate": 2.017544692478696e-05, "loss": 1.9496, "step": 20614500 }, { "epoch": 59.67, "learning_rate": 2.017472327713968e-05, "loss": 1.9274, "step": 20615000 }, { "epoch": 59.67, "learning_rate": 2.0173999629492404e-05, "loss": 1.9641, "step": 20615500 }, { "epoch": 59.67, "learning_rate": 2.017327598184513e-05, "loss": 1.9625, "step": 20616000 }, { "epoch": 59.68, "learning_rate": 2.0172553781493148e-05, "loss": 1.9283, "step": 20616500 }, { "epoch": 59.68, "learning_rate": 2.017183013384587e-05, "loss": 1.943, "step": 20617000 }, { "epoch": 59.68, "learning_rate": 2.0171106486198593e-05, "loss": 1.9582, "step": 20617500 }, { "epoch": 59.68, "learning_rate": 2.0170382838551315e-05, "loss": 1.9588, "step": 20618000 }, { "epoch": 59.68, "learning_rate": 2.016965919090404e-05, "loss": 1.9454, "step": 20618500 }, { "epoch": 59.68, "learning_rate": 2.0168936990552056e-05, "loss": 1.9606, "step": 20619000 }, { "epoch": 59.69, "learning_rate": 2.016821334290478e-05, "loss": 1.9326, "step": 20619500 }, { "epoch": 59.69, "learning_rate": 2.0167489695257504e-05, "loss": 1.9697, "step": 20620000 }, { "epoch": 59.69, "learning_rate": 2.0166767494905523e-05, "loss": 1.9448, "step": 20620500 }, { "epoch": 59.69, "learning_rate": 2.0166043847258245e-05, "loss": 1.9455, "step": 20621000 }, { "epoch": 59.69, "learning_rate": 2.0165320199610967e-05, "loss": 1.9369, "step": 20621500 }, { "epoch": 59.69, "learning_rate": 2.016459655196369e-05, "loss": 1.9742, "step": 20622000 }, { "epoch": 59.69, "learning_rate": 2.0163872904316415e-05, "loss": 1.9497, "step": 20622500 }, { "epoch": 59.7, "learning_rate": 2.016315070396443e-05, "loss": 1.9515, "step": 20623000 }, { "epoch": 59.7, "learning_rate": 2.0162427056317153e-05, "loss": 1.9499, "step": 20623500 }, { "epoch": 59.7, "learning_rate": 2.016170340866988e-05, "loss": 1.9559, "step": 20624000 }, { "epoch": 59.7, "learning_rate": 2.01609797610226e-05, "loss": 1.946, "step": 20624500 }, { "epoch": 59.7, "learning_rate": 2.0160256113375327e-05, "loss": 1.9554, "step": 20625000 }, { "epoch": 59.7, "learning_rate": 2.015953246572805e-05, "loss": 1.9548, "step": 20625500 }, { "epoch": 59.7, "learning_rate": 2.015880881808077e-05, "loss": 1.9496, "step": 20626000 }, { "epoch": 59.71, "learning_rate": 2.0158085170433493e-05, "loss": 1.9417, "step": 20626500 }, { "epoch": 59.71, "learning_rate": 2.0157361522786215e-05, "loss": 1.947, "step": 20627000 }, { "epoch": 59.71, "learning_rate": 2.015663787513894e-05, "loss": 1.9593, "step": 20627500 }, { "epoch": 59.71, "learning_rate": 2.015591567478696e-05, "loss": 1.9458, "step": 20628000 }, { "epoch": 59.71, "learning_rate": 2.0155192027139682e-05, "loss": 1.9486, "step": 20628500 }, { "epoch": 59.71, "learning_rate": 2.0154468379492404e-05, "loss": 1.9267, "step": 20629000 }, { "epoch": 59.71, "learning_rate": 2.0153744731845127e-05, "loss": 1.9343, "step": 20629500 }, { "epoch": 59.72, "learning_rate": 2.0153021084197852e-05, "loss": 1.9698, "step": 20630000 }, { "epoch": 59.72, "learning_rate": 2.0152297436550575e-05, "loss": 1.9537, "step": 20630500 }, { "epoch": 59.72, "learning_rate": 2.0151575236198594e-05, "loss": 1.9466, "step": 20631000 }, { "epoch": 59.72, "learning_rate": 2.0150853035846612e-05, "loss": 1.952, "step": 20631500 }, { "epoch": 59.72, "learning_rate": 2.0150129388199335e-05, "loss": 1.9538, "step": 20632000 }, { "epoch": 59.72, "learning_rate": 2.0149405740552057e-05, "loss": 1.9351, "step": 20632500 }, { "epoch": 59.72, "learning_rate": 2.014868209290478e-05, "loss": 1.937, "step": 20633000 }, { "epoch": 59.73, "learning_rate": 2.0147959892552798e-05, "loss": 1.9595, "step": 20633500 }, { "epoch": 59.73, "learning_rate": 2.014723624490552e-05, "loss": 1.9668, "step": 20634000 }, { "epoch": 59.73, "learning_rate": 2.0146512597258243e-05, "loss": 1.9445, "step": 20634500 }, { "epoch": 59.73, "learning_rate": 2.0145788949610968e-05, "loss": 1.9489, "step": 20635000 }, { "epoch": 59.73, "learning_rate": 2.014506530196369e-05, "loss": 1.9397, "step": 20635500 }, { "epoch": 59.73, "learning_rate": 2.0144341654316416e-05, "loss": 1.9729, "step": 20636000 }, { "epoch": 59.73, "learning_rate": 2.0143618006669138e-05, "loss": 1.9648, "step": 20636500 }, { "epoch": 59.74, "learning_rate": 2.014289435902186e-05, "loss": 1.9198, "step": 20637000 }, { "epoch": 59.74, "learning_rate": 2.0142170711374583e-05, "loss": 1.9571, "step": 20637500 }, { "epoch": 59.74, "learning_rate": 2.0141447063727305e-05, "loss": 1.9518, "step": 20638000 }, { "epoch": 59.74, "learning_rate": 2.0140726310670617e-05, "loss": 1.9611, "step": 20638500 }, { "epoch": 59.74, "learning_rate": 2.0140002663023343e-05, "loss": 1.9593, "step": 20639000 }, { "epoch": 59.74, "learning_rate": 2.013927901537607e-05, "loss": 1.9559, "step": 20639500 }, { "epoch": 59.74, "learning_rate": 2.013855536772879e-05, "loss": 1.9301, "step": 20640000 }, { "epoch": 59.75, "learning_rate": 2.0137831720081513e-05, "loss": 1.9509, "step": 20640500 }, { "epoch": 59.75, "learning_rate": 2.0137108072434235e-05, "loss": 1.9661, "step": 20641000 }, { "epoch": 59.75, "learning_rate": 2.0136384424786957e-05, "loss": 1.9335, "step": 20641500 }, { "epoch": 59.75, "learning_rate": 2.013566077713968e-05, "loss": 1.9461, "step": 20642000 }, { "epoch": 59.75, "learning_rate": 2.0134938576787702e-05, "loss": 1.9225, "step": 20642500 }, { "epoch": 59.75, "learning_rate": 2.0134214929140424e-05, "loss": 1.975, "step": 20643000 }, { "epoch": 59.75, "learning_rate": 2.0133491281493146e-05, "loss": 1.9437, "step": 20643500 }, { "epoch": 59.76, "learning_rate": 2.013276763384587e-05, "loss": 1.9781, "step": 20644000 }, { "epoch": 59.76, "learning_rate": 2.0132045433493888e-05, "loss": 1.9537, "step": 20644500 }, { "epoch": 59.76, "learning_rate": 2.013132178584661e-05, "loss": 1.9583, "step": 20645000 }, { "epoch": 59.76, "learning_rate": 2.0130598138199332e-05, "loss": 1.9323, "step": 20645500 }, { "epoch": 59.76, "learning_rate": 2.0129874490552058e-05, "loss": 1.943, "step": 20646000 }, { "epoch": 59.76, "learning_rate": 2.012915084290478e-05, "loss": 1.9458, "step": 20646500 }, { "epoch": 59.76, "learning_rate": 2.0128427195257506e-05, "loss": 1.9454, "step": 20647000 }, { "epoch": 59.77, "learning_rate": 2.0127703547610228e-05, "loss": 1.9624, "step": 20647500 }, { "epoch": 59.77, "learning_rate": 2.0126981347258243e-05, "loss": 1.9599, "step": 20648000 }, { "epoch": 59.77, "learning_rate": 2.012625769961097e-05, "loss": 1.963, "step": 20648500 }, { "epoch": 59.77, "learning_rate": 2.012553405196369e-05, "loss": 1.9369, "step": 20649000 }, { "epoch": 59.77, "learning_rate": 2.0124810404316413e-05, "loss": 1.9693, "step": 20649500 }, { "epoch": 59.77, "learning_rate": 2.0124088203964432e-05, "loss": 1.9504, "step": 20650000 }, { "epoch": 59.77, "learning_rate": 2.0123364556317158e-05, "loss": 1.9289, "step": 20650500 }, { "epoch": 59.78, "learning_rate": 2.012264090866988e-05, "loss": 1.9845, "step": 20651000 }, { "epoch": 59.78, "learning_rate": 2.0121917261022602e-05, "loss": 1.9596, "step": 20651500 }, { "epoch": 59.78, "learning_rate": 2.0121195060670618e-05, "loss": 1.9693, "step": 20652000 }, { "epoch": 59.78, "learning_rate": 2.0120471413023344e-05, "loss": 1.9552, "step": 20652500 }, { "epoch": 59.78, "learning_rate": 2.0119747765376066e-05, "loss": 1.9358, "step": 20653000 }, { "epoch": 59.78, "learning_rate": 2.0119024117728788e-05, "loss": 1.969, "step": 20653500 }, { "epoch": 59.78, "learning_rate": 2.0118300470081514e-05, "loss": 1.9546, "step": 20654000 }, { "epoch": 59.79, "learning_rate": 2.0117576822434236e-05, "loss": 1.9655, "step": 20654500 }, { "epoch": 59.79, "learning_rate": 2.0116853174786958e-05, "loss": 1.9562, "step": 20655000 }, { "epoch": 59.79, "learning_rate": 2.011613242173027e-05, "loss": 1.9439, "step": 20655500 }, { "epoch": 59.79, "learning_rate": 2.0115408774082996e-05, "loss": 1.9527, "step": 20656000 }, { "epoch": 59.79, "learning_rate": 2.011468512643572e-05, "loss": 1.9407, "step": 20656500 }, { "epoch": 59.79, "learning_rate": 2.011396147878844e-05, "loss": 1.9507, "step": 20657000 }, { "epoch": 59.8, "learning_rate": 2.0113237831141166e-05, "loss": 1.9919, "step": 20657500 }, { "epoch": 59.8, "learning_rate": 2.011251418349389e-05, "loss": 1.9433, "step": 20658000 }, { "epoch": 59.8, "learning_rate": 2.011179053584661e-05, "loss": 1.9494, "step": 20658500 }, { "epoch": 59.8, "learning_rate": 2.0111066888199333e-05, "loss": 1.9409, "step": 20659000 }, { "epoch": 59.8, "learning_rate": 2.0110344687847352e-05, "loss": 1.9681, "step": 20659500 }, { "epoch": 59.8, "learning_rate": 2.0109621040200074e-05, "loss": 1.961, "step": 20660000 }, { "epoch": 59.8, "learning_rate": 2.0108897392552796e-05, "loss": 1.9714, "step": 20660500 }, { "epoch": 59.81, "learning_rate": 2.010817374490552e-05, "loss": 1.9501, "step": 20661000 }, { "epoch": 59.81, "learning_rate": 2.0107450097258244e-05, "loss": 1.9594, "step": 20661500 }, { "epoch": 59.81, "learning_rate": 2.010672644961097e-05, "loss": 1.9507, "step": 20662000 }, { "epoch": 59.81, "learning_rate": 2.0106002801963692e-05, "loss": 1.9688, "step": 20662500 }, { "epoch": 59.81, "learning_rate": 2.0105279154316414e-05, "loss": 1.9384, "step": 20663000 }, { "epoch": 59.81, "learning_rate": 2.0104555506669136e-05, "loss": 1.9603, "step": 20663500 }, { "epoch": 59.81, "learning_rate": 2.010383185902186e-05, "loss": 1.9597, "step": 20664000 }, { "epoch": 59.82, "learning_rate": 2.0103108211374584e-05, "loss": 1.9579, "step": 20664500 }, { "epoch": 59.82, "learning_rate": 2.010238456372731e-05, "loss": 1.9348, "step": 20665000 }, { "epoch": 59.82, "learning_rate": 2.0101660916080032e-05, "loss": 1.9548, "step": 20665500 }, { "epoch": 59.82, "learning_rate": 2.0100940163023344e-05, "loss": 1.9396, "step": 20666000 }, { "epoch": 59.82, "learning_rate": 2.0100216515376067e-05, "loss": 1.9473, "step": 20666500 }, { "epoch": 59.82, "learning_rate": 2.009949286772879e-05, "loss": 1.9235, "step": 20667000 }, { "epoch": 59.82, "learning_rate": 2.009876922008151e-05, "loss": 1.9351, "step": 20667500 }, { "epoch": 59.83, "learning_rate": 2.0098045572434233e-05, "loss": 1.9532, "step": 20668000 }, { "epoch": 59.83, "learning_rate": 2.009732192478696e-05, "loss": 1.9355, "step": 20668500 }, { "epoch": 59.83, "learning_rate": 2.0096598277139685e-05, "loss": 1.9463, "step": 20669000 }, { "epoch": 59.83, "learning_rate": 2.0095874629492407e-05, "loss": 1.9415, "step": 20669500 }, { "epoch": 59.83, "learning_rate": 2.009515098184513e-05, "loss": 1.9559, "step": 20670000 }, { "epoch": 59.83, "learning_rate": 2.0094428781493148e-05, "loss": 1.9456, "step": 20670500 }, { "epoch": 59.83, "learning_rate": 2.009370513384587e-05, "loss": 1.9329, "step": 20671000 }, { "epoch": 59.84, "learning_rate": 2.0092981486198593e-05, "loss": 1.9582, "step": 20671500 }, { "epoch": 59.84, "learning_rate": 2.0092257838551315e-05, "loss": 1.9603, "step": 20672000 }, { "epoch": 59.84, "learning_rate": 2.009153419090404e-05, "loss": 1.961, "step": 20672500 }, { "epoch": 59.84, "learning_rate": 2.0090810543256763e-05, "loss": 1.9667, "step": 20673000 }, { "epoch": 59.84, "learning_rate": 2.0090086895609485e-05, "loss": 1.9401, "step": 20673500 }, { "epoch": 59.84, "learning_rate": 2.008936324796221e-05, "loss": 1.9495, "step": 20674000 }, { "epoch": 59.84, "learning_rate": 2.0088642494905523e-05, "loss": 1.9491, "step": 20674500 }, { "epoch": 59.85, "learning_rate": 2.0087918847258245e-05, "loss": 1.9539, "step": 20675000 }, { "epoch": 59.85, "learning_rate": 2.0087195199610967e-05, "loss": 1.9453, "step": 20675500 }, { "epoch": 59.85, "learning_rate": 2.0086471551963693e-05, "loss": 1.9535, "step": 20676000 }, { "epoch": 59.85, "learning_rate": 2.0085747904316415e-05, "loss": 1.9518, "step": 20676500 }, { "epoch": 59.85, "learning_rate": 2.0085024256669137e-05, "loss": 1.9448, "step": 20677000 }, { "epoch": 59.85, "learning_rate": 2.008430060902186e-05, "loss": 1.9464, "step": 20677500 }, { "epoch": 59.85, "learning_rate": 2.0083576961374585e-05, "loss": 1.928, "step": 20678000 }, { "epoch": 59.86, "learning_rate": 2.0082853313727307e-05, "loss": 1.9618, "step": 20678500 }, { "epoch": 59.86, "learning_rate": 2.008212966608003e-05, "loss": 1.9514, "step": 20679000 }, { "epoch": 59.86, "learning_rate": 2.0081406018432755e-05, "loss": 1.937, "step": 20679500 }, { "epoch": 59.86, "learning_rate": 2.0080682370785477e-05, "loss": 1.9655, "step": 20680000 }, { "epoch": 59.86, "learning_rate": 2.00799587231382e-05, "loss": 1.971, "step": 20680500 }, { "epoch": 59.86, "learning_rate": 2.0079235075490925e-05, "loss": 1.967, "step": 20681000 }, { "epoch": 59.86, "learning_rate": 2.0078511427843648e-05, "loss": 1.9432, "step": 20681500 }, { "epoch": 59.87, "learning_rate": 2.0077789227491663e-05, "loss": 1.9525, "step": 20682000 }, { "epoch": 59.87, "learning_rate": 2.0077065579844385e-05, "loss": 1.9527, "step": 20682500 }, { "epoch": 59.87, "learning_rate": 2.007634193219711e-05, "loss": 1.9439, "step": 20683000 }, { "epoch": 59.87, "learning_rate": 2.0075618284549837e-05, "loss": 1.9528, "step": 20683500 }, { "epoch": 59.87, "learning_rate": 2.007489463690256e-05, "loss": 1.954, "step": 20684000 }, { "epoch": 59.87, "learning_rate": 2.007417388384587e-05, "loss": 1.9453, "step": 20684500 }, { "epoch": 59.87, "learning_rate": 2.0073450236198593e-05, "loss": 1.9615, "step": 20685000 }, { "epoch": 59.88, "learning_rate": 2.0072726588551316e-05, "loss": 1.9513, "step": 20685500 }, { "epoch": 59.88, "learning_rate": 2.0072002940904038e-05, "loss": 1.9454, "step": 20686000 }, { "epoch": 59.88, "learning_rate": 2.0071279293256763e-05, "loss": 1.9666, "step": 20686500 }, { "epoch": 59.88, "learning_rate": 2.0070555645609486e-05, "loss": 1.9807, "step": 20687000 }, { "epoch": 59.88, "learning_rate": 2.006983199796221e-05, "loss": 1.9623, "step": 20687500 }, { "epoch": 59.88, "learning_rate": 2.0069108350314933e-05, "loss": 1.9615, "step": 20688000 }, { "epoch": 59.88, "learning_rate": 2.0068384702667656e-05, "loss": 1.943, "step": 20688500 }, { "epoch": 59.89, "learning_rate": 2.0067662502315675e-05, "loss": 1.9625, "step": 20689000 }, { "epoch": 59.89, "learning_rate": 2.0066938854668397e-05, "loss": 1.9714, "step": 20689500 }, { "epoch": 59.89, "learning_rate": 2.0066216654316412e-05, "loss": 1.9529, "step": 20690000 }, { "epoch": 59.89, "learning_rate": 2.0065493006669138e-05, "loss": 1.97, "step": 20690500 }, { "epoch": 59.89, "learning_rate": 2.0064769359021864e-05, "loss": 1.9474, "step": 20691000 }, { "epoch": 59.89, "learning_rate": 2.0064045711374586e-05, "loss": 1.9559, "step": 20691500 }, { "epoch": 59.89, "learning_rate": 2.00633235110226e-05, "loss": 1.9475, "step": 20692000 }, { "epoch": 59.9, "learning_rate": 2.0062599863375324e-05, "loss": 1.945, "step": 20692500 }, { "epoch": 59.9, "learning_rate": 2.006187621572805e-05, "loss": 1.9201, "step": 20693000 }, { "epoch": 59.9, "learning_rate": 2.006115256808077e-05, "loss": 1.9639, "step": 20693500 }, { "epoch": 59.9, "learning_rate": 2.0060428920433494e-05, "loss": 1.9436, "step": 20694000 }, { "epoch": 59.9, "learning_rate": 2.0059705272786216e-05, "loss": 1.9433, "step": 20694500 }, { "epoch": 59.9, "learning_rate": 2.005898162513894e-05, "loss": 1.9626, "step": 20695000 }, { "epoch": 59.9, "learning_rate": 2.0058257977491664e-05, "loss": 1.9389, "step": 20695500 }, { "epoch": 59.91, "learning_rate": 2.0057537224434976e-05, "loss": 1.9289, "step": 20696000 }, { "epoch": 59.91, "learning_rate": 2.0056813576787702e-05, "loss": 1.9339, "step": 20696500 }, { "epoch": 59.91, "learning_rate": 2.0056089929140424e-05, "loss": 1.9442, "step": 20697000 }, { "epoch": 59.91, "learning_rate": 2.005536772878844e-05, "loss": 1.9571, "step": 20697500 }, { "epoch": 59.91, "learning_rate": 2.0054644081141162e-05, "loss": 1.9665, "step": 20698000 }, { "epoch": 59.91, "learning_rate": 2.005392188078918e-05, "loss": 1.9718, "step": 20698500 }, { "epoch": 59.92, "learning_rate": 2.0053198233141906e-05, "loss": 1.9536, "step": 20699000 }, { "epoch": 59.92, "learning_rate": 2.005247458549463e-05, "loss": 1.9543, "step": 20699500 }, { "epoch": 59.92, "learning_rate": 2.005175093784735e-05, "loss": 1.9296, "step": 20700000 }, { "epoch": 59.92, "learning_rate": 2.0051027290200076e-05, "loss": 1.9748, "step": 20700500 }, { "epoch": 59.92, "learning_rate": 2.00503036425528e-05, "loss": 1.9721, "step": 20701000 }, { "epoch": 59.92, "learning_rate": 2.004957999490552e-05, "loss": 1.9636, "step": 20701500 }, { "epoch": 59.92, "learning_rate": 2.0048856347258243e-05, "loss": 1.96, "step": 20702000 }, { "epoch": 59.93, "learning_rate": 2.004813269961097e-05, "loss": 1.9533, "step": 20702500 }, { "epoch": 59.93, "learning_rate": 2.004740905196369e-05, "loss": 1.9652, "step": 20703000 }, { "epoch": 59.93, "learning_rate": 2.0046685404316413e-05, "loss": 1.9618, "step": 20703500 }, { "epoch": 59.93, "learning_rate": 2.004596175666914e-05, "loss": 1.9753, "step": 20704000 }, { "epoch": 59.93, "learning_rate": 2.0045239556317154e-05, "loss": 1.9354, "step": 20704500 }, { "epoch": 59.93, "learning_rate": 2.0044515908669877e-05, "loss": 1.9505, "step": 20705000 }, { "epoch": 59.93, "learning_rate": 2.0043792261022602e-05, "loss": 1.941, "step": 20705500 }, { "epoch": 59.94, "learning_rate": 2.0043068613375328e-05, "loss": 1.9578, "step": 20706000 }, { "epoch": 59.94, "learning_rate": 2.0042346413023343e-05, "loss": 1.9441, "step": 20706500 }, { "epoch": 59.94, "learning_rate": 2.0041622765376066e-05, "loss": 1.943, "step": 20707000 }, { "epoch": 59.94, "learning_rate": 2.004089911772879e-05, "loss": 1.9537, "step": 20707500 }, { "epoch": 59.94, "learning_rate": 2.0040175470081514e-05, "loss": 1.9686, "step": 20708000 }, { "epoch": 59.94, "learning_rate": 2.0039451822434236e-05, "loss": 1.9394, "step": 20708500 }, { "epoch": 59.94, "learning_rate": 2.0038728174786958e-05, "loss": 1.9649, "step": 20709000 }, { "epoch": 59.95, "learning_rate": 2.003800452713968e-05, "loss": 1.9465, "step": 20709500 }, { "epoch": 59.95, "learning_rate": 2.0037280879492406e-05, "loss": 1.9609, "step": 20710000 }, { "epoch": 59.95, "learning_rate": 2.0036557231845128e-05, "loss": 1.9933, "step": 20710500 }, { "epoch": 59.95, "learning_rate": 2.0035835031493147e-05, "loss": 1.9574, "step": 20711000 }, { "epoch": 59.95, "learning_rate": 2.003511138384587e-05, "loss": 1.9435, "step": 20711500 }, { "epoch": 59.95, "learning_rate": 2.003438773619859e-05, "loss": 1.9296, "step": 20712000 }, { "epoch": 59.95, "learning_rate": 2.0033664088551317e-05, "loss": 1.9225, "step": 20712500 }, { "epoch": 59.96, "learning_rate": 2.003294044090404e-05, "loss": 1.9645, "step": 20713000 }, { "epoch": 59.96, "learning_rate": 2.0032216793256765e-05, "loss": 1.9424, "step": 20713500 }, { "epoch": 59.96, "learning_rate": 2.0031493145609487e-05, "loss": 1.9376, "step": 20714000 }, { "epoch": 59.96, "learning_rate": 2.0030770945257503e-05, "loss": 1.9374, "step": 20714500 }, { "epoch": 59.96, "learning_rate": 2.003004729761023e-05, "loss": 1.946, "step": 20715000 }, { "epoch": 59.96, "learning_rate": 2.002932364996295e-05, "loss": 1.9781, "step": 20715500 }, { "epoch": 59.96, "learning_rate": 2.0028600002315673e-05, "loss": 1.9449, "step": 20716000 }, { "epoch": 59.97, "learning_rate": 2.0027876354668395e-05, "loss": 1.9667, "step": 20716500 }, { "epoch": 59.97, "learning_rate": 2.002715270702112e-05, "loss": 1.9635, "step": 20717000 }, { "epoch": 59.97, "learning_rate": 2.0026429059373843e-05, "loss": 1.9665, "step": 20717500 }, { "epoch": 59.97, "learning_rate": 2.002570541172657e-05, "loss": 1.9566, "step": 20718000 }, { "epoch": 59.97, "learning_rate": 2.002498176407929e-05, "loss": 1.972, "step": 20718500 }, { "epoch": 59.97, "learning_rate": 2.0024258116432013e-05, "loss": 1.9879, "step": 20719000 }, { "epoch": 59.97, "learning_rate": 2.0023534468784735e-05, "loss": 1.9781, "step": 20719500 }, { "epoch": 59.98, "learning_rate": 2.0022812268432754e-05, "loss": 1.9594, "step": 20720000 }, { "epoch": 59.98, "learning_rate": 2.0022088620785476e-05, "loss": 1.9592, "step": 20720500 }, { "epoch": 59.98, "learning_rate": 2.0021366420433495e-05, "loss": 1.9537, "step": 20721000 }, { "epoch": 59.98, "learning_rate": 2.0020642772786218e-05, "loss": 1.9344, "step": 20721500 }, { "epoch": 59.98, "learning_rate": 2.0019919125138943e-05, "loss": 1.9856, "step": 20722000 }, { "epoch": 59.98, "learning_rate": 2.0019195477491665e-05, "loss": 1.9469, "step": 20722500 }, { "epoch": 59.98, "learning_rate": 2.0018471829844388e-05, "loss": 1.9389, "step": 20723000 }, { "epoch": 59.99, "learning_rate": 2.0017749629492403e-05, "loss": 1.9476, "step": 20723500 }, { "epoch": 59.99, "learning_rate": 2.001702598184513e-05, "loss": 1.971, "step": 20724000 }, { "epoch": 59.99, "learning_rate": 2.0016302334197855e-05, "loss": 1.9543, "step": 20724500 }, { "epoch": 59.99, "learning_rate": 2.0015578686550577e-05, "loss": 1.9651, "step": 20725000 }, { "epoch": 59.99, "learning_rate": 2.00148550389033e-05, "loss": 1.9549, "step": 20725500 }, { "epoch": 59.99, "learning_rate": 2.001413139125602e-05, "loss": 1.9458, "step": 20726000 }, { "epoch": 59.99, "learning_rate": 2.0013407743608743e-05, "loss": 1.9514, "step": 20726500 }, { "epoch": 60.0, "learning_rate": 2.001268409596147e-05, "loss": 1.9333, "step": 20727000 }, { "epoch": 60.0, "learning_rate": 2.001196044831419e-05, "loss": 1.9379, "step": 20727500 }, { "epoch": 60.0, "learning_rate": 2.0011236800666917e-05, "loss": 1.9627, "step": 20728000 }, { "epoch": 60.0, "eval_accuracy": 0.6774629093801515, "eval_accuracy_mlm": 0.6439839317329467, "eval_accuracy_nsp": 0.8569477480945769, "eval_loss": 2.1707332134246826, "eval_runtime": 331.3276, "eval_samples_per_second": 1317.083, "eval_steps_per_second": 54.879, "step": 20728320 }, { "epoch": 60.0, "learning_rate": 2.001051315301964e-05, "loss": 1.9291, "step": 20728500 }, { "epoch": 60.0, "learning_rate": 2.000978950537236e-05, "loss": 1.923, "step": 20729000 }, { "epoch": 60.0, "learning_rate": 2.0009065857725084e-05, "loss": 1.9593, "step": 20729500 }, { "epoch": 60.0, "learning_rate": 2.0008342210077806e-05, "loss": 1.9158, "step": 20730000 }, { "epoch": 60.01, "learning_rate": 2.0007620009725825e-05, "loss": 1.9442, "step": 20730500 }, { "epoch": 60.01, "learning_rate": 2.0006896362078547e-05, "loss": 1.935, "step": 20731000 }, { "epoch": 60.01, "learning_rate": 2.000617271443127e-05, "loss": 1.9145, "step": 20731500 }, { "epoch": 60.01, "learning_rate": 2.000545051407929e-05, "loss": 1.9338, "step": 20732000 }, { "epoch": 60.01, "learning_rate": 2.0004726866432014e-05, "loss": 1.9189, "step": 20732500 }, { "epoch": 60.01, "learning_rate": 2.0004003218784736e-05, "loss": 1.9321, "step": 20733000 }, { "epoch": 60.01, "learning_rate": 2.0003281018432755e-05, "loss": 1.9122, "step": 20733500 }, { "epoch": 60.02, "learning_rate": 2.0002557370785477e-05, "loss": 1.9415, "step": 20734000 }, { "epoch": 60.02, "learning_rate": 2.00018337231382e-05, "loss": 1.9539, "step": 20734500 }, { "epoch": 60.02, "learning_rate": 2.0001110075490922e-05, "loss": 1.9012, "step": 20735000 }, { "epoch": 60.02, "learning_rate": 2.0000386427843647e-05, "loss": 1.9704, "step": 20735500 }, { "epoch": 60.02, "learning_rate": 1.999966278019637e-05, "loss": 1.9499, "step": 20736000 }, { "epoch": 60.02, "learning_rate": 1.9998939132549095e-05, "loss": 1.9325, "step": 20736500 }, { "epoch": 60.03, "learning_rate": 1.9998215484901817e-05, "loss": 1.9302, "step": 20737000 }, { "epoch": 60.03, "learning_rate": 1.999749183725454e-05, "loss": 1.9238, "step": 20737500 }, { "epoch": 60.03, "learning_rate": 1.9996768189607262e-05, "loss": 1.9396, "step": 20738000 }, { "epoch": 60.03, "learning_rate": 1.9996044541959984e-05, "loss": 1.9691, "step": 20738500 }, { "epoch": 60.03, "learning_rate": 1.999532089431271e-05, "loss": 1.9395, "step": 20739000 }, { "epoch": 60.03, "learning_rate": 1.9994597246665432e-05, "loss": 1.9281, "step": 20739500 }, { "epoch": 60.03, "learning_rate": 1.9993873599018158e-05, "loss": 1.9449, "step": 20740000 }, { "epoch": 60.04, "learning_rate": 1.9993151398666173e-05, "loss": 1.9386, "step": 20740500 }, { "epoch": 60.04, "learning_rate": 1.9992429198314192e-05, "loss": 1.9225, "step": 20741000 }, { "epoch": 60.04, "learning_rate": 1.9991705550666914e-05, "loss": 1.959, "step": 20741500 }, { "epoch": 60.04, "learning_rate": 1.9990981903019637e-05, "loss": 1.9471, "step": 20742000 }, { "epoch": 60.04, "learning_rate": 1.999025825537236e-05, "loss": 1.9377, "step": 20742500 }, { "epoch": 60.04, "learning_rate": 1.9989536055020378e-05, "loss": 1.9215, "step": 20743000 }, { "epoch": 60.04, "learning_rate": 1.9988812407373103e-05, "loss": 1.9298, "step": 20743500 }, { "epoch": 60.05, "learning_rate": 1.9988090207021122e-05, "loss": 1.9344, "step": 20744000 }, { "epoch": 60.05, "learning_rate": 1.9987366559373845e-05, "loss": 1.9229, "step": 20744500 }, { "epoch": 60.05, "learning_rate": 1.9986642911726567e-05, "loss": 1.9634, "step": 20745000 }, { "epoch": 60.05, "learning_rate": 1.998591926407929e-05, "loss": 1.9367, "step": 20745500 }, { "epoch": 60.05, "learning_rate": 1.9985197063727308e-05, "loss": 1.9334, "step": 20746000 }, { "epoch": 60.05, "learning_rate": 1.998447341608003e-05, "loss": 1.9636, "step": 20746500 }, { "epoch": 60.05, "learning_rate": 1.9983749768432756e-05, "loss": 1.9253, "step": 20747000 }, { "epoch": 60.06, "learning_rate": 1.9983026120785478e-05, "loss": 1.9501, "step": 20747500 }, { "epoch": 60.06, "learning_rate": 1.99823024731382e-05, "loss": 1.9386, "step": 20748000 }, { "epoch": 60.06, "learning_rate": 1.9981578825490923e-05, "loss": 1.9489, "step": 20748500 }, { "epoch": 60.06, "learning_rate": 1.9980855177843648e-05, "loss": 1.9336, "step": 20749000 }, { "epoch": 60.06, "learning_rate": 1.9980132977491664e-05, "loss": 1.9203, "step": 20749500 }, { "epoch": 60.06, "learning_rate": 1.9979410777139683e-05, "loss": 1.9305, "step": 20750000 }, { "epoch": 60.06, "learning_rate": 1.9978687129492405e-05, "loss": 1.9325, "step": 20750500 }, { "epoch": 60.07, "learning_rate": 1.997796348184513e-05, "loss": 1.9719, "step": 20751000 }, { "epoch": 60.07, "learning_rate": 1.9977239834197853e-05, "loss": 1.9329, "step": 20751500 }, { "epoch": 60.07, "learning_rate": 1.997651763384587e-05, "loss": 1.9432, "step": 20752000 }, { "epoch": 60.07, "learning_rate": 1.9975793986198594e-05, "loss": 1.9371, "step": 20752500 }, { "epoch": 60.07, "learning_rate": 1.9975070338551316e-05, "loss": 1.9303, "step": 20753000 }, { "epoch": 60.07, "learning_rate": 1.997434669090404e-05, "loss": 1.9218, "step": 20753500 }, { "epoch": 60.07, "learning_rate": 1.997362304325676e-05, "loss": 1.9157, "step": 20754000 }, { "epoch": 60.08, "learning_rate": 1.9972899395609486e-05, "loss": 1.9439, "step": 20754500 }, { "epoch": 60.08, "learning_rate": 1.9972175747962212e-05, "loss": 1.9372, "step": 20755000 }, { "epoch": 60.08, "learning_rate": 1.9971452100314934e-05, "loss": 1.9128, "step": 20755500 }, { "epoch": 60.08, "learning_rate": 1.9970728452667656e-05, "loss": 1.923, "step": 20756000 }, { "epoch": 60.08, "learning_rate": 1.9970006252315672e-05, "loss": 1.9433, "step": 20756500 }, { "epoch": 60.08, "learning_rate": 1.9969282604668397e-05, "loss": 1.9363, "step": 20757000 }, { "epoch": 60.08, "learning_rate": 1.996855895702112e-05, "loss": 1.9323, "step": 20757500 }, { "epoch": 60.09, "learning_rate": 1.9967835309373842e-05, "loss": 1.9208, "step": 20758000 }, { "epoch": 60.09, "learning_rate": 1.9967111661726568e-05, "loss": 1.9068, "step": 20758500 }, { "epoch": 60.09, "learning_rate": 1.996638801407929e-05, "loss": 1.9254, "step": 20759000 }, { "epoch": 60.09, "learning_rate": 1.9965664366432012e-05, "loss": 1.9391, "step": 20759500 }, { "epoch": 60.09, "learning_rate": 1.9964940718784734e-05, "loss": 1.9477, "step": 20760000 }, { "epoch": 60.09, "learning_rate": 1.9964218518432753e-05, "loss": 1.9535, "step": 20760500 }, { "epoch": 60.09, "learning_rate": 1.9963494870785475e-05, "loss": 1.9414, "step": 20761000 }, { "epoch": 60.1, "learning_rate": 1.9962771223138198e-05, "loss": 1.9518, "step": 20761500 }, { "epoch": 60.1, "learning_rate": 1.9962047575490923e-05, "loss": 1.9409, "step": 20762000 }, { "epoch": 60.1, "learning_rate": 1.996132392784365e-05, "loss": 1.9356, "step": 20762500 }, { "epoch": 60.1, "learning_rate": 1.996060028019637e-05, "loss": 1.9307, "step": 20763000 }, { "epoch": 60.1, "learning_rate": 1.9959876632549093e-05, "loss": 1.9453, "step": 20763500 }, { "epoch": 60.1, "learning_rate": 1.9959152984901816e-05, "loss": 1.9453, "step": 20764000 }, { "epoch": 60.1, "learning_rate": 1.9958429337254538e-05, "loss": 1.9511, "step": 20764500 }, { "epoch": 60.11, "learning_rate": 1.9957707136902557e-05, "loss": 1.9389, "step": 20765000 }, { "epoch": 60.11, "learning_rate": 1.9956983489255282e-05, "loss": 1.9269, "step": 20765500 }, { "epoch": 60.11, "learning_rate": 1.9956259841608005e-05, "loss": 1.9368, "step": 20766000 }, { "epoch": 60.11, "learning_rate": 1.9955536193960727e-05, "loss": 1.911, "step": 20766500 }, { "epoch": 60.11, "learning_rate": 1.995481254631345e-05, "loss": 1.9436, "step": 20767000 }, { "epoch": 60.11, "learning_rate": 1.9954088898666175e-05, "loss": 1.9165, "step": 20767500 }, { "epoch": 60.11, "learning_rate": 1.9953365251018897e-05, "loss": 1.9563, "step": 20768000 }, { "epoch": 60.12, "learning_rate": 1.995264160337162e-05, "loss": 1.9342, "step": 20768500 }, { "epoch": 60.12, "learning_rate": 1.9951917955724345e-05, "loss": 1.9343, "step": 20769000 }, { "epoch": 60.12, "learning_rate": 1.9951194308077067e-05, "loss": 1.9439, "step": 20769500 }, { "epoch": 60.12, "learning_rate": 1.995047066042979e-05, "loss": 1.9315, "step": 20770000 }, { "epoch": 60.12, "learning_rate": 1.994974701278251e-05, "loss": 1.9068, "step": 20770500 }, { "epoch": 60.12, "learning_rate": 1.994902481243053e-05, "loss": 1.936, "step": 20771000 }, { "epoch": 60.12, "learning_rate": 1.994830261207855e-05, "loss": 1.9516, "step": 20771500 }, { "epoch": 60.13, "learning_rate": 1.9947580411726565e-05, "loss": 1.9429, "step": 20772000 }, { "epoch": 60.13, "learning_rate": 1.9946856764079287e-05, "loss": 1.9567, "step": 20772500 }, { "epoch": 60.13, "learning_rate": 1.9946133116432013e-05, "loss": 1.9312, "step": 20773000 }, { "epoch": 60.13, "learning_rate": 1.994540946878474e-05, "loss": 1.9541, "step": 20773500 }, { "epoch": 60.13, "learning_rate": 1.994468582113746e-05, "loss": 1.9295, "step": 20774000 }, { "epoch": 60.13, "learning_rate": 1.9943963620785476e-05, "loss": 1.9416, "step": 20774500 }, { "epoch": 60.14, "learning_rate": 1.9943239973138202e-05, "loss": 1.9196, "step": 20775000 }, { "epoch": 60.14, "learning_rate": 1.9942517772786217e-05, "loss": 1.9131, "step": 20775500 }, { "epoch": 60.14, "learning_rate": 1.994179412513894e-05, "loss": 1.9224, "step": 20776000 }, { "epoch": 60.14, "learning_rate": 1.9941070477491662e-05, "loss": 1.9492, "step": 20776500 }, { "epoch": 60.14, "learning_rate": 1.9940346829844388e-05, "loss": 1.9277, "step": 20777000 }, { "epoch": 60.14, "learning_rate": 1.9939623182197113e-05, "loss": 1.9296, "step": 20777500 }, { "epoch": 60.14, "learning_rate": 1.9938899534549835e-05, "loss": 1.9468, "step": 20778000 }, { "epoch": 60.15, "learning_rate": 1.993817733419785e-05, "loss": 1.9451, "step": 20778500 }, { "epoch": 60.15, "learning_rate": 1.9937453686550577e-05, "loss": 1.9661, "step": 20779000 }, { "epoch": 60.15, "learning_rate": 1.99367300389033e-05, "loss": 1.9328, "step": 20779500 }, { "epoch": 60.15, "learning_rate": 1.993600639125602e-05, "loss": 1.9516, "step": 20780000 }, { "epoch": 60.15, "learning_rate": 1.9935282743608743e-05, "loss": 1.9217, "step": 20780500 }, { "epoch": 60.15, "learning_rate": 1.993455909596147e-05, "loss": 1.9373, "step": 20781000 }, { "epoch": 60.15, "learning_rate": 1.993383544831419e-05, "loss": 1.9334, "step": 20781500 }, { "epoch": 60.16, "learning_rate": 1.9933111800666913e-05, "loss": 1.9194, "step": 20782000 }, { "epoch": 60.16, "learning_rate": 1.993238815301964e-05, "loss": 1.9364, "step": 20782500 }, { "epoch": 60.16, "learning_rate": 1.9931665952667655e-05, "loss": 1.9478, "step": 20783000 }, { "epoch": 60.16, "learning_rate": 1.9930942305020377e-05, "loss": 1.9413, "step": 20783500 }, { "epoch": 60.16, "learning_rate": 1.9930218657373102e-05, "loss": 1.9332, "step": 20784000 }, { "epoch": 60.16, "learning_rate": 1.9929495009725828e-05, "loss": 1.9471, "step": 20784500 }, { "epoch": 60.16, "learning_rate": 1.9928772809373844e-05, "loss": 1.9273, "step": 20785000 }, { "epoch": 60.17, "learning_rate": 1.9928049161726566e-05, "loss": 1.9429, "step": 20785500 }, { "epoch": 60.17, "learning_rate": 1.992732551407929e-05, "loss": 1.9279, "step": 20786000 }, { "epoch": 60.17, "learning_rate": 1.9926601866432014e-05, "loss": 1.9395, "step": 20786500 }, { "epoch": 60.17, "learning_rate": 1.992587966608003e-05, "loss": 1.96, "step": 20787000 }, { "epoch": 60.17, "learning_rate": 1.992515601843275e-05, "loss": 1.921, "step": 20787500 }, { "epoch": 60.17, "learning_rate": 1.9924432370785477e-05, "loss": 1.9333, "step": 20788000 }, { "epoch": 60.17, "learning_rate": 1.9923708723138203e-05, "loss": 1.9566, "step": 20788500 }, { "epoch": 60.18, "learning_rate": 1.9922985075490925e-05, "loss": 1.9487, "step": 20789000 }, { "epoch": 60.18, "learning_rate": 1.9922261427843647e-05, "loss": 1.9105, "step": 20789500 }, { "epoch": 60.18, "learning_rate": 1.992153778019637e-05, "loss": 1.9311, "step": 20790000 }, { "epoch": 60.18, "learning_rate": 1.992081413254909e-05, "loss": 1.9437, "step": 20790500 }, { "epoch": 60.18, "learning_rate": 1.9920090484901814e-05, "loss": 1.9535, "step": 20791000 }, { "epoch": 60.18, "learning_rate": 1.9919368284549833e-05, "loss": 1.9431, "step": 20791500 }, { "epoch": 60.18, "learning_rate": 1.991864463690256e-05, "loss": 1.9289, "step": 20792000 }, { "epoch": 60.19, "learning_rate": 1.991792098925528e-05, "loss": 1.9458, "step": 20792500 }, { "epoch": 60.19, "learning_rate": 1.9917197341608003e-05, "loss": 1.9518, "step": 20793000 }, { "epoch": 60.19, "learning_rate": 1.991647369396073e-05, "loss": 1.9469, "step": 20793500 }, { "epoch": 60.19, "learning_rate": 1.9915751493608744e-05, "loss": 1.9622, "step": 20794000 }, { "epoch": 60.19, "learning_rate": 1.9915029293256763e-05, "loss": 1.95, "step": 20794500 }, { "epoch": 60.19, "learning_rate": 1.9914305645609485e-05, "loss": 1.9406, "step": 20795000 }, { "epoch": 60.19, "learning_rate": 1.9913581997962207e-05, "loss": 1.948, "step": 20795500 }, { "epoch": 60.2, "learning_rate": 1.9912858350314933e-05, "loss": 1.9476, "step": 20796000 }, { "epoch": 60.2, "learning_rate": 1.9912134702667655e-05, "loss": 1.9292, "step": 20796500 }, { "epoch": 60.2, "learning_rate": 1.9911412502315674e-05, "loss": 1.9404, "step": 20797000 }, { "epoch": 60.2, "learning_rate": 1.9910688854668396e-05, "loss": 1.9463, "step": 20797500 }, { "epoch": 60.2, "learning_rate": 1.990996520702112e-05, "loss": 1.9609, "step": 20798000 }, { "epoch": 60.2, "learning_rate": 1.990924155937384e-05, "loss": 1.9483, "step": 20798500 }, { "epoch": 60.2, "learning_rate": 1.9908517911726567e-05, "loss": 1.9544, "step": 20799000 }, { "epoch": 60.21, "learning_rate": 1.9907795711374582e-05, "loss": 1.9461, "step": 20799500 }, { "epoch": 60.21, "learning_rate": 1.9907072063727308e-05, "loss": 1.9255, "step": 20800000 }, { "epoch": 60.21, "learning_rate": 1.990634841608003e-05, "loss": 1.9503, "step": 20800500 }, { "epoch": 60.21, "learning_rate": 1.9905624768432756e-05, "loss": 1.9538, "step": 20801000 }, { "epoch": 60.21, "learning_rate": 1.9904901120785478e-05, "loss": 1.9568, "step": 20801500 }, { "epoch": 60.21, "learning_rate": 1.99041774731382e-05, "loss": 1.9412, "step": 20802000 }, { "epoch": 60.21, "learning_rate": 1.9903453825490922e-05, "loss": 1.9341, "step": 20802500 }, { "epoch": 60.22, "learning_rate": 1.9902730177843648e-05, "loss": 1.9402, "step": 20803000 }, { "epoch": 60.22, "learning_rate": 1.990200653019637e-05, "loss": 1.9042, "step": 20803500 }, { "epoch": 60.22, "learning_rate": 1.9901282882549092e-05, "loss": 1.9244, "step": 20804000 }, { "epoch": 60.22, "learning_rate": 1.9900559234901818e-05, "loss": 1.9403, "step": 20804500 }, { "epoch": 60.22, "learning_rate": 1.989983558725454e-05, "loss": 1.955, "step": 20805000 }, { "epoch": 60.22, "learning_rate": 1.9899111939607262e-05, "loss": 1.937, "step": 20805500 }, { "epoch": 60.22, "learning_rate": 1.9898391186550575e-05, "loss": 1.9345, "step": 20806000 }, { "epoch": 60.23, "learning_rate": 1.9897667538903297e-05, "loss": 1.9327, "step": 20806500 }, { "epoch": 60.23, "learning_rate": 1.9896943891256023e-05, "loss": 1.9523, "step": 20807000 }, { "epoch": 60.23, "learning_rate": 1.9896220243608745e-05, "loss": 1.9445, "step": 20807500 }, { "epoch": 60.23, "learning_rate": 1.9895498043256764e-05, "loss": 1.937, "step": 20808000 }, { "epoch": 60.23, "learning_rate": 1.9894774395609486e-05, "loss": 1.9445, "step": 20808500 }, { "epoch": 60.23, "learning_rate": 1.9894050747962208e-05, "loss": 1.9411, "step": 20809000 }, { "epoch": 60.23, "learning_rate": 1.989332710031493e-05, "loss": 1.9035, "step": 20809500 }, { "epoch": 60.24, "learning_rate": 1.989260489996295e-05, "loss": 1.9654, "step": 20810000 }, { "epoch": 60.24, "learning_rate": 1.989188125231567e-05, "loss": 1.9658, "step": 20810500 }, { "epoch": 60.24, "learning_rate": 1.9891157604668397e-05, "loss": 1.9595, "step": 20811000 }, { "epoch": 60.24, "learning_rate": 1.989043395702112e-05, "loss": 1.948, "step": 20811500 }, { "epoch": 60.24, "learning_rate": 1.9889710309373845e-05, "loss": 1.9181, "step": 20812000 }, { "epoch": 60.24, "learning_rate": 1.988898810902186e-05, "loss": 1.9531, "step": 20812500 }, { "epoch": 60.25, "learning_rate": 1.9888264461374583e-05, "loss": 1.9457, "step": 20813000 }, { "epoch": 60.25, "learning_rate": 1.9887540813727305e-05, "loss": 1.9168, "step": 20813500 }, { "epoch": 60.25, "learning_rate": 1.988681716608003e-05, "loss": 1.9439, "step": 20814000 }, { "epoch": 60.25, "learning_rate": 1.9886093518432756e-05, "loss": 1.9635, "step": 20814500 }, { "epoch": 60.25, "learning_rate": 1.988536987078548e-05, "loss": 1.948, "step": 20815000 }, { "epoch": 60.25, "learning_rate": 1.9884647670433494e-05, "loss": 1.9486, "step": 20815500 }, { "epoch": 60.25, "learning_rate": 1.988392402278622e-05, "loss": 1.9206, "step": 20816000 }, { "epoch": 60.26, "learning_rate": 1.9883200375138942e-05, "loss": 1.9245, "step": 20816500 }, { "epoch": 60.26, "learning_rate": 1.9882476727491664e-05, "loss": 1.9484, "step": 20817000 }, { "epoch": 60.26, "learning_rate": 1.9881753079844387e-05, "loss": 1.9653, "step": 20817500 }, { "epoch": 60.26, "learning_rate": 1.9881030879492405e-05, "loss": 1.9453, "step": 20818000 }, { "epoch": 60.26, "learning_rate": 1.988030723184513e-05, "loss": 1.9417, "step": 20818500 }, { "epoch": 60.26, "learning_rate": 1.9879583584197853e-05, "loss": 1.9367, "step": 20819000 }, { "epoch": 60.26, "learning_rate": 1.9878859936550576e-05, "loss": 1.9439, "step": 20819500 }, { "epoch": 60.27, "learning_rate": 1.9878136288903298e-05, "loss": 1.9103, "step": 20820000 }, { "epoch": 60.27, "learning_rate": 1.987741264125602e-05, "loss": 1.9353, "step": 20820500 }, { "epoch": 60.27, "learning_rate": 1.9876688993608746e-05, "loss": 1.938, "step": 20821000 }, { "epoch": 60.27, "learning_rate": 1.9875965345961468e-05, "loss": 1.9159, "step": 20821500 }, { "epoch": 60.27, "learning_rate": 1.9875241698314193e-05, "loss": 1.9231, "step": 20822000 }, { "epoch": 60.27, "learning_rate": 1.9874518050666916e-05, "loss": 1.9502, "step": 20822500 }, { "epoch": 60.27, "learning_rate": 1.9873794403019638e-05, "loss": 1.9721, "step": 20823000 }, { "epoch": 60.28, "learning_rate": 1.987307075537236e-05, "loss": 1.9332, "step": 20823500 }, { "epoch": 60.28, "learning_rate": 1.9872347107725082e-05, "loss": 1.951, "step": 20824000 }, { "epoch": 60.28, "learning_rate": 1.9871626354668395e-05, "loss": 1.9238, "step": 20824500 }, { "epoch": 60.28, "learning_rate": 1.987090270702112e-05, "loss": 1.9173, "step": 20825000 }, { "epoch": 60.28, "learning_rate": 1.9870179059373843e-05, "loss": 1.9239, "step": 20825500 }, { "epoch": 60.28, "learning_rate": 1.9869455411726568e-05, "loss": 1.9566, "step": 20826000 }, { "epoch": 60.28, "learning_rate": 1.986873176407929e-05, "loss": 1.9247, "step": 20826500 }, { "epoch": 60.29, "learning_rate": 1.986800956372731e-05, "loss": 1.9488, "step": 20827000 }, { "epoch": 60.29, "learning_rate": 1.986728591608003e-05, "loss": 1.9297, "step": 20827500 }, { "epoch": 60.29, "learning_rate": 1.9866562268432754e-05, "loss": 1.9335, "step": 20828000 }, { "epoch": 60.29, "learning_rate": 1.986584006808077e-05, "loss": 1.9385, "step": 20828500 }, { "epoch": 60.29, "learning_rate": 1.9865116420433495e-05, "loss": 1.948, "step": 20829000 }, { "epoch": 60.29, "learning_rate": 1.986439422008151e-05, "loss": 1.9549, "step": 20829500 }, { "epoch": 60.29, "learning_rate": 1.9863670572434236e-05, "loss": 1.9388, "step": 20830000 }, { "epoch": 60.3, "learning_rate": 1.986294692478696e-05, "loss": 1.9437, "step": 20830500 }, { "epoch": 60.3, "learning_rate": 1.9862223277139684e-05, "loss": 1.9401, "step": 20831000 }, { "epoch": 60.3, "learning_rate": 1.9861499629492406e-05, "loss": 1.9466, "step": 20831500 }, { "epoch": 60.3, "learning_rate": 1.986077598184513e-05, "loss": 1.9628, "step": 20832000 }, { "epoch": 60.3, "learning_rate": 1.986005233419785e-05, "loss": 1.9266, "step": 20832500 }, { "epoch": 60.3, "learning_rate": 1.9859328686550573e-05, "loss": 1.9262, "step": 20833000 }, { "epoch": 60.3, "learning_rate": 1.98586050389033e-05, "loss": 1.9448, "step": 20833500 }, { "epoch": 60.31, "learning_rate": 1.985788139125602e-05, "loss": 1.9378, "step": 20834000 }, { "epoch": 60.31, "learning_rate": 1.9857157743608746e-05, "loss": 1.9424, "step": 20834500 }, { "epoch": 60.31, "learning_rate": 1.9856435543256762e-05, "loss": 1.9324, "step": 20835000 }, { "epoch": 60.31, "learning_rate": 1.9855711895609484e-05, "loss": 1.9294, "step": 20835500 }, { "epoch": 60.31, "learning_rate": 1.985498824796221e-05, "loss": 1.9155, "step": 20836000 }, { "epoch": 60.31, "learning_rate": 1.9854264600314932e-05, "loss": 1.9619, "step": 20836500 }, { "epoch": 60.31, "learning_rate": 1.9853540952667658e-05, "loss": 1.928, "step": 20837000 }, { "epoch": 60.32, "learning_rate": 1.985281730502038e-05, "loss": 1.9443, "step": 20837500 }, { "epoch": 60.32, "learning_rate": 1.9852093657373102e-05, "loss": 1.9452, "step": 20838000 }, { "epoch": 60.32, "learning_rate": 1.985137145702112e-05, "loss": 1.9538, "step": 20838500 }, { "epoch": 60.32, "learning_rate": 1.9850647809373843e-05, "loss": 1.9567, "step": 20839000 }, { "epoch": 60.32, "learning_rate": 1.9849924161726566e-05, "loss": 1.943, "step": 20839500 }, { "epoch": 60.32, "learning_rate": 1.9849200514079288e-05, "loss": 1.9732, "step": 20840000 }, { "epoch": 60.32, "learning_rate": 1.984847686643201e-05, "loss": 1.9384, "step": 20840500 }, { "epoch": 60.33, "learning_rate": 1.9847753218784736e-05, "loss": 1.9267, "step": 20841000 }, { "epoch": 60.33, "learning_rate": 1.9847031018432755e-05, "loss": 1.9255, "step": 20841500 }, { "epoch": 60.33, "learning_rate": 1.9846307370785477e-05, "loss": 1.9433, "step": 20842000 }, { "epoch": 60.33, "learning_rate": 1.98455837231382e-05, "loss": 1.9163, "step": 20842500 }, { "epoch": 60.33, "learning_rate": 1.9844860075490925e-05, "loss": 1.9691, "step": 20843000 }, { "epoch": 60.33, "learning_rate": 1.984413787513894e-05, "loss": 1.9293, "step": 20843500 }, { "epoch": 60.33, "learning_rate": 1.9843414227491662e-05, "loss": 1.9412, "step": 20844000 }, { "epoch": 60.34, "learning_rate": 1.9842690579844388e-05, "loss": 1.9424, "step": 20844500 }, { "epoch": 60.34, "learning_rate": 1.984196693219711e-05, "loss": 1.9579, "step": 20845000 }, { "epoch": 60.34, "learning_rate": 1.9841243284549836e-05, "loss": 1.9433, "step": 20845500 }, { "epoch": 60.34, "learning_rate": 1.9840519636902558e-05, "loss": 1.9245, "step": 20846000 }, { "epoch": 60.34, "learning_rate": 1.9839797436550574e-05, "loss": 1.9191, "step": 20846500 }, { "epoch": 60.34, "learning_rate": 1.98390737889033e-05, "loss": 1.9519, "step": 20847000 }, { "epoch": 60.34, "learning_rate": 1.983835014125602e-05, "loss": 1.9436, "step": 20847500 }, { "epoch": 60.35, "learning_rate": 1.9837626493608744e-05, "loss": 1.9037, "step": 20848000 }, { "epoch": 60.35, "learning_rate": 1.983690284596147e-05, "loss": 1.926, "step": 20848500 }, { "epoch": 60.35, "learning_rate": 1.983617919831419e-05, "loss": 1.9663, "step": 20849000 }, { "epoch": 60.35, "learning_rate": 1.9835455550666914e-05, "loss": 1.9393, "step": 20849500 }, { "epoch": 60.35, "learning_rate": 1.9834731903019636e-05, "loss": 1.9431, "step": 20850000 }, { "epoch": 60.35, "learning_rate": 1.9834008255372362e-05, "loss": 1.9416, "step": 20850500 }, { "epoch": 60.36, "learning_rate": 1.9833286055020377e-05, "loss": 1.9596, "step": 20851000 }, { "epoch": 60.36, "learning_rate": 1.98325624073731e-05, "loss": 1.9299, "step": 20851500 }, { "epoch": 60.36, "learning_rate": 1.9831838759725825e-05, "loss": 1.9338, "step": 20852000 }, { "epoch": 60.36, "learning_rate": 1.9831116559373844e-05, "loss": 1.9349, "step": 20852500 }, { "epoch": 60.36, "learning_rate": 1.9830392911726566e-05, "loss": 1.9304, "step": 20853000 }, { "epoch": 60.36, "learning_rate": 1.982966926407929e-05, "loss": 1.9254, "step": 20853500 }, { "epoch": 60.36, "learning_rate": 1.9828945616432014e-05, "loss": 1.8997, "step": 20854000 }, { "epoch": 60.37, "learning_rate": 1.9828221968784736e-05, "loss": 1.9261, "step": 20854500 }, { "epoch": 60.37, "learning_rate": 1.982749832113746e-05, "loss": 1.9152, "step": 20855000 }, { "epoch": 60.37, "learning_rate": 1.9826774673490184e-05, "loss": 1.9513, "step": 20855500 }, { "epoch": 60.37, "learning_rate": 1.9826051025842907e-05, "loss": 1.9638, "step": 20856000 }, { "epoch": 60.37, "learning_rate": 1.982532737819563e-05, "loss": 1.95, "step": 20856500 }, { "epoch": 60.37, "learning_rate": 1.982460373054835e-05, "loss": 1.9322, "step": 20857000 }, { "epoch": 60.37, "learning_rate": 1.9823880082901077e-05, "loss": 1.9456, "step": 20857500 }, { "epoch": 60.38, "learning_rate": 1.98231564352538e-05, "loss": 1.9391, "step": 20858000 }, { "epoch": 60.38, "learning_rate": 1.982243278760652e-05, "loss": 1.9639, "step": 20858500 }, { "epoch": 60.38, "learning_rate": 1.9821709139959247e-05, "loss": 1.9467, "step": 20859000 }, { "epoch": 60.38, "learning_rate": 1.9820986939607262e-05, "loss": 1.9502, "step": 20859500 }, { "epoch": 60.38, "learning_rate": 1.9820263291959988e-05, "loss": 1.9625, "step": 20860000 }, { "epoch": 60.38, "learning_rate": 1.981953964431271e-05, "loss": 1.9505, "step": 20860500 }, { "epoch": 60.38, "learning_rate": 1.9818817443960726e-05, "loss": 1.9416, "step": 20861000 }, { "epoch": 60.39, "learning_rate": 1.9818095243608745e-05, "loss": 1.9284, "step": 20861500 }, { "epoch": 60.39, "learning_rate": 1.9817371595961467e-05, "loss": 1.9407, "step": 20862000 }, { "epoch": 60.39, "learning_rate": 1.981664794831419e-05, "loss": 1.9551, "step": 20862500 }, { "epoch": 60.39, "learning_rate": 1.9815924300666915e-05, "loss": 1.9441, "step": 20863000 }, { "epoch": 60.39, "learning_rate": 1.981520065301964e-05, "loss": 1.9494, "step": 20863500 }, { "epoch": 60.39, "learning_rate": 1.9814477005372363e-05, "loss": 1.9674, "step": 20864000 }, { "epoch": 60.39, "learning_rate": 1.9813753357725085e-05, "loss": 1.9436, "step": 20864500 }, { "epoch": 60.4, "learning_rate": 1.9813029710077807e-05, "loss": 1.9393, "step": 20865000 }, { "epoch": 60.4, "learning_rate": 1.981230606243053e-05, "loss": 1.9464, "step": 20865500 }, { "epoch": 60.4, "learning_rate": 1.9811583862078548e-05, "loss": 1.9406, "step": 20866000 }, { "epoch": 60.4, "learning_rate": 1.981086021443127e-05, "loss": 1.9573, "step": 20866500 }, { "epoch": 60.4, "learning_rate": 1.9810136566783996e-05, "loss": 1.9401, "step": 20867000 }, { "epoch": 60.4, "learning_rate": 1.9809412919136718e-05, "loss": 1.9434, "step": 20867500 }, { "epoch": 60.4, "learning_rate": 1.9808690718784737e-05, "loss": 1.9572, "step": 20868000 }, { "epoch": 60.41, "learning_rate": 1.980796707113746e-05, "loss": 1.9394, "step": 20868500 }, { "epoch": 60.41, "learning_rate": 1.980724487078548e-05, "loss": 1.9361, "step": 20869000 }, { "epoch": 60.41, "learning_rate": 1.98065212231382e-05, "loss": 1.9629, "step": 20869500 }, { "epoch": 60.41, "learning_rate": 1.9805797575490923e-05, "loss": 1.9158, "step": 20870000 }, { "epoch": 60.41, "learning_rate": 1.980507537513894e-05, "loss": 1.9545, "step": 20870500 }, { "epoch": 60.41, "learning_rate": 1.9804351727491664e-05, "loss": 1.9229, "step": 20871000 }, { "epoch": 60.41, "learning_rate": 1.980362807984439e-05, "loss": 1.9406, "step": 20871500 }, { "epoch": 60.42, "learning_rate": 1.9802905879492405e-05, "loss": 1.9538, "step": 20872000 }, { "epoch": 60.42, "learning_rate": 1.9802182231845127e-05, "loss": 1.9674, "step": 20872500 }, { "epoch": 60.42, "learning_rate": 1.9801458584197853e-05, "loss": 1.9384, "step": 20873000 }, { "epoch": 60.42, "learning_rate": 1.9800734936550575e-05, "loss": 1.9247, "step": 20873500 }, { "epoch": 60.42, "learning_rate": 1.9800011288903298e-05, "loss": 1.9419, "step": 20874000 }, { "epoch": 60.42, "learning_rate": 1.9799287641256023e-05, "loss": 1.9795, "step": 20874500 }, { "epoch": 60.42, "learning_rate": 1.9798563993608745e-05, "loss": 1.9657, "step": 20875000 }, { "epoch": 60.43, "learning_rate": 1.9797840345961468e-05, "loss": 1.9693, "step": 20875500 }, { "epoch": 60.43, "learning_rate": 1.979711669831419e-05, "loss": 1.9429, "step": 20876000 }, { "epoch": 60.43, "learning_rate": 1.9796393050666916e-05, "loss": 1.9576, "step": 20876500 }, { "epoch": 60.43, "learning_rate": 1.9795669403019638e-05, "loss": 1.9657, "step": 20877000 }, { "epoch": 60.43, "learning_rate": 1.979494575537236e-05, "loss": 1.9307, "step": 20877500 }, { "epoch": 60.43, "learning_rate": 1.9794222107725086e-05, "loss": 1.9438, "step": 20878000 }, { "epoch": 60.43, "learning_rate": 1.9793498460077808e-05, "loss": 1.9423, "step": 20878500 }, { "epoch": 60.44, "learning_rate": 1.979277481243053e-05, "loss": 1.9364, "step": 20879000 }, { "epoch": 60.44, "learning_rate": 1.979205261207855e-05, "loss": 1.9417, "step": 20879500 }, { "epoch": 60.44, "learning_rate": 1.979132896443127e-05, "loss": 1.9307, "step": 20880000 }, { "epoch": 60.44, "learning_rate": 1.9790605316783993e-05, "loss": 1.9476, "step": 20880500 }, { "epoch": 60.44, "learning_rate": 1.9789881669136716e-05, "loss": 1.9457, "step": 20881000 }, { "epoch": 60.44, "learning_rate": 1.978915802148944e-05, "loss": 1.9641, "step": 20881500 }, { "epoch": 60.44, "learning_rate": 1.9788434373842167e-05, "loss": 1.9505, "step": 20882000 }, { "epoch": 60.45, "learning_rate": 1.978771072619489e-05, "loss": 1.9406, "step": 20882500 }, { "epoch": 60.45, "learning_rate": 1.978698707854761e-05, "loss": 1.9502, "step": 20883000 }, { "epoch": 60.45, "learning_rate": 1.9786263430900334e-05, "loss": 1.9369, "step": 20883500 }, { "epoch": 60.45, "learning_rate": 1.9785539783253056e-05, "loss": 1.9639, "step": 20884000 }, { "epoch": 60.45, "learning_rate": 1.978481613560578e-05, "loss": 1.9448, "step": 20884500 }, { "epoch": 60.45, "learning_rate": 1.9784093935253797e-05, "loss": 1.957, "step": 20885000 }, { "epoch": 60.45, "learning_rate": 1.9783370287606523e-05, "loss": 1.9285, "step": 20885500 }, { "epoch": 60.46, "learning_rate": 1.9782646639959245e-05, "loss": 1.9498, "step": 20886000 }, { "epoch": 60.46, "learning_rate": 1.9781922992311967e-05, "loss": 1.9414, "step": 20886500 }, { "epoch": 60.46, "learning_rate": 1.9781199344664693e-05, "loss": 1.9335, "step": 20887000 }, { "epoch": 60.46, "learning_rate": 1.9780475697017415e-05, "loss": 1.9682, "step": 20887500 }, { "epoch": 60.46, "learning_rate": 1.9779752049370137e-05, "loss": 1.9408, "step": 20888000 }, { "epoch": 60.46, "learning_rate": 1.977902840172286e-05, "loss": 1.9463, "step": 20888500 }, { "epoch": 60.47, "learning_rate": 1.9778306201370882e-05, "loss": 1.9241, "step": 20889000 }, { "epoch": 60.47, "learning_rate": 1.9777582553723604e-05, "loss": 1.9592, "step": 20889500 }, { "epoch": 60.47, "learning_rate": 1.9776858906076326e-05, "loss": 1.9701, "step": 20890000 }, { "epoch": 60.47, "learning_rate": 1.977613525842905e-05, "loss": 1.9529, "step": 20890500 }, { "epoch": 60.47, "learning_rate": 1.977541161078177e-05, "loss": 1.9536, "step": 20891000 }, { "epoch": 60.47, "learning_rate": 1.9774687963134493e-05, "loss": 1.9387, "step": 20891500 }, { "epoch": 60.47, "learning_rate": 1.977396431548722e-05, "loss": 1.9599, "step": 20892000 }, { "epoch": 60.48, "learning_rate": 1.977324356243053e-05, "loss": 1.934, "step": 20892500 }, { "epoch": 60.48, "learning_rate": 1.9772519914783256e-05, "loss": 1.9372, "step": 20893000 }, { "epoch": 60.48, "learning_rate": 1.9771797714431272e-05, "loss": 1.9465, "step": 20893500 }, { "epoch": 60.48, "learning_rate": 1.9771074066783994e-05, "loss": 1.9723, "step": 20894000 }, { "epoch": 60.48, "learning_rate": 1.977035041913672e-05, "loss": 1.9508, "step": 20894500 }, { "epoch": 60.48, "learning_rate": 1.9769626771489442e-05, "loss": 1.9449, "step": 20895000 }, { "epoch": 60.48, "learning_rate": 1.9768903123842164e-05, "loss": 1.9452, "step": 20895500 }, { "epoch": 60.49, "learning_rate": 1.9768179476194887e-05, "loss": 1.9335, "step": 20896000 }, { "epoch": 60.49, "learning_rate": 1.9767455828547612e-05, "loss": 1.9445, "step": 20896500 }, { "epoch": 60.49, "learning_rate": 1.9766732180900334e-05, "loss": 1.9448, "step": 20897000 }, { "epoch": 60.49, "learning_rate": 1.9766009980548353e-05, "loss": 1.9522, "step": 20897500 }, { "epoch": 60.49, "learning_rate": 1.9765286332901076e-05, "loss": 1.9356, "step": 20898000 }, { "epoch": 60.49, "learning_rate": 1.9764562685253798e-05, "loss": 1.9659, "step": 20898500 }, { "epoch": 60.49, "learning_rate": 1.976383903760652e-05, "loss": 1.9686, "step": 20899000 }, { "epoch": 60.5, "learning_rate": 1.976311683725454e-05, "loss": 1.9425, "step": 20899500 }, { "epoch": 60.5, "learning_rate": 1.976239318960726e-05, "loss": 1.9394, "step": 20900000 }, { "epoch": 60.5, "learning_rate": 1.9761669541959987e-05, "loss": 1.9638, "step": 20900500 }, { "epoch": 60.5, "learning_rate": 1.976094589431271e-05, "loss": 1.9758, "step": 20901000 }, { "epoch": 60.5, "learning_rate": 1.976022224666543e-05, "loss": 1.9486, "step": 20901500 }, { "epoch": 60.5, "learning_rate": 1.975950004631345e-05, "loss": 1.9393, "step": 20902000 }, { "epoch": 60.5, "learning_rate": 1.975877784596147e-05, "loss": 1.9526, "step": 20902500 }, { "epoch": 60.51, "learning_rate": 1.975805419831419e-05, "loss": 1.972, "step": 20903000 }, { "epoch": 60.51, "learning_rate": 1.9757330550666914e-05, "loss": 1.9452, "step": 20903500 }, { "epoch": 60.51, "learning_rate": 1.9756606903019636e-05, "loss": 1.9414, "step": 20904000 }, { "epoch": 60.51, "learning_rate": 1.975588325537236e-05, "loss": 1.9558, "step": 20904500 }, { "epoch": 60.51, "learning_rate": 1.9755159607725084e-05, "loss": 1.9395, "step": 20905000 }, { "epoch": 60.51, "learning_rate": 1.975443596007781e-05, "loss": 1.9662, "step": 20905500 }, { "epoch": 60.51, "learning_rate": 1.975371231243053e-05, "loss": 1.9406, "step": 20906000 }, { "epoch": 60.52, "learning_rate": 1.9752988664783254e-05, "loss": 1.9466, "step": 20906500 }, { "epoch": 60.52, "learning_rate": 1.975226646443127e-05, "loss": 1.9478, "step": 20907000 }, { "epoch": 60.52, "learning_rate": 1.9751542816783995e-05, "loss": 1.9589, "step": 20907500 }, { "epoch": 60.52, "learning_rate": 1.975081916913672e-05, "loss": 1.9165, "step": 20908000 }, { "epoch": 60.52, "learning_rate": 1.9750095521489443e-05, "loss": 1.9652, "step": 20908500 }, { "epoch": 60.52, "learning_rate": 1.9749371873842165e-05, "loss": 1.9356, "step": 20909000 }, { "epoch": 60.52, "learning_rate": 1.9748648226194887e-05, "loss": 1.9666, "step": 20909500 }, { "epoch": 60.53, "learning_rate": 1.974792457854761e-05, "loss": 1.9258, "step": 20910000 }, { "epoch": 60.53, "learning_rate": 1.9747200930900335e-05, "loss": 1.9329, "step": 20910500 }, { "epoch": 60.53, "learning_rate": 1.9746477283253057e-05, "loss": 1.9463, "step": 20911000 }, { "epoch": 60.53, "learning_rate": 1.9745753635605783e-05, "loss": 1.9638, "step": 20911500 }, { "epoch": 60.53, "learning_rate": 1.9745029987958505e-05, "loss": 1.9615, "step": 20912000 }, { "epoch": 60.53, "learning_rate": 1.9744306340311228e-05, "loss": 1.9248, "step": 20912500 }, { "epoch": 60.53, "learning_rate": 1.9743584139959247e-05, "loss": 1.9731, "step": 20913000 }, { "epoch": 60.54, "learning_rate": 1.974286049231197e-05, "loss": 1.9247, "step": 20913500 }, { "epoch": 60.54, "learning_rate": 1.974213684466469e-05, "loss": 1.9295, "step": 20914000 }, { "epoch": 60.54, "learning_rate": 1.9741413197017413e-05, "loss": 1.9445, "step": 20914500 }, { "epoch": 60.54, "learning_rate": 1.974068954937014e-05, "loss": 1.9435, "step": 20915000 }, { "epoch": 60.54, "learning_rate": 1.973996590172286e-05, "loss": 1.9605, "step": 20915500 }, { "epoch": 60.54, "learning_rate": 1.9739242254075587e-05, "loss": 1.9623, "step": 20916000 }, { "epoch": 60.54, "learning_rate": 1.9738520053723602e-05, "loss": 1.9643, "step": 20916500 }, { "epoch": 60.55, "learning_rate": 1.9737796406076324e-05, "loss": 1.9249, "step": 20917000 }, { "epoch": 60.55, "learning_rate": 1.9737072758429047e-05, "loss": 1.9641, "step": 20917500 }, { "epoch": 60.55, "learning_rate": 1.9736349110781772e-05, "loss": 1.9433, "step": 20918000 }, { "epoch": 60.55, "learning_rate": 1.9735626910429788e-05, "loss": 1.9434, "step": 20918500 }, { "epoch": 60.55, "learning_rate": 1.9734903262782514e-05, "loss": 1.9419, "step": 20919000 }, { "epoch": 60.55, "learning_rate": 1.9734179615135236e-05, "loss": 1.9608, "step": 20919500 }, { "epoch": 60.55, "learning_rate": 1.973345596748796e-05, "loss": 1.9476, "step": 20920000 }, { "epoch": 60.56, "learning_rate": 1.9732732319840684e-05, "loss": 1.9212, "step": 20920500 }, { "epoch": 60.56, "learning_rate": 1.9732008672193406e-05, "loss": 1.9687, "step": 20921000 }, { "epoch": 60.56, "learning_rate": 1.9731286471841425e-05, "loss": 1.9364, "step": 20921500 }, { "epoch": 60.56, "learning_rate": 1.9730562824194147e-05, "loss": 1.952, "step": 20922000 }, { "epoch": 60.56, "learning_rate": 1.9729839176546873e-05, "loss": 1.9579, "step": 20922500 }, { "epoch": 60.56, "learning_rate": 1.9729115528899595e-05, "loss": 1.9588, "step": 20923000 }, { "epoch": 60.56, "learning_rate": 1.9728391881252317e-05, "loss": 1.9502, "step": 20923500 }, { "epoch": 60.57, "learning_rate": 1.972766823360504e-05, "loss": 1.9307, "step": 20924000 }, { "epoch": 60.57, "learning_rate": 1.9726946033253058e-05, "loss": 1.9635, "step": 20924500 }, { "epoch": 60.57, "learning_rate": 1.972622238560578e-05, "loss": 1.9167, "step": 20925000 }, { "epoch": 60.57, "learning_rate": 1.97255001852538e-05, "loss": 1.9449, "step": 20925500 }, { "epoch": 60.57, "learning_rate": 1.972477653760652e-05, "loss": 1.937, "step": 20926000 }, { "epoch": 60.57, "learning_rate": 1.9724052889959247e-05, "loss": 1.9583, "step": 20926500 }, { "epoch": 60.58, "learning_rate": 1.972332924231197e-05, "loss": 1.9709, "step": 20927000 }, { "epoch": 60.58, "learning_rate": 1.9722605594664692e-05, "loss": 1.945, "step": 20927500 }, { "epoch": 60.58, "learning_rate": 1.972188339431271e-05, "loss": 1.9542, "step": 20928000 }, { "epoch": 60.58, "learning_rate": 1.9721159746665433e-05, "loss": 1.9343, "step": 20928500 }, { "epoch": 60.58, "learning_rate": 1.9720436099018155e-05, "loss": 1.9293, "step": 20929000 }, { "epoch": 60.58, "learning_rate": 1.9719712451370877e-05, "loss": 1.9737, "step": 20929500 }, { "epoch": 60.58, "learning_rate": 1.97189888037236e-05, "loss": 1.945, "step": 20930000 }, { "epoch": 60.59, "learning_rate": 1.9718265156076325e-05, "loss": 1.9554, "step": 20930500 }, { "epoch": 60.59, "learning_rate": 1.971754150842905e-05, "loss": 1.9588, "step": 20931000 }, { "epoch": 60.59, "learning_rate": 1.9716817860781773e-05, "loss": 1.9396, "step": 20931500 }, { "epoch": 60.59, "learning_rate": 1.9716094213134495e-05, "loss": 1.9281, "step": 20932000 }, { "epoch": 60.59, "learning_rate": 1.971537201278251e-05, "loss": 1.9383, "step": 20932500 }, { "epoch": 60.59, "learning_rate": 1.971464981243053e-05, "loss": 1.9577, "step": 20933000 }, { "epoch": 60.59, "learning_rate": 1.9713926164783252e-05, "loss": 1.9272, "step": 20933500 }, { "epoch": 60.6, "learning_rate": 1.971320396443127e-05, "loss": 1.9469, "step": 20934000 }, { "epoch": 60.6, "learning_rate": 1.9712480316783997e-05, "loss": 1.9495, "step": 20934500 }, { "epoch": 60.6, "learning_rate": 1.971175666913672e-05, "loss": 1.9565, "step": 20935000 }, { "epoch": 60.6, "learning_rate": 1.971103302148944e-05, "loss": 1.9424, "step": 20935500 }, { "epoch": 60.6, "learning_rate": 1.9710309373842163e-05, "loss": 1.9334, "step": 20936000 }, { "epoch": 60.6, "learning_rate": 1.970958572619489e-05, "loss": 1.9443, "step": 20936500 }, { "epoch": 60.6, "learning_rate": 1.970886207854761e-05, "loss": 1.9336, "step": 20937000 }, { "epoch": 60.61, "learning_rate": 1.9708138430900333e-05, "loss": 1.9211, "step": 20937500 }, { "epoch": 60.61, "learning_rate": 1.970741478325306e-05, "loss": 1.9285, "step": 20938000 }, { "epoch": 60.61, "learning_rate": 1.970669113560578e-05, "loss": 1.9631, "step": 20938500 }, { "epoch": 60.61, "learning_rate": 1.9705967487958504e-05, "loss": 1.9721, "step": 20939000 }, { "epoch": 60.61, "learning_rate": 1.9705243840311226e-05, "loss": 1.9558, "step": 20939500 }, { "epoch": 60.61, "learning_rate": 1.970452019266395e-05, "loss": 1.9289, "step": 20940000 }, { "epoch": 60.61, "learning_rate": 1.9703797992311967e-05, "loss": 1.9603, "step": 20940500 }, { "epoch": 60.62, "learning_rate": 1.970307434466469e-05, "loss": 1.9494, "step": 20941000 }, { "epoch": 60.62, "learning_rate": 1.9702350697017415e-05, "loss": 1.9397, "step": 20941500 }, { "epoch": 60.62, "learning_rate": 1.970162704937014e-05, "loss": 1.951, "step": 20942000 }, { "epoch": 60.62, "learning_rate": 1.9700903401722863e-05, "loss": 1.9472, "step": 20942500 }, { "epoch": 60.62, "learning_rate": 1.9700179754075585e-05, "loss": 1.9505, "step": 20943000 }, { "epoch": 60.62, "learning_rate": 1.96994575537236e-05, "loss": 1.9627, "step": 20943500 }, { "epoch": 60.62, "learning_rate": 1.9698733906076326e-05, "loss": 1.9433, "step": 20944000 }, { "epoch": 60.63, "learning_rate": 1.9698010258429048e-05, "loss": 1.9629, "step": 20944500 }, { "epoch": 60.63, "learning_rate": 1.9697286610781774e-05, "loss": 1.9185, "step": 20945000 }, { "epoch": 60.63, "learning_rate": 1.9696562963134496e-05, "loss": 1.9611, "step": 20945500 }, { "epoch": 60.63, "learning_rate": 1.969583931548722e-05, "loss": 1.9354, "step": 20946000 }, { "epoch": 60.63, "learning_rate": 1.969511566783994e-05, "loss": 1.9451, "step": 20946500 }, { "epoch": 60.63, "learning_rate": 1.969439346748796e-05, "loss": 1.973, "step": 20947000 }, { "epoch": 60.63, "learning_rate": 1.9693669819840682e-05, "loss": 1.9388, "step": 20947500 }, { "epoch": 60.64, "learning_rate": 1.9692946172193404e-05, "loss": 1.9239, "step": 20948000 }, { "epoch": 60.64, "learning_rate": 1.9692222524546126e-05, "loss": 1.9418, "step": 20948500 }, { "epoch": 60.64, "learning_rate": 1.9691498876898852e-05, "loss": 1.9565, "step": 20949000 }, { "epoch": 60.64, "learning_rate": 1.9690778123842164e-05, "loss": 1.9593, "step": 20949500 }, { "epoch": 60.64, "learning_rate": 1.9690055923490183e-05, "loss": 1.9262, "step": 20950000 }, { "epoch": 60.64, "learning_rate": 1.9689332275842905e-05, "loss": 1.935, "step": 20950500 }, { "epoch": 60.64, "learning_rate": 1.9688608628195628e-05, "loss": 1.9549, "step": 20951000 }, { "epoch": 60.65, "learning_rate": 1.9687884980548353e-05, "loss": 1.9509, "step": 20951500 }, { "epoch": 60.65, "learning_rate": 1.9687161332901075e-05, "loss": 1.9401, "step": 20952000 }, { "epoch": 60.65, "learning_rate": 1.9686437685253798e-05, "loss": 1.9484, "step": 20952500 }, { "epoch": 60.65, "learning_rate": 1.9685714037606523e-05, "loss": 1.9524, "step": 20953000 }, { "epoch": 60.65, "learning_rate": 1.9684990389959246e-05, "loss": 1.9322, "step": 20953500 }, { "epoch": 60.65, "learning_rate": 1.9684266742311968e-05, "loss": 1.9519, "step": 20954000 }, { "epoch": 60.65, "learning_rate": 1.968354309466469e-05, "loss": 1.944, "step": 20954500 }, { "epoch": 60.66, "learning_rate": 1.968282089431271e-05, "loss": 1.9504, "step": 20955000 }, { "epoch": 60.66, "learning_rate": 1.968209724666543e-05, "loss": 1.9387, "step": 20955500 }, { "epoch": 60.66, "learning_rate": 1.9681373599018153e-05, "loss": 1.9369, "step": 20956000 }, { "epoch": 60.66, "learning_rate": 1.968064995137088e-05, "loss": 1.9133, "step": 20956500 }, { "epoch": 60.66, "learning_rate": 1.9679927751018898e-05, "loss": 1.9331, "step": 20957000 }, { "epoch": 60.66, "learning_rate": 1.967920410337162e-05, "loss": 1.9336, "step": 20957500 }, { "epoch": 60.66, "learning_rate": 1.9678480455724342e-05, "loss": 1.9395, "step": 20958000 }, { "epoch": 60.67, "learning_rate": 1.9677756808077065e-05, "loss": 1.9261, "step": 20958500 }, { "epoch": 60.67, "learning_rate": 1.967703316042979e-05, "loss": 1.9613, "step": 20959000 }, { "epoch": 60.67, "learning_rate": 1.9676309512782513e-05, "loss": 1.9228, "step": 20959500 }, { "epoch": 60.67, "learning_rate": 1.9675585865135235e-05, "loss": 1.9254, "step": 20960000 }, { "epoch": 60.67, "learning_rate": 1.967486221748796e-05, "loss": 1.9418, "step": 20960500 }, { "epoch": 60.67, "learning_rate": 1.9674138569840683e-05, "loss": 1.9324, "step": 20961000 }, { "epoch": 60.67, "learning_rate": 1.9673414922193405e-05, "loss": 1.9621, "step": 20961500 }, { "epoch": 60.68, "learning_rate": 1.967269127454613e-05, "loss": 1.9297, "step": 20962000 }, { "epoch": 60.68, "learning_rate": 1.9671969074194146e-05, "loss": 1.927, "step": 20962500 }, { "epoch": 60.68, "learning_rate": 1.9671245426546868e-05, "loss": 1.9484, "step": 20963000 }, { "epoch": 60.68, "learning_rate": 1.967052177889959e-05, "loss": 1.9118, "step": 20963500 }, { "epoch": 60.68, "learning_rate": 1.9669798131252316e-05, "loss": 1.9448, "step": 20964000 }, { "epoch": 60.68, "learning_rate": 1.9669074483605042e-05, "loss": 1.9672, "step": 20964500 }, { "epoch": 60.69, "learning_rate": 1.9668350835957764e-05, "loss": 1.9394, "step": 20965000 }, { "epoch": 60.69, "learning_rate": 1.9667627188310486e-05, "loss": 1.9645, "step": 20965500 }, { "epoch": 60.69, "learning_rate": 1.966690354066321e-05, "loss": 1.9499, "step": 20966000 }, { "epoch": 60.69, "learning_rate": 1.966617989301593e-05, "loss": 1.9517, "step": 20966500 }, { "epoch": 60.69, "learning_rate": 1.966545769266395e-05, "loss": 1.9585, "step": 20967000 }, { "epoch": 60.69, "learning_rate": 1.9664734045016675e-05, "loss": 1.9518, "step": 20967500 }, { "epoch": 60.69, "learning_rate": 1.9664010397369397e-05, "loss": 1.941, "step": 20968000 }, { "epoch": 60.7, "learning_rate": 1.966328674972212e-05, "loss": 1.9338, "step": 20968500 }, { "epoch": 60.7, "learning_rate": 1.9662563102074842e-05, "loss": 1.9564, "step": 20969000 }, { "epoch": 60.7, "learning_rate": 1.9661839454427568e-05, "loss": 1.9296, "step": 20969500 }, { "epoch": 60.7, "learning_rate": 1.966111580678029e-05, "loss": 1.9358, "step": 20970000 }, { "epoch": 60.7, "learning_rate": 1.9660393606428305e-05, "loss": 1.9528, "step": 20970500 }, { "epoch": 60.7, "learning_rate": 1.965966995878103e-05, "loss": 1.9416, "step": 20971000 }, { "epoch": 60.7, "learning_rate": 1.9658946311133757e-05, "loss": 1.9554, "step": 20971500 }, { "epoch": 60.71, "learning_rate": 1.965822266348648e-05, "loss": 1.9158, "step": 20972000 }, { "epoch": 60.71, "learning_rate": 1.96574990158392e-05, "loss": 1.9455, "step": 20972500 }, { "epoch": 60.71, "learning_rate": 1.9656775368191923e-05, "loss": 1.9456, "step": 20973000 }, { "epoch": 60.71, "learning_rate": 1.9656051720544646e-05, "loss": 1.9618, "step": 20973500 }, { "epoch": 60.71, "learning_rate": 1.9655328072897368e-05, "loss": 1.9469, "step": 20974000 }, { "epoch": 60.71, "learning_rate": 1.9654605872545387e-05, "loss": 1.9286, "step": 20974500 }, { "epoch": 60.71, "learning_rate": 1.9653882224898112e-05, "loss": 1.9504, "step": 20975000 }, { "epoch": 60.72, "learning_rate": 1.9653158577250835e-05, "loss": 1.9515, "step": 20975500 }, { "epoch": 60.72, "learning_rate": 1.9652434929603557e-05, "loss": 1.9808, "step": 20976000 }, { "epoch": 60.72, "learning_rate": 1.9651711281956282e-05, "loss": 1.9861, "step": 20976500 }, { "epoch": 60.72, "learning_rate": 1.9650987634309005e-05, "loss": 1.9062, "step": 20977000 }, { "epoch": 60.72, "learning_rate": 1.9650263986661727e-05, "loss": 1.9522, "step": 20977500 }, { "epoch": 60.72, "learning_rate": 1.964954033901445e-05, "loss": 1.9595, "step": 20978000 }, { "epoch": 60.72, "learning_rate": 1.964881813866247e-05, "loss": 1.9608, "step": 20978500 }, { "epoch": 60.73, "learning_rate": 1.9648094491015194e-05, "loss": 1.932, "step": 20979000 }, { "epoch": 60.73, "learning_rate": 1.9647370843367916e-05, "loss": 1.9475, "step": 20979500 }, { "epoch": 60.73, "learning_rate": 1.9646647195720638e-05, "loss": 1.9591, "step": 20980000 }, { "epoch": 60.73, "learning_rate": 1.9645924995368657e-05, "loss": 1.9515, "step": 20980500 }, { "epoch": 60.73, "learning_rate": 1.964520134772138e-05, "loss": 1.95, "step": 20981000 }, { "epoch": 60.73, "learning_rate": 1.96444777000741e-05, "loss": 1.9539, "step": 20981500 }, { "epoch": 60.73, "learning_rate": 1.9643754052426824e-05, "loss": 1.933, "step": 20982000 }, { "epoch": 60.74, "learning_rate": 1.964303040477955e-05, "loss": 1.9303, "step": 20982500 }, { "epoch": 60.74, "learning_rate": 1.964230820442757e-05, "loss": 1.966, "step": 20983000 }, { "epoch": 60.74, "learning_rate": 1.964158455678029e-05, "loss": 1.9283, "step": 20983500 }, { "epoch": 60.74, "learning_rate": 1.9640860909133013e-05, "loss": 1.9161, "step": 20984000 }, { "epoch": 60.74, "learning_rate": 1.9640137261485735e-05, "loss": 1.9715, "step": 20984500 }, { "epoch": 60.74, "learning_rate": 1.9639415061133754e-05, "loss": 1.9624, "step": 20985000 }, { "epoch": 60.74, "learning_rate": 1.9638691413486476e-05, "loss": 1.9286, "step": 20985500 }, { "epoch": 60.75, "learning_rate": 1.9637969213134495e-05, "loss": 1.9457, "step": 20986000 }, { "epoch": 60.75, "learning_rate": 1.963724556548722e-05, "loss": 1.9473, "step": 20986500 }, { "epoch": 60.75, "learning_rate": 1.9636521917839943e-05, "loss": 1.9596, "step": 20987000 }, { "epoch": 60.75, "learning_rate": 1.9635798270192665e-05, "loss": 1.9549, "step": 20987500 }, { "epoch": 60.75, "learning_rate": 1.9635074622545387e-05, "loss": 1.9193, "step": 20988000 }, { "epoch": 60.75, "learning_rate": 1.9634352422193406e-05, "loss": 1.9304, "step": 20988500 }, { "epoch": 60.75, "learning_rate": 1.963362877454613e-05, "loss": 1.9784, "step": 20989000 }, { "epoch": 60.76, "learning_rate": 1.963290512689885e-05, "loss": 1.9218, "step": 20989500 }, { "epoch": 60.76, "learning_rate": 1.9632181479251577e-05, "loss": 1.905, "step": 20990000 }, { "epoch": 60.76, "learning_rate": 1.9631459278899595e-05, "loss": 1.9176, "step": 20990500 }, { "epoch": 60.76, "learning_rate": 1.9630735631252318e-05, "loss": 1.9692, "step": 20991000 }, { "epoch": 60.76, "learning_rate": 1.963001198360504e-05, "loss": 1.9152, "step": 20991500 }, { "epoch": 60.76, "learning_rate": 1.962928978325306e-05, "loss": 1.9445, "step": 20992000 }, { "epoch": 60.76, "learning_rate": 1.962856613560578e-05, "loss": 1.9545, "step": 20992500 }, { "epoch": 60.77, "learning_rate": 1.9627842487958503e-05, "loss": 1.9798, "step": 20993000 }, { "epoch": 60.77, "learning_rate": 1.9627118840311226e-05, "loss": 1.9344, "step": 20993500 }, { "epoch": 60.77, "learning_rate": 1.962639519266395e-05, "loss": 1.9354, "step": 20994000 }, { "epoch": 60.77, "learning_rate": 1.9625671545016673e-05, "loss": 1.948, "step": 20994500 }, { "epoch": 60.77, "learning_rate": 1.9624947897369396e-05, "loss": 1.9423, "step": 20995000 }, { "epoch": 60.77, "learning_rate": 1.962422424972212e-05, "loss": 1.9653, "step": 20995500 }, { "epoch": 60.77, "learning_rate": 1.9623502049370137e-05, "loss": 1.9456, "step": 20996000 }, { "epoch": 60.78, "learning_rate": 1.962277840172286e-05, "loss": 1.9723, "step": 20996500 }, { "epoch": 60.78, "learning_rate": 1.9622054754075585e-05, "loss": 1.9475, "step": 20997000 }, { "epoch": 60.78, "learning_rate": 1.96213325537236e-05, "loss": 1.939, "step": 20997500 }, { "epoch": 60.78, "learning_rate": 1.9620608906076326e-05, "loss": 1.9411, "step": 20998000 }, { "epoch": 60.78, "learning_rate": 1.9619885258429048e-05, "loss": 1.9364, "step": 20998500 }, { "epoch": 60.78, "learning_rate": 1.9619163058077067e-05, "loss": 1.9475, "step": 20999000 }, { "epoch": 60.78, "learning_rate": 1.9618440857725086e-05, "loss": 1.9313, "step": 20999500 }, { "epoch": 60.79, "learning_rate": 1.9617717210077808e-05, "loss": 1.9461, "step": 21000000 }, { "epoch": 60.79, "learning_rate": 1.961699356243053e-05, "loss": 1.9303, "step": 21000500 }, { "epoch": 60.79, "learning_rate": 1.9616269914783253e-05, "loss": 1.9535, "step": 21001000 }, { "epoch": 60.79, "learning_rate": 1.9615546267135978e-05, "loss": 1.9334, "step": 21001500 }, { "epoch": 60.79, "learning_rate": 1.96148226194887e-05, "loss": 1.9537, "step": 21002000 }, { "epoch": 60.79, "learning_rate": 1.9614098971841423e-05, "loss": 1.9358, "step": 21002500 }, { "epoch": 60.8, "learning_rate": 1.961337532419415e-05, "loss": 1.9474, "step": 21003000 }, { "epoch": 60.8, "learning_rate": 1.961265167654687e-05, "loss": 1.9356, "step": 21003500 }, { "epoch": 60.8, "learning_rate": 1.9611928028899593e-05, "loss": 1.9315, "step": 21004000 }, { "epoch": 60.8, "learning_rate": 1.9611204381252315e-05, "loss": 1.9286, "step": 21004500 }, { "epoch": 60.8, "learning_rate": 1.961048073360504e-05, "loss": 1.9291, "step": 21005000 }, { "epoch": 60.8, "learning_rate": 1.9609757085957763e-05, "loss": 1.9537, "step": 21005500 }, { "epoch": 60.8, "learning_rate": 1.9609033438310485e-05, "loss": 1.9524, "step": 21006000 }, { "epoch": 60.81, "learning_rate": 1.960830979066321e-05, "loss": 1.939, "step": 21006500 }, { "epoch": 60.81, "learning_rate": 1.9607586143015933e-05, "loss": 1.9376, "step": 21007000 }, { "epoch": 60.81, "learning_rate": 1.960686394266395e-05, "loss": 1.9712, "step": 21007500 }, { "epoch": 60.81, "learning_rate": 1.9606141742311968e-05, "loss": 1.9244, "step": 21008000 }, { "epoch": 60.81, "learning_rate": 1.960541809466469e-05, "loss": 1.9456, "step": 21008500 }, { "epoch": 60.81, "learning_rate": 1.9604694447017415e-05, "loss": 1.9344, "step": 21009000 }, { "epoch": 60.81, "learning_rate": 1.9603972246665434e-05, "loss": 1.9458, "step": 21009500 }, { "epoch": 60.82, "learning_rate": 1.9603248599018157e-05, "loss": 1.9237, "step": 21010000 }, { "epoch": 60.82, "learning_rate": 1.960252495137088e-05, "loss": 1.9446, "step": 21010500 }, { "epoch": 60.82, "learning_rate": 1.96018013037236e-05, "loss": 1.9673, "step": 21011000 }, { "epoch": 60.82, "learning_rate": 1.9601077656076323e-05, "loss": 1.9453, "step": 21011500 }, { "epoch": 60.82, "learning_rate": 1.960035400842905e-05, "loss": 1.9546, "step": 21012000 }, { "epoch": 60.82, "learning_rate": 1.9599630360781775e-05, "loss": 1.952, "step": 21012500 }, { "epoch": 60.82, "learning_rate": 1.9598906713134497e-05, "loss": 1.9231, "step": 21013000 }, { "epoch": 60.83, "learning_rate": 1.959818306548722e-05, "loss": 1.9492, "step": 21013500 }, { "epoch": 60.83, "learning_rate": 1.959745941783994e-05, "loss": 1.9547, "step": 21014000 }, { "epoch": 60.83, "learning_rate": 1.9596735770192663e-05, "loss": 1.9483, "step": 21014500 }, { "epoch": 60.83, "learning_rate": 1.959601212254539e-05, "loss": 1.9349, "step": 21015000 }, { "epoch": 60.83, "learning_rate": 1.959528847489811e-05, "loss": 1.9669, "step": 21015500 }, { "epoch": 60.83, "learning_rate": 1.9594564827250837e-05, "loss": 1.9453, "step": 21016000 }, { "epoch": 60.83, "learning_rate": 1.9593842626898852e-05, "loss": 1.9555, "step": 21016500 }, { "epoch": 60.84, "learning_rate": 1.9593118979251575e-05, "loss": 1.9421, "step": 21017000 }, { "epoch": 60.84, "learning_rate": 1.95923953316043e-05, "loss": 1.9415, "step": 21017500 }, { "epoch": 60.84, "learning_rate": 1.9591671683957023e-05, "loss": 1.9347, "step": 21018000 }, { "epoch": 60.84, "learning_rate": 1.9590948036309745e-05, "loss": 1.9531, "step": 21018500 }, { "epoch": 60.84, "learning_rate": 1.9590224388662467e-05, "loss": 1.9291, "step": 21019000 }, { "epoch": 60.84, "learning_rate": 1.9589502188310486e-05, "loss": 1.9466, "step": 21019500 }, { "epoch": 60.84, "learning_rate": 1.958877854066321e-05, "loss": 1.9829, "step": 21020000 }, { "epoch": 60.85, "learning_rate": 1.9588054893015934e-05, "loss": 1.921, "step": 21020500 }, { "epoch": 60.85, "learning_rate": 1.9587331245368656e-05, "loss": 1.9535, "step": 21021000 }, { "epoch": 60.85, "learning_rate": 1.9586607597721378e-05, "loss": 1.9337, "step": 21021500 }, { "epoch": 60.85, "learning_rate": 1.9585885397369397e-05, "loss": 1.967, "step": 21022000 }, { "epoch": 60.85, "learning_rate": 1.958516174972212e-05, "loss": 1.9441, "step": 21022500 }, { "epoch": 60.85, "learning_rate": 1.9584438102074842e-05, "loss": 1.9234, "step": 21023000 }, { "epoch": 60.85, "learning_rate": 1.9583714454427567e-05, "loss": 1.933, "step": 21023500 }, { "epoch": 60.86, "learning_rate": 1.958299080678029e-05, "loss": 1.9674, "step": 21024000 }, { "epoch": 60.86, "learning_rate": 1.958226860642831e-05, "loss": 1.9574, "step": 21024500 }, { "epoch": 60.86, "learning_rate": 1.958154495878103e-05, "loss": 1.9627, "step": 21025000 }, { "epoch": 60.86, "learning_rate": 1.9580821311133753e-05, "loss": 1.9423, "step": 21025500 }, { "epoch": 60.86, "learning_rate": 1.9580097663486475e-05, "loss": 1.9494, "step": 21026000 }, { "epoch": 60.86, "learning_rate": 1.95793740158392e-05, "loss": 1.9392, "step": 21026500 }, { "epoch": 60.86, "learning_rate": 1.9578650368191923e-05, "loss": 1.947, "step": 21027000 }, { "epoch": 60.87, "learning_rate": 1.957792672054465e-05, "loss": 1.9316, "step": 21027500 }, { "epoch": 60.87, "learning_rate": 1.957720307289737e-05, "loss": 1.9562, "step": 21028000 }, { "epoch": 60.87, "learning_rate": 1.9576479425250093e-05, "loss": 1.9359, "step": 21028500 }, { "epoch": 60.87, "learning_rate": 1.9575758672193405e-05, "loss": 1.9341, "step": 21029000 }, { "epoch": 60.87, "learning_rate": 1.9575035024546128e-05, "loss": 1.9774, "step": 21029500 }, { "epoch": 60.87, "learning_rate": 1.9574311376898853e-05, "loss": 1.936, "step": 21030000 }, { "epoch": 60.87, "learning_rate": 1.9573587729251576e-05, "loss": 1.9404, "step": 21030500 }, { "epoch": 60.88, "learning_rate": 1.95728640816043e-05, "loss": 1.9536, "step": 21031000 }, { "epoch": 60.88, "learning_rate": 1.9572140433957023e-05, "loss": 1.9304, "step": 21031500 }, { "epoch": 60.88, "learning_rate": 1.9571416786309746e-05, "loss": 1.9355, "step": 21032000 }, { "epoch": 60.88, "learning_rate": 1.9570693138662468e-05, "loss": 1.9482, "step": 21032500 }, { "epoch": 60.88, "learning_rate": 1.9569970938310487e-05, "loss": 1.9428, "step": 21033000 }, { "epoch": 60.88, "learning_rate": 1.956924729066321e-05, "loss": 1.9405, "step": 21033500 }, { "epoch": 60.88, "learning_rate": 1.9568525090311228e-05, "loss": 1.937, "step": 21034000 }, { "epoch": 60.89, "learning_rate": 1.956780144266395e-05, "loss": 1.9486, "step": 21034500 }, { "epoch": 60.89, "learning_rate": 1.9567079242311966e-05, "loss": 1.9706, "step": 21035000 }, { "epoch": 60.89, "learning_rate": 1.956635559466469e-05, "loss": 1.9498, "step": 21035500 }, { "epoch": 60.89, "learning_rate": 1.9565631947017417e-05, "loss": 1.9547, "step": 21036000 }, { "epoch": 60.89, "learning_rate": 1.956490829937014e-05, "loss": 1.9214, "step": 21036500 }, { "epoch": 60.89, "learning_rate": 1.956418465172286e-05, "loss": 1.9425, "step": 21037000 }, { "epoch": 60.89, "learning_rate": 1.9563461004075584e-05, "loss": 1.9242, "step": 21037500 }, { "epoch": 60.9, "learning_rate": 1.9562737356428306e-05, "loss": 1.9475, "step": 21038000 }, { "epoch": 60.9, "learning_rate": 1.9562013708781028e-05, "loss": 1.9535, "step": 21038500 }, { "epoch": 60.9, "learning_rate": 1.9561290061133754e-05, "loss": 1.9247, "step": 21039000 }, { "epoch": 60.9, "learning_rate": 1.9560567860781773e-05, "loss": 1.9517, "step": 21039500 }, { "epoch": 60.9, "learning_rate": 1.9559844213134495e-05, "loss": 1.9512, "step": 21040000 }, { "epoch": 60.9, "learning_rate": 1.9559120565487217e-05, "loss": 1.9492, "step": 21040500 }, { "epoch": 60.91, "learning_rate": 1.9558396917839943e-05, "loss": 1.9355, "step": 21041000 }, { "epoch": 60.91, "learning_rate": 1.955767471748796e-05, "loss": 1.9535, "step": 21041500 }, { "epoch": 60.91, "learning_rate": 1.955695106984068e-05, "loss": 1.9276, "step": 21042000 }, { "epoch": 60.91, "learning_rate": 1.95562288694887e-05, "loss": 1.9852, "step": 21042500 }, { "epoch": 60.91, "learning_rate": 1.9555505221841425e-05, "loss": 1.9604, "step": 21043000 }, { "epoch": 60.91, "learning_rate": 1.9554781574194147e-05, "loss": 1.9852, "step": 21043500 }, { "epoch": 60.91, "learning_rate": 1.955405792654687e-05, "loss": 1.9505, "step": 21044000 }, { "epoch": 60.92, "learning_rate": 1.9553334278899592e-05, "loss": 1.9248, "step": 21044500 }, { "epoch": 60.92, "learning_rate": 1.9552610631252317e-05, "loss": 1.9376, "step": 21045000 }, { "epoch": 60.92, "learning_rate": 1.955188698360504e-05, "loss": 1.9213, "step": 21045500 }, { "epoch": 60.92, "learning_rate": 1.9551163335957762e-05, "loss": 1.9653, "step": 21046000 }, { "epoch": 60.92, "learning_rate": 1.955044113560578e-05, "loss": 1.9333, "step": 21046500 }, { "epoch": 60.92, "learning_rate": 1.9549717487958507e-05, "loss": 1.9634, "step": 21047000 }, { "epoch": 60.92, "learning_rate": 1.954899384031123e-05, "loss": 1.9348, "step": 21047500 }, { "epoch": 60.93, "learning_rate": 1.954827019266395e-05, "loss": 1.9341, "step": 21048000 }, { "epoch": 60.93, "learning_rate": 1.9547546545016673e-05, "loss": 1.9663, "step": 21048500 }, { "epoch": 60.93, "learning_rate": 1.9546822897369395e-05, "loss": 1.9498, "step": 21049000 }, { "epoch": 60.93, "learning_rate": 1.9546100697017414e-05, "loss": 1.9479, "step": 21049500 }, { "epoch": 60.93, "learning_rate": 1.954537704937014e-05, "loss": 1.9591, "step": 21050000 }, { "epoch": 60.93, "learning_rate": 1.9544653401722862e-05, "loss": 1.9474, "step": 21050500 }, { "epoch": 60.93, "learning_rate": 1.9543929754075584e-05, "loss": 1.9332, "step": 21051000 }, { "epoch": 60.94, "learning_rate": 1.9543206106428307e-05, "loss": 1.9607, "step": 21051500 }, { "epoch": 60.94, "learning_rate": 1.9542482458781032e-05, "loss": 1.944, "step": 21052000 }, { "epoch": 60.94, "learning_rate": 1.9541758811133755e-05, "loss": 1.9333, "step": 21052500 }, { "epoch": 60.94, "learning_rate": 1.9541035163486477e-05, "loss": 1.9374, "step": 21053000 }, { "epoch": 60.94, "learning_rate": 1.9540311515839202e-05, "loss": 1.9199, "step": 21053500 }, { "epoch": 60.94, "learning_rate": 1.9539587868191925e-05, "loss": 1.9537, "step": 21054000 }, { "epoch": 60.94, "learning_rate": 1.9538864220544647e-05, "loss": 1.9512, "step": 21054500 }, { "epoch": 60.95, "learning_rate": 1.953814057289737e-05, "loss": 1.9497, "step": 21055000 }, { "epoch": 60.95, "learning_rate": 1.9537416925250095e-05, "loss": 1.9663, "step": 21055500 }, { "epoch": 60.95, "learning_rate": 1.953669472489811e-05, "loss": 1.9288, "step": 21056000 }, { "epoch": 60.95, "learning_rate": 1.953597252454613e-05, "loss": 1.9461, "step": 21056500 }, { "epoch": 60.95, "learning_rate": 1.953524887689885e-05, "loss": 1.9572, "step": 21057000 }, { "epoch": 60.95, "learning_rate": 1.9534525229251577e-05, "loss": 1.9334, "step": 21057500 }, { "epoch": 60.95, "learning_rate": 1.95338015816043e-05, "loss": 1.9486, "step": 21058000 }, { "epoch": 60.96, "learning_rate": 1.9533079381252318e-05, "loss": 1.9517, "step": 21058500 }, { "epoch": 60.96, "learning_rate": 1.953235573360504e-05, "loss": 1.9465, "step": 21059000 }, { "epoch": 60.96, "learning_rate": 1.9531632085957763e-05, "loss": 1.9464, "step": 21059500 }, { "epoch": 60.96, "learning_rate": 1.9530908438310485e-05, "loss": 1.9486, "step": 21060000 }, { "epoch": 60.96, "learning_rate": 1.9530186237958504e-05, "loss": 1.9239, "step": 21060500 }, { "epoch": 60.96, "learning_rate": 1.952946403760652e-05, "loss": 1.9581, "step": 21061000 }, { "epoch": 60.96, "learning_rate": 1.9528740389959245e-05, "loss": 1.9718, "step": 21061500 }, { "epoch": 60.97, "learning_rate": 1.952801674231197e-05, "loss": 1.9542, "step": 21062000 }, { "epoch": 60.97, "learning_rate": 1.9527293094664693e-05, "loss": 1.9383, "step": 21062500 }, { "epoch": 60.97, "learning_rate": 1.9526569447017415e-05, "loss": 1.9372, "step": 21063000 }, { "epoch": 60.97, "learning_rate": 1.9525845799370137e-05, "loss": 1.9257, "step": 21063500 }, { "epoch": 60.97, "learning_rate": 1.952512215172286e-05, "loss": 1.9401, "step": 21064000 }, { "epoch": 60.97, "learning_rate": 1.9524398504075582e-05, "loss": 1.9432, "step": 21064500 }, { "epoch": 60.97, "learning_rate": 1.9523674856428308e-05, "loss": 1.9277, "step": 21065000 }, { "epoch": 60.98, "learning_rate": 1.9522951208781033e-05, "loss": 1.9399, "step": 21065500 }, { "epoch": 60.98, "learning_rate": 1.9522227561133755e-05, "loss": 1.9486, "step": 21066000 }, { "epoch": 60.98, "learning_rate": 1.9521503913486478e-05, "loss": 1.9641, "step": 21066500 }, { "epoch": 60.98, "learning_rate": 1.9520781713134497e-05, "loss": 1.9495, "step": 21067000 }, { "epoch": 60.98, "learning_rate": 1.952005806548722e-05, "loss": 1.9376, "step": 21067500 }, { "epoch": 60.98, "learning_rate": 1.951933441783994e-05, "loss": 1.9556, "step": 21068000 }, { "epoch": 60.98, "learning_rate": 1.9518610770192667e-05, "loss": 1.9444, "step": 21068500 }, { "epoch": 60.99, "learning_rate": 1.951788712254539e-05, "loss": 1.9539, "step": 21069000 }, { "epoch": 60.99, "learning_rate": 1.951716347489811e-05, "loss": 1.9625, "step": 21069500 }, { "epoch": 60.99, "learning_rate": 1.951644127454613e-05, "loss": 1.9489, "step": 21070000 }, { "epoch": 60.99, "learning_rate": 1.9515717626898852e-05, "loss": 1.9626, "step": 21070500 }, { "epoch": 60.99, "learning_rate": 1.9514993979251575e-05, "loss": 1.9355, "step": 21071000 }, { "epoch": 60.99, "learning_rate": 1.9514270331604297e-05, "loss": 1.9591, "step": 21071500 }, { "epoch": 60.99, "learning_rate": 1.9513546683957022e-05, "loss": 1.9438, "step": 21072000 }, { "epoch": 61.0, "learning_rate": 1.9512823036309748e-05, "loss": 1.9595, "step": 21072500 }, { "epoch": 61.0, "learning_rate": 1.951209938866247e-05, "loss": 1.9578, "step": 21073000 }, { "epoch": 61.0, "learning_rate": 1.9511375741015192e-05, "loss": 1.9373, "step": 21073500 }, { "epoch": 61.0, "eval_accuracy": 0.6782427676844988, "eval_accuracy_mlm": 0.6448035000222343, "eval_accuracy_nsp": 0.8574541804732507, "eval_loss": 2.148995876312256, "eval_runtime": 331.7803, "eval_samples_per_second": 1315.286, "eval_steps_per_second": 54.804, "step": 21073792 }, { "epoch": 61.0, "learning_rate": 1.9510652093367915e-05, "loss": 1.9296, "step": 21074000 }, { "epoch": 61.0, "learning_rate": 1.9509928445720637e-05, "loss": 1.9245, "step": 21074500 }, { "epoch": 61.0, "learning_rate": 1.9509206245368656e-05, "loss": 1.9313, "step": 21075000 }, { "epoch": 61.0, "learning_rate": 1.950848404501667e-05, "loss": 1.9225, "step": 21075500 }, { "epoch": 61.01, "learning_rate": 1.9507760397369397e-05, "loss": 1.9259, "step": 21076000 }, { "epoch": 61.01, "learning_rate": 1.9507038197017416e-05, "loss": 1.9358, "step": 21076500 }, { "epoch": 61.01, "learning_rate": 1.9506314549370138e-05, "loss": 1.9091, "step": 21077000 }, { "epoch": 61.01, "learning_rate": 1.950559090172286e-05, "loss": 1.9, "step": 21077500 }, { "epoch": 61.01, "learning_rate": 1.9504867254075586e-05, "loss": 1.9415, "step": 21078000 }, { "epoch": 61.01, "learning_rate": 1.9504143606428308e-05, "loss": 1.9431, "step": 21078500 }, { "epoch": 61.02, "learning_rate": 1.950341995878103e-05, "loss": 1.929, "step": 21079000 }, { "epoch": 61.02, "learning_rate": 1.9502696311133753e-05, "loss": 1.9169, "step": 21079500 }, { "epoch": 61.02, "learning_rate": 1.950197266348648e-05, "loss": 1.9198, "step": 21080000 }, { "epoch": 61.02, "learning_rate": 1.95012490158392e-05, "loss": 1.9486, "step": 21080500 }, { "epoch": 61.02, "learning_rate": 1.9500525368191923e-05, "loss": 1.932, "step": 21081000 }, { "epoch": 61.02, "learning_rate": 1.949980172054465e-05, "loss": 1.9466, "step": 21081500 }, { "epoch": 61.02, "learning_rate": 1.949907807289737e-05, "loss": 1.921, "step": 21082000 }, { "epoch": 61.03, "learning_rate": 1.9498354425250093e-05, "loss": 1.9303, "step": 21082500 }, { "epoch": 61.03, "learning_rate": 1.9497630777602815e-05, "loss": 1.9217, "step": 21083000 }, { "epoch": 61.03, "learning_rate": 1.949690712995554e-05, "loss": 1.943, "step": 21083500 }, { "epoch": 61.03, "learning_rate": 1.9496183482308263e-05, "loss": 1.9363, "step": 21084000 }, { "epoch": 61.03, "learning_rate": 1.9495459834660985e-05, "loss": 1.938, "step": 21084500 }, { "epoch": 61.03, "learning_rate": 1.9494737634309004e-05, "loss": 1.9461, "step": 21085000 }, { "epoch": 61.03, "learning_rate": 1.9494013986661726e-05, "loss": 1.9408, "step": 21085500 }, { "epoch": 61.04, "learning_rate": 1.949329033901445e-05, "loss": 1.9248, "step": 21086000 }, { "epoch": 61.04, "learning_rate": 1.9492566691367174e-05, "loss": 1.9204, "step": 21086500 }, { "epoch": 61.04, "learning_rate": 1.94918430437199e-05, "loss": 1.9406, "step": 21087000 }, { "epoch": 61.04, "learning_rate": 1.9491120843367915e-05, "loss": 1.9493, "step": 21087500 }, { "epoch": 61.04, "learning_rate": 1.9490398643015934e-05, "loss": 1.9434, "step": 21088000 }, { "epoch": 61.04, "learning_rate": 1.9489674995368657e-05, "loss": 1.9409, "step": 21088500 }, { "epoch": 61.04, "learning_rate": 1.948895134772138e-05, "loss": 1.9245, "step": 21089000 }, { "epoch": 61.05, "learning_rate": 1.94882277000741e-05, "loss": 1.9403, "step": 21089500 }, { "epoch": 61.05, "learning_rate": 1.9487504052426823e-05, "loss": 1.9256, "step": 21090000 }, { "epoch": 61.05, "learning_rate": 1.9486781852074842e-05, "loss": 1.925, "step": 21090500 }, { "epoch": 61.05, "learning_rate": 1.9486058204427568e-05, "loss": 1.9193, "step": 21091000 }, { "epoch": 61.05, "learning_rate": 1.948533455678029e-05, "loss": 1.918, "step": 21091500 }, { "epoch": 61.05, "learning_rate": 1.948461235642831e-05, "loss": 1.9238, "step": 21092000 }, { "epoch": 61.05, "learning_rate": 1.948388870878103e-05, "loss": 1.9219, "step": 21092500 }, { "epoch": 61.06, "learning_rate": 1.9483165061133754e-05, "loss": 1.9277, "step": 21093000 }, { "epoch": 61.06, "learning_rate": 1.9482441413486476e-05, "loss": 1.935, "step": 21093500 }, { "epoch": 61.06, "learning_rate": 1.9481717765839198e-05, "loss": 1.9295, "step": 21094000 }, { "epoch": 61.06, "learning_rate": 1.9480995565487217e-05, "loss": 1.9383, "step": 21094500 }, { "epoch": 61.06, "learning_rate": 1.9480271917839943e-05, "loss": 1.9129, "step": 21095000 }, { "epoch": 61.06, "learning_rate": 1.9479548270192665e-05, "loss": 1.915, "step": 21095500 }, { "epoch": 61.06, "learning_rate": 1.9478824622545387e-05, "loss": 1.943, "step": 21096000 }, { "epoch": 61.07, "learning_rate": 1.9478100974898113e-05, "loss": 1.9267, "step": 21096500 }, { "epoch": 61.07, "learning_rate": 1.9477377327250835e-05, "loss": 1.9375, "step": 21097000 }, { "epoch": 61.07, "learning_rate": 1.947665512689885e-05, "loss": 1.9516, "step": 21097500 }, { "epoch": 61.07, "learning_rate": 1.9475931479251576e-05, "loss": 1.9472, "step": 21098000 }, { "epoch": 61.07, "learning_rate": 1.9475207831604302e-05, "loss": 1.9178, "step": 21098500 }, { "epoch": 61.07, "learning_rate": 1.9474484183957024e-05, "loss": 1.931, "step": 21099000 }, { "epoch": 61.07, "learning_rate": 1.9473760536309746e-05, "loss": 1.9443, "step": 21099500 }, { "epoch": 61.08, "learning_rate": 1.947303688866247e-05, "loss": 1.9569, "step": 21100000 }, { "epoch": 61.08, "learning_rate": 1.9472314688310487e-05, "loss": 1.943, "step": 21100500 }, { "epoch": 61.08, "learning_rate": 1.9471592487958503e-05, "loss": 1.9352, "step": 21101000 }, { "epoch": 61.08, "learning_rate": 1.9470868840311225e-05, "loss": 1.9079, "step": 21101500 }, { "epoch": 61.08, "learning_rate": 1.947014519266395e-05, "loss": 1.9023, "step": 21102000 }, { "epoch": 61.08, "learning_rate": 1.9469421545016676e-05, "loss": 1.9373, "step": 21102500 }, { "epoch": 61.08, "learning_rate": 1.94686978973694e-05, "loss": 1.9401, "step": 21103000 }, { "epoch": 61.09, "learning_rate": 1.946797424972212e-05, "loss": 1.9225, "step": 21103500 }, { "epoch": 61.09, "learning_rate": 1.9467250602074843e-05, "loss": 1.9241, "step": 21104000 }, { "epoch": 61.09, "learning_rate": 1.9466526954427565e-05, "loss": 1.9426, "step": 21104500 }, { "epoch": 61.09, "learning_rate": 1.9465803306780288e-05, "loss": 1.9371, "step": 21105000 }, { "epoch": 61.09, "learning_rate": 1.9465079659133013e-05, "loss": 1.9078, "step": 21105500 }, { "epoch": 61.09, "learning_rate": 1.946435601148574e-05, "loss": 1.9397, "step": 21106000 }, { "epoch": 61.09, "learning_rate": 1.9463633811133754e-05, "loss": 1.9152, "step": 21106500 }, { "epoch": 61.1, "learning_rate": 1.9462910163486477e-05, "loss": 1.9434, "step": 21107000 }, { "epoch": 61.1, "learning_rate": 1.9462186515839202e-05, "loss": 1.9384, "step": 21107500 }, { "epoch": 61.1, "learning_rate": 1.9461462868191924e-05, "loss": 1.9249, "step": 21108000 }, { "epoch": 61.1, "learning_rate": 1.9460739220544647e-05, "loss": 1.9119, "step": 21108500 }, { "epoch": 61.1, "learning_rate": 1.946001557289737e-05, "loss": 1.9424, "step": 21109000 }, { "epoch": 61.1, "learning_rate": 1.9459291925250095e-05, "loss": 1.916, "step": 21109500 }, { "epoch": 61.1, "learning_rate": 1.9458568277602817e-05, "loss": 1.9447, "step": 21110000 }, { "epoch": 61.11, "learning_rate": 1.945784462995554e-05, "loss": 1.9354, "step": 21110500 }, { "epoch": 61.11, "learning_rate": 1.9457122429603558e-05, "loss": 1.9285, "step": 21111000 }, { "epoch": 61.11, "learning_rate": 1.945639878195628e-05, "loss": 1.9355, "step": 21111500 }, { "epoch": 61.11, "learning_rate": 1.94556765816043e-05, "loss": 1.9174, "step": 21112000 }, { "epoch": 61.11, "learning_rate": 1.945495293395702e-05, "loss": 1.9423, "step": 21112500 }, { "epoch": 61.11, "learning_rate": 1.9454229286309744e-05, "loss": 1.9498, "step": 21113000 }, { "epoch": 61.11, "learning_rate": 1.945350563866247e-05, "loss": 1.9253, "step": 21113500 }, { "epoch": 61.12, "learning_rate": 1.945278199101519e-05, "loss": 1.9352, "step": 21114000 }, { "epoch": 61.12, "learning_rate": 1.9452058343367917e-05, "loss": 1.9403, "step": 21114500 }, { "epoch": 61.12, "learning_rate": 1.945133469572064e-05, "loss": 1.9468, "step": 21115000 }, { "epoch": 61.12, "learning_rate": 1.945061104807336e-05, "loss": 1.9509, "step": 21115500 }, { "epoch": 61.12, "learning_rate": 1.9449888847721377e-05, "loss": 1.9187, "step": 21116000 }, { "epoch": 61.12, "learning_rate": 1.9449165200074103e-05, "loss": 1.9303, "step": 21116500 }, { "epoch": 61.13, "learning_rate": 1.9448441552426825e-05, "loss": 1.9444, "step": 21117000 }, { "epoch": 61.13, "learning_rate": 1.944771790477955e-05, "loss": 1.9455, "step": 21117500 }, { "epoch": 61.13, "learning_rate": 1.9446994257132273e-05, "loss": 1.9443, "step": 21118000 }, { "epoch": 61.13, "learning_rate": 1.9446270609484995e-05, "loss": 1.9333, "step": 21118500 }, { "epoch": 61.13, "learning_rate": 1.9445546961837717e-05, "loss": 1.9381, "step": 21119000 }, { "epoch": 61.13, "learning_rate": 1.9444823314190443e-05, "loss": 1.9316, "step": 21119500 }, { "epoch": 61.13, "learning_rate": 1.944410111383846e-05, "loss": 1.9493, "step": 21120000 }, { "epoch": 61.14, "learning_rate": 1.9443378913486477e-05, "loss": 1.9495, "step": 21120500 }, { "epoch": 61.14, "learning_rate": 1.9442655265839203e-05, "loss": 1.9254, "step": 21121000 }, { "epoch": 61.14, "learning_rate": 1.9441931618191925e-05, "loss": 1.9367, "step": 21121500 }, { "epoch": 61.14, "learning_rate": 1.9441207970544647e-05, "loss": 1.9227, "step": 21122000 }, { "epoch": 61.14, "learning_rate": 1.944048432289737e-05, "loss": 1.9264, "step": 21122500 }, { "epoch": 61.14, "learning_rate": 1.9439760675250092e-05, "loss": 1.9477, "step": 21123000 }, { "epoch": 61.14, "learning_rate": 1.9439037027602818e-05, "loss": 1.9358, "step": 21123500 }, { "epoch": 61.15, "learning_rate": 1.943831337995554e-05, "loss": 1.9362, "step": 21124000 }, { "epoch": 61.15, "learning_rate": 1.9437589732308265e-05, "loss": 1.93, "step": 21124500 }, { "epoch": 61.15, "learning_rate": 1.943686753195628e-05, "loss": 1.9379, "step": 21125000 }, { "epoch": 61.15, "learning_rate": 1.94361453316043e-05, "loss": 1.9328, "step": 21125500 }, { "epoch": 61.15, "learning_rate": 1.9435421683957022e-05, "loss": 1.946, "step": 21126000 }, { "epoch": 61.15, "learning_rate": 1.9434698036309744e-05, "loss": 1.9255, "step": 21126500 }, { "epoch": 61.15, "learning_rate": 1.9433975835957763e-05, "loss": 1.9241, "step": 21127000 }, { "epoch": 61.16, "learning_rate": 1.9433252188310486e-05, "loss": 1.9276, "step": 21127500 }, { "epoch": 61.16, "learning_rate": 1.9432531435253798e-05, "loss": 1.951, "step": 21128000 }, { "epoch": 61.16, "learning_rate": 1.943180778760652e-05, "loss": 1.9385, "step": 21128500 }, { "epoch": 61.16, "learning_rate": 1.9431084139959246e-05, "loss": 1.9218, "step": 21129000 }, { "epoch": 61.16, "learning_rate": 1.9430361939607265e-05, "loss": 1.9268, "step": 21129500 }, { "epoch": 61.16, "learning_rate": 1.9429638291959987e-05, "loss": 1.9246, "step": 21130000 }, { "epoch": 61.16, "learning_rate": 1.942891464431271e-05, "loss": 1.9306, "step": 21130500 }, { "epoch": 61.17, "learning_rate": 1.942819099666543e-05, "loss": 1.9591, "step": 21131000 }, { "epoch": 61.17, "learning_rate": 1.9427467349018154e-05, "loss": 1.9451, "step": 21131500 }, { "epoch": 61.17, "learning_rate": 1.942674370137088e-05, "loss": 1.9482, "step": 21132000 }, { "epoch": 61.17, "learning_rate": 1.94260200537236e-05, "loss": 1.9264, "step": 21132500 }, { "epoch": 61.17, "learning_rate": 1.9425296406076327e-05, "loss": 1.9439, "step": 21133000 }, { "epoch": 61.17, "learning_rate": 1.942457275842905e-05, "loss": 1.9356, "step": 21133500 }, { "epoch": 61.17, "learning_rate": 1.942384911078177e-05, "loss": 1.9373, "step": 21134000 }, { "epoch": 61.18, "learning_rate": 1.9423125463134494e-05, "loss": 1.9423, "step": 21134500 }, { "epoch": 61.18, "learning_rate": 1.942240181548722e-05, "loss": 1.929, "step": 21135000 }, { "epoch": 61.18, "learning_rate": 1.942167816783994e-05, "loss": 1.915, "step": 21135500 }, { "epoch": 61.18, "learning_rate": 1.9420954520192667e-05, "loss": 1.9332, "step": 21136000 }, { "epoch": 61.18, "learning_rate": 1.942023087254539e-05, "loss": 1.9475, "step": 21136500 }, { "epoch": 61.18, "learning_rate": 1.941950722489811e-05, "loss": 1.9385, "step": 21137000 }, { "epoch": 61.18, "learning_rate": 1.9418783577250834e-05, "loss": 1.9102, "step": 21137500 }, { "epoch": 61.19, "learning_rate": 1.9418059929603556e-05, "loss": 1.9473, "step": 21138000 }, { "epoch": 61.19, "learning_rate": 1.9417337729251575e-05, "loss": 1.9197, "step": 21138500 }, { "epoch": 61.19, "learning_rate": 1.9416614081604297e-05, "loss": 1.9299, "step": 21139000 }, { "epoch": 61.19, "learning_rate": 1.941589043395702e-05, "loss": 1.9584, "step": 21139500 }, { "epoch": 61.19, "learning_rate": 1.9415166786309745e-05, "loss": 1.9328, "step": 21140000 }, { "epoch": 61.19, "learning_rate": 1.941444313866247e-05, "loss": 1.9245, "step": 21140500 }, { "epoch": 61.19, "learning_rate": 1.9413719491015193e-05, "loss": 1.9239, "step": 21141000 }, { "epoch": 61.2, "learning_rate": 1.9412995843367915e-05, "loss": 1.901, "step": 21141500 }, { "epoch": 61.2, "learning_rate": 1.9412272195720638e-05, "loss": 1.9548, "step": 21142000 }, { "epoch": 61.2, "learning_rate": 1.941154854807336e-05, "loss": 1.9446, "step": 21142500 }, { "epoch": 61.2, "learning_rate": 1.941082634772138e-05, "loss": 1.9196, "step": 21143000 }, { "epoch": 61.2, "learning_rate": 1.9410102700074104e-05, "loss": 1.9333, "step": 21143500 }, { "epoch": 61.2, "learning_rate": 1.9409379052426827e-05, "loss": 1.9352, "step": 21144000 }, { "epoch": 61.2, "learning_rate": 1.940865540477955e-05, "loss": 1.9457, "step": 21144500 }, { "epoch": 61.21, "learning_rate": 1.940793175713227e-05, "loss": 1.9429, "step": 21145000 }, { "epoch": 61.21, "learning_rate": 1.9407208109484997e-05, "loss": 1.9276, "step": 21145500 }, { "epoch": 61.21, "learning_rate": 1.9406485909133012e-05, "loss": 1.9187, "step": 21146000 }, { "epoch": 61.21, "learning_rate": 1.9405762261485734e-05, "loss": 1.9108, "step": 21146500 }, { "epoch": 61.21, "learning_rate": 1.940503861383846e-05, "loss": 1.9263, "step": 21147000 }, { "epoch": 61.21, "learning_rate": 1.9404314966191182e-05, "loss": 1.9441, "step": 21147500 }, { "epoch": 61.21, "learning_rate": 1.9403591318543908e-05, "loss": 1.9244, "step": 21148000 }, { "epoch": 61.22, "learning_rate": 1.940286767089663e-05, "loss": 1.9476, "step": 21148500 }, { "epoch": 61.22, "learning_rate": 1.9402145470544646e-05, "loss": 1.9317, "step": 21149000 }, { "epoch": 61.22, "learning_rate": 1.940142182289737e-05, "loss": 1.9366, "step": 21149500 }, { "epoch": 61.22, "learning_rate": 1.9400698175250094e-05, "loss": 1.9601, "step": 21150000 }, { "epoch": 61.22, "learning_rate": 1.9399974527602816e-05, "loss": 1.9282, "step": 21150500 }, { "epoch": 61.22, "learning_rate": 1.939925087995554e-05, "loss": 1.9442, "step": 21151000 }, { "epoch": 61.22, "learning_rate": 1.9398527232308264e-05, "loss": 1.9444, "step": 21151500 }, { "epoch": 61.23, "learning_rate": 1.9397805031956283e-05, "loss": 1.9335, "step": 21152000 }, { "epoch": 61.23, "learning_rate": 1.9397081384309005e-05, "loss": 1.9479, "step": 21152500 }, { "epoch": 61.23, "learning_rate": 1.9396357736661727e-05, "loss": 1.9473, "step": 21153000 }, { "epoch": 61.23, "learning_rate": 1.939563408901445e-05, "loss": 1.9381, "step": 21153500 }, { "epoch": 61.23, "learning_rate": 1.9394911888662468e-05, "loss": 1.9156, "step": 21154000 }, { "epoch": 61.23, "learning_rate": 1.939418824101519e-05, "loss": 1.9181, "step": 21154500 }, { "epoch": 61.24, "learning_rate": 1.9393464593367916e-05, "loss": 1.9022, "step": 21155000 }, { "epoch": 61.24, "learning_rate": 1.9392740945720638e-05, "loss": 1.9531, "step": 21155500 }, { "epoch": 61.24, "learning_rate": 1.939201729807336e-05, "loss": 1.9536, "step": 21156000 }, { "epoch": 61.24, "learning_rate": 1.9391293650426086e-05, "loss": 1.9228, "step": 21156500 }, { "epoch": 61.24, "learning_rate": 1.9390571450074102e-05, "loss": 1.945, "step": 21157000 }, { "epoch": 61.24, "learning_rate": 1.9389847802426824e-05, "loss": 1.9255, "step": 21157500 }, { "epoch": 61.24, "learning_rate": 1.9389124154779546e-05, "loss": 1.9166, "step": 21158000 }, { "epoch": 61.25, "learning_rate": 1.9388400507132272e-05, "loss": 1.9163, "step": 21158500 }, { "epoch": 61.25, "learning_rate": 1.9387676859484997e-05, "loss": 1.9355, "step": 21159000 }, { "epoch": 61.25, "learning_rate": 1.9386954659133013e-05, "loss": 1.905, "step": 21159500 }, { "epoch": 61.25, "learning_rate": 1.9386231011485735e-05, "loss": 1.917, "step": 21160000 }, { "epoch": 61.25, "learning_rate": 1.9385508811133754e-05, "loss": 1.9131, "step": 21160500 }, { "epoch": 61.25, "learning_rate": 1.9384785163486476e-05, "loss": 1.9463, "step": 21161000 }, { "epoch": 61.25, "learning_rate": 1.93840615158392e-05, "loss": 1.9364, "step": 21161500 }, { "epoch": 61.26, "learning_rate": 1.938333786819192e-05, "loss": 1.9144, "step": 21162000 }, { "epoch": 61.26, "learning_rate": 1.9382614220544646e-05, "loss": 1.9678, "step": 21162500 }, { "epoch": 61.26, "learning_rate": 1.9381890572897372e-05, "loss": 1.9407, "step": 21163000 }, { "epoch": 61.26, "learning_rate": 1.9381166925250094e-05, "loss": 1.9281, "step": 21163500 }, { "epoch": 61.26, "learning_rate": 1.938044472489811e-05, "loss": 1.9511, "step": 21164000 }, { "epoch": 61.26, "learning_rate": 1.9379721077250836e-05, "loss": 1.9517, "step": 21164500 }, { "epoch": 61.26, "learning_rate": 1.9378997429603558e-05, "loss": 1.9239, "step": 21165000 }, { "epoch": 61.27, "learning_rate": 1.9378275229251573e-05, "loss": 1.9135, "step": 21165500 }, { "epoch": 61.27, "learning_rate": 1.93775515816043e-05, "loss": 1.9485, "step": 21166000 }, { "epoch": 61.27, "learning_rate": 1.9376827933957025e-05, "loss": 1.9202, "step": 21166500 }, { "epoch": 61.27, "learning_rate": 1.9376104286309747e-05, "loss": 1.9275, "step": 21167000 }, { "epoch": 61.27, "learning_rate": 1.937538063866247e-05, "loss": 1.9545, "step": 21167500 }, { "epoch": 61.27, "learning_rate": 1.937465699101519e-05, "loss": 1.9363, "step": 21168000 }, { "epoch": 61.27, "learning_rate": 1.9373933343367913e-05, "loss": 1.9381, "step": 21168500 }, { "epoch": 61.28, "learning_rate": 1.9373209695720636e-05, "loss": 1.9321, "step": 21169000 }, { "epoch": 61.28, "learning_rate": 1.937248604807336e-05, "loss": 1.9178, "step": 21169500 }, { "epoch": 61.28, "learning_rate": 1.9371762400426087e-05, "loss": 1.9188, "step": 21170000 }, { "epoch": 61.28, "learning_rate": 1.937103875277881e-05, "loss": 1.94, "step": 21170500 }, { "epoch": 61.28, "learning_rate": 1.937031510513153e-05, "loss": 1.9276, "step": 21171000 }, { "epoch": 61.28, "learning_rate": 1.9369591457484254e-05, "loss": 1.9439, "step": 21171500 }, { "epoch": 61.28, "learning_rate": 1.9368867809836976e-05, "loss": 1.942, "step": 21172000 }, { "epoch": 61.29, "learning_rate": 1.9368144162189698e-05, "loss": 1.9289, "step": 21172500 }, { "epoch": 61.29, "learning_rate": 1.9367421961837717e-05, "loss": 1.9485, "step": 21173000 }, { "epoch": 61.29, "learning_rate": 1.9366698314190443e-05, "loss": 1.9182, "step": 21173500 }, { "epoch": 61.29, "learning_rate": 1.9365974666543165e-05, "loss": 1.9412, "step": 21174000 }, { "epoch": 61.29, "learning_rate": 1.9365251018895887e-05, "loss": 1.9281, "step": 21174500 }, { "epoch": 61.29, "learning_rate": 1.9364527371248613e-05, "loss": 1.9365, "step": 21175000 }, { "epoch": 61.29, "learning_rate": 1.9363803723601335e-05, "loss": 1.931, "step": 21175500 }, { "epoch": 61.3, "learning_rate": 1.9363080075954057e-05, "loss": 1.9138, "step": 21176000 }, { "epoch": 61.3, "learning_rate": 1.936235642830678e-05, "loss": 1.9236, "step": 21176500 }, { "epoch": 61.3, "learning_rate": 1.9361632780659505e-05, "loss": 1.9201, "step": 21177000 }, { "epoch": 61.3, "learning_rate": 1.9360909133012227e-05, "loss": 1.9467, "step": 21177500 }, { "epoch": 61.3, "learning_rate": 1.936018548536495e-05, "loss": 1.9241, "step": 21178000 }, { "epoch": 61.3, "learning_rate": 1.9359461837717675e-05, "loss": 1.9317, "step": 21178500 }, { "epoch": 61.3, "learning_rate": 1.935873963736569e-05, "loss": 1.9363, "step": 21179000 }, { "epoch": 61.31, "learning_rate": 1.9358015989718413e-05, "loss": 1.963, "step": 21179500 }, { "epoch": 61.31, "learning_rate": 1.935729234207114e-05, "loss": 1.9471, "step": 21180000 }, { "epoch": 61.31, "learning_rate": 1.9356568694423864e-05, "loss": 1.9471, "step": 21180500 }, { "epoch": 61.31, "learning_rate": 1.9355845046776586e-05, "loss": 1.9307, "step": 21181000 }, { "epoch": 61.31, "learning_rate": 1.935512139912931e-05, "loss": 1.9411, "step": 21181500 }, { "epoch": 61.31, "learning_rate": 1.935439775148203e-05, "loss": 1.924, "step": 21182000 }, { "epoch": 61.31, "learning_rate": 1.9353674103834753e-05, "loss": 1.9243, "step": 21182500 }, { "epoch": 61.32, "learning_rate": 1.9352951903482772e-05, "loss": 1.9149, "step": 21183000 }, { "epoch": 61.32, "learning_rate": 1.9352228255835494e-05, "loss": 1.9563, "step": 21183500 }, { "epoch": 61.32, "learning_rate": 1.935150460818822e-05, "loss": 1.9414, "step": 21184000 }, { "epoch": 61.32, "learning_rate": 1.9350780960540942e-05, "loss": 1.9256, "step": 21184500 }, { "epoch": 61.32, "learning_rate": 1.9350057312893664e-05, "loss": 1.915, "step": 21185000 }, { "epoch": 61.32, "learning_rate": 1.9349338007132273e-05, "loss": 1.9386, "step": 21185500 }, { "epoch": 61.32, "learning_rate": 1.9348614359484996e-05, "loss": 1.9271, "step": 21186000 }, { "epoch": 61.33, "learning_rate": 1.9347892159133015e-05, "loss": 1.9501, "step": 21186500 }, { "epoch": 61.33, "learning_rate": 1.934716995878103e-05, "loss": 1.9206, "step": 21187000 }, { "epoch": 61.33, "learning_rate": 1.9346446311133752e-05, "loss": 1.9203, "step": 21187500 }, { "epoch": 61.33, "learning_rate": 1.9345722663486478e-05, "loss": 1.9687, "step": 21188000 }, { "epoch": 61.33, "learning_rate": 1.9344999015839204e-05, "loss": 1.9406, "step": 21188500 }, { "epoch": 61.33, "learning_rate": 1.9344275368191926e-05, "loss": 1.9324, "step": 21189000 }, { "epoch": 61.33, "learning_rate": 1.9343551720544648e-05, "loss": 1.9427, "step": 21189500 }, { "epoch": 61.34, "learning_rate": 1.934282807289737e-05, "loss": 1.934, "step": 21190000 }, { "epoch": 61.34, "learning_rate": 1.9342104425250093e-05, "loss": 1.9636, "step": 21190500 }, { "epoch": 61.34, "learning_rate": 1.9341380777602815e-05, "loss": 1.9275, "step": 21191000 }, { "epoch": 61.34, "learning_rate": 1.934065712995554e-05, "loss": 1.9397, "step": 21191500 }, { "epoch": 61.34, "learning_rate": 1.9339933482308266e-05, "loss": 1.954, "step": 21192000 }, { "epoch": 61.34, "learning_rate": 1.9339209834660988e-05, "loss": 1.9476, "step": 21192500 }, { "epoch": 61.35, "learning_rate": 1.933848618701371e-05, "loss": 1.9292, "step": 21193000 }, { "epoch": 61.35, "learning_rate": 1.9337762539366433e-05, "loss": 1.9421, "step": 21193500 }, { "epoch": 61.35, "learning_rate": 1.9337038891719155e-05, "loss": 1.921, "step": 21194000 }, { "epoch": 61.35, "learning_rate": 1.9336316691367174e-05, "loss": 1.9319, "step": 21194500 }, { "epoch": 61.35, "learning_rate": 1.9335593043719896e-05, "loss": 1.9092, "step": 21195000 }, { "epoch": 61.35, "learning_rate": 1.933486939607262e-05, "loss": 1.9471, "step": 21195500 }, { "epoch": 61.35, "learning_rate": 1.9334145748425344e-05, "loss": 1.9337, "step": 21196000 }, { "epoch": 61.36, "learning_rate": 1.9333422100778066e-05, "loss": 1.9394, "step": 21196500 }, { "epoch": 61.36, "learning_rate": 1.9332698453130792e-05, "loss": 1.9276, "step": 21197000 }, { "epoch": 61.36, "learning_rate": 1.9331974805483514e-05, "loss": 1.9303, "step": 21197500 }, { "epoch": 61.36, "learning_rate": 1.9331251157836236e-05, "loss": 1.9246, "step": 21198000 }, { "epoch": 61.36, "learning_rate": 1.933052751018896e-05, "loss": 1.9404, "step": 21198500 }, { "epoch": 61.36, "learning_rate": 1.9329805309836977e-05, "loss": 1.941, "step": 21199000 }, { "epoch": 61.36, "learning_rate": 1.9329081662189703e-05, "loss": 1.9572, "step": 21199500 }, { "epoch": 61.37, "learning_rate": 1.9328358014542425e-05, "loss": 1.9181, "step": 21200000 }, { "epoch": 61.37, "learning_rate": 1.9327634366895148e-05, "loss": 1.9558, "step": 21200500 }, { "epoch": 61.37, "learning_rate": 1.932691071924787e-05, "loss": 1.9542, "step": 21201000 }, { "epoch": 61.37, "learning_rate": 1.9326187071600592e-05, "loss": 1.9541, "step": 21201500 }, { "epoch": 61.37, "learning_rate": 1.932546487124861e-05, "loss": 1.9478, "step": 21202000 }, { "epoch": 61.37, "learning_rate": 1.9324741223601333e-05, "loss": 1.9476, "step": 21202500 }, { "epoch": 61.37, "learning_rate": 1.932401757595406e-05, "loss": 1.9433, "step": 21203000 }, { "epoch": 61.38, "learning_rate": 1.932329392830678e-05, "loss": 1.9208, "step": 21203500 }, { "epoch": 61.38, "learning_rate": 1.9322570280659503e-05, "loss": 1.9341, "step": 21204000 }, { "epoch": 61.38, "learning_rate": 1.9321848080307522e-05, "loss": 1.9321, "step": 21204500 }, { "epoch": 61.38, "learning_rate": 1.932112587995554e-05, "loss": 1.9465, "step": 21205000 }, { "epoch": 61.38, "learning_rate": 1.9320403679603557e-05, "loss": 1.9425, "step": 21205500 }, { "epoch": 61.38, "learning_rate": 1.931968003195628e-05, "loss": 1.95, "step": 21206000 }, { "epoch": 61.38, "learning_rate": 1.9318956384309005e-05, "loss": 1.9335, "step": 21206500 }, { "epoch": 61.39, "learning_rate": 1.931823273666173e-05, "loss": 1.958, "step": 21207000 }, { "epoch": 61.39, "learning_rate": 1.9317509089014452e-05, "loss": 1.9435, "step": 21207500 }, { "epoch": 61.39, "learning_rate": 1.9316785441367175e-05, "loss": 1.9433, "step": 21208000 }, { "epoch": 61.39, "learning_rate": 1.9316061793719897e-05, "loss": 1.9217, "step": 21208500 }, { "epoch": 61.39, "learning_rate": 1.931533814607262e-05, "loss": 1.9322, "step": 21209000 }, { "epoch": 61.39, "learning_rate": 1.931461449842534e-05, "loss": 1.9354, "step": 21209500 }, { "epoch": 61.39, "learning_rate": 1.9313890850778067e-05, "loss": 1.9136, "step": 21210000 }, { "epoch": 61.4, "learning_rate": 1.9313167203130793e-05, "loss": 1.9466, "step": 21210500 }, { "epoch": 61.4, "learning_rate": 1.9312443555483515e-05, "loss": 1.9236, "step": 21211000 }, { "epoch": 61.4, "learning_rate": 1.9311719907836237e-05, "loss": 1.9415, "step": 21211500 }, { "epoch": 61.4, "learning_rate": 1.931099626018896e-05, "loss": 1.9509, "step": 21212000 }, { "epoch": 61.4, "learning_rate": 1.9310274059836978e-05, "loss": 1.9361, "step": 21212500 }, { "epoch": 61.4, "learning_rate": 1.93095504121897e-05, "loss": 1.9434, "step": 21213000 }, { "epoch": 61.4, "learning_rate": 1.9308826764542423e-05, "loss": 1.951, "step": 21213500 }, { "epoch": 61.41, "learning_rate": 1.9308103116895145e-05, "loss": 1.9295, "step": 21214000 }, { "epoch": 61.41, "learning_rate": 1.930737946924787e-05, "loss": 1.926, "step": 21214500 }, { "epoch": 61.41, "learning_rate": 1.9306655821600593e-05, "loss": 1.937, "step": 21215000 }, { "epoch": 61.41, "learning_rate": 1.9305933621248612e-05, "loss": 1.9262, "step": 21215500 }, { "epoch": 61.41, "learning_rate": 1.9305209973601334e-05, "loss": 1.9576, "step": 21216000 }, { "epoch": 61.41, "learning_rate": 1.9304486325954056e-05, "loss": 1.9593, "step": 21216500 }, { "epoch": 61.41, "learning_rate": 1.9303762678306782e-05, "loss": 1.9239, "step": 21217000 }, { "epoch": 61.42, "learning_rate": 1.9303039030659504e-05, "loss": 1.939, "step": 21217500 }, { "epoch": 61.42, "learning_rate": 1.930231538301223e-05, "loss": 1.9622, "step": 21218000 }, { "epoch": 61.42, "learning_rate": 1.9301593182660245e-05, "loss": 1.9265, "step": 21218500 }, { "epoch": 61.42, "learning_rate": 1.9300870982308264e-05, "loss": 1.9372, "step": 21219000 }, { "epoch": 61.42, "learning_rate": 1.9300147334660986e-05, "loss": 1.9484, "step": 21219500 }, { "epoch": 61.42, "learning_rate": 1.929942368701371e-05, "loss": 1.9213, "step": 21220000 }, { "epoch": 61.42, "learning_rate": 1.929870003936643e-05, "loss": 1.9276, "step": 21220500 }, { "epoch": 61.43, "learning_rate": 1.9297976391719157e-05, "loss": 1.9498, "step": 21221000 }, { "epoch": 61.43, "learning_rate": 1.929725274407188e-05, "loss": 1.9163, "step": 21221500 }, { "epoch": 61.43, "learning_rate": 1.9296529096424604e-05, "loss": 1.9509, "step": 21222000 }, { "epoch": 61.43, "learning_rate": 1.9295805448777327e-05, "loss": 1.9399, "step": 21222500 }, { "epoch": 61.43, "learning_rate": 1.9295083248425346e-05, "loss": 1.9588, "step": 21223000 }, { "epoch": 61.43, "learning_rate": 1.929436104807336e-05, "loss": 1.9387, "step": 21223500 }, { "epoch": 61.43, "learning_rate": 1.9293637400426083e-05, "loss": 1.9329, "step": 21224000 }, { "epoch": 61.44, "learning_rate": 1.929291375277881e-05, "loss": 1.9274, "step": 21224500 }, { "epoch": 61.44, "learning_rate": 1.929219010513153e-05, "loss": 1.9367, "step": 21225000 }, { "epoch": 61.44, "learning_rate": 1.9291467904779547e-05, "loss": 1.945, "step": 21225500 }, { "epoch": 61.44, "learning_rate": 1.9290744257132272e-05, "loss": 1.9091, "step": 21226000 }, { "epoch": 61.44, "learning_rate": 1.9290020609484995e-05, "loss": 1.9198, "step": 21226500 }, { "epoch": 61.44, "learning_rate": 1.928929696183772e-05, "loss": 1.9422, "step": 21227000 }, { "epoch": 61.44, "learning_rate": 1.9288573314190442e-05, "loss": 1.9351, "step": 21227500 }, { "epoch": 61.45, "learning_rate": 1.9287849666543165e-05, "loss": 1.9442, "step": 21228000 }, { "epoch": 61.45, "learning_rate": 1.9287126018895887e-05, "loss": 1.9467, "step": 21228500 }, { "epoch": 61.45, "learning_rate": 1.928640237124861e-05, "loss": 1.9439, "step": 21229000 }, { "epoch": 61.45, "learning_rate": 1.9285678723601335e-05, "loss": 1.9304, "step": 21229500 }, { "epoch": 61.45, "learning_rate": 1.9284955075954057e-05, "loss": 1.9386, "step": 21230000 }, { "epoch": 61.45, "learning_rate": 1.928423432289737e-05, "loss": 1.9371, "step": 21230500 }, { "epoch": 61.46, "learning_rate": 1.9283510675250095e-05, "loss": 1.9304, "step": 21231000 }, { "epoch": 61.46, "learning_rate": 1.928278847489811e-05, "loss": 1.9326, "step": 21231500 }, { "epoch": 61.46, "learning_rate": 1.9282064827250833e-05, "loss": 1.9538, "step": 21232000 }, { "epoch": 61.46, "learning_rate": 1.9281344074194145e-05, "loss": 1.923, "step": 21232500 }, { "epoch": 61.46, "learning_rate": 1.928062042654687e-05, "loss": 1.9493, "step": 21233000 }, { "epoch": 61.46, "learning_rate": 1.9279896778899593e-05, "loss": 1.9502, "step": 21233500 }, { "epoch": 61.46, "learning_rate": 1.927917313125232e-05, "loss": 1.9415, "step": 21234000 }, { "epoch": 61.47, "learning_rate": 1.927844948360504e-05, "loss": 1.9046, "step": 21234500 }, { "epoch": 61.47, "learning_rate": 1.9277725835957763e-05, "loss": 1.9551, "step": 21235000 }, { "epoch": 61.47, "learning_rate": 1.9277002188310485e-05, "loss": 1.9519, "step": 21235500 }, { "epoch": 61.47, "learning_rate": 1.9276278540663207e-05, "loss": 1.944, "step": 21236000 }, { "epoch": 61.47, "learning_rate": 1.9275554893015933e-05, "loss": 1.9403, "step": 21236500 }, { "epoch": 61.47, "learning_rate": 1.9274831245368655e-05, "loss": 1.9175, "step": 21237000 }, { "epoch": 61.47, "learning_rate": 1.927410759772138e-05, "loss": 1.9213, "step": 21237500 }, { "epoch": 61.48, "learning_rate": 1.9273383950074103e-05, "loss": 1.9413, "step": 21238000 }, { "epoch": 61.48, "learning_rate": 1.9272660302426825e-05, "loss": 1.9298, "step": 21238500 }, { "epoch": 61.48, "learning_rate": 1.9271936654779548e-05, "loss": 1.9572, "step": 21239000 }, { "epoch": 61.48, "learning_rate": 1.9271213007132273e-05, "loss": 1.9315, "step": 21239500 }, { "epoch": 61.48, "learning_rate": 1.927049080678029e-05, "loss": 1.9393, "step": 21240000 }, { "epoch": 61.48, "learning_rate": 1.926976715913301e-05, "loss": 1.9495, "step": 21240500 }, { "epoch": 61.48, "learning_rate": 1.9269043511485737e-05, "loss": 1.951, "step": 21241000 }, { "epoch": 61.49, "learning_rate": 1.9268321311133756e-05, "loss": 1.9448, "step": 21241500 }, { "epoch": 61.49, "learning_rate": 1.9267597663486478e-05, "loss": 1.9109, "step": 21242000 }, { "epoch": 61.49, "learning_rate": 1.92668740158392e-05, "loss": 1.9376, "step": 21242500 }, { "epoch": 61.49, "learning_rate": 1.9266150368191922e-05, "loss": 1.9216, "step": 21243000 }, { "epoch": 61.49, "learning_rate": 1.9265426720544648e-05, "loss": 1.9104, "step": 21243500 }, { "epoch": 61.49, "learning_rate": 1.926470307289737e-05, "loss": 1.9431, "step": 21244000 }, { "epoch": 61.49, "learning_rate": 1.9263979425250096e-05, "loss": 1.9139, "step": 21244500 }, { "epoch": 61.5, "learning_rate": 1.9263255777602818e-05, "loss": 1.958, "step": 21245000 }, { "epoch": 61.5, "learning_rate": 1.926253212995554e-05, "loss": 1.936, "step": 21245500 }, { "epoch": 61.5, "learning_rate": 1.9261808482308262e-05, "loss": 1.9167, "step": 21246000 }, { "epoch": 61.5, "learning_rate": 1.9261084834660985e-05, "loss": 1.9521, "step": 21246500 }, { "epoch": 61.5, "learning_rate": 1.926036118701371e-05, "loss": 1.9449, "step": 21247000 }, { "epoch": 61.5, "learning_rate": 1.9259638986661726e-05, "loss": 1.9491, "step": 21247500 }, { "epoch": 61.5, "learning_rate": 1.9258915339014448e-05, "loss": 1.9457, "step": 21248000 }, { "epoch": 61.51, "learning_rate": 1.9258191691367174e-05, "loss": 1.9275, "step": 21248500 }, { "epoch": 61.51, "learning_rate": 1.92574680437199e-05, "loss": 1.9506, "step": 21249000 }, { "epoch": 61.51, "learning_rate": 1.925674439607262e-05, "loss": 1.9318, "step": 21249500 }, { "epoch": 61.51, "learning_rate": 1.9256020748425344e-05, "loss": 1.9204, "step": 21250000 }, { "epoch": 61.51, "learning_rate": 1.9255297100778066e-05, "loss": 1.9399, "step": 21250500 }, { "epoch": 61.51, "learning_rate": 1.9254573453130788e-05, "loss": 1.9308, "step": 21251000 }, { "epoch": 61.51, "learning_rate": 1.925384980548351e-05, "loss": 1.9463, "step": 21251500 }, { "epoch": 61.52, "learning_rate": 1.9253126157836236e-05, "loss": 1.9603, "step": 21252000 }, { "epoch": 61.52, "learning_rate": 1.9252402510188962e-05, "loss": 1.9539, "step": 21252500 }, { "epoch": 61.52, "learning_rate": 1.9251678862541684e-05, "loss": 1.9652, "step": 21253000 }, { "epoch": 61.52, "learning_rate": 1.92509566621897e-05, "loss": 1.9337, "step": 21253500 }, { "epoch": 61.52, "learning_rate": 1.925023446183772e-05, "loss": 1.9361, "step": 21254000 }, { "epoch": 61.52, "learning_rate": 1.924951081419044e-05, "loss": 1.8999, "step": 21254500 }, { "epoch": 61.52, "learning_rate": 1.9248787166543163e-05, "loss": 1.9185, "step": 21255000 }, { "epoch": 61.53, "learning_rate": 1.924806351889589e-05, "loss": 1.942, "step": 21255500 }, { "epoch": 61.53, "learning_rate": 1.9247339871248614e-05, "loss": 1.9559, "step": 21256000 }, { "epoch": 61.53, "learning_rate": 1.9246616223601336e-05, "loss": 1.939, "step": 21256500 }, { "epoch": 61.53, "learning_rate": 1.924589257595406e-05, "loss": 1.9324, "step": 21257000 }, { "epoch": 61.53, "learning_rate": 1.924516892830678e-05, "loss": 1.9273, "step": 21257500 }, { "epoch": 61.53, "learning_rate": 1.92444467279548e-05, "loss": 1.9407, "step": 21258000 }, { "epoch": 61.53, "learning_rate": 1.9243723080307522e-05, "loss": 1.9317, "step": 21258500 }, { "epoch": 61.54, "learning_rate": 1.9242999432660244e-05, "loss": 1.9462, "step": 21259000 }, { "epoch": 61.54, "learning_rate": 1.924227578501297e-05, "loss": 1.9163, "step": 21259500 }, { "epoch": 61.54, "learning_rate": 1.9241552137365692e-05, "loss": 1.9457, "step": 21260000 }, { "epoch": 61.54, "learning_rate": 1.924082993701371e-05, "loss": 1.9396, "step": 21260500 }, { "epoch": 61.54, "learning_rate": 1.9240106289366433e-05, "loss": 1.9358, "step": 21261000 }, { "epoch": 61.54, "learning_rate": 1.9239382641719156e-05, "loss": 1.9158, "step": 21261500 }, { "epoch": 61.54, "learning_rate": 1.9238658994071878e-05, "loss": 1.9277, "step": 21262000 }, { "epoch": 61.55, "learning_rate": 1.92379353464246e-05, "loss": 1.9369, "step": 21262500 }, { "epoch": 61.55, "learning_rate": 1.9237211698777326e-05, "loss": 1.9495, "step": 21263000 }, { "epoch": 61.55, "learning_rate": 1.9236489498425345e-05, "loss": 1.9372, "step": 21263500 }, { "epoch": 61.55, "learning_rate": 1.9235765850778067e-05, "loss": 1.9305, "step": 21264000 }, { "epoch": 61.55, "learning_rate": 1.923504220313079e-05, "loss": 1.9569, "step": 21264500 }, { "epoch": 61.55, "learning_rate": 1.9234318555483515e-05, "loss": 1.9544, "step": 21265000 }, { "epoch": 61.55, "learning_rate": 1.9233594907836237e-05, "loss": 1.9139, "step": 21265500 }, { "epoch": 61.56, "learning_rate": 1.923287126018896e-05, "loss": 1.939, "step": 21266000 }, { "epoch": 61.56, "learning_rate": 1.9232147612541685e-05, "loss": 1.9188, "step": 21266500 }, { "epoch": 61.56, "learning_rate": 1.9231423964894407e-05, "loss": 1.9475, "step": 21267000 }, { "epoch": 61.56, "learning_rate": 1.9230701764542426e-05, "loss": 1.9696, "step": 21267500 }, { "epoch": 61.56, "learning_rate": 1.9229978116895148e-05, "loss": 1.9322, "step": 21268000 }, { "epoch": 61.56, "learning_rate": 1.9229255916543164e-05, "loss": 1.9365, "step": 21268500 }, { "epoch": 61.57, "learning_rate": 1.922853226889589e-05, "loss": 1.943, "step": 21269000 }, { "epoch": 61.57, "learning_rate": 1.922780862124861e-05, "loss": 1.9512, "step": 21269500 }, { "epoch": 61.57, "learning_rate": 1.9227084973601334e-05, "loss": 1.9161, "step": 21270000 }, { "epoch": 61.57, "learning_rate": 1.922636132595406e-05, "loss": 1.9381, "step": 21270500 }, { "epoch": 61.57, "learning_rate": 1.922563767830678e-05, "loss": 1.9465, "step": 21271000 }, { "epoch": 61.57, "learning_rate": 1.92249154779548e-05, "loss": 1.9502, "step": 21271500 }, { "epoch": 61.57, "learning_rate": 1.9224191830307523e-05, "loss": 1.9143, "step": 21272000 }, { "epoch": 61.58, "learning_rate": 1.9223468182660245e-05, "loss": 1.9365, "step": 21272500 }, { "epoch": 61.58, "learning_rate": 1.9222744535012967e-05, "loss": 1.9426, "step": 21273000 }, { "epoch": 61.58, "learning_rate": 1.9222022334660986e-05, "loss": 1.9591, "step": 21273500 }, { "epoch": 61.58, "learning_rate": 1.922129868701371e-05, "loss": 1.9285, "step": 21274000 }, { "epoch": 61.58, "learning_rate": 1.9220575039366434e-05, "loss": 1.9257, "step": 21274500 }, { "epoch": 61.58, "learning_rate": 1.9219851391719156e-05, "loss": 1.9502, "step": 21275000 }, { "epoch": 61.58, "learning_rate": 1.9219129191367175e-05, "loss": 1.9314, "step": 21275500 }, { "epoch": 61.59, "learning_rate": 1.9218405543719898e-05, "loss": 1.9289, "step": 21276000 }, { "epoch": 61.59, "learning_rate": 1.921768189607262e-05, "loss": 1.9636, "step": 21276500 }, { "epoch": 61.59, "learning_rate": 1.9216958248425342e-05, "loss": 1.9524, "step": 21277000 }, { "epoch": 61.59, "learning_rate": 1.921623604807336e-05, "loss": 1.9485, "step": 21277500 }, { "epoch": 61.59, "learning_rate": 1.9215512400426083e-05, "loss": 1.9598, "step": 21278000 }, { "epoch": 61.59, "learning_rate": 1.921478875277881e-05, "loss": 1.9332, "step": 21278500 }, { "epoch": 61.59, "learning_rate": 1.921406799972212e-05, "loss": 1.9354, "step": 21279000 }, { "epoch": 61.6, "learning_rate": 1.9213344352074843e-05, "loss": 1.9505, "step": 21279500 }, { "epoch": 61.6, "learning_rate": 1.9212620704427566e-05, "loss": 1.9262, "step": 21280000 }, { "epoch": 61.6, "learning_rate": 1.921189705678029e-05, "loss": 1.9466, "step": 21280500 }, { "epoch": 61.6, "learning_rate": 1.9211173409133013e-05, "loss": 1.9311, "step": 21281000 }, { "epoch": 61.6, "learning_rate": 1.9210449761485736e-05, "loss": 1.9479, "step": 21281500 }, { "epoch": 61.6, "learning_rate": 1.920972611383846e-05, "loss": 1.9483, "step": 21282000 }, { "epoch": 61.6, "learning_rate": 1.9209002466191183e-05, "loss": 1.9469, "step": 21282500 }, { "epoch": 61.61, "learning_rate": 1.9208278818543906e-05, "loss": 1.9308, "step": 21283000 }, { "epoch": 61.61, "learning_rate": 1.9207555170896628e-05, "loss": 1.9397, "step": 21283500 }, { "epoch": 61.61, "learning_rate": 1.9206832970544647e-05, "loss": 1.9533, "step": 21284000 }, { "epoch": 61.61, "learning_rate": 1.920610932289737e-05, "loss": 1.9488, "step": 21284500 }, { "epoch": 61.61, "learning_rate": 1.920538567525009e-05, "loss": 1.9321, "step": 21285000 }, { "epoch": 61.61, "learning_rate": 1.9204662027602817e-05, "loss": 1.9229, "step": 21285500 }, { "epoch": 61.61, "learning_rate": 1.9203938379955543e-05, "loss": 1.9568, "step": 21286000 }, { "epoch": 61.62, "learning_rate": 1.9203216179603558e-05, "loss": 1.9457, "step": 21286500 }, { "epoch": 61.62, "learning_rate": 1.920249253195628e-05, "loss": 1.9352, "step": 21287000 }, { "epoch": 61.62, "learning_rate": 1.9201768884309006e-05, "loss": 1.9746, "step": 21287500 }, { "epoch": 61.62, "learning_rate": 1.9201045236661728e-05, "loss": 1.9389, "step": 21288000 }, { "epoch": 61.62, "learning_rate": 1.920032158901445e-05, "loss": 1.9203, "step": 21288500 }, { "epoch": 61.62, "learning_rate": 1.9199597941367173e-05, "loss": 1.9226, "step": 21289000 }, { "epoch": 61.62, "learning_rate": 1.91988742937199e-05, "loss": 1.915, "step": 21289500 }, { "epoch": 61.63, "learning_rate": 1.919815064607262e-05, "loss": 1.9477, "step": 21290000 }, { "epoch": 61.63, "learning_rate": 1.9197426998425343e-05, "loss": 1.9351, "step": 21290500 }, { "epoch": 61.63, "learning_rate": 1.919670335077807e-05, "loss": 1.9239, "step": 21291000 }, { "epoch": 61.63, "learning_rate": 1.919597970313079e-05, "loss": 1.9258, "step": 21291500 }, { "epoch": 61.63, "learning_rate": 1.9195256055483513e-05, "loss": 1.954, "step": 21292000 }, { "epoch": 61.63, "learning_rate": 1.9194532407836235e-05, "loss": 1.9265, "step": 21292500 }, { "epoch": 61.63, "learning_rate": 1.919380876018896e-05, "loss": 1.933, "step": 21293000 }, { "epoch": 61.64, "learning_rate": 1.9193085112541683e-05, "loss": 1.9422, "step": 21293500 }, { "epoch": 61.64, "learning_rate": 1.9192361464894405e-05, "loss": 1.954, "step": 21294000 }, { "epoch": 61.64, "learning_rate": 1.919163781724713e-05, "loss": 1.947, "step": 21294500 }, { "epoch": 61.64, "learning_rate": 1.9190914169599853e-05, "loss": 1.9338, "step": 21295000 }, { "epoch": 61.64, "learning_rate": 1.9190190521952575e-05, "loss": 1.9444, "step": 21295500 }, { "epoch": 61.64, "learning_rate": 1.9189468321600594e-05, "loss": 1.9564, "step": 21296000 }, { "epoch": 61.64, "learning_rate": 1.918874467395332e-05, "loss": 1.9275, "step": 21296500 }, { "epoch": 61.65, "learning_rate": 1.9188021026306042e-05, "loss": 1.9378, "step": 21297000 }, { "epoch": 61.65, "learning_rate": 1.9187298825954058e-05, "loss": 1.9376, "step": 21297500 }, { "epoch": 61.65, "learning_rate": 1.9186576625602077e-05, "loss": 1.936, "step": 21298000 }, { "epoch": 61.65, "learning_rate": 1.91858529779548e-05, "loss": 1.8962, "step": 21298500 }, { "epoch": 61.65, "learning_rate": 1.918512933030752e-05, "loss": 1.9517, "step": 21299000 }, { "epoch": 61.65, "learning_rate": 1.9184405682660243e-05, "loss": 1.9442, "step": 21299500 }, { "epoch": 61.65, "learning_rate": 1.918368203501297e-05, "loss": 1.9529, "step": 21300000 }, { "epoch": 61.66, "learning_rate": 1.9182958387365695e-05, "loss": 1.9508, "step": 21300500 }, { "epoch": 61.66, "learning_rate": 1.9182234739718417e-05, "loss": 1.9141, "step": 21301000 }, { "epoch": 61.66, "learning_rate": 1.9181512539366432e-05, "loss": 1.9363, "step": 21301500 }, { "epoch": 61.66, "learning_rate": 1.9180788891719158e-05, "loss": 1.9435, "step": 21302000 }, { "epoch": 61.66, "learning_rate": 1.918006524407188e-05, "loss": 1.9444, "step": 21302500 }, { "epoch": 61.66, "learning_rate": 1.9179341596424602e-05, "loss": 1.9358, "step": 21303000 }, { "epoch": 61.66, "learning_rate": 1.9178617948777325e-05, "loss": 1.9672, "step": 21303500 }, { "epoch": 61.67, "learning_rate": 1.9177894301130047e-05, "loss": 1.9261, "step": 21304000 }, { "epoch": 61.67, "learning_rate": 1.917717210077807e-05, "loss": 1.9529, "step": 21304500 }, { "epoch": 61.67, "learning_rate": 1.917644845313079e-05, "loss": 1.9431, "step": 21305000 }, { "epoch": 61.67, "learning_rate": 1.9175724805483514e-05, "loss": 1.9337, "step": 21305500 }, { "epoch": 61.67, "learning_rate": 1.9175001157836236e-05, "loss": 1.9543, "step": 21306000 }, { "epoch": 61.67, "learning_rate": 1.9174277510188958e-05, "loss": 1.9447, "step": 21306500 }, { "epoch": 61.68, "learning_rate": 1.9173553862541684e-05, "loss": 1.9372, "step": 21307000 }, { "epoch": 61.68, "learning_rate": 1.9172833109484996e-05, "loss": 1.9394, "step": 21307500 }, { "epoch": 61.68, "learning_rate": 1.9172109461837718e-05, "loss": 1.9625, "step": 21308000 }, { "epoch": 61.68, "learning_rate": 1.9171385814190444e-05, "loss": 1.9355, "step": 21308500 }, { "epoch": 61.68, "learning_rate": 1.9170662166543166e-05, "loss": 1.9352, "step": 21309000 }, { "epoch": 61.68, "learning_rate": 1.916993851889589e-05, "loss": 1.9435, "step": 21309500 }, { "epoch": 61.68, "learning_rate": 1.916921487124861e-05, "loss": 1.9412, "step": 21310000 }, { "epoch": 61.69, "learning_rate": 1.9168491223601333e-05, "loss": 1.9562, "step": 21310500 }, { "epoch": 61.69, "learning_rate": 1.916776757595406e-05, "loss": 1.9333, "step": 21311000 }, { "epoch": 61.69, "learning_rate": 1.916704392830678e-05, "loss": 1.9658, "step": 21311500 }, { "epoch": 61.69, "learning_rate": 1.9166320280659506e-05, "loss": 1.9353, "step": 21312000 }, { "epoch": 61.69, "learning_rate": 1.916559663301223e-05, "loss": 1.9571, "step": 21312500 }, { "epoch": 61.69, "learning_rate": 1.916487298536495e-05, "loss": 1.9331, "step": 21313000 }, { "epoch": 61.69, "learning_rate": 1.9164149337717673e-05, "loss": 1.9451, "step": 21313500 }, { "epoch": 61.7, "learning_rate": 1.9163425690070395e-05, "loss": 1.9328, "step": 21314000 }, { "epoch": 61.7, "learning_rate": 1.916270204242312e-05, "loss": 1.9414, "step": 21314500 }, { "epoch": 61.7, "learning_rate": 1.9161978394775843e-05, "loss": 1.9341, "step": 21315000 }, { "epoch": 61.7, "learning_rate": 1.916125474712857e-05, "loss": 1.9199, "step": 21315500 }, { "epoch": 61.7, "learning_rate": 1.916053109948129e-05, "loss": 1.9104, "step": 21316000 }, { "epoch": 61.7, "learning_rate": 1.915980889912931e-05, "loss": 1.9561, "step": 21316500 }, { "epoch": 61.7, "learning_rate": 1.9159085251482032e-05, "loss": 1.9317, "step": 21317000 }, { "epoch": 61.71, "learning_rate": 1.9158361603834754e-05, "loss": 1.9239, "step": 21317500 }, { "epoch": 61.71, "learning_rate": 1.9157637956187477e-05, "loss": 1.9491, "step": 21318000 }, { "epoch": 61.71, "learning_rate": 1.9156915755835496e-05, "loss": 1.9714, "step": 21318500 }, { "epoch": 61.71, "learning_rate": 1.915619355548351e-05, "loss": 1.9444, "step": 21319000 }, { "epoch": 61.71, "learning_rate": 1.9155469907836237e-05, "loss": 1.9491, "step": 21319500 }, { "epoch": 61.71, "learning_rate": 1.915474626018896e-05, "loss": 1.9315, "step": 21320000 }, { "epoch": 61.71, "learning_rate": 1.9154022612541685e-05, "loss": 1.9454, "step": 21320500 }, { "epoch": 61.72, "learning_rate": 1.9153298964894407e-05, "loss": 1.9151, "step": 21321000 }, { "epoch": 61.72, "learning_rate": 1.915257531724713e-05, "loss": 1.9321, "step": 21321500 }, { "epoch": 61.72, "learning_rate": 1.915185166959985e-05, "loss": 1.9428, "step": 21322000 }, { "epoch": 61.72, "learning_rate": 1.9151128021952573e-05, "loss": 1.9316, "step": 21322500 }, { "epoch": 61.72, "learning_rate": 1.91504043743053e-05, "loss": 1.9305, "step": 21323000 }, { "epoch": 61.72, "learning_rate": 1.9149682173953318e-05, "loss": 1.9644, "step": 21323500 }, { "epoch": 61.72, "learning_rate": 1.914895852630604e-05, "loss": 1.9593, "step": 21324000 }, { "epoch": 61.73, "learning_rate": 1.9148234878658763e-05, "loss": 1.9319, "step": 21324500 }, { "epoch": 61.73, "learning_rate": 1.9147511231011485e-05, "loss": 1.932, "step": 21325000 }, { "epoch": 61.73, "learning_rate": 1.914678758336421e-05, "loss": 1.9258, "step": 21325500 }, { "epoch": 61.73, "learning_rate": 1.9146065383012226e-05, "loss": 1.9184, "step": 21326000 }, { "epoch": 61.73, "learning_rate": 1.914534173536495e-05, "loss": 1.9538, "step": 21326500 }, { "epoch": 61.73, "learning_rate": 1.9144620982308264e-05, "loss": 1.9419, "step": 21327000 }, { "epoch": 61.73, "learning_rate": 1.9143897334660986e-05, "loss": 1.9593, "step": 21327500 }, { "epoch": 61.74, "learning_rate": 1.914317368701371e-05, "loss": 1.9253, "step": 21328000 }, { "epoch": 61.74, "learning_rate": 1.9142450039366434e-05, "loss": 1.952, "step": 21328500 }, { "epoch": 61.74, "learning_rate": 1.914172783901445e-05, "loss": 1.9474, "step": 21329000 }, { "epoch": 61.74, "learning_rate": 1.914100419136717e-05, "loss": 1.9282, "step": 21329500 }, { "epoch": 61.74, "learning_rate": 1.9140280543719897e-05, "loss": 1.9442, "step": 21330000 }, { "epoch": 61.74, "learning_rate": 1.913955689607262e-05, "loss": 1.9398, "step": 21330500 }, { "epoch": 61.74, "learning_rate": 1.9138833248425345e-05, "loss": 1.9466, "step": 21331000 }, { "epoch": 61.75, "learning_rate": 1.9138109600778067e-05, "loss": 1.9364, "step": 21331500 }, { "epoch": 61.75, "learning_rate": 1.913738595313079e-05, "loss": 1.939, "step": 21332000 }, { "epoch": 61.75, "learning_rate": 1.9136662305483512e-05, "loss": 1.9362, "step": 21332500 }, { "epoch": 61.75, "learning_rate": 1.9135938657836237e-05, "loss": 1.9556, "step": 21333000 }, { "epoch": 61.75, "learning_rate": 1.913521501018896e-05, "loss": 1.9603, "step": 21333500 }, { "epoch": 61.75, "learning_rate": 1.9134492809836975e-05, "loss": 1.9277, "step": 21334000 }, { "epoch": 61.75, "learning_rate": 1.91337691621897e-05, "loss": 1.9453, "step": 21334500 }, { "epoch": 61.76, "learning_rate": 1.9133045514542423e-05, "loss": 1.9667, "step": 21335000 }, { "epoch": 61.76, "learning_rate": 1.913232186689515e-05, "loss": 1.9092, "step": 21335500 }, { "epoch": 61.76, "learning_rate": 1.913159821924787e-05, "loss": 1.9211, "step": 21336000 }, { "epoch": 61.76, "learning_rate": 1.9130874571600593e-05, "loss": 1.9358, "step": 21336500 }, { "epoch": 61.76, "learning_rate": 1.9130150923953315e-05, "loss": 1.9243, "step": 21337000 }, { "epoch": 61.76, "learning_rate": 1.9129427276306038e-05, "loss": 1.9571, "step": 21337500 }, { "epoch": 61.76, "learning_rate": 1.9128703628658763e-05, "loss": 1.9327, "step": 21338000 }, { "epoch": 61.77, "learning_rate": 1.9127981428306782e-05, "loss": 1.9398, "step": 21338500 }, { "epoch": 61.77, "learning_rate": 1.9127257780659504e-05, "loss": 1.9407, "step": 21339000 }, { "epoch": 61.77, "learning_rate": 1.9126534133012227e-05, "loss": 1.9452, "step": 21339500 }, { "epoch": 61.77, "learning_rate": 1.912581048536495e-05, "loss": 1.9071, "step": 21340000 }, { "epoch": 61.77, "learning_rate": 1.9125086837717675e-05, "loss": 1.949, "step": 21340500 }, { "epoch": 61.77, "learning_rate": 1.9124363190070397e-05, "loss": 1.9296, "step": 21341000 }, { "epoch": 61.77, "learning_rate": 1.9123639542423122e-05, "loss": 1.9404, "step": 21341500 }, { "epoch": 61.78, "learning_rate": 1.9122915894775845e-05, "loss": 1.9529, "step": 21342000 }, { "epoch": 61.78, "learning_rate": 1.9122192247128567e-05, "loss": 1.9476, "step": 21342500 }, { "epoch": 61.78, "learning_rate": 1.912146859948129e-05, "loss": 1.9284, "step": 21343000 }, { "epoch": 61.78, "learning_rate": 1.9120744951834015e-05, "loss": 1.9576, "step": 21343500 }, { "epoch": 61.78, "learning_rate": 1.912002275148203e-05, "loss": 1.9144, "step": 21344000 }, { "epoch": 61.78, "learning_rate": 1.9119299103834753e-05, "loss": 1.9478, "step": 21344500 }, { "epoch": 61.79, "learning_rate": 1.9118575456187478e-05, "loss": 1.9377, "step": 21345000 }, { "epoch": 61.79, "learning_rate": 1.9117853255835497e-05, "loss": 1.9335, "step": 21345500 }, { "epoch": 61.79, "learning_rate": 1.9117131055483513e-05, "loss": 1.93, "step": 21346000 }, { "epoch": 61.79, "learning_rate": 1.9116407407836238e-05, "loss": 1.9541, "step": 21346500 }, { "epoch": 61.79, "learning_rate": 1.911568376018896e-05, "loss": 1.9495, "step": 21347000 }, { "epoch": 61.79, "learning_rate": 1.9114960112541683e-05, "loss": 1.9461, "step": 21347500 }, { "epoch": 61.79, "learning_rate": 1.9114236464894405e-05, "loss": 1.925, "step": 21348000 }, { "epoch": 61.8, "learning_rate": 1.9113514264542424e-05, "loss": 1.9388, "step": 21348500 }, { "epoch": 61.8, "learning_rate": 1.9112790616895146e-05, "loss": 1.928, "step": 21349000 }, { "epoch": 61.8, "learning_rate": 1.9112066969247872e-05, "loss": 1.9413, "step": 21349500 }, { "epoch": 61.8, "learning_rate": 1.9111343321600594e-05, "loss": 1.9316, "step": 21350000 }, { "epoch": 61.8, "learning_rate": 1.9110619673953316e-05, "loss": 1.9543, "step": 21350500 }, { "epoch": 61.8, "learning_rate": 1.910989602630604e-05, "loss": 1.9379, "step": 21351000 }, { "epoch": 61.8, "learning_rate": 1.9109172378658764e-05, "loss": 1.9336, "step": 21351500 }, { "epoch": 61.81, "learning_rate": 1.9108448731011486e-05, "loss": 1.9523, "step": 21352000 }, { "epoch": 61.81, "learning_rate": 1.9107726530659502e-05, "loss": 1.9452, "step": 21352500 }, { "epoch": 61.81, "learning_rate": 1.9107004330307524e-05, "loss": 1.9577, "step": 21353000 }, { "epoch": 61.81, "learning_rate": 1.9106280682660246e-05, "loss": 1.9545, "step": 21353500 }, { "epoch": 61.81, "learning_rate": 1.910555703501297e-05, "loss": 1.9634, "step": 21354000 }, { "epoch": 61.81, "learning_rate": 1.910483338736569e-05, "loss": 1.9432, "step": 21354500 }, { "epoch": 61.81, "learning_rate": 1.9104109739718417e-05, "loss": 1.9398, "step": 21355000 }, { "epoch": 61.82, "learning_rate": 1.910338609207114e-05, "loss": 1.9354, "step": 21355500 }, { "epoch": 61.82, "learning_rate": 1.9102663891719154e-05, "loss": 1.9671, "step": 21356000 }, { "epoch": 61.82, "learning_rate": 1.9101940244071877e-05, "loss": 1.9479, "step": 21356500 }, { "epoch": 61.82, "learning_rate": 1.9101216596424602e-05, "loss": 1.9589, "step": 21357000 }, { "epoch": 61.82, "learning_rate": 1.9100492948777328e-05, "loss": 1.9396, "step": 21357500 }, { "epoch": 61.82, "learning_rate": 1.909976930113005e-05, "loss": 1.9348, "step": 21358000 }, { "epoch": 61.82, "learning_rate": 1.9099045653482772e-05, "loss": 1.9457, "step": 21358500 }, { "epoch": 61.83, "learning_rate": 1.9098322005835495e-05, "loss": 1.9456, "step": 21359000 }, { "epoch": 61.83, "learning_rate": 1.9097598358188217e-05, "loss": 1.9414, "step": 21359500 }, { "epoch": 61.83, "learning_rate": 1.9096874710540942e-05, "loss": 1.9457, "step": 21360000 }, { "epoch": 61.83, "learning_rate": 1.9096151062893665e-05, "loss": 1.921, "step": 21360500 }, { "epoch": 61.83, "learning_rate": 1.909542741524639e-05, "loss": 1.9456, "step": 21361000 }, { "epoch": 61.83, "learning_rate": 1.9094703767599112e-05, "loss": 1.96, "step": 21361500 }, { "epoch": 61.83, "learning_rate": 1.9093980119951835e-05, "loss": 1.9748, "step": 21362000 }, { "epoch": 61.84, "learning_rate": 1.9093256472304557e-05, "loss": 1.9745, "step": 21362500 }, { "epoch": 61.84, "learning_rate": 1.909253282465728e-05, "loss": 1.912, "step": 21363000 }, { "epoch": 61.84, "learning_rate": 1.909181207160059e-05, "loss": 1.9261, "step": 21363500 }, { "epoch": 61.84, "learning_rate": 1.9091088423953317e-05, "loss": 1.947, "step": 21364000 }, { "epoch": 61.84, "learning_rate": 1.9090364776306043e-05, "loss": 1.9119, "step": 21364500 }, { "epoch": 61.84, "learning_rate": 1.9089641128658765e-05, "loss": 1.9268, "step": 21365000 }, { "epoch": 61.84, "learning_rate": 1.9088917481011487e-05, "loss": 1.9324, "step": 21365500 }, { "epoch": 61.85, "learning_rate": 1.908819383336421e-05, "loss": 1.9391, "step": 21366000 }, { "epoch": 61.85, "learning_rate": 1.908747163301223e-05, "loss": 1.9246, "step": 21366500 }, { "epoch": 61.85, "learning_rate": 1.908674798536495e-05, "loss": 1.9434, "step": 21367000 }, { "epoch": 61.85, "learning_rate": 1.9086024337717673e-05, "loss": 1.9428, "step": 21367500 }, { "epoch": 61.85, "learning_rate": 1.9085302137365692e-05, "loss": 1.9744, "step": 21368000 }, { "epoch": 61.85, "learning_rate": 1.9084578489718417e-05, "loss": 1.9729, "step": 21368500 }, { "epoch": 61.85, "learning_rate": 1.908385484207114e-05, "loss": 1.9503, "step": 21369000 }, { "epoch": 61.86, "learning_rate": 1.9083131194423862e-05, "loss": 1.9633, "step": 21369500 }, { "epoch": 61.86, "learning_rate": 1.908240899407188e-05, "loss": 1.9341, "step": 21370000 }, { "epoch": 61.86, "learning_rate": 1.9081685346424603e-05, "loss": 1.9517, "step": 21370500 }, { "epoch": 61.86, "learning_rate": 1.9080961698777325e-05, "loss": 1.9553, "step": 21371000 }, { "epoch": 61.86, "learning_rate": 1.9080238051130047e-05, "loss": 1.9374, "step": 21371500 }, { "epoch": 61.86, "learning_rate": 1.9079515850778066e-05, "loss": 1.952, "step": 21372000 }, { "epoch": 61.86, "learning_rate": 1.9078793650426085e-05, "loss": 1.9335, "step": 21372500 }, { "epoch": 61.87, "learning_rate": 1.9078070002778808e-05, "loss": 1.8963, "step": 21373000 }, { "epoch": 61.87, "learning_rate": 1.907734635513153e-05, "loss": 1.9601, "step": 21373500 }, { "epoch": 61.87, "learning_rate": 1.9076622707484255e-05, "loss": 1.9621, "step": 21374000 }, { "epoch": 61.87, "learning_rate": 1.9075899059836978e-05, "loss": 1.9343, "step": 21374500 }, { "epoch": 61.87, "learning_rate": 1.90751754121897e-05, "loss": 1.9334, "step": 21375000 }, { "epoch": 61.87, "learning_rate": 1.9074451764542426e-05, "loss": 1.9473, "step": 21375500 }, { "epoch": 61.87, "learning_rate": 1.9073728116895148e-05, "loss": 1.9427, "step": 21376000 }, { "epoch": 61.88, "learning_rate": 1.907300446924787e-05, "loss": 1.9417, "step": 21376500 }, { "epoch": 61.88, "learning_rate": 1.9072280821600592e-05, "loss": 1.9443, "step": 21377000 }, { "epoch": 61.88, "learning_rate": 1.9071557173953318e-05, "loss": 1.9275, "step": 21377500 }, { "epoch": 61.88, "learning_rate": 1.907083352630604e-05, "loss": 1.9628, "step": 21378000 }, { "epoch": 61.88, "learning_rate": 1.9070109878658762e-05, "loss": 1.9468, "step": 21378500 }, { "epoch": 61.88, "learning_rate": 1.9069386231011488e-05, "loss": 1.9506, "step": 21379000 }, { "epoch": 61.88, "learning_rate": 1.9068664030659507e-05, "loss": 1.9236, "step": 21379500 }, { "epoch": 61.89, "learning_rate": 1.906794038301223e-05, "loss": 1.9404, "step": 21380000 }, { "epoch": 61.89, "learning_rate": 1.906721673536495e-05, "loss": 1.9204, "step": 21380500 }, { "epoch": 61.89, "learning_rate": 1.9066493087717674e-05, "loss": 1.9362, "step": 21381000 }, { "epoch": 61.89, "learning_rate": 1.9065769440070396e-05, "loss": 1.9535, "step": 21381500 }, { "epoch": 61.89, "learning_rate": 1.9065047239718415e-05, "loss": 1.9271, "step": 21382000 }, { "epoch": 61.89, "learning_rate": 1.9064323592071137e-05, "loss": 1.9458, "step": 21382500 }, { "epoch": 61.9, "learning_rate": 1.9063601391719156e-05, "loss": 1.9562, "step": 21383000 }, { "epoch": 61.9, "learning_rate": 1.906287774407188e-05, "loss": 1.948, "step": 21383500 }, { "epoch": 61.9, "learning_rate": 1.9062154096424604e-05, "loss": 1.9385, "step": 21384000 }, { "epoch": 61.9, "learning_rate": 1.906143189607262e-05, "loss": 1.9404, "step": 21384500 }, { "epoch": 61.9, "learning_rate": 1.9060708248425345e-05, "loss": 1.9335, "step": 21385000 }, { "epoch": 61.9, "learning_rate": 1.9059984600778067e-05, "loss": 1.9229, "step": 21385500 }, { "epoch": 61.9, "learning_rate": 1.905926095313079e-05, "loss": 1.9476, "step": 21386000 }, { "epoch": 61.91, "learning_rate": 1.905853730548351e-05, "loss": 1.9345, "step": 21386500 }, { "epoch": 61.91, "learning_rate": 1.9057813657836237e-05, "loss": 1.9166, "step": 21387000 }, { "epoch": 61.91, "learning_rate": 1.905709001018896e-05, "loss": 1.9445, "step": 21387500 }, { "epoch": 61.91, "learning_rate": 1.9056366362541682e-05, "loss": 1.927, "step": 21388000 }, { "epoch": 61.91, "learning_rate": 1.9055642714894407e-05, "loss": 1.9273, "step": 21388500 }, { "epoch": 61.91, "learning_rate": 1.905491906724713e-05, "loss": 1.9267, "step": 21389000 }, { "epoch": 61.91, "learning_rate": 1.9054195419599852e-05, "loss": 1.9413, "step": 21389500 }, { "epoch": 61.92, "learning_rate": 1.9053471771952574e-05, "loss": 1.9333, "step": 21390000 }, { "epoch": 61.92, "learning_rate": 1.90527481243053e-05, "loss": 1.9463, "step": 21390500 }, { "epoch": 61.92, "learning_rate": 1.9052024476658022e-05, "loss": 1.9292, "step": 21391000 }, { "epoch": 61.92, "learning_rate": 1.9051300829010748e-05, "loss": 1.9531, "step": 21391500 }, { "epoch": 61.92, "learning_rate": 1.905057718136347e-05, "loss": 1.9554, "step": 21392000 }, { "epoch": 61.92, "learning_rate": 1.9049853533716192e-05, "loss": 1.9597, "step": 21392500 }, { "epoch": 61.92, "learning_rate": 1.9049129886068914e-05, "loss": 1.929, "step": 21393000 }, { "epoch": 61.93, "learning_rate": 1.9048406238421636e-05, "loss": 1.9513, "step": 21393500 }, { "epoch": 61.93, "learning_rate": 1.9047682590774362e-05, "loss": 1.9594, "step": 21394000 }, { "epoch": 61.93, "learning_rate": 1.9046958943127084e-05, "loss": 1.9545, "step": 21394500 }, { "epoch": 61.93, "learning_rate": 1.9046236742775103e-05, "loss": 1.9752, "step": 21395000 }, { "epoch": 61.93, "learning_rate": 1.9045513095127826e-05, "loss": 1.9456, "step": 21395500 }, { "epoch": 61.93, "learning_rate": 1.9044792342071138e-05, "loss": 1.9384, "step": 21396000 }, { "epoch": 61.93, "learning_rate": 1.904406869442386e-05, "loss": 1.9163, "step": 21396500 }, { "epoch": 61.94, "learning_rate": 1.9043345046776582e-05, "loss": 1.9424, "step": 21397000 }, { "epoch": 61.94, "learning_rate": 1.9042621399129308e-05, "loss": 1.9511, "step": 21397500 }, { "epoch": 61.94, "learning_rate": 1.9041897751482033e-05, "loss": 1.936, "step": 21398000 }, { "epoch": 61.94, "learning_rate": 1.9041174103834756e-05, "loss": 1.9498, "step": 21398500 }, { "epoch": 61.94, "learning_rate": 1.9040450456187478e-05, "loss": 1.9506, "step": 21399000 }, { "epoch": 61.94, "learning_rate": 1.9039728255835497e-05, "loss": 1.9375, "step": 21399500 }, { "epoch": 61.94, "learning_rate": 1.903900460818822e-05, "loss": 1.9361, "step": 21400000 }, { "epoch": 61.95, "learning_rate": 1.903828096054094e-05, "loss": 1.9531, "step": 21400500 }, { "epoch": 61.95, "learning_rate": 1.9037557312893664e-05, "loss": 1.9323, "step": 21401000 }, { "epoch": 61.95, "learning_rate": 1.903683366524639e-05, "loss": 1.9222, "step": 21401500 }, { "epoch": 61.95, "learning_rate": 1.903611001759911e-05, "loss": 1.9513, "step": 21402000 }, { "epoch": 61.95, "learning_rate": 1.9035386369951834e-05, "loss": 1.938, "step": 21402500 }, { "epoch": 61.95, "learning_rate": 1.903466272230456e-05, "loss": 1.9797, "step": 21403000 }, { "epoch": 61.95, "learning_rate": 1.903393907465728e-05, "loss": 1.9398, "step": 21403500 }, { "epoch": 61.96, "learning_rate": 1.9033216874305297e-05, "loss": 1.9396, "step": 21404000 }, { "epoch": 61.96, "learning_rate": 1.9032493226658023e-05, "loss": 1.924, "step": 21404500 }, { "epoch": 61.96, "learning_rate": 1.903176957901075e-05, "loss": 1.9302, "step": 21405000 }, { "epoch": 61.96, "learning_rate": 1.9031047378658764e-05, "loss": 1.9551, "step": 21405500 }, { "epoch": 61.96, "learning_rate": 1.9030323731011486e-05, "loss": 1.9511, "step": 21406000 }, { "epoch": 61.96, "learning_rate": 1.9029600083364212e-05, "loss": 1.9694, "step": 21406500 }, { "epoch": 61.96, "learning_rate": 1.9028876435716934e-05, "loss": 1.9365, "step": 21407000 }, { "epoch": 61.97, "learning_rate": 1.9028152788069656e-05, "loss": 1.9355, "step": 21407500 }, { "epoch": 61.97, "learning_rate": 1.902742914042238e-05, "loss": 1.9484, "step": 21408000 }, { "epoch": 61.97, "learning_rate": 1.90267054927751e-05, "loss": 1.9434, "step": 21408500 }, { "epoch": 61.97, "learning_rate": 1.9025981845127826e-05, "loss": 1.9437, "step": 21409000 }, { "epoch": 61.97, "learning_rate": 1.902525819748055e-05, "loss": 1.9265, "step": 21409500 }, { "epoch": 61.97, "learning_rate": 1.9024534549833274e-05, "loss": 1.9661, "step": 21410000 }, { "epoch": 61.97, "learning_rate": 1.902381234948129e-05, "loss": 1.937, "step": 21410500 }, { "epoch": 61.98, "learning_rate": 1.9023091596424602e-05, "loss": 1.9543, "step": 21411000 }, { "epoch": 61.98, "learning_rate": 1.9022367948777324e-05, "loss": 1.9298, "step": 21411500 }, { "epoch": 61.98, "learning_rate": 1.902164430113005e-05, "loss": 1.9328, "step": 21412000 }, { "epoch": 61.98, "learning_rate": 1.9020920653482772e-05, "loss": 1.9565, "step": 21412500 }, { "epoch": 61.98, "learning_rate": 1.9020197005835498e-05, "loss": 1.9117, "step": 21413000 }, { "epoch": 61.98, "learning_rate": 1.901947335818822e-05, "loss": 1.9259, "step": 21413500 }, { "epoch": 61.98, "learning_rate": 1.9018749710540942e-05, "loss": 1.9353, "step": 21414000 }, { "epoch": 61.99, "learning_rate": 1.9018026062893664e-05, "loss": 1.9421, "step": 21414500 }, { "epoch": 61.99, "learning_rate": 1.9017302415246387e-05, "loss": 1.9434, "step": 21415000 }, { "epoch": 61.99, "learning_rate": 1.9016578767599112e-05, "loss": 1.9584, "step": 21415500 }, { "epoch": 61.99, "learning_rate": 1.9015855119951834e-05, "loss": 1.952, "step": 21416000 }, { "epoch": 61.99, "learning_rate": 1.901513147230456e-05, "loss": 1.9512, "step": 21416500 }, { "epoch": 61.99, "learning_rate": 1.9014407824657282e-05, "loss": 1.938, "step": 21417000 }, { "epoch": 61.99, "learning_rate": 1.9013684177010005e-05, "loss": 1.9373, "step": 21417500 }, { "epoch": 62.0, "learning_rate": 1.9012960529362727e-05, "loss": 1.947, "step": 21418000 }, { "epoch": 62.0, "learning_rate": 1.9012238329010746e-05, "loss": 1.9512, "step": 21418500 }, { "epoch": 62.0, "learning_rate": 1.9011514681363468e-05, "loss": 1.9685, "step": 21419000 }, { "epoch": 62.0, "eval_accuracy": 0.6775106560681988, "eval_accuracy_mlm": 0.6447252766965038, "eval_accuracy_nsp": 0.8534164707392079, "eval_loss": 2.2011451721191406, "eval_runtime": 332.0148, "eval_samples_per_second": 1314.357, "eval_steps_per_second": 54.766, "step": 21419264 }, { "epoch": 62.0, "learning_rate": 1.901079103371619e-05, "loss": 1.9203, "step": 21419500 }, { "epoch": 62.0, "learning_rate": 1.9010067386068916e-05, "loss": 1.9133, "step": 21420000 }, { "epoch": 62.0, "learning_rate": 1.9009345185716935e-05, "loss": 1.942, "step": 21420500 }, { "epoch": 62.01, "learning_rate": 1.900862298536495e-05, "loss": 1.8864, "step": 21421000 }, { "epoch": 62.01, "learning_rate": 1.9007899337717676e-05, "loss": 1.912, "step": 21421500 }, { "epoch": 62.01, "learning_rate": 1.9007175690070398e-05, "loss": 1.9083, "step": 21422000 }, { "epoch": 62.01, "learning_rate": 1.900645204242312e-05, "loss": 1.9165, "step": 21422500 }, { "epoch": 62.01, "learning_rate": 1.9005728394775843e-05, "loss": 1.9122, "step": 21423000 }, { "epoch": 62.01, "learning_rate": 1.9005004747128565e-05, "loss": 1.9323, "step": 21423500 }, { "epoch": 62.01, "learning_rate": 1.900428109948129e-05, "loss": 1.9619, "step": 21424000 }, { "epoch": 62.02, "learning_rate": 1.9003557451834013e-05, "loss": 1.8903, "step": 21424500 }, { "epoch": 62.02, "learning_rate": 1.900283380418674e-05, "loss": 1.9047, "step": 21425000 }, { "epoch": 62.02, "learning_rate": 1.900211015653946e-05, "loss": 1.9253, "step": 21425500 }, { "epoch": 62.02, "learning_rate": 1.9001386508892183e-05, "loss": 1.9245, "step": 21426000 }, { "epoch": 62.02, "learning_rate": 1.9000662861244905e-05, "loss": 1.9341, "step": 21426500 }, { "epoch": 62.02, "learning_rate": 1.8999940660892924e-05, "loss": 1.929, "step": 21427000 }, { "epoch": 62.02, "learning_rate": 1.899921846054094e-05, "loss": 1.9225, "step": 21427500 }, { "epoch": 62.03, "learning_rate": 1.8998494812893665e-05, "loss": 1.9311, "step": 21428000 }, { "epoch": 62.03, "learning_rate": 1.8997771165246387e-05, "loss": 1.9565, "step": 21428500 }, { "epoch": 62.03, "learning_rate": 1.8997047517599113e-05, "loss": 1.9514, "step": 21429000 }, { "epoch": 62.03, "learning_rate": 1.8996323869951835e-05, "loss": 1.9304, "step": 21429500 }, { "epoch": 62.03, "learning_rate": 1.8995600222304558e-05, "loss": 1.9287, "step": 21430000 }, { "epoch": 62.03, "learning_rate": 1.899487657465728e-05, "loss": 1.9361, "step": 21430500 }, { "epoch": 62.03, "learning_rate": 1.8994152927010002e-05, "loss": 1.9234, "step": 21431000 }, { "epoch": 62.04, "learning_rate": 1.8993429279362728e-05, "loss": 1.9172, "step": 21431500 }, { "epoch": 62.04, "learning_rate": 1.899270852630604e-05, "loss": 1.9192, "step": 21432000 }, { "epoch": 62.04, "learning_rate": 1.8991984878658765e-05, "loss": 1.9206, "step": 21432500 }, { "epoch": 62.04, "learning_rate": 1.8991261231011488e-05, "loss": 1.8897, "step": 21433000 }, { "epoch": 62.04, "learning_rate": 1.899053758336421e-05, "loss": 1.9083, "step": 21433500 }, { "epoch": 62.04, "learning_rate": 1.8989813935716932e-05, "loss": 1.9559, "step": 21434000 }, { "epoch": 62.04, "learning_rate": 1.898909173536495e-05, "loss": 1.9231, "step": 21434500 }, { "epoch": 62.05, "learning_rate": 1.8988368087717673e-05, "loss": 1.9352, "step": 21435000 }, { "epoch": 62.05, "learning_rate": 1.8987645887365692e-05, "loss": 1.9159, "step": 21435500 }, { "epoch": 62.05, "learning_rate": 1.8986922239718415e-05, "loss": 1.9258, "step": 21436000 }, { "epoch": 62.05, "learning_rate": 1.898619859207114e-05, "loss": 1.918, "step": 21436500 }, { "epoch": 62.05, "learning_rate": 1.8985474944423862e-05, "loss": 1.937, "step": 21437000 }, { "epoch": 62.05, "learning_rate": 1.8984751296776585e-05, "loss": 1.9367, "step": 21437500 }, { "epoch": 62.05, "learning_rate": 1.8984027649129307e-05, "loss": 1.9034, "step": 21438000 }, { "epoch": 62.06, "learning_rate": 1.898330400148203e-05, "loss": 1.9155, "step": 21438500 }, { "epoch": 62.06, "learning_rate": 1.8982580353834755e-05, "loss": 1.9613, "step": 21439000 }, { "epoch": 62.06, "learning_rate": 1.8981856706187477e-05, "loss": 1.932, "step": 21439500 }, { "epoch": 62.06, "learning_rate": 1.8981133058540203e-05, "loss": 1.9265, "step": 21440000 }, { "epoch": 62.06, "learning_rate": 1.8980409410892925e-05, "loss": 1.9204, "step": 21440500 }, { "epoch": 62.06, "learning_rate": 1.8979685763245647e-05, "loss": 1.9279, "step": 21441000 }, { "epoch": 62.06, "learning_rate": 1.8978963562893666e-05, "loss": 1.938, "step": 21441500 }, { "epoch": 62.07, "learning_rate": 1.8978239915246388e-05, "loss": 1.9413, "step": 21442000 }, { "epoch": 62.07, "learning_rate": 1.8977517714894404e-05, "loss": 1.9379, "step": 21442500 }, { "epoch": 62.07, "learning_rate": 1.897679406724713e-05, "loss": 1.9322, "step": 21443000 }, { "epoch": 62.07, "learning_rate": 1.8976070419599855e-05, "loss": 1.9092, "step": 21443500 }, { "epoch": 62.07, "learning_rate": 1.897534821924787e-05, "loss": 1.9108, "step": 21444000 }, { "epoch": 62.07, "learning_rate": 1.8974624571600593e-05, "loss": 1.9425, "step": 21444500 }, { "epoch": 62.07, "learning_rate": 1.8973900923953315e-05, "loss": 1.9241, "step": 21445000 }, { "epoch": 62.08, "learning_rate": 1.897317727630604e-05, "loss": 1.9461, "step": 21445500 }, { "epoch": 62.08, "learning_rate": 1.8972453628658763e-05, "loss": 1.93, "step": 21446000 }, { "epoch": 62.08, "learning_rate": 1.897172998101149e-05, "loss": 1.9326, "step": 21446500 }, { "epoch": 62.08, "learning_rate": 1.897100633336421e-05, "loss": 1.9283, "step": 21447000 }, { "epoch": 62.08, "learning_rate": 1.8970282685716933e-05, "loss": 1.9262, "step": 21447500 }, { "epoch": 62.08, "learning_rate": 1.8969559038069655e-05, "loss": 1.927, "step": 21448000 }, { "epoch": 62.08, "learning_rate": 1.8968836837717674e-05, "loss": 1.9298, "step": 21448500 }, { "epoch": 62.09, "learning_rate": 1.8968116084660986e-05, "loss": 1.9432, "step": 21449000 }, { "epoch": 62.09, "learning_rate": 1.896739243701371e-05, "loss": 1.9278, "step": 21449500 }, { "epoch": 62.09, "learning_rate": 1.896666878936643e-05, "loss": 1.9087, "step": 21450000 }, { "epoch": 62.09, "learning_rate": 1.8965945141719157e-05, "loss": 1.9053, "step": 21450500 }, { "epoch": 62.09, "learning_rate": 1.896522149407188e-05, "loss": 1.9478, "step": 21451000 }, { "epoch": 62.09, "learning_rate": 1.8964497846424604e-05, "loss": 1.9217, "step": 21451500 }, { "epoch": 62.09, "learning_rate": 1.8963774198777327e-05, "loss": 1.9177, "step": 21452000 }, { "epoch": 62.1, "learning_rate": 1.896305055113005e-05, "loss": 1.9194, "step": 21452500 }, { "epoch": 62.1, "learning_rate": 1.896232690348277e-05, "loss": 1.8985, "step": 21453000 }, { "epoch": 62.1, "learning_rate": 1.8961603255835493e-05, "loss": 1.9335, "step": 21453500 }, { "epoch": 62.1, "learning_rate": 1.896087960818822e-05, "loss": 1.9335, "step": 21454000 }, { "epoch": 62.1, "learning_rate": 1.8960155960540945e-05, "loss": 1.9119, "step": 21454500 }, { "epoch": 62.1, "learning_rate": 1.8959432312893667e-05, "loss": 1.9072, "step": 21455000 }, { "epoch": 62.1, "learning_rate": 1.895870866524639e-05, "loss": 1.9318, "step": 21455500 }, { "epoch": 62.11, "learning_rate": 1.895798501759911e-05, "loss": 1.9214, "step": 21456000 }, { "epoch": 62.11, "learning_rate": 1.895726281724713e-05, "loss": 1.9334, "step": 21456500 }, { "epoch": 62.11, "learning_rate": 1.8956539169599852e-05, "loss": 1.9245, "step": 21457000 }, { "epoch": 62.11, "learning_rate": 1.8955815521952575e-05, "loss": 1.9067, "step": 21457500 }, { "epoch": 62.11, "learning_rate": 1.89550918743053e-05, "loss": 1.9254, "step": 21458000 }, { "epoch": 62.11, "learning_rate": 1.895436967395332e-05, "loss": 1.9126, "step": 21458500 }, { "epoch": 62.12, "learning_rate": 1.895364602630604e-05, "loss": 1.9385, "step": 21459000 }, { "epoch": 62.12, "learning_rate": 1.8952922378658764e-05, "loss": 1.9238, "step": 21459500 }, { "epoch": 62.12, "learning_rate": 1.8952198731011486e-05, "loss": 1.9137, "step": 21460000 }, { "epoch": 62.12, "learning_rate": 1.8951476530659505e-05, "loss": 1.9592, "step": 21460500 }, { "epoch": 62.12, "learning_rate": 1.8950752883012227e-05, "loss": 1.9317, "step": 21461000 }, { "epoch": 62.12, "learning_rate": 1.8950029235364953e-05, "loss": 1.9246, "step": 21461500 }, { "epoch": 62.12, "learning_rate": 1.8949305587717675e-05, "loss": 1.9261, "step": 21462000 }, { "epoch": 62.13, "learning_rate": 1.8948581940070397e-05, "loss": 1.9133, "step": 21462500 }, { "epoch": 62.13, "learning_rate": 1.894785829242312e-05, "loss": 1.9394, "step": 21463000 }, { "epoch": 62.13, "learning_rate": 1.894713609207114e-05, "loss": 1.9154, "step": 21463500 }, { "epoch": 62.13, "learning_rate": 1.894641244442386e-05, "loss": 1.9286, "step": 21464000 }, { "epoch": 62.13, "learning_rate": 1.8945688796776583e-05, "loss": 1.9389, "step": 21464500 }, { "epoch": 62.13, "learning_rate": 1.8944965149129305e-05, "loss": 1.8968, "step": 21465000 }, { "epoch": 62.13, "learning_rate": 1.894424150148203e-05, "loss": 1.9094, "step": 21465500 }, { "epoch": 62.14, "learning_rate": 1.8943517853834756e-05, "loss": 1.9425, "step": 21466000 }, { "epoch": 62.14, "learning_rate": 1.894279420618748e-05, "loss": 1.9217, "step": 21466500 }, { "epoch": 62.14, "learning_rate": 1.89420705585402e-05, "loss": 1.948, "step": 21467000 }, { "epoch": 62.14, "learning_rate": 1.8941346910892923e-05, "loss": 1.948, "step": 21467500 }, { "epoch": 62.14, "learning_rate": 1.8940623263245645e-05, "loss": 1.9306, "step": 21468000 }, { "epoch": 62.14, "learning_rate": 1.893989961559837e-05, "loss": 1.895, "step": 21468500 }, { "epoch": 62.14, "learning_rate": 1.8939175967951096e-05, "loss": 1.9108, "step": 21469000 }, { "epoch": 62.15, "learning_rate": 1.8938453767599112e-05, "loss": 1.9166, "step": 21469500 }, { "epoch": 62.15, "learning_rate": 1.8937730119951834e-05, "loss": 1.9469, "step": 21470000 }, { "epoch": 62.15, "learning_rate": 1.8937006472304557e-05, "loss": 1.9179, "step": 21470500 }, { "epoch": 62.15, "learning_rate": 1.8936282824657282e-05, "loss": 1.9203, "step": 21471000 }, { "epoch": 62.15, "learning_rate": 1.8935560624305298e-05, "loss": 1.954, "step": 21471500 }, { "epoch": 62.15, "learning_rate": 1.893483697665802e-05, "loss": 1.9391, "step": 21472000 }, { "epoch": 62.15, "learning_rate": 1.8934113329010746e-05, "loss": 1.9378, "step": 21472500 }, { "epoch": 62.16, "learning_rate": 1.8933392575954058e-05, "loss": 1.9523, "step": 21473000 }, { "epoch": 62.16, "learning_rate": 1.8932668928306783e-05, "loss": 1.9352, "step": 21473500 }, { "epoch": 62.16, "learning_rate": 1.8931945280659506e-05, "loss": 1.9356, "step": 21474000 }, { "epoch": 62.16, "learning_rate": 1.8931221633012228e-05, "loss": 1.914, "step": 21474500 }, { "epoch": 62.16, "learning_rate": 1.893049798536495e-05, "loss": 1.9595, "step": 21475000 }, { "epoch": 62.16, "learning_rate": 1.8929774337717672e-05, "loss": 1.9403, "step": 21475500 }, { "epoch": 62.16, "learning_rate": 1.8929050690070395e-05, "loss": 1.9242, "step": 21476000 }, { "epoch": 62.17, "learning_rate": 1.892832704242312e-05, "loss": 1.9396, "step": 21476500 }, { "epoch": 62.17, "learning_rate": 1.8927603394775846e-05, "loss": 1.9429, "step": 21477000 }, { "epoch": 62.17, "learning_rate": 1.8926879747128568e-05, "loss": 1.9417, "step": 21477500 }, { "epoch": 62.17, "learning_rate": 1.892615609948129e-05, "loss": 1.9413, "step": 21478000 }, { "epoch": 62.17, "learning_rate": 1.8925432451834013e-05, "loss": 1.9263, "step": 21478500 }, { "epoch": 62.17, "learning_rate": 1.892471025148203e-05, "loss": 1.9419, "step": 21479000 }, { "epoch": 62.17, "learning_rate": 1.8923986603834754e-05, "loss": 1.9342, "step": 21479500 }, { "epoch": 62.18, "learning_rate": 1.8923264403482773e-05, "loss": 1.9223, "step": 21480000 }, { "epoch": 62.18, "learning_rate": 1.8922540755835498e-05, "loss": 1.9348, "step": 21480500 }, { "epoch": 62.18, "learning_rate": 1.8921818555483514e-05, "loss": 1.9333, "step": 21481000 }, { "epoch": 62.18, "learning_rate": 1.8921094907836236e-05, "loss": 1.9389, "step": 21481500 }, { "epoch": 62.18, "learning_rate": 1.892037126018896e-05, "loss": 1.9427, "step": 21482000 }, { "epoch": 62.18, "learning_rate": 1.8919647612541684e-05, "loss": 1.922, "step": 21482500 }, { "epoch": 62.18, "learning_rate": 1.8918923964894406e-05, "loss": 1.9123, "step": 21483000 }, { "epoch": 62.19, "learning_rate": 1.891820031724713e-05, "loss": 1.9135, "step": 21483500 }, { "epoch": 62.19, "learning_rate": 1.8917476669599854e-05, "loss": 1.9185, "step": 21484000 }, { "epoch": 62.19, "learning_rate": 1.8916753021952576e-05, "loss": 1.8991, "step": 21484500 }, { "epoch": 62.19, "learning_rate": 1.89160293743053e-05, "loss": 1.925, "step": 21485000 }, { "epoch": 62.19, "learning_rate": 1.8915305726658024e-05, "loss": 1.9409, "step": 21485500 }, { "epoch": 62.19, "learning_rate": 1.8914582079010746e-05, "loss": 1.9272, "step": 21486000 }, { "epoch": 62.19, "learning_rate": 1.8913859878658762e-05, "loss": 1.9105, "step": 21486500 }, { "epoch": 62.2, "learning_rate": 1.8913136231011484e-05, "loss": 1.9182, "step": 21487000 }, { "epoch": 62.2, "learning_rate": 1.891241258336421e-05, "loss": 1.9314, "step": 21487500 }, { "epoch": 62.2, "learning_rate": 1.8911688935716935e-05, "loss": 1.9224, "step": 21488000 }, { "epoch": 62.2, "learning_rate": 1.891096673536495e-05, "loss": 1.9392, "step": 21488500 }, { "epoch": 62.2, "learning_rate": 1.8910243087717673e-05, "loss": 1.9386, "step": 21489000 }, { "epoch": 62.2, "learning_rate": 1.89095194400704e-05, "loss": 1.952, "step": 21489500 }, { "epoch": 62.2, "learning_rate": 1.890879579242312e-05, "loss": 1.9407, "step": 21490000 }, { "epoch": 62.21, "learning_rate": 1.8908073592071137e-05, "loss": 1.9149, "step": 21490500 }, { "epoch": 62.21, "learning_rate": 1.8907349944423862e-05, "loss": 1.8864, "step": 21491000 }, { "epoch": 62.21, "learning_rate": 1.8906626296776588e-05, "loss": 1.9314, "step": 21491500 }, { "epoch": 62.21, "learning_rate": 1.890590264912931e-05, "loss": 1.9298, "step": 21492000 }, { "epoch": 62.21, "learning_rate": 1.8905179001482032e-05, "loss": 1.937, "step": 21492500 }, { "epoch": 62.21, "learning_rate": 1.8904455353834755e-05, "loss": 1.9314, "step": 21493000 }, { "epoch": 62.21, "learning_rate": 1.8903731706187477e-05, "loss": 1.9348, "step": 21493500 }, { "epoch": 62.22, "learning_rate": 1.89030080585402e-05, "loss": 1.9665, "step": 21494000 }, { "epoch": 62.22, "learning_rate": 1.8902284410892925e-05, "loss": 1.9301, "step": 21494500 }, { "epoch": 62.22, "learning_rate": 1.890156076324565e-05, "loss": 1.9532, "step": 21495000 }, { "epoch": 62.22, "learning_rate": 1.8900838562893666e-05, "loss": 1.9369, "step": 21495500 }, { "epoch": 62.22, "learning_rate": 1.8900114915246388e-05, "loss": 1.9349, "step": 21496000 }, { "epoch": 62.22, "learning_rate": 1.8899392714894407e-05, "loss": 1.9193, "step": 21496500 }, { "epoch": 62.23, "learning_rate": 1.889866906724713e-05, "loss": 1.9218, "step": 21497000 }, { "epoch": 62.23, "learning_rate": 1.889794541959985e-05, "loss": 1.9555, "step": 21497500 }, { "epoch": 62.23, "learning_rate": 1.8897221771952574e-05, "loss": 1.9416, "step": 21498000 }, { "epoch": 62.23, "learning_rate": 1.88964981243053e-05, "loss": 1.9407, "step": 21498500 }, { "epoch": 62.23, "learning_rate": 1.8895775923953318e-05, "loss": 1.9205, "step": 21499000 }, { "epoch": 62.23, "learning_rate": 1.8895053723601337e-05, "loss": 1.9279, "step": 21499500 }, { "epoch": 62.23, "learning_rate": 1.889433007595406e-05, "loss": 1.9117, "step": 21500000 }, { "epoch": 62.24, "learning_rate": 1.889360642830678e-05, "loss": 1.9298, "step": 21500500 }, { "epoch": 62.24, "learning_rate": 1.8892882780659504e-05, "loss": 1.9385, "step": 21501000 }, { "epoch": 62.24, "learning_rate": 1.8892159133012226e-05, "loss": 1.9385, "step": 21501500 }, { "epoch": 62.24, "learning_rate": 1.889143548536495e-05, "loss": 1.932, "step": 21502000 }, { "epoch": 62.24, "learning_rate": 1.8890711837717674e-05, "loss": 1.9209, "step": 21502500 }, { "epoch": 62.24, "learning_rate": 1.88899881900704e-05, "loss": 1.9334, "step": 21503000 }, { "epoch": 62.24, "learning_rate": 1.8889264542423122e-05, "loss": 1.9216, "step": 21503500 }, { "epoch": 62.25, "learning_rate": 1.8888540894775844e-05, "loss": 1.9412, "step": 21504000 }, { "epoch": 62.25, "learning_rate": 1.8887817247128566e-05, "loss": 1.9275, "step": 21504500 }, { "epoch": 62.25, "learning_rate": 1.888709359948129e-05, "loss": 1.923, "step": 21505000 }, { "epoch": 62.25, "learning_rate": 1.8886369951834014e-05, "loss": 1.9136, "step": 21505500 }, { "epoch": 62.25, "learning_rate": 1.8885646304186736e-05, "loss": 1.9436, "step": 21506000 }, { "epoch": 62.25, "learning_rate": 1.8884924103834755e-05, "loss": 1.922, "step": 21506500 }, { "epoch": 62.25, "learning_rate": 1.8884200456187478e-05, "loss": 1.9449, "step": 21507000 }, { "epoch": 62.26, "learning_rate": 1.88834768085402e-05, "loss": 1.8895, "step": 21507500 }, { "epoch": 62.26, "learning_rate": 1.8882753160892925e-05, "loss": 1.9202, "step": 21508000 }, { "epoch": 62.26, "learning_rate": 1.888203096054094e-05, "loss": 1.9338, "step": 21508500 }, { "epoch": 62.26, "learning_rate": 1.8881307312893663e-05, "loss": 1.9245, "step": 21509000 }, { "epoch": 62.26, "learning_rate": 1.888058366524639e-05, "loss": 1.9682, "step": 21509500 }, { "epoch": 62.26, "learning_rate": 1.8879860017599114e-05, "loss": 1.9376, "step": 21510000 }, { "epoch": 62.26, "learning_rate": 1.887913781724713e-05, "loss": 1.9288, "step": 21510500 }, { "epoch": 62.27, "learning_rate": 1.8878414169599852e-05, "loss": 1.8932, "step": 21511000 }, { "epoch": 62.27, "learning_rate": 1.8877690521952578e-05, "loss": 1.9425, "step": 21511500 }, { "epoch": 62.27, "learning_rate": 1.88769668743053e-05, "loss": 1.926, "step": 21512000 }, { "epoch": 62.27, "learning_rate": 1.8876243226658022e-05, "loss": 1.9193, "step": 21512500 }, { "epoch": 62.27, "learning_rate": 1.8875519579010745e-05, "loss": 1.9379, "step": 21513000 }, { "epoch": 62.27, "learning_rate": 1.8874795931363467e-05, "loss": 1.9174, "step": 21513500 }, { "epoch": 62.27, "learning_rate": 1.887407373101149e-05, "loss": 1.9313, "step": 21514000 }, { "epoch": 62.28, "learning_rate": 1.887335008336421e-05, "loss": 1.9366, "step": 21514500 }, { "epoch": 62.28, "learning_rate": 1.8872626435716934e-05, "loss": 1.94, "step": 21515000 }, { "epoch": 62.28, "learning_rate": 1.8871902788069656e-05, "loss": 1.9409, "step": 21515500 }, { "epoch": 62.28, "learning_rate": 1.8871179140422378e-05, "loss": 1.9276, "step": 21516000 }, { "epoch": 62.28, "learning_rate": 1.8870455492775104e-05, "loss": 1.9202, "step": 21516500 }, { "epoch": 62.28, "learning_rate": 1.8869731845127826e-05, "loss": 1.9103, "step": 21517000 }, { "epoch": 62.28, "learning_rate": 1.886900819748055e-05, "loss": 1.9287, "step": 21517500 }, { "epoch": 62.29, "learning_rate": 1.8868284549833274e-05, "loss": 1.9027, "step": 21518000 }, { "epoch": 62.29, "learning_rate": 1.8867560902185996e-05, "loss": 1.9368, "step": 21518500 }, { "epoch": 62.29, "learning_rate": 1.8866837254538718e-05, "loss": 1.9091, "step": 21519000 }, { "epoch": 62.29, "learning_rate": 1.8866115054186737e-05, "loss": 1.9149, "step": 21519500 }, { "epoch": 62.29, "learning_rate": 1.886539140653946e-05, "loss": 1.9329, "step": 21520000 }, { "epoch": 62.29, "learning_rate": 1.886466775889218e-05, "loss": 1.933, "step": 21520500 }, { "epoch": 62.29, "learning_rate": 1.8863944111244907e-05, "loss": 1.898, "step": 21521000 }, { "epoch": 62.3, "learning_rate": 1.8863221910892926e-05, "loss": 1.9366, "step": 21521500 }, { "epoch": 62.3, "learning_rate": 1.886249826324565e-05, "loss": 1.9328, "step": 21522000 }, { "epoch": 62.3, "learning_rate": 1.886177461559837e-05, "loss": 1.9386, "step": 21522500 }, { "epoch": 62.3, "learning_rate": 1.8861050967951093e-05, "loss": 1.9281, "step": 21523000 }, { "epoch": 62.3, "learning_rate": 1.8860327320303815e-05, "loss": 1.9195, "step": 21523500 }, { "epoch": 62.3, "learning_rate": 1.885960367265654e-05, "loss": 1.9231, "step": 21524000 }, { "epoch": 62.3, "learning_rate": 1.8858880025009263e-05, "loss": 1.9355, "step": 21524500 }, { "epoch": 62.31, "learning_rate": 1.8858159271952575e-05, "loss": 1.9423, "step": 21525000 }, { "epoch": 62.31, "learning_rate": 1.88574356243053e-05, "loss": 1.9485, "step": 21525500 }, { "epoch": 62.31, "learning_rate": 1.8856711976658023e-05, "loss": 1.9169, "step": 21526000 }, { "epoch": 62.31, "learning_rate": 1.8855988329010745e-05, "loss": 1.9117, "step": 21526500 }, { "epoch": 62.31, "learning_rate": 1.8855264681363468e-05, "loss": 1.93, "step": 21527000 }, { "epoch": 62.31, "learning_rate": 1.8854541033716193e-05, "loss": 1.9341, "step": 21527500 }, { "epoch": 62.31, "learning_rate": 1.885381883336421e-05, "loss": 1.9537, "step": 21528000 }, { "epoch": 62.32, "learning_rate": 1.885309518571693e-05, "loss": 1.9371, "step": 21528500 }, { "epoch": 62.32, "learning_rate": 1.8852371538069657e-05, "loss": 1.9482, "step": 21529000 }, { "epoch": 62.32, "learning_rate": 1.885164789042238e-05, "loss": 1.9143, "step": 21529500 }, { "epoch": 62.32, "learning_rate": 1.8850924242775104e-05, "loss": 1.9475, "step": 21530000 }, { "epoch": 62.32, "learning_rate": 1.8850200595127827e-05, "loss": 1.9333, "step": 21530500 }, { "epoch": 62.32, "learning_rate": 1.884947694748055e-05, "loss": 1.9468, "step": 21531000 }, { "epoch": 62.32, "learning_rate": 1.884875329983327e-05, "loss": 1.9466, "step": 21531500 }, { "epoch": 62.33, "learning_rate": 1.8848029652185993e-05, "loss": 1.9258, "step": 21532000 }, { "epoch": 62.33, "learning_rate": 1.884730600453872e-05, "loss": 1.9163, "step": 21532500 }, { "epoch": 62.33, "learning_rate": 1.8846583804186738e-05, "loss": 1.9321, "step": 21533000 }, { "epoch": 62.33, "learning_rate": 1.884586015653946e-05, "loss": 1.9314, "step": 21533500 }, { "epoch": 62.33, "learning_rate": 1.884513795618748e-05, "loss": 1.9198, "step": 21534000 }, { "epoch": 62.33, "learning_rate": 1.88444143085402e-05, "loss": 1.9014, "step": 21534500 }, { "epoch": 62.34, "learning_rate": 1.8843690660892924e-05, "loss": 1.9488, "step": 21535000 }, { "epoch": 62.34, "learning_rate": 1.8842967013245646e-05, "loss": 1.9291, "step": 21535500 }, { "epoch": 62.34, "learning_rate": 1.8842243365598368e-05, "loss": 1.9274, "step": 21536000 }, { "epoch": 62.34, "learning_rate": 1.884152116524639e-05, "loss": 1.9059, "step": 21536500 }, { "epoch": 62.34, "learning_rate": 1.8840798964894406e-05, "loss": 1.9307, "step": 21537000 }, { "epoch": 62.34, "learning_rate": 1.884007531724713e-05, "loss": 1.9474, "step": 21537500 }, { "epoch": 62.34, "learning_rate": 1.8839351669599854e-05, "loss": 1.9327, "step": 21538000 }, { "epoch": 62.35, "learning_rate": 1.883862946924787e-05, "loss": 1.9397, "step": 21538500 }, { "epoch": 62.35, "learning_rate": 1.883790582160059e-05, "loss": 1.9386, "step": 21539000 }, { "epoch": 62.35, "learning_rate": 1.8837182173953317e-05, "loss": 1.956, "step": 21539500 }, { "epoch": 62.35, "learning_rate": 1.883645852630604e-05, "loss": 1.9411, "step": 21540000 }, { "epoch": 62.35, "learning_rate": 1.8835734878658765e-05, "loss": 1.918, "step": 21540500 }, { "epoch": 62.35, "learning_rate": 1.8835011231011487e-05, "loss": 1.9329, "step": 21541000 }, { "epoch": 62.35, "learning_rate": 1.883428758336421e-05, "loss": 1.9147, "step": 21541500 }, { "epoch": 62.36, "learning_rate": 1.8833563935716932e-05, "loss": 1.9305, "step": 21542000 }, { "epoch": 62.36, "learning_rate": 1.8832840288069657e-05, "loss": 1.9575, "step": 21542500 }, { "epoch": 62.36, "learning_rate": 1.883211664042238e-05, "loss": 1.9373, "step": 21543000 }, { "epoch": 62.36, "learning_rate": 1.8831392992775102e-05, "loss": 1.9429, "step": 21543500 }, { "epoch": 62.36, "learning_rate": 1.8830669345127827e-05, "loss": 1.9352, "step": 21544000 }, { "epoch": 62.36, "learning_rate": 1.882994569748055e-05, "loss": 1.9442, "step": 21544500 }, { "epoch": 62.36, "learning_rate": 1.8829222049833272e-05, "loss": 1.9381, "step": 21545000 }, { "epoch": 62.37, "learning_rate": 1.8828498402185994e-05, "loss": 1.9144, "step": 21545500 }, { "epoch": 62.37, "learning_rate": 1.8827777649129306e-05, "loss": 1.9319, "step": 21546000 }, { "epoch": 62.37, "learning_rate": 1.8827054001482032e-05, "loss": 1.9379, "step": 21546500 }, { "epoch": 62.37, "learning_rate": 1.8826330353834754e-05, "loss": 1.9321, "step": 21547000 }, { "epoch": 62.37, "learning_rate": 1.882560815348277e-05, "loss": 1.9101, "step": 21547500 }, { "epoch": 62.37, "learning_rate": 1.8824884505835495e-05, "loss": 1.9146, "step": 21548000 }, { "epoch": 62.37, "learning_rate": 1.882416085818822e-05, "loss": 1.9171, "step": 21548500 }, { "epoch": 62.38, "learning_rate": 1.8823437210540943e-05, "loss": 1.9407, "step": 21549000 }, { "epoch": 62.38, "learning_rate": 1.8822713562893666e-05, "loss": 1.9545, "step": 21549500 }, { "epoch": 62.38, "learning_rate": 1.8821989915246388e-05, "loss": 1.931, "step": 21550000 }, { "epoch": 62.38, "learning_rate": 1.882126626759911e-05, "loss": 1.9459, "step": 21550500 }, { "epoch": 62.38, "learning_rate": 1.8820542619951832e-05, "loss": 1.9173, "step": 21551000 }, { "epoch": 62.38, "learning_rate": 1.8819818972304558e-05, "loss": 1.9409, "step": 21551500 }, { "epoch": 62.38, "learning_rate": 1.8819095324657284e-05, "loss": 1.9425, "step": 21552000 }, { "epoch": 62.39, "learning_rate": 1.8818371677010006e-05, "loss": 1.9448, "step": 21552500 }, { "epoch": 62.39, "learning_rate": 1.8817648029362728e-05, "loss": 1.9126, "step": 21553000 }, { "epoch": 62.39, "learning_rate": 1.881692438171545e-05, "loss": 1.9462, "step": 21553500 }, { "epoch": 62.39, "learning_rate": 1.8816200734068172e-05, "loss": 1.938, "step": 21554000 }, { "epoch": 62.39, "learning_rate": 1.881547853371619e-05, "loss": 1.9289, "step": 21554500 }, { "epoch": 62.39, "learning_rate": 1.8814754886068917e-05, "loss": 1.9342, "step": 21555000 }, { "epoch": 62.39, "learning_rate": 1.881403123842164e-05, "loss": 1.9086, "step": 21555500 }, { "epoch": 62.4, "learning_rate": 1.881330759077436e-05, "loss": 1.9268, "step": 21556000 }, { "epoch": 62.4, "learning_rate": 1.8812583943127084e-05, "loss": 1.9413, "step": 21556500 }, { "epoch": 62.4, "learning_rate": 1.881186029547981e-05, "loss": 1.9289, "step": 21557000 }, { "epoch": 62.4, "learning_rate": 1.881113664783253e-05, "loss": 1.931, "step": 21557500 }, { "epoch": 62.4, "learning_rate": 1.8810414447480547e-05, "loss": 1.9427, "step": 21558000 }, { "epoch": 62.4, "learning_rate": 1.8809690799833273e-05, "loss": 1.9276, "step": 21558500 }, { "epoch": 62.4, "learning_rate": 1.8808967152186e-05, "loss": 1.9434, "step": 21559000 }, { "epoch": 62.41, "learning_rate": 1.880824350453872e-05, "loss": 1.9199, "step": 21559500 }, { "epoch": 62.41, "learning_rate": 1.8807519856891443e-05, "loss": 1.938, "step": 21560000 }, { "epoch": 62.41, "learning_rate": 1.880679765653946e-05, "loss": 1.9609, "step": 21560500 }, { "epoch": 62.41, "learning_rate": 1.8806074008892184e-05, "loss": 1.9132, "step": 21561000 }, { "epoch": 62.41, "learning_rate": 1.8805350361244906e-05, "loss": 1.933, "step": 21561500 }, { "epoch": 62.41, "learning_rate": 1.880462671359763e-05, "loss": 1.9409, "step": 21562000 }, { "epoch": 62.41, "learning_rate": 1.8803903065950354e-05, "loss": 1.92, "step": 21562500 }, { "epoch": 62.42, "learning_rate": 1.8803179418303076e-05, "loss": 1.956, "step": 21563000 }, { "epoch": 62.42, "learning_rate": 1.88024557706558e-05, "loss": 1.9339, "step": 21563500 }, { "epoch": 62.42, "learning_rate": 1.880173212300852e-05, "loss": 1.9354, "step": 21564000 }, { "epoch": 62.42, "learning_rate": 1.880100992265654e-05, "loss": 1.9447, "step": 21564500 }, { "epoch": 62.42, "learning_rate": 1.8800286275009262e-05, "loss": 1.9372, "step": 21565000 }, { "epoch": 62.42, "learning_rate": 1.8799562627361984e-05, "loss": 1.9295, "step": 21565500 }, { "epoch": 62.42, "learning_rate": 1.879883897971471e-05, "loss": 1.9463, "step": 21566000 }, { "epoch": 62.43, "learning_rate": 1.879811677936273e-05, "loss": 1.9343, "step": 21566500 }, { "epoch": 62.43, "learning_rate": 1.8797394579010748e-05, "loss": 1.9363, "step": 21567000 }, { "epoch": 62.43, "learning_rate": 1.879667093136347e-05, "loss": 1.9101, "step": 21567500 }, { "epoch": 62.43, "learning_rate": 1.8795947283716192e-05, "loss": 1.9307, "step": 21568000 }, { "epoch": 62.43, "learning_rate": 1.8795223636068914e-05, "loss": 1.9364, "step": 21568500 }, { "epoch": 62.43, "learning_rate": 1.8794501435716933e-05, "loss": 1.9101, "step": 21569000 }, { "epoch": 62.43, "learning_rate": 1.8793777788069656e-05, "loss": 1.9243, "step": 21569500 }, { "epoch": 62.44, "learning_rate": 1.879305414042238e-05, "loss": 1.9157, "step": 21570000 }, { "epoch": 62.44, "learning_rate": 1.8792330492775103e-05, "loss": 1.9146, "step": 21570500 }, { "epoch": 62.44, "learning_rate": 1.8791606845127826e-05, "loss": 1.9264, "step": 21571000 }, { "epoch": 62.44, "learning_rate": 1.8790884644775845e-05, "loss": 1.9067, "step": 21571500 }, { "epoch": 62.44, "learning_rate": 1.8790160997128567e-05, "loss": 1.9401, "step": 21572000 }, { "epoch": 62.44, "learning_rate": 1.878943734948129e-05, "loss": 1.9093, "step": 21572500 }, { "epoch": 62.45, "learning_rate": 1.878871370183401e-05, "loss": 1.9092, "step": 21573000 }, { "epoch": 62.45, "learning_rate": 1.8787990054186737e-05, "loss": 1.9447, "step": 21573500 }, { "epoch": 62.45, "learning_rate": 1.8787266406539463e-05, "loss": 1.9632, "step": 21574000 }, { "epoch": 62.45, "learning_rate": 1.8786542758892185e-05, "loss": 1.9508, "step": 21574500 }, { "epoch": 62.45, "learning_rate": 1.8785819111244907e-05, "loss": 1.9356, "step": 21575000 }, { "epoch": 62.45, "learning_rate": 1.878509546359763e-05, "loss": 1.9167, "step": 21575500 }, { "epoch": 62.45, "learning_rate": 1.878437181595035e-05, "loss": 1.8996, "step": 21576000 }, { "epoch": 62.46, "learning_rate": 1.8783648168303074e-05, "loss": 1.9226, "step": 21576500 }, { "epoch": 62.46, "learning_rate": 1.8782925967951093e-05, "loss": 1.9427, "step": 21577000 }, { "epoch": 62.46, "learning_rate": 1.878220376759911e-05, "loss": 1.9361, "step": 21577500 }, { "epoch": 62.46, "learning_rate": 1.8781480119951837e-05, "loss": 1.9453, "step": 21578000 }, { "epoch": 62.46, "learning_rate": 1.878075647230456e-05, "loss": 1.9387, "step": 21578500 }, { "epoch": 62.46, "learning_rate": 1.8780032824657282e-05, "loss": 1.9342, "step": 21579000 }, { "epoch": 62.46, "learning_rate": 1.8779309177010004e-05, "loss": 1.9319, "step": 21579500 }, { "epoch": 62.47, "learning_rate": 1.8778585529362726e-05, "loss": 1.9439, "step": 21580000 }, { "epoch": 62.47, "learning_rate": 1.877786188171545e-05, "loss": 1.9205, "step": 21580500 }, { "epoch": 62.47, "learning_rate": 1.8777138234068174e-05, "loss": 1.9277, "step": 21581000 }, { "epoch": 62.47, "learning_rate": 1.87764145864209e-05, "loss": 1.9305, "step": 21581500 }, { "epoch": 62.47, "learning_rate": 1.8775690938773622e-05, "loss": 1.9449, "step": 21582000 }, { "epoch": 62.47, "learning_rate": 1.8774967291126344e-05, "loss": 1.9418, "step": 21582500 }, { "epoch": 62.47, "learning_rate": 1.8774243643479066e-05, "loss": 1.9201, "step": 21583000 }, { "epoch": 62.48, "learning_rate": 1.877351999583179e-05, "loss": 1.9242, "step": 21583500 }, { "epoch": 62.48, "learning_rate": 1.8772796348184514e-05, "loss": 1.949, "step": 21584000 }, { "epoch": 62.48, "learning_rate": 1.877207270053724e-05, "loss": 1.9193, "step": 21584500 }, { "epoch": 62.48, "learning_rate": 1.8771350500185255e-05, "loss": 1.9228, "step": 21585000 }, { "epoch": 62.48, "learning_rate": 1.8770626852537978e-05, "loss": 1.9352, "step": 21585500 }, { "epoch": 62.48, "learning_rate": 1.8769904652185997e-05, "loss": 1.9653, "step": 21586000 }, { "epoch": 62.48, "learning_rate": 1.876918100453872e-05, "loss": 1.9604, "step": 21586500 }, { "epoch": 62.49, "learning_rate": 1.876845735689144e-05, "loss": 1.9211, "step": 21587000 }, { "epoch": 62.49, "learning_rate": 1.8767733709244163e-05, "loss": 1.9505, "step": 21587500 }, { "epoch": 62.49, "learning_rate": 1.8767011508892182e-05, "loss": 1.9204, "step": 21588000 }, { "epoch": 62.49, "learning_rate": 1.8766287861244908e-05, "loss": 1.9397, "step": 21588500 }, { "epoch": 62.49, "learning_rate": 1.876556421359763e-05, "loss": 1.9326, "step": 21589000 }, { "epoch": 62.49, "learning_rate": 1.8764840565950352e-05, "loss": 1.9542, "step": 21589500 }, { "epoch": 62.49, "learning_rate": 1.8764116918303078e-05, "loss": 1.9272, "step": 21590000 }, { "epoch": 62.5, "learning_rate": 1.87633932706558e-05, "loss": 1.9406, "step": 21590500 }, { "epoch": 62.5, "learning_rate": 1.8762669623008522e-05, "loss": 1.9274, "step": 21591000 }, { "epoch": 62.5, "learning_rate": 1.8761945975361245e-05, "loss": 1.9493, "step": 21591500 }, { "epoch": 62.5, "learning_rate": 1.876122232771397e-05, "loss": 1.9551, "step": 21592000 }, { "epoch": 62.5, "learning_rate": 1.8760498680066692e-05, "loss": 1.9527, "step": 21592500 }, { "epoch": 62.5, "learning_rate": 1.8759775032419415e-05, "loss": 1.932, "step": 21593000 }, { "epoch": 62.5, "learning_rate": 1.8759054279362727e-05, "loss": 1.9397, "step": 21593500 }, { "epoch": 62.51, "learning_rate": 1.8758330631715453e-05, "loss": 1.9202, "step": 21594000 }, { "epoch": 62.51, "learning_rate": 1.8757606984068175e-05, "loss": 1.9418, "step": 21594500 }, { "epoch": 62.51, "learning_rate": 1.8756883336420897e-05, "loss": 1.9025, "step": 21595000 }, { "epoch": 62.51, "learning_rate": 1.8756161136068916e-05, "loss": 1.9423, "step": 21595500 }, { "epoch": 62.51, "learning_rate": 1.8755437488421638e-05, "loss": 1.9301, "step": 21596000 }, { "epoch": 62.51, "learning_rate": 1.8754713840774364e-05, "loss": 1.928, "step": 21596500 }, { "epoch": 62.51, "learning_rate": 1.8753990193127086e-05, "loss": 1.9304, "step": 21597000 }, { "epoch": 62.52, "learning_rate": 1.875326654547981e-05, "loss": 1.9309, "step": 21597500 }, { "epoch": 62.52, "learning_rate": 1.875254289783253e-05, "loss": 1.9197, "step": 21598000 }, { "epoch": 62.52, "learning_rate": 1.8751819250185253e-05, "loss": 1.9392, "step": 21598500 }, { "epoch": 62.52, "learning_rate": 1.875109560253798e-05, "loss": 1.9583, "step": 21599000 }, { "epoch": 62.52, "learning_rate": 1.8750371954890704e-05, "loss": 1.9292, "step": 21599500 }, { "epoch": 62.52, "learning_rate": 1.874964975453872e-05, "loss": 1.9295, "step": 21600000 }, { "epoch": 62.52, "learning_rate": 1.8748926106891442e-05, "loss": 1.9339, "step": 21600500 }, { "epoch": 62.53, "learning_rate": 1.8748202459244164e-05, "loss": 1.9319, "step": 21601000 }, { "epoch": 62.53, "learning_rate": 1.874747881159689e-05, "loss": 1.9334, "step": 21601500 }, { "epoch": 62.53, "learning_rate": 1.8746755163949612e-05, "loss": 1.9154, "step": 21602000 }, { "epoch": 62.53, "learning_rate": 1.8746031516302334e-05, "loss": 1.9488, "step": 21602500 }, { "epoch": 62.53, "learning_rate": 1.8745307868655056e-05, "loss": 1.9276, "step": 21603000 }, { "epoch": 62.53, "learning_rate": 1.8744584221007782e-05, "loss": 1.906, "step": 21603500 }, { "epoch": 62.53, "learning_rate": 1.8743860573360504e-05, "loss": 1.9463, "step": 21604000 }, { "epoch": 62.54, "learning_rate": 1.874313692571323e-05, "loss": 1.9055, "step": 21604500 }, { "epoch": 62.54, "learning_rate": 1.8742414725361245e-05, "loss": 1.9257, "step": 21605000 }, { "epoch": 62.54, "learning_rate": 1.8741691077713968e-05, "loss": 1.905, "step": 21605500 }, { "epoch": 62.54, "learning_rate": 1.8740968877361987e-05, "loss": 1.9342, "step": 21606000 }, { "epoch": 62.54, "learning_rate": 1.874024522971471e-05, "loss": 1.9253, "step": 21606500 }, { "epoch": 62.54, "learning_rate": 1.873952158206743e-05, "loss": 1.9316, "step": 21607000 }, { "epoch": 62.54, "learning_rate": 1.8738800829010747e-05, "loss": 1.9221, "step": 21607500 }, { "epoch": 62.55, "learning_rate": 1.873807718136347e-05, "loss": 1.8979, "step": 21608000 }, { "epoch": 62.55, "learning_rate": 1.873735353371619e-05, "loss": 1.9334, "step": 21608500 }, { "epoch": 62.55, "learning_rate": 1.8736629886068917e-05, "loss": 1.9244, "step": 21609000 }, { "epoch": 62.55, "learning_rate": 1.873590623842164e-05, "loss": 1.9224, "step": 21609500 }, { "epoch": 62.55, "learning_rate": 1.873518259077436e-05, "loss": 1.9677, "step": 21610000 }, { "epoch": 62.55, "learning_rate": 1.8734458943127084e-05, "loss": 1.9215, "step": 21610500 }, { "epoch": 62.55, "learning_rate": 1.873373529547981e-05, "loss": 1.9243, "step": 21611000 }, { "epoch": 62.56, "learning_rate": 1.8733013095127828e-05, "loss": 1.9424, "step": 21611500 }, { "epoch": 62.56, "learning_rate": 1.873228944748055e-05, "loss": 1.9547, "step": 21612000 }, { "epoch": 62.56, "learning_rate": 1.8731565799833273e-05, "loss": 1.9178, "step": 21612500 }, { "epoch": 62.56, "learning_rate": 1.8730842152185995e-05, "loss": 1.9323, "step": 21613000 }, { "epoch": 62.56, "learning_rate": 1.8730118504538717e-05, "loss": 1.9374, "step": 21613500 }, { "epoch": 62.56, "learning_rate": 1.8729394856891443e-05, "loss": 1.9344, "step": 21614000 }, { "epoch": 62.57, "learning_rate": 1.8728671209244165e-05, "loss": 1.9307, "step": 21614500 }, { "epoch": 62.57, "learning_rate": 1.872794756159689e-05, "loss": 1.9347, "step": 21615000 }, { "epoch": 62.57, "learning_rate": 1.8727223913949613e-05, "loss": 1.946, "step": 21615500 }, { "epoch": 62.57, "learning_rate": 1.8726500266302335e-05, "loss": 1.9206, "step": 21616000 }, { "epoch": 62.57, "learning_rate": 1.8725776618655057e-05, "loss": 1.9196, "step": 21616500 }, { "epoch": 62.57, "learning_rate": 1.872505297100778e-05, "loss": 1.9574, "step": 21617000 }, { "epoch": 62.57, "learning_rate": 1.8724329323360505e-05, "loss": 1.939, "step": 21617500 }, { "epoch": 62.58, "learning_rate": 1.8723605675713227e-05, "loss": 1.9357, "step": 21618000 }, { "epoch": 62.58, "learning_rate": 1.8722882028065953e-05, "loss": 1.9259, "step": 21618500 }, { "epoch": 62.58, "learning_rate": 1.8722158380418675e-05, "loss": 1.928, "step": 21619000 }, { "epoch": 62.58, "learning_rate": 1.8721434732771397e-05, "loss": 1.9509, "step": 21619500 }, { "epoch": 62.58, "learning_rate": 1.8720712532419416e-05, "loss": 1.9442, "step": 21620000 }, { "epoch": 62.58, "learning_rate": 1.871998888477214e-05, "loss": 1.9485, "step": 21620500 }, { "epoch": 62.58, "learning_rate": 1.871926523712486e-05, "loss": 1.9395, "step": 21621000 }, { "epoch": 62.59, "learning_rate": 1.8718541589477583e-05, "loss": 1.9291, "step": 21621500 }, { "epoch": 62.59, "learning_rate": 1.871781794183031e-05, "loss": 1.9452, "step": 21622000 }, { "epoch": 62.59, "learning_rate": 1.871709429418303e-05, "loss": 1.9214, "step": 21622500 }, { "epoch": 62.59, "learning_rate": 1.8716370646535756e-05, "loss": 1.9062, "step": 21623000 }, { "epoch": 62.59, "learning_rate": 1.871564699888848e-05, "loss": 1.9177, "step": 21623500 }, { "epoch": 62.59, "learning_rate": 1.8714924798536494e-05, "loss": 1.936, "step": 21624000 }, { "epoch": 62.59, "learning_rate": 1.871420115088922e-05, "loss": 1.953, "step": 21624500 }, { "epoch": 62.6, "learning_rate": 1.8713477503241942e-05, "loss": 1.9384, "step": 21625000 }, { "epoch": 62.6, "learning_rate": 1.8712753855594668e-05, "loss": 1.9186, "step": 21625500 }, { "epoch": 62.6, "learning_rate": 1.8712031655242683e-05, "loss": 1.9302, "step": 21626000 }, { "epoch": 62.6, "learning_rate": 1.871130800759541e-05, "loss": 1.9447, "step": 21626500 }, { "epoch": 62.6, "learning_rate": 1.8710585807243424e-05, "loss": 1.9292, "step": 21627000 }, { "epoch": 62.6, "learning_rate": 1.8709862159596147e-05, "loss": 1.9449, "step": 21627500 }, { "epoch": 62.6, "learning_rate": 1.870913851194887e-05, "loss": 1.9442, "step": 21628000 }, { "epoch": 62.61, "learning_rate": 1.8708416311596888e-05, "loss": 1.9155, "step": 21628500 }, { "epoch": 62.61, "learning_rate": 1.870769266394961e-05, "loss": 1.9427, "step": 21629000 }, { "epoch": 62.61, "learning_rate": 1.8706969016302336e-05, "loss": 1.9423, "step": 21629500 }, { "epoch": 62.61, "learning_rate": 1.8706245368655058e-05, "loss": 1.9201, "step": 21630000 }, { "epoch": 62.61, "learning_rate": 1.8705521721007784e-05, "loss": 1.9189, "step": 21630500 }, { "epoch": 62.61, "learning_rate": 1.8704798073360506e-05, "loss": 1.9235, "step": 21631000 }, { "epoch": 62.61, "learning_rate": 1.8704074425713228e-05, "loss": 1.9351, "step": 21631500 }, { "epoch": 62.62, "learning_rate": 1.870335077806595e-05, "loss": 1.925, "step": 21632000 }, { "epoch": 62.62, "learning_rate": 1.8702627130418673e-05, "loss": 1.9544, "step": 21632500 }, { "epoch": 62.62, "learning_rate": 1.8701903482771398e-05, "loss": 1.9345, "step": 21633000 }, { "epoch": 62.62, "learning_rate": 1.8701181282419417e-05, "loss": 1.9428, "step": 21633500 }, { "epoch": 62.62, "learning_rate": 1.870045763477214e-05, "loss": 1.9642, "step": 21634000 }, { "epoch": 62.62, "learning_rate": 1.869973398712486e-05, "loss": 1.9344, "step": 21634500 }, { "epoch": 62.62, "learning_rate": 1.8699010339477584e-05, "loss": 1.9154, "step": 21635000 }, { "epoch": 62.63, "learning_rate": 1.8698288139125603e-05, "loss": 1.9464, "step": 21635500 }, { "epoch": 62.63, "learning_rate": 1.8697564491478325e-05, "loss": 1.935, "step": 21636000 }, { "epoch": 62.63, "learning_rate": 1.8696840843831047e-05, "loss": 1.9428, "step": 21636500 }, { "epoch": 62.63, "learning_rate": 1.8696117196183773e-05, "loss": 1.9426, "step": 21637000 }, { "epoch": 62.63, "learning_rate": 1.8695396443127085e-05, "loss": 1.9344, "step": 21637500 }, { "epoch": 62.63, "learning_rate": 1.8694672795479807e-05, "loss": 1.9284, "step": 21638000 }, { "epoch": 62.63, "learning_rate": 1.8693949147832533e-05, "loss": 1.9403, "step": 21638500 }, { "epoch": 62.64, "learning_rate": 1.8693225500185255e-05, "loss": 1.9428, "step": 21639000 }, { "epoch": 62.64, "learning_rate": 1.8692501852537977e-05, "loss": 1.9274, "step": 21639500 }, { "epoch": 62.64, "learning_rate": 1.8691779652185996e-05, "loss": 1.9333, "step": 21640000 }, { "epoch": 62.64, "learning_rate": 1.869105600453872e-05, "loss": 1.9436, "step": 21640500 }, { "epoch": 62.64, "learning_rate": 1.8690332356891444e-05, "loss": 1.9215, "step": 21641000 }, { "epoch": 62.64, "learning_rate": 1.8689608709244166e-05, "loss": 1.9276, "step": 21641500 }, { "epoch": 62.64, "learning_rate": 1.8688886508892185e-05, "loss": 1.9156, "step": 21642000 }, { "epoch": 62.65, "learning_rate": 1.8688162861244908e-05, "loss": 1.9331, "step": 21642500 }, { "epoch": 62.65, "learning_rate": 1.868743921359763e-05, "loss": 1.9363, "step": 21643000 }, { "epoch": 62.65, "learning_rate": 1.8686717013245645e-05, "loss": 1.9679, "step": 21643500 }, { "epoch": 62.65, "learning_rate": 1.868599336559837e-05, "loss": 1.9247, "step": 21644000 }, { "epoch": 62.65, "learning_rate": 1.8685269717951093e-05, "loss": 1.9465, "step": 21644500 }, { "epoch": 62.65, "learning_rate": 1.868454607030382e-05, "loss": 1.9367, "step": 21645000 }, { "epoch": 62.65, "learning_rate": 1.868382242265654e-05, "loss": 1.9151, "step": 21645500 }, { "epoch": 62.66, "learning_rate": 1.868310022230456e-05, "loss": 1.9453, "step": 21646000 }, { "epoch": 62.66, "learning_rate": 1.8682376574657282e-05, "loss": 1.9421, "step": 21646500 }, { "epoch": 62.66, "learning_rate": 1.8681652927010005e-05, "loss": 1.9393, "step": 21647000 }, { "epoch": 62.66, "learning_rate": 1.8680929279362727e-05, "loss": 1.9723, "step": 21647500 }, { "epoch": 62.66, "learning_rate": 1.868020563171545e-05, "loss": 1.9333, "step": 21648000 }, { "epoch": 62.66, "learning_rate": 1.8679481984068175e-05, "loss": 1.9291, "step": 21648500 }, { "epoch": 62.66, "learning_rate": 1.8678758336420897e-05, "loss": 1.9607, "step": 21649000 }, { "epoch": 62.67, "learning_rate": 1.8678034688773622e-05, "loss": 1.9491, "step": 21649500 }, { "epoch": 62.67, "learning_rate": 1.8677311041126345e-05, "loss": 1.9194, "step": 21650000 }, { "epoch": 62.67, "learning_rate": 1.8676587393479067e-05, "loss": 1.9383, "step": 21650500 }, { "epoch": 62.67, "learning_rate": 1.8675865193127086e-05, "loss": 1.9157, "step": 21651000 }, { "epoch": 62.67, "learning_rate": 1.8675141545479808e-05, "loss": 1.9212, "step": 21651500 }, { "epoch": 62.67, "learning_rate": 1.867441789783253e-05, "loss": 1.9313, "step": 21652000 }, { "epoch": 62.68, "learning_rate": 1.8673694250185256e-05, "loss": 1.9499, "step": 21652500 }, { "epoch": 62.68, "learning_rate": 1.8672970602537978e-05, "loss": 1.9663, "step": 21653000 }, { "epoch": 62.68, "learning_rate": 1.86722469548907e-05, "loss": 1.9328, "step": 21653500 }, { "epoch": 62.68, "learning_rate": 1.8671523307243423e-05, "loss": 1.9253, "step": 21654000 }, { "epoch": 62.68, "learning_rate": 1.867079965959615e-05, "loss": 1.9202, "step": 21654500 }, { "epoch": 62.68, "learning_rate": 1.867007601194887e-05, "loss": 1.9272, "step": 21655000 }, { "epoch": 62.68, "learning_rate": 1.8669352364301593e-05, "loss": 1.9236, "step": 21655500 }, { "epoch": 62.69, "learning_rate": 1.8668630163949612e-05, "loss": 1.9601, "step": 21656000 }, { "epoch": 62.69, "learning_rate": 1.866790796359763e-05, "loss": 1.9721, "step": 21656500 }, { "epoch": 62.69, "learning_rate": 1.866718576324565e-05, "loss": 1.9543, "step": 21657000 }, { "epoch": 62.69, "learning_rate": 1.8666462115598372e-05, "loss": 1.9183, "step": 21657500 }, { "epoch": 62.69, "learning_rate": 1.8665738467951094e-05, "loss": 1.9538, "step": 21658000 }, { "epoch": 62.69, "learning_rate": 1.8665014820303816e-05, "loss": 1.9368, "step": 21658500 }, { "epoch": 62.69, "learning_rate": 1.866429117265654e-05, "loss": 1.9489, "step": 21659000 }, { "epoch": 62.7, "learning_rate": 1.866356752500926e-05, "loss": 1.9444, "step": 21659500 }, { "epoch": 62.7, "learning_rate": 1.8662843877361986e-05, "loss": 1.9478, "step": 21660000 }, { "epoch": 62.7, "learning_rate": 1.8662120229714712e-05, "loss": 1.9257, "step": 21660500 }, { "epoch": 62.7, "learning_rate": 1.8661396582067434e-05, "loss": 1.9251, "step": 21661000 }, { "epoch": 62.7, "learning_rate": 1.8660672934420156e-05, "loss": 1.9433, "step": 21661500 }, { "epoch": 62.7, "learning_rate": 1.865994928677288e-05, "loss": 1.9545, "step": 21662000 }, { "epoch": 62.7, "learning_rate": 1.86592256391256e-05, "loss": 1.9454, "step": 21662500 }, { "epoch": 62.71, "learning_rate": 1.8658501991478327e-05, "loss": 1.9478, "step": 21663000 }, { "epoch": 62.71, "learning_rate": 1.865777834383105e-05, "loss": 1.9434, "step": 21663500 }, { "epoch": 62.71, "learning_rate": 1.8657054696183774e-05, "loss": 1.9499, "step": 21664000 }, { "epoch": 62.71, "learning_rate": 1.8656331048536497e-05, "loss": 1.9258, "step": 21664500 }, { "epoch": 62.71, "learning_rate": 1.865560740088922e-05, "loss": 1.9488, "step": 21665000 }, { "epoch": 62.71, "learning_rate": 1.865488375324194e-05, "loss": 1.9528, "step": 21665500 }, { "epoch": 62.71, "learning_rate": 1.8654160105594663e-05, "loss": 1.9357, "step": 21666000 }, { "epoch": 62.72, "learning_rate": 1.865343645794739e-05, "loss": 1.9422, "step": 21666500 }, { "epoch": 62.72, "learning_rate": 1.8652714257595408e-05, "loss": 1.9236, "step": 21667000 }, { "epoch": 62.72, "learning_rate": 1.865199060994813e-05, "loss": 1.9192, "step": 21667500 }, { "epoch": 62.72, "learning_rate": 1.865126840959615e-05, "loss": 1.9393, "step": 21668000 }, { "epoch": 62.72, "learning_rate": 1.865054476194887e-05, "loss": 1.9697, "step": 21668500 }, { "epoch": 62.72, "learning_rate": 1.8649822561596887e-05, "loss": 1.9549, "step": 21669000 }, { "epoch": 62.72, "learning_rate": 1.8649098913949613e-05, "loss": 1.9268, "step": 21669500 }, { "epoch": 62.73, "learning_rate": 1.8648375266302335e-05, "loss": 1.9243, "step": 21670000 }, { "epoch": 62.73, "learning_rate": 1.8647651618655057e-05, "loss": 1.9221, "step": 21670500 }, { "epoch": 62.73, "learning_rate": 1.8646927971007783e-05, "loss": 1.9462, "step": 21671000 }, { "epoch": 62.73, "learning_rate": 1.8646204323360505e-05, "loss": 1.9458, "step": 21671500 }, { "epoch": 62.73, "learning_rate": 1.8645480675713227e-05, "loss": 1.9354, "step": 21672000 }, { "epoch": 62.73, "learning_rate": 1.8644757028065953e-05, "loss": 1.9388, "step": 21672500 }, { "epoch": 62.73, "learning_rate": 1.8644033380418675e-05, "loss": 1.931, "step": 21673000 }, { "epoch": 62.74, "learning_rate": 1.8643309732771397e-05, "loss": 1.9387, "step": 21673500 }, { "epoch": 62.74, "learning_rate": 1.864258608512412e-05, "loss": 1.9486, "step": 21674000 }, { "epoch": 62.74, "learning_rate": 1.864186388477214e-05, "loss": 1.9353, "step": 21674500 }, { "epoch": 62.74, "learning_rate": 1.8641140237124864e-05, "loss": 1.9349, "step": 21675000 }, { "epoch": 62.74, "learning_rate": 1.8640416589477586e-05, "loss": 1.9369, "step": 21675500 }, { "epoch": 62.74, "learning_rate": 1.8639694389125602e-05, "loss": 1.8989, "step": 21676000 }, { "epoch": 62.74, "learning_rate": 1.8638970741478327e-05, "loss": 1.9454, "step": 21676500 }, { "epoch": 62.75, "learning_rate": 1.863824709383105e-05, "loss": 1.9189, "step": 21677000 }, { "epoch": 62.75, "learning_rate": 1.8637523446183772e-05, "loss": 1.9351, "step": 21677500 }, { "epoch": 62.75, "learning_rate": 1.8636799798536497e-05, "loss": 1.9044, "step": 21678000 }, { "epoch": 62.75, "learning_rate": 1.863607615088922e-05, "loss": 1.9518, "step": 21678500 }, { "epoch": 62.75, "learning_rate": 1.8635352503241942e-05, "loss": 1.9469, "step": 21679000 }, { "epoch": 62.75, "learning_rate": 1.8634628855594664e-05, "loss": 1.9212, "step": 21679500 }, { "epoch": 62.75, "learning_rate": 1.863390520794739e-05, "loss": 1.9368, "step": 21680000 }, { "epoch": 62.76, "learning_rate": 1.8633181560300112e-05, "loss": 1.9256, "step": 21680500 }, { "epoch": 62.76, "learning_rate": 1.8632457912652834e-05, "loss": 1.9365, "step": 21681000 }, { "epoch": 62.76, "learning_rate": 1.8631735712300853e-05, "loss": 1.9353, "step": 21681500 }, { "epoch": 62.76, "learning_rate": 1.863101206465358e-05, "loss": 1.9206, "step": 21682000 }, { "epoch": 62.76, "learning_rate": 1.86302884170063e-05, "loss": 1.9605, "step": 21682500 }, { "epoch": 62.76, "learning_rate": 1.8629564769359023e-05, "loss": 1.9365, "step": 21683000 }, { "epoch": 62.76, "learning_rate": 1.8628841121711746e-05, "loss": 1.9363, "step": 21683500 }, { "epoch": 62.77, "learning_rate": 1.8628117474064468e-05, "loss": 1.9502, "step": 21684000 }, { "epoch": 62.77, "learning_rate": 1.862739382641719e-05, "loss": 1.9443, "step": 21684500 }, { "epoch": 62.77, "learning_rate": 1.8626670178769916e-05, "loss": 1.9579, "step": 21685000 }, { "epoch": 62.77, "learning_rate": 1.862594653112264e-05, "loss": 1.9415, "step": 21685500 }, { "epoch": 62.77, "learning_rate": 1.8625224330770657e-05, "loss": 1.9162, "step": 21686000 }, { "epoch": 62.77, "learning_rate": 1.862450068312338e-05, "loss": 1.9473, "step": 21686500 }, { "epoch": 62.77, "learning_rate": 1.8623777035476105e-05, "loss": 1.9203, "step": 21687000 }, { "epoch": 62.78, "learning_rate": 1.8623053387828827e-05, "loss": 1.9456, "step": 21687500 }, { "epoch": 62.78, "learning_rate": 1.862232974018155e-05, "loss": 1.907, "step": 21688000 }, { "epoch": 62.78, "learning_rate": 1.862160609253427e-05, "loss": 1.9601, "step": 21688500 }, { "epoch": 62.78, "learning_rate": 1.8620882444886997e-05, "loss": 1.9343, "step": 21689000 }, { "epoch": 62.78, "learning_rate": 1.862015879723972e-05, "loss": 1.9238, "step": 21689500 }, { "epoch": 62.78, "learning_rate": 1.8619436596887738e-05, "loss": 1.9491, "step": 21690000 }, { "epoch": 62.79, "learning_rate": 1.861871294924046e-05, "loss": 1.91, "step": 21690500 }, { "epoch": 62.79, "learning_rate": 1.8617989301593183e-05, "loss": 1.9373, "step": 21691000 }, { "epoch": 62.79, "learning_rate": 1.8617265653945905e-05, "loss": 1.9303, "step": 21691500 }, { "epoch": 62.79, "learning_rate": 1.861654200629863e-05, "loss": 1.9144, "step": 21692000 }, { "epoch": 62.79, "learning_rate": 1.8615819805946646e-05, "loss": 1.927, "step": 21692500 }, { "epoch": 62.79, "learning_rate": 1.861509615829937e-05, "loss": 1.9568, "step": 21693000 }, { "epoch": 62.79, "learning_rate": 1.861437395794739e-05, "loss": 1.934, "step": 21693500 }, { "epoch": 62.8, "learning_rate": 1.8613650310300113e-05, "loss": 1.9281, "step": 21694000 }, { "epoch": 62.8, "learning_rate": 1.8612926662652835e-05, "loss": 1.9426, "step": 21694500 }, { "epoch": 62.8, "learning_rate": 1.8612203015005557e-05, "loss": 1.9328, "step": 21695000 }, { "epoch": 62.8, "learning_rate": 1.861147936735828e-05, "loss": 1.919, "step": 21695500 }, { "epoch": 62.8, "learning_rate": 1.8610755719711005e-05, "loss": 1.9299, "step": 21696000 }, { "epoch": 62.8, "learning_rate": 1.861003351935902e-05, "loss": 1.9222, "step": 21696500 }, { "epoch": 62.8, "learning_rate": 1.8609309871711746e-05, "loss": 1.9353, "step": 21697000 }, { "epoch": 62.81, "learning_rate": 1.860858622406447e-05, "loss": 1.9361, "step": 21697500 }, { "epoch": 62.81, "learning_rate": 1.8607862576417194e-05, "loss": 1.9086, "step": 21698000 }, { "epoch": 62.81, "learning_rate": 1.8607138928769916e-05, "loss": 1.9428, "step": 21698500 }, { "epoch": 62.81, "learning_rate": 1.860641528112264e-05, "loss": 1.9308, "step": 21699000 }, { "epoch": 62.81, "learning_rate": 1.860569163347536e-05, "loss": 1.9038, "step": 21699500 }, { "epoch": 62.81, "learning_rate": 1.860496943312338e-05, "loss": 1.9521, "step": 21700000 }, { "epoch": 62.81, "learning_rate": 1.8604245785476105e-05, "loss": 1.9204, "step": 21700500 }, { "epoch": 62.82, "learning_rate": 1.8603522137828828e-05, "loss": 1.9218, "step": 21701000 }, { "epoch": 62.82, "learning_rate": 1.860279849018155e-05, "loss": 1.9234, "step": 21701500 }, { "epoch": 62.82, "learning_rate": 1.8602074842534272e-05, "loss": 1.9213, "step": 21702000 }, { "epoch": 62.82, "learning_rate": 1.860135264218229e-05, "loss": 1.9256, "step": 21702500 }, { "epoch": 62.82, "learning_rate": 1.8600628994535013e-05, "loss": 1.9398, "step": 21703000 }, { "epoch": 62.82, "learning_rate": 1.8599905346887736e-05, "loss": 1.9302, "step": 21703500 }, { "epoch": 62.82, "learning_rate": 1.859918169924046e-05, "loss": 1.9257, "step": 21704000 }, { "epoch": 62.83, "learning_rate": 1.859845949888848e-05, "loss": 1.9364, "step": 21704500 }, { "epoch": 62.83, "learning_rate": 1.8597735851241202e-05, "loss": 1.939, "step": 21705000 }, { "epoch": 62.83, "learning_rate": 1.8597012203593925e-05, "loss": 1.9332, "step": 21705500 }, { "epoch": 62.83, "learning_rate": 1.8596288555946647e-05, "loss": 1.959, "step": 21706000 }, { "epoch": 62.83, "learning_rate": 1.859556490829937e-05, "loss": 1.9367, "step": 21706500 }, { "epoch": 62.83, "learning_rate": 1.8594841260652095e-05, "loss": 1.9316, "step": 21707000 }, { "epoch": 62.83, "learning_rate": 1.8594117613004817e-05, "loss": 1.9267, "step": 21707500 }, { "epoch": 62.84, "learning_rate": 1.8593393965357543e-05, "loss": 1.9472, "step": 21708000 }, { "epoch": 62.84, "learning_rate": 1.8592670317710265e-05, "loss": 1.9338, "step": 21708500 }, { "epoch": 62.84, "learning_rate": 1.8591948117358284e-05, "loss": 1.9352, "step": 21709000 }, { "epoch": 62.84, "learning_rate": 1.85912259170063e-05, "loss": 1.9739, "step": 21709500 }, { "epoch": 62.84, "learning_rate": 1.859050226935902e-05, "loss": 1.9575, "step": 21710000 }, { "epoch": 62.84, "learning_rate": 1.8589778621711744e-05, "loss": 1.9193, "step": 21710500 }, { "epoch": 62.84, "learning_rate": 1.858905497406447e-05, "loss": 1.963, "step": 21711000 }, { "epoch": 62.85, "learning_rate": 1.8588331326417195e-05, "loss": 1.9414, "step": 21711500 }, { "epoch": 62.85, "learning_rate": 1.8587607678769917e-05, "loss": 1.9455, "step": 21712000 }, { "epoch": 62.85, "learning_rate": 1.8586885478417933e-05, "loss": 1.9194, "step": 21712500 }, { "epoch": 62.85, "learning_rate": 1.858616183077066e-05, "loss": 1.9281, "step": 21713000 }, { "epoch": 62.85, "learning_rate": 1.858543818312338e-05, "loss": 1.9359, "step": 21713500 }, { "epoch": 62.85, "learning_rate": 1.8584714535476103e-05, "loss": 1.9441, "step": 21714000 }, { "epoch": 62.85, "learning_rate": 1.8583992335124122e-05, "loss": 1.9474, "step": 21714500 }, { "epoch": 62.86, "learning_rate": 1.8583268687476844e-05, "loss": 1.9247, "step": 21715000 }, { "epoch": 62.86, "learning_rate": 1.858254503982957e-05, "loss": 1.9222, "step": 21715500 }, { "epoch": 62.86, "learning_rate": 1.8581821392182292e-05, "loss": 1.9226, "step": 21716000 }, { "epoch": 62.86, "learning_rate": 1.8581097744535014e-05, "loss": 1.9432, "step": 21716500 }, { "epoch": 62.86, "learning_rate": 1.8580374096887736e-05, "loss": 1.9799, "step": 21717000 }, { "epoch": 62.86, "learning_rate": 1.857965044924046e-05, "loss": 1.922, "step": 21717500 }, { "epoch": 62.86, "learning_rate": 1.8578926801593184e-05, "loss": 1.919, "step": 21718000 }, { "epoch": 62.87, "learning_rate": 1.8578203153945906e-05, "loss": 1.9474, "step": 21718500 }, { "epoch": 62.87, "learning_rate": 1.8577480953593925e-05, "loss": 1.9422, "step": 21719000 }, { "epoch": 62.87, "learning_rate": 1.8576757305946648e-05, "loss": 1.9435, "step": 21719500 }, { "epoch": 62.87, "learning_rate": 1.8576033658299373e-05, "loss": 1.9516, "step": 21720000 }, { "epoch": 62.87, "learning_rate": 1.8575310010652095e-05, "loss": 1.9354, "step": 21720500 }, { "epoch": 62.87, "learning_rate": 1.8574586363004818e-05, "loss": 1.9326, "step": 21721000 }, { "epoch": 62.87, "learning_rate": 1.8573864162652833e-05, "loss": 1.9467, "step": 21721500 }, { "epoch": 62.88, "learning_rate": 1.857314051500556e-05, "loss": 1.9039, "step": 21722000 }, { "epoch": 62.88, "learning_rate": 1.857241686735828e-05, "loss": 1.9444, "step": 21722500 }, { "epoch": 62.88, "learning_rate": 1.8571693219711007e-05, "loss": 1.9421, "step": 21723000 }, { "epoch": 62.88, "learning_rate": 1.857096957206373e-05, "loss": 1.9527, "step": 21723500 }, { "epoch": 62.88, "learning_rate": 1.8570247371711748e-05, "loss": 1.9356, "step": 21724000 }, { "epoch": 62.88, "learning_rate": 1.8569525171359763e-05, "loss": 1.9183, "step": 21724500 }, { "epoch": 62.88, "learning_rate": 1.8568801523712486e-05, "loss": 1.9188, "step": 21725000 }, { "epoch": 62.89, "learning_rate": 1.856807787606521e-05, "loss": 1.9378, "step": 21725500 }, { "epoch": 62.89, "learning_rate": 1.8567354228417934e-05, "loss": 1.96, "step": 21726000 }, { "epoch": 62.89, "learning_rate": 1.8566630580770656e-05, "loss": 1.943, "step": 21726500 }, { "epoch": 62.89, "learning_rate": 1.856590693312338e-05, "loss": 1.9281, "step": 21727000 }, { "epoch": 62.89, "learning_rate": 1.8565183285476104e-05, "loss": 1.9345, "step": 21727500 }, { "epoch": 62.89, "learning_rate": 1.8564459637828826e-05, "loss": 1.9478, "step": 21728000 }, { "epoch": 62.9, "learning_rate": 1.8563735990181548e-05, "loss": 1.9501, "step": 21728500 }, { "epoch": 62.9, "learning_rate": 1.8563012342534274e-05, "loss": 1.9395, "step": 21729000 }, { "epoch": 62.9, "learning_rate": 1.8562288694886996e-05, "loss": 1.9523, "step": 21729500 }, { "epoch": 62.9, "learning_rate": 1.856156504723972e-05, "loss": 1.9276, "step": 21730000 }, { "epoch": 62.9, "learning_rate": 1.8560841399592444e-05, "loss": 1.9155, "step": 21730500 }, { "epoch": 62.9, "learning_rate": 1.8560117751945166e-05, "loss": 1.9331, "step": 21731000 }, { "epoch": 62.9, "learning_rate": 1.8559395551593185e-05, "loss": 1.9382, "step": 21731500 }, { "epoch": 62.91, "learning_rate": 1.8558671903945907e-05, "loss": 1.9432, "step": 21732000 }, { "epoch": 62.91, "learning_rate": 1.855794825629863e-05, "loss": 1.906, "step": 21732500 }, { "epoch": 62.91, "learning_rate": 1.855722460865135e-05, "loss": 1.9245, "step": 21733000 }, { "epoch": 62.91, "learning_rate": 1.855650240829937e-05, "loss": 1.9172, "step": 21733500 }, { "epoch": 62.91, "learning_rate": 1.8555778760652096e-05, "loss": 1.9525, "step": 21734000 }, { "epoch": 62.91, "learning_rate": 1.855505511300482e-05, "loss": 1.9652, "step": 21734500 }, { "epoch": 62.91, "learning_rate": 1.855433146535754e-05, "loss": 1.9356, "step": 21735000 }, { "epoch": 62.92, "learning_rate": 1.855360926500556e-05, "loss": 1.9551, "step": 21735500 }, { "epoch": 62.92, "learning_rate": 1.8552885617358282e-05, "loss": 1.9512, "step": 21736000 }, { "epoch": 62.92, "learning_rate": 1.8552161969711004e-05, "loss": 1.9397, "step": 21736500 }, { "epoch": 62.92, "learning_rate": 1.8551438322063726e-05, "loss": 1.9572, "step": 21737000 }, { "epoch": 62.92, "learning_rate": 1.855071467441645e-05, "loss": 1.9221, "step": 21737500 }, { "epoch": 62.92, "learning_rate": 1.854999247406447e-05, "loss": 1.9458, "step": 21738000 }, { "epoch": 62.92, "learning_rate": 1.8549268826417193e-05, "loss": 1.921, "step": 21738500 }, { "epoch": 62.93, "learning_rate": 1.8548545178769915e-05, "loss": 1.9411, "step": 21739000 }, { "epoch": 62.93, "learning_rate": 1.8547821531122638e-05, "loss": 1.9331, "step": 21739500 }, { "epoch": 62.93, "learning_rate": 1.8547097883475363e-05, "loss": 1.9357, "step": 21740000 }, { "epoch": 62.93, "learning_rate": 1.8546374235828085e-05, "loss": 1.921, "step": 21740500 }, { "epoch": 62.93, "learning_rate": 1.8545650588180808e-05, "loss": 1.9451, "step": 21741000 }, { "epoch": 62.93, "learning_rate": 1.8544926940533533e-05, "loss": 1.9383, "step": 21741500 }, { "epoch": 62.93, "learning_rate": 1.8544206187476846e-05, "loss": 1.9585, "step": 21742000 }, { "epoch": 62.94, "learning_rate": 1.8543482539829568e-05, "loss": 1.928, "step": 21742500 }, { "epoch": 62.94, "learning_rate": 1.854275889218229e-05, "loss": 1.9486, "step": 21743000 }, { "epoch": 62.94, "learning_rate": 1.8542035244535012e-05, "loss": 1.9244, "step": 21743500 }, { "epoch": 62.94, "learning_rate": 1.8541311596887738e-05, "loss": 1.9621, "step": 21744000 }, { "epoch": 62.94, "learning_rate": 1.854058794924046e-05, "loss": 1.9304, "step": 21744500 }, { "epoch": 62.94, "learning_rate": 1.8539864301593182e-05, "loss": 1.964, "step": 21745000 }, { "epoch": 62.94, "learning_rate": 1.8539140653945908e-05, "loss": 1.9433, "step": 21745500 }, { "epoch": 62.95, "learning_rate": 1.853841700629863e-05, "loss": 1.9325, "step": 21746000 }, { "epoch": 62.95, "learning_rate": 1.853769480594665e-05, "loss": 1.9374, "step": 21746500 }, { "epoch": 62.95, "learning_rate": 1.853697115829937e-05, "loss": 1.9389, "step": 21747000 }, { "epoch": 62.95, "learning_rate": 1.8536247510652094e-05, "loss": 1.9499, "step": 21747500 }, { "epoch": 62.95, "learning_rate": 1.8535523863004816e-05, "loss": 1.9268, "step": 21748000 }, { "epoch": 62.95, "learning_rate": 1.8534800215357538e-05, "loss": 1.9337, "step": 21748500 }, { "epoch": 62.95, "learning_rate": 1.853407801500556e-05, "loss": 1.9311, "step": 21749000 }, { "epoch": 62.96, "learning_rate": 1.8533354367358283e-05, "loss": 1.9268, "step": 21749500 }, { "epoch": 62.96, "learning_rate": 1.8532630719711005e-05, "loss": 1.916, "step": 21750000 }, { "epoch": 62.96, "learning_rate": 1.8531907072063727e-05, "loss": 1.97, "step": 21750500 }, { "epoch": 62.96, "learning_rate": 1.8531184871711746e-05, "loss": 1.9504, "step": 21751000 }, { "epoch": 62.96, "learning_rate": 1.853046122406447e-05, "loss": 1.9431, "step": 21751500 }, { "epoch": 62.96, "learning_rate": 1.852973757641719e-05, "loss": 1.9472, "step": 21752000 }, { "epoch": 62.96, "learning_rate": 1.8529013928769913e-05, "loss": 1.9313, "step": 21752500 }, { "epoch": 62.97, "learning_rate": 1.852829028112264e-05, "loss": 1.9454, "step": 21753000 }, { "epoch": 62.97, "learning_rate": 1.8527566633475364e-05, "loss": 1.9694, "step": 21753500 }, { "epoch": 62.97, "learning_rate": 1.8526842985828086e-05, "loss": 1.9375, "step": 21754000 }, { "epoch": 62.97, "learning_rate": 1.852611933818081e-05, "loss": 1.9624, "step": 21754500 }, { "epoch": 62.97, "learning_rate": 1.8525397137828827e-05, "loss": 1.9276, "step": 21755000 }, { "epoch": 62.97, "learning_rate": 1.852467349018155e-05, "loss": 1.9319, "step": 21755500 }, { "epoch": 62.97, "learning_rate": 1.8523949842534272e-05, "loss": 1.9442, "step": 21756000 }, { "epoch": 62.98, "learning_rate": 1.8523226194886998e-05, "loss": 1.9415, "step": 21756500 }, { "epoch": 62.98, "learning_rate": 1.852250254723972e-05, "loss": 1.9402, "step": 21757000 }, { "epoch": 62.98, "learning_rate": 1.852178034688774e-05, "loss": 1.9534, "step": 21757500 }, { "epoch": 62.98, "learning_rate": 1.852105669924046e-05, "loss": 1.9264, "step": 21758000 }, { "epoch": 62.98, "learning_rate": 1.8520333051593183e-05, "loss": 1.9293, "step": 21758500 }, { "epoch": 62.98, "learning_rate": 1.8519609403945905e-05, "loss": 1.9361, "step": 21759000 }, { "epoch": 62.98, "learning_rate": 1.8518885756298628e-05, "loss": 1.9216, "step": 21759500 }, { "epoch": 62.99, "learning_rate": 1.8518162108651353e-05, "loss": 1.9241, "step": 21760000 }, { "epoch": 62.99, "learning_rate": 1.851743846100408e-05, "loss": 1.929, "step": 21760500 }, { "epoch": 62.99, "learning_rate": 1.85167148133568e-05, "loss": 1.9233, "step": 21761000 }, { "epoch": 62.99, "learning_rate": 1.8515991165709523e-05, "loss": 1.928, "step": 21761500 }, { "epoch": 62.99, "learning_rate": 1.8515270412652836e-05, "loss": 1.9424, "step": 21762000 }, { "epoch": 62.99, "learning_rate": 1.8514546765005558e-05, "loss": 1.9531, "step": 21762500 }, { "epoch": 62.99, "learning_rate": 1.851382311735828e-05, "loss": 1.9544, "step": 21763000 }, { "epoch": 63.0, "learning_rate": 1.85131009170063e-05, "loss": 1.9427, "step": 21763500 }, { "epoch": 63.0, "learning_rate": 1.851237726935902e-05, "loss": 1.9364, "step": 21764000 }, { "epoch": 63.0, "learning_rate": 1.8511653621711747e-05, "loss": 1.9414, "step": 21764500 }, { "epoch": 63.0, "eval_accuracy": 0.6786074424753182, "eval_accuracy_mlm": 0.6457583512378033, "eval_accuracy_nsp": 0.8546264087298859, "eval_loss": 2.1795690059661865, "eval_runtime": 331.7441, "eval_samples_per_second": 1315.43, "eval_steps_per_second": 54.81, "step": 21764736 }, { "epoch": 63.0, "learning_rate": 1.851092997406447e-05, "loss": 1.9002, "step": 21765000 }, { "epoch": 63.0, "learning_rate": 1.851020632641719e-05, "loss": 1.9126, "step": 21765500 }, { "epoch": 63.0, "learning_rate": 1.8509482678769917e-05, "loss": 1.9417, "step": 21766000 }, { "epoch": 63.01, "learning_rate": 1.850875903112264e-05, "loss": 1.9187, "step": 21766500 }, { "epoch": 63.01, "learning_rate": 1.8508036830770655e-05, "loss": 1.9364, "step": 21767000 }, { "epoch": 63.01, "learning_rate": 1.8507313183123377e-05, "loss": 1.8871, "step": 21767500 }, { "epoch": 63.01, "learning_rate": 1.8506589535476103e-05, "loss": 1.9121, "step": 21768000 }, { "epoch": 63.01, "learning_rate": 1.8505865887828828e-05, "loss": 1.9, "step": 21768500 }, { "epoch": 63.01, "learning_rate": 1.850514224018155e-05, "loss": 1.9366, "step": 21769000 }, { "epoch": 63.01, "learning_rate": 1.8504418592534273e-05, "loss": 1.9087, "step": 21769500 }, { "epoch": 63.02, "learning_rate": 1.850369639218229e-05, "loss": 1.9076, "step": 21770000 }, { "epoch": 63.02, "learning_rate": 1.8502972744535014e-05, "loss": 1.9231, "step": 21770500 }, { "epoch": 63.02, "learning_rate": 1.8502249096887736e-05, "loss": 1.9051, "step": 21771000 }, { "epoch": 63.02, "learning_rate": 1.8501525449240462e-05, "loss": 1.9335, "step": 21771500 }, { "epoch": 63.02, "learning_rate": 1.8500801801593184e-05, "loss": 1.9231, "step": 21772000 }, { "epoch": 63.02, "learning_rate": 1.8500078153945906e-05, "loss": 1.9188, "step": 21772500 }, { "epoch": 63.02, "learning_rate": 1.849935450629863e-05, "loss": 1.9331, "step": 21773000 }, { "epoch": 63.03, "learning_rate": 1.8498630858651354e-05, "loss": 1.9096, "step": 21773500 }, { "epoch": 63.03, "learning_rate": 1.8497907211004076e-05, "loss": 1.9334, "step": 21774000 }, { "epoch": 63.03, "learning_rate": 1.84971835633568e-05, "loss": 1.9159, "step": 21774500 }, { "epoch": 63.03, "learning_rate": 1.8496459915709524e-05, "loss": 1.9133, "step": 21775000 }, { "epoch": 63.03, "learning_rate": 1.8495737715357543e-05, "loss": 1.9272, "step": 21775500 }, { "epoch": 63.03, "learning_rate": 1.8495014067710265e-05, "loss": 1.9133, "step": 21776000 }, { "epoch": 63.03, "learning_rate": 1.8494290420062988e-05, "loss": 1.921, "step": 21776500 }, { "epoch": 63.04, "learning_rate": 1.849356677241571e-05, "loss": 1.8924, "step": 21777000 }, { "epoch": 63.04, "learning_rate": 1.8492843124768432e-05, "loss": 1.8985, "step": 21777500 }, { "epoch": 63.04, "learning_rate": 1.849212092441645e-05, "loss": 1.9405, "step": 21778000 }, { "epoch": 63.04, "learning_rate": 1.8491397276769173e-05, "loss": 1.9272, "step": 21778500 }, { "epoch": 63.04, "learning_rate": 1.84906736291219e-05, "loss": 1.9145, "step": 21779000 }, { "epoch": 63.04, "learning_rate": 1.848994998147462e-05, "loss": 1.9226, "step": 21779500 }, { "epoch": 63.04, "learning_rate": 1.848922778112264e-05, "loss": 1.9148, "step": 21780000 }, { "epoch": 63.05, "learning_rate": 1.8488504133475362e-05, "loss": 1.9413, "step": 21780500 }, { "epoch": 63.05, "learning_rate": 1.8487780485828084e-05, "loss": 1.9344, "step": 21781000 }, { "epoch": 63.05, "learning_rate": 1.8487056838180807e-05, "loss": 1.8985, "step": 21781500 }, { "epoch": 63.05, "learning_rate": 1.8486334637828826e-05, "loss": 1.909, "step": 21782000 }, { "epoch": 63.05, "learning_rate": 1.8485610990181548e-05, "loss": 1.9206, "step": 21782500 }, { "epoch": 63.05, "learning_rate": 1.8484887342534274e-05, "loss": 1.9277, "step": 21783000 }, { "epoch": 63.05, "learning_rate": 1.8484163694886996e-05, "loss": 1.9363, "step": 21783500 }, { "epoch": 63.06, "learning_rate": 1.8483440047239718e-05, "loss": 1.9197, "step": 21784000 }, { "epoch": 63.06, "learning_rate": 1.8482716399592444e-05, "loss": 1.9167, "step": 21784500 }, { "epoch": 63.06, "learning_rate": 1.8481992751945166e-05, "loss": 1.9347, "step": 21785000 }, { "epoch": 63.06, "learning_rate": 1.8481269104297888e-05, "loss": 1.9324, "step": 21785500 }, { "epoch": 63.06, "learning_rate": 1.848054545665061e-05, "loss": 1.9267, "step": 21786000 }, { "epoch": 63.06, "learning_rate": 1.8479821809003336e-05, "loss": 1.9447, "step": 21786500 }, { "epoch": 63.06, "learning_rate": 1.8479099608651355e-05, "loss": 1.9203, "step": 21787000 }, { "epoch": 63.07, "learning_rate": 1.8478375961004077e-05, "loss": 1.9242, "step": 21787500 }, { "epoch": 63.07, "learning_rate": 1.84776523133568e-05, "loss": 1.9245, "step": 21788000 }, { "epoch": 63.07, "learning_rate": 1.847692866570952e-05, "loss": 1.9251, "step": 21788500 }, { "epoch": 63.07, "learning_rate": 1.8476205018062244e-05, "loss": 1.925, "step": 21789000 }, { "epoch": 63.07, "learning_rate": 1.847548137041497e-05, "loss": 1.92, "step": 21789500 }, { "epoch": 63.07, "learning_rate": 1.847475917006299e-05, "loss": 1.9276, "step": 21790000 }, { "epoch": 63.07, "learning_rate": 1.847403552241571e-05, "loss": 1.927, "step": 21790500 }, { "epoch": 63.08, "learning_rate": 1.8473311874768433e-05, "loss": 1.9275, "step": 21791000 }, { "epoch": 63.08, "learning_rate": 1.847258822712116e-05, "loss": 1.9068, "step": 21791500 }, { "epoch": 63.08, "learning_rate": 1.8471866026769174e-05, "loss": 1.9314, "step": 21792000 }, { "epoch": 63.08, "learning_rate": 1.8471142379121896e-05, "loss": 1.9127, "step": 21792500 }, { "epoch": 63.08, "learning_rate": 1.8470418731474622e-05, "loss": 1.9267, "step": 21793000 }, { "epoch": 63.08, "learning_rate": 1.8469695083827344e-05, "loss": 1.9219, "step": 21793500 }, { "epoch": 63.08, "learning_rate": 1.846897143618007e-05, "loss": 1.9129, "step": 21794000 }, { "epoch": 63.09, "learning_rate": 1.8468249235828085e-05, "loss": 1.924, "step": 21794500 }, { "epoch": 63.09, "learning_rate": 1.8467527035476104e-05, "loss": 1.9337, "step": 21795000 }, { "epoch": 63.09, "learning_rate": 1.8466803387828826e-05, "loss": 1.913, "step": 21795500 }, { "epoch": 63.09, "learning_rate": 1.846607974018155e-05, "loss": 1.9202, "step": 21796000 }, { "epoch": 63.09, "learning_rate": 1.846535609253427e-05, "loss": 1.9305, "step": 21796500 }, { "epoch": 63.09, "learning_rate": 1.8464632444886997e-05, "loss": 1.9178, "step": 21797000 }, { "epoch": 63.09, "learning_rate": 1.8463908797239722e-05, "loss": 1.9145, "step": 21797500 }, { "epoch": 63.1, "learning_rate": 1.8463185149592444e-05, "loss": 1.9155, "step": 21798000 }, { "epoch": 63.1, "learning_rate": 1.8462461501945167e-05, "loss": 1.9323, "step": 21798500 }, { "epoch": 63.1, "learning_rate": 1.8461739301593182e-05, "loss": 1.929, "step": 21799000 }, { "epoch": 63.1, "learning_rate": 1.8461015653945908e-05, "loss": 1.9164, "step": 21799500 }, { "epoch": 63.1, "learning_rate": 1.846029200629863e-05, "loss": 1.9391, "step": 21800000 }, { "epoch": 63.1, "learning_rate": 1.8459568358651352e-05, "loss": 1.8975, "step": 21800500 }, { "epoch": 63.1, "learning_rate": 1.8458844711004075e-05, "loss": 1.9119, "step": 21801000 }, { "epoch": 63.11, "learning_rate": 1.8458122510652097e-05, "loss": 1.9458, "step": 21801500 }, { "epoch": 63.11, "learning_rate": 1.845739886300482e-05, "loss": 1.9002, "step": 21802000 }, { "epoch": 63.11, "learning_rate": 1.845667521535754e-05, "loss": 1.9169, "step": 21802500 }, { "epoch": 63.11, "learning_rate": 1.8455951567710264e-05, "loss": 1.9251, "step": 21803000 }, { "epoch": 63.11, "learning_rate": 1.8455227920062986e-05, "loss": 1.9138, "step": 21803500 }, { "epoch": 63.11, "learning_rate": 1.8454505719711005e-05, "loss": 1.9424, "step": 21804000 }, { "epoch": 63.12, "learning_rate": 1.8453782072063727e-05, "loss": 1.9015, "step": 21804500 }, { "epoch": 63.12, "learning_rate": 1.8453059871711746e-05, "loss": 1.9296, "step": 21805000 }, { "epoch": 63.12, "learning_rate": 1.845233622406447e-05, "loss": 1.923, "step": 21805500 }, { "epoch": 63.12, "learning_rate": 1.8451612576417194e-05, "loss": 1.9246, "step": 21806000 }, { "epoch": 63.12, "learning_rate": 1.845089037606521e-05, "loss": 1.9194, "step": 21806500 }, { "epoch": 63.12, "learning_rate": 1.8450166728417935e-05, "loss": 1.9242, "step": 21807000 }, { "epoch": 63.12, "learning_rate": 1.8449443080770657e-05, "loss": 1.9258, "step": 21807500 }, { "epoch": 63.13, "learning_rate": 1.844871943312338e-05, "loss": 1.913, "step": 21808000 }, { "epoch": 63.13, "learning_rate": 1.84479957854761e-05, "loss": 1.9155, "step": 21808500 }, { "epoch": 63.13, "learning_rate": 1.8447272137828827e-05, "loss": 1.9135, "step": 21809000 }, { "epoch": 63.13, "learning_rate": 1.844654849018155e-05, "loss": 1.9527, "step": 21809500 }, { "epoch": 63.13, "learning_rate": 1.8445824842534272e-05, "loss": 1.9259, "step": 21810000 }, { "epoch": 63.13, "learning_rate": 1.8445101194886997e-05, "loss": 1.9249, "step": 21810500 }, { "epoch": 63.13, "learning_rate": 1.844437754723972e-05, "loss": 1.9075, "step": 21811000 }, { "epoch": 63.14, "learning_rate": 1.8443653899592442e-05, "loss": 1.9066, "step": 21811500 }, { "epoch": 63.14, "learning_rate": 1.8442930251945164e-05, "loss": 1.9088, "step": 21812000 }, { "epoch": 63.14, "learning_rate": 1.844220660429789e-05, "loss": 1.9249, "step": 21812500 }, { "epoch": 63.14, "learning_rate": 1.8441482956650612e-05, "loss": 1.9319, "step": 21813000 }, { "epoch": 63.14, "learning_rate": 1.844076075629863e-05, "loss": 1.9045, "step": 21813500 }, { "epoch": 63.14, "learning_rate": 1.8440037108651353e-05, "loss": 1.9546, "step": 21814000 }, { "epoch": 63.14, "learning_rate": 1.8439313461004075e-05, "loss": 1.8945, "step": 21814500 }, { "epoch": 63.15, "learning_rate": 1.8438591260652094e-05, "loss": 1.9208, "step": 21815000 }, { "epoch": 63.15, "learning_rate": 1.8437867613004816e-05, "loss": 1.9175, "step": 21815500 }, { "epoch": 63.15, "learning_rate": 1.843714396535754e-05, "loss": 1.9116, "step": 21816000 }, { "epoch": 63.15, "learning_rate": 1.8436420317710264e-05, "loss": 1.9384, "step": 21816500 }, { "epoch": 63.15, "learning_rate": 1.8435696670062987e-05, "loss": 1.9083, "step": 21817000 }, { "epoch": 63.15, "learning_rate": 1.8434974469711006e-05, "loss": 1.9388, "step": 21817500 }, { "epoch": 63.15, "learning_rate": 1.8434250822063728e-05, "loss": 1.9263, "step": 21818000 }, { "epoch": 63.16, "learning_rate": 1.843352717441645e-05, "loss": 1.9143, "step": 21818500 }, { "epoch": 63.16, "learning_rate": 1.8432803526769176e-05, "loss": 1.9291, "step": 21819000 }, { "epoch": 63.16, "learning_rate": 1.8432079879121898e-05, "loss": 1.91, "step": 21819500 }, { "epoch": 63.16, "learning_rate": 1.8431356231474623e-05, "loss": 1.9322, "step": 21820000 }, { "epoch": 63.16, "learning_rate": 1.8430632583827346e-05, "loss": 1.929, "step": 21820500 }, { "epoch": 63.16, "learning_rate": 1.8429908936180068e-05, "loss": 1.9323, "step": 21821000 }, { "epoch": 63.16, "learning_rate": 1.842918528853279e-05, "loss": 1.9277, "step": 21821500 }, { "epoch": 63.17, "learning_rate": 1.842846308818081e-05, "loss": 1.9307, "step": 21822000 }, { "epoch": 63.17, "learning_rate": 1.842773944053353e-05, "loss": 1.9198, "step": 21822500 }, { "epoch": 63.17, "learning_rate": 1.8427015792886254e-05, "loss": 1.9263, "step": 21823000 }, { "epoch": 63.17, "learning_rate": 1.8426292145238976e-05, "loss": 1.902, "step": 21823500 }, { "epoch": 63.17, "learning_rate": 1.8425569944886998e-05, "loss": 1.9173, "step": 21824000 }, { "epoch": 63.17, "learning_rate": 1.842484629723972e-05, "loss": 1.9161, "step": 21824500 }, { "epoch": 63.17, "learning_rate": 1.8424122649592443e-05, "loss": 1.9308, "step": 21825000 }, { "epoch": 63.18, "learning_rate": 1.8423399001945165e-05, "loss": 1.9357, "step": 21825500 }, { "epoch": 63.18, "learning_rate": 1.8422676801593184e-05, "loss": 1.9379, "step": 21826000 }, { "epoch": 63.18, "learning_rate": 1.8421953153945906e-05, "loss": 1.9191, "step": 21826500 }, { "epoch": 63.18, "learning_rate": 1.8421229506298628e-05, "loss": 1.901, "step": 21827000 }, { "epoch": 63.18, "learning_rate": 1.8420505858651354e-05, "loss": 1.9225, "step": 21827500 }, { "epoch": 63.18, "learning_rate": 1.8419782211004076e-05, "loss": 1.9353, "step": 21828000 }, { "epoch": 63.18, "learning_rate": 1.8419058563356802e-05, "loss": 1.9268, "step": 21828500 }, { "epoch": 63.19, "learning_rate": 1.8418334915709524e-05, "loss": 1.9127, "step": 21829000 }, { "epoch": 63.19, "learning_rate": 1.8417611268062246e-05, "loss": 1.9369, "step": 21829500 }, { "epoch": 63.19, "learning_rate": 1.841688762041497e-05, "loss": 1.9218, "step": 21830000 }, { "epoch": 63.19, "learning_rate": 1.841616397276769e-05, "loss": 1.9309, "step": 21830500 }, { "epoch": 63.19, "learning_rate": 1.841544177241571e-05, "loss": 1.9382, "step": 21831000 }, { "epoch": 63.19, "learning_rate": 1.841471957206373e-05, "loss": 1.9182, "step": 21831500 }, { "epoch": 63.19, "learning_rate": 1.841399592441645e-05, "loss": 1.9273, "step": 21832000 }, { "epoch": 63.2, "learning_rate": 1.8413272276769176e-05, "loss": 1.9283, "step": 21832500 }, { "epoch": 63.2, "learning_rate": 1.84125486291219e-05, "loss": 1.9281, "step": 21833000 }, { "epoch": 63.2, "learning_rate": 1.8411826428769914e-05, "loss": 1.9269, "step": 21833500 }, { "epoch": 63.2, "learning_rate": 1.8411104228417933e-05, "loss": 1.9174, "step": 21834000 }, { "epoch": 63.2, "learning_rate": 1.8410380580770655e-05, "loss": 1.9202, "step": 21834500 }, { "epoch": 63.2, "learning_rate": 1.8409656933123378e-05, "loss": 1.9381, "step": 21835000 }, { "epoch": 63.2, "learning_rate": 1.8408933285476103e-05, "loss": 1.9177, "step": 21835500 }, { "epoch": 63.21, "learning_rate": 1.8408209637828825e-05, "loss": 1.9294, "step": 21836000 }, { "epoch": 63.21, "learning_rate": 1.840748599018155e-05, "loss": 1.9087, "step": 21836500 }, { "epoch": 63.21, "learning_rate": 1.8406762342534273e-05, "loss": 1.9309, "step": 21837000 }, { "epoch": 63.21, "learning_rate": 1.8406038694886996e-05, "loss": 1.899, "step": 21837500 }, { "epoch": 63.21, "learning_rate": 1.8405315047239718e-05, "loss": 1.9262, "step": 21838000 }, { "epoch": 63.21, "learning_rate": 1.840459139959244e-05, "loss": 1.9085, "step": 21838500 }, { "epoch": 63.21, "learning_rate": 1.8403867751945166e-05, "loss": 1.9057, "step": 21839000 }, { "epoch": 63.22, "learning_rate": 1.840314410429789e-05, "loss": 1.937, "step": 21839500 }, { "epoch": 63.22, "learning_rate": 1.8402421903945907e-05, "loss": 1.9186, "step": 21840000 }, { "epoch": 63.22, "learning_rate": 1.840169825629863e-05, "loss": 1.9226, "step": 21840500 }, { "epoch": 63.22, "learning_rate": 1.8400976055946648e-05, "loss": 1.9029, "step": 21841000 }, { "epoch": 63.22, "learning_rate": 1.840025240829937e-05, "loss": 1.892, "step": 21841500 }, { "epoch": 63.22, "learning_rate": 1.839953020794739e-05, "loss": 1.9291, "step": 21842000 }, { "epoch": 63.23, "learning_rate": 1.839880656030011e-05, "loss": 1.9039, "step": 21842500 }, { "epoch": 63.23, "learning_rate": 1.8398082912652837e-05, "loss": 1.9152, "step": 21843000 }, { "epoch": 63.23, "learning_rate": 1.839735926500556e-05, "loss": 1.9169, "step": 21843500 }, { "epoch": 63.23, "learning_rate": 1.839663561735828e-05, "loss": 1.9459, "step": 21844000 }, { "epoch": 63.23, "learning_rate": 1.8395911969711004e-05, "loss": 1.9169, "step": 21844500 }, { "epoch": 63.23, "learning_rate": 1.839518832206373e-05, "loss": 1.9205, "step": 21845000 }, { "epoch": 63.23, "learning_rate": 1.839446467441645e-05, "loss": 1.9075, "step": 21845500 }, { "epoch": 63.24, "learning_rate": 1.8393742474064467e-05, "loss": 1.9518, "step": 21846000 }, { "epoch": 63.24, "learning_rate": 1.8393020273712486e-05, "loss": 1.9313, "step": 21846500 }, { "epoch": 63.24, "learning_rate": 1.8392296626065212e-05, "loss": 1.9255, "step": 21847000 }, { "epoch": 63.24, "learning_rate": 1.8391572978417934e-05, "loss": 1.9214, "step": 21847500 }, { "epoch": 63.24, "learning_rate": 1.8390849330770656e-05, "loss": 1.9231, "step": 21848000 }, { "epoch": 63.24, "learning_rate": 1.839012568312338e-05, "loss": 1.9468, "step": 21848500 }, { "epoch": 63.24, "learning_rate": 1.8389402035476104e-05, "loss": 1.9213, "step": 21849000 }, { "epoch": 63.25, "learning_rate": 1.8388678387828826e-05, "loss": 1.9409, "step": 21849500 }, { "epoch": 63.25, "learning_rate": 1.838795474018155e-05, "loss": 1.9452, "step": 21850000 }, { "epoch": 63.25, "learning_rate": 1.8387231092534274e-05, "loss": 1.9139, "step": 21850500 }, { "epoch": 63.25, "learning_rate": 1.8386508892182293e-05, "loss": 1.9205, "step": 21851000 }, { "epoch": 63.25, "learning_rate": 1.8385785244535015e-05, "loss": 1.9158, "step": 21851500 }, { "epoch": 63.25, "learning_rate": 1.8385061596887738e-05, "loss": 1.9228, "step": 21852000 }, { "epoch": 63.25, "learning_rate": 1.838433794924046e-05, "loss": 1.9114, "step": 21852500 }, { "epoch": 63.26, "learning_rate": 1.8383614301593182e-05, "loss": 1.9479, "step": 21853000 }, { "epoch": 63.26, "learning_rate": 1.8382890653945904e-05, "loss": 1.9233, "step": 21853500 }, { "epoch": 63.26, "learning_rate": 1.8382168453593927e-05, "loss": 1.9289, "step": 21854000 }, { "epoch": 63.26, "learning_rate": 1.838144480594665e-05, "loss": 1.9213, "step": 21854500 }, { "epoch": 63.26, "learning_rate": 1.838072115829937e-05, "loss": 1.9206, "step": 21855000 }, { "epoch": 63.26, "learning_rate": 1.8379997510652093e-05, "loss": 1.9314, "step": 21855500 }, { "epoch": 63.26, "learning_rate": 1.837927386300482e-05, "loss": 1.9328, "step": 21856000 }, { "epoch": 63.27, "learning_rate": 1.837855021535754e-05, "loss": 1.9174, "step": 21856500 }, { "epoch": 63.27, "learning_rate": 1.8377826567710263e-05, "loss": 1.9151, "step": 21857000 }, { "epoch": 63.27, "learning_rate": 1.837710292006299e-05, "loss": 1.9083, "step": 21857500 }, { "epoch": 63.27, "learning_rate": 1.8376380719711005e-05, "loss": 1.9432, "step": 21858000 }, { "epoch": 63.27, "learning_rate": 1.837565707206373e-05, "loss": 1.9113, "step": 21858500 }, { "epoch": 63.27, "learning_rate": 1.8374934871711746e-05, "loss": 1.928, "step": 21859000 }, { "epoch": 63.27, "learning_rate": 1.8374211224064468e-05, "loss": 1.9708, "step": 21859500 }, { "epoch": 63.28, "learning_rate": 1.8373487576417194e-05, "loss": 1.9052, "step": 21860000 }, { "epoch": 63.28, "learning_rate": 1.8372763928769916e-05, "loss": 1.9223, "step": 21860500 }, { "epoch": 63.28, "learning_rate": 1.837204172841793e-05, "loss": 1.9187, "step": 21861000 }, { "epoch": 63.28, "learning_rate": 1.8371318080770657e-05, "loss": 1.9387, "step": 21861500 }, { "epoch": 63.28, "learning_rate": 1.8370594433123383e-05, "loss": 1.9503, "step": 21862000 }, { "epoch": 63.28, "learning_rate": 1.8369870785476105e-05, "loss": 1.94, "step": 21862500 }, { "epoch": 63.28, "learning_rate": 1.8369147137828827e-05, "loss": 1.9189, "step": 21863000 }, { "epoch": 63.29, "learning_rate": 1.8368424937476843e-05, "loss": 1.9251, "step": 21863500 }, { "epoch": 63.29, "learning_rate": 1.8367701289829568e-05, "loss": 1.9233, "step": 21864000 }, { "epoch": 63.29, "learning_rate": 1.836697764218229e-05, "loss": 1.9389, "step": 21864500 }, { "epoch": 63.29, "learning_rate": 1.8366253994535013e-05, "loss": 1.9206, "step": 21865000 }, { "epoch": 63.29, "learning_rate": 1.836553179418303e-05, "loss": 1.8982, "step": 21865500 }, { "epoch": 63.29, "learning_rate": 1.8364808146535757e-05, "loss": 1.9199, "step": 21866000 }, { "epoch": 63.29, "learning_rate": 1.836408449888848e-05, "loss": 1.9161, "step": 21866500 }, { "epoch": 63.3, "learning_rate": 1.8363360851241202e-05, "loss": 1.9235, "step": 21867000 }, { "epoch": 63.3, "learning_rate": 1.8362637203593924e-05, "loss": 1.9052, "step": 21867500 }, { "epoch": 63.3, "learning_rate": 1.8361913555946646e-05, "loss": 1.943, "step": 21868000 }, { "epoch": 63.3, "learning_rate": 1.836118990829937e-05, "loss": 1.9181, "step": 21868500 }, { "epoch": 63.3, "learning_rate": 1.8360466260652094e-05, "loss": 1.9343, "step": 21869000 }, { "epoch": 63.3, "learning_rate": 1.835974261300482e-05, "loss": 1.9314, "step": 21869500 }, { "epoch": 63.3, "learning_rate": 1.8359018965357542e-05, "loss": 1.9074, "step": 21870000 }, { "epoch": 63.31, "learning_rate": 1.8358295317710264e-05, "loss": 1.9253, "step": 21870500 }, { "epoch": 63.31, "learning_rate": 1.8357571670062986e-05, "loss": 1.9203, "step": 21871000 }, { "epoch": 63.31, "learning_rate": 1.835684802241571e-05, "loss": 1.9495, "step": 21871500 }, { "epoch": 63.31, "learning_rate": 1.8356125822063728e-05, "loss": 1.9201, "step": 21872000 }, { "epoch": 63.31, "learning_rate": 1.835540217441645e-05, "loss": 1.9184, "step": 21872500 }, { "epoch": 63.31, "learning_rate": 1.8354678526769175e-05, "loss": 1.9144, "step": 21873000 }, { "epoch": 63.31, "learning_rate": 1.8353954879121898e-05, "loss": 1.9391, "step": 21873500 }, { "epoch": 63.32, "learning_rate": 1.835323123147462e-05, "loss": 1.925, "step": 21874000 }, { "epoch": 63.32, "learning_rate": 1.8352510478417932e-05, "loss": 1.9397, "step": 21874500 }, { "epoch": 63.32, "learning_rate": 1.8351786830770658e-05, "loss": 1.9172, "step": 21875000 }, { "epoch": 63.32, "learning_rate": 1.835106318312338e-05, "loss": 1.9361, "step": 21875500 }, { "epoch": 63.32, "learning_rate": 1.8350339535476102e-05, "loss": 1.9143, "step": 21876000 }, { "epoch": 63.32, "learning_rate": 1.8349615887828828e-05, "loss": 1.9225, "step": 21876500 }, { "epoch": 63.32, "learning_rate": 1.834889224018155e-05, "loss": 1.9318, "step": 21877000 }, { "epoch": 63.33, "learning_rate": 1.8348168592534272e-05, "loss": 1.9272, "step": 21877500 }, { "epoch": 63.33, "learning_rate": 1.8347444944886995e-05, "loss": 1.9295, "step": 21878000 }, { "epoch": 63.33, "learning_rate": 1.834672129723972e-05, "loss": 1.9311, "step": 21878500 }, { "epoch": 63.33, "learning_rate": 1.8346000544183032e-05, "loss": 1.9188, "step": 21879000 }, { "epoch": 63.33, "learning_rate": 1.8345276896535755e-05, "loss": 1.9214, "step": 21879500 }, { "epoch": 63.33, "learning_rate": 1.8344553248888477e-05, "loss": 1.9248, "step": 21880000 }, { "epoch": 63.34, "learning_rate": 1.8343829601241203e-05, "loss": 1.9294, "step": 21880500 }, { "epoch": 63.34, "learning_rate": 1.8343105953593925e-05, "loss": 1.9323, "step": 21881000 }, { "epoch": 63.34, "learning_rate": 1.8342383753241944e-05, "loss": 1.9154, "step": 21881500 }, { "epoch": 63.34, "learning_rate": 1.8341660105594666e-05, "loss": 1.9401, "step": 21882000 }, { "epoch": 63.34, "learning_rate": 1.8340936457947388e-05, "loss": 1.9407, "step": 21882500 }, { "epoch": 63.34, "learning_rate": 1.834021281030011e-05, "loss": 1.9063, "step": 21883000 }, { "epoch": 63.34, "learning_rate": 1.8339489162652833e-05, "loss": 1.9107, "step": 21883500 }, { "epoch": 63.35, "learning_rate": 1.8338765515005558e-05, "loss": 1.9468, "step": 21884000 }, { "epoch": 63.35, "learning_rate": 1.8338041867358284e-05, "loss": 1.9264, "step": 21884500 }, { "epoch": 63.35, "learning_rate": 1.8337318219711006e-05, "loss": 1.9195, "step": 21885000 }, { "epoch": 63.35, "learning_rate": 1.833659601935902e-05, "loss": 1.9256, "step": 21885500 }, { "epoch": 63.35, "learning_rate": 1.8335875266302334e-05, "loss": 1.9258, "step": 21886000 }, { "epoch": 63.35, "learning_rate": 1.833515161865506e-05, "loss": 1.8944, "step": 21886500 }, { "epoch": 63.35, "learning_rate": 1.8334429418303075e-05, "loss": 1.9365, "step": 21887000 }, { "epoch": 63.36, "learning_rate": 1.8333705770655797e-05, "loss": 1.9217, "step": 21887500 }, { "epoch": 63.36, "learning_rate": 1.8332982123008523e-05, "loss": 1.9214, "step": 21888000 }, { "epoch": 63.36, "learning_rate": 1.833225847536125e-05, "loss": 1.9269, "step": 21888500 }, { "epoch": 63.36, "learning_rate": 1.833153482771397e-05, "loss": 1.9487, "step": 21889000 }, { "epoch": 63.36, "learning_rate": 1.8330811180066693e-05, "loss": 1.9257, "step": 21889500 }, { "epoch": 63.36, "learning_rate": 1.8330087532419415e-05, "loss": 1.9279, "step": 21890000 }, { "epoch": 63.36, "learning_rate": 1.8329363884772138e-05, "loss": 1.9179, "step": 21890500 }, { "epoch": 63.37, "learning_rate": 1.832864023712486e-05, "loss": 1.9317, "step": 21891000 }, { "epoch": 63.37, "learning_rate": 1.8327916589477585e-05, "loss": 1.9194, "step": 21891500 }, { "epoch": 63.37, "learning_rate": 1.832719294183031e-05, "loss": 1.9188, "step": 21892000 }, { "epoch": 63.37, "learning_rate": 1.8326469294183033e-05, "loss": 1.9189, "step": 21892500 }, { "epoch": 63.37, "learning_rate": 1.8325745646535755e-05, "loss": 1.934, "step": 21893000 }, { "epoch": 63.37, "learning_rate": 1.8325021998888478e-05, "loss": 1.9191, "step": 21893500 }, { "epoch": 63.37, "learning_rate": 1.83242983512412e-05, "loss": 1.9081, "step": 21894000 }, { "epoch": 63.38, "learning_rate": 1.8323574703593922e-05, "loss": 1.9379, "step": 21894500 }, { "epoch": 63.38, "learning_rate": 1.832285250324194e-05, "loss": 1.9409, "step": 21895000 }, { "epoch": 63.38, "learning_rate": 1.8322128855594667e-05, "loss": 1.9181, "step": 21895500 }, { "epoch": 63.38, "learning_rate": 1.832140520794739e-05, "loss": 1.9236, "step": 21896000 }, { "epoch": 63.38, "learning_rate": 1.8320683007595408e-05, "loss": 1.9319, "step": 21896500 }, { "epoch": 63.38, "learning_rate": 1.831995935994813e-05, "loss": 1.9152, "step": 21897000 }, { "epoch": 63.38, "learning_rate": 1.8319235712300852e-05, "loss": 1.9268, "step": 21897500 }, { "epoch": 63.39, "learning_rate": 1.8318512064653575e-05, "loss": 1.9318, "step": 21898000 }, { "epoch": 63.39, "learning_rate": 1.8317789864301594e-05, "loss": 1.9291, "step": 21898500 }, { "epoch": 63.39, "learning_rate": 1.8317066216654316e-05, "loss": 1.9191, "step": 21899000 }, { "epoch": 63.39, "learning_rate": 1.831634256900704e-05, "loss": 1.9084, "step": 21899500 }, { "epoch": 63.39, "learning_rate": 1.8315618921359764e-05, "loss": 1.9212, "step": 21900000 }, { "epoch": 63.39, "learning_rate": 1.8314895273712486e-05, "loss": 1.9258, "step": 21900500 }, { "epoch": 63.39, "learning_rate": 1.831417162606521e-05, "loss": 1.9053, "step": 21901000 }, { "epoch": 63.4, "learning_rate": 1.8313447978417934e-05, "loss": 1.91, "step": 21901500 }, { "epoch": 63.4, "learning_rate": 1.8312724330770656e-05, "loss": 1.9361, "step": 21902000 }, { "epoch": 63.4, "learning_rate": 1.8312000683123378e-05, "loss": 1.9648, "step": 21902500 }, { "epoch": 63.4, "learning_rate": 1.8311281377361987e-05, "loss": 1.9229, "step": 21903000 }, { "epoch": 63.4, "learning_rate": 1.8310557729714713e-05, "loss": 1.9285, "step": 21903500 }, { "epoch": 63.4, "learning_rate": 1.8309834082067435e-05, "loss": 1.9242, "step": 21904000 }, { "epoch": 63.4, "learning_rate": 1.8309110434420157e-05, "loss": 1.9304, "step": 21904500 }, { "epoch": 63.41, "learning_rate": 1.830838678677288e-05, "loss": 1.9085, "step": 21905000 }, { "epoch": 63.41, "learning_rate": 1.8307663139125602e-05, "loss": 1.93, "step": 21905500 }, { "epoch": 63.41, "learning_rate": 1.8306939491478324e-05, "loss": 1.9054, "step": 21906000 }, { "epoch": 63.41, "learning_rate": 1.830621584383105e-05, "loss": 1.9292, "step": 21906500 }, { "epoch": 63.41, "learning_rate": 1.8305492196183775e-05, "loss": 1.9405, "step": 21907000 }, { "epoch": 63.41, "learning_rate": 1.8304768548536497e-05, "loss": 1.9351, "step": 21907500 }, { "epoch": 63.41, "learning_rate": 1.830404490088922e-05, "loss": 1.9429, "step": 21908000 }, { "epoch": 63.42, "learning_rate": 1.8303321253241942e-05, "loss": 1.935, "step": 21908500 }, { "epoch": 63.42, "learning_rate": 1.8302597605594664e-05, "loss": 1.9301, "step": 21909000 }, { "epoch": 63.42, "learning_rate": 1.8301873957947386e-05, "loss": 1.9239, "step": 21909500 }, { "epoch": 63.42, "learning_rate": 1.8301150310300112e-05, "loss": 1.9109, "step": 21910000 }, { "epoch": 63.42, "learning_rate": 1.8300426662652838e-05, "loss": 1.9258, "step": 21910500 }, { "epoch": 63.42, "learning_rate": 1.8299704462300853e-05, "loss": 1.9353, "step": 21911000 }, { "epoch": 63.42, "learning_rate": 1.8298980814653575e-05, "loss": 1.9204, "step": 21911500 }, { "epoch": 63.43, "learning_rate": 1.82982571670063e-05, "loss": 1.9163, "step": 21912000 }, { "epoch": 63.43, "learning_rate": 1.8297533519359023e-05, "loss": 1.9255, "step": 21912500 }, { "epoch": 63.43, "learning_rate": 1.829681131900704e-05, "loss": 1.9285, "step": 21913000 }, { "epoch": 63.43, "learning_rate": 1.8296087671359764e-05, "loss": 1.9175, "step": 21913500 }, { "epoch": 63.43, "learning_rate": 1.8295364023712487e-05, "loss": 1.9349, "step": 21914000 }, { "epoch": 63.43, "learning_rate": 1.8294640376065212e-05, "loss": 1.9387, "step": 21914500 }, { "epoch": 63.43, "learning_rate": 1.8293918175713228e-05, "loss": 1.9208, "step": 21915000 }, { "epoch": 63.44, "learning_rate": 1.829319452806595e-05, "loss": 1.9212, "step": 21915500 }, { "epoch": 63.44, "learning_rate": 1.829247232771397e-05, "loss": 1.9082, "step": 21916000 }, { "epoch": 63.44, "learning_rate": 1.829174868006669e-05, "loss": 1.9129, "step": 21916500 }, { "epoch": 63.44, "learning_rate": 1.8291025032419413e-05, "loss": 1.9369, "step": 21917000 }, { "epoch": 63.44, "learning_rate": 1.829030138477214e-05, "loss": 1.9079, "step": 21917500 }, { "epoch": 63.44, "learning_rate": 1.8289579184420155e-05, "loss": 1.928, "step": 21918000 }, { "epoch": 63.45, "learning_rate": 1.828885553677288e-05, "loss": 1.8974, "step": 21918500 }, { "epoch": 63.45, "learning_rate": 1.8288131889125603e-05, "loss": 1.9138, "step": 21919000 }, { "epoch": 63.45, "learning_rate": 1.8287408241478328e-05, "loss": 1.9266, "step": 21919500 }, { "epoch": 63.45, "learning_rate": 1.828668459383105e-05, "loss": 1.9103, "step": 21920000 }, { "epoch": 63.45, "learning_rate": 1.8285960946183773e-05, "loss": 1.9313, "step": 21920500 }, { "epoch": 63.45, "learning_rate": 1.8285237298536495e-05, "loss": 1.9275, "step": 21921000 }, { "epoch": 63.45, "learning_rate": 1.8284513650889217e-05, "loss": 1.9181, "step": 21921500 }, { "epoch": 63.46, "learning_rate": 1.8283790003241943e-05, "loss": 1.9569, "step": 21922000 }, { "epoch": 63.46, "learning_rate": 1.828306780288996e-05, "loss": 1.9367, "step": 21922500 }, { "epoch": 63.46, "learning_rate": 1.8282344155242684e-05, "loss": 1.9278, "step": 21923000 }, { "epoch": 63.46, "learning_rate": 1.8281620507595406e-05, "loss": 1.9243, "step": 21923500 }, { "epoch": 63.46, "learning_rate": 1.828089685994813e-05, "loss": 1.9176, "step": 21924000 }, { "epoch": 63.46, "learning_rate": 1.8280173212300854e-05, "loss": 1.9149, "step": 21924500 }, { "epoch": 63.46, "learning_rate": 1.8279449564653576e-05, "loss": 1.925, "step": 21925000 }, { "epoch": 63.47, "learning_rate": 1.8278725917006302e-05, "loss": 1.9323, "step": 21925500 }, { "epoch": 63.47, "learning_rate": 1.8278002269359024e-05, "loss": 1.9256, "step": 21926000 }, { "epoch": 63.47, "learning_rate": 1.8277278621711746e-05, "loss": 1.9363, "step": 21926500 }, { "epoch": 63.47, "learning_rate": 1.8276556421359765e-05, "loss": 1.9443, "step": 21927000 }, { "epoch": 63.47, "learning_rate": 1.8275832773712487e-05, "loss": 1.9136, "step": 21927500 }, { "epoch": 63.47, "learning_rate": 1.827510912606521e-05, "loss": 1.9297, "step": 21928000 }, { "epoch": 63.47, "learning_rate": 1.8274385478417932e-05, "loss": 1.9441, "step": 21928500 }, { "epoch": 63.48, "learning_rate": 1.827366327806595e-05, "loss": 1.9331, "step": 21929000 }, { "epoch": 63.48, "learning_rate": 1.8272939630418676e-05, "loss": 1.9283, "step": 21929500 }, { "epoch": 63.48, "learning_rate": 1.82722159827714e-05, "loss": 1.9263, "step": 21930000 }, { "epoch": 63.48, "learning_rate": 1.8271493782419418e-05, "loss": 1.9475, "step": 21930500 }, { "epoch": 63.48, "learning_rate": 1.827077013477214e-05, "loss": 1.9344, "step": 21931000 }, { "epoch": 63.48, "learning_rate": 1.8270046487124862e-05, "loss": 1.9234, "step": 21931500 }, { "epoch": 63.48, "learning_rate": 1.8269324286772878e-05, "loss": 1.9246, "step": 21932000 }, { "epoch": 63.49, "learning_rate": 1.8268600639125603e-05, "loss": 1.9403, "step": 21932500 }, { "epoch": 63.49, "learning_rate": 1.8267876991478326e-05, "loss": 1.9118, "step": 21933000 }, { "epoch": 63.49, "learning_rate": 1.826715334383105e-05, "loss": 1.9402, "step": 21933500 }, { "epoch": 63.49, "learning_rate": 1.8266429696183773e-05, "loss": 1.9293, "step": 21934000 }, { "epoch": 63.49, "learning_rate": 1.8265706048536496e-05, "loss": 1.928, "step": 21934500 }, { "epoch": 63.49, "learning_rate": 1.8264982400889218e-05, "loss": 1.9088, "step": 21935000 }, { "epoch": 63.49, "learning_rate": 1.826425875324194e-05, "loss": 1.9351, "step": 21935500 }, { "epoch": 63.5, "learning_rate": 1.8263535105594666e-05, "loss": 1.9279, "step": 21936000 }, { "epoch": 63.5, "learning_rate": 1.826281290524268e-05, "loss": 1.9319, "step": 21936500 }, { "epoch": 63.5, "learning_rate": 1.8262089257595407e-05, "loss": 1.9301, "step": 21937000 }, { "epoch": 63.5, "learning_rate": 1.826136560994813e-05, "loss": 1.9537, "step": 21937500 }, { "epoch": 63.5, "learning_rate": 1.8260643409596148e-05, "loss": 1.8993, "step": 21938000 }, { "epoch": 63.5, "learning_rate": 1.825991976194887e-05, "loss": 1.9285, "step": 21938500 }, { "epoch": 63.5, "learning_rate": 1.8259196114301593e-05, "loss": 1.9131, "step": 21939000 }, { "epoch": 63.51, "learning_rate": 1.8258472466654318e-05, "loss": 1.9314, "step": 21939500 }, { "epoch": 63.51, "learning_rate": 1.825774881900704e-05, "loss": 1.9308, "step": 21940000 }, { "epoch": 63.51, "learning_rate": 1.8257025171359766e-05, "loss": 1.9336, "step": 21940500 }, { "epoch": 63.51, "learning_rate": 1.8256301523712488e-05, "loss": 1.922, "step": 21941000 }, { "epoch": 63.51, "learning_rate": 1.825557787606521e-05, "loss": 1.9478, "step": 21941500 }, { "epoch": 63.51, "learning_rate": 1.8254854228417933e-05, "loss": 1.9136, "step": 21942000 }, { "epoch": 63.51, "learning_rate": 1.8254130580770655e-05, "loss": 1.9337, "step": 21942500 }, { "epoch": 63.52, "learning_rate": 1.825340693312338e-05, "loss": 1.9122, "step": 21943000 }, { "epoch": 63.52, "learning_rate": 1.8252683285476103e-05, "loss": 1.9261, "step": 21943500 }, { "epoch": 63.52, "learning_rate": 1.825195963782883e-05, "loss": 1.9301, "step": 21944000 }, { "epoch": 63.52, "learning_rate": 1.825123599018155e-05, "loss": 1.9265, "step": 21944500 }, { "epoch": 63.52, "learning_rate": 1.8250515237124863e-05, "loss": 1.9525, "step": 21945000 }, { "epoch": 63.52, "learning_rate": 1.8249791589477585e-05, "loss": 1.9135, "step": 21945500 }, { "epoch": 63.52, "learning_rate": 1.8249067941830307e-05, "loss": 1.9065, "step": 21946000 }, { "epoch": 63.53, "learning_rate": 1.824834429418303e-05, "loss": 1.914, "step": 21946500 }, { "epoch": 63.53, "learning_rate": 1.8247620646535755e-05, "loss": 1.9196, "step": 21947000 }, { "epoch": 63.53, "learning_rate": 1.824689844618377e-05, "loss": 1.9321, "step": 21947500 }, { "epoch": 63.53, "learning_rate": 1.8246174798536496e-05, "loss": 1.9118, "step": 21948000 }, { "epoch": 63.53, "learning_rate": 1.824545115088922e-05, "loss": 1.9345, "step": 21948500 }, { "epoch": 63.53, "learning_rate": 1.8244728950537238e-05, "loss": 1.9384, "step": 21949000 }, { "epoch": 63.53, "learning_rate": 1.824400530288996e-05, "loss": 1.9325, "step": 21949500 }, { "epoch": 63.54, "learning_rate": 1.8243281655242682e-05, "loss": 1.9041, "step": 21950000 }, { "epoch": 63.54, "learning_rate": 1.8242558007595408e-05, "loss": 1.9154, "step": 21950500 }, { "epoch": 63.54, "learning_rate": 1.824183435994813e-05, "loss": 1.9428, "step": 21951000 }, { "epoch": 63.54, "learning_rate": 1.8241110712300852e-05, "loss": 1.9194, "step": 21951500 }, { "epoch": 63.54, "learning_rate": 1.8240387064653578e-05, "loss": 1.9262, "step": 21952000 }, { "epoch": 63.54, "learning_rate": 1.82396634170063e-05, "loss": 1.9214, "step": 21952500 }, { "epoch": 63.54, "learning_rate": 1.8238939769359022e-05, "loss": 1.9115, "step": 21953000 }, { "epoch": 63.55, "learning_rate": 1.8238216121711744e-05, "loss": 1.9191, "step": 21953500 }, { "epoch": 63.55, "learning_rate": 1.823749247406447e-05, "loss": 1.9371, "step": 21954000 }, { "epoch": 63.55, "learning_rate": 1.8236770273712486e-05, "loss": 1.9028, "step": 21954500 }, { "epoch": 63.55, "learning_rate": 1.8236046626065208e-05, "loss": 1.9231, "step": 21955000 }, { "epoch": 63.55, "learning_rate": 1.8235324425713227e-05, "loss": 1.9292, "step": 21955500 }, { "epoch": 63.55, "learning_rate": 1.8234600778065952e-05, "loss": 1.9175, "step": 21956000 }, { "epoch": 63.56, "learning_rate": 1.8233877130418675e-05, "loss": 1.9002, "step": 21956500 }, { "epoch": 63.56, "learning_rate": 1.8233153482771397e-05, "loss": 1.8957, "step": 21957000 }, { "epoch": 63.56, "learning_rate": 1.823242983512412e-05, "loss": 1.9134, "step": 21957500 }, { "epoch": 63.56, "learning_rate": 1.8231706187476845e-05, "loss": 1.9605, "step": 21958000 }, { "epoch": 63.56, "learning_rate": 1.8230982539829567e-05, "loss": 1.9099, "step": 21958500 }, { "epoch": 63.56, "learning_rate": 1.8230258892182293e-05, "loss": 1.9124, "step": 21959000 }, { "epoch": 63.56, "learning_rate": 1.8229535244535015e-05, "loss": 1.9485, "step": 21959500 }, { "epoch": 63.57, "learning_rate": 1.8228813044183034e-05, "loss": 1.9204, "step": 21960000 }, { "epoch": 63.57, "learning_rate": 1.8228089396535756e-05, "loss": 1.9333, "step": 21960500 }, { "epoch": 63.57, "learning_rate": 1.8227365748888478e-05, "loss": 1.9346, "step": 21961000 }, { "epoch": 63.57, "learning_rate": 1.82266421012412e-05, "loss": 1.9288, "step": 21961500 }, { "epoch": 63.57, "learning_rate": 1.8225918453593923e-05, "loss": 1.9138, "step": 21962000 }, { "epoch": 63.57, "learning_rate": 1.8225194805946645e-05, "loss": 1.9107, "step": 21962500 }, { "epoch": 63.57, "learning_rate": 1.822447115829937e-05, "loss": 1.927, "step": 21963000 }, { "epoch": 63.58, "learning_rate": 1.8223747510652096e-05, "loss": 1.9316, "step": 21963500 }, { "epoch": 63.58, "learning_rate": 1.822302386300482e-05, "loss": 1.953, "step": 21964000 }, { "epoch": 63.58, "learning_rate": 1.8222301662652834e-05, "loss": 1.9384, "step": 21964500 }, { "epoch": 63.58, "learning_rate": 1.8221580909596146e-05, "loss": 1.9388, "step": 21965000 }, { "epoch": 63.58, "learning_rate": 1.8220857261948872e-05, "loss": 1.9142, "step": 21965500 }, { "epoch": 63.58, "learning_rate": 1.8220133614301594e-05, "loss": 1.9318, "step": 21966000 }, { "epoch": 63.58, "learning_rate": 1.8219409966654316e-05, "loss": 1.9082, "step": 21966500 }, { "epoch": 63.59, "learning_rate": 1.8218687766302335e-05, "loss": 1.932, "step": 21967000 }, { "epoch": 63.59, "learning_rate": 1.8217965565950354e-05, "loss": 1.9274, "step": 21967500 }, { "epoch": 63.59, "learning_rate": 1.8217241918303076e-05, "loss": 1.9402, "step": 21968000 }, { "epoch": 63.59, "learning_rate": 1.82165182706558e-05, "loss": 1.9366, "step": 21968500 }, { "epoch": 63.59, "learning_rate": 1.821579462300852e-05, "loss": 1.9192, "step": 21969000 }, { "epoch": 63.59, "learning_rate": 1.8215070975361247e-05, "loss": 1.9242, "step": 21969500 }, { "epoch": 63.59, "learning_rate": 1.821434732771397e-05, "loss": 1.9036, "step": 21970000 }, { "epoch": 63.6, "learning_rate": 1.821362368006669e-05, "loss": 1.8994, "step": 21970500 }, { "epoch": 63.6, "learning_rate": 1.8212900032419417e-05, "loss": 1.9393, "step": 21971000 }, { "epoch": 63.6, "learning_rate": 1.821217638477214e-05, "loss": 1.9397, "step": 21971500 }, { "epoch": 63.6, "learning_rate": 1.821145273712486e-05, "loss": 1.9324, "step": 21972000 }, { "epoch": 63.6, "learning_rate": 1.8210729089477583e-05, "loss": 1.9228, "step": 21972500 }, { "epoch": 63.6, "learning_rate": 1.8210006889125602e-05, "loss": 1.9408, "step": 21973000 }, { "epoch": 63.6, "learning_rate": 1.8209283241478325e-05, "loss": 1.9209, "step": 21973500 }, { "epoch": 63.61, "learning_rate": 1.8208559593831047e-05, "loss": 1.9186, "step": 21974000 }, { "epoch": 63.61, "learning_rate": 1.8207835946183772e-05, "loss": 1.9096, "step": 21974500 }, { "epoch": 63.61, "learning_rate": 1.820711374583179e-05, "loss": 1.917, "step": 21975000 }, { "epoch": 63.61, "learning_rate": 1.8206390098184514e-05, "loss": 1.9279, "step": 21975500 }, { "epoch": 63.61, "learning_rate": 1.8205666450537236e-05, "loss": 1.9461, "step": 21976000 }, { "epoch": 63.61, "learning_rate": 1.820494280288996e-05, "loss": 1.9447, "step": 21976500 }, { "epoch": 63.61, "learning_rate": 1.8204219155242684e-05, "loss": 1.9439, "step": 21977000 }, { "epoch": 63.62, "learning_rate": 1.8203495507595406e-05, "loss": 1.9245, "step": 21977500 }, { "epoch": 63.62, "learning_rate": 1.820277185994813e-05, "loss": 1.9329, "step": 21978000 }, { "epoch": 63.62, "learning_rate": 1.8202048212300854e-05, "loss": 1.9426, "step": 21978500 }, { "epoch": 63.62, "learning_rate": 1.8201326011948873e-05, "loss": 1.9342, "step": 21979000 }, { "epoch": 63.62, "learning_rate": 1.8200602364301595e-05, "loss": 1.9295, "step": 21979500 }, { "epoch": 63.62, "learning_rate": 1.8199878716654317e-05, "loss": 1.9251, "step": 21980000 }, { "epoch": 63.62, "learning_rate": 1.819915506900704e-05, "loss": 1.9499, "step": 21980500 }, { "epoch": 63.63, "learning_rate": 1.819843142135976e-05, "loss": 1.9431, "step": 21981000 }, { "epoch": 63.63, "learning_rate": 1.8197707773712487e-05, "loss": 1.9122, "step": 21981500 }, { "epoch": 63.63, "learning_rate": 1.8196984126065213e-05, "loss": 1.9353, "step": 21982000 }, { "epoch": 63.63, "learning_rate": 1.8196260478417935e-05, "loss": 1.9129, "step": 21982500 }, { "epoch": 63.63, "learning_rate": 1.8195536830770657e-05, "loss": 1.9093, "step": 21983000 }, { "epoch": 63.63, "learning_rate": 1.819481318312338e-05, "loss": 1.9277, "step": 21983500 }, { "epoch": 63.63, "learning_rate": 1.8194089535476102e-05, "loss": 1.9203, "step": 21984000 }, { "epoch": 63.64, "learning_rate": 1.8193365887828824e-05, "loss": 1.9345, "step": 21984500 }, { "epoch": 63.64, "learning_rate": 1.8192643687476843e-05, "loss": 1.9311, "step": 21985000 }, { "epoch": 63.64, "learning_rate": 1.819192003982957e-05, "loss": 1.9207, "step": 21985500 }, { "epoch": 63.64, "learning_rate": 1.819119639218229e-05, "loss": 1.934, "step": 21986000 }, { "epoch": 63.64, "learning_rate": 1.8190472744535013e-05, "loss": 1.945, "step": 21986500 }, { "epoch": 63.64, "learning_rate": 1.818974909688774e-05, "loss": 1.9215, "step": 21987000 }, { "epoch": 63.64, "learning_rate": 1.818902544924046e-05, "loss": 1.9305, "step": 21987500 }, { "epoch": 63.65, "learning_rate": 1.8188301801593183e-05, "loss": 1.9329, "step": 21988000 }, { "epoch": 63.65, "learning_rate": 1.81875796012412e-05, "loss": 1.9435, "step": 21988500 }, { "epoch": 63.65, "learning_rate": 1.8186857400889218e-05, "loss": 1.9422, "step": 21989000 }, { "epoch": 63.65, "learning_rate": 1.8186133753241943e-05, "loss": 1.9173, "step": 21989500 }, { "epoch": 63.65, "learning_rate": 1.8185410105594666e-05, "loss": 1.9314, "step": 21990000 }, { "epoch": 63.65, "learning_rate": 1.8184686457947388e-05, "loss": 1.9047, "step": 21990500 }, { "epoch": 63.65, "learning_rate": 1.8183964257595407e-05, "loss": 1.9261, "step": 21991000 }, { "epoch": 63.66, "learning_rate": 1.818324060994813e-05, "loss": 1.927, "step": 21991500 }, { "epoch": 63.66, "learning_rate": 1.818251696230085e-05, "loss": 1.955, "step": 21992000 }, { "epoch": 63.66, "learning_rate": 1.8181793314653577e-05, "loss": 1.9355, "step": 21992500 }, { "epoch": 63.66, "learning_rate": 1.8181069667006302e-05, "loss": 1.913, "step": 21993000 }, { "epoch": 63.66, "learning_rate": 1.8180347466654318e-05, "loss": 1.9164, "step": 21993500 }, { "epoch": 63.66, "learning_rate": 1.817962381900704e-05, "loss": 1.9293, "step": 21994000 }, { "epoch": 63.67, "learning_rate": 1.8178900171359762e-05, "loss": 1.9197, "step": 21994500 }, { "epoch": 63.67, "learning_rate": 1.8178176523712488e-05, "loss": 1.9238, "step": 21995000 }, { "epoch": 63.67, "learning_rate": 1.817745287606521e-05, "loss": 1.9256, "step": 21995500 }, { "epoch": 63.67, "learning_rate": 1.8176729228417933e-05, "loss": 1.9311, "step": 21996000 }, { "epoch": 63.67, "learning_rate": 1.8176005580770655e-05, "loss": 1.9118, "step": 21996500 }, { "epoch": 63.67, "learning_rate": 1.817528193312338e-05, "loss": 1.9163, "step": 21997000 }, { "epoch": 63.67, "learning_rate": 1.8174558285476103e-05, "loss": 1.9362, "step": 21997500 }, { "epoch": 63.68, "learning_rate": 1.8173837532419415e-05, "loss": 1.923, "step": 21998000 }, { "epoch": 63.68, "learning_rate": 1.817311388477214e-05, "loss": 1.9391, "step": 21998500 }, { "epoch": 63.68, "learning_rate": 1.8172390237124863e-05, "loss": 1.9365, "step": 21999000 }, { "epoch": 63.68, "learning_rate": 1.8171666589477585e-05, "loss": 1.9175, "step": 21999500 }, { "epoch": 63.68, "learning_rate": 1.8170942941830307e-05, "loss": 1.9365, "step": 22000000 }, { "epoch": 63.68, "learning_rate": 1.8170219294183033e-05, "loss": 1.9301, "step": 22000500 }, { "epoch": 63.68, "learning_rate": 1.8169495646535755e-05, "loss": 1.945, "step": 22001000 }, { "epoch": 63.69, "learning_rate": 1.8168771998888477e-05, "loss": 1.9308, "step": 22001500 }, { "epoch": 63.69, "learning_rate": 1.8168048351241203e-05, "loss": 1.9197, "step": 22002000 }, { "epoch": 63.69, "learning_rate": 1.8167324703593925e-05, "loss": 1.9458, "step": 22002500 }, { "epoch": 63.69, "learning_rate": 1.8166601055946647e-05, "loss": 1.9265, "step": 22003000 }, { "epoch": 63.69, "learning_rate": 1.816587740829937e-05, "loss": 1.9244, "step": 22003500 }, { "epoch": 63.69, "learning_rate": 1.8165153760652095e-05, "loss": 1.9235, "step": 22004000 }, { "epoch": 63.69, "learning_rate": 1.8164431560300114e-05, "loss": 1.9393, "step": 22004500 }, { "epoch": 63.7, "learning_rate": 1.8163707912652836e-05, "loss": 1.9282, "step": 22005000 }, { "epoch": 63.7, "learning_rate": 1.816298426500556e-05, "loss": 1.9417, "step": 22005500 }, { "epoch": 63.7, "learning_rate": 1.816226061735828e-05, "loss": 1.921, "step": 22006000 }, { "epoch": 63.7, "learning_rate": 1.8161536969711003e-05, "loss": 1.9057, "step": 22006500 }, { "epoch": 63.7, "learning_rate": 1.8160814769359022e-05, "loss": 1.9125, "step": 22007000 }, { "epoch": 63.7, "learning_rate": 1.8160091121711744e-05, "loss": 1.9067, "step": 22007500 }, { "epoch": 63.7, "learning_rate": 1.8159368921359767e-05, "loss": 1.9559, "step": 22008000 }, { "epoch": 63.71, "learning_rate": 1.8158646721007782e-05, "loss": 1.9355, "step": 22008500 }, { "epoch": 63.71, "learning_rate": 1.8157923073360504e-05, "loss": 1.947, "step": 22009000 }, { "epoch": 63.71, "learning_rate": 1.8157199425713227e-05, "loss": 1.9175, "step": 22009500 }, { "epoch": 63.71, "learning_rate": 1.8156475778065952e-05, "loss": 1.9372, "step": 22010000 }, { "epoch": 63.71, "learning_rate": 1.8155752130418674e-05, "loss": 1.9205, "step": 22010500 }, { "epoch": 63.71, "learning_rate": 1.8155028482771397e-05, "loss": 1.9466, "step": 22011000 }, { "epoch": 63.71, "learning_rate": 1.815430483512412e-05, "loss": 1.9401, "step": 22011500 }, { "epoch": 63.72, "learning_rate": 1.8153581187476845e-05, "loss": 1.9076, "step": 22012000 }, { "epoch": 63.72, "learning_rate": 1.8152857539829567e-05, "loss": 1.9198, "step": 22012500 }, { "epoch": 63.72, "learning_rate": 1.8152133892182292e-05, "loss": 1.9335, "step": 22013000 }, { "epoch": 63.72, "learning_rate": 1.8151410244535015e-05, "loss": 1.9423, "step": 22013500 }, { "epoch": 63.72, "learning_rate": 1.815068804418303e-05, "loss": 1.9235, "step": 22014000 }, { "epoch": 63.72, "learning_rate": 1.8149964396535752e-05, "loss": 1.8949, "step": 22014500 }, { "epoch": 63.72, "learning_rate": 1.8149240748888478e-05, "loss": 1.9254, "step": 22015000 }, { "epoch": 63.73, "learning_rate": 1.8148517101241204e-05, "loss": 1.9034, "step": 22015500 }, { "epoch": 63.73, "learning_rate": 1.8147793453593926e-05, "loss": 1.9267, "step": 22016000 }, { "epoch": 63.73, "learning_rate": 1.8147069805946648e-05, "loss": 1.9443, "step": 22016500 }, { "epoch": 63.73, "learning_rate": 1.814634615829937e-05, "loss": 1.9216, "step": 22017000 }, { "epoch": 63.73, "learning_rate": 1.8145622510652093e-05, "loss": 1.9232, "step": 22017500 }, { "epoch": 63.73, "learning_rate": 1.8144898863004818e-05, "loss": 1.9306, "step": 22018000 }, { "epoch": 63.73, "learning_rate": 1.814417521535754e-05, "loss": 1.9354, "step": 22018500 }, { "epoch": 63.74, "learning_rate": 1.814345301500556e-05, "loss": 1.9292, "step": 22019000 }, { "epoch": 63.74, "learning_rate": 1.814273081465358e-05, "loss": 1.9117, "step": 22019500 }, { "epoch": 63.74, "learning_rate": 1.81420071670063e-05, "loss": 1.9404, "step": 22020000 }, { "epoch": 63.74, "learning_rate": 1.8141283519359023e-05, "loss": 1.9221, "step": 22020500 }, { "epoch": 63.74, "learning_rate": 1.8140559871711745e-05, "loss": 1.915, "step": 22021000 }, { "epoch": 63.74, "learning_rate": 1.8139836224064467e-05, "loss": 1.9439, "step": 22021500 }, { "epoch": 63.74, "learning_rate": 1.8139112576417193e-05, "loss": 1.9299, "step": 22022000 }, { "epoch": 63.75, "learning_rate": 1.8138388928769915e-05, "loss": 1.9015, "step": 22022500 }, { "epoch": 63.75, "learning_rate": 1.8137666728417934e-05, "loss": 1.9454, "step": 22023000 }, { "epoch": 63.75, "learning_rate": 1.8136943080770656e-05, "loss": 1.9153, "step": 22023500 }, { "epoch": 63.75, "learning_rate": 1.8136219433123382e-05, "loss": 1.9304, "step": 22024000 }, { "epoch": 63.75, "learning_rate": 1.8135495785476104e-05, "loss": 1.913, "step": 22024500 }, { "epoch": 63.75, "learning_rate": 1.813477358512412e-05, "loss": 1.9383, "step": 22025000 }, { "epoch": 63.75, "learning_rate": 1.8134049937476842e-05, "loss": 1.9525, "step": 22025500 }, { "epoch": 63.76, "learning_rate": 1.8133326289829568e-05, "loss": 1.9537, "step": 22026000 }, { "epoch": 63.76, "learning_rate": 1.8132602642182293e-05, "loss": 1.9445, "step": 22026500 }, { "epoch": 63.76, "learning_rate": 1.8131878994535015e-05, "loss": 1.9376, "step": 22027000 }, { "epoch": 63.76, "learning_rate": 1.8131155346887738e-05, "loss": 1.9329, "step": 22027500 }, { "epoch": 63.76, "learning_rate": 1.813043169924046e-05, "loss": 1.9259, "step": 22028000 }, { "epoch": 63.76, "learning_rate": 1.8129708051593182e-05, "loss": 1.9441, "step": 22028500 }, { "epoch": 63.76, "learning_rate": 1.8128984403945908e-05, "loss": 1.9056, "step": 22029000 }, { "epoch": 63.77, "learning_rate": 1.812826075629863e-05, "loss": 1.9408, "step": 22029500 }, { "epoch": 63.77, "learning_rate": 1.8127537108651356e-05, "loss": 1.9016, "step": 22030000 }, { "epoch": 63.77, "learning_rate": 1.8126813461004078e-05, "loss": 1.9336, "step": 22030500 }, { "epoch": 63.77, "learning_rate": 1.812609270794739e-05, "loss": 1.9394, "step": 22031000 }, { "epoch": 63.77, "learning_rate": 1.8125369060300112e-05, "loss": 1.9063, "step": 22031500 }, { "epoch": 63.77, "learning_rate": 1.8124645412652835e-05, "loss": 1.9321, "step": 22032000 }, { "epoch": 63.78, "learning_rate": 1.8123921765005557e-05, "loss": 1.9126, "step": 22032500 }, { "epoch": 63.78, "learning_rate": 1.8123198117358282e-05, "loss": 1.9426, "step": 22033000 }, { "epoch": 63.78, "learning_rate": 1.8122474469711005e-05, "loss": 1.9484, "step": 22033500 }, { "epoch": 63.78, "learning_rate": 1.812175082206373e-05, "loss": 1.918, "step": 22034000 }, { "epoch": 63.78, "learning_rate": 1.8121028621711746e-05, "loss": 1.9408, "step": 22034500 }, { "epoch": 63.78, "learning_rate": 1.812030497406447e-05, "loss": 1.928, "step": 22035000 }, { "epoch": 63.78, "learning_rate": 1.8119581326417194e-05, "loss": 1.9547, "step": 22035500 }, { "epoch": 63.79, "learning_rate": 1.8118857678769916e-05, "loss": 1.9143, "step": 22036000 }, { "epoch": 63.79, "learning_rate": 1.8118134031122638e-05, "loss": 1.9731, "step": 22036500 }, { "epoch": 63.79, "learning_rate": 1.811741038347536e-05, "loss": 1.9384, "step": 22037000 }, { "epoch": 63.79, "learning_rate": 1.8116686735828083e-05, "loss": 1.9479, "step": 22037500 }, { "epoch": 63.79, "learning_rate": 1.8115963088180808e-05, "loss": 1.936, "step": 22038000 }, { "epoch": 63.79, "learning_rate": 1.8115239440533534e-05, "loss": 1.9129, "step": 22038500 }, { "epoch": 63.79, "learning_rate": 1.811451724018155e-05, "loss": 1.9005, "step": 22039000 }, { "epoch": 63.8, "learning_rate": 1.8113793592534272e-05, "loss": 1.9279, "step": 22039500 }, { "epoch": 63.8, "learning_rate": 1.811307139218229e-05, "loss": 1.9257, "step": 22040000 }, { "epoch": 63.8, "learning_rate": 1.8112347744535013e-05, "loss": 1.9157, "step": 22040500 }, { "epoch": 63.8, "learning_rate": 1.8111624096887735e-05, "loss": 1.9443, "step": 22041000 }, { "epoch": 63.8, "learning_rate": 1.811090044924046e-05, "loss": 1.9427, "step": 22041500 }, { "epoch": 63.8, "learning_rate": 1.8110176801593183e-05, "loss": 1.9185, "step": 22042000 }, { "epoch": 63.8, "learning_rate": 1.810945315394591e-05, "loss": 1.9288, "step": 22042500 }, { "epoch": 63.81, "learning_rate": 1.810872950629863e-05, "loss": 1.9196, "step": 22043000 }, { "epoch": 63.81, "learning_rate": 1.8108005858651353e-05, "loss": 1.9334, "step": 22043500 }, { "epoch": 63.81, "learning_rate": 1.8107282211004075e-05, "loss": 1.9402, "step": 22044000 }, { "epoch": 63.81, "learning_rate": 1.8106560010652094e-05, "loss": 1.9287, "step": 22044500 }, { "epoch": 63.81, "learning_rate": 1.8105836363004816e-05, "loss": 1.9242, "step": 22045000 }, { "epoch": 63.81, "learning_rate": 1.8105112715357542e-05, "loss": 1.904, "step": 22045500 }, { "epoch": 63.81, "learning_rate": 1.8104389067710264e-05, "loss": 1.931, "step": 22046000 }, { "epoch": 63.82, "learning_rate": 1.8103665420062987e-05, "loss": 1.9517, "step": 22046500 }, { "epoch": 63.82, "learning_rate": 1.810294177241571e-05, "loss": 1.9139, "step": 22047000 }, { "epoch": 63.82, "learning_rate": 1.8102218124768434e-05, "loss": 1.9254, "step": 22047500 }, { "epoch": 63.82, "learning_rate": 1.810149592441645e-05, "loss": 1.9335, "step": 22048000 }, { "epoch": 63.82, "learning_rate": 1.810077372406447e-05, "loss": 1.9352, "step": 22048500 }, { "epoch": 63.82, "learning_rate": 1.8100050076417195e-05, "loss": 1.92, "step": 22049000 }, { "epoch": 63.82, "learning_rate": 1.8099326428769917e-05, "loss": 1.9289, "step": 22049500 }, { "epoch": 63.83, "learning_rate": 1.809860278112264e-05, "loss": 1.9246, "step": 22050000 }, { "epoch": 63.83, "learning_rate": 1.809787913347536e-05, "loss": 1.892, "step": 22050500 }, { "epoch": 63.83, "learning_rate": 1.8097158380418673e-05, "loss": 1.9424, "step": 22051000 }, { "epoch": 63.83, "learning_rate": 1.8096434732771396e-05, "loss": 1.9581, "step": 22051500 }, { "epoch": 63.83, "learning_rate": 1.809571108512412e-05, "loss": 1.9297, "step": 22052000 }, { "epoch": 63.83, "learning_rate": 1.8094987437476844e-05, "loss": 1.9223, "step": 22052500 }, { "epoch": 63.83, "learning_rate": 1.809426378982957e-05, "loss": 1.9296, "step": 22053000 }, { "epoch": 63.84, "learning_rate": 1.809354014218229e-05, "loss": 1.923, "step": 22053500 }, { "epoch": 63.84, "learning_rate": 1.8092816494535014e-05, "loss": 1.9247, "step": 22054000 }, { "epoch": 63.84, "learning_rate": 1.8092092846887736e-05, "loss": 1.9271, "step": 22054500 }, { "epoch": 63.84, "learning_rate": 1.809136919924046e-05, "loss": 1.9182, "step": 22055000 }, { "epoch": 63.84, "learning_rate": 1.8090645551593184e-05, "loss": 1.9032, "step": 22055500 }, { "epoch": 63.84, "learning_rate": 1.8089921903945906e-05, "loss": 1.9215, "step": 22056000 }, { "epoch": 63.84, "learning_rate": 1.808919825629863e-05, "loss": 1.9185, "step": 22056500 }, { "epoch": 63.85, "learning_rate": 1.8088474608651354e-05, "loss": 1.9402, "step": 22057000 }, { "epoch": 63.85, "learning_rate": 1.8087752408299373e-05, "loss": 1.9335, "step": 22057500 }, { "epoch": 63.85, "learning_rate": 1.8087028760652095e-05, "loss": 1.9483, "step": 22058000 }, { "epoch": 63.85, "learning_rate": 1.8086305113004817e-05, "loss": 1.9376, "step": 22058500 }, { "epoch": 63.85, "learning_rate": 1.8085582912652836e-05, "loss": 1.9511, "step": 22059000 }, { "epoch": 63.85, "learning_rate": 1.808485926500556e-05, "loss": 1.9235, "step": 22059500 }, { "epoch": 63.85, "learning_rate": 1.808413561735828e-05, "loss": 1.9249, "step": 22060000 }, { "epoch": 63.86, "learning_rate": 1.8083411969711006e-05, "loss": 1.9351, "step": 22060500 }, { "epoch": 63.86, "learning_rate": 1.808268832206373e-05, "loss": 1.9352, "step": 22061000 }, { "epoch": 63.86, "learning_rate": 1.808196467441645e-05, "loss": 1.9251, "step": 22061500 }, { "epoch": 63.86, "learning_rate": 1.8081241026769173e-05, "loss": 1.9396, "step": 22062000 }, { "epoch": 63.86, "learning_rate": 1.8080518826417192e-05, "loss": 1.9447, "step": 22062500 }, { "epoch": 63.86, "learning_rate": 1.8079795178769914e-05, "loss": 1.9303, "step": 22063000 }, { "epoch": 63.86, "learning_rate": 1.8079071531122636e-05, "loss": 1.9535, "step": 22063500 }, { "epoch": 63.87, "learning_rate": 1.8078347883475362e-05, "loss": 1.9551, "step": 22064000 }, { "epoch": 63.87, "learning_rate": 1.8077624235828088e-05, "loss": 1.9353, "step": 22064500 }, { "epoch": 63.87, "learning_rate": 1.807690058818081e-05, "loss": 1.9344, "step": 22065000 }, { "epoch": 63.87, "learning_rate": 1.8076176940533532e-05, "loss": 1.9259, "step": 22065500 }, { "epoch": 63.87, "learning_rate": 1.807545474018155e-05, "loss": 1.9049, "step": 22066000 }, { "epoch": 63.87, "learning_rate": 1.8074731092534273e-05, "loss": 1.9233, "step": 22066500 }, { "epoch": 63.87, "learning_rate": 1.8074007444886996e-05, "loss": 1.9507, "step": 22067000 }, { "epoch": 63.88, "learning_rate": 1.807328379723972e-05, "loss": 1.9317, "step": 22067500 }, { "epoch": 63.88, "learning_rate": 1.8072560149592443e-05, "loss": 1.9385, "step": 22068000 }, { "epoch": 63.88, "learning_rate": 1.8071836501945166e-05, "loss": 1.9309, "step": 22068500 }, { "epoch": 63.88, "learning_rate": 1.8071112854297888e-05, "loss": 1.9462, "step": 22069000 }, { "epoch": 63.88, "learning_rate": 1.8070389206650613e-05, "loss": 1.9071, "step": 22069500 }, { "epoch": 63.88, "learning_rate": 1.8069665559003336e-05, "loss": 1.9248, "step": 22070000 }, { "epoch": 63.89, "learning_rate": 1.8068941911356058e-05, "loss": 1.9457, "step": 22070500 }, { "epoch": 63.89, "learning_rate": 1.8068218263708784e-05, "loss": 1.9453, "step": 22071000 }, { "epoch": 63.89, "learning_rate": 1.8067494616061506e-05, "loss": 1.9071, "step": 22071500 }, { "epoch": 63.89, "learning_rate": 1.8066772415709525e-05, "loss": 1.9279, "step": 22072000 }, { "epoch": 63.89, "learning_rate": 1.8066048768062247e-05, "loss": 1.9191, "step": 22072500 }, { "epoch": 63.89, "learning_rate": 1.806532512041497e-05, "loss": 1.9291, "step": 22073000 }, { "epoch": 63.89, "learning_rate": 1.806460147276769e-05, "loss": 1.9489, "step": 22073500 }, { "epoch": 63.9, "learning_rate": 1.806387927241571e-05, "loss": 1.9247, "step": 22074000 }, { "epoch": 63.9, "learning_rate": 1.8063157072063726e-05, "loss": 1.9234, "step": 22074500 }, { "epoch": 63.9, "learning_rate": 1.806243342441645e-05, "loss": 1.9157, "step": 22075000 }, { "epoch": 63.9, "learning_rate": 1.8061709776769177e-05, "loss": 1.9148, "step": 22075500 }, { "epoch": 63.9, "learning_rate": 1.8060987576417193e-05, "loss": 1.9324, "step": 22076000 }, { "epoch": 63.9, "learning_rate": 1.8060263928769915e-05, "loss": 1.9307, "step": 22076500 }, { "epoch": 63.9, "learning_rate": 1.8059540281122637e-05, "loss": 1.9038, "step": 22077000 }, { "epoch": 63.91, "learning_rate": 1.8058816633475363e-05, "loss": 1.9322, "step": 22077500 }, { "epoch": 63.91, "learning_rate": 1.8058092985828085e-05, "loss": 1.9243, "step": 22078000 }, { "epoch": 63.91, "learning_rate": 1.8057369338180807e-05, "loss": 1.9311, "step": 22078500 }, { "epoch": 63.91, "learning_rate": 1.8056647137828826e-05, "loss": 1.9341, "step": 22079000 }, { "epoch": 63.91, "learning_rate": 1.8055923490181552e-05, "loss": 1.9347, "step": 22079500 }, { "epoch": 63.91, "learning_rate": 1.8055199842534274e-05, "loss": 1.9199, "step": 22080000 }, { "epoch": 63.91, "learning_rate": 1.8054476194886996e-05, "loss": 1.9311, "step": 22080500 }, { "epoch": 63.92, "learning_rate": 1.8053753994535015e-05, "loss": 1.9118, "step": 22081000 }, { "epoch": 63.92, "learning_rate": 1.8053030346887737e-05, "loss": 1.9332, "step": 22081500 }, { "epoch": 63.92, "learning_rate": 1.805230669924046e-05, "loss": 1.9199, "step": 22082000 }, { "epoch": 63.92, "learning_rate": 1.8051583051593182e-05, "loss": 1.9096, "step": 22082500 }, { "epoch": 63.92, "learning_rate": 1.8050859403945908e-05, "loss": 1.9093, "step": 22083000 }, { "epoch": 63.92, "learning_rate": 1.805013575629863e-05, "loss": 1.9209, "step": 22083500 }, { "epoch": 63.92, "learning_rate": 1.8049412108651352e-05, "loss": 1.9465, "step": 22084000 }, { "epoch": 63.93, "learning_rate": 1.8048688461004078e-05, "loss": 1.928, "step": 22084500 }, { "epoch": 63.93, "learning_rate": 1.80479648133568e-05, "loss": 1.9165, "step": 22085000 }, { "epoch": 63.93, "learning_rate": 1.8047241165709522e-05, "loss": 1.8942, "step": 22085500 }, { "epoch": 63.93, "learning_rate": 1.8046517518062244e-05, "loss": 1.9313, "step": 22086000 }, { "epoch": 63.93, "learning_rate": 1.804579387041497e-05, "loss": 1.9285, "step": 22086500 }, { "epoch": 63.93, "learning_rate": 1.8045070222767692e-05, "loss": 1.9409, "step": 22087000 }, { "epoch": 63.93, "learning_rate": 1.8044346575120414e-05, "loss": 1.9141, "step": 22087500 }, { "epoch": 63.94, "learning_rate": 1.804362292747314e-05, "loss": 1.9106, "step": 22088000 }, { "epoch": 63.94, "learning_rate": 1.8042900727121156e-05, "loss": 1.9241, "step": 22088500 }, { "epoch": 63.94, "learning_rate": 1.8042177079473878e-05, "loss": 1.9376, "step": 22089000 }, { "epoch": 63.94, "learning_rate": 1.8041453431826603e-05, "loss": 1.9232, "step": 22089500 }, { "epoch": 63.94, "learning_rate": 1.804072978417933e-05, "loss": 1.9383, "step": 22090000 }, { "epoch": 63.94, "learning_rate": 1.8040007583827345e-05, "loss": 1.9371, "step": 22090500 }, { "epoch": 63.94, "learning_rate": 1.8039283936180067e-05, "loss": 1.9322, "step": 22091000 }, { "epoch": 63.95, "learning_rate": 1.8038560288532793e-05, "loss": 1.9284, "step": 22091500 }, { "epoch": 63.95, "learning_rate": 1.8037836640885515e-05, "loss": 1.9261, "step": 22092000 }, { "epoch": 63.95, "learning_rate": 1.8037112993238237e-05, "loss": 1.9513, "step": 22092500 }, { "epoch": 63.95, "learning_rate": 1.803638934559096e-05, "loss": 1.9176, "step": 22093000 }, { "epoch": 63.95, "learning_rate": 1.8035665697943685e-05, "loss": 1.9272, "step": 22093500 }, { "epoch": 63.95, "learning_rate": 1.8034942050296407e-05, "loss": 1.9232, "step": 22094000 }, { "epoch": 63.95, "learning_rate": 1.8034219849944426e-05, "loss": 1.9121, "step": 22094500 }, { "epoch": 63.96, "learning_rate": 1.8033496202297148e-05, "loss": 1.916, "step": 22095000 }, { "epoch": 63.96, "learning_rate": 1.803277255464987e-05, "loss": 1.9056, "step": 22095500 }, { "epoch": 63.96, "learning_rate": 1.803205035429789e-05, "loss": 1.9472, "step": 22096000 }, { "epoch": 63.96, "learning_rate": 1.803132670665061e-05, "loss": 1.9203, "step": 22096500 }, { "epoch": 63.96, "learning_rate": 1.8030603059003334e-05, "loss": 1.9363, "step": 22097000 }, { "epoch": 63.96, "learning_rate": 1.802987941135606e-05, "loss": 1.9267, "step": 22097500 }, { "epoch": 63.96, "learning_rate": 1.8029155763708782e-05, "loss": 1.9339, "step": 22098000 }, { "epoch": 63.97, "learning_rate": 1.8028432116061504e-05, "loss": 1.912, "step": 22098500 }, { "epoch": 63.97, "learning_rate": 1.802770846841423e-05, "loss": 1.9146, "step": 22099000 }, { "epoch": 63.97, "learning_rate": 1.8026984820766952e-05, "loss": 1.9246, "step": 22099500 }, { "epoch": 63.97, "learning_rate": 1.8026261173119674e-05, "loss": 1.9457, "step": 22100000 }, { "epoch": 63.97, "learning_rate": 1.8025537525472396e-05, "loss": 1.9215, "step": 22100500 }, { "epoch": 63.97, "learning_rate": 1.802481677241571e-05, "loss": 1.9588, "step": 22101000 }, { "epoch": 63.97, "learning_rate": 1.8024093124768434e-05, "loss": 1.918, "step": 22101500 }, { "epoch": 63.98, "learning_rate": 1.8023369477121156e-05, "loss": 1.9041, "step": 22102000 }, { "epoch": 63.98, "learning_rate": 1.802264582947388e-05, "loss": 1.9297, "step": 22102500 }, { "epoch": 63.98, "learning_rate": 1.8021922181826604e-05, "loss": 1.9234, "step": 22103000 }, { "epoch": 63.98, "learning_rate": 1.8021198534179327e-05, "loss": 1.9384, "step": 22103500 }, { "epoch": 63.98, "learning_rate": 1.802047488653205e-05, "loss": 1.9231, "step": 22104000 }, { "epoch": 63.98, "learning_rate": 1.801975123888477e-05, "loss": 1.9559, "step": 22104500 }, { "epoch": 63.98, "learning_rate": 1.8019027591237497e-05, "loss": 1.9192, "step": 22105000 }, { "epoch": 63.99, "learning_rate": 1.8018305390885516e-05, "loss": 1.9339, "step": 22105500 }, { "epoch": 63.99, "learning_rate": 1.801758319053353e-05, "loss": 1.9181, "step": 22106000 }, { "epoch": 63.99, "learning_rate": 1.8016859542886257e-05, "loss": 1.9179, "step": 22106500 }, { "epoch": 63.99, "learning_rate": 1.801613589523898e-05, "loss": 1.9435, "step": 22107000 }, { "epoch": 63.99, "learning_rate": 1.80154122475917e-05, "loss": 1.9354, "step": 22107500 }, { "epoch": 63.99, "learning_rate": 1.8014688599944423e-05, "loss": 1.9361, "step": 22108000 }, { "epoch": 64.0, "learning_rate": 1.801396495229715e-05, "loss": 1.929, "step": 22108500 }, { "epoch": 64.0, "learning_rate": 1.8013242751945168e-05, "loss": 1.9308, "step": 22109000 }, { "epoch": 64.0, "learning_rate": 1.801251910429789e-05, "loss": 1.926, "step": 22109500 }, { "epoch": 64.0, "learning_rate": 1.8011795456650612e-05, "loss": 1.9216, "step": 22110000 }, { "epoch": 64.0, "eval_accuracy": 0.6793195536172655, "eval_accuracy_mlm": 0.6463131908496063, "eval_accuracy_nsp": 0.8564069424775314, "eval_loss": 2.15972900390625, "eval_runtime": 331.551, "eval_samples_per_second": 1316.196, "eval_steps_per_second": 54.842, "step": 22110208 }, { "epoch": 64.0, "learning_rate": 1.8011071809003335e-05, "loss": 1.9285, "step": 22110500 }, { "epoch": 64.0, "learning_rate": 1.8010351055946647e-05, "loss": 1.9188, "step": 22111000 }, { "epoch": 64.0, "learning_rate": 1.800962740829937e-05, "loss": 1.9244, "step": 22111500 }, { "epoch": 64.01, "learning_rate": 1.8008903760652095e-05, "loss": 1.9091, "step": 22112000 }, { "epoch": 64.01, "learning_rate": 1.8008180113004817e-05, "loss": 1.9017, "step": 22112500 }, { "epoch": 64.01, "learning_rate": 1.8007456465357543e-05, "loss": 1.8827, "step": 22113000 }, { "epoch": 64.01, "learning_rate": 1.8006732817710265e-05, "loss": 1.9168, "step": 22113500 }, { "epoch": 64.01, "learning_rate": 1.8006009170062987e-05, "loss": 1.9206, "step": 22114000 }, { "epoch": 64.01, "learning_rate": 1.800528552241571e-05, "loss": 1.9244, "step": 22114500 }, { "epoch": 64.01, "learning_rate": 1.800456187476843e-05, "loss": 1.8962, "step": 22115000 }, { "epoch": 64.02, "learning_rate": 1.8003838227121157e-05, "loss": 1.9183, "step": 22115500 }, { "epoch": 64.02, "learning_rate": 1.8003116026769173e-05, "loss": 1.8986, "step": 22116000 }, { "epoch": 64.02, "learning_rate": 1.80023923791219e-05, "loss": 1.9191, "step": 22116500 }, { "epoch": 64.02, "learning_rate": 1.800166873147462e-05, "loss": 1.9482, "step": 22117000 }, { "epoch": 64.02, "learning_rate": 1.8000945083827346e-05, "loss": 1.9122, "step": 22117500 }, { "epoch": 64.02, "learning_rate": 1.800022143618007e-05, "loss": 1.9042, "step": 22118000 }, { "epoch": 64.02, "learning_rate": 1.799949778853279e-05, "loss": 1.9005, "step": 22118500 }, { "epoch": 64.03, "learning_rate": 1.7998774140885513e-05, "loss": 1.9069, "step": 22119000 }, { "epoch": 64.03, "learning_rate": 1.7998050493238235e-05, "loss": 1.8987, "step": 22119500 }, { "epoch": 64.03, "learning_rate": 1.799732684559096e-05, "loss": 1.895, "step": 22120000 }, { "epoch": 64.03, "learning_rate": 1.7996606092534273e-05, "loss": 1.9266, "step": 22120500 }, { "epoch": 64.03, "learning_rate": 1.7995882444886995e-05, "loss": 1.9125, "step": 22121000 }, { "epoch": 64.03, "learning_rate": 1.799515879723972e-05, "loss": 1.9262, "step": 22121500 }, { "epoch": 64.03, "learning_rate": 1.7994435149592443e-05, "loss": 1.9259, "step": 22122000 }, { "epoch": 64.04, "learning_rate": 1.7993711501945165e-05, "loss": 1.909, "step": 22122500 }, { "epoch": 64.04, "learning_rate": 1.7992989301593184e-05, "loss": 1.9082, "step": 22123000 }, { "epoch": 64.04, "learning_rate": 1.79922671012412e-05, "loss": 1.9267, "step": 22123500 }, { "epoch": 64.04, "learning_rate": 1.7991543453593926e-05, "loss": 1.8904, "step": 22124000 }, { "epoch": 64.04, "learning_rate": 1.7990819805946648e-05, "loss": 1.9068, "step": 22124500 }, { "epoch": 64.04, "learning_rate": 1.799009615829937e-05, "loss": 1.9247, "step": 22125000 }, { "epoch": 64.04, "learning_rate": 1.7989372510652096e-05, "loss": 1.9175, "step": 22125500 }, { "epoch": 64.05, "learning_rate": 1.7988648863004818e-05, "loss": 1.9019, "step": 22126000 }, { "epoch": 64.05, "learning_rate": 1.798792521535754e-05, "loss": 1.898, "step": 22126500 }, { "epoch": 64.05, "learning_rate": 1.7987201567710262e-05, "loss": 1.9159, "step": 22127000 }, { "epoch": 64.05, "learning_rate": 1.7986477920062988e-05, "loss": 1.9222, "step": 22127500 }, { "epoch": 64.05, "learning_rate": 1.7985755719711007e-05, "loss": 1.9086, "step": 22128000 }, { "epoch": 64.05, "learning_rate": 1.798503207206373e-05, "loss": 1.9034, "step": 22128500 }, { "epoch": 64.05, "learning_rate": 1.798430842441645e-05, "loss": 1.9042, "step": 22129000 }, { "epoch": 64.06, "learning_rate": 1.7983584776769174e-05, "loss": 1.8934, "step": 22129500 }, { "epoch": 64.06, "learning_rate": 1.7982861129121896e-05, "loss": 1.9304, "step": 22130000 }, { "epoch": 64.06, "learning_rate": 1.798213748147462e-05, "loss": 1.8965, "step": 22130500 }, { "epoch": 64.06, "learning_rate": 1.7981413833827344e-05, "loss": 1.9218, "step": 22131000 }, { "epoch": 64.06, "learning_rate": 1.798069018618007e-05, "loss": 1.9197, "step": 22131500 }, { "epoch": 64.06, "learning_rate": 1.7979967985828085e-05, "loss": 1.9037, "step": 22132000 }, { "epoch": 64.06, "learning_rate": 1.797924433818081e-05, "loss": 1.9199, "step": 22132500 }, { "epoch": 64.07, "learning_rate": 1.7978520690533533e-05, "loss": 1.9222, "step": 22133000 }, { "epoch": 64.07, "learning_rate": 1.7977797042886255e-05, "loss": 1.9473, "step": 22133500 }, { "epoch": 64.07, "learning_rate": 1.7977073395238977e-05, "loss": 1.9169, "step": 22134000 }, { "epoch": 64.07, "learning_rate": 1.7976351194886996e-05, "loss": 1.9081, "step": 22134500 }, { "epoch": 64.07, "learning_rate": 1.7975627547239722e-05, "loss": 1.9115, "step": 22135000 }, { "epoch": 64.07, "learning_rate": 1.7974905346887737e-05, "loss": 1.9003, "step": 22135500 }, { "epoch": 64.07, "learning_rate": 1.797418169924046e-05, "loss": 1.9331, "step": 22136000 }, { "epoch": 64.08, "learning_rate": 1.7973458051593185e-05, "loss": 1.9153, "step": 22136500 }, { "epoch": 64.08, "learning_rate": 1.7972734403945907e-05, "loss": 1.9007, "step": 22137000 }, { "epoch": 64.08, "learning_rate": 1.797201075629863e-05, "loss": 1.8972, "step": 22137500 }, { "epoch": 64.08, "learning_rate": 1.7971287108651352e-05, "loss": 1.9338, "step": 22138000 }, { "epoch": 64.08, "learning_rate": 1.7970563461004074e-05, "loss": 1.9156, "step": 22138500 }, { "epoch": 64.08, "learning_rate": 1.79698398133568e-05, "loss": 1.9222, "step": 22139000 }, { "epoch": 64.08, "learning_rate": 1.796911761300482e-05, "loss": 1.8952, "step": 22139500 }, { "epoch": 64.09, "learning_rate": 1.796839396535754e-05, "loss": 1.9226, "step": 22140000 }, { "epoch": 64.09, "learning_rate": 1.7967670317710263e-05, "loss": 1.8989, "step": 22140500 }, { "epoch": 64.09, "learning_rate": 1.7966948117358282e-05, "loss": 1.8995, "step": 22141000 }, { "epoch": 64.09, "learning_rate": 1.7966224469711004e-05, "loss": 1.914, "step": 22141500 }, { "epoch": 64.09, "learning_rate": 1.7965500822063727e-05, "loss": 1.912, "step": 22142000 }, { "epoch": 64.09, "learning_rate": 1.796477717441645e-05, "loss": 1.9256, "step": 22142500 }, { "epoch": 64.09, "learning_rate": 1.7964053526769174e-05, "loss": 1.872, "step": 22143000 }, { "epoch": 64.1, "learning_rate": 1.79633298791219e-05, "loss": 1.9093, "step": 22143500 }, { "epoch": 64.1, "learning_rate": 1.7962606231474622e-05, "loss": 1.9323, "step": 22144000 }, { "epoch": 64.1, "learning_rate": 1.7961882583827344e-05, "loss": 1.913, "step": 22144500 }, { "epoch": 64.1, "learning_rate": 1.7961158936180067e-05, "loss": 1.9215, "step": 22145000 }, { "epoch": 64.1, "learning_rate": 1.7960436735828086e-05, "loss": 1.9245, "step": 22145500 }, { "epoch": 64.1, "learning_rate": 1.7959713088180808e-05, "loss": 1.9082, "step": 22146000 }, { "epoch": 64.11, "learning_rate": 1.7958989440533533e-05, "loss": 1.9013, "step": 22146500 }, { "epoch": 64.11, "learning_rate": 1.7958265792886256e-05, "loss": 1.9192, "step": 22147000 }, { "epoch": 64.11, "learning_rate": 1.7957542145238978e-05, "loss": 1.9149, "step": 22147500 }, { "epoch": 64.11, "learning_rate": 1.79568184975917e-05, "loss": 1.9131, "step": 22148000 }, { "epoch": 64.11, "learning_rate": 1.795609629723972e-05, "loss": 1.9122, "step": 22148500 }, { "epoch": 64.11, "learning_rate": 1.795537264959244e-05, "loss": 1.9222, "step": 22149000 }, { "epoch": 64.11, "learning_rate": 1.7954649001945164e-05, "loss": 1.9272, "step": 22149500 }, { "epoch": 64.12, "learning_rate": 1.7953926801593183e-05, "loss": 1.9028, "step": 22150000 }, { "epoch": 64.12, "learning_rate": 1.7953203153945908e-05, "loss": 1.9009, "step": 22150500 }, { "epoch": 64.12, "learning_rate": 1.795247950629863e-05, "loss": 1.8874, "step": 22151000 }, { "epoch": 64.12, "learning_rate": 1.7951755858651353e-05, "loss": 1.9004, "step": 22151500 }, { "epoch": 64.12, "learning_rate": 1.7951032211004075e-05, "loss": 1.9168, "step": 22152000 }, { "epoch": 64.12, "learning_rate": 1.7950310010652094e-05, "loss": 1.9096, "step": 22152500 }, { "epoch": 64.12, "learning_rate": 1.7949586363004816e-05, "loss": 1.9034, "step": 22153000 }, { "epoch": 64.13, "learning_rate": 1.7948862715357538e-05, "loss": 1.9018, "step": 22153500 }, { "epoch": 64.13, "learning_rate": 1.7948139067710264e-05, "loss": 1.9008, "step": 22154000 }, { "epoch": 64.13, "learning_rate": 1.794741542006299e-05, "loss": 1.9214, "step": 22154500 }, { "epoch": 64.13, "learning_rate": 1.7946691772415712e-05, "loss": 1.9188, "step": 22155000 }, { "epoch": 64.13, "learning_rate": 1.7945968124768434e-05, "loss": 1.9334, "step": 22155500 }, { "epoch": 64.13, "learning_rate": 1.7945244477121156e-05, "loss": 1.9264, "step": 22156000 }, { "epoch": 64.13, "learning_rate": 1.7944522276769175e-05, "loss": 1.9151, "step": 22156500 }, { "epoch": 64.14, "learning_rate": 1.794380007641719e-05, "loss": 1.9073, "step": 22157000 }, { "epoch": 64.14, "learning_rate": 1.7943076428769913e-05, "loss": 1.9088, "step": 22157500 }, { "epoch": 64.14, "learning_rate": 1.794235278112264e-05, "loss": 1.9066, "step": 22158000 }, { "epoch": 64.14, "learning_rate": 1.7941629133475364e-05, "loss": 1.9157, "step": 22158500 }, { "epoch": 64.14, "learning_rate": 1.7940905485828086e-05, "loss": 1.9184, "step": 22159000 }, { "epoch": 64.14, "learning_rate": 1.7940183285476102e-05, "loss": 1.9127, "step": 22159500 }, { "epoch": 64.14, "learning_rate": 1.7939459637828828e-05, "loss": 1.9221, "step": 22160000 }, { "epoch": 64.15, "learning_rate": 1.793873599018155e-05, "loss": 1.9012, "step": 22160500 }, { "epoch": 64.15, "learning_rate": 1.7938012342534272e-05, "loss": 1.9229, "step": 22161000 }, { "epoch": 64.15, "learning_rate": 1.793729014218229e-05, "loss": 1.8903, "step": 22161500 }, { "epoch": 64.15, "learning_rate": 1.7936566494535013e-05, "loss": 1.8957, "step": 22162000 }, { "epoch": 64.15, "learning_rate": 1.793584284688774e-05, "loss": 1.9205, "step": 22162500 }, { "epoch": 64.15, "learning_rate": 1.793511919924046e-05, "loss": 1.8969, "step": 22163000 }, { "epoch": 64.15, "learning_rate": 1.7934395551593183e-05, "loss": 1.9103, "step": 22163500 }, { "epoch": 64.16, "learning_rate": 1.7933671903945906e-05, "loss": 1.9238, "step": 22164000 }, { "epoch": 64.16, "learning_rate": 1.7932948256298628e-05, "loss": 1.913, "step": 22164500 }, { "epoch": 64.16, "learning_rate": 1.7932224608651353e-05, "loss": 1.9272, "step": 22165000 }, { "epoch": 64.16, "learning_rate": 1.793150096100408e-05, "loss": 1.9211, "step": 22165500 }, { "epoch": 64.16, "learning_rate": 1.79307773133568e-05, "loss": 1.9034, "step": 22166000 }, { "epoch": 64.16, "learning_rate": 1.7930055113004817e-05, "loss": 1.9051, "step": 22166500 }, { "epoch": 64.16, "learning_rate": 1.792933146535754e-05, "loss": 1.9146, "step": 22167000 }, { "epoch": 64.17, "learning_rate": 1.7928607817710265e-05, "loss": 1.9199, "step": 22167500 }, { "epoch": 64.17, "learning_rate": 1.7927884170062987e-05, "loss": 1.9386, "step": 22168000 }, { "epoch": 64.17, "learning_rate": 1.7927161969711002e-05, "loss": 1.9342, "step": 22168500 }, { "epoch": 64.17, "learning_rate": 1.7926438322063728e-05, "loss": 1.9399, "step": 22169000 }, { "epoch": 64.17, "learning_rate": 1.7925714674416454e-05, "loss": 1.9248, "step": 22169500 }, { "epoch": 64.17, "learning_rate": 1.7924991026769176e-05, "loss": 1.9493, "step": 22170000 }, { "epoch": 64.17, "learning_rate": 1.7924267379121898e-05, "loss": 1.9337, "step": 22170500 }, { "epoch": 64.18, "learning_rate": 1.792354373147462e-05, "loss": 1.9078, "step": 22171000 }, { "epoch": 64.18, "learning_rate": 1.7922820083827343e-05, "loss": 1.9289, "step": 22171500 }, { "epoch": 64.18, "learning_rate": 1.792209788347536e-05, "loss": 1.8999, "step": 22172000 }, { "epoch": 64.18, "learning_rate": 1.7921375683123377e-05, "loss": 1.9212, "step": 22172500 }, { "epoch": 64.18, "learning_rate": 1.7920652035476103e-05, "loss": 1.9559, "step": 22173000 }, { "epoch": 64.18, "learning_rate": 1.791992838782883e-05, "loss": 1.9233, "step": 22173500 }, { "epoch": 64.18, "learning_rate": 1.791920474018155e-05, "loss": 1.9345, "step": 22174000 }, { "epoch": 64.19, "learning_rate": 1.7918481092534273e-05, "loss": 1.9278, "step": 22174500 }, { "epoch": 64.19, "learning_rate": 1.7917757444886995e-05, "loss": 1.9246, "step": 22175000 }, { "epoch": 64.19, "learning_rate": 1.7917033797239717e-05, "loss": 1.9153, "step": 22175500 }, { "epoch": 64.19, "learning_rate": 1.791631014959244e-05, "loss": 1.9247, "step": 22176000 }, { "epoch": 64.19, "learning_rate": 1.7915586501945165e-05, "loss": 1.9154, "step": 22176500 }, { "epoch": 64.19, "learning_rate": 1.791486285429789e-05, "loss": 1.9172, "step": 22177000 }, { "epoch": 64.19, "learning_rate": 1.7914139206650613e-05, "loss": 1.9054, "step": 22177500 }, { "epoch": 64.2, "learning_rate": 1.791341700629863e-05, "loss": 1.9005, "step": 22178000 }, { "epoch": 64.2, "learning_rate": 1.7912693358651354e-05, "loss": 1.9285, "step": 22178500 }, { "epoch": 64.2, "learning_rate": 1.7911969711004076e-05, "loss": 1.8965, "step": 22179000 }, { "epoch": 64.2, "learning_rate": 1.79112460633568e-05, "loss": 1.9192, "step": 22179500 }, { "epoch": 64.2, "learning_rate": 1.7910523863004818e-05, "loss": 1.9443, "step": 22180000 }, { "epoch": 64.2, "learning_rate": 1.7909800215357543e-05, "loss": 1.9252, "step": 22180500 }, { "epoch": 64.2, "learning_rate": 1.7909076567710265e-05, "loss": 1.9131, "step": 22181000 }, { "epoch": 64.21, "learning_rate": 1.7908352920062988e-05, "loss": 1.9253, "step": 22181500 }, { "epoch": 64.21, "learning_rate": 1.79076321670063e-05, "loss": 1.9213, "step": 22182000 }, { "epoch": 64.21, "learning_rate": 1.7906908519359022e-05, "loss": 1.9208, "step": 22182500 }, { "epoch": 64.21, "learning_rate": 1.7906184871711744e-05, "loss": 1.9269, "step": 22183000 }, { "epoch": 64.21, "learning_rate": 1.7905461224064467e-05, "loss": 1.9114, "step": 22183500 }, { "epoch": 64.21, "learning_rate": 1.7904737576417192e-05, "loss": 1.9033, "step": 22184000 }, { "epoch": 64.22, "learning_rate": 1.7904013928769918e-05, "loss": 1.9007, "step": 22184500 }, { "epoch": 64.22, "learning_rate": 1.790329028112264e-05, "loss": 1.9158, "step": 22185000 }, { "epoch": 64.22, "learning_rate": 1.7902566633475362e-05, "loss": 1.9172, "step": 22185500 }, { "epoch": 64.22, "learning_rate": 1.7901842985828085e-05, "loss": 1.9034, "step": 22186000 }, { "epoch": 64.22, "learning_rate": 1.7901119338180807e-05, "loss": 1.9164, "step": 22186500 }, { "epoch": 64.22, "learning_rate": 1.7900397137828826e-05, "loss": 1.903, "step": 22187000 }, { "epoch": 64.22, "learning_rate": 1.789967493747684e-05, "loss": 1.9162, "step": 22187500 }, { "epoch": 64.23, "learning_rate": 1.7898951289829567e-05, "loss": 1.889, "step": 22188000 }, { "epoch": 64.23, "learning_rate": 1.7898227642182293e-05, "loss": 1.9083, "step": 22188500 }, { "epoch": 64.23, "learning_rate": 1.7897503994535015e-05, "loss": 1.9033, "step": 22189000 }, { "epoch": 64.23, "learning_rate": 1.7896780346887737e-05, "loss": 1.9267, "step": 22189500 }, { "epoch": 64.23, "learning_rate": 1.789605669924046e-05, "loss": 1.9168, "step": 22190000 }, { "epoch": 64.23, "learning_rate": 1.789533305159318e-05, "loss": 1.915, "step": 22190500 }, { "epoch": 64.23, "learning_rate": 1.7894609403945907e-05, "loss": 1.9119, "step": 22191000 }, { "epoch": 64.24, "learning_rate": 1.7893885756298633e-05, "loss": 1.9252, "step": 22191500 }, { "epoch": 64.24, "learning_rate": 1.7893162108651355e-05, "loss": 1.9116, "step": 22192000 }, { "epoch": 64.24, "learning_rate": 1.7892438461004077e-05, "loss": 1.9466, "step": 22192500 }, { "epoch": 64.24, "learning_rate": 1.78917148133568e-05, "loss": 1.9083, "step": 22193000 }, { "epoch": 64.24, "learning_rate": 1.7890991165709522e-05, "loss": 1.9373, "step": 22193500 }, { "epoch": 64.24, "learning_rate": 1.7890267518062244e-05, "loss": 1.9018, "step": 22194000 }, { "epoch": 64.24, "learning_rate": 1.788954387041497e-05, "loss": 1.934, "step": 22194500 }, { "epoch": 64.25, "learning_rate": 1.7888820222767695e-05, "loss": 1.9197, "step": 22195000 }, { "epoch": 64.25, "learning_rate": 1.7888096575120417e-05, "loss": 1.9212, "step": 22195500 }, { "epoch": 64.25, "learning_rate": 1.7887374374768433e-05, "loss": 1.9105, "step": 22196000 }, { "epoch": 64.25, "learning_rate": 1.788665072712116e-05, "loss": 1.9126, "step": 22196500 }, { "epoch": 64.25, "learning_rate": 1.788592707947388e-05, "loss": 1.9191, "step": 22197000 }, { "epoch": 64.25, "learning_rate": 1.7885203431826603e-05, "loss": 1.9139, "step": 22197500 }, { "epoch": 64.25, "learning_rate": 1.7884479784179325e-05, "loss": 1.9035, "step": 22198000 }, { "epoch": 64.26, "learning_rate": 1.7883757583827344e-05, "loss": 1.9185, "step": 22198500 }, { "epoch": 64.26, "learning_rate": 1.788303393618007e-05, "loss": 1.9253, "step": 22199000 }, { "epoch": 64.26, "learning_rate": 1.7882310288532792e-05, "loss": 1.9217, "step": 22199500 }, { "epoch": 64.26, "learning_rate": 1.7881586640885514e-05, "loss": 1.9086, "step": 22200000 }, { "epoch": 64.26, "learning_rate": 1.7880862993238237e-05, "loss": 1.9297, "step": 22200500 }, { "epoch": 64.26, "learning_rate": 1.788013934559096e-05, "loss": 1.9134, "step": 22201000 }, { "epoch": 64.26, "learning_rate": 1.7879417145238978e-05, "loss": 1.92, "step": 22201500 }, { "epoch": 64.27, "learning_rate": 1.78786934975917e-05, "loss": 1.9248, "step": 22202000 }, { "epoch": 64.27, "learning_rate": 1.7877969849944426e-05, "loss": 1.8934, "step": 22202500 }, { "epoch": 64.27, "learning_rate": 1.7877246202297148e-05, "loss": 1.9159, "step": 22203000 }, { "epoch": 64.27, "learning_rate": 1.787652255464987e-05, "loss": 1.9209, "step": 22203500 }, { "epoch": 64.27, "learning_rate": 1.7875798907002596e-05, "loss": 1.9085, "step": 22204000 }, { "epoch": 64.27, "learning_rate": 1.7875075259355318e-05, "loss": 1.9164, "step": 22204500 }, { "epoch": 64.27, "learning_rate": 1.7874353059003333e-05, "loss": 1.9066, "step": 22205000 }, { "epoch": 64.28, "learning_rate": 1.787362941135606e-05, "loss": 1.9133, "step": 22205500 }, { "epoch": 64.28, "learning_rate": 1.7872905763708785e-05, "loss": 1.8985, "step": 22206000 }, { "epoch": 64.28, "learning_rate": 1.7872182116061507e-05, "loss": 1.9027, "step": 22206500 }, { "epoch": 64.28, "learning_rate": 1.787145846841423e-05, "loss": 1.9282, "step": 22207000 }, { "epoch": 64.28, "learning_rate": 1.787073482076695e-05, "loss": 1.9029, "step": 22207500 }, { "epoch": 64.28, "learning_rate": 1.787001262041497e-05, "loss": 1.9329, "step": 22208000 }, { "epoch": 64.28, "learning_rate": 1.7869288972767693e-05, "loss": 1.9103, "step": 22208500 }, { "epoch": 64.29, "learning_rate": 1.7868565325120415e-05, "loss": 1.9184, "step": 22209000 }, { "epoch": 64.29, "learning_rate": 1.7867841677473137e-05, "loss": 1.9291, "step": 22209500 }, { "epoch": 64.29, "learning_rate": 1.7867118029825863e-05, "loss": 1.9072, "step": 22210000 }, { "epoch": 64.29, "learning_rate": 1.7866394382178585e-05, "loss": 1.9412, "step": 22210500 }, { "epoch": 64.29, "learning_rate": 1.7865672181826604e-05, "loss": 1.9138, "step": 22211000 }, { "epoch": 64.29, "learning_rate": 1.7864949981474623e-05, "loss": 1.9151, "step": 22211500 }, { "epoch": 64.29, "learning_rate": 1.7864226333827345e-05, "loss": 1.9156, "step": 22212000 }, { "epoch": 64.3, "learning_rate": 1.7863502686180067e-05, "loss": 1.8867, "step": 22212500 }, { "epoch": 64.3, "learning_rate": 1.786277903853279e-05, "loss": 1.9213, "step": 22213000 }, { "epoch": 64.3, "learning_rate": 1.7862055390885515e-05, "loss": 1.9268, "step": 22213500 }, { "epoch": 64.3, "learning_rate": 1.7861334637828827e-05, "loss": 1.8999, "step": 22214000 }, { "epoch": 64.3, "learning_rate": 1.786061099018155e-05, "loss": 1.9158, "step": 22214500 }, { "epoch": 64.3, "learning_rate": 1.7859887342534272e-05, "loss": 1.9317, "step": 22215000 }, { "epoch": 64.3, "learning_rate": 1.7859163694886998e-05, "loss": 1.9246, "step": 22215500 }, { "epoch": 64.31, "learning_rate": 1.785844004723972e-05, "loss": 1.9095, "step": 22216000 }, { "epoch": 64.31, "learning_rate": 1.7857716399592442e-05, "loss": 1.9298, "step": 22216500 }, { "epoch": 64.31, "learning_rate": 1.7856992751945164e-05, "loss": 1.8953, "step": 22217000 }, { "epoch": 64.31, "learning_rate": 1.785626910429789e-05, "loss": 1.8986, "step": 22217500 }, { "epoch": 64.31, "learning_rate": 1.7855545456650612e-05, "loss": 1.9304, "step": 22218000 }, { "epoch": 64.31, "learning_rate": 1.7854821809003334e-05, "loss": 1.8979, "step": 22218500 }, { "epoch": 64.31, "learning_rate": 1.7854099608651353e-05, "loss": 1.9249, "step": 22219000 }, { "epoch": 64.32, "learning_rate": 1.7853377408299372e-05, "loss": 1.908, "step": 22219500 }, { "epoch": 64.32, "learning_rate": 1.7852653760652094e-05, "loss": 1.9032, "step": 22220000 }, { "epoch": 64.32, "learning_rate": 1.7851930113004817e-05, "loss": 1.9251, "step": 22220500 }, { "epoch": 64.32, "learning_rate": 1.785120646535754e-05, "loss": 1.9066, "step": 22221000 }, { "epoch": 64.32, "learning_rate": 1.785048426500556e-05, "loss": 1.9163, "step": 22221500 }, { "epoch": 64.32, "learning_rate": 1.7849762064653577e-05, "loss": 1.9285, "step": 22222000 }, { "epoch": 64.33, "learning_rate": 1.78490384170063e-05, "loss": 1.9197, "step": 22222500 }, { "epoch": 64.33, "learning_rate": 1.7848314769359025e-05, "loss": 1.9061, "step": 22223000 }, { "epoch": 64.33, "learning_rate": 1.7847591121711747e-05, "loss": 1.9403, "step": 22223500 }, { "epoch": 64.33, "learning_rate": 1.784686747406447e-05, "loss": 1.902, "step": 22224000 }, { "epoch": 64.33, "learning_rate": 1.784614382641719e-05, "loss": 1.9104, "step": 22224500 }, { "epoch": 64.33, "learning_rate": 1.7845420178769914e-05, "loss": 1.9308, "step": 22225000 }, { "epoch": 64.33, "learning_rate": 1.784469653112264e-05, "loss": 1.9298, "step": 22225500 }, { "epoch": 64.34, "learning_rate": 1.784397288347536e-05, "loss": 1.9157, "step": 22226000 }, { "epoch": 64.34, "learning_rate": 1.7843249235828087e-05, "loss": 1.9295, "step": 22226500 }, { "epoch": 64.34, "learning_rate": 1.784252558818081e-05, "loss": 1.9378, "step": 22227000 }, { "epoch": 64.34, "learning_rate": 1.784180194053353e-05, "loss": 1.9095, "step": 22227500 }, { "epoch": 64.34, "learning_rate": 1.7841078292886254e-05, "loss": 1.9101, "step": 22228000 }, { "epoch": 64.34, "learning_rate": 1.7840354645238976e-05, "loss": 1.931, "step": 22228500 }, { "epoch": 64.34, "learning_rate": 1.78396309975917e-05, "loss": 1.9209, "step": 22229000 }, { "epoch": 64.35, "learning_rate": 1.7838907349944424e-05, "loss": 1.9522, "step": 22229500 }, { "epoch": 64.35, "learning_rate": 1.783818370229715e-05, "loss": 1.9274, "step": 22230000 }, { "epoch": 64.35, "learning_rate": 1.783746005464987e-05, "loss": 1.935, "step": 22230500 }, { "epoch": 64.35, "learning_rate": 1.7836736407002594e-05, "loss": 1.9142, "step": 22231000 }, { "epoch": 64.35, "learning_rate": 1.7836014206650613e-05, "loss": 1.9304, "step": 22231500 }, { "epoch": 64.35, "learning_rate": 1.783529200629863e-05, "loss": 1.9153, "step": 22232000 }, { "epoch": 64.35, "learning_rate": 1.7834569805946647e-05, "loss": 1.9175, "step": 22232500 }, { "epoch": 64.36, "learning_rate": 1.7833846158299373e-05, "loss": 1.913, "step": 22233000 }, { "epoch": 64.36, "learning_rate": 1.7833122510652095e-05, "loss": 1.9213, "step": 22233500 }, { "epoch": 64.36, "learning_rate": 1.7832398863004817e-05, "loss": 1.8909, "step": 22234000 }, { "epoch": 64.36, "learning_rate": 1.783167521535754e-05, "loss": 1.8811, "step": 22234500 }, { "epoch": 64.36, "learning_rate": 1.7830951567710262e-05, "loss": 1.9195, "step": 22235000 }, { "epoch": 64.36, "learning_rate": 1.7830227920062988e-05, "loss": 1.9035, "step": 22235500 }, { "epoch": 64.36, "learning_rate": 1.7829505719711003e-05, "loss": 1.909, "step": 22236000 }, { "epoch": 64.37, "learning_rate": 1.782878207206373e-05, "loss": 1.9165, "step": 22236500 }, { "epoch": 64.37, "learning_rate": 1.782805842441645e-05, "loss": 1.9231, "step": 22237000 }, { "epoch": 64.37, "learning_rate": 1.7827334776769177e-05, "loss": 1.9178, "step": 22237500 }, { "epoch": 64.37, "learning_rate": 1.78266111291219e-05, "loss": 1.9357, "step": 22238000 }, { "epoch": 64.37, "learning_rate": 1.782588748147462e-05, "loss": 1.9108, "step": 22238500 }, { "epoch": 64.37, "learning_rate": 1.7825163833827343e-05, "loss": 1.895, "step": 22239000 }, { "epoch": 64.37, "learning_rate": 1.7824440186180065e-05, "loss": 1.9285, "step": 22239500 }, { "epoch": 64.38, "learning_rate": 1.782371653853279e-05, "loss": 1.9204, "step": 22240000 }, { "epoch": 64.38, "learning_rate": 1.7822992890885513e-05, "loss": 1.9328, "step": 22240500 }, { "epoch": 64.38, "learning_rate": 1.782226924323824e-05, "loss": 1.9037, "step": 22241000 }, { "epoch": 64.38, "learning_rate": 1.782154559559096e-05, "loss": 1.9389, "step": 22241500 }, { "epoch": 64.38, "learning_rate": 1.7820821947943683e-05, "loss": 1.9095, "step": 22242000 }, { "epoch": 64.38, "learning_rate": 1.7820099747591702e-05, "loss": 1.9544, "step": 22242500 }, { "epoch": 64.38, "learning_rate": 1.7819376099944425e-05, "loss": 1.8979, "step": 22243000 }, { "epoch": 64.39, "learning_rate": 1.781865245229715e-05, "loss": 1.9209, "step": 22243500 }, { "epoch": 64.39, "learning_rate": 1.7817928804649872e-05, "loss": 1.9123, "step": 22244000 }, { "epoch": 64.39, "learning_rate": 1.7817206604297888e-05, "loss": 1.9396, "step": 22244500 }, { "epoch": 64.39, "learning_rate": 1.7816482956650614e-05, "loss": 1.9117, "step": 22245000 }, { "epoch": 64.39, "learning_rate": 1.7815759309003336e-05, "loss": 1.8936, "step": 22245500 }, { "epoch": 64.39, "learning_rate": 1.781503710865135e-05, "loss": 1.9252, "step": 22246000 }, { "epoch": 64.39, "learning_rate": 1.781431490829937e-05, "loss": 1.9005, "step": 22246500 }, { "epoch": 64.4, "learning_rate": 1.7813591260652093e-05, "loss": 1.929, "step": 22247000 }, { "epoch": 64.4, "learning_rate": 1.7812867613004815e-05, "loss": 1.9128, "step": 22247500 }, { "epoch": 64.4, "learning_rate": 1.781214396535754e-05, "loss": 1.9162, "step": 22248000 }, { "epoch": 64.4, "learning_rate": 1.7811420317710266e-05, "loss": 1.9075, "step": 22248500 }, { "epoch": 64.4, "learning_rate": 1.781069667006299e-05, "loss": 1.9143, "step": 22249000 }, { "epoch": 64.4, "learning_rate": 1.780997302241571e-05, "loss": 1.9097, "step": 22249500 }, { "epoch": 64.4, "learning_rate": 1.7809249374768433e-05, "loss": 1.8995, "step": 22250000 }, { "epoch": 64.41, "learning_rate": 1.7808525727121155e-05, "loss": 1.9232, "step": 22250500 }, { "epoch": 64.41, "learning_rate": 1.7807802079473877e-05, "loss": 1.9274, "step": 22251000 }, { "epoch": 64.41, "learning_rate": 1.7807078431826603e-05, "loss": 1.9448, "step": 22251500 }, { "epoch": 64.41, "learning_rate": 1.780635478417933e-05, "loss": 1.9449, "step": 22252000 }, { "epoch": 64.41, "learning_rate": 1.780563113653205e-05, "loss": 1.9165, "step": 22252500 }, { "epoch": 64.41, "learning_rate": 1.7804908936180066e-05, "loss": 1.9353, "step": 22253000 }, { "epoch": 64.41, "learning_rate": 1.7804185288532792e-05, "loss": 1.9256, "step": 22253500 }, { "epoch": 64.42, "learning_rate": 1.7803461640885514e-05, "loss": 1.9206, "step": 22254000 }, { "epoch": 64.42, "learning_rate": 1.7802737993238236e-05, "loss": 1.9149, "step": 22254500 }, { "epoch": 64.42, "learning_rate": 1.7802015792886255e-05, "loss": 1.9203, "step": 22255000 }, { "epoch": 64.42, "learning_rate": 1.7801292145238978e-05, "loss": 1.9184, "step": 22255500 }, { "epoch": 64.42, "learning_rate": 1.7800568497591703e-05, "loss": 1.9253, "step": 22256000 }, { "epoch": 64.42, "learning_rate": 1.779984629723972e-05, "loss": 1.9495, "step": 22256500 }, { "epoch": 64.42, "learning_rate": 1.779912264959244e-05, "loss": 1.9061, "step": 22257000 }, { "epoch": 64.43, "learning_rate": 1.7798399001945167e-05, "loss": 1.906, "step": 22257500 }, { "epoch": 64.43, "learning_rate": 1.779767535429789e-05, "loss": 1.8877, "step": 22258000 }, { "epoch": 64.43, "learning_rate": 1.779695170665061e-05, "loss": 1.9264, "step": 22258500 }, { "epoch": 64.43, "learning_rate": 1.7796228059003337e-05, "loss": 1.9237, "step": 22259000 }, { "epoch": 64.43, "learning_rate": 1.7795505858651356e-05, "loss": 1.9102, "step": 22259500 }, { "epoch": 64.43, "learning_rate": 1.7794782211004078e-05, "loss": 1.9204, "step": 22260000 }, { "epoch": 64.44, "learning_rate": 1.77940585633568e-05, "loss": 1.9059, "step": 22260500 }, { "epoch": 64.44, "learning_rate": 1.7793334915709522e-05, "loss": 1.9366, "step": 22261000 }, { "epoch": 64.44, "learning_rate": 1.7792611268062245e-05, "loss": 1.9238, "step": 22261500 }, { "epoch": 64.44, "learning_rate": 1.7791889067710263e-05, "loss": 1.9149, "step": 22262000 }, { "epoch": 64.44, "learning_rate": 1.779116686735828e-05, "loss": 1.9124, "step": 22262500 }, { "epoch": 64.44, "learning_rate": 1.7790443219711005e-05, "loss": 1.9147, "step": 22263000 }, { "epoch": 64.44, "learning_rate": 1.778971957206373e-05, "loss": 1.9148, "step": 22263500 }, { "epoch": 64.45, "learning_rate": 1.7788995924416453e-05, "loss": 1.932, "step": 22264000 }, { "epoch": 64.45, "learning_rate": 1.7788272276769175e-05, "loss": 1.9085, "step": 22264500 }, { "epoch": 64.45, "learning_rate": 1.7787548629121897e-05, "loss": 1.9036, "step": 22265000 }, { "epoch": 64.45, "learning_rate": 1.778682498147462e-05, "loss": 1.9142, "step": 22265500 }, { "epoch": 64.45, "learning_rate": 1.778610133382734e-05, "loss": 1.9162, "step": 22266000 }, { "epoch": 64.45, "learning_rate": 1.7785377686180067e-05, "loss": 1.9355, "step": 22266500 }, { "epoch": 64.45, "learning_rate": 1.7784654038532793e-05, "loss": 1.9344, "step": 22267000 }, { "epoch": 64.46, "learning_rate": 1.7783931838180808e-05, "loss": 1.9151, "step": 22267500 }, { "epoch": 64.46, "learning_rate": 1.778320819053353e-05, "loss": 1.9391, "step": 22268000 }, { "epoch": 64.46, "learning_rate": 1.7782484542886256e-05, "loss": 1.939, "step": 22268500 }, { "epoch": 64.46, "learning_rate": 1.778176089523898e-05, "loss": 1.9417, "step": 22269000 }, { "epoch": 64.46, "learning_rate": 1.77810372475917e-05, "loss": 1.9262, "step": 22269500 }, { "epoch": 64.46, "learning_rate": 1.7780313599944426e-05, "loss": 1.9296, "step": 22270000 }, { "epoch": 64.46, "learning_rate": 1.777958995229715e-05, "loss": 1.9046, "step": 22270500 }, { "epoch": 64.47, "learning_rate": 1.7778867751945167e-05, "loss": 1.8984, "step": 22271000 }, { "epoch": 64.47, "learning_rate": 1.777814410429789e-05, "loss": 1.9407, "step": 22271500 }, { "epoch": 64.47, "learning_rate": 1.7777420456650612e-05, "loss": 1.9346, "step": 22272000 }, { "epoch": 64.47, "learning_rate": 1.7776696809003334e-05, "loss": 1.9335, "step": 22272500 }, { "epoch": 64.47, "learning_rate": 1.7775973161356056e-05, "loss": 1.931, "step": 22273000 }, { "epoch": 64.47, "learning_rate": 1.7775249513708782e-05, "loss": 1.9119, "step": 22273500 }, { "epoch": 64.47, "learning_rate": 1.7774525866061508e-05, "loss": 1.8989, "step": 22274000 }, { "epoch": 64.48, "learning_rate": 1.777380221841423e-05, "loss": 1.9195, "step": 22274500 }, { "epoch": 64.48, "learning_rate": 1.7773080018062245e-05, "loss": 1.9188, "step": 22275000 }, { "epoch": 64.48, "learning_rate": 1.7772356370414968e-05, "loss": 1.9187, "step": 22275500 }, { "epoch": 64.48, "learning_rate": 1.7771632722767693e-05, "loss": 1.9223, "step": 22276000 }, { "epoch": 64.48, "learning_rate": 1.7770909075120415e-05, "loss": 1.9314, "step": 22276500 }, { "epoch": 64.48, "learning_rate": 1.7770185427473138e-05, "loss": 1.918, "step": 22277000 }, { "epoch": 64.48, "learning_rate": 1.7769461779825863e-05, "loss": 1.9266, "step": 22277500 }, { "epoch": 64.49, "learning_rate": 1.7768738132178586e-05, "loss": 1.931, "step": 22278000 }, { "epoch": 64.49, "learning_rate": 1.7768014484531308e-05, "loss": 1.9065, "step": 22278500 }, { "epoch": 64.49, "learning_rate": 1.7767290836884033e-05, "loss": 1.9085, "step": 22279000 }, { "epoch": 64.49, "learning_rate": 1.7766567189236756e-05, "loss": 1.9226, "step": 22279500 }, { "epoch": 64.49, "learning_rate": 1.7765843541589478e-05, "loss": 1.9095, "step": 22280000 }, { "epoch": 64.49, "learning_rate": 1.77651198939422e-05, "loss": 1.9128, "step": 22280500 }, { "epoch": 64.49, "learning_rate": 1.7764396246294926e-05, "loss": 1.9079, "step": 22281000 }, { "epoch": 64.5, "learning_rate": 1.7763672598647648e-05, "loss": 1.9183, "step": 22281500 }, { "epoch": 64.5, "learning_rate": 1.7762950398295667e-05, "loss": 1.9309, "step": 22282000 }, { "epoch": 64.5, "learning_rate": 1.776222675064839e-05, "loss": 1.8981, "step": 22282500 }, { "epoch": 64.5, "learning_rate": 1.776150310300111e-05, "loss": 1.927, "step": 22283000 }, { "epoch": 64.5, "learning_rate": 1.7760779455353834e-05, "loss": 1.9316, "step": 22283500 }, { "epoch": 64.5, "learning_rate": 1.776005580770656e-05, "loss": 1.9273, "step": 22284000 }, { "epoch": 64.5, "learning_rate": 1.7759333607354578e-05, "loss": 1.9396, "step": 22284500 }, { "epoch": 64.51, "learning_rate": 1.77586099597073e-05, "loss": 1.9242, "step": 22285000 }, { "epoch": 64.51, "learning_rate": 1.7757886312060023e-05, "loss": 1.9246, "step": 22285500 }, { "epoch": 64.51, "learning_rate": 1.7757162664412745e-05, "loss": 1.9171, "step": 22286000 }, { "epoch": 64.51, "learning_rate": 1.775643901676547e-05, "loss": 1.9173, "step": 22286500 }, { "epoch": 64.51, "learning_rate": 1.7755716816413486e-05, "loss": 1.9149, "step": 22287000 }, { "epoch": 64.51, "learning_rate": 1.7754993168766208e-05, "loss": 1.9179, "step": 22287500 }, { "epoch": 64.51, "learning_rate": 1.7754269521118934e-05, "loss": 1.9298, "step": 22288000 }, { "epoch": 64.52, "learning_rate": 1.775354587347166e-05, "loss": 1.9257, "step": 22288500 }, { "epoch": 64.52, "learning_rate": 1.7752822225824382e-05, "loss": 1.9144, "step": 22289000 }, { "epoch": 64.52, "learning_rate": 1.7752098578177104e-05, "loss": 1.9173, "step": 22289500 }, { "epoch": 64.52, "learning_rate": 1.7751376377825123e-05, "loss": 1.9155, "step": 22290000 }, { "epoch": 64.52, "learning_rate": 1.7750652730177845e-05, "loss": 1.9028, "step": 22290500 }, { "epoch": 64.52, "learning_rate": 1.7749929082530567e-05, "loss": 1.9324, "step": 22291000 }, { "epoch": 64.52, "learning_rate": 1.7749206882178583e-05, "loss": 1.9065, "step": 22291500 }, { "epoch": 64.53, "learning_rate": 1.774848323453131e-05, "loss": 1.879, "step": 22292000 }, { "epoch": 64.53, "learning_rate": 1.7747759586884034e-05, "loss": 1.9123, "step": 22292500 }, { "epoch": 64.53, "learning_rate": 1.7747035939236756e-05, "loss": 1.8928, "step": 22293000 }, { "epoch": 64.53, "learning_rate": 1.774631229158948e-05, "loss": 1.9183, "step": 22293500 }, { "epoch": 64.53, "learning_rate": 1.77455886439422e-05, "loss": 1.9208, "step": 22294000 }, { "epoch": 64.53, "learning_rate": 1.7744864996294923e-05, "loss": 1.9316, "step": 22294500 }, { "epoch": 64.53, "learning_rate": 1.774414134864765e-05, "loss": 1.9161, "step": 22295000 }, { "epoch": 64.54, "learning_rate": 1.7743417701000374e-05, "loss": 1.9152, "step": 22295500 }, { "epoch": 64.54, "learning_rate": 1.7742694053353097e-05, "loss": 1.9162, "step": 22296000 }, { "epoch": 64.54, "learning_rate": 1.774197040570582e-05, "loss": 1.9381, "step": 22296500 }, { "epoch": 64.54, "learning_rate": 1.7741248205353834e-05, "loss": 1.9012, "step": 22297000 }, { "epoch": 64.54, "learning_rate": 1.774052455770656e-05, "loss": 1.9116, "step": 22297500 }, { "epoch": 64.54, "learning_rate": 1.7739802357354576e-05, "loss": 1.9004, "step": 22298000 }, { "epoch": 64.55, "learning_rate": 1.7739078709707298e-05, "loss": 1.9107, "step": 22298500 }, { "epoch": 64.55, "learning_rate": 1.7738355062060023e-05, "loss": 1.9007, "step": 22299000 }, { "epoch": 64.55, "learning_rate": 1.773763141441275e-05, "loss": 1.9102, "step": 22299500 }, { "epoch": 64.55, "learning_rate": 1.773690776676547e-05, "loss": 1.9463, "step": 22300000 }, { "epoch": 64.55, "learning_rate": 1.7736184119118193e-05, "loss": 1.9038, "step": 22300500 }, { "epoch": 64.55, "learning_rate": 1.7735460471470916e-05, "loss": 1.9115, "step": 22301000 }, { "epoch": 64.55, "learning_rate": 1.7734736823823638e-05, "loss": 1.906, "step": 22301500 }, { "epoch": 64.56, "learning_rate": 1.7734014623471657e-05, "loss": 1.9173, "step": 22302000 }, { "epoch": 64.56, "learning_rate": 1.773329097582438e-05, "loss": 1.9293, "step": 22302500 }, { "epoch": 64.56, "learning_rate": 1.7732567328177105e-05, "loss": 1.9353, "step": 22303000 }, { "epoch": 64.56, "learning_rate": 1.7731843680529827e-05, "loss": 1.9094, "step": 22303500 }, { "epoch": 64.56, "learning_rate": 1.773112003288255e-05, "loss": 1.9266, "step": 22304000 }, { "epoch": 64.56, "learning_rate": 1.7730397832530568e-05, "loss": 1.9124, "step": 22304500 }, { "epoch": 64.56, "learning_rate": 1.772967418488329e-05, "loss": 1.9235, "step": 22305000 }, { "epoch": 64.57, "learning_rate": 1.7728950537236013e-05, "loss": 1.8882, "step": 22305500 }, { "epoch": 64.57, "learning_rate": 1.7728226889588735e-05, "loss": 1.9412, "step": 22306000 }, { "epoch": 64.57, "learning_rate": 1.772750324194146e-05, "loss": 1.9066, "step": 22306500 }, { "epoch": 64.57, "learning_rate": 1.7726782488884773e-05, "loss": 1.9319, "step": 22307000 }, { "epoch": 64.57, "learning_rate": 1.77260588412375e-05, "loss": 1.896, "step": 22307500 }, { "epoch": 64.57, "learning_rate": 1.772533519359022e-05, "loss": 1.9142, "step": 22308000 }, { "epoch": 64.57, "learning_rate": 1.7724611545942943e-05, "loss": 1.9134, "step": 22308500 }, { "epoch": 64.58, "learning_rate": 1.7723887898295665e-05, "loss": 1.9286, "step": 22309000 }, { "epoch": 64.58, "learning_rate": 1.7723164250648387e-05, "loss": 1.9342, "step": 22309500 }, { "epoch": 64.58, "learning_rate": 1.7722440603001113e-05, "loss": 1.9171, "step": 22310000 }, { "epoch": 64.58, "learning_rate": 1.7721716955353835e-05, "loss": 1.9205, "step": 22310500 }, { "epoch": 64.58, "learning_rate": 1.7720994755001854e-05, "loss": 1.9098, "step": 22311000 }, { "epoch": 64.58, "learning_rate": 1.7720271107354576e-05, "loss": 1.9137, "step": 22311500 }, { "epoch": 64.58, "learning_rate": 1.7719548907002595e-05, "loss": 1.9368, "step": 22312000 }, { "epoch": 64.59, "learning_rate": 1.7718825259355318e-05, "loss": 1.9263, "step": 22312500 }, { "epoch": 64.59, "learning_rate": 1.771810161170804e-05, "loss": 1.9198, "step": 22313000 }, { "epoch": 64.59, "learning_rate": 1.7717377964060762e-05, "loss": 1.9379, "step": 22313500 }, { "epoch": 64.59, "learning_rate": 1.7716654316413488e-05, "loss": 1.9438, "step": 22314000 }, { "epoch": 64.59, "learning_rate": 1.7715930668766213e-05, "loss": 1.9344, "step": 22314500 }, { "epoch": 64.59, "learning_rate": 1.771520846841423e-05, "loss": 1.8849, "step": 22315000 }, { "epoch": 64.59, "learning_rate": 1.771448482076695e-05, "loss": 1.9295, "step": 22315500 }, { "epoch": 64.6, "learning_rate": 1.7713761173119677e-05, "loss": 1.9328, "step": 22316000 }, { "epoch": 64.6, "learning_rate": 1.77130375254724e-05, "loss": 1.9068, "step": 22316500 }, { "epoch": 64.6, "learning_rate": 1.7712315325120414e-05, "loss": 1.9138, "step": 22317000 }, { "epoch": 64.6, "learning_rate": 1.7711591677473137e-05, "loss": 1.9355, "step": 22317500 }, { "epoch": 64.6, "learning_rate": 1.7710868029825862e-05, "loss": 1.9295, "step": 22318000 }, { "epoch": 64.6, "learning_rate": 1.7710145829473878e-05, "loss": 1.9311, "step": 22318500 }, { "epoch": 64.6, "learning_rate": 1.7709422181826603e-05, "loss": 1.9247, "step": 22319000 }, { "epoch": 64.61, "learning_rate": 1.7708698534179326e-05, "loss": 1.9528, "step": 22319500 }, { "epoch": 64.61, "learning_rate": 1.770797488653205e-05, "loss": 1.9019, "step": 22320000 }, { "epoch": 64.61, "learning_rate": 1.7707251238884774e-05, "loss": 1.9186, "step": 22320500 }, { "epoch": 64.61, "learning_rate": 1.7706527591237496e-05, "loss": 1.9237, "step": 22321000 }, { "epoch": 64.61, "learning_rate": 1.7705803943590218e-05, "loss": 1.9097, "step": 22321500 }, { "epoch": 64.61, "learning_rate": 1.7705080295942944e-05, "loss": 1.9345, "step": 22322000 }, { "epoch": 64.61, "learning_rate": 1.7704358095590963e-05, "loss": 1.901, "step": 22322500 }, { "epoch": 64.62, "learning_rate": 1.7703634447943685e-05, "loss": 1.9358, "step": 22323000 }, { "epoch": 64.62, "learning_rate": 1.7702910800296407e-05, "loss": 1.9041, "step": 22323500 }, { "epoch": 64.62, "learning_rate": 1.770218715264913e-05, "loss": 1.916, "step": 22324000 }, { "epoch": 64.62, "learning_rate": 1.770146350500185e-05, "loss": 1.9302, "step": 22324500 }, { "epoch": 64.62, "learning_rate": 1.7700739857354577e-05, "loss": 1.913, "step": 22325000 }, { "epoch": 64.62, "learning_rate": 1.77000162097073e-05, "loss": 1.9286, "step": 22325500 }, { "epoch": 64.62, "learning_rate": 1.7699292562060025e-05, "loss": 1.9367, "step": 22326000 }, { "epoch": 64.63, "learning_rate": 1.7698568914412747e-05, "loss": 1.9513, "step": 22326500 }, { "epoch": 64.63, "learning_rate": 1.769784526676547e-05, "loss": 1.9471, "step": 22327000 }, { "epoch": 64.63, "learning_rate": 1.769712306641349e-05, "loss": 1.9195, "step": 22327500 }, { "epoch": 64.63, "learning_rate": 1.769639941876621e-05, "loss": 1.9008, "step": 22328000 }, { "epoch": 64.63, "learning_rate": 1.7695675771118933e-05, "loss": 1.9251, "step": 22328500 }, { "epoch": 64.63, "learning_rate": 1.7694953570766952e-05, "loss": 1.9208, "step": 22329000 }, { "epoch": 64.63, "learning_rate": 1.7694229923119674e-05, "loss": 1.9186, "step": 22329500 }, { "epoch": 64.64, "learning_rate": 1.76935062754724e-05, "loss": 1.9062, "step": 22330000 }, { "epoch": 64.64, "learning_rate": 1.7692782627825122e-05, "loss": 1.9265, "step": 22330500 }, { "epoch": 64.64, "learning_rate": 1.7692058980177844e-05, "loss": 1.9093, "step": 22331000 }, { "epoch": 64.64, "learning_rate": 1.7691335332530566e-05, "loss": 1.9089, "step": 22331500 }, { "epoch": 64.64, "learning_rate": 1.7690613132178585e-05, "loss": 1.887, "step": 22332000 }, { "epoch": 64.64, "learning_rate": 1.7689889484531308e-05, "loss": 1.9311, "step": 22332500 }, { "epoch": 64.64, "learning_rate": 1.768916583688403e-05, "loss": 1.9074, "step": 22333000 }, { "epoch": 64.65, "learning_rate": 1.7688442189236755e-05, "loss": 1.9007, "step": 22333500 }, { "epoch": 64.65, "learning_rate": 1.7687718541589478e-05, "loss": 1.9088, "step": 22334000 }, { "epoch": 64.65, "learning_rate": 1.7686994893942203e-05, "loss": 1.9232, "step": 22334500 }, { "epoch": 64.65, "learning_rate": 1.768627269359022e-05, "loss": 1.9269, "step": 22335000 }, { "epoch": 64.65, "learning_rate": 1.768554904594294e-05, "loss": 1.935, "step": 22335500 }, { "epoch": 64.65, "learning_rate": 1.7684825398295667e-05, "loss": 1.9212, "step": 22336000 }, { "epoch": 64.66, "learning_rate": 1.768410175064839e-05, "loss": 1.919, "step": 22336500 }, { "epoch": 64.66, "learning_rate": 1.7683378103001115e-05, "loss": 1.9001, "step": 22337000 }, { "epoch": 64.66, "learning_rate": 1.768265590264913e-05, "loss": 1.9181, "step": 22337500 }, { "epoch": 64.66, "learning_rate": 1.768193370229715e-05, "loss": 1.9051, "step": 22338000 }, { "epoch": 64.66, "learning_rate": 1.768121005464987e-05, "loss": 1.915, "step": 22338500 }, { "epoch": 64.66, "learning_rate": 1.768048785429789e-05, "loss": 1.9212, "step": 22339000 }, { "epoch": 64.66, "learning_rate": 1.7679764206650612e-05, "loss": 1.9274, "step": 22339500 }, { "epoch": 64.67, "learning_rate": 1.7679040559003335e-05, "loss": 1.9414, "step": 22340000 }, { "epoch": 64.67, "learning_rate": 1.7678316911356057e-05, "loss": 1.9139, "step": 22340500 }, { "epoch": 64.67, "learning_rate": 1.7677593263708783e-05, "loss": 1.9009, "step": 22341000 }, { "epoch": 64.67, "learning_rate": 1.7676869616061505e-05, "loss": 1.918, "step": 22341500 }, { "epoch": 64.67, "learning_rate": 1.767614596841423e-05, "loss": 1.9036, "step": 22342000 }, { "epoch": 64.67, "learning_rate": 1.7675422320766953e-05, "loss": 1.9302, "step": 22342500 }, { "epoch": 64.67, "learning_rate": 1.7674698673119675e-05, "loss": 1.9538, "step": 22343000 }, { "epoch": 64.68, "learning_rate": 1.7673975025472397e-05, "loss": 1.9069, "step": 22343500 }, { "epoch": 64.68, "learning_rate": 1.767325137782512e-05, "loss": 1.9189, "step": 22344000 }, { "epoch": 64.68, "learning_rate": 1.7672527730177845e-05, "loss": 1.9282, "step": 22344500 }, { "epoch": 64.68, "learning_rate": 1.7671804082530567e-05, "loss": 1.9174, "step": 22345000 }, { "epoch": 64.68, "learning_rate": 1.7671080434883293e-05, "loss": 1.9086, "step": 22345500 }, { "epoch": 64.68, "learning_rate": 1.7670356787236015e-05, "loss": 1.9172, "step": 22346000 }, { "epoch": 64.68, "learning_rate": 1.7669633139588737e-05, "loss": 1.9076, "step": 22346500 }, { "epoch": 64.69, "learning_rate": 1.766890949194146e-05, "loss": 1.9137, "step": 22347000 }, { "epoch": 64.69, "learning_rate": 1.7668185844294182e-05, "loss": 1.9082, "step": 22347500 }, { "epoch": 64.69, "learning_rate": 1.7667462196646907e-05, "loss": 1.9384, "step": 22348000 }, { "epoch": 64.69, "learning_rate": 1.766673854899963e-05, "loss": 1.9371, "step": 22348500 }, { "epoch": 64.69, "learning_rate": 1.7666014901352355e-05, "loss": 1.9194, "step": 22349000 }, { "epoch": 64.69, "learning_rate": 1.766529270100037e-05, "loss": 1.9252, "step": 22349500 }, { "epoch": 64.69, "learning_rate": 1.7664569053353093e-05, "loss": 1.9171, "step": 22350000 }, { "epoch": 64.7, "learning_rate": 1.766384540570582e-05, "loss": 1.92, "step": 22350500 }, { "epoch": 64.7, "learning_rate": 1.7663123205353834e-05, "loss": 1.9314, "step": 22351000 }, { "epoch": 64.7, "learning_rate": 1.7662399557706556e-05, "loss": 1.9265, "step": 22351500 }, { "epoch": 64.7, "learning_rate": 1.7661675910059282e-05, "loss": 1.9191, "step": 22352000 }, { "epoch": 64.7, "learning_rate": 1.76609537097073e-05, "loss": 1.9061, "step": 22352500 }, { "epoch": 64.7, "learning_rate": 1.7660230062060023e-05, "loss": 1.9231, "step": 22353000 }, { "epoch": 64.7, "learning_rate": 1.7659506414412745e-05, "loss": 1.9053, "step": 22353500 }, { "epoch": 64.71, "learning_rate": 1.7658782766765468e-05, "loss": 1.9137, "step": 22354000 }, { "epoch": 64.71, "learning_rate": 1.7658059119118193e-05, "loss": 1.937, "step": 22354500 }, { "epoch": 64.71, "learning_rate": 1.7657335471470916e-05, "loss": 1.9386, "step": 22355000 }, { "epoch": 64.71, "learning_rate": 1.765661327111893e-05, "loss": 1.9324, "step": 22355500 }, { "epoch": 64.71, "learning_rate": 1.7655889623471657e-05, "loss": 1.9408, "step": 22356000 }, { "epoch": 64.71, "learning_rate": 1.7655165975824382e-05, "loss": 1.9318, "step": 22356500 }, { "epoch": 64.71, "learning_rate": 1.7654442328177105e-05, "loss": 1.9269, "step": 22357000 }, { "epoch": 64.72, "learning_rate": 1.7653718680529827e-05, "loss": 1.924, "step": 22357500 }, { "epoch": 64.72, "learning_rate": 1.765299503288255e-05, "loss": 1.93, "step": 22358000 }, { "epoch": 64.72, "learning_rate": 1.765227138523527e-05, "loss": 1.9099, "step": 22358500 }, { "epoch": 64.72, "learning_rate": 1.7651547737587993e-05, "loss": 1.9373, "step": 22359000 }, { "epoch": 64.72, "learning_rate": 1.765082408994072e-05, "loss": 1.9433, "step": 22359500 }, { "epoch": 64.72, "learning_rate": 1.7650100442293445e-05, "loss": 1.9277, "step": 22360000 }, { "epoch": 64.72, "learning_rate": 1.7649376794646167e-05, "loss": 1.9305, "step": 22360500 }, { "epoch": 64.73, "learning_rate": 1.764865314699889e-05, "loss": 1.9036, "step": 22361000 }, { "epoch": 64.73, "learning_rate": 1.764792949935161e-05, "loss": 1.913, "step": 22361500 }, { "epoch": 64.73, "learning_rate": 1.764720729899963e-05, "loss": 1.9243, "step": 22362000 }, { "epoch": 64.73, "learning_rate": 1.7646483651352353e-05, "loss": 1.9598, "step": 22362500 }, { "epoch": 64.73, "learning_rate": 1.7645760003705078e-05, "loss": 1.9412, "step": 22363000 }, { "epoch": 64.73, "learning_rate": 1.76450363560578e-05, "loss": 1.9139, "step": 22363500 }, { "epoch": 64.73, "learning_rate": 1.7644312708410523e-05, "loss": 1.9014, "step": 22364000 }, { "epoch": 64.74, "learning_rate": 1.764359050805854e-05, "loss": 1.9231, "step": 22364500 }, { "epoch": 64.74, "learning_rate": 1.7642866860411264e-05, "loss": 1.9359, "step": 22365000 }, { "epoch": 64.74, "learning_rate": 1.7642143212763986e-05, "loss": 1.9348, "step": 22365500 }, { "epoch": 64.74, "learning_rate": 1.764141956511671e-05, "loss": 1.9447, "step": 22366000 }, { "epoch": 64.74, "learning_rate": 1.7640695917469434e-05, "loss": 1.9014, "step": 22366500 }, { "epoch": 64.74, "learning_rate": 1.763997226982216e-05, "loss": 1.9386, "step": 22367000 }, { "epoch": 64.74, "learning_rate": 1.7639248622174882e-05, "loss": 1.9133, "step": 22367500 }, { "epoch": 64.75, "learning_rate": 1.7638526421822897e-05, "loss": 1.9032, "step": 22368000 }, { "epoch": 64.75, "learning_rate": 1.7637802774175623e-05, "loss": 1.905, "step": 22368500 }, { "epoch": 64.75, "learning_rate": 1.7637079126528345e-05, "loss": 1.9024, "step": 22369000 }, { "epoch": 64.75, "learning_rate": 1.763635692617636e-05, "loss": 1.9144, "step": 22369500 }, { "epoch": 64.75, "learning_rate": 1.763563472582438e-05, "loss": 1.9196, "step": 22370000 }, { "epoch": 64.75, "learning_rate": 1.7634911078177105e-05, "loss": 1.9408, "step": 22370500 }, { "epoch": 64.75, "learning_rate": 1.763418887782512e-05, "loss": 1.9048, "step": 22371000 }, { "epoch": 64.76, "learning_rate": 1.7633465230177847e-05, "loss": 1.9085, "step": 22371500 }, { "epoch": 64.76, "learning_rate": 1.763274158253057e-05, "loss": 1.9274, "step": 22372000 }, { "epoch": 64.76, "learning_rate": 1.763201793488329e-05, "loss": 1.9247, "step": 22372500 }, { "epoch": 64.76, "learning_rate": 1.7631294287236013e-05, "loss": 1.9363, "step": 22373000 }, { "epoch": 64.76, "learning_rate": 1.7630570639588735e-05, "loss": 1.9191, "step": 22373500 }, { "epoch": 64.76, "learning_rate": 1.7629846991941458e-05, "loss": 1.9287, "step": 22374000 }, { "epoch": 64.77, "learning_rate": 1.7629123344294183e-05, "loss": 1.9245, "step": 22374500 }, { "epoch": 64.77, "learning_rate": 1.762839969664691e-05, "loss": 1.9246, "step": 22375000 }, { "epoch": 64.77, "learning_rate": 1.762767604899963e-05, "loss": 1.9328, "step": 22375500 }, { "epoch": 64.77, "learning_rate": 1.7626952401352353e-05, "loss": 1.9166, "step": 22376000 }, { "epoch": 64.77, "learning_rate": 1.7626228753705076e-05, "loss": 1.9222, "step": 22376500 }, { "epoch": 64.77, "learning_rate": 1.7625505106057798e-05, "loss": 1.9164, "step": 22377000 }, { "epoch": 64.77, "learning_rate": 1.7624781458410523e-05, "loss": 1.9221, "step": 22377500 }, { "epoch": 64.78, "learning_rate": 1.762405781076325e-05, "loss": 1.921, "step": 22378000 }, { "epoch": 64.78, "learning_rate": 1.7623335610411265e-05, "loss": 1.9018, "step": 22378500 }, { "epoch": 64.78, "learning_rate": 1.7622611962763987e-05, "loss": 1.8847, "step": 22379000 }, { "epoch": 64.78, "learning_rate": 1.762188831511671e-05, "loss": 1.9031, "step": 22379500 }, { "epoch": 64.78, "learning_rate": 1.7621164667469435e-05, "loss": 1.9015, "step": 22380000 }, { "epoch": 64.78, "learning_rate": 1.7620441019822157e-05, "loss": 1.9092, "step": 22380500 }, { "epoch": 64.78, "learning_rate": 1.761971737217488e-05, "loss": 1.91, "step": 22381000 }, { "epoch": 64.79, "learning_rate": 1.7618993724527605e-05, "loss": 1.9148, "step": 22381500 }, { "epoch": 64.79, "learning_rate": 1.7618270076880327e-05, "loss": 1.892, "step": 22382000 }, { "epoch": 64.79, "learning_rate": 1.761754642923305e-05, "loss": 1.9419, "step": 22382500 }, { "epoch": 64.79, "learning_rate": 1.7616822781585775e-05, "loss": 1.9339, "step": 22383000 }, { "epoch": 64.79, "learning_rate": 1.7616099133938497e-05, "loss": 1.927, "step": 22383500 }, { "epoch": 64.79, "learning_rate": 1.7615376933586513e-05, "loss": 1.9393, "step": 22384000 }, { "epoch": 64.79, "learning_rate": 1.7614653285939235e-05, "loss": 1.9357, "step": 22384500 }, { "epoch": 64.8, "learning_rate": 1.7613931085587254e-05, "loss": 1.9109, "step": 22385000 }, { "epoch": 64.8, "learning_rate": 1.761320743793998e-05, "loss": 1.9213, "step": 22385500 }, { "epoch": 64.8, "learning_rate": 1.7612483790292702e-05, "loss": 1.955, "step": 22386000 }, { "epoch": 64.8, "learning_rate": 1.7611760142645424e-05, "loss": 1.9166, "step": 22386500 }, { "epoch": 64.8, "learning_rate": 1.761103649499815e-05, "loss": 1.9265, "step": 22387000 }, { "epoch": 64.8, "learning_rate": 1.7610314294646165e-05, "loss": 1.9451, "step": 22387500 }, { "epoch": 64.8, "learning_rate": 1.7609592094294184e-05, "loss": 1.9185, "step": 22388000 }, { "epoch": 64.81, "learning_rate": 1.7608868446646906e-05, "loss": 1.9145, "step": 22388500 }, { "epoch": 64.81, "learning_rate": 1.760814479899963e-05, "loss": 1.9252, "step": 22389000 }, { "epoch": 64.81, "learning_rate": 1.7607421151352354e-05, "loss": 1.9659, "step": 22389500 }, { "epoch": 64.81, "learning_rate": 1.7606697503705076e-05, "loss": 1.9217, "step": 22390000 }, { "epoch": 64.81, "learning_rate": 1.76059738560578e-05, "loss": 1.8844, "step": 22390500 }, { "epoch": 64.81, "learning_rate": 1.760525310300111e-05, "loss": 1.9108, "step": 22391000 }, { "epoch": 64.81, "learning_rate": 1.7604529455353837e-05, "loss": 1.928, "step": 22391500 }, { "epoch": 64.82, "learning_rate": 1.760380580770656e-05, "loss": 1.939, "step": 22392000 }, { "epoch": 64.82, "learning_rate": 1.760308216005928e-05, "loss": 1.9231, "step": 22392500 }, { "epoch": 64.82, "learning_rate": 1.7602358512412007e-05, "loss": 1.9178, "step": 22393000 }, { "epoch": 64.82, "learning_rate": 1.7601636312060026e-05, "loss": 1.9539, "step": 22393500 }, { "epoch": 64.82, "learning_rate": 1.7600912664412748e-05, "loss": 1.9136, "step": 22394000 }, { "epoch": 64.82, "learning_rate": 1.760018901676547e-05, "loss": 1.9351, "step": 22394500 }, { "epoch": 64.82, "learning_rate": 1.7599465369118192e-05, "loss": 1.947, "step": 22395000 }, { "epoch": 64.83, "learning_rate": 1.7598741721470915e-05, "loss": 1.9197, "step": 22395500 }, { "epoch": 64.83, "learning_rate": 1.7598018073823637e-05, "loss": 1.9186, "step": 22396000 }, { "epoch": 64.83, "learning_rate": 1.7597294426176362e-05, "loss": 1.9074, "step": 22396500 }, { "epoch": 64.83, "learning_rate": 1.7596570778529088e-05, "loss": 1.9424, "step": 22397000 }, { "epoch": 64.83, "learning_rate": 1.7595848578177104e-05, "loss": 1.9156, "step": 22397500 }, { "epoch": 64.83, "learning_rate": 1.7595124930529826e-05, "loss": 1.9089, "step": 22398000 }, { "epoch": 64.83, "learning_rate": 1.759440128288255e-05, "loss": 1.9176, "step": 22398500 }, { "epoch": 64.84, "learning_rate": 1.7593677635235274e-05, "loss": 1.9306, "step": 22399000 }, { "epoch": 64.84, "learning_rate": 1.7592953987587996e-05, "loss": 1.9325, "step": 22399500 }, { "epoch": 64.84, "learning_rate": 1.7592230339940718e-05, "loss": 1.9322, "step": 22400000 }, { "epoch": 64.84, "learning_rate": 1.7591506692293444e-05, "loss": 1.9208, "step": 22400500 }, { "epoch": 64.84, "learning_rate": 1.7590783044646166e-05, "loss": 1.917, "step": 22401000 }, { "epoch": 64.84, "learning_rate": 1.7590059396998888e-05, "loss": 1.9029, "step": 22401500 }, { "epoch": 64.84, "learning_rate": 1.7589335749351614e-05, "loss": 1.9248, "step": 22402000 }, { "epoch": 64.85, "learning_rate": 1.7588612101704336e-05, "loss": 1.8943, "step": 22402500 }, { "epoch": 64.85, "learning_rate": 1.7587888454057058e-05, "loss": 1.9367, "step": 22403000 }, { "epoch": 64.85, "learning_rate": 1.758716480640978e-05, "loss": 1.8986, "step": 22403500 }, { "epoch": 64.85, "learning_rate": 1.7586441158762506e-05, "loss": 1.9057, "step": 22404000 }, { "epoch": 64.85, "learning_rate": 1.758571751111523e-05, "loss": 1.9147, "step": 22404500 }, { "epoch": 64.85, "learning_rate": 1.758499386346795e-05, "loss": 1.9251, "step": 22405000 }, { "epoch": 64.85, "learning_rate": 1.7584270215820676e-05, "loss": 1.9369, "step": 22405500 }, { "epoch": 64.86, "learning_rate": 1.75835465681734e-05, "loss": 1.9276, "step": 22406000 }, { "epoch": 64.86, "learning_rate": 1.758282581511671e-05, "loss": 1.9167, "step": 22406500 }, { "epoch": 64.86, "learning_rate": 1.7582102167469433e-05, "loss": 1.9275, "step": 22407000 }, { "epoch": 64.86, "learning_rate": 1.7581379967117452e-05, "loss": 1.9447, "step": 22407500 }, { "epoch": 64.86, "learning_rate": 1.7580656319470178e-05, "loss": 1.9465, "step": 22408000 }, { "epoch": 64.86, "learning_rate": 1.75799326718229e-05, "loss": 1.9419, "step": 22408500 }, { "epoch": 64.86, "learning_rate": 1.7579209024175622e-05, "loss": 1.9326, "step": 22409000 }, { "epoch": 64.87, "learning_rate": 1.7578485376528344e-05, "loss": 1.9322, "step": 22409500 }, { "epoch": 64.87, "learning_rate": 1.7577761728881066e-05, "loss": 1.9015, "step": 22410000 }, { "epoch": 64.87, "learning_rate": 1.757703808123379e-05, "loss": 1.912, "step": 22410500 }, { "epoch": 64.87, "learning_rate": 1.7576314433586514e-05, "loss": 1.91, "step": 22411000 }, { "epoch": 64.87, "learning_rate": 1.757559078593924e-05, "loss": 1.9192, "step": 22411500 }, { "epoch": 64.87, "learning_rate": 1.7574867138291962e-05, "loss": 1.9304, "step": 22412000 }, { "epoch": 64.88, "learning_rate": 1.7574143490644684e-05, "loss": 1.9628, "step": 22412500 }, { "epoch": 64.88, "learning_rate": 1.7573421290292703e-05, "loss": 1.9531, "step": 22413000 }, { "epoch": 64.88, "learning_rate": 1.7572697642645426e-05, "loss": 1.9329, "step": 22413500 }, { "epoch": 64.88, "learning_rate": 1.7571973994998148e-05, "loss": 1.9207, "step": 22414000 }, { "epoch": 64.88, "learning_rate": 1.757125034735087e-05, "loss": 1.9323, "step": 22414500 }, { "epoch": 64.88, "learning_rate": 1.7570526699703596e-05, "loss": 1.91, "step": 22415000 }, { "epoch": 64.88, "learning_rate": 1.7569804499351615e-05, "loss": 1.9098, "step": 22415500 }, { "epoch": 64.89, "learning_rate": 1.7569080851704337e-05, "loss": 1.9181, "step": 22416000 }, { "epoch": 64.89, "learning_rate": 1.756835720405706e-05, "loss": 1.8959, "step": 22416500 }, { "epoch": 64.89, "learning_rate": 1.756763355640978e-05, "loss": 1.9218, "step": 22417000 }, { "epoch": 64.89, "learning_rate": 1.7566909908762504e-05, "loss": 1.8973, "step": 22417500 }, { "epoch": 64.89, "learning_rate": 1.756618626111523e-05, "loss": 1.9003, "step": 22418000 }, { "epoch": 64.89, "learning_rate": 1.7565464060763245e-05, "loss": 1.9281, "step": 22418500 }, { "epoch": 64.89, "learning_rate": 1.756474041311597e-05, "loss": 1.9317, "step": 22419000 }, { "epoch": 64.9, "learning_rate": 1.7564016765468693e-05, "loss": 1.9061, "step": 22419500 }, { "epoch": 64.9, "learning_rate": 1.7563293117821418e-05, "loss": 1.9366, "step": 22420000 }, { "epoch": 64.9, "learning_rate": 1.756256947017414e-05, "loss": 1.9021, "step": 22420500 }, { "epoch": 64.9, "learning_rate": 1.7561845822526863e-05, "loss": 1.9326, "step": 22421000 }, { "epoch": 64.9, "learning_rate": 1.7561122174879585e-05, "loss": 1.9123, "step": 22421500 }, { "epoch": 64.9, "learning_rate": 1.7560399974527604e-05, "loss": 1.9093, "step": 22422000 }, { "epoch": 64.9, "learning_rate": 1.755967632688033e-05, "loss": 1.9196, "step": 22422500 }, { "epoch": 64.91, "learning_rate": 1.7558952679233052e-05, "loss": 1.9071, "step": 22423000 }, { "epoch": 64.91, "learning_rate": 1.7558229031585774e-05, "loss": 1.9105, "step": 22423500 }, { "epoch": 64.91, "learning_rate": 1.7557505383938496e-05, "loss": 1.9002, "step": 22424000 }, { "epoch": 64.91, "learning_rate": 1.7556783183586515e-05, "loss": 1.9415, "step": 22424500 }, { "epoch": 64.91, "learning_rate": 1.7556059535939237e-05, "loss": 1.9347, "step": 22425000 }, { "epoch": 64.91, "learning_rate": 1.755533588829196e-05, "loss": 1.9498, "step": 22425500 }, { "epoch": 64.91, "learning_rate": 1.7554612240644682e-05, "loss": 1.9084, "step": 22426000 }, { "epoch": 64.92, "learning_rate": 1.7553888592997407e-05, "loss": 1.926, "step": 22426500 }, { "epoch": 64.92, "learning_rate": 1.755316494535013e-05, "loss": 1.9101, "step": 22427000 }, { "epoch": 64.92, "learning_rate": 1.755244274499815e-05, "loss": 1.9127, "step": 22427500 }, { "epoch": 64.92, "learning_rate": 1.755171909735087e-05, "loss": 1.9231, "step": 22428000 }, { "epoch": 64.92, "learning_rate": 1.7550995449703593e-05, "loss": 1.9062, "step": 22428500 }, { "epoch": 64.92, "learning_rate": 1.755027180205632e-05, "loss": 1.9145, "step": 22429000 }, { "epoch": 64.92, "learning_rate": 1.754954815440904e-05, "loss": 1.9306, "step": 22429500 }, { "epoch": 64.93, "learning_rate": 1.7548824506761767e-05, "loss": 1.9185, "step": 22430000 }, { "epoch": 64.93, "learning_rate": 1.754810085911449e-05, "loss": 1.9329, "step": 22430500 }, { "epoch": 64.93, "learning_rate": 1.754737721146721e-05, "loss": 1.9149, "step": 22431000 }, { "epoch": 64.93, "learning_rate": 1.7546656458410523e-05, "loss": 1.922, "step": 22431500 }, { "epoch": 64.93, "learning_rate": 1.7545932810763246e-05, "loss": 1.9123, "step": 22432000 }, { "epoch": 64.93, "learning_rate": 1.7545209163115968e-05, "loss": 1.9297, "step": 22432500 }, { "epoch": 64.93, "learning_rate": 1.7544485515468693e-05, "loss": 1.9358, "step": 22433000 }, { "epoch": 64.94, "learning_rate": 1.7543761867821416e-05, "loss": 1.9162, "step": 22433500 }, { "epoch": 64.94, "learning_rate": 1.754303822017414e-05, "loss": 1.9264, "step": 22434000 }, { "epoch": 64.94, "learning_rate": 1.7542314572526863e-05, "loss": 1.9243, "step": 22434500 }, { "epoch": 64.94, "learning_rate": 1.7541592372174882e-05, "loss": 1.9132, "step": 22435000 }, { "epoch": 64.94, "learning_rate": 1.7540868724527605e-05, "loss": 1.9079, "step": 22435500 }, { "epoch": 64.94, "learning_rate": 1.754014652417562e-05, "loss": 1.9063, "step": 22436000 }, { "epoch": 64.94, "learning_rate": 1.7539422876528346e-05, "loss": 1.9557, "step": 22436500 }, { "epoch": 64.95, "learning_rate": 1.7538699228881068e-05, "loss": 1.9229, "step": 22437000 }, { "epoch": 64.95, "learning_rate": 1.7537977028529084e-05, "loss": 1.9162, "step": 22437500 }, { "epoch": 64.95, "learning_rate": 1.753725338088181e-05, "loss": 1.9362, "step": 22438000 }, { "epoch": 64.95, "learning_rate": 1.753652973323453e-05, "loss": 1.9176, "step": 22438500 }, { "epoch": 64.95, "learning_rate": 1.753580753288255e-05, "loss": 1.9155, "step": 22439000 }, { "epoch": 64.95, "learning_rate": 1.7535083885235273e-05, "loss": 1.903, "step": 22439500 }, { "epoch": 64.95, "learning_rate": 1.7534360237587995e-05, "loss": 1.9169, "step": 22440000 }, { "epoch": 64.96, "learning_rate": 1.753363658994072e-05, "loss": 1.9164, "step": 22440500 }, { "epoch": 64.96, "learning_rate": 1.7532912942293443e-05, "loss": 1.9021, "step": 22441000 }, { "epoch": 64.96, "learning_rate": 1.753218929464617e-05, "loss": 1.9262, "step": 22441500 }, { "epoch": 64.96, "learning_rate": 1.753146564699889e-05, "loss": 1.9182, "step": 22442000 }, { "epoch": 64.96, "learning_rate": 1.7530741999351613e-05, "loss": 1.9242, "step": 22442500 }, { "epoch": 64.96, "learning_rate": 1.7530018351704335e-05, "loss": 1.9159, "step": 22443000 }, { "epoch": 64.96, "learning_rate": 1.7529296151352354e-05, "loss": 1.9273, "step": 22443500 }, { "epoch": 64.97, "learning_rate": 1.7528572503705076e-05, "loss": 1.9292, "step": 22444000 }, { "epoch": 64.97, "learning_rate": 1.75278488560578e-05, "loss": 1.9283, "step": 22444500 }, { "epoch": 64.97, "learning_rate": 1.752712520841052e-05, "loss": 1.9017, "step": 22445000 }, { "epoch": 64.97, "learning_rate": 1.7526401560763246e-05, "loss": 1.932, "step": 22445500 }, { "epoch": 64.97, "learning_rate": 1.7525677913115972e-05, "loss": 1.932, "step": 22446000 }, { "epoch": 64.97, "learning_rate": 1.7524954265468694e-05, "loss": 1.9097, "step": 22446500 }, { "epoch": 64.97, "learning_rate": 1.7524230617821416e-05, "loss": 1.9306, "step": 22447000 }, { "epoch": 64.98, "learning_rate": 1.752350697017414e-05, "loss": 1.9141, "step": 22447500 }, { "epoch": 64.98, "learning_rate": 1.7522784769822158e-05, "loss": 1.9231, "step": 22448000 }, { "epoch": 64.98, "learning_rate": 1.752206112217488e-05, "loss": 1.9305, "step": 22448500 }, { "epoch": 64.98, "learning_rate": 1.7521337474527605e-05, "loss": 1.9331, "step": 22449000 }, { "epoch": 64.98, "learning_rate": 1.7520613826880328e-05, "loss": 1.9324, "step": 22449500 }, { "epoch": 64.98, "learning_rate": 1.751989017923305e-05, "loss": 1.9508, "step": 22450000 }, { "epoch": 64.99, "learning_rate": 1.7519166531585772e-05, "loss": 1.9108, "step": 22450500 }, { "epoch": 64.99, "learning_rate": 1.7518442883938498e-05, "loss": 1.9267, "step": 22451000 }, { "epoch": 64.99, "learning_rate": 1.751771923629122e-05, "loss": 1.9094, "step": 22451500 }, { "epoch": 64.99, "learning_rate": 1.7516995588643942e-05, "loss": 1.9571, "step": 22452000 }, { "epoch": 64.99, "learning_rate": 1.7516271940996668e-05, "loss": 1.948, "step": 22452500 }, { "epoch": 64.99, "learning_rate": 1.751554829334939e-05, "loss": 1.9435, "step": 22453000 }, { "epoch": 64.99, "learning_rate": 1.751482609299741e-05, "loss": 1.9315, "step": 22453500 }, { "epoch": 65.0, "learning_rate": 1.751410244535013e-05, "loss": 1.9407, "step": 22454000 }, { "epoch": 65.0, "learning_rate": 1.7513378797702853e-05, "loss": 1.9433, "step": 22454500 }, { "epoch": 65.0, "learning_rate": 1.7512655150055576e-05, "loss": 1.9325, "step": 22455000 }, { "epoch": 65.0, "learning_rate": 1.7511931502408298e-05, "loss": 1.9389, "step": 22455500 }, { "epoch": 65.0, "eval_accuracy": 0.6798080889451371, "eval_accuracy_mlm": 0.6471297852570015, "eval_accuracy_nsp": 0.8551695058961561, "eval_loss": 2.1693429946899414, "eval_runtime": 331.553, "eval_samples_per_second": 1316.188, "eval_steps_per_second": 54.842, "step": 22455680 }, { "epoch": 65.0, "learning_rate": 1.7511209302056317e-05, "loss": 1.9085, "step": 22456000 }, { "epoch": 65.0, "learning_rate": 1.7510485654409043e-05, "loss": 1.9196, "step": 22456500 }, { "epoch": 65.0, "learning_rate": 1.7509762006761765e-05, "loss": 1.8969, "step": 22457000 }, { "epoch": 65.01, "learning_rate": 1.7509038359114487e-05, "loss": 1.9214, "step": 22457500 }, { "epoch": 65.01, "learning_rate": 1.7508316158762506e-05, "loss": 1.8959, "step": 22458000 }, { "epoch": 65.01, "learning_rate": 1.7507592511115228e-05, "loss": 1.8919, "step": 22458500 }, { "epoch": 65.01, "learning_rate": 1.750686886346795e-05, "loss": 1.9018, "step": 22459000 }, { "epoch": 65.01, "learning_rate": 1.7506145215820673e-05, "loss": 1.9152, "step": 22459500 }, { "epoch": 65.01, "learning_rate": 1.750542301546869e-05, "loss": 1.9251, "step": 22460000 }, { "epoch": 65.01, "learning_rate": 1.7504699367821417e-05, "loss": 1.889, "step": 22460500 }, { "epoch": 65.02, "learning_rate": 1.750397572017414e-05, "loss": 1.8992, "step": 22461000 }, { "epoch": 65.02, "learning_rate": 1.750325351982216e-05, "loss": 1.8921, "step": 22461500 }, { "epoch": 65.02, "learning_rate": 1.750252987217488e-05, "loss": 1.8975, "step": 22462000 }, { "epoch": 65.02, "learning_rate": 1.7501806224527603e-05, "loss": 1.9204, "step": 22462500 }, { "epoch": 65.02, "learning_rate": 1.7501084024175622e-05, "loss": 1.8972, "step": 22463000 }, { "epoch": 65.02, "learning_rate": 1.7500360376528344e-05, "loss": 1.9183, "step": 22463500 }, { "epoch": 65.02, "learning_rate": 1.749963672888107e-05, "loss": 1.917, "step": 22464000 }, { "epoch": 65.03, "learning_rate": 1.7498913081233792e-05, "loss": 1.9089, "step": 22464500 }, { "epoch": 65.03, "learning_rate": 1.7498189433586514e-05, "loss": 1.9056, "step": 22465000 }, { "epoch": 65.03, "learning_rate": 1.7497465785939236e-05, "loss": 1.9043, "step": 22465500 }, { "epoch": 65.03, "learning_rate": 1.7496742138291962e-05, "loss": 1.9065, "step": 22466000 }, { "epoch": 65.03, "learning_rate": 1.7496018490644684e-05, "loss": 1.9143, "step": 22466500 }, { "epoch": 65.03, "learning_rate": 1.7495294842997406e-05, "loss": 1.9086, "step": 22467000 }, { "epoch": 65.03, "learning_rate": 1.7494571195350132e-05, "loss": 1.9204, "step": 22467500 }, { "epoch": 65.04, "learning_rate": 1.7493847547702854e-05, "loss": 1.9085, "step": 22468000 }, { "epoch": 65.04, "learning_rate": 1.7493123900055577e-05, "loss": 1.9173, "step": 22468500 }, { "epoch": 65.04, "learning_rate": 1.74924002524083e-05, "loss": 1.9174, "step": 22469000 }, { "epoch": 65.04, "learning_rate": 1.7491678052056318e-05, "loss": 1.9147, "step": 22469500 }, { "epoch": 65.04, "learning_rate": 1.749095440440904e-05, "loss": 1.9155, "step": 22470000 }, { "epoch": 65.04, "learning_rate": 1.7490230756761762e-05, "loss": 1.896, "step": 22470500 }, { "epoch": 65.04, "learning_rate": 1.7489507109114488e-05, "loss": 1.9281, "step": 22471000 }, { "epoch": 65.05, "learning_rate": 1.7488783461467213e-05, "loss": 1.9195, "step": 22471500 }, { "epoch": 65.05, "learning_rate": 1.7488059813819936e-05, "loss": 1.8784, "step": 22472000 }, { "epoch": 65.05, "learning_rate": 1.748733761346795e-05, "loss": 1.9153, "step": 22472500 }, { "epoch": 65.05, "learning_rate": 1.7486613965820673e-05, "loss": 1.9101, "step": 22473000 }, { "epoch": 65.05, "learning_rate": 1.74858903181734e-05, "loss": 1.9099, "step": 22473500 }, { "epoch": 65.05, "learning_rate": 1.748516667052612e-05, "loss": 1.891, "step": 22474000 }, { "epoch": 65.05, "learning_rate": 1.7484443022878844e-05, "loss": 1.9229, "step": 22474500 }, { "epoch": 65.06, "learning_rate": 1.748371937523157e-05, "loss": 1.8924, "step": 22475000 }, { "epoch": 65.06, "learning_rate": 1.7482997174879588e-05, "loss": 1.8866, "step": 22475500 }, { "epoch": 65.06, "learning_rate": 1.7482274974527604e-05, "loss": 1.9191, "step": 22476000 }, { "epoch": 65.06, "learning_rate": 1.7481551326880326e-05, "loss": 1.8985, "step": 22476500 }, { "epoch": 65.06, "learning_rate": 1.748082767923305e-05, "loss": 1.9191, "step": 22477000 }, { "epoch": 65.06, "learning_rate": 1.7480104031585774e-05, "loss": 1.912, "step": 22477500 }, { "epoch": 65.06, "learning_rate": 1.7479380383938496e-05, "loss": 1.8916, "step": 22478000 }, { "epoch": 65.07, "learning_rate": 1.7478656736291218e-05, "loss": 1.8821, "step": 22478500 }, { "epoch": 65.07, "learning_rate": 1.7477933088643944e-05, "loss": 1.9063, "step": 22479000 }, { "epoch": 65.07, "learning_rate": 1.7477209440996666e-05, "loss": 1.914, "step": 22479500 }, { "epoch": 65.07, "learning_rate": 1.7476485793349388e-05, "loss": 1.9202, "step": 22480000 }, { "epoch": 65.07, "learning_rate": 1.7475763592997407e-05, "loss": 1.9168, "step": 22480500 }, { "epoch": 65.07, "learning_rate": 1.747503994535013e-05, "loss": 1.923, "step": 22481000 }, { "epoch": 65.07, "learning_rate": 1.7474316297702852e-05, "loss": 1.906, "step": 22481500 }, { "epoch": 65.08, "learning_rate": 1.747359409735087e-05, "loss": 1.917, "step": 22482000 }, { "epoch": 65.08, "learning_rate": 1.7472870449703596e-05, "loss": 1.9086, "step": 22482500 }, { "epoch": 65.08, "learning_rate": 1.747214680205632e-05, "loss": 1.9056, "step": 22483000 }, { "epoch": 65.08, "learning_rate": 1.747142315440904e-05, "loss": 1.9529, "step": 22483500 }, { "epoch": 65.08, "learning_rate": 1.7470699506761763e-05, "loss": 1.9444, "step": 22484000 }, { "epoch": 65.08, "learning_rate": 1.746997585911449e-05, "loss": 1.911, "step": 22484500 }, { "epoch": 65.08, "learning_rate": 1.746925221146721e-05, "loss": 1.9177, "step": 22485000 }, { "epoch": 65.09, "learning_rate": 1.7468528563819933e-05, "loss": 1.9189, "step": 22485500 }, { "epoch": 65.09, "learning_rate": 1.746780491617266e-05, "loss": 1.9008, "step": 22486000 }, { "epoch": 65.09, "learning_rate": 1.746708126852538e-05, "loss": 1.911, "step": 22486500 }, { "epoch": 65.09, "learning_rate": 1.74663590681734e-05, "loss": 1.9218, "step": 22487000 }, { "epoch": 65.09, "learning_rate": 1.7465635420526122e-05, "loss": 1.8757, "step": 22487500 }, { "epoch": 65.09, "learning_rate": 1.7464911772878844e-05, "loss": 1.8991, "step": 22488000 }, { "epoch": 65.1, "learning_rate": 1.7464188125231567e-05, "loss": 1.9117, "step": 22488500 }, { "epoch": 65.1, "learning_rate": 1.746346447758429e-05, "loss": 1.9237, "step": 22489000 }, { "epoch": 65.1, "learning_rate": 1.7462740829937014e-05, "loss": 1.91, "step": 22489500 }, { "epoch": 65.1, "learning_rate": 1.7462018629585033e-05, "loss": 1.9148, "step": 22490000 }, { "epoch": 65.1, "learning_rate": 1.7461294981937756e-05, "loss": 1.8957, "step": 22490500 }, { "epoch": 65.1, "learning_rate": 1.7460571334290478e-05, "loss": 1.9309, "step": 22491000 }, { "epoch": 65.1, "learning_rate": 1.7459847686643203e-05, "loss": 1.8852, "step": 22491500 }, { "epoch": 65.11, "learning_rate": 1.7459124038995926e-05, "loss": 1.9272, "step": 22492000 }, { "epoch": 65.11, "learning_rate": 1.7458400391348648e-05, "loss": 1.9216, "step": 22492500 }, { "epoch": 65.11, "learning_rate": 1.745767674370137e-05, "loss": 1.925, "step": 22493000 }, { "epoch": 65.11, "learning_rate": 1.7456953096054096e-05, "loss": 1.895, "step": 22493500 }, { "epoch": 65.11, "learning_rate": 1.7456229448406818e-05, "loss": 1.885, "step": 22494000 }, { "epoch": 65.11, "learning_rate": 1.745550580075954e-05, "loss": 1.9022, "step": 22494500 }, { "epoch": 65.11, "learning_rate": 1.745478360040756e-05, "loss": 1.9168, "step": 22495000 }, { "epoch": 65.12, "learning_rate": 1.745405995276028e-05, "loss": 1.9213, "step": 22495500 }, { "epoch": 65.12, "learning_rate": 1.7453336305113004e-05, "loss": 1.9143, "step": 22496000 }, { "epoch": 65.12, "learning_rate": 1.745261265746573e-05, "loss": 1.9225, "step": 22496500 }, { "epoch": 65.12, "learning_rate": 1.7451890457113745e-05, "loss": 1.9117, "step": 22497000 }, { "epoch": 65.12, "learning_rate": 1.7451168256761767e-05, "loss": 1.9267, "step": 22497500 }, { "epoch": 65.12, "learning_rate": 1.745044460911449e-05, "loss": 1.9311, "step": 22498000 }, { "epoch": 65.12, "learning_rate": 1.744972096146721e-05, "loss": 1.9275, "step": 22498500 }, { "epoch": 65.13, "learning_rate": 1.7448997313819934e-05, "loss": 1.9041, "step": 22499000 }, { "epoch": 65.13, "learning_rate": 1.7448273666172656e-05, "loss": 1.9036, "step": 22499500 }, { "epoch": 65.13, "learning_rate": 1.744755001852538e-05, "loss": 1.9095, "step": 22500000 }, { "epoch": 65.13, "learning_rate": 1.7446826370878104e-05, "loss": 1.8949, "step": 22500500 }, { "epoch": 65.13, "learning_rate": 1.744610272323083e-05, "loss": 1.916, "step": 22501000 }, { "epoch": 65.13, "learning_rate": 1.7445379075583552e-05, "loss": 1.9027, "step": 22501500 }, { "epoch": 65.13, "learning_rate": 1.7444655427936274e-05, "loss": 1.9189, "step": 22502000 }, { "epoch": 65.14, "learning_rate": 1.7443933227584293e-05, "loss": 1.9343, "step": 22502500 }, { "epoch": 65.14, "learning_rate": 1.744321102723231e-05, "loss": 1.9101, "step": 22503000 }, { "epoch": 65.14, "learning_rate": 1.744248737958503e-05, "loss": 1.9215, "step": 22503500 }, { "epoch": 65.14, "learning_rate": 1.7441763731937756e-05, "loss": 1.9329, "step": 22504000 }, { "epoch": 65.14, "learning_rate": 1.744104008429048e-05, "loss": 1.9037, "step": 22504500 }, { "epoch": 65.14, "learning_rate": 1.7440316436643204e-05, "loss": 1.8819, "step": 22505000 }, { "epoch": 65.14, "learning_rate": 1.7439592788995926e-05, "loss": 1.9105, "step": 22505500 }, { "epoch": 65.15, "learning_rate": 1.743886914134865e-05, "loss": 1.8881, "step": 22506000 }, { "epoch": 65.15, "learning_rate": 1.743814549370137e-05, "loss": 1.9168, "step": 22506500 }, { "epoch": 65.15, "learning_rate": 1.7437421846054093e-05, "loss": 1.902, "step": 22507000 }, { "epoch": 65.15, "learning_rate": 1.7436699645702112e-05, "loss": 1.926, "step": 22507500 }, { "epoch": 65.15, "learning_rate": 1.7435975998054834e-05, "loss": 1.9145, "step": 22508000 }, { "epoch": 65.15, "learning_rate": 1.743525235040756e-05, "loss": 1.9011, "step": 22508500 }, { "epoch": 65.15, "learning_rate": 1.7434528702760282e-05, "loss": 1.8986, "step": 22509000 }, { "epoch": 65.16, "learning_rate": 1.74338065024083e-05, "loss": 1.9138, "step": 22509500 }, { "epoch": 65.16, "learning_rate": 1.7433082854761023e-05, "loss": 1.9124, "step": 22510000 }, { "epoch": 65.16, "learning_rate": 1.7432359207113746e-05, "loss": 1.9024, "step": 22510500 }, { "epoch": 65.16, "learning_rate": 1.7431635559466468e-05, "loss": 1.9297, "step": 22511000 }, { "epoch": 65.16, "learning_rate": 1.7430911911819193e-05, "loss": 1.9064, "step": 22511500 }, { "epoch": 65.16, "learning_rate": 1.743018826417192e-05, "loss": 1.8811, "step": 22512000 }, { "epoch": 65.16, "learning_rate": 1.7429466063819935e-05, "loss": 1.903, "step": 22512500 }, { "epoch": 65.17, "learning_rate": 1.7428742416172657e-05, "loss": 1.9098, "step": 22513000 }, { "epoch": 65.17, "learning_rate": 1.7428018768525383e-05, "loss": 1.9252, "step": 22513500 }, { "epoch": 65.17, "learning_rate": 1.7427295120878105e-05, "loss": 1.9144, "step": 22514000 }, { "epoch": 65.17, "learning_rate": 1.7426571473230827e-05, "loss": 1.8969, "step": 22514500 }, { "epoch": 65.17, "learning_rate": 1.742584782558355e-05, "loss": 1.9053, "step": 22515000 }, { "epoch": 65.17, "learning_rate": 1.742512707252686e-05, "loss": 1.9127, "step": 22515500 }, { "epoch": 65.17, "learning_rate": 1.7424403424879584e-05, "loss": 1.9077, "step": 22516000 }, { "epoch": 65.18, "learning_rate": 1.742367977723231e-05, "loss": 1.9246, "step": 22516500 }, { "epoch": 65.18, "learning_rate": 1.742295612958503e-05, "loss": 1.9021, "step": 22517000 }, { "epoch": 65.18, "learning_rate": 1.7422232481937757e-05, "loss": 1.898, "step": 22517500 }, { "epoch": 65.18, "learning_rate": 1.742150883429048e-05, "loss": 1.9331, "step": 22518000 }, { "epoch": 65.18, "learning_rate": 1.74207851866432e-05, "loss": 1.8978, "step": 22518500 }, { "epoch": 65.18, "learning_rate": 1.7420061538995924e-05, "loss": 1.8964, "step": 22519000 }, { "epoch": 65.18, "learning_rate": 1.7419337891348646e-05, "loss": 1.9121, "step": 22519500 }, { "epoch": 65.19, "learning_rate": 1.7418614243701372e-05, "loss": 1.9183, "step": 22520000 }, { "epoch": 65.19, "learning_rate": 1.741789204334939e-05, "loss": 1.9073, "step": 22520500 }, { "epoch": 65.19, "learning_rate": 1.7417168395702113e-05, "loss": 1.9218, "step": 22521000 }, { "epoch": 65.19, "learning_rate": 1.7416444748054835e-05, "loss": 1.896, "step": 22521500 }, { "epoch": 65.19, "learning_rate": 1.7415722547702854e-05, "loss": 1.9111, "step": 22522000 }, { "epoch": 65.19, "learning_rate": 1.7414998900055576e-05, "loss": 1.9035, "step": 22522500 }, { "epoch": 65.19, "learning_rate": 1.74142752524083e-05, "loss": 1.897, "step": 22523000 }, { "epoch": 65.2, "learning_rate": 1.7413551604761024e-05, "loss": 1.9148, "step": 22523500 }, { "epoch": 65.2, "learning_rate": 1.7412829404409043e-05, "loss": 1.8917, "step": 22524000 }, { "epoch": 65.2, "learning_rate": 1.7412105756761765e-05, "loss": 1.9111, "step": 22524500 }, { "epoch": 65.2, "learning_rate": 1.7411382109114488e-05, "loss": 1.9036, "step": 22525000 }, { "epoch": 65.2, "learning_rate": 1.741065846146721e-05, "loss": 1.8992, "step": 22525500 }, { "epoch": 65.2, "learning_rate": 1.7409934813819932e-05, "loss": 1.9147, "step": 22526000 }, { "epoch": 65.2, "learning_rate": 1.7409211166172658e-05, "loss": 1.9284, "step": 22526500 }, { "epoch": 65.21, "learning_rate": 1.740848751852538e-05, "loss": 1.9075, "step": 22527000 }, { "epoch": 65.21, "learning_rate": 1.7407763870878106e-05, "loss": 1.9, "step": 22527500 }, { "epoch": 65.21, "learning_rate": 1.7407040223230828e-05, "loss": 1.8965, "step": 22528000 }, { "epoch": 65.21, "learning_rate": 1.740631947017414e-05, "loss": 1.9262, "step": 22528500 }, { "epoch": 65.21, "learning_rate": 1.7405595822526862e-05, "loss": 1.9031, "step": 22529000 }, { "epoch": 65.21, "learning_rate": 1.7404872174879584e-05, "loss": 1.9006, "step": 22529500 }, { "epoch": 65.22, "learning_rate": 1.740414852723231e-05, "loss": 1.9152, "step": 22530000 }, { "epoch": 65.22, "learning_rate": 1.7403424879585032e-05, "loss": 1.9249, "step": 22530500 }, { "epoch": 65.22, "learning_rate": 1.7402702679233048e-05, "loss": 1.9257, "step": 22531000 }, { "epoch": 65.22, "learning_rate": 1.740198047888107e-05, "loss": 1.9078, "step": 22531500 }, { "epoch": 65.22, "learning_rate": 1.7401256831233792e-05, "loss": 1.9076, "step": 22532000 }, { "epoch": 65.22, "learning_rate": 1.7400533183586515e-05, "loss": 1.8996, "step": 22532500 }, { "epoch": 65.22, "learning_rate": 1.7399809535939237e-05, "loss": 1.9366, "step": 22533000 }, { "epoch": 65.23, "learning_rate": 1.739908588829196e-05, "loss": 1.9175, "step": 22533500 }, { "epoch": 65.23, "learning_rate": 1.7398362240644685e-05, "loss": 1.9212, "step": 22534000 }, { "epoch": 65.23, "learning_rate": 1.7397638592997407e-05, "loss": 1.8978, "step": 22534500 }, { "epoch": 65.23, "learning_rate": 1.7396914945350133e-05, "loss": 1.9104, "step": 22535000 }, { "epoch": 65.23, "learning_rate": 1.7396191297702855e-05, "loss": 1.9102, "step": 22535500 }, { "epoch": 65.23, "learning_rate": 1.7395467650055577e-05, "loss": 1.895, "step": 22536000 }, { "epoch": 65.23, "learning_rate": 1.73947440024083e-05, "loss": 1.9173, "step": 22536500 }, { "epoch": 65.24, "learning_rate": 1.7394021802056318e-05, "loss": 1.9072, "step": 22537000 }, { "epoch": 65.24, "learning_rate": 1.739329815440904e-05, "loss": 1.8837, "step": 22537500 }, { "epoch": 65.24, "learning_rate": 1.7392574506761763e-05, "loss": 1.8939, "step": 22538000 }, { "epoch": 65.24, "learning_rate": 1.7391850859114485e-05, "loss": 1.9011, "step": 22538500 }, { "epoch": 65.24, "learning_rate": 1.739112721146721e-05, "loss": 1.9191, "step": 22539000 }, { "epoch": 65.24, "learning_rate": 1.7390403563819936e-05, "loss": 1.9246, "step": 22539500 }, { "epoch": 65.24, "learning_rate": 1.7389681363467952e-05, "loss": 1.9219, "step": 22540000 }, { "epoch": 65.25, "learning_rate": 1.7388957715820674e-05, "loss": 1.9181, "step": 22540500 }, { "epoch": 65.25, "learning_rate": 1.7388234068173396e-05, "loss": 1.9002, "step": 22541000 }, { "epoch": 65.25, "learning_rate": 1.7387510420526122e-05, "loss": 1.8903, "step": 22541500 }, { "epoch": 65.25, "learning_rate": 1.7386786772878844e-05, "loss": 1.9076, "step": 22542000 }, { "epoch": 65.25, "learning_rate": 1.738606312523157e-05, "loss": 1.9044, "step": 22542500 }, { "epoch": 65.25, "learning_rate": 1.7385339477584292e-05, "loss": 1.9213, "step": 22543000 }, { "epoch": 65.25, "learning_rate": 1.738461727723231e-05, "loss": 1.9286, "step": 22543500 }, { "epoch": 65.26, "learning_rate": 1.7383893629585033e-05, "loss": 1.9184, "step": 22544000 }, { "epoch": 65.26, "learning_rate": 1.7383169981937755e-05, "loss": 1.8985, "step": 22544500 }, { "epoch": 65.26, "learning_rate": 1.7382446334290478e-05, "loss": 1.9295, "step": 22545000 }, { "epoch": 65.26, "learning_rate": 1.73817226866432e-05, "loss": 1.9387, "step": 22545500 }, { "epoch": 65.26, "learning_rate": 1.7380999038995925e-05, "loss": 1.9241, "step": 22546000 }, { "epoch": 65.26, "learning_rate": 1.7380275391348648e-05, "loss": 1.8898, "step": 22546500 }, { "epoch": 65.26, "learning_rate": 1.7379551743701373e-05, "loss": 1.9142, "step": 22547000 }, { "epoch": 65.27, "learning_rate": 1.7378828096054096e-05, "loss": 1.9082, "step": 22547500 }, { "epoch": 65.27, "learning_rate": 1.737810589570211e-05, "loss": 1.9207, "step": 22548000 }, { "epoch": 65.27, "learning_rate": 1.737738369535013e-05, "loss": 1.9139, "step": 22548500 }, { "epoch": 65.27, "learning_rate": 1.7376660047702852e-05, "loss": 1.9116, "step": 22549000 }, { "epoch": 65.27, "learning_rate": 1.7375939294646165e-05, "loss": 1.9072, "step": 22549500 }, { "epoch": 65.27, "learning_rate": 1.7375215646998887e-05, "loss": 1.9158, "step": 22550000 }, { "epoch": 65.27, "learning_rate": 1.7374491999351612e-05, "loss": 1.9194, "step": 22550500 }, { "epoch": 65.28, "learning_rate": 1.7373768351704338e-05, "loss": 1.895, "step": 22551000 }, { "epoch": 65.28, "learning_rate": 1.737304470405706e-05, "loss": 1.9384, "step": 22551500 }, { "epoch": 65.28, "learning_rate": 1.7372321056409783e-05, "loss": 1.8939, "step": 22552000 }, { "epoch": 65.28, "learning_rate": 1.7371597408762505e-05, "loss": 1.9255, "step": 22552500 }, { "epoch": 65.28, "learning_rate": 1.7370873761115227e-05, "loss": 1.9355, "step": 22553000 }, { "epoch": 65.28, "learning_rate": 1.737015011346795e-05, "loss": 1.9167, "step": 22553500 }, { "epoch": 65.28, "learning_rate": 1.7369426465820675e-05, "loss": 1.9246, "step": 22554000 }, { "epoch": 65.29, "learning_rate": 1.73687028181734e-05, "loss": 1.9109, "step": 22554500 }, { "epoch": 65.29, "learning_rate": 1.7367979170526123e-05, "loss": 1.9062, "step": 22555000 }, { "epoch": 65.29, "learning_rate": 1.7367255522878845e-05, "loss": 1.913, "step": 22555500 }, { "epoch": 65.29, "learning_rate": 1.7366531875231567e-05, "loss": 1.9148, "step": 22556000 }, { "epoch": 65.29, "learning_rate": 1.736580822758429e-05, "loss": 1.9274, "step": 22556500 }, { "epoch": 65.29, "learning_rate": 1.736508457993701e-05, "loss": 1.9261, "step": 22557000 }, { "epoch": 65.29, "learning_rate": 1.7364360932289737e-05, "loss": 1.9232, "step": 22557500 }, { "epoch": 65.3, "learning_rate": 1.7363637284642463e-05, "loss": 1.9178, "step": 22558000 }, { "epoch": 65.3, "learning_rate": 1.7362913636995185e-05, "loss": 1.9039, "step": 22558500 }, { "epoch": 65.3, "learning_rate": 1.7362189989347907e-05, "loss": 1.8964, "step": 22559000 }, { "epoch": 65.3, "learning_rate": 1.7361467788995926e-05, "loss": 1.9207, "step": 22559500 }, { "epoch": 65.3, "learning_rate": 1.7360745588643942e-05, "loss": 1.9232, "step": 22560000 }, { "epoch": 65.3, "learning_rate": 1.736002338829196e-05, "loss": 1.9126, "step": 22560500 }, { "epoch": 65.3, "learning_rate": 1.7359299740644683e-05, "loss": 1.9163, "step": 22561000 }, { "epoch": 65.31, "learning_rate": 1.735857609299741e-05, "loss": 1.9195, "step": 22561500 }, { "epoch": 65.31, "learning_rate": 1.735785244535013e-05, "loss": 1.9265, "step": 22562000 }, { "epoch": 65.31, "learning_rate": 1.7357128797702853e-05, "loss": 1.9124, "step": 22562500 }, { "epoch": 65.31, "learning_rate": 1.7356405150055575e-05, "loss": 1.9375, "step": 22563000 }, { "epoch": 65.31, "learning_rate": 1.73556815024083e-05, "loss": 1.912, "step": 22563500 }, { "epoch": 65.31, "learning_rate": 1.7354959302056316e-05, "loss": 1.9137, "step": 22564000 }, { "epoch": 65.31, "learning_rate": 1.735423565440904e-05, "loss": 1.9301, "step": 22564500 }, { "epoch": 65.32, "learning_rate": 1.7353512006761764e-05, "loss": 1.9078, "step": 22565000 }, { "epoch": 65.32, "learning_rate": 1.735278835911449e-05, "loss": 1.8942, "step": 22565500 }, { "epoch": 65.32, "learning_rate": 1.7352064711467212e-05, "loss": 1.8933, "step": 22566000 }, { "epoch": 65.32, "learning_rate": 1.7351342511115228e-05, "loss": 1.9253, "step": 22566500 }, { "epoch": 65.32, "learning_rate": 1.7350618863467953e-05, "loss": 1.9133, "step": 22567000 }, { "epoch": 65.32, "learning_rate": 1.7349895215820676e-05, "loss": 1.8957, "step": 22567500 }, { "epoch": 65.33, "learning_rate": 1.7349171568173398e-05, "loss": 1.9353, "step": 22568000 }, { "epoch": 65.33, "learning_rate": 1.7348447920526123e-05, "loss": 1.9001, "step": 22568500 }, { "epoch": 65.33, "learning_rate": 1.7347724272878846e-05, "loss": 1.9028, "step": 22569000 }, { "epoch": 65.33, "learning_rate": 1.7347000625231568e-05, "loss": 1.8925, "step": 22569500 }, { "epoch": 65.33, "learning_rate": 1.7346278424879587e-05, "loss": 1.9142, "step": 22570000 }, { "epoch": 65.33, "learning_rate": 1.734555477723231e-05, "loss": 1.9037, "step": 22570500 }, { "epoch": 65.33, "learning_rate": 1.734483112958503e-05, "loss": 1.9009, "step": 22571000 }, { "epoch": 65.34, "learning_rate": 1.7344107481937754e-05, "loss": 1.9214, "step": 22571500 }, { "epoch": 65.34, "learning_rate": 1.734338383429048e-05, "loss": 1.9063, "step": 22572000 }, { "epoch": 65.34, "learning_rate": 1.7342661633938498e-05, "loss": 1.8872, "step": 22572500 }, { "epoch": 65.34, "learning_rate": 1.734193798629122e-05, "loss": 1.9039, "step": 22573000 }, { "epoch": 65.34, "learning_rate": 1.7341214338643943e-05, "loss": 1.9173, "step": 22573500 }, { "epoch": 65.34, "learning_rate": 1.7340490690996665e-05, "loss": 1.9209, "step": 22574000 }, { "epoch": 65.34, "learning_rate": 1.7339768490644684e-05, "loss": 1.9182, "step": 22574500 }, { "epoch": 65.35, "learning_rate": 1.7339044842997406e-05, "loss": 1.9286, "step": 22575000 }, { "epoch": 65.35, "learning_rate": 1.7338321195350128e-05, "loss": 1.9109, "step": 22575500 }, { "epoch": 65.35, "learning_rate": 1.7337597547702854e-05, "loss": 1.9123, "step": 22576000 }, { "epoch": 65.35, "learning_rate": 1.733687390005558e-05, "loss": 1.9165, "step": 22576500 }, { "epoch": 65.35, "learning_rate": 1.7336150252408302e-05, "loss": 1.9201, "step": 22577000 }, { "epoch": 65.35, "learning_rate": 1.7335426604761024e-05, "loss": 1.9354, "step": 22577500 }, { "epoch": 65.35, "learning_rate": 1.7334702957113746e-05, "loss": 1.9133, "step": 22578000 }, { "epoch": 65.36, "learning_rate": 1.733397930946647e-05, "loss": 1.9256, "step": 22578500 }, { "epoch": 65.36, "learning_rate": 1.7333257109114487e-05, "loss": 1.9204, "step": 22579000 }, { "epoch": 65.36, "learning_rate": 1.733253346146721e-05, "loss": 1.9074, "step": 22579500 }, { "epoch": 65.36, "learning_rate": 1.7331809813819935e-05, "loss": 1.9023, "step": 22580000 }, { "epoch": 65.36, "learning_rate": 1.7331086166172657e-05, "loss": 1.9295, "step": 22580500 }, { "epoch": 65.36, "learning_rate": 1.7330363965820676e-05, "loss": 1.9208, "step": 22581000 }, { "epoch": 65.36, "learning_rate": 1.73296403181734e-05, "loss": 1.9148, "step": 22581500 }, { "epoch": 65.37, "learning_rate": 1.732891667052612e-05, "loss": 1.8986, "step": 22582000 }, { "epoch": 65.37, "learning_rate": 1.7328193022878843e-05, "loss": 1.9178, "step": 22582500 }, { "epoch": 65.37, "learning_rate": 1.7327469375231565e-05, "loss": 1.91, "step": 22583000 }, { "epoch": 65.37, "learning_rate": 1.7326747174879584e-05, "loss": 1.9159, "step": 22583500 }, { "epoch": 65.37, "learning_rate": 1.732602352723231e-05, "loss": 1.9303, "step": 22584000 }, { "epoch": 65.37, "learning_rate": 1.7325299879585032e-05, "loss": 1.9249, "step": 22584500 }, { "epoch": 65.37, "learning_rate": 1.7324576231937754e-05, "loss": 1.9009, "step": 22585000 }, { "epoch": 65.38, "learning_rate": 1.732385258429048e-05, "loss": 1.9315, "step": 22585500 }, { "epoch": 65.38, "learning_rate": 1.7323128936643202e-05, "loss": 1.9139, "step": 22586000 }, { "epoch": 65.38, "learning_rate": 1.7322405288995924e-05, "loss": 1.9108, "step": 22586500 }, { "epoch": 65.38, "learning_rate": 1.7321683088643943e-05, "loss": 1.8955, "step": 22587000 }, { "epoch": 65.38, "learning_rate": 1.732095944099667e-05, "loss": 1.9255, "step": 22587500 }, { "epoch": 65.38, "learning_rate": 1.7320237240644685e-05, "loss": 1.935, "step": 22588000 }, { "epoch": 65.38, "learning_rate": 1.7319513592997407e-05, "loss": 1.9169, "step": 22588500 }, { "epoch": 65.39, "learning_rate": 1.731878994535013e-05, "loss": 1.9331, "step": 22589000 }, { "epoch": 65.39, "learning_rate": 1.7318066297702855e-05, "loss": 1.9077, "step": 22589500 }, { "epoch": 65.39, "learning_rate": 1.7317342650055577e-05, "loss": 1.9224, "step": 22590000 }, { "epoch": 65.39, "learning_rate": 1.73166190024083e-05, "loss": 1.9206, "step": 22590500 }, { "epoch": 65.39, "learning_rate": 1.7315895354761025e-05, "loss": 1.9398, "step": 22591000 }, { "epoch": 65.39, "learning_rate": 1.7315171707113747e-05, "loss": 1.9179, "step": 22591500 }, { "epoch": 65.39, "learning_rate": 1.731444805946647e-05, "loss": 1.9135, "step": 22592000 }, { "epoch": 65.4, "learning_rate": 1.7313725859114488e-05, "loss": 1.9009, "step": 22592500 }, { "epoch": 65.4, "learning_rate": 1.7313003658762507e-05, "loss": 1.9052, "step": 22593000 }, { "epoch": 65.4, "learning_rate": 1.731228001111523e-05, "loss": 1.9212, "step": 22593500 }, { "epoch": 65.4, "learning_rate": 1.731155636346795e-05, "loss": 1.9051, "step": 22594000 }, { "epoch": 65.4, "learning_rate": 1.7310832715820674e-05, "loss": 1.9177, "step": 22594500 }, { "epoch": 65.4, "learning_rate": 1.73101090681734e-05, "loss": 1.9148, "step": 22595000 }, { "epoch": 65.4, "learning_rate": 1.730938542052612e-05, "loss": 1.9236, "step": 22595500 }, { "epoch": 65.41, "learning_rate": 1.7308661772878844e-05, "loss": 1.9094, "step": 22596000 }, { "epoch": 65.41, "learning_rate": 1.730793812523157e-05, "loss": 1.9008, "step": 22596500 }, { "epoch": 65.41, "learning_rate": 1.7307214477584292e-05, "loss": 1.893, "step": 22597000 }, { "epoch": 65.41, "learning_rate": 1.7306490829937014e-05, "loss": 1.8995, "step": 22597500 }, { "epoch": 65.41, "learning_rate": 1.7305768629585033e-05, "loss": 1.9112, "step": 22598000 }, { "epoch": 65.41, "learning_rate": 1.730504642923305e-05, "loss": 1.9034, "step": 22598500 }, { "epoch": 65.41, "learning_rate": 1.7304322781585774e-05, "loss": 1.9156, "step": 22599000 }, { "epoch": 65.42, "learning_rate": 1.7303599133938496e-05, "loss": 1.8956, "step": 22599500 }, { "epoch": 65.42, "learning_rate": 1.730287548629122e-05, "loss": 1.9166, "step": 22600000 }, { "epoch": 65.42, "learning_rate": 1.7302151838643944e-05, "loss": 1.9145, "step": 22600500 }, { "epoch": 65.42, "learning_rate": 1.730142963829196e-05, "loss": 1.9121, "step": 22601000 }, { "epoch": 65.42, "learning_rate": 1.7300705990644682e-05, "loss": 1.9234, "step": 22601500 }, { "epoch": 65.42, "learning_rate": 1.7299982342997408e-05, "loss": 1.9042, "step": 22602000 }, { "epoch": 65.42, "learning_rate": 1.7299258695350133e-05, "loss": 1.9093, "step": 22602500 }, { "epoch": 65.43, "learning_rate": 1.729853649499815e-05, "loss": 1.9138, "step": 22603000 }, { "epoch": 65.43, "learning_rate": 1.729781284735087e-05, "loss": 1.9134, "step": 22603500 }, { "epoch": 65.43, "learning_rate": 1.7297089199703597e-05, "loss": 1.9066, "step": 22604000 }, { "epoch": 65.43, "learning_rate": 1.729636555205632e-05, "loss": 1.9148, "step": 22604500 }, { "epoch": 65.43, "learning_rate": 1.729564190440904e-05, "loss": 1.922, "step": 22605000 }, { "epoch": 65.43, "learning_rate": 1.7294918256761763e-05, "loss": 1.9174, "step": 22605500 }, { "epoch": 65.44, "learning_rate": 1.7294194609114486e-05, "loss": 1.939, "step": 22606000 }, { "epoch": 65.44, "learning_rate": 1.729347096146721e-05, "loss": 1.9083, "step": 22606500 }, { "epoch": 65.44, "learning_rate": 1.729274876111523e-05, "loss": 1.9204, "step": 22607000 }, { "epoch": 65.44, "learning_rate": 1.7292025113467952e-05, "loss": 1.9156, "step": 22607500 }, { "epoch": 65.44, "learning_rate": 1.7291301465820675e-05, "loss": 1.8825, "step": 22608000 }, { "epoch": 65.44, "learning_rate": 1.7290579265468694e-05, "loss": 1.9004, "step": 22608500 }, { "epoch": 65.44, "learning_rate": 1.7289855617821416e-05, "loss": 1.9049, "step": 22609000 }, { "epoch": 65.45, "learning_rate": 1.7289133417469435e-05, "loss": 1.9242, "step": 22609500 }, { "epoch": 65.45, "learning_rate": 1.7288409769822157e-05, "loss": 1.9207, "step": 22610000 }, { "epoch": 65.45, "learning_rate": 1.7287686122174883e-05, "loss": 1.9208, "step": 22610500 }, { "epoch": 65.45, "learning_rate": 1.7286962474527605e-05, "loss": 1.9018, "step": 22611000 }, { "epoch": 65.45, "learning_rate": 1.7286238826880327e-05, "loss": 1.8942, "step": 22611500 }, { "epoch": 65.45, "learning_rate": 1.728551517923305e-05, "loss": 1.9336, "step": 22612000 }, { "epoch": 65.45, "learning_rate": 1.728479153158577e-05, "loss": 1.9309, "step": 22612500 }, { "epoch": 65.46, "learning_rate": 1.7284067883938497e-05, "loss": 1.9139, "step": 22613000 }, { "epoch": 65.46, "learning_rate": 1.728334423629122e-05, "loss": 1.8919, "step": 22613500 }, { "epoch": 65.46, "learning_rate": 1.728262203593924e-05, "loss": 1.9066, "step": 22614000 }, { "epoch": 65.46, "learning_rate": 1.728189838829196e-05, "loss": 1.934, "step": 22614500 }, { "epoch": 65.46, "learning_rate": 1.7281174740644683e-05, "loss": 1.8869, "step": 22615000 }, { "epoch": 65.46, "learning_rate": 1.728045109299741e-05, "loss": 1.9123, "step": 22615500 }, { "epoch": 65.46, "learning_rate": 1.727972744535013e-05, "loss": 1.9179, "step": 22616000 }, { "epoch": 65.47, "learning_rate": 1.7279003797702853e-05, "loss": 1.9043, "step": 22616500 }, { "epoch": 65.47, "learning_rate": 1.7278280150055575e-05, "loss": 1.9258, "step": 22617000 }, { "epoch": 65.47, "learning_rate": 1.72775565024083e-05, "loss": 1.9293, "step": 22617500 }, { "epoch": 65.47, "learning_rate": 1.727683430205632e-05, "loss": 1.9161, "step": 22618000 }, { "epoch": 65.47, "learning_rate": 1.7276110654409042e-05, "loss": 1.9138, "step": 22618500 }, { "epoch": 65.47, "learning_rate": 1.7275387006761764e-05, "loss": 1.8957, "step": 22619000 }, { "epoch": 65.47, "learning_rate": 1.7274663359114486e-05, "loss": 1.922, "step": 22619500 }, { "epoch": 65.48, "learning_rate": 1.727393971146721e-05, "loss": 1.9198, "step": 22620000 }, { "epoch": 65.48, "learning_rate": 1.7273216063819934e-05, "loss": 1.9279, "step": 22620500 }, { "epoch": 65.48, "learning_rate": 1.727249386346795e-05, "loss": 1.9339, "step": 22621000 }, { "epoch": 65.48, "learning_rate": 1.7271770215820675e-05, "loss": 1.9243, "step": 22621500 }, { "epoch": 65.48, "learning_rate": 1.7271046568173398e-05, "loss": 1.9048, "step": 22622000 }, { "epoch": 65.48, "learning_rate": 1.7270322920526123e-05, "loss": 1.9128, "step": 22622500 }, { "epoch": 65.48, "learning_rate": 1.7269599272878846e-05, "loss": 1.9259, "step": 22623000 }, { "epoch": 65.49, "learning_rate": 1.7268875625231568e-05, "loss": 1.9157, "step": 22623500 }, { "epoch": 65.49, "learning_rate": 1.726815197758429e-05, "loss": 1.9144, "step": 22624000 }, { "epoch": 65.49, "learning_rate": 1.7267428329937012e-05, "loss": 1.9049, "step": 22624500 }, { "epoch": 65.49, "learning_rate": 1.7266706129585035e-05, "loss": 1.918, "step": 22625000 }, { "epoch": 65.49, "learning_rate": 1.726598392923305e-05, "loss": 1.9059, "step": 22625500 }, { "epoch": 65.49, "learning_rate": 1.7265260281585772e-05, "loss": 1.9038, "step": 22626000 }, { "epoch": 65.49, "learning_rate": 1.7264536633938498e-05, "loss": 1.9059, "step": 22626500 }, { "epoch": 65.5, "learning_rate": 1.726381298629122e-05, "loss": 1.9251, "step": 22627000 }, { "epoch": 65.5, "learning_rate": 1.7263089338643942e-05, "loss": 1.9048, "step": 22627500 }, { "epoch": 65.5, "learning_rate": 1.7262365690996665e-05, "loss": 1.9213, "step": 22628000 }, { "epoch": 65.5, "learning_rate": 1.726164204334939e-05, "loss": 1.9032, "step": 22628500 }, { "epoch": 65.5, "learning_rate": 1.7260918395702113e-05, "loss": 1.9035, "step": 22629000 }, { "epoch": 65.5, "learning_rate": 1.726019619535013e-05, "loss": 1.9163, "step": 22629500 }, { "epoch": 65.5, "learning_rate": 1.7259472547702854e-05, "loss": 1.9331, "step": 22630000 }, { "epoch": 65.51, "learning_rate": 1.7258748900055576e-05, "loss": 1.93, "step": 22630500 }, { "epoch": 65.51, "learning_rate": 1.7258026699703595e-05, "loss": 1.9133, "step": 22631000 }, { "epoch": 65.51, "learning_rate": 1.7257303052056317e-05, "loss": 1.8879, "step": 22631500 }, { "epoch": 65.51, "learning_rate": 1.725657940440904e-05, "loss": 1.9162, "step": 22632000 }, { "epoch": 65.51, "learning_rate": 1.7255855756761765e-05, "loss": 1.8993, "step": 22632500 }, { "epoch": 65.51, "learning_rate": 1.7255132109114487e-05, "loss": 1.9143, "step": 22633000 }, { "epoch": 65.51, "learning_rate": 1.7254409908762506e-05, "loss": 1.8955, "step": 22633500 }, { "epoch": 65.52, "learning_rate": 1.725368626111523e-05, "loss": 1.9369, "step": 22634000 }, { "epoch": 65.52, "learning_rate": 1.7252964060763247e-05, "loss": 1.9103, "step": 22634500 }, { "epoch": 65.52, "learning_rate": 1.725224041311597e-05, "loss": 1.9363, "step": 22635000 }, { "epoch": 65.52, "learning_rate": 1.7251516765468692e-05, "loss": 1.9352, "step": 22635500 }, { "epoch": 65.52, "learning_rate": 1.7250793117821414e-05, "loss": 1.916, "step": 22636000 }, { "epoch": 65.52, "learning_rate": 1.7250070917469436e-05, "loss": 1.9018, "step": 22636500 }, { "epoch": 65.52, "learning_rate": 1.724934726982216e-05, "loss": 1.9198, "step": 22637000 }, { "epoch": 65.53, "learning_rate": 1.724862362217488e-05, "loss": 1.9265, "step": 22637500 }, { "epoch": 65.53, "learning_rate": 1.7247899974527603e-05, "loss": 1.8999, "step": 22638000 }, { "epoch": 65.53, "learning_rate": 1.7247176326880325e-05, "loss": 1.9081, "step": 22638500 }, { "epoch": 65.53, "learning_rate": 1.724645267923305e-05, "loss": 1.9244, "step": 22639000 }, { "epoch": 65.53, "learning_rate": 1.7245729031585773e-05, "loss": 1.9489, "step": 22639500 }, { "epoch": 65.53, "learning_rate": 1.72450053839385e-05, "loss": 1.8906, "step": 22640000 }, { "epoch": 65.53, "learning_rate": 1.724428173629122e-05, "loss": 1.9051, "step": 22640500 }, { "epoch": 65.54, "learning_rate": 1.7243558088643943e-05, "loss": 1.9041, "step": 22641000 }, { "epoch": 65.54, "learning_rate": 1.7242834440996665e-05, "loss": 1.9111, "step": 22641500 }, { "epoch": 65.54, "learning_rate": 1.7242112240644684e-05, "loss": 1.9103, "step": 22642000 }, { "epoch": 65.54, "learning_rate": 1.7241388592997407e-05, "loss": 1.9061, "step": 22642500 }, { "epoch": 65.54, "learning_rate": 1.724066494535013e-05, "loss": 1.9154, "step": 22643000 }, { "epoch": 65.54, "learning_rate": 1.723994129770285e-05, "loss": 1.9233, "step": 22643500 }, { "epoch": 65.55, "learning_rate": 1.7239217650055577e-05, "loss": 1.9254, "step": 22644000 }, { "epoch": 65.55, "learning_rate": 1.7238494002408302e-05, "loss": 1.912, "step": 22644500 }, { "epoch": 65.55, "learning_rate": 1.7237770354761025e-05, "loss": 1.894, "step": 22645000 }, { "epoch": 65.55, "learning_rate": 1.7237046707113747e-05, "loss": 1.9266, "step": 22645500 }, { "epoch": 65.55, "learning_rate": 1.723632305946647e-05, "loss": 1.9187, "step": 22646000 }, { "epoch": 65.55, "learning_rate": 1.723559941181919e-05, "loss": 1.8979, "step": 22646500 }, { "epoch": 65.55, "learning_rate": 1.7234875764171917e-05, "loss": 1.917, "step": 22647000 }, { "epoch": 65.56, "learning_rate": 1.7234153563819936e-05, "loss": 1.9397, "step": 22647500 }, { "epoch": 65.56, "learning_rate": 1.7233429916172658e-05, "loss": 1.9076, "step": 22648000 }, { "epoch": 65.56, "learning_rate": 1.723270626852538e-05, "loss": 1.8911, "step": 22648500 }, { "epoch": 65.56, "learning_rate": 1.7231982620878103e-05, "loss": 1.9099, "step": 22649000 }, { "epoch": 65.56, "learning_rate": 1.7231258973230828e-05, "loss": 1.9047, "step": 22649500 }, { "epoch": 65.56, "learning_rate": 1.723053532558355e-05, "loss": 1.9062, "step": 22650000 }, { "epoch": 65.56, "learning_rate": 1.7229813125231566e-05, "loss": 1.9006, "step": 22650500 }, { "epoch": 65.57, "learning_rate": 1.722908947758429e-05, "loss": 1.907, "step": 22651000 }, { "epoch": 65.57, "learning_rate": 1.7228365829937014e-05, "loss": 1.935, "step": 22651500 }, { "epoch": 65.57, "learning_rate": 1.722764218228974e-05, "loss": 1.9143, "step": 22652000 }, { "epoch": 65.57, "learning_rate": 1.722691853464246e-05, "loss": 1.9285, "step": 22652500 }, { "epoch": 65.57, "learning_rate": 1.7226194886995184e-05, "loss": 1.8859, "step": 22653000 }, { "epoch": 65.57, "learning_rate": 1.7225471239347906e-05, "loss": 1.9045, "step": 22653500 }, { "epoch": 65.57, "learning_rate": 1.722474759170063e-05, "loss": 1.9132, "step": 22654000 }, { "epoch": 65.58, "learning_rate": 1.7224023944053354e-05, "loss": 1.9216, "step": 22654500 }, { "epoch": 65.58, "learning_rate": 1.7223301743701373e-05, "loss": 1.9616, "step": 22655000 }, { "epoch": 65.58, "learning_rate": 1.7222580990644685e-05, "loss": 1.8933, "step": 22655500 }, { "epoch": 65.58, "learning_rate": 1.7221857342997407e-05, "loss": 1.8915, "step": 22656000 }, { "epoch": 65.58, "learning_rate": 1.722113369535013e-05, "loss": 1.9151, "step": 22656500 }, { "epoch": 65.58, "learning_rate": 1.7220410047702852e-05, "loss": 1.8919, "step": 22657000 }, { "epoch": 65.58, "learning_rate": 1.7219686400055578e-05, "loss": 1.9178, "step": 22657500 }, { "epoch": 65.59, "learning_rate": 1.72189627524083e-05, "loss": 1.9108, "step": 22658000 }, { "epoch": 65.59, "learning_rate": 1.7218240552056315e-05, "loss": 1.9129, "step": 22658500 }, { "epoch": 65.59, "learning_rate": 1.721751690440904e-05, "loss": 1.9149, "step": 22659000 }, { "epoch": 65.59, "learning_rate": 1.7216793256761767e-05, "loss": 1.9144, "step": 22659500 }, { "epoch": 65.59, "learning_rate": 1.721606960911449e-05, "loss": 1.8784, "step": 22660000 }, { "epoch": 65.59, "learning_rate": 1.721534596146721e-05, "loss": 1.9142, "step": 22660500 }, { "epoch": 65.59, "learning_rate": 1.7214622313819933e-05, "loss": 1.8964, "step": 22661000 }, { "epoch": 65.6, "learning_rate": 1.7213898666172655e-05, "loss": 1.8966, "step": 22661500 }, { "epoch": 65.6, "learning_rate": 1.7213175018525378e-05, "loss": 1.9019, "step": 22662000 }, { "epoch": 65.6, "learning_rate": 1.7212451370878103e-05, "loss": 1.9469, "step": 22662500 }, { "epoch": 65.6, "learning_rate": 1.721172772323083e-05, "loss": 1.9129, "step": 22663000 }, { "epoch": 65.6, "learning_rate": 1.721100407558355e-05, "loss": 1.9267, "step": 22663500 }, { "epoch": 65.6, "learning_rate": 1.7210280427936273e-05, "loss": 1.9075, "step": 22664000 }, { "epoch": 65.6, "learning_rate": 1.7209556780288996e-05, "loss": 1.9045, "step": 22664500 }, { "epoch": 65.61, "learning_rate": 1.7208834579937015e-05, "loss": 1.9375, "step": 22665000 }, { "epoch": 65.61, "learning_rate": 1.7208110932289737e-05, "loss": 1.9065, "step": 22665500 }, { "epoch": 65.61, "learning_rate": 1.7207387284642462e-05, "loss": 1.9066, "step": 22666000 }, { "epoch": 65.61, "learning_rate": 1.7206663636995185e-05, "loss": 1.9184, "step": 22666500 }, { "epoch": 65.61, "learning_rate": 1.7205939989347907e-05, "loss": 1.9257, "step": 22667000 }, { "epoch": 65.61, "learning_rate": 1.7205217788995926e-05, "loss": 1.9226, "step": 22667500 }, { "epoch": 65.61, "learning_rate": 1.720449558864394e-05, "loss": 1.8835, "step": 22668000 }, { "epoch": 65.62, "learning_rate": 1.720377338829196e-05, "loss": 1.9277, "step": 22668500 }, { "epoch": 65.62, "learning_rate": 1.7203049740644683e-05, "loss": 1.917, "step": 22669000 }, { "epoch": 65.62, "learning_rate": 1.7202326092997405e-05, "loss": 1.9135, "step": 22669500 }, { "epoch": 65.62, "learning_rate": 1.720160244535013e-05, "loss": 1.9155, "step": 22670000 }, { "epoch": 65.62, "learning_rate": 1.7200878797702856e-05, "loss": 1.9146, "step": 22670500 }, { "epoch": 65.62, "learning_rate": 1.7200155150055578e-05, "loss": 1.9234, "step": 22671000 }, { "epoch": 65.62, "learning_rate": 1.71994315024083e-05, "loss": 1.916, "step": 22671500 }, { "epoch": 65.63, "learning_rate": 1.7198707854761023e-05, "loss": 1.9009, "step": 22672000 }, { "epoch": 65.63, "learning_rate": 1.7197984207113745e-05, "loss": 1.9206, "step": 22672500 }, { "epoch": 65.63, "learning_rate": 1.7197260559466467e-05, "loss": 1.9273, "step": 22673000 }, { "epoch": 65.63, "learning_rate": 1.7196538359114486e-05, "loss": 1.9179, "step": 22673500 }, { "epoch": 65.63, "learning_rate": 1.7195814711467212e-05, "loss": 1.9182, "step": 22674000 }, { "epoch": 65.63, "learning_rate": 1.7195091063819934e-05, "loss": 1.9495, "step": 22674500 }, { "epoch": 65.63, "learning_rate": 1.7194367416172656e-05, "loss": 1.9096, "step": 22675000 }, { "epoch": 65.64, "learning_rate": 1.7193643768525382e-05, "loss": 1.9066, "step": 22675500 }, { "epoch": 65.64, "learning_rate": 1.7192920120878104e-05, "loss": 1.9219, "step": 22676000 }, { "epoch": 65.64, "learning_rate": 1.7192196473230826e-05, "loss": 1.9065, "step": 22676500 }, { "epoch": 65.64, "learning_rate": 1.7191472825583552e-05, "loss": 1.9165, "step": 22677000 }, { "epoch": 65.64, "learning_rate": 1.7190749177936274e-05, "loss": 1.9213, "step": 22677500 }, { "epoch": 65.64, "learning_rate": 1.7190026977584293e-05, "loss": 1.9226, "step": 22678000 }, { "epoch": 65.64, "learning_rate": 1.718930477723231e-05, "loss": 1.9264, "step": 22678500 }, { "epoch": 65.65, "learning_rate": 1.7188582576880328e-05, "loss": 1.9105, "step": 22679000 }, { "epoch": 65.65, "learning_rate": 1.718785892923305e-05, "loss": 1.9302, "step": 22679500 }, { "epoch": 65.65, "learning_rate": 1.7187135281585772e-05, "loss": 1.9235, "step": 22680000 }, { "epoch": 65.65, "learning_rate": 1.7186411633938494e-05, "loss": 1.9361, "step": 22680500 }, { "epoch": 65.65, "learning_rate": 1.718568798629122e-05, "loss": 1.9069, "step": 22681000 }, { "epoch": 65.65, "learning_rate": 1.7184964338643946e-05, "loss": 1.8955, "step": 22681500 }, { "epoch": 65.66, "learning_rate": 1.7184240690996668e-05, "loss": 1.9232, "step": 22682000 }, { "epoch": 65.66, "learning_rate": 1.718351704334939e-05, "loss": 1.9086, "step": 22682500 }, { "epoch": 65.66, "learning_rate": 1.7182793395702112e-05, "loss": 1.9102, "step": 22683000 }, { "epoch": 65.66, "learning_rate": 1.7182069748054835e-05, "loss": 1.9269, "step": 22683500 }, { "epoch": 65.66, "learning_rate": 1.7181347547702853e-05, "loss": 1.9125, "step": 22684000 }, { "epoch": 65.66, "learning_rate": 1.7180623900055576e-05, "loss": 1.9169, "step": 22684500 }, { "epoch": 65.66, "learning_rate": 1.71799002524083e-05, "loss": 1.9136, "step": 22685000 }, { "epoch": 65.67, "learning_rate": 1.7179176604761024e-05, "loss": 1.8951, "step": 22685500 }, { "epoch": 65.67, "learning_rate": 1.7178452957113746e-05, "loss": 1.9484, "step": 22686000 }, { "epoch": 65.67, "learning_rate": 1.717772930946647e-05, "loss": 1.8732, "step": 22686500 }, { "epoch": 65.67, "learning_rate": 1.7177005661819194e-05, "loss": 1.9187, "step": 22687000 }, { "epoch": 65.67, "learning_rate": 1.7176282014171916e-05, "loss": 1.9179, "step": 22687500 }, { "epoch": 65.67, "learning_rate": 1.7175561261115228e-05, "loss": 1.8936, "step": 22688000 }, { "epoch": 65.67, "learning_rate": 1.717483761346795e-05, "loss": 1.9395, "step": 22688500 }, { "epoch": 65.68, "learning_rate": 1.717411541311597e-05, "loss": 1.9074, "step": 22689000 }, { "epoch": 65.68, "learning_rate": 1.7173391765468695e-05, "loss": 1.94, "step": 22689500 }, { "epoch": 65.68, "learning_rate": 1.7172668117821417e-05, "loss": 1.9063, "step": 22690000 }, { "epoch": 65.68, "learning_rate": 1.717194447017414e-05, "loss": 1.9114, "step": 22690500 }, { "epoch": 65.68, "learning_rate": 1.717122226982216e-05, "loss": 1.9225, "step": 22691000 }, { "epoch": 65.68, "learning_rate": 1.717049862217488e-05, "loss": 1.9046, "step": 22691500 }, { "epoch": 65.68, "learning_rate": 1.7169774974527603e-05, "loss": 1.8953, "step": 22692000 }, { "epoch": 65.69, "learning_rate": 1.716905132688033e-05, "loss": 1.9247, "step": 22692500 }, { "epoch": 65.69, "learning_rate": 1.716832767923305e-05, "loss": 1.9283, "step": 22693000 }, { "epoch": 65.69, "learning_rate": 1.7167604031585773e-05, "loss": 1.9195, "step": 22693500 }, { "epoch": 65.69, "learning_rate": 1.7166880383938495e-05, "loss": 1.926, "step": 22694000 }, { "epoch": 65.69, "learning_rate": 1.716615673629122e-05, "loss": 1.9253, "step": 22694500 }, { "epoch": 65.69, "learning_rate": 1.7165433088643943e-05, "loss": 1.906, "step": 22695000 }, { "epoch": 65.69, "learning_rate": 1.716471088829196e-05, "loss": 1.9134, "step": 22695500 }, { "epoch": 65.7, "learning_rate": 1.7163987240644684e-05, "loss": 1.9084, "step": 22696000 }, { "epoch": 65.7, "learning_rate": 1.716326359299741e-05, "loss": 1.8841, "step": 22696500 }, { "epoch": 65.7, "learning_rate": 1.7162539945350132e-05, "loss": 1.9198, "step": 22697000 }, { "epoch": 65.7, "learning_rate": 1.7161816297702854e-05, "loss": 1.9218, "step": 22697500 }, { "epoch": 65.7, "learning_rate": 1.7161092650055577e-05, "loss": 1.9261, "step": 22698000 }, { "epoch": 65.7, "learning_rate": 1.71603690024083e-05, "loss": 1.9247, "step": 22698500 }, { "epoch": 65.7, "learning_rate": 1.715964535476102e-05, "loss": 1.9007, "step": 22699000 }, { "epoch": 65.71, "learning_rate": 1.715892315440904e-05, "loss": 1.9271, "step": 22699500 }, { "epoch": 65.71, "learning_rate": 1.7158199506761766e-05, "loss": 1.915, "step": 22700000 }, { "epoch": 65.71, "learning_rate": 1.7157475859114488e-05, "loss": 1.9492, "step": 22700500 }, { "epoch": 65.71, "learning_rate": 1.715675221146721e-05, "loss": 1.903, "step": 22701000 }, { "epoch": 65.71, "learning_rate": 1.7156028563819936e-05, "loss": 1.9146, "step": 22701500 }, { "epoch": 65.71, "learning_rate": 1.7155304916172658e-05, "loss": 1.9076, "step": 22702000 }, { "epoch": 65.71, "learning_rate": 1.715458126852538e-05, "loss": 1.9362, "step": 22702500 }, { "epoch": 65.72, "learning_rate": 1.71538590681734e-05, "loss": 1.9201, "step": 22703000 }, { "epoch": 65.72, "learning_rate": 1.7153135420526125e-05, "loss": 1.9041, "step": 22703500 }, { "epoch": 65.72, "learning_rate": 1.7152414667469437e-05, "loss": 1.9253, "step": 22704000 }, { "epoch": 65.72, "learning_rate": 1.715169101982216e-05, "loss": 1.925, "step": 22704500 }, { "epoch": 65.72, "learning_rate": 1.715096737217488e-05, "loss": 1.9362, "step": 22705000 }, { "epoch": 65.72, "learning_rate": 1.7150243724527604e-05, "loss": 1.9241, "step": 22705500 }, { "epoch": 65.72, "learning_rate": 1.7149520076880326e-05, "loss": 1.9006, "step": 22706000 }, { "epoch": 65.73, "learning_rate": 1.7148796429233048e-05, "loss": 1.9293, "step": 22706500 }, { "epoch": 65.73, "learning_rate": 1.7148072781585774e-05, "loss": 1.9198, "step": 22707000 }, { "epoch": 65.73, "learning_rate": 1.71473491339385e-05, "loss": 1.9375, "step": 22707500 }, { "epoch": 65.73, "learning_rate": 1.714662548629122e-05, "loss": 1.9246, "step": 22708000 }, { "epoch": 65.73, "learning_rate": 1.7145901838643944e-05, "loss": 1.927, "step": 22708500 }, { "epoch": 65.73, "learning_rate": 1.7145178190996666e-05, "loss": 1.9181, "step": 22709000 }, { "epoch": 65.73, "learning_rate": 1.7144454543349388e-05, "loss": 1.9024, "step": 22709500 }, { "epoch": 65.74, "learning_rate": 1.714373089570211e-05, "loss": 1.9081, "step": 22710000 }, { "epoch": 65.74, "learning_rate": 1.7143007248054836e-05, "loss": 1.8994, "step": 22710500 }, { "epoch": 65.74, "learning_rate": 1.714228504770285e-05, "loss": 1.9479, "step": 22711000 }, { "epoch": 65.74, "learning_rate": 1.7141561400055577e-05, "loss": 1.9312, "step": 22711500 }, { "epoch": 65.74, "learning_rate": 1.71408377524083e-05, "loss": 1.9189, "step": 22712000 }, { "epoch": 65.74, "learning_rate": 1.7140114104761025e-05, "loss": 1.9091, "step": 22712500 }, { "epoch": 65.74, "learning_rate": 1.7139390457113747e-05, "loss": 1.8961, "step": 22713000 }, { "epoch": 65.75, "learning_rate": 1.713866680946647e-05, "loss": 1.9166, "step": 22713500 }, { "epoch": 65.75, "learning_rate": 1.7137943161819192e-05, "loss": 1.9326, "step": 22714000 }, { "epoch": 65.75, "learning_rate": 1.7137219514171917e-05, "loss": 1.9044, "step": 22714500 }, { "epoch": 65.75, "learning_rate": 1.713649586652464e-05, "loss": 1.9312, "step": 22715000 }, { "epoch": 65.75, "learning_rate": 1.713577366617266e-05, "loss": 1.9009, "step": 22715500 }, { "epoch": 65.75, "learning_rate": 1.7135051465820674e-05, "loss": 1.9089, "step": 22716000 }, { "epoch": 65.75, "learning_rate": 1.71343278181734e-05, "loss": 1.9418, "step": 22716500 }, { "epoch": 65.76, "learning_rate": 1.7133604170526122e-05, "loss": 1.9475, "step": 22717000 }, { "epoch": 65.76, "learning_rate": 1.7132880522878844e-05, "loss": 1.9039, "step": 22717500 }, { "epoch": 65.76, "learning_rate": 1.7132156875231567e-05, "loss": 1.9096, "step": 22718000 }, { "epoch": 65.76, "learning_rate": 1.7131434674879585e-05, "loss": 1.8866, "step": 22718500 }, { "epoch": 65.76, "learning_rate": 1.713071102723231e-05, "loss": 1.8896, "step": 22719000 }, { "epoch": 65.76, "learning_rate": 1.7129987379585033e-05, "loss": 1.9289, "step": 22719500 }, { "epoch": 65.77, "learning_rate": 1.7129263731937756e-05, "loss": 1.9363, "step": 22720000 }, { "epoch": 65.77, "learning_rate": 1.7128540084290478e-05, "loss": 1.9113, "step": 22720500 }, { "epoch": 65.77, "learning_rate": 1.71278164366432e-05, "loss": 1.8982, "step": 22721000 }, { "epoch": 65.77, "learning_rate": 1.7127092788995926e-05, "loss": 1.9332, "step": 22721500 }, { "epoch": 65.77, "learning_rate": 1.7126369141348648e-05, "loss": 1.9045, "step": 22722000 }, { "epoch": 65.77, "learning_rate": 1.7125645493701374e-05, "loss": 1.9115, "step": 22722500 }, { "epoch": 65.77, "learning_rate": 1.712492329334939e-05, "loss": 1.9258, "step": 22723000 }, { "epoch": 65.78, "learning_rate": 1.7124199645702115e-05, "loss": 1.9155, "step": 22723500 }, { "epoch": 65.78, "learning_rate": 1.7123475998054837e-05, "loss": 1.9124, "step": 22724000 }, { "epoch": 65.78, "learning_rate": 1.7122753797702852e-05, "loss": 1.9084, "step": 22724500 }, { "epoch": 65.78, "learning_rate": 1.7122030150055575e-05, "loss": 1.9179, "step": 22725000 }, { "epoch": 65.78, "learning_rate": 1.71213065024083e-05, "loss": 1.9181, "step": 22725500 }, { "epoch": 65.78, "learning_rate": 1.7120582854761026e-05, "loss": 1.8984, "step": 22726000 }, { "epoch": 65.78, "learning_rate": 1.7119859207113748e-05, "loss": 1.9276, "step": 22726500 }, { "epoch": 65.79, "learning_rate": 1.711913555946647e-05, "loss": 1.9111, "step": 22727000 }, { "epoch": 65.79, "learning_rate": 1.711841335911449e-05, "loss": 1.921, "step": 22727500 }, { "epoch": 65.79, "learning_rate": 1.711768971146721e-05, "loss": 1.9039, "step": 22728000 }, { "epoch": 65.79, "learning_rate": 1.7116966063819934e-05, "loss": 1.8926, "step": 22728500 }, { "epoch": 65.79, "learning_rate": 1.7116242416172656e-05, "loss": 1.9171, "step": 22729000 }, { "epoch": 65.79, "learning_rate": 1.7115518768525378e-05, "loss": 1.8948, "step": 22729500 }, { "epoch": 65.79, "learning_rate": 1.7114795120878104e-05, "loss": 1.9207, "step": 22730000 }, { "epoch": 65.8, "learning_rate": 1.7114071473230826e-05, "loss": 1.8966, "step": 22730500 }, { "epoch": 65.8, "learning_rate": 1.7113347825583552e-05, "loss": 1.8957, "step": 22731000 }, { "epoch": 65.8, "learning_rate": 1.7112625625231567e-05, "loss": 1.9342, "step": 22731500 }, { "epoch": 65.8, "learning_rate": 1.711190197758429e-05, "loss": 1.9165, "step": 22732000 }, { "epoch": 65.8, "learning_rate": 1.711117977723231e-05, "loss": 1.916, "step": 22732500 }, { "epoch": 65.8, "learning_rate": 1.711045612958503e-05, "loss": 1.9288, "step": 22733000 }, { "epoch": 65.8, "learning_rate": 1.7109732481937756e-05, "loss": 1.9115, "step": 22733500 }, { "epoch": 65.81, "learning_rate": 1.710900883429048e-05, "loss": 1.9005, "step": 22734000 }, { "epoch": 65.81, "learning_rate": 1.7108286633938498e-05, "loss": 1.9153, "step": 22734500 }, { "epoch": 65.81, "learning_rate": 1.710756298629122e-05, "loss": 1.9208, "step": 22735000 }, { "epoch": 65.81, "learning_rate": 1.7106839338643942e-05, "loss": 1.931, "step": 22735500 }, { "epoch": 65.81, "learning_rate": 1.7106115690996664e-05, "loss": 1.9287, "step": 22736000 }, { "epoch": 65.81, "learning_rate": 1.710539204334939e-05, "loss": 1.9194, "step": 22736500 }, { "epoch": 65.81, "learning_rate": 1.7104668395702112e-05, "loss": 1.8744, "step": 22737000 }, { "epoch": 65.82, "learning_rate": 1.7103944748054838e-05, "loss": 1.9033, "step": 22737500 }, { "epoch": 65.82, "learning_rate": 1.710322110040756e-05, "loss": 1.8991, "step": 22738000 }, { "epoch": 65.82, "learning_rate": 1.7102497452760282e-05, "loss": 1.9208, "step": 22738500 }, { "epoch": 65.82, "learning_rate": 1.71017752524083e-05, "loss": 1.8959, "step": 22739000 }, { "epoch": 65.82, "learning_rate": 1.7101051604761023e-05, "loss": 1.9134, "step": 22739500 }, { "epoch": 65.82, "learning_rate": 1.7100329404409042e-05, "loss": 1.9248, "step": 22740000 }, { "epoch": 65.82, "learning_rate": 1.7099605756761765e-05, "loss": 1.9265, "step": 22740500 }, { "epoch": 65.83, "learning_rate": 1.709888355640978e-05, "loss": 1.9116, "step": 22741000 }, { "epoch": 65.83, "learning_rate": 1.7098161356057802e-05, "loss": 1.902, "step": 22741500 }, { "epoch": 65.83, "learning_rate": 1.7097437708410525e-05, "loss": 1.9307, "step": 22742000 }, { "epoch": 65.83, "learning_rate": 1.7096714060763247e-05, "loss": 1.9153, "step": 22742500 }, { "epoch": 65.83, "learning_rate": 1.709599041311597e-05, "loss": 1.9056, "step": 22743000 }, { "epoch": 65.83, "learning_rate": 1.709526676546869e-05, "loss": 1.948, "step": 22743500 }, { "epoch": 65.83, "learning_rate": 1.7094543117821417e-05, "loss": 1.9091, "step": 22744000 }, { "epoch": 65.84, "learning_rate": 1.709381947017414e-05, "loss": 1.9199, "step": 22744500 }, { "epoch": 65.84, "learning_rate": 1.7093095822526865e-05, "loss": 1.892, "step": 22745000 }, { "epoch": 65.84, "learning_rate": 1.7092372174879587e-05, "loss": 1.9157, "step": 22745500 }, { "epoch": 65.84, "learning_rate": 1.709164852723231e-05, "loss": 1.9201, "step": 22746000 }, { "epoch": 65.84, "learning_rate": 1.709092487958503e-05, "loss": 1.9464, "step": 22746500 }, { "epoch": 65.84, "learning_rate": 1.7090201231937754e-05, "loss": 1.9247, "step": 22747000 }, { "epoch": 65.84, "learning_rate": 1.7089479031585773e-05, "loss": 1.912, "step": 22747500 }, { "epoch": 65.85, "learning_rate": 1.7088755383938495e-05, "loss": 1.8829, "step": 22748000 }, { "epoch": 65.85, "learning_rate": 1.7088031736291217e-05, "loss": 1.9144, "step": 22748500 }, { "epoch": 65.85, "learning_rate": 1.7087308088643943e-05, "loss": 1.9059, "step": 22749000 }, { "epoch": 65.85, "learning_rate": 1.7086585888291962e-05, "loss": 1.9188, "step": 22749500 }, { "epoch": 65.85, "learning_rate": 1.7085862240644684e-05, "loss": 1.9229, "step": 22750000 }, { "epoch": 65.85, "learning_rate": 1.7085138592997406e-05, "loss": 1.9349, "step": 22750500 }, { "epoch": 65.85, "learning_rate": 1.708441494535013e-05, "loss": 1.9248, "step": 22751000 }, { "epoch": 65.86, "learning_rate": 1.7083691297702854e-05, "loss": 1.9027, "step": 22751500 }, { "epoch": 65.86, "learning_rate": 1.7082967650055576e-05, "loss": 1.8861, "step": 22752000 }, { "epoch": 65.86, "learning_rate": 1.7082244002408302e-05, "loss": 1.9254, "step": 22752500 }, { "epoch": 65.86, "learning_rate": 1.7081520354761024e-05, "loss": 1.9061, "step": 22753000 }, { "epoch": 65.86, "learning_rate": 1.7080796707113746e-05, "loss": 1.8931, "step": 22753500 }, { "epoch": 65.86, "learning_rate": 1.708007305946647e-05, "loss": 1.9324, "step": 22754000 }, { "epoch": 65.86, "learning_rate": 1.7079349411819194e-05, "loss": 1.9107, "step": 22754500 }, { "epoch": 65.87, "learning_rate": 1.7078625764171916e-05, "loss": 1.9133, "step": 22755000 }, { "epoch": 65.87, "learning_rate": 1.707790211652464e-05, "loss": 1.9302, "step": 22755500 }, { "epoch": 65.87, "learning_rate": 1.7077179916172658e-05, "loss": 1.9266, "step": 22756000 }, { "epoch": 65.87, "learning_rate": 1.707645626852538e-05, "loss": 1.8956, "step": 22756500 }, { "epoch": 65.87, "learning_rate": 1.70757340681734e-05, "loss": 1.9141, "step": 22757000 }, { "epoch": 65.87, "learning_rate": 1.707501042052612e-05, "loss": 1.9122, "step": 22757500 }, { "epoch": 65.88, "learning_rate": 1.7074286772878843e-05, "loss": 1.9157, "step": 22758000 }, { "epoch": 65.88, "learning_rate": 1.7073564572526862e-05, "loss": 1.8987, "step": 22758500 }, { "epoch": 65.88, "learning_rate": 1.7072840924879584e-05, "loss": 1.9277, "step": 22759000 }, { "epoch": 65.88, "learning_rate": 1.7072118724527603e-05, "loss": 1.9352, "step": 22759500 }, { "epoch": 65.88, "learning_rate": 1.707139507688033e-05, "loss": 1.8974, "step": 22760000 }, { "epoch": 65.88, "learning_rate": 1.707067142923305e-05, "loss": 1.9004, "step": 22760500 }, { "epoch": 65.88, "learning_rate": 1.7069947781585774e-05, "loss": 1.9305, "step": 22761000 }, { "epoch": 65.89, "learning_rate": 1.7069224133938496e-05, "loss": 1.9262, "step": 22761500 }, { "epoch": 65.89, "learning_rate": 1.7068500486291218e-05, "loss": 1.9009, "step": 22762000 }, { "epoch": 65.89, "learning_rate": 1.7067776838643944e-05, "loss": 1.9129, "step": 22762500 }, { "epoch": 65.89, "learning_rate": 1.7067053190996666e-05, "loss": 1.9375, "step": 22763000 }, { "epoch": 65.89, "learning_rate": 1.706632954334939e-05, "loss": 1.9035, "step": 22763500 }, { "epoch": 65.89, "learning_rate": 1.7065605895702114e-05, "loss": 1.9199, "step": 22764000 }, { "epoch": 65.89, "learning_rate": 1.7064882248054836e-05, "loss": 1.9019, "step": 22764500 }, { "epoch": 65.9, "learning_rate": 1.7064158600407558e-05, "loss": 1.9233, "step": 22765000 }, { "epoch": 65.9, "learning_rate": 1.7063434952760284e-05, "loss": 1.9302, "step": 22765500 }, { "epoch": 65.9, "learning_rate": 1.70627127524083e-05, "loss": 1.9102, "step": 22766000 }, { "epoch": 65.9, "learning_rate": 1.7061990552056318e-05, "loss": 1.9076, "step": 22766500 }, { "epoch": 65.9, "learning_rate": 1.706126690440904e-05, "loss": 1.9208, "step": 22767000 }, { "epoch": 65.9, "learning_rate": 1.7060543256761766e-05, "loss": 1.895, "step": 22767500 }, { "epoch": 65.9, "learning_rate": 1.705981960911449e-05, "loss": 1.9164, "step": 22768000 }, { "epoch": 65.91, "learning_rate": 1.705909596146721e-05, "loss": 1.9047, "step": 22768500 }, { "epoch": 65.91, "learning_rate": 1.7058372313819933e-05, "loss": 1.8958, "step": 22769000 }, { "epoch": 65.91, "learning_rate": 1.705764866617266e-05, "loss": 1.8769, "step": 22769500 }, { "epoch": 65.91, "learning_rate": 1.705692501852538e-05, "loss": 1.9423, "step": 22770000 }, { "epoch": 65.91, "learning_rate": 1.7056201370878103e-05, "loss": 1.9185, "step": 22770500 }, { "epoch": 65.91, "learning_rate": 1.705547772323083e-05, "loss": 1.9329, "step": 22771000 }, { "epoch": 65.91, "learning_rate": 1.705475407558355e-05, "loss": 1.8847, "step": 22771500 }, { "epoch": 65.92, "learning_rate": 1.7054030427936273e-05, "loss": 1.9088, "step": 22772000 }, { "epoch": 65.92, "learning_rate": 1.7053309674879585e-05, "loss": 1.9308, "step": 22772500 }, { "epoch": 65.92, "learning_rate": 1.7052586027232308e-05, "loss": 1.9216, "step": 22773000 }, { "epoch": 65.92, "learning_rate": 1.7051862379585033e-05, "loss": 1.9396, "step": 22773500 }, { "epoch": 65.92, "learning_rate": 1.7051138731937755e-05, "loss": 1.9181, "step": 22774000 }, { "epoch": 65.92, "learning_rate": 1.7050415084290478e-05, "loss": 1.9205, "step": 22774500 }, { "epoch": 65.92, "learning_rate": 1.7049691436643203e-05, "loss": 1.9282, "step": 22775000 }, { "epoch": 65.93, "learning_rate": 1.7048967788995925e-05, "loss": 1.9317, "step": 22775500 }, { "epoch": 65.93, "learning_rate": 1.7048245588643944e-05, "loss": 1.9235, "step": 22776000 }, { "epoch": 65.93, "learning_rate": 1.7047521940996667e-05, "loss": 1.9272, "step": 22776500 }, { "epoch": 65.93, "learning_rate": 1.704679829334939e-05, "loss": 1.9259, "step": 22777000 }, { "epoch": 65.93, "learning_rate": 1.704607464570211e-05, "loss": 1.9074, "step": 22777500 }, { "epoch": 65.93, "learning_rate": 1.7045350998054833e-05, "loss": 1.9205, "step": 22778000 }, { "epoch": 65.93, "learning_rate": 1.704462735040756e-05, "loss": 1.8999, "step": 22778500 }, { "epoch": 65.94, "learning_rate": 1.7043905150055578e-05, "loss": 1.8967, "step": 22779000 }, { "epoch": 65.94, "learning_rate": 1.70431815024083e-05, "loss": 1.9262, "step": 22779500 }, { "epoch": 65.94, "learning_rate": 1.7042457854761022e-05, "loss": 1.9179, "step": 22780000 }, { "epoch": 65.94, "learning_rate": 1.7041734207113748e-05, "loss": 1.9344, "step": 22780500 }, { "epoch": 65.94, "learning_rate": 1.704101055946647e-05, "loss": 1.921, "step": 22781000 }, { "epoch": 65.94, "learning_rate": 1.7040286911819192e-05, "loss": 1.9299, "step": 22781500 }, { "epoch": 65.94, "learning_rate": 1.7039563264171918e-05, "loss": 1.9286, "step": 22782000 }, { "epoch": 65.95, "learning_rate": 1.703883961652464e-05, "loss": 1.9102, "step": 22782500 }, { "epoch": 65.95, "learning_rate": 1.7038115968877363e-05, "loss": 1.9023, "step": 22783000 }, { "epoch": 65.95, "learning_rate": 1.7037392321230085e-05, "loss": 1.94, "step": 22783500 }, { "epoch": 65.95, "learning_rate": 1.703666867358281e-05, "loss": 1.9117, "step": 22784000 }, { "epoch": 65.95, "learning_rate": 1.7035945025935533e-05, "loss": 1.9027, "step": 22784500 }, { "epoch": 65.95, "learning_rate": 1.7035222825583548e-05, "loss": 1.9236, "step": 22785000 }, { "epoch": 65.95, "learning_rate": 1.7034499177936274e-05, "loss": 1.9063, "step": 22785500 }, { "epoch": 65.96, "learning_rate": 1.7033775530289e-05, "loss": 1.9064, "step": 22786000 }, { "epoch": 65.96, "learning_rate": 1.703305188264172e-05, "loss": 1.9239, "step": 22786500 }, { "epoch": 65.96, "learning_rate": 1.7032328234994444e-05, "loss": 1.9075, "step": 22787000 }, { "epoch": 65.96, "learning_rate": 1.7031604587347166e-05, "loss": 1.9176, "step": 22787500 }, { "epoch": 65.96, "learning_rate": 1.703088093969989e-05, "loss": 1.9129, "step": 22788000 }, { "epoch": 65.96, "learning_rate": 1.703015729205261e-05, "loss": 1.9305, "step": 22788500 }, { "epoch": 65.96, "learning_rate": 1.702943509170063e-05, "loss": 1.9188, "step": 22789000 }, { "epoch": 65.97, "learning_rate": 1.7028711444053355e-05, "loss": 1.9353, "step": 22789500 }, { "epoch": 65.97, "learning_rate": 1.7027987796406077e-05, "loss": 1.9045, "step": 22790000 }, { "epoch": 65.97, "learning_rate": 1.70272641487588e-05, "loss": 1.9076, "step": 22790500 }, { "epoch": 65.97, "learning_rate": 1.702654194840682e-05, "loss": 1.9321, "step": 22791000 }, { "epoch": 65.97, "learning_rate": 1.702581830075954e-05, "loss": 1.9018, "step": 22791500 }, { "epoch": 65.97, "learning_rate": 1.7025094653112263e-05, "loss": 1.919, "step": 22792000 }, { "epoch": 65.97, "learning_rate": 1.7024372452760282e-05, "loss": 1.9131, "step": 22792500 }, { "epoch": 65.98, "learning_rate": 1.7023648805113004e-05, "loss": 1.8827, "step": 22793000 }, { "epoch": 65.98, "learning_rate": 1.702292515746573e-05, "loss": 1.9303, "step": 22793500 }, { "epoch": 65.98, "learning_rate": 1.7022201509818452e-05, "loss": 1.9232, "step": 22794000 }, { "epoch": 65.98, "learning_rate": 1.7021477862171174e-05, "loss": 1.9209, "step": 22794500 }, { "epoch": 65.98, "learning_rate": 1.70207542145239e-05, "loss": 1.9022, "step": 22795000 }, { "epoch": 65.98, "learning_rate": 1.7020030566876622e-05, "loss": 1.9106, "step": 22795500 }, { "epoch": 65.99, "learning_rate": 1.7019306919229344e-05, "loss": 1.903, "step": 22796000 }, { "epoch": 65.99, "learning_rate": 1.7018583271582067e-05, "loss": 1.9457, "step": 22796500 }, { "epoch": 65.99, "learning_rate": 1.7017859623934792e-05, "loss": 1.927, "step": 22797000 }, { "epoch": 65.99, "learning_rate": 1.701713742358281e-05, "loss": 1.9193, "step": 22797500 }, { "epoch": 65.99, "learning_rate": 1.7016413775935533e-05, "loss": 1.923, "step": 22798000 }, { "epoch": 65.99, "learning_rate": 1.7015690128288256e-05, "loss": 1.9196, "step": 22798500 }, { "epoch": 65.99, "learning_rate": 1.7014966480640978e-05, "loss": 1.929, "step": 22799000 }, { "epoch": 66.0, "learning_rate": 1.70142428329937e-05, "loss": 1.9159, "step": 22799500 }, { "epoch": 66.0, "learning_rate": 1.701352063264172e-05, "loss": 1.9001, "step": 22800000 }, { "epoch": 66.0, "learning_rate": 1.701279698499444e-05, "loss": 1.9033, "step": 22800500 }, { "epoch": 66.0, "learning_rate": 1.7012073337347167e-05, "loss": 1.9284, "step": 22801000 }, { "epoch": 66.0, "eval_accuracy": 0.6804083062541988, "eval_accuracy_mlm": 0.6478514651026132, "eval_accuracy_nsp": 0.8548509805539133, "eval_loss": 2.1602234840393066, "eval_runtime": 331.352, "eval_samples_per_second": 1316.986, "eval_steps_per_second": 54.875, "step": 22801152 }, { "epoch": 66.0, "learning_rate": 1.7011351136995186e-05, "loss": 1.9298, "step": 22801500 }, { "epoch": 66.0, "learning_rate": 1.7010627489347908e-05, "loss": 1.9174, "step": 22802000 }, { "epoch": 66.0, "learning_rate": 1.700990384170063e-05, "loss": 1.9058, "step": 22802500 }, { "epoch": 66.01, "learning_rate": 1.7009180194053353e-05, "loss": 1.871, "step": 22803000 }, { "epoch": 66.01, "learning_rate": 1.7008456546406075e-05, "loss": 1.8994, "step": 22803500 }, { "epoch": 66.01, "learning_rate": 1.70077328987588e-05, "loss": 1.8893, "step": 22804000 }, { "epoch": 66.01, "learning_rate": 1.7007009251111526e-05, "loss": 1.9003, "step": 22804500 }, { "epoch": 66.01, "learning_rate": 1.7006285603464248e-05, "loss": 1.9073, "step": 22805000 }, { "epoch": 66.01, "learning_rate": 1.7005563403112264e-05, "loss": 1.9099, "step": 22805500 }, { "epoch": 66.01, "learning_rate": 1.700483975546499e-05, "loss": 1.8884, "step": 22806000 }, { "epoch": 66.02, "learning_rate": 1.700411610781771e-05, "loss": 1.9185, "step": 22806500 }, { "epoch": 66.02, "learning_rate": 1.7003392460170434e-05, "loss": 1.9189, "step": 22807000 }, { "epoch": 66.02, "learning_rate": 1.7002668812523156e-05, "loss": 1.8828, "step": 22807500 }, { "epoch": 66.02, "learning_rate": 1.7001945164875882e-05, "loss": 1.9279, "step": 22808000 }, { "epoch": 66.02, "learning_rate": 1.7001221517228604e-05, "loss": 1.8998, "step": 22808500 }, { "epoch": 66.02, "learning_rate": 1.7000499316876623e-05, "loss": 1.9159, "step": 22809000 }, { "epoch": 66.02, "learning_rate": 1.6999775669229345e-05, "loss": 1.8798, "step": 22809500 }, { "epoch": 66.03, "learning_rate": 1.6999052021582067e-05, "loss": 1.8743, "step": 22810000 }, { "epoch": 66.03, "learning_rate": 1.699832837393479e-05, "loss": 1.8822, "step": 22810500 }, { "epoch": 66.03, "learning_rate": 1.6997604726287515e-05, "loss": 1.9114, "step": 22811000 }, { "epoch": 66.03, "learning_rate": 1.6996881078640238e-05, "loss": 1.8999, "step": 22811500 }, { "epoch": 66.03, "learning_rate": 1.6996157430992963e-05, "loss": 1.9109, "step": 22812000 }, { "epoch": 66.03, "learning_rate": 1.6995433783345685e-05, "loss": 1.9167, "step": 22812500 }, { "epoch": 66.03, "learning_rate": 1.6994710135698408e-05, "loss": 1.9204, "step": 22813000 }, { "epoch": 66.04, "learning_rate": 1.6993987935346427e-05, "loss": 1.8939, "step": 22813500 }, { "epoch": 66.04, "learning_rate": 1.699326428769915e-05, "loss": 1.9274, "step": 22814000 }, { "epoch": 66.04, "learning_rate": 1.6992542087347164e-05, "loss": 1.9019, "step": 22814500 }, { "epoch": 66.04, "learning_rate": 1.699181843969989e-05, "loss": 1.89, "step": 22815000 }, { "epoch": 66.04, "learning_rate": 1.6991094792052616e-05, "loss": 1.9144, "step": 22815500 }, { "epoch": 66.04, "learning_rate": 1.6990371144405338e-05, "loss": 1.8827, "step": 22816000 }, { "epoch": 66.04, "learning_rate": 1.698964749675806e-05, "loss": 1.9052, "step": 22816500 }, { "epoch": 66.05, "learning_rate": 1.698892529640608e-05, "loss": 1.908, "step": 22817000 }, { "epoch": 66.05, "learning_rate": 1.69882016487588e-05, "loss": 1.902, "step": 22817500 }, { "epoch": 66.05, "learning_rate": 1.6987478001111523e-05, "loss": 1.8959, "step": 22818000 }, { "epoch": 66.05, "learning_rate": 1.6986754353464246e-05, "loss": 1.8882, "step": 22818500 }, { "epoch": 66.05, "learning_rate": 1.6986030705816968e-05, "loss": 1.8917, "step": 22819000 }, { "epoch": 66.05, "learning_rate": 1.698530850546499e-05, "loss": 1.9087, "step": 22819500 }, { "epoch": 66.05, "learning_rate": 1.6984584857817712e-05, "loss": 1.9091, "step": 22820000 }, { "epoch": 66.06, "learning_rate": 1.6983861210170435e-05, "loss": 1.9017, "step": 22820500 }, { "epoch": 66.06, "learning_rate": 1.6983137562523157e-05, "loss": 1.9133, "step": 22821000 }, { "epoch": 66.06, "learning_rate": 1.698241391487588e-05, "loss": 1.8884, "step": 22821500 }, { "epoch": 66.06, "learning_rate": 1.6981690267228605e-05, "loss": 1.9052, "step": 22822000 }, { "epoch": 66.06, "learning_rate": 1.6980966619581327e-05, "loss": 1.8908, "step": 22822500 }, { "epoch": 66.06, "learning_rate": 1.6980242971934053e-05, "loss": 1.9248, "step": 22823000 }, { "epoch": 66.06, "learning_rate": 1.6979519324286775e-05, "loss": 1.8855, "step": 22823500 }, { "epoch": 66.07, "learning_rate": 1.6978795676639497e-05, "loss": 1.9066, "step": 22824000 }, { "epoch": 66.07, "learning_rate": 1.697807202899222e-05, "loss": 1.8884, "step": 22824500 }, { "epoch": 66.07, "learning_rate": 1.697734838134494e-05, "loss": 1.9027, "step": 22825000 }, { "epoch": 66.07, "learning_rate": 1.6976624733697667e-05, "loss": 1.8933, "step": 22825500 }, { "epoch": 66.07, "learning_rate": 1.6975902533345683e-05, "loss": 1.9123, "step": 22826000 }, { "epoch": 66.07, "learning_rate": 1.697517888569841e-05, "loss": 1.9386, "step": 22826500 }, { "epoch": 66.07, "learning_rate": 1.697445523805113e-05, "loss": 1.8968, "step": 22827000 }, { "epoch": 66.08, "learning_rate": 1.6973731590403856e-05, "loss": 1.9172, "step": 22827500 }, { "epoch": 66.08, "learning_rate": 1.697300794275658e-05, "loss": 1.9113, "step": 22828000 }, { "epoch": 66.08, "learning_rate": 1.6972285742404594e-05, "loss": 1.9033, "step": 22828500 }, { "epoch": 66.08, "learning_rate": 1.6971564989347906e-05, "loss": 1.9013, "step": 22829000 }, { "epoch": 66.08, "learning_rate": 1.697084134170063e-05, "loss": 1.9264, "step": 22829500 }, { "epoch": 66.08, "learning_rate": 1.6970117694053354e-05, "loss": 1.9201, "step": 22830000 }, { "epoch": 66.08, "learning_rate": 1.6969394046406076e-05, "loss": 1.9344, "step": 22830500 }, { "epoch": 66.09, "learning_rate": 1.6968670398758802e-05, "loss": 1.9056, "step": 22831000 }, { "epoch": 66.09, "learning_rate": 1.6967946751111524e-05, "loss": 1.9076, "step": 22831500 }, { "epoch": 66.09, "learning_rate": 1.6967223103464246e-05, "loss": 1.8942, "step": 22832000 }, { "epoch": 66.09, "learning_rate": 1.696649945581697e-05, "loss": 1.9021, "step": 22832500 }, { "epoch": 66.09, "learning_rate": 1.6965775808169694e-05, "loss": 1.9217, "step": 22833000 }, { "epoch": 66.09, "learning_rate": 1.6965052160522417e-05, "loss": 1.9067, "step": 22833500 }, { "epoch": 66.1, "learning_rate": 1.6964329960170432e-05, "loss": 1.9035, "step": 22834000 }, { "epoch": 66.1, "learning_rate": 1.6963607759818454e-05, "loss": 1.8783, "step": 22834500 }, { "epoch": 66.1, "learning_rate": 1.6962884112171177e-05, "loss": 1.9018, "step": 22835000 }, { "epoch": 66.1, "learning_rate": 1.69621604645239e-05, "loss": 1.8986, "step": 22835500 }, { "epoch": 66.1, "learning_rate": 1.696143681687662e-05, "loss": 1.8924, "step": 22836000 }, { "epoch": 66.1, "learning_rate": 1.6960713169229343e-05, "loss": 1.8811, "step": 22836500 }, { "epoch": 66.1, "learning_rate": 1.695998952158207e-05, "loss": 1.9032, "step": 22837000 }, { "epoch": 66.11, "learning_rate": 1.695926587393479e-05, "loss": 1.923, "step": 22837500 }, { "epoch": 66.11, "learning_rate": 1.6958542226287517e-05, "loss": 1.9146, "step": 22838000 }, { "epoch": 66.11, "learning_rate": 1.695781857864024e-05, "loss": 1.9109, "step": 22838500 }, { "epoch": 66.11, "learning_rate": 1.695709493099296e-05, "loss": 1.8923, "step": 22839000 }, { "epoch": 66.11, "learning_rate": 1.6956371283345684e-05, "loss": 1.8916, "step": 22839500 }, { "epoch": 66.11, "learning_rate": 1.6955647635698406e-05, "loss": 1.8858, "step": 22840000 }, { "epoch": 66.11, "learning_rate": 1.695492398805113e-05, "loss": 1.9048, "step": 22840500 }, { "epoch": 66.12, "learning_rate": 1.6954201787699147e-05, "loss": 1.901, "step": 22841000 }, { "epoch": 66.12, "learning_rate": 1.695347814005187e-05, "loss": 1.9045, "step": 22841500 }, { "epoch": 66.12, "learning_rate": 1.6952754492404595e-05, "loss": 1.8918, "step": 22842000 }, { "epoch": 66.12, "learning_rate": 1.695203084475732e-05, "loss": 1.8715, "step": 22842500 }, { "epoch": 66.12, "learning_rate": 1.6951308644405336e-05, "loss": 1.9188, "step": 22843000 }, { "epoch": 66.12, "learning_rate": 1.6950584996758058e-05, "loss": 1.9027, "step": 22843500 }, { "epoch": 66.12, "learning_rate": 1.694986134911078e-05, "loss": 1.8995, "step": 22844000 }, { "epoch": 66.13, "learning_rate": 1.6949137701463506e-05, "loss": 1.9099, "step": 22844500 }, { "epoch": 66.13, "learning_rate": 1.694841405381623e-05, "loss": 1.8808, "step": 22845000 }, { "epoch": 66.13, "learning_rate": 1.6947691853464247e-05, "loss": 1.9182, "step": 22845500 }, { "epoch": 66.13, "learning_rate": 1.694696820581697e-05, "loss": 1.8858, "step": 22846000 }, { "epoch": 66.13, "learning_rate": 1.6946244558169695e-05, "loss": 1.9069, "step": 22846500 }, { "epoch": 66.13, "learning_rate": 1.6945520910522417e-05, "loss": 1.927, "step": 22847000 }, { "epoch": 66.13, "learning_rate": 1.6944798710170433e-05, "loss": 1.902, "step": 22847500 }, { "epoch": 66.14, "learning_rate": 1.694407506252316e-05, "loss": 1.9152, "step": 22848000 }, { "epoch": 66.14, "learning_rate": 1.694335141487588e-05, "loss": 1.9059, "step": 22848500 }, { "epoch": 66.14, "learning_rate": 1.6942627767228603e-05, "loss": 1.8991, "step": 22849000 }, { "epoch": 66.14, "learning_rate": 1.694190411958133e-05, "loss": 1.8996, "step": 22849500 }, { "epoch": 66.14, "learning_rate": 1.6941181919229344e-05, "loss": 1.9148, "step": 22850000 }, { "epoch": 66.14, "learning_rate": 1.694045827158207e-05, "loss": 1.8913, "step": 22850500 }, { "epoch": 66.14, "learning_rate": 1.6939734623934792e-05, "loss": 1.8984, "step": 22851000 }, { "epoch": 66.15, "learning_rate": 1.6939010976287514e-05, "loss": 1.892, "step": 22851500 }, { "epoch": 66.15, "learning_rate": 1.6938287328640237e-05, "loss": 1.8825, "step": 22852000 }, { "epoch": 66.15, "learning_rate": 1.693756368099296e-05, "loss": 1.9056, "step": 22852500 }, { "epoch": 66.15, "learning_rate": 1.693684148064098e-05, "loss": 1.9251, "step": 22853000 }, { "epoch": 66.15, "learning_rate": 1.6936117832993703e-05, "loss": 1.9028, "step": 22853500 }, { "epoch": 66.15, "learning_rate": 1.6935395632641722e-05, "loss": 1.9251, "step": 22854000 }, { "epoch": 66.15, "learning_rate": 1.6934671984994444e-05, "loss": 1.897, "step": 22854500 }, { "epoch": 66.16, "learning_rate": 1.6933948337347167e-05, "loss": 1.9131, "step": 22855000 }, { "epoch": 66.16, "learning_rate": 1.693322468969989e-05, "loss": 1.9007, "step": 22855500 }, { "epoch": 66.16, "learning_rate": 1.693250104205261e-05, "loss": 1.9051, "step": 22856000 }, { "epoch": 66.16, "learning_rate": 1.6931777394405333e-05, "loss": 1.9037, "step": 22856500 }, { "epoch": 66.16, "learning_rate": 1.693105374675806e-05, "loss": 1.8961, "step": 22857000 }, { "epoch": 66.16, "learning_rate": 1.6930330099110785e-05, "loss": 1.8961, "step": 22857500 }, { "epoch": 66.16, "learning_rate": 1.6929606451463507e-05, "loss": 1.9084, "step": 22858000 }, { "epoch": 66.17, "learning_rate": 1.692888280381623e-05, "loss": 1.905, "step": 22858500 }, { "epoch": 66.17, "learning_rate": 1.692815915616895e-05, "loss": 1.9186, "step": 22859000 }, { "epoch": 66.17, "learning_rate": 1.692743695581697e-05, "loss": 1.8968, "step": 22859500 }, { "epoch": 66.17, "learning_rate": 1.6926713308169693e-05, "loss": 1.9178, "step": 22860000 }, { "epoch": 66.17, "learning_rate": 1.6925989660522418e-05, "loss": 1.9047, "step": 22860500 }, { "epoch": 66.17, "learning_rate": 1.692526601287514e-05, "loss": 1.9273, "step": 22861000 }, { "epoch": 66.17, "learning_rate": 1.6924542365227863e-05, "loss": 1.8979, "step": 22861500 }, { "epoch": 66.18, "learning_rate": 1.692382016487588e-05, "loss": 1.8872, "step": 22862000 }, { "epoch": 66.18, "learning_rate": 1.6923096517228604e-05, "loss": 1.9009, "step": 22862500 }, { "epoch": 66.18, "learning_rate": 1.6922372869581326e-05, "loss": 1.8995, "step": 22863000 }, { "epoch": 66.18, "learning_rate": 1.6921649221934048e-05, "loss": 1.9013, "step": 22863500 }, { "epoch": 66.18, "learning_rate": 1.6920925574286774e-05, "loss": 1.9147, "step": 22864000 }, { "epoch": 66.18, "learning_rate": 1.69202019266395e-05, "loss": 1.9214, "step": 22864500 }, { "epoch": 66.18, "learning_rate": 1.6919478278992222e-05, "loss": 1.9138, "step": 22865000 }, { "epoch": 66.19, "learning_rate": 1.6918756078640237e-05, "loss": 1.8954, "step": 22865500 }, { "epoch": 66.19, "learning_rate": 1.691803243099296e-05, "loss": 1.8789, "step": 22866000 }, { "epoch": 66.19, "learning_rate": 1.6917308783345685e-05, "loss": 1.8928, "step": 22866500 }, { "epoch": 66.19, "learning_rate": 1.6916585135698407e-05, "loss": 1.9238, "step": 22867000 }, { "epoch": 66.19, "learning_rate": 1.6915862935346423e-05, "loss": 1.8931, "step": 22867500 }, { "epoch": 66.19, "learning_rate": 1.691513928769915e-05, "loss": 1.9017, "step": 22868000 }, { "epoch": 66.19, "learning_rate": 1.6914415640051874e-05, "loss": 1.8978, "step": 22868500 }, { "epoch": 66.2, "learning_rate": 1.6913691992404596e-05, "loss": 1.9059, "step": 22869000 }, { "epoch": 66.2, "learning_rate": 1.691296834475732e-05, "loss": 1.897, "step": 22869500 }, { "epoch": 66.2, "learning_rate": 1.691224469711004e-05, "loss": 1.9263, "step": 22870000 }, { "epoch": 66.2, "learning_rate": 1.6911521049462763e-05, "loss": 1.8961, "step": 22870500 }, { "epoch": 66.2, "learning_rate": 1.6910797401815485e-05, "loss": 1.9083, "step": 22871000 }, { "epoch": 66.2, "learning_rate": 1.691007375416821e-05, "loss": 1.8979, "step": 22871500 }, { "epoch": 66.21, "learning_rate": 1.6909350106520937e-05, "loss": 1.8859, "step": 22872000 }, { "epoch": 66.21, "learning_rate": 1.6908627906168952e-05, "loss": 1.8694, "step": 22872500 }, { "epoch": 66.21, "learning_rate": 1.6907904258521674e-05, "loss": 1.9116, "step": 22873000 }, { "epoch": 66.21, "learning_rate": 1.69071806108744e-05, "loss": 1.9255, "step": 22873500 }, { "epoch": 66.21, "learning_rate": 1.6906456963227122e-05, "loss": 1.8763, "step": 22874000 }, { "epoch": 66.21, "learning_rate": 1.6905733315579844e-05, "loss": 1.8832, "step": 22874500 }, { "epoch": 66.21, "learning_rate": 1.690500966793257e-05, "loss": 1.8856, "step": 22875000 }, { "epoch": 66.22, "learning_rate": 1.6904288914875882e-05, "loss": 1.9183, "step": 22875500 }, { "epoch": 66.22, "learning_rate": 1.6903565267228605e-05, "loss": 1.9465, "step": 22876000 }, { "epoch": 66.22, "learning_rate": 1.6902841619581327e-05, "loss": 1.9002, "step": 22876500 }, { "epoch": 66.22, "learning_rate": 1.690211797193405e-05, "loss": 1.8967, "step": 22877000 }, { "epoch": 66.22, "learning_rate": 1.6901394324286775e-05, "loss": 1.91, "step": 22877500 }, { "epoch": 66.22, "learning_rate": 1.6900670676639497e-05, "loss": 1.93, "step": 22878000 }, { "epoch": 66.22, "learning_rate": 1.689994702899222e-05, "loss": 1.9005, "step": 22878500 }, { "epoch": 66.23, "learning_rate": 1.6899223381344945e-05, "loss": 1.9008, "step": 22879000 }, { "epoch": 66.23, "learning_rate": 1.6898499733697667e-05, "loss": 1.9118, "step": 22879500 }, { "epoch": 66.23, "learning_rate": 1.689777608605039e-05, "loss": 1.8938, "step": 22880000 }, { "epoch": 66.23, "learning_rate": 1.689705243840311e-05, "loss": 1.8804, "step": 22880500 }, { "epoch": 66.23, "learning_rate": 1.689633023805113e-05, "loss": 1.9076, "step": 22881000 }, { "epoch": 66.23, "learning_rate": 1.6895606590403853e-05, "loss": 1.9086, "step": 22881500 }, { "epoch": 66.23, "learning_rate": 1.6894882942756575e-05, "loss": 1.8946, "step": 22882000 }, { "epoch": 66.24, "learning_rate": 1.68941592951093e-05, "loss": 1.9188, "step": 22882500 }, { "epoch": 66.24, "learning_rate": 1.6893435647462026e-05, "loss": 1.9171, "step": 22883000 }, { "epoch": 66.24, "learning_rate": 1.689271344711004e-05, "loss": 1.9014, "step": 22883500 }, { "epoch": 66.24, "learning_rate": 1.6891989799462764e-05, "loss": 1.9075, "step": 22884000 }, { "epoch": 66.24, "learning_rate": 1.689126615181549e-05, "loss": 1.9006, "step": 22884500 }, { "epoch": 66.24, "learning_rate": 1.6890542504168212e-05, "loss": 1.907, "step": 22885000 }, { "epoch": 66.24, "learning_rate": 1.6889820303816227e-05, "loss": 1.8931, "step": 22885500 }, { "epoch": 66.25, "learning_rate": 1.688909665616895e-05, "loss": 1.9182, "step": 22886000 }, { "epoch": 66.25, "learning_rate": 1.6888373008521675e-05, "loss": 1.9185, "step": 22886500 }, { "epoch": 66.25, "learning_rate": 1.68876493608744e-05, "loss": 1.92, "step": 22887000 }, { "epoch": 66.25, "learning_rate": 1.6886925713227123e-05, "loss": 1.9282, "step": 22887500 }, { "epoch": 66.25, "learning_rate": 1.688620351287514e-05, "loss": 1.9035, "step": 22888000 }, { "epoch": 66.25, "learning_rate": 1.6885479865227864e-05, "loss": 1.8967, "step": 22888500 }, { "epoch": 66.25, "learning_rate": 1.6884756217580586e-05, "loss": 1.8909, "step": 22889000 }, { "epoch": 66.26, "learning_rate": 1.6884034017228602e-05, "loss": 1.9205, "step": 22889500 }, { "epoch": 66.26, "learning_rate": 1.6883310369581328e-05, "loss": 1.9218, "step": 22890000 }, { "epoch": 66.26, "learning_rate": 1.6882586721934053e-05, "loss": 1.9155, "step": 22890500 }, { "epoch": 66.26, "learning_rate": 1.6881863074286775e-05, "loss": 1.9084, "step": 22891000 }, { "epoch": 66.26, "learning_rate": 1.6881139426639498e-05, "loss": 1.9183, "step": 22891500 }, { "epoch": 66.26, "learning_rate": 1.688041577899222e-05, "loss": 1.9174, "step": 22892000 }, { "epoch": 66.26, "learning_rate": 1.6879692131344942e-05, "loss": 1.9206, "step": 22892500 }, { "epoch": 66.27, "learning_rate": 1.687896993099296e-05, "loss": 1.9267, "step": 22893000 }, { "epoch": 66.27, "learning_rate": 1.6878246283345683e-05, "loss": 1.9073, "step": 22893500 }, { "epoch": 66.27, "learning_rate": 1.6877524082993702e-05, "loss": 1.8841, "step": 22894000 }, { "epoch": 66.27, "learning_rate": 1.6876800435346428e-05, "loss": 1.9033, "step": 22894500 }, { "epoch": 66.27, "learning_rate": 1.687607678769915e-05, "loss": 1.8927, "step": 22895000 }, { "epoch": 66.27, "learning_rate": 1.6875354587347166e-05, "loss": 1.9075, "step": 22895500 }, { "epoch": 66.27, "learning_rate": 1.687463093969989e-05, "loss": 1.8975, "step": 22896000 }, { "epoch": 66.28, "learning_rate": 1.6873907292052614e-05, "loss": 1.9121, "step": 22896500 }, { "epoch": 66.28, "learning_rate": 1.6873183644405336e-05, "loss": 1.9025, "step": 22897000 }, { "epoch": 66.28, "learning_rate": 1.6872459996758058e-05, "loss": 1.8883, "step": 22897500 }, { "epoch": 66.28, "learning_rate": 1.6871736349110784e-05, "loss": 1.8767, "step": 22898000 }, { "epoch": 66.28, "learning_rate": 1.6871012701463506e-05, "loss": 1.9236, "step": 22898500 }, { "epoch": 66.28, "learning_rate": 1.6870289053816228e-05, "loss": 1.8875, "step": 22899000 }, { "epoch": 66.28, "learning_rate": 1.6869565406168954e-05, "loss": 1.9128, "step": 22899500 }, { "epoch": 66.29, "learning_rate": 1.6868841758521676e-05, "loss": 1.8992, "step": 22900000 }, { "epoch": 66.29, "learning_rate": 1.6868118110874398e-05, "loss": 1.9173, "step": 22900500 }, { "epoch": 66.29, "learning_rate": 1.686739446322712e-05, "loss": 1.9046, "step": 22901000 }, { "epoch": 66.29, "learning_rate": 1.6866670815579846e-05, "loss": 1.9291, "step": 22901500 }, { "epoch": 66.29, "learning_rate": 1.6865947167932568e-05, "loss": 1.9154, "step": 22902000 }, { "epoch": 66.29, "learning_rate": 1.686522352028529e-05, "loss": 1.9393, "step": 22902500 }, { "epoch": 66.29, "learning_rate": 1.6864499872638016e-05, "loss": 1.9096, "step": 22903000 }, { "epoch": 66.3, "learning_rate": 1.686377622499074e-05, "loss": 1.916, "step": 22903500 }, { "epoch": 66.3, "learning_rate": 1.686305257734346e-05, "loss": 1.9269, "step": 22904000 }, { "epoch": 66.3, "learning_rate": 1.686233037699148e-05, "loss": 1.9078, "step": 22904500 }, { "epoch": 66.3, "learning_rate": 1.6861608176639495e-05, "loss": 1.8955, "step": 22905000 }, { "epoch": 66.3, "learning_rate": 1.686088452899222e-05, "loss": 1.9346, "step": 22905500 }, { "epoch": 66.3, "learning_rate": 1.6860160881344943e-05, "loss": 1.8909, "step": 22906000 }, { "epoch": 66.3, "learning_rate": 1.685943723369767e-05, "loss": 1.9279, "step": 22906500 }, { "epoch": 66.31, "learning_rate": 1.685871358605039e-05, "loss": 1.9126, "step": 22907000 }, { "epoch": 66.31, "learning_rate": 1.6857989938403113e-05, "loss": 1.9112, "step": 22907500 }, { "epoch": 66.31, "learning_rate": 1.6857266290755835e-05, "loss": 1.9053, "step": 22908000 }, { "epoch": 66.31, "learning_rate": 1.6856542643108558e-05, "loss": 1.9102, "step": 22908500 }, { "epoch": 66.31, "learning_rate": 1.6855818995461283e-05, "loss": 1.8887, "step": 22909000 }, { "epoch": 66.31, "learning_rate": 1.6855095347814005e-05, "loss": 1.8867, "step": 22909500 }, { "epoch": 66.32, "learning_rate": 1.685437170016673e-05, "loss": 1.8996, "step": 22910000 }, { "epoch": 66.32, "learning_rate": 1.6853648052519453e-05, "loss": 1.9109, "step": 22910500 }, { "epoch": 66.32, "learning_rate": 1.685292585216747e-05, "loss": 1.9057, "step": 22911000 }, { "epoch": 66.32, "learning_rate": 1.685220220452019e-05, "loss": 1.8908, "step": 22911500 }, { "epoch": 66.32, "learning_rate": 1.6851478556872917e-05, "loss": 1.8995, "step": 22912000 }, { "epoch": 66.32, "learning_rate": 1.6850754909225642e-05, "loss": 1.9133, "step": 22912500 }, { "epoch": 66.32, "learning_rate": 1.6850032708873658e-05, "loss": 1.9085, "step": 22913000 }, { "epoch": 66.33, "learning_rate": 1.684930906122638e-05, "loss": 1.9044, "step": 22913500 }, { "epoch": 66.33, "learning_rate": 1.6848585413579106e-05, "loss": 1.8998, "step": 22914000 }, { "epoch": 66.33, "learning_rate": 1.684786321322712e-05, "loss": 1.9093, "step": 22914500 }, { "epoch": 66.33, "learning_rate": 1.6847139565579843e-05, "loss": 1.8963, "step": 22915000 }, { "epoch": 66.33, "learning_rate": 1.684641591793257e-05, "loss": 1.917, "step": 22915500 }, { "epoch": 66.33, "learning_rate": 1.684569227028529e-05, "loss": 1.9003, "step": 22916000 }, { "epoch": 66.33, "learning_rate": 1.6844968622638017e-05, "loss": 1.927, "step": 22916500 }, { "epoch": 66.34, "learning_rate": 1.684424497499074e-05, "loss": 1.8979, "step": 22917000 }, { "epoch": 66.34, "learning_rate": 1.6843522774638755e-05, "loss": 1.915, "step": 22917500 }, { "epoch": 66.34, "learning_rate": 1.684279912699148e-05, "loss": 1.8791, "step": 22918000 }, { "epoch": 66.34, "learning_rate": 1.6842075479344203e-05, "loss": 1.9088, "step": 22918500 }, { "epoch": 66.34, "learning_rate": 1.6841351831696925e-05, "loss": 1.9087, "step": 22919000 }, { "epoch": 66.34, "learning_rate": 1.6840628184049647e-05, "loss": 1.9186, "step": 22919500 }, { "epoch": 66.34, "learning_rate": 1.6839904536402373e-05, "loss": 1.8947, "step": 22920000 }, { "epoch": 66.35, "learning_rate": 1.6839180888755095e-05, "loss": 1.9259, "step": 22920500 }, { "epoch": 66.35, "learning_rate": 1.683845724110782e-05, "loss": 1.9155, "step": 22921000 }, { "epoch": 66.35, "learning_rate": 1.6837733593460543e-05, "loss": 1.9093, "step": 22921500 }, { "epoch": 66.35, "learning_rate": 1.6837009945813265e-05, "loss": 1.9045, "step": 22922000 }, { "epoch": 66.35, "learning_rate": 1.6836286298165987e-05, "loss": 1.9033, "step": 22922500 }, { "epoch": 66.35, "learning_rate": 1.683556265051871e-05, "loss": 1.9016, "step": 22923000 }, { "epoch": 66.35, "learning_rate": 1.6834839002871435e-05, "loss": 1.8856, "step": 22923500 }, { "epoch": 66.36, "learning_rate": 1.6834116802519454e-05, "loss": 1.9148, "step": 22924000 }, { "epoch": 66.36, "learning_rate": 1.6833393154872176e-05, "loss": 1.904, "step": 22924500 }, { "epoch": 66.36, "learning_rate": 1.68326695072249e-05, "loss": 1.8841, "step": 22925000 }, { "epoch": 66.36, "learning_rate": 1.683194585957762e-05, "loss": 1.9232, "step": 22925500 }, { "epoch": 66.36, "learning_rate": 1.6831222211930346e-05, "loss": 1.8937, "step": 22926000 }, { "epoch": 66.36, "learning_rate": 1.6830500011578362e-05, "loss": 1.9203, "step": 22926500 }, { "epoch": 66.36, "learning_rate": 1.682977781122638e-05, "loss": 1.892, "step": 22927000 }, { "epoch": 66.37, "learning_rate": 1.6829054163579106e-05, "loss": 1.8688, "step": 22927500 }, { "epoch": 66.37, "learning_rate": 1.682833051593183e-05, "loss": 1.9115, "step": 22928000 }, { "epoch": 66.37, "learning_rate": 1.6827608315579844e-05, "loss": 1.9046, "step": 22928500 }, { "epoch": 66.37, "learning_rate": 1.682688466793257e-05, "loss": 1.9009, "step": 22929000 }, { "epoch": 66.37, "learning_rate": 1.6826161020285292e-05, "loss": 1.8947, "step": 22929500 }, { "epoch": 66.37, "learning_rate": 1.6825437372638014e-05, "loss": 1.922, "step": 22930000 }, { "epoch": 66.37, "learning_rate": 1.6824715172286033e-05, "loss": 1.9053, "step": 22930500 }, { "epoch": 66.38, "learning_rate": 1.6823991524638756e-05, "loss": 1.907, "step": 22931000 }, { "epoch": 66.38, "learning_rate": 1.682326787699148e-05, "loss": 1.9038, "step": 22931500 }, { "epoch": 66.38, "learning_rate": 1.6822544229344203e-05, "loss": 1.9114, "step": 22932000 }, { "epoch": 66.38, "learning_rate": 1.6821820581696926e-05, "loss": 1.9012, "step": 22932500 }, { "epoch": 66.38, "learning_rate": 1.6821098381344945e-05, "loss": 1.9209, "step": 22933000 }, { "epoch": 66.38, "learning_rate": 1.682037618099296e-05, "loss": 1.9202, "step": 22933500 }, { "epoch": 66.38, "learning_rate": 1.6819652533345682e-05, "loss": 1.9142, "step": 22934000 }, { "epoch": 66.39, "learning_rate": 1.6818928885698408e-05, "loss": 1.922, "step": 22934500 }, { "epoch": 66.39, "learning_rate": 1.681820523805113e-05, "loss": 1.9217, "step": 22935000 }, { "epoch": 66.39, "learning_rate": 1.6817481590403856e-05, "loss": 1.9053, "step": 22935500 }, { "epoch": 66.39, "learning_rate": 1.6816757942756578e-05, "loss": 1.9148, "step": 22936000 }, { "epoch": 66.39, "learning_rate": 1.68160342951093e-05, "loss": 1.9269, "step": 22936500 }, { "epoch": 66.39, "learning_rate": 1.6815310647462023e-05, "loss": 1.9008, "step": 22937000 }, { "epoch": 66.39, "learning_rate": 1.6814586999814748e-05, "loss": 1.9201, "step": 22937500 }, { "epoch": 66.4, "learning_rate": 1.681386335216747e-05, "loss": 1.9367, "step": 22938000 }, { "epoch": 66.4, "learning_rate": 1.6813139704520193e-05, "loss": 1.907, "step": 22938500 }, { "epoch": 66.4, "learning_rate": 1.6812416056872918e-05, "loss": 1.9125, "step": 22939000 }, { "epoch": 66.4, "learning_rate": 1.681169240922564e-05, "loss": 1.9332, "step": 22939500 }, { "epoch": 66.4, "learning_rate": 1.6810968761578363e-05, "loss": 1.9049, "step": 22940000 }, { "epoch": 66.4, "learning_rate": 1.6810245113931085e-05, "loss": 1.9281, "step": 22940500 }, { "epoch": 66.4, "learning_rate": 1.6809522913579104e-05, "loss": 1.918, "step": 22941000 }, { "epoch": 66.41, "learning_rate": 1.6808799265931826e-05, "loss": 1.9239, "step": 22941500 }, { "epoch": 66.41, "learning_rate": 1.680807561828455e-05, "loss": 1.9239, "step": 22942000 }, { "epoch": 66.41, "learning_rate": 1.6807351970637274e-05, "loss": 1.9061, "step": 22942500 }, { "epoch": 66.41, "learning_rate": 1.6806628322989996e-05, "loss": 1.8795, "step": 22943000 }, { "epoch": 66.41, "learning_rate": 1.6805904675342722e-05, "loss": 1.9292, "step": 22943500 }, { "epoch": 66.41, "learning_rate": 1.6805181027695444e-05, "loss": 1.9082, "step": 22944000 }, { "epoch": 66.41, "learning_rate": 1.6804457380048166e-05, "loss": 1.8857, "step": 22944500 }, { "epoch": 66.42, "learning_rate": 1.6803735179696185e-05, "loss": 1.9099, "step": 22945000 }, { "epoch": 66.42, "learning_rate": 1.6803011532048907e-05, "loss": 1.9089, "step": 22945500 }, { "epoch": 66.42, "learning_rate": 1.6802287884401633e-05, "loss": 1.9202, "step": 22946000 }, { "epoch": 66.42, "learning_rate": 1.6801564236754355e-05, "loss": 1.9163, "step": 22946500 }, { "epoch": 66.42, "learning_rate": 1.6800840589107078e-05, "loss": 1.8993, "step": 22947000 }, { "epoch": 66.42, "learning_rate": 1.68001169414598e-05, "loss": 1.9341, "step": 22947500 }, { "epoch": 66.43, "learning_rate": 1.679939474110782e-05, "loss": 1.9094, "step": 22948000 }, { "epoch": 66.43, "learning_rate": 1.679867109346054e-05, "loss": 1.9052, "step": 22948500 }, { "epoch": 66.43, "learning_rate": 1.6797947445813263e-05, "loss": 1.9293, "step": 22949000 }, { "epoch": 66.43, "learning_rate": 1.6797223798165985e-05, "loss": 1.9194, "step": 22949500 }, { "epoch": 66.43, "learning_rate": 1.679650015051871e-05, "loss": 1.9042, "step": 22950000 }, { "epoch": 66.43, "learning_rate": 1.6795776502871437e-05, "loss": 1.8827, "step": 22950500 }, { "epoch": 66.43, "learning_rate": 1.6795054302519452e-05, "loss": 1.9068, "step": 22951000 }, { "epoch": 66.44, "learning_rate": 1.6794330654872174e-05, "loss": 1.8916, "step": 22951500 }, { "epoch": 66.44, "learning_rate": 1.6793608454520193e-05, "loss": 1.9384, "step": 22952000 }, { "epoch": 66.44, "learning_rate": 1.6792884806872916e-05, "loss": 1.9068, "step": 22952500 }, { "epoch": 66.44, "learning_rate": 1.6792161159225638e-05, "loss": 1.9143, "step": 22953000 }, { "epoch": 66.44, "learning_rate": 1.6791437511578364e-05, "loss": 1.9163, "step": 22953500 }, { "epoch": 66.44, "learning_rate": 1.6790713863931086e-05, "loss": 1.908, "step": 22954000 }, { "epoch": 66.44, "learning_rate": 1.678999021628381e-05, "loss": 1.9246, "step": 22954500 }, { "epoch": 66.45, "learning_rate": 1.6789266568636534e-05, "loss": 1.9196, "step": 22955000 }, { "epoch": 66.45, "learning_rate": 1.678854436828455e-05, "loss": 1.9107, "step": 22955500 }, { "epoch": 66.45, "learning_rate": 1.6787820720637275e-05, "loss": 1.9142, "step": 22956000 }, { "epoch": 66.45, "learning_rate": 1.6787097072989997e-05, "loss": 1.923, "step": 22956500 }, { "epoch": 66.45, "learning_rate": 1.678637342534272e-05, "loss": 1.9135, "step": 22957000 }, { "epoch": 66.45, "learning_rate": 1.6785649777695445e-05, "loss": 1.8801, "step": 22957500 }, { "epoch": 66.45, "learning_rate": 1.6784926130048167e-05, "loss": 1.9263, "step": 22958000 }, { "epoch": 66.46, "learning_rate": 1.678420248240089e-05, "loss": 1.91, "step": 22958500 }, { "epoch": 66.46, "learning_rate": 1.6783480282048908e-05, "loss": 1.9311, "step": 22959000 }, { "epoch": 66.46, "learning_rate": 1.6782758081696924e-05, "loss": 1.9288, "step": 22959500 }, { "epoch": 66.46, "learning_rate": 1.678203443404965e-05, "loss": 1.916, "step": 22960000 }, { "epoch": 66.46, "learning_rate": 1.678131078640237e-05, "loss": 1.8826, "step": 22960500 }, { "epoch": 66.46, "learning_rate": 1.6780587138755094e-05, "loss": 1.8898, "step": 22961000 }, { "epoch": 66.46, "learning_rate": 1.6779864938403113e-05, "loss": 1.9139, "step": 22961500 }, { "epoch": 66.47, "learning_rate": 1.677914129075584e-05, "loss": 1.8936, "step": 22962000 }, { "epoch": 66.47, "learning_rate": 1.677841764310856e-05, "loss": 1.9045, "step": 22962500 }, { "epoch": 66.47, "learning_rate": 1.6777693995461283e-05, "loss": 1.9174, "step": 22963000 }, { "epoch": 66.47, "learning_rate": 1.6776971795109302e-05, "loss": 1.908, "step": 22963500 }, { "epoch": 66.47, "learning_rate": 1.6776248147462024e-05, "loss": 1.9149, "step": 22964000 }, { "epoch": 66.47, "learning_rate": 1.677552594711004e-05, "loss": 1.8846, "step": 22964500 }, { "epoch": 66.47, "learning_rate": 1.6774802299462762e-05, "loss": 1.9232, "step": 22965000 }, { "epoch": 66.48, "learning_rate": 1.6774078651815488e-05, "loss": 1.9284, "step": 22965500 }, { "epoch": 66.48, "learning_rate": 1.6773355004168213e-05, "loss": 1.8986, "step": 22966000 }, { "epoch": 66.48, "learning_rate": 1.6772631356520935e-05, "loss": 1.9334, "step": 22966500 }, { "epoch": 66.48, "learning_rate": 1.6771907708873658e-05, "loss": 1.9051, "step": 22967000 }, { "epoch": 66.48, "learning_rate": 1.677118406122638e-05, "loss": 1.8966, "step": 22967500 }, { "epoch": 66.48, "learning_rate": 1.6770460413579102e-05, "loss": 1.894, "step": 22968000 }, { "epoch": 66.48, "learning_rate": 1.6769736765931828e-05, "loss": 1.9016, "step": 22968500 }, { "epoch": 66.49, "learning_rate": 1.6769013118284553e-05, "loss": 1.9102, "step": 22969000 }, { "epoch": 66.49, "learning_rate": 1.6768289470637276e-05, "loss": 1.9258, "step": 22969500 }, { "epoch": 66.49, "learning_rate": 1.6767565822989998e-05, "loss": 1.9038, "step": 22970000 }, { "epoch": 66.49, "learning_rate": 1.676684217534272e-05, "loss": 1.9147, "step": 22970500 }, { "epoch": 66.49, "learning_rate": 1.6766118527695442e-05, "loss": 1.9238, "step": 22971000 }, { "epoch": 66.49, "learning_rate": 1.6765394880048165e-05, "loss": 1.9091, "step": 22971500 }, { "epoch": 66.49, "learning_rate": 1.676467123240089e-05, "loss": 1.9321, "step": 22972000 }, { "epoch": 66.5, "learning_rate": 1.6763947584753616e-05, "loss": 1.9146, "step": 22972500 }, { "epoch": 66.5, "learning_rate": 1.676322538440163e-05, "loss": 1.8967, "step": 22973000 }, { "epoch": 66.5, "learning_rate": 1.6762501736754354e-05, "loss": 1.913, "step": 22973500 }, { "epoch": 66.5, "learning_rate": 1.676177808910708e-05, "loss": 1.9192, "step": 22974000 }, { "epoch": 66.5, "learning_rate": 1.67610544414598e-05, "loss": 1.9129, "step": 22974500 }, { "epoch": 66.5, "learning_rate": 1.6760330793812524e-05, "loss": 1.927, "step": 22975000 }, { "epoch": 66.5, "learning_rate": 1.6759607146165246e-05, "loss": 1.9093, "step": 22975500 }, { "epoch": 66.51, "learning_rate": 1.675888349851797e-05, "loss": 1.9057, "step": 22976000 }, { "epoch": 66.51, "learning_rate": 1.6758159850870694e-05, "loss": 1.9019, "step": 22976500 }, { "epoch": 66.51, "learning_rate": 1.6757436203223416e-05, "loss": 1.9006, "step": 22977000 }, { "epoch": 66.51, "learning_rate": 1.675671255557614e-05, "loss": 1.9027, "step": 22977500 }, { "epoch": 66.51, "learning_rate": 1.6755988907928864e-05, "loss": 1.9079, "step": 22978000 }, { "epoch": 66.51, "learning_rate": 1.6755265260281586e-05, "loss": 1.9073, "step": 22978500 }, { "epoch": 66.51, "learning_rate": 1.6754541612634308e-05, "loss": 1.9288, "step": 22979000 }, { "epoch": 66.52, "learning_rate": 1.6753819412282327e-05, "loss": 1.9092, "step": 22979500 }, { "epoch": 66.52, "learning_rate": 1.6753095764635053e-05, "loss": 1.9311, "step": 22980000 }, { "epoch": 66.52, "learning_rate": 1.6752372116987775e-05, "loss": 1.9178, "step": 22980500 }, { "epoch": 66.52, "learning_rate": 1.6751648469340497e-05, "loss": 1.9171, "step": 22981000 }, { "epoch": 66.52, "learning_rate": 1.675092482169322e-05, "loss": 1.936, "step": 22981500 }, { "epoch": 66.52, "learning_rate": 1.675020262134124e-05, "loss": 1.8911, "step": 22982000 }, { "epoch": 66.52, "learning_rate": 1.674947897369396e-05, "loss": 1.91, "step": 22982500 }, { "epoch": 66.53, "learning_rate": 1.6748755326046683e-05, "loss": 1.9133, "step": 22983000 }, { "epoch": 66.53, "learning_rate": 1.674803167839941e-05, "loss": 1.9014, "step": 22983500 }, { "epoch": 66.53, "learning_rate": 1.674730803075213e-05, "loss": 1.9086, "step": 22984000 }, { "epoch": 66.53, "learning_rate": 1.674658583040015e-05, "loss": 1.9284, "step": 22984500 }, { "epoch": 66.53, "learning_rate": 1.6745862182752872e-05, "loss": 1.9241, "step": 22985000 }, { "epoch": 66.53, "learning_rate": 1.6745138535105594e-05, "loss": 1.9188, "step": 22985500 }, { "epoch": 66.54, "learning_rate": 1.6744414887458316e-05, "loss": 1.9032, "step": 22986000 }, { "epoch": 66.54, "learning_rate": 1.6743692687106335e-05, "loss": 1.9221, "step": 22986500 }, { "epoch": 66.54, "learning_rate": 1.674296903945906e-05, "loss": 1.8848, "step": 22987000 }, { "epoch": 66.54, "learning_rate": 1.6742245391811783e-05, "loss": 1.9053, "step": 22987500 }, { "epoch": 66.54, "learning_rate": 1.6741523191459802e-05, "loss": 1.9004, "step": 22988000 }, { "epoch": 66.54, "learning_rate": 1.6740799543812524e-05, "loss": 1.9118, "step": 22988500 }, { "epoch": 66.54, "learning_rate": 1.6740075896165247e-05, "loss": 1.894, "step": 22989000 }, { "epoch": 66.55, "learning_rate": 1.673935224851797e-05, "loss": 1.9105, "step": 22989500 }, { "epoch": 66.55, "learning_rate": 1.673862860087069e-05, "loss": 1.9275, "step": 22990000 }, { "epoch": 66.55, "learning_rate": 1.6737904953223417e-05, "loss": 1.8973, "step": 22990500 }, { "epoch": 66.55, "learning_rate": 1.6737181305576142e-05, "loss": 1.8976, "step": 22991000 }, { "epoch": 66.55, "learning_rate": 1.6736457657928865e-05, "loss": 1.9024, "step": 22991500 }, { "epoch": 66.55, "learning_rate": 1.6735736904872177e-05, "loss": 1.8904, "step": 22992000 }, { "epoch": 66.55, "learning_rate": 1.67350132572249e-05, "loss": 1.9046, "step": 22992500 }, { "epoch": 66.56, "learning_rate": 1.673428960957762e-05, "loss": 1.9142, "step": 22993000 }, { "epoch": 66.56, "learning_rate": 1.6733565961930344e-05, "loss": 1.9074, "step": 22993500 }, { "epoch": 66.56, "learning_rate": 1.673284231428307e-05, "loss": 1.9169, "step": 22994000 }, { "epoch": 66.56, "learning_rate": 1.6732118666635795e-05, "loss": 1.9296, "step": 22994500 }, { "epoch": 66.56, "learning_rate": 1.6731395018988517e-05, "loss": 1.9452, "step": 22995000 }, { "epoch": 66.56, "learning_rate": 1.673067137134124e-05, "loss": 1.9323, "step": 22995500 }, { "epoch": 66.56, "learning_rate": 1.672994772369396e-05, "loss": 1.874, "step": 22996000 }, { "epoch": 66.57, "learning_rate": 1.6729224076046684e-05, "loss": 1.9176, "step": 22996500 }, { "epoch": 66.57, "learning_rate": 1.6728500428399406e-05, "loss": 1.9134, "step": 22997000 }, { "epoch": 66.57, "learning_rate": 1.672777678075213e-05, "loss": 1.9349, "step": 22997500 }, { "epoch": 66.57, "learning_rate": 1.6727053133104857e-05, "loss": 1.936, "step": 22998000 }, { "epoch": 66.57, "learning_rate": 1.6726330932752873e-05, "loss": 1.9198, "step": 22998500 }, { "epoch": 66.57, "learning_rate": 1.6725608732400892e-05, "loss": 1.8967, "step": 22999000 }, { "epoch": 66.57, "learning_rate": 1.6724885084753614e-05, "loss": 1.9053, "step": 22999500 }, { "epoch": 66.58, "learning_rate": 1.6724161437106336e-05, "loss": 1.9276, "step": 23000000 }, { "epoch": 66.58, "learning_rate": 1.672343778945906e-05, "loss": 1.9179, "step": 23000500 }, { "epoch": 66.58, "learning_rate": 1.6722715589107077e-05, "loss": 1.8936, "step": 23001000 }, { "epoch": 66.58, "learning_rate": 1.67219919414598e-05, "loss": 1.9237, "step": 23001500 }, { "epoch": 66.58, "learning_rate": 1.6721268293812522e-05, "loss": 1.9202, "step": 23002000 }, { "epoch": 66.58, "learning_rate": 1.6720544646165247e-05, "loss": 1.8906, "step": 23002500 }, { "epoch": 66.58, "learning_rate": 1.671982099851797e-05, "loss": 1.906, "step": 23003000 }, { "epoch": 66.59, "learning_rate": 1.6719097350870695e-05, "loss": 1.9284, "step": 23003500 }, { "epoch": 66.59, "learning_rate": 1.6718373703223418e-05, "loss": 1.8878, "step": 23004000 }, { "epoch": 66.59, "learning_rate": 1.671765005557614e-05, "loss": 1.9026, "step": 23004500 }, { "epoch": 66.59, "learning_rate": 1.6716926407928862e-05, "loss": 1.9097, "step": 23005000 }, { "epoch": 66.59, "learning_rate": 1.6716202760281588e-05, "loss": 1.9105, "step": 23005500 }, { "epoch": 66.59, "learning_rate": 1.671547911263431e-05, "loss": 1.9171, "step": 23006000 }, { "epoch": 66.59, "learning_rate": 1.6714755464987032e-05, "loss": 1.9184, "step": 23006500 }, { "epoch": 66.6, "learning_rate": 1.6714031817339758e-05, "loss": 1.9223, "step": 23007000 }, { "epoch": 66.6, "learning_rate": 1.671331106428307e-05, "loss": 1.9123, "step": 23007500 }, { "epoch": 66.6, "learning_rate": 1.6712587416635792e-05, "loss": 1.9263, "step": 23008000 }, { "epoch": 66.6, "learning_rate": 1.6711865216283808e-05, "loss": 1.9034, "step": 23008500 }, { "epoch": 66.6, "learning_rate": 1.6711141568636533e-05, "loss": 1.9106, "step": 23009000 }, { "epoch": 66.6, "learning_rate": 1.6710417920989256e-05, "loss": 1.8963, "step": 23009500 }, { "epoch": 66.6, "learning_rate": 1.670969427334198e-05, "loss": 1.8973, "step": 23010000 }, { "epoch": 66.61, "learning_rate": 1.6708972072989997e-05, "loss": 1.9264, "step": 23010500 }, { "epoch": 66.61, "learning_rate": 1.670824842534272e-05, "loss": 1.9195, "step": 23011000 }, { "epoch": 66.61, "learning_rate": 1.6707524777695445e-05, "loss": 1.9231, "step": 23011500 }, { "epoch": 66.61, "learning_rate": 1.6706801130048167e-05, "loss": 1.8935, "step": 23012000 }, { "epoch": 66.61, "learning_rate": 1.670607748240089e-05, "loss": 1.9322, "step": 23012500 }, { "epoch": 66.61, "learning_rate": 1.670535383475361e-05, "loss": 1.9264, "step": 23013000 }, { "epoch": 66.61, "learning_rate": 1.6704630187106337e-05, "loss": 1.9024, "step": 23013500 }, { "epoch": 66.62, "learning_rate": 1.6703907986754356e-05, "loss": 1.8916, "step": 23014000 }, { "epoch": 66.62, "learning_rate": 1.6703184339107078e-05, "loss": 1.9235, "step": 23014500 }, { "epoch": 66.62, "learning_rate": 1.67024606914598e-05, "loss": 1.925, "step": 23015000 }, { "epoch": 66.62, "learning_rate": 1.6701737043812523e-05, "loss": 1.9229, "step": 23015500 }, { "epoch": 66.62, "learning_rate": 1.6701013396165245e-05, "loss": 1.8937, "step": 23016000 }, { "epoch": 66.62, "learning_rate": 1.6700291195813264e-05, "loss": 1.9314, "step": 23016500 }, { "epoch": 66.62, "learning_rate": 1.6699567548165986e-05, "loss": 1.9182, "step": 23017000 }, { "epoch": 66.63, "learning_rate": 1.669884390051871e-05, "loss": 1.8828, "step": 23017500 }, { "epoch": 66.63, "learning_rate": 1.6698120252871434e-05, "loss": 1.9241, "step": 23018000 }, { "epoch": 66.63, "learning_rate": 1.6697398052519453e-05, "loss": 1.8892, "step": 23018500 }, { "epoch": 66.63, "learning_rate": 1.6696674404872175e-05, "loss": 1.8967, "step": 23019000 }, { "epoch": 66.63, "learning_rate": 1.6695950757224897e-05, "loss": 1.9194, "step": 23019500 }, { "epoch": 66.63, "learning_rate": 1.6695227109577623e-05, "loss": 1.9132, "step": 23020000 }, { "epoch": 66.63, "learning_rate": 1.6694503461930345e-05, "loss": 1.9073, "step": 23020500 }, { "epoch": 66.64, "learning_rate": 1.669377981428307e-05, "loss": 1.9296, "step": 23021000 }, { "epoch": 66.64, "learning_rate": 1.6693056166635793e-05, "loss": 1.9247, "step": 23021500 }, { "epoch": 66.64, "learning_rate": 1.6692332518988515e-05, "loss": 1.9305, "step": 23022000 }, { "epoch": 66.64, "learning_rate": 1.6691608871341237e-05, "loss": 1.9178, "step": 23022500 }, { "epoch": 66.64, "learning_rate": 1.6690886670989256e-05, "loss": 1.9226, "step": 23023000 }, { "epoch": 66.64, "learning_rate": 1.6690164470637272e-05, "loss": 1.9191, "step": 23023500 }, { "epoch": 66.65, "learning_rate": 1.6689440822989998e-05, "loss": 1.9044, "step": 23024000 }, { "epoch": 66.65, "learning_rate": 1.668871717534272e-05, "loss": 1.8998, "step": 23024500 }, { "epoch": 66.65, "learning_rate": 1.6687993527695445e-05, "loss": 1.9294, "step": 23025000 }, { "epoch": 66.65, "learning_rate": 1.6687269880048168e-05, "loss": 1.9314, "step": 23025500 }, { "epoch": 66.65, "learning_rate": 1.668654623240089e-05, "loss": 1.9303, "step": 23026000 }, { "epoch": 66.65, "learning_rate": 1.6685822584753612e-05, "loss": 1.9056, "step": 23026500 }, { "epoch": 66.65, "learning_rate": 1.6685098937106334e-05, "loss": 1.8864, "step": 23027000 }, { "epoch": 66.66, "learning_rate": 1.668437528945906e-05, "loss": 1.9239, "step": 23027500 }, { "epoch": 66.66, "learning_rate": 1.6683651641811782e-05, "loss": 1.9477, "step": 23028000 }, { "epoch": 66.66, "learning_rate": 1.6682927994164508e-05, "loss": 1.8899, "step": 23028500 }, { "epoch": 66.66, "learning_rate": 1.6682205793812523e-05, "loss": 1.8952, "step": 23029000 }, { "epoch": 66.66, "learning_rate": 1.6681483593460542e-05, "loss": 1.9093, "step": 23029500 }, { "epoch": 66.66, "learning_rate": 1.6680759945813265e-05, "loss": 1.9019, "step": 23030000 }, { "epoch": 66.66, "learning_rate": 1.6680036298165987e-05, "loss": 1.9063, "step": 23030500 }, { "epoch": 66.67, "learning_rate": 1.6679312650518712e-05, "loss": 1.931, "step": 23031000 }, { "epoch": 66.67, "learning_rate": 1.6678589002871435e-05, "loss": 1.9319, "step": 23031500 }, { "epoch": 66.67, "learning_rate": 1.667786535522416e-05, "loss": 1.8889, "step": 23032000 }, { "epoch": 66.67, "learning_rate": 1.6677141707576883e-05, "loss": 1.9268, "step": 23032500 }, { "epoch": 66.67, "learning_rate": 1.6676418059929605e-05, "loss": 1.9123, "step": 23033000 }, { "epoch": 66.67, "learning_rate": 1.6675694412282327e-05, "loss": 1.9319, "step": 23033500 }, { "epoch": 66.67, "learning_rate": 1.667497076463505e-05, "loss": 1.9187, "step": 23034000 }, { "epoch": 66.68, "learning_rate": 1.6674247116987775e-05, "loss": 1.9217, "step": 23034500 }, { "epoch": 66.68, "learning_rate": 1.6673523469340497e-05, "loss": 1.9264, "step": 23035000 }, { "epoch": 66.68, "learning_rate": 1.6672799821693223e-05, "loss": 1.8961, "step": 23035500 }, { "epoch": 66.68, "learning_rate": 1.6672077621341238e-05, "loss": 1.9298, "step": 23036000 }, { "epoch": 66.68, "learning_rate": 1.6671353973693964e-05, "loss": 1.9108, "step": 23036500 }, { "epoch": 66.68, "learning_rate": 1.6670630326046686e-05, "loss": 1.9186, "step": 23037000 }, { "epoch": 66.68, "learning_rate": 1.666990667839941e-05, "loss": 1.9095, "step": 23037500 }, { "epoch": 66.69, "learning_rate": 1.666918303075213e-05, "loss": 1.9186, "step": 23038000 }, { "epoch": 66.69, "learning_rate": 1.666846083040015e-05, "loss": 1.9374, "step": 23038500 }, { "epoch": 66.69, "learning_rate": 1.6667737182752872e-05, "loss": 1.8944, "step": 23039000 }, { "epoch": 66.69, "learning_rate": 1.6667013535105597e-05, "loss": 1.9234, "step": 23039500 }, { "epoch": 66.69, "learning_rate": 1.666628988745832e-05, "loss": 1.8841, "step": 23040000 }, { "epoch": 66.69, "learning_rate": 1.6665566239811042e-05, "loss": 1.8965, "step": 23040500 }, { "epoch": 66.69, "learning_rate": 1.666484403945906e-05, "loss": 1.9146, "step": 23041000 }, { "epoch": 66.7, "learning_rate": 1.6664120391811783e-05, "loss": 1.9119, "step": 23041500 }, { "epoch": 66.7, "learning_rate": 1.6663396744164505e-05, "loss": 1.9028, "step": 23042000 }, { "epoch": 66.7, "learning_rate": 1.6662673096517228e-05, "loss": 1.9118, "step": 23042500 }, { "epoch": 66.7, "learning_rate": 1.6661949448869953e-05, "loss": 1.889, "step": 23043000 }, { "epoch": 66.7, "learning_rate": 1.6661225801222675e-05, "loss": 1.8971, "step": 23043500 }, { "epoch": 66.7, "learning_rate": 1.66605021535754e-05, "loss": 1.8901, "step": 23044000 }, { "epoch": 66.7, "learning_rate": 1.6659778505928123e-05, "loss": 1.919, "step": 23044500 }, { "epoch": 66.71, "learning_rate": 1.665905630557614e-05, "loss": 1.8782, "step": 23045000 }, { "epoch": 66.71, "learning_rate": 1.6658332657928864e-05, "loss": 1.9189, "step": 23045500 }, { "epoch": 66.71, "learning_rate": 1.6657609010281587e-05, "loss": 1.9044, "step": 23046000 }, { "epoch": 66.71, "learning_rate": 1.665688536263431e-05, "loss": 1.9256, "step": 23046500 }, { "epoch": 66.71, "learning_rate": 1.6656161714987034e-05, "loss": 1.9162, "step": 23047000 }, { "epoch": 66.71, "learning_rate": 1.6655438067339757e-05, "loss": 1.9044, "step": 23047500 }, { "epoch": 66.71, "learning_rate": 1.665471441969248e-05, "loss": 1.9105, "step": 23048000 }, { "epoch": 66.72, "learning_rate": 1.665399366663579e-05, "loss": 1.9044, "step": 23048500 }, { "epoch": 66.72, "learning_rate": 1.6653270018988513e-05, "loss": 1.9345, "step": 23049000 }, { "epoch": 66.72, "learning_rate": 1.665254637134124e-05, "loss": 1.9044, "step": 23049500 }, { "epoch": 66.72, "learning_rate": 1.665182272369396e-05, "loss": 1.9066, "step": 23050000 }, { "epoch": 66.72, "learning_rate": 1.6651099076046684e-05, "loss": 1.9025, "step": 23050500 }, { "epoch": 66.72, "learning_rate": 1.665037542839941e-05, "loss": 1.8999, "step": 23051000 }, { "epoch": 66.72, "learning_rate": 1.6649653228047428e-05, "loss": 1.9315, "step": 23051500 }, { "epoch": 66.73, "learning_rate": 1.664892958040015e-05, "loss": 1.8911, "step": 23052000 }, { "epoch": 66.73, "learning_rate": 1.6648205932752873e-05, "loss": 1.9031, "step": 23052500 }, { "epoch": 66.73, "learning_rate": 1.6647482285105595e-05, "loss": 1.9381, "step": 23053000 }, { "epoch": 66.73, "learning_rate": 1.6646758637458317e-05, "loss": 1.9165, "step": 23053500 }, { "epoch": 66.73, "learning_rate": 1.664603498981104e-05, "loss": 1.9039, "step": 23054000 }, { "epoch": 66.73, "learning_rate": 1.6645311342163765e-05, "loss": 1.895, "step": 23054500 }, { "epoch": 66.73, "learning_rate": 1.664458769451649e-05, "loss": 1.9044, "step": 23055000 }, { "epoch": 66.74, "learning_rate": 1.6643864046869213e-05, "loss": 1.915, "step": 23055500 }, { "epoch": 66.74, "learning_rate": 1.6643141846517228e-05, "loss": 1.9073, "step": 23056000 }, { "epoch": 66.74, "learning_rate": 1.6642418198869954e-05, "loss": 1.9016, "step": 23056500 }, { "epoch": 66.74, "learning_rate": 1.664169599851797e-05, "loss": 1.9143, "step": 23057000 }, { "epoch": 66.74, "learning_rate": 1.6640972350870692e-05, "loss": 1.8988, "step": 23057500 }, { "epoch": 66.74, "learning_rate": 1.6640248703223414e-05, "loss": 1.9035, "step": 23058000 }, { "epoch": 66.74, "learning_rate": 1.663952505557614e-05, "loss": 1.9072, "step": 23058500 }, { "epoch": 66.75, "learning_rate": 1.6638801407928865e-05, "loss": 1.9236, "step": 23059000 }, { "epoch": 66.75, "learning_rate": 1.6638077760281587e-05, "loss": 1.9278, "step": 23059500 }, { "epoch": 66.75, "learning_rate": 1.663735411263431e-05, "loss": 1.924, "step": 23060000 }, { "epoch": 66.75, "learning_rate": 1.663663191228233e-05, "loss": 1.9248, "step": 23060500 }, { "epoch": 66.75, "learning_rate": 1.663590826463505e-05, "loss": 1.9108, "step": 23061000 }, { "epoch": 66.75, "learning_rate": 1.6635184616987773e-05, "loss": 1.9035, "step": 23061500 }, { "epoch": 66.76, "learning_rate": 1.66344609693405e-05, "loss": 1.8963, "step": 23062000 }, { "epoch": 66.76, "learning_rate": 1.663373732169322e-05, "loss": 1.9111, "step": 23062500 }, { "epoch": 66.76, "learning_rate": 1.6633013674045943e-05, "loss": 1.8997, "step": 23063000 }, { "epoch": 66.76, "learning_rate": 1.6632290026398665e-05, "loss": 1.9264, "step": 23063500 }, { "epoch": 66.76, "learning_rate": 1.663156637875139e-05, "loss": 1.8963, "step": 23064000 }, { "epoch": 66.76, "learning_rate": 1.6630844178399407e-05, "loss": 1.8832, "step": 23064500 }, { "epoch": 66.76, "learning_rate": 1.663012053075213e-05, "loss": 1.9312, "step": 23065000 }, { "epoch": 66.77, "learning_rate": 1.6629396883104854e-05, "loss": 1.9131, "step": 23065500 }, { "epoch": 66.77, "learning_rate": 1.662867323545758e-05, "loss": 1.9327, "step": 23066000 }, { "epoch": 66.77, "learning_rate": 1.6627949587810302e-05, "loss": 1.9089, "step": 23066500 }, { "epoch": 66.77, "learning_rate": 1.6627227387458318e-05, "loss": 1.9099, "step": 23067000 }, { "epoch": 66.77, "learning_rate": 1.6626503739811043e-05, "loss": 1.9232, "step": 23067500 }, { "epoch": 66.77, "learning_rate": 1.6625780092163766e-05, "loss": 1.9104, "step": 23068000 }, { "epoch": 66.77, "learning_rate": 1.6625056444516488e-05, "loss": 1.8965, "step": 23068500 }, { "epoch": 66.78, "learning_rate": 1.6624334244164503e-05, "loss": 1.9078, "step": 23069000 }, { "epoch": 66.78, "learning_rate": 1.662361059651723e-05, "loss": 1.902, "step": 23069500 }, { "epoch": 66.78, "learning_rate": 1.6622886948869955e-05, "loss": 1.8969, "step": 23070000 }, { "epoch": 66.78, "learning_rate": 1.6622163301222677e-05, "loss": 1.8914, "step": 23070500 }, { "epoch": 66.78, "learning_rate": 1.66214396535754e-05, "loss": 1.9145, "step": 23071000 }, { "epoch": 66.78, "learning_rate": 1.6620717453223418e-05, "loss": 1.9422, "step": 23071500 }, { "epoch": 66.78, "learning_rate": 1.6619995252871434e-05, "loss": 1.9224, "step": 23072000 }, { "epoch": 66.79, "learning_rate": 1.6619271605224156e-05, "loss": 1.9025, "step": 23072500 }, { "epoch": 66.79, "learning_rate": 1.661854795757688e-05, "loss": 1.9214, "step": 23073000 }, { "epoch": 66.79, "learning_rate": 1.6617824309929607e-05, "loss": 1.9371, "step": 23073500 }, { "epoch": 66.79, "learning_rate": 1.661710066228233e-05, "loss": 1.9282, "step": 23074000 }, { "epoch": 66.79, "learning_rate": 1.661637701463505e-05, "loss": 1.9363, "step": 23074500 }, { "epoch": 66.79, "learning_rate": 1.6615653366987774e-05, "loss": 1.9222, "step": 23075000 }, { "epoch": 66.79, "learning_rate": 1.6614931166635793e-05, "loss": 1.9291, "step": 23075500 }, { "epoch": 66.8, "learning_rate": 1.6614207518988515e-05, "loss": 1.9246, "step": 23076000 }, { "epoch": 66.8, "learning_rate": 1.6613483871341237e-05, "loss": 1.9134, "step": 23076500 }, { "epoch": 66.8, "learning_rate": 1.6612760223693963e-05, "loss": 1.9329, "step": 23077000 }, { "epoch": 66.8, "learning_rate": 1.6612036576046685e-05, "loss": 1.8991, "step": 23077500 }, { "epoch": 66.8, "learning_rate": 1.6611312928399407e-05, "loss": 1.8914, "step": 23078000 }, { "epoch": 66.8, "learning_rate": 1.661058928075213e-05, "loss": 1.9219, "step": 23078500 }, { "epoch": 66.8, "learning_rate": 1.660986708040015e-05, "loss": 1.8934, "step": 23079000 }, { "epoch": 66.81, "learning_rate": 1.660914343275287e-05, "loss": 1.9284, "step": 23079500 }, { "epoch": 66.81, "learning_rate": 1.6608419785105593e-05, "loss": 1.9146, "step": 23080000 }, { "epoch": 66.81, "learning_rate": 1.660769613745832e-05, "loss": 1.9126, "step": 23080500 }, { "epoch": 66.81, "learning_rate": 1.6606972489811044e-05, "loss": 1.9462, "step": 23081000 }, { "epoch": 66.81, "learning_rate": 1.6606248842163766e-05, "loss": 1.8886, "step": 23081500 }, { "epoch": 66.81, "learning_rate": 1.660552519451649e-05, "loss": 1.94, "step": 23082000 }, { "epoch": 66.81, "learning_rate": 1.660480154686921e-05, "loss": 1.9372, "step": 23082500 }, { "epoch": 66.82, "learning_rate": 1.660407934651723e-05, "loss": 1.8984, "step": 23083000 }, { "epoch": 66.82, "learning_rate": 1.6603355698869952e-05, "loss": 1.9154, "step": 23083500 }, { "epoch": 66.82, "learning_rate": 1.6602632051222674e-05, "loss": 1.9316, "step": 23084000 }, { "epoch": 66.82, "learning_rate": 1.66019084035754e-05, "loss": 1.9096, "step": 23084500 }, { "epoch": 66.82, "learning_rate": 1.6601184755928122e-05, "loss": 1.8998, "step": 23085000 }, { "epoch": 66.82, "learning_rate": 1.6600461108280844e-05, "loss": 1.93, "step": 23085500 }, { "epoch": 66.82, "learning_rate": 1.659973746063357e-05, "loss": 1.8916, "step": 23086000 }, { "epoch": 66.83, "learning_rate": 1.6599013812986292e-05, "loss": 1.9027, "step": 23086500 }, { "epoch": 66.83, "learning_rate": 1.6598291612634308e-05, "loss": 1.9273, "step": 23087000 }, { "epoch": 66.83, "learning_rate": 1.6597567964987033e-05, "loss": 1.8951, "step": 23087500 }, { "epoch": 66.83, "learning_rate": 1.659684431733976e-05, "loss": 1.9054, "step": 23088000 }, { "epoch": 66.83, "learning_rate": 1.659612066969248e-05, "loss": 1.9007, "step": 23088500 }, { "epoch": 66.83, "learning_rate": 1.6595397022045204e-05, "loss": 1.9101, "step": 23089000 }, { "epoch": 66.83, "learning_rate": 1.6594673374397926e-05, "loss": 1.8899, "step": 23089500 }, { "epoch": 66.84, "learning_rate": 1.6593951174045945e-05, "loss": 1.8948, "step": 23090000 }, { "epoch": 66.84, "learning_rate": 1.6593227526398667e-05, "loss": 1.9063, "step": 23090500 }, { "epoch": 66.84, "learning_rate": 1.659250387875139e-05, "loss": 1.9137, "step": 23091000 }, { "epoch": 66.84, "learning_rate": 1.659178023110411e-05, "loss": 1.8965, "step": 23091500 }, { "epoch": 66.84, "learning_rate": 1.6591058030752134e-05, "loss": 1.9311, "step": 23092000 }, { "epoch": 66.84, "learning_rate": 1.6590334383104856e-05, "loss": 1.9126, "step": 23092500 }, { "epoch": 66.84, "learning_rate": 1.6589610735457578e-05, "loss": 1.9111, "step": 23093000 }, { "epoch": 66.85, "learning_rate": 1.65888870878103e-05, "loss": 1.9198, "step": 23093500 }, { "epoch": 66.85, "learning_rate": 1.6588163440163023e-05, "loss": 1.9133, "step": 23094000 }, { "epoch": 66.85, "learning_rate": 1.6587439792515745e-05, "loss": 1.903, "step": 23094500 }, { "epoch": 66.85, "learning_rate": 1.658671614486847e-05, "loss": 1.891, "step": 23095000 }, { "epoch": 66.85, "learning_rate": 1.6585992497221196e-05, "loss": 1.9119, "step": 23095500 }, { "epoch": 66.85, "learning_rate": 1.6585270296869212e-05, "loss": 1.9267, "step": 23096000 }, { "epoch": 66.85, "learning_rate": 1.6584546649221934e-05, "loss": 1.8912, "step": 23096500 }, { "epoch": 66.86, "learning_rate": 1.658382300157466e-05, "loss": 1.9115, "step": 23097000 }, { "epoch": 66.86, "learning_rate": 1.6583099353927382e-05, "loss": 1.9504, "step": 23097500 }, { "epoch": 66.86, "learning_rate": 1.6582377153575397e-05, "loss": 1.9179, "step": 23098000 }, { "epoch": 66.86, "learning_rate": 1.6581653505928123e-05, "loss": 1.886, "step": 23098500 }, { "epoch": 66.86, "learning_rate": 1.658093130557614e-05, "loss": 1.938, "step": 23099000 }, { "epoch": 66.86, "learning_rate": 1.6580207657928864e-05, "loss": 1.9428, "step": 23099500 }, { "epoch": 66.87, "learning_rate": 1.6579484010281586e-05, "loss": 1.9042, "step": 23100000 }, { "epoch": 66.87, "learning_rate": 1.657876036263431e-05, "loss": 1.9063, "step": 23100500 }, { "epoch": 66.87, "learning_rate": 1.6578036714987034e-05, "loss": 1.9206, "step": 23101000 }, { "epoch": 66.87, "learning_rate": 1.6577313067339757e-05, "loss": 1.9106, "step": 23101500 }, { "epoch": 66.87, "learning_rate": 1.6576590866987772e-05, "loss": 1.8983, "step": 23102000 }, { "epoch": 66.87, "learning_rate": 1.6575867219340498e-05, "loss": 1.9083, "step": 23102500 }, { "epoch": 66.87, "learning_rate": 1.6575143571693223e-05, "loss": 1.9372, "step": 23103000 }, { "epoch": 66.88, "learning_rate": 1.6574419924045946e-05, "loss": 1.9427, "step": 23103500 }, { "epoch": 66.88, "learning_rate": 1.6573699170989258e-05, "loss": 1.9125, "step": 23104000 }, { "epoch": 66.88, "learning_rate": 1.657297552334198e-05, "loss": 1.9164, "step": 23104500 }, { "epoch": 66.88, "learning_rate": 1.6572251875694702e-05, "loss": 1.9258, "step": 23105000 }, { "epoch": 66.88, "learning_rate": 1.6571528228047425e-05, "loss": 1.8859, "step": 23105500 }, { "epoch": 66.88, "learning_rate": 1.6570804580400147e-05, "loss": 1.9232, "step": 23106000 }, { "epoch": 66.88, "learning_rate": 1.6570080932752872e-05, "loss": 1.9101, "step": 23106500 }, { "epoch": 66.89, "learning_rate": 1.6569357285105598e-05, "loss": 1.9034, "step": 23107000 }, { "epoch": 66.89, "learning_rate": 1.656863363745832e-05, "loss": 1.8993, "step": 23107500 }, { "epoch": 66.89, "learning_rate": 1.6567911437106336e-05, "loss": 1.9082, "step": 23108000 }, { "epoch": 66.89, "learning_rate": 1.656718778945906e-05, "loss": 1.8909, "step": 23108500 }, { "epoch": 66.89, "learning_rate": 1.6566464141811784e-05, "loss": 1.9236, "step": 23109000 }, { "epoch": 66.89, "learning_rate": 1.6565740494164506e-05, "loss": 1.9027, "step": 23109500 }, { "epoch": 66.89, "learning_rate": 1.6565016846517228e-05, "loss": 1.9238, "step": 23110000 }, { "epoch": 66.9, "learning_rate": 1.6564293198869954e-05, "loss": 1.9079, "step": 23110500 }, { "epoch": 66.9, "learning_rate": 1.6563570998517973e-05, "loss": 1.898, "step": 23111000 }, { "epoch": 66.9, "learning_rate": 1.6562847350870695e-05, "loss": 1.9293, "step": 23111500 }, { "epoch": 66.9, "learning_rate": 1.6562123703223417e-05, "loss": 1.9023, "step": 23112000 }, { "epoch": 66.9, "learning_rate": 1.656140005557614e-05, "loss": 1.904, "step": 23112500 }, { "epoch": 66.9, "learning_rate": 1.656067640792886e-05, "loss": 1.896, "step": 23113000 }, { "epoch": 66.9, "learning_rate": 1.6559952760281587e-05, "loss": 1.9242, "step": 23113500 }, { "epoch": 66.91, "learning_rate": 1.655922911263431e-05, "loss": 1.8958, "step": 23114000 }, { "epoch": 66.91, "learning_rate": 1.6558505464987035e-05, "loss": 1.9317, "step": 23114500 }, { "epoch": 66.91, "learning_rate": 1.6557781817339757e-05, "loss": 1.9125, "step": 23115000 }, { "epoch": 66.91, "learning_rate": 1.655705816969248e-05, "loss": 1.9071, "step": 23115500 }, { "epoch": 66.91, "learning_rate": 1.6556334522045202e-05, "loss": 1.9113, "step": 23116000 }, { "epoch": 66.91, "learning_rate": 1.6555610874397924e-05, "loss": 1.916, "step": 23116500 }, { "epoch": 66.91, "learning_rate": 1.6554888674045943e-05, "loss": 1.9015, "step": 23117000 }, { "epoch": 66.92, "learning_rate": 1.6554165026398665e-05, "loss": 1.8995, "step": 23117500 }, { "epoch": 66.92, "learning_rate": 1.655344137875139e-05, "loss": 1.9306, "step": 23118000 }, { "epoch": 66.92, "learning_rate": 1.6552717731104113e-05, "loss": 1.9121, "step": 23118500 }, { "epoch": 66.92, "learning_rate": 1.655199408345684e-05, "loss": 1.9073, "step": 23119000 }, { "epoch": 66.92, "learning_rate": 1.6551271883104854e-05, "loss": 1.931, "step": 23119500 }, { "epoch": 66.92, "learning_rate": 1.6550548235457576e-05, "loss": 1.9332, "step": 23120000 }, { "epoch": 66.92, "learning_rate": 1.65498245878103e-05, "loss": 1.9284, "step": 23120500 }, { "epoch": 66.93, "learning_rate": 1.6549100940163024e-05, "loss": 1.9211, "step": 23121000 }, { "epoch": 66.93, "learning_rate": 1.654837729251575e-05, "loss": 1.8981, "step": 23121500 }, { "epoch": 66.93, "learning_rate": 1.6547655092163765e-05, "loss": 1.9248, "step": 23122000 }, { "epoch": 66.93, "learning_rate": 1.6546932891811784e-05, "loss": 1.9154, "step": 23122500 }, { "epoch": 66.93, "learning_rate": 1.6546209244164507e-05, "loss": 1.8986, "step": 23123000 }, { "epoch": 66.93, "learning_rate": 1.6545487043812526e-05, "loss": 1.9215, "step": 23123500 }, { "epoch": 66.93, "learning_rate": 1.6544763396165248e-05, "loss": 1.9127, "step": 23124000 }, { "epoch": 66.94, "learning_rate": 1.654403974851797e-05, "loss": 1.8934, "step": 23124500 }, { "epoch": 66.94, "learning_rate": 1.6543316100870692e-05, "loss": 1.9172, "step": 23125000 }, { "epoch": 66.94, "learning_rate": 1.654259390051871e-05, "loss": 1.8975, "step": 23125500 }, { "epoch": 66.94, "learning_rate": 1.6541870252871437e-05, "loss": 1.9121, "step": 23126000 }, { "epoch": 66.94, "learning_rate": 1.654114660522416e-05, "loss": 1.9083, "step": 23126500 }, { "epoch": 66.94, "learning_rate": 1.654042295757688e-05, "loss": 1.8995, "step": 23127000 }, { "epoch": 66.94, "learning_rate": 1.6539699309929604e-05, "loss": 1.885, "step": 23127500 }, { "epoch": 66.95, "learning_rate": 1.6538975662282326e-05, "loss": 1.9323, "step": 23128000 }, { "epoch": 66.95, "learning_rate": 1.653825201463505e-05, "loss": 1.9176, "step": 23128500 }, { "epoch": 66.95, "learning_rate": 1.6537528366987774e-05, "loss": 1.911, "step": 23129000 }, { "epoch": 66.95, "learning_rate": 1.65368047193405e-05, "loss": 1.908, "step": 23129500 }, { "epoch": 66.95, "learning_rate": 1.6536082518988515e-05, "loss": 1.9165, "step": 23130000 }, { "epoch": 66.95, "learning_rate": 1.6535360318636534e-05, "loss": 1.9157, "step": 23130500 }, { "epoch": 66.95, "learning_rate": 1.6534636670989256e-05, "loss": 1.9391, "step": 23131000 }, { "epoch": 66.96, "learning_rate": 1.6533913023341978e-05, "loss": 1.9188, "step": 23131500 }, { "epoch": 66.96, "learning_rate": 1.65331893756947e-05, "loss": 1.9078, "step": 23132000 }, { "epoch": 66.96, "learning_rate": 1.6532465728047426e-05, "loss": 1.9342, "step": 23132500 }, { "epoch": 66.96, "learning_rate": 1.653174208040015e-05, "loss": 1.9017, "step": 23133000 }, { "epoch": 66.96, "learning_rate": 1.6531018432752874e-05, "loss": 1.9392, "step": 23133500 }, { "epoch": 66.96, "learning_rate": 1.6530294785105596e-05, "loss": 1.9366, "step": 23134000 }, { "epoch": 66.96, "learning_rate": 1.6529572584753615e-05, "loss": 1.8761, "step": 23134500 }, { "epoch": 66.97, "learning_rate": 1.6528848937106337e-05, "loss": 1.8929, "step": 23135000 }, { "epoch": 66.97, "learning_rate": 1.652812528945906e-05, "loss": 1.9009, "step": 23135500 }, { "epoch": 66.97, "learning_rate": 1.6527401641811782e-05, "loss": 1.9353, "step": 23136000 }, { "epoch": 66.97, "learning_rate": 1.6526677994164504e-05, "loss": 1.9237, "step": 23136500 }, { "epoch": 66.97, "learning_rate": 1.652595434651723e-05, "loss": 1.9332, "step": 23137000 }, { "epoch": 66.97, "learning_rate": 1.6525230698869952e-05, "loss": 1.9169, "step": 23137500 }, { "epoch": 66.98, "learning_rate": 1.6524507051222678e-05, "loss": 1.9157, "step": 23138000 }, { "epoch": 66.98, "learning_rate": 1.6523784850870693e-05, "loss": 1.9065, "step": 23138500 }, { "epoch": 66.98, "learning_rate": 1.6523061203223415e-05, "loss": 1.9145, "step": 23139000 }, { "epoch": 66.98, "learning_rate": 1.652233755557614e-05, "loss": 1.9257, "step": 23139500 }, { "epoch": 66.98, "learning_rate": 1.6521613907928863e-05, "loss": 1.8951, "step": 23140000 }, { "epoch": 66.98, "learning_rate": 1.652089026028159e-05, "loss": 1.9014, "step": 23140500 }, { "epoch": 66.98, "learning_rate": 1.6520168059929604e-05, "loss": 1.9362, "step": 23141000 }, { "epoch": 66.99, "learning_rate": 1.6519445859577623e-05, "loss": 1.938, "step": 23141500 }, { "epoch": 66.99, "learning_rate": 1.6518722211930346e-05, "loss": 1.9194, "step": 23142000 }, { "epoch": 66.99, "learning_rate": 1.6518000011578364e-05, "loss": 1.9227, "step": 23142500 }, { "epoch": 66.99, "learning_rate": 1.6517276363931087e-05, "loss": 1.9273, "step": 23143000 }, { "epoch": 66.99, "learning_rate": 1.651655271628381e-05, "loss": 1.9275, "step": 23143500 }, { "epoch": 66.99, "learning_rate": 1.651582906863653e-05, "loss": 1.9145, "step": 23144000 }, { "epoch": 66.99, "learning_rate": 1.6515105420989253e-05, "loss": 1.9319, "step": 23144500 }, { "epoch": 67.0, "learning_rate": 1.651438177334198e-05, "loss": 1.9246, "step": 23145000 }, { "epoch": 67.0, "learning_rate": 1.6513658125694705e-05, "loss": 1.9162, "step": 23145500 }, { "epoch": 67.0, "learning_rate": 1.6512934478047427e-05, "loss": 1.8788, "step": 23146000 }, { "epoch": 67.0, "learning_rate": 1.651221083040015e-05, "loss": 1.9358, "step": 23146500 }, { "epoch": 67.0, "eval_accuracy": 0.6807491301743107, "eval_accuracy_mlm": 0.6483818857312247, "eval_accuracy_nsp": 0.854397253807409, "eval_loss": 2.1636321544647217, "eval_runtime": 331.6851, "eval_samples_per_second": 1315.664, "eval_steps_per_second": 54.82, "step": 23146624 }, { "epoch": 67.0, "learning_rate": 1.6511488630048165e-05, "loss": 1.8829, "step": 23147000 }, { "epoch": 67.0, "learning_rate": 1.651076498240089e-05, "loss": 1.9076, "step": 23147500 }, { "epoch": 67.0, "learning_rate": 1.6510041334753613e-05, "loss": 1.9026, "step": 23148000 }, { "epoch": 67.01, "learning_rate": 1.650931913440163e-05, "loss": 1.9101, "step": 23148500 }, { "epoch": 67.01, "learning_rate": 1.650859693404965e-05, "loss": 1.9086, "step": 23149000 }, { "epoch": 67.01, "learning_rate": 1.6507873286402373e-05, "loss": 1.9171, "step": 23149500 }, { "epoch": 67.01, "learning_rate": 1.6507149638755095e-05, "loss": 1.8866, "step": 23150000 }, { "epoch": 67.01, "learning_rate": 1.6506425991107817e-05, "loss": 1.9079, "step": 23150500 }, { "epoch": 67.01, "learning_rate": 1.6505702343460543e-05, "loss": 1.8898, "step": 23151000 }, { "epoch": 67.01, "learning_rate": 1.6504978695813265e-05, "loss": 1.8801, "step": 23151500 }, { "epoch": 67.02, "learning_rate": 1.6504255048165987e-05, "loss": 1.9058, "step": 23152000 }, { "epoch": 67.02, "learning_rate": 1.6503531400518713e-05, "loss": 1.8846, "step": 23152500 }, { "epoch": 67.02, "learning_rate": 1.6502807752871435e-05, "loss": 1.9013, "step": 23153000 }, { "epoch": 67.02, "learning_rate": 1.6502085552519454e-05, "loss": 1.8974, "step": 23153500 }, { "epoch": 67.02, "learning_rate": 1.6501361904872176e-05, "loss": 1.9219, "step": 23154000 }, { "epoch": 67.02, "learning_rate": 1.65006382572249e-05, "loss": 1.8918, "step": 23154500 }, { "epoch": 67.02, "learning_rate": 1.649991460957762e-05, "loss": 1.9267, "step": 23155000 }, { "epoch": 67.03, "learning_rate": 1.6499190961930343e-05, "loss": 1.8951, "step": 23155500 }, { "epoch": 67.03, "learning_rate": 1.649846731428307e-05, "loss": 1.8765, "step": 23156000 }, { "epoch": 67.03, "learning_rate": 1.6497743666635794e-05, "loss": 1.89, "step": 23156500 }, { "epoch": 67.03, "learning_rate": 1.6497020018988516e-05, "loss": 1.8972, "step": 23157000 }, { "epoch": 67.03, "learning_rate": 1.649629637134124e-05, "loss": 1.9307, "step": 23157500 }, { "epoch": 67.03, "learning_rate": 1.649557272369396e-05, "loss": 1.9156, "step": 23158000 }, { "epoch": 67.03, "learning_rate": 1.6494849076046683e-05, "loss": 1.8689, "step": 23158500 }, { "epoch": 67.04, "learning_rate": 1.6494126875694702e-05, "loss": 1.9225, "step": 23159000 }, { "epoch": 67.04, "learning_rate": 1.6493404675342718e-05, "loss": 1.8781, "step": 23159500 }, { "epoch": 67.04, "learning_rate": 1.6492681027695443e-05, "loss": 1.8861, "step": 23160000 }, { "epoch": 67.04, "learning_rate": 1.649195738004817e-05, "loss": 1.9137, "step": 23160500 }, { "epoch": 67.04, "learning_rate": 1.649123373240089e-05, "loss": 1.9006, "step": 23161000 }, { "epoch": 67.04, "learning_rate": 1.6490510084753613e-05, "loss": 1.9057, "step": 23161500 }, { "epoch": 67.04, "learning_rate": 1.6489786437106336e-05, "loss": 1.8899, "step": 23162000 }, { "epoch": 67.05, "learning_rate": 1.6489062789459058e-05, "loss": 1.8826, "step": 23162500 }, { "epoch": 67.05, "learning_rate": 1.6488340589107077e-05, "loss": 1.8953, "step": 23163000 }, { "epoch": 67.05, "learning_rate": 1.6487616941459802e-05, "loss": 1.8946, "step": 23163500 }, { "epoch": 67.05, "learning_rate": 1.6486893293812525e-05, "loss": 1.9253, "step": 23164000 }, { "epoch": 67.05, "learning_rate": 1.6486169646165247e-05, "loss": 1.9024, "step": 23164500 }, { "epoch": 67.05, "learning_rate": 1.6485447445813266e-05, "loss": 1.9099, "step": 23165000 }, { "epoch": 67.05, "learning_rate": 1.6484723798165988e-05, "loss": 1.8849, "step": 23165500 }, { "epoch": 67.06, "learning_rate": 1.648400015051871e-05, "loss": 1.8958, "step": 23166000 }, { "epoch": 67.06, "learning_rate": 1.6483276502871432e-05, "loss": 1.8932, "step": 23166500 }, { "epoch": 67.06, "learning_rate": 1.6482552855224158e-05, "loss": 1.8933, "step": 23167000 }, { "epoch": 67.06, "learning_rate": 1.6481829207576884e-05, "loss": 1.8958, "step": 23167500 }, { "epoch": 67.06, "learning_rate": 1.6481105559929606e-05, "loss": 1.9072, "step": 23168000 }, { "epoch": 67.06, "learning_rate": 1.6480381912282328e-05, "loss": 1.9001, "step": 23168500 }, { "epoch": 67.06, "learning_rate": 1.647965826463505e-05, "loss": 1.8999, "step": 23169000 }, { "epoch": 67.07, "learning_rate": 1.6478934616987773e-05, "loss": 1.889, "step": 23169500 }, { "epoch": 67.07, "learning_rate": 1.647821241663579e-05, "loss": 1.8873, "step": 23170000 }, { "epoch": 67.07, "learning_rate": 1.6477488768988514e-05, "loss": 1.8941, "step": 23170500 }, { "epoch": 67.07, "learning_rate": 1.647676512134124e-05, "loss": 1.9201, "step": 23171000 }, { "epoch": 67.07, "learning_rate": 1.647604147369396e-05, "loss": 1.8865, "step": 23171500 }, { "epoch": 67.07, "learning_rate": 1.6475317826046684e-05, "loss": 1.8961, "step": 23172000 }, { "epoch": 67.07, "learning_rate": 1.647459417839941e-05, "loss": 1.8852, "step": 23172500 }, { "epoch": 67.08, "learning_rate": 1.6473870530752132e-05, "loss": 1.908, "step": 23173000 }, { "epoch": 67.08, "learning_rate": 1.6473146883104854e-05, "loss": 1.9076, "step": 23173500 }, { "epoch": 67.08, "learning_rate": 1.6472423235457576e-05, "loss": 1.9155, "step": 23174000 }, { "epoch": 67.08, "learning_rate": 1.6471701035105595e-05, "loss": 1.8889, "step": 23174500 }, { "epoch": 67.08, "learning_rate": 1.6470978834753614e-05, "loss": 1.8684, "step": 23175000 }, { "epoch": 67.08, "learning_rate": 1.6470255187106336e-05, "loss": 1.8836, "step": 23175500 }, { "epoch": 67.09, "learning_rate": 1.646953153945906e-05, "loss": 1.9046, "step": 23176000 }, { "epoch": 67.09, "learning_rate": 1.6468807891811784e-05, "loss": 1.8968, "step": 23176500 }, { "epoch": 67.09, "learning_rate": 1.6468084244164506e-05, "loss": 1.8995, "step": 23177000 }, { "epoch": 67.09, "learning_rate": 1.646736059651723e-05, "loss": 1.8597, "step": 23177500 }, { "epoch": 67.09, "learning_rate": 1.6466636948869954e-05, "loss": 1.8909, "step": 23178000 }, { "epoch": 67.09, "learning_rate": 1.6465913301222677e-05, "loss": 1.9237, "step": 23178500 }, { "epoch": 67.09, "learning_rate": 1.6465191100870695e-05, "loss": 1.9137, "step": 23179000 }, { "epoch": 67.1, "learning_rate": 1.6464467453223418e-05, "loss": 1.914, "step": 23179500 }, { "epoch": 67.1, "learning_rate": 1.646374380557614e-05, "loss": 1.8728, "step": 23180000 }, { "epoch": 67.1, "learning_rate": 1.6463020157928862e-05, "loss": 1.8907, "step": 23180500 }, { "epoch": 67.1, "learning_rate": 1.6462296510281584e-05, "loss": 1.9126, "step": 23181000 }, { "epoch": 67.1, "learning_rate": 1.646157286263431e-05, "loss": 1.9013, "step": 23181500 }, { "epoch": 67.1, "learning_rate": 1.6460849214987036e-05, "loss": 1.8998, "step": 23182000 }, { "epoch": 67.1, "learning_rate": 1.6460125567339758e-05, "loss": 1.8899, "step": 23182500 }, { "epoch": 67.11, "learning_rate": 1.6459403366987773e-05, "loss": 1.8973, "step": 23183000 }, { "epoch": 67.11, "learning_rate": 1.6458679719340496e-05, "loss": 1.8842, "step": 23183500 }, { "epoch": 67.11, "learning_rate": 1.645795607169322e-05, "loss": 1.8999, "step": 23184000 }, { "epoch": 67.11, "learning_rate": 1.6457233871341237e-05, "loss": 1.8741, "step": 23184500 }, { "epoch": 67.11, "learning_rate": 1.645651022369396e-05, "loss": 1.8837, "step": 23185000 }, { "epoch": 67.11, "learning_rate": 1.6455788023341978e-05, "loss": 1.8807, "step": 23185500 }, { "epoch": 67.11, "learning_rate": 1.6455064375694704e-05, "loss": 1.871, "step": 23186000 }, { "epoch": 67.12, "learning_rate": 1.6454340728047426e-05, "loss": 1.884, "step": 23186500 }, { "epoch": 67.12, "learning_rate": 1.6453617080400148e-05, "loss": 1.8839, "step": 23187000 }, { "epoch": 67.12, "learning_rate": 1.6452894880048167e-05, "loss": 1.9047, "step": 23187500 }, { "epoch": 67.12, "learning_rate": 1.645217123240089e-05, "loss": 1.9146, "step": 23188000 }, { "epoch": 67.12, "learning_rate": 1.645144758475361e-05, "loss": 1.9205, "step": 23188500 }, { "epoch": 67.12, "learning_rate": 1.6450723937106334e-05, "loss": 1.8792, "step": 23189000 }, { "epoch": 67.12, "learning_rate": 1.645000028945906e-05, "loss": 1.8937, "step": 23189500 }, { "epoch": 67.13, "learning_rate": 1.6449276641811785e-05, "loss": 1.8934, "step": 23190000 }, { "epoch": 67.13, "learning_rate": 1.6448552994164507e-05, "loss": 1.8865, "step": 23190500 }, { "epoch": 67.13, "learning_rate": 1.644782934651723e-05, "loss": 1.8833, "step": 23191000 }, { "epoch": 67.13, "learning_rate": 1.6447105698869952e-05, "loss": 1.8809, "step": 23191500 }, { "epoch": 67.13, "learning_rate": 1.6446382051222674e-05, "loss": 1.8732, "step": 23192000 }, { "epoch": 67.13, "learning_rate": 1.64456584035754e-05, "loss": 1.8952, "step": 23192500 }, { "epoch": 67.13, "learning_rate": 1.6444936203223415e-05, "loss": 1.9363, "step": 23193000 }, { "epoch": 67.14, "learning_rate": 1.644421255557614e-05, "loss": 1.8876, "step": 23193500 }, { "epoch": 67.14, "learning_rate": 1.6443488907928863e-05, "loss": 1.8846, "step": 23194000 }, { "epoch": 67.14, "learning_rate": 1.6442765260281585e-05, "loss": 1.89, "step": 23194500 }, { "epoch": 67.14, "learning_rate": 1.644204161263431e-05, "loss": 1.9066, "step": 23195000 }, { "epoch": 67.14, "learning_rate": 1.6441317964987033e-05, "loss": 1.9047, "step": 23195500 }, { "epoch": 67.14, "learning_rate": 1.6440594317339755e-05, "loss": 1.9093, "step": 23196000 }, { "epoch": 67.14, "learning_rate": 1.6439872116987774e-05, "loss": 1.8821, "step": 23196500 }, { "epoch": 67.15, "learning_rate": 1.64391484693405e-05, "loss": 1.9099, "step": 23197000 }, { "epoch": 67.15, "learning_rate": 1.6438424821693222e-05, "loss": 1.8831, "step": 23197500 }, { "epoch": 67.15, "learning_rate": 1.6437701174045944e-05, "loss": 1.9005, "step": 23198000 }, { "epoch": 67.15, "learning_rate": 1.6436977526398667e-05, "loss": 1.9236, "step": 23198500 }, { "epoch": 67.15, "learning_rate": 1.643625387875139e-05, "loss": 1.876, "step": 23199000 }, { "epoch": 67.15, "learning_rate": 1.643553023110411e-05, "loss": 1.8871, "step": 23199500 }, { "epoch": 67.15, "learning_rate": 1.6434806583456837e-05, "loss": 1.9069, "step": 23200000 }, { "epoch": 67.16, "learning_rate": 1.6434084383104856e-05, "loss": 1.8922, "step": 23200500 }, { "epoch": 67.16, "learning_rate": 1.6433362182752875e-05, "loss": 1.898, "step": 23201000 }, { "epoch": 67.16, "learning_rate": 1.6432638535105597e-05, "loss": 1.8999, "step": 23201500 }, { "epoch": 67.16, "learning_rate": 1.643191488745832e-05, "loss": 1.9175, "step": 23202000 }, { "epoch": 67.16, "learning_rate": 1.643119123981104e-05, "loss": 1.9027, "step": 23202500 }, { "epoch": 67.16, "learning_rate": 1.643046903945906e-05, "loss": 1.9077, "step": 23203000 }, { "epoch": 67.16, "learning_rate": 1.6429746839107076e-05, "loss": 1.9038, "step": 23203500 }, { "epoch": 67.17, "learning_rate": 1.64290231914598e-05, "loss": 1.8962, "step": 23204000 }, { "epoch": 67.17, "learning_rate": 1.6428299543812524e-05, "loss": 1.9077, "step": 23204500 }, { "epoch": 67.17, "learning_rate": 1.642757589616525e-05, "loss": 1.9186, "step": 23205000 }, { "epoch": 67.17, "learning_rate": 1.642685224851797e-05, "loss": 1.9033, "step": 23205500 }, { "epoch": 67.17, "learning_rate": 1.6426128600870694e-05, "loss": 1.9177, "step": 23206000 }, { "epoch": 67.17, "learning_rate": 1.6425404953223416e-05, "loss": 1.9011, "step": 23206500 }, { "epoch": 67.17, "learning_rate": 1.6424681305576138e-05, "loss": 1.8834, "step": 23207000 }, { "epoch": 67.18, "learning_rate": 1.6423957657928864e-05, "loss": 1.8891, "step": 23207500 }, { "epoch": 67.18, "learning_rate": 1.642323401028159e-05, "loss": 1.9027, "step": 23208000 }, { "epoch": 67.18, "learning_rate": 1.642251036263431e-05, "loss": 1.9095, "step": 23208500 }, { "epoch": 67.18, "learning_rate": 1.6421786714987034e-05, "loss": 1.9345, "step": 23209000 }, { "epoch": 67.18, "learning_rate": 1.6421063067339756e-05, "loss": 1.9144, "step": 23209500 }, { "epoch": 67.18, "learning_rate": 1.642033941969248e-05, "loss": 1.903, "step": 23210000 }, { "epoch": 67.18, "learning_rate": 1.6419617219340497e-05, "loss": 1.8988, "step": 23210500 }, { "epoch": 67.19, "learning_rate": 1.641889357169322e-05, "loss": 1.8777, "step": 23211000 }, { "epoch": 67.19, "learning_rate": 1.6418169924045942e-05, "loss": 1.8875, "step": 23211500 }, { "epoch": 67.19, "learning_rate": 1.6417446276398667e-05, "loss": 1.9263, "step": 23212000 }, { "epoch": 67.19, "learning_rate": 1.6416724076046686e-05, "loss": 1.8878, "step": 23212500 }, { "epoch": 67.19, "learning_rate": 1.641600042839941e-05, "loss": 1.9165, "step": 23213000 }, { "epoch": 67.19, "learning_rate": 1.641527678075213e-05, "loss": 1.916, "step": 23213500 }, { "epoch": 67.2, "learning_rate": 1.6414553133104853e-05, "loss": 1.9479, "step": 23214000 }, { "epoch": 67.2, "learning_rate": 1.6413829485457575e-05, "loss": 1.8963, "step": 23214500 }, { "epoch": 67.2, "learning_rate": 1.64131058378103e-05, "loss": 1.9127, "step": 23215000 }, { "epoch": 67.2, "learning_rate": 1.6412382190163026e-05, "loss": 1.8932, "step": 23215500 }, { "epoch": 67.2, "learning_rate": 1.641165854251575e-05, "loss": 1.8825, "step": 23216000 }, { "epoch": 67.2, "learning_rate": 1.641093489486847e-05, "loss": 1.9091, "step": 23216500 }, { "epoch": 67.2, "learning_rate": 1.6410211247221193e-05, "loss": 1.9059, "step": 23217000 }, { "epoch": 67.21, "learning_rate": 1.6409489046869212e-05, "loss": 1.9066, "step": 23217500 }, { "epoch": 67.21, "learning_rate": 1.6408765399221934e-05, "loss": 1.9027, "step": 23218000 }, { "epoch": 67.21, "learning_rate": 1.6408041751574657e-05, "loss": 1.8877, "step": 23218500 }, { "epoch": 67.21, "learning_rate": 1.6407318103927382e-05, "loss": 1.8966, "step": 23219000 }, { "epoch": 67.21, "learning_rate": 1.64065959035754e-05, "loss": 1.8843, "step": 23219500 }, { "epoch": 67.21, "learning_rate": 1.6405872255928123e-05, "loss": 1.926, "step": 23220000 }, { "epoch": 67.21, "learning_rate": 1.6405148608280846e-05, "loss": 1.9177, "step": 23220500 }, { "epoch": 67.22, "learning_rate": 1.6404424960633568e-05, "loss": 1.904, "step": 23221000 }, { "epoch": 67.22, "learning_rate": 1.640370131298629e-05, "loss": 1.8875, "step": 23221500 }, { "epoch": 67.22, "learning_rate": 1.6402977665339016e-05, "loss": 1.915, "step": 23222000 }, { "epoch": 67.22, "learning_rate": 1.6402254017691738e-05, "loss": 1.8854, "step": 23222500 }, { "epoch": 67.22, "learning_rate": 1.6401530370044464e-05, "loss": 1.9245, "step": 23223000 }, { "epoch": 67.22, "learning_rate": 1.6400809616987776e-05, "loss": 1.8878, "step": 23223500 }, { "epoch": 67.22, "learning_rate": 1.6400085969340498e-05, "loss": 1.8996, "step": 23224000 }, { "epoch": 67.23, "learning_rate": 1.639936232169322e-05, "loss": 1.9058, "step": 23224500 }, { "epoch": 67.23, "learning_rate": 1.6398638674045943e-05, "loss": 1.9114, "step": 23225000 }, { "epoch": 67.23, "learning_rate": 1.6397915026398665e-05, "loss": 1.9017, "step": 23225500 }, { "epoch": 67.23, "learning_rate": 1.639719137875139e-05, "loss": 1.8808, "step": 23226000 }, { "epoch": 67.23, "learning_rate": 1.6396467731104113e-05, "loss": 1.916, "step": 23226500 }, { "epoch": 67.23, "learning_rate": 1.6395744083456838e-05, "loss": 1.8859, "step": 23227000 }, { "epoch": 67.23, "learning_rate": 1.639502043580956e-05, "loss": 1.888, "step": 23227500 }, { "epoch": 67.24, "learning_rate": 1.6394296788162283e-05, "loss": 1.8814, "step": 23228000 }, { "epoch": 67.24, "learning_rate": 1.63935745878103e-05, "loss": 1.8723, "step": 23228500 }, { "epoch": 67.24, "learning_rate": 1.6392850940163024e-05, "loss": 1.9089, "step": 23229000 }, { "epoch": 67.24, "learning_rate": 1.6392127292515746e-05, "loss": 1.9199, "step": 23229500 }, { "epoch": 67.24, "learning_rate": 1.639140364486847e-05, "loss": 1.9113, "step": 23230000 }, { "epoch": 67.24, "learning_rate": 1.6390679997221194e-05, "loss": 1.8785, "step": 23230500 }, { "epoch": 67.24, "learning_rate": 1.6389956349573916e-05, "loss": 1.9136, "step": 23231000 }, { "epoch": 67.25, "learning_rate": 1.6389234149221935e-05, "loss": 1.8909, "step": 23231500 }, { "epoch": 67.25, "learning_rate": 1.6388510501574657e-05, "loss": 1.8754, "step": 23232000 }, { "epoch": 67.25, "learning_rate": 1.638778685392738e-05, "loss": 1.9046, "step": 23232500 }, { "epoch": 67.25, "learning_rate": 1.63870646535754e-05, "loss": 1.9053, "step": 23233000 }, { "epoch": 67.25, "learning_rate": 1.6386342453223418e-05, "loss": 1.8773, "step": 23233500 }, { "epoch": 67.25, "learning_rate": 1.6385620252871433e-05, "loss": 1.9119, "step": 23234000 }, { "epoch": 67.25, "learning_rate": 1.638489660522416e-05, "loss": 1.9025, "step": 23234500 }, { "epoch": 67.26, "learning_rate": 1.638417295757688e-05, "loss": 1.911, "step": 23235000 }, { "epoch": 67.26, "learning_rate": 1.6383449309929607e-05, "loss": 1.9126, "step": 23235500 }, { "epoch": 67.26, "learning_rate": 1.638272566228233e-05, "loss": 1.9054, "step": 23236000 }, { "epoch": 67.26, "learning_rate": 1.638200201463505e-05, "loss": 1.9021, "step": 23236500 }, { "epoch": 67.26, "learning_rate": 1.6381278366987773e-05, "loss": 1.9004, "step": 23237000 }, { "epoch": 67.26, "learning_rate": 1.6380554719340495e-05, "loss": 1.9119, "step": 23237500 }, { "epoch": 67.26, "learning_rate": 1.637983107169322e-05, "loss": 1.9234, "step": 23238000 }, { "epoch": 67.27, "learning_rate": 1.6379107424045943e-05, "loss": 1.9066, "step": 23238500 }, { "epoch": 67.27, "learning_rate": 1.637838377639867e-05, "loss": 1.8963, "step": 23239000 }, { "epoch": 67.27, "learning_rate": 1.637766012875139e-05, "loss": 1.9225, "step": 23239500 }, { "epoch": 67.27, "learning_rate": 1.6376936481104113e-05, "loss": 1.8862, "step": 23240000 }, { "epoch": 67.27, "learning_rate": 1.6376212833456836e-05, "loss": 1.8858, "step": 23240500 }, { "epoch": 67.27, "learning_rate": 1.6375489185809558e-05, "loss": 1.928, "step": 23241000 }, { "epoch": 67.27, "learning_rate": 1.6374765538162284e-05, "loss": 1.8866, "step": 23241500 }, { "epoch": 67.28, "learning_rate": 1.6374041890515006e-05, "loss": 1.8912, "step": 23242000 }, { "epoch": 67.28, "learning_rate": 1.6373319690163025e-05, "loss": 1.8803, "step": 23242500 }, { "epoch": 67.28, "learning_rate": 1.6372596042515747e-05, "loss": 1.8978, "step": 23243000 }, { "epoch": 67.28, "learning_rate": 1.637187239486847e-05, "loss": 1.9131, "step": 23243500 }, { "epoch": 67.28, "learning_rate": 1.6371148747221195e-05, "loss": 1.9049, "step": 23244000 }, { "epoch": 67.28, "learning_rate": 1.637042654686921e-05, "loss": 1.913, "step": 23244500 }, { "epoch": 67.28, "learning_rate": 1.6369702899221933e-05, "loss": 1.9133, "step": 23245000 }, { "epoch": 67.29, "learning_rate": 1.6368980698869955e-05, "loss": 1.9155, "step": 23245500 }, { "epoch": 67.29, "learning_rate": 1.6368257051222677e-05, "loss": 1.8849, "step": 23246000 }, { "epoch": 67.29, "learning_rate": 1.63675334035754e-05, "loss": 1.9058, "step": 23246500 }, { "epoch": 67.29, "learning_rate": 1.636680975592812e-05, "loss": 1.8969, "step": 23247000 }, { "epoch": 67.29, "learning_rate": 1.6366086108280844e-05, "loss": 1.8876, "step": 23247500 }, { "epoch": 67.29, "learning_rate": 1.636536246063357e-05, "loss": 1.938, "step": 23248000 }, { "epoch": 67.29, "learning_rate": 1.636463881298629e-05, "loss": 1.8905, "step": 23248500 }, { "epoch": 67.3, "learning_rate": 1.6363916612634307e-05, "loss": 1.8869, "step": 23249000 }, { "epoch": 67.3, "learning_rate": 1.6363192964987033e-05, "loss": 1.9213, "step": 23249500 }, { "epoch": 67.3, "learning_rate": 1.636246931733976e-05, "loss": 1.8649, "step": 23250000 }, { "epoch": 67.3, "learning_rate": 1.636174566969248e-05, "loss": 1.8914, "step": 23250500 }, { "epoch": 67.3, "learning_rate": 1.6361022022045203e-05, "loss": 1.9131, "step": 23251000 }, { "epoch": 67.3, "learning_rate": 1.6360298374397925e-05, "loss": 1.8948, "step": 23251500 }, { "epoch": 67.31, "learning_rate": 1.6359574726750647e-05, "loss": 1.8947, "step": 23252000 }, { "epoch": 67.31, "learning_rate": 1.635885107910337e-05, "loss": 1.921, "step": 23252500 }, { "epoch": 67.31, "learning_rate": 1.6358128878751392e-05, "loss": 1.9222, "step": 23253000 }, { "epoch": 67.31, "learning_rate": 1.6357405231104114e-05, "loss": 1.9026, "step": 23253500 }, { "epoch": 67.31, "learning_rate": 1.6356681583456836e-05, "loss": 1.9181, "step": 23254000 }, { "epoch": 67.31, "learning_rate": 1.635595793580956e-05, "loss": 1.9194, "step": 23254500 }, { "epoch": 67.31, "learning_rate": 1.6355234288162284e-05, "loss": 1.8786, "step": 23255000 }, { "epoch": 67.32, "learning_rate": 1.6354510640515007e-05, "loss": 1.9023, "step": 23255500 }, { "epoch": 67.32, "learning_rate": 1.635378699286773e-05, "loss": 1.8593, "step": 23256000 }, { "epoch": 67.32, "learning_rate": 1.6353063345220454e-05, "loss": 1.8758, "step": 23256500 }, { "epoch": 67.32, "learning_rate": 1.6352339697573177e-05, "loss": 1.9014, "step": 23257000 }, { "epoch": 67.32, "learning_rate": 1.6351617497221196e-05, "loss": 1.8933, "step": 23257500 }, { "epoch": 67.32, "learning_rate": 1.635089529686921e-05, "loss": 1.9019, "step": 23258000 }, { "epoch": 67.32, "learning_rate": 1.6350171649221933e-05, "loss": 1.8723, "step": 23258500 }, { "epoch": 67.33, "learning_rate": 1.6349449448869952e-05, "loss": 1.8924, "step": 23259000 }, { "epoch": 67.33, "learning_rate": 1.6348725801222675e-05, "loss": 1.9055, "step": 23259500 }, { "epoch": 67.33, "learning_rate": 1.6348002153575397e-05, "loss": 1.9158, "step": 23260000 }, { "epoch": 67.33, "learning_rate": 1.6347278505928122e-05, "loss": 1.901, "step": 23260500 }, { "epoch": 67.33, "learning_rate": 1.6346554858280848e-05, "loss": 1.9005, "step": 23261000 }, { "epoch": 67.33, "learning_rate": 1.634583121063357e-05, "loss": 1.8955, "step": 23261500 }, { "epoch": 67.33, "learning_rate": 1.6345107562986292e-05, "loss": 1.8918, "step": 23262000 }, { "epoch": 67.34, "learning_rate": 1.6344383915339015e-05, "loss": 1.9099, "step": 23262500 }, { "epoch": 67.34, "learning_rate": 1.6343660267691737e-05, "loss": 1.9082, "step": 23263000 }, { "epoch": 67.34, "learning_rate": 1.634293951463505e-05, "loss": 1.9117, "step": 23263500 }, { "epoch": 67.34, "learning_rate": 1.634221586698777e-05, "loss": 1.9047, "step": 23264000 }, { "epoch": 67.34, "learning_rate": 1.6341493666635794e-05, "loss": 1.8909, "step": 23264500 }, { "epoch": 67.34, "learning_rate": 1.6340770018988516e-05, "loss": 1.9091, "step": 23265000 }, { "epoch": 67.34, "learning_rate": 1.6340046371341238e-05, "loss": 1.8872, "step": 23265500 }, { "epoch": 67.35, "learning_rate": 1.633932272369396e-05, "loss": 1.921, "step": 23266000 }, { "epoch": 67.35, "learning_rate": 1.6338599076046686e-05, "loss": 1.867, "step": 23266500 }, { "epoch": 67.35, "learning_rate": 1.633787542839941e-05, "loss": 1.8969, "step": 23267000 }, { "epoch": 67.35, "learning_rate": 1.633715178075213e-05, "loss": 1.9053, "step": 23267500 }, { "epoch": 67.35, "learning_rate": 1.6336428133104856e-05, "loss": 1.8945, "step": 23268000 }, { "epoch": 67.35, "learning_rate": 1.6335705932752872e-05, "loss": 1.8948, "step": 23268500 }, { "epoch": 67.35, "learning_rate": 1.6334982285105597e-05, "loss": 1.899, "step": 23269000 }, { "epoch": 67.36, "learning_rate": 1.633425863745832e-05, "loss": 1.9051, "step": 23269500 }, { "epoch": 67.36, "learning_rate": 1.6333534989811042e-05, "loss": 1.9213, "step": 23270000 }, { "epoch": 67.36, "learning_rate": 1.6332811342163764e-05, "loss": 1.8932, "step": 23270500 }, { "epoch": 67.36, "learning_rate": 1.6332087694516486e-05, "loss": 1.8998, "step": 23271000 }, { "epoch": 67.36, "learning_rate": 1.6331364046869212e-05, "loss": 1.8955, "step": 23271500 }, { "epoch": 67.36, "learning_rate": 1.6330640399221938e-05, "loss": 1.9096, "step": 23272000 }, { "epoch": 67.36, "learning_rate": 1.632991675157466e-05, "loss": 1.9152, "step": 23272500 }, { "epoch": 67.37, "learning_rate": 1.6329193103927382e-05, "loss": 1.8873, "step": 23273000 }, { "epoch": 67.37, "learning_rate": 1.6328469456280104e-05, "loss": 1.8941, "step": 23273500 }, { "epoch": 67.37, "learning_rate": 1.6327745808632826e-05, "loss": 1.9003, "step": 23274000 }, { "epoch": 67.37, "learning_rate": 1.632702216098555e-05, "loss": 1.8825, "step": 23274500 }, { "epoch": 67.37, "learning_rate": 1.6326299960633568e-05, "loss": 1.9015, "step": 23275000 }, { "epoch": 67.37, "learning_rate": 1.6325576312986293e-05, "loss": 1.8612, "step": 23275500 }, { "epoch": 67.37, "learning_rate": 1.6324852665339016e-05, "loss": 1.8955, "step": 23276000 }, { "epoch": 67.38, "learning_rate": 1.6324129017691738e-05, "loss": 1.8975, "step": 23276500 }, { "epoch": 67.38, "learning_rate": 1.6323405370044463e-05, "loss": 1.9038, "step": 23277000 }, { "epoch": 67.38, "learning_rate": 1.6322681722397186e-05, "loss": 1.9222, "step": 23277500 }, { "epoch": 67.38, "learning_rate": 1.63219595220452e-05, "loss": 1.8766, "step": 23278000 }, { "epoch": 67.38, "learning_rate": 1.6321235874397923e-05, "loss": 1.8998, "step": 23278500 }, { "epoch": 67.38, "learning_rate": 1.632051222675065e-05, "loss": 1.8813, "step": 23279000 }, { "epoch": 67.38, "learning_rate": 1.6319790026398668e-05, "loss": 1.8925, "step": 23279500 }, { "epoch": 67.39, "learning_rate": 1.631906637875139e-05, "loss": 1.9268, "step": 23280000 }, { "epoch": 67.39, "learning_rate": 1.6318342731104112e-05, "loss": 1.8914, "step": 23280500 }, { "epoch": 67.39, "learning_rate": 1.631762053075213e-05, "loss": 1.8942, "step": 23281000 }, { "epoch": 67.39, "learning_rate": 1.6316896883104854e-05, "loss": 1.9075, "step": 23281500 }, { "epoch": 67.39, "learning_rate": 1.6316173235457576e-05, "loss": 1.9113, "step": 23282000 }, { "epoch": 67.39, "learning_rate": 1.6315449587810298e-05, "loss": 1.9164, "step": 23282500 }, { "epoch": 67.39, "learning_rate": 1.6314725940163024e-05, "loss": 1.8934, "step": 23283000 }, { "epoch": 67.4, "learning_rate": 1.631400229251575e-05, "loss": 1.8909, "step": 23283500 }, { "epoch": 67.4, "learning_rate": 1.631327864486847e-05, "loss": 1.8967, "step": 23284000 }, { "epoch": 67.4, "learning_rate": 1.6312554997221194e-05, "loss": 1.9186, "step": 23284500 }, { "epoch": 67.4, "learning_rate": 1.6311831349573916e-05, "loss": 1.929, "step": 23285000 }, { "epoch": 67.4, "learning_rate": 1.6311107701926638e-05, "loss": 1.911, "step": 23285500 }, { "epoch": 67.4, "learning_rate": 1.6310384054279364e-05, "loss": 1.9049, "step": 23286000 }, { "epoch": 67.4, "learning_rate": 1.630966040663209e-05, "loss": 1.8935, "step": 23286500 }, { "epoch": 67.41, "learning_rate": 1.6308936758984812e-05, "loss": 1.9187, "step": 23287000 }, { "epoch": 67.41, "learning_rate": 1.6308216005928124e-05, "loss": 1.8903, "step": 23287500 }, { "epoch": 67.41, "learning_rate": 1.630749380557614e-05, "loss": 1.8959, "step": 23288000 }, { "epoch": 67.41, "learning_rate": 1.6306770157928862e-05, "loss": 1.9097, "step": 23288500 }, { "epoch": 67.41, "learning_rate": 1.6306046510281587e-05, "loss": 1.8895, "step": 23289000 }, { "epoch": 67.41, "learning_rate": 1.630532286263431e-05, "loss": 1.9157, "step": 23289500 }, { "epoch": 67.42, "learning_rate": 1.6304599214987032e-05, "loss": 1.9222, "step": 23290000 }, { "epoch": 67.42, "learning_rate": 1.6303875567339757e-05, "loss": 1.9105, "step": 23290500 }, { "epoch": 67.42, "learning_rate": 1.630315191969248e-05, "loss": 1.9142, "step": 23291000 }, { "epoch": 67.42, "learning_rate": 1.6302428272045202e-05, "loss": 1.9206, "step": 23291500 }, { "epoch": 67.42, "learning_rate": 1.6301704624397928e-05, "loss": 1.9087, "step": 23292000 }, { "epoch": 67.42, "learning_rate": 1.630098097675065e-05, "loss": 1.8867, "step": 23292500 }, { "epoch": 67.42, "learning_rate": 1.6300257329103372e-05, "loss": 1.9041, "step": 23293000 }, { "epoch": 67.43, "learning_rate": 1.6299533681456094e-05, "loss": 1.885, "step": 23293500 }, { "epoch": 67.43, "learning_rate": 1.629881003380882e-05, "loss": 1.8918, "step": 23294000 }, { "epoch": 67.43, "learning_rate": 1.629808783345684e-05, "loss": 1.887, "step": 23294500 }, { "epoch": 67.43, "learning_rate": 1.629736418580956e-05, "loss": 1.9289, "step": 23295000 }, { "epoch": 67.43, "learning_rate": 1.6296640538162283e-05, "loss": 1.883, "step": 23295500 }, { "epoch": 67.43, "learning_rate": 1.6295916890515006e-05, "loss": 1.8852, "step": 23296000 }, { "epoch": 67.43, "learning_rate": 1.6295194690163024e-05, "loss": 1.9221, "step": 23296500 }, { "epoch": 67.44, "learning_rate": 1.6294471042515747e-05, "loss": 1.9036, "step": 23297000 }, { "epoch": 67.44, "learning_rate": 1.629374739486847e-05, "loss": 1.8751, "step": 23297500 }, { "epoch": 67.44, "learning_rate": 1.629302519451649e-05, "loss": 1.9101, "step": 23298000 }, { "epoch": 67.44, "learning_rate": 1.6292301546869214e-05, "loss": 1.9147, "step": 23298500 }, { "epoch": 67.44, "learning_rate": 1.629157934651723e-05, "loss": 1.901, "step": 23299000 }, { "epoch": 67.44, "learning_rate": 1.629085569886995e-05, "loss": 1.9103, "step": 23299500 }, { "epoch": 67.44, "learning_rate": 1.6290132051222677e-05, "loss": 1.9098, "step": 23300000 }, { "epoch": 67.45, "learning_rate": 1.62894084035754e-05, "loss": 1.8865, "step": 23300500 }, { "epoch": 67.45, "learning_rate": 1.628868475592812e-05, "loss": 1.9169, "step": 23301000 }, { "epoch": 67.45, "learning_rate": 1.6287961108280844e-05, "loss": 1.9204, "step": 23301500 }, { "epoch": 67.45, "learning_rate": 1.628723746063357e-05, "loss": 1.937, "step": 23302000 }, { "epoch": 67.45, "learning_rate": 1.628651381298629e-05, "loss": 1.9146, "step": 23302500 }, { "epoch": 67.45, "learning_rate": 1.6285790165339017e-05, "loss": 1.9141, "step": 23303000 }, { "epoch": 67.45, "learning_rate": 1.628506651769174e-05, "loss": 1.9154, "step": 23303500 }, { "epoch": 67.46, "learning_rate": 1.628434287004446e-05, "loss": 1.9069, "step": 23304000 }, { "epoch": 67.46, "learning_rate": 1.6283619222397184e-05, "loss": 1.9011, "step": 23304500 }, { "epoch": 67.46, "learning_rate": 1.6282898469340496e-05, "loss": 1.9263, "step": 23305000 }, { "epoch": 67.46, "learning_rate": 1.628217482169322e-05, "loss": 1.9116, "step": 23305500 }, { "epoch": 67.46, "learning_rate": 1.6281451174045944e-05, "loss": 1.9072, "step": 23306000 }, { "epoch": 67.46, "learning_rate": 1.6280727526398666e-05, "loss": 1.9076, "step": 23306500 }, { "epoch": 67.46, "learning_rate": 1.6280003878751392e-05, "loss": 1.9158, "step": 23307000 }, { "epoch": 67.47, "learning_rate": 1.6279280231104114e-05, "loss": 1.9018, "step": 23307500 }, { "epoch": 67.47, "learning_rate": 1.6278556583456836e-05, "loss": 1.9042, "step": 23308000 }, { "epoch": 67.47, "learning_rate": 1.6277834383104855e-05, "loss": 1.9116, "step": 23308500 }, { "epoch": 67.47, "learning_rate": 1.6277110735457577e-05, "loss": 1.9074, "step": 23309000 }, { "epoch": 67.47, "learning_rate": 1.6276387087810303e-05, "loss": 1.8888, "step": 23309500 }, { "epoch": 67.47, "learning_rate": 1.627566488745832e-05, "loss": 1.8865, "step": 23310000 }, { "epoch": 67.47, "learning_rate": 1.627494123981104e-05, "loss": 1.9061, "step": 23310500 }, { "epoch": 67.48, "learning_rate": 1.6274217592163766e-05, "loss": 1.9024, "step": 23311000 }, { "epoch": 67.48, "learning_rate": 1.627349394451649e-05, "loss": 1.9046, "step": 23311500 }, { "epoch": 67.48, "learning_rate": 1.627277029686921e-05, "loss": 1.9187, "step": 23312000 }, { "epoch": 67.48, "learning_rate": 1.6272046649221933e-05, "loss": 1.9018, "step": 23312500 }, { "epoch": 67.48, "learning_rate": 1.627132300157466e-05, "loss": 1.8996, "step": 23313000 }, { "epoch": 67.48, "learning_rate": 1.627059935392738e-05, "loss": 1.883, "step": 23313500 }, { "epoch": 67.48, "learning_rate": 1.6269875706280103e-05, "loss": 1.8909, "step": 23314000 }, { "epoch": 67.49, "learning_rate": 1.6269153505928122e-05, "loss": 1.9015, "step": 23314500 }, { "epoch": 67.49, "learning_rate": 1.626843130557614e-05, "loss": 1.8972, "step": 23315000 }, { "epoch": 67.49, "learning_rate": 1.6267707657928863e-05, "loss": 1.9051, "step": 23315500 }, { "epoch": 67.49, "learning_rate": 1.6266984010281586e-05, "loss": 1.8984, "step": 23316000 }, { "epoch": 67.49, "learning_rate": 1.6266260362634308e-05, "loss": 1.9046, "step": 23316500 }, { "epoch": 67.49, "learning_rate": 1.6265536714987033e-05, "loss": 1.9164, "step": 23317000 }, { "epoch": 67.49, "learning_rate": 1.6264814514635052e-05, "loss": 1.8864, "step": 23317500 }, { "epoch": 67.5, "learning_rate": 1.6264090866987775e-05, "loss": 1.9227, "step": 23318000 }, { "epoch": 67.5, "learning_rate": 1.6263367219340497e-05, "loss": 1.9129, "step": 23318500 }, { "epoch": 67.5, "learning_rate": 1.626264357169322e-05, "loss": 1.9205, "step": 23319000 }, { "epoch": 67.5, "learning_rate": 1.626191992404594e-05, "loss": 1.8884, "step": 23319500 }, { "epoch": 67.5, "learning_rate": 1.626119772369396e-05, "loss": 1.8947, "step": 23320000 }, { "epoch": 67.5, "learning_rate": 1.6260474076046683e-05, "loss": 1.9105, "step": 23320500 }, { "epoch": 67.5, "learning_rate": 1.6259750428399408e-05, "loss": 1.9046, "step": 23321000 }, { "epoch": 67.51, "learning_rate": 1.6259028228047427e-05, "loss": 1.9027, "step": 23321500 }, { "epoch": 67.51, "learning_rate": 1.625830458040015e-05, "loss": 1.8883, "step": 23322000 }, { "epoch": 67.51, "learning_rate": 1.625758093275287e-05, "loss": 1.9217, "step": 23322500 }, { "epoch": 67.51, "learning_rate": 1.6256857285105594e-05, "loss": 1.8894, "step": 23323000 }, { "epoch": 67.51, "learning_rate": 1.625613363745832e-05, "loss": 1.9122, "step": 23323500 }, { "epoch": 67.51, "learning_rate": 1.625540998981104e-05, "loss": 1.9191, "step": 23324000 }, { "epoch": 67.51, "learning_rate": 1.6254686342163767e-05, "loss": 1.914, "step": 23324500 }, { "epoch": 67.52, "learning_rate": 1.625396269451649e-05, "loss": 1.9046, "step": 23325000 }, { "epoch": 67.52, "learning_rate": 1.6253239046869212e-05, "loss": 1.9285, "step": 23325500 }, { "epoch": 67.52, "learning_rate": 1.6252515399221934e-05, "loss": 1.9241, "step": 23326000 }, { "epoch": 67.52, "learning_rate": 1.6251791751574656e-05, "loss": 1.9055, "step": 23326500 }, { "epoch": 67.52, "learning_rate": 1.6251068103927382e-05, "loss": 1.9106, "step": 23327000 }, { "epoch": 67.52, "learning_rate": 1.6250345903575397e-05, "loss": 1.9419, "step": 23327500 }, { "epoch": 67.53, "learning_rate": 1.6249622255928123e-05, "loss": 1.912, "step": 23328000 }, { "epoch": 67.53, "learning_rate": 1.6248898608280845e-05, "loss": 1.9248, "step": 23328500 }, { "epoch": 67.53, "learning_rate": 1.624817496063357e-05, "loss": 1.9341, "step": 23329000 }, { "epoch": 67.53, "learning_rate": 1.6247452760281586e-05, "loss": 1.8921, "step": 23329500 }, { "epoch": 67.53, "learning_rate": 1.624672911263431e-05, "loss": 1.9083, "step": 23330000 }, { "epoch": 67.53, "learning_rate": 1.6246006912282328e-05, "loss": 1.892, "step": 23330500 }, { "epoch": 67.53, "learning_rate": 1.624528326463505e-05, "loss": 1.8824, "step": 23331000 }, { "epoch": 67.54, "learning_rate": 1.6244559616987772e-05, "loss": 1.9092, "step": 23331500 }, { "epoch": 67.54, "learning_rate": 1.6243837416635794e-05, "loss": 1.9357, "step": 23332000 }, { "epoch": 67.54, "learning_rate": 1.6243113768988517e-05, "loss": 1.9081, "step": 23332500 }, { "epoch": 67.54, "learning_rate": 1.624239012134124e-05, "loss": 1.9043, "step": 23333000 }, { "epoch": 67.54, "learning_rate": 1.624166647369396e-05, "loss": 1.889, "step": 23333500 }, { "epoch": 67.54, "learning_rate": 1.6240942826046683e-05, "loss": 1.9014, "step": 23334000 }, { "epoch": 67.54, "learning_rate": 1.624021917839941e-05, "loss": 1.9142, "step": 23334500 }, { "epoch": 67.55, "learning_rate": 1.623949553075213e-05, "loss": 1.8926, "step": 23335000 }, { "epoch": 67.55, "learning_rate": 1.6238773330400147e-05, "loss": 1.8825, "step": 23335500 }, { "epoch": 67.55, "learning_rate": 1.6238049682752872e-05, "loss": 1.9071, "step": 23336000 }, { "epoch": 67.55, "learning_rate": 1.6237326035105595e-05, "loss": 1.863, "step": 23336500 }, { "epoch": 67.55, "learning_rate": 1.623660238745832e-05, "loss": 1.9171, "step": 23337000 }, { "epoch": 67.55, "learning_rate": 1.6235878739811042e-05, "loss": 1.8964, "step": 23337500 }, { "epoch": 67.55, "learning_rate": 1.6235155092163765e-05, "loss": 1.9083, "step": 23338000 }, { "epoch": 67.56, "learning_rate": 1.6234431444516487e-05, "loss": 1.9283, "step": 23338500 }, { "epoch": 67.56, "learning_rate": 1.623370779686921e-05, "loss": 1.9131, "step": 23339000 }, { "epoch": 67.56, "learning_rate": 1.6232984149221935e-05, "loss": 1.9033, "step": 23339500 }, { "epoch": 67.56, "learning_rate": 1.623226050157466e-05, "loss": 1.8949, "step": 23340000 }, { "epoch": 67.56, "learning_rate": 1.6231536853927383e-05, "loss": 1.8897, "step": 23340500 }, { "epoch": 67.56, "learning_rate": 1.6230814653575398e-05, "loss": 1.9186, "step": 23341000 }, { "epoch": 67.56, "learning_rate": 1.623009100592812e-05, "loss": 1.9014, "step": 23341500 }, { "epoch": 67.57, "learning_rate": 1.622936880557614e-05, "loss": 1.8991, "step": 23342000 }, { "epoch": 67.57, "learning_rate": 1.622864515792886e-05, "loss": 1.9045, "step": 23342500 }, { "epoch": 67.57, "learning_rate": 1.6227921510281587e-05, "loss": 1.9052, "step": 23343000 }, { "epoch": 67.57, "learning_rate": 1.622719786263431e-05, "loss": 1.9256, "step": 23343500 }, { "epoch": 67.57, "learning_rate": 1.6226474214987035e-05, "loss": 1.9013, "step": 23344000 }, { "epoch": 67.57, "learning_rate": 1.6225750567339757e-05, "loss": 1.8859, "step": 23344500 }, { "epoch": 67.57, "learning_rate": 1.622502691969248e-05, "loss": 1.9286, "step": 23345000 }, { "epoch": 67.58, "learning_rate": 1.6224303272045202e-05, "loss": 1.8921, "step": 23345500 }, { "epoch": 67.58, "learning_rate": 1.622358107169322e-05, "loss": 1.903, "step": 23346000 }, { "epoch": 67.58, "learning_rate": 1.6222858871341236e-05, "loss": 1.9141, "step": 23346500 }, { "epoch": 67.58, "learning_rate": 1.6222135223693962e-05, "loss": 1.9163, "step": 23347000 }, { "epoch": 67.58, "learning_rate": 1.6221411576046684e-05, "loss": 1.9034, "step": 23347500 }, { "epoch": 67.58, "learning_rate": 1.622068792839941e-05, "loss": 1.9288, "step": 23348000 }, { "epoch": 67.58, "learning_rate": 1.6219964280752132e-05, "loss": 1.9072, "step": 23348500 }, { "epoch": 67.59, "learning_rate": 1.6219240633104854e-05, "loss": 1.9216, "step": 23349000 }, { "epoch": 67.59, "learning_rate": 1.6218516985457576e-05, "loss": 1.8688, "step": 23349500 }, { "epoch": 67.59, "learning_rate": 1.62177933378103e-05, "loss": 1.933, "step": 23350000 }, { "epoch": 67.59, "learning_rate": 1.6217069690163024e-05, "loss": 1.8866, "step": 23350500 }, { "epoch": 67.59, "learning_rate": 1.6216346042515747e-05, "loss": 1.919, "step": 23351000 }, { "epoch": 67.59, "learning_rate": 1.6215622394868472e-05, "loss": 1.9328, "step": 23351500 }, { "epoch": 67.59, "learning_rate": 1.6214900194516488e-05, "loss": 1.9135, "step": 23352000 }, { "epoch": 67.6, "learning_rate": 1.621417654686921e-05, "loss": 1.9031, "step": 23352500 }, { "epoch": 67.6, "learning_rate": 1.6213452899221936e-05, "loss": 1.9053, "step": 23353000 }, { "epoch": 67.6, "learning_rate": 1.6212729251574658e-05, "loss": 1.8869, "step": 23353500 }, { "epoch": 67.6, "learning_rate": 1.6212005603927383e-05, "loss": 1.8993, "step": 23354000 }, { "epoch": 67.6, "learning_rate": 1.6211281956280106e-05, "loss": 1.9319, "step": 23354500 }, { "epoch": 67.6, "learning_rate": 1.6210558308632828e-05, "loss": 1.929, "step": 23355000 }, { "epoch": 67.6, "learning_rate": 1.620983466098555e-05, "loss": 1.8869, "step": 23355500 }, { "epoch": 67.61, "learning_rate": 1.6209111013338272e-05, "loss": 1.8889, "step": 23356000 }, { "epoch": 67.61, "learning_rate": 1.6208387365690998e-05, "loss": 1.9222, "step": 23356500 }, { "epoch": 67.61, "learning_rate": 1.6207665165339014e-05, "loss": 1.871, "step": 23357000 }, { "epoch": 67.61, "learning_rate": 1.6206942964987032e-05, "loss": 1.9226, "step": 23357500 }, { "epoch": 67.61, "learning_rate": 1.6206219317339758e-05, "loss": 1.8955, "step": 23358000 }, { "epoch": 67.61, "learning_rate": 1.6205497116987774e-05, "loss": 1.8997, "step": 23358500 }, { "epoch": 67.61, "learning_rate": 1.62047734693405e-05, "loss": 1.8989, "step": 23359000 }, { "epoch": 67.62, "learning_rate": 1.620404982169322e-05, "loss": 1.9064, "step": 23359500 }, { "epoch": 67.62, "learning_rate": 1.6203326174045944e-05, "loss": 1.8971, "step": 23360000 }, { "epoch": 67.62, "learning_rate": 1.6202602526398666e-05, "loss": 1.9119, "step": 23360500 }, { "epoch": 67.62, "learning_rate": 1.6201878878751388e-05, "loss": 1.907, "step": 23361000 }, { "epoch": 67.62, "learning_rate": 1.620115523110411e-05, "loss": 1.9259, "step": 23361500 }, { "epoch": 67.62, "learning_rate": 1.6200431583456836e-05, "loss": 1.8829, "step": 23362000 }, { "epoch": 67.62, "learning_rate": 1.619970793580956e-05, "loss": 1.8986, "step": 23362500 }, { "epoch": 67.63, "learning_rate": 1.6198985735457577e-05, "loss": 1.9025, "step": 23363000 }, { "epoch": 67.63, "learning_rate": 1.61982620878103e-05, "loss": 1.9047, "step": 23363500 }, { "epoch": 67.63, "learning_rate": 1.6197538440163025e-05, "loss": 1.9088, "step": 23364000 }, { "epoch": 67.63, "learning_rate": 1.619681623981104e-05, "loss": 1.8783, "step": 23364500 }, { "epoch": 67.63, "learning_rate": 1.6196092592163763e-05, "loss": 1.8971, "step": 23365000 }, { "epoch": 67.63, "learning_rate": 1.619536894451649e-05, "loss": 1.9269, "step": 23365500 }, { "epoch": 67.64, "learning_rate": 1.6194645296869214e-05, "loss": 1.8864, "step": 23366000 }, { "epoch": 67.64, "learning_rate": 1.6193921649221936e-05, "loss": 1.8965, "step": 23366500 }, { "epoch": 67.64, "learning_rate": 1.619319800157466e-05, "loss": 1.8898, "step": 23367000 }, { "epoch": 67.64, "learning_rate": 1.619247435392738e-05, "loss": 1.887, "step": 23367500 }, { "epoch": 67.64, "learning_rate": 1.6191750706280103e-05, "loss": 1.9115, "step": 23368000 }, { "epoch": 67.64, "learning_rate": 1.6191027058632825e-05, "loss": 1.908, "step": 23368500 }, { "epoch": 67.64, "learning_rate": 1.619030341098555e-05, "loss": 1.9243, "step": 23369000 }, { "epoch": 67.65, "learning_rate": 1.6189579763338277e-05, "loss": 1.9055, "step": 23369500 }, { "epoch": 67.65, "learning_rate": 1.6188856115691e-05, "loss": 1.9282, "step": 23370000 }, { "epoch": 67.65, "learning_rate": 1.618813246804372e-05, "loss": 1.8809, "step": 23370500 }, { "epoch": 67.65, "learning_rate": 1.6187408820396443e-05, "loss": 1.9232, "step": 23371000 }, { "epoch": 67.65, "learning_rate": 1.6186686620044462e-05, "loss": 1.9209, "step": 23371500 }, { "epoch": 67.65, "learning_rate": 1.6185962972397184e-05, "loss": 1.923, "step": 23372000 }, { "epoch": 67.65, "learning_rate": 1.6185239324749907e-05, "loss": 1.8952, "step": 23372500 }, { "epoch": 67.66, "learning_rate": 1.6184515677102632e-05, "loss": 1.9046, "step": 23373000 }, { "epoch": 67.66, "learning_rate": 1.6183792029455354e-05, "loss": 1.929, "step": 23373500 }, { "epoch": 67.66, "learning_rate": 1.6183068381808077e-05, "loss": 1.902, "step": 23374000 }, { "epoch": 67.66, "learning_rate": 1.6182344734160802e-05, "loss": 1.8903, "step": 23374500 }, { "epoch": 67.66, "learning_rate": 1.6181621086513525e-05, "loss": 1.906, "step": 23375000 }, { "epoch": 67.66, "learning_rate": 1.6180897438866247e-05, "loss": 1.9163, "step": 23375500 }, { "epoch": 67.66, "learning_rate": 1.6180173791218972e-05, "loss": 1.9206, "step": 23376000 }, { "epoch": 67.67, "learning_rate": 1.6179450143571695e-05, "loss": 1.8884, "step": 23376500 }, { "epoch": 67.67, "learning_rate": 1.6178727943219714e-05, "loss": 1.9052, "step": 23377000 }, { "epoch": 67.67, "learning_rate": 1.6178004295572436e-05, "loss": 1.896, "step": 23377500 }, { "epoch": 67.67, "learning_rate": 1.617728209522045e-05, "loss": 1.8992, "step": 23378000 }, { "epoch": 67.67, "learning_rate": 1.6176558447573177e-05, "loss": 1.9264, "step": 23378500 }, { "epoch": 67.67, "learning_rate": 1.61758347999259e-05, "loss": 1.9071, "step": 23379000 }, { "epoch": 67.67, "learning_rate": 1.617511115227862e-05, "loss": 1.924, "step": 23379500 }, { "epoch": 67.68, "learning_rate": 1.6174387504631347e-05, "loss": 1.9132, "step": 23380000 }, { "epoch": 67.68, "learning_rate": 1.617366385698407e-05, "loss": 1.9202, "step": 23380500 }, { "epoch": 67.68, "learning_rate": 1.617294020933679e-05, "loss": 1.9143, "step": 23381000 }, { "epoch": 67.68, "learning_rate": 1.6172216561689514e-05, "loss": 1.8837, "step": 23381500 }, { "epoch": 67.68, "learning_rate": 1.617149291404224e-05, "loss": 1.9113, "step": 23382000 }, { "epoch": 67.68, "learning_rate": 1.617076926639496e-05, "loss": 1.902, "step": 23382500 }, { "epoch": 67.68, "learning_rate": 1.6170047066042977e-05, "loss": 1.8861, "step": 23383000 }, { "epoch": 67.69, "learning_rate": 1.6169323418395703e-05, "loss": 1.9182, "step": 23383500 }, { "epoch": 67.69, "learning_rate": 1.616859977074843e-05, "loss": 1.8884, "step": 23384000 }, { "epoch": 67.69, "learning_rate": 1.616787612310115e-05, "loss": 1.875, "step": 23384500 }, { "epoch": 67.69, "learning_rate": 1.6167152475453873e-05, "loss": 1.9201, "step": 23385000 }, { "epoch": 67.69, "learning_rate": 1.6166428827806595e-05, "loss": 1.8786, "step": 23385500 }, { "epoch": 67.69, "learning_rate": 1.6165705180159317e-05, "loss": 1.9165, "step": 23386000 }, { "epoch": 67.69, "learning_rate": 1.6164982979807336e-05, "loss": 1.889, "step": 23386500 }, { "epoch": 67.7, "learning_rate": 1.616425933216006e-05, "loss": 1.9144, "step": 23387000 }, { "epoch": 67.7, "learning_rate": 1.6163535684512784e-05, "loss": 1.9314, "step": 23387500 }, { "epoch": 67.7, "learning_rate": 1.6162813484160803e-05, "loss": 1.92, "step": 23388000 }, { "epoch": 67.7, "learning_rate": 1.6162089836513525e-05, "loss": 1.8881, "step": 23388500 }, { "epoch": 67.7, "learning_rate": 1.6161366188866248e-05, "loss": 1.9263, "step": 23389000 }, { "epoch": 67.7, "learning_rate": 1.616064254121897e-05, "loss": 1.899, "step": 23389500 }, { "epoch": 67.7, "learning_rate": 1.6159918893571692e-05, "loss": 1.8999, "step": 23390000 }, { "epoch": 67.71, "learning_rate": 1.6159195245924418e-05, "loss": 1.9219, "step": 23390500 }, { "epoch": 67.71, "learning_rate": 1.6158471598277143e-05, "loss": 1.9196, "step": 23391000 }, { "epoch": 67.71, "learning_rate": 1.6157747950629866e-05, "loss": 1.9156, "step": 23391500 }, { "epoch": 67.71, "learning_rate": 1.6157024302982588e-05, "loss": 1.9039, "step": 23392000 }, { "epoch": 67.71, "learning_rate": 1.615630065533531e-05, "loss": 1.8967, "step": 23392500 }, { "epoch": 67.71, "learning_rate": 1.6155577007688032e-05, "loss": 1.895, "step": 23393000 }, { "epoch": 67.71, "learning_rate": 1.615485480733605e-05, "loss": 1.8949, "step": 23393500 }, { "epoch": 67.72, "learning_rate": 1.6154131159688773e-05, "loss": 1.9038, "step": 23394000 }, { "epoch": 67.72, "learning_rate": 1.6153407512041496e-05, "loss": 1.8941, "step": 23394500 }, { "epoch": 67.72, "learning_rate": 1.615268386439422e-05, "loss": 1.9152, "step": 23395000 }, { "epoch": 67.72, "learning_rate": 1.6151960216746944e-05, "loss": 1.9189, "step": 23395500 }, { "epoch": 67.72, "learning_rate": 1.6151238016394962e-05, "loss": 1.9143, "step": 23396000 }, { "epoch": 67.72, "learning_rate": 1.6150514368747685e-05, "loss": 1.9106, "step": 23396500 }, { "epoch": 67.72, "learning_rate": 1.6149790721100407e-05, "loss": 1.9181, "step": 23397000 }, { "epoch": 67.73, "learning_rate": 1.614906707345313e-05, "loss": 1.9236, "step": 23397500 }, { "epoch": 67.73, "learning_rate": 1.6148343425805855e-05, "loss": 1.9238, "step": 23398000 }, { "epoch": 67.73, "learning_rate": 1.614761977815858e-05, "loss": 1.9282, "step": 23398500 }, { "epoch": 67.73, "learning_rate": 1.6146896130511303e-05, "loss": 1.9046, "step": 23399000 }, { "epoch": 67.73, "learning_rate": 1.6146172482864025e-05, "loss": 1.9123, "step": 23399500 }, { "epoch": 67.73, "learning_rate": 1.6145450282512044e-05, "loss": 1.9172, "step": 23400000 }, { "epoch": 67.73, "learning_rate": 1.6144726634864766e-05, "loss": 1.9002, "step": 23400500 }, { "epoch": 67.74, "learning_rate": 1.6144002987217488e-05, "loss": 1.9362, "step": 23401000 }, { "epoch": 67.74, "learning_rate": 1.6143280786865507e-05, "loss": 1.9165, "step": 23401500 }, { "epoch": 67.74, "learning_rate": 1.614255713921823e-05, "loss": 1.9241, "step": 23402000 }, { "epoch": 67.74, "learning_rate": 1.6141833491570955e-05, "loss": 1.8965, "step": 23402500 }, { "epoch": 67.74, "learning_rate": 1.6141109843923677e-05, "loss": 1.8993, "step": 23403000 }, { "epoch": 67.74, "learning_rate": 1.61403861962764e-05, "loss": 1.8939, "step": 23403500 }, { "epoch": 67.74, "learning_rate": 1.6139662548629122e-05, "loss": 1.9248, "step": 23404000 }, { "epoch": 67.75, "learning_rate": 1.6138938900981844e-05, "loss": 1.8916, "step": 23404500 }, { "epoch": 67.75, "learning_rate": 1.613821525333457e-05, "loss": 1.9066, "step": 23405000 }, { "epoch": 67.75, "learning_rate": 1.6137493052982585e-05, "loss": 1.9137, "step": 23405500 }, { "epoch": 67.75, "learning_rate": 1.613676940533531e-05, "loss": 1.8926, "step": 23406000 }, { "epoch": 67.75, "learning_rate": 1.6136045757688033e-05, "loss": 1.8855, "step": 23406500 }, { "epoch": 67.75, "learning_rate": 1.613532211004076e-05, "loss": 1.9169, "step": 23407000 }, { "epoch": 67.76, "learning_rate": 1.613459846239348e-05, "loss": 1.9141, "step": 23407500 }, { "epoch": 67.76, "learning_rate": 1.6133876262041496e-05, "loss": 1.9098, "step": 23408000 }, { "epoch": 67.76, "learning_rate": 1.613315261439422e-05, "loss": 1.9092, "step": 23408500 }, { "epoch": 67.76, "learning_rate": 1.6132428966746944e-05, "loss": 1.9133, "step": 23409000 }, { "epoch": 67.76, "learning_rate": 1.613170531909967e-05, "loss": 1.9067, "step": 23409500 }, { "epoch": 67.76, "learning_rate": 1.6130981671452392e-05, "loss": 1.8968, "step": 23410000 }, { "epoch": 67.76, "learning_rate": 1.6130258023805114e-05, "loss": 1.9031, "step": 23410500 }, { "epoch": 67.77, "learning_rate": 1.6129535823453133e-05, "loss": 1.9252, "step": 23411000 }, { "epoch": 67.77, "learning_rate": 1.6128812175805856e-05, "loss": 1.9009, "step": 23411500 }, { "epoch": 67.77, "learning_rate": 1.6128088528158578e-05, "loss": 1.8921, "step": 23412000 }, { "epoch": 67.77, "learning_rate": 1.61273648805113e-05, "loss": 1.8958, "step": 23412500 }, { "epoch": 67.77, "learning_rate": 1.6126641232864022e-05, "loss": 1.8948, "step": 23413000 }, { "epoch": 67.77, "learning_rate": 1.6125917585216748e-05, "loss": 1.9124, "step": 23413500 }, { "epoch": 67.77, "learning_rate": 1.612519393756947e-05, "loss": 1.9035, "step": 23414000 }, { "epoch": 67.78, "learning_rate": 1.612447173721749e-05, "loss": 1.9064, "step": 23414500 }, { "epoch": 67.78, "learning_rate": 1.612374808957021e-05, "loss": 1.9029, "step": 23415000 }, { "epoch": 67.78, "learning_rate": 1.612302588921823e-05, "loss": 1.9013, "step": 23415500 }, { "epoch": 67.78, "learning_rate": 1.6122302241570952e-05, "loss": 1.9353, "step": 23416000 }, { "epoch": 67.78, "learning_rate": 1.6121578593923675e-05, "loss": 1.9099, "step": 23416500 }, { "epoch": 67.78, "learning_rate": 1.61208549462764e-05, "loss": 1.893, "step": 23417000 }, { "epoch": 67.78, "learning_rate": 1.6120131298629123e-05, "loss": 1.893, "step": 23417500 }, { "epoch": 67.79, "learning_rate": 1.611940909827714e-05, "loss": 1.8873, "step": 23418000 }, { "epoch": 67.79, "learning_rate": 1.6118685450629864e-05, "loss": 1.9125, "step": 23418500 }, { "epoch": 67.79, "learning_rate": 1.6117963250277883e-05, "loss": 1.9024, "step": 23419000 }, { "epoch": 67.79, "learning_rate": 1.6117239602630605e-05, "loss": 1.8839, "step": 23419500 }, { "epoch": 67.79, "learning_rate": 1.6116515954983327e-05, "loss": 1.9015, "step": 23420000 }, { "epoch": 67.79, "learning_rate": 1.611579230733605e-05, "loss": 1.9261, "step": 23420500 }, { "epoch": 67.79, "learning_rate": 1.6115068659688775e-05, "loss": 1.9144, "step": 23421000 }, { "epoch": 67.8, "learning_rate": 1.6114345012041497e-05, "loss": 1.9103, "step": 23421500 }, { "epoch": 67.8, "learning_rate": 1.6113621364394223e-05, "loss": 1.8993, "step": 23422000 }, { "epoch": 67.8, "learning_rate": 1.6112897716746945e-05, "loss": 1.896, "step": 23422500 }, { "epoch": 67.8, "learning_rate": 1.6112174069099667e-05, "loss": 1.8941, "step": 23423000 }, { "epoch": 67.8, "learning_rate": 1.6111451868747683e-05, "loss": 1.885, "step": 23423500 }, { "epoch": 67.8, "learning_rate": 1.611072822110041e-05, "loss": 1.8855, "step": 23424000 }, { "epoch": 67.8, "learning_rate": 1.611000457345313e-05, "loss": 1.9292, "step": 23424500 }, { "epoch": 67.81, "learning_rate": 1.6109280925805856e-05, "loss": 1.9076, "step": 23425000 }, { "epoch": 67.81, "learning_rate": 1.610855727815858e-05, "loss": 1.8866, "step": 23425500 }, { "epoch": 67.81, "learning_rate": 1.6107835077806598e-05, "loss": 1.8823, "step": 23426000 }, { "epoch": 67.81, "learning_rate": 1.610711143015932e-05, "loss": 1.9025, "step": 23426500 }, { "epoch": 67.81, "learning_rate": 1.6106387782512042e-05, "loss": 1.882, "step": 23427000 }, { "epoch": 67.81, "learning_rate": 1.6105664134864764e-05, "loss": 1.9168, "step": 23427500 }, { "epoch": 67.81, "learning_rate": 1.6104940487217486e-05, "loss": 1.9001, "step": 23428000 }, { "epoch": 67.82, "learning_rate": 1.6104216839570212e-05, "loss": 1.9044, "step": 23428500 }, { "epoch": 67.82, "learning_rate": 1.6103493191922934e-05, "loss": 1.9121, "step": 23429000 }, { "epoch": 67.82, "learning_rate": 1.6102770991570953e-05, "loss": 1.8875, "step": 23429500 }, { "epoch": 67.82, "learning_rate": 1.6102048791218972e-05, "loss": 1.8952, "step": 23430000 }, { "epoch": 67.82, "learning_rate": 1.6101325143571694e-05, "loss": 1.9221, "step": 23430500 }, { "epoch": 67.82, "learning_rate": 1.6100601495924417e-05, "loss": 1.9138, "step": 23431000 }, { "epoch": 67.82, "learning_rate": 1.609987784827714e-05, "loss": 1.8857, "step": 23431500 }, { "epoch": 67.83, "learning_rate": 1.609915420062986e-05, "loss": 1.8918, "step": 23432000 }, { "epoch": 67.83, "learning_rate": 1.6098430552982587e-05, "loss": 1.8947, "step": 23432500 }, { "epoch": 67.83, "learning_rate": 1.6097706905335312e-05, "loss": 1.907, "step": 23433000 }, { "epoch": 67.83, "learning_rate": 1.6096983257688035e-05, "loss": 1.923, "step": 23433500 }, { "epoch": 67.83, "learning_rate": 1.6096259610040757e-05, "loss": 1.9176, "step": 23434000 }, { "epoch": 67.83, "learning_rate": 1.6095537409688772e-05, "loss": 1.9053, "step": 23434500 }, { "epoch": 67.83, "learning_rate": 1.6094813762041498e-05, "loss": 1.912, "step": 23435000 }, { "epoch": 67.84, "learning_rate": 1.609409011439422e-05, "loss": 1.9055, "step": 23435500 }, { "epoch": 67.84, "learning_rate": 1.6093366466746946e-05, "loss": 1.914, "step": 23436000 }, { "epoch": 67.84, "learning_rate": 1.609264426639496e-05, "loss": 1.9024, "step": 23436500 }, { "epoch": 67.84, "learning_rate": 1.6091920618747687e-05, "loss": 1.8836, "step": 23437000 }, { "epoch": 67.84, "learning_rate": 1.609119697110041e-05, "loss": 1.9139, "step": 23437500 }, { "epoch": 67.84, "learning_rate": 1.609047332345313e-05, "loss": 1.934, "step": 23438000 }, { "epoch": 67.84, "learning_rate": 1.6089749675805854e-05, "loss": 1.8957, "step": 23438500 }, { "epoch": 67.85, "learning_rate": 1.6089027475453873e-05, "loss": 1.9042, "step": 23439000 }, { "epoch": 67.85, "learning_rate": 1.6088303827806595e-05, "loss": 1.8881, "step": 23439500 }, { "epoch": 67.85, "learning_rate": 1.608758018015932e-05, "loss": 1.9051, "step": 23440000 }, { "epoch": 67.85, "learning_rate": 1.6086856532512043e-05, "loss": 1.919, "step": 23440500 }, { "epoch": 67.85, "learning_rate": 1.6086132884864765e-05, "loss": 1.9177, "step": 23441000 }, { "epoch": 67.85, "learning_rate": 1.6085409237217487e-05, "loss": 1.9097, "step": 23441500 }, { "epoch": 67.85, "learning_rate": 1.6084685589570213e-05, "loss": 1.9044, "step": 23442000 }, { "epoch": 67.86, "learning_rate": 1.6083961941922935e-05, "loss": 1.8984, "step": 23442500 }, { "epoch": 67.86, "learning_rate": 1.6083238294275657e-05, "loss": 1.9004, "step": 23443000 }, { "epoch": 67.86, "learning_rate": 1.6082514646628383e-05, "loss": 1.9036, "step": 23443500 }, { "epoch": 67.86, "learning_rate": 1.6081792446276402e-05, "loss": 1.9401, "step": 23444000 }, { "epoch": 67.86, "learning_rate": 1.6081068798629124e-05, "loss": 1.9053, "step": 23444500 }, { "epoch": 67.86, "learning_rate": 1.6080345150981846e-05, "loss": 1.9345, "step": 23445000 }, { "epoch": 67.87, "learning_rate": 1.607962150333457e-05, "loss": 1.8881, "step": 23445500 }, { "epoch": 67.87, "learning_rate": 1.607889785568729e-05, "loss": 1.9132, "step": 23446000 }, { "epoch": 67.87, "learning_rate": 1.6078177102630603e-05, "loss": 1.9166, "step": 23446500 }, { "epoch": 67.87, "learning_rate": 1.6077454902278622e-05, "loss": 1.9078, "step": 23447000 }, { "epoch": 67.87, "learning_rate": 1.6076731254631348e-05, "loss": 1.8948, "step": 23447500 }, { "epoch": 67.87, "learning_rate": 1.607600760698407e-05, "loss": 1.8831, "step": 23448000 }, { "epoch": 67.87, "learning_rate": 1.6075283959336792e-05, "loss": 1.9076, "step": 23448500 }, { "epoch": 67.88, "learning_rate": 1.6074560311689514e-05, "loss": 1.893, "step": 23449000 }, { "epoch": 67.88, "learning_rate": 1.6073836664042237e-05, "loss": 1.9038, "step": 23449500 }, { "epoch": 67.88, "learning_rate": 1.6073114463690256e-05, "loss": 1.914, "step": 23450000 }, { "epoch": 67.88, "learning_rate": 1.6072390816042978e-05, "loss": 1.8788, "step": 23450500 }, { "epoch": 67.88, "learning_rate": 1.60716671683957e-05, "loss": 1.9144, "step": 23451000 }, { "epoch": 67.88, "learning_rate": 1.6070943520748426e-05, "loss": 1.8802, "step": 23451500 }, { "epoch": 67.88, "learning_rate": 1.607021987310115e-05, "loss": 1.9155, "step": 23452000 }, { "epoch": 67.89, "learning_rate": 1.6069497672749167e-05, "loss": 1.9232, "step": 23452500 }, { "epoch": 67.89, "learning_rate": 1.606877402510189e-05, "loss": 1.9105, "step": 23453000 }, { "epoch": 67.89, "learning_rate": 1.6068050377454615e-05, "loss": 1.923, "step": 23453500 }, { "epoch": 67.89, "learning_rate": 1.6067326729807337e-05, "loss": 1.8979, "step": 23454000 }, { "epoch": 67.89, "learning_rate": 1.6066604529455352e-05, "loss": 1.9177, "step": 23454500 }, { "epoch": 67.89, "learning_rate": 1.6065880881808078e-05, "loss": 1.9091, "step": 23455000 }, { "epoch": 67.89, "learning_rate": 1.60651572341608e-05, "loss": 1.9006, "step": 23455500 }, { "epoch": 67.9, "learning_rate": 1.6064433586513526e-05, "loss": 1.9079, "step": 23456000 }, { "epoch": 67.9, "learning_rate": 1.6063709938866248e-05, "loss": 1.9194, "step": 23456500 }, { "epoch": 67.9, "learning_rate": 1.606298629121897e-05, "loss": 1.8939, "step": 23457000 }, { "epoch": 67.9, "learning_rate": 1.6062262643571693e-05, "loss": 1.9154, "step": 23457500 }, { "epoch": 67.9, "learning_rate": 1.6061538995924415e-05, "loss": 1.8906, "step": 23458000 }, { "epoch": 67.9, "learning_rate": 1.606081534827714e-05, "loss": 1.9108, "step": 23458500 }, { "epoch": 67.9, "learning_rate": 1.6060091700629866e-05, "loss": 1.8901, "step": 23459000 }, { "epoch": 67.91, "learning_rate": 1.605936805298259e-05, "loss": 1.9274, "step": 23459500 }, { "epoch": 67.91, "learning_rate": 1.605864440533531e-05, "loss": 1.924, "step": 23460000 }, { "epoch": 67.91, "learning_rate": 1.6057922204983326e-05, "loss": 1.8973, "step": 23460500 }, { "epoch": 67.91, "learning_rate": 1.6057198557336052e-05, "loss": 1.8902, "step": 23461000 }, { "epoch": 67.91, "learning_rate": 1.6056474909688774e-05, "loss": 1.9092, "step": 23461500 }, { "epoch": 67.91, "learning_rate": 1.6055751262041496e-05, "loss": 1.8859, "step": 23462000 }, { "epoch": 67.91, "learning_rate": 1.6055027614394222e-05, "loss": 1.8882, "step": 23462500 }, { "epoch": 67.92, "learning_rate": 1.6054303966746944e-05, "loss": 1.9159, "step": 23463000 }, { "epoch": 67.92, "learning_rate": 1.6053580319099666e-05, "loss": 1.9035, "step": 23463500 }, { "epoch": 67.92, "learning_rate": 1.6052856671452392e-05, "loss": 1.9078, "step": 23464000 }, { "epoch": 67.92, "learning_rate": 1.6052133023805114e-05, "loss": 1.8953, "step": 23464500 }, { "epoch": 67.92, "learning_rate": 1.605141082345313e-05, "loss": 1.9151, "step": 23465000 }, { "epoch": 67.92, "learning_rate": 1.605068862310115e-05, "loss": 1.9098, "step": 23465500 }, { "epoch": 67.92, "learning_rate": 1.6049964975453874e-05, "loss": 1.9194, "step": 23466000 }, { "epoch": 67.93, "learning_rate": 1.6049241327806597e-05, "loss": 1.9244, "step": 23466500 }, { "epoch": 67.93, "learning_rate": 1.604851768015932e-05, "loss": 1.9171, "step": 23467000 }, { "epoch": 67.93, "learning_rate": 1.604779403251204e-05, "loss": 1.9259, "step": 23467500 }, { "epoch": 67.93, "learning_rate": 1.6047070384864767e-05, "loss": 1.9119, "step": 23468000 }, { "epoch": 67.93, "learning_rate": 1.604634673721749e-05, "loss": 1.9275, "step": 23468500 }, { "epoch": 67.93, "learning_rate": 1.6045624536865504e-05, "loss": 1.9234, "step": 23469000 }, { "epoch": 67.93, "learning_rate": 1.604490088921823e-05, "loss": 1.9315, "step": 23469500 }, { "epoch": 67.94, "learning_rate": 1.6044177241570956e-05, "loss": 1.8921, "step": 23470000 }, { "epoch": 67.94, "learning_rate": 1.6043453593923678e-05, "loss": 1.9327, "step": 23470500 }, { "epoch": 67.94, "learning_rate": 1.6042731393571693e-05, "loss": 1.9086, "step": 23471000 }, { "epoch": 67.94, "learning_rate": 1.6042007745924416e-05, "loss": 1.8858, "step": 23471500 }, { "epoch": 67.94, "learning_rate": 1.604128409827714e-05, "loss": 1.9301, "step": 23472000 }, { "epoch": 67.94, "learning_rate": 1.6040560450629864e-05, "loss": 1.8893, "step": 23472500 }, { "epoch": 67.94, "learning_rate": 1.6039836802982586e-05, "loss": 1.8902, "step": 23473000 }, { "epoch": 67.95, "learning_rate": 1.603911315533531e-05, "loss": 1.8756, "step": 23473500 }, { "epoch": 67.95, "learning_rate": 1.6038389507688034e-05, "loss": 1.9211, "step": 23474000 }, { "epoch": 67.95, "learning_rate": 1.6037665860040756e-05, "loss": 1.9046, "step": 23474500 }, { "epoch": 67.95, "learning_rate": 1.603694221239348e-05, "loss": 1.9216, "step": 23475000 }, { "epoch": 67.95, "learning_rate": 1.6036218564746204e-05, "loss": 1.8977, "step": 23475500 }, { "epoch": 67.95, "learning_rate": 1.603549636439422e-05, "loss": 1.8978, "step": 23476000 }, { "epoch": 67.95, "learning_rate": 1.603477271674694e-05, "loss": 1.9077, "step": 23476500 }, { "epoch": 67.96, "learning_rate": 1.6034049069099667e-05, "loss": 1.9325, "step": 23477000 }, { "epoch": 67.96, "learning_rate": 1.6033325421452393e-05, "loss": 1.9417, "step": 23477500 }, { "epoch": 67.96, "learning_rate": 1.6032601773805115e-05, "loss": 1.8842, "step": 23478000 }, { "epoch": 67.96, "learning_rate": 1.603187957345313e-05, "loss": 1.9181, "step": 23478500 }, { "epoch": 67.96, "learning_rate": 1.6031155925805856e-05, "loss": 1.9115, "step": 23479000 }, { "epoch": 67.96, "learning_rate": 1.603043227815858e-05, "loss": 1.9225, "step": 23479500 }, { "epoch": 67.96, "learning_rate": 1.60297086305113e-05, "loss": 1.9027, "step": 23480000 }, { "epoch": 67.97, "learning_rate": 1.6028984982864023e-05, "loss": 1.9133, "step": 23480500 }, { "epoch": 67.97, "learning_rate": 1.602826133521675e-05, "loss": 1.8877, "step": 23481000 }, { "epoch": 67.97, "learning_rate": 1.602753768756947e-05, "loss": 1.8992, "step": 23481500 }, { "epoch": 67.97, "learning_rate": 1.6026814039922193e-05, "loss": 1.8952, "step": 23482000 }, { "epoch": 67.97, "learning_rate": 1.602609039227492e-05, "loss": 1.9124, "step": 23482500 }, { "epoch": 67.97, "learning_rate": 1.602536674462764e-05, "loss": 1.9175, "step": 23483000 }, { "epoch": 67.98, "learning_rate": 1.6024643096980363e-05, "loss": 1.913, "step": 23483500 }, { "epoch": 67.98, "learning_rate": 1.6023920896628382e-05, "loss": 1.8947, "step": 23484000 }, { "epoch": 67.98, "learning_rate": 1.6023197248981108e-05, "loss": 1.896, "step": 23484500 }, { "epoch": 67.98, "learning_rate": 1.602247360133383e-05, "loss": 1.9084, "step": 23485000 }, { "epoch": 67.98, "learning_rate": 1.6021749953686552e-05, "loss": 1.9089, "step": 23485500 }, { "epoch": 67.98, "learning_rate": 1.6021026306039274e-05, "loss": 1.9046, "step": 23486000 }, { "epoch": 67.98, "learning_rate": 1.6020304105687293e-05, "loss": 1.9139, "step": 23486500 }, { "epoch": 67.99, "learning_rate": 1.6019580458040015e-05, "loss": 1.8961, "step": 23487000 }, { "epoch": 67.99, "learning_rate": 1.6018856810392738e-05, "loss": 1.9219, "step": 23487500 }, { "epoch": 67.99, "learning_rate": 1.6018133162745463e-05, "loss": 1.904, "step": 23488000 }, { "epoch": 67.99, "learning_rate": 1.6017410962393482e-05, "loss": 1.8957, "step": 23488500 }, { "epoch": 67.99, "learning_rate": 1.6016687314746205e-05, "loss": 1.8956, "step": 23489000 }, { "epoch": 67.99, "learning_rate": 1.6015963667098927e-05, "loss": 1.9031, "step": 23489500 }, { "epoch": 67.99, "learning_rate": 1.601524001945165e-05, "loss": 1.9046, "step": 23490000 }, { "epoch": 68.0, "learning_rate": 1.601451637180437e-05, "loss": 1.9052, "step": 23490500 }, { "epoch": 68.0, "learning_rate": 1.6013792724157093e-05, "loss": 1.9017, "step": 23491000 }, { "epoch": 68.0, "learning_rate": 1.601306907650982e-05, "loss": 1.9135, "step": 23491500 }, { "epoch": 68.0, "learning_rate": 1.6012345428862545e-05, "loss": 1.8981, "step": 23492000 }, { "epoch": 68.0, "eval_accuracy": 0.6819847140146029, "eval_accuracy_mlm": 0.6495766171259548, "eval_accuracy_nsp": 0.8557263523577749, "eval_loss": 2.1553232669830322, "eval_runtime": 331.691, "eval_samples_per_second": 1315.64, "eval_steps_per_second": 54.819, "step": 23492096 }, { "epoch": 68.0, "learning_rate": 1.6011621781215267e-05, "loss": 1.9085, "step": 23492500 }, { "epoch": 68.0, "learning_rate": 1.601089813356799e-05, "loss": 1.9069, "step": 23493000 }, { "epoch": 68.0, "learning_rate": 1.6010175933216008e-05, "loss": 1.8805, "step": 23493500 }, { "epoch": 68.01, "learning_rate": 1.600945228556873e-05, "loss": 1.8799, "step": 23494000 }, { "epoch": 68.01, "learning_rate": 1.6008728637921453e-05, "loss": 1.8973, "step": 23494500 }, { "epoch": 68.01, "learning_rate": 1.6008004990274175e-05, "loss": 1.8753, "step": 23495000 }, { "epoch": 68.01, "learning_rate": 1.60072813426269e-05, "loss": 1.8987, "step": 23495500 }, { "epoch": 68.01, "learning_rate": 1.6006557694979623e-05, "loss": 1.8959, "step": 23496000 }, { "epoch": 68.01, "learning_rate": 1.600583549462764e-05, "loss": 1.906, "step": 23496500 }, { "epoch": 68.01, "learning_rate": 1.6005111846980364e-05, "loss": 1.8724, "step": 23497000 }, { "epoch": 68.02, "learning_rate": 1.6004388199333086e-05, "loss": 1.8978, "step": 23497500 }, { "epoch": 68.02, "learning_rate": 1.6003665998981105e-05, "loss": 1.87, "step": 23498000 }, { "epoch": 68.02, "learning_rate": 1.6002942351333827e-05, "loss": 1.9131, "step": 23498500 }, { "epoch": 68.02, "learning_rate": 1.600221870368655e-05, "loss": 1.9028, "step": 23499000 }, { "epoch": 68.02, "learning_rate": 1.6001495056039275e-05, "loss": 1.8764, "step": 23499500 }, { "epoch": 68.02, "learning_rate": 1.6000771408391997e-05, "loss": 1.8746, "step": 23500000 }, { "epoch": 68.02, "learning_rate": 1.6000047760744723e-05, "loss": 1.8906, "step": 23500500 }, { "epoch": 68.03, "learning_rate": 1.5999324113097445e-05, "loss": 1.9123, "step": 23501000 }, { "epoch": 68.03, "learning_rate": 1.5998600465450167e-05, "loss": 1.9089, "step": 23501500 }, { "epoch": 68.03, "learning_rate": 1.599787681780289e-05, "loss": 1.9073, "step": 23502000 }, { "epoch": 68.03, "learning_rate": 1.5997153170155612e-05, "loss": 1.8743, "step": 23502500 }, { "epoch": 68.03, "learning_rate": 1.5996429522508338e-05, "loss": 1.8994, "step": 23503000 }, { "epoch": 68.03, "learning_rate": 1.599570587486106e-05, "loss": 1.8844, "step": 23503500 }, { "epoch": 68.03, "learning_rate": 1.5994982227213785e-05, "loss": 1.9179, "step": 23504000 }, { "epoch": 68.04, "learning_rate": 1.5994261474157098e-05, "loss": 1.8872, "step": 23504500 }, { "epoch": 68.04, "learning_rate": 1.599353782650982e-05, "loss": 1.9034, "step": 23505000 }, { "epoch": 68.04, "learning_rate": 1.5992814178862542e-05, "loss": 1.8874, "step": 23505500 }, { "epoch": 68.04, "learning_rate": 1.599209197851056e-05, "loss": 1.8802, "step": 23506000 }, { "epoch": 68.04, "learning_rate": 1.5991368330863283e-05, "loss": 1.9176, "step": 23506500 }, { "epoch": 68.04, "learning_rate": 1.599064468321601e-05, "loss": 1.8811, "step": 23507000 }, { "epoch": 68.04, "learning_rate": 1.598992103556873e-05, "loss": 1.8932, "step": 23507500 }, { "epoch": 68.05, "learning_rate": 1.5989197387921453e-05, "loss": 1.905, "step": 23508000 }, { "epoch": 68.05, "learning_rate": 1.5988475187569472e-05, "loss": 1.8843, "step": 23508500 }, { "epoch": 68.05, "learning_rate": 1.5987751539922195e-05, "loss": 1.9031, "step": 23509000 }, { "epoch": 68.05, "learning_rate": 1.5987027892274917e-05, "loss": 1.9187, "step": 23509500 }, { "epoch": 68.05, "learning_rate": 1.598630424462764e-05, "loss": 1.8949, "step": 23510000 }, { "epoch": 68.05, "learning_rate": 1.5985580596980365e-05, "loss": 1.8945, "step": 23510500 }, { "epoch": 68.05, "learning_rate": 1.5984856949333087e-05, "loss": 1.9056, "step": 23511000 }, { "epoch": 68.06, "learning_rate": 1.5984133301685812e-05, "loss": 1.9239, "step": 23511500 }, { "epoch": 68.06, "learning_rate": 1.5983409654038535e-05, "loss": 1.9138, "step": 23512000 }, { "epoch": 68.06, "learning_rate": 1.598268745368655e-05, "loss": 1.8937, "step": 23512500 }, { "epoch": 68.06, "learning_rate": 1.5981963806039273e-05, "loss": 1.9014, "step": 23513000 }, { "epoch": 68.06, "learning_rate": 1.5981240158391998e-05, "loss": 1.8784, "step": 23513500 }, { "epoch": 68.06, "learning_rate": 1.598051651074472e-05, "loss": 1.9006, "step": 23514000 }, { "epoch": 68.06, "learning_rate": 1.5979792863097446e-05, "loss": 1.8787, "step": 23514500 }, { "epoch": 68.07, "learning_rate": 1.597907066274546e-05, "loss": 1.8904, "step": 23515000 }, { "epoch": 68.07, "learning_rate": 1.597834846239348e-05, "loss": 1.8746, "step": 23515500 }, { "epoch": 68.07, "learning_rate": 1.5977624814746203e-05, "loss": 1.8877, "step": 23516000 }, { "epoch": 68.07, "learning_rate": 1.5976901167098925e-05, "loss": 1.8935, "step": 23516500 }, { "epoch": 68.07, "learning_rate": 1.5976177519451647e-05, "loss": 1.8775, "step": 23517000 }, { "epoch": 68.07, "learning_rate": 1.5975453871804373e-05, "loss": 1.8768, "step": 23517500 }, { "epoch": 68.07, "learning_rate": 1.597473167145239e-05, "loss": 1.88, "step": 23518000 }, { "epoch": 68.08, "learning_rate": 1.5974008023805114e-05, "loss": 1.9024, "step": 23518500 }, { "epoch": 68.08, "learning_rate": 1.5973285823453133e-05, "loss": 1.8928, "step": 23519000 }, { "epoch": 68.08, "learning_rate": 1.5972562175805855e-05, "loss": 1.9056, "step": 23519500 }, { "epoch": 68.08, "learning_rate": 1.5971838528158577e-05, "loss": 1.9061, "step": 23520000 }, { "epoch": 68.08, "learning_rate": 1.59711148805113e-05, "loss": 1.8782, "step": 23520500 }, { "epoch": 68.08, "learning_rate": 1.5970391232864025e-05, "loss": 1.8993, "step": 23521000 }, { "epoch": 68.09, "learning_rate": 1.5969667585216747e-05, "loss": 1.9025, "step": 23521500 }, { "epoch": 68.09, "learning_rate": 1.5968943937569473e-05, "loss": 1.8844, "step": 23522000 }, { "epoch": 68.09, "learning_rate": 1.5968220289922195e-05, "loss": 1.9004, "step": 23522500 }, { "epoch": 68.09, "learning_rate": 1.5967496642274918e-05, "loss": 1.871, "step": 23523000 }, { "epoch": 68.09, "learning_rate": 1.596677299462764e-05, "loss": 1.8883, "step": 23523500 }, { "epoch": 68.09, "learning_rate": 1.596605079427566e-05, "loss": 1.9001, "step": 23524000 }, { "epoch": 68.09, "learning_rate": 1.596532714662838e-05, "loss": 1.9049, "step": 23524500 }, { "epoch": 68.1, "learning_rate": 1.5964603498981103e-05, "loss": 1.8902, "step": 23525000 }, { "epoch": 68.1, "learning_rate": 1.596387985133383e-05, "loss": 1.916, "step": 23525500 }, { "epoch": 68.1, "learning_rate": 1.596315620368655e-05, "loss": 1.8838, "step": 23526000 }, { "epoch": 68.1, "learning_rate": 1.5962432556039277e-05, "loss": 1.8829, "step": 23526500 }, { "epoch": 68.1, "learning_rate": 1.5961710355687292e-05, "loss": 1.9042, "step": 23527000 }, { "epoch": 68.1, "learning_rate": 1.5960986708040014e-05, "loss": 1.8659, "step": 23527500 }, { "epoch": 68.1, "learning_rate": 1.5960263060392737e-05, "loss": 1.8961, "step": 23528000 }, { "epoch": 68.11, "learning_rate": 1.5959539412745462e-05, "loss": 1.891, "step": 23528500 }, { "epoch": 68.11, "learning_rate": 1.5958815765098185e-05, "loss": 1.8782, "step": 23529000 }, { "epoch": 68.11, "learning_rate": 1.595809211745091e-05, "loss": 1.8967, "step": 23529500 }, { "epoch": 68.11, "learning_rate": 1.5957368469803632e-05, "loss": 1.8914, "step": 23530000 }, { "epoch": 68.11, "learning_rate": 1.5956644822156355e-05, "loss": 1.9021, "step": 23530500 }, { "epoch": 68.11, "learning_rate": 1.5955921174509077e-05, "loss": 1.87, "step": 23531000 }, { "epoch": 68.11, "learning_rate": 1.5955197526861803e-05, "loss": 1.9091, "step": 23531500 }, { "epoch": 68.12, "learning_rate": 1.5954473879214525e-05, "loss": 1.8735, "step": 23532000 }, { "epoch": 68.12, "learning_rate": 1.595375167886254e-05, "loss": 1.8883, "step": 23532500 }, { "epoch": 68.12, "learning_rate": 1.5953028031215266e-05, "loss": 1.8865, "step": 23533000 }, { "epoch": 68.12, "learning_rate": 1.5952304383567988e-05, "loss": 1.8808, "step": 23533500 }, { "epoch": 68.12, "learning_rate": 1.5951582183216007e-05, "loss": 1.8807, "step": 23534000 }, { "epoch": 68.12, "learning_rate": 1.595085853556873e-05, "loss": 1.9035, "step": 23534500 }, { "epoch": 68.12, "learning_rate": 1.595013488792145e-05, "loss": 1.9001, "step": 23535000 }, { "epoch": 68.13, "learning_rate": 1.5949411240274177e-05, "loss": 1.8778, "step": 23535500 }, { "epoch": 68.13, "learning_rate": 1.59486875926269e-05, "loss": 1.8885, "step": 23536000 }, { "epoch": 68.13, "learning_rate": 1.5947963944979625e-05, "loss": 1.8963, "step": 23536500 }, { "epoch": 68.13, "learning_rate": 1.5947240297332347e-05, "loss": 1.9056, "step": 23537000 }, { "epoch": 68.13, "learning_rate": 1.5946518096980366e-05, "loss": 1.894, "step": 23537500 }, { "epoch": 68.13, "learning_rate": 1.594579444933309e-05, "loss": 1.9016, "step": 23538000 }, { "epoch": 68.13, "learning_rate": 1.5945072248981104e-05, "loss": 1.9007, "step": 23538500 }, { "epoch": 68.14, "learning_rate": 1.5944348601333826e-05, "loss": 1.9129, "step": 23539000 }, { "epoch": 68.14, "learning_rate": 1.5943624953686552e-05, "loss": 1.8823, "step": 23539500 }, { "epoch": 68.14, "learning_rate": 1.5942901306039274e-05, "loss": 1.8959, "step": 23540000 }, { "epoch": 68.14, "learning_rate": 1.5942177658392e-05, "loss": 1.9031, "step": 23540500 }, { "epoch": 68.14, "learning_rate": 1.5941454010744722e-05, "loss": 1.8776, "step": 23541000 }, { "epoch": 68.14, "learning_rate": 1.5940730363097444e-05, "loss": 1.9161, "step": 23541500 }, { "epoch": 68.14, "learning_rate": 1.5940006715450166e-05, "loss": 1.8806, "step": 23542000 }, { "epoch": 68.15, "learning_rate": 1.5939283067802892e-05, "loss": 1.8845, "step": 23542500 }, { "epoch": 68.15, "learning_rate": 1.5938559420155614e-05, "loss": 1.876, "step": 23543000 }, { "epoch": 68.15, "learning_rate": 1.593783721980363e-05, "loss": 1.9112, "step": 23543500 }, { "epoch": 68.15, "learning_rate": 1.5937113572156352e-05, "loss": 1.909, "step": 23544000 }, { "epoch": 68.15, "learning_rate": 1.5936389924509078e-05, "loss": 1.909, "step": 23544500 }, { "epoch": 68.15, "learning_rate": 1.5935666276861803e-05, "loss": 1.9121, "step": 23545000 }, { "epoch": 68.15, "learning_rate": 1.5934942629214526e-05, "loss": 1.8897, "step": 23545500 }, { "epoch": 68.16, "learning_rate": 1.5934218981567248e-05, "loss": 1.8896, "step": 23546000 }, { "epoch": 68.16, "learning_rate": 1.593349533391997e-05, "loss": 1.8941, "step": 23546500 }, { "epoch": 68.16, "learning_rate": 1.5932771686272692e-05, "loss": 1.8903, "step": 23547000 }, { "epoch": 68.16, "learning_rate": 1.5932048038625418e-05, "loss": 1.9057, "step": 23547500 }, { "epoch": 68.16, "learning_rate": 1.593132439097814e-05, "loss": 1.9015, "step": 23548000 }, { "epoch": 68.16, "learning_rate": 1.593060219062616e-05, "loss": 1.8744, "step": 23548500 }, { "epoch": 68.16, "learning_rate": 1.592987854297888e-05, "loss": 1.8954, "step": 23549000 }, { "epoch": 68.17, "learning_rate": 1.5929154895331604e-05, "loss": 1.8929, "step": 23549500 }, { "epoch": 68.17, "learning_rate": 1.592843124768433e-05, "loss": 1.9052, "step": 23550000 }, { "epoch": 68.17, "learning_rate": 1.592770760003705e-05, "loss": 1.887, "step": 23550500 }, { "epoch": 68.17, "learning_rate": 1.5926983952389774e-05, "loss": 1.8846, "step": 23551000 }, { "epoch": 68.17, "learning_rate": 1.59262603047425e-05, "loss": 1.9205, "step": 23551500 }, { "epoch": 68.17, "learning_rate": 1.592553665709522e-05, "loss": 1.9054, "step": 23552000 }, { "epoch": 68.17, "learning_rate": 1.5924813009447944e-05, "loss": 1.9189, "step": 23552500 }, { "epoch": 68.18, "learning_rate": 1.5924090809095963e-05, "loss": 1.9199, "step": 23553000 }, { "epoch": 68.18, "learning_rate": 1.5923367161448685e-05, "loss": 1.8914, "step": 23553500 }, { "epoch": 68.18, "learning_rate": 1.5922643513801407e-05, "loss": 1.9158, "step": 23554000 }, { "epoch": 68.18, "learning_rate": 1.592191986615413e-05, "loss": 1.8723, "step": 23554500 }, { "epoch": 68.18, "learning_rate": 1.5921196218506855e-05, "loss": 1.903, "step": 23555000 }, { "epoch": 68.18, "learning_rate": 1.5920474018154874e-05, "loss": 1.9165, "step": 23555500 }, { "epoch": 68.18, "learning_rate": 1.5919751817802893e-05, "loss": 1.9133, "step": 23556000 }, { "epoch": 68.19, "learning_rate": 1.5919028170155615e-05, "loss": 1.9091, "step": 23556500 }, { "epoch": 68.19, "learning_rate": 1.5918304522508337e-05, "loss": 1.8677, "step": 23557000 }, { "epoch": 68.19, "learning_rate": 1.5917582322156356e-05, "loss": 1.9186, "step": 23557500 }, { "epoch": 68.19, "learning_rate": 1.591685867450908e-05, "loss": 1.8929, "step": 23558000 }, { "epoch": 68.19, "learning_rate": 1.59161350268618e-05, "loss": 1.8902, "step": 23558500 }, { "epoch": 68.19, "learning_rate": 1.5915411379214526e-05, "loss": 1.8757, "step": 23559000 }, { "epoch": 68.2, "learning_rate": 1.591468773156725e-05, "loss": 1.9202, "step": 23559500 }, { "epoch": 68.2, "learning_rate": 1.591396408391997e-05, "loss": 1.9048, "step": 23560000 }, { "epoch": 68.2, "learning_rate": 1.5913240436272693e-05, "loss": 1.9045, "step": 23560500 }, { "epoch": 68.2, "learning_rate": 1.5912518235920712e-05, "loss": 1.8877, "step": 23561000 }, { "epoch": 68.2, "learning_rate": 1.5911794588273434e-05, "loss": 1.9289, "step": 23561500 }, { "epoch": 68.2, "learning_rate": 1.5911070940626156e-05, "loss": 1.9018, "step": 23562000 }, { "epoch": 68.2, "learning_rate": 1.5910347292978882e-05, "loss": 1.899, "step": 23562500 }, { "epoch": 68.21, "learning_rate": 1.5909623645331608e-05, "loss": 1.8986, "step": 23563000 }, { "epoch": 68.21, "learning_rate": 1.590889999768433e-05, "loss": 1.9041, "step": 23563500 }, { "epoch": 68.21, "learning_rate": 1.5908176350037052e-05, "loss": 1.9037, "step": 23564000 }, { "epoch": 68.21, "learning_rate": 1.5907452702389774e-05, "loss": 1.9237, "step": 23564500 }, { "epoch": 68.21, "learning_rate": 1.5906729054742497e-05, "loss": 1.8942, "step": 23565000 }, { "epoch": 68.21, "learning_rate": 1.590600540709522e-05, "loss": 1.889, "step": 23565500 }, { "epoch": 68.21, "learning_rate": 1.5905281759447944e-05, "loss": 1.882, "step": 23566000 }, { "epoch": 68.22, "learning_rate": 1.590455811180067e-05, "loss": 1.8912, "step": 23566500 }, { "epoch": 68.22, "learning_rate": 1.5903837358743982e-05, "loss": 1.9059, "step": 23567000 }, { "epoch": 68.22, "learning_rate": 1.5903113711096705e-05, "loss": 1.8955, "step": 23567500 }, { "epoch": 68.22, "learning_rate": 1.5902390063449427e-05, "loss": 1.927, "step": 23568000 }, { "epoch": 68.22, "learning_rate": 1.590166641580215e-05, "loss": 1.8996, "step": 23568500 }, { "epoch": 68.22, "learning_rate": 1.590094276815487e-05, "loss": 1.8792, "step": 23569000 }, { "epoch": 68.22, "learning_rate": 1.590022056780289e-05, "loss": 1.9179, "step": 23569500 }, { "epoch": 68.23, "learning_rate": 1.5899496920155612e-05, "loss": 1.8969, "step": 23570000 }, { "epoch": 68.23, "learning_rate": 1.5898773272508338e-05, "loss": 1.9089, "step": 23570500 }, { "epoch": 68.23, "learning_rate": 1.589804962486106e-05, "loss": 1.8919, "step": 23571000 }, { "epoch": 68.23, "learning_rate": 1.5897325977213783e-05, "loss": 1.8783, "step": 23571500 }, { "epoch": 68.23, "learning_rate": 1.58966037768618e-05, "loss": 1.899, "step": 23572000 }, { "epoch": 68.23, "learning_rate": 1.5895880129214524e-05, "loss": 1.8815, "step": 23572500 }, { "epoch": 68.23, "learning_rate": 1.5895156481567246e-05, "loss": 1.8757, "step": 23573000 }, { "epoch": 68.24, "learning_rate": 1.589443283391997e-05, "loss": 1.9276, "step": 23573500 }, { "epoch": 68.24, "learning_rate": 1.5893709186272697e-05, "loss": 1.9119, "step": 23574000 }, { "epoch": 68.24, "learning_rate": 1.589298553862542e-05, "loss": 1.8671, "step": 23574500 }, { "epoch": 68.24, "learning_rate": 1.589226189097814e-05, "loss": 1.8986, "step": 23575000 }, { "epoch": 68.24, "learning_rate": 1.5891538243330864e-05, "loss": 1.8656, "step": 23575500 }, { "epoch": 68.24, "learning_rate": 1.5890814595683586e-05, "loss": 1.8753, "step": 23576000 }, { "epoch": 68.24, "learning_rate": 1.5890092395331605e-05, "loss": 1.9065, "step": 23576500 }, { "epoch": 68.25, "learning_rate": 1.5889368747684327e-05, "loss": 1.8968, "step": 23577000 }, { "epoch": 68.25, "learning_rate": 1.5888645100037053e-05, "loss": 1.8829, "step": 23577500 }, { "epoch": 68.25, "learning_rate": 1.5887921452389775e-05, "loss": 1.8795, "step": 23578000 }, { "epoch": 68.25, "learning_rate": 1.5887199252037794e-05, "loss": 1.9057, "step": 23578500 }, { "epoch": 68.25, "learning_rate": 1.5886475604390516e-05, "loss": 1.8917, "step": 23579000 }, { "epoch": 68.25, "learning_rate": 1.588575195674324e-05, "loss": 1.8884, "step": 23579500 }, { "epoch": 68.25, "learning_rate": 1.5885029756391258e-05, "loss": 1.9101, "step": 23580000 }, { "epoch": 68.26, "learning_rate": 1.588430610874398e-05, "loss": 1.8851, "step": 23580500 }, { "epoch": 68.26, "learning_rate": 1.5883582461096702e-05, "loss": 1.9191, "step": 23581000 }, { "epoch": 68.26, "learning_rate": 1.5882858813449428e-05, "loss": 1.9019, "step": 23581500 }, { "epoch": 68.26, "learning_rate": 1.588213516580215e-05, "loss": 1.882, "step": 23582000 }, { "epoch": 68.26, "learning_rate": 1.5881411518154872e-05, "loss": 1.8893, "step": 23582500 }, { "epoch": 68.26, "learning_rate": 1.5880687870507598e-05, "loss": 1.9109, "step": 23583000 }, { "epoch": 68.26, "learning_rate": 1.587996422286032e-05, "loss": 1.8782, "step": 23583500 }, { "epoch": 68.27, "learning_rate": 1.5879240575213042e-05, "loss": 1.8933, "step": 23584000 }, { "epoch": 68.27, "learning_rate": 1.5878516927565764e-05, "loss": 1.8799, "step": 23584500 }, { "epoch": 68.27, "learning_rate": 1.587779327991849e-05, "loss": 1.9055, "step": 23585000 }, { "epoch": 68.27, "learning_rate": 1.5877069632271212e-05, "loss": 1.9136, "step": 23585500 }, { "epoch": 68.27, "learning_rate": 1.587634743191923e-05, "loss": 1.8594, "step": 23586000 }, { "epoch": 68.27, "learning_rate": 1.5875623784271953e-05, "loss": 1.8989, "step": 23586500 }, { "epoch": 68.27, "learning_rate": 1.5874900136624676e-05, "loss": 1.9075, "step": 23587000 }, { "epoch": 68.28, "learning_rate": 1.5874176488977398e-05, "loss": 1.8579, "step": 23587500 }, { "epoch": 68.28, "learning_rate": 1.5873452841330124e-05, "loss": 1.8949, "step": 23588000 }, { "epoch": 68.28, "learning_rate": 1.587272919368285e-05, "loss": 1.9007, "step": 23588500 }, { "epoch": 68.28, "learning_rate": 1.587200554603557e-05, "loss": 1.9059, "step": 23589000 }, { "epoch": 68.28, "learning_rate": 1.5871281898388294e-05, "loss": 1.8757, "step": 23589500 }, { "epoch": 68.28, "learning_rate": 1.5870558250741016e-05, "loss": 1.9072, "step": 23590000 }, { "epoch": 68.28, "learning_rate": 1.5869837497684328e-05, "loss": 1.892, "step": 23590500 }, { "epoch": 68.29, "learning_rate": 1.586911385003705e-05, "loss": 1.8839, "step": 23591000 }, { "epoch": 68.29, "learning_rate": 1.5868390202389773e-05, "loss": 1.9067, "step": 23591500 }, { "epoch": 68.29, "learning_rate": 1.5867666554742498e-05, "loss": 1.9157, "step": 23592000 }, { "epoch": 68.29, "learning_rate": 1.5866942907095224e-05, "loss": 1.8899, "step": 23592500 }, { "epoch": 68.29, "learning_rate": 1.5866219259447946e-05, "loss": 1.904, "step": 23593000 }, { "epoch": 68.29, "learning_rate": 1.586549705909596e-05, "loss": 1.8974, "step": 23593500 }, { "epoch": 68.29, "learning_rate": 1.5864773411448687e-05, "loss": 1.8952, "step": 23594000 }, { "epoch": 68.3, "learning_rate": 1.586404976380141e-05, "loss": 1.896, "step": 23594500 }, { "epoch": 68.3, "learning_rate": 1.5863326116154132e-05, "loss": 1.8962, "step": 23595000 }, { "epoch": 68.3, "learning_rate": 1.5862602468506854e-05, "loss": 1.876, "step": 23595500 }, { "epoch": 68.3, "learning_rate": 1.586187882085958e-05, "loss": 1.9134, "step": 23596000 }, { "epoch": 68.3, "learning_rate": 1.5861155173212302e-05, "loss": 1.9062, "step": 23596500 }, { "epoch": 68.3, "learning_rate": 1.5860431525565024e-05, "loss": 1.9187, "step": 23597000 }, { "epoch": 68.31, "learning_rate": 1.585970787791775e-05, "loss": 1.8951, "step": 23597500 }, { "epoch": 68.31, "learning_rate": 1.5858985677565765e-05, "loss": 1.884, "step": 23598000 }, { "epoch": 68.31, "learning_rate": 1.5858262029918487e-05, "loss": 1.9155, "step": 23598500 }, { "epoch": 68.31, "learning_rate": 1.5857538382271213e-05, "loss": 1.8772, "step": 23599000 }, { "epoch": 68.31, "learning_rate": 1.5856814734623935e-05, "loss": 1.9017, "step": 23599500 }, { "epoch": 68.31, "learning_rate": 1.5856092534271954e-05, "loss": 1.8803, "step": 23600000 }, { "epoch": 68.31, "learning_rate": 1.5855368886624676e-05, "loss": 1.9241, "step": 23600500 }, { "epoch": 68.32, "learning_rate": 1.58546452389774e-05, "loss": 1.9115, "step": 23601000 }, { "epoch": 68.32, "learning_rate": 1.5853921591330124e-05, "loss": 1.8837, "step": 23601500 }, { "epoch": 68.32, "learning_rate": 1.585319939097814e-05, "loss": 1.8999, "step": 23602000 }, { "epoch": 68.32, "learning_rate": 1.5852475743330862e-05, "loss": 1.9023, "step": 23602500 }, { "epoch": 68.32, "learning_rate": 1.585175354297888e-05, "loss": 1.8959, "step": 23603000 }, { "epoch": 68.32, "learning_rate": 1.5851029895331603e-05, "loss": 1.8934, "step": 23603500 }, { "epoch": 68.32, "learning_rate": 1.5850307694979626e-05, "loss": 1.8977, "step": 23604000 }, { "epoch": 68.33, "learning_rate": 1.5849584047332348e-05, "loss": 1.9255, "step": 23604500 }, { "epoch": 68.33, "learning_rate": 1.584886039968507e-05, "loss": 1.9027, "step": 23605000 }, { "epoch": 68.33, "learning_rate": 1.5848136752037792e-05, "loss": 1.8876, "step": 23605500 }, { "epoch": 68.33, "learning_rate": 1.5847413104390515e-05, "loss": 1.9137, "step": 23606000 }, { "epoch": 68.33, "learning_rate": 1.5846689456743237e-05, "loss": 1.901, "step": 23606500 }, { "epoch": 68.33, "learning_rate": 1.5845965809095962e-05, "loss": 1.9041, "step": 23607000 }, { "epoch": 68.33, "learning_rate": 1.5845242161448688e-05, "loss": 1.9038, "step": 23607500 }, { "epoch": 68.34, "learning_rate": 1.584451851380141e-05, "loss": 1.9024, "step": 23608000 }, { "epoch": 68.34, "learning_rate": 1.5843794866154133e-05, "loss": 1.9136, "step": 23608500 }, { "epoch": 68.34, "learning_rate": 1.5843071218506855e-05, "loss": 1.893, "step": 23609000 }, { "epoch": 68.34, "learning_rate": 1.5842349018154874e-05, "loss": 1.8911, "step": 23609500 }, { "epoch": 68.34, "learning_rate": 1.5841625370507596e-05, "loss": 1.8856, "step": 23610000 }, { "epoch": 68.34, "learning_rate": 1.5840901722860318e-05, "loss": 1.9229, "step": 23610500 }, { "epoch": 68.34, "learning_rate": 1.584017807521304e-05, "loss": 1.88, "step": 23611000 }, { "epoch": 68.35, "learning_rate": 1.5839454427565766e-05, "loss": 1.9092, "step": 23611500 }, { "epoch": 68.35, "learning_rate": 1.5838730779918488e-05, "loss": 1.9012, "step": 23612000 }, { "epoch": 68.35, "learning_rate": 1.5838007132271214e-05, "loss": 1.9085, "step": 23612500 }, { "epoch": 68.35, "learning_rate": 1.5837283484623936e-05, "loss": 1.8975, "step": 23613000 }, { "epoch": 68.35, "learning_rate": 1.583655983697666e-05, "loss": 1.9231, "step": 23613500 }, { "epoch": 68.35, "learning_rate": 1.5835837636624677e-05, "loss": 1.9316, "step": 23614000 }, { "epoch": 68.35, "learning_rate": 1.58351139889774e-05, "loss": 1.918, "step": 23614500 }, { "epoch": 68.36, "learning_rate": 1.5834390341330125e-05, "loss": 1.9402, "step": 23615000 }, { "epoch": 68.36, "learning_rate": 1.5833666693682847e-05, "loss": 1.892, "step": 23615500 }, { "epoch": 68.36, "learning_rate": 1.583294594062616e-05, "loss": 1.875, "step": 23616000 }, { "epoch": 68.36, "learning_rate": 1.5832223740274175e-05, "loss": 1.9165, "step": 23616500 }, { "epoch": 68.36, "learning_rate": 1.58315000926269e-05, "loss": 1.9121, "step": 23617000 }, { "epoch": 68.36, "learning_rate": 1.5830776444979623e-05, "loss": 1.8898, "step": 23617500 }, { "epoch": 68.36, "learning_rate": 1.583005424462764e-05, "loss": 1.8799, "step": 23618000 }, { "epoch": 68.37, "learning_rate": 1.5829330596980364e-05, "loss": 1.8861, "step": 23618500 }, { "epoch": 68.37, "learning_rate": 1.5828606949333086e-05, "loss": 1.873, "step": 23619000 }, { "epoch": 68.37, "learning_rate": 1.5827884748981105e-05, "loss": 1.9068, "step": 23619500 }, { "epoch": 68.37, "learning_rate": 1.5827161101333828e-05, "loss": 1.8923, "step": 23620000 }, { "epoch": 68.37, "learning_rate": 1.5826437453686553e-05, "loss": 1.9045, "step": 23620500 }, { "epoch": 68.37, "learning_rate": 1.5825713806039275e-05, "loss": 1.8792, "step": 23621000 }, { "epoch": 68.37, "learning_rate": 1.5824990158391998e-05, "loss": 1.8928, "step": 23621500 }, { "epoch": 68.38, "learning_rate": 1.582426651074472e-05, "loss": 1.9089, "step": 23622000 }, { "epoch": 68.38, "learning_rate": 1.5823542863097442e-05, "loss": 1.8972, "step": 23622500 }, { "epoch": 68.38, "learning_rate": 1.5822819215450168e-05, "loss": 1.8966, "step": 23623000 }, { "epoch": 68.38, "learning_rate": 1.582209556780289e-05, "loss": 1.8872, "step": 23623500 }, { "epoch": 68.38, "learning_rate": 1.5821371920155616e-05, "loss": 1.8886, "step": 23624000 }, { "epoch": 68.38, "learning_rate": 1.5820648272508338e-05, "loss": 1.9063, "step": 23624500 }, { "epoch": 68.38, "learning_rate": 1.581992462486106e-05, "loss": 1.9079, "step": 23625000 }, { "epoch": 68.39, "learning_rate": 1.5819200977213782e-05, "loss": 1.8935, "step": 23625500 }, { "epoch": 68.39, "learning_rate": 1.5818477329566505e-05, "loss": 1.8885, "step": 23626000 }, { "epoch": 68.39, "learning_rate": 1.581775368191923e-05, "loss": 1.923, "step": 23626500 }, { "epoch": 68.39, "learning_rate": 1.5817030034271952e-05, "loss": 1.917, "step": 23627000 }, { "epoch": 68.39, "learning_rate": 1.5816306386624678e-05, "loss": 1.8633, "step": 23627500 }, { "epoch": 68.39, "learning_rate": 1.5815584186272694e-05, "loss": 1.8834, "step": 23628000 }, { "epoch": 68.39, "learning_rate": 1.5814860538625416e-05, "loss": 1.9026, "step": 23628500 }, { "epoch": 68.4, "learning_rate": 1.581413689097814e-05, "loss": 1.8971, "step": 23629000 }, { "epoch": 68.4, "learning_rate": 1.5813413243330864e-05, "loss": 1.8958, "step": 23629500 }, { "epoch": 68.4, "learning_rate": 1.581269104297888e-05, "loss": 1.8914, "step": 23630000 }, { "epoch": 68.4, "learning_rate": 1.5811967395331605e-05, "loss": 1.9068, "step": 23630500 }, { "epoch": 68.4, "learning_rate": 1.581124374768433e-05, "loss": 1.8713, "step": 23631000 }, { "epoch": 68.4, "learning_rate": 1.5810521547332346e-05, "loss": 1.8971, "step": 23631500 }, { "epoch": 68.4, "learning_rate": 1.5809799346980365e-05, "loss": 1.9177, "step": 23632000 }, { "epoch": 68.41, "learning_rate": 1.5809075699333087e-05, "loss": 1.8884, "step": 23632500 }, { "epoch": 68.41, "learning_rate": 1.580835205168581e-05, "loss": 1.9214, "step": 23633000 }, { "epoch": 68.41, "learning_rate": 1.5807628404038532e-05, "loss": 1.8824, "step": 23633500 }, { "epoch": 68.41, "learning_rate": 1.5806904756391257e-05, "loss": 1.9217, "step": 23634000 }, { "epoch": 68.41, "learning_rate": 1.580618110874398e-05, "loss": 1.9109, "step": 23634500 }, { "epoch": 68.41, "learning_rate": 1.5805458908392e-05, "loss": 1.8837, "step": 23635000 }, { "epoch": 68.42, "learning_rate": 1.580473526074472e-05, "loss": 1.9007, "step": 23635500 }, { "epoch": 68.42, "learning_rate": 1.5804011613097443e-05, "loss": 1.9178, "step": 23636000 }, { "epoch": 68.42, "learning_rate": 1.580328796545017e-05, "loss": 1.913, "step": 23636500 }, { "epoch": 68.42, "learning_rate": 1.580256431780289e-05, "loss": 1.9002, "step": 23637000 }, { "epoch": 68.42, "learning_rate": 1.5801840670155613e-05, "loss": 1.8987, "step": 23637500 }, { "epoch": 68.42, "learning_rate": 1.580111702250834e-05, "loss": 1.8997, "step": 23638000 }, { "epoch": 68.42, "learning_rate": 1.580039337486106e-05, "loss": 1.8943, "step": 23638500 }, { "epoch": 68.43, "learning_rate": 1.5799669727213783e-05, "loss": 1.9056, "step": 23639000 }, { "epoch": 68.43, "learning_rate": 1.5798946079566505e-05, "loss": 1.8956, "step": 23639500 }, { "epoch": 68.43, "learning_rate": 1.579822243191923e-05, "loss": 1.917, "step": 23640000 }, { "epoch": 68.43, "learning_rate": 1.5797498784271953e-05, "loss": 1.8782, "step": 23640500 }, { "epoch": 68.43, "learning_rate": 1.579677658391997e-05, "loss": 1.8905, "step": 23641000 }, { "epoch": 68.43, "learning_rate": 1.5796052936272694e-05, "loss": 1.8931, "step": 23641500 }, { "epoch": 68.43, "learning_rate": 1.579532928862542e-05, "loss": 1.9226, "step": 23642000 }, { "epoch": 68.44, "learning_rate": 1.5794605640978142e-05, "loss": 1.9075, "step": 23642500 }, { "epoch": 68.44, "learning_rate": 1.5793883440626158e-05, "loss": 1.894, "step": 23643000 }, { "epoch": 68.44, "learning_rate": 1.579315979297888e-05, "loss": 1.8986, "step": 23643500 }, { "epoch": 68.44, "learning_rate": 1.5792436145331606e-05, "loss": 1.8715, "step": 23644000 }, { "epoch": 68.44, "learning_rate": 1.5791712497684328e-05, "loss": 1.8888, "step": 23644500 }, { "epoch": 68.44, "learning_rate": 1.5790988850037054e-05, "loss": 1.891, "step": 23645000 }, { "epoch": 68.44, "learning_rate": 1.5790265202389776e-05, "loss": 1.899, "step": 23645500 }, { "epoch": 68.45, "learning_rate": 1.5789541554742498e-05, "loss": 1.891, "step": 23646000 }, { "epoch": 68.45, "learning_rate": 1.578881790709522e-05, "loss": 1.9065, "step": 23646500 }, { "epoch": 68.45, "learning_rate": 1.5788094259447946e-05, "loss": 1.8981, "step": 23647000 }, { "epoch": 68.45, "learning_rate": 1.5787370611800668e-05, "loss": 1.8932, "step": 23647500 }, { "epoch": 68.45, "learning_rate": 1.5786648411448684e-05, "loss": 1.8976, "step": 23648000 }, { "epoch": 68.45, "learning_rate": 1.5785924763801406e-05, "loss": 1.9218, "step": 23648500 }, { "epoch": 68.45, "learning_rate": 1.578520111615413e-05, "loss": 1.873, "step": 23649000 }, { "epoch": 68.46, "learning_rate": 1.5784477468506857e-05, "loss": 1.899, "step": 23649500 }, { "epoch": 68.46, "learning_rate": 1.578375382085958e-05, "loss": 1.8993, "step": 23650000 }, { "epoch": 68.46, "learning_rate": 1.57830301732123e-05, "loss": 1.9171, "step": 23650500 }, { "epoch": 68.46, "learning_rate": 1.5782306525565024e-05, "loss": 1.8965, "step": 23651000 }, { "epoch": 68.46, "learning_rate": 1.5781582877917746e-05, "loss": 1.8886, "step": 23651500 }, { "epoch": 68.46, "learning_rate": 1.5780859230270468e-05, "loss": 1.8948, "step": 23652000 }, { "epoch": 68.46, "learning_rate": 1.5780135582623194e-05, "loss": 1.8712, "step": 23652500 }, { "epoch": 68.47, "learning_rate": 1.577941193497592e-05, "loss": 1.8825, "step": 23653000 }, { "epoch": 68.47, "learning_rate": 1.5778688287328642e-05, "loss": 1.8987, "step": 23653500 }, { "epoch": 68.47, "learning_rate": 1.5777966086976657e-05, "loss": 1.9104, "step": 23654000 }, { "epoch": 68.47, "learning_rate": 1.5777243886624676e-05, "loss": 1.9008, "step": 23654500 }, { "epoch": 68.47, "learning_rate": 1.57765202389774e-05, "loss": 1.8708, "step": 23655000 }, { "epoch": 68.47, "learning_rate": 1.577579659133012e-05, "loss": 1.8835, "step": 23655500 }, { "epoch": 68.47, "learning_rate": 1.5775072943682846e-05, "loss": 1.8898, "step": 23656000 }, { "epoch": 68.48, "learning_rate": 1.5774349296035572e-05, "loss": 1.8846, "step": 23656500 }, { "epoch": 68.48, "learning_rate": 1.5773625648388294e-05, "loss": 1.9067, "step": 23657000 }, { "epoch": 68.48, "learning_rate": 1.5772902000741016e-05, "loss": 1.9055, "step": 23657500 }, { "epoch": 68.48, "learning_rate": 1.577217835309374e-05, "loss": 1.8975, "step": 23658000 }, { "epoch": 68.48, "learning_rate": 1.577145470544646e-05, "loss": 1.9049, "step": 23658500 }, { "epoch": 68.48, "learning_rate": 1.5770731057799183e-05, "loss": 1.8899, "step": 23659000 }, { "epoch": 68.48, "learning_rate": 1.5770010304742495e-05, "loss": 1.8841, "step": 23659500 }, { "epoch": 68.49, "learning_rate": 1.576928665709522e-05, "loss": 1.8977, "step": 23660000 }, { "epoch": 68.49, "learning_rate": 1.576856445674324e-05, "loss": 1.8788, "step": 23660500 }, { "epoch": 68.49, "learning_rate": 1.5767840809095962e-05, "loss": 1.8851, "step": 23661000 }, { "epoch": 68.49, "learning_rate": 1.5767117161448684e-05, "loss": 1.8806, "step": 23661500 }, { "epoch": 68.49, "learning_rate": 1.576639351380141e-05, "loss": 1.8926, "step": 23662000 }, { "epoch": 68.49, "learning_rate": 1.5765669866154132e-05, "loss": 1.8913, "step": 23662500 }, { "epoch": 68.49, "learning_rate": 1.5764946218506855e-05, "loss": 1.9034, "step": 23663000 }, { "epoch": 68.5, "learning_rate": 1.576422257085958e-05, "loss": 1.8891, "step": 23663500 }, { "epoch": 68.5, "learning_rate": 1.5763498923212302e-05, "loss": 1.8764, "step": 23664000 }, { "epoch": 68.5, "learning_rate": 1.5762775275565025e-05, "loss": 1.9063, "step": 23664500 }, { "epoch": 68.5, "learning_rate": 1.5762053075213044e-05, "loss": 1.8982, "step": 23665000 }, { "epoch": 68.5, "learning_rate": 1.5761329427565766e-05, "loss": 1.8806, "step": 23665500 }, { "epoch": 68.5, "learning_rate": 1.5760605779918488e-05, "loss": 1.8838, "step": 23666000 }, { "epoch": 68.5, "learning_rate": 1.575988213227121e-05, "loss": 1.9351, "step": 23666500 }, { "epoch": 68.51, "learning_rate": 1.575915993191923e-05, "loss": 1.9046, "step": 23667000 }, { "epoch": 68.51, "learning_rate": 1.5758436284271955e-05, "loss": 1.8967, "step": 23667500 }, { "epoch": 68.51, "learning_rate": 1.5757712636624677e-05, "loss": 1.9153, "step": 23668000 }, { "epoch": 68.51, "learning_rate": 1.57569889889774e-05, "loss": 1.9075, "step": 23668500 }, { "epoch": 68.51, "learning_rate": 1.575626534133012e-05, "loss": 1.9127, "step": 23669000 }, { "epoch": 68.51, "learning_rate": 1.5755541693682847e-05, "loss": 1.8692, "step": 23669500 }, { "epoch": 68.51, "learning_rate": 1.5754819493330863e-05, "loss": 1.8956, "step": 23670000 }, { "epoch": 68.52, "learning_rate": 1.5754095845683585e-05, "loss": 1.9298, "step": 23670500 }, { "epoch": 68.52, "learning_rate": 1.5753373645331604e-05, "loss": 1.8901, "step": 23671000 }, { "epoch": 68.52, "learning_rate": 1.575264999768433e-05, "loss": 1.9241, "step": 23671500 }, { "epoch": 68.52, "learning_rate": 1.5751926350037052e-05, "loss": 1.8809, "step": 23672000 }, { "epoch": 68.52, "learning_rate": 1.5751202702389774e-05, "loss": 1.9001, "step": 23672500 }, { "epoch": 68.52, "learning_rate": 1.57504790547425e-05, "loss": 1.9026, "step": 23673000 }, { "epoch": 68.53, "learning_rate": 1.5749755407095222e-05, "loss": 1.9009, "step": 23673500 }, { "epoch": 68.53, "learning_rate": 1.5749031759447944e-05, "loss": 1.9367, "step": 23674000 }, { "epoch": 68.53, "learning_rate": 1.5748308111800666e-05, "loss": 1.8907, "step": 23674500 }, { "epoch": 68.53, "learning_rate": 1.5747584464153392e-05, "loss": 1.9224, "step": 23675000 }, { "epoch": 68.53, "learning_rate": 1.5746860816506114e-05, "loss": 1.8891, "step": 23675500 }, { "epoch": 68.53, "learning_rate": 1.5746138616154133e-05, "loss": 1.8855, "step": 23676000 }, { "epoch": 68.53, "learning_rate": 1.5745414968506855e-05, "loss": 1.9074, "step": 23676500 }, { "epoch": 68.54, "learning_rate": 1.5744691320859578e-05, "loss": 1.8879, "step": 23677000 }, { "epoch": 68.54, "learning_rate": 1.57439676732123e-05, "loss": 1.901, "step": 23677500 }, { "epoch": 68.54, "learning_rate": 1.5743244025565025e-05, "loss": 1.902, "step": 23678000 }, { "epoch": 68.54, "learning_rate": 1.574252037791775e-05, "loss": 1.9228, "step": 23678500 }, { "epoch": 68.54, "learning_rate": 1.5741796730270473e-05, "loss": 1.8827, "step": 23679000 }, { "epoch": 68.54, "learning_rate": 1.5741073082623196e-05, "loss": 1.9029, "step": 23679500 }, { "epoch": 68.54, "learning_rate": 1.5740349434975918e-05, "loss": 1.9109, "step": 23680000 }, { "epoch": 68.55, "learning_rate": 1.573962578732864e-05, "loss": 1.875, "step": 23680500 }, { "epoch": 68.55, "learning_rate": 1.573890358697666e-05, "loss": 1.9027, "step": 23681000 }, { "epoch": 68.55, "learning_rate": 1.573817993932938e-05, "loss": 1.8972, "step": 23681500 }, { "epoch": 68.55, "learning_rate": 1.5737456291682103e-05, "loss": 1.914, "step": 23682000 }, { "epoch": 68.55, "learning_rate": 1.573673264403483e-05, "loss": 1.9034, "step": 23682500 }, { "epoch": 68.55, "learning_rate": 1.573600899638755e-05, "loss": 1.8698, "step": 23683000 }, { "epoch": 68.55, "learning_rate": 1.5735285348740273e-05, "loss": 1.9202, "step": 23683500 }, { "epoch": 68.56, "learning_rate": 1.5734561701093e-05, "loss": 1.866, "step": 23684000 }, { "epoch": 68.56, "learning_rate": 1.5733839500741015e-05, "loss": 1.8946, "step": 23684500 }, { "epoch": 68.56, "learning_rate": 1.5733115853093737e-05, "loss": 1.8692, "step": 23685000 }, { "epoch": 68.56, "learning_rate": 1.5732392205446463e-05, "loss": 1.8924, "step": 23685500 }, { "epoch": 68.56, "learning_rate": 1.5731668557799188e-05, "loss": 1.8919, "step": 23686000 }, { "epoch": 68.56, "learning_rate": 1.5730946357447204e-05, "loss": 1.8928, "step": 23686500 }, { "epoch": 68.56, "learning_rate": 1.5730225604390516e-05, "loss": 1.9081, "step": 23687000 }, { "epoch": 68.57, "learning_rate": 1.5729501956743238e-05, "loss": 1.9149, "step": 23687500 }, { "epoch": 68.57, "learning_rate": 1.5728778309095964e-05, "loss": 1.9086, "step": 23688000 }, { "epoch": 68.57, "learning_rate": 1.5728054661448686e-05, "loss": 1.8997, "step": 23688500 }, { "epoch": 68.57, "learning_rate": 1.5727331013801408e-05, "loss": 1.8994, "step": 23689000 }, { "epoch": 68.57, "learning_rate": 1.5726608813449424e-05, "loss": 1.9193, "step": 23689500 }, { "epoch": 68.57, "learning_rate": 1.5725886613097443e-05, "loss": 1.8846, "step": 23690000 }, { "epoch": 68.57, "learning_rate": 1.572516296545017e-05, "loss": 1.8978, "step": 23690500 }, { "epoch": 68.58, "learning_rate": 1.572443931780289e-05, "loss": 1.9201, "step": 23691000 }, { "epoch": 68.58, "learning_rate": 1.5723715670155613e-05, "loss": 1.9094, "step": 23691500 }, { "epoch": 68.58, "learning_rate": 1.572299202250834e-05, "loss": 1.8919, "step": 23692000 }, { "epoch": 68.58, "learning_rate": 1.572226837486106e-05, "loss": 1.8859, "step": 23692500 }, { "epoch": 68.58, "learning_rate": 1.5721544727213783e-05, "loss": 1.8797, "step": 23693000 }, { "epoch": 68.58, "learning_rate": 1.5720822526861802e-05, "loss": 1.9046, "step": 23693500 }, { "epoch": 68.58, "learning_rate": 1.5720098879214528e-05, "loss": 1.8889, "step": 23694000 }, { "epoch": 68.59, "learning_rate": 1.571937523156725e-05, "loss": 1.9049, "step": 23694500 }, { "epoch": 68.59, "learning_rate": 1.5718651583919972e-05, "loss": 1.9053, "step": 23695000 }, { "epoch": 68.59, "learning_rate": 1.5717927936272694e-05, "loss": 1.9044, "step": 23695500 }, { "epoch": 68.59, "learning_rate": 1.5717205735920713e-05, "loss": 1.8948, "step": 23696000 }, { "epoch": 68.59, "learning_rate": 1.5716482088273435e-05, "loss": 1.8995, "step": 23696500 }, { "epoch": 68.59, "learning_rate": 1.5715758440626158e-05, "loss": 1.8897, "step": 23697000 }, { "epoch": 68.59, "learning_rate": 1.5715036240274177e-05, "loss": 1.8894, "step": 23697500 }, { "epoch": 68.6, "learning_rate": 1.5714312592626902e-05, "loss": 1.9076, "step": 23698000 }, { "epoch": 68.6, "learning_rate": 1.5713588944979624e-05, "loss": 1.9039, "step": 23698500 }, { "epoch": 68.6, "learning_rate": 1.5712865297332347e-05, "loss": 1.91, "step": 23699000 }, { "epoch": 68.6, "learning_rate": 1.571214164968507e-05, "loss": 1.8842, "step": 23699500 }, { "epoch": 68.6, "learning_rate": 1.571141800203779e-05, "loss": 1.8901, "step": 23700000 }, { "epoch": 68.6, "learning_rate": 1.5710694354390513e-05, "loss": 1.9005, "step": 23700500 }, { "epoch": 68.6, "learning_rate": 1.570997070674324e-05, "loss": 1.8782, "step": 23701000 }, { "epoch": 68.61, "learning_rate": 1.5709247059095965e-05, "loss": 1.8857, "step": 23701500 }, { "epoch": 68.61, "learning_rate": 1.5708523411448687e-05, "loss": 1.9061, "step": 23702000 }, { "epoch": 68.61, "learning_rate": 1.570779976380141e-05, "loss": 1.8832, "step": 23702500 }, { "epoch": 68.61, "learning_rate": 1.570707611615413e-05, "loss": 1.9023, "step": 23703000 }, { "epoch": 68.61, "learning_rate": 1.5706352468506854e-05, "loss": 1.9025, "step": 23703500 }, { "epoch": 68.61, "learning_rate": 1.570562882085958e-05, "loss": 1.8969, "step": 23704000 }, { "epoch": 68.61, "learning_rate": 1.57049051732123e-05, "loss": 1.887, "step": 23704500 }, { "epoch": 68.62, "learning_rate": 1.5704181525565027e-05, "loss": 1.8938, "step": 23705000 }, { "epoch": 68.62, "learning_rate": 1.570345787791775e-05, "loss": 1.918, "step": 23705500 }, { "epoch": 68.62, "learning_rate": 1.570273423027047e-05, "loss": 1.9152, "step": 23706000 }, { "epoch": 68.62, "learning_rate": 1.5702010582623194e-05, "loss": 1.9022, "step": 23706500 }, { "epoch": 68.62, "learning_rate": 1.5701286934975916e-05, "loss": 1.8946, "step": 23707000 }, { "epoch": 68.62, "learning_rate": 1.5700564734623935e-05, "loss": 1.9011, "step": 23707500 }, { "epoch": 68.62, "learning_rate": 1.5699841086976657e-05, "loss": 1.8933, "step": 23708000 }, { "epoch": 68.63, "learning_rate": 1.5699117439329383e-05, "loss": 1.9052, "step": 23708500 }, { "epoch": 68.63, "learning_rate": 1.5698393791682105e-05, "loss": 1.9419, "step": 23709000 }, { "epoch": 68.63, "learning_rate": 1.5697671591330124e-05, "loss": 1.8928, "step": 23709500 }, { "epoch": 68.63, "learning_rate": 1.5696947943682846e-05, "loss": 1.8903, "step": 23710000 }, { "epoch": 68.63, "learning_rate": 1.569622429603557e-05, "loss": 1.8729, "step": 23710500 }, { "epoch": 68.63, "learning_rate": 1.569550064838829e-05, "loss": 1.9017, "step": 23711000 }, { "epoch": 68.64, "learning_rate": 1.5694777000741016e-05, "loss": 1.8993, "step": 23711500 }, { "epoch": 68.64, "learning_rate": 1.5694054800389032e-05, "loss": 1.9084, "step": 23712000 }, { "epoch": 68.64, "learning_rate": 1.5693331152741757e-05, "loss": 1.8937, "step": 23712500 }, { "epoch": 68.64, "learning_rate": 1.569260750509448e-05, "loss": 1.8808, "step": 23713000 }, { "epoch": 68.64, "learning_rate": 1.5691883857447205e-05, "loss": 1.9137, "step": 23713500 }, { "epoch": 68.64, "learning_rate": 1.5691160209799928e-05, "loss": 1.9008, "step": 23714000 }, { "epoch": 68.64, "learning_rate": 1.569043656215265e-05, "loss": 1.8881, "step": 23714500 }, { "epoch": 68.65, "learning_rate": 1.5689712914505372e-05, "loss": 1.9063, "step": 23715000 }, { "epoch": 68.65, "learning_rate": 1.5688989266858094e-05, "loss": 1.9119, "step": 23715500 }, { "epoch": 68.65, "learning_rate": 1.5688267066506117e-05, "loss": 1.8951, "step": 23716000 }, { "epoch": 68.65, "learning_rate": 1.568754341885884e-05, "loss": 1.8843, "step": 23716500 }, { "epoch": 68.65, "learning_rate": 1.568681977121156e-05, "loss": 1.8988, "step": 23717000 }, { "epoch": 68.65, "learning_rate": 1.5686096123564283e-05, "loss": 1.9037, "step": 23717500 }, { "epoch": 68.65, "learning_rate": 1.5685372475917005e-05, "loss": 1.922, "step": 23718000 }, { "epoch": 68.66, "learning_rate": 1.568464882826973e-05, "loss": 1.9048, "step": 23718500 }, { "epoch": 68.66, "learning_rate": 1.5683925180622453e-05, "loss": 1.8854, "step": 23719000 }, { "epoch": 68.66, "learning_rate": 1.568320153297518e-05, "loss": 1.9146, "step": 23719500 }, { "epoch": 68.66, "learning_rate": 1.5682479332623195e-05, "loss": 1.8944, "step": 23720000 }, { "epoch": 68.66, "learning_rate": 1.5681755684975917e-05, "loss": 1.916, "step": 23720500 }, { "epoch": 68.66, "learning_rate": 1.5681032037328642e-05, "loss": 1.8842, "step": 23721000 }, { "epoch": 68.66, "learning_rate": 1.5680308389681365e-05, "loss": 1.9005, "step": 23721500 }, { "epoch": 68.67, "learning_rate": 1.5679584742034087e-05, "loss": 1.9107, "step": 23722000 }, { "epoch": 68.67, "learning_rate": 1.567886109438681e-05, "loss": 1.8991, "step": 23722500 }, { "epoch": 68.67, "learning_rate": 1.567813744673953e-05, "loss": 1.896, "step": 23723000 }, { "epoch": 68.67, "learning_rate": 1.5677415246387554e-05, "loss": 1.9108, "step": 23723500 }, { "epoch": 68.67, "learning_rate": 1.5676691598740276e-05, "loss": 1.9185, "step": 23724000 }, { "epoch": 68.67, "learning_rate": 1.5675967951092998e-05, "loss": 1.9111, "step": 23724500 }, { "epoch": 68.67, "learning_rate": 1.567524430344572e-05, "loss": 1.8855, "step": 23725000 }, { "epoch": 68.68, "learning_rate": 1.567452210309374e-05, "loss": 1.8838, "step": 23725500 }, { "epoch": 68.68, "learning_rate": 1.567379845544646e-05, "loss": 1.9128, "step": 23726000 }, { "epoch": 68.68, "learning_rate": 1.5673074807799184e-05, "loss": 1.9025, "step": 23726500 }, { "epoch": 68.68, "learning_rate": 1.5672352607447203e-05, "loss": 1.9178, "step": 23727000 }, { "epoch": 68.68, "learning_rate": 1.567162895979993e-05, "loss": 1.9374, "step": 23727500 }, { "epoch": 68.68, "learning_rate": 1.567090531215265e-05, "loss": 1.8862, "step": 23728000 }, { "epoch": 68.68, "learning_rate": 1.5670181664505373e-05, "loss": 1.8927, "step": 23728500 }, { "epoch": 68.69, "learning_rate": 1.5669458016858095e-05, "loss": 1.8879, "step": 23729000 }, { "epoch": 68.69, "learning_rate": 1.566873436921082e-05, "loss": 1.9168, "step": 23729500 }, { "epoch": 68.69, "learning_rate": 1.5668010721563543e-05, "loss": 1.8991, "step": 23730000 }, { "epoch": 68.69, "learning_rate": 1.5667287073916265e-05, "loss": 1.9014, "step": 23730500 }, { "epoch": 68.69, "learning_rate": 1.5666564873564284e-05, "loss": 1.8953, "step": 23731000 }, { "epoch": 68.69, "learning_rate": 1.5665841225917006e-05, "loss": 1.9289, "step": 23731500 }, { "epoch": 68.69, "learning_rate": 1.5665117578269732e-05, "loss": 1.9181, "step": 23732000 }, { "epoch": 68.7, "learning_rate": 1.5664393930622454e-05, "loss": 1.9035, "step": 23732500 }, { "epoch": 68.7, "learning_rate": 1.566367173027047e-05, "loss": 1.8984, "step": 23733000 }, { "epoch": 68.7, "learning_rate": 1.5662948082623195e-05, "loss": 1.9055, "step": 23733500 }, { "epoch": 68.7, "learning_rate": 1.5662224434975918e-05, "loss": 1.8957, "step": 23734000 }, { "epoch": 68.7, "learning_rate": 1.5661500787328643e-05, "loss": 1.9064, "step": 23734500 }, { "epoch": 68.7, "learning_rate": 1.5660777139681365e-05, "loss": 1.8938, "step": 23735000 }, { "epoch": 68.7, "learning_rate": 1.5660053492034088e-05, "loss": 1.8935, "step": 23735500 }, { "epoch": 68.71, "learning_rate": 1.565932984438681e-05, "loss": 1.8906, "step": 23736000 }, { "epoch": 68.71, "learning_rate": 1.5658606196739532e-05, "loss": 1.9244, "step": 23736500 }, { "epoch": 68.71, "learning_rate": 1.5657882549092258e-05, "loss": 1.8956, "step": 23737000 }, { "epoch": 68.71, "learning_rate": 1.565715890144498e-05, "loss": 1.9079, "step": 23737500 }, { "epoch": 68.71, "learning_rate": 1.5656435253797706e-05, "loss": 1.9098, "step": 23738000 }, { "epoch": 68.71, "learning_rate": 1.5655711606150428e-05, "loss": 1.8953, "step": 23738500 }, { "epoch": 68.71, "learning_rate": 1.565498795850315e-05, "loss": 1.8865, "step": 23739000 }, { "epoch": 68.72, "learning_rate": 1.5654264310855872e-05, "loss": 1.9094, "step": 23739500 }, { "epoch": 68.72, "learning_rate": 1.5653543557799185e-05, "loss": 1.8872, "step": 23740000 }, { "epoch": 68.72, "learning_rate": 1.565281991015191e-05, "loss": 1.9134, "step": 23740500 }, { "epoch": 68.72, "learning_rate": 1.5652096262504632e-05, "loss": 1.8893, "step": 23741000 }, { "epoch": 68.72, "learning_rate": 1.5651372614857355e-05, "loss": 1.9157, "step": 23741500 }, { "epoch": 68.72, "learning_rate": 1.565064896721008e-05, "loss": 1.8999, "step": 23742000 }, { "epoch": 68.72, "learning_rate": 1.5649925319562802e-05, "loss": 1.9192, "step": 23742500 }, { "epoch": 68.73, "learning_rate": 1.5649201671915525e-05, "loss": 1.8897, "step": 23743000 }, { "epoch": 68.73, "learning_rate": 1.5648478024268247e-05, "loss": 1.9017, "step": 23743500 }, { "epoch": 68.73, "learning_rate": 1.5647755823916266e-05, "loss": 1.899, "step": 23744000 }, { "epoch": 68.73, "learning_rate": 1.5647032176268988e-05, "loss": 1.9013, "step": 23744500 }, { "epoch": 68.73, "learning_rate": 1.564630852862171e-05, "loss": 1.9, "step": 23745000 }, { "epoch": 68.73, "learning_rate": 1.5645584880974436e-05, "loss": 1.9131, "step": 23745500 }, { "epoch": 68.73, "learning_rate": 1.564486123332716e-05, "loss": 1.9089, "step": 23746000 }, { "epoch": 68.74, "learning_rate": 1.5644137585679884e-05, "loss": 1.8901, "step": 23746500 }, { "epoch": 68.74, "learning_rate": 1.5643413938032606e-05, "loss": 1.8929, "step": 23747000 }, { "epoch": 68.74, "learning_rate": 1.564269029038533e-05, "loss": 1.9001, "step": 23747500 }, { "epoch": 68.74, "learning_rate": 1.564196664273805e-05, "loss": 1.9072, "step": 23748000 }, { "epoch": 68.74, "learning_rate": 1.5641242995090773e-05, "loss": 1.8845, "step": 23748500 }, { "epoch": 68.74, "learning_rate": 1.5640520794738792e-05, "loss": 1.8699, "step": 23749000 }, { "epoch": 68.75, "learning_rate": 1.5639797147091517e-05, "loss": 1.9024, "step": 23749500 }, { "epoch": 68.75, "learning_rate": 1.563907349944424e-05, "loss": 1.8932, "step": 23750000 }, { "epoch": 68.75, "learning_rate": 1.5638349851796962e-05, "loss": 1.9052, "step": 23750500 }, { "epoch": 68.75, "learning_rate": 1.5637629098740274e-05, "loss": 1.9, "step": 23751000 }, { "epoch": 68.75, "learning_rate": 1.5636905451092996e-05, "loss": 1.91, "step": 23751500 }, { "epoch": 68.75, "learning_rate": 1.5636181803445722e-05, "loss": 1.8958, "step": 23752000 }, { "epoch": 68.75, "learning_rate": 1.5635458155798444e-05, "loss": 1.9118, "step": 23752500 }, { "epoch": 68.76, "learning_rate": 1.5634734508151166e-05, "loss": 1.8929, "step": 23753000 }, { "epoch": 68.76, "learning_rate": 1.5634010860503892e-05, "loss": 1.9092, "step": 23753500 }, { "epoch": 68.76, "learning_rate": 1.5633287212856614e-05, "loss": 1.8894, "step": 23754000 }, { "epoch": 68.76, "learning_rate": 1.5632563565209336e-05, "loss": 1.9007, "step": 23754500 }, { "epoch": 68.76, "learning_rate": 1.5631839917562062e-05, "loss": 1.9053, "step": 23755000 }, { "epoch": 68.76, "learning_rate": 1.5631117717210078e-05, "loss": 1.8979, "step": 23755500 }, { "epoch": 68.76, "learning_rate": 1.56303940695628e-05, "loss": 1.9062, "step": 23756000 }, { "epoch": 68.77, "learning_rate": 1.5629670421915522e-05, "loss": 1.8939, "step": 23756500 }, { "epoch": 68.77, "learning_rate": 1.5628948221563544e-05, "loss": 1.9027, "step": 23757000 }, { "epoch": 68.77, "learning_rate": 1.5628224573916267e-05, "loss": 1.9037, "step": 23757500 }, { "epoch": 68.77, "learning_rate": 1.562750092626899e-05, "loss": 1.8941, "step": 23758000 }, { "epoch": 68.77, "learning_rate": 1.562677727862171e-05, "loss": 1.8857, "step": 23758500 }, { "epoch": 68.77, "learning_rate": 1.5626053630974437e-05, "loss": 1.9114, "step": 23759000 }, { "epoch": 68.77, "learning_rate": 1.562532998332716e-05, "loss": 1.8952, "step": 23759500 }, { "epoch": 68.78, "learning_rate": 1.562460633567988e-05, "loss": 1.8818, "step": 23760000 }, { "epoch": 68.78, "learning_rate": 1.5623882688032607e-05, "loss": 1.8861, "step": 23760500 }, { "epoch": 68.78, "learning_rate": 1.562315904038533e-05, "loss": 1.9123, "step": 23761000 }, { "epoch": 68.78, "learning_rate": 1.5622436840033348e-05, "loss": 1.9081, "step": 23761500 }, { "epoch": 68.78, "learning_rate": 1.5621714639681364e-05, "loss": 1.8817, "step": 23762000 }, { "epoch": 68.78, "learning_rate": 1.5620990992034086e-05, "loss": 1.8981, "step": 23762500 }, { "epoch": 68.78, "learning_rate": 1.562026734438681e-05, "loss": 1.8829, "step": 23763000 }, { "epoch": 68.79, "learning_rate": 1.5619545144034827e-05, "loss": 1.8962, "step": 23763500 }, { "epoch": 68.79, "learning_rate": 1.561882149638755e-05, "loss": 1.916, "step": 23764000 }, { "epoch": 68.79, "learning_rate": 1.5618097848740275e-05, "loss": 1.8978, "step": 23764500 }, { "epoch": 68.79, "learning_rate": 1.5617374201093e-05, "loss": 1.8819, "step": 23765000 }, { "epoch": 68.79, "learning_rate": 1.5616650553445723e-05, "loss": 1.8859, "step": 23765500 }, { "epoch": 68.79, "learning_rate": 1.5615926905798445e-05, "loss": 1.8929, "step": 23766000 }, { "epoch": 68.79, "learning_rate": 1.5615203258151167e-05, "loss": 1.9059, "step": 23766500 }, { "epoch": 68.8, "learning_rate": 1.561447961050389e-05, "loss": 1.9161, "step": 23767000 }, { "epoch": 68.8, "learning_rate": 1.561375596285661e-05, "loss": 1.8988, "step": 23767500 }, { "epoch": 68.8, "learning_rate": 1.5613035209799924e-05, "loss": 1.8739, "step": 23768000 }, { "epoch": 68.8, "learning_rate": 1.561231156215265e-05, "loss": 1.8998, "step": 23768500 }, { "epoch": 68.8, "learning_rate": 1.5611590809095962e-05, "loss": 1.8903, "step": 23769000 }, { "epoch": 68.8, "learning_rate": 1.5610867161448687e-05, "loss": 1.9057, "step": 23769500 }, { "epoch": 68.8, "learning_rate": 1.561014351380141e-05, "loss": 1.8984, "step": 23770000 }, { "epoch": 68.81, "learning_rate": 1.5609419866154132e-05, "loss": 1.8993, "step": 23770500 }, { "epoch": 68.81, "learning_rate": 1.5608696218506854e-05, "loss": 1.8944, "step": 23771000 }, { "epoch": 68.81, "learning_rate": 1.5607972570859576e-05, "loss": 1.91, "step": 23771500 }, { "epoch": 68.81, "learning_rate": 1.5607248923212302e-05, "loss": 1.92, "step": 23772000 }, { "epoch": 68.81, "learning_rate": 1.5606525275565028e-05, "loss": 1.9244, "step": 23772500 }, { "epoch": 68.81, "learning_rate": 1.560580162791775e-05, "loss": 1.9036, "step": 23773000 }, { "epoch": 68.81, "learning_rate": 1.5605077980270472e-05, "loss": 1.9114, "step": 23773500 }, { "epoch": 68.82, "learning_rate": 1.5604354332623194e-05, "loss": 1.9095, "step": 23774000 }, { "epoch": 68.82, "learning_rate": 1.5603630684975917e-05, "loss": 1.9089, "step": 23774500 }, { "epoch": 68.82, "learning_rate": 1.560290703732864e-05, "loss": 1.8926, "step": 23775000 }, { "epoch": 68.82, "learning_rate": 1.5602183389681364e-05, "loss": 1.9022, "step": 23775500 }, { "epoch": 68.82, "learning_rate": 1.5601461189329383e-05, "loss": 1.8897, "step": 23776000 }, { "epoch": 68.82, "learning_rate": 1.5600737541682106e-05, "loss": 1.901, "step": 23776500 }, { "epoch": 68.82, "learning_rate": 1.5600013894034828e-05, "loss": 1.9148, "step": 23777000 }, { "epoch": 68.83, "learning_rate": 1.5599290246387553e-05, "loss": 1.9126, "step": 23777500 }, { "epoch": 68.83, "learning_rate": 1.5598566598740276e-05, "loss": 1.8914, "step": 23778000 }, { "epoch": 68.83, "learning_rate": 1.559784439838829e-05, "loss": 1.9295, "step": 23778500 }, { "epoch": 68.83, "learning_rate": 1.559712219803631e-05, "loss": 1.8901, "step": 23779000 }, { "epoch": 68.83, "learning_rate": 1.5596398550389032e-05, "loss": 1.8888, "step": 23779500 }, { "epoch": 68.83, "learning_rate": 1.5595674902741758e-05, "loss": 1.9073, "step": 23780000 }, { "epoch": 68.83, "learning_rate": 1.559495125509448e-05, "loss": 1.8984, "step": 23780500 }, { "epoch": 68.84, "learning_rate": 1.5594227607447202e-05, "loss": 1.8736, "step": 23781000 }, { "epoch": 68.84, "learning_rate": 1.5593503959799928e-05, "loss": 1.8887, "step": 23781500 }, { "epoch": 68.84, "learning_rate": 1.559278031215265e-05, "loss": 1.9074, "step": 23782000 }, { "epoch": 68.84, "learning_rate": 1.5592056664505373e-05, "loss": 1.9211, "step": 23782500 }, { "epoch": 68.84, "learning_rate": 1.5591333016858095e-05, "loss": 1.9185, "step": 23783000 }, { "epoch": 68.84, "learning_rate": 1.559060936921082e-05, "loss": 1.8882, "step": 23783500 }, { "epoch": 68.84, "learning_rate": 1.5589885721563543e-05, "loss": 1.8928, "step": 23784000 }, { "epoch": 68.85, "learning_rate": 1.5589162073916265e-05, "loss": 1.8926, "step": 23784500 }, { "epoch": 68.85, "learning_rate": 1.558843842626899e-05, "loss": 1.922, "step": 23785000 }, { "epoch": 68.85, "learning_rate": 1.5587714778621713e-05, "loss": 1.8696, "step": 23785500 }, { "epoch": 68.85, "learning_rate": 1.5586991130974435e-05, "loss": 1.9066, "step": 23786000 }, { "epoch": 68.85, "learning_rate": 1.5586267483327157e-05, "loss": 1.905, "step": 23786500 }, { "epoch": 68.85, "learning_rate": 1.5585543835679883e-05, "loss": 1.9474, "step": 23787000 }, { "epoch": 68.86, "learning_rate": 1.5584821635327902e-05, "loss": 1.9009, "step": 23787500 }, { "epoch": 68.86, "learning_rate": 1.5584097987680624e-05, "loss": 1.8741, "step": 23788000 }, { "epoch": 68.86, "learning_rate": 1.5583374340033346e-05, "loss": 1.8942, "step": 23788500 }, { "epoch": 68.86, "learning_rate": 1.558265069238607e-05, "loss": 1.9142, "step": 23789000 }, { "epoch": 68.86, "learning_rate": 1.5581928492034087e-05, "loss": 1.9059, "step": 23789500 }, { "epoch": 68.86, "learning_rate": 1.558120484438681e-05, "loss": 1.9134, "step": 23790000 }, { "epoch": 68.86, "learning_rate": 1.5580481196739532e-05, "loss": 1.9021, "step": 23790500 }, { "epoch": 68.87, "learning_rate": 1.5579760443682848e-05, "loss": 1.8923, "step": 23791000 }, { "epoch": 68.87, "learning_rate": 1.557903679603557e-05, "loss": 1.9036, "step": 23791500 }, { "epoch": 68.87, "learning_rate": 1.5578313148388292e-05, "loss": 1.8973, "step": 23792000 }, { "epoch": 68.87, "learning_rate": 1.5577589500741018e-05, "loss": 1.911, "step": 23792500 }, { "epoch": 68.87, "learning_rate": 1.557686585309374e-05, "loss": 1.9148, "step": 23793000 }, { "epoch": 68.87, "learning_rate": 1.5576142205446462e-05, "loss": 1.9193, "step": 23793500 }, { "epoch": 68.87, "learning_rate": 1.5575418557799184e-05, "loss": 1.9183, "step": 23794000 }, { "epoch": 68.88, "learning_rate": 1.557469491015191e-05, "loss": 1.9012, "step": 23794500 }, { "epoch": 68.88, "learning_rate": 1.557397270979993e-05, "loss": 1.913, "step": 23795000 }, { "epoch": 68.88, "learning_rate": 1.557324906215265e-05, "loss": 1.8909, "step": 23795500 }, { "epoch": 68.88, "learning_rate": 1.5572525414505373e-05, "loss": 1.9005, "step": 23796000 }, { "epoch": 68.88, "learning_rate": 1.5571801766858096e-05, "loss": 1.9211, "step": 23796500 }, { "epoch": 68.88, "learning_rate": 1.5571078119210818e-05, "loss": 1.9173, "step": 23797000 }, { "epoch": 68.88, "learning_rate": 1.5570354471563543e-05, "loss": 1.9012, "step": 23797500 }, { "epoch": 68.89, "learning_rate": 1.5569630823916266e-05, "loss": 1.8825, "step": 23798000 }, { "epoch": 68.89, "learning_rate": 1.556890717626899e-05, "loss": 1.8888, "step": 23798500 }, { "epoch": 68.89, "learning_rate": 1.5568183528621714e-05, "loss": 1.9076, "step": 23799000 }, { "epoch": 68.89, "learning_rate": 1.5567459880974436e-05, "loss": 1.9035, "step": 23799500 }, { "epoch": 68.89, "learning_rate": 1.5566736233327158e-05, "loss": 1.9155, "step": 23800000 }, { "epoch": 68.89, "learning_rate": 1.556601258567988e-05, "loss": 1.8915, "step": 23800500 }, { "epoch": 68.89, "learning_rate": 1.55652903853279e-05, "loss": 1.898, "step": 23801000 }, { "epoch": 68.9, "learning_rate": 1.556456673768062e-05, "loss": 1.9136, "step": 23801500 }, { "epoch": 68.9, "learning_rate": 1.5563843090033347e-05, "loss": 1.9049, "step": 23802000 }, { "epoch": 68.9, "learning_rate": 1.5563120889681366e-05, "loss": 1.9265, "step": 23802500 }, { "epoch": 68.9, "learning_rate": 1.5562397242034088e-05, "loss": 1.875, "step": 23803000 }, { "epoch": 68.9, "learning_rate": 1.556167359438681e-05, "loss": 1.9114, "step": 23803500 }, { "epoch": 68.9, "learning_rate": 1.5560949946739533e-05, "loss": 1.901, "step": 23804000 }, { "epoch": 68.9, "learning_rate": 1.556022774638755e-05, "loss": 1.8784, "step": 23804500 }, { "epoch": 68.91, "learning_rate": 1.5559504098740274e-05, "loss": 1.8951, "step": 23805000 }, { "epoch": 68.91, "learning_rate": 1.5558780451092996e-05, "loss": 1.8864, "step": 23805500 }, { "epoch": 68.91, "learning_rate": 1.5558056803445722e-05, "loss": 1.8791, "step": 23806000 }, { "epoch": 68.91, "learning_rate": 1.5557333155798444e-05, "loss": 1.9373, "step": 23806500 }, { "epoch": 68.91, "learning_rate": 1.555660950815117e-05, "loss": 1.9207, "step": 23807000 }, { "epoch": 68.91, "learning_rate": 1.5555885860503892e-05, "loss": 1.9095, "step": 23807500 }, { "epoch": 68.91, "learning_rate": 1.5555162212856614e-05, "loss": 1.8946, "step": 23808000 }, { "epoch": 68.92, "learning_rate": 1.5554438565209336e-05, "loss": 1.9002, "step": 23808500 }, { "epoch": 68.92, "learning_rate": 1.555371491756206e-05, "loss": 1.9122, "step": 23809000 }, { "epoch": 68.92, "learning_rate": 1.5552991269914784e-05, "loss": 1.8933, "step": 23809500 }, { "epoch": 68.92, "learning_rate": 1.5552267622267506e-05, "loss": 1.916, "step": 23810000 }, { "epoch": 68.92, "learning_rate": 1.555154686921082e-05, "loss": 1.8875, "step": 23810500 }, { "epoch": 68.92, "learning_rate": 1.5550823221563544e-05, "loss": 1.9239, "step": 23811000 }, { "epoch": 68.92, "learning_rate": 1.5550099573916266e-05, "loss": 1.8893, "step": 23811500 }, { "epoch": 68.93, "learning_rate": 1.554937592626899e-05, "loss": 1.8811, "step": 23812000 }, { "epoch": 68.93, "learning_rate": 1.554865227862171e-05, "loss": 1.9142, "step": 23812500 }, { "epoch": 68.93, "learning_rate": 1.5547928630974437e-05, "loss": 1.9124, "step": 23813000 }, { "epoch": 68.93, "learning_rate": 1.5547206430622456e-05, "loss": 1.8878, "step": 23813500 }, { "epoch": 68.93, "learning_rate": 1.5546482782975178e-05, "loss": 1.914, "step": 23814000 }, { "epoch": 68.93, "learning_rate": 1.55457591353279e-05, "loss": 1.9286, "step": 23814500 }, { "epoch": 68.93, "learning_rate": 1.5545035487680622e-05, "loss": 1.9069, "step": 23815000 }, { "epoch": 68.94, "learning_rate": 1.5544311840033344e-05, "loss": 1.8986, "step": 23815500 }, { "epoch": 68.94, "learning_rate": 1.5543589639681363e-05, "loss": 1.9074, "step": 23816000 }, { "epoch": 68.94, "learning_rate": 1.5542867439329382e-05, "loss": 1.9248, "step": 23816500 }, { "epoch": 68.94, "learning_rate": 1.5542143791682105e-05, "loss": 1.9061, "step": 23817000 }, { "epoch": 68.94, "learning_rate": 1.554142014403483e-05, "loss": 1.9141, "step": 23817500 }, { "epoch": 68.94, "learning_rate": 1.5540696496387552e-05, "loss": 1.8846, "step": 23818000 }, { "epoch": 68.94, "learning_rate": 1.5539972848740275e-05, "loss": 1.9271, "step": 23818500 }, { "epoch": 68.95, "learning_rate": 1.5539250648388294e-05, "loss": 1.9077, "step": 23819000 }, { "epoch": 68.95, "learning_rate": 1.5538527000741016e-05, "loss": 1.921, "step": 23819500 }, { "epoch": 68.95, "learning_rate": 1.5537803353093738e-05, "loss": 1.9085, "step": 23820000 }, { "epoch": 68.95, "learning_rate": 1.553707970544646e-05, "loss": 1.8921, "step": 23820500 }, { "epoch": 68.95, "learning_rate": 1.5536356057799186e-05, "loss": 1.9004, "step": 23821000 }, { "epoch": 68.95, "learning_rate": 1.5535632410151908e-05, "loss": 1.8992, "step": 23821500 }, { "epoch": 68.95, "learning_rate": 1.5534908762504634e-05, "loss": 1.9099, "step": 23822000 }, { "epoch": 68.96, "learning_rate": 1.553418656215265e-05, "loss": 1.9204, "step": 23822500 }, { "epoch": 68.96, "learning_rate": 1.553346291450537e-05, "loss": 1.9144, "step": 23823000 }, { "epoch": 68.96, "learning_rate": 1.5532739266858097e-05, "loss": 1.9105, "step": 23823500 }, { "epoch": 68.96, "learning_rate": 1.553201561921082e-05, "loss": 1.9327, "step": 23824000 }, { "epoch": 68.96, "learning_rate": 1.5531293418858835e-05, "loss": 1.909, "step": 23824500 }, { "epoch": 68.96, "learning_rate": 1.553056977121156e-05, "loss": 1.9092, "step": 23825000 }, { "epoch": 68.97, "learning_rate": 1.5529846123564283e-05, "loss": 1.9081, "step": 23825500 }, { "epoch": 68.97, "learning_rate": 1.552912247591701e-05, "loss": 1.8992, "step": 23826000 }, { "epoch": 68.97, "learning_rate": 1.552839882826973e-05, "loss": 1.918, "step": 23826500 }, { "epoch": 68.97, "learning_rate": 1.5527676627917746e-05, "loss": 1.9161, "step": 23827000 }, { "epoch": 68.97, "learning_rate": 1.5526952980270472e-05, "loss": 1.8942, "step": 23827500 }, { "epoch": 68.97, "learning_rate": 1.5526229332623194e-05, "loss": 1.8872, "step": 23828000 }, { "epoch": 68.97, "learning_rate": 1.5525507132271213e-05, "loss": 1.9052, "step": 23828500 }, { "epoch": 68.98, "learning_rate": 1.5524783484623935e-05, "loss": 1.8732, "step": 23829000 }, { "epoch": 68.98, "learning_rate": 1.552405983697666e-05, "loss": 1.904, "step": 23829500 }, { "epoch": 68.98, "learning_rate": 1.5523336189329383e-05, "loss": 1.8986, "step": 23830000 }, { "epoch": 68.98, "learning_rate": 1.5522612541682105e-05, "loss": 1.8986, "step": 23830500 }, { "epoch": 68.98, "learning_rate": 1.5521888894034828e-05, "loss": 1.8921, "step": 23831000 }, { "epoch": 68.98, "learning_rate": 1.552116524638755e-05, "loss": 1.8855, "step": 23831500 }, { "epoch": 68.98, "learning_rate": 1.5520441598740275e-05, "loss": 1.8926, "step": 23832000 }, { "epoch": 68.99, "learning_rate": 1.5519717951092998e-05, "loss": 1.878, "step": 23832500 }, { "epoch": 68.99, "learning_rate": 1.5518994303445723e-05, "loss": 1.9402, "step": 23833000 }, { "epoch": 68.99, "learning_rate": 1.5518270655798446e-05, "loss": 1.9238, "step": 23833500 }, { "epoch": 68.99, "learning_rate": 1.5517547008151168e-05, "loss": 1.9156, "step": 23834000 }, { "epoch": 68.99, "learning_rate": 1.551682336050389e-05, "loss": 1.8885, "step": 23834500 }, { "epoch": 68.99, "learning_rate": 1.551610116015191e-05, "loss": 1.9015, "step": 23835000 }, { "epoch": 68.99, "learning_rate": 1.551537751250463e-05, "loss": 1.8786, "step": 23835500 }, { "epoch": 69.0, "learning_rate": 1.5514653864857357e-05, "loss": 1.9102, "step": 23836000 }, { "epoch": 69.0, "learning_rate": 1.551393021721008e-05, "loss": 1.892, "step": 23836500 }, { "epoch": 69.0, "learning_rate": 1.5513208016858098e-05, "loss": 1.8889, "step": 23837000 }, { "epoch": 69.0, "learning_rate": 1.551248436921082e-05, "loss": 1.8844, "step": 23837500 }, { "epoch": 69.0, "eval_accuracy": 0.6815025822821256, "eval_accuracy_mlm": 0.6492867280002595, "eval_accuracy_nsp": 0.8544110031027576, "eval_loss": 2.1765613555908203, "eval_runtime": 331.9311, "eval_samples_per_second": 1314.689, "eval_steps_per_second": 54.779, "step": 23837568 }, { "epoch": 69.0, "learning_rate": 1.5511760721563542e-05, "loss": 1.8976, "step": 23838000 }, { "epoch": 69.0, "learning_rate": 1.551103852121156e-05, "loss": 1.8852, "step": 23838500 }, { "epoch": 69.0, "learning_rate": 1.5510314873564284e-05, "loss": 1.8933, "step": 23839000 }, { "epoch": 69.01, "learning_rate": 1.550959122591701e-05, "loss": 1.8981, "step": 23839500 }, { "epoch": 69.01, "learning_rate": 1.550886757826973e-05, "loss": 1.8949, "step": 23840000 }, { "epoch": 69.01, "learning_rate": 1.5508143930622454e-05, "loss": 1.8782, "step": 23840500 }, { "epoch": 69.01, "learning_rate": 1.5507420282975176e-05, "loss": 1.8982, "step": 23841000 }, { "epoch": 69.01, "learning_rate": 1.5506696635327898e-05, "loss": 1.8801, "step": 23841500 }, { "epoch": 69.01, "learning_rate": 1.5505972987680624e-05, "loss": 1.8719, "step": 23842000 }, { "epoch": 69.01, "learning_rate": 1.550525078732864e-05, "loss": 1.8809, "step": 23842500 }, { "epoch": 69.02, "learning_rate": 1.550452713968136e-05, "loss": 1.8885, "step": 23843000 }, { "epoch": 69.02, "learning_rate": 1.5503804939329384e-05, "loss": 1.8936, "step": 23843500 }, { "epoch": 69.02, "learning_rate": 1.5503081291682106e-05, "loss": 1.8799, "step": 23844000 }, { "epoch": 69.02, "learning_rate": 1.5502359091330125e-05, "loss": 1.8921, "step": 23844500 }, { "epoch": 69.02, "learning_rate": 1.5501635443682847e-05, "loss": 1.8705, "step": 23845000 }, { "epoch": 69.02, "learning_rate": 1.550091179603557e-05, "loss": 1.8794, "step": 23845500 }, { "epoch": 69.02, "learning_rate": 1.550018959568359e-05, "loss": 1.8875, "step": 23846000 }, { "epoch": 69.03, "learning_rate": 1.549946594803631e-05, "loss": 1.9179, "step": 23846500 }, { "epoch": 69.03, "learning_rate": 1.5498742300389033e-05, "loss": 1.9024, "step": 23847000 }, { "epoch": 69.03, "learning_rate": 1.549801865274176e-05, "loss": 1.8701, "step": 23847500 }, { "epoch": 69.03, "learning_rate": 1.549729500509448e-05, "loss": 1.878, "step": 23848000 }, { "epoch": 69.03, "learning_rate": 1.5496571357447203e-05, "loss": 1.869, "step": 23848500 }, { "epoch": 69.03, "learning_rate": 1.5495847709799925e-05, "loss": 1.88, "step": 23849000 }, { "epoch": 69.03, "learning_rate": 1.549512406215265e-05, "loss": 1.8964, "step": 23849500 }, { "epoch": 69.04, "learning_rate": 1.5494400414505373e-05, "loss": 1.8856, "step": 23850000 }, { "epoch": 69.04, "learning_rate": 1.549367821415339e-05, "loss": 1.8967, "step": 23850500 }, { "epoch": 69.04, "learning_rate": 1.5492954566506114e-05, "loss": 1.8843, "step": 23851000 }, { "epoch": 69.04, "learning_rate": 1.5492230918858837e-05, "loss": 1.9164, "step": 23851500 }, { "epoch": 69.04, "learning_rate": 1.5491507271211562e-05, "loss": 1.8882, "step": 23852000 }, { "epoch": 69.04, "learning_rate": 1.5490783623564284e-05, "loss": 1.8948, "step": 23852500 }, { "epoch": 69.04, "learning_rate": 1.54900614232123e-05, "loss": 1.8751, "step": 23853000 }, { "epoch": 69.05, "learning_rate": 1.5489337775565026e-05, "loss": 1.885, "step": 23853500 }, { "epoch": 69.05, "learning_rate": 1.5488614127917748e-05, "loss": 1.877, "step": 23854000 }, { "epoch": 69.05, "learning_rate": 1.548789048027047e-05, "loss": 1.8815, "step": 23854500 }, { "epoch": 69.05, "learning_rate": 1.5487166832623196e-05, "loss": 1.8727, "step": 23855000 }, { "epoch": 69.05, "learning_rate": 1.5486444632271215e-05, "loss": 1.892, "step": 23855500 }, { "epoch": 69.05, "learning_rate": 1.5485720984623937e-05, "loss": 1.8703, "step": 23856000 }, { "epoch": 69.05, "learning_rate": 1.548499733697666e-05, "loss": 1.8909, "step": 23856500 }, { "epoch": 69.06, "learning_rate": 1.548427368932938e-05, "loss": 1.8757, "step": 23857000 }, { "epoch": 69.06, "learning_rate": 1.5483550041682104e-05, "loss": 1.8917, "step": 23857500 }, { "epoch": 69.06, "learning_rate": 1.5482826394034826e-05, "loss": 1.8812, "step": 23858000 }, { "epoch": 69.06, "learning_rate": 1.548210274638755e-05, "loss": 1.904, "step": 23858500 }, { "epoch": 69.06, "learning_rate": 1.5481379098740277e-05, "loss": 1.8844, "step": 23859000 }, { "epoch": 69.06, "learning_rate": 1.5480655451093e-05, "loss": 1.8795, "step": 23859500 }, { "epoch": 69.06, "learning_rate": 1.547993180344572e-05, "loss": 1.8889, "step": 23860000 }, { "epoch": 69.07, "learning_rate": 1.5479208155798444e-05, "loss": 1.8985, "step": 23860500 }, { "epoch": 69.07, "learning_rate": 1.5478485955446463e-05, "loss": 1.8937, "step": 23861000 }, { "epoch": 69.07, "learning_rate": 1.5477762307799185e-05, "loss": 1.8938, "step": 23861500 }, { "epoch": 69.07, "learning_rate": 1.547703866015191e-05, "loss": 1.8855, "step": 23862000 }, { "epoch": 69.07, "learning_rate": 1.5476315012504633e-05, "loss": 1.8968, "step": 23862500 }, { "epoch": 69.07, "learning_rate": 1.5475591364857355e-05, "loss": 1.8979, "step": 23863000 }, { "epoch": 69.08, "learning_rate": 1.5474867717210077e-05, "loss": 1.8764, "step": 23863500 }, { "epoch": 69.08, "learning_rate": 1.5474144069562803e-05, "loss": 1.8921, "step": 23864000 }, { "epoch": 69.08, "learning_rate": 1.5473420421915525e-05, "loss": 1.8726, "step": 23864500 }, { "epoch": 69.08, "learning_rate": 1.547269822156354e-05, "loss": 1.8815, "step": 23865000 }, { "epoch": 69.08, "learning_rate": 1.5471974573916266e-05, "loss": 1.9009, "step": 23865500 }, { "epoch": 69.08, "learning_rate": 1.5471252373564285e-05, "loss": 1.8816, "step": 23866000 }, { "epoch": 69.08, "learning_rate": 1.5470528725917007e-05, "loss": 1.8879, "step": 23866500 }, { "epoch": 69.09, "learning_rate": 1.546980507826973e-05, "loss": 1.9122, "step": 23867000 }, { "epoch": 69.09, "learning_rate": 1.5469081430622452e-05, "loss": 1.8788, "step": 23867500 }, { "epoch": 69.09, "learning_rate": 1.546835923027047e-05, "loss": 1.8984, "step": 23868000 }, { "epoch": 69.09, "learning_rate": 1.5467635582623193e-05, "loss": 1.8873, "step": 23868500 }, { "epoch": 69.09, "learning_rate": 1.5466914829566505e-05, "loss": 1.8761, "step": 23869000 }, { "epoch": 69.09, "learning_rate": 1.5466191181919228e-05, "loss": 1.8949, "step": 23869500 }, { "epoch": 69.09, "learning_rate": 1.5465467534271953e-05, "loss": 1.883, "step": 23870000 }, { "epoch": 69.1, "learning_rate": 1.546474388662468e-05, "loss": 1.8866, "step": 23870500 }, { "epoch": 69.1, "learning_rate": 1.54640202389774e-05, "loss": 1.8784, "step": 23871000 }, { "epoch": 69.1, "learning_rate": 1.5463298038625417e-05, "loss": 1.8986, "step": 23871500 }, { "epoch": 69.1, "learning_rate": 1.5462574390978142e-05, "loss": 1.877, "step": 23872000 }, { "epoch": 69.1, "learning_rate": 1.5461850743330864e-05, "loss": 1.869, "step": 23872500 }, { "epoch": 69.1, "learning_rate": 1.546112854297888e-05, "loss": 1.8981, "step": 23873000 }, { "epoch": 69.1, "learning_rate": 1.5460404895331602e-05, "loss": 1.8781, "step": 23873500 }, { "epoch": 69.11, "learning_rate": 1.5459681247684328e-05, "loss": 1.8813, "step": 23874000 }, { "epoch": 69.11, "learning_rate": 1.5458957600037054e-05, "loss": 1.8922, "step": 23874500 }, { "epoch": 69.11, "learning_rate": 1.5458233952389776e-05, "loss": 1.8623, "step": 23875000 }, { "epoch": 69.11, "learning_rate": 1.5457510304742498e-05, "loss": 1.9103, "step": 23875500 }, { "epoch": 69.11, "learning_rate": 1.545678665709522e-05, "loss": 1.8775, "step": 23876000 }, { "epoch": 69.11, "learning_rate": 1.5456063009447942e-05, "loss": 1.8624, "step": 23876500 }, { "epoch": 69.11, "learning_rate": 1.5455339361800668e-05, "loss": 1.896, "step": 23877000 }, { "epoch": 69.12, "learning_rate": 1.5454615714153394e-05, "loss": 1.9011, "step": 23877500 }, { "epoch": 69.12, "learning_rate": 1.5453892066506116e-05, "loss": 1.8859, "step": 23878000 }, { "epoch": 69.12, "learning_rate": 1.5453168418858838e-05, "loss": 1.8894, "step": 23878500 }, { "epoch": 69.12, "learning_rate": 1.545244477121156e-05, "loss": 1.8968, "step": 23879000 }, { "epoch": 69.12, "learning_rate": 1.5451721123564283e-05, "loss": 1.8874, "step": 23879500 }, { "epoch": 69.12, "learning_rate": 1.54509989232123e-05, "loss": 1.8839, "step": 23880000 }, { "epoch": 69.12, "learning_rate": 1.5450275275565024e-05, "loss": 1.9097, "step": 23880500 }, { "epoch": 69.13, "learning_rate": 1.544955162791775e-05, "loss": 1.9236, "step": 23881000 }, { "epoch": 69.13, "learning_rate": 1.544882798027047e-05, "loss": 1.8989, "step": 23881500 }, { "epoch": 69.13, "learning_rate": 1.5448104332623194e-05, "loss": 1.9063, "step": 23882000 }, { "epoch": 69.13, "learning_rate": 1.544738068497592e-05, "loss": 1.8979, "step": 23882500 }, { "epoch": 69.13, "learning_rate": 1.5446657037328642e-05, "loss": 1.8598, "step": 23883000 }, { "epoch": 69.13, "learning_rate": 1.5445934836976657e-05, "loss": 1.8834, "step": 23883500 }, { "epoch": 69.13, "learning_rate": 1.544521118932938e-05, "loss": 1.8941, "step": 23884000 }, { "epoch": 69.14, "learning_rate": 1.5444487541682105e-05, "loss": 1.8797, "step": 23884500 }, { "epoch": 69.14, "learning_rate": 1.544376389403483e-05, "loss": 1.8565, "step": 23885000 }, { "epoch": 69.14, "learning_rate": 1.5443040246387553e-05, "loss": 1.8824, "step": 23885500 }, { "epoch": 69.14, "learning_rate": 1.5442316598740275e-05, "loss": 1.8815, "step": 23886000 }, { "epoch": 69.14, "learning_rate": 1.5441592951092997e-05, "loss": 1.8857, "step": 23886500 }, { "epoch": 69.14, "learning_rate": 1.5440870750741016e-05, "loss": 1.9049, "step": 23887000 }, { "epoch": 69.14, "learning_rate": 1.544014710309374e-05, "loss": 1.9041, "step": 23887500 }, { "epoch": 69.15, "learning_rate": 1.543942345544646e-05, "loss": 1.8855, "step": 23888000 }, { "epoch": 69.15, "learning_rate": 1.5438699807799187e-05, "loss": 1.9028, "step": 23888500 }, { "epoch": 69.15, "learning_rate": 1.5437977607447205e-05, "loss": 1.8986, "step": 23889000 }, { "epoch": 69.15, "learning_rate": 1.5437253959799928e-05, "loss": 1.897, "step": 23889500 }, { "epoch": 69.15, "learning_rate": 1.543653031215265e-05, "loss": 1.9049, "step": 23890000 }, { "epoch": 69.15, "learning_rate": 1.5435806664505372e-05, "loss": 1.8943, "step": 23890500 }, { "epoch": 69.15, "learning_rate": 1.5435083016858094e-05, "loss": 1.8953, "step": 23891000 }, { "epoch": 69.16, "learning_rate": 1.543435936921082e-05, "loss": 1.8938, "step": 23891500 }, { "epoch": 69.16, "learning_rate": 1.5433635721563546e-05, "loss": 1.881, "step": 23892000 }, { "epoch": 69.16, "learning_rate": 1.5432912073916268e-05, "loss": 1.9108, "step": 23892500 }, { "epoch": 69.16, "learning_rate": 1.543218842626899e-05, "loss": 1.9032, "step": 23893000 }, { "epoch": 69.16, "learning_rate": 1.5431464778621712e-05, "loss": 1.9056, "step": 23893500 }, { "epoch": 69.16, "learning_rate": 1.5430741130974435e-05, "loss": 1.8675, "step": 23894000 }, { "epoch": 69.16, "learning_rate": 1.5430017483327157e-05, "loss": 1.8695, "step": 23894500 }, { "epoch": 69.17, "learning_rate": 1.5429293835679882e-05, "loss": 1.8728, "step": 23895000 }, { "epoch": 69.17, "learning_rate": 1.5428571635327898e-05, "loss": 1.8954, "step": 23895500 }, { "epoch": 69.17, "learning_rate": 1.5427847987680624e-05, "loss": 1.8888, "step": 23896000 }, { "epoch": 69.17, "learning_rate": 1.5427124340033346e-05, "loss": 1.886, "step": 23896500 }, { "epoch": 69.17, "learning_rate": 1.542640069238607e-05, "loss": 1.8999, "step": 23897000 }, { "epoch": 69.17, "learning_rate": 1.5425678492034087e-05, "loss": 1.88, "step": 23897500 }, { "epoch": 69.17, "learning_rate": 1.542495484438681e-05, "loss": 1.8946, "step": 23898000 }, { "epoch": 69.18, "learning_rate": 1.542423119673953e-05, "loss": 1.8825, "step": 23898500 }, { "epoch": 69.18, "learning_rate": 1.5423507549092257e-05, "loss": 1.8965, "step": 23899000 }, { "epoch": 69.18, "learning_rate": 1.5422785348740276e-05, "loss": 1.9141, "step": 23899500 }, { "epoch": 69.18, "learning_rate": 1.5422061701092998e-05, "loss": 1.8788, "step": 23900000 }, { "epoch": 69.18, "learning_rate": 1.5421339500741017e-05, "loss": 1.91, "step": 23900500 }, { "epoch": 69.18, "learning_rate": 1.542061585309374e-05, "loss": 1.8832, "step": 23901000 }, { "epoch": 69.19, "learning_rate": 1.541989220544646e-05, "loss": 1.897, "step": 23901500 }, { "epoch": 69.19, "learning_rate": 1.5419168557799184e-05, "loss": 1.8895, "step": 23902000 }, { "epoch": 69.19, "learning_rate": 1.541844491015191e-05, "loss": 1.8808, "step": 23902500 }, { "epoch": 69.19, "learning_rate": 1.5417721262504632e-05, "loss": 1.8937, "step": 23903000 }, { "epoch": 69.19, "learning_rate": 1.541699906215265e-05, "loss": 1.8886, "step": 23903500 }, { "epoch": 69.19, "learning_rate": 1.5416275414505373e-05, "loss": 1.8704, "step": 23904000 }, { "epoch": 69.19, "learning_rate": 1.5415551766858095e-05, "loss": 1.8884, "step": 23904500 }, { "epoch": 69.2, "learning_rate": 1.541482811921082e-05, "loss": 1.8821, "step": 23905000 }, { "epoch": 69.2, "learning_rate": 1.5414104471563543e-05, "loss": 1.9003, "step": 23905500 }, { "epoch": 69.2, "learning_rate": 1.541338227121156e-05, "loss": 1.8795, "step": 23906000 }, { "epoch": 69.2, "learning_rate": 1.5412658623564284e-05, "loss": 1.8958, "step": 23906500 }, { "epoch": 69.2, "learning_rate": 1.54119364232123e-05, "loss": 1.8676, "step": 23907000 }, { "epoch": 69.2, "learning_rate": 1.5411212775565025e-05, "loss": 1.8893, "step": 23907500 }, { "epoch": 69.2, "learning_rate": 1.5410489127917748e-05, "loss": 1.8939, "step": 23908000 }, { "epoch": 69.21, "learning_rate": 1.5409765480270473e-05, "loss": 1.8924, "step": 23908500 }, { "epoch": 69.21, "learning_rate": 1.5409041832623195e-05, "loss": 1.9056, "step": 23909000 }, { "epoch": 69.21, "learning_rate": 1.5408318184975918e-05, "loss": 1.8835, "step": 23909500 }, { "epoch": 69.21, "learning_rate": 1.540759453732864e-05, "loss": 1.8864, "step": 23910000 }, { "epoch": 69.21, "learning_rate": 1.5406870889681362e-05, "loss": 1.9014, "step": 23910500 }, { "epoch": 69.21, "learning_rate": 1.5406148689329385e-05, "loss": 1.8866, "step": 23911000 }, { "epoch": 69.21, "learning_rate": 1.5405425041682107e-05, "loss": 1.887, "step": 23911500 }, { "epoch": 69.22, "learning_rate": 1.540470139403483e-05, "loss": 1.9021, "step": 23912000 }, { "epoch": 69.22, "learning_rate": 1.540397774638755e-05, "loss": 1.8814, "step": 23912500 }, { "epoch": 69.22, "learning_rate": 1.5403254098740273e-05, "loss": 1.8769, "step": 23913000 }, { "epoch": 69.22, "learning_rate": 1.5402530451093e-05, "loss": 1.8974, "step": 23913500 }, { "epoch": 69.22, "learning_rate": 1.540180680344572e-05, "loss": 1.8929, "step": 23914000 }, { "epoch": 69.22, "learning_rate": 1.5401083155798447e-05, "loss": 1.8912, "step": 23914500 }, { "epoch": 69.22, "learning_rate": 1.540035950815117e-05, "loss": 1.8782, "step": 23915000 }, { "epoch": 69.23, "learning_rate": 1.539963586050389e-05, "loss": 1.8814, "step": 23915500 }, { "epoch": 69.23, "learning_rate": 1.5398912212856614e-05, "loss": 1.9097, "step": 23916000 }, { "epoch": 69.23, "learning_rate": 1.5398188565209336e-05, "loss": 1.888, "step": 23916500 }, { "epoch": 69.23, "learning_rate": 1.5397466364857355e-05, "loss": 1.8961, "step": 23917000 }, { "epoch": 69.23, "learning_rate": 1.5396742717210077e-05, "loss": 1.8871, "step": 23917500 }, { "epoch": 69.23, "learning_rate": 1.5396019069562803e-05, "loss": 1.8972, "step": 23918000 }, { "epoch": 69.23, "learning_rate": 1.539529686921082e-05, "loss": 1.8953, "step": 23918500 }, { "epoch": 69.24, "learning_rate": 1.5394573221563544e-05, "loss": 1.9074, "step": 23919000 }, { "epoch": 69.24, "learning_rate": 1.5393849573916266e-05, "loss": 1.8792, "step": 23919500 }, { "epoch": 69.24, "learning_rate": 1.539312592626899e-05, "loss": 1.9162, "step": 23920000 }, { "epoch": 69.24, "learning_rate": 1.539240227862171e-05, "loss": 1.87, "step": 23920500 }, { "epoch": 69.24, "learning_rate": 1.5391678630974436e-05, "loss": 1.8887, "step": 23921000 }, { "epoch": 69.24, "learning_rate": 1.539095498332716e-05, "loss": 1.9067, "step": 23921500 }, { "epoch": 69.24, "learning_rate": 1.5390231335679884e-05, "loss": 1.8896, "step": 23922000 }, { "epoch": 69.25, "learning_rate": 1.5389507688032606e-05, "loss": 1.8813, "step": 23922500 }, { "epoch": 69.25, "learning_rate": 1.538878404038533e-05, "loss": 1.8858, "step": 23923000 }, { "epoch": 69.25, "learning_rate": 1.538806039273805e-05, "loss": 1.8903, "step": 23923500 }, { "epoch": 69.25, "learning_rate": 1.5387336745090773e-05, "loss": 1.8903, "step": 23924000 }, { "epoch": 69.25, "learning_rate": 1.53866130974435e-05, "loss": 1.9156, "step": 23924500 }, { "epoch": 69.25, "learning_rate": 1.538588944979622e-05, "loss": 1.8812, "step": 23925000 }, { "epoch": 69.25, "learning_rate": 1.5385165802148946e-05, "loss": 1.8676, "step": 23925500 }, { "epoch": 69.26, "learning_rate": 1.5384443601796962e-05, "loss": 1.8776, "step": 23926000 }, { "epoch": 69.26, "learning_rate": 1.5383719954149688e-05, "loss": 1.9184, "step": 23926500 }, { "epoch": 69.26, "learning_rate": 1.5382997753797703e-05, "loss": 1.8897, "step": 23927000 }, { "epoch": 69.26, "learning_rate": 1.5382274106150425e-05, "loss": 1.9201, "step": 23927500 }, { "epoch": 69.26, "learning_rate": 1.538155045850315e-05, "loss": 1.8877, "step": 23928000 }, { "epoch": 69.26, "learning_rate": 1.5380826810855873e-05, "loss": 1.8928, "step": 23928500 }, { "epoch": 69.26, "learning_rate": 1.53801031632086e-05, "loss": 1.8876, "step": 23929000 }, { "epoch": 69.27, "learning_rate": 1.5379380962856614e-05, "loss": 1.8829, "step": 23929500 }, { "epoch": 69.27, "learning_rate": 1.5378657315209337e-05, "loss": 1.8676, "step": 23930000 }, { "epoch": 69.27, "learning_rate": 1.5377933667562062e-05, "loss": 1.8943, "step": 23930500 }, { "epoch": 69.27, "learning_rate": 1.5377210019914785e-05, "loss": 1.9076, "step": 23931000 }, { "epoch": 69.27, "learning_rate": 1.5376486372267507e-05, "loss": 1.9125, "step": 23931500 }, { "epoch": 69.27, "learning_rate": 1.537576272462023e-05, "loss": 1.8755, "step": 23932000 }, { "epoch": 69.27, "learning_rate": 1.537503907697295e-05, "loss": 1.8918, "step": 23932500 }, { "epoch": 69.28, "learning_rate": 1.5374315429325677e-05, "loss": 1.9097, "step": 23933000 }, { "epoch": 69.28, "learning_rate": 1.537359467626899e-05, "loss": 1.8968, "step": 23933500 }, { "epoch": 69.28, "learning_rate": 1.5372871028621715e-05, "loss": 1.9317, "step": 23934000 }, { "epoch": 69.28, "learning_rate": 1.5372147380974437e-05, "loss": 1.8821, "step": 23934500 }, { "epoch": 69.28, "learning_rate": 1.537142373332716e-05, "loss": 1.8926, "step": 23935000 }, { "epoch": 69.28, "learning_rate": 1.537070008567988e-05, "loss": 1.8837, "step": 23935500 }, { "epoch": 69.28, "learning_rate": 1.5369976438032604e-05, "loss": 1.8657, "step": 23936000 }, { "epoch": 69.29, "learning_rate": 1.5369254237680623e-05, "loss": 1.8972, "step": 23936500 }, { "epoch": 69.29, "learning_rate": 1.5368530590033348e-05, "loss": 1.8762, "step": 23937000 }, { "epoch": 69.29, "learning_rate": 1.536780694238607e-05, "loss": 1.869, "step": 23937500 }, { "epoch": 69.29, "learning_rate": 1.5367083294738793e-05, "loss": 1.8849, "step": 23938000 }, { "epoch": 69.29, "learning_rate": 1.5366359647091515e-05, "loss": 1.9192, "step": 23938500 }, { "epoch": 69.29, "learning_rate": 1.536563599944424e-05, "loss": 1.8986, "step": 23939000 }, { "epoch": 69.3, "learning_rate": 1.5364912351796963e-05, "loss": 1.8782, "step": 23939500 }, { "epoch": 69.3, "learning_rate": 1.536419015144498e-05, "loss": 1.8921, "step": 23940000 }, { "epoch": 69.3, "learning_rate": 1.5363466503797704e-05, "loss": 1.8998, "step": 23940500 }, { "epoch": 69.3, "learning_rate": 1.5362744303445723e-05, "loss": 1.9039, "step": 23941000 }, { "epoch": 69.3, "learning_rate": 1.536202210309374e-05, "loss": 1.8792, "step": 23941500 }, { "epoch": 69.3, "learning_rate": 1.5361298455446464e-05, "loss": 1.8779, "step": 23942000 }, { "epoch": 69.3, "learning_rate": 1.5360574807799186e-05, "loss": 1.893, "step": 23942500 }, { "epoch": 69.31, "learning_rate": 1.535985116015191e-05, "loss": 1.8918, "step": 23943000 }, { "epoch": 69.31, "learning_rate": 1.535912751250463e-05, "loss": 1.8942, "step": 23943500 }, { "epoch": 69.31, "learning_rate": 1.5358403864857353e-05, "loss": 1.8918, "step": 23944000 }, { "epoch": 69.31, "learning_rate": 1.535768021721008e-05, "loss": 1.8518, "step": 23944500 }, { "epoch": 69.31, "learning_rate": 1.5356956569562804e-05, "loss": 1.9064, "step": 23945000 }, { "epoch": 69.31, "learning_rate": 1.5356232921915526e-05, "loss": 1.8869, "step": 23945500 }, { "epoch": 69.31, "learning_rate": 1.535550927426825e-05, "loss": 1.9138, "step": 23946000 }, { "epoch": 69.32, "learning_rate": 1.535478562662097e-05, "loss": 1.9061, "step": 23946500 }, { "epoch": 69.32, "learning_rate": 1.5354061978973693e-05, "loss": 1.8987, "step": 23947000 }, { "epoch": 69.32, "learning_rate": 1.5353338331326415e-05, "loss": 1.8907, "step": 23947500 }, { "epoch": 69.32, "learning_rate": 1.535261468367914e-05, "loss": 1.8787, "step": 23948000 }, { "epoch": 69.32, "learning_rate": 1.535189248332716e-05, "loss": 1.9104, "step": 23948500 }, { "epoch": 69.32, "learning_rate": 1.5351168835679882e-05, "loss": 1.8851, "step": 23949000 }, { "epoch": 69.32, "learning_rate": 1.5350445188032604e-05, "loss": 1.8943, "step": 23949500 }, { "epoch": 69.33, "learning_rate": 1.534972154038533e-05, "loss": 1.8777, "step": 23950000 }, { "epoch": 69.33, "learning_rate": 1.5348997892738052e-05, "loss": 1.8934, "step": 23950500 }, { "epoch": 69.33, "learning_rate": 1.5348274245090775e-05, "loss": 1.8882, "step": 23951000 }, { "epoch": 69.33, "learning_rate": 1.534755204473879e-05, "loss": 1.9071, "step": 23951500 }, { "epoch": 69.33, "learning_rate": 1.5346828397091516e-05, "loss": 1.8871, "step": 23952000 }, { "epoch": 69.33, "learning_rate": 1.534610474944424e-05, "loss": 1.9022, "step": 23952500 }, { "epoch": 69.33, "learning_rate": 1.5345381101796964e-05, "loss": 1.8904, "step": 23953000 }, { "epoch": 69.34, "learning_rate": 1.5344657454149686e-05, "loss": 1.8889, "step": 23953500 }, { "epoch": 69.34, "learning_rate": 1.5343936701092998e-05, "loss": 1.8703, "step": 23954000 }, { "epoch": 69.34, "learning_rate": 1.534321305344572e-05, "loss": 1.8937, "step": 23954500 }, { "epoch": 69.34, "learning_rate": 1.5342489405798443e-05, "loss": 1.8796, "step": 23955000 }, { "epoch": 69.34, "learning_rate": 1.5341765758151165e-05, "loss": 1.8977, "step": 23955500 }, { "epoch": 69.34, "learning_rate": 1.534104211050389e-05, "loss": 1.903, "step": 23956000 }, { "epoch": 69.34, "learning_rate": 1.5340318462856616e-05, "loss": 1.8931, "step": 23956500 }, { "epoch": 69.35, "learning_rate": 1.533959626250463e-05, "loss": 1.8603, "step": 23957000 }, { "epoch": 69.35, "learning_rate": 1.5338872614857354e-05, "loss": 1.9379, "step": 23957500 }, { "epoch": 69.35, "learning_rate": 1.533814896721008e-05, "loss": 1.8834, "step": 23958000 }, { "epoch": 69.35, "learning_rate": 1.53374253195628e-05, "loss": 1.8858, "step": 23958500 }, { "epoch": 69.35, "learning_rate": 1.5336701671915524e-05, "loss": 1.8999, "step": 23959000 }, { "epoch": 69.35, "learning_rate": 1.533597802426825e-05, "loss": 1.8966, "step": 23959500 }, { "epoch": 69.35, "learning_rate": 1.533525582391627e-05, "loss": 1.9139, "step": 23960000 }, { "epoch": 69.36, "learning_rate": 1.533453217626899e-05, "loss": 1.8847, "step": 23960500 }, { "epoch": 69.36, "learning_rate": 1.5333808528621713e-05, "loss": 1.8983, "step": 23961000 }, { "epoch": 69.36, "learning_rate": 1.5333084880974435e-05, "loss": 1.8934, "step": 23961500 }, { "epoch": 69.36, "learning_rate": 1.5332361233327157e-05, "loss": 1.8887, "step": 23962000 }, { "epoch": 69.36, "learning_rate": 1.533163758567988e-05, "loss": 1.914, "step": 23962500 }, { "epoch": 69.36, "learning_rate": 1.5330913938032605e-05, "loss": 1.8876, "step": 23963000 }, { "epoch": 69.36, "learning_rate": 1.533019029038533e-05, "loss": 1.9084, "step": 23963500 }, { "epoch": 69.37, "learning_rate": 1.5329468090033346e-05, "loss": 1.8798, "step": 23964000 }, { "epoch": 69.37, "learning_rate": 1.532874444238607e-05, "loss": 1.895, "step": 23964500 }, { "epoch": 69.37, "learning_rate": 1.5328020794738794e-05, "loss": 1.8742, "step": 23965000 }, { "epoch": 69.37, "learning_rate": 1.5327297147091517e-05, "loss": 1.9154, "step": 23965500 }, { "epoch": 69.37, "learning_rate": 1.532657349944424e-05, "loss": 1.8888, "step": 23966000 }, { "epoch": 69.37, "learning_rate": 1.532584985179696e-05, "loss": 1.9049, "step": 23966500 }, { "epoch": 69.37, "learning_rate": 1.5325126204149687e-05, "loss": 1.8982, "step": 23967000 }, { "epoch": 69.38, "learning_rate": 1.532440255650241e-05, "loss": 1.907, "step": 23967500 }, { "epoch": 69.38, "learning_rate": 1.5323680356150428e-05, "loss": 1.9039, "step": 23968000 }, { "epoch": 69.38, "learning_rate": 1.532295670850315e-05, "loss": 1.9242, "step": 23968500 }, { "epoch": 69.38, "learning_rate": 1.5322233060855872e-05, "loss": 1.8814, "step": 23969000 }, { "epoch": 69.38, "learning_rate": 1.5321509413208594e-05, "loss": 1.87, "step": 23969500 }, { "epoch": 69.38, "learning_rate": 1.532078576556132e-05, "loss": 1.8962, "step": 23970000 }, { "epoch": 69.38, "learning_rate": 1.5320062117914046e-05, "loss": 1.8976, "step": 23970500 }, { "epoch": 69.39, "learning_rate": 1.5319338470266768e-05, "loss": 1.9198, "step": 23971000 }, { "epoch": 69.39, "learning_rate": 1.531861482261949e-05, "loss": 1.8822, "step": 23971500 }, { "epoch": 69.39, "learning_rate": 1.5317892622267506e-05, "loss": 1.8847, "step": 23972000 }, { "epoch": 69.39, "learning_rate": 1.531716897462023e-05, "loss": 1.9125, "step": 23972500 }, { "epoch": 69.39, "learning_rate": 1.5316445326972954e-05, "loss": 1.9016, "step": 23973000 }, { "epoch": 69.39, "learning_rate": 1.5315721679325676e-05, "loss": 1.8889, "step": 23973500 }, { "epoch": 69.39, "learning_rate": 1.5314999478973695e-05, "loss": 1.8986, "step": 23974000 }, { "epoch": 69.4, "learning_rate": 1.531427583132642e-05, "loss": 1.9015, "step": 23974500 }, { "epoch": 69.4, "learning_rate": 1.5313552183679143e-05, "loss": 1.8812, "step": 23975000 }, { "epoch": 69.4, "learning_rate": 1.5312828536031865e-05, "loss": 1.8848, "step": 23975500 }, { "epoch": 69.4, "learning_rate": 1.5312106335679884e-05, "loss": 1.8881, "step": 23976000 }, { "epoch": 69.4, "learning_rate": 1.5311382688032606e-05, "loss": 1.8875, "step": 23976500 }, { "epoch": 69.4, "learning_rate": 1.5310659040385328e-05, "loss": 1.9218, "step": 23977000 }, { "epoch": 69.41, "learning_rate": 1.530993539273805e-05, "loss": 1.9074, "step": 23977500 }, { "epoch": 69.41, "learning_rate": 1.5309211745090776e-05, "loss": 1.9166, "step": 23978000 }, { "epoch": 69.41, "learning_rate": 1.53084880974435e-05, "loss": 1.873, "step": 23978500 }, { "epoch": 69.41, "learning_rate": 1.5307765897091517e-05, "loss": 1.8742, "step": 23979000 }, { "epoch": 69.41, "learning_rate": 1.530704224944424e-05, "loss": 1.8771, "step": 23979500 }, { "epoch": 69.41, "learning_rate": 1.5306318601796962e-05, "loss": 1.8789, "step": 23980000 }, { "epoch": 69.41, "learning_rate": 1.5305594954149684e-05, "loss": 1.8823, "step": 23980500 }, { "epoch": 69.42, "learning_rate": 1.530487130650241e-05, "loss": 1.8789, "step": 23981000 }, { "epoch": 69.42, "learning_rate": 1.5304147658855135e-05, "loss": 1.8801, "step": 23981500 }, { "epoch": 69.42, "learning_rate": 1.5303424011207857e-05, "loss": 1.8957, "step": 23982000 }, { "epoch": 69.42, "learning_rate": 1.5302701810855873e-05, "loss": 1.9004, "step": 23982500 }, { "epoch": 69.42, "learning_rate": 1.5301979610503892e-05, "loss": 1.9338, "step": 23983000 }, { "epoch": 69.42, "learning_rate": 1.5301255962856614e-05, "loss": 1.8866, "step": 23983500 }, { "epoch": 69.42, "learning_rate": 1.5300532315209336e-05, "loss": 1.9106, "step": 23984000 }, { "epoch": 69.43, "learning_rate": 1.529980866756206e-05, "loss": 1.9054, "step": 23984500 }, { "epoch": 69.43, "learning_rate": 1.5299085019914784e-05, "loss": 1.9013, "step": 23985000 }, { "epoch": 69.43, "learning_rate": 1.529836137226751e-05, "loss": 1.9122, "step": 23985500 }, { "epoch": 69.43, "learning_rate": 1.5297637724620232e-05, "loss": 1.8971, "step": 23986000 }, { "epoch": 69.43, "learning_rate": 1.5296914076972954e-05, "loss": 1.8739, "step": 23986500 }, { "epoch": 69.43, "learning_rate": 1.5296190429325677e-05, "loss": 1.8792, "step": 23987000 }, { "epoch": 69.43, "learning_rate": 1.52954667816784e-05, "loss": 1.9088, "step": 23987500 }, { "epoch": 69.44, "learning_rate": 1.5294744581326418e-05, "loss": 1.89, "step": 23988000 }, { "epoch": 69.44, "learning_rate": 1.5294022380974433e-05, "loss": 1.866, "step": 23988500 }, { "epoch": 69.44, "learning_rate": 1.529329873332716e-05, "loss": 1.9036, "step": 23989000 }, { "epoch": 69.44, "learning_rate": 1.5292575085679885e-05, "loss": 1.8868, "step": 23989500 }, { "epoch": 69.44, "learning_rate": 1.5291851438032607e-05, "loss": 1.8945, "step": 23990000 }, { "epoch": 69.44, "learning_rate": 1.529112779038533e-05, "loss": 1.8976, "step": 23990500 }, { "epoch": 69.44, "learning_rate": 1.529040414273805e-05, "loss": 1.9004, "step": 23991000 }, { "epoch": 69.45, "learning_rate": 1.528968194238607e-05, "loss": 1.8985, "step": 23991500 }, { "epoch": 69.45, "learning_rate": 1.5288958294738792e-05, "loss": 1.8953, "step": 23992000 }, { "epoch": 69.45, "learning_rate": 1.5288234647091515e-05, "loss": 1.9041, "step": 23992500 }, { "epoch": 69.45, "learning_rate": 1.528751099944424e-05, "loss": 1.8981, "step": 23993000 }, { "epoch": 69.45, "learning_rate": 1.5286787351796963e-05, "loss": 1.9081, "step": 23993500 }, { "epoch": 69.45, "learning_rate": 1.5286063704149685e-05, "loss": 1.8799, "step": 23994000 }, { "epoch": 69.45, "learning_rate": 1.528534005650241e-05, "loss": 1.9012, "step": 23994500 }, { "epoch": 69.46, "learning_rate": 1.5284617856150426e-05, "loss": 1.8877, "step": 23995000 }, { "epoch": 69.46, "learning_rate": 1.5283894208503148e-05, "loss": 1.9126, "step": 23995500 }, { "epoch": 69.46, "learning_rate": 1.5283170560855874e-05, "loss": 1.8942, "step": 23996000 }, { "epoch": 69.46, "learning_rate": 1.52824469132086e-05, "loss": 1.9128, "step": 23996500 }, { "epoch": 69.46, "learning_rate": 1.5281724712856615e-05, "loss": 1.8832, "step": 23997000 }, { "epoch": 69.46, "learning_rate": 1.5281001065209337e-05, "loss": 1.9061, "step": 23997500 }, { "epoch": 69.46, "learning_rate": 1.528027741756206e-05, "loss": 1.894, "step": 23998000 }, { "epoch": 69.47, "learning_rate": 1.5279553769914785e-05, "loss": 1.8956, "step": 23998500 }, { "epoch": 69.47, "learning_rate": 1.5278830122267507e-05, "loss": 1.8914, "step": 23999000 }, { "epoch": 69.47, "learning_rate": 1.527810647462023e-05, "loss": 1.8746, "step": 23999500 }, { "epoch": 69.47, "learning_rate": 1.5277382826972952e-05, "loss": 1.9118, "step": 24000000 }, { "epoch": 69.47, "learning_rate": 1.5276659179325677e-05, "loss": 1.9072, "step": 24000500 }, { "epoch": 69.47, "learning_rate": 1.5275936978973696e-05, "loss": 1.8998, "step": 24001000 }, { "epoch": 69.47, "learning_rate": 1.527521333132642e-05, "loss": 1.8885, "step": 24001500 }, { "epoch": 69.48, "learning_rate": 1.527448968367914e-05, "loss": 1.8887, "step": 24002000 }, { "epoch": 69.48, "learning_rate": 1.5273766036031863e-05, "loss": 1.8781, "step": 24002500 }, { "epoch": 69.48, "learning_rate": 1.5273043835679882e-05, "loss": 1.889, "step": 24003000 }, { "epoch": 69.48, "learning_rate": 1.5272320188032604e-05, "loss": 1.9052, "step": 24003500 }, { "epoch": 69.48, "learning_rate": 1.5271596540385326e-05, "loss": 1.8897, "step": 24004000 }, { "epoch": 69.48, "learning_rate": 1.527087434003335e-05, "loss": 1.908, "step": 24004500 }, { "epoch": 69.48, "learning_rate": 1.527015069238607e-05, "loss": 1.9052, "step": 24005000 }, { "epoch": 69.49, "learning_rate": 1.5269427044738793e-05, "loss": 1.8877, "step": 24005500 }, { "epoch": 69.49, "learning_rate": 1.5268703397091516e-05, "loss": 1.8949, "step": 24006000 }, { "epoch": 69.49, "learning_rate": 1.5267981196739534e-05, "loss": 1.9047, "step": 24006500 }, { "epoch": 69.49, "learning_rate": 1.5267257549092257e-05, "loss": 1.8965, "step": 24007000 }, { "epoch": 69.49, "learning_rate": 1.526653390144498e-05, "loss": 1.8845, "step": 24007500 }, { "epoch": 69.49, "learning_rate": 1.5265810253797705e-05, "loss": 1.8847, "step": 24008000 }, { "epoch": 69.49, "learning_rate": 1.5265086606150427e-05, "loss": 1.8761, "step": 24008500 }, { "epoch": 69.5, "learning_rate": 1.526436295850315e-05, "loss": 1.8753, "step": 24009000 }, { "epoch": 69.5, "learning_rate": 1.5263639310855875e-05, "loss": 1.8784, "step": 24009500 }, { "epoch": 69.5, "learning_rate": 1.5262915663208597e-05, "loss": 1.8903, "step": 24010000 }, { "epoch": 69.5, "learning_rate": 1.526219201556132e-05, "loss": 1.8935, "step": 24010500 }, { "epoch": 69.5, "learning_rate": 1.526146836791404e-05, "loss": 1.9139, "step": 24011000 }, { "epoch": 69.5, "learning_rate": 1.5260744720266767e-05, "loss": 1.9013, "step": 24011500 }, { "epoch": 69.5, "learning_rate": 1.5260022519914786e-05, "loss": 1.8856, "step": 24012000 }, { "epoch": 69.51, "learning_rate": 1.5259298872267508e-05, "loss": 1.9185, "step": 24012500 }, { "epoch": 69.51, "learning_rate": 1.525857522462023e-05, "loss": 1.8788, "step": 24013000 }, { "epoch": 69.51, "learning_rate": 1.5257853024268248e-05, "loss": 1.9075, "step": 24013500 }, { "epoch": 69.51, "learning_rate": 1.5257129376620972e-05, "loss": 1.8606, "step": 24014000 }, { "epoch": 69.51, "learning_rate": 1.5256405728973694e-05, "loss": 1.9015, "step": 24014500 }, { "epoch": 69.51, "learning_rate": 1.5255682081326416e-05, "loss": 1.8885, "step": 24015000 }, { "epoch": 69.52, "learning_rate": 1.5254959880974437e-05, "loss": 1.8812, "step": 24015500 }, { "epoch": 69.52, "learning_rate": 1.525423623332716e-05, "loss": 1.8976, "step": 24016000 }, { "epoch": 69.52, "learning_rate": 1.5253512585679883e-05, "loss": 1.8971, "step": 24016500 }, { "epoch": 69.52, "learning_rate": 1.5252788938032605e-05, "loss": 1.8863, "step": 24017000 }, { "epoch": 69.52, "learning_rate": 1.5252065290385329e-05, "loss": 1.8987, "step": 24017500 }, { "epoch": 69.52, "learning_rate": 1.5251341642738051e-05, "loss": 1.8928, "step": 24018000 }, { "epoch": 69.52, "learning_rate": 1.5250617995090773e-05, "loss": 1.9168, "step": 24018500 }, { "epoch": 69.53, "learning_rate": 1.5249894347443499e-05, "loss": 1.8806, "step": 24019000 }, { "epoch": 69.53, "learning_rate": 1.5249170699796223e-05, "loss": 1.9185, "step": 24019500 }, { "epoch": 69.53, "learning_rate": 1.5248447052148945e-05, "loss": 1.9007, "step": 24020000 }, { "epoch": 69.53, "learning_rate": 1.5247723404501667e-05, "loss": 1.8998, "step": 24020500 }, { "epoch": 69.53, "learning_rate": 1.5246999756854391e-05, "loss": 1.9119, "step": 24021000 }, { "epoch": 69.53, "learning_rate": 1.5246276109207114e-05, "loss": 1.8847, "step": 24021500 }, { "epoch": 69.53, "learning_rate": 1.5245552461559836e-05, "loss": 1.8841, "step": 24022000 }, { "epoch": 69.54, "learning_rate": 1.5244828813912561e-05, "loss": 1.8696, "step": 24022500 }, { "epoch": 69.54, "learning_rate": 1.524410661356058e-05, "loss": 1.8867, "step": 24023000 }, { "epoch": 69.54, "learning_rate": 1.5243382965913303e-05, "loss": 1.9044, "step": 24023500 }, { "epoch": 69.54, "learning_rate": 1.5242659318266025e-05, "loss": 1.8933, "step": 24024000 }, { "epoch": 69.54, "learning_rate": 1.5241935670618749e-05, "loss": 1.8707, "step": 24024500 }, { "epoch": 69.54, "learning_rate": 1.5241212022971471e-05, "loss": 1.893, "step": 24025000 }, { "epoch": 69.54, "learning_rate": 1.5240488375324193e-05, "loss": 1.8601, "step": 24025500 }, { "epoch": 69.55, "learning_rate": 1.5239764727676917e-05, "loss": 1.8711, "step": 24026000 }, { "epoch": 69.55, "learning_rate": 1.5239041080029643e-05, "loss": 1.9158, "step": 24026500 }, { "epoch": 69.55, "learning_rate": 1.5238317432382365e-05, "loss": 1.876, "step": 24027000 }, { "epoch": 69.55, "learning_rate": 1.5237593784735087e-05, "loss": 1.8808, "step": 24027500 }, { "epoch": 69.55, "learning_rate": 1.5236870137087811e-05, "loss": 1.8949, "step": 24028000 }, { "epoch": 69.55, "learning_rate": 1.5236146489440533e-05, "loss": 1.8849, "step": 24028500 }, { "epoch": 69.55, "learning_rate": 1.5235422841793256e-05, "loss": 1.8983, "step": 24029000 }, { "epoch": 69.56, "learning_rate": 1.5234700641441275e-05, "loss": 1.8979, "step": 24029500 }, { "epoch": 69.56, "learning_rate": 1.5233976993794e-05, "loss": 1.8766, "step": 24030000 }, { "epoch": 69.56, "learning_rate": 1.5233253346146722e-05, "loss": 1.8784, "step": 24030500 }, { "epoch": 69.56, "learning_rate": 1.5232529698499445e-05, "loss": 1.901, "step": 24031000 }, { "epoch": 69.56, "learning_rate": 1.5231807498147464e-05, "loss": 1.8997, "step": 24031500 }, { "epoch": 69.56, "learning_rate": 1.5231083850500186e-05, "loss": 1.8636, "step": 24032000 }, { "epoch": 69.56, "learning_rate": 1.5230360202852908e-05, "loss": 1.8992, "step": 24032500 }, { "epoch": 69.57, "learning_rate": 1.5229636555205632e-05, "loss": 1.8888, "step": 24033000 }, { "epoch": 69.57, "learning_rate": 1.5228912907558358e-05, "loss": 1.896, "step": 24033500 }, { "epoch": 69.57, "learning_rate": 1.5228190707206375e-05, "loss": 1.8771, "step": 24034000 }, { "epoch": 69.57, "learning_rate": 1.5227467059559097e-05, "loss": 1.9051, "step": 24034500 }, { "epoch": 69.57, "learning_rate": 1.522674341191182e-05, "loss": 1.9056, "step": 24035000 }, { "epoch": 69.57, "learning_rate": 1.5226019764264543e-05, "loss": 1.8903, "step": 24035500 }, { "epoch": 69.57, "learning_rate": 1.5225296116617266e-05, "loss": 1.9163, "step": 24036000 }, { "epoch": 69.58, "learning_rate": 1.522457246896999e-05, "loss": 1.8859, "step": 24036500 }, { "epoch": 69.58, "learning_rate": 1.5223848821322712e-05, "loss": 1.9015, "step": 24037000 }, { "epoch": 69.58, "learning_rate": 1.5223125173675437e-05, "loss": 1.9065, "step": 24037500 }, { "epoch": 69.58, "learning_rate": 1.522240152602816e-05, "loss": 1.909, "step": 24038000 }, { "epoch": 69.58, "learning_rate": 1.5221679325676177e-05, "loss": 1.8895, "step": 24038500 }, { "epoch": 69.58, "learning_rate": 1.52209556780289e-05, "loss": 1.8824, "step": 24039000 }, { "epoch": 69.58, "learning_rate": 1.5220232030381623e-05, "loss": 1.884, "step": 24039500 }, { "epoch": 69.59, "learning_rate": 1.5219508382734345e-05, "loss": 1.8747, "step": 24040000 }, { "epoch": 69.59, "learning_rate": 1.5218784735087069e-05, "loss": 1.8867, "step": 24040500 }, { "epoch": 69.59, "learning_rate": 1.5218061087439795e-05, "loss": 1.8918, "step": 24041000 }, { "epoch": 69.59, "learning_rate": 1.5217338887087812e-05, "loss": 1.8965, "step": 24041500 }, { "epoch": 69.59, "learning_rate": 1.5216615239440534e-05, "loss": 1.8622, "step": 24042000 }, { "epoch": 69.59, "learning_rate": 1.5215891591793258e-05, "loss": 1.9008, "step": 24042500 }, { "epoch": 69.59, "learning_rate": 1.521516794414598e-05, "loss": 1.9028, "step": 24043000 }, { "epoch": 69.6, "learning_rate": 1.5214444296498703e-05, "loss": 1.907, "step": 24043500 }, { "epoch": 69.6, "learning_rate": 1.5213720648851427e-05, "loss": 1.9019, "step": 24044000 }, { "epoch": 69.6, "learning_rate": 1.5212998448499444e-05, "loss": 1.9186, "step": 24044500 }, { "epoch": 69.6, "learning_rate": 1.521227480085217e-05, "loss": 1.9021, "step": 24045000 }, { "epoch": 69.6, "learning_rate": 1.5211551153204892e-05, "loss": 1.8919, "step": 24045500 }, { "epoch": 69.6, "learning_rate": 1.5210827505557616e-05, "loss": 1.8942, "step": 24046000 }, { "epoch": 69.6, "learning_rate": 1.5210103857910338e-05, "loss": 1.8689, "step": 24046500 }, { "epoch": 69.61, "learning_rate": 1.520938021026306e-05, "loss": 1.8705, "step": 24047000 }, { "epoch": 69.61, "learning_rate": 1.5208656562615784e-05, "loss": 1.9085, "step": 24047500 }, { "epoch": 69.61, "learning_rate": 1.5207934362263801e-05, "loss": 1.8931, "step": 24048000 }, { "epoch": 69.61, "learning_rate": 1.5207210714616527e-05, "loss": 1.8859, "step": 24048500 }, { "epoch": 69.61, "learning_rate": 1.5206487066969249e-05, "loss": 1.8989, "step": 24049000 }, { "epoch": 69.61, "learning_rate": 1.5205763419321973e-05, "loss": 1.8854, "step": 24049500 }, { "epoch": 69.61, "learning_rate": 1.5205039771674695e-05, "loss": 1.8846, "step": 24050000 }, { "epoch": 69.62, "learning_rate": 1.5204317571322713e-05, "loss": 1.9105, "step": 24050500 }, { "epoch": 69.62, "learning_rate": 1.5203593923675435e-05, "loss": 1.9123, "step": 24051000 }, { "epoch": 69.62, "learning_rate": 1.5202870276028159e-05, "loss": 1.9095, "step": 24051500 }, { "epoch": 69.62, "learning_rate": 1.5202146628380884e-05, "loss": 1.9132, "step": 24052000 }, { "epoch": 69.62, "learning_rate": 1.5201422980733607e-05, "loss": 1.8898, "step": 24052500 }, { "epoch": 69.62, "learning_rate": 1.5200700780381624e-05, "loss": 1.9079, "step": 24053000 }, { "epoch": 69.63, "learning_rate": 1.5199977132734348e-05, "loss": 1.9037, "step": 24053500 }, { "epoch": 69.63, "learning_rate": 1.519925348508707e-05, "loss": 1.9132, "step": 24054000 }, { "epoch": 69.63, "learning_rate": 1.5198529837439792e-05, "loss": 1.8824, "step": 24054500 }, { "epoch": 69.63, "learning_rate": 1.5197806189792516e-05, "loss": 1.8937, "step": 24055000 }, { "epoch": 69.63, "learning_rate": 1.5197082542145238e-05, "loss": 1.9024, "step": 24055500 }, { "epoch": 69.63, "learning_rate": 1.5196358894497964e-05, "loss": 1.887, "step": 24056000 }, { "epoch": 69.63, "learning_rate": 1.5195635246850686e-05, "loss": 1.9031, "step": 24056500 }, { "epoch": 69.64, "learning_rate": 1.519491159920341e-05, "loss": 1.8961, "step": 24057000 }, { "epoch": 69.64, "learning_rate": 1.5194189398851427e-05, "loss": 1.9143, "step": 24057500 }, { "epoch": 69.64, "learning_rate": 1.519346575120415e-05, "loss": 1.8847, "step": 24058000 }, { "epoch": 69.64, "learning_rate": 1.5192743550852167e-05, "loss": 1.917, "step": 24058500 }, { "epoch": 69.64, "learning_rate": 1.519201990320489e-05, "loss": 1.891, "step": 24059000 }, { "epoch": 69.64, "learning_rate": 1.5191296255557616e-05, "loss": 1.8927, "step": 24059500 }, { "epoch": 69.64, "learning_rate": 1.5190572607910339e-05, "loss": 1.8963, "step": 24060000 }, { "epoch": 69.65, "learning_rate": 1.5189848960263061e-05, "loss": 1.8778, "step": 24060500 }, { "epoch": 69.65, "learning_rate": 1.5189125312615785e-05, "loss": 1.9078, "step": 24061000 }, { "epoch": 69.65, "learning_rate": 1.5188401664968507e-05, "loss": 1.8967, "step": 24061500 }, { "epoch": 69.65, "learning_rate": 1.5187679464616524e-05, "loss": 1.9063, "step": 24062000 }, { "epoch": 69.65, "learning_rate": 1.5186957264264543e-05, "loss": 1.9162, "step": 24062500 }, { "epoch": 69.65, "learning_rate": 1.5186233616617265e-05, "loss": 1.9022, "step": 24063000 }, { "epoch": 69.65, "learning_rate": 1.5185509968969991e-05, "loss": 1.9144, "step": 24063500 }, { "epoch": 69.66, "learning_rate": 1.5184786321322713e-05, "loss": 1.889, "step": 24064000 }, { "epoch": 69.66, "learning_rate": 1.5184062673675437e-05, "loss": 1.9127, "step": 24064500 }, { "epoch": 69.66, "learning_rate": 1.518333902602816e-05, "loss": 1.9098, "step": 24065000 }, { "epoch": 69.66, "learning_rate": 1.5182615378380882e-05, "loss": 1.905, "step": 24065500 }, { "epoch": 69.66, "learning_rate": 1.5181891730733606e-05, "loss": 1.919, "step": 24066000 }, { "epoch": 69.66, "learning_rate": 1.5181168083086328e-05, "loss": 1.8782, "step": 24066500 }, { "epoch": 69.66, "learning_rate": 1.5180444435439053e-05, "loss": 1.9198, "step": 24067000 }, { "epoch": 69.67, "learning_rate": 1.5179720787791776e-05, "loss": 1.9104, "step": 24067500 }, { "epoch": 69.67, "learning_rate": 1.51789971401445e-05, "loss": 1.8813, "step": 24068000 }, { "epoch": 69.67, "learning_rate": 1.5178274939792517e-05, "loss": 1.8749, "step": 24068500 }, { "epoch": 69.67, "learning_rate": 1.5177552739440534e-05, "loss": 1.8797, "step": 24069000 }, { "epoch": 69.67, "learning_rate": 1.5176829091793256e-05, "loss": 1.8844, "step": 24069500 }, { "epoch": 69.67, "learning_rate": 1.517610544414598e-05, "loss": 1.9269, "step": 24070000 }, { "epoch": 69.67, "learning_rate": 1.5175381796498703e-05, "loss": 1.906, "step": 24070500 }, { "epoch": 69.68, "learning_rate": 1.5174659596146723e-05, "loss": 1.8943, "step": 24071000 }, { "epoch": 69.68, "learning_rate": 1.5173935948499445e-05, "loss": 1.9307, "step": 24071500 }, { "epoch": 69.68, "learning_rate": 1.517321230085217e-05, "loss": 1.8763, "step": 24072000 }, { "epoch": 69.68, "learning_rate": 1.5172488653204892e-05, "loss": 1.8917, "step": 24072500 }, { "epoch": 69.68, "learning_rate": 1.5171765005557614e-05, "loss": 1.8821, "step": 24073000 }, { "epoch": 69.68, "learning_rate": 1.5171042805205633e-05, "loss": 1.9267, "step": 24073500 }, { "epoch": 69.68, "learning_rate": 1.517032060485365e-05, "loss": 1.8751, "step": 24074000 }, { "epoch": 69.69, "learning_rate": 1.5169598404501667e-05, "loss": 1.8761, "step": 24074500 }, { "epoch": 69.69, "learning_rate": 1.516887475685439e-05, "loss": 1.9046, "step": 24075000 }, { "epoch": 69.69, "learning_rate": 1.5168151109207115e-05, "loss": 1.9212, "step": 24075500 }, { "epoch": 69.69, "learning_rate": 1.5167427461559839e-05, "loss": 1.8731, "step": 24076000 }, { "epoch": 69.69, "learning_rate": 1.5166703813912561e-05, "loss": 1.8932, "step": 24076500 }, { "epoch": 69.69, "learning_rate": 1.5165980166265284e-05, "loss": 1.8796, "step": 24077000 }, { "epoch": 69.69, "learning_rate": 1.5165256518618007e-05, "loss": 1.9005, "step": 24077500 }, { "epoch": 69.7, "learning_rate": 1.516453287097073e-05, "loss": 1.8888, "step": 24078000 }, { "epoch": 69.7, "learning_rate": 1.5163809223323455e-05, "loss": 1.9073, "step": 24078500 }, { "epoch": 69.7, "learning_rate": 1.5163085575676178e-05, "loss": 1.9059, "step": 24079000 }, { "epoch": 69.7, "learning_rate": 1.5162361928028901e-05, "loss": 1.8994, "step": 24079500 }, { "epoch": 69.7, "learning_rate": 1.5161639727676919e-05, "loss": 1.8773, "step": 24080000 }, { "epoch": 69.7, "learning_rate": 1.5160916080029641e-05, "loss": 1.8808, "step": 24080500 }, { "epoch": 69.7, "learning_rate": 1.5160192432382365e-05, "loss": 1.897, "step": 24081000 }, { "epoch": 69.71, "learning_rate": 1.5159468784735087e-05, "loss": 1.8724, "step": 24081500 }, { "epoch": 69.71, "learning_rate": 1.5158746584383104e-05, "loss": 1.9069, "step": 24082000 }, { "epoch": 69.71, "learning_rate": 1.515802293673583e-05, "loss": 1.9175, "step": 24082500 }, { "epoch": 69.71, "learning_rate": 1.5157300736383847e-05, "loss": 1.8988, "step": 24083000 }, { "epoch": 69.71, "learning_rate": 1.5156577088736571e-05, "loss": 1.9087, "step": 24083500 }, { "epoch": 69.71, "learning_rate": 1.5155853441089293e-05, "loss": 1.9073, "step": 24084000 }, { "epoch": 69.71, "learning_rate": 1.5155129793442016e-05, "loss": 1.9187, "step": 24084500 }, { "epoch": 69.72, "learning_rate": 1.515440614579474e-05, "loss": 1.9137, "step": 24085000 }, { "epoch": 69.72, "learning_rate": 1.5153683945442757e-05, "loss": 1.8918, "step": 24085500 }, { "epoch": 69.72, "learning_rate": 1.5152961745090774e-05, "loss": 1.9003, "step": 24086000 }, { "epoch": 69.72, "learning_rate": 1.51522380974435e-05, "loss": 1.8823, "step": 24086500 }, { "epoch": 69.72, "learning_rate": 1.5151514449796222e-05, "loss": 1.8835, "step": 24087000 }, { "epoch": 69.72, "learning_rate": 1.5150790802148946e-05, "loss": 1.8958, "step": 24087500 }, { "epoch": 69.72, "learning_rate": 1.5150067154501668e-05, "loss": 1.8866, "step": 24088000 }, { "epoch": 69.73, "learning_rate": 1.5149344954149685e-05, "loss": 1.9122, "step": 24088500 }, { "epoch": 69.73, "learning_rate": 1.514862130650241e-05, "loss": 1.879, "step": 24089000 }, { "epoch": 69.73, "learning_rate": 1.5147897658855131e-05, "loss": 1.9245, "step": 24089500 }, { "epoch": 69.73, "learning_rate": 1.5147174011207854e-05, "loss": 1.9008, "step": 24090000 }, { "epoch": 69.73, "learning_rate": 1.514645036356058e-05, "loss": 1.9103, "step": 24090500 }, { "epoch": 69.73, "learning_rate": 1.5145726715913303e-05, "loss": 1.913, "step": 24091000 }, { "epoch": 69.74, "learning_rate": 1.5145005962856616e-05, "loss": 1.8987, "step": 24091500 }, { "epoch": 69.74, "learning_rate": 1.5144282315209338e-05, "loss": 1.9015, "step": 24092000 }, { "epoch": 69.74, "learning_rate": 1.514355866756206e-05, "loss": 1.918, "step": 24092500 }, { "epoch": 69.74, "learning_rate": 1.5142835019914784e-05, "loss": 1.9135, "step": 24093000 }, { "epoch": 69.74, "learning_rate": 1.5142111372267506e-05, "loss": 1.8905, "step": 24093500 }, { "epoch": 69.74, "learning_rate": 1.5141387724620232e-05, "loss": 1.8778, "step": 24094000 }, { "epoch": 69.74, "learning_rate": 1.5140664076972954e-05, "loss": 1.9154, "step": 24094500 }, { "epoch": 69.75, "learning_rate": 1.5139940429325678e-05, "loss": 1.8856, "step": 24095000 }, { "epoch": 69.75, "learning_rate": 1.51392167816784e-05, "loss": 1.9006, "step": 24095500 }, { "epoch": 69.75, "learning_rate": 1.5138493134031122e-05, "loss": 1.9124, "step": 24096000 }, { "epoch": 69.75, "learning_rate": 1.5137769486383846e-05, "loss": 1.8994, "step": 24096500 }, { "epoch": 69.75, "learning_rate": 1.5137045838736569e-05, "loss": 1.8897, "step": 24097000 }, { "epoch": 69.75, "learning_rate": 1.5136322191089294e-05, "loss": 1.9001, "step": 24097500 }, { "epoch": 69.75, "learning_rate": 1.5135599990737311e-05, "loss": 1.9005, "step": 24098000 }, { "epoch": 69.76, "learning_rate": 1.5134876343090035e-05, "loss": 1.8814, "step": 24098500 }, { "epoch": 69.76, "learning_rate": 1.5134152695442758e-05, "loss": 1.899, "step": 24099000 }, { "epoch": 69.76, "learning_rate": 1.513342904779548e-05, "loss": 1.9082, "step": 24099500 }, { "epoch": 69.76, "learning_rate": 1.5132705400148204e-05, "loss": 1.9048, "step": 24100000 }, { "epoch": 69.76, "learning_rate": 1.5131981752500926e-05, "loss": 1.8893, "step": 24100500 }, { "epoch": 69.76, "learning_rate": 1.5131258104853648e-05, "loss": 1.8955, "step": 24101000 }, { "epoch": 69.76, "learning_rate": 1.5130534457206374e-05, "loss": 1.8634, "step": 24101500 }, { "epoch": 69.77, "learning_rate": 1.5129812256854393e-05, "loss": 1.908, "step": 24102000 }, { "epoch": 69.77, "learning_rate": 1.5129088609207115e-05, "loss": 1.8993, "step": 24102500 }, { "epoch": 69.77, "learning_rate": 1.5128364961559837e-05, "loss": 1.906, "step": 24103000 }, { "epoch": 69.77, "learning_rate": 1.5127641313912561e-05, "loss": 1.8942, "step": 24103500 }, { "epoch": 69.77, "learning_rate": 1.5126917666265283e-05, "loss": 1.8919, "step": 24104000 }, { "epoch": 69.77, "learning_rate": 1.5126194018618006e-05, "loss": 1.896, "step": 24104500 }, { "epoch": 69.77, "learning_rate": 1.5125470370970731e-05, "loss": 1.901, "step": 24105000 }, { "epoch": 69.78, "learning_rate": 1.5124746723323455e-05, "loss": 1.8988, "step": 24105500 }, { "epoch": 69.78, "learning_rate": 1.5124023075676177e-05, "loss": 1.8975, "step": 24106000 }, { "epoch": 69.78, "learning_rate": 1.5123300875324195e-05, "loss": 1.8822, "step": 24106500 }, { "epoch": 69.78, "learning_rate": 1.5122577227676919e-05, "loss": 1.9022, "step": 24107000 }, { "epoch": 69.78, "learning_rate": 1.512185358002964e-05, "loss": 1.8912, "step": 24107500 }, { "epoch": 69.78, "learning_rate": 1.5121129932382363e-05, "loss": 1.9109, "step": 24108000 }, { "epoch": 69.78, "learning_rate": 1.512040773203038e-05, "loss": 1.887, "step": 24108500 }, { "epoch": 69.79, "learning_rate": 1.5119684084383106e-05, "loss": 1.879, "step": 24109000 }, { "epoch": 69.79, "learning_rate": 1.511896043673583e-05, "loss": 1.8749, "step": 24109500 }, { "epoch": 69.79, "learning_rate": 1.5118236789088552e-05, "loss": 1.8914, "step": 24110000 }, { "epoch": 69.79, "learning_rate": 1.5117513141441274e-05, "loss": 1.8819, "step": 24110500 }, { "epoch": 69.79, "learning_rate": 1.5116792388384588e-05, "loss": 1.8959, "step": 24111000 }, { "epoch": 69.79, "learning_rate": 1.511606874073731e-05, "loss": 1.8738, "step": 24111500 }, { "epoch": 69.79, "learning_rate": 1.5115345093090033e-05, "loss": 1.8908, "step": 24112000 }, { "epoch": 69.8, "learning_rate": 1.5114621445442757e-05, "loss": 1.8902, "step": 24112500 }, { "epoch": 69.8, "learning_rate": 1.5113897797795482e-05, "loss": 1.8775, "step": 24113000 }, { "epoch": 69.8, "learning_rate": 1.5113174150148205e-05, "loss": 1.9154, "step": 24113500 }, { "epoch": 69.8, "learning_rate": 1.5112451949796222e-05, "loss": 1.9185, "step": 24114000 }, { "epoch": 69.8, "learning_rate": 1.5111728302148944e-05, "loss": 1.8937, "step": 24114500 }, { "epoch": 69.8, "learning_rate": 1.5111004654501668e-05, "loss": 1.8915, "step": 24115000 }, { "epoch": 69.8, "learning_rate": 1.511028100685439e-05, "loss": 1.8825, "step": 24115500 }, { "epoch": 69.81, "learning_rate": 1.5109557359207112e-05, "loss": 1.9, "step": 24116000 }, { "epoch": 69.81, "learning_rate": 1.5108833711559838e-05, "loss": 1.919, "step": 24116500 }, { "epoch": 69.81, "learning_rate": 1.5108111511207857e-05, "loss": 1.8996, "step": 24117000 }, { "epoch": 69.81, "learning_rate": 1.510738786356058e-05, "loss": 1.8945, "step": 24117500 }, { "epoch": 69.81, "learning_rate": 1.5106664215913301e-05, "loss": 1.8848, "step": 24118000 }, { "epoch": 69.81, "learning_rate": 1.5105940568266025e-05, "loss": 1.9226, "step": 24118500 }, { "epoch": 69.81, "learning_rate": 1.5105218367914043e-05, "loss": 1.8879, "step": 24119000 }, { "epoch": 69.82, "learning_rate": 1.5104494720266765e-05, "loss": 1.9019, "step": 24119500 }, { "epoch": 69.82, "learning_rate": 1.5103771072619489e-05, "loss": 1.8885, "step": 24120000 }, { "epoch": 69.82, "learning_rate": 1.5103047424972214e-05, "loss": 1.8945, "step": 24120500 }, { "epoch": 69.82, "learning_rate": 1.5102323777324937e-05, "loss": 1.8919, "step": 24121000 }, { "epoch": 69.82, "learning_rate": 1.5101600129677659e-05, "loss": 1.8896, "step": 24121500 }, { "epoch": 69.82, "learning_rate": 1.5100876482030383e-05, "loss": 1.8746, "step": 24122000 }, { "epoch": 69.82, "learning_rate": 1.51001542816784e-05, "loss": 1.9028, "step": 24122500 }, { "epoch": 69.83, "learning_rate": 1.5099430634031122e-05, "loss": 1.8906, "step": 24123000 }, { "epoch": 69.83, "learning_rate": 1.5098706986383846e-05, "loss": 1.8831, "step": 24123500 }, { "epoch": 69.83, "learning_rate": 1.5097983338736572e-05, "loss": 1.891, "step": 24124000 }, { "epoch": 69.83, "learning_rate": 1.5097259691089294e-05, "loss": 1.9057, "step": 24124500 }, { "epoch": 69.83, "learning_rate": 1.5096536043442016e-05, "loss": 1.8783, "step": 24125000 }, { "epoch": 69.83, "learning_rate": 1.509581239579474e-05, "loss": 1.9017, "step": 24125500 }, { "epoch": 69.83, "learning_rate": 1.5095090195442757e-05, "loss": 1.8996, "step": 24126000 }, { "epoch": 69.84, "learning_rate": 1.509436654779548e-05, "loss": 1.8812, "step": 24126500 }, { "epoch": 69.84, "learning_rate": 1.5093642900148202e-05, "loss": 1.9167, "step": 24127000 }, { "epoch": 69.84, "learning_rate": 1.5092919252500928e-05, "loss": 1.909, "step": 24127500 }, { "epoch": 69.84, "learning_rate": 1.5092197052148947e-05, "loss": 1.8774, "step": 24128000 }, { "epoch": 69.84, "learning_rate": 1.5091473404501669e-05, "loss": 1.8977, "step": 24128500 }, { "epoch": 69.84, "learning_rate": 1.5090749756854391e-05, "loss": 1.8858, "step": 24129000 }, { "epoch": 69.85, "learning_rate": 1.5090026109207115e-05, "loss": 1.8917, "step": 24129500 }, { "epoch": 69.85, "learning_rate": 1.5089303908855132e-05, "loss": 1.8906, "step": 24130000 }, { "epoch": 69.85, "learning_rate": 1.5088580261207854e-05, "loss": 1.9149, "step": 24130500 }, { "epoch": 69.85, "learning_rate": 1.5087856613560578e-05, "loss": 1.884, "step": 24131000 }, { "epoch": 69.85, "learning_rate": 1.5087132965913304e-05, "loss": 1.9136, "step": 24131500 }, { "epoch": 69.85, "learning_rate": 1.5086409318266026e-05, "loss": 1.9113, "step": 24132000 }, { "epoch": 69.85, "learning_rate": 1.5085685670618748e-05, "loss": 1.9133, "step": 24132500 }, { "epoch": 69.86, "learning_rate": 1.5084962022971472e-05, "loss": 1.8701, "step": 24133000 }, { "epoch": 69.86, "learning_rate": 1.5084238375324195e-05, "loss": 1.9044, "step": 24133500 }, { "epoch": 69.86, "learning_rate": 1.5083516174972212e-05, "loss": 1.8999, "step": 24134000 }, { "epoch": 69.86, "learning_rate": 1.5082792527324934e-05, "loss": 1.9203, "step": 24134500 }, { "epoch": 69.86, "learning_rate": 1.508206887967766e-05, "loss": 1.8913, "step": 24135000 }, { "epoch": 69.86, "learning_rate": 1.5081345232030384e-05, "loss": 1.889, "step": 24135500 }, { "epoch": 69.86, "learning_rate": 1.50806230316784e-05, "loss": 1.8948, "step": 24136000 }, { "epoch": 69.87, "learning_rate": 1.5079899384031123e-05, "loss": 1.9236, "step": 24136500 }, { "epoch": 69.87, "learning_rate": 1.5079175736383847e-05, "loss": 1.9104, "step": 24137000 }, { "epoch": 69.87, "learning_rate": 1.5078453536031864e-05, "loss": 1.9013, "step": 24137500 }, { "epoch": 69.87, "learning_rate": 1.5077729888384586e-05, "loss": 1.8825, "step": 24138000 }, { "epoch": 69.87, "learning_rate": 1.507700624073731e-05, "loss": 1.9145, "step": 24138500 }, { "epoch": 69.87, "learning_rate": 1.5076282593090036e-05, "loss": 1.886, "step": 24139000 }, { "epoch": 69.87, "learning_rate": 1.5075558945442758e-05, "loss": 1.8844, "step": 24139500 }, { "epoch": 69.88, "learning_rate": 1.507483529779548e-05, "loss": 1.9061, "step": 24140000 }, { "epoch": 69.88, "learning_rate": 1.5074111650148204e-05, "loss": 1.8962, "step": 24140500 }, { "epoch": 69.88, "learning_rate": 1.5073388002500927e-05, "loss": 1.9, "step": 24141000 }, { "epoch": 69.88, "learning_rate": 1.5072664354853649e-05, "loss": 1.8957, "step": 24141500 }, { "epoch": 69.88, "learning_rate": 1.5071940707206373e-05, "loss": 1.9275, "step": 24142000 }, { "epoch": 69.88, "learning_rate": 1.5071217059559098e-05, "loss": 1.8903, "step": 24142500 }, { "epoch": 69.88, "learning_rate": 1.507049341191182e-05, "loss": 1.9157, "step": 24143000 }, { "epoch": 69.89, "learning_rate": 1.5069771211559838e-05, "loss": 1.8802, "step": 24143500 }, { "epoch": 69.89, "learning_rate": 1.5069047563912562e-05, "loss": 1.8983, "step": 24144000 }, { "epoch": 69.89, "learning_rate": 1.5068323916265284e-05, "loss": 1.8982, "step": 24144500 }, { "epoch": 69.89, "learning_rate": 1.5067600268618006e-05, "loss": 1.9194, "step": 24145000 }, { "epoch": 69.89, "learning_rate": 1.5066878068266024e-05, "loss": 1.9038, "step": 24145500 }, { "epoch": 69.89, "learning_rate": 1.5066154420618748e-05, "loss": 1.9112, "step": 24146000 }, { "epoch": 69.89, "learning_rate": 1.5065430772971473e-05, "loss": 1.9273, "step": 24146500 }, { "epoch": 69.9, "learning_rate": 1.5064707125324195e-05, "loss": 1.8901, "step": 24147000 }, { "epoch": 69.9, "learning_rate": 1.5063983477676918e-05, "loss": 1.9002, "step": 24147500 }, { "epoch": 69.9, "learning_rate": 1.5063259830029642e-05, "loss": 1.9203, "step": 24148000 }, { "epoch": 69.9, "learning_rate": 1.5062536182382364e-05, "loss": 1.9105, "step": 24148500 }, { "epoch": 69.9, "learning_rate": 1.5061812534735088e-05, "loss": 1.8488, "step": 24149000 }, { "epoch": 69.9, "learning_rate": 1.506108888708781e-05, "loss": 1.8797, "step": 24149500 }, { "epoch": 69.9, "learning_rate": 1.506036668673583e-05, "loss": 1.8962, "step": 24150000 }, { "epoch": 69.91, "learning_rate": 1.5059643039088553e-05, "loss": 1.8922, "step": 24150500 }, { "epoch": 69.91, "learning_rate": 1.505892083873657e-05, "loss": 1.9152, "step": 24151000 }, { "epoch": 69.91, "learning_rate": 1.5058197191089294e-05, "loss": 1.8746, "step": 24151500 }, { "epoch": 69.91, "learning_rate": 1.5057473543442016e-05, "loss": 1.9027, "step": 24152000 }, { "epoch": 69.91, "learning_rate": 1.5056749895794738e-05, "loss": 1.9063, "step": 24152500 }, { "epoch": 69.91, "learning_rate": 1.5056026248147462e-05, "loss": 1.9082, "step": 24153000 }, { "epoch": 69.91, "learning_rate": 1.5055302600500185e-05, "loss": 1.9137, "step": 24153500 }, { "epoch": 69.92, "learning_rate": 1.505457895285291e-05, "loss": 1.8784, "step": 24154000 }, { "epoch": 69.92, "learning_rate": 1.5053855305205632e-05, "loss": 1.9197, "step": 24154500 }, { "epoch": 69.92, "learning_rate": 1.5053133104853651e-05, "loss": 1.9044, "step": 24155000 }, { "epoch": 69.92, "learning_rate": 1.5052409457206374e-05, "loss": 1.8917, "step": 24155500 }, { "epoch": 69.92, "learning_rate": 1.5051685809559096e-05, "loss": 1.9023, "step": 24156000 }, { "epoch": 69.92, "learning_rate": 1.505096216191182e-05, "loss": 1.8813, "step": 24156500 }, { "epoch": 69.92, "learning_rate": 1.5050238514264542e-05, "loss": 1.9239, "step": 24157000 }, { "epoch": 69.93, "learning_rate": 1.5049514866617268e-05, "loss": 1.8886, "step": 24157500 }, { "epoch": 69.93, "learning_rate": 1.5048792666265285e-05, "loss": 1.8715, "step": 24158000 }, { "epoch": 69.93, "learning_rate": 1.5048069018618007e-05, "loss": 1.8977, "step": 24158500 }, { "epoch": 69.93, "learning_rate": 1.5047345370970731e-05, "loss": 1.883, "step": 24159000 }, { "epoch": 69.93, "learning_rate": 1.5046621723323453e-05, "loss": 1.9008, "step": 24159500 }, { "epoch": 69.93, "learning_rate": 1.5045898075676177e-05, "loss": 1.899, "step": 24160000 }, { "epoch": 69.93, "learning_rate": 1.50451744280289e-05, "loss": 1.894, "step": 24160500 }, { "epoch": 69.94, "learning_rate": 1.5044450780381625e-05, "loss": 1.8909, "step": 24161000 }, { "epoch": 69.94, "learning_rate": 1.5043727132734347e-05, "loss": 1.8955, "step": 24161500 }, { "epoch": 69.94, "learning_rate": 1.504300637967766e-05, "loss": 1.9088, "step": 24162000 }, { "epoch": 69.94, "learning_rate": 1.5042282732030383e-05, "loss": 1.8843, "step": 24162500 }, { "epoch": 69.94, "learning_rate": 1.5041559084383106e-05, "loss": 1.8949, "step": 24163000 }, { "epoch": 69.94, "learning_rate": 1.5040835436735828e-05, "loss": 1.9005, "step": 24163500 }, { "epoch": 69.94, "learning_rate": 1.5040111789088552e-05, "loss": 1.8605, "step": 24164000 }, { "epoch": 69.95, "learning_rate": 1.5039388141441274e-05, "loss": 1.899, "step": 24164500 }, { "epoch": 69.95, "learning_rate": 1.5038664493794e-05, "loss": 1.8991, "step": 24165000 }, { "epoch": 69.95, "learning_rate": 1.5037942293442017e-05, "loss": 1.9118, "step": 24165500 }, { "epoch": 69.95, "learning_rate": 1.503721864579474e-05, "loss": 1.8831, "step": 24166000 }, { "epoch": 69.95, "learning_rate": 1.5036494998147463e-05, "loss": 1.896, "step": 24166500 }, { "epoch": 69.95, "learning_rate": 1.5035771350500185e-05, "loss": 1.9082, "step": 24167000 }, { "epoch": 69.96, "learning_rate": 1.5035049150148203e-05, "loss": 1.8901, "step": 24167500 }, { "epoch": 69.96, "learning_rate": 1.5034325502500927e-05, "loss": 1.9129, "step": 24168000 }, { "epoch": 69.96, "learning_rate": 1.5033601854853649e-05, "loss": 1.9013, "step": 24168500 }, { "epoch": 69.96, "learning_rate": 1.5032878207206374e-05, "loss": 1.8942, "step": 24169000 }, { "epoch": 69.96, "learning_rate": 1.5032154559559097e-05, "loss": 1.8821, "step": 24169500 }, { "epoch": 69.96, "learning_rate": 1.503143091191182e-05, "loss": 1.8777, "step": 24170000 }, { "epoch": 69.96, "learning_rate": 1.5030710158855133e-05, "loss": 1.8802, "step": 24170500 }, { "epoch": 69.97, "learning_rate": 1.5029986511207855e-05, "loss": 1.8922, "step": 24171000 }, { "epoch": 69.97, "learning_rate": 1.5029262863560577e-05, "loss": 1.905, "step": 24171500 }, { "epoch": 69.97, "learning_rate": 1.5028539215913301e-05, "loss": 1.9207, "step": 24172000 }, { "epoch": 69.97, "learning_rate": 1.5027815568266027e-05, "loss": 1.8995, "step": 24172500 }, { "epoch": 69.97, "learning_rate": 1.5027091920618749e-05, "loss": 1.8986, "step": 24173000 }, { "epoch": 69.97, "learning_rate": 1.5026368272971473e-05, "loss": 1.8904, "step": 24173500 }, { "epoch": 69.97, "learning_rate": 1.5025644625324195e-05, "loss": 1.9078, "step": 24174000 }, { "epoch": 69.98, "learning_rate": 1.5024920977676917e-05, "loss": 1.8887, "step": 24174500 }, { "epoch": 69.98, "learning_rate": 1.5024197330029641e-05, "loss": 1.8829, "step": 24175000 }, { "epoch": 69.98, "learning_rate": 1.5023473682382364e-05, "loss": 1.8962, "step": 24175500 }, { "epoch": 69.98, "learning_rate": 1.502275003473509e-05, "loss": 1.8804, "step": 24176000 }, { "epoch": 69.98, "learning_rate": 1.5022026387087812e-05, "loss": 1.8795, "step": 24176500 }, { "epoch": 69.98, "learning_rate": 1.5021302739440535e-05, "loss": 1.8755, "step": 24177000 }, { "epoch": 69.98, "learning_rate": 1.5020580539088553e-05, "loss": 1.9017, "step": 24177500 }, { "epoch": 69.99, "learning_rate": 1.5019856891441275e-05, "loss": 1.9121, "step": 24178000 }, { "epoch": 69.99, "learning_rate": 1.5019133243793999e-05, "loss": 1.8884, "step": 24178500 }, { "epoch": 69.99, "learning_rate": 1.5018409596146721e-05, "loss": 1.8908, "step": 24179000 }, { "epoch": 69.99, "learning_rate": 1.5017687395794738e-05, "loss": 1.8928, "step": 24179500 }, { "epoch": 69.99, "learning_rate": 1.5016965195442756e-05, "loss": 1.8859, "step": 24180000 }, { "epoch": 69.99, "learning_rate": 1.5016241547795481e-05, "loss": 1.8996, "step": 24180500 }, { "epoch": 69.99, "learning_rate": 1.5015517900148205e-05, "loss": 1.8857, "step": 24181000 }, { "epoch": 70.0, "learning_rate": 1.5014794252500927e-05, "loss": 1.9045, "step": 24181500 }, { "epoch": 70.0, "learning_rate": 1.501407060485365e-05, "loss": 1.9055, "step": 24182000 }, { "epoch": 70.0, "learning_rate": 1.5013349851796962e-05, "loss": 1.9074, "step": 24182500 }, { "epoch": 70.0, "learning_rate": 1.5012626204149686e-05, "loss": 1.8752, "step": 24183000 }, { "epoch": 70.0, "eval_accuracy": 0.6823868252613496, "eval_accuracy_mlm": 0.6501814787087616, "eval_accuracy_nsp": 0.8551970044868534, "eval_loss": 2.1773810386657715, "eval_runtime": 332.0745, "eval_samples_per_second": 1314.121, "eval_steps_per_second": 54.756, "step": 24183040 }, { "epoch": 70.0, "learning_rate": 1.5011902556502408e-05, "loss": 1.8892, "step": 24183500 }, { "epoch": 70.0, "learning_rate": 1.5011180356150425e-05, "loss": 1.9202, "step": 24184000 }, { "epoch": 70.0, "learning_rate": 1.5010456708503151e-05, "loss": 1.8736, "step": 24184500 }, { "epoch": 70.01, "learning_rate": 1.5009733060855873e-05, "loss": 1.8732, "step": 24185000 }, { "epoch": 70.01, "learning_rate": 1.5009009413208597e-05, "loss": 1.8603, "step": 24185500 }, { "epoch": 70.01, "learning_rate": 1.5008287212856614e-05, "loss": 1.8659, "step": 24186000 }, { "epoch": 70.01, "learning_rate": 1.5007563565209337e-05, "loss": 1.8697, "step": 24186500 }, { "epoch": 70.01, "learning_rate": 1.500683991756206e-05, "loss": 1.8769, "step": 24187000 }, { "epoch": 70.01, "learning_rate": 1.5006116269914783e-05, "loss": 1.8649, "step": 24187500 }, { "epoch": 70.01, "learning_rate": 1.5005392622267508e-05, "loss": 1.9109, "step": 24188000 }, { "epoch": 70.02, "learning_rate": 1.500466897462023e-05, "loss": 1.8782, "step": 24188500 }, { "epoch": 70.02, "learning_rate": 1.5003945326972954e-05, "loss": 1.8893, "step": 24189000 }, { "epoch": 70.02, "learning_rate": 1.5003221679325677e-05, "loss": 1.8949, "step": 24189500 }, { "epoch": 70.02, "learning_rate": 1.5002498031678399e-05, "loss": 1.8814, "step": 24190000 }, { "epoch": 70.02, "learning_rate": 1.5001774384031123e-05, "loss": 1.8847, "step": 24190500 }, { "epoch": 70.02, "learning_rate": 1.5001050736383845e-05, "loss": 1.8932, "step": 24191000 }, { "epoch": 70.02, "learning_rate": 1.500032708873657e-05, "loss": 1.8723, "step": 24191500 }, { "epoch": 70.03, "learning_rate": 1.4999603441089295e-05, "loss": 1.8842, "step": 24192000 }, { "epoch": 70.03, "learning_rate": 1.4998879793442017e-05, "loss": 1.8734, "step": 24192500 }, { "epoch": 70.03, "learning_rate": 1.4998156145794739e-05, "loss": 1.8734, "step": 24193000 }, { "epoch": 70.03, "learning_rate": 1.4997432498147463e-05, "loss": 1.8784, "step": 24193500 }, { "epoch": 70.03, "learning_rate": 1.4996708850500185e-05, "loss": 1.8737, "step": 24194000 }, { "epoch": 70.03, "learning_rate": 1.4995985202852908e-05, "loss": 1.8895, "step": 24194500 }, { "epoch": 70.03, "learning_rate": 1.499526444979622e-05, "loss": 1.9051, "step": 24195000 }, { "epoch": 70.04, "learning_rate": 1.4994540802148945e-05, "loss": 1.8686, "step": 24195500 }, { "epoch": 70.04, "learning_rate": 1.499381715450167e-05, "loss": 1.8713, "step": 24196000 }, { "epoch": 70.04, "learning_rate": 1.4993093506854392e-05, "loss": 1.8808, "step": 24196500 }, { "epoch": 70.04, "learning_rate": 1.4992369859207114e-05, "loss": 1.872, "step": 24197000 }, { "epoch": 70.04, "learning_rate": 1.4991646211559838e-05, "loss": 1.8982, "step": 24197500 }, { "epoch": 70.04, "learning_rate": 1.499092256391256e-05, "loss": 1.8858, "step": 24198000 }, { "epoch": 70.04, "learning_rate": 1.4990198916265282e-05, "loss": 1.872, "step": 24198500 }, { "epoch": 70.05, "learning_rate": 1.4989476715913303e-05, "loss": 1.8696, "step": 24199000 }, { "epoch": 70.05, "learning_rate": 1.4988753068266027e-05, "loss": 1.8559, "step": 24199500 }, { "epoch": 70.05, "learning_rate": 1.4988029420618749e-05, "loss": 1.8736, "step": 24200000 }, { "epoch": 70.05, "learning_rate": 1.4987305772971471e-05, "loss": 1.9001, "step": 24200500 }, { "epoch": 70.05, "learning_rate": 1.4986582125324195e-05, "loss": 1.8709, "step": 24201000 }, { "epoch": 70.05, "learning_rate": 1.4985858477676917e-05, "loss": 1.8936, "step": 24201500 }, { "epoch": 70.05, "learning_rate": 1.498513483002964e-05, "loss": 1.894, "step": 24202000 }, { "epoch": 70.06, "learning_rate": 1.498441262967766e-05, "loss": 1.8419, "step": 24202500 }, { "epoch": 70.06, "learning_rate": 1.4983688982030382e-05, "loss": 1.9063, "step": 24203000 }, { "epoch": 70.06, "learning_rate": 1.4982965334383106e-05, "loss": 1.8535, "step": 24203500 }, { "epoch": 70.06, "learning_rate": 1.4982241686735829e-05, "loss": 1.8892, "step": 24204000 }, { "epoch": 70.06, "learning_rate": 1.4981518039088553e-05, "loss": 1.8664, "step": 24204500 }, { "epoch": 70.06, "learning_rate": 1.4980794391441275e-05, "loss": 1.8689, "step": 24205000 }, { "epoch": 70.07, "learning_rate": 1.4980072191089292e-05, "loss": 1.8927, "step": 24205500 }, { "epoch": 70.07, "learning_rate": 1.4979348543442014e-05, "loss": 1.9015, "step": 24206000 }, { "epoch": 70.07, "learning_rate": 1.497862489579474e-05, "loss": 1.8821, "step": 24206500 }, { "epoch": 70.07, "learning_rate": 1.4977901248147464e-05, "loss": 1.8734, "step": 24207000 }, { "epoch": 70.07, "learning_rate": 1.4977177600500186e-05, "loss": 1.8842, "step": 24207500 }, { "epoch": 70.07, "learning_rate": 1.4976453952852908e-05, "loss": 1.8726, "step": 24208000 }, { "epoch": 70.07, "learning_rate": 1.4975731752500927e-05, "loss": 1.9029, "step": 24208500 }, { "epoch": 70.08, "learning_rate": 1.497500810485365e-05, "loss": 1.8886, "step": 24209000 }, { "epoch": 70.08, "learning_rate": 1.4974284457206372e-05, "loss": 1.8777, "step": 24209500 }, { "epoch": 70.08, "learning_rate": 1.4973560809559097e-05, "loss": 1.894, "step": 24210000 }, { "epoch": 70.08, "learning_rate": 1.4972838609207116e-05, "loss": 1.8654, "step": 24210500 }, { "epoch": 70.08, "learning_rate": 1.4972114961559839e-05, "loss": 1.894, "step": 24211000 }, { "epoch": 70.08, "learning_rate": 1.497139131391256e-05, "loss": 1.8812, "step": 24211500 }, { "epoch": 70.08, "learning_rate": 1.4970667666265285e-05, "loss": 1.8897, "step": 24212000 }, { "epoch": 70.09, "learning_rate": 1.4969944018618007e-05, "loss": 1.8764, "step": 24212500 }, { "epoch": 70.09, "learning_rate": 1.4969221818266024e-05, "loss": 1.8816, "step": 24213000 }, { "epoch": 70.09, "learning_rate": 1.4968498170618746e-05, "loss": 1.8744, "step": 24213500 }, { "epoch": 70.09, "learning_rate": 1.4967774522971472e-05, "loss": 1.8877, "step": 24214000 }, { "epoch": 70.09, "learning_rate": 1.4967052322619491e-05, "loss": 1.8731, "step": 24214500 }, { "epoch": 70.09, "learning_rate": 1.4966328674972213e-05, "loss": 1.8809, "step": 24215000 }, { "epoch": 70.09, "learning_rate": 1.4965605027324935e-05, "loss": 1.8651, "step": 24215500 }, { "epoch": 70.1, "learning_rate": 1.496488137967766e-05, "loss": 1.8802, "step": 24216000 }, { "epoch": 70.1, "learning_rate": 1.4964157732030382e-05, "loss": 1.8779, "step": 24216500 }, { "epoch": 70.1, "learning_rate": 1.4963434084383104e-05, "loss": 1.8884, "step": 24217000 }, { "epoch": 70.1, "learning_rate": 1.496271043673583e-05, "loss": 1.8714, "step": 24217500 }, { "epoch": 70.1, "learning_rate": 1.4961986789088553e-05, "loss": 1.8762, "step": 24218000 }, { "epoch": 70.1, "learning_rate": 1.4961263141441276e-05, "loss": 1.8803, "step": 24218500 }, { "epoch": 70.1, "learning_rate": 1.4960539493793998e-05, "loss": 1.8957, "step": 24219000 }, { "epoch": 70.11, "learning_rate": 1.4959815846146722e-05, "loss": 1.9017, "step": 24219500 }, { "epoch": 70.11, "learning_rate": 1.4959092198499444e-05, "loss": 1.8954, "step": 24220000 }, { "epoch": 70.11, "learning_rate": 1.4958368550852166e-05, "loss": 1.8964, "step": 24220500 }, { "epoch": 70.11, "learning_rate": 1.4957644903204892e-05, "loss": 1.8922, "step": 24221000 }, { "epoch": 70.11, "learning_rate": 1.495692270285291e-05, "loss": 1.8844, "step": 24221500 }, { "epoch": 70.11, "learning_rate": 1.4956199055205633e-05, "loss": 1.886, "step": 24222000 }, { "epoch": 70.11, "learning_rate": 1.495547685485365e-05, "loss": 1.9091, "step": 24222500 }, { "epoch": 70.12, "learning_rate": 1.4954754654501668e-05, "loss": 1.8955, "step": 24223000 }, { "epoch": 70.12, "learning_rate": 1.4954031006854391e-05, "loss": 1.8844, "step": 24223500 }, { "epoch": 70.12, "learning_rate": 1.4953307359207114e-05, "loss": 1.9144, "step": 24224000 }, { "epoch": 70.12, "learning_rate": 1.4952585158855131e-05, "loss": 1.9042, "step": 24224500 }, { "epoch": 70.12, "learning_rate": 1.4951861511207855e-05, "loss": 1.862, "step": 24225000 }, { "epoch": 70.12, "learning_rate": 1.495113786356058e-05, "loss": 1.8764, "step": 24225500 }, { "epoch": 70.12, "learning_rate": 1.4950414215913303e-05, "loss": 1.8862, "step": 24226000 }, { "epoch": 70.13, "learning_rate": 1.4949690568266025e-05, "loss": 1.8994, "step": 24226500 }, { "epoch": 70.13, "learning_rate": 1.4948966920618749e-05, "loss": 1.8885, "step": 24227000 }, { "epoch": 70.13, "learning_rate": 1.4948243272971471e-05, "loss": 1.8844, "step": 24227500 }, { "epoch": 70.13, "learning_rate": 1.4947519625324193e-05, "loss": 1.8681, "step": 24228000 }, { "epoch": 70.13, "learning_rate": 1.4946797424972212e-05, "loss": 1.8814, "step": 24228500 }, { "epoch": 70.13, "learning_rate": 1.4946073777324936e-05, "loss": 1.8819, "step": 24229000 }, { "epoch": 70.13, "learning_rate": 1.4945351576972955e-05, "loss": 1.8969, "step": 24229500 }, { "epoch": 70.14, "learning_rate": 1.4944627929325677e-05, "loss": 1.8791, "step": 24230000 }, { "epoch": 70.14, "learning_rate": 1.49439042816784e-05, "loss": 1.8671, "step": 24230500 }, { "epoch": 70.14, "learning_rate": 1.4943180634031124e-05, "loss": 1.883, "step": 24231000 }, { "epoch": 70.14, "learning_rate": 1.4942456986383846e-05, "loss": 1.901, "step": 24231500 }, { "epoch": 70.14, "learning_rate": 1.4941733338736568e-05, "loss": 1.8611, "step": 24232000 }, { "epoch": 70.14, "learning_rate": 1.4941009691089294e-05, "loss": 1.873, "step": 24232500 }, { "epoch": 70.14, "learning_rate": 1.4940286043442018e-05, "loss": 1.897, "step": 24233000 }, { "epoch": 70.15, "learning_rate": 1.493956239579474e-05, "loss": 1.9069, "step": 24233500 }, { "epoch": 70.15, "learning_rate": 1.4938838748147462e-05, "loss": 1.8808, "step": 24234000 }, { "epoch": 70.15, "learning_rate": 1.4938116547795481e-05, "loss": 1.8737, "step": 24234500 }, { "epoch": 70.15, "learning_rate": 1.4937392900148203e-05, "loss": 1.8787, "step": 24235000 }, { "epoch": 70.15, "learning_rate": 1.4936669252500925e-05, "loss": 1.8636, "step": 24235500 }, { "epoch": 70.15, "learning_rate": 1.493594560485365e-05, "loss": 1.8781, "step": 24236000 }, { "epoch": 70.15, "learning_rate": 1.4935221957206375e-05, "loss": 1.8655, "step": 24236500 }, { "epoch": 70.16, "learning_rate": 1.4934498309559097e-05, "loss": 1.8892, "step": 24237000 }, { "epoch": 70.16, "learning_rate": 1.493377466191182e-05, "loss": 1.8666, "step": 24237500 }, { "epoch": 70.16, "learning_rate": 1.4933051014264543e-05, "loss": 1.8916, "step": 24238000 }, { "epoch": 70.16, "learning_rate": 1.493232881391256e-05, "loss": 1.8792, "step": 24238500 }, { "epoch": 70.16, "learning_rate": 1.4931605166265283e-05, "loss": 1.884, "step": 24239000 }, { "epoch": 70.16, "learning_rate": 1.4930881518618007e-05, "loss": 1.8536, "step": 24239500 }, { "epoch": 70.16, "learning_rate": 1.4930159318266026e-05, "loss": 1.8765, "step": 24240000 }, { "epoch": 70.17, "learning_rate": 1.492943567061875e-05, "loss": 1.8956, "step": 24240500 }, { "epoch": 70.17, "learning_rate": 1.4928712022971472e-05, "loss": 1.9013, "step": 24241000 }, { "epoch": 70.17, "learning_rate": 1.4927988375324196e-05, "loss": 1.9286, "step": 24241500 }, { "epoch": 70.17, "learning_rate": 1.4927264727676918e-05, "loss": 1.888, "step": 24242000 }, { "epoch": 70.17, "learning_rate": 1.4926542527324935e-05, "loss": 1.8766, "step": 24242500 }, { "epoch": 70.17, "learning_rate": 1.4925818879677658e-05, "loss": 1.8978, "step": 24243000 }, { "epoch": 70.18, "learning_rate": 1.4925095232030381e-05, "loss": 1.8896, "step": 24243500 }, { "epoch": 70.18, "learning_rate": 1.4924371584383107e-05, "loss": 1.8856, "step": 24244000 }, { "epoch": 70.18, "learning_rate": 1.492364793673583e-05, "loss": 1.8778, "step": 24244500 }, { "epoch": 70.18, "learning_rate": 1.4922924289088552e-05, "loss": 1.8881, "step": 24245000 }, { "epoch": 70.18, "learning_rate": 1.4922200641441276e-05, "loss": 1.8984, "step": 24245500 }, { "epoch": 70.18, "learning_rate": 1.4921476993793998e-05, "loss": 1.8766, "step": 24246000 }, { "epoch": 70.18, "learning_rate": 1.4920753346146722e-05, "loss": 1.8871, "step": 24246500 }, { "epoch": 70.19, "learning_rate": 1.4920029698499444e-05, "loss": 1.9104, "step": 24247000 }, { "epoch": 70.19, "learning_rate": 1.491930605085217e-05, "loss": 1.8821, "step": 24247500 }, { "epoch": 70.19, "learning_rate": 1.4918583850500187e-05, "loss": 1.8823, "step": 24248000 }, { "epoch": 70.19, "learning_rate": 1.4917860202852909e-05, "loss": 1.8665, "step": 24248500 }, { "epoch": 70.19, "learning_rate": 1.4917136555205633e-05, "loss": 1.8733, "step": 24249000 }, { "epoch": 70.19, "learning_rate": 1.4916412907558355e-05, "loss": 1.9048, "step": 24249500 }, { "epoch": 70.19, "learning_rate": 1.4915689259911077e-05, "loss": 1.895, "step": 24250000 }, { "epoch": 70.2, "learning_rate": 1.4914967059559096e-05, "loss": 1.8957, "step": 24250500 }, { "epoch": 70.2, "learning_rate": 1.4914243411911822e-05, "loss": 1.9161, "step": 24251000 }, { "epoch": 70.2, "learning_rate": 1.4913519764264544e-05, "loss": 1.8798, "step": 24251500 }, { "epoch": 70.2, "learning_rate": 1.4912796116617266e-05, "loss": 1.8668, "step": 24252000 }, { "epoch": 70.2, "learning_rate": 1.491207246896999e-05, "loss": 1.9128, "step": 24252500 }, { "epoch": 70.2, "learning_rate": 1.4911348821322713e-05, "loss": 1.8732, "step": 24253000 }, { "epoch": 70.2, "learning_rate": 1.4910625173675435e-05, "loss": 1.8995, "step": 24253500 }, { "epoch": 70.21, "learning_rate": 1.4909902973323454e-05, "loss": 1.9004, "step": 24254000 }, { "epoch": 70.21, "learning_rate": 1.4909179325676176e-05, "loss": 1.9162, "step": 24254500 }, { "epoch": 70.21, "learning_rate": 1.4908455678028902e-05, "loss": 1.9031, "step": 24255000 }, { "epoch": 70.21, "learning_rate": 1.4907732030381624e-05, "loss": 1.8706, "step": 24255500 }, { "epoch": 70.21, "learning_rate": 1.4907009830029641e-05, "loss": 1.8799, "step": 24256000 }, { "epoch": 70.21, "learning_rate": 1.490628762967766e-05, "loss": 1.9032, "step": 24256500 }, { "epoch": 70.21, "learning_rate": 1.4905563982030382e-05, "loss": 1.8745, "step": 24257000 }, { "epoch": 70.22, "learning_rate": 1.4904840334383105e-05, "loss": 1.8838, "step": 24257500 }, { "epoch": 70.22, "learning_rate": 1.4904116686735828e-05, "loss": 1.875, "step": 24258000 }, { "epoch": 70.22, "learning_rate": 1.490339303908855e-05, "loss": 1.8908, "step": 24258500 }, { "epoch": 70.22, "learning_rate": 1.4902669391441276e-05, "loss": 1.9133, "step": 24259000 }, { "epoch": 70.22, "learning_rate": 1.4901945743793999e-05, "loss": 1.8987, "step": 24259500 }, { "epoch": 70.22, "learning_rate": 1.4901222096146722e-05, "loss": 1.91, "step": 24260000 }, { "epoch": 70.22, "learning_rate": 1.4900498448499445e-05, "loss": 1.8904, "step": 24260500 }, { "epoch": 70.23, "learning_rate": 1.4899774800852167e-05, "loss": 1.8899, "step": 24261000 }, { "epoch": 70.23, "learning_rate": 1.4899051153204891e-05, "loss": 1.9081, "step": 24261500 }, { "epoch": 70.23, "learning_rate": 1.4898328952852908e-05, "loss": 1.9084, "step": 24262000 }, { "epoch": 70.23, "learning_rate": 1.4897605305205634e-05, "loss": 1.8646, "step": 24262500 }, { "epoch": 70.23, "learning_rate": 1.4896881657558356e-05, "loss": 1.8856, "step": 24263000 }, { "epoch": 70.23, "learning_rate": 1.489615800991108e-05, "loss": 1.8772, "step": 24263500 }, { "epoch": 70.23, "learning_rate": 1.4895435809559097e-05, "loss": 1.885, "step": 24264000 }, { "epoch": 70.24, "learning_rate": 1.489471216191182e-05, "loss": 1.8808, "step": 24264500 }, { "epoch": 70.24, "learning_rate": 1.4893988514264542e-05, "loss": 1.9018, "step": 24265000 }, { "epoch": 70.24, "learning_rate": 1.4893264866617266e-05, "loss": 1.8824, "step": 24265500 }, { "epoch": 70.24, "learning_rate": 1.4892541218969991e-05, "loss": 1.9031, "step": 24266000 }, { "epoch": 70.24, "learning_rate": 1.4891817571322713e-05, "loss": 1.8564, "step": 24266500 }, { "epoch": 70.24, "learning_rate": 1.4891093923675437e-05, "loss": 1.8642, "step": 24267000 }, { "epoch": 70.24, "learning_rate": 1.489037317061875e-05, "loss": 1.8828, "step": 24267500 }, { "epoch": 70.25, "learning_rate": 1.4889649522971472e-05, "loss": 1.8952, "step": 24268000 }, { "epoch": 70.25, "learning_rate": 1.4888925875324194e-05, "loss": 1.8839, "step": 24268500 }, { "epoch": 70.25, "learning_rate": 1.4888202227676918e-05, "loss": 1.8982, "step": 24269000 }, { "epoch": 70.25, "learning_rate": 1.488747858002964e-05, "loss": 1.9074, "step": 24269500 }, { "epoch": 70.25, "learning_rate": 1.4886754932382366e-05, "loss": 1.9135, "step": 24270000 }, { "epoch": 70.25, "learning_rate": 1.4886031284735088e-05, "loss": 1.8632, "step": 24270500 }, { "epoch": 70.25, "learning_rate": 1.4885307637087812e-05, "loss": 1.8905, "step": 24271000 }, { "epoch": 70.26, "learning_rate": 1.488458543673583e-05, "loss": 1.9037, "step": 24271500 }, { "epoch": 70.26, "learning_rate": 1.4883861789088551e-05, "loss": 1.8927, "step": 24272000 }, { "epoch": 70.26, "learning_rate": 1.4883138141441275e-05, "loss": 1.9002, "step": 24272500 }, { "epoch": 70.26, "learning_rate": 1.4882414493793998e-05, "loss": 1.8763, "step": 24273000 }, { "epoch": 70.26, "learning_rate": 1.4881690846146723e-05, "loss": 1.8786, "step": 24273500 }, { "epoch": 70.26, "learning_rate": 1.4880967198499445e-05, "loss": 1.8957, "step": 24274000 }, { "epoch": 70.26, "learning_rate": 1.488024355085217e-05, "loss": 1.8981, "step": 24274500 }, { "epoch": 70.27, "learning_rate": 1.4879519903204892e-05, "loss": 1.9018, "step": 24275000 }, { "epoch": 70.27, "learning_rate": 1.4878796255557614e-05, "loss": 1.9006, "step": 24275500 }, { "epoch": 70.27, "learning_rate": 1.4878072607910338e-05, "loss": 1.9053, "step": 24276000 }, { "epoch": 70.27, "learning_rate": 1.4877350407558355e-05, "loss": 1.9099, "step": 24276500 }, { "epoch": 70.27, "learning_rate": 1.4876628207206372e-05, "loss": 1.8908, "step": 24277000 }, { "epoch": 70.27, "learning_rate": 1.4875904559559098e-05, "loss": 1.8905, "step": 24277500 }, { "epoch": 70.27, "learning_rate": 1.4875182359207115e-05, "loss": 1.9037, "step": 24278000 }, { "epoch": 70.28, "learning_rate": 1.4874458711559839e-05, "loss": 1.9078, "step": 24278500 }, { "epoch": 70.28, "learning_rate": 1.4873736511207856e-05, "loss": 1.9011, "step": 24279000 }, { "epoch": 70.28, "learning_rate": 1.4873012863560579e-05, "loss": 1.8904, "step": 24279500 }, { "epoch": 70.28, "learning_rate": 1.4872290663208596e-05, "loss": 1.8706, "step": 24280000 }, { "epoch": 70.28, "learning_rate": 1.487156701556132e-05, "loss": 1.8894, "step": 24280500 }, { "epoch": 70.28, "learning_rate": 1.4870843367914042e-05, "loss": 1.8788, "step": 24281000 }, { "epoch": 70.29, "learning_rate": 1.4870119720266768e-05, "loss": 1.8948, "step": 24281500 }, { "epoch": 70.29, "learning_rate": 1.486939607261949e-05, "loss": 1.8848, "step": 24282000 }, { "epoch": 70.29, "learning_rate": 1.4868673872267507e-05, "loss": 1.8752, "step": 24282500 }, { "epoch": 70.29, "learning_rate": 1.4867950224620231e-05, "loss": 1.8787, "step": 24283000 }, { "epoch": 70.29, "learning_rate": 1.4867226576972953e-05, "loss": 1.8907, "step": 24283500 }, { "epoch": 70.29, "learning_rate": 1.4866502929325676e-05, "loss": 1.8824, "step": 24284000 }, { "epoch": 70.29, "learning_rate": 1.48657792816784e-05, "loss": 1.8804, "step": 24284500 }, { "epoch": 70.3, "learning_rate": 1.4865055634031122e-05, "loss": 1.8983, "step": 24285000 }, { "epoch": 70.3, "learning_rate": 1.4864331986383847e-05, "loss": 1.8943, "step": 24285500 }, { "epoch": 70.3, "learning_rate": 1.4863608338736571e-05, "loss": 1.8621, "step": 24286000 }, { "epoch": 70.3, "learning_rate": 1.4862884691089293e-05, "loss": 1.9142, "step": 24286500 }, { "epoch": 70.3, "learning_rate": 1.486216249073731e-05, "loss": 1.8806, "step": 24287000 }, { "epoch": 70.3, "learning_rate": 1.4861438843090033e-05, "loss": 1.8805, "step": 24287500 }, { "epoch": 70.3, "learning_rate": 1.4860715195442757e-05, "loss": 1.8997, "step": 24288000 }, { "epoch": 70.31, "learning_rate": 1.4859991547795479e-05, "loss": 1.8907, "step": 24288500 }, { "epoch": 70.31, "learning_rate": 1.4859267900148205e-05, "loss": 1.8851, "step": 24289000 }, { "epoch": 70.31, "learning_rate": 1.4858544252500927e-05, "loss": 1.8896, "step": 24289500 }, { "epoch": 70.31, "learning_rate": 1.4857822052148946e-05, "loss": 1.8715, "step": 24290000 }, { "epoch": 70.31, "learning_rate": 1.4857098404501668e-05, "loss": 1.8839, "step": 24290500 }, { "epoch": 70.31, "learning_rate": 1.485637475685439e-05, "loss": 1.9033, "step": 24291000 }, { "epoch": 70.31, "learning_rate": 1.4855651109207114e-05, "loss": 1.8977, "step": 24291500 }, { "epoch": 70.32, "learning_rate": 1.4854927461559837e-05, "loss": 1.89, "step": 24292000 }, { "epoch": 70.32, "learning_rate": 1.4854203813912562e-05, "loss": 1.8762, "step": 24292500 }, { "epoch": 70.32, "learning_rate": 1.4853480166265284e-05, "loss": 1.8827, "step": 24293000 }, { "epoch": 70.32, "learning_rate": 1.4852756518618008e-05, "loss": 1.8818, "step": 24293500 }, { "epoch": 70.32, "learning_rate": 1.485203287097073e-05, "loss": 1.8821, "step": 24294000 }, { "epoch": 70.32, "learning_rate": 1.4851309223323453e-05, "loss": 1.8819, "step": 24294500 }, { "epoch": 70.32, "learning_rate": 1.4850585575676177e-05, "loss": 1.8618, "step": 24295000 }, { "epoch": 70.33, "learning_rate": 1.4849861928028899e-05, "loss": 1.8839, "step": 24295500 }, { "epoch": 70.33, "learning_rate": 1.4849138280381625e-05, "loss": 1.9021, "step": 24296000 }, { "epoch": 70.33, "learning_rate": 1.4848414632734347e-05, "loss": 1.8665, "step": 24296500 }, { "epoch": 70.33, "learning_rate": 1.484769098508707e-05, "loss": 1.8939, "step": 24297000 }, { "epoch": 70.33, "learning_rate": 1.4846967337439793e-05, "loss": 1.885, "step": 24297500 }, { "epoch": 70.33, "learning_rate": 1.4846243689792517e-05, "loss": 1.8939, "step": 24298000 }, { "epoch": 70.33, "learning_rate": 1.4845520042145239e-05, "loss": 1.892, "step": 24298500 }, { "epoch": 70.34, "learning_rate": 1.4844796394497961e-05, "loss": 1.8759, "step": 24299000 }, { "epoch": 70.34, "learning_rate": 1.4844072746850687e-05, "loss": 1.872, "step": 24299500 }, { "epoch": 70.34, "learning_rate": 1.4843349099203411e-05, "loss": 1.8843, "step": 24300000 }, { "epoch": 70.34, "learning_rate": 1.4842625451556133e-05, "loss": 1.8742, "step": 24300500 }, { "epoch": 70.34, "learning_rate": 1.484190325120415e-05, "loss": 1.9108, "step": 24301000 }, { "epoch": 70.34, "learning_rate": 1.4841179603556873e-05, "loss": 1.9023, "step": 24301500 }, { "epoch": 70.34, "learning_rate": 1.4840457403204892e-05, "loss": 1.8939, "step": 24302000 }, { "epoch": 70.35, "learning_rate": 1.4839733755557614e-05, "loss": 1.8953, "step": 24302500 }, { "epoch": 70.35, "learning_rate": 1.4839010107910336e-05, "loss": 1.9029, "step": 24303000 }, { "epoch": 70.35, "learning_rate": 1.4838286460263062e-05, "loss": 1.8543, "step": 24303500 }, { "epoch": 70.35, "learning_rate": 1.4837562812615786e-05, "loss": 1.8946, "step": 24304000 }, { "epoch": 70.35, "learning_rate": 1.4836839164968508e-05, "loss": 1.8697, "step": 24304500 }, { "epoch": 70.35, "learning_rate": 1.4836116964616525e-05, "loss": 1.8877, "step": 24305000 }, { "epoch": 70.35, "learning_rate": 1.4835393316969249e-05, "loss": 1.8753, "step": 24305500 }, { "epoch": 70.36, "learning_rate": 1.4834671116617266e-05, "loss": 1.8583, "step": 24306000 }, { "epoch": 70.36, "learning_rate": 1.4833947468969988e-05, "loss": 1.8829, "step": 24306500 }, { "epoch": 70.36, "learning_rate": 1.483322382132271e-05, "loss": 1.9216, "step": 24307000 }, { "epoch": 70.36, "learning_rate": 1.4832500173675436e-05, "loss": 1.8448, "step": 24307500 }, { "epoch": 70.36, "learning_rate": 1.483177652602816e-05, "loss": 1.9179, "step": 24308000 }, { "epoch": 70.36, "learning_rate": 1.4831052878380882e-05, "loss": 1.9053, "step": 24308500 }, { "epoch": 70.36, "learning_rate": 1.4830329230733606e-05, "loss": 1.8785, "step": 24309000 }, { "epoch": 70.37, "learning_rate": 1.4829605583086329e-05, "loss": 1.8908, "step": 24309500 }, { "epoch": 70.37, "learning_rate": 1.4828881935439051e-05, "loss": 1.8733, "step": 24310000 }, { "epoch": 70.37, "learning_rate": 1.4828158287791775e-05, "loss": 1.9017, "step": 24310500 }, { "epoch": 70.37, "learning_rate": 1.48274346401445e-05, "loss": 1.8843, "step": 24311000 }, { "epoch": 70.37, "learning_rate": 1.4826710992497223e-05, "loss": 1.8869, "step": 24311500 }, { "epoch": 70.37, "learning_rate": 1.482598879214524e-05, "loss": 1.8806, "step": 24312000 }, { "epoch": 70.37, "learning_rate": 1.4825265144497962e-05, "loss": 1.9036, "step": 24312500 }, { "epoch": 70.38, "learning_rate": 1.4824541496850686e-05, "loss": 1.8999, "step": 24313000 }, { "epoch": 70.38, "learning_rate": 1.4823817849203408e-05, "loss": 1.8815, "step": 24313500 }, { "epoch": 70.38, "learning_rate": 1.482309709614672e-05, "loss": 1.9084, "step": 24314000 }, { "epoch": 70.38, "learning_rate": 1.4822373448499444e-05, "loss": 1.8912, "step": 24314500 }, { "epoch": 70.38, "learning_rate": 1.4821649800852168e-05, "loss": 1.8932, "step": 24315000 }, { "epoch": 70.38, "learning_rate": 1.4820926153204892e-05, "loss": 1.8683, "step": 24315500 }, { "epoch": 70.38, "learning_rate": 1.4820202505557615e-05, "loss": 1.8601, "step": 24316000 }, { "epoch": 70.39, "learning_rate": 1.4819478857910339e-05, "loss": 1.859, "step": 24316500 }, { "epoch": 70.39, "learning_rate": 1.4818756657558356e-05, "loss": 1.9129, "step": 24317000 }, { "epoch": 70.39, "learning_rate": 1.4818033009911078e-05, "loss": 1.8957, "step": 24317500 }, { "epoch": 70.39, "learning_rate": 1.48173093622638e-05, "loss": 1.888, "step": 24318000 }, { "epoch": 70.39, "learning_rate": 1.4816585714616526e-05, "loss": 1.9065, "step": 24318500 }, { "epoch": 70.39, "learning_rate": 1.481586206696925e-05, "loss": 1.8448, "step": 24319000 }, { "epoch": 70.39, "learning_rate": 1.4815138419321972e-05, "loss": 1.8816, "step": 24319500 }, { "epoch": 70.4, "learning_rate": 1.4814414771674694e-05, "loss": 1.896, "step": 24320000 }, { "epoch": 70.4, "learning_rate": 1.4813692571322713e-05, "loss": 1.8841, "step": 24320500 }, { "epoch": 70.4, "learning_rate": 1.481297037097073e-05, "loss": 1.8816, "step": 24321000 }, { "epoch": 70.4, "learning_rate": 1.4812246723323453e-05, "loss": 1.9033, "step": 24321500 }, { "epoch": 70.4, "learning_rate": 1.4811523075676177e-05, "loss": 1.9097, "step": 24322000 }, { "epoch": 70.4, "learning_rate": 1.4810799428028902e-05, "loss": 1.8612, "step": 24322500 }, { "epoch": 70.41, "learning_rate": 1.4810075780381624e-05, "loss": 1.8985, "step": 24323000 }, { "epoch": 70.41, "learning_rate": 1.4809353580029642e-05, "loss": 1.8988, "step": 24323500 }, { "epoch": 70.41, "learning_rate": 1.4808629932382364e-05, "loss": 1.8636, "step": 24324000 }, { "epoch": 70.41, "learning_rate": 1.4807906284735088e-05, "loss": 1.8845, "step": 24324500 }, { "epoch": 70.41, "learning_rate": 1.480718263708781e-05, "loss": 1.895, "step": 24325000 }, { "epoch": 70.41, "learning_rate": 1.4806458989440532e-05, "loss": 1.8795, "step": 24325500 }, { "epoch": 70.41, "learning_rate": 1.4805735341793258e-05, "loss": 1.8827, "step": 24326000 }, { "epoch": 70.42, "learning_rate": 1.4805011694145982e-05, "loss": 1.8599, "step": 24326500 }, { "epoch": 70.42, "learning_rate": 1.4804289493793999e-05, "loss": 1.8837, "step": 24327000 }, { "epoch": 70.42, "learning_rate": 1.4803565846146721e-05, "loss": 1.8781, "step": 24327500 }, { "epoch": 70.42, "learning_rate": 1.4802842198499445e-05, "loss": 1.887, "step": 24328000 }, { "epoch": 70.42, "learning_rate": 1.4802118550852168e-05, "loss": 1.8789, "step": 24328500 }, { "epoch": 70.42, "learning_rate": 1.480139490320489e-05, "loss": 1.8894, "step": 24329000 }, { "epoch": 70.42, "learning_rate": 1.4800671255557615e-05, "loss": 1.8537, "step": 24329500 }, { "epoch": 70.43, "learning_rate": 1.479994760791034e-05, "loss": 1.8903, "step": 24330000 }, { "epoch": 70.43, "learning_rate": 1.4799223960263062e-05, "loss": 1.8781, "step": 24330500 }, { "epoch": 70.43, "learning_rate": 1.4798501759911079e-05, "loss": 1.8931, "step": 24331000 }, { "epoch": 70.43, "learning_rate": 1.4797778112263803e-05, "loss": 1.9007, "step": 24331500 }, { "epoch": 70.43, "learning_rate": 1.4797054464616525e-05, "loss": 1.8799, "step": 24332000 }, { "epoch": 70.43, "learning_rate": 1.4796330816969247e-05, "loss": 1.8878, "step": 24332500 }, { "epoch": 70.43, "learning_rate": 1.4795607169321971e-05, "loss": 1.8735, "step": 24333000 }, { "epoch": 70.44, "learning_rate": 1.4794883521674697e-05, "loss": 1.8949, "step": 24333500 }, { "epoch": 70.44, "learning_rate": 1.4794159874027419e-05, "loss": 1.8869, "step": 24334000 }, { "epoch": 70.44, "learning_rate": 1.4793437673675436e-05, "loss": 1.8987, "step": 24334500 }, { "epoch": 70.44, "learning_rate": 1.479271402602816e-05, "loss": 1.8839, "step": 24335000 }, { "epoch": 70.44, "learning_rate": 1.4791990378380882e-05, "loss": 1.8846, "step": 24335500 }, { "epoch": 70.44, "learning_rate": 1.4791266730733605e-05, "loss": 1.9037, "step": 24336000 }, { "epoch": 70.44, "learning_rate": 1.4790544530381622e-05, "loss": 1.8882, "step": 24336500 }, { "epoch": 70.45, "learning_rate": 1.4789820882734346e-05, "loss": 1.8864, "step": 24337000 }, { "epoch": 70.45, "learning_rate": 1.4789097235087071e-05, "loss": 1.8902, "step": 24337500 }, { "epoch": 70.45, "learning_rate": 1.4788373587439794e-05, "loss": 1.8954, "step": 24338000 }, { "epoch": 70.45, "learning_rate": 1.4787649939792516e-05, "loss": 1.9179, "step": 24338500 }, { "epoch": 70.45, "learning_rate": 1.478692629214524e-05, "loss": 1.8665, "step": 24339000 }, { "epoch": 70.45, "learning_rate": 1.4786202644497962e-05, "loss": 1.8946, "step": 24339500 }, { "epoch": 70.45, "learning_rate": 1.4785478996850686e-05, "loss": 1.8946, "step": 24340000 }, { "epoch": 70.46, "learning_rate": 1.4784755349203412e-05, "loss": 1.8918, "step": 24340500 }, { "epoch": 70.46, "learning_rate": 1.4784031701556134e-05, "loss": 1.9154, "step": 24341000 }, { "epoch": 70.46, "learning_rate": 1.4783309501204151e-05, "loss": 1.8878, "step": 24341500 }, { "epoch": 70.46, "learning_rate": 1.4782585853556873e-05, "loss": 1.8794, "step": 24342000 }, { "epoch": 70.46, "learning_rate": 1.4781862205909597e-05, "loss": 1.8836, "step": 24342500 }, { "epoch": 70.46, "learning_rate": 1.478113855826232e-05, "loss": 1.8894, "step": 24343000 }, { "epoch": 70.46, "learning_rate": 1.4780416357910337e-05, "loss": 1.8781, "step": 24343500 }, { "epoch": 70.47, "learning_rate": 1.477969271026306e-05, "loss": 1.8865, "step": 24344000 }, { "epoch": 70.47, "learning_rate": 1.4778969062615786e-05, "loss": 1.8707, "step": 24344500 }, { "epoch": 70.47, "learning_rate": 1.4778245414968508e-05, "loss": 1.8901, "step": 24345000 }, { "epoch": 70.47, "learning_rate": 1.477752176732123e-05, "loss": 1.8655, "step": 24345500 }, { "epoch": 70.47, "learning_rate": 1.4776798119673955e-05, "loss": 1.8783, "step": 24346000 }, { "epoch": 70.47, "learning_rate": 1.4776074472026677e-05, "loss": 1.8781, "step": 24346500 }, { "epoch": 70.47, "learning_rate": 1.4775352271674694e-05, "loss": 1.8885, "step": 24347000 }, { "epoch": 70.48, "learning_rate": 1.4774628624027418e-05, "loss": 1.8915, "step": 24347500 }, { "epoch": 70.48, "learning_rate": 1.477390497638014e-05, "loss": 1.9076, "step": 24348000 }, { "epoch": 70.48, "learning_rate": 1.4773181328732866e-05, "loss": 1.8575, "step": 24348500 }, { "epoch": 70.48, "learning_rate": 1.4772459128380883e-05, "loss": 1.8842, "step": 24349000 }, { "epoch": 70.48, "learning_rate": 1.4771735480733605e-05, "loss": 1.9224, "step": 24349500 }, { "epoch": 70.48, "learning_rate": 1.477101183308633e-05, "loss": 1.8964, "step": 24350000 }, { "epoch": 70.48, "learning_rate": 1.4770289632734347e-05, "loss": 1.8863, "step": 24350500 }, { "epoch": 70.49, "learning_rate": 1.4769565985087069e-05, "loss": 1.9305, "step": 24351000 }, { "epoch": 70.49, "learning_rate": 1.4768842337439793e-05, "loss": 1.9027, "step": 24351500 }, { "epoch": 70.49, "learning_rate": 1.4768118689792518e-05, "loss": 1.9102, "step": 24352000 }, { "epoch": 70.49, "learning_rate": 1.4767396489440536e-05, "loss": 1.9124, "step": 24352500 }, { "epoch": 70.49, "learning_rate": 1.4766672841793258e-05, "loss": 1.8779, "step": 24353000 }, { "epoch": 70.49, "learning_rate": 1.4765949194145982e-05, "loss": 1.8833, "step": 24353500 }, { "epoch": 70.49, "learning_rate": 1.4765225546498704e-05, "loss": 1.8922, "step": 24354000 }, { "epoch": 70.5, "learning_rate": 1.4764501898851426e-05, "loss": 1.8767, "step": 24354500 }, { "epoch": 70.5, "learning_rate": 1.476377825120415e-05, "loss": 1.9049, "step": 24355000 }, { "epoch": 70.5, "learning_rate": 1.4763054603556872e-05, "loss": 1.9049, "step": 24355500 }, { "epoch": 70.5, "learning_rate": 1.4762330955909598e-05, "loss": 1.884, "step": 24356000 }, { "epoch": 70.5, "learning_rate": 1.476160730826232e-05, "loss": 1.8926, "step": 24356500 }, { "epoch": 70.5, "learning_rate": 1.4760883660615044e-05, "loss": 1.8548, "step": 24357000 }, { "epoch": 70.5, "learning_rate": 1.4760160012967766e-05, "loss": 1.8793, "step": 24357500 }, { "epoch": 70.51, "learning_rate": 1.4759437812615784e-05, "loss": 1.8978, "step": 24358000 }, { "epoch": 70.51, "learning_rate": 1.4758714164968508e-05, "loss": 1.8738, "step": 24358500 }, { "epoch": 70.51, "learning_rate": 1.4757991964616525e-05, "loss": 1.885, "step": 24359000 }, { "epoch": 70.51, "learning_rate": 1.475726831696925e-05, "loss": 1.883, "step": 24359500 }, { "epoch": 70.51, "learning_rate": 1.4756544669321973e-05, "loss": 1.8702, "step": 24360000 }, { "epoch": 70.51, "learning_rate": 1.4755821021674695e-05, "loss": 1.8681, "step": 24360500 }, { "epoch": 70.52, "learning_rate": 1.4755098821322714e-05, "loss": 1.8886, "step": 24361000 }, { "epoch": 70.52, "learning_rate": 1.4754375173675436e-05, "loss": 1.8765, "step": 24361500 }, { "epoch": 70.52, "learning_rate": 1.4753651526028158e-05, "loss": 1.8909, "step": 24362000 }, { "epoch": 70.52, "learning_rate": 1.4752927878380882e-05, "loss": 1.8954, "step": 24362500 }, { "epoch": 70.52, "learning_rate": 1.4752204230733605e-05, "loss": 1.8884, "step": 24363000 }, { "epoch": 70.52, "learning_rate": 1.4751483477676917e-05, "loss": 1.8738, "step": 24363500 }, { "epoch": 70.52, "learning_rate": 1.4750759830029642e-05, "loss": 1.8921, "step": 24364000 }, { "epoch": 70.53, "learning_rate": 1.4750036182382365e-05, "loss": 1.8708, "step": 24364500 }, { "epoch": 70.53, "learning_rate": 1.4749312534735089e-05, "loss": 1.8674, "step": 24365000 }, { "epoch": 70.53, "learning_rate": 1.474858888708781e-05, "loss": 1.9199, "step": 24365500 }, { "epoch": 70.53, "learning_rate": 1.4747865239440533e-05, "loss": 1.8966, "step": 24366000 }, { "epoch": 70.53, "learning_rate": 1.4747141591793257e-05, "loss": 1.8898, "step": 24366500 }, { "epoch": 70.53, "learning_rate": 1.474641794414598e-05, "loss": 1.8869, "step": 24367000 }, { "epoch": 70.53, "learning_rate": 1.4745694296498705e-05, "loss": 1.889, "step": 24367500 }, { "epoch": 70.54, "learning_rate": 1.4744972096146722e-05, "loss": 1.8703, "step": 24368000 }, { "epoch": 70.54, "learning_rate": 1.4744248448499446e-05, "loss": 1.8831, "step": 24368500 }, { "epoch": 70.54, "learning_rate": 1.4743524800852168e-05, "loss": 1.8866, "step": 24369000 }, { "epoch": 70.54, "learning_rate": 1.474280115320489e-05, "loss": 1.8834, "step": 24369500 }, { "epoch": 70.54, "learning_rate": 1.4742077505557614e-05, "loss": 1.8909, "step": 24370000 }, { "epoch": 70.54, "learning_rate": 1.4741353857910337e-05, "loss": 1.8704, "step": 24370500 }, { "epoch": 70.54, "learning_rate": 1.4740630210263062e-05, "loss": 1.9065, "step": 24371000 }, { "epoch": 70.55, "learning_rate": 1.4739906562615784e-05, "loss": 1.8833, "step": 24371500 }, { "epoch": 70.55, "learning_rate": 1.4739184362263803e-05, "loss": 1.8706, "step": 24372000 }, { "epoch": 70.55, "learning_rate": 1.4738460714616526e-05, "loss": 1.8769, "step": 24372500 }, { "epoch": 70.55, "learning_rate": 1.4737737066969248e-05, "loss": 1.8885, "step": 24373000 }, { "epoch": 70.55, "learning_rate": 1.4737013419321972e-05, "loss": 1.9041, "step": 24373500 }, { "epoch": 70.55, "learning_rate": 1.4736291218969989e-05, "loss": 1.9016, "step": 24374000 }, { "epoch": 70.55, "learning_rate": 1.4735567571322711e-05, "loss": 1.8798, "step": 24374500 }, { "epoch": 70.56, "learning_rate": 1.4734843923675437e-05, "loss": 1.8824, "step": 24375000 }, { "epoch": 70.56, "learning_rate": 1.4734120276028159e-05, "loss": 1.9054, "step": 24375500 }, { "epoch": 70.56, "learning_rate": 1.4733398075676178e-05, "loss": 1.8854, "step": 24376000 }, { "epoch": 70.56, "learning_rate": 1.47326744280289e-05, "loss": 1.9064, "step": 24376500 }, { "epoch": 70.56, "learning_rate": 1.4731950780381623e-05, "loss": 1.907, "step": 24377000 }, { "epoch": 70.56, "learning_rate": 1.4731228580029641e-05, "loss": 1.9039, "step": 24377500 }, { "epoch": 70.56, "learning_rate": 1.4730504932382364e-05, "loss": 1.8666, "step": 24378000 }, { "epoch": 70.57, "learning_rate": 1.472978128473509e-05, "loss": 1.8575, "step": 24378500 }, { "epoch": 70.57, "learning_rate": 1.4729057637087812e-05, "loss": 1.8997, "step": 24379000 }, { "epoch": 70.57, "learning_rate": 1.4728333989440536e-05, "loss": 1.9088, "step": 24379500 }, { "epoch": 70.57, "learning_rate": 1.4727611789088553e-05, "loss": 1.8834, "step": 24380000 }, { "epoch": 70.57, "learning_rate": 1.4726888141441275e-05, "loss": 1.8943, "step": 24380500 }, { "epoch": 70.57, "learning_rate": 1.4726164493793997e-05, "loss": 1.8715, "step": 24381000 }, { "epoch": 70.57, "learning_rate": 1.4725440846146721e-05, "loss": 1.8773, "step": 24381500 }, { "epoch": 70.58, "learning_rate": 1.4724718645794738e-05, "loss": 1.892, "step": 24382000 }, { "epoch": 70.58, "learning_rate": 1.4723994998147464e-05, "loss": 1.8826, "step": 24382500 }, { "epoch": 70.58, "learning_rate": 1.4723271350500186e-05, "loss": 1.8712, "step": 24383000 }, { "epoch": 70.58, "learning_rate": 1.472254770285291e-05, "loss": 1.9135, "step": 24383500 }, { "epoch": 70.58, "learning_rate": 1.4721825502500927e-05, "loss": 1.9025, "step": 24384000 }, { "epoch": 70.58, "learning_rate": 1.4721103302148945e-05, "loss": 1.8857, "step": 24384500 }, { "epoch": 70.58, "learning_rate": 1.4720379654501667e-05, "loss": 1.9237, "step": 24385000 }, { "epoch": 70.59, "learning_rate": 1.4719656006854391e-05, "loss": 1.8652, "step": 24385500 }, { "epoch": 70.59, "learning_rate": 1.4718932359207113e-05, "loss": 1.8887, "step": 24386000 }, { "epoch": 70.59, "learning_rate": 1.4718208711559839e-05, "loss": 1.8941, "step": 24386500 }, { "epoch": 70.59, "learning_rate": 1.4717486511207856e-05, "loss": 1.8842, "step": 24387000 }, { "epoch": 70.59, "learning_rate": 1.471676286356058e-05, "loss": 1.9018, "step": 24387500 }, { "epoch": 70.59, "learning_rate": 1.4716039215913302e-05, "loss": 1.8775, "step": 24388000 }, { "epoch": 70.59, "learning_rate": 1.4715315568266024e-05, "loss": 1.8902, "step": 24388500 }, { "epoch": 70.6, "learning_rate": 1.4714591920618748e-05, "loss": 1.8846, "step": 24389000 }, { "epoch": 70.6, "learning_rate": 1.471386827297147e-05, "loss": 1.8838, "step": 24389500 }, { "epoch": 70.6, "learning_rate": 1.4713144625324196e-05, "loss": 1.8784, "step": 24390000 }, { "epoch": 70.6, "learning_rate": 1.4712420977676918e-05, "loss": 1.8773, "step": 24390500 }, { "epoch": 70.6, "learning_rate": 1.4711698777324937e-05, "loss": 1.9026, "step": 24391000 }, { "epoch": 70.6, "learning_rate": 1.471097512967766e-05, "loss": 1.8721, "step": 24391500 }, { "epoch": 70.6, "learning_rate": 1.4710251482030382e-05, "loss": 1.8754, "step": 24392000 }, { "epoch": 70.61, "learning_rate": 1.4709527834383106e-05, "loss": 1.912, "step": 24392500 }, { "epoch": 70.61, "learning_rate": 1.4708805634031123e-05, "loss": 1.8769, "step": 24393000 }, { "epoch": 70.61, "learning_rate": 1.4708081986383845e-05, "loss": 1.8774, "step": 24393500 }, { "epoch": 70.61, "learning_rate": 1.470735833873657e-05, "loss": 1.9106, "step": 24394000 }, { "epoch": 70.61, "learning_rate": 1.4706634691089293e-05, "loss": 1.8657, "step": 24394500 }, { "epoch": 70.61, "learning_rate": 1.4705911043442017e-05, "loss": 1.879, "step": 24395000 }, { "epoch": 70.61, "learning_rate": 1.470518739579474e-05, "loss": 1.9163, "step": 24395500 }, { "epoch": 70.62, "learning_rate": 1.4704463748147463e-05, "loss": 1.8875, "step": 24396000 }, { "epoch": 70.62, "learning_rate": 1.4703740100500185e-05, "loss": 1.9032, "step": 24396500 }, { "epoch": 70.62, "learning_rate": 1.4703017900148203e-05, "loss": 1.888, "step": 24397000 }, { "epoch": 70.62, "learning_rate": 1.4702294252500928e-05, "loss": 1.9097, "step": 24397500 }, { "epoch": 70.62, "learning_rate": 1.4701572052148945e-05, "loss": 1.878, "step": 24398000 }, { "epoch": 70.62, "learning_rate": 1.470084840450167e-05, "loss": 1.9062, "step": 24398500 }, { "epoch": 70.63, "learning_rate": 1.4700124756854392e-05, "loss": 1.9036, "step": 24399000 }, { "epoch": 70.63, "learning_rate": 1.4699401109207114e-05, "loss": 1.9068, "step": 24399500 }, { "epoch": 70.63, "learning_rate": 1.4698677461559838e-05, "loss": 1.9179, "step": 24400000 }, { "epoch": 70.63, "learning_rate": 1.469795381391256e-05, "loss": 1.887, "step": 24400500 }, { "epoch": 70.63, "learning_rate": 1.4697230166265282e-05, "loss": 1.9076, "step": 24401000 }, { "epoch": 70.63, "learning_rate": 1.4696506518618008e-05, "loss": 1.9134, "step": 24401500 }, { "epoch": 70.63, "learning_rate": 1.4695782870970732e-05, "loss": 1.8761, "step": 24402000 }, { "epoch": 70.64, "learning_rate": 1.4695059223323454e-05, "loss": 1.8935, "step": 24402500 }, { "epoch": 70.64, "learning_rate": 1.4694335575676176e-05, "loss": 1.9056, "step": 24403000 }, { "epoch": 70.64, "learning_rate": 1.46936119280289e-05, "loss": 1.8861, "step": 24403500 }, { "epoch": 70.64, "learning_rate": 1.4692888280381622e-05, "loss": 1.8854, "step": 24404000 }, { "epoch": 70.64, "learning_rate": 1.4692164632734345e-05, "loss": 1.8816, "step": 24404500 }, { "epoch": 70.64, "learning_rate": 1.469144098508707e-05, "loss": 1.9057, "step": 24405000 }, { "epoch": 70.64, "learning_rate": 1.4690717337439794e-05, "loss": 1.8794, "step": 24405500 }, { "epoch": 70.65, "learning_rate": 1.4689995137087811e-05, "loss": 1.8831, "step": 24406000 }, { "epoch": 70.65, "learning_rate": 1.4689271489440534e-05, "loss": 1.8927, "step": 24406500 }, { "epoch": 70.65, "learning_rate": 1.4688547841793258e-05, "loss": 1.8918, "step": 24407000 }, { "epoch": 70.65, "learning_rate": 1.468782419414598e-05, "loss": 1.8674, "step": 24407500 }, { "epoch": 70.65, "learning_rate": 1.4687100546498702e-05, "loss": 1.903, "step": 24408000 }, { "epoch": 70.65, "learning_rate": 1.4686376898851428e-05, "loss": 1.8908, "step": 24408500 }, { "epoch": 70.65, "learning_rate": 1.4685653251204152e-05, "loss": 1.8875, "step": 24409000 }, { "epoch": 70.66, "learning_rate": 1.4684931050852169e-05, "loss": 1.8892, "step": 24409500 }, { "epoch": 70.66, "learning_rate": 1.4684207403204891e-05, "loss": 1.8686, "step": 24410000 }, { "epoch": 70.66, "learning_rate": 1.4683485202852908e-05, "loss": 1.8841, "step": 24410500 }, { "epoch": 70.66, "learning_rate": 1.4682761555205632e-05, "loss": 1.9042, "step": 24411000 }, { "epoch": 70.66, "learning_rate": 1.4682037907558355e-05, "loss": 1.8863, "step": 24411500 }, { "epoch": 70.66, "learning_rate": 1.4681314259911077e-05, "loss": 1.921, "step": 24412000 }, { "epoch": 70.66, "learning_rate": 1.4680590612263802e-05, "loss": 1.8696, "step": 24412500 }, { "epoch": 70.67, "learning_rate": 1.4679866964616526e-05, "loss": 1.8971, "step": 24413000 }, { "epoch": 70.67, "learning_rate": 1.4679143316969249e-05, "loss": 1.8833, "step": 24413500 }, { "epoch": 70.67, "learning_rate": 1.4678419669321972e-05, "loss": 1.8822, "step": 24414000 }, { "epoch": 70.67, "learning_rate": 1.467769746896999e-05, "loss": 1.8864, "step": 24414500 }, { "epoch": 70.67, "learning_rate": 1.4676973821322712e-05, "loss": 1.8816, "step": 24415000 }, { "epoch": 70.67, "learning_rate": 1.4676250173675434e-05, "loss": 1.9141, "step": 24415500 }, { "epoch": 70.67, "learning_rate": 1.467552652602816e-05, "loss": 1.8783, "step": 24416000 }, { "epoch": 70.68, "learning_rate": 1.4674802878380884e-05, "loss": 1.8648, "step": 24416500 }, { "epoch": 70.68, "learning_rate": 1.4674080678028901e-05, "loss": 1.883, "step": 24417000 }, { "epoch": 70.68, "learning_rate": 1.4673357030381623e-05, "loss": 1.9064, "step": 24417500 }, { "epoch": 70.68, "learning_rate": 1.4672633382734347e-05, "loss": 1.8983, "step": 24418000 }, { "epoch": 70.68, "learning_rate": 1.467190973508707e-05, "loss": 1.885, "step": 24418500 }, { "epoch": 70.68, "learning_rate": 1.4671186087439792e-05, "loss": 1.896, "step": 24419000 }, { "epoch": 70.68, "learning_rate": 1.4670465334383104e-05, "loss": 1.9012, "step": 24419500 }, { "epoch": 70.69, "learning_rate": 1.466974168673583e-05, "loss": 1.891, "step": 24420000 }, { "epoch": 70.69, "learning_rate": 1.4669018039088553e-05, "loss": 1.8936, "step": 24420500 }, { "epoch": 70.69, "learning_rate": 1.4668294391441276e-05, "loss": 1.8968, "step": 24421000 }, { "epoch": 70.69, "learning_rate": 1.4667570743793998e-05, "loss": 1.8995, "step": 24421500 }, { "epoch": 70.69, "learning_rate": 1.4666847096146722e-05, "loss": 1.9089, "step": 24422000 }, { "epoch": 70.69, "learning_rate": 1.4666123448499444e-05, "loss": 1.8979, "step": 24422500 }, { "epoch": 70.69, "learning_rate": 1.4665399800852166e-05, "loss": 1.8985, "step": 24423000 }, { "epoch": 70.7, "learning_rate": 1.4664676153204892e-05, "loss": 1.8769, "step": 24423500 }, { "epoch": 70.7, "learning_rate": 1.4663952505557616e-05, "loss": 1.9026, "step": 24424000 }, { "epoch": 70.7, "learning_rate": 1.4663228857910338e-05, "loss": 1.8882, "step": 24424500 }, { "epoch": 70.7, "learning_rate": 1.466250521026306e-05, "loss": 1.8794, "step": 24425000 }, { "epoch": 70.7, "learning_rate": 1.4661781562615784e-05, "loss": 1.8844, "step": 24425500 }, { "epoch": 70.7, "learning_rate": 1.4661059362263802e-05, "loss": 1.9165, "step": 24426000 }, { "epoch": 70.7, "learning_rate": 1.4660337161911819e-05, "loss": 1.8821, "step": 24426500 }, { "epoch": 70.71, "learning_rate": 1.4659613514264543e-05, "loss": 1.8876, "step": 24427000 }, { "epoch": 70.71, "learning_rate": 1.4658889866617268e-05, "loss": 1.9039, "step": 24427500 }, { "epoch": 70.71, "learning_rate": 1.465816621896999e-05, "loss": 1.8975, "step": 24428000 }, { "epoch": 70.71, "learning_rate": 1.4657442571322713e-05, "loss": 1.8748, "step": 24428500 }, { "epoch": 70.71, "learning_rate": 1.4656718923675437e-05, "loss": 1.9014, "step": 24429000 }, { "epoch": 70.71, "learning_rate": 1.4655995276028159e-05, "loss": 1.9151, "step": 24429500 }, { "epoch": 70.71, "learning_rate": 1.4655273075676176e-05, "loss": 1.8973, "step": 24430000 }, { "epoch": 70.72, "learning_rate": 1.4654549428028898e-05, "loss": 1.8881, "step": 24430500 }, { "epoch": 70.72, "learning_rate": 1.4653825780381624e-05, "loss": 1.8866, "step": 24431000 }, { "epoch": 70.72, "learning_rate": 1.4653102132734348e-05, "loss": 1.8849, "step": 24431500 }, { "epoch": 70.72, "learning_rate": 1.465237848508707e-05, "loss": 1.9057, "step": 24432000 }, { "epoch": 70.72, "learning_rate": 1.4651654837439792e-05, "loss": 1.8783, "step": 24432500 }, { "epoch": 70.72, "learning_rate": 1.4650932637087811e-05, "loss": 1.8559, "step": 24433000 }, { "epoch": 70.72, "learning_rate": 1.4650208989440534e-05, "loss": 1.8883, "step": 24433500 }, { "epoch": 70.73, "learning_rate": 1.4649485341793256e-05, "loss": 1.8966, "step": 24434000 }, { "epoch": 70.73, "learning_rate": 1.464876169414598e-05, "loss": 1.8766, "step": 24434500 }, { "epoch": 70.73, "learning_rate": 1.4648038046498705e-05, "loss": 1.8787, "step": 24435000 }, { "epoch": 70.73, "learning_rate": 1.4647315846146723e-05, "loss": 1.8925, "step": 24435500 }, { "epoch": 70.73, "learning_rate": 1.4646592198499445e-05, "loss": 1.8696, "step": 24436000 }, { "epoch": 70.73, "learning_rate": 1.4645868550852169e-05, "loss": 1.8927, "step": 24436500 }, { "epoch": 70.74, "learning_rate": 1.4645144903204891e-05, "loss": 1.876, "step": 24437000 }, { "epoch": 70.74, "learning_rate": 1.4644421255557613e-05, "loss": 1.9008, "step": 24437500 }, { "epoch": 70.74, "learning_rate": 1.4643697607910337e-05, "loss": 1.8846, "step": 24438000 }, { "epoch": 70.74, "learning_rate": 1.4642973960263063e-05, "loss": 1.8839, "step": 24438500 }, { "epoch": 70.74, "learning_rate": 1.4642250312615785e-05, "loss": 1.897, "step": 24439000 }, { "epoch": 70.74, "learning_rate": 1.4641528112263802e-05, "loss": 1.91, "step": 24439500 }, { "epoch": 70.74, "learning_rate": 1.4640804464616526e-05, "loss": 1.9033, "step": 24440000 }, { "epoch": 70.75, "learning_rate": 1.4640080816969248e-05, "loss": 1.9108, "step": 24440500 }, { "epoch": 70.75, "learning_rate": 1.463935716932197e-05, "loss": 1.8968, "step": 24441000 }, { "epoch": 70.75, "learning_rate": 1.4638633521674695e-05, "loss": 1.8824, "step": 24441500 }, { "epoch": 70.75, "learning_rate": 1.463790987402742e-05, "loss": 1.8808, "step": 24442000 }, { "epoch": 70.75, "learning_rate": 1.4637186226380142e-05, "loss": 1.9079, "step": 24442500 }, { "epoch": 70.75, "learning_rate": 1.463646402602816e-05, "loss": 1.8798, "step": 24443000 }, { "epoch": 70.75, "learning_rate": 1.4635740378380882e-05, "loss": 1.9072, "step": 24443500 }, { "epoch": 70.76, "learning_rate": 1.4635016730733606e-05, "loss": 1.9017, "step": 24444000 }, { "epoch": 70.76, "learning_rate": 1.4634293083086328e-05, "loss": 1.8826, "step": 24444500 }, { "epoch": 70.76, "learning_rate": 1.4633569435439052e-05, "loss": 1.8863, "step": 24445000 }, { "epoch": 70.76, "learning_rate": 1.4632845787791774e-05, "loss": 1.8877, "step": 24445500 }, { "epoch": 70.76, "learning_rate": 1.46321221401445e-05, "loss": 1.8795, "step": 24446000 }, { "epoch": 70.76, "learning_rate": 1.4631399939792517e-05, "loss": 1.8905, "step": 24446500 }, { "epoch": 70.76, "learning_rate": 1.463067629214524e-05, "loss": 1.9093, "step": 24447000 }, { "epoch": 70.77, "learning_rate": 1.4629954091793258e-05, "loss": 1.8773, "step": 24447500 }, { "epoch": 70.77, "learning_rate": 1.462923044414598e-05, "loss": 1.8927, "step": 24448000 }, { "epoch": 70.77, "learning_rate": 1.4628506796498703e-05, "loss": 1.8903, "step": 24448500 }, { "epoch": 70.77, "learning_rate": 1.4627783148851427e-05, "loss": 1.8904, "step": 24449000 }, { "epoch": 70.77, "learning_rate": 1.4627059501204152e-05, "loss": 1.8924, "step": 24449500 }, { "epoch": 70.77, "learning_rate": 1.4626335853556875e-05, "loss": 1.8961, "step": 24450000 }, { "epoch": 70.77, "learning_rate": 1.4625612205909597e-05, "loss": 1.896, "step": 24450500 }, { "epoch": 70.78, "learning_rate": 1.462488855826232e-05, "loss": 1.909, "step": 24451000 }, { "epoch": 70.78, "learning_rate": 1.4624166357910338e-05, "loss": 1.874, "step": 24451500 }, { "epoch": 70.78, "learning_rate": 1.462344271026306e-05, "loss": 1.9146, "step": 24452000 }, { "epoch": 70.78, "learning_rate": 1.4622720509911077e-05, "loss": 1.8933, "step": 24452500 }, { "epoch": 70.78, "learning_rate": 1.4621996862263801e-05, "loss": 1.8943, "step": 24453000 }, { "epoch": 70.78, "learning_rate": 1.4621273214616527e-05, "loss": 1.906, "step": 24453500 }, { "epoch": 70.78, "learning_rate": 1.462054956696925e-05, "loss": 1.9124, "step": 24454000 }, { "epoch": 70.79, "learning_rate": 1.4619825919321971e-05, "loss": 1.8925, "step": 24454500 }, { "epoch": 70.79, "learning_rate": 1.4619102271674695e-05, "loss": 1.8878, "step": 24455000 }, { "epoch": 70.79, "learning_rate": 1.4618378624027418e-05, "loss": 1.9018, "step": 24455500 }, { "epoch": 70.79, "learning_rate": 1.461765497638014e-05, "loss": 1.8804, "step": 24456000 }, { "epoch": 70.79, "learning_rate": 1.4616931328732864e-05, "loss": 1.8878, "step": 24456500 }, { "epoch": 70.79, "learning_rate": 1.461620768108559e-05, "loss": 1.8975, "step": 24457000 }, { "epoch": 70.79, "learning_rate": 1.4615485480733607e-05, "loss": 1.9155, "step": 24457500 }, { "epoch": 70.8, "learning_rate": 1.4614761833086329e-05, "loss": 1.874, "step": 24458000 }, { "epoch": 70.8, "learning_rate": 1.4614038185439053e-05, "loss": 1.9015, "step": 24458500 }, { "epoch": 70.8, "learning_rate": 1.4613314537791775e-05, "loss": 1.8826, "step": 24459000 }, { "epoch": 70.8, "learning_rate": 1.4612590890144497e-05, "loss": 1.8876, "step": 24459500 }, { "epoch": 70.8, "learning_rate": 1.4611867242497221e-05, "loss": 1.9028, "step": 24460000 }, { "epoch": 70.8, "learning_rate": 1.4611145042145238e-05, "loss": 1.8721, "step": 24460500 }, { "epoch": 70.8, "learning_rate": 1.4610421394497964e-05, "loss": 1.876, "step": 24461000 }, { "epoch": 70.81, "learning_rate": 1.4609697746850686e-05, "loss": 1.9017, "step": 24461500 }, { "epoch": 70.81, "learning_rate": 1.4608975546498704e-05, "loss": 1.8696, "step": 24462000 }, { "epoch": 70.81, "learning_rate": 1.4608251898851428e-05, "loss": 1.8744, "step": 24462500 }, { "epoch": 70.81, "learning_rate": 1.460752825120415e-05, "loss": 1.8808, "step": 24463000 }, { "epoch": 70.81, "learning_rate": 1.4606804603556874e-05, "loss": 1.8812, "step": 24463500 }, { "epoch": 70.81, "learning_rate": 1.4606080955909596e-05, "loss": 1.8783, "step": 24464000 }, { "epoch": 70.81, "learning_rate": 1.4605357308262322e-05, "loss": 1.878, "step": 24464500 }, { "epoch": 70.82, "learning_rate": 1.4604633660615044e-05, "loss": 1.8777, "step": 24465000 }, { "epoch": 70.82, "learning_rate": 1.4603911460263061e-05, "loss": 1.8735, "step": 24465500 }, { "epoch": 70.82, "learning_rate": 1.4603187812615785e-05, "loss": 1.87, "step": 24466000 }, { "epoch": 70.82, "learning_rate": 1.4602464164968507e-05, "loss": 1.9153, "step": 24466500 }, { "epoch": 70.82, "learning_rate": 1.460174051732123e-05, "loss": 1.8952, "step": 24467000 }, { "epoch": 70.82, "learning_rate": 1.4601016869673953e-05, "loss": 1.924, "step": 24467500 }, { "epoch": 70.82, "learning_rate": 1.4600293222026679e-05, "loss": 1.8988, "step": 24468000 }, { "epoch": 70.83, "learning_rate": 1.4599569574379401e-05, "loss": 1.913, "step": 24468500 }, { "epoch": 70.83, "learning_rate": 1.4598845926732123e-05, "loss": 1.8892, "step": 24469000 }, { "epoch": 70.83, "learning_rate": 1.4598122279084847e-05, "loss": 1.8843, "step": 24469500 }, { "epoch": 70.83, "learning_rate": 1.459739863143757e-05, "loss": 1.9101, "step": 24470000 }, { "epoch": 70.83, "learning_rate": 1.4596676431085587e-05, "loss": 1.8823, "step": 24470500 }, { "epoch": 70.83, "learning_rate": 1.459595278343831e-05, "loss": 1.8975, "step": 24471000 }, { "epoch": 70.83, "learning_rate": 1.4595229135791033e-05, "loss": 1.9133, "step": 24471500 }, { "epoch": 70.84, "learning_rate": 1.4594505488143759e-05, "loss": 1.8986, "step": 24472000 }, { "epoch": 70.84, "learning_rate": 1.4593781840496481e-05, "loss": 1.883, "step": 24472500 }, { "epoch": 70.84, "learning_rate": 1.4593058192849205e-05, "loss": 1.8677, "step": 24473000 }, { "epoch": 70.84, "learning_rate": 1.4592334545201927e-05, "loss": 1.8898, "step": 24473500 }, { "epoch": 70.84, "learning_rate": 1.459161089755465e-05, "loss": 1.8915, "step": 24474000 }, { "epoch": 70.84, "learning_rate": 1.4590887249907373e-05, "loss": 1.8756, "step": 24474500 }, { "epoch": 70.85, "learning_rate": 1.459016504955539e-05, "loss": 1.9006, "step": 24475000 }, { "epoch": 70.85, "learning_rate": 1.4589441401908116e-05, "loss": 1.8959, "step": 24475500 }, { "epoch": 70.85, "learning_rate": 1.4588717754260838e-05, "loss": 1.886, "step": 24476000 }, { "epoch": 70.85, "learning_rate": 1.4587994106613562e-05, "loss": 1.9059, "step": 24476500 }, { "epoch": 70.85, "learning_rate": 1.458727190626158e-05, "loss": 1.8964, "step": 24477000 }, { "epoch": 70.85, "learning_rate": 1.4586548258614302e-05, "loss": 1.8879, "step": 24477500 }, { "epoch": 70.85, "learning_rate": 1.4585826058262319e-05, "loss": 1.8741, "step": 24478000 }, { "epoch": 70.86, "learning_rate": 1.4585102410615043e-05, "loss": 1.879, "step": 24478500 }, { "epoch": 70.86, "learning_rate": 1.4584378762967765e-05, "loss": 1.8907, "step": 24479000 }, { "epoch": 70.86, "learning_rate": 1.458365511532049e-05, "loss": 1.8785, "step": 24479500 }, { "epoch": 70.86, "learning_rate": 1.4582931467673213e-05, "loss": 1.8785, "step": 24480000 }, { "epoch": 70.86, "learning_rate": 1.4582209267321232e-05, "loss": 1.9117, "step": 24480500 }, { "epoch": 70.86, "learning_rate": 1.4581485619673954e-05, "loss": 1.8831, "step": 24481000 }, { "epoch": 70.86, "learning_rate": 1.4580761972026676e-05, "loss": 1.8893, "step": 24481500 }, { "epoch": 70.87, "learning_rate": 1.45800383243794e-05, "loss": 1.8958, "step": 24482000 }, { "epoch": 70.87, "learning_rate": 1.4579314676732123e-05, "loss": 1.892, "step": 24482500 }, { "epoch": 70.87, "learning_rate": 1.4578591029084848e-05, "loss": 1.8788, "step": 24483000 }, { "epoch": 70.87, "learning_rate": 1.457786738143757e-05, "loss": 1.9001, "step": 24483500 }, { "epoch": 70.87, "learning_rate": 1.4577143733790294e-05, "loss": 1.8923, "step": 24484000 }, { "epoch": 70.87, "learning_rate": 1.4576420086143017e-05, "loss": 1.9062, "step": 24484500 }, { "epoch": 70.87, "learning_rate": 1.4575696438495739e-05, "loss": 1.8641, "step": 24485000 }, { "epoch": 70.88, "learning_rate": 1.4574972790848463e-05, "loss": 1.8888, "step": 24485500 }, { "epoch": 70.88, "learning_rate": 1.4574249143201185e-05, "loss": 1.8824, "step": 24486000 }, { "epoch": 70.88, "learning_rate": 1.4573526942849202e-05, "loss": 1.8931, "step": 24486500 }, { "epoch": 70.88, "learning_rate": 1.4572803295201928e-05, "loss": 1.8789, "step": 24487000 }, { "epoch": 70.88, "learning_rate": 1.4572079647554652e-05, "loss": 1.8679, "step": 24487500 }, { "epoch": 70.88, "learning_rate": 1.4571357447202669e-05, "loss": 1.8741, "step": 24488000 }, { "epoch": 70.88, "learning_rate": 1.4570633799555391e-05, "loss": 1.8979, "step": 24488500 }, { "epoch": 70.89, "learning_rate": 1.4569910151908115e-05, "loss": 1.8597, "step": 24489000 }, { "epoch": 70.89, "learning_rate": 1.4569186504260837e-05, "loss": 1.9047, "step": 24489500 }, { "epoch": 70.89, "learning_rate": 1.456846285661356e-05, "loss": 1.924, "step": 24490000 }, { "epoch": 70.89, "learning_rate": 1.4567739208966285e-05, "loss": 1.9064, "step": 24490500 }, { "epoch": 70.89, "learning_rate": 1.456701556131901e-05, "loss": 1.8899, "step": 24491000 }, { "epoch": 70.89, "learning_rate": 1.4566291913671731e-05, "loss": 1.8892, "step": 24491500 }, { "epoch": 70.89, "learning_rate": 1.4565569713319749e-05, "loss": 1.9035, "step": 24492000 }, { "epoch": 70.9, "learning_rate": 1.4564846065672471e-05, "loss": 1.8948, "step": 24492500 }, { "epoch": 70.9, "learning_rate": 1.4564122418025195e-05, "loss": 1.892, "step": 24493000 }, { "epoch": 70.9, "learning_rate": 1.4563398770377917e-05, "loss": 1.894, "step": 24493500 }, { "epoch": 70.9, "learning_rate": 1.4562676570025934e-05, "loss": 1.9003, "step": 24494000 }, { "epoch": 70.9, "learning_rate": 1.4561954369673955e-05, "loss": 1.9023, "step": 24494500 }, { "epoch": 70.9, "learning_rate": 1.4561230722026679e-05, "loss": 1.9098, "step": 24495000 }, { "epoch": 70.9, "learning_rate": 1.4560507074379401e-05, "loss": 1.8856, "step": 24495500 }, { "epoch": 70.91, "learning_rate": 1.4559783426732123e-05, "loss": 1.8721, "step": 24496000 }, { "epoch": 70.91, "learning_rate": 1.455906122638014e-05, "loss": 1.8683, "step": 24496500 }, { "epoch": 70.91, "learning_rate": 1.4558337578732865e-05, "loss": 1.8986, "step": 24497000 }, { "epoch": 70.91, "learning_rate": 1.4557613931085587e-05, "loss": 1.8813, "step": 24497500 }, { "epoch": 70.91, "learning_rate": 1.4556890283438312e-05, "loss": 1.9027, "step": 24498000 }, { "epoch": 70.91, "learning_rate": 1.4556166635791035e-05, "loss": 1.8883, "step": 24498500 }, { "epoch": 70.91, "learning_rate": 1.4555442988143759e-05, "loss": 1.877, "step": 24499000 }, { "epoch": 70.92, "learning_rate": 1.4554720787791776e-05, "loss": 1.9, "step": 24499500 }, { "epoch": 70.92, "learning_rate": 1.4553997140144498e-05, "loss": 1.9012, "step": 24500000 }, { "epoch": 70.92, "learning_rate": 1.4553273492497222e-05, "loss": 1.8974, "step": 24500500 }, { "epoch": 70.92, "learning_rate": 1.4552549844849944e-05, "loss": 1.9135, "step": 24501000 }, { "epoch": 70.92, "learning_rate": 1.4551826197202666e-05, "loss": 1.9014, "step": 24501500 }, { "epoch": 70.92, "learning_rate": 1.4551102549555392e-05, "loss": 1.8887, "step": 24502000 }, { "epoch": 70.92, "learning_rate": 1.4550378901908116e-05, "loss": 1.9017, "step": 24502500 }, { "epoch": 70.93, "learning_rate": 1.4549655254260838e-05, "loss": 1.9078, "step": 24503000 }, { "epoch": 70.93, "learning_rate": 1.4548933053908855e-05, "loss": 1.891, "step": 24503500 }, { "epoch": 70.93, "learning_rate": 1.454820940626158e-05, "loss": 1.8895, "step": 24504000 }, { "epoch": 70.93, "learning_rate": 1.4547485758614302e-05, "loss": 1.9015, "step": 24504500 }, { "epoch": 70.93, "learning_rate": 1.4546762110967024e-05, "loss": 1.878, "step": 24505000 }, { "epoch": 70.93, "learning_rate": 1.454603846331975e-05, "loss": 1.8836, "step": 24505500 }, { "epoch": 70.93, "learning_rate": 1.4545314815672473e-05, "loss": 1.8877, "step": 24506000 }, { "epoch": 70.94, "learning_rate": 1.4544591168025196e-05, "loss": 1.8926, "step": 24506500 }, { "epoch": 70.94, "learning_rate": 1.4543867520377918e-05, "loss": 1.8819, "step": 24507000 }, { "epoch": 70.94, "learning_rate": 1.4543143872730642e-05, "loss": 1.8556, "step": 24507500 }, { "epoch": 70.94, "learning_rate": 1.4542421672378659e-05, "loss": 1.8951, "step": 24508000 }, { "epoch": 70.94, "learning_rate": 1.4541698024731381e-05, "loss": 1.8746, "step": 24508500 }, { "epoch": 70.94, "learning_rate": 1.4540974377084107e-05, "loss": 1.903, "step": 24509000 }, { "epoch": 70.94, "learning_rate": 1.454025072943683e-05, "loss": 1.8798, "step": 24509500 }, { "epoch": 70.95, "learning_rate": 1.4539527081789553e-05, "loss": 1.9124, "step": 24510000 }, { "epoch": 70.95, "learning_rate": 1.453880488143757e-05, "loss": 1.9107, "step": 24510500 }, { "epoch": 70.95, "learning_rate": 1.4538081233790293e-05, "loss": 1.8938, "step": 24511000 }, { "epoch": 70.95, "learning_rate": 1.4537357586143016e-05, "loss": 1.8844, "step": 24511500 }, { "epoch": 70.95, "learning_rate": 1.4536633938495739e-05, "loss": 1.8821, "step": 24512000 }, { "epoch": 70.95, "learning_rate": 1.4535911738143756e-05, "loss": 1.8989, "step": 24512500 }, { "epoch": 70.96, "learning_rate": 1.4535188090496482e-05, "loss": 1.8988, "step": 24513000 }, { "epoch": 70.96, "learning_rate": 1.45344658901445e-05, "loss": 1.8817, "step": 24513500 }, { "epoch": 70.96, "learning_rate": 1.4533742242497223e-05, "loss": 1.8828, "step": 24514000 }, { "epoch": 70.96, "learning_rate": 1.4533018594849945e-05, "loss": 1.8938, "step": 24514500 }, { "epoch": 70.96, "learning_rate": 1.4532294947202669e-05, "loss": 1.866, "step": 24515000 }, { "epoch": 70.96, "learning_rate": 1.4531571299555391e-05, "loss": 1.8901, "step": 24515500 }, { "epoch": 70.96, "learning_rate": 1.4530849099203408e-05, "loss": 1.8904, "step": 24516000 }, { "epoch": 70.97, "learning_rate": 1.453012545155613e-05, "loss": 1.9082, "step": 24516500 }, { "epoch": 70.97, "learning_rate": 1.4529403251204151e-05, "loss": 1.8666, "step": 24517000 }, { "epoch": 70.97, "learning_rate": 1.4528681050852169e-05, "loss": 1.9064, "step": 24517500 }, { "epoch": 70.97, "learning_rate": 1.4527957403204892e-05, "loss": 1.9035, "step": 24518000 }, { "epoch": 70.97, "learning_rate": 1.4527233755557615e-05, "loss": 1.903, "step": 24518500 }, { "epoch": 70.97, "learning_rate": 1.4526510107910337e-05, "loss": 1.8748, "step": 24519000 }, { "epoch": 70.97, "learning_rate": 1.452578646026306e-05, "loss": 1.8982, "step": 24519500 }, { "epoch": 70.98, "learning_rate": 1.4525062812615783e-05, "loss": 1.9011, "step": 24520000 }, { "epoch": 70.98, "learning_rate": 1.4524339164968507e-05, "loss": 1.8925, "step": 24520500 }, { "epoch": 70.98, "learning_rate": 1.4523615517321233e-05, "loss": 1.8688, "step": 24521000 }, { "epoch": 70.98, "learning_rate": 1.4522891869673955e-05, "loss": 1.8655, "step": 24521500 }, { "epoch": 70.98, "learning_rate": 1.4522168222026677e-05, "loss": 1.8907, "step": 24522000 }, { "epoch": 70.98, "learning_rate": 1.4521444574379401e-05, "loss": 1.8906, "step": 24522500 }, { "epoch": 70.98, "learning_rate": 1.4520720926732123e-05, "loss": 1.9012, "step": 24523000 }, { "epoch": 70.99, "learning_rate": 1.4519997279084845e-05, "loss": 1.8796, "step": 24523500 }, { "epoch": 70.99, "learning_rate": 1.451927363143757e-05, "loss": 1.8834, "step": 24524000 }, { "epoch": 70.99, "learning_rate": 1.4518549983790295e-05, "loss": 1.8959, "step": 24524500 }, { "epoch": 70.99, "learning_rate": 1.4517826336143017e-05, "loss": 1.8738, "step": 24525000 }, { "epoch": 70.99, "learning_rate": 1.451710268849574e-05, "loss": 1.8862, "step": 24525500 }, { "epoch": 70.99, "learning_rate": 1.4516380488143758e-05, "loss": 1.8893, "step": 24526000 }, { "epoch": 70.99, "learning_rate": 1.451565684049648e-05, "loss": 1.8821, "step": 24526500 }, { "epoch": 71.0, "learning_rate": 1.4514933192849203e-05, "loss": 1.8984, "step": 24527000 }, { "epoch": 71.0, "learning_rate": 1.4514209545201927e-05, "loss": 1.8981, "step": 24527500 }, { "epoch": 71.0, "learning_rate": 1.4513487344849946e-05, "loss": 1.882, "step": 24528000 }, { "epoch": 71.0, "learning_rate": 1.451276369720267e-05, "loss": 1.8848, "step": 24528500 }, { "epoch": 71.0, "eval_accuracy": 0.6825834757584475, "eval_accuracy_mlm": 0.6505377607164504, "eval_accuracy_nsp": 0.8545003735225236, "eval_loss": 2.1872825622558594, "eval_runtime": 331.9644, "eval_samples_per_second": 1314.557, "eval_steps_per_second": 54.774, "step": 24528512 }, { "epoch": 71.0, "learning_rate": 1.4512040049555392e-05, "loss": 1.8681, "step": 24529000 }, { "epoch": 71.0, "learning_rate": 1.4511316401908114e-05, "loss": 1.8673, "step": 24529500 }, { "epoch": 71.0, "learning_rate": 1.4510595648851426e-05, "loss": 1.869, "step": 24530000 }, { "epoch": 71.01, "learning_rate": 1.450987200120415e-05, "loss": 1.8997, "step": 24530500 }, { "epoch": 71.01, "learning_rate": 1.4509148353556873e-05, "loss": 1.8761, "step": 24531000 }, { "epoch": 71.01, "learning_rate": 1.450842615320489e-05, "loss": 1.8892, "step": 24531500 }, { "epoch": 71.01, "learning_rate": 1.4507702505557615e-05, "loss": 1.8866, "step": 24532000 }, { "epoch": 71.01, "learning_rate": 1.450697885791034e-05, "loss": 1.8991, "step": 24532500 }, { "epoch": 71.01, "learning_rate": 1.4506255210263062e-05, "loss": 1.8743, "step": 24533000 }, { "epoch": 71.01, "learning_rate": 1.4505531562615784e-05, "loss": 1.866, "step": 24533500 }, { "epoch": 71.02, "learning_rate": 1.4504807914968508e-05, "loss": 1.8786, "step": 24534000 }, { "epoch": 71.02, "learning_rate": 1.450408426732123e-05, "loss": 1.8919, "step": 24534500 }, { "epoch": 71.02, "learning_rate": 1.4503360619673952e-05, "loss": 1.8672, "step": 24535000 }, { "epoch": 71.02, "learning_rate": 1.4502636972026678e-05, "loss": 1.8864, "step": 24535500 }, { "epoch": 71.02, "learning_rate": 1.4501914771674697e-05, "loss": 1.8564, "step": 24536000 }, { "epoch": 71.02, "learning_rate": 1.4501191124027419e-05, "loss": 1.8811, "step": 24536500 }, { "epoch": 71.02, "learning_rate": 1.4500468923675436e-05, "loss": 1.8774, "step": 24537000 }, { "epoch": 71.03, "learning_rate": 1.4499745276028159e-05, "loss": 1.8609, "step": 24537500 }, { "epoch": 71.03, "learning_rate": 1.4499021628380882e-05, "loss": 1.8882, "step": 24538000 }, { "epoch": 71.03, "learning_rate": 1.4498297980733605e-05, "loss": 1.8811, "step": 24538500 }, { "epoch": 71.03, "learning_rate": 1.4497574333086329e-05, "loss": 1.8734, "step": 24539000 }, { "epoch": 71.03, "learning_rate": 1.4496850685439054e-05, "loss": 1.8869, "step": 24539500 }, { "epoch": 71.03, "learning_rate": 1.4496127037791776e-05, "loss": 1.8699, "step": 24540000 }, { "epoch": 71.03, "learning_rate": 1.4495404837439794e-05, "loss": 1.9038, "step": 24540500 }, { "epoch": 71.04, "learning_rate": 1.4494681189792516e-05, "loss": 1.8797, "step": 24541000 }, { "epoch": 71.04, "learning_rate": 1.449395754214524e-05, "loss": 1.9084, "step": 24541500 }, { "epoch": 71.04, "learning_rate": 1.4493233894497962e-05, "loss": 1.8951, "step": 24542000 }, { "epoch": 71.04, "learning_rate": 1.449251169414598e-05, "loss": 1.8877, "step": 24542500 }, { "epoch": 71.04, "learning_rate": 1.4491788046498703e-05, "loss": 1.8866, "step": 24543000 }, { "epoch": 71.04, "learning_rate": 1.4491064398851429e-05, "loss": 1.8982, "step": 24543500 }, { "epoch": 71.04, "learning_rate": 1.4490340751204151e-05, "loss": 1.8734, "step": 24544000 }, { "epoch": 71.05, "learning_rate": 1.4489618550852168e-05, "loss": 1.8768, "step": 24544500 }, { "epoch": 71.05, "learning_rate": 1.4488894903204892e-05, "loss": 1.8615, "step": 24545000 }, { "epoch": 71.05, "learning_rate": 1.4488171255557615e-05, "loss": 1.8635, "step": 24545500 }, { "epoch": 71.05, "learning_rate": 1.4487447607910337e-05, "loss": 1.8836, "step": 24546000 }, { "epoch": 71.05, "learning_rate": 1.448672396026306e-05, "loss": 1.8826, "step": 24546500 }, { "epoch": 71.05, "learning_rate": 1.4486000312615786e-05, "loss": 1.8706, "step": 24547000 }, { "epoch": 71.05, "learning_rate": 1.4485276664968509e-05, "loss": 1.8706, "step": 24547500 }, { "epoch": 71.06, "learning_rate": 1.448455301732123e-05, "loss": 1.8618, "step": 24548000 }, { "epoch": 71.06, "learning_rate": 1.4483830816969248e-05, "loss": 1.8708, "step": 24548500 }, { "epoch": 71.06, "learning_rate": 1.4483107169321972e-05, "loss": 1.9225, "step": 24549000 }, { "epoch": 71.06, "learning_rate": 1.4482383521674694e-05, "loss": 1.8797, "step": 24549500 }, { "epoch": 71.06, "learning_rate": 1.4481659874027418e-05, "loss": 1.8815, "step": 24550000 }, { "epoch": 71.06, "learning_rate": 1.448093622638014e-05, "loss": 1.8834, "step": 24550500 }, { "epoch": 71.07, "learning_rate": 1.4480212578732866e-05, "loss": 1.8476, "step": 24551000 }, { "epoch": 71.07, "learning_rate": 1.4479490378380883e-05, "loss": 1.8871, "step": 24551500 }, { "epoch": 71.07, "learning_rate": 1.4478766730733605e-05, "loss": 1.8803, "step": 24552000 }, { "epoch": 71.07, "learning_rate": 1.447804308308633e-05, "loss": 1.8935, "step": 24552500 }, { "epoch": 71.07, "learning_rate": 1.4477320882734347e-05, "loss": 1.8816, "step": 24553000 }, { "epoch": 71.07, "learning_rate": 1.4476597235087069e-05, "loss": 1.8767, "step": 24553500 }, { "epoch": 71.07, "learning_rate": 1.4475873587439793e-05, "loss": 1.8657, "step": 24554000 }, { "epoch": 71.08, "learning_rate": 1.4475149939792518e-05, "loss": 1.9121, "step": 24554500 }, { "epoch": 71.08, "learning_rate": 1.447442629214524e-05, "loss": 1.8794, "step": 24555000 }, { "epoch": 71.08, "learning_rate": 1.4473702644497963e-05, "loss": 1.8657, "step": 24555500 }, { "epoch": 71.08, "learning_rate": 1.4472978996850687e-05, "loss": 1.868, "step": 24556000 }, { "epoch": 71.08, "learning_rate": 1.4472255349203409e-05, "loss": 1.8876, "step": 24556500 }, { "epoch": 71.08, "learning_rate": 1.4471531701556131e-05, "loss": 1.8961, "step": 24557000 }, { "epoch": 71.08, "learning_rate": 1.447080950120415e-05, "loss": 1.8729, "step": 24557500 }, { "epoch": 71.09, "learning_rate": 1.4470085853556872e-05, "loss": 1.9084, "step": 24558000 }, { "epoch": 71.09, "learning_rate": 1.4469362205909598e-05, "loss": 1.8707, "step": 24558500 }, { "epoch": 71.09, "learning_rate": 1.446863855826232e-05, "loss": 1.8946, "step": 24559000 }, { "epoch": 71.09, "learning_rate": 1.4467916357910338e-05, "loss": 1.8772, "step": 24559500 }, { "epoch": 71.09, "learning_rate": 1.4467192710263062e-05, "loss": 1.8478, "step": 24560000 }, { "epoch": 71.09, "learning_rate": 1.4466469062615784e-05, "loss": 1.8903, "step": 24560500 }, { "epoch": 71.09, "learning_rate": 1.4465745414968506e-05, "loss": 1.8661, "step": 24561000 }, { "epoch": 71.1, "learning_rate": 1.446502176732123e-05, "loss": 1.876, "step": 24561500 }, { "epoch": 71.1, "learning_rate": 1.4464298119673956e-05, "loss": 1.8789, "step": 24562000 }, { "epoch": 71.1, "learning_rate": 1.4463574472026678e-05, "loss": 1.8671, "step": 24562500 }, { "epoch": 71.1, "learning_rate": 1.4462850824379402e-05, "loss": 1.8751, "step": 24563000 }, { "epoch": 71.1, "learning_rate": 1.4462128624027419e-05, "loss": 1.8929, "step": 24563500 }, { "epoch": 71.1, "learning_rate": 1.4461404976380141e-05, "loss": 1.893, "step": 24564000 }, { "epoch": 71.1, "learning_rate": 1.4460681328732863e-05, "loss": 1.8617, "step": 24564500 }, { "epoch": 71.11, "learning_rate": 1.4459957681085587e-05, "loss": 1.8864, "step": 24565000 }, { "epoch": 71.11, "learning_rate": 1.4459234033438313e-05, "loss": 1.8568, "step": 24565500 }, { "epoch": 71.11, "learning_rate": 1.4458510385791035e-05, "loss": 1.8629, "step": 24566000 }, { "epoch": 71.11, "learning_rate": 1.4457786738143757e-05, "loss": 1.865, "step": 24566500 }, { "epoch": 71.11, "learning_rate": 1.4457063090496481e-05, "loss": 1.9045, "step": 24567000 }, { "epoch": 71.11, "learning_rate": 1.4456340890144499e-05, "loss": 1.8647, "step": 24567500 }, { "epoch": 71.11, "learning_rate": 1.445561724249722e-05, "loss": 1.8812, "step": 24568000 }, { "epoch": 71.12, "learning_rate": 1.4454893594849945e-05, "loss": 1.8854, "step": 24568500 }, { "epoch": 71.12, "learning_rate": 1.4454169947202667e-05, "loss": 1.8748, "step": 24569000 }, { "epoch": 71.12, "learning_rate": 1.4453446299555393e-05, "loss": 1.8756, "step": 24569500 }, { "epoch": 71.12, "learning_rate": 1.4452722651908115e-05, "loss": 1.8731, "step": 24570000 }, { "epoch": 71.12, "learning_rate": 1.4451999004260839e-05, "loss": 1.8683, "step": 24570500 }, { "epoch": 71.12, "learning_rate": 1.4451276803908856e-05, "loss": 1.89, "step": 24571000 }, { "epoch": 71.12, "learning_rate": 1.4450553156261578e-05, "loss": 1.89, "step": 24571500 }, { "epoch": 71.13, "learning_rate": 1.4449829508614302e-05, "loss": 1.8872, "step": 24572000 }, { "epoch": 71.13, "learning_rate": 1.444910730826232e-05, "loss": 1.8819, "step": 24572500 }, { "epoch": 71.13, "learning_rate": 1.4448383660615045e-05, "loss": 1.8965, "step": 24573000 }, { "epoch": 71.13, "learning_rate": 1.4447660012967767e-05, "loss": 1.8887, "step": 24573500 }, { "epoch": 71.13, "learning_rate": 1.4446937812615785e-05, "loss": 1.8579, "step": 24574000 }, { "epoch": 71.13, "learning_rate": 1.4446214164968508e-05, "loss": 1.8778, "step": 24574500 }, { "epoch": 71.13, "learning_rate": 1.444549051732123e-05, "loss": 1.8628, "step": 24575000 }, { "epoch": 71.14, "learning_rate": 1.4444766869673953e-05, "loss": 1.9025, "step": 24575500 }, { "epoch": 71.14, "learning_rate": 1.4444043222026677e-05, "loss": 1.9078, "step": 24576000 }, { "epoch": 71.14, "learning_rate": 1.4443319574379399e-05, "loss": 1.8873, "step": 24576500 }, { "epoch": 71.14, "learning_rate": 1.4442595926732125e-05, "loss": 1.8913, "step": 24577000 }, { "epoch": 71.14, "learning_rate": 1.4441872279084847e-05, "loss": 1.8864, "step": 24577500 }, { "epoch": 71.14, "learning_rate": 1.4441148631437571e-05, "loss": 1.8812, "step": 24578000 }, { "epoch": 71.14, "learning_rate": 1.4440424983790293e-05, "loss": 1.9087, "step": 24578500 }, { "epoch": 71.15, "learning_rate": 1.4439701336143015e-05, "loss": 1.899, "step": 24579000 }, { "epoch": 71.15, "learning_rate": 1.443897768849574e-05, "loss": 1.8599, "step": 24579500 }, { "epoch": 71.15, "learning_rate": 1.4438254040848462e-05, "loss": 1.894, "step": 24580000 }, { "epoch": 71.15, "learning_rate": 1.4437531840496482e-05, "loss": 1.8533, "step": 24580500 }, { "epoch": 71.15, "learning_rate": 1.44368096401445e-05, "loss": 1.8911, "step": 24581000 }, { "epoch": 71.15, "learning_rate": 1.4436085992497223e-05, "loss": 1.8917, "step": 24581500 }, { "epoch": 71.15, "learning_rate": 1.4435362344849946e-05, "loss": 1.8749, "step": 24582000 }, { "epoch": 71.16, "learning_rate": 1.4434638697202668e-05, "loss": 1.8832, "step": 24582500 }, { "epoch": 71.16, "learning_rate": 1.4433915049555392e-05, "loss": 1.8911, "step": 24583000 }, { "epoch": 71.16, "learning_rate": 1.4433191401908114e-05, "loss": 1.8743, "step": 24583500 }, { "epoch": 71.16, "learning_rate": 1.4432469201556131e-05, "loss": 1.8714, "step": 24584000 }, { "epoch": 71.16, "learning_rate": 1.4431745553908857e-05, "loss": 1.861, "step": 24584500 }, { "epoch": 71.16, "learning_rate": 1.4431021906261579e-05, "loss": 1.86, "step": 24585000 }, { "epoch": 71.16, "learning_rate": 1.4430298258614303e-05, "loss": 1.8915, "step": 24585500 }, { "epoch": 71.17, "learning_rate": 1.4429574610967025e-05, "loss": 1.8731, "step": 24586000 }, { "epoch": 71.17, "learning_rate": 1.4428850963319747e-05, "loss": 1.8849, "step": 24586500 }, { "epoch": 71.17, "learning_rate": 1.4428127315672471e-05, "loss": 1.8778, "step": 24587000 }, { "epoch": 71.17, "learning_rate": 1.4427403668025194e-05, "loss": 1.8845, "step": 24587500 }, { "epoch": 71.17, "learning_rate": 1.4426681467673214e-05, "loss": 1.8856, "step": 24588000 }, { "epoch": 71.17, "learning_rate": 1.4425959267321232e-05, "loss": 1.8914, "step": 24588500 }, { "epoch": 71.18, "learning_rate": 1.4425235619673955e-05, "loss": 1.8972, "step": 24589000 }, { "epoch": 71.18, "learning_rate": 1.4424511972026678e-05, "loss": 1.8782, "step": 24589500 }, { "epoch": 71.18, "learning_rate": 1.44237883243794e-05, "loss": 1.8777, "step": 24590000 }, { "epoch": 71.18, "learning_rate": 1.4423064676732124e-05, "loss": 1.8789, "step": 24590500 }, { "epoch": 71.18, "learning_rate": 1.4422341029084846e-05, "loss": 1.8991, "step": 24591000 }, { "epoch": 71.18, "learning_rate": 1.4421617381437568e-05, "loss": 1.8787, "step": 24591500 }, { "epoch": 71.18, "learning_rate": 1.4420893733790294e-05, "loss": 1.8781, "step": 24592000 }, { "epoch": 71.19, "learning_rate": 1.4420171533438311e-05, "loss": 1.8771, "step": 24592500 }, { "epoch": 71.19, "learning_rate": 1.4419447885791035e-05, "loss": 1.879, "step": 24593000 }, { "epoch": 71.19, "learning_rate": 1.4418724238143757e-05, "loss": 1.881, "step": 24593500 }, { "epoch": 71.19, "learning_rate": 1.4418000590496481e-05, "loss": 1.8981, "step": 24594000 }, { "epoch": 71.19, "learning_rate": 1.4417276942849203e-05, "loss": 1.8742, "step": 24594500 }, { "epoch": 71.19, "learning_rate": 1.4416553295201926e-05, "loss": 1.8931, "step": 24595000 }, { "epoch": 71.19, "learning_rate": 1.4415831094849946e-05, "loss": 1.8894, "step": 24595500 }, { "epoch": 71.2, "learning_rate": 1.4415107447202669e-05, "loss": 1.8946, "step": 24596000 }, { "epoch": 71.2, "learning_rate": 1.4414383799555393e-05, "loss": 1.8719, "step": 24596500 }, { "epoch": 71.2, "learning_rate": 1.4413660151908115e-05, "loss": 1.8809, "step": 24597000 }, { "epoch": 71.2, "learning_rate": 1.4412936504260837e-05, "loss": 1.8831, "step": 24597500 }, { "epoch": 71.2, "learning_rate": 1.4412214303908856e-05, "loss": 1.8625, "step": 24598000 }, { "epoch": 71.2, "learning_rate": 1.4411490656261578e-05, "loss": 1.8861, "step": 24598500 }, { "epoch": 71.2, "learning_rate": 1.44107670086143e-05, "loss": 1.8536, "step": 24599000 }, { "epoch": 71.21, "learning_rate": 1.4410043360967026e-05, "loss": 1.8767, "step": 24599500 }, { "epoch": 71.21, "learning_rate": 1.4409321160615045e-05, "loss": 1.8901, "step": 24600000 }, { "epoch": 71.21, "learning_rate": 1.4408597512967767e-05, "loss": 1.861, "step": 24600500 }, { "epoch": 71.21, "learning_rate": 1.440787386532049e-05, "loss": 1.8634, "step": 24601000 }, { "epoch": 71.21, "learning_rate": 1.4407150217673213e-05, "loss": 1.8925, "step": 24601500 }, { "epoch": 71.21, "learning_rate": 1.4406426570025936e-05, "loss": 1.8867, "step": 24602000 }, { "epoch": 71.21, "learning_rate": 1.4405702922378658e-05, "loss": 1.8931, "step": 24602500 }, { "epoch": 71.22, "learning_rate": 1.4404979274731383e-05, "loss": 1.8977, "step": 24603000 }, { "epoch": 71.22, "learning_rate": 1.44042570743794e-05, "loss": 1.8823, "step": 24603500 }, { "epoch": 71.22, "learning_rate": 1.440353487402742e-05, "loss": 1.8622, "step": 24604000 }, { "epoch": 71.22, "learning_rate": 1.4402811226380142e-05, "loss": 1.8608, "step": 24604500 }, { "epoch": 71.22, "learning_rate": 1.4402087578732864e-05, "loss": 1.8774, "step": 24605000 }, { "epoch": 71.22, "learning_rate": 1.4401363931085588e-05, "loss": 1.8898, "step": 24605500 }, { "epoch": 71.22, "learning_rate": 1.440064028343831e-05, "loss": 1.8796, "step": 24606000 }, { "epoch": 71.23, "learning_rate": 1.4399916635791032e-05, "loss": 1.881, "step": 24606500 }, { "epoch": 71.23, "learning_rate": 1.4399194435439053e-05, "loss": 1.9093, "step": 24607000 }, { "epoch": 71.23, "learning_rate": 1.4398470787791777e-05, "loss": 1.8701, "step": 24607500 }, { "epoch": 71.23, "learning_rate": 1.43977471401445e-05, "loss": 1.8851, "step": 24608000 }, { "epoch": 71.23, "learning_rate": 1.4397023492497222e-05, "loss": 1.8661, "step": 24608500 }, { "epoch": 71.23, "learning_rate": 1.4396299844849945e-05, "loss": 1.8844, "step": 24609000 }, { "epoch": 71.23, "learning_rate": 1.4395576197202668e-05, "loss": 1.8987, "step": 24609500 }, { "epoch": 71.24, "learning_rate": 1.439485254955539e-05, "loss": 1.8798, "step": 24610000 }, { "epoch": 71.24, "learning_rate": 1.4394128901908116e-05, "loss": 1.8826, "step": 24610500 }, { "epoch": 71.24, "learning_rate": 1.439340525426084e-05, "loss": 1.8949, "step": 24611000 }, { "epoch": 71.24, "learning_rate": 1.4392681606613562e-05, "loss": 1.8866, "step": 24611500 }, { "epoch": 71.24, "learning_rate": 1.4391957958966284e-05, "loss": 1.8987, "step": 24612000 }, { "epoch": 71.24, "learning_rate": 1.4391234311319008e-05, "loss": 1.899, "step": 24612500 }, { "epoch": 71.24, "learning_rate": 1.439051355826232e-05, "loss": 1.9047, "step": 24613000 }, { "epoch": 71.25, "learning_rate": 1.4389789910615042e-05, "loss": 1.9062, "step": 24613500 }, { "epoch": 71.25, "learning_rate": 1.4389066262967765e-05, "loss": 1.8889, "step": 24614000 }, { "epoch": 71.25, "learning_rate": 1.438834261532049e-05, "loss": 1.892, "step": 24614500 }, { "epoch": 71.25, "learning_rate": 1.4387618967673214e-05, "loss": 1.8634, "step": 24615000 }, { "epoch": 71.25, "learning_rate": 1.4386895320025936e-05, "loss": 1.8921, "step": 24615500 }, { "epoch": 71.25, "learning_rate": 1.4386171672378659e-05, "loss": 1.8988, "step": 24616000 }, { "epoch": 71.25, "learning_rate": 1.4385448024731383e-05, "loss": 1.8785, "step": 24616500 }, { "epoch": 71.26, "learning_rate": 1.4384724377084105e-05, "loss": 1.8836, "step": 24617000 }, { "epoch": 71.26, "learning_rate": 1.4384000729436829e-05, "loss": 1.8739, "step": 24617500 }, { "epoch": 71.26, "learning_rate": 1.4383277081789553e-05, "loss": 1.8874, "step": 24618000 }, { "epoch": 71.26, "learning_rate": 1.4382556328732865e-05, "loss": 1.9087, "step": 24618500 }, { "epoch": 71.26, "learning_rate": 1.4381832681085589e-05, "loss": 1.8896, "step": 24619000 }, { "epoch": 71.26, "learning_rate": 1.4381109033438311e-05, "loss": 1.8696, "step": 24619500 }, { "epoch": 71.26, "learning_rate": 1.4380385385791035e-05, "loss": 1.9054, "step": 24620000 }, { "epoch": 71.27, "learning_rate": 1.4379661738143757e-05, "loss": 1.8835, "step": 24620500 }, { "epoch": 71.27, "learning_rate": 1.437893809049648e-05, "loss": 1.8849, "step": 24621000 }, { "epoch": 71.27, "learning_rate": 1.4378214442849203e-05, "loss": 1.872, "step": 24621500 }, { "epoch": 71.27, "learning_rate": 1.4377490795201929e-05, "loss": 1.8701, "step": 24622000 }, { "epoch": 71.27, "learning_rate": 1.4376767147554651e-05, "loss": 1.8943, "step": 24622500 }, { "epoch": 71.27, "learning_rate": 1.4376043499907373e-05, "loss": 1.8785, "step": 24623000 }, { "epoch": 71.27, "learning_rate": 1.4375319852260097e-05, "loss": 1.8817, "step": 24623500 }, { "epoch": 71.28, "learning_rate": 1.437459620461282e-05, "loss": 1.8569, "step": 24624000 }, { "epoch": 71.28, "learning_rate": 1.4373872556965542e-05, "loss": 1.8589, "step": 24624500 }, { "epoch": 71.28, "learning_rate": 1.4373151803908854e-05, "loss": 1.8868, "step": 24625000 }, { "epoch": 71.28, "learning_rate": 1.437242815626158e-05, "loss": 1.8965, "step": 24625500 }, { "epoch": 71.28, "learning_rate": 1.4371704508614304e-05, "loss": 1.9056, "step": 24626000 }, { "epoch": 71.28, "learning_rate": 1.4370982308262321e-05, "loss": 1.893, "step": 24626500 }, { "epoch": 71.29, "learning_rate": 1.4370258660615043e-05, "loss": 1.881, "step": 24627000 }, { "epoch": 71.29, "learning_rate": 1.436953646026306e-05, "loss": 1.8793, "step": 24627500 }, { "epoch": 71.29, "learning_rate": 1.4368812812615784e-05, "loss": 1.874, "step": 24628000 }, { "epoch": 71.29, "learning_rate": 1.4368089164968507e-05, "loss": 1.8758, "step": 24628500 }, { "epoch": 71.29, "learning_rate": 1.4367365517321229e-05, "loss": 1.9223, "step": 24629000 }, { "epoch": 71.29, "learning_rate": 1.436664331696925e-05, "loss": 1.8741, "step": 24629500 }, { "epoch": 71.29, "learning_rate": 1.4365919669321973e-05, "loss": 1.8738, "step": 24630000 }, { "epoch": 71.3, "learning_rate": 1.4365196021674696e-05, "loss": 1.8757, "step": 24630500 }, { "epoch": 71.3, "learning_rate": 1.4364472374027418e-05, "loss": 1.8664, "step": 24631000 }, { "epoch": 71.3, "learning_rate": 1.4363748726380142e-05, "loss": 1.8792, "step": 24631500 }, { "epoch": 71.3, "learning_rate": 1.4363025078732864e-05, "loss": 1.8781, "step": 24632000 }, { "epoch": 71.3, "learning_rate": 1.4362301431085586e-05, "loss": 1.9117, "step": 24632500 }, { "epoch": 71.3, "learning_rate": 1.4361577783438312e-05, "loss": 1.8669, "step": 24633000 }, { "epoch": 71.3, "learning_rate": 1.4360854135791036e-05, "loss": 1.885, "step": 24633500 }, { "epoch": 71.31, "learning_rate": 1.4360130488143758e-05, "loss": 1.8659, "step": 24634000 }, { "epoch": 71.31, "learning_rate": 1.435940684049648e-05, "loss": 1.8656, "step": 24634500 }, { "epoch": 71.31, "learning_rate": 1.43586846401445e-05, "loss": 1.8985, "step": 24635000 }, { "epoch": 71.31, "learning_rate": 1.4357960992497221e-05, "loss": 1.8757, "step": 24635500 }, { "epoch": 71.31, "learning_rate": 1.4357237344849944e-05, "loss": 1.8615, "step": 24636000 }, { "epoch": 71.31, "learning_rate": 1.4356513697202668e-05, "loss": 1.8534, "step": 24636500 }, { "epoch": 71.31, "learning_rate": 1.4355790049555393e-05, "loss": 1.8856, "step": 24637000 }, { "epoch": 71.32, "learning_rate": 1.4355066401908115e-05, "loss": 1.8759, "step": 24637500 }, { "epoch": 71.32, "learning_rate": 1.4354342754260838e-05, "loss": 1.8755, "step": 24638000 }, { "epoch": 71.32, "learning_rate": 1.4353619106613562e-05, "loss": 1.8795, "step": 24638500 }, { "epoch": 71.32, "learning_rate": 1.4352895458966284e-05, "loss": 1.8981, "step": 24639000 }, { "epoch": 71.32, "learning_rate": 1.4352171811319006e-05, "loss": 1.8755, "step": 24639500 }, { "epoch": 71.32, "learning_rate": 1.4351449610967025e-05, "loss": 1.8933, "step": 24640000 }, { "epoch": 71.32, "learning_rate": 1.435072596331975e-05, "loss": 1.8461, "step": 24640500 }, { "epoch": 71.33, "learning_rate": 1.4350002315672473e-05, "loss": 1.8647, "step": 24641000 }, { "epoch": 71.33, "learning_rate": 1.4349281562615785e-05, "loss": 1.8791, "step": 24641500 }, { "epoch": 71.33, "learning_rate": 1.4348557914968507e-05, "loss": 1.8977, "step": 24642000 }, { "epoch": 71.33, "learning_rate": 1.4347834267321231e-05, "loss": 1.8806, "step": 24642500 }, { "epoch": 71.33, "learning_rate": 1.4347110619673954e-05, "loss": 1.8805, "step": 24643000 }, { "epoch": 71.33, "learning_rate": 1.4346386972026676e-05, "loss": 1.8828, "step": 24643500 }, { "epoch": 71.33, "learning_rate": 1.43456633243794e-05, "loss": 1.8553, "step": 24644000 }, { "epoch": 71.34, "learning_rate": 1.4344939676732125e-05, "loss": 1.8823, "step": 24644500 }, { "epoch": 71.34, "learning_rate": 1.4344216029084848e-05, "loss": 1.9112, "step": 24645000 }, { "epoch": 71.34, "learning_rate": 1.434349238143757e-05, "loss": 1.8996, "step": 24645500 }, { "epoch": 71.34, "learning_rate": 1.4342768733790294e-05, "loss": 1.9156, "step": 24646000 }, { "epoch": 71.34, "learning_rate": 1.4342045086143016e-05, "loss": 1.8857, "step": 24646500 }, { "epoch": 71.34, "learning_rate": 1.4341321438495738e-05, "loss": 1.8849, "step": 24647000 }, { "epoch": 71.34, "learning_rate": 1.4340597790848462e-05, "loss": 1.8699, "step": 24647500 }, { "epoch": 71.35, "learning_rate": 1.4339874143201188e-05, "loss": 1.8845, "step": 24648000 }, { "epoch": 71.35, "learning_rate": 1.433915049555391e-05, "loss": 1.9044, "step": 24648500 }, { "epoch": 71.35, "learning_rate": 1.4338428295201927e-05, "loss": 1.8845, "step": 24649000 }, { "epoch": 71.35, "learning_rate": 1.4337704647554651e-05, "loss": 1.911, "step": 24649500 }, { "epoch": 71.35, "learning_rate": 1.4336980999907373e-05, "loss": 1.8765, "step": 24650000 }, { "epoch": 71.35, "learning_rate": 1.4336257352260096e-05, "loss": 1.8954, "step": 24650500 }, { "epoch": 71.35, "learning_rate": 1.4335535151908115e-05, "loss": 1.8536, "step": 24651000 }, { "epoch": 71.36, "learning_rate": 1.433481150426084e-05, "loss": 1.8836, "step": 24651500 }, { "epoch": 71.36, "learning_rate": 1.4334087856613562e-05, "loss": 1.8821, "step": 24652000 }, { "epoch": 71.36, "learning_rate": 1.4333364208966285e-05, "loss": 1.8685, "step": 24652500 }, { "epoch": 71.36, "learning_rate": 1.4332640561319009e-05, "loss": 1.8415, "step": 24653000 }, { "epoch": 71.36, "learning_rate": 1.433191691367173e-05, "loss": 1.8922, "step": 24653500 }, { "epoch": 71.36, "learning_rate": 1.4331194713319748e-05, "loss": 1.8648, "step": 24654000 }, { "epoch": 71.36, "learning_rate": 1.433047106567247e-05, "loss": 1.852, "step": 24654500 }, { "epoch": 71.37, "learning_rate": 1.4329747418025194e-05, "loss": 1.8817, "step": 24655000 }, { "epoch": 71.37, "learning_rate": 1.432902377037792e-05, "loss": 1.8748, "step": 24655500 }, { "epoch": 71.37, "learning_rate": 1.4328300122730642e-05, "loss": 1.8635, "step": 24656000 }, { "epoch": 71.37, "learning_rate": 1.4327576475083366e-05, "loss": 1.8845, "step": 24656500 }, { "epoch": 71.37, "learning_rate": 1.4326852827436088e-05, "loss": 1.89, "step": 24657000 }, { "epoch": 71.37, "learning_rate": 1.432612917978881e-05, "loss": 1.8687, "step": 24657500 }, { "epoch": 71.37, "learning_rate": 1.4325406979436828e-05, "loss": 1.8945, "step": 24658000 }, { "epoch": 71.38, "learning_rate": 1.4324683331789552e-05, "loss": 1.8893, "step": 24658500 }, { "epoch": 71.38, "learning_rate": 1.4323959684142277e-05, "loss": 1.8666, "step": 24659000 }, { "epoch": 71.38, "learning_rate": 1.4323237483790295e-05, "loss": 1.8992, "step": 24659500 }, { "epoch": 71.38, "learning_rate": 1.4322513836143017e-05, "loss": 1.8696, "step": 24660000 }, { "epoch": 71.38, "learning_rate": 1.432179018849574e-05, "loss": 1.894, "step": 24660500 }, { "epoch": 71.38, "learning_rate": 1.4321066540848463e-05, "loss": 1.8609, "step": 24661000 }, { "epoch": 71.38, "learning_rate": 1.432034434049648e-05, "loss": 1.9009, "step": 24661500 }, { "epoch": 71.39, "learning_rate": 1.4319620692849204e-05, "loss": 1.874, "step": 24662000 }, { "epoch": 71.39, "learning_rate": 1.4318897045201926e-05, "loss": 1.8875, "step": 24662500 }, { "epoch": 71.39, "learning_rate": 1.4318173397554652e-05, "loss": 1.8739, "step": 24663000 }, { "epoch": 71.39, "learning_rate": 1.4317449749907374e-05, "loss": 1.8669, "step": 24663500 }, { "epoch": 71.39, "learning_rate": 1.4316726102260098e-05, "loss": 1.8906, "step": 24664000 }, { "epoch": 71.39, "learning_rate": 1.431600245461282e-05, "loss": 1.8876, "step": 24664500 }, { "epoch": 71.4, "learning_rate": 1.4315278806965543e-05, "loss": 1.8617, "step": 24665000 }, { "epoch": 71.4, "learning_rate": 1.4314555159318266e-05, "loss": 1.8826, "step": 24665500 }, { "epoch": 71.4, "learning_rate": 1.4313832958966284e-05, "loss": 1.8774, "step": 24666000 }, { "epoch": 71.4, "learning_rate": 1.431310931131901e-05, "loss": 1.8845, "step": 24666500 }, { "epoch": 71.4, "learning_rate": 1.4312385663671732e-05, "loss": 1.8916, "step": 24667000 }, { "epoch": 71.4, "learning_rate": 1.4311662016024454e-05, "loss": 1.8989, "step": 24667500 }, { "epoch": 71.4, "learning_rate": 1.4310939815672473e-05, "loss": 1.8733, "step": 24668000 }, { "epoch": 71.41, "learning_rate": 1.4310216168025195e-05, "loss": 1.8737, "step": 24668500 }, { "epoch": 71.41, "learning_rate": 1.4309492520377917e-05, "loss": 1.907, "step": 24669000 }, { "epoch": 71.41, "learning_rate": 1.4308768872730641e-05, "loss": 1.8939, "step": 24669500 }, { "epoch": 71.41, "learning_rate": 1.4308045225083363e-05, "loss": 1.8869, "step": 24670000 }, { "epoch": 71.41, "learning_rate": 1.4307321577436089e-05, "loss": 1.8613, "step": 24670500 }, { "epoch": 71.41, "learning_rate": 1.4306599377084106e-05, "loss": 1.8879, "step": 24671000 }, { "epoch": 71.41, "learning_rate": 1.430587572943683e-05, "loss": 1.8822, "step": 24671500 }, { "epoch": 71.42, "learning_rate": 1.4305152081789552e-05, "loss": 1.9049, "step": 24672000 }, { "epoch": 71.42, "learning_rate": 1.4304428434142275e-05, "loss": 1.8591, "step": 24672500 }, { "epoch": 71.42, "learning_rate": 1.4303704786494999e-05, "loss": 1.8773, "step": 24673000 }, { "epoch": 71.42, "learning_rate": 1.4302982586143016e-05, "loss": 1.8733, "step": 24673500 }, { "epoch": 71.42, "learning_rate": 1.4302258938495741e-05, "loss": 1.8805, "step": 24674000 }, { "epoch": 71.42, "learning_rate": 1.4301535290848464e-05, "loss": 1.8657, "step": 24674500 }, { "epoch": 71.42, "learning_rate": 1.4300811643201188e-05, "loss": 1.8982, "step": 24675000 }, { "epoch": 71.43, "learning_rate": 1.430008799555391e-05, "loss": 1.8924, "step": 24675500 }, { "epoch": 71.43, "learning_rate": 1.4299365795201927e-05, "loss": 1.8875, "step": 24676000 }, { "epoch": 71.43, "learning_rate": 1.429864214755465e-05, "loss": 1.8812, "step": 24676500 }, { "epoch": 71.43, "learning_rate": 1.4297918499907373e-05, "loss": 1.8774, "step": 24677000 }, { "epoch": 71.43, "learning_rate": 1.4297194852260095e-05, "loss": 1.8638, "step": 24677500 }, { "epoch": 71.43, "learning_rate": 1.4296471204612821e-05, "loss": 1.8791, "step": 24678000 }, { "epoch": 71.43, "learning_rate": 1.4295747556965543e-05, "loss": 1.8722, "step": 24678500 }, { "epoch": 71.44, "learning_rate": 1.4295023909318267e-05, "loss": 1.8741, "step": 24679000 }, { "epoch": 71.44, "learning_rate": 1.4294301708966285e-05, "loss": 1.8931, "step": 24679500 }, { "epoch": 71.44, "learning_rate": 1.4293579508614302e-05, "loss": 1.8885, "step": 24680000 }, { "epoch": 71.44, "learning_rate": 1.4292855860967026e-05, "loss": 1.8894, "step": 24680500 }, { "epoch": 71.44, "learning_rate": 1.4292132213319748e-05, "loss": 1.8657, "step": 24681000 }, { "epoch": 71.44, "learning_rate": 1.4291408565672474e-05, "loss": 1.8917, "step": 24681500 }, { "epoch": 71.44, "learning_rate": 1.4290684918025196e-05, "loss": 1.8794, "step": 24682000 }, { "epoch": 71.45, "learning_rate": 1.428996127037792e-05, "loss": 1.887, "step": 24682500 }, { "epoch": 71.45, "learning_rate": 1.4289237622730642e-05, "loss": 1.8557, "step": 24683000 }, { "epoch": 71.45, "learning_rate": 1.4288513975083364e-05, "loss": 1.9033, "step": 24683500 }, { "epoch": 71.45, "learning_rate": 1.4287790327436088e-05, "loss": 1.8845, "step": 24684000 }, { "epoch": 71.45, "learning_rate": 1.428706667978881e-05, "loss": 1.8868, "step": 24684500 }, { "epoch": 71.45, "learning_rate": 1.4286343032141536e-05, "loss": 1.9061, "step": 24685000 }, { "epoch": 71.45, "learning_rate": 1.4285619384494258e-05, "loss": 1.8718, "step": 24685500 }, { "epoch": 71.46, "learning_rate": 1.4284895736846982e-05, "loss": 1.9068, "step": 24686000 }, { "epoch": 71.46, "learning_rate": 1.4284172089199704e-05, "loss": 1.9103, "step": 24686500 }, { "epoch": 71.46, "learning_rate": 1.4283449888847722e-05, "loss": 1.8732, "step": 24687000 }, { "epoch": 71.46, "learning_rate": 1.4282727688495739e-05, "loss": 1.8824, "step": 24687500 }, { "epoch": 71.46, "learning_rate": 1.4282004040848463e-05, "loss": 1.8785, "step": 24688000 }, { "epoch": 71.46, "learning_rate": 1.4281280393201185e-05, "loss": 1.8909, "step": 24688500 }, { "epoch": 71.46, "learning_rate": 1.428055674555391e-05, "loss": 1.8637, "step": 24689000 }, { "epoch": 71.47, "learning_rate": 1.4279833097906633e-05, "loss": 1.9061, "step": 24689500 }, { "epoch": 71.47, "learning_rate": 1.4279109450259357e-05, "loss": 1.881, "step": 24690000 }, { "epoch": 71.47, "learning_rate": 1.4278385802612079e-05, "loss": 1.8846, "step": 24690500 }, { "epoch": 71.47, "learning_rate": 1.4277662154964801e-05, "loss": 1.8541, "step": 24691000 }, { "epoch": 71.47, "learning_rate": 1.4276938507317525e-05, "loss": 1.884, "step": 24691500 }, { "epoch": 71.47, "learning_rate": 1.4276216306965542e-05, "loss": 1.8882, "step": 24692000 }, { "epoch": 71.47, "learning_rate": 1.4275492659318268e-05, "loss": 1.8652, "step": 24692500 }, { "epoch": 71.48, "learning_rate": 1.4274770458966285e-05, "loss": 1.8786, "step": 24693000 }, { "epoch": 71.48, "learning_rate": 1.427404681131901e-05, "loss": 1.8585, "step": 24693500 }, { "epoch": 71.48, "learning_rate": 1.4273323163671731e-05, "loss": 1.9002, "step": 24694000 }, { "epoch": 71.48, "learning_rate": 1.4272599516024454e-05, "loss": 1.9046, "step": 24694500 }, { "epoch": 71.48, "learning_rate": 1.4271875868377178e-05, "loss": 1.8534, "step": 24695000 }, { "epoch": 71.48, "learning_rate": 1.42711522207299e-05, "loss": 1.8648, "step": 24695500 }, { "epoch": 71.48, "learning_rate": 1.4270430020377917e-05, "loss": 1.8801, "step": 24696000 }, { "epoch": 71.49, "learning_rate": 1.4269706372730643e-05, "loss": 1.8914, "step": 24696500 }, { "epoch": 71.49, "learning_rate": 1.4268982725083365e-05, "loss": 1.8724, "step": 24697000 }, { "epoch": 71.49, "learning_rate": 1.4268259077436089e-05, "loss": 1.9047, "step": 24697500 }, { "epoch": 71.49, "learning_rate": 1.4267535429788811e-05, "loss": 1.8792, "step": 24698000 }, { "epoch": 71.49, "learning_rate": 1.4266811782141535e-05, "loss": 1.8649, "step": 24698500 }, { "epoch": 71.49, "learning_rate": 1.4266089581789552e-05, "loss": 1.8929, "step": 24699000 }, { "epoch": 71.49, "learning_rate": 1.4265365934142275e-05, "loss": 1.885, "step": 24699500 }, { "epoch": 71.5, "learning_rate": 1.4264642286494997e-05, "loss": 1.9005, "step": 24700000 }, { "epoch": 71.5, "learning_rate": 1.4263918638847722e-05, "loss": 1.8914, "step": 24700500 }, { "epoch": 71.5, "learning_rate": 1.4263194991200446e-05, "loss": 1.8813, "step": 24701000 }, { "epoch": 71.5, "learning_rate": 1.4262471343553169e-05, "loss": 1.876, "step": 24701500 }, { "epoch": 71.5, "learning_rate": 1.426174769590589e-05, "loss": 1.8742, "step": 24702000 }, { "epoch": 71.5, "learning_rate": 1.4261024048258615e-05, "loss": 1.8807, "step": 24702500 }, { "epoch": 71.51, "learning_rate": 1.4260300400611337e-05, "loss": 1.8844, "step": 24703000 }, { "epoch": 71.51, "learning_rate": 1.4259576752964063e-05, "loss": 1.8745, "step": 24703500 }, { "epoch": 71.51, "learning_rate": 1.4258853105316785e-05, "loss": 1.8904, "step": 24704000 }, { "epoch": 71.51, "learning_rate": 1.4258130904964804e-05, "loss": 1.8826, "step": 24704500 }, { "epoch": 71.51, "learning_rate": 1.4257408704612821e-05, "loss": 1.8662, "step": 24705000 }, { "epoch": 71.51, "learning_rate": 1.4256685056965543e-05, "loss": 1.8734, "step": 24705500 }, { "epoch": 71.51, "learning_rate": 1.4255961409318267e-05, "loss": 1.8754, "step": 24706000 }, { "epoch": 71.52, "learning_rate": 1.425523776167099e-05, "loss": 1.866, "step": 24706500 }, { "epoch": 71.52, "learning_rate": 1.4254514114023712e-05, "loss": 1.8669, "step": 24707000 }, { "epoch": 71.52, "learning_rate": 1.4253791913671729e-05, "loss": 1.85, "step": 24707500 }, { "epoch": 71.52, "learning_rate": 1.4253068266024455e-05, "loss": 1.8819, "step": 24708000 }, { "epoch": 71.52, "learning_rate": 1.4252346065672473e-05, "loss": 1.8823, "step": 24708500 }, { "epoch": 71.52, "learning_rate": 1.4251622418025196e-05, "loss": 1.8897, "step": 24709000 }, { "epoch": 71.52, "learning_rate": 1.4250898770377918e-05, "loss": 1.8827, "step": 24709500 }, { "epoch": 71.53, "learning_rate": 1.4250175122730642e-05, "loss": 1.8821, "step": 24710000 }, { "epoch": 71.53, "learning_rate": 1.4249451475083364e-05, "loss": 1.8895, "step": 24710500 }, { "epoch": 71.53, "learning_rate": 1.4248727827436086e-05, "loss": 1.883, "step": 24711000 }, { "epoch": 71.53, "learning_rate": 1.4248004179788812e-05, "loss": 1.8767, "step": 24711500 }, { "epoch": 71.53, "learning_rate": 1.4247280532141536e-05, "loss": 1.8859, "step": 24712000 }, { "epoch": 71.53, "learning_rate": 1.4246556884494258e-05, "loss": 1.8864, "step": 24712500 }, { "epoch": 71.53, "learning_rate": 1.424583323684698e-05, "loss": 1.8758, "step": 24713000 }, { "epoch": 71.54, "learning_rate": 1.4245109589199704e-05, "loss": 1.8713, "step": 24713500 }, { "epoch": 71.54, "learning_rate": 1.4244385941552426e-05, "loss": 1.8902, "step": 24714000 }, { "epoch": 71.54, "learning_rate": 1.4243663741200444e-05, "loss": 1.8933, "step": 24714500 }, { "epoch": 71.54, "learning_rate": 1.424294009355317e-05, "loss": 1.8801, "step": 24715000 }, { "epoch": 71.54, "learning_rate": 1.4242216445905893e-05, "loss": 1.8609, "step": 24715500 }, { "epoch": 71.54, "learning_rate": 1.4241492798258616e-05, "loss": 1.87, "step": 24716000 }, { "epoch": 71.54, "learning_rate": 1.4240769150611338e-05, "loss": 1.9036, "step": 24716500 }, { "epoch": 71.55, "learning_rate": 1.4240045502964062e-05, "loss": 1.8931, "step": 24717000 }, { "epoch": 71.55, "learning_rate": 1.4239321855316784e-05, "loss": 1.8982, "step": 24717500 }, { "epoch": 71.55, "learning_rate": 1.4238598207669506e-05, "loss": 1.8926, "step": 24718000 }, { "epoch": 71.55, "learning_rate": 1.4237874560022232e-05, "loss": 1.8721, "step": 24718500 }, { "epoch": 71.55, "learning_rate": 1.4237150912374956e-05, "loss": 1.8933, "step": 24719000 }, { "epoch": 71.55, "learning_rate": 1.4236427264727678e-05, "loss": 1.875, "step": 24719500 }, { "epoch": 71.55, "learning_rate": 1.42357036170804e-05, "loss": 1.8848, "step": 24720000 }, { "epoch": 71.56, "learning_rate": 1.4234982864023712e-05, "loss": 1.8866, "step": 24720500 }, { "epoch": 71.56, "learning_rate": 1.4234259216376436e-05, "loss": 1.8884, "step": 24721000 }, { "epoch": 71.56, "learning_rate": 1.4233535568729159e-05, "loss": 1.8721, "step": 24721500 }, { "epoch": 71.56, "learning_rate": 1.423281192108188e-05, "loss": 1.902, "step": 24722000 }, { "epoch": 71.56, "learning_rate": 1.4232088273434606e-05, "loss": 1.8911, "step": 24722500 }, { "epoch": 71.56, "learning_rate": 1.423136462578733e-05, "loss": 1.9091, "step": 24723000 }, { "epoch": 71.56, "learning_rate": 1.4230640978140053e-05, "loss": 1.8852, "step": 24723500 }, { "epoch": 71.57, "learning_rate": 1.4229917330492777e-05, "loss": 1.881, "step": 24724000 }, { "epoch": 71.57, "learning_rate": 1.4229195130140794e-05, "loss": 1.8955, "step": 24724500 }, { "epoch": 71.57, "learning_rate": 1.4228471482493516e-05, "loss": 1.8904, "step": 24725000 }, { "epoch": 71.57, "learning_rate": 1.4227747834846238e-05, "loss": 1.8894, "step": 24725500 }, { "epoch": 71.57, "learning_rate": 1.4227024187198964e-05, "loss": 1.8999, "step": 24726000 }, { "epoch": 71.57, "learning_rate": 1.4226300539551688e-05, "loss": 1.8835, "step": 24726500 }, { "epoch": 71.57, "learning_rate": 1.4225578339199705e-05, "loss": 1.8853, "step": 24727000 }, { "epoch": 71.58, "learning_rate": 1.4224856138847722e-05, "loss": 1.8918, "step": 24727500 }, { "epoch": 71.58, "learning_rate": 1.422413393849574e-05, "loss": 1.89, "step": 24728000 }, { "epoch": 71.58, "learning_rate": 1.4223410290848463e-05, "loss": 1.8697, "step": 24728500 }, { "epoch": 71.58, "learning_rate": 1.4222686643201186e-05, "loss": 1.8845, "step": 24729000 }, { "epoch": 71.58, "learning_rate": 1.4221962995553908e-05, "loss": 1.9041, "step": 24729500 }, { "epoch": 71.58, "learning_rate": 1.4221239347906634e-05, "loss": 1.8876, "step": 24730000 }, { "epoch": 71.58, "learning_rate": 1.4220515700259358e-05, "loss": 1.9244, "step": 24730500 }, { "epoch": 71.59, "learning_rate": 1.421979205261208e-05, "loss": 1.8836, "step": 24731000 }, { "epoch": 71.59, "learning_rate": 1.4219068404964802e-05, "loss": 1.8758, "step": 24731500 }, { "epoch": 71.59, "learning_rate": 1.4218346204612821e-05, "loss": 1.8853, "step": 24732000 }, { "epoch": 71.59, "learning_rate": 1.4217622556965543e-05, "loss": 1.8756, "step": 24732500 }, { "epoch": 71.59, "learning_rate": 1.4216898909318265e-05, "loss": 1.8832, "step": 24733000 }, { "epoch": 71.59, "learning_rate": 1.421617526167099e-05, "loss": 1.8944, "step": 24733500 }, { "epoch": 71.59, "learning_rate": 1.4215451614023715e-05, "loss": 1.9047, "step": 24734000 }, { "epoch": 71.6, "learning_rate": 1.4214727966376437e-05, "loss": 1.8864, "step": 24734500 }, { "epoch": 71.6, "learning_rate": 1.421400431872916e-05, "loss": 1.8651, "step": 24735000 }, { "epoch": 71.6, "learning_rate": 1.4213280671081883e-05, "loss": 1.9005, "step": 24735500 }, { "epoch": 71.6, "learning_rate": 1.4212557023434606e-05, "loss": 1.9071, "step": 24736000 }, { "epoch": 71.6, "learning_rate": 1.4211834823082623e-05, "loss": 1.8832, "step": 24736500 }, { "epoch": 71.6, "learning_rate": 1.4211111175435347e-05, "loss": 1.8773, "step": 24737000 }, { "epoch": 71.6, "learning_rate": 1.4210387527788072e-05, "loss": 1.8836, "step": 24737500 }, { "epoch": 71.61, "learning_rate": 1.4209663880140795e-05, "loss": 1.8646, "step": 24738000 }, { "epoch": 71.61, "learning_rate": 1.4208940232493517e-05, "loss": 1.8723, "step": 24738500 }, { "epoch": 71.61, "learning_rate": 1.4208218032141534e-05, "loss": 1.8848, "step": 24739000 }, { "epoch": 71.61, "learning_rate": 1.4207494384494258e-05, "loss": 1.884, "step": 24739500 }, { "epoch": 71.61, "learning_rate": 1.420677073684698e-05, "loss": 1.8682, "step": 24740000 }, { "epoch": 71.61, "learning_rate": 1.4206047089199702e-05, "loss": 1.8704, "step": 24740500 }, { "epoch": 71.62, "learning_rate": 1.4205323441552428e-05, "loss": 1.8856, "step": 24741000 }, { "epoch": 71.62, "learning_rate": 1.4204601241200447e-05, "loss": 1.9022, "step": 24741500 }, { "epoch": 71.62, "learning_rate": 1.420387759355317e-05, "loss": 1.8731, "step": 24742000 }, { "epoch": 71.62, "learning_rate": 1.4203153945905892e-05, "loss": 1.9163, "step": 24742500 }, { "epoch": 71.62, "learning_rate": 1.4202430298258615e-05, "loss": 1.8893, "step": 24743000 }, { "epoch": 71.62, "learning_rate": 1.4201706650611338e-05, "loss": 1.8811, "step": 24743500 }, { "epoch": 71.62, "learning_rate": 1.420098300296406e-05, "loss": 1.8748, "step": 24744000 }, { "epoch": 71.63, "learning_rate": 1.4200260802612079e-05, "loss": 1.8957, "step": 24744500 }, { "epoch": 71.63, "learning_rate": 1.4199537154964804e-05, "loss": 1.8737, "step": 24745000 }, { "epoch": 71.63, "learning_rate": 1.4198813507317527e-05, "loss": 1.8921, "step": 24745500 }, { "epoch": 71.63, "learning_rate": 1.4198089859670249e-05, "loss": 1.8769, "step": 24746000 }, { "epoch": 71.63, "learning_rate": 1.4197366212022973e-05, "loss": 1.8749, "step": 24746500 }, { "epoch": 71.63, "learning_rate": 1.4196642564375695e-05, "loss": 1.8711, "step": 24747000 }, { "epoch": 71.63, "learning_rate": 1.4195921811319007e-05, "loss": 1.8933, "step": 24747500 }, { "epoch": 71.64, "learning_rate": 1.419519816367173e-05, "loss": 1.924, "step": 24748000 }, { "epoch": 71.64, "learning_rate": 1.4194474516024454e-05, "loss": 1.8976, "step": 24748500 }, { "epoch": 71.64, "learning_rate": 1.4193750868377179e-05, "loss": 1.8812, "step": 24749000 }, { "epoch": 71.64, "learning_rate": 1.4193027220729901e-05, "loss": 1.8781, "step": 24749500 }, { "epoch": 71.64, "learning_rate": 1.4192303573082624e-05, "loss": 1.8805, "step": 24750000 }, { "epoch": 71.64, "learning_rate": 1.4191579925435348e-05, "loss": 1.8651, "step": 24750500 }, { "epoch": 71.64, "learning_rate": 1.419085627778807e-05, "loss": 1.9103, "step": 24751000 }, { "epoch": 71.65, "learning_rate": 1.4190132630140792e-05, "loss": 1.9052, "step": 24751500 }, { "epoch": 71.65, "learning_rate": 1.4189410429788811e-05, "loss": 1.8893, "step": 24752000 }, { "epoch": 71.65, "learning_rate": 1.4188686782141537e-05, "loss": 1.8766, "step": 24752500 }, { "epoch": 71.65, "learning_rate": 1.4187963134494259e-05, "loss": 1.8785, "step": 24753000 }, { "epoch": 71.65, "learning_rate": 1.4187239486846981e-05, "loss": 1.8648, "step": 24753500 }, { "epoch": 71.65, "learning_rate": 1.4186517286494998e-05, "loss": 1.9069, "step": 24754000 }, { "epoch": 71.65, "learning_rate": 1.4185793638847722e-05, "loss": 1.8856, "step": 24754500 }, { "epoch": 71.66, "learning_rate": 1.4185069991200444e-05, "loss": 1.8611, "step": 24755000 }, { "epoch": 71.66, "learning_rate": 1.4184346343553168e-05, "loss": 1.8885, "step": 24755500 }, { "epoch": 71.66, "learning_rate": 1.418362269590589e-05, "loss": 1.922, "step": 24756000 }, { "epoch": 71.66, "learning_rate": 1.4182899048258616e-05, "loss": 1.8972, "step": 24756500 }, { "epoch": 71.66, "learning_rate": 1.4182175400611338e-05, "loss": 1.8755, "step": 24757000 }, { "epoch": 71.66, "learning_rate": 1.4181451752964062e-05, "loss": 1.8806, "step": 24757500 }, { "epoch": 71.66, "learning_rate": 1.418072955261208e-05, "loss": 1.9101, "step": 24758000 }, { "epoch": 71.67, "learning_rate": 1.4180005904964802e-05, "loss": 1.8792, "step": 24758500 }, { "epoch": 71.67, "learning_rate": 1.4179282257317524e-05, "loss": 1.8914, "step": 24759000 }, { "epoch": 71.67, "learning_rate": 1.4178558609670248e-05, "loss": 1.8813, "step": 24759500 }, { "epoch": 71.67, "learning_rate": 1.4177834962022974e-05, "loss": 1.8837, "step": 24760000 }, { "epoch": 71.67, "learning_rate": 1.4177111314375696e-05, "loss": 1.9115, "step": 24760500 }, { "epoch": 71.67, "learning_rate": 1.417638766672842e-05, "loss": 1.8977, "step": 24761000 }, { "epoch": 71.67, "learning_rate": 1.4175664019081142e-05, "loss": 1.8872, "step": 24761500 }, { "epoch": 71.68, "learning_rate": 1.4174940371433864e-05, "loss": 1.898, "step": 24762000 }, { "epoch": 71.68, "learning_rate": 1.4174218171081882e-05, "loss": 1.8882, "step": 24762500 }, { "epoch": 71.68, "learning_rate": 1.41734959707299e-05, "loss": 1.8724, "step": 24763000 }, { "epoch": 71.68, "learning_rate": 1.4172772323082623e-05, "loss": 1.8808, "step": 24763500 }, { "epoch": 71.68, "learning_rate": 1.4172048675435348e-05, "loss": 1.8981, "step": 24764000 }, { "epoch": 71.68, "learning_rate": 1.417132502778807e-05, "loss": 1.8724, "step": 24764500 }, { "epoch": 71.68, "learning_rate": 1.4170601380140794e-05, "loss": 1.887, "step": 24765000 }, { "epoch": 71.69, "learning_rate": 1.4169879179788812e-05, "loss": 1.8841, "step": 24765500 }, { "epoch": 71.69, "learning_rate": 1.4169155532141534e-05, "loss": 1.8761, "step": 24766000 }, { "epoch": 71.69, "learning_rate": 1.4168433331789551e-05, "loss": 1.8706, "step": 24766500 }, { "epoch": 71.69, "learning_rate": 1.4167709684142275e-05, "loss": 1.8769, "step": 24767000 }, { "epoch": 71.69, "learning_rate": 1.4166986036494997e-05, "loss": 1.8945, "step": 24767500 }, { "epoch": 71.69, "learning_rate": 1.4166262388847723e-05, "loss": 1.9085, "step": 24768000 }, { "epoch": 71.69, "learning_rate": 1.4165538741200445e-05, "loss": 1.8974, "step": 24768500 }, { "epoch": 71.7, "learning_rate": 1.416481509355317e-05, "loss": 1.8756, "step": 24769000 }, { "epoch": 71.7, "learning_rate": 1.4164092893201186e-05, "loss": 1.8761, "step": 24769500 }, { "epoch": 71.7, "learning_rate": 1.4163369245553909e-05, "loss": 1.8829, "step": 24770000 }, { "epoch": 71.7, "learning_rate": 1.4162645597906633e-05, "loss": 1.8831, "step": 24770500 }, { "epoch": 71.7, "learning_rate": 1.416192339755465e-05, "loss": 1.8925, "step": 24771000 }, { "epoch": 71.7, "learning_rate": 1.4161199749907375e-05, "loss": 1.8654, "step": 24771500 }, { "epoch": 71.7, "learning_rate": 1.4160476102260098e-05, "loss": 1.8996, "step": 24772000 }, { "epoch": 71.71, "learning_rate": 1.415975245461282e-05, "loss": 1.8856, "step": 24772500 }, { "epoch": 71.71, "learning_rate": 1.4159028806965544e-05, "loss": 1.8793, "step": 24773000 }, { "epoch": 71.71, "learning_rate": 1.4158305159318266e-05, "loss": 1.8789, "step": 24773500 }, { "epoch": 71.71, "learning_rate": 1.415758151167099e-05, "loss": 1.8609, "step": 24774000 }, { "epoch": 71.71, "learning_rate": 1.4156857864023712e-05, "loss": 1.8817, "step": 24774500 }, { "epoch": 71.71, "learning_rate": 1.4156134216376438e-05, "loss": 1.8792, "step": 24775000 }, { "epoch": 71.71, "learning_rate": 1.415541056872916e-05, "loss": 1.8951, "step": 24775500 }, { "epoch": 71.72, "learning_rate": 1.4154686921081884e-05, "loss": 1.8932, "step": 24776000 }, { "epoch": 71.72, "learning_rate": 1.4153963273434606e-05, "loss": 1.8911, "step": 24776500 }, { "epoch": 71.72, "learning_rate": 1.4153241073082624e-05, "loss": 1.9067, "step": 24777000 }, { "epoch": 71.72, "learning_rate": 1.4152517425435346e-05, "loss": 1.9156, "step": 24777500 }, { "epoch": 71.72, "learning_rate": 1.415179377778807e-05, "loss": 1.8746, "step": 24778000 }, { "epoch": 71.72, "learning_rate": 1.4151070130140792e-05, "loss": 1.8662, "step": 24778500 }, { "epoch": 71.73, "learning_rate": 1.4150346482493518e-05, "loss": 1.8693, "step": 24779000 }, { "epoch": 71.73, "learning_rate": 1.4149622834846241e-05, "loss": 1.8832, "step": 24779500 }, { "epoch": 71.73, "learning_rate": 1.4148899187198964e-05, "loss": 1.8912, "step": 24780000 }, { "epoch": 71.73, "learning_rate": 1.4148176986846981e-05, "loss": 1.8895, "step": 24780500 }, { "epoch": 71.73, "learning_rate": 1.4147453339199703e-05, "loss": 1.8925, "step": 24781000 }, { "epoch": 71.73, "learning_rate": 1.4146729691552427e-05, "loss": 1.8808, "step": 24781500 }, { "epoch": 71.73, "learning_rate": 1.414600604390515e-05, "loss": 1.885, "step": 24782000 }, { "epoch": 71.74, "learning_rate": 1.4145282396257875e-05, "loss": 1.859, "step": 24782500 }, { "epoch": 71.74, "learning_rate": 1.4144558748610597e-05, "loss": 1.877, "step": 24783000 }, { "epoch": 71.74, "learning_rate": 1.4143835100963321e-05, "loss": 1.905, "step": 24783500 }, { "epoch": 71.74, "learning_rate": 1.4143111453316043e-05, "loss": 1.8734, "step": 24784000 }, { "epoch": 71.74, "learning_rate": 1.414238925296406e-05, "loss": 1.916, "step": 24784500 }, { "epoch": 71.74, "learning_rate": 1.414166705261208e-05, "loss": 1.9112, "step": 24785000 }, { "epoch": 71.74, "learning_rate": 1.4140943404964802e-05, "loss": 1.8812, "step": 24785500 }, { "epoch": 71.75, "learning_rate": 1.4140219757317524e-05, "loss": 1.8896, "step": 24786000 }, { "epoch": 71.75, "learning_rate": 1.413949610967025e-05, "loss": 1.8992, "step": 24786500 }, { "epoch": 71.75, "learning_rate": 1.4138772462022974e-05, "loss": 1.8858, "step": 24787000 }, { "epoch": 71.75, "learning_rate": 1.4138048814375696e-05, "loss": 1.8772, "step": 24787500 }, { "epoch": 71.75, "learning_rate": 1.4137326614023713e-05, "loss": 1.8821, "step": 24788000 }, { "epoch": 71.75, "learning_rate": 1.4136602966376435e-05, "loss": 1.8722, "step": 24788500 }, { "epoch": 71.75, "learning_rate": 1.413587931872916e-05, "loss": 1.8583, "step": 24789000 }, { "epoch": 71.76, "learning_rate": 1.4135155671081881e-05, "loss": 1.8446, "step": 24789500 }, { "epoch": 71.76, "learning_rate": 1.4134432023434607e-05, "loss": 1.8881, "step": 24790000 }, { "epoch": 71.76, "learning_rate": 1.413370837578733e-05, "loss": 1.8903, "step": 24790500 }, { "epoch": 71.76, "learning_rate": 1.4132984728140053e-05, "loss": 1.9005, "step": 24791000 }, { "epoch": 71.76, "learning_rate": 1.4132261080492775e-05, "loss": 1.8849, "step": 24791500 }, { "epoch": 71.76, "learning_rate": 1.41315374328455e-05, "loss": 1.8606, "step": 24792000 }, { "epoch": 71.76, "learning_rate": 1.4130815232493517e-05, "loss": 1.8866, "step": 24792500 }, { "epoch": 71.77, "learning_rate": 1.4130091584846239e-05, "loss": 1.8654, "step": 24793000 }, { "epoch": 71.77, "learning_rate": 1.4129367937198964e-05, "loss": 1.8853, "step": 24793500 }, { "epoch": 71.77, "learning_rate": 1.4128644289551687e-05, "loss": 1.8719, "step": 24794000 }, { "epoch": 71.77, "learning_rate": 1.412792064190441e-05, "loss": 1.8801, "step": 24794500 }, { "epoch": 71.77, "learning_rate": 1.4127196994257133e-05, "loss": 1.8797, "step": 24795000 }, { "epoch": 71.77, "learning_rate": 1.412647479390515e-05, "loss": 1.8544, "step": 24795500 }, { "epoch": 71.77, "learning_rate": 1.4125751146257874e-05, "loss": 1.8921, "step": 24796000 }, { "epoch": 71.78, "learning_rate": 1.4125027498610596e-05, "loss": 1.8735, "step": 24796500 }, { "epoch": 71.78, "learning_rate": 1.4124303850963319e-05, "loss": 1.8729, "step": 24797000 }, { "epoch": 71.78, "learning_rate": 1.4123580203316044e-05, "loss": 1.8614, "step": 24797500 }, { "epoch": 71.78, "learning_rate": 1.4122856555668768e-05, "loss": 1.9062, "step": 24798000 }, { "epoch": 71.78, "learning_rate": 1.412213580261208e-05, "loss": 1.8488, "step": 24798500 }, { "epoch": 71.78, "learning_rate": 1.4121412154964803e-05, "loss": 1.8947, "step": 24799000 }, { "epoch": 71.78, "learning_rate": 1.4120688507317525e-05, "loss": 1.8951, "step": 24799500 }, { "epoch": 71.79, "learning_rate": 1.4119964859670249e-05, "loss": 1.8935, "step": 24800000 }, { "epoch": 71.79, "learning_rate": 1.4119241212022971e-05, "loss": 1.9156, "step": 24800500 }, { "epoch": 71.79, "learning_rate": 1.4118519011670988e-05, "loss": 1.9226, "step": 24801000 }, { "epoch": 71.79, "learning_rate": 1.4117795364023714e-05, "loss": 1.8789, "step": 24801500 }, { "epoch": 71.79, "learning_rate": 1.4117071716376438e-05, "loss": 1.8889, "step": 24802000 }, { "epoch": 71.79, "learning_rate": 1.411634806872916e-05, "loss": 1.9036, "step": 24802500 }, { "epoch": 71.79, "learning_rate": 1.4115625868377177e-05, "loss": 1.9061, "step": 24803000 }, { "epoch": 71.8, "learning_rate": 1.41149022207299e-05, "loss": 1.8806, "step": 24803500 }, { "epoch": 71.8, "learning_rate": 1.4114178573082623e-05, "loss": 1.8785, "step": 24804000 }, { "epoch": 71.8, "learning_rate": 1.4113454925435346e-05, "loss": 1.8992, "step": 24804500 }, { "epoch": 71.8, "learning_rate": 1.4112731277788071e-05, "loss": 1.8761, "step": 24805000 }, { "epoch": 71.8, "learning_rate": 1.4112007630140795e-05, "loss": 1.9059, "step": 24805500 }, { "epoch": 71.8, "learning_rate": 1.4111283982493517e-05, "loss": 1.8971, "step": 24806000 }, { "epoch": 71.8, "learning_rate": 1.411056033484624e-05, "loss": 1.8746, "step": 24806500 }, { "epoch": 71.81, "learning_rate": 1.4109836687198964e-05, "loss": 1.8934, "step": 24807000 }, { "epoch": 71.81, "learning_rate": 1.410911448684698e-05, "loss": 1.8827, "step": 24807500 }, { "epoch": 71.81, "learning_rate": 1.4108390839199703e-05, "loss": 1.8885, "step": 24808000 }, { "epoch": 71.81, "learning_rate": 1.4107667191552429e-05, "loss": 1.8657, "step": 24808500 }, { "epoch": 71.81, "learning_rate": 1.4106943543905151e-05, "loss": 1.899, "step": 24809000 }, { "epoch": 71.81, "learning_rate": 1.4106219896257875e-05, "loss": 1.8827, "step": 24809500 }, { "epoch": 71.81, "learning_rate": 1.4105496248610597e-05, "loss": 1.8844, "step": 24810000 }, { "epoch": 71.82, "learning_rate": 1.4104774048258614e-05, "loss": 1.9015, "step": 24810500 }, { "epoch": 71.82, "learning_rate": 1.4104050400611338e-05, "loss": 1.8893, "step": 24811000 }, { "epoch": 71.82, "learning_rate": 1.410332675296406e-05, "loss": 1.8993, "step": 24811500 }, { "epoch": 71.82, "learning_rate": 1.4102603105316783e-05, "loss": 1.9115, "step": 24812000 }, { "epoch": 71.82, "learning_rate": 1.4101879457669508e-05, "loss": 1.8789, "step": 24812500 }, { "epoch": 71.82, "learning_rate": 1.4101157257317527e-05, "loss": 1.8675, "step": 24813000 }, { "epoch": 71.82, "learning_rate": 1.410043360967025e-05, "loss": 1.8618, "step": 24813500 }, { "epoch": 71.83, "learning_rate": 1.4099709962022972e-05, "loss": 1.893, "step": 24814000 }, { "epoch": 71.83, "learning_rate": 1.4098986314375696e-05, "loss": 1.8807, "step": 24814500 }, { "epoch": 71.83, "learning_rate": 1.4098262666728418e-05, "loss": 1.9032, "step": 24815000 }, { "epoch": 71.83, "learning_rate": 1.4097540466376435e-05, "loss": 1.8869, "step": 24815500 }, { "epoch": 71.83, "learning_rate": 1.4096816818729159e-05, "loss": 1.8789, "step": 24816000 }, { "epoch": 71.83, "learning_rate": 1.4096093171081885e-05, "loss": 1.8897, "step": 24816500 }, { "epoch": 71.84, "learning_rate": 1.4095370970729902e-05, "loss": 1.8909, "step": 24817000 }, { "epoch": 71.84, "learning_rate": 1.4094647323082624e-05, "loss": 1.8919, "step": 24817500 }, { "epoch": 71.84, "learning_rate": 1.4093925122730641e-05, "loss": 1.8862, "step": 24818000 }, { "epoch": 71.84, "learning_rate": 1.4093201475083365e-05, "loss": 1.8891, "step": 24818500 }, { "epoch": 71.84, "learning_rate": 1.4092477827436088e-05, "loss": 1.8762, "step": 24819000 }, { "epoch": 71.84, "learning_rate": 1.409175417978881e-05, "loss": 1.925, "step": 24819500 }, { "epoch": 71.84, "learning_rate": 1.4091030532141535e-05, "loss": 1.8882, "step": 24820000 }, { "epoch": 71.85, "learning_rate": 1.409030688449426e-05, "loss": 1.9016, "step": 24820500 }, { "epoch": 71.85, "learning_rate": 1.4089583236846982e-05, "loss": 1.9078, "step": 24821000 }, { "epoch": 71.85, "learning_rate": 1.4088859589199704e-05, "loss": 1.8862, "step": 24821500 }, { "epoch": 71.85, "learning_rate": 1.4088135941552428e-05, "loss": 1.8945, "step": 24822000 }, { "epoch": 71.85, "learning_rate": 1.408741229390515e-05, "loss": 1.881, "step": 24822500 }, { "epoch": 71.85, "learning_rate": 1.4086688646257872e-05, "loss": 1.8925, "step": 24823000 }, { "epoch": 71.85, "learning_rate": 1.4085964998610598e-05, "loss": 1.8751, "step": 24823500 }, { "epoch": 71.86, "learning_rate": 1.4085241350963322e-05, "loss": 1.8879, "step": 24824000 }, { "epoch": 71.86, "learning_rate": 1.4084519150611339e-05, "loss": 1.9001, "step": 24824500 }, { "epoch": 71.86, "learning_rate": 1.4083795502964061e-05, "loss": 1.8972, "step": 24825000 }, { "epoch": 71.86, "learning_rate": 1.4083071855316785e-05, "loss": 1.8958, "step": 24825500 }, { "epoch": 71.86, "learning_rate": 1.4082348207669507e-05, "loss": 1.9053, "step": 24826000 }, { "epoch": 71.86, "learning_rate": 1.408162456002223e-05, "loss": 1.9051, "step": 24826500 }, { "epoch": 71.86, "learning_rate": 1.4080900912374954e-05, "loss": 1.8896, "step": 24827000 }, { "epoch": 71.87, "learning_rate": 1.408017726472768e-05, "loss": 1.8819, "step": 24827500 }, { "epoch": 71.87, "learning_rate": 1.4079453617080401e-05, "loss": 1.8796, "step": 24828000 }, { "epoch": 71.87, "learning_rate": 1.4078731416728419e-05, "loss": 1.8844, "step": 24828500 }, { "epoch": 71.87, "learning_rate": 1.4078007769081143e-05, "loss": 1.8616, "step": 24829000 }, { "epoch": 71.87, "learning_rate": 1.4077284121433865e-05, "loss": 1.86, "step": 24829500 }, { "epoch": 71.87, "learning_rate": 1.4076560473786587e-05, "loss": 1.9117, "step": 24830000 }, { "epoch": 71.87, "learning_rate": 1.4075836826139311e-05, "loss": 1.8907, "step": 24830500 }, { "epoch": 71.88, "learning_rate": 1.4075113178492037e-05, "loss": 1.8814, "step": 24831000 }, { "epoch": 71.88, "learning_rate": 1.4074389530844759e-05, "loss": 1.8909, "step": 24831500 }, { "epoch": 71.88, "learning_rate": 1.4073667330492776e-05, "loss": 1.9018, "step": 24832000 }, { "epoch": 71.88, "learning_rate": 1.4072945130140793e-05, "loss": 1.8894, "step": 24832500 }, { "epoch": 71.88, "learning_rate": 1.4072221482493517e-05, "loss": 1.8865, "step": 24833000 }, { "epoch": 71.88, "learning_rate": 1.407149783484624e-05, "loss": 1.8911, "step": 24833500 }, { "epoch": 71.88, "learning_rate": 1.4070774187198962e-05, "loss": 1.8781, "step": 24834000 }, { "epoch": 71.89, "learning_rate": 1.4070050539551686e-05, "loss": 1.8992, "step": 24834500 }, { "epoch": 71.89, "learning_rate": 1.4069326891904411e-05, "loss": 1.8814, "step": 24835000 }, { "epoch": 71.89, "learning_rate": 1.4068603244257134e-05, "loss": 1.8823, "step": 24835500 }, { "epoch": 71.89, "learning_rate": 1.4067879596609856e-05, "loss": 1.8915, "step": 24836000 }, { "epoch": 71.89, "learning_rate": 1.406715594896258e-05, "loss": 1.8951, "step": 24836500 }, { "epoch": 71.89, "learning_rate": 1.4066433748610597e-05, "loss": 1.8732, "step": 24837000 }, { "epoch": 71.89, "learning_rate": 1.406571010096332e-05, "loss": 1.8943, "step": 24837500 }, { "epoch": 71.9, "learning_rate": 1.4064986453316043e-05, "loss": 1.883, "step": 24838000 }, { "epoch": 71.9, "learning_rate": 1.4064262805668769e-05, "loss": 1.8977, "step": 24838500 }, { "epoch": 71.9, "learning_rate": 1.4063539158021491e-05, "loss": 1.9118, "step": 24839000 }, { "epoch": 71.9, "learning_rate": 1.4062815510374213e-05, "loss": 1.8844, "step": 24839500 }, { "epoch": 71.9, "learning_rate": 1.4062091862726937e-05, "loss": 1.8647, "step": 24840000 }, { "epoch": 71.9, "learning_rate": 1.4061369662374954e-05, "loss": 1.9022, "step": 24840500 }, { "epoch": 71.9, "learning_rate": 1.4060647462022972e-05, "loss": 1.8929, "step": 24841000 }, { "epoch": 71.91, "learning_rate": 1.4059923814375694e-05, "loss": 1.8927, "step": 24841500 }, { "epoch": 71.91, "learning_rate": 1.4059200166728418e-05, "loss": 1.8757, "step": 24842000 }, { "epoch": 71.91, "learning_rate": 1.4058476519081143e-05, "loss": 1.8699, "step": 24842500 }, { "epoch": 71.91, "learning_rate": 1.4057752871433866e-05, "loss": 1.861, "step": 24843000 }, { "epoch": 71.91, "learning_rate": 1.4057029223786588e-05, "loss": 1.8779, "step": 24843500 }, { "epoch": 71.91, "learning_rate": 1.4056305576139312e-05, "loss": 1.8863, "step": 24844000 }, { "epoch": 71.91, "learning_rate": 1.4055583375787329e-05, "loss": 1.9079, "step": 24844500 }, { "epoch": 71.92, "learning_rate": 1.4054859728140051e-05, "loss": 1.9071, "step": 24845000 }, { "epoch": 71.92, "learning_rate": 1.4054136080492775e-05, "loss": 1.8976, "step": 24845500 }, { "epoch": 71.92, "learning_rate": 1.4053412432845501e-05, "loss": 1.8703, "step": 24846000 }, { "epoch": 71.92, "learning_rate": 1.4052688785198223e-05, "loss": 1.8747, "step": 24846500 }, { "epoch": 71.92, "learning_rate": 1.405196658484624e-05, "loss": 1.8684, "step": 24847000 }, { "epoch": 71.92, "learning_rate": 1.4051242937198964e-05, "loss": 1.8718, "step": 24847500 }, { "epoch": 71.92, "learning_rate": 1.4050519289551687e-05, "loss": 1.8695, "step": 24848000 }, { "epoch": 71.93, "learning_rate": 1.4049795641904409e-05, "loss": 1.8771, "step": 24848500 }, { "epoch": 71.93, "learning_rate": 1.4049071994257133e-05, "loss": 1.887, "step": 24849000 }, { "epoch": 71.93, "learning_rate": 1.4048348346609858e-05, "loss": 1.8804, "step": 24849500 }, { "epoch": 71.93, "learning_rate": 1.404762469896258e-05, "loss": 1.8802, "step": 24850000 }, { "epoch": 71.93, "learning_rate": 1.4046902498610598e-05, "loss": 1.9156, "step": 24850500 }, { "epoch": 71.93, "learning_rate": 1.404617885096332e-05, "loss": 1.8719, "step": 24851000 }, { "epoch": 71.93, "learning_rate": 1.4045455203316044e-05, "loss": 1.898, "step": 24851500 }, { "epoch": 71.94, "learning_rate": 1.4044731555668766e-05, "loss": 1.8925, "step": 24852000 }, { "epoch": 71.94, "learning_rate": 1.404400790802149e-05, "loss": 1.8844, "step": 24852500 }, { "epoch": 71.94, "learning_rate": 1.4043284260374212e-05, "loss": 1.8817, "step": 24853000 }, { "epoch": 71.94, "learning_rate": 1.4042560612726938e-05, "loss": 1.8931, "step": 24853500 }, { "epoch": 71.94, "learning_rate": 1.404183696507966e-05, "loss": 1.8874, "step": 24854000 }, { "epoch": 71.94, "learning_rate": 1.4041113317432384e-05, "loss": 1.8682, "step": 24854500 }, { "epoch": 71.95, "learning_rate": 1.4040389669785106e-05, "loss": 1.9016, "step": 24855000 }, { "epoch": 71.95, "learning_rate": 1.4039667469433124e-05, "loss": 1.8906, "step": 24855500 }, { "epoch": 71.95, "learning_rate": 1.4038943821785846e-05, "loss": 1.8825, "step": 24856000 }, { "epoch": 71.95, "learning_rate": 1.403822017413857e-05, "loss": 1.8794, "step": 24856500 }, { "epoch": 71.95, "learning_rate": 1.4037496526491295e-05, "loss": 1.8867, "step": 24857000 }, { "epoch": 71.95, "learning_rate": 1.4036772878844018e-05, "loss": 1.8756, "step": 24857500 }, { "epoch": 71.95, "learning_rate": 1.4036050678492035e-05, "loss": 1.9, "step": 24858000 }, { "epoch": 71.96, "learning_rate": 1.4035327030844759e-05, "loss": 1.9016, "step": 24858500 }, { "epoch": 71.96, "learning_rate": 1.4034604830492776e-05, "loss": 1.8906, "step": 24859000 }, { "epoch": 71.96, "learning_rate": 1.4033881182845498e-05, "loss": 1.8642, "step": 24859500 }, { "epoch": 71.96, "learning_rate": 1.4033157535198222e-05, "loss": 1.8752, "step": 24860000 }, { "epoch": 71.96, "learning_rate": 1.4032433887550944e-05, "loss": 1.9147, "step": 24860500 }, { "epoch": 71.96, "learning_rate": 1.403171023990367e-05, "loss": 1.8641, "step": 24861000 }, { "epoch": 71.96, "learning_rate": 1.4030986592256392e-05, "loss": 1.8899, "step": 24861500 }, { "epoch": 71.97, "learning_rate": 1.4030262944609116e-05, "loss": 1.858, "step": 24862000 }, { "epoch": 71.97, "learning_rate": 1.4029540744257133e-05, "loss": 1.8832, "step": 24862500 }, { "epoch": 71.97, "learning_rate": 1.4028817096609856e-05, "loss": 1.8855, "step": 24863000 }, { "epoch": 71.97, "learning_rate": 1.4028093448962578e-05, "loss": 1.8685, "step": 24863500 }, { "epoch": 71.97, "learning_rate": 1.4027369801315302e-05, "loss": 1.8748, "step": 24864000 }, { "epoch": 71.97, "learning_rate": 1.4026646153668027e-05, "loss": 1.8782, "step": 24864500 }, { "epoch": 71.97, "learning_rate": 1.402592250602075e-05, "loss": 1.905, "step": 24865000 }, { "epoch": 71.98, "learning_rate": 1.4025198858373474e-05, "loss": 1.8841, "step": 24865500 }, { "epoch": 71.98, "learning_rate": 1.4024475210726196e-05, "loss": 1.8816, "step": 24866000 }, { "epoch": 71.98, "learning_rate": 1.4023753010374213e-05, "loss": 1.8704, "step": 24866500 }, { "epoch": 71.98, "learning_rate": 1.4023029362726935e-05, "loss": 1.8972, "step": 24867000 }, { "epoch": 71.98, "learning_rate": 1.402230571507966e-05, "loss": 1.898, "step": 24867500 }, { "epoch": 71.98, "learning_rate": 1.4021582067432382e-05, "loss": 1.8637, "step": 24868000 }, { "epoch": 71.98, "learning_rate": 1.4020858419785107e-05, "loss": 1.8599, "step": 24868500 }, { "epoch": 71.99, "learning_rate": 1.402013477213783e-05, "loss": 1.8968, "step": 24869000 }, { "epoch": 71.99, "learning_rate": 1.4019411124490553e-05, "loss": 1.899, "step": 24869500 }, { "epoch": 71.99, "learning_rate": 1.4018687476843276e-05, "loss": 1.8986, "step": 24870000 }, { "epoch": 71.99, "learning_rate": 1.4017965276491293e-05, "loss": 1.8976, "step": 24870500 }, { "epoch": 71.99, "learning_rate": 1.4017241628844017e-05, "loss": 1.8898, "step": 24871000 }, { "epoch": 71.99, "learning_rate": 1.4016517981196739e-05, "loss": 1.9014, "step": 24871500 }, { "epoch": 71.99, "learning_rate": 1.4015794333549465e-05, "loss": 1.903, "step": 24872000 }, { "epoch": 72.0, "learning_rate": 1.4015070685902187e-05, "loss": 1.8719, "step": 24872500 }, { "epoch": 72.0, "learning_rate": 1.4014348485550206e-05, "loss": 1.8914, "step": 24873000 }, { "epoch": 72.0, "learning_rate": 1.4013624837902928e-05, "loss": 1.9047, "step": 24873500 }, { "epoch": 72.0, "eval_accuracy": 0.6825389571649303, "eval_accuracy_mlm": 0.6505490265045512, "eval_accuracy_nsp": 0.8542735101492716, "eval_loss": 2.159640312194824, "eval_runtime": 331.9377, "eval_samples_per_second": 1314.662, "eval_steps_per_second": 54.778, "step": 24873984 }, { "epoch": 72.0, "learning_rate": 1.401290119025565e-05, "loss": 1.8871, "step": 24874000 }, { "epoch": 72.0, "learning_rate": 1.4012177542608374e-05, "loss": 1.8634, "step": 24874500 }, { "epoch": 72.0, "learning_rate": 1.4011455342256391e-05, "loss": 1.8705, "step": 24875000 }, { "epoch": 72.0, "learning_rate": 1.4010731694609114e-05, "loss": 1.8847, "step": 24875500 }, { "epoch": 72.01, "learning_rate": 1.401000804696184e-05, "loss": 1.8534, "step": 24876000 }, { "epoch": 72.01, "learning_rate": 1.4009284399314561e-05, "loss": 1.8673, "step": 24876500 }, { "epoch": 72.01, "learning_rate": 1.4008560751667285e-05, "loss": 1.8791, "step": 24877000 }, { "epoch": 72.01, "learning_rate": 1.4007837104020008e-05, "loss": 1.8621, "step": 24877500 }, { "epoch": 72.01, "learning_rate": 1.4007113456372732e-05, "loss": 1.8511, "step": 24878000 }, { "epoch": 72.01, "learning_rate": 1.4006392703316044e-05, "loss": 1.8884, "step": 24878500 }, { "epoch": 72.01, "learning_rate": 1.4005669055668766e-05, "loss": 1.8857, "step": 24879000 }, { "epoch": 72.02, "learning_rate": 1.4004945408021492e-05, "loss": 1.8587, "step": 24879500 }, { "epoch": 72.02, "learning_rate": 1.4004221760374214e-05, "loss": 1.8599, "step": 24880000 }, { "epoch": 72.02, "learning_rate": 1.4003498112726938e-05, "loss": 1.8611, "step": 24880500 }, { "epoch": 72.02, "learning_rate": 1.400277446507966e-05, "loss": 1.8707, "step": 24881000 }, { "epoch": 72.02, "learning_rate": 1.4002050817432382e-05, "loss": 1.863, "step": 24881500 }, { "epoch": 72.02, "learning_rate": 1.4001327169785106e-05, "loss": 1.8595, "step": 24882000 }, { "epoch": 72.02, "learning_rate": 1.4000604969433123e-05, "loss": 1.8706, "step": 24882500 }, { "epoch": 72.03, "learning_rate": 1.3999881321785846e-05, "loss": 1.8959, "step": 24883000 }, { "epoch": 72.03, "learning_rate": 1.3999157674138571e-05, "loss": 1.8828, "step": 24883500 }, { "epoch": 72.03, "learning_rate": 1.3998434026491295e-05, "loss": 1.8807, "step": 24884000 }, { "epoch": 72.03, "learning_rate": 1.3997710378844018e-05, "loss": 1.8926, "step": 24884500 }, { "epoch": 72.03, "learning_rate": 1.399698673119674e-05, "loss": 1.8784, "step": 24885000 }, { "epoch": 72.03, "learning_rate": 1.3996263083549464e-05, "loss": 1.8672, "step": 24885500 }, { "epoch": 72.03, "learning_rate": 1.3995539435902186e-05, "loss": 1.8868, "step": 24886000 }, { "epoch": 72.04, "learning_rate": 1.3994815788254908e-05, "loss": 1.8977, "step": 24886500 }, { "epoch": 72.04, "learning_rate": 1.3994092140607634e-05, "loss": 1.8788, "step": 24887000 }, { "epoch": 72.04, "learning_rate": 1.3993368492960358e-05, "loss": 1.8851, "step": 24887500 }, { "epoch": 72.04, "learning_rate": 1.399264484531308e-05, "loss": 1.897, "step": 24888000 }, { "epoch": 72.04, "learning_rate": 1.3991922644961097e-05, "loss": 1.8768, "step": 24888500 }, { "epoch": 72.04, "learning_rate": 1.399119899731382e-05, "loss": 1.8577, "step": 24889000 }, { "epoch": 72.04, "learning_rate": 1.3990475349666543e-05, "loss": 1.8748, "step": 24889500 }, { "epoch": 72.05, "learning_rate": 1.398975314931456e-05, "loss": 1.8639, "step": 24890000 }, { "epoch": 72.05, "learning_rate": 1.3989029501667286e-05, "loss": 1.8649, "step": 24890500 }, { "epoch": 72.05, "learning_rate": 1.3988305854020008e-05, "loss": 1.8843, "step": 24891000 }, { "epoch": 72.05, "learning_rate": 1.3987582206372732e-05, "loss": 1.8602, "step": 24891500 }, { "epoch": 72.05, "learning_rate": 1.3986858558725455e-05, "loss": 1.9129, "step": 24892000 }, { "epoch": 72.05, "learning_rate": 1.3986134911078177e-05, "loss": 1.8772, "step": 24892500 }, { "epoch": 72.06, "learning_rate": 1.39854112634309e-05, "loss": 1.8649, "step": 24893000 }, { "epoch": 72.06, "learning_rate": 1.3984689063078918e-05, "loss": 1.8784, "step": 24893500 }, { "epoch": 72.06, "learning_rate": 1.398396541543164e-05, "loss": 1.9203, "step": 24894000 }, { "epoch": 72.06, "learning_rate": 1.3983243215079661e-05, "loss": 1.8729, "step": 24894500 }, { "epoch": 72.06, "learning_rate": 1.3982519567432383e-05, "loss": 1.9017, "step": 24895000 }, { "epoch": 72.06, "learning_rate": 1.3981795919785107e-05, "loss": 1.8728, "step": 24895500 }, { "epoch": 72.06, "learning_rate": 1.398107227213783e-05, "loss": 1.8608, "step": 24896000 }, { "epoch": 72.07, "learning_rate": 1.3980350071785847e-05, "loss": 1.8572, "step": 24896500 }, { "epoch": 72.07, "learning_rate": 1.397962642413857e-05, "loss": 1.8858, "step": 24897000 }, { "epoch": 72.07, "learning_rate": 1.3978902776491293e-05, "loss": 1.8913, "step": 24897500 }, { "epoch": 72.07, "learning_rate": 1.3978179128844015e-05, "loss": 1.8969, "step": 24898000 }, { "epoch": 72.07, "learning_rate": 1.397745548119674e-05, "loss": 1.8557, "step": 24898500 }, { "epoch": 72.07, "learning_rate": 1.3976731833549464e-05, "loss": 1.8702, "step": 24899000 }, { "epoch": 72.07, "learning_rate": 1.3976008185902187e-05, "loss": 1.8878, "step": 24899500 }, { "epoch": 72.08, "learning_rate": 1.3975284538254909e-05, "loss": 1.8901, "step": 24900000 }, { "epoch": 72.08, "learning_rate": 1.3974562337902928e-05, "loss": 1.914, "step": 24900500 }, { "epoch": 72.08, "learning_rate": 1.397383869025565e-05, "loss": 1.888, "step": 24901000 }, { "epoch": 72.08, "learning_rate": 1.3973115042608372e-05, "loss": 1.8777, "step": 24901500 }, { "epoch": 72.08, "learning_rate": 1.3972391394961098e-05, "loss": 1.8687, "step": 24902000 }, { "epoch": 72.08, "learning_rate": 1.3971667747313822e-05, "loss": 1.8577, "step": 24902500 }, { "epoch": 72.08, "learning_rate": 1.3970944099666544e-05, "loss": 1.8495, "step": 24903000 }, { "epoch": 72.09, "learning_rate": 1.3970220452019266e-05, "loss": 1.8829, "step": 24903500 }, { "epoch": 72.09, "learning_rate": 1.3969499698962579e-05, "loss": 1.8697, "step": 24904000 }, { "epoch": 72.09, "learning_rate": 1.3968776051315303e-05, "loss": 1.874, "step": 24904500 }, { "epoch": 72.09, "learning_rate": 1.3968052403668025e-05, "loss": 1.8813, "step": 24905000 }, { "epoch": 72.09, "learning_rate": 1.3967328756020747e-05, "loss": 1.8688, "step": 24905500 }, { "epoch": 72.09, "learning_rate": 1.3966605108373473e-05, "loss": 1.8797, "step": 24906000 }, { "epoch": 72.09, "learning_rate": 1.3965881460726197e-05, "loss": 1.8593, "step": 24906500 }, { "epoch": 72.1, "learning_rate": 1.3965157813078919e-05, "loss": 1.8614, "step": 24907000 }, { "epoch": 72.1, "learning_rate": 1.3964434165431641e-05, "loss": 1.871, "step": 24907500 }, { "epoch": 72.1, "learning_rate": 1.3963710517784365e-05, "loss": 1.8667, "step": 24908000 }, { "epoch": 72.1, "learning_rate": 1.3962986870137087e-05, "loss": 1.8618, "step": 24908500 }, { "epoch": 72.1, "learning_rate": 1.3962263222489811e-05, "loss": 1.8688, "step": 24909000 }, { "epoch": 72.1, "learning_rate": 1.3961539574842537e-05, "loss": 1.8653, "step": 24909500 }, { "epoch": 72.1, "learning_rate": 1.3960815927195259e-05, "loss": 1.8728, "step": 24910000 }, { "epoch": 72.11, "learning_rate": 1.3960093726843276e-05, "loss": 1.8844, "step": 24910500 }, { "epoch": 72.11, "learning_rate": 1.3959370079195998e-05, "loss": 1.8921, "step": 24911000 }, { "epoch": 72.11, "learning_rate": 1.3958646431548722e-05, "loss": 1.8848, "step": 24911500 }, { "epoch": 72.11, "learning_rate": 1.3957922783901445e-05, "loss": 1.8691, "step": 24912000 }, { "epoch": 72.11, "learning_rate": 1.3957199136254167e-05, "loss": 1.9062, "step": 24912500 }, { "epoch": 72.11, "learning_rate": 1.3956475488606892e-05, "loss": 1.859, "step": 24913000 }, { "epoch": 72.11, "learning_rate": 1.3955751840959616e-05, "loss": 1.8923, "step": 24913500 }, { "epoch": 72.12, "learning_rate": 1.3955028193312339e-05, "loss": 1.8871, "step": 24914000 }, { "epoch": 72.12, "learning_rate": 1.3954304545665063e-05, "loss": 1.8724, "step": 24914500 }, { "epoch": 72.12, "learning_rate": 1.3953580898017785e-05, "loss": 1.8543, "step": 24915000 }, { "epoch": 72.12, "learning_rate": 1.3952858697665802e-05, "loss": 1.8871, "step": 24915500 }, { "epoch": 72.12, "learning_rate": 1.3952135050018524e-05, "loss": 1.903, "step": 24916000 }, { "epoch": 72.12, "learning_rate": 1.395141140237125e-05, "loss": 1.8943, "step": 24916500 }, { "epoch": 72.12, "learning_rate": 1.3950689202019269e-05, "loss": 1.89, "step": 24917000 }, { "epoch": 72.13, "learning_rate": 1.3949967001667286e-05, "loss": 1.886, "step": 24917500 }, { "epoch": 72.13, "learning_rate": 1.3949243354020008e-05, "loss": 1.8388, "step": 24918000 }, { "epoch": 72.13, "learning_rate": 1.394851970637273e-05, "loss": 1.8603, "step": 24918500 }, { "epoch": 72.13, "learning_rate": 1.3947796058725454e-05, "loss": 1.8765, "step": 24919000 }, { "epoch": 72.13, "learning_rate": 1.3947072411078177e-05, "loss": 1.8506, "step": 24919500 }, { "epoch": 72.13, "learning_rate": 1.3946348763430899e-05, "loss": 1.8683, "step": 24920000 }, { "epoch": 72.13, "learning_rate": 1.3945625115783625e-05, "loss": 1.8804, "step": 24920500 }, { "epoch": 72.14, "learning_rate": 1.3944901468136349e-05, "loss": 1.857, "step": 24921000 }, { "epoch": 72.14, "learning_rate": 1.394417782048907e-05, "loss": 1.8601, "step": 24921500 }, { "epoch": 72.14, "learning_rate": 1.3943454172841795e-05, "loss": 1.8698, "step": 24922000 }, { "epoch": 72.14, "learning_rate": 1.3942731972489812e-05, "loss": 1.8611, "step": 24922500 }, { "epoch": 72.14, "learning_rate": 1.3942008324842534e-05, "loss": 1.9166, "step": 24923000 }, { "epoch": 72.14, "learning_rate": 1.3941284677195256e-05, "loss": 1.8757, "step": 24923500 }, { "epoch": 72.14, "learning_rate": 1.3940561029547982e-05, "loss": 1.8535, "step": 24924000 }, { "epoch": 72.15, "learning_rate": 1.3939837381900706e-05, "loss": 1.8807, "step": 24924500 }, { "epoch": 72.15, "learning_rate": 1.3939113734253428e-05, "loss": 1.8837, "step": 24925000 }, { "epoch": 72.15, "learning_rate": 1.393839008660615e-05, "loss": 1.8656, "step": 24925500 }, { "epoch": 72.15, "learning_rate": 1.393766788625417e-05, "loss": 1.8741, "step": 24926000 }, { "epoch": 72.15, "learning_rate": 1.3936944238606892e-05, "loss": 1.8794, "step": 24926500 }, { "epoch": 72.15, "learning_rate": 1.3936220590959614e-05, "loss": 1.8801, "step": 24927000 }, { "epoch": 72.15, "learning_rate": 1.3935496943312338e-05, "loss": 1.8841, "step": 24927500 }, { "epoch": 72.16, "learning_rate": 1.3934773295665063e-05, "loss": 1.8652, "step": 24928000 }, { "epoch": 72.16, "learning_rate": 1.3934049648017786e-05, "loss": 1.8752, "step": 24928500 }, { "epoch": 72.16, "learning_rate": 1.3933326000370508e-05, "loss": 1.8604, "step": 24929000 }, { "epoch": 72.16, "learning_rate": 1.3932602352723232e-05, "loss": 1.8614, "step": 24929500 }, { "epoch": 72.16, "learning_rate": 1.3931878705075954e-05, "loss": 1.8756, "step": 24930000 }, { "epoch": 72.16, "learning_rate": 1.3931156504723971e-05, "loss": 1.8851, "step": 24930500 }, { "epoch": 72.17, "learning_rate": 1.3930432857076695e-05, "loss": 1.8955, "step": 24931000 }, { "epoch": 72.17, "learning_rate": 1.392970920942942e-05, "loss": 1.8699, "step": 24931500 }, { "epoch": 72.17, "learning_rate": 1.3928987009077438e-05, "loss": 1.8762, "step": 24932000 }, { "epoch": 72.17, "learning_rate": 1.392826336143016e-05, "loss": 1.8839, "step": 24932500 }, { "epoch": 72.17, "learning_rate": 1.3927539713782884e-05, "loss": 1.8584, "step": 24933000 }, { "epoch": 72.17, "learning_rate": 1.3926816066135606e-05, "loss": 1.8757, "step": 24933500 }, { "epoch": 72.17, "learning_rate": 1.3926092418488329e-05, "loss": 1.8784, "step": 24934000 }, { "epoch": 72.18, "learning_rate": 1.3925368770841053e-05, "loss": 1.8722, "step": 24934500 }, { "epoch": 72.18, "learning_rate": 1.392464657048907e-05, "loss": 1.8911, "step": 24935000 }, { "epoch": 72.18, "learning_rate": 1.392392437013709e-05, "loss": 1.8791, "step": 24935500 }, { "epoch": 72.18, "learning_rate": 1.3923200722489813e-05, "loss": 1.8982, "step": 24936000 }, { "epoch": 72.18, "learning_rate": 1.3922477074842535e-05, "loss": 1.8909, "step": 24936500 }, { "epoch": 72.18, "learning_rate": 1.3921753427195259e-05, "loss": 1.8989, "step": 24937000 }, { "epoch": 72.18, "learning_rate": 1.3921031226843276e-05, "loss": 1.8885, "step": 24937500 }, { "epoch": 72.19, "learning_rate": 1.3920307579195998e-05, "loss": 1.8746, "step": 24938000 }, { "epoch": 72.19, "learning_rate": 1.391958393154872e-05, "loss": 1.859, "step": 24938500 }, { "epoch": 72.19, "learning_rate": 1.3918860283901446e-05, "loss": 1.8681, "step": 24939000 }, { "epoch": 72.19, "learning_rate": 1.391813663625417e-05, "loss": 1.8645, "step": 24939500 }, { "epoch": 72.19, "learning_rate": 1.3917412988606892e-05, "loss": 1.8908, "step": 24940000 }, { "epoch": 72.19, "learning_rate": 1.3916689340959616e-05, "loss": 1.8697, "step": 24940500 }, { "epoch": 72.19, "learning_rate": 1.3915965693312339e-05, "loss": 1.8773, "step": 24941000 }, { "epoch": 72.2, "learning_rate": 1.391524204566506e-05, "loss": 1.8595, "step": 24941500 }, { "epoch": 72.2, "learning_rate": 1.3914518398017785e-05, "loss": 1.8835, "step": 24942000 }, { "epoch": 72.2, "learning_rate": 1.391379475037051e-05, "loss": 1.8768, "step": 24942500 }, { "epoch": 72.2, "learning_rate": 1.3913071102723233e-05, "loss": 1.8718, "step": 24943000 }, { "epoch": 72.2, "learning_rate": 1.391234890237125e-05, "loss": 1.884, "step": 24943500 }, { "epoch": 72.2, "learning_rate": 1.3911625254723972e-05, "loss": 1.8675, "step": 24944000 }, { "epoch": 72.2, "learning_rate": 1.3910901607076696e-05, "loss": 1.8712, "step": 24944500 }, { "epoch": 72.21, "learning_rate": 1.3910177959429418e-05, "loss": 1.85, "step": 24945000 }, { "epoch": 72.21, "learning_rate": 1.3909454311782142e-05, "loss": 1.8632, "step": 24945500 }, { "epoch": 72.21, "learning_rate": 1.390873211143016e-05, "loss": 1.8679, "step": 24946000 }, { "epoch": 72.21, "learning_rate": 1.3908008463782885e-05, "loss": 1.8616, "step": 24946500 }, { "epoch": 72.21, "learning_rate": 1.3907284816135607e-05, "loss": 1.8913, "step": 24947000 }, { "epoch": 72.21, "learning_rate": 1.390656116848833e-05, "loss": 1.8615, "step": 24947500 }, { "epoch": 72.21, "learning_rate": 1.3905837520841053e-05, "loss": 1.888, "step": 24948000 }, { "epoch": 72.22, "learning_rate": 1.3905113873193776e-05, "loss": 1.8956, "step": 24948500 }, { "epoch": 72.22, "learning_rate": 1.3904391672841793e-05, "loss": 1.8832, "step": 24949000 }, { "epoch": 72.22, "learning_rate": 1.3903668025194517e-05, "loss": 1.8814, "step": 24949500 }, { "epoch": 72.22, "learning_rate": 1.3902944377547242e-05, "loss": 1.8949, "step": 24950000 }, { "epoch": 72.22, "learning_rate": 1.3902220729899965e-05, "loss": 1.8788, "step": 24950500 }, { "epoch": 72.22, "learning_rate": 1.3901497082252687e-05, "loss": 1.8662, "step": 24951000 }, { "epoch": 72.22, "learning_rate": 1.390077343460541e-05, "loss": 1.876, "step": 24951500 }, { "epoch": 72.23, "learning_rate": 1.3900049786958133e-05, "loss": 1.8857, "step": 24952000 }, { "epoch": 72.23, "learning_rate": 1.389932758660615e-05, "loss": 1.8506, "step": 24952500 }, { "epoch": 72.23, "learning_rate": 1.3898603938958874e-05, "loss": 1.8743, "step": 24953000 }, { "epoch": 72.23, "learning_rate": 1.3897880291311596e-05, "loss": 1.8903, "step": 24953500 }, { "epoch": 72.23, "learning_rate": 1.3897158090959617e-05, "loss": 1.856, "step": 24954000 }, { "epoch": 72.23, "learning_rate": 1.389643444331234e-05, "loss": 1.869, "step": 24954500 }, { "epoch": 72.23, "learning_rate": 1.3895710795665062e-05, "loss": 1.8539, "step": 24955000 }, { "epoch": 72.24, "learning_rate": 1.3894987148017785e-05, "loss": 1.8558, "step": 24955500 }, { "epoch": 72.24, "learning_rate": 1.3894264947665803e-05, "loss": 1.8682, "step": 24956000 }, { "epoch": 72.24, "learning_rate": 1.3893541300018525e-05, "loss": 1.8798, "step": 24956500 }, { "epoch": 72.24, "learning_rate": 1.3892817652371249e-05, "loss": 1.8684, "step": 24957000 }, { "epoch": 72.24, "learning_rate": 1.3892094004723971e-05, "loss": 1.8592, "step": 24957500 }, { "epoch": 72.24, "learning_rate": 1.3891370357076697e-05, "loss": 1.8605, "step": 24958000 }, { "epoch": 72.24, "learning_rate": 1.3890646709429419e-05, "loss": 1.8812, "step": 24958500 }, { "epoch": 72.25, "learning_rate": 1.3889923061782143e-05, "loss": 1.8793, "step": 24959000 }, { "epoch": 72.25, "learning_rate": 1.3889199414134865e-05, "loss": 1.8676, "step": 24959500 }, { "epoch": 72.25, "learning_rate": 1.3888478661078177e-05, "loss": 1.8991, "step": 24960000 }, { "epoch": 72.25, "learning_rate": 1.38877550134309e-05, "loss": 1.8731, "step": 24960500 }, { "epoch": 72.25, "learning_rate": 1.3887031365783624e-05, "loss": 1.8958, "step": 24961000 }, { "epoch": 72.25, "learning_rate": 1.388630771813635e-05, "loss": 1.8634, "step": 24961500 }, { "epoch": 72.25, "learning_rate": 1.3885584070489071e-05, "loss": 1.8846, "step": 24962000 }, { "epoch": 72.26, "learning_rate": 1.3884861870137089e-05, "loss": 1.8756, "step": 24962500 }, { "epoch": 72.26, "learning_rate": 1.3884138222489813e-05, "loss": 1.8771, "step": 24963000 }, { "epoch": 72.26, "learning_rate": 1.3883414574842535e-05, "loss": 1.8946, "step": 24963500 }, { "epoch": 72.26, "learning_rate": 1.3882690927195257e-05, "loss": 1.8824, "step": 24964000 }, { "epoch": 72.26, "learning_rate": 1.3881968726843276e-05, "loss": 1.8743, "step": 24964500 }, { "epoch": 72.26, "learning_rate": 1.3881245079195998e-05, "loss": 1.8816, "step": 24965000 }, { "epoch": 72.26, "learning_rate": 1.3880521431548724e-05, "loss": 1.8836, "step": 24965500 }, { "epoch": 72.27, "learning_rate": 1.3879797783901446e-05, "loss": 1.8755, "step": 24966000 }, { "epoch": 72.27, "learning_rate": 1.387907413625417e-05, "loss": 1.8872, "step": 24966500 }, { "epoch": 72.27, "learning_rate": 1.3878350488606892e-05, "loss": 1.8663, "step": 24967000 }, { "epoch": 72.27, "learning_rate": 1.3877626840959615e-05, "loss": 1.8688, "step": 24967500 }, { "epoch": 72.27, "learning_rate": 1.3876903193312338e-05, "loss": 1.8572, "step": 24968000 }, { "epoch": 72.27, "learning_rate": 1.387617954566506e-05, "loss": 1.8948, "step": 24968500 }, { "epoch": 72.28, "learning_rate": 1.3875455898017786e-05, "loss": 1.8804, "step": 24969000 }, { "epoch": 72.28, "learning_rate": 1.3874732250370509e-05, "loss": 1.86, "step": 24969500 }, { "epoch": 72.28, "learning_rate": 1.3874008602723232e-05, "loss": 1.8877, "step": 24970000 }, { "epoch": 72.28, "learning_rate": 1.3873284955075955e-05, "loss": 1.8828, "step": 24970500 }, { "epoch": 72.28, "learning_rate": 1.3872562754723972e-05, "loss": 1.8845, "step": 24971000 }, { "epoch": 72.28, "learning_rate": 1.3871842001667284e-05, "loss": 1.8605, "step": 24971500 }, { "epoch": 72.28, "learning_rate": 1.3871118354020008e-05, "loss": 1.8806, "step": 24972000 }, { "epoch": 72.29, "learning_rate": 1.387039470637273e-05, "loss": 1.8669, "step": 24972500 }, { "epoch": 72.29, "learning_rate": 1.3869671058725456e-05, "loss": 1.8814, "step": 24973000 }, { "epoch": 72.29, "learning_rate": 1.3868948858373473e-05, "loss": 1.8573, "step": 24973500 }, { "epoch": 72.29, "learning_rate": 1.3868225210726195e-05, "loss": 1.8689, "step": 24974000 }, { "epoch": 72.29, "learning_rate": 1.386750156307892e-05, "loss": 1.8683, "step": 24974500 }, { "epoch": 72.29, "learning_rate": 1.3866777915431642e-05, "loss": 1.8656, "step": 24975000 }, { "epoch": 72.29, "learning_rate": 1.3866054267784364e-05, "loss": 1.8513, "step": 24975500 }, { "epoch": 72.3, "learning_rate": 1.3865330620137088e-05, "loss": 1.8692, "step": 24976000 }, { "epoch": 72.3, "learning_rate": 1.386460697248981e-05, "loss": 1.8699, "step": 24976500 }, { "epoch": 72.3, "learning_rate": 1.386388477213783e-05, "loss": 1.8895, "step": 24977000 }, { "epoch": 72.3, "learning_rate": 1.3863161124490553e-05, "loss": 1.8737, "step": 24977500 }, { "epoch": 72.3, "learning_rate": 1.3862437476843277e-05, "loss": 1.8704, "step": 24978000 }, { "epoch": 72.3, "learning_rate": 1.3861713829195999e-05, "loss": 1.8577, "step": 24978500 }, { "epoch": 72.3, "learning_rate": 1.3860990181548721e-05, "loss": 1.8753, "step": 24979000 }, { "epoch": 72.31, "learning_rate": 1.3860266533901445e-05, "loss": 1.8604, "step": 24979500 }, { "epoch": 72.31, "learning_rate": 1.3859542886254167e-05, "loss": 1.8796, "step": 24980000 }, { "epoch": 72.31, "learning_rate": 1.3858819238606893e-05, "loss": 1.8668, "step": 24980500 }, { "epoch": 72.31, "learning_rate": 1.3858095590959615e-05, "loss": 1.874, "step": 24981000 }, { "epoch": 72.31, "learning_rate": 1.3857373390607634e-05, "loss": 1.8788, "step": 24981500 }, { "epoch": 72.31, "learning_rate": 1.3856649742960356e-05, "loss": 1.8918, "step": 24982000 }, { "epoch": 72.31, "learning_rate": 1.3855926095313079e-05, "loss": 1.8745, "step": 24982500 }, { "epoch": 72.32, "learning_rate": 1.3855203894961098e-05, "loss": 1.9018, "step": 24983000 }, { "epoch": 72.32, "learning_rate": 1.385448024731382e-05, "loss": 1.8797, "step": 24983500 }, { "epoch": 72.32, "learning_rate": 1.3853756599666542e-05, "loss": 1.8862, "step": 24984000 }, { "epoch": 72.32, "learning_rate": 1.3853032952019268e-05, "loss": 1.9026, "step": 24984500 }, { "epoch": 72.32, "learning_rate": 1.3852309304371992e-05, "loss": 1.8437, "step": 24985000 }, { "epoch": 72.32, "learning_rate": 1.3851585656724714e-05, "loss": 1.8512, "step": 24985500 }, { "epoch": 72.32, "learning_rate": 1.3850862009077436e-05, "loss": 1.8433, "step": 24986000 }, { "epoch": 72.33, "learning_rate": 1.385013836143016e-05, "loss": 1.8706, "step": 24986500 }, { "epoch": 72.33, "learning_rate": 1.3849414713782882e-05, "loss": 1.8699, "step": 24987000 }, { "epoch": 72.33, "learning_rate": 1.3848691066135605e-05, "loss": 1.8934, "step": 24987500 }, { "epoch": 72.33, "learning_rate": 1.384796741848833e-05, "loss": 1.8775, "step": 24988000 }, { "epoch": 72.33, "learning_rate": 1.3847245218136347e-05, "loss": 1.8706, "step": 24988500 }, { "epoch": 72.33, "learning_rate": 1.3846521570489071e-05, "loss": 1.8811, "step": 24989000 }, { "epoch": 72.33, "learning_rate": 1.3845797922841794e-05, "loss": 1.8661, "step": 24989500 }, { "epoch": 72.34, "learning_rate": 1.3845074275194517e-05, "loss": 1.8797, "step": 24990000 }, { "epoch": 72.34, "learning_rate": 1.384435062754724e-05, "loss": 1.8847, "step": 24990500 }, { "epoch": 72.34, "learning_rate": 1.3843626979899962e-05, "loss": 1.9003, "step": 24991000 }, { "epoch": 72.34, "learning_rate": 1.3842903332252688e-05, "loss": 1.8707, "step": 24991500 }, { "epoch": 72.34, "learning_rate": 1.3842179684605412e-05, "loss": 1.8903, "step": 24992000 }, { "epoch": 72.34, "learning_rate": 1.3841456036958134e-05, "loss": 1.8541, "step": 24992500 }, { "epoch": 72.34, "learning_rate": 1.3840732389310856e-05, "loss": 1.8902, "step": 24993000 }, { "epoch": 72.35, "learning_rate": 1.384000874166358e-05, "loss": 1.8839, "step": 24993500 }, { "epoch": 72.35, "learning_rate": 1.3839285094016302e-05, "loss": 1.8999, "step": 24994000 }, { "epoch": 72.35, "learning_rate": 1.3838561446369024e-05, "loss": 1.856, "step": 24994500 }, { "epoch": 72.35, "learning_rate": 1.383783779872175e-05, "loss": 1.8726, "step": 24995000 }, { "epoch": 72.35, "learning_rate": 1.3837117045665062e-05, "loss": 1.9216, "step": 24995500 }, { "epoch": 72.35, "learning_rate": 1.3836393398017786e-05, "loss": 1.8675, "step": 24996000 }, { "epoch": 72.35, "learning_rate": 1.3835669750370508e-05, "loss": 1.8711, "step": 24996500 }, { "epoch": 72.36, "learning_rate": 1.383494610272323e-05, "loss": 1.8746, "step": 24997000 }, { "epoch": 72.36, "learning_rate": 1.3834222455075955e-05, "loss": 1.8924, "step": 24997500 }, { "epoch": 72.36, "learning_rate": 1.3833500254723972e-05, "loss": 1.8732, "step": 24998000 }, { "epoch": 72.36, "learning_rate": 1.3832776607076694e-05, "loss": 1.8742, "step": 24998500 }, { "epoch": 72.36, "learning_rate": 1.383205295942942e-05, "loss": 1.853, "step": 24999000 }, { "epoch": 72.36, "learning_rate": 1.3831329311782144e-05, "loss": 1.8977, "step": 24999500 }, { "epoch": 72.36, "learning_rate": 1.3830605664134866e-05, "loss": 1.8859, "step": 25000000 }, { "epoch": 72.37, "learning_rate": 1.3829882016487588e-05, "loss": 1.8775, "step": 25000500 }, { "epoch": 72.37, "learning_rate": 1.3829158368840312e-05, "loss": 1.891, "step": 25001000 }, { "epoch": 72.37, "learning_rate": 1.3828434721193034e-05, "loss": 1.8604, "step": 25001500 }, { "epoch": 72.37, "learning_rate": 1.3827712520841051e-05, "loss": 1.9073, "step": 25002000 }, { "epoch": 72.37, "learning_rate": 1.3826988873193777e-05, "loss": 1.8869, "step": 25002500 }, { "epoch": 72.37, "learning_rate": 1.3826266672841794e-05, "loss": 1.8958, "step": 25003000 }, { "epoch": 72.37, "learning_rate": 1.3825543025194518e-05, "loss": 1.8678, "step": 25003500 }, { "epoch": 72.38, "learning_rate": 1.382481937754724e-05, "loss": 1.8853, "step": 25004000 }, { "epoch": 72.38, "learning_rate": 1.3824095729899963e-05, "loss": 1.8905, "step": 25004500 }, { "epoch": 72.38, "learning_rate": 1.3823372082252687e-05, "loss": 1.8541, "step": 25005000 }, { "epoch": 72.38, "learning_rate": 1.3822648434605409e-05, "loss": 1.8809, "step": 25005500 }, { "epoch": 72.38, "learning_rate": 1.3821924786958131e-05, "loss": 1.8985, "step": 25006000 }, { "epoch": 72.38, "learning_rate": 1.3821201139310857e-05, "loss": 1.9013, "step": 25006500 }, { "epoch": 72.39, "learning_rate": 1.382047749166358e-05, "loss": 1.8464, "step": 25007000 }, { "epoch": 72.39, "learning_rate": 1.3819755291311598e-05, "loss": 1.9006, "step": 25007500 }, { "epoch": 72.39, "learning_rate": 1.381903164366432e-05, "loss": 1.8793, "step": 25008000 }, { "epoch": 72.39, "learning_rate": 1.3818307996017044e-05, "loss": 1.8801, "step": 25008500 }, { "epoch": 72.39, "learning_rate": 1.3817584348369766e-05, "loss": 1.8892, "step": 25009000 }, { "epoch": 72.39, "learning_rate": 1.3816860700722489e-05, "loss": 1.8518, "step": 25009500 }, { "epoch": 72.39, "learning_rate": 1.3816137053075214e-05, "loss": 1.8862, "step": 25010000 }, { "epoch": 72.4, "learning_rate": 1.3815414852723233e-05, "loss": 1.8659, "step": 25010500 }, { "epoch": 72.4, "learning_rate": 1.3814691205075955e-05, "loss": 1.8718, "step": 25011000 }, { "epoch": 72.4, "learning_rate": 1.3813967557428678e-05, "loss": 1.8866, "step": 25011500 }, { "epoch": 72.4, "learning_rate": 1.3813243909781402e-05, "loss": 1.8958, "step": 25012000 }, { "epoch": 72.4, "learning_rate": 1.3812521709429419e-05, "loss": 1.8867, "step": 25012500 }, { "epoch": 72.4, "learning_rate": 1.3811799509077436e-05, "loss": 1.8912, "step": 25013000 }, { "epoch": 72.4, "learning_rate": 1.3811075861430158e-05, "loss": 1.8552, "step": 25013500 }, { "epoch": 72.41, "learning_rate": 1.3810352213782884e-05, "loss": 1.871, "step": 25014000 }, { "epoch": 72.41, "learning_rate": 1.3809628566135608e-05, "loss": 1.8762, "step": 25014500 }, { "epoch": 72.41, "learning_rate": 1.380890491848833e-05, "loss": 1.8619, "step": 25015000 }, { "epoch": 72.41, "learning_rate": 1.3808181270841052e-05, "loss": 1.8742, "step": 25015500 }, { "epoch": 72.41, "learning_rate": 1.3807459070489071e-05, "loss": 1.8938, "step": 25016000 }, { "epoch": 72.41, "learning_rate": 1.3806735422841793e-05, "loss": 1.8748, "step": 25016500 }, { "epoch": 72.41, "learning_rate": 1.3806011775194516e-05, "loss": 1.8666, "step": 25017000 }, { "epoch": 72.42, "learning_rate": 1.3805288127547241e-05, "loss": 1.8813, "step": 25017500 }, { "epoch": 72.42, "learning_rate": 1.3804564479899965e-05, "loss": 1.8674, "step": 25018000 }, { "epoch": 72.42, "learning_rate": 1.3803840832252687e-05, "loss": 1.8958, "step": 25018500 }, { "epoch": 72.42, "learning_rate": 1.380311718460541e-05, "loss": 1.874, "step": 25019000 }, { "epoch": 72.42, "learning_rate": 1.3802394984253429e-05, "loss": 1.8812, "step": 25019500 }, { "epoch": 72.42, "learning_rate": 1.3801671336606151e-05, "loss": 1.8638, "step": 25020000 }, { "epoch": 72.42, "learning_rate": 1.3800947688958873e-05, "loss": 1.8799, "step": 25020500 }, { "epoch": 72.43, "learning_rate": 1.3800224041311597e-05, "loss": 1.8592, "step": 25021000 }, { "epoch": 72.43, "learning_rate": 1.3799500393664323e-05, "loss": 1.8989, "step": 25021500 }, { "epoch": 72.43, "learning_rate": 1.3798776746017045e-05, "loss": 1.892, "step": 25022000 }, { "epoch": 72.43, "learning_rate": 1.3798053098369767e-05, "loss": 1.8771, "step": 25022500 }, { "epoch": 72.43, "learning_rate": 1.3797329450722491e-05, "loss": 1.8828, "step": 25023000 }, { "epoch": 72.43, "learning_rate": 1.3796605803075213e-05, "loss": 1.8794, "step": 25023500 }, { "epoch": 72.43, "learning_rate": 1.379588360272323e-05, "loss": 1.8708, "step": 25024000 }, { "epoch": 72.44, "learning_rate": 1.3795159955075953e-05, "loss": 1.8459, "step": 25024500 }, { "epoch": 72.44, "learning_rate": 1.3794436307428678e-05, "loss": 1.8847, "step": 25025000 }, { "epoch": 72.44, "learning_rate": 1.3793712659781402e-05, "loss": 1.8766, "step": 25025500 }, { "epoch": 72.44, "learning_rate": 1.379299045942942e-05, "loss": 1.8826, "step": 25026000 }, { "epoch": 72.44, "learning_rate": 1.3792266811782142e-05, "loss": 1.862, "step": 25026500 }, { "epoch": 72.44, "learning_rate": 1.3791543164134866e-05, "loss": 1.8777, "step": 25027000 }, { "epoch": 72.44, "learning_rate": 1.3790819516487588e-05, "loss": 1.8874, "step": 25027500 }, { "epoch": 72.45, "learning_rate": 1.3790097316135605e-05, "loss": 1.8683, "step": 25028000 }, { "epoch": 72.45, "learning_rate": 1.3789373668488329e-05, "loss": 1.8939, "step": 25028500 }, { "epoch": 72.45, "learning_rate": 1.3788650020841055e-05, "loss": 1.9009, "step": 25029000 }, { "epoch": 72.45, "learning_rate": 1.3787926373193777e-05, "loss": 1.8641, "step": 25029500 }, { "epoch": 72.45, "learning_rate": 1.37872027255465e-05, "loss": 1.8901, "step": 25030000 }, { "epoch": 72.45, "learning_rate": 1.3786479077899223e-05, "loss": 1.8618, "step": 25030500 }, { "epoch": 72.45, "learning_rate": 1.3785755430251945e-05, "loss": 1.8724, "step": 25031000 }, { "epoch": 72.46, "learning_rate": 1.3785033229899963e-05, "loss": 1.8733, "step": 25031500 }, { "epoch": 72.46, "learning_rate": 1.378431102954798e-05, "loss": 1.8684, "step": 25032000 }, { "epoch": 72.46, "learning_rate": 1.3783587381900704e-05, "loss": 1.8812, "step": 25032500 }, { "epoch": 72.46, "learning_rate": 1.378286373425343e-05, "loss": 1.8827, "step": 25033000 }, { "epoch": 72.46, "learning_rate": 1.3782141533901447e-05, "loss": 1.8537, "step": 25033500 }, { "epoch": 72.46, "learning_rate": 1.3781417886254169e-05, "loss": 1.8816, "step": 25034000 }, { "epoch": 72.46, "learning_rate": 1.3780694238606893e-05, "loss": 1.8908, "step": 25034500 }, { "epoch": 72.47, "learning_rate": 1.3779970590959615e-05, "loss": 1.8565, "step": 25035000 }, { "epoch": 72.47, "learning_rate": 1.3779246943312337e-05, "loss": 1.8671, "step": 25035500 }, { "epoch": 72.47, "learning_rate": 1.3778523295665061e-05, "loss": 1.8792, "step": 25036000 }, { "epoch": 72.47, "learning_rate": 1.3777799648017787e-05, "loss": 1.8804, "step": 25036500 }, { "epoch": 72.47, "learning_rate": 1.3777076000370509e-05, "loss": 1.8778, "step": 25037000 }, { "epoch": 72.47, "learning_rate": 1.3776352352723231e-05, "loss": 1.8815, "step": 25037500 }, { "epoch": 72.47, "learning_rate": 1.3775630152371249e-05, "loss": 1.8732, "step": 25038000 }, { "epoch": 72.48, "learning_rate": 1.3774906504723973e-05, "loss": 1.8839, "step": 25038500 }, { "epoch": 72.48, "learning_rate": 1.3774182857076695e-05, "loss": 1.8804, "step": 25039000 }, { "epoch": 72.48, "learning_rate": 1.3773459209429419e-05, "loss": 1.8723, "step": 25039500 }, { "epoch": 72.48, "learning_rate": 1.3772735561782144e-05, "loss": 1.8738, "step": 25040000 }, { "epoch": 72.48, "learning_rate": 1.3772011914134867e-05, "loss": 1.8914, "step": 25040500 }, { "epoch": 72.48, "learning_rate": 1.3771288266487589e-05, "loss": 1.866, "step": 25041000 }, { "epoch": 72.48, "learning_rate": 1.3770564618840313e-05, "loss": 1.8999, "step": 25041500 }, { "epoch": 72.49, "learning_rate": 1.376984241848833e-05, "loss": 1.8662, "step": 25042000 }, { "epoch": 72.49, "learning_rate": 1.3769118770841052e-05, "loss": 1.858, "step": 25042500 }, { "epoch": 72.49, "learning_rate": 1.376839657048907e-05, "loss": 1.8744, "step": 25043000 }, { "epoch": 72.49, "learning_rate": 1.3767672922841793e-05, "loss": 1.8999, "step": 25043500 }, { "epoch": 72.49, "learning_rate": 1.3766949275194519e-05, "loss": 1.8853, "step": 25044000 }, { "epoch": 72.49, "learning_rate": 1.3766225627547241e-05, "loss": 1.877, "step": 25044500 }, { "epoch": 72.5, "learning_rate": 1.3765501979899963e-05, "loss": 1.8589, "step": 25045000 }, { "epoch": 72.5, "learning_rate": 1.3764778332252687e-05, "loss": 1.8725, "step": 25045500 }, { "epoch": 72.5, "learning_rate": 1.376405468460541e-05, "loss": 1.8711, "step": 25046000 }, { "epoch": 72.5, "learning_rate": 1.3763331036958132e-05, "loss": 1.8991, "step": 25046500 }, { "epoch": 72.5, "learning_rate": 1.376260883660615e-05, "loss": 1.8827, "step": 25047000 }, { "epoch": 72.5, "learning_rate": 1.3761885188958876e-05, "loss": 1.8866, "step": 25047500 }, { "epoch": 72.5, "learning_rate": 1.3761161541311599e-05, "loss": 1.8729, "step": 25048000 }, { "epoch": 72.51, "learning_rate": 1.3760437893664321e-05, "loss": 1.9098, "step": 25048500 }, { "epoch": 72.51, "learning_rate": 1.3759714246017045e-05, "loss": 1.8625, "step": 25049000 }, { "epoch": 72.51, "learning_rate": 1.3758992045665062e-05, "loss": 1.8564, "step": 25049500 }, { "epoch": 72.51, "learning_rate": 1.3758268398017784e-05, "loss": 1.8717, "step": 25050000 }, { "epoch": 72.51, "learning_rate": 1.3757544750370508e-05, "loss": 1.8528, "step": 25050500 }, { "epoch": 72.51, "learning_rate": 1.375682110272323e-05, "loss": 1.8895, "step": 25051000 }, { "epoch": 72.51, "learning_rate": 1.3756100349666543e-05, "loss": 1.8639, "step": 25051500 }, { "epoch": 72.52, "learning_rate": 1.3755376702019268e-05, "loss": 1.8971, "step": 25052000 }, { "epoch": 72.52, "learning_rate": 1.375465305437199e-05, "loss": 1.8871, "step": 25052500 }, { "epoch": 72.52, "learning_rate": 1.3753929406724714e-05, "loss": 1.879, "step": 25053000 }, { "epoch": 72.52, "learning_rate": 1.3753205759077437e-05, "loss": 1.8529, "step": 25053500 }, { "epoch": 72.52, "learning_rate": 1.3752482111430159e-05, "loss": 1.898, "step": 25054000 }, { "epoch": 72.52, "learning_rate": 1.3751758463782883e-05, "loss": 1.8854, "step": 25054500 }, { "epoch": 72.52, "learning_rate": 1.3751034816135605e-05, "loss": 1.8908, "step": 25055000 }, { "epoch": 72.53, "learning_rate": 1.3750312615783626e-05, "loss": 1.9036, "step": 25055500 }, { "epoch": 72.53, "learning_rate": 1.3749588968136348e-05, "loss": 1.8843, "step": 25056000 }, { "epoch": 72.53, "learning_rate": 1.374886532048907e-05, "loss": 1.8642, "step": 25056500 }, { "epoch": 72.53, "learning_rate": 1.374814312013709e-05, "loss": 1.8729, "step": 25057000 }, { "epoch": 72.53, "learning_rate": 1.3747420919785106e-05, "loss": 1.8694, "step": 25057500 }, { "epoch": 72.53, "learning_rate": 1.3746697272137829e-05, "loss": 1.8845, "step": 25058000 }, { "epoch": 72.53, "learning_rate": 1.3745973624490553e-05, "loss": 1.8816, "step": 25058500 }, { "epoch": 72.54, "learning_rate": 1.3745249976843275e-05, "loss": 1.8713, "step": 25059000 }, { "epoch": 72.54, "learning_rate": 1.3744526329196e-05, "loss": 1.8875, "step": 25059500 }, { "epoch": 72.54, "learning_rate": 1.3743802681548723e-05, "loss": 1.8803, "step": 25060000 }, { "epoch": 72.54, "learning_rate": 1.3743079033901447e-05, "loss": 1.8981, "step": 25060500 }, { "epoch": 72.54, "learning_rate": 1.3742355386254169e-05, "loss": 1.91, "step": 25061000 }, { "epoch": 72.54, "learning_rate": 1.3741631738606891e-05, "loss": 1.8671, "step": 25061500 }, { "epoch": 72.54, "learning_rate": 1.3740909538254908e-05, "loss": 1.9003, "step": 25062000 }, { "epoch": 72.55, "learning_rate": 1.3740185890607632e-05, "loss": 1.8812, "step": 25062500 }, { "epoch": 72.55, "learning_rate": 1.3739462242960358e-05, "loss": 1.8933, "step": 25063000 }, { "epoch": 72.55, "learning_rate": 1.373873859531308e-05, "loss": 1.8749, "step": 25063500 }, { "epoch": 72.55, "learning_rate": 1.3738016394961097e-05, "loss": 1.8899, "step": 25064000 }, { "epoch": 72.55, "learning_rate": 1.3737292747313821e-05, "loss": 1.8715, "step": 25064500 }, { "epoch": 72.55, "learning_rate": 1.3736569099666544e-05, "loss": 1.8846, "step": 25065000 }, { "epoch": 72.55, "learning_rate": 1.3735845452019266e-05, "loss": 1.8868, "step": 25065500 }, { "epoch": 72.56, "learning_rate": 1.373512180437199e-05, "loss": 1.8727, "step": 25066000 }, { "epoch": 72.56, "learning_rate": 1.3734398156724715e-05, "loss": 1.8718, "step": 25066500 }, { "epoch": 72.56, "learning_rate": 1.3733674509077438e-05, "loss": 1.8711, "step": 25067000 }, { "epoch": 72.56, "learning_rate": 1.373295086143016e-05, "loss": 1.8957, "step": 25067500 }, { "epoch": 72.56, "learning_rate": 1.3732227213782884e-05, "loss": 1.883, "step": 25068000 }, { "epoch": 72.56, "learning_rate": 1.3731503566135606e-05, "loss": 1.8717, "step": 25068500 }, { "epoch": 72.56, "learning_rate": 1.373077991848833e-05, "loss": 1.8789, "step": 25069000 }, { "epoch": 72.57, "learning_rate": 1.3730057718136347e-05, "loss": 1.8554, "step": 25069500 }, { "epoch": 72.57, "learning_rate": 1.372933407048907e-05, "loss": 1.8919, "step": 25070000 }, { "epoch": 72.57, "learning_rate": 1.3728610422841795e-05, "loss": 1.896, "step": 25070500 }, { "epoch": 72.57, "learning_rate": 1.3727886775194517e-05, "loss": 1.895, "step": 25071000 }, { "epoch": 72.57, "learning_rate": 1.3727163127547241e-05, "loss": 1.8659, "step": 25071500 }, { "epoch": 72.57, "learning_rate": 1.3726439479899963e-05, "loss": 1.861, "step": 25072000 }, { "epoch": 72.57, "learning_rate": 1.3725715832252686e-05, "loss": 1.8815, "step": 25072500 }, { "epoch": 72.58, "learning_rate": 1.372499218460541e-05, "loss": 1.8793, "step": 25073000 }, { "epoch": 72.58, "learning_rate": 1.3724268536958132e-05, "loss": 1.8824, "step": 25073500 }, { "epoch": 72.58, "learning_rate": 1.3723546336606152e-05, "loss": 1.8831, "step": 25074000 }, { "epoch": 72.58, "learning_rate": 1.372282413625417e-05, "loss": 1.872, "step": 25074500 }, { "epoch": 72.58, "learning_rate": 1.3722100488606892e-05, "loss": 1.892, "step": 25075000 }, { "epoch": 72.58, "learning_rate": 1.3721376840959616e-05, "loss": 1.8851, "step": 25075500 }, { "epoch": 72.58, "learning_rate": 1.3720653193312338e-05, "loss": 1.8833, "step": 25076000 }, { "epoch": 72.59, "learning_rate": 1.3719929545665062e-05, "loss": 1.8834, "step": 25076500 }, { "epoch": 72.59, "learning_rate": 1.3719205898017784e-05, "loss": 1.9023, "step": 25077000 }, { "epoch": 72.59, "learning_rate": 1.3718483697665801e-05, "loss": 1.8742, "step": 25077500 }, { "epoch": 72.59, "learning_rate": 1.3717760050018527e-05, "loss": 1.8621, "step": 25078000 }, { "epoch": 72.59, "learning_rate": 1.371703640237125e-05, "loss": 1.8994, "step": 25078500 }, { "epoch": 72.59, "learning_rate": 1.3716312754723973e-05, "loss": 1.8788, "step": 25079000 }, { "epoch": 72.59, "learning_rate": 1.3715589107076695e-05, "loss": 1.8817, "step": 25079500 }, { "epoch": 72.6, "learning_rate": 1.3714865459429418e-05, "loss": 1.889, "step": 25080000 }, { "epoch": 72.6, "learning_rate": 1.3714141811782142e-05, "loss": 1.8921, "step": 25080500 }, { "epoch": 72.6, "learning_rate": 1.3713418164134864e-05, "loss": 1.8976, "step": 25081000 }, { "epoch": 72.6, "learning_rate": 1.371269451648759e-05, "loss": 1.8816, "step": 25081500 }, { "epoch": 72.6, "learning_rate": 1.3711970868840313e-05, "loss": 1.8806, "step": 25082000 }, { "epoch": 72.6, "learning_rate": 1.371124866848833e-05, "loss": 1.8804, "step": 25082500 }, { "epoch": 72.61, "learning_rate": 1.3710526468136348e-05, "loss": 1.8359, "step": 25083000 }, { "epoch": 72.61, "learning_rate": 1.370980282048907e-05, "loss": 1.8891, "step": 25083500 }, { "epoch": 72.61, "learning_rate": 1.3709079172841794e-05, "loss": 1.8737, "step": 25084000 }, { "epoch": 72.61, "learning_rate": 1.3708355525194516e-05, "loss": 1.8654, "step": 25084500 }, { "epoch": 72.61, "learning_rate": 1.3707631877547242e-05, "loss": 1.8917, "step": 25085000 }, { "epoch": 72.61, "learning_rate": 1.3706908229899964e-05, "loss": 1.8747, "step": 25085500 }, { "epoch": 72.61, "learning_rate": 1.3706184582252688e-05, "loss": 1.8687, "step": 25086000 }, { "epoch": 72.62, "learning_rate": 1.3705462381900705e-05, "loss": 1.8819, "step": 25086500 }, { "epoch": 72.62, "learning_rate": 1.3704738734253428e-05, "loss": 1.8601, "step": 25087000 }, { "epoch": 72.62, "learning_rate": 1.3704015086606151e-05, "loss": 1.8854, "step": 25087500 }, { "epoch": 72.62, "learning_rate": 1.3703291438958874e-05, "loss": 1.8835, "step": 25088000 }, { "epoch": 72.62, "learning_rate": 1.3702567791311596e-05, "loss": 1.8549, "step": 25088500 }, { "epoch": 72.62, "learning_rate": 1.3701844143664322e-05, "loss": 1.8711, "step": 25089000 }, { "epoch": 72.62, "learning_rate": 1.3701120496017045e-05, "loss": 1.891, "step": 25089500 }, { "epoch": 72.63, "learning_rate": 1.3700396848369768e-05, "loss": 1.8963, "step": 25090000 }, { "epoch": 72.63, "learning_rate": 1.369967320072249e-05, "loss": 1.8599, "step": 25090500 }, { "epoch": 72.63, "learning_rate": 1.3698951000370507e-05, "loss": 1.893, "step": 25091000 }, { "epoch": 72.63, "learning_rate": 1.3698227352723231e-05, "loss": 1.9034, "step": 25091500 }, { "epoch": 72.63, "learning_rate": 1.3697503705075953e-05, "loss": 1.8472, "step": 25092000 }, { "epoch": 72.63, "learning_rate": 1.3696780057428679e-05, "loss": 1.8912, "step": 25092500 }, { "epoch": 72.63, "learning_rate": 1.3696056409781401e-05, "loss": 1.8733, "step": 25093000 }, { "epoch": 72.64, "learning_rate": 1.3695332762134125e-05, "loss": 1.9167, "step": 25093500 }, { "epoch": 72.64, "learning_rate": 1.3694609114486847e-05, "loss": 1.8613, "step": 25094000 }, { "epoch": 72.64, "learning_rate": 1.369388836143016e-05, "loss": 1.8938, "step": 25094500 }, { "epoch": 72.64, "learning_rate": 1.3693164713782884e-05, "loss": 1.8727, "step": 25095000 }, { "epoch": 72.64, "learning_rate": 1.3692441066135606e-05, "loss": 1.8657, "step": 25095500 }, { "epoch": 72.64, "learning_rate": 1.3691717418488328e-05, "loss": 1.8765, "step": 25096000 }, { "epoch": 72.64, "learning_rate": 1.3690993770841054e-05, "loss": 1.8799, "step": 25096500 }, { "epoch": 72.65, "learning_rate": 1.3690270123193778e-05, "loss": 1.8795, "step": 25097000 }, { "epoch": 72.65, "learning_rate": 1.36895464755465e-05, "loss": 1.8647, "step": 25097500 }, { "epoch": 72.65, "learning_rate": 1.3688822827899222e-05, "loss": 1.8844, "step": 25098000 }, { "epoch": 72.65, "learning_rate": 1.3688099180251946e-05, "loss": 1.8799, "step": 25098500 }, { "epoch": 72.65, "learning_rate": 1.3687375532604668e-05, "loss": 1.8653, "step": 25099000 }, { "epoch": 72.65, "learning_rate": 1.368665188495739e-05, "loss": 1.8699, "step": 25099500 }, { "epoch": 72.65, "learning_rate": 1.3685929684605411e-05, "loss": 1.8643, "step": 25100000 }, { "epoch": 72.66, "learning_rate": 1.3685206036958135e-05, "loss": 1.8901, "step": 25100500 }, { "epoch": 72.66, "learning_rate": 1.3684482389310857e-05, "loss": 1.8851, "step": 25101000 }, { "epoch": 72.66, "learning_rate": 1.368375874166358e-05, "loss": 1.8771, "step": 25101500 }, { "epoch": 72.66, "learning_rate": 1.3683035094016303e-05, "loss": 1.923, "step": 25102000 }, { "epoch": 72.66, "learning_rate": 1.368231289366432e-05, "loss": 1.8559, "step": 25102500 }, { "epoch": 72.66, "learning_rate": 1.3681589246017043e-05, "loss": 1.8823, "step": 25103000 }, { "epoch": 72.66, "learning_rate": 1.3680865598369765e-05, "loss": 1.8791, "step": 25103500 }, { "epoch": 72.67, "learning_rate": 1.368014195072249e-05, "loss": 1.862, "step": 25104000 }, { "epoch": 72.67, "learning_rate": 1.3679418303075215e-05, "loss": 1.8706, "step": 25104500 }, { "epoch": 72.67, "learning_rate": 1.3678694655427937e-05, "loss": 1.9089, "step": 25105000 }, { "epoch": 72.67, "learning_rate": 1.3677971007780659e-05, "loss": 1.8601, "step": 25105500 }, { "epoch": 72.67, "learning_rate": 1.3677248807428678e-05, "loss": 1.8803, "step": 25106000 }, { "epoch": 72.67, "learning_rate": 1.36765251597814e-05, "loss": 1.9106, "step": 25106500 }, { "epoch": 72.67, "learning_rate": 1.3675801512134123e-05, "loss": 1.8577, "step": 25107000 }, { "epoch": 72.68, "learning_rate": 1.3675079311782143e-05, "loss": 1.8547, "step": 25107500 }, { "epoch": 72.68, "learning_rate": 1.3674355664134867e-05, "loss": 1.8806, "step": 25108000 }, { "epoch": 72.68, "learning_rate": 1.3673633463782884e-05, "loss": 1.8715, "step": 25108500 }, { "epoch": 72.68, "learning_rate": 1.3672909816135607e-05, "loss": 1.868, "step": 25109000 }, { "epoch": 72.68, "learning_rate": 1.3672186168488329e-05, "loss": 1.8717, "step": 25109500 }, { "epoch": 72.68, "learning_rate": 1.3671462520841053e-05, "loss": 1.8727, "step": 25110000 }, { "epoch": 72.68, "learning_rate": 1.3670738873193775e-05, "loss": 1.8972, "step": 25110500 }, { "epoch": 72.69, "learning_rate": 1.3670015225546497e-05, "loss": 1.8907, "step": 25111000 }, { "epoch": 72.69, "learning_rate": 1.3669291577899223e-05, "loss": 1.8679, "step": 25111500 }, { "epoch": 72.69, "learning_rate": 1.3668567930251947e-05, "loss": 1.8722, "step": 25112000 }, { "epoch": 72.69, "learning_rate": 1.3667844282604669e-05, "loss": 1.8558, "step": 25112500 }, { "epoch": 72.69, "learning_rate": 1.3667120634957393e-05, "loss": 1.8733, "step": 25113000 }, { "epoch": 72.69, "learning_rate": 1.3666396987310115e-05, "loss": 1.864, "step": 25113500 }, { "epoch": 72.69, "learning_rate": 1.3665673339662837e-05, "loss": 1.886, "step": 25114000 }, { "epoch": 72.7, "learning_rate": 1.3664949692015561e-05, "loss": 1.9027, "step": 25114500 }, { "epoch": 72.7, "learning_rate": 1.3664226044368287e-05, "loss": 1.8703, "step": 25115000 }, { "epoch": 72.7, "learning_rate": 1.366350239672101e-05, "loss": 1.8788, "step": 25115500 }, { "epoch": 72.7, "learning_rate": 1.3662780196369026e-05, "loss": 1.8653, "step": 25116000 }, { "epoch": 72.7, "learning_rate": 1.3662056548721749e-05, "loss": 1.8732, "step": 25116500 }, { "epoch": 72.7, "learning_rate": 1.3661332901074473e-05, "loss": 1.8982, "step": 25117000 }, { "epoch": 72.7, "learning_rate": 1.3660609253427195e-05, "loss": 1.8546, "step": 25117500 }, { "epoch": 72.71, "learning_rate": 1.3659885605779919e-05, "loss": 1.8652, "step": 25118000 }, { "epoch": 72.71, "learning_rate": 1.3659161958132643e-05, "loss": 1.8916, "step": 25118500 }, { "epoch": 72.71, "learning_rate": 1.3658438310485367e-05, "loss": 1.8954, "step": 25119000 }, { "epoch": 72.71, "learning_rate": 1.3657714662838089e-05, "loss": 1.8791, "step": 25119500 }, { "epoch": 72.71, "learning_rate": 1.3656991015190813e-05, "loss": 1.8756, "step": 25120000 }, { "epoch": 72.71, "learning_rate": 1.3656267367543535e-05, "loss": 1.8865, "step": 25120500 }, { "epoch": 72.72, "learning_rate": 1.3655543719896257e-05, "loss": 1.885, "step": 25121000 }, { "epoch": 72.72, "learning_rate": 1.3654820072248981e-05, "loss": 1.8725, "step": 25121500 }, { "epoch": 72.72, "learning_rate": 1.3654097871897e-05, "loss": 1.8815, "step": 25122000 }, { "epoch": 72.72, "learning_rate": 1.3653374224249724e-05, "loss": 1.8602, "step": 25122500 }, { "epoch": 72.72, "learning_rate": 1.3652650576602446e-05, "loss": 1.869, "step": 25123000 }, { "epoch": 72.72, "learning_rate": 1.3651926928955169e-05, "loss": 1.8745, "step": 25123500 }, { "epoch": 72.72, "learning_rate": 1.3651203281307892e-05, "loss": 1.8578, "step": 25124000 }, { "epoch": 72.73, "learning_rate": 1.365048108095591e-05, "loss": 1.8957, "step": 25124500 }, { "epoch": 72.73, "learning_rate": 1.3649758880603927e-05, "loss": 1.8732, "step": 25125000 }, { "epoch": 72.73, "learning_rate": 1.3649035232956651e-05, "loss": 1.9137, "step": 25125500 }, { "epoch": 72.73, "learning_rate": 1.3648311585309376e-05, "loss": 1.8796, "step": 25126000 }, { "epoch": 72.73, "learning_rate": 1.3647587937662099e-05, "loss": 1.866, "step": 25126500 }, { "epoch": 72.73, "learning_rate": 1.3646864290014821e-05, "loss": 1.8954, "step": 25127000 }, { "epoch": 72.73, "learning_rate": 1.3646142089662838e-05, "loss": 1.8769, "step": 25127500 }, { "epoch": 72.74, "learning_rate": 1.3645418442015562e-05, "loss": 1.8708, "step": 25128000 }, { "epoch": 72.74, "learning_rate": 1.3644694794368284e-05, "loss": 1.8932, "step": 25128500 }, { "epoch": 72.74, "learning_rate": 1.3643971146721007e-05, "loss": 1.865, "step": 25129000 }, { "epoch": 72.74, "learning_rate": 1.3643248946369026e-05, "loss": 1.8926, "step": 25129500 }, { "epoch": 72.74, "learning_rate": 1.3642525298721751e-05, "loss": 1.8587, "step": 25130000 }, { "epoch": 72.74, "learning_rate": 1.3641801651074473e-05, "loss": 1.8954, "step": 25130500 }, { "epoch": 72.74, "learning_rate": 1.3641078003427196e-05, "loss": 1.9094, "step": 25131000 }, { "epoch": 72.75, "learning_rate": 1.364035435577992e-05, "loss": 1.8856, "step": 25131500 }, { "epoch": 72.75, "learning_rate": 1.3639630708132642e-05, "loss": 1.8938, "step": 25132000 }, { "epoch": 72.75, "learning_rate": 1.3638907060485364e-05, "loss": 1.8856, "step": 25132500 }, { "epoch": 72.75, "learning_rate": 1.3638183412838088e-05, "loss": 1.8732, "step": 25133000 }, { "epoch": 72.75, "learning_rate": 1.3637461212486109e-05, "loss": 1.8667, "step": 25133500 }, { "epoch": 72.75, "learning_rate": 1.363673756483883e-05, "loss": 1.8671, "step": 25134000 }, { "epoch": 72.75, "learning_rate": 1.3636013917191553e-05, "loss": 1.8973, "step": 25134500 }, { "epoch": 72.76, "learning_rate": 1.363529171683957e-05, "loss": 1.8811, "step": 25135000 }, { "epoch": 72.76, "learning_rate": 1.3634568069192294e-05, "loss": 1.8884, "step": 25135500 }, { "epoch": 72.76, "learning_rate": 1.3633844421545016e-05, "loss": 1.9003, "step": 25136000 }, { "epoch": 72.76, "learning_rate": 1.363312077389774e-05, "loss": 1.889, "step": 25136500 }, { "epoch": 72.76, "learning_rate": 1.3632397126250464e-05, "loss": 1.8587, "step": 25137000 }, { "epoch": 72.76, "learning_rate": 1.3631673478603188e-05, "loss": 1.893, "step": 25137500 }, { "epoch": 72.76, "learning_rate": 1.363094983095591e-05, "loss": 1.9089, "step": 25138000 }, { "epoch": 72.77, "learning_rate": 1.3630226183308634e-05, "loss": 1.8787, "step": 25138500 }, { "epoch": 72.77, "learning_rate": 1.3629503982956652e-05, "loss": 1.8676, "step": 25139000 }, { "epoch": 72.77, "learning_rate": 1.3628781782604669e-05, "loss": 1.8845, "step": 25139500 }, { "epoch": 72.77, "learning_rate": 1.3628058134957391e-05, "loss": 1.8861, "step": 25140000 }, { "epoch": 72.77, "learning_rate": 1.3627334487310115e-05, "loss": 1.8828, "step": 25140500 }, { "epoch": 72.77, "learning_rate": 1.362661083966284e-05, "loss": 1.854, "step": 25141000 }, { "epoch": 72.77, "learning_rate": 1.3625887192015563e-05, "loss": 1.8758, "step": 25141500 }, { "epoch": 72.78, "learning_rate": 1.3625163544368285e-05, "loss": 1.9087, "step": 25142000 }, { "epoch": 72.78, "learning_rate": 1.3624439896721009e-05, "loss": 1.8788, "step": 25142500 }, { "epoch": 72.78, "learning_rate": 1.3623717696369026e-05, "loss": 1.8859, "step": 25143000 }, { "epoch": 72.78, "learning_rate": 1.3622994048721749e-05, "loss": 1.8812, "step": 25143500 }, { "epoch": 72.78, "learning_rate": 1.3622270401074473e-05, "loss": 1.8738, "step": 25144000 }, { "epoch": 72.78, "learning_rate": 1.3621546753427195e-05, "loss": 1.9002, "step": 25144500 }, { "epoch": 72.78, "learning_rate": 1.3620824553075215e-05, "loss": 1.8639, "step": 25145000 }, { "epoch": 72.79, "learning_rate": 1.3620102352723233e-05, "loss": 1.8939, "step": 25145500 }, { "epoch": 72.79, "learning_rate": 1.3619378705075955e-05, "loss": 1.8786, "step": 25146000 }, { "epoch": 72.79, "learning_rate": 1.3618655057428679e-05, "loss": 1.9125, "step": 25146500 }, { "epoch": 72.79, "learning_rate": 1.3617931409781401e-05, "loss": 1.8615, "step": 25147000 }, { "epoch": 72.79, "learning_rate": 1.3617207762134123e-05, "loss": 1.8553, "step": 25147500 }, { "epoch": 72.79, "learning_rate": 1.361648556178214e-05, "loss": 1.8979, "step": 25148000 }, { "epoch": 72.79, "learning_rate": 1.3615761914134864e-05, "loss": 1.8793, "step": 25148500 }, { "epoch": 72.8, "learning_rate": 1.361503826648759e-05, "loss": 1.8829, "step": 25149000 }, { "epoch": 72.8, "learning_rate": 1.3614314618840312e-05, "loss": 1.8637, "step": 25149500 }, { "epoch": 72.8, "learning_rate": 1.3613590971193036e-05, "loss": 1.856, "step": 25150000 }, { "epoch": 72.8, "learning_rate": 1.3612867323545758e-05, "loss": 1.8799, "step": 25150500 }, { "epoch": 72.8, "learning_rate": 1.361214367589848e-05, "loss": 1.8985, "step": 25151000 }, { "epoch": 72.8, "learning_rate": 1.3611420028251205e-05, "loss": 1.893, "step": 25151500 }, { "epoch": 72.8, "learning_rate": 1.3610696380603927e-05, "loss": 1.9184, "step": 25152000 }, { "epoch": 72.81, "learning_rate": 1.3609974180251947e-05, "loss": 1.9007, "step": 25152500 }, { "epoch": 72.81, "learning_rate": 1.360925053260467e-05, "loss": 1.9008, "step": 25153000 }, { "epoch": 72.81, "learning_rate": 1.3608526884957392e-05, "loss": 1.8412, "step": 25153500 }, { "epoch": 72.81, "learning_rate": 1.3607803237310116e-05, "loss": 1.8824, "step": 25154000 }, { "epoch": 72.81, "learning_rate": 1.3607079589662838e-05, "loss": 1.8788, "step": 25154500 }, { "epoch": 72.81, "learning_rate": 1.360635594201556e-05, "loss": 1.8699, "step": 25155000 }, { "epoch": 72.81, "learning_rate": 1.3605632294368284e-05, "loss": 1.9094, "step": 25155500 }, { "epoch": 72.82, "learning_rate": 1.360490864672101e-05, "loss": 1.8682, "step": 25156000 }, { "epoch": 72.82, "learning_rate": 1.3604186446369027e-05, "loss": 1.8889, "step": 25156500 }, { "epoch": 72.82, "learning_rate": 1.360346279872175e-05, "loss": 1.8738, "step": 25157000 }, { "epoch": 72.82, "learning_rate": 1.3602739151074473e-05, "loss": 1.8706, "step": 25157500 }, { "epoch": 72.82, "learning_rate": 1.360201695072249e-05, "loss": 1.8796, "step": 25158000 }, { "epoch": 72.82, "learning_rate": 1.3601293303075213e-05, "loss": 1.901, "step": 25158500 }, { "epoch": 72.83, "learning_rate": 1.360057110272323e-05, "loss": 1.9011, "step": 25159000 }, { "epoch": 72.83, "learning_rate": 1.3599847455075954e-05, "loss": 1.8848, "step": 25159500 }, { "epoch": 72.83, "learning_rate": 1.359912380742868e-05, "loss": 1.8828, "step": 25160000 }, { "epoch": 72.83, "learning_rate": 1.3598400159781402e-05, "loss": 1.9135, "step": 25160500 }, { "epoch": 72.83, "learning_rate": 1.3597676512134124e-05, "loss": 1.8933, "step": 25161000 }, { "epoch": 72.83, "learning_rate": 1.3596954311782143e-05, "loss": 1.8939, "step": 25161500 }, { "epoch": 72.83, "learning_rate": 1.3596230664134865e-05, "loss": 1.8703, "step": 25162000 }, { "epoch": 72.84, "learning_rate": 1.3595507016487587e-05, "loss": 1.8891, "step": 25162500 }, { "epoch": 72.84, "learning_rate": 1.3594783368840311e-05, "loss": 1.8896, "step": 25163000 }, { "epoch": 72.84, "learning_rate": 1.3594059721193034e-05, "loss": 1.8972, "step": 25163500 }, { "epoch": 72.84, "learning_rate": 1.359333607354576e-05, "loss": 1.8664, "step": 25164000 }, { "epoch": 72.84, "learning_rate": 1.3592612425898481e-05, "loss": 1.891, "step": 25164500 }, { "epoch": 72.84, "learning_rate": 1.3591888778251205e-05, "loss": 1.8736, "step": 25165000 }, { "epoch": 72.84, "learning_rate": 1.3591165130603928e-05, "loss": 1.9057, "step": 25165500 }, { "epoch": 72.85, "learning_rate": 1.3590442930251945e-05, "loss": 1.8754, "step": 25166000 }, { "epoch": 72.85, "learning_rate": 1.3589719282604669e-05, "loss": 1.8756, "step": 25166500 }, { "epoch": 72.85, "learning_rate": 1.3588995634957391e-05, "loss": 1.8638, "step": 25167000 }, { "epoch": 72.85, "learning_rate": 1.3588273434605412e-05, "loss": 1.8912, "step": 25167500 }, { "epoch": 72.85, "learning_rate": 1.3587549786958134e-05, "loss": 1.8899, "step": 25168000 }, { "epoch": 72.85, "learning_rate": 1.3586826139310858e-05, "loss": 1.8706, "step": 25168500 }, { "epoch": 72.85, "learning_rate": 1.358610249166358e-05, "loss": 1.9075, "step": 25169000 }, { "epoch": 72.86, "learning_rate": 1.3585380291311597e-05, "loss": 1.88, "step": 25169500 }, { "epoch": 72.86, "learning_rate": 1.358465664366432e-05, "loss": 1.8803, "step": 25170000 }, { "epoch": 72.86, "learning_rate": 1.3583932996017043e-05, "loss": 1.8727, "step": 25170500 }, { "epoch": 72.86, "learning_rate": 1.3583209348369766e-05, "loss": 1.866, "step": 25171000 }, { "epoch": 72.86, "learning_rate": 1.3582485700722491e-05, "loss": 1.894, "step": 25171500 }, { "epoch": 72.86, "learning_rate": 1.3581762053075214e-05, "loss": 1.895, "step": 25172000 }, { "epoch": 72.86, "learning_rate": 1.3581038405427938e-05, "loss": 1.8894, "step": 25172500 }, { "epoch": 72.87, "learning_rate": 1.358031475778066e-05, "loss": 1.8838, "step": 25173000 }, { "epoch": 72.87, "learning_rate": 1.3579591110133382e-05, "loss": 1.8824, "step": 25173500 }, { "epoch": 72.87, "learning_rate": 1.3578867462486106e-05, "loss": 1.8921, "step": 25174000 }, { "epoch": 72.87, "learning_rate": 1.3578143814838828e-05, "loss": 1.8793, "step": 25174500 }, { "epoch": 72.87, "learning_rate": 1.3577420167191554e-05, "loss": 1.8763, "step": 25175000 }, { "epoch": 72.87, "learning_rate": 1.3576696519544278e-05, "loss": 1.8592, "step": 25175500 }, { "epoch": 72.87, "learning_rate": 1.3575972871897e-05, "loss": 1.9077, "step": 25176000 }, { "epoch": 72.88, "learning_rate": 1.3575249224249722e-05, "loss": 1.8816, "step": 25176500 }, { "epoch": 72.88, "learning_rate": 1.3574525576602446e-05, "loss": 1.9096, "step": 25177000 }, { "epoch": 72.88, "learning_rate": 1.3573801928955168e-05, "loss": 1.861, "step": 25177500 }, { "epoch": 72.88, "learning_rate": 1.3573079728603186e-05, "loss": 1.8699, "step": 25178000 }, { "epoch": 72.88, "learning_rate": 1.3572357528251206e-05, "loss": 1.8686, "step": 25178500 }, { "epoch": 72.88, "learning_rate": 1.3571633880603928e-05, "loss": 1.865, "step": 25179000 }, { "epoch": 72.88, "learning_rate": 1.3570910232956652e-05, "loss": 1.8907, "step": 25179500 }, { "epoch": 72.89, "learning_rate": 1.3570186585309375e-05, "loss": 1.8717, "step": 25180000 }, { "epoch": 72.89, "learning_rate": 1.3569462937662097e-05, "loss": 1.8796, "step": 25180500 }, { "epoch": 72.89, "learning_rate": 1.3568740737310116e-05, "loss": 1.8803, "step": 25181000 }, { "epoch": 72.89, "learning_rate": 1.3568017089662838e-05, "loss": 1.9126, "step": 25181500 }, { "epoch": 72.89, "learning_rate": 1.356729344201556e-05, "loss": 1.8813, "step": 25182000 }, { "epoch": 72.89, "learning_rate": 1.3566571241663581e-05, "loss": 1.8902, "step": 25182500 }, { "epoch": 72.89, "learning_rate": 1.3565847594016303e-05, "loss": 1.888, "step": 25183000 }, { "epoch": 72.9, "learning_rate": 1.3565123946369027e-05, "loss": 1.8872, "step": 25183500 }, { "epoch": 72.9, "learning_rate": 1.356440029872175e-05, "loss": 1.9083, "step": 25184000 }, { "epoch": 72.9, "learning_rate": 1.3563678098369767e-05, "loss": 1.901, "step": 25184500 }, { "epoch": 72.9, "learning_rate": 1.356295445072249e-05, "loss": 1.8574, "step": 25185000 }, { "epoch": 72.9, "learning_rate": 1.3562230803075213e-05, "loss": 1.8916, "step": 25185500 }, { "epoch": 72.9, "learning_rate": 1.3561507155427938e-05, "loss": 1.8635, "step": 25186000 }, { "epoch": 72.9, "learning_rate": 1.356078350778066e-05, "loss": 1.868, "step": 25186500 }, { "epoch": 72.91, "learning_rate": 1.3560059860133384e-05, "loss": 1.8933, "step": 25187000 }, { "epoch": 72.91, "learning_rate": 1.3559336212486107e-05, "loss": 1.8845, "step": 25187500 }, { "epoch": 72.91, "learning_rate": 1.3558612564838829e-05, "loss": 1.9051, "step": 25188000 }, { "epoch": 72.91, "learning_rate": 1.3557890364486848e-05, "loss": 1.8537, "step": 25188500 }, { "epoch": 72.91, "learning_rate": 1.355716671683957e-05, "loss": 1.8818, "step": 25189000 }, { "epoch": 72.91, "learning_rate": 1.3556443069192292e-05, "loss": 1.8845, "step": 25189500 }, { "epoch": 72.91, "learning_rate": 1.3555719421545018e-05, "loss": 1.8467, "step": 25190000 }, { "epoch": 72.92, "learning_rate": 1.3554997221193035e-05, "loss": 1.8787, "step": 25190500 }, { "epoch": 72.92, "learning_rate": 1.3554273573545759e-05, "loss": 1.8947, "step": 25191000 }, { "epoch": 72.92, "learning_rate": 1.3553549925898481e-05, "loss": 1.87, "step": 25191500 }, { "epoch": 72.92, "learning_rate": 1.3552826278251204e-05, "loss": 1.8786, "step": 25192000 }, { "epoch": 72.92, "learning_rate": 1.3552102630603928e-05, "loss": 1.8738, "step": 25192500 }, { "epoch": 72.92, "learning_rate": 1.355137898295665e-05, "loss": 1.858, "step": 25193000 }, { "epoch": 72.92, "learning_rate": 1.355065678260467e-05, "loss": 1.8786, "step": 25193500 }, { "epoch": 72.93, "learning_rate": 1.3549933134957393e-05, "loss": 1.875, "step": 25194000 }, { "epoch": 72.93, "learning_rate": 1.3549209487310117e-05, "loss": 1.9014, "step": 25194500 }, { "epoch": 72.93, "learning_rate": 1.3548485839662839e-05, "loss": 1.8687, "step": 25195000 }, { "epoch": 72.93, "learning_rate": 1.3547762192015561e-05, "loss": 1.8698, "step": 25195500 }, { "epoch": 72.93, "learning_rate": 1.3547038544368285e-05, "loss": 1.8892, "step": 25196000 }, { "epoch": 72.93, "learning_rate": 1.3546314896721007e-05, "loss": 1.885, "step": 25196500 }, { "epoch": 72.93, "learning_rate": 1.3545591249073733e-05, "loss": 1.8871, "step": 25197000 }, { "epoch": 72.94, "learning_rate": 1.3544867601426455e-05, "loss": 1.873, "step": 25197500 }, { "epoch": 72.94, "learning_rate": 1.3544145401074474e-05, "loss": 1.8728, "step": 25198000 }, { "epoch": 72.94, "learning_rate": 1.3543421753427196e-05, "loss": 1.8947, "step": 25198500 }, { "epoch": 72.94, "learning_rate": 1.3542698105779918e-05, "loss": 1.8786, "step": 25199000 }, { "epoch": 72.94, "learning_rate": 1.3541974458132642e-05, "loss": 1.8771, "step": 25199500 }, { "epoch": 72.94, "learning_rate": 1.3541250810485365e-05, "loss": 1.8865, "step": 25200000 }, { "epoch": 72.95, "learning_rate": 1.3540527162838087e-05, "loss": 1.8934, "step": 25200500 }, { "epoch": 72.95, "learning_rate": 1.3539803515190812e-05, "loss": 1.8454, "step": 25201000 }, { "epoch": 72.95, "learning_rate": 1.3539079867543536e-05, "loss": 1.885, "step": 25201500 }, { "epoch": 72.95, "learning_rate": 1.3538356219896259e-05, "loss": 1.871, "step": 25202000 }, { "epoch": 72.95, "learning_rate": 1.3537634019544276e-05, "loss": 1.9129, "step": 25202500 }, { "epoch": 72.95, "learning_rate": 1.3536910371897e-05, "loss": 1.8661, "step": 25203000 }, { "epoch": 72.95, "learning_rate": 1.3536186724249722e-05, "loss": 1.9106, "step": 25203500 }, { "epoch": 72.96, "learning_rate": 1.3535463076602444e-05, "loss": 1.8866, "step": 25204000 }, { "epoch": 72.96, "learning_rate": 1.353473942895517e-05, "loss": 1.8788, "step": 25204500 }, { "epoch": 72.96, "learning_rate": 1.3534017228603187e-05, "loss": 1.8721, "step": 25205000 }, { "epoch": 72.96, "learning_rate": 1.3533295028251206e-05, "loss": 1.8833, "step": 25205500 }, { "epoch": 72.96, "learning_rate": 1.3532571380603928e-05, "loss": 1.8762, "step": 25206000 }, { "epoch": 72.96, "learning_rate": 1.353184773295665e-05, "loss": 1.8636, "step": 25206500 }, { "epoch": 72.96, "learning_rate": 1.3531124085309374e-05, "loss": 1.8773, "step": 25207000 }, { "epoch": 72.97, "learning_rate": 1.3530400437662097e-05, "loss": 1.8731, "step": 25207500 }, { "epoch": 72.97, "learning_rate": 1.3529676790014819e-05, "loss": 1.8773, "step": 25208000 }, { "epoch": 72.97, "learning_rate": 1.352895458966284e-05, "loss": 1.8786, "step": 25208500 }, { "epoch": 72.97, "learning_rate": 1.3528230942015564e-05, "loss": 1.894, "step": 25209000 }, { "epoch": 72.97, "learning_rate": 1.3527507294368286e-05, "loss": 1.8649, "step": 25209500 }, { "epoch": 72.97, "learning_rate": 1.3526783646721008e-05, "loss": 1.8684, "step": 25210000 }, { "epoch": 72.97, "learning_rate": 1.3526059999073732e-05, "loss": 1.8876, "step": 25210500 }, { "epoch": 72.98, "learning_rate": 1.352533779872175e-05, "loss": 1.9046, "step": 25211000 }, { "epoch": 72.98, "learning_rate": 1.3524614151074471e-05, "loss": 1.8734, "step": 25211500 }, { "epoch": 72.98, "learning_rate": 1.3523890503427195e-05, "loss": 1.8904, "step": 25212000 }, { "epoch": 72.98, "learning_rate": 1.3523166855779921e-05, "loss": 1.8952, "step": 25212500 }, { "epoch": 72.98, "learning_rate": 1.3522443208132643e-05, "loss": 1.8766, "step": 25213000 }, { "epoch": 72.98, "learning_rate": 1.352172100778066e-05, "loss": 1.8725, "step": 25213500 }, { "epoch": 72.98, "learning_rate": 1.3520997360133383e-05, "loss": 1.8826, "step": 25214000 }, { "epoch": 72.99, "learning_rate": 1.3520273712486107e-05, "loss": 1.8841, "step": 25214500 }, { "epoch": 72.99, "learning_rate": 1.3519550064838829e-05, "loss": 1.8777, "step": 25215000 }, { "epoch": 72.99, "learning_rate": 1.3518826417191551e-05, "loss": 1.8723, "step": 25215500 }, { "epoch": 72.99, "learning_rate": 1.3518104216839572e-05, "loss": 1.8597, "step": 25216000 }, { "epoch": 72.99, "learning_rate": 1.3517380569192296e-05, "loss": 1.8876, "step": 25216500 }, { "epoch": 72.99, "learning_rate": 1.3516656921545018e-05, "loss": 1.8446, "step": 25217000 }, { "epoch": 72.99, "learning_rate": 1.351593327389774e-05, "loss": 1.88, "step": 25217500 }, { "epoch": 73.0, "learning_rate": 1.3515209626250464e-05, "loss": 1.9006, "step": 25218000 }, { "epoch": 73.0, "learning_rate": 1.3514487425898481e-05, "loss": 1.8621, "step": 25218500 }, { "epoch": 73.0, "learning_rate": 1.3513763778251204e-05, "loss": 1.8785, "step": 25219000 }, { "epoch": 73.0, "eval_accuracy": 0.6834092778242686, "eval_accuracy_mlm": 0.6517797263997914, "eval_accuracy_nsp": 0.8529810763865019, "eval_loss": 2.188420534133911, "eval_runtime": 331.8659, "eval_samples_per_second": 1314.947, "eval_steps_per_second": 54.79, "step": 25219456 }, { "epoch": 73.0, "learning_rate": 1.3513040130603927e-05, "loss": 1.8595, "step": 25219500 }, { "epoch": 73.0, "learning_rate": 1.3512316482956653e-05, "loss": 1.8604, "step": 25220000 }, { "epoch": 73.0, "learning_rate": 1.3511592835309375e-05, "loss": 1.8582, "step": 25220500 }, { "epoch": 73.0, "learning_rate": 1.3510869187662098e-05, "loss": 1.895, "step": 25221000 }, { "epoch": 73.01, "learning_rate": 1.3510145540014821e-05, "loss": 1.8563, "step": 25221500 }, { "epoch": 73.01, "learning_rate": 1.3509423339662839e-05, "loss": 1.8559, "step": 25222000 }, { "epoch": 73.01, "learning_rate": 1.3508699692015561e-05, "loss": 1.8866, "step": 25222500 }, { "epoch": 73.01, "learning_rate": 1.3507976044368283e-05, "loss": 1.8636, "step": 25223000 }, { "epoch": 73.01, "learning_rate": 1.3507252396721009e-05, "loss": 1.8467, "step": 25223500 }, { "epoch": 73.01, "learning_rate": 1.3506528749073733e-05, "loss": 1.8771, "step": 25224000 }, { "epoch": 73.01, "learning_rate": 1.3505805101426455e-05, "loss": 1.8804, "step": 25224500 }, { "epoch": 73.02, "learning_rate": 1.3505081453779179e-05, "loss": 1.8867, "step": 25225000 }, { "epoch": 73.02, "learning_rate": 1.3504359253427196e-05, "loss": 1.8784, "step": 25225500 }, { "epoch": 73.02, "learning_rate": 1.3503635605779918e-05, "loss": 1.8907, "step": 25226000 }, { "epoch": 73.02, "learning_rate": 1.350291195813264e-05, "loss": 1.8668, "step": 25226500 }, { "epoch": 73.02, "learning_rate": 1.3502188310485366e-05, "loss": 1.8868, "step": 25227000 }, { "epoch": 73.02, "learning_rate": 1.350146466283809e-05, "loss": 1.874, "step": 25227500 }, { "epoch": 73.02, "learning_rate": 1.3500742462486107e-05, "loss": 1.8696, "step": 25228000 }, { "epoch": 73.03, "learning_rate": 1.350001881483883e-05, "loss": 1.8742, "step": 25228500 }, { "epoch": 73.03, "learning_rate": 1.3499295167191554e-05, "loss": 1.8489, "step": 25229000 }, { "epoch": 73.03, "learning_rate": 1.3498571519544276e-05, "loss": 1.8635, "step": 25229500 }, { "epoch": 73.03, "learning_rate": 1.3497849319192293e-05, "loss": 1.8775, "step": 25230000 }, { "epoch": 73.03, "learning_rate": 1.3497125671545017e-05, "loss": 1.8435, "step": 25230500 }, { "epoch": 73.03, "learning_rate": 1.3496402023897743e-05, "loss": 1.8498, "step": 25231000 }, { "epoch": 73.03, "learning_rate": 1.3495678376250465e-05, "loss": 1.8447, "step": 25231500 }, { "epoch": 73.04, "learning_rate": 1.3494954728603187e-05, "loss": 1.8592, "step": 25232000 }, { "epoch": 73.04, "learning_rate": 1.3494231080955911e-05, "loss": 1.8782, "step": 25232500 }, { "epoch": 73.04, "learning_rate": 1.3493507433308633e-05, "loss": 1.8549, "step": 25233000 }, { "epoch": 73.04, "learning_rate": 1.3492783785661355e-05, "loss": 1.8583, "step": 25233500 }, { "epoch": 73.04, "learning_rate": 1.349206013801408e-05, "loss": 1.8886, "step": 25234000 }, { "epoch": 73.04, "learning_rate": 1.3491336490366805e-05, "loss": 1.8662, "step": 25234500 }, { "epoch": 73.04, "learning_rate": 1.3490612842719527e-05, "loss": 1.8692, "step": 25235000 }, { "epoch": 73.05, "learning_rate": 1.348988919507225e-05, "loss": 1.8598, "step": 25235500 }, { "epoch": 73.05, "learning_rate": 1.3489165547424973e-05, "loss": 1.8669, "step": 25236000 }, { "epoch": 73.05, "learning_rate": 1.3488441899777696e-05, "loss": 1.8584, "step": 25236500 }, { "epoch": 73.05, "learning_rate": 1.3487718252130418e-05, "loss": 1.8942, "step": 25237000 }, { "epoch": 73.05, "learning_rate": 1.3486994604483142e-05, "loss": 1.8784, "step": 25237500 }, { "epoch": 73.05, "learning_rate": 1.3486272404131162e-05, "loss": 1.8696, "step": 25238000 }, { "epoch": 73.06, "learning_rate": 1.3485548756483885e-05, "loss": 1.8428, "step": 25238500 }, { "epoch": 73.06, "learning_rate": 1.3484825108836607e-05, "loss": 1.8839, "step": 25239000 }, { "epoch": 73.06, "learning_rate": 1.348410146118933e-05, "loss": 1.8671, "step": 25239500 }, { "epoch": 73.06, "learning_rate": 1.3483377813542053e-05, "loss": 1.8572, "step": 25240000 }, { "epoch": 73.06, "learning_rate": 1.3482654165894775e-05, "loss": 1.8736, "step": 25240500 }, { "epoch": 73.06, "learning_rate": 1.3481931965542793e-05, "loss": 1.8912, "step": 25241000 }, { "epoch": 73.06, "learning_rate": 1.3481208317895516e-05, "loss": 1.8476, "step": 25241500 }, { "epoch": 73.07, "learning_rate": 1.3480484670248242e-05, "loss": 1.9075, "step": 25242000 }, { "epoch": 73.07, "learning_rate": 1.347976246989626e-05, "loss": 1.8395, "step": 25242500 }, { "epoch": 73.07, "learning_rate": 1.3479038822248982e-05, "loss": 1.8753, "step": 25243000 }, { "epoch": 73.07, "learning_rate": 1.3478315174601705e-05, "loss": 1.8705, "step": 25243500 }, { "epoch": 73.07, "learning_rate": 1.3477592974249723e-05, "loss": 1.8484, "step": 25244000 }, { "epoch": 73.07, "learning_rate": 1.3476869326602445e-05, "loss": 1.8705, "step": 25244500 }, { "epoch": 73.07, "learning_rate": 1.3476145678955169e-05, "loss": 1.856, "step": 25245000 }, { "epoch": 73.08, "learning_rate": 1.3475422031307895e-05, "loss": 1.8835, "step": 25245500 }, { "epoch": 73.08, "learning_rate": 1.3474698383660617e-05, "loss": 1.863, "step": 25246000 }, { "epoch": 73.08, "learning_rate": 1.3473974736013339e-05, "loss": 1.8678, "step": 25246500 }, { "epoch": 73.08, "learning_rate": 1.3473251088366063e-05, "loss": 1.8393, "step": 25247000 }, { "epoch": 73.08, "learning_rate": 1.3472527440718785e-05, "loss": 1.8546, "step": 25247500 }, { "epoch": 73.08, "learning_rate": 1.3471805240366802e-05, "loss": 1.8664, "step": 25248000 }, { "epoch": 73.08, "learning_rate": 1.3471081592719526e-05, "loss": 1.8631, "step": 25248500 }, { "epoch": 73.09, "learning_rate": 1.3470357945072249e-05, "loss": 1.8643, "step": 25249000 }, { "epoch": 73.09, "learning_rate": 1.3469634297424974e-05, "loss": 1.8822, "step": 25249500 }, { "epoch": 73.09, "learning_rate": 1.3468910649777696e-05, "loss": 1.879, "step": 25250000 }, { "epoch": 73.09, "learning_rate": 1.346818700213042e-05, "loss": 1.8289, "step": 25250500 }, { "epoch": 73.09, "learning_rate": 1.3467463354483143e-05, "loss": 1.8772, "step": 25251000 }, { "epoch": 73.09, "learning_rate": 1.3466739706835865e-05, "loss": 1.8413, "step": 25251500 }, { "epoch": 73.09, "learning_rate": 1.3466016059188589e-05, "loss": 1.8797, "step": 25252000 }, { "epoch": 73.1, "learning_rate": 1.3465293858836606e-05, "loss": 1.8607, "step": 25252500 }, { "epoch": 73.1, "learning_rate": 1.3464570211189332e-05, "loss": 1.8589, "step": 25253000 }, { "epoch": 73.1, "learning_rate": 1.3463846563542054e-05, "loss": 1.8555, "step": 25253500 }, { "epoch": 73.1, "learning_rate": 1.3463122915894776e-05, "loss": 1.8785, "step": 25254000 }, { "epoch": 73.1, "learning_rate": 1.34623992682475e-05, "loss": 1.8692, "step": 25254500 }, { "epoch": 73.1, "learning_rate": 1.3461675620600222e-05, "loss": 1.876, "step": 25255000 }, { "epoch": 73.1, "learning_rate": 1.3460951972952946e-05, "loss": 1.8671, "step": 25255500 }, { "epoch": 73.11, "learning_rate": 1.3460228325305668e-05, "loss": 1.8518, "step": 25256000 }, { "epoch": 73.11, "learning_rate": 1.3459506124953689e-05, "loss": 1.8631, "step": 25256500 }, { "epoch": 73.11, "learning_rate": 1.3458782477306411e-05, "loss": 1.8874, "step": 25257000 }, { "epoch": 73.11, "learning_rate": 1.3458060276954429e-05, "loss": 1.8848, "step": 25257500 }, { "epoch": 73.11, "learning_rate": 1.3457336629307152e-05, "loss": 1.8632, "step": 25258000 }, { "epoch": 73.11, "learning_rate": 1.3456612981659875e-05, "loss": 1.8842, "step": 25258500 }, { "epoch": 73.11, "learning_rate": 1.3455889334012597e-05, "loss": 1.8695, "step": 25259000 }, { "epoch": 73.12, "learning_rate": 1.345516568636532e-05, "loss": 1.8737, "step": 25259500 }, { "epoch": 73.12, "learning_rate": 1.3454442038718043e-05, "loss": 1.8836, "step": 25260000 }, { "epoch": 73.12, "learning_rate": 1.3453718391070769e-05, "loss": 1.8789, "step": 25260500 }, { "epoch": 73.12, "learning_rate": 1.3452994743423491e-05, "loss": 1.8827, "step": 25261000 }, { "epoch": 73.12, "learning_rate": 1.345227254307151e-05, "loss": 1.8595, "step": 25261500 }, { "epoch": 73.12, "learning_rate": 1.3451550342719527e-05, "loss": 1.8557, "step": 25262000 }, { "epoch": 73.12, "learning_rate": 1.345082669507225e-05, "loss": 1.868, "step": 25262500 }, { "epoch": 73.13, "learning_rate": 1.3450103047424972e-05, "loss": 1.8662, "step": 25263000 }, { "epoch": 73.13, "learning_rate": 1.3449379399777696e-05, "loss": 1.8606, "step": 25263500 }, { "epoch": 73.13, "learning_rate": 1.3448655752130418e-05, "loss": 1.8775, "step": 25264000 }, { "epoch": 73.13, "learning_rate": 1.3447932104483143e-05, "loss": 1.8745, "step": 25264500 }, { "epoch": 73.13, "learning_rate": 1.344720990413116e-05, "loss": 1.8631, "step": 25265000 }, { "epoch": 73.13, "learning_rate": 1.3446486256483885e-05, "loss": 1.859, "step": 25265500 }, { "epoch": 73.13, "learning_rate": 1.3445762608836607e-05, "loss": 1.8842, "step": 25266000 }, { "epoch": 73.14, "learning_rate": 1.3445038961189329e-05, "loss": 1.8937, "step": 25266500 }, { "epoch": 73.14, "learning_rate": 1.3444315313542053e-05, "loss": 1.843, "step": 25267000 }, { "epoch": 73.14, "learning_rate": 1.3443591665894775e-05, "loss": 1.8535, "step": 25267500 }, { "epoch": 73.14, "learning_rate": 1.3442869465542796e-05, "loss": 1.8669, "step": 25268000 }, { "epoch": 73.14, "learning_rate": 1.3442145817895518e-05, "loss": 1.8731, "step": 25268500 }, { "epoch": 73.14, "learning_rate": 1.3441422170248242e-05, "loss": 1.8821, "step": 25269000 }, { "epoch": 73.14, "learning_rate": 1.3440698522600964e-05, "loss": 1.8428, "step": 25269500 }, { "epoch": 73.15, "learning_rate": 1.3439976322248981e-05, "loss": 1.8439, "step": 25270000 }, { "epoch": 73.15, "learning_rate": 1.3439252674601704e-05, "loss": 1.8769, "step": 25270500 }, { "epoch": 73.15, "learning_rate": 1.3438529026954428e-05, "loss": 1.8775, "step": 25271000 }, { "epoch": 73.15, "learning_rate": 1.343780537930715e-05, "loss": 1.8808, "step": 25271500 }, { "epoch": 73.15, "learning_rate": 1.343708317895517e-05, "loss": 1.8501, "step": 25272000 }, { "epoch": 73.15, "learning_rate": 1.3436359531307893e-05, "loss": 1.8607, "step": 25272500 }, { "epoch": 73.15, "learning_rate": 1.3435635883660617e-05, "loss": 1.8742, "step": 25273000 }, { "epoch": 73.16, "learning_rate": 1.3434912236013339e-05, "loss": 1.8568, "step": 25273500 }, { "epoch": 73.16, "learning_rate": 1.3434188588366061e-05, "loss": 1.8535, "step": 25274000 }, { "epoch": 73.16, "learning_rate": 1.3433464940718785e-05, "loss": 1.8698, "step": 25274500 }, { "epoch": 73.16, "learning_rate": 1.3432741293071507e-05, "loss": 1.859, "step": 25275000 }, { "epoch": 73.16, "learning_rate": 1.3432017645424233e-05, "loss": 1.8463, "step": 25275500 }, { "epoch": 73.16, "learning_rate": 1.3431293997776955e-05, "loss": 1.8566, "step": 25276000 }, { "epoch": 73.17, "learning_rate": 1.3430570350129679e-05, "loss": 1.8717, "step": 25276500 }, { "epoch": 73.17, "learning_rate": 1.3429846702482401e-05, "loss": 1.8705, "step": 25277000 }, { "epoch": 73.17, "learning_rate": 1.3429124502130419e-05, "loss": 1.8614, "step": 25277500 }, { "epoch": 73.17, "learning_rate": 1.3428400854483142e-05, "loss": 1.8993, "step": 25278000 }, { "epoch": 73.17, "learning_rate": 1.3427677206835865e-05, "loss": 1.8551, "step": 25278500 }, { "epoch": 73.17, "learning_rate": 1.342695355918859e-05, "loss": 1.8844, "step": 25279000 }, { "epoch": 73.17, "learning_rate": 1.3426229911541313e-05, "loss": 1.8867, "step": 25279500 }, { "epoch": 73.18, "learning_rate": 1.3425507711189332e-05, "loss": 1.8769, "step": 25280000 }, { "epoch": 73.18, "learning_rate": 1.3424784063542054e-05, "loss": 1.8872, "step": 25280500 }, { "epoch": 73.18, "learning_rate": 1.3424060415894776e-05, "loss": 1.8955, "step": 25281000 }, { "epoch": 73.18, "learning_rate": 1.34233367682475e-05, "loss": 1.883, "step": 25281500 }, { "epoch": 73.18, "learning_rate": 1.3422613120600222e-05, "loss": 1.867, "step": 25282000 }, { "epoch": 73.18, "learning_rate": 1.3421889472952944e-05, "loss": 1.8673, "step": 25282500 }, { "epoch": 73.18, "learning_rate": 1.342116582530567e-05, "loss": 1.8969, "step": 25283000 }, { "epoch": 73.19, "learning_rate": 1.3420442177658394e-05, "loss": 1.8654, "step": 25283500 }, { "epoch": 73.19, "learning_rate": 1.3419719977306411e-05, "loss": 1.8758, "step": 25284000 }, { "epoch": 73.19, "learning_rate": 1.3418997776954428e-05, "loss": 1.842, "step": 25284500 }, { "epoch": 73.19, "learning_rate": 1.341827412930715e-05, "loss": 1.8572, "step": 25285000 }, { "epoch": 73.19, "learning_rate": 1.3417550481659875e-05, "loss": 1.8436, "step": 25285500 }, { "epoch": 73.19, "learning_rate": 1.3416831175898482e-05, "loss": 1.8596, "step": 25286000 }, { "epoch": 73.19, "learning_rate": 1.3416107528251204e-05, "loss": 1.871, "step": 25286500 }, { "epoch": 73.2, "learning_rate": 1.3415383880603926e-05, "loss": 1.8648, "step": 25287000 }, { "epoch": 73.2, "learning_rate": 1.3414660232956652e-05, "loss": 1.8856, "step": 25287500 }, { "epoch": 73.2, "learning_rate": 1.3413936585309376e-05, "loss": 1.8746, "step": 25288000 }, { "epoch": 73.2, "learning_rate": 1.3413212937662098e-05, "loss": 1.8762, "step": 25288500 }, { "epoch": 73.2, "learning_rate": 1.341248929001482e-05, "loss": 1.9009, "step": 25289000 }, { "epoch": 73.2, "learning_rate": 1.3411765642367544e-05, "loss": 1.8546, "step": 25289500 }, { "epoch": 73.2, "learning_rate": 1.3411041994720267e-05, "loss": 1.8792, "step": 25290000 }, { "epoch": 73.21, "learning_rate": 1.3410318347072989e-05, "loss": 1.881, "step": 25290500 }, { "epoch": 73.21, "learning_rate": 1.3409594699425714e-05, "loss": 1.8786, "step": 25291000 }, { "epoch": 73.21, "learning_rate": 1.3408871051778438e-05, "loss": 1.8751, "step": 25291500 }, { "epoch": 73.21, "learning_rate": 1.340814740413116e-05, "loss": 1.8779, "step": 25292000 }, { "epoch": 73.21, "learning_rate": 1.3407423756483883e-05, "loss": 1.859, "step": 25292500 }, { "epoch": 73.21, "learning_rate": 1.3406700108836607e-05, "loss": 1.8765, "step": 25293000 }, { "epoch": 73.21, "learning_rate": 1.3405977908484624e-05, "loss": 1.8436, "step": 25293500 }, { "epoch": 73.22, "learning_rate": 1.3405254260837346e-05, "loss": 1.8813, "step": 25294000 }, { "epoch": 73.22, "learning_rate": 1.3404530613190072e-05, "loss": 1.8601, "step": 25294500 }, { "epoch": 73.22, "learning_rate": 1.3403806965542796e-05, "loss": 1.856, "step": 25295000 }, { "epoch": 73.22, "learning_rate": 1.3403083317895518e-05, "loss": 1.8619, "step": 25295500 }, { "epoch": 73.22, "learning_rate": 1.340235967024824e-05, "loss": 1.8551, "step": 25296000 }, { "epoch": 73.22, "learning_rate": 1.3401636022600964e-05, "loss": 1.8682, "step": 25296500 }, { "epoch": 73.22, "learning_rate": 1.3400912374953686e-05, "loss": 1.8746, "step": 25297000 }, { "epoch": 73.23, "learning_rate": 1.3400188727306409e-05, "loss": 1.8782, "step": 25297500 }, { "epoch": 73.23, "learning_rate": 1.3399465079659134e-05, "loss": 1.8829, "step": 25298000 }, { "epoch": 73.23, "learning_rate": 1.3398741432011858e-05, "loss": 1.8841, "step": 25298500 }, { "epoch": 73.23, "learning_rate": 1.339801778436458e-05, "loss": 1.8791, "step": 25299000 }, { "epoch": 73.23, "learning_rate": 1.3397295584012598e-05, "loss": 1.863, "step": 25299500 }, { "epoch": 73.23, "learning_rate": 1.3396573383660615e-05, "loss": 1.8949, "step": 25300000 }, { "epoch": 73.23, "learning_rate": 1.3395849736013339e-05, "loss": 1.8615, "step": 25300500 }, { "epoch": 73.24, "learning_rate": 1.3395126088366061e-05, "loss": 1.8763, "step": 25301000 }, { "epoch": 73.24, "learning_rate": 1.3394402440718783e-05, "loss": 1.867, "step": 25301500 }, { "epoch": 73.24, "learning_rate": 1.3393678793071509e-05, "loss": 1.8811, "step": 25302000 }, { "epoch": 73.24, "learning_rate": 1.3392955145424233e-05, "loss": 1.8723, "step": 25302500 }, { "epoch": 73.24, "learning_rate": 1.339223294507225e-05, "loss": 1.8875, "step": 25303000 }, { "epoch": 73.24, "learning_rate": 1.3391509297424972e-05, "loss": 1.8529, "step": 25303500 }, { "epoch": 73.24, "learning_rate": 1.3390785649777696e-05, "loss": 1.8597, "step": 25304000 }, { "epoch": 73.25, "learning_rate": 1.3390062002130418e-05, "loss": 1.8578, "step": 25304500 }, { "epoch": 73.25, "learning_rate": 1.338933835448314e-05, "loss": 1.87, "step": 25305000 }, { "epoch": 73.25, "learning_rate": 1.3388614706835866e-05, "loss": 1.9013, "step": 25305500 }, { "epoch": 73.25, "learning_rate": 1.338789105918859e-05, "loss": 1.86, "step": 25306000 }, { "epoch": 73.25, "learning_rate": 1.3387167411541312e-05, "loss": 1.8822, "step": 25306500 }, { "epoch": 73.25, "learning_rate": 1.3386443763894035e-05, "loss": 1.869, "step": 25307000 }, { "epoch": 73.25, "learning_rate": 1.3385720116246759e-05, "loss": 1.8732, "step": 25307500 }, { "epoch": 73.26, "learning_rate": 1.3384996468599481e-05, "loss": 1.8453, "step": 25308000 }, { "epoch": 73.26, "learning_rate": 1.3384272820952203e-05, "loss": 1.8563, "step": 25308500 }, { "epoch": 73.26, "learning_rate": 1.3383549173304929e-05, "loss": 1.834, "step": 25309000 }, { "epoch": 73.26, "learning_rate": 1.3382826972952948e-05, "loss": 1.8586, "step": 25309500 }, { "epoch": 73.26, "learning_rate": 1.338210332530567e-05, "loss": 1.8704, "step": 25310000 }, { "epoch": 73.26, "learning_rate": 1.3381379677658392e-05, "loss": 1.8882, "step": 25310500 }, { "epoch": 73.26, "learning_rate": 1.3380656030011116e-05, "loss": 1.9042, "step": 25311000 }, { "epoch": 73.27, "learning_rate": 1.3379935276954428e-05, "loss": 1.8716, "step": 25311500 }, { "epoch": 73.27, "learning_rate": 1.337921162930715e-05, "loss": 1.874, "step": 25312000 }, { "epoch": 73.27, "learning_rate": 1.3378487981659873e-05, "loss": 1.8593, "step": 25312500 }, { "epoch": 73.27, "learning_rate": 1.3377764334012598e-05, "loss": 1.8901, "step": 25313000 }, { "epoch": 73.27, "learning_rate": 1.3377040686365322e-05, "loss": 1.8585, "step": 25313500 }, { "epoch": 73.27, "learning_rate": 1.3376317038718045e-05, "loss": 1.866, "step": 25314000 }, { "epoch": 73.28, "learning_rate": 1.3375594838366062e-05, "loss": 1.8759, "step": 25314500 }, { "epoch": 73.28, "learning_rate": 1.3374871190718786e-05, "loss": 1.8526, "step": 25315000 }, { "epoch": 73.28, "learning_rate": 1.3374147543071508e-05, "loss": 1.8789, "step": 25315500 }, { "epoch": 73.28, "learning_rate": 1.337342389542423e-05, "loss": 1.8771, "step": 25316000 }, { "epoch": 73.28, "learning_rate": 1.3372700247776956e-05, "loss": 1.8813, "step": 25316500 }, { "epoch": 73.28, "learning_rate": 1.337197660012968e-05, "loss": 1.881, "step": 25317000 }, { "epoch": 73.28, "learning_rate": 1.3371254399777697e-05, "loss": 1.872, "step": 25317500 }, { "epoch": 73.29, "learning_rate": 1.337053075213042e-05, "loss": 1.8628, "step": 25318000 }, { "epoch": 73.29, "learning_rate": 1.3369807104483143e-05, "loss": 1.8512, "step": 25318500 }, { "epoch": 73.29, "learning_rate": 1.3369083456835865e-05, "loss": 1.8739, "step": 25319000 }, { "epoch": 73.29, "learning_rate": 1.3368359809188588e-05, "loss": 1.8546, "step": 25319500 }, { "epoch": 73.29, "learning_rate": 1.3367636161541312e-05, "loss": 1.8744, "step": 25320000 }, { "epoch": 73.29, "learning_rate": 1.3366912513894037e-05, "loss": 1.8758, "step": 25320500 }, { "epoch": 73.29, "learning_rate": 1.336618886624676e-05, "loss": 1.8934, "step": 25321000 }, { "epoch": 73.3, "learning_rate": 1.3365465218599482e-05, "loss": 1.8785, "step": 25321500 }, { "epoch": 73.3, "learning_rate": 1.3364741570952206e-05, "loss": 1.8801, "step": 25322000 }, { "epoch": 73.3, "learning_rate": 1.3364019370600223e-05, "loss": 1.8673, "step": 25322500 }, { "epoch": 73.3, "learning_rate": 1.3363295722952945e-05, "loss": 1.8491, "step": 25323000 }, { "epoch": 73.3, "learning_rate": 1.3362572075305669e-05, "loss": 1.8682, "step": 25323500 }, { "epoch": 73.3, "learning_rate": 1.3361848427658395e-05, "loss": 1.8621, "step": 25324000 }, { "epoch": 73.3, "learning_rate": 1.3361124780011117e-05, "loss": 1.8564, "step": 25324500 }, { "epoch": 73.31, "learning_rate": 1.3360404026954429e-05, "loss": 1.8815, "step": 25325000 }, { "epoch": 73.31, "learning_rate": 1.3359680379307151e-05, "loss": 1.8912, "step": 25325500 }, { "epoch": 73.31, "learning_rate": 1.3358956731659875e-05, "loss": 1.8863, "step": 25326000 }, { "epoch": 73.31, "learning_rate": 1.3358233084012598e-05, "loss": 1.8735, "step": 25326500 }, { "epoch": 73.31, "learning_rate": 1.335750943636532e-05, "loss": 1.8532, "step": 25327000 }, { "epoch": 73.31, "learning_rate": 1.3356785788718044e-05, "loss": 1.8828, "step": 25327500 }, { "epoch": 73.31, "learning_rate": 1.335606214107077e-05, "loss": 1.8455, "step": 25328000 }, { "epoch": 73.32, "learning_rate": 1.3355339940718787e-05, "loss": 1.8787, "step": 25328500 }, { "epoch": 73.32, "learning_rate": 1.3354616293071509e-05, "loss": 1.8644, "step": 25329000 }, { "epoch": 73.32, "learning_rate": 1.3353892645424233e-05, "loss": 1.8486, "step": 25329500 }, { "epoch": 73.32, "learning_rate": 1.335317044507225e-05, "loss": 1.8626, "step": 25330000 }, { "epoch": 73.32, "learning_rate": 1.3352446797424972e-05, "loss": 1.8738, "step": 25330500 }, { "epoch": 73.32, "learning_rate": 1.3351723149777694e-05, "loss": 1.882, "step": 25331000 }, { "epoch": 73.32, "learning_rate": 1.3350999502130418e-05, "loss": 1.8806, "step": 25331500 }, { "epoch": 73.33, "learning_rate": 1.3350275854483144e-05, "loss": 1.8321, "step": 25332000 }, { "epoch": 73.33, "learning_rate": 1.3349553654131161e-05, "loss": 1.895, "step": 25332500 }, { "epoch": 73.33, "learning_rate": 1.3348830006483883e-05, "loss": 1.8784, "step": 25333000 }, { "epoch": 73.33, "learning_rate": 1.3348106358836607e-05, "loss": 1.8787, "step": 25333500 }, { "epoch": 73.33, "learning_rate": 1.334738271118933e-05, "loss": 1.8855, "step": 25334000 }, { "epoch": 73.33, "learning_rate": 1.3346659063542052e-05, "loss": 1.8875, "step": 25334500 }, { "epoch": 73.33, "learning_rate": 1.3345935415894776e-05, "loss": 1.8988, "step": 25335000 }, { "epoch": 73.34, "learning_rate": 1.3345211768247501e-05, "loss": 1.8754, "step": 25335500 }, { "epoch": 73.34, "learning_rate": 1.3344488120600224e-05, "loss": 1.8698, "step": 25336000 }, { "epoch": 73.34, "learning_rate": 1.3343764472952946e-05, "loss": 1.8766, "step": 25336500 }, { "epoch": 73.34, "learning_rate": 1.334304082530567e-05, "loss": 1.8732, "step": 25337000 }, { "epoch": 73.34, "learning_rate": 1.3342318624953687e-05, "loss": 1.8654, "step": 25337500 }, { "epoch": 73.34, "learning_rate": 1.334159497730641e-05, "loss": 1.8739, "step": 25338000 }, { "epoch": 73.34, "learning_rate": 1.3340871329659133e-05, "loss": 1.8912, "step": 25338500 }, { "epoch": 73.35, "learning_rate": 1.3340147682011859e-05, "loss": 1.8783, "step": 25339000 }, { "epoch": 73.35, "learning_rate": 1.3339424034364581e-05, "loss": 1.8688, "step": 25339500 }, { "epoch": 73.35, "learning_rate": 1.3338700386717303e-05, "loss": 1.8484, "step": 25340000 }, { "epoch": 73.35, "learning_rate": 1.3337976739070027e-05, "loss": 1.8858, "step": 25340500 }, { "epoch": 73.35, "learning_rate": 1.333725309142275e-05, "loss": 1.9018, "step": 25341000 }, { "epoch": 73.35, "learning_rate": 1.3336530891070767e-05, "loss": 1.8587, "step": 25341500 }, { "epoch": 73.35, "learning_rate": 1.333580724342349e-05, "loss": 1.8864, "step": 25342000 }, { "epoch": 73.36, "learning_rate": 1.3335083595776213e-05, "loss": 1.8433, "step": 25342500 }, { "epoch": 73.36, "learning_rate": 1.3334359948128938e-05, "loss": 1.8879, "step": 25343000 }, { "epoch": 73.36, "learning_rate": 1.333363630048166e-05, "loss": 1.8845, "step": 25343500 }, { "epoch": 73.36, "learning_rate": 1.3332915547424973e-05, "loss": 1.8596, "step": 25344000 }, { "epoch": 73.36, "learning_rate": 1.3332191899777697e-05, "loss": 1.8448, "step": 25344500 }, { "epoch": 73.36, "learning_rate": 1.3331468252130419e-05, "loss": 1.8731, "step": 25345000 }, { "epoch": 73.36, "learning_rate": 1.3330744604483141e-05, "loss": 1.8812, "step": 25345500 }, { "epoch": 73.37, "learning_rate": 1.3330020956835865e-05, "loss": 1.8884, "step": 25346000 }, { "epoch": 73.37, "learning_rate": 1.3329297309188591e-05, "loss": 1.8729, "step": 25346500 }, { "epoch": 73.37, "learning_rate": 1.3328573661541313e-05, "loss": 1.867, "step": 25347000 }, { "epoch": 73.37, "learning_rate": 1.3327850013894035e-05, "loss": 1.8832, "step": 25347500 }, { "epoch": 73.37, "learning_rate": 1.332712636624676e-05, "loss": 1.8617, "step": 25348000 }, { "epoch": 73.37, "learning_rate": 1.3326405613190072e-05, "loss": 1.8651, "step": 25348500 }, { "epoch": 73.37, "learning_rate": 1.3325681965542794e-05, "loss": 1.8728, "step": 25349000 }, { "epoch": 73.38, "learning_rate": 1.3324959765190811e-05, "loss": 1.8596, "step": 25349500 }, { "epoch": 73.38, "learning_rate": 1.3324236117543535e-05, "loss": 1.9019, "step": 25350000 }, { "epoch": 73.38, "learning_rate": 1.332351246989626e-05, "loss": 1.877, "step": 25350500 }, { "epoch": 73.38, "learning_rate": 1.3322788822248983e-05, "loss": 1.8701, "step": 25351000 }, { "epoch": 73.38, "learning_rate": 1.3322065174601705e-05, "loss": 1.8437, "step": 25351500 }, { "epoch": 73.38, "learning_rate": 1.3321341526954429e-05, "loss": 1.8799, "step": 25352000 }, { "epoch": 73.39, "learning_rate": 1.3320617879307151e-05, "loss": 1.8909, "step": 25352500 }, { "epoch": 73.39, "learning_rate": 1.3319894231659873e-05, "loss": 1.8824, "step": 25353000 }, { "epoch": 73.39, "learning_rate": 1.3319170584012597e-05, "loss": 1.8843, "step": 25353500 }, { "epoch": 73.39, "learning_rate": 1.3318446936365323e-05, "loss": 1.8539, "step": 25354000 }, { "epoch": 73.39, "learning_rate": 1.3317723288718045e-05, "loss": 1.8772, "step": 25354500 }, { "epoch": 73.39, "learning_rate": 1.3316999641070767e-05, "loss": 1.8715, "step": 25355000 }, { "epoch": 73.39, "learning_rate": 1.3316277440718786e-05, "loss": 1.8389, "step": 25355500 }, { "epoch": 73.4, "learning_rate": 1.3315553793071509e-05, "loss": 1.8629, "step": 25356000 }, { "epoch": 73.4, "learning_rate": 1.3314830145424231e-05, "loss": 1.8862, "step": 25356500 }, { "epoch": 73.4, "learning_rate": 1.3314106497776955e-05, "loss": 1.8674, "step": 25357000 }, { "epoch": 73.4, "learning_rate": 1.3313384297424972e-05, "loss": 1.8844, "step": 25357500 }, { "epoch": 73.4, "learning_rate": 1.3312660649777698e-05, "loss": 1.8668, "step": 25358000 }, { "epoch": 73.4, "learning_rate": 1.3311938449425715e-05, "loss": 1.8758, "step": 25358500 }, { "epoch": 73.4, "learning_rate": 1.3311214801778437e-05, "loss": 1.8785, "step": 25359000 }, { "epoch": 73.41, "learning_rate": 1.3310491154131161e-05, "loss": 1.8898, "step": 25359500 }, { "epoch": 73.41, "learning_rate": 1.3309767506483883e-05, "loss": 1.875, "step": 25360000 }, { "epoch": 73.41, "learning_rate": 1.3309043858836606e-05, "loss": 1.8557, "step": 25360500 }, { "epoch": 73.41, "learning_rate": 1.330832021118933e-05, "loss": 1.8796, "step": 25361000 }, { "epoch": 73.41, "learning_rate": 1.3307596563542052e-05, "loss": 1.8877, "step": 25361500 }, { "epoch": 73.41, "learning_rate": 1.3306872915894777e-05, "loss": 1.8871, "step": 25362000 }, { "epoch": 73.41, "learning_rate": 1.3306150715542795e-05, "loss": 1.8774, "step": 25362500 }, { "epoch": 73.42, "learning_rate": 1.3305427067895519e-05, "loss": 1.8569, "step": 25363000 }, { "epoch": 73.42, "learning_rate": 1.330470342024824e-05, "loss": 1.8763, "step": 25363500 }, { "epoch": 73.42, "learning_rate": 1.3303979772600963e-05, "loss": 1.8726, "step": 25364000 }, { "epoch": 73.42, "learning_rate": 1.3303256124953687e-05, "loss": 1.857, "step": 25364500 }, { "epoch": 73.42, "learning_rate": 1.330253247730641e-05, "loss": 1.8825, "step": 25365000 }, { "epoch": 73.42, "learning_rate": 1.3301808829659135e-05, "loss": 1.895, "step": 25365500 }, { "epoch": 73.42, "learning_rate": 1.3301086629307152e-05, "loss": 1.8765, "step": 25366000 }, { "epoch": 73.43, "learning_rate": 1.3300362981659876e-05, "loss": 1.8766, "step": 25366500 }, { "epoch": 73.43, "learning_rate": 1.3299639334012598e-05, "loss": 1.8794, "step": 25367000 }, { "epoch": 73.43, "learning_rate": 1.329891568636532e-05, "loss": 1.8605, "step": 25367500 }, { "epoch": 73.43, "learning_rate": 1.3298192038718044e-05, "loss": 1.8915, "step": 25368000 }, { "epoch": 73.43, "learning_rate": 1.3297468391070767e-05, "loss": 1.88, "step": 25368500 }, { "epoch": 73.43, "learning_rate": 1.3296744743423492e-05, "loss": 1.8455, "step": 25369000 }, { "epoch": 73.43, "learning_rate": 1.329602254307151e-05, "loss": 1.8683, "step": 25369500 }, { "epoch": 73.44, "learning_rate": 1.3295298895424232e-05, "loss": 1.895, "step": 25370000 }, { "epoch": 73.44, "learning_rate": 1.3294575247776956e-05, "loss": 1.8723, "step": 25370500 }, { "epoch": 73.44, "learning_rate": 1.3293851600129678e-05, "loss": 1.8779, "step": 25371000 }, { "epoch": 73.44, "learning_rate": 1.3293127952482402e-05, "loss": 1.8907, "step": 25371500 }, { "epoch": 73.44, "learning_rate": 1.3292404304835124e-05, "loss": 1.8838, "step": 25372000 }, { "epoch": 73.44, "learning_rate": 1.3291680657187846e-05, "loss": 1.8766, "step": 25372500 }, { "epoch": 73.44, "learning_rate": 1.3290957009540572e-05, "loss": 1.8628, "step": 25373000 }, { "epoch": 73.45, "learning_rate": 1.3290233361893296e-05, "loss": 1.8664, "step": 25373500 }, { "epoch": 73.45, "learning_rate": 1.3289509714246018e-05, "loss": 1.9007, "step": 25374000 }, { "epoch": 73.45, "learning_rate": 1.328878606659874e-05, "loss": 1.8566, "step": 25374500 }, { "epoch": 73.45, "learning_rate": 1.3288063866246758e-05, "loss": 1.9025, "step": 25375000 }, { "epoch": 73.45, "learning_rate": 1.3287340218599481e-05, "loss": 1.8982, "step": 25375500 }, { "epoch": 73.45, "learning_rate": 1.3286616570952204e-05, "loss": 1.8635, "step": 25376000 }, { "epoch": 73.45, "learning_rate": 1.328589292330493e-05, "loss": 1.8975, "step": 25376500 }, { "epoch": 73.46, "learning_rate": 1.3285169275657652e-05, "loss": 1.8375, "step": 25377000 }, { "epoch": 73.46, "learning_rate": 1.328444707530567e-05, "loss": 1.8554, "step": 25377500 }, { "epoch": 73.46, "learning_rate": 1.3283723427658393e-05, "loss": 1.8962, "step": 25378000 }, { "epoch": 73.46, "learning_rate": 1.3282999780011115e-05, "loss": 1.875, "step": 25378500 }, { "epoch": 73.46, "learning_rate": 1.3282276132363839e-05, "loss": 1.8563, "step": 25379000 }, { "epoch": 73.46, "learning_rate": 1.3281553932011856e-05, "loss": 1.8437, "step": 25379500 }, { "epoch": 73.46, "learning_rate": 1.3280830284364578e-05, "loss": 1.8885, "step": 25380000 }, { "epoch": 73.47, "learning_rate": 1.3280106636717304e-05, "loss": 1.8653, "step": 25380500 }, { "epoch": 73.47, "learning_rate": 1.3279382989070028e-05, "loss": 1.8602, "step": 25381000 }, { "epoch": 73.47, "learning_rate": 1.327865934142275e-05, "loss": 1.8835, "step": 25381500 }, { "epoch": 73.47, "learning_rate": 1.3277935693775472e-05, "loss": 1.873, "step": 25382000 }, { "epoch": 73.47, "learning_rate": 1.3277212046128196e-05, "loss": 1.8732, "step": 25382500 }, { "epoch": 73.47, "learning_rate": 1.3276489845776214e-05, "loss": 1.8532, "step": 25383000 }, { "epoch": 73.47, "learning_rate": 1.3275766198128936e-05, "loss": 1.8712, "step": 25383500 }, { "epoch": 73.48, "learning_rate": 1.3275043997776956e-05, "loss": 1.871, "step": 25384000 }, { "epoch": 73.48, "learning_rate": 1.3274320350129679e-05, "loss": 1.8907, "step": 25384500 }, { "epoch": 73.48, "learning_rate": 1.3273596702482403e-05, "loss": 1.8956, "step": 25385000 }, { "epoch": 73.48, "learning_rate": 1.3272873054835125e-05, "loss": 1.8806, "step": 25385500 }, { "epoch": 73.48, "learning_rate": 1.3272149407187847e-05, "loss": 1.8723, "step": 25386000 }, { "epoch": 73.48, "learning_rate": 1.3271425759540571e-05, "loss": 1.9118, "step": 25386500 }, { "epoch": 73.48, "learning_rate": 1.3270702111893293e-05, "loss": 1.8795, "step": 25387000 }, { "epoch": 73.49, "learning_rate": 1.3269978464246019e-05, "loss": 1.8891, "step": 25387500 }, { "epoch": 73.49, "learning_rate": 1.3269254816598741e-05, "loss": 1.8627, "step": 25388000 }, { "epoch": 73.49, "learning_rate": 1.3268531168951465e-05, "loss": 1.9013, "step": 25388500 }, { "epoch": 73.49, "learning_rate": 1.3267807521304187e-05, "loss": 1.8808, "step": 25389000 }, { "epoch": 73.49, "learning_rate": 1.326708387365691e-05, "loss": 1.8735, "step": 25389500 }, { "epoch": 73.49, "learning_rate": 1.3266361673304928e-05, "loss": 1.8426, "step": 25390000 }, { "epoch": 73.5, "learning_rate": 1.3265639472952946e-05, "loss": 1.8724, "step": 25390500 }, { "epoch": 73.5, "learning_rate": 1.3264915825305668e-05, "loss": 1.891, "step": 25391000 }, { "epoch": 73.5, "learning_rate": 1.3264192177658394e-05, "loss": 1.8629, "step": 25391500 }, { "epoch": 73.5, "learning_rate": 1.3263468530011117e-05, "loss": 1.852, "step": 25392000 }, { "epoch": 73.5, "learning_rate": 1.326274488236384e-05, "loss": 1.8898, "step": 25392500 }, { "epoch": 73.5, "learning_rate": 1.3262022682011857e-05, "loss": 1.883, "step": 25393000 }, { "epoch": 73.5, "learning_rate": 1.326129903436458e-05, "loss": 1.8795, "step": 25393500 }, { "epoch": 73.51, "learning_rate": 1.3260575386717303e-05, "loss": 1.8737, "step": 25394000 }, { "epoch": 73.51, "learning_rate": 1.325985318636532e-05, "loss": 1.8762, "step": 25394500 }, { "epoch": 73.51, "learning_rate": 1.3259129538718043e-05, "loss": 1.8926, "step": 25395000 }, { "epoch": 73.51, "learning_rate": 1.3258405891070768e-05, "loss": 1.8865, "step": 25395500 }, { "epoch": 73.51, "learning_rate": 1.3257682243423492e-05, "loss": 1.8867, "step": 25396000 }, { "epoch": 73.51, "learning_rate": 1.3256958595776214e-05, "loss": 1.8712, "step": 25396500 }, { "epoch": 73.51, "learning_rate": 1.3256234948128937e-05, "loss": 1.8727, "step": 25397000 }, { "epoch": 73.52, "learning_rate": 1.325551130048166e-05, "loss": 1.8801, "step": 25397500 }, { "epoch": 73.52, "learning_rate": 1.3254787652834383e-05, "loss": 1.8539, "step": 25398000 }, { "epoch": 73.52, "learning_rate": 1.32540654524824e-05, "loss": 1.8997, "step": 25398500 }, { "epoch": 73.52, "learning_rate": 1.3253341804835126e-05, "loss": 1.849, "step": 25399000 }, { "epoch": 73.52, "learning_rate": 1.325261815718785e-05, "loss": 1.8551, "step": 25399500 }, { "epoch": 73.52, "learning_rate": 1.3251894509540572e-05, "loss": 1.9004, "step": 25400000 }, { "epoch": 73.52, "learning_rate": 1.3251170861893294e-05, "loss": 1.8895, "step": 25400500 }, { "epoch": 73.53, "learning_rate": 1.3250447214246018e-05, "loss": 1.8752, "step": 25401000 }, { "epoch": 73.53, "learning_rate": 1.324972356659874e-05, "loss": 1.8672, "step": 25401500 }, { "epoch": 73.53, "learning_rate": 1.3248999918951462e-05, "loss": 1.8643, "step": 25402000 }, { "epoch": 73.53, "learning_rate": 1.3248276271304188e-05, "loss": 1.8623, "step": 25402500 }, { "epoch": 73.53, "learning_rate": 1.3247552623656912e-05, "loss": 1.8923, "step": 25403000 }, { "epoch": 73.53, "learning_rate": 1.3246828976009634e-05, "loss": 1.8976, "step": 25403500 }, { "epoch": 73.53, "learning_rate": 1.3246105328362356e-05, "loss": 1.897, "step": 25404000 }, { "epoch": 73.54, "learning_rate": 1.324538168071508e-05, "loss": 1.8802, "step": 25404500 }, { "epoch": 73.54, "learning_rate": 1.3244658033067803e-05, "loss": 1.9005, "step": 25405000 }, { "epoch": 73.54, "learning_rate": 1.324393583271582e-05, "loss": 1.8832, "step": 25405500 }, { "epoch": 73.54, "learning_rate": 1.3243213632363837e-05, "loss": 1.8906, "step": 25406000 }, { "epoch": 73.54, "learning_rate": 1.3242489984716563e-05, "loss": 1.8754, "step": 25406500 }, { "epoch": 73.54, "learning_rate": 1.3241766337069287e-05, "loss": 1.8839, "step": 25407000 }, { "epoch": 73.54, "learning_rate": 1.3241042689422009e-05, "loss": 1.8672, "step": 25407500 }, { "epoch": 73.55, "learning_rate": 1.3240319041774731e-05, "loss": 1.8752, "step": 25408000 }, { "epoch": 73.55, "learning_rate": 1.3239595394127455e-05, "loss": 1.8644, "step": 25408500 }, { "epoch": 73.55, "learning_rate": 1.3238871746480177e-05, "loss": 1.8649, "step": 25409000 }, { "epoch": 73.55, "learning_rate": 1.3238148098832901e-05, "loss": 1.862, "step": 25409500 }, { "epoch": 73.55, "learning_rate": 1.3237424451185627e-05, "loss": 1.8924, "step": 25410000 }, { "epoch": 73.55, "learning_rate": 1.3236702250833644e-05, "loss": 1.8553, "step": 25410500 }, { "epoch": 73.55, "learning_rate": 1.3235978603186366e-05, "loss": 1.8541, "step": 25411000 }, { "epoch": 73.56, "learning_rate": 1.3235254955539089e-05, "loss": 1.8881, "step": 25411500 }, { "epoch": 73.56, "learning_rate": 1.3234531307891812e-05, "loss": 1.846, "step": 25412000 }, { "epoch": 73.56, "learning_rate": 1.3233807660244535e-05, "loss": 1.8661, "step": 25412500 }, { "epoch": 73.56, "learning_rate": 1.3233085459892552e-05, "loss": 1.8947, "step": 25413000 }, { "epoch": 73.56, "learning_rate": 1.3232361812245278e-05, "loss": 1.8662, "step": 25413500 }, { "epoch": 73.56, "learning_rate": 1.3231638164598001e-05, "loss": 1.84, "step": 25414000 }, { "epoch": 73.56, "learning_rate": 1.3230914516950724e-05, "loss": 1.8873, "step": 25414500 }, { "epoch": 73.57, "learning_rate": 1.3230192316598741e-05, "loss": 1.8651, "step": 25415000 }, { "epoch": 73.57, "learning_rate": 1.3229468668951465e-05, "loss": 1.8595, "step": 25415500 }, { "epoch": 73.57, "learning_rate": 1.3228745021304187e-05, "loss": 1.869, "step": 25416000 }, { "epoch": 73.57, "learning_rate": 1.322802137365691e-05, "loss": 1.8897, "step": 25416500 }, { "epoch": 73.57, "learning_rate": 1.3227299173304927e-05, "loss": 1.8647, "step": 25417000 }, { "epoch": 73.57, "learning_rate": 1.3226575525657652e-05, "loss": 1.8817, "step": 25417500 }, { "epoch": 73.57, "learning_rate": 1.3225851878010376e-05, "loss": 1.8736, "step": 25418000 }, { "epoch": 73.58, "learning_rate": 1.3225128230363098e-05, "loss": 1.8665, "step": 25418500 }, { "epoch": 73.58, "learning_rate": 1.322440458271582e-05, "loss": 1.8486, "step": 25419000 }, { "epoch": 73.58, "learning_rate": 1.322368238236384e-05, "loss": 1.8467, "step": 25419500 }, { "epoch": 73.58, "learning_rate": 1.3222958734716562e-05, "loss": 1.8784, "step": 25420000 }, { "epoch": 73.58, "learning_rate": 1.3222235087069284e-05, "loss": 1.8836, "step": 25420500 }, { "epoch": 73.58, "learning_rate": 1.3221511439422008e-05, "loss": 1.8778, "step": 25421000 }, { "epoch": 73.58, "learning_rate": 1.3220787791774734e-05, "loss": 1.8845, "step": 25421500 }, { "epoch": 73.59, "learning_rate": 1.322006559142275e-05, "loss": 1.8801, "step": 25422000 }, { "epoch": 73.59, "learning_rate": 1.3219341943775473e-05, "loss": 1.8973, "step": 25422500 }, { "epoch": 73.59, "learning_rate": 1.3218618296128197e-05, "loss": 1.8976, "step": 25423000 }, { "epoch": 73.59, "learning_rate": 1.321789464848092e-05, "loss": 1.8579, "step": 25423500 }, { "epoch": 73.59, "learning_rate": 1.3217171000833641e-05, "loss": 1.8733, "step": 25424000 }, { "epoch": 73.59, "learning_rate": 1.3216447353186365e-05, "loss": 1.8864, "step": 25424500 }, { "epoch": 73.59, "learning_rate": 1.3215723705539091e-05, "loss": 1.8741, "step": 25425000 }, { "epoch": 73.6, "learning_rate": 1.3215001505187108e-05, "loss": 1.8593, "step": 25425500 }, { "epoch": 73.6, "learning_rate": 1.3214279304835126e-05, "loss": 1.8465, "step": 25426000 }, { "epoch": 73.6, "learning_rate": 1.3213555657187848e-05, "loss": 1.8584, "step": 25426500 }, { "epoch": 73.6, "learning_rate": 1.3212832009540572e-05, "loss": 1.8529, "step": 25427000 }, { "epoch": 73.6, "learning_rate": 1.3212108361893294e-05, "loss": 1.8803, "step": 25427500 }, { "epoch": 73.6, "learning_rate": 1.3211384714246016e-05, "loss": 1.8756, "step": 25428000 }, { "epoch": 73.61, "learning_rate": 1.321066106659874e-05, "loss": 1.8962, "step": 25428500 }, { "epoch": 73.61, "learning_rate": 1.320993886624676e-05, "loss": 1.8798, "step": 25429000 }, { "epoch": 73.61, "learning_rate": 1.3209215218599483e-05, "loss": 1.8692, "step": 25429500 }, { "epoch": 73.61, "learning_rate": 1.3208491570952205e-05, "loss": 1.8662, "step": 25430000 }, { "epoch": 73.61, "learning_rate": 1.3207767923304929e-05, "loss": 1.8948, "step": 25430500 }, { "epoch": 73.61, "learning_rate": 1.3207044275657651e-05, "loss": 1.8781, "step": 25431000 }, { "epoch": 73.61, "learning_rate": 1.3206320628010374e-05, "loss": 1.8898, "step": 25431500 }, { "epoch": 73.62, "learning_rate": 1.3205596980363097e-05, "loss": 1.8791, "step": 25432000 }, { "epoch": 73.62, "learning_rate": 1.3204873332715823e-05, "loss": 1.8659, "step": 25432500 }, { "epoch": 73.62, "learning_rate": 1.3204149685068545e-05, "loss": 1.8644, "step": 25433000 }, { "epoch": 73.62, "learning_rate": 1.3203426037421268e-05, "loss": 1.8955, "step": 25433500 }, { "epoch": 73.62, "learning_rate": 1.3202702389773992e-05, "loss": 1.8935, "step": 25434000 }, { "epoch": 73.62, "learning_rate": 1.3201981636717304e-05, "loss": 1.9012, "step": 25434500 }, { "epoch": 73.62, "learning_rate": 1.3201257989070026e-05, "loss": 1.881, "step": 25435000 }, { "epoch": 73.63, "learning_rate": 1.3200534341422748e-05, "loss": 1.8929, "step": 25435500 }, { "epoch": 73.63, "learning_rate": 1.3199810693775472e-05, "loss": 1.879, "step": 25436000 }, { "epoch": 73.63, "learning_rate": 1.3199087046128198e-05, "loss": 1.8848, "step": 25436500 }, { "epoch": 73.63, "learning_rate": 1.319836339848092e-05, "loss": 1.8717, "step": 25437000 }, { "epoch": 73.63, "learning_rate": 1.3197639750833642e-05, "loss": 1.8876, "step": 25437500 }, { "epoch": 73.63, "learning_rate": 1.3196916103186366e-05, "loss": 1.8799, "step": 25438000 }, { "epoch": 73.63, "learning_rate": 1.3196193902834383e-05, "loss": 1.8883, "step": 25438500 }, { "epoch": 73.64, "learning_rate": 1.3195470255187106e-05, "loss": 1.8858, "step": 25439000 }, { "epoch": 73.64, "learning_rate": 1.319474660753983e-05, "loss": 1.877, "step": 25439500 }, { "epoch": 73.64, "learning_rate": 1.3194022959892555e-05, "loss": 1.8651, "step": 25440000 }, { "epoch": 73.64, "learning_rate": 1.3193299312245277e-05, "loss": 1.8369, "step": 25440500 }, { "epoch": 73.64, "learning_rate": 1.3192575664598e-05, "loss": 1.9021, "step": 25441000 }, { "epoch": 73.64, "learning_rate": 1.3191852016950724e-05, "loss": 1.8675, "step": 25441500 }, { "epoch": 73.64, "learning_rate": 1.3191128369303446e-05, "loss": 1.8709, "step": 25442000 }, { "epoch": 73.65, "learning_rate": 1.3190404721656168e-05, "loss": 1.8691, "step": 25442500 }, { "epoch": 73.65, "learning_rate": 1.3189681074008892e-05, "loss": 1.8469, "step": 25443000 }, { "epoch": 73.65, "learning_rate": 1.3188960320952204e-05, "loss": 1.8772, "step": 25443500 }, { "epoch": 73.65, "learning_rate": 1.318823667330493e-05, "loss": 1.8738, "step": 25444000 }, { "epoch": 73.65, "learning_rate": 1.3187513025657652e-05, "loss": 1.8659, "step": 25444500 }, { "epoch": 73.65, "learning_rate": 1.3186789378010374e-05, "loss": 1.8781, "step": 25445000 }, { "epoch": 73.65, "learning_rate": 1.3186065730363098e-05, "loss": 1.8919, "step": 25445500 }, { "epoch": 73.66, "learning_rate": 1.3185343530011116e-05, "loss": 1.88, "step": 25446000 }, { "epoch": 73.66, "learning_rate": 1.3184619882363838e-05, "loss": 1.8991, "step": 25446500 }, { "epoch": 73.66, "learning_rate": 1.3183896234716562e-05, "loss": 1.8684, "step": 25447000 }, { "epoch": 73.66, "learning_rate": 1.3183172587069287e-05, "loss": 1.876, "step": 25447500 }, { "epoch": 73.66, "learning_rate": 1.318244893942201e-05, "loss": 1.8679, "step": 25448000 }, { "epoch": 73.66, "learning_rate": 1.3181725291774732e-05, "loss": 1.8566, "step": 25448500 }, { "epoch": 73.66, "learning_rate": 1.3181001644127456e-05, "loss": 1.8882, "step": 25449000 }, { "epoch": 73.67, "learning_rate": 1.3180277996480178e-05, "loss": 1.8882, "step": 25449500 }, { "epoch": 73.67, "learning_rate": 1.31795543488329e-05, "loss": 1.9084, "step": 25450000 }, { "epoch": 73.67, "learning_rate": 1.3178832148480919e-05, "loss": 1.8733, "step": 25450500 }, { "epoch": 73.67, "learning_rate": 1.3178108500833641e-05, "loss": 1.8928, "step": 25451000 }, { "epoch": 73.67, "learning_rate": 1.3177384853186367e-05, "loss": 1.8947, "step": 25451500 }, { "epoch": 73.67, "learning_rate": 1.317666120553909e-05, "loss": 1.8765, "step": 25452000 }, { "epoch": 73.67, "learning_rate": 1.3175937557891813e-05, "loss": 1.8644, "step": 25452500 }, { "epoch": 73.68, "learning_rate": 1.317521535753983e-05, "loss": 1.8694, "step": 25453000 }, { "epoch": 73.68, "learning_rate": 1.3174491709892553e-05, "loss": 1.89, "step": 25453500 }, { "epoch": 73.68, "learning_rate": 1.3173768062245277e-05, "loss": 1.835, "step": 25454000 }, { "epoch": 73.68, "learning_rate": 1.3173044414597999e-05, "loss": 1.8668, "step": 25454500 }, { "epoch": 73.68, "learning_rate": 1.3172320766950724e-05, "loss": 1.8712, "step": 25455000 }, { "epoch": 73.68, "learning_rate": 1.3171597119303447e-05, "loss": 1.9032, "step": 25455500 }, { "epoch": 73.68, "learning_rate": 1.317087347165617e-05, "loss": 1.8802, "step": 25456000 }, { "epoch": 73.69, "learning_rate": 1.3170149824008893e-05, "loss": 1.8613, "step": 25456500 }, { "epoch": 73.69, "learning_rate": 1.316942762365691e-05, "loss": 1.8674, "step": 25457000 }, { "epoch": 73.69, "learning_rate": 1.3168703976009632e-05, "loss": 1.8814, "step": 25457500 }, { "epoch": 73.69, "learning_rate": 1.3167980328362356e-05, "loss": 1.9132, "step": 25458000 }, { "epoch": 73.69, "learning_rate": 1.3167256680715082e-05, "loss": 1.854, "step": 25458500 }, { "epoch": 73.69, "learning_rate": 1.3166533033067804e-05, "loss": 1.8885, "step": 25459000 }, { "epoch": 73.69, "learning_rate": 1.3165810832715821e-05, "loss": 1.8624, "step": 25459500 }, { "epoch": 73.7, "learning_rate": 1.3165087185068545e-05, "loss": 1.8633, "step": 25460000 }, { "epoch": 73.7, "learning_rate": 1.3164363537421267e-05, "loss": 1.8832, "step": 25460500 }, { "epoch": 73.7, "learning_rate": 1.3163641337069285e-05, "loss": 1.8674, "step": 25461000 }, { "epoch": 73.7, "learning_rate": 1.3162917689422009e-05, "loss": 1.8919, "step": 25461500 }, { "epoch": 73.7, "learning_rate": 1.3162194041774731e-05, "loss": 1.8639, "step": 25462000 }, { "epoch": 73.7, "learning_rate": 1.3161470394127457e-05, "loss": 1.8919, "step": 25462500 }, { "epoch": 73.7, "learning_rate": 1.3160746746480179e-05, "loss": 1.8741, "step": 25463000 }, { "epoch": 73.71, "learning_rate": 1.3160023098832903e-05, "loss": 1.8761, "step": 25463500 }, { "epoch": 73.71, "learning_rate": 1.3159299451185625e-05, "loss": 1.874, "step": 25464000 }, { "epoch": 73.71, "learning_rate": 1.3158577250833642e-05, "loss": 1.889, "step": 25464500 }, { "epoch": 73.71, "learning_rate": 1.3157853603186366e-05, "loss": 1.8896, "step": 25465000 }, { "epoch": 73.71, "learning_rate": 1.3157129955539088e-05, "loss": 1.8513, "step": 25465500 }, { "epoch": 73.71, "learning_rate": 1.3156406307891814e-05, "loss": 1.8952, "step": 25466000 }, { "epoch": 73.72, "learning_rate": 1.3155682660244536e-05, "loss": 1.8839, "step": 25466500 }, { "epoch": 73.72, "learning_rate": 1.3154960459892553e-05, "loss": 1.8557, "step": 25467000 }, { "epoch": 73.72, "learning_rate": 1.3154236812245277e-05, "loss": 1.8698, "step": 25467500 }, { "epoch": 73.72, "learning_rate": 1.3153513164598e-05, "loss": 1.8814, "step": 25468000 }, { "epoch": 73.72, "learning_rate": 1.3152789516950722e-05, "loss": 1.8823, "step": 25468500 }, { "epoch": 73.72, "learning_rate": 1.3152065869303446e-05, "loss": 1.8484, "step": 25469000 }, { "epoch": 73.72, "learning_rate": 1.3151343668951463e-05, "loss": 1.8565, "step": 25469500 }, { "epoch": 73.73, "learning_rate": 1.3150620021304189e-05, "loss": 1.876, "step": 25470000 }, { "epoch": 73.73, "learning_rate": 1.3149896373656911e-05, "loss": 1.845, "step": 25470500 }, { "epoch": 73.73, "learning_rate": 1.3149172726009635e-05, "loss": 1.8764, "step": 25471000 }, { "epoch": 73.73, "learning_rate": 1.3148449078362357e-05, "loss": 1.8788, "step": 25471500 }, { "epoch": 73.73, "learning_rate": 1.314772543071508e-05, "loss": 1.8825, "step": 25472000 }, { "epoch": 73.73, "learning_rate": 1.3147001783067803e-05, "loss": 1.8814, "step": 25472500 }, { "epoch": 73.73, "learning_rate": 1.3146278135420525e-05, "loss": 1.8722, "step": 25473000 }, { "epoch": 73.74, "learning_rate": 1.3145555935068546e-05, "loss": 1.8726, "step": 25473500 }, { "epoch": 73.74, "learning_rate": 1.3144832287421268e-05, "loss": 1.8822, "step": 25474000 }, { "epoch": 73.74, "learning_rate": 1.3144108639773992e-05, "loss": 1.8729, "step": 25474500 }, { "epoch": 73.74, "learning_rate": 1.3143384992126714e-05, "loss": 1.868, "step": 25475000 }, { "epoch": 73.74, "learning_rate": 1.3142662791774732e-05, "loss": 1.8864, "step": 25475500 }, { "epoch": 73.74, "learning_rate": 1.3141939144127454e-05, "loss": 1.8736, "step": 25476000 }, { "epoch": 73.74, "learning_rate": 1.3141216943775473e-05, "loss": 1.8762, "step": 25476500 }, { "epoch": 73.75, "learning_rate": 1.3140493296128195e-05, "loss": 1.8697, "step": 25477000 }, { "epoch": 73.75, "learning_rate": 1.313976964848092e-05, "loss": 1.8745, "step": 25477500 }, { "epoch": 73.75, "learning_rate": 1.3139046000833643e-05, "loss": 1.8807, "step": 25478000 }, { "epoch": 73.75, "learning_rate": 1.3138323800481662e-05, "loss": 1.8988, "step": 25478500 }, { "epoch": 73.75, "learning_rate": 1.3137600152834384e-05, "loss": 1.8862, "step": 25479000 }, { "epoch": 73.75, "learning_rate": 1.3136876505187106e-05, "loss": 1.8766, "step": 25479500 }, { "epoch": 73.75, "learning_rate": 1.313615285753983e-05, "loss": 1.8612, "step": 25480000 }, { "epoch": 73.76, "learning_rate": 1.3135429209892553e-05, "loss": 1.8528, "step": 25480500 }, { "epoch": 73.76, "learning_rate": 1.313470700954057e-05, "loss": 1.8738, "step": 25481000 }, { "epoch": 73.76, "learning_rate": 1.3133983361893295e-05, "loss": 1.8617, "step": 25481500 }, { "epoch": 73.76, "learning_rate": 1.3133259714246018e-05, "loss": 1.8648, "step": 25482000 }, { "epoch": 73.76, "learning_rate": 1.3132536066598742e-05, "loss": 1.8672, "step": 25482500 }, { "epoch": 73.76, "learning_rate": 1.3131812418951464e-05, "loss": 1.8735, "step": 25483000 }, { "epoch": 73.76, "learning_rate": 1.3131090218599481e-05, "loss": 1.8889, "step": 25483500 }, { "epoch": 73.77, "learning_rate": 1.3130366570952205e-05, "loss": 1.8626, "step": 25484000 }, { "epoch": 73.77, "learning_rate": 1.3129642923304927e-05, "loss": 1.8701, "step": 25484500 }, { "epoch": 73.77, "learning_rate": 1.3128919275657653e-05, "loss": 1.8873, "step": 25485000 }, { "epoch": 73.77, "learning_rate": 1.3128195628010375e-05, "loss": 1.874, "step": 25485500 }, { "epoch": 73.77, "learning_rate": 1.3127471980363099e-05, "loss": 1.8886, "step": 25486000 }, { "epoch": 73.77, "learning_rate": 1.3126748332715821e-05, "loss": 1.894, "step": 25486500 }, { "epoch": 73.77, "learning_rate": 1.3126024685068543e-05, "loss": 1.8755, "step": 25487000 }, { "epoch": 73.78, "learning_rate": 1.3125302484716562e-05, "loss": 1.8614, "step": 25487500 }, { "epoch": 73.78, "learning_rate": 1.3124578837069285e-05, "loss": 1.857, "step": 25488000 }, { "epoch": 73.78, "learning_rate": 1.3123856636717302e-05, "loss": 1.8855, "step": 25488500 }, { "epoch": 73.78, "learning_rate": 1.3123132989070027e-05, "loss": 1.899, "step": 25489000 }, { "epoch": 73.78, "learning_rate": 1.3122409341422751e-05, "loss": 1.8757, "step": 25489500 }, { "epoch": 73.78, "learning_rate": 1.3121685693775474e-05, "loss": 1.8811, "step": 25490000 }, { "epoch": 73.78, "learning_rate": 1.3120963493423491e-05, "loss": 1.877, "step": 25490500 }, { "epoch": 73.79, "learning_rate": 1.3120239845776213e-05, "loss": 1.8612, "step": 25491000 }, { "epoch": 73.79, "learning_rate": 1.3119516198128937e-05, "loss": 1.8747, "step": 25491500 }, { "epoch": 73.79, "learning_rate": 1.311879255048166e-05, "loss": 1.8432, "step": 25492000 }, { "epoch": 73.79, "learning_rate": 1.3118068902834385e-05, "loss": 1.8655, "step": 25492500 }, { "epoch": 73.79, "learning_rate": 1.3117346702482402e-05, "loss": 1.8682, "step": 25493000 }, { "epoch": 73.79, "learning_rate": 1.311662450213042e-05, "loss": 1.913, "step": 25493500 }, { "epoch": 73.79, "learning_rate": 1.3115900854483143e-05, "loss": 1.8702, "step": 25494000 }, { "epoch": 73.8, "learning_rate": 1.3115177206835866e-05, "loss": 1.8719, "step": 25494500 }, { "epoch": 73.8, "learning_rate": 1.3114453559188588e-05, "loss": 1.8647, "step": 25495000 }, { "epoch": 73.8, "learning_rate": 1.3113729911541312e-05, "loss": 1.8954, "step": 25495500 }, { "epoch": 73.8, "learning_rate": 1.3113006263894034e-05, "loss": 1.8453, "step": 25496000 }, { "epoch": 73.8, "learning_rate": 1.311228261624676e-05, "loss": 1.8811, "step": 25496500 }, { "epoch": 73.8, "learning_rate": 1.3111558968599484e-05, "loss": 1.8879, "step": 25497000 }, { "epoch": 73.8, "learning_rate": 1.3110835320952206e-05, "loss": 1.8728, "step": 25497500 }, { "epoch": 73.81, "learning_rate": 1.3110111673304928e-05, "loss": 1.8689, "step": 25498000 }, { "epoch": 73.81, "learning_rate": 1.3109389472952945e-05, "loss": 1.8717, "step": 25498500 }, { "epoch": 73.81, "learning_rate": 1.310866582530567e-05, "loss": 1.8697, "step": 25499000 }, { "epoch": 73.81, "learning_rate": 1.3107943624953686e-05, "loss": 1.8922, "step": 25499500 }, { "epoch": 73.81, "learning_rate": 1.3107219977306409e-05, "loss": 1.8734, "step": 25500000 }, { "epoch": 73.81, "learning_rate": 1.3106496329659134e-05, "loss": 1.87, "step": 25500500 }, { "epoch": 73.81, "learning_rate": 1.3105772682011858e-05, "loss": 1.9036, "step": 25501000 }, { "epoch": 73.82, "learning_rate": 1.3105050481659875e-05, "loss": 1.8709, "step": 25501500 }, { "epoch": 73.82, "learning_rate": 1.3104326834012598e-05, "loss": 1.8479, "step": 25502000 }, { "epoch": 73.82, "learning_rate": 1.3103603186365322e-05, "loss": 1.8895, "step": 25502500 }, { "epoch": 73.82, "learning_rate": 1.3102879538718044e-05, "loss": 1.864, "step": 25503000 }, { "epoch": 73.82, "learning_rate": 1.3102155891070766e-05, "loss": 1.8872, "step": 25503500 }, { "epoch": 73.82, "learning_rate": 1.3101432243423492e-05, "loss": 1.8919, "step": 25504000 }, { "epoch": 73.83, "learning_rate": 1.3100708595776216e-05, "loss": 1.8838, "step": 25504500 }, { "epoch": 73.83, "learning_rate": 1.3099984948128938e-05, "loss": 1.8837, "step": 25505000 }, { "epoch": 73.83, "learning_rate": 1.309926130048166e-05, "loss": 1.8636, "step": 25505500 }, { "epoch": 73.83, "learning_rate": 1.3098537652834384e-05, "loss": 1.8907, "step": 25506000 }, { "epoch": 73.83, "learning_rate": 1.3097814005187106e-05, "loss": 1.8841, "step": 25506500 }, { "epoch": 73.83, "learning_rate": 1.3097090357539828e-05, "loss": 1.8481, "step": 25507000 }, { "epoch": 73.83, "learning_rate": 1.3096368157187847e-05, "loss": 1.8791, "step": 25507500 }, { "epoch": 73.84, "learning_rate": 1.3095644509540571e-05, "loss": 1.879, "step": 25508000 }, { "epoch": 73.84, "learning_rate": 1.3094920861893295e-05, "loss": 1.886, "step": 25508500 }, { "epoch": 73.84, "learning_rate": 1.3094197214246018e-05, "loss": 1.8925, "step": 25509000 }, { "epoch": 73.84, "learning_rate": 1.3093473566598741e-05, "loss": 1.8723, "step": 25509500 }, { "epoch": 73.84, "learning_rate": 1.3092751366246759e-05, "loss": 1.8863, "step": 25510000 }, { "epoch": 73.84, "learning_rate": 1.3092027718599481e-05, "loss": 1.8865, "step": 25510500 }, { "epoch": 73.84, "learning_rate": 1.3091304070952203e-05, "loss": 1.8798, "step": 25511000 }, { "epoch": 73.85, "learning_rate": 1.3090580423304929e-05, "loss": 1.8814, "step": 25511500 }, { "epoch": 73.85, "learning_rate": 1.3089856775657653e-05, "loss": 1.8636, "step": 25512000 }, { "epoch": 73.85, "learning_rate": 1.3089133128010375e-05, "loss": 1.8474, "step": 25512500 }, { "epoch": 73.85, "learning_rate": 1.3088410927658392e-05, "loss": 1.8873, "step": 25513000 }, { "epoch": 73.85, "learning_rate": 1.3087687280011116e-05, "loss": 1.8868, "step": 25513500 }, { "epoch": 73.85, "learning_rate": 1.3086963632363838e-05, "loss": 1.8934, "step": 25514000 }, { "epoch": 73.85, "learning_rate": 1.308623998471656e-05, "loss": 1.8814, "step": 25514500 }, { "epoch": 73.86, "learning_rate": 1.3085516337069286e-05, "loss": 1.8991, "step": 25515000 }, { "epoch": 73.86, "learning_rate": 1.3084794136717305e-05, "loss": 1.8735, "step": 25515500 }, { "epoch": 73.86, "learning_rate": 1.3084070489070027e-05, "loss": 1.8528, "step": 25516000 }, { "epoch": 73.86, "learning_rate": 1.308334684142275e-05, "loss": 1.8744, "step": 25516500 }, { "epoch": 73.86, "learning_rate": 1.3082623193775474e-05, "loss": 1.8883, "step": 25517000 }, { "epoch": 73.86, "learning_rate": 1.3081899546128196e-05, "loss": 1.899, "step": 25517500 }, { "epoch": 73.86, "learning_rate": 1.3081175898480918e-05, "loss": 1.8785, "step": 25518000 }, { "epoch": 73.87, "learning_rate": 1.3080453698128935e-05, "loss": 1.8619, "step": 25518500 }, { "epoch": 73.87, "learning_rate": 1.3079730050481661e-05, "loss": 1.8812, "step": 25519000 }, { "epoch": 73.87, "learning_rate": 1.3079006402834385e-05, "loss": 1.8931, "step": 25519500 }, { "epoch": 73.87, "learning_rate": 1.3078282755187107e-05, "loss": 1.8799, "step": 25520000 }, { "epoch": 73.87, "learning_rate": 1.3077559107539831e-05, "loss": 1.901, "step": 25520500 }, { "epoch": 73.87, "learning_rate": 1.3076835459892553e-05, "loss": 1.9221, "step": 25521000 }, { "epoch": 73.87, "learning_rate": 1.3076111812245275e-05, "loss": 1.8802, "step": 25521500 }, { "epoch": 73.88, "learning_rate": 1.3075388164598e-05, "loss": 1.893, "step": 25522000 }, { "epoch": 73.88, "learning_rate": 1.3074664516950725e-05, "loss": 1.8775, "step": 25522500 }, { "epoch": 73.88, "learning_rate": 1.3073940869303447e-05, "loss": 1.8879, "step": 25523000 }, { "epoch": 73.88, "learning_rate": 1.307321722165617e-05, "loss": 1.8792, "step": 25523500 }, { "epoch": 73.88, "learning_rate": 1.3072493574008893e-05, "loss": 1.9048, "step": 25524000 }, { "epoch": 73.88, "learning_rate": 1.3071769926361616e-05, "loss": 1.8809, "step": 25524500 }, { "epoch": 73.88, "learning_rate": 1.3071047726009633e-05, "loss": 1.8819, "step": 25525000 }, { "epoch": 73.89, "learning_rate": 1.307032552565765e-05, "loss": 1.8616, "step": 25525500 }, { "epoch": 73.89, "learning_rate": 1.3069601878010374e-05, "loss": 1.8711, "step": 25526000 }, { "epoch": 73.89, "learning_rate": 1.30688782303631e-05, "loss": 1.8745, "step": 25526500 }, { "epoch": 73.89, "learning_rate": 1.3068154582715822e-05, "loss": 1.8476, "step": 25527000 }, { "epoch": 73.89, "learning_rate": 1.306743238236384e-05, "loss": 1.8966, "step": 25527500 }, { "epoch": 73.89, "learning_rate": 1.3066708734716563e-05, "loss": 1.8355, "step": 25528000 }, { "epoch": 73.89, "learning_rate": 1.3065985087069285e-05, "loss": 1.8896, "step": 25528500 }, { "epoch": 73.9, "learning_rate": 1.3065261439422008e-05, "loss": 1.9021, "step": 25529000 }, { "epoch": 73.9, "learning_rate": 1.3064539239070025e-05, "loss": 1.8493, "step": 25529500 }, { "epoch": 73.9, "learning_rate": 1.3063817038718044e-05, "loss": 1.8726, "step": 25530000 }, { "epoch": 73.9, "learning_rate": 1.306309339107077e-05, "loss": 1.8814, "step": 25530500 }, { "epoch": 73.9, "learning_rate": 1.3062369743423492e-05, "loss": 1.8716, "step": 25531000 }, { "epoch": 73.9, "learning_rate": 1.3061646095776214e-05, "loss": 1.895, "step": 25531500 }, { "epoch": 73.9, "learning_rate": 1.3060922448128938e-05, "loss": 1.8743, "step": 25532000 }, { "epoch": 73.91, "learning_rate": 1.306019880048166e-05, "loss": 1.8907, "step": 25532500 }, { "epoch": 73.91, "learning_rate": 1.3059475152834382e-05, "loss": 1.8689, "step": 25533000 }, { "epoch": 73.91, "learning_rate": 1.3058752952482401e-05, "loss": 1.8511, "step": 25533500 }, { "epoch": 73.91, "learning_rate": 1.3058029304835127e-05, "loss": 1.848, "step": 25534000 }, { "epoch": 73.91, "learning_rate": 1.3057305657187849e-05, "loss": 1.8848, "step": 25534500 }, { "epoch": 73.91, "learning_rate": 1.3056582009540571e-05, "loss": 1.8937, "step": 25535000 }, { "epoch": 73.91, "learning_rate": 1.3055858361893295e-05, "loss": 1.8643, "step": 25535500 }, { "epoch": 73.92, "learning_rate": 1.3055134714246017e-05, "loss": 1.8899, "step": 25536000 }, { "epoch": 73.92, "learning_rate": 1.305441106659874e-05, "loss": 1.8772, "step": 25536500 }, { "epoch": 73.92, "learning_rate": 1.3053687418951464e-05, "loss": 1.8793, "step": 25537000 }, { "epoch": 73.92, "learning_rate": 1.3052965218599483e-05, "loss": 1.872, "step": 25537500 }, { "epoch": 73.92, "learning_rate": 1.3052241570952206e-05, "loss": 1.8646, "step": 25538000 }, { "epoch": 73.92, "learning_rate": 1.3051517923304929e-05, "loss": 1.8857, "step": 25538500 }, { "epoch": 73.92, "learning_rate": 1.3050795722952946e-05, "loss": 1.8488, "step": 25539000 }, { "epoch": 73.93, "learning_rate": 1.305007207530567e-05, "loss": 1.892, "step": 25539500 }, { "epoch": 73.93, "learning_rate": 1.3049348427658392e-05, "loss": 1.8542, "step": 25540000 }, { "epoch": 73.93, "learning_rate": 1.3048624780011114e-05, "loss": 1.9048, "step": 25540500 }, { "epoch": 73.93, "learning_rate": 1.3047901132363838e-05, "loss": 1.8655, "step": 25541000 }, { "epoch": 73.93, "learning_rate": 1.3047178932011859e-05, "loss": 1.8756, "step": 25541500 }, { "epoch": 73.93, "learning_rate": 1.3046455284364581e-05, "loss": 1.877, "step": 25542000 }, { "epoch": 73.94, "learning_rate": 1.3045731636717303e-05, "loss": 1.8785, "step": 25542500 }, { "epoch": 73.94, "learning_rate": 1.3045007989070027e-05, "loss": 1.8788, "step": 25543000 }, { "epoch": 73.94, "learning_rate": 1.304428434142275e-05, "loss": 1.8701, "step": 25543500 }, { "epoch": 73.94, "learning_rate": 1.3043560693775472e-05, "loss": 1.87, "step": 25544000 }, { "epoch": 73.94, "learning_rate": 1.3042837046128196e-05, "loss": 1.8883, "step": 25544500 }, { "epoch": 73.94, "learning_rate": 1.3042113398480921e-05, "loss": 1.8669, "step": 25545000 }, { "epoch": 73.94, "learning_rate": 1.3041389750833644e-05, "loss": 1.9055, "step": 25545500 }, { "epoch": 73.95, "learning_rate": 1.3040666103186366e-05, "loss": 1.8839, "step": 25546000 }, { "epoch": 73.95, "learning_rate": 1.303994245553909e-05, "loss": 1.8689, "step": 25546500 }, { "epoch": 73.95, "learning_rate": 1.3039218807891812e-05, "loss": 1.869, "step": 25547000 }, { "epoch": 73.95, "learning_rate": 1.303849660753983e-05, "loss": 1.8735, "step": 25547500 }, { "epoch": 73.95, "learning_rate": 1.3037774407187846e-05, "loss": 1.8824, "step": 25548000 }, { "epoch": 73.95, "learning_rate": 1.303705075954057e-05, "loss": 1.8865, "step": 25548500 }, { "epoch": 73.95, "learning_rate": 1.3036327111893296e-05, "loss": 1.8919, "step": 25549000 }, { "epoch": 73.96, "learning_rate": 1.3035603464246018e-05, "loss": 1.8819, "step": 25549500 }, { "epoch": 73.96, "learning_rate": 1.303487981659874e-05, "loss": 1.8677, "step": 25550000 }, { "epoch": 73.96, "learning_rate": 1.3034156168951464e-05, "loss": 1.8692, "step": 25550500 }, { "epoch": 73.96, "learning_rate": 1.3033433968599482e-05, "loss": 1.8773, "step": 25551000 }, { "epoch": 73.96, "learning_rate": 1.3032710320952204e-05, "loss": 1.8477, "step": 25551500 }, { "epoch": 73.96, "learning_rate": 1.3031986673304928e-05, "loss": 1.8667, "step": 25552000 }, { "epoch": 73.96, "learning_rate": 1.3031263025657653e-05, "loss": 1.8614, "step": 25552500 }, { "epoch": 73.97, "learning_rate": 1.3030539378010376e-05, "loss": 1.8813, "step": 25553000 }, { "epoch": 73.97, "learning_rate": 1.3029815730363098e-05, "loss": 1.8667, "step": 25553500 }, { "epoch": 73.97, "learning_rate": 1.3029093530011117e-05, "loss": 1.8947, "step": 25554000 }, { "epoch": 73.97, "learning_rate": 1.3028369882363839e-05, "loss": 1.8547, "step": 25554500 }, { "epoch": 73.97, "learning_rate": 1.3027647682011856e-05, "loss": 1.9198, "step": 25555000 }, { "epoch": 73.97, "learning_rate": 1.3026924034364579e-05, "loss": 1.8592, "step": 25555500 }, { "epoch": 73.97, "learning_rate": 1.3026200386717302e-05, "loss": 1.8996, "step": 25556000 }, { "epoch": 73.98, "learning_rate": 1.3025476739070028e-05, "loss": 1.8892, "step": 25556500 }, { "epoch": 73.98, "learning_rate": 1.302475309142275e-05, "loss": 1.8617, "step": 25557000 }, { "epoch": 73.98, "learning_rate": 1.3024030891070768e-05, "loss": 1.8744, "step": 25557500 }, { "epoch": 73.98, "learning_rate": 1.3023307243423491e-05, "loss": 1.8756, "step": 25558000 }, { "epoch": 73.98, "learning_rate": 1.3022583595776214e-05, "loss": 1.8669, "step": 25558500 }, { "epoch": 73.98, "learning_rate": 1.3021859948128936e-05, "loss": 1.8748, "step": 25559000 }, { "epoch": 73.98, "learning_rate": 1.302113630048166e-05, "loss": 1.8615, "step": 25559500 }, { "epoch": 73.99, "learning_rate": 1.3020412652834386e-05, "loss": 1.8846, "step": 25560000 }, { "epoch": 73.99, "learning_rate": 1.3019689005187108e-05, "loss": 1.8722, "step": 25560500 }, { "epoch": 73.99, "learning_rate": 1.301896535753983e-05, "loss": 1.875, "step": 25561000 }, { "epoch": 73.99, "learning_rate": 1.3018241709892554e-05, "loss": 1.8676, "step": 25561500 }, { "epoch": 73.99, "learning_rate": 1.3017519509540571e-05, "loss": 1.884, "step": 25562000 }, { "epoch": 73.99, "learning_rate": 1.3016795861893293e-05, "loss": 1.8751, "step": 25562500 }, { "epoch": 73.99, "learning_rate": 1.3016072214246017e-05, "loss": 1.8725, "step": 25563000 }, { "epoch": 74.0, "learning_rate": 1.301534856659874e-05, "loss": 1.8565, "step": 25563500 }, { "epoch": 74.0, "learning_rate": 1.3014624918951465e-05, "loss": 1.8819, "step": 25564000 }, { "epoch": 74.0, "learning_rate": 1.3013901271304187e-05, "loss": 1.865, "step": 25564500 }, { "epoch": 74.0, "eval_accuracy": 0.683905754035801, "eval_accuracy_mlm": 0.6517238829352281, "eval_accuracy_nsp": 0.8564321495190038, "eval_loss": 2.1691441535949707, "eval_runtime": 331.9916, "eval_samples_per_second": 1314.449, "eval_steps_per_second": 54.769, "step": 25564928 }, { "epoch": 74.0, "learning_rate": 1.3013177623656911e-05, "loss": 1.8766, "step": 25565000 }, { "epoch": 74.0, "learning_rate": 1.3012453976009634e-05, "loss": 1.8836, "step": 25565500 }, { "epoch": 74.0, "learning_rate": 1.301173177565765e-05, "loss": 1.8374, "step": 25566000 }, { "epoch": 74.0, "learning_rate": 1.3011008128010375e-05, "loss": 1.8638, "step": 25566500 }, { "epoch": 74.01, "learning_rate": 1.3010285927658392e-05, "loss": 1.8539, "step": 25567000 }, { "epoch": 74.01, "learning_rate": 1.3009562280011118e-05, "loss": 1.8854, "step": 25567500 }, { "epoch": 74.01, "learning_rate": 1.3008840079659135e-05, "loss": 1.8779, "step": 25568000 }, { "epoch": 74.01, "learning_rate": 1.3008117879307152e-05, "loss": 1.8714, "step": 25568500 }, { "epoch": 74.01, "learning_rate": 1.3007394231659874e-05, "loss": 1.8568, "step": 25569000 }, { "epoch": 74.01, "learning_rate": 1.3006670584012598e-05, "loss": 1.8676, "step": 25569500 }, { "epoch": 74.01, "learning_rate": 1.3005948383660616e-05, "loss": 1.8523, "step": 25570000 }, { "epoch": 74.02, "learning_rate": 1.3005224736013338e-05, "loss": 1.8602, "step": 25570500 }, { "epoch": 74.02, "learning_rate": 1.3004501088366062e-05, "loss": 1.8691, "step": 25571000 }, { "epoch": 74.02, "learning_rate": 1.3003777440718784e-05, "loss": 1.8772, "step": 25571500 }, { "epoch": 74.02, "learning_rate": 1.300305379307151e-05, "loss": 1.8802, "step": 25572000 }, { "epoch": 74.02, "learning_rate": 1.3002330145424232e-05, "loss": 1.8555, "step": 25572500 }, { "epoch": 74.02, "learning_rate": 1.3001606497776956e-05, "loss": 1.8829, "step": 25573000 }, { "epoch": 74.02, "learning_rate": 1.3000884297424973e-05, "loss": 1.8468, "step": 25573500 }, { "epoch": 74.03, "learning_rate": 1.3000160649777695e-05, "loss": 1.8312, "step": 25574000 }, { "epoch": 74.03, "learning_rate": 1.2999437002130419e-05, "loss": 1.8562, "step": 25574500 }, { "epoch": 74.03, "learning_rate": 1.2998713354483141e-05, "loss": 1.8692, "step": 25575000 }, { "epoch": 74.03, "learning_rate": 1.2997989706835867e-05, "loss": 1.8611, "step": 25575500 }, { "epoch": 74.03, "learning_rate": 1.299726605918859e-05, "loss": 1.856, "step": 25576000 }, { "epoch": 74.03, "learning_rate": 1.2996542411541313e-05, "loss": 1.8584, "step": 25576500 }, { "epoch": 74.03, "learning_rate": 1.2995818763894035e-05, "loss": 1.8724, "step": 25577000 }, { "epoch": 74.04, "learning_rate": 1.2995096563542053e-05, "loss": 1.8901, "step": 25577500 }, { "epoch": 74.04, "learning_rate": 1.2994372915894777e-05, "loss": 1.8719, "step": 25578000 }, { "epoch": 74.04, "learning_rate": 1.2993649268247499e-05, "loss": 1.87, "step": 25578500 }, { "epoch": 74.04, "learning_rate": 1.2992925620600224e-05, "loss": 1.8725, "step": 25579000 }, { "epoch": 74.04, "learning_rate": 1.2992201972952947e-05, "loss": 1.8593, "step": 25579500 }, { "epoch": 74.04, "learning_rate": 1.299147832530567e-05, "loss": 1.852, "step": 25580000 }, { "epoch": 74.05, "learning_rate": 1.2990754677658393e-05, "loss": 1.8833, "step": 25580500 }, { "epoch": 74.05, "learning_rate": 1.299003247730641e-05, "loss": 1.8798, "step": 25581000 }, { "epoch": 74.05, "learning_rate": 1.2989308829659132e-05, "loss": 1.8426, "step": 25581500 }, { "epoch": 74.05, "learning_rate": 1.2988585182011856e-05, "loss": 1.8753, "step": 25582000 }, { "epoch": 74.05, "learning_rate": 1.2987861534364578e-05, "loss": 1.8586, "step": 25582500 }, { "epoch": 74.05, "learning_rate": 1.2987137886717304e-05, "loss": 1.8587, "step": 25583000 }, { "epoch": 74.05, "learning_rate": 1.2986414239070028e-05, "loss": 1.8657, "step": 25583500 }, { "epoch": 74.06, "learning_rate": 1.298569059142275e-05, "loss": 1.8668, "step": 25584000 }, { "epoch": 74.06, "learning_rate": 1.2984966943775472e-05, "loss": 1.87, "step": 25584500 }, { "epoch": 74.06, "learning_rate": 1.2984243296128196e-05, "loss": 1.8409, "step": 25585000 }, { "epoch": 74.06, "learning_rate": 1.2983519648480919e-05, "loss": 1.8457, "step": 25585500 }, { "epoch": 74.06, "learning_rate": 1.2982796000833641e-05, "loss": 1.8714, "step": 25586000 }, { "epoch": 74.06, "learning_rate": 1.2982073800481661e-05, "loss": 1.8781, "step": 25586500 }, { "epoch": 74.06, "learning_rate": 1.2981350152834384e-05, "loss": 1.8858, "step": 25587000 }, { "epoch": 74.07, "learning_rate": 1.2980626505187108e-05, "loss": 1.8781, "step": 25587500 }, { "epoch": 74.07, "learning_rate": 1.297990285753983e-05, "loss": 1.8695, "step": 25588000 }, { "epoch": 74.07, "learning_rate": 1.2979179209892554e-05, "loss": 1.8559, "step": 25588500 }, { "epoch": 74.07, "learning_rate": 1.2978455562245276e-05, "loss": 1.8752, "step": 25589000 }, { "epoch": 74.07, "learning_rate": 1.2977731914597998e-05, "loss": 1.8637, "step": 25589500 }, { "epoch": 74.07, "learning_rate": 1.2977008266950724e-05, "loss": 1.8744, "step": 25590000 }, { "epoch": 74.07, "learning_rate": 1.2976284619303448e-05, "loss": 1.8856, "step": 25590500 }, { "epoch": 74.08, "learning_rate": 1.297556097165617e-05, "loss": 1.8438, "step": 25591000 }, { "epoch": 74.08, "learning_rate": 1.2974837324008892e-05, "loss": 1.884, "step": 25591500 }, { "epoch": 74.08, "learning_rate": 1.2974113676361616e-05, "loss": 1.8512, "step": 25592000 }, { "epoch": 74.08, "learning_rate": 1.2973390028714338e-05, "loss": 1.8728, "step": 25592500 }, { "epoch": 74.08, "learning_rate": 1.2972667828362356e-05, "loss": 1.8617, "step": 25593000 }, { "epoch": 74.08, "learning_rate": 1.2971944180715081e-05, "loss": 1.8577, "step": 25593500 }, { "epoch": 74.08, "learning_rate": 1.2971220533067804e-05, "loss": 1.8476, "step": 25594000 }, { "epoch": 74.09, "learning_rate": 1.2970498332715823e-05, "loss": 1.8538, "step": 25594500 }, { "epoch": 74.09, "learning_rate": 1.296977613236384e-05, "loss": 1.8806, "step": 25595000 }, { "epoch": 74.09, "learning_rate": 1.2969052484716562e-05, "loss": 1.8718, "step": 25595500 }, { "epoch": 74.09, "learning_rate": 1.2968328837069286e-05, "loss": 1.8453, "step": 25596000 }, { "epoch": 74.09, "learning_rate": 1.2967605189422008e-05, "loss": 1.846, "step": 25596500 }, { "epoch": 74.09, "learning_rate": 1.296688154177473e-05, "loss": 1.8563, "step": 25597000 }, { "epoch": 74.09, "learning_rate": 1.2966159341422751e-05, "loss": 1.8752, "step": 25597500 }, { "epoch": 74.1, "learning_rate": 1.2965435693775473e-05, "loss": 1.8769, "step": 25598000 }, { "epoch": 74.1, "learning_rate": 1.2964712046128197e-05, "loss": 1.8646, "step": 25598500 }, { "epoch": 74.1, "learning_rate": 1.296398839848092e-05, "loss": 1.8668, "step": 25599000 }, { "epoch": 74.1, "learning_rate": 1.2963264750833642e-05, "loss": 1.8663, "step": 25599500 }, { "epoch": 74.1, "learning_rate": 1.2962541103186366e-05, "loss": 1.8639, "step": 25600000 }, { "epoch": 74.1, "learning_rate": 1.2961817455539088e-05, "loss": 1.8522, "step": 25600500 }, { "epoch": 74.1, "learning_rate": 1.2961093807891813e-05, "loss": 1.8422, "step": 25601000 }, { "epoch": 74.11, "learning_rate": 1.2960370160244537e-05, "loss": 1.8807, "step": 25601500 }, { "epoch": 74.11, "learning_rate": 1.2959647959892555e-05, "loss": 1.8776, "step": 25602000 }, { "epoch": 74.11, "learning_rate": 1.2958924312245277e-05, "loss": 1.863, "step": 25602500 }, { "epoch": 74.11, "learning_rate": 1.2958200664597999e-05, "loss": 1.8595, "step": 25603000 }, { "epoch": 74.11, "learning_rate": 1.2957477016950723e-05, "loss": 1.861, "step": 25603500 }, { "epoch": 74.11, "learning_rate": 1.2956753369303445e-05, "loss": 1.8579, "step": 25604000 }, { "epoch": 74.11, "learning_rate": 1.2956029721656167e-05, "loss": 1.862, "step": 25604500 }, { "epoch": 74.12, "learning_rate": 1.2955307521304188e-05, "loss": 1.8644, "step": 25605000 }, { "epoch": 74.12, "learning_rate": 1.2954583873656912e-05, "loss": 1.8587, "step": 25605500 }, { "epoch": 74.12, "learning_rate": 1.2953860226009634e-05, "loss": 1.8592, "step": 25606000 }, { "epoch": 74.12, "learning_rate": 1.2953136578362356e-05, "loss": 1.8631, "step": 25606500 }, { "epoch": 74.12, "learning_rate": 1.2952414378010375e-05, "loss": 1.8393, "step": 25607000 }, { "epoch": 74.12, "learning_rate": 1.2951690730363098e-05, "loss": 1.8708, "step": 25607500 }, { "epoch": 74.12, "learning_rate": 1.295096708271582e-05, "loss": 1.9054, "step": 25608000 }, { "epoch": 74.13, "learning_rate": 1.2950243435068546e-05, "loss": 1.8491, "step": 25608500 }, { "epoch": 74.13, "learning_rate": 1.294951978742127e-05, "loss": 1.8563, "step": 25609000 }, { "epoch": 74.13, "learning_rate": 1.2948796139773992e-05, "loss": 1.8621, "step": 25609500 }, { "epoch": 74.13, "learning_rate": 1.2948072492126714e-05, "loss": 1.861, "step": 25610000 }, { "epoch": 74.13, "learning_rate": 1.2947350291774731e-05, "loss": 1.8619, "step": 25610500 }, { "epoch": 74.13, "learning_rate": 1.2946626644127455e-05, "loss": 1.8591, "step": 25611000 }, { "epoch": 74.13, "learning_rate": 1.2945902996480177e-05, "loss": 1.8449, "step": 25611500 }, { "epoch": 74.14, "learning_rate": 1.29451793488329e-05, "loss": 1.85, "step": 25612000 }, { "epoch": 74.14, "learning_rate": 1.2944455701185625e-05, "loss": 1.8803, "step": 25612500 }, { "epoch": 74.14, "learning_rate": 1.2943732053538349e-05, "loss": 1.8382, "step": 25613000 }, { "epoch": 74.14, "learning_rate": 1.2943008405891071e-05, "loss": 1.8672, "step": 25613500 }, { "epoch": 74.14, "learning_rate": 1.2942284758243795e-05, "loss": 1.8569, "step": 25614000 }, { "epoch": 74.14, "learning_rate": 1.2941561110596518e-05, "loss": 1.8729, "step": 25614500 }, { "epoch": 74.14, "learning_rate": 1.294083746294924e-05, "loss": 1.8664, "step": 25615000 }, { "epoch": 74.15, "learning_rate": 1.2940113815301964e-05, "loss": 1.8779, "step": 25615500 }, { "epoch": 74.15, "learning_rate": 1.293939016765469e-05, "loss": 1.8822, "step": 25616000 }, { "epoch": 74.15, "learning_rate": 1.2938666520007412e-05, "loss": 1.8537, "step": 25616500 }, { "epoch": 74.15, "learning_rate": 1.2937942872360134e-05, "loss": 1.872, "step": 25617000 }, { "epoch": 74.15, "learning_rate": 1.2937219224712858e-05, "loss": 1.8615, "step": 25617500 }, { "epoch": 74.15, "learning_rate": 1.293649557706558e-05, "loss": 1.8534, "step": 25618000 }, { "epoch": 74.16, "learning_rate": 1.2935773376713597e-05, "loss": 1.8584, "step": 25618500 }, { "epoch": 74.16, "learning_rate": 1.2935049729066321e-05, "loss": 1.8659, "step": 25619000 }, { "epoch": 74.16, "learning_rate": 1.2934326081419047e-05, "loss": 1.8656, "step": 25619500 }, { "epoch": 74.16, "learning_rate": 1.2933602433771769e-05, "loss": 1.8892, "step": 25620000 }, { "epoch": 74.16, "learning_rate": 1.2932880233419786e-05, "loss": 1.8941, "step": 25620500 }, { "epoch": 74.16, "learning_rate": 1.2932156585772508e-05, "loss": 1.846, "step": 25621000 }, { "epoch": 74.16, "learning_rate": 1.2931432938125232e-05, "loss": 1.8795, "step": 25621500 }, { "epoch": 74.17, "learning_rate": 1.2930709290477955e-05, "loss": 1.8984, "step": 25622000 }, { "epoch": 74.17, "learning_rate": 1.2929985642830677e-05, "loss": 1.8795, "step": 25622500 }, { "epoch": 74.17, "learning_rate": 1.2929261995183402e-05, "loss": 1.8778, "step": 25623000 }, { "epoch": 74.17, "learning_rate": 1.2928538347536126e-05, "loss": 1.8631, "step": 25623500 }, { "epoch": 74.17, "learning_rate": 1.2927814699888849e-05, "loss": 1.8671, "step": 25624000 }, { "epoch": 74.17, "learning_rate": 1.2927092499536866e-05, "loss": 1.8706, "step": 25624500 }, { "epoch": 74.17, "learning_rate": 1.2926370299184883e-05, "loss": 1.8617, "step": 25625000 }, { "epoch": 74.18, "learning_rate": 1.2925646651537607e-05, "loss": 1.8757, "step": 25625500 }, { "epoch": 74.18, "learning_rate": 1.292492300389033e-05, "loss": 1.8816, "step": 25626000 }, { "epoch": 74.18, "learning_rate": 1.2924199356243053e-05, "loss": 1.8578, "step": 25626500 }, { "epoch": 74.18, "learning_rate": 1.2923475708595779e-05, "loss": 1.8654, "step": 25627000 }, { "epoch": 74.18, "learning_rate": 1.2922753508243796e-05, "loss": 1.8865, "step": 25627500 }, { "epoch": 74.18, "learning_rate": 1.2922029860596518e-05, "loss": 1.879, "step": 25628000 }, { "epoch": 74.18, "learning_rate": 1.292130621294924e-05, "loss": 1.8936, "step": 25628500 }, { "epoch": 74.19, "learning_rate": 1.2920582565301964e-05, "loss": 1.8585, "step": 25629000 }, { "epoch": 74.19, "learning_rate": 1.2919858917654687e-05, "loss": 1.8535, "step": 25629500 }, { "epoch": 74.19, "learning_rate": 1.2919136717302704e-05, "loss": 1.8578, "step": 25630000 }, { "epoch": 74.19, "learning_rate": 1.2918413069655428e-05, "loss": 1.8972, "step": 25630500 }, { "epoch": 74.19, "learning_rate": 1.2917689422008154e-05, "loss": 1.8834, "step": 25631000 }, { "epoch": 74.19, "learning_rate": 1.2916965774360876e-05, "loss": 1.8353, "step": 25631500 }, { "epoch": 74.19, "learning_rate": 1.2916242126713598e-05, "loss": 1.8555, "step": 25632000 }, { "epoch": 74.2, "learning_rate": 1.2915518479066322e-05, "loss": 1.8533, "step": 25632500 }, { "epoch": 74.2, "learning_rate": 1.2914794831419044e-05, "loss": 1.8585, "step": 25633000 }, { "epoch": 74.2, "learning_rate": 1.2914071183771766e-05, "loss": 1.8714, "step": 25633500 }, { "epoch": 74.2, "learning_rate": 1.291334753612449e-05, "loss": 1.8723, "step": 25634000 }, { "epoch": 74.2, "learning_rate": 1.2912623888477216e-05, "loss": 1.8809, "step": 25634500 }, { "epoch": 74.2, "learning_rate": 1.2911901688125233e-05, "loss": 1.8586, "step": 25635000 }, { "epoch": 74.2, "learning_rate": 1.2911178040477955e-05, "loss": 1.8758, "step": 25635500 }, { "epoch": 74.21, "learning_rate": 1.291045439283068e-05, "loss": 1.8724, "step": 25636000 }, { "epoch": 74.21, "learning_rate": 1.2909730745183402e-05, "loss": 1.849, "step": 25636500 }, { "epoch": 74.21, "learning_rate": 1.2909008544831419e-05, "loss": 1.8768, "step": 25637000 }, { "epoch": 74.21, "learning_rate": 1.2908284897184143e-05, "loss": 1.8423, "step": 25637500 }, { "epoch": 74.21, "learning_rate": 1.2907561249536865e-05, "loss": 1.8815, "step": 25638000 }, { "epoch": 74.21, "learning_rate": 1.290683760188959e-05, "loss": 1.8733, "step": 25638500 }, { "epoch": 74.21, "learning_rate": 1.2906113954242313e-05, "loss": 1.8769, "step": 25639000 }, { "epoch": 74.22, "learning_rate": 1.2905390306595037e-05, "loss": 1.8551, "step": 25639500 }, { "epoch": 74.22, "learning_rate": 1.2904666658947759e-05, "loss": 1.8807, "step": 25640000 }, { "epoch": 74.22, "learning_rate": 1.2903943011300481e-05, "loss": 1.8362, "step": 25640500 }, { "epoch": 74.22, "learning_rate": 1.2903219363653205e-05, "loss": 1.8615, "step": 25641000 }, { "epoch": 74.22, "learning_rate": 1.290249571600593e-05, "loss": 1.8538, "step": 25641500 }, { "epoch": 74.22, "learning_rate": 1.2901772068358653e-05, "loss": 1.8715, "step": 25642000 }, { "epoch": 74.22, "learning_rate": 1.2901048420711375e-05, "loss": 1.8426, "step": 25642500 }, { "epoch": 74.23, "learning_rate": 1.29003247730641e-05, "loss": 1.8711, "step": 25643000 }, { "epoch": 74.23, "learning_rate": 1.2899602572712116e-05, "loss": 1.8671, "step": 25643500 }, { "epoch": 74.23, "learning_rate": 1.2898878925064839e-05, "loss": 1.8627, "step": 25644000 }, { "epoch": 74.23, "learning_rate": 1.2898155277417563e-05, "loss": 1.8687, "step": 25644500 }, { "epoch": 74.23, "learning_rate": 1.289743307706558e-05, "loss": 1.8507, "step": 25645000 }, { "epoch": 74.23, "learning_rate": 1.2896710876713597e-05, "loss": 1.8536, "step": 25645500 }, { "epoch": 74.23, "learning_rate": 1.2895987229066323e-05, "loss": 1.8822, "step": 25646000 }, { "epoch": 74.24, "learning_rate": 1.2895263581419045e-05, "loss": 1.842, "step": 25646500 }, { "epoch": 74.24, "learning_rate": 1.2894539933771769e-05, "loss": 1.8745, "step": 25647000 }, { "epoch": 74.24, "learning_rate": 1.2893816286124491e-05, "loss": 1.892, "step": 25647500 }, { "epoch": 74.24, "learning_rate": 1.2893092638477213e-05, "loss": 1.8604, "step": 25648000 }, { "epoch": 74.24, "learning_rate": 1.2892368990829937e-05, "loss": 1.8566, "step": 25648500 }, { "epoch": 74.24, "learning_rate": 1.289164534318266e-05, "loss": 1.873, "step": 25649000 }, { "epoch": 74.24, "learning_rate": 1.2890921695535385e-05, "loss": 1.8613, "step": 25649500 }, { "epoch": 74.25, "learning_rate": 1.2890198047888107e-05, "loss": 1.8793, "step": 25650000 }, { "epoch": 74.25, "learning_rate": 1.2889474400240831e-05, "loss": 1.8848, "step": 25650500 }, { "epoch": 74.25, "learning_rate": 1.2888753647184144e-05, "loss": 1.8684, "step": 25651000 }, { "epoch": 74.25, "learning_rate": 1.2888029999536866e-05, "loss": 1.8614, "step": 25651500 }, { "epoch": 74.25, "learning_rate": 1.2887306351889588e-05, "loss": 1.8901, "step": 25652000 }, { "epoch": 74.25, "learning_rate": 1.2886582704242312e-05, "loss": 1.859, "step": 25652500 }, { "epoch": 74.25, "learning_rate": 1.2885859056595038e-05, "loss": 1.8816, "step": 25653000 }, { "epoch": 74.26, "learning_rate": 1.288513540894776e-05, "loss": 1.8775, "step": 25653500 }, { "epoch": 74.26, "learning_rate": 1.2884413208595777e-05, "loss": 1.87, "step": 25654000 }, { "epoch": 74.26, "learning_rate": 1.2883689560948501e-05, "loss": 1.8511, "step": 25654500 }, { "epoch": 74.26, "learning_rate": 1.2882965913301223e-05, "loss": 1.8875, "step": 25655000 }, { "epoch": 74.26, "learning_rate": 1.2882242265653945e-05, "loss": 1.872, "step": 25655500 }, { "epoch": 74.26, "learning_rate": 1.288151861800667e-05, "loss": 1.8887, "step": 25656000 }, { "epoch": 74.27, "learning_rate": 1.2880794970359392e-05, "loss": 1.879, "step": 25656500 }, { "epoch": 74.27, "learning_rate": 1.2880071322712117e-05, "loss": 1.8574, "step": 25657000 }, { "epoch": 74.27, "learning_rate": 1.287934767506484e-05, "loss": 1.8656, "step": 25657500 }, { "epoch": 74.27, "learning_rate": 1.2878624027417563e-05, "loss": 1.8684, "step": 25658000 }, { "epoch": 74.27, "learning_rate": 1.2877900379770286e-05, "loss": 1.8387, "step": 25658500 }, { "epoch": 74.27, "learning_rate": 1.2877176732123008e-05, "loss": 1.8701, "step": 25659000 }, { "epoch": 74.27, "learning_rate": 1.2876454531771027e-05, "loss": 1.8678, "step": 25659500 }, { "epoch": 74.28, "learning_rate": 1.2875730884123749e-05, "loss": 1.8694, "step": 25660000 }, { "epoch": 74.28, "learning_rate": 1.2875007236476475e-05, "loss": 1.85, "step": 25660500 }, { "epoch": 74.28, "learning_rate": 1.2874283588829197e-05, "loss": 1.8828, "step": 25661000 }, { "epoch": 74.28, "learning_rate": 1.287355994118192e-05, "loss": 1.8576, "step": 25661500 }, { "epoch": 74.28, "learning_rate": 1.2872836293534643e-05, "loss": 1.8716, "step": 25662000 }, { "epoch": 74.28, "learning_rate": 1.2872112645887365e-05, "loss": 1.8851, "step": 25662500 }, { "epoch": 74.28, "learning_rate": 1.287138899824009e-05, "loss": 1.8652, "step": 25663000 }, { "epoch": 74.29, "learning_rate": 1.2870666797888106e-05, "loss": 1.8503, "step": 25663500 }, { "epoch": 74.29, "learning_rate": 1.2869943150240832e-05, "loss": 1.8456, "step": 25664000 }, { "epoch": 74.29, "learning_rate": 1.286922094988885e-05, "loss": 1.8925, "step": 25664500 }, { "epoch": 74.29, "learning_rate": 1.2868497302241572e-05, "loss": 1.8656, "step": 25665000 }, { "epoch": 74.29, "learning_rate": 1.2867773654594295e-05, "loss": 1.8586, "step": 25665500 }, { "epoch": 74.29, "learning_rate": 1.2867050006947018e-05, "loss": 1.8586, "step": 25666000 }, { "epoch": 74.29, "learning_rate": 1.2866327806595035e-05, "loss": 1.8578, "step": 25666500 }, { "epoch": 74.3, "learning_rate": 1.2865605606243052e-05, "loss": 1.8479, "step": 25667000 }, { "epoch": 74.3, "learning_rate": 1.2864881958595776e-05, "loss": 1.8989, "step": 25667500 }, { "epoch": 74.3, "learning_rate": 1.2864158310948502e-05, "loss": 1.8891, "step": 25668000 }, { "epoch": 74.3, "learning_rate": 1.2863434663301224e-05, "loss": 1.879, "step": 25668500 }, { "epoch": 74.3, "learning_rate": 1.2862711015653948e-05, "loss": 1.8785, "step": 25669000 }, { "epoch": 74.3, "learning_rate": 1.286198736800667e-05, "loss": 1.8729, "step": 25669500 }, { "epoch": 74.3, "learning_rate": 1.2861263720359392e-05, "loss": 1.8579, "step": 25670000 }, { "epoch": 74.31, "learning_rate": 1.286054152000741e-05, "loss": 1.8699, "step": 25670500 }, { "epoch": 74.31, "learning_rate": 1.2859819319655429e-05, "loss": 1.8759, "step": 25671000 }, { "epoch": 74.31, "learning_rate": 1.285909567200815e-05, "loss": 1.8607, "step": 25671500 }, { "epoch": 74.31, "learning_rate": 1.2858372024360876e-05, "loss": 1.8896, "step": 25672000 }, { "epoch": 74.31, "learning_rate": 1.2857648376713599e-05, "loss": 1.866, "step": 25672500 }, { "epoch": 74.31, "learning_rate": 1.2856924729066323e-05, "loss": 1.8707, "step": 25673000 }, { "epoch": 74.31, "learning_rate": 1.2856201081419045e-05, "loss": 1.8633, "step": 25673500 }, { "epoch": 74.32, "learning_rate": 1.2855477433771767e-05, "loss": 1.8752, "step": 25674000 }, { "epoch": 74.32, "learning_rate": 1.2854753786124491e-05, "loss": 1.8634, "step": 25674500 }, { "epoch": 74.32, "learning_rate": 1.2854030138477213e-05, "loss": 1.8539, "step": 25675000 }, { "epoch": 74.32, "learning_rate": 1.2853306490829939e-05, "loss": 1.8846, "step": 25675500 }, { "epoch": 74.32, "learning_rate": 1.2852584290477956e-05, "loss": 1.8624, "step": 25676000 }, { "epoch": 74.32, "learning_rate": 1.285186064283068e-05, "loss": 1.8563, "step": 25676500 }, { "epoch": 74.32, "learning_rate": 1.2851136995183402e-05, "loss": 1.8674, "step": 25677000 }, { "epoch": 74.33, "learning_rate": 1.2850413347536124e-05, "loss": 1.8831, "step": 25677500 }, { "epoch": 74.33, "learning_rate": 1.2849689699888848e-05, "loss": 1.8746, "step": 25678000 }, { "epoch": 74.33, "learning_rate": 1.284896605224157e-05, "loss": 1.8682, "step": 25678500 }, { "epoch": 74.33, "learning_rate": 1.2848242404594296e-05, "loss": 1.8757, "step": 25679000 }, { "epoch": 74.33, "learning_rate": 1.2847518756947018e-05, "loss": 1.8754, "step": 25679500 }, { "epoch": 74.33, "learning_rate": 1.2846795109299742e-05, "loss": 1.8552, "step": 25680000 }, { "epoch": 74.33, "learning_rate": 1.2846071461652465e-05, "loss": 1.8785, "step": 25680500 }, { "epoch": 74.34, "learning_rate": 1.2845349261300482e-05, "loss": 1.8723, "step": 25681000 }, { "epoch": 74.34, "learning_rate": 1.2844625613653206e-05, "loss": 1.8607, "step": 25681500 }, { "epoch": 74.34, "learning_rate": 1.2843901966005928e-05, "loss": 1.8916, "step": 25682000 }, { "epoch": 74.34, "learning_rate": 1.284317831835865e-05, "loss": 1.9049, "step": 25682500 }, { "epoch": 74.34, "learning_rate": 1.2842454670711376e-05, "loss": 1.8837, "step": 25683000 }, { "epoch": 74.34, "learning_rate": 1.28417310230641e-05, "loss": 1.8582, "step": 25683500 }, { "epoch": 74.34, "learning_rate": 1.2841007375416822e-05, "loss": 1.8933, "step": 25684000 }, { "epoch": 74.35, "learning_rate": 1.2840283727769544e-05, "loss": 1.8792, "step": 25684500 }, { "epoch": 74.35, "learning_rate": 1.2839562974712857e-05, "loss": 1.8868, "step": 25685000 }, { "epoch": 74.35, "learning_rate": 1.283883932706558e-05, "loss": 1.8663, "step": 25685500 }, { "epoch": 74.35, "learning_rate": 1.2838115679418303e-05, "loss": 1.8706, "step": 25686000 }, { "epoch": 74.35, "learning_rate": 1.2837392031771025e-05, "loss": 1.8809, "step": 25686500 }, { "epoch": 74.35, "learning_rate": 1.283666838412375e-05, "loss": 1.8783, "step": 25687000 }, { "epoch": 74.35, "learning_rate": 1.2835944736476475e-05, "loss": 1.888, "step": 25687500 }, { "epoch": 74.36, "learning_rate": 1.2835221088829197e-05, "loss": 1.8741, "step": 25688000 }, { "epoch": 74.36, "learning_rate": 1.2834497441181919e-05, "loss": 1.8761, "step": 25688500 }, { "epoch": 74.36, "learning_rate": 1.2833775240829938e-05, "loss": 1.8632, "step": 25689000 }, { "epoch": 74.36, "learning_rate": 1.283305159318266e-05, "loss": 1.8881, "step": 25689500 }, { "epoch": 74.36, "learning_rate": 1.2832327945535382e-05, "loss": 1.8827, "step": 25690000 }, { "epoch": 74.36, "learning_rate": 1.2831604297888108e-05, "loss": 1.8519, "step": 25690500 }, { "epoch": 74.36, "learning_rate": 1.2830880650240832e-05, "loss": 1.8488, "step": 25691000 }, { "epoch": 74.37, "learning_rate": 1.2830157002593554e-05, "loss": 1.8752, "step": 25691500 }, { "epoch": 74.37, "learning_rate": 1.2829433354946276e-05, "loss": 1.8578, "step": 25692000 }, { "epoch": 74.37, "learning_rate": 1.2828711154594294e-05, "loss": 1.8695, "step": 25692500 }, { "epoch": 74.37, "learning_rate": 1.2827988954242313e-05, "loss": 1.8557, "step": 25693000 }, { "epoch": 74.37, "learning_rate": 1.2827265306595035e-05, "loss": 1.8755, "step": 25693500 }, { "epoch": 74.37, "learning_rate": 1.2826541658947757e-05, "loss": 1.8674, "step": 25694000 }, { "epoch": 74.38, "learning_rate": 1.2825818011300483e-05, "loss": 1.8787, "step": 25694500 }, { "epoch": 74.38, "learning_rate": 1.2825094363653207e-05, "loss": 1.8663, "step": 25695000 }, { "epoch": 74.38, "learning_rate": 1.2824370716005929e-05, "loss": 1.886, "step": 25695500 }, { "epoch": 74.38, "learning_rate": 1.2823647068358651e-05, "loss": 1.894, "step": 25696000 }, { "epoch": 74.38, "learning_rate": 1.2822923420711375e-05, "loss": 1.8583, "step": 25696500 }, { "epoch": 74.38, "learning_rate": 1.2822201220359392e-05, "loss": 1.8597, "step": 25697000 }, { "epoch": 74.38, "learning_rate": 1.2821477572712115e-05, "loss": 1.8529, "step": 25697500 }, { "epoch": 74.39, "learning_rate": 1.2820755372360135e-05, "loss": 1.884, "step": 25698000 }, { "epoch": 74.39, "learning_rate": 1.2820031724712857e-05, "loss": 1.8736, "step": 25698500 }, { "epoch": 74.39, "learning_rate": 1.2819308077065581e-05, "loss": 1.8871, "step": 25699000 }, { "epoch": 74.39, "learning_rate": 1.2818584429418304e-05, "loss": 1.8545, "step": 25699500 }, { "epoch": 74.39, "learning_rate": 1.2817860781771027e-05, "loss": 1.8477, "step": 25700000 }, { "epoch": 74.39, "learning_rate": 1.281713713412375e-05, "loss": 1.8672, "step": 25700500 }, { "epoch": 74.39, "learning_rate": 1.2816413486476472e-05, "loss": 1.8624, "step": 25701000 }, { "epoch": 74.4, "learning_rate": 1.2815689838829198e-05, "loss": 1.8755, "step": 25701500 }, { "epoch": 74.4, "learning_rate": 1.2814966191181921e-05, "loss": 1.9007, "step": 25702000 }, { "epoch": 74.4, "learning_rate": 1.2814243990829939e-05, "loss": 1.8749, "step": 25702500 }, { "epoch": 74.4, "learning_rate": 1.2813520343182661e-05, "loss": 1.873, "step": 25703000 }, { "epoch": 74.4, "learning_rate": 1.2812796695535383e-05, "loss": 1.8475, "step": 25703500 }, { "epoch": 74.4, "learning_rate": 1.2812073047888107e-05, "loss": 1.8705, "step": 25704000 }, { "epoch": 74.4, "learning_rate": 1.2811350847536124e-05, "loss": 1.8518, "step": 25704500 }, { "epoch": 74.41, "learning_rate": 1.2810627199888847e-05, "loss": 1.8668, "step": 25705000 }, { "epoch": 74.41, "learning_rate": 1.2809904999536866e-05, "loss": 1.8656, "step": 25705500 }, { "epoch": 74.41, "learning_rate": 1.2809181351889591e-05, "loss": 1.8662, "step": 25706000 }, { "epoch": 74.41, "learning_rate": 1.2808457704242313e-05, "loss": 1.9035, "step": 25706500 }, { "epoch": 74.41, "learning_rate": 1.2807734056595036e-05, "loss": 1.856, "step": 25707000 }, { "epoch": 74.41, "learning_rate": 1.280701040894776e-05, "loss": 1.8711, "step": 25707500 }, { "epoch": 74.41, "learning_rate": 1.2806286761300482e-05, "loss": 1.8714, "step": 25708000 }, { "epoch": 74.42, "learning_rate": 1.2805563113653204e-05, "loss": 1.856, "step": 25708500 }, { "epoch": 74.42, "learning_rate": 1.280483946600593e-05, "loss": 1.8615, "step": 25709000 }, { "epoch": 74.42, "learning_rate": 1.2804115818358654e-05, "loss": 1.8637, "step": 25709500 }, { "epoch": 74.42, "learning_rate": 1.2803392170711376e-05, "loss": 1.8839, "step": 25710000 }, { "epoch": 74.42, "learning_rate": 1.2802668523064098e-05, "loss": 1.864, "step": 25710500 }, { "epoch": 74.42, "learning_rate": 1.2801944875416822e-05, "loss": 1.879, "step": 25711000 }, { "epoch": 74.42, "learning_rate": 1.280122267506484e-05, "loss": 1.8685, "step": 25711500 }, { "epoch": 74.43, "learning_rate": 1.2800499027417561e-05, "loss": 1.8603, "step": 25712000 }, { "epoch": 74.43, "learning_rate": 1.2799775379770285e-05, "loss": 1.8713, "step": 25712500 }, { "epoch": 74.43, "learning_rate": 1.2799051732123011e-05, "loss": 1.8369, "step": 25713000 }, { "epoch": 74.43, "learning_rate": 1.2798329531771028e-05, "loss": 1.8515, "step": 25713500 }, { "epoch": 74.43, "learning_rate": 1.279760588412375e-05, "loss": 1.8857, "step": 25714000 }, { "epoch": 74.43, "learning_rate": 1.2796882236476473e-05, "loss": 1.8831, "step": 25714500 }, { "epoch": 74.43, "learning_rate": 1.2796158588829197e-05, "loss": 1.8715, "step": 25715000 }, { "epoch": 74.44, "learning_rate": 1.2795434941181919e-05, "loss": 1.8628, "step": 25715500 }, { "epoch": 74.44, "learning_rate": 1.2794711293534641e-05, "loss": 1.8587, "step": 25716000 }, { "epoch": 74.44, "learning_rate": 1.279398909318266e-05, "loss": 1.863, "step": 25716500 }, { "epoch": 74.44, "learning_rate": 1.2793266892830679e-05, "loss": 1.8529, "step": 25717000 }, { "epoch": 74.44, "learning_rate": 1.2792543245183403e-05, "loss": 1.8513, "step": 25717500 }, { "epoch": 74.44, "learning_rate": 1.2791819597536125e-05, "loss": 1.8899, "step": 25718000 }, { "epoch": 74.44, "learning_rate": 1.2791095949888849e-05, "loss": 1.8604, "step": 25718500 }, { "epoch": 74.45, "learning_rate": 1.2790372302241571e-05, "loss": 1.8829, "step": 25719000 }, { "epoch": 74.45, "learning_rate": 1.2789648654594294e-05, "loss": 1.8858, "step": 25719500 }, { "epoch": 74.45, "learning_rate": 1.2788925006947017e-05, "loss": 1.8571, "step": 25720000 }, { "epoch": 74.45, "learning_rate": 1.2788201359299743e-05, "loss": 1.8879, "step": 25720500 }, { "epoch": 74.45, "learning_rate": 1.278747915894776e-05, "loss": 1.8683, "step": 25721000 }, { "epoch": 74.45, "learning_rate": 1.2786755511300483e-05, "loss": 1.9108, "step": 25721500 }, { "epoch": 74.45, "learning_rate": 1.2786031863653205e-05, "loss": 1.8708, "step": 25722000 }, { "epoch": 74.46, "learning_rate": 1.2785309663301224e-05, "loss": 1.8591, "step": 25722500 }, { "epoch": 74.46, "learning_rate": 1.2784586015653946e-05, "loss": 1.879, "step": 25723000 }, { "epoch": 74.46, "learning_rate": 1.2783862368006668e-05, "loss": 1.8715, "step": 25723500 }, { "epoch": 74.46, "learning_rate": 1.2783138720359392e-05, "loss": 1.8826, "step": 25724000 }, { "epoch": 74.46, "learning_rate": 1.2782415072712118e-05, "loss": 1.8781, "step": 25724500 }, { "epoch": 74.46, "learning_rate": 1.278169142506484e-05, "loss": 1.8748, "step": 25725000 }, { "epoch": 74.46, "learning_rate": 1.2780969224712857e-05, "loss": 1.8686, "step": 25725500 }, { "epoch": 74.47, "learning_rate": 1.2780245577065581e-05, "loss": 1.886, "step": 25726000 }, { "epoch": 74.47, "learning_rate": 1.2779521929418303e-05, "loss": 1.8718, "step": 25726500 }, { "epoch": 74.47, "learning_rate": 1.2778798281771026e-05, "loss": 1.8603, "step": 25727000 }, { "epoch": 74.47, "learning_rate": 1.277807463412375e-05, "loss": 1.8711, "step": 25727500 }, { "epoch": 74.47, "learning_rate": 1.2777350986476475e-05, "loss": 1.8838, "step": 25728000 }, { "epoch": 74.47, "learning_rate": 1.2776627338829197e-05, "loss": 1.8853, "step": 25728500 }, { "epoch": 74.47, "learning_rate": 1.277590369118192e-05, "loss": 1.8514, "step": 25729000 }, { "epoch": 74.48, "learning_rate": 1.2775180043534644e-05, "loss": 1.8831, "step": 25729500 }, { "epoch": 74.48, "learning_rate": 1.2774457843182661e-05, "loss": 1.8762, "step": 25730000 }, { "epoch": 74.48, "learning_rate": 1.2773734195535383e-05, "loss": 1.8853, "step": 25730500 }, { "epoch": 74.48, "learning_rate": 1.2773010547888107e-05, "loss": 1.8776, "step": 25731000 }, { "epoch": 74.48, "learning_rate": 1.2772286900240833e-05, "loss": 1.8695, "step": 25731500 }, { "epoch": 74.48, "learning_rate": 1.277156469988885e-05, "loss": 1.8768, "step": 25732000 }, { "epoch": 74.49, "learning_rate": 1.2770842499536867e-05, "loss": 1.8675, "step": 25732500 }, { "epoch": 74.49, "learning_rate": 1.277011885188959e-05, "loss": 1.8814, "step": 25733000 }, { "epoch": 74.49, "learning_rate": 1.2769395204242313e-05, "loss": 1.8669, "step": 25733500 }, { "epoch": 74.49, "learning_rate": 1.2768671556595036e-05, "loss": 1.8621, "step": 25734000 }, { "epoch": 74.49, "learning_rate": 1.2767949356243053e-05, "loss": 1.8624, "step": 25734500 }, { "epoch": 74.49, "learning_rate": 1.2767225708595775e-05, "loss": 1.8501, "step": 25735000 }, { "epoch": 74.49, "learning_rate": 1.27665020609485e-05, "loss": 1.8391, "step": 25735500 }, { "epoch": 74.5, "learning_rate": 1.2765778413301225e-05, "loss": 1.8835, "step": 25736000 }, { "epoch": 74.5, "learning_rate": 1.2765056212949242e-05, "loss": 1.8665, "step": 25736500 }, { "epoch": 74.5, "learning_rate": 1.2764332565301964e-05, "loss": 1.9007, "step": 25737000 }, { "epoch": 74.5, "learning_rate": 1.2763608917654688e-05, "loss": 1.8715, "step": 25737500 }, { "epoch": 74.5, "learning_rate": 1.276288527000741e-05, "loss": 1.8638, "step": 25738000 }, { "epoch": 74.5, "learning_rate": 1.2762161622360132e-05, "loss": 1.8732, "step": 25738500 }, { "epoch": 74.5, "learning_rate": 1.2761439422008151e-05, "loss": 1.8593, "step": 25739000 }, { "epoch": 74.51, "learning_rate": 1.2760715774360877e-05, "loss": 1.8607, "step": 25739500 }, { "epoch": 74.51, "learning_rate": 1.27599921267136e-05, "loss": 1.8471, "step": 25740000 }, { "epoch": 74.51, "learning_rate": 1.2759268479066321e-05, "loss": 1.8638, "step": 25740500 }, { "epoch": 74.51, "learning_rate": 1.2758544831419045e-05, "loss": 1.8603, "step": 25741000 }, { "epoch": 74.51, "learning_rate": 1.2757821183771768e-05, "loss": 1.8456, "step": 25741500 }, { "epoch": 74.51, "learning_rate": 1.275709753612449e-05, "loss": 1.8919, "step": 25742000 }, { "epoch": 74.51, "learning_rate": 1.2756373888477214e-05, "loss": 1.8727, "step": 25742500 }, { "epoch": 74.52, "learning_rate": 1.275565024082994e-05, "loss": 1.87, "step": 25743000 }, { "epoch": 74.52, "learning_rate": 1.2754926593182662e-05, "loss": 1.8503, "step": 25743500 }, { "epoch": 74.52, "learning_rate": 1.2754202945535384e-05, "loss": 1.8523, "step": 25744000 }, { "epoch": 74.52, "learning_rate": 1.2753479297888108e-05, "loss": 1.8817, "step": 25744500 }, { "epoch": 74.52, "learning_rate": 1.275275565024083e-05, "loss": 1.8767, "step": 25745000 }, { "epoch": 74.52, "learning_rate": 1.2752033449888847e-05, "loss": 1.8523, "step": 25745500 }, { "epoch": 74.52, "learning_rate": 1.2751309802241571e-05, "loss": 1.8813, "step": 25746000 }, { "epoch": 74.53, "learning_rate": 1.2750586154594297e-05, "loss": 1.881, "step": 25746500 }, { "epoch": 74.53, "learning_rate": 1.2749862506947019e-05, "loss": 1.8415, "step": 25747000 }, { "epoch": 74.53, "learning_rate": 1.2749138859299741e-05, "loss": 1.8841, "step": 25747500 }, { "epoch": 74.53, "learning_rate": 1.2748416658947759e-05, "loss": 1.8834, "step": 25748000 }, { "epoch": 74.53, "learning_rate": 1.2747693011300482e-05, "loss": 1.8848, "step": 25748500 }, { "epoch": 74.53, "learning_rate": 1.27469708109485e-05, "loss": 1.9024, "step": 25749000 }, { "epoch": 74.53, "learning_rate": 1.2746247163301222e-05, "loss": 1.8651, "step": 25749500 }, { "epoch": 74.54, "learning_rate": 1.2745523515653946e-05, "loss": 1.8427, "step": 25750000 }, { "epoch": 74.54, "learning_rate": 1.2744799868006672e-05, "loss": 1.8667, "step": 25750500 }, { "epoch": 74.54, "learning_rate": 1.2744076220359394e-05, "loss": 1.8584, "step": 25751000 }, { "epoch": 74.54, "learning_rate": 1.2743354020007411e-05, "loss": 1.8404, "step": 25751500 }, { "epoch": 74.54, "learning_rate": 1.2742630372360135e-05, "loss": 1.8661, "step": 25752000 }, { "epoch": 74.54, "learning_rate": 1.2741906724712857e-05, "loss": 1.8534, "step": 25752500 }, { "epoch": 74.54, "learning_rate": 1.274118307706558e-05, "loss": 1.8523, "step": 25753000 }, { "epoch": 74.55, "learning_rate": 1.2740459429418303e-05, "loss": 1.8796, "step": 25753500 }, { "epoch": 74.55, "learning_rate": 1.2739735781771026e-05, "loss": 1.8825, "step": 25754000 }, { "epoch": 74.55, "learning_rate": 1.2739012134123751e-05, "loss": 1.8603, "step": 25754500 }, { "epoch": 74.55, "learning_rate": 1.2738288486476473e-05, "loss": 1.8837, "step": 25755000 }, { "epoch": 74.55, "learning_rate": 1.2737567733419786e-05, "loss": 1.8697, "step": 25755500 }, { "epoch": 74.55, "learning_rate": 1.273684408577251e-05, "loss": 1.8712, "step": 25756000 }, { "epoch": 74.55, "learning_rate": 1.2736120438125232e-05, "loss": 1.8921, "step": 25756500 }, { "epoch": 74.56, "learning_rate": 1.2735398237773249e-05, "loss": 1.8254, "step": 25757000 }, { "epoch": 74.56, "learning_rate": 1.2734676037421266e-05, "loss": 1.8733, "step": 25757500 }, { "epoch": 74.56, "learning_rate": 1.273395238977399e-05, "loss": 1.8556, "step": 25758000 }, { "epoch": 74.56, "learning_rate": 1.2733228742126716e-05, "loss": 1.8965, "step": 25758500 }, { "epoch": 74.56, "learning_rate": 1.2732505094479438e-05, "loss": 1.8902, "step": 25759000 }, { "epoch": 74.56, "learning_rate": 1.273178144683216e-05, "loss": 1.8747, "step": 25759500 }, { "epoch": 74.56, "learning_rate": 1.2731057799184884e-05, "loss": 1.8753, "step": 25760000 }, { "epoch": 74.57, "learning_rate": 1.2730334151537607e-05, "loss": 1.8673, "step": 25760500 }, { "epoch": 74.57, "learning_rate": 1.2729610503890329e-05, "loss": 1.8804, "step": 25761000 }, { "epoch": 74.57, "learning_rate": 1.2728888303538348e-05, "loss": 1.8618, "step": 25761500 }, { "epoch": 74.57, "learning_rate": 1.2728164655891073e-05, "loss": 1.8625, "step": 25762000 }, { "epoch": 74.57, "learning_rate": 1.2727441008243796e-05, "loss": 1.8525, "step": 25762500 }, { "epoch": 74.57, "learning_rate": 1.2726717360596518e-05, "loss": 1.8775, "step": 25763000 }, { "epoch": 74.57, "learning_rate": 1.2725993712949242e-05, "loss": 1.8507, "step": 25763500 }, { "epoch": 74.58, "learning_rate": 1.2725270065301964e-05, "loss": 1.8661, "step": 25764000 }, { "epoch": 74.58, "learning_rate": 1.2724546417654686e-05, "loss": 1.8746, "step": 25764500 }, { "epoch": 74.58, "learning_rate": 1.2723824217302705e-05, "loss": 1.8494, "step": 25765000 }, { "epoch": 74.58, "learning_rate": 1.2723100569655427e-05, "loss": 1.8657, "step": 25765500 }, { "epoch": 74.58, "learning_rate": 1.2722376922008153e-05, "loss": 1.889, "step": 25766000 }, { "epoch": 74.58, "learning_rate": 1.2721653274360875e-05, "loss": 1.8514, "step": 25766500 }, { "epoch": 74.58, "learning_rate": 1.2720929626713599e-05, "loss": 1.8633, "step": 25767000 }, { "epoch": 74.59, "learning_rate": 1.2720205979066321e-05, "loss": 1.8598, "step": 25767500 }, { "epoch": 74.59, "learning_rate": 1.2719482331419044e-05, "loss": 1.8897, "step": 25768000 }, { "epoch": 74.59, "learning_rate": 1.2718758683771768e-05, "loss": 1.88, "step": 25768500 }, { "epoch": 74.59, "learning_rate": 1.2718036483419785e-05, "loss": 1.8576, "step": 25769000 }, { "epoch": 74.59, "learning_rate": 1.271731283577251e-05, "loss": 1.883, "step": 25769500 }, { "epoch": 74.59, "learning_rate": 1.2716589188125233e-05, "loss": 1.8868, "step": 25770000 }, { "epoch": 74.6, "learning_rate": 1.2715865540477957e-05, "loss": 1.8803, "step": 25770500 }, { "epoch": 74.6, "learning_rate": 1.2715143340125974e-05, "loss": 1.8818, "step": 25771000 }, { "epoch": 74.6, "learning_rate": 1.2714419692478696e-05, "loss": 1.8682, "step": 25771500 }, { "epoch": 74.6, "learning_rate": 1.2713696044831418e-05, "loss": 1.867, "step": 25772000 }, { "epoch": 74.6, "learning_rate": 1.2712972397184142e-05, "loss": 1.8657, "step": 25772500 }, { "epoch": 74.6, "learning_rate": 1.2712248749536864e-05, "loss": 1.8409, "step": 25773000 }, { "epoch": 74.6, "learning_rate": 1.271152510188959e-05, "loss": 1.868, "step": 25773500 }, { "epoch": 74.61, "learning_rate": 1.2710801454242314e-05, "loss": 1.8814, "step": 25774000 }, { "epoch": 74.61, "learning_rate": 1.2710077806595036e-05, "loss": 1.8794, "step": 25774500 }, { "epoch": 74.61, "learning_rate": 1.2709355606243053e-05, "loss": 1.8589, "step": 25775000 }, { "epoch": 74.61, "learning_rate": 1.2708631958595776e-05, "loss": 1.8652, "step": 25775500 }, { "epoch": 74.61, "learning_rate": 1.2707909758243795e-05, "loss": 1.906, "step": 25776000 }, { "epoch": 74.61, "learning_rate": 1.2707186110596517e-05, "loss": 1.8906, "step": 25776500 }, { "epoch": 74.61, "learning_rate": 1.2706462462949243e-05, "loss": 1.8732, "step": 25777000 }, { "epoch": 74.62, "learning_rate": 1.270574026259726e-05, "loss": 1.8486, "step": 25777500 }, { "epoch": 74.62, "learning_rate": 1.2705016614949982e-05, "loss": 1.8636, "step": 25778000 }, { "epoch": 74.62, "learning_rate": 1.2704292967302706e-05, "loss": 1.8869, "step": 25778500 }, { "epoch": 74.62, "learning_rate": 1.2703569319655428e-05, "loss": 1.8725, "step": 25779000 }, { "epoch": 74.62, "learning_rate": 1.270284567200815e-05, "loss": 1.861, "step": 25779500 }, { "epoch": 74.62, "learning_rate": 1.2702122024360874e-05, "loss": 1.8875, "step": 25780000 }, { "epoch": 74.62, "learning_rate": 1.2701398376713597e-05, "loss": 1.8912, "step": 25780500 }, { "epoch": 74.63, "learning_rate": 1.2700674729066322e-05, "loss": 1.8526, "step": 25781000 }, { "epoch": 74.63, "learning_rate": 1.2699951081419046e-05, "loss": 1.8711, "step": 25781500 }, { "epoch": 74.63, "learning_rate": 1.2699227433771768e-05, "loss": 1.881, "step": 25782000 }, { "epoch": 74.63, "learning_rate": 1.269850378612449e-05, "loss": 1.8879, "step": 25782500 }, { "epoch": 74.63, "learning_rate": 1.2697780138477215e-05, "loss": 1.8843, "step": 25783000 }, { "epoch": 74.63, "learning_rate": 1.2697056490829937e-05, "loss": 1.8816, "step": 25783500 }, { "epoch": 74.63, "learning_rate": 1.2696332843182659e-05, "loss": 1.8598, "step": 25784000 }, { "epoch": 74.64, "learning_rate": 1.2695609195535385e-05, "loss": 1.8399, "step": 25784500 }, { "epoch": 74.64, "learning_rate": 1.2694885547888109e-05, "loss": 1.8663, "step": 25785000 }, { "epoch": 74.64, "learning_rate": 1.269416190024083e-05, "loss": 1.8751, "step": 25785500 }, { "epoch": 74.64, "learning_rate": 1.2693441147184143e-05, "loss": 1.8465, "step": 25786000 }, { "epoch": 74.64, "learning_rate": 1.269271894683216e-05, "loss": 1.8692, "step": 25786500 }, { "epoch": 74.64, "learning_rate": 1.2691995299184884e-05, "loss": 1.8686, "step": 25787000 }, { "epoch": 74.64, "learning_rate": 1.2691271651537606e-05, "loss": 1.8806, "step": 25787500 }, { "epoch": 74.65, "learning_rate": 1.2690548003890329e-05, "loss": 1.8645, "step": 25788000 }, { "epoch": 74.65, "learning_rate": 1.2689824356243054e-05, "loss": 1.8834, "step": 25788500 }, { "epoch": 74.65, "learning_rate": 1.2689100708595778e-05, "loss": 1.8831, "step": 25789000 }, { "epoch": 74.65, "learning_rate": 1.26883770609485e-05, "loss": 1.8754, "step": 25789500 }, { "epoch": 74.65, "learning_rate": 1.2687653413301223e-05, "loss": 1.8703, "step": 25790000 }, { "epoch": 74.65, "learning_rate": 1.2686929765653947e-05, "loss": 1.887, "step": 25790500 }, { "epoch": 74.65, "learning_rate": 1.2686206118006669e-05, "loss": 1.8651, "step": 25791000 }, { "epoch": 74.66, "learning_rate": 1.2685482470359391e-05, "loss": 1.8612, "step": 25791500 }, { "epoch": 74.66, "learning_rate": 1.2684758822712117e-05, "loss": 1.8688, "step": 25792000 }, { "epoch": 74.66, "learning_rate": 1.268403517506484e-05, "loss": 1.8871, "step": 25792500 }, { "epoch": 74.66, "learning_rate": 1.2683312974712858e-05, "loss": 1.8948, "step": 25793000 }, { "epoch": 74.66, "learning_rate": 1.2682590774360875e-05, "loss": 1.8704, "step": 25793500 }, { "epoch": 74.66, "learning_rate": 1.2681867126713597e-05, "loss": 1.864, "step": 25794000 }, { "epoch": 74.66, "learning_rate": 1.2681144926361616e-05, "loss": 1.8742, "step": 25794500 }, { "epoch": 74.67, "learning_rate": 1.2680421278714339e-05, "loss": 1.881, "step": 25795000 }, { "epoch": 74.67, "learning_rate": 1.267969763106706e-05, "loss": 1.8827, "step": 25795500 }, { "epoch": 74.67, "learning_rate": 1.2678973983419786e-05, "loss": 1.8597, "step": 25796000 }, { "epoch": 74.67, "learning_rate": 1.267825033577251e-05, "loss": 1.8755, "step": 25796500 }, { "epoch": 74.67, "learning_rate": 1.2677526688125233e-05, "loss": 1.8443, "step": 25797000 }, { "epoch": 74.67, "learning_rate": 1.2676803040477955e-05, "loss": 1.8787, "step": 25797500 }, { "epoch": 74.67, "learning_rate": 1.2676079392830679e-05, "loss": 1.8605, "step": 25798000 }, { "epoch": 74.68, "learning_rate": 1.2675355745183401e-05, "loss": 1.8802, "step": 25798500 }, { "epoch": 74.68, "learning_rate": 1.2674632097536123e-05, "loss": 1.8812, "step": 25799000 }, { "epoch": 74.68, "learning_rate": 1.2673908449888849e-05, "loss": 1.8609, "step": 25799500 }, { "epoch": 74.68, "learning_rate": 1.2673186249536868e-05, "loss": 1.8813, "step": 25800000 }, { "epoch": 74.68, "learning_rate": 1.267246260188959e-05, "loss": 1.8776, "step": 25800500 }, { "epoch": 74.68, "learning_rate": 1.2671738954242312e-05, "loss": 1.8701, "step": 25801000 }, { "epoch": 74.68, "learning_rate": 1.267101675389033e-05, "loss": 1.8532, "step": 25801500 }, { "epoch": 74.69, "learning_rate": 1.2670293106243053e-05, "loss": 1.8654, "step": 25802000 }, { "epoch": 74.69, "learning_rate": 1.2669569458595776e-05, "loss": 1.8644, "step": 25802500 }, { "epoch": 74.69, "learning_rate": 1.2668845810948501e-05, "loss": 1.8659, "step": 25803000 }, { "epoch": 74.69, "learning_rate": 1.2668123610596518e-05, "loss": 1.8666, "step": 25803500 }, { "epoch": 74.69, "learning_rate": 1.2667399962949242e-05, "loss": 1.8821, "step": 25804000 }, { "epoch": 74.69, "learning_rate": 1.2666676315301965e-05, "loss": 1.8759, "step": 25804500 }, { "epoch": 74.69, "learning_rate": 1.2665952667654687e-05, "loss": 1.867, "step": 25805000 }, { "epoch": 74.7, "learning_rate": 1.266522902000741e-05, "loss": 1.8525, "step": 25805500 }, { "epoch": 74.7, "learning_rate": 1.2664505372360133e-05, "loss": 1.8757, "step": 25806000 }, { "epoch": 74.7, "learning_rate": 1.2663781724712855e-05, "loss": 1.8719, "step": 25806500 }, { "epoch": 74.7, "learning_rate": 1.2663059524360876e-05, "loss": 1.8693, "step": 25807000 }, { "epoch": 74.7, "learning_rate": 1.26623358767136e-05, "loss": 1.876, "step": 25807500 }, { "epoch": 74.7, "learning_rate": 1.2661612229066322e-05, "loss": 1.8789, "step": 25808000 }, { "epoch": 74.71, "learning_rate": 1.2660888581419044e-05, "loss": 1.8616, "step": 25808500 }, { "epoch": 74.71, "learning_rate": 1.2660164933771768e-05, "loss": 1.8688, "step": 25809000 }, { "epoch": 74.71, "learning_rate": 1.265944128612449e-05, "loss": 1.8644, "step": 25809500 }, { "epoch": 74.71, "learning_rate": 1.2658717638477213e-05, "loss": 1.8839, "step": 25810000 }, { "epoch": 74.71, "learning_rate": 1.2657993990829938e-05, "loss": 1.8739, "step": 25810500 }, { "epoch": 74.71, "learning_rate": 1.2657270343182662e-05, "loss": 1.8585, "step": 25811000 }, { "epoch": 74.71, "learning_rate": 1.265654814283068e-05, "loss": 1.8484, "step": 25811500 }, { "epoch": 74.72, "learning_rate": 1.2655824495183402e-05, "loss": 1.8739, "step": 25812000 }, { "epoch": 74.72, "learning_rate": 1.2655100847536126e-05, "loss": 1.9062, "step": 25812500 }, { "epoch": 74.72, "learning_rate": 1.2654378647184143e-05, "loss": 1.8701, "step": 25813000 }, { "epoch": 74.72, "learning_rate": 1.2653654999536865e-05, "loss": 1.8564, "step": 25813500 }, { "epoch": 74.72, "learning_rate": 1.2652931351889587e-05, "loss": 1.8566, "step": 25814000 }, { "epoch": 74.72, "learning_rate": 1.2652207704242313e-05, "loss": 1.8582, "step": 25814500 }, { "epoch": 74.72, "learning_rate": 1.2651484056595037e-05, "loss": 1.866, "step": 25815000 }, { "epoch": 74.73, "learning_rate": 1.2650761856243054e-05, "loss": 1.8846, "step": 25815500 }, { "epoch": 74.73, "learning_rate": 1.2650038208595776e-05, "loss": 1.8846, "step": 25816000 }, { "epoch": 74.73, "learning_rate": 1.26493145609485e-05, "loss": 1.8678, "step": 25816500 }, { "epoch": 74.73, "learning_rate": 1.2648590913301223e-05, "loss": 1.8806, "step": 25817000 }, { "epoch": 74.73, "learning_rate": 1.2647867265653945e-05, "loss": 1.8668, "step": 25817500 }, { "epoch": 74.73, "learning_rate": 1.264714361800667e-05, "loss": 1.8808, "step": 25818000 }, { "epoch": 74.73, "learning_rate": 1.2646419970359394e-05, "loss": 1.8809, "step": 25818500 }, { "epoch": 74.74, "learning_rate": 1.2645696322712117e-05, "loss": 1.8829, "step": 25819000 }, { "epoch": 74.74, "learning_rate": 1.2644972675064839e-05, "loss": 1.8633, "step": 25819500 }, { "epoch": 74.74, "learning_rate": 1.2644250474712858e-05, "loss": 1.8943, "step": 25820000 }, { "epoch": 74.74, "learning_rate": 1.264352682706558e-05, "loss": 1.8528, "step": 25820500 }, { "epoch": 74.74, "learning_rate": 1.2642803179418302e-05, "loss": 1.8958, "step": 25821000 }, { "epoch": 74.74, "learning_rate": 1.2642079531771026e-05, "loss": 1.8809, "step": 25821500 }, { "epoch": 74.74, "learning_rate": 1.2641355884123752e-05, "loss": 1.872, "step": 25822000 }, { "epoch": 74.75, "learning_rate": 1.2640632236476474e-05, "loss": 1.8734, "step": 25822500 }, { "epoch": 74.75, "learning_rate": 1.2639908588829196e-05, "loss": 1.8636, "step": 25823000 }, { "epoch": 74.75, "learning_rate": 1.263918494118192e-05, "loss": 1.889, "step": 25823500 }, { "epoch": 74.75, "learning_rate": 1.2638461293534642e-05, "loss": 1.8944, "step": 25824000 }, { "epoch": 74.75, "learning_rate": 1.2637737645887365e-05, "loss": 1.8666, "step": 25824500 }, { "epoch": 74.75, "learning_rate": 1.2637015445535384e-05, "loss": 1.8894, "step": 25825000 }, { "epoch": 74.75, "learning_rate": 1.263629179788811e-05, "loss": 1.8886, "step": 25825500 }, { "epoch": 74.76, "learning_rate": 1.2635568150240831e-05, "loss": 1.864, "step": 25826000 }, { "epoch": 74.76, "learning_rate": 1.2634844502593554e-05, "loss": 1.881, "step": 25826500 }, { "epoch": 74.76, "learning_rate": 1.2634122302241571e-05, "loss": 1.8614, "step": 25827000 }, { "epoch": 74.76, "learning_rate": 1.2633398654594295e-05, "loss": 1.8869, "step": 25827500 }, { "epoch": 74.76, "learning_rate": 1.2632675006947017e-05, "loss": 1.8846, "step": 25828000 }, { "epoch": 74.76, "learning_rate": 1.263195135929974e-05, "loss": 1.892, "step": 25828500 }, { "epoch": 74.76, "learning_rate": 1.2631227711652465e-05, "loss": 1.8805, "step": 25829000 }, { "epoch": 74.77, "learning_rate": 1.2630505511300484e-05, "loss": 1.869, "step": 25829500 }, { "epoch": 74.77, "learning_rate": 1.2629781863653206e-05, "loss": 1.8663, "step": 25830000 }, { "epoch": 74.77, "learning_rate": 1.2629058216005928e-05, "loss": 1.8613, "step": 25830500 }, { "epoch": 74.77, "learning_rate": 1.2628334568358652e-05, "loss": 1.8609, "step": 25831000 }, { "epoch": 74.77, "learning_rate": 1.262761236800667e-05, "loss": 1.8482, "step": 25831500 }, { "epoch": 74.77, "learning_rate": 1.2626890167654687e-05, "loss": 1.8801, "step": 25832000 }, { "epoch": 74.77, "learning_rate": 1.2626166520007409e-05, "loss": 1.8901, "step": 25832500 }, { "epoch": 74.78, "learning_rate": 1.2625442872360135e-05, "loss": 1.8853, "step": 25833000 }, { "epoch": 74.78, "learning_rate": 1.2624719224712859e-05, "loss": 1.8743, "step": 25833500 }, { "epoch": 74.78, "learning_rate": 1.262399557706558e-05, "loss": 1.8791, "step": 25834000 }, { "epoch": 74.78, "learning_rate": 1.2623273376713598e-05, "loss": 1.8658, "step": 25834500 }, { "epoch": 74.78, "learning_rate": 1.2622549729066322e-05, "loss": 1.863, "step": 25835000 }, { "epoch": 74.78, "learning_rate": 1.2621826081419044e-05, "loss": 1.8395, "step": 25835500 }, { "epoch": 74.78, "learning_rate": 1.2621103881067061e-05, "loss": 1.8535, "step": 25836000 }, { "epoch": 74.79, "learning_rate": 1.2620380233419785e-05, "loss": 1.8762, "step": 25836500 }, { "epoch": 74.79, "learning_rate": 1.2619656585772511e-05, "loss": 1.8447, "step": 25837000 }, { "epoch": 74.79, "learning_rate": 1.2618932938125233e-05, "loss": 1.8691, "step": 25837500 }, { "epoch": 74.79, "learning_rate": 1.2618209290477955e-05, "loss": 1.8717, "step": 25838000 }, { "epoch": 74.79, "learning_rate": 1.261748564283068e-05, "loss": 1.8786, "step": 25838500 }, { "epoch": 74.79, "learning_rate": 1.2616761995183402e-05, "loss": 1.8861, "step": 25839000 }, { "epoch": 74.79, "learning_rate": 1.2616038347536124e-05, "loss": 1.9036, "step": 25839500 }, { "epoch": 74.8, "learning_rate": 1.2615314699888848e-05, "loss": 1.874, "step": 25840000 }, { "epoch": 74.8, "learning_rate": 1.2614592499536865e-05, "loss": 1.8732, "step": 25840500 }, { "epoch": 74.8, "learning_rate": 1.2613870299184886e-05, "loss": 1.879, "step": 25841000 }, { "epoch": 74.8, "learning_rate": 1.2613146651537608e-05, "loss": 1.8736, "step": 25841500 }, { "epoch": 74.8, "learning_rate": 1.261242300389033e-05, "loss": 1.8773, "step": 25842000 }, { "epoch": 74.8, "learning_rate": 1.2611699356243054e-05, "loss": 1.8644, "step": 25842500 }, { "epoch": 74.8, "learning_rate": 1.2610975708595776e-05, "loss": 1.8839, "step": 25843000 }, { "epoch": 74.81, "learning_rate": 1.2610253508243794e-05, "loss": 1.857, "step": 25843500 }, { "epoch": 74.81, "learning_rate": 1.260953130789181e-05, "loss": 1.8675, "step": 25844000 }, { "epoch": 74.81, "learning_rate": 1.2608807660244535e-05, "loss": 1.8725, "step": 25844500 }, { "epoch": 74.81, "learning_rate": 1.260808401259726e-05, "loss": 1.9046, "step": 25845000 }, { "epoch": 74.81, "learning_rate": 1.2607360364949983e-05, "loss": 1.8829, "step": 25845500 }, { "epoch": 74.81, "learning_rate": 1.2606636717302705e-05, "loss": 1.8708, "step": 25846000 }, { "epoch": 74.82, "learning_rate": 1.2605914516950724e-05, "loss": 1.8742, "step": 25846500 }, { "epoch": 74.82, "learning_rate": 1.2605190869303446e-05, "loss": 1.8749, "step": 25847000 }, { "epoch": 74.82, "learning_rate": 1.2604467221656168e-05, "loss": 1.8946, "step": 25847500 }, { "epoch": 74.82, "learning_rate": 1.2603743574008892e-05, "loss": 1.8523, "step": 25848000 }, { "epoch": 74.82, "learning_rate": 1.2603019926361618e-05, "loss": 1.8433, "step": 25848500 }, { "epoch": 74.82, "learning_rate": 1.260229627871434e-05, "loss": 1.9001, "step": 25849000 }, { "epoch": 74.82, "learning_rate": 1.2601572631067062e-05, "loss": 1.8689, "step": 25849500 }, { "epoch": 74.83, "learning_rate": 1.2600848983419786e-05, "loss": 1.8813, "step": 25850000 }, { "epoch": 74.83, "learning_rate": 1.2600125335772508e-05, "loss": 1.8666, "step": 25850500 }, { "epoch": 74.83, "learning_rate": 1.259940168812523e-05, "loss": 1.8656, "step": 25851000 }, { "epoch": 74.83, "learning_rate": 1.2598678040477955e-05, "loss": 1.8821, "step": 25851500 }, { "epoch": 74.83, "learning_rate": 1.2597955840125975e-05, "loss": 1.8684, "step": 25852000 }, { "epoch": 74.83, "learning_rate": 1.2597232192478697e-05, "loss": 1.8796, "step": 25852500 }, { "epoch": 74.83, "learning_rate": 1.259650854483142e-05, "loss": 1.8461, "step": 25853000 }, { "epoch": 74.84, "learning_rate": 1.2595784897184144e-05, "loss": 1.8714, "step": 25853500 }, { "epoch": 74.84, "learning_rate": 1.2595064144127456e-05, "loss": 1.8857, "step": 25854000 }, { "epoch": 74.84, "learning_rate": 1.2594340496480178e-05, "loss": 1.8586, "step": 25854500 }, { "epoch": 74.84, "learning_rate": 1.25936168488329e-05, "loss": 1.8878, "step": 25855000 }, { "epoch": 74.84, "learning_rate": 1.2592893201185624e-05, "loss": 1.8715, "step": 25855500 }, { "epoch": 74.84, "learning_rate": 1.259216955353835e-05, "loss": 1.8858, "step": 25856000 }, { "epoch": 74.84, "learning_rate": 1.2591445905891072e-05, "loss": 1.8588, "step": 25856500 }, { "epoch": 74.85, "learning_rate": 1.259072370553909e-05, "loss": 1.8668, "step": 25857000 }, { "epoch": 74.85, "learning_rate": 1.2590000057891813e-05, "loss": 1.8686, "step": 25857500 }, { "epoch": 74.85, "learning_rate": 1.258927785753983e-05, "loss": 1.8505, "step": 25858000 }, { "epoch": 74.85, "learning_rate": 1.2588554209892553e-05, "loss": 1.8712, "step": 25858500 }, { "epoch": 74.85, "learning_rate": 1.2587830562245275e-05, "loss": 1.8822, "step": 25859000 }, { "epoch": 74.85, "learning_rate": 1.2587106914597999e-05, "loss": 1.8812, "step": 25859500 }, { "epoch": 74.85, "learning_rate": 1.2586383266950725e-05, "loss": 1.8986, "step": 25860000 }, { "epoch": 74.86, "learning_rate": 1.2585659619303447e-05, "loss": 1.8577, "step": 25860500 }, { "epoch": 74.86, "learning_rate": 1.258493597165617e-05, "loss": 1.8723, "step": 25861000 }, { "epoch": 74.86, "learning_rate": 1.2584212324008893e-05, "loss": 1.8932, "step": 25861500 }, { "epoch": 74.86, "learning_rate": 1.2583488676361615e-05, "loss": 1.8733, "step": 25862000 }, { "epoch": 74.86, "learning_rate": 1.2582765028714339e-05, "loss": 1.8709, "step": 25862500 }, { "epoch": 74.86, "learning_rate": 1.2582041381067061e-05, "loss": 1.8615, "step": 25863000 }, { "epoch": 74.86, "learning_rate": 1.2581317733419787e-05, "loss": 1.8685, "step": 25863500 }, { "epoch": 74.87, "learning_rate": 1.258059408577251e-05, "loss": 1.8528, "step": 25864000 }, { "epoch": 74.87, "learning_rate": 1.2579870438125233e-05, "loss": 1.8664, "step": 25864500 }, { "epoch": 74.87, "learning_rate": 1.2579146790477955e-05, "loss": 1.8829, "step": 25865000 }, { "epoch": 74.87, "learning_rate": 1.2578423142830678e-05, "loss": 1.8749, "step": 25865500 }, { "epoch": 74.87, "learning_rate": 1.2577699495183402e-05, "loss": 1.8881, "step": 25866000 }, { "epoch": 74.87, "learning_rate": 1.2576975847536124e-05, "loss": 1.8532, "step": 25866500 }, { "epoch": 74.87, "learning_rate": 1.257625219988885e-05, "loss": 1.8843, "step": 25867000 }, { "epoch": 74.88, "learning_rate": 1.2575528552241572e-05, "loss": 1.8574, "step": 25867500 }, { "epoch": 74.88, "learning_rate": 1.2574807799184884e-05, "loss": 1.8823, "step": 25868000 }, { "epoch": 74.88, "learning_rate": 1.2574084151537608e-05, "loss": 1.8663, "step": 25868500 }, { "epoch": 74.88, "learning_rate": 1.257336050389033e-05, "loss": 1.8461, "step": 25869000 }, { "epoch": 74.88, "learning_rate": 1.2572636856243052e-05, "loss": 1.8824, "step": 25869500 }, { "epoch": 74.88, "learning_rate": 1.2571913208595776e-05, "loss": 1.8674, "step": 25870000 }, { "epoch": 74.88, "learning_rate": 1.2571189560948502e-05, "loss": 1.8817, "step": 25870500 }, { "epoch": 74.89, "learning_rate": 1.2570465913301224e-05, "loss": 1.8641, "step": 25871000 }, { "epoch": 74.89, "learning_rate": 1.2569742265653946e-05, "loss": 1.8766, "step": 25871500 }, { "epoch": 74.89, "learning_rate": 1.256901861800667e-05, "loss": 1.8834, "step": 25872000 }, { "epoch": 74.89, "learning_rate": 1.2568296417654687e-05, "loss": 1.8563, "step": 25872500 }, { "epoch": 74.89, "learning_rate": 1.256757277000741e-05, "loss": 1.8738, "step": 25873000 }, { "epoch": 74.89, "learning_rate": 1.2566850569655429e-05, "loss": 1.8527, "step": 25873500 }, { "epoch": 74.89, "learning_rate": 1.2566126922008151e-05, "loss": 1.9105, "step": 25874000 }, { "epoch": 74.9, "learning_rate": 1.2565404721656168e-05, "loss": 1.8766, "step": 25874500 }, { "epoch": 74.9, "learning_rate": 1.2564681074008894e-05, "loss": 1.8772, "step": 25875000 }, { "epoch": 74.9, "learning_rate": 1.2563957426361616e-05, "loss": 1.8866, "step": 25875500 }, { "epoch": 74.9, "learning_rate": 1.256323377871434e-05, "loss": 1.8738, "step": 25876000 }, { "epoch": 74.9, "learning_rate": 1.2562510131067062e-05, "loss": 1.879, "step": 25876500 }, { "epoch": 74.9, "learning_rate": 1.2561786483419784e-05, "loss": 1.879, "step": 25877000 }, { "epoch": 74.9, "learning_rate": 1.2561062835772508e-05, "loss": 1.8459, "step": 25877500 }, { "epoch": 74.91, "learning_rate": 1.256033918812523e-05, "loss": 1.8537, "step": 25878000 }, { "epoch": 74.91, "learning_rate": 1.2559615540477956e-05, "loss": 1.8836, "step": 25878500 }, { "epoch": 74.91, "learning_rate": 1.2558891892830678e-05, "loss": 1.874, "step": 25879000 }, { "epoch": 74.91, "learning_rate": 1.2558168245183402e-05, "loss": 1.9004, "step": 25879500 }, { "epoch": 74.91, "learning_rate": 1.255744604483142e-05, "loss": 1.8926, "step": 25880000 }, { "epoch": 74.91, "learning_rate": 1.2556722397184142e-05, "loss": 1.8733, "step": 25880500 }, { "epoch": 74.91, "learning_rate": 1.255600019683216e-05, "loss": 1.8774, "step": 25881000 }, { "epoch": 74.92, "learning_rate": 1.2555276549184883e-05, "loss": 1.8732, "step": 25881500 }, { "epoch": 74.92, "learning_rate": 1.2554552901537609e-05, "loss": 1.8939, "step": 25882000 }, { "epoch": 74.92, "learning_rate": 1.255382925389033e-05, "loss": 1.8781, "step": 25882500 }, { "epoch": 74.92, "learning_rate": 1.2553107053538348e-05, "loss": 1.8786, "step": 25883000 }, { "epoch": 74.92, "learning_rate": 1.2552383405891072e-05, "loss": 1.8652, "step": 25883500 }, { "epoch": 74.92, "learning_rate": 1.2551659758243794e-05, "loss": 1.86, "step": 25884000 }, { "epoch": 74.93, "learning_rate": 1.2550936110596516e-05, "loss": 1.8592, "step": 25884500 }, { "epoch": 74.93, "learning_rate": 1.255021246294924e-05, "loss": 1.8679, "step": 25885000 }, { "epoch": 74.93, "learning_rate": 1.2549488815301963e-05, "loss": 1.8663, "step": 25885500 }, { "epoch": 74.93, "learning_rate": 1.2548765167654688e-05, "loss": 1.8484, "step": 25886000 }, { "epoch": 74.93, "learning_rate": 1.2548041520007412e-05, "loss": 1.8842, "step": 25886500 }, { "epoch": 74.93, "learning_rate": 1.2547317872360134e-05, "loss": 1.8902, "step": 25887000 }, { "epoch": 74.93, "learning_rate": 1.2546594224712857e-05, "loss": 1.8584, "step": 25887500 }, { "epoch": 74.94, "learning_rate": 1.2545872024360874e-05, "loss": 1.8767, "step": 25888000 }, { "epoch": 74.94, "learning_rate": 1.2545148376713598e-05, "loss": 1.8954, "step": 25888500 }, { "epoch": 74.94, "learning_rate": 1.2544426176361615e-05, "loss": 1.8839, "step": 25889000 }, { "epoch": 74.94, "learning_rate": 1.254370252871434e-05, "loss": 1.8879, "step": 25889500 }, { "epoch": 74.94, "learning_rate": 1.2542980328362358e-05, "loss": 1.8822, "step": 25890000 }, { "epoch": 74.94, "learning_rate": 1.254225668071508e-05, "loss": 1.8907, "step": 25890500 }, { "epoch": 74.94, "learning_rate": 1.2541533033067804e-05, "loss": 1.8504, "step": 25891000 }, { "epoch": 74.95, "learning_rate": 1.2540809385420526e-05, "loss": 1.9025, "step": 25891500 }, { "epoch": 74.95, "learning_rate": 1.254008573777325e-05, "loss": 1.8796, "step": 25892000 }, { "epoch": 74.95, "learning_rate": 1.2539362090125973e-05, "loss": 1.8623, "step": 25892500 }, { "epoch": 74.95, "learning_rate": 1.2538638442478695e-05, "loss": 1.8838, "step": 25893000 }, { "epoch": 74.95, "learning_rate": 1.253791479483142e-05, "loss": 1.8658, "step": 25893500 }, { "epoch": 74.95, "learning_rate": 1.2537191147184144e-05, "loss": 1.8613, "step": 25894000 }, { "epoch": 74.95, "learning_rate": 1.2536468946832162e-05, "loss": 1.8364, "step": 25894500 }, { "epoch": 74.96, "learning_rate": 1.2535745299184884e-05, "loss": 1.856, "step": 25895000 }, { "epoch": 74.96, "learning_rate": 1.2535021651537606e-05, "loss": 1.8836, "step": 25895500 }, { "epoch": 74.96, "learning_rate": 1.253429800389033e-05, "loss": 1.8975, "step": 25896000 }, { "epoch": 74.96, "learning_rate": 1.2533574356243052e-05, "loss": 1.8516, "step": 25896500 }, { "epoch": 74.96, "learning_rate": 1.2532850708595778e-05, "loss": 1.8671, "step": 25897000 }, { "epoch": 74.96, "learning_rate": 1.25321270609485e-05, "loss": 1.8736, "step": 25897500 }, { "epoch": 74.96, "learning_rate": 1.2531403413301224e-05, "loss": 1.8859, "step": 25898000 }, { "epoch": 74.97, "learning_rate": 1.2530679765653946e-05, "loss": 1.8549, "step": 25898500 }, { "epoch": 74.97, "learning_rate": 1.2529957565301963e-05, "loss": 1.8613, "step": 25899000 }, { "epoch": 74.97, "learning_rate": 1.2529233917654687e-05, "loss": 1.8877, "step": 25899500 }, { "epoch": 74.97, "learning_rate": 1.252851027000741e-05, "loss": 1.8743, "step": 25900000 }, { "epoch": 74.97, "learning_rate": 1.2527786622360135e-05, "loss": 1.8679, "step": 25900500 }, { "epoch": 74.97, "learning_rate": 1.2527062974712857e-05, "loss": 1.8612, "step": 25901000 }, { "epoch": 74.97, "learning_rate": 1.2526339327065581e-05, "loss": 1.8969, "step": 25901500 }, { "epoch": 74.98, "learning_rate": 1.2525615679418304e-05, "loss": 1.8788, "step": 25902000 }, { "epoch": 74.98, "learning_rate": 1.2524892031771026e-05, "loss": 1.888, "step": 25902500 }, { "epoch": 74.98, "learning_rate": 1.2524169831419045e-05, "loss": 1.8706, "step": 25903000 }, { "epoch": 74.98, "learning_rate": 1.2523446183771767e-05, "loss": 1.852, "step": 25903500 }, { "epoch": 74.98, "learning_rate": 1.252272253612449e-05, "loss": 1.8501, "step": 25904000 }, { "epoch": 74.98, "learning_rate": 1.2521998888477215e-05, "loss": 1.883, "step": 25904500 }, { "epoch": 74.98, "learning_rate": 1.2521275240829939e-05, "loss": 1.8713, "step": 25905000 }, { "epoch": 74.99, "learning_rate": 1.2520551593182661e-05, "loss": 1.8901, "step": 25905500 }, { "epoch": 74.99, "learning_rate": 1.2519827945535383e-05, "loss": 1.889, "step": 25906000 }, { "epoch": 74.99, "learning_rate": 1.2519107192478696e-05, "loss": 1.8742, "step": 25906500 }, { "epoch": 74.99, "learning_rate": 1.251838354483142e-05, "loss": 1.8797, "step": 25907000 }, { "epoch": 74.99, "learning_rate": 1.2517659897184142e-05, "loss": 1.8634, "step": 25907500 }, { "epoch": 74.99, "learning_rate": 1.2516936249536864e-05, "loss": 1.8874, "step": 25908000 }, { "epoch": 74.99, "learning_rate": 1.251621260188959e-05, "loss": 1.8814, "step": 25908500 }, { "epoch": 75.0, "learning_rate": 1.2515488954242313e-05, "loss": 1.8743, "step": 25909000 }, { "epoch": 75.0, "learning_rate": 1.2514765306595036e-05, "loss": 1.8666, "step": 25909500 }, { "epoch": 75.0, "learning_rate": 1.251404165894776e-05, "loss": 1.882, "step": 25910000 }, { "epoch": 75.0, "eval_accuracy": 0.6849771861146398, "eval_accuracy_mlm": 0.6533634930602724, "eval_accuracy_nsp": 0.8545599538023676, "eval_loss": 2.194267988204956, "eval_runtime": 331.9824, "eval_samples_per_second": 1314.485, "eval_steps_per_second": 54.771, "step": 25910400 }, { "epoch": 75.0, "learning_rate": 1.2513318011300482e-05, "loss": 1.8657, "step": 25910500 }, { "epoch": 75.0, "learning_rate": 1.2512594363653204e-05, "loss": 1.8631, "step": 25911000 }, { "epoch": 75.0, "learning_rate": 1.2511872163301221e-05, "loss": 1.8706, "step": 25911500 }, { "epoch": 75.0, "learning_rate": 1.2511148515653947e-05, "loss": 1.8469, "step": 25912000 }, { "epoch": 75.01, "learning_rate": 1.2510424868006671e-05, "loss": 1.8602, "step": 25912500 }, { "epoch": 75.01, "learning_rate": 1.2509701220359393e-05, "loss": 1.8533, "step": 25913000 }, { "epoch": 75.01, "learning_rate": 1.2508977572712115e-05, "loss": 1.8486, "step": 25913500 }, { "epoch": 75.01, "learning_rate": 1.250825392506484e-05, "loss": 1.8783, "step": 25914000 }, { "epoch": 75.01, "learning_rate": 1.2507531724712857e-05, "loss": 1.8618, "step": 25914500 }, { "epoch": 75.01, "learning_rate": 1.2506808077065579e-05, "loss": 1.852, "step": 25915000 }, { "epoch": 75.01, "learning_rate": 1.2506084429418304e-05, "loss": 1.861, "step": 25915500 }, { "epoch": 75.02, "learning_rate": 1.2505362229066322e-05, "loss": 1.8511, "step": 25916000 }, { "epoch": 75.02, "learning_rate": 1.2504638581419046e-05, "loss": 1.838, "step": 25916500 }, { "epoch": 75.02, "learning_rate": 1.2503914933771768e-05, "loss": 1.8858, "step": 25917000 }, { "epoch": 75.02, "learning_rate": 1.2503191286124492e-05, "loss": 1.8795, "step": 25917500 }, { "epoch": 75.02, "learning_rate": 1.2502467638477214e-05, "loss": 1.8232, "step": 25918000 }, { "epoch": 75.02, "learning_rate": 1.2501743990829936e-05, "loss": 1.8517, "step": 25918500 }, { "epoch": 75.02, "learning_rate": 1.2501021790477953e-05, "loss": 1.8576, "step": 25919000 }, { "epoch": 75.03, "learning_rate": 1.2500298142830679e-05, "loss": 1.8579, "step": 25919500 }, { "epoch": 75.03, "learning_rate": 1.2499575942478696e-05, "loss": 1.857, "step": 25920000 }, { "epoch": 75.03, "learning_rate": 1.249885229483142e-05, "loss": 1.8596, "step": 25920500 }, { "epoch": 75.03, "learning_rate": 1.2498128647184143e-05, "loss": 1.8606, "step": 25921000 }, { "epoch": 75.03, "learning_rate": 1.2497404999536866e-05, "loss": 1.8501, "step": 25921500 }, { "epoch": 75.03, "learning_rate": 1.2496681351889589e-05, "loss": 1.8645, "step": 25922000 }, { "epoch": 75.04, "learning_rate": 1.2495957704242313e-05, "loss": 1.8384, "step": 25922500 }, { "epoch": 75.04, "learning_rate": 1.2495234056595037e-05, "loss": 1.867, "step": 25923000 }, { "epoch": 75.04, "learning_rate": 1.2494510408947759e-05, "loss": 1.8335, "step": 25923500 }, { "epoch": 75.04, "learning_rate": 1.2493786761300483e-05, "loss": 1.8805, "step": 25924000 }, { "epoch": 75.04, "learning_rate": 1.2493063113653205e-05, "loss": 1.8458, "step": 25924500 }, { "epoch": 75.04, "learning_rate": 1.2492339466005929e-05, "loss": 1.8454, "step": 25925000 }, { "epoch": 75.04, "learning_rate": 1.2491615818358653e-05, "loss": 1.8511, "step": 25925500 }, { "epoch": 75.05, "learning_rate": 1.2490892170711375e-05, "loss": 1.8554, "step": 25926000 }, { "epoch": 75.05, "learning_rate": 1.2490168523064099e-05, "loss": 1.8483, "step": 25926500 }, { "epoch": 75.05, "learning_rate": 1.2489444875416821e-05, "loss": 1.8532, "step": 25927000 }, { "epoch": 75.05, "learning_rate": 1.2488721227769545e-05, "loss": 1.8672, "step": 25927500 }, { "epoch": 75.05, "learning_rate": 1.2487997580122267e-05, "loss": 1.8404, "step": 25928000 }, { "epoch": 75.05, "learning_rate": 1.2487275379770286e-05, "loss": 1.8534, "step": 25928500 }, { "epoch": 75.05, "learning_rate": 1.2486551732123009e-05, "loss": 1.8613, "step": 25929000 }, { "epoch": 75.06, "learning_rate": 1.2485828084475732e-05, "loss": 1.8554, "step": 25929500 }, { "epoch": 75.06, "learning_rate": 1.2485104436828456e-05, "loss": 1.8648, "step": 25930000 }, { "epoch": 75.06, "learning_rate": 1.2484382236476474e-05, "loss": 1.8623, "step": 25930500 }, { "epoch": 75.06, "learning_rate": 1.2483658588829196e-05, "loss": 1.8747, "step": 25931000 }, { "epoch": 75.06, "learning_rate": 1.248293494118192e-05, "loss": 1.8673, "step": 25931500 }, { "epoch": 75.06, "learning_rate": 1.2482211293534644e-05, "loss": 1.8525, "step": 25932000 }, { "epoch": 75.06, "learning_rate": 1.2481489093182661e-05, "loss": 1.8574, "step": 25932500 }, { "epoch": 75.07, "learning_rate": 1.2480765445535383e-05, "loss": 1.8528, "step": 25933000 }, { "epoch": 75.07, "learning_rate": 1.2480043245183402e-05, "loss": 1.863, "step": 25933500 }, { "epoch": 75.07, "learning_rate": 1.247932104483142e-05, "loss": 1.865, "step": 25934000 }, { "epoch": 75.07, "learning_rate": 1.2478597397184143e-05, "loss": 1.8561, "step": 25934500 }, { "epoch": 75.07, "learning_rate": 1.2477873749536866e-05, "loss": 1.8483, "step": 25935000 }, { "epoch": 75.07, "learning_rate": 1.247715010188959e-05, "loss": 1.8724, "step": 25935500 }, { "epoch": 75.07, "learning_rate": 1.2476426454242313e-05, "loss": 1.8627, "step": 25936000 }, { "epoch": 75.08, "learning_rate": 1.247570425389033e-05, "loss": 1.8608, "step": 25936500 }, { "epoch": 75.08, "learning_rate": 1.2474980606243053e-05, "loss": 1.8671, "step": 25937000 }, { "epoch": 75.08, "learning_rate": 1.2474256958595777e-05, "loss": 1.8774, "step": 25937500 }, { "epoch": 75.08, "learning_rate": 1.24735333109485e-05, "loss": 1.8779, "step": 25938000 }, { "epoch": 75.08, "learning_rate": 1.2472809663301223e-05, "loss": 1.8424, "step": 25938500 }, { "epoch": 75.08, "learning_rate": 1.2472086015653947e-05, "loss": 1.8827, "step": 25939000 }, { "epoch": 75.08, "learning_rate": 1.2471362368006669e-05, "loss": 1.865, "step": 25939500 }, { "epoch": 75.09, "learning_rate": 1.2470638720359393e-05, "loss": 1.8649, "step": 25940000 }, { "epoch": 75.09, "learning_rate": 1.2469915072712115e-05, "loss": 1.842, "step": 25940500 }, { "epoch": 75.09, "learning_rate": 1.2469192872360134e-05, "loss": 1.8572, "step": 25941000 }, { "epoch": 75.09, "learning_rate": 1.2468470672008151e-05, "loss": 1.843, "step": 25941500 }, { "epoch": 75.09, "learning_rate": 1.2467747024360875e-05, "loss": 1.86, "step": 25942000 }, { "epoch": 75.09, "learning_rate": 1.2467024824008893e-05, "loss": 1.8509, "step": 25942500 }, { "epoch": 75.09, "learning_rate": 1.2466301176361617e-05, "loss": 1.8793, "step": 25943000 }, { "epoch": 75.1, "learning_rate": 1.2465578976009634e-05, "loss": 1.856, "step": 25943500 }, { "epoch": 75.1, "learning_rate": 1.2464855328362358e-05, "loss": 1.8928, "step": 25944000 }, { "epoch": 75.1, "learning_rate": 1.246413168071508e-05, "loss": 1.8596, "step": 25944500 }, { "epoch": 75.1, "learning_rate": 1.2463408033067804e-05, "loss": 1.8662, "step": 25945000 }, { "epoch": 75.1, "learning_rate": 1.2462684385420526e-05, "loss": 1.8889, "step": 25945500 }, { "epoch": 75.1, "learning_rate": 1.246196073777325e-05, "loss": 1.8653, "step": 25946000 }, { "epoch": 75.1, "learning_rate": 1.2461237090125972e-05, "loss": 1.861, "step": 25946500 }, { "epoch": 75.11, "learning_rate": 1.2460513442478696e-05, "loss": 1.8788, "step": 25947000 }, { "epoch": 75.11, "learning_rate": 1.245978979483142e-05, "loss": 1.8609, "step": 25947500 }, { "epoch": 75.11, "learning_rate": 1.2459066147184142e-05, "loss": 1.8373, "step": 25948000 }, { "epoch": 75.11, "learning_rate": 1.2458342499536866e-05, "loss": 1.8659, "step": 25948500 }, { "epoch": 75.11, "learning_rate": 1.245761885188959e-05, "loss": 1.8532, "step": 25949000 }, { "epoch": 75.11, "learning_rate": 1.2456895204242312e-05, "loss": 1.8686, "step": 25949500 }, { "epoch": 75.11, "learning_rate": 1.2456171556595035e-05, "loss": 1.8456, "step": 25950000 }, { "epoch": 75.12, "learning_rate": 1.2455447908947759e-05, "loss": 1.8625, "step": 25950500 }, { "epoch": 75.12, "learning_rate": 1.2454724261300483e-05, "loss": 1.8808, "step": 25951000 }, { "epoch": 75.12, "learning_rate": 1.2454000613653205e-05, "loss": 1.8556, "step": 25951500 }, { "epoch": 75.12, "learning_rate": 1.2453276966005929e-05, "loss": 1.8565, "step": 25952000 }, { "epoch": 75.12, "learning_rate": 1.2452553318358653e-05, "loss": 1.8471, "step": 25952500 }, { "epoch": 75.12, "learning_rate": 1.245183111800667e-05, "loss": 1.8456, "step": 25953000 }, { "epoch": 75.12, "learning_rate": 1.2451107470359392e-05, "loss": 1.8637, "step": 25953500 }, { "epoch": 75.13, "learning_rate": 1.2450383822712116e-05, "loss": 1.8339, "step": 25954000 }, { "epoch": 75.13, "learning_rate": 1.2449661622360135e-05, "loss": 1.843, "step": 25954500 }, { "epoch": 75.13, "learning_rate": 1.2448937974712857e-05, "loss": 1.8471, "step": 25955000 }, { "epoch": 75.13, "learning_rate": 1.244821432706558e-05, "loss": 1.8849, "step": 25955500 }, { "epoch": 75.13, "learning_rate": 1.2447490679418303e-05, "loss": 1.8607, "step": 25956000 }, { "epoch": 75.13, "learning_rate": 1.2446768479066322e-05, "loss": 1.8361, "step": 25956500 }, { "epoch": 75.13, "learning_rate": 1.2446044831419045e-05, "loss": 1.8824, "step": 25957000 }, { "epoch": 75.14, "learning_rate": 1.2445321183771767e-05, "loss": 1.8782, "step": 25957500 }, { "epoch": 75.14, "learning_rate": 1.244459753612449e-05, "loss": 1.8528, "step": 25958000 }, { "epoch": 75.14, "learning_rate": 1.2443873888477215e-05, "loss": 1.8838, "step": 25958500 }, { "epoch": 75.14, "learning_rate": 1.2443150240829937e-05, "loss": 1.8614, "step": 25959000 }, { "epoch": 75.14, "learning_rate": 1.244242659318266e-05, "loss": 1.858, "step": 25959500 }, { "epoch": 75.14, "learning_rate": 1.2441702945535385e-05, "loss": 1.837, "step": 25960000 }, { "epoch": 75.15, "learning_rate": 1.2440979297888107e-05, "loss": 1.8886, "step": 25960500 }, { "epoch": 75.15, "learning_rate": 1.244025565024083e-05, "loss": 1.8965, "step": 25961000 }, { "epoch": 75.15, "learning_rate": 1.2439532002593555e-05, "loss": 1.8655, "step": 25961500 }, { "epoch": 75.15, "learning_rate": 1.2438808354946277e-05, "loss": 1.8641, "step": 25962000 }, { "epoch": 75.15, "learning_rate": 1.2438084707299e-05, "loss": 1.8535, "step": 25962500 }, { "epoch": 75.15, "learning_rate": 1.2437361059651723e-05, "loss": 1.8648, "step": 25963000 }, { "epoch": 75.15, "learning_rate": 1.2436640306595036e-05, "loss": 1.8681, "step": 25963500 }, { "epoch": 75.16, "learning_rate": 1.243591665894776e-05, "loss": 1.869, "step": 25964000 }, { "epoch": 75.16, "learning_rate": 1.2435193011300482e-05, "loss": 1.8544, "step": 25964500 }, { "epoch": 75.16, "learning_rate": 1.2434469363653206e-05, "loss": 1.8833, "step": 25965000 }, { "epoch": 75.16, "learning_rate": 1.243374571600593e-05, "loss": 1.8612, "step": 25965500 }, { "epoch": 75.16, "learning_rate": 1.2433022068358652e-05, "loss": 1.8884, "step": 25966000 }, { "epoch": 75.16, "learning_rate": 1.2432299868006669e-05, "loss": 1.8735, "step": 25966500 }, { "epoch": 75.16, "learning_rate": 1.2431576220359393e-05, "loss": 1.8772, "step": 25967000 }, { "epoch": 75.17, "learning_rate": 1.2430852572712117e-05, "loss": 1.8481, "step": 25967500 }, { "epoch": 75.17, "learning_rate": 1.2430128925064839e-05, "loss": 1.8529, "step": 25968000 }, { "epoch": 75.17, "learning_rate": 1.2429406724712856e-05, "loss": 1.8924, "step": 25968500 }, { "epoch": 75.17, "learning_rate": 1.242868307706558e-05, "loss": 1.8902, "step": 25969000 }, { "epoch": 75.17, "learning_rate": 1.2427959429418304e-05, "loss": 1.8404, "step": 25969500 }, { "epoch": 75.17, "learning_rate": 1.2427237229066321e-05, "loss": 1.8749, "step": 25970000 }, { "epoch": 75.17, "learning_rate": 1.2426513581419044e-05, "loss": 1.8594, "step": 25970500 }, { "epoch": 75.18, "learning_rate": 1.2425789933771768e-05, "loss": 1.8318, "step": 25971000 }, { "epoch": 75.18, "learning_rate": 1.2425066286124492e-05, "loss": 1.8735, "step": 25971500 }, { "epoch": 75.18, "learning_rate": 1.2424342638477214e-05, "loss": 1.8613, "step": 25972000 }, { "epoch": 75.18, "learning_rate": 1.2423618990829938e-05, "loss": 1.8338, "step": 25972500 }, { "epoch": 75.18, "learning_rate": 1.2422895343182662e-05, "loss": 1.8725, "step": 25973000 }, { "epoch": 75.18, "learning_rate": 1.2422171695535384e-05, "loss": 1.8646, "step": 25973500 }, { "epoch": 75.18, "learning_rate": 1.2421448047888106e-05, "loss": 1.8725, "step": 25974000 }, { "epoch": 75.19, "learning_rate": 1.2420724400240832e-05, "loss": 1.8708, "step": 25974500 }, { "epoch": 75.19, "learning_rate": 1.2420000752593554e-05, "loss": 1.8497, "step": 25975000 }, { "epoch": 75.19, "learning_rate": 1.2419277104946276e-05, "loss": 1.8578, "step": 25975500 }, { "epoch": 75.19, "learning_rate": 1.2418553457299e-05, "loss": 1.8761, "step": 25976000 }, { "epoch": 75.19, "learning_rate": 1.2417829809651724e-05, "loss": 1.8687, "step": 25976500 }, { "epoch": 75.19, "learning_rate": 1.2417106162004446e-05, "loss": 1.8776, "step": 25977000 }, { "epoch": 75.19, "learning_rate": 1.241638251435717e-05, "loss": 1.8865, "step": 25977500 }, { "epoch": 75.2, "learning_rate": 1.2415658866709894e-05, "loss": 1.867, "step": 25978000 }, { "epoch": 75.2, "learning_rate": 1.2414935219062616e-05, "loss": 1.841, "step": 25978500 }, { "epoch": 75.2, "learning_rate": 1.2414213018710634e-05, "loss": 1.8532, "step": 25979000 }, { "epoch": 75.2, "learning_rate": 1.2413489371063358e-05, "loss": 1.8469, "step": 25979500 }, { "epoch": 75.2, "learning_rate": 1.2412765723416081e-05, "loss": 1.8639, "step": 25980000 }, { "epoch": 75.2, "learning_rate": 1.2412042075768804e-05, "loss": 1.8252, "step": 25980500 }, { "epoch": 75.2, "learning_rate": 1.2411318428121526e-05, "loss": 1.8462, "step": 25981000 }, { "epoch": 75.21, "learning_rate": 1.2410594780474252e-05, "loss": 1.8593, "step": 25981500 }, { "epoch": 75.21, "learning_rate": 1.2409872580122269e-05, "loss": 1.8727, "step": 25982000 }, { "epoch": 75.21, "learning_rate": 1.2409148932474991e-05, "loss": 1.8567, "step": 25982500 }, { "epoch": 75.21, "learning_rate": 1.2408425284827713e-05, "loss": 1.874, "step": 25983000 }, { "epoch": 75.21, "learning_rate": 1.2407701637180439e-05, "loss": 1.8482, "step": 25983500 }, { "epoch": 75.21, "learning_rate": 1.2406977989533161e-05, "loss": 1.8501, "step": 25984000 }, { "epoch": 75.21, "learning_rate": 1.2406254341885883e-05, "loss": 1.874, "step": 25984500 }, { "epoch": 75.22, "learning_rate": 1.2405532141533902e-05, "loss": 1.8665, "step": 25985000 }, { "epoch": 75.22, "learning_rate": 1.2404808493886626e-05, "loss": 1.8577, "step": 25985500 }, { "epoch": 75.22, "learning_rate": 1.2404084846239348e-05, "loss": 1.8754, "step": 25986000 }, { "epoch": 75.22, "learning_rate": 1.240336119859207e-05, "loss": 1.8358, "step": 25986500 }, { "epoch": 75.22, "learning_rate": 1.2402637550944796e-05, "loss": 1.8646, "step": 25987000 }, { "epoch": 75.22, "learning_rate": 1.2401913903297519e-05, "loss": 1.8772, "step": 25987500 }, { "epoch": 75.22, "learning_rate": 1.2401191702945536e-05, "loss": 1.8507, "step": 25988000 }, { "epoch": 75.23, "learning_rate": 1.2400468055298258e-05, "loss": 1.8714, "step": 25988500 }, { "epoch": 75.23, "learning_rate": 1.2399744407650984e-05, "loss": 1.8629, "step": 25989000 }, { "epoch": 75.23, "learning_rate": 1.2399020760003706e-05, "loss": 1.8722, "step": 25989500 }, { "epoch": 75.23, "learning_rate": 1.2398297112356428e-05, "loss": 1.8513, "step": 25990000 }, { "epoch": 75.23, "learning_rate": 1.2397574912004447e-05, "loss": 1.8904, "step": 25990500 }, { "epoch": 75.23, "learning_rate": 1.2396851264357171e-05, "loss": 1.8719, "step": 25991000 }, { "epoch": 75.23, "learning_rate": 1.2396127616709893e-05, "loss": 1.8632, "step": 25991500 }, { "epoch": 75.24, "learning_rate": 1.2395403969062615e-05, "loss": 1.8626, "step": 25992000 }, { "epoch": 75.24, "learning_rate": 1.239468032141534e-05, "loss": 1.8869, "step": 25992500 }, { "epoch": 75.24, "learning_rate": 1.2393956673768063e-05, "loss": 1.8801, "step": 25993000 }, { "epoch": 75.24, "learning_rate": 1.239323447341608e-05, "loss": 1.866, "step": 25993500 }, { "epoch": 75.24, "learning_rate": 1.2392510825768803e-05, "loss": 1.8664, "step": 25994000 }, { "epoch": 75.24, "learning_rate": 1.2391787178121527e-05, "loss": 1.836, "step": 25994500 }, { "epoch": 75.24, "learning_rate": 1.239106353047425e-05, "loss": 1.869, "step": 25995000 }, { "epoch": 75.25, "learning_rate": 1.2390339882826973e-05, "loss": 1.8954, "step": 25995500 }, { "epoch": 75.25, "learning_rate": 1.238961768247499e-05, "loss": 1.8784, "step": 25996000 }, { "epoch": 75.25, "learning_rate": 1.2388895482123009e-05, "loss": 1.8577, "step": 25996500 }, { "epoch": 75.25, "learning_rate": 1.2388171834475733e-05, "loss": 1.8729, "step": 25997000 }, { "epoch": 75.25, "learning_rate": 1.2387448186828455e-05, "loss": 1.8624, "step": 25997500 }, { "epoch": 75.25, "learning_rate": 1.238672453918118e-05, "loss": 1.8456, "step": 25998000 }, { "epoch": 75.26, "learning_rate": 1.2386000891533903e-05, "loss": 1.8896, "step": 25998500 }, { "epoch": 75.26, "learning_rate": 1.2385277243886625e-05, "loss": 1.8629, "step": 25999000 }, { "epoch": 75.26, "learning_rate": 1.2384553596239348e-05, "loss": 1.8618, "step": 25999500 }, { "epoch": 75.26, "learning_rate": 1.2383829948592072e-05, "loss": 1.8961, "step": 26000000 }, { "epoch": 75.26, "learning_rate": 1.2383106300944795e-05, "loss": 1.8808, "step": 26000500 }, { "epoch": 75.26, "learning_rate": 1.2382382653297518e-05, "loss": 1.8821, "step": 26001000 }, { "epoch": 75.26, "learning_rate": 1.2381659005650242e-05, "loss": 1.8542, "step": 26001500 }, { "epoch": 75.27, "learning_rate": 1.2380935358002966e-05, "loss": 1.8805, "step": 26002000 }, { "epoch": 75.27, "learning_rate": 1.2380211710355688e-05, "loss": 1.8515, "step": 26002500 }, { "epoch": 75.27, "learning_rate": 1.2379489510003705e-05, "loss": 1.8531, "step": 26003000 }, { "epoch": 75.27, "learning_rate": 1.2378765862356429e-05, "loss": 1.8443, "step": 26003500 }, { "epoch": 75.27, "learning_rate": 1.2378042214709153e-05, "loss": 1.8481, "step": 26004000 }, { "epoch": 75.27, "learning_rate": 1.2377318567061875e-05, "loss": 1.8896, "step": 26004500 }, { "epoch": 75.27, "learning_rate": 1.2376594919414599e-05, "loss": 1.8672, "step": 26005000 }, { "epoch": 75.28, "learning_rate": 1.2375872719062616e-05, "loss": 1.8606, "step": 26005500 }, { "epoch": 75.28, "learning_rate": 1.2375150518710635e-05, "loss": 1.9027, "step": 26006000 }, { "epoch": 75.28, "learning_rate": 1.2374426871063357e-05, "loss": 1.8714, "step": 26006500 }, { "epoch": 75.28, "learning_rate": 1.237370322341608e-05, "loss": 1.86, "step": 26007000 }, { "epoch": 75.28, "learning_rate": 1.2372982470359392e-05, "loss": 1.87, "step": 26007500 }, { "epoch": 75.28, "learning_rate": 1.2372258822712116e-05, "loss": 1.8746, "step": 26008000 }, { "epoch": 75.28, "learning_rate": 1.237153517506484e-05, "loss": 1.8797, "step": 26008500 }, { "epoch": 75.29, "learning_rate": 1.2370811527417562e-05, "loss": 1.852, "step": 26009000 }, { "epoch": 75.29, "learning_rate": 1.2370087879770286e-05, "loss": 1.8758, "step": 26009500 }, { "epoch": 75.29, "learning_rate": 1.236936423212301e-05, "loss": 1.8469, "step": 26010000 }, { "epoch": 75.29, "learning_rate": 1.2368640584475732e-05, "loss": 1.8495, "step": 26010500 }, { "epoch": 75.29, "learning_rate": 1.2367916936828456e-05, "loss": 1.8779, "step": 26011000 }, { "epoch": 75.29, "learning_rate": 1.2367193289181178e-05, "loss": 1.878, "step": 26011500 }, { "epoch": 75.29, "learning_rate": 1.2366469641533902e-05, "loss": 1.855, "step": 26012000 }, { "epoch": 75.3, "learning_rate": 1.2365745993886624e-05, "loss": 1.8476, "step": 26012500 }, { "epoch": 75.3, "learning_rate": 1.2365022346239348e-05, "loss": 1.8428, "step": 26013000 }, { "epoch": 75.3, "learning_rate": 1.2364300145887367e-05, "loss": 1.8436, "step": 26013500 }, { "epoch": 75.3, "learning_rate": 1.236357649824009e-05, "loss": 1.8535, "step": 26014000 }, { "epoch": 75.3, "learning_rate": 1.2362854297888107e-05, "loss": 1.8592, "step": 26014500 }, { "epoch": 75.3, "learning_rate": 1.236213065024083e-05, "loss": 1.8661, "step": 26015000 }, { "epoch": 75.3, "learning_rate": 1.2361407002593555e-05, "loss": 1.8853, "step": 26015500 }, { "epoch": 75.31, "learning_rate": 1.2360683354946277e-05, "loss": 1.8435, "step": 26016000 }, { "epoch": 75.31, "learning_rate": 1.2359959707299e-05, "loss": 1.8549, "step": 26016500 }, { "epoch": 75.31, "learning_rate": 1.2359236059651723e-05, "loss": 1.8609, "step": 26017000 }, { "epoch": 75.31, "learning_rate": 1.2358512412004447e-05, "loss": 1.8466, "step": 26017500 }, { "epoch": 75.31, "learning_rate": 1.235778876435717e-05, "loss": 1.8585, "step": 26018000 }, { "epoch": 75.31, "learning_rate": 1.2357065116709893e-05, "loss": 1.8596, "step": 26018500 }, { "epoch": 75.31, "learning_rate": 1.235634291635791e-05, "loss": 1.8373, "step": 26019000 }, { "epoch": 75.32, "learning_rate": 1.2355619268710634e-05, "loss": 1.8541, "step": 26019500 }, { "epoch": 75.32, "learning_rate": 1.2354895621063357e-05, "loss": 1.8644, "step": 26020000 }, { "epoch": 75.32, "learning_rate": 1.235417197341608e-05, "loss": 1.8495, "step": 26020500 }, { "epoch": 75.32, "learning_rate": 1.2353448325768804e-05, "loss": 1.844, "step": 26021000 }, { "epoch": 75.32, "learning_rate": 1.2352724678121527e-05, "loss": 1.8437, "step": 26021500 }, { "epoch": 75.32, "learning_rate": 1.235200103047425e-05, "loss": 1.8601, "step": 26022000 }, { "epoch": 75.32, "learning_rate": 1.2351280277417563e-05, "loss": 1.8714, "step": 26022500 }, { "epoch": 75.33, "learning_rate": 1.2350556629770287e-05, "loss": 1.8646, "step": 26023000 }, { "epoch": 75.33, "learning_rate": 1.2349832982123009e-05, "loss": 1.8728, "step": 26023500 }, { "epoch": 75.33, "learning_rate": 1.2349109334475733e-05, "loss": 1.8547, "step": 26024000 }, { "epoch": 75.33, "learning_rate": 1.2348385686828455e-05, "loss": 1.837, "step": 26024500 }, { "epoch": 75.33, "learning_rate": 1.2347663486476474e-05, "loss": 1.8958, "step": 26025000 }, { "epoch": 75.33, "learning_rate": 1.2346939838829196e-05, "loss": 1.8626, "step": 26025500 }, { "epoch": 75.33, "learning_rate": 1.234621619118192e-05, "loss": 1.8454, "step": 26026000 }, { "epoch": 75.34, "learning_rate": 1.2345492543534642e-05, "loss": 1.8863, "step": 26026500 }, { "epoch": 75.34, "learning_rate": 1.2344768895887366e-05, "loss": 1.8458, "step": 26027000 }, { "epoch": 75.34, "learning_rate": 1.234404524824009e-05, "loss": 1.8661, "step": 26027500 }, { "epoch": 75.34, "learning_rate": 1.2343321600592813e-05, "loss": 1.8914, "step": 26028000 }, { "epoch": 75.34, "learning_rate": 1.2342597952945537e-05, "loss": 1.8875, "step": 26028500 }, { "epoch": 75.34, "learning_rate": 1.2341874305298259e-05, "loss": 1.8802, "step": 26029000 }, { "epoch": 75.34, "learning_rate": 1.2341152104946278e-05, "loss": 1.8666, "step": 26029500 }, { "epoch": 75.35, "learning_rate": 1.2340428457299e-05, "loss": 1.8722, "step": 26030000 }, { "epoch": 75.35, "learning_rate": 1.2339704809651724e-05, "loss": 1.8718, "step": 26030500 }, { "epoch": 75.35, "learning_rate": 1.2338981162004446e-05, "loss": 1.8513, "step": 26031000 }, { "epoch": 75.35, "learning_rate": 1.2338258961652465e-05, "loss": 1.8488, "step": 26031500 }, { "epoch": 75.35, "learning_rate": 1.2337535314005187e-05, "loss": 1.8667, "step": 26032000 }, { "epoch": 75.35, "learning_rate": 1.2336811666357911e-05, "loss": 1.8707, "step": 26032500 }, { "epoch": 75.35, "learning_rate": 1.2336088018710633e-05, "loss": 1.8643, "step": 26033000 }, { "epoch": 75.36, "learning_rate": 1.2335364371063357e-05, "loss": 1.8552, "step": 26033500 }, { "epoch": 75.36, "learning_rate": 1.2334640723416081e-05, "loss": 1.8898, "step": 26034000 }, { "epoch": 75.36, "learning_rate": 1.2333919970359394e-05, "loss": 1.8622, "step": 26034500 }, { "epoch": 75.36, "learning_rate": 1.2333196322712116e-05, "loss": 1.8783, "step": 26035000 }, { "epoch": 75.36, "learning_rate": 1.233247267506484e-05, "loss": 1.8641, "step": 26035500 }, { "epoch": 75.36, "learning_rate": 1.2331749027417562e-05, "loss": 1.8548, "step": 26036000 }, { "epoch": 75.37, "learning_rate": 1.2331025379770286e-05, "loss": 1.8843, "step": 26036500 }, { "epoch": 75.37, "learning_rate": 1.233030173212301e-05, "loss": 1.8965, "step": 26037000 }, { "epoch": 75.37, "learning_rate": 1.2329579531771027e-05, "loss": 1.8715, "step": 26037500 }, { "epoch": 75.37, "learning_rate": 1.2328857331419044e-05, "loss": 1.8607, "step": 26038000 }, { "epoch": 75.37, "learning_rate": 1.2328133683771768e-05, "loss": 1.8576, "step": 26038500 }, { "epoch": 75.37, "learning_rate": 1.2327411483419785e-05, "loss": 1.8609, "step": 26039000 }, { "epoch": 75.37, "learning_rate": 1.232668783577251e-05, "loss": 1.8234, "step": 26039500 }, { "epoch": 75.38, "learning_rate": 1.2325964188125232e-05, "loss": 1.8781, "step": 26040000 }, { "epoch": 75.38, "learning_rate": 1.2325240540477956e-05, "loss": 1.8681, "step": 26040500 }, { "epoch": 75.38, "learning_rate": 1.232451689283068e-05, "loss": 1.8812, "step": 26041000 }, { "epoch": 75.38, "learning_rate": 1.2323793245183402e-05, "loss": 1.8763, "step": 26041500 }, { "epoch": 75.38, "learning_rate": 1.2323069597536126e-05, "loss": 1.8666, "step": 26042000 }, { "epoch": 75.38, "learning_rate": 1.2322345949888848e-05, "loss": 1.8718, "step": 26042500 }, { "epoch": 75.38, "learning_rate": 1.2321622302241572e-05, "loss": 1.8391, "step": 26043000 }, { "epoch": 75.39, "learning_rate": 1.2320898654594294e-05, "loss": 1.8567, "step": 26043500 }, { "epoch": 75.39, "learning_rate": 1.2320175006947018e-05, "loss": 1.8731, "step": 26044000 }, { "epoch": 75.39, "learning_rate": 1.2319451359299742e-05, "loss": 1.8843, "step": 26044500 }, { "epoch": 75.39, "learning_rate": 1.2318727711652464e-05, "loss": 1.8766, "step": 26045000 }, { "epoch": 75.39, "learning_rate": 1.2318004064005188e-05, "loss": 1.8344, "step": 26045500 }, { "epoch": 75.39, "learning_rate": 1.2317281863653205e-05, "loss": 1.8702, "step": 26046000 }, { "epoch": 75.39, "learning_rate": 1.231655821600593e-05, "loss": 1.8609, "step": 26046500 }, { "epoch": 75.4, "learning_rate": 1.2315836015653946e-05, "loss": 1.8442, "step": 26047000 }, { "epoch": 75.4, "learning_rate": 1.2315112368006669e-05, "loss": 1.8747, "step": 26047500 }, { "epoch": 75.4, "learning_rate": 1.2314388720359393e-05, "loss": 1.8702, "step": 26048000 }, { "epoch": 75.4, "learning_rate": 1.2313665072712117e-05, "loss": 1.853, "step": 26048500 }, { "epoch": 75.4, "learning_rate": 1.2312941425064839e-05, "loss": 1.8286, "step": 26049000 }, { "epoch": 75.4, "learning_rate": 1.2312219224712858e-05, "loss": 1.8863, "step": 26049500 }, { "epoch": 75.4, "learning_rate": 1.231149557706558e-05, "loss": 1.8671, "step": 26050000 }, { "epoch": 75.41, "learning_rate": 1.2310771929418304e-05, "loss": 1.8553, "step": 26050500 }, { "epoch": 75.41, "learning_rate": 1.2310048281771026e-05, "loss": 1.8686, "step": 26051000 }, { "epoch": 75.41, "learning_rate": 1.230932463412375e-05, "loss": 1.8502, "step": 26051500 }, { "epoch": 75.41, "learning_rate": 1.2308600986476474e-05, "loss": 1.845, "step": 26052000 }, { "epoch": 75.41, "learning_rate": 1.2307877338829196e-05, "loss": 1.8707, "step": 26052500 }, { "epoch": 75.41, "learning_rate": 1.230715369118192e-05, "loss": 1.8662, "step": 26053000 }, { "epoch": 75.41, "learning_rate": 1.2306431490829937e-05, "loss": 1.8513, "step": 26053500 }, { "epoch": 75.42, "learning_rate": 1.2305707843182661e-05, "loss": 1.8822, "step": 26054000 }, { "epoch": 75.42, "learning_rate": 1.2304984195535384e-05, "loss": 1.8757, "step": 26054500 }, { "epoch": 75.42, "learning_rate": 1.2304260547888107e-05, "loss": 1.8554, "step": 26055000 }, { "epoch": 75.42, "learning_rate": 1.2303536900240831e-05, "loss": 1.8514, "step": 26055500 }, { "epoch": 75.42, "learning_rate": 1.2302813252593554e-05, "loss": 1.8752, "step": 26056000 }, { "epoch": 75.42, "learning_rate": 1.2302089604946276e-05, "loss": 1.8705, "step": 26056500 }, { "epoch": 75.42, "learning_rate": 1.2301365957299e-05, "loss": 1.8332, "step": 26057000 }, { "epoch": 75.43, "learning_rate": 1.2300642309651724e-05, "loss": 1.8752, "step": 26057500 }, { "epoch": 75.43, "learning_rate": 1.2299918662004446e-05, "loss": 1.8391, "step": 26058000 }, { "epoch": 75.43, "learning_rate": 1.2299196461652463e-05, "loss": 1.8313, "step": 26058500 }, { "epoch": 75.43, "learning_rate": 1.2298472814005189e-05, "loss": 1.8653, "step": 26059000 }, { "epoch": 75.43, "learning_rate": 1.2297749166357911e-05, "loss": 1.8883, "step": 26059500 }, { "epoch": 75.43, "learning_rate": 1.2297025518710633e-05, "loss": 1.8354, "step": 26060000 }, { "epoch": 75.43, "learning_rate": 1.2296301871063357e-05, "loss": 1.8781, "step": 26060500 }, { "epoch": 75.44, "learning_rate": 1.2295579670711376e-05, "loss": 1.8478, "step": 26061000 }, { "epoch": 75.44, "learning_rate": 1.2294856023064098e-05, "loss": 1.8405, "step": 26061500 }, { "epoch": 75.44, "learning_rate": 1.229413237541682e-05, "loss": 1.8575, "step": 26062000 }, { "epoch": 75.44, "learning_rate": 1.229341017506484e-05, "loss": 1.8756, "step": 26062500 }, { "epoch": 75.44, "learning_rate": 1.2292686527417564e-05, "loss": 1.8606, "step": 26063000 }, { "epoch": 75.44, "learning_rate": 1.2291962879770286e-05, "loss": 1.8363, "step": 26063500 }, { "epoch": 75.44, "learning_rate": 1.2291239232123008e-05, "loss": 1.8615, "step": 26064000 }, { "epoch": 75.45, "learning_rate": 1.2290515584475732e-05, "loss": 1.856, "step": 26064500 }, { "epoch": 75.45, "learning_rate": 1.2289791936828456e-05, "loss": 1.8674, "step": 26065000 }, { "epoch": 75.45, "learning_rate": 1.2289069736476473e-05, "loss": 1.8699, "step": 26065500 }, { "epoch": 75.45, "learning_rate": 1.2288346088829195e-05, "loss": 1.8774, "step": 26066000 }, { "epoch": 75.45, "learning_rate": 1.2287622441181921e-05, "loss": 1.8576, "step": 26066500 }, { "epoch": 75.45, "learning_rate": 1.2286898793534643e-05, "loss": 1.8671, "step": 26067000 }, { "epoch": 75.45, "learning_rate": 1.2286175145887365e-05, "loss": 1.8918, "step": 26067500 }, { "epoch": 75.46, "learning_rate": 1.228545149824009e-05, "loss": 1.8567, "step": 26068000 }, { "epoch": 75.46, "learning_rate": 1.2284727850592813e-05, "loss": 1.8221, "step": 26068500 }, { "epoch": 75.46, "learning_rate": 1.228400565024083e-05, "loss": 1.8529, "step": 26069000 }, { "epoch": 75.46, "learning_rate": 1.2283282002593553e-05, "loss": 1.8732, "step": 26069500 }, { "epoch": 75.46, "learning_rate": 1.2282558354946277e-05, "loss": 1.8691, "step": 26070000 }, { "epoch": 75.46, "learning_rate": 1.2281834707299e-05, "loss": 1.889, "step": 26070500 }, { "epoch": 75.46, "learning_rate": 1.2281111059651723e-05, "loss": 1.8545, "step": 26071000 }, { "epoch": 75.47, "learning_rate": 1.2280387412004447e-05, "loss": 1.8784, "step": 26071500 }, { "epoch": 75.47, "learning_rate": 1.2279665211652466e-05, "loss": 1.8626, "step": 26072000 }, { "epoch": 75.47, "learning_rate": 1.2278941564005188e-05, "loss": 1.8882, "step": 26072500 }, { "epoch": 75.47, "learning_rate": 1.227821791635791e-05, "loss": 1.8639, "step": 26073000 }, { "epoch": 75.47, "learning_rate": 1.2277494268710634e-05, "loss": 1.8761, "step": 26073500 }, { "epoch": 75.47, "learning_rate": 1.2276770621063358e-05, "loss": 1.8612, "step": 26074000 }, { "epoch": 75.48, "learning_rate": 1.227604697341608e-05, "loss": 1.8762, "step": 26074500 }, { "epoch": 75.48, "learning_rate": 1.2275324773064098e-05, "loss": 1.873, "step": 26075000 }, { "epoch": 75.48, "learning_rate": 1.2274602572712115e-05, "loss": 1.8745, "step": 26075500 }, { "epoch": 75.48, "learning_rate": 1.227387892506484e-05, "loss": 1.8683, "step": 26076000 }, { "epoch": 75.48, "learning_rate": 1.2273155277417563e-05, "loss": 1.8695, "step": 26076500 }, { "epoch": 75.48, "learning_rate": 1.2272431629770285e-05, "loss": 1.8928, "step": 26077000 }, { "epoch": 75.48, "learning_rate": 1.227170798212301e-05, "loss": 1.8721, "step": 26077500 }, { "epoch": 75.49, "learning_rate": 1.2270984334475733e-05, "loss": 1.86, "step": 26078000 }, { "epoch": 75.49, "learning_rate": 1.2270260686828455e-05, "loss": 1.8662, "step": 26078500 }, { "epoch": 75.49, "learning_rate": 1.2269537039181179e-05, "loss": 1.8479, "step": 26079000 }, { "epoch": 75.49, "learning_rate": 1.2268813391533903e-05, "loss": 1.8593, "step": 26079500 }, { "epoch": 75.49, "learning_rate": 1.2268089743886625e-05, "loss": 1.8785, "step": 26080000 }, { "epoch": 75.49, "learning_rate": 1.2267367543534642e-05, "loss": 1.8419, "step": 26080500 }, { "epoch": 75.49, "learning_rate": 1.226664534318266e-05, "loss": 1.8778, "step": 26081000 }, { "epoch": 75.5, "learning_rate": 1.2265921695535385e-05, "loss": 1.8494, "step": 26081500 }, { "epoch": 75.5, "learning_rate": 1.2265198047888107e-05, "loss": 1.8665, "step": 26082000 }, { "epoch": 75.5, "learning_rate": 1.226447440024083e-05, "loss": 1.8308, "step": 26082500 }, { "epoch": 75.5, "learning_rate": 1.2263752199888847e-05, "loss": 1.8569, "step": 26083000 }, { "epoch": 75.5, "learning_rate": 1.2263028552241572e-05, "loss": 1.8746, "step": 26083500 }, { "epoch": 75.5, "learning_rate": 1.2262304904594295e-05, "loss": 1.8787, "step": 26084000 }, { "epoch": 75.5, "learning_rate": 1.2261581256947017e-05, "loss": 1.8724, "step": 26084500 }, { "epoch": 75.51, "learning_rate": 1.2260857609299743e-05, "loss": 1.8831, "step": 26085000 }, { "epoch": 75.51, "learning_rate": 1.226013540894776e-05, "loss": 1.8679, "step": 26085500 }, { "epoch": 75.51, "learning_rate": 1.2259411761300482e-05, "loss": 1.8542, "step": 26086000 }, { "epoch": 75.51, "learning_rate": 1.2258688113653204e-05, "loss": 1.872, "step": 26086500 }, { "epoch": 75.51, "learning_rate": 1.225796446600593e-05, "loss": 1.8711, "step": 26087000 }, { "epoch": 75.51, "learning_rate": 1.2257240818358652e-05, "loss": 1.8681, "step": 26087500 }, { "epoch": 75.51, "learning_rate": 1.2256517170711374e-05, "loss": 1.8714, "step": 26088000 }, { "epoch": 75.52, "learning_rate": 1.2255793523064098e-05, "loss": 1.8435, "step": 26088500 }, { "epoch": 75.52, "learning_rate": 1.2255069875416822e-05, "loss": 1.8906, "step": 26089000 }, { "epoch": 75.52, "learning_rate": 1.2254346227769544e-05, "loss": 1.8936, "step": 26089500 }, { "epoch": 75.52, "learning_rate": 1.2253622580122268e-05, "loss": 1.869, "step": 26090000 }, { "epoch": 75.52, "learning_rate": 1.2252898932474992e-05, "loss": 1.874, "step": 26090500 }, { "epoch": 75.52, "learning_rate": 1.2252175284827715e-05, "loss": 1.8617, "step": 26091000 }, { "epoch": 75.52, "learning_rate": 1.2251451637180437e-05, "loss": 1.8732, "step": 26091500 }, { "epoch": 75.53, "learning_rate": 1.2250727989533162e-05, "loss": 1.848, "step": 26092000 }, { "epoch": 75.53, "learning_rate": 1.2250004341885885e-05, "loss": 1.8547, "step": 26092500 }, { "epoch": 75.53, "learning_rate": 1.2249280694238607e-05, "loss": 1.8621, "step": 26093000 }, { "epoch": 75.53, "learning_rate": 1.224855704659133e-05, "loss": 1.8835, "step": 26093500 }, { "epoch": 75.53, "learning_rate": 1.2247836293534643e-05, "loss": 1.881, "step": 26094000 }, { "epoch": 75.53, "learning_rate": 1.2247112645887367e-05, "loss": 1.8373, "step": 26094500 }, { "epoch": 75.53, "learning_rate": 1.224638899824009e-05, "loss": 1.8615, "step": 26095000 }, { "epoch": 75.54, "learning_rate": 1.2245665350592813e-05, "loss": 1.9021, "step": 26095500 }, { "epoch": 75.54, "learning_rate": 1.2244941702945537e-05, "loss": 1.8654, "step": 26096000 }, { "epoch": 75.54, "learning_rate": 1.224421805529826e-05, "loss": 1.8497, "step": 26096500 }, { "epoch": 75.54, "learning_rate": 1.2243494407650982e-05, "loss": 1.8658, "step": 26097000 }, { "epoch": 75.54, "learning_rate": 1.2242770760003705e-05, "loss": 1.8366, "step": 26097500 }, { "epoch": 75.54, "learning_rate": 1.2242048559651724e-05, "loss": 1.863, "step": 26098000 }, { "epoch": 75.54, "learning_rate": 1.2241324912004447e-05, "loss": 1.8565, "step": 26098500 }, { "epoch": 75.55, "learning_rate": 1.2240601264357169e-05, "loss": 1.8644, "step": 26099000 }, { "epoch": 75.55, "learning_rate": 1.2239877616709893e-05, "loss": 1.8565, "step": 26099500 }, { "epoch": 75.55, "learning_rate": 1.2239153969062617e-05, "loss": 1.8857, "step": 26100000 }, { "epoch": 75.55, "learning_rate": 1.2238430321415339e-05, "loss": 1.845, "step": 26100500 }, { "epoch": 75.55, "learning_rate": 1.2237706673768063e-05, "loss": 1.8798, "step": 26101000 }, { "epoch": 75.55, "learning_rate": 1.2236983026120787e-05, "loss": 1.8821, "step": 26101500 }, { "epoch": 75.55, "learning_rate": 1.2236260825768804e-05, "loss": 1.8472, "step": 26102000 }, { "epoch": 75.56, "learning_rate": 1.2235537178121526e-05, "loss": 1.878, "step": 26102500 }, { "epoch": 75.56, "learning_rate": 1.223481353047425e-05, "loss": 1.8709, "step": 26103000 }, { "epoch": 75.56, "learning_rate": 1.2234089882826974e-05, "loss": 1.8866, "step": 26103500 }, { "epoch": 75.56, "learning_rate": 1.2233366235179696e-05, "loss": 1.8675, "step": 26104000 }, { "epoch": 75.56, "learning_rate": 1.2232644034827714e-05, "loss": 1.8312, "step": 26104500 }, { "epoch": 75.56, "learning_rate": 1.2231920387180438e-05, "loss": 1.8648, "step": 26105000 }, { "epoch": 75.56, "learning_rate": 1.2231198186828457e-05, "loss": 1.8687, "step": 26105500 }, { "epoch": 75.57, "learning_rate": 1.2230474539181179e-05, "loss": 1.8501, "step": 26106000 }, { "epoch": 75.57, "learning_rate": 1.2229752338829196e-05, "loss": 1.8749, "step": 26106500 }, { "epoch": 75.57, "learning_rate": 1.222902869118192e-05, "loss": 1.8507, "step": 26107000 }, { "epoch": 75.57, "learning_rate": 1.2228305043534644e-05, "loss": 1.8842, "step": 26107500 }, { "epoch": 75.57, "learning_rate": 1.2227581395887366e-05, "loss": 1.8548, "step": 26108000 }, { "epoch": 75.57, "learning_rate": 1.222685774824009e-05, "loss": 1.8799, "step": 26108500 }, { "epoch": 75.57, "learning_rate": 1.2226134100592812e-05, "loss": 1.8446, "step": 26109000 }, { "epoch": 75.58, "learning_rate": 1.2225411900240831e-05, "loss": 1.8878, "step": 26109500 }, { "epoch": 75.58, "learning_rate": 1.2224688252593553e-05, "loss": 1.8598, "step": 26110000 }, { "epoch": 75.58, "learning_rate": 1.2223964604946277e-05, "loss": 1.8508, "step": 26110500 }, { "epoch": 75.58, "learning_rate": 1.2223240957299001e-05, "loss": 1.8826, "step": 26111000 }, { "epoch": 75.58, "learning_rate": 1.2222517309651724e-05, "loss": 1.8762, "step": 26111500 }, { "epoch": 75.58, "learning_rate": 1.2221793662004446e-05, "loss": 1.873, "step": 26112000 }, { "epoch": 75.58, "learning_rate": 1.222107001435717e-05, "loss": 1.8335, "step": 26112500 }, { "epoch": 75.59, "learning_rate": 1.2220346366709894e-05, "loss": 1.871, "step": 26113000 }, { "epoch": 75.59, "learning_rate": 1.2219622719062616e-05, "loss": 1.8576, "step": 26113500 }, { "epoch": 75.59, "learning_rate": 1.221889907141534e-05, "loss": 1.8731, "step": 26114000 }, { "epoch": 75.59, "learning_rate": 1.2218175423768064e-05, "loss": 1.8722, "step": 26114500 }, { "epoch": 75.59, "learning_rate": 1.2217453223416081e-05, "loss": 1.8517, "step": 26115000 }, { "epoch": 75.59, "learning_rate": 1.2216729575768803e-05, "loss": 1.868, "step": 26115500 }, { "epoch": 75.6, "learning_rate": 1.2216005928121527e-05, "loss": 1.86, "step": 26116000 }, { "epoch": 75.6, "learning_rate": 1.2215283727769544e-05, "loss": 1.848, "step": 26116500 }, { "epoch": 75.6, "learning_rate": 1.2214560080122268e-05, "loss": 1.8436, "step": 26117000 }, { "epoch": 75.6, "learning_rate": 1.221383643247499e-05, "loss": 1.8783, "step": 26117500 }, { "epoch": 75.6, "learning_rate": 1.2213112784827714e-05, "loss": 1.866, "step": 26118000 }, { "epoch": 75.6, "learning_rate": 1.2212389137180438e-05, "loss": 1.8665, "step": 26118500 }, { "epoch": 75.6, "learning_rate": 1.221166548953316e-05, "loss": 1.8875, "step": 26119000 }, { "epoch": 75.61, "learning_rate": 1.2210941841885885e-05, "loss": 1.8661, "step": 26119500 }, { "epoch": 75.61, "learning_rate": 1.2210218194238607e-05, "loss": 1.8634, "step": 26120000 }, { "epoch": 75.61, "learning_rate": 1.220949454659133e-05, "loss": 1.8695, "step": 26120500 }, { "epoch": 75.61, "learning_rate": 1.2208770898944055e-05, "loss": 1.877, "step": 26121000 }, { "epoch": 75.61, "learning_rate": 1.2208047251296777e-05, "loss": 1.8371, "step": 26121500 }, { "epoch": 75.61, "learning_rate": 1.22073236036495e-05, "loss": 1.8453, "step": 26122000 }, { "epoch": 75.61, "learning_rate": 1.2206601403297518e-05, "loss": 1.8622, "step": 26122500 }, { "epoch": 75.62, "learning_rate": 1.2205877755650242e-05, "loss": 1.8931, "step": 26123000 }, { "epoch": 75.62, "learning_rate": 1.2205154108002964e-05, "loss": 1.8543, "step": 26123500 }, { "epoch": 75.62, "learning_rate": 1.2204431907650983e-05, "loss": 1.8579, "step": 26124000 }, { "epoch": 75.62, "learning_rate": 1.2203708260003705e-05, "loss": 1.8495, "step": 26124500 }, { "epoch": 75.62, "learning_rate": 1.220298461235643e-05, "loss": 1.8702, "step": 26125000 }, { "epoch": 75.62, "learning_rate": 1.2202262412004447e-05, "loss": 1.8622, "step": 26125500 }, { "epoch": 75.62, "learning_rate": 1.220153876435717e-05, "loss": 1.8547, "step": 26126000 }, { "epoch": 75.63, "learning_rate": 1.2200816564005188e-05, "loss": 1.8397, "step": 26126500 }, { "epoch": 75.63, "learning_rate": 1.2200092916357912e-05, "loss": 1.8636, "step": 26127000 }, { "epoch": 75.63, "learning_rate": 1.2199369268710634e-05, "loss": 1.8821, "step": 26127500 }, { "epoch": 75.63, "learning_rate": 1.2198645621063358e-05, "loss": 1.8592, "step": 26128000 }, { "epoch": 75.63, "learning_rate": 1.219792197341608e-05, "loss": 1.8382, "step": 26128500 }, { "epoch": 75.63, "learning_rate": 1.2197198325768804e-05, "loss": 1.9039, "step": 26129000 }, { "epoch": 75.63, "learning_rate": 1.2196474678121526e-05, "loss": 1.8798, "step": 26129500 }, { "epoch": 75.64, "learning_rate": 1.219575103047425e-05, "loss": 1.8598, "step": 26130000 }, { "epoch": 75.64, "learning_rate": 1.2195027382826974e-05, "loss": 1.8853, "step": 26130500 }, { "epoch": 75.64, "learning_rate": 1.2194305182474991e-05, "loss": 1.8712, "step": 26131000 }, { "epoch": 75.64, "learning_rate": 1.2193581534827715e-05, "loss": 1.8717, "step": 26131500 }, { "epoch": 75.64, "learning_rate": 1.2192857887180437e-05, "loss": 1.8549, "step": 26132000 }, { "epoch": 75.64, "learning_rate": 1.2192134239533161e-05, "loss": 1.8526, "step": 26132500 }, { "epoch": 75.64, "learning_rate": 1.2191410591885884e-05, "loss": 1.8802, "step": 26133000 }, { "epoch": 75.65, "learning_rate": 1.2190688391533903e-05, "loss": 1.8597, "step": 26133500 }, { "epoch": 75.65, "learning_rate": 1.2189964743886625e-05, "loss": 1.8806, "step": 26134000 }, { "epoch": 75.65, "learning_rate": 1.2189241096239349e-05, "loss": 1.8758, "step": 26134500 }, { "epoch": 75.65, "learning_rate": 1.2188517448592071e-05, "loss": 1.8716, "step": 26135000 }, { "epoch": 75.65, "learning_rate": 1.2187793800944795e-05, "loss": 1.8867, "step": 26135500 }, { "epoch": 75.65, "learning_rate": 1.2187070153297519e-05, "loss": 1.8767, "step": 26136000 }, { "epoch": 75.65, "learning_rate": 1.2186346505650241e-05, "loss": 1.8808, "step": 26136500 }, { "epoch": 75.66, "learning_rate": 1.2185622858002965e-05, "loss": 1.8708, "step": 26137000 }, { "epoch": 75.66, "learning_rate": 1.2184900657650982e-05, "loss": 1.8882, "step": 26137500 }, { "epoch": 75.66, "learning_rate": 1.2184177010003706e-05, "loss": 1.8734, "step": 26138000 }, { "epoch": 75.66, "learning_rate": 1.2183453362356428e-05, "loss": 1.8626, "step": 26138500 }, { "epoch": 75.66, "learning_rate": 1.2182729714709152e-05, "loss": 1.8647, "step": 26139000 }, { "epoch": 75.66, "learning_rate": 1.2182006067061876e-05, "loss": 1.8729, "step": 26139500 }, { "epoch": 75.66, "learning_rate": 1.2181282419414599e-05, "loss": 1.8782, "step": 26140000 }, { "epoch": 75.67, "learning_rate": 1.218055877176732e-05, "loss": 1.8587, "step": 26140500 }, { "epoch": 75.67, "learning_rate": 1.2179835124120045e-05, "loss": 1.8591, "step": 26141000 }, { "epoch": 75.67, "learning_rate": 1.2179111476472769e-05, "loss": 1.8667, "step": 26141500 }, { "epoch": 75.67, "learning_rate": 1.2178389276120786e-05, "loss": 1.8563, "step": 26142000 }, { "epoch": 75.67, "learning_rate": 1.217766562847351e-05, "loss": 1.8774, "step": 26142500 }, { "epoch": 75.67, "learning_rate": 1.2176941980826232e-05, "loss": 1.8607, "step": 26143000 }, { "epoch": 75.67, "learning_rate": 1.2176218333178956e-05, "loss": 1.8568, "step": 26143500 }, { "epoch": 75.68, "learning_rate": 1.2175496132826973e-05, "loss": 1.8565, "step": 26144000 }, { "epoch": 75.68, "learning_rate": 1.2174772485179697e-05, "loss": 1.8633, "step": 26144500 }, { "epoch": 75.68, "learning_rate": 1.2174048837532421e-05, "loss": 1.8739, "step": 26145000 }, { "epoch": 75.68, "learning_rate": 1.2173326637180438e-05, "loss": 1.881, "step": 26145500 }, { "epoch": 75.68, "learning_rate": 1.2172604436828456e-05, "loss": 1.8665, "step": 26146000 }, { "epoch": 75.68, "learning_rate": 1.2171880789181178e-05, "loss": 1.8574, "step": 26146500 }, { "epoch": 75.68, "learning_rate": 1.2171157141533902e-05, "loss": 1.859, "step": 26147000 }, { "epoch": 75.69, "learning_rate": 1.2170433493886626e-05, "loss": 1.8593, "step": 26147500 }, { "epoch": 75.69, "learning_rate": 1.2169709846239348e-05, "loss": 1.843, "step": 26148000 }, { "epoch": 75.69, "learning_rate": 1.2168986198592072e-05, "loss": 1.8676, "step": 26148500 }, { "epoch": 75.69, "learning_rate": 1.2168262550944796e-05, "loss": 1.8719, "step": 26149000 }, { "epoch": 75.69, "learning_rate": 1.2167538903297518e-05, "loss": 1.8859, "step": 26149500 }, { "epoch": 75.69, "learning_rate": 1.2166815255650242e-05, "loss": 1.8545, "step": 26150000 }, { "epoch": 75.69, "learning_rate": 1.2166091608002964e-05, "loss": 1.8558, "step": 26150500 }, { "epoch": 75.7, "learning_rate": 1.2165367960355688e-05, "loss": 1.8848, "step": 26151000 }, { "epoch": 75.7, "learning_rate": 1.2164645760003705e-05, "loss": 1.867, "step": 26151500 }, { "epoch": 75.7, "learning_rate": 1.216392211235643e-05, "loss": 1.8576, "step": 26152000 }, { "epoch": 75.7, "learning_rate": 1.2163198464709153e-05, "loss": 1.8809, "step": 26152500 }, { "epoch": 75.7, "learning_rate": 1.2162474817061875e-05, "loss": 1.8661, "step": 26153000 }, { "epoch": 75.7, "learning_rate": 1.2161751169414598e-05, "loss": 1.8743, "step": 26153500 }, { "epoch": 75.71, "learning_rate": 1.2161027521767322e-05, "loss": 1.8682, "step": 26154000 }, { "epoch": 75.71, "learning_rate": 1.216030532141534e-05, "loss": 1.8898, "step": 26154500 }, { "epoch": 75.71, "learning_rate": 1.2159581673768063e-05, "loss": 1.8702, "step": 26155000 }, { "epoch": 75.71, "learning_rate": 1.2158858026120785e-05, "loss": 1.8608, "step": 26155500 }, { "epoch": 75.71, "learning_rate": 1.2158134378473509e-05, "loss": 1.8698, "step": 26156000 }, { "epoch": 75.71, "learning_rate": 1.2157410730826233e-05, "loss": 1.8748, "step": 26156500 }, { "epoch": 75.71, "learning_rate": 1.2156687083178955e-05, "loss": 1.8937, "step": 26157000 }, { "epoch": 75.72, "learning_rate": 1.2155963435531679e-05, "loss": 1.8938, "step": 26157500 }, { "epoch": 75.72, "learning_rate": 1.2155239787884403e-05, "loss": 1.8769, "step": 26158000 }, { "epoch": 75.72, "learning_rate": 1.215451758753242e-05, "loss": 1.869, "step": 26158500 }, { "epoch": 75.72, "learning_rate": 1.2153793939885142e-05, "loss": 1.8552, "step": 26159000 }, { "epoch": 75.72, "learning_rate": 1.2153071739533161e-05, "loss": 1.8747, "step": 26159500 }, { "epoch": 75.72, "learning_rate": 1.2152348091885885e-05, "loss": 1.8847, "step": 26160000 }, { "epoch": 75.72, "learning_rate": 1.2151624444238607e-05, "loss": 1.853, "step": 26160500 }, { "epoch": 75.73, "learning_rate": 1.215090079659133e-05, "loss": 1.8623, "step": 26161000 }, { "epoch": 75.73, "learning_rate": 1.2150177148944054e-05, "loss": 1.8812, "step": 26161500 }, { "epoch": 75.73, "learning_rate": 1.2149453501296778e-05, "loss": 1.8844, "step": 26162000 }, { "epoch": 75.73, "learning_rate": 1.21487298536495e-05, "loss": 1.8612, "step": 26162500 }, { "epoch": 75.73, "learning_rate": 1.2148006206002224e-05, "loss": 1.8977, "step": 26163000 }, { "epoch": 75.73, "learning_rate": 1.2147284005650243e-05, "loss": 1.8699, "step": 26163500 }, { "epoch": 75.73, "learning_rate": 1.214656180529826e-05, "loss": 1.8718, "step": 26164000 }, { "epoch": 75.74, "learning_rate": 1.2145838157650982e-05, "loss": 1.8909, "step": 26164500 }, { "epoch": 75.74, "learning_rate": 1.2145114510003704e-05, "loss": 1.8868, "step": 26165000 }, { "epoch": 75.74, "learning_rate": 1.214439086235643e-05, "loss": 1.8645, "step": 26165500 }, { "epoch": 75.74, "learning_rate": 1.2143667214709152e-05, "loss": 1.8715, "step": 26166000 }, { "epoch": 75.74, "learning_rate": 1.2142943567061874e-05, "loss": 1.8502, "step": 26166500 }, { "epoch": 75.74, "learning_rate": 1.2142219919414598e-05, "loss": 1.8636, "step": 26167000 }, { "epoch": 75.74, "learning_rate": 1.2141496271767322e-05, "loss": 1.8466, "step": 26167500 }, { "epoch": 75.75, "learning_rate": 1.214077407141534e-05, "loss": 1.8685, "step": 26168000 }, { "epoch": 75.75, "learning_rate": 1.2140050423768062e-05, "loss": 1.8905, "step": 26168500 }, { "epoch": 75.75, "learning_rate": 1.2139326776120786e-05, "loss": 1.8646, "step": 26169000 }, { "epoch": 75.75, "learning_rate": 1.213860312847351e-05, "loss": 1.8384, "step": 26169500 }, { "epoch": 75.75, "learning_rate": 1.2137879480826232e-05, "loss": 1.8464, "step": 26170000 }, { "epoch": 75.75, "learning_rate": 1.2137157280474249e-05, "loss": 1.8581, "step": 26170500 }, { "epoch": 75.75, "learning_rate": 1.2136433632826975e-05, "loss": 1.8746, "step": 26171000 }, { "epoch": 75.76, "learning_rate": 1.2135709985179697e-05, "loss": 1.8718, "step": 26171500 }, { "epoch": 75.76, "learning_rate": 1.213498633753242e-05, "loss": 1.8535, "step": 26172000 }, { "epoch": 75.76, "learning_rate": 1.2134264137180436e-05, "loss": 1.8453, "step": 26172500 }, { "epoch": 75.76, "learning_rate": 1.2133541936828455e-05, "loss": 1.8491, "step": 26173000 }, { "epoch": 75.76, "learning_rate": 1.213281828918118e-05, "loss": 1.8579, "step": 26173500 }, { "epoch": 75.76, "learning_rate": 1.2132094641533902e-05, "loss": 1.8945, "step": 26174000 }, { "epoch": 75.76, "learning_rate": 1.2131370993886624e-05, "loss": 1.8763, "step": 26174500 }, { "epoch": 75.77, "learning_rate": 1.213064734623935e-05, "loss": 1.9218, "step": 26175000 }, { "epoch": 75.77, "learning_rate": 1.2129923698592072e-05, "loss": 1.8544, "step": 26175500 }, { "epoch": 75.77, "learning_rate": 1.2129200050944794e-05, "loss": 1.8663, "step": 26176000 }, { "epoch": 75.77, "learning_rate": 1.2128477850592813e-05, "loss": 1.8654, "step": 26176500 }, { "epoch": 75.77, "learning_rate": 1.2127755650240832e-05, "loss": 1.8516, "step": 26177000 }, { "epoch": 75.77, "learning_rate": 1.2127032002593554e-05, "loss": 1.8888, "step": 26177500 }, { "epoch": 75.77, "learning_rate": 1.2126308354946276e-05, "loss": 1.8801, "step": 26178000 }, { "epoch": 75.78, "learning_rate": 1.2125584707299e-05, "loss": 1.8772, "step": 26178500 }, { "epoch": 75.78, "learning_rate": 1.2124861059651724e-05, "loss": 1.8769, "step": 26179000 }, { "epoch": 75.78, "learning_rate": 1.2124137412004446e-05, "loss": 1.878, "step": 26179500 }, { "epoch": 75.78, "learning_rate": 1.2123413764357169e-05, "loss": 1.8609, "step": 26180000 }, { "epoch": 75.78, "learning_rate": 1.2122690116709894e-05, "loss": 1.8576, "step": 26180500 }, { "epoch": 75.78, "learning_rate": 1.2121966469062616e-05, "loss": 1.878, "step": 26181000 }, { "epoch": 75.78, "learning_rate": 1.2121242821415339e-05, "loss": 1.8584, "step": 26181500 }, { "epoch": 75.79, "learning_rate": 1.2120520621063358e-05, "loss": 1.8476, "step": 26182000 }, { "epoch": 75.79, "learning_rate": 1.2119796973416082e-05, "loss": 1.8882, "step": 26182500 }, { "epoch": 75.79, "learning_rate": 1.2119074773064099e-05, "loss": 1.8609, "step": 26183000 }, { "epoch": 75.79, "learning_rate": 1.2118351125416821e-05, "loss": 1.89, "step": 26183500 }, { "epoch": 75.79, "learning_rate": 1.2117627477769545e-05, "loss": 1.8637, "step": 26184000 }, { "epoch": 75.79, "learning_rate": 1.2116903830122269e-05, "loss": 1.8797, "step": 26184500 }, { "epoch": 75.79, "learning_rate": 1.2116180182474991e-05, "loss": 1.8539, "step": 26185000 }, { "epoch": 75.8, "learning_rate": 1.2115456534827713e-05, "loss": 1.8793, "step": 26185500 }, { "epoch": 75.8, "learning_rate": 1.2114732887180439e-05, "loss": 1.865, "step": 26186000 }, { "epoch": 75.8, "learning_rate": 1.2114009239533161e-05, "loss": 1.8842, "step": 26186500 }, { "epoch": 75.8, "learning_rate": 1.2113287039181178e-05, "loss": 1.8935, "step": 26187000 }, { "epoch": 75.8, "learning_rate": 1.21125633915339e-05, "loss": 1.8546, "step": 26187500 }, { "epoch": 75.8, "learning_rate": 1.2111839743886626e-05, "loss": 1.8692, "step": 26188000 }, { "epoch": 75.8, "learning_rate": 1.2111116096239349e-05, "loss": 1.8788, "step": 26188500 }, { "epoch": 75.81, "learning_rate": 1.211039244859207e-05, "loss": 1.8505, "step": 26189000 }, { "epoch": 75.81, "learning_rate": 1.2109668800944796e-05, "loss": 1.8756, "step": 26189500 }, { "epoch": 75.81, "learning_rate": 1.2108945153297519e-05, "loss": 1.875, "step": 26190000 }, { "epoch": 75.81, "learning_rate": 1.2108221505650241e-05, "loss": 1.8413, "step": 26190500 }, { "epoch": 75.81, "learning_rate": 1.2107497858002965e-05, "loss": 1.8721, "step": 26191000 }, { "epoch": 75.81, "learning_rate": 1.2106774210355689e-05, "loss": 1.8804, "step": 26191500 }, { "epoch": 75.82, "learning_rate": 1.2106050562708411e-05, "loss": 1.8915, "step": 26192000 }, { "epoch": 75.82, "learning_rate": 1.2105326915061133e-05, "loss": 1.8496, "step": 26192500 }, { "epoch": 75.82, "learning_rate": 1.2104604714709152e-05, "loss": 1.8489, "step": 26193000 }, { "epoch": 75.82, "learning_rate": 1.2103881067061876e-05, "loss": 1.865, "step": 26193500 }, { "epoch": 75.82, "learning_rate": 1.2103157419414598e-05, "loss": 1.8763, "step": 26194000 }, { "epoch": 75.82, "learning_rate": 1.2102433771767322e-05, "loss": 1.8465, "step": 26194500 }, { "epoch": 75.82, "learning_rate": 1.210171157141534e-05, "loss": 1.8852, "step": 26195000 }, { "epoch": 75.83, "learning_rate": 1.2100987923768063e-05, "loss": 1.8707, "step": 26195500 }, { "epoch": 75.83, "learning_rate": 1.210026572341608e-05, "loss": 1.9067, "step": 26196000 }, { "epoch": 75.83, "learning_rate": 1.2099542075768803e-05, "loss": 1.8548, "step": 26196500 }, { "epoch": 75.83, "learning_rate": 1.2098818428121527e-05, "loss": 1.8669, "step": 26197000 }, { "epoch": 75.83, "learning_rate": 1.209809478047425e-05, "loss": 1.8467, "step": 26197500 }, { "epoch": 75.83, "learning_rate": 1.2097371132826973e-05, "loss": 1.883, "step": 26198000 }, { "epoch": 75.83, "learning_rate": 1.2096647485179697e-05, "loss": 1.8696, "step": 26198500 }, { "epoch": 75.84, "learning_rate": 1.209592383753242e-05, "loss": 1.8762, "step": 26199000 }, { "epoch": 75.84, "learning_rate": 1.2095201637180438e-05, "loss": 1.9189, "step": 26199500 }, { "epoch": 75.84, "learning_rate": 1.209447798953316e-05, "loss": 1.8894, "step": 26200000 }, { "epoch": 75.84, "learning_rate": 1.2093754341885884e-05, "loss": 1.8585, "step": 26200500 }, { "epoch": 75.84, "learning_rate": 1.2093030694238608e-05, "loss": 1.8879, "step": 26201000 }, { "epoch": 75.84, "learning_rate": 1.2092308493886625e-05, "loss": 1.869, "step": 26201500 }, { "epoch": 75.84, "learning_rate": 1.2091584846239348e-05, "loss": 1.8655, "step": 26202000 }, { "epoch": 75.85, "learning_rate": 1.2090861198592072e-05, "loss": 1.8593, "step": 26202500 }, { "epoch": 75.85, "learning_rate": 1.2090137550944796e-05, "loss": 1.8614, "step": 26203000 }, { "epoch": 75.85, "learning_rate": 1.2089415350592813e-05, "loss": 1.8923, "step": 26203500 }, { "epoch": 75.85, "learning_rate": 1.2088691702945535e-05, "loss": 1.8965, "step": 26204000 }, { "epoch": 75.85, "learning_rate": 1.2087968055298259e-05, "loss": 1.8532, "step": 26204500 }, { "epoch": 75.85, "learning_rate": 1.2087244407650983e-05, "loss": 1.8819, "step": 26205000 }, { "epoch": 75.85, "learning_rate": 1.2086520760003705e-05, "loss": 1.8584, "step": 26205500 }, { "epoch": 75.86, "learning_rate": 1.2085798559651722e-05, "loss": 1.8841, "step": 26206000 }, { "epoch": 75.86, "learning_rate": 1.2085074912004448e-05, "loss": 1.8525, "step": 26206500 }, { "epoch": 75.86, "learning_rate": 1.208435126435717e-05, "loss": 1.8731, "step": 26207000 }, { "epoch": 75.86, "learning_rate": 1.2083627616709892e-05, "loss": 1.8496, "step": 26207500 }, { "epoch": 75.86, "learning_rate": 1.2082903969062616e-05, "loss": 1.8552, "step": 26208000 }, { "epoch": 75.86, "learning_rate": 1.208218032141534e-05, "loss": 1.857, "step": 26208500 }, { "epoch": 75.86, "learning_rate": 1.2081456673768063e-05, "loss": 1.879, "step": 26209000 }, { "epoch": 75.87, "learning_rate": 1.2080733026120786e-05, "loss": 1.85, "step": 26209500 }, { "epoch": 75.87, "learning_rate": 1.208000937847351e-05, "loss": 1.8583, "step": 26210000 }, { "epoch": 75.87, "learning_rate": 1.2079285730826233e-05, "loss": 1.8489, "step": 26210500 }, { "epoch": 75.87, "learning_rate": 1.2078562083178955e-05, "loss": 1.8653, "step": 26211000 }, { "epoch": 75.87, "learning_rate": 1.2077838435531679e-05, "loss": 1.8648, "step": 26211500 }, { "epoch": 75.87, "learning_rate": 1.2077116235179698e-05, "loss": 1.8743, "step": 26212000 }, { "epoch": 75.87, "learning_rate": 1.207639258753242e-05, "loss": 1.8723, "step": 26212500 }, { "epoch": 75.88, "learning_rate": 1.2075668939885144e-05, "loss": 1.8637, "step": 26213000 }, { "epoch": 75.88, "learning_rate": 1.2074945292237866e-05, "loss": 1.8696, "step": 26213500 }, { "epoch": 75.88, "learning_rate": 1.207422164459059e-05, "loss": 1.836, "step": 26214000 }, { "epoch": 75.88, "learning_rate": 1.2073499444238607e-05, "loss": 1.8674, "step": 26214500 }, { "epoch": 75.88, "learning_rate": 1.2072775796591331e-05, "loss": 1.8739, "step": 26215000 }, { "epoch": 75.88, "learning_rate": 1.2072052148944053e-05, "loss": 1.8451, "step": 26215500 }, { "epoch": 75.88, "learning_rate": 1.2071329948592072e-05, "loss": 1.8844, "step": 26216000 }, { "epoch": 75.89, "learning_rate": 1.207060774824009e-05, "loss": 1.8446, "step": 26216500 }, { "epoch": 75.89, "learning_rate": 1.2069884100592812e-05, "loss": 1.867, "step": 26217000 }, { "epoch": 75.89, "learning_rate": 1.2069160452945536e-05, "loss": 1.8519, "step": 26217500 }, { "epoch": 75.89, "learning_rate": 1.206843680529826e-05, "loss": 1.8577, "step": 26218000 }, { "epoch": 75.89, "learning_rate": 1.2067713157650982e-05, "loss": 1.8816, "step": 26218500 }, { "epoch": 75.89, "learning_rate": 1.2066989510003706e-05, "loss": 1.8517, "step": 26219000 }, { "epoch": 75.89, "learning_rate": 1.206626586235643e-05, "loss": 1.8708, "step": 26219500 }, { "epoch": 75.9, "learning_rate": 1.2065543662004447e-05, "loss": 1.8832, "step": 26220000 }, { "epoch": 75.9, "learning_rate": 1.206482001435717e-05, "loss": 1.8679, "step": 26220500 }, { "epoch": 75.9, "learning_rate": 1.2064096366709893e-05, "loss": 1.8579, "step": 26221000 }, { "epoch": 75.9, "learning_rate": 1.2063372719062617e-05, "loss": 1.8761, "step": 26221500 }, { "epoch": 75.9, "learning_rate": 1.206264907141534e-05, "loss": 1.8711, "step": 26222000 }, { "epoch": 75.9, "learning_rate": 1.2061925423768063e-05, "loss": 1.8622, "step": 26222500 }, { "epoch": 75.9, "learning_rate": 1.2061201776120786e-05, "loss": 1.8323, "step": 26223000 }, { "epoch": 75.91, "learning_rate": 1.206047812847351e-05, "loss": 1.8703, "step": 26223500 }, { "epoch": 75.91, "learning_rate": 1.2059754480826232e-05, "loss": 1.87, "step": 26224000 }, { "epoch": 75.91, "learning_rate": 1.205903228047425e-05, "loss": 1.8672, "step": 26224500 }, { "epoch": 75.91, "learning_rate": 1.2058308632826973e-05, "loss": 1.8676, "step": 26225000 }, { "epoch": 75.91, "learning_rate": 1.2057584985179697e-05, "loss": 1.8489, "step": 26225500 }, { "epoch": 75.91, "learning_rate": 1.205686133753242e-05, "loss": 1.8691, "step": 26226000 }, { "epoch": 75.91, "learning_rate": 1.2056137689885143e-05, "loss": 1.8695, "step": 26226500 }, { "epoch": 75.92, "learning_rate": 1.2055415489533162e-05, "loss": 1.8967, "step": 26227000 }, { "epoch": 75.92, "learning_rate": 1.2054691841885884e-05, "loss": 1.8623, "step": 26227500 }, { "epoch": 75.92, "learning_rate": 1.2053968194238608e-05, "loss": 1.8844, "step": 26228000 }, { "epoch": 75.92, "learning_rate": 1.205324454659133e-05, "loss": 1.8858, "step": 26228500 }, { "epoch": 75.92, "learning_rate": 1.2052520898944054e-05, "loss": 1.8574, "step": 26229000 }, { "epoch": 75.92, "learning_rate": 1.2051797251296776e-05, "loss": 1.8778, "step": 26229500 }, { "epoch": 75.93, "learning_rate": 1.20510736036495e-05, "loss": 1.8793, "step": 26230000 }, { "epoch": 75.93, "learning_rate": 1.2050349956002224e-05, "loss": 1.8833, "step": 26230500 }, { "epoch": 75.93, "learning_rate": 1.2049626308354947e-05, "loss": 1.8618, "step": 26231000 }, { "epoch": 75.93, "learning_rate": 1.2048904108002965e-05, "loss": 1.873, "step": 26231500 }, { "epoch": 75.93, "learning_rate": 1.2048180460355688e-05, "loss": 1.8538, "step": 26232000 }, { "epoch": 75.93, "learning_rate": 1.2047456812708412e-05, "loss": 1.8848, "step": 26232500 }, { "epoch": 75.93, "learning_rate": 1.2046733165061134e-05, "loss": 1.8443, "step": 26233000 }, { "epoch": 75.94, "learning_rate": 1.2046010964709153e-05, "loss": 1.8615, "step": 26233500 }, { "epoch": 75.94, "learning_rate": 1.204528876435717e-05, "loss": 1.8771, "step": 26234000 }, { "epoch": 75.94, "learning_rate": 1.2044565116709892e-05, "loss": 1.8589, "step": 26234500 }, { "epoch": 75.94, "learning_rate": 1.2043841469062616e-05, "loss": 1.869, "step": 26235000 }, { "epoch": 75.94, "learning_rate": 1.204311782141534e-05, "loss": 1.8759, "step": 26235500 }, { "epoch": 75.94, "learning_rate": 1.2042394173768062e-05, "loss": 1.8701, "step": 26236000 }, { "epoch": 75.94, "learning_rate": 1.2041670526120786e-05, "loss": 1.8667, "step": 26236500 }, { "epoch": 75.95, "learning_rate": 1.2040946878473509e-05, "loss": 1.8739, "step": 26237000 }, { "epoch": 75.95, "learning_rate": 1.2040223230826232e-05, "loss": 1.8928, "step": 26237500 }, { "epoch": 75.95, "learning_rate": 1.203950103047425e-05, "loss": 1.8834, "step": 26238000 }, { "epoch": 75.95, "learning_rate": 1.2038777382826974e-05, "loss": 1.8706, "step": 26238500 }, { "epoch": 75.95, "learning_rate": 1.2038053735179698e-05, "loss": 1.8802, "step": 26239000 }, { "epoch": 75.95, "learning_rate": 1.203733008753242e-05, "loss": 1.8798, "step": 26239500 }, { "epoch": 75.95, "learning_rate": 1.2036607887180437e-05, "loss": 1.8661, "step": 26240000 }, { "epoch": 75.96, "learning_rate": 1.2035884239533161e-05, "loss": 1.8659, "step": 26240500 }, { "epoch": 75.96, "learning_rate": 1.2035160591885885e-05, "loss": 1.8785, "step": 26241000 }, { "epoch": 75.96, "learning_rate": 1.2034436944238607e-05, "loss": 1.8695, "step": 26241500 }, { "epoch": 75.96, "learning_rate": 1.2033714743886624e-05, "loss": 1.8604, "step": 26242000 }, { "epoch": 75.96, "learning_rate": 1.2032992543534643e-05, "loss": 1.8901, "step": 26242500 }, { "epoch": 75.96, "learning_rate": 1.2032268895887366e-05, "loss": 1.8678, "step": 26243000 }, { "epoch": 75.96, "learning_rate": 1.203154524824009e-05, "loss": 1.8889, "step": 26243500 }, { "epoch": 75.97, "learning_rate": 1.2030821600592812e-05, "loss": 1.8872, "step": 26244000 }, { "epoch": 75.97, "learning_rate": 1.203009940024083e-05, "loss": 1.8481, "step": 26244500 }, { "epoch": 75.97, "learning_rate": 1.2029375752593555e-05, "loss": 1.8609, "step": 26245000 }, { "epoch": 75.97, "learning_rate": 1.2028652104946277e-05, "loss": 1.8787, "step": 26245500 }, { "epoch": 75.97, "learning_rate": 1.2027928457299e-05, "loss": 1.8677, "step": 26246000 }, { "epoch": 75.97, "learning_rate": 1.2027204809651723e-05, "loss": 1.8813, "step": 26246500 }, { "epoch": 75.97, "learning_rate": 1.2026481162004447e-05, "loss": 1.8847, "step": 26247000 }, { "epoch": 75.98, "learning_rate": 1.202575751435717e-05, "loss": 1.8834, "step": 26247500 }, { "epoch": 75.98, "learning_rate": 1.2025033866709893e-05, "loss": 1.8602, "step": 26248000 }, { "epoch": 75.98, "learning_rate": 1.202431166635791e-05, "loss": 1.8693, "step": 26248500 }, { "epoch": 75.98, "learning_rate": 1.2023588018710634e-05, "loss": 1.8551, "step": 26249000 }, { "epoch": 75.98, "learning_rate": 1.2022864371063357e-05, "loss": 1.8753, "step": 26249500 }, { "epoch": 75.98, "learning_rate": 1.202214072341608e-05, "loss": 1.8774, "step": 26250000 }, { "epoch": 75.98, "learning_rate": 1.2021417075768804e-05, "loss": 1.8392, "step": 26250500 }, { "epoch": 75.99, "learning_rate": 1.2020693428121527e-05, "loss": 1.8731, "step": 26251000 }, { "epoch": 75.99, "learning_rate": 1.201996978047425e-05, "loss": 1.8436, "step": 26251500 }, { "epoch": 75.99, "learning_rate": 1.2019246132826974e-05, "loss": 1.8956, "step": 26252000 }, { "epoch": 75.99, "learning_rate": 1.2018523932474992e-05, "loss": 1.8646, "step": 26252500 }, { "epoch": 75.99, "learning_rate": 1.2017800284827714e-05, "loss": 1.8364, "step": 26253000 }, { "epoch": 75.99, "learning_rate": 1.2017078084475731e-05, "loss": 1.8712, "step": 26253500 }, { "epoch": 75.99, "learning_rate": 1.2016354436828455e-05, "loss": 1.8804, "step": 26254000 }, { "epoch": 76.0, "learning_rate": 1.2015630789181179e-05, "loss": 1.8773, "step": 26254500 }, { "epoch": 76.0, "learning_rate": 1.2014907141533901e-05, "loss": 1.8635, "step": 26255000 }, { "epoch": 76.0, "learning_rate": 1.2014183493886625e-05, "loss": 1.8549, "step": 26255500 }, { "epoch": 76.0, "eval_accuracy": 0.6843415744962393, "eval_accuracy_mlm": 0.6530730861540301, "eval_accuracy_nsp": 0.8521263285256631, "eval_loss": 2.177988290786743, "eval_runtime": 331.6171, "eval_samples_per_second": 1315.933, "eval_steps_per_second": 54.831, "step": 26255872 }, { "epoch": 76.0, "learning_rate": 1.2013459846239349e-05, "loss": 1.8537, "step": 26256000 }, { "epoch": 76.0, "learning_rate": 1.2012737645887366e-05, "loss": 1.8452, "step": 26256500 }, { "epoch": 76.0, "learning_rate": 1.2012013998240089e-05, "loss": 1.8671, "step": 26257000 }, { "epoch": 76.0, "learning_rate": 1.2011290350592813e-05, "loss": 1.8532, "step": 26257500 }, { "epoch": 76.01, "learning_rate": 1.2010566702945536e-05, "loss": 1.8417, "step": 26258000 }, { "epoch": 76.01, "learning_rate": 1.2009844502593554e-05, "loss": 1.8551, "step": 26258500 }, { "epoch": 76.01, "learning_rate": 1.2009120854946276e-05, "loss": 1.8972, "step": 26259000 }, { "epoch": 76.01, "learning_rate": 1.2008398654594295e-05, "loss": 1.8455, "step": 26259500 }, { "epoch": 76.01, "learning_rate": 1.2007675006947019e-05, "loss": 1.8501, "step": 26260000 }, { "epoch": 76.01, "learning_rate": 1.2006951359299741e-05, "loss": 1.8463, "step": 26260500 }, { "epoch": 76.01, "learning_rate": 1.2006227711652463e-05, "loss": 1.8241, "step": 26261000 }, { "epoch": 76.02, "learning_rate": 1.2005504064005187e-05, "loss": 1.864, "step": 26261500 }, { "epoch": 76.02, "learning_rate": 1.2004781863653206e-05, "loss": 1.8483, "step": 26262000 }, { "epoch": 76.02, "learning_rate": 1.2004058216005928e-05, "loss": 1.8683, "step": 26262500 }, { "epoch": 76.02, "learning_rate": 1.2003334568358652e-05, "loss": 1.8485, "step": 26263000 }, { "epoch": 76.02, "learning_rate": 1.2002610920711376e-05, "loss": 1.8659, "step": 26263500 }, { "epoch": 76.02, "learning_rate": 1.2001887273064098e-05, "loss": 1.8466, "step": 26264000 }, { "epoch": 76.02, "learning_rate": 1.200116362541682e-05, "loss": 1.8445, "step": 26264500 }, { "epoch": 76.03, "learning_rate": 1.2000439977769545e-05, "loss": 1.8585, "step": 26265000 }, { "epoch": 76.03, "learning_rate": 1.1999717777417564e-05, "loss": 1.8613, "step": 26265500 }, { "epoch": 76.03, "learning_rate": 1.1998994129770286e-05, "loss": 1.8617, "step": 26266000 }, { "epoch": 76.03, "learning_rate": 1.1998270482123008e-05, "loss": 1.8702, "step": 26266500 }, { "epoch": 76.03, "learning_rate": 1.1997546834475732e-05, "loss": 1.8325, "step": 26267000 }, { "epoch": 76.03, "learning_rate": 1.1996823186828456e-05, "loss": 1.8455, "step": 26267500 }, { "epoch": 76.04, "learning_rate": 1.1996099539181178e-05, "loss": 1.8749, "step": 26268000 }, { "epoch": 76.04, "learning_rate": 1.1995375891533902e-05, "loss": 1.851, "step": 26268500 }, { "epoch": 76.04, "learning_rate": 1.1994652243886626e-05, "loss": 1.8224, "step": 26269000 }, { "epoch": 76.04, "learning_rate": 1.1993928596239348e-05, "loss": 1.8529, "step": 26269500 }, { "epoch": 76.04, "learning_rate": 1.1993206395887365e-05, "loss": 1.8881, "step": 26270000 }, { "epoch": 76.04, "learning_rate": 1.199248274824009e-05, "loss": 1.8577, "step": 26270500 }, { "epoch": 76.04, "learning_rate": 1.1991759100592813e-05, "loss": 1.8649, "step": 26271000 }, { "epoch": 76.05, "learning_rate": 1.1991035452945536e-05, "loss": 1.8284, "step": 26271500 }, { "epoch": 76.05, "learning_rate": 1.1990311805298258e-05, "loss": 1.8559, "step": 26272000 }, { "epoch": 76.05, "learning_rate": 1.1989588157650983e-05, "loss": 1.8392, "step": 26272500 }, { "epoch": 76.05, "learning_rate": 1.1988865957299e-05, "loss": 1.8409, "step": 26273000 }, { "epoch": 76.05, "learning_rate": 1.1988142309651723e-05, "loss": 1.8615, "step": 26273500 }, { "epoch": 76.05, "learning_rate": 1.1987418662004447e-05, "loss": 1.8193, "step": 26274000 }, { "epoch": 76.05, "learning_rate": 1.198669501435717e-05, "loss": 1.8635, "step": 26274500 }, { "epoch": 76.06, "learning_rate": 1.1985971366709893e-05, "loss": 1.856, "step": 26275000 }, { "epoch": 76.06, "learning_rate": 1.1985247719062615e-05, "loss": 1.8353, "step": 26275500 }, { "epoch": 76.06, "learning_rate": 1.1984524071415341e-05, "loss": 1.8546, "step": 26276000 }, { "epoch": 76.06, "learning_rate": 1.1983801871063358e-05, "loss": 1.8574, "step": 26276500 }, { "epoch": 76.06, "learning_rate": 1.198307822341608e-05, "loss": 1.8549, "step": 26277000 }, { "epoch": 76.06, "learning_rate": 1.1982354575768803e-05, "loss": 1.8475, "step": 26277500 }, { "epoch": 76.06, "learning_rate": 1.1981630928121528e-05, "loss": 1.8651, "step": 26278000 }, { "epoch": 76.07, "learning_rate": 1.198090728047425e-05, "loss": 1.8623, "step": 26278500 }, { "epoch": 76.07, "learning_rate": 1.1980183632826973e-05, "loss": 1.8919, "step": 26279000 }, { "epoch": 76.07, "learning_rate": 1.1979459985179697e-05, "loss": 1.8292, "step": 26279500 }, { "epoch": 76.07, "learning_rate": 1.197873633753242e-05, "loss": 1.8768, "step": 26280000 }, { "epoch": 76.07, "learning_rate": 1.1978014137180438e-05, "loss": 1.8658, "step": 26280500 }, { "epoch": 76.07, "learning_rate": 1.197729048953316e-05, "loss": 1.871, "step": 26281000 }, { "epoch": 76.07, "learning_rate": 1.1976568289181179e-05, "loss": 1.8394, "step": 26281500 }, { "epoch": 76.08, "learning_rate": 1.1975844641533903e-05, "loss": 1.8531, "step": 26282000 }, { "epoch": 76.08, "learning_rate": 1.1975120993886625e-05, "loss": 1.8433, "step": 26282500 }, { "epoch": 76.08, "learning_rate": 1.1974397346239347e-05, "loss": 1.8273, "step": 26283000 }, { "epoch": 76.08, "learning_rate": 1.1973673698592073e-05, "loss": 1.8636, "step": 26283500 }, { "epoch": 76.08, "learning_rate": 1.197295149824009e-05, "loss": 1.8478, "step": 26284000 }, { "epoch": 76.08, "learning_rate": 1.1972227850592812e-05, "loss": 1.8524, "step": 26284500 }, { "epoch": 76.08, "learning_rate": 1.1971504202945535e-05, "loss": 1.8403, "step": 26285000 }, { "epoch": 76.09, "learning_rate": 1.197078055529826e-05, "loss": 1.8464, "step": 26285500 }, { "epoch": 76.09, "learning_rate": 1.1970056907650983e-05, "loss": 1.8697, "step": 26286000 }, { "epoch": 76.09, "learning_rate": 1.1969333260003705e-05, "loss": 1.8438, "step": 26286500 }, { "epoch": 76.09, "learning_rate": 1.1968609612356429e-05, "loss": 1.8317, "step": 26287000 }, { "epoch": 76.09, "learning_rate": 1.1967885964709153e-05, "loss": 1.8612, "step": 26287500 }, { "epoch": 76.09, "learning_rate": 1.1967162317061875e-05, "loss": 1.858, "step": 26288000 }, { "epoch": 76.09, "learning_rate": 1.1966440116709892e-05, "loss": 1.8217, "step": 26288500 }, { "epoch": 76.1, "learning_rate": 1.1965716469062618e-05, "loss": 1.8471, "step": 26289000 }, { "epoch": 76.1, "learning_rate": 1.1964994268710635e-05, "loss": 1.843, "step": 26289500 }, { "epoch": 76.1, "learning_rate": 1.1964270621063357e-05, "loss": 1.8558, "step": 26290000 }, { "epoch": 76.1, "learning_rate": 1.1963548420711374e-05, "loss": 1.8677, "step": 26290500 }, { "epoch": 76.1, "learning_rate": 1.1962824773064098e-05, "loss": 1.8431, "step": 26291000 }, { "epoch": 76.1, "learning_rate": 1.1962101125416822e-05, "loss": 1.8757, "step": 26291500 }, { "epoch": 76.1, "learning_rate": 1.1961377477769545e-05, "loss": 1.8598, "step": 26292000 }, { "epoch": 76.11, "learning_rate": 1.1960653830122267e-05, "loss": 1.8489, "step": 26292500 }, { "epoch": 76.11, "learning_rate": 1.1959930182474992e-05, "loss": 1.8766, "step": 26293000 }, { "epoch": 76.11, "learning_rate": 1.1959206534827715e-05, "loss": 1.8658, "step": 26293500 }, { "epoch": 76.11, "learning_rate": 1.1958482887180437e-05, "loss": 1.8903, "step": 26294000 }, { "epoch": 76.11, "learning_rate": 1.1957759239533162e-05, "loss": 1.8699, "step": 26294500 }, { "epoch": 76.11, "learning_rate": 1.1957035591885885e-05, "loss": 1.8534, "step": 26295000 }, { "epoch": 76.11, "learning_rate": 1.1956311944238607e-05, "loss": 1.8422, "step": 26295500 }, { "epoch": 76.12, "learning_rate": 1.1955588296591331e-05, "loss": 1.85, "step": 26296000 }, { "epoch": 76.12, "learning_rate": 1.195486609623935e-05, "loss": 1.8665, "step": 26296500 }, { "epoch": 76.12, "learning_rate": 1.1954143895887367e-05, "loss": 1.8567, "step": 26297000 }, { "epoch": 76.12, "learning_rate": 1.195342024824009e-05, "loss": 1.8679, "step": 26297500 }, { "epoch": 76.12, "learning_rate": 1.1952696600592812e-05, "loss": 1.8524, "step": 26298000 }, { "epoch": 76.12, "learning_rate": 1.1951972952945537e-05, "loss": 1.8608, "step": 26298500 }, { "epoch": 76.12, "learning_rate": 1.1951250752593554e-05, "loss": 1.8375, "step": 26299000 }, { "epoch": 76.13, "learning_rate": 1.1950527104946277e-05, "loss": 1.8558, "step": 26299500 }, { "epoch": 76.13, "learning_rate": 1.1949803457299e-05, "loss": 1.8505, "step": 26300000 }, { "epoch": 76.13, "learning_rate": 1.1949079809651725e-05, "loss": 1.847, "step": 26300500 }, { "epoch": 76.13, "learning_rate": 1.1948356162004447e-05, "loss": 1.8631, "step": 26301000 }, { "epoch": 76.13, "learning_rate": 1.1947632514357169e-05, "loss": 1.8776, "step": 26301500 }, { "epoch": 76.13, "learning_rate": 1.1946908866709893e-05, "loss": 1.8561, "step": 26302000 }, { "epoch": 76.13, "learning_rate": 1.1946185219062617e-05, "loss": 1.8609, "step": 26302500 }, { "epoch": 76.14, "learning_rate": 1.1945461571415339e-05, "loss": 1.8505, "step": 26303000 }, { "epoch": 76.14, "learning_rate": 1.1944737923768063e-05, "loss": 1.8354, "step": 26303500 }, { "epoch": 76.14, "learning_rate": 1.1944014276120787e-05, "loss": 1.8611, "step": 26304000 }, { "epoch": 76.14, "learning_rate": 1.194329062847351e-05, "loss": 1.8641, "step": 26304500 }, { "epoch": 76.14, "learning_rate": 1.1942568428121526e-05, "loss": 1.8452, "step": 26305000 }, { "epoch": 76.14, "learning_rate": 1.194184478047425e-05, "loss": 1.8336, "step": 26305500 }, { "epoch": 76.15, "learning_rate": 1.1941121132826974e-05, "loss": 1.8549, "step": 26306000 }, { "epoch": 76.15, "learning_rate": 1.1940397485179696e-05, "loss": 1.8805, "step": 26306500 }, { "epoch": 76.15, "learning_rate": 1.1939675284827714e-05, "loss": 1.8753, "step": 26307000 }, { "epoch": 76.15, "learning_rate": 1.1938951637180438e-05, "loss": 1.8602, "step": 26307500 }, { "epoch": 76.15, "learning_rate": 1.1938227989533162e-05, "loss": 1.8785, "step": 26308000 }, { "epoch": 76.15, "learning_rate": 1.1937504341885884e-05, "loss": 1.8669, "step": 26308500 }, { "epoch": 76.15, "learning_rate": 1.1936780694238608e-05, "loss": 1.8277, "step": 26309000 }, { "epoch": 76.16, "learning_rate": 1.1936057046591332e-05, "loss": 1.8576, "step": 26309500 }, { "epoch": 76.16, "learning_rate": 1.1935333398944054e-05, "loss": 1.8437, "step": 26310000 }, { "epoch": 76.16, "learning_rate": 1.1934609751296776e-05, "loss": 1.859, "step": 26310500 }, { "epoch": 76.16, "learning_rate": 1.19338861036495e-05, "loss": 1.8799, "step": 26311000 }, { "epoch": 76.16, "learning_rate": 1.1933165350592812e-05, "loss": 1.8592, "step": 26311500 }, { "epoch": 76.16, "learning_rate": 1.1932441702945536e-05, "loss": 1.8667, "step": 26312000 }, { "epoch": 76.16, "learning_rate": 1.1931718055298259e-05, "loss": 1.8516, "step": 26312500 }, { "epoch": 76.17, "learning_rate": 1.1930994407650982e-05, "loss": 1.8574, "step": 26313000 }, { "epoch": 76.17, "learning_rate": 1.1930270760003706e-05, "loss": 1.8681, "step": 26313500 }, { "epoch": 76.17, "learning_rate": 1.1929547112356429e-05, "loss": 1.8528, "step": 26314000 }, { "epoch": 76.17, "learning_rate": 1.1928823464709153e-05, "loss": 1.8461, "step": 26314500 }, { "epoch": 76.17, "learning_rate": 1.192810126435717e-05, "loss": 1.8303, "step": 26315000 }, { "epoch": 76.17, "learning_rate": 1.1927377616709894e-05, "loss": 1.8931, "step": 26315500 }, { "epoch": 76.17, "learning_rate": 1.1926653969062616e-05, "loss": 1.8359, "step": 26316000 }, { "epoch": 76.18, "learning_rate": 1.192593032141534e-05, "loss": 1.8686, "step": 26316500 }, { "epoch": 76.18, "learning_rate": 1.1925206673768064e-05, "loss": 1.8684, "step": 26317000 }, { "epoch": 76.18, "learning_rate": 1.1924484473416081e-05, "loss": 1.8809, "step": 26317500 }, { "epoch": 76.18, "learning_rate": 1.1923762273064098e-05, "loss": 1.8764, "step": 26318000 }, { "epoch": 76.18, "learning_rate": 1.1923038625416822e-05, "loss": 1.8716, "step": 26318500 }, { "epoch": 76.18, "learning_rate": 1.1922314977769544e-05, "loss": 1.8629, "step": 26319000 }, { "epoch": 76.18, "learning_rate": 1.1921591330122268e-05, "loss": 1.8511, "step": 26319500 }, { "epoch": 76.19, "learning_rate": 1.192086768247499e-05, "loss": 1.8505, "step": 26320000 }, { "epoch": 76.19, "learning_rate": 1.1920144034827715e-05, "loss": 1.868, "step": 26320500 }, { "epoch": 76.19, "learning_rate": 1.1919420387180438e-05, "loss": 1.8277, "step": 26321000 }, { "epoch": 76.19, "learning_rate": 1.191869673953316e-05, "loss": 1.8826, "step": 26321500 }, { "epoch": 76.19, "learning_rate": 1.1917973091885885e-05, "loss": 1.8574, "step": 26322000 }, { "epoch": 76.19, "learning_rate": 1.1917250891533902e-05, "loss": 1.8584, "step": 26322500 }, { "epoch": 76.19, "learning_rate": 1.1916530138477214e-05, "loss": 1.8866, "step": 26323000 }, { "epoch": 76.2, "learning_rate": 1.1915807938125233e-05, "loss": 1.8606, "step": 26323500 }, { "epoch": 76.2, "learning_rate": 1.1915084290477955e-05, "loss": 1.8643, "step": 26324000 }, { "epoch": 76.2, "learning_rate": 1.1914360642830678e-05, "loss": 1.8633, "step": 26324500 }, { "epoch": 76.2, "learning_rate": 1.1913636995183401e-05, "loss": 1.8647, "step": 26325000 }, { "epoch": 76.2, "learning_rate": 1.191291479483142e-05, "loss": 1.8306, "step": 26325500 }, { "epoch": 76.2, "learning_rate": 1.1912191147184143e-05, "loss": 1.8711, "step": 26326000 }, { "epoch": 76.2, "learning_rate": 1.1911467499536867e-05, "loss": 1.8661, "step": 26326500 }, { "epoch": 76.21, "learning_rate": 1.1910743851889589e-05, "loss": 1.853, "step": 26327000 }, { "epoch": 76.21, "learning_rate": 1.1910020204242313e-05, "loss": 1.8377, "step": 26327500 }, { "epoch": 76.21, "learning_rate": 1.1909296556595035e-05, "loss": 1.8387, "step": 26328000 }, { "epoch": 76.21, "learning_rate": 1.1908572908947759e-05, "loss": 1.8821, "step": 26328500 }, { "epoch": 76.21, "learning_rate": 1.1907849261300483e-05, "loss": 1.8775, "step": 26329000 }, { "epoch": 76.21, "learning_rate": 1.1907125613653205e-05, "loss": 1.8603, "step": 26329500 }, { "epoch": 76.21, "learning_rate": 1.1906401966005929e-05, "loss": 1.8482, "step": 26330000 }, { "epoch": 76.22, "learning_rate": 1.1905678318358653e-05, "loss": 1.8581, "step": 26330500 }, { "epoch": 76.22, "learning_rate": 1.1904954670711375e-05, "loss": 1.8504, "step": 26331000 }, { "epoch": 76.22, "learning_rate": 1.1904231023064099e-05, "loss": 1.8382, "step": 26331500 }, { "epoch": 76.22, "learning_rate": 1.1903507375416821e-05, "loss": 1.8769, "step": 26332000 }, { "epoch": 76.22, "learning_rate": 1.1902783727769545e-05, "loss": 1.8448, "step": 26332500 }, { "epoch": 76.22, "learning_rate": 1.1902060080122267e-05, "loss": 1.866, "step": 26333000 }, { "epoch": 76.22, "learning_rate": 1.1901336432474991e-05, "loss": 1.8858, "step": 26333500 }, { "epoch": 76.23, "learning_rate": 1.1900612784827715e-05, "loss": 1.864, "step": 26334000 }, { "epoch": 76.23, "learning_rate": 1.1899889137180438e-05, "loss": 1.8717, "step": 26334500 }, { "epoch": 76.23, "learning_rate": 1.1899165489533161e-05, "loss": 1.8535, "step": 26335000 }, { "epoch": 76.23, "learning_rate": 1.1898441841885884e-05, "loss": 1.8563, "step": 26335500 }, { "epoch": 76.23, "learning_rate": 1.1897718194238608e-05, "loss": 1.8917, "step": 26336000 }, { "epoch": 76.23, "learning_rate": 1.189699454659133e-05, "loss": 1.8422, "step": 26336500 }, { "epoch": 76.23, "learning_rate": 1.1896270898944054e-05, "loss": 1.8653, "step": 26337000 }, { "epoch": 76.24, "learning_rate": 1.1895547251296778e-05, "loss": 1.8551, "step": 26337500 }, { "epoch": 76.24, "learning_rate": 1.1894825050944795e-05, "loss": 1.8611, "step": 26338000 }, { "epoch": 76.24, "learning_rate": 1.1894104297888107e-05, "loss": 1.8702, "step": 26338500 }, { "epoch": 76.24, "learning_rate": 1.1893380650240831e-05, "loss": 1.8764, "step": 26339000 }, { "epoch": 76.24, "learning_rate": 1.1892657002593553e-05, "loss": 1.8801, "step": 26339500 }, { "epoch": 76.24, "learning_rate": 1.1891933354946277e-05, "loss": 1.8541, "step": 26340000 }, { "epoch": 76.24, "learning_rate": 1.1891209707299e-05, "loss": 1.8344, "step": 26340500 }, { "epoch": 76.25, "learning_rate": 1.1890486059651724e-05, "loss": 1.8457, "step": 26341000 }, { "epoch": 76.25, "learning_rate": 1.1889762412004447e-05, "loss": 1.8481, "step": 26341500 }, { "epoch": 76.25, "learning_rate": 1.188903876435717e-05, "loss": 1.86, "step": 26342000 }, { "epoch": 76.25, "learning_rate": 1.1888315116709894e-05, "loss": 1.8617, "step": 26342500 }, { "epoch": 76.25, "learning_rate": 1.1887591469062616e-05, "loss": 1.8677, "step": 26343000 }, { "epoch": 76.25, "learning_rate": 1.188686782141534e-05, "loss": 1.8676, "step": 26343500 }, { "epoch": 76.26, "learning_rate": 1.1886144173768064e-05, "loss": 1.8736, "step": 26344000 }, { "epoch": 76.26, "learning_rate": 1.1885420526120786e-05, "loss": 1.8739, "step": 26344500 }, { "epoch": 76.26, "learning_rate": 1.1884698325768803e-05, "loss": 1.8386, "step": 26345000 }, { "epoch": 76.26, "learning_rate": 1.1883976125416822e-05, "loss": 1.8598, "step": 26345500 }, { "epoch": 76.26, "learning_rate": 1.1883252477769544e-05, "loss": 1.8498, "step": 26346000 }, { "epoch": 76.26, "learning_rate": 1.1882528830122268e-05, "loss": 1.8778, "step": 26346500 }, { "epoch": 76.26, "learning_rate": 1.188180518247499e-05, "loss": 1.8697, "step": 26347000 }, { "epoch": 76.27, "learning_rate": 1.1881081534827714e-05, "loss": 1.8714, "step": 26347500 }, { "epoch": 76.27, "learning_rate": 1.1880357887180438e-05, "loss": 1.8697, "step": 26348000 }, { "epoch": 76.27, "learning_rate": 1.187963423953316e-05, "loss": 1.8798, "step": 26348500 }, { "epoch": 76.27, "learning_rate": 1.1878910591885885e-05, "loss": 1.8535, "step": 26349000 }, { "epoch": 76.27, "learning_rate": 1.1878188391533902e-05, "loss": 1.8717, "step": 26349500 }, { "epoch": 76.27, "learning_rate": 1.187746619118192e-05, "loss": 1.8694, "step": 26350000 }, { "epoch": 76.27, "learning_rate": 1.1876742543534643e-05, "loss": 1.8503, "step": 26350500 }, { "epoch": 76.28, "learning_rate": 1.1876018895887367e-05, "loss": 1.8431, "step": 26351000 }, { "epoch": 76.28, "learning_rate": 1.1875295248240089e-05, "loss": 1.8652, "step": 26351500 }, { "epoch": 76.28, "learning_rate": 1.1874571600592813e-05, "loss": 1.8287, "step": 26352000 }, { "epoch": 76.28, "learning_rate": 1.187384940024083e-05, "loss": 1.8707, "step": 26352500 }, { "epoch": 76.28, "learning_rate": 1.1873125752593554e-05, "loss": 1.8837, "step": 26353000 }, { "epoch": 76.28, "learning_rate": 1.1872402104946276e-05, "loss": 1.8539, "step": 26353500 }, { "epoch": 76.28, "learning_rate": 1.1871678457299e-05, "loss": 1.8467, "step": 26354000 }, { "epoch": 76.29, "learning_rate": 1.1870956256947018e-05, "loss": 1.8728, "step": 26354500 }, { "epoch": 76.29, "learning_rate": 1.1870232609299742e-05, "loss": 1.8589, "step": 26355000 }, { "epoch": 76.29, "learning_rate": 1.1869510408947759e-05, "loss": 1.8623, "step": 26355500 }, { "epoch": 76.29, "learning_rate": 1.1868788208595776e-05, "loss": 1.8676, "step": 26356000 }, { "epoch": 76.29, "learning_rate": 1.18680645609485e-05, "loss": 1.8616, "step": 26356500 }, { "epoch": 76.29, "learning_rate": 1.1867340913301222e-05, "loss": 1.8575, "step": 26357000 }, { "epoch": 76.29, "learning_rate": 1.1866617265653946e-05, "loss": 1.847, "step": 26357500 }, { "epoch": 76.3, "learning_rate": 1.186589361800667e-05, "loss": 1.8634, "step": 26358000 }, { "epoch": 76.3, "learning_rate": 1.1865169970359392e-05, "loss": 1.837, "step": 26358500 }, { "epoch": 76.3, "learning_rate": 1.1864446322712116e-05, "loss": 1.8701, "step": 26359000 }, { "epoch": 76.3, "learning_rate": 1.186372267506484e-05, "loss": 1.8687, "step": 26359500 }, { "epoch": 76.3, "learning_rate": 1.1862999027417562e-05, "loss": 1.8503, "step": 26360000 }, { "epoch": 76.3, "learning_rate": 1.186227682706558e-05, "loss": 1.8846, "step": 26360500 }, { "epoch": 76.3, "learning_rate": 1.1861553179418304e-05, "loss": 1.8434, "step": 26361000 }, { "epoch": 76.31, "learning_rate": 1.1860829531771028e-05, "loss": 1.8519, "step": 26361500 }, { "epoch": 76.31, "learning_rate": 1.186010588412375e-05, "loss": 1.861, "step": 26362000 }, { "epoch": 76.31, "learning_rate": 1.1859382236476474e-05, "loss": 1.8509, "step": 26362500 }, { "epoch": 76.31, "learning_rate": 1.1858660036124491e-05, "loss": 1.8562, "step": 26363000 }, { "epoch": 76.31, "learning_rate": 1.1857936388477215e-05, "loss": 1.86, "step": 26363500 }, { "epoch": 76.31, "learning_rate": 1.1857212740829937e-05, "loss": 1.8246, "step": 26364000 }, { "epoch": 76.31, "learning_rate": 1.1856489093182661e-05, "loss": 1.882, "step": 26364500 }, { "epoch": 76.32, "learning_rate": 1.1855765445535385e-05, "loss": 1.8551, "step": 26365000 }, { "epoch": 76.32, "learning_rate": 1.1855041797888107e-05, "loss": 1.8558, "step": 26365500 }, { "epoch": 76.32, "learning_rate": 1.185431815024083e-05, "loss": 1.8438, "step": 26366000 }, { "epoch": 76.32, "learning_rate": 1.1853594502593553e-05, "loss": 1.8516, "step": 26366500 }, { "epoch": 76.32, "learning_rate": 1.1852870854946277e-05, "loss": 1.887, "step": 26367000 }, { "epoch": 76.32, "learning_rate": 1.1852147207299e-05, "loss": 1.8679, "step": 26367500 }, { "epoch": 76.32, "learning_rate": 1.1851423559651723e-05, "loss": 1.8639, "step": 26368000 }, { "epoch": 76.33, "learning_rate": 1.1850699912004447e-05, "loss": 1.8695, "step": 26368500 }, { "epoch": 76.33, "learning_rate": 1.1849977711652465e-05, "loss": 1.8829, "step": 26369000 }, { "epoch": 76.33, "learning_rate": 1.1849255511300482e-05, "loss": 1.8645, "step": 26369500 }, { "epoch": 76.33, "learning_rate": 1.1848531863653206e-05, "loss": 1.8718, "step": 26370000 }, { "epoch": 76.33, "learning_rate": 1.184780821600593e-05, "loss": 1.8565, "step": 26370500 }, { "epoch": 76.33, "learning_rate": 1.1847084568358652e-05, "loss": 1.8681, "step": 26371000 }, { "epoch": 76.33, "learning_rate": 1.1846360920711374e-05, "loss": 1.8533, "step": 26371500 }, { "epoch": 76.34, "learning_rate": 1.1845637273064098e-05, "loss": 1.83, "step": 26372000 }, { "epoch": 76.34, "learning_rate": 1.1844915072712117e-05, "loss": 1.8371, "step": 26372500 }, { "epoch": 76.34, "learning_rate": 1.184419142506484e-05, "loss": 1.8396, "step": 26373000 }, { "epoch": 76.34, "learning_rate": 1.1843467777417561e-05, "loss": 1.8669, "step": 26373500 }, { "epoch": 76.34, "learning_rate": 1.1842744129770285e-05, "loss": 1.8737, "step": 26374000 }, { "epoch": 76.34, "learning_rate": 1.184202048212301e-05, "loss": 1.8634, "step": 26374500 }, { "epoch": 76.34, "learning_rate": 1.1841296834475732e-05, "loss": 1.8673, "step": 26375000 }, { "epoch": 76.35, "learning_rate": 1.1840573186828456e-05, "loss": 1.869, "step": 26375500 }, { "epoch": 76.35, "learning_rate": 1.183984953918118e-05, "loss": 1.8455, "step": 26376000 }, { "epoch": 76.35, "learning_rate": 1.1839127338829197e-05, "loss": 1.8726, "step": 26376500 }, { "epoch": 76.35, "learning_rate": 1.1838405138477214e-05, "loss": 1.8656, "step": 26377000 }, { "epoch": 76.35, "learning_rate": 1.1837681490829938e-05, "loss": 1.8778, "step": 26377500 }, { "epoch": 76.35, "learning_rate": 1.1836957843182662e-05, "loss": 1.8785, "step": 26378000 }, { "epoch": 76.35, "learning_rate": 1.1836234195535384e-05, "loss": 1.863, "step": 26378500 }, { "epoch": 76.36, "learning_rate": 1.1835511995183401e-05, "loss": 1.8518, "step": 26379000 }, { "epoch": 76.36, "learning_rate": 1.1834788347536125e-05, "loss": 1.8395, "step": 26379500 }, { "epoch": 76.36, "learning_rate": 1.1834064699888849e-05, "loss": 1.8512, "step": 26380000 }, { "epoch": 76.36, "learning_rate": 1.1833341052241571e-05, "loss": 1.8793, "step": 26380500 }, { "epoch": 76.36, "learning_rate": 1.1832617404594294e-05, "loss": 1.8788, "step": 26381000 }, { "epoch": 76.36, "learning_rate": 1.183189375694702e-05, "loss": 1.839, "step": 26381500 }, { "epoch": 76.37, "learning_rate": 1.1831170109299741e-05, "loss": 1.8466, "step": 26382000 }, { "epoch": 76.37, "learning_rate": 1.1830446461652464e-05, "loss": 1.8301, "step": 26382500 }, { "epoch": 76.37, "learning_rate": 1.1829722814005188e-05, "loss": 1.8351, "step": 26383000 }, { "epoch": 76.37, "learning_rate": 1.1829000613653207e-05, "loss": 1.8398, "step": 26383500 }, { "epoch": 76.37, "learning_rate": 1.1828276966005929e-05, "loss": 1.8509, "step": 26384000 }, { "epoch": 76.37, "learning_rate": 1.1827553318358651e-05, "loss": 1.8602, "step": 26384500 }, { "epoch": 76.37, "learning_rate": 1.1826829670711375e-05, "loss": 1.8677, "step": 26385000 }, { "epoch": 76.38, "learning_rate": 1.1826106023064099e-05, "loss": 1.8509, "step": 26385500 }, { "epoch": 76.38, "learning_rate": 1.1825382375416821e-05, "loss": 1.8645, "step": 26386000 }, { "epoch": 76.38, "learning_rate": 1.1824660175064838e-05, "loss": 1.8609, "step": 26386500 }, { "epoch": 76.38, "learning_rate": 1.1823936527417562e-05, "loss": 1.8468, "step": 26387000 }, { "epoch": 76.38, "learning_rate": 1.1823212879770286e-05, "loss": 1.847, "step": 26387500 }, { "epoch": 76.38, "learning_rate": 1.1822489232123008e-05, "loss": 1.8595, "step": 26388000 }, { "epoch": 76.38, "learning_rate": 1.1821765584475732e-05, "loss": 1.8491, "step": 26388500 }, { "epoch": 76.39, "learning_rate": 1.1821043384123751e-05, "loss": 1.8657, "step": 26389000 }, { "epoch": 76.39, "learning_rate": 1.1820319736476474e-05, "loss": 1.8441, "step": 26389500 }, { "epoch": 76.39, "learning_rate": 1.1819596088829196e-05, "loss": 1.8373, "step": 26390000 }, { "epoch": 76.39, "learning_rate": 1.181887244118192e-05, "loss": 1.8358, "step": 26390500 }, { "epoch": 76.39, "learning_rate": 1.1818148793534644e-05, "loss": 1.841, "step": 26391000 }, { "epoch": 76.39, "learning_rate": 1.1817425145887366e-05, "loss": 1.8509, "step": 26391500 }, { "epoch": 76.39, "learning_rate": 1.1816701498240088e-05, "loss": 1.904, "step": 26392000 }, { "epoch": 76.4, "learning_rate": 1.1815979297888107e-05, "loss": 1.8613, "step": 26392500 }, { "epoch": 76.4, "learning_rate": 1.1815255650240831e-05, "loss": 1.8681, "step": 26393000 }, { "epoch": 76.4, "learning_rate": 1.1814532002593553e-05, "loss": 1.8556, "step": 26393500 }, { "epoch": 76.4, "learning_rate": 1.181380980224157e-05, "loss": 1.8764, "step": 26394000 }, { "epoch": 76.4, "learning_rate": 1.1813086154594296e-05, "loss": 1.8478, "step": 26394500 }, { "epoch": 76.4, "learning_rate": 1.1812362506947018e-05, "loss": 1.8459, "step": 26395000 }, { "epoch": 76.4, "learning_rate": 1.1811640306595036e-05, "loss": 1.8787, "step": 26395500 }, { "epoch": 76.41, "learning_rate": 1.1810916658947758e-05, "loss": 1.8645, "step": 26396000 }, { "epoch": 76.41, "learning_rate": 1.1810193011300483e-05, "loss": 1.8393, "step": 26396500 }, { "epoch": 76.41, "learning_rate": 1.1809469363653206e-05, "loss": 1.8673, "step": 26397000 }, { "epoch": 76.41, "learning_rate": 1.1808745716005928e-05, "loss": 1.8682, "step": 26397500 }, { "epoch": 76.41, "learning_rate": 1.1808022068358652e-05, "loss": 1.8726, "step": 26398000 }, { "epoch": 76.41, "learning_rate": 1.1807298420711376e-05, "loss": 1.8462, "step": 26398500 }, { "epoch": 76.41, "learning_rate": 1.1806574773064098e-05, "loss": 1.8971, "step": 26399000 }, { "epoch": 76.42, "learning_rate": 1.1805851125416822e-05, "loss": 1.8437, "step": 26399500 }, { "epoch": 76.42, "learning_rate": 1.1805127477769546e-05, "loss": 1.8623, "step": 26400000 }, { "epoch": 76.42, "learning_rate": 1.1804403830122268e-05, "loss": 1.8536, "step": 26400500 }, { "epoch": 76.42, "learning_rate": 1.1803681629770285e-05, "loss": 1.8892, "step": 26401000 }, { "epoch": 76.42, "learning_rate": 1.180295798212301e-05, "loss": 1.8556, "step": 26401500 }, { "epoch": 76.42, "learning_rate": 1.1802234334475733e-05, "loss": 1.864, "step": 26402000 }, { "epoch": 76.42, "learning_rate": 1.1801510686828455e-05, "loss": 1.848, "step": 26402500 }, { "epoch": 76.43, "learning_rate": 1.1800787039181178e-05, "loss": 1.8705, "step": 26403000 }, { "epoch": 76.43, "learning_rate": 1.1800063391533903e-05, "loss": 1.8552, "step": 26403500 }, { "epoch": 76.43, "learning_rate": 1.1799339743886625e-05, "loss": 1.8679, "step": 26404000 }, { "epoch": 76.43, "learning_rate": 1.1798616096239348e-05, "loss": 1.8397, "step": 26404500 }, { "epoch": 76.43, "learning_rate": 1.1797893895887365e-05, "loss": 1.8716, "step": 26405000 }, { "epoch": 76.43, "learning_rate": 1.179717024824009e-05, "loss": 1.8393, "step": 26405500 }, { "epoch": 76.43, "learning_rate": 1.1796446600592813e-05, "loss": 1.8654, "step": 26406000 }, { "epoch": 76.44, "learning_rate": 1.1795722952945535e-05, "loss": 1.8294, "step": 26406500 }, { "epoch": 76.44, "learning_rate": 1.1794999305298259e-05, "loss": 1.8665, "step": 26407000 }, { "epoch": 76.44, "learning_rate": 1.1794275657650983e-05, "loss": 1.8592, "step": 26407500 }, { "epoch": 76.44, "learning_rate": 1.1793552010003705e-05, "loss": 1.8571, "step": 26408000 }, { "epoch": 76.44, "learning_rate": 1.1792829809651722e-05, "loss": 1.8669, "step": 26408500 }, { "epoch": 76.44, "learning_rate": 1.1792106162004448e-05, "loss": 1.8383, "step": 26409000 }, { "epoch": 76.44, "learning_rate": 1.179138251435717e-05, "loss": 1.8731, "step": 26409500 }, { "epoch": 76.45, "learning_rate": 1.1790658866709892e-05, "loss": 1.8463, "step": 26410000 }, { "epoch": 76.45, "learning_rate": 1.1789935219062616e-05, "loss": 1.8537, "step": 26410500 }, { "epoch": 76.45, "learning_rate": 1.178921157141534e-05, "loss": 1.872, "step": 26411000 }, { "epoch": 76.45, "learning_rate": 1.1788487923768063e-05, "loss": 1.8492, "step": 26411500 }, { "epoch": 76.45, "learning_rate": 1.1787764276120787e-05, "loss": 1.8872, "step": 26412000 }, { "epoch": 76.45, "learning_rate": 1.1787042075768804e-05, "loss": 1.8375, "step": 26412500 }, { "epoch": 76.45, "learning_rate": 1.1786319875416823e-05, "loss": 1.8783, "step": 26413000 }, { "epoch": 76.46, "learning_rate": 1.1785596227769545e-05, "loss": 1.8375, "step": 26413500 }, { "epoch": 76.46, "learning_rate": 1.1784872580122267e-05, "loss": 1.8694, "step": 26414000 }, { "epoch": 76.46, "learning_rate": 1.1784148932474991e-05, "loss": 1.8274, "step": 26414500 }, { "epoch": 76.46, "learning_rate": 1.178342673212301e-05, "loss": 1.8364, "step": 26415000 }, { "epoch": 76.46, "learning_rate": 1.1782703084475732e-05, "loss": 1.888, "step": 26415500 }, { "epoch": 76.46, "learning_rate": 1.1781979436828455e-05, "loss": 1.8723, "step": 26416000 }, { "epoch": 76.46, "learning_rate": 1.1781255789181178e-05, "loss": 1.8507, "step": 26416500 }, { "epoch": 76.47, "learning_rate": 1.1780532141533902e-05, "loss": 1.8466, "step": 26417000 }, { "epoch": 76.47, "learning_rate": 1.1779808493886625e-05, "loss": 1.8615, "step": 26417500 }, { "epoch": 76.47, "learning_rate": 1.1779084846239349e-05, "loss": 1.8566, "step": 26418000 }, { "epoch": 76.47, "learning_rate": 1.1778361198592072e-05, "loss": 1.857, "step": 26418500 }, { "epoch": 76.47, "learning_rate": 1.1777637550944795e-05, "loss": 1.849, "step": 26419000 }, { "epoch": 76.47, "learning_rate": 1.1776913903297519e-05, "loss": 1.8484, "step": 26419500 }, { "epoch": 76.48, "learning_rate": 1.1776191702945536e-05, "loss": 1.8608, "step": 26420000 }, { "epoch": 76.48, "learning_rate": 1.177546805529826e-05, "loss": 1.864, "step": 26420500 }, { "epoch": 76.48, "learning_rate": 1.1774744407650982e-05, "loss": 1.8527, "step": 26421000 }, { "epoch": 76.48, "learning_rate": 1.1774020760003706e-05, "loss": 1.8478, "step": 26421500 }, { "epoch": 76.48, "learning_rate": 1.177329711235643e-05, "loss": 1.8696, "step": 26422000 }, { "epoch": 76.48, "learning_rate": 1.1772573464709152e-05, "loss": 1.8673, "step": 26422500 }, { "epoch": 76.48, "learning_rate": 1.177185126435717e-05, "loss": 1.8705, "step": 26423000 }, { "epoch": 76.49, "learning_rate": 1.1771127616709893e-05, "loss": 1.8628, "step": 26423500 }, { "epoch": 76.49, "learning_rate": 1.1770403969062617e-05, "loss": 1.8524, "step": 26424000 }, { "epoch": 76.49, "learning_rate": 1.176968032141534e-05, "loss": 1.8377, "step": 26424500 }, { "epoch": 76.49, "learning_rate": 1.1768956673768063e-05, "loss": 1.878, "step": 26425000 }, { "epoch": 76.49, "learning_rate": 1.1768233026120786e-05, "loss": 1.8627, "step": 26425500 }, { "epoch": 76.49, "learning_rate": 1.1767510825768805e-05, "loss": 1.8555, "step": 26426000 }, { "epoch": 76.49, "learning_rate": 1.1766787178121527e-05, "loss": 1.8576, "step": 26426500 }, { "epoch": 76.5, "learning_rate": 1.176606353047425e-05, "loss": 1.8601, "step": 26427000 }, { "epoch": 76.5, "learning_rate": 1.1765339882826973e-05, "loss": 1.8733, "step": 26427500 }, { "epoch": 76.5, "learning_rate": 1.1764616235179697e-05, "loss": 1.8791, "step": 26428000 }, { "epoch": 76.5, "learning_rate": 1.1763892587532419e-05, "loss": 1.8718, "step": 26428500 }, { "epoch": 76.5, "learning_rate": 1.1763168939885143e-05, "loss": 1.8595, "step": 26429000 }, { "epoch": 76.5, "learning_rate": 1.1762445292237867e-05, "loss": 1.855, "step": 26429500 }, { "epoch": 76.5, "learning_rate": 1.1761723091885884e-05, "loss": 1.8709, "step": 26430000 }, { "epoch": 76.51, "learning_rate": 1.1760999444238608e-05, "loss": 1.8367, "step": 26430500 }, { "epoch": 76.51, "learning_rate": 1.176027579659133e-05, "loss": 1.8822, "step": 26431000 }, { "epoch": 76.51, "learning_rate": 1.1759552148944054e-05, "loss": 1.8863, "step": 26431500 }, { "epoch": 76.51, "learning_rate": 1.1758828501296777e-05, "loss": 1.8531, "step": 26432000 }, { "epoch": 76.51, "learning_rate": 1.1758106300944795e-05, "loss": 1.8445, "step": 26432500 }, { "epoch": 76.51, "learning_rate": 1.1757382653297518e-05, "loss": 1.8629, "step": 26433000 }, { "epoch": 76.51, "learning_rate": 1.1756659005650242e-05, "loss": 1.8581, "step": 26433500 }, { "epoch": 76.52, "learning_rate": 1.1755935358002964e-05, "loss": 1.8423, "step": 26434000 }, { "epoch": 76.52, "learning_rate": 1.1755213157650983e-05, "loss": 1.8452, "step": 26434500 }, { "epoch": 76.52, "learning_rate": 1.1754489510003705e-05, "loss": 1.844, "step": 26435000 }, { "epoch": 76.52, "learning_rate": 1.1753765862356429e-05, "loss": 1.8483, "step": 26435500 }, { "epoch": 76.52, "learning_rate": 1.1753042214709153e-05, "loss": 1.8692, "step": 26436000 }, { "epoch": 76.52, "learning_rate": 1.1752318567061875e-05, "loss": 1.8764, "step": 26436500 }, { "epoch": 76.52, "learning_rate": 1.1751596366709892e-05, "loss": 1.846, "step": 26437000 }, { "epoch": 76.53, "learning_rate": 1.1750872719062616e-05, "loss": 1.867, "step": 26437500 }, { "epoch": 76.53, "learning_rate": 1.175014907141534e-05, "loss": 1.865, "step": 26438000 }, { "epoch": 76.53, "learning_rate": 1.1749425423768062e-05, "loss": 1.8703, "step": 26438500 }, { "epoch": 76.53, "learning_rate": 1.1748701776120786e-05, "loss": 1.8809, "step": 26439000 }, { "epoch": 76.53, "learning_rate": 1.1747978128473509e-05, "loss": 1.8203, "step": 26439500 }, { "epoch": 76.53, "learning_rate": 1.1747254480826233e-05, "loss": 1.844, "step": 26440000 }, { "epoch": 76.53, "learning_rate": 1.1746530833178957e-05, "loss": 1.8464, "step": 26440500 }, { "epoch": 76.54, "learning_rate": 1.1745808632826974e-05, "loss": 1.849, "step": 26441000 }, { "epoch": 76.54, "learning_rate": 1.1745084985179696e-05, "loss": 1.8453, "step": 26441500 }, { "epoch": 76.54, "learning_rate": 1.1744362784827715e-05, "loss": 1.8618, "step": 26442000 }, { "epoch": 76.54, "learning_rate": 1.1743640584475732e-05, "loss": 1.8539, "step": 26442500 }, { "epoch": 76.54, "learning_rate": 1.1742916936828456e-05, "loss": 1.8731, "step": 26443000 }, { "epoch": 76.54, "learning_rate": 1.1742193289181178e-05, "loss": 1.8375, "step": 26443500 }, { "epoch": 76.54, "learning_rate": 1.1741469641533902e-05, "loss": 1.8518, "step": 26444000 }, { "epoch": 76.55, "learning_rate": 1.1740745993886624e-05, "loss": 1.8524, "step": 26444500 }, { "epoch": 76.55, "learning_rate": 1.1740022346239348e-05, "loss": 1.8664, "step": 26445000 }, { "epoch": 76.55, "learning_rate": 1.1739300145887366e-05, "loss": 1.8449, "step": 26445500 }, { "epoch": 76.55, "learning_rate": 1.173857649824009e-05, "loss": 1.8645, "step": 26446000 }, { "epoch": 76.55, "learning_rate": 1.1737852850592812e-05, "loss": 1.8432, "step": 26446500 }, { "epoch": 76.55, "learning_rate": 1.1737129202945536e-05, "loss": 1.8378, "step": 26447000 }, { "epoch": 76.55, "learning_rate": 1.173640555529826e-05, "loss": 1.8287, "step": 26447500 }, { "epoch": 76.56, "learning_rate": 1.1735681907650982e-05, "loss": 1.8614, "step": 26448000 }, { "epoch": 76.56, "learning_rate": 1.1734958260003706e-05, "loss": 1.8639, "step": 26448500 }, { "epoch": 76.56, "learning_rate": 1.173423461235643e-05, "loss": 1.8806, "step": 26449000 }, { "epoch": 76.56, "learning_rate": 1.1733510964709152e-05, "loss": 1.8625, "step": 26449500 }, { "epoch": 76.56, "learning_rate": 1.173278876435717e-05, "loss": 1.8813, "step": 26450000 }, { "epoch": 76.56, "learning_rate": 1.1732066564005188e-05, "loss": 1.8729, "step": 26450500 }, { "epoch": 76.56, "learning_rate": 1.173134291635791e-05, "loss": 1.8586, "step": 26451000 }, { "epoch": 76.57, "learning_rate": 1.1730619268710634e-05, "loss": 1.8718, "step": 26451500 }, { "epoch": 76.57, "learning_rate": 1.1729895621063357e-05, "loss": 1.8448, "step": 26452000 }, { "epoch": 76.57, "learning_rate": 1.1729173420711376e-05, "loss": 1.8519, "step": 26452500 }, { "epoch": 76.57, "learning_rate": 1.1728449773064098e-05, "loss": 1.8513, "step": 26453000 }, { "epoch": 76.57, "learning_rate": 1.1727726125416822e-05, "loss": 1.8671, "step": 26453500 }, { "epoch": 76.57, "learning_rate": 1.1727002477769544e-05, "loss": 1.8797, "step": 26454000 }, { "epoch": 76.57, "learning_rate": 1.1726278830122268e-05, "loss": 1.8661, "step": 26454500 }, { "epoch": 76.58, "learning_rate": 1.1725555182474992e-05, "loss": 1.8659, "step": 26455000 }, { "epoch": 76.58, "learning_rate": 1.1724831534827714e-05, "loss": 1.8658, "step": 26455500 }, { "epoch": 76.58, "learning_rate": 1.1724109334475731e-05, "loss": 1.8413, "step": 26456000 }, { "epoch": 76.58, "learning_rate": 1.1723385686828455e-05, "loss": 1.882, "step": 26456500 }, { "epoch": 76.58, "learning_rate": 1.1722662039181179e-05, "loss": 1.8685, "step": 26457000 }, { "epoch": 76.58, "learning_rate": 1.1721938391533901e-05, "loss": 1.8621, "step": 26457500 }, { "epoch": 76.59, "learning_rate": 1.1721214743886625e-05, "loss": 1.8561, "step": 26458000 }, { "epoch": 76.59, "learning_rate": 1.1720492543534643e-05, "loss": 1.8444, "step": 26458500 }, { "epoch": 76.59, "learning_rate": 1.1719768895887366e-05, "loss": 1.8403, "step": 26459000 }, { "epoch": 76.59, "learning_rate": 1.1719046695535384e-05, "loss": 1.8491, "step": 26459500 }, { "epoch": 76.59, "learning_rate": 1.1718323047888108e-05, "loss": 1.8239, "step": 26460000 }, { "epoch": 76.59, "learning_rate": 1.171759940024083e-05, "loss": 1.8749, "step": 26460500 }, { "epoch": 76.59, "learning_rate": 1.1716875752593554e-05, "loss": 1.8777, "step": 26461000 }, { "epoch": 76.6, "learning_rate": 1.1716152104946276e-05, "loss": 1.8756, "step": 26461500 }, { "epoch": 76.6, "learning_rate": 1.1715428457299e-05, "loss": 1.8634, "step": 26462000 }, { "epoch": 76.6, "learning_rate": 1.1714704809651724e-05, "loss": 1.864, "step": 26462500 }, { "epoch": 76.6, "learning_rate": 1.1713981162004446e-05, "loss": 1.874, "step": 26463000 }, { "epoch": 76.6, "learning_rate": 1.171325751435717e-05, "loss": 1.8415, "step": 26463500 }, { "epoch": 76.6, "learning_rate": 1.1712533866709894e-05, "loss": 1.8585, "step": 26464000 }, { "epoch": 76.6, "learning_rate": 1.1711810219062616e-05, "loss": 1.8618, "step": 26464500 }, { "epoch": 76.61, "learning_rate": 1.1711086571415338e-05, "loss": 1.8804, "step": 26465000 }, { "epoch": 76.61, "learning_rate": 1.1710362923768062e-05, "loss": 1.8369, "step": 26465500 }, { "epoch": 76.61, "learning_rate": 1.1709640723416081e-05, "loss": 1.8768, "step": 26466000 }, { "epoch": 76.61, "learning_rate": 1.1708917075768804e-05, "loss": 1.8857, "step": 26466500 }, { "epoch": 76.61, "learning_rate": 1.1708193428121526e-05, "loss": 1.8569, "step": 26467000 }, { "epoch": 76.61, "learning_rate": 1.1707469780474251e-05, "loss": 1.8268, "step": 26467500 }, { "epoch": 76.61, "learning_rate": 1.1706746132826974e-05, "loss": 1.8642, "step": 26468000 }, { "epoch": 76.62, "learning_rate": 1.1706023932474991e-05, "loss": 1.8463, "step": 26468500 }, { "epoch": 76.62, "learning_rate": 1.1705300284827715e-05, "loss": 1.8551, "step": 26469000 }, { "epoch": 76.62, "learning_rate": 1.1704576637180439e-05, "loss": 1.8565, "step": 26469500 }, { "epoch": 76.62, "learning_rate": 1.1703852989533161e-05, "loss": 1.8558, "step": 26470000 }, { "epoch": 76.62, "learning_rate": 1.1703129341885883e-05, "loss": 1.8638, "step": 26470500 }, { "epoch": 76.62, "learning_rate": 1.1702407141533902e-05, "loss": 1.8411, "step": 26471000 }, { "epoch": 76.62, "learning_rate": 1.1701683493886626e-05, "loss": 1.872, "step": 26471500 }, { "epoch": 76.63, "learning_rate": 1.1700959846239348e-05, "loss": 1.8542, "step": 26472000 }, { "epoch": 76.63, "learning_rate": 1.170023619859207e-05, "loss": 1.8573, "step": 26472500 }, { "epoch": 76.63, "learning_rate": 1.169951399824009e-05, "loss": 1.8398, "step": 26473000 }, { "epoch": 76.63, "learning_rate": 1.1698790350592813e-05, "loss": 1.8487, "step": 26473500 }, { "epoch": 76.63, "learning_rate": 1.1698066702945536e-05, "loss": 1.8459, "step": 26474000 }, { "epoch": 76.63, "learning_rate": 1.1697343055298258e-05, "loss": 1.8455, "step": 26474500 }, { "epoch": 76.63, "learning_rate": 1.1696619407650984e-05, "loss": 1.8843, "step": 26475000 }, { "epoch": 76.64, "learning_rate": 1.1695895760003706e-05, "loss": 1.8478, "step": 26475500 }, { "epoch": 76.64, "learning_rate": 1.1695172112356428e-05, "loss": 1.8546, "step": 26476000 }, { "epoch": 76.64, "learning_rate": 1.1694448464709152e-05, "loss": 1.8595, "step": 26476500 }, { "epoch": 76.64, "learning_rate": 1.1693724817061876e-05, "loss": 1.8643, "step": 26477000 }, { "epoch": 76.64, "learning_rate": 1.1693002616709893e-05, "loss": 1.8688, "step": 26477500 }, { "epoch": 76.64, "learning_rate": 1.1692278969062615e-05, "loss": 1.8436, "step": 26478000 }, { "epoch": 76.64, "learning_rate": 1.169155532141534e-05, "loss": 1.8669, "step": 26478500 }, { "epoch": 76.65, "learning_rate": 1.1690831673768063e-05, "loss": 1.8356, "step": 26479000 }, { "epoch": 76.65, "learning_rate": 1.1690108026120785e-05, "loss": 1.8539, "step": 26479500 }, { "epoch": 76.65, "learning_rate": 1.168938437847351e-05, "loss": 1.8648, "step": 26480000 }, { "epoch": 76.65, "learning_rate": 1.1688662178121528e-05, "loss": 1.8465, "step": 26480500 }, { "epoch": 76.65, "learning_rate": 1.168793853047425e-05, "loss": 1.8463, "step": 26481000 }, { "epoch": 76.65, "learning_rate": 1.1687214882826973e-05, "loss": 1.854, "step": 26481500 }, { "epoch": 76.65, "learning_rate": 1.1686491235179697e-05, "loss": 1.8561, "step": 26482000 }, { "epoch": 76.66, "learning_rate": 1.168576758753242e-05, "loss": 1.8619, "step": 26482500 }, { "epoch": 76.66, "learning_rate": 1.1685045387180438e-05, "loss": 1.848, "step": 26483000 }, { "epoch": 76.66, "learning_rate": 1.168432173953316e-05, "loss": 1.8588, "step": 26483500 }, { "epoch": 76.66, "learning_rate": 1.1683598091885884e-05, "loss": 1.859, "step": 26484000 }, { "epoch": 76.66, "learning_rate": 1.1682874444238608e-05, "loss": 1.8565, "step": 26484500 }, { "epoch": 76.66, "learning_rate": 1.168215079659133e-05, "loss": 1.8599, "step": 26485000 }, { "epoch": 76.66, "learning_rate": 1.1681427148944054e-05, "loss": 1.8949, "step": 26485500 }, { "epoch": 76.67, "learning_rate": 1.1680704948592073e-05, "loss": 1.8569, "step": 26486000 }, { "epoch": 76.67, "learning_rate": 1.1679981300944795e-05, "loss": 1.8662, "step": 26486500 }, { "epoch": 76.67, "learning_rate": 1.1679257653297518e-05, "loss": 1.8736, "step": 26487000 }, { "epoch": 76.67, "learning_rate": 1.1678534005650241e-05, "loss": 1.8493, "step": 26487500 }, { "epoch": 76.67, "learning_rate": 1.1677810358002965e-05, "loss": 1.8706, "step": 26488000 }, { "epoch": 76.67, "learning_rate": 1.1677086710355688e-05, "loss": 1.8713, "step": 26488500 }, { "epoch": 76.67, "learning_rate": 1.1676364510003705e-05, "loss": 1.8665, "step": 26489000 }, { "epoch": 76.68, "learning_rate": 1.1675640862356429e-05, "loss": 1.8918, "step": 26489500 }, { "epoch": 76.68, "learning_rate": 1.1674917214709153e-05, "loss": 1.8633, "step": 26490000 }, { "epoch": 76.68, "learning_rate": 1.1674193567061875e-05, "loss": 1.8646, "step": 26490500 }, { "epoch": 76.68, "learning_rate": 1.1673469919414597e-05, "loss": 1.8622, "step": 26491000 }, { "epoch": 76.68, "learning_rate": 1.1672747719062616e-05, "loss": 1.8545, "step": 26491500 }, { "epoch": 76.68, "learning_rate": 1.1672025518710635e-05, "loss": 1.835, "step": 26492000 }, { "epoch": 76.68, "learning_rate": 1.1671301871063357e-05, "loss": 1.8705, "step": 26492500 }, { "epoch": 76.69, "learning_rate": 1.1670579670711375e-05, "loss": 1.8704, "step": 26493000 }, { "epoch": 76.69, "learning_rate": 1.1669856023064098e-05, "loss": 1.8698, "step": 26493500 }, { "epoch": 76.69, "learning_rate": 1.1669132375416822e-05, "loss": 1.8704, "step": 26494000 }, { "epoch": 76.69, "learning_rate": 1.1668408727769545e-05, "loss": 1.8482, "step": 26494500 }, { "epoch": 76.69, "learning_rate": 1.1667685080122267e-05, "loss": 1.8517, "step": 26495000 }, { "epoch": 76.69, "learning_rate": 1.1666961432474992e-05, "loss": 1.85, "step": 26495500 }, { "epoch": 76.7, "learning_rate": 1.1666237784827715e-05, "loss": 1.8795, "step": 26496000 }, { "epoch": 76.7, "learning_rate": 1.1665514137180437e-05, "loss": 1.8628, "step": 26496500 }, { "epoch": 76.7, "learning_rate": 1.1664790489533161e-05, "loss": 1.8708, "step": 26497000 }, { "epoch": 76.7, "learning_rate": 1.1664066841885885e-05, "loss": 1.8824, "step": 26497500 }, { "epoch": 76.7, "learning_rate": 1.1663344641533902e-05, "loss": 1.8553, "step": 26498000 }, { "epoch": 76.7, "learning_rate": 1.1662620993886624e-05, "loss": 1.8717, "step": 26498500 }, { "epoch": 76.7, "learning_rate": 1.166189734623935e-05, "loss": 1.8423, "step": 26499000 }, { "epoch": 76.71, "learning_rate": 1.1661173698592072e-05, "loss": 1.8724, "step": 26499500 }, { "epoch": 76.71, "learning_rate": 1.1660450050944794e-05, "loss": 1.8682, "step": 26500000 }, { "epoch": 76.71, "learning_rate": 1.1659726403297518e-05, "loss": 1.8493, "step": 26500500 }, { "epoch": 76.71, "learning_rate": 1.1659002755650242e-05, "loss": 1.8707, "step": 26501000 }, { "epoch": 76.71, "learning_rate": 1.1658279108002964e-05, "loss": 1.8627, "step": 26501500 }, { "epoch": 76.71, "learning_rate": 1.1657555460355687e-05, "loss": 1.8903, "step": 26502000 }, { "epoch": 76.71, "learning_rate": 1.1656831812708412e-05, "loss": 1.8613, "step": 26502500 }, { "epoch": 76.72, "learning_rate": 1.1656108165061135e-05, "loss": 1.8616, "step": 26503000 }, { "epoch": 76.72, "learning_rate": 1.1655385964709152e-05, "loss": 1.8815, "step": 26503500 }, { "epoch": 76.72, "learning_rate": 1.1654662317061876e-05, "loss": 1.8632, "step": 26504000 }, { "epoch": 76.72, "learning_rate": 1.16539386694146e-05, "loss": 1.8806, "step": 26504500 }, { "epoch": 76.72, "learning_rate": 1.1653215021767322e-05, "loss": 1.8648, "step": 26505000 }, { "epoch": 76.72, "learning_rate": 1.1652492821415339e-05, "loss": 1.8881, "step": 26505500 }, { "epoch": 76.72, "learning_rate": 1.1651769173768063e-05, "loss": 1.8671, "step": 26506000 }, { "epoch": 76.73, "learning_rate": 1.1651045526120787e-05, "loss": 1.8795, "step": 26506500 }, { "epoch": 76.73, "learning_rate": 1.165032187847351e-05, "loss": 1.8596, "step": 26507000 }, { "epoch": 76.73, "learning_rate": 1.1649598230826231e-05, "loss": 1.8839, "step": 26507500 }, { "epoch": 76.73, "learning_rate": 1.1648874583178957e-05, "loss": 1.8395, "step": 26508000 }, { "epoch": 76.73, "learning_rate": 1.164815093553168e-05, "loss": 1.8651, "step": 26508500 }, { "epoch": 76.73, "learning_rate": 1.1647427287884402e-05, "loss": 1.8615, "step": 26509000 }, { "epoch": 76.73, "learning_rate": 1.1646703640237125e-05, "loss": 1.8585, "step": 26509500 }, { "epoch": 76.74, "learning_rate": 1.1645981439885144e-05, "loss": 1.8435, "step": 26510000 }, { "epoch": 76.74, "learning_rate": 1.1645257792237867e-05, "loss": 1.8638, "step": 26510500 }, { "epoch": 76.74, "learning_rate": 1.1644534144590589e-05, "loss": 1.8344, "step": 26511000 }, { "epoch": 76.74, "learning_rate": 1.1643810496943313e-05, "loss": 1.8841, "step": 26511500 }, { "epoch": 76.74, "learning_rate": 1.1643086849296037e-05, "loss": 1.8647, "step": 26512000 }, { "epoch": 76.74, "learning_rate": 1.1642363201648759e-05, "loss": 1.8483, "step": 26512500 }, { "epoch": 76.74, "learning_rate": 1.1641639554001483e-05, "loss": 1.8535, "step": 26513000 }, { "epoch": 76.75, "learning_rate": 1.1640915906354207e-05, "loss": 1.8573, "step": 26513500 }, { "epoch": 76.75, "learning_rate": 1.1640193706002224e-05, "loss": 1.8534, "step": 26514000 }, { "epoch": 76.75, "learning_rate": 1.1639471505650241e-05, "loss": 1.8592, "step": 26514500 }, { "epoch": 76.75, "learning_rate": 1.1638747858002964e-05, "loss": 1.8217, "step": 26515000 }, { "epoch": 76.75, "learning_rate": 1.1638024210355688e-05, "loss": 1.8444, "step": 26515500 }, { "epoch": 76.75, "learning_rate": 1.1637300562708411e-05, "loss": 1.8471, "step": 26516000 }, { "epoch": 76.75, "learning_rate": 1.1636576915061134e-05, "loss": 1.8679, "step": 26516500 }, { "epoch": 76.76, "learning_rate": 1.1635853267413858e-05, "loss": 1.8576, "step": 26517000 }, { "epoch": 76.76, "learning_rate": 1.1635129619766582e-05, "loss": 1.8541, "step": 26517500 }, { "epoch": 76.76, "learning_rate": 1.1634407419414599e-05, "loss": 1.8588, "step": 26518000 }, { "epoch": 76.76, "learning_rate": 1.1633683771767321e-05, "loss": 1.9011, "step": 26518500 }, { "epoch": 76.76, "learning_rate": 1.1632960124120045e-05, "loss": 1.87, "step": 26519000 }, { "epoch": 76.76, "learning_rate": 1.1632236476472769e-05, "loss": 1.875, "step": 26519500 }, { "epoch": 76.76, "learning_rate": 1.1631512828825491e-05, "loss": 1.8649, "step": 26520000 }, { "epoch": 76.77, "learning_rate": 1.1630790628473508e-05, "loss": 1.8627, "step": 26520500 }, { "epoch": 76.77, "learning_rate": 1.1630066980826232e-05, "loss": 1.8815, "step": 26521000 }, { "epoch": 76.77, "learning_rate": 1.1629343333178956e-05, "loss": 1.856, "step": 26521500 }, { "epoch": 76.77, "learning_rate": 1.1628619685531678e-05, "loss": 1.8564, "step": 26522000 }, { "epoch": 76.77, "learning_rate": 1.1627896037884402e-05, "loss": 1.8398, "step": 26522500 }, { "epoch": 76.77, "learning_rate": 1.162717383753242e-05, "loss": 1.8679, "step": 26523000 }, { "epoch": 76.77, "learning_rate": 1.1626450189885144e-05, "loss": 1.8626, "step": 26523500 }, { "epoch": 76.78, "learning_rate": 1.1625726542237866e-05, "loss": 1.8619, "step": 26524000 }, { "epoch": 76.78, "learning_rate": 1.162500289459059e-05, "loss": 1.8645, "step": 26524500 }, { "epoch": 76.78, "learning_rate": 1.1624280694238607e-05, "loss": 1.8897, "step": 26525000 }, { "epoch": 76.78, "learning_rate": 1.1623557046591331e-05, "loss": 1.8835, "step": 26525500 }, { "epoch": 76.78, "learning_rate": 1.1622833398944053e-05, "loss": 1.8755, "step": 26526000 }, { "epoch": 76.78, "learning_rate": 1.1622109751296777e-05, "loss": 1.8505, "step": 26526500 }, { "epoch": 76.78, "learning_rate": 1.1621387550944796e-05, "loss": 1.8497, "step": 26527000 }, { "epoch": 76.79, "learning_rate": 1.1620663903297518e-05, "loss": 1.8532, "step": 26527500 }, { "epoch": 76.79, "learning_rate": 1.161994025565024e-05, "loss": 1.9012, "step": 26528000 }, { "epoch": 76.79, "learning_rate": 1.1619216608002964e-05, "loss": 1.877, "step": 26528500 }, { "epoch": 76.79, "learning_rate": 1.1618492960355688e-05, "loss": 1.8724, "step": 26529000 }, { "epoch": 76.79, "learning_rate": 1.161776931270841e-05, "loss": 1.8693, "step": 26529500 }, { "epoch": 76.79, "learning_rate": 1.1617045665061134e-05, "loss": 1.8614, "step": 26530000 }, { "epoch": 76.79, "learning_rate": 1.1616322017413858e-05, "loss": 1.861, "step": 26530500 }, { "epoch": 76.8, "learning_rate": 1.161559836976658e-05, "loss": 1.8568, "step": 26531000 }, { "epoch": 76.8, "learning_rate": 1.1614874722119305e-05, "loss": 1.8819, "step": 26531500 }, { "epoch": 76.8, "learning_rate": 1.1614152521767322e-05, "loss": 1.8835, "step": 26532000 }, { "epoch": 76.8, "learning_rate": 1.1613430321415339e-05, "loss": 1.8435, "step": 26532500 }, { "epoch": 76.8, "learning_rate": 1.1612706673768063e-05, "loss": 1.8797, "step": 26533000 }, { "epoch": 76.8, "learning_rate": 1.1611983026120785e-05, "loss": 1.8408, "step": 26533500 }, { "epoch": 76.81, "learning_rate": 1.1611259378473509e-05, "loss": 1.852, "step": 26534000 }, { "epoch": 76.81, "learning_rate": 1.1610535730826233e-05, "loss": 1.8721, "step": 26534500 }, { "epoch": 76.81, "learning_rate": 1.1609812083178955e-05, "loss": 1.8534, "step": 26535000 }, { "epoch": 76.81, "learning_rate": 1.160908843553168e-05, "loss": 1.8763, "step": 26535500 }, { "epoch": 76.81, "learning_rate": 1.1608364787884401e-05, "loss": 1.8771, "step": 26536000 }, { "epoch": 76.81, "learning_rate": 1.1607641140237125e-05, "loss": 1.8569, "step": 26536500 }, { "epoch": 76.81, "learning_rate": 1.160691749258985e-05, "loss": 1.854, "step": 26537000 }, { "epoch": 76.82, "learning_rate": 1.1606195292237867e-05, "loss": 1.8661, "step": 26537500 }, { "epoch": 76.82, "learning_rate": 1.160547164459059e-05, "loss": 1.8694, "step": 26538000 }, { "epoch": 76.82, "learning_rate": 1.1604747996943313e-05, "loss": 1.8683, "step": 26538500 }, { "epoch": 76.82, "learning_rate": 1.1604024349296037e-05, "loss": 1.8582, "step": 26539000 }, { "epoch": 76.82, "learning_rate": 1.1603302148944054e-05, "loss": 1.8892, "step": 26539500 }, { "epoch": 76.82, "learning_rate": 1.1602578501296778e-05, "loss": 1.8631, "step": 26540000 }, { "epoch": 76.82, "learning_rate": 1.16018548536495e-05, "loss": 1.8486, "step": 26540500 }, { "epoch": 76.83, "learning_rate": 1.1601131206002224e-05, "loss": 1.8425, "step": 26541000 }, { "epoch": 76.83, "learning_rate": 1.1600407558354946e-05, "loss": 1.8619, "step": 26541500 }, { "epoch": 76.83, "learning_rate": 1.159968391070767e-05, "loss": 1.8416, "step": 26542000 }, { "epoch": 76.83, "learning_rate": 1.1598960263060394e-05, "loss": 1.8649, "step": 26542500 }, { "epoch": 76.83, "learning_rate": 1.1598238062708411e-05, "loss": 1.8377, "step": 26543000 }, { "epoch": 76.83, "learning_rate": 1.1597515862356429e-05, "loss": 1.8686, "step": 26543500 }, { "epoch": 76.83, "learning_rate": 1.1596792214709153e-05, "loss": 1.8616, "step": 26544000 }, { "epoch": 76.84, "learning_rate": 1.1596068567061875e-05, "loss": 1.8305, "step": 26544500 }, { "epoch": 76.84, "learning_rate": 1.1595344919414599e-05, "loss": 1.8958, "step": 26545000 }, { "epoch": 76.84, "learning_rate": 1.1594621271767321e-05, "loss": 1.8586, "step": 26545500 }, { "epoch": 76.84, "learning_rate": 1.1593897624120045e-05, "loss": 1.8281, "step": 26546000 }, { "epoch": 76.84, "learning_rate": 1.1593173976472769e-05, "loss": 1.8733, "step": 26546500 }, { "epoch": 76.84, "learning_rate": 1.1592451776120786e-05, "loss": 1.8424, "step": 26547000 }, { "epoch": 76.84, "learning_rate": 1.159172812847351e-05, "loss": 1.8534, "step": 26547500 }, { "epoch": 76.85, "learning_rate": 1.1591004480826232e-05, "loss": 1.8598, "step": 26548000 }, { "epoch": 76.85, "learning_rate": 1.1590280833178956e-05, "loss": 1.8655, "step": 26548500 }, { "epoch": 76.85, "learning_rate": 1.1589557185531678e-05, "loss": 1.8766, "step": 26549000 }, { "epoch": 76.85, "learning_rate": 1.1588833537884402e-05, "loss": 1.8575, "step": 26549500 }, { "epoch": 76.85, "learning_rate": 1.1588109890237126e-05, "loss": 1.8444, "step": 26550000 }, { "epoch": 76.85, "learning_rate": 1.1587386242589848e-05, "loss": 1.8482, "step": 26550500 }, { "epoch": 76.85, "learning_rate": 1.1586662594942572e-05, "loss": 1.8556, "step": 26551000 }, { "epoch": 76.86, "learning_rate": 1.1585938947295295e-05, "loss": 1.8366, "step": 26551500 }, { "epoch": 76.86, "learning_rate": 1.1585215299648019e-05, "loss": 1.8694, "step": 26552000 }, { "epoch": 76.86, "learning_rate": 1.158449165200074e-05, "loss": 1.8632, "step": 26552500 }, { "epoch": 76.86, "learning_rate": 1.1583768004353465e-05, "loss": 1.8766, "step": 26553000 }, { "epoch": 76.86, "learning_rate": 1.1583044356706189e-05, "loss": 1.8808, "step": 26553500 }, { "epoch": 76.86, "learning_rate": 1.158232070905891e-05, "loss": 1.8498, "step": 26554000 }, { "epoch": 76.86, "learning_rate": 1.1581597061411635e-05, "loss": 1.8492, "step": 26554500 }, { "epoch": 76.87, "learning_rate": 1.1580874861059652e-05, "loss": 1.8569, "step": 26555000 }, { "epoch": 76.87, "learning_rate": 1.1580151213412376e-05, "loss": 1.8453, "step": 26555500 }, { "epoch": 76.87, "learning_rate": 1.1579429013060393e-05, "loss": 1.8608, "step": 26556000 }, { "epoch": 76.87, "learning_rate": 1.1578705365413115e-05, "loss": 1.8667, "step": 26556500 }, { "epoch": 76.87, "learning_rate": 1.157798171776584e-05, "loss": 1.8831, "step": 26557000 }, { "epoch": 76.87, "learning_rate": 1.1577258070118563e-05, "loss": 1.8847, "step": 26557500 }, { "epoch": 76.87, "learning_rate": 1.1576534422471285e-05, "loss": 1.8484, "step": 26558000 }, { "epoch": 76.88, "learning_rate": 1.157581077482401e-05, "loss": 1.8657, "step": 26558500 }, { "epoch": 76.88, "learning_rate": 1.1575087127176733e-05, "loss": 1.8804, "step": 26559000 }, { "epoch": 76.88, "learning_rate": 1.1574363479529456e-05, "loss": 1.8556, "step": 26559500 }, { "epoch": 76.88, "learning_rate": 1.1573641279177473e-05, "loss": 1.8695, "step": 26560000 }, { "epoch": 76.88, "learning_rate": 1.1572917631530197e-05, "loss": 1.8483, "step": 26560500 }, { "epoch": 76.88, "learning_rate": 1.157219398388292e-05, "loss": 1.8656, "step": 26561000 }, { "epoch": 76.88, "learning_rate": 1.1571470336235643e-05, "loss": 1.8702, "step": 26561500 }, { "epoch": 76.89, "learning_rate": 1.1570746688588367e-05, "loss": 1.8683, "step": 26562000 }, { "epoch": 76.89, "learning_rate": 1.157002304094109e-05, "loss": 1.8577, "step": 26562500 }, { "epoch": 76.89, "learning_rate": 1.1569299393293813e-05, "loss": 1.8502, "step": 26563000 }, { "epoch": 76.89, "learning_rate": 1.156857719294183e-05, "loss": 1.8719, "step": 26563500 }, { "epoch": 76.89, "learning_rate": 1.1567853545294554e-05, "loss": 1.8551, "step": 26564000 }, { "epoch": 76.89, "learning_rate": 1.1567129897647278e-05, "loss": 1.8585, "step": 26564500 }, { "epoch": 76.89, "learning_rate": 1.156640625e-05, "loss": 1.8559, "step": 26565000 }, { "epoch": 76.9, "learning_rate": 1.1565684049648018e-05, "loss": 1.8526, "step": 26565500 }, { "epoch": 76.9, "learning_rate": 1.1564960402000742e-05, "loss": 1.8605, "step": 26566000 }, { "epoch": 76.9, "learning_rate": 1.1564236754353465e-05, "loss": 1.8447, "step": 26566500 }, { "epoch": 76.9, "learning_rate": 1.1563513106706188e-05, "loss": 1.8492, "step": 26567000 }, { "epoch": 76.9, "learning_rate": 1.156278945905891e-05, "loss": 1.8691, "step": 26567500 }, { "epoch": 76.9, "learning_rate": 1.1562065811411636e-05, "loss": 1.843, "step": 26568000 }, { "epoch": 76.9, "learning_rate": 1.1561343611059653e-05, "loss": 1.8508, "step": 26568500 }, { "epoch": 76.91, "learning_rate": 1.1560619963412375e-05, "loss": 1.8665, "step": 26569000 }, { "epoch": 76.91, "learning_rate": 1.1559896315765099e-05, "loss": 1.8468, "step": 26569500 }, { "epoch": 76.91, "learning_rate": 1.1559172668117823e-05, "loss": 1.8458, "step": 26570000 }, { "epoch": 76.91, "learning_rate": 1.1558449020470545e-05, "loss": 1.9079, "step": 26570500 }, { "epoch": 76.91, "learning_rate": 1.1557725372823267e-05, "loss": 1.8719, "step": 26571000 }, { "epoch": 76.91, "learning_rate": 1.1557003172471286e-05, "loss": 1.8537, "step": 26571500 }, { "epoch": 76.92, "learning_rate": 1.155627952482401e-05, "loss": 1.8546, "step": 26572000 }, { "epoch": 76.92, "learning_rate": 1.1555555877176732e-05, "loss": 1.8531, "step": 26572500 }, { "epoch": 76.92, "learning_rate": 1.1554832229529455e-05, "loss": 1.8804, "step": 26573000 }, { "epoch": 76.92, "learning_rate": 1.155410858188218e-05, "loss": 1.8751, "step": 26573500 }, { "epoch": 76.92, "learning_rate": 1.1553384934234903e-05, "loss": 1.8532, "step": 26574000 }, { "epoch": 76.92, "learning_rate": 1.1552661286587625e-05, "loss": 1.8745, "step": 26574500 }, { "epoch": 76.92, "learning_rate": 1.1551937638940349e-05, "loss": 1.8539, "step": 26575000 }, { "epoch": 76.93, "learning_rate": 1.1551213991293073e-05, "loss": 1.8854, "step": 26575500 }, { "epoch": 76.93, "learning_rate": 1.1550490343645795e-05, "loss": 1.8523, "step": 26576000 }, { "epoch": 76.93, "learning_rate": 1.1549766695998517e-05, "loss": 1.8668, "step": 26576500 }, { "epoch": 76.93, "learning_rate": 1.1549043048351243e-05, "loss": 1.8396, "step": 26577000 }, { "epoch": 76.93, "learning_rate": 1.1548319400703965e-05, "loss": 1.9032, "step": 26577500 }, { "epoch": 76.93, "learning_rate": 1.1547595753056687e-05, "loss": 1.8778, "step": 26578000 }, { "epoch": 76.93, "learning_rate": 1.1546872105409413e-05, "loss": 1.861, "step": 26578500 }, { "epoch": 76.94, "learning_rate": 1.154614990505743e-05, "loss": 1.8727, "step": 26579000 }, { "epoch": 76.94, "learning_rate": 1.1545426257410152e-05, "loss": 1.8404, "step": 26579500 }, { "epoch": 76.94, "learning_rate": 1.154470405705817e-05, "loss": 1.8871, "step": 26580000 }, { "epoch": 76.94, "learning_rate": 1.1543980409410893e-05, "loss": 1.8763, "step": 26580500 }, { "epoch": 76.94, "learning_rate": 1.1543258209058912e-05, "loss": 1.8345, "step": 26581000 }, { "epoch": 76.94, "learning_rate": 1.1542534561411635e-05, "loss": 1.8639, "step": 26581500 }, { "epoch": 76.94, "learning_rate": 1.1541810913764357e-05, "loss": 1.8604, "step": 26582000 }, { "epoch": 76.95, "learning_rate": 1.154108726611708e-05, "loss": 1.8774, "step": 26582500 }, { "epoch": 76.95, "learning_rate": 1.1540363618469805e-05, "loss": 1.8404, "step": 26583000 }, { "epoch": 76.95, "learning_rate": 1.1539641418117822e-05, "loss": 1.8703, "step": 26583500 }, { "epoch": 76.95, "learning_rate": 1.1538917770470544e-05, "loss": 1.8717, "step": 26584000 }, { "epoch": 76.95, "learning_rate": 1.153819412282327e-05, "loss": 1.8806, "step": 26584500 }, { "epoch": 76.95, "learning_rate": 1.1537470475175992e-05, "loss": 1.8769, "step": 26585000 }, { "epoch": 76.95, "learning_rate": 1.1536746827528714e-05, "loss": 1.8537, "step": 26585500 }, { "epoch": 76.96, "learning_rate": 1.1536023179881438e-05, "loss": 1.8341, "step": 26586000 }, { "epoch": 76.96, "learning_rate": 1.1535299532234162e-05, "loss": 1.8698, "step": 26586500 }, { "epoch": 76.96, "learning_rate": 1.1534575884586884e-05, "loss": 1.8548, "step": 26587000 }, { "epoch": 76.96, "learning_rate": 1.1533852236939607e-05, "loss": 1.8679, "step": 26587500 }, { "epoch": 76.96, "learning_rate": 1.1533128589292332e-05, "loss": 1.8819, "step": 26588000 }, { "epoch": 76.96, "learning_rate": 1.1532404941645054e-05, "loss": 1.8695, "step": 26588500 }, { "epoch": 76.96, "learning_rate": 1.1531681293997777e-05, "loss": 1.8633, "step": 26589000 }, { "epoch": 76.97, "learning_rate": 1.15309576463505e-05, "loss": 1.8657, "step": 26589500 }, { "epoch": 76.97, "learning_rate": 1.153023544599852e-05, "loss": 1.8679, "step": 26590000 }, { "epoch": 76.97, "learning_rate": 1.1529514692941832e-05, "loss": 1.8652, "step": 26590500 }, { "epoch": 76.97, "learning_rate": 1.1528791045294554e-05, "loss": 1.8908, "step": 26591000 }, { "epoch": 76.97, "learning_rate": 1.1528067397647276e-05, "loss": 1.8728, "step": 26591500 }, { "epoch": 76.97, "learning_rate": 1.1527343750000002e-05, "loss": 1.8551, "step": 26592000 }, { "epoch": 76.97, "learning_rate": 1.1526620102352724e-05, "loss": 1.8584, "step": 26592500 }, { "epoch": 76.98, "learning_rate": 1.1525897902000741e-05, "loss": 1.853, "step": 26593000 }, { "epoch": 76.98, "learning_rate": 1.1525174254353464e-05, "loss": 1.8674, "step": 26593500 }, { "epoch": 76.98, "learning_rate": 1.152445060670619e-05, "loss": 1.8434, "step": 26594000 }, { "epoch": 76.98, "learning_rate": 1.1523726959058912e-05, "loss": 1.8551, "step": 26594500 }, { "epoch": 76.98, "learning_rate": 1.1523003311411634e-05, "loss": 1.8318, "step": 26595000 }, { "epoch": 76.98, "learning_rate": 1.1522281111059651e-05, "loss": 1.8883, "step": 26595500 }, { "epoch": 76.98, "learning_rate": 1.1521557463412377e-05, "loss": 1.8636, "step": 26596000 }, { "epoch": 76.99, "learning_rate": 1.1520833815765099e-05, "loss": 1.8719, "step": 26596500 }, { "epoch": 76.99, "learning_rate": 1.1520110168117821e-05, "loss": 1.8725, "step": 26597000 }, { "epoch": 76.99, "learning_rate": 1.1519386520470545e-05, "loss": 1.874, "step": 26597500 }, { "epoch": 76.99, "learning_rate": 1.1518662872823269e-05, "loss": 1.8746, "step": 26598000 }, { "epoch": 76.99, "learning_rate": 1.1517939225175991e-05, "loss": 1.8565, "step": 26598500 }, { "epoch": 76.99, "learning_rate": 1.1517215577528715e-05, "loss": 1.8595, "step": 26599000 }, { "epoch": 76.99, "learning_rate": 1.1516491929881439e-05, "loss": 1.8689, "step": 26599500 }, { "epoch": 77.0, "learning_rate": 1.1515768282234161e-05, "loss": 1.855, "step": 26600000 }, { "epoch": 77.0, "learning_rate": 1.1515044634586883e-05, "loss": 1.8537, "step": 26600500 }, { "epoch": 77.0, "learning_rate": 1.1514320986939609e-05, "loss": 1.8616, "step": 26601000 }, { "epoch": 77.0, "eval_accuracy": 0.6857031010568782, "eval_accuracy_mlm": 0.6540023252663993, "eval_accuracy_nsp": 0.8557171861608759, "eval_loss": 2.174851417541504, "eval_runtime": 331.1899, "eval_samples_per_second": 1317.631, "eval_steps_per_second": 54.902, "step": 26601344 }, { "epoch": 77.0, "learning_rate": 1.1513597339292331e-05, "loss": 1.8593, "step": 26601500 }, { "epoch": 77.0, "learning_rate": 1.1512873691645054e-05, "loss": 1.852, "step": 26602000 }, { "epoch": 77.0, "learning_rate": 1.1512151491293073e-05, "loss": 1.8842, "step": 26602500 }, { "epoch": 77.0, "learning_rate": 1.1511427843645796e-05, "loss": 1.8417, "step": 26603000 }, { "epoch": 77.01, "learning_rate": 1.1510704195998519e-05, "loss": 1.8248, "step": 26603500 }, { "epoch": 77.01, "learning_rate": 1.1509980548351241e-05, "loss": 1.8625, "step": 26604000 }, { "epoch": 77.01, "learning_rate": 1.1509256900703965e-05, "loss": 1.8314, "step": 26604500 }, { "epoch": 77.01, "learning_rate": 1.1508536147647277e-05, "loss": 1.8478, "step": 26605000 }, { "epoch": 77.01, "learning_rate": 1.1507812500000001e-05, "loss": 1.8534, "step": 26605500 }, { "epoch": 77.01, "learning_rate": 1.1507088852352723e-05, "loss": 1.8672, "step": 26606000 }, { "epoch": 77.01, "learning_rate": 1.1506365204705447e-05, "loss": 1.8686, "step": 26606500 }, { "epoch": 77.02, "learning_rate": 1.1505641557058171e-05, "loss": 1.8681, "step": 26607000 }, { "epoch": 77.02, "learning_rate": 1.1504917909410893e-05, "loss": 1.8587, "step": 26607500 }, { "epoch": 77.02, "learning_rate": 1.150419570905891e-05, "loss": 1.8509, "step": 26608000 }, { "epoch": 77.02, "learning_rate": 1.1503472061411635e-05, "loss": 1.8118, "step": 26608500 }, { "epoch": 77.02, "learning_rate": 1.1502748413764358e-05, "loss": 1.8338, "step": 26609000 }, { "epoch": 77.02, "learning_rate": 1.150202476611708e-05, "loss": 1.8662, "step": 26609500 }, { "epoch": 77.03, "learning_rate": 1.1501301118469805e-05, "loss": 1.8558, "step": 26610000 }, { "epoch": 77.03, "learning_rate": 1.1500577470822529e-05, "loss": 1.8496, "step": 26610500 }, { "epoch": 77.03, "learning_rate": 1.149985382317525e-05, "loss": 1.8687, "step": 26611000 }, { "epoch": 77.03, "learning_rate": 1.1499130175527973e-05, "loss": 1.8529, "step": 26611500 }, { "epoch": 77.03, "learning_rate": 1.1498407975175992e-05, "loss": 1.8526, "step": 26612000 }, { "epoch": 77.03, "learning_rate": 1.1497684327528716e-05, "loss": 1.8682, "step": 26612500 }, { "epoch": 77.03, "learning_rate": 1.1496960679881438e-05, "loss": 1.8416, "step": 26613000 }, { "epoch": 77.04, "learning_rate": 1.149623703223416e-05, "loss": 1.8599, "step": 26613500 }, { "epoch": 77.04, "learning_rate": 1.1495513384586884e-05, "loss": 1.8525, "step": 26614000 }, { "epoch": 77.04, "learning_rate": 1.1494789736939608e-05, "loss": 1.873, "step": 26614500 }, { "epoch": 77.04, "learning_rate": 1.149406608929233e-05, "loss": 1.8453, "step": 26615000 }, { "epoch": 77.04, "learning_rate": 1.149334388894035e-05, "loss": 1.8805, "step": 26615500 }, { "epoch": 77.04, "learning_rate": 1.1492620241293072e-05, "loss": 1.8352, "step": 26616000 }, { "epoch": 77.04, "learning_rate": 1.149189804094109e-05, "loss": 1.839, "step": 26616500 }, { "epoch": 77.05, "learning_rate": 1.1491174393293813e-05, "loss": 1.8468, "step": 26617000 }, { "epoch": 77.05, "learning_rate": 1.1490450745646537e-05, "loss": 1.8459, "step": 26617500 }, { "epoch": 77.05, "learning_rate": 1.148972709799926e-05, "loss": 1.8616, "step": 26618000 }, { "epoch": 77.05, "learning_rate": 1.1489003450351983e-05, "loss": 1.84, "step": 26618500 }, { "epoch": 77.05, "learning_rate": 1.1488279802704705e-05, "loss": 1.8552, "step": 26619000 }, { "epoch": 77.05, "learning_rate": 1.1487556155057429e-05, "loss": 1.8368, "step": 26619500 }, { "epoch": 77.05, "learning_rate": 1.1486833954705448e-05, "loss": 1.8275, "step": 26620000 }, { "epoch": 77.06, "learning_rate": 1.148611030705817e-05, "loss": 1.84, "step": 26620500 }, { "epoch": 77.06, "learning_rate": 1.1485386659410892e-05, "loss": 1.8352, "step": 26621000 }, { "epoch": 77.06, "learning_rate": 1.1484663011763616e-05, "loss": 1.8724, "step": 26621500 }, { "epoch": 77.06, "learning_rate": 1.148393936411634e-05, "loss": 1.8649, "step": 26622000 }, { "epoch": 77.06, "learning_rate": 1.1483215716469063e-05, "loss": 1.8252, "step": 26622500 }, { "epoch": 77.06, "learning_rate": 1.1482492068821786e-05, "loss": 1.8386, "step": 26623000 }, { "epoch": 77.06, "learning_rate": 1.148176842117451e-05, "loss": 1.8563, "step": 26623500 }, { "epoch": 77.07, "learning_rate": 1.1481044773527233e-05, "loss": 1.8343, "step": 26624000 }, { "epoch": 77.07, "learning_rate": 1.1480321125879957e-05, "loss": 1.8526, "step": 26624500 }, { "epoch": 77.07, "learning_rate": 1.1479597478232679e-05, "loss": 1.8586, "step": 26625000 }, { "epoch": 77.07, "learning_rate": 1.1478873830585403e-05, "loss": 1.856, "step": 26625500 }, { "epoch": 77.07, "learning_rate": 1.147815163023342e-05, "loss": 1.8527, "step": 26626000 }, { "epoch": 77.07, "learning_rate": 1.1477429429881437e-05, "loss": 1.8461, "step": 26626500 }, { "epoch": 77.07, "learning_rate": 1.1476705782234161e-05, "loss": 1.8196, "step": 26627000 }, { "epoch": 77.08, "learning_rate": 1.1475982134586885e-05, "loss": 1.8384, "step": 26627500 }, { "epoch": 77.08, "learning_rate": 1.1475258486939607e-05, "loss": 1.8549, "step": 26628000 }, { "epoch": 77.08, "learning_rate": 1.1474534839292331e-05, "loss": 1.8742, "step": 26628500 }, { "epoch": 77.08, "learning_rate": 1.1473811191645053e-05, "loss": 1.858, "step": 26629000 }, { "epoch": 77.08, "learning_rate": 1.1473087543997777e-05, "loss": 1.8519, "step": 26629500 }, { "epoch": 77.08, "learning_rate": 1.1472363896350501e-05, "loss": 1.9013, "step": 26630000 }, { "epoch": 77.08, "learning_rate": 1.1471640248703224e-05, "loss": 1.8487, "step": 26630500 }, { "epoch": 77.09, "learning_rate": 1.1470916601055948e-05, "loss": 1.8731, "step": 26631000 }, { "epoch": 77.09, "learning_rate": 1.147019295340867e-05, "loss": 1.8489, "step": 26631500 }, { "epoch": 77.09, "learning_rate": 1.1469470753056689e-05, "loss": 1.855, "step": 26632000 }, { "epoch": 77.09, "learning_rate": 1.1468747105409411e-05, "loss": 1.8566, "step": 26632500 }, { "epoch": 77.09, "learning_rate": 1.1468023457762135e-05, "loss": 1.8449, "step": 26633000 }, { "epoch": 77.09, "learning_rate": 1.1467299810114859e-05, "loss": 1.8308, "step": 26633500 }, { "epoch": 77.09, "learning_rate": 1.1466576162467581e-05, "loss": 1.8667, "step": 26634000 }, { "epoch": 77.1, "learning_rate": 1.1465853962115598e-05, "loss": 1.8448, "step": 26634500 }, { "epoch": 77.1, "learning_rate": 1.1465130314468322e-05, "loss": 1.8316, "step": 26635000 }, { "epoch": 77.1, "learning_rate": 1.1464406666821046e-05, "loss": 1.863, "step": 26635500 }, { "epoch": 77.1, "learning_rate": 1.1463683019173768e-05, "loss": 1.8524, "step": 26636000 }, { "epoch": 77.1, "learning_rate": 1.1462959371526492e-05, "loss": 1.8693, "step": 26636500 }, { "epoch": 77.1, "learning_rate": 1.1462235723879214e-05, "loss": 1.8412, "step": 26637000 }, { "epoch": 77.1, "learning_rate": 1.1461512076231938e-05, "loss": 1.8501, "step": 26637500 }, { "epoch": 77.11, "learning_rate": 1.1460789875879956e-05, "loss": 1.8445, "step": 26638000 }, { "epoch": 77.11, "learning_rate": 1.146006622823268e-05, "loss": 1.8355, "step": 26638500 }, { "epoch": 77.11, "learning_rate": 1.1459342580585402e-05, "loss": 1.8431, "step": 26639000 }, { "epoch": 77.11, "learning_rate": 1.1458618932938126e-05, "loss": 1.8443, "step": 26639500 }, { "epoch": 77.11, "learning_rate": 1.1457895285290848e-05, "loss": 1.857, "step": 26640000 }, { "epoch": 77.11, "learning_rate": 1.1457173084938867e-05, "loss": 1.8434, "step": 26640500 }, { "epoch": 77.11, "learning_rate": 1.1456449437291591e-05, "loss": 1.8604, "step": 26641000 }, { "epoch": 77.12, "learning_rate": 1.1455725789644313e-05, "loss": 1.8623, "step": 26641500 }, { "epoch": 77.12, "learning_rate": 1.1455002141997037e-05, "loss": 1.8826, "step": 26642000 }, { "epoch": 77.12, "learning_rate": 1.1454279941645054e-05, "loss": 1.8498, "step": 26642500 }, { "epoch": 77.12, "learning_rate": 1.1453556293997778e-05, "loss": 1.8425, "step": 26643000 }, { "epoch": 77.12, "learning_rate": 1.14528326463505e-05, "loss": 1.8658, "step": 26643500 }, { "epoch": 77.12, "learning_rate": 1.1452108998703224e-05, "loss": 1.8648, "step": 26644000 }, { "epoch": 77.12, "learning_rate": 1.1451385351055947e-05, "loss": 1.8665, "step": 26644500 }, { "epoch": 77.13, "learning_rate": 1.145066170340867e-05, "loss": 1.8553, "step": 26645000 }, { "epoch": 77.13, "learning_rate": 1.1449938055761393e-05, "loss": 1.8279, "step": 26645500 }, { "epoch": 77.13, "learning_rate": 1.1449215855409412e-05, "loss": 1.8487, "step": 26646000 }, { "epoch": 77.13, "learning_rate": 1.1448492207762136e-05, "loss": 1.861, "step": 26646500 }, { "epoch": 77.13, "learning_rate": 1.1447768560114858e-05, "loss": 1.856, "step": 26647000 }, { "epoch": 77.13, "learning_rate": 1.144704491246758e-05, "loss": 1.8537, "step": 26647500 }, { "epoch": 77.14, "learning_rate": 1.1446321264820304e-05, "loss": 1.8652, "step": 26648000 }, { "epoch": 77.14, "learning_rate": 1.1445597617173028e-05, "loss": 1.8505, "step": 26648500 }, { "epoch": 77.14, "learning_rate": 1.1444875416821045e-05, "loss": 1.8762, "step": 26649000 }, { "epoch": 77.14, "learning_rate": 1.1444151769173769e-05, "loss": 1.8629, "step": 26649500 }, { "epoch": 77.14, "learning_rate": 1.1443428121526491e-05, "loss": 1.8813, "step": 26650000 }, { "epoch": 77.14, "learning_rate": 1.1442704473879215e-05, "loss": 1.8561, "step": 26650500 }, { "epoch": 77.14, "learning_rate": 1.1441980826231938e-05, "loss": 1.8491, "step": 26651000 }, { "epoch": 77.15, "learning_rate": 1.1441257178584661e-05, "loss": 1.828, "step": 26651500 }, { "epoch": 77.15, "learning_rate": 1.144053497823268e-05, "loss": 1.8484, "step": 26652000 }, { "epoch": 77.15, "learning_rate": 1.1439811330585403e-05, "loss": 1.8672, "step": 26652500 }, { "epoch": 77.15, "learning_rate": 1.1439087682938125e-05, "loss": 1.8667, "step": 26653000 }, { "epoch": 77.15, "learning_rate": 1.1438364035290849e-05, "loss": 1.8701, "step": 26653500 }, { "epoch": 77.15, "learning_rate": 1.1437640387643573e-05, "loss": 1.863, "step": 26654000 }, { "epoch": 77.15, "learning_rate": 1.143691818729159e-05, "loss": 1.8567, "step": 26654500 }, { "epoch": 77.16, "learning_rate": 1.1436194539644312e-05, "loss": 1.8461, "step": 26655000 }, { "epoch": 77.16, "learning_rate": 1.1435470891997036e-05, "loss": 1.8648, "step": 26655500 }, { "epoch": 77.16, "learning_rate": 1.143474724434976e-05, "loss": 1.8846, "step": 26656000 }, { "epoch": 77.16, "learning_rate": 1.1434023596702482e-05, "loss": 1.8364, "step": 26656500 }, { "epoch": 77.16, "learning_rate": 1.1433299949055206e-05, "loss": 1.842, "step": 26657000 }, { "epoch": 77.16, "learning_rate": 1.1432577748703223e-05, "loss": 1.8567, "step": 26657500 }, { "epoch": 77.16, "learning_rate": 1.1431854101055947e-05, "loss": 1.871, "step": 26658000 }, { "epoch": 77.17, "learning_rate": 1.143113045340867e-05, "loss": 1.8392, "step": 26658500 }, { "epoch": 77.17, "learning_rate": 1.1430406805761394e-05, "loss": 1.8353, "step": 26659000 }, { "epoch": 77.17, "learning_rate": 1.1429684605409413e-05, "loss": 1.8634, "step": 26659500 }, { "epoch": 77.17, "learning_rate": 1.1428960957762135e-05, "loss": 1.8455, "step": 26660000 }, { "epoch": 77.17, "learning_rate": 1.1428237310114857e-05, "loss": 1.853, "step": 26660500 }, { "epoch": 77.17, "learning_rate": 1.1427513662467581e-05, "loss": 1.846, "step": 26661000 }, { "epoch": 77.17, "learning_rate": 1.1426790014820305e-05, "loss": 1.829, "step": 26661500 }, { "epoch": 77.18, "learning_rate": 1.1426066367173027e-05, "loss": 1.8467, "step": 26662000 }, { "epoch": 77.18, "learning_rate": 1.1425342719525751e-05, "loss": 1.8534, "step": 26662500 }, { "epoch": 77.18, "learning_rate": 1.1424619071878475e-05, "loss": 1.8474, "step": 26663000 }, { "epoch": 77.18, "learning_rate": 1.1423895424231197e-05, "loss": 1.8378, "step": 26663500 }, { "epoch": 77.18, "learning_rate": 1.142317177658392e-05, "loss": 1.8449, "step": 26664000 }, { "epoch": 77.18, "learning_rate": 1.1422448128936645e-05, "loss": 1.8416, "step": 26664500 }, { "epoch": 77.18, "learning_rate": 1.1421725928584662e-05, "loss": 1.8551, "step": 26665000 }, { "epoch": 77.19, "learning_rate": 1.1421002280937384e-05, "loss": 1.8446, "step": 26665500 }, { "epoch": 77.19, "learning_rate": 1.1420278633290107e-05, "loss": 1.856, "step": 26666000 }, { "epoch": 77.19, "learning_rate": 1.1419554985642832e-05, "loss": 1.883, "step": 26666500 }, { "epoch": 77.19, "learning_rate": 1.1418831337995555e-05, "loss": 1.8538, "step": 26667000 }, { "epoch": 77.19, "learning_rate": 1.1418107690348277e-05, "loss": 1.8436, "step": 26667500 }, { "epoch": 77.19, "learning_rate": 1.1417385489996294e-05, "loss": 1.8202, "step": 26668000 }, { "epoch": 77.19, "learning_rate": 1.141666184234902e-05, "loss": 1.859, "step": 26668500 }, { "epoch": 77.2, "learning_rate": 1.1415938194701742e-05, "loss": 1.8767, "step": 26669000 }, { "epoch": 77.2, "learning_rate": 1.1415214547054464e-05, "loss": 1.8258, "step": 26669500 }, { "epoch": 77.2, "learning_rate": 1.1414490899407188e-05, "loss": 1.8518, "step": 26670000 }, { "epoch": 77.2, "learning_rate": 1.1413767251759912e-05, "loss": 1.8706, "step": 26670500 }, { "epoch": 77.2, "learning_rate": 1.141304505140793e-05, "loss": 1.8631, "step": 26671000 }, { "epoch": 77.2, "learning_rate": 1.1412321403760651e-05, "loss": 1.8728, "step": 26671500 }, { "epoch": 77.2, "learning_rate": 1.1411597756113377e-05, "loss": 1.8651, "step": 26672000 }, { "epoch": 77.21, "learning_rate": 1.1410875555761394e-05, "loss": 1.8367, "step": 26672500 }, { "epoch": 77.21, "learning_rate": 1.1410151908114117e-05, "loss": 1.8775, "step": 26673000 }, { "epoch": 77.21, "learning_rate": 1.1409428260466839e-05, "loss": 1.8556, "step": 26673500 }, { "epoch": 77.21, "learning_rate": 1.1408704612819564e-05, "loss": 1.8777, "step": 26674000 }, { "epoch": 77.21, "learning_rate": 1.1407980965172287e-05, "loss": 1.8685, "step": 26674500 }, { "epoch": 77.21, "learning_rate": 1.1407258764820304e-05, "loss": 1.8774, "step": 26675000 }, { "epoch": 77.21, "learning_rate": 1.1406535117173026e-05, "loss": 1.8563, "step": 26675500 }, { "epoch": 77.22, "learning_rate": 1.1405811469525752e-05, "loss": 1.8709, "step": 26676000 }, { "epoch": 77.22, "learning_rate": 1.1405087821878474e-05, "loss": 1.8513, "step": 26676500 }, { "epoch": 77.22, "learning_rate": 1.1404365621526491e-05, "loss": 1.8551, "step": 26677000 }, { "epoch": 77.22, "learning_rate": 1.1403641973879215e-05, "loss": 1.8553, "step": 26677500 }, { "epoch": 77.22, "learning_rate": 1.1402918326231939e-05, "loss": 1.8632, "step": 26678000 }, { "epoch": 77.22, "learning_rate": 1.1402194678584661e-05, "loss": 1.8199, "step": 26678500 }, { "epoch": 77.22, "learning_rate": 1.1401471030937384e-05, "loss": 1.8566, "step": 26679000 }, { "epoch": 77.23, "learning_rate": 1.140074738329011e-05, "loss": 1.8474, "step": 26679500 }, { "epoch": 77.23, "learning_rate": 1.1400023735642831e-05, "loss": 1.8421, "step": 26680000 }, { "epoch": 77.23, "learning_rate": 1.1399300087995554e-05, "loss": 1.8303, "step": 26680500 }, { "epoch": 77.23, "learning_rate": 1.1398576440348278e-05, "loss": 1.8806, "step": 26681000 }, { "epoch": 77.23, "learning_rate": 1.1397854239996297e-05, "loss": 1.8564, "step": 26681500 }, { "epoch": 77.23, "learning_rate": 1.1397130592349019e-05, "loss": 1.8689, "step": 26682000 }, { "epoch": 77.23, "learning_rate": 1.1396406944701741e-05, "loss": 1.8398, "step": 26682500 }, { "epoch": 77.24, "learning_rate": 1.1395683297054467e-05, "loss": 1.8482, "step": 26683000 }, { "epoch": 77.24, "learning_rate": 1.1394959649407189e-05, "loss": 1.8821, "step": 26683500 }, { "epoch": 77.24, "learning_rate": 1.1394236001759911e-05, "loss": 1.8405, "step": 26684000 }, { "epoch": 77.24, "learning_rate": 1.1393512354112635e-05, "loss": 1.858, "step": 26684500 }, { "epoch": 77.24, "learning_rate": 1.1392788706465359e-05, "loss": 1.8578, "step": 26685000 }, { "epoch": 77.24, "learning_rate": 1.1392065058818081e-05, "loss": 1.8447, "step": 26685500 }, { "epoch": 77.25, "learning_rate": 1.1391342858466098e-05, "loss": 1.8431, "step": 26686000 }, { "epoch": 77.25, "learning_rate": 1.1390620658114116e-05, "loss": 1.844, "step": 26686500 }, { "epoch": 77.25, "learning_rate": 1.1389897010466841e-05, "loss": 1.8568, "step": 26687000 }, { "epoch": 77.25, "learning_rate": 1.1389173362819564e-05, "loss": 1.8381, "step": 26687500 }, { "epoch": 77.25, "learning_rate": 1.1388449715172286e-05, "loss": 1.8394, "step": 26688000 }, { "epoch": 77.25, "learning_rate": 1.138772606752501e-05, "loss": 1.8456, "step": 26688500 }, { "epoch": 77.25, "learning_rate": 1.1387002419877734e-05, "loss": 1.8195, "step": 26689000 }, { "epoch": 77.26, "learning_rate": 1.1386278772230456e-05, "loss": 1.8327, "step": 26689500 }, { "epoch": 77.26, "learning_rate": 1.138555512458318e-05, "loss": 1.853, "step": 26690000 }, { "epoch": 77.26, "learning_rate": 1.1384832924231199e-05, "loss": 1.8706, "step": 26690500 }, { "epoch": 77.26, "learning_rate": 1.1384109276583921e-05, "loss": 1.8402, "step": 26691000 }, { "epoch": 77.26, "learning_rate": 1.1383385628936643e-05, "loss": 1.8339, "step": 26691500 }, { "epoch": 77.26, "learning_rate": 1.138266342858466e-05, "loss": 1.8438, "step": 26692000 }, { "epoch": 77.26, "learning_rate": 1.1381939780937386e-05, "loss": 1.8643, "step": 26692500 }, { "epoch": 77.27, "learning_rate": 1.1381216133290108e-05, "loss": 1.8396, "step": 26693000 }, { "epoch": 77.27, "learning_rate": 1.138049248564283e-05, "loss": 1.8639, "step": 26693500 }, { "epoch": 77.27, "learning_rate": 1.1379768837995554e-05, "loss": 1.8546, "step": 26694000 }, { "epoch": 77.27, "learning_rate": 1.1379045190348278e-05, "loss": 1.8815, "step": 26694500 }, { "epoch": 77.27, "learning_rate": 1.1378321542701e-05, "loss": 1.8603, "step": 26695000 }, { "epoch": 77.27, "learning_rate": 1.1377597895053725e-05, "loss": 1.8478, "step": 26695500 }, { "epoch": 77.27, "learning_rate": 1.1376875694701742e-05, "loss": 1.8589, "step": 26696000 }, { "epoch": 77.28, "learning_rate": 1.137615349434976e-05, "loss": 1.8424, "step": 26696500 }, { "epoch": 77.28, "learning_rate": 1.1375429846702483e-05, "loss": 1.8541, "step": 26697000 }, { "epoch": 77.28, "learning_rate": 1.13747076463505e-05, "loss": 1.8677, "step": 26697500 }, { "epoch": 77.28, "learning_rate": 1.1373983998703224e-05, "loss": 1.865, "step": 26698000 }, { "epoch": 77.28, "learning_rate": 1.1373260351055948e-05, "loss": 1.8551, "step": 26698500 }, { "epoch": 77.28, "learning_rate": 1.137253670340867e-05, "loss": 1.8615, "step": 26699000 }, { "epoch": 77.28, "learning_rate": 1.1371814503056688e-05, "loss": 1.8699, "step": 26699500 }, { "epoch": 77.29, "learning_rate": 1.1371090855409412e-05, "loss": 1.8649, "step": 26700000 }, { "epoch": 77.29, "learning_rate": 1.1370367207762135e-05, "loss": 1.8463, "step": 26700500 }, { "epoch": 77.29, "learning_rate": 1.1369643560114858e-05, "loss": 1.8593, "step": 26701000 }, { "epoch": 77.29, "learning_rate": 1.1368919912467582e-05, "loss": 1.8536, "step": 26701500 }, { "epoch": 77.29, "learning_rate": 1.1368196264820306e-05, "loss": 1.8731, "step": 26702000 }, { "epoch": 77.29, "learning_rate": 1.1367472617173028e-05, "loss": 1.846, "step": 26702500 }, { "epoch": 77.29, "learning_rate": 1.136674896952575e-05, "loss": 1.8588, "step": 26703000 }, { "epoch": 77.3, "learning_rate": 1.1366025321878474e-05, "loss": 1.8681, "step": 26703500 }, { "epoch": 77.3, "learning_rate": 1.1365301674231198e-05, "loss": 1.8663, "step": 26704000 }, { "epoch": 77.3, "learning_rate": 1.136457802658392e-05, "loss": 1.8834, "step": 26704500 }, { "epoch": 77.3, "learning_rate": 1.1363854378936644e-05, "loss": 1.8431, "step": 26705000 }, { "epoch": 77.3, "learning_rate": 1.1363130731289368e-05, "loss": 1.8564, "step": 26705500 }, { "epoch": 77.3, "learning_rate": 1.136240708364209e-05, "loss": 1.8593, "step": 26706000 }, { "epoch": 77.3, "learning_rate": 1.1361683435994812e-05, "loss": 1.8691, "step": 26706500 }, { "epoch": 77.31, "learning_rate": 1.1360959788347536e-05, "loss": 1.8598, "step": 26707000 }, { "epoch": 77.31, "learning_rate": 1.1360237587995555e-05, "loss": 1.8506, "step": 26707500 }, { "epoch": 77.31, "learning_rate": 1.1359513940348277e-05, "loss": 1.8584, "step": 26708000 }, { "epoch": 77.31, "learning_rate": 1.1358790292701001e-05, "loss": 1.862, "step": 26708500 }, { "epoch": 77.31, "learning_rate": 1.1358068092349019e-05, "loss": 1.8623, "step": 26709000 }, { "epoch": 77.31, "learning_rate": 1.1357344444701743e-05, "loss": 1.8617, "step": 26709500 }, { "epoch": 77.31, "learning_rate": 1.1356620797054465e-05, "loss": 1.8673, "step": 26710000 }, { "epoch": 77.32, "learning_rate": 1.1355897149407189e-05, "loss": 1.8374, "step": 26710500 }, { "epoch": 77.32, "learning_rate": 1.1355173501759913e-05, "loss": 1.8603, "step": 26711000 }, { "epoch": 77.32, "learning_rate": 1.1354449854112635e-05, "loss": 1.8777, "step": 26711500 }, { "epoch": 77.32, "learning_rate": 1.1353726206465357e-05, "loss": 1.8459, "step": 26712000 }, { "epoch": 77.32, "learning_rate": 1.1353004006113376e-05, "loss": 1.8536, "step": 26712500 }, { "epoch": 77.32, "learning_rate": 1.13522803584661e-05, "loss": 1.8679, "step": 26713000 }, { "epoch": 77.32, "learning_rate": 1.1351556710818822e-05, "loss": 1.8668, "step": 26713500 }, { "epoch": 77.33, "learning_rate": 1.1350833063171546e-05, "loss": 1.8817, "step": 26714000 }, { "epoch": 77.33, "learning_rate": 1.1350109415524268e-05, "loss": 1.8367, "step": 26714500 }, { "epoch": 77.33, "learning_rate": 1.1349385767876992e-05, "loss": 1.8791, "step": 26715000 }, { "epoch": 77.33, "learning_rate": 1.1348662120229715e-05, "loss": 1.8627, "step": 26715500 }, { "epoch": 77.33, "learning_rate": 1.1347938472582439e-05, "loss": 1.8672, "step": 26716000 }, { "epoch": 77.33, "learning_rate": 1.1347216272230456e-05, "loss": 1.8538, "step": 26716500 }, { "epoch": 77.33, "learning_rate": 1.134649262458318e-05, "loss": 1.8466, "step": 26717000 }, { "epoch": 77.34, "learning_rate": 1.1345768976935902e-05, "loss": 1.8663, "step": 26717500 }, { "epoch": 77.34, "learning_rate": 1.1345046776583921e-05, "loss": 1.8384, "step": 26718000 }, { "epoch": 77.34, "learning_rate": 1.1344323128936643e-05, "loss": 1.8527, "step": 26718500 }, { "epoch": 77.34, "learning_rate": 1.1343599481289367e-05, "loss": 1.8633, "step": 26719000 }, { "epoch": 77.34, "learning_rate": 1.1342875833642091e-05, "loss": 1.8222, "step": 26719500 }, { "epoch": 77.34, "learning_rate": 1.1342152185994813e-05, "loss": 1.8724, "step": 26720000 }, { "epoch": 77.34, "learning_rate": 1.1341428538347537e-05, "loss": 1.875, "step": 26720500 }, { "epoch": 77.35, "learning_rate": 1.1340706337995554e-05, "loss": 1.838, "step": 26721000 }, { "epoch": 77.35, "learning_rate": 1.1339984137643572e-05, "loss": 1.8572, "step": 26721500 }, { "epoch": 77.35, "learning_rate": 1.1339260489996296e-05, "loss": 1.8553, "step": 26722000 }, { "epoch": 77.35, "learning_rate": 1.133853684234902e-05, "loss": 1.861, "step": 26722500 }, { "epoch": 77.35, "learning_rate": 1.1337813194701742e-05, "loss": 1.8357, "step": 26723000 }, { "epoch": 77.35, "learning_rate": 1.1337089547054466e-05, "loss": 1.8312, "step": 26723500 }, { "epoch": 77.36, "learning_rate": 1.1336365899407188e-05, "loss": 1.8459, "step": 26724000 }, { "epoch": 77.36, "learning_rate": 1.1335643699055207e-05, "loss": 1.869, "step": 26724500 }, { "epoch": 77.36, "learning_rate": 1.1334920051407929e-05, "loss": 1.8604, "step": 26725000 }, { "epoch": 77.36, "learning_rate": 1.1334196403760653e-05, "loss": 1.859, "step": 26725500 }, { "epoch": 77.36, "learning_rate": 1.133347420340867e-05, "loss": 1.8623, "step": 26726000 }, { "epoch": 77.36, "learning_rate": 1.1332750555761394e-05, "loss": 1.8624, "step": 26726500 }, { "epoch": 77.36, "learning_rate": 1.1332026908114116e-05, "loss": 1.8469, "step": 26727000 }, { "epoch": 77.37, "learning_rate": 1.133130326046684e-05, "loss": 1.8508, "step": 26727500 }, { "epoch": 77.37, "learning_rate": 1.1330579612819563e-05, "loss": 1.8571, "step": 26728000 }, { "epoch": 77.37, "learning_rate": 1.1329855965172286e-05, "loss": 1.848, "step": 26728500 }, { "epoch": 77.37, "learning_rate": 1.132913231752501e-05, "loss": 1.8491, "step": 26729000 }, { "epoch": 77.37, "learning_rate": 1.1328408669877733e-05, "loss": 1.843, "step": 26729500 }, { "epoch": 77.37, "learning_rate": 1.1327685022230457e-05, "loss": 1.8803, "step": 26730000 }, { "epoch": 77.37, "learning_rate": 1.1326961374583179e-05, "loss": 1.8503, "step": 26730500 }, { "epoch": 77.38, "learning_rate": 1.1326237726935903e-05, "loss": 1.8529, "step": 26731000 }, { "epoch": 77.38, "learning_rate": 1.132551552658392e-05, "loss": 1.8765, "step": 26731500 }, { "epoch": 77.38, "learning_rate": 1.1324791878936644e-05, "loss": 1.8605, "step": 26732000 }, { "epoch": 77.38, "learning_rate": 1.1324068231289368e-05, "loss": 1.8481, "step": 26732500 }, { "epoch": 77.38, "learning_rate": 1.132334458364209e-05, "loss": 1.8544, "step": 26733000 }, { "epoch": 77.38, "learning_rate": 1.1322620935994814e-05, "loss": 1.8296, "step": 26733500 }, { "epoch": 77.38, "learning_rate": 1.1321897288347536e-05, "loss": 1.8591, "step": 26734000 }, { "epoch": 77.39, "learning_rate": 1.132117364070026e-05, "loss": 1.885, "step": 26734500 }, { "epoch": 77.39, "learning_rate": 1.1320449993052982e-05, "loss": 1.8674, "step": 26735000 }, { "epoch": 77.39, "learning_rate": 1.1319726345405706e-05, "loss": 1.8205, "step": 26735500 }, { "epoch": 77.39, "learning_rate": 1.131900269775843e-05, "loss": 1.8504, "step": 26736000 }, { "epoch": 77.39, "learning_rate": 1.1318280497406447e-05, "loss": 1.8635, "step": 26736500 }, { "epoch": 77.39, "learning_rate": 1.131755684975917e-05, "loss": 1.8581, "step": 26737000 }, { "epoch": 77.39, "learning_rate": 1.1316833202111894e-05, "loss": 1.8458, "step": 26737500 }, { "epoch": 77.4, "learning_rate": 1.1316109554464618e-05, "loss": 1.8363, "step": 26738000 }, { "epoch": 77.4, "learning_rate": 1.131538590681734e-05, "loss": 1.8382, "step": 26738500 }, { "epoch": 77.4, "learning_rate": 1.1314662259170064e-05, "loss": 1.8652, "step": 26739000 }, { "epoch": 77.4, "learning_rate": 1.1313938611522788e-05, "loss": 1.869, "step": 26739500 }, { "epoch": 77.4, "learning_rate": 1.1313216411170805e-05, "loss": 1.8601, "step": 26740000 }, { "epoch": 77.4, "learning_rate": 1.1312492763523527e-05, "loss": 1.8791, "step": 26740500 }, { "epoch": 77.4, "learning_rate": 1.1311769115876251e-05, "loss": 1.8413, "step": 26741000 }, { "epoch": 77.41, "learning_rate": 1.1311045468228975e-05, "loss": 1.8399, "step": 26741500 }, { "epoch": 77.41, "learning_rate": 1.1310321820581697e-05, "loss": 1.8488, "step": 26742000 }, { "epoch": 77.41, "learning_rate": 1.1309598172934421e-05, "loss": 1.8758, "step": 26742500 }, { "epoch": 77.41, "learning_rate": 1.1308874525287143e-05, "loss": 1.8421, "step": 26743000 }, { "epoch": 77.41, "learning_rate": 1.1308150877639867e-05, "loss": 1.8383, "step": 26743500 }, { "epoch": 77.41, "learning_rate": 1.1307428677287885e-05, "loss": 1.8662, "step": 26744000 }, { "epoch": 77.41, "learning_rate": 1.1306705029640609e-05, "loss": 1.844, "step": 26744500 }, { "epoch": 77.42, "learning_rate": 1.1305981381993332e-05, "loss": 1.8392, "step": 26745000 }, { "epoch": 77.42, "learning_rate": 1.130525918164135e-05, "loss": 1.8539, "step": 26745500 }, { "epoch": 77.42, "learning_rate": 1.1304535533994072e-05, "loss": 1.8247, "step": 26746000 }, { "epoch": 77.42, "learning_rate": 1.1303811886346796e-05, "loss": 1.849, "step": 26746500 }, { "epoch": 77.42, "learning_rate": 1.130308823869952e-05, "loss": 1.8617, "step": 26747000 }, { "epoch": 77.42, "learning_rate": 1.1302364591052242e-05, "loss": 1.8611, "step": 26747500 }, { "epoch": 77.42, "learning_rate": 1.1301640943404964e-05, "loss": 1.8359, "step": 26748000 }, { "epoch": 77.43, "learning_rate": 1.1300917295757688e-05, "loss": 1.8577, "step": 26748500 }, { "epoch": 77.43, "learning_rate": 1.1300193648110412e-05, "loss": 1.8469, "step": 26749000 }, { "epoch": 77.43, "learning_rate": 1.1299470000463134e-05, "loss": 1.8813, "step": 26749500 }, { "epoch": 77.43, "learning_rate": 1.1298746352815858e-05, "loss": 1.8403, "step": 26750000 }, { "epoch": 77.43, "learning_rate": 1.1298022705168582e-05, "loss": 1.8515, "step": 26750500 }, { "epoch": 77.43, "learning_rate": 1.1297299057521304e-05, "loss": 1.8551, "step": 26751000 }, { "epoch": 77.43, "learning_rate": 1.1296575409874027e-05, "loss": 1.8507, "step": 26751500 }, { "epoch": 77.44, "learning_rate": 1.1295853209522046e-05, "loss": 1.854, "step": 26752000 }, { "epoch": 77.44, "learning_rate": 1.129512956187477e-05, "loss": 1.8672, "step": 26752500 }, { "epoch": 77.44, "learning_rate": 1.1294405914227492e-05, "loss": 1.8542, "step": 26753000 }, { "epoch": 77.44, "learning_rate": 1.1293682266580216e-05, "loss": 1.851, "step": 26753500 }, { "epoch": 77.44, "learning_rate": 1.1292961513523528e-05, "loss": 1.8655, "step": 26754000 }, { "epoch": 77.44, "learning_rate": 1.1292237865876252e-05, "loss": 1.8694, "step": 26754500 }, { "epoch": 77.44, "learning_rate": 1.1291514218228974e-05, "loss": 1.8556, "step": 26755000 }, { "epoch": 77.45, "learning_rate": 1.1290790570581696e-05, "loss": 1.868, "step": 26755500 }, { "epoch": 77.45, "learning_rate": 1.129006692293442e-05, "loss": 1.8283, "step": 26756000 }, { "epoch": 77.45, "learning_rate": 1.1289343275287144e-05, "loss": 1.8666, "step": 26756500 }, { "epoch": 77.45, "learning_rate": 1.1288619627639866e-05, "loss": 1.8627, "step": 26757000 }, { "epoch": 77.45, "learning_rate": 1.128789597999259e-05, "loss": 1.857, "step": 26757500 }, { "epoch": 77.45, "learning_rate": 1.128717377964061e-05, "loss": 1.8634, "step": 26758000 }, { "epoch": 77.45, "learning_rate": 1.1286450131993332e-05, "loss": 1.8586, "step": 26758500 }, { "epoch": 77.46, "learning_rate": 1.1285726484346054e-05, "loss": 1.8385, "step": 26759000 }, { "epoch": 77.46, "learning_rate": 1.1285002836698778e-05, "loss": 1.8623, "step": 26759500 }, { "epoch": 77.46, "learning_rate": 1.128428208364209e-05, "loss": 1.8503, "step": 26760000 }, { "epoch": 77.46, "learning_rate": 1.1283558435994814e-05, "loss": 1.8636, "step": 26760500 }, { "epoch": 77.46, "learning_rate": 1.1282834788347536e-05, "loss": 1.859, "step": 26761000 }, { "epoch": 77.46, "learning_rate": 1.128211114070026e-05, "loss": 1.8517, "step": 26761500 }, { "epoch": 77.47, "learning_rate": 1.1281387493052984e-05, "loss": 1.8492, "step": 26762000 }, { "epoch": 77.47, "learning_rate": 1.1280663845405706e-05, "loss": 1.8719, "step": 26762500 }, { "epoch": 77.47, "learning_rate": 1.1279940197758428e-05, "loss": 1.8467, "step": 26763000 }, { "epoch": 77.47, "learning_rate": 1.1279216550111154e-05, "loss": 1.8626, "step": 26763500 }, { "epoch": 77.47, "learning_rate": 1.1278492902463876e-05, "loss": 1.8586, "step": 26764000 }, { "epoch": 77.47, "learning_rate": 1.1277770702111894e-05, "loss": 1.8467, "step": 26764500 }, { "epoch": 77.47, "learning_rate": 1.1277047054464616e-05, "loss": 1.8472, "step": 26765000 }, { "epoch": 77.48, "learning_rate": 1.1276323406817341e-05, "loss": 1.8572, "step": 26765500 }, { "epoch": 77.48, "learning_rate": 1.1275599759170064e-05, "loss": 1.8325, "step": 26766000 }, { "epoch": 77.48, "learning_rate": 1.1274876111522786e-05, "loss": 1.8524, "step": 26766500 }, { "epoch": 77.48, "learning_rate": 1.1274155358466098e-05, "loss": 1.8443, "step": 26767000 }, { "epoch": 77.48, "learning_rate": 1.1273431710818822e-05, "loss": 1.8642, "step": 26767500 }, { "epoch": 77.48, "learning_rate": 1.1272708063171546e-05, "loss": 1.889, "step": 26768000 }, { "epoch": 77.48, "learning_rate": 1.1271984415524268e-05, "loss": 1.8893, "step": 26768500 }, { "epoch": 77.49, "learning_rate": 1.127126366246758e-05, "loss": 1.8557, "step": 26769000 }, { "epoch": 77.49, "learning_rate": 1.1270540014820304e-05, "loss": 1.8565, "step": 26769500 }, { "epoch": 77.49, "learning_rate": 1.1269816367173028e-05, "loss": 1.8662, "step": 26770000 }, { "epoch": 77.49, "learning_rate": 1.126909271952575e-05, "loss": 1.8654, "step": 26770500 }, { "epoch": 77.49, "learning_rate": 1.1268369071878473e-05, "loss": 1.8625, "step": 26771000 }, { "epoch": 77.49, "learning_rate": 1.1267646871526492e-05, "loss": 1.8481, "step": 26771500 }, { "epoch": 77.49, "learning_rate": 1.1266923223879216e-05, "loss": 1.8527, "step": 26772000 }, { "epoch": 77.5, "learning_rate": 1.1266199576231938e-05, "loss": 1.8956, "step": 26772500 }, { "epoch": 77.5, "learning_rate": 1.126547592858466e-05, "loss": 1.8573, "step": 26773000 }, { "epoch": 77.5, "learning_rate": 1.1264752280937386e-05, "loss": 1.8385, "step": 26773500 }, { "epoch": 77.5, "learning_rate": 1.1264028633290108e-05, "loss": 1.8488, "step": 26774000 }, { "epoch": 77.5, "learning_rate": 1.126330498564283e-05, "loss": 1.8443, "step": 26774500 }, { "epoch": 77.5, "learning_rate": 1.1262581337995554e-05, "loss": 1.8725, "step": 26775000 }, { "epoch": 77.5, "learning_rate": 1.1261857690348278e-05, "loss": 1.8507, "step": 26775500 }, { "epoch": 77.51, "learning_rate": 1.1261134042701e-05, "loss": 1.8552, "step": 26776000 }, { "epoch": 77.51, "learning_rate": 1.1260410395053724e-05, "loss": 1.8398, "step": 26776500 }, { "epoch": 77.51, "learning_rate": 1.1259686747406448e-05, "loss": 1.8484, "step": 26777000 }, { "epoch": 77.51, "learning_rate": 1.1258964547054465e-05, "loss": 1.8671, "step": 26777500 }, { "epoch": 77.51, "learning_rate": 1.1258240899407188e-05, "loss": 1.8425, "step": 26778000 }, { "epoch": 77.51, "learning_rate": 1.1257517251759912e-05, "loss": 1.8285, "step": 26778500 }, { "epoch": 77.51, "learning_rate": 1.1256793604112636e-05, "loss": 1.8541, "step": 26779000 }, { "epoch": 77.52, "learning_rate": 1.1256069956465358e-05, "loss": 1.8639, "step": 26779500 }, { "epoch": 77.52, "learning_rate": 1.1255347756113375e-05, "loss": 1.8624, "step": 26780000 }, { "epoch": 77.52, "learning_rate": 1.1254624108466099e-05, "loss": 1.842, "step": 26780500 }, { "epoch": 77.52, "learning_rate": 1.1253900460818823e-05, "loss": 1.8552, "step": 26781000 }, { "epoch": 77.52, "learning_rate": 1.1253176813171545e-05, "loss": 1.8495, "step": 26781500 }, { "epoch": 77.52, "learning_rate": 1.1252453165524269e-05, "loss": 1.8491, "step": 26782000 }, { "epoch": 77.52, "learning_rate": 1.1251730965172288e-05, "loss": 1.8529, "step": 26782500 }, { "epoch": 77.53, "learning_rate": 1.125100731752501e-05, "loss": 1.8483, "step": 26783000 }, { "epoch": 77.53, "learning_rate": 1.1250285117173027e-05, "loss": 1.8736, "step": 26783500 }, { "epoch": 77.53, "learning_rate": 1.124956146952575e-05, "loss": 1.8443, "step": 26784000 }, { "epoch": 77.53, "learning_rate": 1.1248837821878475e-05, "loss": 1.8492, "step": 26784500 }, { "epoch": 77.53, "learning_rate": 1.1248114174231198e-05, "loss": 1.8426, "step": 26785000 }, { "epoch": 77.53, "learning_rate": 1.124739052658392e-05, "loss": 1.8525, "step": 26785500 }, { "epoch": 77.53, "learning_rate": 1.1246666878936644e-05, "loss": 1.8562, "step": 26786000 }, { "epoch": 77.54, "learning_rate": 1.1245943231289368e-05, "loss": 1.8518, "step": 26786500 }, { "epoch": 77.54, "learning_rate": 1.124521958364209e-05, "loss": 1.8441, "step": 26787000 }, { "epoch": 77.54, "learning_rate": 1.1244495935994814e-05, "loss": 1.8409, "step": 26787500 }, { "epoch": 77.54, "learning_rate": 1.1243772288347538e-05, "loss": 1.8538, "step": 26788000 }, { "epoch": 77.54, "learning_rate": 1.1243050087995555e-05, "loss": 1.8521, "step": 26788500 }, { "epoch": 77.54, "learning_rate": 1.1242326440348277e-05, "loss": 1.8576, "step": 26789000 }, { "epoch": 77.54, "learning_rate": 1.1241602792701001e-05, "loss": 1.8637, "step": 26789500 }, { "epoch": 77.55, "learning_rate": 1.1240879145053725e-05, "loss": 1.8376, "step": 26790000 }, { "epoch": 77.55, "learning_rate": 1.1240155497406447e-05, "loss": 1.8667, "step": 26790500 }, { "epoch": 77.55, "learning_rate": 1.123943184975917e-05, "loss": 1.8644, "step": 26791000 }, { "epoch": 77.55, "learning_rate": 1.1238708202111895e-05, "loss": 1.8728, "step": 26791500 }, { "epoch": 77.55, "learning_rate": 1.1237986001759912e-05, "loss": 1.8869, "step": 26792000 }, { "epoch": 77.55, "learning_rate": 1.1237262354112635e-05, "loss": 1.854, "step": 26792500 }, { "epoch": 77.55, "learning_rate": 1.1236538706465357e-05, "loss": 1.8665, "step": 26793000 }, { "epoch": 77.56, "learning_rate": 1.1235816506113376e-05, "loss": 1.8624, "step": 26793500 }, { "epoch": 77.56, "learning_rate": 1.12350928584661e-05, "loss": 1.9013, "step": 26794000 }, { "epoch": 77.56, "learning_rate": 1.1234369210818822e-05, "loss": 1.8901, "step": 26794500 }, { "epoch": 77.56, "learning_rate": 1.1233645563171546e-05, "loss": 1.8976, "step": 26795000 }, { "epoch": 77.56, "learning_rate": 1.1232923362819563e-05, "loss": 1.8982, "step": 26795500 }, { "epoch": 77.56, "learning_rate": 1.1232199715172287e-05, "loss": 1.8847, "step": 26796000 }, { "epoch": 77.56, "learning_rate": 1.1231477514820304e-05, "loss": 1.8771, "step": 26796500 }, { "epoch": 77.57, "learning_rate": 1.1230755314468322e-05, "loss": 1.8519, "step": 26797000 }, { "epoch": 77.57, "learning_rate": 1.1230031666821045e-05, "loss": 1.877, "step": 26797500 }, { "epoch": 77.57, "learning_rate": 1.122930801917377e-05, "loss": 1.8635, "step": 26798000 }, { "epoch": 77.57, "learning_rate": 1.1228584371526492e-05, "loss": 1.8662, "step": 26798500 }, { "epoch": 77.57, "learning_rate": 1.1227860723879214e-05, "loss": 1.8622, "step": 26799000 }, { "epoch": 77.57, "learning_rate": 1.122713707623194e-05, "loss": 1.8748, "step": 26799500 }, { "epoch": 77.58, "learning_rate": 1.1226413428584662e-05, "loss": 1.8348, "step": 26800000 }, { "epoch": 77.58, "learning_rate": 1.1225689780937384e-05, "loss": 1.8775, "step": 26800500 }, { "epoch": 77.58, "learning_rate": 1.1224966133290108e-05, "loss": 1.8468, "step": 26801000 }, { "epoch": 77.58, "learning_rate": 1.1224242485642832e-05, "loss": 1.8567, "step": 26801500 }, { "epoch": 77.58, "learning_rate": 1.1223518837995554e-05, "loss": 1.8756, "step": 26802000 }, { "epoch": 77.58, "learning_rate": 1.1222795190348278e-05, "loss": 1.8729, "step": 26802500 }, { "epoch": 77.58, "learning_rate": 1.1222071542701002e-05, "loss": 1.8523, "step": 26803000 }, { "epoch": 77.59, "learning_rate": 1.122134934234902e-05, "loss": 1.8231, "step": 26803500 }, { "epoch": 77.59, "learning_rate": 1.1220627141997036e-05, "loss": 1.8501, "step": 26804000 }, { "epoch": 77.59, "learning_rate": 1.1219903494349759e-05, "loss": 1.8722, "step": 26804500 }, { "epoch": 77.59, "learning_rate": 1.1219181293997778e-05, "loss": 1.8582, "step": 26805000 }, { "epoch": 77.59, "learning_rate": 1.1218457646350502e-05, "loss": 1.859, "step": 26805500 }, { "epoch": 77.59, "learning_rate": 1.1217733998703224e-05, "loss": 1.8518, "step": 26806000 }, { "epoch": 77.59, "learning_rate": 1.1217011798351241e-05, "loss": 1.8308, "step": 26806500 }, { "epoch": 77.6, "learning_rate": 1.1216288150703965e-05, "loss": 1.8419, "step": 26807000 }, { "epoch": 77.6, "learning_rate": 1.1215564503056689e-05, "loss": 1.8758, "step": 26807500 }, { "epoch": 77.6, "learning_rate": 1.1214840855409411e-05, "loss": 1.8722, "step": 26808000 }, { "epoch": 77.6, "learning_rate": 1.1214117207762135e-05, "loss": 1.8618, "step": 26808500 }, { "epoch": 77.6, "learning_rate": 1.1213393560114859e-05, "loss": 1.885, "step": 26809000 }, { "epoch": 77.6, "learning_rate": 1.1212669912467581e-05, "loss": 1.8516, "step": 26809500 }, { "epoch": 77.6, "learning_rate": 1.1211946264820303e-05, "loss": 1.8552, "step": 26810000 }, { "epoch": 77.61, "learning_rate": 1.1211222617173027e-05, "loss": 1.8516, "step": 26810500 }, { "epoch": 77.61, "learning_rate": 1.1210498969525751e-05, "loss": 1.8692, "step": 26811000 }, { "epoch": 77.61, "learning_rate": 1.1209775321878474e-05, "loss": 1.8666, "step": 26811500 }, { "epoch": 77.61, "learning_rate": 1.1209051674231197e-05, "loss": 1.8471, "step": 26812000 }, { "epoch": 77.61, "learning_rate": 1.1208328026583921e-05, "loss": 1.8649, "step": 26812500 }, { "epoch": 77.61, "learning_rate": 1.1207604378936644e-05, "loss": 1.849, "step": 26813000 }, { "epoch": 77.61, "learning_rate": 1.1206880731289368e-05, "loss": 1.8346, "step": 26813500 }, { "epoch": 77.62, "learning_rate": 1.120615708364209e-05, "loss": 1.8479, "step": 26814000 }, { "epoch": 77.62, "learning_rate": 1.1205433435994814e-05, "loss": 1.8604, "step": 26814500 }, { "epoch": 77.62, "learning_rate": 1.1204709788347536e-05, "loss": 1.849, "step": 26815000 }, { "epoch": 77.62, "learning_rate": 1.120398614070026e-05, "loss": 1.8491, "step": 26815500 }, { "epoch": 77.62, "learning_rate": 1.1203262493052984e-05, "loss": 1.85, "step": 26816000 }, { "epoch": 77.62, "learning_rate": 1.1202538845405706e-05, "loss": 1.8554, "step": 26816500 }, { "epoch": 77.62, "learning_rate": 1.1201816645053723e-05, "loss": 1.8645, "step": 26817000 }, { "epoch": 77.63, "learning_rate": 1.1201092997406447e-05, "loss": 1.8283, "step": 26817500 }, { "epoch": 77.63, "learning_rate": 1.1200369349759171e-05, "loss": 1.8257, "step": 26818000 }, { "epoch": 77.63, "learning_rate": 1.1199645702111893e-05, "loss": 1.8597, "step": 26818500 }, { "epoch": 77.63, "learning_rate": 1.1198922054464617e-05, "loss": 1.8615, "step": 26819000 }, { "epoch": 77.63, "learning_rate": 1.1198198406817341e-05, "loss": 1.8459, "step": 26819500 }, { "epoch": 77.63, "learning_rate": 1.1197474759170063e-05, "loss": 1.8675, "step": 26820000 }, { "epoch": 77.63, "learning_rate": 1.119675255881808e-05, "loss": 1.868, "step": 26820500 }, { "epoch": 77.64, "learning_rate": 1.1196028911170805e-05, "loss": 1.8616, "step": 26821000 }, { "epoch": 77.64, "learning_rate": 1.1195305263523529e-05, "loss": 1.8371, "step": 26821500 }, { "epoch": 77.64, "learning_rate": 1.119458161587625e-05, "loss": 1.871, "step": 26822000 }, { "epoch": 77.64, "learning_rate": 1.1193857968228975e-05, "loss": 1.8749, "step": 26822500 }, { "epoch": 77.64, "learning_rate": 1.1193134320581697e-05, "loss": 1.8608, "step": 26823000 }, { "epoch": 77.64, "learning_rate": 1.1192410672934421e-05, "loss": 1.8291, "step": 26823500 }, { "epoch": 77.64, "learning_rate": 1.1191687025287143e-05, "loss": 1.8739, "step": 26824000 }, { "epoch": 77.65, "learning_rate": 1.1190964824935162e-05, "loss": 1.8762, "step": 26824500 }, { "epoch": 77.65, "learning_rate": 1.1190241177287884e-05, "loss": 1.8846, "step": 26825000 }, { "epoch": 77.65, "learning_rate": 1.1189517529640608e-05, "loss": 1.8433, "step": 26825500 }, { "epoch": 77.65, "learning_rate": 1.1188793881993332e-05, "loss": 1.8762, "step": 26826000 }, { "epoch": 77.65, "learning_rate": 1.1188070234346054e-05, "loss": 1.8553, "step": 26826500 }, { "epoch": 77.65, "learning_rate": 1.1187346586698778e-05, "loss": 1.8358, "step": 26827000 }, { "epoch": 77.65, "learning_rate": 1.11866229390515e-05, "loss": 1.832, "step": 26827500 }, { "epoch": 77.66, "learning_rate": 1.1185899291404224e-05, "loss": 1.8748, "step": 26828000 }, { "epoch": 77.66, "learning_rate": 1.1185175643756947e-05, "loss": 1.847, "step": 26828500 }, { "epoch": 77.66, "learning_rate": 1.118445199610967e-05, "loss": 1.8591, "step": 26829000 }, { "epoch": 77.66, "learning_rate": 1.1183728348462395e-05, "loss": 1.8565, "step": 26829500 }, { "epoch": 77.66, "learning_rate": 1.1183006148110412e-05, "loss": 1.8407, "step": 26830000 }, { "epoch": 77.66, "learning_rate": 1.1182283947758429e-05, "loss": 1.8522, "step": 26830500 }, { "epoch": 77.66, "learning_rate": 1.1181560300111153e-05, "loss": 1.8481, "step": 26831000 }, { "epoch": 77.67, "learning_rate": 1.1180836652463877e-05, "loss": 1.8562, "step": 26831500 }, { "epoch": 77.67, "learning_rate": 1.1180113004816599e-05, "loss": 1.8505, "step": 26832000 }, { "epoch": 77.67, "learning_rate": 1.1179389357169323e-05, "loss": 1.8686, "step": 26832500 }, { "epoch": 77.67, "learning_rate": 1.1178665709522045e-05, "loss": 1.833, "step": 26833000 }, { "epoch": 77.67, "learning_rate": 1.117794206187477e-05, "loss": 1.8498, "step": 26833500 }, { "epoch": 77.67, "learning_rate": 1.1177218414227491e-05, "loss": 1.8889, "step": 26834000 }, { "epoch": 77.67, "learning_rate": 1.1176494766580215e-05, "loss": 1.8716, "step": 26834500 }, { "epoch": 77.68, "learning_rate": 1.117577111893294e-05, "loss": 1.8607, "step": 26835000 }, { "epoch": 77.68, "learning_rate": 1.1175048918580957e-05, "loss": 1.8764, "step": 26835500 }, { "epoch": 77.68, "learning_rate": 1.1174325270933679e-05, "loss": 1.8318, "step": 26836000 }, { "epoch": 77.68, "learning_rate": 1.1173601623286403e-05, "loss": 1.8676, "step": 26836500 }, { "epoch": 77.68, "learning_rate": 1.1172877975639127e-05, "loss": 1.8322, "step": 26837000 }, { "epoch": 77.68, "learning_rate": 1.1172155775287144e-05, "loss": 1.8336, "step": 26837500 }, { "epoch": 77.69, "learning_rate": 1.1171432127639866e-05, "loss": 1.8595, "step": 26838000 }, { "epoch": 77.69, "learning_rate": 1.117070847999259e-05, "loss": 1.8518, "step": 26838500 }, { "epoch": 77.69, "learning_rate": 1.1169986279640609e-05, "loss": 1.8841, "step": 26839000 }, { "epoch": 77.69, "learning_rate": 1.1169262631993331e-05, "loss": 1.8519, "step": 26839500 }, { "epoch": 77.69, "learning_rate": 1.1168538984346055e-05, "loss": 1.8459, "step": 26840000 }, { "epoch": 77.69, "learning_rate": 1.1167815336698777e-05, "loss": 1.8679, "step": 26840500 }, { "epoch": 77.69, "learning_rate": 1.1167091689051501e-05, "loss": 1.8369, "step": 26841000 }, { "epoch": 77.7, "learning_rate": 1.1166368041404224e-05, "loss": 1.8827, "step": 26841500 }, { "epoch": 77.7, "learning_rate": 1.1165644393756947e-05, "loss": 1.8565, "step": 26842000 }, { "epoch": 77.7, "learning_rate": 1.1164920746109671e-05, "loss": 1.8624, "step": 26842500 }, { "epoch": 77.7, "learning_rate": 1.1164198545757689e-05, "loss": 1.8741, "step": 26843000 }, { "epoch": 77.7, "learning_rate": 1.1163474898110411e-05, "loss": 1.868, "step": 26843500 }, { "epoch": 77.7, "learning_rate": 1.1162751250463135e-05, "loss": 1.8683, "step": 26844000 }, { "epoch": 77.7, "learning_rate": 1.1162027602815859e-05, "loss": 1.8433, "step": 26844500 }, { "epoch": 77.71, "learning_rate": 1.1161305402463876e-05, "loss": 1.8702, "step": 26845000 }, { "epoch": 77.71, "learning_rate": 1.1160581754816598e-05, "loss": 1.869, "step": 26845500 }, { "epoch": 77.71, "learning_rate": 1.1159858107169322e-05, "loss": 1.8317, "step": 26846000 }, { "epoch": 77.71, "learning_rate": 1.1159134459522046e-05, "loss": 1.8413, "step": 26846500 }, { "epoch": 77.71, "learning_rate": 1.1158412259170063e-05, "loss": 1.8595, "step": 26847000 }, { "epoch": 77.71, "learning_rate": 1.115769005881808e-05, "loss": 1.868, "step": 26847500 }, { "epoch": 77.71, "learning_rate": 1.1156966411170805e-05, "loss": 1.8548, "step": 26848000 }, { "epoch": 77.72, "learning_rate": 1.1156242763523528e-05, "loss": 1.8438, "step": 26848500 }, { "epoch": 77.72, "learning_rate": 1.115551911587625e-05, "loss": 1.8691, "step": 26849000 }, { "epoch": 77.72, "learning_rate": 1.1154795468228975e-05, "loss": 1.8686, "step": 26849500 }, { "epoch": 77.72, "learning_rate": 1.1154073267876992e-05, "loss": 1.853, "step": 26850000 }, { "epoch": 77.72, "learning_rate": 1.1153349620229716e-05, "loss": 1.8512, "step": 26850500 }, { "epoch": 77.72, "learning_rate": 1.1152625972582438e-05, "loss": 1.8694, "step": 26851000 }, { "epoch": 77.72, "learning_rate": 1.1151903772230455e-05, "loss": 1.8622, "step": 26851500 }, { "epoch": 77.73, "learning_rate": 1.1151181571878474e-05, "loss": 1.8762, "step": 26852000 }, { "epoch": 77.73, "learning_rate": 1.1150457924231198e-05, "loss": 1.869, "step": 26852500 }, { "epoch": 77.73, "learning_rate": 1.114973427658392e-05, "loss": 1.8506, "step": 26853000 }, { "epoch": 77.73, "learning_rate": 1.1149010628936643e-05, "loss": 1.8814, "step": 26853500 }, { "epoch": 77.73, "learning_rate": 1.1148288428584662e-05, "loss": 1.8578, "step": 26854000 }, { "epoch": 77.73, "learning_rate": 1.1147564780937385e-05, "loss": 1.851, "step": 26854500 }, { "epoch": 77.73, "learning_rate": 1.1146841133290108e-05, "loss": 1.8278, "step": 26855000 }, { "epoch": 77.74, "learning_rate": 1.1146117485642832e-05, "loss": 1.8481, "step": 26855500 }, { "epoch": 77.74, "learning_rate": 1.1145393837995554e-05, "loss": 1.853, "step": 26856000 }, { "epoch": 77.74, "learning_rate": 1.1144670190348278e-05, "loss": 1.8404, "step": 26856500 }, { "epoch": 77.74, "learning_rate": 1.1143946542701e-05, "loss": 1.8478, "step": 26857000 }, { "epoch": 77.74, "learning_rate": 1.1143222895053724e-05, "loss": 1.8591, "step": 26857500 }, { "epoch": 77.74, "learning_rate": 1.1142499247406448e-05, "loss": 1.8584, "step": 26858000 }, { "epoch": 77.74, "learning_rate": 1.114177559975917e-05, "loss": 1.8927, "step": 26858500 }, { "epoch": 77.75, "learning_rate": 1.1141051952111894e-05, "loss": 1.8465, "step": 26859000 }, { "epoch": 77.75, "learning_rate": 1.1140328304464618e-05, "loss": 1.8606, "step": 26859500 }, { "epoch": 77.75, "learning_rate": 1.113960465681734e-05, "loss": 1.8526, "step": 26860000 }, { "epoch": 77.75, "learning_rate": 1.1138881009170062e-05, "loss": 1.8315, "step": 26860500 }, { "epoch": 77.75, "learning_rate": 1.1138157361522786e-05, "loss": 1.8575, "step": 26861000 }, { "epoch": 77.75, "learning_rate": 1.113743371387551e-05, "loss": 1.8759, "step": 26861500 }, { "epoch": 77.75, "learning_rate": 1.1136710066228233e-05, "loss": 1.8511, "step": 26862000 }, { "epoch": 77.76, "learning_rate": 1.113598786587625e-05, "loss": 1.8846, "step": 26862500 }, { "epoch": 77.76, "learning_rate": 1.1135264218228975e-05, "loss": 1.8617, "step": 26863000 }, { "epoch": 77.76, "learning_rate": 1.1134543465172288e-05, "loss": 1.8516, "step": 26863500 }, { "epoch": 77.76, "learning_rate": 1.113381981752501e-05, "loss": 1.8578, "step": 26864000 }, { "epoch": 77.76, "learning_rate": 1.1133096169877732e-05, "loss": 1.8552, "step": 26864500 }, { "epoch": 77.76, "learning_rate": 1.1132372522230456e-05, "loss": 1.857, "step": 26865000 }, { "epoch": 77.76, "learning_rate": 1.113164887458318e-05, "loss": 1.846, "step": 26865500 }, { "epoch": 77.77, "learning_rate": 1.1130925226935902e-05, "loss": 1.8418, "step": 26866000 }, { "epoch": 77.77, "learning_rate": 1.1130201579288626e-05, "loss": 1.8635, "step": 26866500 }, { "epoch": 77.77, "learning_rate": 1.112947793164135e-05, "loss": 1.843, "step": 26867000 }, { "epoch": 77.77, "learning_rate": 1.1128754283994072e-05, "loss": 1.8633, "step": 26867500 }, { "epoch": 77.77, "learning_rate": 1.1128030636346795e-05, "loss": 1.8564, "step": 26868000 }, { "epoch": 77.77, "learning_rate": 1.112730698869952e-05, "loss": 1.8944, "step": 26868500 }, { "epoch": 77.77, "learning_rate": 1.1126583341052242e-05, "loss": 1.858, "step": 26869000 }, { "epoch": 77.78, "learning_rate": 1.1125859693404965e-05, "loss": 1.8713, "step": 26869500 }, { "epoch": 77.78, "learning_rate": 1.1125136045757689e-05, "loss": 1.8647, "step": 26870000 }, { "epoch": 77.78, "learning_rate": 1.1124412398110412e-05, "loss": 1.846, "step": 26870500 }, { "epoch": 77.78, "learning_rate": 1.1123688750463135e-05, "loss": 1.8564, "step": 26871000 }, { "epoch": 77.78, "learning_rate": 1.1122965102815857e-05, "loss": 1.8668, "step": 26871500 }, { "epoch": 77.78, "learning_rate": 1.1122241455168583e-05, "loss": 1.8485, "step": 26872000 }, { "epoch": 77.78, "learning_rate": 1.11215192548166e-05, "loss": 1.8684, "step": 26872500 }, { "epoch": 77.79, "learning_rate": 1.1120797054464617e-05, "loss": 1.8701, "step": 26873000 }, { "epoch": 77.79, "learning_rate": 1.1120074854112634e-05, "loss": 1.8706, "step": 26873500 }, { "epoch": 77.79, "learning_rate": 1.1119351206465357e-05, "loss": 1.8418, "step": 26874000 }, { "epoch": 77.79, "learning_rate": 1.1118627558818082e-05, "loss": 1.8427, "step": 26874500 }, { "epoch": 77.79, "learning_rate": 1.1117903911170804e-05, "loss": 1.8457, "step": 26875000 }, { "epoch": 77.79, "learning_rate": 1.1117180263523527e-05, "loss": 1.8569, "step": 26875500 }, { "epoch": 77.8, "learning_rate": 1.1116456615876252e-05, "loss": 1.8598, "step": 26876000 }, { "epoch": 77.8, "learning_rate": 1.1115732968228974e-05, "loss": 1.8642, "step": 26876500 }, { "epoch": 77.8, "learning_rate": 1.1115009320581697e-05, "loss": 1.8394, "step": 26877000 }, { "epoch": 77.8, "learning_rate": 1.111428567293442e-05, "loss": 1.8691, "step": 26877500 }, { "epoch": 77.8, "learning_rate": 1.1113562025287145e-05, "loss": 1.8487, "step": 26878000 }, { "epoch": 77.8, "learning_rate": 1.1112839824935162e-05, "loss": 1.8713, "step": 26878500 }, { "epoch": 77.8, "learning_rate": 1.1112116177287884e-05, "loss": 1.8688, "step": 26879000 }, { "epoch": 77.81, "learning_rate": 1.1111392529640608e-05, "loss": 1.8616, "step": 26879500 }, { "epoch": 77.81, "learning_rate": 1.1110668881993332e-05, "loss": 1.8614, "step": 26880000 }, { "epoch": 77.81, "learning_rate": 1.1109945234346054e-05, "loss": 1.8415, "step": 26880500 }, { "epoch": 77.81, "learning_rate": 1.1109221586698778e-05, "loss": 1.86, "step": 26881000 }, { "epoch": 77.81, "learning_rate": 1.1108497939051502e-05, "loss": 1.8536, "step": 26881500 }, { "epoch": 77.81, "learning_rate": 1.1107774291404224e-05, "loss": 1.8554, "step": 26882000 }, { "epoch": 77.81, "learning_rate": 1.1107050643756946e-05, "loss": 1.8293, "step": 26882500 }, { "epoch": 77.82, "learning_rate": 1.1106326996109672e-05, "loss": 1.8439, "step": 26883000 }, { "epoch": 77.82, "learning_rate": 1.1105603348462394e-05, "loss": 1.8672, "step": 26883500 }, { "epoch": 77.82, "learning_rate": 1.1104879700815117e-05, "loss": 1.8651, "step": 26884000 }, { "epoch": 77.82, "learning_rate": 1.1104157500463134e-05, "loss": 1.8698, "step": 26884500 }, { "epoch": 77.82, "learning_rate": 1.110343385281586e-05, "loss": 1.8687, "step": 26885000 }, { "epoch": 77.82, "learning_rate": 1.1102710205168582e-05, "loss": 1.8632, "step": 26885500 }, { "epoch": 77.82, "learning_rate": 1.1101986557521304e-05, "loss": 1.8501, "step": 26886000 }, { "epoch": 77.83, "learning_rate": 1.1101262909874028e-05, "loss": 1.8759, "step": 26886500 }, { "epoch": 77.83, "learning_rate": 1.1100539262226752e-05, "loss": 1.8598, "step": 26887000 }, { "epoch": 77.83, "learning_rate": 1.1099817061874769e-05, "loss": 1.8599, "step": 26887500 }, { "epoch": 77.83, "learning_rate": 1.1099094861522786e-05, "loss": 1.8236, "step": 26888000 }, { "epoch": 77.83, "learning_rate": 1.109837121387551e-05, "loss": 1.8398, "step": 26888500 }, { "epoch": 77.83, "learning_rate": 1.1097647566228234e-05, "loss": 1.86, "step": 26889000 }, { "epoch": 77.83, "learning_rate": 1.1096923918580956e-05, "loss": 1.8489, "step": 26889500 }, { "epoch": 77.84, "learning_rate": 1.1096200270933679e-05, "loss": 1.8538, "step": 26890000 }, { "epoch": 77.84, "learning_rate": 1.1095476623286404e-05, "loss": 1.8502, "step": 26890500 }, { "epoch": 77.84, "learning_rate": 1.1094754422934421e-05, "loss": 1.8418, "step": 26891000 }, { "epoch": 77.84, "learning_rate": 1.1094030775287144e-05, "loss": 1.828, "step": 26891500 }, { "epoch": 77.84, "learning_rate": 1.1093307127639866e-05, "loss": 1.8424, "step": 26892000 }, { "epoch": 77.84, "learning_rate": 1.1092583479992592e-05, "loss": 1.8621, "step": 26892500 }, { "epoch": 77.84, "learning_rate": 1.1091861279640609e-05, "loss": 1.8521, "step": 26893000 }, { "epoch": 77.85, "learning_rate": 1.1091137631993331e-05, "loss": 1.857, "step": 26893500 }, { "epoch": 77.85, "learning_rate": 1.1090413984346055e-05, "loss": 1.8515, "step": 26894000 }, { "epoch": 77.85, "learning_rate": 1.1089690336698779e-05, "loss": 1.8672, "step": 26894500 }, { "epoch": 77.85, "learning_rate": 1.1088966689051501e-05, "loss": 1.8505, "step": 26895000 }, { "epoch": 77.85, "learning_rate": 1.1088243041404223e-05, "loss": 1.843, "step": 26895500 }, { "epoch": 77.85, "learning_rate": 1.1087519393756947e-05, "loss": 1.8663, "step": 26896000 }, { "epoch": 77.85, "learning_rate": 1.1086795746109671e-05, "loss": 1.8416, "step": 26896500 }, { "epoch": 77.86, "learning_rate": 1.1086072098462393e-05, "loss": 1.8658, "step": 26897000 }, { "epoch": 77.86, "learning_rate": 1.108534989811041e-05, "loss": 1.8456, "step": 26897500 }, { "epoch": 77.86, "learning_rate": 1.1084626250463136e-05, "loss": 1.8528, "step": 26898000 }, { "epoch": 77.86, "learning_rate": 1.1083902602815859e-05, "loss": 1.8592, "step": 26898500 }, { "epoch": 77.86, "learning_rate": 1.108317895516858e-05, "loss": 1.8819, "step": 26899000 }, { "epoch": 77.86, "learning_rate": 1.10824567548166e-05, "loss": 1.859, "step": 26899500 }, { "epoch": 77.86, "learning_rate": 1.1081733107169324e-05, "loss": 1.8413, "step": 26900000 }, { "epoch": 77.87, "learning_rate": 1.1081010906817341e-05, "loss": 1.8544, "step": 26900500 }, { "epoch": 77.87, "learning_rate": 1.1080287259170063e-05, "loss": 1.8467, "step": 26901000 }, { "epoch": 77.87, "learning_rate": 1.1079563611522787e-05, "loss": 1.8647, "step": 26901500 }, { "epoch": 77.87, "learning_rate": 1.1078839963875511e-05, "loss": 1.851, "step": 26902000 }, { "epoch": 77.87, "learning_rate": 1.1078116316228233e-05, "loss": 1.8216, "step": 26902500 }, { "epoch": 77.87, "learning_rate": 1.1077392668580955e-05, "loss": 1.8643, "step": 26903000 }, { "epoch": 77.87, "learning_rate": 1.107666902093368e-05, "loss": 1.8459, "step": 26903500 }, { "epoch": 77.88, "learning_rate": 1.1075945373286403e-05, "loss": 1.8799, "step": 26904000 }, { "epoch": 77.88, "learning_rate": 1.1075221725639126e-05, "loss": 1.8937, "step": 26904500 }, { "epoch": 77.88, "learning_rate": 1.107449807799185e-05, "loss": 1.8547, "step": 26905000 }, { "epoch": 77.88, "learning_rate": 1.1073774430344573e-05, "loss": 1.8757, "step": 26905500 }, { "epoch": 77.88, "learning_rate": 1.1073050782697296e-05, "loss": 1.8254, "step": 26906000 }, { "epoch": 77.88, "learning_rate": 1.107232713505002e-05, "loss": 1.8393, "step": 26906500 }, { "epoch": 77.88, "learning_rate": 1.1071604934698037e-05, "loss": 1.8518, "step": 26907000 }, { "epoch": 77.89, "learning_rate": 1.107088128705076e-05, "loss": 1.8539, "step": 26907500 }, { "epoch": 77.89, "learning_rate": 1.1070157639403483e-05, "loss": 1.8607, "step": 26908000 }, { "epoch": 77.89, "learning_rate": 1.1069433991756207e-05, "loss": 1.8342, "step": 26908500 }, { "epoch": 77.89, "learning_rate": 1.106871034410893e-05, "loss": 1.8753, "step": 26909000 }, { "epoch": 77.89, "learning_rate": 1.1067986696461653e-05, "loss": 1.8861, "step": 26909500 }, { "epoch": 77.89, "learning_rate": 1.1067263048814375e-05, "loss": 1.8516, "step": 26910000 }, { "epoch": 77.89, "learning_rate": 1.10665394011671e-05, "loss": 1.8652, "step": 26910500 }, { "epoch": 77.9, "learning_rate": 1.1065817200815118e-05, "loss": 1.8709, "step": 26911000 }, { "epoch": 77.9, "learning_rate": 1.1065095000463135e-05, "loss": 1.8607, "step": 26911500 }, { "epoch": 77.9, "learning_rate": 1.1064371352815858e-05, "loss": 1.8362, "step": 26912000 }, { "epoch": 77.9, "learning_rate": 1.1063647705168582e-05, "loss": 1.8716, "step": 26912500 }, { "epoch": 77.9, "learning_rate": 1.1062924057521305e-05, "loss": 1.8759, "step": 26913000 }, { "epoch": 77.9, "learning_rate": 1.1062200409874028e-05, "loss": 1.8969, "step": 26913500 }, { "epoch": 77.91, "learning_rate": 1.1061478209522045e-05, "loss": 1.8717, "step": 26914000 }, { "epoch": 77.91, "learning_rate": 1.1060754561874769e-05, "loss": 1.8338, "step": 26914500 }, { "epoch": 77.91, "learning_rate": 1.1060030914227493e-05, "loss": 1.845, "step": 26915000 }, { "epoch": 77.91, "learning_rate": 1.1059307266580215e-05, "loss": 1.8385, "step": 26915500 }, { "epoch": 77.91, "learning_rate": 1.1058583618932939e-05, "loss": 1.8746, "step": 26916000 }, { "epoch": 77.91, "learning_rate": 1.1057859971285661e-05, "loss": 1.8664, "step": 26916500 }, { "epoch": 77.91, "learning_rate": 1.1057136323638385e-05, "loss": 1.8634, "step": 26917000 }, { "epoch": 77.92, "learning_rate": 1.1056412675991109e-05, "loss": 1.8668, "step": 26917500 }, { "epoch": 77.92, "learning_rate": 1.1055690475639126e-05, "loss": 1.8493, "step": 26918000 }, { "epoch": 77.92, "learning_rate": 1.1054968275287144e-05, "loss": 1.8769, "step": 26918500 }, { "epoch": 77.92, "learning_rate": 1.1054244627639868e-05, "loss": 1.8644, "step": 26919000 }, { "epoch": 77.92, "learning_rate": 1.105352097999259e-05, "loss": 1.8589, "step": 26919500 }, { "epoch": 77.92, "learning_rate": 1.1052797332345314e-05, "loss": 1.8493, "step": 26920000 }, { "epoch": 77.92, "learning_rate": 1.1052073684698038e-05, "loss": 1.8335, "step": 26920500 }, { "epoch": 77.93, "learning_rate": 1.105135003705076e-05, "loss": 1.8848, "step": 26921000 }, { "epoch": 77.93, "learning_rate": 1.1050626389403484e-05, "loss": 1.8847, "step": 26921500 }, { "epoch": 77.93, "learning_rate": 1.1049902741756206e-05, "loss": 1.8451, "step": 26922000 }, { "epoch": 77.93, "learning_rate": 1.104917909410893e-05, "loss": 1.8301, "step": 26922500 }, { "epoch": 77.93, "learning_rate": 1.1048456893756947e-05, "loss": 1.8555, "step": 26923000 }, { "epoch": 77.93, "learning_rate": 1.1047733246109671e-05, "loss": 1.8679, "step": 26923500 }, { "epoch": 77.93, "learning_rate": 1.1047009598462393e-05, "loss": 1.8645, "step": 26924000 }, { "epoch": 77.94, "learning_rate": 1.1046285950815117e-05, "loss": 1.8339, "step": 26924500 }, { "epoch": 77.94, "learning_rate": 1.1045562303167841e-05, "loss": 1.861, "step": 26925000 }, { "epoch": 77.94, "learning_rate": 1.1044840102815858e-05, "loss": 1.88, "step": 26925500 }, { "epoch": 77.94, "learning_rate": 1.1044116455168582e-05, "loss": 1.856, "step": 26926000 }, { "epoch": 77.94, "learning_rate": 1.1043392807521305e-05, "loss": 1.8633, "step": 26926500 }, { "epoch": 77.94, "learning_rate": 1.1042670607169322e-05, "loss": 1.8378, "step": 26927000 }, { "epoch": 77.94, "learning_rate": 1.1041946959522046e-05, "loss": 1.8295, "step": 26927500 }, { "epoch": 77.95, "learning_rate": 1.104122331187477e-05, "loss": 1.853, "step": 26928000 }, { "epoch": 77.95, "learning_rate": 1.1040499664227492e-05, "loss": 1.8616, "step": 26928500 }, { "epoch": 77.95, "learning_rate": 1.103977746387551e-05, "loss": 1.868, "step": 26929000 }, { "epoch": 77.95, "learning_rate": 1.1039053816228233e-05, "loss": 1.8675, "step": 26929500 }, { "epoch": 77.95, "learning_rate": 1.1038330168580957e-05, "loss": 1.8545, "step": 26930000 }, { "epoch": 77.95, "learning_rate": 1.103760652093368e-05, "loss": 1.8513, "step": 26930500 }, { "epoch": 77.95, "learning_rate": 1.1036882873286403e-05, "loss": 1.852, "step": 26931000 }, { "epoch": 77.96, "learning_rate": 1.1036159225639125e-05, "loss": 1.8573, "step": 26931500 }, { "epoch": 77.96, "learning_rate": 1.1035437025287144e-05, "loss": 1.8652, "step": 26932000 }, { "epoch": 77.96, "learning_rate": 1.1034713377639867e-05, "loss": 1.8772, "step": 26932500 }, { "epoch": 77.96, "learning_rate": 1.103398972999259e-05, "loss": 1.8593, "step": 26933000 }, { "epoch": 77.96, "learning_rate": 1.1033266082345313e-05, "loss": 1.8787, "step": 26933500 }, { "epoch": 77.96, "learning_rate": 1.1032542434698037e-05, "loss": 1.8768, "step": 26934000 }, { "epoch": 77.96, "learning_rate": 1.1031820234346054e-05, "loss": 1.8561, "step": 26934500 }, { "epoch": 77.97, "learning_rate": 1.1031096586698778e-05, "loss": 1.8569, "step": 26935000 }, { "epoch": 77.97, "learning_rate": 1.1030372939051502e-05, "loss": 1.899, "step": 26935500 }, { "epoch": 77.97, "learning_rate": 1.1029649291404224e-05, "loss": 1.8649, "step": 26936000 }, { "epoch": 77.97, "learning_rate": 1.1028927091052243e-05, "loss": 1.8796, "step": 26936500 }, { "epoch": 77.97, "learning_rate": 1.1028203443404965e-05, "loss": 1.8431, "step": 26937000 }, { "epoch": 77.97, "learning_rate": 1.1027479795757689e-05, "loss": 1.8708, "step": 26937500 }, { "epoch": 77.97, "learning_rate": 1.1026756148110411e-05, "loss": 1.8479, "step": 26938000 }, { "epoch": 77.98, "learning_rate": 1.1026032500463135e-05, "loss": 1.8196, "step": 26938500 }, { "epoch": 77.98, "learning_rate": 1.1025310300111153e-05, "loss": 1.8642, "step": 26939000 }, { "epoch": 77.98, "learning_rate": 1.1024586652463876e-05, "loss": 1.8392, "step": 26939500 }, { "epoch": 77.98, "learning_rate": 1.1023863004816599e-05, "loss": 1.8438, "step": 26940000 }, { "epoch": 77.98, "learning_rate": 1.1023139357169323e-05, "loss": 1.8403, "step": 26940500 }, { "epoch": 77.98, "learning_rate": 1.1022415709522045e-05, "loss": 1.8621, "step": 26941000 }, { "epoch": 77.98, "learning_rate": 1.1021692061874769e-05, "loss": 1.8733, "step": 26941500 }, { "epoch": 77.99, "learning_rate": 1.1020968414227493e-05, "loss": 1.8721, "step": 26942000 }, { "epoch": 77.99, "learning_rate": 1.1020244766580215e-05, "loss": 1.8477, "step": 26942500 }, { "epoch": 77.99, "learning_rate": 1.1019524013523527e-05, "loss": 1.8744, "step": 26943000 }, { "epoch": 77.99, "learning_rate": 1.1018800365876251e-05, "loss": 1.8542, "step": 26943500 }, { "epoch": 77.99, "learning_rate": 1.1018078165524268e-05, "loss": 1.8418, "step": 26944000 }, { "epoch": 77.99, "learning_rate": 1.1017354517876992e-05, "loss": 1.8624, "step": 26944500 }, { "epoch": 77.99, "learning_rate": 1.1016630870229715e-05, "loss": 1.8503, "step": 26945000 }, { "epoch": 78.0, "learning_rate": 1.1015907222582438e-05, "loss": 1.8694, "step": 26945500 }, { "epoch": 78.0, "learning_rate": 1.1015185022230456e-05, "loss": 1.8187, "step": 26946000 }, { "epoch": 78.0, "learning_rate": 1.101446137458318e-05, "loss": 1.878, "step": 26946500 }, { "epoch": 78.0, "eval_accuracy": 0.6855696018358522, "eval_accuracy_mlm": 0.6541292855470305, "eval_accuracy_nsp": 0.8541818481802808, "eval_loss": 2.1763460636138916, "eval_runtime": 331.9937, "eval_samples_per_second": 1314.441, "eval_steps_per_second": 54.769, "step": 26946816 }, { "epoch": 78.0, "learning_rate": 1.1013737726935902e-05, "loss": 1.8472, "step": 26947000 }, { "epoch": 78.0, "learning_rate": 1.1013014079288626e-05, "loss": 1.8715, "step": 26947500 }, { "epoch": 78.0, "learning_rate": 1.101229043164135e-05, "loss": 1.8224, "step": 26948000 }, { "epoch": 78.0, "learning_rate": 1.1011566783994072e-05, "loss": 1.85, "step": 26948500 }, { "epoch": 78.01, "learning_rate": 1.1010843136346796e-05, "loss": 1.8349, "step": 26949000 }, { "epoch": 78.01, "learning_rate": 1.101011948869952e-05, "loss": 1.8636, "step": 26949500 }, { "epoch": 78.01, "learning_rate": 1.1009395841052242e-05, "loss": 1.8422, "step": 26950000 }, { "epoch": 78.01, "learning_rate": 1.1008672193404964e-05, "loss": 1.8574, "step": 26950500 }, { "epoch": 78.01, "learning_rate": 1.1007948545757688e-05, "loss": 1.8396, "step": 26951000 }, { "epoch": 78.01, "learning_rate": 1.1007224898110412e-05, "loss": 1.836, "step": 26951500 }, { "epoch": 78.02, "learning_rate": 1.100650269775843e-05, "loss": 1.8574, "step": 26952000 }, { "epoch": 78.02, "learning_rate": 1.1005779050111152e-05, "loss": 1.8467, "step": 26952500 }, { "epoch": 78.02, "learning_rate": 1.1005055402463876e-05, "loss": 1.8555, "step": 26953000 }, { "epoch": 78.02, "learning_rate": 1.10043317548166e-05, "loss": 1.8624, "step": 26953500 }, { "epoch": 78.02, "learning_rate": 1.1003608107169322e-05, "loss": 1.8514, "step": 26954000 }, { "epoch": 78.02, "learning_rate": 1.1002884459522046e-05, "loss": 1.8685, "step": 26954500 }, { "epoch": 78.02, "learning_rate": 1.100216081187477e-05, "loss": 1.8514, "step": 26955000 }, { "epoch": 78.03, "learning_rate": 1.1001437164227492e-05, "loss": 1.8658, "step": 26955500 }, { "epoch": 78.03, "learning_rate": 1.1000714963875509e-05, "loss": 1.8391, "step": 26956000 }, { "epoch": 78.03, "learning_rate": 1.0999991316228233e-05, "loss": 1.8468, "step": 26956500 }, { "epoch": 78.03, "learning_rate": 1.0999267668580957e-05, "loss": 1.8444, "step": 26957000 }, { "epoch": 78.03, "learning_rate": 1.099854691552427e-05, "loss": 1.8602, "step": 26957500 }, { "epoch": 78.03, "learning_rate": 1.0997823267876991e-05, "loss": 1.8507, "step": 26958000 }, { "epoch": 78.03, "learning_rate": 1.0997099620229715e-05, "loss": 1.8412, "step": 26958500 }, { "epoch": 78.04, "learning_rate": 1.099637597258244e-05, "loss": 1.8467, "step": 26959000 }, { "epoch": 78.04, "learning_rate": 1.0995652324935162e-05, "loss": 1.8346, "step": 26959500 }, { "epoch": 78.04, "learning_rate": 1.0994928677287884e-05, "loss": 1.8516, "step": 26960000 }, { "epoch": 78.04, "learning_rate": 1.0994205029640608e-05, "loss": 1.8635, "step": 26960500 }, { "epoch": 78.04, "learning_rate": 1.0993481381993332e-05, "loss": 1.8576, "step": 26961000 }, { "epoch": 78.04, "learning_rate": 1.0992757734346054e-05, "loss": 1.8569, "step": 26961500 }, { "epoch": 78.04, "learning_rate": 1.0992034086698778e-05, "loss": 1.8387, "step": 26962000 }, { "epoch": 78.05, "learning_rate": 1.0991311886346797e-05, "loss": 1.8618, "step": 26962500 }, { "epoch": 78.05, "learning_rate": 1.0990588238699519e-05, "loss": 1.8427, "step": 26963000 }, { "epoch": 78.05, "learning_rate": 1.0989864591052241e-05, "loss": 1.8486, "step": 26963500 }, { "epoch": 78.05, "learning_rate": 1.0989140943404965e-05, "loss": 1.8761, "step": 26964000 }, { "epoch": 78.05, "learning_rate": 1.0988418743052984e-05, "loss": 1.8519, "step": 26964500 }, { "epoch": 78.05, "learning_rate": 1.0987695095405706e-05, "loss": 1.856, "step": 26965000 }, { "epoch": 78.05, "learning_rate": 1.0986971447758429e-05, "loss": 1.8213, "step": 26965500 }, { "epoch": 78.06, "learning_rate": 1.0986247800111152e-05, "loss": 1.8451, "step": 26966000 }, { "epoch": 78.06, "learning_rate": 1.0985525599759171e-05, "loss": 1.8256, "step": 26966500 }, { "epoch": 78.06, "learning_rate": 1.0984801952111894e-05, "loss": 1.8699, "step": 26967000 }, { "epoch": 78.06, "learning_rate": 1.0984079751759911e-05, "loss": 1.8302, "step": 26967500 }, { "epoch": 78.06, "learning_rate": 1.0983356104112635e-05, "loss": 1.8532, "step": 26968000 }, { "epoch": 78.06, "learning_rate": 1.0982632456465359e-05, "loss": 1.8438, "step": 26968500 }, { "epoch": 78.06, "learning_rate": 1.0981908808818081e-05, "loss": 1.8607, "step": 26969000 }, { "epoch": 78.07, "learning_rate": 1.0981185161170803e-05, "loss": 1.8333, "step": 26969500 }, { "epoch": 78.07, "learning_rate": 1.0980461513523529e-05, "loss": 1.8274, "step": 26970000 }, { "epoch": 78.07, "learning_rate": 1.0979737865876251e-05, "loss": 1.8545, "step": 26970500 }, { "epoch": 78.07, "learning_rate": 1.0979015665524268e-05, "loss": 1.8413, "step": 26971000 }, { "epoch": 78.07, "learning_rate": 1.0978292017876992e-05, "loss": 1.8449, "step": 26971500 }, { "epoch": 78.07, "learning_rate": 1.0977568370229716e-05, "loss": 1.8433, "step": 26972000 }, { "epoch": 78.07, "learning_rate": 1.0976844722582438e-05, "loss": 1.8279, "step": 26972500 }, { "epoch": 78.08, "learning_rate": 1.097612107493516e-05, "loss": 1.851, "step": 26973000 }, { "epoch": 78.08, "learning_rate": 1.0975397427287885e-05, "loss": 1.8328, "step": 26973500 }, { "epoch": 78.08, "learning_rate": 1.0974673779640608e-05, "loss": 1.8581, "step": 26974000 }, { "epoch": 78.08, "learning_rate": 1.097395013199333e-05, "loss": 1.857, "step": 26974500 }, { "epoch": 78.08, "learning_rate": 1.0973226484346055e-05, "loss": 1.8386, "step": 26975000 }, { "epoch": 78.08, "learning_rate": 1.0972504283994074e-05, "loss": 1.8384, "step": 26975500 }, { "epoch": 78.08, "learning_rate": 1.0971780636346796e-05, "loss": 1.8326, "step": 26976000 }, { "epoch": 78.09, "learning_rate": 1.0971056988699518e-05, "loss": 1.8421, "step": 26976500 }, { "epoch": 78.09, "learning_rate": 1.0970333341052242e-05, "loss": 1.845, "step": 26977000 }, { "epoch": 78.09, "learning_rate": 1.0969609693404966e-05, "loss": 1.8354, "step": 26977500 }, { "epoch": 78.09, "learning_rate": 1.0968887493052983e-05, "loss": 1.8471, "step": 26978000 }, { "epoch": 78.09, "learning_rate": 1.0968163845405705e-05, "loss": 1.8543, "step": 26978500 }, { "epoch": 78.09, "learning_rate": 1.0967441645053723e-05, "loss": 1.8302, "step": 26979000 }, { "epoch": 78.09, "learning_rate": 1.0966717997406448e-05, "loss": 1.8402, "step": 26979500 }, { "epoch": 78.1, "learning_rate": 1.096599434975917e-05, "loss": 1.8574, "step": 26980000 }, { "epoch": 78.1, "learning_rate": 1.0965270702111893e-05, "loss": 1.8081, "step": 26980500 }, { "epoch": 78.1, "learning_rate": 1.0964547054464618e-05, "loss": 1.8285, "step": 26981000 }, { "epoch": 78.1, "learning_rate": 1.096382340681734e-05, "loss": 1.8284, "step": 26981500 }, { "epoch": 78.1, "learning_rate": 1.0963099759170063e-05, "loss": 1.8376, "step": 26982000 }, { "epoch": 78.1, "learning_rate": 1.0962376111522787e-05, "loss": 1.8484, "step": 26982500 }, { "epoch": 78.1, "learning_rate": 1.0961653911170806e-05, "loss": 1.8523, "step": 26983000 }, { "epoch": 78.11, "learning_rate": 1.0960931710818823e-05, "loss": 1.8331, "step": 26983500 }, { "epoch": 78.11, "learning_rate": 1.0960208063171545e-05, "loss": 1.8718, "step": 26984000 }, { "epoch": 78.11, "learning_rate": 1.0959484415524267e-05, "loss": 1.8442, "step": 26984500 }, { "epoch": 78.11, "learning_rate": 1.0958760767876993e-05, "loss": 1.8348, "step": 26985000 }, { "epoch": 78.11, "learning_rate": 1.095803856752501e-05, "loss": 1.8184, "step": 26985500 }, { "epoch": 78.11, "learning_rate": 1.0957314919877733e-05, "loss": 1.809, "step": 26986000 }, { "epoch": 78.11, "learning_rate": 1.0956591272230456e-05, "loss": 1.8443, "step": 26986500 }, { "epoch": 78.12, "learning_rate": 1.095586762458318e-05, "loss": 1.8312, "step": 26987000 }, { "epoch": 78.12, "learning_rate": 1.0955145424231198e-05, "loss": 1.8389, "step": 26987500 }, { "epoch": 78.12, "learning_rate": 1.095442177658392e-05, "loss": 1.8274, "step": 26988000 }, { "epoch": 78.12, "learning_rate": 1.0953698128936644e-05, "loss": 1.8172, "step": 26988500 }, { "epoch": 78.12, "learning_rate": 1.0952974481289368e-05, "loss": 1.8341, "step": 26989000 }, { "epoch": 78.12, "learning_rate": 1.095225083364209e-05, "loss": 1.8395, "step": 26989500 }, { "epoch": 78.12, "learning_rate": 1.0951527185994812e-05, "loss": 1.8577, "step": 26990000 }, { "epoch": 78.13, "learning_rate": 1.0950803538347538e-05, "loss": 1.835, "step": 26990500 }, { "epoch": 78.13, "learning_rate": 1.095007989070026e-05, "loss": 1.8612, "step": 26991000 }, { "epoch": 78.13, "learning_rate": 1.0949357690348277e-05, "loss": 1.8759, "step": 26991500 }, { "epoch": 78.13, "learning_rate": 1.0948634042701e-05, "loss": 1.842, "step": 26992000 }, { "epoch": 78.13, "learning_rate": 1.0947910395053725e-05, "loss": 1.8523, "step": 26992500 }, { "epoch": 78.13, "learning_rate": 1.0947188194701742e-05, "loss": 1.8346, "step": 26993000 }, { "epoch": 78.14, "learning_rate": 1.0946464547054465e-05, "loss": 1.8369, "step": 26993500 }, { "epoch": 78.14, "learning_rate": 1.0945740899407189e-05, "loss": 1.8457, "step": 26994000 }, { "epoch": 78.14, "learning_rate": 1.0945017251759912e-05, "loss": 1.8418, "step": 26994500 }, { "epoch": 78.14, "learning_rate": 1.0944293604112635e-05, "loss": 1.8392, "step": 26995000 }, { "epoch": 78.14, "learning_rate": 1.0943569956465357e-05, "loss": 1.8417, "step": 26995500 }, { "epoch": 78.14, "learning_rate": 1.0942846308818083e-05, "loss": 1.859, "step": 26996000 }, { "epoch": 78.14, "learning_rate": 1.0942122661170805e-05, "loss": 1.8425, "step": 26996500 }, { "epoch": 78.15, "learning_rate": 1.0941399013523527e-05, "loss": 1.8326, "step": 26997000 }, { "epoch": 78.15, "learning_rate": 1.0940676813171544e-05, "loss": 1.8643, "step": 26997500 }, { "epoch": 78.15, "learning_rate": 1.093995316552427e-05, "loss": 1.8581, "step": 26998000 }, { "epoch": 78.15, "learning_rate": 1.0939229517876992e-05, "loss": 1.8446, "step": 26998500 }, { "epoch": 78.15, "learning_rate": 1.0938505870229714e-05, "loss": 1.8443, "step": 26999000 }, { "epoch": 78.15, "learning_rate": 1.0937782222582438e-05, "loss": 1.8384, "step": 26999500 }, { "epoch": 78.15, "learning_rate": 1.0937058574935162e-05, "loss": 1.8467, "step": 27000000 }, { "epoch": 78.16, "learning_rate": 1.0936334927287884e-05, "loss": 1.8462, "step": 27000500 }, { "epoch": 78.16, "learning_rate": 1.0935611279640608e-05, "loss": 1.8383, "step": 27001000 }, { "epoch": 78.16, "learning_rate": 1.0934887631993332e-05, "loss": 1.8632, "step": 27001500 }, { "epoch": 78.16, "learning_rate": 1.093416543164135e-05, "loss": 1.853, "step": 27002000 }, { "epoch": 78.16, "learning_rate": 1.0933441783994072e-05, "loss": 1.8325, "step": 27002500 }, { "epoch": 78.16, "learning_rate": 1.0932718136346796e-05, "loss": 1.846, "step": 27003000 }, { "epoch": 78.16, "learning_rate": 1.093199448869952e-05, "loss": 1.8519, "step": 27003500 }, { "epoch": 78.17, "learning_rate": 1.0931270841052242e-05, "loss": 1.8409, "step": 27004000 }, { "epoch": 78.17, "learning_rate": 1.0930548640700259e-05, "loss": 1.8614, "step": 27004500 }, { "epoch": 78.17, "learning_rate": 1.0929824993052983e-05, "loss": 1.8626, "step": 27005000 }, { "epoch": 78.17, "learning_rate": 1.0929101345405707e-05, "loss": 1.8398, "step": 27005500 }, { "epoch": 78.17, "learning_rate": 1.092837769775843e-05, "loss": 1.8308, "step": 27006000 }, { "epoch": 78.17, "learning_rate": 1.0927654050111153e-05, "loss": 1.8485, "step": 27006500 }, { "epoch": 78.17, "learning_rate": 1.0926930402463877e-05, "loss": 1.8673, "step": 27007000 }, { "epoch": 78.18, "learning_rate": 1.0926208202111894e-05, "loss": 1.8656, "step": 27007500 }, { "epoch": 78.18, "learning_rate": 1.0925484554464617e-05, "loss": 1.8571, "step": 27008000 }, { "epoch": 78.18, "learning_rate": 1.092476090681734e-05, "loss": 1.8577, "step": 27008500 }, { "epoch": 78.18, "learning_rate": 1.0924037259170064e-05, "loss": 1.8271, "step": 27009000 }, { "epoch": 78.18, "learning_rate": 1.0923315058818082e-05, "loss": 1.8197, "step": 27009500 }, { "epoch": 78.18, "learning_rate": 1.0922591411170804e-05, "loss": 1.8465, "step": 27010000 }, { "epoch": 78.18, "learning_rate": 1.0921870658114116e-05, "loss": 1.8531, "step": 27010500 }, { "epoch": 78.19, "learning_rate": 1.092114701046684e-05, "loss": 1.8314, "step": 27011000 }, { "epoch": 78.19, "learning_rate": 1.0920423362819564e-05, "loss": 1.8571, "step": 27011500 }, { "epoch": 78.19, "learning_rate": 1.0919699715172286e-05, "loss": 1.85, "step": 27012000 }, { "epoch": 78.19, "learning_rate": 1.091897606752501e-05, "loss": 1.8325, "step": 27012500 }, { "epoch": 78.19, "learning_rate": 1.0918253867173027e-05, "loss": 1.8427, "step": 27013000 }, { "epoch": 78.19, "learning_rate": 1.0917530219525751e-05, "loss": 1.8308, "step": 27013500 }, { "epoch": 78.19, "learning_rate": 1.0916806571878474e-05, "loss": 1.878, "step": 27014000 }, { "epoch": 78.2, "learning_rate": 1.0916082924231198e-05, "loss": 1.8318, "step": 27014500 }, { "epoch": 78.2, "learning_rate": 1.0915359276583921e-05, "loss": 1.8406, "step": 27015000 }, { "epoch": 78.2, "learning_rate": 1.0914635628936644e-05, "loss": 1.8443, "step": 27015500 }, { "epoch": 78.2, "learning_rate": 1.0913911981289366e-05, "loss": 1.8392, "step": 27016000 }, { "epoch": 78.2, "learning_rate": 1.091318833364209e-05, "loss": 1.8458, "step": 27016500 }, { "epoch": 78.2, "learning_rate": 1.0912464685994814e-05, "loss": 1.8461, "step": 27017000 }, { "epoch": 78.2, "learning_rate": 1.0911741038347536e-05, "loss": 1.8339, "step": 27017500 }, { "epoch": 78.21, "learning_rate": 1.091101739070026e-05, "loss": 1.8563, "step": 27018000 }, { "epoch": 78.21, "learning_rate": 1.0910293743052984e-05, "loss": 1.8684, "step": 27018500 }, { "epoch": 78.21, "learning_rate": 1.0909570095405706e-05, "loss": 1.8683, "step": 27019000 }, { "epoch": 78.21, "learning_rate": 1.090884644775843e-05, "loss": 1.8393, "step": 27019500 }, { "epoch": 78.21, "learning_rate": 1.0908122800111152e-05, "loss": 1.8715, "step": 27020000 }, { "epoch": 78.21, "learning_rate": 1.0907399152463876e-05, "loss": 1.8372, "step": 27020500 }, { "epoch": 78.21, "learning_rate": 1.0906675504816598e-05, "loss": 1.8159, "step": 27021000 }, { "epoch": 78.22, "learning_rate": 1.0905951857169322e-05, "loss": 1.881, "step": 27021500 }, { "epoch": 78.22, "learning_rate": 1.0905229656817341e-05, "loss": 1.8635, "step": 27022000 }, { "epoch": 78.22, "learning_rate": 1.0904507456465359e-05, "loss": 1.842, "step": 27022500 }, { "epoch": 78.22, "learning_rate": 1.090378380881808e-05, "loss": 1.836, "step": 27023000 }, { "epoch": 78.22, "learning_rate": 1.0903060161170805e-05, "loss": 1.8339, "step": 27023500 }, { "epoch": 78.22, "learning_rate": 1.0902337960818822e-05, "loss": 1.8495, "step": 27024000 }, { "epoch": 78.22, "learning_rate": 1.0901614313171546e-05, "loss": 1.8361, "step": 27024500 }, { "epoch": 78.23, "learning_rate": 1.0900890665524268e-05, "loss": 1.8501, "step": 27025000 }, { "epoch": 78.23, "learning_rate": 1.0900167017876992e-05, "loss": 1.8457, "step": 27025500 }, { "epoch": 78.23, "learning_rate": 1.089944481752501e-05, "loss": 1.8417, "step": 27026000 }, { "epoch": 78.23, "learning_rate": 1.0898721169877733e-05, "loss": 1.8596, "step": 27026500 }, { "epoch": 78.23, "learning_rate": 1.0897997522230455e-05, "loss": 1.8616, "step": 27027000 }, { "epoch": 78.23, "learning_rate": 1.089727387458318e-05, "loss": 1.8765, "step": 27027500 }, { "epoch": 78.23, "learning_rate": 1.0896550226935903e-05, "loss": 1.8695, "step": 27028000 }, { "epoch": 78.24, "learning_rate": 1.0895826579288626e-05, "loss": 1.8418, "step": 27028500 }, { "epoch": 78.24, "learning_rate": 1.089510293164135e-05, "loss": 1.8228, "step": 27029000 }, { "epoch": 78.24, "learning_rate": 1.0894380731289367e-05, "loss": 1.8387, "step": 27029500 }, { "epoch": 78.24, "learning_rate": 1.089365708364209e-05, "loss": 1.8675, "step": 27030000 }, { "epoch": 78.24, "learning_rate": 1.0892933435994813e-05, "loss": 1.8607, "step": 27030500 }, { "epoch": 78.24, "learning_rate": 1.0892209788347537e-05, "loss": 1.8392, "step": 27031000 }, { "epoch": 78.25, "learning_rate": 1.0891487587995554e-05, "loss": 1.8286, "step": 27031500 }, { "epoch": 78.25, "learning_rate": 1.0890763940348278e-05, "loss": 1.8509, "step": 27032000 }, { "epoch": 78.25, "learning_rate": 1.0890040292701e-05, "loss": 1.8452, "step": 27032500 }, { "epoch": 78.25, "learning_rate": 1.0889316645053724e-05, "loss": 1.8196, "step": 27033000 }, { "epoch": 78.25, "learning_rate": 1.0888592997406448e-05, "loss": 1.8621, "step": 27033500 }, { "epoch": 78.25, "learning_rate": 1.088786934975917e-05, "loss": 1.8331, "step": 27034000 }, { "epoch": 78.25, "learning_rate": 1.0887145702111894e-05, "loss": 1.8547, "step": 27034500 }, { "epoch": 78.26, "learning_rate": 1.0886422054464616e-05, "loss": 1.8559, "step": 27035000 }, { "epoch": 78.26, "learning_rate": 1.088569840681734e-05, "loss": 1.8494, "step": 27035500 }, { "epoch": 78.26, "learning_rate": 1.0884974759170064e-05, "loss": 1.8613, "step": 27036000 }, { "epoch": 78.26, "learning_rate": 1.0884251111522787e-05, "loss": 1.8447, "step": 27036500 }, { "epoch": 78.26, "learning_rate": 1.088352746387551e-05, "loss": 1.8482, "step": 27037000 }, { "epoch": 78.26, "learning_rate": 1.0882805263523528e-05, "loss": 1.8283, "step": 27037500 }, { "epoch": 78.26, "learning_rate": 1.0882081615876252e-05, "loss": 1.8481, "step": 27038000 }, { "epoch": 78.27, "learning_rate": 1.0881357968228974e-05, "loss": 1.8524, "step": 27038500 }, { "epoch": 78.27, "learning_rate": 1.0880634320581698e-05, "loss": 1.8364, "step": 27039000 }, { "epoch": 78.27, "learning_rate": 1.087991067293442e-05, "loss": 1.8853, "step": 27039500 }, { "epoch": 78.27, "learning_rate": 1.0879187025287144e-05, "loss": 1.8468, "step": 27040000 }, { "epoch": 78.27, "learning_rate": 1.0878463377639866e-05, "loss": 1.8526, "step": 27040500 }, { "epoch": 78.27, "learning_rate": 1.0877741177287885e-05, "loss": 1.8411, "step": 27041000 }, { "epoch": 78.27, "learning_rate": 1.0877017529640607e-05, "loss": 1.8479, "step": 27041500 }, { "epoch": 78.28, "learning_rate": 1.0876293881993331e-05, "loss": 1.8601, "step": 27042000 }, { "epoch": 78.28, "learning_rate": 1.0875570234346055e-05, "loss": 1.8348, "step": 27042500 }, { "epoch": 78.28, "learning_rate": 1.0874846586698777e-05, "loss": 1.8368, "step": 27043000 }, { "epoch": 78.28, "learning_rate": 1.0874122939051501e-05, "loss": 1.8268, "step": 27043500 }, { "epoch": 78.28, "learning_rate": 1.0873399291404224e-05, "loss": 1.8558, "step": 27044000 }, { "epoch": 78.28, "learning_rate": 1.0872675643756948e-05, "loss": 1.8539, "step": 27044500 }, { "epoch": 78.28, "learning_rate": 1.0871953443404965e-05, "loss": 1.845, "step": 27045000 }, { "epoch": 78.29, "learning_rate": 1.0871229795757689e-05, "loss": 1.8433, "step": 27045500 }, { "epoch": 78.29, "learning_rate": 1.0870506148110411e-05, "loss": 1.8339, "step": 27046000 }, { "epoch": 78.29, "learning_rate": 1.0869782500463135e-05, "loss": 1.833, "step": 27046500 }, { "epoch": 78.29, "learning_rate": 1.0869061747406447e-05, "loss": 1.8533, "step": 27047000 }, { "epoch": 78.29, "learning_rate": 1.0868338099759171e-05, "loss": 1.8576, "step": 27047500 }, { "epoch": 78.29, "learning_rate": 1.0867614452111893e-05, "loss": 1.8571, "step": 27048000 }, { "epoch": 78.29, "learning_rate": 1.0866892251759912e-05, "loss": 1.8625, "step": 27048500 }, { "epoch": 78.3, "learning_rate": 1.0866168604112634e-05, "loss": 1.8487, "step": 27049000 }, { "epoch": 78.3, "learning_rate": 1.0865444956465358e-05, "loss": 1.8597, "step": 27049500 }, { "epoch": 78.3, "learning_rate": 1.086472130881808e-05, "loss": 1.8616, "step": 27050000 }, { "epoch": 78.3, "learning_rate": 1.0863997661170805e-05, "loss": 1.8448, "step": 27050500 }, { "epoch": 78.3, "learning_rate": 1.0863274013523529e-05, "loss": 1.8388, "step": 27051000 }, { "epoch": 78.3, "learning_rate": 1.086255036587625e-05, "loss": 1.8625, "step": 27051500 }, { "epoch": 78.3, "learning_rate": 1.0861826718228975e-05, "loss": 1.8448, "step": 27052000 }, { "epoch": 78.31, "learning_rate": 1.0861103070581697e-05, "loss": 1.8725, "step": 27052500 }, { "epoch": 78.31, "learning_rate": 1.086037942293442e-05, "loss": 1.8508, "step": 27053000 }, { "epoch": 78.31, "learning_rate": 1.0859655775287143e-05, "loss": 1.8506, "step": 27053500 }, { "epoch": 78.31, "learning_rate": 1.0858932127639867e-05, "loss": 1.8468, "step": 27054000 }, { "epoch": 78.31, "learning_rate": 1.0858208479992591e-05, "loss": 1.8234, "step": 27054500 }, { "epoch": 78.31, "learning_rate": 1.0857486279640608e-05, "loss": 1.8544, "step": 27055000 }, { "epoch": 78.31, "learning_rate": 1.085676263199333e-05, "loss": 1.8492, "step": 27055500 }, { "epoch": 78.32, "learning_rate": 1.0856038984346054e-05, "loss": 1.8498, "step": 27056000 }, { "epoch": 78.32, "learning_rate": 1.0855316783994073e-05, "loss": 1.8416, "step": 27056500 }, { "epoch": 78.32, "learning_rate": 1.0854593136346796e-05, "loss": 1.8503, "step": 27057000 }, { "epoch": 78.32, "learning_rate": 1.0853869488699518e-05, "loss": 1.8257, "step": 27057500 }, { "epoch": 78.32, "learning_rate": 1.0853145841052242e-05, "loss": 1.8459, "step": 27058000 }, { "epoch": 78.32, "learning_rate": 1.0852422193404966e-05, "loss": 1.8285, "step": 27058500 }, { "epoch": 78.32, "learning_rate": 1.0851698545757688e-05, "loss": 1.8585, "step": 27059000 }, { "epoch": 78.33, "learning_rate": 1.0850974898110412e-05, "loss": 1.8566, "step": 27059500 }, { "epoch": 78.33, "learning_rate": 1.0850251250463136e-05, "loss": 1.8716, "step": 27060000 }, { "epoch": 78.33, "learning_rate": 1.0849527602815858e-05, "loss": 1.8543, "step": 27060500 }, { "epoch": 78.33, "learning_rate": 1.0848803955168582e-05, "loss": 1.8356, "step": 27061000 }, { "epoch": 78.33, "learning_rate": 1.0848080307521306e-05, "loss": 1.8398, "step": 27061500 }, { "epoch": 78.33, "learning_rate": 1.0847356659874028e-05, "loss": 1.8314, "step": 27062000 }, { "epoch": 78.33, "learning_rate": 1.084663301222675e-05, "loss": 1.8631, "step": 27062500 }, { "epoch": 78.34, "learning_rate": 1.0845912259170063e-05, "loss": 1.8512, "step": 27063000 }, { "epoch": 78.34, "learning_rate": 1.0845188611522786e-05, "loss": 1.8509, "step": 27063500 }, { "epoch": 78.34, "learning_rate": 1.084446496387551e-05, "loss": 1.8481, "step": 27064000 }, { "epoch": 78.34, "learning_rate": 1.0843742763523528e-05, "loss": 1.8549, "step": 27064500 }, { "epoch": 78.34, "learning_rate": 1.084301911587625e-05, "loss": 1.8474, "step": 27065000 }, { "epoch": 78.34, "learning_rate": 1.0842295468228974e-05, "loss": 1.8349, "step": 27065500 }, { "epoch": 78.34, "learning_rate": 1.0841571820581698e-05, "loss": 1.8361, "step": 27066000 }, { "epoch": 78.35, "learning_rate": 1.084084817293442e-05, "loss": 1.8477, "step": 27066500 }, { "epoch": 78.35, "learning_rate": 1.0840124525287144e-05, "loss": 1.8383, "step": 27067000 }, { "epoch": 78.35, "learning_rate": 1.0839400877639868e-05, "loss": 1.8447, "step": 27067500 }, { "epoch": 78.35, "learning_rate": 1.083867722999259e-05, "loss": 1.8683, "step": 27068000 }, { "epoch": 78.35, "learning_rate": 1.0837953582345312e-05, "loss": 1.8568, "step": 27068500 }, { "epoch": 78.35, "learning_rate": 1.0837229934698038e-05, "loss": 1.854, "step": 27069000 }, { "epoch": 78.36, "learning_rate": 1.083650918164135e-05, "loss": 1.8076, "step": 27069500 }, { "epoch": 78.36, "learning_rate": 1.0835785533994072e-05, "loss": 1.8779, "step": 27070000 }, { "epoch": 78.36, "learning_rate": 1.0835061886346795e-05, "loss": 1.8713, "step": 27070500 }, { "epoch": 78.36, "learning_rate": 1.0834338238699519e-05, "loss": 1.8509, "step": 27071000 }, { "epoch": 78.36, "learning_rate": 1.0833614591052242e-05, "loss": 1.8556, "step": 27071500 }, { "epoch": 78.36, "learning_rate": 1.0832890943404965e-05, "loss": 1.8475, "step": 27072000 }, { "epoch": 78.36, "learning_rate": 1.0832167295757689e-05, "loss": 1.8478, "step": 27072500 }, { "epoch": 78.37, "learning_rate": 1.0831443648110413e-05, "loss": 1.8432, "step": 27073000 }, { "epoch": 78.37, "learning_rate": 1.0830720000463135e-05, "loss": 1.8579, "step": 27073500 }, { "epoch": 78.37, "learning_rate": 1.0829997800111152e-05, "loss": 1.8496, "step": 27074000 }, { "epoch": 78.37, "learning_rate": 1.0829274152463876e-05, "loss": 1.8424, "step": 27074500 }, { "epoch": 78.37, "learning_rate": 1.08285505048166e-05, "loss": 1.8661, "step": 27075000 }, { "epoch": 78.37, "learning_rate": 1.0827826857169322e-05, "loss": 1.8402, "step": 27075500 }, { "epoch": 78.37, "learning_rate": 1.082710465681734e-05, "loss": 1.8439, "step": 27076000 }, { "epoch": 78.38, "learning_rate": 1.0826382456465357e-05, "loss": 1.8458, "step": 27076500 }, { "epoch": 78.38, "learning_rate": 1.0825658808818082e-05, "loss": 1.8316, "step": 27077000 }, { "epoch": 78.38, "learning_rate": 1.0824935161170804e-05, "loss": 1.8487, "step": 27077500 }, { "epoch": 78.38, "learning_rate": 1.0824211513523527e-05, "loss": 1.839, "step": 27078000 }, { "epoch": 78.38, "learning_rate": 1.082348786587625e-05, "loss": 1.8541, "step": 27078500 }, { "epoch": 78.38, "learning_rate": 1.0822764218228975e-05, "loss": 1.8254, "step": 27079000 }, { "epoch": 78.38, "learning_rate": 1.0822040570581697e-05, "loss": 1.8421, "step": 27079500 }, { "epoch": 78.39, "learning_rate": 1.082131692293442e-05, "loss": 1.8512, "step": 27080000 }, { "epoch": 78.39, "learning_rate": 1.0820593275287145e-05, "loss": 1.8346, "step": 27080500 }, { "epoch": 78.39, "learning_rate": 1.0819871074935162e-05, "loss": 1.8427, "step": 27081000 }, { "epoch": 78.39, "learning_rate": 1.0819147427287884e-05, "loss": 1.8382, "step": 27081500 }, { "epoch": 78.39, "learning_rate": 1.0818423779640608e-05, "loss": 1.864, "step": 27082000 }, { "epoch": 78.39, "learning_rate": 1.0817700131993332e-05, "loss": 1.8501, "step": 27082500 }, { "epoch": 78.39, "learning_rate": 1.0816976484346054e-05, "loss": 1.8278, "step": 27083000 }, { "epoch": 78.4, "learning_rate": 1.0816254283994071e-05, "loss": 1.8558, "step": 27083500 }, { "epoch": 78.4, "learning_rate": 1.0815530636346795e-05, "loss": 1.8483, "step": 27084000 }, { "epoch": 78.4, "learning_rate": 1.081480698869952e-05, "loss": 1.839, "step": 27084500 }, { "epoch": 78.4, "learning_rate": 1.0814083341052242e-05, "loss": 1.8475, "step": 27085000 }, { "epoch": 78.4, "learning_rate": 1.0813359693404965e-05, "loss": 1.8414, "step": 27085500 }, { "epoch": 78.4, "learning_rate": 1.081263604575769e-05, "loss": 1.8533, "step": 27086000 }, { "epoch": 78.4, "learning_rate": 1.0811912398110412e-05, "loss": 1.8508, "step": 27086500 }, { "epoch": 78.41, "learning_rate": 1.0811190197758429e-05, "loss": 1.8559, "step": 27087000 }, { "epoch": 78.41, "learning_rate": 1.0810466550111153e-05, "loss": 1.8876, "step": 27087500 }, { "epoch": 78.41, "learning_rate": 1.0809744349759172e-05, "loss": 1.8616, "step": 27088000 }, { "epoch": 78.41, "learning_rate": 1.0809020702111894e-05, "loss": 1.8674, "step": 27088500 }, { "epoch": 78.41, "learning_rate": 1.0808297054464616e-05, "loss": 1.8435, "step": 27089000 }, { "epoch": 78.41, "learning_rate": 1.080757340681734e-05, "loss": 1.8359, "step": 27089500 }, { "epoch": 78.41, "learning_rate": 1.0806849759170064e-05, "loss": 1.8612, "step": 27090000 }, { "epoch": 78.42, "learning_rate": 1.0806126111522786e-05, "loss": 1.8379, "step": 27090500 }, { "epoch": 78.42, "learning_rate": 1.080540246387551e-05, "loss": 1.8421, "step": 27091000 }, { "epoch": 78.42, "learning_rate": 1.0804678816228234e-05, "loss": 1.8511, "step": 27091500 }, { "epoch": 78.42, "learning_rate": 1.0803956615876251e-05, "loss": 1.8279, "step": 27092000 }, { "epoch": 78.42, "learning_rate": 1.0803232968228974e-05, "loss": 1.8589, "step": 27092500 }, { "epoch": 78.42, "learning_rate": 1.0802509320581698e-05, "loss": 1.8441, "step": 27093000 }, { "epoch": 78.42, "learning_rate": 1.0801785672934422e-05, "loss": 1.8453, "step": 27093500 }, { "epoch": 78.43, "learning_rate": 1.0801062025287144e-05, "loss": 1.8429, "step": 27094000 }, { "epoch": 78.43, "learning_rate": 1.0800338377639866e-05, "loss": 1.845, "step": 27094500 }, { "epoch": 78.43, "learning_rate": 1.0799614729992592e-05, "loss": 1.8351, "step": 27095000 }, { "epoch": 78.43, "learning_rate": 1.0798891082345314e-05, "loss": 1.8544, "step": 27095500 }, { "epoch": 78.43, "learning_rate": 1.0798167434698036e-05, "loss": 1.8528, "step": 27096000 }, { "epoch": 78.43, "learning_rate": 1.079744378705076e-05, "loss": 1.8505, "step": 27096500 }, { "epoch": 78.43, "learning_rate": 1.0796720139403484e-05, "loss": 1.8538, "step": 27097000 }, { "epoch": 78.44, "learning_rate": 1.0795996491756206e-05, "loss": 1.8525, "step": 27097500 }, { "epoch": 78.44, "learning_rate": 1.0795274291404223e-05, "loss": 1.8643, "step": 27098000 }, { "epoch": 78.44, "learning_rate": 1.0794550643756947e-05, "loss": 1.8347, "step": 27098500 }, { "epoch": 78.44, "learning_rate": 1.0793826996109671e-05, "loss": 1.8493, "step": 27099000 }, { "epoch": 78.44, "learning_rate": 1.0793103348462394e-05, "loss": 1.8458, "step": 27099500 }, { "epoch": 78.44, "learning_rate": 1.0792379700815117e-05, "loss": 1.8602, "step": 27100000 }, { "epoch": 78.44, "learning_rate": 1.0791657500463136e-05, "loss": 1.8392, "step": 27100500 }, { "epoch": 78.45, "learning_rate": 1.0790933852815859e-05, "loss": 1.8742, "step": 27101000 }, { "epoch": 78.45, "learning_rate": 1.079021020516858e-05, "loss": 1.8584, "step": 27101500 }, { "epoch": 78.45, "learning_rate": 1.0789488004816598e-05, "loss": 1.8332, "step": 27102000 }, { "epoch": 78.45, "learning_rate": 1.0788764357169324e-05, "loss": 1.825, "step": 27102500 }, { "epoch": 78.45, "learning_rate": 1.0788040709522046e-05, "loss": 1.8787, "step": 27103000 }, { "epoch": 78.45, "learning_rate": 1.0787317061874768e-05, "loss": 1.8479, "step": 27103500 }, { "epoch": 78.45, "learning_rate": 1.0786593414227492e-05, "loss": 1.8283, "step": 27104000 }, { "epoch": 78.46, "learning_rate": 1.0785869766580216e-05, "loss": 1.8594, "step": 27104500 }, { "epoch": 78.46, "learning_rate": 1.0785146118932938e-05, "loss": 1.8554, "step": 27105000 }, { "epoch": 78.46, "learning_rate": 1.0784422471285662e-05, "loss": 1.8702, "step": 27105500 }, { "epoch": 78.46, "learning_rate": 1.0783698823638386e-05, "loss": 1.8547, "step": 27106000 }, { "epoch": 78.46, "learning_rate": 1.0782975175991108e-05, "loss": 1.8493, "step": 27106500 }, { "epoch": 78.46, "learning_rate": 1.078225152834383e-05, "loss": 1.8348, "step": 27107000 }, { "epoch": 78.47, "learning_rate": 1.078152932799185e-05, "loss": 1.8454, "step": 27107500 }, { "epoch": 78.47, "learning_rate": 1.0780805680344573e-05, "loss": 1.8229, "step": 27108000 }, { "epoch": 78.47, "learning_rate": 1.0780082032697296e-05, "loss": 1.852, "step": 27108500 }, { "epoch": 78.47, "learning_rate": 1.0779358385050018e-05, "loss": 1.8535, "step": 27109000 }, { "epoch": 78.47, "learning_rate": 1.0778634737402742e-05, "loss": 1.8604, "step": 27109500 }, { "epoch": 78.47, "learning_rate": 1.0777911089755466e-05, "loss": 1.8417, "step": 27110000 }, { "epoch": 78.47, "learning_rate": 1.0777187442108188e-05, "loss": 1.8702, "step": 27110500 }, { "epoch": 78.48, "learning_rate": 1.0776463794460912e-05, "loss": 1.8782, "step": 27111000 }, { "epoch": 78.48, "learning_rate": 1.0775741594108931e-05, "loss": 1.8784, "step": 27111500 }, { "epoch": 78.48, "learning_rate": 1.0775017946461653e-05, "loss": 1.8582, "step": 27112000 }, { "epoch": 78.48, "learning_rate": 1.0774294298814375e-05, "loss": 1.8367, "step": 27112500 }, { "epoch": 78.48, "learning_rate": 1.0773572098462394e-05, "loss": 1.8595, "step": 27113000 }, { "epoch": 78.48, "learning_rate": 1.0772848450815118e-05, "loss": 1.852, "step": 27113500 }, { "epoch": 78.48, "learning_rate": 1.077212480316784e-05, "loss": 1.8658, "step": 27114000 }, { "epoch": 78.49, "learning_rate": 1.0771402602815858e-05, "loss": 1.8517, "step": 27114500 }, { "epoch": 78.49, "learning_rate": 1.0770678955168582e-05, "loss": 1.8394, "step": 27115000 }, { "epoch": 78.49, "learning_rate": 1.0769955307521306e-05, "loss": 1.8733, "step": 27115500 }, { "epoch": 78.49, "learning_rate": 1.0769231659874028e-05, "loss": 1.8501, "step": 27116000 }, { "epoch": 78.49, "learning_rate": 1.0768508012226752e-05, "loss": 1.8568, "step": 27116500 }, { "epoch": 78.49, "learning_rate": 1.0767784364579474e-05, "loss": 1.8531, "step": 27117000 }, { "epoch": 78.49, "learning_rate": 1.0767060716932198e-05, "loss": 1.8383, "step": 27117500 }, { "epoch": 78.5, "learning_rate": 1.076633706928492e-05, "loss": 1.8523, "step": 27118000 }, { "epoch": 78.5, "learning_rate": 1.0765613421637644e-05, "loss": 1.8542, "step": 27118500 }, { "epoch": 78.5, "learning_rate": 1.0764889773990368e-05, "loss": 1.8529, "step": 27119000 }, { "epoch": 78.5, "learning_rate": 1.0764167573638385e-05, "loss": 1.8718, "step": 27119500 }, { "epoch": 78.5, "learning_rate": 1.0763443925991107e-05, "loss": 1.843, "step": 27120000 }, { "epoch": 78.5, "learning_rate": 1.0762720278343831e-05, "loss": 1.8522, "step": 27120500 }, { "epoch": 78.5, "learning_rate": 1.0761996630696555e-05, "loss": 1.8558, "step": 27121000 }, { "epoch": 78.51, "learning_rate": 1.0761272983049278e-05, "loss": 1.8679, "step": 27121500 }, { "epoch": 78.51, "learning_rate": 1.0760549335402001e-05, "loss": 1.8635, "step": 27122000 }, { "epoch": 78.51, "learning_rate": 1.0759827135050019e-05, "loss": 1.8464, "step": 27122500 }, { "epoch": 78.51, "learning_rate": 1.0759103487402743e-05, "loss": 1.844, "step": 27123000 }, { "epoch": 78.51, "learning_rate": 1.0758379839755465e-05, "loss": 1.879, "step": 27123500 }, { "epoch": 78.51, "learning_rate": 1.0757656192108189e-05, "loss": 1.8358, "step": 27124000 }, { "epoch": 78.51, "learning_rate": 1.0756932544460913e-05, "loss": 1.8709, "step": 27124500 }, { "epoch": 78.52, "learning_rate": 1.0756208896813635e-05, "loss": 1.8585, "step": 27125000 }, { "epoch": 78.52, "learning_rate": 1.0755486696461652e-05, "loss": 1.8459, "step": 27125500 }, { "epoch": 78.52, "learning_rate": 1.0754763048814376e-05, "loss": 1.8459, "step": 27126000 }, { "epoch": 78.52, "learning_rate": 1.07540394011671e-05, "loss": 1.845, "step": 27126500 }, { "epoch": 78.52, "learning_rate": 1.0753315753519822e-05, "loss": 1.8487, "step": 27127000 }, { "epoch": 78.52, "learning_rate": 1.0752592105872546e-05, "loss": 1.8766, "step": 27127500 }, { "epoch": 78.52, "learning_rate": 1.0751868458225268e-05, "loss": 1.8564, "step": 27128000 }, { "epoch": 78.53, "learning_rate": 1.0751144810577992e-05, "loss": 1.836, "step": 27128500 }, { "epoch": 78.53, "learning_rate": 1.0750421162930716e-05, "loss": 1.833, "step": 27129000 }, { "epoch": 78.53, "learning_rate": 1.0749698962578734e-05, "loss": 1.8413, "step": 27129500 }, { "epoch": 78.53, "learning_rate": 1.074897676222675e-05, "loss": 1.8376, "step": 27130000 }, { "epoch": 78.53, "learning_rate": 1.0748253114579475e-05, "loss": 1.8691, "step": 27130500 }, { "epoch": 78.53, "learning_rate": 1.0747529466932197e-05, "loss": 1.8601, "step": 27131000 }, { "epoch": 78.53, "learning_rate": 1.0746805819284921e-05, "loss": 1.8532, "step": 27131500 }, { "epoch": 78.54, "learning_rate": 1.0746082171637645e-05, "loss": 1.8459, "step": 27132000 }, { "epoch": 78.54, "learning_rate": 1.0745358523990367e-05, "loss": 1.8531, "step": 27132500 }, { "epoch": 78.54, "learning_rate": 1.0744634876343091e-05, "loss": 1.8637, "step": 27133000 }, { "epoch": 78.54, "learning_rate": 1.0743912675991108e-05, "loss": 1.8554, "step": 27133500 }, { "epoch": 78.54, "learning_rate": 1.0743189028343832e-05, "loss": 1.8567, "step": 27134000 }, { "epoch": 78.54, "learning_rate": 1.074246682799185e-05, "loss": 1.8483, "step": 27134500 }, { "epoch": 78.54, "learning_rate": 1.0741743180344573e-05, "loss": 1.8743, "step": 27135000 }, { "epoch": 78.55, "learning_rate": 1.0741019532697296e-05, "loss": 1.8553, "step": 27135500 }, { "epoch": 78.55, "learning_rate": 1.074029588505002e-05, "loss": 1.8521, "step": 27136000 }, { "epoch": 78.55, "learning_rate": 1.0739572237402742e-05, "loss": 1.8471, "step": 27136500 }, { "epoch": 78.55, "learning_rate": 1.0738848589755466e-05, "loss": 1.8404, "step": 27137000 }, { "epoch": 78.55, "learning_rate": 1.0738124942108188e-05, "loss": 1.8598, "step": 27137500 }, { "epoch": 78.55, "learning_rate": 1.0737401294460912e-05, "loss": 1.8432, "step": 27138000 }, { "epoch": 78.55, "learning_rate": 1.0736679094108929e-05, "loss": 1.8264, "step": 27138500 }, { "epoch": 78.56, "learning_rate": 1.0735955446461653e-05, "loss": 1.8792, "step": 27139000 }, { "epoch": 78.56, "learning_rate": 1.073523324610967e-05, "loss": 1.8573, "step": 27139500 }, { "epoch": 78.56, "learning_rate": 1.0734509598462394e-05, "loss": 1.8511, "step": 27140000 }, { "epoch": 78.56, "learning_rate": 1.0733785950815118e-05, "loss": 1.8539, "step": 27140500 }, { "epoch": 78.56, "learning_rate": 1.073306230316784e-05, "loss": 1.8404, "step": 27141000 }, { "epoch": 78.56, "learning_rate": 1.0732338655520564e-05, "loss": 1.8424, "step": 27141500 }, { "epoch": 78.56, "learning_rate": 1.0731615007873287e-05, "loss": 1.8283, "step": 27142000 }, { "epoch": 78.57, "learning_rate": 1.073089136022601e-05, "loss": 1.8534, "step": 27142500 }, { "epoch": 78.57, "learning_rate": 1.0730167712578733e-05, "loss": 1.8602, "step": 27143000 }, { "epoch": 78.57, "learning_rate": 1.0729444064931457e-05, "loss": 1.8298, "step": 27143500 }, { "epoch": 78.57, "learning_rate": 1.072872041728418e-05, "loss": 1.8411, "step": 27144000 }, { "epoch": 78.57, "learning_rate": 1.0727996769636903e-05, "loss": 1.8585, "step": 27144500 }, { "epoch": 78.57, "learning_rate": 1.072727456928492e-05, "loss": 1.8538, "step": 27145000 }, { "epoch": 78.58, "learning_rate": 1.0726550921637644e-05, "loss": 1.8645, "step": 27145500 }, { "epoch": 78.58, "learning_rate": 1.0725827273990368e-05, "loss": 1.8516, "step": 27146000 }, { "epoch": 78.58, "learning_rate": 1.072510362634309e-05, "loss": 1.8416, "step": 27146500 }, { "epoch": 78.58, "learning_rate": 1.0724379978695814e-05, "loss": 1.8822, "step": 27147000 }, { "epoch": 78.58, "learning_rate": 1.0723656331048538e-05, "loss": 1.8388, "step": 27147500 }, { "epoch": 78.58, "learning_rate": 1.072293268340126e-05, "loss": 1.8484, "step": 27148000 }, { "epoch": 78.58, "learning_rate": 1.0722210483049277e-05, "loss": 1.8805, "step": 27148500 }, { "epoch": 78.59, "learning_rate": 1.0721486835402001e-05, "loss": 1.8552, "step": 27149000 }, { "epoch": 78.59, "learning_rate": 1.0720763187754725e-05, "loss": 1.8262, "step": 27149500 }, { "epoch": 78.59, "learning_rate": 1.0720039540107448e-05, "loss": 1.8707, "step": 27150000 }, { "epoch": 78.59, "learning_rate": 1.071931589246017e-05, "loss": 1.8482, "step": 27150500 }, { "epoch": 78.59, "learning_rate": 1.0718592244812894e-05, "loss": 1.8698, "step": 27151000 }, { "epoch": 78.59, "learning_rate": 1.0717868597165618e-05, "loss": 1.8704, "step": 27151500 }, { "epoch": 78.59, "learning_rate": 1.071714494951834e-05, "loss": 1.8578, "step": 27152000 }, { "epoch": 78.6, "learning_rate": 1.0716421301871064e-05, "loss": 1.8457, "step": 27152500 }, { "epoch": 78.6, "learning_rate": 1.0715700548814376e-05, "loss": 1.8908, "step": 27153000 }, { "epoch": 78.6, "learning_rate": 1.07149769011671e-05, "loss": 1.8543, "step": 27153500 }, { "epoch": 78.6, "learning_rate": 1.0714253253519822e-05, "loss": 1.8577, "step": 27154000 }, { "epoch": 78.6, "learning_rate": 1.0713529605872546e-05, "loss": 1.8561, "step": 27154500 }, { "epoch": 78.6, "learning_rate": 1.071280595822527e-05, "loss": 1.8572, "step": 27155000 }, { "epoch": 78.6, "learning_rate": 1.0712082310577992e-05, "loss": 1.8403, "step": 27155500 }, { "epoch": 78.61, "learning_rate": 1.0711358662930715e-05, "loss": 1.8633, "step": 27156000 }, { "epoch": 78.61, "learning_rate": 1.0710636462578733e-05, "loss": 1.8352, "step": 27156500 }, { "epoch": 78.61, "learning_rate": 1.0709912814931457e-05, "loss": 1.8333, "step": 27157000 }, { "epoch": 78.61, "learning_rate": 1.070918916728418e-05, "loss": 1.8539, "step": 27157500 }, { "epoch": 78.61, "learning_rate": 1.0708465519636902e-05, "loss": 1.8334, "step": 27158000 }, { "epoch": 78.61, "learning_rate": 1.0707741871989626e-05, "loss": 1.8636, "step": 27158500 }, { "epoch": 78.61, "learning_rate": 1.070701822434235e-05, "loss": 1.8304, "step": 27159000 }, { "epoch": 78.62, "learning_rate": 1.0706296023990367e-05, "loss": 1.8493, "step": 27159500 }, { "epoch": 78.62, "learning_rate": 1.0705572376343091e-05, "loss": 1.8215, "step": 27160000 }, { "epoch": 78.62, "learning_rate": 1.0704848728695815e-05, "loss": 1.8563, "step": 27160500 }, { "epoch": 78.62, "learning_rate": 1.0704125081048537e-05, "loss": 1.8599, "step": 27161000 }, { "epoch": 78.62, "learning_rate": 1.0703402880696554e-05, "loss": 1.865, "step": 27161500 }, { "epoch": 78.62, "learning_rate": 1.0702679233049278e-05, "loss": 1.8613, "step": 27162000 }, { "epoch": 78.62, "learning_rate": 1.0701955585402002e-05, "loss": 1.8387, "step": 27162500 }, { "epoch": 78.63, "learning_rate": 1.0701231937754724e-05, "loss": 1.8712, "step": 27163000 }, { "epoch": 78.63, "learning_rate": 1.0700509737402742e-05, "loss": 1.8538, "step": 27163500 }, { "epoch": 78.63, "learning_rate": 1.0699786089755466e-05, "loss": 1.8547, "step": 27164000 }, { "epoch": 78.63, "learning_rate": 1.069906244210819e-05, "loss": 1.8476, "step": 27164500 }, { "epoch": 78.63, "learning_rate": 1.0698338794460912e-05, "loss": 1.836, "step": 27165000 }, { "epoch": 78.63, "learning_rate": 1.0697615146813634e-05, "loss": 1.8332, "step": 27165500 }, { "epoch": 78.63, "learning_rate": 1.069689149916636e-05, "loss": 1.8524, "step": 27166000 }, { "epoch": 78.64, "learning_rate": 1.0696167851519082e-05, "loss": 1.8771, "step": 27166500 }, { "epoch": 78.64, "learning_rate": 1.0695444203871804e-05, "loss": 1.8672, "step": 27167000 }, { "epoch": 78.64, "learning_rate": 1.0694720556224528e-05, "loss": 1.877, "step": 27167500 }, { "epoch": 78.64, "learning_rate": 1.0693996908577252e-05, "loss": 1.8437, "step": 27168000 }, { "epoch": 78.64, "learning_rate": 1.0693273260929974e-05, "loss": 1.8422, "step": 27168500 }, { "epoch": 78.64, "learning_rate": 1.0692549613282696e-05, "loss": 1.8547, "step": 27169000 }, { "epoch": 78.64, "learning_rate": 1.0691827412930715e-05, "loss": 1.866, "step": 27169500 }, { "epoch": 78.65, "learning_rate": 1.0691105212578734e-05, "loss": 1.871, "step": 27170000 }, { "epoch": 78.65, "learning_rate": 1.0690381564931457e-05, "loss": 1.8541, "step": 27170500 }, { "epoch": 78.65, "learning_rate": 1.0689659364579474e-05, "loss": 1.8443, "step": 27171000 }, { "epoch": 78.65, "learning_rate": 1.0688935716932198e-05, "loss": 1.8608, "step": 27171500 }, { "epoch": 78.65, "learning_rate": 1.0688212069284922e-05, "loss": 1.8509, "step": 27172000 }, { "epoch": 78.65, "learning_rate": 1.0687488421637644e-05, "loss": 1.8315, "step": 27172500 }, { "epoch": 78.65, "learning_rate": 1.0686764773990366e-05, "loss": 1.8834, "step": 27173000 }, { "epoch": 78.66, "learning_rate": 1.0686041126343092e-05, "loss": 1.8531, "step": 27173500 }, { "epoch": 78.66, "learning_rate": 1.0685317478695814e-05, "loss": 1.8824, "step": 27174000 }, { "epoch": 78.66, "learning_rate": 1.0684593831048536e-05, "loss": 1.8711, "step": 27174500 }, { "epoch": 78.66, "learning_rate": 1.068387018340126e-05, "loss": 1.8684, "step": 27175000 }, { "epoch": 78.66, "learning_rate": 1.0683146535753984e-05, "loss": 1.8733, "step": 27175500 }, { "epoch": 78.66, "learning_rate": 1.0682424335402001e-05, "loss": 1.8353, "step": 27176000 }, { "epoch": 78.66, "learning_rate": 1.0681700687754724e-05, "loss": 1.8445, "step": 27176500 }, { "epoch": 78.67, "learning_rate": 1.0680977040107447e-05, "loss": 1.873, "step": 27177000 }, { "epoch": 78.67, "learning_rate": 1.0680253392460171e-05, "loss": 1.8679, "step": 27177500 }, { "epoch": 78.67, "learning_rate": 1.0679531192108189e-05, "loss": 1.8263, "step": 27178000 }, { "epoch": 78.67, "learning_rate": 1.067880754446091e-05, "loss": 1.8363, "step": 27178500 }, { "epoch": 78.67, "learning_rate": 1.0678083896813636e-05, "loss": 1.8623, "step": 27179000 }, { "epoch": 78.67, "learning_rate": 1.0677360249166359e-05, "loss": 1.8425, "step": 27179500 }, { "epoch": 78.67, "learning_rate": 1.0676636601519081e-05, "loss": 1.8542, "step": 27180000 }, { "epoch": 78.68, "learning_rate": 1.0675912953871805e-05, "loss": 1.8225, "step": 27180500 }, { "epoch": 78.68, "learning_rate": 1.0675189306224529e-05, "loss": 1.8467, "step": 27181000 }, { "epoch": 78.68, "learning_rate": 1.0674465658577251e-05, "loss": 1.8314, "step": 27181500 }, { "epoch": 78.68, "learning_rate": 1.0673743458225268e-05, "loss": 1.825, "step": 27182000 }, { "epoch": 78.68, "learning_rate": 1.0673019810577992e-05, "loss": 1.8196, "step": 27182500 }, { "epoch": 78.68, "learning_rate": 1.0672296162930716e-05, "loss": 1.8481, "step": 27183000 }, { "epoch": 78.69, "learning_rate": 1.0671572515283438e-05, "loss": 1.8528, "step": 27183500 }, { "epoch": 78.69, "learning_rate": 1.0670848867636162e-05, "loss": 1.8458, "step": 27184000 }, { "epoch": 78.69, "learning_rate": 1.0670125219988886e-05, "loss": 1.8195, "step": 27184500 }, { "epoch": 78.69, "learning_rate": 1.0669401572341608e-05, "loss": 1.8572, "step": 27185000 }, { "epoch": 78.69, "learning_rate": 1.0668679371989626e-05, "loss": 1.8333, "step": 27185500 }, { "epoch": 78.69, "learning_rate": 1.0667957171637643e-05, "loss": 1.846, "step": 27186000 }, { "epoch": 78.69, "learning_rate": 1.0667233523990369e-05, "loss": 1.8589, "step": 27186500 }, { "epoch": 78.7, "learning_rate": 1.066650987634309e-05, "loss": 1.856, "step": 27187000 }, { "epoch": 78.7, "learning_rate": 1.0665786228695813e-05, "loss": 1.8684, "step": 27187500 }, { "epoch": 78.7, "learning_rate": 1.0665062581048537e-05, "loss": 1.8604, "step": 27188000 }, { "epoch": 78.7, "learning_rate": 1.0664338933401261e-05, "loss": 1.8663, "step": 27188500 }, { "epoch": 78.7, "learning_rate": 1.0663615285753983e-05, "loss": 1.8648, "step": 27189000 }, { "epoch": 78.7, "learning_rate": 1.0662891638106707e-05, "loss": 1.8627, "step": 27189500 }, { "epoch": 78.7, "learning_rate": 1.0662167990459431e-05, "loss": 1.8654, "step": 27190000 }, { "epoch": 78.71, "learning_rate": 1.0661444342812153e-05, "loss": 1.8517, "step": 27190500 }, { "epoch": 78.71, "learning_rate": 1.066072214246017e-05, "loss": 1.8321, "step": 27191000 }, { "epoch": 78.71, "learning_rate": 1.0659998494812894e-05, "loss": 1.8765, "step": 27191500 }, { "epoch": 78.71, "learning_rate": 1.0659276294460913e-05, "loss": 1.8649, "step": 27192000 }, { "epoch": 78.71, "learning_rate": 1.0658552646813636e-05, "loss": 1.8548, "step": 27192500 }, { "epoch": 78.71, "learning_rate": 1.0657828999166358e-05, "loss": 1.837, "step": 27193000 }, { "epoch": 78.71, "learning_rate": 1.0657105351519082e-05, "loss": 1.863, "step": 27193500 }, { "epoch": 78.72, "learning_rate": 1.0656381703871806e-05, "loss": 1.8749, "step": 27194000 }, { "epoch": 78.72, "learning_rate": 1.0655658056224528e-05, "loss": 1.8612, "step": 27194500 }, { "epoch": 78.72, "learning_rate": 1.065493440857725e-05, "loss": 1.8579, "step": 27195000 }, { "epoch": 78.72, "learning_rate": 1.0654210760929976e-05, "loss": 1.8634, "step": 27195500 }, { "epoch": 78.72, "learning_rate": 1.0653488560577993e-05, "loss": 1.8568, "step": 27196000 }, { "epoch": 78.72, "learning_rate": 1.0652764912930715e-05, "loss": 1.8472, "step": 27196500 }, { "epoch": 78.72, "learning_rate": 1.065204126528344e-05, "loss": 1.8502, "step": 27197000 }, { "epoch": 78.73, "learning_rate": 1.0651317617636163e-05, "loss": 1.8531, "step": 27197500 }, { "epoch": 78.73, "learning_rate": 1.0650593969988885e-05, "loss": 1.8371, "step": 27198000 }, { "epoch": 78.73, "learning_rate": 1.0649870322341608e-05, "loss": 1.8637, "step": 27198500 }, { "epoch": 78.73, "learning_rate": 1.0649146674694331e-05, "loss": 1.8321, "step": 27199000 }, { "epoch": 78.73, "learning_rate": 1.0648423027047055e-05, "loss": 1.8408, "step": 27199500 }, { "epoch": 78.73, "learning_rate": 1.0647700826695073e-05, "loss": 1.8293, "step": 27200000 }, { "epoch": 78.73, "learning_rate": 1.0646977179047795e-05, "loss": 1.8555, "step": 27200500 }, { "epoch": 78.74, "learning_rate": 1.064625353140052e-05, "loss": 1.8507, "step": 27201000 }, { "epoch": 78.74, "learning_rate": 1.0645529883753243e-05, "loss": 1.8666, "step": 27201500 }, { "epoch": 78.74, "learning_rate": 1.0644809130696555e-05, "loss": 1.8457, "step": 27202000 }, { "epoch": 78.74, "learning_rate": 1.0644085483049277e-05, "loss": 1.8259, "step": 27202500 }, { "epoch": 78.74, "learning_rate": 1.0643363282697296e-05, "loss": 1.8802, "step": 27203000 }, { "epoch": 78.74, "learning_rate": 1.064263963505002e-05, "loss": 1.878, "step": 27203500 }, { "epoch": 78.74, "learning_rate": 1.0641915987402742e-05, "loss": 1.854, "step": 27204000 }, { "epoch": 78.75, "learning_rate": 1.0641192339755465e-05, "loss": 1.8438, "step": 27204500 }, { "epoch": 78.75, "learning_rate": 1.0640468692108189e-05, "loss": 1.8591, "step": 27205000 }, { "epoch": 78.75, "learning_rate": 1.0639745044460912e-05, "loss": 1.8554, "step": 27205500 }, { "epoch": 78.75, "learning_rate": 1.063902284410893e-05, "loss": 1.8512, "step": 27206000 }, { "epoch": 78.75, "learning_rate": 1.0638299196461652e-05, "loss": 1.8602, "step": 27206500 }, { "epoch": 78.75, "learning_rate": 1.0637575548814376e-05, "loss": 1.8388, "step": 27207000 }, { "epoch": 78.75, "learning_rate": 1.06368519011671e-05, "loss": 1.8364, "step": 27207500 }, { "epoch": 78.76, "learning_rate": 1.0636128253519822e-05, "loss": 1.8361, "step": 27208000 }, { "epoch": 78.76, "learning_rate": 1.0635404605872546e-05, "loss": 1.837, "step": 27208500 }, { "epoch": 78.76, "learning_rate": 1.063468095822527e-05, "loss": 1.8634, "step": 27209000 }, { "epoch": 78.76, "learning_rate": 1.0633957310577992e-05, "loss": 1.8538, "step": 27209500 }, { "epoch": 78.76, "learning_rate": 1.0633233662930716e-05, "loss": 1.8618, "step": 27210000 }, { "epoch": 78.76, "learning_rate": 1.063251001528344e-05, "loss": 1.862, "step": 27210500 }, { "epoch": 78.76, "learning_rate": 1.0631787814931457e-05, "loss": 1.8322, "step": 27211000 }, { "epoch": 78.77, "learning_rate": 1.063106416728418e-05, "loss": 1.8516, "step": 27211500 }, { "epoch": 78.77, "learning_rate": 1.0630340519636903e-05, "loss": 1.8582, "step": 27212000 }, { "epoch": 78.77, "learning_rate": 1.0629616871989627e-05, "loss": 1.8722, "step": 27212500 }, { "epoch": 78.77, "learning_rate": 1.062889322434235e-05, "loss": 1.8415, "step": 27213000 }, { "epoch": 78.77, "learning_rate": 1.0628169576695072e-05, "loss": 1.8696, "step": 27213500 }, { "epoch": 78.77, "learning_rate": 1.0627445929047796e-05, "loss": 1.8112, "step": 27214000 }, { "epoch": 78.77, "learning_rate": 1.0626723728695815e-05, "loss": 1.8839, "step": 27214500 }, { "epoch": 78.78, "learning_rate": 1.0626000081048537e-05, "loss": 1.8338, "step": 27215000 }, { "epoch": 78.78, "learning_rate": 1.062527643340126e-05, "loss": 1.8363, "step": 27215500 }, { "epoch": 78.78, "learning_rate": 1.0624554233049278e-05, "loss": 1.8441, "step": 27216000 }, { "epoch": 78.78, "learning_rate": 1.0623830585402002e-05, "loss": 1.8509, "step": 27216500 }, { "epoch": 78.78, "learning_rate": 1.0623106937754724e-05, "loss": 1.8345, "step": 27217000 }, { "epoch": 78.78, "learning_rate": 1.0622383290107448e-05, "loss": 1.8673, "step": 27217500 }, { "epoch": 78.78, "learning_rate": 1.062165964246017e-05, "loss": 1.8313, "step": 27218000 }, { "epoch": 78.79, "learning_rate": 1.0620935994812894e-05, "loss": 1.8625, "step": 27218500 }, { "epoch": 78.79, "learning_rate": 1.0620212347165617e-05, "loss": 1.8282, "step": 27219000 }, { "epoch": 78.79, "learning_rate": 1.061948869951834e-05, "loss": 1.8488, "step": 27219500 }, { "epoch": 78.79, "learning_rate": 1.0618765051871064e-05, "loss": 1.8558, "step": 27220000 }, { "epoch": 78.79, "learning_rate": 1.0618041404223787e-05, "loss": 1.8487, "step": 27220500 }, { "epoch": 78.79, "learning_rate": 1.061731775657651e-05, "loss": 1.8758, "step": 27221000 }, { "epoch": 78.8, "learning_rate": 1.0616595556224528e-05, "loss": 1.8573, "step": 27221500 }, { "epoch": 78.8, "learning_rate": 1.0615871908577252e-05, "loss": 1.8611, "step": 27222000 }, { "epoch": 78.8, "learning_rate": 1.0615148260929974e-05, "loss": 1.8219, "step": 27222500 }, { "epoch": 78.8, "learning_rate": 1.0614426060577993e-05, "loss": 1.8777, "step": 27223000 }, { "epoch": 78.8, "learning_rate": 1.0613702412930715e-05, "loss": 1.8372, "step": 27223500 }, { "epoch": 78.8, "learning_rate": 1.0612978765283439e-05, "loss": 1.8641, "step": 27224000 }, { "epoch": 78.8, "learning_rate": 1.0612255117636161e-05, "loss": 1.8513, "step": 27224500 }, { "epoch": 78.81, "learning_rate": 1.0611531469988885e-05, "loss": 1.8466, "step": 27225000 }, { "epoch": 78.81, "learning_rate": 1.0610807822341609e-05, "loss": 1.8468, "step": 27225500 }, { "epoch": 78.81, "learning_rate": 1.0610084174694331e-05, "loss": 1.841, "step": 27226000 }, { "epoch": 78.81, "learning_rate": 1.0609360527047055e-05, "loss": 1.8558, "step": 27226500 }, { "epoch": 78.81, "learning_rate": 1.0608638326695073e-05, "loss": 1.85, "step": 27227000 }, { "epoch": 78.81, "learning_rate": 1.0607914679047796e-05, "loss": 1.8762, "step": 27227500 }, { "epoch": 78.81, "learning_rate": 1.0607191031400519e-05, "loss": 1.8638, "step": 27228000 }, { "epoch": 78.82, "learning_rate": 1.0606468831048538e-05, "loss": 1.8611, "step": 27228500 }, { "epoch": 78.82, "learning_rate": 1.060574518340126e-05, "loss": 1.8584, "step": 27229000 }, { "epoch": 78.82, "learning_rate": 1.0605021535753984e-05, "loss": 1.8532, "step": 27229500 }, { "epoch": 78.82, "learning_rate": 1.0604297888106706e-05, "loss": 1.8467, "step": 27230000 }, { "epoch": 78.82, "learning_rate": 1.060357424045943e-05, "loss": 1.8349, "step": 27230500 }, { "epoch": 78.82, "learning_rate": 1.0602850592812154e-05, "loss": 1.8812, "step": 27231000 }, { "epoch": 78.82, "learning_rate": 1.0602128392460171e-05, "loss": 1.8314, "step": 27231500 }, { "epoch": 78.83, "learning_rate": 1.0601404744812893e-05, "loss": 1.8569, "step": 27232000 }, { "epoch": 78.83, "learning_rate": 1.0600681097165617e-05, "loss": 1.859, "step": 27232500 }, { "epoch": 78.83, "learning_rate": 1.0599957449518341e-05, "loss": 1.8495, "step": 27233000 }, { "epoch": 78.83, "learning_rate": 1.0599233801871063e-05, "loss": 1.85, "step": 27233500 }, { "epoch": 78.83, "learning_rate": 1.0598510154223787e-05, "loss": 1.8578, "step": 27234000 }, { "epoch": 78.83, "learning_rate": 1.059778650657651e-05, "loss": 1.8623, "step": 27234500 }, { "epoch": 78.83, "learning_rate": 1.0597062858929234e-05, "loss": 1.8481, "step": 27235000 }, { "epoch": 78.84, "learning_rate": 1.0596339211281957e-05, "loss": 1.8574, "step": 27235500 }, { "epoch": 78.84, "learning_rate": 1.059561556363468e-05, "loss": 1.8456, "step": 27236000 }, { "epoch": 78.84, "learning_rate": 1.0594891915987404e-05, "loss": 1.8494, "step": 27236500 }, { "epoch": 78.84, "learning_rate": 1.0594169715635421e-05, "loss": 1.8363, "step": 27237000 }, { "epoch": 78.84, "learning_rate": 1.0593446067988145e-05, "loss": 1.8362, "step": 27237500 }, { "epoch": 78.84, "learning_rate": 1.0592722420340867e-05, "loss": 1.8636, "step": 27238000 }, { "epoch": 78.84, "learning_rate": 1.0591998772693591e-05, "loss": 1.8763, "step": 27238500 }, { "epoch": 78.85, "learning_rate": 1.0591275125046315e-05, "loss": 1.8268, "step": 27239000 }, { "epoch": 78.85, "learning_rate": 1.0590551477399037e-05, "loss": 1.8565, "step": 27239500 }, { "epoch": 78.85, "learning_rate": 1.058982782975176e-05, "loss": 1.8441, "step": 27240000 }, { "epoch": 78.85, "learning_rate": 1.0589104182104483e-05, "loss": 1.8619, "step": 27240500 }, { "epoch": 78.85, "learning_rate": 1.0588381981752502e-05, "loss": 1.8311, "step": 27241000 }, { "epoch": 78.85, "learning_rate": 1.0587658334105224e-05, "loss": 1.8401, "step": 27241500 }, { "epoch": 78.85, "learning_rate": 1.0586934686457948e-05, "loss": 1.8523, "step": 27242000 }, { "epoch": 78.86, "learning_rate": 1.058621103881067e-05, "loss": 1.8583, "step": 27242500 }, { "epoch": 78.86, "learning_rate": 1.0585490285753983e-05, "loss": 1.8575, "step": 27243000 }, { "epoch": 78.86, "learning_rate": 1.0584766638106707e-05, "loss": 1.8679, "step": 27243500 }, { "epoch": 78.86, "learning_rate": 1.0584042990459429e-05, "loss": 1.8569, "step": 27244000 }, { "epoch": 78.86, "learning_rate": 1.0583319342812153e-05, "loss": 1.8606, "step": 27244500 }, { "epoch": 78.86, "learning_rate": 1.058259714246017e-05, "loss": 1.8555, "step": 27245000 }, { "epoch": 78.86, "learning_rate": 1.0581873494812894e-05, "loss": 1.843, "step": 27245500 }, { "epoch": 78.87, "learning_rate": 1.0581149847165616e-05, "loss": 1.8476, "step": 27246000 }, { "epoch": 78.87, "learning_rate": 1.058042619951834e-05, "loss": 1.8609, "step": 27246500 }, { "epoch": 78.87, "learning_rate": 1.0579702551871064e-05, "loss": 1.8593, "step": 27247000 }, { "epoch": 78.87, "learning_rate": 1.0578978904223787e-05, "loss": 1.8618, "step": 27247500 }, { "epoch": 78.87, "learning_rate": 1.057825525657651e-05, "loss": 1.8541, "step": 27248000 }, { "epoch": 78.87, "learning_rate": 1.0577531608929234e-05, "loss": 1.8718, "step": 27248500 }, { "epoch": 78.87, "learning_rate": 1.0576809408577252e-05, "loss": 1.8685, "step": 27249000 }, { "epoch": 78.88, "learning_rate": 1.0576085760929974e-05, "loss": 1.8355, "step": 27249500 }, { "epoch": 78.88, "learning_rate": 1.0575362113282698e-05, "loss": 1.8871, "step": 27250000 }, { "epoch": 78.88, "learning_rate": 1.0574638465635422e-05, "loss": 1.8854, "step": 27250500 }, { "epoch": 78.88, "learning_rate": 1.0573914817988144e-05, "loss": 1.8591, "step": 27251000 }, { "epoch": 78.88, "learning_rate": 1.0573192617636161e-05, "loss": 1.8584, "step": 27251500 }, { "epoch": 78.88, "learning_rate": 1.057247041728418e-05, "loss": 1.8644, "step": 27252000 }, { "epoch": 78.88, "learning_rate": 1.0571746769636904e-05, "loss": 1.8408, "step": 27252500 }, { "epoch": 78.89, "learning_rate": 1.0571024569284921e-05, "loss": 1.8725, "step": 27253000 }, { "epoch": 78.89, "learning_rate": 1.0570300921637644e-05, "loss": 1.8538, "step": 27253500 }, { "epoch": 78.89, "learning_rate": 1.0569577273990367e-05, "loss": 1.8408, "step": 27254000 }, { "epoch": 78.89, "learning_rate": 1.0568853626343091e-05, "loss": 1.8692, "step": 27254500 }, { "epoch": 78.89, "learning_rate": 1.0568129978695814e-05, "loss": 1.8424, "step": 27255000 }, { "epoch": 78.89, "learning_rate": 1.0567406331048536e-05, "loss": 1.8382, "step": 27255500 }, { "epoch": 78.89, "learning_rate": 1.056668268340126e-05, "loss": 1.8538, "step": 27256000 }, { "epoch": 78.9, "learning_rate": 1.0565959035753984e-05, "loss": 1.8663, "step": 27256500 }, { "epoch": 78.9, "learning_rate": 1.0565236835402001e-05, "loss": 1.8594, "step": 27257000 }, { "epoch": 78.9, "learning_rate": 1.0564513187754725e-05, "loss": 1.8577, "step": 27257500 }, { "epoch": 78.9, "learning_rate": 1.0563789540107449e-05, "loss": 1.8371, "step": 27258000 }, { "epoch": 78.9, "learning_rate": 1.0563065892460171e-05, "loss": 1.85, "step": 27258500 }, { "epoch": 78.9, "learning_rate": 1.0562342244812893e-05, "loss": 1.8791, "step": 27259000 }, { "epoch": 78.91, "learning_rate": 1.0561618597165617e-05, "loss": 1.8496, "step": 27259500 }, { "epoch": 78.91, "learning_rate": 1.0560894949518341e-05, "loss": 1.8422, "step": 27260000 }, { "epoch": 78.91, "learning_rate": 1.0560171301871063e-05, "loss": 1.859, "step": 27260500 }, { "epoch": 78.91, "learning_rate": 1.0559447654223787e-05, "loss": 1.8234, "step": 27261000 }, { "epoch": 78.91, "learning_rate": 1.0558724006576511e-05, "loss": 1.8534, "step": 27261500 }, { "epoch": 78.91, "learning_rate": 1.0558000358929233e-05, "loss": 1.8483, "step": 27262000 }, { "epoch": 78.91, "learning_rate": 1.055727815857725e-05, "loss": 1.8552, "step": 27262500 }, { "epoch": 78.92, "learning_rate": 1.0556554510929975e-05, "loss": 1.8494, "step": 27263000 }, { "epoch": 78.92, "learning_rate": 1.0555830863282699e-05, "loss": 1.8486, "step": 27263500 }, { "epoch": 78.92, "learning_rate": 1.055510721563542e-05, "loss": 1.849, "step": 27264000 }, { "epoch": 78.92, "learning_rate": 1.0554383567988143e-05, "loss": 1.8497, "step": 27264500 }, { "epoch": 78.92, "learning_rate": 1.0553659920340869e-05, "loss": 1.8609, "step": 27265000 }, { "epoch": 78.92, "learning_rate": 1.0552937719988886e-05, "loss": 1.8585, "step": 27265500 }, { "epoch": 78.92, "learning_rate": 1.0552215519636903e-05, "loss": 1.852, "step": 27266000 }, { "epoch": 78.93, "learning_rate": 1.0551491871989625e-05, "loss": 1.8574, "step": 27266500 }, { "epoch": 78.93, "learning_rate": 1.055076822434235e-05, "loss": 1.8465, "step": 27267000 }, { "epoch": 78.93, "learning_rate": 1.0550044576695073e-05, "loss": 1.8526, "step": 27267500 }, { "epoch": 78.93, "learning_rate": 1.0549320929047795e-05, "loss": 1.8604, "step": 27268000 }, { "epoch": 78.93, "learning_rate": 1.054859728140052e-05, "loss": 1.8405, "step": 27268500 }, { "epoch": 78.93, "learning_rate": 1.0547875081048537e-05, "loss": 1.8461, "step": 27269000 }, { "epoch": 78.93, "learning_rate": 1.054715143340126e-05, "loss": 1.8813, "step": 27269500 }, { "epoch": 78.94, "learning_rate": 1.0546427785753983e-05, "loss": 1.8505, "step": 27270000 }, { "epoch": 78.94, "learning_rate": 1.0545704138106707e-05, "loss": 1.8637, "step": 27270500 }, { "epoch": 78.94, "learning_rate": 1.054498049045943e-05, "loss": 1.8784, "step": 27271000 }, { "epoch": 78.94, "learning_rate": 1.0544256842812153e-05, "loss": 1.8639, "step": 27271500 }, { "epoch": 78.94, "learning_rate": 1.0543533195164875e-05, "loss": 1.8439, "step": 27272000 }, { "epoch": 78.94, "learning_rate": 1.05428095475176e-05, "loss": 1.8388, "step": 27272500 }, { "epoch": 78.94, "learning_rate": 1.0542085899870323e-05, "loss": 1.8365, "step": 27273000 }, { "epoch": 78.95, "learning_rate": 1.054136369951834e-05, "loss": 1.8685, "step": 27273500 }, { "epoch": 78.95, "learning_rate": 1.0540640051871062e-05, "loss": 1.8634, "step": 27274000 }, { "epoch": 78.95, "learning_rate": 1.0539916404223788e-05, "loss": 1.8633, "step": 27274500 }, { "epoch": 78.95, "learning_rate": 1.053919275657651e-05, "loss": 1.8362, "step": 27275000 }, { "epoch": 78.95, "learning_rate": 1.0538470556224528e-05, "loss": 1.8362, "step": 27275500 }, { "epoch": 78.95, "learning_rate": 1.0537746908577252e-05, "loss": 1.8726, "step": 27276000 }, { "epoch": 78.95, "learning_rate": 1.0537023260929975e-05, "loss": 1.856, "step": 27276500 }, { "epoch": 78.96, "learning_rate": 1.0536299613282698e-05, "loss": 1.8707, "step": 27277000 }, { "epoch": 78.96, "learning_rate": 1.053557596563542e-05, "loss": 1.8488, "step": 27277500 }, { "epoch": 78.96, "learning_rate": 1.0534853765283439e-05, "loss": 1.8509, "step": 27278000 }, { "epoch": 78.96, "learning_rate": 1.0534130117636163e-05, "loss": 1.8412, "step": 27278500 }, { "epoch": 78.96, "learning_rate": 1.0533406469988885e-05, "loss": 1.865, "step": 27279000 }, { "epoch": 78.96, "learning_rate": 1.0532682822341607e-05, "loss": 1.8442, "step": 27279500 }, { "epoch": 78.96, "learning_rate": 1.0531959174694333e-05, "loss": 1.8422, "step": 27280000 }, { "epoch": 78.97, "learning_rate": 1.0531235527047055e-05, "loss": 1.8257, "step": 27280500 }, { "epoch": 78.97, "learning_rate": 1.0530511879399777e-05, "loss": 1.8487, "step": 27281000 }, { "epoch": 78.97, "learning_rate": 1.0529788231752501e-05, "loss": 1.8617, "step": 27281500 }, { "epoch": 78.97, "learning_rate": 1.0529064584105225e-05, "loss": 1.8615, "step": 27282000 }, { "epoch": 78.97, "learning_rate": 1.0528342383753242e-05, "loss": 1.8458, "step": 27282500 }, { "epoch": 78.97, "learning_rate": 1.0527618736105965e-05, "loss": 1.8584, "step": 27283000 }, { "epoch": 78.97, "learning_rate": 1.052689508845869e-05, "loss": 1.8678, "step": 27283500 }, { "epoch": 78.98, "learning_rate": 1.0526171440811413e-05, "loss": 1.8548, "step": 27284000 }, { "epoch": 78.98, "learning_rate": 1.052544924045943e-05, "loss": 1.8527, "step": 27284500 }, { "epoch": 78.98, "learning_rate": 1.0524727040107447e-05, "loss": 1.8696, "step": 27285000 }, { "epoch": 78.98, "learning_rate": 1.0524003392460171e-05, "loss": 1.8737, "step": 27285500 }, { "epoch": 78.98, "learning_rate": 1.0523279744812895e-05, "loss": 1.8588, "step": 27286000 }, { "epoch": 78.98, "learning_rate": 1.0522556097165617e-05, "loss": 1.8653, "step": 27286500 }, { "epoch": 78.98, "learning_rate": 1.052183244951834e-05, "loss": 1.8543, "step": 27287000 }, { "epoch": 78.99, "learning_rate": 1.0521108801871065e-05, "loss": 1.8492, "step": 27287500 }, { "epoch": 78.99, "learning_rate": 1.0520385154223787e-05, "loss": 1.8664, "step": 27288000 }, { "epoch": 78.99, "learning_rate": 1.051966150657651e-05, "loss": 1.8615, "step": 27288500 }, { "epoch": 78.99, "learning_rate": 1.0518939306224528e-05, "loss": 1.8571, "step": 27289000 }, { "epoch": 78.99, "learning_rate": 1.0518215658577252e-05, "loss": 1.8749, "step": 27289500 }, { "epoch": 78.99, "learning_rate": 1.0517492010929975e-05, "loss": 1.8434, "step": 27290000 }, { "epoch": 78.99, "learning_rate": 1.0516769810577992e-05, "loss": 1.8481, "step": 27290500 }, { "epoch": 79.0, "learning_rate": 1.0516047610226009e-05, "loss": 1.8619, "step": 27291000 }, { "epoch": 79.0, "learning_rate": 1.0515325409874028e-05, "loss": 1.8464, "step": 27291500 }, { "epoch": 79.0, "learning_rate": 1.0514601762226752e-05, "loss": 1.8542, "step": 27292000 }, { "epoch": 79.0, "eval_accuracy": 0.6850552637273573, "eval_accuracy_mlm": 0.6540332423012534, "eval_accuracy_nsp": 0.8514686538981544, "eval_loss": 2.189836263656616, "eval_runtime": 334.9805, "eval_samples_per_second": 1302.721, "eval_steps_per_second": 54.281, "step": 27292288 }, { "epoch": 79.0, "learning_rate": 1.0513878114579474e-05, "loss": 1.8447, "step": 27292500 }, { "epoch": 79.0, "learning_rate": 1.0513154466932196e-05, "loss": 1.865, "step": 27293000 }, { "epoch": 79.0, "learning_rate": 1.0512430819284922e-05, "loss": 1.8745, "step": 27293500 }, { "epoch": 79.0, "learning_rate": 1.051170861893294e-05, "loss": 1.8448, "step": 27294000 }, { "epoch": 79.01, "learning_rate": 1.0510984971285661e-05, "loss": 1.8455, "step": 27294500 }, { "epoch": 79.01, "learning_rate": 1.0510261323638384e-05, "loss": 1.8469, "step": 27295000 }, { "epoch": 79.01, "learning_rate": 1.050953767599111e-05, "loss": 1.8447, "step": 27295500 }, { "epoch": 79.01, "learning_rate": 1.0508814028343832e-05, "loss": 1.817, "step": 27296000 }, { "epoch": 79.01, "learning_rate": 1.0508090380696554e-05, "loss": 1.8682, "step": 27296500 }, { "epoch": 79.01, "learning_rate": 1.050736673304928e-05, "loss": 1.8316, "step": 27297000 }, { "epoch": 79.02, "learning_rate": 1.0506643085402002e-05, "loss": 1.8368, "step": 27297500 }, { "epoch": 79.02, "learning_rate": 1.0505919437754724e-05, "loss": 1.8646, "step": 27298000 }, { "epoch": 79.02, "learning_rate": 1.0505195790107448e-05, "loss": 1.8494, "step": 27298500 }, { "epoch": 79.02, "learning_rate": 1.0504472142460172e-05, "loss": 1.8147, "step": 27299000 }, { "epoch": 79.02, "learning_rate": 1.0503748494812894e-05, "loss": 1.8528, "step": 27299500 }, { "epoch": 79.02, "learning_rate": 1.0503024847165616e-05, "loss": 1.834, "step": 27300000 }, { "epoch": 79.02, "learning_rate": 1.0502302646813635e-05, "loss": 1.8273, "step": 27300500 }, { "epoch": 79.03, "learning_rate": 1.0501578999166359e-05, "loss": 1.8711, "step": 27301000 }, { "epoch": 79.03, "learning_rate": 1.0500855351519081e-05, "loss": 1.8414, "step": 27301500 }, { "epoch": 79.03, "learning_rate": 1.0500131703871805e-05, "loss": 1.8264, "step": 27302000 }, { "epoch": 79.03, "learning_rate": 1.049940805622453e-05, "loss": 1.8391, "step": 27302500 }, { "epoch": 79.03, "learning_rate": 1.0498684408577251e-05, "loss": 1.8355, "step": 27303000 }, { "epoch": 79.03, "learning_rate": 1.0497962208225269e-05, "loss": 1.8215, "step": 27303500 }, { "epoch": 79.03, "learning_rate": 1.0497238560577993e-05, "loss": 1.8559, "step": 27304000 }, { "epoch": 79.04, "learning_rate": 1.0496514912930717e-05, "loss": 1.83, "step": 27304500 }, { "epoch": 79.04, "learning_rate": 1.0495791265283439e-05, "loss": 1.8467, "step": 27305000 }, { "epoch": 79.04, "learning_rate": 1.0495067617636161e-05, "loss": 1.8302, "step": 27305500 }, { "epoch": 79.04, "learning_rate": 1.0494343969988885e-05, "loss": 1.8377, "step": 27306000 }, { "epoch": 79.04, "learning_rate": 1.0493620322341609e-05, "loss": 1.8415, "step": 27306500 }, { "epoch": 79.04, "learning_rate": 1.0492896674694331e-05, "loss": 1.8564, "step": 27307000 }, { "epoch": 79.04, "learning_rate": 1.049217447434235e-05, "loss": 1.8172, "step": 27307500 }, { "epoch": 79.05, "learning_rate": 1.0491450826695074e-05, "loss": 1.8314, "step": 27308000 }, { "epoch": 79.05, "learning_rate": 1.0490728626343091e-05, "loss": 1.8654, "step": 27308500 }, { "epoch": 79.05, "learning_rate": 1.0490004978695813e-05, "loss": 1.8448, "step": 27309000 }, { "epoch": 79.05, "learning_rate": 1.0489281331048537e-05, "loss": 1.8206, "step": 27309500 }, { "epoch": 79.05, "learning_rate": 1.0488557683401261e-05, "loss": 1.8654, "step": 27310000 }, { "epoch": 79.05, "learning_rate": 1.0487835483049279e-05, "loss": 1.8329, "step": 27310500 }, { "epoch": 79.05, "learning_rate": 1.0487111835402e-05, "loss": 1.8448, "step": 27311000 }, { "epoch": 79.06, "learning_rate": 1.0486388187754725e-05, "loss": 1.8647, "step": 27311500 }, { "epoch": 79.06, "learning_rate": 1.0485664540107449e-05, "loss": 1.8345, "step": 27312000 }, { "epoch": 79.06, "learning_rate": 1.0484940892460171e-05, "loss": 1.8403, "step": 27312500 }, { "epoch": 79.06, "learning_rate": 1.0484218692108188e-05, "loss": 1.8416, "step": 27313000 }, { "epoch": 79.06, "learning_rate": 1.0483495044460912e-05, "loss": 1.8412, "step": 27313500 }, { "epoch": 79.06, "learning_rate": 1.0482771396813636e-05, "loss": 1.8364, "step": 27314000 }, { "epoch": 79.06, "learning_rate": 1.0482047749166358e-05, "loss": 1.8228, "step": 27314500 }, { "epoch": 79.07, "learning_rate": 1.0481324101519082e-05, "loss": 1.8346, "step": 27315000 }, { "epoch": 79.07, "learning_rate": 1.04806019011671e-05, "loss": 1.8463, "step": 27315500 }, { "epoch": 79.07, "learning_rate": 1.0479878253519823e-05, "loss": 1.8671, "step": 27316000 }, { "epoch": 79.07, "learning_rate": 1.0479154605872546e-05, "loss": 1.8588, "step": 27316500 }, { "epoch": 79.07, "learning_rate": 1.047843095822527e-05, "loss": 1.8454, "step": 27317000 }, { "epoch": 79.07, "learning_rate": 1.0477708757873287e-05, "loss": 1.8553, "step": 27317500 }, { "epoch": 79.07, "learning_rate": 1.047698511022601e-05, "loss": 1.841, "step": 27318000 }, { "epoch": 79.08, "learning_rate": 1.0476261462578733e-05, "loss": 1.8501, "step": 27318500 }, { "epoch": 79.08, "learning_rate": 1.0475537814931457e-05, "loss": 1.8658, "step": 27319000 }, { "epoch": 79.08, "learning_rate": 1.047481416728418e-05, "loss": 1.8388, "step": 27319500 }, { "epoch": 79.08, "learning_rate": 1.0474090519636903e-05, "loss": 1.8375, "step": 27320000 }, { "epoch": 79.08, "learning_rate": 1.0473366871989627e-05, "loss": 1.8357, "step": 27320500 }, { "epoch": 79.08, "learning_rate": 1.0472643224342349e-05, "loss": 1.8395, "step": 27321000 }, { "epoch": 79.08, "learning_rate": 1.0471922471285661e-05, "loss": 1.8472, "step": 27321500 }, { "epoch": 79.09, "learning_rate": 1.0471198823638385e-05, "loss": 1.8336, "step": 27322000 }, { "epoch": 79.09, "learning_rate": 1.0470475175991108e-05, "loss": 1.8677, "step": 27322500 }, { "epoch": 79.09, "learning_rate": 1.0469751528343831e-05, "loss": 1.8614, "step": 27323000 }, { "epoch": 79.09, "learning_rate": 1.0469027880696555e-05, "loss": 1.8568, "step": 27323500 }, { "epoch": 79.09, "learning_rate": 1.0468304233049278e-05, "loss": 1.8383, "step": 27324000 }, { "epoch": 79.09, "learning_rate": 1.0467580585402002e-05, "loss": 1.8584, "step": 27324500 }, { "epoch": 79.09, "learning_rate": 1.0466856937754725e-05, "loss": 1.8481, "step": 27325000 }, { "epoch": 79.1, "learning_rate": 1.0466133290107448e-05, "loss": 1.8189, "step": 27325500 }, { "epoch": 79.1, "learning_rate": 1.0465411089755465e-05, "loss": 1.8424, "step": 27326000 }, { "epoch": 79.1, "learning_rate": 1.0464687442108189e-05, "loss": 1.8317, "step": 27326500 }, { "epoch": 79.1, "learning_rate": 1.0463966689051501e-05, "loss": 1.8652, "step": 27327000 }, { "epoch": 79.1, "learning_rate": 1.0463243041404225e-05, "loss": 1.8266, "step": 27327500 }, { "epoch": 79.1, "learning_rate": 1.0462519393756947e-05, "loss": 1.8478, "step": 27328000 }, { "epoch": 79.1, "learning_rate": 1.0461795746109671e-05, "loss": 1.8325, "step": 27328500 }, { "epoch": 79.11, "learning_rate": 1.0461072098462393e-05, "loss": 1.8191, "step": 27329000 }, { "epoch": 79.11, "learning_rate": 1.0460348450815117e-05, "loss": 1.8376, "step": 27329500 }, { "epoch": 79.11, "learning_rate": 1.045962480316784e-05, "loss": 1.8417, "step": 27330000 }, { "epoch": 79.11, "learning_rate": 1.0458901155520564e-05, "loss": 1.8533, "step": 27330500 }, { "epoch": 79.11, "learning_rate": 1.0458178955168583e-05, "loss": 1.849, "step": 27331000 }, { "epoch": 79.11, "learning_rate": 1.0457455307521305e-05, "loss": 1.8323, "step": 27331500 }, { "epoch": 79.11, "learning_rate": 1.0456731659874027e-05, "loss": 1.8495, "step": 27332000 }, { "epoch": 79.12, "learning_rate": 1.0456008012226751e-05, "loss": 1.8801, "step": 27332500 }, { "epoch": 79.12, "learning_rate": 1.0455284364579475e-05, "loss": 1.8691, "step": 27333000 }, { "epoch": 79.12, "learning_rate": 1.0454560716932197e-05, "loss": 1.8314, "step": 27333500 }, { "epoch": 79.12, "learning_rate": 1.0453837069284921e-05, "loss": 1.8348, "step": 27334000 }, { "epoch": 79.12, "learning_rate": 1.0453113421637645e-05, "loss": 1.8292, "step": 27334500 }, { "epoch": 79.12, "learning_rate": 1.0452389773990367e-05, "loss": 1.8271, "step": 27335000 }, { "epoch": 79.13, "learning_rate": 1.0451667573638384e-05, "loss": 1.8464, "step": 27335500 }, { "epoch": 79.13, "learning_rate": 1.0450943925991108e-05, "loss": 1.8367, "step": 27336000 }, { "epoch": 79.13, "learning_rate": 1.0450220278343832e-05, "loss": 1.829, "step": 27336500 }, { "epoch": 79.13, "learning_rate": 1.0449496630696554e-05, "loss": 1.8566, "step": 27337000 }, { "epoch": 79.13, "learning_rate": 1.0448774430344572e-05, "loss": 1.8476, "step": 27337500 }, { "epoch": 79.13, "learning_rate": 1.0448050782697296e-05, "loss": 1.8489, "step": 27338000 }, { "epoch": 79.13, "learning_rate": 1.044732713505002e-05, "loss": 1.8474, "step": 27338500 }, { "epoch": 79.14, "learning_rate": 1.0446603487402742e-05, "loss": 1.8492, "step": 27339000 }, { "epoch": 79.14, "learning_rate": 1.044588128705076e-05, "loss": 1.8397, "step": 27339500 }, { "epoch": 79.14, "learning_rate": 1.0445159086698778e-05, "loss": 1.8601, "step": 27340000 }, { "epoch": 79.14, "learning_rate": 1.0444435439051502e-05, "loss": 1.822, "step": 27340500 }, { "epoch": 79.14, "learning_rate": 1.0443711791404224e-05, "loss": 1.8421, "step": 27341000 }, { "epoch": 79.14, "learning_rate": 1.0442988143756948e-05, "loss": 1.8482, "step": 27341500 }, { "epoch": 79.14, "learning_rate": 1.044226449610967e-05, "loss": 1.8722, "step": 27342000 }, { "epoch": 79.15, "learning_rate": 1.044154229575769e-05, "loss": 1.8151, "step": 27342500 }, { "epoch": 79.15, "learning_rate": 1.0440818648110412e-05, "loss": 1.8314, "step": 27343000 }, { "epoch": 79.15, "learning_rate": 1.0440095000463135e-05, "loss": 1.8461, "step": 27343500 }, { "epoch": 79.15, "learning_rate": 1.0439371352815858e-05, "loss": 1.8489, "step": 27344000 }, { "epoch": 79.15, "learning_rate": 1.0438647705168582e-05, "loss": 1.8466, "step": 27344500 }, { "epoch": 79.15, "learning_rate": 1.0437924057521306e-05, "loss": 1.8273, "step": 27345000 }, { "epoch": 79.15, "learning_rate": 1.0437200409874028e-05, "loss": 1.8302, "step": 27345500 }, { "epoch": 79.16, "learning_rate": 1.0436476762226752e-05, "loss": 1.8463, "step": 27346000 }, { "epoch": 79.16, "learning_rate": 1.0435753114579474e-05, "loss": 1.8239, "step": 27346500 }, { "epoch": 79.16, "learning_rate": 1.0435029466932198e-05, "loss": 1.84, "step": 27347000 }, { "epoch": 79.16, "learning_rate": 1.043430871387551e-05, "loss": 1.8355, "step": 27347500 }, { "epoch": 79.16, "learning_rate": 1.0433585066228232e-05, "loss": 1.8525, "step": 27348000 }, { "epoch": 79.16, "learning_rate": 1.0432861418580956e-05, "loss": 1.8381, "step": 27348500 }, { "epoch": 79.16, "learning_rate": 1.043213777093368e-05, "loss": 1.8596, "step": 27349000 }, { "epoch": 79.17, "learning_rate": 1.0431414123286402e-05, "loss": 1.8596, "step": 27349500 }, { "epoch": 79.17, "learning_rate": 1.0430691922934421e-05, "loss": 1.8547, "step": 27350000 }, { "epoch": 79.17, "learning_rate": 1.0429968275287144e-05, "loss": 1.8412, "step": 27350500 }, { "epoch": 79.17, "learning_rate": 1.0429244627639868e-05, "loss": 1.8242, "step": 27351000 }, { "epoch": 79.17, "learning_rate": 1.042852097999259e-05, "loss": 1.8406, "step": 27351500 }, { "epoch": 79.17, "learning_rate": 1.0427797332345314e-05, "loss": 1.852, "step": 27352000 }, { "epoch": 79.17, "learning_rate": 1.0427073684698038e-05, "loss": 1.8314, "step": 27352500 }, { "epoch": 79.18, "learning_rate": 1.042635003705076e-05, "loss": 1.8641, "step": 27353000 }, { "epoch": 79.18, "learning_rate": 1.0425627836698777e-05, "loss": 1.8318, "step": 27353500 }, { "epoch": 79.18, "learning_rate": 1.0424904189051501e-05, "loss": 1.8449, "step": 27354000 }, { "epoch": 79.18, "learning_rate": 1.0424180541404225e-05, "loss": 1.8703, "step": 27354500 }, { "epoch": 79.18, "learning_rate": 1.0423456893756947e-05, "loss": 1.8224, "step": 27355000 }, { "epoch": 79.18, "learning_rate": 1.0422734693404964e-05, "loss": 1.8318, "step": 27355500 }, { "epoch": 79.18, "learning_rate": 1.0422011045757688e-05, "loss": 1.8256, "step": 27356000 }, { "epoch": 79.19, "learning_rate": 1.0421287398110412e-05, "loss": 1.8378, "step": 27356500 }, { "epoch": 79.19, "learning_rate": 1.0420563750463135e-05, "loss": 1.8509, "step": 27357000 }, { "epoch": 79.19, "learning_rate": 1.0419840102815858e-05, "loss": 1.8355, "step": 27357500 }, { "epoch": 79.19, "learning_rate": 1.0419116455168582e-05, "loss": 1.8552, "step": 27358000 }, { "epoch": 79.19, "learning_rate": 1.0418392807521305e-05, "loss": 1.8576, "step": 27358500 }, { "epoch": 79.19, "learning_rate": 1.0417669159874027e-05, "loss": 1.8419, "step": 27359000 }, { "epoch": 79.19, "learning_rate": 1.0416946959522046e-05, "loss": 1.848, "step": 27359500 }, { "epoch": 79.2, "learning_rate": 1.041622331187477e-05, "loss": 1.8435, "step": 27360000 }, { "epoch": 79.2, "learning_rate": 1.0415499664227492e-05, "loss": 1.8325, "step": 27360500 }, { "epoch": 79.2, "learning_rate": 1.0414776016580216e-05, "loss": 1.8229, "step": 27361000 }, { "epoch": 79.2, "learning_rate": 1.0414052368932938e-05, "loss": 1.8406, "step": 27361500 }, { "epoch": 79.2, "learning_rate": 1.0413328721285662e-05, "loss": 1.8579, "step": 27362000 }, { "epoch": 79.2, "learning_rate": 1.0412605073638384e-05, "loss": 1.8801, "step": 27362500 }, { "epoch": 79.2, "learning_rate": 1.0411881425991108e-05, "loss": 1.8612, "step": 27363000 }, { "epoch": 79.21, "learning_rate": 1.0411157778343832e-05, "loss": 1.8594, "step": 27363500 }, { "epoch": 79.21, "learning_rate": 1.041043557799185e-05, "loss": 1.8632, "step": 27364000 }, { "epoch": 79.21, "learning_rate": 1.0409711930344572e-05, "loss": 1.8463, "step": 27364500 }, { "epoch": 79.21, "learning_rate": 1.0408988282697296e-05, "loss": 1.8656, "step": 27365000 }, { "epoch": 79.21, "learning_rate": 1.040826463505002e-05, "loss": 1.8395, "step": 27365500 }, { "epoch": 79.21, "learning_rate": 1.0407542434698037e-05, "loss": 1.8372, "step": 27366000 }, { "epoch": 79.21, "learning_rate": 1.0406818787050759e-05, "loss": 1.8472, "step": 27366500 }, { "epoch": 79.22, "learning_rate": 1.0406095139403483e-05, "loss": 1.8403, "step": 27367000 }, { "epoch": 79.22, "learning_rate": 1.0405371491756207e-05, "loss": 1.8337, "step": 27367500 }, { "epoch": 79.22, "learning_rate": 1.0404649291404224e-05, "loss": 1.8396, "step": 27368000 }, { "epoch": 79.22, "learning_rate": 1.0403925643756946e-05, "loss": 1.8554, "step": 27368500 }, { "epoch": 79.22, "learning_rate": 1.040320199610967e-05, "loss": 1.8519, "step": 27369000 }, { "epoch": 79.22, "learning_rate": 1.040247979575769e-05, "loss": 1.8222, "step": 27369500 }, { "epoch": 79.22, "learning_rate": 1.0401756148110411e-05, "loss": 1.8348, "step": 27370000 }, { "epoch": 79.23, "learning_rate": 1.0401032500463135e-05, "loss": 1.8395, "step": 27370500 }, { "epoch": 79.23, "learning_rate": 1.040030885281586e-05, "loss": 1.8599, "step": 27371000 }, { "epoch": 79.23, "learning_rate": 1.0399585205168582e-05, "loss": 1.86, "step": 27371500 }, { "epoch": 79.23, "learning_rate": 1.0398861557521304e-05, "loss": 1.8422, "step": 27372000 }, { "epoch": 79.23, "learning_rate": 1.0398137909874028e-05, "loss": 1.8562, "step": 27372500 }, { "epoch": 79.23, "learning_rate": 1.0397414262226752e-05, "loss": 1.87, "step": 27373000 }, { "epoch": 79.24, "learning_rate": 1.0396690614579474e-05, "loss": 1.8484, "step": 27373500 }, { "epoch": 79.24, "learning_rate": 1.0395966966932198e-05, "loss": 1.8328, "step": 27374000 }, { "epoch": 79.24, "learning_rate": 1.0395243319284922e-05, "loss": 1.8343, "step": 27374500 }, { "epoch": 79.24, "learning_rate": 1.0394521118932939e-05, "loss": 1.8667, "step": 27375000 }, { "epoch": 79.24, "learning_rate": 1.0393797471285661e-05, "loss": 1.8445, "step": 27375500 }, { "epoch": 79.24, "learning_rate": 1.0393075270933678e-05, "loss": 1.8446, "step": 27376000 }, { "epoch": 79.24, "learning_rate": 1.0392351623286404e-05, "loss": 1.843, "step": 27376500 }, { "epoch": 79.25, "learning_rate": 1.0391627975639126e-05, "loss": 1.8455, "step": 27377000 }, { "epoch": 79.25, "learning_rate": 1.0390904327991849e-05, "loss": 1.8593, "step": 27377500 }, { "epoch": 79.25, "learning_rate": 1.0390180680344572e-05, "loss": 1.8644, "step": 27378000 }, { "epoch": 79.25, "learning_rate": 1.0389457032697296e-05, "loss": 1.855, "step": 27378500 }, { "epoch": 79.25, "learning_rate": 1.0388733385050019e-05, "loss": 1.8655, "step": 27379000 }, { "epoch": 79.25, "learning_rate": 1.038800973740274e-05, "loss": 1.837, "step": 27379500 }, { "epoch": 79.25, "learning_rate": 1.0387286089755466e-05, "loss": 1.8762, "step": 27380000 }, { "epoch": 79.26, "learning_rate": 1.0386562442108189e-05, "loss": 1.8581, "step": 27380500 }, { "epoch": 79.26, "learning_rate": 1.0385840241756206e-05, "loss": 1.8287, "step": 27381000 }, { "epoch": 79.26, "learning_rate": 1.038511659410893e-05, "loss": 1.8293, "step": 27381500 }, { "epoch": 79.26, "learning_rate": 1.0384392946461654e-05, "loss": 1.8481, "step": 27382000 }, { "epoch": 79.26, "learning_rate": 1.0383669298814376e-05, "loss": 1.8606, "step": 27382500 }, { "epoch": 79.26, "learning_rate": 1.0382947098462393e-05, "loss": 1.857, "step": 27383000 }, { "epoch": 79.26, "learning_rate": 1.0382223450815117e-05, "loss": 1.8408, "step": 27383500 }, { "epoch": 79.27, "learning_rate": 1.0381499803167841e-05, "loss": 1.8504, "step": 27384000 }, { "epoch": 79.27, "learning_rate": 1.0380776155520563e-05, "loss": 1.8448, "step": 27384500 }, { "epoch": 79.27, "learning_rate": 1.0380052507873286e-05, "loss": 1.8501, "step": 27385000 }, { "epoch": 79.27, "learning_rate": 1.0379328860226011e-05, "loss": 1.8397, "step": 27385500 }, { "epoch": 79.27, "learning_rate": 1.0378605212578733e-05, "loss": 1.8389, "step": 27386000 }, { "epoch": 79.27, "learning_rate": 1.0377881564931456e-05, "loss": 1.8505, "step": 27386500 }, { "epoch": 79.27, "learning_rate": 1.0377159364579473e-05, "loss": 1.8735, "step": 27387000 }, { "epoch": 79.28, "learning_rate": 1.0376437164227492e-05, "loss": 1.848, "step": 27387500 }, { "epoch": 79.28, "learning_rate": 1.0375713516580216e-05, "loss": 1.8694, "step": 27388000 }, { "epoch": 79.28, "learning_rate": 1.0374989868932938e-05, "loss": 1.8423, "step": 27388500 }, { "epoch": 79.28, "learning_rate": 1.0374266221285662e-05, "loss": 1.8691, "step": 27389000 }, { "epoch": 79.28, "learning_rate": 1.0373544020933681e-05, "loss": 1.8418, "step": 27389500 }, { "epoch": 79.28, "learning_rate": 1.0372820373286403e-05, "loss": 1.8674, "step": 27390000 }, { "epoch": 79.28, "learning_rate": 1.0372096725639125e-05, "loss": 1.8381, "step": 27390500 }, { "epoch": 79.29, "learning_rate": 1.037137307799185e-05, "loss": 1.8388, "step": 27391000 }, { "epoch": 79.29, "learning_rate": 1.0370649430344573e-05, "loss": 1.8387, "step": 27391500 }, { "epoch": 79.29, "learning_rate": 1.0369925782697295e-05, "loss": 1.8654, "step": 27392000 }, { "epoch": 79.29, "learning_rate": 1.0369202135050018e-05, "loss": 1.8233, "step": 27392500 }, { "epoch": 79.29, "learning_rate": 1.0368478487402743e-05, "loss": 1.8709, "step": 27393000 }, { "epoch": 79.29, "learning_rate": 1.0367754839755466e-05, "loss": 1.8506, "step": 27393500 }, { "epoch": 79.29, "learning_rate": 1.0367031192108188e-05, "loss": 1.8357, "step": 27394000 }, { "epoch": 79.3, "learning_rate": 1.0366308991756207e-05, "loss": 1.8314, "step": 27394500 }, { "epoch": 79.3, "learning_rate": 1.036558534410893e-05, "loss": 1.8724, "step": 27395000 }, { "epoch": 79.3, "learning_rate": 1.0364861696461653e-05, "loss": 1.8279, "step": 27395500 }, { "epoch": 79.3, "learning_rate": 1.0364138048814375e-05, "loss": 1.8307, "step": 27396000 }, { "epoch": 79.3, "learning_rate": 1.03634144011671e-05, "loss": 1.8588, "step": 27396500 }, { "epoch": 79.3, "learning_rate": 1.0362690753519823e-05, "loss": 1.8381, "step": 27397000 }, { "epoch": 79.3, "learning_rate": 1.0361967105872545e-05, "loss": 1.8349, "step": 27397500 }, { "epoch": 79.31, "learning_rate": 1.0361243458225269e-05, "loss": 1.8295, "step": 27398000 }, { "epoch": 79.31, "learning_rate": 1.0360519810577993e-05, "loss": 1.8479, "step": 27398500 }, { "epoch": 79.31, "learning_rate": 1.035979761022601e-05, "loss": 1.8561, "step": 27399000 }, { "epoch": 79.31, "learning_rate": 1.0359075409874028e-05, "loss": 1.854, "step": 27399500 }, { "epoch": 79.31, "learning_rate": 1.035835176222675e-05, "loss": 1.8694, "step": 27400000 }, { "epoch": 79.31, "learning_rate": 1.0357628114579475e-05, "loss": 1.861, "step": 27400500 }, { "epoch": 79.31, "learning_rate": 1.0356907361522788e-05, "loss": 1.8382, "step": 27401000 }, { "epoch": 79.32, "learning_rate": 1.035618371387551e-05, "loss": 1.8563, "step": 27401500 }, { "epoch": 79.32, "learning_rate": 1.0355460066228232e-05, "loss": 1.8346, "step": 27402000 }, { "epoch": 79.32, "learning_rate": 1.0354736418580958e-05, "loss": 1.8347, "step": 27402500 }, { "epoch": 79.32, "learning_rate": 1.035401277093368e-05, "loss": 1.8547, "step": 27403000 }, { "epoch": 79.32, "learning_rate": 1.0353289123286402e-05, "loss": 1.8301, "step": 27403500 }, { "epoch": 79.32, "learning_rate": 1.0352565475639126e-05, "loss": 1.8447, "step": 27404000 }, { "epoch": 79.32, "learning_rate": 1.035184182799185e-05, "loss": 1.8476, "step": 27404500 }, { "epoch": 79.33, "learning_rate": 1.0351118180344572e-05, "loss": 1.8358, "step": 27405000 }, { "epoch": 79.33, "learning_rate": 1.035039597999259e-05, "loss": 1.8416, "step": 27405500 }, { "epoch": 79.33, "learning_rate": 1.0349672332345314e-05, "loss": 1.8587, "step": 27406000 }, { "epoch": 79.33, "learning_rate": 1.0348948684698037e-05, "loss": 1.8372, "step": 27406500 }, { "epoch": 79.33, "learning_rate": 1.034822503705076e-05, "loss": 1.8502, "step": 27407000 }, { "epoch": 79.33, "learning_rate": 1.0347501389403484e-05, "loss": 1.8582, "step": 27407500 }, { "epoch": 79.33, "learning_rate": 1.0346777741756208e-05, "loss": 1.8363, "step": 27408000 }, { "epoch": 79.34, "learning_rate": 1.034605409410893e-05, "loss": 1.8813, "step": 27408500 }, { "epoch": 79.34, "learning_rate": 1.0345330446461652e-05, "loss": 1.8623, "step": 27409000 }, { "epoch": 79.34, "learning_rate": 1.0344608246109671e-05, "loss": 1.8724, "step": 27409500 }, { "epoch": 79.34, "learning_rate": 1.0343884598462395e-05, "loss": 1.8727, "step": 27410000 }, { "epoch": 79.34, "learning_rate": 1.0343160950815117e-05, "loss": 1.828, "step": 27410500 }, { "epoch": 79.34, "learning_rate": 1.034243730316784e-05, "loss": 1.8502, "step": 27411000 }, { "epoch": 79.35, "learning_rate": 1.0341715102815858e-05, "loss": 1.8762, "step": 27411500 }, { "epoch": 79.35, "learning_rate": 1.0340991455168582e-05, "loss": 1.8344, "step": 27412000 }, { "epoch": 79.35, "learning_rate": 1.0340267807521304e-05, "loss": 1.8256, "step": 27412500 }, { "epoch": 79.35, "learning_rate": 1.0339544159874028e-05, "loss": 1.7982, "step": 27413000 }, { "epoch": 79.35, "learning_rate": 1.0338820512226752e-05, "loss": 1.8426, "step": 27413500 }, { "epoch": 79.35, "learning_rate": 1.0338096864579475e-05, "loss": 1.8492, "step": 27414000 }, { "epoch": 79.35, "learning_rate": 1.0337374664227492e-05, "loss": 1.8548, "step": 27414500 }, { "epoch": 79.36, "learning_rate": 1.0336651016580216e-05, "loss": 1.8252, "step": 27415000 }, { "epoch": 79.36, "learning_rate": 1.033592736893294e-05, "loss": 1.8667, "step": 27415500 }, { "epoch": 79.36, "learning_rate": 1.0335203721285662e-05, "loss": 1.8667, "step": 27416000 }, { "epoch": 79.36, "learning_rate": 1.0334480073638384e-05, "loss": 1.8496, "step": 27416500 }, { "epoch": 79.36, "learning_rate": 1.0333756425991108e-05, "loss": 1.8448, "step": 27417000 }, { "epoch": 79.36, "learning_rate": 1.0333032778343832e-05, "loss": 1.8401, "step": 27417500 }, { "epoch": 79.36, "learning_rate": 1.0332309130696554e-05, "loss": 1.8362, "step": 27418000 }, { "epoch": 79.37, "learning_rate": 1.0331586930344571e-05, "loss": 1.8599, "step": 27418500 }, { "epoch": 79.37, "learning_rate": 1.0330863282697297e-05, "loss": 1.8477, "step": 27419000 }, { "epoch": 79.37, "learning_rate": 1.033013963505002e-05, "loss": 1.8604, "step": 27419500 }, { "epoch": 79.37, "learning_rate": 1.0329415987402742e-05, "loss": 1.8806, "step": 27420000 }, { "epoch": 79.37, "learning_rate": 1.032869378705076e-05, "loss": 1.8287, "step": 27420500 }, { "epoch": 79.37, "learning_rate": 1.0327970139403484e-05, "loss": 1.8414, "step": 27421000 }, { "epoch": 79.37, "learning_rate": 1.0327246491756207e-05, "loss": 1.8398, "step": 27421500 }, { "epoch": 79.38, "learning_rate": 1.0326522844108929e-05, "loss": 1.8354, "step": 27422000 }, { "epoch": 79.38, "learning_rate": 1.0325800643756948e-05, "loss": 1.83, "step": 27422500 }, { "epoch": 79.38, "learning_rate": 1.0325076996109672e-05, "loss": 1.8312, "step": 27423000 }, { "epoch": 79.38, "learning_rate": 1.0324353348462394e-05, "loss": 1.8377, "step": 27423500 }, { "epoch": 79.38, "learning_rate": 1.0323629700815116e-05, "loss": 1.8388, "step": 27424000 }, { "epoch": 79.38, "learning_rate": 1.032290605316784e-05, "loss": 1.8516, "step": 27424500 }, { "epoch": 79.38, "learning_rate": 1.0322183852815859e-05, "loss": 1.8444, "step": 27425000 }, { "epoch": 79.39, "learning_rate": 1.0321460205168581e-05, "loss": 1.8398, "step": 27425500 }, { "epoch": 79.39, "learning_rate": 1.0320736557521305e-05, "loss": 1.8172, "step": 27426000 }, { "epoch": 79.39, "learning_rate": 1.0320012909874027e-05, "loss": 1.8612, "step": 27426500 }, { "epoch": 79.39, "learning_rate": 1.0319289262226751e-05, "loss": 1.834, "step": 27427000 }, { "epoch": 79.39, "learning_rate": 1.0318565614579474e-05, "loss": 1.8312, "step": 27427500 }, { "epoch": 79.39, "learning_rate": 1.0317843414227493e-05, "loss": 1.8449, "step": 27428000 }, { "epoch": 79.39, "learning_rate": 1.0317119766580217e-05, "loss": 1.8347, "step": 27428500 }, { "epoch": 79.4, "learning_rate": 1.0316396118932939e-05, "loss": 1.8372, "step": 27429000 }, { "epoch": 79.4, "learning_rate": 1.0315672471285661e-05, "loss": 1.8534, "step": 27429500 }, { "epoch": 79.4, "learning_rate": 1.0314948823638385e-05, "loss": 1.8557, "step": 27430000 }, { "epoch": 79.4, "learning_rate": 1.0314226623286404e-05, "loss": 1.8515, "step": 27430500 }, { "epoch": 79.4, "learning_rate": 1.0313502975639126e-05, "loss": 1.8588, "step": 27431000 }, { "epoch": 79.4, "learning_rate": 1.0312779327991848e-05, "loss": 1.8625, "step": 27431500 }, { "epoch": 79.4, "learning_rate": 1.0312055680344572e-05, "loss": 1.8126, "step": 27432000 }, { "epoch": 79.41, "learning_rate": 1.0311332032697296e-05, "loss": 1.8515, "step": 27432500 }, { "epoch": 79.41, "learning_rate": 1.0310608385050018e-05, "loss": 1.8588, "step": 27433000 }, { "epoch": 79.41, "learning_rate": 1.0309884737402742e-05, "loss": 1.8395, "step": 27433500 }, { "epoch": 79.41, "learning_rate": 1.030916253705076e-05, "loss": 1.8779, "step": 27434000 }, { "epoch": 79.41, "learning_rate": 1.0308438889403484e-05, "loss": 1.8498, "step": 27434500 }, { "epoch": 79.41, "learning_rate": 1.0307715241756206e-05, "loss": 1.8504, "step": 27435000 }, { "epoch": 79.41, "learning_rate": 1.030699159410893e-05, "loss": 1.8643, "step": 27435500 }, { "epoch": 79.42, "learning_rate": 1.0306267946461654e-05, "loss": 1.8556, "step": 27436000 }, { "epoch": 79.42, "learning_rate": 1.0305544298814376e-05, "loss": 1.8354, "step": 27436500 }, { "epoch": 79.42, "learning_rate": 1.0304822098462393e-05, "loss": 1.829, "step": 27437000 }, { "epoch": 79.42, "learning_rate": 1.0304098450815117e-05, "loss": 1.8839, "step": 27437500 }, { "epoch": 79.42, "learning_rate": 1.0303374803167841e-05, "loss": 1.8679, "step": 27438000 }, { "epoch": 79.42, "learning_rate": 1.0302651155520563e-05, "loss": 1.8953, "step": 27438500 }, { "epoch": 79.42, "learning_rate": 1.0301927507873287e-05, "loss": 1.8424, "step": 27439000 }, { "epoch": 79.43, "learning_rate": 1.0301203860226011e-05, "loss": 1.8394, "step": 27439500 }, { "epoch": 79.43, "learning_rate": 1.0300480212578733e-05, "loss": 1.8334, "step": 27440000 }, { "epoch": 79.43, "learning_rate": 1.029975801222675e-05, "loss": 1.8514, "step": 27440500 }, { "epoch": 79.43, "learning_rate": 1.0299034364579474e-05, "loss": 1.8524, "step": 27441000 }, { "epoch": 79.43, "learning_rate": 1.0298312164227492e-05, "loss": 1.8455, "step": 27441500 }, { "epoch": 79.43, "learning_rate": 1.0297588516580216e-05, "loss": 1.8434, "step": 27442000 }, { "epoch": 79.43, "learning_rate": 1.0296864868932938e-05, "loss": 1.8498, "step": 27442500 }, { "epoch": 79.44, "learning_rate": 1.0296141221285662e-05, "loss": 1.8655, "step": 27443000 }, { "epoch": 79.44, "learning_rate": 1.0295419020933679e-05, "loss": 1.8285, "step": 27443500 }, { "epoch": 79.44, "learning_rate": 1.0294695373286403e-05, "loss": 1.8459, "step": 27444000 }, { "epoch": 79.44, "learning_rate": 1.0293971725639127e-05, "loss": 1.8545, "step": 27444500 }, { "epoch": 79.44, "learning_rate": 1.0293248077991849e-05, "loss": 1.8293, "step": 27445000 }, { "epoch": 79.44, "learning_rate": 1.0292524430344573e-05, "loss": 1.8544, "step": 27445500 }, { "epoch": 79.44, "learning_rate": 1.0291800782697295e-05, "loss": 1.8461, "step": 27446000 }, { "epoch": 79.45, "learning_rate": 1.029107713505002e-05, "loss": 1.8323, "step": 27446500 }, { "epoch": 79.45, "learning_rate": 1.0290353487402741e-05, "loss": 1.8501, "step": 27447000 }, { "epoch": 79.45, "learning_rate": 1.0289629839755465e-05, "loss": 1.8445, "step": 27447500 }, { "epoch": 79.45, "learning_rate": 1.028890619210819e-05, "loss": 1.8499, "step": 27448000 }, { "epoch": 79.45, "learning_rate": 1.0288182544460912e-05, "loss": 1.8529, "step": 27448500 }, { "epoch": 79.45, "learning_rate": 1.0287458896813635e-05, "loss": 1.8369, "step": 27449000 }, { "epoch": 79.46, "learning_rate": 1.0286736696461653e-05, "loss": 1.8668, "step": 27449500 }, { "epoch": 79.46, "learning_rate": 1.0286013048814377e-05, "loss": 1.8393, "step": 27450000 }, { "epoch": 79.46, "learning_rate": 1.0285289401167099e-05, "loss": 1.8383, "step": 27450500 }, { "epoch": 79.46, "learning_rate": 1.0284565753519823e-05, "loss": 1.8778, "step": 27451000 }, { "epoch": 79.46, "learning_rate": 1.0283842105872547e-05, "loss": 1.852, "step": 27451500 }, { "epoch": 79.46, "learning_rate": 1.0283118458225269e-05, "loss": 1.8602, "step": 27452000 }, { "epoch": 79.46, "learning_rate": 1.0282396257873286e-05, "loss": 1.8682, "step": 27452500 }, { "epoch": 79.47, "learning_rate": 1.028167261022601e-05, "loss": 1.8521, "step": 27453000 }, { "epoch": 79.47, "learning_rate": 1.0280948962578734e-05, "loss": 1.8628, "step": 27453500 }, { "epoch": 79.47, "learning_rate": 1.0280225314931456e-05, "loss": 1.8412, "step": 27454000 }, { "epoch": 79.47, "learning_rate": 1.027950166728418e-05, "loss": 1.8886, "step": 27454500 }, { "epoch": 79.47, "learning_rate": 1.0278778019636902e-05, "loss": 1.8488, "step": 27455000 }, { "epoch": 79.47, "learning_rate": 1.0278054371989626e-05, "loss": 1.8435, "step": 27455500 }, { "epoch": 79.47, "learning_rate": 1.0277330724342349e-05, "loss": 1.8324, "step": 27456000 }, { "epoch": 79.48, "learning_rate": 1.0276608523990368e-05, "loss": 1.8489, "step": 27456500 }, { "epoch": 79.48, "learning_rate": 1.0275884876343091e-05, "loss": 1.8468, "step": 27457000 }, { "epoch": 79.48, "learning_rate": 1.0275161228695814e-05, "loss": 1.8201, "step": 27457500 }, { "epoch": 79.48, "learning_rate": 1.0274437581048536e-05, "loss": 1.842, "step": 27458000 }, { "epoch": 79.48, "learning_rate": 1.027371393340126e-05, "loss": 1.8348, "step": 27458500 }, { "epoch": 79.48, "learning_rate": 1.0272990285753984e-05, "loss": 1.843, "step": 27459000 }, { "epoch": 79.48, "learning_rate": 1.0272266638106706e-05, "loss": 1.842, "step": 27459500 }, { "epoch": 79.49, "learning_rate": 1.027154299045943e-05, "loss": 1.8512, "step": 27460000 }, { "epoch": 79.49, "learning_rate": 1.0270822237402742e-05, "loss": 1.8455, "step": 27460500 }, { "epoch": 79.49, "learning_rate": 1.0270098589755466e-05, "loss": 1.8545, "step": 27461000 }, { "epoch": 79.49, "learning_rate": 1.0269376389403483e-05, "loss": 1.8532, "step": 27461500 }, { "epoch": 79.49, "learning_rate": 1.0268652741756206e-05, "loss": 1.8564, "step": 27462000 }, { "epoch": 79.49, "learning_rate": 1.026792909410893e-05, "loss": 1.8565, "step": 27462500 }, { "epoch": 79.49, "learning_rate": 1.0267205446461653e-05, "loss": 1.8462, "step": 27463000 }, { "epoch": 79.5, "learning_rate": 1.0266481798814376e-05, "loss": 1.8419, "step": 27463500 }, { "epoch": 79.5, "learning_rate": 1.0265759598462393e-05, "loss": 1.8503, "step": 27464000 }, { "epoch": 79.5, "learning_rate": 1.0265035950815117e-05, "loss": 1.868, "step": 27464500 }, { "epoch": 79.5, "learning_rate": 1.026431230316784e-05, "loss": 1.8588, "step": 27465000 }, { "epoch": 79.5, "learning_rate": 1.0263588655520563e-05, "loss": 1.8413, "step": 27465500 }, { "epoch": 79.5, "learning_rate": 1.0262865007873287e-05, "loss": 1.8468, "step": 27466000 }, { "epoch": 79.5, "learning_rate": 1.0262141360226011e-05, "loss": 1.8565, "step": 27466500 }, { "epoch": 79.51, "learning_rate": 1.0261417712578733e-05, "loss": 1.8372, "step": 27467000 }, { "epoch": 79.51, "learning_rate": 1.0260694064931455e-05, "loss": 1.825, "step": 27467500 }, { "epoch": 79.51, "learning_rate": 1.025997041728418e-05, "loss": 1.8548, "step": 27468000 }, { "epoch": 79.51, "learning_rate": 1.0259248216932198e-05, "loss": 1.8408, "step": 27468500 }, { "epoch": 79.51, "learning_rate": 1.025852456928492e-05, "loss": 1.8697, "step": 27469000 }, { "epoch": 79.51, "learning_rate": 1.0257800921637644e-05, "loss": 1.8541, "step": 27469500 }, { "epoch": 79.51, "learning_rate": 1.0257077273990368e-05, "loss": 1.8516, "step": 27470000 }, { "epoch": 79.52, "learning_rate": 1.0256355073638386e-05, "loss": 1.8817, "step": 27470500 }, { "epoch": 79.52, "learning_rate": 1.0255631425991108e-05, "loss": 1.8493, "step": 27471000 }, { "epoch": 79.52, "learning_rate": 1.0254907778343832e-05, "loss": 1.8376, "step": 27471500 }, { "epoch": 79.52, "learning_rate": 1.0254184130696556e-05, "loss": 1.8522, "step": 27472000 }, { "epoch": 79.52, "learning_rate": 1.0253461930344573e-05, "loss": 1.8479, "step": 27472500 }, { "epoch": 79.52, "learning_rate": 1.0252738282697295e-05, "loss": 1.8835, "step": 27473000 }, { "epoch": 79.52, "learning_rate": 1.0252014635050019e-05, "loss": 1.8634, "step": 27473500 }, { "epoch": 79.53, "learning_rate": 1.0251290987402743e-05, "loss": 1.8563, "step": 27474000 }, { "epoch": 79.53, "learning_rate": 1.0250567339755465e-05, "loss": 1.8518, "step": 27474500 }, { "epoch": 79.53, "learning_rate": 1.0249845139403483e-05, "loss": 1.8416, "step": 27475000 }, { "epoch": 79.53, "learning_rate": 1.0249121491756206e-05, "loss": 1.8642, "step": 27475500 }, { "epoch": 79.53, "learning_rate": 1.024839784410893e-05, "loss": 1.8585, "step": 27476000 }, { "epoch": 79.53, "learning_rate": 1.0247674196461653e-05, "loss": 1.8525, "step": 27476500 }, { "epoch": 79.53, "learning_rate": 1.0246950548814375e-05, "loss": 1.8574, "step": 27477000 }, { "epoch": 79.54, "learning_rate": 1.0246228348462394e-05, "loss": 1.8605, "step": 27477500 }, { "epoch": 79.54, "learning_rate": 1.0245504700815118e-05, "loss": 1.8307, "step": 27478000 }, { "epoch": 79.54, "learning_rate": 1.024478105316784e-05, "loss": 1.8684, "step": 27478500 }, { "epoch": 79.54, "learning_rate": 1.0244057405520564e-05, "loss": 1.8499, "step": 27479000 }, { "epoch": 79.54, "learning_rate": 1.0243333757873288e-05, "loss": 1.8132, "step": 27479500 }, { "epoch": 79.54, "learning_rate": 1.024261011022601e-05, "loss": 1.8579, "step": 27480000 }, { "epoch": 79.54, "learning_rate": 1.0241887909874027e-05, "loss": 1.8454, "step": 27480500 }, { "epoch": 79.55, "learning_rate": 1.0241164262226751e-05, "loss": 1.8514, "step": 27481000 }, { "epoch": 79.55, "learning_rate": 1.0240440614579475e-05, "loss": 1.8394, "step": 27481500 }, { "epoch": 79.55, "learning_rate": 1.0239716966932197e-05, "loss": 1.8606, "step": 27482000 }, { "epoch": 79.55, "learning_rate": 1.023899331928492e-05, "loss": 1.8433, "step": 27482500 }, { "epoch": 79.55, "learning_rate": 1.0238271118932939e-05, "loss": 1.8356, "step": 27483000 }, { "epoch": 79.55, "learning_rate": 1.0237547471285662e-05, "loss": 1.8571, "step": 27483500 }, { "epoch": 79.55, "learning_rate": 1.0236823823638385e-05, "loss": 1.8445, "step": 27484000 }, { "epoch": 79.56, "learning_rate": 1.0236100175991107e-05, "loss": 1.8463, "step": 27484500 }, { "epoch": 79.56, "learning_rate": 1.0235376528343833e-05, "loss": 1.8515, "step": 27485000 }, { "epoch": 79.56, "learning_rate": 1.023465432799185e-05, "loss": 1.8557, "step": 27485500 }, { "epoch": 79.56, "learning_rate": 1.0233930680344572e-05, "loss": 1.8317, "step": 27486000 }, { "epoch": 79.56, "learning_rate": 1.0233207032697296e-05, "loss": 1.8586, "step": 27486500 }, { "epoch": 79.56, "learning_rate": 1.023248338505002e-05, "loss": 1.8407, "step": 27487000 }, { "epoch": 79.57, "learning_rate": 1.0231761184698037e-05, "loss": 1.8542, "step": 27487500 }, { "epoch": 79.57, "learning_rate": 1.023103753705076e-05, "loss": 1.8332, "step": 27488000 }, { "epoch": 79.57, "learning_rate": 1.0230313889403483e-05, "loss": 1.8329, "step": 27488500 }, { "epoch": 79.57, "learning_rate": 1.0229590241756207e-05, "loss": 1.8589, "step": 27489000 }, { "epoch": 79.57, "learning_rate": 1.022886659410893e-05, "loss": 1.8571, "step": 27489500 }, { "epoch": 79.57, "learning_rate": 1.0228142946461652e-05, "loss": 1.8563, "step": 27490000 }, { "epoch": 79.57, "learning_rate": 1.0227419298814377e-05, "loss": 1.8668, "step": 27490500 }, { "epoch": 79.58, "learning_rate": 1.02266956511671e-05, "loss": 1.843, "step": 27491000 }, { "epoch": 79.58, "learning_rate": 1.0225972003519822e-05, "loss": 1.8766, "step": 27491500 }, { "epoch": 79.58, "learning_rate": 1.0225249803167839e-05, "loss": 1.8418, "step": 27492000 }, { "epoch": 79.58, "learning_rate": 1.0224526155520565e-05, "loss": 1.8469, "step": 27492500 }, { "epoch": 79.58, "learning_rate": 1.0223803955168582e-05, "loss": 1.8362, "step": 27493000 }, { "epoch": 79.58, "learning_rate": 1.0223080307521304e-05, "loss": 1.8102, "step": 27493500 }, { "epoch": 79.58, "learning_rate": 1.0222356659874028e-05, "loss": 1.8379, "step": 27494000 }, { "epoch": 79.59, "learning_rate": 1.0221633012226752e-05, "loss": 1.8344, "step": 27494500 }, { "epoch": 79.59, "learning_rate": 1.0220909364579474e-05, "loss": 1.8568, "step": 27495000 }, { "epoch": 79.59, "learning_rate": 1.0220185716932196e-05, "loss": 1.827, "step": 27495500 }, { "epoch": 79.59, "learning_rate": 1.0219463516580215e-05, "loss": 1.8444, "step": 27496000 }, { "epoch": 79.59, "learning_rate": 1.021873986893294e-05, "loss": 1.8712, "step": 27496500 }, { "epoch": 79.59, "learning_rate": 1.0218016221285662e-05, "loss": 1.8546, "step": 27497000 }, { "epoch": 79.59, "learning_rate": 1.0217292573638384e-05, "loss": 1.8697, "step": 27497500 }, { "epoch": 79.6, "learning_rate": 1.021656892599111e-05, "loss": 1.8382, "step": 27498000 }, { "epoch": 79.6, "learning_rate": 1.0215845278343832e-05, "loss": 1.8713, "step": 27498500 }, { "epoch": 79.6, "learning_rate": 1.0215123077991849e-05, "loss": 1.8325, "step": 27499000 }, { "epoch": 79.6, "learning_rate": 1.0214399430344571e-05, "loss": 1.8355, "step": 27499500 }, { "epoch": 79.6, "learning_rate": 1.0213675782697297e-05, "loss": 1.8483, "step": 27500000 }, { "epoch": 79.6, "learning_rate": 1.0212952135050019e-05, "loss": 1.8509, "step": 27500500 }, { "epoch": 79.6, "learning_rate": 1.0212228487402741e-05, "loss": 1.8599, "step": 27501000 }, { "epoch": 79.61, "learning_rate": 1.0211504839755467e-05, "loss": 1.8332, "step": 27501500 }, { "epoch": 79.61, "learning_rate": 1.0210781192108189e-05, "loss": 1.8744, "step": 27502000 }, { "epoch": 79.61, "learning_rate": 1.0210058991756206e-05, "loss": 1.8478, "step": 27502500 }, { "epoch": 79.61, "learning_rate": 1.0209335344108929e-05, "loss": 1.8446, "step": 27503000 }, { "epoch": 79.61, "learning_rate": 1.0208611696461654e-05, "loss": 1.8522, "step": 27503500 }, { "epoch": 79.61, "learning_rate": 1.0207888048814376e-05, "loss": 1.8213, "step": 27504000 }, { "epoch": 79.61, "learning_rate": 1.0207164401167099e-05, "loss": 1.8602, "step": 27504500 }, { "epoch": 79.62, "learning_rate": 1.0206440753519823e-05, "loss": 1.8584, "step": 27505000 }, { "epoch": 79.62, "learning_rate": 1.0205717105872547e-05, "loss": 1.8362, "step": 27505500 }, { "epoch": 79.62, "learning_rate": 1.0204994905520564e-05, "loss": 1.8541, "step": 27506000 }, { "epoch": 79.62, "learning_rate": 1.0204271257873286e-05, "loss": 1.8332, "step": 27506500 }, { "epoch": 79.62, "learning_rate": 1.0203547610226012e-05, "loss": 1.8353, "step": 27507000 }, { "epoch": 79.62, "learning_rate": 1.0202823962578734e-05, "loss": 1.8563, "step": 27507500 }, { "epoch": 79.62, "learning_rate": 1.0202100314931456e-05, "loss": 1.833, "step": 27508000 }, { "epoch": 79.63, "learning_rate": 1.020137666728418e-05, "loss": 1.8364, "step": 27508500 }, { "epoch": 79.63, "learning_rate": 1.0200653019636904e-05, "loss": 1.8516, "step": 27509000 }, { "epoch": 79.63, "learning_rate": 1.0199929371989626e-05, "loss": 1.8516, "step": 27509500 }, { "epoch": 79.63, "learning_rate": 1.0199207171637643e-05, "loss": 1.8816, "step": 27510000 }, { "epoch": 79.63, "learning_rate": 1.019848497128566e-05, "loss": 1.8546, "step": 27510500 }, { "epoch": 79.63, "learning_rate": 1.0197761323638386e-05, "loss": 1.8799, "step": 27511000 }, { "epoch": 79.63, "learning_rate": 1.0197037675991109e-05, "loss": 1.8605, "step": 27511500 }, { "epoch": 79.64, "learning_rate": 1.019631402834383e-05, "loss": 1.8413, "step": 27512000 }, { "epoch": 79.64, "learning_rate": 1.0195590380696555e-05, "loss": 1.8429, "step": 27512500 }, { "epoch": 79.64, "learning_rate": 1.0194866733049279e-05, "loss": 1.8628, "step": 27513000 }, { "epoch": 79.64, "learning_rate": 1.0194143085402e-05, "loss": 1.8468, "step": 27513500 }, { "epoch": 79.64, "learning_rate": 1.0193420885050018e-05, "loss": 1.8394, "step": 27514000 }, { "epoch": 79.64, "learning_rate": 1.0192697237402742e-05, "loss": 1.8454, "step": 27514500 }, { "epoch": 79.64, "learning_rate": 1.0191973589755466e-05, "loss": 1.8393, "step": 27515000 }, { "epoch": 79.65, "learning_rate": 1.0191249942108188e-05, "loss": 1.8567, "step": 27515500 }, { "epoch": 79.65, "learning_rate": 1.0190526294460912e-05, "loss": 1.8373, "step": 27516000 }, { "epoch": 79.65, "learning_rate": 1.0189802646813636e-05, "loss": 1.848, "step": 27516500 }, { "epoch": 79.65, "learning_rate": 1.0189078999166358e-05, "loss": 1.8315, "step": 27517000 }, { "epoch": 79.65, "learning_rate": 1.018835535151908e-05, "loss": 1.8621, "step": 27517500 }, { "epoch": 79.65, "learning_rate": 1.01876331511671e-05, "loss": 1.8444, "step": 27518000 }, { "epoch": 79.65, "learning_rate": 1.0186909503519823e-05, "loss": 1.8752, "step": 27518500 }, { "epoch": 79.66, "learning_rate": 1.0186185855872546e-05, "loss": 1.8804, "step": 27519000 }, { "epoch": 79.66, "learning_rate": 1.018546220822527e-05, "loss": 1.8395, "step": 27519500 }, { "epoch": 79.66, "learning_rate": 1.0184738560577993e-05, "loss": 1.8331, "step": 27520000 }, { "epoch": 79.66, "learning_rate": 1.0184014912930716e-05, "loss": 1.855, "step": 27520500 }, { "epoch": 79.66, "learning_rate": 1.0183291265283438e-05, "loss": 1.8447, "step": 27521000 }, { "epoch": 79.66, "learning_rate": 1.0182567617636162e-05, "loss": 1.8552, "step": 27521500 }, { "epoch": 79.66, "learning_rate": 1.018184541728418e-05, "loss": 1.856, "step": 27522000 }, { "epoch": 79.67, "learning_rate": 1.0181121769636903e-05, "loss": 1.8737, "step": 27522500 }, { "epoch": 79.67, "learning_rate": 1.0180398121989625e-05, "loss": 1.8329, "step": 27523000 }, { "epoch": 79.67, "learning_rate": 1.017967447434235e-05, "loss": 1.8611, "step": 27523500 }, { "epoch": 79.67, "learning_rate": 1.0178950826695073e-05, "loss": 1.855, "step": 27524000 }, { "epoch": 79.67, "learning_rate": 1.017822862634309e-05, "loss": 1.8556, "step": 27524500 }, { "epoch": 79.67, "learning_rate": 1.0177504978695814e-05, "loss": 1.8584, "step": 27525000 }, { "epoch": 79.68, "learning_rate": 1.0176781331048537e-05, "loss": 1.8535, "step": 27525500 }, { "epoch": 79.68, "learning_rate": 1.017605768340126e-05, "loss": 1.8555, "step": 27526000 }, { "epoch": 79.68, "learning_rate": 1.0175335483049278e-05, "loss": 1.8325, "step": 27526500 }, { "epoch": 79.68, "learning_rate": 1.0174613282697295e-05, "loss": 1.824, "step": 27527000 }, { "epoch": 79.68, "learning_rate": 1.0173891082345314e-05, "loss": 1.8446, "step": 27527500 }, { "epoch": 79.68, "learning_rate": 1.0173167434698038e-05, "loss": 1.8321, "step": 27528000 }, { "epoch": 79.68, "learning_rate": 1.017244378705076e-05, "loss": 1.8499, "step": 27528500 }, { "epoch": 79.69, "learning_rate": 1.0171720139403482e-05, "loss": 1.8459, "step": 27529000 }, { "epoch": 79.69, "learning_rate": 1.0170996491756206e-05, "loss": 1.8538, "step": 27529500 }, { "epoch": 79.69, "learning_rate": 1.017027284410893e-05, "loss": 1.869, "step": 27530000 }, { "epoch": 79.69, "learning_rate": 1.0169549196461652e-05, "loss": 1.844, "step": 27530500 }, { "epoch": 79.69, "learning_rate": 1.0168825548814376e-05, "loss": 1.8496, "step": 27531000 }, { "epoch": 79.69, "learning_rate": 1.01681019011671e-05, "loss": 1.8308, "step": 27531500 }, { "epoch": 79.69, "learning_rate": 1.0167378253519822e-05, "loss": 1.8138, "step": 27532000 }, { "epoch": 79.7, "learning_rate": 1.0166654605872546e-05, "loss": 1.8764, "step": 27532500 }, { "epoch": 79.7, "learning_rate": 1.0165932405520564e-05, "loss": 1.8231, "step": 27533000 }, { "epoch": 79.7, "learning_rate": 1.0165208757873288e-05, "loss": 1.8207, "step": 27533500 }, { "epoch": 79.7, "learning_rate": 1.016448511022601e-05, "loss": 1.8212, "step": 27534000 }, { "epoch": 79.7, "learning_rate": 1.0163761462578734e-05, "loss": 1.8392, "step": 27534500 }, { "epoch": 79.7, "learning_rate": 1.0163037814931456e-05, "loss": 1.8447, "step": 27535000 }, { "epoch": 79.7, "learning_rate": 1.016231416728418e-05, "loss": 1.8646, "step": 27535500 }, { "epoch": 79.71, "learning_rate": 1.0161590519636902e-05, "loss": 1.811, "step": 27536000 }, { "epoch": 79.71, "learning_rate": 1.0160866871989626e-05, "loss": 1.8573, "step": 27536500 }, { "epoch": 79.71, "learning_rate": 1.016014322434235e-05, "loss": 1.85, "step": 27537000 }, { "epoch": 79.71, "learning_rate": 1.0159419576695072e-05, "loss": 1.8436, "step": 27537500 }, { "epoch": 79.71, "learning_rate": 1.0158695929047796e-05, "loss": 1.8533, "step": 27538000 }, { "epoch": 79.71, "learning_rate": 1.0157973728695813e-05, "loss": 1.8417, "step": 27538500 }, { "epoch": 79.71, "learning_rate": 1.0157250081048537e-05, "loss": 1.85, "step": 27539000 }, { "epoch": 79.72, "learning_rate": 1.0156527880696555e-05, "loss": 1.8493, "step": 27539500 }, { "epoch": 79.72, "learning_rate": 1.0155804233049279e-05, "loss": 1.8495, "step": 27540000 }, { "epoch": 79.72, "learning_rate": 1.0155080585402e-05, "loss": 1.8711, "step": 27540500 }, { "epoch": 79.72, "learning_rate": 1.0154356937754725e-05, "loss": 1.8593, "step": 27541000 }, { "epoch": 79.72, "learning_rate": 1.0153633290107447e-05, "loss": 1.8553, "step": 27541500 }, { "epoch": 79.72, "learning_rate": 1.0152911089755466e-05, "loss": 1.8743, "step": 27542000 }, { "epoch": 79.72, "learning_rate": 1.0152187442108188e-05, "loss": 1.8305, "step": 27542500 }, { "epoch": 79.73, "learning_rate": 1.0151463794460912e-05, "loss": 1.8713, "step": 27543000 }, { "epoch": 79.73, "learning_rate": 1.0150740146813636e-05, "loss": 1.8341, "step": 27543500 }, { "epoch": 79.73, "learning_rate": 1.0150016499166358e-05, "loss": 1.8516, "step": 27544000 }, { "epoch": 79.73, "learning_rate": 1.0149292851519082e-05, "loss": 1.8523, "step": 27544500 }, { "epoch": 79.73, "learning_rate": 1.0148569203871804e-05, "loss": 1.8532, "step": 27545000 }, { "epoch": 79.73, "learning_rate": 1.0147845556224528e-05, "loss": 1.8597, "step": 27545500 }, { "epoch": 79.73, "learning_rate": 1.014712190857725e-05, "loss": 1.8397, "step": 27546000 }, { "epoch": 79.74, "learning_rate": 1.014639970822527e-05, "loss": 1.8577, "step": 27546500 }, { "epoch": 79.74, "learning_rate": 1.0145677507873287e-05, "loss": 1.8381, "step": 27547000 }, { "epoch": 79.74, "learning_rate": 1.0144955307521304e-05, "loss": 1.8461, "step": 27547500 }, { "epoch": 79.74, "learning_rate": 1.0144231659874028e-05, "loss": 1.8476, "step": 27548000 }, { "epoch": 79.74, "learning_rate": 1.0143508012226752e-05, "loss": 1.826, "step": 27548500 }, { "epoch": 79.74, "learning_rate": 1.0142784364579474e-05, "loss": 1.8523, "step": 27549000 }, { "epoch": 79.74, "learning_rate": 1.0142060716932198e-05, "loss": 1.855, "step": 27549500 }, { "epoch": 79.75, "learning_rate": 1.014133706928492e-05, "loss": 1.8494, "step": 27550000 }, { "epoch": 79.75, "learning_rate": 1.0140614868932939e-05, "loss": 1.8483, "step": 27550500 }, { "epoch": 79.75, "learning_rate": 1.0139891221285661e-05, "loss": 1.8333, "step": 27551000 }, { "epoch": 79.75, "learning_rate": 1.0139167573638385e-05, "loss": 1.8358, "step": 27551500 }, { "epoch": 79.75, "learning_rate": 1.0138443925991108e-05, "loss": 1.8562, "step": 27552000 }, { "epoch": 79.75, "learning_rate": 1.0137720278343831e-05, "loss": 1.8504, "step": 27552500 }, { "epoch": 79.75, "learning_rate": 1.0136996630696555e-05, "loss": 1.8508, "step": 27553000 }, { "epoch": 79.76, "learning_rate": 1.0136272983049278e-05, "loss": 1.8684, "step": 27553500 }, { "epoch": 79.76, "learning_rate": 1.0135550782697297e-05, "loss": 1.862, "step": 27554000 }, { "epoch": 79.76, "learning_rate": 1.0134827135050019e-05, "loss": 1.8464, "step": 27554500 }, { "epoch": 79.76, "learning_rate": 1.0134103487402743e-05, "loss": 1.8352, "step": 27555000 }, { "epoch": 79.76, "learning_rate": 1.0133379839755465e-05, "loss": 1.8202, "step": 27555500 }, { "epoch": 79.76, "learning_rate": 1.0132656192108189e-05, "loss": 1.858, "step": 27556000 }, { "epoch": 79.76, "learning_rate": 1.0131932544460913e-05, "loss": 1.8276, "step": 27556500 }, { "epoch": 79.77, "learning_rate": 1.0131208896813635e-05, "loss": 1.8232, "step": 27557000 }, { "epoch": 79.77, "learning_rate": 1.0130485249166359e-05, "loss": 1.8423, "step": 27557500 }, { "epoch": 79.77, "learning_rate": 1.0129761601519081e-05, "loss": 1.8393, "step": 27558000 }, { "epoch": 79.77, "learning_rate": 1.0129037953871805e-05, "loss": 1.8586, "step": 27558500 }, { "epoch": 79.77, "learning_rate": 1.0128314306224527e-05, "loss": 1.858, "step": 27559000 }, { "epoch": 79.77, "learning_rate": 1.0127590658577251e-05, "loss": 1.8537, "step": 27559500 }, { "epoch": 79.77, "learning_rate": 1.0126867010929975e-05, "loss": 1.855, "step": 27560000 }, { "epoch": 79.78, "learning_rate": 1.0126146257873287e-05, "loss": 1.847, "step": 27560500 }, { "epoch": 79.78, "learning_rate": 1.012542261022601e-05, "loss": 1.866, "step": 27561000 }, { "epoch": 79.78, "learning_rate": 1.0124698962578734e-05, "loss": 1.838, "step": 27561500 }, { "epoch": 79.78, "learning_rate": 1.0123975314931458e-05, "loss": 1.8249, "step": 27562000 }, { "epoch": 79.78, "learning_rate": 1.012325166728418e-05, "loss": 1.8229, "step": 27562500 }, { "epoch": 79.78, "learning_rate": 1.0122528019636902e-05, "loss": 1.8554, "step": 27563000 }, { "epoch": 79.79, "learning_rate": 1.0121804371989626e-05, "loss": 1.8628, "step": 27563500 }, { "epoch": 79.79, "learning_rate": 1.012108072434235e-05, "loss": 1.8444, "step": 27564000 }, { "epoch": 79.79, "learning_rate": 1.0120357076695072e-05, "loss": 1.852, "step": 27564500 }, { "epoch": 79.79, "learning_rate": 1.0119633429047796e-05, "loss": 1.8666, "step": 27565000 }, { "epoch": 79.79, "learning_rate": 1.011890978140052e-05, "loss": 1.849, "step": 27565500 }, { "epoch": 79.79, "learning_rate": 1.0118187581048537e-05, "loss": 1.8424, "step": 27566000 }, { "epoch": 79.79, "learning_rate": 1.011746393340126e-05, "loss": 1.8623, "step": 27566500 }, { "epoch": 79.8, "learning_rate": 1.0116740285753983e-05, "loss": 1.835, "step": 27567000 }, { "epoch": 79.8, "learning_rate": 1.0116016638106707e-05, "loss": 1.8545, "step": 27567500 }, { "epoch": 79.8, "learning_rate": 1.011529299045943e-05, "loss": 1.8392, "step": 27568000 }, { "epoch": 79.8, "learning_rate": 1.0114569342812153e-05, "loss": 1.8257, "step": 27568500 }, { "epoch": 79.8, "learning_rate": 1.0113845695164877e-05, "loss": 1.8278, "step": 27569000 }, { "epoch": 79.8, "learning_rate": 1.01131220475176e-05, "loss": 1.8376, "step": 27569500 }, { "epoch": 79.8, "learning_rate": 1.0112399847165617e-05, "loss": 1.8625, "step": 27570000 }, { "epoch": 79.81, "learning_rate": 1.011167619951834e-05, "loss": 1.8248, "step": 27570500 }, { "epoch": 79.81, "learning_rate": 1.0110952551871065e-05, "loss": 1.8537, "step": 27571000 }, { "epoch": 79.81, "learning_rate": 1.0110228904223787e-05, "loss": 1.8522, "step": 27571500 }, { "epoch": 79.81, "learning_rate": 1.0109506703871804e-05, "loss": 1.8455, "step": 27572000 }, { "epoch": 79.81, "learning_rate": 1.0108785950815116e-05, "loss": 1.8637, "step": 27572500 }, { "epoch": 79.81, "learning_rate": 1.010806230316784e-05, "loss": 1.8748, "step": 27573000 }, { "epoch": 79.81, "learning_rate": 1.0107338655520564e-05, "loss": 1.8516, "step": 27573500 }, { "epoch": 79.82, "learning_rate": 1.0106615007873287e-05, "loss": 1.8695, "step": 27574000 }, { "epoch": 79.82, "learning_rate": 1.010589136022601e-05, "loss": 1.8726, "step": 27574500 }, { "epoch": 79.82, "learning_rate": 1.0105167712578734e-05, "loss": 1.8517, "step": 27575000 }, { "epoch": 79.82, "learning_rate": 1.0104444064931457e-05, "loss": 1.8588, "step": 27575500 }, { "epoch": 79.82, "learning_rate": 1.0103720417284179e-05, "loss": 1.8702, "step": 27576000 }, { "epoch": 79.82, "learning_rate": 1.0102996769636903e-05, "loss": 1.8348, "step": 27576500 }, { "epoch": 79.82, "learning_rate": 1.0102274569284922e-05, "loss": 1.8452, "step": 27577000 }, { "epoch": 79.83, "learning_rate": 1.0101550921637644e-05, "loss": 1.85, "step": 27577500 }, { "epoch": 79.83, "learning_rate": 1.0100827273990366e-05, "loss": 1.8807, "step": 27578000 }, { "epoch": 79.83, "learning_rate": 1.010010362634309e-05, "loss": 1.8617, "step": 27578500 }, { "epoch": 79.83, "learning_rate": 1.0099379978695814e-05, "loss": 1.8401, "step": 27579000 }, { "epoch": 79.83, "learning_rate": 1.0098656331048536e-05, "loss": 1.8615, "step": 27579500 }, { "epoch": 79.83, "learning_rate": 1.0097934130696554e-05, "loss": 1.8533, "step": 27580000 }, { "epoch": 79.83, "learning_rate": 1.009721048304928e-05, "loss": 1.8475, "step": 27580500 }, { "epoch": 79.84, "learning_rate": 1.0096486835402001e-05, "loss": 1.8462, "step": 27581000 }, { "epoch": 79.84, "learning_rate": 1.0095763187754724e-05, "loss": 1.8599, "step": 27581500 }, { "epoch": 79.84, "learning_rate": 1.0095039540107448e-05, "loss": 1.8396, "step": 27582000 }, { "epoch": 79.84, "learning_rate": 1.0094317339755467e-05, "loss": 1.8495, "step": 27582500 }, { "epoch": 79.84, "learning_rate": 1.0093593692108189e-05, "loss": 1.8641, "step": 27583000 }, { "epoch": 79.84, "learning_rate": 1.0092870044460911e-05, "loss": 1.8683, "step": 27583500 }, { "epoch": 79.84, "learning_rate": 1.0092146396813635e-05, "loss": 1.8418, "step": 27584000 }, { "epoch": 79.85, "learning_rate": 1.0091422749166359e-05, "loss": 1.8542, "step": 27584500 }, { "epoch": 79.85, "learning_rate": 1.0090699101519081e-05, "loss": 1.8458, "step": 27585000 }, { "epoch": 79.85, "learning_rate": 1.0089975453871805e-05, "loss": 1.849, "step": 27585500 }, { "epoch": 79.85, "learning_rate": 1.0089251806224529e-05, "loss": 1.8697, "step": 27586000 }, { "epoch": 79.85, "learning_rate": 1.0088528158577251e-05, "loss": 1.8377, "step": 27586500 }, { "epoch": 79.85, "learning_rate": 1.0087805958225268e-05, "loss": 1.8424, "step": 27587000 }, { "epoch": 79.85, "learning_rate": 1.0087082310577992e-05, "loss": 1.8424, "step": 27587500 }, { "epoch": 79.86, "learning_rate": 1.0086360110226011e-05, "loss": 1.84, "step": 27588000 }, { "epoch": 79.86, "learning_rate": 1.0085636462578734e-05, "loss": 1.8739, "step": 27588500 }, { "epoch": 79.86, "learning_rate": 1.0084912814931456e-05, "loss": 1.852, "step": 27589000 }, { "epoch": 79.86, "learning_rate": 1.008418916728418e-05, "loss": 1.8573, "step": 27589500 }, { "epoch": 79.86, "learning_rate": 1.0083466966932199e-05, "loss": 1.846, "step": 27590000 }, { "epoch": 79.86, "learning_rate": 1.0082743319284921e-05, "loss": 1.8368, "step": 27590500 }, { "epoch": 79.86, "learning_rate": 1.0082019671637643e-05, "loss": 1.8263, "step": 27591000 }, { "epoch": 79.87, "learning_rate": 1.0081296023990367e-05, "loss": 1.8571, "step": 27591500 }, { "epoch": 79.87, "learning_rate": 1.0080572376343091e-05, "loss": 1.8372, "step": 27592000 }, { "epoch": 79.87, "learning_rate": 1.0079848728695813e-05, "loss": 1.8443, "step": 27592500 }, { "epoch": 79.87, "learning_rate": 1.0079125081048537e-05, "loss": 1.8401, "step": 27593000 }, { "epoch": 79.87, "learning_rate": 1.0078401433401261e-05, "loss": 1.8496, "step": 27593500 }, { "epoch": 79.87, "learning_rate": 1.0077677785753983e-05, "loss": 1.8332, "step": 27594000 }, { "epoch": 79.87, "learning_rate": 1.0076955585402e-05, "loss": 1.8502, "step": 27594500 }, { "epoch": 79.88, "learning_rate": 1.0076231937754724e-05, "loss": 1.8382, "step": 27595000 }, { "epoch": 79.88, "learning_rate": 1.0075509737402743e-05, "loss": 1.8244, "step": 27595500 }, { "epoch": 79.88, "learning_rate": 1.0074786089755466e-05, "loss": 1.868, "step": 27596000 }, { "epoch": 79.88, "learning_rate": 1.0074062442108188e-05, "loss": 1.862, "step": 27596500 }, { "epoch": 79.88, "learning_rate": 1.0073338794460912e-05, "loss": 1.8565, "step": 27597000 }, { "epoch": 79.88, "learning_rate": 1.0072615146813636e-05, "loss": 1.8705, "step": 27597500 }, { "epoch": 79.88, "learning_rate": 1.0071892946461653e-05, "loss": 1.8751, "step": 27598000 }, { "epoch": 79.89, "learning_rate": 1.0071169298814375e-05, "loss": 1.8575, "step": 27598500 }, { "epoch": 79.89, "learning_rate": 1.0070445651167099e-05, "loss": 1.8484, "step": 27599000 }, { "epoch": 79.89, "learning_rate": 1.0069722003519823e-05, "loss": 1.8292, "step": 27599500 }, { "epoch": 79.89, "learning_rate": 1.006899980316784e-05, "loss": 1.849, "step": 27600000 }, { "epoch": 79.89, "learning_rate": 1.0068276155520563e-05, "loss": 1.8566, "step": 27600500 }, { "epoch": 79.89, "learning_rate": 1.0067553955168581e-05, "loss": 1.8625, "step": 27601000 }, { "epoch": 79.9, "learning_rate": 1.0066830307521305e-05, "loss": 1.8663, "step": 27601500 }, { "epoch": 79.9, "learning_rate": 1.0066106659874028e-05, "loss": 1.8825, "step": 27602000 }, { "epoch": 79.9, "learning_rate": 1.006538301222675e-05, "loss": 1.859, "step": 27602500 }, { "epoch": 79.9, "learning_rate": 1.0064659364579476e-05, "loss": 1.8622, "step": 27603000 }, { "epoch": 79.9, "learning_rate": 1.0063937164227493e-05, "loss": 1.8654, "step": 27603500 }, { "epoch": 79.9, "learning_rate": 1.0063213516580215e-05, "loss": 1.8305, "step": 27604000 }, { "epoch": 79.9, "learning_rate": 1.0062489868932937e-05, "loss": 1.8208, "step": 27604500 }, { "epoch": 79.91, "learning_rate": 1.0061766221285663e-05, "loss": 1.8239, "step": 27605000 }, { "epoch": 79.91, "learning_rate": 1.0061042573638385e-05, "loss": 1.8411, "step": 27605500 }, { "epoch": 79.91, "learning_rate": 1.0060318925991107e-05, "loss": 1.8622, "step": 27606000 }, { "epoch": 79.91, "learning_rate": 1.0059595278343833e-05, "loss": 1.836, "step": 27606500 }, { "epoch": 79.91, "learning_rate": 1.0058871630696555e-05, "loss": 1.8719, "step": 27607000 }, { "epoch": 79.91, "learning_rate": 1.0058147983049277e-05, "loss": 1.8375, "step": 27607500 }, { "epoch": 79.91, "learning_rate": 1.0057424335402001e-05, "loss": 1.8547, "step": 27608000 }, { "epoch": 79.92, "learning_rate": 1.0056700687754725e-05, "loss": 1.8508, "step": 27608500 }, { "epoch": 79.92, "learning_rate": 1.0055977040107447e-05, "loss": 1.8422, "step": 27609000 }, { "epoch": 79.92, "learning_rate": 1.005525628705076e-05, "loss": 1.8515, "step": 27609500 }, { "epoch": 79.92, "learning_rate": 1.0054532639403482e-05, "loss": 1.8648, "step": 27610000 }, { "epoch": 79.92, "learning_rate": 1.0053808991756208e-05, "loss": 1.8358, "step": 27610500 }, { "epoch": 79.92, "learning_rate": 1.005308534410893e-05, "loss": 1.8667, "step": 27611000 }, { "epoch": 79.92, "learning_rate": 1.0052361696461652e-05, "loss": 1.8784, "step": 27611500 }, { "epoch": 79.93, "learning_rate": 1.0051638048814376e-05, "loss": 1.8552, "step": 27612000 }, { "epoch": 79.93, "learning_rate": 1.0050915848462395e-05, "loss": 1.8564, "step": 27612500 }, { "epoch": 79.93, "learning_rate": 1.0050192200815117e-05, "loss": 1.8543, "step": 27613000 }, { "epoch": 79.93, "learning_rate": 1.004946855316784e-05, "loss": 1.8558, "step": 27613500 }, { "epoch": 79.93, "learning_rate": 1.0048744905520565e-05, "loss": 1.844, "step": 27614000 }, { "epoch": 79.93, "learning_rate": 1.0048022705168582e-05, "loss": 1.8369, "step": 27614500 }, { "epoch": 79.93, "learning_rate": 1.0047299057521305e-05, "loss": 1.8628, "step": 27615000 }, { "epoch": 79.94, "learning_rate": 1.0046575409874027e-05, "loss": 1.8773, "step": 27615500 }, { "epoch": 79.94, "learning_rate": 1.0045851762226752e-05, "loss": 1.886, "step": 27616000 }, { "epoch": 79.94, "learning_rate": 1.004512956187477e-05, "loss": 1.8417, "step": 27616500 }, { "epoch": 79.94, "learning_rate": 1.0044405914227492e-05, "loss": 1.8755, "step": 27617000 }, { "epoch": 79.94, "learning_rate": 1.0043682266580216e-05, "loss": 1.8695, "step": 27617500 }, { "epoch": 79.94, "learning_rate": 1.004295861893294e-05, "loss": 1.8417, "step": 27618000 }, { "epoch": 79.94, "learning_rate": 1.0042236418580957e-05, "loss": 1.8382, "step": 27618500 }, { "epoch": 79.95, "learning_rate": 1.004151277093368e-05, "loss": 1.8693, "step": 27619000 }, { "epoch": 79.95, "learning_rate": 1.0040789123286403e-05, "loss": 1.8382, "step": 27619500 }, { "epoch": 79.95, "learning_rate": 1.0040065475639127e-05, "loss": 1.8535, "step": 27620000 }, { "epoch": 79.95, "learning_rate": 1.003934182799185e-05, "loss": 1.8726, "step": 27620500 }, { "epoch": 79.95, "learning_rate": 1.0038618180344572e-05, "loss": 1.8676, "step": 27621000 }, { "epoch": 79.95, "learning_rate": 1.0037894532697297e-05, "loss": 1.8465, "step": 27621500 }, { "epoch": 79.95, "learning_rate": 1.003717088505002e-05, "loss": 1.8352, "step": 27622000 }, { "epoch": 79.96, "learning_rate": 1.0036447237402742e-05, "loss": 1.8422, "step": 27622500 }, { "epoch": 79.96, "learning_rate": 1.0035725037050759e-05, "loss": 1.8764, "step": 27623000 }, { "epoch": 79.96, "learning_rate": 1.0035001389403484e-05, "loss": 1.8679, "step": 27623500 }, { "epoch": 79.96, "learning_rate": 1.0034277741756207e-05, "loss": 1.8671, "step": 27624000 }, { "epoch": 79.96, "learning_rate": 1.0033554094108929e-05, "loss": 1.857, "step": 27624500 }, { "epoch": 79.96, "learning_rate": 1.0032830446461653e-05, "loss": 1.8229, "step": 27625000 }, { "epoch": 79.96, "learning_rate": 1.0032108246109672e-05, "loss": 1.8531, "step": 27625500 }, { "epoch": 79.97, "learning_rate": 1.0031384598462394e-05, "loss": 1.8586, "step": 27626000 }, { "epoch": 79.97, "learning_rate": 1.0030660950815116e-05, "loss": 1.8472, "step": 27626500 }, { "epoch": 79.97, "learning_rate": 1.002993730316784e-05, "loss": 1.849, "step": 27627000 }, { "epoch": 79.97, "learning_rate": 1.0029213655520564e-05, "loss": 1.8639, "step": 27627500 }, { "epoch": 79.97, "learning_rate": 1.0028490007873286e-05, "loss": 1.8642, "step": 27628000 }, { "epoch": 79.97, "learning_rate": 1.002776636022601e-05, "loss": 1.8463, "step": 27628500 }, { "epoch": 79.97, "learning_rate": 1.0027044159874028e-05, "loss": 1.8589, "step": 27629000 }, { "epoch": 79.98, "learning_rate": 1.0026320512226751e-05, "loss": 1.8557, "step": 27629500 }, { "epoch": 79.98, "learning_rate": 1.0025596864579474e-05, "loss": 1.8448, "step": 27630000 }, { "epoch": 79.98, "learning_rate": 1.0024873216932198e-05, "loss": 1.8753, "step": 27630500 }, { "epoch": 79.98, "learning_rate": 1.0024149569284922e-05, "loss": 1.8332, "step": 27631000 }, { "epoch": 79.98, "learning_rate": 1.0023427368932939e-05, "loss": 1.87, "step": 27631500 }, { "epoch": 79.98, "learning_rate": 1.0022703721285661e-05, "loss": 1.8378, "step": 27632000 }, { "epoch": 79.98, "learning_rate": 1.0021980073638385e-05, "loss": 1.85, "step": 27632500 }, { "epoch": 79.99, "learning_rate": 1.0021256425991109e-05, "loss": 1.8343, "step": 27633000 }, { "epoch": 79.99, "learning_rate": 1.0020532778343831e-05, "loss": 1.8511, "step": 27633500 }, { "epoch": 79.99, "learning_rate": 1.0019810577991848e-05, "loss": 1.8524, "step": 27634000 }, { "epoch": 79.99, "learning_rate": 1.0019086930344572e-05, "loss": 1.815, "step": 27634500 }, { "epoch": 79.99, "learning_rate": 1.0018363282697296e-05, "loss": 1.8743, "step": 27635000 }, { "epoch": 79.99, "learning_rate": 1.0017641082345313e-05, "loss": 1.82, "step": 27635500 }, { "epoch": 79.99, "learning_rate": 1.0016917434698036e-05, "loss": 1.8466, "step": 27636000 }, { "epoch": 80.0, "learning_rate": 1.001619378705076e-05, "loss": 1.8305, "step": 27636500 }, { "epoch": 80.0, "learning_rate": 1.0015470139403484e-05, "loss": 1.8676, "step": 27637000 }, { "epoch": 80.0, "learning_rate": 1.0014746491756206e-05, "loss": 1.8431, "step": 27637500 }, { "epoch": 80.0, "eval_accuracy": 0.6860712124704662, "eval_accuracy_mlm": 0.6545437918305845, "eval_accuracy_nsp": 0.8551351326577846, "eval_loss": 2.1838855743408203, "eval_runtime": 332.1676, "eval_samples_per_second": 1313.752, "eval_steps_per_second": 54.74, "step": 27637760 }, { "epoch": 80.0, "learning_rate": 1.001402284410893e-05, "loss": 1.8644, "step": 27638000 }, { "epoch": 80.0, "learning_rate": 1.0013299196461654e-05, "loss": 1.8433, "step": 27638500 }, { "epoch": 80.0, "learning_rate": 1.0012576996109671e-05, "loss": 1.871, "step": 27639000 }, { "epoch": 80.01, "learning_rate": 1.0011853348462393e-05, "loss": 1.827, "step": 27639500 }, { "epoch": 80.01, "learning_rate": 1.0011129700815117e-05, "loss": 1.8403, "step": 27640000 }, { "epoch": 80.01, "learning_rate": 1.0010406053167841e-05, "loss": 1.855, "step": 27640500 }, { "epoch": 80.01, "learning_rate": 1.0009682405520563e-05, "loss": 1.8528, "step": 27641000 }, { "epoch": 80.01, "learning_rate": 1.0008958757873287e-05, "loss": 1.8581, "step": 27641500 }, { "epoch": 80.01, "learning_rate": 1.0008235110226011e-05, "loss": 1.8319, "step": 27642000 }, { "epoch": 80.01, "learning_rate": 1.0007511462578733e-05, "loss": 1.8088, "step": 27642500 }, { "epoch": 80.02, "learning_rate": 1.0006790709522046e-05, "loss": 1.8774, "step": 27643000 }, { "epoch": 80.02, "learning_rate": 1.000606706187477e-05, "loss": 1.8237, "step": 27643500 }, { "epoch": 80.02, "learning_rate": 1.0005343414227492e-05, "loss": 1.832, "step": 27644000 }, { "epoch": 80.02, "learning_rate": 1.0004619766580216e-05, "loss": 1.8556, "step": 27644500 }, { "epoch": 80.02, "learning_rate": 1.0003896118932938e-05, "loss": 1.8505, "step": 27645000 }, { "epoch": 80.02, "learning_rate": 1.0003172471285662e-05, "loss": 1.8303, "step": 27645500 }, { "epoch": 80.02, "learning_rate": 1.0002448823638386e-05, "loss": 1.8412, "step": 27646000 }, { "epoch": 80.03, "learning_rate": 1.0001725175991108e-05, "loss": 1.8324, "step": 27646500 }, { "epoch": 80.03, "learning_rate": 1.0001001528343832e-05, "loss": 1.8264, "step": 27647000 }, { "epoch": 80.03, "learning_rate": 1.0000277880696554e-05, "loss": 1.8437, "step": 27647500 }, { "epoch": 80.03, "learning_rate": 9.999554233049278e-06, "loss": 1.8318, "step": 27648000 }, { "epoch": 80.03, "learning_rate": 9.998832032697295e-06, "loss": 1.8657, "step": 27648500 }, { "epoch": 80.03, "learning_rate": 9.99810838505002e-06, "loss": 1.8668, "step": 27649000 }, { "epoch": 80.03, "learning_rate": 9.997384737402742e-06, "loss": 1.8192, "step": 27649500 }, { "epoch": 80.04, "learning_rate": 9.996661089755465e-06, "loss": 1.8476, "step": 27650000 }, { "epoch": 80.04, "learning_rate": 9.995938889403483e-06, "loss": 1.8293, "step": 27650500 }, { "epoch": 80.04, "learning_rate": 9.995215241756207e-06, "loss": 1.8371, "step": 27651000 }, { "epoch": 80.04, "learning_rate": 9.99449159410893e-06, "loss": 1.831, "step": 27651500 }, { "epoch": 80.04, "learning_rate": 9.993769393756948e-06, "loss": 1.8493, "step": 27652000 }, { "epoch": 80.04, "learning_rate": 9.99304574610967e-06, "loss": 1.8303, "step": 27652500 }, { "epoch": 80.04, "learning_rate": 9.992322098462394e-06, "loss": 1.8191, "step": 27653000 }, { "epoch": 80.05, "learning_rate": 9.991598450815118e-06, "loss": 1.8201, "step": 27653500 }, { "epoch": 80.05, "learning_rate": 9.99087480316784e-06, "loss": 1.8283, "step": 27654000 }, { "epoch": 80.05, "learning_rate": 9.990151155520564e-06, "loss": 1.8234, "step": 27654500 }, { "epoch": 80.05, "learning_rate": 9.989427507873286e-06, "loss": 1.8271, "step": 27655000 }, { "epoch": 80.05, "learning_rate": 9.98870386022601e-06, "loss": 1.8273, "step": 27655500 }, { "epoch": 80.05, "learning_rate": 9.987980212578734e-06, "loss": 1.8282, "step": 27656000 }, { "epoch": 80.05, "learning_rate": 9.987258012226751e-06, "loss": 1.8412, "step": 27656500 }, { "epoch": 80.06, "learning_rate": 9.986534364579474e-06, "loss": 1.8249, "step": 27657000 }, { "epoch": 80.06, "learning_rate": 9.985810716932198e-06, "loss": 1.8391, "step": 27657500 }, { "epoch": 80.06, "learning_rate": 9.985087069284921e-06, "loss": 1.8461, "step": 27658000 }, { "epoch": 80.06, "learning_rate": 9.984363421637644e-06, "loss": 1.844, "step": 27658500 }, { "epoch": 80.06, "learning_rate": 9.983639773990368e-06, "loss": 1.8462, "step": 27659000 }, { "epoch": 80.06, "learning_rate": 9.98291612634309e-06, "loss": 1.8083, "step": 27659500 }, { "epoch": 80.06, "learning_rate": 9.982193925991109e-06, "loss": 1.8099, "step": 27660000 }, { "epoch": 80.07, "learning_rate": 9.981470278343831e-06, "loss": 1.8519, "step": 27660500 }, { "epoch": 80.07, "learning_rate": 9.98074807799185e-06, "loss": 1.8394, "step": 27661000 }, { "epoch": 80.07, "learning_rate": 9.980024430344572e-06, "loss": 1.8224, "step": 27661500 }, { "epoch": 80.07, "learning_rate": 9.979300782697296e-06, "loss": 1.8759, "step": 27662000 }, { "epoch": 80.07, "learning_rate": 9.978577135050018e-06, "loss": 1.8483, "step": 27662500 }, { "epoch": 80.07, "learning_rate": 9.977853487402742e-06, "loss": 1.8401, "step": 27663000 }, { "epoch": 80.07, "learning_rate": 9.977129839755466e-06, "loss": 1.8655, "step": 27663500 }, { "epoch": 80.08, "learning_rate": 9.976406192108188e-06, "loss": 1.8436, "step": 27664000 }, { "epoch": 80.08, "learning_rate": 9.975682544460912e-06, "loss": 1.8348, "step": 27664500 }, { "epoch": 80.08, "learning_rate": 9.974958896813635e-06, "loss": 1.8263, "step": 27665000 }, { "epoch": 80.08, "learning_rate": 9.974236696461654e-06, "loss": 1.8617, "step": 27665500 }, { "epoch": 80.08, "learning_rate": 9.973515943404966e-06, "loss": 1.8325, "step": 27666000 }, { "epoch": 80.08, "learning_rate": 9.972792295757688e-06, "loss": 1.8528, "step": 27666500 }, { "epoch": 80.08, "learning_rate": 9.972068648110412e-06, "loss": 1.8486, "step": 27667000 }, { "epoch": 80.09, "learning_rate": 9.971345000463136e-06, "loss": 1.8331, "step": 27667500 }, { "epoch": 80.09, "learning_rate": 9.970621352815858e-06, "loss": 1.8281, "step": 27668000 }, { "epoch": 80.09, "learning_rate": 9.969897705168582e-06, "loss": 1.8552, "step": 27668500 }, { "epoch": 80.09, "learning_rate": 9.969174057521304e-06, "loss": 1.8315, "step": 27669000 }, { "epoch": 80.09, "learning_rate": 9.968450409874028e-06, "loss": 1.8378, "step": 27669500 }, { "epoch": 80.09, "learning_rate": 9.96772676222675e-06, "loss": 1.7938, "step": 27670000 }, { "epoch": 80.09, "learning_rate": 9.967003114579474e-06, "loss": 1.8326, "step": 27670500 }, { "epoch": 80.1, "learning_rate": 9.966279466932198e-06, "loss": 1.8635, "step": 27671000 }, { "epoch": 80.1, "learning_rate": 9.96555581928492e-06, "loss": 1.8402, "step": 27671500 }, { "epoch": 80.1, "learning_rate": 9.964832171637644e-06, "loss": 1.8225, "step": 27672000 }, { "epoch": 80.1, "learning_rate": 9.964108523990367e-06, "loss": 1.8161, "step": 27672500 }, { "epoch": 80.1, "learning_rate": 9.96338487634309e-06, "loss": 1.8316, "step": 27673000 }, { "epoch": 80.1, "learning_rate": 9.962661228695813e-06, "loss": 1.847, "step": 27673500 }, { "epoch": 80.1, "learning_rate": 9.961939028343832e-06, "loss": 1.8424, "step": 27674000 }, { "epoch": 80.11, "learning_rate": 9.961215380696556e-06, "loss": 1.8444, "step": 27674500 }, { "epoch": 80.11, "learning_rate": 9.960491733049278e-06, "loss": 1.8479, "step": 27675000 }, { "epoch": 80.11, "learning_rate": 9.959768085402e-06, "loss": 1.8371, "step": 27675500 }, { "epoch": 80.11, "learning_rate": 9.959044437754724e-06, "loss": 1.8227, "step": 27676000 }, { "epoch": 80.11, "learning_rate": 9.958322237402743e-06, "loss": 1.8427, "step": 27676500 }, { "epoch": 80.11, "learning_rate": 9.957598589755465e-06, "loss": 1.8406, "step": 27677000 }, { "epoch": 80.12, "learning_rate": 9.956874942108188e-06, "loss": 1.8354, "step": 27677500 }, { "epoch": 80.12, "learning_rate": 9.956152741756207e-06, "loss": 1.8422, "step": 27678000 }, { "epoch": 80.12, "learning_rate": 9.95542909410893e-06, "loss": 1.8564, "step": 27678500 }, { "epoch": 80.12, "learning_rate": 9.954705446461653e-06, "loss": 1.8405, "step": 27679000 }, { "epoch": 80.12, "learning_rate": 9.953981798814375e-06, "loss": 1.8881, "step": 27679500 }, { "epoch": 80.12, "learning_rate": 9.9532581511671e-06, "loss": 1.8583, "step": 27680000 }, { "epoch": 80.12, "learning_rate": 9.952534503519823e-06, "loss": 1.8551, "step": 27680500 }, { "epoch": 80.13, "learning_rate": 9.951810855872545e-06, "loss": 1.8598, "step": 27681000 }, { "epoch": 80.13, "learning_rate": 9.951087208225269e-06, "loss": 1.8564, "step": 27681500 }, { "epoch": 80.13, "learning_rate": 9.950363560577993e-06, "loss": 1.833, "step": 27682000 }, { "epoch": 80.13, "learning_rate": 9.94964136022601e-06, "loss": 1.8056, "step": 27682500 }, { "epoch": 80.13, "learning_rate": 9.948917712578732e-06, "loss": 1.8523, "step": 27683000 }, { "epoch": 80.13, "learning_rate": 9.948194064931456e-06, "loss": 1.8411, "step": 27683500 }, { "epoch": 80.13, "learning_rate": 9.94747041728418e-06, "loss": 1.8393, "step": 27684000 }, { "epoch": 80.14, "learning_rate": 9.946746769636902e-06, "loss": 1.8126, "step": 27684500 }, { "epoch": 80.14, "learning_rate": 9.94602456928492e-06, "loss": 1.8354, "step": 27685000 }, { "epoch": 80.14, "learning_rate": 9.945302368932939e-06, "loss": 1.8283, "step": 27685500 }, { "epoch": 80.14, "learning_rate": 9.944578721285663e-06, "loss": 1.8531, "step": 27686000 }, { "epoch": 80.14, "learning_rate": 9.943855073638385e-06, "loss": 1.838, "step": 27686500 }, { "epoch": 80.14, "learning_rate": 9.943131425991107e-06, "loss": 1.8059, "step": 27687000 }, { "epoch": 80.14, "learning_rate": 9.942407778343833e-06, "loss": 1.8481, "step": 27687500 }, { "epoch": 80.15, "learning_rate": 9.941684130696555e-06, "loss": 1.8286, "step": 27688000 }, { "epoch": 80.15, "learning_rate": 9.940961930344572e-06, "loss": 1.8287, "step": 27688500 }, { "epoch": 80.15, "learning_rate": 9.940238282697296e-06, "loss": 1.8459, "step": 27689000 }, { "epoch": 80.15, "learning_rate": 9.93951463505002e-06, "loss": 1.8324, "step": 27689500 }, { "epoch": 80.15, "learning_rate": 9.938790987402742e-06, "loss": 1.8388, "step": 27690000 }, { "epoch": 80.15, "learning_rate": 9.938067339755464e-06, "loss": 1.8392, "step": 27690500 }, { "epoch": 80.15, "learning_rate": 9.937343692108188e-06, "loss": 1.8476, "step": 27691000 }, { "epoch": 80.16, "learning_rate": 9.936620044460912e-06, "loss": 1.8404, "step": 27691500 }, { "epoch": 80.16, "learning_rate": 9.935896396813635e-06, "loss": 1.848, "step": 27692000 }, { "epoch": 80.16, "learning_rate": 9.935172749166358e-06, "loss": 1.8525, "step": 27692500 }, { "epoch": 80.16, "learning_rate": 9.934449101519082e-06, "loss": 1.8608, "step": 27693000 }, { "epoch": 80.16, "learning_rate": 9.933725453871805e-06, "loss": 1.8352, "step": 27693500 }, { "epoch": 80.16, "learning_rate": 9.933003253519822e-06, "loss": 1.8317, "step": 27694000 }, { "epoch": 80.16, "learning_rate": 9.932279605872546e-06, "loss": 1.8476, "step": 27694500 }, { "epoch": 80.17, "learning_rate": 9.93155595822527e-06, "loss": 1.8491, "step": 27695000 }, { "epoch": 80.17, "learning_rate": 9.930832310577992e-06, "loss": 1.8512, "step": 27695500 }, { "epoch": 80.17, "learning_rate": 9.93011011022601e-06, "loss": 1.8225, "step": 27696000 }, { "epoch": 80.17, "learning_rate": 9.929387909874026e-06, "loss": 1.8126, "step": 27696500 }, { "epoch": 80.17, "learning_rate": 9.928664262226752e-06, "loss": 1.8645, "step": 27697000 }, { "epoch": 80.17, "learning_rate": 9.927940614579474e-06, "loss": 1.836, "step": 27697500 }, { "epoch": 80.17, "learning_rate": 9.927216966932197e-06, "loss": 1.8289, "step": 27698000 }, { "epoch": 80.18, "learning_rate": 9.926494766580215e-06, "loss": 1.8454, "step": 27698500 }, { "epoch": 80.18, "learning_rate": 9.92577111893294e-06, "loss": 1.8273, "step": 27699000 }, { "epoch": 80.18, "learning_rate": 9.925047471285662e-06, "loss": 1.8465, "step": 27699500 }, { "epoch": 80.18, "learning_rate": 9.924323823638384e-06, "loss": 1.8584, "step": 27700000 }, { "epoch": 80.18, "learning_rate": 9.92360017599111e-06, "loss": 1.8345, "step": 27700500 }, { "epoch": 80.18, "learning_rate": 9.922876528343832e-06, "loss": 1.848, "step": 27701000 }, { "epoch": 80.18, "learning_rate": 9.922152880696554e-06, "loss": 1.8364, "step": 27701500 }, { "epoch": 80.19, "learning_rate": 9.921430680344571e-06, "loss": 1.8502, "step": 27702000 }, { "epoch": 80.19, "learning_rate": 9.920707032697297e-06, "loss": 1.8316, "step": 27702500 }, { "epoch": 80.19, "learning_rate": 9.919983385050019e-06, "loss": 1.8334, "step": 27703000 }, { "epoch": 80.19, "learning_rate": 9.919259737402741e-06, "loss": 1.8236, "step": 27703500 }, { "epoch": 80.19, "learning_rate": 9.918536089755465e-06, "loss": 1.8082, "step": 27704000 }, { "epoch": 80.19, "learning_rate": 9.91781244210819e-06, "loss": 1.8742, "step": 27704500 }, { "epoch": 80.19, "learning_rate": 9.917088794460911e-06, "loss": 1.8607, "step": 27705000 }, { "epoch": 80.2, "learning_rate": 9.916365146813635e-06, "loss": 1.8531, "step": 27705500 }, { "epoch": 80.2, "learning_rate": 9.91564149916636e-06, "loss": 1.8238, "step": 27706000 }, { "epoch": 80.2, "learning_rate": 9.914917851519081e-06, "loss": 1.8489, "step": 27706500 }, { "epoch": 80.2, "learning_rate": 9.914197098462394e-06, "loss": 1.8319, "step": 27707000 }, { "epoch": 80.2, "learning_rate": 9.913473450815116e-06, "loss": 1.8456, "step": 27707500 }, { "epoch": 80.2, "learning_rate": 9.912751250463135e-06, "loss": 1.8179, "step": 27708000 }, { "epoch": 80.2, "learning_rate": 9.912027602815859e-06, "loss": 1.8676, "step": 27708500 }, { "epoch": 80.21, "learning_rate": 9.911303955168581e-06, "loss": 1.8349, "step": 27709000 }, { "epoch": 80.21, "learning_rate": 9.910580307521303e-06, "loss": 1.8276, "step": 27709500 }, { "epoch": 80.21, "learning_rate": 9.909856659874029e-06, "loss": 1.842, "step": 27710000 }, { "epoch": 80.21, "learning_rate": 9.909133012226751e-06, "loss": 1.8157, "step": 27710500 }, { "epoch": 80.21, "learning_rate": 9.908409364579473e-06, "loss": 1.8286, "step": 27711000 }, { "epoch": 80.21, "learning_rate": 9.907685716932199e-06, "loss": 1.8276, "step": 27711500 }, { "epoch": 80.21, "learning_rate": 9.906962069284921e-06, "loss": 1.8279, "step": 27712000 }, { "epoch": 80.22, "learning_rate": 9.906238421637643e-06, "loss": 1.8344, "step": 27712500 }, { "epoch": 80.22, "learning_rate": 9.905514773990367e-06, "loss": 1.8435, "step": 27713000 }, { "epoch": 80.22, "learning_rate": 9.904792573638386e-06, "loss": 1.8341, "step": 27713500 }, { "epoch": 80.22, "learning_rate": 9.904068925991109e-06, "loss": 1.8589, "step": 27714000 }, { "epoch": 80.22, "learning_rate": 9.90334527834383e-06, "loss": 1.8543, "step": 27714500 }, { "epoch": 80.22, "learning_rate": 9.902621630696555e-06, "loss": 1.8243, "step": 27715000 }, { "epoch": 80.23, "learning_rate": 9.901897983049279e-06, "loss": 1.833, "step": 27715500 }, { "epoch": 80.23, "learning_rate": 9.901175782697296e-06, "loss": 1.8207, "step": 27716000 }, { "epoch": 80.23, "learning_rate": 9.900453582345313e-06, "loss": 1.8528, "step": 27716500 }, { "epoch": 80.23, "learning_rate": 9.899729934698037e-06, "loss": 1.8492, "step": 27717000 }, { "epoch": 80.23, "learning_rate": 9.899006287050761e-06, "loss": 1.8176, "step": 27717500 }, { "epoch": 80.23, "learning_rate": 9.898282639403483e-06, "loss": 1.8301, "step": 27718000 }, { "epoch": 80.23, "learning_rate": 9.897558991756206e-06, "loss": 1.8282, "step": 27718500 }, { "epoch": 80.24, "learning_rate": 9.896835344108931e-06, "loss": 1.8508, "step": 27719000 }, { "epoch": 80.24, "learning_rate": 9.896111696461653e-06, "loss": 1.8552, "step": 27719500 }, { "epoch": 80.24, "learning_rate": 9.895388048814376e-06, "loss": 1.8319, "step": 27720000 }, { "epoch": 80.24, "learning_rate": 9.8946644011671e-06, "loss": 1.8362, "step": 27720500 }, { "epoch": 80.24, "learning_rate": 9.893940753519823e-06, "loss": 1.8694, "step": 27721000 }, { "epoch": 80.24, "learning_rate": 9.893217105872546e-06, "loss": 1.8572, "step": 27721500 }, { "epoch": 80.24, "learning_rate": 9.892493458225268e-06, "loss": 1.8486, "step": 27722000 }, { "epoch": 80.25, "learning_rate": 9.891771257873287e-06, "loss": 1.8383, "step": 27722500 }, { "epoch": 80.25, "learning_rate": 9.89104761022601e-06, "loss": 1.8415, "step": 27723000 }, { "epoch": 80.25, "learning_rate": 9.890323962578733e-06, "loss": 1.8146, "step": 27723500 }, { "epoch": 80.25, "learning_rate": 9.889600314931457e-06, "loss": 1.8331, "step": 27724000 }, { "epoch": 80.25, "learning_rate": 9.888876667284181e-06, "loss": 1.8371, "step": 27724500 }, { "epoch": 80.25, "learning_rate": 9.888154466932198e-06, "loss": 1.8412, "step": 27725000 }, { "epoch": 80.25, "learning_rate": 9.887432266580215e-06, "loss": 1.8402, "step": 27725500 }, { "epoch": 80.26, "learning_rate": 9.886708618932938e-06, "loss": 1.8432, "step": 27726000 }, { "epoch": 80.26, "learning_rate": 9.885984971285662e-06, "loss": 1.8525, "step": 27726500 }, { "epoch": 80.26, "learning_rate": 9.885261323638385e-06, "loss": 1.8552, "step": 27727000 }, { "epoch": 80.26, "learning_rate": 9.884537675991108e-06, "loss": 1.8454, "step": 27727500 }, { "epoch": 80.26, "learning_rate": 9.883814028343832e-06, "loss": 1.8522, "step": 27728000 }, { "epoch": 80.26, "learning_rate": 9.883090380696556e-06, "loss": 1.8556, "step": 27728500 }, { "epoch": 80.26, "learning_rate": 9.882366733049278e-06, "loss": 1.8425, "step": 27729000 }, { "epoch": 80.27, "learning_rate": 9.881643085402002e-06, "loss": 1.8569, "step": 27729500 }, { "epoch": 80.27, "learning_rate": 9.880919437754726e-06, "loss": 1.8493, "step": 27730000 }, { "epoch": 80.27, "learning_rate": 9.880195790107448e-06, "loss": 1.8319, "step": 27730500 }, { "epoch": 80.27, "learning_rate": 9.87947214246017e-06, "loss": 1.8534, "step": 27731000 }, { "epoch": 80.27, "learning_rate": 9.878748494812894e-06, "loss": 1.8218, "step": 27731500 }, { "epoch": 80.27, "learning_rate": 9.878024847165618e-06, "loss": 1.8522, "step": 27732000 }, { "epoch": 80.27, "learning_rate": 9.877302646813635e-06, "loss": 1.8305, "step": 27732500 }, { "epoch": 80.28, "learning_rate": 9.876578999166357e-06, "loss": 1.8361, "step": 27733000 }, { "epoch": 80.28, "learning_rate": 9.875856798814376e-06, "loss": 1.8653, "step": 27733500 }, { "epoch": 80.28, "learning_rate": 9.875134598462394e-06, "loss": 1.8277, "step": 27734000 }, { "epoch": 80.28, "learning_rate": 9.874410950815118e-06, "loss": 1.8207, "step": 27734500 }, { "epoch": 80.28, "learning_rate": 9.87368730316784e-06, "loss": 1.848, "step": 27735000 }, { "epoch": 80.28, "learning_rate": 9.872963655520564e-06, "loss": 1.8562, "step": 27735500 }, { "epoch": 80.28, "learning_rate": 9.872240007873288e-06, "loss": 1.8197, "step": 27736000 }, { "epoch": 80.29, "learning_rate": 9.87151636022601e-06, "loss": 1.838, "step": 27736500 }, { "epoch": 80.29, "learning_rate": 9.870792712578734e-06, "loss": 1.817, "step": 27737000 }, { "epoch": 80.29, "learning_rate": 9.870069064931456e-06, "loss": 1.8602, "step": 27737500 }, { "epoch": 80.29, "learning_rate": 9.86934541728418e-06, "loss": 1.8545, "step": 27738000 }, { "epoch": 80.29, "learning_rate": 9.868623216932197e-06, "loss": 1.8344, "step": 27738500 }, { "epoch": 80.29, "learning_rate": 9.867899569284921e-06, "loss": 1.8583, "step": 27739000 }, { "epoch": 80.29, "learning_rate": 9.867175921637645e-06, "loss": 1.8368, "step": 27739500 }, { "epoch": 80.3, "learning_rate": 9.866452273990367e-06, "loss": 1.8144, "step": 27740000 }, { "epoch": 80.3, "learning_rate": 9.86572862634309e-06, "loss": 1.8418, "step": 27740500 }, { "epoch": 80.3, "learning_rate": 9.865004978695813e-06, "loss": 1.8499, "step": 27741000 }, { "epoch": 80.3, "learning_rate": 9.864281331048537e-06, "loss": 1.8573, "step": 27741500 }, { "epoch": 80.3, "learning_rate": 9.86355768340126e-06, "loss": 1.8552, "step": 27742000 }, { "epoch": 80.3, "learning_rate": 9.862835483049279e-06, "loss": 1.8493, "step": 27742500 }, { "epoch": 80.3, "learning_rate": 9.862111835402e-06, "loss": 1.8315, "step": 27743000 }, { "epoch": 80.31, "learning_rate": 9.861388187754725e-06, "loss": 1.832, "step": 27743500 }, { "epoch": 80.31, "learning_rate": 9.860664540107447e-06, "loss": 1.8466, "step": 27744000 }, { "epoch": 80.31, "learning_rate": 9.859940892460171e-06, "loss": 1.8625, "step": 27744500 }, { "epoch": 80.31, "learning_rate": 9.859218692108188e-06, "loss": 1.8634, "step": 27745000 }, { "epoch": 80.31, "learning_rate": 9.858495044460912e-06, "loss": 1.865, "step": 27745500 }, { "epoch": 80.31, "learning_rate": 9.857771396813634e-06, "loss": 1.8415, "step": 27746000 }, { "epoch": 80.31, "learning_rate": 9.857047749166358e-06, "loss": 1.834, "step": 27746500 }, { "epoch": 80.32, "learning_rate": 9.856325548814375e-06, "loss": 1.8384, "step": 27747000 }, { "epoch": 80.32, "learning_rate": 9.8556019011671e-06, "loss": 1.8221, "step": 27747500 }, { "epoch": 80.32, "learning_rate": 9.854878253519823e-06, "loss": 1.8288, "step": 27748000 }, { "epoch": 80.32, "learning_rate": 9.854154605872546e-06, "loss": 1.8634, "step": 27748500 }, { "epoch": 80.32, "learning_rate": 9.85343095822527e-06, "loss": 1.8581, "step": 27749000 }, { "epoch": 80.32, "learning_rate": 9.852707310577992e-06, "loss": 1.825, "step": 27749500 }, { "epoch": 80.32, "learning_rate": 9.85198511022601e-06, "loss": 1.8421, "step": 27750000 }, { "epoch": 80.33, "learning_rate": 9.851261462578733e-06, "loss": 1.8254, "step": 27750500 }, { "epoch": 80.33, "learning_rate": 9.850537814931457e-06, "loss": 1.8395, "step": 27751000 }, { "epoch": 80.33, "learning_rate": 9.849814167284179e-06, "loss": 1.8505, "step": 27751500 }, { "epoch": 80.33, "learning_rate": 9.849090519636903e-06, "loss": 1.8581, "step": 27752000 }, { "epoch": 80.33, "learning_rate": 9.848366871989627e-06, "loss": 1.8245, "step": 27752500 }, { "epoch": 80.33, "learning_rate": 9.847644671637644e-06, "loss": 1.8591, "step": 27753000 }, { "epoch": 80.34, "learning_rate": 9.846921023990366e-06, "loss": 1.8287, "step": 27753500 }, { "epoch": 80.34, "learning_rate": 9.84619737634309e-06, "loss": 1.8353, "step": 27754000 }, { "epoch": 80.34, "learning_rate": 9.845475175991108e-06, "loss": 1.848, "step": 27754500 }, { "epoch": 80.34, "learning_rate": 9.844751528343832e-06, "loss": 1.8321, "step": 27755000 }, { "epoch": 80.34, "learning_rate": 9.844027880696555e-06, "loss": 1.824, "step": 27755500 }, { "epoch": 80.34, "learning_rate": 9.843304233049278e-06, "loss": 1.8427, "step": 27756000 }, { "epoch": 80.34, "learning_rate": 9.842580585402002e-06, "loss": 1.8296, "step": 27756500 }, { "epoch": 80.35, "learning_rate": 9.841858385050019e-06, "loss": 1.8471, "step": 27757000 }, { "epoch": 80.35, "learning_rate": 9.841136184698036e-06, "loss": 1.8508, "step": 27757500 }, { "epoch": 80.35, "learning_rate": 9.84041253705076e-06, "loss": 1.808, "step": 27758000 }, { "epoch": 80.35, "learning_rate": 9.839690336698777e-06, "loss": 1.8495, "step": 27758500 }, { "epoch": 80.35, "learning_rate": 9.838966689051501e-06, "loss": 1.8316, "step": 27759000 }, { "epoch": 80.35, "learning_rate": 9.838243041404223e-06, "loss": 1.8486, "step": 27759500 }, { "epoch": 80.35, "learning_rate": 9.837519393756947e-06, "loss": 1.8566, "step": 27760000 }, { "epoch": 80.36, "learning_rate": 9.836795746109671e-06, "loss": 1.829, "step": 27760500 }, { "epoch": 80.36, "learning_rate": 9.836072098462394e-06, "loss": 1.8525, "step": 27761000 }, { "epoch": 80.36, "learning_rate": 9.835348450815117e-06, "loss": 1.8342, "step": 27761500 }, { "epoch": 80.36, "learning_rate": 9.83462480316784e-06, "loss": 1.8395, "step": 27762000 }, { "epoch": 80.36, "learning_rate": 9.833901155520564e-06, "loss": 1.8469, "step": 27762500 }, { "epoch": 80.36, "learning_rate": 9.833177507873288e-06, "loss": 1.8406, "step": 27763000 }, { "epoch": 80.36, "learning_rate": 9.83245386022601e-06, "loss": 1.8413, "step": 27763500 }, { "epoch": 80.37, "learning_rate": 9.831730212578734e-06, "loss": 1.8365, "step": 27764000 }, { "epoch": 80.37, "learning_rate": 9.831006564931456e-06, "loss": 1.8251, "step": 27764500 }, { "epoch": 80.37, "learning_rate": 9.83028291728418e-06, "loss": 1.8527, "step": 27765000 }, { "epoch": 80.37, "learning_rate": 9.829559269636902e-06, "loss": 1.8434, "step": 27765500 }, { "epoch": 80.37, "learning_rate": 9.828835621989626e-06, "loss": 1.8452, "step": 27766000 }, { "epoch": 80.37, "learning_rate": 9.828113421637645e-06, "loss": 1.8441, "step": 27766500 }, { "epoch": 80.37, "learning_rate": 9.827389773990367e-06, "loss": 1.8945, "step": 27767000 }, { "epoch": 80.38, "learning_rate": 9.826666126343091e-06, "loss": 1.8237, "step": 27767500 }, { "epoch": 80.38, "learning_rate": 9.825942478695813e-06, "loss": 1.8551, "step": 27768000 }, { "epoch": 80.38, "learning_rate": 9.825218831048537e-06, "loss": 1.8562, "step": 27768500 }, { "epoch": 80.38, "learning_rate": 9.82449518340126e-06, "loss": 1.8369, "step": 27769000 }, { "epoch": 80.38, "learning_rate": 9.823771535753983e-06, "loss": 1.8473, "step": 27769500 }, { "epoch": 80.38, "learning_rate": 9.823047888106707e-06, "loss": 1.8479, "step": 27770000 }, { "epoch": 80.38, "learning_rate": 9.822325687754725e-06, "loss": 1.8459, "step": 27770500 }, { "epoch": 80.39, "learning_rate": 9.821602040107447e-06, "loss": 1.8475, "step": 27771000 }, { "epoch": 80.39, "learning_rate": 9.82087839246017e-06, "loss": 1.8527, "step": 27771500 }, { "epoch": 80.39, "learning_rate": 9.820154744812895e-06, "loss": 1.8347, "step": 27772000 }, { "epoch": 80.39, "learning_rate": 9.819431097165617e-06, "loss": 1.8381, "step": 27772500 }, { "epoch": 80.39, "learning_rate": 9.818707449518341e-06, "loss": 1.8644, "step": 27773000 }, { "epoch": 80.39, "learning_rate": 9.817983801871065e-06, "loss": 1.8709, "step": 27773500 }, { "epoch": 80.39, "learning_rate": 9.817261601519082e-06, "loss": 1.8426, "step": 27774000 }, { "epoch": 80.4, "learning_rate": 9.816537953871804e-06, "loss": 1.8635, "step": 27774500 }, { "epoch": 80.4, "learning_rate": 9.815814306224528e-06, "loss": 1.8244, "step": 27775000 }, { "epoch": 80.4, "learning_rate": 9.815092105872545e-06, "loss": 1.8319, "step": 27775500 }, { "epoch": 80.4, "learning_rate": 9.81436845822527e-06, "loss": 1.8683, "step": 27776000 }, { "epoch": 80.4, "learning_rate": 9.813644810577992e-06, "loss": 1.8195, "step": 27776500 }, { "epoch": 80.4, "learning_rate": 9.812921162930716e-06, "loss": 1.8611, "step": 27777000 }, { "epoch": 80.4, "learning_rate": 9.81219751528344e-06, "loss": 1.8574, "step": 27777500 }, { "epoch": 80.41, "learning_rate": 9.811473867636162e-06, "loss": 1.8549, "step": 27778000 }, { "epoch": 80.41, "learning_rate": 9.810750219988884e-06, "loss": 1.8376, "step": 27778500 }, { "epoch": 80.41, "learning_rate": 9.81002657234161e-06, "loss": 1.8269, "step": 27779000 }, { "epoch": 80.41, "learning_rate": 9.809304371989627e-06, "loss": 1.8703, "step": 27779500 }, { "epoch": 80.41, "learning_rate": 9.808580724342349e-06, "loss": 1.8423, "step": 27780000 }, { "epoch": 80.41, "learning_rate": 9.807857076695073e-06, "loss": 1.8409, "step": 27780500 }, { "epoch": 80.41, "learning_rate": 9.807133429047797e-06, "loss": 1.8481, "step": 27781000 }, { "epoch": 80.42, "learning_rate": 9.80640978140052e-06, "loss": 1.8414, "step": 27781500 }, { "epoch": 80.42, "learning_rate": 9.805686133753241e-06, "loss": 1.8321, "step": 27782000 }, { "epoch": 80.42, "learning_rate": 9.80496393340126e-06, "loss": 1.8099, "step": 27782500 }, { "epoch": 80.42, "learning_rate": 9.804241733049278e-06, "loss": 1.8315, "step": 27783000 }, { "epoch": 80.42, "learning_rate": 9.803518085402002e-06, "loss": 1.8324, "step": 27783500 }, { "epoch": 80.42, "learning_rate": 9.802794437754724e-06, "loss": 1.8374, "step": 27784000 }, { "epoch": 80.42, "learning_rate": 9.802070790107448e-06, "loss": 1.8385, "step": 27784500 }, { "epoch": 80.43, "learning_rate": 9.801347142460172e-06, "loss": 1.8435, "step": 27785000 }, { "epoch": 80.43, "learning_rate": 9.800623494812894e-06, "loss": 1.8505, "step": 27785500 }, { "epoch": 80.43, "learning_rate": 9.799899847165616e-06, "loss": 1.8556, "step": 27786000 }, { "epoch": 80.43, "learning_rate": 9.799176199518342e-06, "loss": 1.8646, "step": 27786500 }, { "epoch": 80.43, "learning_rate": 9.798452551871064e-06, "loss": 1.8454, "step": 27787000 }, { "epoch": 80.43, "learning_rate": 9.797728904223786e-06, "loss": 1.856, "step": 27787500 }, { "epoch": 80.43, "learning_rate": 9.79700525657651e-06, "loss": 1.8189, "step": 27788000 }, { "epoch": 80.44, "learning_rate": 9.796283056224529e-06, "loss": 1.849, "step": 27788500 }, { "epoch": 80.44, "learning_rate": 9.795559408577251e-06, "loss": 1.8597, "step": 27789000 }, { "epoch": 80.44, "learning_rate": 9.794835760929973e-06, "loss": 1.8216, "step": 27789500 }, { "epoch": 80.44, "learning_rate": 9.794112113282697e-06, "loss": 1.8375, "step": 27790000 }, { "epoch": 80.44, "learning_rate": 9.793388465635421e-06, "loss": 1.8473, "step": 27790500 }, { "epoch": 80.44, "learning_rate": 9.792664817988144e-06, "loss": 1.8717, "step": 27791000 }, { "epoch": 80.45, "learning_rate": 9.79194261763616e-06, "loss": 1.8526, "step": 27791500 }, { "epoch": 80.45, "learning_rate": 9.791218969988886e-06, "loss": 1.8298, "step": 27792000 }, { "epoch": 80.45, "learning_rate": 9.790495322341609e-06, "loss": 1.8561, "step": 27792500 }, { "epoch": 80.45, "learning_rate": 9.789771674694331e-06, "loss": 1.8603, "step": 27793000 }, { "epoch": 80.45, "learning_rate": 9.789049474342348e-06, "loss": 1.8815, "step": 27793500 }, { "epoch": 80.45, "learning_rate": 9.788325826695074e-06, "loss": 1.8288, "step": 27794000 }, { "epoch": 80.45, "learning_rate": 9.787602179047796e-06, "loss": 1.8078, "step": 27794500 }, { "epoch": 80.46, "learning_rate": 9.786878531400518e-06, "loss": 1.857, "step": 27795000 }, { "epoch": 80.46, "learning_rate": 9.786156331048536e-06, "loss": 1.8389, "step": 27795500 }, { "epoch": 80.46, "learning_rate": 9.785432683401261e-06, "loss": 1.8533, "step": 27796000 }, { "epoch": 80.46, "learning_rate": 9.784709035753983e-06, "loss": 1.8528, "step": 27796500 }, { "epoch": 80.46, "learning_rate": 9.783985388106706e-06, "loss": 1.8518, "step": 27797000 }, { "epoch": 80.46, "learning_rate": 9.783261740459431e-06, "loss": 1.846, "step": 27797500 }, { "epoch": 80.46, "learning_rate": 9.782539540107448e-06, "loss": 1.825, "step": 27798000 }, { "epoch": 80.47, "learning_rate": 9.781817339755466e-06, "loss": 1.831, "step": 27798500 }, { "epoch": 80.47, "learning_rate": 9.781093692108188e-06, "loss": 1.8337, "step": 27799000 }, { "epoch": 80.47, "learning_rate": 9.780370044460912e-06, "loss": 1.8391, "step": 27799500 }, { "epoch": 80.47, "learning_rate": 9.779646396813636e-06, "loss": 1.8564, "step": 27800000 }, { "epoch": 80.47, "learning_rate": 9.778922749166358e-06, "loss": 1.8565, "step": 27800500 }, { "epoch": 80.47, "learning_rate": 9.77819910151908e-06, "loss": 1.8277, "step": 27801000 }, { "epoch": 80.47, "learning_rate": 9.777475453871806e-06, "loss": 1.8732, "step": 27801500 }, { "epoch": 80.48, "learning_rate": 9.776751806224528e-06, "loss": 1.8395, "step": 27802000 }, { "epoch": 80.48, "learning_rate": 9.77602815857725e-06, "loss": 1.8371, "step": 27802500 }, { "epoch": 80.48, "learning_rate": 9.775304510929974e-06, "loss": 1.8393, "step": 27803000 }, { "epoch": 80.48, "learning_rate": 9.774580863282698e-06, "loss": 1.8429, "step": 27803500 }, { "epoch": 80.48, "learning_rate": 9.77385721563542e-06, "loss": 1.814, "step": 27804000 }, { "epoch": 80.48, "learning_rate": 9.773135015283438e-06, "loss": 1.8395, "step": 27804500 }, { "epoch": 80.48, "learning_rate": 9.772411367636163e-06, "loss": 1.8544, "step": 27805000 }, { "epoch": 80.49, "learning_rate": 9.771687719988886e-06, "loss": 1.862, "step": 27805500 }, { "epoch": 80.49, "learning_rate": 9.770964072341608e-06, "loss": 1.8161, "step": 27806000 }, { "epoch": 80.49, "learning_rate": 9.770241871989625e-06, "loss": 1.8623, "step": 27806500 }, { "epoch": 80.49, "learning_rate": 9.76951822434235e-06, "loss": 1.8479, "step": 27807000 }, { "epoch": 80.49, "learning_rate": 9.768794576695073e-06, "loss": 1.8398, "step": 27807500 }, { "epoch": 80.49, "learning_rate": 9.768070929047795e-06, "loss": 1.8276, "step": 27808000 }, { "epoch": 80.49, "learning_rate": 9.767347281400519e-06, "loss": 1.8601, "step": 27808500 }, { "epoch": 80.5, "learning_rate": 9.766625081048538e-06, "loss": 1.8547, "step": 27809000 }, { "epoch": 80.5, "learning_rate": 9.76590143340126e-06, "loss": 1.8279, "step": 27809500 }, { "epoch": 80.5, "learning_rate": 9.765177785753982e-06, "loss": 1.8481, "step": 27810000 }, { "epoch": 80.5, "learning_rate": 9.764454138106708e-06, "loss": 1.8384, "step": 27810500 }, { "epoch": 80.5, "learning_rate": 9.76373049045943e-06, "loss": 1.8367, "step": 27811000 }, { "epoch": 80.5, "learning_rate": 9.763006842812153e-06, "loss": 1.8433, "step": 27811500 }, { "epoch": 80.5, "learning_rate": 9.762283195164876e-06, "loss": 1.8387, "step": 27812000 }, { "epoch": 80.51, "learning_rate": 9.7615595475176e-06, "loss": 1.8479, "step": 27812500 }, { "epoch": 80.51, "learning_rate": 9.760837347165618e-06, "loss": 1.8372, "step": 27813000 }, { "epoch": 80.51, "learning_rate": 9.76011369951834e-06, "loss": 1.8609, "step": 27813500 }, { "epoch": 80.51, "learning_rate": 9.759391499166357e-06, "loss": 1.8423, "step": 27814000 }, { "epoch": 80.51, "learning_rate": 9.758667851519083e-06, "loss": 1.848, "step": 27814500 }, { "epoch": 80.51, "learning_rate": 9.757944203871805e-06, "loss": 1.8335, "step": 27815000 }, { "epoch": 80.51, "learning_rate": 9.757220556224527e-06, "loss": 1.8353, "step": 27815500 }, { "epoch": 80.52, "learning_rate": 9.756496908577251e-06, "loss": 1.805, "step": 27816000 }, { "epoch": 80.52, "learning_rate": 9.755773260929975e-06, "loss": 1.8468, "step": 27816500 }, { "epoch": 80.52, "learning_rate": 9.755049613282697e-06, "loss": 1.8761, "step": 27817000 }, { "epoch": 80.52, "learning_rate": 9.754325965635421e-06, "loss": 1.8345, "step": 27817500 }, { "epoch": 80.52, "learning_rate": 9.753602317988145e-06, "loss": 1.8353, "step": 27818000 }, { "epoch": 80.52, "learning_rate": 9.752880117636162e-06, "loss": 1.861, "step": 27818500 }, { "epoch": 80.52, "learning_rate": 9.752156469988885e-06, "loss": 1.8225, "step": 27819000 }, { "epoch": 80.53, "learning_rate": 9.751432822341609e-06, "loss": 1.8393, "step": 27819500 }, { "epoch": 80.53, "learning_rate": 9.750709174694333e-06, "loss": 1.8608, "step": 27820000 }, { "epoch": 80.53, "learning_rate": 9.74998697434235e-06, "loss": 1.8313, "step": 27820500 }, { "epoch": 80.53, "learning_rate": 9.749263326695072e-06, "loss": 1.8435, "step": 27821000 }, { "epoch": 80.53, "learning_rate": 9.74854112634309e-06, "loss": 1.8378, "step": 27821500 }, { "epoch": 80.53, "learning_rate": 9.747817478695815e-06, "loss": 1.8375, "step": 27822000 }, { "epoch": 80.53, "learning_rate": 9.747093831048537e-06, "loss": 1.8321, "step": 27822500 }, { "epoch": 80.54, "learning_rate": 9.74637018340126e-06, "loss": 1.8491, "step": 27823000 }, { "epoch": 80.54, "learning_rate": 9.745646535753983e-06, "loss": 1.8582, "step": 27823500 }, { "epoch": 80.54, "learning_rate": 9.744924335402002e-06, "loss": 1.8377, "step": 27824000 }, { "epoch": 80.54, "learning_rate": 9.744200687754724e-06, "loss": 1.8516, "step": 27824500 }, { "epoch": 80.54, "learning_rate": 9.743477040107447e-06, "loss": 1.8329, "step": 27825000 }, { "epoch": 80.54, "learning_rate": 9.74275339246017e-06, "loss": 1.8349, "step": 27825500 }, { "epoch": 80.54, "learning_rate": 9.742029744812895e-06, "loss": 1.859, "step": 27826000 }, { "epoch": 80.55, "learning_rate": 9.741307544460912e-06, "loss": 1.8631, "step": 27826500 }, { "epoch": 80.55, "learning_rate": 9.740583896813634e-06, "loss": 1.8315, "step": 27827000 }, { "epoch": 80.55, "learning_rate": 9.73986024916636e-06, "loss": 1.8301, "step": 27827500 }, { "epoch": 80.55, "learning_rate": 9.739136601519082e-06, "loss": 1.8355, "step": 27828000 }, { "epoch": 80.55, "learning_rate": 9.738412953871804e-06, "loss": 1.8445, "step": 27828500 }, { "epoch": 80.55, "learning_rate": 9.737689306224528e-06, "loss": 1.832, "step": 27829000 }, { "epoch": 80.56, "learning_rate": 9.736965658577252e-06, "loss": 1.863, "step": 27829500 }, { "epoch": 80.56, "learning_rate": 9.736242010929974e-06, "loss": 1.8662, "step": 27830000 }, { "epoch": 80.56, "learning_rate": 9.735519810577991e-06, "loss": 1.8471, "step": 27830500 }, { "epoch": 80.56, "learning_rate": 9.734796162930715e-06, "loss": 1.8388, "step": 27831000 }, { "epoch": 80.56, "learning_rate": 9.73407251528344e-06, "loss": 1.8228, "step": 27831500 }, { "epoch": 80.56, "learning_rate": 9.733348867636162e-06, "loss": 1.842, "step": 27832000 }, { "epoch": 80.56, "learning_rate": 9.732625219988885e-06, "loss": 1.8301, "step": 27832500 }, { "epoch": 80.57, "learning_rate": 9.73190157234161e-06, "loss": 1.8271, "step": 27833000 }, { "epoch": 80.57, "learning_rate": 9.731177924694332e-06, "loss": 1.8585, "step": 27833500 }, { "epoch": 80.57, "learning_rate": 9.730454277047056e-06, "loss": 1.8493, "step": 27834000 }, { "epoch": 80.57, "learning_rate": 9.729730629399778e-06, "loss": 1.8409, "step": 27834500 }, { "epoch": 80.57, "learning_rate": 9.729008429047797e-06, "loss": 1.8448, "step": 27835000 }, { "epoch": 80.57, "learning_rate": 9.728284781400519e-06, "loss": 1.8387, "step": 27835500 }, { "epoch": 80.57, "learning_rate": 9.727561133753243e-06, "loss": 1.8642, "step": 27836000 }, { "epoch": 80.58, "learning_rate": 9.726837486105965e-06, "loss": 1.8401, "step": 27836500 }, { "epoch": 80.58, "learning_rate": 9.726113838458689e-06, "loss": 1.8573, "step": 27837000 }, { "epoch": 80.58, "learning_rate": 9.725391638106706e-06, "loss": 1.8552, "step": 27837500 }, { "epoch": 80.58, "learning_rate": 9.72466799045943e-06, "loss": 1.8386, "step": 27838000 }, { "epoch": 80.58, "learning_rate": 9.723944342812154e-06, "loss": 1.84, "step": 27838500 }, { "epoch": 80.58, "learning_rate": 9.723220695164876e-06, "loss": 1.8594, "step": 27839000 }, { "epoch": 80.58, "learning_rate": 9.722497047517599e-06, "loss": 1.854, "step": 27839500 }, { "epoch": 80.59, "learning_rate": 9.721774847165618e-06, "loss": 1.8628, "step": 27840000 }, { "epoch": 80.59, "learning_rate": 9.721051199518341e-06, "loss": 1.8427, "step": 27840500 }, { "epoch": 80.59, "learning_rate": 9.720327551871064e-06, "loss": 1.857, "step": 27841000 }, { "epoch": 80.59, "learning_rate": 9.719603904223788e-06, "loss": 1.8742, "step": 27841500 }, { "epoch": 80.59, "learning_rate": 9.71888025657651e-06, "loss": 1.8519, "step": 27842000 }, { "epoch": 80.59, "learning_rate": 9.718156608929234e-06, "loss": 1.8522, "step": 27842500 }, { "epoch": 80.59, "learning_rate": 9.717434408577251e-06, "loss": 1.8495, "step": 27843000 }, { "epoch": 80.6, "learning_rate": 9.716712208225268e-06, "loss": 1.8418, "step": 27843500 }, { "epoch": 80.6, "learning_rate": 9.715988560577992e-06, "loss": 1.8508, "step": 27844000 }, { "epoch": 80.6, "learning_rate": 9.715264912930716e-06, "loss": 1.8148, "step": 27844500 }, { "epoch": 80.6, "learning_rate": 9.714541265283438e-06, "loss": 1.8304, "step": 27845000 }, { "epoch": 80.6, "learning_rate": 9.713817617636162e-06, "loss": 1.8672, "step": 27845500 }, { "epoch": 80.6, "learning_rate": 9.71309541728418e-06, "loss": 1.8555, "step": 27846000 }, { "epoch": 80.6, "learning_rate": 9.712371769636903e-06, "loss": 1.836, "step": 27846500 }, { "epoch": 80.61, "learning_rate": 9.711648121989626e-06, "loss": 1.8178, "step": 27847000 }, { "epoch": 80.61, "learning_rate": 9.71092447434235e-06, "loss": 1.8483, "step": 27847500 }, { "epoch": 80.61, "learning_rate": 9.710200826695074e-06, "loss": 1.8455, "step": 27848000 }, { "epoch": 80.61, "learning_rate": 9.709477179047796e-06, "loss": 1.845, "step": 27848500 }, { "epoch": 80.61, "learning_rate": 9.70875353140052e-06, "loss": 1.8742, "step": 27849000 }, { "epoch": 80.61, "learning_rate": 9.708029883753242e-06, "loss": 1.8888, "step": 27849500 }, { "epoch": 80.61, "learning_rate": 9.707306236105966e-06, "loss": 1.8099, "step": 27850000 }, { "epoch": 80.62, "learning_rate": 9.706582588458688e-06, "loss": 1.8458, "step": 27850500 }, { "epoch": 80.62, "learning_rate": 9.705858940811412e-06, "loss": 1.857, "step": 27851000 }, { "epoch": 80.62, "learning_rate": 9.705135293164136e-06, "loss": 1.8217, "step": 27851500 }, { "epoch": 80.62, "learning_rate": 9.704414540107448e-06, "loss": 1.8261, "step": 27852000 }, { "epoch": 80.62, "learning_rate": 9.70369089246017e-06, "loss": 1.8399, "step": 27852500 }, { "epoch": 80.62, "learning_rate": 9.702967244812894e-06, "loss": 1.8326, "step": 27853000 }, { "epoch": 80.62, "learning_rate": 9.702243597165617e-06, "loss": 1.8545, "step": 27853500 }, { "epoch": 80.63, "learning_rate": 9.70151994951834e-06, "loss": 1.8474, "step": 27854000 }, { "epoch": 80.63, "learning_rate": 9.700797749166358e-06, "loss": 1.8733, "step": 27854500 }, { "epoch": 80.63, "learning_rate": 9.700074101519082e-06, "loss": 1.8421, "step": 27855000 }, { "epoch": 80.63, "learning_rate": 9.699350453871806e-06, "loss": 1.8604, "step": 27855500 }, { "epoch": 80.63, "learning_rate": 9.698626806224528e-06, "loss": 1.8269, "step": 27856000 }, { "epoch": 80.63, "learning_rate": 9.697903158577252e-06, "loss": 1.8263, "step": 27856500 }, { "epoch": 80.63, "learning_rate": 9.697179510929974e-06, "loss": 1.8772, "step": 27857000 }, { "epoch": 80.64, "learning_rate": 9.696457310577993e-06, "loss": 1.8339, "step": 27857500 }, { "epoch": 80.64, "learning_rate": 9.695733662930715e-06, "loss": 1.8405, "step": 27858000 }, { "epoch": 80.64, "learning_rate": 9.69501001528344e-06, "loss": 1.8196, "step": 27858500 }, { "epoch": 80.64, "learning_rate": 9.694286367636161e-06, "loss": 1.8334, "step": 27859000 }, { "epoch": 80.64, "learning_rate": 9.693562719988885e-06, "loss": 1.8652, "step": 27859500 }, { "epoch": 80.64, "learning_rate": 9.69283907234161e-06, "loss": 1.8569, "step": 27860000 }, { "epoch": 80.64, "learning_rate": 9.692115424694332e-06, "loss": 1.871, "step": 27860500 }, { "epoch": 80.65, "learning_rate": 9.691391777047055e-06, "loss": 1.8229, "step": 27861000 }, { "epoch": 80.65, "learning_rate": 9.690668129399778e-06, "loss": 1.8458, "step": 27861500 }, { "epoch": 80.65, "learning_rate": 9.689945929047797e-06, "loss": 1.8228, "step": 27862000 }, { "epoch": 80.65, "learning_rate": 9.689222281400519e-06, "loss": 1.8346, "step": 27862500 }, { "epoch": 80.65, "learning_rate": 9.688498633753243e-06, "loss": 1.8272, "step": 27863000 }, { "epoch": 80.65, "learning_rate": 9.687774986105965e-06, "loss": 1.8322, "step": 27863500 }, { "epoch": 80.65, "learning_rate": 9.687051338458689e-06, "loss": 1.8386, "step": 27864000 }, { "epoch": 80.66, "learning_rate": 9.686327690811411e-06, "loss": 1.854, "step": 27864500 }, { "epoch": 80.66, "learning_rate": 9.685604043164135e-06, "loss": 1.8504, "step": 27865000 }, { "epoch": 80.66, "learning_rate": 9.684880395516859e-06, "loss": 1.8243, "step": 27865500 }, { "epoch": 80.66, "learning_rate": 9.684158195164876e-06, "loss": 1.8357, "step": 27866000 }, { "epoch": 80.66, "learning_rate": 9.683435994812894e-06, "loss": 1.8612, "step": 27866500 }, { "epoch": 80.66, "learning_rate": 9.682712347165617e-06, "loss": 1.8434, "step": 27867000 }, { "epoch": 80.67, "learning_rate": 9.681988699518341e-06, "loss": 1.8442, "step": 27867500 }, { "epoch": 80.67, "learning_rate": 9.681265051871064e-06, "loss": 1.8503, "step": 27868000 }, { "epoch": 80.67, "learning_rate": 9.680541404223788e-06, "loss": 1.8459, "step": 27868500 }, { "epoch": 80.67, "learning_rate": 9.67981775657651e-06, "loss": 1.8372, "step": 27869000 }, { "epoch": 80.67, "learning_rate": 9.679095556224529e-06, "loss": 1.8667, "step": 27869500 }, { "epoch": 80.67, "learning_rate": 9.678371908577251e-06, "loss": 1.854, "step": 27870000 }, { "epoch": 80.67, "learning_rate": 9.677648260929975e-06, "loss": 1.8306, "step": 27870500 }, { "epoch": 80.68, "learning_rate": 9.676924613282697e-06, "loss": 1.8346, "step": 27871000 }, { "epoch": 80.68, "learning_rate": 9.676200965635421e-06, "loss": 1.849, "step": 27871500 }, { "epoch": 80.68, "learning_rate": 9.675477317988143e-06, "loss": 1.8433, "step": 27872000 }, { "epoch": 80.68, "learning_rate": 9.674753670340867e-06, "loss": 1.8362, "step": 27872500 }, { "epoch": 80.68, "learning_rate": 9.674030022693591e-06, "loss": 1.8148, "step": 27873000 }, { "epoch": 80.68, "learning_rate": 9.673307822341608e-06, "loss": 1.8349, "step": 27873500 }, { "epoch": 80.68, "learning_rate": 9.67258417469433e-06, "loss": 1.8548, "step": 27874000 }, { "epoch": 80.69, "learning_rate": 9.671860527047055e-06, "loss": 1.8459, "step": 27874500 }, { "epoch": 80.69, "learning_rate": 9.671136879399778e-06, "loss": 1.8239, "step": 27875000 }, { "epoch": 80.69, "learning_rate": 9.6704132317525e-06, "loss": 1.848, "step": 27875500 }, { "epoch": 80.69, "learning_rate": 9.669689584105225e-06, "loss": 1.8668, "step": 27876000 }, { "epoch": 80.69, "learning_rate": 9.668965936457949e-06, "loss": 1.8614, "step": 27876500 }, { "epoch": 80.69, "learning_rate": 9.66824228881067e-06, "loss": 1.8251, "step": 27877000 }, { "epoch": 80.69, "learning_rate": 9.667518641163393e-06, "loss": 1.831, "step": 27877500 }, { "epoch": 80.7, "learning_rate": 9.666796440811412e-06, "loss": 1.8285, "step": 27878000 }, { "epoch": 80.7, "learning_rate": 9.666074240459431e-06, "loss": 1.8403, "step": 27878500 }, { "epoch": 80.7, "learning_rate": 9.665350592812153e-06, "loss": 1.8382, "step": 27879000 }, { "epoch": 80.7, "learning_rate": 9.664626945164875e-06, "loss": 1.8674, "step": 27879500 }, { "epoch": 80.7, "learning_rate": 9.6639032975176e-06, "loss": 1.8351, "step": 27880000 }, { "epoch": 80.7, "learning_rate": 9.663179649870323e-06, "loss": 1.8691, "step": 27880500 }, { "epoch": 80.7, "learning_rate": 9.662456002223045e-06, "loss": 1.8377, "step": 27881000 }, { "epoch": 80.71, "learning_rate": 9.66173235457577e-06, "loss": 1.8467, "step": 27881500 }, { "epoch": 80.71, "learning_rate": 9.661008706928493e-06, "loss": 1.8723, "step": 27882000 }, { "epoch": 80.71, "learning_rate": 9.660285059281216e-06, "loss": 1.8445, "step": 27882500 }, { "epoch": 80.71, "learning_rate": 9.659562858929233e-06, "loss": 1.8601, "step": 27883000 }, { "epoch": 80.71, "learning_rate": 9.65884065857725e-06, "loss": 1.8663, "step": 27883500 }, { "epoch": 80.71, "learning_rate": 9.658117010929976e-06, "loss": 1.8315, "step": 27884000 }, { "epoch": 80.71, "learning_rate": 9.657393363282698e-06, "loss": 1.8589, "step": 27884500 }, { "epoch": 80.72, "learning_rate": 9.65666971563542e-06, "loss": 1.8499, "step": 27885000 }, { "epoch": 80.72, "learning_rate": 9.655946067988144e-06, "loss": 1.8573, "step": 27885500 }, { "epoch": 80.72, "learning_rate": 9.655222420340868e-06, "loss": 1.8716, "step": 27886000 }, { "epoch": 80.72, "learning_rate": 9.65449877269359e-06, "loss": 1.8466, "step": 27886500 }, { "epoch": 80.72, "learning_rate": 9.653775125046314e-06, "loss": 1.8431, "step": 27887000 }, { "epoch": 80.72, "learning_rate": 9.653051477399038e-06, "loss": 1.8492, "step": 27887500 }, { "epoch": 80.72, "learning_rate": 9.652329277047055e-06, "loss": 1.8582, "step": 27888000 }, { "epoch": 80.73, "learning_rate": 9.651605629399778e-06, "loss": 1.8428, "step": 27888500 }, { "epoch": 80.73, "learning_rate": 9.650883429047795e-06, "loss": 1.8542, "step": 27889000 }, { "epoch": 80.73, "learning_rate": 9.650159781400519e-06, "loss": 1.8536, "step": 27889500 }, { "epoch": 80.73, "learning_rate": 9.649436133753243e-06, "loss": 1.8535, "step": 27890000 }, { "epoch": 80.73, "learning_rate": 9.648712486105965e-06, "loss": 1.8466, "step": 27890500 }, { "epoch": 80.73, "learning_rate": 9.647991733049277e-06, "loss": 1.8539, "step": 27891000 }, { "epoch": 80.73, "learning_rate": 9.647268085402001e-06, "loss": 1.8466, "step": 27891500 }, { "epoch": 80.74, "learning_rate": 9.646544437754725e-06, "loss": 1.8618, "step": 27892000 }, { "epoch": 80.74, "learning_rate": 9.645820790107447e-06, "loss": 1.8648, "step": 27892500 }, { "epoch": 80.74, "learning_rate": 9.64509714246017e-06, "loss": 1.8423, "step": 27893000 }, { "epoch": 80.74, "learning_rate": 9.644373494812895e-06, "loss": 1.8682, "step": 27893500 }, { "epoch": 80.74, "learning_rate": 9.643651294460912e-06, "loss": 1.8803, "step": 27894000 }, { "epoch": 80.74, "learning_rate": 9.642927646813635e-06, "loss": 1.8301, "step": 27894500 }, { "epoch": 80.74, "learning_rate": 9.642203999166359e-06, "loss": 1.8815, "step": 27895000 }, { "epoch": 80.75, "learning_rate": 9.641480351519082e-06, "loss": 1.832, "step": 27895500 }, { "epoch": 80.75, "learning_rate": 9.640756703871805e-06, "loss": 1.8724, "step": 27896000 }, { "epoch": 80.75, "learning_rate": 9.640033056224527e-06, "loss": 1.8244, "step": 27896500 }, { "epoch": 80.75, "learning_rate": 9.639309408577253e-06, "loss": 1.8602, "step": 27897000 }, { "epoch": 80.75, "learning_rate": 9.638585760929975e-06, "loss": 1.8623, "step": 27897500 }, { "epoch": 80.75, "learning_rate": 9.637862113282697e-06, "loss": 1.8306, "step": 27898000 }, { "epoch": 80.75, "learning_rate": 9.637138465635421e-06, "loss": 1.8333, "step": 27898500 }, { "epoch": 80.76, "learning_rate": 9.63641626528344e-06, "loss": 1.8691, "step": 27899000 }, { "epoch": 80.76, "learning_rate": 9.635692617636162e-06, "loss": 1.8522, "step": 27899500 }, { "epoch": 80.76, "learning_rate": 9.634968969988884e-06, "loss": 1.8605, "step": 27900000 }, { "epoch": 80.76, "learning_rate": 9.634245322341608e-06, "loss": 1.8485, "step": 27900500 }, { "epoch": 80.76, "learning_rate": 9.633521674694332e-06, "loss": 1.843, "step": 27901000 }, { "epoch": 80.76, "learning_rate": 9.632798027047054e-06, "loss": 1.8381, "step": 27901500 }, { "epoch": 80.76, "learning_rate": 9.632074379399778e-06, "loss": 1.8373, "step": 27902000 }, { "epoch": 80.77, "learning_rate": 9.631350731752502e-06, "loss": 1.8511, "step": 27902500 }, { "epoch": 80.77, "learning_rate": 9.630627084105225e-06, "loss": 1.8615, "step": 27903000 }, { "epoch": 80.77, "learning_rate": 9.629904883753242e-06, "loss": 1.8586, "step": 27903500 }, { "epoch": 80.77, "learning_rate": 9.629181236105966e-06, "loss": 1.8587, "step": 27904000 }, { "epoch": 80.77, "learning_rate": 9.62845758845869e-06, "loss": 1.8511, "step": 27904500 }, { "epoch": 80.77, "learning_rate": 9.627733940811412e-06, "loss": 1.8228, "step": 27905000 }, { "epoch": 80.77, "learning_rate": 9.627010293164134e-06, "loss": 1.8506, "step": 27905500 }, { "epoch": 80.78, "learning_rate": 9.62628664551686e-06, "loss": 1.859, "step": 27906000 }, { "epoch": 80.78, "learning_rate": 9.625562997869582e-06, "loss": 1.845, "step": 27906500 }, { "epoch": 80.78, "learning_rate": 9.624839350222304e-06, "loss": 1.8543, "step": 27907000 }, { "epoch": 80.78, "learning_rate": 9.624115702575028e-06, "loss": 1.8285, "step": 27907500 }, { "epoch": 80.78, "learning_rate": 9.623392054927752e-06, "loss": 1.8575, "step": 27908000 }, { "epoch": 80.78, "learning_rate": 9.62266985457577e-06, "loss": 1.8239, "step": 27908500 }, { "epoch": 80.79, "learning_rate": 9.621946206928492e-06, "loss": 1.8468, "step": 27909000 }, { "epoch": 80.79, "learning_rate": 9.621222559281217e-06, "loss": 1.8545, "step": 27909500 }, { "epoch": 80.79, "learning_rate": 9.62050180622453e-06, "loss": 1.8411, "step": 27910000 }, { "epoch": 80.79, "learning_rate": 9.619778158577252e-06, "loss": 1.8309, "step": 27910500 }, { "epoch": 80.79, "learning_rate": 9.619054510929974e-06, "loss": 1.847, "step": 27911000 }, { "epoch": 80.79, "learning_rate": 9.618330863282698e-06, "loss": 1.8427, "step": 27911500 }, { "epoch": 80.79, "learning_rate": 9.617607215635422e-06, "loss": 1.8534, "step": 27912000 }, { "epoch": 80.8, "learning_rate": 9.616883567988144e-06, "loss": 1.8482, "step": 27912500 }, { "epoch": 80.8, "learning_rate": 9.616159920340866e-06, "loss": 1.8491, "step": 27913000 }, { "epoch": 80.8, "learning_rate": 9.615436272693592e-06, "loss": 1.8375, "step": 27913500 }, { "epoch": 80.8, "learning_rate": 9.614712625046314e-06, "loss": 1.8436, "step": 27914000 }, { "epoch": 80.8, "learning_rate": 9.613988977399036e-06, "loss": 1.8376, "step": 27914500 }, { "epoch": 80.8, "learning_rate": 9.61326532975176e-06, "loss": 1.8415, "step": 27915000 }, { "epoch": 80.8, "learning_rate": 9.612541682104484e-06, "loss": 1.8745, "step": 27915500 }, { "epoch": 80.81, "learning_rate": 9.611818034457206e-06, "loss": 1.8609, "step": 27916000 }, { "epoch": 80.81, "learning_rate": 9.61109438680993e-06, "loss": 1.8515, "step": 27916500 }, { "epoch": 80.81, "learning_rate": 9.61037218645795e-06, "loss": 1.8599, "step": 27917000 }, { "epoch": 80.81, "learning_rate": 9.609649986105966e-06, "loss": 1.8141, "step": 27917500 }, { "epoch": 80.81, "learning_rate": 9.608927785753984e-06, "loss": 1.8491, "step": 27918000 }, { "epoch": 80.81, "learning_rate": 9.608204138106706e-06, "loss": 1.8631, "step": 27918500 }, { "epoch": 80.81, "learning_rate": 9.60748049045943e-06, "loss": 1.8481, "step": 27919000 }, { "epoch": 80.82, "learning_rate": 9.606758290107449e-06, "loss": 1.8571, "step": 27919500 }, { "epoch": 80.82, "learning_rate": 9.606034642460171e-06, "loss": 1.8388, "step": 27920000 }, { "epoch": 80.82, "learning_rate": 9.605310994812893e-06, "loss": 1.838, "step": 27920500 }, { "epoch": 80.82, "learning_rate": 9.604587347165617e-06, "loss": 1.8462, "step": 27921000 }, { "epoch": 80.82, "learning_rate": 9.603863699518341e-06, "loss": 1.8586, "step": 27921500 }, { "epoch": 80.82, "learning_rate": 9.603140051871063e-06, "loss": 1.8373, "step": 27922000 }, { "epoch": 80.82, "learning_rate": 9.602416404223787e-06, "loss": 1.8561, "step": 27922500 }, { "epoch": 80.83, "learning_rate": 9.601692756576511e-06, "loss": 1.8326, "step": 27923000 }, { "epoch": 80.83, "learning_rate": 9.600969108929233e-06, "loss": 1.8378, "step": 27923500 }, { "epoch": 80.83, "learning_rate": 9.600245461281956e-06, "loss": 1.8278, "step": 27924000 }, { "epoch": 80.83, "learning_rate": 9.59952181363468e-06, "loss": 1.8543, "step": 27924500 }, { "epoch": 80.83, "learning_rate": 9.598798165987404e-06, "loss": 1.8488, "step": 27925000 }, { "epoch": 80.83, "learning_rate": 9.598074518340126e-06, "loss": 1.851, "step": 27925500 }, { "epoch": 80.83, "learning_rate": 9.59735087069285e-06, "loss": 1.833, "step": 27926000 }, { "epoch": 80.84, "learning_rate": 9.596627223045574e-06, "loss": 1.828, "step": 27926500 }, { "epoch": 80.84, "learning_rate": 9.595903575398296e-06, "loss": 1.8462, "step": 27927000 }, { "epoch": 80.84, "learning_rate": 9.595181375046313e-06, "loss": 1.8602, "step": 27927500 }, { "epoch": 80.84, "learning_rate": 9.594457727399037e-06, "loss": 1.8557, "step": 27928000 }, { "epoch": 80.84, "learning_rate": 9.593734079751761e-06, "loss": 1.8805, "step": 27928500 }, { "epoch": 80.84, "learning_rate": 9.593010432104483e-06, "loss": 1.8484, "step": 27929000 }, { "epoch": 80.84, "learning_rate": 9.592286784457207e-06, "loss": 1.8488, "step": 27929500 }, { "epoch": 80.85, "learning_rate": 9.591564584105224e-06, "loss": 1.8305, "step": 27930000 }, { "epoch": 80.85, "learning_rate": 9.590840936457948e-06, "loss": 1.8507, "step": 27930500 }, { "epoch": 80.85, "learning_rate": 9.59011728881067e-06, "loss": 1.8689, "step": 27931000 }, { "epoch": 80.85, "learning_rate": 9.589393641163395e-06, "loss": 1.8415, "step": 27931500 }, { "epoch": 80.85, "learning_rate": 9.588671440811412e-06, "loss": 1.8615, "step": 27932000 }, { "epoch": 80.85, "learning_rate": 9.587947793164136e-06, "loss": 1.8329, "step": 27932500 }, { "epoch": 80.85, "learning_rate": 9.587224145516858e-06, "loss": 1.8238, "step": 27933000 }, { "epoch": 80.86, "learning_rate": 9.586500497869582e-06, "loss": 1.8683, "step": 27933500 }, { "epoch": 80.86, "learning_rate": 9.585776850222306e-06, "loss": 1.8459, "step": 27934000 }, { "epoch": 80.86, "learning_rate": 9.585054649870323e-06, "loss": 1.8452, "step": 27934500 }, { "epoch": 80.86, "learning_rate": 9.584331002223045e-06, "loss": 1.8456, "step": 27935000 }, { "epoch": 80.86, "learning_rate": 9.58360735457577e-06, "loss": 1.8432, "step": 27935500 }, { "epoch": 80.86, "learning_rate": 9.582883706928493e-06, "loss": 1.837, "step": 27936000 }, { "epoch": 80.86, "learning_rate": 9.582160059281215e-06, "loss": 1.8622, "step": 27936500 }, { "epoch": 80.87, "learning_rate": 9.58143641163394e-06, "loss": 1.8499, "step": 27937000 }, { "epoch": 80.87, "learning_rate": 9.580714211281957e-06, "loss": 1.8471, "step": 27937500 }, { "epoch": 80.87, "learning_rate": 9.57999056363468e-06, "loss": 1.8596, "step": 27938000 }, { "epoch": 80.87, "learning_rate": 9.579268363282698e-06, "loss": 1.8501, "step": 27938500 }, { "epoch": 80.87, "learning_rate": 9.57854471563542e-06, "loss": 1.8649, "step": 27939000 }, { "epoch": 80.87, "learning_rate": 9.577821067988144e-06, "loss": 1.8399, "step": 27939500 }, { "epoch": 80.87, "learning_rate": 9.577097420340868e-06, "loss": 1.8438, "step": 27940000 }, { "epoch": 80.88, "learning_rate": 9.57637377269359e-06, "loss": 1.8387, "step": 27940500 }, { "epoch": 80.88, "learning_rate": 9.575650125046314e-06, "loss": 1.8661, "step": 27941000 }, { "epoch": 80.88, "learning_rate": 9.574926477399038e-06, "loss": 1.8209, "step": 27941500 }, { "epoch": 80.88, "learning_rate": 9.57420282975176e-06, "loss": 1.8292, "step": 27942000 }, { "epoch": 80.88, "learning_rate": 9.573479182104484e-06, "loss": 1.8229, "step": 27942500 }, { "epoch": 80.88, "learning_rate": 9.572755534457206e-06, "loss": 1.8591, "step": 27943000 }, { "epoch": 80.88, "learning_rate": 9.57203188680993e-06, "loss": 1.8529, "step": 27943500 }, { "epoch": 80.89, "learning_rate": 9.571309686457947e-06, "loss": 1.8315, "step": 27944000 }, { "epoch": 80.89, "learning_rate": 9.570587486105965e-06, "loss": 1.8449, "step": 27944500 }, { "epoch": 80.89, "learning_rate": 9.569863838458689e-06, "loss": 1.8481, "step": 27945000 }, { "epoch": 80.89, "learning_rate": 9.569140190811413e-06, "loss": 1.8462, "step": 27945500 }, { "epoch": 80.89, "learning_rate": 9.568416543164135e-06, "loss": 1.8399, "step": 27946000 }, { "epoch": 80.89, "learning_rate": 9.567692895516859e-06, "loss": 1.8525, "step": 27946500 }, { "epoch": 80.9, "learning_rate": 9.566969247869583e-06, "loss": 1.84, "step": 27947000 }, { "epoch": 80.9, "learning_rate": 9.566245600222305e-06, "loss": 1.8579, "step": 27947500 }, { "epoch": 80.9, "learning_rate": 9.565521952575029e-06, "loss": 1.8431, "step": 27948000 }, { "epoch": 80.9, "learning_rate": 9.564798304927751e-06, "loss": 1.846, "step": 27948500 }, { "epoch": 80.9, "learning_rate": 9.564074657280475e-06, "loss": 1.8423, "step": 27949000 }, { "epoch": 80.9, "learning_rate": 9.563352456928492e-06, "loss": 1.8228, "step": 27949500 }, { "epoch": 80.9, "learning_rate": 9.562628809281216e-06, "loss": 1.8442, "step": 27950000 }, { "epoch": 80.91, "learning_rate": 9.561905161633938e-06, "loss": 1.856, "step": 27950500 }, { "epoch": 80.91, "learning_rate": 9.561181513986662e-06, "loss": 1.8431, "step": 27951000 }, { "epoch": 80.91, "learning_rate": 9.560457866339386e-06, "loss": 1.8479, "step": 27951500 }, { "epoch": 80.91, "learning_rate": 9.559735665987403e-06, "loss": 1.8537, "step": 27952000 }, { "epoch": 80.91, "learning_rate": 9.559012018340126e-06, "loss": 1.8552, "step": 27952500 }, { "epoch": 80.91, "learning_rate": 9.55828837069285e-06, "loss": 1.8389, "step": 27953000 }, { "epoch": 80.91, "learning_rate": 9.557564723045574e-06, "loss": 1.8456, "step": 27953500 }, { "epoch": 80.92, "learning_rate": 9.55684252269359e-06, "loss": 1.8267, "step": 27954000 }, { "epoch": 80.92, "learning_rate": 9.556120322341608e-06, "loss": 1.8282, "step": 27954500 }, { "epoch": 80.92, "learning_rate": 9.555396674694332e-06, "loss": 1.8594, "step": 27955000 }, { "epoch": 80.92, "learning_rate": 9.554673027047054e-06, "loss": 1.8785, "step": 27955500 }, { "epoch": 80.92, "learning_rate": 9.553949379399778e-06, "loss": 1.8757, "step": 27956000 }, { "epoch": 80.92, "learning_rate": 9.553225731752502e-06, "loss": 1.8299, "step": 27956500 }, { "epoch": 80.92, "learning_rate": 9.552502084105224e-06, "loss": 1.8434, "step": 27957000 }, { "epoch": 80.93, "learning_rate": 9.551779883753242e-06, "loss": 1.8436, "step": 27957500 }, { "epoch": 80.93, "learning_rate": 9.551056236105965e-06, "loss": 1.8542, "step": 27958000 }, { "epoch": 80.93, "learning_rate": 9.55033258845869e-06, "loss": 1.8634, "step": 27958500 }, { "epoch": 80.93, "learning_rate": 9.549608940811412e-06, "loss": 1.8504, "step": 27959000 }, { "epoch": 80.93, "learning_rate": 9.548885293164136e-06, "loss": 1.85, "step": 27959500 }, { "epoch": 80.93, "learning_rate": 9.548161645516858e-06, "loss": 1.8499, "step": 27960000 }, { "epoch": 80.93, "learning_rate": 9.547437997869582e-06, "loss": 1.8234, "step": 27960500 }, { "epoch": 80.94, "learning_rate": 9.546714350222306e-06, "loss": 1.8596, "step": 27961000 }, { "epoch": 80.94, "learning_rate": 9.545990702575028e-06, "loss": 1.8349, "step": 27961500 }, { "epoch": 80.94, "learning_rate": 9.545268502223045e-06, "loss": 1.8672, "step": 27962000 }, { "epoch": 80.94, "learning_rate": 9.544544854575769e-06, "loss": 1.8367, "step": 27962500 }, { "epoch": 80.94, "learning_rate": 9.543821206928493e-06, "loss": 1.833, "step": 27963000 }, { "epoch": 80.94, "learning_rate": 9.543097559281215e-06, "loss": 1.8287, "step": 27963500 }, { "epoch": 80.94, "learning_rate": 9.542375358929234e-06, "loss": 1.8454, "step": 27964000 }, { "epoch": 80.95, "learning_rate": 9.541651711281956e-06, "loss": 1.8467, "step": 27964500 }, { "epoch": 80.95, "learning_rate": 9.54092806363468e-06, "loss": 1.8432, "step": 27965000 }, { "epoch": 80.95, "learning_rate": 9.540204415987403e-06, "loss": 1.853, "step": 27965500 }, { "epoch": 80.95, "learning_rate": 9.539480768340127e-06, "loss": 1.8599, "step": 27966000 }, { "epoch": 80.95, "learning_rate": 9.53875712069285e-06, "loss": 1.8262, "step": 27966500 }, { "epoch": 80.95, "learning_rate": 9.538033473045573e-06, "loss": 1.8558, "step": 27967000 }, { "epoch": 80.95, "learning_rate": 9.53731127269359e-06, "loss": 1.8442, "step": 27967500 }, { "epoch": 80.96, "learning_rate": 9.536587625046314e-06, "loss": 1.836, "step": 27968000 }, { "epoch": 80.96, "learning_rate": 9.535863977399038e-06, "loss": 1.8375, "step": 27968500 }, { "epoch": 80.96, "learning_rate": 9.53514032975176e-06, "loss": 1.8443, "step": 27969000 }, { "epoch": 80.96, "learning_rate": 9.534416682104484e-06, "loss": 1.8288, "step": 27969500 }, { "epoch": 80.96, "learning_rate": 9.533693034457208e-06, "loss": 1.8366, "step": 27970000 }, { "epoch": 80.96, "learning_rate": 9.53296938680993e-06, "loss": 1.859, "step": 27970500 }, { "epoch": 80.96, "learning_rate": 9.532245739162652e-06, "loss": 1.8444, "step": 27971000 }, { "epoch": 80.97, "learning_rate": 9.531522091515376e-06, "loss": 1.8493, "step": 27971500 }, { "epoch": 80.97, "learning_rate": 9.5307984438681e-06, "loss": 1.8461, "step": 27972000 }, { "epoch": 80.97, "learning_rate": 9.530074796220822e-06, "loss": 1.8595, "step": 27972500 }, { "epoch": 80.97, "learning_rate": 9.529351148573546e-06, "loss": 1.8667, "step": 27973000 }, { "epoch": 80.97, "learning_rate": 9.528628948221564e-06, "loss": 1.8656, "step": 27973500 }, { "epoch": 80.97, "learning_rate": 9.527905300574288e-06, "loss": 1.8372, "step": 27974000 }, { "epoch": 80.97, "learning_rate": 9.527183100222305e-06, "loss": 1.8724, "step": 27974500 }, { "epoch": 80.98, "learning_rate": 9.526459452575029e-06, "loss": 1.8354, "step": 27975000 }, { "epoch": 80.98, "learning_rate": 9.525735804927751e-06, "loss": 1.8524, "step": 27975500 }, { "epoch": 80.98, "learning_rate": 9.525012157280475e-06, "loss": 1.8496, "step": 27976000 }, { "epoch": 80.98, "learning_rate": 9.524288509633197e-06, "loss": 1.8127, "step": 27976500 }, { "epoch": 80.98, "learning_rate": 9.523564861985921e-06, "loss": 1.8504, "step": 27977000 }, { "epoch": 80.98, "learning_rate": 9.522841214338645e-06, "loss": 1.8636, "step": 27977500 }, { "epoch": 80.98, "learning_rate": 9.522117566691367e-06, "loss": 1.841, "step": 27978000 }, { "epoch": 80.99, "learning_rate": 9.521393919044091e-06, "loss": 1.8518, "step": 27978500 }, { "epoch": 80.99, "learning_rate": 9.520671718692108e-06, "loss": 1.8402, "step": 27979000 }, { "epoch": 80.99, "learning_rate": 9.519948071044832e-06, "loss": 1.842, "step": 27979500 }, { "epoch": 80.99, "learning_rate": 9.519224423397555e-06, "loss": 1.8683, "step": 27980000 }, { "epoch": 80.99, "learning_rate": 9.518500775750278e-06, "loss": 1.8396, "step": 27980500 }, { "epoch": 80.99, "learning_rate": 9.517777128103002e-06, "loss": 1.8664, "step": 27981000 }, { "epoch": 80.99, "learning_rate": 9.517053480455725e-06, "loss": 1.853, "step": 27981500 }, { "epoch": 81.0, "learning_rate": 9.516329832808447e-06, "loss": 1.8603, "step": 27982000 }, { "epoch": 81.0, "learning_rate": 9.515607632456466e-06, "loss": 1.839, "step": 27982500 }, { "epoch": 81.0, "learning_rate": 9.51488398480919e-06, "loss": 1.8393, "step": 27983000 }, { "epoch": 81.0, "eval_accuracy": 0.6873256623884321, "eval_accuracy_mlm": 0.6559826513935384, "eval_accuracy_nsp": 0.8555109467306468, "eval_loss": 2.204529047012329, "eval_runtime": 331.9624, "eval_samples_per_second": 1314.564, "eval_steps_per_second": 54.774, "step": 27983232 }, { "epoch": 81.0, "learning_rate": 9.514160337161912e-06, "loss": 1.8289, "step": 27983500 }, { "epoch": 81.0, "learning_rate": 9.513436689514634e-06, "loss": 1.848, "step": 27984000 }, { "epoch": 81.0, "learning_rate": 9.51271304186736e-06, "loss": 1.811, "step": 27984500 }, { "epoch": 81.01, "learning_rate": 9.511990841515377e-06, "loss": 1.8314, "step": 27985000 }, { "epoch": 81.01, "learning_rate": 9.5112671938681e-06, "loss": 1.8233, "step": 27985500 }, { "epoch": 81.01, "learning_rate": 9.510543546220823e-06, "loss": 1.8343, "step": 27986000 }, { "epoch": 81.01, "learning_rate": 9.509819898573547e-06, "loss": 1.8559, "step": 27986500 }, { "epoch": 81.01, "learning_rate": 9.509097698221564e-06, "loss": 1.8376, "step": 27987000 }, { "epoch": 81.01, "learning_rate": 9.508374050574287e-06, "loss": 1.7997, "step": 27987500 }, { "epoch": 81.01, "learning_rate": 9.50765040292701e-06, "loss": 1.8277, "step": 27988000 }, { "epoch": 81.02, "learning_rate": 9.506926755279734e-06, "loss": 1.8183, "step": 27988500 }, { "epoch": 81.02, "learning_rate": 9.506204554927752e-06, "loss": 1.8362, "step": 27989000 }, { "epoch": 81.02, "learning_rate": 9.505480907280474e-06, "loss": 1.8496, "step": 27989500 }, { "epoch": 81.02, "learning_rate": 9.504757259633198e-06, "loss": 1.8604, "step": 27990000 }, { "epoch": 81.02, "learning_rate": 9.504033611985922e-06, "loss": 1.8382, "step": 27990500 }, { "epoch": 81.02, "learning_rate": 9.503309964338644e-06, "loss": 1.8236, "step": 27991000 }, { "epoch": 81.02, "learning_rate": 9.502586316691366e-06, "loss": 1.8519, "step": 27991500 }, { "epoch": 81.03, "learning_rate": 9.501862669044092e-06, "loss": 1.8567, "step": 27992000 }, { "epoch": 81.03, "learning_rate": 9.501139021396814e-06, "loss": 1.8519, "step": 27992500 }, { "epoch": 81.03, "learning_rate": 9.500415373749536e-06, "loss": 1.8376, "step": 27993000 }, { "epoch": 81.03, "learning_rate": 9.499693173397554e-06, "loss": 1.8397, "step": 27993500 }, { "epoch": 81.03, "learning_rate": 9.49896952575028e-06, "loss": 1.8032, "step": 27994000 }, { "epoch": 81.03, "learning_rate": 9.498245878103001e-06, "loss": 1.8208, "step": 27994500 }, { "epoch": 81.03, "learning_rate": 9.497522230455724e-06, "loss": 1.8153, "step": 27995000 }, { "epoch": 81.04, "learning_rate": 9.496800030103743e-06, "loss": 1.8227, "step": 27995500 }, { "epoch": 81.04, "learning_rate": 9.496077829751762e-06, "loss": 1.8619, "step": 27996000 }, { "epoch": 81.04, "learning_rate": 9.495354182104484e-06, "loss": 1.8431, "step": 27996500 }, { "epoch": 81.04, "learning_rate": 9.494630534457206e-06, "loss": 1.8263, "step": 27997000 }, { "epoch": 81.04, "learning_rate": 9.49390688680993e-06, "loss": 1.8264, "step": 27997500 }, { "epoch": 81.04, "learning_rate": 9.493183239162654e-06, "loss": 1.8234, "step": 27998000 }, { "epoch": 81.04, "learning_rate": 9.492463933401261e-06, "loss": 1.8351, "step": 27998500 }, { "epoch": 81.05, "learning_rate": 9.491740285753983e-06, "loss": 1.8437, "step": 27999000 }, { "epoch": 81.05, "learning_rate": 9.491016638106706e-06, "loss": 1.8498, "step": 27999500 }, { "epoch": 81.05, "learning_rate": 9.49029299045943e-06, "loss": 1.8355, "step": 28000000 }, { "epoch": 81.05, "learning_rate": 9.489569342812154e-06, "loss": 1.84, "step": 28000500 }, { "epoch": 81.05, "learning_rate": 9.488845695164876e-06, "loss": 1.8398, "step": 28001000 }, { "epoch": 81.05, "learning_rate": 9.4881220475176e-06, "loss": 1.8394, "step": 28001500 }, { "epoch": 81.05, "learning_rate": 9.487399847165619e-06, "loss": 1.842, "step": 28002000 }, { "epoch": 81.06, "learning_rate": 9.486676199518341e-06, "loss": 1.8563, "step": 28002500 }, { "epoch": 81.06, "learning_rate": 9.485952551871063e-06, "loss": 1.8594, "step": 28003000 }, { "epoch": 81.06, "learning_rate": 9.485228904223787e-06, "loss": 1.8372, "step": 28003500 }, { "epoch": 81.06, "learning_rate": 9.484505256576511e-06, "loss": 1.8357, "step": 28004000 }, { "epoch": 81.06, "learning_rate": 9.483781608929233e-06, "loss": 1.8588, "step": 28004500 }, { "epoch": 81.06, "learning_rate": 9.483057961281955e-06, "loss": 1.8487, "step": 28005000 }, { "epoch": 81.06, "learning_rate": 9.482334313634681e-06, "loss": 1.8661, "step": 28005500 }, { "epoch": 81.07, "learning_rate": 9.481612113282698e-06, "loss": 1.8235, "step": 28006000 }, { "epoch": 81.07, "learning_rate": 9.48088846563542e-06, "loss": 1.8069, "step": 28006500 }, { "epoch": 81.07, "learning_rate": 9.480164817988143e-06, "loss": 1.8556, "step": 28007000 }, { "epoch": 81.07, "learning_rate": 9.479441170340868e-06, "loss": 1.8299, "step": 28007500 }, { "epoch": 81.07, "learning_rate": 9.47871752269359e-06, "loss": 1.8301, "step": 28008000 }, { "epoch": 81.07, "learning_rate": 9.477993875046313e-06, "loss": 1.8214, "step": 28008500 }, { "epoch": 81.07, "learning_rate": 9.477270227399038e-06, "loss": 1.8483, "step": 28009000 }, { "epoch": 81.08, "learning_rate": 9.47654657975176e-06, "loss": 1.8412, "step": 28009500 }, { "epoch": 81.08, "learning_rate": 9.475822932104483e-06, "loss": 1.8414, "step": 28010000 }, { "epoch": 81.08, "learning_rate": 9.475099284457207e-06, "loss": 1.8161, "step": 28010500 }, { "epoch": 81.08, "learning_rate": 9.47437563680993e-06, "loss": 1.8472, "step": 28011000 }, { "epoch": 81.08, "learning_rate": 9.473653436457948e-06, "loss": 1.833, "step": 28011500 }, { "epoch": 81.08, "learning_rate": 9.47292978881067e-06, "loss": 1.8694, "step": 28012000 }, { "epoch": 81.08, "learning_rate": 9.472206141163394e-06, "loss": 1.8293, "step": 28012500 }, { "epoch": 81.09, "learning_rate": 9.471482493516118e-06, "loss": 1.8515, "step": 28013000 }, { "epoch": 81.09, "learning_rate": 9.47075884586884e-06, "loss": 1.8344, "step": 28013500 }, { "epoch": 81.09, "learning_rate": 9.470035198221564e-06, "loss": 1.8182, "step": 28014000 }, { "epoch": 81.09, "learning_rate": 9.469311550574288e-06, "loss": 1.8183, "step": 28014500 }, { "epoch": 81.09, "learning_rate": 9.468589350222305e-06, "loss": 1.8361, "step": 28015000 }, { "epoch": 81.09, "learning_rate": 9.467865702575028e-06, "loss": 1.8372, "step": 28015500 }, { "epoch": 81.09, "learning_rate": 9.467142054927752e-06, "loss": 1.8461, "step": 28016000 }, { "epoch": 81.1, "learning_rate": 9.466418407280476e-06, "loss": 1.8309, "step": 28016500 }, { "epoch": 81.1, "learning_rate": 9.465696206928493e-06, "loss": 1.8554, "step": 28017000 }, { "epoch": 81.1, "learning_rate": 9.464972559281215e-06, "loss": 1.8348, "step": 28017500 }, { "epoch": 81.1, "learning_rate": 9.464248911633939e-06, "loss": 1.8345, "step": 28018000 }, { "epoch": 81.1, "learning_rate": 9.463525263986663e-06, "loss": 1.826, "step": 28018500 }, { "epoch": 81.1, "learning_rate": 9.46280306363468e-06, "loss": 1.8249, "step": 28019000 }, { "epoch": 81.1, "learning_rate": 9.462079415987402e-06, "loss": 1.8342, "step": 28019500 }, { "epoch": 81.11, "learning_rate": 9.461355768340126e-06, "loss": 1.8358, "step": 28020000 }, { "epoch": 81.11, "learning_rate": 9.46063212069285e-06, "loss": 1.8147, "step": 28020500 }, { "epoch": 81.11, "learning_rate": 9.459908473045572e-06, "loss": 1.829, "step": 28021000 }, { "epoch": 81.11, "learning_rate": 9.459184825398296e-06, "loss": 1.8359, "step": 28021500 }, { "epoch": 81.11, "learning_rate": 9.458462625046315e-06, "loss": 1.8706, "step": 28022000 }, { "epoch": 81.11, "learning_rate": 9.457738977399038e-06, "loss": 1.8516, "step": 28022500 }, { "epoch": 81.12, "learning_rate": 9.45701532975176e-06, "loss": 1.8394, "step": 28023000 }, { "epoch": 81.12, "learning_rate": 9.456291682104484e-06, "loss": 1.8269, "step": 28023500 }, { "epoch": 81.12, "learning_rate": 9.455569481752503e-06, "loss": 1.8369, "step": 28024000 }, { "epoch": 81.12, "learning_rate": 9.454845834105225e-06, "loss": 1.8259, "step": 28024500 }, { "epoch": 81.12, "learning_rate": 9.454122186457947e-06, "loss": 1.8331, "step": 28025000 }, { "epoch": 81.12, "learning_rate": 9.453398538810671e-06, "loss": 1.8491, "step": 28025500 }, { "epoch": 81.12, "learning_rate": 9.45267633845869e-06, "loss": 1.8327, "step": 28026000 }, { "epoch": 81.13, "learning_rate": 9.451952690811412e-06, "loss": 1.8324, "step": 28026500 }, { "epoch": 81.13, "learning_rate": 9.451229043164134e-06, "loss": 1.8392, "step": 28027000 }, { "epoch": 81.13, "learning_rate": 9.450505395516858e-06, "loss": 1.8694, "step": 28027500 }, { "epoch": 81.13, "learning_rate": 9.449781747869582e-06, "loss": 1.8403, "step": 28028000 }, { "epoch": 81.13, "learning_rate": 9.4490595475176e-06, "loss": 1.8262, "step": 28028500 }, { "epoch": 81.13, "learning_rate": 9.448337347165617e-06, "loss": 1.8276, "step": 28029000 }, { "epoch": 81.13, "learning_rate": 9.44761369951834e-06, "loss": 1.8106, "step": 28029500 }, { "epoch": 81.14, "learning_rate": 9.446890051871065e-06, "loss": 1.8599, "step": 28030000 }, { "epoch": 81.14, "learning_rate": 9.446166404223787e-06, "loss": 1.8387, "step": 28030500 }, { "epoch": 81.14, "learning_rate": 9.44544275657651e-06, "loss": 1.8438, "step": 28031000 }, { "epoch": 81.14, "learning_rate": 9.444719108929235e-06, "loss": 1.8542, "step": 28031500 }, { "epoch": 81.14, "learning_rate": 9.443995461281957e-06, "loss": 1.8509, "step": 28032000 }, { "epoch": 81.14, "learning_rate": 9.44327181363468e-06, "loss": 1.8455, "step": 28032500 }, { "epoch": 81.14, "learning_rate": 9.442548165987403e-06, "loss": 1.8615, "step": 28033000 }, { "epoch": 81.15, "learning_rate": 9.441825965635422e-06, "loss": 1.832, "step": 28033500 }, { "epoch": 81.15, "learning_rate": 9.441102317988144e-06, "loss": 1.8418, "step": 28034000 }, { "epoch": 81.15, "learning_rate": 9.440378670340867e-06, "loss": 1.8403, "step": 28034500 }, { "epoch": 81.15, "learning_rate": 9.43965502269359e-06, "loss": 1.8572, "step": 28035000 }, { "epoch": 81.15, "learning_rate": 9.43893282234161e-06, "loss": 1.8228, "step": 28035500 }, { "epoch": 81.15, "learning_rate": 9.438209174694332e-06, "loss": 1.8609, "step": 28036000 }, { "epoch": 81.15, "learning_rate": 9.437485527047054e-06, "loss": 1.8482, "step": 28036500 }, { "epoch": 81.16, "learning_rate": 9.436761879399778e-06, "loss": 1.8307, "step": 28037000 }, { "epoch": 81.16, "learning_rate": 9.436038231752502e-06, "loss": 1.8225, "step": 28037500 }, { "epoch": 81.16, "learning_rate": 9.435314584105224e-06, "loss": 1.8357, "step": 28038000 }, { "epoch": 81.16, "learning_rate": 9.434590936457948e-06, "loss": 1.8271, "step": 28038500 }, { "epoch": 81.16, "learning_rate": 9.433867288810672e-06, "loss": 1.8505, "step": 28039000 }, { "epoch": 81.16, "learning_rate": 9.433143641163394e-06, "loss": 1.8122, "step": 28039500 }, { "epoch": 81.16, "learning_rate": 9.432419993516118e-06, "loss": 1.86, "step": 28040000 }, { "epoch": 81.17, "learning_rate": 9.43169634586884e-06, "loss": 1.8237, "step": 28040500 }, { "epoch": 81.17, "learning_rate": 9.430972698221564e-06, "loss": 1.857, "step": 28041000 }, { "epoch": 81.17, "learning_rate": 9.430250497869581e-06, "loss": 1.8401, "step": 28041500 }, { "epoch": 81.17, "learning_rate": 9.429526850222305e-06, "loss": 1.8803, "step": 28042000 }, { "epoch": 81.17, "learning_rate": 9.42880320257503e-06, "loss": 1.8237, "step": 28042500 }, { "epoch": 81.17, "learning_rate": 9.428079554927752e-06, "loss": 1.8372, "step": 28043000 }, { "epoch": 81.17, "learning_rate": 9.427355907280474e-06, "loss": 1.8371, "step": 28043500 }, { "epoch": 81.18, "learning_rate": 9.426632259633198e-06, "loss": 1.8199, "step": 28044000 }, { "epoch": 81.18, "learning_rate": 9.425908611985922e-06, "loss": 1.8087, "step": 28044500 }, { "epoch": 81.18, "learning_rate": 9.425184964338644e-06, "loss": 1.8347, "step": 28045000 }, { "epoch": 81.18, "learning_rate": 9.424461316691368e-06, "loss": 1.8429, "step": 28045500 }, { "epoch": 81.18, "learning_rate": 9.423739116339385e-06, "loss": 1.8317, "step": 28046000 }, { "epoch": 81.18, "learning_rate": 9.423015468692109e-06, "loss": 1.8337, "step": 28046500 }, { "epoch": 81.18, "learning_rate": 9.422291821044831e-06, "loss": 1.8448, "step": 28047000 }, { "epoch": 81.19, "learning_rate": 9.42156962069285e-06, "loss": 1.8472, "step": 28047500 }, { "epoch": 81.19, "learning_rate": 9.420845973045572e-06, "loss": 1.8512, "step": 28048000 }, { "epoch": 81.19, "learning_rate": 9.420122325398296e-06, "loss": 1.8322, "step": 28048500 }, { "epoch": 81.19, "learning_rate": 9.419398677751019e-06, "loss": 1.824, "step": 28049000 }, { "epoch": 81.19, "learning_rate": 9.418676477399037e-06, "loss": 1.8608, "step": 28049500 }, { "epoch": 81.19, "learning_rate": 9.41795282975176e-06, "loss": 1.8263, "step": 28050000 }, { "epoch": 81.19, "learning_rate": 9.417230629399779e-06, "loss": 1.8285, "step": 28050500 }, { "epoch": 81.2, "learning_rate": 9.416506981752501e-06, "loss": 1.8364, "step": 28051000 }, { "epoch": 81.2, "learning_rate": 9.415783334105225e-06, "loss": 1.8554, "step": 28051500 }, { "epoch": 81.2, "learning_rate": 9.415059686457949e-06, "loss": 1.8422, "step": 28052000 }, { "epoch": 81.2, "learning_rate": 9.414336038810671e-06, "loss": 1.8272, "step": 28052500 }, { "epoch": 81.2, "learning_rate": 9.413612391163395e-06, "loss": 1.8391, "step": 28053000 }, { "epoch": 81.2, "learning_rate": 9.412888743516117e-06, "loss": 1.839, "step": 28053500 }, { "epoch": 81.2, "learning_rate": 9.412166543164136e-06, "loss": 1.7969, "step": 28054000 }, { "epoch": 81.21, "learning_rate": 9.411442895516858e-06, "loss": 1.8308, "step": 28054500 }, { "epoch": 81.21, "learning_rate": 9.410719247869582e-06, "loss": 1.8408, "step": 28055000 }, { "epoch": 81.21, "learning_rate": 9.409995600222304e-06, "loss": 1.8399, "step": 28055500 }, { "epoch": 81.21, "learning_rate": 9.409271952575028e-06, "loss": 1.8243, "step": 28056000 }, { "epoch": 81.21, "learning_rate": 9.40854830492775e-06, "loss": 1.8401, "step": 28056500 }, { "epoch": 81.21, "learning_rate": 9.407824657280475e-06, "loss": 1.8363, "step": 28057000 }, { "epoch": 81.21, "learning_rate": 9.407102456928492e-06, "loss": 1.8535, "step": 28057500 }, { "epoch": 81.22, "learning_rate": 9.406378809281216e-06, "loss": 1.873, "step": 28058000 }, { "epoch": 81.22, "learning_rate": 9.40565516163394e-06, "loss": 1.8557, "step": 28058500 }, { "epoch": 81.22, "learning_rate": 9.404931513986662e-06, "loss": 1.8136, "step": 28059000 }, { "epoch": 81.22, "learning_rate": 9.404207866339386e-06, "loss": 1.8438, "step": 28059500 }, { "epoch": 81.22, "learning_rate": 9.403484218692108e-06, "loss": 1.8337, "step": 28060000 }, { "epoch": 81.22, "learning_rate": 9.402760571044832e-06, "loss": 1.843, "step": 28060500 }, { "epoch": 81.23, "learning_rate": 9.402036923397554e-06, "loss": 1.8601, "step": 28061000 }, { "epoch": 81.23, "learning_rate": 9.401313275750278e-06, "loss": 1.858, "step": 28061500 }, { "epoch": 81.23, "learning_rate": 9.400589628103002e-06, "loss": 1.8331, "step": 28062000 }, { "epoch": 81.23, "learning_rate": 9.39986742775102e-06, "loss": 1.8421, "step": 28062500 }, { "epoch": 81.23, "learning_rate": 9.399143780103743e-06, "loss": 1.8214, "step": 28063000 }, { "epoch": 81.23, "learning_rate": 9.398420132456465e-06, "loss": 1.8365, "step": 28063500 }, { "epoch": 81.23, "learning_rate": 9.39769648480919e-06, "loss": 1.8491, "step": 28064000 }, { "epoch": 81.24, "learning_rate": 9.396974284457207e-06, "loss": 1.8582, "step": 28064500 }, { "epoch": 81.24, "learning_rate": 9.396252084105224e-06, "loss": 1.8318, "step": 28065000 }, { "epoch": 81.24, "learning_rate": 9.395528436457948e-06, "loss": 1.8149, "step": 28065500 }, { "epoch": 81.24, "learning_rate": 9.394804788810672e-06, "loss": 1.8139, "step": 28066000 }, { "epoch": 81.24, "learning_rate": 9.394082588458689e-06, "loss": 1.8591, "step": 28066500 }, { "epoch": 81.24, "learning_rate": 9.393358940811411e-06, "loss": 1.8344, "step": 28067000 }, { "epoch": 81.24, "learning_rate": 9.392635293164135e-06, "loss": 1.8256, "step": 28067500 }, { "epoch": 81.25, "learning_rate": 9.391911645516859e-06, "loss": 1.8287, "step": 28068000 }, { "epoch": 81.25, "learning_rate": 9.391187997869581e-06, "loss": 1.808, "step": 28068500 }, { "epoch": 81.25, "learning_rate": 9.390464350222305e-06, "loss": 1.833, "step": 28069000 }, { "epoch": 81.25, "learning_rate": 9.38974070257503e-06, "loss": 1.831, "step": 28069500 }, { "epoch": 81.25, "learning_rate": 9.389017054927751e-06, "loss": 1.8145, "step": 28070000 }, { "epoch": 81.25, "learning_rate": 9.388293407280474e-06, "loss": 1.8608, "step": 28070500 }, { "epoch": 81.25, "learning_rate": 9.387569759633198e-06, "loss": 1.8501, "step": 28071000 }, { "epoch": 81.26, "learning_rate": 9.386846111985922e-06, "loss": 1.8216, "step": 28071500 }, { "epoch": 81.26, "learning_rate": 9.386122464338644e-06, "loss": 1.8326, "step": 28072000 }, { "epoch": 81.26, "learning_rate": 9.385398816691368e-06, "loss": 1.8662, "step": 28072500 }, { "epoch": 81.26, "learning_rate": 9.384675169044092e-06, "loss": 1.8157, "step": 28073000 }, { "epoch": 81.26, "learning_rate": 9.383952968692109e-06, "loss": 1.8315, "step": 28073500 }, { "epoch": 81.26, "learning_rate": 9.383230768340126e-06, "loss": 1.8216, "step": 28074000 }, { "epoch": 81.26, "learning_rate": 9.38250712069285e-06, "loss": 1.828, "step": 28074500 }, { "epoch": 81.27, "learning_rate": 9.381783473045572e-06, "loss": 1.8481, "step": 28075000 }, { "epoch": 81.27, "learning_rate": 9.381059825398296e-06, "loss": 1.8319, "step": 28075500 }, { "epoch": 81.27, "learning_rate": 9.380336177751018e-06, "loss": 1.8398, "step": 28076000 }, { "epoch": 81.27, "learning_rate": 9.379612530103742e-06, "loss": 1.8358, "step": 28076500 }, { "epoch": 81.27, "learning_rate": 9.378888882456466e-06, "loss": 1.8443, "step": 28077000 }, { "epoch": 81.27, "learning_rate": 9.378165234809189e-06, "loss": 1.8221, "step": 28077500 }, { "epoch": 81.27, "learning_rate": 9.377441587161912e-06, "loss": 1.8341, "step": 28078000 }, { "epoch": 81.28, "learning_rate": 9.376717939514636e-06, "loss": 1.8383, "step": 28078500 }, { "epoch": 81.28, "learning_rate": 9.375995739162654e-06, "loss": 1.8397, "step": 28079000 }, { "epoch": 81.28, "learning_rate": 9.375272091515376e-06, "loss": 1.8399, "step": 28079500 }, { "epoch": 81.28, "learning_rate": 9.3745484438681e-06, "loss": 1.8107, "step": 28080000 }, { "epoch": 81.28, "learning_rate": 9.373824796220824e-06, "loss": 1.8307, "step": 28080500 }, { "epoch": 81.28, "learning_rate": 9.373101148573546e-06, "loss": 1.8677, "step": 28081000 }, { "epoch": 81.28, "learning_rate": 9.372378948221563e-06, "loss": 1.8286, "step": 28081500 }, { "epoch": 81.29, "learning_rate": 9.371655300574287e-06, "loss": 1.8202, "step": 28082000 }, { "epoch": 81.29, "learning_rate": 9.370931652927011e-06, "loss": 1.8226, "step": 28082500 }, { "epoch": 81.29, "learning_rate": 9.370208005279733e-06, "loss": 1.8461, "step": 28083000 }, { "epoch": 81.29, "learning_rate": 9.369484357632457e-06, "loss": 1.8176, "step": 28083500 }, { "epoch": 81.29, "learning_rate": 9.368762157280474e-06, "loss": 1.8386, "step": 28084000 }, { "epoch": 81.29, "learning_rate": 9.368038509633198e-06, "loss": 1.8385, "step": 28084500 }, { "epoch": 81.29, "learning_rate": 9.36731486198592e-06, "loss": 1.8218, "step": 28085000 }, { "epoch": 81.3, "learning_rate": 9.366591214338645e-06, "loss": 1.8435, "step": 28085500 }, { "epoch": 81.3, "learning_rate": 9.365867566691368e-06, "loss": 1.8448, "step": 28086000 }, { "epoch": 81.3, "learning_rate": 9.36514391904409e-06, "loss": 1.8555, "step": 28086500 }, { "epoch": 81.3, "learning_rate": 9.364420271396813e-06, "loss": 1.8171, "step": 28087000 }, { "epoch": 81.3, "learning_rate": 9.363698071044832e-06, "loss": 1.8133, "step": 28087500 }, { "epoch": 81.3, "learning_rate": 9.362974423397556e-06, "loss": 1.8265, "step": 28088000 }, { "epoch": 81.3, "learning_rate": 9.362250775750278e-06, "loss": 1.838, "step": 28088500 }, { "epoch": 81.31, "learning_rate": 9.361528575398295e-06, "loss": 1.8439, "step": 28089000 }, { "epoch": 81.31, "learning_rate": 9.36080492775102e-06, "loss": 1.8412, "step": 28089500 }, { "epoch": 81.31, "learning_rate": 9.360081280103743e-06, "loss": 1.865, "step": 28090000 }, { "epoch": 81.31, "learning_rate": 9.359357632456465e-06, "loss": 1.8537, "step": 28090500 }, { "epoch": 81.31, "learning_rate": 9.358633984809188e-06, "loss": 1.8527, "step": 28091000 }, { "epoch": 81.31, "learning_rate": 9.357911784457207e-06, "loss": 1.842, "step": 28091500 }, { "epoch": 81.31, "learning_rate": 9.35718813680993e-06, "loss": 1.823, "step": 28092000 }, { "epoch": 81.32, "learning_rate": 9.356464489162653e-06, "loss": 1.8337, "step": 28092500 }, { "epoch": 81.32, "learning_rate": 9.355740841515377e-06, "loss": 1.8205, "step": 28093000 }, { "epoch": 81.32, "learning_rate": 9.3550171938681e-06, "loss": 1.8276, "step": 28093500 }, { "epoch": 81.32, "learning_rate": 9.354293546220823e-06, "loss": 1.8425, "step": 28094000 }, { "epoch": 81.32, "learning_rate": 9.353569898573545e-06, "loss": 1.8421, "step": 28094500 }, { "epoch": 81.32, "learning_rate": 9.352847698221564e-06, "loss": 1.8273, "step": 28095000 }, { "epoch": 81.32, "learning_rate": 9.352124050574288e-06, "loss": 1.853, "step": 28095500 }, { "epoch": 81.33, "learning_rate": 9.35140040292701e-06, "loss": 1.8452, "step": 28096000 }, { "epoch": 81.33, "learning_rate": 9.350676755279732e-06, "loss": 1.845, "step": 28096500 }, { "epoch": 81.33, "learning_rate": 9.349953107632458e-06, "loss": 1.8544, "step": 28097000 }, { "epoch": 81.33, "learning_rate": 9.34922945998518e-06, "loss": 1.8461, "step": 28097500 }, { "epoch": 81.33, "learning_rate": 9.348505812337902e-06, "loss": 1.8449, "step": 28098000 }, { "epoch": 81.33, "learning_rate": 9.347782164690626e-06, "loss": 1.828, "step": 28098500 }, { "epoch": 81.34, "learning_rate": 9.34705851704335e-06, "loss": 1.8221, "step": 28099000 }, { "epoch": 81.34, "learning_rate": 9.346336316691368e-06, "loss": 1.841, "step": 28099500 }, { "epoch": 81.34, "learning_rate": 9.34561266904409e-06, "loss": 1.8598, "step": 28100000 }, { "epoch": 81.34, "learning_rate": 9.344889021396815e-06, "loss": 1.8421, "step": 28100500 }, { "epoch": 81.34, "learning_rate": 9.344165373749538e-06, "loss": 1.8374, "step": 28101000 }, { "epoch": 81.34, "learning_rate": 9.34344172610226e-06, "loss": 1.8507, "step": 28101500 }, { "epoch": 81.34, "learning_rate": 9.342718078454984e-06, "loss": 1.849, "step": 28102000 }, { "epoch": 81.35, "learning_rate": 9.341997325398296e-06, "loss": 1.8547, "step": 28102500 }, { "epoch": 81.35, "learning_rate": 9.34127367775102e-06, "loss": 1.8373, "step": 28103000 }, { "epoch": 81.35, "learning_rate": 9.340550030103742e-06, "loss": 1.8419, "step": 28103500 }, { "epoch": 81.35, "learning_rate": 9.339826382456464e-06, "loss": 1.808, "step": 28104000 }, { "epoch": 81.35, "learning_rate": 9.33910273480919e-06, "loss": 1.852, "step": 28104500 }, { "epoch": 81.35, "learning_rate": 9.338379087161912e-06, "loss": 1.8363, "step": 28105000 }, { "epoch": 81.35, "learning_rate": 9.337655439514635e-06, "loss": 1.8413, "step": 28105500 }, { "epoch": 81.36, "learning_rate": 9.336931791867358e-06, "loss": 1.8502, "step": 28106000 }, { "epoch": 81.36, "learning_rate": 9.336208144220082e-06, "loss": 1.8622, "step": 28106500 }, { "epoch": 81.36, "learning_rate": 9.335484496572805e-06, "loss": 1.8514, "step": 28107000 }, { "epoch": 81.36, "learning_rate": 9.334762296220822e-06, "loss": 1.8561, "step": 28107500 }, { "epoch": 81.36, "learning_rate": 9.334038648573548e-06, "loss": 1.8254, "step": 28108000 }, { "epoch": 81.36, "learning_rate": 9.33331500092627e-06, "loss": 1.8319, "step": 28108500 }, { "epoch": 81.36, "learning_rate": 9.332591353278992e-06, "loss": 1.8358, "step": 28109000 }, { "epoch": 81.37, "learning_rate": 9.331867705631716e-06, "loss": 1.8403, "step": 28109500 }, { "epoch": 81.37, "learning_rate": 9.331145505279735e-06, "loss": 1.8545, "step": 28110000 }, { "epoch": 81.37, "learning_rate": 9.330421857632457e-06, "loss": 1.8409, "step": 28110500 }, { "epoch": 81.37, "learning_rate": 9.329699657280474e-06, "loss": 1.8493, "step": 28111000 }, { "epoch": 81.37, "learning_rate": 9.328976009633197e-06, "loss": 1.8138, "step": 28111500 }, { "epoch": 81.37, "learning_rate": 9.328252361985922e-06, "loss": 1.8249, "step": 28112000 }, { "epoch": 81.37, "learning_rate": 9.327528714338644e-06, "loss": 1.8348, "step": 28112500 }, { "epoch": 81.38, "learning_rate": 9.326805066691367e-06, "loss": 1.8528, "step": 28113000 }, { "epoch": 81.38, "learning_rate": 9.326081419044092e-06, "loss": 1.8142, "step": 28113500 }, { "epoch": 81.38, "learning_rate": 9.325357771396815e-06, "loss": 1.8553, "step": 28114000 }, { "epoch": 81.38, "learning_rate": 9.324634123749537e-06, "loss": 1.8281, "step": 28114500 }, { "epoch": 81.38, "learning_rate": 9.32391047610226e-06, "loss": 1.855, "step": 28115000 }, { "epoch": 81.38, "learning_rate": 9.323186828454985e-06, "loss": 1.8459, "step": 28115500 }, { "epoch": 81.38, "learning_rate": 9.322463180807707e-06, "loss": 1.8369, "step": 28116000 }, { "epoch": 81.39, "learning_rate": 9.321739533160429e-06, "loss": 1.8505, "step": 28116500 }, { "epoch": 81.39, "learning_rate": 9.321015885513155e-06, "loss": 1.8331, "step": 28117000 }, { "epoch": 81.39, "learning_rate": 9.320293685161172e-06, "loss": 1.8412, "step": 28117500 }, { "epoch": 81.39, "learning_rate": 9.31957148480919e-06, "loss": 1.8537, "step": 28118000 }, { "epoch": 81.39, "learning_rate": 9.318849284457206e-06, "loss": 1.8511, "step": 28118500 }, { "epoch": 81.39, "learning_rate": 9.31812563680993e-06, "loss": 1.8571, "step": 28119000 }, { "epoch": 81.39, "learning_rate": 9.317404883753243e-06, "loss": 1.8292, "step": 28119500 }, { "epoch": 81.4, "learning_rate": 9.316681236105967e-06, "loss": 1.8141, "step": 28120000 }, { "epoch": 81.4, "learning_rate": 9.315957588458689e-06, "loss": 1.8402, "step": 28120500 }, { "epoch": 81.4, "learning_rate": 9.315235388106706e-06, "loss": 1.8458, "step": 28121000 }, { "epoch": 81.4, "learning_rate": 9.31451174045943e-06, "loss": 1.8231, "step": 28121500 }, { "epoch": 81.4, "learning_rate": 9.313788092812154e-06, "loss": 1.8341, "step": 28122000 }, { "epoch": 81.4, "learning_rate": 9.313064445164876e-06, "loss": 1.8655, "step": 28122500 }, { "epoch": 81.4, "learning_rate": 9.312340797517598e-06, "loss": 1.8107, "step": 28123000 }, { "epoch": 81.41, "learning_rate": 9.311617149870324e-06, "loss": 1.8444, "step": 28123500 }, { "epoch": 81.41, "learning_rate": 9.310893502223046e-06, "loss": 1.8062, "step": 28124000 }, { "epoch": 81.41, "learning_rate": 9.310169854575768e-06, "loss": 1.8626, "step": 28124500 }, { "epoch": 81.41, "learning_rate": 9.309446206928492e-06, "loss": 1.8364, "step": 28125000 }, { "epoch": 81.41, "learning_rate": 9.308722559281216e-06, "loss": 1.8458, "step": 28125500 }, { "epoch": 81.41, "learning_rate": 9.307998911633939e-06, "loss": 1.8453, "step": 28126000 }, { "epoch": 81.41, "learning_rate": 9.307276711281956e-06, "loss": 1.8377, "step": 28126500 }, { "epoch": 81.42, "learning_rate": 9.30655306363468e-06, "loss": 1.8458, "step": 28127000 }, { "epoch": 81.42, "learning_rate": 9.305829415987404e-06, "loss": 1.8562, "step": 28127500 }, { "epoch": 81.42, "learning_rate": 9.305105768340126e-06, "loss": 1.8492, "step": 28128000 }, { "epoch": 81.42, "learning_rate": 9.30438212069285e-06, "loss": 1.8516, "step": 28128500 }, { "epoch": 81.42, "learning_rate": 9.303658473045574e-06, "loss": 1.8426, "step": 28129000 }, { "epoch": 81.42, "learning_rate": 9.302934825398296e-06, "loss": 1.8444, "step": 28129500 }, { "epoch": 81.42, "learning_rate": 9.302211177751018e-06, "loss": 1.833, "step": 28130000 }, { "epoch": 81.43, "learning_rate": 9.301487530103744e-06, "loss": 1.821, "step": 28130500 }, { "epoch": 81.43, "learning_rate": 9.300765329751761e-06, "loss": 1.8591, "step": 28131000 }, { "epoch": 81.43, "learning_rate": 9.300041682104483e-06, "loss": 1.8287, "step": 28131500 }, { "epoch": 81.43, "learning_rate": 9.299318034457207e-06, "loss": 1.8434, "step": 28132000 }, { "epoch": 81.43, "learning_rate": 9.298594386809931e-06, "loss": 1.8533, "step": 28132500 }, { "epoch": 81.43, "learning_rate": 9.297870739162653e-06, "loss": 1.8507, "step": 28133000 }, { "epoch": 81.43, "learning_rate": 9.297147091515376e-06, "loss": 1.8176, "step": 28133500 }, { "epoch": 81.44, "learning_rate": 9.2964234438681e-06, "loss": 1.8245, "step": 28134000 }, { "epoch": 81.44, "learning_rate": 9.295699796220823e-06, "loss": 1.8493, "step": 28134500 }, { "epoch": 81.44, "learning_rate": 9.294976148573546e-06, "loss": 1.8209, "step": 28135000 }, { "epoch": 81.44, "learning_rate": 9.29425250092627e-06, "loss": 1.843, "step": 28135500 }, { "epoch": 81.44, "learning_rate": 9.293528853278994e-06, "loss": 1.8293, "step": 28136000 }, { "epoch": 81.44, "learning_rate": 9.292805205631716e-06, "loss": 1.8104, "step": 28136500 }, { "epoch": 81.45, "learning_rate": 9.29208155798444e-06, "loss": 1.8752, "step": 28137000 }, { "epoch": 81.45, "learning_rate": 9.291359357632457e-06, "loss": 1.842, "step": 28137500 }, { "epoch": 81.45, "learning_rate": 9.290635709985181e-06, "loss": 1.8064, "step": 28138000 }, { "epoch": 81.45, "learning_rate": 9.289912062337903e-06, "loss": 1.8358, "step": 28138500 }, { "epoch": 81.45, "learning_rate": 9.289188414690627e-06, "loss": 1.813, "step": 28139000 }, { "epoch": 81.45, "learning_rate": 9.28846476704335e-06, "loss": 1.8388, "step": 28139500 }, { "epoch": 81.45, "learning_rate": 9.287741119396073e-06, "loss": 1.8572, "step": 28140000 }, { "epoch": 81.46, "learning_rate": 9.287017471748795e-06, "loss": 1.8594, "step": 28140500 }, { "epoch": 81.46, "learning_rate": 9.28629382410152e-06, "loss": 1.8446, "step": 28141000 }, { "epoch": 81.46, "learning_rate": 9.285571623749538e-06, "loss": 1.8225, "step": 28141500 }, { "epoch": 81.46, "learning_rate": 9.28484797610226e-06, "loss": 1.8487, "step": 28142000 }, { "epoch": 81.46, "learning_rate": 9.284124328454983e-06, "loss": 1.8234, "step": 28142500 }, { "epoch": 81.46, "learning_rate": 9.283402128103002e-06, "loss": 1.8445, "step": 28143000 }, { "epoch": 81.46, "learning_rate": 9.282678480455726e-06, "loss": 1.84, "step": 28143500 }, { "epoch": 81.47, "learning_rate": 9.281954832808448e-06, "loss": 1.846, "step": 28144000 }, { "epoch": 81.47, "learning_rate": 9.281231185161172e-06, "loss": 1.8189, "step": 28144500 }, { "epoch": 81.47, "learning_rate": 9.280507537513894e-06, "loss": 1.8056, "step": 28145000 }, { "epoch": 81.47, "learning_rate": 9.279783889866618e-06, "loss": 1.8407, "step": 28145500 }, { "epoch": 81.47, "learning_rate": 9.279061689514635e-06, "loss": 1.8207, "step": 28146000 }, { "epoch": 81.47, "learning_rate": 9.27833804186736e-06, "loss": 1.8146, "step": 28146500 }, { "epoch": 81.47, "learning_rate": 9.277614394220081e-06, "loss": 1.8609, "step": 28147000 }, { "epoch": 81.48, "learning_rate": 9.276890746572805e-06, "loss": 1.854, "step": 28147500 }, { "epoch": 81.48, "learning_rate": 9.276167098925528e-06, "loss": 1.8391, "step": 28148000 }, { "epoch": 81.48, "learning_rate": 9.275443451278252e-06, "loss": 1.8447, "step": 28148500 }, { "epoch": 81.48, "learning_rate": 9.274719803630975e-06, "loss": 1.801, "step": 28149000 }, { "epoch": 81.48, "learning_rate": 9.273996155983698e-06, "loss": 1.8323, "step": 28149500 }, { "epoch": 81.48, "learning_rate": 9.273272508336422e-06, "loss": 1.8492, "step": 28150000 }, { "epoch": 81.48, "learning_rate": 9.272550307984439e-06, "loss": 1.8455, "step": 28150500 }, { "epoch": 81.49, "learning_rate": 9.271826660337163e-06, "loss": 1.8406, "step": 28151000 }, { "epoch": 81.49, "learning_rate": 9.271103012689885e-06, "loss": 1.8437, "step": 28151500 }, { "epoch": 81.49, "learning_rate": 9.270379365042609e-06, "loss": 1.8462, "step": 28152000 }, { "epoch": 81.49, "learning_rate": 9.269655717395333e-06, "loss": 1.8382, "step": 28152500 }, { "epoch": 81.49, "learning_rate": 9.268932069748055e-06, "loss": 1.8453, "step": 28153000 }, { "epoch": 81.49, "learning_rate": 9.268208422100779e-06, "loss": 1.8517, "step": 28153500 }, { "epoch": 81.49, "learning_rate": 9.267486221748796e-06, "loss": 1.8559, "step": 28154000 }, { "epoch": 81.5, "learning_rate": 9.26676257410152e-06, "loss": 1.8487, "step": 28154500 }, { "epoch": 81.5, "learning_rate": 9.266038926454242e-06, "loss": 1.8384, "step": 28155000 }, { "epoch": 81.5, "learning_rate": 9.265315278806966e-06, "loss": 1.8456, "step": 28155500 }, { "epoch": 81.5, "learning_rate": 9.264591631159689e-06, "loss": 1.8272, "step": 28156000 }, { "epoch": 81.5, "learning_rate": 9.263867983512413e-06, "loss": 1.8412, "step": 28156500 }, { "epoch": 81.5, "learning_rate": 9.263144335865136e-06, "loss": 1.8219, "step": 28157000 }, { "epoch": 81.5, "learning_rate": 9.262422135513154e-06, "loss": 1.8328, "step": 28157500 }, { "epoch": 81.51, "learning_rate": 9.261698487865876e-06, "loss": 1.8457, "step": 28158000 }, { "epoch": 81.51, "learning_rate": 9.2609748402186e-06, "loss": 1.8367, "step": 28158500 }, { "epoch": 81.51, "learning_rate": 9.260251192571324e-06, "loss": 1.8398, "step": 28159000 }, { "epoch": 81.51, "learning_rate": 9.259528992219341e-06, "loss": 1.8051, "step": 28159500 }, { "epoch": 81.51, "learning_rate": 9.258805344572063e-06, "loss": 1.8597, "step": 28160000 }, { "epoch": 81.51, "learning_rate": 9.258081696924787e-06, "loss": 1.8296, "step": 28160500 }, { "epoch": 81.51, "learning_rate": 9.257358049277511e-06, "loss": 1.8271, "step": 28161000 }, { "epoch": 81.52, "learning_rate": 9.256634401630233e-06, "loss": 1.8257, "step": 28161500 }, { "epoch": 81.52, "learning_rate": 9.255910753982957e-06, "loss": 1.819, "step": 28162000 }, { "epoch": 81.52, "learning_rate": 9.255188553630975e-06, "loss": 1.8156, "step": 28162500 }, { "epoch": 81.52, "learning_rate": 9.254464905983698e-06, "loss": 1.8464, "step": 28163000 }, { "epoch": 81.52, "learning_rate": 9.25374125833642e-06, "loss": 1.804, "step": 28163500 }, { "epoch": 81.52, "learning_rate": 9.253017610689145e-06, "loss": 1.839, "step": 28164000 }, { "epoch": 81.52, "learning_rate": 9.252293963041869e-06, "loss": 1.813, "step": 28164500 }, { "epoch": 81.53, "learning_rate": 9.251571762689886e-06, "loss": 1.8297, "step": 28165000 }, { "epoch": 81.53, "learning_rate": 9.250848115042608e-06, "loss": 1.85, "step": 28165500 }, { "epoch": 81.53, "learning_rate": 9.250124467395332e-06, "loss": 1.8398, "step": 28166000 }, { "epoch": 81.53, "learning_rate": 9.249400819748056e-06, "loss": 1.8291, "step": 28166500 }, { "epoch": 81.53, "learning_rate": 9.248677172100778e-06, "loss": 1.818, "step": 28167000 }, { "epoch": 81.53, "learning_rate": 9.247953524453502e-06, "loss": 1.8427, "step": 28167500 }, { "epoch": 81.53, "learning_rate": 9.24723132410152e-06, "loss": 1.8501, "step": 28168000 }, { "epoch": 81.54, "learning_rate": 9.246507676454243e-06, "loss": 1.8394, "step": 28168500 }, { "epoch": 81.54, "learning_rate": 9.245784028806965e-06, "loss": 1.8247, "step": 28169000 }, { "epoch": 81.54, "learning_rate": 9.245061828454983e-06, "loss": 1.8314, "step": 28169500 }, { "epoch": 81.54, "learning_rate": 9.244338180807707e-06, "loss": 1.8543, "step": 28170000 }, { "epoch": 81.54, "learning_rate": 9.24361453316043e-06, "loss": 1.8381, "step": 28170500 }, { "epoch": 81.54, "learning_rate": 9.242890885513153e-06, "loss": 1.8465, "step": 28171000 }, { "epoch": 81.54, "learning_rate": 9.242167237865877e-06, "loss": 1.7891, "step": 28171500 }, { "epoch": 81.55, "learning_rate": 9.2414435902186e-06, "loss": 1.8432, "step": 28172000 }, { "epoch": 81.55, "learning_rate": 9.240719942571323e-06, "loss": 1.8544, "step": 28172500 }, { "epoch": 81.55, "learning_rate": 9.239996294924047e-06, "loss": 1.8346, "step": 28173000 }, { "epoch": 81.55, "learning_rate": 9.239274094572064e-06, "loss": 1.8611, "step": 28173500 }, { "epoch": 81.55, "learning_rate": 9.238551894220081e-06, "loss": 1.8116, "step": 28174000 }, { "epoch": 81.55, "learning_rate": 9.237828246572805e-06, "loss": 1.8579, "step": 28174500 }, { "epoch": 81.56, "learning_rate": 9.237104598925527e-06, "loss": 1.8842, "step": 28175000 }, { "epoch": 81.56, "learning_rate": 9.236380951278251e-06, "loss": 1.8845, "step": 28175500 }, { "epoch": 81.56, "learning_rate": 9.23565875092627e-06, "loss": 1.8406, "step": 28176000 }, { "epoch": 81.56, "learning_rate": 9.234935103278993e-06, "loss": 1.8472, "step": 28176500 }, { "epoch": 81.56, "learning_rate": 9.234211455631715e-06, "loss": 1.8484, "step": 28177000 }, { "epoch": 81.56, "learning_rate": 9.233487807984439e-06, "loss": 1.8392, "step": 28177500 }, { "epoch": 81.56, "learning_rate": 9.232764160337163e-06, "loss": 1.8614, "step": 28178000 }, { "epoch": 81.57, "learning_rate": 9.232040512689885e-06, "loss": 1.8342, "step": 28178500 }, { "epoch": 81.57, "learning_rate": 9.231316865042609e-06, "loss": 1.8176, "step": 28179000 }, { "epoch": 81.57, "learning_rate": 9.230593217395333e-06, "loss": 1.8639, "step": 28179500 }, { "epoch": 81.57, "learning_rate": 9.22987101704335e-06, "loss": 1.8222, "step": 28180000 }, { "epoch": 81.57, "learning_rate": 9.229147369396072e-06, "loss": 1.8237, "step": 28180500 }, { "epoch": 81.57, "learning_rate": 9.228423721748796e-06, "loss": 1.8528, "step": 28181000 }, { "epoch": 81.57, "learning_rate": 9.22770007410152e-06, "loss": 1.8472, "step": 28181500 }, { "epoch": 81.58, "learning_rate": 9.226977873749537e-06, "loss": 1.8501, "step": 28182000 }, { "epoch": 81.58, "learning_rate": 9.226255673397555e-06, "loss": 1.8426, "step": 28182500 }, { "epoch": 81.58, "learning_rate": 9.225532025750279e-06, "loss": 1.8651, "step": 28183000 }, { "epoch": 81.58, "learning_rate": 9.224808378103002e-06, "loss": 1.841, "step": 28183500 }, { "epoch": 81.58, "learning_rate": 9.224084730455725e-06, "loss": 1.8458, "step": 28184000 }, { "epoch": 81.58, "learning_rate": 9.223361082808447e-06, "loss": 1.8507, "step": 28184500 }, { "epoch": 81.58, "learning_rate": 9.222637435161171e-06, "loss": 1.8845, "step": 28185000 }, { "epoch": 81.59, "learning_rate": 9.221913787513895e-06, "loss": 1.8669, "step": 28185500 }, { "epoch": 81.59, "learning_rate": 9.221190139866617e-06, "loss": 1.8852, "step": 28186000 }, { "epoch": 81.59, "learning_rate": 9.220466492219341e-06, "loss": 1.838, "step": 28186500 }, { "epoch": 81.59, "learning_rate": 9.219742844572065e-06, "loss": 1.8563, "step": 28187000 }, { "epoch": 81.59, "learning_rate": 9.219019196924787e-06, "loss": 1.833, "step": 28187500 }, { "epoch": 81.59, "learning_rate": 9.21829554927751e-06, "loss": 1.8407, "step": 28188000 }, { "epoch": 81.59, "learning_rate": 9.217573348925528e-06, "loss": 1.8688, "step": 28188500 }, { "epoch": 81.6, "learning_rate": 9.216849701278252e-06, "loss": 1.8478, "step": 28189000 }, { "epoch": 81.6, "learning_rate": 9.216126053630974e-06, "loss": 1.8268, "step": 28189500 }, { "epoch": 81.6, "learning_rate": 9.215402405983697e-06, "loss": 1.8135, "step": 28190000 }, { "epoch": 81.6, "learning_rate": 9.214680205631716e-06, "loss": 1.844, "step": 28190500 }, { "epoch": 81.6, "learning_rate": 9.21395655798444e-06, "loss": 1.8467, "step": 28191000 }, { "epoch": 81.6, "learning_rate": 9.213232910337162e-06, "loss": 1.8294, "step": 28191500 }, { "epoch": 81.6, "learning_rate": 9.212509262689886e-06, "loss": 1.8465, "step": 28192000 }, { "epoch": 81.61, "learning_rate": 9.211787062337903e-06, "loss": 1.8117, "step": 28192500 }, { "epoch": 81.61, "learning_rate": 9.211063414690627e-06, "loss": 1.8809, "step": 28193000 }, { "epoch": 81.61, "learning_rate": 9.210339767043349e-06, "loss": 1.8351, "step": 28193500 }, { "epoch": 81.61, "learning_rate": 9.209616119396073e-06, "loss": 1.8212, "step": 28194000 }, { "epoch": 81.61, "learning_rate": 9.208893919044092e-06, "loss": 1.8661, "step": 28194500 }, { "epoch": 81.61, "learning_rate": 9.208170271396814e-06, "loss": 1.8602, "step": 28195000 }, { "epoch": 81.61, "learning_rate": 9.207446623749536e-06, "loss": 1.8325, "step": 28195500 }, { "epoch": 81.62, "learning_rate": 9.20672297610226e-06, "loss": 1.8239, "step": 28196000 }, { "epoch": 81.62, "learning_rate": 9.205999328454984e-06, "loss": 1.8615, "step": 28196500 }, { "epoch": 81.62, "learning_rate": 9.205275680807707e-06, "loss": 1.8186, "step": 28197000 }, { "epoch": 81.62, "learning_rate": 9.204552033160429e-06, "loss": 1.8704, "step": 28197500 }, { "epoch": 81.62, "learning_rate": 9.203828385513154e-06, "loss": 1.8306, "step": 28198000 }, { "epoch": 81.62, "learning_rate": 9.203104737865877e-06, "loss": 1.8617, "step": 28198500 }, { "epoch": 81.62, "learning_rate": 9.202382537513894e-06, "loss": 1.8647, "step": 28199000 }, { "epoch": 81.63, "learning_rate": 9.201658889866618e-06, "loss": 1.8524, "step": 28199500 }, { "epoch": 81.63, "learning_rate": 9.200936689514637e-06, "loss": 1.833, "step": 28200000 }, { "epoch": 81.63, "learning_rate": 9.200213041867359e-06, "loss": 1.8235, "step": 28200500 }, { "epoch": 81.63, "learning_rate": 9.199489394220081e-06, "loss": 1.8369, "step": 28201000 }, { "epoch": 81.63, "learning_rate": 9.198765746572805e-06, "loss": 1.8494, "step": 28201500 }, { "epoch": 81.63, "learning_rate": 9.198042098925529e-06, "loss": 1.8471, "step": 28202000 }, { "epoch": 81.63, "learning_rate": 9.197319898573546e-06, "loss": 1.825, "step": 28202500 }, { "epoch": 81.64, "learning_rate": 9.196596250926269e-06, "loss": 1.8174, "step": 28203000 }, { "epoch": 81.64, "learning_rate": 9.195872603278992e-06, "loss": 1.8413, "step": 28203500 }, { "epoch": 81.64, "learning_rate": 9.195148955631716e-06, "loss": 1.8144, "step": 28204000 }, { "epoch": 81.64, "learning_rate": 9.194425307984439e-06, "loss": 1.8482, "step": 28204500 }, { "epoch": 81.64, "learning_rate": 9.193701660337163e-06, "loss": 1.818, "step": 28205000 }, { "epoch": 81.64, "learning_rate": 9.192979459985182e-06, "loss": 1.8226, "step": 28205500 }, { "epoch": 81.64, "learning_rate": 9.192255812337904e-06, "loss": 1.8428, "step": 28206000 }, { "epoch": 81.65, "learning_rate": 9.191532164690626e-06, "loss": 1.8289, "step": 28206500 }, { "epoch": 81.65, "learning_rate": 9.19080851704335e-06, "loss": 1.8424, "step": 28207000 }, { "epoch": 81.65, "learning_rate": 9.190086316691369e-06, "loss": 1.8268, "step": 28207500 }, { "epoch": 81.65, "learning_rate": 9.189362669044091e-06, "loss": 1.8342, "step": 28208000 }, { "epoch": 81.65, "learning_rate": 9.188639021396813e-06, "loss": 1.8551, "step": 28208500 }, { "epoch": 81.65, "learning_rate": 9.187915373749537e-06, "loss": 1.8604, "step": 28209000 }, { "epoch": 81.65, "learning_rate": 9.187191726102261e-06, "loss": 1.837, "step": 28209500 }, { "epoch": 81.66, "learning_rate": 9.186468078454983e-06, "loss": 1.8357, "step": 28210000 }, { "epoch": 81.66, "learning_rate": 9.185744430807706e-06, "loss": 1.8831, "step": 28210500 }, { "epoch": 81.66, "learning_rate": 9.185022230455725e-06, "loss": 1.8531, "step": 28211000 }, { "epoch": 81.66, "learning_rate": 9.184298582808449e-06, "loss": 1.8323, "step": 28211500 }, { "epoch": 81.66, "learning_rate": 9.18357493516117e-06, "loss": 1.8546, "step": 28212000 }, { "epoch": 81.66, "learning_rate": 9.182852734809188e-06, "loss": 1.8336, "step": 28212500 }, { "epoch": 81.67, "learning_rate": 9.182129087161914e-06, "loss": 1.8387, "step": 28213000 }, { "epoch": 81.67, "learning_rate": 9.181406886809931e-06, "loss": 1.8396, "step": 28213500 }, { "epoch": 81.67, "learning_rate": 9.180683239162653e-06, "loss": 1.8457, "step": 28214000 }, { "epoch": 81.67, "learning_rate": 9.179959591515375e-06, "loss": 1.8573, "step": 28214500 }, { "epoch": 81.67, "learning_rate": 9.179235943868101e-06, "loss": 1.8571, "step": 28215000 }, { "epoch": 81.67, "learning_rate": 9.178512296220823e-06, "loss": 1.8531, "step": 28215500 }, { "epoch": 81.67, "learning_rate": 9.177788648573545e-06, "loss": 1.8433, "step": 28216000 }, { "epoch": 81.68, "learning_rate": 9.17706500092627e-06, "loss": 1.8498, "step": 28216500 }, { "epoch": 81.68, "learning_rate": 9.176341353278993e-06, "loss": 1.8575, "step": 28217000 }, { "epoch": 81.68, "learning_rate": 9.175617705631716e-06, "loss": 1.8393, "step": 28217500 }, { "epoch": 81.68, "learning_rate": 9.17489405798444e-06, "loss": 1.8376, "step": 28218000 }, { "epoch": 81.68, "learning_rate": 9.174170410337163e-06, "loss": 1.8307, "step": 28218500 }, { "epoch": 81.68, "learning_rate": 9.173446762689886e-06, "loss": 1.8105, "step": 28219000 }, { "epoch": 81.68, "learning_rate": 9.172723115042608e-06, "loss": 1.8574, "step": 28219500 }, { "epoch": 81.69, "learning_rate": 9.171999467395333e-06, "loss": 1.8685, "step": 28220000 }, { "epoch": 81.69, "learning_rate": 9.171275819748056e-06, "loss": 1.8458, "step": 28220500 }, { "epoch": 81.69, "learning_rate": 9.170552172100778e-06, "loss": 1.8253, "step": 28221000 }, { "epoch": 81.69, "learning_rate": 9.169829971748795e-06, "loss": 1.8266, "step": 28221500 }, { "epoch": 81.69, "learning_rate": 9.16910632410152e-06, "loss": 1.8485, "step": 28222000 }, { "epoch": 81.69, "learning_rate": 9.168382676454243e-06, "loss": 1.8596, "step": 28222500 }, { "epoch": 81.69, "learning_rate": 9.167659028806965e-06, "loss": 1.8404, "step": 28223000 }, { "epoch": 81.7, "learning_rate": 9.16693538115969e-06, "loss": 1.8501, "step": 28223500 }, { "epoch": 81.7, "learning_rate": 9.166211733512413e-06, "loss": 1.8343, "step": 28224000 }, { "epoch": 81.7, "learning_rate": 9.16548953316043e-06, "loss": 1.8374, "step": 28224500 }, { "epoch": 81.7, "learning_rate": 9.164765885513153e-06, "loss": 1.8215, "step": 28225000 }, { "epoch": 81.7, "learning_rate": 9.164042237865877e-06, "loss": 1.8611, "step": 28225500 }, { "epoch": 81.7, "learning_rate": 9.1633185902186e-06, "loss": 1.8223, "step": 28226000 }, { "epoch": 81.7, "learning_rate": 9.162596389866618e-06, "loss": 1.849, "step": 28226500 }, { "epoch": 81.71, "learning_rate": 9.161874189514635e-06, "loss": 1.834, "step": 28227000 }, { "epoch": 81.71, "learning_rate": 9.161150541867359e-06, "loss": 1.855, "step": 28227500 }, { "epoch": 81.71, "learning_rate": 9.160426894220083e-06, "loss": 1.8247, "step": 28228000 }, { "epoch": 81.71, "learning_rate": 9.159703246572805e-06, "loss": 1.843, "step": 28228500 }, { "epoch": 81.71, "learning_rate": 9.158979598925527e-06, "loss": 1.8368, "step": 28229000 }, { "epoch": 81.71, "learning_rate": 9.158255951278253e-06, "loss": 1.8529, "step": 28229500 }, { "epoch": 81.71, "learning_rate": 9.157532303630975e-06, "loss": 1.8475, "step": 28230000 }, { "epoch": 81.72, "learning_rate": 9.156808655983697e-06, "loss": 1.8216, "step": 28230500 }, { "epoch": 81.72, "learning_rate": 9.156085008336421e-06, "loss": 1.8424, "step": 28231000 }, { "epoch": 81.72, "learning_rate": 9.15536280798444e-06, "loss": 1.8436, "step": 28231500 }, { "epoch": 81.72, "learning_rate": 9.154639160337162e-06, "loss": 1.8152, "step": 28232000 }, { "epoch": 81.72, "learning_rate": 9.153915512689885e-06, "loss": 1.8499, "step": 28232500 }, { "epoch": 81.72, "learning_rate": 9.153191865042609e-06, "loss": 1.8385, "step": 28233000 }, { "epoch": 81.72, "learning_rate": 9.152468217395333e-06, "loss": 1.8582, "step": 28233500 }, { "epoch": 81.73, "learning_rate": 9.151744569748055e-06, "loss": 1.8458, "step": 28234000 }, { "epoch": 81.73, "learning_rate": 9.151022369396072e-06, "loss": 1.8448, "step": 28234500 }, { "epoch": 81.73, "learning_rate": 9.150298721748796e-06, "loss": 1.8612, "step": 28235000 }, { "epoch": 81.73, "learning_rate": 9.14957507410152e-06, "loss": 1.8224, "step": 28235500 }, { "epoch": 81.73, "learning_rate": 9.148851426454242e-06, "loss": 1.8421, "step": 28236000 }, { "epoch": 81.73, "learning_rate": 9.148129226102261e-06, "loss": 1.8394, "step": 28236500 }, { "epoch": 81.73, "learning_rate": 9.147405578454983e-06, "loss": 1.8295, "step": 28237000 }, { "epoch": 81.74, "learning_rate": 9.146681930807707e-06, "loss": 1.8248, "step": 28237500 }, { "epoch": 81.74, "learning_rate": 9.14595828316043e-06, "loss": 1.8375, "step": 28238000 }, { "epoch": 81.74, "learning_rate": 9.145236082808448e-06, "loss": 1.8097, "step": 28238500 }, { "epoch": 81.74, "learning_rate": 9.144512435161172e-06, "loss": 1.8277, "step": 28239000 }, { "epoch": 81.74, "learning_rate": 9.143788787513895e-06, "loss": 1.8477, "step": 28239500 }, { "epoch": 81.74, "learning_rate": 9.143065139866617e-06, "loss": 1.8364, "step": 28240000 }, { "epoch": 81.74, "learning_rate": 9.14234149221934e-06, "loss": 1.8456, "step": 28240500 }, { "epoch": 81.75, "learning_rate": 9.141617844572065e-06, "loss": 1.8398, "step": 28241000 }, { "epoch": 81.75, "learning_rate": 9.140894196924787e-06, "loss": 1.8407, "step": 28241500 }, { "epoch": 81.75, "learning_rate": 9.140171996572804e-06, "loss": 1.8201, "step": 28242000 }, { "epoch": 81.75, "learning_rate": 9.139448348925528e-06, "loss": 1.8519, "step": 28242500 }, { "epoch": 81.75, "learning_rate": 9.138724701278252e-06, "loss": 1.8229, "step": 28243000 }, { "epoch": 81.75, "learning_rate": 9.13800250092627e-06, "loss": 1.8368, "step": 28243500 }, { "epoch": 81.75, "learning_rate": 9.137278853278993e-06, "loss": 1.8494, "step": 28244000 }, { "epoch": 81.76, "learning_rate": 9.136555205631715e-06, "loss": 1.8452, "step": 28244500 }, { "epoch": 81.76, "learning_rate": 9.13583155798444e-06, "loss": 1.8461, "step": 28245000 }, { "epoch": 81.76, "learning_rate": 9.135107910337162e-06, "loss": 1.8437, "step": 28245500 }, { "epoch": 81.76, "learning_rate": 9.134384262689886e-06, "loss": 1.8588, "step": 28246000 }, { "epoch": 81.76, "learning_rate": 9.13366061504261e-06, "loss": 1.859, "step": 28246500 }, { "epoch": 81.76, "learning_rate": 9.132936967395332e-06, "loss": 1.8402, "step": 28247000 }, { "epoch": 81.76, "learning_rate": 9.132213319748056e-06, "loss": 1.8495, "step": 28247500 }, { "epoch": 81.77, "learning_rate": 9.131489672100778e-06, "loss": 1.8333, "step": 28248000 }, { "epoch": 81.77, "learning_rate": 9.130767471748797e-06, "loss": 1.8344, "step": 28248500 }, { "epoch": 81.77, "learning_rate": 9.130043824101519e-06, "loss": 1.8592, "step": 28249000 }, { "epoch": 81.77, "learning_rate": 9.129320176454243e-06, "loss": 1.8427, "step": 28249500 }, { "epoch": 81.77, "learning_rate": 9.128596528806967e-06, "loss": 1.8724, "step": 28250000 }, { "epoch": 81.77, "learning_rate": 9.127872881159689e-06, "loss": 1.8168, "step": 28250500 }, { "epoch": 81.78, "learning_rate": 9.127149233512413e-06, "loss": 1.8514, "step": 28251000 }, { "epoch": 81.78, "learning_rate": 9.126425585865135e-06, "loss": 1.8499, "step": 28251500 }, { "epoch": 81.78, "learning_rate": 9.12570193821786e-06, "loss": 1.8595, "step": 28252000 }, { "epoch": 81.78, "learning_rate": 9.124979737865876e-06, "loss": 1.8542, "step": 28252500 }, { "epoch": 81.78, "learning_rate": 9.1242560902186e-06, "loss": 1.8522, "step": 28253000 }, { "epoch": 81.78, "learning_rate": 9.123532442571323e-06, "loss": 1.8477, "step": 28253500 }, { "epoch": 81.78, "learning_rate": 9.122808794924047e-06, "loss": 1.8469, "step": 28254000 }, { "epoch": 81.79, "learning_rate": 9.122086594572064e-06, "loss": 1.8539, "step": 28254500 }, { "epoch": 81.79, "learning_rate": 9.121362946924788e-06, "loss": 1.8225, "step": 28255000 }, { "epoch": 81.79, "learning_rate": 9.12063929927751e-06, "loss": 1.8169, "step": 28255500 }, { "epoch": 81.79, "learning_rate": 9.119917098925529e-06, "loss": 1.8638, "step": 28256000 }, { "epoch": 81.79, "learning_rate": 9.119193451278251e-06, "loss": 1.8439, "step": 28256500 }, { "epoch": 81.79, "learning_rate": 9.118469803630975e-06, "loss": 1.8486, "step": 28257000 }, { "epoch": 81.79, "learning_rate": 9.117746155983697e-06, "loss": 1.8453, "step": 28257500 }, { "epoch": 81.8, "learning_rate": 9.117023955631716e-06, "loss": 1.8318, "step": 28258000 }, { "epoch": 81.8, "learning_rate": 9.116300307984438e-06, "loss": 1.8458, "step": 28258500 }, { "epoch": 81.8, "learning_rate": 9.115576660337162e-06, "loss": 1.8744, "step": 28259000 }, { "epoch": 81.8, "learning_rate": 9.114853012689886e-06, "loss": 1.84, "step": 28259500 }, { "epoch": 81.8, "learning_rate": 9.114129365042609e-06, "loss": 1.8733, "step": 28260000 }, { "epoch": 81.8, "learning_rate": 9.113405717395332e-06, "loss": 1.8493, "step": 28260500 }, { "epoch": 81.8, "learning_rate": 9.112682069748055e-06, "loss": 1.8247, "step": 28261000 }, { "epoch": 81.81, "learning_rate": 9.111958422100779e-06, "loss": 1.841, "step": 28261500 }, { "epoch": 81.81, "learning_rate": 9.111236221748796e-06, "loss": 1.8351, "step": 28262000 }, { "epoch": 81.81, "learning_rate": 9.11051257410152e-06, "loss": 1.8245, "step": 28262500 }, { "epoch": 81.81, "learning_rate": 9.109790373749537e-06, "loss": 1.854, "step": 28263000 }, { "epoch": 81.81, "learning_rate": 9.109066726102261e-06, "loss": 1.8365, "step": 28263500 }, { "epoch": 81.81, "learning_rate": 9.108343078454983e-06, "loss": 1.8241, "step": 28264000 }, { "epoch": 81.81, "learning_rate": 9.107619430807707e-06, "loss": 1.8312, "step": 28264500 }, { "epoch": 81.82, "learning_rate": 9.10689578316043e-06, "loss": 1.8161, "step": 28265000 }, { "epoch": 81.82, "learning_rate": 9.106173582808448e-06, "loss": 1.8163, "step": 28265500 }, { "epoch": 81.82, "learning_rate": 9.10544993516117e-06, "loss": 1.8412, "step": 28266000 }, { "epoch": 81.82, "learning_rate": 9.104726287513894e-06, "loss": 1.8258, "step": 28266500 }, { "epoch": 81.82, "learning_rate": 9.104002639866618e-06, "loss": 1.8133, "step": 28267000 }, { "epoch": 81.82, "learning_rate": 9.10327899221934e-06, "loss": 1.8419, "step": 28267500 }, { "epoch": 81.82, "learning_rate": 9.102555344572065e-06, "loss": 1.8614, "step": 28268000 }, { "epoch": 81.83, "learning_rate": 9.101831696924787e-06, "loss": 1.8831, "step": 28268500 }, { "epoch": 81.83, "learning_rate": 9.10110804927751e-06, "loss": 1.8581, "step": 28269000 }, { "epoch": 81.83, "learning_rate": 9.100385848925528e-06, "loss": 1.8434, "step": 28269500 }, { "epoch": 81.83, "learning_rate": 9.099662201278252e-06, "loss": 1.8293, "step": 28270000 }, { "epoch": 81.83, "learning_rate": 9.098938553630974e-06, "loss": 1.8377, "step": 28270500 }, { "epoch": 81.83, "learning_rate": 9.098214905983698e-06, "loss": 1.835, "step": 28271000 }, { "epoch": 81.83, "learning_rate": 9.097492705631715e-06, "loss": 1.8247, "step": 28271500 }, { "epoch": 81.84, "learning_rate": 9.09676905798444e-06, "loss": 1.8758, "step": 28272000 }, { "epoch": 81.84, "learning_rate": 9.096045410337161e-06, "loss": 1.8288, "step": 28272500 }, { "epoch": 81.84, "learning_rate": 9.095321762689885e-06, "loss": 1.8545, "step": 28273000 }, { "epoch": 81.84, "learning_rate": 9.09459811504261e-06, "loss": 1.812, "step": 28273500 }, { "epoch": 81.84, "learning_rate": 9.093874467395332e-06, "loss": 1.8057, "step": 28274000 }, { "epoch": 81.84, "learning_rate": 9.093150819748055e-06, "loss": 1.8396, "step": 28274500 }, { "epoch": 81.84, "learning_rate": 9.09242717210078e-06, "loss": 1.8454, "step": 28275000 }, { "epoch": 81.85, "learning_rate": 9.091704971748797e-06, "loss": 1.8462, "step": 28275500 }, { "epoch": 81.85, "learning_rate": 9.090981324101519e-06, "loss": 1.8549, "step": 28276000 }, { "epoch": 81.85, "learning_rate": 9.090259123749538e-06, "loss": 1.817, "step": 28276500 }, { "epoch": 81.85, "learning_rate": 9.08953547610226e-06, "loss": 1.8305, "step": 28277000 }, { "epoch": 81.85, "learning_rate": 9.088811828454984e-06, "loss": 1.8378, "step": 28277500 }, { "epoch": 81.85, "learning_rate": 9.088089628103001e-06, "loss": 1.8404, "step": 28278000 }, { "epoch": 81.85, "learning_rate": 9.087365980455725e-06, "loss": 1.8471, "step": 28278500 }, { "epoch": 81.86, "learning_rate": 9.086642332808447e-06, "loss": 1.852, "step": 28279000 }, { "epoch": 81.86, "learning_rate": 9.085918685161171e-06, "loss": 1.8631, "step": 28279500 }, { "epoch": 81.86, "learning_rate": 9.085195037513894e-06, "loss": 1.8263, "step": 28280000 }, { "epoch": 81.86, "learning_rate": 9.084471389866618e-06, "loss": 1.8387, "step": 28280500 }, { "epoch": 81.86, "learning_rate": 9.083747742219341e-06, "loss": 1.8508, "step": 28281000 }, { "epoch": 81.86, "learning_rate": 9.083024094572064e-06, "loss": 1.8476, "step": 28281500 }, { "epoch": 81.86, "learning_rate": 9.082300446924788e-06, "loss": 1.8646, "step": 28282000 }, { "epoch": 81.87, "learning_rate": 9.081578246572805e-06, "loss": 1.8499, "step": 28282500 }, { "epoch": 81.87, "learning_rate": 9.080854598925529e-06, "loss": 1.8195, "step": 28283000 }, { "epoch": 81.87, "learning_rate": 9.080130951278251e-06, "loss": 1.8526, "step": 28283500 }, { "epoch": 81.87, "learning_rate": 9.079407303630975e-06, "loss": 1.8453, "step": 28284000 }, { "epoch": 81.87, "learning_rate": 9.078685103278992e-06, "loss": 1.8636, "step": 28284500 }, { "epoch": 81.87, "learning_rate": 9.077962902927011e-06, "loss": 1.8507, "step": 28285000 }, { "epoch": 81.87, "learning_rate": 9.077239255279733e-06, "loss": 1.8317, "step": 28285500 }, { "epoch": 81.88, "learning_rate": 9.07651705492775e-06, "loss": 1.833, "step": 28286000 }, { "epoch": 81.88, "learning_rate": 9.075793407280475e-06, "loss": 1.8364, "step": 28286500 }, { "epoch": 81.88, "learning_rate": 9.075069759633198e-06, "loss": 1.8689, "step": 28287000 }, { "epoch": 81.88, "learning_rate": 9.07434611198592e-06, "loss": 1.8443, "step": 28287500 }, { "epoch": 81.88, "learning_rate": 9.073622464338645e-06, "loss": 1.8421, "step": 28288000 }, { "epoch": 81.88, "learning_rate": 9.072898816691369e-06, "loss": 1.8328, "step": 28288500 }, { "epoch": 81.89, "learning_rate": 9.07217516904409e-06, "loss": 1.8662, "step": 28289000 }, { "epoch": 81.89, "learning_rate": 9.071451521396813e-06, "loss": 1.8335, "step": 28289500 }, { "epoch": 81.89, "learning_rate": 9.070727873749537e-06, "loss": 1.8362, "step": 28290000 }, { "epoch": 81.89, "learning_rate": 9.070004226102261e-06, "loss": 1.859, "step": 28290500 }, { "epoch": 81.89, "learning_rate": 9.069280578454983e-06, "loss": 1.8322, "step": 28291000 }, { "epoch": 81.89, "learning_rate": 9.068556930807707e-06, "loss": 1.8448, "step": 28291500 }, { "epoch": 81.89, "learning_rate": 9.067834730455724e-06, "loss": 1.8566, "step": 28292000 }, { "epoch": 81.9, "learning_rate": 9.067111082808448e-06, "loss": 1.861, "step": 28292500 }, { "epoch": 81.9, "learning_rate": 9.06638743516117e-06, "loss": 1.8364, "step": 28293000 }, { "epoch": 81.9, "learning_rate": 9.065663787513894e-06, "loss": 1.8363, "step": 28293500 }, { "epoch": 81.9, "learning_rate": 9.064943034457207e-06, "loss": 1.8552, "step": 28294000 }, { "epoch": 81.9, "learning_rate": 9.06421938680993e-06, "loss": 1.8652, "step": 28294500 }, { "epoch": 81.9, "learning_rate": 9.063495739162653e-06, "loss": 1.8235, "step": 28295000 }, { "epoch": 81.9, "learning_rate": 9.062772091515377e-06, "loss": 1.8231, "step": 28295500 }, { "epoch": 81.91, "learning_rate": 9.0620484438681e-06, "loss": 1.8249, "step": 28296000 }, { "epoch": 81.91, "learning_rate": 9.061324796220823e-06, "loss": 1.8254, "step": 28296500 }, { "epoch": 81.91, "learning_rate": 9.060601148573545e-06, "loss": 1.8485, "step": 28297000 }, { "epoch": 81.91, "learning_rate": 9.059877500926269e-06, "loss": 1.8474, "step": 28297500 }, { "epoch": 81.91, "learning_rate": 9.059153853278993e-06, "loss": 1.8583, "step": 28298000 }, { "epoch": 81.91, "learning_rate": 9.05843165292701e-06, "loss": 1.8187, "step": 28298500 }, { "epoch": 81.91, "learning_rate": 9.057708005279732e-06, "loss": 1.8745, "step": 28299000 }, { "epoch": 81.92, "learning_rate": 9.056984357632458e-06, "loss": 1.8385, "step": 28299500 }, { "epoch": 81.92, "learning_rate": 9.05626070998518e-06, "loss": 1.8598, "step": 28300000 }, { "epoch": 81.92, "learning_rate": 9.055537062337903e-06, "loss": 1.8598, "step": 28300500 }, { "epoch": 81.92, "learning_rate": 9.054813414690626e-06, "loss": 1.8391, "step": 28301000 }, { "epoch": 81.92, "learning_rate": 9.05408976704335e-06, "loss": 1.8207, "step": 28301500 }, { "epoch": 81.92, "learning_rate": 9.053367566691368e-06, "loss": 1.7934, "step": 28302000 }, { "epoch": 81.92, "learning_rate": 9.05264391904409e-06, "loss": 1.8398, "step": 28302500 }, { "epoch": 81.93, "learning_rate": 9.051920271396814e-06, "loss": 1.8313, "step": 28303000 }, { "epoch": 81.93, "learning_rate": 9.051196623749538e-06, "loss": 1.8424, "step": 28303500 }, { "epoch": 81.93, "learning_rate": 9.05047297610226e-06, "loss": 1.8476, "step": 28304000 }, { "epoch": 81.93, "learning_rate": 9.049750775750277e-06, "loss": 1.8426, "step": 28304500 }, { "epoch": 81.93, "learning_rate": 9.049028575398296e-06, "loss": 1.8268, "step": 28305000 }, { "epoch": 81.93, "learning_rate": 9.048306375046315e-06, "loss": 1.8606, "step": 28305500 }, { "epoch": 81.93, "learning_rate": 9.047582727399037e-06, "loss": 1.8323, "step": 28306000 }, { "epoch": 81.94, "learning_rate": 9.04685907975176e-06, "loss": 1.8476, "step": 28306500 }, { "epoch": 81.94, "learning_rate": 9.046135432104484e-06, "loss": 1.8803, "step": 28307000 }, { "epoch": 81.94, "learning_rate": 9.045411784457207e-06, "loss": 1.8487, "step": 28307500 }, { "epoch": 81.94, "learning_rate": 9.04468813680993e-06, "loss": 1.8295, "step": 28308000 }, { "epoch": 81.94, "learning_rate": 9.043964489162652e-06, "loss": 1.8397, "step": 28308500 }, { "epoch": 81.94, "learning_rate": 9.043240841515378e-06, "loss": 1.8517, "step": 28309000 }, { "epoch": 81.94, "learning_rate": 9.0425171938681e-06, "loss": 1.8334, "step": 28309500 }, { "epoch": 81.95, "learning_rate": 9.041793546220822e-06, "loss": 1.8344, "step": 28310000 }, { "epoch": 81.95, "learning_rate": 9.041069898573546e-06, "loss": 1.8168, "step": 28310500 }, { "epoch": 81.95, "learning_rate": 9.04034625092627e-06, "loss": 1.8311, "step": 28311000 }, { "epoch": 81.95, "learning_rate": 9.039624050574287e-06, "loss": 1.8267, "step": 28311500 }, { "epoch": 81.95, "learning_rate": 9.03890040292701e-06, "loss": 1.8478, "step": 28312000 }, { "epoch": 81.95, "learning_rate": 9.038176755279735e-06, "loss": 1.8192, "step": 28312500 }, { "epoch": 81.95, "learning_rate": 9.037453107632457e-06, "loss": 1.8406, "step": 28313000 }, { "epoch": 81.96, "learning_rate": 9.03672945998518e-06, "loss": 1.8313, "step": 28313500 }, { "epoch": 81.96, "learning_rate": 9.036005812337903e-06, "loss": 1.8393, "step": 28314000 }, { "epoch": 81.96, "learning_rate": 9.035283611985922e-06, "loss": 1.8467, "step": 28314500 }, { "epoch": 81.96, "learning_rate": 9.034559964338645e-06, "loss": 1.8231, "step": 28315000 }, { "epoch": 81.96, "learning_rate": 9.033836316691367e-06, "loss": 1.8596, "step": 28315500 }, { "epoch": 81.96, "learning_rate": 9.033114116339384e-06, "loss": 1.8535, "step": 28316000 }, { "epoch": 81.96, "learning_rate": 9.03239046869211e-06, "loss": 1.8333, "step": 28316500 }, { "epoch": 81.97, "learning_rate": 9.031666821044832e-06, "loss": 1.8363, "step": 28317000 }, { "epoch": 81.97, "learning_rate": 9.030943173397554e-06, "loss": 1.8282, "step": 28317500 }, { "epoch": 81.97, "learning_rate": 9.03021952575028e-06, "loss": 1.8366, "step": 28318000 }, { "epoch": 81.97, "learning_rate": 9.029495878103002e-06, "loss": 1.8243, "step": 28318500 }, { "epoch": 81.97, "learning_rate": 9.028772230455724e-06, "loss": 1.8464, "step": 28319000 }, { "epoch": 81.97, "learning_rate": 9.028050030103741e-06, "loss": 1.8424, "step": 28319500 }, { "epoch": 81.97, "learning_rate": 9.027326382456467e-06, "loss": 1.8303, "step": 28320000 }, { "epoch": 81.98, "learning_rate": 9.02660273480919e-06, "loss": 1.8522, "step": 28320500 }, { "epoch": 81.98, "learning_rate": 9.025879087161912e-06, "loss": 1.8399, "step": 28321000 }, { "epoch": 81.98, "learning_rate": 9.025155439514635e-06, "loss": 1.8399, "step": 28321500 }, { "epoch": 81.98, "learning_rate": 9.02443179186736e-06, "loss": 1.8405, "step": 28322000 }, { "epoch": 81.98, "learning_rate": 9.023709591515377e-06, "loss": 1.8386, "step": 28322500 }, { "epoch": 81.98, "learning_rate": 9.022985943868099e-06, "loss": 1.8381, "step": 28323000 }, { "epoch": 81.98, "learning_rate": 9.022262296220824e-06, "loss": 1.839, "step": 28323500 }, { "epoch": 81.99, "learning_rate": 9.021538648573547e-06, "loss": 1.8397, "step": 28324000 }, { "epoch": 81.99, "learning_rate": 9.020815000926269e-06, "loss": 1.8307, "step": 28324500 }, { "epoch": 81.99, "learning_rate": 9.020091353278993e-06, "loss": 1.8392, "step": 28325000 }, { "epoch": 81.99, "learning_rate": 9.019367705631717e-06, "loss": 1.8508, "step": 28325500 }, { "epoch": 81.99, "learning_rate": 9.018644057984439e-06, "loss": 1.8258, "step": 28326000 }, { "epoch": 81.99, "learning_rate": 9.017921857632456e-06, "loss": 1.8354, "step": 28326500 }, { "epoch": 82.0, "learning_rate": 9.01719820998518e-06, "loss": 1.8423, "step": 28327000 }, { "epoch": 82.0, "learning_rate": 9.016474562337904e-06, "loss": 1.8488, "step": 28327500 }, { "epoch": 82.0, "learning_rate": 9.015750914690626e-06, "loss": 1.8605, "step": 28328000 }, { "epoch": 82.0, "learning_rate": 9.015027267043349e-06, "loss": 1.8234, "step": 28328500 }, { "epoch": 82.0, "eval_accuracy": 0.6864873525969934, "eval_accuracy_mlm": 0.6553302541283902, "eval_accuracy_nsp": 0.8536204186202124, "eval_loss": 2.1995444297790527, "eval_runtime": 331.7228, "eval_samples_per_second": 1315.514, "eval_steps_per_second": 54.814, "step": 28328704 }, { "epoch": 82.0, "learning_rate": 9.014305066691368e-06, "loss": 1.8094, "step": 28329000 }, { "epoch": 82.0, "learning_rate": 9.013581419044091e-06, "loss": 1.8229, "step": 28329500 }, { "epoch": 82.0, "learning_rate": 9.012857771396814e-06, "loss": 1.8183, "step": 28330000 }, { "epoch": 82.01, "learning_rate": 9.012134123749538e-06, "loss": 1.8287, "step": 28330500 }, { "epoch": 82.01, "learning_rate": 9.011410476102262e-06, "loss": 1.8349, "step": 28331000 }, { "epoch": 82.01, "learning_rate": 9.010688275750279e-06, "loss": 1.8124, "step": 28331500 }, { "epoch": 82.01, "learning_rate": 9.009964628103001e-06, "loss": 1.8298, "step": 28332000 }, { "epoch": 82.01, "learning_rate": 9.009240980455725e-06, "loss": 1.8416, "step": 28332500 }, { "epoch": 82.01, "learning_rate": 9.008517332808449e-06, "loss": 1.826, "step": 28333000 }, { "epoch": 82.01, "learning_rate": 9.007793685161171e-06, "loss": 1.8233, "step": 28333500 }, { "epoch": 82.02, "learning_rate": 9.007070037513893e-06, "loss": 1.8308, "step": 28334000 }, { "epoch": 82.02, "learning_rate": 9.006347837161912e-06, "loss": 1.836, "step": 28334500 }, { "epoch": 82.02, "learning_rate": 9.005624189514636e-06, "loss": 1.8416, "step": 28335000 }, { "epoch": 82.02, "learning_rate": 9.004900541867358e-06, "loss": 1.8328, "step": 28335500 }, { "epoch": 82.02, "learning_rate": 9.004176894220082e-06, "loss": 1.7986, "step": 28336000 }, { "epoch": 82.02, "learning_rate": 9.003453246572806e-06, "loss": 1.8578, "step": 28336500 }, { "epoch": 82.02, "learning_rate": 9.002729598925529e-06, "loss": 1.8195, "step": 28337000 }, { "epoch": 82.03, "learning_rate": 9.00200595127825e-06, "loss": 1.8404, "step": 28337500 }, { "epoch": 82.03, "learning_rate": 9.001282303630975e-06, "loss": 1.8299, "step": 28338000 }, { "epoch": 82.03, "learning_rate": 9.000558655983699e-06, "loss": 1.8384, "step": 28338500 }, { "epoch": 82.03, "learning_rate": 8.999836455631716e-06, "loss": 1.8112, "step": 28339000 }, { "epoch": 82.03, "learning_rate": 8.999112807984438e-06, "loss": 1.8198, "step": 28339500 }, { "epoch": 82.03, "learning_rate": 8.998390607632457e-06, "loss": 1.8368, "step": 28340000 }, { "epoch": 82.03, "learning_rate": 8.997666959985181e-06, "loss": 1.8213, "step": 28340500 }, { "epoch": 82.04, "learning_rate": 8.996943312337903e-06, "loss": 1.8182, "step": 28341000 }, { "epoch": 82.04, "learning_rate": 8.996219664690627e-06, "loss": 1.8516, "step": 28341500 }, { "epoch": 82.04, "learning_rate": 8.99549601704335e-06, "loss": 1.8292, "step": 28342000 }, { "epoch": 82.04, "learning_rate": 8.994772369396073e-06, "loss": 1.8201, "step": 28342500 }, { "epoch": 82.04, "learning_rate": 8.994048721748796e-06, "loss": 1.8461, "step": 28343000 }, { "epoch": 82.04, "learning_rate": 8.993326521396815e-06, "loss": 1.8323, "step": 28343500 }, { "epoch": 82.04, "learning_rate": 8.992602873749538e-06, "loss": 1.855, "step": 28344000 }, { "epoch": 82.05, "learning_rate": 8.99187922610226e-06, "loss": 1.8458, "step": 28344500 }, { "epoch": 82.05, "learning_rate": 8.991155578454983e-06, "loss": 1.8231, "step": 28345000 }, { "epoch": 82.05, "learning_rate": 8.990431930807707e-06, "loss": 1.839, "step": 28345500 }, { "epoch": 82.05, "learning_rate": 8.98970828316043e-06, "loss": 1.8244, "step": 28346000 }, { "epoch": 82.05, "learning_rate": 8.988986082808448e-06, "loss": 1.8344, "step": 28346500 }, { "epoch": 82.05, "learning_rate": 8.98826243516117e-06, "loss": 1.8527, "step": 28347000 }, { "epoch": 82.05, "learning_rate": 8.987538787513894e-06, "loss": 1.7969, "step": 28347500 }, { "epoch": 82.06, "learning_rate": 8.986815139866618e-06, "loss": 1.8122, "step": 28348000 }, { "epoch": 82.06, "learning_rate": 8.98609149221934e-06, "loss": 1.8202, "step": 28348500 }, { "epoch": 82.06, "learning_rate": 8.985367844572064e-06, "loss": 1.8187, "step": 28349000 }, { "epoch": 82.06, "learning_rate": 8.984645644220082e-06, "loss": 1.836, "step": 28349500 }, { "epoch": 82.06, "learning_rate": 8.983921996572805e-06, "loss": 1.8301, "step": 28350000 }, { "epoch": 82.06, "learning_rate": 8.983198348925528e-06, "loss": 1.8055, "step": 28350500 }, { "epoch": 82.06, "learning_rate": 8.982474701278252e-06, "loss": 1.8079, "step": 28351000 }, { "epoch": 82.07, "learning_rate": 8.981751053630976e-06, "loss": 1.8473, "step": 28351500 }, { "epoch": 82.07, "learning_rate": 8.981027405983698e-06, "loss": 1.8246, "step": 28352000 }, { "epoch": 82.07, "learning_rate": 8.980303758336422e-06, "loss": 1.815, "step": 28352500 }, { "epoch": 82.07, "learning_rate": 8.979580110689144e-06, "loss": 1.8351, "step": 28353000 }, { "epoch": 82.07, "learning_rate": 8.978856463041868e-06, "loss": 1.8447, "step": 28353500 }, { "epoch": 82.07, "learning_rate": 8.978134262689885e-06, "loss": 1.8552, "step": 28354000 }, { "epoch": 82.07, "learning_rate": 8.977410615042609e-06, "loss": 1.8334, "step": 28354500 }, { "epoch": 82.08, "learning_rate": 8.976686967395333e-06, "loss": 1.8334, "step": 28355000 }, { "epoch": 82.08, "learning_rate": 8.975963319748055e-06, "loss": 1.8317, "step": 28355500 }, { "epoch": 82.08, "learning_rate": 8.975239672100779e-06, "loss": 1.8226, "step": 28356000 }, { "epoch": 82.08, "learning_rate": 8.974516024453501e-06, "loss": 1.8432, "step": 28356500 }, { "epoch": 82.08, "learning_rate": 8.973792376806225e-06, "loss": 1.8221, "step": 28357000 }, { "epoch": 82.08, "learning_rate": 8.973068729158948e-06, "loss": 1.8267, "step": 28357500 }, { "epoch": 82.08, "learning_rate": 8.972346528806966e-06, "loss": 1.8253, "step": 28358000 }, { "epoch": 82.09, "learning_rate": 8.971622881159689e-06, "loss": 1.8295, "step": 28358500 }, { "epoch": 82.09, "learning_rate": 8.970899233512413e-06, "loss": 1.8403, "step": 28359000 }, { "epoch": 82.09, "learning_rate": 8.970175585865135e-06, "loss": 1.8326, "step": 28359500 }, { "epoch": 82.09, "learning_rate": 8.969451938217859e-06, "loss": 1.8384, "step": 28360000 }, { "epoch": 82.09, "learning_rate": 8.968728290570583e-06, "loss": 1.858, "step": 28360500 }, { "epoch": 82.09, "learning_rate": 8.9680060902186e-06, "loss": 1.82, "step": 28361000 }, { "epoch": 82.09, "learning_rate": 8.967282442571324e-06, "loss": 1.8165, "step": 28361500 }, { "epoch": 82.1, "learning_rate": 8.966558794924046e-06, "loss": 1.821, "step": 28362000 }, { "epoch": 82.1, "learning_rate": 8.965836594572063e-06, "loss": 1.8346, "step": 28362500 }, { "epoch": 82.1, "learning_rate": 8.965112946924787e-06, "loss": 1.8624, "step": 28363000 }, { "epoch": 82.1, "learning_rate": 8.964389299277511e-06, "loss": 1.8395, "step": 28363500 }, { "epoch": 82.1, "learning_rate": 8.963665651630233e-06, "loss": 1.8173, "step": 28364000 }, { "epoch": 82.1, "learning_rate": 8.962943451278252e-06, "loss": 1.8391, "step": 28364500 }, { "epoch": 82.11, "learning_rate": 8.962219803630975e-06, "loss": 1.844, "step": 28365000 }, { "epoch": 82.11, "learning_rate": 8.961496155983699e-06, "loss": 1.805, "step": 28365500 }, { "epoch": 82.11, "learning_rate": 8.96077250833642e-06, "loss": 1.8406, "step": 28366000 }, { "epoch": 82.11, "learning_rate": 8.960048860689145e-06, "loss": 1.8572, "step": 28366500 }, { "epoch": 82.11, "learning_rate": 8.959325213041869e-06, "loss": 1.7849, "step": 28367000 }, { "epoch": 82.11, "learning_rate": 8.958601565394591e-06, "loss": 1.8114, "step": 28367500 }, { "epoch": 82.11, "learning_rate": 8.957877917747315e-06, "loss": 1.8321, "step": 28368000 }, { "epoch": 82.12, "learning_rate": 8.957154270100037e-06, "loss": 1.8642, "step": 28368500 }, { "epoch": 82.12, "learning_rate": 8.956430622452761e-06, "loss": 1.8387, "step": 28369000 }, { "epoch": 82.12, "learning_rate": 8.955708422100778e-06, "loss": 1.8207, "step": 28369500 }, { "epoch": 82.12, "learning_rate": 8.954986221748795e-06, "loss": 1.8472, "step": 28370000 }, { "epoch": 82.12, "learning_rate": 8.95426257410152e-06, "loss": 1.837, "step": 28370500 }, { "epoch": 82.12, "learning_rate": 8.953538926454243e-06, "loss": 1.8185, "step": 28371000 }, { "epoch": 82.12, "learning_rate": 8.952815278806966e-06, "loss": 1.8086, "step": 28371500 }, { "epoch": 82.13, "learning_rate": 8.952093078454983e-06, "loss": 1.7901, "step": 28372000 }, { "epoch": 82.13, "learning_rate": 8.951369430807707e-06, "loss": 1.8642, "step": 28372500 }, { "epoch": 82.13, "learning_rate": 8.95064578316043e-06, "loss": 1.8305, "step": 28373000 }, { "epoch": 82.13, "learning_rate": 8.949922135513153e-06, "loss": 1.7914, "step": 28373500 }, { "epoch": 82.13, "learning_rate": 8.949198487865877e-06, "loss": 1.8168, "step": 28374000 }, { "epoch": 82.13, "learning_rate": 8.9484748402186e-06, "loss": 1.8314, "step": 28374500 }, { "epoch": 82.13, "learning_rate": 8.947751192571323e-06, "loss": 1.8413, "step": 28375000 }, { "epoch": 82.14, "learning_rate": 8.947027544924047e-06, "loss": 1.8614, "step": 28375500 }, { "epoch": 82.14, "learning_rate": 8.946303897276769e-06, "loss": 1.8268, "step": 28376000 }, { "epoch": 82.14, "learning_rate": 8.945580249629493e-06, "loss": 1.8589, "step": 28376500 }, { "epoch": 82.14, "learning_rate": 8.944856601982215e-06, "loss": 1.827, "step": 28377000 }, { "epoch": 82.14, "learning_rate": 8.94413295433494e-06, "loss": 1.8204, "step": 28377500 }, { "epoch": 82.14, "learning_rate": 8.943409306687663e-06, "loss": 1.8419, "step": 28378000 }, { "epoch": 82.14, "learning_rate": 8.942685659040385e-06, "loss": 1.8333, "step": 28378500 }, { "epoch": 82.15, "learning_rate": 8.941963458688403e-06, "loss": 1.8561, "step": 28379000 }, { "epoch": 82.15, "learning_rate": 8.941239811041127e-06, "loss": 1.831, "step": 28379500 }, { "epoch": 82.15, "learning_rate": 8.94051616339385e-06, "loss": 1.8327, "step": 28380000 }, { "epoch": 82.15, "learning_rate": 8.939792515746573e-06, "loss": 1.834, "step": 28380500 }, { "epoch": 82.15, "learning_rate": 8.939068868099297e-06, "loss": 1.8123, "step": 28381000 }, { "epoch": 82.15, "learning_rate": 8.93834522045202e-06, "loss": 1.8428, "step": 28381500 }, { "epoch": 82.15, "learning_rate": 8.937621572804743e-06, "loss": 1.8345, "step": 28382000 }, { "epoch": 82.16, "learning_rate": 8.936897925157465e-06, "loss": 1.8369, "step": 28382500 }, { "epoch": 82.16, "learning_rate": 8.936175724805484e-06, "loss": 1.8383, "step": 28383000 }, { "epoch": 82.16, "learning_rate": 8.935452077158208e-06, "loss": 1.8248, "step": 28383500 }, { "epoch": 82.16, "learning_rate": 8.934729876806225e-06, "loss": 1.8567, "step": 28384000 }, { "epoch": 82.16, "learning_rate": 8.934006229158947e-06, "loss": 1.8266, "step": 28384500 }, { "epoch": 82.16, "learning_rate": 8.933282581511671e-06, "loss": 1.8388, "step": 28385000 }, { "epoch": 82.16, "learning_rate": 8.932558933864395e-06, "loss": 1.8287, "step": 28385500 }, { "epoch": 82.17, "learning_rate": 8.931836733512413e-06, "loss": 1.8435, "step": 28386000 }, { "epoch": 82.17, "learning_rate": 8.931113085865135e-06, "loss": 1.8207, "step": 28386500 }, { "epoch": 82.17, "learning_rate": 8.930389438217859e-06, "loss": 1.8461, "step": 28387000 }, { "epoch": 82.17, "learning_rate": 8.929665790570583e-06, "loss": 1.7996, "step": 28387500 }, { "epoch": 82.17, "learning_rate": 8.928942142923305e-06, "loss": 1.8456, "step": 28388000 }, { "epoch": 82.17, "learning_rate": 8.928218495276029e-06, "loss": 1.8543, "step": 28388500 }, { "epoch": 82.17, "learning_rate": 8.927494847628753e-06, "loss": 1.8454, "step": 28389000 }, { "epoch": 82.18, "learning_rate": 8.92677264727677e-06, "loss": 1.823, "step": 28389500 }, { "epoch": 82.18, "learning_rate": 8.926048999629492e-06, "loss": 1.8412, "step": 28390000 }, { "epoch": 82.18, "learning_rate": 8.92532679927751e-06, "loss": 1.8122, "step": 28390500 }, { "epoch": 82.18, "learning_rate": 8.924603151630235e-06, "loss": 1.8248, "step": 28391000 }, { "epoch": 82.18, "learning_rate": 8.923879503982957e-06, "loss": 1.8535, "step": 28391500 }, { "epoch": 82.18, "learning_rate": 8.92315585633568e-06, "loss": 1.8387, "step": 28392000 }, { "epoch": 82.18, "learning_rate": 8.922432208688403e-06, "loss": 1.795, "step": 28392500 }, { "epoch": 82.19, "learning_rate": 8.921708561041127e-06, "loss": 1.8115, "step": 28393000 }, { "epoch": 82.19, "learning_rate": 8.92098491339385e-06, "loss": 1.847, "step": 28393500 }, { "epoch": 82.19, "learning_rate": 8.920262713041867e-06, "loss": 1.8391, "step": 28394000 }, { "epoch": 82.19, "learning_rate": 8.91953906539459e-06, "loss": 1.8369, "step": 28394500 }, { "epoch": 82.19, "learning_rate": 8.918815417747315e-06, "loss": 1.8382, "step": 28395000 }, { "epoch": 82.19, "learning_rate": 8.918091770100037e-06, "loss": 1.8298, "step": 28395500 }, { "epoch": 82.19, "learning_rate": 8.917368122452761e-06, "loss": 1.8342, "step": 28396000 }, { "epoch": 82.2, "learning_rate": 8.916644474805485e-06, "loss": 1.8396, "step": 28396500 }, { "epoch": 82.2, "learning_rate": 8.915920827158207e-06, "loss": 1.8444, "step": 28397000 }, { "epoch": 82.2, "learning_rate": 8.91519717951093e-06, "loss": 1.861, "step": 28397500 }, { "epoch": 82.2, "learning_rate": 8.914474979158948e-06, "loss": 1.8273, "step": 28398000 }, { "epoch": 82.2, "learning_rate": 8.913751331511672e-06, "loss": 1.8684, "step": 28398500 }, { "epoch": 82.2, "learning_rate": 8.913027683864394e-06, "loss": 1.823, "step": 28399000 }, { "epoch": 82.2, "learning_rate": 8.912304036217117e-06, "loss": 1.8353, "step": 28399500 }, { "epoch": 82.21, "learning_rate": 8.911580388569842e-06, "loss": 1.8388, "step": 28400000 }, { "epoch": 82.21, "learning_rate": 8.910856740922564e-06, "loss": 1.816, "step": 28400500 }, { "epoch": 82.21, "learning_rate": 8.910134540570582e-06, "loss": 1.8367, "step": 28401000 }, { "epoch": 82.21, "learning_rate": 8.909410892923304e-06, "loss": 1.8279, "step": 28401500 }, { "epoch": 82.21, "learning_rate": 8.90868724527603e-06, "loss": 1.8667, "step": 28402000 }, { "epoch": 82.21, "learning_rate": 8.907963597628752e-06, "loss": 1.8386, "step": 28402500 }, { "epoch": 82.22, "learning_rate": 8.907239949981474e-06, "loss": 1.8532, "step": 28403000 }, { "epoch": 82.22, "learning_rate": 8.9065163023342e-06, "loss": 1.8288, "step": 28403500 }, { "epoch": 82.22, "learning_rate": 8.905792654686922e-06, "loss": 1.8295, "step": 28404000 }, { "epoch": 82.22, "learning_rate": 8.905069007039644e-06, "loss": 1.8453, "step": 28404500 }, { "epoch": 82.22, "learning_rate": 8.904346806687661e-06, "loss": 1.8428, "step": 28405000 }, { "epoch": 82.22, "learning_rate": 8.903623159040387e-06, "loss": 1.8148, "step": 28405500 }, { "epoch": 82.22, "learning_rate": 8.90289951139311e-06, "loss": 1.8332, "step": 28406000 }, { "epoch": 82.23, "learning_rate": 8.902175863745831e-06, "loss": 1.8385, "step": 28406500 }, { "epoch": 82.23, "learning_rate": 8.901452216098555e-06, "loss": 1.8151, "step": 28407000 }, { "epoch": 82.23, "learning_rate": 8.90072856845128e-06, "loss": 1.8404, "step": 28407500 }, { "epoch": 82.23, "learning_rate": 8.900006368099297e-06, "loss": 1.8559, "step": 28408000 }, { "epoch": 82.23, "learning_rate": 8.899282720452019e-06, "loss": 1.8132, "step": 28408500 }, { "epoch": 82.23, "learning_rate": 8.898559072804743e-06, "loss": 1.8473, "step": 28409000 }, { "epoch": 82.23, "learning_rate": 8.897835425157467e-06, "loss": 1.8785, "step": 28409500 }, { "epoch": 82.24, "learning_rate": 8.897111777510189e-06, "loss": 1.8185, "step": 28410000 }, { "epoch": 82.24, "learning_rate": 8.896389577158206e-06, "loss": 1.8307, "step": 28410500 }, { "epoch": 82.24, "learning_rate": 8.895665929510932e-06, "loss": 1.8532, "step": 28411000 }, { "epoch": 82.24, "learning_rate": 8.894942281863654e-06, "loss": 1.8308, "step": 28411500 }, { "epoch": 82.24, "learning_rate": 8.894218634216376e-06, "loss": 1.8168, "step": 28412000 }, { "epoch": 82.24, "learning_rate": 8.893496433864393e-06, "loss": 1.7987, "step": 28412500 }, { "epoch": 82.24, "learning_rate": 8.892772786217119e-06, "loss": 1.8425, "step": 28413000 }, { "epoch": 82.25, "learning_rate": 8.892049138569841e-06, "loss": 1.8254, "step": 28413500 }, { "epoch": 82.25, "learning_rate": 8.891325490922564e-06, "loss": 1.8316, "step": 28414000 }, { "epoch": 82.25, "learning_rate": 8.89060329057058e-06, "loss": 1.8516, "step": 28414500 }, { "epoch": 82.25, "learning_rate": 8.889879642923306e-06, "loss": 1.8286, "step": 28415000 }, { "epoch": 82.25, "learning_rate": 8.889155995276029e-06, "loss": 1.8485, "step": 28415500 }, { "epoch": 82.25, "learning_rate": 8.888432347628751e-06, "loss": 1.8349, "step": 28416000 }, { "epoch": 82.25, "learning_rate": 8.887708699981477e-06, "loss": 1.8176, "step": 28416500 }, { "epoch": 82.26, "learning_rate": 8.886985052334199e-06, "loss": 1.8285, "step": 28417000 }, { "epoch": 82.26, "learning_rate": 8.886262851982216e-06, "loss": 1.8561, "step": 28417500 }, { "epoch": 82.26, "learning_rate": 8.885539204334938e-06, "loss": 1.8243, "step": 28418000 }, { "epoch": 82.26, "learning_rate": 8.884817003982957e-06, "loss": 1.8061, "step": 28418500 }, { "epoch": 82.26, "learning_rate": 8.884093356335681e-06, "loss": 1.8296, "step": 28419000 }, { "epoch": 82.26, "learning_rate": 8.883369708688403e-06, "loss": 1.8227, "step": 28419500 }, { "epoch": 82.26, "learning_rate": 8.882646061041126e-06, "loss": 1.8628, "step": 28420000 }, { "epoch": 82.27, "learning_rate": 8.881922413393851e-06, "loss": 1.8153, "step": 28420500 }, { "epoch": 82.27, "learning_rate": 8.881198765746573e-06, "loss": 1.861, "step": 28421000 }, { "epoch": 82.27, "learning_rate": 8.880475118099296e-06, "loss": 1.8325, "step": 28421500 }, { "epoch": 82.27, "learning_rate": 8.87975147045202e-06, "loss": 1.8188, "step": 28422000 }, { "epoch": 82.27, "learning_rate": 8.879030717395334e-06, "loss": 1.8517, "step": 28422500 }, { "epoch": 82.27, "learning_rate": 8.878307069748056e-06, "loss": 1.862, "step": 28423000 }, { "epoch": 82.27, "learning_rate": 8.877583422100778e-06, "loss": 1.8475, "step": 28423500 }, { "epoch": 82.28, "learning_rate": 8.876859774453502e-06, "loss": 1.8272, "step": 28424000 }, { "epoch": 82.28, "learning_rate": 8.876136126806226e-06, "loss": 1.8287, "step": 28424500 }, { "epoch": 82.28, "learning_rate": 8.875413926454243e-06, "loss": 1.8199, "step": 28425000 }, { "epoch": 82.28, "learning_rate": 8.874690278806965e-06, "loss": 1.8448, "step": 28425500 }, { "epoch": 82.28, "learning_rate": 8.87396663115969e-06, "loss": 1.8464, "step": 28426000 }, { "epoch": 82.28, "learning_rate": 8.873242983512413e-06, "loss": 1.8324, "step": 28426500 }, { "epoch": 82.28, "learning_rate": 8.872519335865135e-06, "loss": 1.8519, "step": 28427000 }, { "epoch": 82.29, "learning_rate": 8.871795688217858e-06, "loss": 1.8291, "step": 28427500 }, { "epoch": 82.29, "learning_rate": 8.871072040570583e-06, "loss": 1.8158, "step": 28428000 }, { "epoch": 82.29, "learning_rate": 8.8703498402186e-06, "loss": 1.8545, "step": 28428500 }, { "epoch": 82.29, "learning_rate": 8.869626192571323e-06, "loss": 1.8381, "step": 28429000 }, { "epoch": 82.29, "learning_rate": 8.868902544924047e-06, "loss": 1.8284, "step": 28429500 }, { "epoch": 82.29, "learning_rate": 8.86817889727677e-06, "loss": 1.8352, "step": 28430000 }, { "epoch": 82.29, "learning_rate": 8.867455249629493e-06, "loss": 1.8108, "step": 28430500 }, { "epoch": 82.3, "learning_rate": 8.866734496572805e-06, "loss": 1.802, "step": 28431000 }, { "epoch": 82.3, "learning_rate": 8.866010848925527e-06, "loss": 1.8357, "step": 28431500 }, { "epoch": 82.3, "learning_rate": 8.865287201278253e-06, "loss": 1.8097, "step": 28432000 }, { "epoch": 82.3, "learning_rate": 8.86456500092627e-06, "loss": 1.8341, "step": 28432500 }, { "epoch": 82.3, "learning_rate": 8.863841353278992e-06, "loss": 1.8373, "step": 28433000 }, { "epoch": 82.3, "learning_rate": 8.863117705631715e-06, "loss": 1.8142, "step": 28433500 }, { "epoch": 82.3, "learning_rate": 8.86239405798444e-06, "loss": 1.8618, "step": 28434000 }, { "epoch": 82.31, "learning_rate": 8.861671857632458e-06, "loss": 1.863, "step": 28434500 }, { "epoch": 82.31, "learning_rate": 8.86094820998518e-06, "loss": 1.8519, "step": 28435000 }, { "epoch": 82.31, "learning_rate": 8.860224562337904e-06, "loss": 1.8191, "step": 28435500 }, { "epoch": 82.31, "learning_rate": 8.859500914690628e-06, "loss": 1.8158, "step": 28436000 }, { "epoch": 82.31, "learning_rate": 8.85877726704335e-06, "loss": 1.8041, "step": 28436500 }, { "epoch": 82.31, "learning_rate": 8.858053619396072e-06, "loss": 1.8203, "step": 28437000 }, { "epoch": 82.31, "learning_rate": 8.857329971748796e-06, "loss": 1.8218, "step": 28437500 }, { "epoch": 82.32, "learning_rate": 8.85660632410152e-06, "loss": 1.8511, "step": 28438000 }, { "epoch": 82.32, "learning_rate": 8.855882676454242e-06, "loss": 1.8448, "step": 28438500 }, { "epoch": 82.32, "learning_rate": 8.855159028806966e-06, "loss": 1.8203, "step": 28439000 }, { "epoch": 82.32, "learning_rate": 8.85443538115969e-06, "loss": 1.8408, "step": 28439500 }, { "epoch": 82.32, "learning_rate": 8.853711733512412e-06, "loss": 1.8369, "step": 28440000 }, { "epoch": 82.32, "learning_rate": 8.852988085865136e-06, "loss": 1.8273, "step": 28440500 }, { "epoch": 82.33, "learning_rate": 8.852265885513153e-06, "loss": 1.8336, "step": 28441000 }, { "epoch": 82.33, "learning_rate": 8.851542237865877e-06, "loss": 1.8449, "step": 28441500 }, { "epoch": 82.33, "learning_rate": 8.8508185902186e-06, "loss": 1.8333, "step": 28442000 }, { "epoch": 82.33, "learning_rate": 8.850094942571324e-06, "loss": 1.8448, "step": 28442500 }, { "epoch": 82.33, "learning_rate": 8.849371294924047e-06, "loss": 1.8241, "step": 28443000 }, { "epoch": 82.33, "learning_rate": 8.84864764727677e-06, "loss": 1.8554, "step": 28443500 }, { "epoch": 82.33, "learning_rate": 8.847923999629492e-06, "loss": 1.8122, "step": 28444000 }, { "epoch": 82.34, "learning_rate": 8.847200351982216e-06, "loss": 1.816, "step": 28444500 }, { "epoch": 82.34, "learning_rate": 8.84647670433494e-06, "loss": 1.8308, "step": 28445000 }, { "epoch": 82.34, "learning_rate": 8.845753056687662e-06, "loss": 1.8279, "step": 28445500 }, { "epoch": 82.34, "learning_rate": 8.84503085633568e-06, "loss": 1.8299, "step": 28446000 }, { "epoch": 82.34, "learning_rate": 8.844307208688403e-06, "loss": 1.8352, "step": 28446500 }, { "epoch": 82.34, "learning_rate": 8.843583561041127e-06, "loss": 1.8335, "step": 28447000 }, { "epoch": 82.34, "learning_rate": 8.84285991339385e-06, "loss": 1.8617, "step": 28447500 }, { "epoch": 82.35, "learning_rate": 8.842136265746573e-06, "loss": 1.8576, "step": 28448000 }, { "epoch": 82.35, "learning_rate": 8.84141406539459e-06, "loss": 1.8301, "step": 28448500 }, { "epoch": 82.35, "learning_rate": 8.840690417747314e-06, "loss": 1.8252, "step": 28449000 }, { "epoch": 82.35, "learning_rate": 8.839966770100037e-06, "loss": 1.8283, "step": 28449500 }, { "epoch": 82.35, "learning_rate": 8.83924312245276e-06, "loss": 1.841, "step": 28450000 }, { "epoch": 82.35, "learning_rate": 8.838520922100778e-06, "loss": 1.8472, "step": 28450500 }, { "epoch": 82.35, "learning_rate": 8.837797274453502e-06, "loss": 1.8331, "step": 28451000 }, { "epoch": 82.36, "learning_rate": 8.837073626806224e-06, "loss": 1.8177, "step": 28451500 }, { "epoch": 82.36, "learning_rate": 8.836349979158948e-06, "loss": 1.8439, "step": 28452000 }, { "epoch": 82.36, "learning_rate": 8.835626331511672e-06, "loss": 1.8496, "step": 28452500 }, { "epoch": 82.36, "learning_rate": 8.834902683864394e-06, "loss": 1.8568, "step": 28453000 }, { "epoch": 82.36, "learning_rate": 8.834179036217118e-06, "loss": 1.8262, "step": 28453500 }, { "epoch": 82.36, "learning_rate": 8.833456835865135e-06, "loss": 1.8523, "step": 28454000 }, { "epoch": 82.36, "learning_rate": 8.83273318821786e-06, "loss": 1.8232, "step": 28454500 }, { "epoch": 82.37, "learning_rate": 8.832009540570581e-06, "loss": 1.8563, "step": 28455000 }, { "epoch": 82.37, "learning_rate": 8.831285892923305e-06, "loss": 1.8057, "step": 28455500 }, { "epoch": 82.37, "learning_rate": 8.83056224527603e-06, "loss": 1.8408, "step": 28456000 }, { "epoch": 82.37, "learning_rate": 8.829838597628752e-06, "loss": 1.825, "step": 28456500 }, { "epoch": 82.37, "learning_rate": 8.829114949981476e-06, "loss": 1.8363, "step": 28457000 }, { "epoch": 82.37, "learning_rate": 8.828391302334198e-06, "loss": 1.8466, "step": 28457500 }, { "epoch": 82.37, "learning_rate": 8.827669101982217e-06, "loss": 1.8512, "step": 28458000 }, { "epoch": 82.38, "learning_rate": 8.826945454334939e-06, "loss": 1.8244, "step": 28458500 }, { "epoch": 82.38, "learning_rate": 8.826221806687663e-06, "loss": 1.8511, "step": 28459000 }, { "epoch": 82.38, "learning_rate": 8.825498159040385e-06, "loss": 1.8421, "step": 28459500 }, { "epoch": 82.38, "learning_rate": 8.824774511393109e-06, "loss": 1.8404, "step": 28460000 }, { "epoch": 82.38, "learning_rate": 8.824050863745833e-06, "loss": 1.858, "step": 28460500 }, { "epoch": 82.38, "learning_rate": 8.823327216098555e-06, "loss": 1.8502, "step": 28461000 }, { "epoch": 82.38, "learning_rate": 8.822605015746572e-06, "loss": 1.85, "step": 28461500 }, { "epoch": 82.39, "learning_rate": 8.821881368099296e-06, "loss": 1.8663, "step": 28462000 }, { "epoch": 82.39, "learning_rate": 8.82115772045202e-06, "loss": 1.8598, "step": 28462500 }, { "epoch": 82.39, "learning_rate": 8.820434072804743e-06, "loss": 1.8418, "step": 28463000 }, { "epoch": 82.39, "learning_rate": 8.819710425157466e-06, "loss": 1.8241, "step": 28463500 }, { "epoch": 82.39, "learning_rate": 8.818986777510189e-06, "loss": 1.8538, "step": 28464000 }, { "epoch": 82.39, "learning_rate": 8.818263129862913e-06, "loss": 1.8412, "step": 28464500 }, { "epoch": 82.39, "learning_rate": 8.817539482215637e-06, "loss": 1.8522, "step": 28465000 }, { "epoch": 82.4, "learning_rate": 8.816815834568359e-06, "loss": 1.8372, "step": 28465500 }, { "epoch": 82.4, "learning_rate": 8.816093634216378e-06, "loss": 1.8607, "step": 28466000 }, { "epoch": 82.4, "learning_rate": 8.8153699865691e-06, "loss": 1.8265, "step": 28466500 }, { "epoch": 82.4, "learning_rate": 8.814647786217117e-06, "loss": 1.8275, "step": 28467000 }, { "epoch": 82.4, "learning_rate": 8.813924138569841e-06, "loss": 1.8367, "step": 28467500 }, { "epoch": 82.4, "learning_rate": 8.813200490922565e-06, "loss": 1.8409, "step": 28468000 }, { "epoch": 82.4, "learning_rate": 8.812476843275287e-06, "loss": 1.8286, "step": 28468500 }, { "epoch": 82.41, "learning_rate": 8.811753195628011e-06, "loss": 1.8201, "step": 28469000 }, { "epoch": 82.41, "learning_rate": 8.811032442571323e-06, "loss": 1.8408, "step": 28469500 }, { "epoch": 82.41, "learning_rate": 8.810308794924046e-06, "loss": 1.8568, "step": 28470000 }, { "epoch": 82.41, "learning_rate": 8.80958514727677e-06, "loss": 1.8305, "step": 28470500 }, { "epoch": 82.41, "learning_rate": 8.808861499629492e-06, "loss": 1.8308, "step": 28471000 }, { "epoch": 82.41, "learning_rate": 8.808137851982216e-06, "loss": 1.8349, "step": 28471500 }, { "epoch": 82.41, "learning_rate": 8.80741420433494e-06, "loss": 1.8335, "step": 28472000 }, { "epoch": 82.42, "learning_rate": 8.806690556687662e-06, "loss": 1.846, "step": 28472500 }, { "epoch": 82.42, "learning_rate": 8.805966909040386e-06, "loss": 1.8307, "step": 28473000 }, { "epoch": 82.42, "learning_rate": 8.80524326139311e-06, "loss": 1.8195, "step": 28473500 }, { "epoch": 82.42, "learning_rate": 8.804521061041127e-06, "loss": 1.8118, "step": 28474000 }, { "epoch": 82.42, "learning_rate": 8.80379741339385e-06, "loss": 1.8352, "step": 28474500 }, { "epoch": 82.42, "learning_rate": 8.803073765746573e-06, "loss": 1.8719, "step": 28475000 }, { "epoch": 82.42, "learning_rate": 8.802350118099297e-06, "loss": 1.8515, "step": 28475500 }, { "epoch": 82.43, "learning_rate": 8.801627917747314e-06, "loss": 1.8392, "step": 28476000 }, { "epoch": 82.43, "learning_rate": 8.800904270100037e-06, "loss": 1.8293, "step": 28476500 }, { "epoch": 82.43, "learning_rate": 8.80018062245276e-06, "loss": 1.8371, "step": 28477000 }, { "epoch": 82.43, "learning_rate": 8.799456974805484e-06, "loss": 1.8333, "step": 28477500 }, { "epoch": 82.43, "learning_rate": 8.798734774453502e-06, "loss": 1.8115, "step": 28478000 }, { "epoch": 82.43, "learning_rate": 8.798011126806224e-06, "loss": 1.8524, "step": 28478500 }, { "epoch": 82.44, "learning_rate": 8.797287479158948e-06, "loss": 1.8574, "step": 28479000 }, { "epoch": 82.44, "learning_rate": 8.796563831511672e-06, "loss": 1.8229, "step": 28479500 }, { "epoch": 82.44, "learning_rate": 8.795840183864394e-06, "loss": 1.799, "step": 28480000 }, { "epoch": 82.44, "learning_rate": 8.795117983512413e-06, "loss": 1.8311, "step": 28480500 }, { "epoch": 82.44, "learning_rate": 8.794394335865135e-06, "loss": 1.8428, "step": 28481000 }, { "epoch": 82.44, "learning_rate": 8.793670688217859e-06, "loss": 1.8075, "step": 28481500 }, { "epoch": 82.44, "learning_rate": 8.792947040570581e-06, "loss": 1.8206, "step": 28482000 }, { "epoch": 82.45, "learning_rate": 8.792223392923305e-06, "loss": 1.8424, "step": 28482500 }, { "epoch": 82.45, "learning_rate": 8.791501192571323e-06, "loss": 1.8316, "step": 28483000 }, { "epoch": 82.45, "learning_rate": 8.790777544924046e-06, "loss": 1.8436, "step": 28483500 }, { "epoch": 82.45, "learning_rate": 8.790053897276769e-06, "loss": 1.8273, "step": 28484000 }, { "epoch": 82.45, "learning_rate": 8.789331696924788e-06, "loss": 1.8626, "step": 28484500 }, { "epoch": 82.45, "learning_rate": 8.788608049277512e-06, "loss": 1.8164, "step": 28485000 }, { "epoch": 82.45, "learning_rate": 8.787884401630234e-06, "loss": 1.8555, "step": 28485500 }, { "epoch": 82.46, "learning_rate": 8.787160753982956e-06, "loss": 1.8413, "step": 28486000 }, { "epoch": 82.46, "learning_rate": 8.78643710633568e-06, "loss": 1.8281, "step": 28486500 }, { "epoch": 82.46, "learning_rate": 8.785713458688404e-06, "loss": 1.8542, "step": 28487000 }, { "epoch": 82.46, "learning_rate": 8.784989811041126e-06, "loss": 1.8451, "step": 28487500 }, { "epoch": 82.46, "learning_rate": 8.78426616339385e-06, "loss": 1.8503, "step": 28488000 }, { "epoch": 82.46, "learning_rate": 8.783543963041867e-06, "loss": 1.8154, "step": 28488500 }, { "epoch": 82.46, "learning_rate": 8.782820315394591e-06, "loss": 1.8484, "step": 28489000 }, { "epoch": 82.47, "learning_rate": 8.782098115042609e-06, "loss": 1.8437, "step": 28489500 }, { "epoch": 82.47, "learning_rate": 8.781374467395332e-06, "loss": 1.8479, "step": 28490000 }, { "epoch": 82.47, "learning_rate": 8.780650819748056e-06, "loss": 1.8174, "step": 28490500 }, { "epoch": 82.47, "learning_rate": 8.779927172100779e-06, "loss": 1.8364, "step": 28491000 }, { "epoch": 82.47, "learning_rate": 8.779204971748796e-06, "loss": 1.8256, "step": 28491500 }, { "epoch": 82.47, "learning_rate": 8.77848132410152e-06, "loss": 1.8303, "step": 28492000 }, { "epoch": 82.47, "learning_rate": 8.777759123749537e-06, "loss": 1.8445, "step": 28492500 }, { "epoch": 82.48, "learning_rate": 8.777035476102261e-06, "loss": 1.8442, "step": 28493000 }, { "epoch": 82.48, "learning_rate": 8.776311828454983e-06, "loss": 1.816, "step": 28493500 }, { "epoch": 82.48, "learning_rate": 8.775588180807707e-06, "loss": 1.8397, "step": 28494000 }, { "epoch": 82.48, "learning_rate": 8.774864533160431e-06, "loss": 1.8031, "step": 28494500 }, { "epoch": 82.48, "learning_rate": 8.774140885513153e-06, "loss": 1.8424, "step": 28495000 }, { "epoch": 82.48, "learning_rate": 8.773417237865876e-06, "loss": 1.8462, "step": 28495500 }, { "epoch": 82.48, "learning_rate": 8.7726935902186e-06, "loss": 1.8433, "step": 28496000 }, { "epoch": 82.49, "learning_rate": 8.771969942571323e-06, "loss": 1.8394, "step": 28496500 }, { "epoch": 82.49, "learning_rate": 8.771246294924046e-06, "loss": 1.8478, "step": 28497000 }, { "epoch": 82.49, "learning_rate": 8.77052264727677e-06, "loss": 1.8345, "step": 28497500 }, { "epoch": 82.49, "learning_rate": 8.769798999629493e-06, "loss": 1.8478, "step": 28498000 }, { "epoch": 82.49, "learning_rate": 8.769075351982216e-06, "loss": 1.8205, "step": 28498500 }, { "epoch": 82.49, "learning_rate": 8.768353151630233e-06, "loss": 1.8541, "step": 28499000 }, { "epoch": 82.49, "learning_rate": 8.767629503982957e-06, "loss": 1.8373, "step": 28499500 }, { "epoch": 82.5, "learning_rate": 8.76690585633568e-06, "loss": 1.8243, "step": 28500000 }, { "epoch": 82.5, "learning_rate": 8.766183655983698e-06, "loss": 1.8379, "step": 28500500 }, { "epoch": 82.5, "learning_rate": 8.76546000833642e-06, "loss": 1.8287, "step": 28501000 }, { "epoch": 82.5, "learning_rate": 8.764736360689144e-06, "loss": 1.8368, "step": 28501500 }, { "epoch": 82.5, "learning_rate": 8.764012713041868e-06, "loss": 1.8371, "step": 28502000 }, { "epoch": 82.5, "learning_rate": 8.76328906539459e-06, "loss": 1.8287, "step": 28502500 }, { "epoch": 82.5, "learning_rate": 8.762565417747314e-06, "loss": 1.7944, "step": 28503000 }, { "epoch": 82.51, "learning_rate": 8.761841770100038e-06, "loss": 1.8361, "step": 28503500 }, { "epoch": 82.51, "learning_rate": 8.76111812245276e-06, "loss": 1.8452, "step": 28504000 }, { "epoch": 82.51, "learning_rate": 8.760394474805483e-06, "loss": 1.8374, "step": 28504500 }, { "epoch": 82.51, "learning_rate": 8.759672274453502e-06, "loss": 1.8383, "step": 28505000 }, { "epoch": 82.51, "learning_rate": 8.75895007410152e-06, "loss": 1.8206, "step": 28505500 }, { "epoch": 82.51, "learning_rate": 8.758226426454243e-06, "loss": 1.8102, "step": 28506000 }, { "epoch": 82.51, "learning_rate": 8.757502778806965e-06, "loss": 1.8268, "step": 28506500 }, { "epoch": 82.52, "learning_rate": 8.756779131159689e-06, "loss": 1.8314, "step": 28507000 }, { "epoch": 82.52, "learning_rate": 8.756055483512413e-06, "loss": 1.8186, "step": 28507500 }, { "epoch": 82.52, "learning_rate": 8.755331835865135e-06, "loss": 1.8387, "step": 28508000 }, { "epoch": 82.52, "learning_rate": 8.754608188217859e-06, "loss": 1.7927, "step": 28508500 }, { "epoch": 82.52, "learning_rate": 8.753884540570583e-06, "loss": 1.8516, "step": 28509000 }, { "epoch": 82.52, "learning_rate": 8.753160892923305e-06, "loss": 1.8235, "step": 28509500 }, { "epoch": 82.52, "learning_rate": 8.752438692571322e-06, "loss": 1.8465, "step": 28510000 }, { "epoch": 82.53, "learning_rate": 8.751715044924046e-06, "loss": 1.8421, "step": 28510500 }, { "epoch": 82.53, "learning_rate": 8.750992844572065e-06, "loss": 1.8027, "step": 28511000 }, { "epoch": 82.53, "learning_rate": 8.750270644220083e-06, "loss": 1.8222, "step": 28511500 }, { "epoch": 82.53, "learning_rate": 8.749546996572805e-06, "loss": 1.8179, "step": 28512000 }, { "epoch": 82.53, "learning_rate": 8.748823348925527e-06, "loss": 1.8419, "step": 28512500 }, { "epoch": 82.53, "learning_rate": 8.748099701278253e-06, "loss": 1.8399, "step": 28513000 }, { "epoch": 82.53, "learning_rate": 8.747376053630975e-06, "loss": 1.8485, "step": 28513500 }, { "epoch": 82.54, "learning_rate": 8.746652405983697e-06, "loss": 1.8619, "step": 28514000 }, { "epoch": 82.54, "learning_rate": 8.745928758336421e-06, "loss": 1.8501, "step": 28514500 }, { "epoch": 82.54, "learning_rate": 8.745205110689145e-06, "loss": 1.8676, "step": 28515000 }, { "epoch": 82.54, "learning_rate": 8.744481463041867e-06, "loss": 1.8439, "step": 28515500 }, { "epoch": 82.54, "learning_rate": 8.743757815394591e-06, "loss": 1.8407, "step": 28516000 }, { "epoch": 82.54, "learning_rate": 8.74303561504261e-06, "loss": 1.8367, "step": 28516500 }, { "epoch": 82.55, "learning_rate": 8.742311967395332e-06, "loss": 1.8405, "step": 28517000 }, { "epoch": 82.55, "learning_rate": 8.741588319748055e-06, "loss": 1.8534, "step": 28517500 }, { "epoch": 82.55, "learning_rate": 8.740864672100778e-06, "loss": 1.8075, "step": 28518000 }, { "epoch": 82.55, "learning_rate": 8.740141024453502e-06, "loss": 1.8136, "step": 28518500 }, { "epoch": 82.55, "learning_rate": 8.739417376806225e-06, "loss": 1.8403, "step": 28519000 }, { "epoch": 82.55, "learning_rate": 8.738693729158947e-06, "loss": 1.8307, "step": 28519500 }, { "epoch": 82.55, "learning_rate": 8.737971528806966e-06, "loss": 1.8365, "step": 28520000 }, { "epoch": 82.56, "learning_rate": 8.73724788115969e-06, "loss": 1.8427, "step": 28520500 }, { "epoch": 82.56, "learning_rate": 8.736524233512412e-06, "loss": 1.8349, "step": 28521000 }, { "epoch": 82.56, "learning_rate": 8.735800585865136e-06, "loss": 1.8179, "step": 28521500 }, { "epoch": 82.56, "learning_rate": 8.735078385513155e-06, "loss": 1.8109, "step": 28522000 }, { "epoch": 82.56, "learning_rate": 8.734354737865877e-06, "loss": 1.8189, "step": 28522500 }, { "epoch": 82.56, "learning_rate": 8.7336310902186e-06, "loss": 1.831, "step": 28523000 }, { "epoch": 82.56, "learning_rate": 8.732907442571323e-06, "loss": 1.8366, "step": 28523500 }, { "epoch": 82.57, "learning_rate": 8.732183794924047e-06, "loss": 1.847, "step": 28524000 }, { "epoch": 82.57, "learning_rate": 8.73146014727677e-06, "loss": 1.8394, "step": 28524500 }, { "epoch": 82.57, "learning_rate": 8.730736499629492e-06, "loss": 1.8456, "step": 28525000 }, { "epoch": 82.57, "learning_rate": 8.730012851982217e-06, "loss": 1.816, "step": 28525500 }, { "epoch": 82.57, "learning_rate": 8.72928920433494e-06, "loss": 1.8299, "step": 28526000 }, { "epoch": 82.57, "learning_rate": 8.728565556687662e-06, "loss": 1.8226, "step": 28526500 }, { "epoch": 82.57, "learning_rate": 8.727841909040386e-06, "loss": 1.8227, "step": 28527000 }, { "epoch": 82.58, "learning_rate": 8.72711826139311e-06, "loss": 1.8165, "step": 28527500 }, { "epoch": 82.58, "learning_rate": 8.726394613745832e-06, "loss": 1.8296, "step": 28528000 }, { "epoch": 82.58, "learning_rate": 8.725670966098556e-06, "loss": 1.8183, "step": 28528500 }, { "epoch": 82.58, "learning_rate": 8.724948765746573e-06, "loss": 1.8031, "step": 28529000 }, { "epoch": 82.58, "learning_rate": 8.724225118099297e-06, "loss": 1.8449, "step": 28529500 }, { "epoch": 82.58, "learning_rate": 8.72350147045202e-06, "loss": 1.8395, "step": 28530000 }, { "epoch": 82.58, "learning_rate": 8.722777822804743e-06, "loss": 1.8282, "step": 28530500 }, { "epoch": 82.59, "learning_rate": 8.722054175157467e-06, "loss": 1.8305, "step": 28531000 }, { "epoch": 82.59, "learning_rate": 8.72133052751019e-06, "loss": 1.8281, "step": 28531500 }, { "epoch": 82.59, "learning_rate": 8.720606879862911e-06, "loss": 1.8442, "step": 28532000 }, { "epoch": 82.59, "learning_rate": 8.719883232215637e-06, "loss": 1.8219, "step": 28532500 }, { "epoch": 82.59, "learning_rate": 8.719161031863654e-06, "loss": 1.8231, "step": 28533000 }, { "epoch": 82.59, "learning_rate": 8.718437384216377e-06, "loss": 1.8529, "step": 28533500 }, { "epoch": 82.59, "learning_rate": 8.7177137365691e-06, "loss": 1.8459, "step": 28534000 }, { "epoch": 82.6, "learning_rate": 8.716990088921824e-06, "loss": 1.8044, "step": 28534500 }, { "epoch": 82.6, "learning_rate": 8.716266441274547e-06, "loss": 1.8152, "step": 28535000 }, { "epoch": 82.6, "learning_rate": 8.715544240922564e-06, "loss": 1.8232, "step": 28535500 }, { "epoch": 82.6, "learning_rate": 8.714820593275288e-06, "loss": 1.8242, "step": 28536000 }, { "epoch": 82.6, "learning_rate": 8.714098392923305e-06, "loss": 1.8619, "step": 28536500 }, { "epoch": 82.6, "learning_rate": 8.713374745276029e-06, "loss": 1.8276, "step": 28537000 }, { "epoch": 82.6, "learning_rate": 8.712651097628751e-06, "loss": 1.8454, "step": 28537500 }, { "epoch": 82.61, "learning_rate": 8.711927449981475e-06, "loss": 1.8285, "step": 28538000 }, { "epoch": 82.61, "learning_rate": 8.711203802334199e-06, "loss": 1.8609, "step": 28538500 }, { "epoch": 82.61, "learning_rate": 8.710480154686921e-06, "loss": 1.8231, "step": 28539000 }, { "epoch": 82.61, "learning_rate": 8.709756507039645e-06, "loss": 1.8181, "step": 28539500 }, { "epoch": 82.61, "learning_rate": 8.709034306687663e-06, "loss": 1.8437, "step": 28540000 }, { "epoch": 82.61, "learning_rate": 8.708310659040386e-06, "loss": 1.8545, "step": 28540500 }, { "epoch": 82.61, "learning_rate": 8.707587011393109e-06, "loss": 1.8429, "step": 28541000 }, { "epoch": 82.62, "learning_rate": 8.706863363745833e-06, "loss": 1.8214, "step": 28541500 }, { "epoch": 82.62, "learning_rate": 8.706139716098557e-06, "loss": 1.8128, "step": 28542000 }, { "epoch": 82.62, "learning_rate": 8.705416068451279e-06, "loss": 1.8225, "step": 28542500 }, { "epoch": 82.62, "learning_rate": 8.704693868099296e-06, "loss": 1.8303, "step": 28543000 }, { "epoch": 82.62, "learning_rate": 8.70397022045202e-06, "loss": 1.841, "step": 28543500 }, { "epoch": 82.62, "learning_rate": 8.703246572804744e-06, "loss": 1.8157, "step": 28544000 }, { "epoch": 82.62, "learning_rate": 8.702522925157466e-06, "loss": 1.8306, "step": 28544500 }, { "epoch": 82.63, "learning_rate": 8.701799277510188e-06, "loss": 1.8594, "step": 28545000 }, { "epoch": 82.63, "learning_rate": 8.701077077158207e-06, "loss": 1.8155, "step": 28545500 }, { "epoch": 82.63, "learning_rate": 8.700353429510931e-06, "loss": 1.8173, "step": 28546000 }, { "epoch": 82.63, "learning_rate": 8.699629781863653e-06, "loss": 1.8363, "step": 28546500 }, { "epoch": 82.63, "learning_rate": 8.698906134216377e-06, "loss": 1.8297, "step": 28547000 }, { "epoch": 82.63, "learning_rate": 8.6981824865691e-06, "loss": 1.8253, "step": 28547500 }, { "epoch": 82.63, "learning_rate": 8.697458838921824e-06, "loss": 1.7961, "step": 28548000 }, { "epoch": 82.64, "learning_rate": 8.696735191274546e-06, "loss": 1.8393, "step": 28548500 }, { "epoch": 82.64, "learning_rate": 8.69601154362727e-06, "loss": 1.8364, "step": 28549000 }, { "epoch": 82.64, "learning_rate": 8.695289343275287e-06, "loss": 1.828, "step": 28549500 }, { "epoch": 82.64, "learning_rate": 8.694565695628011e-06, "loss": 1.8518, "step": 28550000 }, { "epoch": 82.64, "learning_rate": 8.693842047980733e-06, "loss": 1.8446, "step": 28550500 }, { "epoch": 82.64, "learning_rate": 8.693118400333457e-06, "loss": 1.8156, "step": 28551000 }, { "epoch": 82.64, "learning_rate": 8.692394752686181e-06, "loss": 1.8251, "step": 28551500 }, { "epoch": 82.65, "learning_rate": 8.691671105038903e-06, "loss": 1.8234, "step": 28552000 }, { "epoch": 82.65, "learning_rate": 8.690947457391627e-06, "loss": 1.8538, "step": 28552500 }, { "epoch": 82.65, "learning_rate": 8.690223809744351e-06, "loss": 1.8602, "step": 28553000 }, { "epoch": 82.65, "learning_rate": 8.689500162097073e-06, "loss": 1.8106, "step": 28553500 }, { "epoch": 82.65, "learning_rate": 8.68877796174509e-06, "loss": 1.8198, "step": 28554000 }, { "epoch": 82.65, "learning_rate": 8.688054314097814e-06, "loss": 1.841, "step": 28554500 }, { "epoch": 82.66, "learning_rate": 8.687330666450538e-06, "loss": 1.823, "step": 28555000 }, { "epoch": 82.66, "learning_rate": 8.68660701880326e-06, "loss": 1.8647, "step": 28555500 }, { "epoch": 82.66, "learning_rate": 8.685884818451278e-06, "loss": 1.8278, "step": 28556000 }, { "epoch": 82.66, "learning_rate": 8.685161170804002e-06, "loss": 1.8142, "step": 28556500 }, { "epoch": 82.66, "learning_rate": 8.684437523156726e-06, "loss": 1.8356, "step": 28557000 }, { "epoch": 82.66, "learning_rate": 8.683713875509448e-06, "loss": 1.8445, "step": 28557500 }, { "epoch": 82.66, "learning_rate": 8.682990227862172e-06, "loss": 1.8275, "step": 28558000 }, { "epoch": 82.67, "learning_rate": 8.682266580214894e-06, "loss": 1.8526, "step": 28558500 }, { "epoch": 82.67, "learning_rate": 8.681542932567618e-06, "loss": 1.8191, "step": 28559000 }, { "epoch": 82.67, "learning_rate": 8.680820732215635e-06, "loss": 1.8141, "step": 28559500 }, { "epoch": 82.67, "learning_rate": 8.68009708456836e-06, "loss": 1.8263, "step": 28560000 }, { "epoch": 82.67, "learning_rate": 8.679373436921081e-06, "loss": 1.8202, "step": 28560500 }, { "epoch": 82.67, "learning_rate": 8.678649789273805e-06, "loss": 1.8357, "step": 28561000 }, { "epoch": 82.67, "learning_rate": 8.67792614162653e-06, "loss": 1.8353, "step": 28561500 }, { "epoch": 82.68, "learning_rate": 8.677202493979252e-06, "loss": 1.8326, "step": 28562000 }, { "epoch": 82.68, "learning_rate": 8.676478846331975e-06, "loss": 1.8387, "step": 28562500 }, { "epoch": 82.68, "learning_rate": 8.675755198684698e-06, "loss": 1.8764, "step": 28563000 }, { "epoch": 82.68, "learning_rate": 8.675031551037422e-06, "loss": 1.8278, "step": 28563500 }, { "epoch": 82.68, "learning_rate": 8.674309350685439e-06, "loss": 1.8182, "step": 28564000 }, { "epoch": 82.68, "learning_rate": 8.673585703038163e-06, "loss": 1.8547, "step": 28564500 }, { "epoch": 82.68, "learning_rate": 8.672862055390887e-06, "loss": 1.8378, "step": 28565000 }, { "epoch": 82.69, "learning_rate": 8.672138407743609e-06, "loss": 1.8561, "step": 28565500 }, { "epoch": 82.69, "learning_rate": 8.671416207391626e-06, "loss": 1.8405, "step": 28566000 }, { "epoch": 82.69, "learning_rate": 8.67069255974435e-06, "loss": 1.8468, "step": 28566500 }, { "epoch": 82.69, "learning_rate": 8.669968912097074e-06, "loss": 1.8021, "step": 28567000 }, { "epoch": 82.69, "learning_rate": 8.669245264449796e-06, "loss": 1.8393, "step": 28567500 }, { "epoch": 82.69, "learning_rate": 8.66852161680252e-06, "loss": 1.8379, "step": 28568000 }, { "epoch": 82.69, "learning_rate": 8.667797969155242e-06, "loss": 1.833, "step": 28568500 }, { "epoch": 82.7, "learning_rate": 8.667074321507966e-06, "loss": 1.8348, "step": 28569000 }, { "epoch": 82.7, "learning_rate": 8.666350673860689e-06, "loss": 1.8372, "step": 28569500 }, { "epoch": 82.7, "learning_rate": 8.665627026213413e-06, "loss": 1.8384, "step": 28570000 }, { "epoch": 82.7, "learning_rate": 8.664903378566137e-06, "loss": 1.8634, "step": 28570500 }, { "epoch": 82.7, "learning_rate": 8.664179730918859e-06, "loss": 1.8345, "step": 28571000 }, { "epoch": 82.7, "learning_rate": 8.663456083271583e-06, "loss": 1.8177, "step": 28571500 }, { "epoch": 82.7, "learning_rate": 8.6627338829196e-06, "loss": 1.8609, "step": 28572000 }, { "epoch": 82.71, "learning_rate": 8.662010235272324e-06, "loss": 1.8058, "step": 28572500 }, { "epoch": 82.71, "learning_rate": 8.661288034920341e-06, "loss": 1.8271, "step": 28573000 }, { "epoch": 82.71, "learning_rate": 8.660564387273065e-06, "loss": 1.8117, "step": 28573500 }, { "epoch": 82.71, "learning_rate": 8.659840739625787e-06, "loss": 1.83, "step": 28574000 }, { "epoch": 82.71, "learning_rate": 8.659117091978511e-06, "loss": 1.8411, "step": 28574500 }, { "epoch": 82.71, "learning_rate": 8.658393444331233e-06, "loss": 1.8127, "step": 28575000 }, { "epoch": 82.71, "learning_rate": 8.657669796683957e-06, "loss": 1.8242, "step": 28575500 }, { "epoch": 82.72, "learning_rate": 8.656946149036681e-06, "loss": 1.8397, "step": 28576000 }, { "epoch": 82.72, "learning_rate": 8.656222501389404e-06, "loss": 1.8536, "step": 28576500 }, { "epoch": 82.72, "learning_rate": 8.65550030103742e-06, "loss": 1.8343, "step": 28577000 }, { "epoch": 82.72, "learning_rate": 8.65477810068544e-06, "loss": 1.8349, "step": 28577500 }, { "epoch": 82.72, "learning_rate": 8.654054453038164e-06, "loss": 1.8332, "step": 28578000 }, { "epoch": 82.72, "learning_rate": 8.653330805390886e-06, "loss": 1.8512, "step": 28578500 }, { "epoch": 82.72, "learning_rate": 8.652607157743608e-06, "loss": 1.8276, "step": 28579000 }, { "epoch": 82.73, "learning_rate": 8.651883510096332e-06, "loss": 1.8184, "step": 28579500 }, { "epoch": 82.73, "learning_rate": 8.651159862449056e-06, "loss": 1.8076, "step": 28580000 }, { "epoch": 82.73, "learning_rate": 8.650437662097073e-06, "loss": 1.8409, "step": 28580500 }, { "epoch": 82.73, "learning_rate": 8.649714014449795e-06, "loss": 1.829, "step": 28581000 }, { "epoch": 82.73, "learning_rate": 8.64899036680252e-06, "loss": 1.8469, "step": 28581500 }, { "epoch": 82.73, "learning_rate": 8.648266719155243e-06, "loss": 1.8227, "step": 28582000 }, { "epoch": 82.73, "learning_rate": 8.647543071507966e-06, "loss": 1.817, "step": 28582500 }, { "epoch": 82.74, "learning_rate": 8.64681942386069e-06, "loss": 1.8392, "step": 28583000 }, { "epoch": 82.74, "learning_rate": 8.646095776213413e-06, "loss": 1.8107, "step": 28583500 }, { "epoch": 82.74, "learning_rate": 8.645372128566136e-06, "loss": 1.7814, "step": 28584000 }, { "epoch": 82.74, "learning_rate": 8.644649928214153e-06, "loss": 1.8493, "step": 28584500 }, { "epoch": 82.74, "learning_rate": 8.643926280566877e-06, "loss": 1.8338, "step": 28585000 }, { "epoch": 82.74, "learning_rate": 8.6432026329196e-06, "loss": 1.8535, "step": 28585500 }, { "epoch": 82.74, "learning_rate": 8.642478985272323e-06, "loss": 1.8096, "step": 28586000 }, { "epoch": 82.75, "learning_rate": 8.64175678492034e-06, "loss": 1.826, "step": 28586500 }, { "epoch": 82.75, "learning_rate": 8.641034584568359e-06, "loss": 1.8317, "step": 28587000 }, { "epoch": 82.75, "learning_rate": 8.640310936921083e-06, "loss": 1.8384, "step": 28587500 }, { "epoch": 82.75, "learning_rate": 8.639587289273805e-06, "loss": 1.8246, "step": 28588000 }, { "epoch": 82.75, "learning_rate": 8.638863641626528e-06, "loss": 1.8007, "step": 28588500 }, { "epoch": 82.75, "learning_rate": 8.638139993979253e-06, "loss": 1.8517, "step": 28589000 }, { "epoch": 82.75, "learning_rate": 8.637416346331975e-06, "loss": 1.8155, "step": 28589500 }, { "epoch": 82.76, "learning_rate": 8.636692698684698e-06, "loss": 1.8151, "step": 28590000 }, { "epoch": 82.76, "learning_rate": 8.635970498332715e-06, "loss": 1.8489, "step": 28590500 }, { "epoch": 82.76, "learning_rate": 8.63524685068544e-06, "loss": 1.8183, "step": 28591000 }, { "epoch": 82.76, "learning_rate": 8.634523203038163e-06, "loss": 1.8485, "step": 28591500 }, { "epoch": 82.76, "learning_rate": 8.63380100268618e-06, "loss": 1.8218, "step": 28592000 }, { "epoch": 82.76, "learning_rate": 8.633077355038904e-06, "loss": 1.8489, "step": 28592500 }, { "epoch": 82.77, "learning_rate": 8.632353707391628e-06, "loss": 1.8492, "step": 28593000 }, { "epoch": 82.77, "learning_rate": 8.63163005974435e-06, "loss": 1.846, "step": 28593500 }, { "epoch": 82.77, "learning_rate": 8.630906412097072e-06, "loss": 1.8208, "step": 28594000 }, { "epoch": 82.77, "learning_rate": 8.630182764449796e-06, "loss": 1.805, "step": 28594500 }, { "epoch": 82.77, "learning_rate": 8.62945911680252e-06, "loss": 1.8336, "step": 28595000 }, { "epoch": 82.77, "learning_rate": 8.628735469155242e-06, "loss": 1.8419, "step": 28595500 }, { "epoch": 82.77, "learning_rate": 8.628011821507966e-06, "loss": 1.8498, "step": 28596000 }, { "epoch": 82.78, "learning_rate": 8.62728817386069e-06, "loss": 1.8157, "step": 28596500 }, { "epoch": 82.78, "learning_rate": 8.626564526213412e-06, "loss": 1.8405, "step": 28597000 }, { "epoch": 82.78, "learning_rate": 8.625840878566135e-06, "loss": 1.8264, "step": 28597500 }, { "epoch": 82.78, "learning_rate": 8.62511723091886e-06, "loss": 1.8258, "step": 28598000 }, { "epoch": 82.78, "learning_rate": 8.624393583271583e-06, "loss": 1.8135, "step": 28598500 }, { "epoch": 82.78, "learning_rate": 8.623669935624305e-06, "loss": 1.8234, "step": 28599000 }, { "epoch": 82.78, "learning_rate": 8.622946287977029e-06, "loss": 1.8132, "step": 28599500 }, { "epoch": 82.79, "learning_rate": 8.622224087625048e-06, "loss": 1.8353, "step": 28600000 }, { "epoch": 82.79, "learning_rate": 8.621501887273065e-06, "loss": 1.8138, "step": 28600500 }, { "epoch": 82.79, "learning_rate": 8.620779686921082e-06, "loss": 1.8512, "step": 28601000 }, { "epoch": 82.79, "learning_rate": 8.620056039273804e-06, "loss": 1.8468, "step": 28601500 }, { "epoch": 82.79, "learning_rate": 8.61933239162653e-06, "loss": 1.8184, "step": 28602000 }, { "epoch": 82.79, "learning_rate": 8.618608743979252e-06, "loss": 1.8447, "step": 28602500 }, { "epoch": 82.79, "learning_rate": 8.617885096331974e-06, "loss": 1.8263, "step": 28603000 }, { "epoch": 82.8, "learning_rate": 8.617161448684698e-06, "loss": 1.8525, "step": 28603500 }, { "epoch": 82.8, "learning_rate": 8.616437801037422e-06, "loss": 1.8223, "step": 28604000 }, { "epoch": 82.8, "learning_rate": 8.615714153390145e-06, "loss": 1.8225, "step": 28604500 }, { "epoch": 82.8, "learning_rate": 8.614990505742867e-06, "loss": 1.8433, "step": 28605000 }, { "epoch": 82.8, "learning_rate": 8.614266858095592e-06, "loss": 1.8558, "step": 28605500 }, { "epoch": 82.8, "learning_rate": 8.61354465774361e-06, "loss": 1.8224, "step": 28606000 }, { "epoch": 82.8, "learning_rate": 8.612822457391627e-06, "loss": 1.7999, "step": 28606500 }, { "epoch": 82.81, "learning_rate": 8.61209880974435e-06, "loss": 1.8387, "step": 28607000 }, { "epoch": 82.81, "learning_rate": 8.611375162097075e-06, "loss": 1.835, "step": 28607500 }, { "epoch": 82.81, "learning_rate": 8.610651514449797e-06, "loss": 1.8355, "step": 28608000 }, { "epoch": 82.81, "learning_rate": 8.60992786680252e-06, "loss": 1.8475, "step": 28608500 }, { "epoch": 82.81, "learning_rate": 8.609204219155243e-06, "loss": 1.7953, "step": 28609000 }, { "epoch": 82.81, "learning_rate": 8.608480571507967e-06, "loss": 1.8406, "step": 28609500 }, { "epoch": 82.81, "learning_rate": 8.60775692386069e-06, "loss": 1.8292, "step": 28610000 }, { "epoch": 82.82, "learning_rate": 8.607033276213412e-06, "loss": 1.8262, "step": 28610500 }, { "epoch": 82.82, "learning_rate": 8.606309628566137e-06, "loss": 1.8027, "step": 28611000 }, { "epoch": 82.82, "learning_rate": 8.60558598091886e-06, "loss": 1.8284, "step": 28611500 }, { "epoch": 82.82, "learning_rate": 8.604862333271582e-06, "loss": 1.8211, "step": 28612000 }, { "epoch": 82.82, "learning_rate": 8.604140132919599e-06, "loss": 1.8249, "step": 28612500 }, { "epoch": 82.82, "learning_rate": 8.603417932567618e-06, "loss": 1.8249, "step": 28613000 }, { "epoch": 82.82, "learning_rate": 8.602694284920342e-06, "loss": 1.8599, "step": 28613500 }, { "epoch": 82.83, "learning_rate": 8.601970637273064e-06, "loss": 1.8334, "step": 28614000 }, { "epoch": 82.83, "learning_rate": 8.601246989625788e-06, "loss": 1.8352, "step": 28614500 }, { "epoch": 82.83, "learning_rate": 8.600523341978512e-06, "loss": 1.8366, "step": 28615000 }, { "epoch": 82.83, "learning_rate": 8.599799694331234e-06, "loss": 1.8307, "step": 28615500 }, { "epoch": 82.83, "learning_rate": 8.599077493979251e-06, "loss": 1.8259, "step": 28616000 }, { "epoch": 82.83, "learning_rate": 8.598353846331975e-06, "loss": 1.8186, "step": 28616500 }, { "epoch": 82.83, "learning_rate": 8.5976301986847e-06, "loss": 1.842, "step": 28617000 }, { "epoch": 82.84, "learning_rate": 8.596906551037421e-06, "loss": 1.8776, "step": 28617500 }, { "epoch": 82.84, "learning_rate": 8.596182903390144e-06, "loss": 1.8266, "step": 28618000 }, { "epoch": 82.84, "learning_rate": 8.595460703038163e-06, "loss": 1.837, "step": 28618500 }, { "epoch": 82.84, "learning_rate": 8.594737055390887e-06, "loss": 1.8385, "step": 28619000 }, { "epoch": 82.84, "learning_rate": 8.594013407743609e-06, "loss": 1.847, "step": 28619500 }, { "epoch": 82.84, "learning_rate": 8.593289760096333e-06, "loss": 1.8257, "step": 28620000 }, { "epoch": 82.84, "learning_rate": 8.592566112449057e-06, "loss": 1.8447, "step": 28620500 }, { "epoch": 82.85, "learning_rate": 8.591842464801779e-06, "loss": 1.8255, "step": 28621000 }, { "epoch": 82.85, "learning_rate": 8.591120264449796e-06, "loss": 1.8462, "step": 28621500 }, { "epoch": 82.85, "learning_rate": 8.590398064097813e-06, "loss": 1.8388, "step": 28622000 }, { "epoch": 82.85, "learning_rate": 8.589674416450539e-06, "loss": 1.8272, "step": 28622500 }, { "epoch": 82.85, "learning_rate": 8.588950768803261e-06, "loss": 1.8246, "step": 28623000 }, { "epoch": 82.85, "learning_rate": 8.588227121155983e-06, "loss": 1.8365, "step": 28623500 }, { "epoch": 82.85, "learning_rate": 8.587503473508707e-06, "loss": 1.8425, "step": 28624000 }, { "epoch": 82.86, "learning_rate": 8.586781273156726e-06, "loss": 1.8485, "step": 28624500 }, { "epoch": 82.86, "learning_rate": 8.586057625509449e-06, "loss": 1.8515, "step": 28625000 }, { "epoch": 82.86, "learning_rate": 8.58533397786217e-06, "loss": 1.8293, "step": 28625500 }, { "epoch": 82.86, "learning_rate": 8.584610330214895e-06, "loss": 1.829, "step": 28626000 }, { "epoch": 82.86, "learning_rate": 8.583886682567619e-06, "loss": 1.8446, "step": 28626500 }, { "epoch": 82.86, "learning_rate": 8.583163034920341e-06, "loss": 1.8433, "step": 28627000 }, { "epoch": 82.86, "learning_rate": 8.582439387273065e-06, "loss": 1.8459, "step": 28627500 }, { "epoch": 82.87, "learning_rate": 8.581715739625789e-06, "loss": 1.8175, "step": 28628000 }, { "epoch": 82.87, "learning_rate": 8.580992091978511e-06, "loss": 1.8567, "step": 28628500 }, { "epoch": 82.87, "learning_rate": 8.580268444331233e-06, "loss": 1.8324, "step": 28629000 }, { "epoch": 82.87, "learning_rate": 8.579544796683957e-06, "loss": 1.8574, "step": 28629500 }, { "epoch": 82.87, "learning_rate": 8.578822596331976e-06, "loss": 1.8423, "step": 28630000 }, { "epoch": 82.87, "learning_rate": 8.578098948684698e-06, "loss": 1.8516, "step": 28630500 }, { "epoch": 82.88, "learning_rate": 8.57737530103742e-06, "loss": 1.848, "step": 28631000 }, { "epoch": 82.88, "learning_rate": 8.576651653390146e-06, "loss": 1.8146, "step": 28631500 }, { "epoch": 82.88, "learning_rate": 8.575928005742868e-06, "loss": 1.8206, "step": 28632000 }, { "epoch": 82.88, "learning_rate": 8.57520435809559e-06, "loss": 1.8331, "step": 28632500 }, { "epoch": 82.88, "learning_rate": 8.574480710448315e-06, "loss": 1.8281, "step": 28633000 }, { "epoch": 82.88, "learning_rate": 8.573757062801038e-06, "loss": 1.8292, "step": 28633500 }, { "epoch": 82.88, "learning_rate": 8.573034862449056e-06, "loss": 1.8128, "step": 28634000 }, { "epoch": 82.89, "learning_rate": 8.572311214801778e-06, "loss": 1.815, "step": 28634500 }, { "epoch": 82.89, "learning_rate": 8.571589014449797e-06, "loss": 1.8465, "step": 28635000 }, { "epoch": 82.89, "learning_rate": 8.57086536680252e-06, "loss": 1.8329, "step": 28635500 }, { "epoch": 82.89, "learning_rate": 8.570143166450538e-06, "loss": 1.8256, "step": 28636000 }, { "epoch": 82.89, "learning_rate": 8.56941951880326e-06, "loss": 1.8357, "step": 28636500 }, { "epoch": 82.89, "learning_rate": 8.568697318451278e-06, "loss": 1.8585, "step": 28637000 }, { "epoch": 82.89, "learning_rate": 8.567973670804002e-06, "loss": 1.8224, "step": 28637500 }, { "epoch": 82.9, "learning_rate": 8.567250023156725e-06, "loss": 1.8496, "step": 28638000 }, { "epoch": 82.9, "learning_rate": 8.566526375509448e-06, "loss": 1.833, "step": 28638500 }, { "epoch": 82.9, "learning_rate": 8.565802727862172e-06, "loss": 1.8468, "step": 28639000 }, { "epoch": 82.9, "learning_rate": 8.565079080214896e-06, "loss": 1.8224, "step": 28639500 }, { "epoch": 82.9, "learning_rate": 8.564355432567618e-06, "loss": 1.8252, "step": 28640000 }, { "epoch": 82.9, "learning_rate": 8.563631784920342e-06, "loss": 1.8406, "step": 28640500 }, { "epoch": 82.9, "learning_rate": 8.562908137273066e-06, "loss": 1.8357, "step": 28641000 }, { "epoch": 82.91, "learning_rate": 8.562184489625788e-06, "loss": 1.8418, "step": 28641500 }, { "epoch": 82.91, "learning_rate": 8.56146084197851e-06, "loss": 1.819, "step": 28642000 }, { "epoch": 82.91, "learning_rate": 8.560737194331234e-06, "loss": 1.8371, "step": 28642500 }, { "epoch": 82.91, "learning_rate": 8.560013546683958e-06, "loss": 1.8451, "step": 28643000 }, { "epoch": 82.91, "learning_rate": 8.55928989903668e-06, "loss": 1.8403, "step": 28643500 }, { "epoch": 82.91, "learning_rate": 8.558566251389404e-06, "loss": 1.8547, "step": 28644000 }, { "epoch": 82.91, "learning_rate": 8.557842603742128e-06, "loss": 1.827, "step": 28644500 }, { "epoch": 82.92, "learning_rate": 8.55711895609485e-06, "loss": 1.8187, "step": 28645000 }, { "epoch": 82.92, "learning_rate": 8.556395308447574e-06, "loss": 1.8223, "step": 28645500 }, { "epoch": 82.92, "learning_rate": 8.555673108095591e-06, "loss": 1.8515, "step": 28646000 }, { "epoch": 82.92, "learning_rate": 8.554949460448315e-06, "loss": 1.837, "step": 28646500 }, { "epoch": 82.92, "learning_rate": 8.554225812801038e-06, "loss": 1.8484, "step": 28647000 }, { "epoch": 82.92, "learning_rate": 8.553502165153762e-06, "loss": 1.8314, "step": 28647500 }, { "epoch": 82.92, "learning_rate": 8.552778517506484e-06, "loss": 1.8596, "step": 28648000 }, { "epoch": 82.93, "learning_rate": 8.552056317154503e-06, "loss": 1.8328, "step": 28648500 }, { "epoch": 82.93, "learning_rate": 8.551332669507225e-06, "loss": 1.8122, "step": 28649000 }, { "epoch": 82.93, "learning_rate": 8.550609021859949e-06, "loss": 1.8714, "step": 28649500 }, { "epoch": 82.93, "learning_rate": 8.549885374212671e-06, "loss": 1.8262, "step": 28650000 }, { "epoch": 82.93, "learning_rate": 8.54916317386069e-06, "loss": 1.8098, "step": 28650500 }, { "epoch": 82.93, "learning_rate": 8.548439526213412e-06, "loss": 1.8402, "step": 28651000 }, { "epoch": 82.93, "learning_rate": 8.547715878566136e-06, "loss": 1.8587, "step": 28651500 }, { "epoch": 82.94, "learning_rate": 8.54699223091886e-06, "loss": 1.8562, "step": 28652000 }, { "epoch": 82.94, "learning_rate": 8.546268583271582e-06, "loss": 1.8377, "step": 28652500 }, { "epoch": 82.94, "learning_rate": 8.5455463829196e-06, "loss": 1.819, "step": 28653000 }, { "epoch": 82.94, "learning_rate": 8.544822735272324e-06, "loss": 1.8415, "step": 28653500 }, { "epoch": 82.94, "learning_rate": 8.544099087625047e-06, "loss": 1.8537, "step": 28654000 }, { "epoch": 82.94, "learning_rate": 8.543376887273065e-06, "loss": 1.8301, "step": 28654500 }, { "epoch": 82.94, "learning_rate": 8.542653239625787e-06, "loss": 1.8421, "step": 28655000 }, { "epoch": 82.95, "learning_rate": 8.541929591978511e-06, "loss": 1.8311, "step": 28655500 }, { "epoch": 82.95, "learning_rate": 8.541205944331235e-06, "loss": 1.8311, "step": 28656000 }, { "epoch": 82.95, "learning_rate": 8.540482296683957e-06, "loss": 1.8402, "step": 28656500 }, { "epoch": 82.95, "learning_rate": 8.539758649036681e-06, "loss": 1.8473, "step": 28657000 }, { "epoch": 82.95, "learning_rate": 8.539035001389403e-06, "loss": 1.8241, "step": 28657500 }, { "epoch": 82.95, "learning_rate": 8.538311353742127e-06, "loss": 1.8271, "step": 28658000 }, { "epoch": 82.95, "learning_rate": 8.537587706094851e-06, "loss": 1.8187, "step": 28658500 }, { "epoch": 82.96, "learning_rate": 8.536865505742868e-06, "loss": 1.8231, "step": 28659000 }, { "epoch": 82.96, "learning_rate": 8.53614185809559e-06, "loss": 1.8409, "step": 28659500 }, { "epoch": 82.96, "learning_rate": 8.535418210448314e-06, "loss": 1.8413, "step": 28660000 }, { "epoch": 82.96, "learning_rate": 8.534694562801038e-06, "loss": 1.8463, "step": 28660500 }, { "epoch": 82.96, "learning_rate": 8.53397091515376e-06, "loss": 1.8611, "step": 28661000 }, { "epoch": 82.96, "learning_rate": 8.533247267506485e-06, "loss": 1.8282, "step": 28661500 }, { "epoch": 82.96, "learning_rate": 8.532525067154502e-06, "loss": 1.7978, "step": 28662000 }, { "epoch": 82.97, "learning_rate": 8.531801419507226e-06, "loss": 1.829, "step": 28662500 }, { "epoch": 82.97, "learning_rate": 8.531079219155243e-06, "loss": 1.8332, "step": 28663000 }, { "epoch": 82.97, "learning_rate": 8.530355571507967e-06, "loss": 1.83, "step": 28663500 }, { "epoch": 82.97, "learning_rate": 8.529631923860689e-06, "loss": 1.8597, "step": 28664000 }, { "epoch": 82.97, "learning_rate": 8.528908276213413e-06, "loss": 1.8296, "step": 28664500 }, { "epoch": 82.97, "learning_rate": 8.528184628566135e-06, "loss": 1.836, "step": 28665000 }, { "epoch": 82.97, "learning_rate": 8.52746098091886e-06, "loss": 1.854, "step": 28665500 }, { "epoch": 82.98, "learning_rate": 8.526737333271583e-06, "loss": 1.8243, "step": 28666000 }, { "epoch": 82.98, "learning_rate": 8.526013685624305e-06, "loss": 1.8226, "step": 28666500 }, { "epoch": 82.98, "learning_rate": 8.52529003797703e-06, "loss": 1.8301, "step": 28667000 }, { "epoch": 82.98, "learning_rate": 8.524566390329752e-06, "loss": 1.8566, "step": 28667500 }, { "epoch": 82.98, "learning_rate": 8.523842742682475e-06, "loss": 1.8444, "step": 28668000 }, { "epoch": 82.98, "learning_rate": 8.523119095035198e-06, "loss": 1.8206, "step": 28668500 }, { "epoch": 82.99, "learning_rate": 8.522396894683217e-06, "loss": 1.8227, "step": 28669000 }, { "epoch": 82.99, "learning_rate": 8.52167324703594e-06, "loss": 1.8123, "step": 28669500 }, { "epoch": 82.99, "learning_rate": 8.520951046683958e-06, "loss": 1.8587, "step": 28670000 }, { "epoch": 82.99, "learning_rate": 8.52022739903668e-06, "loss": 1.8301, "step": 28670500 }, { "epoch": 82.99, "learning_rate": 8.519503751389404e-06, "loss": 1.8423, "step": 28671000 }, { "epoch": 82.99, "learning_rate": 8.518780103742128e-06, "loss": 1.8202, "step": 28671500 }, { "epoch": 82.99, "learning_rate": 8.51805645609485e-06, "loss": 1.8189, "step": 28672000 }, { "epoch": 83.0, "learning_rate": 8.517334255742867e-06, "loss": 1.8269, "step": 28672500 }, { "epoch": 83.0, "learning_rate": 8.516610608095591e-06, "loss": 1.8158, "step": 28673000 }, { "epoch": 83.0, "learning_rate": 8.515886960448315e-06, "loss": 1.8249, "step": 28673500 }, { "epoch": 83.0, "learning_rate": 8.515163312801037e-06, "loss": 1.8303, "step": 28674000 }, { "epoch": 83.0, "eval_accuracy": 0.687225884893444, "eval_accuracy_mlm": 0.6562076438847384, "eval_accuracy_nsp": 0.8535722960864922, "eval_loss": 2.1801021099090576, "eval_runtime": 331.764, "eval_samples_per_second": 1315.351, "eval_steps_per_second": 54.807, "step": 28674176 }, { "epoch": 83.0, "learning_rate": 8.514439665153761e-06, "loss": 1.8254, "step": 28674500 }, { "epoch": 83.0, "learning_rate": 8.513717464801779e-06, "loss": 1.8495, "step": 28675000 }, { "epoch": 83.0, "learning_rate": 8.512993817154503e-06, "loss": 1.8164, "step": 28675500 }, { "epoch": 83.01, "learning_rate": 8.51227161680252e-06, "loss": 1.8121, "step": 28676000 }, { "epoch": 83.01, "learning_rate": 8.511547969155242e-06, "loss": 1.8166, "step": 28676500 }, { "epoch": 83.01, "learning_rate": 8.510824321507966e-06, "loss": 1.8246, "step": 28677000 }, { "epoch": 83.01, "learning_rate": 8.51010067386069e-06, "loss": 1.8081, "step": 28677500 }, { "epoch": 83.01, "learning_rate": 8.509377026213412e-06, "loss": 1.7938, "step": 28678000 }, { "epoch": 83.01, "learning_rate": 8.508653378566136e-06, "loss": 1.8412, "step": 28678500 }, { "epoch": 83.01, "learning_rate": 8.507931178214153e-06, "loss": 1.8292, "step": 28679000 }, { "epoch": 83.02, "learning_rate": 8.507207530566877e-06, "loss": 1.8329, "step": 28679500 }, { "epoch": 83.02, "learning_rate": 8.506485330214895e-06, "loss": 1.8243, "step": 28680000 }, { "epoch": 83.02, "learning_rate": 8.505761682567618e-06, "loss": 1.8325, "step": 28680500 }, { "epoch": 83.02, "learning_rate": 8.50503803492034e-06, "loss": 1.8277, "step": 28681000 }, { "epoch": 83.02, "learning_rate": 8.504314387273065e-06, "loss": 1.7974, "step": 28681500 }, { "epoch": 83.02, "learning_rate": 8.503590739625787e-06, "loss": 1.81, "step": 28682000 }, { "epoch": 83.02, "learning_rate": 8.50286709197851e-06, "loss": 1.7936, "step": 28682500 }, { "epoch": 83.03, "learning_rate": 8.502143444331235e-06, "loss": 1.8275, "step": 28683000 }, { "epoch": 83.03, "learning_rate": 8.501419796683957e-06, "loss": 1.8421, "step": 28683500 }, { "epoch": 83.03, "learning_rate": 8.500696149036681e-06, "loss": 1.8323, "step": 28684000 }, { "epoch": 83.03, "learning_rate": 8.499972501389405e-06, "loss": 1.8137, "step": 28684500 }, { "epoch": 83.03, "learning_rate": 8.499248853742127e-06, "loss": 1.8433, "step": 28685000 }, { "epoch": 83.03, "learning_rate": 8.49852520609485e-06, "loss": 1.8188, "step": 28685500 }, { "epoch": 83.03, "learning_rate": 8.497801558447573e-06, "loss": 1.8444, "step": 28686000 }, { "epoch": 83.04, "learning_rate": 8.497077910800297e-06, "loss": 1.7865, "step": 28686500 }, { "epoch": 83.04, "learning_rate": 8.49635426315302e-06, "loss": 1.8267, "step": 28687000 }, { "epoch": 83.04, "learning_rate": 8.495632062801037e-06, "loss": 1.8301, "step": 28687500 }, { "epoch": 83.04, "learning_rate": 8.494908415153762e-06, "loss": 1.8153, "step": 28688000 }, { "epoch": 83.04, "learning_rate": 8.494184767506484e-06, "loss": 1.8184, "step": 28688500 }, { "epoch": 83.04, "learning_rate": 8.493461119859207e-06, "loss": 1.8243, "step": 28689000 }, { "epoch": 83.04, "learning_rate": 8.492738919507224e-06, "loss": 1.8226, "step": 28689500 }, { "epoch": 83.05, "learning_rate": 8.49201527185995e-06, "loss": 1.848, "step": 28690000 }, { "epoch": 83.05, "learning_rate": 8.491291624212672e-06, "loss": 1.8042, "step": 28690500 }, { "epoch": 83.05, "learning_rate": 8.490567976565394e-06, "loss": 1.8137, "step": 28691000 }, { "epoch": 83.05, "learning_rate": 8.489845776213413e-06, "loss": 1.8397, "step": 28691500 }, { "epoch": 83.05, "learning_rate": 8.489122128566137e-06, "loss": 1.8053, "step": 28692000 }, { "epoch": 83.05, "learning_rate": 8.488398480918859e-06, "loss": 1.806, "step": 28692500 }, { "epoch": 83.05, "learning_rate": 8.487674833271581e-06, "loss": 1.8324, "step": 28693000 }, { "epoch": 83.06, "learning_rate": 8.486951185624307e-06, "loss": 1.8516, "step": 28693500 }, { "epoch": 83.06, "learning_rate": 8.48622753797703e-06, "loss": 1.8303, "step": 28694000 }, { "epoch": 83.06, "learning_rate": 8.485503890329751e-06, "loss": 1.8139, "step": 28694500 }, { "epoch": 83.06, "learning_rate": 8.484780242682475e-06, "loss": 1.8336, "step": 28695000 }, { "epoch": 83.06, "learning_rate": 8.4840565950352e-06, "loss": 1.819, "step": 28695500 }, { "epoch": 83.06, "learning_rate": 8.483334394683217e-06, "loss": 1.831, "step": 28696000 }, { "epoch": 83.06, "learning_rate": 8.482610747035939e-06, "loss": 1.8414, "step": 28696500 }, { "epoch": 83.07, "learning_rate": 8.481888546683956e-06, "loss": 1.803, "step": 28697000 }, { "epoch": 83.07, "learning_rate": 8.481164899036682e-06, "loss": 1.8122, "step": 28697500 }, { "epoch": 83.07, "learning_rate": 8.480441251389404e-06, "loss": 1.8398, "step": 28698000 }, { "epoch": 83.07, "learning_rate": 8.479717603742126e-06, "loss": 1.7984, "step": 28698500 }, { "epoch": 83.07, "learning_rate": 8.47899395609485e-06, "loss": 1.8575, "step": 28699000 }, { "epoch": 83.07, "learning_rate": 8.478270308447574e-06, "loss": 1.8223, "step": 28699500 }, { "epoch": 83.07, "learning_rate": 8.477546660800296e-06, "loss": 1.8165, "step": 28700000 }, { "epoch": 83.08, "learning_rate": 8.47682301315302e-06, "loss": 1.8243, "step": 28700500 }, { "epoch": 83.08, "learning_rate": 8.476100812801039e-06, "loss": 1.8006, "step": 28701000 }, { "epoch": 83.08, "learning_rate": 8.475377165153761e-06, "loss": 1.8187, "step": 28701500 }, { "epoch": 83.08, "learning_rate": 8.474653517506484e-06, "loss": 1.8338, "step": 28702000 }, { "epoch": 83.08, "learning_rate": 8.473929869859207e-06, "loss": 1.8302, "step": 28702500 }, { "epoch": 83.08, "learning_rate": 8.473206222211931e-06, "loss": 1.8223, "step": 28703000 }, { "epoch": 83.08, "learning_rate": 8.472482574564654e-06, "loss": 1.7938, "step": 28703500 }, { "epoch": 83.09, "learning_rate": 8.471758926917376e-06, "loss": 1.8182, "step": 28704000 }, { "epoch": 83.09, "learning_rate": 8.471036726565395e-06, "loss": 1.8518, "step": 28704500 }, { "epoch": 83.09, "learning_rate": 8.470313078918119e-06, "loss": 1.82, "step": 28705000 }, { "epoch": 83.09, "learning_rate": 8.469589431270841e-06, "loss": 1.8405, "step": 28705500 }, { "epoch": 83.09, "learning_rate": 8.468865783623565e-06, "loss": 1.8045, "step": 28706000 }, { "epoch": 83.09, "learning_rate": 8.468142135976289e-06, "loss": 1.8214, "step": 28706500 }, { "epoch": 83.1, "learning_rate": 8.467418488329011e-06, "loss": 1.8155, "step": 28707000 }, { "epoch": 83.1, "learning_rate": 8.466694840681733e-06, "loss": 1.8045, "step": 28707500 }, { "epoch": 83.1, "learning_rate": 8.465971193034459e-06, "loss": 1.8159, "step": 28708000 }, { "epoch": 83.1, "learning_rate": 8.465248992682476e-06, "loss": 1.8163, "step": 28708500 }, { "epoch": 83.1, "learning_rate": 8.464525345035198e-06, "loss": 1.8122, "step": 28709000 }, { "epoch": 83.1, "learning_rate": 8.46380169738792e-06, "loss": 1.8337, "step": 28709500 }, { "epoch": 83.1, "learning_rate": 8.46307949703594e-06, "loss": 1.8132, "step": 28710000 }, { "epoch": 83.11, "learning_rate": 8.462357296683959e-06, "loss": 1.8283, "step": 28710500 }, { "epoch": 83.11, "learning_rate": 8.46163364903668e-06, "loss": 1.8366, "step": 28711000 }, { "epoch": 83.11, "learning_rate": 8.460910001389403e-06, "loss": 1.8314, "step": 28711500 }, { "epoch": 83.11, "learning_rate": 8.460186353742127e-06, "loss": 1.8366, "step": 28712000 }, { "epoch": 83.11, "learning_rate": 8.45946270609485e-06, "loss": 1.8068, "step": 28712500 }, { "epoch": 83.11, "learning_rate": 8.458739058447573e-06, "loss": 1.8367, "step": 28713000 }, { "epoch": 83.11, "learning_rate": 8.458015410800297e-06, "loss": 1.8315, "step": 28713500 }, { "epoch": 83.12, "learning_rate": 8.457291763153021e-06, "loss": 1.8191, "step": 28714000 }, { "epoch": 83.12, "learning_rate": 8.456568115505743e-06, "loss": 1.8274, "step": 28714500 }, { "epoch": 83.12, "learning_rate": 8.455844467858465e-06, "loss": 1.807, "step": 28715000 }, { "epoch": 83.12, "learning_rate": 8.455122267506484e-06, "loss": 1.8183, "step": 28715500 }, { "epoch": 83.12, "learning_rate": 8.454400067154503e-06, "loss": 1.8307, "step": 28716000 }, { "epoch": 83.12, "learning_rate": 8.453676419507226e-06, "loss": 1.8307, "step": 28716500 }, { "epoch": 83.12, "learning_rate": 8.452952771859948e-06, "loss": 1.8307, "step": 28717000 }, { "epoch": 83.13, "learning_rate": 8.452229124212672e-06, "loss": 1.8421, "step": 28717500 }, { "epoch": 83.13, "learning_rate": 8.451505476565396e-06, "loss": 1.8123, "step": 28718000 }, { "epoch": 83.13, "learning_rate": 8.450781828918118e-06, "loss": 1.8319, "step": 28718500 }, { "epoch": 83.13, "learning_rate": 8.450058181270842e-06, "loss": 1.8179, "step": 28719000 }, { "epoch": 83.13, "learning_rate": 8.449334533623566e-06, "loss": 1.8224, "step": 28719500 }, { "epoch": 83.13, "learning_rate": 8.448610885976288e-06, "loss": 1.8335, "step": 28720000 }, { "epoch": 83.13, "learning_rate": 8.447888685624305e-06, "loss": 1.8402, "step": 28720500 }, { "epoch": 83.14, "learning_rate": 8.447165037977029e-06, "loss": 1.8365, "step": 28721000 }, { "epoch": 83.14, "learning_rate": 8.446441390329753e-06, "loss": 1.8157, "step": 28721500 }, { "epoch": 83.14, "learning_rate": 8.445717742682475e-06, "loss": 1.8243, "step": 28722000 }, { "epoch": 83.14, "learning_rate": 8.444995542330493e-06, "loss": 1.8181, "step": 28722500 }, { "epoch": 83.14, "learning_rate": 8.44427334197851e-06, "loss": 1.8086, "step": 28723000 }, { "epoch": 83.14, "learning_rate": 8.443549694331235e-06, "loss": 1.8065, "step": 28723500 }, { "epoch": 83.14, "learning_rate": 8.442826046683958e-06, "loss": 1.8283, "step": 28724000 }, { "epoch": 83.15, "learning_rate": 8.44210239903668e-06, "loss": 1.8231, "step": 28724500 }, { "epoch": 83.15, "learning_rate": 8.441378751389404e-06, "loss": 1.7976, "step": 28725000 }, { "epoch": 83.15, "learning_rate": 8.440655103742128e-06, "loss": 1.8206, "step": 28725500 }, { "epoch": 83.15, "learning_rate": 8.43993145609485e-06, "loss": 1.8042, "step": 28726000 }, { "epoch": 83.15, "learning_rate": 8.439207808447574e-06, "loss": 1.8385, "step": 28726500 }, { "epoch": 83.15, "learning_rate": 8.438484160800298e-06, "loss": 1.8249, "step": 28727000 }, { "epoch": 83.15, "learning_rate": 8.43776051315302e-06, "loss": 1.8275, "step": 28727500 }, { "epoch": 83.16, "learning_rate": 8.437036865505742e-06, "loss": 1.8431, "step": 28728000 }, { "epoch": 83.16, "learning_rate": 8.436313217858466e-06, "loss": 1.8389, "step": 28728500 }, { "epoch": 83.16, "learning_rate": 8.43558957021119e-06, "loss": 1.8422, "step": 28729000 }, { "epoch": 83.16, "learning_rate": 8.434865922563912e-06, "loss": 1.8327, "step": 28729500 }, { "epoch": 83.16, "learning_rate": 8.434142274916636e-06, "loss": 1.8088, "step": 28730000 }, { "epoch": 83.16, "learning_rate": 8.433420074564655e-06, "loss": 1.8367, "step": 28730500 }, { "epoch": 83.16, "learning_rate": 8.432696426917377e-06, "loss": 1.8296, "step": 28731000 }, { "epoch": 83.17, "learning_rate": 8.4319727792701e-06, "loss": 1.8361, "step": 28731500 }, { "epoch": 83.17, "learning_rate": 8.431249131622824e-06, "loss": 1.8283, "step": 28732000 }, { "epoch": 83.17, "learning_rate": 8.430525483975548e-06, "loss": 1.841, "step": 28732500 }, { "epoch": 83.17, "learning_rate": 8.42980183632827e-06, "loss": 1.8105, "step": 28733000 }, { "epoch": 83.17, "learning_rate": 8.429078188680994e-06, "loss": 1.8384, "step": 28733500 }, { "epoch": 83.17, "learning_rate": 8.428355988329011e-06, "loss": 1.8336, "step": 28734000 }, { "epoch": 83.17, "learning_rate": 8.427632340681735e-06, "loss": 1.8189, "step": 28734500 }, { "epoch": 83.18, "learning_rate": 8.426908693034457e-06, "loss": 1.836, "step": 28735000 }, { "epoch": 83.18, "learning_rate": 8.426185045387181e-06, "loss": 1.8075, "step": 28735500 }, { "epoch": 83.18, "learning_rate": 8.425461397739905e-06, "loss": 1.8337, "step": 28736000 }, { "epoch": 83.18, "learning_rate": 8.424737750092627e-06, "loss": 1.7994, "step": 28736500 }, { "epoch": 83.18, "learning_rate": 8.424015549740644e-06, "loss": 1.8136, "step": 28737000 }, { "epoch": 83.18, "learning_rate": 8.423291902093368e-06, "loss": 1.7992, "step": 28737500 }, { "epoch": 83.18, "learning_rate": 8.422568254446092e-06, "loss": 1.8304, "step": 28738000 }, { "epoch": 83.19, "learning_rate": 8.421844606798815e-06, "loss": 1.8181, "step": 28738500 }, { "epoch": 83.19, "learning_rate": 8.421120959151538e-06, "loss": 1.8149, "step": 28739000 }, { "epoch": 83.19, "learning_rate": 8.42039731150426e-06, "loss": 1.8088, "step": 28739500 }, { "epoch": 83.19, "learning_rate": 8.41967511115228e-06, "loss": 1.8259, "step": 28740000 }, { "epoch": 83.19, "learning_rate": 8.418951463505002e-06, "loss": 1.838, "step": 28740500 }, { "epoch": 83.19, "learning_rate": 8.418227815857726e-06, "loss": 1.8466, "step": 28741000 }, { "epoch": 83.19, "learning_rate": 8.41750416821045e-06, "loss": 1.8039, "step": 28741500 }, { "epoch": 83.2, "learning_rate": 8.416780520563172e-06, "loss": 1.8311, "step": 28742000 }, { "epoch": 83.2, "learning_rate": 8.41605832021119e-06, "loss": 1.8247, "step": 28742500 }, { "epoch": 83.2, "learning_rate": 8.415336119859208e-06, "loss": 1.8006, "step": 28743000 }, { "epoch": 83.2, "learning_rate": 8.41461247221193e-06, "loss": 1.8308, "step": 28743500 }, { "epoch": 83.2, "learning_rate": 8.413888824564654e-06, "loss": 1.8306, "step": 28744000 }, { "epoch": 83.2, "learning_rate": 8.413165176917377e-06, "loss": 1.8031, "step": 28744500 }, { "epoch": 83.21, "learning_rate": 8.4124415292701e-06, "loss": 1.8131, "step": 28745000 }, { "epoch": 83.21, "learning_rate": 8.411717881622824e-06, "loss": 1.8525, "step": 28745500 }, { "epoch": 83.21, "learning_rate": 8.410995681270842e-06, "loss": 1.8562, "step": 28746000 }, { "epoch": 83.21, "learning_rate": 8.410272033623564e-06, "loss": 1.8366, "step": 28746500 }, { "epoch": 83.21, "learning_rate": 8.409548385976288e-06, "loss": 1.7926, "step": 28747000 }, { "epoch": 83.21, "learning_rate": 8.408824738329012e-06, "loss": 1.8068, "step": 28747500 }, { "epoch": 83.21, "learning_rate": 8.408101090681734e-06, "loss": 1.8556, "step": 28748000 }, { "epoch": 83.22, "learning_rate": 8.407377443034458e-06, "loss": 1.8362, "step": 28748500 }, { "epoch": 83.22, "learning_rate": 8.40665379538718e-06, "loss": 1.8009, "step": 28749000 }, { "epoch": 83.22, "learning_rate": 8.405930147739904e-06, "loss": 1.8361, "step": 28749500 }, { "epoch": 83.22, "learning_rate": 8.405206500092628e-06, "loss": 1.8001, "step": 28750000 }, { "epoch": 83.22, "learning_rate": 8.404484299740645e-06, "loss": 1.8239, "step": 28750500 }, { "epoch": 83.22, "learning_rate": 8.40376065209337e-06, "loss": 1.8227, "step": 28751000 }, { "epoch": 83.22, "learning_rate": 8.403037004446091e-06, "loss": 1.8195, "step": 28751500 }, { "epoch": 83.23, "learning_rate": 8.402313356798815e-06, "loss": 1.8449, "step": 28752000 }, { "epoch": 83.23, "learning_rate": 8.401589709151538e-06, "loss": 1.8322, "step": 28752500 }, { "epoch": 83.23, "learning_rate": 8.400866061504262e-06, "loss": 1.8197, "step": 28753000 }, { "epoch": 83.23, "learning_rate": 8.400142413856984e-06, "loss": 1.861, "step": 28753500 }, { "epoch": 83.23, "learning_rate": 8.399420213505003e-06, "loss": 1.8365, "step": 28754000 }, { "epoch": 83.23, "learning_rate": 8.398696565857725e-06, "loss": 1.8226, "step": 28754500 }, { "epoch": 83.23, "learning_rate": 8.397972918210449e-06, "loss": 1.8119, "step": 28755000 }, { "epoch": 83.24, "learning_rate": 8.397249270563173e-06, "loss": 1.8112, "step": 28755500 }, { "epoch": 83.24, "learning_rate": 8.396528517506485e-06, "loss": 1.8356, "step": 28756000 }, { "epoch": 83.24, "learning_rate": 8.395804869859207e-06, "loss": 1.8347, "step": 28756500 }, { "epoch": 83.24, "learning_rate": 8.395081222211931e-06, "loss": 1.8428, "step": 28757000 }, { "epoch": 83.24, "learning_rate": 8.394357574564653e-06, "loss": 1.8291, "step": 28757500 }, { "epoch": 83.24, "learning_rate": 8.393633926917377e-06, "loss": 1.8306, "step": 28758000 }, { "epoch": 83.24, "learning_rate": 8.3929102792701e-06, "loss": 1.8298, "step": 28758500 }, { "epoch": 83.25, "learning_rate": 8.392186631622824e-06, "loss": 1.8381, "step": 28759000 }, { "epoch": 83.25, "learning_rate": 8.391462983975547e-06, "loss": 1.8123, "step": 28759500 }, { "epoch": 83.25, "learning_rate": 8.39073933632827e-06, "loss": 1.8257, "step": 28760000 }, { "epoch": 83.25, "learning_rate": 8.390017135976289e-06, "loss": 1.85, "step": 28760500 }, { "epoch": 83.25, "learning_rate": 8.389293488329011e-06, "loss": 1.8274, "step": 28761000 }, { "epoch": 83.25, "learning_rate": 8.388569840681735e-06, "loss": 1.8243, "step": 28761500 }, { "epoch": 83.25, "learning_rate": 8.387846193034457e-06, "loss": 1.849, "step": 28762000 }, { "epoch": 83.26, "learning_rate": 8.387122545387181e-06, "loss": 1.8079, "step": 28762500 }, { "epoch": 83.26, "learning_rate": 8.386398897739905e-06, "loss": 1.8406, "step": 28763000 }, { "epoch": 83.26, "learning_rate": 8.385675250092627e-06, "loss": 1.8249, "step": 28763500 }, { "epoch": 83.26, "learning_rate": 8.384953049740644e-06, "loss": 1.8347, "step": 28764000 }, { "epoch": 83.26, "learning_rate": 8.384229402093368e-06, "loss": 1.8125, "step": 28764500 }, { "epoch": 83.26, "learning_rate": 8.383505754446092e-06, "loss": 1.838, "step": 28765000 }, { "epoch": 83.26, "learning_rate": 8.382782106798814e-06, "loss": 1.8367, "step": 28765500 }, { "epoch": 83.27, "learning_rate": 8.382059906446832e-06, "loss": 1.8144, "step": 28766000 }, { "epoch": 83.27, "learning_rate": 8.38133770609485e-06, "loss": 1.813, "step": 28766500 }, { "epoch": 83.27, "learning_rate": 8.380614058447573e-06, "loss": 1.8348, "step": 28767000 }, { "epoch": 83.27, "learning_rate": 8.379890410800297e-06, "loss": 1.8301, "step": 28767500 }, { "epoch": 83.27, "learning_rate": 8.379166763153019e-06, "loss": 1.8282, "step": 28768000 }, { "epoch": 83.27, "learning_rate": 8.378443115505743e-06, "loss": 1.8081, "step": 28768500 }, { "epoch": 83.27, "learning_rate": 8.377719467858467e-06, "loss": 1.8173, "step": 28769000 }, { "epoch": 83.28, "learning_rate": 8.376995820211189e-06, "loss": 1.8356, "step": 28769500 }, { "epoch": 83.28, "learning_rate": 8.376272172563913e-06, "loss": 1.8302, "step": 28770000 }, { "epoch": 83.28, "learning_rate": 8.375548524916637e-06, "loss": 1.8119, "step": 28770500 }, { "epoch": 83.28, "learning_rate": 8.37482487726936e-06, "loss": 1.8298, "step": 28771000 }, { "epoch": 83.28, "learning_rate": 8.374101229622083e-06, "loss": 1.8197, "step": 28771500 }, { "epoch": 83.28, "learning_rate": 8.373377581974805e-06, "loss": 1.8495, "step": 28772000 }, { "epoch": 83.28, "learning_rate": 8.372655381622824e-06, "loss": 1.7945, "step": 28772500 }, { "epoch": 83.29, "learning_rate": 8.371933181270842e-06, "loss": 1.8451, "step": 28773000 }, { "epoch": 83.29, "learning_rate": 8.371210980918859e-06, "loss": 1.8223, "step": 28773500 }, { "epoch": 83.29, "learning_rate": 8.370487333271583e-06, "loss": 1.8307, "step": 28774000 }, { "epoch": 83.29, "learning_rate": 8.369763685624307e-06, "loss": 1.8161, "step": 28774500 }, { "epoch": 83.29, "learning_rate": 8.369040037977029e-06, "loss": 1.829, "step": 28775000 }, { "epoch": 83.29, "learning_rate": 8.368316390329751e-06, "loss": 1.8446, "step": 28775500 }, { "epoch": 83.29, "learning_rate": 8.367592742682475e-06, "loss": 1.8248, "step": 28776000 }, { "epoch": 83.3, "learning_rate": 8.366869095035199e-06, "loss": 1.8326, "step": 28776500 }, { "epoch": 83.3, "learning_rate": 8.366146894683216e-06, "loss": 1.8366, "step": 28777000 }, { "epoch": 83.3, "learning_rate": 8.36542324703594e-06, "loss": 1.8463, "step": 28777500 }, { "epoch": 83.3, "learning_rate": 8.364699599388662e-06, "loss": 1.8301, "step": 28778000 }, { "epoch": 83.3, "learning_rate": 8.363975951741386e-06, "loss": 1.8253, "step": 28778500 }, { "epoch": 83.3, "learning_rate": 8.363252304094109e-06, "loss": 1.836, "step": 28779000 }, { "epoch": 83.3, "learning_rate": 8.362528656446832e-06, "loss": 1.8248, "step": 28779500 }, { "epoch": 83.31, "learning_rate": 8.361805008799556e-06, "loss": 1.8094, "step": 28780000 }, { "epoch": 83.31, "learning_rate": 8.361081361152279e-06, "loss": 1.8224, "step": 28780500 }, { "epoch": 83.31, "learning_rate": 8.360357713505003e-06, "loss": 1.8163, "step": 28781000 }, { "epoch": 83.31, "learning_rate": 8.35963551315302e-06, "loss": 1.8261, "step": 28781500 }, { "epoch": 83.31, "learning_rate": 8.358911865505744e-06, "loss": 1.8349, "step": 28782000 }, { "epoch": 83.31, "learning_rate": 8.358188217858466e-06, "loss": 1.8104, "step": 28782500 }, { "epoch": 83.32, "learning_rate": 8.35746457021119e-06, "loss": 1.8586, "step": 28783000 }, { "epoch": 83.32, "learning_rate": 8.356740922563914e-06, "loss": 1.8259, "step": 28783500 }, { "epoch": 83.32, "learning_rate": 8.356018722211931e-06, "loss": 1.8155, "step": 28784000 }, { "epoch": 83.32, "learning_rate": 8.355295074564653e-06, "loss": 1.8112, "step": 28784500 }, { "epoch": 83.32, "learning_rate": 8.354571426917377e-06, "loss": 1.802, "step": 28785000 }, { "epoch": 83.32, "learning_rate": 8.353847779270101e-06, "loss": 1.8261, "step": 28785500 }, { "epoch": 83.32, "learning_rate": 8.353124131622823e-06, "loss": 1.8077, "step": 28786000 }, { "epoch": 83.33, "learning_rate": 8.352403378566136e-06, "loss": 1.7971, "step": 28786500 }, { "epoch": 83.33, "learning_rate": 8.35167973091886e-06, "loss": 1.8151, "step": 28787000 }, { "epoch": 83.33, "learning_rate": 8.350956083271584e-06, "loss": 1.8239, "step": 28787500 }, { "epoch": 83.33, "learning_rate": 8.350232435624306e-06, "loss": 1.8187, "step": 28788000 }, { "epoch": 83.33, "learning_rate": 8.349508787977028e-06, "loss": 1.8502, "step": 28788500 }, { "epoch": 83.33, "learning_rate": 8.348785140329752e-06, "loss": 1.8255, "step": 28789000 }, { "epoch": 83.33, "learning_rate": 8.348061492682476e-06, "loss": 1.8353, "step": 28789500 }, { "epoch": 83.34, "learning_rate": 8.347337845035198e-06, "loss": 1.8262, "step": 28790000 }, { "epoch": 83.34, "learning_rate": 8.346615644683215e-06, "loss": 1.8205, "step": 28790500 }, { "epoch": 83.34, "learning_rate": 8.34589199703594e-06, "loss": 1.8002, "step": 28791000 }, { "epoch": 83.34, "learning_rate": 8.345168349388663e-06, "loss": 1.8387, "step": 28791500 }, { "epoch": 83.34, "learning_rate": 8.344444701741385e-06, "loss": 1.8057, "step": 28792000 }, { "epoch": 83.34, "learning_rate": 8.34372105409411e-06, "loss": 1.818, "step": 28792500 }, { "epoch": 83.34, "learning_rate": 8.342997406446833e-06, "loss": 1.8135, "step": 28793000 }, { "epoch": 83.35, "learning_rate": 8.34227520609485e-06, "loss": 1.8305, "step": 28793500 }, { "epoch": 83.35, "learning_rate": 8.341551558447573e-06, "loss": 1.7949, "step": 28794000 }, { "epoch": 83.35, "learning_rate": 8.340827910800297e-06, "loss": 1.8449, "step": 28794500 }, { "epoch": 83.35, "learning_rate": 8.340105710448316e-06, "loss": 1.8308, "step": 28795000 }, { "epoch": 83.35, "learning_rate": 8.339382062801038e-06, "loss": 1.8322, "step": 28795500 }, { "epoch": 83.35, "learning_rate": 8.33865841515376e-06, "loss": 1.8268, "step": 28796000 }, { "epoch": 83.35, "learning_rate": 8.337934767506484e-06, "loss": 1.8421, "step": 28796500 }, { "epoch": 83.36, "learning_rate": 8.337211119859208e-06, "loss": 1.8403, "step": 28797000 }, { "epoch": 83.36, "learning_rate": 8.33648747221193e-06, "loss": 1.823, "step": 28797500 }, { "epoch": 83.36, "learning_rate": 8.335763824564654e-06, "loss": 1.8173, "step": 28798000 }, { "epoch": 83.36, "learning_rate": 8.335040176917378e-06, "loss": 1.8309, "step": 28798500 }, { "epoch": 83.36, "learning_rate": 8.334317976565395e-06, "loss": 1.8388, "step": 28799000 }, { "epoch": 83.36, "learning_rate": 8.333594328918118e-06, "loss": 1.831, "step": 28799500 }, { "epoch": 83.36, "learning_rate": 8.332870681270841e-06, "loss": 1.8115, "step": 28800000 }, { "epoch": 83.37, "learning_rate": 8.332147033623565e-06, "loss": 1.819, "step": 28800500 }, { "epoch": 83.37, "learning_rate": 8.331423385976288e-06, "loss": 1.8343, "step": 28801000 }, { "epoch": 83.37, "learning_rate": 8.33069973832901e-06, "loss": 1.8129, "step": 28801500 }, { "epoch": 83.37, "learning_rate": 8.329976090681735e-06, "loss": 1.835, "step": 28802000 }, { "epoch": 83.37, "learning_rate": 8.329252443034458e-06, "loss": 1.8292, "step": 28802500 }, { "epoch": 83.37, "learning_rate": 8.328530242682475e-06, "loss": 1.8259, "step": 28803000 }, { "epoch": 83.37, "learning_rate": 8.327806595035197e-06, "loss": 1.8386, "step": 28803500 }, { "epoch": 83.38, "learning_rate": 8.327084394683216e-06, "loss": 1.853, "step": 28804000 }, { "epoch": 83.38, "learning_rate": 8.32636074703594e-06, "loss": 1.8247, "step": 28804500 }, { "epoch": 83.38, "learning_rate": 8.325637099388662e-06, "loss": 1.8168, "step": 28805000 }, { "epoch": 83.38, "learning_rate": 8.324913451741386e-06, "loss": 1.8183, "step": 28805500 }, { "epoch": 83.38, "learning_rate": 8.32418980409411e-06, "loss": 1.8354, "step": 28806000 }, { "epoch": 83.38, "learning_rate": 8.323466156446832e-06, "loss": 1.8462, "step": 28806500 }, { "epoch": 83.38, "learning_rate": 8.32274395609485e-06, "loss": 1.838, "step": 28807000 }, { "epoch": 83.39, "learning_rate": 8.322020308447574e-06, "loss": 1.8241, "step": 28807500 }, { "epoch": 83.39, "learning_rate": 8.321296660800297e-06, "loss": 1.816, "step": 28808000 }, { "epoch": 83.39, "learning_rate": 8.32057301315302e-06, "loss": 1.8443, "step": 28808500 }, { "epoch": 83.39, "learning_rate": 8.319849365505742e-06, "loss": 1.8436, "step": 28809000 }, { "epoch": 83.39, "learning_rate": 8.319125717858468e-06, "loss": 1.8194, "step": 28809500 }, { "epoch": 83.39, "learning_rate": 8.318403517506485e-06, "loss": 1.8342, "step": 28810000 }, { "epoch": 83.39, "learning_rate": 8.317679869859207e-06, "loss": 1.8208, "step": 28810500 }, { "epoch": 83.4, "learning_rate": 8.316957669507224e-06, "loss": 1.8128, "step": 28811000 }, { "epoch": 83.4, "learning_rate": 8.31623402185995e-06, "loss": 1.8281, "step": 28811500 }, { "epoch": 83.4, "learning_rate": 8.315510374212672e-06, "loss": 1.8153, "step": 28812000 }, { "epoch": 83.4, "learning_rate": 8.314786726565394e-06, "loss": 1.811, "step": 28812500 }, { "epoch": 83.4, "learning_rate": 8.314063078918118e-06, "loss": 1.8122, "step": 28813000 }, { "epoch": 83.4, "learning_rate": 8.313339431270842e-06, "loss": 1.8541, "step": 28813500 }, { "epoch": 83.4, "learning_rate": 8.312615783623564e-06, "loss": 1.8201, "step": 28814000 }, { "epoch": 83.41, "learning_rate": 8.311895030566877e-06, "loss": 1.8314, "step": 28814500 }, { "epoch": 83.41, "learning_rate": 8.311171382919599e-06, "loss": 1.8207, "step": 28815000 }, { "epoch": 83.41, "learning_rate": 8.310447735272325e-06, "loss": 1.8367, "step": 28815500 }, { "epoch": 83.41, "learning_rate": 8.309724087625047e-06, "loss": 1.8416, "step": 28816000 }, { "epoch": 83.41, "learning_rate": 8.309000439977769e-06, "loss": 1.8369, "step": 28816500 }, { "epoch": 83.41, "learning_rate": 8.308276792330493e-06, "loss": 1.8331, "step": 28817000 }, { "epoch": 83.41, "learning_rate": 8.307553144683217e-06, "loss": 1.8272, "step": 28817500 }, { "epoch": 83.42, "learning_rate": 8.30682949703594e-06, "loss": 1.8367, "step": 28818000 }, { "epoch": 83.42, "learning_rate": 8.306105849388663e-06, "loss": 1.8194, "step": 28818500 }, { "epoch": 83.42, "learning_rate": 8.305382201741387e-06, "loss": 1.8357, "step": 28819000 }, { "epoch": 83.42, "learning_rate": 8.30465855409411e-06, "loss": 1.8405, "step": 28819500 }, { "epoch": 83.42, "learning_rate": 8.303936353742127e-06, "loss": 1.8075, "step": 28820000 }, { "epoch": 83.42, "learning_rate": 8.30321270609485e-06, "loss": 1.8165, "step": 28820500 }, { "epoch": 83.42, "learning_rate": 8.302489058447574e-06, "loss": 1.8112, "step": 28821000 }, { "epoch": 83.43, "learning_rate": 8.301765410800297e-06, "loss": 1.8355, "step": 28821500 }, { "epoch": 83.43, "learning_rate": 8.301041763153019e-06, "loss": 1.8112, "step": 28822000 }, { "epoch": 83.43, "learning_rate": 8.300318115505744e-06, "loss": 1.7968, "step": 28822500 }, { "epoch": 83.43, "learning_rate": 8.299595915153762e-06, "loss": 1.8276, "step": 28823000 }, { "epoch": 83.43, "learning_rate": 8.298872267506484e-06, "loss": 1.8221, "step": 28823500 }, { "epoch": 83.43, "learning_rate": 8.298148619859208e-06, "loss": 1.8378, "step": 28824000 }, { "epoch": 83.44, "learning_rate": 8.297424972211932e-06, "loss": 1.8157, "step": 28824500 }, { "epoch": 83.44, "learning_rate": 8.296701324564654e-06, "loss": 1.7994, "step": 28825000 }, { "epoch": 83.44, "learning_rate": 8.295977676917376e-06, "loss": 1.8249, "step": 28825500 }, { "epoch": 83.44, "learning_rate": 8.2952540292701e-06, "loss": 1.7955, "step": 28826000 }, { "epoch": 83.44, "learning_rate": 8.294531828918119e-06, "loss": 1.8157, "step": 28826500 }, { "epoch": 83.44, "learning_rate": 8.293808181270841e-06, "loss": 1.7958, "step": 28827000 }, { "epoch": 83.44, "learning_rate": 8.293084533623564e-06, "loss": 1.8144, "step": 28827500 }, { "epoch": 83.45, "learning_rate": 8.292362333271583e-06, "loss": 1.8252, "step": 28828000 }, { "epoch": 83.45, "learning_rate": 8.291638685624306e-06, "loss": 1.8399, "step": 28828500 }, { "epoch": 83.45, "learning_rate": 8.290915037977029e-06, "loss": 1.8336, "step": 28829000 }, { "epoch": 83.45, "learning_rate": 8.290191390329753e-06, "loss": 1.8193, "step": 28829500 }, { "epoch": 83.45, "learning_rate": 8.289467742682477e-06, "loss": 1.839, "step": 28830000 }, { "epoch": 83.45, "learning_rate": 8.288744095035199e-06, "loss": 1.8552, "step": 28830500 }, { "epoch": 83.45, "learning_rate": 8.288020447387921e-06, "loss": 1.8087, "step": 28831000 }, { "epoch": 83.46, "learning_rate": 8.287296799740645e-06, "loss": 1.8302, "step": 28831500 }, { "epoch": 83.46, "learning_rate": 8.286573152093369e-06, "loss": 1.7973, "step": 28832000 }, { "epoch": 83.46, "learning_rate": 8.285849504446091e-06, "loss": 1.8144, "step": 28832500 }, { "epoch": 83.46, "learning_rate": 8.285125856798815e-06, "loss": 1.8402, "step": 28833000 }, { "epoch": 83.46, "learning_rate": 8.284402209151539e-06, "loss": 1.8143, "step": 28833500 }, { "epoch": 83.46, "learning_rate": 8.283678561504261e-06, "loss": 1.8508, "step": 28834000 }, { "epoch": 83.46, "learning_rate": 8.282956361152278e-06, "loss": 1.8576, "step": 28834500 }, { "epoch": 83.47, "learning_rate": 8.282232713505002e-06, "loss": 1.8521, "step": 28835000 }, { "epoch": 83.47, "learning_rate": 8.281509065857726e-06, "loss": 1.8471, "step": 28835500 }, { "epoch": 83.47, "learning_rate": 8.280786865505744e-06, "loss": 1.8467, "step": 28836000 }, { "epoch": 83.47, "learning_rate": 8.280063217858466e-06, "loss": 1.8143, "step": 28836500 }, { "epoch": 83.47, "learning_rate": 8.27933957021119e-06, "loss": 1.8256, "step": 28837000 }, { "epoch": 83.47, "learning_rate": 8.278615922563914e-06, "loss": 1.8411, "step": 28837500 }, { "epoch": 83.47, "learning_rate": 8.277892274916636e-06, "loss": 1.8224, "step": 28838000 }, { "epoch": 83.48, "learning_rate": 8.27716862726936e-06, "loss": 1.8342, "step": 28838500 }, { "epoch": 83.48, "learning_rate": 8.276444979622084e-06, "loss": 1.8403, "step": 28839000 }, { "epoch": 83.48, "learning_rate": 8.275721331974806e-06, "loss": 1.8142, "step": 28839500 }, { "epoch": 83.48, "learning_rate": 8.274997684327528e-06, "loss": 1.8208, "step": 28840000 }, { "epoch": 83.48, "learning_rate": 8.274274036680252e-06, "loss": 1.8275, "step": 28840500 }, { "epoch": 83.48, "learning_rate": 8.273551836328271e-06, "loss": 1.8426, "step": 28841000 }, { "epoch": 83.48, "learning_rate": 8.272828188680993e-06, "loss": 1.8418, "step": 28841500 }, { "epoch": 83.49, "learning_rate": 8.272104541033717e-06, "loss": 1.8566, "step": 28842000 }, { "epoch": 83.49, "learning_rate": 8.27138089338644e-06, "loss": 1.8223, "step": 28842500 }, { "epoch": 83.49, "learning_rate": 8.270657245739163e-06, "loss": 1.8353, "step": 28843000 }, { "epoch": 83.49, "learning_rate": 8.26993504538718e-06, "loss": 1.8264, "step": 28843500 }, { "epoch": 83.49, "learning_rate": 8.269211397739905e-06, "loss": 1.8095, "step": 28844000 }, { "epoch": 83.49, "learning_rate": 8.268487750092627e-06, "loss": 1.8084, "step": 28844500 }, { "epoch": 83.49, "learning_rate": 8.267765549740646e-06, "loss": 1.8199, "step": 28845000 }, { "epoch": 83.5, "learning_rate": 8.267041902093368e-06, "loss": 1.8022, "step": 28845500 }, { "epoch": 83.5, "learning_rate": 8.266319701741385e-06, "loss": 1.8397, "step": 28846000 }, { "epoch": 83.5, "learning_rate": 8.26559605409411e-06, "loss": 1.8192, "step": 28846500 }, { "epoch": 83.5, "learning_rate": 8.264872406446833e-06, "loss": 1.8018, "step": 28847000 }, { "epoch": 83.5, "learning_rate": 8.264148758799555e-06, "loss": 1.8254, "step": 28847500 }, { "epoch": 83.5, "learning_rate": 8.26342511115228e-06, "loss": 1.8417, "step": 28848000 }, { "epoch": 83.5, "learning_rate": 8.262701463505003e-06, "loss": 1.8113, "step": 28848500 }, { "epoch": 83.51, "learning_rate": 8.26197926315302e-06, "loss": 1.8183, "step": 28849000 }, { "epoch": 83.51, "learning_rate": 8.261255615505743e-06, "loss": 1.8276, "step": 28849500 }, { "epoch": 83.51, "learning_rate": 8.260531967858467e-06, "loss": 1.8191, "step": 28850000 }, { "epoch": 83.51, "learning_rate": 8.25980832021119e-06, "loss": 1.8255, "step": 28850500 }, { "epoch": 83.51, "learning_rate": 8.259086119859208e-06, "loss": 1.8124, "step": 28851000 }, { "epoch": 83.51, "learning_rate": 8.25836247221193e-06, "loss": 1.8569, "step": 28851500 }, { "epoch": 83.51, "learning_rate": 8.257638824564654e-06, "loss": 1.8359, "step": 28852000 }, { "epoch": 83.52, "learning_rate": 8.256915176917378e-06, "loss": 1.8226, "step": 28852500 }, { "epoch": 83.52, "learning_rate": 8.2561915292701e-06, "loss": 1.8458, "step": 28853000 }, { "epoch": 83.52, "learning_rate": 8.255467881622824e-06, "loss": 1.8152, "step": 28853500 }, { "epoch": 83.52, "learning_rate": 8.254744233975546e-06, "loss": 1.8116, "step": 28854000 }, { "epoch": 83.52, "learning_rate": 8.25402058632827e-06, "loss": 1.8134, "step": 28854500 }, { "epoch": 83.52, "learning_rate": 8.253296938680994e-06, "loss": 1.8233, "step": 28855000 }, { "epoch": 83.52, "learning_rate": 8.252573291033716e-06, "loss": 1.8454, "step": 28855500 }, { "epoch": 83.53, "learning_rate": 8.25184964338644e-06, "loss": 1.8223, "step": 28856000 }, { "epoch": 83.53, "learning_rate": 8.251125995739162e-06, "loss": 1.8149, "step": 28856500 }, { "epoch": 83.53, "learning_rate": 8.250402348091886e-06, "loss": 1.8222, "step": 28857000 }, { "epoch": 83.53, "learning_rate": 8.249678700444609e-06, "loss": 1.8363, "step": 28857500 }, { "epoch": 83.53, "learning_rate": 8.248955052797333e-06, "loss": 1.8177, "step": 28858000 }, { "epoch": 83.53, "learning_rate": 8.248231405150057e-06, "loss": 1.8299, "step": 28858500 }, { "epoch": 83.53, "learning_rate": 8.247507757502779e-06, "loss": 1.8311, "step": 28859000 }, { "epoch": 83.54, "learning_rate": 8.246784109855503e-06, "loss": 1.8036, "step": 28859500 }, { "epoch": 83.54, "learning_rate": 8.24606190950352e-06, "loss": 1.8385, "step": 28860000 }, { "epoch": 83.54, "learning_rate": 8.245338261856244e-06, "loss": 1.8164, "step": 28860500 }, { "epoch": 83.54, "learning_rate": 8.244614614208966e-06, "loss": 1.8106, "step": 28861000 }, { "epoch": 83.54, "learning_rate": 8.24389096656169e-06, "loss": 1.833, "step": 28861500 }, { "epoch": 83.54, "learning_rate": 8.243167318914414e-06, "loss": 1.8298, "step": 28862000 }, { "epoch": 83.55, "learning_rate": 8.242443671267136e-06, "loss": 1.8026, "step": 28862500 }, { "epoch": 83.55, "learning_rate": 8.241721470915153e-06, "loss": 1.8131, "step": 28863000 }, { "epoch": 83.55, "learning_rate": 8.240997823267877e-06, "loss": 1.8207, "step": 28863500 }, { "epoch": 83.55, "learning_rate": 8.240274175620601e-06, "loss": 1.8436, "step": 28864000 }, { "epoch": 83.55, "learning_rate": 8.239550527973324e-06, "loss": 1.8199, "step": 28864500 }, { "epoch": 83.55, "learning_rate": 8.238826880326047e-06, "loss": 1.83, "step": 28865000 }, { "epoch": 83.55, "learning_rate": 8.238104679974065e-06, "loss": 1.8337, "step": 28865500 }, { "epoch": 83.56, "learning_rate": 8.237381032326789e-06, "loss": 1.8138, "step": 28866000 }, { "epoch": 83.56, "learning_rate": 8.236658831974806e-06, "loss": 1.8212, "step": 28866500 }, { "epoch": 83.56, "learning_rate": 8.235935184327528e-06, "loss": 1.8386, "step": 28867000 }, { "epoch": 83.56, "learning_rate": 8.235211536680252e-06, "loss": 1.8291, "step": 28867500 }, { "epoch": 83.56, "learning_rate": 8.234487889032976e-06, "loss": 1.8099, "step": 28868000 }, { "epoch": 83.56, "learning_rate": 8.233764241385698e-06, "loss": 1.8138, "step": 28868500 }, { "epoch": 83.56, "learning_rate": 8.233040593738422e-06, "loss": 1.835, "step": 28869000 }, { "epoch": 83.57, "learning_rate": 8.232316946091146e-06, "loss": 1.8366, "step": 28869500 }, { "epoch": 83.57, "learning_rate": 8.231594745739163e-06, "loss": 1.8045, "step": 28870000 }, { "epoch": 83.57, "learning_rate": 8.230871098091886e-06, "loss": 1.8406, "step": 28870500 }, { "epoch": 83.57, "learning_rate": 8.23014745044461e-06, "loss": 1.8256, "step": 28871000 }, { "epoch": 83.57, "learning_rate": 8.229423802797333e-06, "loss": 1.8057, "step": 28871500 }, { "epoch": 83.57, "learning_rate": 8.228700155150056e-06, "loss": 1.8277, "step": 28872000 }, { "epoch": 83.57, "learning_rate": 8.22797650750278e-06, "loss": 1.8133, "step": 28872500 }, { "epoch": 83.58, "learning_rate": 8.227252859855503e-06, "loss": 1.8114, "step": 28873000 }, { "epoch": 83.58, "learning_rate": 8.226529212208226e-06, "loss": 1.8029, "step": 28873500 }, { "epoch": 83.58, "learning_rate": 8.225805564560948e-06, "loss": 1.8352, "step": 28874000 }, { "epoch": 83.58, "learning_rate": 8.225083364208967e-06, "loss": 1.8171, "step": 28874500 }, { "epoch": 83.58, "learning_rate": 8.22435971656169e-06, "loss": 1.8478, "step": 28875000 }, { "epoch": 83.58, "learning_rate": 8.223637516209708e-06, "loss": 1.8568, "step": 28875500 }, { "epoch": 83.58, "learning_rate": 8.22291386856243e-06, "loss": 1.7938, "step": 28876000 }, { "epoch": 83.59, "learning_rate": 8.222190220915154e-06, "loss": 1.8068, "step": 28876500 }, { "epoch": 83.59, "learning_rate": 8.221466573267878e-06, "loss": 1.8431, "step": 28877000 }, { "epoch": 83.59, "learning_rate": 8.2207429256206e-06, "loss": 1.8298, "step": 28877500 }, { "epoch": 83.59, "learning_rate": 8.220019277973323e-06, "loss": 1.8458, "step": 28878000 }, { "epoch": 83.59, "learning_rate": 8.219295630326047e-06, "loss": 1.842, "step": 28878500 }, { "epoch": 83.59, "learning_rate": 8.21857198267877e-06, "loss": 1.8392, "step": 28879000 }, { "epoch": 83.59, "learning_rate": 8.217848335031493e-06, "loss": 1.8292, "step": 28879500 }, { "epoch": 83.6, "learning_rate": 8.217126134679512e-06, "loss": 1.8356, "step": 28880000 }, { "epoch": 83.6, "learning_rate": 8.216402487032236e-06, "loss": 1.8057, "step": 28880500 }, { "epoch": 83.6, "learning_rate": 8.215678839384958e-06, "loss": 1.8142, "step": 28881000 }, { "epoch": 83.6, "learning_rate": 8.21495519173768e-06, "loss": 1.8361, "step": 28881500 }, { "epoch": 83.6, "learning_rate": 8.214231544090404e-06, "loss": 1.8351, "step": 28882000 }, { "epoch": 83.6, "learning_rate": 8.213507896443128e-06, "loss": 1.8314, "step": 28882500 }, { "epoch": 83.6, "learning_rate": 8.21278424879585e-06, "loss": 1.8235, "step": 28883000 }, { "epoch": 83.61, "learning_rate": 8.212062048443867e-06, "loss": 1.8124, "step": 28883500 }, { "epoch": 83.61, "learning_rate": 8.211338400796591e-06, "loss": 1.8158, "step": 28884000 }, { "epoch": 83.61, "learning_rate": 8.210614753149315e-06, "loss": 1.8043, "step": 28884500 }, { "epoch": 83.61, "learning_rate": 8.209891105502037e-06, "loss": 1.8534, "step": 28885000 }, { "epoch": 83.61, "learning_rate": 8.209167457854761e-06, "loss": 1.8139, "step": 28885500 }, { "epoch": 83.61, "learning_rate": 8.20844525750278e-06, "loss": 1.8235, "step": 28886000 }, { "epoch": 83.61, "learning_rate": 8.207721609855503e-06, "loss": 1.8196, "step": 28886500 }, { "epoch": 83.62, "learning_rate": 8.206997962208225e-06, "loss": 1.8198, "step": 28887000 }, { "epoch": 83.62, "learning_rate": 8.206274314560949e-06, "loss": 1.8348, "step": 28887500 }, { "epoch": 83.62, "learning_rate": 8.205550666913673e-06, "loss": 1.8204, "step": 28888000 }, { "epoch": 83.62, "learning_rate": 8.204827019266395e-06, "loss": 1.8461, "step": 28888500 }, { "epoch": 83.62, "learning_rate": 8.204103371619117e-06, "loss": 1.8193, "step": 28889000 }, { "epoch": 83.62, "learning_rate": 8.203379723971843e-06, "loss": 1.8527, "step": 28889500 }, { "epoch": 83.62, "learning_rate": 8.20265752361986e-06, "loss": 1.8107, "step": 28890000 }, { "epoch": 83.63, "learning_rate": 8.201933875972582e-06, "loss": 1.8462, "step": 28890500 }, { "epoch": 83.63, "learning_rate": 8.201210228325306e-06, "loss": 1.8215, "step": 28891000 }, { "epoch": 83.63, "learning_rate": 8.20048658067803e-06, "loss": 1.8338, "step": 28891500 }, { "epoch": 83.63, "learning_rate": 8.199764380326047e-06, "loss": 1.8145, "step": 28892000 }, { "epoch": 83.63, "learning_rate": 8.19904073267877e-06, "loss": 1.8226, "step": 28892500 }, { "epoch": 83.63, "learning_rate": 8.198317085031493e-06, "loss": 1.838, "step": 28893000 }, { "epoch": 83.63, "learning_rate": 8.197593437384217e-06, "loss": 1.8317, "step": 28893500 }, { "epoch": 83.64, "learning_rate": 8.19686978973694e-06, "loss": 1.8192, "step": 28894000 }, { "epoch": 83.64, "learning_rate": 8.196146142089662e-06, "loss": 1.8143, "step": 28894500 }, { "epoch": 83.64, "learning_rate": 8.19542394173768e-06, "loss": 1.8195, "step": 28895000 }, { "epoch": 83.64, "learning_rate": 8.1947017413857e-06, "loss": 1.8432, "step": 28895500 }, { "epoch": 83.64, "learning_rate": 8.193978093738422e-06, "loss": 1.8414, "step": 28896000 }, { "epoch": 83.64, "learning_rate": 8.193254446091144e-06, "loss": 1.8518, "step": 28896500 }, { "epoch": 83.64, "learning_rate": 8.192530798443868e-06, "loss": 1.8295, "step": 28897000 }, { "epoch": 83.65, "learning_rate": 8.191807150796592e-06, "loss": 1.8313, "step": 28897500 }, { "epoch": 83.65, "learning_rate": 8.19108495044461e-06, "loss": 1.8337, "step": 28898000 }, { "epoch": 83.65, "learning_rate": 8.190361302797332e-06, "loss": 1.8242, "step": 28898500 }, { "epoch": 83.65, "learning_rate": 8.189637655150057e-06, "loss": 1.819, "step": 28899000 }, { "epoch": 83.65, "learning_rate": 8.18891400750278e-06, "loss": 1.8323, "step": 28899500 }, { "epoch": 83.65, "learning_rate": 8.188190359855502e-06, "loss": 1.8409, "step": 28900000 }, { "epoch": 83.66, "learning_rate": 8.187466712208226e-06, "loss": 1.8391, "step": 28900500 }, { "epoch": 83.66, "learning_rate": 8.18674306456095e-06, "loss": 1.8579, "step": 28901000 }, { "epoch": 83.66, "learning_rate": 8.186019416913672e-06, "loss": 1.8324, "step": 28901500 }, { "epoch": 83.66, "learning_rate": 8.185295769266394e-06, "loss": 1.8136, "step": 28902000 }, { "epoch": 83.66, "learning_rate": 8.184573568914413e-06, "loss": 1.8346, "step": 28902500 }, { "epoch": 83.66, "learning_rate": 8.183849921267137e-06, "loss": 1.7921, "step": 28903000 }, { "epoch": 83.66, "learning_rate": 8.183126273619859e-06, "loss": 1.8081, "step": 28903500 }, { "epoch": 83.67, "learning_rate": 8.182402625972583e-06, "loss": 1.8258, "step": 28904000 }, { "epoch": 83.67, "learning_rate": 8.181678978325307e-06, "loss": 1.8249, "step": 28904500 }, { "epoch": 83.67, "learning_rate": 8.18095533067803e-06, "loss": 1.8294, "step": 28905000 }, { "epoch": 83.67, "learning_rate": 8.180233130326046e-06, "loss": 1.816, "step": 28905500 }, { "epoch": 83.67, "learning_rate": 8.17950948267877e-06, "loss": 1.8213, "step": 28906000 }, { "epoch": 83.67, "learning_rate": 8.178785835031494e-06, "loss": 1.8545, "step": 28906500 }, { "epoch": 83.67, "learning_rate": 8.178062187384217e-06, "loss": 1.8137, "step": 28907000 }, { "epoch": 83.68, "learning_rate": 8.177338539736939e-06, "loss": 1.8322, "step": 28907500 }, { "epoch": 83.68, "learning_rate": 8.176614892089664e-06, "loss": 1.8167, "step": 28908000 }, { "epoch": 83.68, "learning_rate": 8.175892691737682e-06, "loss": 1.8411, "step": 28908500 }, { "epoch": 83.68, "learning_rate": 8.175169044090404e-06, "loss": 1.8048, "step": 28909000 }, { "epoch": 83.68, "learning_rate": 8.174445396443128e-06, "loss": 1.8285, "step": 28909500 }, { "epoch": 83.68, "learning_rate": 8.173721748795852e-06, "loss": 1.8518, "step": 28910000 }, { "epoch": 83.68, "learning_rate": 8.172999548443869e-06, "loss": 1.8111, "step": 28910500 }, { "epoch": 83.69, "learning_rate": 8.172275900796591e-06, "loss": 1.8222, "step": 28911000 }, { "epoch": 83.69, "learning_rate": 8.171552253149315e-06, "loss": 1.8391, "step": 28911500 }, { "epoch": 83.69, "learning_rate": 8.170830052797334e-06, "loss": 1.8162, "step": 28912000 }, { "epoch": 83.69, "learning_rate": 8.170106405150056e-06, "loss": 1.8264, "step": 28912500 }, { "epoch": 83.69, "learning_rate": 8.169382757502779e-06, "loss": 1.7986, "step": 28913000 }, { "epoch": 83.69, "learning_rate": 8.168659109855502e-06, "loss": 1.8441, "step": 28913500 }, { "epoch": 83.69, "learning_rate": 8.167935462208226e-06, "loss": 1.8024, "step": 28914000 }, { "epoch": 83.7, "learning_rate": 8.167211814560949e-06, "loss": 1.8381, "step": 28914500 }, { "epoch": 83.7, "learning_rate": 8.166489614208966e-06, "loss": 1.8331, "step": 28915000 }, { "epoch": 83.7, "learning_rate": 8.16576596656169e-06, "loss": 1.8446, "step": 28915500 }, { "epoch": 83.7, "learning_rate": 8.165042318914414e-06, "loss": 1.8528, "step": 28916000 }, { "epoch": 83.7, "learning_rate": 8.164318671267136e-06, "loss": 1.8181, "step": 28916500 }, { "epoch": 83.7, "learning_rate": 8.16359502361986e-06, "loss": 1.8185, "step": 28917000 }, { "epoch": 83.7, "learning_rate": 8.162871375972584e-06, "loss": 1.8254, "step": 28917500 }, { "epoch": 83.71, "learning_rate": 8.162147728325306e-06, "loss": 1.8412, "step": 28918000 }, { "epoch": 83.71, "learning_rate": 8.161425527973323e-06, "loss": 1.8276, "step": 28918500 }, { "epoch": 83.71, "learning_rate": 8.16070332762134e-06, "loss": 1.8184, "step": 28919000 }, { "epoch": 83.71, "learning_rate": 8.159979679974066e-06, "loss": 1.8396, "step": 28919500 }, { "epoch": 83.71, "learning_rate": 8.159256032326788e-06, "loss": 1.7902, "step": 28920000 }, { "epoch": 83.71, "learning_rate": 8.15853238467951e-06, "loss": 1.8336, "step": 28920500 }, { "epoch": 83.71, "learning_rate": 8.157808737032235e-06, "loss": 1.8236, "step": 28921000 }, { "epoch": 83.72, "learning_rate": 8.157085089384958e-06, "loss": 1.842, "step": 28921500 }, { "epoch": 83.72, "learning_rate": 8.15636144173768e-06, "loss": 1.8364, "step": 28922000 }, { "epoch": 83.72, "learning_rate": 8.155637794090405e-06, "loss": 1.8129, "step": 28922500 }, { "epoch": 83.72, "learning_rate": 8.154914146443129e-06, "loss": 1.8321, "step": 28923000 }, { "epoch": 83.72, "learning_rate": 8.154191946091146e-06, "loss": 1.8279, "step": 28923500 }, { "epoch": 83.72, "learning_rate": 8.153468298443868e-06, "loss": 1.8326, "step": 28924000 }, { "epoch": 83.72, "learning_rate": 8.152744650796592e-06, "loss": 1.828, "step": 28924500 }, { "epoch": 83.73, "learning_rate": 8.152021003149316e-06, "loss": 1.8348, "step": 28925000 }, { "epoch": 83.73, "learning_rate": 8.151297355502038e-06, "loss": 1.8429, "step": 28925500 }, { "epoch": 83.73, "learning_rate": 8.15057370785476e-06, "loss": 1.8037, "step": 28926000 }, { "epoch": 83.73, "learning_rate": 8.149850060207484e-06, "loss": 1.8138, "step": 28926500 }, { "epoch": 83.73, "learning_rate": 8.149127859855503e-06, "loss": 1.8102, "step": 28927000 }, { "epoch": 83.73, "learning_rate": 8.148404212208225e-06, "loss": 1.8209, "step": 28927500 }, { "epoch": 83.73, "learning_rate": 8.14768056456095e-06, "loss": 1.8388, "step": 28928000 }, { "epoch": 83.74, "learning_rate": 8.146956916913673e-06, "loss": 1.8326, "step": 28928500 }, { "epoch": 83.74, "learning_rate": 8.146233269266396e-06, "loss": 1.8183, "step": 28929000 }, { "epoch": 83.74, "learning_rate": 8.145509621619118e-06, "loss": 1.8171, "step": 28929500 }, { "epoch": 83.74, "learning_rate": 8.144785973971842e-06, "loss": 1.8164, "step": 28930000 }, { "epoch": 83.74, "learning_rate": 8.144062326324566e-06, "loss": 1.8176, "step": 28930500 }, { "epoch": 83.74, "learning_rate": 8.143341573267878e-06, "loss": 1.8211, "step": 28931000 }, { "epoch": 83.74, "learning_rate": 8.1426179256206e-06, "loss": 1.8181, "step": 28931500 }, { "epoch": 83.75, "learning_rate": 8.141894277973324e-06, "loss": 1.8098, "step": 28932000 }, { "epoch": 83.75, "learning_rate": 8.141170630326048e-06, "loss": 1.8437, "step": 28932500 }, { "epoch": 83.75, "learning_rate": 8.14044698267877e-06, "loss": 1.8254, "step": 28933000 }, { "epoch": 83.75, "learning_rate": 8.139723335031492e-06, "loss": 1.8154, "step": 28933500 }, { "epoch": 83.75, "learning_rate": 8.138999687384216e-06, "loss": 1.8376, "step": 28934000 }, { "epoch": 83.75, "learning_rate": 8.13827603973694e-06, "loss": 1.8048, "step": 28934500 }, { "epoch": 83.75, "learning_rate": 8.137552392089663e-06, "loss": 1.8286, "step": 28935000 }, { "epoch": 83.76, "learning_rate": 8.136828744442387e-06, "loss": 1.8495, "step": 28935500 }, { "epoch": 83.76, "learning_rate": 8.13610509679511e-06, "loss": 1.8313, "step": 28936000 }, { "epoch": 83.76, "learning_rate": 8.135381449147833e-06, "loss": 1.8011, "step": 28936500 }, { "epoch": 83.76, "learning_rate": 8.134660696091145e-06, "loss": 1.8255, "step": 28937000 }, { "epoch": 83.76, "learning_rate": 8.133937048443869e-06, "loss": 1.8459, "step": 28937500 }, { "epoch": 83.76, "learning_rate": 8.133213400796593e-06, "loss": 1.8092, "step": 28938000 }, { "epoch": 83.77, "learning_rate": 8.132489753149315e-06, "loss": 1.8357, "step": 28938500 }, { "epoch": 83.77, "learning_rate": 8.131766105502037e-06, "loss": 1.838, "step": 28939000 }, { "epoch": 83.77, "learning_rate": 8.131043905150056e-06, "loss": 1.8221, "step": 28939500 }, { "epoch": 83.77, "learning_rate": 8.13032025750278e-06, "loss": 1.8382, "step": 28940000 }, { "epoch": 83.77, "learning_rate": 8.129596609855502e-06, "loss": 1.8248, "step": 28940500 }, { "epoch": 83.77, "learning_rate": 8.128872962208226e-06, "loss": 1.8379, "step": 28941000 }, { "epoch": 83.77, "learning_rate": 8.128149314560949e-06, "loss": 1.7974, "step": 28941500 }, { "epoch": 83.78, "learning_rate": 8.12742856150426e-06, "loss": 1.8642, "step": 28942000 }, { "epoch": 83.78, "learning_rate": 8.126704913856985e-06, "loss": 1.8268, "step": 28942500 }, { "epoch": 83.78, "learning_rate": 8.125981266209707e-06, "loss": 1.8203, "step": 28943000 }, { "epoch": 83.78, "learning_rate": 8.125257618562431e-06, "loss": 1.8108, "step": 28943500 }, { "epoch": 83.78, "learning_rate": 8.124533970915155e-06, "loss": 1.8111, "step": 28944000 }, { "epoch": 83.78, "learning_rate": 8.123810323267877e-06, "loss": 1.8575, "step": 28944500 }, { "epoch": 83.78, "learning_rate": 8.123088122915894e-06, "loss": 1.8341, "step": 28945000 }, { "epoch": 83.79, "learning_rate": 8.122364475268618e-06, "loss": 1.8046, "step": 28945500 }, { "epoch": 83.79, "learning_rate": 8.121640827621342e-06, "loss": 1.8225, "step": 28946000 }, { "epoch": 83.79, "learning_rate": 8.120917179974064e-06, "loss": 1.8425, "step": 28946500 }, { "epoch": 83.79, "learning_rate": 8.120193532326788e-06, "loss": 1.8105, "step": 28947000 }, { "epoch": 83.79, "learning_rate": 8.119469884679512e-06, "loss": 1.8428, "step": 28947500 }, { "epoch": 83.79, "learning_rate": 8.118746237032234e-06, "loss": 1.8356, "step": 28948000 }, { "epoch": 83.79, "learning_rate": 8.118022589384958e-06, "loss": 1.8165, "step": 28948500 }, { "epoch": 83.8, "learning_rate": 8.117300389032976e-06, "loss": 1.8218, "step": 28949000 }, { "epoch": 83.8, "learning_rate": 8.116578188680993e-06, "loss": 1.8257, "step": 28949500 }, { "epoch": 83.8, "learning_rate": 8.115854541033717e-06, "loss": 1.8004, "step": 28950000 }, { "epoch": 83.8, "learning_rate": 8.115130893386439e-06, "loss": 1.8309, "step": 28950500 }, { "epoch": 83.8, "learning_rate": 8.114408693034458e-06, "loss": 1.8349, "step": 28951000 }, { "epoch": 83.8, "learning_rate": 8.11368504538718e-06, "loss": 1.8377, "step": 28951500 }, { "epoch": 83.8, "learning_rate": 8.112961397739904e-06, "loss": 1.8366, "step": 28952000 }, { "epoch": 83.81, "learning_rate": 8.112237750092626e-06, "loss": 1.8294, "step": 28952500 }, { "epoch": 83.81, "learning_rate": 8.11151410244535e-06, "loss": 1.824, "step": 28953000 }, { "epoch": 83.81, "learning_rate": 8.110790454798074e-06, "loss": 1.8451, "step": 28953500 }, { "epoch": 83.81, "learning_rate": 8.110066807150796e-06, "loss": 1.82, "step": 28954000 }, { "epoch": 83.81, "learning_rate": 8.10934315950352e-06, "loss": 1.799, "step": 28954500 }, { "epoch": 83.81, "learning_rate": 8.108619511856244e-06, "loss": 1.8162, "step": 28955000 }, { "epoch": 83.81, "learning_rate": 8.107895864208967e-06, "loss": 1.8465, "step": 28955500 }, { "epoch": 83.82, "learning_rate": 8.10717221656169e-06, "loss": 1.8423, "step": 28956000 }, { "epoch": 83.82, "learning_rate": 8.106448568914413e-06, "loss": 1.8332, "step": 28956500 }, { "epoch": 83.82, "learning_rate": 8.105726368562432e-06, "loss": 1.8327, "step": 28957000 }, { "epoch": 83.82, "learning_rate": 8.105002720915154e-06, "loss": 1.8391, "step": 28957500 }, { "epoch": 83.82, "learning_rate": 8.104279073267878e-06, "loss": 1.8462, "step": 28958000 }, { "epoch": 83.82, "learning_rate": 8.1035554256206e-06, "loss": 1.8234, "step": 28958500 }, { "epoch": 83.82, "learning_rate": 8.102833225268619e-06, "loss": 1.8208, "step": 28959000 }, { "epoch": 83.83, "learning_rate": 8.102109577621341e-06, "loss": 1.8383, "step": 28959500 }, { "epoch": 83.83, "learning_rate": 8.10138737726936e-06, "loss": 1.842, "step": 28960000 }, { "epoch": 83.83, "learning_rate": 8.100663729622082e-06, "loss": 1.8432, "step": 28960500 }, { "epoch": 83.83, "learning_rate": 8.099940081974806e-06, "loss": 1.8374, "step": 28961000 }, { "epoch": 83.83, "learning_rate": 8.099216434327529e-06, "loss": 1.8324, "step": 28961500 }, { "epoch": 83.83, "learning_rate": 8.098492786680253e-06, "loss": 1.8186, "step": 28962000 }, { "epoch": 83.83, "learning_rate": 8.097769139032975e-06, "loss": 1.8469, "step": 28962500 }, { "epoch": 83.84, "learning_rate": 8.097045491385699e-06, "loss": 1.8448, "step": 28963000 }, { "epoch": 83.84, "learning_rate": 8.096321843738423e-06, "loss": 1.8092, "step": 28963500 }, { "epoch": 83.84, "learning_rate": 8.095598196091145e-06, "loss": 1.8241, "step": 28964000 }, { "epoch": 83.84, "learning_rate": 8.094874548443869e-06, "loss": 1.8218, "step": 28964500 }, { "epoch": 83.84, "learning_rate": 8.094150900796591e-06, "loss": 1.8467, "step": 28965000 }, { "epoch": 83.84, "learning_rate": 8.093427253149315e-06, "loss": 1.8508, "step": 28965500 }, { "epoch": 83.84, "learning_rate": 8.092705052797332e-06, "loss": 1.8471, "step": 28966000 }, { "epoch": 83.85, "learning_rate": 8.091981405150056e-06, "loss": 1.8202, "step": 28966500 }, { "epoch": 83.85, "learning_rate": 8.09125775750278e-06, "loss": 1.85, "step": 28967000 }, { "epoch": 83.85, "learning_rate": 8.090534109855502e-06, "loss": 1.8243, "step": 28967500 }, { "epoch": 83.85, "learning_rate": 8.089810462208226e-06, "loss": 1.826, "step": 28968000 }, { "epoch": 83.85, "learning_rate": 8.089086814560948e-06, "loss": 1.8422, "step": 28968500 }, { "epoch": 83.85, "learning_rate": 8.088363166913672e-06, "loss": 1.8255, "step": 28969000 }, { "epoch": 83.85, "learning_rate": 8.087639519266395e-06, "loss": 1.8569, "step": 28969500 }, { "epoch": 83.86, "learning_rate": 8.086917318914414e-06, "loss": 1.8037, "step": 28970000 }, { "epoch": 83.86, "learning_rate": 8.08619511856243e-06, "loss": 1.8589, "step": 28970500 }, { "epoch": 83.86, "learning_rate": 8.085471470915155e-06, "loss": 1.8252, "step": 28971000 }, { "epoch": 83.86, "learning_rate": 8.084747823267877e-06, "loss": 1.8265, "step": 28971500 }, { "epoch": 83.86, "learning_rate": 8.084024175620601e-06, "loss": 1.8207, "step": 28972000 }, { "epoch": 83.86, "learning_rate": 8.083300527973325e-06, "loss": 1.8148, "step": 28972500 }, { "epoch": 83.86, "learning_rate": 8.082578327621342e-06, "loss": 1.8224, "step": 28973000 }, { "epoch": 83.87, "learning_rate": 8.081854679974064e-06, "loss": 1.8389, "step": 28973500 }, { "epoch": 83.87, "learning_rate": 8.081131032326788e-06, "loss": 1.8339, "step": 28974000 }, { "epoch": 83.87, "learning_rate": 8.080407384679512e-06, "loss": 1.8205, "step": 28974500 }, { "epoch": 83.87, "learning_rate": 8.07968518432753e-06, "loss": 1.8115, "step": 28975000 }, { "epoch": 83.87, "learning_rate": 8.078961536680252e-06, "loss": 1.8407, "step": 28975500 }, { "epoch": 83.87, "learning_rate": 8.078237889032976e-06, "loss": 1.8006, "step": 28976000 }, { "epoch": 83.88, "learning_rate": 8.077515688680993e-06, "loss": 1.8429, "step": 28976500 }, { "epoch": 83.88, "learning_rate": 8.076792041033717e-06, "loss": 1.8381, "step": 28977000 }, { "epoch": 83.88, "learning_rate": 8.076068393386439e-06, "loss": 1.8108, "step": 28977500 }, { "epoch": 83.88, "learning_rate": 8.075344745739163e-06, "loss": 1.8248, "step": 28978000 }, { "epoch": 83.88, "learning_rate": 8.074621098091887e-06, "loss": 1.8218, "step": 28978500 }, { "epoch": 83.88, "learning_rate": 8.073897450444609e-06, "loss": 1.8189, "step": 28979000 }, { "epoch": 83.88, "learning_rate": 8.073173802797333e-06, "loss": 1.8232, "step": 28979500 }, { "epoch": 83.89, "learning_rate": 8.072450155150057e-06, "loss": 1.8231, "step": 28980000 }, { "epoch": 83.89, "learning_rate": 8.071726507502779e-06, "loss": 1.8317, "step": 28980500 }, { "epoch": 83.89, "learning_rate": 8.071002859855501e-06, "loss": 1.8227, "step": 28981000 }, { "epoch": 83.89, "learning_rate": 8.07028065950352e-06, "loss": 1.8214, "step": 28981500 }, { "epoch": 83.89, "learning_rate": 8.069557011856244e-06, "loss": 1.8194, "step": 28982000 }, { "epoch": 83.89, "learning_rate": 8.068833364208966e-06, "loss": 1.8511, "step": 28982500 }, { "epoch": 83.89, "learning_rate": 8.068109716561689e-06, "loss": 1.8182, "step": 28983000 }, { "epoch": 83.9, "learning_rate": 8.067386068914413e-06, "loss": 1.8437, "step": 28983500 }, { "epoch": 83.9, "learning_rate": 8.066662421267137e-06, "loss": 1.8144, "step": 28984000 }, { "epoch": 83.9, "learning_rate": 8.065940220915154e-06, "loss": 1.8406, "step": 28984500 }, { "epoch": 83.9, "learning_rate": 8.065216573267878e-06, "loss": 1.8378, "step": 28985000 }, { "epoch": 83.9, "learning_rate": 8.064494372915895e-06, "loss": 1.8291, "step": 28985500 }, { "epoch": 83.9, "learning_rate": 8.063770725268619e-06, "loss": 1.8337, "step": 28986000 }, { "epoch": 83.9, "learning_rate": 8.063047077621341e-06, "loss": 1.8213, "step": 28986500 }, { "epoch": 83.91, "learning_rate": 8.062323429974065e-06, "loss": 1.8378, "step": 28987000 }, { "epoch": 83.91, "learning_rate": 8.061599782326789e-06, "loss": 1.8213, "step": 28987500 }, { "epoch": 83.91, "learning_rate": 8.060876134679511e-06, "loss": 1.8399, "step": 28988000 }, { "epoch": 83.91, "learning_rate": 8.060152487032233e-06, "loss": 1.8383, "step": 28988500 }, { "epoch": 83.91, "learning_rate": 8.059428839384957e-06, "loss": 1.8351, "step": 28989000 }, { "epoch": 83.91, "learning_rate": 8.058705191737681e-06, "loss": 1.842, "step": 28989500 }, { "epoch": 83.91, "learning_rate": 8.057981544090404e-06, "loss": 1.8346, "step": 28990000 }, { "epoch": 83.92, "learning_rate": 8.05725934373842e-06, "loss": 1.8573, "step": 28990500 }, { "epoch": 83.92, "learning_rate": 8.056535696091146e-06, "loss": 1.8306, "step": 28991000 }, { "epoch": 83.92, "learning_rate": 8.055812048443869e-06, "loss": 1.8347, "step": 28991500 }, { "epoch": 83.92, "learning_rate": 8.055088400796591e-06, "loss": 1.82, "step": 28992000 }, { "epoch": 83.92, "learning_rate": 8.054366200444608e-06, "loss": 1.8472, "step": 28992500 }, { "epoch": 83.92, "learning_rate": 8.053642552797334e-06, "loss": 1.8321, "step": 28993000 }, { "epoch": 83.92, "learning_rate": 8.052918905150056e-06, "loss": 1.8257, "step": 28993500 }, { "epoch": 83.93, "learning_rate": 8.052195257502778e-06, "loss": 1.818, "step": 28994000 }, { "epoch": 83.93, "learning_rate": 8.051471609855502e-06, "loss": 1.8059, "step": 28994500 }, { "epoch": 83.93, "learning_rate": 8.050749409503521e-06, "loss": 1.8469, "step": 28995000 }, { "epoch": 83.93, "learning_rate": 8.050025761856243e-06, "loss": 1.8194, "step": 28995500 }, { "epoch": 83.93, "learning_rate": 8.049302114208966e-06, "loss": 1.8343, "step": 28996000 }, { "epoch": 83.93, "learning_rate": 8.048578466561691e-06, "loss": 1.8011, "step": 28996500 }, { "epoch": 83.93, "learning_rate": 8.047854818914413e-06, "loss": 1.841, "step": 28997000 }, { "epoch": 83.94, "learning_rate": 8.047131171267136e-06, "loss": 1.8182, "step": 28997500 }, { "epoch": 83.94, "learning_rate": 8.046408970915153e-06, "loss": 1.8452, "step": 28998000 }, { "epoch": 83.94, "learning_rate": 8.045685323267879e-06, "loss": 1.852, "step": 28998500 }, { "epoch": 83.94, "learning_rate": 8.0449616756206e-06, "loss": 1.8246, "step": 28999000 }, { "epoch": 83.94, "learning_rate": 8.044238027973323e-06, "loss": 1.8201, "step": 28999500 }, { "epoch": 83.94, "learning_rate": 8.043514380326047e-06, "loss": 1.8356, "step": 29000000 }, { "epoch": 83.94, "learning_rate": 8.042790732678771e-06, "loss": 1.8278, "step": 29000500 }, { "epoch": 83.95, "learning_rate": 8.042068532326788e-06, "loss": 1.8367, "step": 29001000 }, { "epoch": 83.95, "learning_rate": 8.04134488467951e-06, "loss": 1.7993, "step": 29001500 }, { "epoch": 83.95, "learning_rate": 8.040621237032234e-06, "loss": 1.8236, "step": 29002000 }, { "epoch": 83.95, "learning_rate": 8.039899036680253e-06, "loss": 1.7993, "step": 29002500 }, { "epoch": 83.95, "learning_rate": 8.039175389032975e-06, "loss": 1.8634, "step": 29003000 }, { "epoch": 83.95, "learning_rate": 8.038451741385698e-06, "loss": 1.8261, "step": 29003500 }, { "epoch": 83.95, "learning_rate": 8.037728093738423e-06, "loss": 1.832, "step": 29004000 }, { "epoch": 83.96, "learning_rate": 8.037004446091146e-06, "loss": 1.825, "step": 29004500 }, { "epoch": 83.96, "learning_rate": 8.036280798443868e-06, "loss": 1.8507, "step": 29005000 }, { "epoch": 83.96, "learning_rate": 8.035557150796592e-06, "loss": 1.8064, "step": 29005500 }, { "epoch": 83.96, "learning_rate": 8.034833503149316e-06, "loss": 1.826, "step": 29006000 }, { "epoch": 83.96, "learning_rate": 8.034109855502038e-06, "loss": 1.8356, "step": 29006500 }, { "epoch": 83.96, "learning_rate": 8.033387655150055e-06, "loss": 1.8221, "step": 29007000 }, { "epoch": 83.96, "learning_rate": 8.032664007502779e-06, "loss": 1.83, "step": 29007500 }, { "epoch": 83.97, "learning_rate": 8.031941807150798e-06, "loss": 1.8041, "step": 29008000 }, { "epoch": 83.97, "learning_rate": 8.03121815950352e-06, "loss": 1.8383, "step": 29008500 }, { "epoch": 83.97, "learning_rate": 8.030494511856242e-06, "loss": 1.8344, "step": 29009000 }, { "epoch": 83.97, "learning_rate": 8.029770864208968e-06, "loss": 1.8171, "step": 29009500 }, { "epoch": 83.97, "learning_rate": 8.02904721656169e-06, "loss": 1.825, "step": 29010000 }, { "epoch": 83.97, "learning_rate": 8.028323568914413e-06, "loss": 1.8456, "step": 29010500 }, { "epoch": 83.97, "learning_rate": 8.027599921267136e-06, "loss": 1.8305, "step": 29011000 }, { "epoch": 83.98, "learning_rate": 8.02687627361986e-06, "loss": 1.8595, "step": 29011500 }, { "epoch": 83.98, "learning_rate": 8.026152625972583e-06, "loss": 1.851, "step": 29012000 }, { "epoch": 83.98, "learning_rate": 8.025428978325305e-06, "loss": 1.8228, "step": 29012500 }, { "epoch": 83.98, "learning_rate": 8.024706777973324e-06, "loss": 1.8398, "step": 29013000 }, { "epoch": 83.98, "learning_rate": 8.023983130326048e-06, "loss": 1.8326, "step": 29013500 }, { "epoch": 83.98, "learning_rate": 8.02325948267877e-06, "loss": 1.8269, "step": 29014000 }, { "epoch": 83.99, "learning_rate": 8.022535835031492e-06, "loss": 1.8196, "step": 29014500 }, { "epoch": 83.99, "learning_rate": 8.021813634679511e-06, "loss": 1.8159, "step": 29015000 }, { "epoch": 83.99, "learning_rate": 8.021089987032235e-06, "loss": 1.8126, "step": 29015500 }, { "epoch": 83.99, "learning_rate": 8.020366339384957e-06, "loss": 1.8328, "step": 29016000 }, { "epoch": 83.99, "learning_rate": 8.019642691737681e-06, "loss": 1.8244, "step": 29016500 }, { "epoch": 83.99, "learning_rate": 8.018919044090405e-06, "loss": 1.8513, "step": 29017000 }, { "epoch": 83.99, "learning_rate": 8.018195396443127e-06, "loss": 1.8135, "step": 29017500 }, { "epoch": 84.0, "learning_rate": 8.01747174879585e-06, "loss": 1.8204, "step": 29018000 }, { "epoch": 84.0, "learning_rate": 8.016749548443869e-06, "loss": 1.8493, "step": 29018500 }, { "epoch": 84.0, "learning_rate": 8.016025900796592e-06, "loss": 1.8158, "step": 29019000 }, { "epoch": 84.0, "learning_rate": 8.01530370044461e-06, "loss": 1.8117, "step": 29019500 }, { "epoch": 84.0, "eval_accuracy": 0.6880829597340532, "eval_accuracy_mlm": 0.6571719504827696, "eval_accuracy_nsp": 0.8539916495946249, "eval_loss": 2.1972768306732178, "eval_runtime": 331.8951, "eval_samples_per_second": 1314.831, "eval_steps_per_second": 54.785, "step": 29019648 }, { "epoch": 84.0, "learning_rate": 8.014581500092627e-06, "loss": 1.833, "step": 29020000 }, { "epoch": 84.0, "learning_rate": 8.01385785244535e-06, "loss": 1.8421, "step": 29020500 }, { "epoch": 84.0, "learning_rate": 8.013134204798075e-06, "loss": 1.8142, "step": 29021000 }, { "epoch": 84.01, "learning_rate": 8.012410557150797e-06, "loss": 1.8187, "step": 29021500 }, { "epoch": 84.01, "learning_rate": 8.01168690950352e-06, "loss": 1.8089, "step": 29022000 }, { "epoch": 84.01, "learning_rate": 8.010963261856245e-06, "loss": 1.834, "step": 29022500 }, { "epoch": 84.01, "learning_rate": 8.010239614208967e-06, "loss": 1.8241, "step": 29023000 }, { "epoch": 84.01, "learning_rate": 8.00951596656169e-06, "loss": 1.8236, "step": 29023500 }, { "epoch": 84.01, "learning_rate": 8.008792318914413e-06, "loss": 1.8181, "step": 29024000 }, { "epoch": 84.01, "learning_rate": 8.008068671267137e-06, "loss": 1.8156, "step": 29024500 }, { "epoch": 84.02, "learning_rate": 8.00734502361986e-06, "loss": 1.7956, "step": 29025000 }, { "epoch": 84.02, "learning_rate": 8.006621375972582e-06, "loss": 1.7979, "step": 29025500 }, { "epoch": 84.02, "learning_rate": 8.005897728325307e-06, "loss": 1.8344, "step": 29026000 }, { "epoch": 84.02, "learning_rate": 8.00517408067803e-06, "loss": 1.8403, "step": 29026500 }, { "epoch": 84.02, "learning_rate": 8.004451880326047e-06, "loss": 1.8157, "step": 29027000 }, { "epoch": 84.02, "learning_rate": 8.00372823267877e-06, "loss": 1.7896, "step": 29027500 }, { "epoch": 84.02, "learning_rate": 8.003004585031495e-06, "loss": 1.8299, "step": 29028000 }, { "epoch": 84.03, "learning_rate": 8.002280937384217e-06, "loss": 1.7937, "step": 29028500 }, { "epoch": 84.03, "learning_rate": 8.001558737032234e-06, "loss": 1.8232, "step": 29029000 }, { "epoch": 84.03, "learning_rate": 8.000835089384958e-06, "loss": 1.8142, "step": 29029500 }, { "epoch": 84.03, "learning_rate": 8.000111441737682e-06, "loss": 1.7983, "step": 29030000 }, { "epoch": 84.03, "learning_rate": 7.999387794090404e-06, "loss": 1.8046, "step": 29030500 }, { "epoch": 84.03, "learning_rate": 7.998665593738421e-06, "loss": 1.8189, "step": 29031000 }, { "epoch": 84.03, "learning_rate": 7.997941946091145e-06, "loss": 1.8289, "step": 29031500 }, { "epoch": 84.04, "learning_rate": 7.99721829844387e-06, "loss": 1.8186, "step": 29032000 }, { "epoch": 84.04, "learning_rate": 7.996494650796592e-06, "loss": 1.79, "step": 29032500 }, { "epoch": 84.04, "learning_rate": 7.995771003149314e-06, "loss": 1.8202, "step": 29033000 }, { "epoch": 84.04, "learning_rate": 7.995048802797333e-06, "loss": 1.8355, "step": 29033500 }, { "epoch": 84.04, "learning_rate": 7.994325155150057e-06, "loss": 1.8257, "step": 29034000 }, { "epoch": 84.04, "learning_rate": 7.993601507502779e-06, "loss": 1.8238, "step": 29034500 }, { "epoch": 84.04, "learning_rate": 7.992877859855503e-06, "loss": 1.8353, "step": 29035000 }, { "epoch": 84.05, "learning_rate": 7.992154212208227e-06, "loss": 1.8184, "step": 29035500 }, { "epoch": 84.05, "learning_rate": 7.991430564560949e-06, "loss": 1.8156, "step": 29036000 }, { "epoch": 84.05, "learning_rate": 7.990708364208966e-06, "loss": 1.8087, "step": 29036500 }, { "epoch": 84.05, "learning_rate": 7.98998471656169e-06, "loss": 1.8263, "step": 29037000 }, { "epoch": 84.05, "learning_rate": 7.989261068914414e-06, "loss": 1.8425, "step": 29037500 }, { "epoch": 84.05, "learning_rate": 7.988537421267136e-06, "loss": 1.8242, "step": 29038000 }, { "epoch": 84.05, "learning_rate": 7.987813773619859e-06, "loss": 1.7974, "step": 29038500 }, { "epoch": 84.06, "learning_rate": 7.987091573267878e-06, "loss": 1.8451, "step": 29039000 }, { "epoch": 84.06, "learning_rate": 7.986369372915895e-06, "loss": 1.8063, "step": 29039500 }, { "epoch": 84.06, "learning_rate": 7.985645725268619e-06, "loss": 1.8118, "step": 29040000 }, { "epoch": 84.06, "learning_rate": 7.984922077621341e-06, "loss": 1.8011, "step": 29040500 }, { "epoch": 84.06, "learning_rate": 7.984198429974065e-06, "loss": 1.8133, "step": 29041000 }, { "epoch": 84.06, "learning_rate": 7.983474782326789e-06, "loss": 1.8263, "step": 29041500 }, { "epoch": 84.06, "learning_rate": 7.982751134679511e-06, "loss": 1.8098, "step": 29042000 }, { "epoch": 84.07, "learning_rate": 7.982027487032235e-06, "loss": 1.8301, "step": 29042500 }, { "epoch": 84.07, "learning_rate": 7.981303839384959e-06, "loss": 1.8457, "step": 29043000 }, { "epoch": 84.07, "learning_rate": 7.980580191737681e-06, "loss": 1.8161, "step": 29043500 }, { "epoch": 84.07, "learning_rate": 7.979857991385698e-06, "loss": 1.8196, "step": 29044000 }, { "epoch": 84.07, "learning_rate": 7.979134343738422e-06, "loss": 1.8073, "step": 29044500 }, { "epoch": 84.07, "learning_rate": 7.978410696091146e-06, "loss": 1.8159, "step": 29045000 }, { "epoch": 84.07, "learning_rate": 7.977687048443868e-06, "loss": 1.8287, "step": 29045500 }, { "epoch": 84.08, "learning_rate": 7.976964848091886e-06, "loss": 1.8159, "step": 29046000 }, { "epoch": 84.08, "learning_rate": 7.97624120044461e-06, "loss": 1.8035, "step": 29046500 }, { "epoch": 84.08, "learning_rate": 7.975517552797334e-06, "loss": 1.8168, "step": 29047000 }, { "epoch": 84.08, "learning_rate": 7.97479535244535e-06, "loss": 1.8053, "step": 29047500 }, { "epoch": 84.08, "learning_rate": 7.974071704798073e-06, "loss": 1.8337, "step": 29048000 }, { "epoch": 84.08, "learning_rate": 7.973348057150797e-06, "loss": 1.8109, "step": 29048500 }, { "epoch": 84.08, "learning_rate": 7.972624409503521e-06, "loss": 1.8292, "step": 29049000 }, { "epoch": 84.09, "learning_rate": 7.971900761856243e-06, "loss": 1.8202, "step": 29049500 }, { "epoch": 84.09, "learning_rate": 7.971177114208967e-06, "loss": 1.8128, "step": 29050000 }, { "epoch": 84.09, "learning_rate": 7.97045346656169e-06, "loss": 1.8209, "step": 29050500 }, { "epoch": 84.09, "learning_rate": 7.969729818914413e-06, "loss": 1.795, "step": 29051000 }, { "epoch": 84.09, "learning_rate": 7.969006171267135e-06, "loss": 1.8093, "step": 29051500 }, { "epoch": 84.09, "learning_rate": 7.96828252361986e-06, "loss": 1.8038, "step": 29052000 }, { "epoch": 84.1, "learning_rate": 7.967558875972583e-06, "loss": 1.8244, "step": 29052500 }, { "epoch": 84.1, "learning_rate": 7.966835228325306e-06, "loss": 1.8071, "step": 29053000 }, { "epoch": 84.1, "learning_rate": 7.966113027973324e-06, "loss": 1.7958, "step": 29053500 }, { "epoch": 84.1, "learning_rate": 7.965389380326047e-06, "loss": 1.8157, "step": 29054000 }, { "epoch": 84.1, "learning_rate": 7.96466573267877e-06, "loss": 1.8529, "step": 29054500 }, { "epoch": 84.1, "learning_rate": 7.963942085031493e-06, "loss": 1.8146, "step": 29055000 }, { "epoch": 84.1, "learning_rate": 7.963218437384217e-06, "loss": 1.8112, "step": 29055500 }, { "epoch": 84.11, "learning_rate": 7.96249478973694e-06, "loss": 1.8217, "step": 29056000 }, { "epoch": 84.11, "learning_rate": 7.961772589384958e-06, "loss": 1.8158, "step": 29056500 }, { "epoch": 84.11, "learning_rate": 7.96104894173768e-06, "loss": 1.7986, "step": 29057000 }, { "epoch": 84.11, "learning_rate": 7.960325294090404e-06, "loss": 1.7896, "step": 29057500 }, { "epoch": 84.11, "learning_rate": 7.959603093738421e-06, "loss": 1.8426, "step": 29058000 }, { "epoch": 84.11, "learning_rate": 7.958879446091145e-06, "loss": 1.8119, "step": 29058500 }, { "epoch": 84.11, "learning_rate": 7.95815579844387e-06, "loss": 1.8128, "step": 29059000 }, { "epoch": 84.12, "learning_rate": 7.957432150796591e-06, "loss": 1.8097, "step": 29059500 }, { "epoch": 84.12, "learning_rate": 7.956708503149315e-06, "loss": 1.8358, "step": 29060000 }, { "epoch": 84.12, "learning_rate": 7.955984855502038e-06, "loss": 1.8049, "step": 29060500 }, { "epoch": 84.12, "learning_rate": 7.955261207854762e-06, "loss": 1.8103, "step": 29061000 }, { "epoch": 84.12, "learning_rate": 7.954539007502779e-06, "loss": 1.8063, "step": 29061500 }, { "epoch": 84.12, "learning_rate": 7.953815359855503e-06, "loss": 1.8149, "step": 29062000 }, { "epoch": 84.12, "learning_rate": 7.953091712208225e-06, "loss": 1.8172, "step": 29062500 }, { "epoch": 84.13, "learning_rate": 7.952368064560949e-06, "loss": 1.8245, "step": 29063000 }, { "epoch": 84.13, "learning_rate": 7.951644416913673e-06, "loss": 1.8322, "step": 29063500 }, { "epoch": 84.13, "learning_rate": 7.950920769266395e-06, "loss": 1.8208, "step": 29064000 }, { "epoch": 84.13, "learning_rate": 7.950197121619119e-06, "loss": 1.7943, "step": 29064500 }, { "epoch": 84.13, "learning_rate": 7.949473473971841e-06, "loss": 1.837, "step": 29065000 }, { "epoch": 84.13, "learning_rate": 7.948749826324565e-06, "loss": 1.8096, "step": 29065500 }, { "epoch": 84.13, "learning_rate": 7.948026178677289e-06, "loss": 1.8613, "step": 29066000 }, { "epoch": 84.14, "learning_rate": 7.947302531030011e-06, "loss": 1.8172, "step": 29066500 }, { "epoch": 84.14, "learning_rate": 7.946580330678029e-06, "loss": 1.8133, "step": 29067000 }, { "epoch": 84.14, "learning_rate": 7.945856683030752e-06, "loss": 1.8353, "step": 29067500 }, { "epoch": 84.14, "learning_rate": 7.945133035383476e-06, "loss": 1.8244, "step": 29068000 }, { "epoch": 84.14, "learning_rate": 7.944410835031494e-06, "loss": 1.8507, "step": 29068500 }, { "epoch": 84.14, "learning_rate": 7.943687187384216e-06, "loss": 1.8056, "step": 29069000 }, { "epoch": 84.14, "learning_rate": 7.94296353973694e-06, "loss": 1.7867, "step": 29069500 }, { "epoch": 84.15, "learning_rate": 7.942239892089664e-06, "loss": 1.8102, "step": 29070000 }, { "epoch": 84.15, "learning_rate": 7.941516244442386e-06, "loss": 1.8037, "step": 29070500 }, { "epoch": 84.15, "learning_rate": 7.94079259679511e-06, "loss": 1.8377, "step": 29071000 }, { "epoch": 84.15, "learning_rate": 7.940068949147834e-06, "loss": 1.7882, "step": 29071500 }, { "epoch": 84.15, "learning_rate": 7.939345301500556e-06, "loss": 1.7936, "step": 29072000 }, { "epoch": 84.15, "learning_rate": 7.938621653853278e-06, "loss": 1.8299, "step": 29072500 }, { "epoch": 84.15, "learning_rate": 7.937898006206002e-06, "loss": 1.8388, "step": 29073000 }, { "epoch": 84.16, "learning_rate": 7.937174358558726e-06, "loss": 1.82, "step": 29073500 }, { "epoch": 84.16, "learning_rate": 7.936452158206743e-06, "loss": 1.8471, "step": 29074000 }, { "epoch": 84.16, "learning_rate": 7.935728510559467e-06, "loss": 1.8045, "step": 29074500 }, { "epoch": 84.16, "learning_rate": 7.93500486291219e-06, "loss": 1.8247, "step": 29075000 }, { "epoch": 84.16, "learning_rate": 7.934281215264914e-06, "loss": 1.8377, "step": 29075500 }, { "epoch": 84.16, "learning_rate": 7.93355901491293e-06, "loss": 1.819, "step": 29076000 }, { "epoch": 84.16, "learning_rate": 7.932835367265655e-06, "loss": 1.8008, "step": 29076500 }, { "epoch": 84.17, "learning_rate": 7.932113166913672e-06, "loss": 1.8022, "step": 29077000 }, { "epoch": 84.17, "learning_rate": 7.931389519266396e-06, "loss": 1.8129, "step": 29077500 }, { "epoch": 84.17, "learning_rate": 7.930665871619118e-06, "loss": 1.8276, "step": 29078000 }, { "epoch": 84.17, "learning_rate": 7.929942223971842e-06, "loss": 1.7854, "step": 29078500 }, { "epoch": 84.17, "learning_rate": 7.929218576324566e-06, "loss": 1.8389, "step": 29079000 }, { "epoch": 84.17, "learning_rate": 7.928496375972583e-06, "loss": 1.8149, "step": 29079500 }, { "epoch": 84.17, "learning_rate": 7.927772728325305e-06, "loss": 1.822, "step": 29080000 }, { "epoch": 84.18, "learning_rate": 7.92704908067803e-06, "loss": 1.8405, "step": 29080500 }, { "epoch": 84.18, "learning_rate": 7.926325433030753e-06, "loss": 1.816, "step": 29081000 }, { "epoch": 84.18, "learning_rate": 7.925601785383476e-06, "loss": 1.7996, "step": 29081500 }, { "epoch": 84.18, "learning_rate": 7.924878137736198e-06, "loss": 1.786, "step": 29082000 }, { "epoch": 84.18, "learning_rate": 7.924154490088922e-06, "loss": 1.8284, "step": 29082500 }, { "epoch": 84.18, "learning_rate": 7.923430842441646e-06, "loss": 1.8129, "step": 29083000 }, { "epoch": 84.18, "learning_rate": 7.922707194794368e-06, "loss": 1.8122, "step": 29083500 }, { "epoch": 84.19, "learning_rate": 7.921983547147092e-06, "loss": 1.8267, "step": 29084000 }, { "epoch": 84.19, "learning_rate": 7.921259899499816e-06, "loss": 1.8092, "step": 29084500 }, { "epoch": 84.19, "learning_rate": 7.920536251852538e-06, "loss": 1.8261, "step": 29085000 }, { "epoch": 84.19, "learning_rate": 7.919814051500555e-06, "loss": 1.7743, "step": 29085500 }, { "epoch": 84.19, "learning_rate": 7.919091851148574e-06, "loss": 1.8171, "step": 29086000 }, { "epoch": 84.19, "learning_rate": 7.918368203501298e-06, "loss": 1.8131, "step": 29086500 }, { "epoch": 84.19, "learning_rate": 7.91764455585402e-06, "loss": 1.8109, "step": 29087000 }, { "epoch": 84.2, "learning_rate": 7.916920908206743e-06, "loss": 1.8242, "step": 29087500 }, { "epoch": 84.2, "learning_rate": 7.916197260559466e-06, "loss": 1.7861, "step": 29088000 }, { "epoch": 84.2, "learning_rate": 7.915475060207485e-06, "loss": 1.8299, "step": 29088500 }, { "epoch": 84.2, "learning_rate": 7.914751412560208e-06, "loss": 1.8302, "step": 29089000 }, { "epoch": 84.2, "learning_rate": 7.91402776491293e-06, "loss": 1.8108, "step": 29089500 }, { "epoch": 84.2, "learning_rate": 7.913304117265655e-06, "loss": 1.8053, "step": 29090000 }, { "epoch": 84.21, "learning_rate": 7.912583364208968e-06, "loss": 1.8233, "step": 29090500 }, { "epoch": 84.21, "learning_rate": 7.91185971656169e-06, "loss": 1.806, "step": 29091000 }, { "epoch": 84.21, "learning_rate": 7.911136068914412e-06, "loss": 1.8353, "step": 29091500 }, { "epoch": 84.21, "learning_rate": 7.910412421267136e-06, "loss": 1.8359, "step": 29092000 }, { "epoch": 84.21, "learning_rate": 7.90968877361986e-06, "loss": 1.8241, "step": 29092500 }, { "epoch": 84.21, "learning_rate": 7.908965125972582e-06, "loss": 1.8222, "step": 29093000 }, { "epoch": 84.21, "learning_rate": 7.908241478325306e-06, "loss": 1.8223, "step": 29093500 }, { "epoch": 84.22, "learning_rate": 7.90751783067803e-06, "loss": 1.8093, "step": 29094000 }, { "epoch": 84.22, "learning_rate": 7.906794183030752e-06, "loss": 1.7983, "step": 29094500 }, { "epoch": 84.22, "learning_rate": 7.906070535383475e-06, "loss": 1.8378, "step": 29095000 }, { "epoch": 84.22, "learning_rate": 7.9053468877362e-06, "loss": 1.8225, "step": 29095500 }, { "epoch": 84.22, "learning_rate": 7.904624687384217e-06, "loss": 1.8165, "step": 29096000 }, { "epoch": 84.22, "learning_rate": 7.90390103973694e-06, "loss": 1.8201, "step": 29096500 }, { "epoch": 84.22, "learning_rate": 7.903177392089662e-06, "loss": 1.8221, "step": 29097000 }, { "epoch": 84.23, "learning_rate": 7.902453744442388e-06, "loss": 1.8279, "step": 29097500 }, { "epoch": 84.23, "learning_rate": 7.90173009679511e-06, "loss": 1.8122, "step": 29098000 }, { "epoch": 84.23, "learning_rate": 7.901006449147832e-06, "loss": 1.817, "step": 29098500 }, { "epoch": 84.23, "learning_rate": 7.900282801500556e-06, "loss": 1.8168, "step": 29099000 }, { "epoch": 84.23, "learning_rate": 7.89955915385328e-06, "loss": 1.8087, "step": 29099500 }, { "epoch": 84.23, "learning_rate": 7.898836953501297e-06, "loss": 1.8326, "step": 29100000 }, { "epoch": 84.23, "learning_rate": 7.89811330585402e-06, "loss": 1.8001, "step": 29100500 }, { "epoch": 84.24, "learning_rate": 7.897389658206743e-06, "loss": 1.7939, "step": 29101000 }, { "epoch": 84.24, "learning_rate": 7.896667457854762e-06, "loss": 1.8373, "step": 29101500 }, { "epoch": 84.24, "learning_rate": 7.895943810207484e-06, "loss": 1.7953, "step": 29102000 }, { "epoch": 84.24, "learning_rate": 7.895221609855502e-06, "loss": 1.8037, "step": 29102500 }, { "epoch": 84.24, "learning_rate": 7.894497962208226e-06, "loss": 1.8302, "step": 29103000 }, { "epoch": 84.24, "learning_rate": 7.89377431456095e-06, "loss": 1.8167, "step": 29103500 }, { "epoch": 84.24, "learning_rate": 7.893050666913672e-06, "loss": 1.811, "step": 29104000 }, { "epoch": 84.25, "learning_rate": 7.892327019266394e-06, "loss": 1.8069, "step": 29104500 }, { "epoch": 84.25, "learning_rate": 7.89160337161912e-06, "loss": 1.8236, "step": 29105000 }, { "epoch": 84.25, "learning_rate": 7.890879723971842e-06, "loss": 1.7918, "step": 29105500 }, { "epoch": 84.25, "learning_rate": 7.890156076324564e-06, "loss": 1.8134, "step": 29106000 }, { "epoch": 84.25, "learning_rate": 7.889432428677288e-06, "loss": 1.8137, "step": 29106500 }, { "epoch": 84.25, "learning_rate": 7.888708781030012e-06, "loss": 1.8067, "step": 29107000 }, { "epoch": 84.25, "learning_rate": 7.887985133382734e-06, "loss": 1.8133, "step": 29107500 }, { "epoch": 84.26, "learning_rate": 7.887261485735458e-06, "loss": 1.8104, "step": 29108000 }, { "epoch": 84.26, "learning_rate": 7.88654073267877e-06, "loss": 1.8148, "step": 29108500 }, { "epoch": 84.26, "learning_rate": 7.885817085031494e-06, "loss": 1.8304, "step": 29109000 }, { "epoch": 84.26, "learning_rate": 7.885093437384217e-06, "loss": 1.8254, "step": 29109500 }, { "epoch": 84.26, "learning_rate": 7.884369789736939e-06, "loss": 1.8168, "step": 29110000 }, { "epoch": 84.26, "learning_rate": 7.883646142089664e-06, "loss": 1.8219, "step": 29110500 }, { "epoch": 84.26, "learning_rate": 7.882922494442387e-06, "loss": 1.8243, "step": 29111000 }, { "epoch": 84.27, "learning_rate": 7.882198846795109e-06, "loss": 1.8226, "step": 29111500 }, { "epoch": 84.27, "learning_rate": 7.881475199147833e-06, "loss": 1.848, "step": 29112000 }, { "epoch": 84.27, "learning_rate": 7.880752998795852e-06, "loss": 1.82, "step": 29112500 }, { "epoch": 84.27, "learning_rate": 7.880029351148574e-06, "loss": 1.8368, "step": 29113000 }, { "epoch": 84.27, "learning_rate": 7.879307150796591e-06, "loss": 1.8191, "step": 29113500 }, { "epoch": 84.27, "learning_rate": 7.878583503149315e-06, "loss": 1.7996, "step": 29114000 }, { "epoch": 84.27, "learning_rate": 7.877859855502039e-06, "loss": 1.8083, "step": 29114500 }, { "epoch": 84.28, "learning_rate": 7.877136207854761e-06, "loss": 1.8128, "step": 29115000 }, { "epoch": 84.28, "learning_rate": 7.876412560207484e-06, "loss": 1.7979, "step": 29115500 }, { "epoch": 84.28, "learning_rate": 7.875690359855503e-06, "loss": 1.8362, "step": 29116000 }, { "epoch": 84.28, "learning_rate": 7.874966712208226e-06, "loss": 1.8133, "step": 29116500 }, { "epoch": 84.28, "learning_rate": 7.874243064560949e-06, "loss": 1.7941, "step": 29117000 }, { "epoch": 84.28, "learning_rate": 7.873519416913671e-06, "loss": 1.812, "step": 29117500 }, { "epoch": 84.28, "learning_rate": 7.872795769266397e-06, "loss": 1.8408, "step": 29118000 }, { "epoch": 84.29, "learning_rate": 7.872073568914414e-06, "loss": 1.816, "step": 29118500 }, { "epoch": 84.29, "learning_rate": 7.871349921267136e-06, "loss": 1.834, "step": 29119000 }, { "epoch": 84.29, "learning_rate": 7.870626273619858e-06, "loss": 1.7816, "step": 29119500 }, { "epoch": 84.29, "learning_rate": 7.869902625972584e-06, "loss": 1.8292, "step": 29120000 }, { "epoch": 84.29, "learning_rate": 7.869180425620601e-06, "loss": 1.7958, "step": 29120500 }, { "epoch": 84.29, "learning_rate": 7.868456777973323e-06, "loss": 1.8309, "step": 29121000 }, { "epoch": 84.29, "learning_rate": 7.867733130326047e-06, "loss": 1.8218, "step": 29121500 }, { "epoch": 84.3, "learning_rate": 7.867009482678771e-06, "loss": 1.8499, "step": 29122000 }, { "epoch": 84.3, "learning_rate": 7.866285835031493e-06, "loss": 1.7968, "step": 29122500 }, { "epoch": 84.3, "learning_rate": 7.86556363467951e-06, "loss": 1.8342, "step": 29123000 }, { "epoch": 84.3, "learning_rate": 7.864839987032235e-06, "loss": 1.8007, "step": 29123500 }, { "epoch": 84.3, "learning_rate": 7.864116339384959e-06, "loss": 1.8153, "step": 29124000 }, { "epoch": 84.3, "learning_rate": 7.86339269173768e-06, "loss": 1.821, "step": 29124500 }, { "epoch": 84.3, "learning_rate": 7.862669044090403e-06, "loss": 1.8205, "step": 29125000 }, { "epoch": 84.31, "learning_rate": 7.861946843738422e-06, "loss": 1.814, "step": 29125500 }, { "epoch": 84.31, "learning_rate": 7.861223196091146e-06, "loss": 1.8111, "step": 29126000 }, { "epoch": 84.31, "learning_rate": 7.860499548443868e-06, "loss": 1.8439, "step": 29126500 }, { "epoch": 84.31, "learning_rate": 7.859775900796592e-06, "loss": 1.8259, "step": 29127000 }, { "epoch": 84.31, "learning_rate": 7.859052253149316e-06, "loss": 1.8094, "step": 29127500 }, { "epoch": 84.31, "learning_rate": 7.858328605502038e-06, "loss": 1.8303, "step": 29128000 }, { "epoch": 84.32, "learning_rate": 7.85760495785476e-06, "loss": 1.8306, "step": 29128500 }, { "epoch": 84.32, "learning_rate": 7.85688275750278e-06, "loss": 1.8125, "step": 29129000 }, { "epoch": 84.32, "learning_rate": 7.856159109855503e-06, "loss": 1.84, "step": 29129500 }, { "epoch": 84.32, "learning_rate": 7.855435462208226e-06, "loss": 1.824, "step": 29130000 }, { "epoch": 84.32, "learning_rate": 7.854711814560948e-06, "loss": 1.8083, "step": 29130500 }, { "epoch": 84.32, "learning_rate": 7.853989614208967e-06, "loss": 1.8408, "step": 29131000 }, { "epoch": 84.32, "learning_rate": 7.85326596656169e-06, "loss": 1.7977, "step": 29131500 }, { "epoch": 84.33, "learning_rate": 7.852542318914413e-06, "loss": 1.8182, "step": 29132000 }, { "epoch": 84.33, "learning_rate": 7.851818671267137e-06, "loss": 1.8367, "step": 29132500 }, { "epoch": 84.33, "learning_rate": 7.85109502361986e-06, "loss": 1.818, "step": 29133000 }, { "epoch": 84.33, "learning_rate": 7.850372823267878e-06, "loss": 1.799, "step": 29133500 }, { "epoch": 84.33, "learning_rate": 7.8496491756206e-06, "loss": 1.8322, "step": 29134000 }, { "epoch": 84.33, "learning_rate": 7.848925527973324e-06, "loss": 1.8194, "step": 29134500 }, { "epoch": 84.33, "learning_rate": 7.848201880326048e-06, "loss": 1.8176, "step": 29135000 }, { "epoch": 84.34, "learning_rate": 7.84747823267877e-06, "loss": 1.8317, "step": 29135500 }, { "epoch": 84.34, "learning_rate": 7.846754585031493e-06, "loss": 1.8235, "step": 29136000 }, { "epoch": 84.34, "learning_rate": 7.846032384679512e-06, "loss": 1.8301, "step": 29136500 }, { "epoch": 84.34, "learning_rate": 7.845308737032235e-06, "loss": 1.8583, "step": 29137000 }, { "epoch": 84.34, "learning_rate": 7.844585089384958e-06, "loss": 1.812, "step": 29137500 }, { "epoch": 84.34, "learning_rate": 7.84386144173768e-06, "loss": 1.8148, "step": 29138000 }, { "epoch": 84.34, "learning_rate": 7.843137794090404e-06, "loss": 1.8121, "step": 29138500 }, { "epoch": 84.35, "learning_rate": 7.842414146443128e-06, "loss": 1.8269, "step": 29139000 }, { "epoch": 84.35, "learning_rate": 7.841691946091145e-06, "loss": 1.8222, "step": 29139500 }, { "epoch": 84.35, "learning_rate": 7.840968298443869e-06, "loss": 1.825, "step": 29140000 }, { "epoch": 84.35, "learning_rate": 7.840246098091886e-06, "loss": 1.839, "step": 29140500 }, { "epoch": 84.35, "learning_rate": 7.83952245044461e-06, "loss": 1.8135, "step": 29141000 }, { "epoch": 84.35, "learning_rate": 7.838798802797332e-06, "loss": 1.8147, "step": 29141500 }, { "epoch": 84.35, "learning_rate": 7.838075155150056e-06, "loss": 1.8077, "step": 29142000 }, { "epoch": 84.36, "learning_rate": 7.83735150750278e-06, "loss": 1.8291, "step": 29142500 }, { "epoch": 84.36, "learning_rate": 7.836627859855502e-06, "loss": 1.7929, "step": 29143000 }, { "epoch": 84.36, "learning_rate": 7.835904212208225e-06, "loss": 1.8359, "step": 29143500 }, { "epoch": 84.36, "learning_rate": 7.835180564560949e-06, "loss": 1.8215, "step": 29144000 }, { "epoch": 84.36, "learning_rate": 7.834458364208968e-06, "loss": 1.8359, "step": 29144500 }, { "epoch": 84.36, "learning_rate": 7.83373471656169e-06, "loss": 1.8113, "step": 29145000 }, { "epoch": 84.36, "learning_rate": 7.833011068914414e-06, "loss": 1.8297, "step": 29145500 }, { "epoch": 84.37, "learning_rate": 7.832287421267136e-06, "loss": 1.7915, "step": 29146000 }, { "epoch": 84.37, "learning_rate": 7.83156377361986e-06, "loss": 1.8324, "step": 29146500 }, { "epoch": 84.37, "learning_rate": 7.830840125972582e-06, "loss": 1.8113, "step": 29147000 }, { "epoch": 84.37, "learning_rate": 7.830116478325306e-06, "loss": 1.8459, "step": 29147500 }, { "epoch": 84.37, "learning_rate": 7.82939283067803e-06, "loss": 1.8136, "step": 29148000 }, { "epoch": 84.37, "learning_rate": 7.828669183030752e-06, "loss": 1.8229, "step": 29148500 }, { "epoch": 84.37, "learning_rate": 7.82794698267877e-06, "loss": 1.8305, "step": 29149000 }, { "epoch": 84.38, "learning_rate": 7.827223335031493e-06, "loss": 1.8326, "step": 29149500 }, { "epoch": 84.38, "learning_rate": 7.826499687384217e-06, "loss": 1.8212, "step": 29150000 }, { "epoch": 84.38, "learning_rate": 7.82577603973694e-06, "loss": 1.822, "step": 29150500 }, { "epoch": 84.38, "learning_rate": 7.825052392089663e-06, "loss": 1.8205, "step": 29151000 }, { "epoch": 84.38, "learning_rate": 7.82433019173768e-06, "loss": 1.8183, "step": 29151500 }, { "epoch": 84.38, "learning_rate": 7.823606544090405e-06, "loss": 1.793, "step": 29152000 }, { "epoch": 84.38, "learning_rate": 7.822882896443127e-06, "loss": 1.8091, "step": 29152500 }, { "epoch": 84.39, "learning_rate": 7.82215924879585e-06, "loss": 1.814, "step": 29153000 }, { "epoch": 84.39, "learning_rate": 7.821435601148575e-06, "loss": 1.8331, "step": 29153500 }, { "epoch": 84.39, "learning_rate": 7.820713400796592e-06, "loss": 1.8322, "step": 29154000 }, { "epoch": 84.39, "learning_rate": 7.819989753149314e-06, "loss": 1.8286, "step": 29154500 }, { "epoch": 84.39, "learning_rate": 7.819266105502038e-06, "loss": 1.829, "step": 29155000 }, { "epoch": 84.39, "learning_rate": 7.818542457854762e-06, "loss": 1.8059, "step": 29155500 }, { "epoch": 84.39, "learning_rate": 7.81782025750278e-06, "loss": 1.843, "step": 29156000 }, { "epoch": 84.4, "learning_rate": 7.817096609855502e-06, "loss": 1.8278, "step": 29156500 }, { "epoch": 84.4, "learning_rate": 7.816372962208225e-06, "loss": 1.8285, "step": 29157000 }, { "epoch": 84.4, "learning_rate": 7.81564931456095e-06, "loss": 1.8546, "step": 29157500 }, { "epoch": 84.4, "learning_rate": 7.814925666913672e-06, "loss": 1.7881, "step": 29158000 }, { "epoch": 84.4, "learning_rate": 7.81420346656169e-06, "loss": 1.8323, "step": 29158500 }, { "epoch": 84.4, "learning_rate": 7.813479818914413e-06, "loss": 1.8288, "step": 29159000 }, { "epoch": 84.4, "learning_rate": 7.812756171267137e-06, "loss": 1.8224, "step": 29159500 }, { "epoch": 84.41, "learning_rate": 7.812032523619859e-06, "loss": 1.8234, "step": 29160000 }, { "epoch": 84.41, "learning_rate": 7.811308875972583e-06, "loss": 1.8382, "step": 29160500 }, { "epoch": 84.41, "learning_rate": 7.810585228325307e-06, "loss": 1.8311, "step": 29161000 }, { "epoch": 84.41, "learning_rate": 7.809861580678029e-06, "loss": 1.8303, "step": 29161500 }, { "epoch": 84.41, "learning_rate": 7.809137933030753e-06, "loss": 1.8269, "step": 29162000 }, { "epoch": 84.41, "learning_rate": 7.80841573267877e-06, "loss": 1.8171, "step": 29162500 }, { "epoch": 84.41, "learning_rate": 7.807693532326787e-06, "loss": 1.8042, "step": 29163000 }, { "epoch": 84.42, "learning_rate": 7.806969884679511e-06, "loss": 1.8243, "step": 29163500 }, { "epoch": 84.42, "learning_rate": 7.806246237032235e-06, "loss": 1.8191, "step": 29164000 }, { "epoch": 84.42, "learning_rate": 7.805524036680253e-06, "loss": 1.8425, "step": 29164500 }, { "epoch": 84.42, "learning_rate": 7.804800389032975e-06, "loss": 1.8137, "step": 29165000 }, { "epoch": 84.42, "learning_rate": 7.804076741385699e-06, "loss": 1.8261, "step": 29165500 }, { "epoch": 84.42, "learning_rate": 7.803353093738423e-06, "loss": 1.8078, "step": 29166000 }, { "epoch": 84.43, "learning_rate": 7.802629446091145e-06, "loss": 1.8151, "step": 29166500 }, { "epoch": 84.43, "learning_rate": 7.801905798443869e-06, "loss": 1.8128, "step": 29167000 }, { "epoch": 84.43, "learning_rate": 7.801182150796591e-06, "loss": 1.8359, "step": 29167500 }, { "epoch": 84.43, "learning_rate": 7.800458503149315e-06, "loss": 1.8248, "step": 29168000 }, { "epoch": 84.43, "learning_rate": 7.799734855502037e-06, "loss": 1.8165, "step": 29168500 }, { "epoch": 84.43, "learning_rate": 7.799012655150056e-06, "loss": 1.8281, "step": 29169000 }, { "epoch": 84.43, "learning_rate": 7.798289007502778e-06, "loss": 1.8276, "step": 29169500 }, { "epoch": 84.44, "learning_rate": 7.797565359855502e-06, "loss": 1.8328, "step": 29170000 }, { "epoch": 84.44, "learning_rate": 7.796841712208226e-06, "loss": 1.8137, "step": 29170500 }, { "epoch": 84.44, "learning_rate": 7.796118064560948e-06, "loss": 1.8474, "step": 29171000 }, { "epoch": 84.44, "learning_rate": 7.795394416913672e-06, "loss": 1.8255, "step": 29171500 }, { "epoch": 84.44, "learning_rate": 7.794670769266395e-06, "loss": 1.8386, "step": 29172000 }, { "epoch": 84.44, "learning_rate": 7.793948568914414e-06, "loss": 1.8241, "step": 29172500 }, { "epoch": 84.44, "learning_rate": 7.793224921267136e-06, "loss": 1.8366, "step": 29173000 }, { "epoch": 84.45, "learning_rate": 7.79250127361986e-06, "loss": 1.8281, "step": 29173500 }, { "epoch": 84.45, "learning_rate": 7.791777625972582e-06, "loss": 1.8147, "step": 29174000 }, { "epoch": 84.45, "learning_rate": 7.791053978325306e-06, "loss": 1.8152, "step": 29174500 }, { "epoch": 84.45, "learning_rate": 7.79033033067803e-06, "loss": 1.8098, "step": 29175000 }, { "epoch": 84.45, "learning_rate": 7.789608130326047e-06, "loss": 1.8357, "step": 29175500 }, { "epoch": 84.45, "learning_rate": 7.78888448267877e-06, "loss": 1.7985, "step": 29176000 }, { "epoch": 84.45, "learning_rate": 7.788160835031493e-06, "loss": 1.832, "step": 29176500 }, { "epoch": 84.46, "learning_rate": 7.787437187384217e-06, "loss": 1.8163, "step": 29177000 }, { "epoch": 84.46, "learning_rate": 7.786714987032234e-06, "loss": 1.8465, "step": 29177500 }, { "epoch": 84.46, "learning_rate": 7.785991339384958e-06, "loss": 1.8223, "step": 29178000 }, { "epoch": 84.46, "learning_rate": 7.78526769173768e-06, "loss": 1.8197, "step": 29178500 }, { "epoch": 84.46, "learning_rate": 7.7845454913857e-06, "loss": 1.8327, "step": 29179000 }, { "epoch": 84.46, "learning_rate": 7.783823291033717e-06, "loss": 1.8212, "step": 29179500 }, { "epoch": 84.46, "learning_rate": 7.783099643386439e-06, "loss": 1.8055, "step": 29180000 }, { "epoch": 84.47, "learning_rate": 7.782375995739163e-06, "loss": 1.8028, "step": 29180500 }, { "epoch": 84.47, "learning_rate": 7.781652348091887e-06, "loss": 1.8178, "step": 29181000 }, { "epoch": 84.47, "learning_rate": 7.780928700444609e-06, "loss": 1.8404, "step": 29181500 }, { "epoch": 84.47, "learning_rate": 7.780205052797333e-06, "loss": 1.8069, "step": 29182000 }, { "epoch": 84.47, "learning_rate": 7.779481405150057e-06, "loss": 1.822, "step": 29182500 }, { "epoch": 84.47, "learning_rate": 7.77875775750278e-06, "loss": 1.8174, "step": 29183000 }, { "epoch": 84.47, "learning_rate": 7.778034109855501e-06, "loss": 1.8279, "step": 29183500 }, { "epoch": 84.48, "learning_rate": 7.777310462208225e-06, "loss": 1.848, "step": 29184000 }, { "epoch": 84.48, "learning_rate": 7.77658681456095e-06, "loss": 1.8192, "step": 29184500 }, { "epoch": 84.48, "learning_rate": 7.775863166913672e-06, "loss": 1.8231, "step": 29185000 }, { "epoch": 84.48, "learning_rate": 7.775140966561689e-06, "loss": 1.8278, "step": 29185500 }, { "epoch": 84.48, "learning_rate": 7.774417318914413e-06, "loss": 1.8415, "step": 29186000 }, { "epoch": 84.48, "learning_rate": 7.773693671267137e-06, "loss": 1.8072, "step": 29186500 }, { "epoch": 84.48, "learning_rate": 7.772970023619859e-06, "loss": 1.8159, "step": 29187000 }, { "epoch": 84.49, "learning_rate": 7.772246375972583e-06, "loss": 1.8504, "step": 29187500 }, { "epoch": 84.49, "learning_rate": 7.771522728325307e-06, "loss": 1.8139, "step": 29188000 }, { "epoch": 84.49, "learning_rate": 7.770800527973324e-06, "loss": 1.7824, "step": 29188500 }, { "epoch": 84.49, "learning_rate": 7.770076880326046e-06, "loss": 1.8395, "step": 29189000 }, { "epoch": 84.49, "learning_rate": 7.76935323267877e-06, "loss": 1.8084, "step": 29189500 }, { "epoch": 84.49, "learning_rate": 7.768629585031494e-06, "loss": 1.8212, "step": 29190000 }, { "epoch": 84.49, "learning_rate": 7.767905937384216e-06, "loss": 1.7961, "step": 29190500 }, { "epoch": 84.5, "learning_rate": 7.76718228973694e-06, "loss": 1.834, "step": 29191000 }, { "epoch": 84.5, "learning_rate": 7.766458642089664e-06, "loss": 1.8063, "step": 29191500 }, { "epoch": 84.5, "learning_rate": 7.765734994442386e-06, "loss": 1.8445, "step": 29192000 }, { "epoch": 84.5, "learning_rate": 7.765011346795109e-06, "loss": 1.8149, "step": 29192500 }, { "epoch": 84.5, "learning_rate": 7.764287699147833e-06, "loss": 1.8362, "step": 29193000 }, { "epoch": 84.5, "learning_rate": 7.763565498795851e-06, "loss": 1.8091, "step": 29193500 }, { "epoch": 84.5, "learning_rate": 7.762841851148574e-06, "loss": 1.8186, "step": 29194000 }, { "epoch": 84.51, "learning_rate": 7.762118203501296e-06, "loss": 1.8077, "step": 29194500 }, { "epoch": 84.51, "learning_rate": 7.761394555854022e-06, "loss": 1.8433, "step": 29195000 }, { "epoch": 84.51, "learning_rate": 7.760670908206744e-06, "loss": 1.8217, "step": 29195500 }, { "epoch": 84.51, "learning_rate": 7.759947260559466e-06, "loss": 1.8142, "step": 29196000 }, { "epoch": 84.51, "learning_rate": 7.75922361291219e-06, "loss": 1.8165, "step": 29196500 }, { "epoch": 84.51, "learning_rate": 7.758501412560209e-06, "loss": 1.8181, "step": 29197000 }, { "epoch": 84.51, "learning_rate": 7.757779212208226e-06, "loss": 1.8155, "step": 29197500 }, { "epoch": 84.52, "learning_rate": 7.757055564560948e-06, "loss": 1.8263, "step": 29198000 }, { "epoch": 84.52, "learning_rate": 7.756331916913672e-06, "loss": 1.8061, "step": 29198500 }, { "epoch": 84.52, "learning_rate": 7.755608269266396e-06, "loss": 1.8346, "step": 29199000 }, { "epoch": 84.52, "learning_rate": 7.754884621619118e-06, "loss": 1.8518, "step": 29199500 }, { "epoch": 84.52, "learning_rate": 7.75416097397184e-06, "loss": 1.8204, "step": 29200000 }, { "epoch": 84.52, "learning_rate": 7.753437326324565e-06, "loss": 1.8538, "step": 29200500 }, { "epoch": 84.52, "learning_rate": 7.752715125972584e-06, "loss": 1.8271, "step": 29201000 }, { "epoch": 84.53, "learning_rate": 7.751991478325306e-06, "loss": 1.786, "step": 29201500 }, { "epoch": 84.53, "learning_rate": 7.751267830678028e-06, "loss": 1.811, "step": 29202000 }, { "epoch": 84.53, "learning_rate": 7.750544183030754e-06, "loss": 1.8003, "step": 29202500 }, { "epoch": 84.53, "learning_rate": 7.749820535383476e-06, "loss": 1.8391, "step": 29203000 }, { "epoch": 84.53, "learning_rate": 7.749096887736198e-06, "loss": 1.8234, "step": 29203500 }, { "epoch": 84.53, "learning_rate": 7.748373240088922e-06, "loss": 1.8291, "step": 29204000 }, { "epoch": 84.54, "learning_rate": 7.747649592441646e-06, "loss": 1.833, "step": 29204500 }, { "epoch": 84.54, "learning_rate": 7.746927392089663e-06, "loss": 1.84, "step": 29205000 }, { "epoch": 84.54, "learning_rate": 7.746203744442385e-06, "loss": 1.8181, "step": 29205500 }, { "epoch": 84.54, "learning_rate": 7.74548009679511e-06, "loss": 1.8282, "step": 29206000 }, { "epoch": 84.54, "learning_rate": 7.744756449147833e-06, "loss": 1.8034, "step": 29206500 }, { "epoch": 84.54, "learning_rate": 7.744032801500556e-06, "loss": 1.8447, "step": 29207000 }, { "epoch": 84.54, "learning_rate": 7.743310601148573e-06, "loss": 1.8269, "step": 29207500 }, { "epoch": 84.55, "learning_rate": 7.742586953501298e-06, "loss": 1.8233, "step": 29208000 }, { "epoch": 84.55, "learning_rate": 7.74186330585402e-06, "loss": 1.817, "step": 29208500 }, { "epoch": 84.55, "learning_rate": 7.741139658206743e-06, "loss": 1.8204, "step": 29209000 }, { "epoch": 84.55, "learning_rate": 7.74041745785476e-06, "loss": 1.8364, "step": 29209500 }, { "epoch": 84.55, "learning_rate": 7.739693810207486e-06, "loss": 1.7921, "step": 29210000 }, { "epoch": 84.55, "learning_rate": 7.738970162560208e-06, "loss": 1.8485, "step": 29210500 }, { "epoch": 84.55, "learning_rate": 7.73824651491293e-06, "loss": 1.8265, "step": 29211000 }, { "epoch": 84.56, "learning_rate": 7.737522867265654e-06, "loss": 1.8271, "step": 29211500 }, { "epoch": 84.56, "learning_rate": 7.736800666913673e-06, "loss": 1.8352, "step": 29212000 }, { "epoch": 84.56, "learning_rate": 7.736077019266395e-06, "loss": 1.8136, "step": 29212500 }, { "epoch": 84.56, "learning_rate": 7.735353371619118e-06, "loss": 1.8383, "step": 29213000 }, { "epoch": 84.56, "learning_rate": 7.734629723971843e-06, "loss": 1.8125, "step": 29213500 }, { "epoch": 84.56, "learning_rate": 7.733906076324565e-06, "loss": 1.8322, "step": 29214000 }, { "epoch": 84.56, "learning_rate": 7.733183875972583e-06, "loss": 1.828, "step": 29214500 }, { "epoch": 84.57, "learning_rate": 7.732460228325305e-06, "loss": 1.8233, "step": 29215000 }, { "epoch": 84.57, "learning_rate": 7.73173658067803e-06, "loss": 1.8301, "step": 29215500 }, { "epoch": 84.57, "learning_rate": 7.731012933030753e-06, "loss": 1.8432, "step": 29216000 }, { "epoch": 84.57, "learning_rate": 7.730289285383475e-06, "loss": 1.8482, "step": 29216500 }, { "epoch": 84.57, "learning_rate": 7.729565637736199e-06, "loss": 1.8298, "step": 29217000 }, { "epoch": 84.57, "learning_rate": 7.728843437384218e-06, "loss": 1.8585, "step": 29217500 }, { "epoch": 84.57, "learning_rate": 7.728121237032235e-06, "loss": 1.8561, "step": 29218000 }, { "epoch": 84.58, "learning_rate": 7.727397589384957e-06, "loss": 1.8041, "step": 29218500 }, { "epoch": 84.58, "learning_rate": 7.72667394173768e-06, "loss": 1.8322, "step": 29219000 }, { "epoch": 84.58, "learning_rate": 7.725950294090405e-06, "loss": 1.8121, "step": 29219500 }, { "epoch": 84.58, "learning_rate": 7.725228093738422e-06, "loss": 1.8242, "step": 29220000 }, { "epoch": 84.58, "learning_rate": 7.724504446091145e-06, "loss": 1.8242, "step": 29220500 }, { "epoch": 84.58, "learning_rate": 7.723780798443869e-06, "loss": 1.8104, "step": 29221000 }, { "epoch": 84.58, "learning_rate": 7.723057150796593e-06, "loss": 1.8215, "step": 29221500 }, { "epoch": 84.59, "learning_rate": 7.722333503149315e-06, "loss": 1.8197, "step": 29222000 }, { "epoch": 84.59, "learning_rate": 7.721609855502037e-06, "loss": 1.8121, "step": 29222500 }, { "epoch": 84.59, "learning_rate": 7.720886207854763e-06, "loss": 1.8561, "step": 29223000 }, { "epoch": 84.59, "learning_rate": 7.720162560207485e-06, "loss": 1.8033, "step": 29223500 }, { "epoch": 84.59, "learning_rate": 7.719438912560207e-06, "loss": 1.8424, "step": 29224000 }, { "epoch": 84.59, "learning_rate": 7.718716712208224e-06, "loss": 1.8088, "step": 29224500 }, { "epoch": 84.59, "learning_rate": 7.717994511856243e-06, "loss": 1.8149, "step": 29225000 }, { "epoch": 84.6, "learning_rate": 7.717272311504262e-06, "loss": 1.8243, "step": 29225500 }, { "epoch": 84.6, "learning_rate": 7.716548663856984e-06, "loss": 1.8461, "step": 29226000 }, { "epoch": 84.6, "learning_rate": 7.715825016209707e-06, "loss": 1.8361, "step": 29226500 }, { "epoch": 84.6, "learning_rate": 7.715101368562432e-06, "loss": 1.8211, "step": 29227000 }, { "epoch": 84.6, "learning_rate": 7.714377720915155e-06, "loss": 1.8281, "step": 29227500 }, { "epoch": 84.6, "learning_rate": 7.713654073267877e-06, "loss": 1.8182, "step": 29228000 }, { "epoch": 84.6, "learning_rate": 7.7129304256206e-06, "loss": 1.8266, "step": 29228500 }, { "epoch": 84.61, "learning_rate": 7.712206777973325e-06, "loss": 1.8366, "step": 29229000 }, { "epoch": 84.61, "learning_rate": 7.711484577621342e-06, "loss": 1.8279, "step": 29229500 }, { "epoch": 84.61, "learning_rate": 7.710760929974064e-06, "loss": 1.8505, "step": 29230000 }, { "epoch": 84.61, "learning_rate": 7.710037282326788e-06, "loss": 1.8142, "step": 29230500 }, { "epoch": 84.61, "learning_rate": 7.709313634679512e-06, "loss": 1.8213, "step": 29231000 }, { "epoch": 84.61, "learning_rate": 7.708589987032234e-06, "loss": 1.8358, "step": 29231500 }, { "epoch": 84.61, "learning_rate": 7.707866339384958e-06, "loss": 1.8623, "step": 29232000 }, { "epoch": 84.62, "learning_rate": 7.707142691737682e-06, "loss": 1.8453, "step": 29232500 }, { "epoch": 84.62, "learning_rate": 7.706419044090404e-06, "loss": 1.7991, "step": 29233000 }, { "epoch": 84.62, "learning_rate": 7.705695396443127e-06, "loss": 1.8315, "step": 29233500 }, { "epoch": 84.62, "learning_rate": 7.70497174879585e-06, "loss": 1.8272, "step": 29234000 }, { "epoch": 84.62, "learning_rate": 7.704248101148574e-06, "loss": 1.8159, "step": 29234500 }, { "epoch": 84.62, "learning_rate": 7.703524453501297e-06, "loss": 1.8092, "step": 29235000 }, { "epoch": 84.62, "learning_rate": 7.70280080585402e-06, "loss": 1.8299, "step": 29235500 }, { "epoch": 84.63, "learning_rate": 7.702078605502038e-06, "loss": 1.8387, "step": 29236000 }, { "epoch": 84.63, "learning_rate": 7.701354957854762e-06, "loss": 1.871, "step": 29236500 }, { "epoch": 84.63, "learning_rate": 7.700631310207484e-06, "loss": 1.8289, "step": 29237000 }, { "epoch": 84.63, "learning_rate": 7.699907662560208e-06, "loss": 1.8126, "step": 29237500 }, { "epoch": 84.63, "learning_rate": 7.699184014912932e-06, "loss": 1.827, "step": 29238000 }, { "epoch": 84.63, "learning_rate": 7.698460367265654e-06, "loss": 1.8427, "step": 29238500 }, { "epoch": 84.63, "learning_rate": 7.697736719618378e-06, "loss": 1.806, "step": 29239000 }, { "epoch": 84.64, "learning_rate": 7.697013071971102e-06, "loss": 1.8305, "step": 29239500 }, { "epoch": 84.64, "learning_rate": 7.69629087161912e-06, "loss": 1.8037, "step": 29240000 }, { "epoch": 84.64, "learning_rate": 7.695567223971841e-06, "loss": 1.7975, "step": 29240500 }, { "epoch": 84.64, "learning_rate": 7.694846470915154e-06, "loss": 1.8386, "step": 29241000 }, { "epoch": 84.64, "learning_rate": 7.694122823267878e-06, "loss": 1.8063, "step": 29241500 }, { "epoch": 84.64, "learning_rate": 7.693399175620602e-06, "loss": 1.7979, "step": 29242000 }, { "epoch": 84.65, "learning_rate": 7.692675527973324e-06, "loss": 1.8127, "step": 29242500 }, { "epoch": 84.65, "learning_rate": 7.691951880326046e-06, "loss": 1.8153, "step": 29243000 }, { "epoch": 84.65, "learning_rate": 7.69122823267877e-06, "loss": 1.8214, "step": 29243500 }, { "epoch": 84.65, "learning_rate": 7.690504585031494e-06, "loss": 1.8272, "step": 29244000 }, { "epoch": 84.65, "learning_rate": 7.689780937384216e-06, "loss": 1.8219, "step": 29244500 }, { "epoch": 84.65, "learning_rate": 7.689058737032235e-06, "loss": 1.8272, "step": 29245000 }, { "epoch": 84.65, "learning_rate": 7.688335089384959e-06, "loss": 1.8479, "step": 29245500 }, { "epoch": 84.66, "learning_rate": 7.687611441737681e-06, "loss": 1.8487, "step": 29246000 }, { "epoch": 84.66, "learning_rate": 7.686887794090403e-06, "loss": 1.8267, "step": 29246500 }, { "epoch": 84.66, "learning_rate": 7.686164146443127e-06, "loss": 1.8073, "step": 29247000 }, { "epoch": 84.66, "learning_rate": 7.685440498795851e-06, "loss": 1.8273, "step": 29247500 }, { "epoch": 84.66, "learning_rate": 7.684716851148574e-06, "loss": 1.8574, "step": 29248000 }, { "epoch": 84.66, "learning_rate": 7.683993203501297e-06, "loss": 1.8032, "step": 29248500 }, { "epoch": 84.66, "learning_rate": 7.683271003149315e-06, "loss": 1.8217, "step": 29249000 }, { "epoch": 84.67, "learning_rate": 7.682547355502039e-06, "loss": 1.8175, "step": 29249500 }, { "epoch": 84.67, "learning_rate": 7.681825155150056e-06, "loss": 1.8106, "step": 29250000 }, { "epoch": 84.67, "learning_rate": 7.68110150750278e-06, "loss": 1.7981, "step": 29250500 }, { "epoch": 84.67, "learning_rate": 7.680379307150797e-06, "loss": 1.8341, "step": 29251000 }, { "epoch": 84.67, "learning_rate": 7.679655659503521e-06, "loss": 1.8391, "step": 29251500 }, { "epoch": 84.67, "learning_rate": 7.678932011856243e-06, "loss": 1.8136, "step": 29252000 }, { "epoch": 84.67, "learning_rate": 7.678208364208967e-06, "loss": 1.82, "step": 29252500 }, { "epoch": 84.68, "learning_rate": 7.677486163856984e-06, "loss": 1.8341, "step": 29253000 }, { "epoch": 84.68, "learning_rate": 7.676762516209708e-06, "loss": 1.8002, "step": 29253500 }, { "epoch": 84.68, "learning_rate": 7.67603886856243e-06, "loss": 1.8595, "step": 29254000 }, { "epoch": 84.68, "learning_rate": 7.675315220915154e-06, "loss": 1.81, "step": 29254500 }, { "epoch": 84.68, "learning_rate": 7.674591573267878e-06, "loss": 1.7963, "step": 29255000 }, { "epoch": 84.68, "learning_rate": 7.6738679256206e-06, "loss": 1.8266, "step": 29255500 }, { "epoch": 84.68, "learning_rate": 7.673144277973323e-06, "loss": 1.8315, "step": 29256000 }, { "epoch": 84.69, "learning_rate": 7.672420630326047e-06, "loss": 1.8189, "step": 29256500 }, { "epoch": 84.69, "learning_rate": 7.67169698267877e-06, "loss": 1.8372, "step": 29257000 }, { "epoch": 84.69, "learning_rate": 7.670973335031493e-06, "loss": 1.8386, "step": 29257500 }, { "epoch": 84.69, "learning_rate": 7.670249687384217e-06, "loss": 1.8245, "step": 29258000 }, { "epoch": 84.69, "learning_rate": 7.66952603973694e-06, "loss": 1.828, "step": 29258500 }, { "epoch": 84.69, "learning_rate": 7.668803839384958e-06, "loss": 1.8017, "step": 29259000 }, { "epoch": 84.69, "learning_rate": 7.668081639032975e-06, "loss": 1.8005, "step": 29259500 }, { "epoch": 84.7, "learning_rate": 7.6673579913857e-06, "loss": 1.8156, "step": 29260000 }, { "epoch": 84.7, "learning_rate": 7.666634343738421e-06, "loss": 1.8186, "step": 29260500 }, { "epoch": 84.7, "learning_rate": 7.665910696091145e-06, "loss": 1.8255, "step": 29261000 }, { "epoch": 84.7, "learning_rate": 7.665187048443868e-06, "loss": 1.7977, "step": 29261500 }, { "epoch": 84.7, "learning_rate": 7.664463400796592e-06, "loss": 1.8167, "step": 29262000 }, { "epoch": 84.7, "learning_rate": 7.663741200444609e-06, "loss": 1.8199, "step": 29262500 }, { "epoch": 84.7, "learning_rate": 7.663017552797333e-06, "loss": 1.8223, "step": 29263000 }, { "epoch": 84.71, "learning_rate": 7.662293905150057e-06, "loss": 1.8269, "step": 29263500 }, { "epoch": 84.71, "learning_rate": 7.661571704798074e-06, "loss": 1.806, "step": 29264000 }, { "epoch": 84.71, "learning_rate": 7.660848057150798e-06, "loss": 1.8191, "step": 29264500 }, { "epoch": 84.71, "learning_rate": 7.66012440950352e-06, "loss": 1.8106, "step": 29265000 }, { "epoch": 84.71, "learning_rate": 7.659400761856244e-06, "loss": 1.8385, "step": 29265500 }, { "epoch": 84.71, "learning_rate": 7.658678561504261e-06, "loss": 1.8143, "step": 29266000 }, { "epoch": 84.71, "learning_rate": 7.657954913856985e-06, "loss": 1.793, "step": 29266500 }, { "epoch": 84.72, "learning_rate": 7.657231266209707e-06, "loss": 1.8201, "step": 29267000 }, { "epoch": 84.72, "learning_rate": 7.656507618562431e-06, "loss": 1.8259, "step": 29267500 }, { "epoch": 84.72, "learning_rate": 7.655783970915154e-06, "loss": 1.8215, "step": 29268000 }, { "epoch": 84.72, "learning_rate": 7.655060323267878e-06, "loss": 1.8272, "step": 29268500 }, { "epoch": 84.72, "learning_rate": 7.6543366756206e-06, "loss": 1.8317, "step": 29269000 }, { "epoch": 84.72, "learning_rate": 7.653613027973324e-06, "loss": 1.841, "step": 29269500 }, { "epoch": 84.72, "learning_rate": 7.652889380326048e-06, "loss": 1.8171, "step": 29270000 }, { "epoch": 84.73, "learning_rate": 7.65216573267877e-06, "loss": 1.8262, "step": 29270500 }, { "epoch": 84.73, "learning_rate": 7.651443532326789e-06, "loss": 1.8319, "step": 29271000 }, { "epoch": 84.73, "learning_rate": 7.650721331974806e-06, "loss": 1.8117, "step": 29271500 }, { "epoch": 84.73, "learning_rate": 7.649997684327528e-06, "loss": 1.8237, "step": 29272000 }, { "epoch": 84.73, "learning_rate": 7.649274036680252e-06, "loss": 1.8273, "step": 29272500 }, { "epoch": 84.73, "learning_rate": 7.648550389032976e-06, "loss": 1.8363, "step": 29273000 }, { "epoch": 84.73, "learning_rate": 7.647826741385698e-06, "loss": 1.8375, "step": 29273500 }, { "epoch": 84.74, "learning_rate": 7.647103093738422e-06, "loss": 1.8232, "step": 29274000 }, { "epoch": 84.74, "learning_rate": 7.646379446091145e-06, "loss": 1.8189, "step": 29274500 }, { "epoch": 84.74, "learning_rate": 7.645655798443868e-06, "loss": 1.8423, "step": 29275000 }, { "epoch": 84.74, "learning_rate": 7.644933598091886e-06, "loss": 1.8232, "step": 29275500 }, { "epoch": 84.74, "learning_rate": 7.64420995044461e-06, "loss": 1.8277, "step": 29276000 }, { "epoch": 84.74, "learning_rate": 7.643486302797334e-06, "loss": 1.8022, "step": 29276500 }, { "epoch": 84.74, "learning_rate": 7.642762655150056e-06, "loss": 1.8035, "step": 29277000 }, { "epoch": 84.75, "learning_rate": 7.64203900750278e-06, "loss": 1.8501, "step": 29277500 }, { "epoch": 84.75, "learning_rate": 7.641315359855502e-06, "loss": 1.8232, "step": 29278000 }, { "epoch": 84.75, "learning_rate": 7.640594606798814e-06, "loss": 1.8131, "step": 29278500 }, { "epoch": 84.75, "learning_rate": 7.639870959151538e-06, "loss": 1.828, "step": 29279000 }, { "epoch": 84.75, "learning_rate": 7.63914731150426e-06, "loss": 1.8332, "step": 29279500 }, { "epoch": 84.75, "learning_rate": 7.638423663856984e-06, "loss": 1.8343, "step": 29280000 }, { "epoch": 84.76, "learning_rate": 7.637700016209708e-06, "loss": 1.8334, "step": 29280500 }, { "epoch": 84.76, "learning_rate": 7.63697636856243e-06, "loss": 1.82, "step": 29281000 }, { "epoch": 84.76, "learning_rate": 7.636252720915154e-06, "loss": 1.8198, "step": 29281500 }, { "epoch": 84.76, "learning_rate": 7.635529073267878e-06, "loss": 1.8298, "step": 29282000 }, { "epoch": 84.76, "learning_rate": 7.634806872915896e-06, "loss": 1.8418, "step": 29282500 }, { "epoch": 84.76, "learning_rate": 7.634084672563913e-06, "loss": 1.8542, "step": 29283000 }, { "epoch": 84.76, "learning_rate": 7.633361024916637e-06, "loss": 1.8269, "step": 29283500 }, { "epoch": 84.77, "learning_rate": 7.632637377269359e-06, "loss": 1.8633, "step": 29284000 }, { "epoch": 84.77, "learning_rate": 7.631913729622083e-06, "loss": 1.8108, "step": 29284500 }, { "epoch": 84.77, "learning_rate": 7.631190081974805e-06, "loss": 1.8127, "step": 29285000 }, { "epoch": 84.77, "learning_rate": 7.630466434327529e-06, "loss": 1.8271, "step": 29285500 }, { "epoch": 84.77, "learning_rate": 7.629742786680253e-06, "loss": 1.8562, "step": 29286000 }, { "epoch": 84.77, "learning_rate": 7.629019139032975e-06, "loss": 1.8253, "step": 29286500 }, { "epoch": 84.77, "learning_rate": 7.628295491385699e-06, "loss": 1.8374, "step": 29287000 }, { "epoch": 84.78, "learning_rate": 7.627573291033717e-06, "loss": 1.8113, "step": 29287500 }, { "epoch": 84.78, "learning_rate": 7.6268496433864395e-06, "loss": 1.8171, "step": 29288000 }, { "epoch": 84.78, "learning_rate": 7.6261259957391625e-06, "loss": 1.851, "step": 29288500 }, { "epoch": 84.78, "learning_rate": 7.6254023480918865e-06, "loss": 1.8268, "step": 29289000 }, { "epoch": 84.78, "learning_rate": 7.6246787004446096e-06, "loss": 1.8049, "step": 29289500 }, { "epoch": 84.78, "learning_rate": 7.623956500092627e-06, "loss": 1.8095, "step": 29290000 }, { "epoch": 84.78, "learning_rate": 7.62323285244535e-06, "loss": 1.8019, "step": 29290500 }, { "epoch": 84.79, "learning_rate": 7.622509204798074e-06, "loss": 1.8253, "step": 29291000 }, { "epoch": 84.79, "learning_rate": 7.621785557150797e-06, "loss": 1.8327, "step": 29291500 }, { "epoch": 84.79, "learning_rate": 7.62106190950352e-06, "loss": 1.817, "step": 29292000 }, { "epoch": 84.79, "learning_rate": 7.620339709151537e-06, "loss": 1.8123, "step": 29292500 }, { "epoch": 84.79, "learning_rate": 7.619616061504262e-06, "loss": 1.8011, "step": 29293000 }, { "epoch": 84.79, "learning_rate": 7.618892413856984e-06, "loss": 1.8252, "step": 29293500 }, { "epoch": 84.79, "learning_rate": 7.618168766209707e-06, "loss": 1.8153, "step": 29294000 }, { "epoch": 84.8, "learning_rate": 7.6174465658577246e-06, "loss": 1.8532, "step": 29294500 }, { "epoch": 84.8, "learning_rate": 7.616722918210449e-06, "loss": 1.7922, "step": 29295000 }, { "epoch": 84.8, "learning_rate": 7.6159992705631716e-06, "loss": 1.8177, "step": 29295500 }, { "epoch": 84.8, "learning_rate": 7.615275622915895e-06, "loss": 1.8176, "step": 29296000 }, { "epoch": 84.8, "learning_rate": 7.614551975268619e-06, "loss": 1.8443, "step": 29296500 }, { "epoch": 84.8, "learning_rate": 7.613828327621342e-06, "loss": 1.8137, "step": 29297000 }, { "epoch": 84.8, "learning_rate": 7.61310612726936e-06, "loss": 1.8122, "step": 29297500 }, { "epoch": 84.81, "learning_rate": 7.612382479622082e-06, "loss": 1.8151, "step": 29298000 }, { "epoch": 84.81, "learning_rate": 7.6116602792701e-06, "loss": 1.8338, "step": 29298500 }, { "epoch": 84.81, "learning_rate": 7.610936631622824e-06, "loss": 1.8218, "step": 29299000 }, { "epoch": 84.81, "learning_rate": 7.610212983975547e-06, "loss": 1.8038, "step": 29299500 }, { "epoch": 84.81, "learning_rate": 7.609489336328269e-06, "loss": 1.8173, "step": 29300000 }, { "epoch": 84.81, "learning_rate": 7.608765688680994e-06, "loss": 1.8502, "step": 29300500 }, { "epoch": 84.81, "learning_rate": 7.608042041033716e-06, "loss": 1.8228, "step": 29301000 }, { "epoch": 84.82, "learning_rate": 7.607318393386439e-06, "loss": 1.8427, "step": 29301500 }, { "epoch": 84.82, "learning_rate": 7.606594745739163e-06, "loss": 1.8015, "step": 29302000 }, { "epoch": 84.82, "learning_rate": 7.6058710980918864e-06, "loss": 1.8021, "step": 29302500 }, { "epoch": 84.82, "learning_rate": 7.6051474504446095e-06, "loss": 1.8139, "step": 29303000 }, { "epoch": 84.82, "learning_rate": 7.604425250092627e-06, "loss": 1.8322, "step": 29303500 }, { "epoch": 84.82, "learning_rate": 7.6037016024453515e-06, "loss": 1.8041, "step": 29304000 }, { "epoch": 84.82, "learning_rate": 7.602977954798074e-06, "loss": 1.8178, "step": 29304500 }, { "epoch": 84.83, "learning_rate": 7.602254307150797e-06, "loss": 1.8123, "step": 29305000 }, { "epoch": 84.83, "learning_rate": 7.601532106798814e-06, "loss": 1.8283, "step": 29305500 }, { "epoch": 84.83, "learning_rate": 7.600808459151539e-06, "loss": 1.8532, "step": 29306000 }, { "epoch": 84.83, "learning_rate": 7.600084811504261e-06, "loss": 1.8219, "step": 29306500 }, { "epoch": 84.83, "learning_rate": 7.599361163856984e-06, "loss": 1.8496, "step": 29307000 }, { "epoch": 84.83, "learning_rate": 7.598637516209707e-06, "loss": 1.7974, "step": 29307500 }, { "epoch": 84.83, "learning_rate": 7.597913868562431e-06, "loss": 1.8445, "step": 29308000 }, { "epoch": 84.84, "learning_rate": 7.597190220915154e-06, "loss": 1.8341, "step": 29308500 }, { "epoch": 84.84, "learning_rate": 7.5964665732678765e-06, "loss": 1.7889, "step": 29309000 }, { "epoch": 84.84, "learning_rate": 7.595742925620601e-06, "loss": 1.8195, "step": 29309500 }, { "epoch": 84.84, "learning_rate": 7.5950207252686185e-06, "loss": 1.8328, "step": 29310000 }, { "epoch": 84.84, "learning_rate": 7.594297077621342e-06, "loss": 1.8185, "step": 29310500 }, { "epoch": 84.84, "learning_rate": 7.593574877269359e-06, "loss": 1.8046, "step": 29311000 }, { "epoch": 84.84, "learning_rate": 7.592851229622084e-06, "loss": 1.83, "step": 29311500 }, { "epoch": 84.85, "learning_rate": 7.592127581974806e-06, "loss": 1.8248, "step": 29312000 }, { "epoch": 84.85, "learning_rate": 7.591403934327529e-06, "loss": 1.8357, "step": 29312500 }, { "epoch": 84.85, "learning_rate": 7.590680286680252e-06, "loss": 1.8083, "step": 29313000 }, { "epoch": 84.85, "learning_rate": 7.589958086328271e-06, "loss": 1.8002, "step": 29313500 }, { "epoch": 84.85, "learning_rate": 7.589234438680993e-06, "loss": 1.814, "step": 29314000 }, { "epoch": 84.85, "learning_rate": 7.588510791033716e-06, "loss": 1.7825, "step": 29314500 }, { "epoch": 84.85, "learning_rate": 7.587787143386439e-06, "loss": 1.8282, "step": 29315000 }, { "epoch": 84.86, "learning_rate": 7.587064943034458e-06, "loss": 1.8848, "step": 29315500 }, { "epoch": 84.86, "learning_rate": 7.586341295387181e-06, "loss": 1.8334, "step": 29316000 }, { "epoch": 84.86, "learning_rate": 7.585617647739904e-06, "loss": 1.8289, "step": 29316500 }, { "epoch": 84.86, "learning_rate": 7.584894000092627e-06, "loss": 1.8118, "step": 29317000 }, { "epoch": 84.86, "learning_rate": 7.584170352445351e-06, "loss": 1.8372, "step": 29317500 }, { "epoch": 84.86, "learning_rate": 7.583446704798074e-06, "loss": 1.7997, "step": 29318000 }, { "epoch": 84.87, "learning_rate": 7.582723057150797e-06, "loss": 1.8088, "step": 29318500 }, { "epoch": 84.87, "learning_rate": 7.581999409503521e-06, "loss": 1.8175, "step": 29319000 }, { "epoch": 84.87, "learning_rate": 7.581275761856244e-06, "loss": 1.8289, "step": 29319500 }, { "epoch": 84.87, "learning_rate": 7.580553561504261e-06, "loss": 1.8459, "step": 29320000 }, { "epoch": 84.87, "learning_rate": 7.579829913856984e-06, "loss": 1.7984, "step": 29320500 }, { "epoch": 84.87, "learning_rate": 7.579106266209708e-06, "loss": 1.8319, "step": 29321000 }, { "epoch": 84.87, "learning_rate": 7.578382618562431e-06, "loss": 1.8084, "step": 29321500 }, { "epoch": 84.88, "learning_rate": 7.577660418210448e-06, "loss": 1.8325, "step": 29322000 }, { "epoch": 84.88, "learning_rate": 7.5769367705631715e-06, "loss": 1.8184, "step": 29322500 }, { "epoch": 84.88, "learning_rate": 7.576213122915895e-06, "loss": 1.8088, "step": 29323000 }, { "epoch": 84.88, "learning_rate": 7.5754894752686185e-06, "loss": 1.8291, "step": 29323500 }, { "epoch": 84.88, "learning_rate": 7.5747658276213416e-06, "loss": 1.8144, "step": 29324000 }, { "epoch": 84.88, "learning_rate": 7.5740421799740655e-06, "loss": 1.8423, "step": 29324500 }, { "epoch": 84.88, "learning_rate": 7.5733185323267886e-06, "loss": 1.8151, "step": 29325000 }, { "epoch": 84.89, "learning_rate": 7.572594884679511e-06, "loss": 1.8248, "step": 29325500 }, { "epoch": 84.89, "learning_rate": 7.571872684327529e-06, "loss": 1.8201, "step": 29326000 }, { "epoch": 84.89, "learning_rate": 7.571149036680253e-06, "loss": 1.8229, "step": 29326500 }, { "epoch": 84.89, "learning_rate": 7.570425389032976e-06, "loss": 1.8146, "step": 29327000 }, { "epoch": 84.89, "learning_rate": 7.569701741385698e-06, "loss": 1.83, "step": 29327500 }, { "epoch": 84.89, "learning_rate": 7.568979541033716e-06, "loss": 1.8171, "step": 29328000 }, { "epoch": 84.89, "learning_rate": 7.56825589338644e-06, "loss": 1.7998, "step": 29328500 }, { "epoch": 84.9, "learning_rate": 7.567532245739163e-06, "loss": 1.7945, "step": 29329000 }, { "epoch": 84.9, "learning_rate": 7.566808598091886e-06, "loss": 1.8038, "step": 29329500 }, { "epoch": 84.9, "learning_rate": 7.5660849504446086e-06, "loss": 1.8233, "step": 29330000 }, { "epoch": 84.9, "learning_rate": 7.5653627500926275e-06, "loss": 1.8069, "step": 29330500 }, { "epoch": 84.9, "learning_rate": 7.564639102445351e-06, "loss": 1.7961, "step": 29331000 }, { "epoch": 84.9, "learning_rate": 7.563915454798074e-06, "loss": 1.8026, "step": 29331500 }, { "epoch": 84.9, "learning_rate": 7.563191807150798e-06, "loss": 1.8146, "step": 29332000 }, { "epoch": 84.91, "learning_rate": 7.562468159503521e-06, "loss": 1.8605, "step": 29332500 }, { "epoch": 84.91, "learning_rate": 7.561744511856243e-06, "loss": 1.8249, "step": 29333000 }, { "epoch": 84.91, "learning_rate": 7.561020864208966e-06, "loss": 1.8226, "step": 29333500 }, { "epoch": 84.91, "learning_rate": 7.56029721656169e-06, "loss": 1.8238, "step": 29334000 }, { "epoch": 84.91, "learning_rate": 7.559575016209708e-06, "loss": 1.8341, "step": 29334500 }, { "epoch": 84.91, "learning_rate": 7.558851368562431e-06, "loss": 1.8206, "step": 29335000 }, { "epoch": 84.91, "learning_rate": 7.558127720915153e-06, "loss": 1.8295, "step": 29335500 }, { "epoch": 84.92, "learning_rate": 7.557404073267878e-06, "loss": 1.7973, "step": 29336000 }, { "epoch": 84.92, "learning_rate": 7.5566804256206e-06, "loss": 1.8149, "step": 29336500 }, { "epoch": 84.92, "learning_rate": 7.5559582252686184e-06, "loss": 1.8196, "step": 29337000 }, { "epoch": 84.92, "learning_rate": 7.555234577621341e-06, "loss": 1.8167, "step": 29337500 }, { "epoch": 84.92, "learning_rate": 7.5545109299740654e-06, "loss": 1.7899, "step": 29338000 }, { "epoch": 84.92, "learning_rate": 7.553787282326788e-06, "loss": 1.8071, "step": 29338500 }, { "epoch": 84.92, "learning_rate": 7.553063634679511e-06, "loss": 1.8094, "step": 29339000 }, { "epoch": 84.93, "learning_rate": 7.552339987032235e-06, "loss": 1.7853, "step": 29339500 }, { "epoch": 84.93, "learning_rate": 7.551617786680253e-06, "loss": 1.8053, "step": 29340000 }, { "epoch": 84.93, "learning_rate": 7.550894139032976e-06, "loss": 1.8471, "step": 29340500 }, { "epoch": 84.93, "learning_rate": 7.550171938680993e-06, "loss": 1.8304, "step": 29341000 }, { "epoch": 84.93, "learning_rate": 7.549448291033717e-06, "loss": 1.8371, "step": 29341500 }, { "epoch": 84.93, "learning_rate": 7.54872464338644e-06, "loss": 1.8137, "step": 29342000 }, { "epoch": 84.93, "learning_rate": 7.548000995739163e-06, "loss": 1.8125, "step": 29342500 }, { "epoch": 84.94, "learning_rate": 7.547277348091885e-06, "loss": 1.8112, "step": 29343000 }, { "epoch": 84.94, "learning_rate": 7.54655370044461e-06, "loss": 1.7889, "step": 29343500 }, { "epoch": 84.94, "learning_rate": 7.5458300527973324e-06, "loss": 1.8164, "step": 29344000 }, { "epoch": 84.94, "learning_rate": 7.5451064051500555e-06, "loss": 1.829, "step": 29344500 }, { "epoch": 84.94, "learning_rate": 7.5443827575027794e-06, "loss": 1.8108, "step": 29345000 }, { "epoch": 84.94, "learning_rate": 7.5436591098555025e-06, "loss": 1.8138, "step": 29345500 }, { "epoch": 84.94, "learning_rate": 7.542935462208226e-06, "loss": 1.8252, "step": 29346000 }, { "epoch": 84.95, "learning_rate": 7.542213261856243e-06, "loss": 1.8213, "step": 29346500 }, { "epoch": 84.95, "learning_rate": 7.541489614208968e-06, "loss": 1.8217, "step": 29347000 }, { "epoch": 84.95, "learning_rate": 7.54076596656169e-06, "loss": 1.8483, "step": 29347500 }, { "epoch": 84.95, "learning_rate": 7.540042318914413e-06, "loss": 1.8394, "step": 29348000 }, { "epoch": 84.95, "learning_rate": 7.539318671267136e-06, "loss": 1.8231, "step": 29348500 }, { "epoch": 84.95, "learning_rate": 7.538596470915155e-06, "loss": 1.8196, "step": 29349000 }, { "epoch": 84.95, "learning_rate": 7.537872823267877e-06, "loss": 1.8159, "step": 29349500 }, { "epoch": 84.96, "learning_rate": 7.5371491756206e-06, "loss": 1.8034, "step": 29350000 }, { "epoch": 84.96, "learning_rate": 7.536425527973323e-06, "loss": 1.8236, "step": 29350500 }, { "epoch": 84.96, "learning_rate": 7.535703327621342e-06, "loss": 1.8094, "step": 29351000 }, { "epoch": 84.96, "learning_rate": 7.5349796799740645e-06, "loss": 1.8445, "step": 29351500 }, { "epoch": 84.96, "learning_rate": 7.534256032326788e-06, "loss": 1.7982, "step": 29352000 }, { "epoch": 84.96, "learning_rate": 7.533533831974805e-06, "loss": 1.823, "step": 29352500 }, { "epoch": 84.96, "learning_rate": 7.53281018432753e-06, "loss": 1.8223, "step": 29353000 }, { "epoch": 84.97, "learning_rate": 7.532086536680253e-06, "loss": 1.8225, "step": 29353500 }, { "epoch": 84.97, "learning_rate": 7.531362889032975e-06, "loss": 1.8303, "step": 29354000 }, { "epoch": 84.97, "learning_rate": 7.5306392413857e-06, "loss": 1.8214, "step": 29354500 }, { "epoch": 84.97, "learning_rate": 7.529915593738422e-06, "loss": 1.8319, "step": 29355000 }, { "epoch": 84.97, "learning_rate": 7.52919339338644e-06, "loss": 1.8083, "step": 29355500 }, { "epoch": 84.97, "learning_rate": 7.528469745739162e-06, "loss": 1.8144, "step": 29356000 }, { "epoch": 84.98, "learning_rate": 7.527746098091887e-06, "loss": 1.7986, "step": 29356500 }, { "epoch": 84.98, "learning_rate": 7.527022450444609e-06, "loss": 1.8313, "step": 29357000 }, { "epoch": 84.98, "learning_rate": 7.526298802797332e-06, "loss": 1.8424, "step": 29357500 }, { "epoch": 84.98, "learning_rate": 7.5255751551500555e-06, "loss": 1.8297, "step": 29358000 }, { "epoch": 84.98, "learning_rate": 7.524851507502779e-06, "loss": 1.8413, "step": 29358500 }, { "epoch": 84.98, "learning_rate": 7.524129307150797e-06, "loss": 1.8378, "step": 29359000 }, { "epoch": 84.98, "learning_rate": 7.52340565950352e-06, "loss": 1.8011, "step": 29359500 }, { "epoch": 84.99, "learning_rate": 7.5226820118562445e-06, "loss": 1.8068, "step": 29360000 }, { "epoch": 84.99, "learning_rate": 7.521959811504262e-06, "loss": 1.8668, "step": 29360500 }, { "epoch": 84.99, "learning_rate": 7.521236163856985e-06, "loss": 1.8227, "step": 29361000 }, { "epoch": 84.99, "learning_rate": 7.520512516209707e-06, "loss": 1.815, "step": 29361500 }, { "epoch": 84.99, "learning_rate": 7.519788868562432e-06, "loss": 1.8104, "step": 29362000 }, { "epoch": 84.99, "learning_rate": 7.519065220915154e-06, "loss": 1.8364, "step": 29362500 }, { "epoch": 84.99, "learning_rate": 7.518341573267877e-06, "loss": 1.813, "step": 29363000 }, { "epoch": 85.0, "learning_rate": 7.5176179256206e-06, "loss": 1.8539, "step": 29363500 }, { "epoch": 85.0, "learning_rate": 7.516894277973324e-06, "loss": 1.8499, "step": 29364000 }, { "epoch": 85.0, "learning_rate": 7.516170630326047e-06, "loss": 1.8189, "step": 29364500 }, { "epoch": 85.0, "learning_rate": 7.51544698267877e-06, "loss": 1.8014, "step": 29365000 }, { "epoch": 85.0, "eval_accuracy": 0.6880164121352734, "eval_accuracy_mlm": 0.6568216000283631, "eval_accuracy_nsp": 0.8553665791294863, "eval_loss": 2.2023632526397705, "eval_runtime": 331.6134, "eval_samples_per_second": 1315.948, "eval_steps_per_second": 54.832, "step": 29365120 }, { "epoch": 85.0, "learning_rate": 7.514723335031494e-06, "loss": 1.8343, "step": 29365500 }, { "epoch": 85.0, "learning_rate": 7.5140025819748065e-06, "loss": 1.8123, "step": 29366000 }, { "epoch": 85.0, "learning_rate": 7.51327893432753e-06, "loss": 1.8284, "step": 29366500 }, { "epoch": 85.01, "learning_rate": 7.512555286680252e-06, "loss": 1.7882, "step": 29367000 }, { "epoch": 85.01, "learning_rate": 7.511831639032975e-06, "loss": 1.7784, "step": 29367500 }, { "epoch": 85.01, "learning_rate": 7.511107991385699e-06, "loss": 1.8092, "step": 29368000 }, { "epoch": 85.01, "learning_rate": 7.510384343738422e-06, "loss": 1.8391, "step": 29368500 }, { "epoch": 85.01, "learning_rate": 7.509660696091145e-06, "loss": 1.8096, "step": 29369000 }, { "epoch": 85.01, "learning_rate": 7.508937048443869e-06, "loss": 1.7975, "step": 29369500 }, { "epoch": 85.01, "learning_rate": 7.508213400796592e-06, "loss": 1.8003, "step": 29370000 }, { "epoch": 85.02, "learning_rate": 7.507489753149314e-06, "loss": 1.7968, "step": 29370500 }, { "epoch": 85.02, "learning_rate": 7.506766105502037e-06, "loss": 1.7995, "step": 29371000 }, { "epoch": 85.02, "learning_rate": 7.506042457854762e-06, "loss": 1.8169, "step": 29371500 }, { "epoch": 85.02, "learning_rate": 7.505318810207484e-06, "loss": 1.8182, "step": 29372000 }, { "epoch": 85.02, "learning_rate": 7.504595162560207e-06, "loss": 1.822, "step": 29372500 }, { "epoch": 85.02, "learning_rate": 7.503872962208226e-06, "loss": 1.8308, "step": 29373000 }, { "epoch": 85.02, "learning_rate": 7.5031493145609494e-06, "loss": 1.8135, "step": 29373500 }, { "epoch": 85.03, "learning_rate": 7.502427114208967e-06, "loss": 1.8089, "step": 29374000 }, { "epoch": 85.03, "learning_rate": 7.50170346656169e-06, "loss": 1.7905, "step": 29374500 }, { "epoch": 85.03, "learning_rate": 7.500979818914414e-06, "loss": 1.8199, "step": 29375000 }, { "epoch": 85.03, "learning_rate": 7.500256171267137e-06, "loss": 1.8124, "step": 29375500 }, { "epoch": 85.03, "learning_rate": 7.499532523619859e-06, "loss": 1.8014, "step": 29376000 }, { "epoch": 85.03, "learning_rate": 7.498808875972582e-06, "loss": 1.8236, "step": 29376500 }, { "epoch": 85.03, "learning_rate": 7.498085228325306e-06, "loss": 1.8074, "step": 29377000 }, { "epoch": 85.04, "learning_rate": 7.497361580678029e-06, "loss": 1.7947, "step": 29377500 }, { "epoch": 85.04, "learning_rate": 7.496637933030752e-06, "loss": 1.7856, "step": 29378000 }, { "epoch": 85.04, "learning_rate": 7.495914285383476e-06, "loss": 1.7997, "step": 29378500 }, { "epoch": 85.04, "learning_rate": 7.495190637736199e-06, "loss": 1.7985, "step": 29379000 }, { "epoch": 85.04, "learning_rate": 7.494466990088922e-06, "loss": 1.7922, "step": 29379500 }, { "epoch": 85.04, "learning_rate": 7.4937433424416445e-06, "loss": 1.7872, "step": 29380000 }, { "epoch": 85.04, "learning_rate": 7.4930225893849584e-06, "loss": 1.7991, "step": 29380500 }, { "epoch": 85.05, "learning_rate": 7.492300389032976e-06, "loss": 1.8179, "step": 29381000 }, { "epoch": 85.05, "learning_rate": 7.491578188680994e-06, "loss": 1.7883, "step": 29381500 }, { "epoch": 85.05, "learning_rate": 7.490854541033716e-06, "loss": 1.8012, "step": 29382000 }, { "epoch": 85.05, "learning_rate": 7.490130893386439e-06, "loss": 1.8042, "step": 29382500 }, { "epoch": 85.05, "learning_rate": 7.489407245739163e-06, "loss": 1.831, "step": 29383000 }, { "epoch": 85.05, "learning_rate": 7.488683598091886e-06, "loss": 1.7931, "step": 29383500 }, { "epoch": 85.05, "learning_rate": 7.487959950444609e-06, "loss": 1.817, "step": 29384000 }, { "epoch": 85.06, "learning_rate": 7.487236302797333e-06, "loss": 1.7861, "step": 29384500 }, { "epoch": 85.06, "learning_rate": 7.486512655150056e-06, "loss": 1.805, "step": 29385000 }, { "epoch": 85.06, "learning_rate": 7.485789007502779e-06, "loss": 1.8087, "step": 29385500 }, { "epoch": 85.06, "learning_rate": 7.4850668071507965e-06, "loss": 1.8216, "step": 29386000 }, { "epoch": 85.06, "learning_rate": 7.4843431595035205e-06, "loss": 1.7959, "step": 29386500 }, { "epoch": 85.06, "learning_rate": 7.4836195118562435e-06, "loss": 1.8292, "step": 29387000 }, { "epoch": 85.06, "learning_rate": 7.482895864208967e-06, "loss": 1.8095, "step": 29387500 }, { "epoch": 85.07, "learning_rate": 7.482172216561689e-06, "loss": 1.7998, "step": 29388000 }, { "epoch": 85.07, "learning_rate": 7.481448568914414e-06, "loss": 1.804, "step": 29388500 }, { "epoch": 85.07, "learning_rate": 7.480726368562431e-06, "loss": 1.8325, "step": 29389000 }, { "epoch": 85.07, "learning_rate": 7.480002720915154e-06, "loss": 1.8157, "step": 29389500 }, { "epoch": 85.07, "learning_rate": 7.479279073267878e-06, "loss": 1.8044, "step": 29390000 }, { "epoch": 85.07, "learning_rate": 7.478555425620601e-06, "loss": 1.8099, "step": 29390500 }, { "epoch": 85.07, "learning_rate": 7.477831777973324e-06, "loss": 1.8019, "step": 29391000 }, { "epoch": 85.08, "learning_rate": 7.477108130326046e-06, "loss": 1.8161, "step": 29391500 }, { "epoch": 85.08, "learning_rate": 7.476384482678771e-06, "loss": 1.8199, "step": 29392000 }, { "epoch": 85.08, "learning_rate": 7.475660835031493e-06, "loss": 1.7713, "step": 29392500 }, { "epoch": 85.08, "learning_rate": 7.474937187384216e-06, "loss": 1.7959, "step": 29393000 }, { "epoch": 85.08, "learning_rate": 7.474214987032234e-06, "loss": 1.8143, "step": 29393500 }, { "epoch": 85.08, "learning_rate": 7.473494233975546e-06, "loss": 1.805, "step": 29394000 }, { "epoch": 85.09, "learning_rate": 7.472770586328271e-06, "loss": 1.8213, "step": 29394500 }, { "epoch": 85.09, "learning_rate": 7.472046938680993e-06, "loss": 1.8171, "step": 29395000 }, { "epoch": 85.09, "learning_rate": 7.471323291033716e-06, "loss": 1.8002, "step": 29395500 }, { "epoch": 85.09, "learning_rate": 7.47059964338644e-06, "loss": 1.8093, "step": 29396000 }, { "epoch": 85.09, "learning_rate": 7.469875995739163e-06, "loss": 1.8306, "step": 29396500 }, { "epoch": 85.09, "learning_rate": 7.469152348091886e-06, "loss": 1.7977, "step": 29397000 }, { "epoch": 85.09, "learning_rate": 7.468428700444609e-06, "loss": 1.8182, "step": 29397500 }, { "epoch": 85.1, "learning_rate": 7.467705052797333e-06, "loss": 1.8069, "step": 29398000 }, { "epoch": 85.1, "learning_rate": 7.466981405150056e-06, "loss": 1.827, "step": 29398500 }, { "epoch": 85.1, "learning_rate": 7.466257757502778e-06, "loss": 1.801, "step": 29399000 }, { "epoch": 85.1, "learning_rate": 7.465534109855503e-06, "loss": 1.802, "step": 29399500 }, { "epoch": 85.1, "learning_rate": 7.464810462208225e-06, "loss": 1.7935, "step": 29400000 }, { "epoch": 85.1, "learning_rate": 7.4640868145609485e-06, "loss": 1.8154, "step": 29400500 }, { "epoch": 85.1, "learning_rate": 7.463364614208966e-06, "loss": 1.8111, "step": 29401000 }, { "epoch": 85.11, "learning_rate": 7.4626409665616905e-06, "loss": 1.8172, "step": 29401500 }, { "epoch": 85.11, "learning_rate": 7.461917318914413e-06, "loss": 1.8075, "step": 29402000 }, { "epoch": 85.11, "learning_rate": 7.461193671267136e-06, "loss": 1.8172, "step": 29402500 }, { "epoch": 85.11, "learning_rate": 7.460470023619861e-06, "loss": 1.8161, "step": 29403000 }, { "epoch": 85.11, "learning_rate": 7.459746375972583e-06, "loss": 1.8142, "step": 29403500 }, { "epoch": 85.11, "learning_rate": 7.459022728325306e-06, "loss": 1.8451, "step": 29404000 }, { "epoch": 85.11, "learning_rate": 7.458299080678029e-06, "loss": 1.7921, "step": 29404500 }, { "epoch": 85.12, "learning_rate": 7.457576880326048e-06, "loss": 1.8408, "step": 29405000 }, { "epoch": 85.12, "learning_rate": 7.45685323267877e-06, "loss": 1.7977, "step": 29405500 }, { "epoch": 85.12, "learning_rate": 7.456129585031493e-06, "loss": 1.8178, "step": 29406000 }, { "epoch": 85.12, "learning_rate": 7.455405937384216e-06, "loss": 1.8175, "step": 29406500 }, { "epoch": 85.12, "learning_rate": 7.454683737032235e-06, "loss": 1.8103, "step": 29407000 }, { "epoch": 85.12, "learning_rate": 7.4539600893849575e-06, "loss": 1.8051, "step": 29407500 }, { "epoch": 85.12, "learning_rate": 7.453236441737681e-06, "loss": 1.8154, "step": 29408000 }, { "epoch": 85.13, "learning_rate": 7.452512794090404e-06, "loss": 1.8123, "step": 29408500 }, { "epoch": 85.13, "learning_rate": 7.451789146443128e-06, "loss": 1.8057, "step": 29409000 }, { "epoch": 85.13, "learning_rate": 7.45106839338644e-06, "loss": 1.8153, "step": 29409500 }, { "epoch": 85.13, "learning_rate": 7.450344745739163e-06, "loss": 1.85, "step": 29410000 }, { "epoch": 85.13, "learning_rate": 7.449621098091886e-06, "loss": 1.8069, "step": 29410500 }, { "epoch": 85.13, "learning_rate": 7.44889745044461e-06, "loss": 1.8025, "step": 29411000 }, { "epoch": 85.13, "learning_rate": 7.448173802797333e-06, "loss": 1.7944, "step": 29411500 }, { "epoch": 85.14, "learning_rate": 7.447450155150055e-06, "loss": 1.8368, "step": 29412000 }, { "epoch": 85.14, "learning_rate": 7.44672650750278e-06, "loss": 1.7979, "step": 29412500 }, { "epoch": 85.14, "learning_rate": 7.446002859855502e-06, "loss": 1.8101, "step": 29413000 }, { "epoch": 85.14, "learning_rate": 7.44528065950352e-06, "loss": 1.8154, "step": 29413500 }, { "epoch": 85.14, "learning_rate": 7.444557011856243e-06, "loss": 1.829, "step": 29414000 }, { "epoch": 85.14, "learning_rate": 7.443833364208967e-06, "loss": 1.7983, "step": 29414500 }, { "epoch": 85.14, "learning_rate": 7.4431097165616904e-06, "loss": 1.794, "step": 29415000 }, { "epoch": 85.15, "learning_rate": 7.442386068914413e-06, "loss": 1.8181, "step": 29415500 }, { "epoch": 85.15, "learning_rate": 7.441663868562431e-06, "loss": 1.8119, "step": 29416000 }, { "epoch": 85.15, "learning_rate": 7.440940220915155e-06, "loss": 1.8354, "step": 29416500 }, { "epoch": 85.15, "learning_rate": 7.440216573267878e-06, "loss": 1.8006, "step": 29417000 }, { "epoch": 85.15, "learning_rate": 7.4394929256206e-06, "loss": 1.7893, "step": 29417500 }, { "epoch": 85.15, "learning_rate": 7.438770725268618e-06, "loss": 1.8167, "step": 29418000 }, { "epoch": 85.15, "learning_rate": 7.438047077621342e-06, "loss": 1.809, "step": 29418500 }, { "epoch": 85.16, "learning_rate": 7.437324877269359e-06, "loss": 1.7953, "step": 29419000 }, { "epoch": 85.16, "learning_rate": 7.436601229622082e-06, "loss": 1.8119, "step": 29419500 }, { "epoch": 85.16, "learning_rate": 7.4358775819748055e-06, "loss": 1.8307, "step": 29420000 }, { "epoch": 85.16, "learning_rate": 7.435153934327529e-06, "loss": 1.8, "step": 29420500 }, { "epoch": 85.16, "learning_rate": 7.4344302866802525e-06, "loss": 1.8198, "step": 29421000 }, { "epoch": 85.16, "learning_rate": 7.4337066390329755e-06, "loss": 1.8089, "step": 29421500 }, { "epoch": 85.16, "learning_rate": 7.4329829913856995e-06, "loss": 1.831, "step": 29422000 }, { "epoch": 85.17, "learning_rate": 7.4322593437384226e-06, "loss": 1.8217, "step": 29422500 }, { "epoch": 85.17, "learning_rate": 7.431535696091145e-06, "loss": 1.8175, "step": 29423000 }, { "epoch": 85.17, "learning_rate": 7.430812048443868e-06, "loss": 1.8064, "step": 29423500 }, { "epoch": 85.17, "learning_rate": 7.430089848091887e-06, "loss": 1.8073, "step": 29424000 }, { "epoch": 85.17, "learning_rate": 7.42936620044461e-06, "loss": 1.8103, "step": 29424500 }, { "epoch": 85.17, "learning_rate": 7.428642552797332e-06, "loss": 1.8039, "step": 29425000 }, { "epoch": 85.17, "learning_rate": 7.427918905150055e-06, "loss": 1.7891, "step": 29425500 }, { "epoch": 85.18, "learning_rate": 7.427195257502779e-06, "loss": 1.8317, "step": 29426000 }, { "epoch": 85.18, "learning_rate": 7.426471609855502e-06, "loss": 1.8016, "step": 29426500 }, { "epoch": 85.18, "learning_rate": 7.4257494095035195e-06, "loss": 1.7983, "step": 29427000 }, { "epoch": 85.18, "learning_rate": 7.425025761856244e-06, "loss": 1.8215, "step": 29427500 }, { "epoch": 85.18, "learning_rate": 7.424302114208967e-06, "loss": 1.8157, "step": 29428000 }, { "epoch": 85.18, "learning_rate": 7.4235784665616896e-06, "loss": 1.818, "step": 29428500 }, { "epoch": 85.18, "learning_rate": 7.422856266209708e-06, "loss": 1.7939, "step": 29429000 }, { "epoch": 85.19, "learning_rate": 7.4221326185624316e-06, "loss": 1.818, "step": 29429500 }, { "epoch": 85.19, "learning_rate": 7.421410418210449e-06, "loss": 1.795, "step": 29430000 }, { "epoch": 85.19, "learning_rate": 7.420686770563172e-06, "loss": 1.8453, "step": 29430500 }, { "epoch": 85.19, "learning_rate": 7.419963122915895e-06, "loss": 1.7924, "step": 29431000 }, { "epoch": 85.19, "learning_rate": 7.419239475268619e-06, "loss": 1.8136, "step": 29431500 }, { "epoch": 85.19, "learning_rate": 7.418515827621342e-06, "loss": 1.8025, "step": 29432000 }, { "epoch": 85.2, "learning_rate": 7.417792179974064e-06, "loss": 1.828, "step": 29432500 }, { "epoch": 85.2, "learning_rate": 7.417069979622082e-06, "loss": 1.8178, "step": 29433000 }, { "epoch": 85.2, "learning_rate": 7.416346331974806e-06, "loss": 1.7906, "step": 29433500 }, { "epoch": 85.2, "learning_rate": 7.415622684327529e-06, "loss": 1.8221, "step": 29434000 }, { "epoch": 85.2, "learning_rate": 7.414899036680252e-06, "loss": 1.8277, "step": 29434500 }, { "epoch": 85.2, "learning_rate": 7.414175389032975e-06, "loss": 1.83, "step": 29435000 }, { "epoch": 85.2, "learning_rate": 7.413451741385699e-06, "loss": 1.7964, "step": 29435500 }, { "epoch": 85.21, "learning_rate": 7.412728093738422e-06, "loss": 1.817, "step": 29436000 }, { "epoch": 85.21, "learning_rate": 7.412004446091145e-06, "loss": 1.8139, "step": 29436500 }, { "epoch": 85.21, "learning_rate": 7.411280798443869e-06, "loss": 1.8222, "step": 29437000 }, { "epoch": 85.21, "learning_rate": 7.410557150796592e-06, "loss": 1.8024, "step": 29437500 }, { "epoch": 85.21, "learning_rate": 7.409833503149315e-06, "loss": 1.8135, "step": 29438000 }, { "epoch": 85.21, "learning_rate": 7.409109855502037e-06, "loss": 1.7896, "step": 29438500 }, { "epoch": 85.21, "learning_rate": 7.408386207854762e-06, "loss": 1.8223, "step": 29439000 }, { "epoch": 85.22, "learning_rate": 7.407666902093368e-06, "loss": 1.8089, "step": 29439500 }, { "epoch": 85.22, "learning_rate": 7.406943254446091e-06, "loss": 1.8215, "step": 29440000 }, { "epoch": 85.22, "learning_rate": 7.4062196067988144e-06, "loss": 1.8203, "step": 29440500 }, { "epoch": 85.22, "learning_rate": 7.405495959151538e-06, "loss": 1.809, "step": 29441000 }, { "epoch": 85.22, "learning_rate": 7.4047723115042614e-06, "loss": 1.8145, "step": 29441500 }, { "epoch": 85.22, "learning_rate": 7.4040486638569845e-06, "loss": 1.8107, "step": 29442000 }, { "epoch": 85.22, "learning_rate": 7.403325016209707e-06, "loss": 1.8143, "step": 29442500 }, { "epoch": 85.23, "learning_rate": 7.4026013685624315e-06, "loss": 1.7909, "step": 29443000 }, { "epoch": 85.23, "learning_rate": 7.401877720915154e-06, "loss": 1.8128, "step": 29443500 }, { "epoch": 85.23, "learning_rate": 7.401155520563172e-06, "loss": 1.8394, "step": 29444000 }, { "epoch": 85.23, "learning_rate": 7.400433320211189e-06, "loss": 1.8011, "step": 29444500 }, { "epoch": 85.23, "learning_rate": 7.399709672563913e-06, "loss": 1.8345, "step": 29445000 }, { "epoch": 85.23, "learning_rate": 7.398986024916636e-06, "loss": 1.8197, "step": 29445500 }, { "epoch": 85.23, "learning_rate": 7.398262377269359e-06, "loss": 1.8072, "step": 29446000 }, { "epoch": 85.24, "learning_rate": 7.397538729622083e-06, "loss": 1.7898, "step": 29446500 }, { "epoch": 85.24, "learning_rate": 7.396815081974806e-06, "loss": 1.8047, "step": 29447000 }, { "epoch": 85.24, "learning_rate": 7.396091434327529e-06, "loss": 1.7934, "step": 29447500 }, { "epoch": 85.24, "learning_rate": 7.3953677866802515e-06, "loss": 1.8257, "step": 29448000 }, { "epoch": 85.24, "learning_rate": 7.394644139032976e-06, "loss": 1.808, "step": 29448500 }, { "epoch": 85.24, "learning_rate": 7.3939219386809935e-06, "loss": 1.8222, "step": 29449000 }, { "epoch": 85.24, "learning_rate": 7.393199738329011e-06, "loss": 1.82, "step": 29449500 }, { "epoch": 85.25, "learning_rate": 7.392476090681734e-06, "loss": 1.815, "step": 29450000 }, { "epoch": 85.25, "learning_rate": 7.391752443034458e-06, "loss": 1.8292, "step": 29450500 }, { "epoch": 85.25, "learning_rate": 7.391028795387181e-06, "loss": 1.823, "step": 29451000 }, { "epoch": 85.25, "learning_rate": 7.390305147739904e-06, "loss": 1.809, "step": 29451500 }, { "epoch": 85.25, "learning_rate": 7.389581500092627e-06, "loss": 1.8231, "step": 29452000 }, { "epoch": 85.25, "learning_rate": 7.388857852445351e-06, "loss": 1.812, "step": 29452500 }, { "epoch": 85.25, "learning_rate": 7.388134204798074e-06, "loss": 1.8117, "step": 29453000 }, { "epoch": 85.26, "learning_rate": 7.387412004446091e-06, "loss": 1.8214, "step": 29453500 }, { "epoch": 85.26, "learning_rate": 7.386688356798814e-06, "loss": 1.8085, "step": 29454000 }, { "epoch": 85.26, "learning_rate": 7.385964709151538e-06, "loss": 1.8439, "step": 29454500 }, { "epoch": 85.26, "learning_rate": 7.385241061504261e-06, "loss": 1.8117, "step": 29455000 }, { "epoch": 85.26, "learning_rate": 7.384517413856984e-06, "loss": 1.7961, "step": 29455500 }, { "epoch": 85.26, "learning_rate": 7.383793766209708e-06, "loss": 1.8002, "step": 29456000 }, { "epoch": 85.26, "learning_rate": 7.383070118562431e-06, "loss": 1.805, "step": 29456500 }, { "epoch": 85.27, "learning_rate": 7.382346470915154e-06, "loss": 1.8211, "step": 29457000 }, { "epoch": 85.27, "learning_rate": 7.381624270563171e-06, "loss": 1.8387, "step": 29457500 }, { "epoch": 85.27, "learning_rate": 7.380900622915896e-06, "loss": 1.7936, "step": 29458000 }, { "epoch": 85.27, "learning_rate": 7.380176975268619e-06, "loss": 1.8544, "step": 29458500 }, { "epoch": 85.27, "learning_rate": 7.379453327621341e-06, "loss": 1.8093, "step": 29459000 }, { "epoch": 85.27, "learning_rate": 7.378729679974066e-06, "loss": 1.7719, "step": 29459500 }, { "epoch": 85.27, "learning_rate": 7.378007479622083e-06, "loss": 1.8185, "step": 29460000 }, { "epoch": 85.28, "learning_rate": 7.377283831974806e-06, "loss": 1.8164, "step": 29460500 }, { "epoch": 85.28, "learning_rate": 7.376561631622823e-06, "loss": 1.7956, "step": 29461000 }, { "epoch": 85.28, "learning_rate": 7.3758379839755465e-06, "loss": 1.8328, "step": 29461500 }, { "epoch": 85.28, "learning_rate": 7.37511433632827e-06, "loss": 1.7834, "step": 29462000 }, { "epoch": 85.28, "learning_rate": 7.3743906886809935e-06, "loss": 1.8093, "step": 29462500 }, { "epoch": 85.28, "learning_rate": 7.373667041033716e-06, "loss": 1.8159, "step": 29463000 }, { "epoch": 85.28, "learning_rate": 7.3729433933864405e-06, "loss": 1.8107, "step": 29463500 }, { "epoch": 85.29, "learning_rate": 7.372219745739163e-06, "loss": 1.8166, "step": 29464000 }, { "epoch": 85.29, "learning_rate": 7.371496098091886e-06, "loss": 1.8245, "step": 29464500 }, { "epoch": 85.29, "learning_rate": 7.370772450444609e-06, "loss": 1.8027, "step": 29465000 }, { "epoch": 85.29, "learning_rate": 7.370048802797333e-06, "loss": 1.8003, "step": 29465500 }, { "epoch": 85.29, "learning_rate": 7.369326602445351e-06, "loss": 1.8233, "step": 29466000 }, { "epoch": 85.29, "learning_rate": 7.368602954798073e-06, "loss": 1.8282, "step": 29466500 }, { "epoch": 85.29, "learning_rate": 7.367879307150798e-06, "loss": 1.8009, "step": 29467000 }, { "epoch": 85.3, "learning_rate": 7.36715565950352e-06, "loss": 1.8141, "step": 29467500 }, { "epoch": 85.3, "learning_rate": 7.366432011856243e-06, "loss": 1.8042, "step": 29468000 }, { "epoch": 85.3, "learning_rate": 7.3657098115042605e-06, "loss": 1.8368, "step": 29468500 }, { "epoch": 85.3, "learning_rate": 7.364986163856985e-06, "loss": 1.8155, "step": 29469000 }, { "epoch": 85.3, "learning_rate": 7.3642625162097075e-06, "loss": 1.7819, "step": 29469500 }, { "epoch": 85.3, "learning_rate": 7.3635388685624306e-06, "loss": 1.8412, "step": 29470000 }, { "epoch": 85.31, "learning_rate": 7.362815220915154e-06, "loss": 1.8094, "step": 29470500 }, { "epoch": 85.31, "learning_rate": 7.362091573267878e-06, "loss": 1.7764, "step": 29471000 }, { "epoch": 85.31, "learning_rate": 7.361367925620601e-06, "loss": 1.8049, "step": 29471500 }, { "epoch": 85.31, "learning_rate": 7.360644277973324e-06, "loss": 1.8087, "step": 29472000 }, { "epoch": 85.31, "learning_rate": 7.359920630326048e-06, "loss": 1.8234, "step": 29472500 }, { "epoch": 85.31, "learning_rate": 7.359198429974065e-06, "loss": 1.8154, "step": 29473000 }, { "epoch": 85.31, "learning_rate": 7.358474782326788e-06, "loss": 1.8275, "step": 29473500 }, { "epoch": 85.32, "learning_rate": 7.357751134679511e-06, "loss": 1.8046, "step": 29474000 }, { "epoch": 85.32, "learning_rate": 7.357027487032235e-06, "loss": 1.7998, "step": 29474500 }, { "epoch": 85.32, "learning_rate": 7.356303839384958e-06, "loss": 1.8173, "step": 29475000 }, { "epoch": 85.32, "learning_rate": 7.355581639032975e-06, "loss": 1.839, "step": 29475500 }, { "epoch": 85.32, "learning_rate": 7.354859438680993e-06, "loss": 1.8025, "step": 29476000 }, { "epoch": 85.32, "learning_rate": 7.354135791033717e-06, "loss": 1.8103, "step": 29476500 }, { "epoch": 85.32, "learning_rate": 7.3534121433864404e-06, "loss": 1.8313, "step": 29477000 }, { "epoch": 85.33, "learning_rate": 7.352688495739163e-06, "loss": 1.8168, "step": 29477500 }, { "epoch": 85.33, "learning_rate": 7.351964848091886e-06, "loss": 1.811, "step": 29478000 }, { "epoch": 85.33, "learning_rate": 7.35124120044461e-06, "loss": 1.7918, "step": 29478500 }, { "epoch": 85.33, "learning_rate": 7.350517552797333e-06, "loss": 1.8032, "step": 29479000 }, { "epoch": 85.33, "learning_rate": 7.349793905150056e-06, "loss": 1.8505, "step": 29479500 }, { "epoch": 85.33, "learning_rate": 7.34907025750278e-06, "loss": 1.7942, "step": 29480000 }, { "epoch": 85.33, "learning_rate": 7.348346609855503e-06, "loss": 1.8179, "step": 29480500 }, { "epoch": 85.34, "learning_rate": 7.34762440950352e-06, "loss": 1.8134, "step": 29481000 }, { "epoch": 85.34, "learning_rate": 7.346900761856243e-06, "loss": 1.7972, "step": 29481500 }, { "epoch": 85.34, "learning_rate": 7.3461785615042604e-06, "loss": 1.8265, "step": 29482000 }, { "epoch": 85.34, "learning_rate": 7.345456361152279e-06, "loss": 1.8336, "step": 29482500 }, { "epoch": 85.34, "learning_rate": 7.3447327135050025e-06, "loss": 1.8228, "step": 29483000 }, { "epoch": 85.34, "learning_rate": 7.3440090658577255e-06, "loss": 1.8014, "step": 29483500 }, { "epoch": 85.34, "learning_rate": 7.3432854182104495e-06, "loss": 1.7873, "step": 29484000 }, { "epoch": 85.35, "learning_rate": 7.3425617705631725e-06, "loss": 1.8224, "step": 29484500 }, { "epoch": 85.35, "learning_rate": 7.341838122915895e-06, "loss": 1.8172, "step": 29485000 }, { "epoch": 85.35, "learning_rate": 7.341114475268618e-06, "loss": 1.8174, "step": 29485500 }, { "epoch": 85.35, "learning_rate": 7.340390827621342e-06, "loss": 1.8329, "step": 29486000 }, { "epoch": 85.35, "learning_rate": 7.339667179974065e-06, "loss": 1.8069, "step": 29486500 }, { "epoch": 85.35, "learning_rate": 7.338943532326788e-06, "loss": 1.7879, "step": 29487000 }, { "epoch": 85.35, "learning_rate": 7.338219884679512e-06, "loss": 1.8339, "step": 29487500 }, { "epoch": 85.36, "learning_rate": 7.337496237032235e-06, "loss": 1.7967, "step": 29488000 }, { "epoch": 85.36, "learning_rate": 7.336774036680252e-06, "loss": 1.7989, "step": 29488500 }, { "epoch": 85.36, "learning_rate": 7.336050389032975e-06, "loss": 1.8246, "step": 29489000 }, { "epoch": 85.36, "learning_rate": 7.335326741385699e-06, "loss": 1.8039, "step": 29489500 }, { "epoch": 85.36, "learning_rate": 7.334603093738422e-06, "loss": 1.8219, "step": 29490000 }, { "epoch": 85.36, "learning_rate": 7.3338808933864395e-06, "loss": 1.8139, "step": 29490500 }, { "epoch": 85.36, "learning_rate": 7.333157245739163e-06, "loss": 1.8107, "step": 29491000 }, { "epoch": 85.37, "learning_rate": 7.3324335980918866e-06, "loss": 1.8152, "step": 29491500 }, { "epoch": 85.37, "learning_rate": 7.33170995044461e-06, "loss": 1.8087, "step": 29492000 }, { "epoch": 85.37, "learning_rate": 7.330986302797333e-06, "loss": 1.8078, "step": 29492500 }, { "epoch": 85.37, "learning_rate": 7.33026410244535e-06, "loss": 1.8262, "step": 29493000 }, { "epoch": 85.37, "learning_rate": 7.329540454798074e-06, "loss": 1.8242, "step": 29493500 }, { "epoch": 85.37, "learning_rate": 7.328816807150797e-06, "loss": 1.8116, "step": 29494000 }, { "epoch": 85.37, "learning_rate": 7.32809315950352e-06, "loss": 1.7742, "step": 29494500 }, { "epoch": 85.38, "learning_rate": 7.327370959151537e-06, "loss": 1.8185, "step": 29495000 }, { "epoch": 85.38, "learning_rate": 7.326647311504261e-06, "loss": 1.8181, "step": 29495500 }, { "epoch": 85.38, "learning_rate": 7.325923663856984e-06, "loss": 1.8083, "step": 29496000 }, { "epoch": 85.38, "learning_rate": 7.325200016209707e-06, "loss": 1.8335, "step": 29496500 }, { "epoch": 85.38, "learning_rate": 7.324476368562431e-06, "loss": 1.8224, "step": 29497000 }, { "epoch": 85.38, "learning_rate": 7.323752720915154e-06, "loss": 1.8385, "step": 29497500 }, { "epoch": 85.38, "learning_rate": 7.323030520563172e-06, "loss": 1.8061, "step": 29498000 }, { "epoch": 85.39, "learning_rate": 7.322306872915895e-06, "loss": 1.8145, "step": 29498500 }, { "epoch": 85.39, "learning_rate": 7.321583225268619e-06, "loss": 1.8325, "step": 29499000 }, { "epoch": 85.39, "learning_rate": 7.320859577621342e-06, "loss": 1.8178, "step": 29499500 }, { "epoch": 85.39, "learning_rate": 7.320137377269359e-06, "loss": 1.809, "step": 29500000 }, { "epoch": 85.39, "learning_rate": 7.319413729622082e-06, "loss": 1.796, "step": 29500500 }, { "epoch": 85.39, "learning_rate": 7.318691529270099e-06, "loss": 1.8261, "step": 29501000 }, { "epoch": 85.39, "learning_rate": 7.317967881622824e-06, "loss": 1.8323, "step": 29501500 }, { "epoch": 85.4, "learning_rate": 7.317244233975547e-06, "loss": 1.8177, "step": 29502000 }, { "epoch": 85.4, "learning_rate": 7.316520586328269e-06, "loss": 1.7953, "step": 29502500 }, { "epoch": 85.4, "learning_rate": 7.315796938680994e-06, "loss": 1.8202, "step": 29503000 }, { "epoch": 85.4, "learning_rate": 7.315073291033716e-06, "loss": 1.8476, "step": 29503500 }, { "epoch": 85.4, "learning_rate": 7.3143496433864395e-06, "loss": 1.8035, "step": 29504000 }, { "epoch": 85.4, "learning_rate": 7.313627443034457e-06, "loss": 1.8288, "step": 29504500 }, { "epoch": 85.4, "learning_rate": 7.3129037953871815e-06, "loss": 1.8022, "step": 29505000 }, { "epoch": 85.41, "learning_rate": 7.312180147739904e-06, "loss": 1.8193, "step": 29505500 }, { "epoch": 85.41, "learning_rate": 7.311456500092627e-06, "loss": 1.8123, "step": 29506000 }, { "epoch": 85.41, "learning_rate": 7.310732852445351e-06, "loss": 1.8232, "step": 29506500 }, { "epoch": 85.41, "learning_rate": 7.310009204798074e-06, "loss": 1.8324, "step": 29507000 }, { "epoch": 85.41, "learning_rate": 7.309285557150797e-06, "loss": 1.8387, "step": 29507500 }, { "epoch": 85.41, "learning_rate": 7.30856190950352e-06, "loss": 1.7981, "step": 29508000 }, { "epoch": 85.42, "learning_rate": 7.307838261856244e-06, "loss": 1.8019, "step": 29508500 }, { "epoch": 85.42, "learning_rate": 7.307114614208967e-06, "loss": 1.822, "step": 29509000 }, { "epoch": 85.42, "learning_rate": 7.306392413856984e-06, "loss": 1.8114, "step": 29509500 }, { "epoch": 85.42, "learning_rate": 7.305668766209707e-06, "loss": 1.8218, "step": 29510000 }, { "epoch": 85.42, "learning_rate": 7.304945118562431e-06, "loss": 1.8075, "step": 29510500 }, { "epoch": 85.42, "learning_rate": 7.304221470915154e-06, "loss": 1.8095, "step": 29511000 }, { "epoch": 85.42, "learning_rate": 7.303499270563172e-06, "loss": 1.8068, "step": 29511500 }, { "epoch": 85.43, "learning_rate": 7.302775622915895e-06, "loss": 1.7787, "step": 29512000 }, { "epoch": 85.43, "learning_rate": 7.302051975268619e-06, "loss": 1.8149, "step": 29512500 }, { "epoch": 85.43, "learning_rate": 7.301328327621342e-06, "loss": 1.8022, "step": 29513000 }, { "epoch": 85.43, "learning_rate": 7.300604679974064e-06, "loss": 1.8113, "step": 29513500 }, { "epoch": 85.43, "learning_rate": 7.299881032326789e-06, "loss": 1.806, "step": 29514000 }, { "epoch": 85.43, "learning_rate": 7.299157384679512e-06, "loss": 1.795, "step": 29514500 }, { "epoch": 85.43, "learning_rate": 7.298433737032234e-06, "loss": 1.8412, "step": 29515000 }, { "epoch": 85.44, "learning_rate": 7.297711536680252e-06, "loss": 1.8284, "step": 29515500 }, { "epoch": 85.44, "learning_rate": 7.296987889032976e-06, "loss": 1.8341, "step": 29516000 }, { "epoch": 85.44, "learning_rate": 7.296264241385699e-06, "loss": 1.8261, "step": 29516500 }, { "epoch": 85.44, "learning_rate": 7.295540593738421e-06, "loss": 1.8122, "step": 29517000 }, { "epoch": 85.44, "learning_rate": 7.294816946091146e-06, "loss": 1.7986, "step": 29517500 }, { "epoch": 85.44, "learning_rate": 7.294094745739163e-06, "loss": 1.7994, "step": 29518000 }, { "epoch": 85.44, "learning_rate": 7.2933710980918865e-06, "loss": 1.8206, "step": 29518500 }, { "epoch": 85.45, "learning_rate": 7.292647450444609e-06, "loss": 1.8209, "step": 29519000 }, { "epoch": 85.45, "learning_rate": 7.291925250092627e-06, "loss": 1.8279, "step": 29519500 }, { "epoch": 85.45, "learning_rate": 7.291201602445351e-06, "loss": 1.8047, "step": 29520000 }, { "epoch": 85.45, "learning_rate": 7.290480849388663e-06, "loss": 1.822, "step": 29520500 }, { "epoch": 85.45, "learning_rate": 7.289757201741386e-06, "loss": 1.8241, "step": 29521000 }, { "epoch": 85.45, "learning_rate": 7.289033554094109e-06, "loss": 1.8201, "step": 29521500 }, { "epoch": 85.45, "learning_rate": 7.288309906446833e-06, "loss": 1.8068, "step": 29522000 }, { "epoch": 85.46, "learning_rate": 7.287586258799556e-06, "loss": 1.8042, "step": 29522500 }, { "epoch": 85.46, "learning_rate": 7.286862611152278e-06, "loss": 1.807, "step": 29523000 }, { "epoch": 85.46, "learning_rate": 7.286138963505003e-06, "loss": 1.818, "step": 29523500 }, { "epoch": 85.46, "learning_rate": 7.285415315857725e-06, "loss": 1.808, "step": 29524000 }, { "epoch": 85.46, "learning_rate": 7.2846931155057435e-06, "loss": 1.8425, "step": 29524500 }, { "epoch": 85.46, "learning_rate": 7.283969467858466e-06, "loss": 1.8036, "step": 29525000 }, { "epoch": 85.46, "learning_rate": 7.2832458202111905e-06, "loss": 1.7906, "step": 29525500 }, { "epoch": 85.47, "learning_rate": 7.282522172563913e-06, "loss": 1.835, "step": 29526000 }, { "epoch": 85.47, "learning_rate": 7.281798524916636e-06, "loss": 1.7935, "step": 29526500 }, { "epoch": 85.47, "learning_rate": 7.281076324564654e-06, "loss": 1.8343, "step": 29527000 }, { "epoch": 85.47, "learning_rate": 7.280352676917378e-06, "loss": 1.8296, "step": 29527500 }, { "epoch": 85.47, "learning_rate": 7.279629029270101e-06, "loss": 1.8518, "step": 29528000 }, { "epoch": 85.47, "learning_rate": 7.278905381622823e-06, "loss": 1.8182, "step": 29528500 }, { "epoch": 85.47, "learning_rate": 7.278181733975546e-06, "loss": 1.8231, "step": 29529000 }, { "epoch": 85.48, "learning_rate": 7.277459533623565e-06, "loss": 1.7998, "step": 29529500 }, { "epoch": 85.48, "learning_rate": 7.276735885976288e-06, "loss": 1.7942, "step": 29530000 }, { "epoch": 85.48, "learning_rate": 7.2760122383290105e-06, "loss": 1.8349, "step": 29530500 }, { "epoch": 85.48, "learning_rate": 7.275288590681735e-06, "loss": 1.8076, "step": 29531000 }, { "epoch": 85.48, "learning_rate": 7.2745649430344575e-06, "loss": 1.7918, "step": 29531500 }, { "epoch": 85.48, "learning_rate": 7.2738412953871806e-06, "loss": 1.8411, "step": 29532000 }, { "epoch": 85.48, "learning_rate": 7.273117647739904e-06, "loss": 1.8253, "step": 29532500 }, { "epoch": 85.49, "learning_rate": 7.272394000092628e-06, "loss": 1.8548, "step": 29533000 }, { "epoch": 85.49, "learning_rate": 7.271671799740646e-06, "loss": 1.833, "step": 29533500 }, { "epoch": 85.49, "learning_rate": 7.270948152093368e-06, "loss": 1.8118, "step": 29534000 }, { "epoch": 85.49, "learning_rate": 7.270224504446091e-06, "loss": 1.8082, "step": 29534500 }, { "epoch": 85.49, "learning_rate": 7.269500856798815e-06, "loss": 1.8259, "step": 29535000 }, { "epoch": 85.49, "learning_rate": 7.268777209151538e-06, "loss": 1.8024, "step": 29535500 }, { "epoch": 85.49, "learning_rate": 7.268053561504261e-06, "loss": 1.82, "step": 29536000 }, { "epoch": 85.5, "learning_rate": 7.267329913856985e-06, "loss": 1.8318, "step": 29536500 }, { "epoch": 85.5, "learning_rate": 7.266607713505002e-06, "loss": 1.8075, "step": 29537000 }, { "epoch": 85.5, "learning_rate": 7.265884065857725e-06, "loss": 1.8049, "step": 29537500 }, { "epoch": 85.5, "learning_rate": 7.265160418210448e-06, "loss": 1.839, "step": 29538000 }, { "epoch": 85.5, "learning_rate": 7.264436770563172e-06, "loss": 1.8072, "step": 29538500 }, { "epoch": 85.5, "learning_rate": 7.263713122915895e-06, "loss": 1.8111, "step": 29539000 }, { "epoch": 85.5, "learning_rate": 7.2629894752686185e-06, "loss": 1.843, "step": 29539500 }, { "epoch": 85.51, "learning_rate": 7.262265827621341e-06, "loss": 1.8209, "step": 29540000 }, { "epoch": 85.51, "learning_rate": 7.2615421799740655e-06, "loss": 1.8067, "step": 29540500 }, { "epoch": 85.51, "learning_rate": 7.260819979622083e-06, "loss": 1.86, "step": 29541000 }, { "epoch": 85.51, "learning_rate": 7.260096331974806e-06, "loss": 1.8124, "step": 29541500 }, { "epoch": 85.51, "learning_rate": 7.259372684327528e-06, "loss": 1.8304, "step": 29542000 }, { "epoch": 85.51, "learning_rate": 7.258649036680253e-06, "loss": 1.8042, "step": 29542500 }, { "epoch": 85.51, "learning_rate": 7.257925389032975e-06, "loss": 1.8072, "step": 29543000 }, { "epoch": 85.52, "learning_rate": 7.257201741385698e-06, "loss": 1.7948, "step": 29543500 }, { "epoch": 85.52, "learning_rate": 7.256478093738422e-06, "loss": 1.7957, "step": 29544000 }, { "epoch": 85.52, "learning_rate": 7.255754446091145e-06, "loss": 1.81, "step": 29544500 }, { "epoch": 85.52, "learning_rate": 7.255032245739163e-06, "loss": 1.8174, "step": 29545000 }, { "epoch": 85.52, "learning_rate": 7.2543085980918855e-06, "loss": 1.8252, "step": 29545500 }, { "epoch": 85.52, "learning_rate": 7.25358495044461e-06, "loss": 1.7982, "step": 29546000 }, { "epoch": 85.53, "learning_rate": 7.2528613027973325e-06, "loss": 1.8269, "step": 29546500 }, { "epoch": 85.53, "learning_rate": 7.252137655150056e-06, "loss": 1.8092, "step": 29547000 }, { "epoch": 85.53, "learning_rate": 7.2514140075027795e-06, "loss": 1.8291, "step": 29547500 }, { "epoch": 85.53, "learning_rate": 7.250691807150798e-06, "loss": 1.8082, "step": 29548000 }, { "epoch": 85.53, "learning_rate": 7.24996815950352e-06, "loss": 1.8201, "step": 29548500 }, { "epoch": 85.53, "learning_rate": 7.249244511856243e-06, "loss": 1.8044, "step": 29549000 }, { "epoch": 85.53, "learning_rate": 7.248520864208967e-06, "loss": 1.8375, "step": 29549500 }, { "epoch": 85.54, "learning_rate": 7.24779721656169e-06, "loss": 1.8352, "step": 29550000 }, { "epoch": 85.54, "learning_rate": 7.247076463505002e-06, "loss": 1.8121, "step": 29550500 }, { "epoch": 85.54, "learning_rate": 7.246352815857725e-06, "loss": 1.8492, "step": 29551000 }, { "epoch": 85.54, "learning_rate": 7.245629168210449e-06, "loss": 1.798, "step": 29551500 }, { "epoch": 85.54, "learning_rate": 7.244906967858467e-06, "loss": 1.806, "step": 29552000 }, { "epoch": 85.54, "learning_rate": 7.2441833202111895e-06, "loss": 1.7815, "step": 29552500 }, { "epoch": 85.54, "learning_rate": 7.243459672563913e-06, "loss": 1.821, "step": 29553000 }, { "epoch": 85.55, "learning_rate": 7.2427360249166365e-06, "loss": 1.8158, "step": 29553500 }, { "epoch": 85.55, "learning_rate": 7.24201237726936e-06, "loss": 1.8058, "step": 29554000 }, { "epoch": 85.55, "learning_rate": 7.241288729622083e-06, "loss": 1.8249, "step": 29554500 }, { "epoch": 85.55, "learning_rate": 7.240565081974805e-06, "loss": 1.8154, "step": 29555000 }, { "epoch": 85.55, "learning_rate": 7.23984143432753e-06, "loss": 1.8132, "step": 29555500 }, { "epoch": 85.55, "learning_rate": 7.239117786680252e-06, "loss": 1.8276, "step": 29556000 }, { "epoch": 85.55, "learning_rate": 7.238394139032975e-06, "loss": 1.8171, "step": 29556500 }, { "epoch": 85.56, "learning_rate": 7.237670491385699e-06, "loss": 1.804, "step": 29557000 }, { "epoch": 85.56, "learning_rate": 7.236946843738422e-06, "loss": 1.8301, "step": 29557500 }, { "epoch": 85.56, "learning_rate": 7.236223196091145e-06, "loss": 1.7991, "step": 29558000 }, { "epoch": 85.56, "learning_rate": 7.235499548443868e-06, "loss": 1.8128, "step": 29558500 }, { "epoch": 85.56, "learning_rate": 7.234777348091887e-06, "loss": 1.7934, "step": 29559000 }, { "epoch": 85.56, "learning_rate": 7.234053700444609e-06, "loss": 1.7979, "step": 29559500 }, { "epoch": 85.56, "learning_rate": 7.2333300527973325e-06, "loss": 1.8125, "step": 29560000 }, { "epoch": 85.57, "learning_rate": 7.2326064051500555e-06, "loss": 1.8544, "step": 29560500 }, { "epoch": 85.57, "learning_rate": 7.2318842047980745e-06, "loss": 1.8219, "step": 29561000 }, { "epoch": 85.57, "learning_rate": 7.231160557150797e-06, "loss": 1.8144, "step": 29561500 }, { "epoch": 85.57, "learning_rate": 7.23043690950352e-06, "loss": 1.8203, "step": 29562000 }, { "epoch": 85.57, "learning_rate": 7.229713261856244e-06, "loss": 1.8189, "step": 29562500 }, { "epoch": 85.57, "learning_rate": 7.228989614208967e-06, "loss": 1.8041, "step": 29563000 }, { "epoch": 85.57, "learning_rate": 7.22826596656169e-06, "loss": 1.8104, "step": 29563500 }, { "epoch": 85.58, "learning_rate": 7.227543766209707e-06, "loss": 1.7978, "step": 29564000 }, { "epoch": 85.58, "learning_rate": 7.226820118562432e-06, "loss": 1.8047, "step": 29564500 }, { "epoch": 85.58, "learning_rate": 7.226096470915154e-06, "loss": 1.8078, "step": 29565000 }, { "epoch": 85.58, "learning_rate": 7.225374270563172e-06, "loss": 1.8322, "step": 29565500 }, { "epoch": 85.58, "learning_rate": 7.2246506229158945e-06, "loss": 1.805, "step": 29566000 }, { "epoch": 85.58, "learning_rate": 7.223926975268619e-06, "loss": 1.817, "step": 29566500 }, { "epoch": 85.58, "learning_rate": 7.2232033276213415e-06, "loss": 1.8272, "step": 29567000 }, { "epoch": 85.59, "learning_rate": 7.2224796799740646e-06, "loss": 1.8039, "step": 29567500 }, { "epoch": 85.59, "learning_rate": 7.221757479622082e-06, "loss": 1.7826, "step": 29568000 }, { "epoch": 85.59, "learning_rate": 7.221033831974807e-06, "loss": 1.8137, "step": 29568500 }, { "epoch": 85.59, "learning_rate": 7.220310184327529e-06, "loss": 1.8121, "step": 29569000 }, { "epoch": 85.59, "learning_rate": 7.219586536680252e-06, "loss": 1.8385, "step": 29569500 }, { "epoch": 85.59, "learning_rate": 7.218862889032975e-06, "loss": 1.8089, "step": 29570000 }, { "epoch": 85.59, "learning_rate": 7.218140688680994e-06, "loss": 1.8102, "step": 29570500 }, { "epoch": 85.6, "learning_rate": 7.217417041033717e-06, "loss": 1.8276, "step": 29571000 }, { "epoch": 85.6, "learning_rate": 7.216693393386439e-06, "loss": 1.8373, "step": 29571500 }, { "epoch": 85.6, "learning_rate": 7.215969745739164e-06, "loss": 1.8201, "step": 29572000 }, { "epoch": 85.6, "learning_rate": 7.215246098091886e-06, "loss": 1.8177, "step": 29572500 }, { "epoch": 85.6, "learning_rate": 7.214522450444609e-06, "loss": 1.8013, "step": 29573000 }, { "epoch": 85.6, "learning_rate": 7.213798802797332e-06, "loss": 1.7819, "step": 29573500 }, { "epoch": 85.6, "learning_rate": 7.213075155150056e-06, "loss": 1.8307, "step": 29574000 }, { "epoch": 85.61, "learning_rate": 7.212351507502779e-06, "loss": 1.7874, "step": 29574500 }, { "epoch": 85.61, "learning_rate": 7.211629307150797e-06, "loss": 1.8194, "step": 29575000 }, { "epoch": 85.61, "learning_rate": 7.21090565950352e-06, "loss": 1.8334, "step": 29575500 }, { "epoch": 85.61, "learning_rate": 7.210182011856244e-06, "loss": 1.8259, "step": 29576000 }, { "epoch": 85.61, "learning_rate": 7.209458364208967e-06, "loss": 1.8054, "step": 29576500 }, { "epoch": 85.61, "learning_rate": 7.20873471656169e-06, "loss": 1.8422, "step": 29577000 }, { "epoch": 85.61, "learning_rate": 7.208012516209707e-06, "loss": 1.8571, "step": 29577500 }, { "epoch": 85.62, "learning_rate": 7.207288868562431e-06, "loss": 1.8044, "step": 29578000 }, { "epoch": 85.62, "learning_rate": 7.206566668210449e-06, "loss": 1.8173, "step": 29578500 }, { "epoch": 85.62, "learning_rate": 7.205843020563171e-06, "loss": 1.8222, "step": 29579000 }, { "epoch": 85.62, "learning_rate": 7.205119372915894e-06, "loss": 1.8344, "step": 29579500 }, { "epoch": 85.62, "learning_rate": 7.204395725268618e-06, "loss": 1.8099, "step": 29580000 }, { "epoch": 85.62, "learning_rate": 7.2036720776213414e-06, "loss": 1.8269, "step": 29580500 }, { "epoch": 85.62, "learning_rate": 7.2029484299740645e-06, "loss": 1.8147, "step": 29581000 }, { "epoch": 85.63, "learning_rate": 7.2022247823267884e-06, "loss": 1.8063, "step": 29581500 }, { "epoch": 85.63, "learning_rate": 7.2015011346795115e-06, "loss": 1.8199, "step": 29582000 }, { "epoch": 85.63, "learning_rate": 7.200778934327529e-06, "loss": 1.8048, "step": 29582500 }, { "epoch": 85.63, "learning_rate": 7.200055286680252e-06, "loss": 1.8181, "step": 29583000 }, { "epoch": 85.63, "learning_rate": 7.199331639032976e-06, "loss": 1.7908, "step": 29583500 }, { "epoch": 85.63, "learning_rate": 7.198607991385699e-06, "loss": 1.7858, "step": 29584000 }, { "epoch": 85.64, "learning_rate": 7.197884343738422e-06, "loss": 1.8285, "step": 29584500 }, { "epoch": 85.64, "learning_rate": 7.197162143386439e-06, "loss": 1.8263, "step": 29585000 }, { "epoch": 85.64, "learning_rate": 7.196438495739163e-06, "loss": 1.8024, "step": 29585500 }, { "epoch": 85.64, "learning_rate": 7.195714848091886e-06, "loss": 1.7999, "step": 29586000 }, { "epoch": 85.64, "learning_rate": 7.194991200444609e-06, "loss": 1.8091, "step": 29586500 }, { "epoch": 85.64, "learning_rate": 7.194267552797333e-06, "loss": 1.8055, "step": 29587000 }, { "epoch": 85.64, "learning_rate": 7.193543905150056e-06, "loss": 1.81, "step": 29587500 }, { "epoch": 85.65, "learning_rate": 7.1928217047980735e-06, "loss": 1.8205, "step": 29588000 }, { "epoch": 85.65, "learning_rate": 7.192099504446092e-06, "loss": 1.8074, "step": 29588500 }, { "epoch": 85.65, "learning_rate": 7.191375856798814e-06, "loss": 1.8208, "step": 29589000 }, { "epoch": 85.65, "learning_rate": 7.190652209151539e-06, "loss": 1.8069, "step": 29589500 }, { "epoch": 85.65, "learning_rate": 7.189928561504261e-06, "loss": 1.8024, "step": 29590000 }, { "epoch": 85.65, "learning_rate": 7.189204913856984e-06, "loss": 1.8165, "step": 29590500 }, { "epoch": 85.65, "learning_rate": 7.188481266209708e-06, "loss": 1.8357, "step": 29591000 }, { "epoch": 85.66, "learning_rate": 7.187757618562431e-06, "loss": 1.8239, "step": 29591500 }, { "epoch": 85.66, "learning_rate": 7.187033970915154e-06, "loss": 1.8038, "step": 29592000 }, { "epoch": 85.66, "learning_rate": 7.186310323267878e-06, "loss": 1.815, "step": 29592500 }, { "epoch": 85.66, "learning_rate": 7.185586675620601e-06, "loss": 1.8059, "step": 29593000 }, { "epoch": 85.66, "learning_rate": 7.184863027973323e-06, "loss": 1.8053, "step": 29593500 }, { "epoch": 85.66, "learning_rate": 7.184140827621341e-06, "loss": 1.8204, "step": 29594000 }, { "epoch": 85.66, "learning_rate": 7.183417179974065e-06, "loss": 1.8182, "step": 29594500 }, { "epoch": 85.67, "learning_rate": 7.182694979622083e-06, "loss": 1.8368, "step": 29595000 }, { "epoch": 85.67, "learning_rate": 7.181972779270101e-06, "loss": 1.7967, "step": 29595500 }, { "epoch": 85.67, "learning_rate": 7.181249131622824e-06, "loss": 1.8115, "step": 29596000 }, { "epoch": 85.67, "learning_rate": 7.180525483975546e-06, "loss": 1.8404, "step": 29596500 }, { "epoch": 85.67, "learning_rate": 7.179801836328271e-06, "loss": 1.8221, "step": 29597000 }, { "epoch": 85.67, "learning_rate": 7.179078188680993e-06, "loss": 1.8358, "step": 29597500 }, { "epoch": 85.67, "learning_rate": 7.178354541033716e-06, "loss": 1.8101, "step": 29598000 }, { "epoch": 85.68, "learning_rate": 7.17763089338644e-06, "loss": 1.7923, "step": 29598500 }, { "epoch": 85.68, "learning_rate": 7.176907245739163e-06, "loss": 1.8161, "step": 29599000 }, { "epoch": 85.68, "learning_rate": 7.176183598091886e-06, "loss": 1.8168, "step": 29599500 }, { "epoch": 85.68, "learning_rate": 7.175459950444608e-06, "loss": 1.8542, "step": 29600000 }, { "epoch": 85.68, "learning_rate": 7.174739197387922e-06, "loss": 1.8186, "step": 29600500 }, { "epoch": 85.68, "learning_rate": 7.174015549740645e-06, "loss": 1.8059, "step": 29601000 }, { "epoch": 85.68, "learning_rate": 7.1732919020933685e-06, "loss": 1.8265, "step": 29601500 }, { "epoch": 85.69, "learning_rate": 7.172568254446091e-06, "loss": 1.8076, "step": 29602000 }, { "epoch": 85.69, "learning_rate": 7.1718446067988155e-06, "loss": 1.831, "step": 29602500 }, { "epoch": 85.69, "learning_rate": 7.171120959151538e-06, "loss": 1.8225, "step": 29603000 }, { "epoch": 85.69, "learning_rate": 7.170397311504261e-06, "loss": 1.8151, "step": 29603500 }, { "epoch": 85.69, "learning_rate": 7.169673663856985e-06, "loss": 1.8152, "step": 29604000 }, { "epoch": 85.69, "learning_rate": 7.168950016209708e-06, "loss": 1.8164, "step": 29604500 }, { "epoch": 85.69, "learning_rate": 7.168226368562431e-06, "loss": 1.7987, "step": 29605000 }, { "epoch": 85.7, "learning_rate": 7.167502720915153e-06, "loss": 1.7922, "step": 29605500 }, { "epoch": 85.7, "learning_rate": 7.166779073267878e-06, "loss": 1.8333, "step": 29606000 }, { "epoch": 85.7, "learning_rate": 7.1660554256206e-06, "loss": 1.8024, "step": 29606500 }, { "epoch": 85.7, "learning_rate": 7.165333225268618e-06, "loss": 1.8052, "step": 29607000 }, { "epoch": 85.7, "learning_rate": 7.164609577621341e-06, "loss": 1.8249, "step": 29607500 }, { "epoch": 85.7, "learning_rate": 7.163885929974065e-06, "loss": 1.8403, "step": 29608000 }, { "epoch": 85.7, "learning_rate": 7.163162282326788e-06, "loss": 1.8221, "step": 29608500 }, { "epoch": 85.71, "learning_rate": 7.1624386346795106e-06, "loss": 1.7836, "step": 29609000 }, { "epoch": 85.71, "learning_rate": 7.161716434327529e-06, "loss": 1.8174, "step": 29609500 }, { "epoch": 85.71, "learning_rate": 7.160992786680253e-06, "loss": 1.8151, "step": 29610000 }, { "epoch": 85.71, "learning_rate": 7.16027058632827e-06, "loss": 1.7995, "step": 29610500 }, { "epoch": 85.71, "learning_rate": 7.159546938680993e-06, "loss": 1.8053, "step": 29611000 }, { "epoch": 85.71, "learning_rate": 7.158823291033717e-06, "loss": 1.8129, "step": 29611500 }, { "epoch": 85.71, "learning_rate": 7.15809964338644e-06, "loss": 1.8026, "step": 29612000 }, { "epoch": 85.72, "learning_rate": 7.157375995739163e-06, "loss": 1.8135, "step": 29612500 }, { "epoch": 85.72, "learning_rate": 7.156652348091886e-06, "loss": 1.8086, "step": 29613000 }, { "epoch": 85.72, "learning_rate": 7.15592870044461e-06, "loss": 1.7954, "step": 29613500 }, { "epoch": 85.72, "learning_rate": 7.155205052797333e-06, "loss": 1.8314, "step": 29614000 }, { "epoch": 85.72, "learning_rate": 7.154481405150055e-06, "loss": 1.8128, "step": 29614500 }, { "epoch": 85.72, "learning_rate": 7.1537592047980734e-06, "loss": 1.8218, "step": 29615000 }, { "epoch": 85.72, "learning_rate": 7.153035557150797e-06, "loss": 1.7991, "step": 29615500 }, { "epoch": 85.73, "learning_rate": 7.1523119095035204e-06, "loss": 1.8102, "step": 29616000 }, { "epoch": 85.73, "learning_rate": 7.151589709151538e-06, "loss": 1.8248, "step": 29616500 }, { "epoch": 85.73, "learning_rate": 7.150866061504261e-06, "loss": 1.8068, "step": 29617000 }, { "epoch": 85.73, "learning_rate": 7.150142413856985e-06, "loss": 1.7997, "step": 29617500 }, { "epoch": 85.73, "learning_rate": 7.149418766209708e-06, "loss": 1.8076, "step": 29618000 }, { "epoch": 85.73, "learning_rate": 7.148696565857725e-06, "loss": 1.8304, "step": 29618500 }, { "epoch": 85.73, "learning_rate": 7.147972918210449e-06, "loss": 1.8005, "step": 29619000 }, { "epoch": 85.74, "learning_rate": 7.147249270563172e-06, "loss": 1.8306, "step": 29619500 }, { "epoch": 85.74, "learning_rate": 7.146525622915895e-06, "loss": 1.8096, "step": 29620000 }, { "epoch": 85.74, "learning_rate": 7.145801975268618e-06, "loss": 1.8093, "step": 29620500 }, { "epoch": 85.74, "learning_rate": 7.145078327621342e-06, "loss": 1.8276, "step": 29621000 }, { "epoch": 85.74, "learning_rate": 7.144356127269359e-06, "loss": 1.8128, "step": 29621500 }, { "epoch": 85.74, "learning_rate": 7.1436324796220825e-06, "loss": 1.8016, "step": 29622000 }, { "epoch": 85.75, "learning_rate": 7.1429088319748055e-06, "loss": 1.8253, "step": 29622500 }, { "epoch": 85.75, "learning_rate": 7.1421851843275295e-06, "loss": 1.8142, "step": 29623000 }, { "epoch": 85.75, "learning_rate": 7.1414615366802525e-06, "loss": 1.8015, "step": 29623500 }, { "epoch": 85.75, "learning_rate": 7.140737889032975e-06, "loss": 1.8453, "step": 29624000 }, { "epoch": 85.75, "learning_rate": 7.1400142413856996e-06, "loss": 1.8206, "step": 29624500 }, { "epoch": 85.75, "learning_rate": 7.139292041033717e-06, "loss": 1.8323, "step": 29625000 }, { "epoch": 85.75, "learning_rate": 7.13856839338644e-06, "loss": 1.8086, "step": 29625500 }, { "epoch": 85.76, "learning_rate": 7.137844745739163e-06, "loss": 1.8492, "step": 29626000 }, { "epoch": 85.76, "learning_rate": 7.137121098091887e-06, "loss": 1.7921, "step": 29626500 }, { "epoch": 85.76, "learning_rate": 7.13639745044461e-06, "loss": 1.8132, "step": 29627000 }, { "epoch": 85.76, "learning_rate": 7.135673802797332e-06, "loss": 1.8338, "step": 29627500 }, { "epoch": 85.76, "learning_rate": 7.134950155150055e-06, "loss": 1.8008, "step": 29628000 }, { "epoch": 85.76, "learning_rate": 7.134226507502779e-06, "loss": 1.8223, "step": 29628500 }, { "epoch": 85.76, "learning_rate": 7.133502859855502e-06, "loss": 1.8013, "step": 29629000 }, { "epoch": 85.77, "learning_rate": 7.132779212208225e-06, "loss": 1.7769, "step": 29629500 }, { "epoch": 85.77, "learning_rate": 7.132055564560949e-06, "loss": 1.8131, "step": 29630000 }, { "epoch": 85.77, "learning_rate": 7.131331916913672e-06, "loss": 1.8474, "step": 29630500 }, { "epoch": 85.77, "learning_rate": 7.13060971656169e-06, "loss": 1.8278, "step": 29631000 }, { "epoch": 85.77, "learning_rate": 7.129886068914413e-06, "loss": 1.829, "step": 29631500 }, { "epoch": 85.77, "learning_rate": 7.129162421267137e-06, "loss": 1.7918, "step": 29632000 }, { "epoch": 85.77, "learning_rate": 7.12843877361986e-06, "loss": 1.8141, "step": 29632500 }, { "epoch": 85.78, "learning_rate": 7.127716573267877e-06, "loss": 1.7849, "step": 29633000 }, { "epoch": 85.78, "learning_rate": 7.126994372915895e-06, "loss": 1.831, "step": 29633500 }, { "epoch": 85.78, "learning_rate": 7.126270725268619e-06, "loss": 1.8048, "step": 29634000 }, { "epoch": 85.78, "learning_rate": 7.125547077621342e-06, "loss": 1.8106, "step": 29634500 }, { "epoch": 85.78, "learning_rate": 7.124823429974064e-06, "loss": 1.8055, "step": 29635000 }, { "epoch": 85.78, "learning_rate": 7.124099782326787e-06, "loss": 1.7954, "step": 29635500 }, { "epoch": 85.78, "learning_rate": 7.123376134679511e-06, "loss": 1.8082, "step": 29636000 }, { "epoch": 85.79, "learning_rate": 7.122652487032234e-06, "loss": 1.7923, "step": 29636500 }, { "epoch": 85.79, "learning_rate": 7.121930286680252e-06, "loss": 1.8231, "step": 29637000 }, { "epoch": 85.79, "learning_rate": 7.121206639032975e-06, "loss": 1.8192, "step": 29637500 }, { "epoch": 85.79, "learning_rate": 7.1204829913856995e-06, "loss": 1.8068, "step": 29638000 }, { "epoch": 85.79, "learning_rate": 7.119759343738422e-06, "loss": 1.807, "step": 29638500 }, { "epoch": 85.79, "learning_rate": 7.11903714338644e-06, "loss": 1.7766, "step": 29639000 }, { "epoch": 85.79, "learning_rate": 7.118313495739164e-06, "loss": 1.8065, "step": 29639500 }, { "epoch": 85.8, "learning_rate": 7.117589848091887e-06, "loss": 1.8175, "step": 29640000 }, { "epoch": 85.8, "learning_rate": 7.116866200444609e-06, "loss": 1.7879, "step": 29640500 }, { "epoch": 85.8, "learning_rate": 7.116144000092627e-06, "loss": 1.8108, "step": 29641000 }, { "epoch": 85.8, "learning_rate": 7.115420352445351e-06, "loss": 1.8236, "step": 29641500 }, { "epoch": 85.8, "learning_rate": 7.114696704798074e-06, "loss": 1.7955, "step": 29642000 }, { "epoch": 85.8, "learning_rate": 7.113973057150796e-06, "loss": 1.8124, "step": 29642500 }, { "epoch": 85.8, "learning_rate": 7.1132494095035195e-06, "loss": 1.821, "step": 29643000 }, { "epoch": 85.81, "learning_rate": 7.1125272091515384e-06, "loss": 1.8074, "step": 29643500 }, { "epoch": 85.81, "learning_rate": 7.1118035615042615e-06, "loss": 1.8385, "step": 29644000 }, { "epoch": 85.81, "learning_rate": 7.111079913856985e-06, "loss": 1.803, "step": 29644500 }, { "epoch": 85.81, "learning_rate": 7.110356266209707e-06, "loss": 1.8247, "step": 29645000 }, { "epoch": 85.81, "learning_rate": 7.109632618562432e-06, "loss": 1.8097, "step": 29645500 }, { "epoch": 85.81, "learning_rate": 7.108908970915154e-06, "loss": 1.8128, "step": 29646000 }, { "epoch": 85.81, "learning_rate": 7.108185323267877e-06, "loss": 1.817, "step": 29646500 }, { "epoch": 85.82, "learning_rate": 7.107461675620601e-06, "loss": 1.8081, "step": 29647000 }, { "epoch": 85.82, "learning_rate": 7.106739475268619e-06, "loss": 1.8241, "step": 29647500 }, { "epoch": 85.82, "learning_rate": 7.106015827621341e-06, "loss": 1.8232, "step": 29648000 }, { "epoch": 85.82, "learning_rate": 7.105292179974064e-06, "loss": 1.8123, "step": 29648500 }, { "epoch": 85.82, "learning_rate": 7.104568532326788e-06, "loss": 1.8007, "step": 29649000 }, { "epoch": 85.82, "learning_rate": 7.103846331974806e-06, "loss": 1.8193, "step": 29649500 }, { "epoch": 85.82, "learning_rate": 7.1031226843275285e-06, "loss": 1.8123, "step": 29650000 }, { "epoch": 85.83, "learning_rate": 7.102400483975547e-06, "loss": 1.8203, "step": 29650500 }, { "epoch": 85.83, "learning_rate": 7.1016768363282705e-06, "loss": 1.801, "step": 29651000 }, { "epoch": 85.83, "learning_rate": 7.100953188680994e-06, "loss": 1.8418, "step": 29651500 }, { "epoch": 85.83, "learning_rate": 7.100229541033717e-06, "loss": 1.7934, "step": 29652000 }, { "epoch": 85.83, "learning_rate": 7.099505893386439e-06, "loss": 1.7956, "step": 29652500 }, { "epoch": 85.83, "learning_rate": 7.098785140329751e-06, "loss": 1.8027, "step": 29653000 }, { "epoch": 85.83, "learning_rate": 7.098061492682476e-06, "loss": 1.8238, "step": 29653500 }, { "epoch": 85.84, "learning_rate": 7.097337845035198e-06, "loss": 1.8305, "step": 29654000 }, { "epoch": 85.84, "learning_rate": 7.096614197387921e-06, "loss": 1.7944, "step": 29654500 }, { "epoch": 85.84, "learning_rate": 7.095890549740645e-06, "loss": 1.7939, "step": 29655000 }, { "epoch": 85.84, "learning_rate": 7.095166902093368e-06, "loss": 1.8169, "step": 29655500 }, { "epoch": 85.84, "learning_rate": 7.094443254446091e-06, "loss": 1.81, "step": 29656000 }, { "epoch": 85.84, "learning_rate": 7.0937196067988145e-06, "loss": 1.821, "step": 29656500 }, { "epoch": 85.84, "learning_rate": 7.092995959151538e-06, "loss": 1.8091, "step": 29657000 }, { "epoch": 85.85, "learning_rate": 7.0922723115042615e-06, "loss": 1.8233, "step": 29657500 }, { "epoch": 85.85, "learning_rate": 7.091548663856984e-06, "loss": 1.8329, "step": 29658000 }, { "epoch": 85.85, "learning_rate": 7.0908250162097085e-06, "loss": 1.8263, "step": 29658500 }, { "epoch": 85.85, "learning_rate": 7.090102815857726e-06, "loss": 1.8129, "step": 29659000 }, { "epoch": 85.85, "learning_rate": 7.089379168210449e-06, "loss": 1.7994, "step": 29659500 }, { "epoch": 85.85, "learning_rate": 7.088655520563171e-06, "loss": 1.8096, "step": 29660000 }, { "epoch": 85.86, "learning_rate": 7.087931872915896e-06, "loss": 1.8112, "step": 29660500 }, { "epoch": 85.86, "learning_rate": 7.087208225268618e-06, "loss": 1.8121, "step": 29661000 }, { "epoch": 85.86, "learning_rate": 7.086486024916636e-06, "loss": 1.8305, "step": 29661500 }, { "epoch": 85.86, "learning_rate": 7.085762377269358e-06, "loss": 1.8024, "step": 29662000 }, { "epoch": 85.86, "learning_rate": 7.085038729622083e-06, "loss": 1.8217, "step": 29662500 }, { "epoch": 85.86, "learning_rate": 7.084315081974806e-06, "loss": 1.7958, "step": 29663000 }, { "epoch": 85.86, "learning_rate": 7.0835914343275285e-06, "loss": 1.817, "step": 29663500 }, { "epoch": 85.87, "learning_rate": 7.082867786680253e-06, "loss": 1.7998, "step": 29664000 }, { "epoch": 85.87, "learning_rate": 7.0821455863282705e-06, "loss": 1.8058, "step": 29664500 }, { "epoch": 85.87, "learning_rate": 7.0814219386809936e-06, "loss": 1.8142, "step": 29665000 }, { "epoch": 85.87, "learning_rate": 7.080698291033716e-06, "loss": 1.7899, "step": 29665500 }, { "epoch": 85.87, "learning_rate": 7.0799746433864406e-06, "loss": 1.8122, "step": 29666000 }, { "epoch": 85.87, "learning_rate": 7.079250995739163e-06, "loss": 1.8138, "step": 29666500 }, { "epoch": 85.87, "learning_rate": 7.078527348091886e-06, "loss": 1.8058, "step": 29667000 }, { "epoch": 85.88, "learning_rate": 7.077803700444609e-06, "loss": 1.8106, "step": 29667500 }, { "epoch": 85.88, "learning_rate": 7.077080052797333e-06, "loss": 1.8015, "step": 29668000 }, { "epoch": 85.88, "learning_rate": 7.07635785244535e-06, "loss": 1.8213, "step": 29668500 }, { "epoch": 85.88, "learning_rate": 7.075634204798073e-06, "loss": 1.8454, "step": 29669000 }, { "epoch": 85.88, "learning_rate": 7.074910557150798e-06, "loss": 1.8116, "step": 29669500 }, { "epoch": 85.88, "learning_rate": 7.07418690950352e-06, "loss": 1.8019, "step": 29670000 }, { "epoch": 85.88, "learning_rate": 7.073464709151538e-06, "loss": 1.8057, "step": 29670500 }, { "epoch": 85.89, "learning_rate": 7.072742508799556e-06, "loss": 1.8446, "step": 29671000 }, { "epoch": 85.89, "learning_rate": 7.072020308447573e-06, "loss": 1.8203, "step": 29671500 }, { "epoch": 85.89, "learning_rate": 7.071296660800298e-06, "loss": 1.8158, "step": 29672000 }, { "epoch": 85.89, "learning_rate": 7.07057301315302e-06, "loss": 1.823, "step": 29672500 }, { "epoch": 85.89, "learning_rate": 7.069849365505743e-06, "loss": 1.808, "step": 29673000 }, { "epoch": 85.89, "learning_rate": 7.069125717858466e-06, "loss": 1.8111, "step": 29673500 }, { "epoch": 85.89, "learning_rate": 7.06840207021119e-06, "loss": 1.824, "step": 29674000 }, { "epoch": 85.9, "learning_rate": 7.067678422563913e-06, "loss": 1.7828, "step": 29674500 }, { "epoch": 85.9, "learning_rate": 7.066954774916635e-06, "loss": 1.829, "step": 29675000 }, { "epoch": 85.9, "learning_rate": 7.066232574564654e-06, "loss": 1.8159, "step": 29675500 }, { "epoch": 85.9, "learning_rate": 7.065508926917377e-06, "loss": 1.8433, "step": 29676000 }, { "epoch": 85.9, "learning_rate": 7.0647852792701e-06, "loss": 1.8166, "step": 29676500 }, { "epoch": 85.9, "learning_rate": 7.0640616316228234e-06, "loss": 1.8298, "step": 29677000 }, { "epoch": 85.9, "learning_rate": 7.063337983975547e-06, "loss": 1.7998, "step": 29677500 }, { "epoch": 85.91, "learning_rate": 7.062615783623565e-06, "loss": 1.8396, "step": 29678000 }, { "epoch": 85.91, "learning_rate": 7.061892135976288e-06, "loss": 1.8131, "step": 29678500 }, { "epoch": 85.91, "learning_rate": 7.061168488329011e-06, "loss": 1.7934, "step": 29679000 }, { "epoch": 85.91, "learning_rate": 7.060444840681735e-06, "loss": 1.8106, "step": 29679500 }, { "epoch": 85.91, "learning_rate": 7.059721193034458e-06, "loss": 1.8208, "step": 29680000 }, { "epoch": 85.91, "learning_rate": 7.05899754538718e-06, "loss": 1.8378, "step": 29680500 }, { "epoch": 85.91, "learning_rate": 7.058273897739905e-06, "loss": 1.8091, "step": 29681000 }, { "epoch": 85.92, "learning_rate": 7.057550250092627e-06, "loss": 1.7957, "step": 29681500 }, { "epoch": 85.92, "learning_rate": 7.05682949703594e-06, "loss": 1.8082, "step": 29682000 }, { "epoch": 85.92, "learning_rate": 7.056105849388662e-06, "loss": 1.8343, "step": 29682500 }, { "epoch": 85.92, "learning_rate": 7.0553822017413854e-06, "loss": 1.8202, "step": 29683000 }, { "epoch": 85.92, "learning_rate": 7.054658554094109e-06, "loss": 1.7903, "step": 29683500 }, { "epoch": 85.92, "learning_rate": 7.0539349064468324e-06, "loss": 1.8242, "step": 29684000 }, { "epoch": 85.92, "learning_rate": 7.0532112587995555e-06, "loss": 1.7962, "step": 29684500 }, { "epoch": 85.93, "learning_rate": 7.0524890584475745e-06, "loss": 1.8026, "step": 29685000 }, { "epoch": 85.93, "learning_rate": 7.051766858095592e-06, "loss": 1.7886, "step": 29685500 }, { "epoch": 85.93, "learning_rate": 7.051043210448315e-06, "loss": 1.8119, "step": 29686000 }, { "epoch": 85.93, "learning_rate": 7.050319562801037e-06, "loss": 1.8018, "step": 29686500 }, { "epoch": 85.93, "learning_rate": 7.049595915153762e-06, "loss": 1.7875, "step": 29687000 }, { "epoch": 85.93, "learning_rate": 7.048872267506484e-06, "loss": 1.8333, "step": 29687500 }, { "epoch": 85.93, "learning_rate": 7.048148619859207e-06, "loss": 1.8068, "step": 29688000 }, { "epoch": 85.94, "learning_rate": 7.04742497221193e-06, "loss": 1.8118, "step": 29688500 }, { "epoch": 85.94, "learning_rate": 7.046701324564654e-06, "loss": 1.8259, "step": 29689000 }, { "epoch": 85.94, "learning_rate": 7.045977676917377e-06, "loss": 1.8196, "step": 29689500 }, { "epoch": 85.94, "learning_rate": 7.0452540292701e-06, "loss": 1.8092, "step": 29690000 }, { "epoch": 85.94, "learning_rate": 7.044530381622824e-06, "loss": 1.8165, "step": 29690500 }, { "epoch": 85.94, "learning_rate": 7.043806733975547e-06, "loss": 1.8074, "step": 29691000 }, { "epoch": 85.94, "learning_rate": 7.0430845336235646e-06, "loss": 1.8085, "step": 29691500 }, { "epoch": 85.95, "learning_rate": 7.042360885976288e-06, "loss": 1.7995, "step": 29692000 }, { "epoch": 85.95, "learning_rate": 7.0416372383290116e-06, "loss": 1.8559, "step": 29692500 }, { "epoch": 85.95, "learning_rate": 7.040915037977029e-06, "loss": 1.7842, "step": 29693000 }, { "epoch": 85.95, "learning_rate": 7.040191390329752e-06, "loss": 1.832, "step": 29693500 }, { "epoch": 85.95, "learning_rate": 7.039467742682475e-06, "loss": 1.8214, "step": 29694000 }, { "epoch": 85.95, "learning_rate": 7.038744095035199e-06, "loss": 1.8163, "step": 29694500 }, { "epoch": 85.95, "learning_rate": 7.038020447387922e-06, "loss": 1.7967, "step": 29695000 }, { "epoch": 85.96, "learning_rate": 7.037296799740645e-06, "loss": 1.8295, "step": 29695500 }, { "epoch": 85.96, "learning_rate": 7.036574599388662e-06, "loss": 1.8278, "step": 29696000 }, { "epoch": 85.96, "learning_rate": 7.035850951741386e-06, "loss": 1.8399, "step": 29696500 }, { "epoch": 85.96, "learning_rate": 7.035127304094109e-06, "loss": 1.818, "step": 29697000 }, { "epoch": 85.96, "learning_rate": 7.034403656446832e-06, "loss": 1.8296, "step": 29697500 }, { "epoch": 85.96, "learning_rate": 7.033680008799556e-06, "loss": 1.8171, "step": 29698000 }, { "epoch": 85.96, "learning_rate": 7.032956361152279e-06, "loss": 1.8105, "step": 29698500 }, { "epoch": 85.97, "learning_rate": 7.032232713505002e-06, "loss": 1.8188, "step": 29699000 }, { "epoch": 85.97, "learning_rate": 7.031509065857725e-06, "loss": 1.828, "step": 29699500 }, { "epoch": 85.97, "learning_rate": 7.030785418210449e-06, "loss": 1.8288, "step": 29700000 }, { "epoch": 85.97, "learning_rate": 7.030063217858467e-06, "loss": 1.8409, "step": 29700500 }, { "epoch": 85.97, "learning_rate": 7.02933957021119e-06, "loss": 1.8009, "step": 29701000 }, { "epoch": 85.97, "learning_rate": 7.028615922563912e-06, "loss": 1.7998, "step": 29701500 }, { "epoch": 85.98, "learning_rate": 7.027892274916637e-06, "loss": 1.7893, "step": 29702000 }, { "epoch": 85.98, "learning_rate": 7.027171521859949e-06, "loss": 1.8123, "step": 29702500 }, { "epoch": 85.98, "learning_rate": 7.026447874212671e-06, "loss": 1.7961, "step": 29703000 }, { "epoch": 85.98, "learning_rate": 7.025724226565394e-06, "loss": 1.8147, "step": 29703500 }, { "epoch": 85.98, "learning_rate": 7.025000578918118e-06, "loss": 1.8074, "step": 29704000 }, { "epoch": 85.98, "learning_rate": 7.0242783785661364e-06, "loss": 1.8356, "step": 29704500 }, { "epoch": 85.98, "learning_rate": 7.023556178214154e-06, "loss": 1.8284, "step": 29705000 }, { "epoch": 85.99, "learning_rate": 7.022832530566877e-06, "loss": 1.8202, "step": 29705500 }, { "epoch": 85.99, "learning_rate": 7.022108882919601e-06, "loss": 1.8051, "step": 29706000 }, { "epoch": 85.99, "learning_rate": 7.021385235272324e-06, "loss": 1.8213, "step": 29706500 }, { "epoch": 85.99, "learning_rate": 7.020661587625047e-06, "loss": 1.7744, "step": 29707000 }, { "epoch": 85.99, "learning_rate": 7.019937939977769e-06, "loss": 1.8404, "step": 29707500 }, { "epoch": 85.99, "learning_rate": 7.019214292330494e-06, "loss": 1.8147, "step": 29708000 }, { "epoch": 85.99, "learning_rate": 7.018492091978511e-06, "loss": 1.8006, "step": 29708500 }, { "epoch": 86.0, "learning_rate": 7.017768444331234e-06, "loss": 1.812, "step": 29709000 }, { "epoch": 86.0, "learning_rate": 7.0170447966839564e-06, "loss": 1.8255, "step": 29709500 }, { "epoch": 86.0, "learning_rate": 7.016321149036681e-06, "loss": 1.8123, "step": 29710000 }, { "epoch": 86.0, "learning_rate": 7.0155989486846985e-06, "loss": 1.8029, "step": 29710500 }, { "epoch": 86.0, "eval_accuracy": 0.6887171585544973, "eval_accuracy_mlm": 0.6578651829440271, "eval_accuracy_nsp": 0.8541566411388083, "eval_loss": 2.1977808475494385, "eval_runtime": 331.6135, "eval_samples_per_second": 1315.947, "eval_steps_per_second": 54.832, "step": 29710592 }, { "epoch": 86.0, "learning_rate": 7.0148753010374215e-06, "loss": 1.7942, "step": 29711000 }, { "epoch": 86.0, "learning_rate": 7.0141516533901455e-06, "loss": 1.7909, "step": 29711500 }, { "epoch": 86.0, "learning_rate": 7.0134280057428685e-06, "loss": 1.8082, "step": 29712000 }, { "epoch": 86.01, "learning_rate": 7.012705805390886e-06, "loss": 1.8063, "step": 29712500 }, { "epoch": 86.01, "learning_rate": 7.011982157743609e-06, "loss": 1.8065, "step": 29713000 }, { "epoch": 86.01, "learning_rate": 7.011258510096333e-06, "loss": 1.8123, "step": 29713500 }, { "epoch": 86.01, "learning_rate": 7.010536309744351e-06, "loss": 1.7888, "step": 29714000 }, { "epoch": 86.01, "learning_rate": 7.009812662097073e-06, "loss": 1.8073, "step": 29714500 }, { "epoch": 86.01, "learning_rate": 7.009089014449796e-06, "loss": 1.818, "step": 29715000 }, { "epoch": 86.01, "learning_rate": 7.00836536680252e-06, "loss": 1.8077, "step": 29715500 }, { "epoch": 86.02, "learning_rate": 7.007641719155243e-06, "loss": 1.8252, "step": 29716000 }, { "epoch": 86.02, "learning_rate": 7.006918071507966e-06, "loss": 1.7972, "step": 29716500 }, { "epoch": 86.02, "learning_rate": 7.0061944238606885e-06, "loss": 1.8308, "step": 29717000 }, { "epoch": 86.02, "learning_rate": 7.005470776213413e-06, "loss": 1.8159, "step": 29717500 }, { "epoch": 86.02, "learning_rate": 7.0047471285661355e-06, "loss": 1.8374, "step": 29718000 }, { "epoch": 86.02, "learning_rate": 7.004023480918859e-06, "loss": 1.7852, "step": 29718500 }, { "epoch": 86.02, "learning_rate": 7.0032998332715825e-06, "loss": 1.7811, "step": 29719000 }, { "epoch": 86.03, "learning_rate": 7.002576185624306e-06, "loss": 1.821, "step": 29719500 }, { "epoch": 86.03, "learning_rate": 7.001853985272324e-06, "loss": 1.8059, "step": 29720000 }, { "epoch": 86.03, "learning_rate": 7.001130337625046e-06, "loss": 1.7965, "step": 29720500 }, { "epoch": 86.03, "learning_rate": 7.000406689977771e-06, "loss": 1.8121, "step": 29721000 }, { "epoch": 86.03, "learning_rate": 6.999683042330493e-06, "loss": 1.7983, "step": 29721500 }, { "epoch": 86.03, "learning_rate": 6.998959394683216e-06, "loss": 1.8039, "step": 29722000 }, { "epoch": 86.03, "learning_rate": 6.998237194331233e-06, "loss": 1.796, "step": 29722500 }, { "epoch": 86.04, "learning_rate": 6.997513546683958e-06, "loss": 1.8091, "step": 29723000 }, { "epoch": 86.04, "learning_rate": 6.99678989903668e-06, "loss": 1.7802, "step": 29723500 }, { "epoch": 86.04, "learning_rate": 6.996066251389403e-06, "loss": 1.7894, "step": 29724000 }, { "epoch": 86.04, "learning_rate": 6.995345498332716e-06, "loss": 1.8035, "step": 29724500 }, { "epoch": 86.04, "learning_rate": 6.99462185068544e-06, "loss": 1.8062, "step": 29725000 }, { "epoch": 86.04, "learning_rate": 6.993898203038163e-06, "loss": 1.8306, "step": 29725500 }, { "epoch": 86.04, "learning_rate": 6.993174555390886e-06, "loss": 1.8104, "step": 29726000 }, { "epoch": 86.05, "learning_rate": 6.992450907743609e-06, "loss": 1.8223, "step": 29726500 }, { "epoch": 86.05, "learning_rate": 6.991727260096333e-06, "loss": 1.818, "step": 29727000 }, { "epoch": 86.05, "learning_rate": 6.99100505974435e-06, "loss": 1.8039, "step": 29727500 }, { "epoch": 86.05, "learning_rate": 6.990281412097073e-06, "loss": 1.8326, "step": 29728000 }, { "epoch": 86.05, "learning_rate": 6.989557764449796e-06, "loss": 1.7934, "step": 29728500 }, { "epoch": 86.05, "learning_rate": 6.98883411680252e-06, "loss": 1.7882, "step": 29729000 }, { "epoch": 86.05, "learning_rate": 6.988110469155243e-06, "loss": 1.7989, "step": 29729500 }, { "epoch": 86.06, "learning_rate": 6.987386821507965e-06, "loss": 1.777, "step": 29730000 }, { "epoch": 86.06, "learning_rate": 6.986664621155984e-06, "loss": 1.785, "step": 29730500 }, { "epoch": 86.06, "learning_rate": 6.985940973508707e-06, "loss": 1.8132, "step": 29731000 }, { "epoch": 86.06, "learning_rate": 6.9852173258614305e-06, "loss": 1.8205, "step": 29731500 }, { "epoch": 86.06, "learning_rate": 6.984493678214154e-06, "loss": 1.791, "step": 29732000 }, { "epoch": 86.06, "learning_rate": 6.9837700305668775e-06, "loss": 1.8254, "step": 29732500 }, { "epoch": 86.06, "learning_rate": 6.983046382919601e-06, "loss": 1.8278, "step": 29733000 }, { "epoch": 86.07, "learning_rate": 6.982322735272323e-06, "loss": 1.8181, "step": 29733500 }, { "epoch": 86.07, "learning_rate": 6.981599087625048e-06, "loss": 1.7845, "step": 29734000 }, { "epoch": 86.07, "learning_rate": 6.98087543997777e-06, "loss": 1.7865, "step": 29734500 }, { "epoch": 86.07, "learning_rate": 6.980151792330493e-06, "loss": 1.7887, "step": 29735000 }, { "epoch": 86.07, "learning_rate": 6.97942959197851e-06, "loss": 1.8078, "step": 29735500 }, { "epoch": 86.07, "learning_rate": 6.978707391626528e-06, "loss": 1.8032, "step": 29736000 }, { "epoch": 86.07, "learning_rate": 6.977983743979252e-06, "loss": 1.7904, "step": 29736500 }, { "epoch": 86.08, "learning_rate": 6.9772615436272694e-06, "loss": 1.7678, "step": 29737000 }, { "epoch": 86.08, "learning_rate": 6.9765378959799925e-06, "loss": 1.8355, "step": 29737500 }, { "epoch": 86.08, "learning_rate": 6.975814248332716e-06, "loss": 1.795, "step": 29738000 }, { "epoch": 86.08, "learning_rate": 6.9750906006854395e-06, "loss": 1.82, "step": 29738500 }, { "epoch": 86.08, "learning_rate": 6.974366953038163e-06, "loss": 1.8277, "step": 29739000 }, { "epoch": 86.08, "learning_rate": 6.973643305390886e-06, "loss": 1.8009, "step": 29739500 }, { "epoch": 86.09, "learning_rate": 6.97291965774361e-06, "loss": 1.8162, "step": 29740000 }, { "epoch": 86.09, "learning_rate": 6.972196010096333e-06, "loss": 1.8026, "step": 29740500 }, { "epoch": 86.09, "learning_rate": 6.971472362449055e-06, "loss": 1.8045, "step": 29741000 }, { "epoch": 86.09, "learning_rate": 6.97074871480178e-06, "loss": 1.829, "step": 29741500 }, { "epoch": 86.09, "learning_rate": 6.970025067154502e-06, "loss": 1.8101, "step": 29742000 }, { "epoch": 86.09, "learning_rate": 6.969301419507225e-06, "loss": 1.8071, "step": 29742500 }, { "epoch": 86.09, "learning_rate": 6.968579219155242e-06, "loss": 1.8088, "step": 29743000 }, { "epoch": 86.1, "learning_rate": 6.967855571507967e-06, "loss": 1.8201, "step": 29743500 }, { "epoch": 86.1, "learning_rate": 6.967133371155984e-06, "loss": 1.7804, "step": 29744000 }, { "epoch": 86.1, "learning_rate": 6.966409723508707e-06, "loss": 1.8005, "step": 29744500 }, { "epoch": 86.1, "learning_rate": 6.9656860758614305e-06, "loss": 1.796, "step": 29745000 }, { "epoch": 86.1, "learning_rate": 6.964962428214154e-06, "loss": 1.8071, "step": 29745500 }, { "epoch": 86.1, "learning_rate": 6.9642387805668775e-06, "loss": 1.8156, "step": 29746000 }, { "epoch": 86.1, "learning_rate": 6.9635151329196e-06, "loss": 1.8132, "step": 29746500 }, { "epoch": 86.11, "learning_rate": 6.962792932567618e-06, "loss": 1.7883, "step": 29747000 }, { "epoch": 86.11, "learning_rate": 6.962069284920342e-06, "loss": 1.8145, "step": 29747500 }, { "epoch": 86.11, "learning_rate": 6.961345637273065e-06, "loss": 1.8117, "step": 29748000 }, { "epoch": 86.11, "learning_rate": 6.960621989625787e-06, "loss": 1.8144, "step": 29748500 }, { "epoch": 86.11, "learning_rate": 6.95989834197851e-06, "loss": 1.8038, "step": 29749000 }, { "epoch": 86.11, "learning_rate": 6.959174694331234e-06, "loss": 1.8127, "step": 29749500 }, { "epoch": 86.11, "learning_rate": 6.958451046683957e-06, "loss": 1.7943, "step": 29750000 }, { "epoch": 86.12, "learning_rate": 6.95772739903668e-06, "loss": 1.8107, "step": 29750500 }, { "epoch": 86.12, "learning_rate": 6.957003751389404e-06, "loss": 1.7924, "step": 29751000 }, { "epoch": 86.12, "learning_rate": 6.956280103742127e-06, "loss": 1.8092, "step": 29751500 }, { "epoch": 86.12, "learning_rate": 6.9555579033901445e-06, "loss": 1.81, "step": 29752000 }, { "epoch": 86.12, "learning_rate": 6.9548357030381626e-06, "loss": 1.809, "step": 29752500 }, { "epoch": 86.12, "learning_rate": 6.9541120553908865e-06, "loss": 1.781, "step": 29753000 }, { "epoch": 86.12, "learning_rate": 6.9533884077436096e-06, "loss": 1.8043, "step": 29753500 }, { "epoch": 86.13, "learning_rate": 6.952664760096332e-06, "loss": 1.7764, "step": 29754000 }, { "epoch": 86.13, "learning_rate": 6.951941112449055e-06, "loss": 1.8324, "step": 29754500 }, { "epoch": 86.13, "learning_rate": 6.951217464801779e-06, "loss": 1.7951, "step": 29755000 }, { "epoch": 86.13, "learning_rate": 6.950493817154502e-06, "loss": 1.8059, "step": 29755500 }, { "epoch": 86.13, "learning_rate": 6.949770169507225e-06, "loss": 1.8034, "step": 29756000 }, { "epoch": 86.13, "learning_rate": 6.949046521859949e-06, "loss": 1.8039, "step": 29756500 }, { "epoch": 86.13, "learning_rate": 6.948322874212672e-06, "loss": 1.8332, "step": 29757000 }, { "epoch": 86.14, "learning_rate": 6.947599226565395e-06, "loss": 1.8153, "step": 29757500 }, { "epoch": 86.14, "learning_rate": 6.946877026213412e-06, "loss": 1.7899, "step": 29758000 }, { "epoch": 86.14, "learning_rate": 6.946153378566136e-06, "loss": 1.8233, "step": 29758500 }, { "epoch": 86.14, "learning_rate": 6.945431178214154e-06, "loss": 1.7933, "step": 29759000 }, { "epoch": 86.14, "learning_rate": 6.9447075305668766e-06, "loss": 1.8211, "step": 29759500 }, { "epoch": 86.14, "learning_rate": 6.9439838829196e-06, "loss": 1.8031, "step": 29760000 }, { "epoch": 86.14, "learning_rate": 6.9432602352723236e-06, "loss": 1.8123, "step": 29760500 }, { "epoch": 86.15, "learning_rate": 6.942536587625047e-06, "loss": 1.8119, "step": 29761000 }, { "epoch": 86.15, "learning_rate": 6.94181293997777e-06, "loss": 1.8341, "step": 29761500 }, { "epoch": 86.15, "learning_rate": 6.941089292330494e-06, "loss": 1.8165, "step": 29762000 }, { "epoch": 86.15, "learning_rate": 6.940365644683217e-06, "loss": 1.833, "step": 29762500 }, { "epoch": 86.15, "learning_rate": 6.93964199703594e-06, "loss": 1.8097, "step": 29763000 }, { "epoch": 86.15, "learning_rate": 6.938919796683957e-06, "loss": 1.8034, "step": 29763500 }, { "epoch": 86.15, "learning_rate": 6.938196149036681e-06, "loss": 1.8095, "step": 29764000 }, { "epoch": 86.16, "learning_rate": 6.937472501389404e-06, "loss": 1.8142, "step": 29764500 }, { "epoch": 86.16, "learning_rate": 6.936748853742127e-06, "loss": 1.7937, "step": 29765000 }, { "epoch": 86.16, "learning_rate": 6.936026653390144e-06, "loss": 1.8075, "step": 29765500 }, { "epoch": 86.16, "learning_rate": 6.935303005742868e-06, "loss": 1.7978, "step": 29766000 }, { "epoch": 86.16, "learning_rate": 6.934579358095591e-06, "loss": 1.82, "step": 29766500 }, { "epoch": 86.16, "learning_rate": 6.9338557104483145e-06, "loss": 1.757, "step": 29767000 }, { "epoch": 86.16, "learning_rate": 6.933133510096332e-06, "loss": 1.8271, "step": 29767500 }, { "epoch": 86.17, "learning_rate": 6.932409862449056e-06, "loss": 1.8089, "step": 29768000 }, { "epoch": 86.17, "learning_rate": 6.931686214801779e-06, "loss": 1.7651, "step": 29768500 }, { "epoch": 86.17, "learning_rate": 6.930962567154502e-06, "loss": 1.8187, "step": 29769000 }, { "epoch": 86.17, "learning_rate": 6.930238919507225e-06, "loss": 1.8107, "step": 29769500 }, { "epoch": 86.17, "learning_rate": 6.929515271859949e-06, "loss": 1.7893, "step": 29770000 }, { "epoch": 86.17, "learning_rate": 6.928791624212672e-06, "loss": 1.7851, "step": 29770500 }, { "epoch": 86.17, "learning_rate": 6.928067976565394e-06, "loss": 1.8108, "step": 29771000 }, { "epoch": 86.18, "learning_rate": 6.927345776213413e-06, "loss": 1.8118, "step": 29771500 }, { "epoch": 86.18, "learning_rate": 6.926622128566136e-06, "loss": 1.8219, "step": 29772000 }, { "epoch": 86.18, "learning_rate": 6.925898480918859e-06, "loss": 1.818, "step": 29772500 }, { "epoch": 86.18, "learning_rate": 6.9251748332715815e-06, "loss": 1.8062, "step": 29773000 }, { "epoch": 86.18, "learning_rate": 6.924451185624306e-06, "loss": 1.8163, "step": 29773500 }, { "epoch": 86.18, "learning_rate": 6.9237275379770285e-06, "loss": 1.8117, "step": 29774000 }, { "epoch": 86.18, "learning_rate": 6.923003890329752e-06, "loss": 1.8094, "step": 29774500 }, { "epoch": 86.19, "learning_rate": 6.922280242682476e-06, "loss": 1.8207, "step": 29775000 }, { "epoch": 86.19, "learning_rate": 6.921558042330494e-06, "loss": 1.7925, "step": 29775500 }, { "epoch": 86.19, "learning_rate": 6.920835841978511e-06, "loss": 1.8096, "step": 29776000 }, { "epoch": 86.19, "learning_rate": 6.920112194331234e-06, "loss": 1.8236, "step": 29776500 }, { "epoch": 86.19, "learning_rate": 6.919389993979251e-06, "loss": 1.8176, "step": 29777000 }, { "epoch": 86.19, "learning_rate": 6.918666346331976e-06, "loss": 1.7865, "step": 29777500 }, { "epoch": 86.2, "learning_rate": 6.917942698684698e-06, "loss": 1.8203, "step": 29778000 }, { "epoch": 86.2, "learning_rate": 6.917219051037421e-06, "loss": 1.8084, "step": 29778500 }, { "epoch": 86.2, "learning_rate": 6.916495403390145e-06, "loss": 1.8126, "step": 29779000 }, { "epoch": 86.2, "learning_rate": 6.915771755742868e-06, "loss": 1.8022, "step": 29779500 }, { "epoch": 86.2, "learning_rate": 6.915048108095591e-06, "loss": 1.7627, "step": 29780000 }, { "epoch": 86.2, "learning_rate": 6.914324460448314e-06, "loss": 1.8059, "step": 29780500 }, { "epoch": 86.2, "learning_rate": 6.9136022600963325e-06, "loss": 1.8046, "step": 29781000 }, { "epoch": 86.21, "learning_rate": 6.912878612449056e-06, "loss": 1.7891, "step": 29781500 }, { "epoch": 86.21, "learning_rate": 6.912156412097074e-06, "loss": 1.7927, "step": 29782000 }, { "epoch": 86.21, "learning_rate": 6.911432764449796e-06, "loss": 1.8031, "step": 29782500 }, { "epoch": 86.21, "learning_rate": 6.910709116802521e-06, "loss": 1.8083, "step": 29783000 }, { "epoch": 86.21, "learning_rate": 6.909985469155243e-06, "loss": 1.8047, "step": 29783500 }, { "epoch": 86.21, "learning_rate": 6.909261821507966e-06, "loss": 1.8085, "step": 29784000 }, { "epoch": 86.21, "learning_rate": 6.908538173860689e-06, "loss": 1.7981, "step": 29784500 }, { "epoch": 86.22, "learning_rate": 6.907814526213413e-06, "loss": 1.8112, "step": 29785000 }, { "epoch": 86.22, "learning_rate": 6.907090878566136e-06, "loss": 1.7989, "step": 29785500 }, { "epoch": 86.22, "learning_rate": 6.906370125509448e-06, "loss": 1.7878, "step": 29786000 }, { "epoch": 86.22, "learning_rate": 6.905646477862171e-06, "loss": 1.8176, "step": 29786500 }, { "epoch": 86.22, "learning_rate": 6.904922830214895e-06, "loss": 1.7928, "step": 29787000 }, { "epoch": 86.22, "learning_rate": 6.904199182567618e-06, "loss": 1.798, "step": 29787500 }, { "epoch": 86.22, "learning_rate": 6.903475534920341e-06, "loss": 1.8519, "step": 29788000 }, { "epoch": 86.23, "learning_rate": 6.9027518872730655e-06, "loss": 1.7654, "step": 29788500 }, { "epoch": 86.23, "learning_rate": 6.902028239625788e-06, "loss": 1.8099, "step": 29789000 }, { "epoch": 86.23, "learning_rate": 6.901304591978511e-06, "loss": 1.7896, "step": 29789500 }, { "epoch": 86.23, "learning_rate": 6.900580944331234e-06, "loss": 1.8094, "step": 29790000 }, { "epoch": 86.23, "learning_rate": 6.899858743979253e-06, "loss": 1.7939, "step": 29790500 }, { "epoch": 86.23, "learning_rate": 6.899135096331975e-06, "loss": 1.8004, "step": 29791000 }, { "epoch": 86.23, "learning_rate": 6.898411448684698e-06, "loss": 1.8175, "step": 29791500 }, { "epoch": 86.24, "learning_rate": 6.897687801037421e-06, "loss": 1.7847, "step": 29792000 }, { "epoch": 86.24, "learning_rate": 6.896964153390145e-06, "loss": 1.787, "step": 29792500 }, { "epoch": 86.24, "learning_rate": 6.896240505742868e-06, "loss": 1.8156, "step": 29793000 }, { "epoch": 86.24, "learning_rate": 6.8955183053908855e-06, "loss": 1.8159, "step": 29793500 }, { "epoch": 86.24, "learning_rate": 6.8947946577436086e-06, "loss": 1.7934, "step": 29794000 }, { "epoch": 86.24, "learning_rate": 6.8940710100963325e-06, "loss": 1.8045, "step": 29794500 }, { "epoch": 86.24, "learning_rate": 6.8933473624490556e-06, "loss": 1.8087, "step": 29795000 }, { "epoch": 86.25, "learning_rate": 6.892625162097073e-06, "loss": 1.791, "step": 29795500 }, { "epoch": 86.25, "learning_rate": 6.891901514449796e-06, "loss": 1.8146, "step": 29796000 }, { "epoch": 86.25, "learning_rate": 6.89117786680252e-06, "loss": 1.8239, "step": 29796500 }, { "epoch": 86.25, "learning_rate": 6.890454219155243e-06, "loss": 1.7856, "step": 29797000 }, { "epoch": 86.25, "learning_rate": 6.889730571507966e-06, "loss": 1.8293, "step": 29797500 }, { "epoch": 86.25, "learning_rate": 6.88900692386069e-06, "loss": 1.8236, "step": 29798000 }, { "epoch": 86.25, "learning_rate": 6.888283276213413e-06, "loss": 1.7856, "step": 29798500 }, { "epoch": 86.26, "learning_rate": 6.887559628566135e-06, "loss": 1.794, "step": 29799000 }, { "epoch": 86.26, "learning_rate": 6.8868388755094475e-06, "loss": 1.8041, "step": 29799500 }, { "epoch": 86.26, "learning_rate": 6.886115227862172e-06, "loss": 1.8215, "step": 29800000 }, { "epoch": 86.26, "learning_rate": 6.885391580214895e-06, "loss": 1.8109, "step": 29800500 }, { "epoch": 86.26, "learning_rate": 6.884667932567618e-06, "loss": 1.797, "step": 29801000 }, { "epoch": 86.26, "learning_rate": 6.883944284920341e-06, "loss": 1.8307, "step": 29801500 }, { "epoch": 86.26, "learning_rate": 6.88322208456836e-06, "loss": 1.8059, "step": 29802000 }, { "epoch": 86.27, "learning_rate": 6.882499884216377e-06, "loss": 1.8079, "step": 29802500 }, { "epoch": 86.27, "learning_rate": 6.8817762365691e-06, "loss": 1.788, "step": 29803000 }, { "epoch": 86.27, "learning_rate": 6.881052588921823e-06, "loss": 1.8097, "step": 29803500 }, { "epoch": 86.27, "learning_rate": 6.880328941274547e-06, "loss": 1.7836, "step": 29804000 }, { "epoch": 86.27, "learning_rate": 6.87960529362727e-06, "loss": 1.7991, "step": 29804500 }, { "epoch": 86.27, "learning_rate": 6.878881645979992e-06, "loss": 1.8143, "step": 29805000 }, { "epoch": 86.27, "learning_rate": 6.878157998332715e-06, "loss": 1.7954, "step": 29805500 }, { "epoch": 86.28, "learning_rate": 6.877434350685439e-06, "loss": 1.7959, "step": 29806000 }, { "epoch": 86.28, "learning_rate": 6.876710703038162e-06, "loss": 1.8276, "step": 29806500 }, { "epoch": 86.28, "learning_rate": 6.8759870553908854e-06, "loss": 1.8175, "step": 29807000 }, { "epoch": 86.28, "learning_rate": 6.875263407743609e-06, "loss": 1.7861, "step": 29807500 }, { "epoch": 86.28, "learning_rate": 6.8745397600963324e-06, "loss": 1.8043, "step": 29808000 }, { "epoch": 86.28, "learning_rate": 6.8738161124490555e-06, "loss": 1.7998, "step": 29808500 }, { "epoch": 86.28, "learning_rate": 6.873093912097073e-06, "loss": 1.8135, "step": 29809000 }, { "epoch": 86.29, "learning_rate": 6.872370264449797e-06, "loss": 1.7984, "step": 29809500 }, { "epoch": 86.29, "learning_rate": 6.87164661680252e-06, "loss": 1.815, "step": 29810000 }, { "epoch": 86.29, "learning_rate": 6.870922969155243e-06, "loss": 1.811, "step": 29810500 }, { "epoch": 86.29, "learning_rate": 6.870199321507967e-06, "loss": 1.8192, "step": 29811000 }, { "epoch": 86.29, "learning_rate": 6.86947567386069e-06, "loss": 1.8312, "step": 29811500 }, { "epoch": 86.29, "learning_rate": 6.868752026213412e-06, "loss": 1.8217, "step": 29812000 }, { "epoch": 86.29, "learning_rate": 6.868028378566135e-06, "loss": 1.8298, "step": 29812500 }, { "epoch": 86.3, "learning_rate": 6.8673076255094474e-06, "loss": 1.7917, "step": 29813000 }, { "epoch": 86.3, "learning_rate": 6.866583977862172e-06, "loss": 1.8095, "step": 29813500 }, { "epoch": 86.3, "learning_rate": 6.8658603302148945e-06, "loss": 1.7854, "step": 29814000 }, { "epoch": 86.3, "learning_rate": 6.8651366825676175e-06, "loss": 1.8135, "step": 29814500 }, { "epoch": 86.3, "learning_rate": 6.8644130349203415e-06, "loss": 1.7928, "step": 29815000 }, { "epoch": 86.3, "learning_rate": 6.8636893872730645e-06, "loss": 1.821, "step": 29815500 }, { "epoch": 86.31, "learning_rate": 6.862965739625788e-06, "loss": 1.7969, "step": 29816000 }, { "epoch": 86.31, "learning_rate": 6.862243539273805e-06, "loss": 1.8349, "step": 29816500 }, { "epoch": 86.31, "learning_rate": 6.861519891626529e-06, "loss": 1.7915, "step": 29817000 }, { "epoch": 86.31, "learning_rate": 6.860796243979252e-06, "loss": 1.7765, "step": 29817500 }, { "epoch": 86.31, "learning_rate": 6.860072596331975e-06, "loss": 1.8081, "step": 29818000 }, { "epoch": 86.31, "learning_rate": 6.859348948684699e-06, "loss": 1.8366, "step": 29818500 }, { "epoch": 86.31, "learning_rate": 6.858626748332717e-06, "loss": 1.8021, "step": 29819000 }, { "epoch": 86.32, "learning_rate": 6.857904547980734e-06, "loss": 1.7975, "step": 29819500 }, { "epoch": 86.32, "learning_rate": 6.857180900333457e-06, "loss": 1.8394, "step": 29820000 }, { "epoch": 86.32, "learning_rate": 6.8564572526861795e-06, "loss": 1.8023, "step": 29820500 }, { "epoch": 86.32, "learning_rate": 6.855733605038904e-06, "loss": 1.7921, "step": 29821000 }, { "epoch": 86.32, "learning_rate": 6.8550099573916266e-06, "loss": 1.795, "step": 29821500 }, { "epoch": 86.32, "learning_rate": 6.85428630974435e-06, "loss": 1.808, "step": 29822000 }, { "epoch": 86.32, "learning_rate": 6.8535626620970736e-06, "loss": 1.7871, "step": 29822500 }, { "epoch": 86.33, "learning_rate": 6.852839014449797e-06, "loss": 1.8114, "step": 29823000 }, { "epoch": 86.33, "learning_rate": 6.85211536680252e-06, "loss": 1.8029, "step": 29823500 }, { "epoch": 86.33, "learning_rate": 6.851393166450537e-06, "loss": 1.7708, "step": 29824000 }, { "epoch": 86.33, "learning_rate": 6.850669518803261e-06, "loss": 1.812, "step": 29824500 }, { "epoch": 86.33, "learning_rate": 6.849945871155984e-06, "loss": 1.8269, "step": 29825000 }, { "epoch": 86.33, "learning_rate": 6.849222223508707e-06, "loss": 1.7907, "step": 29825500 }, { "epoch": 86.33, "learning_rate": 6.848500023156724e-06, "loss": 1.7909, "step": 29826000 }, { "epoch": 86.34, "learning_rate": 6.847776375509449e-06, "loss": 1.8143, "step": 29826500 }, { "epoch": 86.34, "learning_rate": 6.847052727862171e-06, "loss": 1.8055, "step": 29827000 }, { "epoch": 86.34, "learning_rate": 6.846329080214894e-06, "loss": 1.8128, "step": 29827500 }, { "epoch": 86.34, "learning_rate": 6.845605432567618e-06, "loss": 1.7995, "step": 29828000 }, { "epoch": 86.34, "learning_rate": 6.8448832322156364e-06, "loss": 1.7957, "step": 29828500 }, { "epoch": 86.34, "learning_rate": 6.844159584568359e-06, "loss": 1.8045, "step": 29829000 }, { "epoch": 86.34, "learning_rate": 6.843435936921082e-06, "loss": 1.7986, "step": 29829500 }, { "epoch": 86.35, "learning_rate": 6.842712289273806e-06, "loss": 1.8055, "step": 29830000 }, { "epoch": 86.35, "learning_rate": 6.841988641626529e-06, "loss": 1.8119, "step": 29830500 }, { "epoch": 86.35, "learning_rate": 6.841266441274546e-06, "loss": 1.8108, "step": 29831000 }, { "epoch": 86.35, "learning_rate": 6.840542793627269e-06, "loss": 1.7868, "step": 29831500 }, { "epoch": 86.35, "learning_rate": 6.839819145979994e-06, "loss": 1.7819, "step": 29832000 }, { "epoch": 86.35, "learning_rate": 6.839095498332716e-06, "loss": 1.8316, "step": 29832500 }, { "epoch": 86.35, "learning_rate": 6.838371850685439e-06, "loss": 1.7978, "step": 29833000 }, { "epoch": 86.36, "learning_rate": 6.837648203038162e-06, "loss": 1.8445, "step": 29833500 }, { "epoch": 86.36, "learning_rate": 6.836924555390886e-06, "loss": 1.8006, "step": 29834000 }, { "epoch": 86.36, "learning_rate": 6.836200907743609e-06, "loss": 1.8312, "step": 29834500 }, { "epoch": 86.36, "learning_rate": 6.8354787073916265e-06, "loss": 1.7943, "step": 29835000 }, { "epoch": 86.36, "learning_rate": 6.8347550597443504e-06, "loss": 1.8034, "step": 29835500 }, { "epoch": 86.36, "learning_rate": 6.8340314120970735e-06, "loss": 1.8128, "step": 29836000 }, { "epoch": 86.36, "learning_rate": 6.833307764449797e-06, "loss": 1.7845, "step": 29836500 }, { "epoch": 86.37, "learning_rate": 6.83258411680252e-06, "loss": 1.8214, "step": 29837000 }, { "epoch": 86.37, "learning_rate": 6.831861916450538e-06, "loss": 1.8175, "step": 29837500 }, { "epoch": 86.37, "learning_rate": 6.831138268803261e-06, "loss": 1.7928, "step": 29838000 }, { "epoch": 86.37, "learning_rate": 6.830416068451279e-06, "loss": 1.8184, "step": 29838500 }, { "epoch": 86.37, "learning_rate": 6.829692420804001e-06, "loss": 1.789, "step": 29839000 }, { "epoch": 86.37, "learning_rate": 6.828968773156726e-06, "loss": 1.8178, "step": 29839500 }, { "epoch": 86.37, "learning_rate": 6.828245125509448e-06, "loss": 1.8102, "step": 29840000 }, { "epoch": 86.38, "learning_rate": 6.827521477862171e-06, "loss": 1.7935, "step": 29840500 }, { "epoch": 86.38, "learning_rate": 6.826797830214894e-06, "loss": 1.8093, "step": 29841000 }, { "epoch": 86.38, "learning_rate": 6.826074182567618e-06, "loss": 1.8121, "step": 29841500 }, { "epoch": 86.38, "learning_rate": 6.825350534920341e-06, "loss": 1.8207, "step": 29842000 }, { "epoch": 86.38, "learning_rate": 6.824629781863654e-06, "loss": 1.8032, "step": 29842500 }, { "epoch": 86.38, "learning_rate": 6.823906134216376e-06, "loss": 1.8011, "step": 29843000 }, { "epoch": 86.38, "learning_rate": 6.823182486569101e-06, "loss": 1.8326, "step": 29843500 }, { "epoch": 86.39, "learning_rate": 6.822458838921824e-06, "loss": 1.822, "step": 29844000 }, { "epoch": 86.39, "learning_rate": 6.821735191274546e-06, "loss": 1.8237, "step": 29844500 }, { "epoch": 86.39, "learning_rate": 6.821012990922564e-06, "loss": 1.8125, "step": 29845000 }, { "epoch": 86.39, "learning_rate": 6.820289343275288e-06, "loss": 1.802, "step": 29845500 }, { "epoch": 86.39, "learning_rate": 6.819565695628011e-06, "loss": 1.7967, "step": 29846000 }, { "epoch": 86.39, "learning_rate": 6.818842047980733e-06, "loss": 1.7995, "step": 29846500 }, { "epoch": 86.39, "learning_rate": 6.818119847628751e-06, "loss": 1.8095, "step": 29847000 }, { "epoch": 86.4, "learning_rate": 6.817396199981475e-06, "loss": 1.8265, "step": 29847500 }, { "epoch": 86.4, "learning_rate": 6.816672552334198e-06, "loss": 1.7769, "step": 29848000 }, { "epoch": 86.4, "learning_rate": 6.815948904686921e-06, "loss": 1.7864, "step": 29848500 }, { "epoch": 86.4, "learning_rate": 6.815225257039645e-06, "loss": 1.7968, "step": 29849000 }, { "epoch": 86.4, "learning_rate": 6.814501609392368e-06, "loss": 1.8187, "step": 29849500 }, { "epoch": 86.4, "learning_rate": 6.813777961745091e-06, "loss": 1.7943, "step": 29850000 }, { "epoch": 86.4, "learning_rate": 6.813054314097814e-06, "loss": 1.8311, "step": 29850500 }, { "epoch": 86.41, "learning_rate": 6.812330666450538e-06, "loss": 1.7921, "step": 29851000 }, { "epoch": 86.41, "learning_rate": 6.811608466098556e-06, "loss": 1.8337, "step": 29851500 }, { "epoch": 86.41, "learning_rate": 6.810884818451278e-06, "loss": 1.7949, "step": 29852000 }, { "epoch": 86.41, "learning_rate": 6.810161170804001e-06, "loss": 1.813, "step": 29852500 }, { "epoch": 86.41, "learning_rate": 6.809437523156725e-06, "loss": 1.7746, "step": 29853000 }, { "epoch": 86.41, "learning_rate": 6.808715322804743e-06, "loss": 1.7975, "step": 29853500 }, { "epoch": 86.42, "learning_rate": 6.80799312245276e-06, "loss": 1.8237, "step": 29854000 }, { "epoch": 86.42, "learning_rate": 6.8072694748054835e-06, "loss": 1.8037, "step": 29854500 }, { "epoch": 86.42, "learning_rate": 6.806545827158207e-06, "loss": 1.8153, "step": 29855000 }, { "epoch": 86.42, "learning_rate": 6.8058221795109305e-06, "loss": 1.7892, "step": 29855500 }, { "epoch": 86.42, "learning_rate": 6.805098531863653e-06, "loss": 1.8273, "step": 29856000 }, { "epoch": 86.42, "learning_rate": 6.8043748842163775e-06, "loss": 1.8331, "step": 29856500 }, { "epoch": 86.42, "learning_rate": 6.803651236569101e-06, "loss": 1.8193, "step": 29857000 }, { "epoch": 86.43, "learning_rate": 6.802927588921823e-06, "loss": 1.8437, "step": 29857500 }, { "epoch": 86.43, "learning_rate": 6.802203941274546e-06, "loss": 1.8086, "step": 29858000 }, { "epoch": 86.43, "learning_rate": 6.80148029362727e-06, "loss": 1.8325, "step": 29858500 }, { "epoch": 86.43, "learning_rate": 6.800756645979993e-06, "loss": 1.7911, "step": 29859000 }, { "epoch": 86.43, "learning_rate": 6.800032998332716e-06, "loss": 1.8359, "step": 29859500 }, { "epoch": 86.43, "learning_rate": 6.79930935068544e-06, "loss": 1.8307, "step": 29860000 }, { "epoch": 86.43, "learning_rate": 6.798585703038163e-06, "loss": 1.813, "step": 29860500 }, { "epoch": 86.44, "learning_rate": 6.797862055390885e-06, "loss": 1.8099, "step": 29861000 }, { "epoch": 86.44, "learning_rate": 6.797139855038903e-06, "loss": 1.8023, "step": 29861500 }, { "epoch": 86.44, "learning_rate": 6.796416207391627e-06, "loss": 1.8077, "step": 29862000 }, { "epoch": 86.44, "learning_rate": 6.795694007039645e-06, "loss": 1.8272, "step": 29862500 }, { "epoch": 86.44, "learning_rate": 6.794970359392368e-06, "loss": 1.8002, "step": 29863000 }, { "epoch": 86.44, "learning_rate": 6.794246711745091e-06, "loss": 1.8133, "step": 29863500 }, { "epoch": 86.44, "learning_rate": 6.793523064097815e-06, "loss": 1.825, "step": 29864000 }, { "epoch": 86.45, "learning_rate": 6.792799416450538e-06, "loss": 1.795, "step": 29864500 }, { "epoch": 86.45, "learning_rate": 6.792075768803261e-06, "loss": 1.8125, "step": 29865000 }, { "epoch": 86.45, "learning_rate": 6.791353568451278e-06, "loss": 1.816, "step": 29865500 }, { "epoch": 86.45, "learning_rate": 6.790629920804002e-06, "loss": 1.8272, "step": 29866000 }, { "epoch": 86.45, "learning_rate": 6.789906273156725e-06, "loss": 1.8044, "step": 29866500 }, { "epoch": 86.45, "learning_rate": 6.789184072804742e-06, "loss": 1.8104, "step": 29867000 }, { "epoch": 86.45, "learning_rate": 6.788460425157465e-06, "loss": 1.8036, "step": 29867500 }, { "epoch": 86.46, "learning_rate": 6.787736777510189e-06, "loss": 1.8056, "step": 29868000 }, { "epoch": 86.46, "learning_rate": 6.787013129862912e-06, "loss": 1.8417, "step": 29868500 }, { "epoch": 86.46, "learning_rate": 6.7862894822156354e-06, "loss": 1.8231, "step": 29869000 }, { "epoch": 86.46, "learning_rate": 6.785565834568359e-06, "loss": 1.8104, "step": 29869500 }, { "epoch": 86.46, "learning_rate": 6.7848421869210824e-06, "loss": 1.7812, "step": 29870000 }, { "epoch": 86.46, "learning_rate": 6.7841199865691e-06, "loss": 1.8048, "step": 29870500 }, { "epoch": 86.46, "learning_rate": 6.783396338921823e-06, "loss": 1.8066, "step": 29871000 }, { "epoch": 86.47, "learning_rate": 6.782672691274547e-06, "loss": 1.8158, "step": 29871500 }, { "epoch": 86.47, "learning_rate": 6.78194904362727e-06, "loss": 1.7987, "step": 29872000 }, { "epoch": 86.47, "learning_rate": 6.781225395979993e-06, "loss": 1.8062, "step": 29872500 }, { "epoch": 86.47, "learning_rate": 6.780501748332715e-06, "loss": 1.789, "step": 29873000 }, { "epoch": 86.47, "learning_rate": 6.77977810068544e-06, "loss": 1.8114, "step": 29873500 }, { "epoch": 86.47, "learning_rate": 6.779054453038162e-06, "loss": 1.7924, "step": 29874000 }, { "epoch": 86.47, "learning_rate": 6.778330805390885e-06, "loss": 1.8158, "step": 29874500 }, { "epoch": 86.48, "learning_rate": 6.77760715774361e-06, "loss": 1.8238, "step": 29875000 }, { "epoch": 86.48, "learning_rate": 6.776884957391627e-06, "loss": 1.8316, "step": 29875500 }, { "epoch": 86.48, "learning_rate": 6.77616130974435e-06, "loss": 1.7889, "step": 29876000 }, { "epoch": 86.48, "learning_rate": 6.7754376620970725e-06, "loss": 1.8144, "step": 29876500 }, { "epoch": 86.48, "learning_rate": 6.774714014449797e-06, "loss": 1.7953, "step": 29877000 }, { "epoch": 86.48, "learning_rate": 6.7739903668025195e-06, "loss": 1.7803, "step": 29877500 }, { "epoch": 86.48, "learning_rate": 6.773266719155243e-06, "loss": 1.8174, "step": 29878000 }, { "epoch": 86.49, "learning_rate": 6.7725430715079665e-06, "loss": 1.7986, "step": 29878500 }, { "epoch": 86.49, "learning_rate": 6.77181942386069e-06, "loss": 1.8107, "step": 29879000 }, { "epoch": 86.49, "learning_rate": 6.771097223508707e-06, "loss": 1.7875, "step": 29879500 }, { "epoch": 86.49, "learning_rate": 6.77037357586143e-06, "loss": 1.8079, "step": 29880000 }, { "epoch": 86.49, "learning_rate": 6.769649928214154e-06, "loss": 1.8216, "step": 29880500 }, { "epoch": 86.49, "learning_rate": 6.768926280566877e-06, "loss": 1.8209, "step": 29881000 }, { "epoch": 86.49, "learning_rate": 6.7682026329196e-06, "loss": 1.795, "step": 29881500 }, { "epoch": 86.5, "learning_rate": 6.767480432567617e-06, "loss": 1.83, "step": 29882000 }, { "epoch": 86.5, "learning_rate": 6.766756784920342e-06, "loss": 1.7991, "step": 29882500 }, { "epoch": 86.5, "learning_rate": 6.766033137273064e-06, "loss": 1.7896, "step": 29883000 }, { "epoch": 86.5, "learning_rate": 6.765309489625787e-06, "loss": 1.8229, "step": 29883500 }, { "epoch": 86.5, "learning_rate": 6.7645858419785104e-06, "loss": 1.815, "step": 29884000 }, { "epoch": 86.5, "learning_rate": 6.763863641626529e-06, "loss": 1.838, "step": 29884500 }, { "epoch": 86.5, "learning_rate": 6.763141441274547e-06, "loss": 1.8147, "step": 29885000 }, { "epoch": 86.51, "learning_rate": 6.76241779362727e-06, "loss": 1.8001, "step": 29885500 }, { "epoch": 86.51, "learning_rate": 6.761694145979992e-06, "loss": 1.8085, "step": 29886000 }, { "epoch": 86.51, "learning_rate": 6.760970498332717e-06, "loss": 1.8005, "step": 29886500 }, { "epoch": 86.51, "learning_rate": 6.76024685068544e-06, "loss": 1.8292, "step": 29887000 }, { "epoch": 86.51, "learning_rate": 6.759523203038162e-06, "loss": 1.8282, "step": 29887500 }, { "epoch": 86.51, "learning_rate": 6.758799555390887e-06, "loss": 1.8021, "step": 29888000 }, { "epoch": 86.51, "learning_rate": 6.758075907743609e-06, "loss": 1.8121, "step": 29888500 }, { "epoch": 86.52, "learning_rate": 6.757352260096332e-06, "loss": 1.825, "step": 29889000 }, { "epoch": 86.52, "learning_rate": 6.756630059744349e-06, "loss": 1.7995, "step": 29889500 }, { "epoch": 86.52, "learning_rate": 6.755906412097074e-06, "loss": 1.8154, "step": 29890000 }, { "epoch": 86.52, "learning_rate": 6.755184211745091e-06, "loss": 1.8154, "step": 29890500 }, { "epoch": 86.52, "learning_rate": 6.7544605640978145e-06, "loss": 1.8169, "step": 29891000 }, { "epoch": 86.52, "learning_rate": 6.753736916450537e-06, "loss": 1.8122, "step": 29891500 }, { "epoch": 86.53, "learning_rate": 6.7530132688032615e-06, "loss": 1.7977, "step": 29892000 }, { "epoch": 86.53, "learning_rate": 6.752289621155984e-06, "loss": 1.8163, "step": 29892500 }, { "epoch": 86.53, "learning_rate": 6.751565973508707e-06, "loss": 1.7756, "step": 29893000 }, { "epoch": 86.53, "learning_rate": 6.750842325861432e-06, "loss": 1.7799, "step": 29893500 }, { "epoch": 86.53, "learning_rate": 6.750118678214154e-06, "loss": 1.8228, "step": 29894000 }, { "epoch": 86.53, "learning_rate": 6.749396477862172e-06, "loss": 1.8082, "step": 29894500 }, { "epoch": 86.53, "learning_rate": 6.748674277510189e-06, "loss": 1.7961, "step": 29895000 }, { "epoch": 86.54, "learning_rate": 6.747950629862912e-06, "loss": 1.8145, "step": 29895500 }, { "epoch": 86.54, "learning_rate": 6.747226982215636e-06, "loss": 1.8172, "step": 29896000 }, { "epoch": 86.54, "learning_rate": 6.746503334568359e-06, "loss": 1.7858, "step": 29896500 }, { "epoch": 86.54, "learning_rate": 6.7457796869210815e-06, "loss": 1.8121, "step": 29897000 }, { "epoch": 86.54, "learning_rate": 6.745056039273806e-06, "loss": 1.7836, "step": 29897500 }, { "epoch": 86.54, "learning_rate": 6.7443323916265285e-06, "loss": 1.7921, "step": 29898000 }, { "epoch": 86.54, "learning_rate": 6.7436087439792516e-06, "loss": 1.8267, "step": 29898500 }, { "epoch": 86.55, "learning_rate": 6.742886543627269e-06, "loss": 1.7937, "step": 29899000 }, { "epoch": 86.55, "learning_rate": 6.742162895979994e-06, "loss": 1.7905, "step": 29899500 }, { "epoch": 86.55, "learning_rate": 6.741440695628011e-06, "loss": 1.7735, "step": 29900000 }, { "epoch": 86.55, "learning_rate": 6.740717047980734e-06, "loss": 1.8119, "step": 29900500 }, { "epoch": 86.55, "learning_rate": 6.739993400333457e-06, "loss": 1.8253, "step": 29901000 }, { "epoch": 86.55, "learning_rate": 6.739269752686181e-06, "loss": 1.8052, "step": 29901500 }, { "epoch": 86.55, "learning_rate": 6.738546105038904e-06, "loss": 1.8062, "step": 29902000 }, { "epoch": 86.56, "learning_rate": 6.737822457391626e-06, "loss": 1.805, "step": 29902500 }, { "epoch": 86.56, "learning_rate": 6.737098809744351e-06, "loss": 1.799, "step": 29903000 }, { "epoch": 86.56, "learning_rate": 6.736375162097073e-06, "loss": 1.8083, "step": 29903500 }, { "epoch": 86.56, "learning_rate": 6.735651514449796e-06, "loss": 1.8151, "step": 29904000 }, { "epoch": 86.56, "learning_rate": 6.734927866802519e-06, "loss": 1.7974, "step": 29904500 }, { "epoch": 86.56, "learning_rate": 6.734205666450538e-06, "loss": 1.8088, "step": 29905000 }, { "epoch": 86.56, "learning_rate": 6.733482018803261e-06, "loss": 1.8159, "step": 29905500 }, { "epoch": 86.57, "learning_rate": 6.732758371155984e-06, "loss": 1.785, "step": 29906000 }, { "epoch": 86.57, "learning_rate": 6.732034723508707e-06, "loss": 1.7927, "step": 29906500 }, { "epoch": 86.57, "learning_rate": 6.731311075861431e-06, "loss": 1.8074, "step": 29907000 }, { "epoch": 86.57, "learning_rate": 6.730587428214154e-06, "loss": 1.8039, "step": 29907500 }, { "epoch": 86.57, "learning_rate": 6.729866675157466e-06, "loss": 1.8067, "step": 29908000 }, { "epoch": 86.57, "learning_rate": 6.729143027510189e-06, "loss": 1.8141, "step": 29908500 }, { "epoch": 86.57, "learning_rate": 6.728419379862913e-06, "loss": 1.8083, "step": 29909000 }, { "epoch": 86.58, "learning_rate": 6.727695732215636e-06, "loss": 1.7784, "step": 29909500 }, { "epoch": 86.58, "learning_rate": 6.726972084568358e-06, "loss": 1.8316, "step": 29910000 }, { "epoch": 86.58, "learning_rate": 6.7262484369210814e-06, "loss": 1.8186, "step": 29910500 }, { "epoch": 86.58, "learning_rate": 6.725524789273805e-06, "loss": 1.7813, "step": 29911000 }, { "epoch": 86.58, "learning_rate": 6.7248011416265284e-06, "loss": 1.7994, "step": 29911500 }, { "epoch": 86.58, "learning_rate": 6.7240774939792515e-06, "loss": 1.8025, "step": 29912000 }, { "epoch": 86.58, "learning_rate": 6.7233552936272705e-06, "loss": 1.8096, "step": 29912500 }, { "epoch": 86.59, "learning_rate": 6.7226316459799935e-06, "loss": 1.8413, "step": 29913000 }, { "epoch": 86.59, "learning_rate": 6.721907998332716e-06, "loss": 1.8226, "step": 29913500 }, { "epoch": 86.59, "learning_rate": 6.721184350685439e-06, "loss": 1.8148, "step": 29914000 }, { "epoch": 86.59, "learning_rate": 6.720460703038163e-06, "loss": 1.8021, "step": 29914500 }, { "epoch": 86.59, "learning_rate": 6.719738502686181e-06, "loss": 1.8284, "step": 29915000 }, { "epoch": 86.59, "learning_rate": 6.719014855038903e-06, "loss": 1.7991, "step": 29915500 }, { "epoch": 86.59, "learning_rate": 6.718291207391626e-06, "loss": 1.7827, "step": 29916000 }, { "epoch": 86.6, "learning_rate": 6.71756755974435e-06, "loss": 1.7843, "step": 29916500 }, { "epoch": 86.6, "learning_rate": 6.716843912097073e-06, "loss": 1.8041, "step": 29917000 }, { "epoch": 86.6, "learning_rate": 6.7161217117450905e-06, "loss": 1.7988, "step": 29917500 }, { "epoch": 86.6, "learning_rate": 6.7153995113931086e-06, "loss": 1.8287, "step": 29918000 }, { "epoch": 86.6, "learning_rate": 6.7146758637458325e-06, "loss": 1.8156, "step": 29918500 }, { "epoch": 86.6, "learning_rate": 6.7139522160985556e-06, "loss": 1.7967, "step": 29919000 }, { "epoch": 86.6, "learning_rate": 6.713228568451279e-06, "loss": 1.8212, "step": 29919500 }, { "epoch": 86.61, "learning_rate": 6.712506368099296e-06, "loss": 1.8126, "step": 29920000 }, { "epoch": 86.61, "learning_rate": 6.71178272045202e-06, "loss": 1.7994, "step": 29920500 }, { "epoch": 86.61, "learning_rate": 6.711059072804743e-06, "loss": 1.7849, "step": 29921000 }, { "epoch": 86.61, "learning_rate": 6.710335425157466e-06, "loss": 1.8113, "step": 29921500 }, { "epoch": 86.61, "learning_rate": 6.70961177751019e-06, "loss": 1.8252, "step": 29922000 }, { "epoch": 86.61, "learning_rate": 6.708888129862913e-06, "loss": 1.8114, "step": 29922500 }, { "epoch": 86.61, "learning_rate": 6.708164482215635e-06, "loss": 1.8008, "step": 29923000 }, { "epoch": 86.62, "learning_rate": 6.707440834568358e-06, "loss": 1.8106, "step": 29923500 }, { "epoch": 86.62, "learning_rate": 6.706717186921082e-06, "loss": 1.8122, "step": 29924000 }, { "epoch": 86.62, "learning_rate": 6.705993539273805e-06, "loss": 1.7963, "step": 29924500 }, { "epoch": 86.62, "learning_rate": 6.705271338921823e-06, "loss": 1.8098, "step": 29925000 }, { "epoch": 86.62, "learning_rate": 6.704547691274546e-06, "loss": 1.7944, "step": 29925500 }, { "epoch": 86.62, "learning_rate": 6.70382404362727e-06, "loss": 1.8149, "step": 29926000 }, { "epoch": 86.62, "learning_rate": 6.703100395979993e-06, "loss": 1.793, "step": 29926500 }, { "epoch": 86.63, "learning_rate": 6.702376748332716e-06, "loss": 1.7886, "step": 29927000 }, { "epoch": 86.63, "learning_rate": 6.701655995276028e-06, "loss": 1.8079, "step": 29927500 }, { "epoch": 86.63, "learning_rate": 6.700932347628752e-06, "loss": 1.8061, "step": 29928000 }, { "epoch": 86.63, "learning_rate": 6.700208699981475e-06, "loss": 1.8068, "step": 29928500 }, { "epoch": 86.63, "learning_rate": 6.699485052334198e-06, "loss": 1.7873, "step": 29929000 }, { "epoch": 86.63, "learning_rate": 6.698761404686922e-06, "loss": 1.8184, "step": 29929500 }, { "epoch": 86.64, "learning_rate": 6.698037757039645e-06, "loss": 1.8363, "step": 29930000 }, { "epoch": 86.64, "learning_rate": 6.697314109392368e-06, "loss": 1.8307, "step": 29930500 }, { "epoch": 86.64, "learning_rate": 6.69659046174509e-06, "loss": 1.8157, "step": 29931000 }, { "epoch": 86.64, "learning_rate": 6.695868261393109e-06, "loss": 1.8168, "step": 29931500 }, { "epoch": 86.64, "learning_rate": 6.6951446137458324e-06, "loss": 1.7983, "step": 29932000 }, { "epoch": 86.64, "learning_rate": 6.6944209660985555e-06, "loss": 1.7923, "step": 29932500 }, { "epoch": 86.64, "learning_rate": 6.693697318451278e-06, "loss": 1.8385, "step": 29933000 }, { "epoch": 86.65, "learning_rate": 6.692975118099297e-06, "loss": 1.8269, "step": 29933500 }, { "epoch": 86.65, "learning_rate": 6.69225147045202e-06, "loss": 1.7897, "step": 29934000 }, { "epoch": 86.65, "learning_rate": 6.691527822804743e-06, "loss": 1.81, "step": 29934500 }, { "epoch": 86.65, "learning_rate": 6.690804175157465e-06, "loss": 1.8119, "step": 29935000 }, { "epoch": 86.65, "learning_rate": 6.69008052751019e-06, "loss": 1.7996, "step": 29935500 }, { "epoch": 86.65, "learning_rate": 6.689358327158207e-06, "loss": 1.7904, "step": 29936000 }, { "epoch": 86.65, "learning_rate": 6.68863467951093e-06, "loss": 1.806, "step": 29936500 }, { "epoch": 86.66, "learning_rate": 6.687911031863653e-06, "loss": 1.8117, "step": 29937000 }, { "epoch": 86.66, "learning_rate": 6.687187384216377e-06, "loss": 1.7923, "step": 29937500 }, { "epoch": 86.66, "learning_rate": 6.6864637365691e-06, "loss": 1.8179, "step": 29938000 }, { "epoch": 86.66, "learning_rate": 6.6857400889218225e-06, "loss": 1.7966, "step": 29938500 }, { "epoch": 86.66, "learning_rate": 6.685016441274547e-06, "loss": 1.7935, "step": 29939000 }, { "epoch": 86.66, "learning_rate": 6.6842927936272695e-06, "loss": 1.8116, "step": 29939500 }, { "epoch": 86.66, "learning_rate": 6.683569145979993e-06, "loss": 1.8065, "step": 29940000 }, { "epoch": 86.67, "learning_rate": 6.682845498332716e-06, "loss": 1.8147, "step": 29940500 }, { "epoch": 86.67, "learning_rate": 6.68212185068544e-06, "loss": 1.8148, "step": 29941000 }, { "epoch": 86.67, "learning_rate": 6.681398203038163e-06, "loss": 1.8076, "step": 29941500 }, { "epoch": 86.67, "learning_rate": 6.68067600268618e-06, "loss": 1.8123, "step": 29942000 }, { "epoch": 86.67, "learning_rate": 6.679953802334197e-06, "loss": 1.785, "step": 29942500 }, { "epoch": 86.67, "learning_rate": 6.679231601982216e-06, "loss": 1.8179, "step": 29943000 }, { "epoch": 86.67, "learning_rate": 6.678507954334939e-06, "loss": 1.8158, "step": 29943500 }, { "epoch": 86.68, "learning_rate": 6.677784306687662e-06, "loss": 1.802, "step": 29944000 }, { "epoch": 86.68, "learning_rate": 6.677060659040385e-06, "loss": 1.8043, "step": 29944500 }, { "epoch": 86.68, "learning_rate": 6.676337011393109e-06, "loss": 1.811, "step": 29945000 }, { "epoch": 86.68, "learning_rate": 6.675613363745832e-06, "loss": 1.7992, "step": 29945500 }, { "epoch": 86.68, "learning_rate": 6.674889716098555e-06, "loss": 1.8119, "step": 29946000 }, { "epoch": 86.68, "learning_rate": 6.674166068451279e-06, "loss": 1.7955, "step": 29946500 }, { "epoch": 86.68, "learning_rate": 6.673442420804002e-06, "loss": 1.8074, "step": 29947000 }, { "epoch": 86.69, "learning_rate": 6.67272022045202e-06, "loss": 1.7968, "step": 29947500 }, { "epoch": 86.69, "learning_rate": 6.671996572804742e-06, "loss": 1.783, "step": 29948000 }, { "epoch": 86.69, "learning_rate": 6.671274372452761e-06, "loss": 1.7843, "step": 29948500 }, { "epoch": 86.69, "learning_rate": 6.670550724805484e-06, "loss": 1.804, "step": 29949000 }, { "epoch": 86.69, "learning_rate": 6.669827077158207e-06, "loss": 1.799, "step": 29949500 }, { "epoch": 86.69, "learning_rate": 6.66910342951093e-06, "loss": 1.8334, "step": 29950000 }, { "epoch": 86.69, "learning_rate": 6.668379781863654e-06, "loss": 1.7898, "step": 29950500 }, { "epoch": 86.7, "learning_rate": 6.667656134216377e-06, "loss": 1.7964, "step": 29951000 }, { "epoch": 86.7, "learning_rate": 6.666932486569099e-06, "loss": 1.8334, "step": 29951500 }, { "epoch": 86.7, "learning_rate": 6.666208838921824e-06, "loss": 1.8103, "step": 29952000 }, { "epoch": 86.7, "learning_rate": 6.665485191274546e-06, "loss": 1.8052, "step": 29952500 }, { "epoch": 86.7, "learning_rate": 6.6647629909225645e-06, "loss": 1.7852, "step": 29953000 }, { "epoch": 86.7, "learning_rate": 6.664039343275287e-06, "loss": 1.8155, "step": 29953500 }, { "epoch": 86.7, "learning_rate": 6.6633156956280115e-06, "loss": 1.8041, "step": 29954000 }, { "epoch": 86.71, "learning_rate": 6.662592047980734e-06, "loss": 1.7877, "step": 29954500 }, { "epoch": 86.71, "learning_rate": 6.661868400333457e-06, "loss": 1.7974, "step": 29955000 }, { "epoch": 86.71, "learning_rate": 6.66114475268618e-06, "loss": 1.8468, "step": 29955500 }, { "epoch": 86.71, "learning_rate": 6.660422552334199e-06, "loss": 1.8045, "step": 29956000 }, { "epoch": 86.71, "learning_rate": 6.659700351982216e-06, "loss": 1.8258, "step": 29956500 }, { "epoch": 86.71, "learning_rate": 6.658976704334939e-06, "loss": 1.8164, "step": 29957000 }, { "epoch": 86.71, "learning_rate": 6.658253056687662e-06, "loss": 1.8145, "step": 29957500 }, { "epoch": 86.72, "learning_rate": 6.657529409040386e-06, "loss": 1.8167, "step": 29958000 }, { "epoch": 86.72, "learning_rate": 6.656805761393109e-06, "loss": 1.818, "step": 29958500 }, { "epoch": 86.72, "learning_rate": 6.6560821137458315e-06, "loss": 1.8135, "step": 29959000 }, { "epoch": 86.72, "learning_rate": 6.655358466098556e-06, "loss": 1.8195, "step": 29959500 }, { "epoch": 86.72, "learning_rate": 6.6546348184512785e-06, "loss": 1.8148, "step": 29960000 }, { "epoch": 86.72, "learning_rate": 6.6539111708040016e-06, "loss": 1.8047, "step": 29960500 }, { "epoch": 86.72, "learning_rate": 6.653188970452019e-06, "loss": 1.8386, "step": 29961000 }, { "epoch": 86.73, "learning_rate": 6.652465322804744e-06, "loss": 1.8289, "step": 29961500 }, { "epoch": 86.73, "learning_rate": 6.651741675157467e-06, "loss": 1.8005, "step": 29962000 }, { "epoch": 86.73, "learning_rate": 6.651018027510189e-06, "loss": 1.8171, "step": 29962500 }, { "epoch": 86.73, "learning_rate": 6.650294379862912e-06, "loss": 1.7933, "step": 29963000 }, { "epoch": 86.73, "learning_rate": 6.649570732215636e-06, "loss": 1.7961, "step": 29963500 }, { "epoch": 86.73, "learning_rate": 6.648848531863654e-06, "loss": 1.8021, "step": 29964000 }, { "epoch": 86.73, "learning_rate": 6.648124884216376e-06, "loss": 1.825, "step": 29964500 }, { "epoch": 86.74, "learning_rate": 6.647401236569099e-06, "loss": 1.8038, "step": 29965000 }, { "epoch": 86.74, "learning_rate": 6.646677588921823e-06, "loss": 1.7941, "step": 29965500 }, { "epoch": 86.74, "learning_rate": 6.645953941274546e-06, "loss": 1.8318, "step": 29966000 }, { "epoch": 86.74, "learning_rate": 6.645230293627269e-06, "loss": 1.8136, "step": 29966500 }, { "epoch": 86.74, "learning_rate": 6.644509540570582e-06, "loss": 1.8013, "step": 29967000 }, { "epoch": 86.74, "learning_rate": 6.643785892923306e-06, "loss": 1.787, "step": 29967500 }, { "epoch": 86.75, "learning_rate": 6.643062245276029e-06, "loss": 1.8029, "step": 29968000 }, { "epoch": 86.75, "learning_rate": 6.642338597628752e-06, "loss": 1.8033, "step": 29968500 }, { "epoch": 86.75, "learning_rate": 6.641614949981476e-06, "loss": 1.792, "step": 29969000 }, { "epoch": 86.75, "learning_rate": 6.640891302334199e-06, "loss": 1.8013, "step": 29969500 }, { "epoch": 86.75, "learning_rate": 6.640169101982216e-06, "loss": 1.8276, "step": 29970000 }, { "epoch": 86.75, "learning_rate": 6.639446901630233e-06, "loss": 1.7905, "step": 29970500 }, { "epoch": 86.75, "learning_rate": 6.638723253982956e-06, "loss": 1.8152, "step": 29971000 }, { "epoch": 86.76, "learning_rate": 6.63799960633568e-06, "loss": 1.8306, "step": 29971500 }, { "epoch": 86.76, "learning_rate": 6.637275958688403e-06, "loss": 1.8156, "step": 29972000 }, { "epoch": 86.76, "learning_rate": 6.6365523110411264e-06, "loss": 1.7857, "step": 29972500 }, { "epoch": 86.76, "learning_rate": 6.63582866339385e-06, "loss": 1.8184, "step": 29973000 }, { "epoch": 86.76, "learning_rate": 6.6351050157465735e-06, "loss": 1.8101, "step": 29973500 }, { "epoch": 86.76, "learning_rate": 6.6343813680992965e-06, "loss": 1.792, "step": 29974000 }, { "epoch": 86.76, "learning_rate": 6.633657720452019e-06, "loss": 1.8222, "step": 29974500 }, { "epoch": 86.77, "learning_rate": 6.6329340728047435e-06, "loss": 1.795, "step": 29975000 }, { "epoch": 86.77, "learning_rate": 6.632210425157466e-06, "loss": 1.8394, "step": 29975500 }, { "epoch": 86.77, "learning_rate": 6.631486777510189e-06, "loss": 1.8049, "step": 29976000 }, { "epoch": 86.77, "learning_rate": 6.630763129862913e-06, "loss": 1.7809, "step": 29976500 }, { "epoch": 86.77, "learning_rate": 6.630040929510931e-06, "loss": 1.8096, "step": 29977000 }, { "epoch": 86.77, "learning_rate": 6.629317281863653e-06, "loss": 1.7851, "step": 29977500 }, { "epoch": 86.77, "learning_rate": 6.628593634216376e-06, "loss": 1.7994, "step": 29978000 }, { "epoch": 86.78, "learning_rate": 6.6278699865691e-06, "loss": 1.807, "step": 29978500 }, { "epoch": 86.78, "learning_rate": 6.627147786217118e-06, "loss": 1.7975, "step": 29979000 }, { "epoch": 86.78, "learning_rate": 6.6264255858651355e-06, "loss": 1.8024, "step": 29979500 }, { "epoch": 86.78, "learning_rate": 6.6257019382178585e-06, "loss": 1.8017, "step": 29980000 }, { "epoch": 86.78, "learning_rate": 6.6249782905705825e-06, "loss": 1.8096, "step": 29980500 }, { "epoch": 86.78, "learning_rate": 6.6242546429233056e-06, "loss": 1.8005, "step": 29981000 }, { "epoch": 86.78, "learning_rate": 6.623530995276029e-06, "loss": 1.8274, "step": 29981500 }, { "epoch": 86.79, "learning_rate": 6.622808794924046e-06, "loss": 1.818, "step": 29982000 }, { "epoch": 86.79, "learning_rate": 6.62208514727677e-06, "loss": 1.8187, "step": 29982500 }, { "epoch": 86.79, "learning_rate": 6.621361499629493e-06, "loss": 1.8091, "step": 29983000 }, { "epoch": 86.79, "learning_rate": 6.620637851982216e-06, "loss": 1.8219, "step": 29983500 }, { "epoch": 86.79, "learning_rate": 6.619914204334938e-06, "loss": 1.8212, "step": 29984000 }, { "epoch": 86.79, "learning_rate": 6.619190556687663e-06, "loss": 1.7989, "step": 29984500 }, { "epoch": 86.79, "learning_rate": 6.618466909040385e-06, "loss": 1.8167, "step": 29985000 }, { "epoch": 86.8, "learning_rate": 6.617744708688403e-06, "loss": 1.7991, "step": 29985500 }, { "epoch": 86.8, "learning_rate": 6.617021061041127e-06, "loss": 1.8201, "step": 29986000 }, { "epoch": 86.8, "learning_rate": 6.61629741339385e-06, "loss": 1.816, "step": 29986500 }, { "epoch": 86.8, "learning_rate": 6.615573765746573e-06, "loss": 1.8332, "step": 29987000 }, { "epoch": 86.8, "learning_rate": 6.614850118099296e-06, "loss": 1.8142, "step": 29987500 }, { "epoch": 86.8, "learning_rate": 6.614127917747315e-06, "loss": 1.822, "step": 29988000 }, { "epoch": 86.8, "learning_rate": 6.613404270100038e-06, "loss": 1.8019, "step": 29988500 }, { "epoch": 86.81, "learning_rate": 6.612680622452761e-06, "loss": 1.8028, "step": 29989000 }, { "epoch": 86.81, "learning_rate": 6.611956974805483e-06, "loss": 1.7925, "step": 29989500 }, { "epoch": 86.81, "learning_rate": 6.611233327158208e-06, "loss": 1.7889, "step": 29990000 }, { "epoch": 86.81, "learning_rate": 6.61050967951093e-06, "loss": 1.814, "step": 29990500 }, { "epoch": 86.81, "learning_rate": 6.609786031863653e-06, "loss": 1.8052, "step": 29991000 }, { "epoch": 86.81, "learning_rate": 6.609062384216377e-06, "loss": 1.8086, "step": 29991500 }, { "epoch": 86.81, "learning_rate": 6.6083387365691e-06, "loss": 1.8225, "step": 29992000 }, { "epoch": 86.82, "learning_rate": 6.607616536217117e-06, "loss": 1.8225, "step": 29992500 }, { "epoch": 86.82, "learning_rate": 6.60689288856984e-06, "loss": 1.7744, "step": 29993000 }, { "epoch": 86.82, "learning_rate": 6.606169240922565e-06, "loss": 1.8262, "step": 29993500 }, { "epoch": 86.82, "learning_rate": 6.605445593275287e-06, "loss": 1.7938, "step": 29994000 }, { "epoch": 86.82, "learning_rate": 6.6047219456280105e-06, "loss": 1.7931, "step": 29994500 }, { "epoch": 86.82, "learning_rate": 6.6039982979807336e-06, "loss": 1.8074, "step": 29995000 }, { "epoch": 86.82, "learning_rate": 6.6032760976287525e-06, "loss": 1.8451, "step": 29995500 }, { "epoch": 86.83, "learning_rate": 6.602552449981475e-06, "loss": 1.8105, "step": 29996000 }, { "epoch": 86.83, "learning_rate": 6.601828802334198e-06, "loss": 1.7916, "step": 29996500 }, { "epoch": 86.83, "learning_rate": 6.601105154686922e-06, "loss": 1.7984, "step": 29997000 }, { "epoch": 86.83, "learning_rate": 6.60038295433494e-06, "loss": 1.7943, "step": 29997500 }, { "epoch": 86.83, "learning_rate": 6.599659306687662e-06, "loss": 1.8114, "step": 29998000 }, { "epoch": 86.83, "learning_rate": 6.598935659040385e-06, "loss": 1.7875, "step": 29998500 }, { "epoch": 86.83, "learning_rate": 6.598212011393109e-06, "loss": 1.8235, "step": 29999000 }, { "epoch": 86.84, "learning_rate": 6.597488363745832e-06, "loss": 1.8103, "step": 29999500 }, { "epoch": 86.84, "learning_rate": 6.59676616339385e-06, "loss": 1.8069, "step": 30000000 }, { "epoch": 86.84, "learning_rate": 6.5960425157465725e-06, "loss": 1.8024, "step": 30000500 }, { "epoch": 86.84, "learning_rate": 6.595318868099297e-06, "loss": 1.8211, "step": 30001000 }, { "epoch": 86.84, "learning_rate": 6.5945952204520195e-06, "loss": 1.7743, "step": 30001500 }, { "epoch": 86.84, "learning_rate": 6.593873020100038e-06, "loss": 1.7974, "step": 30002000 }, { "epoch": 86.84, "learning_rate": 6.59314937245276e-06, "loss": 1.7989, "step": 30002500 }, { "epoch": 86.85, "learning_rate": 6.592425724805485e-06, "loss": 1.8222, "step": 30003000 }, { "epoch": 86.85, "learning_rate": 6.591702077158207e-06, "loss": 1.8143, "step": 30003500 }, { "epoch": 86.85, "learning_rate": 6.59097842951093e-06, "loss": 1.781, "step": 30004000 }, { "epoch": 86.85, "learning_rate": 6.590254781863653e-06, "loss": 1.8252, "step": 30004500 }, { "epoch": 86.85, "learning_rate": 6.589532581511672e-06, "loss": 1.8309, "step": 30005000 }, { "epoch": 86.85, "learning_rate": 6.588810381159689e-06, "loss": 1.8055, "step": 30005500 }, { "epoch": 86.86, "learning_rate": 6.588086733512412e-06, "loss": 1.8173, "step": 30006000 }, { "epoch": 86.86, "learning_rate": 6.5873645331604295e-06, "loss": 1.8616, "step": 30006500 }, { "epoch": 86.86, "learning_rate": 6.586640885513154e-06, "loss": 1.7874, "step": 30007000 }, { "epoch": 86.86, "learning_rate": 6.5859172378658765e-06, "loss": 1.7936, "step": 30007500 }, { "epoch": 86.86, "learning_rate": 6.5851935902186e-06, "loss": 1.7917, "step": 30008000 }, { "epoch": 86.86, "learning_rate": 6.584469942571323e-06, "loss": 1.8293, "step": 30008500 }, { "epoch": 86.86, "learning_rate": 6.583746294924047e-06, "loss": 1.8149, "step": 30009000 }, { "epoch": 86.87, "learning_rate": 6.58302264727677e-06, "loss": 1.7915, "step": 30009500 }, { "epoch": 86.87, "learning_rate": 6.582298999629492e-06, "loss": 1.7981, "step": 30010000 }, { "epoch": 86.87, "learning_rate": 6.581575351982217e-06, "loss": 1.8037, "step": 30010500 }, { "epoch": 86.87, "learning_rate": 6.580851704334939e-06, "loss": 1.8024, "step": 30011000 }, { "epoch": 86.87, "learning_rate": 6.580128056687662e-06, "loss": 1.7933, "step": 30011500 }, { "epoch": 86.87, "learning_rate": 6.579404409040385e-06, "loss": 1.811, "step": 30012000 }, { "epoch": 86.87, "learning_rate": 6.578682208688404e-06, "loss": 1.8179, "step": 30012500 }, { "epoch": 86.88, "learning_rate": 6.577958561041127e-06, "loss": 1.7737, "step": 30013000 }, { "epoch": 86.88, "learning_rate": 6.577236360689144e-06, "loss": 1.8013, "step": 30013500 }, { "epoch": 86.88, "learning_rate": 6.5765127130418675e-06, "loss": 1.8067, "step": 30014000 }, { "epoch": 86.88, "learning_rate": 6.575789065394591e-06, "loss": 1.8279, "step": 30014500 }, { "epoch": 86.88, "learning_rate": 6.5750654177473145e-06, "loss": 1.8118, "step": 30015000 }, { "epoch": 86.88, "learning_rate": 6.574341770100037e-06, "loss": 1.8106, "step": 30015500 }, { "epoch": 86.88, "learning_rate": 6.5736181224527615e-06, "loss": 1.8111, "step": 30016000 }, { "epoch": 86.89, "learning_rate": 6.572894474805484e-06, "loss": 1.8063, "step": 30016500 }, { "epoch": 86.89, "learning_rate": 6.572170827158207e-06, "loss": 1.822, "step": 30017000 }, { "epoch": 86.89, "learning_rate": 6.57144717951093e-06, "loss": 1.8151, "step": 30017500 }, { "epoch": 86.89, "learning_rate": 6.570723531863654e-06, "loss": 1.7945, "step": 30018000 }, { "epoch": 86.89, "learning_rate": 6.569999884216377e-06, "loss": 1.7892, "step": 30018500 }, { "epoch": 86.89, "learning_rate": 6.569277683864394e-06, "loss": 1.7787, "step": 30019000 }, { "epoch": 86.89, "learning_rate": 6.568554036217117e-06, "loss": 1.7872, "step": 30019500 }, { "epoch": 86.9, "learning_rate": 6.567830388569841e-06, "loss": 1.8148, "step": 30020000 }, { "epoch": 86.9, "learning_rate": 6.567106740922564e-06, "loss": 1.8204, "step": 30020500 }, { "epoch": 86.9, "learning_rate": 6.5663845405705815e-06, "loss": 1.7992, "step": 30021000 }, { "epoch": 86.9, "learning_rate": 6.5656608929233046e-06, "loss": 1.7898, "step": 30021500 }, { "epoch": 86.9, "learning_rate": 6.5649372452760285e-06, "loss": 1.8133, "step": 30022000 }, { "epoch": 86.9, "learning_rate": 6.5642135976287516e-06, "loss": 1.8168, "step": 30022500 }, { "epoch": 86.9, "learning_rate": 6.563491397276769e-06, "loss": 1.8093, "step": 30023000 }, { "epoch": 86.91, "learning_rate": 6.562767749629492e-06, "loss": 1.8076, "step": 30023500 }, { "epoch": 86.91, "learning_rate": 6.562044101982217e-06, "loss": 1.805, "step": 30024000 }, { "epoch": 86.91, "learning_rate": 6.561320454334939e-06, "loss": 1.8228, "step": 30024500 }, { "epoch": 86.91, "learning_rate": 6.560596806687662e-06, "loss": 1.8236, "step": 30025000 }, { "epoch": 86.91, "learning_rate": 6.559873159040386e-06, "loss": 1.7984, "step": 30025500 }, { "epoch": 86.91, "learning_rate": 6.559150958688404e-06, "loss": 1.8144, "step": 30026000 }, { "epoch": 86.91, "learning_rate": 6.558427311041126e-06, "loss": 1.8399, "step": 30026500 }, { "epoch": 86.92, "learning_rate": 6.557703663393849e-06, "loss": 1.8146, "step": 30027000 }, { "epoch": 86.92, "learning_rate": 6.556980015746573e-06, "loss": 1.8259, "step": 30027500 }, { "epoch": 86.92, "learning_rate": 6.556257815394591e-06, "loss": 1.8174, "step": 30028000 }, { "epoch": 86.92, "learning_rate": 6.555534167747314e-06, "loss": 1.825, "step": 30028500 }, { "epoch": 86.92, "learning_rate": 6.554810520100037e-06, "loss": 1.8218, "step": 30029000 }, { "epoch": 86.92, "learning_rate": 6.554086872452761e-06, "loss": 1.8361, "step": 30029500 }, { "epoch": 86.92, "learning_rate": 6.553363224805484e-06, "loss": 1.7923, "step": 30030000 }, { "epoch": 86.93, "learning_rate": 6.552641024453502e-06, "loss": 1.804, "step": 30030500 }, { "epoch": 86.93, "learning_rate": 6.551917376806224e-06, "loss": 1.807, "step": 30031000 }, { "epoch": 86.93, "learning_rate": 6.551193729158949e-06, "loss": 1.8177, "step": 30031500 }, { "epoch": 86.93, "learning_rate": 6.550470081511671e-06, "loss": 1.8226, "step": 30032000 }, { "epoch": 86.93, "learning_rate": 6.549747881159689e-06, "loss": 1.823, "step": 30032500 }, { "epoch": 86.93, "learning_rate": 6.549024233512413e-06, "loss": 1.8143, "step": 30033000 }, { "epoch": 86.93, "learning_rate": 6.54830203316043e-06, "loss": 1.8031, "step": 30033500 }, { "epoch": 86.94, "learning_rate": 6.547578385513153e-06, "loss": 1.7879, "step": 30034000 }, { "epoch": 86.94, "learning_rate": 6.5468547378658764e-06, "loss": 1.804, "step": 30034500 }, { "epoch": 86.94, "learning_rate": 6.5461310902186e-06, "loss": 1.8127, "step": 30035000 }, { "epoch": 86.94, "learning_rate": 6.5454074425713234e-06, "loss": 1.8242, "step": 30035500 }, { "epoch": 86.94, "learning_rate": 6.544683794924046e-06, "loss": 1.8213, "step": 30036000 }, { "epoch": 86.94, "learning_rate": 6.543960147276769e-06, "loss": 1.7922, "step": 30036500 }, { "epoch": 86.94, "learning_rate": 6.543237946924788e-06, "loss": 1.7875, "step": 30037000 }, { "epoch": 86.95, "learning_rate": 6.542514299277511e-06, "loss": 1.8077, "step": 30037500 }, { "epoch": 86.95, "learning_rate": 6.541790651630234e-06, "loss": 1.8171, "step": 30038000 }, { "epoch": 86.95, "learning_rate": 6.541067003982956e-06, "loss": 1.8112, "step": 30038500 }, { "epoch": 86.95, "learning_rate": 6.540343356335681e-06, "loss": 1.8158, "step": 30039000 }, { "epoch": 86.95, "learning_rate": 6.539619708688403e-06, "loss": 1.8263, "step": 30039500 }, { "epoch": 86.95, "learning_rate": 6.538896061041126e-06, "loss": 1.7942, "step": 30040000 }, { "epoch": 86.95, "learning_rate": 6.53817241339385e-06, "loss": 1.8185, "step": 30040500 }, { "epoch": 86.96, "learning_rate": 6.537450213041868e-06, "loss": 1.8064, "step": 30041000 }, { "epoch": 86.96, "learning_rate": 6.5367265653945904e-06, "loss": 1.7902, "step": 30041500 }, { "epoch": 86.96, "learning_rate": 6.5360029177473135e-06, "loss": 1.8164, "step": 30042000 }, { "epoch": 86.96, "learning_rate": 6.5352792701000375e-06, "loss": 1.828, "step": 30042500 }, { "epoch": 86.96, "learning_rate": 6.5345556224527605e-06, "loss": 1.7964, "step": 30043000 }, { "epoch": 86.96, "learning_rate": 6.533831974805484e-06, "loss": 1.8194, "step": 30043500 }, { "epoch": 86.97, "learning_rate": 6.533108327158207e-06, "loss": 1.7874, "step": 30044000 }, { "epoch": 86.97, "learning_rate": 6.532384679510931e-06, "loss": 1.7989, "step": 30044500 }, { "epoch": 86.97, "learning_rate": 6.531661031863654e-06, "loss": 1.8051, "step": 30045000 }, { "epoch": 86.97, "learning_rate": 6.530938831511671e-06, "loss": 1.8054, "step": 30045500 }, { "epoch": 86.97, "learning_rate": 6.530216631159688e-06, "loss": 1.8082, "step": 30046000 }, { "epoch": 86.97, "learning_rate": 6.529492983512413e-06, "loss": 1.8469, "step": 30046500 }, { "epoch": 86.97, "learning_rate": 6.52877078316043e-06, "loss": 1.8226, "step": 30047000 }, { "epoch": 86.98, "learning_rate": 6.528047135513153e-06, "loss": 1.8128, "step": 30047500 }, { "epoch": 86.98, "learning_rate": 6.5273234878658755e-06, "loss": 1.8214, "step": 30048000 }, { "epoch": 86.98, "learning_rate": 6.5265998402186e-06, "loss": 1.8181, "step": 30048500 }, { "epoch": 86.98, "learning_rate": 6.525876192571323e-06, "loss": 1.8005, "step": 30049000 }, { "epoch": 86.98, "learning_rate": 6.525152544924046e-06, "loss": 1.8257, "step": 30049500 }, { "epoch": 86.98, "learning_rate": 6.52442889727677e-06, "loss": 1.8179, "step": 30050000 }, { "epoch": 86.98, "learning_rate": 6.523705249629493e-06, "loss": 1.8104, "step": 30050500 }, { "epoch": 86.99, "learning_rate": 6.522981601982216e-06, "loss": 1.8257, "step": 30051000 }, { "epoch": 86.99, "learning_rate": 6.522259401630233e-06, "loss": 1.7972, "step": 30051500 }, { "epoch": 86.99, "learning_rate": 6.521535753982958e-06, "loss": 1.8029, "step": 30052000 }, { "epoch": 86.99, "learning_rate": 6.52081210633568e-06, "loss": 1.8107, "step": 30052500 }, { "epoch": 86.99, "learning_rate": 6.520088458688403e-06, "loss": 1.7675, "step": 30053000 }, { "epoch": 86.99, "learning_rate": 6.51936625833642e-06, "loss": 1.8362, "step": 30053500 }, { "epoch": 86.99, "learning_rate": 6.518642610689145e-06, "loss": 1.8298, "step": 30054000 }, { "epoch": 87.0, "learning_rate": 6.517918963041867e-06, "loss": 1.8141, "step": 30054500 }, { "epoch": 87.0, "learning_rate": 6.517196762689885e-06, "loss": 1.7949, "step": 30055000 }, { "epoch": 87.0, "learning_rate": 6.5164731150426085e-06, "loss": 1.8136, "step": 30055500 }, { "epoch": 87.0, "learning_rate": 6.515749467395332e-06, "loss": 1.8164, "step": 30056000 }, { "epoch": 87.0, "eval_accuracy": 0.6891758386419078, "eval_accuracy_mlm": 0.6582064760563344, "eval_accuracy_nsp": 0.8552474185697982, "eval_loss": 2.1924755573272705, "eval_runtime": 331.6095, "eval_samples_per_second": 1315.964, "eval_steps_per_second": 54.833, "step": 30056064 }, { "epoch": 87.0, "learning_rate": 6.5150258197480555e-06, "loss": 1.7942, "step": 30056500 }, { "epoch": 87.0, "learning_rate": 6.514303619396073e-06, "loss": 1.7978, "step": 30057000 }, { "epoch": 87.0, "learning_rate": 6.513579971748796e-06, "loss": 1.7773, "step": 30057500 }, { "epoch": 87.01, "learning_rate": 6.51285632410152e-06, "loss": 1.802, "step": 30058000 }, { "epoch": 87.01, "learning_rate": 6.512132676454243e-06, "loss": 1.8097, "step": 30058500 }, { "epoch": 87.01, "learning_rate": 6.511409028806965e-06, "loss": 1.8013, "step": 30059000 }, { "epoch": 87.01, "learning_rate": 6.51068538115969e-06, "loss": 1.8092, "step": 30059500 }, { "epoch": 87.01, "learning_rate": 6.509961733512412e-06, "loss": 1.824, "step": 30060000 }, { "epoch": 87.01, "learning_rate": 6.509238085865135e-06, "loss": 1.8, "step": 30060500 }, { "epoch": 87.01, "learning_rate": 6.508514438217858e-06, "loss": 1.7946, "step": 30061000 }, { "epoch": 87.02, "learning_rate": 6.507790790570582e-06, "loss": 1.8039, "step": 30061500 }, { "epoch": 87.02, "learning_rate": 6.507067142923305e-06, "loss": 1.8098, "step": 30062000 }, { "epoch": 87.02, "learning_rate": 6.506343495276028e-06, "loss": 1.8167, "step": 30062500 }, { "epoch": 87.02, "learning_rate": 6.505621294924047e-06, "loss": 1.7944, "step": 30063000 }, { "epoch": 87.02, "learning_rate": 6.5048976472767695e-06, "loss": 1.7646, "step": 30063500 }, { "epoch": 87.02, "learning_rate": 6.504173999629493e-06, "loss": 1.812, "step": 30064000 }, { "epoch": 87.02, "learning_rate": 6.503450351982216e-06, "loss": 1.7998, "step": 30064500 }, { "epoch": 87.03, "learning_rate": 6.502728151630235e-06, "loss": 1.792, "step": 30065000 }, { "epoch": 87.03, "learning_rate": 6.502004503982957e-06, "loss": 1.8077, "step": 30065500 }, { "epoch": 87.03, "learning_rate": 6.50128085633568e-06, "loss": 1.7947, "step": 30066000 }, { "epoch": 87.03, "learning_rate": 6.500557208688403e-06, "loss": 1.7911, "step": 30066500 }, { "epoch": 87.03, "learning_rate": 6.499833561041127e-06, "loss": 1.8124, "step": 30067000 }, { "epoch": 87.03, "learning_rate": 6.49910991339385e-06, "loss": 1.8009, "step": 30067500 }, { "epoch": 87.03, "learning_rate": 6.498386265746573e-06, "loss": 1.813, "step": 30068000 }, { "epoch": 87.04, "learning_rate": 6.497662618099297e-06, "loss": 1.8096, "step": 30068500 }, { "epoch": 87.04, "learning_rate": 6.496940417747314e-06, "loss": 1.8371, "step": 30069000 }, { "epoch": 87.04, "learning_rate": 6.496216770100037e-06, "loss": 1.7751, "step": 30069500 }, { "epoch": 87.04, "learning_rate": 6.4954931224527604e-06, "loss": 1.8177, "step": 30070000 }, { "epoch": 87.04, "learning_rate": 6.494769474805484e-06, "loss": 1.784, "step": 30070500 }, { "epoch": 87.04, "learning_rate": 6.4940458271582074e-06, "loss": 1.7932, "step": 30071000 }, { "epoch": 87.04, "learning_rate": 6.49332217951093e-06, "loss": 1.7751, "step": 30071500 }, { "epoch": 87.05, "learning_rate": 6.492599979158948e-06, "loss": 1.8265, "step": 30072000 }, { "epoch": 87.05, "learning_rate": 6.491876331511672e-06, "loss": 1.8097, "step": 30072500 }, { "epoch": 87.05, "learning_rate": 6.491152683864395e-06, "loss": 1.7939, "step": 30073000 }, { "epoch": 87.05, "learning_rate": 6.490429036217118e-06, "loss": 1.7805, "step": 30073500 }, { "epoch": 87.05, "learning_rate": 6.489705388569842e-06, "loss": 1.784, "step": 30074000 }, { "epoch": 87.05, "learning_rate": 6.488981740922565e-06, "loss": 1.7833, "step": 30074500 }, { "epoch": 87.05, "learning_rate": 6.488259540570582e-06, "loss": 1.804, "step": 30075000 }, { "epoch": 87.06, "learning_rate": 6.487535892923305e-06, "loss": 1.7886, "step": 30075500 }, { "epoch": 87.06, "learning_rate": 6.486812245276029e-06, "loss": 1.7812, "step": 30076000 }, { "epoch": 87.06, "learning_rate": 6.486088597628752e-06, "loss": 1.8028, "step": 30076500 }, { "epoch": 87.06, "learning_rate": 6.4853649499814744e-06, "loss": 1.8244, "step": 30077000 }, { "epoch": 87.06, "learning_rate": 6.4846427496294925e-06, "loss": 1.8163, "step": 30077500 }, { "epoch": 87.06, "learning_rate": 6.4839191019822165e-06, "loss": 1.7895, "step": 30078000 }, { "epoch": 87.06, "learning_rate": 6.4831954543349395e-06, "loss": 1.82, "step": 30078500 }, { "epoch": 87.07, "learning_rate": 6.482471806687662e-06, "loss": 1.7831, "step": 30079000 }, { "epoch": 87.07, "learning_rate": 6.48174960633568e-06, "loss": 1.7943, "step": 30079500 }, { "epoch": 87.07, "learning_rate": 6.481025958688404e-06, "loss": 1.7992, "step": 30080000 }, { "epoch": 87.07, "learning_rate": 6.480302311041127e-06, "loss": 1.8005, "step": 30080500 }, { "epoch": 87.07, "learning_rate": 6.47957866339385e-06, "loss": 1.8163, "step": 30081000 }, { "epoch": 87.07, "learning_rate": 6.478855015746572e-06, "loss": 1.8188, "step": 30081500 }, { "epoch": 87.08, "learning_rate": 6.478132815394591e-06, "loss": 1.8054, "step": 30082000 }, { "epoch": 87.08, "learning_rate": 6.477410615042609e-06, "loss": 1.8262, "step": 30082500 }, { "epoch": 87.08, "learning_rate": 6.4766869673953315e-06, "loss": 1.8111, "step": 30083000 }, { "epoch": 87.08, "learning_rate": 6.4759633197480546e-06, "loss": 1.7927, "step": 30083500 }, { "epoch": 87.08, "learning_rate": 6.4752396721007785e-06, "loss": 1.8114, "step": 30084000 }, { "epoch": 87.08, "learning_rate": 6.4745160244535016e-06, "loss": 1.8165, "step": 30084500 }, { "epoch": 87.08, "learning_rate": 6.473792376806225e-06, "loss": 1.771, "step": 30085000 }, { "epoch": 87.09, "learning_rate": 6.4730687291589486e-06, "loss": 1.7927, "step": 30085500 }, { "epoch": 87.09, "learning_rate": 6.472345081511672e-06, "loss": 1.8014, "step": 30086000 }, { "epoch": 87.09, "learning_rate": 6.471622881159689e-06, "loss": 1.8011, "step": 30086500 }, { "epoch": 87.09, "learning_rate": 6.470899233512412e-06, "loss": 1.7978, "step": 30087000 }, { "epoch": 87.09, "learning_rate": 6.470175585865136e-06, "loss": 1.7782, "step": 30087500 }, { "epoch": 87.09, "learning_rate": 6.469451938217859e-06, "loss": 1.8051, "step": 30088000 }, { "epoch": 87.09, "learning_rate": 6.468728290570582e-06, "loss": 1.8114, "step": 30088500 }, { "epoch": 87.1, "learning_rate": 6.468006090218599e-06, "loss": 1.8095, "step": 30089000 }, { "epoch": 87.1, "learning_rate": 6.467282442571323e-06, "loss": 1.8022, "step": 30089500 }, { "epoch": 87.1, "learning_rate": 6.466558794924046e-06, "loss": 1.8082, "step": 30090000 }, { "epoch": 87.1, "learning_rate": 6.4658365945720636e-06, "loss": 1.7866, "step": 30090500 }, { "epoch": 87.1, "learning_rate": 6.465112946924787e-06, "loss": 1.8139, "step": 30091000 }, { "epoch": 87.1, "learning_rate": 6.464389299277511e-06, "loss": 1.8034, "step": 30091500 }, { "epoch": 87.1, "learning_rate": 6.463665651630234e-06, "loss": 1.81, "step": 30092000 }, { "epoch": 87.11, "learning_rate": 6.462942003982957e-06, "loss": 1.8135, "step": 30092500 }, { "epoch": 87.11, "learning_rate": 6.462218356335681e-06, "loss": 1.7939, "step": 30093000 }, { "epoch": 87.11, "learning_rate": 6.461494708688404e-06, "loss": 1.7994, "step": 30093500 }, { "epoch": 87.11, "learning_rate": 6.460771061041127e-06, "loss": 1.778, "step": 30094000 }, { "epoch": 87.11, "learning_rate": 6.460047413393849e-06, "loss": 1.7698, "step": 30094500 }, { "epoch": 87.11, "learning_rate": 6.459325213041868e-06, "loss": 1.7963, "step": 30095000 }, { "epoch": 87.11, "learning_rate": 6.458601565394591e-06, "loss": 1.7621, "step": 30095500 }, { "epoch": 87.12, "learning_rate": 6.457879365042608e-06, "loss": 1.8181, "step": 30096000 }, { "epoch": 87.12, "learning_rate": 6.457155717395331e-06, "loss": 1.808, "step": 30096500 }, { "epoch": 87.12, "learning_rate": 6.456432069748055e-06, "loss": 1.7986, "step": 30097000 }, { "epoch": 87.12, "learning_rate": 6.4557084221007784e-06, "loss": 1.7825, "step": 30097500 }, { "epoch": 87.12, "learning_rate": 6.4549847744535015e-06, "loss": 1.7975, "step": 30098000 }, { "epoch": 87.12, "learning_rate": 6.454261126806225e-06, "loss": 1.7896, "step": 30098500 }, { "epoch": 87.12, "learning_rate": 6.4535374791589485e-06, "loss": 1.7861, "step": 30099000 }, { "epoch": 87.13, "learning_rate": 6.452815278806966e-06, "loss": 1.8178, "step": 30099500 }, { "epoch": 87.13, "learning_rate": 6.452091631159689e-06, "loss": 1.7814, "step": 30100000 }, { "epoch": 87.13, "learning_rate": 6.451367983512413e-06, "loss": 1.8026, "step": 30100500 }, { "epoch": 87.13, "learning_rate": 6.450644335865136e-06, "loss": 1.8063, "step": 30101000 }, { "epoch": 87.13, "learning_rate": 6.449920688217859e-06, "loss": 1.7804, "step": 30101500 }, { "epoch": 87.13, "learning_rate": 6.449197040570581e-06, "loss": 1.8241, "step": 30102000 }, { "epoch": 87.13, "learning_rate": 6.448473392923306e-06, "loss": 1.7836, "step": 30102500 }, { "epoch": 87.14, "learning_rate": 6.447749745276028e-06, "loss": 1.7849, "step": 30103000 }, { "epoch": 87.14, "learning_rate": 6.447026097628751e-06, "loss": 1.7974, "step": 30103500 }, { "epoch": 87.14, "learning_rate": 6.4463038972767685e-06, "loss": 1.7953, "step": 30104000 }, { "epoch": 87.14, "learning_rate": 6.445580249629493e-06, "loss": 1.803, "step": 30104500 }, { "epoch": 87.14, "learning_rate": 6.444856601982216e-06, "loss": 1.8166, "step": 30105000 }, { "epoch": 87.14, "learning_rate": 6.444132954334939e-06, "loss": 1.8076, "step": 30105500 }, { "epoch": 87.14, "learning_rate": 6.443409306687663e-06, "loss": 1.7865, "step": 30106000 }, { "epoch": 87.15, "learning_rate": 6.442687106335681e-06, "loss": 1.8056, "step": 30106500 }, { "epoch": 87.15, "learning_rate": 6.441963458688404e-06, "loss": 1.7844, "step": 30107000 }, { "epoch": 87.15, "learning_rate": 6.441239811041126e-06, "loss": 1.8162, "step": 30107500 }, { "epoch": 87.15, "learning_rate": 6.440517610689144e-06, "loss": 1.7847, "step": 30108000 }, { "epoch": 87.15, "learning_rate": 6.439793963041868e-06, "loss": 1.7984, "step": 30108500 }, { "epoch": 87.15, "learning_rate": 6.439070315394591e-06, "loss": 1.8149, "step": 30109000 }, { "epoch": 87.15, "learning_rate": 6.438346667747313e-06, "loss": 1.7943, "step": 30109500 }, { "epoch": 87.16, "learning_rate": 6.437623020100038e-06, "loss": 1.7988, "step": 30110000 }, { "epoch": 87.16, "learning_rate": 6.43689937245276e-06, "loss": 1.7816, "step": 30110500 }, { "epoch": 87.16, "learning_rate": 6.436175724805483e-06, "loss": 1.8047, "step": 30111000 }, { "epoch": 87.16, "learning_rate": 6.4354520771582064e-06, "loss": 1.8152, "step": 30111500 }, { "epoch": 87.16, "learning_rate": 6.43472842951093e-06, "loss": 1.8119, "step": 30112000 }, { "epoch": 87.16, "learning_rate": 6.434007676454243e-06, "loss": 1.8216, "step": 30112500 }, { "epoch": 87.16, "learning_rate": 6.433284028806966e-06, "loss": 1.8085, "step": 30113000 }, { "epoch": 87.17, "learning_rate": 6.432560381159689e-06, "loss": 1.8099, "step": 30113500 }, { "epoch": 87.17, "learning_rate": 6.431836733512413e-06, "loss": 1.8064, "step": 30114000 }, { "epoch": 87.17, "learning_rate": 6.431113085865136e-06, "loss": 1.805, "step": 30114500 }, { "epoch": 87.17, "learning_rate": 6.430389438217858e-06, "loss": 1.8126, "step": 30115000 }, { "epoch": 87.17, "learning_rate": 6.429667237865876e-06, "loss": 1.806, "step": 30115500 }, { "epoch": 87.17, "learning_rate": 6.4289435902186e-06, "loss": 1.826, "step": 30116000 }, { "epoch": 87.17, "learning_rate": 6.428221389866617e-06, "loss": 1.7846, "step": 30116500 }, { "epoch": 87.18, "learning_rate": 6.42749774221934e-06, "loss": 1.8013, "step": 30117000 }, { "epoch": 87.18, "learning_rate": 6.4267740945720635e-06, "loss": 1.8039, "step": 30117500 }, { "epoch": 87.18, "learning_rate": 6.426050446924787e-06, "loss": 1.8032, "step": 30118000 }, { "epoch": 87.18, "learning_rate": 6.4253267992775105e-06, "loss": 1.7812, "step": 30118500 }, { "epoch": 87.18, "learning_rate": 6.4246031516302336e-06, "loss": 1.8051, "step": 30119000 }, { "epoch": 87.18, "learning_rate": 6.4238795039829575e-06, "loss": 1.7567, "step": 30119500 }, { "epoch": 87.19, "learning_rate": 6.4231558563356806e-06, "loss": 1.7968, "step": 30120000 }, { "epoch": 87.19, "learning_rate": 6.422432208688403e-06, "loss": 1.7963, "step": 30120500 }, { "epoch": 87.19, "learning_rate": 6.4217085610411276e-06, "loss": 1.7791, "step": 30121000 }, { "epoch": 87.19, "learning_rate": 6.42098491339385e-06, "loss": 1.7951, "step": 30121500 }, { "epoch": 87.19, "learning_rate": 6.420261265746573e-06, "loss": 1.7817, "step": 30122000 }, { "epoch": 87.19, "learning_rate": 6.419537618099296e-06, "loss": 1.7905, "step": 30122500 }, { "epoch": 87.19, "learning_rate": 6.41881397045202e-06, "loss": 1.8043, "step": 30123000 }, { "epoch": 87.2, "learning_rate": 6.418091770100038e-06, "loss": 1.8145, "step": 30123500 }, { "epoch": 87.2, "learning_rate": 6.41736812245276e-06, "loss": 1.8053, "step": 30124000 }, { "epoch": 87.2, "learning_rate": 6.416644474805483e-06, "loss": 1.825, "step": 30124500 }, { "epoch": 87.2, "learning_rate": 6.415920827158207e-06, "loss": 1.8062, "step": 30125000 }, { "epoch": 87.2, "learning_rate": 6.41519717951093e-06, "loss": 1.7965, "step": 30125500 }, { "epoch": 87.2, "learning_rate": 6.414473531863653e-06, "loss": 1.7963, "step": 30126000 }, { "epoch": 87.2, "learning_rate": 6.413751331511671e-06, "loss": 1.8085, "step": 30126500 }, { "epoch": 87.21, "learning_rate": 6.4130276838643946e-06, "loss": 1.7808, "step": 30127000 }, { "epoch": 87.21, "learning_rate": 6.412304036217118e-06, "loss": 1.7919, "step": 30127500 }, { "epoch": 87.21, "learning_rate": 6.411580388569841e-06, "loss": 1.8029, "step": 30128000 }, { "epoch": 87.21, "learning_rate": 6.410856740922565e-06, "loss": 1.8085, "step": 30128500 }, { "epoch": 87.21, "learning_rate": 6.410133093275288e-06, "loss": 1.8137, "step": 30129000 }, { "epoch": 87.21, "learning_rate": 6.409409445628011e-06, "loss": 1.7976, "step": 30129500 }, { "epoch": 87.21, "learning_rate": 6.408685797980733e-06, "loss": 1.8095, "step": 30130000 }, { "epoch": 87.22, "learning_rate": 6.407963597628752e-06, "loss": 1.7932, "step": 30130500 }, { "epoch": 87.22, "learning_rate": 6.40724139727677e-06, "loss": 1.8056, "step": 30131000 }, { "epoch": 87.22, "learning_rate": 6.406517749629492e-06, "loss": 1.821, "step": 30131500 }, { "epoch": 87.22, "learning_rate": 6.405794101982215e-06, "loss": 1.7971, "step": 30132000 }, { "epoch": 87.22, "learning_rate": 6.405070454334939e-06, "loss": 1.8057, "step": 30132500 }, { "epoch": 87.22, "learning_rate": 6.4043482539829574e-06, "loss": 1.7888, "step": 30133000 }, { "epoch": 87.22, "learning_rate": 6.40362460633568e-06, "loss": 1.7913, "step": 30133500 }, { "epoch": 87.23, "learning_rate": 6.402900958688403e-06, "loss": 1.8207, "step": 30134000 }, { "epoch": 87.23, "learning_rate": 6.402177311041127e-06, "loss": 1.7701, "step": 30134500 }, { "epoch": 87.23, "learning_rate": 6.40145366339385e-06, "loss": 1.8047, "step": 30135000 }, { "epoch": 87.23, "learning_rate": 6.400730015746573e-06, "loss": 1.8044, "step": 30135500 }, { "epoch": 87.23, "learning_rate": 6.400006368099297e-06, "loss": 1.7673, "step": 30136000 }, { "epoch": 87.23, "learning_rate": 6.39928272045202e-06, "loss": 1.8202, "step": 30136500 }, { "epoch": 87.23, "learning_rate": 6.398559072804743e-06, "loss": 1.796, "step": 30137000 }, { "epoch": 87.24, "learning_rate": 6.397835425157465e-06, "loss": 1.8184, "step": 30137500 }, { "epoch": 87.24, "learning_rate": 6.397113224805484e-06, "loss": 1.8206, "step": 30138000 }, { "epoch": 87.24, "learning_rate": 6.396391024453502e-06, "loss": 1.8303, "step": 30138500 }, { "epoch": 87.24, "learning_rate": 6.3956673768062244e-06, "loss": 1.8135, "step": 30139000 }, { "epoch": 87.24, "learning_rate": 6.3949437291589475e-06, "loss": 1.7973, "step": 30139500 }, { "epoch": 87.24, "learning_rate": 6.3942200815116714e-06, "loss": 1.8085, "step": 30140000 }, { "epoch": 87.24, "learning_rate": 6.3934964338643945e-06, "loss": 1.7961, "step": 30140500 }, { "epoch": 87.25, "learning_rate": 6.392772786217118e-06, "loss": 1.7932, "step": 30141000 }, { "epoch": 87.25, "learning_rate": 6.392050585865135e-06, "loss": 1.7879, "step": 30141500 }, { "epoch": 87.25, "learning_rate": 6.39132693821786e-06, "loss": 1.7904, "step": 30142000 }, { "epoch": 87.25, "learning_rate": 6.390603290570582e-06, "loss": 1.8028, "step": 30142500 }, { "epoch": 87.25, "learning_rate": 6.389879642923305e-06, "loss": 1.8187, "step": 30143000 }, { "epoch": 87.25, "learning_rate": 6.389155995276029e-06, "loss": 1.8192, "step": 30143500 }, { "epoch": 87.25, "learning_rate": 6.388432347628752e-06, "loss": 1.7982, "step": 30144000 }, { "epoch": 87.26, "learning_rate": 6.387708699981475e-06, "loss": 1.8216, "step": 30144500 }, { "epoch": 87.26, "learning_rate": 6.386985052334197e-06, "loss": 1.8142, "step": 30145000 }, { "epoch": 87.26, "learning_rate": 6.386262851982216e-06, "loss": 1.8081, "step": 30145500 }, { "epoch": 87.26, "learning_rate": 6.385539204334939e-06, "loss": 1.7903, "step": 30146000 }, { "epoch": 87.26, "learning_rate": 6.384815556687662e-06, "loss": 1.7799, "step": 30146500 }, { "epoch": 87.26, "learning_rate": 6.384091909040385e-06, "loss": 1.7961, "step": 30147000 }, { "epoch": 87.26, "learning_rate": 6.383368261393109e-06, "loss": 1.8214, "step": 30147500 }, { "epoch": 87.27, "learning_rate": 6.382646061041127e-06, "loss": 1.8114, "step": 30148000 }, { "epoch": 87.27, "learning_rate": 6.38192241339385e-06, "loss": 1.7829, "step": 30148500 }, { "epoch": 87.27, "learning_rate": 6.381198765746573e-06, "loss": 1.7947, "step": 30149000 }, { "epoch": 87.27, "learning_rate": 6.380475118099297e-06, "loss": 1.8002, "step": 30149500 }, { "epoch": 87.27, "learning_rate": 6.379752917747314e-06, "loss": 1.8119, "step": 30150000 }, { "epoch": 87.27, "learning_rate": 6.379029270100037e-06, "loss": 1.7976, "step": 30150500 }, { "epoch": 87.27, "learning_rate": 6.378305622452761e-06, "loss": 1.7986, "step": 30151000 }, { "epoch": 87.28, "learning_rate": 6.377581974805484e-06, "loss": 1.8091, "step": 30151500 }, { "epoch": 87.28, "learning_rate": 6.376859774453501e-06, "loss": 1.8053, "step": 30152000 }, { "epoch": 87.28, "learning_rate": 6.376136126806224e-06, "loss": 1.797, "step": 30152500 }, { "epoch": 87.28, "learning_rate": 6.375412479158948e-06, "loss": 1.8053, "step": 30153000 }, { "epoch": 87.28, "learning_rate": 6.374688831511671e-06, "loss": 1.8214, "step": 30153500 }, { "epoch": 87.28, "learning_rate": 6.3739651838643945e-06, "loss": 1.809, "step": 30154000 }, { "epoch": 87.28, "learning_rate": 6.3732415362171176e-06, "loss": 1.805, "step": 30154500 }, { "epoch": 87.29, "learning_rate": 6.3725178885698415e-06, "loss": 1.821, "step": 30155000 }, { "epoch": 87.29, "learning_rate": 6.371795688217859e-06, "loss": 1.7991, "step": 30155500 }, { "epoch": 87.29, "learning_rate": 6.371072040570582e-06, "loss": 1.8064, "step": 30156000 }, { "epoch": 87.29, "learning_rate": 6.370348392923305e-06, "loss": 1.8036, "step": 30156500 }, { "epoch": 87.29, "learning_rate": 6.369624745276029e-06, "loss": 1.8084, "step": 30157000 }, { "epoch": 87.29, "learning_rate": 6.368901097628752e-06, "loss": 1.7806, "step": 30157500 }, { "epoch": 87.3, "learning_rate": 6.368177449981474e-06, "loss": 1.8066, "step": 30158000 }, { "epoch": 87.3, "learning_rate": 6.367453802334199e-06, "loss": 1.8131, "step": 30158500 }, { "epoch": 87.3, "learning_rate": 6.366730154686921e-06, "loss": 1.8234, "step": 30159000 }, { "epoch": 87.3, "learning_rate": 6.366007954334939e-06, "loss": 1.7805, "step": 30159500 }, { "epoch": 87.3, "learning_rate": 6.365284306687662e-06, "loss": 1.8078, "step": 30160000 }, { "epoch": 87.3, "learning_rate": 6.364560659040386e-06, "loss": 1.8057, "step": 30160500 }, { "epoch": 87.3, "learning_rate": 6.363837011393109e-06, "loss": 1.803, "step": 30161000 }, { "epoch": 87.31, "learning_rate": 6.3631148110411266e-06, "loss": 1.8045, "step": 30161500 }, { "epoch": 87.31, "learning_rate": 6.36239116339385e-06, "loss": 1.7997, "step": 30162000 }, { "epoch": 87.31, "learning_rate": 6.361667515746574e-06, "loss": 1.8171, "step": 30162500 }, { "epoch": 87.31, "learning_rate": 6.360943868099297e-06, "loss": 1.7937, "step": 30163000 }, { "epoch": 87.31, "learning_rate": 6.360220220452019e-06, "loss": 1.7924, "step": 30163500 }, { "epoch": 87.31, "learning_rate": 6.359498020100037e-06, "loss": 1.7917, "step": 30164000 }, { "epoch": 87.31, "learning_rate": 6.358774372452761e-06, "loss": 1.7664, "step": 30164500 }, { "epoch": 87.32, "learning_rate": 6.358050724805484e-06, "loss": 1.8219, "step": 30165000 }, { "epoch": 87.32, "learning_rate": 6.357327077158206e-06, "loss": 1.7897, "step": 30165500 }, { "epoch": 87.32, "learning_rate": 6.356603429510931e-06, "loss": 1.8061, "step": 30166000 }, { "epoch": 87.32, "learning_rate": 6.355879781863654e-06, "loss": 1.7816, "step": 30166500 }, { "epoch": 87.32, "learning_rate": 6.355156134216376e-06, "loss": 1.8024, "step": 30167000 }, { "epoch": 87.32, "learning_rate": 6.354433933864394e-06, "loss": 1.815, "step": 30167500 }, { "epoch": 87.32, "learning_rate": 6.353710286217118e-06, "loss": 1.8152, "step": 30168000 }, { "epoch": 87.33, "learning_rate": 6.3529866385698414e-06, "loss": 1.7977, "step": 30168500 }, { "epoch": 87.33, "learning_rate": 6.352262990922564e-06, "loss": 1.8263, "step": 30169000 }, { "epoch": 87.33, "learning_rate": 6.351539343275287e-06, "loss": 1.8041, "step": 30169500 }, { "epoch": 87.33, "learning_rate": 6.350817142923306e-06, "loss": 1.8, "step": 30170000 }, { "epoch": 87.33, "learning_rate": 6.350093495276029e-06, "loss": 1.8197, "step": 30170500 }, { "epoch": 87.33, "learning_rate": 6.349369847628751e-06, "loss": 1.8002, "step": 30171000 }, { "epoch": 87.33, "learning_rate": 6.348646199981476e-06, "loss": 1.8198, "step": 30171500 }, { "epoch": 87.34, "learning_rate": 6.347922552334198e-06, "loss": 1.8136, "step": 30172000 }, { "epoch": 87.34, "learning_rate": 6.347198904686921e-06, "loss": 1.8065, "step": 30172500 }, { "epoch": 87.34, "learning_rate": 6.346476704334939e-06, "loss": 1.8219, "step": 30173000 }, { "epoch": 87.34, "learning_rate": 6.345753056687663e-06, "loss": 1.7967, "step": 30173500 }, { "epoch": 87.34, "learning_rate": 6.345029409040386e-06, "loss": 1.8055, "step": 30174000 }, { "epoch": 87.34, "learning_rate": 6.3443057613931084e-06, "loss": 1.8063, "step": 30174500 }, { "epoch": 87.34, "learning_rate": 6.3435821137458315e-06, "loss": 1.7994, "step": 30175000 }, { "epoch": 87.35, "learning_rate": 6.3428584660985554e-06, "loss": 1.8032, "step": 30175500 }, { "epoch": 87.35, "learning_rate": 6.3421348184512785e-06, "loss": 1.7804, "step": 30176000 }, { "epoch": 87.35, "learning_rate": 6.341411170804002e-06, "loss": 1.8112, "step": 30176500 }, { "epoch": 87.35, "learning_rate": 6.3406875231567255e-06, "loss": 1.7997, "step": 30177000 }, { "epoch": 87.35, "learning_rate": 6.339965322804743e-06, "loss": 1.832, "step": 30177500 }, { "epoch": 87.35, "learning_rate": 6.339241675157466e-06, "loss": 1.8308, "step": 30178000 }, { "epoch": 87.35, "learning_rate": 6.338518027510189e-06, "loss": 1.7964, "step": 30178500 }, { "epoch": 87.36, "learning_rate": 6.337794379862913e-06, "loss": 1.7909, "step": 30179000 }, { "epoch": 87.36, "learning_rate": 6.337070732215636e-06, "loss": 1.7993, "step": 30179500 }, { "epoch": 87.36, "learning_rate": 6.336348531863653e-06, "loss": 1.7907, "step": 30180000 }, { "epoch": 87.36, "learning_rate": 6.335624884216376e-06, "loss": 1.8099, "step": 30180500 }, { "epoch": 87.36, "learning_rate": 6.334902683864395e-06, "loss": 1.7976, "step": 30181000 }, { "epoch": 87.36, "learning_rate": 6.334179036217118e-06, "loss": 1.7845, "step": 30181500 }, { "epoch": 87.36, "learning_rate": 6.3334553885698405e-06, "loss": 1.7865, "step": 30182000 }, { "epoch": 87.37, "learning_rate": 6.332731740922564e-06, "loss": 1.78, "step": 30182500 }, { "epoch": 87.37, "learning_rate": 6.3320080932752875e-06, "loss": 1.809, "step": 30183000 }, { "epoch": 87.37, "learning_rate": 6.331284445628011e-06, "loss": 1.8027, "step": 30183500 }, { "epoch": 87.37, "learning_rate": 6.330560797980734e-06, "loss": 1.8136, "step": 30184000 }, { "epoch": 87.37, "learning_rate": 6.329837150333458e-06, "loss": 1.8066, "step": 30184500 }, { "epoch": 87.37, "learning_rate": 6.329114949981476e-06, "loss": 1.8096, "step": 30185000 }, { "epoch": 87.37, "learning_rate": 6.328391302334198e-06, "loss": 1.7982, "step": 30185500 }, { "epoch": 87.38, "learning_rate": 6.327667654686921e-06, "loss": 1.7995, "step": 30186000 }, { "epoch": 87.38, "learning_rate": 6.326944007039645e-06, "loss": 1.8262, "step": 30186500 }, { "epoch": 87.38, "learning_rate": 6.326220359392368e-06, "loss": 1.8341, "step": 30187000 }, { "epoch": 87.38, "learning_rate": 6.325498159040385e-06, "loss": 1.8289, "step": 30187500 }, { "epoch": 87.38, "learning_rate": 6.324774511393108e-06, "loss": 1.7882, "step": 30188000 }, { "epoch": 87.38, "learning_rate": 6.324050863745832e-06, "loss": 1.7845, "step": 30188500 }, { "epoch": 87.38, "learning_rate": 6.323327216098555e-06, "loss": 1.792, "step": 30189000 }, { "epoch": 87.39, "learning_rate": 6.322605015746573e-06, "loss": 1.7988, "step": 30189500 }, { "epoch": 87.39, "learning_rate": 6.321881368099296e-06, "loss": 1.7861, "step": 30190000 }, { "epoch": 87.39, "learning_rate": 6.32115772045202e-06, "loss": 1.7919, "step": 30190500 }, { "epoch": 87.39, "learning_rate": 6.320434072804743e-06, "loss": 1.773, "step": 30191000 }, { "epoch": 87.39, "learning_rate": 6.319710425157466e-06, "loss": 1.8235, "step": 30191500 }, { "epoch": 87.39, "learning_rate": 6.318988224805483e-06, "loss": 1.8032, "step": 30192000 }, { "epoch": 87.39, "learning_rate": 6.318264577158208e-06, "loss": 1.7988, "step": 30192500 }, { "epoch": 87.4, "learning_rate": 6.31754092951093e-06, "loss": 1.8049, "step": 30193000 }, { "epoch": 87.4, "learning_rate": 6.316817281863653e-06, "loss": 1.8019, "step": 30193500 }, { "epoch": 87.4, "learning_rate": 6.316093634216377e-06, "loss": 1.8073, "step": 30194000 }, { "epoch": 87.4, "learning_rate": 6.315371433864395e-06, "loss": 1.7871, "step": 30194500 }, { "epoch": 87.4, "learning_rate": 6.314647786217117e-06, "loss": 1.7935, "step": 30195000 }, { "epoch": 87.4, "learning_rate": 6.3139241385698405e-06, "loss": 1.8103, "step": 30195500 }, { "epoch": 87.41, "learning_rate": 6.313200490922564e-06, "loss": 1.8133, "step": 30196000 }, { "epoch": 87.41, "learning_rate": 6.3124782905705825e-06, "loss": 1.7978, "step": 30196500 }, { "epoch": 87.41, "learning_rate": 6.311754642923305e-06, "loss": 1.818, "step": 30197000 }, { "epoch": 87.41, "learning_rate": 6.311030995276028e-06, "loss": 1.7671, "step": 30197500 }, { "epoch": 87.41, "learning_rate": 6.310307347628753e-06, "loss": 1.8327, "step": 30198000 }, { "epoch": 87.41, "learning_rate": 6.309583699981475e-06, "loss": 1.797, "step": 30198500 }, { "epoch": 87.41, "learning_rate": 6.308860052334198e-06, "loss": 1.8155, "step": 30199000 }, { "epoch": 87.42, "learning_rate": 6.308136404686922e-06, "loss": 1.8201, "step": 30199500 }, { "epoch": 87.42, "learning_rate": 6.307412757039645e-06, "loss": 1.8219, "step": 30200000 }, { "epoch": 87.42, "learning_rate": 6.306689109392368e-06, "loss": 1.8307, "step": 30200500 }, { "epoch": 87.42, "learning_rate": 6.30596546174509e-06, "loss": 1.7679, "step": 30201000 }, { "epoch": 87.42, "learning_rate": 6.305241814097815e-06, "loss": 1.8427, "step": 30201500 }, { "epoch": 87.42, "learning_rate": 6.304518166450537e-06, "loss": 1.812, "step": 30202000 }, { "epoch": 87.42, "learning_rate": 6.303795966098555e-06, "loss": 1.789, "step": 30202500 }, { "epoch": 87.43, "learning_rate": 6.303072318451278e-06, "loss": 1.7973, "step": 30203000 }, { "epoch": 87.43, "learning_rate": 6.302348670804002e-06, "loss": 1.7926, "step": 30203500 }, { "epoch": 87.43, "learning_rate": 6.301625023156725e-06, "loss": 1.8234, "step": 30204000 }, { "epoch": 87.43, "learning_rate": 6.300901375509448e-06, "loss": 1.8286, "step": 30204500 }, { "epoch": 87.43, "learning_rate": 6.3001777278621724e-06, "loss": 1.8108, "step": 30205000 }, { "epoch": 87.43, "learning_rate": 6.299454080214895e-06, "loss": 1.8179, "step": 30205500 }, { "epoch": 87.43, "learning_rate": 6.298731879862913e-06, "loss": 1.7846, "step": 30206000 }, { "epoch": 87.44, "learning_rate": 6.298008232215635e-06, "loss": 1.8125, "step": 30206500 }, { "epoch": 87.44, "learning_rate": 6.297286031863653e-06, "loss": 1.8201, "step": 30207000 }, { "epoch": 87.44, "learning_rate": 6.296562384216377e-06, "loss": 1.8194, "step": 30207500 }, { "epoch": 87.44, "learning_rate": 6.2958387365691e-06, "loss": 1.8119, "step": 30208000 }, { "epoch": 87.44, "learning_rate": 6.295115088921822e-06, "loss": 1.7976, "step": 30208500 }, { "epoch": 87.44, "learning_rate": 6.294391441274547e-06, "loss": 1.8062, "step": 30209000 }, { "epoch": 87.44, "learning_rate": 6.29366779362727e-06, "loss": 1.8361, "step": 30209500 }, { "epoch": 87.45, "learning_rate": 6.292944145979992e-06, "loss": 1.8214, "step": 30210000 }, { "epoch": 87.45, "learning_rate": 6.2922204983327155e-06, "loss": 1.787, "step": 30210500 }, { "epoch": 87.45, "learning_rate": 6.2914968506854394e-06, "loss": 1.8001, "step": 30211000 }, { "epoch": 87.45, "learning_rate": 6.2907732030381625e-06, "loss": 1.7697, "step": 30211500 }, { "epoch": 87.45, "learning_rate": 6.29005100268618e-06, "loss": 1.7771, "step": 30212000 }, { "epoch": 87.45, "learning_rate": 6.2893273550389045e-06, "loss": 1.8292, "step": 30212500 }, { "epoch": 87.45, "learning_rate": 6.288605154686922e-06, "loss": 1.8192, "step": 30213000 }, { "epoch": 87.46, "learning_rate": 6.287882954334939e-06, "loss": 1.8023, "step": 30213500 }, { "epoch": 87.46, "learning_rate": 6.287159306687662e-06, "loss": 1.7866, "step": 30214000 }, { "epoch": 87.46, "learning_rate": 6.286435659040385e-06, "loss": 1.802, "step": 30214500 }, { "epoch": 87.46, "learning_rate": 6.285712011393109e-06, "loss": 1.8047, "step": 30215000 }, { "epoch": 87.46, "learning_rate": 6.284989811041126e-06, "loss": 1.792, "step": 30215500 }, { "epoch": 87.46, "learning_rate": 6.2842661633938495e-06, "loss": 1.8145, "step": 30216000 }, { "epoch": 87.46, "learning_rate": 6.2835425157465725e-06, "loss": 1.8035, "step": 30216500 }, { "epoch": 87.47, "learning_rate": 6.2828188680992965e-06, "loss": 1.7781, "step": 30217000 }, { "epoch": 87.47, "learning_rate": 6.2820952204520195e-06, "loss": 1.7895, "step": 30217500 }, { "epoch": 87.47, "learning_rate": 6.281371572804743e-06, "loss": 1.788, "step": 30218000 }, { "epoch": 87.47, "learning_rate": 6.2806479251574665e-06, "loss": 1.7845, "step": 30218500 }, { "epoch": 87.47, "learning_rate": 6.27992427751019e-06, "loss": 1.8075, "step": 30219000 }, { "epoch": 87.47, "learning_rate": 6.279200629862912e-06, "loss": 1.8072, "step": 30219500 }, { "epoch": 87.47, "learning_rate": 6.278476982215637e-06, "loss": 1.7774, "step": 30220000 }, { "epoch": 87.48, "learning_rate": 6.277753334568359e-06, "loss": 1.8201, "step": 30220500 }, { "epoch": 87.48, "learning_rate": 6.277029686921082e-06, "loss": 1.7974, "step": 30221000 }, { "epoch": 87.48, "learning_rate": 6.276306039273805e-06, "loss": 1.7822, "step": 30221500 }, { "epoch": 87.48, "learning_rate": 6.275583838921824e-06, "loss": 1.7863, "step": 30222000 }, { "epoch": 87.48, "learning_rate": 6.274860191274547e-06, "loss": 1.7855, "step": 30222500 }, { "epoch": 87.48, "learning_rate": 6.274136543627269e-06, "loss": 1.7848, "step": 30223000 }, { "epoch": 87.48, "learning_rate": 6.273412895979992e-06, "loss": 1.779, "step": 30223500 }, { "epoch": 87.49, "learning_rate": 6.272689248332716e-06, "loss": 1.8421, "step": 30224000 }, { "epoch": 87.49, "learning_rate": 6.271965600685439e-06, "loss": 1.7868, "step": 30224500 }, { "epoch": 87.49, "learning_rate": 6.2712419530381625e-06, "loss": 1.7889, "step": 30225000 }, { "epoch": 87.49, "learning_rate": 6.270518305390886e-06, "loss": 1.7887, "step": 30225500 }, { "epoch": 87.49, "learning_rate": 6.2697946577436095e-06, "loss": 1.8139, "step": 30226000 }, { "epoch": 87.49, "learning_rate": 6.269072457391627e-06, "loss": 1.7902, "step": 30226500 }, { "epoch": 87.49, "learning_rate": 6.26834880974435e-06, "loss": 1.8232, "step": 30227000 }, { "epoch": 87.5, "learning_rate": 6.267625162097074e-06, "loss": 1.8035, "step": 30227500 }, { "epoch": 87.5, "learning_rate": 6.266901514449797e-06, "loss": 1.8256, "step": 30228000 }, { "epoch": 87.5, "learning_rate": 6.26617786680252e-06, "loss": 1.7876, "step": 30228500 }, { "epoch": 87.5, "learning_rate": 6.265455666450537e-06, "loss": 1.7931, "step": 30229000 }, { "epoch": 87.5, "learning_rate": 6.264732018803261e-06, "loss": 1.7742, "step": 30229500 }, { "epoch": 87.5, "learning_rate": 6.264008371155984e-06, "loss": 1.8072, "step": 30230000 }, { "epoch": 87.5, "learning_rate": 6.263286170804001e-06, "loss": 1.8129, "step": 30230500 }, { "epoch": 87.51, "learning_rate": 6.2625625231567245e-06, "loss": 1.7912, "step": 30231000 }, { "epoch": 87.51, "learning_rate": 6.261838875509448e-06, "loss": 1.7931, "step": 30231500 }, { "epoch": 87.51, "learning_rate": 6.2611152278621715e-06, "loss": 1.8052, "step": 30232000 }, { "epoch": 87.51, "learning_rate": 6.2603915802148946e-06, "loss": 1.7949, "step": 30232500 }, { "epoch": 87.51, "learning_rate": 6.2596679325676185e-06, "loss": 1.7755, "step": 30233000 }, { "epoch": 87.51, "learning_rate": 6.2589442849203416e-06, "loss": 1.8078, "step": 30233500 }, { "epoch": 87.52, "learning_rate": 6.258220637273065e-06, "loss": 1.7919, "step": 30234000 }, { "epoch": 87.52, "learning_rate": 6.257496989625787e-06, "loss": 1.804, "step": 30234500 }, { "epoch": 87.52, "learning_rate": 6.256773341978512e-06, "loss": 1.8148, "step": 30235000 }, { "epoch": 87.52, "learning_rate": 6.256051141626529e-06, "loss": 1.8164, "step": 30235500 }, { "epoch": 87.52, "learning_rate": 6.255328941274546e-06, "loss": 1.7746, "step": 30236000 }, { "epoch": 87.52, "learning_rate": 6.254605293627269e-06, "loss": 1.8127, "step": 30236500 }, { "epoch": 87.52, "learning_rate": 6.253881645979993e-06, "loss": 1.8101, "step": 30237000 }, { "epoch": 87.53, "learning_rate": 6.253157998332716e-06, "loss": 1.8048, "step": 30237500 }, { "epoch": 87.53, "learning_rate": 6.252434350685439e-06, "loss": 1.7994, "step": 30238000 }, { "epoch": 87.53, "learning_rate": 6.2517107030381616e-06, "loss": 1.7999, "step": 30238500 }, { "epoch": 87.53, "learning_rate": 6.250987055390886e-06, "loss": 1.7997, "step": 30239000 }, { "epoch": 87.53, "learning_rate": 6.2502634077436086e-06, "loss": 1.788, "step": 30239500 }, { "epoch": 87.53, "learning_rate": 6.2495397600963325e-06, "loss": 1.8106, "step": 30240000 }, { "epoch": 87.53, "learning_rate": 6.24881755974435e-06, "loss": 1.7983, "step": 30240500 }, { "epoch": 87.54, "learning_rate": 6.248093912097074e-06, "loss": 1.82, "step": 30241000 }, { "epoch": 87.54, "learning_rate": 6.247370264449797e-06, "loss": 1.7744, "step": 30241500 }, { "epoch": 87.54, "learning_rate": 6.24664661680252e-06, "loss": 1.8322, "step": 30242000 }, { "epoch": 87.54, "learning_rate": 6.245922969155243e-06, "loss": 1.7948, "step": 30242500 }, { "epoch": 87.54, "learning_rate": 6.245200768803261e-06, "loss": 1.8033, "step": 30243000 }, { "epoch": 87.54, "learning_rate": 6.244477121155984e-06, "loss": 1.8007, "step": 30243500 }, { "epoch": 87.54, "learning_rate": 6.243753473508707e-06, "loss": 1.8112, "step": 30244000 }, { "epoch": 87.55, "learning_rate": 6.24302982586143e-06, "loss": 1.8124, "step": 30244500 }, { "epoch": 87.55, "learning_rate": 6.242306178214153e-06, "loss": 1.8152, "step": 30245000 }, { "epoch": 87.55, "learning_rate": 6.2415839778621714e-06, "loss": 1.8145, "step": 30245500 }, { "epoch": 87.55, "learning_rate": 6.2408603302148945e-06, "loss": 1.807, "step": 30246000 }, { "epoch": 87.55, "learning_rate": 6.240136682567618e-06, "loss": 1.7901, "step": 30246500 }, { "epoch": 87.55, "learning_rate": 6.2394130349203415e-06, "loss": 1.8013, "step": 30247000 }, { "epoch": 87.55, "learning_rate": 6.238690834568359e-06, "loss": 1.8184, "step": 30247500 }, { "epoch": 87.56, "learning_rate": 6.237967186921082e-06, "loss": 1.7942, "step": 30248000 }, { "epoch": 87.56, "learning_rate": 6.237243539273806e-06, "loss": 1.8087, "step": 30248500 }, { "epoch": 87.56, "learning_rate": 6.236519891626529e-06, "loss": 1.8118, "step": 30249000 }, { "epoch": 87.56, "learning_rate": 6.235796243979252e-06, "loss": 1.8043, "step": 30249500 }, { "epoch": 87.56, "learning_rate": 6.235072596331975e-06, "loss": 1.7781, "step": 30250000 }, { "epoch": 87.56, "learning_rate": 6.234348948684698e-06, "loss": 1.8014, "step": 30250500 }, { "epoch": 87.56, "learning_rate": 6.233625301037421e-06, "loss": 1.7901, "step": 30251000 }, { "epoch": 87.57, "learning_rate": 6.232903100685439e-06, "loss": 1.8068, "step": 30251500 }, { "epoch": 87.57, "learning_rate": 6.232180900333457e-06, "loss": 1.7881, "step": 30252000 }, { "epoch": 87.57, "learning_rate": 6.2314572526861805e-06, "loss": 1.7986, "step": 30252500 }, { "epoch": 87.57, "learning_rate": 6.2307336050389035e-06, "loss": 1.7873, "step": 30253000 }, { "epoch": 87.57, "learning_rate": 6.230011404686922e-06, "loss": 1.787, "step": 30253500 }, { "epoch": 87.57, "learning_rate": 6.229287757039645e-06, "loss": 1.7831, "step": 30254000 }, { "epoch": 87.57, "learning_rate": 6.228564109392368e-06, "loss": 1.8214, "step": 30254500 }, { "epoch": 87.58, "learning_rate": 6.227840461745091e-06, "loss": 1.8054, "step": 30255000 }, { "epoch": 87.58, "learning_rate": 6.227116814097814e-06, "loss": 1.8027, "step": 30255500 }, { "epoch": 87.58, "learning_rate": 6.226393166450537e-06, "loss": 1.7996, "step": 30256000 }, { "epoch": 87.58, "learning_rate": 6.225669518803261e-06, "loss": 1.7939, "step": 30256500 }, { "epoch": 87.58, "learning_rate": 6.224945871155984e-06, "loss": 1.7868, "step": 30257000 }, { "epoch": 87.58, "learning_rate": 6.224223670804002e-06, "loss": 1.8099, "step": 30257500 }, { "epoch": 87.58, "learning_rate": 6.223500023156725e-06, "loss": 1.7935, "step": 30258000 }, { "epoch": 87.59, "learning_rate": 6.222776375509448e-06, "loss": 1.8291, "step": 30258500 }, { "epoch": 87.59, "learning_rate": 6.222052727862171e-06, "loss": 1.7893, "step": 30259000 }, { "epoch": 87.59, "learning_rate": 6.2213290802148945e-06, "loss": 1.8155, "step": 30259500 }, { "epoch": 87.59, "learning_rate": 6.220605432567618e-06, "loss": 1.7982, "step": 30260000 }, { "epoch": 87.59, "learning_rate": 6.219883232215636e-06, "loss": 1.8104, "step": 30260500 }, { "epoch": 87.59, "learning_rate": 6.219159584568359e-06, "loss": 1.791, "step": 30261000 }, { "epoch": 87.59, "learning_rate": 6.218435936921082e-06, "loss": 1.8047, "step": 30261500 }, { "epoch": 87.6, "learning_rate": 6.217712289273806e-06, "loss": 1.8027, "step": 30262000 }, { "epoch": 87.6, "learning_rate": 6.216990088921823e-06, "loss": 1.8204, "step": 30262500 }, { "epoch": 87.6, "learning_rate": 6.216266441274547e-06, "loss": 1.7988, "step": 30263000 }, { "epoch": 87.6, "learning_rate": 6.215542793627269e-06, "loss": 1.7696, "step": 30263500 }, { "epoch": 87.6, "learning_rate": 6.214819145979993e-06, "loss": 1.8257, "step": 30264000 }, { "epoch": 87.6, "learning_rate": 6.214095498332716e-06, "loss": 1.8288, "step": 30264500 }, { "epoch": 87.6, "learning_rate": 6.213371850685439e-06, "loss": 1.7992, "step": 30265000 }, { "epoch": 87.61, "learning_rate": 6.212648203038163e-06, "loss": 1.8051, "step": 30265500 }, { "epoch": 87.61, "learning_rate": 6.211924555390885e-06, "loss": 1.8011, "step": 30266000 }, { "epoch": 87.61, "learning_rate": 6.2112023550389035e-06, "loss": 1.7942, "step": 30266500 }, { "epoch": 87.61, "learning_rate": 6.2104787073916266e-06, "loss": 1.7952, "step": 30267000 }, { "epoch": 87.61, "learning_rate": 6.2097550597443505e-06, "loss": 1.8084, "step": 30267500 }, { "epoch": 87.61, "learning_rate": 6.209031412097073e-06, "loss": 1.8111, "step": 30268000 }, { "epoch": 87.61, "learning_rate": 6.208307764449797e-06, "loss": 1.8183, "step": 30268500 }, { "epoch": 87.62, "learning_rate": 6.20758411680252e-06, "loss": 1.8003, "step": 30269000 }, { "epoch": 87.62, "learning_rate": 6.206860469155243e-06, "loss": 1.7824, "step": 30269500 }, { "epoch": 87.62, "learning_rate": 6.206136821507967e-06, "loss": 1.8481, "step": 30270000 }, { "epoch": 87.62, "learning_rate": 6.205413173860689e-06, "loss": 1.8196, "step": 30270500 }, { "epoch": 87.62, "learning_rate": 6.204690973508707e-06, "loss": 1.7942, "step": 30271000 }, { "epoch": 87.62, "learning_rate": 6.20396732586143e-06, "loss": 1.8084, "step": 30271500 }, { "epoch": 87.63, "learning_rate": 6.203243678214154e-06, "loss": 1.792, "step": 30272000 }, { "epoch": 87.63, "learning_rate": 6.202520030566876e-06, "loss": 1.8108, "step": 30272500 }, { "epoch": 87.63, "learning_rate": 6.201797830214895e-06, "loss": 1.8326, "step": 30273000 }, { "epoch": 87.63, "learning_rate": 6.2010741825676175e-06, "loss": 1.7913, "step": 30273500 }, { "epoch": 87.63, "learning_rate": 6.200350534920341e-06, "loss": 1.8285, "step": 30274000 }, { "epoch": 87.63, "learning_rate": 6.1996268872730645e-06, "loss": 1.7915, "step": 30274500 }, { "epoch": 87.63, "learning_rate": 6.1989032396257876e-06, "loss": 1.8038, "step": 30275000 }, { "epoch": 87.64, "learning_rate": 6.1981795919785115e-06, "loss": 1.8088, "step": 30275500 }, { "epoch": 87.64, "learning_rate": 6.197455944331234e-06, "loss": 1.825, "step": 30276000 }, { "epoch": 87.64, "learning_rate": 6.196732296683958e-06, "loss": 1.8138, "step": 30276500 }, { "epoch": 87.64, "learning_rate": 6.196008649036681e-06, "loss": 1.7977, "step": 30277000 }, { "epoch": 87.64, "learning_rate": 6.195285001389404e-06, "loss": 1.7981, "step": 30277500 }, { "epoch": 87.64, "learning_rate": 6.194561353742127e-06, "loss": 1.8187, "step": 30278000 }, { "epoch": 87.64, "learning_rate": 6.19383770609485e-06, "loss": 1.8151, "step": 30278500 }, { "epoch": 87.65, "learning_rate": 6.193115505742868e-06, "loss": 1.7886, "step": 30279000 }, { "epoch": 87.65, "learning_rate": 6.192393305390886e-06, "loss": 1.8159, "step": 30279500 }, { "epoch": 87.65, "learning_rate": 6.191669657743608e-06, "loss": 1.7888, "step": 30280000 }, { "epoch": 87.65, "learning_rate": 6.190946010096332e-06, "loss": 1.7859, "step": 30280500 }, { "epoch": 87.65, "learning_rate": 6.190222362449055e-06, "loss": 1.813, "step": 30281000 }, { "epoch": 87.65, "learning_rate": 6.1894987148017785e-06, "loss": 1.8152, "step": 30281500 }, { "epoch": 87.65, "learning_rate": 6.1887750671545024e-06, "loss": 1.8192, "step": 30282000 }, { "epoch": 87.66, "learning_rate": 6.18805286680252e-06, "loss": 1.8089, "step": 30282500 }, { "epoch": 87.66, "learning_rate": 6.187329219155244e-06, "loss": 1.8024, "step": 30283000 }, { "epoch": 87.66, "learning_rate": 6.186605571507966e-06, "loss": 1.8163, "step": 30283500 }, { "epoch": 87.66, "learning_rate": 6.18588192386069e-06, "loss": 1.8286, "step": 30284000 }, { "epoch": 87.66, "learning_rate": 6.185159723508707e-06, "loss": 1.8119, "step": 30284500 }, { "epoch": 87.66, "learning_rate": 6.184436075861431e-06, "loss": 1.8395, "step": 30285000 }, { "epoch": 87.66, "learning_rate": 6.183712428214153e-06, "loss": 1.8102, "step": 30285500 }, { "epoch": 87.67, "learning_rate": 6.182988780566877e-06, "loss": 1.7888, "step": 30286000 }, { "epoch": 87.67, "learning_rate": 6.1822651329196e-06, "loss": 1.7864, "step": 30286500 }, { "epoch": 87.67, "learning_rate": 6.181541485272323e-06, "loss": 1.8107, "step": 30287000 }, { "epoch": 87.67, "learning_rate": 6.180817837625046e-06, "loss": 1.8415, "step": 30287500 }, { "epoch": 87.67, "learning_rate": 6.1800970845683595e-06, "loss": 1.8061, "step": 30288000 }, { "epoch": 87.67, "learning_rate": 6.179373436921082e-06, "loss": 1.8241, "step": 30288500 }, { "epoch": 87.67, "learning_rate": 6.178649789273806e-06, "loss": 1.7866, "step": 30289000 }, { "epoch": 87.68, "learning_rate": 6.177926141626529e-06, "loss": 1.8047, "step": 30289500 }, { "epoch": 87.68, "learning_rate": 6.177202493979252e-06, "loss": 1.781, "step": 30290000 }, { "epoch": 87.68, "learning_rate": 6.176478846331975e-06, "loss": 1.7939, "step": 30290500 }, { "epoch": 87.68, "learning_rate": 6.175755198684698e-06, "loss": 1.8146, "step": 30291000 }, { "epoch": 87.68, "learning_rate": 6.175031551037422e-06, "loss": 1.8085, "step": 30291500 }, { "epoch": 87.68, "learning_rate": 6.174307903390145e-06, "loss": 1.7927, "step": 30292000 }, { "epoch": 87.68, "learning_rate": 6.173585703038163e-06, "loss": 1.7804, "step": 30292500 }, { "epoch": 87.69, "learning_rate": 6.172862055390886e-06, "loss": 1.8121, "step": 30293000 }, { "epoch": 87.69, "learning_rate": 6.172138407743609e-06, "loss": 1.778, "step": 30293500 }, { "epoch": 87.69, "learning_rate": 6.1714162073916265e-06, "loss": 1.8211, "step": 30294000 }, { "epoch": 87.69, "learning_rate": 6.17069255974435e-06, "loss": 1.8262, "step": 30294500 }, { "epoch": 87.69, "learning_rate": 6.1699689120970735e-06, "loss": 1.81, "step": 30295000 }, { "epoch": 87.69, "learning_rate": 6.1692452644497965e-06, "loss": 1.8006, "step": 30295500 }, { "epoch": 87.69, "learning_rate": 6.168523064097814e-06, "loss": 1.8077, "step": 30296000 }, { "epoch": 87.7, "learning_rate": 6.167799416450538e-06, "loss": 1.8484, "step": 30296500 }, { "epoch": 87.7, "learning_rate": 6.167075768803261e-06, "loss": 1.8034, "step": 30297000 }, { "epoch": 87.7, "learning_rate": 6.166352121155984e-06, "loss": 1.8062, "step": 30297500 }, { "epoch": 87.7, "learning_rate": 6.165628473508707e-06, "loss": 1.8073, "step": 30298000 }, { "epoch": 87.7, "learning_rate": 6.16490482586143e-06, "loss": 1.8007, "step": 30298500 }, { "epoch": 87.7, "learning_rate": 6.164181178214154e-06, "loss": 1.7938, "step": 30299000 }, { "epoch": 87.7, "learning_rate": 6.163457530566877e-06, "loss": 1.7881, "step": 30299500 }, { "epoch": 87.71, "learning_rate": 6.1627338829196e-06, "loss": 1.7954, "step": 30300000 }, { "epoch": 87.71, "learning_rate": 6.162011682567618e-06, "loss": 1.7975, "step": 30300500 }, { "epoch": 87.71, "learning_rate": 6.161288034920341e-06, "loss": 1.7792, "step": 30301000 }, { "epoch": 87.71, "learning_rate": 6.160564387273064e-06, "loss": 1.8014, "step": 30301500 }, { "epoch": 87.71, "learning_rate": 6.1598407396257875e-06, "loss": 1.7969, "step": 30302000 }, { "epoch": 87.71, "learning_rate": 6.1591185392738056e-06, "loss": 1.8206, "step": 30302500 }, { "epoch": 87.71, "learning_rate": 6.158394891626529e-06, "loss": 1.8145, "step": 30303000 }, { "epoch": 87.72, "learning_rate": 6.157671243979252e-06, "loss": 1.796, "step": 30303500 }, { "epoch": 87.72, "learning_rate": 6.156947596331975e-06, "loss": 1.7824, "step": 30304000 }, { "epoch": 87.72, "learning_rate": 6.156225395979993e-06, "loss": 1.7885, "step": 30304500 }, { "epoch": 87.72, "learning_rate": 6.155501748332716e-06, "loss": 1.8066, "step": 30305000 }, { "epoch": 87.72, "learning_rate": 6.154778100685439e-06, "loss": 1.8116, "step": 30305500 }, { "epoch": 87.72, "learning_rate": 6.154054453038163e-06, "loss": 1.7917, "step": 30306000 }, { "epoch": 87.72, "learning_rate": 6.153330805390886e-06, "loss": 1.8043, "step": 30306500 }, { "epoch": 87.73, "learning_rate": 6.152608605038903e-06, "loss": 1.8262, "step": 30307000 }, { "epoch": 87.73, "learning_rate": 6.151884957391626e-06, "loss": 1.8376, "step": 30307500 }, { "epoch": 87.73, "learning_rate": 6.15116130974435e-06, "loss": 1.8143, "step": 30308000 }, { "epoch": 87.73, "learning_rate": 6.150437662097073e-06, "loss": 1.83, "step": 30308500 }, { "epoch": 87.73, "learning_rate": 6.1497154617450915e-06, "loss": 1.8029, "step": 30309000 }, { "epoch": 87.73, "learning_rate": 6.148991814097815e-06, "loss": 1.8025, "step": 30309500 }, { "epoch": 87.74, "learning_rate": 6.148268166450538e-06, "loss": 1.8054, "step": 30310000 }, { "epoch": 87.74, "learning_rate": 6.147544518803261e-06, "loss": 1.789, "step": 30310500 }, { "epoch": 87.74, "learning_rate": 6.146820871155984e-06, "loss": 1.8201, "step": 30311000 }, { "epoch": 87.74, "learning_rate": 6.146098670804002e-06, "loss": 1.8292, "step": 30311500 }, { "epoch": 87.74, "learning_rate": 6.145375023156725e-06, "loss": 1.7879, "step": 30312000 }, { "epoch": 87.74, "learning_rate": 6.144651375509448e-06, "loss": 1.8368, "step": 30312500 }, { "epoch": 87.74, "learning_rate": 6.14393062245276e-06, "loss": 1.7833, "step": 30313000 }, { "epoch": 87.75, "learning_rate": 6.1432069748054834e-06, "loss": 1.8123, "step": 30313500 }, { "epoch": 87.75, "learning_rate": 6.142483327158207e-06, "loss": 1.7887, "step": 30314000 }, { "epoch": 87.75, "learning_rate": 6.1417596795109304e-06, "loss": 1.7951, "step": 30314500 }, { "epoch": 87.75, "learning_rate": 6.1410360318636535e-06, "loss": 1.801, "step": 30315000 }, { "epoch": 87.75, "learning_rate": 6.140312384216377e-06, "loss": 1.7933, "step": 30315500 }, { "epoch": 87.75, "learning_rate": 6.139590183864395e-06, "loss": 1.7822, "step": 30316000 }, { "epoch": 87.75, "learning_rate": 6.138866536217118e-06, "loss": 1.8122, "step": 30316500 }, { "epoch": 87.76, "learning_rate": 6.138142888569841e-06, "loss": 1.8093, "step": 30317000 }, { "epoch": 87.76, "learning_rate": 6.137419240922564e-06, "loss": 1.7799, "step": 30317500 }, { "epoch": 87.76, "learning_rate": 6.136695593275287e-06, "loss": 1.8101, "step": 30318000 }, { "epoch": 87.76, "learning_rate": 6.135971945628011e-06, "loss": 1.8023, "step": 30318500 }, { "epoch": 87.76, "learning_rate": 6.135248297980734e-06, "loss": 1.8235, "step": 30319000 }, { "epoch": 87.76, "learning_rate": 6.134524650333457e-06, "loss": 1.7778, "step": 30319500 }, { "epoch": 87.76, "learning_rate": 6.133802449981474e-06, "loss": 1.7874, "step": 30320000 }, { "epoch": 87.77, "learning_rate": 6.1330802496294925e-06, "loss": 1.8329, "step": 30320500 }, { "epoch": 87.77, "learning_rate": 6.1323566019822155e-06, "loss": 1.8202, "step": 30321000 }, { "epoch": 87.77, "learning_rate": 6.1316329543349395e-06, "loss": 1.809, "step": 30321500 }, { "epoch": 87.77, "learning_rate": 6.1309093066876625e-06, "loss": 1.8252, "step": 30322000 }, { "epoch": 87.77, "learning_rate": 6.130185659040386e-06, "loss": 1.8245, "step": 30322500 }, { "epoch": 87.77, "learning_rate": 6.129462011393109e-06, "loss": 1.7971, "step": 30323000 }, { "epoch": 87.77, "learning_rate": 6.128738363745832e-06, "loss": 1.8086, "step": 30323500 }, { "epoch": 87.78, "learning_rate": 6.128014716098556e-06, "loss": 1.8166, "step": 30324000 }, { "epoch": 87.78, "learning_rate": 6.127292515746573e-06, "loss": 1.8172, "step": 30324500 }, { "epoch": 87.78, "learning_rate": 6.126568868099297e-06, "loss": 1.7865, "step": 30325000 }, { "epoch": 87.78, "learning_rate": 6.125845220452019e-06, "loss": 1.7965, "step": 30325500 }, { "epoch": 87.78, "learning_rate": 6.125121572804743e-06, "loss": 1.7606, "step": 30326000 }, { "epoch": 87.78, "learning_rate": 6.124397925157465e-06, "loss": 1.82, "step": 30326500 }, { "epoch": 87.78, "learning_rate": 6.123674277510189e-06, "loss": 1.8018, "step": 30327000 }, { "epoch": 87.79, "learning_rate": 6.122950629862913e-06, "loss": 1.7833, "step": 30327500 }, { "epoch": 87.79, "learning_rate": 6.122226982215635e-06, "loss": 1.7893, "step": 30328000 }, { "epoch": 87.79, "learning_rate": 6.1215047818636535e-06, "loss": 1.8163, "step": 30328500 }, { "epoch": 87.79, "learning_rate": 6.1207811342163766e-06, "loss": 1.8034, "step": 30329000 }, { "epoch": 87.79, "learning_rate": 6.1200574865691005e-06, "loss": 1.7886, "step": 30329500 }, { "epoch": 87.79, "learning_rate": 6.119333838921823e-06, "loss": 1.8242, "step": 30330000 }, { "epoch": 87.79, "learning_rate": 6.118610191274547e-06, "loss": 1.808, "step": 30330500 }, { "epoch": 87.8, "learning_rate": 6.11788654362727e-06, "loss": 1.8018, "step": 30331000 }, { "epoch": 87.8, "learning_rate": 6.117162895979993e-06, "loss": 1.8008, "step": 30331500 }, { "epoch": 87.8, "learning_rate": 6.116439248332717e-06, "loss": 1.8218, "step": 30332000 }, { "epoch": 87.8, "learning_rate": 6.115715600685439e-06, "loss": 1.8011, "step": 30332500 }, { "epoch": 87.8, "learning_rate": 6.114991953038163e-06, "loss": 1.8045, "step": 30333000 }, { "epoch": 87.8, "learning_rate": 6.11426975268618e-06, "loss": 1.8117, "step": 30333500 }, { "epoch": 87.8, "learning_rate": 6.113546105038904e-06, "loss": 1.7937, "step": 30334000 }, { "epoch": 87.81, "learning_rate": 6.112822457391626e-06, "loss": 1.7971, "step": 30334500 }, { "epoch": 87.81, "learning_rate": 6.11209880974435e-06, "loss": 1.822, "step": 30335000 }, { "epoch": 87.81, "learning_rate": 6.111375162097073e-06, "loss": 1.8005, "step": 30335500 }, { "epoch": 87.81, "learning_rate": 6.110651514449796e-06, "loss": 1.7807, "step": 30336000 }, { "epoch": 87.81, "learning_rate": 6.10992786680252e-06, "loss": 1.7935, "step": 30336500 }, { "epoch": 87.81, "learning_rate": 6.1092056664505376e-06, "loss": 1.8101, "step": 30337000 }, { "epoch": 87.81, "learning_rate": 6.108482018803261e-06, "loss": 1.8069, "step": 30337500 }, { "epoch": 87.82, "learning_rate": 6.107758371155984e-06, "loss": 1.8005, "step": 30338000 }, { "epoch": 87.82, "learning_rate": 6.107034723508708e-06, "loss": 1.7802, "step": 30338500 }, { "epoch": 87.82, "learning_rate": 6.10631107586143e-06, "loss": 1.7936, "step": 30339000 }, { "epoch": 87.82, "learning_rate": 6.105587428214154e-06, "loss": 1.8109, "step": 30339500 }, { "epoch": 87.82, "learning_rate": 6.104865227862171e-06, "loss": 1.8208, "step": 30340000 }, { "epoch": 87.82, "learning_rate": 6.104141580214895e-06, "loss": 1.808, "step": 30340500 }, { "epoch": 87.82, "learning_rate": 6.103417932567618e-06, "loss": 1.7879, "step": 30341000 }, { "epoch": 87.83, "learning_rate": 6.102695732215636e-06, "loss": 1.8012, "step": 30341500 }, { "epoch": 87.83, "learning_rate": 6.101972084568358e-06, "loss": 1.7942, "step": 30342000 }, { "epoch": 87.83, "learning_rate": 6.101248436921082e-06, "loss": 1.7853, "step": 30342500 }, { "epoch": 87.83, "learning_rate": 6.100524789273805e-06, "loss": 1.8081, "step": 30343000 }, { "epoch": 87.83, "learning_rate": 6.0998011416265285e-06, "loss": 1.7937, "step": 30343500 }, { "epoch": 87.83, "learning_rate": 6.0990774939792524e-06, "loss": 1.8354, "step": 30344000 }, { "epoch": 87.83, "learning_rate": 6.098353846331975e-06, "loss": 1.8411, "step": 30344500 }, { "epoch": 87.84, "learning_rate": 6.097630198684699e-06, "loss": 1.814, "step": 30345000 }, { "epoch": 87.84, "learning_rate": 6.096907998332716e-06, "loss": 1.8185, "step": 30345500 }, { "epoch": 87.84, "learning_rate": 6.09618435068544e-06, "loss": 1.824, "step": 30346000 }, { "epoch": 87.84, "learning_rate": 6.095460703038163e-06, "loss": 1.81, "step": 30346500 }, { "epoch": 87.84, "learning_rate": 6.09473850268618e-06, "loss": 1.7795, "step": 30347000 }, { "epoch": 87.84, "learning_rate": 6.094014855038903e-06, "loss": 1.8017, "step": 30347500 }, { "epoch": 87.85, "learning_rate": 6.093291207391627e-06, "loss": 1.8047, "step": 30348000 }, { "epoch": 87.85, "learning_rate": 6.09256755974435e-06, "loss": 1.7845, "step": 30348500 }, { "epoch": 87.85, "learning_rate": 6.091843912097073e-06, "loss": 1.783, "step": 30349000 }, { "epoch": 87.85, "learning_rate": 6.091120264449796e-06, "loss": 1.8551, "step": 30349500 }, { "epoch": 87.85, "learning_rate": 6.090396616802519e-06, "loss": 1.8221, "step": 30350000 }, { "epoch": 87.85, "learning_rate": 6.089672969155243e-06, "loss": 1.8116, "step": 30350500 }, { "epoch": 87.85, "learning_rate": 6.088950768803261e-06, "loss": 1.805, "step": 30351000 }, { "epoch": 87.86, "learning_rate": 6.088227121155984e-06, "loss": 1.8205, "step": 30351500 }, { "epoch": 87.86, "learning_rate": 6.087503473508708e-06, "loss": 1.7777, "step": 30352000 }, { "epoch": 87.86, "learning_rate": 6.086779825861431e-06, "loss": 1.8189, "step": 30352500 }, { "epoch": 87.86, "learning_rate": 6.086056178214154e-06, "loss": 1.8367, "step": 30353000 }, { "epoch": 87.86, "learning_rate": 6.085332530566877e-06, "loss": 1.7773, "step": 30353500 }, { "epoch": 87.86, "learning_rate": 6.0846088829196e-06, "loss": 1.7906, "step": 30354000 }, { "epoch": 87.86, "learning_rate": 6.083885235272323e-06, "loss": 1.8052, "step": 30354500 }, { "epoch": 87.87, "learning_rate": 6.083163034920341e-06, "loss": 1.7921, "step": 30355000 }, { "epoch": 87.87, "learning_rate": 6.082440834568359e-06, "loss": 1.801, "step": 30355500 }, { "epoch": 87.87, "learning_rate": 6.081717186921082e-06, "loss": 1.8096, "step": 30356000 }, { "epoch": 87.87, "learning_rate": 6.080993539273805e-06, "loss": 1.7988, "step": 30356500 }, { "epoch": 87.87, "learning_rate": 6.0802698916265284e-06, "loss": 1.8225, "step": 30357000 }, { "epoch": 87.87, "learning_rate": 6.0795476912745465e-06, "loss": 1.7895, "step": 30357500 }, { "epoch": 87.87, "learning_rate": 6.07882404362727e-06, "loss": 1.7848, "step": 30358000 }, { "epoch": 87.88, "learning_rate": 6.078100395979993e-06, "loss": 1.798, "step": 30358500 }, { "epoch": 87.88, "learning_rate": 6.077376748332716e-06, "loss": 1.7983, "step": 30359000 }, { "epoch": 87.88, "learning_rate": 6.07665310068544e-06, "loss": 1.8082, "step": 30359500 }, { "epoch": 87.88, "learning_rate": 6.075930900333457e-06, "loss": 1.8115, "step": 30360000 }, { "epoch": 87.88, "learning_rate": 6.07520725268618e-06, "loss": 1.8223, "step": 30360500 }, { "epoch": 87.88, "learning_rate": 6.074483605038903e-06, "loss": 1.7817, "step": 30361000 }, { "epoch": 87.88, "learning_rate": 6.073759957391627e-06, "loss": 1.799, "step": 30361500 }, { "epoch": 87.89, "learning_rate": 6.07303630974435e-06, "loss": 1.8017, "step": 30362000 }, { "epoch": 87.89, "learning_rate": 6.072312662097073e-06, "loss": 1.787, "step": 30362500 }, { "epoch": 87.89, "learning_rate": 6.071590461745091e-06, "loss": 1.8127, "step": 30363000 }, { "epoch": 87.89, "learning_rate": 6.070866814097814e-06, "loss": 1.8122, "step": 30363500 }, { "epoch": 87.89, "learning_rate": 6.0701431664505375e-06, "loss": 1.7999, "step": 30364000 }, { "epoch": 87.89, "learning_rate": 6.0694195188032605e-06, "loss": 1.771, "step": 30364500 }, { "epoch": 87.89, "learning_rate": 6.0686958711559845e-06, "loss": 1.807, "step": 30365000 }, { "epoch": 87.9, "learning_rate": 6.067973670804002e-06, "loss": 1.8332, "step": 30365500 }, { "epoch": 87.9, "learning_rate": 6.067250023156725e-06, "loss": 1.813, "step": 30366000 }, { "epoch": 87.9, "learning_rate": 6.066526375509448e-06, "loss": 1.79, "step": 30366500 }, { "epoch": 87.9, "learning_rate": 6.065802727862172e-06, "loss": 1.7811, "step": 30367000 }, { "epoch": 87.9, "learning_rate": 6.065080527510189e-06, "loss": 1.7942, "step": 30367500 }, { "epoch": 87.9, "learning_rate": 6.064356879862913e-06, "loss": 1.7968, "step": 30368000 }, { "epoch": 87.9, "learning_rate": 6.06363467951093e-06, "loss": 1.8252, "step": 30368500 }, { "epoch": 87.91, "learning_rate": 6.062911031863653e-06, "loss": 1.8047, "step": 30369000 }, { "epoch": 87.91, "learning_rate": 6.062187384216376e-06, "loss": 1.8033, "step": 30369500 }, { "epoch": 87.91, "learning_rate": 6.0614637365691e-06, "loss": 1.7802, "step": 30370000 }, { "epoch": 87.91, "learning_rate": 6.0607400889218226e-06, "loss": 1.8013, "step": 30370500 }, { "epoch": 87.91, "learning_rate": 6.0600164412745465e-06, "loss": 1.813, "step": 30371000 }, { "epoch": 87.91, "learning_rate": 6.0592927936272696e-06, "loss": 1.8179, "step": 30371500 }, { "epoch": 87.91, "learning_rate": 6.058570593275288e-06, "loss": 1.7924, "step": 30372000 }, { "epoch": 87.92, "learning_rate": 6.057846945628011e-06, "loss": 1.7919, "step": 30372500 }, { "epoch": 87.92, "learning_rate": 6.057123297980734e-06, "loss": 1.8113, "step": 30373000 }, { "epoch": 87.92, "learning_rate": 6.056399650333457e-06, "loss": 1.7964, "step": 30373500 }, { "epoch": 87.92, "learning_rate": 6.05567600268618e-06, "loss": 1.7899, "step": 30374000 }, { "epoch": 87.92, "learning_rate": 6.054952355038904e-06, "loss": 1.7886, "step": 30374500 }, { "epoch": 87.92, "learning_rate": 6.054230154686921e-06, "loss": 1.7847, "step": 30375000 }, { "epoch": 87.92, "learning_rate": 6.053506507039645e-06, "loss": 1.8198, "step": 30375500 }, { "epoch": 87.93, "learning_rate": 6.052782859392367e-06, "loss": 1.7856, "step": 30376000 }, { "epoch": 87.93, "learning_rate": 6.052059211745091e-06, "loss": 1.8375, "step": 30376500 }, { "epoch": 87.93, "learning_rate": 6.051335564097814e-06, "loss": 1.8027, "step": 30377000 }, { "epoch": 87.93, "learning_rate": 6.0506133637458324e-06, "loss": 1.8211, "step": 30377500 }, { "epoch": 87.93, "learning_rate": 6.049889716098555e-06, "loss": 1.8212, "step": 30378000 }, { "epoch": 87.93, "learning_rate": 6.049166068451279e-06, "loss": 1.8265, "step": 30378500 }, { "epoch": 87.93, "learning_rate": 6.048442420804002e-06, "loss": 1.814, "step": 30379000 }, { "epoch": 87.94, "learning_rate": 6.047718773156725e-06, "loss": 1.8134, "step": 30379500 }, { "epoch": 87.94, "learning_rate": 6.046995125509449e-06, "loss": 1.8094, "step": 30380000 }, { "epoch": 87.94, "learning_rate": 6.046271477862171e-06, "loss": 1.8136, "step": 30380500 }, { "epoch": 87.94, "learning_rate": 6.045547830214895e-06, "loss": 1.8045, "step": 30381000 }, { "epoch": 87.94, "learning_rate": 6.044824182567618e-06, "loss": 1.788, "step": 30381500 }, { "epoch": 87.94, "learning_rate": 6.044101982215636e-06, "loss": 1.8157, "step": 30382000 }, { "epoch": 87.94, "learning_rate": 6.043378334568358e-06, "loss": 1.7997, "step": 30382500 }, { "epoch": 87.95, "learning_rate": 6.042654686921082e-06, "loss": 1.7934, "step": 30383000 }, { "epoch": 87.95, "learning_rate": 6.041931039273806e-06, "loss": 1.8065, "step": 30383500 }, { "epoch": 87.95, "learning_rate": 6.041208838921823e-06, "loss": 1.7962, "step": 30384000 }, { "epoch": 87.95, "learning_rate": 6.0404851912745464e-06, "loss": 1.7836, "step": 30384500 }, { "epoch": 87.95, "learning_rate": 6.0397615436272695e-06, "loss": 1.8014, "step": 30385000 }, { "epoch": 87.95, "learning_rate": 6.0390378959799934e-06, "loss": 1.8538, "step": 30385500 }, { "epoch": 87.96, "learning_rate": 6.038314248332716e-06, "loss": 1.8265, "step": 30386000 }, { "epoch": 87.96, "learning_rate": 6.037592047980735e-06, "loss": 1.8033, "step": 30386500 }, { "epoch": 87.96, "learning_rate": 6.036868400333457e-06, "loss": 1.8128, "step": 30387000 }, { "epoch": 87.96, "learning_rate": 6.036144752686181e-06, "loss": 1.7938, "step": 30387500 }, { "epoch": 87.96, "learning_rate": 6.035421105038903e-06, "loss": 1.7861, "step": 30388000 }, { "epoch": 87.96, "learning_rate": 6.034697457391627e-06, "loss": 1.801, "step": 30388500 }, { "epoch": 87.96, "learning_rate": 6.033975257039644e-06, "loss": 1.8133, "step": 30389000 }, { "epoch": 87.97, "learning_rate": 6.033251609392368e-06, "loss": 1.7965, "step": 30389500 }, { "epoch": 87.97, "learning_rate": 6.032529409040385e-06, "loss": 1.7968, "step": 30390000 }, { "epoch": 87.97, "learning_rate": 6.031805761393109e-06, "loss": 1.8012, "step": 30390500 }, { "epoch": 87.97, "learning_rate": 6.0310821137458315e-06, "loss": 1.7915, "step": 30391000 }, { "epoch": 87.97, "learning_rate": 6.0303584660985555e-06, "loss": 1.8328, "step": 30391500 }, { "epoch": 87.97, "learning_rate": 6.0296348184512785e-06, "loss": 1.7838, "step": 30392000 }, { "epoch": 87.97, "learning_rate": 6.028912618099297e-06, "loss": 1.8231, "step": 30392500 }, { "epoch": 87.98, "learning_rate": 6.02818897045202e-06, "loss": 1.808, "step": 30393000 }, { "epoch": 87.98, "learning_rate": 6.027465322804743e-06, "loss": 1.7991, "step": 30393500 }, { "epoch": 87.98, "learning_rate": 6.026741675157466e-06, "loss": 1.7757, "step": 30394000 }, { "epoch": 87.98, "learning_rate": 6.026018027510189e-06, "loss": 1.7893, "step": 30394500 }, { "epoch": 87.98, "learning_rate": 6.025294379862913e-06, "loss": 1.8197, "step": 30395000 }, { "epoch": 87.98, "learning_rate": 6.024570732215636e-06, "loss": 1.8207, "step": 30395500 }, { "epoch": 87.98, "learning_rate": 6.023847084568359e-06, "loss": 1.8053, "step": 30396000 }, { "epoch": 87.99, "learning_rate": 6.023123436921082e-06, "loss": 1.8117, "step": 30396500 }, { "epoch": 87.99, "learning_rate": 6.0224012365691e-06, "loss": 1.8445, "step": 30397000 }, { "epoch": 87.99, "learning_rate": 6.021677588921823e-06, "loss": 1.8305, "step": 30397500 }, { "epoch": 87.99, "learning_rate": 6.020953941274546e-06, "loss": 1.8192, "step": 30398000 }, { "epoch": 87.99, "learning_rate": 6.0202302936272695e-06, "loss": 1.7993, "step": 30398500 }, { "epoch": 87.99, "learning_rate": 6.0195066459799925e-06, "loss": 1.8001, "step": 30399000 }, { "epoch": 87.99, "learning_rate": 6.018784445628011e-06, "loss": 1.8175, "step": 30399500 }, { "epoch": 88.0, "learning_rate": 6.018060797980734e-06, "loss": 1.8197, "step": 30400000 }, { "epoch": 88.0, "learning_rate": 6.017337150333458e-06, "loss": 1.8015, "step": 30400500 }, { "epoch": 88.0, "learning_rate": 6.01661350268618e-06, "loss": 1.8118, "step": 30401000 }, { "epoch": 88.0, "learning_rate": 6.015889855038904e-06, "loss": 1.8174, "step": 30401500 }, { "epoch": 88.0, "eval_accuracy": 0.6895304195333495, "eval_accuracy_mlm": 0.6585697473221408, "eval_accuracy_nsp": 0.8556759382748301, "eval_loss": 2.19228458404541, "eval_runtime": 331.5315, "eval_samples_per_second": 1316.273, "eval_steps_per_second": 54.845, "step": 30401536 }, { "epoch": 88.0, "learning_rate": 6.015166207391627e-06, "loss": 1.7828, "step": 30402000 }, { "epoch": 88.0, "learning_rate": 6.01444255974435e-06, "loss": 1.8051, "step": 30402500 }, { "epoch": 88.0, "learning_rate": 6.013718912097073e-06, "loss": 1.7942, "step": 30403000 }, { "epoch": 88.01, "learning_rate": 6.012996711745091e-06, "loss": 1.7773, "step": 30403500 }, { "epoch": 88.01, "learning_rate": 6.012273064097814e-06, "loss": 1.8229, "step": 30404000 }, { "epoch": 88.01, "learning_rate": 6.011550863745832e-06, "loss": 1.7776, "step": 30404500 }, { "epoch": 88.01, "learning_rate": 6.010827216098555e-06, "loss": 1.7976, "step": 30405000 }, { "epoch": 88.01, "learning_rate": 6.0101035684512785e-06, "loss": 1.7747, "step": 30405500 }, { "epoch": 88.01, "learning_rate": 6.0093799208040016e-06, "loss": 1.7868, "step": 30406000 }, { "epoch": 88.01, "learning_rate": 6.008656273156725e-06, "loss": 1.814, "step": 30406500 }, { "epoch": 88.02, "learning_rate": 6.007932625509449e-06, "loss": 1.8139, "step": 30407000 }, { "epoch": 88.02, "learning_rate": 6.007208977862172e-06, "loss": 1.8089, "step": 30407500 }, { "epoch": 88.02, "learning_rate": 6.006485330214895e-06, "loss": 1.784, "step": 30408000 }, { "epoch": 88.02, "learning_rate": 6.005761682567618e-06, "loss": 1.8031, "step": 30408500 }, { "epoch": 88.02, "learning_rate": 6.005039482215636e-06, "loss": 1.7948, "step": 30409000 }, { "epoch": 88.02, "learning_rate": 6.004315834568359e-06, "loss": 1.8045, "step": 30409500 }, { "epoch": 88.02, "learning_rate": 6.003592186921082e-06, "loss": 1.8102, "step": 30410000 }, { "epoch": 88.03, "learning_rate": 6.002868539273805e-06, "loss": 1.8069, "step": 30410500 }, { "epoch": 88.03, "learning_rate": 6.002144891626529e-06, "loss": 1.8042, "step": 30411000 }, { "epoch": 88.03, "learning_rate": 6.001422691274546e-06, "loss": 1.7965, "step": 30411500 }, { "epoch": 88.03, "learning_rate": 6.000699043627269e-06, "loss": 1.7951, "step": 30412000 }, { "epoch": 88.03, "learning_rate": 5.9999753959799925e-06, "loss": 1.8128, "step": 30412500 }, { "epoch": 88.03, "learning_rate": 5.9992517483327164e-06, "loss": 1.7922, "step": 30413000 }, { "epoch": 88.03, "learning_rate": 5.998529547980734e-06, "loss": 1.7716, "step": 30413500 }, { "epoch": 88.04, "learning_rate": 5.997805900333458e-06, "loss": 1.7963, "step": 30414000 }, { "epoch": 88.04, "learning_rate": 5.99708225268618e-06, "loss": 1.7643, "step": 30414500 }, { "epoch": 88.04, "learning_rate": 5.996358605038904e-06, "loss": 1.7882, "step": 30415000 }, { "epoch": 88.04, "learning_rate": 5.995634957391627e-06, "loss": 1.8161, "step": 30415500 }, { "epoch": 88.04, "learning_rate": 5.99491130974435e-06, "loss": 1.8081, "step": 30416000 }, { "epoch": 88.04, "learning_rate": 5.994189109392368e-06, "loss": 1.7984, "step": 30416500 }, { "epoch": 88.04, "learning_rate": 5.993465461745091e-06, "loss": 1.7962, "step": 30417000 }, { "epoch": 88.05, "learning_rate": 5.992741814097814e-06, "loss": 1.8108, "step": 30417500 }, { "epoch": 88.05, "learning_rate": 5.992019613745832e-06, "loss": 1.8049, "step": 30418000 }, { "epoch": 88.05, "learning_rate": 5.991295966098555e-06, "loss": 1.7987, "step": 30418500 }, { "epoch": 88.05, "learning_rate": 5.9905723184512784e-06, "loss": 1.7897, "step": 30419000 }, { "epoch": 88.05, "learning_rate": 5.9898486708040015e-06, "loss": 1.807, "step": 30419500 }, { "epoch": 88.05, "learning_rate": 5.989125023156725e-06, "loss": 1.7927, "step": 30420000 }, { "epoch": 88.05, "learning_rate": 5.9884013755094485e-06, "loss": 1.7873, "step": 30420500 }, { "epoch": 88.06, "learning_rate": 5.987677727862172e-06, "loss": 1.8116, "step": 30421000 }, { "epoch": 88.06, "learning_rate": 5.98695552751019e-06, "loss": 1.7961, "step": 30421500 }, { "epoch": 88.06, "learning_rate": 5.986231879862912e-06, "loss": 1.7925, "step": 30422000 }, { "epoch": 88.06, "learning_rate": 5.985508232215636e-06, "loss": 1.793, "step": 30422500 }, { "epoch": 88.06, "learning_rate": 5.984784584568359e-06, "loss": 1.8137, "step": 30423000 }, { "epoch": 88.06, "learning_rate": 5.984060936921082e-06, "loss": 1.7935, "step": 30423500 }, { "epoch": 88.07, "learning_rate": 5.983337289273806e-06, "loss": 1.7991, "step": 30424000 }, { "epoch": 88.07, "learning_rate": 5.982613641626528e-06, "loss": 1.8143, "step": 30424500 }, { "epoch": 88.07, "learning_rate": 5.981889993979252e-06, "loss": 1.8027, "step": 30425000 }, { "epoch": 88.07, "learning_rate": 5.981166346331974e-06, "loss": 1.7724, "step": 30425500 }, { "epoch": 88.07, "learning_rate": 5.980442698684698e-06, "loss": 1.7676, "step": 30426000 }, { "epoch": 88.07, "learning_rate": 5.9797204983327155e-06, "loss": 1.8116, "step": 30426500 }, { "epoch": 88.07, "learning_rate": 5.9789968506854395e-06, "loss": 1.7833, "step": 30427000 }, { "epoch": 88.08, "learning_rate": 5.9782732030381625e-06, "loss": 1.7977, "step": 30427500 }, { "epoch": 88.08, "learning_rate": 5.977549555390886e-06, "loss": 1.7916, "step": 30428000 }, { "epoch": 88.08, "learning_rate": 5.976827355038903e-06, "loss": 1.8014, "step": 30428500 }, { "epoch": 88.08, "learning_rate": 5.976103707391627e-06, "loss": 1.83, "step": 30429000 }, { "epoch": 88.08, "learning_rate": 5.975380059744351e-06, "loss": 1.8077, "step": 30429500 }, { "epoch": 88.08, "learning_rate": 5.974656412097073e-06, "loss": 1.7749, "step": 30430000 }, { "epoch": 88.08, "learning_rate": 5.973932764449797e-06, "loss": 1.7812, "step": 30430500 }, { "epoch": 88.09, "learning_rate": 5.973209116802519e-06, "loss": 1.7976, "step": 30431000 }, { "epoch": 88.09, "learning_rate": 5.972486916450538e-06, "loss": 1.7747, "step": 30431500 }, { "epoch": 88.09, "learning_rate": 5.97176326880326e-06, "loss": 1.8143, "step": 30432000 }, { "epoch": 88.09, "learning_rate": 5.971039621155984e-06, "loss": 1.7982, "step": 30432500 }, { "epoch": 88.09, "learning_rate": 5.970315973508707e-06, "loss": 1.7945, "step": 30433000 }, { "epoch": 88.09, "learning_rate": 5.96959232586143e-06, "loss": 1.8086, "step": 30433500 }, { "epoch": 88.09, "learning_rate": 5.968868678214154e-06, "loss": 1.7928, "step": 30434000 }, { "epoch": 88.1, "learning_rate": 5.9681450305668765e-06, "loss": 1.7999, "step": 30434500 }, { "epoch": 88.1, "learning_rate": 5.9674213829196005e-06, "loss": 1.7898, "step": 30435000 }, { "epoch": 88.1, "learning_rate": 5.9666977352723235e-06, "loss": 1.7882, "step": 30435500 }, { "epoch": 88.1, "learning_rate": 5.965975534920342e-06, "loss": 1.7919, "step": 30436000 }, { "epoch": 88.1, "learning_rate": 5.965251887273064e-06, "loss": 1.8039, "step": 30436500 }, { "epoch": 88.1, "learning_rate": 5.964528239625788e-06, "loss": 1.7843, "step": 30437000 }, { "epoch": 88.1, "learning_rate": 5.963804591978511e-06, "loss": 1.8095, "step": 30437500 }, { "epoch": 88.11, "learning_rate": 5.963080944331234e-06, "loss": 1.7916, "step": 30438000 }, { "epoch": 88.11, "learning_rate": 5.962357296683958e-06, "loss": 1.8031, "step": 30438500 }, { "epoch": 88.11, "learning_rate": 5.96163364903668e-06, "loss": 1.7913, "step": 30439000 }, { "epoch": 88.11, "learning_rate": 5.960912895979992e-06, "loss": 1.8051, "step": 30439500 }, { "epoch": 88.11, "learning_rate": 5.960189248332716e-06, "loss": 1.8163, "step": 30440000 }, { "epoch": 88.11, "learning_rate": 5.959465600685439e-06, "loss": 1.7899, "step": 30440500 }, { "epoch": 88.11, "learning_rate": 5.9587419530381625e-06, "loss": 1.7971, "step": 30441000 }, { "epoch": 88.12, "learning_rate": 5.95801975268618e-06, "loss": 1.7875, "step": 30441500 }, { "epoch": 88.12, "learning_rate": 5.957296105038904e-06, "loss": 1.8326, "step": 30442000 }, { "epoch": 88.12, "learning_rate": 5.956572457391627e-06, "loss": 1.7952, "step": 30442500 }, { "epoch": 88.12, "learning_rate": 5.95584880974435e-06, "loss": 1.8008, "step": 30443000 }, { "epoch": 88.12, "learning_rate": 5.955125162097074e-06, "loss": 1.7865, "step": 30443500 }, { "epoch": 88.12, "learning_rate": 5.954401514449796e-06, "loss": 1.7737, "step": 30444000 }, { "epoch": 88.12, "learning_rate": 5.95367786680252e-06, "loss": 1.7704, "step": 30444500 }, { "epoch": 88.13, "learning_rate": 5.952954219155243e-06, "loss": 1.7945, "step": 30445000 }, { "epoch": 88.13, "learning_rate": 5.952230571507966e-06, "loss": 1.7947, "step": 30445500 }, { "epoch": 88.13, "learning_rate": 5.951506923860689e-06, "loss": 1.7832, "step": 30446000 }, { "epoch": 88.13, "learning_rate": 5.950783276213412e-06, "loss": 1.7762, "step": 30446500 }, { "epoch": 88.13, "learning_rate": 5.950059628566136e-06, "loss": 1.7859, "step": 30447000 }, { "epoch": 88.13, "learning_rate": 5.949337428214153e-06, "loss": 1.7986, "step": 30447500 }, { "epoch": 88.13, "learning_rate": 5.948613780566877e-06, "loss": 1.7904, "step": 30448000 }, { "epoch": 88.14, "learning_rate": 5.9478901329196e-06, "loss": 1.7841, "step": 30448500 }, { "epoch": 88.14, "learning_rate": 5.9471664852723235e-06, "loss": 1.8216, "step": 30449000 }, { "epoch": 88.14, "learning_rate": 5.946442837625047e-06, "loss": 1.8043, "step": 30449500 }, { "epoch": 88.14, "learning_rate": 5.94571918997777e-06, "loss": 1.7937, "step": 30450000 }, { "epoch": 88.14, "learning_rate": 5.944996989625788e-06, "loss": 1.8016, "step": 30450500 }, { "epoch": 88.14, "learning_rate": 5.944273341978511e-06, "loss": 1.789, "step": 30451000 }, { "epoch": 88.14, "learning_rate": 5.943549694331234e-06, "loss": 1.8098, "step": 30451500 }, { "epoch": 88.15, "learning_rate": 5.942826046683957e-06, "loss": 1.8036, "step": 30452000 }, { "epoch": 88.15, "learning_rate": 5.942103846331975e-06, "loss": 1.7985, "step": 30452500 }, { "epoch": 88.15, "learning_rate": 5.941380198684698e-06, "loss": 1.7904, "step": 30453000 }, { "epoch": 88.15, "learning_rate": 5.940656551037421e-06, "loss": 1.8183, "step": 30453500 }, { "epoch": 88.15, "learning_rate": 5.939934350685439e-06, "loss": 1.7926, "step": 30454000 }, { "epoch": 88.15, "learning_rate": 5.9392107030381624e-06, "loss": 1.8195, "step": 30454500 }, { "epoch": 88.15, "learning_rate": 5.9384870553908855e-06, "loss": 1.7919, "step": 30455000 }, { "epoch": 88.16, "learning_rate": 5.9377634077436094e-06, "loss": 1.8101, "step": 30455500 }, { "epoch": 88.16, "learning_rate": 5.9370397600963325e-06, "loss": 1.7962, "step": 30456000 }, { "epoch": 88.16, "learning_rate": 5.936316112449056e-06, "loss": 1.7993, "step": 30456500 }, { "epoch": 88.16, "learning_rate": 5.935592464801779e-06, "loss": 1.8125, "step": 30457000 }, { "epoch": 88.16, "learning_rate": 5.934868817154502e-06, "loss": 1.8227, "step": 30457500 }, { "epoch": 88.16, "learning_rate": 5.934145169507225e-06, "loss": 1.8021, "step": 30458000 }, { "epoch": 88.16, "learning_rate": 5.933424416450537e-06, "loss": 1.7845, "step": 30458500 }, { "epoch": 88.17, "learning_rate": 5.932700768803261e-06, "loss": 1.8058, "step": 30459000 }, { "epoch": 88.17, "learning_rate": 5.931977121155984e-06, "loss": 1.7734, "step": 30459500 }, { "epoch": 88.17, "learning_rate": 5.931253473508707e-06, "loss": 1.7917, "step": 30460000 }, { "epoch": 88.17, "learning_rate": 5.93052982586143e-06, "loss": 1.7869, "step": 30460500 }, { "epoch": 88.17, "learning_rate": 5.929806178214153e-06, "loss": 1.7936, "step": 30461000 }, { "epoch": 88.17, "learning_rate": 5.929082530566877e-06, "loss": 1.7954, "step": 30461500 }, { "epoch": 88.18, "learning_rate": 5.9283588829196e-06, "loss": 1.8046, "step": 30462000 }, { "epoch": 88.18, "learning_rate": 5.9276352352723234e-06, "loss": 1.7865, "step": 30462500 }, { "epoch": 88.18, "learning_rate": 5.926913034920341e-06, "loss": 1.8044, "step": 30463000 }, { "epoch": 88.18, "learning_rate": 5.926189387273065e-06, "loss": 1.7957, "step": 30463500 }, { "epoch": 88.18, "learning_rate": 5.925465739625788e-06, "loss": 1.806, "step": 30464000 }, { "epoch": 88.18, "learning_rate": 5.924742091978511e-06, "loss": 1.761, "step": 30464500 }, { "epoch": 88.18, "learning_rate": 5.924018444331234e-06, "loss": 1.7853, "step": 30465000 }, { "epoch": 88.19, "learning_rate": 5.923294796683957e-06, "loss": 1.7893, "step": 30465500 }, { "epoch": 88.19, "learning_rate": 5.922571149036681e-06, "loss": 1.8024, "step": 30466000 }, { "epoch": 88.19, "learning_rate": 5.921848948684698e-06, "loss": 1.8064, "step": 30466500 }, { "epoch": 88.19, "learning_rate": 5.921125301037422e-06, "loss": 1.7948, "step": 30467000 }, { "epoch": 88.19, "learning_rate": 5.920401653390144e-06, "loss": 1.7929, "step": 30467500 }, { "epoch": 88.19, "learning_rate": 5.919679453038162e-06, "loss": 1.8239, "step": 30468000 }, { "epoch": 88.19, "learning_rate": 5.9189558053908855e-06, "loss": 1.7995, "step": 30468500 }, { "epoch": 88.2, "learning_rate": 5.918232157743609e-06, "loss": 1.7685, "step": 30469000 }, { "epoch": 88.2, "learning_rate": 5.917508510096332e-06, "loss": 1.7717, "step": 30469500 }, { "epoch": 88.2, "learning_rate": 5.9167848624490556e-06, "loss": 1.7931, "step": 30470000 }, { "epoch": 88.2, "learning_rate": 5.916062662097073e-06, "loss": 1.7937, "step": 30470500 }, { "epoch": 88.2, "learning_rate": 5.915339014449797e-06, "loss": 1.8039, "step": 30471000 }, { "epoch": 88.2, "learning_rate": 5.91461536680252e-06, "loss": 1.781, "step": 30471500 }, { "epoch": 88.2, "learning_rate": 5.913891719155243e-06, "loss": 1.7908, "step": 30472000 }, { "epoch": 88.21, "learning_rate": 5.913168071507967e-06, "loss": 1.8089, "step": 30472500 }, { "epoch": 88.21, "learning_rate": 5.912444423860689e-06, "loss": 1.78, "step": 30473000 }, { "epoch": 88.21, "learning_rate": 5.911720776213413e-06, "loss": 1.8056, "step": 30473500 }, { "epoch": 88.21, "learning_rate": 5.91099857586143e-06, "loss": 1.7759, "step": 30474000 }, { "epoch": 88.21, "learning_rate": 5.910274928214154e-06, "loss": 1.7987, "step": 30474500 }, { "epoch": 88.21, "learning_rate": 5.909551280566876e-06, "loss": 1.801, "step": 30475000 }, { "epoch": 88.21, "learning_rate": 5.9088276329196e-06, "loss": 1.7958, "step": 30475500 }, { "epoch": 88.22, "learning_rate": 5.908103985272323e-06, "loss": 1.8216, "step": 30476000 }, { "epoch": 88.22, "learning_rate": 5.9073803376250465e-06, "loss": 1.7725, "step": 30476500 }, { "epoch": 88.22, "learning_rate": 5.90665668997777e-06, "loss": 1.8116, "step": 30477000 }, { "epoch": 88.22, "learning_rate": 5.905933042330493e-06, "loss": 1.8049, "step": 30477500 }, { "epoch": 88.22, "learning_rate": 5.9052093946832166e-06, "loss": 1.7965, "step": 30478000 }, { "epoch": 88.22, "learning_rate": 5.90448574703594e-06, "loss": 1.7807, "step": 30478500 }, { "epoch": 88.22, "learning_rate": 5.903763546683958e-06, "loss": 1.7824, "step": 30479000 }, { "epoch": 88.23, "learning_rate": 5.90303989903668e-06, "loss": 1.8166, "step": 30479500 }, { "epoch": 88.23, "learning_rate": 5.902316251389404e-06, "loss": 1.819, "step": 30480000 }, { "epoch": 88.23, "learning_rate": 5.901592603742127e-06, "loss": 1.8134, "step": 30480500 }, { "epoch": 88.23, "learning_rate": 5.900870403390145e-06, "loss": 1.8047, "step": 30481000 }, { "epoch": 88.23, "learning_rate": 5.900148203038162e-06, "loss": 1.797, "step": 30481500 }, { "epoch": 88.23, "learning_rate": 5.899424555390886e-06, "loss": 1.7951, "step": 30482000 }, { "epoch": 88.23, "learning_rate": 5.8987009077436085e-06, "loss": 1.8067, "step": 30482500 }, { "epoch": 88.24, "learning_rate": 5.897977260096332e-06, "loss": 1.7667, "step": 30483000 }, { "epoch": 88.24, "learning_rate": 5.8972536124490555e-06, "loss": 1.7984, "step": 30483500 }, { "epoch": 88.24, "learning_rate": 5.896529964801779e-06, "loss": 1.8005, "step": 30484000 }, { "epoch": 88.24, "learning_rate": 5.8958063171545025e-06, "loss": 1.7932, "step": 30484500 }, { "epoch": 88.24, "learning_rate": 5.895082669507225e-06, "loss": 1.8218, "step": 30485000 }, { "epoch": 88.24, "learning_rate": 5.894359021859949e-06, "loss": 1.7707, "step": 30485500 }, { "epoch": 88.24, "learning_rate": 5.893638268803261e-06, "loss": 1.7944, "step": 30486000 }, { "epoch": 88.25, "learning_rate": 5.892914621155984e-06, "loss": 1.787, "step": 30486500 }, { "epoch": 88.25, "learning_rate": 5.892190973508707e-06, "loss": 1.7645, "step": 30487000 }, { "epoch": 88.25, "learning_rate": 5.891467325861431e-06, "loss": 1.8216, "step": 30487500 }, { "epoch": 88.25, "learning_rate": 5.890743678214153e-06, "loss": 1.8031, "step": 30488000 }, { "epoch": 88.25, "learning_rate": 5.890020030566877e-06, "loss": 1.7803, "step": 30488500 }, { "epoch": 88.25, "learning_rate": 5.8892963829196e-06, "loss": 1.8041, "step": 30489000 }, { "epoch": 88.25, "learning_rate": 5.888572735272323e-06, "loss": 1.809, "step": 30489500 }, { "epoch": 88.26, "learning_rate": 5.8878490876250464e-06, "loss": 1.7937, "step": 30490000 }, { "epoch": 88.26, "learning_rate": 5.8871268872730645e-06, "loss": 1.8074, "step": 30490500 }, { "epoch": 88.26, "learning_rate": 5.886403239625788e-06, "loss": 1.8059, "step": 30491000 }, { "epoch": 88.26, "learning_rate": 5.885679591978511e-06, "loss": 1.8065, "step": 30491500 }, { "epoch": 88.26, "learning_rate": 5.884955944331235e-06, "loss": 1.791, "step": 30492000 }, { "epoch": 88.26, "learning_rate": 5.884232296683957e-06, "loss": 1.7771, "step": 30492500 }, { "epoch": 88.26, "learning_rate": 5.883508649036681e-06, "loss": 1.7607, "step": 30493000 }, { "epoch": 88.27, "learning_rate": 5.882785001389404e-06, "loss": 1.8009, "step": 30493500 }, { "epoch": 88.27, "learning_rate": 5.882061353742127e-06, "loss": 1.8088, "step": 30494000 }, { "epoch": 88.27, "learning_rate": 5.881340600685439e-06, "loss": 1.787, "step": 30494500 }, { "epoch": 88.27, "learning_rate": 5.880618400333457e-06, "loss": 1.8066, "step": 30495000 }, { "epoch": 88.27, "learning_rate": 5.87989475268618e-06, "loss": 1.826, "step": 30495500 }, { "epoch": 88.27, "learning_rate": 5.8791711050389035e-06, "loss": 1.7951, "step": 30496000 }, { "epoch": 88.27, "learning_rate": 5.8784474573916265e-06, "loss": 1.7615, "step": 30496500 }, { "epoch": 88.28, "learning_rate": 5.8777238097443505e-06, "loss": 1.796, "step": 30497000 }, { "epoch": 88.28, "learning_rate": 5.8770001620970735e-06, "loss": 1.7909, "step": 30497500 }, { "epoch": 88.28, "learning_rate": 5.876276514449797e-06, "loss": 1.7993, "step": 30498000 }, { "epoch": 88.28, "learning_rate": 5.87555286680252e-06, "loss": 1.8003, "step": 30498500 }, { "epoch": 88.28, "learning_rate": 5.874829219155243e-06, "loss": 1.7862, "step": 30499000 }, { "epoch": 88.28, "learning_rate": 5.874105571507967e-06, "loss": 1.8091, "step": 30499500 }, { "epoch": 88.29, "learning_rate": 5.873383371155984e-06, "loss": 1.7802, "step": 30500000 }, { "epoch": 88.29, "learning_rate": 5.872659723508707e-06, "loss": 1.7893, "step": 30500500 }, { "epoch": 88.29, "learning_rate": 5.87193607586143e-06, "loss": 1.7929, "step": 30501000 }, { "epoch": 88.29, "learning_rate": 5.871212428214154e-06, "loss": 1.787, "step": 30501500 }, { "epoch": 88.29, "learning_rate": 5.870488780566877e-06, "loss": 1.7897, "step": 30502000 }, { "epoch": 88.29, "learning_rate": 5.869766580214895e-06, "loss": 1.7646, "step": 30502500 }, { "epoch": 88.29, "learning_rate": 5.8690429325676175e-06, "loss": 1.7968, "step": 30503000 }, { "epoch": 88.3, "learning_rate": 5.868319284920341e-06, "loss": 1.7717, "step": 30503500 }, { "epoch": 88.3, "learning_rate": 5.8675956372730645e-06, "loss": 1.7926, "step": 30504000 }, { "epoch": 88.3, "learning_rate": 5.8668734369210826e-06, "loss": 1.8215, "step": 30504500 }, { "epoch": 88.3, "learning_rate": 5.866149789273806e-06, "loss": 1.7872, "step": 30505000 }, { "epoch": 88.3, "learning_rate": 5.865427588921823e-06, "loss": 1.7965, "step": 30505500 }, { "epoch": 88.3, "learning_rate": 5.864703941274546e-06, "loss": 1.8042, "step": 30506000 }, { "epoch": 88.3, "learning_rate": 5.86398029362727e-06, "loss": 1.7988, "step": 30506500 }, { "epoch": 88.31, "learning_rate": 5.863256645979993e-06, "loss": 1.789, "step": 30507000 }, { "epoch": 88.31, "learning_rate": 5.862534445628011e-06, "loss": 1.7999, "step": 30507500 }, { "epoch": 88.31, "learning_rate": 5.861810797980734e-06, "loss": 1.796, "step": 30508000 }, { "epoch": 88.31, "learning_rate": 5.861087150333457e-06, "loss": 1.8069, "step": 30508500 }, { "epoch": 88.31, "learning_rate": 5.86036350268618e-06, "loss": 1.8078, "step": 30509000 }, { "epoch": 88.31, "learning_rate": 5.859639855038903e-06, "loss": 1.8179, "step": 30509500 }, { "epoch": 88.31, "learning_rate": 5.8589162073916265e-06, "loss": 1.8214, "step": 30510000 }, { "epoch": 88.32, "learning_rate": 5.858194007039645e-06, "loss": 1.7939, "step": 30510500 }, { "epoch": 88.32, "learning_rate": 5.857470359392368e-06, "loss": 1.8212, "step": 30511000 }, { "epoch": 88.32, "learning_rate": 5.856748159040386e-06, "loss": 1.7974, "step": 30511500 }, { "epoch": 88.32, "learning_rate": 5.856024511393109e-06, "loss": 1.7786, "step": 30512000 }, { "epoch": 88.32, "learning_rate": 5.855300863745832e-06, "loss": 1.7805, "step": 30512500 }, { "epoch": 88.32, "learning_rate": 5.854577216098555e-06, "loss": 1.8287, "step": 30513000 }, { "epoch": 88.32, "learning_rate": 5.853853568451279e-06, "loss": 1.8035, "step": 30513500 }, { "epoch": 88.33, "learning_rate": 5.853129920804002e-06, "loss": 1.8053, "step": 30514000 }, { "epoch": 88.33, "learning_rate": 5.852406273156725e-06, "loss": 1.7787, "step": 30514500 }, { "epoch": 88.33, "learning_rate": 5.851682625509448e-06, "loss": 1.8216, "step": 30515000 }, { "epoch": 88.33, "learning_rate": 5.850958977862171e-06, "loss": 1.7876, "step": 30515500 }, { "epoch": 88.33, "learning_rate": 5.850235330214895e-06, "loss": 1.7932, "step": 30516000 }, { "epoch": 88.33, "learning_rate": 5.849511682567617e-06, "loss": 1.8006, "step": 30516500 }, { "epoch": 88.33, "learning_rate": 5.848788034920341e-06, "loss": 1.7855, "step": 30517000 }, { "epoch": 88.34, "learning_rate": 5.848065834568359e-06, "loss": 1.8047, "step": 30517500 }, { "epoch": 88.34, "learning_rate": 5.8473421869210825e-06, "loss": 1.8066, "step": 30518000 }, { "epoch": 88.34, "learning_rate": 5.8466199865691e-06, "loss": 1.7927, "step": 30518500 }, { "epoch": 88.34, "learning_rate": 5.845896338921823e-06, "loss": 1.8135, "step": 30519000 }, { "epoch": 88.34, "learning_rate": 5.845172691274546e-06, "loss": 1.7896, "step": 30519500 }, { "epoch": 88.34, "learning_rate": 5.84444904362727e-06, "loss": 1.8155, "step": 30520000 }, { "epoch": 88.34, "learning_rate": 5.843725395979993e-06, "loss": 1.8091, "step": 30520500 }, { "epoch": 88.35, "learning_rate": 5.843001748332716e-06, "loss": 1.7804, "step": 30521000 }, { "epoch": 88.35, "learning_rate": 5.842278100685439e-06, "loss": 1.7947, "step": 30521500 }, { "epoch": 88.35, "learning_rate": 5.841554453038162e-06, "loss": 1.8149, "step": 30522000 }, { "epoch": 88.35, "learning_rate": 5.840830805390886e-06, "loss": 1.7913, "step": 30522500 }, { "epoch": 88.35, "learning_rate": 5.840108605038903e-06, "loss": 1.7925, "step": 30523000 }, { "epoch": 88.35, "learning_rate": 5.839384957391627e-06, "loss": 1.8261, "step": 30523500 }, { "epoch": 88.35, "learning_rate": 5.8386613097443495e-06, "loss": 1.8179, "step": 30524000 }, { "epoch": 88.36, "learning_rate": 5.8379376620970734e-06, "loss": 1.8032, "step": 30524500 }, { "epoch": 88.36, "learning_rate": 5.8372140144497965e-06, "loss": 1.8074, "step": 30525000 }, { "epoch": 88.36, "learning_rate": 5.83649036680252e-06, "loss": 1.8047, "step": 30525500 }, { "epoch": 88.36, "learning_rate": 5.8357667191552435e-06, "loss": 1.7959, "step": 30526000 }, { "epoch": 88.36, "learning_rate": 5.835043071507966e-06, "loss": 1.7787, "step": 30526500 }, { "epoch": 88.36, "learning_rate": 5.834320871155984e-06, "loss": 1.8167, "step": 30527000 }, { "epoch": 88.36, "learning_rate": 5.833598670804002e-06, "loss": 1.7976, "step": 30527500 }, { "epoch": 88.37, "learning_rate": 5.832875023156725e-06, "loss": 1.8056, "step": 30528000 }, { "epoch": 88.37, "learning_rate": 5.832152822804743e-06, "loss": 1.7904, "step": 30528500 }, { "epoch": 88.37, "learning_rate": 5.831429175157465e-06, "loss": 1.8035, "step": 30529000 }, { "epoch": 88.37, "learning_rate": 5.830705527510189e-06, "loss": 1.8028, "step": 30529500 }, { "epoch": 88.37, "learning_rate": 5.829981879862912e-06, "loss": 1.7872, "step": 30530000 }, { "epoch": 88.37, "learning_rate": 5.8292582322156355e-06, "loss": 1.8124, "step": 30530500 }, { "epoch": 88.37, "learning_rate": 5.8285360318636536e-06, "loss": 1.7837, "step": 30531000 }, { "epoch": 88.38, "learning_rate": 5.827812384216377e-06, "loss": 1.8078, "step": 30531500 }, { "epoch": 88.38, "learning_rate": 5.8270887365691006e-06, "loss": 1.781, "step": 30532000 }, { "epoch": 88.38, "learning_rate": 5.826365088921823e-06, "loss": 1.8164, "step": 30532500 }, { "epoch": 88.38, "learning_rate": 5.825642888569841e-06, "loss": 1.8059, "step": 30533000 }, { "epoch": 88.38, "learning_rate": 5.824919240922564e-06, "loss": 1.8209, "step": 30533500 }, { "epoch": 88.38, "learning_rate": 5.824195593275288e-06, "loss": 1.8062, "step": 30534000 }, { "epoch": 88.38, "learning_rate": 5.82347194562801e-06, "loss": 1.8165, "step": 30534500 }, { "epoch": 88.39, "learning_rate": 5.822748297980734e-06, "loss": 1.7915, "step": 30535000 }, { "epoch": 88.39, "learning_rate": 5.822024650333457e-06, "loss": 1.7967, "step": 30535500 }, { "epoch": 88.39, "learning_rate": 5.82130100268618e-06, "loss": 1.8127, "step": 30536000 }, { "epoch": 88.39, "learning_rate": 5.8205788023341975e-06, "loss": 1.8175, "step": 30536500 }, { "epoch": 88.39, "learning_rate": 5.819855154686921e-06, "loss": 1.8063, "step": 30537000 }, { "epoch": 88.39, "learning_rate": 5.8191315070396445e-06, "loss": 1.7862, "step": 30537500 }, { "epoch": 88.4, "learning_rate": 5.8184078593923676e-06, "loss": 1.7875, "step": 30538000 }, { "epoch": 88.4, "learning_rate": 5.8176842117450915e-06, "loss": 1.8045, "step": 30538500 }, { "epoch": 88.4, "learning_rate": 5.816960564097814e-06, "loss": 1.8076, "step": 30539000 }, { "epoch": 88.4, "learning_rate": 5.816236916450538e-06, "loss": 1.7934, "step": 30539500 }, { "epoch": 88.4, "learning_rate": 5.815514716098555e-06, "loss": 1.7587, "step": 30540000 }, { "epoch": 88.4, "learning_rate": 5.814791068451279e-06, "loss": 1.7992, "step": 30540500 }, { "epoch": 88.4, "learning_rate": 5.814067420804002e-06, "loss": 1.7927, "step": 30541000 }, { "epoch": 88.41, "learning_rate": 5.813343773156725e-06, "loss": 1.7865, "step": 30541500 }, { "epoch": 88.41, "learning_rate": 5.812620125509449e-06, "loss": 1.7981, "step": 30542000 }, { "epoch": 88.41, "learning_rate": 5.811896477862171e-06, "loss": 1.767, "step": 30542500 }, { "epoch": 88.41, "learning_rate": 5.811172830214895e-06, "loss": 1.8072, "step": 30543000 }, { "epoch": 88.41, "learning_rate": 5.810449182567617e-06, "loss": 1.8144, "step": 30543500 }, { "epoch": 88.41, "learning_rate": 5.809725534920341e-06, "loss": 1.7955, "step": 30544000 }, { "epoch": 88.41, "learning_rate": 5.8090033345683585e-06, "loss": 1.8078, "step": 30544500 }, { "epoch": 88.42, "learning_rate": 5.808279686921082e-06, "loss": 1.7954, "step": 30545000 }, { "epoch": 88.42, "learning_rate": 5.8075560392738055e-06, "loss": 1.8099, "step": 30545500 }, { "epoch": 88.42, "learning_rate": 5.8068323916265286e-06, "loss": 1.7723, "step": 30546000 }, { "epoch": 88.42, "learning_rate": 5.806110191274546e-06, "loss": 1.7996, "step": 30546500 }, { "epoch": 88.42, "learning_rate": 5.80538654362727e-06, "loss": 1.7909, "step": 30547000 }, { "epoch": 88.42, "learning_rate": 5.804662895979993e-06, "loss": 1.7803, "step": 30547500 }, { "epoch": 88.42, "learning_rate": 5.803939248332716e-06, "loss": 1.7988, "step": 30548000 }, { "epoch": 88.43, "learning_rate": 5.803217047980734e-06, "loss": 1.8111, "step": 30548500 }, { "epoch": 88.43, "learning_rate": 5.802493400333457e-06, "loss": 1.845, "step": 30549000 }, { "epoch": 88.43, "learning_rate": 5.80176975268618e-06, "loss": 1.799, "step": 30549500 }, { "epoch": 88.43, "learning_rate": 5.801047552334198e-06, "loss": 1.7847, "step": 30550000 }, { "epoch": 88.43, "learning_rate": 5.800323904686921e-06, "loss": 1.7963, "step": 30550500 }, { "epoch": 88.43, "learning_rate": 5.7996017043349394e-06, "loss": 1.7788, "step": 30551000 }, { "epoch": 88.43, "learning_rate": 5.7988780566876625e-06, "loss": 1.787, "step": 30551500 }, { "epoch": 88.44, "learning_rate": 5.798154409040386e-06, "loss": 1.8023, "step": 30552000 }, { "epoch": 88.44, "learning_rate": 5.797430761393109e-06, "loss": 1.7791, "step": 30552500 }, { "epoch": 88.44, "learning_rate": 5.796707113745832e-06, "loss": 1.7656, "step": 30553000 }, { "epoch": 88.44, "learning_rate": 5.795983466098556e-06, "loss": 1.8062, "step": 30553500 }, { "epoch": 88.44, "learning_rate": 5.795259818451279e-06, "loss": 1.7969, "step": 30554000 }, { "epoch": 88.44, "learning_rate": 5.794536170804002e-06, "loss": 1.808, "step": 30554500 }, { "epoch": 88.44, "learning_rate": 5.793812523156725e-06, "loss": 1.7952, "step": 30555000 }, { "epoch": 88.45, "learning_rate": 5.793088875509448e-06, "loss": 1.7875, "step": 30555500 }, { "epoch": 88.45, "learning_rate": 5.792365227862172e-06, "loss": 1.8081, "step": 30556000 }, { "epoch": 88.45, "learning_rate": 5.791641580214895e-06, "loss": 1.7952, "step": 30556500 }, { "epoch": 88.45, "learning_rate": 5.790917932567618e-06, "loss": 1.8256, "step": 30557000 }, { "epoch": 88.45, "learning_rate": 5.790195732215635e-06, "loss": 1.8108, "step": 30557500 }, { "epoch": 88.45, "learning_rate": 5.789472084568359e-06, "loss": 1.8136, "step": 30558000 }, { "epoch": 88.45, "learning_rate": 5.788748436921082e-06, "loss": 1.7987, "step": 30558500 }, { "epoch": 88.46, "learning_rate": 5.7880247892738054e-06, "loss": 1.8137, "step": 30559000 }, { "epoch": 88.46, "learning_rate": 5.7873011416265285e-06, "loss": 1.784, "step": 30559500 }, { "epoch": 88.46, "learning_rate": 5.786577493979252e-06, "loss": 1.7938, "step": 30560000 }, { "epoch": 88.46, "learning_rate": 5.785853846331975e-06, "loss": 1.7766, "step": 30560500 }, { "epoch": 88.46, "learning_rate": 5.785130198684699e-06, "loss": 1.8036, "step": 30561000 }, { "epoch": 88.46, "learning_rate": 5.784407998332716e-06, "loss": 1.8154, "step": 30561500 }, { "epoch": 88.46, "learning_rate": 5.783684350685439e-06, "loss": 1.79, "step": 30562000 }, { "epoch": 88.47, "learning_rate": 5.782960703038163e-06, "loss": 1.8366, "step": 30562500 }, { "epoch": 88.47, "learning_rate": 5.782237055390886e-06, "loss": 1.7906, "step": 30563000 }, { "epoch": 88.47, "learning_rate": 5.781513407743609e-06, "loss": 1.8271, "step": 30563500 }, { "epoch": 88.47, "learning_rate": 5.780789760096332e-06, "loss": 1.7874, "step": 30564000 }, { "epoch": 88.47, "learning_rate": 5.780066112449055e-06, "loss": 1.7872, "step": 30564500 }, { "epoch": 88.47, "learning_rate": 5.779343912097073e-06, "loss": 1.7749, "step": 30565000 }, { "epoch": 88.47, "learning_rate": 5.778620264449796e-06, "loss": 1.7937, "step": 30565500 }, { "epoch": 88.48, "learning_rate": 5.7778966168025194e-06, "loss": 1.7967, "step": 30566000 }, { "epoch": 88.48, "learning_rate": 5.777172969155243e-06, "loss": 1.8344, "step": 30566500 }, { "epoch": 88.48, "learning_rate": 5.7764493215079665e-06, "loss": 1.8455, "step": 30567000 }, { "epoch": 88.48, "learning_rate": 5.7757256738606895e-06, "loss": 1.7935, "step": 30567500 }, { "epoch": 88.48, "learning_rate": 5.775002026213413e-06, "loss": 1.7972, "step": 30568000 }, { "epoch": 88.48, "learning_rate": 5.774279825861431e-06, "loss": 1.8104, "step": 30568500 }, { "epoch": 88.48, "learning_rate": 5.773556178214154e-06, "loss": 1.7951, "step": 30569000 }, { "epoch": 88.49, "learning_rate": 5.772832530566877e-06, "loss": 1.8129, "step": 30569500 }, { "epoch": 88.49, "learning_rate": 5.7721088829196e-06, "loss": 1.7829, "step": 30570000 }, { "epoch": 88.49, "learning_rate": 5.771385235272323e-06, "loss": 1.7921, "step": 30570500 }, { "epoch": 88.49, "learning_rate": 5.770661587625047e-06, "loss": 1.7865, "step": 30571000 }, { "epoch": 88.49, "learning_rate": 5.769937939977769e-06, "loss": 1.8043, "step": 30571500 }, { "epoch": 88.49, "learning_rate": 5.769215739625788e-06, "loss": 1.7848, "step": 30572000 }, { "epoch": 88.49, "learning_rate": 5.76849209197851e-06, "loss": 1.7881, "step": 30572500 }, { "epoch": 88.5, "learning_rate": 5.7677698916265285e-06, "loss": 1.7996, "step": 30573000 }, { "epoch": 88.5, "learning_rate": 5.7670462439792516e-06, "loss": 1.806, "step": 30573500 }, { "epoch": 88.5, "learning_rate": 5.7663225963319755e-06, "loss": 1.7993, "step": 30574000 }, { "epoch": 88.5, "learning_rate": 5.765598948684698e-06, "loss": 1.7858, "step": 30574500 }, { "epoch": 88.5, "learning_rate": 5.764875301037422e-06, "loss": 1.8234, "step": 30575000 }, { "epoch": 88.5, "learning_rate": 5.764153100685439e-06, "loss": 1.7955, "step": 30575500 }, { "epoch": 88.51, "learning_rate": 5.763429453038163e-06, "loss": 1.8184, "step": 30576000 }, { "epoch": 88.51, "learning_rate": 5.762705805390886e-06, "loss": 1.7851, "step": 30576500 }, { "epoch": 88.51, "learning_rate": 5.761982157743609e-06, "loss": 1.8044, "step": 30577000 }, { "epoch": 88.51, "learning_rate": 5.761258510096332e-06, "loss": 1.7937, "step": 30577500 }, { "epoch": 88.51, "learning_rate": 5.760534862449055e-06, "loss": 1.8083, "step": 30578000 }, { "epoch": 88.51, "learning_rate": 5.759811214801779e-06, "loss": 1.8035, "step": 30578500 }, { "epoch": 88.51, "learning_rate": 5.759087567154501e-06, "loss": 1.7949, "step": 30579000 }, { "epoch": 88.52, "learning_rate": 5.758363919507225e-06, "loss": 1.781, "step": 30579500 }, { "epoch": 88.52, "learning_rate": 5.757640271859948e-06, "loss": 1.8086, "step": 30580000 }, { "epoch": 88.52, "learning_rate": 5.756918071507966e-06, "loss": 1.7872, "step": 30580500 }, { "epoch": 88.52, "learning_rate": 5.7561944238606895e-06, "loss": 1.8084, "step": 30581000 }, { "epoch": 88.52, "learning_rate": 5.7554707762134126e-06, "loss": 1.7698, "step": 30581500 }, { "epoch": 88.52, "learning_rate": 5.7547471285661365e-06, "loss": 1.7891, "step": 30582000 }, { "epoch": 88.52, "learning_rate": 5.754023480918859e-06, "loss": 1.8125, "step": 30582500 }, { "epoch": 88.53, "learning_rate": 5.753299833271583e-06, "loss": 1.8073, "step": 30583000 }, { "epoch": 88.53, "learning_rate": 5.752576185624305e-06, "loss": 1.8188, "step": 30583500 }, { "epoch": 88.53, "learning_rate": 5.751852537977029e-06, "loss": 1.8043, "step": 30584000 }, { "epoch": 88.53, "learning_rate": 5.751130337625046e-06, "loss": 1.7481, "step": 30584500 }, { "epoch": 88.53, "learning_rate": 5.75040668997777e-06, "loss": 1.7868, "step": 30585000 }, { "epoch": 88.53, "learning_rate": 5.749683042330493e-06, "loss": 1.8074, "step": 30585500 }, { "epoch": 88.53, "learning_rate": 5.748959394683216e-06, "loss": 1.8052, "step": 30586000 }, { "epoch": 88.54, "learning_rate": 5.748237194331233e-06, "loss": 1.7869, "step": 30586500 }, { "epoch": 88.54, "learning_rate": 5.747513546683957e-06, "loss": 1.7691, "step": 30587000 }, { "epoch": 88.54, "learning_rate": 5.746789899036681e-06, "loss": 1.7841, "step": 30587500 }, { "epoch": 88.54, "learning_rate": 5.7460662513894035e-06, "loss": 1.8123, "step": 30588000 }, { "epoch": 88.54, "learning_rate": 5.745342603742127e-06, "loss": 1.8023, "step": 30588500 }, { "epoch": 88.54, "learning_rate": 5.74461895609485e-06, "loss": 1.8017, "step": 30589000 }, { "epoch": 88.54, "learning_rate": 5.743895308447574e-06, "loss": 1.8052, "step": 30589500 }, { "epoch": 88.55, "learning_rate": 5.743171660800297e-06, "loss": 1.8131, "step": 30590000 }, { "epoch": 88.55, "learning_rate": 5.742449460448315e-06, "loss": 1.8127, "step": 30590500 }, { "epoch": 88.55, "learning_rate": 5.741725812801038e-06, "loss": 1.7768, "step": 30591000 }, { "epoch": 88.55, "learning_rate": 5.741002165153761e-06, "loss": 1.8007, "step": 30591500 }, { "epoch": 88.55, "learning_rate": 5.740278517506484e-06, "loss": 1.7681, "step": 30592000 }, { "epoch": 88.55, "learning_rate": 5.739554869859207e-06, "loss": 1.7879, "step": 30592500 }, { "epoch": 88.55, "learning_rate": 5.738831222211931e-06, "loss": 1.7933, "step": 30593000 }, { "epoch": 88.56, "learning_rate": 5.738107574564654e-06, "loss": 1.8162, "step": 30593500 }, { "epoch": 88.56, "learning_rate": 5.737383926917377e-06, "loss": 1.8053, "step": 30594000 }, { "epoch": 88.56, "learning_rate": 5.736661726565394e-06, "loss": 1.7822, "step": 30594500 }, { "epoch": 88.56, "learning_rate": 5.735938078918118e-06, "loss": 1.8124, "step": 30595000 }, { "epoch": 88.56, "learning_rate": 5.7352144312708414e-06, "loss": 1.7946, "step": 30595500 }, { "epoch": 88.56, "learning_rate": 5.734493678214154e-06, "loss": 1.7864, "step": 30596000 }, { "epoch": 88.56, "learning_rate": 5.733770030566877e-06, "loss": 1.8049, "step": 30596500 }, { "epoch": 88.57, "learning_rate": 5.733046382919601e-06, "loss": 1.8, "step": 30597000 }, { "epoch": 88.57, "learning_rate": 5.732322735272323e-06, "loss": 1.7835, "step": 30597500 }, { "epoch": 88.57, "learning_rate": 5.731599087625047e-06, "loss": 1.8105, "step": 30598000 }, { "epoch": 88.57, "learning_rate": 5.73087543997777e-06, "loss": 1.7788, "step": 30598500 }, { "epoch": 88.57, "learning_rate": 5.730151792330493e-06, "loss": 1.8118, "step": 30599000 }, { "epoch": 88.57, "learning_rate": 5.729428144683216e-06, "loss": 1.808, "step": 30599500 }, { "epoch": 88.57, "learning_rate": 5.728704497035939e-06, "loss": 1.807, "step": 30600000 }, { "epoch": 88.58, "learning_rate": 5.727982296683957e-06, "loss": 1.792, "step": 30600500 }, { "epoch": 88.58, "learning_rate": 5.72725864903668e-06, "loss": 1.7641, "step": 30601000 }, { "epoch": 88.58, "learning_rate": 5.726535001389404e-06, "loss": 1.7987, "step": 30601500 }, { "epoch": 88.58, "learning_rate": 5.7258113537421265e-06, "loss": 1.7995, "step": 30602000 }, { "epoch": 88.58, "learning_rate": 5.7250877060948505e-06, "loss": 1.7754, "step": 30602500 }, { "epoch": 88.58, "learning_rate": 5.724365505742868e-06, "loss": 1.8005, "step": 30603000 }, { "epoch": 88.58, "learning_rate": 5.723641858095592e-06, "loss": 1.8045, "step": 30603500 }, { "epoch": 88.59, "learning_rate": 5.722918210448315e-06, "loss": 1.7898, "step": 30604000 }, { "epoch": 88.59, "learning_rate": 5.722194562801038e-06, "loss": 1.8106, "step": 30604500 }, { "epoch": 88.59, "learning_rate": 5.721470915153761e-06, "loss": 1.7897, "step": 30605000 }, { "epoch": 88.59, "learning_rate": 5.720748714801779e-06, "loss": 1.7669, "step": 30605500 }, { "epoch": 88.59, "learning_rate": 5.720025067154502e-06, "loss": 1.7809, "step": 30606000 }, { "epoch": 88.59, "learning_rate": 5.71930286680252e-06, "loss": 1.7752, "step": 30606500 }, { "epoch": 88.59, "learning_rate": 5.718580666450537e-06, "loss": 1.8162, "step": 30607000 }, { "epoch": 88.6, "learning_rate": 5.7178570188032605e-06, "loss": 1.8137, "step": 30607500 }, { "epoch": 88.6, "learning_rate": 5.7171348184512786e-06, "loss": 1.7916, "step": 30608000 }, { "epoch": 88.6, "learning_rate": 5.716411170804002e-06, "loss": 1.8012, "step": 30608500 }, { "epoch": 88.6, "learning_rate": 5.715687523156725e-06, "loss": 1.7725, "step": 30609000 }, { "epoch": 88.6, "learning_rate": 5.714963875509449e-06, "loss": 1.7854, "step": 30609500 }, { "epoch": 88.6, "learning_rate": 5.714240227862172e-06, "loss": 1.7898, "step": 30610000 }, { "epoch": 88.6, "learning_rate": 5.713516580214895e-06, "loss": 1.7682, "step": 30610500 }, { "epoch": 88.61, "learning_rate": 5.712792932567618e-06, "loss": 1.8085, "step": 30611000 }, { "epoch": 88.61, "learning_rate": 5.712069284920341e-06, "loss": 1.7981, "step": 30611500 }, { "epoch": 88.61, "learning_rate": 5.711345637273064e-06, "loss": 1.8207, "step": 30612000 }, { "epoch": 88.61, "learning_rate": 5.710621989625788e-06, "loss": 1.7783, "step": 30612500 }, { "epoch": 88.61, "learning_rate": 5.709898341978511e-06, "loss": 1.8069, "step": 30613000 }, { "epoch": 88.61, "learning_rate": 5.709174694331234e-06, "loss": 1.7969, "step": 30613500 }, { "epoch": 88.61, "learning_rate": 5.708451046683957e-06, "loss": 1.8241, "step": 30614000 }, { "epoch": 88.62, "learning_rate": 5.70772739903668e-06, "loss": 1.8198, "step": 30614500 }, { "epoch": 88.62, "learning_rate": 5.707003751389404e-06, "loss": 1.8067, "step": 30615000 }, { "epoch": 88.62, "learning_rate": 5.7062801037421265e-06, "loss": 1.8086, "step": 30615500 }, { "epoch": 88.62, "learning_rate": 5.70555645609485e-06, "loss": 1.7782, "step": 30616000 }, { "epoch": 88.62, "learning_rate": 5.704834255742868e-06, "loss": 1.8019, "step": 30616500 }, { "epoch": 88.62, "learning_rate": 5.7041106080955916e-06, "loss": 1.7863, "step": 30617000 }, { "epoch": 88.63, "learning_rate": 5.703386960448315e-06, "loss": 1.8029, "step": 30617500 }, { "epoch": 88.63, "learning_rate": 5.702663312801038e-06, "loss": 1.803, "step": 30618000 }, { "epoch": 88.63, "learning_rate": 5.701941112449055e-06, "loss": 1.8282, "step": 30618500 }, { "epoch": 88.63, "learning_rate": 5.701217464801779e-06, "loss": 1.7977, "step": 30619000 }, { "epoch": 88.63, "learning_rate": 5.700493817154502e-06, "loss": 1.8013, "step": 30619500 }, { "epoch": 88.63, "learning_rate": 5.699770169507225e-06, "loss": 1.7682, "step": 30620000 }, { "epoch": 88.63, "learning_rate": 5.699047969155243e-06, "loss": 1.8183, "step": 30620500 }, { "epoch": 88.64, "learning_rate": 5.698324321507966e-06, "loss": 1.805, "step": 30621000 }, { "epoch": 88.64, "learning_rate": 5.6976021211559835e-06, "loss": 1.7903, "step": 30621500 }, { "epoch": 88.64, "learning_rate": 5.6968784735087074e-06, "loss": 1.7912, "step": 30622000 }, { "epoch": 88.64, "learning_rate": 5.6961548258614305e-06, "loss": 1.8305, "step": 30622500 }, { "epoch": 88.64, "learning_rate": 5.695431178214154e-06, "loss": 1.8131, "step": 30623000 }, { "epoch": 88.64, "learning_rate": 5.694707530566877e-06, "loss": 1.7775, "step": 30623500 }, { "epoch": 88.64, "learning_rate": 5.6939838829196e-06, "loss": 1.7761, "step": 30624000 }, { "epoch": 88.65, "learning_rate": 5.693260235272324e-06, "loss": 1.7865, "step": 30624500 }, { "epoch": 88.65, "learning_rate": 5.692536587625046e-06, "loss": 1.8031, "step": 30625000 }, { "epoch": 88.65, "learning_rate": 5.69181293997777e-06, "loss": 1.7714, "step": 30625500 }, { "epoch": 88.65, "learning_rate": 5.691089292330493e-06, "loss": 1.7814, "step": 30626000 }, { "epoch": 88.65, "learning_rate": 5.690365644683216e-06, "loss": 1.8006, "step": 30626500 }, { "epoch": 88.65, "learning_rate": 5.68964199703594e-06, "loss": 1.8309, "step": 30627000 }, { "epoch": 88.65, "learning_rate": 5.688919796683957e-06, "loss": 1.7916, "step": 30627500 }, { "epoch": 88.66, "learning_rate": 5.688196149036681e-06, "loss": 1.8033, "step": 30628000 }, { "epoch": 88.66, "learning_rate": 5.687472501389403e-06, "loss": 1.793, "step": 30628500 }, { "epoch": 88.66, "learning_rate": 5.686748853742127e-06, "loss": 1.779, "step": 30629000 }, { "epoch": 88.66, "learning_rate": 5.6860252060948495e-06, "loss": 1.7958, "step": 30629500 }, { "epoch": 88.66, "learning_rate": 5.6853015584475734e-06, "loss": 1.7868, "step": 30630000 }, { "epoch": 88.66, "learning_rate": 5.684579358095591e-06, "loss": 1.7896, "step": 30630500 }, { "epoch": 88.66, "learning_rate": 5.683855710448315e-06, "loss": 1.7954, "step": 30631000 }, { "epoch": 88.67, "learning_rate": 5.683132062801038e-06, "loss": 1.7954, "step": 30631500 }, { "epoch": 88.67, "learning_rate": 5.682408415153761e-06, "loss": 1.8205, "step": 30632000 }, { "epoch": 88.67, "learning_rate": 5.681684767506485e-06, "loss": 1.811, "step": 30632500 }, { "epoch": 88.67, "learning_rate": 5.680961119859207e-06, "loss": 1.8114, "step": 30633000 }, { "epoch": 88.67, "learning_rate": 5.680237472211931e-06, "loss": 1.8298, "step": 30633500 }, { "epoch": 88.67, "learning_rate": 5.679513824564654e-06, "loss": 1.813, "step": 30634000 }, { "epoch": 88.67, "learning_rate": 5.678791624212672e-06, "loss": 1.7879, "step": 30634500 }, { "epoch": 88.68, "learning_rate": 5.678067976565394e-06, "loss": 1.8003, "step": 30635000 }, { "epoch": 88.68, "learning_rate": 5.677344328918118e-06, "loss": 1.7795, "step": 30635500 }, { "epoch": 88.68, "learning_rate": 5.676620681270841e-06, "loss": 1.8235, "step": 30636000 }, { "epoch": 88.68, "learning_rate": 5.675897033623564e-06, "loss": 1.8343, "step": 30636500 }, { "epoch": 88.68, "learning_rate": 5.675173385976288e-06, "loss": 1.7961, "step": 30637000 }, { "epoch": 88.68, "learning_rate": 5.6744497383290105e-06, "loss": 1.8055, "step": 30637500 }, { "epoch": 88.68, "learning_rate": 5.6737275379770295e-06, "loss": 1.8124, "step": 30638000 }, { "epoch": 88.69, "learning_rate": 5.673003890329752e-06, "loss": 1.789, "step": 30638500 }, { "epoch": 88.69, "learning_rate": 5.672280242682476e-06, "loss": 1.7971, "step": 30639000 }, { "epoch": 88.69, "learning_rate": 5.671556595035199e-06, "loss": 1.8077, "step": 30639500 }, { "epoch": 88.69, "learning_rate": 5.670834394683217e-06, "loss": 1.7789, "step": 30640000 }, { "epoch": 88.69, "learning_rate": 5.670110747035939e-06, "loss": 1.7791, "step": 30640500 }, { "epoch": 88.69, "learning_rate": 5.669387099388663e-06, "loss": 1.786, "step": 30641000 }, { "epoch": 88.69, "learning_rate": 5.668663451741386e-06, "loss": 1.7833, "step": 30641500 }, { "epoch": 88.7, "learning_rate": 5.667939804094109e-06, "loss": 1.8115, "step": 30642000 }, { "epoch": 88.7, "learning_rate": 5.667216156446833e-06, "loss": 1.823, "step": 30642500 }, { "epoch": 88.7, "learning_rate": 5.666492508799555e-06, "loss": 1.8087, "step": 30643000 }, { "epoch": 88.7, "learning_rate": 5.665768861152279e-06, "loss": 1.7957, "step": 30643500 }, { "epoch": 88.7, "learning_rate": 5.6650466608002965e-06, "loss": 1.7878, "step": 30644000 }, { "epoch": 88.7, "learning_rate": 5.66432301315302e-06, "loss": 1.7832, "step": 30644500 }, { "epoch": 88.7, "learning_rate": 5.663600812801038e-06, "loss": 1.8199, "step": 30645000 }, { "epoch": 88.71, "learning_rate": 5.6628771651537616e-06, "loss": 1.797, "step": 30645500 }, { "epoch": 88.71, "learning_rate": 5.662153517506484e-06, "loss": 1.7934, "step": 30646000 }, { "epoch": 88.71, "learning_rate": 5.661429869859208e-06, "loss": 1.7852, "step": 30646500 }, { "epoch": 88.71, "learning_rate": 5.660706222211931e-06, "loss": 1.7912, "step": 30647000 }, { "epoch": 88.71, "learning_rate": 5.659984021859949e-06, "loss": 1.7813, "step": 30647500 }, { "epoch": 88.71, "learning_rate": 5.659260374212671e-06, "loss": 1.7826, "step": 30648000 }, { "epoch": 88.71, "learning_rate": 5.658536726565395e-06, "loss": 1.8113, "step": 30648500 }, { "epoch": 88.72, "learning_rate": 5.657813078918118e-06, "loss": 1.7862, "step": 30649000 }, { "epoch": 88.72, "learning_rate": 5.657089431270841e-06, "loss": 1.7931, "step": 30649500 }, { "epoch": 88.72, "learning_rate": 5.656365783623564e-06, "loss": 1.7944, "step": 30650000 }, { "epoch": 88.72, "learning_rate": 5.655642135976287e-06, "loss": 1.7972, "step": 30650500 }, { "epoch": 88.72, "learning_rate": 5.654918488329011e-06, "loss": 1.8354, "step": 30651000 }, { "epoch": 88.72, "learning_rate": 5.654194840681734e-06, "loss": 1.8296, "step": 30651500 }, { "epoch": 88.72, "learning_rate": 5.6534711930344575e-06, "loss": 1.8018, "step": 30652000 }, { "epoch": 88.73, "learning_rate": 5.6527475453871806e-06, "loss": 1.7842, "step": 30652500 }, { "epoch": 88.73, "learning_rate": 5.652023897739904e-06, "loss": 1.7879, "step": 30653000 }, { "epoch": 88.73, "learning_rate": 5.651301697387922e-06, "loss": 1.7964, "step": 30653500 }, { "epoch": 88.73, "learning_rate": 5.65057949703594e-06, "loss": 1.7833, "step": 30654000 }, { "epoch": 88.73, "learning_rate": 5.649855849388663e-06, "loss": 1.7797, "step": 30654500 }, { "epoch": 88.73, "learning_rate": 5.649132201741386e-06, "loss": 1.7853, "step": 30655000 }, { "epoch": 88.74, "learning_rate": 5.648410001389404e-06, "loss": 1.816, "step": 30655500 }, { "epoch": 88.74, "learning_rate": 5.647686353742127e-06, "loss": 1.7918, "step": 30656000 }, { "epoch": 88.74, "learning_rate": 5.64696270609485e-06, "loss": 1.8024, "step": 30656500 }, { "epoch": 88.74, "learning_rate": 5.646239058447573e-06, "loss": 1.8383, "step": 30657000 }, { "epoch": 88.74, "learning_rate": 5.645515410800296e-06, "loss": 1.7849, "step": 30657500 }, { "epoch": 88.74, "learning_rate": 5.64479176315302e-06, "loss": 1.7806, "step": 30658000 }, { "epoch": 88.74, "learning_rate": 5.644068115505743e-06, "loss": 1.7767, "step": 30658500 }, { "epoch": 88.75, "learning_rate": 5.643345915153761e-06, "loss": 1.8079, "step": 30659000 }, { "epoch": 88.75, "learning_rate": 5.642622267506484e-06, "loss": 1.8202, "step": 30659500 }, { "epoch": 88.75, "learning_rate": 5.641898619859208e-06, "loss": 1.8004, "step": 30660000 }, { "epoch": 88.75, "learning_rate": 5.641174972211931e-06, "loss": 1.7922, "step": 30660500 }, { "epoch": 88.75, "learning_rate": 5.640451324564654e-06, "loss": 1.7893, "step": 30661000 }, { "epoch": 88.75, "learning_rate": 5.639727676917377e-06, "loss": 1.7877, "step": 30661500 }, { "epoch": 88.75, "learning_rate": 5.639005476565395e-06, "loss": 1.8066, "step": 30662000 }, { "epoch": 88.76, "learning_rate": 5.638281828918118e-06, "loss": 1.7778, "step": 30662500 }, { "epoch": 88.76, "learning_rate": 5.637558181270841e-06, "loss": 1.81, "step": 30663000 }, { "epoch": 88.76, "learning_rate": 5.636834533623564e-06, "loss": 1.7979, "step": 30663500 }, { "epoch": 88.76, "learning_rate": 5.636110885976287e-06, "loss": 1.8129, "step": 30664000 }, { "epoch": 88.76, "learning_rate": 5.6353886856243054e-06, "loss": 1.8271, "step": 30664500 }, { "epoch": 88.76, "learning_rate": 5.6346650379770285e-06, "loss": 1.8082, "step": 30665000 }, { "epoch": 88.76, "learning_rate": 5.6339413903297524e-06, "loss": 1.7953, "step": 30665500 }, { "epoch": 88.77, "learning_rate": 5.6332177426824755e-06, "loss": 1.7943, "step": 30666000 }, { "epoch": 88.77, "learning_rate": 5.632494095035199e-06, "loss": 1.8298, "step": 30666500 }, { "epoch": 88.77, "learning_rate": 5.631770447387922e-06, "loss": 1.8113, "step": 30667000 }, { "epoch": 88.77, "learning_rate": 5.631046799740645e-06, "loss": 1.8061, "step": 30667500 }, { "epoch": 88.77, "learning_rate": 5.630323152093369e-06, "loss": 1.8041, "step": 30668000 }, { "epoch": 88.77, "learning_rate": 5.629599504446091e-06, "loss": 1.7945, "step": 30668500 }, { "epoch": 88.77, "learning_rate": 5.628875856798815e-06, "loss": 1.7936, "step": 30669000 }, { "epoch": 88.78, "learning_rate": 5.628152209151538e-06, "loss": 1.8096, "step": 30669500 }, { "epoch": 88.78, "learning_rate": 5.627430008799556e-06, "loss": 1.8121, "step": 30670000 }, { "epoch": 88.78, "learning_rate": 5.626706361152278e-06, "loss": 1.8047, "step": 30670500 }, { "epoch": 88.78, "learning_rate": 5.625982713505002e-06, "loss": 1.7901, "step": 30671000 }, { "epoch": 88.78, "learning_rate": 5.625259065857725e-06, "loss": 1.8314, "step": 30671500 }, { "epoch": 88.78, "learning_rate": 5.624536865505743e-06, "loss": 1.8086, "step": 30672000 }, { "epoch": 88.78, "learning_rate": 5.6238132178584664e-06, "loss": 1.81, "step": 30672500 }, { "epoch": 88.79, "learning_rate": 5.6230895702111895e-06, "loss": 1.7877, "step": 30673000 }, { "epoch": 88.79, "learning_rate": 5.6223659225639135e-06, "loss": 1.7968, "step": 30673500 }, { "epoch": 88.79, "learning_rate": 5.621642274916636e-06, "loss": 1.792, "step": 30674000 }, { "epoch": 88.79, "learning_rate": 5.620920074564654e-06, "loss": 1.7781, "step": 30674500 }, { "epoch": 88.79, "learning_rate": 5.620196426917377e-06, "loss": 1.7894, "step": 30675000 }, { "epoch": 88.79, "learning_rate": 5.619472779270101e-06, "loss": 1.7805, "step": 30675500 }, { "epoch": 88.79, "learning_rate": 5.618749131622823e-06, "loss": 1.8162, "step": 30676000 }, { "epoch": 88.8, "learning_rate": 5.618025483975547e-06, "loss": 1.7985, "step": 30676500 }, { "epoch": 88.8, "learning_rate": 5.61730183632827e-06, "loss": 1.831, "step": 30677000 }, { "epoch": 88.8, "learning_rate": 5.616579635976288e-06, "loss": 1.7804, "step": 30677500 }, { "epoch": 88.8, "learning_rate": 5.61585598832901e-06, "loss": 1.8018, "step": 30678000 }, { "epoch": 88.8, "learning_rate": 5.615132340681734e-06, "loss": 1.793, "step": 30678500 }, { "epoch": 88.8, "learning_rate": 5.614408693034457e-06, "loss": 1.8014, "step": 30679000 }, { "epoch": 88.8, "learning_rate": 5.6136864926824755e-06, "loss": 1.8108, "step": 30679500 }, { "epoch": 88.81, "learning_rate": 5.6129628450351985e-06, "loss": 1.7938, "step": 30680000 }, { "epoch": 88.81, "learning_rate": 5.612239197387922e-06, "loss": 1.797, "step": 30680500 }, { "epoch": 88.81, "learning_rate": 5.6115155497406456e-06, "loss": 1.7892, "step": 30681000 }, { "epoch": 88.81, "learning_rate": 5.610791902093368e-06, "loss": 1.8063, "step": 30681500 }, { "epoch": 88.81, "learning_rate": 5.610069701741386e-06, "loss": 1.7641, "step": 30682000 }, { "epoch": 88.81, "learning_rate": 5.609346054094109e-06, "loss": 1.7891, "step": 30682500 }, { "epoch": 88.81, "learning_rate": 5.608622406446833e-06, "loss": 1.7897, "step": 30683000 }, { "epoch": 88.82, "learning_rate": 5.607898758799555e-06, "loss": 1.7908, "step": 30683500 }, { "epoch": 88.82, "learning_rate": 5.607175111152279e-06, "loss": 1.8167, "step": 30684000 }, { "epoch": 88.82, "learning_rate": 5.606451463505002e-06, "loss": 1.7911, "step": 30684500 }, { "epoch": 88.82, "learning_rate": 5.605727815857725e-06, "loss": 1.7907, "step": 30685000 }, { "epoch": 88.82, "learning_rate": 5.605004168210449e-06, "loss": 1.7749, "step": 30685500 }, { "epoch": 88.82, "learning_rate": 5.604281967858466e-06, "loss": 1.8016, "step": 30686000 }, { "epoch": 88.82, "learning_rate": 5.60355832021119e-06, "loss": 1.7762, "step": 30686500 }, { "epoch": 88.83, "learning_rate": 5.6028346725639126e-06, "loss": 1.7863, "step": 30687000 }, { "epoch": 88.83, "learning_rate": 5.602112472211931e-06, "loss": 1.8204, "step": 30687500 }, { "epoch": 88.83, "learning_rate": 5.601388824564654e-06, "loss": 1.8093, "step": 30688000 }, { "epoch": 88.83, "learning_rate": 5.600665176917378e-06, "loss": 1.7907, "step": 30688500 }, { "epoch": 88.83, "learning_rate": 5.5999415292701e-06, "loss": 1.7873, "step": 30689000 }, { "epoch": 88.83, "learning_rate": 5.599217881622824e-06, "loss": 1.7766, "step": 30689500 }, { "epoch": 88.83, "learning_rate": 5.598494233975547e-06, "loss": 1.8062, "step": 30690000 }, { "epoch": 88.84, "learning_rate": 5.59777058632827e-06, "loss": 1.8027, "step": 30690500 }, { "epoch": 88.84, "learning_rate": 5.597046938680993e-06, "loss": 1.8039, "step": 30691000 }, { "epoch": 88.84, "learning_rate": 5.596323291033716e-06, "loss": 1.795, "step": 30691500 }, { "epoch": 88.84, "learning_rate": 5.59559964338644e-06, "loss": 1.8143, "step": 30692000 }, { "epoch": 88.84, "learning_rate": 5.594875995739163e-06, "loss": 1.7956, "step": 30692500 }, { "epoch": 88.84, "learning_rate": 5.594153795387181e-06, "loss": 1.7918, "step": 30693000 }, { "epoch": 88.85, "learning_rate": 5.5934315950351985e-06, "loss": 1.7997, "step": 30693500 }, { "epoch": 88.85, "learning_rate": 5.5927079473879216e-06, "loss": 1.777, "step": 30694000 }, { "epoch": 88.85, "learning_rate": 5.591984299740645e-06, "loss": 1.833, "step": 30694500 }, { "epoch": 88.85, "learning_rate": 5.591260652093369e-06, "loss": 1.8051, "step": 30695000 }, { "epoch": 88.85, "learning_rate": 5.590537004446092e-06, "loss": 1.8037, "step": 30695500 }, { "epoch": 88.85, "learning_rate": 5.589813356798815e-06, "loss": 1.7909, "step": 30696000 }, { "epoch": 88.85, "learning_rate": 5.589089709151538e-06, "loss": 1.7984, "step": 30696500 }, { "epoch": 88.86, "learning_rate": 5.588366061504261e-06, "loss": 1.8042, "step": 30697000 }, { "epoch": 88.86, "learning_rate": 5.587642413856985e-06, "loss": 1.8021, "step": 30697500 }, { "epoch": 88.86, "learning_rate": 5.586920213505002e-06, "loss": 1.7887, "step": 30698000 }, { "epoch": 88.86, "learning_rate": 5.586196565857725e-06, "loss": 1.7873, "step": 30698500 }, { "epoch": 88.86, "learning_rate": 5.585472918210448e-06, "loss": 1.8064, "step": 30699000 }, { "epoch": 88.86, "learning_rate": 5.584749270563172e-06, "loss": 1.7902, "step": 30699500 }, { "epoch": 88.86, "learning_rate": 5.584025622915895e-06, "loss": 1.8006, "step": 30700000 }, { "epoch": 88.87, "learning_rate": 5.583303422563913e-06, "loss": 1.7635, "step": 30700500 }, { "epoch": 88.87, "learning_rate": 5.582579774916636e-06, "loss": 1.8105, "step": 30701000 }, { "epoch": 88.87, "learning_rate": 5.5818561272693595e-06, "loss": 1.8069, "step": 30701500 }, { "epoch": 88.87, "learning_rate": 5.581132479622083e-06, "loss": 1.7985, "step": 30702000 }, { "epoch": 88.87, "learning_rate": 5.580408831974806e-06, "loss": 1.8187, "step": 30702500 }, { "epoch": 88.87, "learning_rate": 5.579686631622824e-06, "loss": 1.8041, "step": 30703000 }, { "epoch": 88.87, "learning_rate": 5.578962983975547e-06, "loss": 1.7908, "step": 30703500 }, { "epoch": 88.88, "learning_rate": 5.57823933632827e-06, "loss": 1.7976, "step": 30704000 }, { "epoch": 88.88, "learning_rate": 5.577515688680993e-06, "loss": 1.7835, "step": 30704500 }, { "epoch": 88.88, "learning_rate": 5.576792041033716e-06, "loss": 1.7911, "step": 30705000 }, { "epoch": 88.88, "learning_rate": 5.57606839338644e-06, "loss": 1.8084, "step": 30705500 }, { "epoch": 88.88, "learning_rate": 5.575344745739163e-06, "loss": 1.8149, "step": 30706000 }, { "epoch": 88.88, "learning_rate": 5.57462254538718e-06, "loss": 1.7834, "step": 30706500 }, { "epoch": 88.88, "learning_rate": 5.573898897739904e-06, "loss": 1.774, "step": 30707000 }, { "epoch": 88.89, "learning_rate": 5.573175250092627e-06, "loss": 1.7932, "step": 30707500 }, { "epoch": 88.89, "learning_rate": 5.5724516024453504e-06, "loss": 1.8095, "step": 30708000 }, { "epoch": 88.89, "learning_rate": 5.5717279547980735e-06, "loss": 1.7979, "step": 30708500 }, { "epoch": 88.89, "learning_rate": 5.571005754446092e-06, "loss": 1.8277, "step": 30709000 }, { "epoch": 88.89, "learning_rate": 5.570282106798815e-06, "loss": 1.8034, "step": 30709500 }, { "epoch": 88.89, "learning_rate": 5.569558459151538e-06, "loss": 1.8037, "step": 30710000 }, { "epoch": 88.89, "learning_rate": 5.568834811504261e-06, "loss": 1.7848, "step": 30710500 }, { "epoch": 88.9, "learning_rate": 5.568111163856985e-06, "loss": 1.8218, "step": 30711000 }, { "epoch": 88.9, "learning_rate": 5.567387516209707e-06, "loss": 1.8063, "step": 30711500 }, { "epoch": 88.9, "learning_rate": 5.566663868562431e-06, "loss": 1.7922, "step": 30712000 }, { "epoch": 88.9, "learning_rate": 5.565940220915154e-06, "loss": 1.8251, "step": 30712500 }, { "epoch": 88.9, "learning_rate": 5.565218020563172e-06, "loss": 1.7845, "step": 30713000 }, { "epoch": 88.9, "learning_rate": 5.564495820211189e-06, "loss": 1.7928, "step": 30713500 }, { "epoch": 88.9, "learning_rate": 5.5637736198592075e-06, "loss": 1.7906, "step": 30714000 }, { "epoch": 88.91, "learning_rate": 5.5630499722119305e-06, "loss": 1.799, "step": 30714500 }, { "epoch": 88.91, "learning_rate": 5.562326324564654e-06, "loss": 1.7983, "step": 30715000 }, { "epoch": 88.91, "learning_rate": 5.561602676917377e-06, "loss": 1.7958, "step": 30715500 }, { "epoch": 88.91, "learning_rate": 5.560879029270101e-06, "loss": 1.8031, "step": 30716000 }, { "epoch": 88.91, "learning_rate": 5.560155381622824e-06, "loss": 1.8298, "step": 30716500 }, { "epoch": 88.91, "learning_rate": 5.559431733975547e-06, "loss": 1.7925, "step": 30717000 }, { "epoch": 88.91, "learning_rate": 5.558709533623564e-06, "loss": 1.7972, "step": 30717500 }, { "epoch": 88.92, "learning_rate": 5.557985885976288e-06, "loss": 1.7824, "step": 30718000 }, { "epoch": 88.92, "learning_rate": 5.557262238329011e-06, "loss": 1.8053, "step": 30718500 }, { "epoch": 88.92, "learning_rate": 5.556538590681734e-06, "loss": 1.7924, "step": 30719000 }, { "epoch": 88.92, "learning_rate": 5.555814943034457e-06, "loss": 1.8079, "step": 30719500 }, { "epoch": 88.92, "learning_rate": 5.55509129538718e-06, "loss": 1.7759, "step": 30720000 }, { "epoch": 88.92, "learning_rate": 5.554367647739904e-06, "loss": 1.7941, "step": 30720500 }, { "epoch": 88.92, "learning_rate": 5.553644000092627e-06, "loss": 1.8203, "step": 30721000 }, { "epoch": 88.93, "learning_rate": 5.55292035244535e-06, "loss": 1.8101, "step": 30721500 }, { "epoch": 88.93, "learning_rate": 5.5521967047980735e-06, "loss": 1.7978, "step": 30722000 }, { "epoch": 88.93, "learning_rate": 5.5514745044460916e-06, "loss": 1.8237, "step": 30722500 }, { "epoch": 88.93, "learning_rate": 5.550750856798815e-06, "loss": 1.8012, "step": 30723000 }, { "epoch": 88.93, "learning_rate": 5.550027209151538e-06, "loss": 1.8024, "step": 30723500 }, { "epoch": 88.93, "learning_rate": 5.549303561504262e-06, "loss": 1.7896, "step": 30724000 }, { "epoch": 88.93, "learning_rate": 5.548579913856984e-06, "loss": 1.7697, "step": 30724500 }, { "epoch": 88.94, "learning_rate": 5.547856266209708e-06, "loss": 1.8178, "step": 30725000 }, { "epoch": 88.94, "learning_rate": 5.547134065857725e-06, "loss": 1.8021, "step": 30725500 }, { "epoch": 88.94, "learning_rate": 5.546410418210449e-06, "loss": 1.8111, "step": 30726000 }, { "epoch": 88.94, "learning_rate": 5.545686770563171e-06, "loss": 1.7917, "step": 30726500 }, { "epoch": 88.94, "learning_rate": 5.544963122915895e-06, "loss": 1.8032, "step": 30727000 }, { "epoch": 88.94, "learning_rate": 5.544240922563912e-06, "loss": 1.8122, "step": 30727500 }, { "epoch": 88.94, "learning_rate": 5.543517274916636e-06, "loss": 1.7707, "step": 30728000 }, { "epoch": 88.95, "learning_rate": 5.542795074564654e-06, "loss": 1.8128, "step": 30728500 }, { "epoch": 88.95, "learning_rate": 5.5420714269173775e-06, "loss": 1.7955, "step": 30729000 }, { "epoch": 88.95, "learning_rate": 5.5413477792701e-06, "loss": 1.8046, "step": 30729500 }, { "epoch": 88.95, "learning_rate": 5.540624131622824e-06, "loss": 1.8012, "step": 30730000 }, { "epoch": 88.95, "learning_rate": 5.539900483975547e-06, "loss": 1.7883, "step": 30730500 }, { "epoch": 88.95, "learning_rate": 5.53917683632827e-06, "loss": 1.775, "step": 30731000 }, { "epoch": 88.96, "learning_rate": 5.538453188680994e-06, "loss": 1.8022, "step": 30731500 }, { "epoch": 88.96, "learning_rate": 5.537729541033716e-06, "loss": 1.7962, "step": 30732000 }, { "epoch": 88.96, "learning_rate": 5.537007340681735e-06, "loss": 1.7755, "step": 30732500 }, { "epoch": 88.96, "learning_rate": 5.536283693034457e-06, "loss": 1.7947, "step": 30733000 }, { "epoch": 88.96, "learning_rate": 5.535560045387181e-06, "loss": 1.8348, "step": 30733500 }, { "epoch": 88.96, "learning_rate": 5.534836397739903e-06, "loss": 1.8037, "step": 30734000 }, { "epoch": 88.96, "learning_rate": 5.534112750092627e-06, "loss": 1.7875, "step": 30734500 }, { "epoch": 88.97, "learning_rate": 5.53338910244535e-06, "loss": 1.8079, "step": 30735000 }, { "epoch": 88.97, "learning_rate": 5.532665454798073e-06, "loss": 1.8044, "step": 30735500 }, { "epoch": 88.97, "learning_rate": 5.5319432544460915e-06, "loss": 1.7965, "step": 30736000 }, { "epoch": 88.97, "learning_rate": 5.531219606798815e-06, "loss": 1.8064, "step": 30736500 }, { "epoch": 88.97, "learning_rate": 5.5304959591515385e-06, "loss": 1.7789, "step": 30737000 }, { "epoch": 88.97, "learning_rate": 5.529772311504261e-06, "loss": 1.7842, "step": 30737500 }, { "epoch": 88.97, "learning_rate": 5.529048663856985e-06, "loss": 1.8073, "step": 30738000 }, { "epoch": 88.98, "learning_rate": 5.528325016209708e-06, "loss": 1.7941, "step": 30738500 }, { "epoch": 88.98, "learning_rate": 5.527602815857726e-06, "loss": 1.7868, "step": 30739000 }, { "epoch": 88.98, "learning_rate": 5.526879168210448e-06, "loss": 1.783, "step": 30739500 }, { "epoch": 88.98, "learning_rate": 5.526155520563172e-06, "loss": 1.8051, "step": 30740000 }, { "epoch": 88.98, "learning_rate": 5.525431872915895e-06, "loss": 1.7868, "step": 30740500 }, { "epoch": 88.98, "learning_rate": 5.524709672563913e-06, "loss": 1.8062, "step": 30741000 }, { "epoch": 88.98, "learning_rate": 5.523986024916636e-06, "loss": 1.8043, "step": 30741500 }, { "epoch": 88.99, "learning_rate": 5.523262377269359e-06, "loss": 1.8041, "step": 30742000 }, { "epoch": 88.99, "learning_rate": 5.5225387296220824e-06, "loss": 1.7929, "step": 30742500 }, { "epoch": 88.99, "learning_rate": 5.5218150819748055e-06, "loss": 1.8043, "step": 30743000 }, { "epoch": 88.99, "learning_rate": 5.521092881622824e-06, "loss": 1.7979, "step": 30743500 }, { "epoch": 88.99, "learning_rate": 5.520369233975547e-06, "loss": 1.805, "step": 30744000 }, { "epoch": 88.99, "learning_rate": 5.519645586328271e-06, "loss": 1.7892, "step": 30744500 }, { "epoch": 88.99, "learning_rate": 5.518921938680993e-06, "loss": 1.8007, "step": 30745000 }, { "epoch": 89.0, "learning_rate": 5.518199738329011e-06, "loss": 1.8306, "step": 30745500 }, { "epoch": 89.0, "learning_rate": 5.517476090681734e-06, "loss": 1.8245, "step": 30746000 }, { "epoch": 89.0, "learning_rate": 5.516752443034458e-06, "loss": 1.7965, "step": 30746500 }, { "epoch": 89.0, "learning_rate": 5.51602879538718e-06, "loss": 1.776, "step": 30747000 }, { "epoch": 89.0, "eval_accuracy": 0.6898743273065717, "eval_accuracy_mlm": 0.6590290488740586, "eval_accuracy_nsp": 0.855270334062046, "eval_loss": 2.2004992961883545, "eval_runtime": 332.1031, "eval_samples_per_second": 1314.008, "eval_steps_per_second": 54.751, "step": 30747008 }, { "epoch": 89.0, "learning_rate": 5.515305147739904e-06, "loss": 1.7704, "step": 30747500 }, { "epoch": 89.0, "learning_rate": 5.514582947387921e-06, "loss": 1.7767, "step": 30748000 }, { "epoch": 89.0, "learning_rate": 5.513859299740645e-06, "loss": 1.8196, "step": 30748500 }, { "epoch": 89.01, "learning_rate": 5.5131370993886625e-06, "loss": 1.7982, "step": 30749000 }, { "epoch": 89.01, "learning_rate": 5.5124134517413865e-06, "loss": 1.7849, "step": 30749500 }, { "epoch": 89.01, "learning_rate": 5.511691251389404e-06, "loss": 1.8098, "step": 30750000 }, { "epoch": 89.01, "learning_rate": 5.510967603742127e-06, "loss": 1.7711, "step": 30750500 }, { "epoch": 89.01, "learning_rate": 5.51024395609485e-06, "loss": 1.8025, "step": 30751000 }, { "epoch": 89.01, "learning_rate": 5.509520308447574e-06, "loss": 1.7797, "step": 30751500 }, { "epoch": 89.01, "learning_rate": 5.508796660800297e-06, "loss": 1.7952, "step": 30752000 }, { "epoch": 89.02, "learning_rate": 5.50807301315302e-06, "loss": 1.8069, "step": 30752500 }, { "epoch": 89.02, "learning_rate": 5.507349365505743e-06, "loss": 1.8088, "step": 30753000 }, { "epoch": 89.02, "learning_rate": 5.506625717858466e-06, "loss": 1.8133, "step": 30753500 }, { "epoch": 89.02, "learning_rate": 5.50590207021119e-06, "loss": 1.7769, "step": 30754000 }, { "epoch": 89.02, "learning_rate": 5.505178422563913e-06, "loss": 1.8163, "step": 30754500 }, { "epoch": 89.02, "learning_rate": 5.504454774916636e-06, "loss": 1.7815, "step": 30755000 }, { "epoch": 89.02, "learning_rate": 5.503731127269359e-06, "loss": 1.7708, "step": 30755500 }, { "epoch": 89.03, "learning_rate": 5.503008926917377e-06, "loss": 1.7887, "step": 30756000 }, { "epoch": 89.03, "learning_rate": 5.5022852792701005e-06, "loss": 1.8025, "step": 30756500 }, { "epoch": 89.03, "learning_rate": 5.5015616316228236e-06, "loss": 1.7958, "step": 30757000 }, { "epoch": 89.03, "learning_rate": 5.500837983975547e-06, "loss": 1.7988, "step": 30757500 }, { "epoch": 89.03, "learning_rate": 5.50011433632827e-06, "loss": 1.7975, "step": 30758000 }, { "epoch": 89.03, "learning_rate": 5.499392135976288e-06, "loss": 1.8, "step": 30758500 }, { "epoch": 89.03, "learning_rate": 5.498668488329011e-06, "loss": 1.7667, "step": 30759000 }, { "epoch": 89.04, "learning_rate": 5.497946287977029e-06, "loss": 1.791, "step": 30759500 }, { "epoch": 89.04, "learning_rate": 5.497222640329752e-06, "loss": 1.7851, "step": 30760000 }, { "epoch": 89.04, "learning_rate": 5.496498992682475e-06, "loss": 1.7869, "step": 30760500 }, { "epoch": 89.04, "learning_rate": 5.495775345035198e-06, "loss": 1.7826, "step": 30761000 }, { "epoch": 89.04, "learning_rate": 5.495051697387921e-06, "loss": 1.8174, "step": 30761500 }, { "epoch": 89.04, "learning_rate": 5.494328049740645e-06, "loss": 1.771, "step": 30762000 }, { "epoch": 89.04, "learning_rate": 5.493604402093368e-06, "loss": 1.7957, "step": 30762500 }, { "epoch": 89.05, "learning_rate": 5.492880754446091e-06, "loss": 1.7851, "step": 30763000 }, { "epoch": 89.05, "learning_rate": 5.4921571067988145e-06, "loss": 1.771, "step": 30763500 }, { "epoch": 89.05, "learning_rate": 5.491434906446833e-06, "loss": 1.8031, "step": 30764000 }, { "epoch": 89.05, "learning_rate": 5.490711258799556e-06, "loss": 1.7789, "step": 30764500 }, { "epoch": 89.05, "learning_rate": 5.489989058447574e-06, "loss": 1.826, "step": 30765000 }, { "epoch": 89.05, "learning_rate": 5.489265410800297e-06, "loss": 1.7838, "step": 30765500 }, { "epoch": 89.05, "learning_rate": 5.488543210448314e-06, "loss": 1.7589, "step": 30766000 }, { "epoch": 89.06, "learning_rate": 5.487819562801038e-06, "loss": 1.7799, "step": 30766500 }, { "epoch": 89.06, "learning_rate": 5.487095915153761e-06, "loss": 1.7825, "step": 30767000 }, { "epoch": 89.06, "learning_rate": 5.486372267506484e-06, "loss": 1.805, "step": 30767500 }, { "epoch": 89.06, "learning_rate": 5.485648619859207e-06, "loss": 1.8091, "step": 30768000 }, { "epoch": 89.06, "learning_rate": 5.48492497221193e-06, "loss": 1.7883, "step": 30768500 }, { "epoch": 89.06, "learning_rate": 5.484201324564653e-06, "loss": 1.7935, "step": 30769000 }, { "epoch": 89.07, "learning_rate": 5.483477676917377e-06, "loss": 1.7914, "step": 30769500 }, { "epoch": 89.07, "learning_rate": 5.4827540292701004e-06, "loss": 1.7982, "step": 30770000 }, { "epoch": 89.07, "learning_rate": 5.4820303816228235e-06, "loss": 1.777, "step": 30770500 }, { "epoch": 89.07, "learning_rate": 5.481306733975547e-06, "loss": 1.7725, "step": 30771000 }, { "epoch": 89.07, "learning_rate": 5.48058308632827e-06, "loss": 1.7986, "step": 30771500 }, { "epoch": 89.07, "learning_rate": 5.479859438680994e-06, "loss": 1.7816, "step": 30772000 }, { "epoch": 89.07, "learning_rate": 5.479135791033716e-06, "loss": 1.7746, "step": 30772500 }, { "epoch": 89.08, "learning_rate": 5.478413590681735e-06, "loss": 1.7961, "step": 30773000 }, { "epoch": 89.08, "learning_rate": 5.477689943034457e-06, "loss": 1.8051, "step": 30773500 }, { "epoch": 89.08, "learning_rate": 5.476966295387181e-06, "loss": 1.8136, "step": 30774000 }, { "epoch": 89.08, "learning_rate": 5.476242647739904e-06, "loss": 1.8127, "step": 30774500 }, { "epoch": 89.08, "learning_rate": 5.475520447387922e-06, "loss": 1.7819, "step": 30775000 }, { "epoch": 89.08, "learning_rate": 5.474796799740644e-06, "loss": 1.7883, "step": 30775500 }, { "epoch": 89.08, "learning_rate": 5.474073152093368e-06, "loss": 1.7946, "step": 30776000 }, { "epoch": 89.09, "learning_rate": 5.473349504446091e-06, "loss": 1.7865, "step": 30776500 }, { "epoch": 89.09, "learning_rate": 5.4726258567988144e-06, "loss": 1.789, "step": 30777000 }, { "epoch": 89.09, "learning_rate": 5.4719036564468325e-06, "loss": 1.8155, "step": 30777500 }, { "epoch": 89.09, "learning_rate": 5.471180008799556e-06, "loss": 1.768, "step": 30778000 }, { "epoch": 89.09, "learning_rate": 5.470456361152279e-06, "loss": 1.8028, "step": 30778500 }, { "epoch": 89.09, "learning_rate": 5.469732713505002e-06, "loss": 1.7778, "step": 30779000 }, { "epoch": 89.09, "learning_rate": 5.46901051315302e-06, "loss": 1.8016, "step": 30779500 }, { "epoch": 89.1, "learning_rate": 5.468286865505743e-06, "loss": 1.7807, "step": 30780000 }, { "epoch": 89.1, "learning_rate": 5.467563217858467e-06, "loss": 1.7774, "step": 30780500 }, { "epoch": 89.1, "learning_rate": 5.466839570211189e-06, "loss": 1.7785, "step": 30781000 }, { "epoch": 89.1, "learning_rate": 5.466115922563913e-06, "loss": 1.7757, "step": 30781500 }, { "epoch": 89.1, "learning_rate": 5.465392274916636e-06, "loss": 1.7665, "step": 30782000 }, { "epoch": 89.1, "learning_rate": 5.464668627269359e-06, "loss": 1.8119, "step": 30782500 }, { "epoch": 89.1, "learning_rate": 5.463944979622083e-06, "loss": 1.7855, "step": 30783000 }, { "epoch": 89.11, "learning_rate": 5.463221331974805e-06, "loss": 1.7648, "step": 30783500 }, { "epoch": 89.11, "learning_rate": 5.462497684327529e-06, "loss": 1.7818, "step": 30784000 }, { "epoch": 89.11, "learning_rate": 5.461774036680252e-06, "loss": 1.7665, "step": 30784500 }, { "epoch": 89.11, "learning_rate": 5.4610518363282705e-06, "loss": 1.795, "step": 30785000 }, { "epoch": 89.11, "learning_rate": 5.460328188680993e-06, "loss": 1.781, "step": 30785500 }, { "epoch": 89.11, "learning_rate": 5.459604541033717e-06, "loss": 1.7861, "step": 30786000 }, { "epoch": 89.11, "learning_rate": 5.45888089338644e-06, "loss": 1.8029, "step": 30786500 }, { "epoch": 89.12, "learning_rate": 5.458158693034458e-06, "loss": 1.8129, "step": 30787000 }, { "epoch": 89.12, "learning_rate": 5.45743504538718e-06, "loss": 1.8179, "step": 30787500 }, { "epoch": 89.12, "learning_rate": 5.456711397739904e-06, "loss": 1.7811, "step": 30788000 }, { "epoch": 89.12, "learning_rate": 5.455987750092628e-06, "loss": 1.7897, "step": 30788500 }, { "epoch": 89.12, "learning_rate": 5.45526410244535e-06, "loss": 1.7958, "step": 30789000 }, { "epoch": 89.12, "learning_rate": 5.454540454798074e-06, "loss": 1.8085, "step": 30789500 }, { "epoch": 89.12, "learning_rate": 5.453816807150796e-06, "loss": 1.786, "step": 30790000 }, { "epoch": 89.13, "learning_rate": 5.45309315950352e-06, "loss": 1.7876, "step": 30790500 }, { "epoch": 89.13, "learning_rate": 5.452369511856243e-06, "loss": 1.7974, "step": 30791000 }, { "epoch": 89.13, "learning_rate": 5.451647311504261e-06, "loss": 1.7876, "step": 30791500 }, { "epoch": 89.13, "learning_rate": 5.4509236638569845e-06, "loss": 1.8097, "step": 30792000 }, { "epoch": 89.13, "learning_rate": 5.4502000162097076e-06, "loss": 1.7785, "step": 30792500 }, { "epoch": 89.13, "learning_rate": 5.449476368562431e-06, "loss": 1.7882, "step": 30793000 }, { "epoch": 89.13, "learning_rate": 5.448752720915154e-06, "loss": 1.7842, "step": 30793500 }, { "epoch": 89.14, "learning_rate": 5.448029073267878e-06, "loss": 1.7723, "step": 30794000 }, { "epoch": 89.14, "learning_rate": 5.447306872915895e-06, "loss": 1.7749, "step": 30794500 }, { "epoch": 89.14, "learning_rate": 5.446583225268619e-06, "loss": 1.7977, "step": 30795000 }, { "epoch": 89.14, "learning_rate": 5.445859577621341e-06, "loss": 1.8002, "step": 30795500 }, { "epoch": 89.14, "learning_rate": 5.445135929974065e-06, "loss": 1.805, "step": 30796000 }, { "epoch": 89.14, "learning_rate": 5.444413729622082e-06, "loss": 1.8035, "step": 30796500 }, { "epoch": 89.14, "learning_rate": 5.443690081974806e-06, "loss": 1.8027, "step": 30797000 }, { "epoch": 89.15, "learning_rate": 5.442966434327529e-06, "loss": 1.8104, "step": 30797500 }, { "epoch": 89.15, "learning_rate": 5.442242786680252e-06, "loss": 1.7945, "step": 30798000 }, { "epoch": 89.15, "learning_rate": 5.441519139032975e-06, "loss": 1.7549, "step": 30798500 }, { "epoch": 89.15, "learning_rate": 5.4407954913856985e-06, "loss": 1.7933, "step": 30799000 }, { "epoch": 89.15, "learning_rate": 5.440071843738422e-06, "loss": 1.7839, "step": 30799500 }, { "epoch": 89.15, "learning_rate": 5.4393481960911455e-06, "loss": 1.7684, "step": 30800000 }, { "epoch": 89.15, "learning_rate": 5.4386245484438686e-06, "loss": 1.7954, "step": 30800500 }, { "epoch": 89.16, "learning_rate": 5.437902348091886e-06, "loss": 1.7817, "step": 30801000 }, { "epoch": 89.16, "learning_rate": 5.43717870044461e-06, "loss": 1.8115, "step": 30801500 }, { "epoch": 89.16, "learning_rate": 5.436455052797333e-06, "loss": 1.7599, "step": 30802000 }, { "epoch": 89.16, "learning_rate": 5.435731405150056e-06, "loss": 1.7912, "step": 30802500 }, { "epoch": 89.16, "learning_rate": 5.435007757502779e-06, "loss": 1.8003, "step": 30803000 }, { "epoch": 89.16, "learning_rate": 5.434285557150797e-06, "loss": 1.7727, "step": 30803500 }, { "epoch": 89.16, "learning_rate": 5.43356190950352e-06, "loss": 1.8002, "step": 30804000 }, { "epoch": 89.17, "learning_rate": 5.432838261856243e-06, "loss": 1.7995, "step": 30804500 }, { "epoch": 89.17, "learning_rate": 5.432114614208966e-06, "loss": 1.7842, "step": 30805000 }, { "epoch": 89.17, "learning_rate": 5.431390966561689e-06, "loss": 1.792, "step": 30805500 }, { "epoch": 89.17, "learning_rate": 5.4306687662097075e-06, "loss": 1.7899, "step": 30806000 }, { "epoch": 89.17, "learning_rate": 5.429946565857726e-06, "loss": 1.8301, "step": 30806500 }, { "epoch": 89.17, "learning_rate": 5.429222918210449e-06, "loss": 1.7838, "step": 30807000 }, { "epoch": 89.18, "learning_rate": 5.428499270563172e-06, "loss": 1.779, "step": 30807500 }, { "epoch": 89.18, "learning_rate": 5.427775622915895e-06, "loss": 1.7797, "step": 30808000 }, { "epoch": 89.18, "learning_rate": 5.427051975268618e-06, "loss": 1.8396, "step": 30808500 }, { "epoch": 89.18, "learning_rate": 5.426328327621342e-06, "loss": 1.7869, "step": 30809000 }, { "epoch": 89.18, "learning_rate": 5.425604679974065e-06, "loss": 1.7888, "step": 30809500 }, { "epoch": 89.18, "learning_rate": 5.424881032326788e-06, "loss": 1.7768, "step": 30810000 }, { "epoch": 89.18, "learning_rate": 5.424157384679511e-06, "loss": 1.7797, "step": 30810500 }, { "epoch": 89.19, "learning_rate": 5.423433737032234e-06, "loss": 1.7856, "step": 30811000 }, { "epoch": 89.19, "learning_rate": 5.422711536680252e-06, "loss": 1.7926, "step": 30811500 }, { "epoch": 89.19, "learning_rate": 5.421987889032975e-06, "loss": 1.7873, "step": 30812000 }, { "epoch": 89.19, "learning_rate": 5.4212642413856984e-06, "loss": 1.824, "step": 30812500 }, { "epoch": 89.19, "learning_rate": 5.420540593738422e-06, "loss": 1.7751, "step": 30813000 }, { "epoch": 89.19, "learning_rate": 5.419816946091145e-06, "loss": 1.773, "step": 30813500 }, { "epoch": 89.19, "learning_rate": 5.4190932984438685e-06, "loss": 1.7494, "step": 30814000 }, { "epoch": 89.2, "learning_rate": 5.418369650796592e-06, "loss": 1.7919, "step": 30814500 }, { "epoch": 89.2, "learning_rate": 5.417646003149315e-06, "loss": 1.7818, "step": 30815000 }, { "epoch": 89.2, "learning_rate": 5.416925250092627e-06, "loss": 1.7911, "step": 30815500 }, { "epoch": 89.2, "learning_rate": 5.416201602445351e-06, "loss": 1.8029, "step": 30816000 }, { "epoch": 89.2, "learning_rate": 5.415477954798073e-06, "loss": 1.797, "step": 30816500 }, { "epoch": 89.2, "learning_rate": 5.414754307150797e-06, "loss": 1.7833, "step": 30817000 }, { "epoch": 89.2, "learning_rate": 5.41403065950352e-06, "loss": 1.7763, "step": 30817500 }, { "epoch": 89.21, "learning_rate": 5.413307011856243e-06, "loss": 1.788, "step": 30818000 }, { "epoch": 89.21, "learning_rate": 5.412583364208966e-06, "loss": 1.7894, "step": 30818500 }, { "epoch": 89.21, "learning_rate": 5.411859716561689e-06, "loss": 1.7776, "step": 30819000 }, { "epoch": 89.21, "learning_rate": 5.411136068914413e-06, "loss": 1.7972, "step": 30819500 }, { "epoch": 89.21, "learning_rate": 5.410412421267136e-06, "loss": 1.8352, "step": 30820000 }, { "epoch": 89.21, "learning_rate": 5.4096902209151545e-06, "loss": 1.806, "step": 30820500 }, { "epoch": 89.21, "learning_rate": 5.408966573267877e-06, "loss": 1.7748, "step": 30821000 }, { "epoch": 89.22, "learning_rate": 5.408242925620601e-06, "loss": 1.7862, "step": 30821500 }, { "epoch": 89.22, "learning_rate": 5.407519277973324e-06, "loss": 1.784, "step": 30822000 }, { "epoch": 89.22, "learning_rate": 5.406795630326047e-06, "loss": 1.798, "step": 30822500 }, { "epoch": 89.22, "learning_rate": 5.406073429974065e-06, "loss": 1.813, "step": 30823000 }, { "epoch": 89.22, "learning_rate": 5.405349782326788e-06, "loss": 1.7736, "step": 30823500 }, { "epoch": 89.22, "learning_rate": 5.404626134679511e-06, "loss": 1.8017, "step": 30824000 }, { "epoch": 89.22, "learning_rate": 5.403902487032234e-06, "loss": 1.7837, "step": 30824500 }, { "epoch": 89.23, "learning_rate": 5.403178839384958e-06, "loss": 1.8063, "step": 30825000 }, { "epoch": 89.23, "learning_rate": 5.402456639032975e-06, "loss": 1.7941, "step": 30825500 }, { "epoch": 89.23, "learning_rate": 5.401732991385699e-06, "loss": 1.8237, "step": 30826000 }, { "epoch": 89.23, "learning_rate": 5.4010093437384215e-06, "loss": 1.7938, "step": 30826500 }, { "epoch": 89.23, "learning_rate": 5.400285696091145e-06, "loss": 1.7677, "step": 30827000 }, { "epoch": 89.23, "learning_rate": 5.399562048443868e-06, "loss": 1.7592, "step": 30827500 }, { "epoch": 89.23, "learning_rate": 5.3988398480918866e-06, "loss": 1.8239, "step": 30828000 }, { "epoch": 89.24, "learning_rate": 5.398116200444609e-06, "loss": 1.8059, "step": 30828500 }, { "epoch": 89.24, "learning_rate": 5.397392552797333e-06, "loss": 1.802, "step": 30829000 }, { "epoch": 89.24, "learning_rate": 5.396668905150056e-06, "loss": 1.8062, "step": 30829500 }, { "epoch": 89.24, "learning_rate": 5.395946704798074e-06, "loss": 1.7811, "step": 30830000 }, { "epoch": 89.24, "learning_rate": 5.395223057150796e-06, "loss": 1.7872, "step": 30830500 }, { "epoch": 89.24, "learning_rate": 5.394500856798815e-06, "loss": 1.789, "step": 30831000 }, { "epoch": 89.24, "learning_rate": 5.393777209151537e-06, "loss": 1.7801, "step": 30831500 }, { "epoch": 89.25, "learning_rate": 5.393053561504261e-06, "loss": 1.7844, "step": 30832000 }, { "epoch": 89.25, "learning_rate": 5.392329913856984e-06, "loss": 1.8089, "step": 30832500 }, { "epoch": 89.25, "learning_rate": 5.391606266209707e-06, "loss": 1.7998, "step": 30833000 }, { "epoch": 89.25, "learning_rate": 5.390882618562431e-06, "loss": 1.7726, "step": 30833500 }, { "epoch": 89.25, "learning_rate": 5.3901589709151536e-06, "loss": 1.8009, "step": 30834000 }, { "epoch": 89.25, "learning_rate": 5.3894353232678775e-06, "loss": 1.7977, "step": 30834500 }, { "epoch": 89.25, "learning_rate": 5.3887116756206006e-06, "loss": 1.7998, "step": 30835000 }, { "epoch": 89.26, "learning_rate": 5.387988027973324e-06, "loss": 1.8005, "step": 30835500 }, { "epoch": 89.26, "learning_rate": 5.387264380326048e-06, "loss": 1.7919, "step": 30836000 }, { "epoch": 89.26, "learning_rate": 5.38654073267877e-06, "loss": 1.7892, "step": 30836500 }, { "epoch": 89.26, "learning_rate": 5.385818532326788e-06, "loss": 1.8, "step": 30837000 }, { "epoch": 89.26, "learning_rate": 5.385094884679511e-06, "loss": 1.8013, "step": 30837500 }, { "epoch": 89.26, "learning_rate": 5.384371237032235e-06, "loss": 1.7858, "step": 30838000 }, { "epoch": 89.26, "learning_rate": 5.383647589384957e-06, "loss": 1.7951, "step": 30838500 }, { "epoch": 89.27, "learning_rate": 5.382923941737681e-06, "loss": 1.7945, "step": 30839000 }, { "epoch": 89.27, "learning_rate": 5.382200294090404e-06, "loss": 1.795, "step": 30839500 }, { "epoch": 89.27, "learning_rate": 5.381476646443127e-06, "loss": 1.7995, "step": 30840000 }, { "epoch": 89.27, "learning_rate": 5.380752998795851e-06, "loss": 1.7789, "step": 30840500 }, { "epoch": 89.27, "learning_rate": 5.380030798443868e-06, "loss": 1.7972, "step": 30841000 }, { "epoch": 89.27, "learning_rate": 5.379308598091886e-06, "loss": 1.7728, "step": 30841500 }, { "epoch": 89.27, "learning_rate": 5.37858495044461e-06, "loss": 1.7895, "step": 30842000 }, { "epoch": 89.28, "learning_rate": 5.377861302797333e-06, "loss": 1.8242, "step": 30842500 }, { "epoch": 89.28, "learning_rate": 5.377137655150056e-06, "loss": 1.7912, "step": 30843000 }, { "epoch": 89.28, "learning_rate": 5.37641400750278e-06, "loss": 1.8175, "step": 30843500 }, { "epoch": 89.28, "learning_rate": 5.375690359855502e-06, "loss": 1.8192, "step": 30844000 }, { "epoch": 89.28, "learning_rate": 5.37496815950352e-06, "loss": 1.7802, "step": 30844500 }, { "epoch": 89.28, "learning_rate": 5.374244511856243e-06, "loss": 1.8148, "step": 30845000 }, { "epoch": 89.29, "learning_rate": 5.373522311504261e-06, "loss": 1.7656, "step": 30845500 }, { "epoch": 89.29, "learning_rate": 5.372798663856984e-06, "loss": 1.8151, "step": 30846000 }, { "epoch": 89.29, "learning_rate": 5.372075016209707e-06, "loss": 1.7621, "step": 30846500 }, { "epoch": 89.29, "learning_rate": 5.3713513685624304e-06, "loss": 1.7947, "step": 30847000 }, { "epoch": 89.29, "learning_rate": 5.3706291682104485e-06, "loss": 1.779, "step": 30847500 }, { "epoch": 89.29, "learning_rate": 5.369905520563172e-06, "loss": 1.7855, "step": 30848000 }, { "epoch": 89.29, "learning_rate": 5.3691818729158955e-06, "loss": 1.8079, "step": 30848500 }, { "epoch": 89.3, "learning_rate": 5.368458225268618e-06, "loss": 1.7875, "step": 30849000 }, { "epoch": 89.3, "learning_rate": 5.367734577621342e-06, "loss": 1.7686, "step": 30849500 }, { "epoch": 89.3, "learning_rate": 5.367010929974065e-06, "loss": 1.8098, "step": 30850000 }, { "epoch": 89.3, "learning_rate": 5.366287282326788e-06, "loss": 1.7724, "step": 30850500 }, { "epoch": 89.3, "learning_rate": 5.365563634679511e-06, "loss": 1.7742, "step": 30851000 }, { "epoch": 89.3, "learning_rate": 5.364839987032234e-06, "loss": 1.8101, "step": 30851500 }, { "epoch": 89.3, "learning_rate": 5.364117786680252e-06, "loss": 1.7968, "step": 30852000 }, { "epoch": 89.31, "learning_rate": 5.363394139032975e-06, "loss": 1.7879, "step": 30852500 }, { "epoch": 89.31, "learning_rate": 5.362670491385699e-06, "loss": 1.7915, "step": 30853000 }, { "epoch": 89.31, "learning_rate": 5.361946843738422e-06, "loss": 1.8097, "step": 30853500 }, { "epoch": 89.31, "learning_rate": 5.361223196091145e-06, "loss": 1.8121, "step": 30854000 }, { "epoch": 89.31, "learning_rate": 5.360499548443868e-06, "loss": 1.8015, "step": 30854500 }, { "epoch": 89.31, "learning_rate": 5.3597759007965914e-06, "loss": 1.7686, "step": 30855000 }, { "epoch": 89.31, "learning_rate": 5.3590537004446095e-06, "loss": 1.7935, "step": 30855500 }, { "epoch": 89.32, "learning_rate": 5.358330052797333e-06, "loss": 1.7767, "step": 30856000 }, { "epoch": 89.32, "learning_rate": 5.357606405150056e-06, "loss": 1.776, "step": 30856500 }, { "epoch": 89.32, "learning_rate": 5.356882757502779e-06, "loss": 1.7798, "step": 30857000 }, { "epoch": 89.32, "learning_rate": 5.356159109855502e-06, "loss": 1.7797, "step": 30857500 }, { "epoch": 89.32, "learning_rate": 5.35543690950352e-06, "loss": 1.7976, "step": 30858000 }, { "epoch": 89.32, "learning_rate": 5.354713261856243e-06, "loss": 1.7788, "step": 30858500 }, { "epoch": 89.32, "learning_rate": 5.353989614208967e-06, "loss": 1.8135, "step": 30859000 }, { "epoch": 89.33, "learning_rate": 5.35326596656169e-06, "loss": 1.7955, "step": 30859500 }, { "epoch": 89.33, "learning_rate": 5.352543766209707e-06, "loss": 1.806, "step": 30860000 }, { "epoch": 89.33, "learning_rate": 5.35182011856243e-06, "loss": 1.7723, "step": 30860500 }, { "epoch": 89.33, "learning_rate": 5.351096470915154e-06, "loss": 1.7835, "step": 30861000 }, { "epoch": 89.33, "learning_rate": 5.350372823267877e-06, "loss": 1.7923, "step": 30861500 }, { "epoch": 89.33, "learning_rate": 5.3496491756206005e-06, "loss": 1.8174, "step": 30862000 }, { "epoch": 89.33, "learning_rate": 5.3489269752686186e-06, "loss": 1.799, "step": 30862500 }, { "epoch": 89.34, "learning_rate": 5.348203327621342e-06, "loss": 1.8064, "step": 30863000 }, { "epoch": 89.34, "learning_rate": 5.347479679974065e-06, "loss": 1.7785, "step": 30863500 }, { "epoch": 89.34, "learning_rate": 5.346756032326788e-06, "loss": 1.7993, "step": 30864000 }, { "epoch": 89.34, "learning_rate": 5.346033831974806e-06, "loss": 1.7823, "step": 30864500 }, { "epoch": 89.34, "learning_rate": 5.345310184327529e-06, "loss": 1.8041, "step": 30865000 }, { "epoch": 89.34, "learning_rate": 5.344587983975547e-06, "loss": 1.808, "step": 30865500 }, { "epoch": 89.34, "learning_rate": 5.34386433632827e-06, "loss": 1.8157, "step": 30866000 }, { "epoch": 89.35, "learning_rate": 5.343140688680993e-06, "loss": 1.8063, "step": 30866500 }, { "epoch": 89.35, "learning_rate": 5.342417041033716e-06, "loss": 1.7825, "step": 30867000 }, { "epoch": 89.35, "learning_rate": 5.341693393386439e-06, "loss": 1.8088, "step": 30867500 }, { "epoch": 89.35, "learning_rate": 5.3409697457391625e-06, "loss": 1.7893, "step": 30868000 }, { "epoch": 89.35, "learning_rate": 5.340246098091886e-06, "loss": 1.783, "step": 30868500 }, { "epoch": 89.35, "learning_rate": 5.3395224504446095e-06, "loss": 1.8143, "step": 30869000 }, { "epoch": 89.35, "learning_rate": 5.3387988027973326e-06, "loss": 1.7874, "step": 30869500 }, { "epoch": 89.36, "learning_rate": 5.33807660244535e-06, "loss": 1.777, "step": 30870000 }, { "epoch": 89.36, "learning_rate": 5.337352954798074e-06, "loss": 1.7977, "step": 30870500 }, { "epoch": 89.36, "learning_rate": 5.336629307150797e-06, "loss": 1.8144, "step": 30871000 }, { "epoch": 89.36, "learning_rate": 5.33590565950352e-06, "loss": 1.7861, "step": 30871500 }, { "epoch": 89.36, "learning_rate": 5.335182011856244e-06, "loss": 1.793, "step": 30872000 }, { "epoch": 89.36, "learning_rate": 5.334458364208966e-06, "loss": 1.8127, "step": 30872500 }, { "epoch": 89.36, "learning_rate": 5.333736163856984e-06, "loss": 1.8187, "step": 30873000 }, { "epoch": 89.37, "learning_rate": 5.333012516209707e-06, "loss": 1.7985, "step": 30873500 }, { "epoch": 89.37, "learning_rate": 5.332290315857725e-06, "loss": 1.7974, "step": 30874000 }, { "epoch": 89.37, "learning_rate": 5.3315666682104484e-06, "loss": 1.7948, "step": 30874500 }, { "epoch": 89.37, "learning_rate": 5.3308444678584665e-06, "loss": 1.7771, "step": 30875000 }, { "epoch": 89.37, "learning_rate": 5.33012082021119e-06, "loss": 1.8022, "step": 30875500 }, { "epoch": 89.37, "learning_rate": 5.329397172563913e-06, "loss": 1.7882, "step": 30876000 }, { "epoch": 89.37, "learning_rate": 5.328673524916636e-06, "loss": 1.7747, "step": 30876500 }, { "epoch": 89.38, "learning_rate": 5.32794987726936e-06, "loss": 1.779, "step": 30877000 }, { "epoch": 89.38, "learning_rate": 5.327226229622082e-06, "loss": 1.8146, "step": 30877500 }, { "epoch": 89.38, "learning_rate": 5.326502581974806e-06, "loss": 1.7813, "step": 30878000 }, { "epoch": 89.38, "learning_rate": 5.325778934327529e-06, "loss": 1.8003, "step": 30878500 }, { "epoch": 89.38, "learning_rate": 5.325055286680252e-06, "loss": 1.7912, "step": 30879000 }, { "epoch": 89.38, "learning_rate": 5.324331639032976e-06, "loss": 1.7693, "step": 30879500 }, { "epoch": 89.38, "learning_rate": 5.323607991385698e-06, "loss": 1.8191, "step": 30880000 }, { "epoch": 89.39, "learning_rate": 5.322884343738422e-06, "loss": 1.8077, "step": 30880500 }, { "epoch": 89.39, "learning_rate": 5.322160696091145e-06, "loss": 1.7945, "step": 30881000 }, { "epoch": 89.39, "learning_rate": 5.321437048443868e-06, "loss": 1.777, "step": 30881500 }, { "epoch": 89.39, "learning_rate": 5.3207148480918855e-06, "loss": 1.8095, "step": 30882000 }, { "epoch": 89.39, "learning_rate": 5.3199912004446094e-06, "loss": 1.8006, "step": 30882500 }, { "epoch": 89.39, "learning_rate": 5.3192675527973325e-06, "loss": 1.8042, "step": 30883000 }, { "epoch": 89.4, "learning_rate": 5.318543905150056e-06, "loss": 1.7722, "step": 30883500 }, { "epoch": 89.4, "learning_rate": 5.3178202575027795e-06, "loss": 1.7624, "step": 30884000 }, { "epoch": 89.4, "learning_rate": 5.317098057150797e-06, "loss": 1.8085, "step": 30884500 }, { "epoch": 89.4, "learning_rate": 5.316374409503521e-06, "loss": 1.7934, "step": 30885000 }, { "epoch": 89.4, "learning_rate": 5.315650761856243e-06, "loss": 1.7951, "step": 30885500 }, { "epoch": 89.4, "learning_rate": 5.314927114208967e-06, "loss": 1.8126, "step": 30886000 }, { "epoch": 89.4, "learning_rate": 5.314203466561689e-06, "loss": 1.768, "step": 30886500 }, { "epoch": 89.41, "learning_rate": 5.313479818914413e-06, "loss": 1.7952, "step": 30887000 }, { "epoch": 89.41, "learning_rate": 5.312756171267137e-06, "loss": 1.7704, "step": 30887500 }, { "epoch": 89.41, "learning_rate": 5.312033970915154e-06, "loss": 1.811, "step": 30888000 }, { "epoch": 89.41, "learning_rate": 5.311310323267877e-06, "loss": 1.7927, "step": 30888500 }, { "epoch": 89.41, "learning_rate": 5.3105866756206e-06, "loss": 1.8056, "step": 30889000 }, { "epoch": 89.41, "learning_rate": 5.309864475268618e-06, "loss": 1.7903, "step": 30889500 }, { "epoch": 89.41, "learning_rate": 5.3091408276213415e-06, "loss": 1.7923, "step": 30890000 }, { "epoch": 89.42, "learning_rate": 5.3084171799740655e-06, "loss": 1.804, "step": 30890500 }, { "epoch": 89.42, "learning_rate": 5.307693532326788e-06, "loss": 1.8157, "step": 30891000 }, { "epoch": 89.42, "learning_rate": 5.306969884679512e-06, "loss": 1.7941, "step": 30891500 }, { "epoch": 89.42, "learning_rate": 5.306246237032234e-06, "loss": 1.7915, "step": 30892000 }, { "epoch": 89.42, "learning_rate": 5.305522589384958e-06, "loss": 1.8038, "step": 30892500 }, { "epoch": 89.42, "learning_rate": 5.304798941737681e-06, "loss": 1.7935, "step": 30893000 }, { "epoch": 89.42, "learning_rate": 5.304075294090404e-06, "loss": 1.7814, "step": 30893500 }, { "epoch": 89.43, "learning_rate": 5.303353093738422e-06, "loss": 1.7947, "step": 30894000 }, { "epoch": 89.43, "learning_rate": 5.302629446091145e-06, "loss": 1.7814, "step": 30894500 }, { "epoch": 89.43, "learning_rate": 5.301905798443868e-06, "loss": 1.8244, "step": 30895000 }, { "epoch": 89.43, "learning_rate": 5.301182150796591e-06, "loss": 1.7883, "step": 30895500 }, { "epoch": 89.43, "learning_rate": 5.300458503149315e-06, "loss": 1.795, "step": 30896000 }, { "epoch": 89.43, "learning_rate": 5.299734855502038e-06, "loss": 1.7829, "step": 30896500 }, { "epoch": 89.43, "learning_rate": 5.2990141024453506e-06, "loss": 1.8124, "step": 30897000 }, { "epoch": 89.44, "learning_rate": 5.298290454798074e-06, "loss": 1.7611, "step": 30897500 }, { "epoch": 89.44, "learning_rate": 5.297566807150797e-06, "loss": 1.8045, "step": 30898000 }, { "epoch": 89.44, "learning_rate": 5.29684315950352e-06, "loss": 1.7979, "step": 30898500 }, { "epoch": 89.44, "learning_rate": 5.296119511856244e-06, "loss": 1.8069, "step": 30899000 }, { "epoch": 89.44, "learning_rate": 5.295395864208967e-06, "loss": 1.8113, "step": 30899500 }, { "epoch": 89.44, "learning_rate": 5.29467221656169e-06, "loss": 1.7716, "step": 30900000 }, { "epoch": 89.44, "learning_rate": 5.293950016209707e-06, "loss": 1.7531, "step": 30900500 }, { "epoch": 89.45, "learning_rate": 5.293226368562431e-06, "loss": 1.7919, "step": 30901000 }, { "epoch": 89.45, "learning_rate": 5.292502720915154e-06, "loss": 1.7903, "step": 30901500 }, { "epoch": 89.45, "learning_rate": 5.291779073267877e-06, "loss": 1.8032, "step": 30902000 }, { "epoch": 89.45, "learning_rate": 5.2910554256206e-06, "loss": 1.7656, "step": 30902500 }, { "epoch": 89.45, "learning_rate": 5.290331777973323e-06, "loss": 1.7889, "step": 30903000 }, { "epoch": 89.45, "learning_rate": 5.289608130326047e-06, "loss": 1.7842, "step": 30903500 }, { "epoch": 89.45, "learning_rate": 5.28888448267877e-06, "loss": 1.8239, "step": 30904000 }, { "epoch": 89.46, "learning_rate": 5.2881608350314935e-06, "loss": 1.8013, "step": 30904500 }, { "epoch": 89.46, "learning_rate": 5.287438634679511e-06, "loss": 1.7905, "step": 30905000 }, { "epoch": 89.46, "learning_rate": 5.286716434327529e-06, "loss": 1.8192, "step": 30905500 }, { "epoch": 89.46, "learning_rate": 5.285992786680252e-06, "loss": 1.8042, "step": 30906000 }, { "epoch": 89.46, "learning_rate": 5.285269139032976e-06, "loss": 1.7833, "step": 30906500 }, { "epoch": 89.46, "learning_rate": 5.284545491385699e-06, "loss": 1.7767, "step": 30907000 }, { "epoch": 89.46, "learning_rate": 5.283821843738422e-06, "loss": 1.798, "step": 30907500 }, { "epoch": 89.47, "learning_rate": 5.283098196091145e-06, "loss": 1.7874, "step": 30908000 }, { "epoch": 89.47, "learning_rate": 5.282374548443868e-06, "loss": 1.7823, "step": 30908500 }, { "epoch": 89.47, "learning_rate": 5.281652348091886e-06, "loss": 1.794, "step": 30909000 }, { "epoch": 89.47, "learning_rate": 5.280928700444609e-06, "loss": 1.7808, "step": 30909500 }, { "epoch": 89.47, "learning_rate": 5.280205052797332e-06, "loss": 1.7993, "step": 30910000 }, { "epoch": 89.47, "learning_rate": 5.2794814051500555e-06, "loss": 1.7874, "step": 30910500 }, { "epoch": 89.47, "learning_rate": 5.2787577575027794e-06, "loss": 1.773, "step": 30911000 }, { "epoch": 89.48, "learning_rate": 5.2780341098555025e-06, "loss": 1.8006, "step": 30911500 }, { "epoch": 89.48, "learning_rate": 5.277310462208226e-06, "loss": 1.7746, "step": 30912000 }, { "epoch": 89.48, "learning_rate": 5.276586814560949e-06, "loss": 1.8218, "step": 30912500 }, { "epoch": 89.48, "learning_rate": 5.275864614208967e-06, "loss": 1.7972, "step": 30913000 }, { "epoch": 89.48, "learning_rate": 5.27514096656169e-06, "loss": 1.7798, "step": 30913500 }, { "epoch": 89.48, "learning_rate": 5.274418766209707e-06, "loss": 1.7952, "step": 30914000 }, { "epoch": 89.48, "learning_rate": 5.273695118562431e-06, "loss": 1.823, "step": 30914500 }, { "epoch": 89.49, "learning_rate": 5.272971470915154e-06, "loss": 1.7852, "step": 30915000 }, { "epoch": 89.49, "learning_rate": 5.272247823267877e-06, "loss": 1.7957, "step": 30915500 }, { "epoch": 89.49, "learning_rate": 5.2715241756206e-06, "loss": 1.7705, "step": 30916000 }, { "epoch": 89.49, "learning_rate": 5.270800527973323e-06, "loss": 1.819, "step": 30916500 }, { "epoch": 89.49, "learning_rate": 5.270076880326047e-06, "loss": 1.7904, "step": 30917000 }, { "epoch": 89.49, "learning_rate": 5.26935323267877e-06, "loss": 1.7753, "step": 30917500 }, { "epoch": 89.49, "learning_rate": 5.2686295850314934e-06, "loss": 1.8049, "step": 30918000 }, { "epoch": 89.5, "learning_rate": 5.267907384679511e-06, "loss": 1.8046, "step": 30918500 }, { "epoch": 89.5, "learning_rate": 5.267183737032235e-06, "loss": 1.7791, "step": 30919000 }, { "epoch": 89.5, "learning_rate": 5.266460089384958e-06, "loss": 1.7844, "step": 30919500 }, { "epoch": 89.5, "learning_rate": 5.265736441737681e-06, "loss": 1.7581, "step": 30920000 }, { "epoch": 89.5, "learning_rate": 5.265012794090404e-06, "loss": 1.7858, "step": 30920500 }, { "epoch": 89.5, "learning_rate": 5.264289146443127e-06, "loss": 1.7983, "step": 30921000 }, { "epoch": 89.51, "learning_rate": 5.263565498795851e-06, "loss": 1.7856, "step": 30921500 }, { "epoch": 89.51, "learning_rate": 5.262843298443868e-06, "loss": 1.8084, "step": 30922000 }, { "epoch": 89.51, "learning_rate": 5.262119650796592e-06, "loss": 1.8065, "step": 30922500 }, { "epoch": 89.51, "learning_rate": 5.261396003149314e-06, "loss": 1.7954, "step": 30923000 }, { "epoch": 89.51, "learning_rate": 5.260672355502038e-06, "loss": 1.7681, "step": 30923500 }, { "epoch": 89.51, "learning_rate": 5.2599501551500554e-06, "loss": 1.816, "step": 30924000 }, { "epoch": 89.51, "learning_rate": 5.2592279547980735e-06, "loss": 1.7926, "step": 30924500 }, { "epoch": 89.52, "learning_rate": 5.258504307150797e-06, "loss": 1.7981, "step": 30925000 }, { "epoch": 89.52, "learning_rate": 5.2577806595035206e-06, "loss": 1.7848, "step": 30925500 }, { "epoch": 89.52, "learning_rate": 5.257057011856243e-06, "loss": 1.7838, "step": 30926000 }, { "epoch": 89.52, "learning_rate": 5.256333364208967e-06, "loss": 1.7952, "step": 30926500 }, { "epoch": 89.52, "learning_rate": 5.255611163856984e-06, "loss": 1.7956, "step": 30927000 }, { "epoch": 89.52, "learning_rate": 5.254887516209708e-06, "loss": 1.8011, "step": 30927500 }, { "epoch": 89.52, "learning_rate": 5.254165315857725e-06, "loss": 1.806, "step": 30928000 }, { "epoch": 89.53, "learning_rate": 5.253441668210449e-06, "loss": 1.7756, "step": 30928500 }, { "epoch": 89.53, "learning_rate": 5.252718020563171e-06, "loss": 1.7642, "step": 30929000 }, { "epoch": 89.53, "learning_rate": 5.251994372915895e-06, "loss": 1.7903, "step": 30929500 }, { "epoch": 89.53, "learning_rate": 5.251270725268618e-06, "loss": 1.8139, "step": 30930000 }, { "epoch": 89.53, "learning_rate": 5.250547077621341e-06, "loss": 1.7884, "step": 30930500 }, { "epoch": 89.53, "learning_rate": 5.249823429974065e-06, "loss": 1.8002, "step": 30931000 }, { "epoch": 89.53, "learning_rate": 5.2490997823267876e-06, "loss": 1.7765, "step": 30931500 }, { "epoch": 89.54, "learning_rate": 5.2483761346795115e-06, "loss": 1.796, "step": 30932000 }, { "epoch": 89.54, "learning_rate": 5.247653934327529e-06, "loss": 1.7761, "step": 30932500 }, { "epoch": 89.54, "learning_rate": 5.246930286680253e-06, "loss": 1.7893, "step": 30933000 }, { "epoch": 89.54, "learning_rate": 5.246206639032975e-06, "loss": 1.7634, "step": 30933500 }, { "epoch": 89.54, "learning_rate": 5.245482991385699e-06, "loss": 1.8213, "step": 30934000 }, { "epoch": 89.54, "learning_rate": 5.244759343738422e-06, "loss": 1.7931, "step": 30934500 }, { "epoch": 89.54, "learning_rate": 5.244035696091145e-06, "loss": 1.7975, "step": 30935000 }, { "epoch": 89.55, "learning_rate": 5.243312048443869e-06, "loss": 1.7835, "step": 30935500 }, { "epoch": 89.55, "learning_rate": 5.242588400796591e-06, "loss": 1.7915, "step": 30936000 }, { "epoch": 89.55, "learning_rate": 5.24186620044461e-06, "loss": 1.7902, "step": 30936500 }, { "epoch": 89.55, "learning_rate": 5.241142552797332e-06, "loss": 1.7869, "step": 30937000 }, { "epoch": 89.55, "learning_rate": 5.240418905150056e-06, "loss": 1.7798, "step": 30937500 }, { "epoch": 89.55, "learning_rate": 5.2396952575027785e-06, "loss": 1.7993, "step": 30938000 }, { "epoch": 89.55, "learning_rate": 5.238971609855502e-06, "loss": 1.82, "step": 30938500 }, { "epoch": 89.56, "learning_rate": 5.23824940950352e-06, "loss": 1.7943, "step": 30939000 }, { "epoch": 89.56, "learning_rate": 5.237525761856244e-06, "loss": 1.7971, "step": 30939500 }, { "epoch": 89.56, "learning_rate": 5.236802114208967e-06, "loss": 1.8102, "step": 30940000 }, { "epoch": 89.56, "learning_rate": 5.23607846656169e-06, "loss": 1.8095, "step": 30940500 }, { "epoch": 89.56, "learning_rate": 5.235354818914414e-06, "loss": 1.793, "step": 30941000 }, { "epoch": 89.56, "learning_rate": 5.234631171267136e-06, "loss": 1.7859, "step": 30941500 }, { "epoch": 89.56, "learning_rate": 5.233908970915154e-06, "loss": 1.8066, "step": 30942000 }, { "epoch": 89.57, "learning_rate": 5.233186770563172e-06, "loss": 1.7943, "step": 30942500 }, { "epoch": 89.57, "learning_rate": 5.232463122915895e-06, "loss": 1.7752, "step": 30943000 }, { "epoch": 89.57, "learning_rate": 5.231739475268618e-06, "loss": 1.7916, "step": 30943500 }, { "epoch": 89.57, "learning_rate": 5.231015827621342e-06, "loss": 1.8122, "step": 30944000 }, { "epoch": 89.57, "learning_rate": 5.230292179974064e-06, "loss": 1.8139, "step": 30944500 }, { "epoch": 89.57, "learning_rate": 5.229568532326788e-06, "loss": 1.7927, "step": 30945000 }, { "epoch": 89.57, "learning_rate": 5.228846331974806e-06, "loss": 1.8077, "step": 30945500 }, { "epoch": 89.58, "learning_rate": 5.2281226843275295e-06, "loss": 1.7993, "step": 30946000 }, { "epoch": 89.58, "learning_rate": 5.227399036680252e-06, "loss": 1.7814, "step": 30946500 }, { "epoch": 89.58, "learning_rate": 5.226675389032976e-06, "loss": 1.7848, "step": 30947000 }, { "epoch": 89.58, "learning_rate": 5.225951741385699e-06, "loss": 1.7803, "step": 30947500 }, { "epoch": 89.58, "learning_rate": 5.225228093738422e-06, "loss": 1.8016, "step": 30948000 }, { "epoch": 89.58, "learning_rate": 5.224504446091145e-06, "loss": 1.7929, "step": 30948500 }, { "epoch": 89.58, "learning_rate": 5.223780798443868e-06, "loss": 1.7591, "step": 30949000 }, { "epoch": 89.59, "learning_rate": 5.223057150796592e-06, "loss": 1.7635, "step": 30949500 }, { "epoch": 89.59, "learning_rate": 5.222334950444609e-06, "loss": 1.8051, "step": 30950000 }, { "epoch": 89.59, "learning_rate": 5.221611302797333e-06, "loss": 1.7866, "step": 30950500 }, { "epoch": 89.59, "learning_rate": 5.220887655150055e-06, "loss": 1.7867, "step": 30951000 }, { "epoch": 89.59, "learning_rate": 5.220164007502779e-06, "loss": 1.7896, "step": 30951500 }, { "epoch": 89.59, "learning_rate": 5.2194418071507965e-06, "loss": 1.7819, "step": 30952000 }, { "epoch": 89.59, "learning_rate": 5.2187181595035204e-06, "loss": 1.7739, "step": 30952500 }, { "epoch": 89.6, "learning_rate": 5.2179945118562435e-06, "loss": 1.799, "step": 30953000 }, { "epoch": 89.6, "learning_rate": 5.217270864208967e-06, "loss": 1.7997, "step": 30953500 }, { "epoch": 89.6, "learning_rate": 5.216548663856984e-06, "loss": 1.7749, "step": 30954000 }, { "epoch": 89.6, "learning_rate": 5.215825016209708e-06, "loss": 1.7898, "step": 30954500 }, { "epoch": 89.6, "learning_rate": 5.215101368562431e-06, "loss": 1.7885, "step": 30955000 }, { "epoch": 89.6, "learning_rate": 5.214377720915154e-06, "loss": 1.8157, "step": 30955500 }, { "epoch": 89.6, "learning_rate": 5.213654073267877e-06, "loss": 1.7921, "step": 30956000 }, { "epoch": 89.61, "learning_rate": 5.2129304256206e-06, "loss": 1.7882, "step": 30956500 }, { "epoch": 89.61, "learning_rate": 5.212206777973324e-06, "loss": 1.7968, "step": 30957000 }, { "epoch": 89.61, "learning_rate": 5.211483130326047e-06, "loss": 1.7702, "step": 30957500 }, { "epoch": 89.61, "learning_rate": 5.210760929974065e-06, "loss": 1.8034, "step": 30958000 }, { "epoch": 89.61, "learning_rate": 5.2100387296220825e-06, "loss": 1.8123, "step": 30958500 }, { "epoch": 89.61, "learning_rate": 5.2093150819748055e-06, "loss": 1.7914, "step": 30959000 }, { "epoch": 89.62, "learning_rate": 5.208591434327529e-06, "loss": 1.7955, "step": 30959500 }, { "epoch": 89.62, "learning_rate": 5.2078677866802526e-06, "loss": 1.8086, "step": 30960000 }, { "epoch": 89.62, "learning_rate": 5.207144139032976e-06, "loss": 1.7875, "step": 30960500 }, { "epoch": 89.62, "learning_rate": 5.206421938680993e-06, "loss": 1.7878, "step": 30961000 }, { "epoch": 89.62, "learning_rate": 5.205698291033717e-06, "loss": 1.8229, "step": 30961500 }, { "epoch": 89.62, "learning_rate": 5.20497464338644e-06, "loss": 1.8213, "step": 30962000 }, { "epoch": 89.62, "learning_rate": 5.204250995739163e-06, "loss": 1.7744, "step": 30962500 }, { "epoch": 89.63, "learning_rate": 5.203528795387181e-06, "loss": 1.7995, "step": 30963000 }, { "epoch": 89.63, "learning_rate": 5.202805147739904e-06, "loss": 1.8202, "step": 30963500 }, { "epoch": 89.63, "learning_rate": 5.202081500092627e-06, "loss": 1.8075, "step": 30964000 }, { "epoch": 89.63, "learning_rate": 5.20135785244535e-06, "loss": 1.7971, "step": 30964500 }, { "epoch": 89.63, "learning_rate": 5.200635652093368e-06, "loss": 1.8086, "step": 30965000 }, { "epoch": 89.63, "learning_rate": 5.1999120044460915e-06, "loss": 1.7913, "step": 30965500 }, { "epoch": 89.63, "learning_rate": 5.1991883567988146e-06, "loss": 1.8049, "step": 30966000 }, { "epoch": 89.64, "learning_rate": 5.198464709151538e-06, "loss": 1.8083, "step": 30966500 }, { "epoch": 89.64, "learning_rate": 5.197741061504261e-06, "loss": 1.8193, "step": 30967000 }, { "epoch": 89.64, "learning_rate": 5.197017413856985e-06, "loss": 1.8019, "step": 30967500 }, { "epoch": 89.64, "learning_rate": 5.196293766209708e-06, "loss": 1.771, "step": 30968000 }, { "epoch": 89.64, "learning_rate": 5.195570118562431e-06, "loss": 1.777, "step": 30968500 }, { "epoch": 89.64, "learning_rate": 5.194847918210449e-06, "loss": 1.7998, "step": 30969000 }, { "epoch": 89.64, "learning_rate": 5.194124270563172e-06, "loss": 1.8034, "step": 30969500 }, { "epoch": 89.65, "learning_rate": 5.193402070211189e-06, "loss": 1.7851, "step": 30970000 }, { "epoch": 89.65, "learning_rate": 5.192678422563913e-06, "loss": 1.7876, "step": 30970500 }, { "epoch": 89.65, "learning_rate": 5.191954774916636e-06, "loss": 1.8089, "step": 30971000 }, { "epoch": 89.65, "learning_rate": 5.191231127269359e-06, "loss": 1.7942, "step": 30971500 }, { "epoch": 89.65, "learning_rate": 5.190507479622082e-06, "loss": 1.8281, "step": 30972000 }, { "epoch": 89.65, "learning_rate": 5.1897838319748055e-06, "loss": 1.8098, "step": 30972500 }, { "epoch": 89.65, "learning_rate": 5.189061631622824e-06, "loss": 1.8161, "step": 30973000 }, { "epoch": 89.66, "learning_rate": 5.188337983975547e-06, "loss": 1.8081, "step": 30973500 }, { "epoch": 89.66, "learning_rate": 5.18761433632827e-06, "loss": 1.78, "step": 30974000 }, { "epoch": 89.66, "learning_rate": 5.186893583271582e-06, "loss": 1.8002, "step": 30974500 }, { "epoch": 89.66, "learning_rate": 5.186169935624306e-06, "loss": 1.7664, "step": 30975000 }, { "epoch": 89.66, "learning_rate": 5.185446287977029e-06, "loss": 1.8404, "step": 30975500 }, { "epoch": 89.66, "learning_rate": 5.184722640329752e-06, "loss": 1.8375, "step": 30976000 }, { "epoch": 89.66, "learning_rate": 5.183998992682475e-06, "loss": 1.7808, "step": 30976500 }, { "epoch": 89.67, "learning_rate": 5.183275345035198e-06, "loss": 1.805, "step": 30977000 }, { "epoch": 89.67, "learning_rate": 5.182551697387922e-06, "loss": 1.8033, "step": 30977500 }, { "epoch": 89.67, "learning_rate": 5.1818280497406444e-06, "loss": 1.7912, "step": 30978000 }, { "epoch": 89.67, "learning_rate": 5.181104402093368e-06, "loss": 1.7901, "step": 30978500 }, { "epoch": 89.67, "learning_rate": 5.180382201741386e-06, "loss": 1.8059, "step": 30979000 }, { "epoch": 89.67, "learning_rate": 5.1796585540941095e-06, "loss": 1.7782, "step": 30979500 }, { "epoch": 89.67, "learning_rate": 5.178934906446833e-06, "loss": 1.7981, "step": 30980000 }, { "epoch": 89.68, "learning_rate": 5.178211258799556e-06, "loss": 1.7891, "step": 30980500 }, { "epoch": 89.68, "learning_rate": 5.177487611152279e-06, "loss": 1.7972, "step": 30981000 }, { "epoch": 89.68, "learning_rate": 5.176763963505002e-06, "loss": 1.7873, "step": 30981500 }, { "epoch": 89.68, "learning_rate": 5.176040315857726e-06, "loss": 1.803, "step": 30982000 }, { "epoch": 89.68, "learning_rate": 5.175316668210448e-06, "loss": 1.7999, "step": 30982500 }, { "epoch": 89.68, "learning_rate": 5.174593020563172e-06, "loss": 1.7938, "step": 30983000 }, { "epoch": 89.68, "learning_rate": 5.173869372915895e-06, "loss": 1.7776, "step": 30983500 }, { "epoch": 89.69, "learning_rate": 5.173145725268618e-06, "loss": 1.7962, "step": 30984000 }, { "epoch": 89.69, "learning_rate": 5.172422077621342e-06, "loss": 1.7922, "step": 30984500 }, { "epoch": 89.69, "learning_rate": 5.171698429974064e-06, "loss": 1.7883, "step": 30985000 }, { "epoch": 89.69, "learning_rate": 5.170974782326788e-06, "loss": 1.7984, "step": 30985500 }, { "epoch": 89.69, "learning_rate": 5.1702525819748054e-06, "loss": 1.8063, "step": 30986000 }, { "epoch": 89.69, "learning_rate": 5.169528934327529e-06, "loss": 1.8084, "step": 30986500 }, { "epoch": 89.69, "learning_rate": 5.168806733975547e-06, "loss": 1.8123, "step": 30987000 }, { "epoch": 89.7, "learning_rate": 5.1680830863282705e-06, "loss": 1.7992, "step": 30987500 }, { "epoch": 89.7, "learning_rate": 5.167360885976288e-06, "loss": 1.7767, "step": 30988000 }, { "epoch": 89.7, "learning_rate": 5.166637238329011e-06, "loss": 1.818, "step": 30988500 }, { "epoch": 89.7, "learning_rate": 5.165913590681734e-06, "loss": 1.8021, "step": 30989000 }, { "epoch": 89.7, "learning_rate": 5.165189943034458e-06, "loss": 1.7757, "step": 30989500 }, { "epoch": 89.7, "learning_rate": 5.16446629538718e-06, "loss": 1.8012, "step": 30990000 }, { "epoch": 89.7, "learning_rate": 5.163742647739904e-06, "loss": 1.765, "step": 30990500 }, { "epoch": 89.71, "learning_rate": 5.163019000092627e-06, "loss": 1.8126, "step": 30991000 }, { "epoch": 89.71, "learning_rate": 5.16229535244535e-06, "loss": 1.7737, "step": 30991500 }, { "epoch": 89.71, "learning_rate": 5.161571704798074e-06, "loss": 1.7965, "step": 30992000 }, { "epoch": 89.71, "learning_rate": 5.160849504446091e-06, "loss": 1.7666, "step": 30992500 }, { "epoch": 89.71, "learning_rate": 5.160125856798815e-06, "loss": 1.7811, "step": 30993000 }, { "epoch": 89.71, "learning_rate": 5.1594022091515375e-06, "loss": 1.7611, "step": 30993500 }, { "epoch": 89.71, "learning_rate": 5.1586785615042615e-06, "loss": 1.8108, "step": 30994000 }, { "epoch": 89.72, "learning_rate": 5.157954913856984e-06, "loss": 1.8089, "step": 30994500 }, { "epoch": 89.72, "learning_rate": 5.157231266209708e-06, "loss": 1.7982, "step": 30995000 }, { "epoch": 89.72, "learning_rate": 5.156507618562431e-06, "loss": 1.7982, "step": 30995500 }, { "epoch": 89.72, "learning_rate": 5.155785418210449e-06, "loss": 1.7935, "step": 30996000 }, { "epoch": 89.72, "learning_rate": 5.155061770563172e-06, "loss": 1.7853, "step": 30996500 }, { "epoch": 89.72, "learning_rate": 5.154338122915895e-06, "loss": 1.7923, "step": 30997000 }, { "epoch": 89.73, "learning_rate": 5.153615922563912e-06, "loss": 1.8013, "step": 30997500 }, { "epoch": 89.73, "learning_rate": 5.152892274916636e-06, "loss": 1.7685, "step": 30998000 }, { "epoch": 89.73, "learning_rate": 5.152168627269359e-06, "loss": 1.8018, "step": 30998500 }, { "epoch": 89.73, "learning_rate": 5.151444979622082e-06, "loss": 1.8154, "step": 30999000 }, { "epoch": 89.73, "learning_rate": 5.150721331974806e-06, "loss": 1.7818, "step": 30999500 }, { "epoch": 89.73, "learning_rate": 5.1499976843275285e-06, "loss": 1.7994, "step": 31000000 }, { "epoch": 89.73, "learning_rate": 5.149274036680252e-06, "loss": 1.7675, "step": 31000500 }, { "epoch": 89.74, "learning_rate": 5.1485503890329755e-06, "loss": 1.8077, "step": 31001000 }, { "epoch": 89.74, "learning_rate": 5.1478267413856986e-06, "loss": 1.7768, "step": 31001500 }, { "epoch": 89.74, "learning_rate": 5.1471030937384225e-06, "loss": 1.7881, "step": 31002000 }, { "epoch": 89.74, "learning_rate": 5.146379446091145e-06, "loss": 1.7947, "step": 31002500 }, { "epoch": 89.74, "learning_rate": 5.145655798443869e-06, "loss": 1.7803, "step": 31003000 }, { "epoch": 89.74, "learning_rate": 5.144932150796592e-06, "loss": 1.7442, "step": 31003500 }, { "epoch": 89.74, "learning_rate": 5.14420995044461e-06, "loss": 1.7713, "step": 31004000 }, { "epoch": 89.75, "learning_rate": 5.143486302797333e-06, "loss": 1.7784, "step": 31004500 }, { "epoch": 89.75, "learning_rate": 5.142762655150056e-06, "loss": 1.7992, "step": 31005000 }, { "epoch": 89.75, "learning_rate": 5.142039007502779e-06, "loss": 1.7866, "step": 31005500 }, { "epoch": 89.75, "learning_rate": 5.141315359855502e-06, "loss": 1.7928, "step": 31006000 }, { "epoch": 89.75, "learning_rate": 5.14059315950352e-06, "loss": 1.7959, "step": 31006500 }, { "epoch": 89.75, "learning_rate": 5.139869511856243e-06, "loss": 1.7867, "step": 31007000 }, { "epoch": 89.75, "learning_rate": 5.139145864208966e-06, "loss": 1.7719, "step": 31007500 }, { "epoch": 89.76, "learning_rate": 5.1384222165616895e-06, "loss": 1.79, "step": 31008000 }, { "epoch": 89.76, "learning_rate": 5.137698568914413e-06, "loss": 1.8012, "step": 31008500 }, { "epoch": 89.76, "learning_rate": 5.136976368562431e-06, "loss": 1.8, "step": 31009000 }, { "epoch": 89.76, "learning_rate": 5.136252720915154e-06, "loss": 1.8131, "step": 31009500 }, { "epoch": 89.76, "learning_rate": 5.135529073267877e-06, "loss": 1.7746, "step": 31010000 }, { "epoch": 89.76, "learning_rate": 5.134806872915895e-06, "loss": 1.8007, "step": 31010500 }, { "epoch": 89.76, "learning_rate": 5.134083225268618e-06, "loss": 1.7869, "step": 31011000 }, { "epoch": 89.77, "learning_rate": 5.133359577621342e-06, "loss": 1.7984, "step": 31011500 }, { "epoch": 89.77, "learning_rate": 5.132635929974065e-06, "loss": 1.8042, "step": 31012000 }, { "epoch": 89.77, "learning_rate": 5.131913729622082e-06, "loss": 1.7885, "step": 31012500 }, { "epoch": 89.77, "learning_rate": 5.131190081974805e-06, "loss": 1.7894, "step": 31013000 }, { "epoch": 89.77, "learning_rate": 5.130466434327529e-06, "loss": 1.8091, "step": 31013500 }, { "epoch": 89.77, "learning_rate": 5.129742786680252e-06, "loss": 1.7733, "step": 31014000 }, { "epoch": 89.77, "learning_rate": 5.1290191390329754e-06, "loss": 1.8082, "step": 31014500 }, { "epoch": 89.78, "learning_rate": 5.1282954913856985e-06, "loss": 1.7927, "step": 31015000 }, { "epoch": 89.78, "learning_rate": 5.127571843738422e-06, "loss": 1.7983, "step": 31015500 }, { "epoch": 89.78, "learning_rate": 5.12684964338644e-06, "loss": 1.7992, "step": 31016000 }, { "epoch": 89.78, "learning_rate": 5.126125995739163e-06, "loss": 1.7725, "step": 31016500 }, { "epoch": 89.78, "learning_rate": 5.125402348091886e-06, "loss": 1.7999, "step": 31017000 }, { "epoch": 89.78, "learning_rate": 5.12467870044461e-06, "loss": 1.8036, "step": 31017500 }, { "epoch": 89.78, "learning_rate": 5.123955052797333e-06, "loss": 1.8026, "step": 31018000 }, { "epoch": 89.79, "learning_rate": 5.123231405150056e-06, "loss": 1.8066, "step": 31018500 }, { "epoch": 89.79, "learning_rate": 5.122507757502779e-06, "loss": 1.7892, "step": 31019000 }, { "epoch": 89.79, "learning_rate": 5.121784109855502e-06, "loss": 1.8035, "step": 31019500 }, { "epoch": 89.79, "learning_rate": 5.121060462208226e-06, "loss": 1.7748, "step": 31020000 }, { "epoch": 89.79, "learning_rate": 5.120336814560948e-06, "loss": 1.8117, "step": 31020500 }, { "epoch": 89.79, "learning_rate": 5.119614614208966e-06, "loss": 1.7964, "step": 31021000 }, { "epoch": 89.79, "learning_rate": 5.1188909665616894e-06, "loss": 1.8089, "step": 31021500 }, { "epoch": 89.8, "learning_rate": 5.118167318914413e-06, "loss": 1.8005, "step": 31022000 }, { "epoch": 89.8, "learning_rate": 5.1174436712671364e-06, "loss": 1.7662, "step": 31022500 }, { "epoch": 89.8, "learning_rate": 5.1167200236198595e-06, "loss": 1.8157, "step": 31023000 }, { "epoch": 89.8, "learning_rate": 5.115996375972583e-06, "loss": 1.8105, "step": 31023500 }, { "epoch": 89.8, "learning_rate": 5.115272728325306e-06, "loss": 1.7839, "step": 31024000 }, { "epoch": 89.8, "learning_rate": 5.114550527973324e-06, "loss": 1.7666, "step": 31024500 }, { "epoch": 89.8, "learning_rate": 5.113826880326047e-06, "loss": 1.8154, "step": 31025000 }, { "epoch": 89.81, "learning_rate": 5.11310323267877e-06, "loss": 1.7914, "step": 31025500 }, { "epoch": 89.81, "learning_rate": 5.112379585031493e-06, "loss": 1.8192, "step": 31026000 }, { "epoch": 89.81, "learning_rate": 5.111655937384217e-06, "loss": 1.8151, "step": 31026500 }, { "epoch": 89.81, "learning_rate": 5.110932289736939e-06, "loss": 1.7558, "step": 31027000 }, { "epoch": 89.81, "learning_rate": 5.110208642089663e-06, "loss": 1.8293, "step": 31027500 }, { "epoch": 89.81, "learning_rate": 5.10948644173768e-06, "loss": 1.7896, "step": 31028000 }, { "epoch": 89.81, "learning_rate": 5.108762794090404e-06, "loss": 1.7697, "step": 31028500 }, { "epoch": 89.82, "learning_rate": 5.108039146443127e-06, "loss": 1.7629, "step": 31029000 }, { "epoch": 89.82, "learning_rate": 5.1073169460911455e-06, "loss": 1.775, "step": 31029500 }, { "epoch": 89.82, "learning_rate": 5.106593298443868e-06, "loss": 1.7662, "step": 31030000 }, { "epoch": 89.82, "learning_rate": 5.105869650796592e-06, "loss": 1.8131, "step": 31030500 }, { "epoch": 89.82, "learning_rate": 5.105146003149315e-06, "loss": 1.8293, "step": 31031000 }, { "epoch": 89.82, "learning_rate": 5.104422355502038e-06, "loss": 1.783, "step": 31031500 }, { "epoch": 89.82, "learning_rate": 5.103698707854762e-06, "loss": 1.8036, "step": 31032000 }, { "epoch": 89.83, "learning_rate": 5.102975060207484e-06, "loss": 1.804, "step": 31032500 }, { "epoch": 89.83, "learning_rate": 5.102252859855503e-06, "loss": 1.7901, "step": 31033000 }, { "epoch": 89.83, "learning_rate": 5.101529212208225e-06, "loss": 1.8096, "step": 31033500 }, { "epoch": 89.83, "learning_rate": 5.100805564560949e-06, "loss": 1.8027, "step": 31034000 }, { "epoch": 89.83, "learning_rate": 5.100081916913671e-06, "loss": 1.7731, "step": 31034500 }, { "epoch": 89.83, "learning_rate": 5.099358269266395e-06, "loss": 1.803, "step": 31035000 }, { "epoch": 89.84, "learning_rate": 5.098634621619119e-06, "loss": 1.8024, "step": 31035500 }, { "epoch": 89.84, "learning_rate": 5.097910973971841e-06, "loss": 1.8052, "step": 31036000 }, { "epoch": 89.84, "learning_rate": 5.097187326324565e-06, "loss": 1.7962, "step": 31036500 }, { "epoch": 89.84, "learning_rate": 5.0964651259725826e-06, "loss": 1.7983, "step": 31037000 }, { "epoch": 89.84, "learning_rate": 5.0957414783253065e-06, "loss": 1.7771, "step": 31037500 }, { "epoch": 89.84, "learning_rate": 5.095017830678029e-06, "loss": 1.7917, "step": 31038000 }, { "epoch": 89.84, "learning_rate": 5.094294183030753e-06, "loss": 1.8234, "step": 31038500 }, { "epoch": 89.85, "learning_rate": 5.093570535383476e-06, "loss": 1.7986, "step": 31039000 }, { "epoch": 89.85, "learning_rate": 5.092846887736199e-06, "loss": 1.7761, "step": 31039500 }, { "epoch": 89.85, "learning_rate": 5.092123240088923e-06, "loss": 1.7962, "step": 31040000 }, { "epoch": 89.85, "learning_rate": 5.09140103973694e-06, "loss": 1.8111, "step": 31040500 }, { "epoch": 89.85, "learning_rate": 5.090677392089663e-06, "loss": 1.8011, "step": 31041000 }, { "epoch": 89.85, "learning_rate": 5.089953744442386e-06, "loss": 1.7815, "step": 31041500 }, { "epoch": 89.85, "learning_rate": 5.08923009679511e-06, "loss": 1.7855, "step": 31042000 }, { "epoch": 89.86, "learning_rate": 5.088506449147832e-06, "loss": 1.809, "step": 31042500 }, { "epoch": 89.86, "learning_rate": 5.087782801500556e-06, "loss": 1.7832, "step": 31043000 }, { "epoch": 89.86, "learning_rate": 5.0870606011485735e-06, "loss": 1.8157, "step": 31043500 }, { "epoch": 89.86, "learning_rate": 5.086336953501297e-06, "loss": 1.808, "step": 31044000 }, { "epoch": 89.86, "learning_rate": 5.0856133058540205e-06, "loss": 1.8058, "step": 31044500 }, { "epoch": 89.86, "learning_rate": 5.0848896582067436e-06, "loss": 1.8014, "step": 31045000 }, { "epoch": 89.86, "learning_rate": 5.084166010559467e-06, "loss": 1.7971, "step": 31045500 }, { "epoch": 89.87, "learning_rate": 5.08344236291219e-06, "loss": 1.8013, "step": 31046000 }, { "epoch": 89.87, "learning_rate": 5.082718715264914e-06, "loss": 1.7971, "step": 31046500 }, { "epoch": 89.87, "learning_rate": 5.081995067617636e-06, "loss": 1.7918, "step": 31047000 }, { "epoch": 89.87, "learning_rate": 5.08127141997036e-06, "loss": 1.7778, "step": 31047500 }, { "epoch": 89.87, "learning_rate": 5.080547772323083e-06, "loss": 1.7877, "step": 31048000 }, { "epoch": 89.87, "learning_rate": 5.079825571971101e-06, "loss": 1.7932, "step": 31048500 }, { "epoch": 89.87, "learning_rate": 5.079103371619118e-06, "loss": 1.7995, "step": 31049000 }, { "epoch": 89.88, "learning_rate": 5.078379723971842e-06, "loss": 1.7883, "step": 31049500 }, { "epoch": 89.88, "learning_rate": 5.077656076324564e-06, "loss": 1.7742, "step": 31050000 }, { "epoch": 89.88, "learning_rate": 5.076932428677288e-06, "loss": 1.7876, "step": 31050500 }, { "epoch": 89.88, "learning_rate": 5.076208781030011e-06, "loss": 1.7816, "step": 31051000 }, { "epoch": 89.88, "learning_rate": 5.0754851333827345e-06, "loss": 1.8116, "step": 31051500 }, { "epoch": 89.88, "learning_rate": 5.0747614857354576e-06, "loss": 1.7867, "step": 31052000 }, { "epoch": 89.88, "learning_rate": 5.074037838088181e-06, "loss": 1.7992, "step": 31052500 }, { "epoch": 89.89, "learning_rate": 5.073314190440905e-06, "loss": 1.7951, "step": 31053000 }, { "epoch": 89.89, "learning_rate": 5.072591990088922e-06, "loss": 1.7902, "step": 31053500 }, { "epoch": 89.89, "learning_rate": 5.071868342441646e-06, "loss": 1.8223, "step": 31054000 }, { "epoch": 89.89, "learning_rate": 5.071144694794369e-06, "loss": 1.7848, "step": 31054500 }, { "epoch": 89.89, "learning_rate": 5.070421047147092e-06, "loss": 1.7924, "step": 31055000 }, { "epoch": 89.89, "learning_rate": 5.069697399499815e-06, "loss": 1.8117, "step": 31055500 }, { "epoch": 89.89, "learning_rate": 5.068975199147833e-06, "loss": 1.8007, "step": 31056000 }, { "epoch": 89.9, "learning_rate": 5.068251551500556e-06, "loss": 1.7975, "step": 31056500 }, { "epoch": 89.9, "learning_rate": 5.067527903853279e-06, "loss": 1.788, "step": 31057000 }, { "epoch": 89.9, "learning_rate": 5.066804256206002e-06, "loss": 1.7742, "step": 31057500 }, { "epoch": 89.9, "learning_rate": 5.0660820558540204e-06, "loss": 1.801, "step": 31058000 }, { "epoch": 89.9, "learning_rate": 5.065361302797333e-06, "loss": 1.814, "step": 31058500 }, { "epoch": 89.9, "learning_rate": 5.064637655150056e-06, "loss": 1.7834, "step": 31059000 }, { "epoch": 89.9, "learning_rate": 5.063914007502779e-06, "loss": 1.7889, "step": 31059500 }, { "epoch": 89.91, "learning_rate": 5.063190359855502e-06, "loss": 1.7877, "step": 31060000 }, { "epoch": 89.91, "learning_rate": 5.062466712208226e-06, "loss": 1.7991, "step": 31060500 }, { "epoch": 89.91, "learning_rate": 5.061743064560949e-06, "loss": 1.7721, "step": 31061000 }, { "epoch": 89.91, "learning_rate": 5.061019416913672e-06, "loss": 1.7735, "step": 31061500 }, { "epoch": 89.91, "learning_rate": 5.060295769266395e-06, "loss": 1.8069, "step": 31062000 }, { "epoch": 89.91, "learning_rate": 5.059572121619118e-06, "loss": 1.7819, "step": 31062500 }, { "epoch": 89.91, "learning_rate": 5.058848473971841e-06, "loss": 1.7999, "step": 31063000 }, { "epoch": 89.92, "learning_rate": 5.058124826324565e-06, "loss": 1.8044, "step": 31063500 }, { "epoch": 89.92, "learning_rate": 5.0574026259725824e-06, "loss": 1.7907, "step": 31064000 }, { "epoch": 89.92, "learning_rate": 5.0566789783253055e-06, "loss": 1.7785, "step": 31064500 }, { "epoch": 89.92, "learning_rate": 5.0559553306780295e-06, "loss": 1.7746, "step": 31065000 }, { "epoch": 89.92, "learning_rate": 5.0552316830307525e-06, "loss": 1.7963, "step": 31065500 }, { "epoch": 89.92, "learning_rate": 5.054508035383476e-06, "loss": 1.797, "step": 31066000 }, { "epoch": 89.92, "learning_rate": 5.053784387736199e-06, "loss": 1.7972, "step": 31066500 }, { "epoch": 89.93, "learning_rate": 5.053060740088922e-06, "loss": 1.7928, "step": 31067000 }, { "epoch": 89.93, "learning_rate": 5.052337092441646e-06, "loss": 1.8019, "step": 31067500 }, { "epoch": 89.93, "learning_rate": 5.051613444794368e-06, "loss": 1.7654, "step": 31068000 }, { "epoch": 89.93, "learning_rate": 5.050889797147092e-06, "loss": 1.7965, "step": 31068500 }, { "epoch": 89.93, "learning_rate": 5.050167596795109e-06, "loss": 1.7858, "step": 31069000 }, { "epoch": 89.93, "learning_rate": 5.049443949147833e-06, "loss": 1.8038, "step": 31069500 }, { "epoch": 89.93, "learning_rate": 5.048720301500556e-06, "loss": 1.7854, "step": 31070000 }, { "epoch": 89.94, "learning_rate": 5.047996653853279e-06, "loss": 1.8226, "step": 31070500 }, { "epoch": 89.94, "learning_rate": 5.0472744535012965e-06, "loss": 1.8072, "step": 31071000 }, { "epoch": 89.94, "learning_rate": 5.04655080585402e-06, "loss": 1.8024, "step": 31071500 }, { "epoch": 89.94, "learning_rate": 5.0458271582067435e-06, "loss": 1.7958, "step": 31072000 }, { "epoch": 89.94, "learning_rate": 5.0451035105594665e-06, "loss": 1.7608, "step": 31072500 }, { "epoch": 89.94, "learning_rate": 5.0443798629121905e-06, "loss": 1.7754, "step": 31073000 }, { "epoch": 89.95, "learning_rate": 5.043656215264913e-06, "loss": 1.8251, "step": 31073500 }, { "epoch": 89.95, "learning_rate": 5.042932567617637e-06, "loss": 1.7764, "step": 31074000 }, { "epoch": 89.95, "learning_rate": 5.04220891997036e-06, "loss": 1.7879, "step": 31074500 }, { "epoch": 89.95, "learning_rate": 5.041486719618378e-06, "loss": 1.7692, "step": 31075000 }, { "epoch": 89.95, "learning_rate": 5.0407630719711e-06, "loss": 1.7914, "step": 31075500 }, { "epoch": 89.95, "learning_rate": 5.040039424323824e-06, "loss": 1.8386, "step": 31076000 }, { "epoch": 89.95, "learning_rate": 5.039315776676547e-06, "loss": 1.801, "step": 31076500 }, { "epoch": 89.96, "learning_rate": 5.038595023619859e-06, "loss": 1.7975, "step": 31077000 }, { "epoch": 89.96, "learning_rate": 5.037871375972582e-06, "loss": 1.7818, "step": 31077500 }, { "epoch": 89.96, "learning_rate": 5.037147728325306e-06, "loss": 1.8002, "step": 31078000 }, { "epoch": 89.96, "learning_rate": 5.0364240806780286e-06, "loss": 1.796, "step": 31078500 }, { "epoch": 89.96, "learning_rate": 5.0357004330307525e-06, "loss": 1.8078, "step": 31079000 }, { "epoch": 89.96, "learning_rate": 5.0349767853834756e-06, "loss": 1.8191, "step": 31079500 }, { "epoch": 89.96, "learning_rate": 5.034253137736199e-06, "loss": 1.8095, "step": 31080000 }, { "epoch": 89.97, "learning_rate": 5.0335294900889226e-06, "loss": 1.7904, "step": 31080500 }, { "epoch": 89.97, "learning_rate": 5.032805842441645e-06, "loss": 1.7769, "step": 31081000 }, { "epoch": 89.97, "learning_rate": 5.032083642089663e-06, "loss": 1.814, "step": 31081500 }, { "epoch": 89.97, "learning_rate": 5.031359994442386e-06, "loss": 1.791, "step": 31082000 }, { "epoch": 89.97, "learning_rate": 5.03063634679511e-06, "loss": 1.7928, "step": 31082500 }, { "epoch": 89.97, "learning_rate": 5.029912699147832e-06, "loss": 1.8185, "step": 31083000 }, { "epoch": 89.97, "learning_rate": 5.029189051500556e-06, "loss": 1.7939, "step": 31083500 }, { "epoch": 89.98, "learning_rate": 5.028466851148573e-06, "loss": 1.7925, "step": 31084000 }, { "epoch": 89.98, "learning_rate": 5.027743203501297e-06, "loss": 1.7887, "step": 31084500 }, { "epoch": 89.98, "learning_rate": 5.0270210031493145e-06, "loss": 1.8083, "step": 31085000 }, { "epoch": 89.98, "learning_rate": 5.0262973555020384e-06, "loss": 1.7956, "step": 31085500 }, { "epoch": 89.98, "learning_rate": 5.025573707854761e-06, "loss": 1.7919, "step": 31086000 }, { "epoch": 89.98, "learning_rate": 5.024850060207485e-06, "loss": 1.7945, "step": 31086500 }, { "epoch": 89.98, "learning_rate": 5.024126412560208e-06, "loss": 1.7981, "step": 31087000 }, { "epoch": 89.99, "learning_rate": 5.023402764912931e-06, "loss": 1.7997, "step": 31087500 }, { "epoch": 89.99, "learning_rate": 5.022679117265655e-06, "loss": 1.7895, "step": 31088000 }, { "epoch": 89.99, "learning_rate": 5.021955469618377e-06, "loss": 1.7703, "step": 31088500 }, { "epoch": 89.99, "learning_rate": 5.021233269266396e-06, "loss": 1.8148, "step": 31089000 }, { "epoch": 89.99, "learning_rate": 5.020511068914413e-06, "loss": 1.791, "step": 31089500 }, { "epoch": 89.99, "learning_rate": 5.019787421267136e-06, "loss": 1.7994, "step": 31090000 }, { "epoch": 89.99, "learning_rate": 5.019063773619859e-06, "loss": 1.7964, "step": 31090500 }, { "epoch": 90.0, "learning_rate": 5.018340125972583e-06, "loss": 1.795, "step": 31091000 }, { "epoch": 90.0, "learning_rate": 5.0176164783253054e-06, "loss": 1.7833, "step": 31091500 }, { "epoch": 90.0, "learning_rate": 5.016892830678029e-06, "loss": 1.794, "step": 31092000 }, { "epoch": 90.0, "eval_accuracy": 0.6908986811886525, "eval_accuracy_mlm": 0.6604567266583212, "eval_accuracy_nsp": 0.8540649791698175, "eval_loss": 2.221858263015747, "eval_runtime": 331.9118, "eval_samples_per_second": 1314.765, "eval_steps_per_second": 54.783, "step": 31092480 }, { "epoch": 90.0, "learning_rate": 5.0161691830307524e-06, "loss": 1.7849, "step": 31092500 }, { "epoch": 90.0, "learning_rate": 5.0154455353834755e-06, "loss": 1.7706, "step": 31093000 }, { "epoch": 90.0, "learning_rate": 5.0147218877361994e-06, "loss": 1.79, "step": 31093500 }, { "epoch": 90.0, "learning_rate": 5.013999687384217e-06, "loss": 1.789, "step": 31094000 }, { "epoch": 90.01, "learning_rate": 5.013277487032234e-06, "loss": 1.7879, "step": 31094500 }, { "epoch": 90.01, "learning_rate": 5.012553839384958e-06, "loss": 1.7895, "step": 31095000 }, { "epoch": 90.01, "learning_rate": 5.011831639032975e-06, "loss": 1.7814, "step": 31095500 }, { "epoch": 90.01, "learning_rate": 5.011107991385699e-06, "loss": 1.7735, "step": 31096000 }, { "epoch": 90.01, "learning_rate": 5.010384343738421e-06, "loss": 1.7798, "step": 31096500 }, { "epoch": 90.01, "learning_rate": 5.009660696091145e-06, "loss": 1.7664, "step": 31097000 }, { "epoch": 90.01, "learning_rate": 5.008937048443868e-06, "loss": 1.8069, "step": 31097500 }, { "epoch": 90.02, "learning_rate": 5.008213400796591e-06, "loss": 1.8294, "step": 31098000 }, { "epoch": 90.02, "learning_rate": 5.007489753149315e-06, "loss": 1.8206, "step": 31098500 }, { "epoch": 90.02, "learning_rate": 5.0067661055020375e-06, "loss": 1.7878, "step": 31099000 }, { "epoch": 90.02, "learning_rate": 5.0060439051500565e-06, "loss": 1.779, "step": 31099500 }, { "epoch": 90.02, "learning_rate": 5.005320257502779e-06, "loss": 1.7837, "step": 31100000 }, { "epoch": 90.02, "learning_rate": 5.004596609855503e-06, "loss": 1.7972, "step": 31100500 }, { "epoch": 90.02, "learning_rate": 5.003872962208226e-06, "loss": 1.7919, "step": 31101000 }, { "epoch": 90.03, "learning_rate": 5.003149314560949e-06, "loss": 1.7866, "step": 31101500 }, { "epoch": 90.03, "learning_rate": 5.002425666913672e-06, "loss": 1.7474, "step": 31102000 }, { "epoch": 90.03, "learning_rate": 5.00170346656169e-06, "loss": 1.7926, "step": 31102500 }, { "epoch": 90.03, "learning_rate": 5.000979818914413e-06, "loss": 1.7904, "step": 31103000 }, { "epoch": 90.03, "learning_rate": 5.000257618562431e-06, "loss": 1.797, "step": 31103500 }, { "epoch": 90.03, "learning_rate": 4.999533970915154e-06, "loss": 1.7751, "step": 31104000 }, { "epoch": 90.03, "learning_rate": 4.998810323267877e-06, "loss": 1.787, "step": 31104500 }, { "epoch": 90.04, "learning_rate": 4.9980866756206e-06, "loss": 1.7796, "step": 31105000 }, { "epoch": 90.04, "learning_rate": 4.9973630279733235e-06, "loss": 1.7835, "step": 31105500 }, { "epoch": 90.04, "learning_rate": 4.996639380326047e-06, "loss": 1.779, "step": 31106000 }, { "epoch": 90.04, "learning_rate": 4.99591573267877e-06, "loss": 1.7942, "step": 31106500 }, { "epoch": 90.04, "learning_rate": 4.9951920850314936e-06, "loss": 1.7923, "step": 31107000 }, { "epoch": 90.04, "learning_rate": 4.994468437384217e-06, "loss": 1.779, "step": 31107500 }, { "epoch": 90.04, "learning_rate": 4.99374478973694e-06, "loss": 1.7857, "step": 31108000 }, { "epoch": 90.05, "learning_rate": 4.993021142089663e-06, "loss": 1.7934, "step": 31108500 }, { "epoch": 90.05, "learning_rate": 4.992297494442386e-06, "loss": 1.7854, "step": 31109000 }, { "epoch": 90.05, "learning_rate": 4.991575294090404e-06, "loss": 1.7697, "step": 31109500 }, { "epoch": 90.05, "learning_rate": 4.990851646443127e-06, "loss": 1.782, "step": 31110000 }, { "epoch": 90.05, "learning_rate": 4.990127998795851e-06, "loss": 1.7863, "step": 31110500 }, { "epoch": 90.05, "learning_rate": 4.989404351148574e-06, "loss": 1.7689, "step": 31111000 }, { "epoch": 90.06, "learning_rate": 4.988680703501297e-06, "loss": 1.7805, "step": 31111500 }, { "epoch": 90.06, "learning_rate": 4.98795705585402e-06, "loss": 1.8069, "step": 31112000 }, { "epoch": 90.06, "learning_rate": 4.987233408206743e-06, "loss": 1.8034, "step": 31112500 }, { "epoch": 90.06, "learning_rate": 4.986509760559466e-06, "loss": 1.7872, "step": 31113000 }, { "epoch": 90.06, "learning_rate": 4.9857875602074845e-06, "loss": 1.7749, "step": 31113500 }, { "epoch": 90.06, "learning_rate": 4.9850639125602076e-06, "loss": 1.7911, "step": 31114000 }, { "epoch": 90.06, "learning_rate": 4.984340264912931e-06, "loss": 1.782, "step": 31114500 }, { "epoch": 90.07, "learning_rate": 4.983618064560949e-06, "loss": 1.8004, "step": 31115000 }, { "epoch": 90.07, "learning_rate": 4.982894416913672e-06, "loss": 1.811, "step": 31115500 }, { "epoch": 90.07, "learning_rate": 4.982170769266395e-06, "loss": 1.8198, "step": 31116000 }, { "epoch": 90.07, "learning_rate": 4.981447121619119e-06, "loss": 1.7759, "step": 31116500 }, { "epoch": 90.07, "learning_rate": 4.980723473971842e-06, "loss": 1.783, "step": 31117000 }, { "epoch": 90.07, "learning_rate": 4.979999826324565e-06, "loss": 1.7723, "step": 31117500 }, { "epoch": 90.07, "learning_rate": 4.979276178677288e-06, "loss": 1.7938, "step": 31118000 }, { "epoch": 90.08, "learning_rate": 4.978552531030011e-06, "loss": 1.7767, "step": 31118500 }, { "epoch": 90.08, "learning_rate": 4.977828883382735e-06, "loss": 1.7954, "step": 31119000 }, { "epoch": 90.08, "learning_rate": 4.977106683030752e-06, "loss": 1.797, "step": 31119500 }, { "epoch": 90.08, "learning_rate": 4.976383035383475e-06, "loss": 1.7555, "step": 31120000 }, { "epoch": 90.08, "learning_rate": 4.9756593877361985e-06, "loss": 1.8036, "step": 31120500 }, { "epoch": 90.08, "learning_rate": 4.974935740088922e-06, "loss": 1.7866, "step": 31121000 }, { "epoch": 90.08, "learning_rate": 4.9742120924416455e-06, "loss": 1.7826, "step": 31121500 }, { "epoch": 90.09, "learning_rate": 4.973488444794369e-06, "loss": 1.7806, "step": 31122000 }, { "epoch": 90.09, "learning_rate": 4.972764797147092e-06, "loss": 1.8119, "step": 31122500 }, { "epoch": 90.09, "learning_rate": 4.972041149499815e-06, "loss": 1.7949, "step": 31123000 }, { "epoch": 90.09, "learning_rate": 4.971318949147833e-06, "loss": 1.7946, "step": 31123500 }, { "epoch": 90.09, "learning_rate": 4.970595301500556e-06, "loss": 1.7537, "step": 31124000 }, { "epoch": 90.09, "learning_rate": 4.969871653853279e-06, "loss": 1.8041, "step": 31124500 }, { "epoch": 90.09, "learning_rate": 4.969149453501297e-06, "loss": 1.7843, "step": 31125000 }, { "epoch": 90.1, "learning_rate": 4.96842580585402e-06, "loss": 1.7803, "step": 31125500 }, { "epoch": 90.1, "learning_rate": 4.967702158206743e-06, "loss": 1.7733, "step": 31126000 }, { "epoch": 90.1, "learning_rate": 4.966978510559467e-06, "loss": 1.7927, "step": 31126500 }, { "epoch": 90.1, "learning_rate": 4.966254862912189e-06, "loss": 1.7878, "step": 31127000 }, { "epoch": 90.1, "learning_rate": 4.965531215264913e-06, "loss": 1.7845, "step": 31127500 }, { "epoch": 90.1, "learning_rate": 4.9648075676176364e-06, "loss": 1.7796, "step": 31128000 }, { "epoch": 90.1, "learning_rate": 4.9640839199703595e-06, "loss": 1.7939, "step": 31128500 }, { "epoch": 90.11, "learning_rate": 4.9633602723230834e-06, "loss": 1.8164, "step": 31129000 }, { "epoch": 90.11, "learning_rate": 4.962638071971101e-06, "loss": 1.7666, "step": 31129500 }, { "epoch": 90.11, "learning_rate": 4.961914424323824e-06, "loss": 1.7706, "step": 31130000 }, { "epoch": 90.11, "learning_rate": 4.961190776676547e-06, "loss": 1.7703, "step": 31130500 }, { "epoch": 90.11, "learning_rate": 4.960468576324565e-06, "loss": 1.7856, "step": 31131000 }, { "epoch": 90.11, "learning_rate": 4.959744928677288e-06, "loss": 1.8459, "step": 31131500 }, { "epoch": 90.11, "learning_rate": 4.959021281030012e-06, "loss": 1.7823, "step": 31132000 }, { "epoch": 90.12, "learning_rate": 4.958297633382734e-06, "loss": 1.7816, "step": 31132500 }, { "epoch": 90.12, "learning_rate": 4.957573985735458e-06, "loss": 1.8029, "step": 31133000 }, { "epoch": 90.12, "learning_rate": 4.95685033808818e-06, "loss": 1.7755, "step": 31133500 }, { "epoch": 90.12, "learning_rate": 4.956126690440904e-06, "loss": 1.7814, "step": 31134000 }, { "epoch": 90.12, "learning_rate": 4.955403042793628e-06, "loss": 1.8036, "step": 31134500 }, { "epoch": 90.12, "learning_rate": 4.9546808424416455e-06, "loss": 1.8075, "step": 31135000 }, { "epoch": 90.12, "learning_rate": 4.9539571947943685e-06, "loss": 1.7824, "step": 31135500 }, { "epoch": 90.13, "learning_rate": 4.953233547147092e-06, "loss": 1.7896, "step": 31136000 }, { "epoch": 90.13, "learning_rate": 4.9525098994998155e-06, "loss": 1.775, "step": 31136500 }, { "epoch": 90.13, "learning_rate": 4.951787699147833e-06, "loss": 1.7894, "step": 31137000 }, { "epoch": 90.13, "learning_rate": 4.951064051500557e-06, "loss": 1.7945, "step": 31137500 }, { "epoch": 90.13, "learning_rate": 4.950340403853279e-06, "loss": 1.7907, "step": 31138000 }, { "epoch": 90.13, "learning_rate": 4.949616756206003e-06, "loss": 1.7874, "step": 31138500 }, { "epoch": 90.13, "learning_rate": 4.948893108558725e-06, "loss": 1.7964, "step": 31139000 }, { "epoch": 90.14, "learning_rate": 4.948169460911449e-06, "loss": 1.7931, "step": 31139500 }, { "epoch": 90.14, "learning_rate": 4.947447260559466e-06, "loss": 1.8132, "step": 31140000 }, { "epoch": 90.14, "learning_rate": 4.94672361291219e-06, "loss": 1.7813, "step": 31140500 }, { "epoch": 90.14, "learning_rate": 4.945999965264913e-06, "loss": 1.7923, "step": 31141000 }, { "epoch": 90.14, "learning_rate": 4.945276317617636e-06, "loss": 1.7839, "step": 31141500 }, { "epoch": 90.14, "learning_rate": 4.944554117265654e-06, "loss": 1.7957, "step": 31142000 }, { "epoch": 90.14, "learning_rate": 4.9438304696183776e-06, "loss": 1.7972, "step": 31142500 }, { "epoch": 90.15, "learning_rate": 4.943106821971101e-06, "loss": 1.7803, "step": 31143000 }, { "epoch": 90.15, "learning_rate": 4.942383174323824e-06, "loss": 1.7971, "step": 31143500 }, { "epoch": 90.15, "learning_rate": 4.941659526676548e-06, "loss": 1.818, "step": 31144000 }, { "epoch": 90.15, "learning_rate": 4.94093587902927e-06, "loss": 1.78, "step": 31144500 }, { "epoch": 90.15, "learning_rate": 4.940212231381994e-06, "loss": 1.7981, "step": 31145000 }, { "epoch": 90.15, "learning_rate": 4.939488583734717e-06, "loss": 1.8122, "step": 31145500 }, { "epoch": 90.15, "learning_rate": 4.938766383382735e-06, "loss": 1.766, "step": 31146000 }, { "epoch": 90.16, "learning_rate": 4.938044183030752e-06, "loss": 1.8091, "step": 31146500 }, { "epoch": 90.16, "learning_rate": 4.937320535383476e-06, "loss": 1.767, "step": 31147000 }, { "epoch": 90.16, "learning_rate": 4.936596887736198e-06, "loss": 1.7652, "step": 31147500 }, { "epoch": 90.16, "learning_rate": 4.935873240088922e-06, "loss": 1.7852, "step": 31148000 }, { "epoch": 90.16, "learning_rate": 4.935149592441645e-06, "loss": 1.7873, "step": 31148500 }, { "epoch": 90.16, "learning_rate": 4.9344259447943685e-06, "loss": 1.7576, "step": 31149000 }, { "epoch": 90.17, "learning_rate": 4.933703744442386e-06, "loss": 1.7617, "step": 31149500 }, { "epoch": 90.17, "learning_rate": 4.93298009679511e-06, "loss": 1.7655, "step": 31150000 }, { "epoch": 90.17, "learning_rate": 4.932256449147833e-06, "loss": 1.7841, "step": 31150500 }, { "epoch": 90.17, "learning_rate": 4.931532801500556e-06, "loss": 1.7695, "step": 31151000 }, { "epoch": 90.17, "learning_rate": 4.930810601148574e-06, "loss": 1.7861, "step": 31151500 }, { "epoch": 90.17, "learning_rate": 4.930086953501297e-06, "loss": 1.7784, "step": 31152000 }, { "epoch": 90.17, "learning_rate": 4.92936330585402e-06, "loss": 1.7752, "step": 31152500 }, { "epoch": 90.18, "learning_rate": 4.928641105502038e-06, "loss": 1.7681, "step": 31153000 }, { "epoch": 90.18, "learning_rate": 4.927917457854761e-06, "loss": 1.7669, "step": 31153500 }, { "epoch": 90.18, "learning_rate": 4.927193810207484e-06, "loss": 1.7655, "step": 31154000 }, { "epoch": 90.18, "learning_rate": 4.926470162560208e-06, "loss": 1.7694, "step": 31154500 }, { "epoch": 90.18, "learning_rate": 4.9257465149129305e-06, "loss": 1.7786, "step": 31155000 }, { "epoch": 90.18, "learning_rate": 4.925022867265654e-06, "loss": 1.8011, "step": 31155500 }, { "epoch": 90.18, "learning_rate": 4.9242992196183775e-06, "loss": 1.8095, "step": 31156000 }, { "epoch": 90.19, "learning_rate": 4.923575571971101e-06, "loss": 1.7838, "step": 31156500 }, { "epoch": 90.19, "learning_rate": 4.922851924323824e-06, "loss": 1.8045, "step": 31157000 }, { "epoch": 90.19, "learning_rate": 4.922129723971842e-06, "loss": 1.7851, "step": 31157500 }, { "epoch": 90.19, "learning_rate": 4.921406076324565e-06, "loss": 1.8122, "step": 31158000 }, { "epoch": 90.19, "learning_rate": 4.920683875972583e-06, "loss": 1.8119, "step": 31158500 }, { "epoch": 90.19, "learning_rate": 4.919960228325306e-06, "loss": 1.766, "step": 31159000 }, { "epoch": 90.19, "learning_rate": 4.919236580678029e-06, "loss": 1.7859, "step": 31159500 }, { "epoch": 90.2, "learning_rate": 4.918512933030752e-06, "loss": 1.768, "step": 31160000 }, { "epoch": 90.2, "learning_rate": 4.91779073267877e-06, "loss": 1.7731, "step": 31160500 }, { "epoch": 90.2, "learning_rate": 4.917067085031493e-06, "loss": 1.7872, "step": 31161000 }, { "epoch": 90.2, "learning_rate": 4.9163434373842164e-06, "loss": 1.7871, "step": 31161500 }, { "epoch": 90.2, "learning_rate": 4.9156197897369395e-06, "loss": 1.7707, "step": 31162000 }, { "epoch": 90.2, "learning_rate": 4.9148961420896634e-06, "loss": 1.7767, "step": 31162500 }, { "epoch": 90.2, "learning_rate": 4.9141724944423865e-06, "loss": 1.8039, "step": 31163000 }, { "epoch": 90.21, "learning_rate": 4.91344884679511e-06, "loss": 1.7665, "step": 31163500 }, { "epoch": 90.21, "learning_rate": 4.912725199147833e-06, "loss": 1.7727, "step": 31164000 }, { "epoch": 90.21, "learning_rate": 4.912002998795851e-06, "loss": 1.7988, "step": 31164500 }, { "epoch": 90.21, "learning_rate": 4.911279351148574e-06, "loss": 1.7831, "step": 31165000 }, { "epoch": 90.21, "learning_rate": 4.910555703501297e-06, "loss": 1.779, "step": 31165500 }, { "epoch": 90.21, "learning_rate": 4.90983205585402e-06, "loss": 1.7779, "step": 31166000 }, { "epoch": 90.21, "learning_rate": 4.909109855502038e-06, "loss": 1.7852, "step": 31166500 }, { "epoch": 90.22, "learning_rate": 4.908386207854761e-06, "loss": 1.7909, "step": 31167000 }, { "epoch": 90.22, "learning_rate": 4.9076654547980735e-06, "loss": 1.7843, "step": 31167500 }, { "epoch": 90.22, "learning_rate": 4.9069418071507965e-06, "loss": 1.8014, "step": 31168000 }, { "epoch": 90.22, "learning_rate": 4.90621815950352e-06, "loss": 1.7705, "step": 31168500 }, { "epoch": 90.22, "learning_rate": 4.9054945118562436e-06, "loss": 1.7863, "step": 31169000 }, { "epoch": 90.22, "learning_rate": 4.904770864208967e-06, "loss": 1.7659, "step": 31169500 }, { "epoch": 90.22, "learning_rate": 4.90404721656169e-06, "loss": 1.8, "step": 31170000 }, { "epoch": 90.23, "learning_rate": 4.903323568914413e-06, "loss": 1.7729, "step": 31170500 }, { "epoch": 90.23, "learning_rate": 4.902599921267136e-06, "loss": 1.7622, "step": 31171000 }, { "epoch": 90.23, "learning_rate": 4.901876273619859e-06, "loss": 1.7951, "step": 31171500 }, { "epoch": 90.23, "learning_rate": 4.901152625972583e-06, "loss": 1.8082, "step": 31172000 }, { "epoch": 90.23, "learning_rate": 4.900428978325306e-06, "loss": 1.7834, "step": 31172500 }, { "epoch": 90.23, "learning_rate": 4.899705330678029e-06, "loss": 1.7776, "step": 31173000 }, { "epoch": 90.23, "learning_rate": 4.898981683030752e-06, "loss": 1.7625, "step": 31173500 }, { "epoch": 90.24, "learning_rate": 4.898258035383475e-06, "loss": 1.7844, "step": 31174000 }, { "epoch": 90.24, "learning_rate": 4.897535835031493e-06, "loss": 1.7921, "step": 31174500 }, { "epoch": 90.24, "learning_rate": 4.896812187384216e-06, "loss": 1.765, "step": 31175000 }, { "epoch": 90.24, "learning_rate": 4.89608853973694e-06, "loss": 1.7972, "step": 31175500 }, { "epoch": 90.24, "learning_rate": 4.8953648920896625e-06, "loss": 1.7777, "step": 31176000 }, { "epoch": 90.24, "learning_rate": 4.8946412444423865e-06, "loss": 1.7826, "step": 31176500 }, { "epoch": 90.24, "learning_rate": 4.8939175967951096e-06, "loss": 1.8026, "step": 31177000 }, { "epoch": 90.25, "learning_rate": 4.893193949147833e-06, "loss": 1.797, "step": 31177500 }, { "epoch": 90.25, "learning_rate": 4.8924703015005566e-06, "loss": 1.7904, "step": 31178000 }, { "epoch": 90.25, "learning_rate": 4.891746653853279e-06, "loss": 1.8041, "step": 31178500 }, { "epoch": 90.25, "learning_rate": 4.891024453501297e-06, "loss": 1.7956, "step": 31179000 }, { "epoch": 90.25, "learning_rate": 4.89030080585402e-06, "loss": 1.7892, "step": 31179500 }, { "epoch": 90.25, "learning_rate": 4.889578605502038e-06, "loss": 1.7888, "step": 31180000 }, { "epoch": 90.25, "learning_rate": 4.888854957854761e-06, "loss": 1.792, "step": 31180500 }, { "epoch": 90.26, "learning_rate": 4.888131310207485e-06, "loss": 1.7778, "step": 31181000 }, { "epoch": 90.26, "learning_rate": 4.887407662560207e-06, "loss": 1.7914, "step": 31181500 }, { "epoch": 90.26, "learning_rate": 4.886684014912931e-06, "loss": 1.7802, "step": 31182000 }, { "epoch": 90.26, "learning_rate": 4.8859603672656535e-06, "loss": 1.7836, "step": 31182500 }, { "epoch": 90.26, "learning_rate": 4.885236719618377e-06, "loss": 1.8058, "step": 31183000 }, { "epoch": 90.26, "learning_rate": 4.884514519266395e-06, "loss": 1.7863, "step": 31183500 }, { "epoch": 90.26, "learning_rate": 4.883790871619119e-06, "loss": 1.789, "step": 31184000 }, { "epoch": 90.27, "learning_rate": 4.883067223971842e-06, "loss": 1.791, "step": 31184500 }, { "epoch": 90.27, "learning_rate": 4.882343576324565e-06, "loss": 1.8085, "step": 31185000 }, { "epoch": 90.27, "learning_rate": 4.881619928677289e-06, "loss": 1.7665, "step": 31185500 }, { "epoch": 90.27, "learning_rate": 4.880896281030011e-06, "loss": 1.7752, "step": 31186000 }, { "epoch": 90.27, "learning_rate": 4.880172633382735e-06, "loss": 1.7882, "step": 31186500 }, { "epoch": 90.27, "learning_rate": 4.879448985735458e-06, "loss": 1.7978, "step": 31187000 }, { "epoch": 90.28, "learning_rate": 4.878726785383476e-06, "loss": 1.7787, "step": 31187500 }, { "epoch": 90.28, "learning_rate": 4.878003137736198e-06, "loss": 1.8019, "step": 31188000 }, { "epoch": 90.28, "learning_rate": 4.877279490088922e-06, "loss": 1.8162, "step": 31188500 }, { "epoch": 90.28, "learning_rate": 4.876555842441645e-06, "loss": 1.7707, "step": 31189000 }, { "epoch": 90.28, "learning_rate": 4.875832194794368e-06, "loss": 1.7584, "step": 31189500 }, { "epoch": 90.28, "learning_rate": 4.8751099944423864e-06, "loss": 1.7962, "step": 31190000 }, { "epoch": 90.28, "learning_rate": 4.8743863467951095e-06, "loss": 1.7925, "step": 31190500 }, { "epoch": 90.29, "learning_rate": 4.8736626991478334e-06, "loss": 1.7944, "step": 31191000 }, { "epoch": 90.29, "learning_rate": 4.872939051500556e-06, "loss": 1.7859, "step": 31191500 }, { "epoch": 90.29, "learning_rate": 4.87221540385328e-06, "loss": 1.7875, "step": 31192000 }, { "epoch": 90.29, "learning_rate": 4.871493203501297e-06, "loss": 1.8118, "step": 31192500 }, { "epoch": 90.29, "learning_rate": 4.870771003149314e-06, "loss": 1.7886, "step": 31193000 }, { "epoch": 90.29, "learning_rate": 4.870047355502038e-06, "loss": 1.7953, "step": 31193500 }, { "epoch": 90.29, "learning_rate": 4.869323707854762e-06, "loss": 1.7771, "step": 31194000 }, { "epoch": 90.3, "learning_rate": 4.868600060207484e-06, "loss": 1.7902, "step": 31194500 }, { "epoch": 90.3, "learning_rate": 4.867876412560208e-06, "loss": 1.7969, "step": 31195000 }, { "epoch": 90.3, "learning_rate": 4.86715276491293e-06, "loss": 1.8057, "step": 31195500 }, { "epoch": 90.3, "learning_rate": 4.866429117265654e-06, "loss": 1.7889, "step": 31196000 }, { "epoch": 90.3, "learning_rate": 4.865705469618377e-06, "loss": 1.8185, "step": 31196500 }, { "epoch": 90.3, "learning_rate": 4.8649832692663954e-06, "loss": 1.8, "step": 31197000 }, { "epoch": 90.3, "learning_rate": 4.8642596216191185e-06, "loss": 1.7895, "step": 31197500 }, { "epoch": 90.31, "learning_rate": 4.863535973971842e-06, "loss": 1.762, "step": 31198000 }, { "epoch": 90.31, "learning_rate": 4.8628123263245655e-06, "loss": 1.7746, "step": 31198500 }, { "epoch": 90.31, "learning_rate": 4.862088678677288e-06, "loss": 1.7764, "step": 31199000 }, { "epoch": 90.31, "learning_rate": 4.861366478325306e-06, "loss": 1.7708, "step": 31199500 }, { "epoch": 90.31, "learning_rate": 4.860642830678029e-06, "loss": 1.774, "step": 31200000 }, { "epoch": 90.31, "learning_rate": 4.859919183030753e-06, "loss": 1.7945, "step": 31200500 }, { "epoch": 90.31, "learning_rate": 4.85919698267877e-06, "loss": 1.7779, "step": 31201000 }, { "epoch": 90.32, "learning_rate": 4.858473335031494e-06, "loss": 1.7854, "step": 31201500 }, { "epoch": 90.32, "learning_rate": 4.857749687384216e-06, "loss": 1.7824, "step": 31202000 }, { "epoch": 90.32, "learning_rate": 4.85702603973694e-06, "loss": 1.8147, "step": 31202500 }, { "epoch": 90.32, "learning_rate": 4.856302392089663e-06, "loss": 1.7825, "step": 31203000 }, { "epoch": 90.32, "learning_rate": 4.855578744442386e-06, "loss": 1.7914, "step": 31203500 }, { "epoch": 90.32, "learning_rate": 4.854856544090404e-06, "loss": 1.7945, "step": 31204000 }, { "epoch": 90.32, "learning_rate": 4.8541328964431275e-06, "loss": 1.7832, "step": 31204500 }, { "epoch": 90.33, "learning_rate": 4.853409248795851e-06, "loss": 1.8035, "step": 31205000 }, { "epoch": 90.33, "learning_rate": 4.852685601148574e-06, "loss": 1.7997, "step": 31205500 }, { "epoch": 90.33, "learning_rate": 4.851961953501297e-06, "loss": 1.8276, "step": 31206000 }, { "epoch": 90.33, "learning_rate": 4.85123830585402e-06, "loss": 1.8052, "step": 31206500 }, { "epoch": 90.33, "learning_rate": 4.850514658206744e-06, "loss": 1.7852, "step": 31207000 }, { "epoch": 90.33, "learning_rate": 4.849791010559467e-06, "loss": 1.7711, "step": 31207500 }, { "epoch": 90.33, "learning_rate": 4.84906736291219e-06, "loss": 1.7991, "step": 31208000 }, { "epoch": 90.34, "learning_rate": 4.848345162560207e-06, "loss": 1.7832, "step": 31208500 }, { "epoch": 90.34, "learning_rate": 4.847621514912931e-06, "loss": 1.786, "step": 31209000 }, { "epoch": 90.34, "learning_rate": 4.846897867265654e-06, "loss": 1.7936, "step": 31209500 }, { "epoch": 90.34, "learning_rate": 4.846174219618377e-06, "loss": 1.7605, "step": 31210000 }, { "epoch": 90.34, "learning_rate": 4.8454505719711e-06, "loss": 1.7681, "step": 31210500 }, { "epoch": 90.34, "learning_rate": 4.8447269243238235e-06, "loss": 1.7994, "step": 31211000 }, { "epoch": 90.34, "learning_rate": 4.844003276676547e-06, "loss": 1.7844, "step": 31211500 }, { "epoch": 90.35, "learning_rate": 4.843281076324565e-06, "loss": 1.7858, "step": 31212000 }, { "epoch": 90.35, "learning_rate": 4.8425574286772886e-06, "loss": 1.7801, "step": 31212500 }, { "epoch": 90.35, "learning_rate": 4.841833781030012e-06, "loss": 1.7885, "step": 31213000 }, { "epoch": 90.35, "learning_rate": 4.841110133382735e-06, "loss": 1.7967, "step": 31213500 }, { "epoch": 90.35, "learning_rate": 4.840386485735458e-06, "loss": 1.7677, "step": 31214000 }, { "epoch": 90.35, "learning_rate": 4.839664285383476e-06, "loss": 1.8075, "step": 31214500 }, { "epoch": 90.35, "learning_rate": 4.838940637736199e-06, "loss": 1.7773, "step": 31215000 }, { "epoch": 90.36, "learning_rate": 4.838216990088922e-06, "loss": 1.7889, "step": 31215500 }, { "epoch": 90.36, "learning_rate": 4.837493342441645e-06, "loss": 1.7712, "step": 31216000 }, { "epoch": 90.36, "learning_rate": 4.836769694794368e-06, "loss": 1.8034, "step": 31216500 }, { "epoch": 90.36, "learning_rate": 4.836046047147091e-06, "loss": 1.8043, "step": 31217000 }, { "epoch": 90.36, "learning_rate": 4.835323846795109e-06, "loss": 1.7833, "step": 31217500 }, { "epoch": 90.36, "learning_rate": 4.8346001991478325e-06, "loss": 1.7762, "step": 31218000 }, { "epoch": 90.36, "learning_rate": 4.833876551500556e-06, "loss": 1.8032, "step": 31218500 }, { "epoch": 90.37, "learning_rate": 4.8331529038532795e-06, "loss": 1.7891, "step": 31219000 }, { "epoch": 90.37, "learning_rate": 4.8324292562060026e-06, "loss": 1.7963, "step": 31219500 }, { "epoch": 90.37, "learning_rate": 4.83170705585402e-06, "loss": 1.7779, "step": 31220000 }, { "epoch": 90.37, "learning_rate": 4.830984855502038e-06, "loss": 1.7835, "step": 31220500 }, { "epoch": 90.37, "learning_rate": 4.830261207854761e-06, "loss": 1.7733, "step": 31221000 }, { "epoch": 90.37, "learning_rate": 4.829539007502779e-06, "loss": 1.7873, "step": 31221500 }, { "epoch": 90.37, "learning_rate": 4.828815359855502e-06, "loss": 1.7883, "step": 31222000 }, { "epoch": 90.38, "learning_rate": 4.828091712208225e-06, "loss": 1.7832, "step": 31222500 }, { "epoch": 90.38, "learning_rate": 4.827368064560948e-06, "loss": 1.7958, "step": 31223000 }, { "epoch": 90.38, "learning_rate": 4.826644416913672e-06, "loss": 1.7773, "step": 31223500 }, { "epoch": 90.38, "learning_rate": 4.825920769266395e-06, "loss": 1.7691, "step": 31224000 }, { "epoch": 90.38, "learning_rate": 4.8251971216191184e-06, "loss": 1.7608, "step": 31224500 }, { "epoch": 90.38, "learning_rate": 4.8244734739718415e-06, "loss": 1.8072, "step": 31225000 }, { "epoch": 90.39, "learning_rate": 4.823749826324565e-06, "loss": 1.8089, "step": 31225500 }, { "epoch": 90.39, "learning_rate": 4.8230261786772885e-06, "loss": 1.8053, "step": 31226000 }, { "epoch": 90.39, "learning_rate": 4.822302531030011e-06, "loss": 1.7617, "step": 31226500 }, { "epoch": 90.39, "learning_rate": 4.821578883382735e-06, "loss": 1.8127, "step": 31227000 }, { "epoch": 90.39, "learning_rate": 4.820855235735458e-06, "loss": 1.7897, "step": 31227500 }, { "epoch": 90.39, "learning_rate": 4.820131588088181e-06, "loss": 1.7943, "step": 31228000 }, { "epoch": 90.39, "learning_rate": 4.819407940440905e-06, "loss": 1.7907, "step": 31228500 }, { "epoch": 90.4, "learning_rate": 4.818685740088922e-06, "loss": 1.765, "step": 31229000 }, { "epoch": 90.4, "learning_rate": 4.817963539736939e-06, "loss": 1.812, "step": 31229500 }, { "epoch": 90.4, "learning_rate": 4.817239892089663e-06, "loss": 1.8112, "step": 31230000 }, { "epoch": 90.4, "learning_rate": 4.816516244442386e-06, "loss": 1.7805, "step": 31230500 }, { "epoch": 90.4, "learning_rate": 4.815792596795109e-06, "loss": 1.7992, "step": 31231000 }, { "epoch": 90.4, "learning_rate": 4.8150703964431274e-06, "loss": 1.7756, "step": 31231500 }, { "epoch": 90.4, "learning_rate": 4.8143467487958505e-06, "loss": 1.7701, "step": 31232000 }, { "epoch": 90.41, "learning_rate": 4.813623101148574e-06, "loss": 1.8, "step": 31232500 }, { "epoch": 90.41, "learning_rate": 4.812899453501297e-06, "loss": 1.7901, "step": 31233000 }, { "epoch": 90.41, "learning_rate": 4.812175805854021e-06, "loss": 1.8001, "step": 31233500 }, { "epoch": 90.41, "learning_rate": 4.811452158206743e-06, "loss": 1.8055, "step": 31234000 }, { "epoch": 90.41, "learning_rate": 4.810728510559467e-06, "loss": 1.8115, "step": 31234500 }, { "epoch": 90.41, "learning_rate": 4.81000486291219e-06, "loss": 1.784, "step": 31235000 }, { "epoch": 90.41, "learning_rate": 4.809282662560208e-06, "loss": 1.7623, "step": 31235500 }, { "epoch": 90.42, "learning_rate": 4.808559014912931e-06, "loss": 1.7732, "step": 31236000 }, { "epoch": 90.42, "learning_rate": 4.807835367265654e-06, "loss": 1.7772, "step": 31236500 }, { "epoch": 90.42, "learning_rate": 4.807111719618378e-06, "loss": 1.8004, "step": 31237000 }, { "epoch": 90.42, "learning_rate": 4.8063880719711e-06, "loss": 1.8031, "step": 31237500 }, { "epoch": 90.42, "learning_rate": 4.805664424323824e-06, "loss": 1.7627, "step": 31238000 }, { "epoch": 90.42, "learning_rate": 4.8049407766765464e-06, "loss": 1.7902, "step": 31238500 }, { "epoch": 90.42, "learning_rate": 4.80421712902927e-06, "loss": 1.8073, "step": 31239000 }, { "epoch": 90.43, "learning_rate": 4.803493481381994e-06, "loss": 1.8201, "step": 31239500 }, { "epoch": 90.43, "learning_rate": 4.8027698337347165e-06, "loss": 1.7819, "step": 31240000 }, { "epoch": 90.43, "learning_rate": 4.8020461860874405e-06, "loss": 1.7736, "step": 31240500 }, { "epoch": 90.43, "learning_rate": 4.801322538440163e-06, "loss": 1.7818, "step": 31241000 }, { "epoch": 90.43, "learning_rate": 4.800601785383475e-06, "loss": 1.7993, "step": 31241500 }, { "epoch": 90.43, "learning_rate": 4.799878137736199e-06, "loss": 1.7747, "step": 31242000 }, { "epoch": 90.43, "learning_rate": 4.799154490088922e-06, "loss": 1.7889, "step": 31242500 }, { "epoch": 90.44, "learning_rate": 4.798430842441645e-06, "loss": 1.7725, "step": 31243000 }, { "epoch": 90.44, "learning_rate": 4.797708642089663e-06, "loss": 1.7844, "step": 31243500 }, { "epoch": 90.44, "learning_rate": 4.796984994442386e-06, "loss": 1.7668, "step": 31244000 }, { "epoch": 90.44, "learning_rate": 4.7962627940904035e-06, "loss": 1.779, "step": 31244500 }, { "epoch": 90.44, "learning_rate": 4.795539146443127e-06, "loss": 1.7818, "step": 31245000 }, { "epoch": 90.44, "learning_rate": 4.7948154987958505e-06, "loss": 1.7841, "step": 31245500 }, { "epoch": 90.44, "learning_rate": 4.7940918511485736e-06, "loss": 1.7759, "step": 31246000 }, { "epoch": 90.45, "learning_rate": 4.793369650796592e-06, "loss": 1.7791, "step": 31246500 }, { "epoch": 90.45, "learning_rate": 4.792646003149315e-06, "loss": 1.7844, "step": 31247000 }, { "epoch": 90.45, "learning_rate": 4.791922355502039e-06, "loss": 1.7798, "step": 31247500 }, { "epoch": 90.45, "learning_rate": 4.791198707854761e-06, "loss": 1.7638, "step": 31248000 }, { "epoch": 90.45, "learning_rate": 4.790475060207485e-06, "loss": 1.8118, "step": 31248500 }, { "epoch": 90.45, "learning_rate": 4.789751412560208e-06, "loss": 1.7991, "step": 31249000 }, { "epoch": 90.45, "learning_rate": 4.789027764912931e-06, "loss": 1.801, "step": 31249500 }, { "epoch": 90.46, "learning_rate": 4.788304117265654e-06, "loss": 1.8309, "step": 31250000 }, { "epoch": 90.46, "learning_rate": 4.787581916913672e-06, "loss": 1.7998, "step": 31250500 }, { "epoch": 90.46, "learning_rate": 4.786858269266395e-06, "loss": 1.7969, "step": 31251000 }, { "epoch": 90.46, "learning_rate": 4.786134621619118e-06, "loss": 1.7851, "step": 31251500 }, { "epoch": 90.46, "learning_rate": 4.785410973971842e-06, "loss": 1.7536, "step": 31252000 }, { "epoch": 90.46, "learning_rate": 4.7846873263245645e-06, "loss": 1.791, "step": 31252500 }, { "epoch": 90.46, "learning_rate": 4.783963678677288e-06, "loss": 1.7731, "step": 31253000 }, { "epoch": 90.47, "learning_rate": 4.7832400310300115e-06, "loss": 1.7672, "step": 31253500 }, { "epoch": 90.47, "learning_rate": 4.7825163833827346e-06, "loss": 1.776, "step": 31254000 }, { "epoch": 90.47, "learning_rate": 4.781794183030752e-06, "loss": 1.7871, "step": 31254500 }, { "epoch": 90.47, "learning_rate": 4.781070535383476e-06, "loss": 1.7956, "step": 31255000 }, { "epoch": 90.47, "learning_rate": 4.780346887736199e-06, "loss": 1.777, "step": 31255500 }, { "epoch": 90.47, "learning_rate": 4.779623240088922e-06, "loss": 1.7735, "step": 31256000 }, { "epoch": 90.47, "learning_rate": 4.778899592441646e-06, "loss": 1.8235, "step": 31256500 }, { "epoch": 90.48, "learning_rate": 4.778175944794368e-06, "loss": 1.7735, "step": 31257000 }, { "epoch": 90.48, "learning_rate": 4.777452297147092e-06, "loss": 1.7897, "step": 31257500 }, { "epoch": 90.48, "learning_rate": 4.776730096795109e-06, "loss": 1.7838, "step": 31258000 }, { "epoch": 90.48, "learning_rate": 4.776006449147833e-06, "loss": 1.7888, "step": 31258500 }, { "epoch": 90.48, "learning_rate": 4.7752842487958504e-06, "loss": 1.7763, "step": 31259000 }, { "epoch": 90.48, "learning_rate": 4.774560601148574e-06, "loss": 1.7815, "step": 31259500 }, { "epoch": 90.48, "learning_rate": 4.773838400796592e-06, "loss": 1.7731, "step": 31260000 }, { "epoch": 90.49, "learning_rate": 4.773114753149315e-06, "loss": 1.7773, "step": 31260500 }, { "epoch": 90.49, "learning_rate": 4.772391105502038e-06, "loss": 1.7851, "step": 31261000 }, { "epoch": 90.49, "learning_rate": 4.771667457854762e-06, "loss": 1.7769, "step": 31261500 }, { "epoch": 90.49, "learning_rate": 4.770943810207485e-06, "loss": 1.7807, "step": 31262000 }, { "epoch": 90.49, "learning_rate": 4.770220162560208e-06, "loss": 1.7892, "step": 31262500 }, { "epoch": 90.49, "learning_rate": 4.769496514912931e-06, "loss": 1.7962, "step": 31263000 }, { "epoch": 90.5, "learning_rate": 4.768772867265654e-06, "loss": 1.7951, "step": 31263500 }, { "epoch": 90.5, "learning_rate": 4.768049219618377e-06, "loss": 1.8042, "step": 31264000 }, { "epoch": 90.5, "learning_rate": 4.767325571971101e-06, "loss": 1.8048, "step": 31264500 }, { "epoch": 90.5, "learning_rate": 4.766601924323824e-06, "loss": 1.8026, "step": 31265000 }, { "epoch": 90.5, "learning_rate": 4.765878276676547e-06, "loss": 1.8025, "step": 31265500 }, { "epoch": 90.5, "learning_rate": 4.765156076324565e-06, "loss": 1.7845, "step": 31266000 }, { "epoch": 90.5, "learning_rate": 4.7644338759725825e-06, "loss": 1.8178, "step": 31266500 }, { "epoch": 90.51, "learning_rate": 4.763710228325306e-06, "loss": 1.7842, "step": 31267000 }, { "epoch": 90.51, "learning_rate": 4.762986580678029e-06, "loss": 1.8026, "step": 31267500 }, { "epoch": 90.51, "learning_rate": 4.762262933030753e-06, "loss": 1.7946, "step": 31268000 }, { "epoch": 90.51, "learning_rate": 4.761539285383476e-06, "loss": 1.8132, "step": 31268500 }, { "epoch": 90.51, "learning_rate": 4.760815637736199e-06, "loss": 1.7904, "step": 31269000 }, { "epoch": 90.51, "learning_rate": 4.760091990088922e-06, "loss": 1.7923, "step": 31269500 }, { "epoch": 90.51, "learning_rate": 4.759368342441645e-06, "loss": 1.7559, "step": 31270000 }, { "epoch": 90.52, "learning_rate": 4.758646142089663e-06, "loss": 1.8118, "step": 31270500 }, { "epoch": 90.52, "learning_rate": 4.757922494442386e-06, "loss": 1.7839, "step": 31271000 }, { "epoch": 90.52, "learning_rate": 4.757198846795109e-06, "loss": 1.7858, "step": 31271500 }, { "epoch": 90.52, "learning_rate": 4.756475199147833e-06, "loss": 1.7911, "step": 31272000 }, { "epoch": 90.52, "learning_rate": 4.75575299879585e-06, "loss": 1.8056, "step": 31272500 }, { "epoch": 90.52, "learning_rate": 4.7550293511485735e-06, "loss": 1.7686, "step": 31273000 }, { "epoch": 90.52, "learning_rate": 4.7543057035012965e-06, "loss": 1.7847, "step": 31273500 }, { "epoch": 90.53, "learning_rate": 4.7535820558540205e-06, "loss": 1.8113, "step": 31274000 }, { "epoch": 90.53, "learning_rate": 4.752859855502038e-06, "loss": 1.8026, "step": 31274500 }, { "epoch": 90.53, "learning_rate": 4.752136207854762e-06, "loss": 1.7617, "step": 31275000 }, { "epoch": 90.53, "learning_rate": 4.751412560207485e-06, "loss": 1.7736, "step": 31275500 }, { "epoch": 90.53, "learning_rate": 4.750688912560208e-06, "loss": 1.8008, "step": 31276000 }, { "epoch": 90.53, "learning_rate": 4.749966712208225e-06, "loss": 1.7954, "step": 31276500 }, { "epoch": 90.53, "learning_rate": 4.749243064560949e-06, "loss": 1.7873, "step": 31277000 }, { "epoch": 90.54, "learning_rate": 4.748519416913672e-06, "loss": 1.7892, "step": 31277500 }, { "epoch": 90.54, "learning_rate": 4.747795769266395e-06, "loss": 1.775, "step": 31278000 }, { "epoch": 90.54, "learning_rate": 4.747072121619118e-06, "loss": 1.7829, "step": 31278500 }, { "epoch": 90.54, "learning_rate": 4.746348473971841e-06, "loss": 1.7935, "step": 31279000 }, { "epoch": 90.54, "learning_rate": 4.745624826324565e-06, "loss": 1.7945, "step": 31279500 }, { "epoch": 90.54, "learning_rate": 4.744901178677288e-06, "loss": 1.7666, "step": 31280000 }, { "epoch": 90.54, "learning_rate": 4.744177531030011e-06, "loss": 1.7907, "step": 31280500 }, { "epoch": 90.55, "learning_rate": 4.7434538833827345e-06, "loss": 1.8038, "step": 31281000 }, { "epoch": 90.55, "learning_rate": 4.7427302357354575e-06, "loss": 1.8002, "step": 31281500 }, { "epoch": 90.55, "learning_rate": 4.7420065880881815e-06, "loss": 1.787, "step": 31282000 }, { "epoch": 90.55, "learning_rate": 4.741284387736199e-06, "loss": 1.7877, "step": 31282500 }, { "epoch": 90.55, "learning_rate": 4.740560740088923e-06, "loss": 1.7833, "step": 31283000 }, { "epoch": 90.55, "learning_rate": 4.739837092441645e-06, "loss": 1.7701, "step": 31283500 }, { "epoch": 90.55, "learning_rate": 4.739113444794369e-06, "loss": 1.7697, "step": 31284000 }, { "epoch": 90.56, "learning_rate": 4.738389797147091e-06, "loss": 1.7634, "step": 31284500 }, { "epoch": 90.56, "learning_rate": 4.73766759679511e-06, "loss": 1.7808, "step": 31285000 }, { "epoch": 90.56, "learning_rate": 4.736945396443127e-06, "loss": 1.7887, "step": 31285500 }, { "epoch": 90.56, "learning_rate": 4.73622174879585e-06, "loss": 1.7804, "step": 31286000 }, { "epoch": 90.56, "learning_rate": 4.735498101148573e-06, "loss": 1.7751, "step": 31286500 }, { "epoch": 90.56, "learning_rate": 4.734774453501297e-06, "loss": 1.78, "step": 31287000 }, { "epoch": 90.56, "learning_rate": 4.7340508058540196e-06, "loss": 1.7615, "step": 31287500 }, { "epoch": 90.57, "learning_rate": 4.7333271582067435e-06, "loss": 1.8007, "step": 31288000 }, { "epoch": 90.57, "learning_rate": 4.7326035105594666e-06, "loss": 1.7988, "step": 31288500 }, { "epoch": 90.57, "learning_rate": 4.73187986291219e-06, "loss": 1.8131, "step": 31289000 }, { "epoch": 90.57, "learning_rate": 4.731157662560208e-06, "loss": 1.7488, "step": 31289500 }, { "epoch": 90.57, "learning_rate": 4.730435462208226e-06, "loss": 1.7667, "step": 31290000 }, { "epoch": 90.57, "learning_rate": 4.729711814560948e-06, "loss": 1.7872, "step": 31290500 }, { "epoch": 90.57, "learning_rate": 4.728988166913672e-06, "loss": 1.808, "step": 31291000 }, { "epoch": 90.58, "learning_rate": 4.728264519266395e-06, "loss": 1.8079, "step": 31291500 }, { "epoch": 90.58, "learning_rate": 4.727540871619118e-06, "loss": 1.8081, "step": 31292000 }, { "epoch": 90.58, "learning_rate": 4.726817223971842e-06, "loss": 1.8039, "step": 31292500 }, { "epoch": 90.58, "learning_rate": 4.726093576324564e-06, "loss": 1.776, "step": 31293000 }, { "epoch": 90.58, "learning_rate": 4.725369928677288e-06, "loss": 1.7903, "step": 31293500 }, { "epoch": 90.58, "learning_rate": 4.7246477283253055e-06, "loss": 1.766, "step": 31294000 }, { "epoch": 90.58, "learning_rate": 4.7239240806780294e-06, "loss": 1.7872, "step": 31294500 }, { "epoch": 90.59, "learning_rate": 4.723200433030752e-06, "loss": 1.7707, "step": 31295000 }, { "epoch": 90.59, "learning_rate": 4.722476785383476e-06, "loss": 1.7972, "step": 31295500 }, { "epoch": 90.59, "learning_rate": 4.721754585031493e-06, "loss": 1.7852, "step": 31296000 }, { "epoch": 90.59, "learning_rate": 4.721030937384217e-06, "loss": 1.7856, "step": 31296500 }, { "epoch": 90.59, "learning_rate": 4.720308737032234e-06, "loss": 1.7695, "step": 31297000 }, { "epoch": 90.59, "learning_rate": 4.719585089384958e-06, "loss": 1.7696, "step": 31297500 }, { "epoch": 90.59, "learning_rate": 4.71886144173768e-06, "loss": 1.7652, "step": 31298000 }, { "epoch": 90.6, "learning_rate": 4.718137794090404e-06, "loss": 1.7869, "step": 31298500 }, { "epoch": 90.6, "learning_rate": 4.717414146443128e-06, "loss": 1.7983, "step": 31299000 }, { "epoch": 90.6, "learning_rate": 4.71669049879585e-06, "loss": 1.7697, "step": 31299500 }, { "epoch": 90.6, "learning_rate": 4.715966851148574e-06, "loss": 1.797, "step": 31300000 }, { "epoch": 90.6, "learning_rate": 4.7152432035012964e-06, "loss": 1.7719, "step": 31300500 }, { "epoch": 90.6, "learning_rate": 4.71451955585402e-06, "loss": 1.8061, "step": 31301000 }, { "epoch": 90.61, "learning_rate": 4.713797355502038e-06, "loss": 1.7666, "step": 31301500 }, { "epoch": 90.61, "learning_rate": 4.7130737078547615e-06, "loss": 1.7786, "step": 31302000 }, { "epoch": 90.61, "learning_rate": 4.712350060207485e-06, "loss": 1.8004, "step": 31302500 }, { "epoch": 90.61, "learning_rate": 4.711626412560208e-06, "loss": 1.7887, "step": 31303000 }, { "epoch": 90.61, "learning_rate": 4.710904212208225e-06, "loss": 1.7526, "step": 31303500 }, { "epoch": 90.61, "learning_rate": 4.710182011856244e-06, "loss": 1.7929, "step": 31304000 }, { "epoch": 90.61, "learning_rate": 4.709458364208966e-06, "loss": 1.7934, "step": 31304500 }, { "epoch": 90.62, "learning_rate": 4.70873471656169e-06, "loss": 1.805, "step": 31305000 }, { "epoch": 90.62, "learning_rate": 4.708011068914413e-06, "loss": 1.7875, "step": 31305500 }, { "epoch": 90.62, "learning_rate": 4.707288868562431e-06, "loss": 1.8007, "step": 31306000 }, { "epoch": 90.62, "learning_rate": 4.7065652209151535e-06, "loss": 1.8141, "step": 31306500 }, { "epoch": 90.62, "learning_rate": 4.705841573267877e-06, "loss": 1.7864, "step": 31307000 }, { "epoch": 90.62, "learning_rate": 4.7051179256206005e-06, "loss": 1.7829, "step": 31307500 }, { "epoch": 90.62, "learning_rate": 4.7043942779733236e-06, "loss": 1.7846, "step": 31308000 }, { "epoch": 90.63, "learning_rate": 4.7036706303260475e-06, "loss": 1.7833, "step": 31308500 }, { "epoch": 90.63, "learning_rate": 4.70294698267877e-06, "loss": 1.7805, "step": 31309000 }, { "epoch": 90.63, "learning_rate": 4.702223335031494e-06, "loss": 1.8287, "step": 31309500 }, { "epoch": 90.63, "learning_rate": 4.701499687384217e-06, "loss": 1.795, "step": 31310000 }, { "epoch": 90.63, "learning_rate": 4.70077603973694e-06, "loss": 1.8097, "step": 31310500 }, { "epoch": 90.63, "learning_rate": 4.700052392089663e-06, "loss": 1.7965, "step": 31311000 }, { "epoch": 90.63, "learning_rate": 4.699328744442386e-06, "loss": 1.7808, "step": 31311500 }, { "epoch": 90.64, "learning_rate": 4.69860509679511e-06, "loss": 1.7886, "step": 31312000 }, { "epoch": 90.64, "learning_rate": 4.697881449147833e-06, "loss": 1.7803, "step": 31312500 }, { "epoch": 90.64, "learning_rate": 4.697157801500556e-06, "loss": 1.8213, "step": 31313000 }, { "epoch": 90.64, "learning_rate": 4.696434153853279e-06, "loss": 1.7757, "step": 31313500 }, { "epoch": 90.64, "learning_rate": 4.695710506206002e-06, "loss": 1.7821, "step": 31314000 }, { "epoch": 90.64, "learning_rate": 4.69498830585402e-06, "loss": 1.8095, "step": 31314500 }, { "epoch": 90.64, "learning_rate": 4.694266105502038e-06, "loss": 1.7909, "step": 31315000 }, { "epoch": 90.65, "learning_rate": 4.6935424578547615e-06, "loss": 1.7966, "step": 31315500 }, { "epoch": 90.65, "learning_rate": 4.6928188102074846e-06, "loss": 1.7793, "step": 31316000 }, { "epoch": 90.65, "learning_rate": 4.692095162560208e-06, "loss": 1.8039, "step": 31316500 }, { "epoch": 90.65, "learning_rate": 4.691371514912931e-06, "loss": 1.7668, "step": 31317000 }, { "epoch": 90.65, "learning_rate": 4.690647867265654e-06, "loss": 1.7832, "step": 31317500 }, { "epoch": 90.65, "learning_rate": 4.689924219618378e-06, "loss": 1.7751, "step": 31318000 }, { "epoch": 90.65, "learning_rate": 4.689202019266395e-06, "loss": 1.7862, "step": 31318500 }, { "epoch": 90.66, "learning_rate": 4.688478371619118e-06, "loss": 1.7993, "step": 31319000 }, { "epoch": 90.66, "learning_rate": 4.687754723971842e-06, "loss": 1.792, "step": 31319500 }, { "epoch": 90.66, "learning_rate": 4.687031076324565e-06, "loss": 1.8074, "step": 31320000 }, { "epoch": 90.66, "learning_rate": 4.686308875972582e-06, "loss": 1.7768, "step": 31320500 }, { "epoch": 90.66, "learning_rate": 4.685585228325306e-06, "loss": 1.778, "step": 31321000 }, { "epoch": 90.66, "learning_rate": 4.684861580678029e-06, "loss": 1.7716, "step": 31321500 }, { "epoch": 90.66, "learning_rate": 4.684137933030752e-06, "loss": 1.8006, "step": 31322000 }, { "epoch": 90.67, "learning_rate": 4.6834142853834755e-06, "loss": 1.8114, "step": 31322500 }, { "epoch": 90.67, "learning_rate": 4.682692085031494e-06, "loss": 1.7694, "step": 31323000 }, { "epoch": 90.67, "learning_rate": 4.681968437384217e-06, "loss": 1.791, "step": 31323500 }, { "epoch": 90.67, "learning_rate": 4.68124478973694e-06, "loss": 1.802, "step": 31324000 }, { "epoch": 90.67, "learning_rate": 4.680521142089663e-06, "loss": 1.785, "step": 31324500 }, { "epoch": 90.67, "learning_rate": 4.679797494442386e-06, "loss": 1.7556, "step": 31325000 }, { "epoch": 90.67, "learning_rate": 4.67907384679511e-06, "loss": 1.7985, "step": 31325500 }, { "epoch": 90.68, "learning_rate": 4.678350199147833e-06, "loss": 1.7741, "step": 31326000 }, { "epoch": 90.68, "learning_rate": 4.67762799879585e-06, "loss": 1.7772, "step": 31326500 }, { "epoch": 90.68, "learning_rate": 4.676904351148574e-06, "loss": 1.7946, "step": 31327000 }, { "epoch": 90.68, "learning_rate": 4.676180703501297e-06, "loss": 1.7903, "step": 31327500 }, { "epoch": 90.68, "learning_rate": 4.67545705585402e-06, "loss": 1.7968, "step": 31328000 }, { "epoch": 90.68, "learning_rate": 4.674733408206743e-06, "loss": 1.7718, "step": 31328500 }, { "epoch": 90.68, "learning_rate": 4.674009760559466e-06, "loss": 1.786, "step": 31329000 }, { "epoch": 90.69, "learning_rate": 4.6732875602074845e-06, "loss": 1.7914, "step": 31329500 }, { "epoch": 90.69, "learning_rate": 4.672563912560208e-06, "loss": 1.7628, "step": 31330000 }, { "epoch": 90.69, "learning_rate": 4.671840264912931e-06, "loss": 1.7961, "step": 31330500 }, { "epoch": 90.69, "learning_rate": 4.671118064560949e-06, "loss": 1.7935, "step": 31331000 }, { "epoch": 90.69, "learning_rate": 4.670394416913672e-06, "loss": 1.788, "step": 31331500 }, { "epoch": 90.69, "learning_rate": 4.669670769266395e-06, "loss": 1.796, "step": 31332000 }, { "epoch": 90.69, "learning_rate": 4.668947121619118e-06, "loss": 1.7895, "step": 31332500 }, { "epoch": 90.7, "learning_rate": 4.668223473971842e-06, "loss": 1.8099, "step": 31333000 }, { "epoch": 90.7, "learning_rate": 4.667499826324565e-06, "loss": 1.7642, "step": 31333500 }, { "epoch": 90.7, "learning_rate": 4.666776178677288e-06, "loss": 1.7778, "step": 31334000 }, { "epoch": 90.7, "learning_rate": 4.666052531030011e-06, "loss": 1.8078, "step": 31334500 }, { "epoch": 90.7, "learning_rate": 4.665328883382734e-06, "loss": 1.763, "step": 31335000 }, { "epoch": 90.7, "learning_rate": 4.664606683030752e-06, "loss": 1.7879, "step": 31335500 }, { "epoch": 90.7, "learning_rate": 4.6638830353834754e-06, "loss": 1.806, "step": 31336000 }, { "epoch": 90.71, "learning_rate": 4.663159387736199e-06, "loss": 1.7771, "step": 31336500 }, { "epoch": 90.71, "learning_rate": 4.662435740088922e-06, "loss": 1.7742, "step": 31337000 }, { "epoch": 90.71, "learning_rate": 4.6617120924416455e-06, "loss": 1.7861, "step": 31337500 }, { "epoch": 90.71, "learning_rate": 4.660988444794368e-06, "loss": 1.7709, "step": 31338000 }, { "epoch": 90.71, "learning_rate": 4.660266244442387e-06, "loss": 1.8034, "step": 31338500 }, { "epoch": 90.71, "learning_rate": 4.659542596795109e-06, "loss": 1.7905, "step": 31339000 }, { "epoch": 90.72, "learning_rate": 4.658818949147833e-06, "loss": 1.7819, "step": 31339500 }, { "epoch": 90.72, "learning_rate": 4.65809674879585e-06, "loss": 1.7763, "step": 31340000 }, { "epoch": 90.72, "learning_rate": 4.657373101148574e-06, "loss": 1.783, "step": 31340500 }, { "epoch": 90.72, "learning_rate": 4.656649453501296e-06, "loss": 1.7864, "step": 31341000 }, { "epoch": 90.72, "learning_rate": 4.65592580585402e-06, "loss": 1.8183, "step": 31341500 }, { "epoch": 90.72, "learning_rate": 4.655202158206744e-06, "loss": 1.7718, "step": 31342000 }, { "epoch": 90.72, "learning_rate": 4.654481405150056e-06, "loss": 1.7934, "step": 31342500 }, { "epoch": 90.73, "learning_rate": 4.653757757502779e-06, "loss": 1.7858, "step": 31343000 }, { "epoch": 90.73, "learning_rate": 4.6530341098555026e-06, "loss": 1.7833, "step": 31343500 }, { "epoch": 90.73, "learning_rate": 4.652310462208225e-06, "loss": 1.7958, "step": 31344000 }, { "epoch": 90.73, "learning_rate": 4.651586814560949e-06, "loss": 1.7751, "step": 31344500 }, { "epoch": 90.73, "learning_rate": 4.650863166913672e-06, "loss": 1.8108, "step": 31345000 }, { "epoch": 90.73, "learning_rate": 4.650139519266395e-06, "loss": 1.7868, "step": 31345500 }, { "epoch": 90.73, "learning_rate": 4.649415871619119e-06, "loss": 1.7927, "step": 31346000 }, { "epoch": 90.74, "learning_rate": 4.648692223971841e-06, "loss": 1.7787, "step": 31346500 }, { "epoch": 90.74, "learning_rate": 4.647968576324565e-06, "loss": 1.7757, "step": 31347000 }, { "epoch": 90.74, "learning_rate": 4.647244928677288e-06, "loss": 1.8028, "step": 31347500 }, { "epoch": 90.74, "learning_rate": 4.646521281030011e-06, "loss": 1.7873, "step": 31348000 }, { "epoch": 90.74, "learning_rate": 4.645797633382735e-06, "loss": 1.79, "step": 31348500 }, { "epoch": 90.74, "learning_rate": 4.645075433030752e-06, "loss": 1.7809, "step": 31349000 }, { "epoch": 90.74, "learning_rate": 4.644351785383476e-06, "loss": 1.7854, "step": 31349500 }, { "epoch": 90.75, "learning_rate": 4.6436295850314935e-06, "loss": 1.8091, "step": 31350000 }, { "epoch": 90.75, "learning_rate": 4.6429059373842166e-06, "loss": 1.7733, "step": 31350500 }, { "epoch": 90.75, "learning_rate": 4.64218228973694e-06, "loss": 1.767, "step": 31351000 }, { "epoch": 90.75, "learning_rate": 4.6414586420896636e-06, "loss": 1.7937, "step": 31351500 }, { "epoch": 90.75, "learning_rate": 4.640734994442386e-06, "loss": 1.7853, "step": 31352000 }, { "epoch": 90.75, "learning_rate": 4.64001134679511e-06, "loss": 1.7514, "step": 31352500 }, { "epoch": 90.75, "learning_rate": 4.639287699147833e-06, "loss": 1.7981, "step": 31353000 }, { "epoch": 90.76, "learning_rate": 4.638565498795851e-06, "loss": 1.7679, "step": 31353500 }, { "epoch": 90.76, "learning_rate": 4.637841851148573e-06, "loss": 1.7924, "step": 31354000 }, { "epoch": 90.76, "learning_rate": 4.637118203501297e-06, "loss": 1.7843, "step": 31354500 }, { "epoch": 90.76, "learning_rate": 4.63639455585402e-06, "loss": 1.7841, "step": 31355000 }, { "epoch": 90.76, "learning_rate": 4.635670908206743e-06, "loss": 1.7802, "step": 31355500 }, { "epoch": 90.76, "learning_rate": 4.634947260559467e-06, "loss": 1.7968, "step": 31356000 }, { "epoch": 90.76, "learning_rate": 4.634225060207484e-06, "loss": 1.7937, "step": 31356500 }, { "epoch": 90.77, "learning_rate": 4.633501412560208e-06, "loss": 1.8021, "step": 31357000 }, { "epoch": 90.77, "learning_rate": 4.6327777649129306e-06, "loss": 1.7782, "step": 31357500 }, { "epoch": 90.77, "learning_rate": 4.6320541172656545e-06, "loss": 1.7957, "step": 31358000 }, { "epoch": 90.77, "learning_rate": 4.631330469618378e-06, "loss": 1.7981, "step": 31358500 }, { "epoch": 90.77, "learning_rate": 4.630606821971101e-06, "loss": 1.8154, "step": 31359000 }, { "epoch": 90.77, "learning_rate": 4.629883174323824e-06, "loss": 1.7983, "step": 31359500 }, { "epoch": 90.77, "learning_rate": 4.629159526676547e-06, "loss": 1.8032, "step": 31360000 }, { "epoch": 90.78, "learning_rate": 4.628435879029271e-06, "loss": 1.7969, "step": 31360500 }, { "epoch": 90.78, "learning_rate": 4.627713678677288e-06, "loss": 1.7947, "step": 31361000 }, { "epoch": 90.78, "learning_rate": 4.626990031030011e-06, "loss": 1.781, "step": 31361500 }, { "epoch": 90.78, "learning_rate": 4.626266383382734e-06, "loss": 1.7911, "step": 31362000 }, { "epoch": 90.78, "learning_rate": 4.625542735735458e-06, "loss": 1.7957, "step": 31362500 }, { "epoch": 90.78, "learning_rate": 4.624820535383475e-06, "loss": 1.7975, "step": 31363000 }, { "epoch": 90.78, "learning_rate": 4.624096887736199e-06, "loss": 1.8067, "step": 31363500 }, { "epoch": 90.79, "learning_rate": 4.623373240088922e-06, "loss": 1.7982, "step": 31364000 }, { "epoch": 90.79, "learning_rate": 4.6226495924416454e-06, "loss": 1.7966, "step": 31364500 }, { "epoch": 90.79, "learning_rate": 4.6219259447943685e-06, "loss": 1.7795, "step": 31365000 }, { "epoch": 90.79, "learning_rate": 4.621203744442387e-06, "loss": 1.7968, "step": 31365500 }, { "epoch": 90.79, "learning_rate": 4.620481544090404e-06, "loss": 1.7926, "step": 31366000 }, { "epoch": 90.79, "learning_rate": 4.619759343738422e-06, "loss": 1.7727, "step": 31366500 }, { "epoch": 90.79, "learning_rate": 4.619035696091145e-06, "loss": 1.7861, "step": 31367000 }, { "epoch": 90.8, "learning_rate": 4.618312048443868e-06, "loss": 1.7924, "step": 31367500 }, { "epoch": 90.8, "learning_rate": 4.617588400796591e-06, "loss": 1.8057, "step": 31368000 }, { "epoch": 90.8, "learning_rate": 4.616864753149315e-06, "loss": 1.785, "step": 31368500 }, { "epoch": 90.8, "learning_rate": 4.616141105502038e-06, "loss": 1.7908, "step": 31369000 }, { "epoch": 90.8, "learning_rate": 4.615417457854761e-06, "loss": 1.7709, "step": 31369500 }, { "epoch": 90.8, "learning_rate": 4.614693810207484e-06, "loss": 1.7952, "step": 31370000 }, { "epoch": 90.8, "learning_rate": 4.6139701625602074e-06, "loss": 1.807, "step": 31370500 }, { "epoch": 90.81, "learning_rate": 4.613246514912931e-06, "loss": 1.791, "step": 31371000 }, { "epoch": 90.81, "learning_rate": 4.612524314560949e-06, "loss": 1.7952, "step": 31371500 }, { "epoch": 90.81, "learning_rate": 4.611800666913672e-06, "loss": 1.797, "step": 31372000 }, { "epoch": 90.81, "learning_rate": 4.611077019266395e-06, "loss": 1.7876, "step": 31372500 }, { "epoch": 90.81, "learning_rate": 4.610353371619119e-06, "loss": 1.7912, "step": 31373000 }, { "epoch": 90.81, "learning_rate": 4.609629723971842e-06, "loss": 1.7771, "step": 31373500 }, { "epoch": 90.81, "learning_rate": 4.608906076324565e-06, "loss": 1.7859, "step": 31374000 }, { "epoch": 90.82, "learning_rate": 4.608182428677288e-06, "loss": 1.7723, "step": 31374500 }, { "epoch": 90.82, "learning_rate": 4.607458781030011e-06, "loss": 1.7853, "step": 31375000 }, { "epoch": 90.82, "learning_rate": 4.606735133382734e-06, "loss": 1.7994, "step": 31375500 }, { "epoch": 90.82, "learning_rate": 4.606012933030752e-06, "loss": 1.7907, "step": 31376000 }, { "epoch": 90.82, "learning_rate": 4.605289285383475e-06, "loss": 1.7861, "step": 31376500 }, { "epoch": 90.82, "learning_rate": 4.604567085031493e-06, "loss": 1.791, "step": 31377000 }, { "epoch": 90.83, "learning_rate": 4.6038434373842165e-06, "loss": 1.7898, "step": 31377500 }, { "epoch": 90.83, "learning_rate": 4.6031197897369395e-06, "loss": 1.7886, "step": 31378000 }, { "epoch": 90.83, "learning_rate": 4.602396142089663e-06, "loss": 1.796, "step": 31378500 }, { "epoch": 90.83, "learning_rate": 4.6016724944423866e-06, "loss": 1.7999, "step": 31379000 }, { "epoch": 90.83, "learning_rate": 4.600950294090404e-06, "loss": 1.7841, "step": 31379500 }, { "epoch": 90.83, "learning_rate": 4.600226646443128e-06, "loss": 1.7888, "step": 31380000 }, { "epoch": 90.83, "learning_rate": 4.599502998795851e-06, "loss": 1.7773, "step": 31380500 }, { "epoch": 90.84, "learning_rate": 4.598780798443868e-06, "loss": 1.7838, "step": 31381000 }, { "epoch": 90.84, "learning_rate": 4.598057150796591e-06, "loss": 1.7921, "step": 31381500 }, { "epoch": 90.84, "learning_rate": 4.597333503149315e-06, "loss": 1.7755, "step": 31382000 }, { "epoch": 90.84, "learning_rate": 4.596609855502038e-06, "loss": 1.8043, "step": 31382500 }, { "epoch": 90.84, "learning_rate": 4.595886207854761e-06, "loss": 1.7618, "step": 31383000 }, { "epoch": 90.84, "learning_rate": 4.595162560207484e-06, "loss": 1.8096, "step": 31383500 }, { "epoch": 90.84, "learning_rate": 4.594438912560207e-06, "loss": 1.7772, "step": 31384000 }, { "epoch": 90.85, "learning_rate": 4.593715264912931e-06, "loss": 1.7838, "step": 31384500 }, { "epoch": 90.85, "learning_rate": 4.5929916172656536e-06, "loss": 1.7967, "step": 31385000 }, { "epoch": 90.85, "learning_rate": 4.5922679696183775e-06, "loss": 1.8059, "step": 31385500 }, { "epoch": 90.85, "learning_rate": 4.5915443219711006e-06, "loss": 1.8128, "step": 31386000 }, { "epoch": 90.85, "learning_rate": 4.590822121619119e-06, "loss": 1.7829, "step": 31386500 }, { "epoch": 90.85, "learning_rate": 4.590098473971842e-06, "loss": 1.8022, "step": 31387000 }, { "epoch": 90.85, "learning_rate": 4.589374826324565e-06, "loss": 1.7915, "step": 31387500 }, { "epoch": 90.86, "learning_rate": 4.588651178677288e-06, "loss": 1.7809, "step": 31388000 }, { "epoch": 90.86, "learning_rate": 4.587927531030011e-06, "loss": 1.7869, "step": 31388500 }, { "epoch": 90.86, "learning_rate": 4.587203883382735e-06, "loss": 1.786, "step": 31389000 }, { "epoch": 90.86, "learning_rate": 4.586480235735457e-06, "loss": 1.7947, "step": 31389500 }, { "epoch": 90.86, "learning_rate": 4.585758035383476e-06, "loss": 1.8196, "step": 31390000 }, { "epoch": 90.86, "learning_rate": 4.585034387736198e-06, "loss": 1.7788, "step": 31390500 }, { "epoch": 90.86, "learning_rate": 4.584310740088922e-06, "loss": 1.7733, "step": 31391000 }, { "epoch": 90.87, "learning_rate": 4.583587092441645e-06, "loss": 1.7733, "step": 31391500 }, { "epoch": 90.87, "learning_rate": 4.582863444794368e-06, "loss": 1.7697, "step": 31392000 }, { "epoch": 90.87, "learning_rate": 4.582139797147092e-06, "loss": 1.7502, "step": 31392500 }, { "epoch": 90.87, "learning_rate": 4.5814161494998146e-06, "loss": 1.8017, "step": 31393000 }, { "epoch": 90.87, "learning_rate": 4.5806925018525385e-06, "loss": 1.7938, "step": 31393500 }, { "epoch": 90.87, "learning_rate": 4.579970301500556e-06, "loss": 1.7752, "step": 31394000 }, { "epoch": 90.87, "learning_rate": 4.57924665385328e-06, "loss": 1.7915, "step": 31394500 }, { "epoch": 90.88, "learning_rate": 4.578523006206002e-06, "loss": 1.7881, "step": 31395000 }, { "epoch": 90.88, "learning_rate": 4.577799358558726e-06, "loss": 1.7789, "step": 31395500 }, { "epoch": 90.88, "learning_rate": 4.577075710911449e-06, "loss": 1.7894, "step": 31396000 }, { "epoch": 90.88, "learning_rate": 4.576352063264172e-06, "loss": 1.8066, "step": 31396500 }, { "epoch": 90.88, "learning_rate": 4.575628415616896e-06, "loss": 1.8076, "step": 31397000 }, { "epoch": 90.88, "learning_rate": 4.574904767969618e-06, "loss": 1.7946, "step": 31397500 }, { "epoch": 90.88, "learning_rate": 4.574182567617637e-06, "loss": 1.7919, "step": 31398000 }, { "epoch": 90.89, "learning_rate": 4.573458919970359e-06, "loss": 1.7834, "step": 31398500 }, { "epoch": 90.89, "learning_rate": 4.572735272323083e-06, "loss": 1.7686, "step": 31399000 }, { "epoch": 90.89, "learning_rate": 4.5720116246758055e-06, "loss": 1.7872, "step": 31399500 }, { "epoch": 90.89, "learning_rate": 4.5712894243238244e-06, "loss": 1.7724, "step": 31400000 }, { "epoch": 90.89, "learning_rate": 4.570565776676547e-06, "loss": 1.788, "step": 31400500 }, { "epoch": 90.89, "learning_rate": 4.569843576324566e-06, "loss": 1.7935, "step": 31401000 }, { "epoch": 90.89, "learning_rate": 4.569119928677288e-06, "loss": 1.7644, "step": 31401500 }, { "epoch": 90.9, "learning_rate": 4.568396281030012e-06, "loss": 1.7775, "step": 31402000 }, { "epoch": 90.9, "learning_rate": 4.567672633382734e-06, "loss": 1.8103, "step": 31402500 }, { "epoch": 90.9, "learning_rate": 4.566948985735458e-06, "loss": 1.8063, "step": 31403000 }, { "epoch": 90.9, "learning_rate": 4.566225338088181e-06, "loss": 1.797, "step": 31403500 }, { "epoch": 90.9, "learning_rate": 4.565501690440904e-06, "loss": 1.7686, "step": 31404000 }, { "epoch": 90.9, "learning_rate": 4.564778042793628e-06, "loss": 1.8034, "step": 31404500 }, { "epoch": 90.9, "learning_rate": 4.56405439514635e-06, "loss": 1.7779, "step": 31405000 }, { "epoch": 90.91, "learning_rate": 4.563332194794368e-06, "loss": 1.824, "step": 31405500 }, { "epoch": 90.91, "learning_rate": 4.5626085471470914e-06, "loss": 1.7837, "step": 31406000 }, { "epoch": 90.91, "learning_rate": 4.5618863467951095e-06, "loss": 1.7882, "step": 31406500 }, { "epoch": 90.91, "learning_rate": 4.561162699147833e-06, "loss": 1.8089, "step": 31407000 }, { "epoch": 90.91, "learning_rate": 4.5604390515005565e-06, "loss": 1.77, "step": 31407500 }, { "epoch": 90.91, "learning_rate": 4.559715403853279e-06, "loss": 1.7906, "step": 31408000 }, { "epoch": 90.91, "learning_rate": 4.558991756206003e-06, "loss": 1.7753, "step": 31408500 }, { "epoch": 90.92, "learning_rate": 4.558268108558726e-06, "loss": 1.8043, "step": 31409000 }, { "epoch": 90.92, "learning_rate": 4.557544460911449e-06, "loss": 1.7788, "step": 31409500 }, { "epoch": 90.92, "learning_rate": 4.556820813264172e-06, "loss": 1.763, "step": 31410000 }, { "epoch": 90.92, "learning_rate": 4.556097165616895e-06, "loss": 1.7886, "step": 31410500 }, { "epoch": 90.92, "learning_rate": 4.555373517969619e-06, "loss": 1.7882, "step": 31411000 }, { "epoch": 90.92, "learning_rate": 4.554652764912931e-06, "loss": 1.7853, "step": 31411500 }, { "epoch": 90.92, "learning_rate": 4.553929117265654e-06, "loss": 1.7868, "step": 31412000 }, { "epoch": 90.93, "learning_rate": 4.553205469618377e-06, "loss": 1.7906, "step": 31412500 }, { "epoch": 90.93, "learning_rate": 4.5524818219711005e-06, "loss": 1.7745, "step": 31413000 }, { "epoch": 90.93, "learning_rate": 4.5517581743238235e-06, "loss": 1.7922, "step": 31413500 }, { "epoch": 90.93, "learning_rate": 4.5510345266765475e-06, "loss": 1.7747, "step": 31414000 }, { "epoch": 90.93, "learning_rate": 4.5503108790292705e-06, "loss": 1.7939, "step": 31414500 }, { "epoch": 90.93, "learning_rate": 4.549587231381994e-06, "loss": 1.7766, "step": 31415000 }, { "epoch": 90.94, "learning_rate": 4.548865031030011e-06, "loss": 1.7936, "step": 31415500 }, { "epoch": 90.94, "learning_rate": 4.548141383382735e-06, "loss": 1.7432, "step": 31416000 }, { "epoch": 90.94, "learning_rate": 4.547417735735458e-06, "loss": 1.7773, "step": 31416500 }, { "epoch": 90.94, "learning_rate": 4.546694088088181e-06, "loss": 1.7843, "step": 31417000 }, { "epoch": 90.94, "learning_rate": 4.545970440440904e-06, "loss": 1.7821, "step": 31417500 }, { "epoch": 90.94, "learning_rate": 4.545246792793627e-06, "loss": 1.7638, "step": 31418000 }, { "epoch": 90.94, "learning_rate": 4.544523145146351e-06, "loss": 1.7616, "step": 31418500 }, { "epoch": 90.95, "learning_rate": 4.543800944794368e-06, "loss": 1.7622, "step": 31419000 }, { "epoch": 90.95, "learning_rate": 4.543077297147091e-06, "loss": 1.7915, "step": 31419500 }, { "epoch": 90.95, "learning_rate": 4.542353649499815e-06, "loss": 1.8288, "step": 31420000 }, { "epoch": 90.95, "learning_rate": 4.541630001852538e-06, "loss": 1.7973, "step": 31420500 }, { "epoch": 90.95, "learning_rate": 4.5409063542052615e-06, "loss": 1.8083, "step": 31421000 }, { "epoch": 90.95, "learning_rate": 4.5401827065579846e-06, "loss": 1.7921, "step": 31421500 }, { "epoch": 90.95, "learning_rate": 4.539459058910708e-06, "loss": 1.8066, "step": 31422000 }, { "epoch": 90.96, "learning_rate": 4.5387354112634316e-06, "loss": 1.7874, "step": 31422500 }, { "epoch": 90.96, "learning_rate": 4.538011763616155e-06, "loss": 1.7964, "step": 31423000 }, { "epoch": 90.96, "learning_rate": 4.537288115968878e-06, "loss": 1.7902, "step": 31423500 }, { "epoch": 90.96, "learning_rate": 4.536565915616895e-06, "loss": 1.7906, "step": 31424000 }, { "epoch": 90.96, "learning_rate": 4.535842267969619e-06, "loss": 1.7696, "step": 31424500 }, { "epoch": 90.96, "learning_rate": 4.535118620322342e-06, "loss": 1.7866, "step": 31425000 }, { "epoch": 90.96, "learning_rate": 4.534394972675065e-06, "loss": 1.7706, "step": 31425500 }, { "epoch": 90.97, "learning_rate": 4.533671325027788e-06, "loss": 1.793, "step": 31426000 }, { "epoch": 90.97, "learning_rate": 4.532949124675806e-06, "loss": 1.7672, "step": 31426500 }, { "epoch": 90.97, "learning_rate": 4.532225477028529e-06, "loss": 1.7994, "step": 31427000 }, { "epoch": 90.97, "learning_rate": 4.531501829381252e-06, "loss": 1.8032, "step": 31427500 }, { "epoch": 90.97, "learning_rate": 4.5307781817339755e-06, "loss": 1.775, "step": 31428000 }, { "epoch": 90.97, "learning_rate": 4.5300559813819936e-06, "loss": 1.8013, "step": 31428500 }, { "epoch": 90.97, "learning_rate": 4.529332333734717e-06, "loss": 1.7889, "step": 31429000 }, { "epoch": 90.98, "learning_rate": 4.528610133382735e-06, "loss": 1.8041, "step": 31429500 }, { "epoch": 90.98, "learning_rate": 4.527886485735458e-06, "loss": 1.7952, "step": 31430000 }, { "epoch": 90.98, "learning_rate": 4.527162838088181e-06, "loss": 1.7453, "step": 31430500 }, { "epoch": 90.98, "learning_rate": 4.526439190440904e-06, "loss": 1.7689, "step": 31431000 }, { "epoch": 90.98, "learning_rate": 4.525715542793627e-06, "loss": 1.7847, "step": 31431500 }, { "epoch": 90.98, "learning_rate": 4.524991895146351e-06, "loss": 1.7556, "step": 31432000 }, { "epoch": 90.98, "learning_rate": 4.524268247499074e-06, "loss": 1.7646, "step": 31432500 }, { "epoch": 90.99, "learning_rate": 4.523544599851797e-06, "loss": 1.7764, "step": 31433000 }, { "epoch": 90.99, "learning_rate": 4.52282095220452e-06, "loss": 1.7755, "step": 31433500 }, { "epoch": 90.99, "learning_rate": 4.522097304557243e-06, "loss": 1.7913, "step": 31434000 }, { "epoch": 90.99, "learning_rate": 4.521375104205261e-06, "loss": 1.7944, "step": 31434500 }, { "epoch": 90.99, "learning_rate": 4.5206529038532795e-06, "loss": 1.7884, "step": 31435000 }, { "epoch": 90.99, "learning_rate": 4.519929256206003e-06, "loss": 1.7796, "step": 31435500 }, { "epoch": 90.99, "learning_rate": 4.519205608558726e-06, "loss": 1.7929, "step": 31436000 }, { "epoch": 91.0, "learning_rate": 4.518481960911449e-06, "loss": 1.7831, "step": 31436500 }, { "epoch": 91.0, "learning_rate": 4.517758313264172e-06, "loss": 1.8058, "step": 31437000 }, { "epoch": 91.0, "learning_rate": 4.517034665616896e-06, "loss": 1.8082, "step": 31437500 }, { "epoch": 91.0, "eval_accuracy": 0.6909289299273819, "eval_accuracy_mlm": 0.6604581733782932, "eval_accuracy_nsp": 0.8542276791647762, "eval_loss": 2.194336414337158, "eval_runtime": 331.9434, "eval_samples_per_second": 1314.64, "eval_steps_per_second": 54.777, "step": 31437952 }, { "epoch": 91.0, "learning_rate": 4.516311017969618e-06, "loss": 1.7796, "step": 31438000 }, { "epoch": 91.0, "learning_rate": 4.515587370322342e-06, "loss": 1.7385, "step": 31438500 }, { "epoch": 91.0, "learning_rate": 4.514865169970359e-06, "loss": 1.7722, "step": 31439000 }, { "epoch": 91.0, "learning_rate": 4.514141522323083e-06, "loss": 1.7966, "step": 31439500 }, { "epoch": 91.01, "learning_rate": 4.513417874675805e-06, "loss": 1.7962, "step": 31440000 }, { "epoch": 91.01, "learning_rate": 4.512694227028529e-06, "loss": 1.7633, "step": 31440500 }, { "epoch": 91.01, "learning_rate": 4.511970579381253e-06, "loss": 1.8092, "step": 31441000 }, { "epoch": 91.01, "learning_rate": 4.5112469317339754e-06, "loss": 1.7663, "step": 31441500 }, { "epoch": 91.01, "learning_rate": 4.5105247313819935e-06, "loss": 1.7718, "step": 31442000 }, { "epoch": 91.01, "learning_rate": 4.509801083734717e-06, "loss": 1.7351, "step": 31442500 }, { "epoch": 91.01, "learning_rate": 4.5090774360874405e-06, "loss": 1.7962, "step": 31443000 }, { "epoch": 91.02, "learning_rate": 4.508353788440163e-06, "loss": 1.7664, "step": 31443500 }, { "epoch": 91.02, "learning_rate": 4.507630140792887e-06, "loss": 1.7815, "step": 31444000 }, { "epoch": 91.02, "learning_rate": 4.50690649314561e-06, "loss": 1.7621, "step": 31444500 }, { "epoch": 91.02, "learning_rate": 4.506182845498333e-06, "loss": 1.7981, "step": 31445000 }, { "epoch": 91.02, "learning_rate": 4.505462092441645e-06, "loss": 1.7902, "step": 31445500 }, { "epoch": 91.02, "learning_rate": 4.504738444794369e-06, "loss": 1.7886, "step": 31446000 }, { "epoch": 91.02, "learning_rate": 4.504014797147091e-06, "loss": 1.796, "step": 31446500 }, { "epoch": 91.03, "learning_rate": 4.503291149499815e-06, "loss": 1.7782, "step": 31447000 }, { "epoch": 91.03, "learning_rate": 4.502567501852538e-06, "loss": 1.7869, "step": 31447500 }, { "epoch": 91.03, "learning_rate": 4.501843854205261e-06, "loss": 1.7842, "step": 31448000 }, { "epoch": 91.03, "learning_rate": 4.501120206557985e-06, "loss": 1.7899, "step": 31448500 }, { "epoch": 91.03, "learning_rate": 4.5003965589107075e-06, "loss": 1.7633, "step": 31449000 }, { "epoch": 91.03, "learning_rate": 4.4996729112634315e-06, "loss": 1.7907, "step": 31449500 }, { "epoch": 91.03, "learning_rate": 4.4989492636161545e-06, "loss": 1.7938, "step": 31450000 }, { "epoch": 91.04, "learning_rate": 4.498227063264173e-06, "loss": 1.7826, "step": 31450500 }, { "epoch": 91.04, "learning_rate": 4.497503415616895e-06, "loss": 1.7652, "step": 31451000 }, { "epoch": 91.04, "learning_rate": 4.496779767969619e-06, "loss": 1.7934, "step": 31451500 }, { "epoch": 91.04, "learning_rate": 4.496056120322342e-06, "loss": 1.7732, "step": 31452000 }, { "epoch": 91.04, "learning_rate": 4.495332472675065e-06, "loss": 1.7959, "step": 31452500 }, { "epoch": 91.04, "learning_rate": 4.494610272323082e-06, "loss": 1.776, "step": 31453000 }, { "epoch": 91.05, "learning_rate": 4.493886624675806e-06, "loss": 1.79, "step": 31453500 }, { "epoch": 91.05, "learning_rate": 4.493162977028529e-06, "loss": 1.7698, "step": 31454000 }, { "epoch": 91.05, "learning_rate": 4.492439329381252e-06, "loss": 1.7779, "step": 31454500 }, { "epoch": 91.05, "learning_rate": 4.49171712902927e-06, "loss": 1.7621, "step": 31455000 }, { "epoch": 91.05, "learning_rate": 4.4909934813819935e-06, "loss": 1.786, "step": 31455500 }, { "epoch": 91.05, "learning_rate": 4.490269833734717e-06, "loss": 1.7729, "step": 31456000 }, { "epoch": 91.05, "learning_rate": 4.48954618608744e-06, "loss": 1.7983, "step": 31456500 }, { "epoch": 91.06, "learning_rate": 4.4888225384401636e-06, "loss": 1.7738, "step": 31457000 }, { "epoch": 91.06, "learning_rate": 4.488098890792887e-06, "loss": 1.7865, "step": 31457500 }, { "epoch": 91.06, "learning_rate": 4.48737524314561e-06, "loss": 1.7796, "step": 31458000 }, { "epoch": 91.06, "learning_rate": 4.486651595498333e-06, "loss": 1.7771, "step": 31458500 }, { "epoch": 91.06, "learning_rate": 4.485929395146351e-06, "loss": 1.7879, "step": 31459000 }, { "epoch": 91.06, "learning_rate": 4.485205747499074e-06, "loss": 1.7833, "step": 31459500 }, { "epoch": 91.06, "learning_rate": 4.484482099851797e-06, "loss": 1.7579, "step": 31460000 }, { "epoch": 91.07, "learning_rate": 4.48375845220452e-06, "loss": 1.7724, "step": 31460500 }, { "epoch": 91.07, "learning_rate": 4.483034804557243e-06, "loss": 1.7841, "step": 31461000 }, { "epoch": 91.07, "learning_rate": 4.482311156909967e-06, "loss": 1.7571, "step": 31461500 }, { "epoch": 91.07, "learning_rate": 4.481588956557984e-06, "loss": 1.7812, "step": 31462000 }, { "epoch": 91.07, "learning_rate": 4.480865308910708e-06, "loss": 1.768, "step": 31462500 }, { "epoch": 91.07, "learning_rate": 4.480141661263431e-06, "loss": 1.8046, "step": 31463000 }, { "epoch": 91.07, "learning_rate": 4.4794180136161545e-06, "loss": 1.8036, "step": 31463500 }, { "epoch": 91.08, "learning_rate": 4.478695813264172e-06, "loss": 1.7935, "step": 31464000 }, { "epoch": 91.08, "learning_rate": 4.477972165616896e-06, "loss": 1.766, "step": 31464500 }, { "epoch": 91.08, "learning_rate": 4.477248517969619e-06, "loss": 1.7976, "step": 31465000 }, { "epoch": 91.08, "learning_rate": 4.476524870322342e-06, "loss": 1.779, "step": 31465500 }, { "epoch": 91.08, "learning_rate": 4.475801222675065e-06, "loss": 1.7831, "step": 31466000 }, { "epoch": 91.08, "learning_rate": 4.475079022323083e-06, "loss": 1.7841, "step": 31466500 }, { "epoch": 91.08, "learning_rate": 4.4743568219711e-06, "loss": 1.8004, "step": 31467000 }, { "epoch": 91.09, "learning_rate": 4.473633174323824e-06, "loss": 1.7679, "step": 31467500 }, { "epoch": 91.09, "learning_rate": 4.472909526676547e-06, "loss": 1.7937, "step": 31468000 }, { "epoch": 91.09, "learning_rate": 4.47218587902927e-06, "loss": 1.7694, "step": 31468500 }, { "epoch": 91.09, "learning_rate": 4.471462231381993e-06, "loss": 1.7876, "step": 31469000 }, { "epoch": 91.09, "learning_rate": 4.4707385837347165e-06, "loss": 1.779, "step": 31469500 }, { "epoch": 91.09, "learning_rate": 4.4700149360874404e-06, "loss": 1.779, "step": 31470000 }, { "epoch": 91.09, "learning_rate": 4.4692912884401635e-06, "loss": 1.7754, "step": 31470500 }, { "epoch": 91.1, "learning_rate": 4.468569088088181e-06, "loss": 1.8031, "step": 31471000 }, { "epoch": 91.1, "learning_rate": 4.467845440440904e-06, "loss": 1.7678, "step": 31471500 }, { "epoch": 91.1, "learning_rate": 4.467121792793628e-06, "loss": 1.7826, "step": 31472000 }, { "epoch": 91.1, "learning_rate": 4.466398145146351e-06, "loss": 1.7626, "step": 31472500 }, { "epoch": 91.1, "learning_rate": 4.465674497499074e-06, "loss": 1.788, "step": 31473000 }, { "epoch": 91.1, "learning_rate": 4.464952297147092e-06, "loss": 1.7894, "step": 31473500 }, { "epoch": 91.1, "learning_rate": 4.464228649499815e-06, "loss": 1.7985, "step": 31474000 }, { "epoch": 91.11, "learning_rate": 4.463505001852538e-06, "loss": 1.7819, "step": 31474500 }, { "epoch": 91.11, "learning_rate": 4.462781354205261e-06, "loss": 1.7745, "step": 31475000 }, { "epoch": 91.11, "learning_rate": 4.462057706557984e-06, "loss": 1.7951, "step": 31475500 }, { "epoch": 91.11, "learning_rate": 4.461334058910708e-06, "loss": 1.7907, "step": 31476000 }, { "epoch": 91.11, "learning_rate": 4.460610411263431e-06, "loss": 1.7721, "step": 31476500 }, { "epoch": 91.11, "learning_rate": 4.459888210911449e-06, "loss": 1.7699, "step": 31477000 }, { "epoch": 91.11, "learning_rate": 4.459164563264172e-06, "loss": 1.7907, "step": 31477500 }, { "epoch": 91.12, "learning_rate": 4.458440915616896e-06, "loss": 1.7699, "step": 31478000 }, { "epoch": 91.12, "learning_rate": 4.457717267969619e-06, "loss": 1.7956, "step": 31478500 }, { "epoch": 91.12, "learning_rate": 4.456995067617637e-06, "loss": 1.7936, "step": 31479000 }, { "epoch": 91.12, "learning_rate": 4.45627141997036e-06, "loss": 1.7889, "step": 31479500 }, { "epoch": 91.12, "learning_rate": 4.455547772323083e-06, "loss": 1.7618, "step": 31480000 }, { "epoch": 91.12, "learning_rate": 4.454824124675806e-06, "loss": 1.7619, "step": 31480500 }, { "epoch": 91.12, "learning_rate": 4.454100477028529e-06, "loss": 1.7882, "step": 31481000 }, { "epoch": 91.13, "learning_rate": 4.453378276676547e-06, "loss": 1.7835, "step": 31481500 }, { "epoch": 91.13, "learning_rate": 4.45265462902927e-06, "loss": 1.7904, "step": 31482000 }, { "epoch": 91.13, "learning_rate": 4.451930981381993e-06, "loss": 1.7781, "step": 31482500 }, { "epoch": 91.13, "learning_rate": 4.4512073337347165e-06, "loss": 1.7875, "step": 31483000 }, { "epoch": 91.13, "learning_rate": 4.45048368608744e-06, "loss": 1.7922, "step": 31483500 }, { "epoch": 91.13, "learning_rate": 4.449760038440163e-06, "loss": 1.7998, "step": 31484000 }, { "epoch": 91.13, "learning_rate": 4.4490378380881816e-06, "loss": 1.7961, "step": 31484500 }, { "epoch": 91.14, "learning_rate": 4.448314190440904e-06, "loss": 1.8018, "step": 31485000 }, { "epoch": 91.14, "learning_rate": 4.447590542793628e-06, "loss": 1.7982, "step": 31485500 }, { "epoch": 91.14, "learning_rate": 4.446866895146351e-06, "loss": 1.767, "step": 31486000 }, { "epoch": 91.14, "learning_rate": 4.446143247499074e-06, "loss": 1.775, "step": 31486500 }, { "epoch": 91.14, "learning_rate": 4.445421047147091e-06, "loss": 1.7894, "step": 31487000 }, { "epoch": 91.14, "learning_rate": 4.444697399499815e-06, "loss": 1.7667, "step": 31487500 }, { "epoch": 91.14, "learning_rate": 4.443973751852538e-06, "loss": 1.7622, "step": 31488000 }, { "epoch": 91.15, "learning_rate": 4.443250104205261e-06, "loss": 1.7719, "step": 31488500 }, { "epoch": 91.15, "learning_rate": 4.4425293511485735e-06, "loss": 1.7549, "step": 31489000 }, { "epoch": 91.15, "learning_rate": 4.441805703501297e-06, "loss": 1.8076, "step": 31489500 }, { "epoch": 91.15, "learning_rate": 4.44108205585402e-06, "loss": 1.7985, "step": 31490000 }, { "epoch": 91.15, "learning_rate": 4.4403584082067436e-06, "loss": 1.7744, "step": 31490500 }, { "epoch": 91.15, "learning_rate": 4.439634760559467e-06, "loss": 1.7844, "step": 31491000 }, { "epoch": 91.15, "learning_rate": 4.43891111291219e-06, "loss": 1.7739, "step": 31491500 }, { "epoch": 91.16, "learning_rate": 4.438187465264914e-06, "loss": 1.7679, "step": 31492000 }, { "epoch": 91.16, "learning_rate": 4.437463817617636e-06, "loss": 1.7734, "step": 31492500 }, { "epoch": 91.16, "learning_rate": 4.436741617265654e-06, "loss": 1.8175, "step": 31493000 }, { "epoch": 91.16, "learning_rate": 4.436017969618377e-06, "loss": 1.7744, "step": 31493500 }, { "epoch": 91.16, "learning_rate": 4.435295769266395e-06, "loss": 1.7654, "step": 31494000 }, { "epoch": 91.16, "learning_rate": 4.434572121619118e-06, "loss": 1.7865, "step": 31494500 }, { "epoch": 91.17, "learning_rate": 4.433848473971842e-06, "loss": 1.7572, "step": 31495000 }, { "epoch": 91.17, "learning_rate": 4.433124826324564e-06, "loss": 1.7967, "step": 31495500 }, { "epoch": 91.17, "learning_rate": 4.4324026259725825e-06, "loss": 1.8041, "step": 31496000 }, { "epoch": 91.17, "learning_rate": 4.431678978325306e-06, "loss": 1.7787, "step": 31496500 }, { "epoch": 91.17, "learning_rate": 4.4309553306780295e-06, "loss": 1.7706, "step": 31497000 }, { "epoch": 91.17, "learning_rate": 4.430231683030752e-06, "loss": 1.7763, "step": 31497500 }, { "epoch": 91.17, "learning_rate": 4.429508035383476e-06, "loss": 1.7924, "step": 31498000 }, { "epoch": 91.18, "learning_rate": 4.428784387736199e-06, "loss": 1.7703, "step": 31498500 }, { "epoch": 91.18, "learning_rate": 4.428060740088922e-06, "loss": 1.7655, "step": 31499000 }, { "epoch": 91.18, "learning_rate": 4.427337092441646e-06, "loss": 1.7932, "step": 31499500 }, { "epoch": 91.18, "learning_rate": 4.426614892089663e-06, "loss": 1.7715, "step": 31500000 }, { "epoch": 91.18, "learning_rate": 4.425891244442387e-06, "loss": 1.7934, "step": 31500500 }, { "epoch": 91.18, "learning_rate": 4.425167596795109e-06, "loss": 1.805, "step": 31501000 }, { "epoch": 91.18, "learning_rate": 4.424443949147833e-06, "loss": 1.7868, "step": 31501500 }, { "epoch": 91.19, "learning_rate": 4.423720301500555e-06, "loss": 1.7753, "step": 31502000 }, { "epoch": 91.19, "learning_rate": 4.422996653853279e-06, "loss": 1.752, "step": 31502500 }, { "epoch": 91.19, "learning_rate": 4.422273006206003e-06, "loss": 1.814, "step": 31503000 }, { "epoch": 91.19, "learning_rate": 4.421549358558725e-06, "loss": 1.776, "step": 31503500 }, { "epoch": 91.19, "learning_rate": 4.420825710911449e-06, "loss": 1.8071, "step": 31504000 }, { "epoch": 91.19, "learning_rate": 4.420103510559467e-06, "loss": 1.7715, "step": 31504500 }, { "epoch": 91.19, "learning_rate": 4.419381310207484e-06, "loss": 1.7858, "step": 31505000 }, { "epoch": 91.2, "learning_rate": 4.418657662560208e-06, "loss": 1.7398, "step": 31505500 }, { "epoch": 91.2, "learning_rate": 4.417934014912931e-06, "loss": 1.7844, "step": 31506000 }, { "epoch": 91.2, "learning_rate": 4.417210367265654e-06, "loss": 1.7795, "step": 31506500 }, { "epoch": 91.2, "learning_rate": 4.416486719618378e-06, "loss": 1.8007, "step": 31507000 }, { "epoch": 91.2, "learning_rate": 4.415764519266395e-06, "loss": 1.7647, "step": 31507500 }, { "epoch": 91.2, "learning_rate": 4.415040871619119e-06, "loss": 1.7626, "step": 31508000 }, { "epoch": 91.2, "learning_rate": 4.414317223971841e-06, "loss": 1.7878, "step": 31508500 }, { "epoch": 91.21, "learning_rate": 4.413593576324565e-06, "loss": 1.7885, "step": 31509000 }, { "epoch": 91.21, "learning_rate": 4.412869928677288e-06, "loss": 1.7764, "step": 31509500 }, { "epoch": 91.21, "learning_rate": 4.412146281030011e-06, "loss": 1.7934, "step": 31510000 }, { "epoch": 91.21, "learning_rate": 4.411424080678029e-06, "loss": 1.7748, "step": 31510500 }, { "epoch": 91.21, "learning_rate": 4.4107004330307525e-06, "loss": 1.7842, "step": 31511000 }, { "epoch": 91.21, "learning_rate": 4.409976785383476e-06, "loss": 1.7701, "step": 31511500 }, { "epoch": 91.21, "learning_rate": 4.409253137736199e-06, "loss": 1.7947, "step": 31512000 }, { "epoch": 91.22, "learning_rate": 4.408529490088923e-06, "loss": 1.7899, "step": 31512500 }, { "epoch": 91.22, "learning_rate": 4.407805842441645e-06, "loss": 1.7606, "step": 31513000 }, { "epoch": 91.22, "learning_rate": 4.407082194794369e-06, "loss": 1.7679, "step": 31513500 }, { "epoch": 91.22, "learning_rate": 4.406358547147092e-06, "loss": 1.7851, "step": 31514000 }, { "epoch": 91.22, "learning_rate": 4.40563634679511e-06, "loss": 1.766, "step": 31514500 }, { "epoch": 91.22, "learning_rate": 4.404912699147832e-06, "loss": 1.8017, "step": 31515000 }, { "epoch": 91.22, "learning_rate": 4.404190498795851e-06, "loss": 1.8088, "step": 31515500 }, { "epoch": 91.23, "learning_rate": 4.403466851148573e-06, "loss": 1.8019, "step": 31516000 }, { "epoch": 91.23, "learning_rate": 4.402743203501297e-06, "loss": 1.7561, "step": 31516500 }, { "epoch": 91.23, "learning_rate": 4.40201955585402e-06, "loss": 1.7978, "step": 31517000 }, { "epoch": 91.23, "learning_rate": 4.4012959082067435e-06, "loss": 1.7844, "step": 31517500 }, { "epoch": 91.23, "learning_rate": 4.4005722605594665e-06, "loss": 1.778, "step": 31518000 }, { "epoch": 91.23, "learning_rate": 4.39984861291219e-06, "loss": 1.7717, "step": 31518500 }, { "epoch": 91.23, "learning_rate": 4.3991249652649136e-06, "loss": 1.7612, "step": 31519000 }, { "epoch": 91.24, "learning_rate": 4.398401317617637e-06, "loss": 1.7699, "step": 31519500 }, { "epoch": 91.24, "learning_rate": 4.39767766997036e-06, "loss": 1.7639, "step": 31520000 }, { "epoch": 91.24, "learning_rate": 4.396955469618377e-06, "loss": 1.7715, "step": 31520500 }, { "epoch": 91.24, "learning_rate": 4.396231821971101e-06, "loss": 1.7936, "step": 31521000 }, { "epoch": 91.24, "learning_rate": 4.395508174323824e-06, "loss": 1.799, "step": 31521500 }, { "epoch": 91.24, "learning_rate": 4.394784526676547e-06, "loss": 1.778, "step": 31522000 }, { "epoch": 91.24, "learning_rate": 4.39406087902927e-06, "loss": 1.8158, "step": 31522500 }, { "epoch": 91.25, "learning_rate": 4.393337231381993e-06, "loss": 1.7759, "step": 31523000 }, { "epoch": 91.25, "learning_rate": 4.392615031030011e-06, "loss": 1.7735, "step": 31523500 }, { "epoch": 91.25, "learning_rate": 4.391891383382734e-06, "loss": 1.7821, "step": 31524000 }, { "epoch": 91.25, "learning_rate": 4.3911677357354575e-06, "loss": 1.7782, "step": 31524500 }, { "epoch": 91.25, "learning_rate": 4.390444088088181e-06, "loss": 1.7913, "step": 31525000 }, { "epoch": 91.25, "learning_rate": 4.3897204404409045e-06, "loss": 1.8077, "step": 31525500 }, { "epoch": 91.25, "learning_rate": 4.388998240088922e-06, "loss": 1.7607, "step": 31526000 }, { "epoch": 91.26, "learning_rate": 4.38827603973694e-06, "loss": 1.7869, "step": 31526500 }, { "epoch": 91.26, "learning_rate": 4.387552392089663e-06, "loss": 1.7601, "step": 31527000 }, { "epoch": 91.26, "learning_rate": 4.386828744442386e-06, "loss": 1.7811, "step": 31527500 }, { "epoch": 91.26, "learning_rate": 4.38610509679511e-06, "loss": 1.7965, "step": 31528000 }, { "epoch": 91.26, "learning_rate": 4.385381449147833e-06, "loss": 1.7669, "step": 31528500 }, { "epoch": 91.26, "learning_rate": 4.384657801500556e-06, "loss": 1.7666, "step": 31529000 }, { "epoch": 91.26, "learning_rate": 4.383934153853279e-06, "loss": 1.7975, "step": 31529500 }, { "epoch": 91.27, "learning_rate": 4.383210506206002e-06, "loss": 1.782, "step": 31530000 }, { "epoch": 91.27, "learning_rate": 4.382486858558725e-06, "loss": 1.7727, "step": 31530500 }, { "epoch": 91.27, "learning_rate": 4.381764658206743e-06, "loss": 1.7611, "step": 31531000 }, { "epoch": 91.27, "learning_rate": 4.3810410105594665e-06, "loss": 1.7975, "step": 31531500 }, { "epoch": 91.27, "learning_rate": 4.38031736291219e-06, "loss": 1.7651, "step": 31532000 }, { "epoch": 91.27, "learning_rate": 4.3795937152649135e-06, "loss": 1.7593, "step": 31532500 }, { "epoch": 91.28, "learning_rate": 4.378870067617637e-06, "loss": 1.7737, "step": 31533000 }, { "epoch": 91.28, "learning_rate": 4.378147867265654e-06, "loss": 1.7852, "step": 31533500 }, { "epoch": 91.28, "learning_rate": 4.377424219618377e-06, "loss": 1.7708, "step": 31534000 }, { "epoch": 91.28, "learning_rate": 4.376700571971101e-06, "loss": 1.7546, "step": 31534500 }, { "epoch": 91.28, "learning_rate": 4.375976924323824e-06, "loss": 1.7834, "step": 31535000 }, { "epoch": 91.28, "learning_rate": 4.375253276676547e-06, "loss": 1.7578, "step": 31535500 }, { "epoch": 91.28, "learning_rate": 4.374531076324565e-06, "loss": 1.8179, "step": 31536000 }, { "epoch": 91.29, "learning_rate": 4.373808875972582e-06, "loss": 1.7621, "step": 31536500 }, { "epoch": 91.29, "learning_rate": 4.3730852283253054e-06, "loss": 1.7723, "step": 31537000 }, { "epoch": 91.29, "learning_rate": 4.372361580678029e-06, "loss": 1.7799, "step": 31537500 }, { "epoch": 91.29, "learning_rate": 4.3716379330307524e-06, "loss": 1.7814, "step": 31538000 }, { "epoch": 91.29, "learning_rate": 4.3709142853834755e-06, "loss": 1.7695, "step": 31538500 }, { "epoch": 91.29, "learning_rate": 4.370190637736199e-06, "loss": 1.7919, "step": 31539000 }, { "epoch": 91.29, "learning_rate": 4.369466990088922e-06, "loss": 1.7698, "step": 31539500 }, { "epoch": 91.3, "learning_rate": 4.368743342441646e-06, "loss": 1.784, "step": 31540000 }, { "epoch": 91.3, "learning_rate": 4.368019694794368e-06, "loss": 1.778, "step": 31540500 }, { "epoch": 91.3, "learning_rate": 4.367297494442387e-06, "loss": 1.7563, "step": 31541000 }, { "epoch": 91.3, "learning_rate": 4.366573846795109e-06, "loss": 1.7955, "step": 31541500 }, { "epoch": 91.3, "learning_rate": 4.365850199147833e-06, "loss": 1.7811, "step": 31542000 }, { "epoch": 91.3, "learning_rate": 4.365126551500556e-06, "loss": 1.7709, "step": 31542500 }, { "epoch": 91.3, "learning_rate": 4.364402903853279e-06, "loss": 1.781, "step": 31543000 }, { "epoch": 91.31, "learning_rate": 4.363679256206003e-06, "loss": 1.7889, "step": 31543500 }, { "epoch": 91.31, "learning_rate": 4.362955608558725e-06, "loss": 1.7786, "step": 31544000 }, { "epoch": 91.31, "learning_rate": 4.362233408206743e-06, "loss": 1.8086, "step": 31544500 }, { "epoch": 91.31, "learning_rate": 4.3615097605594664e-06, "loss": 1.7856, "step": 31545000 }, { "epoch": 91.31, "learning_rate": 4.36078611291219e-06, "loss": 1.7685, "step": 31545500 }, { "epoch": 91.31, "learning_rate": 4.360062465264913e-06, "loss": 1.7854, "step": 31546000 }, { "epoch": 91.31, "learning_rate": 4.3593388176176365e-06, "loss": 1.7568, "step": 31546500 }, { "epoch": 91.32, "learning_rate": 4.35861516997036e-06, "loss": 1.7806, "step": 31547000 }, { "epoch": 91.32, "learning_rate": 4.357891522323083e-06, "loss": 1.7804, "step": 31547500 }, { "epoch": 91.32, "learning_rate": 4.357167874675807e-06, "loss": 1.7711, "step": 31548000 }, { "epoch": 91.32, "learning_rate": 4.356447121619119e-06, "loss": 1.7901, "step": 31548500 }, { "epoch": 91.32, "learning_rate": 4.355724921267136e-06, "loss": 1.7674, "step": 31549000 }, { "epoch": 91.32, "learning_rate": 4.355001273619859e-06, "loss": 1.7596, "step": 31549500 }, { "epoch": 91.32, "learning_rate": 4.354277625972582e-06, "loss": 1.7809, "step": 31550000 }, { "epoch": 91.33, "learning_rate": 4.353553978325306e-06, "loss": 1.7881, "step": 31550500 }, { "epoch": 91.33, "learning_rate": 4.3528303306780285e-06, "loss": 1.7355, "step": 31551000 }, { "epoch": 91.33, "learning_rate": 4.352106683030752e-06, "loss": 1.804, "step": 31551500 }, { "epoch": 91.33, "learning_rate": 4.3513830353834755e-06, "loss": 1.806, "step": 31552000 }, { "epoch": 91.33, "learning_rate": 4.3506593877361985e-06, "loss": 1.7877, "step": 31552500 }, { "epoch": 91.33, "learning_rate": 4.349937187384217e-06, "loss": 1.7835, "step": 31553000 }, { "epoch": 91.33, "learning_rate": 4.34921353973694e-06, "loss": 1.8084, "step": 31553500 }, { "epoch": 91.34, "learning_rate": 4.348489892089664e-06, "loss": 1.8089, "step": 31554000 }, { "epoch": 91.34, "learning_rate": 4.347766244442386e-06, "loss": 1.766, "step": 31554500 }, { "epoch": 91.34, "learning_rate": 4.34704259679511e-06, "loss": 1.7772, "step": 31555000 }, { "epoch": 91.34, "learning_rate": 4.346318949147832e-06, "loss": 1.7781, "step": 31555500 }, { "epoch": 91.34, "learning_rate": 4.345598196091145e-06, "loss": 1.7804, "step": 31556000 }, { "epoch": 91.34, "learning_rate": 4.344874548443868e-06, "loss": 1.7716, "step": 31556500 }, { "epoch": 91.34, "learning_rate": 4.344150900796592e-06, "loss": 1.771, "step": 31557000 }, { "epoch": 91.35, "learning_rate": 4.343427253149314e-06, "loss": 1.8043, "step": 31557500 }, { "epoch": 91.35, "learning_rate": 4.342703605502038e-06, "loss": 1.779, "step": 31558000 }, { "epoch": 91.35, "learning_rate": 4.3419799578547606e-06, "loss": 1.7557, "step": 31558500 }, { "epoch": 91.35, "learning_rate": 4.3412563102074845e-06, "loss": 1.7951, "step": 31559000 }, { "epoch": 91.35, "learning_rate": 4.340532662560208e-06, "loss": 1.7683, "step": 31559500 }, { "epoch": 91.35, "learning_rate": 4.339810462208226e-06, "loss": 1.7916, "step": 31560000 }, { "epoch": 91.35, "learning_rate": 4.339086814560949e-06, "loss": 1.7998, "step": 31560500 }, { "epoch": 91.36, "learning_rate": 4.338363166913672e-06, "loss": 1.7842, "step": 31561000 }, { "epoch": 91.36, "learning_rate": 4.337639519266396e-06, "loss": 1.7908, "step": 31561500 }, { "epoch": 91.36, "learning_rate": 4.336915871619118e-06, "loss": 1.7927, "step": 31562000 }, { "epoch": 91.36, "learning_rate": 4.336192223971842e-06, "loss": 1.7961, "step": 31562500 }, { "epoch": 91.36, "learning_rate": 4.335468576324565e-06, "loss": 1.7867, "step": 31563000 }, { "epoch": 91.36, "learning_rate": 4.334744928677288e-06, "loss": 1.7504, "step": 31563500 }, { "epoch": 91.36, "learning_rate": 4.334022728325305e-06, "loss": 1.7794, "step": 31564000 }, { "epoch": 91.37, "learning_rate": 4.333299080678029e-06, "loss": 1.7788, "step": 31564500 }, { "epoch": 91.37, "learning_rate": 4.3325768803260465e-06, "loss": 1.7983, "step": 31565000 }, { "epoch": 91.37, "learning_rate": 4.3318532326787704e-06, "loss": 1.7873, "step": 31565500 }, { "epoch": 91.37, "learning_rate": 4.3311295850314935e-06, "loss": 1.7675, "step": 31566000 }, { "epoch": 91.37, "learning_rate": 4.330405937384217e-06, "loss": 1.7673, "step": 31566500 }, { "epoch": 91.37, "learning_rate": 4.32968228973694e-06, "loss": 1.7497, "step": 31567000 }, { "epoch": 91.37, "learning_rate": 4.328958642089663e-06, "loss": 1.7855, "step": 31567500 }, { "epoch": 91.38, "learning_rate": 4.328234994442387e-06, "loss": 1.784, "step": 31568000 }, { "epoch": 91.38, "learning_rate": 4.32751134679511e-06, "loss": 1.7849, "step": 31568500 }, { "epoch": 91.38, "learning_rate": 4.326787699147833e-06, "loss": 1.7799, "step": 31569000 }, { "epoch": 91.38, "learning_rate": 4.326064051500556e-06, "loss": 1.7704, "step": 31569500 }, { "epoch": 91.38, "learning_rate": 4.325341851148574e-06, "loss": 1.7847, "step": 31570000 }, { "epoch": 91.38, "learning_rate": 4.324618203501297e-06, "loss": 1.7893, "step": 31570500 }, { "epoch": 91.39, "learning_rate": 4.32389455585402e-06, "loss": 1.7544, "step": 31571000 }, { "epoch": 91.39, "learning_rate": 4.3231738027973324e-06, "loss": 1.794, "step": 31571500 }, { "epoch": 91.39, "learning_rate": 4.322450155150056e-06, "loss": 1.7737, "step": 31572000 }, { "epoch": 91.39, "learning_rate": 4.321726507502779e-06, "loss": 1.7708, "step": 31572500 }, { "epoch": 91.39, "learning_rate": 4.3210028598555025e-06, "loss": 1.7683, "step": 31573000 }, { "epoch": 91.39, "learning_rate": 4.320279212208226e-06, "loss": 1.7939, "step": 31573500 }, { "epoch": 91.39, "learning_rate": 4.319555564560949e-06, "loss": 1.7709, "step": 31574000 }, { "epoch": 91.4, "learning_rate": 4.318831916913672e-06, "loss": 1.7799, "step": 31574500 }, { "epoch": 91.4, "learning_rate": 4.318108269266395e-06, "loss": 1.7892, "step": 31575000 }, { "epoch": 91.4, "learning_rate": 4.317384621619119e-06, "loss": 1.8004, "step": 31575500 }, { "epoch": 91.4, "learning_rate": 4.316660973971842e-06, "loss": 1.7484, "step": 31576000 }, { "epoch": 91.4, "learning_rate": 4.315937326324565e-06, "loss": 1.8134, "step": 31576500 }, { "epoch": 91.4, "learning_rate": 4.315213678677288e-06, "loss": 1.7943, "step": 31577000 }, { "epoch": 91.4, "learning_rate": 4.314490031030011e-06, "loss": 1.7882, "step": 31577500 }, { "epoch": 91.41, "learning_rate": 4.313766383382734e-06, "loss": 1.7895, "step": 31578000 }, { "epoch": 91.41, "learning_rate": 4.313044183030752e-06, "loss": 1.7609, "step": 31578500 }, { "epoch": 91.41, "learning_rate": 4.312320535383475e-06, "loss": 1.8008, "step": 31579000 }, { "epoch": 91.41, "learning_rate": 4.3115968877361984e-06, "loss": 1.7554, "step": 31579500 }, { "epoch": 91.41, "learning_rate": 4.310873240088922e-06, "loss": 1.7841, "step": 31580000 }, { "epoch": 91.41, "learning_rate": 4.3101495924416455e-06, "loss": 1.7573, "step": 31580500 }, { "epoch": 91.41, "learning_rate": 4.3094259447943685e-06, "loss": 1.7835, "step": 31581000 }, { "epoch": 91.42, "learning_rate": 4.308705191737681e-06, "loss": 1.8051, "step": 31581500 }, { "epoch": 91.42, "learning_rate": 4.307981544090404e-06, "loss": 1.767, "step": 31582000 }, { "epoch": 91.42, "learning_rate": 4.307257896443127e-06, "loss": 1.7911, "step": 31582500 }, { "epoch": 91.42, "learning_rate": 4.306534248795851e-06, "loss": 1.7802, "step": 31583000 }, { "epoch": 91.42, "learning_rate": 4.305810601148574e-06, "loss": 1.7702, "step": 31583500 }, { "epoch": 91.42, "learning_rate": 4.305086953501297e-06, "loss": 1.8052, "step": 31584000 }, { "epoch": 91.42, "learning_rate": 4.30436330585402e-06, "loss": 1.7705, "step": 31584500 }, { "epoch": 91.43, "learning_rate": 4.303639658206743e-06, "loss": 1.8142, "step": 31585000 }, { "epoch": 91.43, "learning_rate": 4.302917457854761e-06, "loss": 1.7743, "step": 31585500 }, { "epoch": 91.43, "learning_rate": 4.302193810207484e-06, "loss": 1.7657, "step": 31586000 }, { "epoch": 91.43, "learning_rate": 4.3014701625602075e-06, "loss": 1.8114, "step": 31586500 }, { "epoch": 91.43, "learning_rate": 4.300746514912931e-06, "loss": 1.8052, "step": 31587000 }, { "epoch": 91.43, "learning_rate": 4.300022867265654e-06, "loss": 1.7955, "step": 31587500 }, { "epoch": 91.43, "learning_rate": 4.2992992196183776e-06, "loss": 1.7652, "step": 31588000 }, { "epoch": 91.44, "learning_rate": 4.298575571971101e-06, "loss": 1.7783, "step": 31588500 }, { "epoch": 91.44, "learning_rate": 4.297853371619119e-06, "loss": 1.7713, "step": 31589000 }, { "epoch": 91.44, "learning_rate": 4.297129723971842e-06, "loss": 1.7925, "step": 31589500 }, { "epoch": 91.44, "learning_rate": 4.296406076324565e-06, "loss": 1.8049, "step": 31590000 }, { "epoch": 91.44, "learning_rate": 4.295682428677288e-06, "loss": 1.7524, "step": 31590500 }, { "epoch": 91.44, "learning_rate": 4.294958781030011e-06, "loss": 1.7957, "step": 31591000 }, { "epoch": 91.44, "learning_rate": 4.294235133382735e-06, "loss": 1.7903, "step": 31591500 }, { "epoch": 91.45, "learning_rate": 4.293511485735457e-06, "loss": 1.7818, "step": 31592000 }, { "epoch": 91.45, "learning_rate": 4.292787838088181e-06, "loss": 1.7681, "step": 31592500 }, { "epoch": 91.45, "learning_rate": 4.292064190440904e-06, "loss": 1.7773, "step": 31593000 }, { "epoch": 91.45, "learning_rate": 4.291340542793627e-06, "loss": 1.7853, "step": 31593500 }, { "epoch": 91.45, "learning_rate": 4.290618342441645e-06, "loss": 1.7981, "step": 31594000 }, { "epoch": 91.45, "learning_rate": 4.2898946947943685e-06, "loss": 1.787, "step": 31594500 }, { "epoch": 91.45, "learning_rate": 4.2891710471470916e-06, "loss": 1.7497, "step": 31595000 }, { "epoch": 91.46, "learning_rate": 4.288447399499815e-06, "loss": 1.7771, "step": 31595500 }, { "epoch": 91.46, "learning_rate": 4.287725199147833e-06, "loss": 1.796, "step": 31596000 }, { "epoch": 91.46, "learning_rate": 4.287001551500556e-06, "loss": 1.7845, "step": 31596500 }, { "epoch": 91.46, "learning_rate": 4.28627790385328e-06, "loss": 1.7948, "step": 31597000 }, { "epoch": 91.46, "learning_rate": 4.285554256206002e-06, "loss": 1.7731, "step": 31597500 }, { "epoch": 91.46, "learning_rate": 4.284830608558726e-06, "loss": 1.7653, "step": 31598000 }, { "epoch": 91.46, "learning_rate": 4.284106960911448e-06, "loss": 1.7782, "step": 31598500 }, { "epoch": 91.47, "learning_rate": 4.283384760559467e-06, "loss": 1.7908, "step": 31599000 }, { "epoch": 91.47, "learning_rate": 4.282661112912189e-06, "loss": 1.8117, "step": 31599500 }, { "epoch": 91.47, "learning_rate": 4.281937465264913e-06, "loss": 1.7874, "step": 31600000 }, { "epoch": 91.47, "learning_rate": 4.281213817617636e-06, "loss": 1.7772, "step": 31600500 }, { "epoch": 91.47, "learning_rate": 4.280490169970359e-06, "loss": 1.7589, "step": 31601000 }, { "epoch": 91.47, "learning_rate": 4.279767969618377e-06, "loss": 1.7981, "step": 31601500 }, { "epoch": 91.47, "learning_rate": 4.279044321971101e-06, "loss": 1.7829, "step": 31602000 }, { "epoch": 91.48, "learning_rate": 4.2783206743238245e-06, "loss": 1.8027, "step": 31602500 }, { "epoch": 91.48, "learning_rate": 4.277597026676547e-06, "loss": 1.7769, "step": 31603000 }, { "epoch": 91.48, "learning_rate": 4.276874826324565e-06, "loss": 1.7877, "step": 31603500 }, { "epoch": 91.48, "learning_rate": 4.276151178677288e-06, "loss": 1.7539, "step": 31604000 }, { "epoch": 91.48, "learning_rate": 4.275427531030012e-06, "loss": 1.7687, "step": 31604500 }, { "epoch": 91.48, "learning_rate": 4.274705330678029e-06, "loss": 1.7392, "step": 31605000 }, { "epoch": 91.48, "learning_rate": 4.273981683030753e-06, "loss": 1.785, "step": 31605500 }, { "epoch": 91.49, "learning_rate": 4.273258035383475e-06, "loss": 1.7813, "step": 31606000 }, { "epoch": 91.49, "learning_rate": 4.272534387736199e-06, "loss": 1.7617, "step": 31606500 }, { "epoch": 91.49, "learning_rate": 4.2718107400889214e-06, "loss": 1.7785, "step": 31607000 }, { "epoch": 91.49, "learning_rate": 4.271087092441645e-06, "loss": 1.801, "step": 31607500 }, { "epoch": 91.49, "learning_rate": 4.270364892089663e-06, "loss": 1.7267, "step": 31608000 }, { "epoch": 91.49, "learning_rate": 4.2696412444423865e-06, "loss": 1.7803, "step": 31608500 }, { "epoch": 91.5, "learning_rate": 4.26891759679511e-06, "loss": 1.7599, "step": 31609000 }, { "epoch": 91.5, "learning_rate": 4.268193949147833e-06, "loss": 1.7862, "step": 31609500 }, { "epoch": 91.5, "learning_rate": 4.267470301500557e-06, "loss": 1.798, "step": 31610000 }, { "epoch": 91.5, "learning_rate": 4.266746653853279e-06, "loss": 1.7696, "step": 31610500 }, { "epoch": 91.5, "learning_rate": 4.266023006206003e-06, "loss": 1.7599, "step": 31611000 }, { "epoch": 91.5, "learning_rate": 4.265299358558726e-06, "loss": 1.7825, "step": 31611500 }, { "epoch": 91.5, "learning_rate": 4.264577158206744e-06, "loss": 1.7918, "step": 31612000 }, { "epoch": 91.51, "learning_rate": 4.263853510559466e-06, "loss": 1.786, "step": 31612500 }, { "epoch": 91.51, "learning_rate": 4.26312986291219e-06, "loss": 1.7566, "step": 31613000 }, { "epoch": 91.51, "learning_rate": 4.262406215264913e-06, "loss": 1.7843, "step": 31613500 }, { "epoch": 91.51, "learning_rate": 4.261682567617636e-06, "loss": 1.7709, "step": 31614000 }, { "epoch": 91.51, "learning_rate": 4.26095891997036e-06, "loss": 1.7859, "step": 31614500 }, { "epoch": 91.51, "learning_rate": 4.2602352723230824e-06, "loss": 1.7686, "step": 31615000 }, { "epoch": 91.51, "learning_rate": 4.2595130719711005e-06, "loss": 1.7657, "step": 31615500 }, { "epoch": 91.52, "learning_rate": 4.258789424323824e-06, "loss": 1.7744, "step": 31616000 }, { "epoch": 91.52, "learning_rate": 4.2580657766765475e-06, "loss": 1.7954, "step": 31616500 }, { "epoch": 91.52, "learning_rate": 4.25734212902927e-06, "loss": 1.7667, "step": 31617000 }, { "epoch": 91.52, "learning_rate": 4.256618481381994e-06, "loss": 1.7861, "step": 31617500 }, { "epoch": 91.52, "learning_rate": 4.255897728325306e-06, "loss": 1.7971, "step": 31618000 }, { "epoch": 91.52, "learning_rate": 4.255174080678029e-06, "loss": 1.7812, "step": 31618500 }, { "epoch": 91.52, "learning_rate": 4.254450433030752e-06, "loss": 1.7753, "step": 31619000 }, { "epoch": 91.53, "learning_rate": 4.253726785383476e-06, "loss": 1.7778, "step": 31619500 }, { "epoch": 91.53, "learning_rate": 4.253003137736198e-06, "loss": 1.7884, "step": 31620000 }, { "epoch": 91.53, "learning_rate": 4.252279490088922e-06, "loss": 1.7634, "step": 31620500 }, { "epoch": 91.53, "learning_rate": 4.251555842441645e-06, "loss": 1.765, "step": 31621000 }, { "epoch": 91.53, "learning_rate": 4.250833642089663e-06, "loss": 1.7968, "step": 31621500 }, { "epoch": 91.53, "learning_rate": 4.2501099944423865e-06, "loss": 1.7732, "step": 31622000 }, { "epoch": 91.53, "learning_rate": 4.2493863467951096e-06, "loss": 1.7683, "step": 31622500 }, { "epoch": 91.54, "learning_rate": 4.248662699147833e-06, "loss": 1.7934, "step": 31623000 }, { "epoch": 91.54, "learning_rate": 4.247939051500556e-06, "loss": 1.7904, "step": 31623500 }, { "epoch": 91.54, "learning_rate": 4.24721540385328e-06, "loss": 1.7707, "step": 31624000 }, { "epoch": 91.54, "learning_rate": 4.246491756206003e-06, "loss": 1.7848, "step": 31624500 }, { "epoch": 91.54, "learning_rate": 4.24576955585402e-06, "loss": 1.7878, "step": 31625000 }, { "epoch": 91.54, "learning_rate": 4.245045908206743e-06, "loss": 1.781, "step": 31625500 }, { "epoch": 91.54, "learning_rate": 4.244322260559467e-06, "loss": 1.7961, "step": 31626000 }, { "epoch": 91.55, "learning_rate": 4.243600060207484e-06, "loss": 1.7308, "step": 31626500 }, { "epoch": 91.55, "learning_rate": 4.242876412560208e-06, "loss": 1.8171, "step": 31627000 }, { "epoch": 91.55, "learning_rate": 4.242152764912931e-06, "loss": 1.7734, "step": 31627500 }, { "epoch": 91.55, "learning_rate": 4.241429117265654e-06, "loss": 1.8072, "step": 31628000 }, { "epoch": 91.55, "learning_rate": 4.240705469618377e-06, "loss": 1.7786, "step": 31628500 }, { "epoch": 91.55, "learning_rate": 4.2399818219711005e-06, "loss": 1.7937, "step": 31629000 }, { "epoch": 91.55, "learning_rate": 4.2392581743238236e-06, "loss": 1.8024, "step": 31629500 }, { "epoch": 91.56, "learning_rate": 4.238535973971842e-06, "loss": 1.7873, "step": 31630000 }, { "epoch": 91.56, "learning_rate": 4.237812326324565e-06, "loss": 1.7829, "step": 31630500 }, { "epoch": 91.56, "learning_rate": 4.237088678677288e-06, "loss": 1.8033, "step": 31631000 }, { "epoch": 91.56, "learning_rate": 4.236365031030011e-06, "loss": 1.7858, "step": 31631500 }, { "epoch": 91.56, "learning_rate": 4.235641383382735e-06, "loss": 1.7503, "step": 31632000 }, { "epoch": 91.56, "learning_rate": 4.234917735735458e-06, "loss": 1.7907, "step": 31632500 }, { "epoch": 91.56, "learning_rate": 4.234194088088181e-06, "loss": 1.7976, "step": 31633000 }, { "epoch": 91.57, "learning_rate": 4.233470440440904e-06, "loss": 1.7822, "step": 31633500 }, { "epoch": 91.57, "learning_rate": 4.232746792793627e-06, "loss": 1.7487, "step": 31634000 }, { "epoch": 91.57, "learning_rate": 4.232023145146351e-06, "loss": 1.7928, "step": 31634500 }, { "epoch": 91.57, "learning_rate": 4.231299497499074e-06, "loss": 1.7533, "step": 31635000 }, { "epoch": 91.57, "learning_rate": 4.230577297147091e-06, "loss": 1.7898, "step": 31635500 }, { "epoch": 91.57, "learning_rate": 4.2298536494998145e-06, "loss": 1.7728, "step": 31636000 }, { "epoch": 91.57, "learning_rate": 4.229130001852538e-06, "loss": 1.7875, "step": 31636500 }, { "epoch": 91.58, "learning_rate": 4.2284063542052615e-06, "loss": 1.7973, "step": 31637000 }, { "epoch": 91.58, "learning_rate": 4.227682706557985e-06, "loss": 1.774, "step": 31637500 }, { "epoch": 91.58, "learning_rate": 4.226959058910708e-06, "loss": 1.7892, "step": 31638000 }, { "epoch": 91.58, "learning_rate": 4.226235411263431e-06, "loss": 1.7596, "step": 31638500 }, { "epoch": 91.58, "learning_rate": 4.225511763616155e-06, "loss": 1.7767, "step": 31639000 }, { "epoch": 91.58, "learning_rate": 4.224789563264172e-06, "loss": 1.7867, "step": 31639500 }, { "epoch": 91.58, "learning_rate": 4.224065915616896e-06, "loss": 1.7628, "step": 31640000 }, { "epoch": 91.59, "learning_rate": 4.223342267969618e-06, "loss": 1.8039, "step": 31640500 }, { "epoch": 91.59, "learning_rate": 4.222618620322342e-06, "loss": 1.8028, "step": 31641000 }, { "epoch": 91.59, "learning_rate": 4.221894972675065e-06, "loss": 1.8021, "step": 31641500 }, { "epoch": 91.59, "learning_rate": 4.221171325027788e-06, "loss": 1.778, "step": 31642000 }, { "epoch": 91.59, "learning_rate": 4.220447677380512e-06, "loss": 1.7794, "step": 31642500 }, { "epoch": 91.59, "learning_rate": 4.219724029733234e-06, "loss": 1.7796, "step": 31643000 }, { "epoch": 91.59, "learning_rate": 4.219000382085958e-06, "loss": 1.7995, "step": 31643500 }, { "epoch": 91.6, "learning_rate": 4.2182781817339755e-06, "loss": 1.7779, "step": 31644000 }, { "epoch": 91.6, "learning_rate": 4.2175545340866994e-06, "loss": 1.7677, "step": 31644500 }, { "epoch": 91.6, "learning_rate": 4.216830886439422e-06, "loss": 1.7551, "step": 31645000 }, { "epoch": 91.6, "learning_rate": 4.216108686087441e-06, "loss": 1.7878, "step": 31645500 }, { "epoch": 91.6, "learning_rate": 4.215385038440163e-06, "loss": 1.7474, "step": 31646000 }, { "epoch": 91.6, "learning_rate": 4.214661390792887e-06, "loss": 1.7999, "step": 31646500 }, { "epoch": 91.61, "learning_rate": 4.213937743145609e-06, "loss": 1.7819, "step": 31647000 }, { "epoch": 91.61, "learning_rate": 4.213214095498333e-06, "loss": 1.7909, "step": 31647500 }, { "epoch": 91.61, "learning_rate": 4.212490447851056e-06, "loss": 1.7919, "step": 31648000 }, { "epoch": 91.61, "learning_rate": 4.211766800203779e-06, "loss": 1.793, "step": 31648500 }, { "epoch": 91.61, "learning_rate": 4.211044599851797e-06, "loss": 1.7901, "step": 31649000 }, { "epoch": 91.61, "learning_rate": 4.21032095220452e-06, "loss": 1.8138, "step": 31649500 }, { "epoch": 91.61, "learning_rate": 4.209597304557244e-06, "loss": 1.8051, "step": 31650000 }, { "epoch": 91.62, "learning_rate": 4.2088736569099664e-06, "loss": 1.7532, "step": 31650500 }, { "epoch": 91.62, "learning_rate": 4.20815000926269e-06, "loss": 1.8137, "step": 31651000 }, { "epoch": 91.62, "learning_rate": 4.2074263616154134e-06, "loss": 1.7928, "step": 31651500 }, { "epoch": 91.62, "learning_rate": 4.2067027139681365e-06, "loss": 1.7922, "step": 31652000 }, { "epoch": 91.62, "learning_rate": 4.2059790663208604e-06, "loss": 1.7838, "step": 31652500 }, { "epoch": 91.62, "learning_rate": 4.205255418673583e-06, "loss": 1.7806, "step": 31653000 }, { "epoch": 91.62, "learning_rate": 4.204531771026307e-06, "loss": 1.7842, "step": 31653500 }, { "epoch": 91.63, "learning_rate": 4.20380812337903e-06, "loss": 1.7696, "step": 31654000 }, { "epoch": 91.63, "learning_rate": 4.203085923027048e-06, "loss": 1.7767, "step": 31654500 }, { "epoch": 91.63, "learning_rate": 4.20236227537977e-06, "loss": 1.7809, "step": 31655000 }, { "epoch": 91.63, "learning_rate": 4.201638627732494e-06, "loss": 1.7829, "step": 31655500 }, { "epoch": 91.63, "learning_rate": 4.200914980085217e-06, "loss": 1.7801, "step": 31656000 }, { "epoch": 91.63, "learning_rate": 4.20019133243794e-06, "loss": 1.7479, "step": 31656500 }, { "epoch": 91.63, "learning_rate": 4.199469132085957e-06, "loss": 1.802, "step": 31657000 }, { "epoch": 91.64, "learning_rate": 4.198745484438681e-06, "loss": 1.7821, "step": 31657500 }, { "epoch": 91.64, "learning_rate": 4.1980232840866985e-06, "loss": 1.7689, "step": 31658000 }, { "epoch": 91.64, "learning_rate": 4.1972996364394225e-06, "loss": 1.7847, "step": 31658500 }, { "epoch": 91.64, "learning_rate": 4.1965759887921455e-06, "loss": 1.7892, "step": 31659000 }, { "epoch": 91.64, "learning_rate": 4.195853788440164e-06, "loss": 1.7869, "step": 31659500 }, { "epoch": 91.64, "learning_rate": 4.195130140792886e-06, "loss": 1.7738, "step": 31660000 }, { "epoch": 91.64, "learning_rate": 4.19440649314561e-06, "loss": 1.7773, "step": 31660500 }, { "epoch": 91.65, "learning_rate": 4.193682845498333e-06, "loss": 1.7668, "step": 31661000 }, { "epoch": 91.65, "learning_rate": 4.192959197851056e-06, "loss": 1.7941, "step": 31661500 }, { "epoch": 91.65, "learning_rate": 4.19223555020378e-06, "loss": 1.7655, "step": 31662000 }, { "epoch": 91.65, "learning_rate": 4.191511902556502e-06, "loss": 1.7765, "step": 31662500 }, { "epoch": 91.65, "learning_rate": 4.190788254909226e-06, "loss": 1.7769, "step": 31663000 }, { "epoch": 91.65, "learning_rate": 4.190064607261949e-06, "loss": 1.7641, "step": 31663500 }, { "epoch": 91.65, "learning_rate": 4.189340959614672e-06, "loss": 1.805, "step": 31664000 }, { "epoch": 91.66, "learning_rate": 4.188617311967395e-06, "loss": 1.7872, "step": 31664500 }, { "epoch": 91.66, "learning_rate": 4.187895111615413e-06, "loss": 1.7729, "step": 31665000 }, { "epoch": 91.66, "learning_rate": 4.1871714639681365e-06, "loss": 1.7742, "step": 31665500 }, { "epoch": 91.66, "learning_rate": 4.1864478163208595e-06, "loss": 1.787, "step": 31666000 }, { "epoch": 91.66, "learning_rate": 4.1857241686735835e-06, "loss": 1.7907, "step": 31666500 }, { "epoch": 91.66, "learning_rate": 4.1850005210263066e-06, "loss": 1.8008, "step": 31667000 }, { "epoch": 91.66, "learning_rate": 4.18427687337903e-06, "loss": 1.7965, "step": 31667500 }, { "epoch": 91.67, "learning_rate": 4.183553225731753e-06, "loss": 1.7787, "step": 31668000 }, { "epoch": 91.67, "learning_rate": 4.182829578084476e-06, "loss": 1.7886, "step": 31668500 }, { "epoch": 91.67, "learning_rate": 4.182105930437199e-06, "loss": 1.7996, "step": 31669000 }, { "epoch": 91.67, "learning_rate": 4.181382282789923e-06, "loss": 1.7655, "step": 31669500 }, { "epoch": 91.67, "learning_rate": 4.180658635142646e-06, "loss": 1.7732, "step": 31670000 }, { "epoch": 91.67, "learning_rate": 4.179937882085958e-06, "loss": 1.7743, "step": 31670500 }, { "epoch": 91.67, "learning_rate": 4.179214234438681e-06, "loss": 1.7764, "step": 31671000 }, { "epoch": 91.68, "learning_rate": 4.178490586791404e-06, "loss": 1.7507, "step": 31671500 }, { "epoch": 91.68, "learning_rate": 4.177766939144127e-06, "loss": 1.7719, "step": 31672000 }, { "epoch": 91.68, "learning_rate": 4.1770432914968505e-06, "loss": 1.756, "step": 31672500 }, { "epoch": 91.68, "learning_rate": 4.176319643849574e-06, "loss": 1.7592, "step": 31673000 }, { "epoch": 91.68, "learning_rate": 4.1755959962022975e-06, "loss": 1.7609, "step": 31673500 }, { "epoch": 91.68, "learning_rate": 4.1748723485550206e-06, "loss": 1.7613, "step": 31674000 }, { "epoch": 91.68, "learning_rate": 4.174148700907744e-06, "loss": 1.7965, "step": 31674500 }, { "epoch": 91.69, "learning_rate": 4.173426500555762e-06, "loss": 1.7756, "step": 31675000 }, { "epoch": 91.69, "learning_rate": 4.172702852908485e-06, "loss": 1.8089, "step": 31675500 }, { "epoch": 91.69, "learning_rate": 4.171979205261208e-06, "loss": 1.7683, "step": 31676000 }, { "epoch": 91.69, "learning_rate": 4.171257004909226e-06, "loss": 1.7742, "step": 31676500 }, { "epoch": 91.69, "learning_rate": 4.170533357261949e-06, "loss": 1.7951, "step": 31677000 }, { "epoch": 91.69, "learning_rate": 4.169809709614672e-06, "loss": 1.7536, "step": 31677500 }, { "epoch": 91.69, "learning_rate": 4.169086061967395e-06, "loss": 1.7736, "step": 31678000 }, { "epoch": 91.7, "learning_rate": 4.168362414320118e-06, "loss": 1.7857, "step": 31678500 }, { "epoch": 91.7, "learning_rate": 4.167638766672842e-06, "loss": 1.7773, "step": 31679000 }, { "epoch": 91.7, "learning_rate": 4.1669165663208595e-06, "loss": 1.7809, "step": 31679500 }, { "epoch": 91.7, "learning_rate": 4.1661929186735834e-06, "loss": 1.7895, "step": 31680000 }, { "epoch": 91.7, "learning_rate": 4.1654692710263065e-06, "loss": 1.7853, "step": 31680500 }, { "epoch": 91.7, "learning_rate": 4.16474562337903e-06, "loss": 1.7722, "step": 31681000 }, { "epoch": 91.7, "learning_rate": 4.164021975731753e-06, "loss": 1.7745, "step": 31681500 }, { "epoch": 91.71, "learning_rate": 4.163298328084476e-06, "loss": 1.8042, "step": 31682000 }, { "epoch": 91.71, "learning_rate": 4.1625746804372e-06, "loss": 1.7876, "step": 31682500 }, { "epoch": 91.71, "learning_rate": 4.161852480085217e-06, "loss": 1.7947, "step": 31683000 }, { "epoch": 91.71, "learning_rate": 4.16112883243794e-06, "loss": 1.7981, "step": 31683500 }, { "epoch": 91.71, "learning_rate": 4.160405184790663e-06, "loss": 1.7826, "step": 31684000 }, { "epoch": 91.71, "learning_rate": 4.159681537143387e-06, "loss": 1.7738, "step": 31684500 }, { "epoch": 91.72, "learning_rate": 4.158957889496109e-06, "loss": 1.7794, "step": 31685000 }, { "epoch": 91.72, "learning_rate": 4.158234241848833e-06, "loss": 1.7757, "step": 31685500 }, { "epoch": 91.72, "learning_rate": 4.157510594201556e-06, "loss": 1.7809, "step": 31686000 }, { "epoch": 91.72, "learning_rate": 4.156788393849574e-06, "loss": 1.8082, "step": 31686500 }, { "epoch": 91.72, "learning_rate": 4.1560647462022974e-06, "loss": 1.7612, "step": 31687000 }, { "epoch": 91.72, "learning_rate": 4.1553410985550205e-06, "loss": 1.7741, "step": 31687500 }, { "epoch": 91.72, "learning_rate": 4.154617450907744e-06, "loss": 1.7904, "step": 31688000 }, { "epoch": 91.73, "learning_rate": 4.153893803260467e-06, "loss": 1.7968, "step": 31688500 }, { "epoch": 91.73, "learning_rate": 4.153170155613191e-06, "loss": 1.7823, "step": 31689000 }, { "epoch": 91.73, "learning_rate": 4.152446507965913e-06, "loss": 1.7784, "step": 31689500 }, { "epoch": 91.73, "learning_rate": 4.151722860318637e-06, "loss": 1.7741, "step": 31690000 }, { "epoch": 91.73, "learning_rate": 4.15099921267136e-06, "loss": 1.768, "step": 31690500 }, { "epoch": 91.73, "learning_rate": 4.150277012319378e-06, "loss": 1.7893, "step": 31691000 }, { "epoch": 91.73, "learning_rate": 4.149553364672101e-06, "loss": 1.7444, "step": 31691500 }, { "epoch": 91.74, "learning_rate": 4.148829717024824e-06, "loss": 1.7816, "step": 31692000 }, { "epoch": 91.74, "learning_rate": 4.148107516672841e-06, "loss": 1.7877, "step": 31692500 }, { "epoch": 91.74, "learning_rate": 4.147383869025565e-06, "loss": 1.7493, "step": 31693000 }, { "epoch": 91.74, "learning_rate": 4.146660221378288e-06, "loss": 1.7737, "step": 31693500 }, { "epoch": 91.74, "learning_rate": 4.1459365737310114e-06, "loss": 1.7901, "step": 31694000 }, { "epoch": 91.74, "learning_rate": 4.145212926083735e-06, "loss": 1.7919, "step": 31694500 }, { "epoch": 91.74, "learning_rate": 4.144489278436458e-06, "loss": 1.788, "step": 31695000 }, { "epoch": 91.75, "learning_rate": 4.1437656307891815e-06, "loss": 1.7909, "step": 31695500 }, { "epoch": 91.75, "learning_rate": 4.143041983141905e-06, "loss": 1.7783, "step": 31696000 }, { "epoch": 91.75, "learning_rate": 4.142318335494628e-06, "loss": 1.7557, "step": 31696500 }, { "epoch": 91.75, "learning_rate": 4.141596135142645e-06, "loss": 1.7814, "step": 31697000 }, { "epoch": 91.75, "learning_rate": 4.140872487495369e-06, "loss": 1.7947, "step": 31697500 }, { "epoch": 91.75, "learning_rate": 4.140148839848093e-06, "loss": 1.7819, "step": 31698000 }, { "epoch": 91.75, "learning_rate": 4.139425192200815e-06, "loss": 1.7516, "step": 31698500 }, { "epoch": 91.76, "learning_rate": 4.138702991848833e-06, "loss": 1.7854, "step": 31699000 }, { "epoch": 91.76, "learning_rate": 4.137979344201556e-06, "loss": 1.82, "step": 31699500 }, { "epoch": 91.76, "learning_rate": 4.13725569655428e-06, "loss": 1.7883, "step": 31700000 }, { "epoch": 91.76, "learning_rate": 4.136532048907002e-06, "loss": 1.7715, "step": 31700500 }, { "epoch": 91.76, "learning_rate": 4.135809848555021e-06, "loss": 1.7875, "step": 31701000 }, { "epoch": 91.76, "learning_rate": 4.1350862009077435e-06, "loss": 1.8039, "step": 31701500 }, { "epoch": 91.76, "learning_rate": 4.1343625532604675e-06, "loss": 1.7822, "step": 31702000 }, { "epoch": 91.77, "learning_rate": 4.133640352908485e-06, "loss": 1.776, "step": 31702500 }, { "epoch": 91.77, "learning_rate": 4.132916705261209e-06, "loss": 1.784, "step": 31703000 }, { "epoch": 91.77, "learning_rate": 4.132193057613931e-06, "loss": 1.7812, "step": 31703500 }, { "epoch": 91.77, "learning_rate": 4.131469409966655e-06, "loss": 1.7705, "step": 31704000 }, { "epoch": 91.77, "learning_rate": 4.130745762319378e-06, "loss": 1.7858, "step": 31704500 }, { "epoch": 91.77, "learning_rate": 4.130022114672101e-06, "loss": 1.7972, "step": 31705000 }, { "epoch": 91.77, "learning_rate": 4.129298467024824e-06, "loss": 1.7994, "step": 31705500 }, { "epoch": 91.78, "learning_rate": 4.128574819377547e-06, "loss": 1.7713, "step": 31706000 }, { "epoch": 91.78, "learning_rate": 4.127851171730271e-06, "loss": 1.7779, "step": 31706500 }, { "epoch": 91.78, "learning_rate": 4.127128971378288e-06, "loss": 1.7543, "step": 31707000 }, { "epoch": 91.78, "learning_rate": 4.126405323731012e-06, "loss": 1.8031, "step": 31707500 }, { "epoch": 91.78, "learning_rate": 4.1256816760837345e-06, "loss": 1.7615, "step": 31708000 }, { "epoch": 91.78, "learning_rate": 4.124958028436458e-06, "loss": 1.7807, "step": 31708500 }, { "epoch": 91.78, "learning_rate": 4.1242343807891815e-06, "loss": 1.7805, "step": 31709000 }, { "epoch": 91.79, "learning_rate": 4.1235121804371996e-06, "loss": 1.7682, "step": 31709500 }, { "epoch": 91.79, "learning_rate": 4.122788532789923e-06, "loss": 1.7723, "step": 31710000 }, { "epoch": 91.79, "learning_rate": 4.122064885142646e-06, "loss": 1.7943, "step": 31710500 }, { "epoch": 91.79, "learning_rate": 4.121341237495369e-06, "loss": 1.7744, "step": 31711000 }, { "epoch": 91.79, "learning_rate": 4.120617589848092e-06, "loss": 1.776, "step": 31711500 }, { "epoch": 91.79, "learning_rate": 4.11989538949611e-06, "loss": 1.7765, "step": 31712000 }, { "epoch": 91.79, "learning_rate": 4.119171741848833e-06, "loss": 1.7745, "step": 31712500 }, { "epoch": 91.8, "learning_rate": 4.118448094201556e-06, "loss": 1.7728, "step": 31713000 }, { "epoch": 91.8, "learning_rate": 4.117724446554279e-06, "loss": 1.7778, "step": 31713500 }, { "epoch": 91.8, "learning_rate": 4.117002246202297e-06, "loss": 1.7913, "step": 31714000 }, { "epoch": 91.8, "learning_rate": 4.11627859855502e-06, "loss": 1.78, "step": 31714500 }, { "epoch": 91.8, "learning_rate": 4.115554950907744e-06, "loss": 1.7757, "step": 31715000 }, { "epoch": 91.8, "learning_rate": 4.1148313032604666e-06, "loss": 1.778, "step": 31715500 }, { "epoch": 91.8, "learning_rate": 4.1141076556131905e-06, "loss": 1.7819, "step": 31716000 }, { "epoch": 91.81, "learning_rate": 4.113384007965914e-06, "loss": 1.8045, "step": 31716500 }, { "epoch": 91.81, "learning_rate": 4.112661807613932e-06, "loss": 1.7831, "step": 31717000 }, { "epoch": 91.81, "learning_rate": 4.111938159966655e-06, "loss": 1.8053, "step": 31717500 }, { "epoch": 91.81, "learning_rate": 4.111214512319378e-06, "loss": 1.773, "step": 31718000 }, { "epoch": 91.81, "learning_rate": 4.110492311967395e-06, "loss": 1.7733, "step": 31718500 }, { "epoch": 91.81, "learning_rate": 4.109768664320119e-06, "loss": 1.7655, "step": 31719000 }, { "epoch": 91.81, "learning_rate": 4.109045016672842e-06, "loss": 1.79, "step": 31719500 }, { "epoch": 91.82, "learning_rate": 4.108321369025565e-06, "loss": 1.7858, "step": 31720000 }, { "epoch": 91.82, "learning_rate": 4.107597721378288e-06, "loss": 1.7719, "step": 31720500 }, { "epoch": 91.82, "learning_rate": 4.106875521026306e-06, "loss": 1.7667, "step": 31721000 }, { "epoch": 91.82, "learning_rate": 4.1061518733790294e-06, "loss": 1.801, "step": 31721500 }, { "epoch": 91.82, "learning_rate": 4.1054282257317525e-06, "loss": 1.7574, "step": 31722000 }, { "epoch": 91.82, "learning_rate": 4.104704578084476e-06, "loss": 1.8149, "step": 31722500 }, { "epoch": 91.83, "learning_rate": 4.1039809304371995e-06, "loss": 1.7682, "step": 31723000 }, { "epoch": 91.83, "learning_rate": 4.103257282789923e-06, "loss": 1.7872, "step": 31723500 }, { "epoch": 91.83, "learning_rate": 4.102533635142646e-06, "loss": 1.785, "step": 31724000 }, { "epoch": 91.83, "learning_rate": 4.101809987495369e-06, "loss": 1.8106, "step": 31724500 }, { "epoch": 91.83, "learning_rate": 4.101087787143387e-06, "loss": 1.787, "step": 31725000 }, { "epoch": 91.83, "learning_rate": 4.10036413949611e-06, "loss": 1.8119, "step": 31725500 }, { "epoch": 91.83, "learning_rate": 4.099640491848833e-06, "loss": 1.7893, "step": 31726000 }, { "epoch": 91.84, "learning_rate": 4.098916844201556e-06, "loss": 1.8007, "step": 31726500 }, { "epoch": 91.84, "learning_rate": 4.098193196554279e-06, "loss": 1.7805, "step": 31727000 }, { "epoch": 91.84, "learning_rate": 4.097469548907003e-06, "loss": 1.7798, "step": 31727500 }, { "epoch": 91.84, "learning_rate": 4.096745901259726e-06, "loss": 1.7922, "step": 31728000 }, { "epoch": 91.84, "learning_rate": 4.096022253612449e-06, "loss": 1.7934, "step": 31728500 }, { "epoch": 91.84, "learning_rate": 4.0953000532604665e-06, "loss": 1.7696, "step": 31729000 }, { "epoch": 91.84, "learning_rate": 4.0945764056131904e-06, "loss": 1.7797, "step": 31729500 }, { "epoch": 91.85, "learning_rate": 4.0938527579659135e-06, "loss": 1.7963, "step": 31730000 }, { "epoch": 91.85, "learning_rate": 4.093130557613932e-06, "loss": 1.8004, "step": 31730500 }, { "epoch": 91.85, "learning_rate": 4.092406909966655e-06, "loss": 1.7747, "step": 31731000 }, { "epoch": 91.85, "learning_rate": 4.091683262319378e-06, "loss": 1.771, "step": 31731500 }, { "epoch": 91.85, "learning_rate": 4.090959614672101e-06, "loss": 1.7797, "step": 31732000 }, { "epoch": 91.85, "learning_rate": 4.090235967024824e-06, "loss": 1.7849, "step": 31732500 }, { "epoch": 91.85, "learning_rate": 4.089512319377548e-06, "loss": 1.7569, "step": 31733000 }, { "epoch": 91.86, "learning_rate": 4.08878867173027e-06, "loss": 1.7908, "step": 31733500 }, { "epoch": 91.86, "learning_rate": 4.088065024082994e-06, "loss": 1.7781, "step": 31734000 }, { "epoch": 91.86, "learning_rate": 4.087341376435717e-06, "loss": 1.7952, "step": 31734500 }, { "epoch": 91.86, "learning_rate": 4.08661772878844e-06, "loss": 1.8023, "step": 31735000 }, { "epoch": 91.86, "learning_rate": 4.085895528436458e-06, "loss": 1.7601, "step": 31735500 }, { "epoch": 91.86, "learning_rate": 4.085171880789181e-06, "loss": 1.7528, "step": 31736000 }, { "epoch": 91.86, "learning_rate": 4.0844482331419045e-06, "loss": 1.7625, "step": 31736500 }, { "epoch": 91.87, "learning_rate": 4.0837260327899226e-06, "loss": 1.7798, "step": 31737000 }, { "epoch": 91.87, "learning_rate": 4.083002385142646e-06, "loss": 1.779, "step": 31737500 }, { "epoch": 91.87, "learning_rate": 4.082278737495369e-06, "loss": 1.7773, "step": 31738000 }, { "epoch": 91.87, "learning_rate": 4.081555089848093e-06, "loss": 1.7872, "step": 31738500 }, { "epoch": 91.87, "learning_rate": 4.080831442200815e-06, "loss": 1.7545, "step": 31739000 }, { "epoch": 91.87, "learning_rate": 4.080107794553539e-06, "loss": 1.7755, "step": 31739500 }, { "epoch": 91.87, "learning_rate": 4.079384146906261e-06, "loss": 1.7956, "step": 31740000 }, { "epoch": 91.88, "learning_rate": 4.078660499258985e-06, "loss": 1.7724, "step": 31740500 }, { "epoch": 91.88, "learning_rate": 4.077936851611709e-06, "loss": 1.8099, "step": 31741000 }, { "epoch": 91.88, "learning_rate": 4.077213203964431e-06, "loss": 1.789, "step": 31741500 }, { "epoch": 91.88, "learning_rate": 4.076491003612449e-06, "loss": 1.7989, "step": 31742000 }, { "epoch": 91.88, "learning_rate": 4.075767355965172e-06, "loss": 1.7848, "step": 31742500 }, { "epoch": 91.88, "learning_rate": 4.075043708317896e-06, "loss": 1.7705, "step": 31743000 }, { "epoch": 91.88, "learning_rate": 4.0743200606706185e-06, "loss": 1.7719, "step": 31743500 }, { "epoch": 91.89, "learning_rate": 4.073596413023342e-06, "loss": 1.8003, "step": 31744000 }, { "epoch": 91.89, "learning_rate": 4.07287421267136e-06, "loss": 1.7595, "step": 31744500 }, { "epoch": 91.89, "learning_rate": 4.0721505650240836e-06, "loss": 1.7873, "step": 31745000 }, { "epoch": 91.89, "learning_rate": 4.071426917376806e-06, "loss": 1.7661, "step": 31745500 }, { "epoch": 91.89, "learning_rate": 4.070704717024825e-06, "loss": 1.7956, "step": 31746000 }, { "epoch": 91.89, "learning_rate": 4.069981069377547e-06, "loss": 1.8133, "step": 31746500 }, { "epoch": 91.89, "learning_rate": 4.069257421730271e-06, "loss": 1.7872, "step": 31747000 }, { "epoch": 91.9, "learning_rate": 4.068533774082994e-06, "loss": 1.7904, "step": 31747500 }, { "epoch": 91.9, "learning_rate": 4.067810126435717e-06, "loss": 1.7847, "step": 31748000 }, { "epoch": 91.9, "learning_rate": 4.067086478788441e-06, "loss": 1.7781, "step": 31748500 }, { "epoch": 91.9, "learning_rate": 4.066364278436458e-06, "loss": 1.8101, "step": 31749000 }, { "epoch": 91.9, "learning_rate": 4.065640630789181e-06, "loss": 1.7775, "step": 31749500 }, { "epoch": 91.9, "learning_rate": 4.064916983141904e-06, "loss": 1.8081, "step": 31750000 }, { "epoch": 91.9, "learning_rate": 4.064193335494628e-06, "loss": 1.7847, "step": 31750500 }, { "epoch": 91.91, "learning_rate": 4.0634696878473506e-06, "loss": 1.7604, "step": 31751000 }, { "epoch": 91.91, "learning_rate": 4.0627460402000745e-06, "loss": 1.7835, "step": 31751500 }, { "epoch": 91.91, "learning_rate": 4.0620223925527976e-06, "loss": 1.7832, "step": 31752000 }, { "epoch": 91.91, "learning_rate": 4.061298744905521e-06, "loss": 1.7889, "step": 31752500 }, { "epoch": 91.91, "learning_rate": 4.060575097258245e-06, "loss": 1.7975, "step": 31753000 }, { "epoch": 91.91, "learning_rate": 4.059852896906262e-06, "loss": 1.7678, "step": 31753500 }, { "epoch": 91.91, "learning_rate": 4.059129249258985e-06, "loss": 1.7862, "step": 31754000 }, { "epoch": 91.92, "learning_rate": 4.058405601611708e-06, "loss": 1.7791, "step": 31754500 }, { "epoch": 91.92, "learning_rate": 4.057681953964432e-06, "loss": 1.7688, "step": 31755000 }, { "epoch": 91.92, "learning_rate": 4.056958306317154e-06, "loss": 1.7612, "step": 31755500 }, { "epoch": 91.92, "learning_rate": 4.056237553260466e-06, "loss": 1.7991, "step": 31756000 }, { "epoch": 91.92, "learning_rate": 4.05551390561319e-06, "loss": 1.7779, "step": 31756500 }, { "epoch": 91.92, "learning_rate": 4.0547902579659134e-06, "loss": 1.7873, "step": 31757000 }, { "epoch": 91.92, "learning_rate": 4.0540666103186365e-06, "loss": 1.803, "step": 31757500 }, { "epoch": 91.93, "learning_rate": 4.0533429626713604e-06, "loss": 1.7925, "step": 31758000 }, { "epoch": 91.93, "learning_rate": 4.052619315024083e-06, "loss": 1.7819, "step": 31758500 }, { "epoch": 91.93, "learning_rate": 4.051895667376807e-06, "loss": 1.7813, "step": 31759000 }, { "epoch": 91.93, "learning_rate": 4.05117201972953e-06, "loss": 1.7545, "step": 31759500 }, { "epoch": 91.93, "learning_rate": 4.050448372082253e-06, "loss": 1.7587, "step": 31760000 }, { "epoch": 91.93, "learning_rate": 4.049727619025565e-06, "loss": 1.777, "step": 31760500 }, { "epoch": 91.94, "learning_rate": 4.049003971378289e-06, "loss": 1.8038, "step": 31761000 }, { "epoch": 91.94, "learning_rate": 4.048280323731011e-06, "loss": 1.7705, "step": 31761500 }, { "epoch": 91.94, "learning_rate": 4.047556676083735e-06, "loss": 1.7829, "step": 31762000 }, { "epoch": 91.94, "learning_rate": 4.046833028436458e-06, "loss": 1.7783, "step": 31762500 }, { "epoch": 91.94, "learning_rate": 4.046109380789181e-06, "loss": 1.7646, "step": 31763000 }, { "epoch": 91.94, "learning_rate": 4.045385733141904e-06, "loss": 1.7872, "step": 31763500 }, { "epoch": 91.94, "learning_rate": 4.0446620854946274e-06, "loss": 1.7863, "step": 31764000 }, { "epoch": 91.95, "learning_rate": 4.0439398851426455e-06, "loss": 1.7995, "step": 31764500 }, { "epoch": 91.95, "learning_rate": 4.043216237495369e-06, "loss": 1.742, "step": 31765000 }, { "epoch": 91.95, "learning_rate": 4.0424925898480925e-06, "loss": 1.7913, "step": 31765500 }, { "epoch": 91.95, "learning_rate": 4.041768942200816e-06, "loss": 1.7636, "step": 31766000 }, { "epoch": 91.95, "learning_rate": 4.041046741848833e-06, "loss": 1.7792, "step": 31766500 }, { "epoch": 91.95, "learning_rate": 4.040323094201556e-06, "loss": 1.7669, "step": 31767000 }, { "epoch": 91.95, "learning_rate": 4.03959944655428e-06, "loss": 1.8036, "step": 31767500 }, { "epoch": 91.96, "learning_rate": 4.038875798907003e-06, "loss": 1.7857, "step": 31768000 }, { "epoch": 91.96, "learning_rate": 4.038152151259726e-06, "loss": 1.7644, "step": 31768500 }, { "epoch": 91.96, "learning_rate": 4.037429950907744e-06, "loss": 1.7764, "step": 31769000 }, { "epoch": 91.96, "learning_rate": 4.036706303260467e-06, "loss": 1.7955, "step": 31769500 }, { "epoch": 91.96, "learning_rate": 4.03598265561319e-06, "loss": 1.7755, "step": 31770000 }, { "epoch": 91.96, "learning_rate": 4.035259007965913e-06, "loss": 1.7906, "step": 31770500 }, { "epoch": 91.96, "learning_rate": 4.0345368076139315e-06, "loss": 1.7878, "step": 31771000 }, { "epoch": 91.97, "learning_rate": 4.0338131599666546e-06, "loss": 1.7888, "step": 31771500 }, { "epoch": 91.97, "learning_rate": 4.033089512319378e-06, "loss": 1.7847, "step": 31772000 }, { "epoch": 91.97, "learning_rate": 4.032365864672101e-06, "loss": 1.764, "step": 31772500 }, { "epoch": 91.97, "learning_rate": 4.031642217024824e-06, "loss": 1.7787, "step": 31773000 }, { "epoch": 91.97, "learning_rate": 4.030921463968137e-06, "loss": 1.799, "step": 31773500 }, { "epoch": 91.97, "learning_rate": 4.03019781632086e-06, "loss": 1.7864, "step": 31774000 }, { "epoch": 91.97, "learning_rate": 4.029474168673583e-06, "loss": 1.7923, "step": 31774500 }, { "epoch": 91.98, "learning_rate": 4.028750521026306e-06, "loss": 1.7588, "step": 31775000 }, { "epoch": 91.98, "learning_rate": 4.028028320674324e-06, "loss": 1.7767, "step": 31775500 }, { "epoch": 91.98, "learning_rate": 4.027304673027047e-06, "loss": 1.7747, "step": 31776000 }, { "epoch": 91.98, "learning_rate": 4.02658102537977e-06, "loss": 1.7586, "step": 31776500 }, { "epoch": 91.98, "learning_rate": 4.0258573777324935e-06, "loss": 1.8065, "step": 31777000 }, { "epoch": 91.98, "learning_rate": 4.0251337300852166e-06, "loss": 1.7712, "step": 31777500 }, { "epoch": 91.98, "learning_rate": 4.0244100824379405e-06, "loss": 1.7713, "step": 31778000 }, { "epoch": 91.99, "learning_rate": 4.0236864347906636e-06, "loss": 1.7937, "step": 31778500 }, { "epoch": 91.99, "learning_rate": 4.022962787143387e-06, "loss": 1.7784, "step": 31779000 }, { "epoch": 91.99, "learning_rate": 4.022240586791405e-06, "loss": 1.7766, "step": 31779500 }, { "epoch": 91.99, "learning_rate": 4.021516939144128e-06, "loss": 1.8046, "step": 31780000 }, { "epoch": 91.99, "learning_rate": 4.020793291496851e-06, "loss": 1.7828, "step": 31780500 }, { "epoch": 91.99, "learning_rate": 4.020069643849574e-06, "loss": 1.767, "step": 31781000 }, { "epoch": 91.99, "learning_rate": 4.019345996202297e-06, "loss": 1.8142, "step": 31781500 }, { "epoch": 92.0, "learning_rate": 4.018623795850315e-06, "loss": 1.7717, "step": 31782000 }, { "epoch": 92.0, "learning_rate": 4.017900148203038e-06, "loss": 1.7898, "step": 31782500 }, { "epoch": 92.0, "learning_rate": 4.017176500555761e-06, "loss": 1.7933, "step": 31783000 }, { "epoch": 92.0, "eval_accuracy": 0.6915582917948377, "eval_accuracy_mlm": 0.6610626242757264, "eval_accuracy_nsp": 0.8549815988597251, "eval_loss": 2.1977381706237793, "eval_runtime": 331.7608, "eval_samples_per_second": 1315.364, "eval_steps_per_second": 54.808, "step": 31783424 }, { "epoch": 92.0, "learning_rate": 4.016452852908484e-06, "loss": 1.7774, "step": 31783500 }, { "epoch": 92.0, "learning_rate": 4.015729205261208e-06, "loss": 1.7624, "step": 31784000 }, { "epoch": 92.0, "learning_rate": 4.015005557613931e-06, "loss": 1.7785, "step": 31784500 }, { "epoch": 92.0, "learning_rate": 4.0142833572619495e-06, "loss": 1.781, "step": 31785000 }, { "epoch": 92.01, "learning_rate": 4.013562604205262e-06, "loss": 1.7746, "step": 31785500 }, { "epoch": 92.01, "learning_rate": 4.012838956557985e-06, "loss": 1.7737, "step": 31786000 }, { "epoch": 92.01, "learning_rate": 4.012115308910708e-06, "loss": 1.7731, "step": 31786500 }, { "epoch": 92.01, "learning_rate": 4.011391661263431e-06, "loss": 1.7776, "step": 31787000 }, { "epoch": 92.01, "learning_rate": 4.010668013616154e-06, "loss": 1.7744, "step": 31787500 }, { "epoch": 92.01, "learning_rate": 4.009944365968878e-06, "loss": 1.7861, "step": 31788000 }, { "epoch": 92.01, "learning_rate": 4.0092207183216e-06, "loss": 1.7719, "step": 31788500 }, { "epoch": 92.02, "learning_rate": 4.008497070674324e-06, "loss": 1.7908, "step": 31789000 }, { "epoch": 92.02, "learning_rate": 4.007773423027047e-06, "loss": 1.8056, "step": 31789500 }, { "epoch": 92.02, "learning_rate": 4.00704977537977e-06, "loss": 1.7814, "step": 31790000 }, { "epoch": 92.02, "learning_rate": 4.0063261277324934e-06, "loss": 1.7632, "step": 31790500 }, { "epoch": 92.02, "learning_rate": 4.0056024800852165e-06, "loss": 1.7721, "step": 31791000 }, { "epoch": 92.02, "learning_rate": 4.004880279733235e-06, "loss": 1.7718, "step": 31791500 }, { "epoch": 92.02, "learning_rate": 4.004156632085958e-06, "loss": 1.7734, "step": 31792000 }, { "epoch": 92.03, "learning_rate": 4.003432984438682e-06, "loss": 1.7726, "step": 31792500 }, { "epoch": 92.03, "learning_rate": 4.002709336791404e-06, "loss": 1.7881, "step": 31793000 }, { "epoch": 92.03, "learning_rate": 4.001985689144128e-06, "loss": 1.8196, "step": 31793500 }, { "epoch": 92.03, "learning_rate": 4.001263488792145e-06, "loss": 1.7859, "step": 31794000 }, { "epoch": 92.03, "learning_rate": 4.000539841144869e-06, "loss": 1.7975, "step": 31794500 }, { "epoch": 92.03, "learning_rate": 3.999816193497592e-06, "loss": 1.7873, "step": 31795000 }, { "epoch": 92.03, "learning_rate": 3.999092545850315e-06, "loss": 1.7547, "step": 31795500 }, { "epoch": 92.04, "learning_rate": 3.998368898203038e-06, "loss": 1.7804, "step": 31796000 }, { "epoch": 92.04, "learning_rate": 3.997645250555761e-06, "loss": 1.7359, "step": 31796500 }, { "epoch": 92.04, "learning_rate": 3.996923050203779e-06, "loss": 1.7801, "step": 31797000 }, { "epoch": 92.04, "learning_rate": 3.9961994025565025e-06, "loss": 1.766, "step": 31797500 }, { "epoch": 92.04, "learning_rate": 3.99547720220452e-06, "loss": 1.7689, "step": 31798000 }, { "epoch": 92.04, "learning_rate": 3.994753554557244e-06, "loss": 1.769, "step": 31798500 }, { "epoch": 92.05, "learning_rate": 3.994029906909967e-06, "loss": 1.7762, "step": 31799000 }, { "epoch": 92.05, "learning_rate": 3.99330625926269e-06, "loss": 1.7453, "step": 31799500 }, { "epoch": 92.05, "learning_rate": 3.992584058910708e-06, "loss": 1.7695, "step": 31800000 }, { "epoch": 92.05, "learning_rate": 3.991860411263431e-06, "loss": 1.7895, "step": 31800500 }, { "epoch": 92.05, "learning_rate": 3.991138210911448e-06, "loss": 1.779, "step": 31801000 }, { "epoch": 92.05, "learning_rate": 3.990414563264172e-06, "loss": 1.7681, "step": 31801500 }, { "epoch": 92.05, "learning_rate": 3.989690915616895e-06, "loss": 1.775, "step": 31802000 }, { "epoch": 92.06, "learning_rate": 3.988967267969618e-06, "loss": 1.7741, "step": 31802500 }, { "epoch": 92.06, "learning_rate": 3.988243620322342e-06, "loss": 1.7653, "step": 31803000 }, { "epoch": 92.06, "learning_rate": 3.9875199726750645e-06, "loss": 1.784, "step": 31803500 }, { "epoch": 92.06, "learning_rate": 3.986796325027788e-06, "loss": 1.7747, "step": 31804000 }, { "epoch": 92.06, "learning_rate": 3.9860726773805115e-06, "loss": 1.7746, "step": 31804500 }, { "epoch": 92.06, "learning_rate": 3.9853490297332346e-06, "loss": 1.7582, "step": 31805000 }, { "epoch": 92.06, "learning_rate": 3.9846253820859585e-06, "loss": 1.7733, "step": 31805500 }, { "epoch": 92.07, "learning_rate": 3.983901734438681e-06, "loss": 1.7856, "step": 31806000 }, { "epoch": 92.07, "learning_rate": 3.983178086791405e-06, "loss": 1.788, "step": 31806500 }, { "epoch": 92.07, "learning_rate": 3.982455886439422e-06, "loss": 1.7942, "step": 31807000 }, { "epoch": 92.07, "learning_rate": 3.98173368608744e-06, "loss": 1.755, "step": 31807500 }, { "epoch": 92.07, "learning_rate": 3.981010038440163e-06, "loss": 1.7837, "step": 31808000 }, { "epoch": 92.07, "learning_rate": 3.980286390792887e-06, "loss": 1.7656, "step": 31808500 }, { "epoch": 92.07, "learning_rate": 3.979562743145609e-06, "loss": 1.8044, "step": 31809000 }, { "epoch": 92.08, "learning_rate": 3.978839095498333e-06, "loss": 1.7862, "step": 31809500 }, { "epoch": 92.08, "learning_rate": 3.978115447851056e-06, "loss": 1.7822, "step": 31810000 }, { "epoch": 92.08, "learning_rate": 3.977391800203779e-06, "loss": 1.7791, "step": 31810500 }, { "epoch": 92.08, "learning_rate": 3.976668152556503e-06, "loss": 1.7813, "step": 31811000 }, { "epoch": 92.08, "learning_rate": 3.9759445049092255e-06, "loss": 1.7542, "step": 31811500 }, { "epoch": 92.08, "learning_rate": 3.975222304557244e-06, "loss": 1.7789, "step": 31812000 }, { "epoch": 92.08, "learning_rate": 3.974498656909967e-06, "loss": 1.7736, "step": 31812500 }, { "epoch": 92.09, "learning_rate": 3.973775009262691e-06, "loss": 1.7484, "step": 31813000 }, { "epoch": 92.09, "learning_rate": 3.973051361615413e-06, "loss": 1.7913, "step": 31813500 }, { "epoch": 92.09, "learning_rate": 3.972329161263432e-06, "loss": 1.7687, "step": 31814000 }, { "epoch": 92.09, "learning_rate": 3.971605513616154e-06, "loss": 1.7888, "step": 31814500 }, { "epoch": 92.09, "learning_rate": 3.970881865968878e-06, "loss": 1.751, "step": 31815000 }, { "epoch": 92.09, "learning_rate": 3.970158218321601e-06, "loss": 1.7794, "step": 31815500 }, { "epoch": 92.09, "learning_rate": 3.969437465264913e-06, "loss": 1.7545, "step": 31816000 }, { "epoch": 92.1, "learning_rate": 3.968713817617636e-06, "loss": 1.7602, "step": 31816500 }, { "epoch": 92.1, "learning_rate": 3.96799016997036e-06, "loss": 1.7542, "step": 31817000 }, { "epoch": 92.1, "learning_rate": 3.9672665223230825e-06, "loss": 1.7815, "step": 31817500 }, { "epoch": 92.1, "learning_rate": 3.9665428746758064e-06, "loss": 1.7677, "step": 31818000 }, { "epoch": 92.1, "learning_rate": 3.965819227028529e-06, "loss": 1.7692, "step": 31818500 }, { "epoch": 92.1, "learning_rate": 3.965095579381253e-06, "loss": 1.7596, "step": 31819000 }, { "epoch": 92.1, "learning_rate": 3.964371931733976e-06, "loss": 1.7536, "step": 31819500 }, { "epoch": 92.11, "learning_rate": 3.963648284086699e-06, "loss": 1.7836, "step": 31820000 }, { "epoch": 92.11, "learning_rate": 3.962924636439423e-06, "loss": 1.7938, "step": 31820500 }, { "epoch": 92.11, "learning_rate": 3.96220243608744e-06, "loss": 1.777, "step": 31821000 }, { "epoch": 92.11, "learning_rate": 3.961478788440163e-06, "loss": 1.7647, "step": 31821500 }, { "epoch": 92.11, "learning_rate": 3.960755140792886e-06, "loss": 1.7682, "step": 31822000 }, { "epoch": 92.11, "learning_rate": 3.96003149314561e-06, "loss": 1.766, "step": 31822500 }, { "epoch": 92.11, "learning_rate": 3.959307845498333e-06, "loss": 1.7737, "step": 31823000 }, { "epoch": 92.12, "learning_rate": 3.958584197851056e-06, "loss": 1.7814, "step": 31823500 }, { "epoch": 92.12, "learning_rate": 3.957860550203779e-06, "loss": 1.765, "step": 31824000 }, { "epoch": 92.12, "learning_rate": 3.957136902556502e-06, "loss": 1.7516, "step": 31824500 }, { "epoch": 92.12, "learning_rate": 3.956413254909226e-06, "loss": 1.7866, "step": 31825000 }, { "epoch": 92.12, "learning_rate": 3.955689607261949e-06, "loss": 1.767, "step": 31825500 }, { "epoch": 92.12, "learning_rate": 3.954967406909967e-06, "loss": 1.8037, "step": 31826000 }, { "epoch": 92.12, "learning_rate": 3.95424375926269e-06, "loss": 1.7826, "step": 31826500 }, { "epoch": 92.13, "learning_rate": 3.953520111615414e-06, "loss": 1.7884, "step": 31827000 }, { "epoch": 92.13, "learning_rate": 3.952796463968137e-06, "loss": 1.7965, "step": 31827500 }, { "epoch": 92.13, "learning_rate": 3.95207281632086e-06, "loss": 1.7884, "step": 31828000 }, { "epoch": 92.13, "learning_rate": 3.951349168673583e-06, "loss": 1.7941, "step": 31828500 }, { "epoch": 92.13, "learning_rate": 3.950626968321601e-06, "loss": 1.7783, "step": 31829000 }, { "epoch": 92.13, "learning_rate": 3.949903320674324e-06, "loss": 1.7833, "step": 31829500 }, { "epoch": 92.13, "learning_rate": 3.949179673027047e-06, "loss": 1.7641, "step": 31830000 }, { "epoch": 92.14, "learning_rate": 3.94845602537977e-06, "loss": 1.7895, "step": 31830500 }, { "epoch": 92.14, "learning_rate": 3.947732377732494e-06, "loss": 1.7917, "step": 31831000 }, { "epoch": 92.14, "learning_rate": 3.947008730085217e-06, "loss": 1.7834, "step": 31831500 }, { "epoch": 92.14, "learning_rate": 3.9462865297332345e-06, "loss": 1.7667, "step": 31832000 }, { "epoch": 92.14, "learning_rate": 3.9455628820859575e-06, "loss": 1.7741, "step": 31832500 }, { "epoch": 92.14, "learning_rate": 3.9448392344386815e-06, "loss": 1.7959, "step": 31833000 }, { "epoch": 92.14, "learning_rate": 3.9441155867914045e-06, "loss": 1.7557, "step": 31833500 }, { "epoch": 92.15, "learning_rate": 3.943391939144128e-06, "loss": 1.7435, "step": 31834000 }, { "epoch": 92.15, "learning_rate": 3.942669738792146e-06, "loss": 1.8034, "step": 31834500 }, { "epoch": 92.15, "learning_rate": 3.941946091144869e-06, "loss": 1.7937, "step": 31835000 }, { "epoch": 92.15, "learning_rate": 3.941222443497592e-06, "loss": 1.7808, "step": 31835500 }, { "epoch": 92.15, "learning_rate": 3.940498795850315e-06, "loss": 1.7521, "step": 31836000 }, { "epoch": 92.15, "learning_rate": 3.939775148203038e-06, "loss": 1.7633, "step": 31836500 }, { "epoch": 92.16, "learning_rate": 3.939051500555761e-06, "loss": 1.7481, "step": 31837000 }, { "epoch": 92.16, "learning_rate": 3.938327852908485e-06, "loss": 1.7783, "step": 31837500 }, { "epoch": 92.16, "learning_rate": 3.937604205261208e-06, "loss": 1.7848, "step": 31838000 }, { "epoch": 92.16, "learning_rate": 3.93688345220452e-06, "loss": 1.7683, "step": 31838500 }, { "epoch": 92.16, "learning_rate": 3.9361598045572435e-06, "loss": 1.7945, "step": 31839000 }, { "epoch": 92.16, "learning_rate": 3.9354361569099666e-06, "loss": 1.7586, "step": 31839500 }, { "epoch": 92.16, "learning_rate": 3.93471250926269e-06, "loss": 1.7645, "step": 31840000 }, { "epoch": 92.17, "learning_rate": 3.9339888616154136e-06, "loss": 1.7833, "step": 31840500 }, { "epoch": 92.17, "learning_rate": 3.933265213968137e-06, "loss": 1.7822, "step": 31841000 }, { "epoch": 92.17, "learning_rate": 3.93254156632086e-06, "loss": 1.7859, "step": 31841500 }, { "epoch": 92.17, "learning_rate": 3.931819365968877e-06, "loss": 1.7843, "step": 31842000 }, { "epoch": 92.17, "learning_rate": 3.931095718321601e-06, "loss": 1.7747, "step": 31842500 }, { "epoch": 92.17, "learning_rate": 3.930372070674324e-06, "loss": 1.7812, "step": 31843000 }, { "epoch": 92.17, "learning_rate": 3.929648423027047e-06, "loss": 1.7961, "step": 31843500 }, { "epoch": 92.18, "learning_rate": 3.928926222675065e-06, "loss": 1.8005, "step": 31844000 }, { "epoch": 92.18, "learning_rate": 3.928202575027788e-06, "loss": 1.7489, "step": 31844500 }, { "epoch": 92.18, "learning_rate": 3.927478927380511e-06, "loss": 1.7841, "step": 31845000 }, { "epoch": 92.18, "learning_rate": 3.926755279733234e-06, "loss": 1.759, "step": 31845500 }, { "epoch": 92.18, "learning_rate": 3.926031632085958e-06, "loss": 1.778, "step": 31846000 }, { "epoch": 92.18, "learning_rate": 3.925309431733976e-06, "loss": 1.7465, "step": 31846500 }, { "epoch": 92.18, "learning_rate": 3.9245857840866995e-06, "loss": 1.772, "step": 31847000 }, { "epoch": 92.19, "learning_rate": 3.923862136439422e-06, "loss": 1.786, "step": 31847500 }, { "epoch": 92.19, "learning_rate": 3.923138488792146e-06, "loss": 1.7774, "step": 31848000 }, { "epoch": 92.19, "learning_rate": 3.922414841144869e-06, "loss": 1.7716, "step": 31848500 }, { "epoch": 92.19, "learning_rate": 3.921691193497592e-06, "loss": 1.791, "step": 31849000 }, { "epoch": 92.19, "learning_rate": 3.920967545850316e-06, "loss": 1.7971, "step": 31849500 }, { "epoch": 92.19, "learning_rate": 3.920243898203038e-06, "loss": 1.7884, "step": 31850000 }, { "epoch": 92.19, "learning_rate": 3.919521697851056e-06, "loss": 1.7892, "step": 31850500 }, { "epoch": 92.2, "learning_rate": 3.918798050203779e-06, "loss": 1.7501, "step": 31851000 }, { "epoch": 92.2, "learning_rate": 3.918074402556503e-06, "loss": 1.7652, "step": 31851500 }, { "epoch": 92.2, "learning_rate": 3.917350754909225e-06, "loss": 1.7772, "step": 31852000 }, { "epoch": 92.2, "learning_rate": 3.9166285545572434e-06, "loss": 1.8108, "step": 31852500 }, { "epoch": 92.2, "learning_rate": 3.9159049069099665e-06, "loss": 1.781, "step": 31853000 }, { "epoch": 92.2, "learning_rate": 3.9151812592626904e-06, "loss": 1.7836, "step": 31853500 }, { "epoch": 92.2, "learning_rate": 3.914457611615413e-06, "loss": 1.77, "step": 31854000 }, { "epoch": 92.21, "learning_rate": 3.913733963968137e-06, "loss": 1.8053, "step": 31854500 }, { "epoch": 92.21, "learning_rate": 3.913011763616154e-06, "loss": 1.78, "step": 31855000 }, { "epoch": 92.21, "learning_rate": 3.912288115968878e-06, "loss": 1.7789, "step": 31855500 }, { "epoch": 92.21, "learning_rate": 3.911564468321601e-06, "loss": 1.7767, "step": 31856000 }, { "epoch": 92.21, "learning_rate": 3.910840820674324e-06, "loss": 1.773, "step": 31856500 }, { "epoch": 92.21, "learning_rate": 3.910118620322341e-06, "loss": 1.7807, "step": 31857000 }, { "epoch": 92.21, "learning_rate": 3.90939641997036e-06, "loss": 1.7785, "step": 31857500 }, { "epoch": 92.22, "learning_rate": 3.908674219618377e-06, "loss": 1.7594, "step": 31858000 }, { "epoch": 92.22, "learning_rate": 3.9079505719711005e-06, "loss": 1.7715, "step": 31858500 }, { "epoch": 92.22, "learning_rate": 3.9072269243238235e-06, "loss": 1.7654, "step": 31859000 }, { "epoch": 92.22, "learning_rate": 3.9065032766765475e-06, "loss": 1.7376, "step": 31859500 }, { "epoch": 92.22, "learning_rate": 3.90577962902927e-06, "loss": 1.7823, "step": 31860000 }, { "epoch": 92.22, "learning_rate": 3.905055981381994e-06, "loss": 1.7564, "step": 31860500 }, { "epoch": 92.22, "learning_rate": 3.904332333734717e-06, "loss": 1.7789, "step": 31861000 }, { "epoch": 92.23, "learning_rate": 3.90360868608744e-06, "loss": 1.7689, "step": 31861500 }, { "epoch": 92.23, "learning_rate": 3.902885038440164e-06, "loss": 1.7743, "step": 31862000 }, { "epoch": 92.23, "learning_rate": 3.902161390792886e-06, "loss": 1.8009, "step": 31862500 }, { "epoch": 92.23, "learning_rate": 3.90143774314561e-06, "loss": 1.7835, "step": 31863000 }, { "epoch": 92.23, "learning_rate": 3.900714095498333e-06, "loss": 1.7892, "step": 31863500 }, { "epoch": 92.23, "learning_rate": 3.899990447851056e-06, "loss": 1.7827, "step": 31864000 }, { "epoch": 92.23, "learning_rate": 3.899268247499073e-06, "loss": 1.8006, "step": 31864500 }, { "epoch": 92.24, "learning_rate": 3.898544599851797e-06, "loss": 1.7523, "step": 31865000 }, { "epoch": 92.24, "learning_rate": 3.8978223994998145e-06, "loss": 1.7542, "step": 31865500 }, { "epoch": 92.24, "learning_rate": 3.897098751852538e-06, "loss": 1.7946, "step": 31866000 }, { "epoch": 92.24, "learning_rate": 3.8963751042052615e-06, "loss": 1.7825, "step": 31866500 }, { "epoch": 92.24, "learning_rate": 3.8956514565579846e-06, "loss": 1.7928, "step": 31867000 }, { "epoch": 92.24, "learning_rate": 3.8949278089107085e-06, "loss": 1.7765, "step": 31867500 }, { "epoch": 92.24, "learning_rate": 3.894204161263431e-06, "loss": 1.8117, "step": 31868000 }, { "epoch": 92.25, "learning_rate": 3.893481960911449e-06, "loss": 1.8038, "step": 31868500 }, { "epoch": 92.25, "learning_rate": 3.892758313264172e-06, "loss": 1.7825, "step": 31869000 }, { "epoch": 92.25, "learning_rate": 3.892034665616896e-06, "loss": 1.7693, "step": 31869500 }, { "epoch": 92.25, "learning_rate": 3.891311017969618e-06, "loss": 1.7761, "step": 31870000 }, { "epoch": 92.25, "learning_rate": 3.890587370322342e-06, "loss": 1.7572, "step": 31870500 }, { "epoch": 92.25, "learning_rate": 3.889863722675065e-06, "loss": 1.7769, "step": 31871000 }, { "epoch": 92.25, "learning_rate": 3.889140075027788e-06, "loss": 1.759, "step": 31871500 }, { "epoch": 92.26, "learning_rate": 3.888416427380512e-06, "loss": 1.7669, "step": 31872000 }, { "epoch": 92.26, "learning_rate": 3.887692779733234e-06, "loss": 1.7668, "step": 31872500 }, { "epoch": 92.26, "learning_rate": 3.886970579381252e-06, "loss": 1.7726, "step": 31873000 }, { "epoch": 92.26, "learning_rate": 3.8862469317339755e-06, "loss": 1.7694, "step": 31873500 }, { "epoch": 92.26, "learning_rate": 3.885523284086699e-06, "loss": 1.7745, "step": 31874000 }, { "epoch": 92.26, "learning_rate": 3.8847996364394225e-06, "loss": 1.7721, "step": 31874500 }, { "epoch": 92.27, "learning_rate": 3.8840759887921456e-06, "loss": 1.8038, "step": 31875000 }, { "epoch": 92.27, "learning_rate": 3.883353788440163e-06, "loss": 1.7531, "step": 31875500 }, { "epoch": 92.27, "learning_rate": 3.882630140792887e-06, "loss": 1.7842, "step": 31876000 }, { "epoch": 92.27, "learning_rate": 3.88190649314561e-06, "loss": 1.7826, "step": 31876500 }, { "epoch": 92.27, "learning_rate": 3.881184292793628e-06, "loss": 1.7908, "step": 31877000 }, { "epoch": 92.27, "learning_rate": 3.88046064514635e-06, "loss": 1.7523, "step": 31877500 }, { "epoch": 92.27, "learning_rate": 3.879736997499074e-06, "loss": 1.7673, "step": 31878000 }, { "epoch": 92.28, "learning_rate": 3.879013349851797e-06, "loss": 1.7831, "step": 31878500 }, { "epoch": 92.28, "learning_rate": 3.87828970220452e-06, "loss": 1.7851, "step": 31879000 }, { "epoch": 92.28, "learning_rate": 3.877566054557243e-06, "loss": 1.7711, "step": 31879500 }, { "epoch": 92.28, "learning_rate": 3.876842406909966e-06, "loss": 1.7927, "step": 31880000 }, { "epoch": 92.28, "learning_rate": 3.87611875926269e-06, "loss": 1.761, "step": 31880500 }, { "epoch": 92.28, "learning_rate": 3.875395111615413e-06, "loss": 1.7726, "step": 31881000 }, { "epoch": 92.28, "learning_rate": 3.8746729112634315e-06, "loss": 1.7766, "step": 31881500 }, { "epoch": 92.29, "learning_rate": 3.873949263616155e-06, "loss": 1.7837, "step": 31882000 }, { "epoch": 92.29, "learning_rate": 3.873227063264172e-06, "loss": 1.8061, "step": 31882500 }, { "epoch": 92.29, "learning_rate": 3.872503415616895e-06, "loss": 1.7798, "step": 31883000 }, { "epoch": 92.29, "learning_rate": 3.871779767969619e-06, "loss": 1.7641, "step": 31883500 }, { "epoch": 92.29, "learning_rate": 3.871056120322342e-06, "loss": 1.7884, "step": 31884000 }, { "epoch": 92.29, "learning_rate": 3.870332472675065e-06, "loss": 1.7877, "step": 31884500 }, { "epoch": 92.29, "learning_rate": 3.869608825027788e-06, "loss": 1.7464, "step": 31885000 }, { "epoch": 92.3, "learning_rate": 3.868885177380511e-06, "loss": 1.7593, "step": 31885500 }, { "epoch": 92.3, "learning_rate": 3.868161529733235e-06, "loss": 1.7787, "step": 31886000 }, { "epoch": 92.3, "learning_rate": 3.867439329381252e-06, "loss": 1.765, "step": 31886500 }, { "epoch": 92.3, "learning_rate": 3.8667156817339754e-06, "loss": 1.7916, "step": 31887000 }, { "epoch": 92.3, "learning_rate": 3.865992034086699e-06, "loss": 1.7828, "step": 31887500 }, { "epoch": 92.3, "learning_rate": 3.8652683864394224e-06, "loss": 1.7679, "step": 31888000 }, { "epoch": 92.3, "learning_rate": 3.8645447387921455e-06, "loss": 1.7957, "step": 31888500 }, { "epoch": 92.31, "learning_rate": 3.863822538440163e-06, "loss": 1.7692, "step": 31889000 }, { "epoch": 92.31, "learning_rate": 3.863098890792887e-06, "loss": 1.7698, "step": 31889500 }, { "epoch": 92.31, "learning_rate": 3.86237524314561e-06, "loss": 1.7504, "step": 31890000 }, { "epoch": 92.31, "learning_rate": 3.861651595498333e-06, "loss": 1.7636, "step": 31890500 }, { "epoch": 92.31, "learning_rate": 3.860927947851056e-06, "loss": 1.7871, "step": 31891000 }, { "epoch": 92.31, "learning_rate": 3.860205747499074e-06, "loss": 1.7571, "step": 31891500 }, { "epoch": 92.31, "learning_rate": 3.859482099851797e-06, "loss": 1.7844, "step": 31892000 }, { "epoch": 92.32, "learning_rate": 3.858759899499815e-06, "loss": 1.8082, "step": 31892500 }, { "epoch": 92.32, "learning_rate": 3.858036251852538e-06, "loss": 1.7821, "step": 31893000 }, { "epoch": 92.32, "learning_rate": 3.857312604205261e-06, "loss": 1.7743, "step": 31893500 }, { "epoch": 92.32, "learning_rate": 3.8565889565579845e-06, "loss": 1.8001, "step": 31894000 }, { "epoch": 92.32, "learning_rate": 3.8558653089107075e-06, "loss": 1.7598, "step": 31894500 }, { "epoch": 92.32, "learning_rate": 3.8551416612634315e-06, "loss": 1.7695, "step": 31895000 }, { "epoch": 92.32, "learning_rate": 3.8544180136161545e-06, "loss": 1.7871, "step": 31895500 }, { "epoch": 92.33, "learning_rate": 3.853694365968878e-06, "loss": 1.7558, "step": 31896000 }, { "epoch": 92.33, "learning_rate": 3.852972165616895e-06, "loss": 1.7758, "step": 31896500 }, { "epoch": 92.33, "learning_rate": 3.852248517969619e-06, "loss": 1.7749, "step": 31897000 }, { "epoch": 92.33, "learning_rate": 3.851524870322342e-06, "loss": 1.7753, "step": 31897500 }, { "epoch": 92.33, "learning_rate": 3.850801222675065e-06, "loss": 1.7755, "step": 31898000 }, { "epoch": 92.33, "learning_rate": 3.850077575027788e-06, "loss": 1.7508, "step": 31898500 }, { "epoch": 92.33, "learning_rate": 3.849355374675806e-06, "loss": 1.7702, "step": 31899000 }, { "epoch": 92.34, "learning_rate": 3.848631727028529e-06, "loss": 1.7632, "step": 31899500 }, { "epoch": 92.34, "learning_rate": 3.847908079381252e-06, "loss": 1.7912, "step": 31900000 }, { "epoch": 92.34, "learning_rate": 3.847184431733976e-06, "loss": 1.7922, "step": 31900500 }, { "epoch": 92.34, "learning_rate": 3.8464622313819935e-06, "loss": 1.7867, "step": 31901000 }, { "epoch": 92.34, "learning_rate": 3.8457385837347166e-06, "loss": 1.7604, "step": 31901500 }, { "epoch": 92.34, "learning_rate": 3.84501493608744e-06, "loss": 1.7805, "step": 31902000 }, { "epoch": 92.34, "learning_rate": 3.844292735735458e-06, "loss": 1.7663, "step": 31902500 }, { "epoch": 92.35, "learning_rate": 3.843569088088181e-06, "loss": 1.7816, "step": 31903000 }, { "epoch": 92.35, "learning_rate": 3.842845440440905e-06, "loss": 1.7779, "step": 31903500 }, { "epoch": 92.35, "learning_rate": 3.842121792793627e-06, "loss": 1.7572, "step": 31904000 }, { "epoch": 92.35, "learning_rate": 3.841398145146351e-06, "loss": 1.766, "step": 31904500 }, { "epoch": 92.35, "learning_rate": 3.840674497499074e-06, "loss": 1.7848, "step": 31905000 }, { "epoch": 92.35, "learning_rate": 3.839950849851797e-06, "loss": 1.7535, "step": 31905500 }, { "epoch": 92.35, "learning_rate": 3.839227202204521e-06, "loss": 1.76, "step": 31906000 }, { "epoch": 92.36, "learning_rate": 3.838503554557243e-06, "loss": 1.7579, "step": 31906500 }, { "epoch": 92.36, "learning_rate": 3.837781354205261e-06, "loss": 1.7624, "step": 31907000 }, { "epoch": 92.36, "learning_rate": 3.837057706557984e-06, "loss": 1.7812, "step": 31907500 }, { "epoch": 92.36, "learning_rate": 3.836334058910708e-06, "loss": 1.777, "step": 31908000 }, { "epoch": 92.36, "learning_rate": 3.8356104112634306e-06, "loss": 1.7774, "step": 31908500 }, { "epoch": 92.36, "learning_rate": 3.8348882109114495e-06, "loss": 1.7985, "step": 31909000 }, { "epoch": 92.36, "learning_rate": 3.834164563264172e-06, "loss": 1.7766, "step": 31909500 }, { "epoch": 92.37, "learning_rate": 3.833440915616896e-06, "loss": 1.7926, "step": 31910000 }, { "epoch": 92.37, "learning_rate": 3.832717267969618e-06, "loss": 1.7747, "step": 31910500 }, { "epoch": 92.37, "learning_rate": 3.831993620322342e-06, "loss": 1.8035, "step": 31911000 }, { "epoch": 92.37, "learning_rate": 3.831269972675065e-06, "loss": 1.7618, "step": 31911500 }, { "epoch": 92.37, "learning_rate": 3.830547772323083e-06, "loss": 1.7754, "step": 31912000 }, { "epoch": 92.37, "learning_rate": 3.829824124675806e-06, "loss": 1.7657, "step": 31912500 }, { "epoch": 92.38, "learning_rate": 3.829101924323824e-06, "loss": 1.8, "step": 31913000 }, { "epoch": 92.38, "learning_rate": 3.828378276676546e-06, "loss": 1.7511, "step": 31913500 }, { "epoch": 92.38, "learning_rate": 3.82765462902927e-06, "loss": 1.7652, "step": 31914000 }, { "epoch": 92.38, "learning_rate": 3.8269309813819934e-06, "loss": 1.7793, "step": 31914500 }, { "epoch": 92.38, "learning_rate": 3.8262073337347165e-06, "loss": 1.785, "step": 31915000 }, { "epoch": 92.38, "learning_rate": 3.8254836860874404e-06, "loss": 1.7492, "step": 31915500 }, { "epoch": 92.38, "learning_rate": 3.824760038440163e-06, "loss": 1.784, "step": 31916000 }, { "epoch": 92.39, "learning_rate": 3.824036390792887e-06, "loss": 1.7773, "step": 31916500 }, { "epoch": 92.39, "learning_rate": 3.82331274314561e-06, "loss": 1.7493, "step": 31917000 }, { "epoch": 92.39, "learning_rate": 3.822590542793628e-06, "loss": 1.7818, "step": 31917500 }, { "epoch": 92.39, "learning_rate": 3.821866895146351e-06, "loss": 1.7897, "step": 31918000 }, { "epoch": 92.39, "learning_rate": 3.821143247499074e-06, "loss": 1.7465, "step": 31918500 }, { "epoch": 92.39, "learning_rate": 3.820419599851798e-06, "loss": 1.7894, "step": 31919000 }, { "epoch": 92.39, "learning_rate": 3.81969595220452e-06, "loss": 1.7716, "step": 31919500 }, { "epoch": 92.4, "learning_rate": 3.818973751852538e-06, "loss": 1.7821, "step": 31920000 }, { "epoch": 92.4, "learning_rate": 3.818250104205261e-06, "loss": 1.7533, "step": 31920500 }, { "epoch": 92.4, "learning_rate": 3.817526456557985e-06, "loss": 1.7979, "step": 31921000 }, { "epoch": 92.4, "learning_rate": 3.8168028089107074e-06, "loss": 1.7507, "step": 31921500 }, { "epoch": 92.4, "learning_rate": 3.816079161263431e-06, "loss": 1.7766, "step": 31922000 }, { "epoch": 92.4, "learning_rate": 3.815356960911449e-06, "loss": 1.7583, "step": 31922500 }, { "epoch": 92.4, "learning_rate": 3.8146333132641725e-06, "loss": 1.779, "step": 31923000 }, { "epoch": 92.41, "learning_rate": 3.813909665616895e-06, "loss": 1.7748, "step": 31923500 }, { "epoch": 92.41, "learning_rate": 3.8131860179696187e-06, "loss": 1.7838, "step": 31924000 }, { "epoch": 92.41, "learning_rate": 3.8124623703223414e-06, "loss": 1.7797, "step": 31924500 }, { "epoch": 92.41, "learning_rate": 3.811738722675065e-06, "loss": 1.7736, "step": 31925000 }, { "epoch": 92.41, "learning_rate": 3.8110165223230825e-06, "loss": 1.7424, "step": 31925500 }, { "epoch": 92.41, "learning_rate": 3.810292874675806e-06, "loss": 1.7909, "step": 31926000 }, { "epoch": 92.41, "learning_rate": 3.809569227028529e-06, "loss": 1.7845, "step": 31926500 }, { "epoch": 92.42, "learning_rate": 3.8088455793812526e-06, "loss": 1.7831, "step": 31927000 }, { "epoch": 92.42, "learning_rate": 3.808121931733976e-06, "loss": 1.7706, "step": 31927500 }, { "epoch": 92.42, "learning_rate": 3.8073997313819934e-06, "loss": 1.7736, "step": 31928000 }, { "epoch": 92.42, "learning_rate": 3.806676083734717e-06, "loss": 1.7712, "step": 31928500 }, { "epoch": 92.42, "learning_rate": 3.80595243608744e-06, "loss": 1.7839, "step": 31929000 }, { "epoch": 92.42, "learning_rate": 3.8052287884401635e-06, "loss": 1.7828, "step": 31929500 }, { "epoch": 92.42, "learning_rate": 3.804506588088181e-06, "loss": 1.7627, "step": 31930000 }, { "epoch": 92.43, "learning_rate": 3.8037829404409046e-06, "loss": 1.7765, "step": 31930500 }, { "epoch": 92.43, "learning_rate": 3.8030592927936273e-06, "loss": 1.7624, "step": 31931000 }, { "epoch": 92.43, "learning_rate": 3.802335645146351e-06, "loss": 1.7927, "step": 31931500 }, { "epoch": 92.43, "learning_rate": 3.801611997499074e-06, "loss": 1.7875, "step": 31932000 }, { "epoch": 92.43, "learning_rate": 3.800889797147092e-06, "loss": 1.7547, "step": 31932500 }, { "epoch": 92.43, "learning_rate": 3.8001661494998146e-06, "loss": 1.7834, "step": 31933000 }, { "epoch": 92.43, "learning_rate": 3.799442501852538e-06, "loss": 1.7616, "step": 31933500 }, { "epoch": 92.44, "learning_rate": 3.7987188542052612e-06, "loss": 1.763, "step": 31934000 }, { "epoch": 92.44, "learning_rate": 3.7979952065579847e-06, "loss": 1.795, "step": 31934500 }, { "epoch": 92.44, "learning_rate": 3.797273006206002e-06, "loss": 1.7802, "step": 31935000 }, { "epoch": 92.44, "learning_rate": 3.796549358558726e-06, "loss": 1.782, "step": 31935500 }, { "epoch": 92.44, "learning_rate": 3.7958257109114486e-06, "loss": 1.8097, "step": 31936000 }, { "epoch": 92.44, "learning_rate": 3.795102063264172e-06, "loss": 1.768, "step": 31936500 }, { "epoch": 92.44, "learning_rate": 3.7943798629121897e-06, "loss": 1.7667, "step": 31937000 }, { "epoch": 92.45, "learning_rate": 3.7936562152649132e-06, "loss": 1.7599, "step": 31937500 }, { "epoch": 92.45, "learning_rate": 3.7929325676176367e-06, "loss": 1.7749, "step": 31938000 }, { "epoch": 92.45, "learning_rate": 3.7922089199703594e-06, "loss": 1.754, "step": 31938500 }, { "epoch": 92.45, "learning_rate": 3.791485272323083e-06, "loss": 1.7468, "step": 31939000 }, { "epoch": 92.45, "learning_rate": 3.790761624675806e-06, "loss": 1.7667, "step": 31939500 }, { "epoch": 92.45, "learning_rate": 3.7900379770285295e-06, "loss": 1.7966, "step": 31940000 }, { "epoch": 92.45, "learning_rate": 3.789314329381252e-06, "loss": 1.7634, "step": 31940500 }, { "epoch": 92.46, "learning_rate": 3.7885921290292702e-06, "loss": 1.7729, "step": 31941000 }, { "epoch": 92.46, "learning_rate": 3.7878684813819933e-06, "loss": 1.7492, "step": 31941500 }, { "epoch": 92.46, "learning_rate": 3.787144833734717e-06, "loss": 1.7782, "step": 31942000 }, { "epoch": 92.46, "learning_rate": 3.7864211860874403e-06, "loss": 1.7757, "step": 31942500 }, { "epoch": 92.46, "learning_rate": 3.785697538440163e-06, "loss": 1.783, "step": 31943000 }, { "epoch": 92.46, "learning_rate": 3.7849738907928865e-06, "loss": 1.7831, "step": 31943500 }, { "epoch": 92.46, "learning_rate": 3.7842502431456096e-06, "loss": 1.7426, "step": 31944000 }, { "epoch": 92.47, "learning_rate": 3.783526595498333e-06, "loss": 1.7915, "step": 31944500 }, { "epoch": 92.47, "learning_rate": 3.7828043951463507e-06, "loss": 1.7873, "step": 31945000 }, { "epoch": 92.47, "learning_rate": 3.782082194794368e-06, "loss": 1.8067, "step": 31945500 }, { "epoch": 92.47, "learning_rate": 3.7813585471470915e-06, "loss": 1.7795, "step": 31946000 }, { "epoch": 92.47, "learning_rate": 3.780634899499815e-06, "loss": 1.7824, "step": 31946500 }, { "epoch": 92.47, "learning_rate": 3.779911251852538e-06, "loss": 1.7606, "step": 31947000 }, { "epoch": 92.47, "learning_rate": 3.7791876042052616e-06, "loss": 1.7957, "step": 31947500 }, { "epoch": 92.48, "learning_rate": 3.7784639565579842e-06, "loss": 1.7616, "step": 31948000 }, { "epoch": 92.48, "learning_rate": 3.7777403089107078e-06, "loss": 1.7608, "step": 31948500 }, { "epoch": 92.48, "learning_rate": 3.7770181085587254e-06, "loss": 1.7832, "step": 31949000 }, { "epoch": 92.48, "learning_rate": 3.776294460911449e-06, "loss": 1.7764, "step": 31949500 }, { "epoch": 92.48, "learning_rate": 3.7755708132641716e-06, "loss": 1.7603, "step": 31950000 }, { "epoch": 92.48, "learning_rate": 3.7748471656168955e-06, "loss": 1.787, "step": 31950500 }, { "epoch": 92.49, "learning_rate": 3.774123517969619e-06, "loss": 1.7606, "step": 31951000 }, { "epoch": 92.49, "learning_rate": 3.7733998703223417e-06, "loss": 1.7622, "step": 31951500 }, { "epoch": 92.49, "learning_rate": 3.772676222675065e-06, "loss": 1.7729, "step": 31952000 }, { "epoch": 92.49, "learning_rate": 3.771952575027788e-06, "loss": 1.7907, "step": 31952500 }, { "epoch": 92.49, "learning_rate": 3.7712303746758064e-06, "loss": 1.7556, "step": 31953000 }, { "epoch": 92.49, "learning_rate": 3.770506727028529e-06, "loss": 1.79, "step": 31953500 }, { "epoch": 92.49, "learning_rate": 3.7697830793812525e-06, "loss": 1.802, "step": 31954000 }, { "epoch": 92.5, "learning_rate": 3.7690594317339756e-06, "loss": 1.7641, "step": 31954500 }, { "epoch": 92.5, "learning_rate": 3.768335784086699e-06, "loss": 1.761, "step": 31955000 }, { "epoch": 92.5, "learning_rate": 3.7676121364394226e-06, "loss": 1.7762, "step": 31955500 }, { "epoch": 92.5, "learning_rate": 3.7668884887921453e-06, "loss": 1.76, "step": 31956000 }, { "epoch": 92.5, "learning_rate": 3.7661648411448688e-06, "loss": 1.7792, "step": 31956500 }, { "epoch": 92.5, "learning_rate": 3.765441193497592e-06, "loss": 1.7523, "step": 31957000 }, { "epoch": 92.5, "learning_rate": 3.7647175458503154e-06, "loss": 1.7834, "step": 31957500 }, { "epoch": 92.51, "learning_rate": 3.7639953454983326e-06, "loss": 1.764, "step": 31958000 }, { "epoch": 92.51, "learning_rate": 3.763271697851056e-06, "loss": 1.7727, "step": 31958500 }, { "epoch": 92.51, "learning_rate": 3.762548050203779e-06, "loss": 1.77, "step": 31959000 }, { "epoch": 92.51, "learning_rate": 3.7618244025565027e-06, "loss": 1.7613, "step": 31959500 }, { "epoch": 92.51, "learning_rate": 3.7611022022045204e-06, "loss": 1.7973, "step": 31960000 }, { "epoch": 92.51, "learning_rate": 3.7603800018525385e-06, "loss": 1.7681, "step": 31960500 }, { "epoch": 92.51, "learning_rate": 3.759656354205261e-06, "loss": 1.7801, "step": 31961000 }, { "epoch": 92.52, "learning_rate": 3.7589327065579846e-06, "loss": 1.794, "step": 31961500 }, { "epoch": 92.52, "learning_rate": 3.7582090589107077e-06, "loss": 1.7689, "step": 31962000 }, { "epoch": 92.52, "learning_rate": 3.757485411263431e-06, "loss": 1.8089, "step": 31962500 }, { "epoch": 92.52, "learning_rate": 3.7567617636161547e-06, "loss": 1.7725, "step": 31963000 }, { "epoch": 92.52, "learning_rate": 3.7560381159688774e-06, "loss": 1.7806, "step": 31963500 }, { "epoch": 92.52, "learning_rate": 3.755314468321601e-06, "loss": 1.7774, "step": 31964000 }, { "epoch": 92.52, "learning_rate": 3.754590820674324e-06, "loss": 1.8076, "step": 31964500 }, { "epoch": 92.53, "learning_rate": 3.7538671730270475e-06, "loss": 1.7729, "step": 31965000 }, { "epoch": 92.53, "learning_rate": 3.7531449726750647e-06, "loss": 1.7624, "step": 31965500 }, { "epoch": 92.53, "learning_rate": 3.7524213250277886e-06, "loss": 1.7832, "step": 31966000 }, { "epoch": 92.53, "learning_rate": 3.751699124675806e-06, "loss": 1.7817, "step": 31966500 }, { "epoch": 92.53, "learning_rate": 3.7509769243238236e-06, "loss": 1.7891, "step": 31967000 }, { "epoch": 92.53, "learning_rate": 3.750253276676547e-06, "loss": 1.7847, "step": 31967500 }, { "epoch": 92.53, "learning_rate": 3.7495296290292706e-06, "loss": 1.7466, "step": 31968000 }, { "epoch": 92.54, "learning_rate": 3.7488059813819932e-06, "loss": 1.7834, "step": 31968500 }, { "epoch": 92.54, "learning_rate": 3.7480823337347167e-06, "loss": 1.7816, "step": 31969000 }, { "epoch": 92.54, "learning_rate": 3.74735868608744e-06, "loss": 1.7557, "step": 31969500 }, { "epoch": 92.54, "learning_rate": 3.7466350384401633e-06, "loss": 1.7948, "step": 31970000 }, { "epoch": 92.54, "learning_rate": 3.745911390792886e-06, "loss": 1.7924, "step": 31970500 }, { "epoch": 92.54, "learning_rate": 3.7451891904409045e-06, "loss": 1.7725, "step": 31971000 }, { "epoch": 92.54, "learning_rate": 3.744465542793627e-06, "loss": 1.762, "step": 31971500 }, { "epoch": 92.55, "learning_rate": 3.7437418951463506e-06, "loss": 1.7808, "step": 31972000 }, { "epoch": 92.55, "learning_rate": 3.743018247499074e-06, "loss": 1.785, "step": 31972500 }, { "epoch": 92.55, "learning_rate": 3.7422945998517972e-06, "loss": 1.7628, "step": 31973000 }, { "epoch": 92.55, "learning_rate": 3.7415709522045207e-06, "loss": 1.7749, "step": 31973500 }, { "epoch": 92.55, "learning_rate": 3.7408473045572434e-06, "loss": 1.7616, "step": 31974000 }, { "epoch": 92.55, "learning_rate": 3.740123656909967e-06, "loss": 1.7798, "step": 31974500 }, { "epoch": 92.55, "learning_rate": 3.73940000926269e-06, "loss": 1.778, "step": 31975000 }, { "epoch": 92.56, "learning_rate": 3.7386792562060027e-06, "loss": 1.7672, "step": 31975500 }, { "epoch": 92.56, "learning_rate": 3.7379556085587257e-06, "loss": 1.764, "step": 31976000 }, { "epoch": 92.56, "learning_rate": 3.7372319609114493e-06, "loss": 1.7858, "step": 31976500 }, { "epoch": 92.56, "learning_rate": 3.736508313264172e-06, "loss": 1.7549, "step": 31977000 }, { "epoch": 92.56, "learning_rate": 3.7357846656168954e-06, "loss": 1.7789, "step": 31977500 }, { "epoch": 92.56, "learning_rate": 3.735061017969618e-06, "loss": 1.7594, "step": 31978000 }, { "epoch": 92.56, "learning_rate": 3.734337370322342e-06, "loss": 1.767, "step": 31978500 }, { "epoch": 92.57, "learning_rate": 3.7336137226750655e-06, "loss": 1.7845, "step": 31979000 }, { "epoch": 92.57, "learning_rate": 3.7328915223230828e-06, "loss": 1.7841, "step": 31979500 }, { "epoch": 92.57, "learning_rate": 3.732167874675806e-06, "loss": 1.762, "step": 31980000 }, { "epoch": 92.57, "learning_rate": 3.7314442270285293e-06, "loss": 1.7452, "step": 31980500 }, { "epoch": 92.57, "learning_rate": 3.730720579381253e-06, "loss": 1.7686, "step": 31981000 }, { "epoch": 92.57, "learning_rate": 3.7299969317339755e-06, "loss": 1.7706, "step": 31981500 }, { "epoch": 92.57, "learning_rate": 3.729273284086699e-06, "loss": 1.7842, "step": 31982000 }, { "epoch": 92.58, "learning_rate": 3.728549636439422e-06, "loss": 1.7551, "step": 31982500 }, { "epoch": 92.58, "learning_rate": 3.7278259887921456e-06, "loss": 1.7456, "step": 31983000 }, { "epoch": 92.58, "learning_rate": 3.727103788440163e-06, "loss": 1.7719, "step": 31983500 }, { "epoch": 92.58, "learning_rate": 3.7263801407928863e-06, "loss": 1.7756, "step": 31984000 }, { "epoch": 92.58, "learning_rate": 3.7256564931456094e-06, "loss": 1.7615, "step": 31984500 }, { "epoch": 92.58, "learning_rate": 3.724932845498333e-06, "loss": 1.7608, "step": 31985000 }, { "epoch": 92.58, "learning_rate": 3.7242091978510564e-06, "loss": 1.7611, "step": 31985500 }, { "epoch": 92.59, "learning_rate": 3.723486997499074e-06, "loss": 1.7788, "step": 31986000 }, { "epoch": 92.59, "learning_rate": 3.7227633498517976e-06, "loss": 1.7954, "step": 31986500 }, { "epoch": 92.59, "learning_rate": 3.7220397022045203e-06, "loss": 1.7674, "step": 31987000 }, { "epoch": 92.59, "learning_rate": 3.721317501852538e-06, "loss": 1.7719, "step": 31987500 }, { "epoch": 92.59, "learning_rate": 3.7205938542052614e-06, "loss": 1.7806, "step": 31988000 }, { "epoch": 92.59, "learning_rate": 3.719870206557985e-06, "loss": 1.7666, "step": 31988500 }, { "epoch": 92.6, "learning_rate": 3.7191465589107076e-06, "loss": 1.7452, "step": 31989000 }, { "epoch": 92.6, "learning_rate": 3.718422911263431e-06, "loss": 1.7687, "step": 31989500 }, { "epoch": 92.6, "learning_rate": 3.717699263616154e-06, "loss": 1.7492, "step": 31990000 }, { "epoch": 92.6, "learning_rate": 3.7169756159688777e-06, "loss": 1.7738, "step": 31990500 }, { "epoch": 92.6, "learning_rate": 3.7162519683216003e-06, "loss": 1.7649, "step": 31991000 }, { "epoch": 92.6, "learning_rate": 3.715528320674324e-06, "loss": 1.7895, "step": 31991500 }, { "epoch": 92.6, "learning_rate": 3.7148061203223415e-06, "loss": 1.7775, "step": 31992000 }, { "epoch": 92.61, "learning_rate": 3.714082472675065e-06, "loss": 1.7643, "step": 31992500 }, { "epoch": 92.61, "learning_rate": 3.7133588250277885e-06, "loss": 1.7497, "step": 31993000 }, { "epoch": 92.61, "learning_rate": 3.712635177380511e-06, "loss": 1.7966, "step": 31993500 }, { "epoch": 92.61, "learning_rate": 3.711912977028529e-06, "loss": 1.8035, "step": 31994000 }, { "epoch": 92.61, "learning_rate": 3.7111893293812524e-06, "loss": 1.7864, "step": 31994500 }, { "epoch": 92.61, "learning_rate": 3.710465681733976e-06, "loss": 1.775, "step": 31995000 }, { "epoch": 92.61, "learning_rate": 3.709742034086699e-06, "loss": 1.7903, "step": 31995500 }, { "epoch": 92.62, "learning_rate": 3.7090183864394225e-06, "loss": 1.7622, "step": 31996000 }, { "epoch": 92.62, "learning_rate": 3.708294738792145e-06, "loss": 1.764, "step": 31996500 }, { "epoch": 92.62, "learning_rate": 3.7075710911448686e-06, "loss": 1.7797, "step": 31997000 }, { "epoch": 92.62, "learning_rate": 3.706847443497592e-06, "loss": 1.7771, "step": 31997500 }, { "epoch": 92.62, "learning_rate": 3.706123795850315e-06, "loss": 1.7781, "step": 31998000 }, { "epoch": 92.62, "learning_rate": 3.7054001482030387e-06, "loss": 1.7852, "step": 31998500 }, { "epoch": 92.62, "learning_rate": 3.7046765005557614e-06, "loss": 1.7556, "step": 31999000 }, { "epoch": 92.63, "learning_rate": 3.703952852908485e-06, "loss": 1.7576, "step": 31999500 }, { "epoch": 92.63, "learning_rate": 3.7032335471470917e-06, "loss": 1.7917, "step": 32000000 }, { "epoch": 92.63, "learning_rate": 3.702509899499815e-06, "loss": 1.7783, "step": 32000500 }, { "epoch": 92.63, "learning_rate": 3.7017862518525383e-06, "loss": 1.8063, "step": 32001000 }, { "epoch": 92.63, "learning_rate": 3.701062604205261e-06, "loss": 1.7661, "step": 32001500 }, { "epoch": 92.63, "learning_rate": 3.7003389565579845e-06, "loss": 1.7647, "step": 32002000 }, { "epoch": 92.63, "learning_rate": 3.699615308910708e-06, "loss": 1.7742, "step": 32002500 }, { "epoch": 92.64, "learning_rate": 3.698891661263431e-06, "loss": 1.7785, "step": 32003000 }, { "epoch": 92.64, "learning_rate": 3.6981680136161546e-06, "loss": 1.7839, "step": 32003500 }, { "epoch": 92.64, "learning_rate": 3.6974443659688772e-06, "loss": 1.7548, "step": 32004000 }, { "epoch": 92.64, "learning_rate": 3.6967221656168957e-06, "loss": 1.7795, "step": 32004500 }, { "epoch": 92.64, "learning_rate": 3.6959985179696184e-06, "loss": 1.7703, "step": 32005000 }, { "epoch": 92.64, "learning_rate": 3.6952763176176365e-06, "loss": 1.7813, "step": 32005500 }, { "epoch": 92.64, "learning_rate": 3.6945526699703596e-06, "loss": 1.7859, "step": 32006000 }, { "epoch": 92.65, "learning_rate": 3.693829022323083e-06, "loss": 1.7936, "step": 32006500 }, { "epoch": 92.65, "learning_rate": 3.6931053746758057e-06, "loss": 1.7538, "step": 32007000 }, { "epoch": 92.65, "learning_rate": 3.6923817270285292e-06, "loss": 1.7984, "step": 32007500 }, { "epoch": 92.65, "learning_rate": 3.6916580793812523e-06, "loss": 1.7891, "step": 32008000 }, { "epoch": 92.65, "learning_rate": 3.690934431733976e-06, "loss": 1.7977, "step": 32008500 }, { "epoch": 92.65, "learning_rate": 3.690212231381993e-06, "loss": 1.7701, "step": 32009000 }, { "epoch": 92.65, "learning_rate": 3.689488583734717e-06, "loss": 1.7649, "step": 32009500 }, { "epoch": 92.66, "learning_rate": 3.6887649360874405e-06, "loss": 1.7552, "step": 32010000 }, { "epoch": 92.66, "learning_rate": 3.688041288440163e-06, "loss": 1.7748, "step": 32010500 }, { "epoch": 92.66, "learning_rate": 3.6873176407928867e-06, "loss": 1.7695, "step": 32011000 }, { "epoch": 92.66, "learning_rate": 3.6865954404409043e-06, "loss": 1.7814, "step": 32011500 }, { "epoch": 92.66, "learning_rate": 3.685871792793628e-06, "loss": 1.7529, "step": 32012000 }, { "epoch": 92.66, "learning_rate": 3.6851481451463505e-06, "loss": 1.7835, "step": 32012500 }, { "epoch": 92.66, "learning_rate": 3.684424497499074e-06, "loss": 1.8008, "step": 32013000 }, { "epoch": 92.67, "learning_rate": 3.683700849851797e-06, "loss": 1.7747, "step": 32013500 }, { "epoch": 92.67, "learning_rate": 3.6829772022045206e-06, "loss": 1.7797, "step": 32014000 }, { "epoch": 92.67, "learning_rate": 3.682255001852538e-06, "loss": 1.7668, "step": 32014500 }, { "epoch": 92.67, "learning_rate": 3.6815313542052613e-06, "loss": 1.7889, "step": 32015000 }, { "epoch": 92.67, "learning_rate": 3.6808077065579844e-06, "loss": 1.7863, "step": 32015500 }, { "epoch": 92.67, "learning_rate": 3.680084058910708e-06, "loss": 1.784, "step": 32016000 }, { "epoch": 92.67, "learning_rate": 3.6793604112634314e-06, "loss": 1.7948, "step": 32016500 }, { "epoch": 92.68, "learning_rate": 3.678636763616154e-06, "loss": 1.7879, "step": 32017000 }, { "epoch": 92.68, "learning_rate": 3.6779131159688776e-06, "loss": 1.7856, "step": 32017500 }, { "epoch": 92.68, "learning_rate": 3.6771894683216007e-06, "loss": 1.794, "step": 32018000 }, { "epoch": 92.68, "learning_rate": 3.676465820674324e-06, "loss": 1.7661, "step": 32018500 }, { "epoch": 92.68, "learning_rate": 3.675742173027047e-06, "loss": 1.7865, "step": 32019000 }, { "epoch": 92.68, "learning_rate": 3.6750185253797703e-06, "loss": 1.7573, "step": 32019500 }, { "epoch": 92.68, "learning_rate": 3.674294877732494e-06, "loss": 1.788, "step": 32020000 }, { "epoch": 92.69, "learning_rate": 3.673574124675806e-06, "loss": 1.7869, "step": 32020500 }, { "epoch": 92.69, "learning_rate": 3.672850477028529e-06, "loss": 1.7952, "step": 32021000 }, { "epoch": 92.69, "learning_rate": 3.6721268293812527e-06, "loss": 1.778, "step": 32021500 }, { "epoch": 92.69, "learning_rate": 3.6714031817339753e-06, "loss": 1.7722, "step": 32022000 }, { "epoch": 92.69, "learning_rate": 3.670679534086699e-06, "loss": 1.7627, "step": 32022500 }, { "epoch": 92.69, "learning_rate": 3.669958781030011e-06, "loss": 1.77, "step": 32023000 }, { "epoch": 92.69, "learning_rate": 3.6692351333827346e-06, "loss": 1.7775, "step": 32023500 }, { "epoch": 92.7, "learning_rate": 3.6685114857354577e-06, "loss": 1.7771, "step": 32024000 }, { "epoch": 92.7, "learning_rate": 3.667787838088181e-06, "loss": 1.7914, "step": 32024500 }, { "epoch": 92.7, "learning_rate": 3.667064190440904e-06, "loss": 1.769, "step": 32025000 }, { "epoch": 92.7, "learning_rate": 3.6663405427936274e-06, "loss": 1.7979, "step": 32025500 }, { "epoch": 92.7, "learning_rate": 3.665616895146351e-06, "loss": 1.7792, "step": 32026000 }, { "epoch": 92.7, "learning_rate": 3.664893247499074e-06, "loss": 1.7856, "step": 32026500 }, { "epoch": 92.71, "learning_rate": 3.6641695998517974e-06, "loss": 1.7741, "step": 32027000 }, { "epoch": 92.71, "learning_rate": 3.66344595220452e-06, "loss": 1.7679, "step": 32027500 }, { "epoch": 92.71, "learning_rate": 3.662723751852538e-06, "loss": 1.7491, "step": 32028000 }, { "epoch": 92.71, "learning_rate": 3.6620001042052613e-06, "loss": 1.7584, "step": 32028500 }, { "epoch": 92.71, "learning_rate": 3.661276456557985e-06, "loss": 1.764, "step": 32029000 }, { "epoch": 92.71, "learning_rate": 3.6605528089107074e-06, "loss": 1.7705, "step": 32029500 }, { "epoch": 92.71, "learning_rate": 3.659829161263431e-06, "loss": 1.7531, "step": 32030000 }, { "epoch": 92.72, "learning_rate": 3.6591055136161545e-06, "loss": 1.7544, "step": 32030500 }, { "epoch": 92.72, "learning_rate": 3.658383313264172e-06, "loss": 1.774, "step": 32031000 }, { "epoch": 92.72, "learning_rate": 3.657659665616895e-06, "loss": 1.7868, "step": 32031500 }, { "epoch": 92.72, "learning_rate": 3.6569360179696187e-06, "loss": 1.7703, "step": 32032000 }, { "epoch": 92.72, "learning_rate": 3.6562123703223422e-06, "loss": 1.7876, "step": 32032500 }, { "epoch": 92.72, "learning_rate": 3.655488722675065e-06, "loss": 1.7697, "step": 32033000 }, { "epoch": 92.72, "learning_rate": 3.654766522323083e-06, "loss": 1.7987, "step": 32033500 }, { "epoch": 92.73, "learning_rate": 3.654042874675806e-06, "loss": 1.7685, "step": 32034000 }, { "epoch": 92.73, "learning_rate": 3.6533192270285296e-06, "loss": 1.7865, "step": 32034500 }, { "epoch": 92.73, "learning_rate": 3.652595579381252e-06, "loss": 1.7676, "step": 32035000 }, { "epoch": 92.73, "learning_rate": 3.6518719317339757e-06, "loss": 1.7751, "step": 32035500 }, { "epoch": 92.73, "learning_rate": 3.6511497313819934e-06, "loss": 1.7719, "step": 32036000 }, { "epoch": 92.73, "learning_rate": 3.650426083734717e-06, "loss": 1.792, "step": 32036500 }, { "epoch": 92.73, "learning_rate": 3.6497024360874395e-06, "loss": 1.7953, "step": 32037000 }, { "epoch": 92.74, "learning_rate": 3.6489787884401635e-06, "loss": 1.7553, "step": 32037500 }, { "epoch": 92.74, "learning_rate": 3.6482565880881807e-06, "loss": 1.7812, "step": 32038000 }, { "epoch": 92.74, "learning_rate": 3.6475329404409042e-06, "loss": 1.7846, "step": 32038500 }, { "epoch": 92.74, "learning_rate": 3.6468092927936273e-06, "loss": 1.7648, "step": 32039000 }, { "epoch": 92.74, "learning_rate": 3.646085645146351e-06, "loss": 1.7831, "step": 32039500 }, { "epoch": 92.74, "learning_rate": 3.6453619974990743e-06, "loss": 1.7745, "step": 32040000 }, { "epoch": 92.74, "learning_rate": 3.6446397971470916e-06, "loss": 1.764, "step": 32040500 }, { "epoch": 92.75, "learning_rate": 3.6439161494998146e-06, "loss": 1.7974, "step": 32041000 }, { "epoch": 92.75, "learning_rate": 3.6431939491478327e-06, "loss": 1.7731, "step": 32041500 }, { "epoch": 92.75, "learning_rate": 3.642470301500556e-06, "loss": 1.7703, "step": 32042000 }, { "epoch": 92.75, "learning_rate": 3.6417466538532793e-06, "loss": 1.7549, "step": 32042500 }, { "epoch": 92.75, "learning_rate": 3.641023006206003e-06, "loss": 1.7765, "step": 32043000 }, { "epoch": 92.75, "learning_rate": 3.6402993585587255e-06, "loss": 1.7747, "step": 32043500 }, { "epoch": 92.75, "learning_rate": 3.639577158206743e-06, "loss": 1.7924, "step": 32044000 }, { "epoch": 92.76, "learning_rate": 3.6388535105594667e-06, "loss": 1.7658, "step": 32044500 }, { "epoch": 92.76, "learning_rate": 3.63812986291219e-06, "loss": 1.7727, "step": 32045000 }, { "epoch": 92.76, "learning_rate": 3.637406215264913e-06, "loss": 1.7906, "step": 32045500 }, { "epoch": 92.76, "learning_rate": 3.6366825676176363e-06, "loss": 1.7705, "step": 32046000 }, { "epoch": 92.76, "learning_rate": 3.6359589199703594e-06, "loss": 1.775, "step": 32046500 }, { "epoch": 92.76, "learning_rate": 3.635235272323083e-06, "loss": 1.7783, "step": 32047000 }, { "epoch": 92.76, "learning_rate": 3.6345116246758056e-06, "loss": 1.7647, "step": 32047500 }, { "epoch": 92.77, "learning_rate": 3.633789424323824e-06, "loss": 1.786, "step": 32048000 }, { "epoch": 92.77, "learning_rate": 3.6330657766765468e-06, "loss": 1.8033, "step": 32048500 }, { "epoch": 92.77, "learning_rate": 3.6323421290292703e-06, "loss": 1.7554, "step": 32049000 }, { "epoch": 92.77, "learning_rate": 3.6316184813819938e-06, "loss": 1.788, "step": 32049500 }, { "epoch": 92.77, "learning_rate": 3.630894833734717e-06, "loss": 1.7604, "step": 32050000 }, { "epoch": 92.77, "learning_rate": 3.630172633382734e-06, "loss": 1.7639, "step": 32050500 }, { "epoch": 92.77, "learning_rate": 3.6294489857354576e-06, "loss": 1.7711, "step": 32051000 }, { "epoch": 92.78, "learning_rate": 3.628725338088181e-06, "loss": 1.7712, "step": 32051500 }, { "epoch": 92.78, "learning_rate": 3.628001690440904e-06, "loss": 1.79, "step": 32052000 }, { "epoch": 92.78, "learning_rate": 3.6272780427936277e-06, "loss": 1.7897, "step": 32052500 }, { "epoch": 92.78, "learning_rate": 3.6265558424416454e-06, "loss": 1.7733, "step": 32053000 }, { "epoch": 92.78, "learning_rate": 3.625832194794369e-06, "loss": 1.7718, "step": 32053500 }, { "epoch": 92.78, "learning_rate": 3.6251085471470915e-06, "loss": 1.7621, "step": 32054000 }, { "epoch": 92.78, "learning_rate": 3.624384899499815e-06, "loss": 1.8032, "step": 32054500 }, { "epoch": 92.79, "learning_rate": 3.6236626991478327e-06, "loss": 1.7702, "step": 32055000 }, { "epoch": 92.79, "learning_rate": 3.622939051500556e-06, "loss": 1.7734, "step": 32055500 }, { "epoch": 92.79, "learning_rate": 3.622215403853279e-06, "loss": 1.7538, "step": 32056000 }, { "epoch": 92.79, "learning_rate": 3.6214932035012974e-06, "loss": 1.8012, "step": 32056500 }, { "epoch": 92.79, "learning_rate": 3.62076955585402e-06, "loss": 1.7739, "step": 32057000 }, { "epoch": 92.79, "learning_rate": 3.6200459082067435e-06, "loss": 1.7914, "step": 32057500 }, { "epoch": 92.79, "learning_rate": 3.619322260559466e-06, "loss": 1.7885, "step": 32058000 }, { "epoch": 92.8, "learning_rate": 3.6185986129121897e-06, "loss": 1.7678, "step": 32058500 }, { "epoch": 92.8, "learning_rate": 3.617874965264913e-06, "loss": 1.7959, "step": 32059000 }, { "epoch": 92.8, "learning_rate": 3.6171513176176363e-06, "loss": 1.7622, "step": 32059500 }, { "epoch": 92.8, "learning_rate": 3.6164276699703598e-06, "loss": 1.7757, "step": 32060000 }, { "epoch": 92.8, "learning_rate": 3.6157040223230824e-06, "loss": 1.7716, "step": 32060500 }, { "epoch": 92.8, "learning_rate": 3.614981821971101e-06, "loss": 1.7628, "step": 32061000 }, { "epoch": 92.8, "learning_rate": 3.6142581743238236e-06, "loss": 1.7528, "step": 32061500 }, { "epoch": 92.81, "learning_rate": 3.613534526676547e-06, "loss": 1.7703, "step": 32062000 }, { "epoch": 92.81, "learning_rate": 3.61281087902927e-06, "loss": 1.7634, "step": 32062500 }, { "epoch": 92.81, "learning_rate": 3.6120872313819937e-06, "loss": 1.7712, "step": 32063000 }, { "epoch": 92.81, "learning_rate": 3.6113635837347172e-06, "loss": 1.7828, "step": 32063500 }, { "epoch": 92.81, "learning_rate": 3.61063993608744e-06, "loss": 1.7978, "step": 32064000 }, { "epoch": 92.81, "learning_rate": 3.6099177357354575e-06, "loss": 1.7777, "step": 32064500 }, { "epoch": 92.82, "learning_rate": 3.609194088088181e-06, "loss": 1.7716, "step": 32065000 }, { "epoch": 92.82, "learning_rate": 3.6084704404409046e-06, "loss": 1.7762, "step": 32065500 }, { "epoch": 92.82, "learning_rate": 3.607746792793627e-06, "loss": 1.7715, "step": 32066000 }, { "epoch": 92.82, "learning_rate": 3.6070231451463507e-06, "loss": 1.784, "step": 32066500 }, { "epoch": 92.82, "learning_rate": 3.6063009447943684e-06, "loss": 1.7685, "step": 32067000 }, { "epoch": 92.82, "learning_rate": 3.605577297147092e-06, "loss": 1.7768, "step": 32067500 }, { "epoch": 92.82, "learning_rate": 3.6048550967951096e-06, "loss": 1.7755, "step": 32068000 }, { "epoch": 92.83, "learning_rate": 3.604131449147833e-06, "loss": 1.7984, "step": 32068500 }, { "epoch": 92.83, "learning_rate": 3.6034078015005557e-06, "loss": 1.8005, "step": 32069000 }, { "epoch": 92.83, "learning_rate": 3.6026841538532792e-06, "loss": 1.7651, "step": 32069500 }, { "epoch": 92.83, "learning_rate": 3.6019605062060023e-06, "loss": 1.7886, "step": 32070000 }, { "epoch": 92.83, "learning_rate": 3.6012383058540204e-06, "loss": 1.8197, "step": 32070500 }, { "epoch": 92.83, "learning_rate": 3.600514658206743e-06, "loss": 1.7688, "step": 32071000 }, { "epoch": 92.83, "learning_rate": 3.5997910105594666e-06, "loss": 1.757, "step": 32071500 }, { "epoch": 92.84, "learning_rate": 3.5990673629121896e-06, "loss": 1.784, "step": 32072000 }, { "epoch": 92.84, "learning_rate": 3.598343715264913e-06, "loss": 1.8017, "step": 32072500 }, { "epoch": 92.84, "learning_rate": 3.5976200676176367e-06, "loss": 1.764, "step": 32073000 }, { "epoch": 92.84, "learning_rate": 3.5968964199703593e-06, "loss": 1.779, "step": 32073500 }, { "epoch": 92.84, "learning_rate": 3.596172772323083e-06, "loss": 1.7744, "step": 32074000 }, { "epoch": 92.84, "learning_rate": 3.5954505719711005e-06, "loss": 1.781, "step": 32074500 }, { "epoch": 92.84, "learning_rate": 3.594726924323824e-06, "loss": 1.7728, "step": 32075000 }, { "epoch": 92.85, "learning_rate": 3.594003276676547e-06, "loss": 1.7899, "step": 32075500 }, { "epoch": 92.85, "learning_rate": 3.5932796290292706e-06, "loss": 1.7678, "step": 32076000 }, { "epoch": 92.85, "learning_rate": 3.592557428677288e-06, "loss": 1.7582, "step": 32076500 }, { "epoch": 92.85, "learning_rate": 3.5918337810300113e-06, "loss": 1.7599, "step": 32077000 }, { "epoch": 92.85, "learning_rate": 3.5911101333827344e-06, "loss": 1.7797, "step": 32077500 }, { "epoch": 92.85, "learning_rate": 3.590386485735458e-06, "loss": 1.7813, "step": 32078000 }, { "epoch": 92.85, "learning_rate": 3.5896628380881806e-06, "loss": 1.7532, "step": 32078500 }, { "epoch": 92.86, "learning_rate": 3.588940637736199e-06, "loss": 1.7647, "step": 32079000 }, { "epoch": 92.86, "learning_rate": 3.5882169900889217e-06, "loss": 1.7886, "step": 32079500 }, { "epoch": 92.86, "learning_rate": 3.5874933424416453e-06, "loss": 1.7581, "step": 32080000 }, { "epoch": 92.86, "learning_rate": 3.586769694794368e-06, "loss": 1.7706, "step": 32080500 }, { "epoch": 92.86, "learning_rate": 3.586046047147092e-06, "loss": 1.7578, "step": 32081000 }, { "epoch": 92.86, "learning_rate": 3.5853223994998153e-06, "loss": 1.7915, "step": 32081500 }, { "epoch": 92.86, "learning_rate": 3.584598751852538e-06, "loss": 1.7543, "step": 32082000 }, { "epoch": 92.87, "learning_rate": 3.583876551500556e-06, "loss": 1.7735, "step": 32082500 }, { "epoch": 92.87, "learning_rate": 3.583152903853279e-06, "loss": 1.7543, "step": 32083000 }, { "epoch": 92.87, "learning_rate": 3.5824307035012964e-06, "loss": 1.7676, "step": 32083500 }, { "epoch": 92.87, "learning_rate": 3.58170705585402e-06, "loss": 1.7784, "step": 32084000 }, { "epoch": 92.87, "learning_rate": 3.580983408206744e-06, "loss": 1.7879, "step": 32084500 }, { "epoch": 92.87, "learning_rate": 3.5802597605594665e-06, "loss": 1.7893, "step": 32085000 }, { "epoch": 92.87, "learning_rate": 3.57953611291219e-06, "loss": 1.7725, "step": 32085500 }, { "epoch": 92.88, "learning_rate": 3.5788124652649127e-06, "loss": 1.7686, "step": 32086000 }, { "epoch": 92.88, "learning_rate": 3.578088817617636e-06, "loss": 1.7823, "step": 32086500 }, { "epoch": 92.88, "learning_rate": 3.5773651699703597e-06, "loss": 1.7811, "step": 32087000 }, { "epoch": 92.88, "learning_rate": 3.5766429696183774e-06, "loss": 1.7889, "step": 32087500 }, { "epoch": 92.88, "learning_rate": 3.575920769266395e-06, "loss": 1.7751, "step": 32088000 }, { "epoch": 92.88, "learning_rate": 3.5751971216191185e-06, "loss": 1.7505, "step": 32088500 }, { "epoch": 92.88, "learning_rate": 3.574474921267136e-06, "loss": 1.7647, "step": 32089000 }, { "epoch": 92.89, "learning_rate": 3.5737512736198597e-06, "loss": 1.775, "step": 32089500 }, { "epoch": 92.89, "learning_rate": 3.5730276259725824e-06, "loss": 1.7702, "step": 32090000 }, { "epoch": 92.89, "learning_rate": 3.572303978325306e-06, "loss": 1.7834, "step": 32090500 }, { "epoch": 92.89, "learning_rate": 3.571580330678029e-06, "loss": 1.7728, "step": 32091000 }, { "epoch": 92.89, "learning_rate": 3.570858130326047e-06, "loss": 1.7419, "step": 32091500 }, { "epoch": 92.89, "learning_rate": 3.5701344826787697e-06, "loss": 1.7758, "step": 32092000 }, { "epoch": 92.89, "learning_rate": 3.569410835031493e-06, "loss": 1.7897, "step": 32092500 }, { "epoch": 92.9, "learning_rate": 3.5686871873842163e-06, "loss": 1.7685, "step": 32093000 }, { "epoch": 92.9, "learning_rate": 3.56796353973694e-06, "loss": 1.7747, "step": 32093500 }, { "epoch": 92.9, "learning_rate": 3.5672398920896633e-06, "loss": 1.7791, "step": 32094000 }, { "epoch": 92.9, "learning_rate": 3.566516244442386e-06, "loss": 1.765, "step": 32094500 }, { "epoch": 92.9, "learning_rate": 3.5657925967951095e-06, "loss": 1.7858, "step": 32095000 }, { "epoch": 92.9, "learning_rate": 3.5650689491478325e-06, "loss": 1.7731, "step": 32095500 }, { "epoch": 92.9, "learning_rate": 3.564345301500556e-06, "loss": 1.7498, "step": 32096000 }, { "epoch": 92.91, "learning_rate": 3.5636216538532795e-06, "loss": 1.7634, "step": 32096500 }, { "epoch": 92.91, "learning_rate": 3.562898006206002e-06, "loss": 1.7866, "step": 32097000 }, { "epoch": 92.91, "learning_rate": 3.5621743585587257e-06, "loss": 1.7829, "step": 32097500 }, { "epoch": 92.91, "learning_rate": 3.561450710911449e-06, "loss": 1.7846, "step": 32098000 }, { "epoch": 92.91, "learning_rate": 3.5607270632641723e-06, "loss": 1.7972, "step": 32098500 }, { "epoch": 92.91, "learning_rate": 3.560003415616895e-06, "loss": 1.7561, "step": 32099000 }, { "epoch": 92.91, "learning_rate": 3.5592797679696185e-06, "loss": 1.7992, "step": 32099500 }, { "epoch": 92.92, "learning_rate": 3.558557567617636e-06, "loss": 1.7587, "step": 32100000 }, { "epoch": 92.92, "learning_rate": 3.5578339199703596e-06, "loss": 1.782, "step": 32100500 }, { "epoch": 92.92, "learning_rate": 3.557110272323083e-06, "loss": 1.7838, "step": 32101000 }, { "epoch": 92.92, "learning_rate": 3.556386624675806e-06, "loss": 1.7716, "step": 32101500 }, { "epoch": 92.92, "learning_rate": 3.5556644243238235e-06, "loss": 1.7713, "step": 32102000 }, { "epoch": 92.92, "learning_rate": 3.554940776676547e-06, "loss": 1.7684, "step": 32102500 }, { "epoch": 92.93, "learning_rate": 3.5542171290292705e-06, "loss": 1.7669, "step": 32103000 }, { "epoch": 92.93, "learning_rate": 3.5534934813819936e-06, "loss": 1.7678, "step": 32103500 }, { "epoch": 92.93, "learning_rate": 3.552769833734717e-06, "loss": 1.7901, "step": 32104000 }, { "epoch": 92.93, "learning_rate": 3.5520461860874397e-06, "loss": 1.7466, "step": 32104500 }, { "epoch": 92.93, "learning_rate": 3.551323985735458e-06, "loss": 1.7778, "step": 32105000 }, { "epoch": 92.93, "learning_rate": 3.550600338088181e-06, "loss": 1.7836, "step": 32105500 }, { "epoch": 92.93, "learning_rate": 3.5498766904409044e-06, "loss": 1.784, "step": 32106000 }, { "epoch": 92.94, "learning_rate": 3.549154490088922e-06, "loss": 1.7879, "step": 32106500 }, { "epoch": 92.94, "learning_rate": 3.5484308424416456e-06, "loss": 1.7701, "step": 32107000 }, { "epoch": 92.94, "learning_rate": 3.5477071947943682e-06, "loss": 1.7682, "step": 32107500 }, { "epoch": 92.94, "learning_rate": 3.5469849944423863e-06, "loss": 1.7861, "step": 32108000 }, { "epoch": 92.94, "learning_rate": 3.5462613467951094e-06, "loss": 1.7756, "step": 32108500 }, { "epoch": 92.94, "learning_rate": 3.545537699147833e-06, "loss": 1.7777, "step": 32109000 }, { "epoch": 92.94, "learning_rate": 3.5448140515005556e-06, "loss": 1.7561, "step": 32109500 }, { "epoch": 92.95, "learning_rate": 3.544091851148574e-06, "loss": 1.8059, "step": 32110000 }, { "epoch": 92.95, "learning_rate": 3.5433682035012967e-06, "loss": 1.7925, "step": 32110500 }, { "epoch": 92.95, "learning_rate": 3.5426445558540203e-06, "loss": 1.773, "step": 32111000 }, { "epoch": 92.95, "learning_rate": 3.541920908206743e-06, "loss": 1.7668, "step": 32111500 }, { "epoch": 92.95, "learning_rate": 3.541197260559467e-06, "loss": 1.7761, "step": 32112000 }, { "epoch": 92.95, "learning_rate": 3.5404736129121903e-06, "loss": 1.7826, "step": 32112500 }, { "epoch": 92.95, "learning_rate": 3.539749965264913e-06, "loss": 1.7907, "step": 32113000 }, { "epoch": 92.96, "learning_rate": 3.5390263176176365e-06, "loss": 1.7522, "step": 32113500 }, { "epoch": 92.96, "learning_rate": 3.538302669970359e-06, "loss": 1.769, "step": 32114000 }, { "epoch": 92.96, "learning_rate": 3.5375790223230827e-06, "loss": 1.7625, "step": 32114500 }, { "epoch": 92.96, "learning_rate": 3.5368553746758057e-06, "loss": 1.7607, "step": 32115000 }, { "epoch": 92.96, "learning_rate": 3.5361317270285292e-06, "loss": 1.7522, "step": 32115500 }, { "epoch": 92.96, "learning_rate": 3.5354080793812527e-06, "loss": 1.8123, "step": 32116000 }, { "epoch": 92.96, "learning_rate": 3.5346858790292704e-06, "loss": 1.7778, "step": 32116500 }, { "epoch": 92.97, "learning_rate": 3.533962231381994e-06, "loss": 1.7942, "step": 32117000 }, { "epoch": 92.97, "learning_rate": 3.5332385837347166e-06, "loss": 1.7577, "step": 32117500 }, { "epoch": 92.97, "learning_rate": 3.53251493608744e-06, "loss": 1.7679, "step": 32118000 }, { "epoch": 92.97, "learning_rate": 3.531791288440163e-06, "loss": 1.7619, "step": 32118500 }, { "epoch": 92.97, "learning_rate": 3.5310676407928867e-06, "loss": 1.7514, "step": 32119000 }, { "epoch": 92.97, "learning_rate": 3.5303439931456093e-06, "loss": 1.7649, "step": 32119500 }, { "epoch": 92.97, "learning_rate": 3.529620345498333e-06, "loss": 1.7684, "step": 32120000 }, { "epoch": 92.98, "learning_rate": 3.5288966978510563e-06, "loss": 1.7434, "step": 32120500 }, { "epoch": 92.98, "learning_rate": 3.5281730502037794e-06, "loss": 1.7863, "step": 32121000 }, { "epoch": 92.98, "learning_rate": 3.5274508498517975e-06, "loss": 1.7653, "step": 32121500 }, { "epoch": 92.98, "learning_rate": 3.52672720220452e-06, "loss": 1.7639, "step": 32122000 }, { "epoch": 92.98, "learning_rate": 3.5260035545572437e-06, "loss": 1.7587, "step": 32122500 }, { "epoch": 92.98, "learning_rate": 3.5252799069099668e-06, "loss": 1.7557, "step": 32123000 }, { "epoch": 92.98, "learning_rate": 3.5245562592626903e-06, "loss": 1.7641, "step": 32123500 }, { "epoch": 92.99, "learning_rate": 3.523832611615413e-06, "loss": 1.7649, "step": 32124000 }, { "epoch": 92.99, "learning_rate": 3.5231089639681364e-06, "loss": 1.7617, "step": 32124500 }, { "epoch": 92.99, "learning_rate": 3.52238531632086e-06, "loss": 1.7858, "step": 32125000 }, { "epoch": 92.99, "learning_rate": 3.5216631159688776e-06, "loss": 1.7771, "step": 32125500 }, { "epoch": 92.99, "learning_rate": 3.5209394683216003e-06, "loss": 1.7751, "step": 32126000 }, { "epoch": 92.99, "learning_rate": 3.5202172679696188e-06, "loss": 1.7712, "step": 32126500 }, { "epoch": 92.99, "learning_rate": 3.5194936203223414e-06, "loss": 1.7871, "step": 32127000 }, { "epoch": 93.0, "learning_rate": 3.518769972675065e-06, "loss": 1.7752, "step": 32127500 }, { "epoch": 93.0, "learning_rate": 3.5180463250277884e-06, "loss": 1.7816, "step": 32128000 }, { "epoch": 93.0, "learning_rate": 3.5173226773805115e-06, "loss": 1.7811, "step": 32128500 }, { "epoch": 93.0, "eval_accuracy": 0.6918156764263562, "eval_accuracy_mlm": 0.6613593936853893, "eval_accuracy_nsp": 0.8553436636372386, "eval_loss": 2.2011306285858154, "eval_runtime": 331.7916, "eval_samples_per_second": 1315.241, "eval_steps_per_second": 54.802, "step": 32128896 }, { "epoch": 93.0, "learning_rate": 3.516599029733235e-06, "loss": 1.7905, "step": 32129000 }, { "epoch": 93.0, "learning_rate": 3.5158753820859577e-06, "loss": 1.7911, "step": 32129500 }, { "epoch": 93.0, "learning_rate": 3.515151734438681e-06, "loss": 1.7695, "step": 32130000 }, { "epoch": 93.0, "learning_rate": 3.514429534086699e-06, "loss": 1.7532, "step": 32130500 }, { "epoch": 93.01, "learning_rate": 3.5137058864394224e-06, "loss": 1.7725, "step": 32131000 }, { "epoch": 93.01, "learning_rate": 3.512982238792145e-06, "loss": 1.7657, "step": 32131500 }, { "epoch": 93.01, "learning_rate": 3.5122600384401635e-06, "loss": 1.7716, "step": 32132000 }, { "epoch": 93.01, "learning_rate": 3.511536390792886e-06, "loss": 1.7464, "step": 32132500 }, { "epoch": 93.01, "learning_rate": 3.5108127431456097e-06, "loss": 1.7564, "step": 32133000 }, { "epoch": 93.01, "learning_rate": 3.5100890954983328e-06, "loss": 1.7777, "step": 32133500 }, { "epoch": 93.01, "learning_rate": 3.5093654478510563e-06, "loss": 1.7677, "step": 32134000 }, { "epoch": 93.02, "learning_rate": 3.5086432474990735e-06, "loss": 1.7675, "step": 32134500 }, { "epoch": 93.02, "learning_rate": 3.507919599851797e-06, "loss": 1.768, "step": 32135000 }, { "epoch": 93.02, "learning_rate": 3.50719595220452e-06, "loss": 1.7747, "step": 32135500 }, { "epoch": 93.02, "learning_rate": 3.5064723045572436e-06, "loss": 1.7666, "step": 32136000 }, { "epoch": 93.02, "learning_rate": 3.505748656909967e-06, "loss": 1.7683, "step": 32136500 }, { "epoch": 93.02, "learning_rate": 3.505026456557985e-06, "loss": 1.788, "step": 32137000 }, { "epoch": 93.02, "learning_rate": 3.5043028089107083e-06, "loss": 1.7528, "step": 32137500 }, { "epoch": 93.03, "learning_rate": 3.503579161263431e-06, "loss": 1.7937, "step": 32138000 }, { "epoch": 93.03, "learning_rate": 3.5028555136161545e-06, "loss": 1.7552, "step": 32138500 }, { "epoch": 93.03, "learning_rate": 3.502131865968877e-06, "loss": 1.7597, "step": 32139000 }, { "epoch": 93.03, "learning_rate": 3.5014096656168956e-06, "loss": 1.7638, "step": 32139500 }, { "epoch": 93.03, "learning_rate": 3.5006860179696183e-06, "loss": 1.7822, "step": 32140000 }, { "epoch": 93.03, "learning_rate": 3.499962370322342e-06, "loss": 1.7662, "step": 32140500 }, { "epoch": 93.04, "learning_rate": 3.499238722675065e-06, "loss": 1.7957, "step": 32141000 }, { "epoch": 93.04, "learning_rate": 3.498516522323083e-06, "loss": 1.7679, "step": 32141500 }, { "epoch": 93.04, "learning_rate": 3.4977928746758056e-06, "loss": 1.7644, "step": 32142000 }, { "epoch": 93.04, "learning_rate": 3.497069227028529e-06, "loss": 1.7723, "step": 32142500 }, { "epoch": 93.04, "learning_rate": 3.4963455793812522e-06, "loss": 1.7354, "step": 32143000 }, { "epoch": 93.04, "learning_rate": 3.4956219317339757e-06, "loss": 1.7951, "step": 32143500 }, { "epoch": 93.04, "learning_rate": 3.4948982840866992e-06, "loss": 1.7702, "step": 32144000 }, { "epoch": 93.05, "learning_rate": 3.494176083734717e-06, "loss": 1.7844, "step": 32144500 }, { "epoch": 93.05, "learning_rate": 3.4934524360874404e-06, "loss": 1.7702, "step": 32145000 }, { "epoch": 93.05, "learning_rate": 3.492728788440163e-06, "loss": 1.7538, "step": 32145500 }, { "epoch": 93.05, "learning_rate": 3.4920051407928866e-06, "loss": 1.7655, "step": 32146000 }, { "epoch": 93.05, "learning_rate": 3.4912814931456097e-06, "loss": 1.7743, "step": 32146500 }, { "epoch": 93.05, "learning_rate": 3.490560740088922e-06, "loss": 1.7625, "step": 32147000 }, { "epoch": 93.05, "learning_rate": 3.4898370924416454e-06, "loss": 1.7556, "step": 32147500 }, { "epoch": 93.06, "learning_rate": 3.489113444794368e-06, "loss": 1.7885, "step": 32148000 }, { "epoch": 93.06, "learning_rate": 3.4883897971470916e-06, "loss": 1.7674, "step": 32148500 }, { "epoch": 93.06, "learning_rate": 3.487666149499815e-06, "loss": 1.7646, "step": 32149000 }, { "epoch": 93.06, "learning_rate": 3.4869439491478328e-06, "loss": 1.7668, "step": 32149500 }, { "epoch": 93.06, "learning_rate": 3.4862203015005563e-06, "loss": 1.7882, "step": 32150000 }, { "epoch": 93.06, "learning_rate": 3.485496653853279e-06, "loss": 1.7773, "step": 32150500 }, { "epoch": 93.06, "learning_rate": 3.4847730062060024e-06, "loss": 1.7552, "step": 32151000 }, { "epoch": 93.07, "learning_rate": 3.4840493585587255e-06, "loss": 1.7524, "step": 32151500 }, { "epoch": 93.07, "learning_rate": 3.483325710911449e-06, "loss": 1.7848, "step": 32152000 }, { "epoch": 93.07, "learning_rate": 3.4826020632641717e-06, "loss": 1.7689, "step": 32152500 }, { "epoch": 93.07, "learning_rate": 3.481878415616895e-06, "loss": 1.7716, "step": 32153000 }, { "epoch": 93.07, "learning_rate": 3.4811547679696187e-06, "loss": 1.7692, "step": 32153500 }, { "epoch": 93.07, "learning_rate": 3.4804311203223418e-06, "loss": 1.7848, "step": 32154000 }, { "epoch": 93.07, "learning_rate": 3.4797074726750653e-06, "loss": 1.7721, "step": 32154500 }, { "epoch": 93.08, "learning_rate": 3.478983825027788e-06, "loss": 1.8005, "step": 32155000 }, { "epoch": 93.08, "learning_rate": 3.4782616246758064e-06, "loss": 1.7669, "step": 32155500 }, { "epoch": 93.08, "learning_rate": 3.4775394243238237e-06, "loss": 1.8124, "step": 32156000 }, { "epoch": 93.08, "learning_rate": 3.476815776676547e-06, "loss": 1.7858, "step": 32156500 }, { "epoch": 93.08, "learning_rate": 3.4760921290292703e-06, "loss": 1.7674, "step": 32157000 }, { "epoch": 93.08, "learning_rate": 3.4753684813819938e-06, "loss": 1.7704, "step": 32157500 }, { "epoch": 93.08, "learning_rate": 3.4746448337347164e-06, "loss": 1.7858, "step": 32158000 }, { "epoch": 93.09, "learning_rate": 3.47392118608744e-06, "loss": 1.7671, "step": 32158500 }, { "epoch": 93.09, "learning_rate": 3.4731989857354576e-06, "loss": 1.782, "step": 32159000 }, { "epoch": 93.09, "learning_rate": 3.472475338088181e-06, "loss": 1.7705, "step": 32159500 }, { "epoch": 93.09, "learning_rate": 3.4717516904409038e-06, "loss": 1.7498, "step": 32160000 }, { "epoch": 93.09, "learning_rate": 3.4710280427936273e-06, "loss": 1.7903, "step": 32160500 }, { "epoch": 93.09, "learning_rate": 3.470305842441645e-06, "loss": 1.7855, "step": 32161000 }, { "epoch": 93.09, "learning_rate": 3.4695821947943685e-06, "loss": 1.7577, "step": 32161500 }, { "epoch": 93.1, "learning_rate": 3.4688585471470915e-06, "loss": 1.7556, "step": 32162000 }, { "epoch": 93.1, "learning_rate": 3.468134899499815e-06, "loss": 1.7889, "step": 32162500 }, { "epoch": 93.1, "learning_rate": 3.4674112518525385e-06, "loss": 1.7758, "step": 32163000 }, { "epoch": 93.1, "learning_rate": 3.466687604205261e-06, "loss": 1.7717, "step": 32163500 }, { "epoch": 93.1, "learning_rate": 3.4659639565579847e-06, "loss": 1.7547, "step": 32164000 }, { "epoch": 93.1, "learning_rate": 3.4652417562060024e-06, "loss": 1.7651, "step": 32164500 }, { "epoch": 93.1, "learning_rate": 3.464518108558726e-06, "loss": 1.7805, "step": 32165000 }, { "epoch": 93.11, "learning_rate": 3.4637944609114485e-06, "loss": 1.7486, "step": 32165500 }, { "epoch": 93.11, "learning_rate": 3.463070813264172e-06, "loss": 1.7922, "step": 32166000 }, { "epoch": 93.11, "learning_rate": 3.462347165616895e-06, "loss": 1.7846, "step": 32166500 }, { "epoch": 93.11, "learning_rate": 3.4616249652649132e-06, "loss": 1.7606, "step": 32167000 }, { "epoch": 93.11, "learning_rate": 3.4609013176176363e-06, "loss": 1.7672, "step": 32167500 }, { "epoch": 93.11, "learning_rate": 3.4601791172656544e-06, "loss": 1.7759, "step": 32168000 }, { "epoch": 93.11, "learning_rate": 3.459456916913672e-06, "loss": 1.7727, "step": 32168500 }, { "epoch": 93.12, "learning_rate": 3.4587332692663956e-06, "loss": 1.797, "step": 32169000 }, { "epoch": 93.12, "learning_rate": 3.4580096216191182e-06, "loss": 1.7592, "step": 32169500 }, { "epoch": 93.12, "learning_rate": 3.4572859739718417e-06, "loss": 1.7682, "step": 32170000 }, { "epoch": 93.12, "learning_rate": 3.4565623263245644e-06, "loss": 1.7587, "step": 32170500 }, { "epoch": 93.12, "learning_rate": 3.4558386786772883e-06, "loss": 1.7504, "step": 32171000 }, { "epoch": 93.12, "learning_rate": 3.455115031030011e-06, "loss": 1.769, "step": 32171500 }, { "epoch": 93.12, "learning_rate": 3.4543913833827345e-06, "loss": 1.7529, "step": 32172000 }, { "epoch": 93.13, "learning_rate": 3.453667735735458e-06, "loss": 1.7735, "step": 32172500 }, { "epoch": 93.13, "learning_rate": 3.4529440880881806e-06, "loss": 1.7723, "step": 32173000 }, { "epoch": 93.13, "learning_rate": 3.452221887736199e-06, "loss": 1.7859, "step": 32173500 }, { "epoch": 93.13, "learning_rate": 3.451498240088922e-06, "loss": 1.7777, "step": 32174000 }, { "epoch": 93.13, "learning_rate": 3.4507745924416453e-06, "loss": 1.784, "step": 32174500 }, { "epoch": 93.13, "learning_rate": 3.4500509447943684e-06, "loss": 1.7686, "step": 32175000 }, { "epoch": 93.13, "learning_rate": 3.449327297147092e-06, "loss": 1.784, "step": 32175500 }, { "epoch": 93.14, "learning_rate": 3.4486036494998146e-06, "loss": 1.766, "step": 32176000 }, { "epoch": 93.14, "learning_rate": 3.4478814491478327e-06, "loss": 1.7949, "step": 32176500 }, { "epoch": 93.14, "learning_rate": 3.4471578015005557e-06, "loss": 1.777, "step": 32177000 }, { "epoch": 93.14, "learning_rate": 3.4464341538532792e-06, "loss": 1.7567, "step": 32177500 }, { "epoch": 93.14, "learning_rate": 3.4457105062060027e-06, "loss": 1.772, "step": 32178000 }, { "epoch": 93.14, "learning_rate": 3.4449868585587254e-06, "loss": 1.7538, "step": 32178500 }, { "epoch": 93.15, "learning_rate": 3.444263210911449e-06, "loss": 1.7868, "step": 32179000 }, { "epoch": 93.15, "learning_rate": 3.443539563264172e-06, "loss": 1.7834, "step": 32179500 }, { "epoch": 93.15, "learning_rate": 3.4428159156168955e-06, "loss": 1.7558, "step": 32180000 }, { "epoch": 93.15, "learning_rate": 3.442093715264913e-06, "loss": 1.7743, "step": 32180500 }, { "epoch": 93.15, "learning_rate": 3.4413700676176367e-06, "loss": 1.7863, "step": 32181000 }, { "epoch": 93.15, "learning_rate": 3.4406464199703593e-06, "loss": 1.7739, "step": 32181500 }, { "epoch": 93.15, "learning_rate": 3.439922772323083e-06, "loss": 1.7668, "step": 32182000 }, { "epoch": 93.16, "learning_rate": 3.4391991246758055e-06, "loss": 1.7726, "step": 32182500 }, { "epoch": 93.16, "learning_rate": 3.4384754770285294e-06, "loss": 1.7739, "step": 32183000 }, { "epoch": 93.16, "learning_rate": 3.4377532766765467e-06, "loss": 1.7532, "step": 32183500 }, { "epoch": 93.16, "learning_rate": 3.43702962902927e-06, "loss": 1.7547, "step": 32184000 }, { "epoch": 93.16, "learning_rate": 3.436307428677288e-06, "loss": 1.789, "step": 32184500 }, { "epoch": 93.16, "learning_rate": 3.4355837810300113e-06, "loss": 1.7438, "step": 32185000 }, { "epoch": 93.16, "learning_rate": 3.434860133382734e-06, "loss": 1.7729, "step": 32185500 }, { "epoch": 93.17, "learning_rate": 3.4341364857354575e-06, "loss": 1.7362, "step": 32186000 }, { "epoch": 93.17, "learning_rate": 3.4334128380881814e-06, "loss": 1.7694, "step": 32186500 }, { "epoch": 93.17, "learning_rate": 3.432689190440904e-06, "loss": 1.7911, "step": 32187000 }, { "epoch": 93.17, "learning_rate": 3.4319655427936276e-06, "loss": 1.7558, "step": 32187500 }, { "epoch": 93.17, "learning_rate": 3.4312433424416453e-06, "loss": 1.7717, "step": 32188000 }, { "epoch": 93.17, "learning_rate": 3.4305196947943688e-06, "loss": 1.7598, "step": 32188500 }, { "epoch": 93.17, "learning_rate": 3.4297960471470914e-06, "loss": 1.7743, "step": 32189000 }, { "epoch": 93.18, "learning_rate": 3.429072399499815e-06, "loss": 1.7687, "step": 32189500 }, { "epoch": 93.18, "learning_rate": 3.428348751852538e-06, "loss": 1.7612, "step": 32190000 }, { "epoch": 93.18, "learning_rate": 3.427626551500556e-06, "loss": 1.7831, "step": 32190500 }, { "epoch": 93.18, "learning_rate": 3.4269029038532788e-06, "loss": 1.7835, "step": 32191000 }, { "epoch": 93.18, "learning_rate": 3.4261792562060023e-06, "loss": 1.752, "step": 32191500 }, { "epoch": 93.18, "learning_rate": 3.4254556085587258e-06, "loss": 1.7601, "step": 32192000 }, { "epoch": 93.18, "learning_rate": 3.424731960911449e-06, "loss": 1.76, "step": 32192500 }, { "epoch": 93.19, "learning_rate": 3.4240083132641724e-06, "loss": 1.7543, "step": 32193000 }, { "epoch": 93.19, "learning_rate": 3.423284665616895e-06, "loss": 1.7659, "step": 32193500 }, { "epoch": 93.19, "learning_rate": 3.4225624652649135e-06, "loss": 1.7669, "step": 32194000 }, { "epoch": 93.19, "learning_rate": 3.421838817617636e-06, "loss": 1.7714, "step": 32194500 }, { "epoch": 93.19, "learning_rate": 3.4211151699703597e-06, "loss": 1.79, "step": 32195000 }, { "epoch": 93.19, "learning_rate": 3.4203915223230828e-06, "loss": 1.7429, "step": 32195500 }, { "epoch": 93.19, "learning_rate": 3.4196678746758063e-06, "loss": 1.7598, "step": 32196000 }, { "epoch": 93.2, "learning_rate": 3.4189456743238235e-06, "loss": 1.761, "step": 32196500 }, { "epoch": 93.2, "learning_rate": 3.418222026676547e-06, "loss": 1.785, "step": 32197000 }, { "epoch": 93.2, "learning_rate": 3.41749837902927e-06, "loss": 1.7386, "step": 32197500 }, { "epoch": 93.2, "learning_rate": 3.4167747313819936e-06, "loss": 1.7753, "step": 32198000 }, { "epoch": 93.2, "learning_rate": 3.416051083734717e-06, "loss": 1.7358, "step": 32198500 }, { "epoch": 93.2, "learning_rate": 3.4153274360874398e-06, "loss": 1.7905, "step": 32199000 }, { "epoch": 93.2, "learning_rate": 3.4146037884401633e-06, "loss": 1.7578, "step": 32199500 }, { "epoch": 93.21, "learning_rate": 3.4138801407928864e-06, "loss": 1.7639, "step": 32200000 }, { "epoch": 93.21, "learning_rate": 3.41315649314561e-06, "loss": 1.7949, "step": 32200500 }, { "epoch": 93.21, "learning_rate": 3.4124328454983325e-06, "loss": 1.763, "step": 32201000 }, { "epoch": 93.21, "learning_rate": 3.411709197851056e-06, "loss": 1.7849, "step": 32201500 }, { "epoch": 93.21, "learning_rate": 3.4109869974990737e-06, "loss": 1.7505, "step": 32202000 }, { "epoch": 93.21, "learning_rate": 3.410263349851797e-06, "loss": 1.7475, "step": 32202500 }, { "epoch": 93.21, "learning_rate": 3.40953970220452e-06, "loss": 1.7827, "step": 32203000 }, { "epoch": 93.22, "learning_rate": 3.4088160545572434e-06, "loss": 1.7672, "step": 32203500 }, { "epoch": 93.22, "learning_rate": 3.408093854205261e-06, "loss": 1.7426, "step": 32204000 }, { "epoch": 93.22, "learning_rate": 3.4073702065579845e-06, "loss": 1.7808, "step": 32204500 }, { "epoch": 93.22, "learning_rate": 3.406646558910708e-06, "loss": 1.809, "step": 32205000 }, { "epoch": 93.22, "learning_rate": 3.405922911263431e-06, "loss": 1.7824, "step": 32205500 }, { "epoch": 93.22, "learning_rate": 3.4051992636161546e-06, "loss": 1.787, "step": 32206000 }, { "epoch": 93.22, "learning_rate": 3.404477063264172e-06, "loss": 1.7617, "step": 32206500 }, { "epoch": 93.23, "learning_rate": 3.4037534156168954e-06, "loss": 1.7637, "step": 32207000 }, { "epoch": 93.23, "learning_rate": 3.4030297679696185e-06, "loss": 1.7796, "step": 32207500 }, { "epoch": 93.23, "learning_rate": 3.402306120322342e-06, "loss": 1.7386, "step": 32208000 }, { "epoch": 93.23, "learning_rate": 3.4015839199703596e-06, "loss": 1.7576, "step": 32208500 }, { "epoch": 93.23, "learning_rate": 3.400860272323083e-06, "loss": 1.8006, "step": 32209000 }, { "epoch": 93.23, "learning_rate": 3.400136624675806e-06, "loss": 1.7741, "step": 32209500 }, { "epoch": 93.23, "learning_rate": 3.3994129770285293e-06, "loss": 1.7759, "step": 32210000 }, { "epoch": 93.24, "learning_rate": 3.398689329381252e-06, "loss": 1.7453, "step": 32210500 }, { "epoch": 93.24, "learning_rate": 3.3979671290292705e-06, "loss": 1.758, "step": 32211000 }, { "epoch": 93.24, "learning_rate": 3.397243481381993e-06, "loss": 1.7796, "step": 32211500 }, { "epoch": 93.24, "learning_rate": 3.3965198337347166e-06, "loss": 1.7654, "step": 32212000 }, { "epoch": 93.24, "learning_rate": 3.39579618608744e-06, "loss": 1.7693, "step": 32212500 }, { "epoch": 93.24, "learning_rate": 3.3950725384401632e-06, "loss": 1.7731, "step": 32213000 }, { "epoch": 93.24, "learning_rate": 3.3943488907928867e-06, "loss": 1.7868, "step": 32213500 }, { "epoch": 93.25, "learning_rate": 3.3936266904409044e-06, "loss": 1.7832, "step": 32214000 }, { "epoch": 93.25, "learning_rate": 3.392903042793628e-06, "loss": 1.7688, "step": 32214500 }, { "epoch": 93.25, "learning_rate": 3.3921793951463506e-06, "loss": 1.7706, "step": 32215000 }, { "epoch": 93.25, "learning_rate": 3.391455747499074e-06, "loss": 1.7587, "step": 32215500 }, { "epoch": 93.25, "learning_rate": 3.3907335471470917e-06, "loss": 1.7876, "step": 32216000 }, { "epoch": 93.25, "learning_rate": 3.3900098994998153e-06, "loss": 1.7862, "step": 32216500 }, { "epoch": 93.26, "learning_rate": 3.389286251852538e-06, "loss": 1.7637, "step": 32217000 }, { "epoch": 93.26, "learning_rate": 3.3885626042052614e-06, "loss": 1.7673, "step": 32217500 }, { "epoch": 93.26, "learning_rate": 3.3878389565579845e-06, "loss": 1.7735, "step": 32218000 }, { "epoch": 93.26, "learning_rate": 3.387115308910708e-06, "loss": 1.7686, "step": 32218500 }, { "epoch": 93.26, "learning_rate": 3.3863916612634315e-06, "loss": 1.7711, "step": 32219000 }, { "epoch": 93.26, "learning_rate": 3.385668013616154e-06, "loss": 1.798, "step": 32219500 }, { "epoch": 93.26, "learning_rate": 3.384945813264172e-06, "loss": 1.781, "step": 32220000 }, { "epoch": 93.27, "learning_rate": 3.3842221656168953e-06, "loss": 1.7626, "step": 32220500 }, { "epoch": 93.27, "learning_rate": 3.383498517969619e-06, "loss": 1.7544, "step": 32221000 }, { "epoch": 93.27, "learning_rate": 3.3827748703223415e-06, "loss": 1.7716, "step": 32221500 }, { "epoch": 93.27, "learning_rate": 3.382051222675065e-06, "loss": 1.7644, "step": 32222000 }, { "epoch": 93.27, "learning_rate": 3.3813290223230827e-06, "loss": 1.7895, "step": 32222500 }, { "epoch": 93.27, "learning_rate": 3.380605374675806e-06, "loss": 1.7508, "step": 32223000 }, { "epoch": 93.27, "learning_rate": 3.3798817270285293e-06, "loss": 1.7579, "step": 32223500 }, { "epoch": 93.28, "learning_rate": 3.3791580793812528e-06, "loss": 1.7675, "step": 32224000 }, { "epoch": 93.28, "learning_rate": 3.3784344317339754e-06, "loss": 1.771, "step": 32224500 }, { "epoch": 93.28, "learning_rate": 3.3777122313819935e-06, "loss": 1.7929, "step": 32225000 }, { "epoch": 93.28, "learning_rate": 3.376990031030011e-06, "loss": 1.77, "step": 32225500 }, { "epoch": 93.28, "learning_rate": 3.3762663833827347e-06, "loss": 1.7343, "step": 32226000 }, { "epoch": 93.28, "learning_rate": 3.3755427357354578e-06, "loss": 1.7951, "step": 32226500 }, { "epoch": 93.28, "learning_rate": 3.3748190880881813e-06, "loss": 1.7748, "step": 32227000 }, { "epoch": 93.29, "learning_rate": 3.374095440440904e-06, "loss": 1.736, "step": 32227500 }, { "epoch": 93.29, "learning_rate": 3.3733717927936274e-06, "loss": 1.7607, "step": 32228000 }, { "epoch": 93.29, "learning_rate": 3.372648145146351e-06, "loss": 1.7651, "step": 32228500 }, { "epoch": 93.29, "learning_rate": 3.3719244974990736e-06, "loss": 1.7721, "step": 32229000 }, { "epoch": 93.29, "learning_rate": 3.3712022971470913e-06, "loss": 1.7892, "step": 32229500 }, { "epoch": 93.29, "learning_rate": 3.3704786494998148e-06, "loss": 1.8006, "step": 32230000 }, { "epoch": 93.29, "learning_rate": 3.3697550018525383e-06, "loss": 1.7442, "step": 32230500 }, { "epoch": 93.3, "learning_rate": 3.369032801500556e-06, "loss": 1.7621, "step": 32231000 }, { "epoch": 93.3, "learning_rate": 3.3683091538532795e-06, "loss": 1.766, "step": 32231500 }, { "epoch": 93.3, "learning_rate": 3.367585506206002e-06, "loss": 1.7854, "step": 32232000 }, { "epoch": 93.3, "learning_rate": 3.3668618585587256e-06, "loss": 1.7621, "step": 32232500 }, { "epoch": 93.3, "learning_rate": 3.3661382109114487e-06, "loss": 1.787, "step": 32233000 }, { "epoch": 93.3, "learning_rate": 3.365414563264172e-06, "loss": 1.7698, "step": 32233500 }, { "epoch": 93.3, "learning_rate": 3.36469236291219e-06, "loss": 1.7735, "step": 32234000 }, { "epoch": 93.31, "learning_rate": 3.3639687152649134e-06, "loss": 1.7692, "step": 32234500 }, { "epoch": 93.31, "learning_rate": 3.363245067617636e-06, "loss": 1.7659, "step": 32235000 }, { "epoch": 93.31, "learning_rate": 3.3625214199703595e-06, "loss": 1.7836, "step": 32235500 }, { "epoch": 93.31, "learning_rate": 3.361797772323083e-06, "loss": 1.7574, "step": 32236000 }, { "epoch": 93.31, "learning_rate": 3.361074124675806e-06, "loss": 1.7737, "step": 32236500 }, { "epoch": 93.31, "learning_rate": 3.3603504770285296e-06, "loss": 1.7616, "step": 32237000 }, { "epoch": 93.31, "learning_rate": 3.3596268293812523e-06, "loss": 1.7735, "step": 32237500 }, { "epoch": 93.32, "learning_rate": 3.3589046290292704e-06, "loss": 1.7848, "step": 32238000 }, { "epoch": 93.32, "learning_rate": 3.3581809813819935e-06, "loss": 1.7703, "step": 32238500 }, { "epoch": 93.32, "learning_rate": 3.357457333734717e-06, "loss": 1.7766, "step": 32239000 }, { "epoch": 93.32, "learning_rate": 3.3567336860874396e-06, "loss": 1.7552, "step": 32239500 }, { "epoch": 93.32, "learning_rate": 3.356010038440163e-06, "loss": 1.764, "step": 32240000 }, { "epoch": 93.32, "learning_rate": 3.355286390792886e-06, "loss": 1.7599, "step": 32240500 }, { "epoch": 93.32, "learning_rate": 3.3545627431456097e-06, "loss": 1.7948, "step": 32241000 }, { "epoch": 93.33, "learning_rate": 3.353840542793627e-06, "loss": 1.7789, "step": 32241500 }, { "epoch": 93.33, "learning_rate": 3.353116895146351e-06, "loss": 1.7822, "step": 32242000 }, { "epoch": 93.33, "learning_rate": 3.3523932474990744e-06, "loss": 1.76, "step": 32242500 }, { "epoch": 93.33, "learning_rate": 3.351669599851797e-06, "loss": 1.782, "step": 32243000 }, { "epoch": 93.33, "learning_rate": 3.3509473994998147e-06, "loss": 1.7632, "step": 32243500 }, { "epoch": 93.33, "learning_rate": 3.3502237518525382e-06, "loss": 1.786, "step": 32244000 }, { "epoch": 93.33, "learning_rate": 3.3495001042052617e-06, "loss": 1.7768, "step": 32244500 }, { "epoch": 93.34, "learning_rate": 3.3487764565579844e-06, "loss": 1.7744, "step": 32245000 }, { "epoch": 93.34, "learning_rate": 3.348054256206003e-06, "loss": 1.7679, "step": 32245500 }, { "epoch": 93.34, "learning_rate": 3.3473306085587256e-06, "loss": 1.7749, "step": 32246000 }, { "epoch": 93.34, "learning_rate": 3.346606960911449e-06, "loss": 1.7821, "step": 32246500 }, { "epoch": 93.34, "learning_rate": 3.3458833132641717e-06, "loss": 1.778, "step": 32247000 }, { "epoch": 93.34, "learning_rate": 3.3451596656168952e-06, "loss": 1.7699, "step": 32247500 }, { "epoch": 93.34, "learning_rate": 3.344437465264913e-06, "loss": 1.7697, "step": 32248000 }, { "epoch": 93.35, "learning_rate": 3.3437138176176364e-06, "loss": 1.7727, "step": 32248500 }, { "epoch": 93.35, "learning_rate": 3.3429901699703595e-06, "loss": 1.7687, "step": 32249000 }, { "epoch": 93.35, "learning_rate": 3.342266522323083e-06, "loss": 1.756, "step": 32249500 }, { "epoch": 93.35, "learning_rate": 3.3415428746758057e-06, "loss": 1.7744, "step": 32250000 }, { "epoch": 93.35, "learning_rate": 3.340819227028529e-06, "loss": 1.7795, "step": 32250500 }, { "epoch": 93.35, "learning_rate": 3.340097026676547e-06, "loss": 1.7523, "step": 32251000 }, { "epoch": 93.35, "learning_rate": 3.3393733790292703e-06, "loss": 1.7762, "step": 32251500 }, { "epoch": 93.36, "learning_rate": 3.338649731381994e-06, "loss": 1.7644, "step": 32252000 }, { "epoch": 93.36, "learning_rate": 3.3379260837347165e-06, "loss": 1.7589, "step": 32252500 }, { "epoch": 93.36, "learning_rate": 3.33720243608744e-06, "loss": 1.7493, "step": 32253000 }, { "epoch": 93.36, "learning_rate": 3.336478788440163e-06, "loss": 1.7938, "step": 32253500 }, { "epoch": 93.36, "learning_rate": 3.335756588088181e-06, "loss": 1.7997, "step": 32254000 }, { "epoch": 93.36, "learning_rate": 3.3350329404409043e-06, "loss": 1.7744, "step": 32254500 }, { "epoch": 93.37, "learning_rate": 3.3343092927936278e-06, "loss": 1.7966, "step": 32255000 }, { "epoch": 93.37, "learning_rate": 3.3335856451463504e-06, "loss": 1.7611, "step": 32255500 }, { "epoch": 93.37, "learning_rate": 3.332861997499074e-06, "loss": 1.7678, "step": 32256000 }, { "epoch": 93.37, "learning_rate": 3.3321383498517974e-06, "loss": 1.7709, "step": 32256500 }, { "epoch": 93.37, "learning_rate": 3.331416149499815e-06, "loss": 1.7803, "step": 32257000 }, { "epoch": 93.37, "learning_rate": 3.3306925018525378e-06, "loss": 1.7737, "step": 32257500 }, { "epoch": 93.37, "learning_rate": 3.3299688542052613e-06, "loss": 1.7569, "step": 32258000 }, { "epoch": 93.38, "learning_rate": 3.3292452065579848e-06, "loss": 1.7632, "step": 32258500 }, { "epoch": 93.38, "learning_rate": 3.328521558910708e-06, "loss": 1.7814, "step": 32259000 }, { "epoch": 93.38, "learning_rate": 3.3277979112634313e-06, "loss": 1.7759, "step": 32259500 }, { "epoch": 93.38, "learning_rate": 3.327074263616154e-06, "loss": 1.7676, "step": 32260000 }, { "epoch": 93.38, "learning_rate": 3.326352063264172e-06, "loss": 1.7574, "step": 32260500 }, { "epoch": 93.38, "learning_rate": 3.325628415616895e-06, "loss": 1.764, "step": 32261000 }, { "epoch": 93.38, "learning_rate": 3.3249047679696187e-06, "loss": 1.7844, "step": 32261500 }, { "epoch": 93.39, "learning_rate": 3.3241811203223413e-06, "loss": 1.7669, "step": 32262000 }, { "epoch": 93.39, "learning_rate": 3.323457472675065e-06, "loss": 1.7866, "step": 32262500 }, { "epoch": 93.39, "learning_rate": 3.3227338250277884e-06, "loss": 1.7409, "step": 32263000 }, { "epoch": 93.39, "learning_rate": 3.3220101773805114e-06, "loss": 1.7568, "step": 32263500 }, { "epoch": 93.39, "learning_rate": 3.321286529733235e-06, "loss": 1.753, "step": 32264000 }, { "epoch": 93.39, "learning_rate": 3.320565776676547e-06, "loss": 1.7571, "step": 32264500 }, { "epoch": 93.39, "learning_rate": 3.31984212902927e-06, "loss": 1.7324, "step": 32265000 }, { "epoch": 93.4, "learning_rate": 3.3191184813819934e-06, "loss": 1.8011, "step": 32265500 }, { "epoch": 93.4, "learning_rate": 3.318394833734717e-06, "loss": 1.779, "step": 32266000 }, { "epoch": 93.4, "learning_rate": 3.31767118608744e-06, "loss": 1.7497, "step": 32266500 }, { "epoch": 93.4, "learning_rate": 3.3169475384401635e-06, "loss": 1.78, "step": 32267000 }, { "epoch": 93.4, "learning_rate": 3.316223890792886e-06, "loss": 1.8095, "step": 32267500 }, { "epoch": 93.4, "learning_rate": 3.3155002431456096e-06, "loss": 1.764, "step": 32268000 }, { "epoch": 93.4, "learning_rate": 3.3147765954983327e-06, "loss": 1.7669, "step": 32268500 }, { "epoch": 93.41, "learning_rate": 3.314054395146351e-06, "loss": 1.7555, "step": 32269000 }, { "epoch": 93.41, "learning_rate": 3.3133307474990734e-06, "loss": 1.7645, "step": 32269500 }, { "epoch": 93.41, "learning_rate": 3.312608547147092e-06, "loss": 1.7654, "step": 32270000 }, { "epoch": 93.41, "learning_rate": 3.3118848994998146e-06, "loss": 1.7845, "step": 32270500 }, { "epoch": 93.41, "learning_rate": 3.311161251852538e-06, "loss": 1.7842, "step": 32271000 }, { "epoch": 93.41, "learning_rate": 3.310437604205261e-06, "loss": 1.7613, "step": 32271500 }, { "epoch": 93.41, "learning_rate": 3.3097139565579847e-06, "loss": 1.7953, "step": 32272000 }, { "epoch": 93.42, "learning_rate": 3.308991756206002e-06, "loss": 1.7764, "step": 32272500 }, { "epoch": 93.42, "learning_rate": 3.308268108558726e-06, "loss": 1.782, "step": 32273000 }, { "epoch": 93.42, "learning_rate": 3.3075444609114485e-06, "loss": 1.7765, "step": 32273500 }, { "epoch": 93.42, "learning_rate": 3.306820813264172e-06, "loss": 1.7609, "step": 32274000 }, { "epoch": 93.42, "learning_rate": 3.3060971656168956e-06, "loss": 1.81, "step": 32274500 }, { "epoch": 93.42, "learning_rate": 3.3053735179696182e-06, "loss": 1.7933, "step": 32275000 }, { "epoch": 93.42, "learning_rate": 3.3046498703223417e-06, "loss": 1.7812, "step": 32275500 }, { "epoch": 93.43, "learning_rate": 3.303926222675065e-06, "loss": 1.7866, "step": 32276000 }, { "epoch": 93.43, "learning_rate": 3.3032025750277883e-06, "loss": 1.7697, "step": 32276500 }, { "epoch": 93.43, "learning_rate": 3.302480374675806e-06, "loss": 1.7761, "step": 32277000 }, { "epoch": 93.43, "learning_rate": 3.301758174323824e-06, "loss": 1.7732, "step": 32277500 }, { "epoch": 93.43, "learning_rate": 3.3010345266765467e-06, "loss": 1.7809, "step": 32278000 }, { "epoch": 93.43, "learning_rate": 3.3003108790292702e-06, "loss": 1.771, "step": 32278500 }, { "epoch": 93.43, "learning_rate": 3.2995872313819933e-06, "loss": 1.7684, "step": 32279000 }, { "epoch": 93.44, "learning_rate": 3.298863583734717e-06, "loss": 1.7751, "step": 32279500 }, { "epoch": 93.44, "learning_rate": 3.2981399360874403e-06, "loss": 1.7651, "step": 32280000 }, { "epoch": 93.44, "learning_rate": 3.297416288440163e-06, "loss": 1.7795, "step": 32280500 }, { "epoch": 93.44, "learning_rate": 3.2966926407928865e-06, "loss": 1.7828, "step": 32281000 }, { "epoch": 93.44, "learning_rate": 3.2959689931456096e-06, "loss": 1.7739, "step": 32281500 }, { "epoch": 93.44, "learning_rate": 3.295245345498333e-06, "loss": 1.7745, "step": 32282000 }, { "epoch": 93.44, "learning_rate": 3.2945216978510557e-06, "loss": 1.763, "step": 32282500 }, { "epoch": 93.45, "learning_rate": 3.2937994974990742e-06, "loss": 1.7968, "step": 32283000 }, { "epoch": 93.45, "learning_rate": 3.293075849851797e-06, "loss": 1.7892, "step": 32283500 }, { "epoch": 93.45, "learning_rate": 3.2923522022045204e-06, "loss": 1.7934, "step": 32284000 }, { "epoch": 93.45, "learning_rate": 3.291628554557243e-06, "loss": 1.7833, "step": 32284500 }, { "epoch": 93.45, "learning_rate": 3.2909063542052616e-06, "loss": 1.7742, "step": 32285000 }, { "epoch": 93.45, "learning_rate": 3.2901827065579842e-06, "loss": 1.7639, "step": 32285500 }, { "epoch": 93.45, "learning_rate": 3.2894590589107077e-06, "loss": 1.7929, "step": 32286000 }, { "epoch": 93.46, "learning_rate": 3.2887354112634312e-06, "loss": 1.7786, "step": 32286500 }, { "epoch": 93.46, "learning_rate": 3.2880117636161543e-06, "loss": 1.7714, "step": 32287000 }, { "epoch": 93.46, "learning_rate": 3.2872895632641716e-06, "loss": 1.7972, "step": 32287500 }, { "epoch": 93.46, "learning_rate": 3.286565915616895e-06, "loss": 1.7666, "step": 32288000 }, { "epoch": 93.46, "learning_rate": 3.285842267969619e-06, "loss": 1.784, "step": 32288500 }, { "epoch": 93.46, "learning_rate": 3.2851186203223417e-06, "loss": 1.7859, "step": 32289000 }, { "epoch": 93.46, "learning_rate": 3.284394972675065e-06, "loss": 1.7606, "step": 32289500 }, { "epoch": 93.47, "learning_rate": 3.283671325027788e-06, "loss": 1.7507, "step": 32290000 }, { "epoch": 93.47, "learning_rate": 3.2829491246758063e-06, "loss": 1.7948, "step": 32290500 }, { "epoch": 93.47, "learning_rate": 3.282225477028529e-06, "loss": 1.8005, "step": 32291000 }, { "epoch": 93.47, "learning_rate": 3.2815018293812525e-06, "loss": 1.7695, "step": 32291500 }, { "epoch": 93.47, "learning_rate": 3.2807781817339756e-06, "loss": 1.756, "step": 32292000 }, { "epoch": 93.47, "learning_rate": 3.280054534086699e-06, "loss": 1.7698, "step": 32292500 }, { "epoch": 93.48, "learning_rate": 3.2793308864394226e-06, "loss": 1.7597, "step": 32293000 }, { "epoch": 93.48, "learning_rate": 3.2786072387921453e-06, "loss": 1.78, "step": 32293500 }, { "epoch": 93.48, "learning_rate": 3.277885038440163e-06, "loss": 1.7686, "step": 32294000 }, { "epoch": 93.48, "learning_rate": 3.2771613907928864e-06, "loss": 1.7376, "step": 32294500 }, { "epoch": 93.48, "learning_rate": 3.27643774314561e-06, "loss": 1.8024, "step": 32295000 }, { "epoch": 93.48, "learning_rate": 3.2757140954983326e-06, "loss": 1.7824, "step": 32295500 }, { "epoch": 93.48, "learning_rate": 3.274991895146351e-06, "loss": 1.7445, "step": 32296000 }, { "epoch": 93.49, "learning_rate": 3.2742682474990738e-06, "loss": 1.7964, "step": 32296500 }, { "epoch": 93.49, "learning_rate": 3.2735445998517973e-06, "loss": 1.7771, "step": 32297000 }, { "epoch": 93.49, "learning_rate": 3.2728209522045204e-06, "loss": 1.7551, "step": 32297500 }, { "epoch": 93.49, "learning_rate": 3.272097304557244e-06, "loss": 1.7931, "step": 32298000 }, { "epoch": 93.49, "learning_rate": 3.2713736569099665e-06, "loss": 1.7752, "step": 32298500 }, { "epoch": 93.49, "learning_rate": 3.27065000926269e-06, "loss": 1.7538, "step": 32299000 }, { "epoch": 93.49, "learning_rate": 3.2699278089107077e-06, "loss": 1.7632, "step": 32299500 }, { "epoch": 93.5, "learning_rate": 3.269204161263431e-06, "loss": 1.7664, "step": 32300000 }, { "epoch": 93.5, "learning_rate": 3.2684805136161547e-06, "loss": 1.7763, "step": 32300500 }, { "epoch": 93.5, "learning_rate": 3.2677568659688774e-06, "loss": 1.7673, "step": 32301000 }, { "epoch": 93.5, "learning_rate": 3.267033218321601e-06, "loss": 1.7616, "step": 32301500 }, { "epoch": 93.5, "learning_rate": 3.2663110179696185e-06, "loss": 1.7371, "step": 32302000 }, { "epoch": 93.5, "learning_rate": 3.265588817617636e-06, "loss": 1.7825, "step": 32302500 }, { "epoch": 93.5, "learning_rate": 3.2648651699703597e-06, "loss": 1.7654, "step": 32303000 }, { "epoch": 93.51, "learning_rate": 3.2641415223230832e-06, "loss": 1.7852, "step": 32303500 }, { "epoch": 93.51, "learning_rate": 3.2634193219711005e-06, "loss": 1.7538, "step": 32304000 }, { "epoch": 93.51, "learning_rate": 3.2626956743238235e-06, "loss": 1.7729, "step": 32304500 }, { "epoch": 93.51, "learning_rate": 3.261972026676547e-06, "loss": 1.7697, "step": 32305000 }, { "epoch": 93.51, "learning_rate": 3.2612483790292706e-06, "loss": 1.7548, "step": 32305500 }, { "epoch": 93.51, "learning_rate": 3.260524731381993e-06, "loss": 1.7481, "step": 32306000 }, { "epoch": 93.51, "learning_rate": 3.2598010837347167e-06, "loss": 1.7565, "step": 32306500 }, { "epoch": 93.52, "learning_rate": 3.25907743608744e-06, "loss": 1.7739, "step": 32307000 }, { "epoch": 93.52, "learning_rate": 3.2583537884401633e-06, "loss": 1.7719, "step": 32307500 }, { "epoch": 93.52, "learning_rate": 3.257630140792886e-06, "loss": 1.7544, "step": 32308000 }, { "epoch": 93.52, "learning_rate": 3.2569064931456095e-06, "loss": 1.7579, "step": 32308500 }, { "epoch": 93.52, "learning_rate": 3.256182845498333e-06, "loss": 1.7713, "step": 32309000 }, { "epoch": 93.52, "learning_rate": 3.2554606451463506e-06, "loss": 1.7801, "step": 32309500 }, { "epoch": 93.52, "learning_rate": 3.254736997499074e-06, "loss": 1.7834, "step": 32310000 }, { "epoch": 93.53, "learning_rate": 3.2540133498517972e-06, "loss": 1.755, "step": 32310500 }, { "epoch": 93.53, "learning_rate": 3.2532897022045207e-06, "loss": 1.7691, "step": 32311000 }, { "epoch": 93.53, "learning_rate": 3.2525660545572434e-06, "loss": 1.7892, "step": 32311500 }, { "epoch": 93.53, "learning_rate": 3.251842406909967e-06, "loss": 1.7554, "step": 32312000 }, { "epoch": 93.53, "learning_rate": 3.2511187592626895e-06, "loss": 1.7685, "step": 32312500 }, { "epoch": 93.53, "learning_rate": 3.2503951116154135e-06, "loss": 1.7747, "step": 32313000 }, { "epoch": 93.53, "learning_rate": 3.2496729112634307e-06, "loss": 1.7575, "step": 32313500 }, { "epoch": 93.54, "learning_rate": 3.2489492636161542e-06, "loss": 1.7581, "step": 32314000 }, { "epoch": 93.54, "learning_rate": 3.2482256159688773e-06, "loss": 1.7732, "step": 32314500 }, { "epoch": 93.54, "learning_rate": 3.247501968321601e-06, "loss": 1.7436, "step": 32315000 }, { "epoch": 93.54, "learning_rate": 3.246779767969618e-06, "loss": 1.776, "step": 32315500 }, { "epoch": 93.54, "learning_rate": 3.2460561203223416e-06, "loss": 1.7451, "step": 32316000 }, { "epoch": 93.54, "learning_rate": 3.2453324726750655e-06, "loss": 1.7693, "step": 32316500 }, { "epoch": 93.54, "learning_rate": 3.244608825027788e-06, "loss": 1.7653, "step": 32317000 }, { "epoch": 93.55, "learning_rate": 3.243886624675806e-06, "loss": 1.7623, "step": 32317500 }, { "epoch": 93.55, "learning_rate": 3.2431629770285293e-06, "loss": 1.753, "step": 32318000 }, { "epoch": 93.55, "learning_rate": 3.242439329381253e-06, "loss": 1.7903, "step": 32318500 }, { "epoch": 93.55, "learning_rate": 3.2417156817339755e-06, "loss": 1.7713, "step": 32319000 }, { "epoch": 93.55, "learning_rate": 3.240992034086699e-06, "loss": 1.7742, "step": 32319500 }, { "epoch": 93.55, "learning_rate": 3.240268386439422e-06, "loss": 1.7818, "step": 32320000 }, { "epoch": 93.55, "learning_rate": 3.2395447387921456e-06, "loss": 1.7632, "step": 32320500 }, { "epoch": 93.56, "learning_rate": 3.238821091144869e-06, "loss": 1.7859, "step": 32321000 }, { "epoch": 93.56, "learning_rate": 3.2380988907928863e-06, "loss": 1.7598, "step": 32321500 }, { "epoch": 93.56, "learning_rate": 3.2373752431456094e-06, "loss": 1.7672, "step": 32322000 }, { "epoch": 93.56, "learning_rate": 3.236651595498333e-06, "loss": 1.7916, "step": 32322500 }, { "epoch": 93.56, "learning_rate": 3.2359279478510564e-06, "loss": 1.7514, "step": 32323000 }, { "epoch": 93.56, "learning_rate": 3.235205747499074e-06, "loss": 1.7578, "step": 32323500 }, { "epoch": 93.56, "learning_rate": 3.2344820998517976e-06, "loss": 1.7562, "step": 32324000 }, { "epoch": 93.57, "learning_rate": 3.2337584522045203e-06, "loss": 1.7444, "step": 32324500 }, { "epoch": 93.57, "learning_rate": 3.2330348045572438e-06, "loss": 1.7734, "step": 32325000 }, { "epoch": 93.57, "learning_rate": 3.2323126042052614e-06, "loss": 1.7552, "step": 32325500 }, { "epoch": 93.57, "learning_rate": 3.231590403853279e-06, "loss": 1.7829, "step": 32326000 }, { "epoch": 93.57, "learning_rate": 3.2308667562060026e-06, "loss": 1.7694, "step": 32326500 }, { "epoch": 93.57, "learning_rate": 3.230143108558726e-06, "loss": 1.7456, "step": 32327000 }, { "epoch": 93.57, "learning_rate": 3.2294194609114488e-06, "loss": 1.7599, "step": 32327500 }, { "epoch": 93.58, "learning_rate": 3.2286958132641723e-06, "loss": 1.791, "step": 32328000 }, { "epoch": 93.58, "learning_rate": 3.22797361291219e-06, "loss": 1.7453, "step": 32328500 }, { "epoch": 93.58, "learning_rate": 3.2272499652649134e-06, "loss": 1.788, "step": 32329000 }, { "epoch": 93.58, "learning_rate": 3.226526317617636e-06, "loss": 1.7433, "step": 32329500 }, { "epoch": 93.58, "learning_rate": 3.2258026699703596e-06, "loss": 1.7421, "step": 32330000 }, { "epoch": 93.58, "learning_rate": 3.2250790223230827e-06, "loss": 1.7599, "step": 32330500 }, { "epoch": 93.59, "learning_rate": 3.224355374675806e-06, "loss": 1.7364, "step": 32331000 }, { "epoch": 93.59, "learning_rate": 3.223631727028529e-06, "loss": 1.7665, "step": 32331500 }, { "epoch": 93.59, "learning_rate": 3.2229080793812524e-06, "loss": 1.7845, "step": 32332000 }, { "epoch": 93.59, "learning_rate": 3.222184431733976e-06, "loss": 1.7515, "step": 32332500 }, { "epoch": 93.59, "learning_rate": 3.221460784086699e-06, "loss": 1.7627, "step": 32333000 }, { "epoch": 93.59, "learning_rate": 3.2207371364394224e-06, "loss": 1.7579, "step": 32333500 }, { "epoch": 93.59, "learning_rate": 3.220013488792145e-06, "loss": 1.7694, "step": 32334000 }, { "epoch": 93.6, "learning_rate": 3.2192898411448686e-06, "loss": 1.7952, "step": 32334500 }, { "epoch": 93.6, "learning_rate": 3.218566193497592e-06, "loss": 1.7667, "step": 32335000 }, { "epoch": 93.6, "learning_rate": 3.217842545850315e-06, "loss": 1.7901, "step": 32335500 }, { "epoch": 93.6, "learning_rate": 3.2171203454983324e-06, "loss": 1.782, "step": 32336000 }, { "epoch": 93.6, "learning_rate": 3.216396697851056e-06, "loss": 1.7633, "step": 32336500 }, { "epoch": 93.6, "learning_rate": 3.2156744974990736e-06, "loss": 1.758, "step": 32337000 }, { "epoch": 93.6, "learning_rate": 3.214950849851797e-06, "loss": 1.7766, "step": 32337500 }, { "epoch": 93.61, "learning_rate": 3.21422720220452e-06, "loss": 1.7546, "step": 32338000 }, { "epoch": 93.61, "learning_rate": 3.2135035545572437e-06, "loss": 1.7545, "step": 32338500 }, { "epoch": 93.61, "learning_rate": 3.212779906909967e-06, "loss": 1.7438, "step": 32339000 }, { "epoch": 93.61, "learning_rate": 3.21205625926269e-06, "loss": 1.7527, "step": 32339500 }, { "epoch": 93.61, "learning_rate": 3.2113326116154134e-06, "loss": 1.7609, "step": 32340000 }, { "epoch": 93.61, "learning_rate": 3.210608963968136e-06, "loss": 1.7718, "step": 32340500 }, { "epoch": 93.61, "learning_rate": 3.20988531632086e-06, "loss": 1.7735, "step": 32341000 }, { "epoch": 93.62, "learning_rate": 3.209163115968877e-06, "loss": 1.7958, "step": 32341500 }, { "epoch": 93.62, "learning_rate": 3.2084394683216007e-06, "loss": 1.7676, "step": 32342000 }, { "epoch": 93.62, "learning_rate": 3.2077158206743238e-06, "loss": 1.7674, "step": 32342500 }, { "epoch": 93.62, "learning_rate": 3.2069921730270473e-06, "loss": 1.7645, "step": 32343000 }, { "epoch": 93.62, "learning_rate": 3.2062699726750645e-06, "loss": 1.7787, "step": 32343500 }, { "epoch": 93.62, "learning_rate": 3.2055463250277885e-06, "loss": 1.7804, "step": 32344000 }, { "epoch": 93.62, "learning_rate": 3.204822677380512e-06, "loss": 1.7575, "step": 32344500 }, { "epoch": 93.63, "learning_rate": 3.2040990297332346e-06, "loss": 1.7493, "step": 32345000 }, { "epoch": 93.63, "learning_rate": 3.203375382085958e-06, "loss": 1.7556, "step": 32345500 }, { "epoch": 93.63, "learning_rate": 3.202653181733976e-06, "loss": 1.774, "step": 32346000 }, { "epoch": 93.63, "learning_rate": 3.2019295340866993e-06, "loss": 1.7627, "step": 32346500 }, { "epoch": 93.63, "learning_rate": 3.201205886439422e-06, "loss": 1.7466, "step": 32347000 }, { "epoch": 93.63, "learning_rate": 3.2004822387921455e-06, "loss": 1.767, "step": 32347500 }, { "epoch": 93.63, "learning_rate": 3.199760038440163e-06, "loss": 1.7641, "step": 32348000 }, { "epoch": 93.64, "learning_rate": 3.1990363907928866e-06, "loss": 1.7764, "step": 32348500 }, { "epoch": 93.64, "learning_rate": 3.1983127431456093e-06, "loss": 1.7831, "step": 32349000 }, { "epoch": 93.64, "learning_rate": 3.197589095498333e-06, "loss": 1.776, "step": 32349500 }, { "epoch": 93.64, "learning_rate": 3.196865447851056e-06, "loss": 1.7738, "step": 32350000 }, { "epoch": 93.64, "learning_rate": 3.1961418002037794e-06, "loss": 1.7703, "step": 32350500 }, { "epoch": 93.64, "learning_rate": 3.195419599851797e-06, "loss": 1.7485, "step": 32351000 }, { "epoch": 93.64, "learning_rate": 3.1946959522045206e-06, "loss": 1.7717, "step": 32351500 }, { "epoch": 93.65, "learning_rate": 3.1939723045572432e-06, "loss": 1.784, "step": 32352000 }, { "epoch": 93.65, "learning_rate": 3.1932486569099667e-06, "loss": 1.7886, "step": 32352500 }, { "epoch": 93.65, "learning_rate": 3.1925250092626902e-06, "loss": 1.7727, "step": 32353000 }, { "epoch": 93.65, "learning_rate": 3.1918013616154133e-06, "loss": 1.7542, "step": 32353500 }, { "epoch": 93.65, "learning_rate": 3.191077713968137e-06, "loss": 1.7472, "step": 32354000 }, { "epoch": 93.65, "learning_rate": 3.190355513616154e-06, "loss": 1.7815, "step": 32354500 }, { "epoch": 93.65, "learning_rate": 3.1896318659688776e-06, "loss": 1.7563, "step": 32355000 }, { "epoch": 93.66, "learning_rate": 3.1889082183216007e-06, "loss": 1.7641, "step": 32355500 }, { "epoch": 93.66, "learning_rate": 3.188184570674324e-06, "loss": 1.778, "step": 32356000 }, { "epoch": 93.66, "learning_rate": 3.187462370322342e-06, "loss": 1.7703, "step": 32356500 }, { "epoch": 93.66, "learning_rate": 3.18674016997036e-06, "loss": 1.7648, "step": 32357000 }, { "epoch": 93.66, "learning_rate": 3.1860165223230826e-06, "loss": 1.7602, "step": 32357500 }, { "epoch": 93.66, "learning_rate": 3.185292874675806e-06, "loss": 1.7618, "step": 32358000 }, { "epoch": 93.66, "learning_rate": 3.184569227028529e-06, "loss": 1.7842, "step": 32358500 }, { "epoch": 93.67, "learning_rate": 3.1838455793812527e-06, "loss": 1.7788, "step": 32359000 }, { "epoch": 93.67, "learning_rate": 3.1831219317339753e-06, "loss": 1.7862, "step": 32359500 }, { "epoch": 93.67, "learning_rate": 3.182398284086699e-06, "loss": 1.7682, "step": 32360000 }, { "epoch": 93.67, "learning_rate": 3.1816746364394223e-06, "loss": 1.7361, "step": 32360500 }, { "epoch": 93.67, "learning_rate": 3.18095243608744e-06, "loss": 1.757, "step": 32361000 }, { "epoch": 93.67, "learning_rate": 3.1802302357354577e-06, "loss": 1.7524, "step": 32361500 }, { "epoch": 93.67, "learning_rate": 3.1795080353834758e-06, "loss": 1.757, "step": 32362000 }, { "epoch": 93.68, "learning_rate": 3.1787843877361984e-06, "loss": 1.7719, "step": 32362500 }, { "epoch": 93.68, "learning_rate": 3.1780607400889224e-06, "loss": 1.761, "step": 32363000 }, { "epoch": 93.68, "learning_rate": 3.177337092441645e-06, "loss": 1.7651, "step": 32363500 }, { "epoch": 93.68, "learning_rate": 3.1766134447943685e-06, "loss": 1.7736, "step": 32364000 }, { "epoch": 93.68, "learning_rate": 3.175889797147091e-06, "loss": 1.7545, "step": 32364500 }, { "epoch": 93.68, "learning_rate": 3.1751661494998147e-06, "loss": 1.791, "step": 32365000 }, { "epoch": 93.68, "learning_rate": 3.174442501852538e-06, "loss": 1.7671, "step": 32365500 }, { "epoch": 93.69, "learning_rate": 3.1737188542052613e-06, "loss": 1.7721, "step": 32366000 }, { "epoch": 93.69, "learning_rate": 3.1729952065579848e-06, "loss": 1.7574, "step": 32366500 }, { "epoch": 93.69, "learning_rate": 3.1722715589107074e-06, "loss": 1.7729, "step": 32367000 }, { "epoch": 93.69, "learning_rate": 3.171547911263431e-06, "loss": 1.7642, "step": 32367500 }, { "epoch": 93.69, "learning_rate": 3.1708242636161544e-06, "loss": 1.7826, "step": 32368000 }, { "epoch": 93.69, "learning_rate": 3.170102063264172e-06, "loss": 1.7771, "step": 32368500 }, { "epoch": 93.7, "learning_rate": 3.169378415616895e-06, "loss": 1.7918, "step": 32369000 }, { "epoch": 93.7, "learning_rate": 3.1686547679696187e-06, "loss": 1.772, "step": 32369500 }, { "epoch": 93.7, "learning_rate": 3.167931120322342e-06, "loss": 1.7737, "step": 32370000 }, { "epoch": 93.7, "learning_rate": 3.167207472675065e-06, "loss": 1.7696, "step": 32370500 }, { "epoch": 93.7, "learning_rate": 3.166485272323083e-06, "loss": 1.7342, "step": 32371000 }, { "epoch": 93.7, "learning_rate": 3.165761624675806e-06, "loss": 1.775, "step": 32371500 }, { "epoch": 93.7, "learning_rate": 3.1650379770285295e-06, "loss": 1.8113, "step": 32372000 }, { "epoch": 93.71, "learning_rate": 3.164314329381252e-06, "loss": 1.7579, "step": 32372500 }, { "epoch": 93.71, "learning_rate": 3.1635906817339757e-06, "loss": 1.7918, "step": 32373000 }, { "epoch": 93.71, "learning_rate": 3.1628670340866988e-06, "loss": 1.7691, "step": 32373500 }, { "epoch": 93.71, "learning_rate": 3.1621433864394223e-06, "loss": 1.7846, "step": 32374000 }, { "epoch": 93.71, "learning_rate": 3.1614211860874395e-06, "loss": 1.7805, "step": 32374500 }, { "epoch": 93.71, "learning_rate": 3.160697538440163e-06, "loss": 1.7935, "step": 32375000 }, { "epoch": 93.71, "learning_rate": 3.159973890792886e-06, "loss": 1.7549, "step": 32375500 }, { "epoch": 93.72, "learning_rate": 3.1592502431456096e-06, "loss": 1.7773, "step": 32376000 }, { "epoch": 93.72, "learning_rate": 3.158526595498333e-06, "loss": 1.7872, "step": 32376500 }, { "epoch": 93.72, "learning_rate": 3.157804395146351e-06, "loss": 1.7878, "step": 32377000 }, { "epoch": 93.72, "learning_rate": 3.1570807474990743e-06, "loss": 1.7348, "step": 32377500 }, { "epoch": 93.72, "learning_rate": 3.156357099851797e-06, "loss": 1.762, "step": 32378000 }, { "epoch": 93.72, "learning_rate": 3.1556334522045205e-06, "loss": 1.7714, "step": 32378500 }, { "epoch": 93.72, "learning_rate": 3.154911251852538e-06, "loss": 1.7496, "step": 32379000 }, { "epoch": 93.73, "learning_rate": 3.1541876042052616e-06, "loss": 1.7748, "step": 32379500 }, { "epoch": 93.73, "learning_rate": 3.1534639565579843e-06, "loss": 1.7652, "step": 32380000 }, { "epoch": 93.73, "learning_rate": 3.152740308910708e-06, "loss": 1.7652, "step": 32380500 }, { "epoch": 93.73, "learning_rate": 3.1520181085587255e-06, "loss": 1.7741, "step": 32381000 }, { "epoch": 93.73, "learning_rate": 3.151294460911449e-06, "loss": 1.7607, "step": 32381500 }, { "epoch": 93.73, "learning_rate": 3.150570813264172e-06, "loss": 1.7636, "step": 32382000 }, { "epoch": 93.73, "learning_rate": 3.1498471656168956e-06, "loss": 1.7764, "step": 32382500 }, { "epoch": 93.74, "learning_rate": 3.1491235179696182e-06, "loss": 1.758, "step": 32383000 }, { "epoch": 93.74, "learning_rate": 3.1483998703223417e-06, "loss": 1.7802, "step": 32383500 }, { "epoch": 93.74, "learning_rate": 3.1476762226750652e-06, "loss": 1.7608, "step": 32384000 }, { "epoch": 93.74, "learning_rate": 3.146954022323083e-06, "loss": 1.7642, "step": 32384500 }, { "epoch": 93.74, "learning_rate": 3.1462303746758056e-06, "loss": 1.7556, "step": 32385000 }, { "epoch": 93.74, "learning_rate": 3.145508174323824e-06, "loss": 1.7652, "step": 32385500 }, { "epoch": 93.74, "learning_rate": 3.1447845266765467e-06, "loss": 1.7382, "step": 32386000 }, { "epoch": 93.75, "learning_rate": 3.1440608790292702e-06, "loss": 1.7529, "step": 32386500 }, { "epoch": 93.75, "learning_rate": 3.1433372313819937e-06, "loss": 1.7709, "step": 32387000 }, { "epoch": 93.75, "learning_rate": 3.142613583734717e-06, "loss": 1.7298, "step": 32387500 }, { "epoch": 93.75, "learning_rate": 3.1418899360874403e-06, "loss": 1.7871, "step": 32388000 }, { "epoch": 93.75, "learning_rate": 3.141166288440163e-06, "loss": 1.7568, "step": 32388500 }, { "epoch": 93.75, "learning_rate": 3.1404426407928865e-06, "loss": 1.7613, "step": 32389000 }, { "epoch": 93.75, "learning_rate": 3.139718993145609e-06, "loss": 1.7614, "step": 32389500 }, { "epoch": 93.76, "learning_rate": 3.1389967927936277e-06, "loss": 1.7616, "step": 32390000 }, { "epoch": 93.76, "learning_rate": 3.1382731451463503e-06, "loss": 1.778, "step": 32390500 }, { "epoch": 93.76, "learning_rate": 3.137549497499074e-06, "loss": 1.7506, "step": 32391000 }, { "epoch": 93.76, "learning_rate": 3.1368258498517973e-06, "loss": 1.7899, "step": 32391500 }, { "epoch": 93.76, "learning_rate": 3.1361022022045204e-06, "loss": 1.7466, "step": 32392000 }, { "epoch": 93.76, "learning_rate": 3.135378554557244e-06, "loss": 1.7611, "step": 32392500 }, { "epoch": 93.76, "learning_rate": 3.1346549069099666e-06, "loss": 1.7899, "step": 32393000 }, { "epoch": 93.77, "learning_rate": 3.13393125926269e-06, "loss": 1.778, "step": 32393500 }, { "epoch": 93.77, "learning_rate": 3.1332090589107078e-06, "loss": 1.7871, "step": 32394000 }, { "epoch": 93.77, "learning_rate": 3.1324854112634313e-06, "loss": 1.7622, "step": 32394500 }, { "epoch": 93.77, "learning_rate": 3.131761763616154e-06, "loss": 1.7744, "step": 32395000 }, { "epoch": 93.77, "learning_rate": 3.1310381159688774e-06, "loss": 1.7777, "step": 32395500 }, { "epoch": 93.77, "learning_rate": 3.1303144683216005e-06, "loss": 1.74, "step": 32396000 }, { "epoch": 93.77, "learning_rate": 3.1295922679696186e-06, "loss": 1.7758, "step": 32396500 }, { "epoch": 93.78, "learning_rate": 3.1288686203223417e-06, "loss": 1.7683, "step": 32397000 }, { "epoch": 93.78, "learning_rate": 3.128144972675065e-06, "loss": 1.7737, "step": 32397500 }, { "epoch": 93.78, "learning_rate": 3.1274213250277887e-06, "loss": 1.7714, "step": 32398000 }, { "epoch": 93.78, "learning_rate": 3.1266976773805113e-06, "loss": 1.7644, "step": 32398500 }, { "epoch": 93.78, "learning_rate": 3.125974029733235e-06, "loss": 1.7666, "step": 32399000 }, { "epoch": 93.78, "learning_rate": 3.125250382085958e-06, "loss": 1.7862, "step": 32399500 }, { "epoch": 93.78, "learning_rate": 3.1245267344386814e-06, "loss": 1.7647, "step": 32400000 }, { "epoch": 93.79, "learning_rate": 3.1238059813819937e-06, "loss": 1.7633, "step": 32400500 }, { "epoch": 93.79, "learning_rate": 3.1230823337347168e-06, "loss": 1.7364, "step": 32401000 }, { "epoch": 93.79, "learning_rate": 3.1223601333827345e-06, "loss": 1.7694, "step": 32401500 }, { "epoch": 93.79, "learning_rate": 3.121636485735458e-06, "loss": 1.7593, "step": 32402000 }, { "epoch": 93.79, "learning_rate": 3.120912838088181e-06, "loss": 1.7444, "step": 32402500 }, { "epoch": 93.79, "learning_rate": 3.120189190440904e-06, "loss": 1.8156, "step": 32403000 }, { "epoch": 93.79, "learning_rate": 3.1194669900889222e-06, "loss": 1.7464, "step": 32403500 }, { "epoch": 93.8, "learning_rate": 3.1187433424416453e-06, "loss": 1.7554, "step": 32404000 }, { "epoch": 93.8, "learning_rate": 3.1180196947943684e-06, "loss": 1.7836, "step": 32404500 }, { "epoch": 93.8, "learning_rate": 3.1172960471470915e-06, "loss": 1.7609, "step": 32405000 }, { "epoch": 93.8, "learning_rate": 3.116572399499815e-06, "loss": 1.7503, "step": 32405500 }, { "epoch": 93.8, "learning_rate": 3.1158501991478326e-06, "loss": 1.762, "step": 32406000 }, { "epoch": 93.8, "learning_rate": 3.1151265515005557e-06, "loss": 1.7651, "step": 32406500 }, { "epoch": 93.8, "learning_rate": 3.114402903853279e-06, "loss": 1.7441, "step": 32407000 }, { "epoch": 93.81, "learning_rate": 3.1136792562060023e-06, "loss": 1.7889, "step": 32407500 }, { "epoch": 93.81, "learning_rate": 3.112955608558726e-06, "loss": 1.7661, "step": 32408000 }, { "epoch": 93.81, "learning_rate": 3.112231960911449e-06, "loss": 1.7521, "step": 32408500 }, { "epoch": 93.81, "learning_rate": 3.111508313264172e-06, "loss": 1.7835, "step": 32409000 }, { "epoch": 93.81, "learning_rate": 3.110784665616895e-06, "loss": 1.767, "step": 32409500 }, { "epoch": 93.81, "learning_rate": 3.110062465264913e-06, "loss": 1.766, "step": 32410000 }, { "epoch": 93.82, "learning_rate": 3.1093388176176362e-06, "loss": 1.7603, "step": 32410500 }, { "epoch": 93.82, "learning_rate": 3.1086151699703593e-06, "loss": 1.7587, "step": 32411000 }, { "epoch": 93.82, "learning_rate": 3.107891522323083e-06, "loss": 1.7659, "step": 32411500 }, { "epoch": 93.82, "learning_rate": 3.1071678746758063e-06, "loss": 1.7718, "step": 32412000 }, { "epoch": 93.82, "learning_rate": 3.1064456743238236e-06, "loss": 1.7642, "step": 32412500 }, { "epoch": 93.82, "learning_rate": 3.105722026676547e-06, "loss": 1.7607, "step": 32413000 }, { "epoch": 93.82, "learning_rate": 3.1049983790292706e-06, "loss": 1.7486, "step": 32413500 }, { "epoch": 93.83, "learning_rate": 3.1042747313819936e-06, "loss": 1.761, "step": 32414000 }, { "epoch": 93.83, "learning_rate": 3.1035525310300113e-06, "loss": 1.7753, "step": 32414500 }, { "epoch": 93.83, "learning_rate": 3.102828883382735e-06, "loss": 1.7761, "step": 32415000 }, { "epoch": 93.83, "learning_rate": 3.102105235735458e-06, "loss": 1.7652, "step": 32415500 }, { "epoch": 93.83, "learning_rate": 3.101381588088181e-06, "loss": 1.7884, "step": 32416000 }, { "epoch": 93.83, "learning_rate": 3.100659387736199e-06, "loss": 1.7512, "step": 32416500 }, { "epoch": 93.83, "learning_rate": 3.099935740088922e-06, "loss": 1.7622, "step": 32417000 }, { "epoch": 93.84, "learning_rate": 3.0992120924416452e-06, "loss": 1.7446, "step": 32417500 }, { "epoch": 93.84, "learning_rate": 3.0984884447943683e-06, "loss": 1.7698, "step": 32418000 }, { "epoch": 93.84, "learning_rate": 3.0977647971470914e-06, "loss": 1.7712, "step": 32418500 }, { "epoch": 93.84, "learning_rate": 3.097041149499815e-06, "loss": 1.7964, "step": 32419000 }, { "epoch": 93.84, "learning_rate": 3.096317501852538e-06, "loss": 1.7525, "step": 32419500 }, { "epoch": 93.84, "learning_rate": 3.0955938542052615e-06, "loss": 1.7714, "step": 32420000 }, { "epoch": 93.84, "learning_rate": 3.0948702065579846e-06, "loss": 1.7707, "step": 32420500 }, { "epoch": 93.85, "learning_rate": 3.0941480062060022e-06, "loss": 1.7629, "step": 32421000 }, { "epoch": 93.85, "learning_rate": 3.0934243585587258e-06, "loss": 1.7539, "step": 32421500 }, { "epoch": 93.85, "learning_rate": 3.092700710911449e-06, "loss": 1.7429, "step": 32422000 }, { "epoch": 93.85, "learning_rate": 3.091977063264172e-06, "loss": 1.7707, "step": 32422500 }, { "epoch": 93.85, "learning_rate": 3.09125486291219e-06, "loss": 1.7377, "step": 32423000 }, { "epoch": 93.85, "learning_rate": 3.090531215264913e-06, "loss": 1.7732, "step": 32423500 }, { "epoch": 93.85, "learning_rate": 3.089807567617636e-06, "loss": 1.7464, "step": 32424000 }, { "epoch": 93.86, "learning_rate": 3.0890839199703597e-06, "loss": 1.7633, "step": 32424500 }, { "epoch": 93.86, "learning_rate": 3.0883602723230828e-06, "loss": 1.7668, "step": 32425000 }, { "epoch": 93.86, "learning_rate": 3.087636624675806e-06, "loss": 1.7977, "step": 32425500 }, { "epoch": 93.86, "learning_rate": 3.086914424323824e-06, "loss": 1.7609, "step": 32426000 }, { "epoch": 93.86, "learning_rate": 3.086190776676547e-06, "loss": 1.7676, "step": 32426500 }, { "epoch": 93.86, "learning_rate": 3.0854685763245647e-06, "loss": 1.7413, "step": 32427000 }, { "epoch": 93.86, "learning_rate": 3.084744928677288e-06, "loss": 1.7521, "step": 32427500 }, { "epoch": 93.87, "learning_rate": 3.0840212810300113e-06, "loss": 1.7912, "step": 32428000 }, { "epoch": 93.87, "learning_rate": 3.0832976333827343e-06, "loss": 1.7731, "step": 32428500 }, { "epoch": 93.87, "learning_rate": 3.0825754330307524e-06, "loss": 1.779, "step": 32429000 }, { "epoch": 93.87, "learning_rate": 3.0818517853834755e-06, "loss": 1.7538, "step": 32429500 }, { "epoch": 93.87, "learning_rate": 3.0811281377361986e-06, "loss": 1.7469, "step": 32430000 }, { "epoch": 93.87, "learning_rate": 3.0804044900889217e-06, "loss": 1.7338, "step": 32430500 }, { "epoch": 93.87, "learning_rate": 3.079680842441645e-06, "loss": 1.7561, "step": 32431000 }, { "epoch": 93.88, "learning_rate": 3.0789571947943687e-06, "loss": 1.8008, "step": 32431500 }, { "epoch": 93.88, "learning_rate": 3.078234994442386e-06, "loss": 1.7683, "step": 32432000 }, { "epoch": 93.88, "learning_rate": 3.0775113467951094e-06, "loss": 1.7644, "step": 32432500 }, { "epoch": 93.88, "learning_rate": 3.076787699147833e-06, "loss": 1.7843, "step": 32433000 }, { "epoch": 93.88, "learning_rate": 3.076064051500556e-06, "loss": 1.7591, "step": 32433500 }, { "epoch": 93.88, "learning_rate": 3.0753418511485737e-06, "loss": 1.7802, "step": 32434000 }, { "epoch": 93.88, "learning_rate": 3.074618203501297e-06, "loss": 1.7593, "step": 32434500 }, { "epoch": 93.89, "learning_rate": 3.0738945558540203e-06, "loss": 1.7728, "step": 32435000 }, { "epoch": 93.89, "learning_rate": 3.0731709082067434e-06, "loss": 1.7761, "step": 32435500 }, { "epoch": 93.89, "learning_rate": 3.0724472605594665e-06, "loss": 1.8038, "step": 32436000 }, { "epoch": 93.89, "learning_rate": 3.0717236129121895e-06, "loss": 1.7617, "step": 32436500 }, { "epoch": 93.89, "learning_rate": 3.070999965264913e-06, "loss": 1.7803, "step": 32437000 }, { "epoch": 93.89, "learning_rate": 3.0702763176176365e-06, "loss": 1.742, "step": 32437500 }, { "epoch": 93.89, "learning_rate": 3.069554117265654e-06, "loss": 1.7635, "step": 32438000 }, { "epoch": 93.9, "learning_rate": 3.0688304696183773e-06, "loss": 1.7637, "step": 32438500 }, { "epoch": 93.9, "learning_rate": 3.068106821971101e-06, "loss": 1.7783, "step": 32439000 }, { "epoch": 93.9, "learning_rate": 3.067383174323824e-06, "loss": 1.7727, "step": 32439500 }, { "epoch": 93.9, "learning_rate": 3.066659526676547e-06, "loss": 1.7896, "step": 32440000 }, { "epoch": 93.9, "learning_rate": 3.06593587902927e-06, "loss": 1.7578, "step": 32440500 }, { "epoch": 93.9, "learning_rate": 3.065213678677288e-06, "loss": 1.7752, "step": 32441000 }, { "epoch": 93.9, "learning_rate": 3.0644900310300112e-06, "loss": 1.7703, "step": 32441500 }, { "epoch": 93.91, "learning_rate": 3.0637663833827343e-06, "loss": 1.7612, "step": 32442000 }, { "epoch": 93.91, "learning_rate": 3.063042735735458e-06, "loss": 1.7934, "step": 32442500 }, { "epoch": 93.91, "learning_rate": 3.0623205353834755e-06, "loss": 1.7797, "step": 32443000 }, { "epoch": 93.91, "learning_rate": 3.0615968877361986e-06, "loss": 1.7495, "step": 32443500 }, { "epoch": 93.91, "learning_rate": 3.0608746873842167e-06, "loss": 1.7678, "step": 32444000 }, { "epoch": 93.91, "learning_rate": 3.0601510397369397e-06, "loss": 1.7875, "step": 32444500 }, { "epoch": 93.91, "learning_rate": 3.059427392089663e-06, "loss": 1.759, "step": 32445000 }, { "epoch": 93.92, "learning_rate": 3.0587037444423863e-06, "loss": 1.7954, "step": 32445500 }, { "epoch": 93.92, "learning_rate": 3.0579800967951094e-06, "loss": 1.7614, "step": 32446000 }, { "epoch": 93.92, "learning_rate": 3.057257896443127e-06, "loss": 1.7722, "step": 32446500 }, { "epoch": 93.92, "learning_rate": 3.0565342487958506e-06, "loss": 1.7576, "step": 32447000 }, { "epoch": 93.92, "learning_rate": 3.0558106011485737e-06, "loss": 1.7783, "step": 32447500 }, { "epoch": 93.92, "learning_rate": 3.055086953501297e-06, "loss": 1.7781, "step": 32448000 }, { "epoch": 93.93, "learning_rate": 3.0543633058540202e-06, "loss": 1.7371, "step": 32448500 }, { "epoch": 93.93, "learning_rate": 3.0536396582067433e-06, "loss": 1.7899, "step": 32449000 }, { "epoch": 93.93, "learning_rate": 3.0529160105594664e-06, "loss": 1.7987, "step": 32449500 }, { "epoch": 93.93, "learning_rate": 3.05219236291219e-06, "loss": 1.794, "step": 32450000 }, { "epoch": 93.93, "learning_rate": 3.051468715264913e-06, "loss": 1.778, "step": 32450500 }, { "epoch": 93.93, "learning_rate": 3.0507465149129307e-06, "loss": 1.7663, "step": 32451000 }, { "epoch": 93.93, "learning_rate": 3.050022867265654e-06, "loss": 1.7282, "step": 32451500 }, { "epoch": 93.94, "learning_rate": 3.0492992196183772e-06, "loss": 1.7682, "step": 32452000 }, { "epoch": 93.94, "learning_rate": 3.0485755719711003e-06, "loss": 1.7596, "step": 32452500 }, { "epoch": 93.94, "learning_rate": 3.047851924323824e-06, "loss": 1.7753, "step": 32453000 }, { "epoch": 93.94, "learning_rate": 3.0471297239718415e-06, "loss": 1.7652, "step": 32453500 }, { "epoch": 93.94, "learning_rate": 3.0464060763245646e-06, "loss": 1.7539, "step": 32454000 }, { "epoch": 93.94, "learning_rate": 3.045682428677288e-06, "loss": 1.7608, "step": 32454500 }, { "epoch": 93.94, "learning_rate": 3.0449602283253058e-06, "loss": 1.7603, "step": 32455000 }, { "epoch": 93.95, "learning_rate": 3.044236580678029e-06, "loss": 1.7817, "step": 32455500 }, { "epoch": 93.95, "learning_rate": 3.0435129330307523e-06, "loss": 1.744, "step": 32456000 }, { "epoch": 93.95, "learning_rate": 3.0427892853834754e-06, "loss": 1.7723, "step": 32456500 }, { "epoch": 93.95, "learning_rate": 3.042065637736199e-06, "loss": 1.7663, "step": 32457000 }, { "epoch": 93.95, "learning_rate": 3.041341990088922e-06, "loss": 1.7655, "step": 32457500 }, { "epoch": 93.95, "learning_rate": 3.040618342441645e-06, "loss": 1.7657, "step": 32458000 }, { "epoch": 93.95, "learning_rate": 3.039894694794368e-06, "loss": 1.7529, "step": 32458500 }, { "epoch": 93.96, "learning_rate": 3.0391724944423863e-06, "loss": 1.7918, "step": 32459000 }, { "epoch": 93.96, "learning_rate": 3.038450294090404e-06, "loss": 1.7491, "step": 32459500 }, { "epoch": 93.96, "learning_rate": 3.0377266464431274e-06, "loss": 1.7704, "step": 32460000 }, { "epoch": 93.96, "learning_rate": 3.0370029987958505e-06, "loss": 1.7646, "step": 32460500 }, { "epoch": 93.96, "learning_rate": 3.0362793511485736e-06, "loss": 1.7441, "step": 32461000 }, { "epoch": 93.96, "learning_rate": 3.0355557035012967e-06, "loss": 1.753, "step": 32461500 }, { "epoch": 93.96, "learning_rate": 3.03483205585402e-06, "loss": 1.7642, "step": 32462000 }, { "epoch": 93.97, "learning_rate": 3.034109855502038e-06, "loss": 1.7545, "step": 32462500 }, { "epoch": 93.97, "learning_rate": 3.033386207854761e-06, "loss": 1.7371, "step": 32463000 }, { "epoch": 93.97, "learning_rate": 3.0326625602074844e-06, "loss": 1.7785, "step": 32463500 }, { "epoch": 93.97, "learning_rate": 3.031938912560208e-06, "loss": 1.7554, "step": 32464000 }, { "epoch": 93.97, "learning_rate": 3.031215264912931e-06, "loss": 1.7843, "step": 32464500 }, { "epoch": 93.97, "learning_rate": 3.030491617265654e-06, "loss": 1.7669, "step": 32465000 }, { "epoch": 93.97, "learning_rate": 3.029767969618377e-06, "loss": 1.7441, "step": 32465500 }, { "epoch": 93.98, "learning_rate": 3.0290443219711003e-06, "loss": 1.777, "step": 32466000 }, { "epoch": 93.98, "learning_rate": 3.0283221216191184e-06, "loss": 1.7583, "step": 32466500 }, { "epoch": 93.98, "learning_rate": 3.0275984739718415e-06, "loss": 1.7603, "step": 32467000 }, { "epoch": 93.98, "learning_rate": 3.0268748263245645e-06, "loss": 1.7811, "step": 32467500 }, { "epoch": 93.98, "learning_rate": 3.026151178677288e-06, "loss": 1.7921, "step": 32468000 }, { "epoch": 93.98, "learning_rate": 3.0254275310300115e-06, "loss": 1.7679, "step": 32468500 }, { "epoch": 93.98, "learning_rate": 3.0247038833827346e-06, "loss": 1.7416, "step": 32469000 }, { "epoch": 93.99, "learning_rate": 3.0239816830307523e-06, "loss": 1.7597, "step": 32469500 }, { "epoch": 93.99, "learning_rate": 3.023258035383476e-06, "loss": 1.7785, "step": 32470000 }, { "epoch": 93.99, "learning_rate": 3.022534387736199e-06, "loss": 1.7719, "step": 32470500 }, { "epoch": 93.99, "learning_rate": 3.021810740088922e-06, "loss": 1.7471, "step": 32471000 }, { "epoch": 93.99, "learning_rate": 3.021087092441645e-06, "loss": 1.7696, "step": 32471500 }, { "epoch": 93.99, "learning_rate": 3.0203634447943685e-06, "loss": 1.7855, "step": 32472000 }, { "epoch": 93.99, "learning_rate": 3.0196412444423862e-06, "loss": 1.7609, "step": 32472500 }, { "epoch": 94.0, "learning_rate": 3.0189175967951093e-06, "loss": 1.7829, "step": 32473000 }, { "epoch": 94.0, "learning_rate": 3.018193949147833e-06, "loss": 1.7765, "step": 32473500 }, { "epoch": 94.0, "learning_rate": 3.017470301500556e-06, "loss": 1.7528, "step": 32474000 }, { "epoch": 94.0, "eval_accuracy": 0.6921448741011401, "eval_accuracy_mlm": 0.6621183487574016, "eval_accuracy_nsp": 0.8533293918686667, "eval_loss": 2.2147302627563477, "eval_runtime": 331.6976, "eval_samples_per_second": 1315.614, "eval_steps_per_second": 54.818, "step": 32474368 }, { "epoch": 94.0, "learning_rate": 3.016746653853279e-06, "loss": 1.7616, "step": 32474500 }, { "epoch": 94.0, "learning_rate": 3.016024453501297e-06, "loss": 1.7734, "step": 32475000 }, { "epoch": 94.0, "learning_rate": 3.01530080585402e-06, "loss": 1.7458, "step": 32475500 }, { "epoch": 94.0, "learning_rate": 3.0145771582067432e-06, "loss": 1.777, "step": 32476000 }, { "epoch": 94.01, "learning_rate": 3.0138535105594667e-06, "loss": 1.7721, "step": 32476500 }, { "epoch": 94.01, "learning_rate": 3.01312986291219e-06, "loss": 1.7724, "step": 32477000 }, { "epoch": 94.01, "learning_rate": 3.0124076625602075e-06, "loss": 1.7708, "step": 32477500 }, { "epoch": 94.01, "learning_rate": 3.011684014912931e-06, "loss": 1.7483, "step": 32478000 }, { "epoch": 94.01, "learning_rate": 3.0109618145609487e-06, "loss": 1.7307, "step": 32478500 }, { "epoch": 94.01, "learning_rate": 3.0102381669136717e-06, "loss": 1.7678, "step": 32479000 }, { "epoch": 94.01, "learning_rate": 3.0095145192663952e-06, "loss": 1.7561, "step": 32479500 }, { "epoch": 94.02, "learning_rate": 3.0087908716191183e-06, "loss": 1.7731, "step": 32480000 }, { "epoch": 94.02, "learning_rate": 3.0080672239718414e-06, "loss": 1.7764, "step": 32480500 }, { "epoch": 94.02, "learning_rate": 3.0073450236198595e-06, "loss": 1.7652, "step": 32481000 }, { "epoch": 94.02, "learning_rate": 3.0066213759725826e-06, "loss": 1.7412, "step": 32481500 }, { "epoch": 94.02, "learning_rate": 3.0058977283253057e-06, "loss": 1.7733, "step": 32482000 }, { "epoch": 94.02, "learning_rate": 3.005174080678029e-06, "loss": 1.7577, "step": 32482500 }, { "epoch": 94.02, "learning_rate": 3.0044504330307522e-06, "loss": 1.766, "step": 32483000 }, { "epoch": 94.03, "learning_rate": 3.0037267853834753e-06, "loss": 1.7738, "step": 32483500 }, { "epoch": 94.03, "learning_rate": 3.003003137736199e-06, "loss": 1.7458, "step": 32484000 }, { "epoch": 94.03, "learning_rate": 3.002279490088922e-06, "loss": 1.7704, "step": 32484500 }, { "epoch": 94.03, "learning_rate": 3.0015558424416454e-06, "loss": 1.7464, "step": 32485000 }, { "epoch": 94.03, "learning_rate": 3.000833642089663e-06, "loss": 1.7784, "step": 32485500 }, { "epoch": 94.03, "learning_rate": 3.000109994442386e-06, "loss": 1.755, "step": 32486000 }, { "epoch": 94.04, "learning_rate": 2.999387794090404e-06, "loss": 1.7591, "step": 32486500 }, { "epoch": 94.04, "learning_rate": 2.998664146443127e-06, "loss": 1.7522, "step": 32487000 }, { "epoch": 94.04, "learning_rate": 2.9979404987958504e-06, "loss": 1.774, "step": 32487500 }, { "epoch": 94.04, "learning_rate": 2.997216851148574e-06, "loss": 1.7767, "step": 32488000 }, { "epoch": 94.04, "learning_rate": 2.996493203501297e-06, "loss": 1.7729, "step": 32488500 }, { "epoch": 94.04, "learning_rate": 2.99576955585402e-06, "loss": 1.7668, "step": 32489000 }, { "epoch": 94.04, "learning_rate": 2.995045908206743e-06, "loss": 1.7762, "step": 32489500 }, { "epoch": 94.05, "learning_rate": 2.9943222605594667e-06, "loss": 1.7584, "step": 32490000 }, { "epoch": 94.05, "learning_rate": 2.99359861291219e-06, "loss": 1.7866, "step": 32490500 }, { "epoch": 94.05, "learning_rate": 2.9928778598555024e-06, "loss": 1.7945, "step": 32491000 }, { "epoch": 94.05, "learning_rate": 2.9921542122082255e-06, "loss": 1.7549, "step": 32491500 }, { "epoch": 94.05, "learning_rate": 2.9914305645609486e-06, "loss": 1.7618, "step": 32492000 }, { "epoch": 94.05, "learning_rate": 2.9907069169136717e-06, "loss": 1.7701, "step": 32492500 }, { "epoch": 94.05, "learning_rate": 2.9899832692663948e-06, "loss": 1.7396, "step": 32493000 }, { "epoch": 94.06, "learning_rate": 2.989261068914413e-06, "loss": 1.7794, "step": 32493500 }, { "epoch": 94.06, "learning_rate": 2.988537421267136e-06, "loss": 1.7679, "step": 32494000 }, { "epoch": 94.06, "learning_rate": 2.987813773619859e-06, "loss": 1.789, "step": 32494500 }, { "epoch": 94.06, "learning_rate": 2.987090125972583e-06, "loss": 1.7786, "step": 32495000 }, { "epoch": 94.06, "learning_rate": 2.986366478325306e-06, "loss": 1.7786, "step": 32495500 }, { "epoch": 94.06, "learning_rate": 2.985642830678029e-06, "loss": 1.7595, "step": 32496000 }, { "epoch": 94.06, "learning_rate": 2.984919183030752e-06, "loss": 1.7261, "step": 32496500 }, { "epoch": 94.07, "learning_rate": 2.9841955353834753e-06, "loss": 1.7606, "step": 32497000 }, { "epoch": 94.07, "learning_rate": 2.9834718877361988e-06, "loss": 1.7881, "step": 32497500 }, { "epoch": 94.07, "learning_rate": 2.9827496873842164e-06, "loss": 1.7765, "step": 32498000 }, { "epoch": 94.07, "learning_rate": 2.9820260397369395e-06, "loss": 1.763, "step": 32498500 }, { "epoch": 94.07, "learning_rate": 2.981302392089663e-06, "loss": 1.7468, "step": 32499000 }, { "epoch": 94.07, "learning_rate": 2.9805801917376807e-06, "loss": 1.7868, "step": 32499500 }, { "epoch": 94.07, "learning_rate": 2.9798565440904038e-06, "loss": 1.792, "step": 32500000 }, { "epoch": 94.08, "learning_rate": 2.9791328964431273e-06, "loss": 1.779, "step": 32500500 }, { "epoch": 94.08, "learning_rate": 2.978410696091145e-06, "loss": 1.7628, "step": 32501000 }, { "epoch": 94.08, "learning_rate": 2.977687048443868e-06, "loss": 1.7604, "step": 32501500 }, { "epoch": 94.08, "learning_rate": 2.9769634007965915e-06, "loss": 1.7546, "step": 32502000 }, { "epoch": 94.08, "learning_rate": 2.9762397531493146e-06, "loss": 1.7815, "step": 32502500 }, { "epoch": 94.08, "learning_rate": 2.975516105502038e-06, "loss": 1.7546, "step": 32503000 }, { "epoch": 94.08, "learning_rate": 2.9747924578547612e-06, "loss": 1.7707, "step": 32503500 }, { "epoch": 94.09, "learning_rate": 2.9740688102074843e-06, "loss": 1.7326, "step": 32504000 }, { "epoch": 94.09, "learning_rate": 2.973345162560208e-06, "loss": 1.7511, "step": 32504500 }, { "epoch": 94.09, "learning_rate": 2.972621514912931e-06, "loss": 1.7621, "step": 32505000 }, { "epoch": 94.09, "learning_rate": 2.971897867265654e-06, "loss": 1.7566, "step": 32505500 }, { "epoch": 94.09, "learning_rate": 2.9711742196183775e-06, "loss": 1.7533, "step": 32506000 }, { "epoch": 94.09, "learning_rate": 2.970452019266395e-06, "loss": 1.7468, "step": 32506500 }, { "epoch": 94.09, "learning_rate": 2.9697283716191182e-06, "loss": 1.7598, "step": 32507000 }, { "epoch": 94.1, "learning_rate": 2.9690047239718417e-06, "loss": 1.7918, "step": 32507500 }, { "epoch": 94.1, "learning_rate": 2.968281076324565e-06, "loss": 1.7924, "step": 32508000 }, { "epoch": 94.1, "learning_rate": 2.967557428677288e-06, "loss": 1.7793, "step": 32508500 }, { "epoch": 94.1, "learning_rate": 2.9668337810300114e-06, "loss": 1.7417, "step": 32509000 }, { "epoch": 94.1, "learning_rate": 2.9661101333827345e-06, "loss": 1.7477, "step": 32509500 }, { "epoch": 94.1, "learning_rate": 2.9653864857354575e-06, "loss": 1.7881, "step": 32510000 }, { "epoch": 94.1, "learning_rate": 2.9646628380881806e-06, "loss": 1.7469, "step": 32510500 }, { "epoch": 94.11, "learning_rate": 2.9639406377361987e-06, "loss": 1.7723, "step": 32511000 }, { "epoch": 94.11, "learning_rate": 2.963216990088922e-06, "loss": 1.7599, "step": 32511500 }, { "epoch": 94.11, "learning_rate": 2.962493342441645e-06, "loss": 1.7579, "step": 32512000 }, { "epoch": 94.11, "learning_rate": 2.9617696947943684e-06, "loss": 1.7516, "step": 32512500 }, { "epoch": 94.11, "learning_rate": 2.961046047147092e-06, "loss": 1.7654, "step": 32513000 }, { "epoch": 94.11, "learning_rate": 2.960322399499815e-06, "loss": 1.7355, "step": 32513500 }, { "epoch": 94.11, "learning_rate": 2.959598751852538e-06, "loss": 1.7679, "step": 32514000 }, { "epoch": 94.12, "learning_rate": 2.958876551500556e-06, "loss": 1.7705, "step": 32514500 }, { "epoch": 94.12, "learning_rate": 2.9581543511485734e-06, "loss": 1.7672, "step": 32515000 }, { "epoch": 94.12, "learning_rate": 2.957430703501297e-06, "loss": 1.754, "step": 32515500 }, { "epoch": 94.12, "learning_rate": 2.9567070558540204e-06, "loss": 1.7916, "step": 32516000 }, { "epoch": 94.12, "learning_rate": 2.9559834082067435e-06, "loss": 1.7549, "step": 32516500 }, { "epoch": 94.12, "learning_rate": 2.9552597605594666e-06, "loss": 1.7352, "step": 32517000 }, { "epoch": 94.12, "learning_rate": 2.9545361129121896e-06, "loss": 1.7777, "step": 32517500 }, { "epoch": 94.13, "learning_rate": 2.953812465264913e-06, "loss": 1.7437, "step": 32518000 }, { "epoch": 94.13, "learning_rate": 2.9530888176176362e-06, "loss": 1.7296, "step": 32518500 }, { "epoch": 94.13, "learning_rate": 2.9523651699703597e-06, "loss": 1.7546, "step": 32519000 }, { "epoch": 94.13, "learning_rate": 2.9516429696183774e-06, "loss": 1.7502, "step": 32519500 }, { "epoch": 94.13, "learning_rate": 2.9509193219711005e-06, "loss": 1.7866, "step": 32520000 }, { "epoch": 94.13, "learning_rate": 2.950195674323824e-06, "loss": 1.7721, "step": 32520500 }, { "epoch": 94.13, "learning_rate": 2.949472026676547e-06, "loss": 1.7631, "step": 32521000 }, { "epoch": 94.14, "learning_rate": 2.94874837902927e-06, "loss": 1.7465, "step": 32521500 }, { "epoch": 94.14, "learning_rate": 2.9480261786772883e-06, "loss": 1.771, "step": 32522000 }, { "epoch": 94.14, "learning_rate": 2.9473025310300113e-06, "loss": 1.775, "step": 32522500 }, { "epoch": 94.14, "learning_rate": 2.9465788833827344e-06, "loss": 1.7683, "step": 32523000 }, { "epoch": 94.14, "learning_rate": 2.9458552357354575e-06, "loss": 1.7777, "step": 32523500 }, { "epoch": 94.14, "learning_rate": 2.9451330353834756e-06, "loss": 1.756, "step": 32524000 }, { "epoch": 94.15, "learning_rate": 2.9444093877361987e-06, "loss": 1.7803, "step": 32524500 }, { "epoch": 94.15, "learning_rate": 2.9436857400889218e-06, "loss": 1.7578, "step": 32525000 }, { "epoch": 94.15, "learning_rate": 2.9429620924416453e-06, "loss": 1.7496, "step": 32525500 }, { "epoch": 94.15, "learning_rate": 2.942239892089663e-06, "loss": 1.749, "step": 32526000 }, { "epoch": 94.15, "learning_rate": 2.941516244442386e-06, "loss": 1.7446, "step": 32526500 }, { "epoch": 94.15, "learning_rate": 2.9407925967951095e-06, "loss": 1.7658, "step": 32527000 }, { "epoch": 94.15, "learning_rate": 2.940070396443127e-06, "loss": 1.7362, "step": 32527500 }, { "epoch": 94.16, "learning_rate": 2.9393467487958503e-06, "loss": 1.7574, "step": 32528000 }, { "epoch": 94.16, "learning_rate": 2.9386231011485738e-06, "loss": 1.7564, "step": 32528500 }, { "epoch": 94.16, "learning_rate": 2.937899453501297e-06, "loss": 1.7561, "step": 32529000 }, { "epoch": 94.16, "learning_rate": 2.9371758058540204e-06, "loss": 1.7722, "step": 32529500 }, { "epoch": 94.16, "learning_rate": 2.9364521582067434e-06, "loss": 1.7439, "step": 32530000 }, { "epoch": 94.16, "learning_rate": 2.9357285105594665e-06, "loss": 1.748, "step": 32530500 }, { "epoch": 94.16, "learning_rate": 2.93500486291219e-06, "loss": 1.7675, "step": 32531000 }, { "epoch": 94.17, "learning_rate": 2.9342826625602077e-06, "loss": 1.7447, "step": 32531500 }, { "epoch": 94.17, "learning_rate": 2.9335590149129308e-06, "loss": 1.7546, "step": 32532000 }, { "epoch": 94.17, "learning_rate": 2.9328353672656543e-06, "loss": 1.7905, "step": 32532500 }, { "epoch": 94.17, "learning_rate": 2.9321117196183774e-06, "loss": 1.7689, "step": 32533000 }, { "epoch": 94.17, "learning_rate": 2.9313880719711004e-06, "loss": 1.7632, "step": 32533500 }, { "epoch": 94.17, "learning_rate": 2.9306644243238235e-06, "loss": 1.7522, "step": 32534000 }, { "epoch": 94.17, "learning_rate": 2.9299422239718416e-06, "loss": 1.7702, "step": 32534500 }, { "epoch": 94.18, "learning_rate": 2.9292185763245647e-06, "loss": 1.7643, "step": 32535000 }, { "epoch": 94.18, "learning_rate": 2.9284949286772878e-06, "loss": 1.7723, "step": 32535500 }, { "epoch": 94.18, "learning_rate": 2.9277712810300113e-06, "loss": 1.7436, "step": 32536000 }, { "epoch": 94.18, "learning_rate": 2.927049080678029e-06, "loss": 1.7427, "step": 32536500 }, { "epoch": 94.18, "learning_rate": 2.926325433030752e-06, "loss": 1.7889, "step": 32537000 }, { "epoch": 94.18, "learning_rate": 2.9256017853834755e-06, "loss": 1.757, "step": 32537500 }, { "epoch": 94.18, "learning_rate": 2.9248781377361986e-06, "loss": 1.7802, "step": 32538000 }, { "epoch": 94.19, "learning_rate": 2.924154490088922e-06, "loss": 1.7831, "step": 32538500 }, { "epoch": 94.19, "learning_rate": 2.923430842441645e-06, "loss": 1.7607, "step": 32539000 }, { "epoch": 94.19, "learning_rate": 2.922708642089663e-06, "loss": 1.799, "step": 32539500 }, { "epoch": 94.19, "learning_rate": 2.9219849944423864e-06, "loss": 1.7519, "step": 32540000 }, { "epoch": 94.19, "learning_rate": 2.9212613467951095e-06, "loss": 1.7584, "step": 32540500 }, { "epoch": 94.19, "learning_rate": 2.9205376991478325e-06, "loss": 1.7575, "step": 32541000 }, { "epoch": 94.19, "learning_rate": 2.9198140515005556e-06, "loss": 1.7595, "step": 32541500 }, { "epoch": 94.2, "learning_rate": 2.9190918511485737e-06, "loss": 1.7768, "step": 32542000 }, { "epoch": 94.2, "learning_rate": 2.918368203501297e-06, "loss": 1.7342, "step": 32542500 }, { "epoch": 94.2, "learning_rate": 2.91764455585402e-06, "loss": 1.764, "step": 32543000 }, { "epoch": 94.2, "learning_rate": 2.9169209082067434e-06, "loss": 1.8116, "step": 32543500 }, { "epoch": 94.2, "learning_rate": 2.916197260559467e-06, "loss": 1.7661, "step": 32544000 }, { "epoch": 94.2, "learning_rate": 2.91547361291219e-06, "loss": 1.7654, "step": 32544500 }, { "epoch": 94.2, "learning_rate": 2.914749965264913e-06, "loss": 1.7599, "step": 32545000 }, { "epoch": 94.21, "learning_rate": 2.914026317617636e-06, "loss": 1.7627, "step": 32545500 }, { "epoch": 94.21, "learning_rate": 2.9133026699703596e-06, "loss": 1.7452, "step": 32546000 }, { "epoch": 94.21, "learning_rate": 2.9125804696183773e-06, "loss": 1.7762, "step": 32546500 }, { "epoch": 94.21, "learning_rate": 2.9118582692663954e-06, "loss": 1.7617, "step": 32547000 }, { "epoch": 94.21, "learning_rate": 2.9111346216191185e-06, "loss": 1.7638, "step": 32547500 }, { "epoch": 94.21, "learning_rate": 2.9104109739718416e-06, "loss": 1.7714, "step": 32548000 }, { "epoch": 94.21, "learning_rate": 2.9096873263245646e-06, "loss": 1.7486, "step": 32548500 }, { "epoch": 94.22, "learning_rate": 2.908963678677288e-06, "loss": 1.766, "step": 32549000 }, { "epoch": 94.22, "learning_rate": 2.908241478325306e-06, "loss": 1.7749, "step": 32549500 }, { "epoch": 94.22, "learning_rate": 2.907517830678029e-06, "loss": 1.755, "step": 32550000 }, { "epoch": 94.22, "learning_rate": 2.906794183030752e-06, "loss": 1.7641, "step": 32550500 }, { "epoch": 94.22, "learning_rate": 2.9060705353834755e-06, "loss": 1.7568, "step": 32551000 }, { "epoch": 94.22, "learning_rate": 2.905348335031493e-06, "loss": 1.7511, "step": 32551500 }, { "epoch": 94.22, "learning_rate": 2.9046246873842162e-06, "loss": 1.7546, "step": 32552000 }, { "epoch": 94.23, "learning_rate": 2.9039010397369397e-06, "loss": 1.7542, "step": 32552500 }, { "epoch": 94.23, "learning_rate": 2.9031788393849574e-06, "loss": 1.7699, "step": 32553000 }, { "epoch": 94.23, "learning_rate": 2.9024551917376805e-06, "loss": 1.7509, "step": 32553500 }, { "epoch": 94.23, "learning_rate": 2.901731544090404e-06, "loss": 1.7695, "step": 32554000 }, { "epoch": 94.23, "learning_rate": 2.901007896443127e-06, "loss": 1.7837, "step": 32554500 }, { "epoch": 94.23, "learning_rate": 2.9002842487958506e-06, "loss": 1.7548, "step": 32555000 }, { "epoch": 94.23, "learning_rate": 2.8995606011485737e-06, "loss": 1.781, "step": 32555500 }, { "epoch": 94.24, "learning_rate": 2.8988369535012968e-06, "loss": 1.7673, "step": 32556000 }, { "epoch": 94.24, "learning_rate": 2.8981133058540203e-06, "loss": 1.7621, "step": 32556500 }, { "epoch": 94.24, "learning_rate": 2.8973896582067433e-06, "loss": 1.7621, "step": 32557000 }, { "epoch": 94.24, "learning_rate": 2.8966660105594664e-06, "loss": 1.7759, "step": 32557500 }, { "epoch": 94.24, "learning_rate": 2.895945257502779e-06, "loss": 1.7825, "step": 32558000 }, { "epoch": 94.24, "learning_rate": 2.895221609855502e-06, "loss": 1.7468, "step": 32558500 }, { "epoch": 94.24, "learning_rate": 2.8944979622082253e-06, "loss": 1.7908, "step": 32559000 }, { "epoch": 94.25, "learning_rate": 2.8937757618562434e-06, "loss": 1.7765, "step": 32559500 }, { "epoch": 94.25, "learning_rate": 2.8930521142089664e-06, "loss": 1.767, "step": 32560000 }, { "epoch": 94.25, "learning_rate": 2.8923284665616895e-06, "loss": 1.7534, "step": 32560500 }, { "epoch": 94.25, "learning_rate": 2.891604818914413e-06, "loss": 1.7798, "step": 32561000 }, { "epoch": 94.25, "learning_rate": 2.890881171267136e-06, "loss": 1.768, "step": 32561500 }, { "epoch": 94.25, "learning_rate": 2.890157523619859e-06, "loss": 1.7639, "step": 32562000 }, { "epoch": 94.26, "learning_rate": 2.8894338759725827e-06, "loss": 1.7613, "step": 32562500 }, { "epoch": 94.26, "learning_rate": 2.8887102283253058e-06, "loss": 1.7659, "step": 32563000 }, { "epoch": 94.26, "learning_rate": 2.8879880279733234e-06, "loss": 1.7564, "step": 32563500 }, { "epoch": 94.26, "learning_rate": 2.887264380326047e-06, "loss": 1.7534, "step": 32564000 }, { "epoch": 94.26, "learning_rate": 2.88654073267877e-06, "loss": 1.7591, "step": 32564500 }, { "epoch": 94.26, "learning_rate": 2.8858170850314935e-06, "loss": 1.7912, "step": 32565000 }, { "epoch": 94.26, "learning_rate": 2.8850934373842166e-06, "loss": 1.7601, "step": 32565500 }, { "epoch": 94.27, "learning_rate": 2.8843697897369397e-06, "loss": 1.7568, "step": 32566000 }, { "epoch": 94.27, "learning_rate": 2.8836461420896628e-06, "loss": 1.7473, "step": 32566500 }, { "epoch": 94.27, "learning_rate": 2.882923941737681e-06, "loss": 1.7434, "step": 32567000 }, { "epoch": 94.27, "learning_rate": 2.882200294090404e-06, "loss": 1.7846, "step": 32567500 }, { "epoch": 94.27, "learning_rate": 2.881476646443127e-06, "loss": 1.7542, "step": 32568000 }, { "epoch": 94.27, "learning_rate": 2.88075299879585e-06, "loss": 1.7825, "step": 32568500 }, { "epoch": 94.27, "learning_rate": 2.8800293511485736e-06, "loss": 1.7757, "step": 32569000 }, { "epoch": 94.28, "learning_rate": 2.8793071507965913e-06, "loss": 1.7833, "step": 32569500 }, { "epoch": 94.28, "learning_rate": 2.8785835031493144e-06, "loss": 1.7612, "step": 32570000 }, { "epoch": 94.28, "learning_rate": 2.877859855502038e-06, "loss": 1.7646, "step": 32570500 }, { "epoch": 94.28, "learning_rate": 2.8771362078547614e-06, "loss": 1.7681, "step": 32571000 }, { "epoch": 94.28, "learning_rate": 2.8764125602074845e-06, "loss": 1.7321, "step": 32571500 }, { "epoch": 94.28, "learning_rate": 2.875690359855502e-06, "loss": 1.7792, "step": 32572000 }, { "epoch": 94.28, "learning_rate": 2.8749667122082256e-06, "loss": 1.7537, "step": 32572500 }, { "epoch": 94.29, "learning_rate": 2.8742430645609487e-06, "loss": 1.7776, "step": 32573000 }, { "epoch": 94.29, "learning_rate": 2.873519416913672e-06, "loss": 1.7643, "step": 32573500 }, { "epoch": 94.29, "learning_rate": 2.872795769266395e-06, "loss": 1.7531, "step": 32574000 }, { "epoch": 94.29, "learning_rate": 2.8720721216191184e-06, "loss": 1.7346, "step": 32574500 }, { "epoch": 94.29, "learning_rate": 2.871348473971842e-06, "loss": 1.767, "step": 32575000 }, { "epoch": 94.29, "learning_rate": 2.870624826324565e-06, "loss": 1.7629, "step": 32575500 }, { "epoch": 94.29, "learning_rate": 2.869901178677288e-06, "loss": 1.7834, "step": 32576000 }, { "epoch": 94.3, "learning_rate": 2.869177531030011e-06, "loss": 1.7642, "step": 32576500 }, { "epoch": 94.3, "learning_rate": 2.8684553306780292e-06, "loss": 1.7684, "step": 32577000 }, { "epoch": 94.3, "learning_rate": 2.8677316830307523e-06, "loss": 1.7462, "step": 32577500 }, { "epoch": 94.3, "learning_rate": 2.8670080353834754e-06, "loss": 1.7576, "step": 32578000 }, { "epoch": 94.3, "learning_rate": 2.866284387736199e-06, "loss": 1.7803, "step": 32578500 }, { "epoch": 94.3, "learning_rate": 2.8655621873842166e-06, "loss": 1.7645, "step": 32579000 }, { "epoch": 94.3, "learning_rate": 2.8648385397369396e-06, "loss": 1.7684, "step": 32579500 }, { "epoch": 94.31, "learning_rate": 2.864114892089663e-06, "loss": 1.7597, "step": 32580000 }, { "epoch": 94.31, "learning_rate": 2.8633912444423862e-06, "loss": 1.7888, "step": 32580500 }, { "epoch": 94.31, "learning_rate": 2.8626675967951093e-06, "loss": 1.7517, "step": 32581000 }, { "epoch": 94.31, "learning_rate": 2.861943949147833e-06, "loss": 1.7888, "step": 32581500 }, { "epoch": 94.31, "learning_rate": 2.8612217487958505e-06, "loss": 1.7672, "step": 32582000 }, { "epoch": 94.31, "learning_rate": 2.8604981011485736e-06, "loss": 1.7491, "step": 32582500 }, { "epoch": 94.31, "learning_rate": 2.859774453501297e-06, "loss": 1.75, "step": 32583000 }, { "epoch": 94.32, "learning_rate": 2.8590522531493147e-06, "loss": 1.7353, "step": 32583500 }, { "epoch": 94.32, "learning_rate": 2.858328605502038e-06, "loss": 1.7613, "step": 32584000 }, { "epoch": 94.32, "learning_rate": 2.8576049578547613e-06, "loss": 1.7788, "step": 32584500 }, { "epoch": 94.32, "learning_rate": 2.8568813102074844e-06, "loss": 1.753, "step": 32585000 }, { "epoch": 94.32, "learning_rate": 2.8561576625602075e-06, "loss": 1.7476, "step": 32585500 }, { "epoch": 94.32, "learning_rate": 2.855434014912931e-06, "loss": 1.7778, "step": 32586000 }, { "epoch": 94.32, "learning_rate": 2.854710367265654e-06, "loss": 1.7472, "step": 32586500 }, { "epoch": 94.33, "learning_rate": 2.853986719618377e-06, "loss": 1.767, "step": 32587000 }, { "epoch": 94.33, "learning_rate": 2.8532645192663953e-06, "loss": 1.7607, "step": 32587500 }, { "epoch": 94.33, "learning_rate": 2.8525408716191183e-06, "loss": 1.7714, "step": 32588000 }, { "epoch": 94.33, "learning_rate": 2.851818671267136e-06, "loss": 1.7535, "step": 32588500 }, { "epoch": 94.33, "learning_rate": 2.8510950236198595e-06, "loss": 1.7839, "step": 32589000 }, { "epoch": 94.33, "learning_rate": 2.8503713759725826e-06, "loss": 1.7893, "step": 32589500 }, { "epoch": 94.33, "learning_rate": 2.8496477283253057e-06, "loss": 1.7556, "step": 32590000 }, { "epoch": 94.34, "learning_rate": 2.8489240806780288e-06, "loss": 1.7393, "step": 32590500 }, { "epoch": 94.34, "learning_rate": 2.8482004330307523e-06, "loss": 1.7543, "step": 32591000 }, { "epoch": 94.34, "learning_rate": 2.8474767853834758e-06, "loss": 1.783, "step": 32591500 }, { "epoch": 94.34, "learning_rate": 2.846753137736199e-06, "loss": 1.7481, "step": 32592000 }, { "epoch": 94.34, "learning_rate": 2.8460309373842165e-06, "loss": 1.7666, "step": 32592500 }, { "epoch": 94.34, "learning_rate": 2.84530728973694e-06, "loss": 1.7663, "step": 32593000 }, { "epoch": 94.34, "learning_rate": 2.844583642089663e-06, "loss": 1.7658, "step": 32593500 }, { "epoch": 94.35, "learning_rate": 2.843859994442386e-06, "loss": 1.755, "step": 32594000 }, { "epoch": 94.35, "learning_rate": 2.8431363467951093e-06, "loss": 1.7387, "step": 32594500 }, { "epoch": 94.35, "learning_rate": 2.8424141464431274e-06, "loss": 1.7907, "step": 32595000 }, { "epoch": 94.35, "learning_rate": 2.8416904987958504e-06, "loss": 1.7643, "step": 32595500 }, { "epoch": 94.35, "learning_rate": 2.8409668511485735e-06, "loss": 1.7609, "step": 32596000 }, { "epoch": 94.35, "learning_rate": 2.8402432035012966e-06, "loss": 1.7898, "step": 32596500 }, { "epoch": 94.35, "learning_rate": 2.8395210031493147e-06, "loss": 1.7865, "step": 32597000 }, { "epoch": 94.36, "learning_rate": 2.8387973555020378e-06, "loss": 1.7355, "step": 32597500 }, { "epoch": 94.36, "learning_rate": 2.838075155150056e-06, "loss": 1.7711, "step": 32598000 }, { "epoch": 94.36, "learning_rate": 2.837351507502779e-06, "loss": 1.7638, "step": 32598500 }, { "epoch": 94.36, "learning_rate": 2.836627859855502e-06, "loss": 1.7513, "step": 32599000 }, { "epoch": 94.36, "learning_rate": 2.835904212208225e-06, "loss": 1.7723, "step": 32599500 }, { "epoch": 94.36, "learning_rate": 2.8351805645609486e-06, "loss": 1.768, "step": 32600000 }, { "epoch": 94.37, "learning_rate": 2.834456916913672e-06, "loss": 1.7656, "step": 32600500 }, { "epoch": 94.37, "learning_rate": 2.833733269266395e-06, "loss": 1.7392, "step": 32601000 }, { "epoch": 94.37, "learning_rate": 2.8330096216191183e-06, "loss": 1.772, "step": 32601500 }, { "epoch": 94.37, "learning_rate": 2.8322859739718414e-06, "loss": 1.765, "step": 32602000 }, { "epoch": 94.37, "learning_rate": 2.8315637736198595e-06, "loss": 1.768, "step": 32602500 }, { "epoch": 94.37, "learning_rate": 2.8308401259725825e-06, "loss": 1.7534, "step": 32603000 }, { "epoch": 94.37, "learning_rate": 2.8301164783253056e-06, "loss": 1.7535, "step": 32603500 }, { "epoch": 94.38, "learning_rate": 2.829392830678029e-06, "loss": 1.7861, "step": 32604000 }, { "epoch": 94.38, "learning_rate": 2.828670630326047e-06, "loss": 1.7615, "step": 32604500 }, { "epoch": 94.38, "learning_rate": 2.82794698267877e-06, "loss": 1.7372, "step": 32605000 }, { "epoch": 94.38, "learning_rate": 2.8272233350314934e-06, "loss": 1.7808, "step": 32605500 }, { "epoch": 94.38, "learning_rate": 2.8264996873842165e-06, "loss": 1.7466, "step": 32606000 }, { "epoch": 94.38, "learning_rate": 2.825777487032234e-06, "loss": 1.7837, "step": 32606500 }, { "epoch": 94.38, "learning_rate": 2.8250538393849576e-06, "loss": 1.7487, "step": 32607000 }, { "epoch": 94.39, "learning_rate": 2.8243301917376807e-06, "loss": 1.768, "step": 32607500 }, { "epoch": 94.39, "learning_rate": 2.8236065440904042e-06, "loss": 1.7663, "step": 32608000 }, { "epoch": 94.39, "learning_rate": 2.8228828964431273e-06, "loss": 1.8024, "step": 32608500 }, { "epoch": 94.39, "learning_rate": 2.822160696091145e-06, "loss": 1.7777, "step": 32609000 }, { "epoch": 94.39, "learning_rate": 2.8214370484438685e-06, "loss": 1.7436, "step": 32609500 }, { "epoch": 94.39, "learning_rate": 2.8207134007965916e-06, "loss": 1.7703, "step": 32610000 }, { "epoch": 94.39, "learning_rate": 2.8199897531493146e-06, "loss": 1.7406, "step": 32610500 }, { "epoch": 94.4, "learning_rate": 2.8192661055020377e-06, "loss": 1.7645, "step": 32611000 }, { "epoch": 94.4, "learning_rate": 2.8185424578547612e-06, "loss": 1.752, "step": 32611500 }, { "epoch": 94.4, "learning_rate": 2.8178188102074843e-06, "loss": 1.7545, "step": 32612000 }, { "epoch": 94.4, "learning_rate": 2.817096609855502e-06, "loss": 1.7911, "step": 32612500 }, { "epoch": 94.4, "learning_rate": 2.8163729622082255e-06, "loss": 1.767, "step": 32613000 }, { "epoch": 94.4, "learning_rate": 2.8156493145609486e-06, "loss": 1.7806, "step": 32613500 }, { "epoch": 94.4, "learning_rate": 2.8149256669136716e-06, "loss": 1.7665, "step": 32614000 }, { "epoch": 94.41, "learning_rate": 2.8142034665616897e-06, "loss": 1.7693, "step": 32614500 }, { "epoch": 94.41, "learning_rate": 2.813479818914413e-06, "loss": 1.7604, "step": 32615000 }, { "epoch": 94.41, "learning_rate": 2.812756171267136e-06, "loss": 1.7639, "step": 32615500 }, { "epoch": 94.41, "learning_rate": 2.8120325236198594e-06, "loss": 1.7548, "step": 32616000 }, { "epoch": 94.41, "learning_rate": 2.8113088759725825e-06, "loss": 1.7512, "step": 32616500 }, { "epoch": 94.41, "learning_rate": 2.8105866756206e-06, "loss": 1.7458, "step": 32617000 }, { "epoch": 94.41, "learning_rate": 2.8098630279733237e-06, "loss": 1.7558, "step": 32617500 }, { "epoch": 94.42, "learning_rate": 2.8091393803260467e-06, "loss": 1.7439, "step": 32618000 }, { "epoch": 94.42, "learning_rate": 2.8084157326787702e-06, "loss": 1.7734, "step": 32618500 }, { "epoch": 94.42, "learning_rate": 2.807693532326788e-06, "loss": 1.77, "step": 32619000 }, { "epoch": 94.42, "learning_rate": 2.806969884679511e-06, "loss": 1.7471, "step": 32619500 }, { "epoch": 94.42, "learning_rate": 2.8062462370322345e-06, "loss": 1.7793, "step": 32620000 }, { "epoch": 94.42, "learning_rate": 2.8055225893849576e-06, "loss": 1.7492, "step": 32620500 }, { "epoch": 94.42, "learning_rate": 2.8048003890329753e-06, "loss": 1.7602, "step": 32621000 }, { "epoch": 94.43, "learning_rate": 2.8040767413856988e-06, "loss": 1.7613, "step": 32621500 }, { "epoch": 94.43, "learning_rate": 2.8033545410337164e-06, "loss": 1.7484, "step": 32622000 }, { "epoch": 94.43, "learning_rate": 2.8026308933864395e-06, "loss": 1.7351, "step": 32622500 }, { "epoch": 94.43, "learning_rate": 2.801907245739163e-06, "loss": 1.7771, "step": 32623000 }, { "epoch": 94.43, "learning_rate": 2.801183598091886e-06, "loss": 1.7784, "step": 32623500 }, { "epoch": 94.43, "learning_rate": 2.800459950444609e-06, "loss": 1.7688, "step": 32624000 }, { "epoch": 94.43, "learning_rate": 2.7997363027973323e-06, "loss": 1.7699, "step": 32624500 }, { "epoch": 94.44, "learning_rate": 2.7990126551500553e-06, "loss": 1.7705, "step": 32625000 }, { "epoch": 94.44, "learning_rate": 2.7982890075027793e-06, "loss": 1.7715, "step": 32625500 }, { "epoch": 94.44, "learning_rate": 2.7975653598555024e-06, "loss": 1.7298, "step": 32626000 }, { "epoch": 94.44, "learning_rate": 2.7968417122082254e-06, "loss": 1.7449, "step": 32626500 }, { "epoch": 94.44, "learning_rate": 2.7961180645609485e-06, "loss": 1.7466, "step": 32627000 }, { "epoch": 94.44, "learning_rate": 2.7953944169136716e-06, "loss": 1.7697, "step": 32627500 }, { "epoch": 94.44, "learning_rate": 2.794670769266395e-06, "loss": 1.7574, "step": 32628000 }, { "epoch": 94.45, "learning_rate": 2.7939471216191186e-06, "loss": 1.7731, "step": 32628500 }, { "epoch": 94.45, "learning_rate": 2.793224921267136e-06, "loss": 1.7552, "step": 32629000 }, { "epoch": 94.45, "learning_rate": 2.7925012736198594e-06, "loss": 1.7791, "step": 32629500 }, { "epoch": 94.45, "learning_rate": 2.791777625972583e-06, "loss": 1.7714, "step": 32630000 }, { "epoch": 94.45, "learning_rate": 2.791053978325306e-06, "loss": 1.7605, "step": 32630500 }, { "epoch": 94.45, "learning_rate": 2.7903317779733236e-06, "loss": 1.7862, "step": 32631000 }, { "epoch": 94.45, "learning_rate": 2.789608130326047e-06, "loss": 1.7554, "step": 32631500 }, { "epoch": 94.46, "learning_rate": 2.78888448267877e-06, "loss": 1.7487, "step": 32632000 }, { "epoch": 94.46, "learning_rate": 2.788162282326788e-06, "loss": 1.7783, "step": 32632500 }, { "epoch": 94.46, "learning_rate": 2.7874386346795114e-06, "loss": 1.7495, "step": 32633000 }, { "epoch": 94.46, "learning_rate": 2.7867149870322345e-06, "loss": 1.7624, "step": 32633500 }, { "epoch": 94.46, "learning_rate": 2.7859913393849575e-06, "loss": 1.7628, "step": 32634000 }, { "epoch": 94.46, "learning_rate": 2.7852676917376806e-06, "loss": 1.7667, "step": 32634500 }, { "epoch": 94.46, "learning_rate": 2.784544044090404e-06, "loss": 1.7745, "step": 32635000 }, { "epoch": 94.47, "learning_rate": 2.783820396443127e-06, "loss": 1.7742, "step": 32635500 }, { "epoch": 94.47, "learning_rate": 2.7830967487958503e-06, "loss": 1.7653, "step": 32636000 }, { "epoch": 94.47, "learning_rate": 2.7823731011485738e-06, "loss": 1.7511, "step": 32636500 }, { "epoch": 94.47, "learning_rate": 2.7816509007965915e-06, "loss": 1.7436, "step": 32637000 }, { "epoch": 94.47, "learning_rate": 2.7809272531493145e-06, "loss": 1.7744, "step": 32637500 }, { "epoch": 94.47, "learning_rate": 2.780203605502038e-06, "loss": 1.7522, "step": 32638000 }, { "epoch": 94.48, "learning_rate": 2.779479957854761e-06, "loss": 1.7669, "step": 32638500 }, { "epoch": 94.48, "learning_rate": 2.7787563102074846e-06, "loss": 1.7587, "step": 32639000 }, { "epoch": 94.48, "learning_rate": 2.7780326625602077e-06, "loss": 1.7515, "step": 32639500 }, { "epoch": 94.48, "learning_rate": 2.777309014912931e-06, "loss": 1.7668, "step": 32640000 }, { "epoch": 94.48, "learning_rate": 2.776585367265654e-06, "loss": 1.7493, "step": 32640500 }, { "epoch": 94.48, "learning_rate": 2.775863166913672e-06, "loss": 1.7829, "step": 32641000 }, { "epoch": 94.48, "learning_rate": 2.775139519266395e-06, "loss": 1.7753, "step": 32641500 }, { "epoch": 94.49, "learning_rate": 2.774415871619118e-06, "loss": 1.7521, "step": 32642000 }, { "epoch": 94.49, "learning_rate": 2.7736936712671362e-06, "loss": 1.7727, "step": 32642500 }, { "epoch": 94.49, "learning_rate": 2.7729700236198593e-06, "loss": 1.7745, "step": 32643000 }, { "epoch": 94.49, "learning_rate": 2.7722463759725824e-06, "loss": 1.751, "step": 32643500 }, { "epoch": 94.49, "learning_rate": 2.7715227283253055e-06, "loss": 1.7854, "step": 32644000 }, { "epoch": 94.49, "learning_rate": 2.770799080678029e-06, "loss": 1.7584, "step": 32644500 }, { "epoch": 94.49, "learning_rate": 2.7700754330307525e-06, "loss": 1.7672, "step": 32645000 }, { "epoch": 94.5, "learning_rate": 2.7693517853834756e-06, "loss": 1.7655, "step": 32645500 }, { "epoch": 94.5, "learning_rate": 2.7686295850314932e-06, "loss": 1.7679, "step": 32646000 }, { "epoch": 94.5, "learning_rate": 2.7679059373842167e-06, "loss": 1.77, "step": 32646500 }, { "epoch": 94.5, "learning_rate": 2.76718228973694e-06, "loss": 1.7707, "step": 32647000 }, { "epoch": 94.5, "learning_rate": 2.766458642089663e-06, "loss": 1.7407, "step": 32647500 }, { "epoch": 94.5, "learning_rate": 2.765734994442386e-06, "loss": 1.7781, "step": 32648000 }, { "epoch": 94.5, "learning_rate": 2.7650113467951095e-06, "loss": 1.7527, "step": 32648500 }, { "epoch": 94.51, "learning_rate": 2.764287699147833e-06, "loss": 1.7689, "step": 32649000 }, { "epoch": 94.51, "learning_rate": 2.7635654987958502e-06, "loss": 1.7891, "step": 32649500 }, { "epoch": 94.51, "learning_rate": 2.7628418511485737e-06, "loss": 1.7504, "step": 32650000 }, { "epoch": 94.51, "learning_rate": 2.7621182035012972e-06, "loss": 1.7472, "step": 32650500 }, { "epoch": 94.51, "learning_rate": 2.7613945558540203e-06, "loss": 1.7343, "step": 32651000 }, { "epoch": 94.51, "learning_rate": 2.7606709082067434e-06, "loss": 1.735, "step": 32651500 }, { "epoch": 94.51, "learning_rate": 2.7599472605594665e-06, "loss": 1.7296, "step": 32652000 }, { "epoch": 94.52, "learning_rate": 2.7592236129121896e-06, "loss": 1.7946, "step": 32652500 }, { "epoch": 94.52, "learning_rate": 2.758499965264913e-06, "loss": 1.7577, "step": 32653000 }, { "epoch": 94.52, "learning_rate": 2.757776317617636e-06, "loss": 1.7332, "step": 32653500 }, { "epoch": 94.52, "learning_rate": 2.7570526699703596e-06, "loss": 1.7646, "step": 32654000 }, { "epoch": 94.52, "learning_rate": 2.7563304696183773e-06, "loss": 1.747, "step": 32654500 }, { "epoch": 94.52, "learning_rate": 2.7556068219711004e-06, "loss": 1.7737, "step": 32655000 }, { "epoch": 94.52, "learning_rate": 2.754883174323824e-06, "loss": 1.7482, "step": 32655500 }, { "epoch": 94.53, "learning_rate": 2.7541609739718416e-06, "loss": 1.7476, "step": 32656000 }, { "epoch": 94.53, "learning_rate": 2.7534387736198593e-06, "loss": 1.7858, "step": 32656500 }, { "epoch": 94.53, "learning_rate": 2.7527151259725823e-06, "loss": 1.7685, "step": 32657000 }, { "epoch": 94.53, "learning_rate": 2.751991478325306e-06, "loss": 1.7721, "step": 32657500 }, { "epoch": 94.53, "learning_rate": 2.751267830678029e-06, "loss": 1.7536, "step": 32658000 }, { "epoch": 94.53, "learning_rate": 2.7505441830307524e-06, "loss": 1.7621, "step": 32658500 }, { "epoch": 94.53, "learning_rate": 2.7498205353834755e-06, "loss": 1.7449, "step": 32659000 }, { "epoch": 94.54, "learning_rate": 2.7490968877361986e-06, "loss": 1.7603, "step": 32659500 }, { "epoch": 94.54, "learning_rate": 2.748373240088922e-06, "loss": 1.7456, "step": 32660000 }, { "epoch": 94.54, "learning_rate": 2.747649592441645e-06, "loss": 1.754, "step": 32660500 }, { "epoch": 94.54, "learning_rate": 2.7469259447943682e-06, "loss": 1.7614, "step": 32661000 }, { "epoch": 94.54, "learning_rate": 2.7462022971470918e-06, "loss": 1.7622, "step": 32661500 }, { "epoch": 94.54, "learning_rate": 2.7454800967951094e-06, "loss": 1.7673, "step": 32662000 }, { "epoch": 94.54, "learning_rate": 2.7447564491478325e-06, "loss": 1.7564, "step": 32662500 }, { "epoch": 94.55, "learning_rate": 2.744032801500556e-06, "loss": 1.7518, "step": 32663000 }, { "epoch": 94.55, "learning_rate": 2.743309153853279e-06, "loss": 1.77, "step": 32663500 }, { "epoch": 94.55, "learning_rate": 2.7425855062060026e-06, "loss": 1.7661, "step": 32664000 }, { "epoch": 94.55, "learning_rate": 2.7418618585587257e-06, "loss": 1.7545, "step": 32664500 }, { "epoch": 94.55, "learning_rate": 2.7411382109114488e-06, "loss": 1.7611, "step": 32665000 }, { "epoch": 94.55, "learning_rate": 2.740414563264172e-06, "loss": 1.7793, "step": 32665500 }, { "epoch": 94.55, "learning_rate": 2.739690915616895e-06, "loss": 1.7668, "step": 32666000 }, { "epoch": 94.56, "learning_rate": 2.738967267969619e-06, "loss": 1.7624, "step": 32666500 }, { "epoch": 94.56, "learning_rate": 2.738245067617636e-06, "loss": 1.7573, "step": 32667000 }, { "epoch": 94.56, "learning_rate": 2.737521419970359e-06, "loss": 1.7645, "step": 32667500 }, { "epoch": 94.56, "learning_rate": 2.7367977723230827e-06, "loss": 1.7741, "step": 32668000 }, { "epoch": 94.56, "learning_rate": 2.736074124675806e-06, "loss": 1.7395, "step": 32668500 }, { "epoch": 94.56, "learning_rate": 2.7353519243238234e-06, "loss": 1.7735, "step": 32669000 }, { "epoch": 94.56, "learning_rate": 2.734628276676547e-06, "loss": 1.7453, "step": 32669500 }, { "epoch": 94.57, "learning_rate": 2.7339046290292704e-06, "loss": 1.7772, "step": 32670000 }, { "epoch": 94.57, "learning_rate": 2.7331809813819935e-06, "loss": 1.7531, "step": 32670500 }, { "epoch": 94.57, "learning_rate": 2.7324573337347166e-06, "loss": 1.7718, "step": 32671000 }, { "epoch": 94.57, "learning_rate": 2.7317336860874397e-06, "loss": 1.7509, "step": 32671500 }, { "epoch": 94.57, "learning_rate": 2.731010038440163e-06, "loss": 1.7733, "step": 32672000 }, { "epoch": 94.57, "learning_rate": 2.7302863907928867e-06, "loss": 1.7404, "step": 32672500 }, { "epoch": 94.57, "learning_rate": 2.729564190440904e-06, "loss": 1.7485, "step": 32673000 }, { "epoch": 94.58, "learning_rate": 2.7288405427936274e-06, "loss": 1.7728, "step": 32673500 }, { "epoch": 94.58, "learning_rate": 2.728116895146351e-06, "loss": 1.7432, "step": 32674000 }, { "epoch": 94.58, "learning_rate": 2.727393247499074e-06, "loss": 1.7699, "step": 32674500 }, { "epoch": 94.58, "learning_rate": 2.726669599851797e-06, "loss": 1.7426, "step": 32675000 }, { "epoch": 94.58, "learning_rate": 2.725947399499815e-06, "loss": 1.7796, "step": 32675500 }, { "epoch": 94.58, "learning_rate": 2.7252237518525383e-06, "loss": 1.7526, "step": 32676000 }, { "epoch": 94.59, "learning_rate": 2.7245001042052614e-06, "loss": 1.7618, "step": 32676500 }, { "epoch": 94.59, "learning_rate": 2.7237764565579844e-06, "loss": 1.7534, "step": 32677000 }, { "epoch": 94.59, "learning_rate": 2.723052808910708e-06, "loss": 1.7786, "step": 32677500 }, { "epoch": 94.59, "learning_rate": 2.7223306085587256e-06, "loss": 1.7753, "step": 32678000 }, { "epoch": 94.59, "learning_rate": 2.7216069609114487e-06, "loss": 1.7737, "step": 32678500 }, { "epoch": 94.59, "learning_rate": 2.720883313264172e-06, "loss": 1.7594, "step": 32679000 }, { "epoch": 94.59, "learning_rate": 2.7201596656168953e-06, "loss": 1.7608, "step": 32679500 }, { "epoch": 94.6, "learning_rate": 2.7194360179696184e-06, "loss": 1.7397, "step": 32680000 }, { "epoch": 94.6, "learning_rate": 2.718712370322342e-06, "loss": 1.7641, "step": 32680500 }, { "epoch": 94.6, "learning_rate": 2.7179901699703595e-06, "loss": 1.7681, "step": 32681000 }, { "epoch": 94.6, "learning_rate": 2.7172665223230826e-06, "loss": 1.7485, "step": 32681500 }, { "epoch": 94.6, "learning_rate": 2.716542874675806e-06, "loss": 1.7724, "step": 32682000 }, { "epoch": 94.6, "learning_rate": 2.7158192270285292e-06, "loss": 1.7557, "step": 32682500 }, { "epoch": 94.6, "learning_rate": 2.715097026676547e-06, "loss": 1.7276, "step": 32683000 }, { "epoch": 94.61, "learning_rate": 2.7143733790292704e-06, "loss": 1.8072, "step": 32683500 }, { "epoch": 94.61, "learning_rate": 2.713651178677288e-06, "loss": 1.771, "step": 32684000 }, { "epoch": 94.61, "learning_rate": 2.712927531030011e-06, "loss": 1.7591, "step": 32684500 }, { "epoch": 94.61, "learning_rate": 2.7122038833827346e-06, "loss": 1.7582, "step": 32685000 }, { "epoch": 94.61, "learning_rate": 2.7114802357354577e-06, "loss": 1.7593, "step": 32685500 }, { "epoch": 94.61, "learning_rate": 2.710756588088181e-06, "loss": 1.7712, "step": 32686000 }, { "epoch": 94.61, "learning_rate": 2.7100329404409043e-06, "loss": 1.7858, "step": 32686500 }, { "epoch": 94.62, "learning_rate": 2.7093092927936274e-06, "loss": 1.7753, "step": 32687000 }, { "epoch": 94.62, "learning_rate": 2.7085856451463505e-06, "loss": 1.7589, "step": 32687500 }, { "epoch": 94.62, "learning_rate": 2.7078619974990736e-06, "loss": 1.7549, "step": 32688000 }, { "epoch": 94.62, "learning_rate": 2.707138349851797e-06, "loss": 1.7665, "step": 32688500 }, { "epoch": 94.62, "learning_rate": 2.7064161494998147e-06, "loss": 1.7631, "step": 32689000 }, { "epoch": 94.62, "learning_rate": 2.705692501852538e-06, "loss": 1.7598, "step": 32689500 }, { "epoch": 94.62, "learning_rate": 2.704970301500556e-06, "loss": 1.804, "step": 32690000 }, { "epoch": 94.63, "learning_rate": 2.704246653853279e-06, "loss": 1.7762, "step": 32690500 }, { "epoch": 94.63, "learning_rate": 2.703523006206002e-06, "loss": 1.7726, "step": 32691000 }, { "epoch": 94.63, "learning_rate": 2.7027993585587256e-06, "loss": 1.7529, "step": 32691500 }, { "epoch": 94.63, "learning_rate": 2.702075710911449e-06, "loss": 1.7471, "step": 32692000 }, { "epoch": 94.63, "learning_rate": 2.701352063264172e-06, "loss": 1.7373, "step": 32692500 }, { "epoch": 94.63, "learning_rate": 2.7006284156168952e-06, "loss": 1.7702, "step": 32693000 }, { "epoch": 94.63, "learning_rate": 2.6999047679696183e-06, "loss": 1.7713, "step": 32693500 }, { "epoch": 94.64, "learning_rate": 2.6991825676176364e-06, "loss": 1.7564, "step": 32694000 }, { "epoch": 94.64, "learning_rate": 2.6984589199703595e-06, "loss": 1.7808, "step": 32694500 }, { "epoch": 94.64, "learning_rate": 2.6977352723230826e-06, "loss": 1.7834, "step": 32695000 }, { "epoch": 94.64, "learning_rate": 2.6970116246758057e-06, "loss": 1.767, "step": 32695500 }, { "epoch": 94.64, "learning_rate": 2.6962879770285296e-06, "loss": 1.7435, "step": 32696000 }, { "epoch": 94.64, "learning_rate": 2.695565776676547e-06, "loss": 1.7552, "step": 32696500 }, { "epoch": 94.64, "learning_rate": 2.694843576324565e-06, "loss": 1.7745, "step": 32697000 }, { "epoch": 94.65, "learning_rate": 2.694119928677288e-06, "loss": 1.7552, "step": 32697500 }, { "epoch": 94.65, "learning_rate": 2.693396281030011e-06, "loss": 1.7614, "step": 32698000 }, { "epoch": 94.65, "learning_rate": 2.692672633382734e-06, "loss": 1.7441, "step": 32698500 }, { "epoch": 94.65, "learning_rate": 2.6919489857354577e-06, "loss": 1.7601, "step": 32699000 }, { "epoch": 94.65, "learning_rate": 2.691225338088181e-06, "loss": 1.7568, "step": 32699500 }, { "epoch": 94.65, "learning_rate": 2.6905016904409043e-06, "loss": 1.7697, "step": 32700000 }, { "epoch": 94.65, "learning_rate": 2.6897780427936273e-06, "loss": 1.7566, "step": 32700500 }, { "epoch": 94.66, "learning_rate": 2.6890558424416454e-06, "loss": 1.7756, "step": 32701000 }, { "epoch": 94.66, "learning_rate": 2.6883321947943685e-06, "loss": 1.7474, "step": 32701500 }, { "epoch": 94.66, "learning_rate": 2.687609994442386e-06, "loss": 1.7468, "step": 32702000 }, { "epoch": 94.66, "learning_rate": 2.6868863467951097e-06, "loss": 1.7727, "step": 32702500 }, { "epoch": 94.66, "learning_rate": 2.6861626991478328e-06, "loss": 1.7663, "step": 32703000 }, { "epoch": 94.66, "learning_rate": 2.685439051500556e-06, "loss": 1.775, "step": 32703500 }, { "epoch": 94.66, "learning_rate": 2.684715403853279e-06, "loss": 1.7586, "step": 32704000 }, { "epoch": 94.67, "learning_rate": 2.6839917562060024e-06, "loss": 1.7284, "step": 32704500 }, { "epoch": 94.67, "learning_rate": 2.6832681085587255e-06, "loss": 1.7588, "step": 32705000 }, { "epoch": 94.67, "learning_rate": 2.682544460911449e-06, "loss": 1.7951, "step": 32705500 }, { "epoch": 94.67, "learning_rate": 2.681820813264172e-06, "loss": 1.7778, "step": 32706000 }, { "epoch": 94.67, "learning_rate": 2.6810986129121898e-06, "loss": 1.7865, "step": 32706500 }, { "epoch": 94.67, "learning_rate": 2.6803749652649133e-06, "loss": 1.7586, "step": 32707000 }, { "epoch": 94.67, "learning_rate": 2.6796513176176364e-06, "loss": 1.7515, "step": 32707500 }, { "epoch": 94.68, "learning_rate": 2.6789276699703594e-06, "loss": 1.7378, "step": 32708000 }, { "epoch": 94.68, "learning_rate": 2.6782054696183775e-06, "loss": 1.7593, "step": 32708500 }, { "epoch": 94.68, "learning_rate": 2.6774818219711006e-06, "loss": 1.7572, "step": 32709000 }, { "epoch": 94.68, "learning_rate": 2.6767581743238237e-06, "loss": 1.7727, "step": 32709500 }, { "epoch": 94.68, "learning_rate": 2.676034526676547e-06, "loss": 1.7808, "step": 32710000 }, { "epoch": 94.68, "learning_rate": 2.6753108790292703e-06, "loss": 1.7661, "step": 32710500 }, { "epoch": 94.68, "learning_rate": 2.6745872313819934e-06, "loss": 1.7492, "step": 32711000 }, { "epoch": 94.69, "learning_rate": 2.6738635837347164e-06, "loss": 1.759, "step": 32711500 }, { "epoch": 94.69, "learning_rate": 2.67313993608744e-06, "loss": 1.7644, "step": 32712000 }, { "epoch": 94.69, "learning_rate": 2.6724191830307522e-06, "loss": 1.7738, "step": 32712500 }, { "epoch": 94.69, "learning_rate": 2.6716955353834753e-06, "loss": 1.756, "step": 32713000 }, { "epoch": 94.69, "learning_rate": 2.670971887736199e-06, "loss": 1.7492, "step": 32713500 }, { "epoch": 94.69, "learning_rate": 2.670248240088922e-06, "loss": 1.7549, "step": 32714000 }, { "epoch": 94.7, "learning_rate": 2.669524592441645e-06, "loss": 1.7668, "step": 32714500 }, { "epoch": 94.7, "learning_rate": 2.6688009447943685e-06, "loss": 1.7492, "step": 32715000 }, { "epoch": 94.7, "learning_rate": 2.6680772971470915e-06, "loss": 1.7723, "step": 32715500 }, { "epoch": 94.7, "learning_rate": 2.667353649499815e-06, "loss": 1.7545, "step": 32716000 }, { "epoch": 94.7, "learning_rate": 2.666630001852538e-06, "loss": 1.7702, "step": 32716500 }, { "epoch": 94.7, "learning_rate": 2.6659063542052612e-06, "loss": 1.7938, "step": 32717000 }, { "epoch": 94.7, "learning_rate": 2.6651827065579843e-06, "loss": 1.7678, "step": 32717500 }, { "epoch": 94.71, "learning_rate": 2.6644605062060024e-06, "loss": 1.7468, "step": 32718000 }, { "epoch": 94.71, "learning_rate": 2.6637368585587255e-06, "loss": 1.7857, "step": 32718500 }, { "epoch": 94.71, "learning_rate": 2.6630132109114486e-06, "loss": 1.7732, "step": 32719000 }, { "epoch": 94.71, "learning_rate": 2.662289563264172e-06, "loss": 1.7586, "step": 32719500 }, { "epoch": 94.71, "learning_rate": 2.6615659156168956e-06, "loss": 1.7685, "step": 32720000 }, { "epoch": 94.71, "learning_rate": 2.660843715264913e-06, "loss": 1.767, "step": 32720500 }, { "epoch": 94.71, "learning_rate": 2.6601200676176363e-06, "loss": 1.7537, "step": 32721000 }, { "epoch": 94.72, "learning_rate": 2.65939641997036e-06, "loss": 1.756, "step": 32721500 }, { "epoch": 94.72, "learning_rate": 2.658672772323083e-06, "loss": 1.7681, "step": 32722000 }, { "epoch": 94.72, "learning_rate": 2.6579505719711006e-06, "loss": 1.7467, "step": 32722500 }, { "epoch": 94.72, "learning_rate": 2.657226924323824e-06, "loss": 1.7721, "step": 32723000 }, { "epoch": 94.72, "learning_rate": 2.656503276676547e-06, "loss": 1.7571, "step": 32723500 }, { "epoch": 94.72, "learning_rate": 2.6557796290292702e-06, "loss": 1.7346, "step": 32724000 }, { "epoch": 94.72, "learning_rate": 2.6550559813819933e-06, "loss": 1.7507, "step": 32724500 }, { "epoch": 94.73, "learning_rate": 2.6543337810300114e-06, "loss": 1.7376, "step": 32725000 }, { "epoch": 94.73, "learning_rate": 2.6536101333827345e-06, "loss": 1.7688, "step": 32725500 }, { "epoch": 94.73, "learning_rate": 2.6528864857354576e-06, "loss": 1.7679, "step": 32726000 }, { "epoch": 94.73, "learning_rate": 2.6521628380881807e-06, "loss": 1.7622, "step": 32726500 }, { "epoch": 94.73, "learning_rate": 2.651439190440904e-06, "loss": 1.7415, "step": 32727000 }, { "epoch": 94.73, "learning_rate": 2.650716990088922e-06, "loss": 1.766, "step": 32727500 }, { "epoch": 94.73, "learning_rate": 2.64999478973694e-06, "loss": 1.7717, "step": 32728000 }, { "epoch": 94.74, "learning_rate": 2.649271142089663e-06, "loss": 1.7658, "step": 32728500 }, { "epoch": 94.74, "learning_rate": 2.648547494442386e-06, "loss": 1.7826, "step": 32729000 }, { "epoch": 94.74, "learning_rate": 2.647823846795109e-06, "loss": 1.7671, "step": 32729500 }, { "epoch": 94.74, "learning_rate": 2.6471001991478327e-06, "loss": 1.7703, "step": 32730000 }, { "epoch": 94.74, "learning_rate": 2.646376551500556e-06, "loss": 1.7707, "step": 32730500 }, { "epoch": 94.74, "learning_rate": 2.6456529038532793e-06, "loss": 1.764, "step": 32731000 }, { "epoch": 94.74, "learning_rate": 2.6449292562060023e-06, "loss": 1.7571, "step": 32731500 }, { "epoch": 94.75, "learning_rate": 2.6442056085587254e-06, "loss": 1.7568, "step": 32732000 }, { "epoch": 94.75, "learning_rate": 2.6434834082067435e-06, "loss": 1.7692, "step": 32732500 }, { "epoch": 94.75, "learning_rate": 2.6427597605594666e-06, "loss": 1.7634, "step": 32733000 }, { "epoch": 94.75, "learning_rate": 2.6420361129121897e-06, "loss": 1.7468, "step": 32733500 }, { "epoch": 94.75, "learning_rate": 2.641312465264913e-06, "loss": 1.771, "step": 32734000 }, { "epoch": 94.75, "learning_rate": 2.640590264912931e-06, "loss": 1.7745, "step": 32734500 }, { "epoch": 94.75, "learning_rate": 2.639866617265654e-06, "loss": 1.7716, "step": 32735000 }, { "epoch": 94.76, "learning_rate": 2.6391429696183774e-06, "loss": 1.7728, "step": 32735500 }, { "epoch": 94.76, "learning_rate": 2.6384193219711005e-06, "loss": 1.7485, "step": 32736000 }, { "epoch": 94.76, "learning_rate": 2.6376956743238236e-06, "loss": 1.7709, "step": 32736500 }, { "epoch": 94.76, "learning_rate": 2.6369734739718417e-06, "loss": 1.7287, "step": 32737000 }, { "epoch": 94.76, "learning_rate": 2.6362498263245648e-06, "loss": 1.7491, "step": 32737500 }, { "epoch": 94.76, "learning_rate": 2.635526178677288e-06, "loss": 1.7401, "step": 32738000 }, { "epoch": 94.76, "learning_rate": 2.6348025310300114e-06, "loss": 1.7521, "step": 32738500 }, { "epoch": 94.77, "learning_rate": 2.634080330678029e-06, "loss": 1.7649, "step": 32739000 }, { "epoch": 94.77, "learning_rate": 2.633356683030752e-06, "loss": 1.7509, "step": 32739500 }, { "epoch": 94.77, "learning_rate": 2.6326330353834756e-06, "loss": 1.7696, "step": 32740000 }, { "epoch": 94.77, "learning_rate": 2.6319093877361987e-06, "loss": 1.7556, "step": 32740500 }, { "epoch": 94.77, "learning_rate": 2.6311857400889218e-06, "loss": 1.7657, "step": 32741000 }, { "epoch": 94.77, "learning_rate": 2.6304620924416453e-06, "loss": 1.7778, "step": 32741500 }, { "epoch": 94.77, "learning_rate": 2.6297384447943684e-06, "loss": 1.7498, "step": 32742000 }, { "epoch": 94.78, "learning_rate": 2.6290147971470914e-06, "loss": 1.7619, "step": 32742500 }, { "epoch": 94.78, "learning_rate": 2.628291149499815e-06, "loss": 1.7701, "step": 32743000 }, { "epoch": 94.78, "learning_rate": 2.6275689491478326e-06, "loss": 1.7662, "step": 32743500 }, { "epoch": 94.78, "learning_rate": 2.6268453015005557e-06, "loss": 1.7675, "step": 32744000 }, { "epoch": 94.78, "learning_rate": 2.626123101148574e-06, "loss": 1.7485, "step": 32744500 }, { "epoch": 94.78, "learning_rate": 2.625399453501297e-06, "loss": 1.7681, "step": 32745000 }, { "epoch": 94.78, "learning_rate": 2.62467580585402e-06, "loss": 1.7292, "step": 32745500 }, { "epoch": 94.79, "learning_rate": 2.623952158206743e-06, "loss": 1.7261, "step": 32746000 }, { "epoch": 94.79, "learning_rate": 2.6232285105594665e-06, "loss": 1.7703, "step": 32746500 }, { "epoch": 94.79, "learning_rate": 2.62250486291219e-06, "loss": 1.7459, "step": 32747000 }, { "epoch": 94.79, "learning_rate": 2.6217826625602073e-06, "loss": 1.8035, "step": 32747500 }, { "epoch": 94.79, "learning_rate": 2.621059014912931e-06, "loss": 1.762, "step": 32748000 }, { "epoch": 94.79, "learning_rate": 2.6203353672656543e-06, "loss": 1.7495, "step": 32748500 }, { "epoch": 94.79, "learning_rate": 2.6196117196183774e-06, "loss": 1.7817, "step": 32749000 }, { "epoch": 94.8, "learning_rate": 2.6188880719711005e-06, "loss": 1.7591, "step": 32749500 }, { "epoch": 94.8, "learning_rate": 2.6181644243238235e-06, "loss": 1.7705, "step": 32750000 }, { "epoch": 94.8, "learning_rate": 2.617440776676547e-06, "loss": 1.7375, "step": 32750500 }, { "epoch": 94.8, "learning_rate": 2.6167171290292706e-06, "loss": 1.7832, "step": 32751000 }, { "epoch": 94.8, "learning_rate": 2.6159934813819936e-06, "loss": 1.7644, "step": 32751500 }, { "epoch": 94.8, "learning_rate": 2.6152712810300113e-06, "loss": 1.7594, "step": 32752000 }, { "epoch": 94.81, "learning_rate": 2.614547633382735e-06, "loss": 1.7759, "step": 32752500 }, { "epoch": 94.81, "learning_rate": 2.613823985735458e-06, "loss": 1.808, "step": 32753000 }, { "epoch": 94.81, "learning_rate": 2.613100338088181e-06, "loss": 1.7669, "step": 32753500 }, { "epoch": 94.81, "learning_rate": 2.612376690440904e-06, "loss": 1.7438, "step": 32754000 }, { "epoch": 94.81, "learning_rate": 2.611654490088922e-06, "loss": 1.7496, "step": 32754500 }, { "epoch": 94.81, "learning_rate": 2.6109308424416452e-06, "loss": 1.7713, "step": 32755000 }, { "epoch": 94.81, "learning_rate": 2.6102071947943683e-06, "loss": 1.7542, "step": 32755500 }, { "epoch": 94.82, "learning_rate": 2.6094835471470914e-06, "loss": 1.7735, "step": 32756000 }, { "epoch": 94.82, "learning_rate": 2.6087613467951095e-06, "loss": 1.7655, "step": 32756500 }, { "epoch": 94.82, "learning_rate": 2.6080376991478326e-06, "loss": 1.777, "step": 32757000 }, { "epoch": 94.82, "learning_rate": 2.6073140515005557e-06, "loss": 1.7745, "step": 32757500 }, { "epoch": 94.82, "learning_rate": 2.606590403853279e-06, "loss": 1.7681, "step": 32758000 }, { "epoch": 94.82, "learning_rate": 2.605868203501297e-06, "loss": 1.7467, "step": 32758500 }, { "epoch": 94.82, "learning_rate": 2.60514455585402e-06, "loss": 1.741, "step": 32759000 }, { "epoch": 94.83, "learning_rate": 2.6044209082067434e-06, "loss": 1.7907, "step": 32759500 }, { "epoch": 94.83, "learning_rate": 2.6036972605594665e-06, "loss": 1.7449, "step": 32760000 }, { "epoch": 94.83, "learning_rate": 2.60297361291219e-06, "loss": 1.7566, "step": 32760500 }, { "epoch": 94.83, "learning_rate": 2.602249965264913e-06, "loss": 1.7473, "step": 32761000 }, { "epoch": 94.83, "learning_rate": 2.601526317617636e-06, "loss": 1.777, "step": 32761500 }, { "epoch": 94.83, "learning_rate": 2.6008041172656543e-06, "loss": 1.7532, "step": 32762000 }, { "epoch": 94.83, "learning_rate": 2.6000804696183773e-06, "loss": 1.752, "step": 32762500 }, { "epoch": 94.84, "learning_rate": 2.5993568219711004e-06, "loss": 1.7494, "step": 32763000 }, { "epoch": 94.84, "learning_rate": 2.598633174323824e-06, "loss": 1.7772, "step": 32763500 }, { "epoch": 94.84, "learning_rate": 2.5979109739718416e-06, "loss": 1.7649, "step": 32764000 }, { "epoch": 94.84, "learning_rate": 2.5971873263245647e-06, "loss": 1.759, "step": 32764500 }, { "epoch": 94.84, "learning_rate": 2.596463678677288e-06, "loss": 1.7602, "step": 32765000 }, { "epoch": 94.84, "learning_rate": 2.5957400310300113e-06, "loss": 1.7645, "step": 32765500 }, { "epoch": 94.84, "learning_rate": 2.5950163833827343e-06, "loss": 1.7684, "step": 32766000 }, { "epoch": 94.85, "learning_rate": 2.5942941830307524e-06, "loss": 1.773, "step": 32766500 }, { "epoch": 94.85, "learning_rate": 2.5935705353834755e-06, "loss": 1.7625, "step": 32767000 }, { "epoch": 94.85, "learning_rate": 2.5928468877361986e-06, "loss": 1.76, "step": 32767500 }, { "epoch": 94.85, "learning_rate": 2.5921232400889217e-06, "loss": 1.7787, "step": 32768000 }, { "epoch": 94.85, "learning_rate": 2.5914010397369398e-06, "loss": 1.7754, "step": 32768500 }, { "epoch": 94.85, "learning_rate": 2.590677392089663e-06, "loss": 1.7675, "step": 32769000 }, { "epoch": 94.85, "learning_rate": 2.589955191737681e-06, "loss": 1.7449, "step": 32769500 }, { "epoch": 94.86, "learning_rate": 2.589231544090404e-06, "loss": 1.7405, "step": 32770000 }, { "epoch": 94.86, "learning_rate": 2.588507896443127e-06, "loss": 1.7382, "step": 32770500 }, { "epoch": 94.86, "learning_rate": 2.58778424879585e-06, "loss": 1.7648, "step": 32771000 }, { "epoch": 94.86, "learning_rate": 2.5870606011485737e-06, "loss": 1.7517, "step": 32771500 }, { "epoch": 94.86, "learning_rate": 2.5863369535012968e-06, "loss": 1.7719, "step": 32772000 }, { "epoch": 94.86, "learning_rate": 2.5856133058540203e-06, "loss": 1.7533, "step": 32772500 }, { "epoch": 94.86, "learning_rate": 2.5848896582067434e-06, "loss": 1.7828, "step": 32773000 }, { "epoch": 94.87, "learning_rate": 2.584167457854761e-06, "loss": 1.7709, "step": 32773500 }, { "epoch": 94.87, "learning_rate": 2.5834438102074845e-06, "loss": 1.7601, "step": 32774000 }, { "epoch": 94.87, "learning_rate": 2.5827216098555022e-06, "loss": 1.7443, "step": 32774500 }, { "epoch": 94.87, "learning_rate": 2.58199940950352e-06, "loss": 1.7605, "step": 32775000 }, { "epoch": 94.87, "learning_rate": 2.581275761856243e-06, "loss": 1.7781, "step": 32775500 }, { "epoch": 94.87, "learning_rate": 2.5805521142089665e-06, "loss": 1.7718, "step": 32776000 }, { "epoch": 94.87, "learning_rate": 2.5798284665616896e-06, "loss": 1.7452, "step": 32776500 }, { "epoch": 94.88, "learning_rate": 2.579104818914413e-06, "loss": 1.7518, "step": 32777000 }, { "epoch": 94.88, "learning_rate": 2.578381171267136e-06, "loss": 1.7614, "step": 32777500 }, { "epoch": 94.88, "learning_rate": 2.5776575236198592e-06, "loss": 1.7726, "step": 32778000 }, { "epoch": 94.88, "learning_rate": 2.5769338759725823e-06, "loss": 1.7672, "step": 32778500 }, { "epoch": 94.88, "learning_rate": 2.576210228325306e-06, "loss": 1.7537, "step": 32779000 }, { "epoch": 94.88, "learning_rate": 2.5754865806780293e-06, "loss": 1.75, "step": 32779500 }, { "epoch": 94.88, "learning_rate": 2.5747629330307524e-06, "loss": 1.76, "step": 32780000 }, { "epoch": 94.89, "learning_rate": 2.5740392853834755e-06, "loss": 1.7528, "step": 32780500 }, { "epoch": 94.89, "learning_rate": 2.5733170850314936e-06, "loss": 1.7748, "step": 32781000 }, { "epoch": 94.89, "learning_rate": 2.572594884679511e-06, "loss": 1.7329, "step": 32781500 }, { "epoch": 94.89, "learning_rate": 2.5718712370322343e-06, "loss": 1.748, "step": 32782000 }, { "epoch": 94.89, "learning_rate": 2.571147589384958e-06, "loss": 1.7614, "step": 32782500 }, { "epoch": 94.89, "learning_rate": 2.570423941737681e-06, "loss": 1.7688, "step": 32783000 }, { "epoch": 94.89, "learning_rate": 2.569700294090404e-06, "loss": 1.7375, "step": 32783500 }, { "epoch": 94.9, "learning_rate": 2.568976646443127e-06, "loss": 1.7865, "step": 32784000 }, { "epoch": 94.9, "learning_rate": 2.56825299879585e-06, "loss": 1.7774, "step": 32784500 }, { "epoch": 94.9, "learning_rate": 2.5675293511485736e-06, "loss": 1.7665, "step": 32785000 }, { "epoch": 94.9, "learning_rate": 2.566805703501297e-06, "loss": 1.7551, "step": 32785500 }, { "epoch": 94.9, "learning_rate": 2.5660835031493144e-06, "loss": 1.7716, "step": 32786000 }, { "epoch": 94.9, "learning_rate": 2.565359855502038e-06, "loss": 1.7301, "step": 32786500 }, { "epoch": 94.9, "learning_rate": 2.5646362078547614e-06, "loss": 1.7574, "step": 32787000 }, { "epoch": 94.91, "learning_rate": 2.5639125602074845e-06, "loss": 1.7521, "step": 32787500 }, { "epoch": 94.91, "learning_rate": 2.5631889125602076e-06, "loss": 1.7758, "step": 32788000 }, { "epoch": 94.91, "learning_rate": 2.5624652649129306e-06, "loss": 1.76, "step": 32788500 }, { "epoch": 94.91, "learning_rate": 2.5617430645609487e-06, "loss": 1.7326, "step": 32789000 }, { "epoch": 94.91, "learning_rate": 2.561019416913672e-06, "loss": 1.7463, "step": 32789500 }, { "epoch": 94.91, "learning_rate": 2.560295769266395e-06, "loss": 1.7841, "step": 32790000 }, { "epoch": 94.92, "learning_rate": 2.5595721216191184e-06, "loss": 1.7577, "step": 32790500 }, { "epoch": 94.92, "learning_rate": 2.5588484739718415e-06, "loss": 1.7393, "step": 32791000 }, { "epoch": 94.92, "learning_rate": 2.5581248263245646e-06, "loss": 1.7711, "step": 32791500 }, { "epoch": 94.92, "learning_rate": 2.5574040732678773e-06, "loss": 1.7703, "step": 32792000 }, { "epoch": 94.92, "learning_rate": 2.5566804256206003e-06, "loss": 1.7715, "step": 32792500 }, { "epoch": 94.92, "learning_rate": 2.5559567779733234e-06, "loss": 1.74, "step": 32793000 }, { "epoch": 94.92, "learning_rate": 2.555233130326047e-06, "loss": 1.7456, "step": 32793500 }, { "epoch": 94.93, "learning_rate": 2.55450948267877e-06, "loss": 1.7498, "step": 32794000 }, { "epoch": 94.93, "learning_rate": 2.553785835031493e-06, "loss": 1.7424, "step": 32794500 }, { "epoch": 94.93, "learning_rate": 2.5530621873842166e-06, "loss": 1.7566, "step": 32795000 }, { "epoch": 94.93, "learning_rate": 2.5523399870322343e-06, "loss": 1.7498, "step": 32795500 }, { "epoch": 94.93, "learning_rate": 2.5516163393849573e-06, "loss": 1.7781, "step": 32796000 }, { "epoch": 94.93, "learning_rate": 2.550892691737681e-06, "loss": 1.7729, "step": 32796500 }, { "epoch": 94.93, "learning_rate": 2.550169044090404e-06, "loss": 1.7251, "step": 32797000 }, { "epoch": 94.94, "learning_rate": 2.5494453964431274e-06, "loss": 1.7717, "step": 32797500 }, { "epoch": 94.94, "learning_rate": 2.5487217487958505e-06, "loss": 1.7417, "step": 32798000 }, { "epoch": 94.94, "learning_rate": 2.5479981011485736e-06, "loss": 1.7446, "step": 32798500 }, { "epoch": 94.94, "learning_rate": 2.5472759007965917e-06, "loss": 1.7798, "step": 32799000 }, { "epoch": 94.94, "learning_rate": 2.5465522531493148e-06, "loss": 1.7399, "step": 32799500 }, { "epoch": 94.94, "learning_rate": 2.545828605502038e-06, "loss": 1.7794, "step": 32800000 }, { "epoch": 94.94, "learning_rate": 2.545104957854761e-06, "loss": 1.7583, "step": 32800500 }, { "epoch": 94.95, "learning_rate": 2.544382757502779e-06, "loss": 1.7682, "step": 32801000 }, { "epoch": 94.95, "learning_rate": 2.543659109855502e-06, "loss": 1.7386, "step": 32801500 }, { "epoch": 94.95, "learning_rate": 2.542935462208225e-06, "loss": 1.7736, "step": 32802000 }, { "epoch": 94.95, "learning_rate": 2.5422118145609483e-06, "loss": 1.7809, "step": 32802500 }, { "epoch": 94.95, "learning_rate": 2.5414881669136718e-06, "loss": 1.7567, "step": 32803000 }, { "epoch": 94.95, "learning_rate": 2.5407659665616894e-06, "loss": 1.7593, "step": 32803500 }, { "epoch": 94.95, "learning_rate": 2.5400423189144125e-06, "loss": 1.7423, "step": 32804000 }, { "epoch": 94.96, "learning_rate": 2.539318671267136e-06, "loss": 1.7631, "step": 32804500 }, { "epoch": 94.96, "learning_rate": 2.5385950236198595e-06, "loss": 1.7416, "step": 32805000 }, { "epoch": 94.96, "learning_rate": 2.5378713759725826e-06, "loss": 1.779, "step": 32805500 }, { "epoch": 94.96, "learning_rate": 2.5371477283253057e-06, "loss": 1.7598, "step": 32806000 }, { "epoch": 94.96, "learning_rate": 2.5364240806780288e-06, "loss": 1.7695, "step": 32806500 }, { "epoch": 94.96, "learning_rate": 2.5357004330307523e-06, "loss": 1.7579, "step": 32807000 }, { "epoch": 94.96, "learning_rate": 2.5349767853834758e-06, "loss": 1.7585, "step": 32807500 }, { "epoch": 94.97, "learning_rate": 2.534253137736199e-06, "loss": 1.7398, "step": 32808000 }, { "epoch": 94.97, "learning_rate": 2.5335309373842165e-06, "loss": 1.7616, "step": 32808500 }, { "epoch": 94.97, "learning_rate": 2.53280728973694e-06, "loss": 1.7614, "step": 32809000 }, { "epoch": 94.97, "learning_rate": 2.532083642089663e-06, "loss": 1.7701, "step": 32809500 }, { "epoch": 94.97, "learning_rate": 2.531359994442386e-06, "loss": 1.7817, "step": 32810000 }, { "epoch": 94.97, "learning_rate": 2.5306377940904043e-06, "loss": 1.7317, "step": 32810500 }, { "epoch": 94.97, "learning_rate": 2.5299141464431274e-06, "loss": 1.777, "step": 32811000 }, { "epoch": 94.98, "learning_rate": 2.5291904987958505e-06, "loss": 1.7593, "step": 32811500 }, { "epoch": 94.98, "learning_rate": 2.5284668511485735e-06, "loss": 1.7379, "step": 32812000 }, { "epoch": 94.98, "learning_rate": 2.527743203501297e-06, "loss": 1.7259, "step": 32812500 }, { "epoch": 94.98, "learning_rate": 2.5270210031493147e-06, "loss": 1.7607, "step": 32813000 }, { "epoch": 94.98, "learning_rate": 2.526297355502038e-06, "loss": 1.7524, "step": 32813500 }, { "epoch": 94.98, "learning_rate": 2.525573707854761e-06, "loss": 1.761, "step": 32814000 }, { "epoch": 94.98, "learning_rate": 2.524851507502779e-06, "loss": 1.765, "step": 32814500 }, { "epoch": 94.99, "learning_rate": 2.524127859855502e-06, "loss": 1.7487, "step": 32815000 }, { "epoch": 94.99, "learning_rate": 2.523404212208225e-06, "loss": 1.749, "step": 32815500 }, { "epoch": 94.99, "learning_rate": 2.5226805645609486e-06, "loss": 1.7631, "step": 32816000 }, { "epoch": 94.99, "learning_rate": 2.5219569169136717e-06, "loss": 1.7304, "step": 32816500 }, { "epoch": 94.99, "learning_rate": 2.5212332692663952e-06, "loss": 1.7779, "step": 32817000 }, { "epoch": 94.99, "learning_rate": 2.520511068914413e-06, "loss": 1.7864, "step": 32817500 }, { "epoch": 94.99, "learning_rate": 2.519787421267136e-06, "loss": 1.7814, "step": 32818000 }, { "epoch": 95.0, "learning_rate": 2.5190637736198595e-06, "loss": 1.7744, "step": 32818500 }, { "epoch": 95.0, "learning_rate": 2.5183401259725826e-06, "loss": 1.7663, "step": 32819000 }, { "epoch": 95.0, "learning_rate": 2.5176164783253056e-06, "loss": 1.7554, "step": 32819500 }, { "epoch": 95.0, "eval_accuracy": 0.6925330877815309, "eval_accuracy_mlm": 0.662237515372476, "eval_accuracy_nsp": 0.8551695058961561, "eval_loss": 2.2054529190063477, "eval_runtime": 331.5069, "eval_samples_per_second": 1316.371, "eval_steps_per_second": 54.85, "step": 32819840 }, { "epoch": 95.0, "learning_rate": 2.516892830678029e-06, "loss": 1.7643, "step": 32820000 }, { "epoch": 95.0, "learning_rate": 2.5161691830307522e-06, "loss": 1.731, "step": 32820500 }, { "epoch": 95.0, "learning_rate": 2.5154455353834753e-06, "loss": 1.7529, "step": 32821000 }, { "epoch": 95.0, "learning_rate": 2.514721887736199e-06, "loss": 1.7481, "step": 32821500 }, { "epoch": 95.01, "learning_rate": 2.5139996873842165e-06, "loss": 1.7463, "step": 32822000 }, { "epoch": 95.01, "learning_rate": 2.5132760397369396e-06, "loss": 1.7767, "step": 32822500 }, { "epoch": 95.01, "learning_rate": 2.512552392089663e-06, "loss": 1.7618, "step": 32823000 }, { "epoch": 95.01, "learning_rate": 2.511828744442386e-06, "loss": 1.7303, "step": 32823500 }, { "epoch": 95.01, "learning_rate": 2.511106544090404e-06, "loss": 1.7836, "step": 32824000 }, { "epoch": 95.01, "learning_rate": 2.510382896443127e-06, "loss": 1.7603, "step": 32824500 }, { "epoch": 95.01, "learning_rate": 2.5096592487958504e-06, "loss": 1.7617, "step": 32825000 }, { "epoch": 95.02, "learning_rate": 2.508935601148574e-06, "loss": 1.7431, "step": 32825500 }, { "epoch": 95.02, "learning_rate": 2.508211953501297e-06, "loss": 1.7407, "step": 32826000 }, { "epoch": 95.02, "learning_rate": 2.50748830585402e-06, "loss": 1.7451, "step": 32826500 }, { "epoch": 95.02, "learning_rate": 2.506764658206743e-06, "loss": 1.7535, "step": 32827000 }, { "epoch": 95.02, "learning_rate": 2.5060410105594662e-06, "loss": 1.7285, "step": 32827500 }, { "epoch": 95.02, "learning_rate": 2.5053188102074843e-06, "loss": 1.7542, "step": 32828000 }, { "epoch": 95.03, "learning_rate": 2.5045951625602074e-06, "loss": 1.7442, "step": 32828500 }, { "epoch": 95.03, "learning_rate": 2.5038715149129305e-06, "loss": 1.7724, "step": 32829000 }, { "epoch": 95.03, "learning_rate": 2.5031478672656544e-06, "loss": 1.7792, "step": 32829500 }, { "epoch": 95.03, "learning_rate": 2.5024242196183775e-06, "loss": 1.745, "step": 32830000 }, { "epoch": 95.03, "learning_rate": 2.5017005719711006e-06, "loss": 1.787, "step": 32830500 }, { "epoch": 95.03, "learning_rate": 2.5009783716191183e-06, "loss": 1.7693, "step": 32831000 }, { "epoch": 95.03, "learning_rate": 2.5002547239718418e-06, "loss": 1.7725, "step": 32831500 }, { "epoch": 95.04, "learning_rate": 2.499531076324565e-06, "loss": 1.7098, "step": 32832000 }, { "epoch": 95.04, "learning_rate": 2.498807428677288e-06, "loss": 1.7607, "step": 32832500 }, { "epoch": 95.04, "learning_rate": 2.498083781030011e-06, "loss": 1.7447, "step": 32833000 }, { "epoch": 95.04, "learning_rate": 2.497361580678029e-06, "loss": 1.7515, "step": 32833500 }, { "epoch": 95.04, "learning_rate": 2.496637933030752e-06, "loss": 1.7127, "step": 32834000 }, { "epoch": 95.04, "learning_rate": 2.4959142853834753e-06, "loss": 1.7307, "step": 32834500 }, { "epoch": 95.04, "learning_rate": 2.4951920850314934e-06, "loss": 1.753, "step": 32835000 }, { "epoch": 95.05, "learning_rate": 2.4944684373842164e-06, "loss": 1.7284, "step": 32835500 }, { "epoch": 95.05, "learning_rate": 2.4937447897369395e-06, "loss": 1.7638, "step": 32836000 }, { "epoch": 95.05, "learning_rate": 2.493021142089663e-06, "loss": 1.7582, "step": 32836500 }, { "epoch": 95.05, "learning_rate": 2.492297494442386e-06, "loss": 1.7626, "step": 32837000 }, { "epoch": 95.05, "learning_rate": 2.491576741385699e-06, "loss": 1.757, "step": 32837500 }, { "epoch": 95.05, "learning_rate": 2.490853093738422e-06, "loss": 1.745, "step": 32838000 }, { "epoch": 95.05, "learning_rate": 2.490129446091145e-06, "loss": 1.7784, "step": 32838500 }, { "epoch": 95.06, "learning_rate": 2.489405798443868e-06, "loss": 1.7568, "step": 32839000 }, { "epoch": 95.06, "learning_rate": 2.4886821507965915e-06, "loss": 1.7528, "step": 32839500 }, { "epoch": 95.06, "learning_rate": 2.4879585031493146e-06, "loss": 1.7315, "step": 32840000 }, { "epoch": 95.06, "learning_rate": 2.487234855502038e-06, "loss": 1.7424, "step": 32840500 }, { "epoch": 95.06, "learning_rate": 2.486511207854761e-06, "loss": 1.7517, "step": 32841000 }, { "epoch": 95.06, "learning_rate": 2.4857875602074843e-06, "loss": 1.7608, "step": 32841500 }, { "epoch": 95.06, "learning_rate": 2.4850639125602078e-06, "loss": 1.7583, "step": 32842000 }, { "epoch": 95.07, "learning_rate": 2.4843417122082255e-06, "loss": 1.7441, "step": 32842500 }, { "epoch": 95.07, "learning_rate": 2.4836180645609485e-06, "loss": 1.7353, "step": 32843000 }, { "epoch": 95.07, "learning_rate": 2.482894416913672e-06, "loss": 1.7476, "step": 32843500 }, { "epoch": 95.07, "learning_rate": 2.482170769266395e-06, "loss": 1.7583, "step": 32844000 }, { "epoch": 95.07, "learning_rate": 2.481447121619118e-06, "loss": 1.7559, "step": 32844500 }, { "epoch": 95.07, "learning_rate": 2.4807234739718417e-06, "loss": 1.7593, "step": 32845000 }, { "epoch": 95.07, "learning_rate": 2.479999826324565e-06, "loss": 1.7496, "step": 32845500 }, { "epoch": 95.08, "learning_rate": 2.479276178677288e-06, "loss": 1.7494, "step": 32846000 }, { "epoch": 95.08, "learning_rate": 2.4785525310300114e-06, "loss": 1.7825, "step": 32846500 }, { "epoch": 95.08, "learning_rate": 2.477830330678029e-06, "loss": 1.7477, "step": 32847000 }, { "epoch": 95.08, "learning_rate": 2.477106683030752e-06, "loss": 1.7124, "step": 32847500 }, { "epoch": 95.08, "learning_rate": 2.4763830353834756e-06, "loss": 1.7429, "step": 32848000 }, { "epoch": 95.08, "learning_rate": 2.4756593877361987e-06, "loss": 1.7707, "step": 32848500 }, { "epoch": 95.08, "learning_rate": 2.474935740088922e-06, "loss": 1.7315, "step": 32849000 }, { "epoch": 95.09, "learning_rate": 2.47421353973694e-06, "loss": 1.7608, "step": 32849500 }, { "epoch": 95.09, "learning_rate": 2.473489892089663e-06, "loss": 1.7385, "step": 32850000 }, { "epoch": 95.09, "learning_rate": 2.472766244442386e-06, "loss": 1.7616, "step": 32850500 }, { "epoch": 95.09, "learning_rate": 2.472042596795109e-06, "loss": 1.7439, "step": 32851000 }, { "epoch": 95.09, "learning_rate": 2.4713203964431272e-06, "loss": 1.788, "step": 32851500 }, { "epoch": 95.09, "learning_rate": 2.4705967487958503e-06, "loss": 1.7665, "step": 32852000 }, { "epoch": 95.09, "learning_rate": 2.4698731011485734e-06, "loss": 1.7531, "step": 32852500 }, { "epoch": 95.1, "learning_rate": 2.469149453501297e-06, "loss": 1.7845, "step": 32853000 }, { "epoch": 95.1, "learning_rate": 2.4684258058540204e-06, "loss": 1.7663, "step": 32853500 }, { "epoch": 95.1, "learning_rate": 2.4677021582067435e-06, "loss": 1.7724, "step": 32854000 }, { "epoch": 95.1, "learning_rate": 2.4669785105594666e-06, "loss": 1.7569, "step": 32854500 }, { "epoch": 95.1, "learning_rate": 2.4662563102074847e-06, "loss": 1.7668, "step": 32855000 }, { "epoch": 95.1, "learning_rate": 2.4655326625602077e-06, "loss": 1.7436, "step": 32855500 }, { "epoch": 95.1, "learning_rate": 2.464809014912931e-06, "loss": 1.7636, "step": 32856000 }, { "epoch": 95.11, "learning_rate": 2.464085367265654e-06, "loss": 1.7408, "step": 32856500 }, { "epoch": 95.11, "learning_rate": 2.463361719618377e-06, "loss": 1.7676, "step": 32857000 }, { "epoch": 95.11, "learning_rate": 2.4626380719711005e-06, "loss": 1.7893, "step": 32857500 }, { "epoch": 95.11, "learning_rate": 2.461914424323824e-06, "loss": 1.7616, "step": 32858000 }, { "epoch": 95.11, "learning_rate": 2.461190776676547e-06, "loss": 1.7803, "step": 32858500 }, { "epoch": 95.11, "learning_rate": 2.4604685763245647e-06, "loss": 1.7861, "step": 32859000 }, { "epoch": 95.11, "learning_rate": 2.4597449286772882e-06, "loss": 1.783, "step": 32859500 }, { "epoch": 95.12, "learning_rate": 2.4590212810300113e-06, "loss": 1.7578, "step": 32860000 }, { "epoch": 95.12, "learning_rate": 2.4582976333827344e-06, "loss": 1.7452, "step": 32860500 }, { "epoch": 95.12, "learning_rate": 2.4575739857354575e-06, "loss": 1.7441, "step": 32861000 }, { "epoch": 95.12, "learning_rate": 2.4568517853834756e-06, "loss": 1.7427, "step": 32861500 }, { "epoch": 95.12, "learning_rate": 2.4561281377361987e-06, "loss": 1.7631, "step": 32862000 }, { "epoch": 95.12, "learning_rate": 2.4554044900889217e-06, "loss": 1.7664, "step": 32862500 }, { "epoch": 95.12, "learning_rate": 2.4546808424416452e-06, "loss": 1.7538, "step": 32863000 }, { "epoch": 95.13, "learning_rate": 2.4539571947943683e-06, "loss": 1.7415, "step": 32863500 }, { "epoch": 95.13, "learning_rate": 2.453233547147092e-06, "loss": 1.7447, "step": 32864000 }, { "epoch": 95.13, "learning_rate": 2.4525113467951095e-06, "loss": 1.7719, "step": 32864500 }, { "epoch": 95.13, "learning_rate": 2.4517876991478326e-06, "loss": 1.7361, "step": 32865000 }, { "epoch": 95.13, "learning_rate": 2.451064051500556e-06, "loss": 1.7501, "step": 32865500 }, { "epoch": 95.13, "learning_rate": 2.450340403853279e-06, "loss": 1.7449, "step": 32866000 }, { "epoch": 95.14, "learning_rate": 2.449618203501297e-06, "loss": 1.7776, "step": 32866500 }, { "epoch": 95.14, "learning_rate": 2.4488945558540203e-06, "loss": 1.7461, "step": 32867000 }, { "epoch": 95.14, "learning_rate": 2.4481709082067434e-06, "loss": 1.7458, "step": 32867500 }, { "epoch": 95.14, "learning_rate": 2.4474472605594665e-06, "loss": 1.7463, "step": 32868000 }, { "epoch": 95.14, "learning_rate": 2.44672361291219e-06, "loss": 1.7705, "step": 32868500 }, { "epoch": 95.14, "learning_rate": 2.4460014125602077e-06, "loss": 1.7793, "step": 32869000 }, { "epoch": 95.14, "learning_rate": 2.4452792122082254e-06, "loss": 1.758, "step": 32869500 }, { "epoch": 95.15, "learning_rate": 2.4445555645609484e-06, "loss": 1.7487, "step": 32870000 }, { "epoch": 95.15, "learning_rate": 2.443831916913672e-06, "loss": 1.7735, "step": 32870500 }, { "epoch": 95.15, "learning_rate": 2.443108269266395e-06, "loss": 1.7356, "step": 32871000 }, { "epoch": 95.15, "learning_rate": 2.4423846216191185e-06, "loss": 1.7588, "step": 32871500 }, { "epoch": 95.15, "learning_rate": 2.4416609739718416e-06, "loss": 1.7429, "step": 32872000 }, { "epoch": 95.15, "learning_rate": 2.4409373263245647e-06, "loss": 1.7631, "step": 32872500 }, { "epoch": 95.15, "learning_rate": 2.4402136786772878e-06, "loss": 1.76, "step": 32873000 }, { "epoch": 95.16, "learning_rate": 2.4394900310300113e-06, "loss": 1.7579, "step": 32873500 }, { "epoch": 95.16, "learning_rate": 2.4387663833827344e-06, "loss": 1.7622, "step": 32874000 }, { "epoch": 95.16, "learning_rate": 2.438042735735458e-06, "loss": 1.773, "step": 32874500 }, { "epoch": 95.16, "learning_rate": 2.437319088088181e-06, "loss": 1.7607, "step": 32875000 }, { "epoch": 95.16, "learning_rate": 2.4365968877361986e-06, "loss": 1.7625, "step": 32875500 }, { "epoch": 95.16, "learning_rate": 2.435873240088922e-06, "loss": 1.7693, "step": 32876000 }, { "epoch": 95.16, "learning_rate": 2.4351524870322344e-06, "loss": 1.7632, "step": 32876500 }, { "epoch": 95.17, "learning_rate": 2.4344288393849575e-06, "loss": 1.749, "step": 32877000 }, { "epoch": 95.17, "learning_rate": 2.4337051917376805e-06, "loss": 1.764, "step": 32877500 }, { "epoch": 95.17, "learning_rate": 2.432981544090404e-06, "loss": 1.7451, "step": 32878000 }, { "epoch": 95.17, "learning_rate": 2.432257896443127e-06, "loss": 1.7781, "step": 32878500 }, { "epoch": 95.17, "learning_rate": 2.4315342487958506e-06, "loss": 1.7613, "step": 32879000 }, { "epoch": 95.17, "learning_rate": 2.4308106011485737e-06, "loss": 1.7612, "step": 32879500 }, { "epoch": 95.17, "learning_rate": 2.430086953501297e-06, "loss": 1.7702, "step": 32880000 }, { "epoch": 95.18, "learning_rate": 2.429364753149315e-06, "loss": 1.7505, "step": 32880500 }, { "epoch": 95.18, "learning_rate": 2.428641105502038e-06, "loss": 1.7507, "step": 32881000 }, { "epoch": 95.18, "learning_rate": 2.427917457854761e-06, "loss": 1.751, "step": 32881500 }, { "epoch": 95.18, "learning_rate": 2.427193810207484e-06, "loss": 1.7464, "step": 32882000 }, { "epoch": 95.18, "learning_rate": 2.4264701625602076e-06, "loss": 1.7755, "step": 32882500 }, { "epoch": 95.18, "learning_rate": 2.425746514912931e-06, "loss": 1.7637, "step": 32883000 }, { "epoch": 95.18, "learning_rate": 2.4250243145609484e-06, "loss": 1.7732, "step": 32883500 }, { "epoch": 95.19, "learning_rate": 2.424300666913672e-06, "loss": 1.7565, "step": 32884000 }, { "epoch": 95.19, "learning_rate": 2.4235784665616896e-06, "loss": 1.7646, "step": 32884500 }, { "epoch": 95.19, "learning_rate": 2.4228548189144126e-06, "loss": 1.7623, "step": 32885000 }, { "epoch": 95.19, "learning_rate": 2.422131171267136e-06, "loss": 1.7621, "step": 32885500 }, { "epoch": 95.19, "learning_rate": 2.4214075236198597e-06, "loss": 1.7412, "step": 32886000 }, { "epoch": 95.19, "learning_rate": 2.4206838759725827e-06, "loss": 1.7586, "step": 32886500 }, { "epoch": 95.19, "learning_rate": 2.419960228325306e-06, "loss": 1.7599, "step": 32887000 }, { "epoch": 95.2, "learning_rate": 2.419236580678029e-06, "loss": 1.7526, "step": 32887500 }, { "epoch": 95.2, "learning_rate": 2.418512933030752e-06, "loss": 1.7463, "step": 32888000 }, { "epoch": 95.2, "learning_rate": 2.41779073267877e-06, "loss": 1.7639, "step": 32888500 }, { "epoch": 95.2, "learning_rate": 2.417067085031493e-06, "loss": 1.7391, "step": 32889000 }, { "epoch": 95.2, "learning_rate": 2.4163434373842162e-06, "loss": 1.758, "step": 32889500 }, { "epoch": 95.2, "learning_rate": 2.4156197897369397e-06, "loss": 1.7376, "step": 32890000 }, { "epoch": 95.2, "learning_rate": 2.4148961420896632e-06, "loss": 1.7904, "step": 32890500 }, { "epoch": 95.21, "learning_rate": 2.4141739417376805e-06, "loss": 1.774, "step": 32891000 }, { "epoch": 95.21, "learning_rate": 2.413450294090404e-06, "loss": 1.7313, "step": 32891500 }, { "epoch": 95.21, "learning_rate": 2.412726646443127e-06, "loss": 1.7533, "step": 32892000 }, { "epoch": 95.21, "learning_rate": 2.4120029987958506e-06, "loss": 1.7679, "step": 32892500 }, { "epoch": 95.21, "learning_rate": 2.4112793511485737e-06, "loss": 1.7598, "step": 32893000 }, { "epoch": 95.21, "learning_rate": 2.4105557035012967e-06, "loss": 1.7645, "step": 32893500 }, { "epoch": 95.21, "learning_rate": 2.409833503149315e-06, "loss": 1.7714, "step": 32894000 }, { "epoch": 95.22, "learning_rate": 2.409109855502038e-06, "loss": 1.7606, "step": 32894500 }, { "epoch": 95.22, "learning_rate": 2.408386207854761e-06, "loss": 1.7607, "step": 32895000 }, { "epoch": 95.22, "learning_rate": 2.4076625602074845e-06, "loss": 1.7313, "step": 32895500 }, { "epoch": 95.22, "learning_rate": 2.4069389125602076e-06, "loss": 1.7537, "step": 32896000 }, { "epoch": 95.22, "learning_rate": 2.4062152649129307e-06, "loss": 1.7616, "step": 32896500 }, { "epoch": 95.22, "learning_rate": 2.4054930645609488e-06, "loss": 1.7634, "step": 32897000 }, { "epoch": 95.22, "learning_rate": 2.404769416913672e-06, "loss": 1.7686, "step": 32897500 }, { "epoch": 95.23, "learning_rate": 2.404045769266395e-06, "loss": 1.7545, "step": 32898000 }, { "epoch": 95.23, "learning_rate": 2.403323568914413e-06, "loss": 1.7666, "step": 32898500 }, { "epoch": 95.23, "learning_rate": 2.402599921267136e-06, "loss": 1.7299, "step": 32899000 }, { "epoch": 95.23, "learning_rate": 2.401876273619859e-06, "loss": 1.7607, "step": 32899500 }, { "epoch": 95.23, "learning_rate": 2.4011526259725827e-06, "loss": 1.7708, "step": 32900000 }, { "epoch": 95.23, "learning_rate": 2.4004289783253058e-06, "loss": 1.7546, "step": 32900500 }, { "epoch": 95.23, "learning_rate": 2.3997053306780293e-06, "loss": 1.7636, "step": 32901000 }, { "epoch": 95.24, "learning_rate": 2.3989816830307523e-06, "loss": 1.7503, "step": 32901500 }, { "epoch": 95.24, "learning_rate": 2.3982580353834754e-06, "loss": 1.744, "step": 32902000 }, { "epoch": 95.24, "learning_rate": 2.3975343877361985e-06, "loss": 1.7379, "step": 32902500 }, { "epoch": 95.24, "learning_rate": 2.3968121873842166e-06, "loss": 1.7359, "step": 32903000 }, { "epoch": 95.24, "learning_rate": 2.3960885397369397e-06, "loss": 1.7731, "step": 32903500 }, { "epoch": 95.24, "learning_rate": 2.3953663393849574e-06, "loss": 1.749, "step": 32904000 }, { "epoch": 95.25, "learning_rate": 2.394642691737681e-06, "loss": 1.7544, "step": 32904500 }, { "epoch": 95.25, "learning_rate": 2.393919044090404e-06, "loss": 1.7574, "step": 32905000 }, { "epoch": 95.25, "learning_rate": 2.393195396443127e-06, "loss": 1.7542, "step": 32905500 }, { "epoch": 95.25, "learning_rate": 2.39247174879585e-06, "loss": 1.7284, "step": 32906000 }, { "epoch": 95.25, "learning_rate": 2.3917481011485736e-06, "loss": 1.7445, "step": 32906500 }, { "epoch": 95.25, "learning_rate": 2.391024453501297e-06, "loss": 1.7505, "step": 32907000 }, { "epoch": 95.25, "learning_rate": 2.39030080585402e-06, "loss": 1.7235, "step": 32907500 }, { "epoch": 95.26, "learning_rate": 2.389578605502038e-06, "loss": 1.7375, "step": 32908000 }, { "epoch": 95.26, "learning_rate": 2.3888549578547614e-06, "loss": 1.7585, "step": 32908500 }, { "epoch": 95.26, "learning_rate": 2.3881313102074845e-06, "loss": 1.726, "step": 32909000 }, { "epoch": 95.26, "learning_rate": 2.3874076625602075e-06, "loss": 1.7442, "step": 32909500 }, { "epoch": 95.26, "learning_rate": 2.3866840149129306e-06, "loss": 1.7684, "step": 32910000 }, { "epoch": 95.26, "learning_rate": 2.385960367265654e-06, "loss": 1.7645, "step": 32910500 }, { "epoch": 95.26, "learning_rate": 2.385238166913672e-06, "loss": 1.7596, "step": 32911000 }, { "epoch": 95.27, "learning_rate": 2.384514519266395e-06, "loss": 1.7702, "step": 32911500 }, { "epoch": 95.27, "learning_rate": 2.3837908716191184e-06, "loss": 1.7679, "step": 32912000 }, { "epoch": 95.27, "learning_rate": 2.383067223971842e-06, "loss": 1.7682, "step": 32912500 }, { "epoch": 95.27, "learning_rate": 2.382345023619859e-06, "loss": 1.7253, "step": 32913000 }, { "epoch": 95.27, "learning_rate": 2.3816213759725826e-06, "loss": 1.774, "step": 32913500 }, { "epoch": 95.27, "learning_rate": 2.3808977283253057e-06, "loss": 1.7407, "step": 32914000 }, { "epoch": 95.27, "learning_rate": 2.3801740806780292e-06, "loss": 1.7507, "step": 32914500 }, { "epoch": 95.28, "learning_rate": 2.3794504330307523e-06, "loss": 1.7228, "step": 32915000 }, { "epoch": 95.28, "learning_rate": 2.3787267853834754e-06, "loss": 1.7639, "step": 32915500 }, { "epoch": 95.28, "learning_rate": 2.3780045850314935e-06, "loss": 1.7822, "step": 32916000 }, { "epoch": 95.28, "learning_rate": 2.3772809373842166e-06, "loss": 1.741, "step": 32916500 }, { "epoch": 95.28, "learning_rate": 2.3765572897369396e-06, "loss": 1.7532, "step": 32917000 }, { "epoch": 95.28, "learning_rate": 2.3758336420896627e-06, "loss": 1.7503, "step": 32917500 }, { "epoch": 95.28, "learning_rate": 2.3751099944423862e-06, "loss": 1.7551, "step": 32918000 }, { "epoch": 95.29, "learning_rate": 2.374387794090404e-06, "loss": 1.7846, "step": 32918500 }, { "epoch": 95.29, "learning_rate": 2.373665593738422e-06, "loss": 1.7603, "step": 32919000 }, { "epoch": 95.29, "learning_rate": 2.372941946091145e-06, "loss": 1.7563, "step": 32919500 }, { "epoch": 95.29, "learning_rate": 2.372218298443868e-06, "loss": 1.7715, "step": 32920000 }, { "epoch": 95.29, "learning_rate": 2.3714946507965912e-06, "loss": 1.7624, "step": 32920500 }, { "epoch": 95.29, "learning_rate": 2.3707710031493147e-06, "loss": 1.7429, "step": 32921000 }, { "epoch": 95.29, "learning_rate": 2.370047355502038e-06, "loss": 1.7396, "step": 32921500 }, { "epoch": 95.3, "learning_rate": 2.3693237078547613e-06, "loss": 1.7425, "step": 32922000 }, { "epoch": 95.3, "learning_rate": 2.3686000602074844e-06, "loss": 1.738, "step": 32922500 }, { "epoch": 95.3, "learning_rate": 2.3678764125602075e-06, "loss": 1.7435, "step": 32923000 }, { "epoch": 95.3, "learning_rate": 2.367152764912931e-06, "loss": 1.764, "step": 32923500 }, { "epoch": 95.3, "learning_rate": 2.3664305645609487e-06, "loss": 1.7541, "step": 32924000 }, { "epoch": 95.3, "learning_rate": 2.3657069169136717e-06, "loss": 1.7518, "step": 32924500 }, { "epoch": 95.3, "learning_rate": 2.3649832692663952e-06, "loss": 1.7321, "step": 32925000 }, { "epoch": 95.31, "learning_rate": 2.3642596216191183e-06, "loss": 1.7755, "step": 32925500 }, { "epoch": 95.31, "learning_rate": 2.3635359739718414e-06, "loss": 1.7567, "step": 32926000 }, { "epoch": 95.31, "learning_rate": 2.3628123263245645e-06, "loss": 1.7528, "step": 32926500 }, { "epoch": 95.31, "learning_rate": 2.3620901259725826e-06, "loss": 1.7562, "step": 32927000 }, { "epoch": 95.31, "learning_rate": 2.3613679256206003e-06, "loss": 1.7838, "step": 32927500 }, { "epoch": 95.31, "learning_rate": 2.3606442779733238e-06, "loss": 1.7823, "step": 32928000 }, { "epoch": 95.31, "learning_rate": 2.359920630326047e-06, "loss": 1.7709, "step": 32928500 }, { "epoch": 95.32, "learning_rate": 2.35919698267877e-06, "loss": 1.7393, "step": 32929000 }, { "epoch": 95.32, "learning_rate": 2.358473335031493e-06, "loss": 1.7686, "step": 32929500 }, { "epoch": 95.32, "learning_rate": 2.3577496873842165e-06, "loss": 1.751, "step": 32930000 }, { "epoch": 95.32, "learning_rate": 2.35702603973694e-06, "loss": 1.7839, "step": 32930500 }, { "epoch": 95.32, "learning_rate": 2.356302392089663e-06, "loss": 1.7733, "step": 32931000 }, { "epoch": 95.32, "learning_rate": 2.3555801917376808e-06, "loss": 1.7776, "step": 32931500 }, { "epoch": 95.32, "learning_rate": 2.3548565440904043e-06, "loss": 1.7663, "step": 32932000 }, { "epoch": 95.33, "learning_rate": 2.3541328964431273e-06, "loss": 1.7423, "step": 32932500 }, { "epoch": 95.33, "learning_rate": 2.3534092487958504e-06, "loss": 1.7554, "step": 32933000 }, { "epoch": 95.33, "learning_rate": 2.3526856011485735e-06, "loss": 1.7639, "step": 32933500 }, { "epoch": 95.33, "learning_rate": 2.3519634007965916e-06, "loss": 1.7619, "step": 32934000 }, { "epoch": 95.33, "learning_rate": 2.3512397531493147e-06, "loss": 1.7716, "step": 32934500 }, { "epoch": 95.33, "learning_rate": 2.3505161055020378e-06, "loss": 1.7413, "step": 32935000 }, { "epoch": 95.33, "learning_rate": 2.349792457854761e-06, "loss": 1.7725, "step": 32935500 }, { "epoch": 95.34, "learning_rate": 2.3490688102074843e-06, "loss": 1.7498, "step": 32936000 }, { "epoch": 95.34, "learning_rate": 2.348346609855502e-06, "loss": 1.7653, "step": 32936500 }, { "epoch": 95.34, "learning_rate": 2.347622962208225e-06, "loss": 1.7321, "step": 32937000 }, { "epoch": 95.34, "learning_rate": 2.3468993145609486e-06, "loss": 1.7564, "step": 32937500 }, { "epoch": 95.34, "learning_rate": 2.346175666913672e-06, "loss": 1.7596, "step": 32938000 }, { "epoch": 95.34, "learning_rate": 2.3454534665616894e-06, "loss": 1.7772, "step": 32938500 }, { "epoch": 95.34, "learning_rate": 2.344729818914413e-06, "loss": 1.725, "step": 32939000 }, { "epoch": 95.35, "learning_rate": 2.3440061712671364e-06, "loss": 1.7664, "step": 32939500 }, { "epoch": 95.35, "learning_rate": 2.3432825236198594e-06, "loss": 1.7471, "step": 32940000 }, { "epoch": 95.35, "learning_rate": 2.3425588759725825e-06, "loss": 1.7569, "step": 32940500 }, { "epoch": 95.35, "learning_rate": 2.3418352283253056e-06, "loss": 1.7695, "step": 32941000 }, { "epoch": 95.35, "learning_rate": 2.341111580678029e-06, "loss": 1.7516, "step": 32941500 }, { "epoch": 95.35, "learning_rate": 2.340387933030752e-06, "loss": 1.7354, "step": 32942000 }, { "epoch": 95.36, "learning_rate": 2.3396642853834757e-06, "loss": 1.7724, "step": 32942500 }, { "epoch": 95.36, "learning_rate": 2.3389406377361988e-06, "loss": 1.7704, "step": 32943000 }, { "epoch": 95.36, "learning_rate": 2.3382184373842165e-06, "loss": 1.7498, "step": 32943500 }, { "epoch": 95.36, "learning_rate": 2.33749478973694e-06, "loss": 1.7659, "step": 32944000 }, { "epoch": 95.36, "learning_rate": 2.336771142089663e-06, "loss": 1.7402, "step": 32944500 }, { "epoch": 95.36, "learning_rate": 2.336047494442386e-06, "loss": 1.7584, "step": 32945000 }, { "epoch": 95.36, "learning_rate": 2.3353252940904042e-06, "loss": 1.7528, "step": 32945500 }, { "epoch": 95.37, "learning_rate": 2.3346016464431273e-06, "loss": 1.7455, "step": 32946000 }, { "epoch": 95.37, "learning_rate": 2.3338779987958504e-06, "loss": 1.7384, "step": 32946500 }, { "epoch": 95.37, "learning_rate": 2.3331543511485735e-06, "loss": 1.7602, "step": 32947000 }, { "epoch": 95.37, "learning_rate": 2.332430703501297e-06, "loss": 1.7394, "step": 32947500 }, { "epoch": 95.37, "learning_rate": 2.3317085031493146e-06, "loss": 1.7376, "step": 32948000 }, { "epoch": 95.37, "learning_rate": 2.3309848555020377e-06, "loss": 1.7615, "step": 32948500 }, { "epoch": 95.37, "learning_rate": 2.3302612078547612e-06, "loss": 1.7696, "step": 32949000 }, { "epoch": 95.38, "learning_rate": 2.3295375602074843e-06, "loss": 1.7295, "step": 32949500 }, { "epoch": 95.38, "learning_rate": 2.3288139125602074e-06, "loss": 1.7548, "step": 32950000 }, { "epoch": 95.38, "learning_rate": 2.328090264912931e-06, "loss": 1.7755, "step": 32950500 }, { "epoch": 95.38, "learning_rate": 2.327369511856243e-06, "loss": 1.7583, "step": 32951000 }, { "epoch": 95.38, "learning_rate": 2.3266458642089662e-06, "loss": 1.7752, "step": 32951500 }, { "epoch": 95.38, "learning_rate": 2.3259222165616897e-06, "loss": 1.7655, "step": 32952000 }, { "epoch": 95.38, "learning_rate": 2.325198568914413e-06, "loss": 1.7573, "step": 32952500 }, { "epoch": 95.39, "learning_rate": 2.324474921267136e-06, "loss": 1.7643, "step": 32953000 }, { "epoch": 95.39, "learning_rate": 2.3237512736198594e-06, "loss": 1.7386, "step": 32953500 }, { "epoch": 95.39, "learning_rate": 2.3230276259725825e-06, "loss": 1.7574, "step": 32954000 }, { "epoch": 95.39, "learning_rate": 2.322303978325306e-06, "loss": 1.7651, "step": 32954500 }, { "epoch": 95.39, "learning_rate": 2.3215817779733237e-06, "loss": 1.7439, "step": 32955000 }, { "epoch": 95.39, "learning_rate": 2.3208581303260467e-06, "loss": 1.7482, "step": 32955500 }, { "epoch": 95.39, "learning_rate": 2.3201359299740644e-06, "loss": 1.7453, "step": 32956000 }, { "epoch": 95.4, "learning_rate": 2.319412282326788e-06, "loss": 1.7471, "step": 32956500 }, { "epoch": 95.4, "learning_rate": 2.318688634679511e-06, "loss": 1.7677, "step": 32957000 }, { "epoch": 95.4, "learning_rate": 2.3179649870322345e-06, "loss": 1.7687, "step": 32957500 }, { "epoch": 95.4, "learning_rate": 2.3172413393849576e-06, "loss": 1.7515, "step": 32958000 }, { "epoch": 95.4, "learning_rate": 2.3165191390329753e-06, "loss": 1.7605, "step": 32958500 }, { "epoch": 95.4, "learning_rate": 2.3157954913856988e-06, "loss": 1.7403, "step": 32959000 }, { "epoch": 95.4, "learning_rate": 2.315071843738422e-06, "loss": 1.7605, "step": 32959500 }, { "epoch": 95.41, "learning_rate": 2.314348196091145e-06, "loss": 1.7495, "step": 32960000 }, { "epoch": 95.41, "learning_rate": 2.313624548443868e-06, "loss": 1.7591, "step": 32960500 }, { "epoch": 95.41, "learning_rate": 2.312902348091886e-06, "loss": 1.7671, "step": 32961000 }, { "epoch": 95.41, "learning_rate": 2.312178700444609e-06, "loss": 1.7875, "step": 32961500 }, { "epoch": 95.41, "learning_rate": 2.3114550527973323e-06, "loss": 1.733, "step": 32962000 }, { "epoch": 95.41, "learning_rate": 2.3107314051500553e-06, "loss": 1.7391, "step": 32962500 }, { "epoch": 95.41, "learning_rate": 2.310007757502779e-06, "loss": 1.7685, "step": 32963000 }, { "epoch": 95.42, "learning_rate": 2.3092855571507965e-06, "loss": 1.7476, "step": 32963500 }, { "epoch": 95.42, "learning_rate": 2.3085619095035196e-06, "loss": 1.7375, "step": 32964000 }, { "epoch": 95.42, "learning_rate": 2.307838261856243e-06, "loss": 1.749, "step": 32964500 }, { "epoch": 95.42, "learning_rate": 2.3071146142089666e-06, "loss": 1.7762, "step": 32965000 }, { "epoch": 95.42, "learning_rate": 2.3063909665616897e-06, "loss": 1.7445, "step": 32965500 }, { "epoch": 95.42, "learning_rate": 2.3056673189144128e-06, "loss": 1.7629, "step": 32966000 }, { "epoch": 95.42, "learning_rate": 2.304943671267136e-06, "loss": 1.7508, "step": 32966500 }, { "epoch": 95.43, "learning_rate": 2.3042200236198593e-06, "loss": 1.7615, "step": 32967000 }, { "epoch": 95.43, "learning_rate": 2.303497823267877e-06, "loss": 1.7663, "step": 32967500 }, { "epoch": 95.43, "learning_rate": 2.3027741756206e-06, "loss": 1.7433, "step": 32968000 }, { "epoch": 95.43, "learning_rate": 2.3020505279733236e-06, "loss": 1.7572, "step": 32968500 }, { "epoch": 95.43, "learning_rate": 2.301326880326047e-06, "loss": 1.7542, "step": 32969000 }, { "epoch": 95.43, "learning_rate": 2.3006061272693594e-06, "loss": 1.7658, "step": 32969500 }, { "epoch": 95.43, "learning_rate": 2.2998824796220825e-06, "loss": 1.7813, "step": 32970000 }, { "epoch": 95.44, "learning_rate": 2.2991588319748055e-06, "loss": 1.7833, "step": 32970500 }, { "epoch": 95.44, "learning_rate": 2.2984351843275286e-06, "loss": 1.7656, "step": 32971000 }, { "epoch": 95.44, "learning_rate": 2.297711536680252e-06, "loss": 1.7636, "step": 32971500 }, { "epoch": 95.44, "learning_rate": 2.29698933632827e-06, "loss": 1.7545, "step": 32972000 }, { "epoch": 95.44, "learning_rate": 2.296265688680993e-06, "loss": 1.7535, "step": 32972500 }, { "epoch": 95.44, "learning_rate": 2.2955420410337164e-06, "loss": 1.7655, "step": 32973000 }, { "epoch": 95.44, "learning_rate": 2.2948183933864395e-06, "loss": 1.7529, "step": 32973500 }, { "epoch": 95.45, "learning_rate": 2.294094745739163e-06, "loss": 1.7598, "step": 32974000 }, { "epoch": 95.45, "learning_rate": 2.293371098091886e-06, "loss": 1.7593, "step": 32974500 }, { "epoch": 95.45, "learning_rate": 2.292647450444609e-06, "loss": 1.7638, "step": 32975000 }, { "epoch": 95.45, "learning_rate": 2.2919238027973326e-06, "loss": 1.7408, "step": 32975500 }, { "epoch": 95.45, "learning_rate": 2.2912001551500557e-06, "loss": 1.742, "step": 32976000 }, { "epoch": 95.45, "learning_rate": 2.2904765075027788e-06, "loss": 1.7495, "step": 32976500 }, { "epoch": 95.45, "learning_rate": 2.2897543071507965e-06, "loss": 1.7675, "step": 32977000 }, { "epoch": 95.46, "learning_rate": 2.28903065950352e-06, "loss": 1.7592, "step": 32977500 }, { "epoch": 95.46, "learning_rate": 2.288307011856243e-06, "loss": 1.7743, "step": 32978000 }, { "epoch": 95.46, "learning_rate": 2.2875833642089665e-06, "loss": 1.7772, "step": 32978500 }, { "epoch": 95.46, "learning_rate": 2.2868611638569842e-06, "loss": 1.7963, "step": 32979000 }, { "epoch": 95.46, "learning_rate": 2.2861375162097073e-06, "loss": 1.7495, "step": 32979500 }, { "epoch": 95.46, "learning_rate": 2.285415315857725e-06, "loss": 1.7679, "step": 32980000 }, { "epoch": 95.47, "learning_rate": 2.2846916682104485e-06, "loss": 1.759, "step": 32980500 }, { "epoch": 95.47, "learning_rate": 2.2839680205631716e-06, "loss": 1.7483, "step": 32981000 }, { "epoch": 95.47, "learning_rate": 2.283244372915895e-06, "loss": 1.7517, "step": 32981500 }, { "epoch": 95.47, "learning_rate": 2.282520725268618e-06, "loss": 1.7598, "step": 32982000 }, { "epoch": 95.47, "learning_rate": 2.2817970776213412e-06, "loss": 1.7862, "step": 32982500 }, { "epoch": 95.47, "learning_rate": 2.2810734299740647e-06, "loss": 1.7376, "step": 32983000 }, { "epoch": 95.47, "learning_rate": 2.280349782326788e-06, "loss": 1.7603, "step": 32983500 }, { "epoch": 95.48, "learning_rate": 2.279626134679511e-06, "loss": 1.741, "step": 32984000 }, { "epoch": 95.48, "learning_rate": 2.278902487032234e-06, "loss": 1.7611, "step": 32984500 }, { "epoch": 95.48, "learning_rate": 2.278180286680252e-06, "loss": 1.7391, "step": 32985000 }, { "epoch": 95.48, "learning_rate": 2.277456639032975e-06, "loss": 1.7329, "step": 32985500 }, { "epoch": 95.48, "learning_rate": 2.2767329913856982e-06, "loss": 1.7501, "step": 32986000 }, { "epoch": 95.48, "learning_rate": 2.2760093437384217e-06, "loss": 1.7498, "step": 32986500 }, { "epoch": 95.48, "learning_rate": 2.2752856960911452e-06, "loss": 1.7522, "step": 32987000 }, { "epoch": 95.49, "learning_rate": 2.2745620484438683e-06, "loss": 1.726, "step": 32987500 }, { "epoch": 95.49, "learning_rate": 2.273839848091886e-06, "loss": 1.7622, "step": 32988000 }, { "epoch": 95.49, "learning_rate": 2.2731162004446095e-06, "loss": 1.7473, "step": 32988500 }, { "epoch": 95.49, "learning_rate": 2.2723925527973326e-06, "loss": 1.7608, "step": 32989000 }, { "epoch": 95.49, "learning_rate": 2.2716689051500557e-06, "loss": 1.7566, "step": 32989500 }, { "epoch": 95.49, "learning_rate": 2.2709452575027787e-06, "loss": 1.7458, "step": 32990000 }, { "epoch": 95.49, "learning_rate": 2.270223057150797e-06, "loss": 1.7559, "step": 32990500 }, { "epoch": 95.5, "learning_rate": 2.26949940950352e-06, "loss": 1.7389, "step": 32991000 }, { "epoch": 95.5, "learning_rate": 2.268775761856243e-06, "loss": 1.7433, "step": 32991500 }, { "epoch": 95.5, "learning_rate": 2.268052114208966e-06, "loss": 1.7256, "step": 32992000 }, { "epoch": 95.5, "learning_rate": 2.2673284665616896e-06, "loss": 1.7397, "step": 32992500 }, { "epoch": 95.5, "learning_rate": 2.266604818914413e-06, "loss": 1.7585, "step": 32993000 }, { "epoch": 95.5, "learning_rate": 2.2658826185624303e-06, "loss": 1.726, "step": 32993500 }, { "epoch": 95.5, "learning_rate": 2.265158970915154e-06, "loss": 1.7536, "step": 32994000 }, { "epoch": 95.51, "learning_rate": 2.2644353232678773e-06, "loss": 1.767, "step": 32994500 }, { "epoch": 95.51, "learning_rate": 2.2637131229158946e-06, "loss": 1.7344, "step": 32995000 }, { "epoch": 95.51, "learning_rate": 2.262989475268618e-06, "loss": 1.7513, "step": 32995500 }, { "epoch": 95.51, "learning_rate": 2.2622658276213416e-06, "loss": 1.7537, "step": 32996000 }, { "epoch": 95.51, "learning_rate": 2.2615421799740647e-06, "loss": 1.728, "step": 32996500 }, { "epoch": 95.51, "learning_rate": 2.2608185323267878e-06, "loss": 1.7579, "step": 32997000 }, { "epoch": 95.51, "learning_rate": 2.260094884679511e-06, "loss": 1.7644, "step": 32997500 }, { "epoch": 95.52, "learning_rate": 2.2593712370322343e-06, "loss": 1.7789, "step": 32998000 }, { "epoch": 95.52, "learning_rate": 2.258649036680252e-06, "loss": 1.7629, "step": 32998500 }, { "epoch": 95.52, "learning_rate": 2.257925389032975e-06, "loss": 1.7727, "step": 32999000 }, { "epoch": 95.52, "learning_rate": 2.2572017413856986e-06, "loss": 1.7859, "step": 32999500 }, { "epoch": 95.52, "learning_rate": 2.2564780937384217e-06, "loss": 1.7652, "step": 33000000 }, { "epoch": 95.52, "learning_rate": 2.255754446091145e-06, "loss": 1.7549, "step": 33000500 }, { "epoch": 95.52, "learning_rate": 2.2550307984438683e-06, "loss": 1.7544, "step": 33001000 }, { "epoch": 95.53, "learning_rate": 2.2543071507965913e-06, "loss": 1.7829, "step": 33001500 }, { "epoch": 95.53, "learning_rate": 2.253583503149315e-06, "loss": 1.7381, "step": 33002000 }, { "epoch": 95.53, "learning_rate": 2.2528613027973325e-06, "loss": 1.7685, "step": 33002500 }, { "epoch": 95.53, "learning_rate": 2.2521376551500556e-06, "loss": 1.7492, "step": 33003000 }, { "epoch": 95.53, "learning_rate": 2.251414007502779e-06, "loss": 1.7524, "step": 33003500 }, { "epoch": 95.53, "learning_rate": 2.250690359855502e-06, "loss": 1.7445, "step": 33004000 }, { "epoch": 95.53, "learning_rate": 2.2499667122082253e-06, "loss": 1.7712, "step": 33004500 }, { "epoch": 95.54, "learning_rate": 2.2492445118562434e-06, "loss": 1.7593, "step": 33005000 }, { "epoch": 95.54, "learning_rate": 2.2485208642089664e-06, "loss": 1.7569, "step": 33005500 }, { "epoch": 95.54, "learning_rate": 2.2477972165616895e-06, "loss": 1.7507, "step": 33006000 }, { "epoch": 95.54, "learning_rate": 2.2470735689144126e-06, "loss": 1.761, "step": 33006500 }, { "epoch": 95.54, "learning_rate": 2.246349921267136e-06, "loss": 1.7477, "step": 33007000 }, { "epoch": 95.54, "learning_rate": 2.2456277209151538e-06, "loss": 1.7608, "step": 33007500 }, { "epoch": 95.54, "learning_rate": 2.244904073267877e-06, "loss": 1.7681, "step": 33008000 }, { "epoch": 95.55, "learning_rate": 2.2441804256206004e-06, "loss": 1.7308, "step": 33008500 }, { "epoch": 95.55, "learning_rate": 2.2434567779733235e-06, "loss": 1.7751, "step": 33009000 }, { "epoch": 95.55, "learning_rate": 2.242734577621341e-06, "loss": 1.7632, "step": 33009500 }, { "epoch": 95.55, "learning_rate": 2.2420109299740646e-06, "loss": 1.7515, "step": 33010000 }, { "epoch": 95.55, "learning_rate": 2.2412872823267877e-06, "loss": 1.7633, "step": 33010500 }, { "epoch": 95.55, "learning_rate": 2.2405636346795112e-06, "loss": 1.7778, "step": 33011000 }, { "epoch": 95.55, "learning_rate": 2.2398399870322343e-06, "loss": 1.7514, "step": 33011500 }, { "epoch": 95.56, "learning_rate": 2.239117786680252e-06, "loss": 1.7417, "step": 33012000 }, { "epoch": 95.56, "learning_rate": 2.2383941390329755e-06, "loss": 1.7466, "step": 33012500 }, { "epoch": 95.56, "learning_rate": 2.2376704913856986e-06, "loss": 1.7843, "step": 33013000 }, { "epoch": 95.56, "learning_rate": 2.2369468437384216e-06, "loss": 1.7391, "step": 33013500 }, { "epoch": 95.56, "learning_rate": 2.2362246433864397e-06, "loss": 1.7509, "step": 33014000 }, { "epoch": 95.56, "learning_rate": 2.235500995739163e-06, "loss": 1.7696, "step": 33014500 }, { "epoch": 95.56, "learning_rate": 2.234777348091886e-06, "loss": 1.7334, "step": 33015000 }, { "epoch": 95.57, "learning_rate": 2.234053700444609e-06, "loss": 1.7429, "step": 33015500 }, { "epoch": 95.57, "learning_rate": 2.2333300527973325e-06, "loss": 1.7776, "step": 33016000 }, { "epoch": 95.57, "learning_rate": 2.23260785244535e-06, "loss": 1.7407, "step": 33016500 }, { "epoch": 95.57, "learning_rate": 2.2318842047980732e-06, "loss": 1.7567, "step": 33017000 }, { "epoch": 95.57, "learning_rate": 2.2311605571507967e-06, "loss": 1.7612, "step": 33017500 }, { "epoch": 95.57, "learning_rate": 2.2304369095035202e-06, "loss": 1.7625, "step": 33018000 }, { "epoch": 95.58, "learning_rate": 2.2297132618562433e-06, "loss": 1.7793, "step": 33018500 }, { "epoch": 95.58, "learning_rate": 2.2289896142089664e-06, "loss": 1.7492, "step": 33019000 }, { "epoch": 95.58, "learning_rate": 2.2282674138569845e-06, "loss": 1.7795, "step": 33019500 }, { "epoch": 95.58, "learning_rate": 2.2275437662097076e-06, "loss": 1.7593, "step": 33020000 }, { "epoch": 95.58, "learning_rate": 2.2268201185624307e-06, "loss": 1.7716, "step": 33020500 }, { "epoch": 95.58, "learning_rate": 2.2260964709151537e-06, "loss": 1.777, "step": 33021000 }, { "epoch": 95.58, "learning_rate": 2.225372823267877e-06, "loss": 1.7652, "step": 33021500 }, { "epoch": 95.59, "learning_rate": 2.2246491756206003e-06, "loss": 1.7857, "step": 33022000 }, { "epoch": 95.59, "learning_rate": 2.223926975268618e-06, "loss": 1.7678, "step": 33022500 }, { "epoch": 95.59, "learning_rate": 2.223203327621341e-06, "loss": 1.7365, "step": 33023000 }, { "epoch": 95.59, "learning_rate": 2.2224796799740646e-06, "loss": 1.7328, "step": 33023500 }, { "epoch": 95.59, "learning_rate": 2.221756032326788e-06, "loss": 1.7642, "step": 33024000 }, { "epoch": 95.59, "learning_rate": 2.221032384679511e-06, "loss": 1.7678, "step": 33024500 }, { "epoch": 95.59, "learning_rate": 2.220310184327529e-06, "loss": 1.7599, "step": 33025000 }, { "epoch": 95.6, "learning_rate": 2.2195865366802523e-06, "loss": 1.7414, "step": 33025500 }, { "epoch": 95.6, "learning_rate": 2.2188628890329754e-06, "loss": 1.7294, "step": 33026000 }, { "epoch": 95.6, "learning_rate": 2.2181392413856985e-06, "loss": 1.7467, "step": 33026500 }, { "epoch": 95.6, "learning_rate": 2.2174155937384216e-06, "loss": 1.7588, "step": 33027000 }, { "epoch": 95.6, "learning_rate": 2.2166933933864397e-06, "loss": 1.761, "step": 33027500 }, { "epoch": 95.6, "learning_rate": 2.2159697457391628e-06, "loss": 1.7599, "step": 33028000 }, { "epoch": 95.6, "learning_rate": 2.215246098091886e-06, "loss": 1.7426, "step": 33028500 }, { "epoch": 95.61, "learning_rate": 2.2145224504446093e-06, "loss": 1.7607, "step": 33029000 }, { "epoch": 95.61, "learning_rate": 2.2137988027973324e-06, "loss": 1.759, "step": 33029500 }, { "epoch": 95.61, "learning_rate": 2.21307660244535e-06, "loss": 1.7597, "step": 33030000 }, { "epoch": 95.61, "learning_rate": 2.2123529547980736e-06, "loss": 1.7851, "step": 33030500 }, { "epoch": 95.61, "learning_rate": 2.2116293071507967e-06, "loss": 1.7569, "step": 33031000 }, { "epoch": 95.61, "learning_rate": 2.2109056595035198e-06, "loss": 1.7529, "step": 33031500 }, { "epoch": 95.61, "learning_rate": 2.210183459151538e-06, "loss": 1.7352, "step": 33032000 }, { "epoch": 95.62, "learning_rate": 2.209459811504261e-06, "loss": 1.7531, "step": 33032500 }, { "epoch": 95.62, "learning_rate": 2.208736163856984e-06, "loss": 1.7688, "step": 33033000 }, { "epoch": 95.62, "learning_rate": 2.2080125162097075e-06, "loss": 1.7665, "step": 33033500 }, { "epoch": 95.62, "learning_rate": 2.2072888685624306e-06, "loss": 1.7564, "step": 33034000 }, { "epoch": 95.62, "learning_rate": 2.2065666682104483e-06, "loss": 1.7483, "step": 33034500 }, { "epoch": 95.62, "learning_rate": 2.2058430205631718e-06, "loss": 1.7748, "step": 33035000 }, { "epoch": 95.62, "learning_rate": 2.2051208202111895e-06, "loss": 1.7893, "step": 33035500 }, { "epoch": 95.63, "learning_rate": 2.2043971725639125e-06, "loss": 1.773, "step": 33036000 }, { "epoch": 95.63, "learning_rate": 2.203673524916636e-06, "loss": 1.7554, "step": 33036500 }, { "epoch": 95.63, "learning_rate": 2.202949877269359e-06, "loss": 1.7577, "step": 33037000 }, { "epoch": 95.63, "learning_rate": 2.202226229622082e-06, "loss": 1.7475, "step": 33037500 }, { "epoch": 95.63, "learning_rate": 2.2015025819748057e-06, "loss": 1.7311, "step": 33038000 }, { "epoch": 95.63, "learning_rate": 2.2007789343275288e-06, "loss": 1.7676, "step": 33038500 }, { "epoch": 95.63, "learning_rate": 2.200055286680252e-06, "loss": 1.7763, "step": 33039000 }, { "epoch": 95.64, "learning_rate": 2.199331639032975e-06, "loss": 1.7502, "step": 33039500 }, { "epoch": 95.64, "learning_rate": 2.198609438680993e-06, "loss": 1.7567, "step": 33040000 }, { "epoch": 95.64, "learning_rate": 2.1978872383290107e-06, "loss": 1.7731, "step": 33040500 }, { "epoch": 95.64, "learning_rate": 2.1971635906817342e-06, "loss": 1.7561, "step": 33041000 }, { "epoch": 95.64, "learning_rate": 2.1964399430344573e-06, "loss": 1.7512, "step": 33041500 }, { "epoch": 95.64, "learning_rate": 2.1957162953871804e-06, "loss": 1.7762, "step": 33042000 }, { "epoch": 95.64, "learning_rate": 2.1949926477399035e-06, "loss": 1.769, "step": 33042500 }, { "epoch": 95.65, "learning_rate": 2.194269000092627e-06, "loss": 1.7569, "step": 33043000 }, { "epoch": 95.65, "learning_rate": 2.1935467997406446e-06, "loss": 1.7819, "step": 33043500 }, { "epoch": 95.65, "learning_rate": 2.1928231520933677e-06, "loss": 1.7388, "step": 33044000 }, { "epoch": 95.65, "learning_rate": 2.1920995044460912e-06, "loss": 1.7633, "step": 33044500 }, { "epoch": 95.65, "learning_rate": 2.191377304094109e-06, "loss": 1.7482, "step": 33045000 }, { "epoch": 95.65, "learning_rate": 2.190653656446832e-06, "loss": 1.7733, "step": 33045500 }, { "epoch": 95.65, "learning_rate": 2.1899300087995555e-06, "loss": 1.7683, "step": 33046000 }, { "epoch": 95.66, "learning_rate": 2.189206361152279e-06, "loss": 1.7521, "step": 33046500 }, { "epoch": 95.66, "learning_rate": 2.188482713505002e-06, "loss": 1.7412, "step": 33047000 }, { "epoch": 95.66, "learning_rate": 2.187759065857725e-06, "loss": 1.752, "step": 33047500 }, { "epoch": 95.66, "learning_rate": 2.1870354182104482e-06, "loss": 1.734, "step": 33048000 }, { "epoch": 95.66, "learning_rate": 2.1863117705631717e-06, "loss": 1.7455, "step": 33048500 }, { "epoch": 95.66, "learning_rate": 2.1855881229158952e-06, "loss": 1.753, "step": 33049000 }, { "epoch": 95.66, "learning_rate": 2.1848644752686183e-06, "loss": 1.7368, "step": 33049500 }, { "epoch": 95.67, "learning_rate": 2.1841408276213414e-06, "loss": 1.7816, "step": 33050000 }, { "epoch": 95.67, "learning_rate": 2.1834171799740645e-06, "loss": 1.7665, "step": 33050500 }, { "epoch": 95.67, "learning_rate": 2.1826949796220826e-06, "loss": 1.7582, "step": 33051000 }, { "epoch": 95.67, "learning_rate": 2.1819713319748057e-06, "loss": 1.7598, "step": 33051500 }, { "epoch": 95.67, "learning_rate": 2.1812476843275287e-06, "loss": 1.7649, "step": 33052000 }, { "epoch": 95.67, "learning_rate": 2.180524036680252e-06, "loss": 1.7633, "step": 33052500 }, { "epoch": 95.67, "learning_rate": 2.17980183632827e-06, "loss": 1.7568, "step": 33053000 }, { "epoch": 95.68, "learning_rate": 2.179078188680993e-06, "loss": 1.7538, "step": 33053500 }, { "epoch": 95.68, "learning_rate": 2.178354541033716e-06, "loss": 1.7685, "step": 33054000 }, { "epoch": 95.68, "learning_rate": 2.1776308933864396e-06, "loss": 1.7539, "step": 33054500 }, { "epoch": 95.68, "learning_rate": 2.176910140329752e-06, "loss": 1.7556, "step": 33055000 }, { "epoch": 95.68, "learning_rate": 2.1761864926824753e-06, "loss": 1.7441, "step": 33055500 }, { "epoch": 95.68, "learning_rate": 2.1754628450351984e-06, "loss": 1.7592, "step": 33056000 }, { "epoch": 95.69, "learning_rate": 2.1747391973879215e-06, "loss": 1.7491, "step": 33056500 }, { "epoch": 95.69, "learning_rate": 2.1740155497406446e-06, "loss": 1.7794, "step": 33057000 }, { "epoch": 95.69, "learning_rate": 2.173291902093368e-06, "loss": 1.7664, "step": 33057500 }, { "epoch": 95.69, "learning_rate": 2.172568254446091e-06, "loss": 1.7658, "step": 33058000 }, { "epoch": 95.69, "learning_rate": 2.1718446067988147e-06, "loss": 1.7468, "step": 33058500 }, { "epoch": 95.69, "learning_rate": 2.1711224064468323e-06, "loss": 1.7403, "step": 33059000 }, { "epoch": 95.69, "learning_rate": 2.1703987587995554e-06, "loss": 1.7528, "step": 33059500 }, { "epoch": 95.7, "learning_rate": 2.169675111152279e-06, "loss": 1.7342, "step": 33060000 }, { "epoch": 95.7, "learning_rate": 2.168951463505002e-06, "loss": 1.7561, "step": 33060500 }, { "epoch": 95.7, "learning_rate": 2.168227815857725e-06, "loss": 1.746, "step": 33061000 }, { "epoch": 95.7, "learning_rate": 2.1675041682104486e-06, "loss": 1.7536, "step": 33061500 }, { "epoch": 95.7, "learning_rate": 2.1667805205631717e-06, "loss": 1.7565, "step": 33062000 }, { "epoch": 95.7, "learning_rate": 2.1660568729158948e-06, "loss": 1.7695, "step": 33062500 }, { "epoch": 95.7, "learning_rate": 2.165333225268618e-06, "loss": 1.7358, "step": 33063000 }, { "epoch": 95.71, "learning_rate": 2.1646095776213413e-06, "loss": 1.7724, "step": 33063500 }, { "epoch": 95.71, "learning_rate": 2.163885929974065e-06, "loss": 1.7436, "step": 33064000 }, { "epoch": 95.71, "learning_rate": 2.163162282326788e-06, "loss": 1.7465, "step": 33064500 }, { "epoch": 95.71, "learning_rate": 2.1624400819748056e-06, "loss": 1.7337, "step": 33065000 }, { "epoch": 95.71, "learning_rate": 2.1617164343275287e-06, "loss": 1.7634, "step": 33065500 }, { "epoch": 95.71, "learning_rate": 2.160992786680252e-06, "loss": 1.7439, "step": 33066000 }, { "epoch": 95.71, "learning_rate": 2.1602691390329753e-06, "loss": 1.7546, "step": 33066500 }, { "epoch": 95.72, "learning_rate": 2.1595454913856983e-06, "loss": 1.7344, "step": 33067000 }, { "epoch": 95.72, "learning_rate": 2.1588218437384214e-06, "loss": 1.7719, "step": 33067500 }, { "epoch": 95.72, "learning_rate": 2.1580996433864395e-06, "loss": 1.7523, "step": 33068000 }, { "epoch": 95.72, "learning_rate": 2.1573759957391626e-06, "loss": 1.7371, "step": 33068500 }, { "epoch": 95.72, "learning_rate": 2.1566523480918857e-06, "loss": 1.7785, "step": 33069000 }, { "epoch": 95.72, "learning_rate": 2.155928700444609e-06, "loss": 1.737, "step": 33069500 }, { "epoch": 95.72, "learning_rate": 2.1552050527973327e-06, "loss": 1.7751, "step": 33070000 }, { "epoch": 95.73, "learning_rate": 2.1544814051500558e-06, "loss": 1.7511, "step": 33070500 }, { "epoch": 95.73, "learning_rate": 2.1537592047980734e-06, "loss": 1.7778, "step": 33071000 }, { "epoch": 95.73, "learning_rate": 2.153035557150797e-06, "loss": 1.7641, "step": 33071500 }, { "epoch": 95.73, "learning_rate": 2.15231190950352e-06, "loss": 1.7362, "step": 33072000 }, { "epoch": 95.73, "learning_rate": 2.151588261856243e-06, "loss": 1.7722, "step": 33072500 }, { "epoch": 95.73, "learning_rate": 2.150866061504261e-06, "loss": 1.7616, "step": 33073000 }, { "epoch": 95.73, "learning_rate": 2.1501424138569843e-06, "loss": 1.7548, "step": 33073500 }, { "epoch": 95.74, "learning_rate": 2.1494187662097074e-06, "loss": 1.7349, "step": 33074000 }, { "epoch": 95.74, "learning_rate": 2.1486951185624304e-06, "loss": 1.7506, "step": 33074500 }, { "epoch": 95.74, "learning_rate": 2.147971470915154e-06, "loss": 1.7425, "step": 33075000 }, { "epoch": 95.74, "learning_rate": 2.1472492705631716e-06, "loss": 1.7534, "step": 33075500 }, { "epoch": 95.74, "learning_rate": 2.1465256229158947e-06, "loss": 1.745, "step": 33076000 }, { "epoch": 95.74, "learning_rate": 2.145801975268618e-06, "loss": 1.7855, "step": 33076500 }, { "epoch": 95.74, "learning_rate": 2.1450783276213413e-06, "loss": 1.7713, "step": 33077000 }, { "epoch": 95.75, "learning_rate": 2.144356127269359e-06, "loss": 1.7513, "step": 33077500 }, { "epoch": 95.75, "learning_rate": 2.1436324796220825e-06, "loss": 1.7492, "step": 33078000 }, { "epoch": 95.75, "learning_rate": 2.1429088319748055e-06, "loss": 1.7863, "step": 33078500 }, { "epoch": 95.75, "learning_rate": 2.142185184327529e-06, "loss": 1.7851, "step": 33079000 }, { "epoch": 95.75, "learning_rate": 2.1414629839755463e-06, "loss": 1.7514, "step": 33079500 }, { "epoch": 95.75, "learning_rate": 2.14073933632827e-06, "loss": 1.7474, "step": 33080000 }, { "epoch": 95.75, "learning_rate": 2.1400171359762875e-06, "loss": 1.7769, "step": 33080500 }, { "epoch": 95.76, "learning_rate": 2.1392949356243056e-06, "loss": 1.747, "step": 33081000 }, { "epoch": 95.76, "learning_rate": 2.1385712879770287e-06, "loss": 1.7529, "step": 33081500 }, { "epoch": 95.76, "learning_rate": 2.1378476403297517e-06, "loss": 1.7862, "step": 33082000 }, { "epoch": 95.76, "learning_rate": 2.137123992682475e-06, "loss": 1.7587, "step": 33082500 }, { "epoch": 95.76, "learning_rate": 2.1364003450351983e-06, "loss": 1.7696, "step": 33083000 }, { "epoch": 95.76, "learning_rate": 2.135676697387922e-06, "loss": 1.7466, "step": 33083500 }, { "epoch": 95.76, "learning_rate": 2.134953049740645e-06, "loss": 1.747, "step": 33084000 }, { "epoch": 95.77, "learning_rate": 2.134229402093368e-06, "loss": 1.7558, "step": 33084500 }, { "epoch": 95.77, "learning_rate": 2.133505754446091e-06, "loss": 1.7673, "step": 33085000 }, { "epoch": 95.77, "learning_rate": 2.1327821067988146e-06, "loss": 1.7933, "step": 33085500 }, { "epoch": 95.77, "learning_rate": 2.1320584591515377e-06, "loss": 1.7767, "step": 33086000 }, { "epoch": 95.77, "learning_rate": 2.1313348115042607e-06, "loss": 1.7549, "step": 33086500 }, { "epoch": 95.77, "learning_rate": 2.130612611152279e-06, "loss": 1.7484, "step": 33087000 }, { "epoch": 95.77, "learning_rate": 2.129888963505002e-06, "loss": 1.7705, "step": 33087500 }, { "epoch": 95.78, "learning_rate": 2.129165315857725e-06, "loss": 1.7527, "step": 33088000 }, { "epoch": 95.78, "learning_rate": 2.1284416682104485e-06, "loss": 1.7765, "step": 33088500 }, { "epoch": 95.78, "learning_rate": 2.1277180205631716e-06, "loss": 1.736, "step": 33089000 }, { "epoch": 95.78, "learning_rate": 2.126994372915895e-06, "loss": 1.7534, "step": 33089500 }, { "epoch": 95.78, "learning_rate": 2.126270725268618e-06, "loss": 1.7497, "step": 33090000 }, { "epoch": 95.78, "learning_rate": 2.1255470776213412e-06, "loss": 1.7544, "step": 33090500 }, { "epoch": 95.78, "learning_rate": 2.1248248772693593e-06, "loss": 1.7654, "step": 33091000 }, { "epoch": 95.79, "learning_rate": 2.1241012296220824e-06, "loss": 1.7735, "step": 33091500 }, { "epoch": 95.79, "learning_rate": 2.1233775819748055e-06, "loss": 1.7425, "step": 33092000 }, { "epoch": 95.79, "learning_rate": 2.1226568289181178e-06, "loss": 1.7444, "step": 33092500 }, { "epoch": 95.79, "learning_rate": 2.1219331812708413e-06, "loss": 1.7803, "step": 33093000 }, { "epoch": 95.79, "learning_rate": 2.1212095336235643e-06, "loss": 1.7778, "step": 33093500 }, { "epoch": 95.79, "learning_rate": 2.120485885976288e-06, "loss": 1.7716, "step": 33094000 }, { "epoch": 95.8, "learning_rate": 2.119762238329011e-06, "loss": 1.7665, "step": 33094500 }, { "epoch": 95.8, "learning_rate": 2.119038590681734e-06, "loss": 1.7694, "step": 33095000 }, { "epoch": 95.8, "learning_rate": 2.118314943034457e-06, "loss": 1.7654, "step": 33095500 }, { "epoch": 95.8, "learning_rate": 2.1175912953871806e-06, "loss": 1.7608, "step": 33096000 }, { "epoch": 95.8, "learning_rate": 2.1168676477399037e-06, "loss": 1.755, "step": 33096500 }, { "epoch": 95.8, "learning_rate": 2.116144000092627e-06, "loss": 1.7675, "step": 33097000 }, { "epoch": 95.8, "learning_rate": 2.1154203524453503e-06, "loss": 1.7714, "step": 33097500 }, { "epoch": 95.81, "learning_rate": 2.1146967047980733e-06, "loss": 1.7699, "step": 33098000 }, { "epoch": 95.81, "learning_rate": 2.1139730571507964e-06, "loss": 1.7691, "step": 33098500 }, { "epoch": 95.81, "learning_rate": 2.11324940950352e-06, "loss": 1.7536, "step": 33099000 }, { "epoch": 95.81, "learning_rate": 2.1125257618562434e-06, "loss": 1.7686, "step": 33099500 }, { "epoch": 95.81, "learning_rate": 2.1118035615042607e-06, "loss": 1.7655, "step": 33100000 }, { "epoch": 95.81, "learning_rate": 2.111079913856984e-06, "loss": 1.7643, "step": 33100500 }, { "epoch": 95.81, "learning_rate": 2.110357713505002e-06, "loss": 1.7584, "step": 33101000 }, { "epoch": 95.82, "learning_rate": 2.109634065857725e-06, "loss": 1.7696, "step": 33101500 }, { "epoch": 95.82, "learning_rate": 2.1089104182104484e-06, "loss": 1.7723, "step": 33102000 }, { "epoch": 95.82, "learning_rate": 2.108186770563172e-06, "loss": 1.7748, "step": 33102500 }, { "epoch": 95.82, "learning_rate": 2.107463122915895e-06, "loss": 1.7597, "step": 33103000 }, { "epoch": 95.82, "learning_rate": 2.106739475268618e-06, "loss": 1.7559, "step": 33103500 }, { "epoch": 95.82, "learning_rate": 2.106017274916636e-06, "loss": 1.7528, "step": 33104000 }, { "epoch": 95.82, "learning_rate": 2.1052936272693593e-06, "loss": 1.755, "step": 33104500 }, { "epoch": 95.83, "learning_rate": 2.1045699796220824e-06, "loss": 1.7593, "step": 33105000 }, { "epoch": 95.83, "learning_rate": 2.1038463319748054e-06, "loss": 1.7813, "step": 33105500 }, { "epoch": 95.83, "learning_rate": 2.103122684327529e-06, "loss": 1.7639, "step": 33106000 }, { "epoch": 95.83, "learning_rate": 2.102399036680252e-06, "loss": 1.7515, "step": 33106500 }, { "epoch": 95.83, "learning_rate": 2.1016768363282697e-06, "loss": 1.7874, "step": 33107000 }, { "epoch": 95.83, "learning_rate": 2.100953188680993e-06, "loss": 1.7477, "step": 33107500 }, { "epoch": 95.83, "learning_rate": 2.1002295410337163e-06, "loss": 1.735, "step": 33108000 }, { "epoch": 95.84, "learning_rate": 2.0995058933864394e-06, "loss": 1.7691, "step": 33108500 }, { "epoch": 95.84, "learning_rate": 2.098782245739163e-06, "loss": 1.7583, "step": 33109000 }, { "epoch": 95.84, "learning_rate": 2.098058598091886e-06, "loss": 1.7599, "step": 33109500 }, { "epoch": 95.84, "learning_rate": 2.0973363977399036e-06, "loss": 1.7593, "step": 33110000 }, { "epoch": 95.84, "learning_rate": 2.096612750092627e-06, "loss": 1.7567, "step": 33110500 }, { "epoch": 95.84, "learning_rate": 2.09588910244535e-06, "loss": 1.755, "step": 33111000 }, { "epoch": 95.84, "learning_rate": 2.0951654547980733e-06, "loss": 1.7646, "step": 33111500 }, { "epoch": 95.85, "learning_rate": 2.094441807150797e-06, "loss": 1.7534, "step": 33112000 }, { "epoch": 95.85, "learning_rate": 2.09371815950352e-06, "loss": 1.7391, "step": 33112500 }, { "epoch": 95.85, "learning_rate": 2.0929959591515375e-06, "loss": 1.7484, "step": 33113000 }, { "epoch": 95.85, "learning_rate": 2.092272311504261e-06, "loss": 1.7516, "step": 33113500 }, { "epoch": 95.85, "learning_rate": 2.091548663856984e-06, "loss": 1.7473, "step": 33114000 }, { "epoch": 95.85, "learning_rate": 2.0908250162097072e-06, "loss": 1.7742, "step": 33114500 }, { "epoch": 95.85, "learning_rate": 2.0901013685624307e-06, "loss": 1.7551, "step": 33115000 }, { "epoch": 95.86, "learning_rate": 2.089377720915154e-06, "loss": 1.7588, "step": 33115500 }, { "epoch": 95.86, "learning_rate": 2.0886540732678773e-06, "loss": 1.7671, "step": 33116000 }, { "epoch": 95.86, "learning_rate": 2.0879304256206004e-06, "loss": 1.7605, "step": 33116500 }, { "epoch": 95.86, "learning_rate": 2.087208225268618e-06, "loss": 1.7787, "step": 33117000 }, { "epoch": 95.86, "learning_rate": 2.0864845776213416e-06, "loss": 1.7566, "step": 33117500 }, { "epoch": 95.86, "learning_rate": 2.0857609299740646e-06, "loss": 1.753, "step": 33118000 }, { "epoch": 95.86, "learning_rate": 2.0850372823267877e-06, "loss": 1.7373, "step": 33118500 }, { "epoch": 95.87, "learning_rate": 2.084315081974806e-06, "loss": 1.7431, "step": 33119000 }, { "epoch": 95.87, "learning_rate": 2.083591434327529e-06, "loss": 1.7372, "step": 33119500 }, { "epoch": 95.87, "learning_rate": 2.082867786680252e-06, "loss": 1.7473, "step": 33120000 }, { "epoch": 95.87, "learning_rate": 2.082144139032975e-06, "loss": 1.7391, "step": 33120500 }, { "epoch": 95.87, "learning_rate": 2.081420491385698e-06, "loss": 1.7621, "step": 33121000 }, { "epoch": 95.87, "learning_rate": 2.0806982910337162e-06, "loss": 1.7766, "step": 33121500 }, { "epoch": 95.87, "learning_rate": 2.0799746433864393e-06, "loss": 1.7582, "step": 33122000 }, { "epoch": 95.88, "learning_rate": 2.0792509957391624e-06, "loss": 1.7687, "step": 33122500 }, { "epoch": 95.88, "learning_rate": 2.0785273480918863e-06, "loss": 1.7543, "step": 33123000 }, { "epoch": 95.88, "learning_rate": 2.0778037004446094e-06, "loss": 1.7831, "step": 33123500 }, { "epoch": 95.88, "learning_rate": 2.0770829473879217e-06, "loss": 1.7518, "step": 33124000 }, { "epoch": 95.88, "learning_rate": 2.0763592997406448e-06, "loss": 1.7466, "step": 33124500 }, { "epoch": 95.88, "learning_rate": 2.075635652093368e-06, "loss": 1.7586, "step": 33125000 }, { "epoch": 95.88, "learning_rate": 2.074912004446091e-06, "loss": 1.7426, "step": 33125500 }, { "epoch": 95.89, "learning_rate": 2.0741912513894036e-06, "loss": 1.7312, "step": 33126000 }, { "epoch": 95.89, "learning_rate": 2.073467603742127e-06, "loss": 1.7621, "step": 33126500 }, { "epoch": 95.89, "learning_rate": 2.0727454033901444e-06, "loss": 1.7536, "step": 33127000 }, { "epoch": 95.89, "learning_rate": 2.072021755742868e-06, "loss": 1.7533, "step": 33127500 }, { "epoch": 95.89, "learning_rate": 2.071298108095591e-06, "loss": 1.7574, "step": 33128000 }, { "epoch": 95.89, "learning_rate": 2.0705744604483144e-06, "loss": 1.769, "step": 33128500 }, { "epoch": 95.89, "learning_rate": 2.0698508128010375e-06, "loss": 1.7253, "step": 33129000 }, { "epoch": 95.9, "learning_rate": 2.0691271651537606e-06, "loss": 1.766, "step": 33129500 }, { "epoch": 95.9, "learning_rate": 2.0684035175064837e-06, "loss": 1.7471, "step": 33130000 }, { "epoch": 95.9, "learning_rate": 2.067679869859207e-06, "loss": 1.7698, "step": 33130500 }, { "epoch": 95.9, "learning_rate": 2.0669562222119307e-06, "loss": 1.7654, "step": 33131000 }, { "epoch": 95.9, "learning_rate": 2.0662325745646538e-06, "loss": 1.7661, "step": 33131500 }, { "epoch": 95.9, "learning_rate": 2.065508926917377e-06, "loss": 1.7639, "step": 33132000 }, { "epoch": 95.91, "learning_rate": 2.0647852792701e-06, "loss": 1.741, "step": 33132500 }, { "epoch": 95.91, "learning_rate": 2.0640616316228234e-06, "loss": 1.7771, "step": 33133000 }, { "epoch": 95.91, "learning_rate": 2.0633379839755465e-06, "loss": 1.7427, "step": 33133500 }, { "epoch": 95.91, "learning_rate": 2.06261433632827e-06, "loss": 1.7466, "step": 33134000 }, { "epoch": 95.91, "learning_rate": 2.061890688680993e-06, "loss": 1.777, "step": 33134500 }, { "epoch": 95.91, "learning_rate": 2.061167041033716e-06, "loss": 1.7483, "step": 33135000 }, { "epoch": 95.91, "learning_rate": 2.0604448406817343e-06, "loss": 1.7664, "step": 33135500 }, { "epoch": 95.92, "learning_rate": 2.0597211930344574e-06, "loss": 1.7398, "step": 33136000 }, { "epoch": 95.92, "learning_rate": 2.0589975453871804e-06, "loss": 1.7423, "step": 33136500 }, { "epoch": 95.92, "learning_rate": 2.0582753450351985e-06, "loss": 1.7632, "step": 33137000 }, { "epoch": 95.92, "learning_rate": 2.0575516973879216e-06, "loss": 1.7623, "step": 33137500 }, { "epoch": 95.92, "learning_rate": 2.0568280497406447e-06, "loss": 1.75, "step": 33138000 }, { "epoch": 95.92, "learning_rate": 2.0561044020933678e-06, "loss": 1.7434, "step": 33138500 }, { "epoch": 95.92, "learning_rate": 2.0553807544460913e-06, "loss": 1.7801, "step": 33139000 }, { "epoch": 95.93, "learning_rate": 2.0546571067988144e-06, "loss": 1.7551, "step": 33139500 }, { "epoch": 95.93, "learning_rate": 2.053933459151538e-06, "loss": 1.7534, "step": 33140000 }, { "epoch": 95.93, "learning_rate": 2.053209811504261e-06, "loss": 1.7584, "step": 33140500 }, { "epoch": 95.93, "learning_rate": 2.052486163856984e-06, "loss": 1.7568, "step": 33141000 }, { "epoch": 95.93, "learning_rate": 2.0517625162097075e-06, "loss": 1.7595, "step": 33141500 }, { "epoch": 95.93, "learning_rate": 2.0510388685624306e-06, "loss": 1.754, "step": 33142000 }, { "epoch": 95.93, "learning_rate": 2.0503152209151537e-06, "loss": 1.733, "step": 33142500 }, { "epoch": 95.94, "learning_rate": 2.0495915732678768e-06, "loss": 1.7494, "step": 33143000 }, { "epoch": 95.94, "learning_rate": 2.048869372915895e-06, "loss": 1.7802, "step": 33143500 }, { "epoch": 95.94, "learning_rate": 2.048145725268618e-06, "loss": 1.7716, "step": 33144000 }, { "epoch": 95.94, "learning_rate": 2.047422077621341e-06, "loss": 1.7555, "step": 33144500 }, { "epoch": 95.94, "learning_rate": 2.0466984299740645e-06, "loss": 1.7787, "step": 33145000 }, { "epoch": 95.94, "learning_rate": 2.045977676917377e-06, "loss": 1.7303, "step": 33145500 }, { "epoch": 95.94, "learning_rate": 2.0452540292701003e-06, "loss": 1.7589, "step": 33146000 }, { "epoch": 95.95, "learning_rate": 2.0445303816228234e-06, "loss": 1.7625, "step": 33146500 }, { "epoch": 95.95, "learning_rate": 2.0438067339755465e-06, "loss": 1.7533, "step": 33147000 }, { "epoch": 95.95, "learning_rate": 2.0430830863282696e-06, "loss": 1.7361, "step": 33147500 }, { "epoch": 95.95, "learning_rate": 2.0423623332715822e-06, "loss": 1.7547, "step": 33148000 }, { "epoch": 95.95, "learning_rate": 2.0416386856243053e-06, "loss": 1.7473, "step": 33148500 }, { "epoch": 95.95, "learning_rate": 2.040915037977029e-06, "loss": 1.7633, "step": 33149000 }, { "epoch": 95.95, "learning_rate": 2.040191390329752e-06, "loss": 1.7838, "step": 33149500 }, { "epoch": 95.96, "learning_rate": 2.0394691899777696e-06, "loss": 1.7363, "step": 33150000 }, { "epoch": 95.96, "learning_rate": 2.038745542330493e-06, "loss": 1.7519, "step": 33150500 }, { "epoch": 95.96, "learning_rate": 2.038021894683216e-06, "loss": 1.7534, "step": 33151000 }, { "epoch": 95.96, "learning_rate": 2.0372982470359392e-06, "loss": 1.7633, "step": 33151500 }, { "epoch": 95.96, "learning_rate": 2.0365745993886623e-06, "loss": 1.7723, "step": 33152000 }, { "epoch": 95.96, "learning_rate": 2.0358523990366804e-06, "loss": 1.7435, "step": 33152500 }, { "epoch": 95.96, "learning_rate": 2.0351287513894035e-06, "loss": 1.7373, "step": 33153000 }, { "epoch": 95.97, "learning_rate": 2.0344051037421266e-06, "loss": 1.7325, "step": 33153500 }, { "epoch": 95.97, "learning_rate": 2.03368145609485e-06, "loss": 1.7427, "step": 33154000 }, { "epoch": 95.97, "learning_rate": 2.0329578084475736e-06, "loss": 1.7589, "step": 33154500 }, { "epoch": 95.97, "learning_rate": 2.0322341608002967e-06, "loss": 1.7612, "step": 33155000 }, { "epoch": 95.97, "learning_rate": 2.0315105131530198e-06, "loss": 1.7562, "step": 33155500 }, { "epoch": 95.97, "learning_rate": 2.030786865505743e-06, "loss": 1.7442, "step": 33156000 }, { "epoch": 95.97, "learning_rate": 2.030064665153761e-06, "loss": 1.7341, "step": 33156500 }, { "epoch": 95.98, "learning_rate": 2.029342464801778e-06, "loss": 1.786, "step": 33157000 }, { "epoch": 95.98, "learning_rate": 2.0286188171545017e-06, "loss": 1.7515, "step": 33157500 }, { "epoch": 95.98, "learning_rate": 2.027895169507225e-06, "loss": 1.7389, "step": 33158000 }, { "epoch": 95.98, "learning_rate": 2.0271715218599483e-06, "loss": 1.7713, "step": 33158500 }, { "epoch": 95.98, "learning_rate": 2.0264478742126713e-06, "loss": 1.7569, "step": 33159000 }, { "epoch": 95.98, "learning_rate": 2.0257242265653944e-06, "loss": 1.7584, "step": 33159500 }, { "epoch": 95.98, "learning_rate": 2.025000578918118e-06, "loss": 1.7509, "step": 33160000 }, { "epoch": 95.99, "learning_rate": 2.0242769312708414e-06, "loss": 1.7463, "step": 33160500 }, { "epoch": 95.99, "learning_rate": 2.0235547309188587e-06, "loss": 1.755, "step": 33161000 }, { "epoch": 95.99, "learning_rate": 2.022831083271582e-06, "loss": 1.7677, "step": 33161500 }, { "epoch": 95.99, "learning_rate": 2.0221074356243057e-06, "loss": 1.76, "step": 33162000 }, { "epoch": 95.99, "learning_rate": 2.0213837879770288e-06, "loss": 1.745, "step": 33162500 }, { "epoch": 95.99, "learning_rate": 2.0206615876250464e-06, "loss": 1.7495, "step": 33163000 }, { "epoch": 95.99, "learning_rate": 2.01993793997777e-06, "loss": 1.7469, "step": 33163500 }, { "epoch": 96.0, "learning_rate": 2.019214292330493e-06, "loss": 1.7377, "step": 33164000 }, { "epoch": 96.0, "learning_rate": 2.018490644683216e-06, "loss": 1.7616, "step": 33164500 }, { "epoch": 96.0, "learning_rate": 2.017766997035939e-06, "loss": 1.7878, "step": 33165000 }, { "epoch": 96.0, "eval_accuracy": 0.6933727184526204, "eval_accuracy_mlm": 0.6632286489218934, "eval_accuracy_nsp": 0.8548922284399592, "eval_loss": 2.1980440616607666, "eval_runtime": 331.5917, "eval_samples_per_second": 1316.034, "eval_steps_per_second": 54.836, "step": 33165312 } ], "max_steps": 34547200, "num_train_epochs": 100, "total_flos": 4.543663551078709e+19, "trial_name": null, "trial_params": null }