{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 55300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 9.041591320072332e-10, "loss": 8.7035, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.8083182640144665e-09, "loss": 8.5842, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.7124773960216998e-09, "loss": 8.6549, "step": 60 }, { "epoch": 0.03, "learning_rate": 3.616636528028933e-09, "loss": 8.6371, "step": 80 }, { "epoch": 0.04, "learning_rate": 4.5207956600361664e-09, "loss": 8.6676, "step": 100 }, { "epoch": 0.04, "learning_rate": 5.4249547920433996e-09, "loss": 8.6693, "step": 120 }, { "epoch": 0.05, "learning_rate": 6.329113924050633e-09, "loss": 8.6823, "step": 140 }, { "epoch": 0.06, "learning_rate": 7.233273056057866e-09, "loss": 8.6305, "step": 160 }, { "epoch": 0.07, "learning_rate": 8.1374321880651e-09, "loss": 8.7476, "step": 180 }, { "epoch": 0.07, "learning_rate": 9.041591320072333e-09, "loss": 8.6453, "step": 200 }, { "epoch": 0.08, "learning_rate": 9.945750452079566e-09, "loss": 8.7242, "step": 220 }, { "epoch": 0.09, "learning_rate": 1.0849909584086799e-08, "loss": 8.8198, "step": 240 }, { "epoch": 0.09, "learning_rate": 1.1754068716094032e-08, "loss": 8.7031, "step": 260 }, { "epoch": 0.1, "learning_rate": 1.2658227848101265e-08, "loss": 8.6468, "step": 280 }, { "epoch": 0.11, "learning_rate": 1.3562386980108499e-08, "loss": 8.5893, "step": 300 }, { "epoch": 0.12, "learning_rate": 1.4466546112115732e-08, "loss": 8.6628, "step": 320 }, { "epoch": 0.12, "learning_rate": 1.5370705244122963e-08, "loss": 8.5913, "step": 340 }, { "epoch": 0.13, "learning_rate": 1.62748643761302e-08, "loss": 8.6978, "step": 360 }, { "epoch": 0.14, "learning_rate": 1.7179023508137433e-08, "loss": 8.6814, "step": 380 }, { "epoch": 0.14, "learning_rate": 1.8083182640144666e-08, "loss": 8.7502, "step": 400 }, { "epoch": 0.15, "learning_rate": 1.89873417721519e-08, "loss": 8.5715, "step": 420 }, { "epoch": 0.16, "learning_rate": 1.9891500904159132e-08, "loss": 8.7008, "step": 440 }, { "epoch": 0.17, "learning_rate": 2.0795660036166365e-08, "loss": 8.6715, "step": 460 }, { "epoch": 0.17, "learning_rate": 2.1699819168173598e-08, "loss": 8.5782, "step": 480 }, { "epoch": 0.18, "learning_rate": 2.260397830018083e-08, "loss": 8.6972, "step": 500 }, { "epoch": 0.19, "learning_rate": 2.3508137432188065e-08, "loss": 8.7056, "step": 520 }, { "epoch": 0.2, "learning_rate": 2.4412296564195298e-08, "loss": 8.7433, "step": 540 }, { "epoch": 0.2, "learning_rate": 2.531645569620253e-08, "loss": 8.6857, "step": 560 }, { "epoch": 0.21, "learning_rate": 2.6220614828209764e-08, "loss": 8.6377, "step": 580 }, { "epoch": 0.22, "learning_rate": 2.7124773960216997e-08, "loss": 8.6514, "step": 600 }, { "epoch": 0.22, "learning_rate": 2.802893309222423e-08, "loss": 8.668, "step": 620 }, { "epoch": 0.23, "learning_rate": 2.8933092224231463e-08, "loss": 8.7431, "step": 640 }, { "epoch": 0.24, "learning_rate": 2.983725135623869e-08, "loss": 8.7326, "step": 660 }, { "epoch": 0.25, "learning_rate": 3.0741410488245926e-08, "loss": 8.7252, "step": 680 }, { "epoch": 0.25, "learning_rate": 3.1645569620253166e-08, "loss": 8.6977, "step": 700 }, { "epoch": 0.26, "learning_rate": 3.25497287522604e-08, "loss": 8.7162, "step": 720 }, { "epoch": 0.27, "learning_rate": 3.345388788426763e-08, "loss": 8.6373, "step": 740 }, { "epoch": 0.27, "learning_rate": 3.4358047016274865e-08, "loss": 
8.5484, "step": 760 }, { "epoch": 0.28, "learning_rate": 3.52622061482821e-08, "loss": 8.6158, "step": 780 }, { "epoch": 0.29, "learning_rate": 3.616636528028933e-08, "loss": 8.6236, "step": 800 }, { "epoch": 0.3, "learning_rate": 3.7070524412296565e-08, "loss": 8.6796, "step": 820 }, { "epoch": 0.3, "learning_rate": 3.79746835443038e-08, "loss": 8.6624, "step": 840 }, { "epoch": 0.31, "learning_rate": 3.887884267631103e-08, "loss": 8.6699, "step": 860 }, { "epoch": 0.32, "learning_rate": 3.9783001808318264e-08, "loss": 8.6308, "step": 880 }, { "epoch": 0.33, "learning_rate": 4.06871609403255e-08, "loss": 8.5808, "step": 900 }, { "epoch": 0.33, "learning_rate": 4.159132007233273e-08, "loss": 8.6027, "step": 920 }, { "epoch": 0.34, "learning_rate": 4.2495479204339963e-08, "loss": 8.7472, "step": 940 }, { "epoch": 0.35, "learning_rate": 4.3399638336347197e-08, "loss": 8.6128, "step": 960 }, { "epoch": 0.35, "learning_rate": 4.430379746835443e-08, "loss": 8.7902, "step": 980 }, { "epoch": 0.36, "learning_rate": 4.520795660036166e-08, "loss": 8.6797, "step": 1000 }, { "epoch": 0.37, "learning_rate": 4.6112115732368896e-08, "loss": 8.6378, "step": 1020 }, { "epoch": 0.38, "learning_rate": 4.701627486437613e-08, "loss": 8.6656, "step": 1040 }, { "epoch": 0.38, "learning_rate": 4.792043399638336e-08, "loss": 8.6539, "step": 1060 }, { "epoch": 0.39, "learning_rate": 4.8824593128390595e-08, "loss": 8.7486, "step": 1080 }, { "epoch": 0.4, "learning_rate": 4.972875226039783e-08, "loss": 8.6913, "step": 1100 }, { "epoch": 0.41, "learning_rate": 5.063291139240506e-08, "loss": 8.8054, "step": 1120 }, { "epoch": 0.41, "learning_rate": 5.1537070524412295e-08, "loss": 8.6303, "step": 1140 }, { "epoch": 0.42, "learning_rate": 5.244122965641953e-08, "loss": 8.6419, "step": 1160 }, { "epoch": 0.43, "learning_rate": 5.334538878842676e-08, "loss": 8.6868, "step": 1180 }, { "epoch": 0.43, "learning_rate": 5.4249547920433994e-08, "loss": 8.7265, "step": 1200 }, { "epoch": 0.44, "learning_rate": 5.515370705244123e-08, "loss": 8.6924, "step": 1220 }, { "epoch": 0.45, "learning_rate": 5.605786618444846e-08, "loss": 8.6787, "step": 1240 }, { "epoch": 0.46, "learning_rate": 5.6962025316455693e-08, "loss": 8.693, "step": 1260 }, { "epoch": 0.46, "learning_rate": 5.7866184448462927e-08, "loss": 8.5927, "step": 1280 }, { "epoch": 0.47, "learning_rate": 5.877034358047016e-08, "loss": 8.6922, "step": 1300 }, { "epoch": 0.48, "learning_rate": 5.967450271247739e-08, "loss": 8.9393, "step": 1320 }, { "epoch": 0.48, "learning_rate": 6.057866184448463e-08, "loss": 8.7923, "step": 1340 }, { "epoch": 0.49, "learning_rate": 6.148282097649185e-08, "loss": 8.7394, "step": 1360 }, { "epoch": 0.5, "learning_rate": 6.238698010849909e-08, "loss": 8.7055, "step": 1380 }, { "epoch": 0.51, "learning_rate": 6.329113924050633e-08, "loss": 8.7928, "step": 1400 }, { "epoch": 0.51, "learning_rate": 6.419529837251356e-08, "loss": 8.6412, "step": 1420 }, { "epoch": 0.52, "learning_rate": 6.50994575045208e-08, "loss": 8.6909, "step": 1440 }, { "epoch": 0.53, "learning_rate": 6.600361663652802e-08, "loss": 8.6415, "step": 1460 }, { "epoch": 0.54, "learning_rate": 6.690777576853526e-08, "loss": 8.7029, "step": 1480 }, { "epoch": 0.54, "learning_rate": 6.781193490054249e-08, "loss": 8.6085, "step": 1500 }, { "epoch": 0.55, "learning_rate": 6.871609403254973e-08, "loss": 8.6977, "step": 1520 }, { "epoch": 0.56, "learning_rate": 6.962025316455696e-08, "loss": 8.6144, "step": 1540 }, { "epoch": 0.56, "learning_rate": 7.05244122965642e-08, "loss": 8.647, 
"step": 1560 }, { "epoch": 0.57, "learning_rate": 7.142857142857142e-08, "loss": 8.701, "step": 1580 }, { "epoch": 0.58, "learning_rate": 7.233273056057866e-08, "loss": 8.6896, "step": 1600 }, { "epoch": 0.59, "learning_rate": 7.323688969258589e-08, "loss": 8.7102, "step": 1620 }, { "epoch": 0.59, "learning_rate": 7.414104882459313e-08, "loss": 8.7441, "step": 1640 }, { "epoch": 0.6, "learning_rate": 7.504520795660036e-08, "loss": 8.6805, "step": 1660 }, { "epoch": 0.61, "learning_rate": 7.59493670886076e-08, "loss": 8.7272, "step": 1680 }, { "epoch": 0.61, "learning_rate": 7.685352622061482e-08, "loss": 8.6238, "step": 1700 }, { "epoch": 0.62, "learning_rate": 7.775768535262206e-08, "loss": 8.8162, "step": 1720 }, { "epoch": 0.63, "learning_rate": 7.866184448462929e-08, "loss": 8.6112, "step": 1740 }, { "epoch": 0.64, "learning_rate": 7.956600361663653e-08, "loss": 8.6329, "step": 1760 }, { "epoch": 0.64, "learning_rate": 8.047016274864375e-08, "loss": 8.5605, "step": 1780 }, { "epoch": 0.65, "learning_rate": 8.1374321880651e-08, "loss": 8.6107, "step": 1800 }, { "epoch": 0.66, "learning_rate": 8.227848101265822e-08, "loss": 8.6376, "step": 1820 }, { "epoch": 0.67, "learning_rate": 8.318264014466546e-08, "loss": 8.6818, "step": 1840 }, { "epoch": 0.67, "learning_rate": 8.408679927667269e-08, "loss": 8.7487, "step": 1860 }, { "epoch": 0.68, "learning_rate": 8.499095840867993e-08, "loss": 8.6741, "step": 1880 }, { "epoch": 0.69, "learning_rate": 8.589511754068715e-08, "loss": 8.6679, "step": 1900 }, { "epoch": 0.69, "learning_rate": 8.679927667269439e-08, "loss": 8.6636, "step": 1920 }, { "epoch": 0.7, "learning_rate": 8.770343580470162e-08, "loss": 8.7159, "step": 1940 }, { "epoch": 0.71, "learning_rate": 8.860759493670886e-08, "loss": 8.5982, "step": 1960 }, { "epoch": 0.72, "learning_rate": 8.951175406871609e-08, "loss": 8.7386, "step": 1980 }, { "epoch": 0.72, "learning_rate": 9.041591320072333e-08, "loss": 8.6729, "step": 2000 }, { "epoch": 0.73, "learning_rate": 9.132007233273057e-08, "loss": 8.6625, "step": 2020 }, { "epoch": 0.74, "learning_rate": 9.222423146473779e-08, "loss": 8.6866, "step": 2040 }, { "epoch": 0.75, "learning_rate": 9.312839059674503e-08, "loss": 8.6284, "step": 2060 }, { "epoch": 0.75, "learning_rate": 9.403254972875226e-08, "loss": 8.6068, "step": 2080 }, { "epoch": 0.76, "learning_rate": 9.49367088607595e-08, "loss": 8.7023, "step": 2100 }, { "epoch": 0.77, "learning_rate": 9.584086799276672e-08, "loss": 8.6516, "step": 2120 }, { "epoch": 0.77, "learning_rate": 9.674502712477396e-08, "loss": 8.746, "step": 2140 }, { "epoch": 0.78, "learning_rate": 9.764918625678119e-08, "loss": 8.5947, "step": 2160 }, { "epoch": 0.79, "learning_rate": 9.855334538878843e-08, "loss": 8.7309, "step": 2180 }, { "epoch": 0.8, "learning_rate": 9.945750452079566e-08, "loss": 8.7407, "step": 2200 }, { "epoch": 0.8, "learning_rate": 1.003616636528029e-07, "loss": 8.7647, "step": 2220 }, { "epoch": 0.81, "learning_rate": 1.0126582278481012e-07, "loss": 8.6067, "step": 2240 }, { "epoch": 0.82, "learning_rate": 1.0216998191681736e-07, "loss": 8.6241, "step": 2260 }, { "epoch": 0.82, "learning_rate": 1.0307414104882459e-07, "loss": 8.6616, "step": 2280 }, { "epoch": 0.83, "learning_rate": 1.0397830018083183e-07, "loss": 8.7636, "step": 2300 }, { "epoch": 0.84, "learning_rate": 1.0488245931283906e-07, "loss": 8.6465, "step": 2320 }, { "epoch": 0.85, "learning_rate": 1.057866184448463e-07, "loss": 8.6704, "step": 2340 }, { "epoch": 0.85, "learning_rate": 1.0669077757685352e-07, "loss": 
8.5897, "step": 2360 }, { "epoch": 0.86, "learning_rate": 1.0759493670886076e-07, "loss": 8.664, "step": 2380 }, { "epoch": 0.87, "learning_rate": 1.0849909584086799e-07, "loss": 8.7798, "step": 2400 }, { "epoch": 0.88, "learning_rate": 1.0940325497287523e-07, "loss": 8.6508, "step": 2420 }, { "epoch": 0.88, "learning_rate": 1.1030741410488245e-07, "loss": 8.7532, "step": 2440 }, { "epoch": 0.89, "learning_rate": 1.112115732368897e-07, "loss": 8.6628, "step": 2460 }, { "epoch": 0.9, "learning_rate": 1.1211573236889692e-07, "loss": 8.6175, "step": 2480 }, { "epoch": 0.9, "learning_rate": 1.1301989150090416e-07, "loss": 8.7609, "step": 2500 }, { "epoch": 0.91, "learning_rate": 1.1392405063291139e-07, "loss": 8.7609, "step": 2520 }, { "epoch": 0.92, "learning_rate": 1.1482820976491863e-07, "loss": 8.5844, "step": 2540 }, { "epoch": 0.93, "learning_rate": 1.1573236889692585e-07, "loss": 8.6872, "step": 2560 }, { "epoch": 0.93, "learning_rate": 1.1663652802893309e-07, "loss": 8.7795, "step": 2580 }, { "epoch": 0.94, "learning_rate": 1.1754068716094032e-07, "loss": 8.7157, "step": 2600 }, { "epoch": 0.95, "learning_rate": 1.1844484629294756e-07, "loss": 8.767, "step": 2620 }, { "epoch": 0.95, "learning_rate": 1.1934900542495477e-07, "loss": 8.6297, "step": 2640 }, { "epoch": 0.96, "learning_rate": 1.20253164556962e-07, "loss": 8.6604, "step": 2660 }, { "epoch": 0.97, "learning_rate": 1.2115732368896925e-07, "loss": 8.6753, "step": 2680 }, { "epoch": 0.98, "learning_rate": 1.220614828209765e-07, "loss": 8.8354, "step": 2700 }, { "epoch": 0.98, "learning_rate": 1.229656419529837e-07, "loss": 8.6935, "step": 2720 }, { "epoch": 0.99, "learning_rate": 1.2386980108499094e-07, "loss": 8.6787, "step": 2740 }, { "epoch": 1.0, "learning_rate": 1.2477396021699818e-07, "loss": 8.6191, "step": 2760 }, { "epoch": 1.01, "learning_rate": 1.256781193490054e-07, "loss": 8.6495, "step": 2780 }, { "epoch": 1.01, "learning_rate": 1.2658227848101266e-07, "loss": 8.7641, "step": 2800 }, { "epoch": 1.02, "learning_rate": 1.2748643761301988e-07, "loss": 8.7237, "step": 2820 }, { "epoch": 1.03, "learning_rate": 1.2839059674502712e-07, "loss": 8.6795, "step": 2840 }, { "epoch": 1.03, "learning_rate": 1.2929475587703433e-07, "loss": 8.6644, "step": 2860 }, { "epoch": 1.04, "learning_rate": 1.301989150090416e-07, "loss": 8.5847, "step": 2880 }, { "epoch": 1.05, "learning_rate": 1.311030741410488e-07, "loss": 8.5999, "step": 2900 }, { "epoch": 1.06, "learning_rate": 1.3200723327305605e-07, "loss": 8.6626, "step": 2920 }, { "epoch": 1.06, "learning_rate": 1.3291139240506326e-07, "loss": 8.7718, "step": 2940 }, { "epoch": 1.07, "learning_rate": 1.3381555153707053e-07, "loss": 8.5763, "step": 2960 }, { "epoch": 1.08, "learning_rate": 1.3471971066907774e-07, "loss": 8.6696, "step": 2980 }, { "epoch": 1.08, "learning_rate": 1.3562386980108498e-07, "loss": 8.6623, "step": 3000 }, { "epoch": 1.09, "learning_rate": 1.3652802893309222e-07, "loss": 8.6512, "step": 3020 }, { "epoch": 1.1, "learning_rate": 1.3743218806509946e-07, "loss": 8.6001, "step": 3040 }, { "epoch": 1.11, "learning_rate": 1.3833634719710667e-07, "loss": 8.6426, "step": 3060 }, { "epoch": 1.11, "learning_rate": 1.3924050632911391e-07, "loss": 8.7206, "step": 3080 }, { "epoch": 1.12, "learning_rate": 1.4014466546112115e-07, "loss": 8.6603, "step": 3100 }, { "epoch": 1.13, "learning_rate": 1.410488245931284e-07, "loss": 8.6396, "step": 3120 }, { "epoch": 1.14, "learning_rate": 1.419529837251356e-07, "loss": 8.7162, "step": 3140 }, { "epoch": 1.14, "learning_rate": 
1.4285714285714285e-07, "loss": 8.6815, "step": 3160 }, { "epoch": 1.15, "learning_rate": 1.4376130198915009e-07, "loss": 8.6283, "step": 3180 }, { "epoch": 1.16, "learning_rate": 1.4466546112115733e-07, "loss": 8.76, "step": 3200 }, { "epoch": 1.16, "learning_rate": 1.4556962025316454e-07, "loss": 8.6929, "step": 3220 }, { "epoch": 1.17, "learning_rate": 1.4647377938517178e-07, "loss": 8.5704, "step": 3240 }, { "epoch": 1.18, "learning_rate": 1.4737793851717902e-07, "loss": 8.7375, "step": 3260 }, { "epoch": 1.19, "learning_rate": 1.4828209764918626e-07, "loss": 8.7367, "step": 3280 }, { "epoch": 1.19, "learning_rate": 1.4918625678119347e-07, "loss": 8.7486, "step": 3300 }, { "epoch": 1.2, "learning_rate": 1.500904159132007e-07, "loss": 8.6897, "step": 3320 }, { "epoch": 1.21, "learning_rate": 1.5099457504520795e-07, "loss": 8.6202, "step": 3340 }, { "epoch": 1.22, "learning_rate": 1.518987341772152e-07, "loss": 8.6978, "step": 3360 }, { "epoch": 1.22, "learning_rate": 1.528028933092224e-07, "loss": 8.6673, "step": 3380 }, { "epoch": 1.23, "learning_rate": 1.5370705244122964e-07, "loss": 8.6666, "step": 3400 }, { "epoch": 1.24, "learning_rate": 1.5461121157323688e-07, "loss": 8.6343, "step": 3420 }, { "epoch": 1.24, "learning_rate": 1.5551537070524412e-07, "loss": 8.6778, "step": 3440 }, { "epoch": 1.25, "learning_rate": 1.5641952983725134e-07, "loss": 8.5937, "step": 3460 }, { "epoch": 1.26, "learning_rate": 1.5732368896925858e-07, "loss": 8.7552, "step": 3480 }, { "epoch": 1.27, "learning_rate": 1.5822784810126582e-07, "loss": 8.7656, "step": 3500 }, { "epoch": 1.27, "learning_rate": 1.5913200723327306e-07, "loss": 8.7108, "step": 3520 }, { "epoch": 1.28, "learning_rate": 1.6003616636528027e-07, "loss": 8.8032, "step": 3540 }, { "epoch": 1.29, "learning_rate": 1.609403254972875e-07, "loss": 8.6476, "step": 3560 }, { "epoch": 1.29, "learning_rate": 1.6184448462929475e-07, "loss": 8.6317, "step": 3580 }, { "epoch": 1.3, "learning_rate": 1.62748643761302e-07, "loss": 8.6736, "step": 3600 }, { "epoch": 1.31, "learning_rate": 1.636528028933092e-07, "loss": 8.6469, "step": 3620 }, { "epoch": 1.32, "learning_rate": 1.6455696202531644e-07, "loss": 8.6429, "step": 3640 }, { "epoch": 1.32, "learning_rate": 1.6546112115732368e-07, "loss": 8.6454, "step": 3660 }, { "epoch": 1.33, "learning_rate": 1.6636528028933092e-07, "loss": 8.6146, "step": 3680 }, { "epoch": 1.34, "learning_rate": 1.6726943942133813e-07, "loss": 8.6884, "step": 3700 }, { "epoch": 1.35, "learning_rate": 1.6817359855334537e-07, "loss": 8.586, "step": 3720 }, { "epoch": 1.35, "learning_rate": 1.6907775768535261e-07, "loss": 8.6912, "step": 3740 }, { "epoch": 1.36, "learning_rate": 1.6998191681735985e-07, "loss": 8.6692, "step": 3760 }, { "epoch": 1.37, "learning_rate": 1.7088607594936707e-07, "loss": 8.7503, "step": 3780 }, { "epoch": 1.37, "learning_rate": 1.717902350813743e-07, "loss": 8.8529, "step": 3800 }, { "epoch": 1.38, "learning_rate": 1.7269439421338155e-07, "loss": 8.6796, "step": 3820 }, { "epoch": 1.39, "learning_rate": 1.7359855334538879e-07, "loss": 8.651, "step": 3840 }, { "epoch": 1.4, "learning_rate": 1.74502712477396e-07, "loss": 8.6298, "step": 3860 }, { "epoch": 1.4, "learning_rate": 1.7540687160940324e-07, "loss": 8.7301, "step": 3880 }, { "epoch": 1.41, "learning_rate": 1.7631103074141048e-07, "loss": 8.6036, "step": 3900 }, { "epoch": 1.42, "learning_rate": 1.7721518987341772e-07, "loss": 8.674, "step": 3920 }, { "epoch": 1.42, "learning_rate": 1.7811934900542493e-07, "loss": 8.7339, "step": 3940 }, { 
"epoch": 1.43, "learning_rate": 1.7902350813743217e-07, "loss": 8.7509, "step": 3960 }, { "epoch": 1.44, "learning_rate": 1.799276672694394e-07, "loss": 8.7087, "step": 3980 }, { "epoch": 1.45, "learning_rate": 1.8083182640144665e-07, "loss": 8.6391, "step": 4000 }, { "epoch": 1.45, "learning_rate": 1.8173598553345386e-07, "loss": 8.7756, "step": 4020 }, { "epoch": 1.46, "learning_rate": 1.8264014466546113e-07, "loss": 8.6629, "step": 4040 }, { "epoch": 1.47, "learning_rate": 1.8354430379746834e-07, "loss": 8.8972, "step": 4060 }, { "epoch": 1.48, "learning_rate": 1.8444846292947558e-07, "loss": 8.6525, "step": 4080 }, { "epoch": 1.48, "learning_rate": 1.853526220614828e-07, "loss": 8.6576, "step": 4100 }, { "epoch": 1.49, "learning_rate": 1.8625678119349006e-07, "loss": 8.6853, "step": 4120 }, { "epoch": 1.5, "learning_rate": 1.8716094032549728e-07, "loss": 8.5977, "step": 4140 }, { "epoch": 1.5, "learning_rate": 1.8806509945750452e-07, "loss": 8.6767, "step": 4160 }, { "epoch": 1.51, "learning_rate": 1.8896925858951173e-07, "loss": 8.5974, "step": 4180 }, { "epoch": 1.52, "learning_rate": 1.89873417721519e-07, "loss": 8.6597, "step": 4200 }, { "epoch": 1.53, "learning_rate": 1.907775768535262e-07, "loss": 8.6174, "step": 4220 }, { "epoch": 1.53, "learning_rate": 1.9168173598553345e-07, "loss": 8.657, "step": 4240 }, { "epoch": 1.54, "learning_rate": 1.9258589511754066e-07, "loss": 8.6542, "step": 4260 }, { "epoch": 1.55, "learning_rate": 1.9349005424954793e-07, "loss": 8.704, "step": 4280 }, { "epoch": 1.56, "learning_rate": 1.9439421338155514e-07, "loss": 8.6938, "step": 4300 }, { "epoch": 1.56, "learning_rate": 1.9529837251356238e-07, "loss": 8.592, "step": 4320 }, { "epoch": 1.57, "learning_rate": 1.962025316455696e-07, "loss": 8.7386, "step": 4340 }, { "epoch": 1.58, "learning_rate": 1.9710669077757686e-07, "loss": 8.5609, "step": 4360 }, { "epoch": 1.58, "learning_rate": 1.9801084990958407e-07, "loss": 8.7259, "step": 4380 }, { "epoch": 1.59, "learning_rate": 1.9891500904159131e-07, "loss": 8.642, "step": 4400 }, { "epoch": 1.6, "learning_rate": 1.9981916817359853e-07, "loss": 8.711, "step": 4420 }, { "epoch": 1.61, "learning_rate": 2.007233273056058e-07, "loss": 8.5766, "step": 4440 }, { "epoch": 1.61, "learning_rate": 2.01627486437613e-07, "loss": 8.8441, "step": 4460 }, { "epoch": 1.62, "learning_rate": 2.0253164556962025e-07, "loss": 8.6968, "step": 4480 }, { "epoch": 1.63, "learning_rate": 2.0343580470162746e-07, "loss": 8.6558, "step": 4500 }, { "epoch": 1.63, "learning_rate": 2.0433996383363473e-07, "loss": 8.708, "step": 4520 }, { "epoch": 1.64, "learning_rate": 2.0524412296564194e-07, "loss": 8.6468, "step": 4540 }, { "epoch": 1.65, "learning_rate": 2.0614828209764918e-07, "loss": 8.76, "step": 4560 }, { "epoch": 1.66, "learning_rate": 2.070524412296564e-07, "loss": 8.6234, "step": 4580 }, { "epoch": 1.66, "learning_rate": 2.0795660036166366e-07, "loss": 8.6519, "step": 4600 }, { "epoch": 1.67, "learning_rate": 2.0886075949367087e-07, "loss": 8.8524, "step": 4620 }, { "epoch": 1.68, "learning_rate": 2.097649186256781e-07, "loss": 8.6722, "step": 4640 }, { "epoch": 1.69, "learning_rate": 2.1066907775768532e-07, "loss": 8.7367, "step": 4660 }, { "epoch": 1.69, "learning_rate": 2.115732368896926e-07, "loss": 8.6933, "step": 4680 }, { "epoch": 1.7, "learning_rate": 2.124773960216998e-07, "loss": 8.5989, "step": 4700 }, { "epoch": 1.71, "learning_rate": 2.1338155515370704e-07, "loss": 8.7641, "step": 4720 }, { "epoch": 1.71, "learning_rate": 2.1428571428571426e-07, "loss": 
8.6898, "step": 4740 }, { "epoch": 1.72, "learning_rate": 2.1518987341772152e-07, "loss": 8.7893, "step": 4760 }, { "epoch": 1.73, "learning_rate": 2.1609403254972874e-07, "loss": 8.6064, "step": 4780 }, { "epoch": 1.74, "learning_rate": 2.1699819168173598e-07, "loss": 8.6028, "step": 4800 }, { "epoch": 1.74, "learning_rate": 2.179023508137432e-07, "loss": 8.7767, "step": 4820 }, { "epoch": 1.75, "learning_rate": 2.1880650994575046e-07, "loss": 8.7362, "step": 4840 }, { "epoch": 1.76, "learning_rate": 2.1971066907775767e-07, "loss": 8.6766, "step": 4860 }, { "epoch": 1.76, "learning_rate": 2.206148282097649e-07, "loss": 8.6547, "step": 4880 }, { "epoch": 1.77, "learning_rate": 2.2151898734177212e-07, "loss": 8.6397, "step": 4900 }, { "epoch": 1.78, "learning_rate": 2.224231464737794e-07, "loss": 8.7055, "step": 4920 }, { "epoch": 1.79, "learning_rate": 2.233273056057866e-07, "loss": 8.5466, "step": 4940 }, { "epoch": 1.79, "learning_rate": 2.2423146473779384e-07, "loss": 8.6754, "step": 4960 }, { "epoch": 1.8, "learning_rate": 2.2513562386980105e-07, "loss": 8.6826, "step": 4980 }, { "epoch": 1.81, "learning_rate": 2.2603978300180832e-07, "loss": 8.6895, "step": 5000 }, { "epoch": 1.82, "learning_rate": 2.2694394213381553e-07, "loss": 8.7094, "step": 5020 }, { "epoch": 1.82, "learning_rate": 2.2784810126582277e-07, "loss": 8.7513, "step": 5040 }, { "epoch": 1.83, "learning_rate": 2.2875226039783001e-07, "loss": 8.6838, "step": 5060 }, { "epoch": 1.84, "learning_rate": 2.2965641952983725e-07, "loss": 8.6037, "step": 5080 }, { "epoch": 1.84, "learning_rate": 2.3056057866184447e-07, "loss": 8.7219, "step": 5100 }, { "epoch": 1.85, "learning_rate": 2.314647377938517e-07, "loss": 8.6567, "step": 5120 }, { "epoch": 1.86, "learning_rate": 2.3236889692585895e-07, "loss": 8.6228, "step": 5140 }, { "epoch": 1.87, "learning_rate": 2.3327305605786619e-07, "loss": 8.6443, "step": 5160 }, { "epoch": 1.87, "learning_rate": 2.341772151898734e-07, "loss": 8.6465, "step": 5180 }, { "epoch": 1.88, "learning_rate": 2.3508137432188064e-07, "loss": 8.6116, "step": 5200 }, { "epoch": 1.89, "learning_rate": 2.3598553345388788e-07, "loss": 8.7553, "step": 5220 }, { "epoch": 1.9, "learning_rate": 2.3688969258589512e-07, "loss": 8.7313, "step": 5240 }, { "epoch": 1.9, "learning_rate": 2.3779385171790233e-07, "loss": 8.6952, "step": 5260 }, { "epoch": 1.91, "learning_rate": 2.3869801084990954e-07, "loss": 8.6394, "step": 5280 }, { "epoch": 1.92, "learning_rate": 2.3960216998191684e-07, "loss": 8.8396, "step": 5300 }, { "epoch": 1.92, "learning_rate": 2.40506329113924e-07, "loss": 8.6893, "step": 5320 }, { "epoch": 1.93, "learning_rate": 2.4141048824593126e-07, "loss": 8.5754, "step": 5340 }, { "epoch": 1.94, "learning_rate": 2.423146473779385e-07, "loss": 8.7712, "step": 5360 }, { "epoch": 1.95, "learning_rate": 2.4321880650994574e-07, "loss": 8.8228, "step": 5380 }, { "epoch": 1.95, "learning_rate": 2.44122965641953e-07, "loss": 8.6878, "step": 5400 }, { "epoch": 1.96, "learning_rate": 2.450271247739602e-07, "loss": 8.6763, "step": 5420 }, { "epoch": 1.97, "learning_rate": 2.459312839059674e-07, "loss": 8.6758, "step": 5440 }, { "epoch": 1.97, "learning_rate": 2.468354430379747e-07, "loss": 8.7743, "step": 5460 }, { "epoch": 1.98, "learning_rate": 2.477396021699819e-07, "loss": 8.6171, "step": 5480 }, { "epoch": 1.99, "learning_rate": 2.4864376130198913e-07, "loss": 8.8626, "step": 5500 }, { "epoch": 2.0, "learning_rate": 2.4954792043399637e-07, "loss": 8.7344, "step": 5520 }, { "epoch": 2.0, "learning_rate": 
2.504520795660036e-07, "loss": 8.7143, "step": 5540 }, { "epoch": 2.01, "learning_rate": 2.513562386980108e-07, "loss": 8.6419, "step": 5560 }, { "epoch": 2.02, "learning_rate": 2.522603978300181e-07, "loss": 8.6991, "step": 5580 }, { "epoch": 2.03, "learning_rate": 2.5316455696202533e-07, "loss": 8.8916, "step": 5600 }, { "epoch": 2.03, "learning_rate": 2.540687160940325e-07, "loss": 8.6356, "step": 5620 }, { "epoch": 2.04, "learning_rate": 2.5497287522603975e-07, "loss": 8.7768, "step": 5640 }, { "epoch": 2.05, "learning_rate": 2.5587703435804705e-07, "loss": 8.6408, "step": 5660 }, { "epoch": 2.05, "learning_rate": 2.5678119349005423e-07, "loss": 8.7009, "step": 5680 }, { "epoch": 2.06, "learning_rate": 2.576853526220615e-07, "loss": 8.6798, "step": 5700 }, { "epoch": 2.07, "learning_rate": 2.5858951175406866e-07, "loss": 8.7363, "step": 5720 }, { "epoch": 2.08, "learning_rate": 2.5949367088607595e-07, "loss": 8.7773, "step": 5740 }, { "epoch": 2.08, "learning_rate": 2.603978300180832e-07, "loss": 8.5859, "step": 5760 }, { "epoch": 2.09, "learning_rate": 2.613019891500904e-07, "loss": 8.7, "step": 5780 }, { "epoch": 2.1, "learning_rate": 2.622061482820976e-07, "loss": 8.6803, "step": 5800 }, { "epoch": 2.1, "learning_rate": 2.631103074141049e-07, "loss": 8.7215, "step": 5820 }, { "epoch": 2.11, "learning_rate": 2.640144665461121e-07, "loss": 8.6357, "step": 5840 }, { "epoch": 2.12, "learning_rate": 2.6491862567811934e-07, "loss": 8.6427, "step": 5860 }, { "epoch": 2.13, "learning_rate": 2.658227848101265e-07, "loss": 8.7049, "step": 5880 }, { "epoch": 2.13, "learning_rate": 2.667269439421338e-07, "loss": 8.6825, "step": 5900 }, { "epoch": 2.14, "learning_rate": 2.6763110307414106e-07, "loss": 8.5983, "step": 5920 }, { "epoch": 2.15, "learning_rate": 2.6853526220614824e-07, "loss": 8.7502, "step": 5940 }, { "epoch": 2.16, "learning_rate": 2.694394213381555e-07, "loss": 8.6624, "step": 5960 }, { "epoch": 2.16, "learning_rate": 2.703435804701628e-07, "loss": 8.586, "step": 5980 }, { "epoch": 2.17, "learning_rate": 2.7124773960216996e-07, "loss": 8.6519, "step": 6000 }, { "epoch": 2.18, "learning_rate": 2.721518987341772e-07, "loss": 8.6292, "step": 6020 }, { "epoch": 2.18, "learning_rate": 2.7305605786618444e-07, "loss": 8.6325, "step": 6040 }, { "epoch": 2.19, "learning_rate": 2.739602169981917e-07, "loss": 8.6599, "step": 6060 }, { "epoch": 2.2, "learning_rate": 2.748643761301989e-07, "loss": 8.6505, "step": 6080 }, { "epoch": 2.21, "learning_rate": 2.757685352622061e-07, "loss": 8.6876, "step": 6100 }, { "epoch": 2.21, "learning_rate": 2.7667269439421335e-07, "loss": 8.6015, "step": 6120 }, { "epoch": 2.22, "learning_rate": 2.7757685352622064e-07, "loss": 8.5816, "step": 6140 }, { "epoch": 2.23, "learning_rate": 2.7848101265822783e-07, "loss": 8.6318, "step": 6160 }, { "epoch": 2.24, "learning_rate": 2.7938517179023507e-07, "loss": 8.6486, "step": 6180 }, { "epoch": 2.24, "learning_rate": 2.802893309222423e-07, "loss": 8.6428, "step": 6200 }, { "epoch": 2.25, "learning_rate": 2.8119349005424955e-07, "loss": 8.6547, "step": 6220 }, { "epoch": 2.26, "learning_rate": 2.820976491862568e-07, "loss": 8.6352, "step": 6240 }, { "epoch": 2.26, "learning_rate": 2.83001808318264e-07, "loss": 8.6556, "step": 6260 }, { "epoch": 2.27, "learning_rate": 2.839059674502712e-07, "loss": 8.5903, "step": 6280 }, { "epoch": 2.28, "learning_rate": 2.848101265822785e-07, "loss": 8.645, "step": 6300 }, { "epoch": 2.29, "learning_rate": 2.857142857142857e-07, "loss": 8.6496, "step": 6320 }, { "epoch": 2.29, 
"learning_rate": 2.8661844484629293e-07, "loss": 8.678, "step": 6340 }, { "epoch": 2.3, "learning_rate": 2.8752260397830017e-07, "loss": 8.7506, "step": 6360 }, { "epoch": 2.31, "learning_rate": 2.884267631103074e-07, "loss": 8.7135, "step": 6380 }, { "epoch": 2.31, "learning_rate": 2.8933092224231465e-07, "loss": 8.6221, "step": 6400 }, { "epoch": 2.32, "learning_rate": 2.9023508137432184e-07, "loss": 8.646, "step": 6420 }, { "epoch": 2.33, "learning_rate": 2.911392405063291e-07, "loss": 8.5481, "step": 6440 }, { "epoch": 2.34, "learning_rate": 2.9204339963833637e-07, "loss": 8.7291, "step": 6460 }, { "epoch": 2.34, "learning_rate": 2.9294755877034356e-07, "loss": 8.6583, "step": 6480 }, { "epoch": 2.35, "learning_rate": 2.938517179023508e-07, "loss": 8.5624, "step": 6500 }, { "epoch": 2.36, "learning_rate": 2.9475587703435804e-07, "loss": 8.648, "step": 6520 }, { "epoch": 2.37, "learning_rate": 2.956600361663653e-07, "loss": 8.6145, "step": 6540 }, { "epoch": 2.37, "learning_rate": 2.965641952983725e-07, "loss": 8.6264, "step": 6560 }, { "epoch": 2.38, "learning_rate": 2.974683544303797e-07, "loss": 8.6417, "step": 6580 }, { "epoch": 2.39, "learning_rate": 2.9837251356238694e-07, "loss": 8.6012, "step": 6600 }, { "epoch": 2.39, "learning_rate": 2.9927667269439424e-07, "loss": 8.6653, "step": 6620 }, { "epoch": 2.4, "learning_rate": 3.001808318264014e-07, "loss": 8.7473, "step": 6640 }, { "epoch": 2.41, "learning_rate": 3.0108499095840866e-07, "loss": 8.6582, "step": 6660 }, { "epoch": 2.42, "learning_rate": 3.019891500904159e-07, "loss": 8.679, "step": 6680 }, { "epoch": 2.42, "learning_rate": 3.0289330922242314e-07, "loss": 8.6476, "step": 6700 }, { "epoch": 2.43, "learning_rate": 3.037974683544304e-07, "loss": 8.6075, "step": 6720 }, { "epoch": 2.44, "learning_rate": 3.0470162748643757e-07, "loss": 8.6942, "step": 6740 }, { "epoch": 2.44, "learning_rate": 3.056057866184448e-07, "loss": 8.6298, "step": 6760 }, { "epoch": 2.45, "learning_rate": 3.065099457504521e-07, "loss": 8.6439, "step": 6780 }, { "epoch": 2.46, "learning_rate": 3.074141048824593e-07, "loss": 8.6465, "step": 6800 }, { "epoch": 2.47, "learning_rate": 3.0831826401446653e-07, "loss": 8.6938, "step": 6820 }, { "epoch": 2.47, "learning_rate": 3.0922242314647377e-07, "loss": 8.6629, "step": 6840 }, { "epoch": 2.48, "learning_rate": 3.10126582278481e-07, "loss": 8.6653, "step": 6860 }, { "epoch": 2.49, "learning_rate": 3.1103074141048825e-07, "loss": 8.6678, "step": 6880 }, { "epoch": 2.5, "learning_rate": 3.1193490054249543e-07, "loss": 8.6495, "step": 6900 }, { "epoch": 2.5, "learning_rate": 3.128390596745027e-07, "loss": 8.6629, "step": 6920 }, { "epoch": 2.51, "learning_rate": 3.1374321880650997e-07, "loss": 8.6308, "step": 6940 }, { "epoch": 2.52, "learning_rate": 3.1464737793851715e-07, "loss": 8.723, "step": 6960 }, { "epoch": 2.52, "learning_rate": 3.155515370705244e-07, "loss": 8.7843, "step": 6980 }, { "epoch": 2.53, "learning_rate": 3.1645569620253163e-07, "loss": 8.7357, "step": 7000 }, { "epoch": 2.54, "learning_rate": 3.1735985533453887e-07, "loss": 8.6289, "step": 7020 }, { "epoch": 2.55, "learning_rate": 3.182640144665461e-07, "loss": 8.722, "step": 7040 }, { "epoch": 2.55, "learning_rate": 3.1916817359855335e-07, "loss": 8.6224, "step": 7060 }, { "epoch": 2.56, "learning_rate": 3.2007233273056054e-07, "loss": 8.6937, "step": 7080 }, { "epoch": 2.57, "learning_rate": 3.2097649186256783e-07, "loss": 8.6861, "step": 7100 }, { "epoch": 2.58, "learning_rate": 3.21880650994575e-07, "loss": 8.771, "step": 7120 }, { 
"epoch": 2.58, "learning_rate": 3.2278481012658226e-07, "loss": 8.6539, "step": 7140 }, { "epoch": 2.59, "learning_rate": 3.236889692585895e-07, "loss": 8.6444, "step": 7160 }, { "epoch": 2.6, "learning_rate": 3.2459312839059674e-07, "loss": 8.7097, "step": 7180 }, { "epoch": 2.6, "learning_rate": 3.25497287522604e-07, "loss": 8.6235, "step": 7200 }, { "epoch": 2.61, "learning_rate": 3.264014466546112e-07, "loss": 8.6301, "step": 7220 }, { "epoch": 2.62, "learning_rate": 3.273056057866184e-07, "loss": 8.6158, "step": 7240 }, { "epoch": 2.63, "learning_rate": 3.282097649186257e-07, "loss": 8.7017, "step": 7260 }, { "epoch": 2.63, "learning_rate": 3.291139240506329e-07, "loss": 8.5819, "step": 7280 }, { "epoch": 2.64, "learning_rate": 3.300180831826401e-07, "loss": 8.6706, "step": 7300 }, { "epoch": 2.65, "learning_rate": 3.3092224231464736e-07, "loss": 8.7503, "step": 7320 }, { "epoch": 2.65, "learning_rate": 3.318264014466546e-07, "loss": 8.651, "step": 7340 }, { "epoch": 2.66, "learning_rate": 3.3273056057866184e-07, "loss": 8.6765, "step": 7360 }, { "epoch": 2.67, "learning_rate": 3.336347197106691e-07, "loss": 8.6551, "step": 7380 }, { "epoch": 2.68, "learning_rate": 3.3453887884267627e-07, "loss": 8.5522, "step": 7400 }, { "epoch": 2.68, "learning_rate": 3.3544303797468356e-07, "loss": 8.6139, "step": 7420 }, { "epoch": 2.69, "learning_rate": 3.3634719710669075e-07, "loss": 8.6521, "step": 7440 }, { "epoch": 2.7, "learning_rate": 3.37251356238698e-07, "loss": 8.7296, "step": 7460 }, { "epoch": 2.71, "learning_rate": 3.3815551537070523e-07, "loss": 8.8073, "step": 7480 }, { "epoch": 2.71, "learning_rate": 3.3905967450271247e-07, "loss": 8.8213, "step": 7500 }, { "epoch": 2.72, "learning_rate": 3.399638336347197e-07, "loss": 8.792, "step": 7520 }, { "epoch": 2.73, "learning_rate": 3.4086799276672695e-07, "loss": 8.8606, "step": 7540 }, { "epoch": 2.73, "learning_rate": 3.4177215189873413e-07, "loss": 8.767, "step": 7560 }, { "epoch": 2.74, "learning_rate": 3.4267631103074143e-07, "loss": 8.6718, "step": 7580 }, { "epoch": 2.75, "learning_rate": 3.435804701627486e-07, "loss": 8.7104, "step": 7600 }, { "epoch": 2.76, "learning_rate": 3.4448462929475585e-07, "loss": 8.8865, "step": 7620 }, { "epoch": 2.76, "learning_rate": 3.453887884267631e-07, "loss": 8.7782, "step": 7640 }, { "epoch": 2.77, "learning_rate": 3.4629294755877033e-07, "loss": 8.7133, "step": 7660 }, { "epoch": 2.78, "learning_rate": 3.4719710669077757e-07, "loss": 8.6618, "step": 7680 }, { "epoch": 2.78, "learning_rate": 3.481012658227848e-07, "loss": 8.6329, "step": 7700 }, { "epoch": 2.79, "learning_rate": 3.49005424954792e-07, "loss": 8.6203, "step": 7720 }, { "epoch": 2.8, "learning_rate": 3.499095840867993e-07, "loss": 8.8011, "step": 7740 }, { "epoch": 2.81, "learning_rate": 3.508137432188065e-07, "loss": 8.6341, "step": 7760 }, { "epoch": 2.81, "learning_rate": 3.517179023508137e-07, "loss": 8.7076, "step": 7780 }, { "epoch": 2.82, "learning_rate": 3.5262206148282096e-07, "loss": 8.8637, "step": 7800 }, { "epoch": 2.83, "learning_rate": 3.535262206148282e-07, "loss": 8.8632, "step": 7820 }, { "epoch": 2.84, "learning_rate": 3.5443037974683544e-07, "loss": 8.7457, "step": 7840 }, { "epoch": 2.84, "learning_rate": 3.553345388788427e-07, "loss": 8.8141, "step": 7860 }, { "epoch": 2.85, "learning_rate": 3.5623869801084986e-07, "loss": 8.6005, "step": 7880 }, { "epoch": 2.86, "learning_rate": 3.5714285714285716e-07, "loss": 8.8337, "step": 7900 }, { "epoch": 2.86, "learning_rate": 3.5804701627486434e-07, "loss": 8.5335, 
"step": 7920 }, { "epoch": 2.87, "learning_rate": 3.589511754068716e-07, "loss": 8.7152, "step": 7940 }, { "epoch": 2.88, "learning_rate": 3.598553345388788e-07, "loss": 8.8067, "step": 7960 }, { "epoch": 2.89, "learning_rate": 3.6075949367088606e-07, "loss": 8.6357, "step": 7980 }, { "epoch": 2.89, "learning_rate": 3.616636528028933e-07, "loss": 8.6081, "step": 8000 }, { "epoch": 2.9, "learning_rate": 3.6256781193490054e-07, "loss": 8.7371, "step": 8020 }, { "epoch": 2.91, "learning_rate": 3.6347197106690773e-07, "loss": 8.6043, "step": 8040 }, { "epoch": 2.92, "learning_rate": 3.64376130198915e-07, "loss": 8.7781, "step": 8060 }, { "epoch": 2.92, "learning_rate": 3.6528028933092226e-07, "loss": 8.7021, "step": 8080 }, { "epoch": 2.93, "learning_rate": 3.6618444846292945e-07, "loss": 8.6577, "step": 8100 }, { "epoch": 2.94, "learning_rate": 3.670886075949367e-07, "loss": 8.7209, "step": 8120 }, { "epoch": 2.94, "learning_rate": 3.6799276672694393e-07, "loss": 8.7849, "step": 8140 }, { "epoch": 2.95, "learning_rate": 3.6889692585895117e-07, "loss": 8.6802, "step": 8160 }, { "epoch": 2.96, "learning_rate": 3.698010849909584e-07, "loss": 8.6445, "step": 8180 }, { "epoch": 2.97, "learning_rate": 3.707052441229656e-07, "loss": 8.8102, "step": 8200 }, { "epoch": 2.97, "learning_rate": 3.716094032549729e-07, "loss": 8.6827, "step": 8220 }, { "epoch": 2.98, "learning_rate": 3.7251356238698013e-07, "loss": 8.5796, "step": 8240 }, { "epoch": 2.99, "learning_rate": 3.734177215189873e-07, "loss": 8.6077, "step": 8260 }, { "epoch": 2.99, "learning_rate": 3.7432188065099455e-07, "loss": 8.6815, "step": 8280 }, { "epoch": 3.0, "learning_rate": 3.752260397830018e-07, "loss": 8.6782, "step": 8300 }, { "epoch": 3.01, "learning_rate": 3.7613019891500903e-07, "loss": 8.6266, "step": 8320 }, { "epoch": 3.02, "learning_rate": 3.7703435804701627e-07, "loss": 8.7515, "step": 8340 }, { "epoch": 3.02, "learning_rate": 3.7793851717902346e-07, "loss": 8.7789, "step": 8360 }, { "epoch": 3.03, "learning_rate": 3.7884267631103075e-07, "loss": 8.6853, "step": 8380 }, { "epoch": 3.04, "learning_rate": 3.79746835443038e-07, "loss": 8.6932, "step": 8400 }, { "epoch": 3.05, "learning_rate": 3.806509945750452e-07, "loss": 8.6517, "step": 8420 }, { "epoch": 3.05, "learning_rate": 3.815551537070524e-07, "loss": 8.7313, "step": 8440 }, { "epoch": 3.06, "learning_rate": 3.8245931283905966e-07, "loss": 8.6474, "step": 8460 }, { "epoch": 3.07, "learning_rate": 3.833634719710669e-07, "loss": 8.5971, "step": 8480 }, { "epoch": 3.07, "learning_rate": 3.8426763110307414e-07, "loss": 8.6707, "step": 8500 }, { "epoch": 3.08, "learning_rate": 3.851717902350813e-07, "loss": 8.6945, "step": 8520 }, { "epoch": 3.09, "learning_rate": 3.860759493670886e-07, "loss": 8.5875, "step": 8540 }, { "epoch": 3.1, "learning_rate": 3.8698010849909586e-07, "loss": 8.6613, "step": 8560 }, { "epoch": 3.1, "learning_rate": 3.8788426763110304e-07, "loss": 8.6394, "step": 8580 }, { "epoch": 3.11, "learning_rate": 3.887884267631103e-07, "loss": 8.6201, "step": 8600 }, { "epoch": 3.12, "learning_rate": 3.896925858951175e-07, "loss": 8.7747, "step": 8620 }, { "epoch": 3.12, "learning_rate": 3.9059674502712476e-07, "loss": 8.6703, "step": 8640 }, { "epoch": 3.13, "learning_rate": 3.91500904159132e-07, "loss": 8.6593, "step": 8660 }, { "epoch": 3.14, "learning_rate": 3.924050632911392e-07, "loss": 8.8128, "step": 8680 }, { "epoch": 3.15, "learning_rate": 3.933092224231465e-07, "loss": 8.625, "step": 8700 }, { "epoch": 3.15, "learning_rate": 3.942133815551537e-07, 
"loss": 8.7078, "step": 8720 }, { "epoch": 3.16, "learning_rate": 3.951175406871609e-07, "loss": 8.7598, "step": 8740 }, { "epoch": 3.17, "learning_rate": 3.9602169981916815e-07, "loss": 8.6145, "step": 8760 }, { "epoch": 3.18, "learning_rate": 3.969258589511754e-07, "loss": 8.5881, "step": 8780 }, { "epoch": 3.18, "learning_rate": 3.9783001808318263e-07, "loss": 8.6745, "step": 8800 }, { "epoch": 3.19, "learning_rate": 3.9873417721518987e-07, "loss": 8.7567, "step": 8820 }, { "epoch": 3.2, "learning_rate": 3.9963833634719705e-07, "loss": 8.6587, "step": 8840 }, { "epoch": 3.2, "learning_rate": 4.0054249547920435e-07, "loss": 8.6343, "step": 8860 }, { "epoch": 3.21, "learning_rate": 4.014466546112116e-07, "loss": 8.6116, "step": 8880 }, { "epoch": 3.22, "learning_rate": 4.0235081374321877e-07, "loss": 8.6474, "step": 8900 }, { "epoch": 3.23, "learning_rate": 4.03254972875226e-07, "loss": 8.5796, "step": 8920 }, { "epoch": 3.23, "learning_rate": 4.0415913200723325e-07, "loss": 8.8124, "step": 8940 }, { "epoch": 3.24, "learning_rate": 4.050632911392405e-07, "loss": 8.7305, "step": 8960 }, { "epoch": 3.25, "learning_rate": 4.0596745027124773e-07, "loss": 8.6832, "step": 8980 }, { "epoch": 3.25, "learning_rate": 4.068716094032549e-07, "loss": 8.6668, "step": 9000 }, { "epoch": 3.26, "learning_rate": 4.077757685352622e-07, "loss": 8.611, "step": 9020 }, { "epoch": 3.27, "learning_rate": 4.0867992766726945e-07, "loss": 8.6182, "step": 9040 }, { "epoch": 3.28, "learning_rate": 4.0958408679927664e-07, "loss": 8.6509, "step": 9060 }, { "epoch": 3.28, "learning_rate": 4.104882459312839e-07, "loss": 8.7003, "step": 9080 }, { "epoch": 3.29, "learning_rate": 4.1139240506329117e-07, "loss": 8.7466, "step": 9100 }, { "epoch": 3.3, "learning_rate": 4.1229656419529836e-07, "loss": 8.6764, "step": 9120 }, { "epoch": 3.31, "learning_rate": 4.132007233273056e-07, "loss": 8.8049, "step": 9140 }, { "epoch": 3.31, "learning_rate": 4.141048824593128e-07, "loss": 8.6367, "step": 9160 }, { "epoch": 3.32, "learning_rate": 4.150090415913201e-07, "loss": 8.6451, "step": 9180 }, { "epoch": 3.33, "learning_rate": 4.159132007233273e-07, "loss": 8.7919, "step": 9200 }, { "epoch": 3.33, "learning_rate": 4.168173598553345e-07, "loss": 8.6583, "step": 9220 }, { "epoch": 3.34, "learning_rate": 4.1772151898734174e-07, "loss": 8.6932, "step": 9240 }, { "epoch": 3.35, "learning_rate": 4.1862567811934904e-07, "loss": 8.7003, "step": 9260 }, { "epoch": 3.36, "learning_rate": 4.195298372513562e-07, "loss": 8.7071, "step": 9280 }, { "epoch": 3.36, "learning_rate": 4.2043399638336346e-07, "loss": 8.5826, "step": 9300 }, { "epoch": 3.37, "learning_rate": 4.2133815551537065e-07, "loss": 8.8741, "step": 9320 }, { "epoch": 3.38, "learning_rate": 4.2224231464737794e-07, "loss": 8.7105, "step": 9340 }, { "epoch": 3.39, "learning_rate": 4.231464737793852e-07, "loss": 8.7351, "step": 9360 }, { "epoch": 3.39, "learning_rate": 4.2405063291139237e-07, "loss": 8.6307, "step": 9380 }, { "epoch": 3.4, "learning_rate": 4.249547920433996e-07, "loss": 8.6908, "step": 9400 }, { "epoch": 3.41, "learning_rate": 4.258589511754069e-07, "loss": 8.7118, "step": 9420 }, { "epoch": 3.41, "learning_rate": 4.267631103074141e-07, "loss": 8.7018, "step": 9440 }, { "epoch": 3.42, "learning_rate": 4.2766726943942133e-07, "loss": 8.7443, "step": 9460 }, { "epoch": 3.43, "learning_rate": 4.285714285714285e-07, "loss": 8.6614, "step": 9480 }, { "epoch": 3.44, "learning_rate": 4.294755877034358e-07, "loss": 8.7883, "step": 9500 }, { "epoch": 3.44, "learning_rate": 
4.3037974683544305e-07, "loss": 8.7251, "step": 9520 }, { "epoch": 3.45, "learning_rate": 4.3128390596745023e-07, "loss": 8.8043, "step": 9540 }, { "epoch": 3.46, "learning_rate": 4.3218806509945747e-07, "loss": 8.6045, "step": 9560 }, { "epoch": 3.46, "learning_rate": 4.3309222423146477e-07, "loss": 8.6793, "step": 9580 }, { "epoch": 3.47, "learning_rate": 4.3399638336347195e-07, "loss": 8.7049, "step": 9600 }, { "epoch": 3.48, "learning_rate": 4.349005424954792e-07, "loss": 8.6692, "step": 9620 }, { "epoch": 3.49, "learning_rate": 4.358047016274864e-07, "loss": 8.79, "step": 9640 }, { "epoch": 3.49, "learning_rate": 4.3670886075949367e-07, "loss": 8.7726, "step": 9660 }, { "epoch": 3.5, "learning_rate": 4.376130198915009e-07, "loss": 8.6092, "step": 9680 }, { "epoch": 3.51, "learning_rate": 4.385171790235081e-07, "loss": 8.7118, "step": 9700 }, { "epoch": 3.52, "learning_rate": 4.3942133815551534e-07, "loss": 8.6013, "step": 9720 }, { "epoch": 3.52, "learning_rate": 4.4032549728752263e-07, "loss": 8.762, "step": 9740 }, { "epoch": 3.53, "learning_rate": 4.412296564195298e-07, "loss": 8.6593, "step": 9760 }, { "epoch": 3.54, "learning_rate": 4.4213381555153706e-07, "loss": 8.6182, "step": 9780 }, { "epoch": 3.54, "learning_rate": 4.4303797468354424e-07, "loss": 8.6179, "step": 9800 }, { "epoch": 3.55, "learning_rate": 4.4394213381555154e-07, "loss": 8.7162, "step": 9820 }, { "epoch": 3.56, "learning_rate": 4.448462929475588e-07, "loss": 8.6335, "step": 9840 }, { "epoch": 3.57, "learning_rate": 4.4575045207956596e-07, "loss": 8.7033, "step": 9860 }, { "epoch": 3.57, "learning_rate": 4.466546112115732e-07, "loss": 8.6223, "step": 9880 }, { "epoch": 3.58, "learning_rate": 4.475587703435805e-07, "loss": 8.6641, "step": 9900 }, { "epoch": 3.59, "learning_rate": 4.484629294755877e-07, "loss": 8.7288, "step": 9920 }, { "epoch": 3.59, "learning_rate": 4.493670886075949e-07, "loss": 8.6236, "step": 9940 }, { "epoch": 3.6, "learning_rate": 4.502712477396021e-07, "loss": 8.6478, "step": 9960 }, { "epoch": 3.61, "learning_rate": 4.511754068716094e-07, "loss": 8.7282, "step": 9980 }, { "epoch": 3.62, "learning_rate": 4.5207956600361664e-07, "loss": 8.6599, "step": 10000 }, { "epoch": 3.62, "learning_rate": 4.5298372513562383e-07, "loss": 8.7713, "step": 10020 }, { "epoch": 3.63, "learning_rate": 4.5388788426763107e-07, "loss": 8.6667, "step": 10040 }, { "epoch": 3.64, "learning_rate": 4.5479204339963836e-07, "loss": 8.7424, "step": 10060 }, { "epoch": 3.65, "learning_rate": 4.5569620253164555e-07, "loss": 8.6676, "step": 10080 }, { "epoch": 3.65, "learning_rate": 4.566003616636528e-07, "loss": 8.7331, "step": 10100 }, { "epoch": 3.66, "learning_rate": 4.5750452079566003e-07, "loss": 8.6343, "step": 10120 }, { "epoch": 3.67, "learning_rate": 4.5840867992766727e-07, "loss": 8.6826, "step": 10140 }, { "epoch": 3.67, "learning_rate": 4.593128390596745e-07, "loss": 8.6749, "step": 10160 }, { "epoch": 3.68, "learning_rate": 4.602169981916817e-07, "loss": 8.6509, "step": 10180 }, { "epoch": 3.69, "learning_rate": 4.6112115732368893e-07, "loss": 8.6455, "step": 10200 }, { "epoch": 3.7, "learning_rate": 4.620253164556962e-07, "loss": 8.6079, "step": 10220 }, { "epoch": 3.7, "learning_rate": 4.629294755877034e-07, "loss": 8.6061, "step": 10240 }, { "epoch": 3.71, "learning_rate": 4.6383363471971065e-07, "loss": 8.6707, "step": 10260 }, { "epoch": 3.72, "learning_rate": 4.647377938517179e-07, "loss": 8.6731, "step": 10280 }, { "epoch": 3.73, "learning_rate": 4.6564195298372513e-07, "loss": 8.6561, "step": 10300 
}, { "epoch": 3.73, "learning_rate": 4.6654611211573237e-07, "loss": 8.8374, "step": 10320 }, { "epoch": 3.74, "learning_rate": 4.6745027124773956e-07, "loss": 8.7873, "step": 10340 }, { "epoch": 3.75, "learning_rate": 4.683544303797468e-07, "loss": 8.5839, "step": 10360 }, { "epoch": 3.75, "learning_rate": 4.692585895117541e-07, "loss": 8.7062, "step": 10380 }, { "epoch": 3.76, "learning_rate": 4.701627486437613e-07, "loss": 8.5917, "step": 10400 }, { "epoch": 3.77, "learning_rate": 4.710669077757685e-07, "loss": 8.6824, "step": 10420 }, { "epoch": 3.78, "learning_rate": 4.7197106690777576e-07, "loss": 8.624, "step": 10440 }, { "epoch": 3.78, "learning_rate": 4.72875226039783e-07, "loss": 8.6687, "step": 10460 }, { "epoch": 3.79, "learning_rate": 4.7377938517179024e-07, "loss": 8.8627, "step": 10480 }, { "epoch": 3.8, "learning_rate": 4.746835443037974e-07, "loss": 8.6775, "step": 10500 }, { "epoch": 3.8, "learning_rate": 4.7558770343580466e-07, "loss": 8.6905, "step": 10520 }, { "epoch": 3.81, "learning_rate": 4.7649186256781196e-07, "loss": 8.6904, "step": 10540 }, { "epoch": 3.82, "learning_rate": 4.773960216998191e-07, "loss": 8.6781, "step": 10560 }, { "epoch": 3.83, "learning_rate": 4.783001808318264e-07, "loss": 8.6216, "step": 10580 }, { "epoch": 3.83, "learning_rate": 4.792043399638337e-07, "loss": 8.6004, "step": 10600 }, { "epoch": 3.84, "learning_rate": 4.801084990958408e-07, "loss": 8.6008, "step": 10620 }, { "epoch": 3.85, "learning_rate": 4.81012658227848e-07, "loss": 8.5841, "step": 10640 }, { "epoch": 3.86, "learning_rate": 4.819168173598553e-07, "loss": 8.6029, "step": 10660 }, { "epoch": 3.86, "learning_rate": 4.828209764918625e-07, "loss": 8.7195, "step": 10680 }, { "epoch": 3.87, "learning_rate": 4.837251356238698e-07, "loss": 8.7097, "step": 10700 }, { "epoch": 3.88, "learning_rate": 4.84629294755877e-07, "loss": 8.7943, "step": 10720 }, { "epoch": 3.88, "learning_rate": 4.855334538878842e-07, "loss": 8.6566, "step": 10740 }, { "epoch": 3.89, "learning_rate": 4.864376130198915e-07, "loss": 8.6458, "step": 10760 }, { "epoch": 3.9, "learning_rate": 4.873417721518987e-07, "loss": 8.8129, "step": 10780 }, { "epoch": 3.91, "learning_rate": 4.88245931283906e-07, "loss": 8.6123, "step": 10800 }, { "epoch": 3.91, "learning_rate": 4.891500904159131e-07, "loss": 8.7739, "step": 10820 }, { "epoch": 3.92, "learning_rate": 4.900542495479204e-07, "loss": 8.5971, "step": 10840 }, { "epoch": 3.93, "learning_rate": 4.909584086799277e-07, "loss": 8.6343, "step": 10860 }, { "epoch": 3.93, "learning_rate": 4.918625678119348e-07, "loss": 8.6995, "step": 10880 }, { "epoch": 3.94, "learning_rate": 4.927667269439422e-07, "loss": 8.7568, "step": 10900 }, { "epoch": 3.95, "learning_rate": 4.936708860759494e-07, "loss": 8.6049, "step": 10920 }, { "epoch": 3.96, "learning_rate": 4.945750452079565e-07, "loss": 8.6712, "step": 10940 }, { "epoch": 3.96, "learning_rate": 4.954792043399638e-07, "loss": 8.6592, "step": 10960 }, { "epoch": 3.97, "learning_rate": 4.96383363471971e-07, "loss": 8.6155, "step": 10980 }, { "epoch": 3.98, "learning_rate": 4.972875226039783e-07, "loss": 8.5876, "step": 11000 }, { "epoch": 3.99, "learning_rate": 4.981916817359855e-07, "loss": 8.7089, "step": 11020 }, { "epoch": 3.99, "learning_rate": 4.990958408679927e-07, "loss": 8.67, "step": 11040 }, { "epoch": 4.0, "learning_rate": 5e-07, "loss": 8.6221, "step": 11060 }, { "epoch": 4.01, "learning_rate": 4.999997478613401e-07, "loss": 8.7503, "step": 11080 }, { "epoch": 4.01, "learning_rate": 4.999989914458693e-07, 
"loss": 8.8729, "step": 11100 }, { "epoch": 4.02, "learning_rate": 4.99997730755113e-07, "loss": 8.7594, "step": 11120 }, { "epoch": 4.03, "learning_rate": 4.999959657916146e-07, "loss": 8.6993, "step": 11140 }, { "epoch": 4.04, "learning_rate": 4.999936965589338e-07, "loss": 8.8509, "step": 11160 }, { "epoch": 4.04, "learning_rate": 4.999909230616482e-07, "loss": 8.6098, "step": 11180 }, { "epoch": 4.05, "learning_rate": 4.99987645305352e-07, "loss": 8.6471, "step": 11200 }, { "epoch": 4.06, "learning_rate": 4.999838632966571e-07, "loss": 8.7941, "step": 11220 }, { "epoch": 4.07, "learning_rate": 4.999795770431919e-07, "loss": 8.6125, "step": 11240 }, { "epoch": 4.07, "learning_rate": 4.999747865536025e-07, "loss": 8.7692, "step": 11260 }, { "epoch": 4.08, "learning_rate": 4.999694918375516e-07, "loss": 8.6221, "step": 11280 }, { "epoch": 4.09, "learning_rate": 4.999636929057195e-07, "loss": 8.6307, "step": 11300 }, { "epoch": 4.09, "learning_rate": 4.99957389769803e-07, "loss": 8.6476, "step": 11320 }, { "epoch": 4.1, "learning_rate": 4.999505824425163e-07, "loss": 8.6193, "step": 11340 }, { "epoch": 4.11, "learning_rate": 4.999432709375907e-07, "loss": 8.5791, "step": 11360 }, { "epoch": 4.12, "learning_rate": 4.999354552697741e-07, "loss": 8.6525, "step": 11380 }, { "epoch": 4.12, "learning_rate": 4.999271354548316e-07, "loss": 8.6326, "step": 11400 }, { "epoch": 4.13, "learning_rate": 4.999183115095452e-07, "loss": 8.7265, "step": 11420 }, { "epoch": 4.14, "learning_rate": 4.999089834517138e-07, "loss": 8.6149, "step": 11440 }, { "epoch": 4.14, "learning_rate": 4.998991513001532e-07, "loss": 8.7124, "step": 11460 }, { "epoch": 4.15, "learning_rate": 4.998888150746957e-07, "loss": 8.5983, "step": 11480 }, { "epoch": 4.16, "learning_rate": 4.998779747961905e-07, "loss": 8.685, "step": 11500 }, { "epoch": 4.17, "learning_rate": 4.99866630486504e-07, "loss": 8.5874, "step": 11520 }, { "epoch": 4.17, "learning_rate": 4.998547821685187e-07, "loss": 8.6896, "step": 11540 }, { "epoch": 4.18, "learning_rate": 4.99842429866134e-07, "loss": 8.6125, "step": 11560 }, { "epoch": 4.19, "learning_rate": 4.998295736042658e-07, "loss": 8.6181, "step": 11580 }, { "epoch": 4.2, "learning_rate": 4.998162134088466e-07, "loss": 8.6908, "step": 11600 }, { "epoch": 4.2, "learning_rate": 4.998023493068254e-07, "loss": 8.611, "step": 11620 }, { "epoch": 4.21, "learning_rate": 4.997879813261676e-07, "loss": 8.6569, "step": 11640 }, { "epoch": 4.22, "learning_rate": 4.99773109495855e-07, "loss": 8.7846, "step": 11660 }, { "epoch": 4.22, "learning_rate": 4.997577338458857e-07, "loss": 8.6122, "step": 11680 }, { "epoch": 4.23, "learning_rate": 4.997418544072741e-07, "loss": 8.8447, "step": 11700 }, { "epoch": 4.24, "learning_rate": 4.997254712120507e-07, "loss": 8.7572, "step": 11720 }, { "epoch": 4.25, "learning_rate": 4.997085842932621e-07, "loss": 8.6279, "step": 11740 }, { "epoch": 4.25, "learning_rate": 4.996911936849713e-07, "loss": 8.6707, "step": 11760 }, { "epoch": 4.26, "learning_rate": 4.996732994222569e-07, "loss": 8.7163, "step": 11780 }, { "epoch": 4.27, "learning_rate": 4.996549015412135e-07, "loss": 8.7067, "step": 11800 }, { "epoch": 4.27, "learning_rate": 4.996360000789519e-07, "loss": 8.5964, "step": 11820 }, { "epoch": 4.28, "learning_rate": 4.996165950735983e-07, "loss": 8.8121, "step": 11840 }, { "epoch": 4.29, "learning_rate": 4.995966865642945e-07, "loss": 8.7009, "step": 11860 }, { "epoch": 4.3, "learning_rate": 4.995762745911985e-07, "loss": 8.6199, "step": 11880 }, { "epoch": 4.3, 
"learning_rate": 4.995553591954832e-07, "loss": 8.7139, "step": 11900 }, { "epoch": 4.31, "learning_rate": 4.995339404193373e-07, "loss": 8.6098, "step": 11920 }, { "epoch": 4.32, "learning_rate": 4.99512018305965e-07, "loss": 8.6987, "step": 11940 }, { "epoch": 4.33, "learning_rate": 4.994895928995854e-07, "loss": 8.6441, "step": 11960 }, { "epoch": 4.33, "learning_rate": 4.99466664245433e-07, "loss": 8.6625, "step": 11980 }, { "epoch": 4.34, "learning_rate": 4.994432323897575e-07, "loss": 8.6652, "step": 12000 }, { "epoch": 4.35, "learning_rate": 4.994192973798235e-07, "loss": 8.7255, "step": 12020 }, { "epoch": 4.35, "learning_rate": 4.993948592639104e-07, "loss": 8.6808, "step": 12040 }, { "epoch": 4.36, "learning_rate": 4.993699180913127e-07, "loss": 8.6249, "step": 12060 }, { "epoch": 4.37, "learning_rate": 4.993444739123394e-07, "loss": 8.8293, "step": 12080 }, { "epoch": 4.38, "learning_rate": 4.993185267783141e-07, "loss": 8.6877, "step": 12100 }, { "epoch": 4.38, "learning_rate": 4.992920767415752e-07, "loss": 8.6937, "step": 12120 }, { "epoch": 4.39, "learning_rate": 4.992651238554753e-07, "loss": 8.8042, "step": 12140 }, { "epoch": 4.4, "learning_rate": 4.992376681743811e-07, "loss": 8.631, "step": 12160 }, { "epoch": 4.41, "learning_rate": 4.992097097536739e-07, "loss": 8.6861, "step": 12180 }, { "epoch": 4.41, "learning_rate": 4.991812486497489e-07, "loss": 8.754, "step": 12200 }, { "epoch": 4.42, "learning_rate": 4.991522849200152e-07, "loss": 8.8765, "step": 12220 }, { "epoch": 4.43, "learning_rate": 4.991228186228956e-07, "loss": 8.7654, "step": 12240 }, { "epoch": 4.43, "learning_rate": 4.990928498178273e-07, "loss": 8.7668, "step": 12260 }, { "epoch": 4.44, "learning_rate": 4.990623785652603e-07, "loss": 8.711, "step": 12280 }, { "epoch": 4.45, "learning_rate": 4.990314049266585e-07, "loss": 8.6748, "step": 12300 }, { "epoch": 4.46, "learning_rate": 4.989999289644991e-07, "loss": 8.7944, "step": 12320 }, { "epoch": 4.46, "learning_rate": 4.989679507422728e-07, "loss": 8.699, "step": 12340 }, { "epoch": 4.47, "learning_rate": 4.989354703244829e-07, "loss": 8.618, "step": 12360 }, { "epoch": 4.48, "learning_rate": 4.98902487776646e-07, "loss": 8.7872, "step": 12380 }, { "epoch": 4.48, "learning_rate": 4.988690031652916e-07, "loss": 8.6499, "step": 12400 }, { "epoch": 4.49, "learning_rate": 4.988350165579616e-07, "loss": 8.641, "step": 12420 }, { "epoch": 4.5, "learning_rate": 4.98800528023211e-07, "loss": 8.6022, "step": 12440 }, { "epoch": 4.51, "learning_rate": 4.987655376306068e-07, "loss": 8.7124, "step": 12460 }, { "epoch": 4.51, "learning_rate": 4.987300454507285e-07, "loss": 8.6464, "step": 12480 }, { "epoch": 4.52, "learning_rate": 4.986940515551675e-07, "loss": 8.7243, "step": 12500 }, { "epoch": 4.53, "learning_rate": 4.986575560165277e-07, "loss": 8.8642, "step": 12520 }, { "epoch": 4.54, "learning_rate": 4.986205589084244e-07, "loss": 8.6188, "step": 12540 }, { "epoch": 4.54, "learning_rate": 4.985830603054849e-07, "loss": 8.6786, "step": 12560 }, { "epoch": 4.55, "learning_rate": 4.985450602833479e-07, "loss": 8.6291, "step": 12580 }, { "epoch": 4.56, "learning_rate": 4.985065589186638e-07, "loss": 8.6776, "step": 12600 }, { "epoch": 4.56, "learning_rate": 4.984675562890938e-07, "loss": 8.626, "step": 12620 }, { "epoch": 4.57, "learning_rate": 4.984280524733107e-07, "loss": 8.7193, "step": 12640 }, { "epoch": 4.58, "learning_rate": 4.983880475509977e-07, "loss": 8.6363, "step": 12660 }, { "epoch": 4.59, "learning_rate": 4.983475416028494e-07, "loss": 8.6651, 
"step": 12680 }, { "epoch": 4.59, "learning_rate": 4.983065347105706e-07, "loss": 8.6346, "step": 12700 }, { "epoch": 4.6, "learning_rate": 4.982650269568766e-07, "loss": 8.685, "step": 12720 }, { "epoch": 4.61, "learning_rate": 4.982230184254932e-07, "loss": 8.6415, "step": 12740 }, { "epoch": 4.61, "learning_rate": 4.981805092011564e-07, "loss": 8.5435, "step": 12760 }, { "epoch": 4.62, "learning_rate": 4.981374993696115e-07, "loss": 8.6561, "step": 12780 }, { "epoch": 4.63, "learning_rate": 4.980939890176143e-07, "loss": 8.6248, "step": 12800 }, { "epoch": 4.64, "learning_rate": 4.980499782329299e-07, "loss": 8.709, "step": 12820 }, { "epoch": 4.64, "learning_rate": 4.980054671043329e-07, "loss": 8.619, "step": 12840 }, { "epoch": 4.65, "learning_rate": 4.979604557216069e-07, "loss": 8.7461, "step": 12860 }, { "epoch": 4.66, "learning_rate": 4.979149441755452e-07, "loss": 8.7711, "step": 12880 }, { "epoch": 4.67, "learning_rate": 4.978689325579491e-07, "loss": 8.6337, "step": 12900 }, { "epoch": 4.67, "learning_rate": 4.978224209616292e-07, "loss": 8.7815, "step": 12920 }, { "epoch": 4.68, "learning_rate": 4.977754094804047e-07, "loss": 8.6441, "step": 12940 }, { "epoch": 4.69, "learning_rate": 4.977278982091027e-07, "loss": 8.765, "step": 12960 }, { "epoch": 4.69, "learning_rate": 4.976798872435586e-07, "loss": 8.5918, "step": 12980 }, { "epoch": 4.7, "learning_rate": 4.976313766806159e-07, "loss": 8.5984, "step": 13000 }, { "epoch": 4.71, "learning_rate": 4.975823666181255e-07, "loss": 8.7362, "step": 13020 }, { "epoch": 4.72, "learning_rate": 4.975328571549462e-07, "loss": 8.6716, "step": 13040 }, { "epoch": 4.72, "learning_rate": 4.97482848390944e-07, "loss": 8.7553, "step": 13060 }, { "epoch": 4.73, "learning_rate": 4.974323404269921e-07, "loss": 8.6592, "step": 13080 }, { "epoch": 4.74, "learning_rate": 4.973813333649703e-07, "loss": 8.7005, "step": 13100 }, { "epoch": 4.75, "learning_rate": 4.973298273077657e-07, "loss": 8.6346, "step": 13120 }, { "epoch": 4.75, "learning_rate": 4.972778223592717e-07, "loss": 8.6217, "step": 13140 }, { "epoch": 4.76, "learning_rate": 4.972253186243876e-07, "loss": 8.6394, "step": 13160 }, { "epoch": 4.77, "learning_rate": 4.971723162090196e-07, "loss": 8.8543, "step": 13180 }, { "epoch": 4.77, "learning_rate": 4.971188152200791e-07, "loss": 8.6189, "step": 13200 }, { "epoch": 4.78, "learning_rate": 4.970648157654835e-07, "loss": 8.6655, "step": 13220 }, { "epoch": 4.79, "learning_rate": 4.970103179541556e-07, "loss": 8.7302, "step": 13240 }, { "epoch": 4.8, "learning_rate": 4.969553218960234e-07, "loss": 8.629, "step": 13260 }, { "epoch": 4.8, "learning_rate": 4.9689982770202e-07, "loss": 8.6618, "step": 13280 }, { "epoch": 4.81, "learning_rate": 4.968438354840833e-07, "loss": 8.5832, "step": 13300 }, { "epoch": 4.82, "learning_rate": 4.967873453551557e-07, "loss": 8.6059, "step": 13320 }, { "epoch": 4.82, "learning_rate": 4.967303574291839e-07, "loss": 8.7163, "step": 13340 }, { "epoch": 4.83, "learning_rate": 4.966728718211188e-07, "loss": 8.6663, "step": 13360 }, { "epoch": 4.84, "learning_rate": 4.966148886469152e-07, "loss": 8.6538, "step": 13380 }, { "epoch": 4.85, "learning_rate": 4.965564080235315e-07, "loss": 8.6983, "step": 13400 }, { "epoch": 4.85, "learning_rate": 4.964974300689295e-07, "loss": 8.6548, "step": 13420 }, { "epoch": 4.86, "learning_rate": 4.964379549020741e-07, "loss": 8.6463, "step": 13440 }, { "epoch": 4.87, "learning_rate": 4.963779826429333e-07, "loss": 8.7331, "step": 13460 }, { "epoch": 4.88, "learning_rate": 
4.963175134124775e-07, "loss": 8.7252, "step": 13480 }, { "epoch": 4.88, "learning_rate": 4.962565473326802e-07, "loss": 8.7028, "step": 13500 }, { "epoch": 4.89, "learning_rate": 4.961950845265162e-07, "loss": 8.6745, "step": 13520 }, { "epoch": 4.9, "learning_rate": 4.961331251179628e-07, "loss": 8.5407, "step": 13540 }, { "epoch": 4.9, "learning_rate": 4.960706692319991e-07, "loss": 8.7639, "step": 13560 }, { "epoch": 4.91, "learning_rate": 4.960077169946052e-07, "loss": 8.6626, "step": 13580 }, { "epoch": 4.92, "learning_rate": 4.959442685327627e-07, "loss": 8.7467, "step": 13600 }, { "epoch": 4.93, "learning_rate": 4.958803239744542e-07, "loss": 8.6506, "step": 13620 }, { "epoch": 4.93, "learning_rate": 4.958158834486628e-07, "loss": 8.5932, "step": 13640 }, { "epoch": 4.94, "learning_rate": 4.95750947085372e-07, "loss": 8.751, "step": 13660 }, { "epoch": 4.95, "learning_rate": 4.956855150155657e-07, "loss": 8.6431, "step": 13680 }, { "epoch": 4.95, "learning_rate": 4.956195873712273e-07, "loss": 8.7175, "step": 13700 }, { "epoch": 4.96, "learning_rate": 4.955531642853403e-07, "loss": 8.7028, "step": 13720 }, { "epoch": 4.97, "learning_rate": 4.954862458918873e-07, "loss": 8.7185, "step": 13740 }, { "epoch": 4.98, "learning_rate": 4.954188323258498e-07, "loss": 8.6743, "step": 13760 }, { "epoch": 4.98, "learning_rate": 4.953509237232085e-07, "loss": 8.63, "step": 13780 }, { "epoch": 4.99, "learning_rate": 4.952825202209426e-07, "loss": 8.5902, "step": 13800 }, { "epoch": 5.0, "learning_rate": 4.952136219570291e-07, "loss": 8.7626, "step": 13820 }, { "epoch": 5.01, "learning_rate": 4.951442290704437e-07, "loss": 8.6377, "step": 13840 }, { "epoch": 5.01, "learning_rate": 4.950743417011591e-07, "loss": 8.6489, "step": 13860 }, { "epoch": 5.02, "learning_rate": 4.950039599901459e-07, "loss": 8.5806, "step": 13880 }, { "epoch": 5.03, "learning_rate": 4.949330840793717e-07, "loss": 8.6237, "step": 13900 }, { "epoch": 5.03, "learning_rate": 4.94861714111801e-07, "loss": 8.5744, "step": 13920 }, { "epoch": 5.04, "learning_rate": 4.947898502313948e-07, "loss": 8.6261, "step": 13940 }, { "epoch": 5.05, "learning_rate": 4.947174925831103e-07, "loss": 8.6471, "step": 13960 }, { "epoch": 5.06, "learning_rate": 4.946446413129011e-07, "loss": 8.7867, "step": 13980 }, { "epoch": 5.06, "learning_rate": 4.945712965677158e-07, "loss": 8.6515, "step": 14000 }, { "epoch": 5.07, "learning_rate": 4.944974584954988e-07, "loss": 8.7224, "step": 14020 }, { "epoch": 5.08, "learning_rate": 4.944231272451899e-07, "loss": 8.6068, "step": 14040 }, { "epoch": 5.08, "learning_rate": 4.94348302966723e-07, "loss": 8.6677, "step": 14060 }, { "epoch": 5.09, "learning_rate": 4.94272985811027e-07, "loss": 8.7058, "step": 14080 }, { "epoch": 5.1, "learning_rate": 4.941971759300248e-07, "loss": 8.7201, "step": 14100 }, { "epoch": 5.11, "learning_rate": 4.941208734766332e-07, "loss": 8.6761, "step": 14120 }, { "epoch": 5.11, "learning_rate": 4.940440786047627e-07, "loss": 8.7667, "step": 14140 }, { "epoch": 5.12, "learning_rate": 4.939667914693168e-07, "loss": 8.7497, "step": 14160 }, { "epoch": 5.13, "learning_rate": 4.938890122261922e-07, "loss": 8.6883, "step": 14180 }, { "epoch": 5.14, "learning_rate": 4.93810741032278e-07, "loss": 8.6371, "step": 14200 }, { "epoch": 5.14, "learning_rate": 4.937319780454559e-07, "loss": 8.5895, "step": 14220 }, { "epoch": 5.15, "learning_rate": 4.936527234245994e-07, "loss": 8.7159, "step": 14240 }, { "epoch": 5.16, "learning_rate": 4.935729773295737e-07, "loss": 8.6975, "step": 14260 }, 
{ "epoch": 5.16, "learning_rate": 4.934927399212354e-07, "loss": 8.7762, "step": 14280 }, { "epoch": 5.17, "learning_rate": 4.934120113614321e-07, "loss": 8.6464, "step": 14300 }, { "epoch": 5.18, "learning_rate": 4.933307918130022e-07, "loss": 8.6431, "step": 14320 }, { "epoch": 5.19, "learning_rate": 4.932490814397744e-07, "loss": 8.6835, "step": 14340 }, { "epoch": 5.19, "learning_rate": 4.931668804065674e-07, "loss": 8.7269, "step": 14360 }, { "epoch": 5.2, "learning_rate": 4.930841888791897e-07, "loss": 8.6963, "step": 14380 }, { "epoch": 5.21, "learning_rate": 4.93001007024439e-07, "loss": 8.7272, "step": 14400 }, { "epoch": 5.22, "learning_rate": 4.929173350101024e-07, "loss": 8.6172, "step": 14420 }, { "epoch": 5.22, "learning_rate": 4.928331730049555e-07, "loss": 8.6379, "step": 14440 }, { "epoch": 5.23, "learning_rate": 4.927485211787622e-07, "loss": 8.6049, "step": 14460 }, { "epoch": 5.24, "learning_rate": 4.926633797022744e-07, "loss": 8.7153, "step": 14480 }, { "epoch": 5.24, "learning_rate": 4.925777487472317e-07, "loss": 8.6468, "step": 14500 }, { "epoch": 5.25, "learning_rate": 4.924916284863614e-07, "loss": 8.7272, "step": 14520 }, { "epoch": 5.26, "learning_rate": 4.924050190933772e-07, "loss": 8.7109, "step": 14540 }, { "epoch": 5.27, "learning_rate": 4.923179207429798e-07, "loss": 8.7212, "step": 14560 }, { "epoch": 5.27, "learning_rate": 4.922303336108562e-07, "loss": 8.6957, "step": 14580 }, { "epoch": 5.28, "learning_rate": 4.92142257873679e-07, "loss": 8.7531, "step": 14600 }, { "epoch": 5.29, "learning_rate": 4.920536937091067e-07, "loss": 8.6634, "step": 14620 }, { "epoch": 5.29, "learning_rate": 4.919646412957829e-07, "loss": 8.687, "step": 14640 }, { "epoch": 5.3, "learning_rate": 4.918751008133362e-07, "loss": 8.7248, "step": 14660 }, { "epoch": 5.31, "learning_rate": 4.917850724423792e-07, "loss": 8.6262, "step": 14680 }, { "epoch": 5.32, "learning_rate": 4.916945563645093e-07, "loss": 8.7897, "step": 14700 }, { "epoch": 5.32, "learning_rate": 4.91603552762307e-07, "loss": 8.6599, "step": 14720 }, { "epoch": 5.33, "learning_rate": 4.915120618193368e-07, "loss": 8.6223, "step": 14740 }, { "epoch": 5.34, "learning_rate": 4.914200837201458e-07, "loss": 8.807, "step": 14760 }, { "epoch": 5.35, "learning_rate": 4.913276186502639e-07, "loss": 8.7852, "step": 14780 }, { "epoch": 5.35, "learning_rate": 4.912346667962032e-07, "loss": 8.6163, "step": 14800 }, { "epoch": 5.36, "learning_rate": 4.911412283454578e-07, "loss": 8.5919, "step": 14820 }, { "epoch": 5.37, "learning_rate": 4.910473034865032e-07, "loss": 8.6501, "step": 14840 }, { "epoch": 5.37, "learning_rate": 4.909528924087963e-07, "loss": 8.7825, "step": 14860 }, { "epoch": 5.38, "learning_rate": 4.908579953027743e-07, "loss": 8.7476, "step": 14880 }, { "epoch": 5.39, "learning_rate": 4.907626123598551e-07, "loss": 8.7136, "step": 14900 }, { "epoch": 5.4, "learning_rate": 4.906667437724366e-07, "loss": 8.6728, "step": 14920 }, { "epoch": 5.4, "learning_rate": 4.905703897338963e-07, "loss": 8.7006, "step": 14940 }, { "epoch": 5.41, "learning_rate": 4.904735504385906e-07, "loss": 8.68, "step": 14960 }, { "epoch": 5.42, "learning_rate": 4.903762260818551e-07, "loss": 8.5956, "step": 14980 }, { "epoch": 5.42, "learning_rate": 4.902784168600036e-07, "loss": 8.5962, "step": 15000 }, { "epoch": 5.43, "learning_rate": 4.90180122970328e-07, "loss": 8.7187, "step": 15020 }, { "epoch": 5.44, "learning_rate": 4.900813446110978e-07, "loss": 8.6258, "step": 15040 }, { "epoch": 5.45, "learning_rate": 4.899820819815598e-07, 
"loss": 8.6823, "step": 15060 }, { "epoch": 5.45, "learning_rate": 4.898823352819375e-07, "loss": 8.7023, "step": 15080 }, { "epoch": 5.46, "learning_rate": 4.897821047134309e-07, "loss": 8.7809, "step": 15100 }, { "epoch": 5.47, "learning_rate": 4.896813904782162e-07, "loss": 8.6179, "step": 15120 }, { "epoch": 5.48, "learning_rate": 4.895801927794448e-07, "loss": 8.7411, "step": 15140 }, { "epoch": 5.48, "learning_rate": 4.894785118212435e-07, "loss": 8.7115, "step": 15160 }, { "epoch": 5.49, "learning_rate": 4.893763478087139e-07, "loss": 8.6297, "step": 15180 }, { "epoch": 5.5, "learning_rate": 4.892737009479322e-07, "loss": 8.5999, "step": 15200 }, { "epoch": 5.5, "learning_rate": 4.891705714459482e-07, "loss": 8.7592, "step": 15220 }, { "epoch": 5.51, "learning_rate": 4.890669595107853e-07, "loss": 8.5834, "step": 15240 }, { "epoch": 5.52, "learning_rate": 4.889628653514402e-07, "loss": 8.6487, "step": 15260 }, { "epoch": 5.53, "learning_rate": 4.888582891778821e-07, "loss": 8.6445, "step": 15280 }, { "epoch": 5.53, "learning_rate": 4.887532312010527e-07, "loss": 8.697, "step": 15300 }, { "epoch": 5.54, "learning_rate": 4.886476916328654e-07, "loss": 8.6094, "step": 15320 }, { "epoch": 5.55, "learning_rate": 4.885416706862048e-07, "loss": 8.7348, "step": 15340 }, { "epoch": 5.56, "learning_rate": 4.88435168574927e-07, "loss": 8.7422, "step": 15360 }, { "epoch": 5.56, "learning_rate": 4.883281855138585e-07, "loss": 8.7982, "step": 15380 }, { "epoch": 5.57, "learning_rate": 4.882207217187954e-07, "loss": 8.64, "step": 15400 }, { "epoch": 5.58, "learning_rate": 4.881127774065044e-07, "loss": 8.74, "step": 15420 }, { "epoch": 5.58, "learning_rate": 4.880043527947205e-07, "loss": 8.6966, "step": 15440 }, { "epoch": 5.59, "learning_rate": 4.878954481021483e-07, "loss": 8.7114, "step": 15460 }, { "epoch": 5.6, "learning_rate": 4.877860635484606e-07, "loss": 8.6878, "step": 15480 }, { "epoch": 5.61, "learning_rate": 4.876761993542975e-07, "loss": 8.6922, "step": 15500 }, { "epoch": 5.61, "learning_rate": 4.875658557412676e-07, "loss": 8.6065, "step": 15520 }, { "epoch": 5.62, "learning_rate": 4.874550329319457e-07, "loss": 8.6935, "step": 15540 }, { "epoch": 5.63, "learning_rate": 4.873437311498736e-07, "loss": 8.766, "step": 15560 }, { "epoch": 5.63, "learning_rate": 4.872319506195592e-07, "loss": 8.6696, "step": 15580 }, { "epoch": 5.64, "learning_rate": 4.871196915664761e-07, "loss": 8.6916, "step": 15600 }, { "epoch": 5.65, "learning_rate": 4.870069542170629e-07, "loss": 8.6553, "step": 15620 }, { "epoch": 5.66, "learning_rate": 4.868937387987233e-07, "loss": 8.698, "step": 15640 }, { "epoch": 5.66, "learning_rate": 4.867800455398251e-07, "loss": 8.6451, "step": 15660 }, { "epoch": 5.67, "learning_rate": 4.866658746697001e-07, "loss": 8.6472, "step": 15680 }, { "epoch": 5.68, "learning_rate": 4.865512264186433e-07, "loss": 8.5794, "step": 15700 }, { "epoch": 5.69, "learning_rate": 4.864361010179128e-07, "loss": 8.6878, "step": 15720 }, { "epoch": 5.69, "learning_rate": 4.863204986997294e-07, "loss": 8.5777, "step": 15740 }, { "epoch": 5.7, "learning_rate": 4.862044196972751e-07, "loss": 8.724, "step": 15760 }, { "epoch": 5.71, "learning_rate": 4.860878642446943e-07, "loss": 8.7462, "step": 15780 }, { "epoch": 5.71, "learning_rate": 4.859708325770919e-07, "loss": 8.6529, "step": 15800 }, { "epoch": 5.72, "learning_rate": 4.858533249305336e-07, "loss": 8.628, "step": 15820 }, { "epoch": 5.73, "learning_rate": 4.857353415420452e-07, "loss": 8.6557, "step": 15840 }, { "epoch": 5.74, 
"learning_rate": 4.856168826496122e-07, "loss": 8.6378, "step": 15860 }, { "epoch": 5.74, "learning_rate": 4.854979484921789e-07, "loss": 8.6128, "step": 15880 }, { "epoch": 5.75, "learning_rate": 4.853785393096487e-07, "loss": 8.6342, "step": 15900 }, { "epoch": 5.76, "learning_rate": 4.852586553428828e-07, "loss": 8.623, "step": 15920 }, { "epoch": 5.76, "learning_rate": 4.851382968337004e-07, "loss": 8.6746, "step": 15940 }, { "epoch": 5.77, "learning_rate": 4.850174640248775e-07, "loss": 8.7209, "step": 15960 }, { "epoch": 5.78, "learning_rate": 4.848961571601475e-07, "loss": 8.676, "step": 15980 }, { "epoch": 5.79, "learning_rate": 4.847743764841993e-07, "loss": 8.6759, "step": 16000 }, { "epoch": 5.79, "learning_rate": 4.84652122242678e-07, "loss": 8.6827, "step": 16020 }, { "epoch": 5.8, "learning_rate": 4.845293946821836e-07, "loss": 8.6991, "step": 16040 }, { "epoch": 5.81, "learning_rate": 4.844061940502711e-07, "loss": 8.7061, "step": 16060 }, { "epoch": 5.82, "learning_rate": 4.842825205954495e-07, "loss": 8.6462, "step": 16080 }, { "epoch": 5.82, "learning_rate": 4.84158374567182e-07, "loss": 8.647, "step": 16100 }, { "epoch": 5.83, "learning_rate": 4.840337562158843e-07, "loss": 8.8409, "step": 16120 }, { "epoch": 5.84, "learning_rate": 4.839086657929256e-07, "loss": 8.6913, "step": 16140 }, { "epoch": 5.84, "learning_rate": 4.837831035506267e-07, "loss": 8.7391, "step": 16160 }, { "epoch": 5.85, "learning_rate": 4.836570697422605e-07, "loss": 8.7379, "step": 16180 }, { "epoch": 5.86, "learning_rate": 4.835305646220509e-07, "loss": 8.644, "step": 16200 }, { "epoch": 5.87, "learning_rate": 4.834035884451725e-07, "loss": 8.7277, "step": 16220 }, { "epoch": 5.87, "learning_rate": 4.832761414677502e-07, "loss": 8.6412, "step": 16240 }, { "epoch": 5.88, "learning_rate": 4.831482239468585e-07, "loss": 8.7398, "step": 16260 }, { "epoch": 5.89, "learning_rate": 4.83019836140521e-07, "loss": 8.754, "step": 16280 }, { "epoch": 5.9, "learning_rate": 4.828909783077099e-07, "loss": 8.6548, "step": 16300 }, { "epoch": 5.9, "learning_rate": 4.827616507083456e-07, "loss": 8.6114, "step": 16320 }, { "epoch": 5.91, "learning_rate": 4.826318536032958e-07, "loss": 8.6163, "step": 16340 }, { "epoch": 5.92, "learning_rate": 4.825015872543758e-07, "loss": 8.7388, "step": 16360 }, { "epoch": 5.92, "learning_rate": 4.823708519243467e-07, "loss": 8.7207, "step": 16380 }, { "epoch": 5.93, "learning_rate": 4.822396478769162e-07, "loss": 8.7599, "step": 16400 }, { "epoch": 5.94, "learning_rate": 4.821079753767371e-07, "loss": 8.7465, "step": 16420 }, { "epoch": 5.95, "learning_rate": 4.819758346894072e-07, "loss": 8.6565, "step": 16440 }, { "epoch": 5.95, "learning_rate": 4.818432260814688e-07, "loss": 8.6793, "step": 16460 }, { "epoch": 5.96, "learning_rate": 4.817101498204078e-07, "loss": 8.7599, "step": 16480 }, { "epoch": 5.97, "learning_rate": 4.815766061746537e-07, "loss": 8.7688, "step": 16500 }, { "epoch": 5.97, "learning_rate": 4.814425954135785e-07, "loss": 8.6962, "step": 16520 }, { "epoch": 5.98, "learning_rate": 4.813081178074968e-07, "loss": 8.7069, "step": 16540 }, { "epoch": 5.99, "learning_rate": 4.811731736276643e-07, "loss": 8.6498, "step": 16560 }, { "epoch": 6.0, "learning_rate": 4.810377631462785e-07, "loss": 8.6458, "step": 16580 }, { "epoch": 6.0, "learning_rate": 4.809018866364766e-07, "loss": 8.6952, "step": 16600 }, { "epoch": 6.01, "learning_rate": 4.80765544372337e-07, "loss": 8.6439, "step": 16620 }, { "epoch": 6.02, "learning_rate": 4.806287366288766e-07, "loss": 8.5903, 
"step": 16640 }, { "epoch": 6.03, "learning_rate": 4.804914636820516e-07, "loss": 8.7511, "step": 16660 }, { "epoch": 6.03, "learning_rate": 4.803537258087566e-07, "loss": 8.6639, "step": 16680 }, { "epoch": 6.04, "learning_rate": 4.802155232868239e-07, "loss": 8.9259, "step": 16700 }, { "epoch": 6.05, "learning_rate": 4.800768563950231e-07, "loss": 8.662, "step": 16720 }, { "epoch": 6.05, "learning_rate": 4.799377254130606e-07, "loss": 8.7333, "step": 16740 }, { "epoch": 6.06, "learning_rate": 4.797981306215784e-07, "loss": 8.6613, "step": 16760 }, { "epoch": 6.07, "learning_rate": 4.79658072302155e-07, "loss": 8.7945, "step": 16780 }, { "epoch": 6.08, "learning_rate": 4.795175507373028e-07, "loss": 8.6238, "step": 16800 }, { "epoch": 6.08, "learning_rate": 4.793765662104696e-07, "loss": 8.6998, "step": 16820 }, { "epoch": 6.09, "learning_rate": 4.792351190060363e-07, "loss": 8.7753, "step": 16840 }, { "epoch": 6.1, "learning_rate": 4.790932094093175e-07, "loss": 8.5856, "step": 16860 }, { "epoch": 6.1, "learning_rate": 4.789508377065603e-07, "loss": 8.6769, "step": 16880 }, { "epoch": 6.11, "learning_rate": 4.788080041849441e-07, "loss": 8.6106, "step": 16900 }, { "epoch": 6.12, "learning_rate": 4.786647091325796e-07, "loss": 8.6677, "step": 16920 }, { "epoch": 6.13, "learning_rate": 4.785209528385087e-07, "loss": 8.5975, "step": 16940 }, { "epoch": 6.13, "learning_rate": 4.783767355927033e-07, "loss": 8.7969, "step": 16960 }, { "epoch": 6.14, "learning_rate": 4.782320576860656e-07, "loss": 8.6307, "step": 16980 }, { "epoch": 6.15, "learning_rate": 4.780869194104268e-07, "loss": 8.5835, "step": 17000 }, { "epoch": 6.16, "learning_rate": 4.779413210585464e-07, "loss": 8.6421, "step": 17020 }, { "epoch": 6.16, "learning_rate": 4.777952629241122e-07, "loss": 8.7197, "step": 17040 }, { "epoch": 6.17, "learning_rate": 4.776487453017397e-07, "loss": 8.672, "step": 17060 }, { "epoch": 6.18, "learning_rate": 4.775017684869707e-07, "loss": 8.6878, "step": 17080 }, { "epoch": 6.18, "learning_rate": 4.773543327762737e-07, "loss": 8.6636, "step": 17100 }, { "epoch": 6.19, "learning_rate": 4.772064384670424e-07, "loss": 8.5946, "step": 17120 }, { "epoch": 6.2, "learning_rate": 4.77058085857596e-07, "loss": 8.7402, "step": 17140 }, { "epoch": 6.21, "learning_rate": 4.769092752471778e-07, "loss": 8.5723, "step": 17160 }, { "epoch": 6.21, "learning_rate": 4.7676000693595506e-07, "loss": 8.8408, "step": 17180 }, { "epoch": 6.22, "learning_rate": 4.766102812250183e-07, "loss": 8.723, "step": 17200 }, { "epoch": 6.23, "learning_rate": 4.764600984163808e-07, "loss": 8.6513, "step": 17220 }, { "epoch": 6.24, "learning_rate": 4.7630945881297746e-07, "loss": 8.6348, "step": 17240 }, { "epoch": 6.24, "learning_rate": 4.761583627186649e-07, "loss": 8.572, "step": 17260 }, { "epoch": 6.25, "learning_rate": 4.7600681043822044e-07, "loss": 8.7423, "step": 17280 }, { "epoch": 6.26, "learning_rate": 4.7585480227734163e-07, "loss": 8.6616, "step": 17300 }, { "epoch": 6.26, "learning_rate": 4.7570233854264564e-07, "loss": 8.7175, "step": 17320 }, { "epoch": 6.27, "learning_rate": 4.7554941954166826e-07, "loss": 8.7237, "step": 17340 }, { "epoch": 6.28, "learning_rate": 4.7539604558286395e-07, "loss": 8.8209, "step": 17360 }, { "epoch": 6.29, "learning_rate": 4.752422169756047e-07, "loss": 8.6999, "step": 17380 }, { "epoch": 6.29, "learning_rate": 4.7508793403017976e-07, "loss": 8.6965, "step": 17400 }, { "epoch": 6.3, "learning_rate": 4.749331970577946e-07, "loss": 8.6204, "step": 17420 }, { "epoch": 6.31, 
"learning_rate": 4.747780063705705e-07, "loss": 8.6613, "step": 17440 }, { "epoch": 6.31, "learning_rate": 4.7462236228154405e-07, "loss": 8.7295, "step": 17460 }, { "epoch": 6.32, "learning_rate": 4.744662651046666e-07, "loss": 8.6806, "step": 17480 }, { "epoch": 6.33, "learning_rate": 4.7430971515480304e-07, "loss": 8.6633, "step": 17500 }, { "epoch": 6.34, "learning_rate": 4.741527127477317e-07, "loss": 8.676, "step": 17520 }, { "epoch": 6.34, "learning_rate": 4.7399525820014376e-07, "loss": 8.6381, "step": 17540 }, { "epoch": 6.35, "learning_rate": 4.738373518296421e-07, "loss": 8.7071, "step": 17560 }, { "epoch": 6.36, "learning_rate": 4.7367899395474106e-07, "loss": 8.6584, "step": 17580 }, { "epoch": 6.37, "learning_rate": 4.7352018489486606e-07, "loss": 8.7591, "step": 17600 }, { "epoch": 6.37, "learning_rate": 4.7336092497035207e-07, "loss": 8.7067, "step": 17620 }, { "epoch": 6.38, "learning_rate": 4.732012145024439e-07, "loss": 8.6802, "step": 17640 }, { "epoch": 6.39, "learning_rate": 4.7304105381329484e-07, "loss": 8.9072, "step": 17660 }, { "epoch": 6.39, "learning_rate": 4.7288044322596663e-07, "loss": 8.6963, "step": 17680 }, { "epoch": 6.4, "learning_rate": 4.727193830644285e-07, "loss": 8.6309, "step": 17700 }, { "epoch": 6.41, "learning_rate": 4.725578736535562e-07, "loss": 8.6925, "step": 17720 }, { "epoch": 6.42, "learning_rate": 4.723959153191319e-07, "loss": 8.653, "step": 17740 }, { "epoch": 6.42, "learning_rate": 4.722335083878433e-07, "loss": 8.612, "step": 17760 }, { "epoch": 6.43, "learning_rate": 4.7207065318728296e-07, "loss": 8.7143, "step": 17780 }, { "epoch": 6.44, "learning_rate": 4.7190735004594753e-07, "loss": 8.6625, "step": 17800 }, { "epoch": 6.44, "learning_rate": 4.7174359929323735e-07, "loss": 8.6155, "step": 17820 }, { "epoch": 6.45, "learning_rate": 4.715794012594555e-07, "loss": 8.7971, "step": 17840 }, { "epoch": 6.46, "learning_rate": 4.7141475627580754e-07, "loss": 8.6821, "step": 17860 }, { "epoch": 6.47, "learning_rate": 4.712496646744002e-07, "loss": 8.6292, "step": 17880 }, { "epoch": 6.47, "learning_rate": 4.7108412678824134e-07, "loss": 8.6244, "step": 17900 }, { "epoch": 6.48, "learning_rate": 4.70918142951239e-07, "loss": 8.5691, "step": 17920 }, { "epoch": 6.49, "learning_rate": 4.7075171349820077e-07, "loss": 8.6863, "step": 17940 }, { "epoch": 6.5, "learning_rate": 4.705848387648329e-07, "loss": 8.6821, "step": 17960 }, { "epoch": 6.5, "learning_rate": 4.7041751908774007e-07, "loss": 8.6589, "step": 17980 }, { "epoch": 6.51, "learning_rate": 4.702497548044243e-07, "loss": 8.7016, "step": 18000 }, { "epoch": 6.52, "learning_rate": 4.700815462532845e-07, "loss": 8.725, "step": 18020 }, { "epoch": 6.52, "learning_rate": 4.699128937736157e-07, "loss": 8.6708, "step": 18040 }, { "epoch": 6.53, "learning_rate": 4.697437977056084e-07, "loss": 8.8955, "step": 18060 }, { "epoch": 6.54, "learning_rate": 4.695742583903478e-07, "loss": 8.7241, "step": 18080 }, { "epoch": 6.55, "learning_rate": 4.694042761698134e-07, "loss": 8.6222, "step": 18100 }, { "epoch": 6.55, "learning_rate": 4.692338513868776e-07, "loss": 8.7879, "step": 18120 }, { "epoch": 6.56, "learning_rate": 4.6906298438530604e-07, "loss": 8.6359, "step": 18140 }, { "epoch": 6.57, "learning_rate": 4.6889167550975613e-07, "loss": 8.6056, "step": 18160 }, { "epoch": 6.58, "learning_rate": 4.6871992510577644e-07, "loss": 8.6364, "step": 18180 }, { "epoch": 6.58, "learning_rate": 4.6854773351980647e-07, "loss": 8.6699, "step": 18200 }, { "epoch": 6.59, "learning_rate": 
4.683751010991754e-07, "loss": 8.6423, "step": 18220 }, { "epoch": 6.6, "learning_rate": 4.682020281921017e-07, "loss": 8.7572, "step": 18240 }, { "epoch": 6.6, "learning_rate": 4.6802851514769227e-07, "loss": 8.7399, "step": 18260 }, { "epoch": 6.61, "learning_rate": 4.67854562315942e-07, "loss": 8.6589, "step": 18280 }, { "epoch": 6.62, "learning_rate": 4.6768017004773263e-07, "loss": 8.7953, "step": 18300 }, { "epoch": 6.63, "learning_rate": 4.6750533869483257e-07, "loss": 8.5799, "step": 18320 }, { "epoch": 6.63, "learning_rate": 4.6733006860989567e-07, "loss": 8.6455, "step": 18340 }, { "epoch": 6.64, "learning_rate": 4.6715436014646077e-07, "loss": 8.6309, "step": 18360 }, { "epoch": 6.65, "learning_rate": 4.669782136589512e-07, "loss": 8.6688, "step": 18380 }, { "epoch": 6.65, "learning_rate": 4.6680162950267356e-07, "loss": 8.6939, "step": 18400 }, { "epoch": 6.66, "learning_rate": 4.666246080338175e-07, "loss": 8.6871, "step": 18420 }, { "epoch": 6.67, "learning_rate": 4.6644714960945453e-07, "loss": 8.7004, "step": 18440 }, { "epoch": 6.68, "learning_rate": 4.662692545875378e-07, "loss": 8.6696, "step": 18460 }, { "epoch": 6.68, "learning_rate": 4.660909233269009e-07, "loss": 8.6115, "step": 18480 }, { "epoch": 6.69, "learning_rate": 4.6591215618725766e-07, "loss": 8.6684, "step": 18500 }, { "epoch": 6.7, "learning_rate": 4.657329535292007e-07, "loss": 8.7201, "step": 18520 }, { "epoch": 6.71, "learning_rate": 4.6555331571420155e-07, "loss": 8.6737, "step": 18540 }, { "epoch": 6.71, "learning_rate": 4.653732431046092e-07, "loss": 8.709, "step": 18560 }, { "epoch": 6.72, "learning_rate": 4.6519273606364984e-07, "loss": 8.5615, "step": 18580 }, { "epoch": 6.73, "learning_rate": 4.6501179495542585e-07, "loss": 8.6053, "step": 18600 }, { "epoch": 6.73, "learning_rate": 4.6483042014491527e-07, "loss": 8.6445, "step": 18620 }, { "epoch": 6.74, "learning_rate": 4.646486119979709e-07, "loss": 8.7607, "step": 18640 }, { "epoch": 6.75, "learning_rate": 4.6446637088131956e-07, "loss": 8.7539, "step": 18660 }, { "epoch": 6.76, "learning_rate": 4.642836971625616e-07, "loss": 8.6076, "step": 18680 }, { "epoch": 6.76, "learning_rate": 4.6410059121016984e-07, "loss": 8.7657, "step": 18700 }, { "epoch": 6.77, "learning_rate": 4.639170533934891e-07, "loss": 8.5961, "step": 18720 }, { "epoch": 6.78, "learning_rate": 4.6373308408273495e-07, "loss": 8.7081, "step": 18740 }, { "epoch": 6.78, "learning_rate": 4.635486836489938e-07, "loss": 8.8006, "step": 18760 }, { "epoch": 6.79, "learning_rate": 4.633638524642215e-07, "loss": 8.6963, "step": 18780 }, { "epoch": 6.8, "learning_rate": 4.631785909012426e-07, "loss": 8.6344, "step": 18800 }, { "epoch": 6.81, "learning_rate": 4.6299289933375007e-07, "loss": 8.7319, "step": 18820 }, { "epoch": 6.81, "learning_rate": 4.6280677813630397e-07, "loss": 8.7458, "step": 18840 }, { "epoch": 6.82, "learning_rate": 4.626202276843311e-07, "loss": 8.6465, "step": 18860 }, { "epoch": 6.83, "learning_rate": 4.624332483541242e-07, "loss": 8.5764, "step": 18880 }, { "epoch": 6.84, "learning_rate": 4.62245840522841e-07, "loss": 8.7452, "step": 18900 }, { "epoch": 6.84, "learning_rate": 4.6205800456850343e-07, "loss": 8.6529, "step": 18920 }, { "epoch": 6.85, "learning_rate": 4.618697408699973e-07, "loss": 8.6897, "step": 18940 }, { "epoch": 6.86, "learning_rate": 4.6168104980707103e-07, "loss": 8.7269, "step": 18960 }, { "epoch": 6.86, "learning_rate": 4.6149193176033505e-07, "loss": 8.6609, "step": 18980 }, { "epoch": 6.87, "learning_rate": 4.6130238711126123e-07, "loss": 
8.6924, "step": 19000 }, { "epoch": 6.88, "learning_rate": 4.6111241624218166e-07, "loss": 8.7371, "step": 19020 }, { "epoch": 6.89, "learning_rate": 4.609220195362886e-07, "loss": 8.6188, "step": 19040 }, { "epoch": 6.89, "learning_rate": 4.607311973776328e-07, "loss": 8.7018, "step": 19060 }, { "epoch": 6.9, "learning_rate": 4.6053995015112343e-07, "loss": 8.5875, "step": 19080 }, { "epoch": 6.91, "learning_rate": 4.6034827824252715e-07, "loss": 8.7276, "step": 19100 }, { "epoch": 6.92, "learning_rate": 4.601561820384671e-07, "loss": 8.5814, "step": 19120 }, { "epoch": 6.92, "learning_rate": 4.5996366192642226e-07, "loss": 8.6103, "step": 19140 }, { "epoch": 6.93, "learning_rate": 4.597707182947268e-07, "loss": 8.6523, "step": 19160 }, { "epoch": 6.94, "learning_rate": 4.595773515325691e-07, "loss": 8.6586, "step": 19180 }, { "epoch": 6.94, "learning_rate": 4.593835620299911e-07, "loss": 8.6554, "step": 19200 }, { "epoch": 6.95, "learning_rate": 4.5918935017788724e-07, "loss": 8.6933, "step": 19220 }, { "epoch": 6.96, "learning_rate": 4.589947163680041e-07, "loss": 8.6822, "step": 19240 }, { "epoch": 6.97, "learning_rate": 4.5879966099293955e-07, "loss": 8.7641, "step": 19260 }, { "epoch": 6.97, "learning_rate": 4.5860418444614133e-07, "loss": 8.6162, "step": 19280 }, { "epoch": 6.98, "learning_rate": 4.5840828712190717e-07, "loss": 8.6388, "step": 19300 }, { "epoch": 6.99, "learning_rate": 4.5821196941538334e-07, "loss": 8.8059, "step": 19320 }, { "epoch": 6.99, "learning_rate": 4.580152317225641e-07, "loss": 8.6918, "step": 19340 }, { "epoch": 7.0, "learning_rate": 4.5781807444029075e-07, "loss": 8.6036, "step": 19360 }, { "epoch": 7.01, "learning_rate": 4.5762049796625124e-07, "loss": 8.697, "step": 19380 }, { "epoch": 7.02, "learning_rate": 4.5742250269897884e-07, "loss": 8.65, "step": 19400 }, { "epoch": 7.02, "learning_rate": 4.572240890378517e-07, "loss": 8.6848, "step": 19420 }, { "epoch": 7.03, "learning_rate": 4.570252573830918e-07, "loss": 8.7286, "step": 19440 }, { "epoch": 7.04, "learning_rate": 4.568260081357643e-07, "loss": 8.5952, "step": 19460 }, { "epoch": 7.05, "learning_rate": 4.5662634169777674e-07, "loss": 8.6095, "step": 19480 }, { "epoch": 7.05, "learning_rate": 4.5642625847187813e-07, "loss": 8.7558, "step": 19500 }, { "epoch": 7.06, "learning_rate": 4.5622575886165826e-07, "loss": 8.6469, "step": 19520 }, { "epoch": 7.07, "learning_rate": 4.5602484327154666e-07, "loss": 8.6547, "step": 19540 }, { "epoch": 7.07, "learning_rate": 4.55823512106812e-07, "loss": 8.6466, "step": 19560 }, { "epoch": 7.08, "learning_rate": 4.5562176577356146e-07, "loss": 8.7095, "step": 19580 }, { "epoch": 7.09, "learning_rate": 4.554196046787392e-07, "loss": 8.7402, "step": 19600 }, { "epoch": 7.1, "learning_rate": 4.552170292301264e-07, "loss": 8.6959, "step": 19620 }, { "epoch": 7.1, "learning_rate": 4.550140398363398e-07, "loss": 8.6991, "step": 19640 }, { "epoch": 7.11, "learning_rate": 4.5481063690683116e-07, "loss": 8.6932, "step": 19660 }, { "epoch": 7.12, "learning_rate": 4.546068208518865e-07, "loss": 8.702, "step": 19680 }, { "epoch": 7.12, "learning_rate": 4.5440259208262497e-07, "loss": 8.7574, "step": 19700 }, { "epoch": 7.13, "learning_rate": 4.5419795101099847e-07, "loss": 8.7992, "step": 19720 }, { "epoch": 7.14, "learning_rate": 4.539928980497902e-07, "loss": 8.7671, "step": 19740 }, { "epoch": 7.15, "learning_rate": 4.537874336126146e-07, "loss": 8.7184, "step": 19760 }, { "epoch": 7.15, "learning_rate": 4.535815581139157e-07, "loss": 8.6308, "step": 19780 }, { 
"epoch": 7.16, "learning_rate": 4.5337527196896715e-07, "loss": 8.7107, "step": 19800 }, { "epoch": 7.17, "learning_rate": 4.5316857559387036e-07, "loss": 8.6339, "step": 19820 }, { "epoch": 7.18, "learning_rate": 4.529614694055546e-07, "loss": 8.676, "step": 19840 }, { "epoch": 7.18, "learning_rate": 4.527539538217757e-07, "loss": 8.686, "step": 19860 }, { "epoch": 7.19, "learning_rate": 4.5254602926111533e-07, "loss": 8.7237, "step": 19880 }, { "epoch": 7.2, "learning_rate": 4.5233769614298e-07, "loss": 8.6244, "step": 19900 }, { "epoch": 7.2, "learning_rate": 4.521289548876003e-07, "loss": 8.6432, "step": 19920 }, { "epoch": 7.21, "learning_rate": 4.519198059160303e-07, "loss": 8.6141, "step": 19940 }, { "epoch": 7.22, "learning_rate": 4.517102496501462e-07, "loss": 8.6889, "step": 19960 }, { "epoch": 7.23, "learning_rate": 4.5150028651264596e-07, "loss": 8.5727, "step": 19980 }, { "epoch": 7.23, "learning_rate": 4.512899169270481e-07, "loss": 8.6158, "step": 20000 }, { "epoch": 7.24, "learning_rate": 4.510791413176912e-07, "loss": 8.7124, "step": 20020 }, { "epoch": 7.25, "learning_rate": 4.508679601097326e-07, "loss": 8.6239, "step": 20040 }, { "epoch": 7.25, "learning_rate": 4.5065637372914784e-07, "loss": 8.7528, "step": 20060 }, { "epoch": 7.26, "learning_rate": 4.504443826027298e-07, "loss": 8.6065, "step": 20080 }, { "epoch": 7.27, "learning_rate": 4.5023198715808783e-07, "loss": 8.7053, "step": 20100 }, { "epoch": 7.28, "learning_rate": 4.5001918782364665e-07, "loss": 8.6622, "step": 20120 }, { "epoch": 7.28, "learning_rate": 4.498059850286459e-07, "loss": 8.6676, "step": 20140 }, { "epoch": 7.29, "learning_rate": 4.4959237920313877e-07, "loss": 8.6896, "step": 20160 }, { "epoch": 7.3, "learning_rate": 4.493783707779916e-07, "loss": 8.7231, "step": 20180 }, { "epoch": 7.31, "learning_rate": 4.491639601848828e-07, "loss": 8.6187, "step": 20200 }, { "epoch": 7.31, "learning_rate": 4.489491478563019e-07, "loss": 8.7279, "step": 20220 }, { "epoch": 7.32, "learning_rate": 4.4873393422554894e-07, "loss": 8.6275, "step": 20240 }, { "epoch": 7.33, "learning_rate": 4.4851831972673324e-07, "loss": 8.6786, "step": 20260 }, { "epoch": 7.33, "learning_rate": 4.483023047947729e-07, "loss": 8.679, "step": 20280 }, { "epoch": 7.34, "learning_rate": 4.4808588986539355e-07, "loss": 8.6271, "step": 20300 }, { "epoch": 7.35, "learning_rate": 4.478690753751278e-07, "loss": 8.6284, "step": 20320 }, { "epoch": 7.36, "learning_rate": 4.476518617613142e-07, "loss": 8.7289, "step": 20340 }, { "epoch": 7.36, "learning_rate": 4.4743424946209627e-07, "loss": 8.7282, "step": 20360 }, { "epoch": 7.37, "learning_rate": 4.4721623891642185e-07, "loss": 8.709, "step": 20380 }, { "epoch": 7.38, "learning_rate": 4.46997830564042e-07, "loss": 8.8007, "step": 20400 }, { "epoch": 7.39, "learning_rate": 4.4677902484551023e-07, "loss": 8.7284, "step": 20420 }, { "epoch": 7.39, "learning_rate": 4.4655982220218167e-07, "loss": 8.6282, "step": 20440 }, { "epoch": 7.4, "learning_rate": 4.463402230762119e-07, "loss": 8.7566, "step": 20460 }, { "epoch": 7.41, "learning_rate": 4.461202279105565e-07, "loss": 8.7809, "step": 20480 }, { "epoch": 7.41, "learning_rate": 4.458998371489695e-07, "loss": 8.6693, "step": 20500 }, { "epoch": 7.42, "learning_rate": 4.4567905123600345e-07, "loss": 8.6117, "step": 20520 }, { "epoch": 7.43, "learning_rate": 4.4545787061700746e-07, "loss": 8.5389, "step": 20540 }, { "epoch": 7.44, "learning_rate": 4.4523629573812705e-07, "loss": 8.6785, "step": 20560 }, { "epoch": 7.44, "learning_rate": 
4.45014327046303e-07, "loss": 8.6557, "step": 20580 }, { "epoch": 7.45, "learning_rate": 4.447919649892704e-07, "loss": 8.7228, "step": 20600 }, { "epoch": 7.46, "learning_rate": 4.445692100155579e-07, "loss": 8.661, "step": 20620 }, { "epoch": 7.46, "learning_rate": 4.443460625744865e-07, "loss": 8.5727, "step": 20640 }, { "epoch": 7.47, "learning_rate": 4.44122523116169e-07, "loss": 8.6239, "step": 20660 }, { "epoch": 7.48, "learning_rate": 4.438985920915089e-07, "loss": 8.62, "step": 20680 }, { "epoch": 7.49, "learning_rate": 4.436742699521997e-07, "loss": 8.7129, "step": 20700 }, { "epoch": 7.49, "learning_rate": 4.434495571507234e-07, "loss": 8.6496, "step": 20720 }, { "epoch": 7.5, "learning_rate": 4.432244541403506e-07, "loss": 8.7242, "step": 20740 }, { "epoch": 7.51, "learning_rate": 4.4299896137513837e-07, "loss": 8.7956, "step": 20760 }, { "epoch": 7.52, "learning_rate": 4.4277307930993045e-07, "loss": 8.7197, "step": 20780 }, { "epoch": 7.52, "learning_rate": 4.4254680840035554e-07, "loss": 8.8465, "step": 20800 }, { "epoch": 7.53, "learning_rate": 4.423201491028269e-07, "loss": 8.6558, "step": 20820 }, { "epoch": 7.54, "learning_rate": 4.42093101874541e-07, "loss": 8.7996, "step": 20840 }, { "epoch": 7.54, "learning_rate": 4.4186566717347693e-07, "loss": 8.8602, "step": 20860 }, { "epoch": 7.55, "learning_rate": 4.4163784545839543e-07, "loss": 8.5926, "step": 20880 }, { "epoch": 7.56, "learning_rate": 4.414096371888377e-07, "loss": 8.6053, "step": 20900 }, { "epoch": 7.57, "learning_rate": 4.411810428251248e-07, "loss": 8.6233, "step": 20920 }, { "epoch": 7.57, "learning_rate": 4.409520628283565e-07, "loss": 8.7897, "step": 20940 }, { "epoch": 7.58, "learning_rate": 4.407226976604105e-07, "loss": 8.6926, "step": 20960 }, { "epoch": 7.59, "learning_rate": 4.404929477839414e-07, "loss": 8.6766, "step": 20980 }, { "epoch": 7.59, "learning_rate": 4.402628136623798e-07, "loss": 8.6537, "step": 21000 }, { "epoch": 7.6, "learning_rate": 4.400322957599314e-07, "loss": 8.697, "step": 21020 }, { "epoch": 7.61, "learning_rate": 4.3980139454157607e-07, "loss": 8.6, "step": 21040 }, { "epoch": 7.62, "learning_rate": 4.3957011047306656e-07, "loss": 8.7509, "step": 21060 }, { "epoch": 7.62, "learning_rate": 4.393384440209284e-07, "loss": 8.8166, "step": 21080 }, { "epoch": 7.63, "learning_rate": 4.39106395652458e-07, "loss": 8.7277, "step": 21100 }, { "epoch": 7.64, "learning_rate": 4.3887396583572225e-07, "loss": 8.6775, "step": 21120 }, { "epoch": 7.65, "learning_rate": 4.386411550395575e-07, "loss": 8.637, "step": 21140 }, { "epoch": 7.65, "learning_rate": 4.3840796373356864e-07, "loss": 8.6908, "step": 21160 }, { "epoch": 7.66, "learning_rate": 4.381743923881279e-07, "loss": 8.6484, "step": 21180 }, { "epoch": 7.67, "learning_rate": 4.3794044147437437e-07, "loss": 8.6396, "step": 21200 }, { "epoch": 7.67, "learning_rate": 4.377061114642125e-07, "loss": 8.7759, "step": 21220 }, { "epoch": 7.68, "learning_rate": 4.3747140283031153e-07, "loss": 8.6919, "step": 21240 }, { "epoch": 7.69, "learning_rate": 4.3723631604610447e-07, "loss": 8.6463, "step": 21260 }, { "epoch": 7.7, "learning_rate": 4.3700085158578694e-07, "loss": 8.8272, "step": 21280 }, { "epoch": 7.7, "learning_rate": 4.367650099243166e-07, "loss": 8.6321, "step": 21300 }, { "epoch": 7.71, "learning_rate": 4.365287915374118e-07, "loss": 8.6541, "step": 21320 }, { "epoch": 7.72, "learning_rate": 4.362921969015509e-07, "loss": 8.6122, "step": 21340 }, { "epoch": 7.73, "learning_rate": 4.360552264939712e-07, "loss": 8.6558, "step": 
21360 }, { "epoch": 7.73, "learning_rate": 4.358178807926677e-07, "loss": 8.7851, "step": 21380 }, { "epoch": 7.74, "learning_rate": 4.355801602763927e-07, "loss": 8.7331, "step": 21400 }, { "epoch": 7.75, "learning_rate": 4.353420654246546e-07, "loss": 8.6654, "step": 21420 }, { "epoch": 7.75, "learning_rate": 4.3510359671771647e-07, "loss": 8.6424, "step": 21440 }, { "epoch": 7.76, "learning_rate": 4.3486475463659593e-07, "loss": 8.6382, "step": 21460 }, { "epoch": 7.77, "learning_rate": 4.3462553966306357e-07, "loss": 8.7952, "step": 21480 }, { "epoch": 7.78, "learning_rate": 4.3438595227964205e-07, "loss": 8.6571, "step": 21500 }, { "epoch": 7.78, "learning_rate": 4.341459929696054e-07, "loss": 8.615, "step": 21520 }, { "epoch": 7.79, "learning_rate": 4.3390566221697764e-07, "loss": 8.6564, "step": 21540 }, { "epoch": 7.8, "learning_rate": 4.3366496050653235e-07, "loss": 8.7334, "step": 21560 }, { "epoch": 7.8, "learning_rate": 4.3342388832379094e-07, "loss": 8.5915, "step": 21580 }, { "epoch": 7.81, "learning_rate": 4.3318244615502254e-07, "loss": 8.7452, "step": 21600 }, { "epoch": 7.82, "learning_rate": 4.329406344872423e-07, "loss": 8.62, "step": 21620 }, { "epoch": 7.83, "learning_rate": 4.326984538082108e-07, "loss": 8.6302, "step": 21640 }, { "epoch": 7.83, "learning_rate": 4.3245590460643293e-07, "loss": 8.6475, "step": 21660 }, { "epoch": 7.84, "learning_rate": 4.32212987371157e-07, "loss": 8.771, "step": 21680 }, { "epoch": 7.85, "learning_rate": 4.3196970259237355e-07, "loss": 8.6497, "step": 21700 }, { "epoch": 7.86, "learning_rate": 4.3172605076081456e-07, "loss": 8.8053, "step": 21720 }, { "epoch": 7.86, "learning_rate": 4.3148203236795234e-07, "loss": 8.6689, "step": 21740 }, { "epoch": 7.87, "learning_rate": 4.312376479059988e-07, "loss": 8.6764, "step": 21760 }, { "epoch": 7.88, "learning_rate": 4.3099289786790405e-07, "loss": 8.6942, "step": 21780 }, { "epoch": 7.88, "learning_rate": 4.307477827473556e-07, "loss": 8.7068, "step": 21800 }, { "epoch": 7.89, "learning_rate": 4.305023030387775e-07, "loss": 8.6783, "step": 21820 }, { "epoch": 7.9, "learning_rate": 4.302564592373292e-07, "loss": 8.7257, "step": 21840 }, { "epoch": 7.91, "learning_rate": 4.300102518389044e-07, "loss": 8.7192, "step": 21860 }, { "epoch": 7.91, "learning_rate": 4.2976368134013033e-07, "loss": 8.7052, "step": 21880 }, { "epoch": 7.92, "learning_rate": 4.295167482383667e-07, "loss": 8.687, "step": 21900 }, { "epoch": 7.93, "learning_rate": 4.292694530317046e-07, "loss": 8.6849, "step": 21920 }, { "epoch": 7.93, "learning_rate": 4.2902179621896534e-07, "loss": 8.6997, "step": 21940 }, { "epoch": 7.94, "learning_rate": 4.2877377829969983e-07, "loss": 8.6191, "step": 21960 }, { "epoch": 7.95, "learning_rate": 4.2852539977418745e-07, "loss": 8.6416, "step": 21980 }, { "epoch": 7.96, "learning_rate": 4.2827666114343463e-07, "loss": 8.6274, "step": 22000 }, { "epoch": 7.96, "learning_rate": 4.2802756290917446e-07, "loss": 8.6461, "step": 22020 }, { "epoch": 7.97, "learning_rate": 4.2777810557386534e-07, "loss": 8.6694, "step": 22040 }, { "epoch": 7.98, "learning_rate": 4.2752828964068996e-07, "loss": 8.6447, "step": 22060 }, { "epoch": 7.99, "learning_rate": 4.2727811561355423e-07, "loss": 8.7354, "step": 22080 }, { "epoch": 7.99, "learning_rate": 4.2702758399708674e-07, "loss": 8.7295, "step": 22100 }, { "epoch": 8.0, "learning_rate": 4.2677669529663686e-07, "loss": 8.6513, "step": 22120 }, { "epoch": 8.01, "learning_rate": 4.2652545001827474e-07, "loss": 8.5954, "step": 22140 }, { "epoch": 8.01, 
"learning_rate": 4.262738486687895e-07, "loss": 8.6644, "step": 22160 }, { "epoch": 8.02, "learning_rate": 4.2602189175568847e-07, "loss": 8.6703, "step": 22180 }, { "epoch": 8.03, "learning_rate": 4.2576957978719636e-07, "loss": 8.7068, "step": 22200 }, { "epoch": 8.04, "learning_rate": 4.255169132722539e-07, "loss": 8.7908, "step": 22220 }, { "epoch": 8.04, "learning_rate": 4.252638927205172e-07, "loss": 8.7085, "step": 22240 }, { "epoch": 8.05, "learning_rate": 4.250105186423563e-07, "loss": 8.6468, "step": 22260 }, { "epoch": 8.06, "learning_rate": 4.2475679154885443e-07, "loss": 8.6714, "step": 22280 }, { "epoch": 8.07, "learning_rate": 4.2450271195180675e-07, "loss": 8.7756, "step": 22300 }, { "epoch": 8.07, "learning_rate": 4.242482803637197e-07, "loss": 8.8086, "step": 22320 }, { "epoch": 8.08, "learning_rate": 4.2399349729780954e-07, "loss": 8.7139, "step": 22340 }, { "epoch": 8.09, "learning_rate": 4.237383632680015e-07, "loss": 8.7571, "step": 22360 }, { "epoch": 8.09, "learning_rate": 4.2348287878892896e-07, "loss": 8.6348, "step": 22380 }, { "epoch": 8.1, "learning_rate": 4.232270443759319e-07, "loss": 8.7034, "step": 22400 }, { "epoch": 8.11, "learning_rate": 4.2297086054505626e-07, "loss": 8.6211, "step": 22420 }, { "epoch": 8.12, "learning_rate": 4.2271432781305293e-07, "loss": 8.578, "step": 22440 }, { "epoch": 8.12, "learning_rate": 4.224574466973765e-07, "loss": 8.6008, "step": 22460 }, { "epoch": 8.13, "learning_rate": 4.222002177161841e-07, "loss": 8.6203, "step": 22480 }, { "epoch": 8.14, "learning_rate": 4.219426413883348e-07, "loss": 8.7373, "step": 22500 }, { "epoch": 8.14, "learning_rate": 4.216847182333881e-07, "loss": 8.712, "step": 22520 }, { "epoch": 8.15, "learning_rate": 4.2142644877160325e-07, "loss": 8.6409, "step": 22540 }, { "epoch": 8.16, "learning_rate": 4.2116783352393803e-07, "loss": 8.7292, "step": 22560 }, { "epoch": 8.17, "learning_rate": 4.2090887301204763e-07, "loss": 8.602, "step": 22580 }, { "epoch": 8.17, "learning_rate": 4.2064956775828366e-07, "loss": 8.8186, "step": 22600 }, { "epoch": 8.18, "learning_rate": 4.203899182856932e-07, "loss": 8.6242, "step": 22620 }, { "epoch": 8.19, "learning_rate": 4.201299251180176e-07, "loss": 8.7123, "step": 22640 }, { "epoch": 8.2, "learning_rate": 4.198695887796914e-07, "loss": 8.8066, "step": 22660 }, { "epoch": 8.2, "learning_rate": 4.1960890979584155e-07, "loss": 8.6635, "step": 22680 }, { "epoch": 8.21, "learning_rate": 4.1934788869228603e-07, "loss": 8.6928, "step": 22700 }, { "epoch": 8.22, "learning_rate": 4.1908652599553293e-07, "loss": 8.6065, "step": 22720 }, { "epoch": 8.22, "learning_rate": 4.188248222327794e-07, "loss": 8.651, "step": 22740 }, { "epoch": 8.23, "learning_rate": 4.1856277793191044e-07, "loss": 8.5583, "step": 22760 }, { "epoch": 8.24, "learning_rate": 4.1830039362149807e-07, "loss": 8.6621, "step": 22780 }, { "epoch": 8.25, "learning_rate": 4.1803766983080006e-07, "loss": 8.7382, "step": 22800 }, { "epoch": 8.25, "learning_rate": 4.177746070897592e-07, "loss": 8.6508, "step": 22820 }, { "epoch": 8.26, "learning_rate": 4.1751120592900156e-07, "loss": 8.6554, "step": 22840 }, { "epoch": 8.27, "learning_rate": 4.1724746687983623e-07, "loss": 8.6529, "step": 22860 }, { "epoch": 8.27, "learning_rate": 4.169833904742537e-07, "loss": 8.6613, "step": 22880 }, { "epoch": 8.28, "learning_rate": 4.1671897724492475e-07, "loss": 8.7068, "step": 22900 }, { "epoch": 8.29, "learning_rate": 4.164542277252e-07, "loss": 8.7057, "step": 22920 }, { "epoch": 8.3, "learning_rate": 
4.1618914244910797e-07, "loss": 8.6056, "step": 22940 }, { "epoch": 8.3, "learning_rate": 4.159237219513547e-07, "loss": 8.6663, "step": 22960 }, { "epoch": 8.31, "learning_rate": 4.1565796676732237e-07, "loss": 8.838, "step": 22980 }, { "epoch": 8.32, "learning_rate": 4.153918774330682e-07, "loss": 8.6266, "step": 23000 }, { "epoch": 8.33, "learning_rate": 4.151254544853234e-07, "loss": 8.6716, "step": 23020 }, { "epoch": 8.33, "learning_rate": 4.1485869846149233e-07, "loss": 8.8722, "step": 23040 }, { "epoch": 8.34, "learning_rate": 4.1459160989965087e-07, "loss": 8.7373, "step": 23060 }, { "epoch": 8.35, "learning_rate": 4.1432418933854586e-07, "loss": 8.5846, "step": 23080 }, { "epoch": 8.35, "learning_rate": 4.140564373175939e-07, "loss": 8.6194, "step": 23100 }, { "epoch": 8.36, "learning_rate": 4.1378835437687996e-07, "loss": 8.6622, "step": 23120 }, { "epoch": 8.37, "learning_rate": 4.135199410571567e-07, "loss": 8.6823, "step": 23140 }, { "epoch": 8.38, "learning_rate": 4.132511978998432e-07, "loss": 8.6646, "step": 23160 }, { "epoch": 8.38, "learning_rate": 4.1298212544702356e-07, "loss": 8.7136, "step": 23180 }, { "epoch": 8.39, "learning_rate": 4.1271272424144645e-07, "loss": 8.6156, "step": 23200 }, { "epoch": 8.4, "learning_rate": 4.124429948265235e-07, "loss": 8.6579, "step": 23220 }, { "epoch": 8.41, "learning_rate": 4.1217293774632844e-07, "loss": 8.5948, "step": 23240 }, { "epoch": 8.41, "learning_rate": 4.11902553545596e-07, "loss": 8.5581, "step": 23260 }, { "epoch": 8.42, "learning_rate": 4.116318427697205e-07, "loss": 8.7029, "step": 23280 }, { "epoch": 8.43, "learning_rate": 4.113608059647552e-07, "loss": 8.6475, "step": 23300 }, { "epoch": 8.43, "learning_rate": 4.1108944367741105e-07, "loss": 8.6973, "step": 23320 }, { "epoch": 8.44, "learning_rate": 4.108177564550554e-07, "loss": 8.6234, "step": 23340 }, { "epoch": 8.45, "learning_rate": 4.1054574484571105e-07, "loss": 8.7258, "step": 23360 }, { "epoch": 8.46, "learning_rate": 4.10273409398055e-07, "loss": 8.648, "step": 23380 }, { "epoch": 8.46, "learning_rate": 4.100007506614178e-07, "loss": 8.5639, "step": 23400 }, { "epoch": 8.47, "learning_rate": 4.097277691857819e-07, "loss": 8.7568, "step": 23420 }, { "epoch": 8.48, "learning_rate": 4.094544655217807e-07, "loss": 8.818, "step": 23440 }, { "epoch": 8.48, "learning_rate": 4.091808402206976e-07, "loss": 8.6586, "step": 23460 }, { "epoch": 8.49, "learning_rate": 4.0890689383446476e-07, "loss": 8.7856, "step": 23480 }, { "epoch": 8.5, "learning_rate": 4.086326269156618e-07, "loss": 8.5939, "step": 23500 }, { "epoch": 8.51, "learning_rate": 4.083580400175153e-07, "loss": 8.647, "step": 23520 }, { "epoch": 8.51, "learning_rate": 4.0808313369389693e-07, "loss": 8.6309, "step": 23540 }, { "epoch": 8.52, "learning_rate": 4.078079084993227e-07, "loss": 8.827, "step": 23560 }, { "epoch": 8.53, "learning_rate": 4.0753236498895215e-07, "loss": 8.7282, "step": 23580 }, { "epoch": 8.54, "learning_rate": 4.0725650371858646e-07, "loss": 8.6681, "step": 23600 }, { "epoch": 8.54, "learning_rate": 4.069803252446679e-07, "loss": 8.6488, "step": 23620 }, { "epoch": 8.55, "learning_rate": 4.0670383012427877e-07, "loss": 8.6818, "step": 23640 }, { "epoch": 8.56, "learning_rate": 4.0642701891513996e-07, "loss": 8.8065, "step": 23660 }, { "epoch": 8.56, "learning_rate": 4.0614989217560983e-07, "loss": 8.8094, "step": 23680 }, { "epoch": 8.57, "learning_rate": 4.058724504646834e-07, "loss": 8.7127, "step": 23700 }, { "epoch": 8.58, "learning_rate": 4.0559469434199077e-07, "loss": 
8.6676, "step": 23720 }, { "epoch": 8.59, "learning_rate": 4.0531662436779654e-07, "loss": 8.7212, "step": 23740 }, { "epoch": 8.59, "learning_rate": 4.050382411029981e-07, "loss": 8.721, "step": 23760 }, { "epoch": 8.6, "learning_rate": 4.0475954510912513e-07, "loss": 8.6397, "step": 23780 }, { "epoch": 8.61, "learning_rate": 4.044805369483377e-07, "loss": 8.6683, "step": 23800 }, { "epoch": 8.61, "learning_rate": 4.0420121718342583e-07, "loss": 8.6248, "step": 23820 }, { "epoch": 8.62, "learning_rate": 4.0392158637780794e-07, "loss": 8.5869, "step": 23840 }, { "epoch": 8.63, "learning_rate": 4.0364164509553e-07, "loss": 8.6588, "step": 23860 }, { "epoch": 8.64, "learning_rate": 4.0336139390126424e-07, "loss": 8.692, "step": 23880 }, { "epoch": 8.64, "learning_rate": 4.0308083336030784e-07, "loss": 8.6193, "step": 23900 }, { "epoch": 8.65, "learning_rate": 4.027999640385821e-07, "loss": 8.6798, "step": 23920 }, { "epoch": 8.66, "learning_rate": 4.0251878650263107e-07, "loss": 8.7117, "step": 23940 }, { "epoch": 8.67, "learning_rate": 4.022373013196206e-07, "loss": 8.6933, "step": 23960 }, { "epoch": 8.67, "learning_rate": 4.019555090573372e-07, "loss": 8.5723, "step": 23980 }, { "epoch": 8.68, "learning_rate": 4.0167341028418655e-07, "loss": 8.6345, "step": 24000 }, { "epoch": 8.69, "learning_rate": 4.0139100556919266e-07, "loss": 8.6287, "step": 24020 }, { "epoch": 8.69, "learning_rate": 4.0110829548199667e-07, "loss": 8.621, "step": 24040 }, { "epoch": 8.7, "learning_rate": 4.008252805928559e-07, "loss": 8.7364, "step": 24060 }, { "epoch": 8.71, "learning_rate": 4.005419614726421e-07, "loss": 8.7642, "step": 24080 }, { "epoch": 8.72, "learning_rate": 4.00258338692841e-07, "loss": 8.6653, "step": 24100 }, { "epoch": 8.72, "learning_rate": 3.999744128255508e-07, "loss": 8.6865, "step": 24120 }, { "epoch": 8.73, "learning_rate": 3.996901844434809e-07, "loss": 8.6638, "step": 24140 }, { "epoch": 8.74, "learning_rate": 3.994056541199511e-07, "loss": 8.6719, "step": 24160 }, { "epoch": 8.75, "learning_rate": 3.9912082242889e-07, "loss": 8.6338, "step": 24180 }, { "epoch": 8.75, "learning_rate": 3.988356899448344e-07, "loss": 8.7657, "step": 24200 }, { "epoch": 8.76, "learning_rate": 3.9855025724292763e-07, "loss": 8.6599, "step": 24220 }, { "epoch": 8.77, "learning_rate": 3.982645248989186e-07, "loss": 8.5804, "step": 24240 }, { "epoch": 8.77, "learning_rate": 3.9797849348916074e-07, "loss": 8.7593, "step": 24260 }, { "epoch": 8.78, "learning_rate": 3.9769216359061063e-07, "loss": 8.7266, "step": 24280 }, { "epoch": 8.79, "learning_rate": 3.974055357808269e-07, "loss": 8.683, "step": 24300 }, { "epoch": 8.8, "learning_rate": 3.971186106379693e-07, "loss": 8.7335, "step": 24320 }, { "epoch": 8.8, "learning_rate": 3.968313887407971e-07, "loss": 8.7124, "step": 24340 }, { "epoch": 8.81, "learning_rate": 3.9654387066866833e-07, "loss": 8.6624, "step": 24360 }, { "epoch": 8.82, "learning_rate": 3.962560570015383e-07, "loss": 8.6476, "step": 24380 }, { "epoch": 8.82, "learning_rate": 3.9596794831995863e-07, "loss": 8.6399, "step": 24400 }, { "epoch": 8.83, "learning_rate": 3.9567954520507594e-07, "loss": 8.6308, "step": 24420 }, { "epoch": 8.84, "learning_rate": 3.953908482386311e-07, "loss": 8.7086, "step": 24440 }, { "epoch": 8.85, "learning_rate": 3.9510185800295715e-07, "loss": 8.7306, "step": 24460 }, { "epoch": 8.85, "learning_rate": 3.94812575080979e-07, "loss": 8.688, "step": 24480 }, { "epoch": 8.86, "learning_rate": 3.9452300005621206e-07, "loss": 8.7237, "step": 24500 }, { "epoch": 8.87, 
"learning_rate": 3.9423313351276075e-07, "loss": 8.7392, "step": 24520 }, { "epoch": 8.88, "learning_rate": 3.9394297603531756e-07, "loss": 8.6499, "step": 24540 }, { "epoch": 8.88, "learning_rate": 3.9365252820916186e-07, "loss": 8.6272, "step": 24560 }, { "epoch": 8.89, "learning_rate": 3.933617906201585e-07, "loss": 8.7747, "step": 24580 }, { "epoch": 8.9, "learning_rate": 3.930707638547571e-07, "loss": 8.744, "step": 24600 }, { "epoch": 8.9, "learning_rate": 3.927794484999905e-07, "loss": 8.6325, "step": 24620 }, { "epoch": 8.91, "learning_rate": 3.924878451434735e-07, "loss": 8.8209, "step": 24640 }, { "epoch": 8.92, "learning_rate": 3.9219595437340205e-07, "loss": 8.6263, "step": 24660 }, { "epoch": 8.93, "learning_rate": 3.9190377677855155e-07, "loss": 8.5573, "step": 24680 }, { "epoch": 8.93, "learning_rate": 3.916113129482762e-07, "loss": 8.6047, "step": 24700 }, { "epoch": 8.94, "learning_rate": 3.913185634725077e-07, "loss": 8.7075, "step": 24720 }, { "epoch": 8.95, "learning_rate": 3.9102552894175347e-07, "loss": 8.6059, "step": 24740 }, { "epoch": 8.95, "learning_rate": 3.907322099470963e-07, "loss": 8.7004, "step": 24760 }, { "epoch": 8.96, "learning_rate": 3.904386070801927e-07, "loss": 8.645, "step": 24780 }, { "epoch": 8.97, "learning_rate": 3.9014472093327164e-07, "loss": 8.7493, "step": 24800 }, { "epoch": 8.98, "learning_rate": 3.8985055209913367e-07, "loss": 8.7113, "step": 24820 }, { "epoch": 8.98, "learning_rate": 3.8955610117114946e-07, "loss": 8.7381, "step": 24840 }, { "epoch": 8.99, "learning_rate": 3.8926136874325867e-07, "loss": 8.6343, "step": 24860 }, { "epoch": 9.0, "learning_rate": 3.889663554099688e-07, "loss": 8.6153, "step": 24880 }, { "epoch": 9.01, "learning_rate": 3.88671061766354e-07, "loss": 8.7392, "step": 24900 }, { "epoch": 9.01, "learning_rate": 3.883754884080539e-07, "loss": 8.7271, "step": 24920 }, { "epoch": 9.02, "learning_rate": 3.880796359312722e-07, "loss": 8.7976, "step": 24940 }, { "epoch": 9.03, "learning_rate": 3.8778350493277566e-07, "loss": 8.6362, "step": 24960 }, { "epoch": 9.03, "learning_rate": 3.8748709600989296e-07, "loss": 8.7061, "step": 24980 }, { "epoch": 9.04, "learning_rate": 3.871904097605131e-07, "loss": 8.6856, "step": 25000 }, { "epoch": 9.05, "learning_rate": 3.8689344678308476e-07, "loss": 8.6853, "step": 25020 }, { "epoch": 9.06, "learning_rate": 3.8659620767661483e-07, "loss": 8.6812, "step": 25040 }, { "epoch": 9.06, "learning_rate": 3.862986930406669e-07, "loss": 8.5669, "step": 25060 }, { "epoch": 9.07, "learning_rate": 3.8600090347536064e-07, "loss": 8.747, "step": 25080 }, { "epoch": 9.08, "learning_rate": 3.8570283958137e-07, "loss": 8.7162, "step": 25100 }, { "epoch": 9.08, "learning_rate": 3.8540450195992255e-07, "loss": 8.6484, "step": 25120 }, { "epoch": 9.09, "learning_rate": 3.8510589121279787e-07, "loss": 8.6183, "step": 25140 }, { "epoch": 9.1, "learning_rate": 3.8480700794232634e-07, "loss": 8.5655, "step": 25160 }, { "epoch": 9.11, "learning_rate": 3.845078527513883e-07, "loss": 8.8672, "step": 25180 }, { "epoch": 9.11, "learning_rate": 3.842084262434125e-07, "loss": 8.6426, "step": 25200 }, { "epoch": 9.12, "learning_rate": 3.839087290223748e-07, "loss": 8.6784, "step": 25220 }, { "epoch": 9.13, "learning_rate": 3.8360876169279734e-07, "loss": 8.686, "step": 25240 }, { "epoch": 9.14, "learning_rate": 3.8330852485974697e-07, "loss": 8.6127, "step": 25260 }, { "epoch": 9.14, "learning_rate": 3.8300801912883414e-07, "loss": 8.6752, "step": 25280 }, { "epoch": 9.15, "learning_rate": 
3.8270724510621177e-07, "loss": 8.7257, "step": 25300 }, { "epoch": 9.16, "learning_rate": 3.82406203398574e-07, "loss": 8.706, "step": 25320 }, { "epoch": 9.16, "learning_rate": 3.8210489461315485e-07, "loss": 8.6395, "step": 25340 }, { "epoch": 9.17, "learning_rate": 3.81803319357727e-07, "loss": 8.6969, "step": 25360 }, { "epoch": 9.18, "learning_rate": 3.8150147824060075e-07, "loss": 8.8156, "step": 25380 }, { "epoch": 9.19, "learning_rate": 3.8119937187062254e-07, "loss": 8.634, "step": 25400 }, { "epoch": 9.19, "learning_rate": 3.8089700085717405e-07, "loss": 8.7115, "step": 25420 }, { "epoch": 9.2, "learning_rate": 3.8059436581017044e-07, "loss": 8.7182, "step": 25440 }, { "epoch": 9.21, "learning_rate": 3.802914673400599e-07, "loss": 8.6773, "step": 25460 }, { "epoch": 9.22, "learning_rate": 3.7998830605782175e-07, "loss": 8.5986, "step": 25480 }, { "epoch": 9.22, "learning_rate": 3.796848825749652e-07, "loss": 8.6632, "step": 25500 }, { "epoch": 9.23, "learning_rate": 3.7938119750352885e-07, "loss": 8.7261, "step": 25520 }, { "epoch": 9.24, "learning_rate": 3.790772514560785e-07, "loss": 8.9573, "step": 25540 }, { "epoch": 9.24, "learning_rate": 3.787730450457065e-07, "loss": 8.8924, "step": 25560 }, { "epoch": 9.25, "learning_rate": 3.7846857888603056e-07, "loss": 8.6284, "step": 25580 }, { "epoch": 9.26, "learning_rate": 3.781638535911922e-07, "loss": 8.6416, "step": 25600 }, { "epoch": 9.27, "learning_rate": 3.7785886977585555e-07, "loss": 8.6983, "step": 25620 }, { "epoch": 9.27, "learning_rate": 3.775536280552063e-07, "loss": 8.6153, "step": 25640 }, { "epoch": 9.28, "learning_rate": 3.7724812904495035e-07, "loss": 8.6753, "step": 25660 }, { "epoch": 9.29, "learning_rate": 3.769423733613126e-07, "loss": 8.7574, "step": 25680 }, { "epoch": 9.29, "learning_rate": 3.7663636162103577e-07, "loss": 8.6958, "step": 25700 }, { "epoch": 9.3, "learning_rate": 3.76330094441379e-07, "loss": 8.6696, "step": 25720 }, { "epoch": 9.31, "learning_rate": 3.760235724401164e-07, "loss": 8.6922, "step": 25740 }, { "epoch": 9.32, "learning_rate": 3.757167962355365e-07, "loss": 8.7126, "step": 25760 }, { "epoch": 9.32, "learning_rate": 3.754097664464405e-07, "loss": 8.9095, "step": 25780 }, { "epoch": 9.33, "learning_rate": 3.7510248369214093e-07, "loss": 8.6746, "step": 25800 }, { "epoch": 9.34, "learning_rate": 3.7479494859246073e-07, "loss": 8.5895, "step": 25820 }, { "epoch": 9.35, "learning_rate": 3.744871617677319e-07, "loss": 8.6395, "step": 25840 }, { "epoch": 9.35, "learning_rate": 3.7417912383879394e-07, "loss": 8.7634, "step": 25860 }, { "epoch": 9.36, "learning_rate": 3.73870835426993e-07, "loss": 8.7353, "step": 25880 }, { "epoch": 9.37, "learning_rate": 3.735622971541807e-07, "loss": 8.6576, "step": 25900 }, { "epoch": 9.37, "learning_rate": 3.732535096427123e-07, "loss": 8.7598, "step": 25920 }, { "epoch": 9.38, "learning_rate": 3.7294447351544594e-07, "loss": 8.669, "step": 25940 }, { "epoch": 9.39, "learning_rate": 3.7263518939574136e-07, "loss": 8.595, "step": 25960 }, { "epoch": 9.4, "learning_rate": 3.723256579074583e-07, "loss": 8.6576, "step": 25980 }, { "epoch": 9.4, "learning_rate": 3.720158796749556e-07, "loss": 8.6325, "step": 26000 }, { "epoch": 9.41, "learning_rate": 3.7170585532308995e-07, "loss": 8.6388, "step": 26020 }, { "epoch": 9.42, "learning_rate": 3.713955854772143e-07, "loss": 8.6669, "step": 26040 }, { "epoch": 9.42, "learning_rate": 3.710850707631767e-07, "loss": 8.6721, "step": 26060 }, { "epoch": 9.43, "learning_rate": 3.707743118073195e-07, "loss": 8.7287, 
"step": 26080 }, { "epoch": 9.44, "learning_rate": 3.704633092364773e-07, "loss": 8.6791, "step": 26100 }, { "epoch": 9.45, "learning_rate": 3.7015206367797627e-07, "loss": 8.6707, "step": 26120 }, { "epoch": 9.45, "learning_rate": 3.698405757596327e-07, "loss": 8.6335, "step": 26140 }, { "epoch": 9.46, "learning_rate": 3.695288461097519e-07, "loss": 8.6813, "step": 26160 }, { "epoch": 9.47, "learning_rate": 3.692168753571265e-07, "loss": 8.6498, "step": 26180 }, { "epoch": 9.48, "learning_rate": 3.6890466413103574e-07, "loss": 8.6455, "step": 26200 }, { "epoch": 9.48, "learning_rate": 3.6859221306124353e-07, "loss": 8.626, "step": 26220 }, { "epoch": 9.49, "learning_rate": 3.682795227779981e-07, "loss": 8.6599, "step": 26240 }, { "epoch": 9.5, "learning_rate": 3.6796659391202976e-07, "loss": 8.6244, "step": 26260 }, { "epoch": 9.5, "learning_rate": 3.6765342709455035e-07, "loss": 8.6675, "step": 26280 }, { "epoch": 9.51, "learning_rate": 3.673400229572514e-07, "loss": 8.6763, "step": 26300 }, { "epoch": 9.52, "learning_rate": 3.670263821323034e-07, "loss": 8.7124, "step": 26320 }, { "epoch": 9.53, "learning_rate": 3.667125052523542e-07, "loss": 8.6563, "step": 26340 }, { "epoch": 9.53, "learning_rate": 3.6639839295052776e-07, "loss": 8.6153, "step": 26360 }, { "epoch": 9.54, "learning_rate": 3.660840458604228e-07, "loss": 8.663, "step": 26380 }, { "epoch": 9.55, "learning_rate": 3.657694646161119e-07, "loss": 8.7112, "step": 26400 }, { "epoch": 9.56, "learning_rate": 3.654546498521397e-07, "loss": 8.6633, "step": 26420 }, { "epoch": 9.56, "learning_rate": 3.6513960220352204e-07, "loss": 8.6195, "step": 26440 }, { "epoch": 9.57, "learning_rate": 3.6482432230574445e-07, "loss": 8.7926, "step": 26460 }, { "epoch": 9.58, "learning_rate": 3.645088107947609e-07, "loss": 8.6659, "step": 26480 }, { "epoch": 9.58, "learning_rate": 3.641930683069927e-07, "loss": 8.6512, "step": 26500 }, { "epoch": 9.59, "learning_rate": 3.638770954793268e-07, "loss": 8.6814, "step": 26520 }, { "epoch": 9.6, "learning_rate": 3.6356089294911494e-07, "loss": 8.7023, "step": 26540 }, { "epoch": 9.61, "learning_rate": 3.632444613541723e-07, "loss": 8.7315, "step": 26560 }, { "epoch": 9.61, "learning_rate": 3.629278013327759e-07, "loss": 8.5947, "step": 26580 }, { "epoch": 9.62, "learning_rate": 3.6261091352366363e-07, "loss": 8.8138, "step": 26600 }, { "epoch": 9.63, "learning_rate": 3.622937985660328e-07, "loss": 8.7118, "step": 26620 }, { "epoch": 9.63, "learning_rate": 3.6197645709953895e-07, "loss": 8.681, "step": 26640 }, { "epoch": 9.64, "learning_rate": 3.6165888976429447e-07, "loss": 8.7761, "step": 26660 }, { "epoch": 9.65, "learning_rate": 3.613410972008674e-07, "loss": 8.6516, "step": 26680 }, { "epoch": 9.66, "learning_rate": 3.610230800502802e-07, "loss": 8.6794, "step": 26700 }, { "epoch": 9.66, "learning_rate": 3.60704838954008e-07, "loss": 8.7636, "step": 26720 }, { "epoch": 9.67, "learning_rate": 3.6038637455397796e-07, "loss": 8.6292, "step": 26740 }, { "epoch": 9.68, "learning_rate": 3.6006768749256755e-07, "loss": 8.7857, "step": 26760 }, { "epoch": 9.69, "learning_rate": 3.597487784126035e-07, "loss": 8.6254, "step": 26780 }, { "epoch": 9.69, "learning_rate": 3.594296479573602e-07, "loss": 8.6402, "step": 26800 }, { "epoch": 9.7, "learning_rate": 3.591102967705586e-07, "loss": 8.6597, "step": 26820 }, { "epoch": 9.71, "learning_rate": 3.5879072549636494e-07, "loss": 8.7317, "step": 26840 }, { "epoch": 9.71, "learning_rate": 3.584709347793895e-07, "loss": 8.6803, "step": 26860 }, { "epoch": 9.72, 
"learning_rate": 3.581509252646851e-07, "loss": 8.6608, "step": 26880 }, { "epoch": 9.73, "learning_rate": 3.5783069759774587e-07, "loss": 8.6517, "step": 26900 }, { "epoch": 9.74, "learning_rate": 3.5751025242450596e-07, "loss": 8.6721, "step": 26920 }, { "epoch": 9.74, "learning_rate": 3.5718959039133836e-07, "loss": 8.7057, "step": 26940 }, { "epoch": 9.75, "learning_rate": 3.568687121450533e-07, "loss": 8.5605, "step": 26960 }, { "epoch": 9.76, "learning_rate": 3.565476183328975e-07, "loss": 8.666, "step": 26980 }, { "epoch": 9.76, "learning_rate": 3.5622630960255215e-07, "loss": 8.6459, "step": 27000 }, { "epoch": 9.77, "learning_rate": 3.5590478660213206e-07, "loss": 8.6509, "step": 27020 }, { "epoch": 9.78, "learning_rate": 3.5558304998018426e-07, "loss": 8.6461, "step": 27040 }, { "epoch": 9.79, "learning_rate": 3.5526110038568664e-07, "loss": 8.7623, "step": 27060 }, { "epoch": 9.79, "learning_rate": 3.5493893846804673e-07, "loss": 8.6492, "step": 27080 }, { "epoch": 9.8, "learning_rate": 3.546165648771004e-07, "loss": 8.6686, "step": 27100 }, { "epoch": 9.81, "learning_rate": 3.5429398026311037e-07, "loss": 8.7239, "step": 27120 }, { "epoch": 9.82, "learning_rate": 3.5397118527676505e-07, "loss": 8.6809, "step": 27140 }, { "epoch": 9.82, "learning_rate": 3.5364818056917724e-07, "loss": 8.6912, "step": 27160 }, { "epoch": 9.83, "learning_rate": 3.5332496679188264e-07, "loss": 8.6091, "step": 27180 }, { "epoch": 9.84, "learning_rate": 3.530015445968388e-07, "loss": 8.7204, "step": 27200 }, { "epoch": 9.84, "learning_rate": 3.5267791463642367e-07, "loss": 8.6489, "step": 27220 }, { "epoch": 9.85, "learning_rate": 3.5235407756343416e-07, "loss": 8.5754, "step": 27240 }, { "epoch": 9.86, "learning_rate": 3.520300340310852e-07, "loss": 8.6814, "step": 27260 }, { "epoch": 9.87, "learning_rate": 3.517057846930078e-07, "loss": 8.6847, "step": 27280 }, { "epoch": 9.87, "learning_rate": 3.5138133020324844e-07, "loss": 8.6579, "step": 27300 }, { "epoch": 9.88, "learning_rate": 3.510566712162673e-07, "loss": 8.716, "step": 27320 }, { "epoch": 9.89, "learning_rate": 3.5073180838693694e-07, "loss": 8.6446, "step": 27340 }, { "epoch": 9.9, "learning_rate": 3.5040674237054125e-07, "loss": 8.7245, "step": 27360 }, { "epoch": 9.9, "learning_rate": 3.500814738227739e-07, "loss": 8.7127, "step": 27380 }, { "epoch": 9.91, "learning_rate": 3.49756003399737e-07, "loss": 8.6278, "step": 27400 }, { "epoch": 9.92, "learning_rate": 3.494303317579401e-07, "loss": 8.781, "step": 27420 }, { "epoch": 9.92, "learning_rate": 3.491044595542985e-07, "loss": 8.6123, "step": 27440 }, { "epoch": 9.93, "learning_rate": 3.4877838744613194e-07, "loss": 8.6652, "step": 27460 }, { "epoch": 9.94, "learning_rate": 3.4845211609116354e-07, "loss": 8.6108, "step": 27480 }, { "epoch": 9.95, "learning_rate": 3.481256461475182e-07, "loss": 8.7342, "step": 27500 }, { "epoch": 9.95, "learning_rate": 3.4779897827372164e-07, "loss": 8.6786, "step": 27520 }, { "epoch": 9.96, "learning_rate": 3.474721131286985e-07, "loss": 8.7928, "step": 27540 }, { "epoch": 9.97, "learning_rate": 3.4714505137177163e-07, "loss": 8.6573, "step": 27560 }, { "epoch": 9.97, "learning_rate": 3.468177936626603e-07, "loss": 8.6552, "step": 27580 }, { "epoch": 9.98, "learning_rate": 3.4649034066147894e-07, "loss": 8.6789, "step": 27600 }, { "epoch": 9.99, "learning_rate": 3.461626930287361e-07, "loss": 8.6438, "step": 27620 }, { "epoch": 10.0, "learning_rate": 3.4583485142533303e-07, "loss": 8.6694, "step": 27640 }, { "epoch": 10.0, "learning_rate": 
3.455068165125619e-07, "loss": 8.746, "step": 27660 }, { "epoch": 10.01, "learning_rate": 3.4517858895210493e-07, "loss": 8.705, "step": 27680 }, { "epoch": 10.02, "learning_rate": 3.448501694060332e-07, "loss": 8.6114, "step": 27700 }, { "epoch": 10.03, "learning_rate": 3.4452155853680454e-07, "loss": 8.7115, "step": 27720 }, { "epoch": 10.03, "learning_rate": 3.441927570072632e-07, "loss": 8.923, "step": 27740 }, { "epoch": 10.04, "learning_rate": 3.438637654806378e-07, "loss": 8.8009, "step": 27760 }, { "epoch": 10.05, "learning_rate": 3.4353458462053995e-07, "loss": 8.6399, "step": 27780 }, { "epoch": 10.05, "learning_rate": 3.432052150909637e-07, "loss": 8.941, "step": 27800 }, { "epoch": 10.06, "learning_rate": 3.428756575562832e-07, "loss": 8.7679, "step": 27820 }, { "epoch": 10.07, "learning_rate": 3.4254591268125214e-07, "loss": 8.7031, "step": 27840 }, { "epoch": 10.08, "learning_rate": 3.4221598113100194e-07, "loss": 8.7045, "step": 27860 }, { "epoch": 10.08, "learning_rate": 3.418858635710406e-07, "loss": 8.7079, "step": 27880 }, { "epoch": 10.09, "learning_rate": 3.415555606672512e-07, "loss": 8.6618, "step": 27900 }, { "epoch": 10.1, "learning_rate": 3.412250730858909e-07, "loss": 8.7842, "step": 27920 }, { "epoch": 10.1, "learning_rate": 3.408944014935892e-07, "loss": 8.7194, "step": 27940 }, { "epoch": 10.11, "learning_rate": 3.4056354655734686e-07, "loss": 8.7642, "step": 27960 }, { "epoch": 10.12, "learning_rate": 3.402325089445346e-07, "loss": 8.6615, "step": 27980 }, { "epoch": 10.13, "learning_rate": 3.399012893228912e-07, "loss": 8.7468, "step": 28000 }, { "epoch": 10.13, "learning_rate": 3.3956988836052293e-07, "loss": 8.671, "step": 28020 }, { "epoch": 10.14, "learning_rate": 3.392383067259018e-07, "loss": 8.6229, "step": 28040 }, { "epoch": 10.15, "learning_rate": 3.389065450878641e-07, "loss": 8.6014, "step": 28060 }, { "epoch": 10.16, "learning_rate": 3.3857460411560943e-07, "loss": 8.7231, "step": 28080 }, { "epoch": 10.16, "learning_rate": 3.38242484478699e-07, "loss": 8.5853, "step": 28100 }, { "epoch": 10.17, "learning_rate": 3.379101868470543e-07, "loss": 8.6662, "step": 28120 }, { "epoch": 10.18, "learning_rate": 3.375777118909561e-07, "loss": 8.6895, "step": 28140 }, { "epoch": 10.18, "learning_rate": 3.372450602810426e-07, "loss": 8.6535, "step": 28160 }, { "epoch": 10.19, "learning_rate": 3.3691223268830846e-07, "loss": 8.6333, "step": 28180 }, { "epoch": 10.2, "learning_rate": 3.3657922978410335e-07, "loss": 8.7247, "step": 28200 }, { "epoch": 10.21, "learning_rate": 3.3624605224013054e-07, "loss": 8.5743, "step": 28220 }, { "epoch": 10.21, "learning_rate": 3.3591270072844547e-07, "loss": 8.6759, "step": 28240 }, { "epoch": 10.22, "learning_rate": 3.355791759214546e-07, "loss": 8.6982, "step": 28260 }, { "epoch": 10.23, "learning_rate": 3.3524547849191396e-07, "loss": 8.7385, "step": 28280 }, { "epoch": 10.24, "learning_rate": 3.3491160911292774e-07, "loss": 8.6972, "step": 28300 }, { "epoch": 10.24, "learning_rate": 3.3457756845794687e-07, "loss": 8.6619, "step": 28320 }, { "epoch": 10.25, "learning_rate": 3.342433572007679e-07, "loss": 8.659, "step": 28340 }, { "epoch": 10.26, "learning_rate": 3.3390897601553146e-07, "loss": 8.6246, "step": 28360 }, { "epoch": 10.26, "learning_rate": 3.3357442557672096e-07, "loss": 8.6084, "step": 28380 }, { "epoch": 10.27, "learning_rate": 3.3323970655916115e-07, "loss": 8.5871, "step": 28400 }, { "epoch": 10.28, "learning_rate": 3.3290481963801696e-07, "loss": 8.6602, "step": 28420 }, { "epoch": 10.29, 
"learning_rate": 3.325697654887918e-07, "loss": 8.6066, "step": 28440 }, { "epoch": 10.29, "learning_rate": 3.3223454478732647e-07, "loss": 8.6817, "step": 28460 }, { "epoch": 10.3, "learning_rate": 3.3189915820979785e-07, "loss": 8.7466, "step": 28480 }, { "epoch": 10.31, "learning_rate": 3.3156360643271736e-07, "loss": 8.8099, "step": 28500 }, { "epoch": 10.31, "learning_rate": 3.312278901329295e-07, "loss": 8.6193, "step": 28520 }, { "epoch": 10.32, "learning_rate": 3.3089200998761077e-07, "loss": 8.7632, "step": 28540 }, { "epoch": 10.33, "learning_rate": 3.305559666742682e-07, "loss": 8.7607, "step": 28560 }, { "epoch": 10.34, "learning_rate": 3.3021976087073767e-07, "loss": 8.5753, "step": 28580 }, { "epoch": 10.34, "learning_rate": 3.298833932551832e-07, "loss": 8.7299, "step": 28600 }, { "epoch": 10.35, "learning_rate": 3.295468645060951e-07, "loss": 8.7158, "step": 28620 }, { "epoch": 10.36, "learning_rate": 3.2921017530228845e-07, "loss": 8.6602, "step": 28640 }, { "epoch": 10.37, "learning_rate": 3.288733263229022e-07, "loss": 8.6871, "step": 28660 }, { "epoch": 10.37, "learning_rate": 3.2853631824739756e-07, "loss": 8.751, "step": 28680 }, { "epoch": 10.38, "learning_rate": 3.281991517555568e-07, "loss": 8.7443, "step": 28700 }, { "epoch": 10.39, "learning_rate": 3.278618275274814e-07, "loss": 8.6701, "step": 28720 }, { "epoch": 10.39, "learning_rate": 3.2752434624359127e-07, "loss": 8.8152, "step": 28740 }, { "epoch": 10.4, "learning_rate": 3.2718670858462296e-07, "loss": 8.6241, "step": 28760 }, { "epoch": 10.41, "learning_rate": 3.2684891523162854e-07, "loss": 8.6202, "step": 28780 }, { "epoch": 10.42, "learning_rate": 3.2651096686597423e-07, "loss": 8.7507, "step": 28800 }, { "epoch": 10.42, "learning_rate": 3.261728641693387e-07, "loss": 8.6473, "step": 28820 }, { "epoch": 10.43, "learning_rate": 3.2583460782371215e-07, "loss": 8.582, "step": 28840 }, { "epoch": 10.44, "learning_rate": 3.254961985113944e-07, "loss": 8.77, "step": 28860 }, { "epoch": 10.44, "learning_rate": 3.2515763691499425e-07, "loss": 8.7138, "step": 28880 }, { "epoch": 10.45, "learning_rate": 3.2481892371742725e-07, "loss": 8.6842, "step": 28900 }, { "epoch": 10.46, "learning_rate": 3.2448005960191507e-07, "loss": 8.67, "step": 28920 }, { "epoch": 10.47, "learning_rate": 3.241410452519835e-07, "loss": 8.7029, "step": 28940 }, { "epoch": 10.47, "learning_rate": 3.2380188135146173e-07, "loss": 8.658, "step": 28960 }, { "epoch": 10.48, "learning_rate": 3.234625685844802e-07, "loss": 8.6853, "step": 28980 }, { "epoch": 10.49, "learning_rate": 3.2312310763547005e-07, "loss": 8.5897, "step": 29000 }, { "epoch": 10.5, "learning_rate": 3.227834991891609e-07, "loss": 8.7498, "step": 29020 }, { "epoch": 10.5, "learning_rate": 3.224437439305803e-07, "loss": 8.6559, "step": 29040 }, { "epoch": 10.51, "learning_rate": 3.2210384254505164e-07, "loss": 8.6665, "step": 29060 }, { "epoch": 10.52, "learning_rate": 3.2176379571819314e-07, "loss": 8.6583, "step": 29080 }, { "epoch": 10.52, "learning_rate": 3.214236041359164e-07, "loss": 8.6153, "step": 29100 }, { "epoch": 10.53, "learning_rate": 3.2108326848442503e-07, "loss": 8.6621, "step": 29120 }, { "epoch": 10.54, "learning_rate": 3.2074278945021326e-07, "loss": 8.6406, "step": 29140 }, { "epoch": 10.55, "learning_rate": 3.2040216772006457e-07, "loss": 8.7462, "step": 29160 }, { "epoch": 10.55, "learning_rate": 3.200614039810501e-07, "loss": 8.6706, "step": 29180 }, { "epoch": 10.56, "learning_rate": 3.197204989205276e-07, "loss": 8.6145, "step": 29200 }, { "epoch": 
10.57, "learning_rate": 3.1937945322613976e-07, "loss": 8.6873, "step": 29220 }, { "epoch": 10.58, "learning_rate": 3.190382675858131e-07, "loss": 8.6631, "step": 29240 }, { "epoch": 10.58, "learning_rate": 3.186969426877563e-07, "loss": 8.8381, "step": 29260 }, { "epoch": 10.59, "learning_rate": 3.18355479220459e-07, "loss": 8.6286, "step": 29280 }, { "epoch": 10.6, "learning_rate": 3.1801387787269043e-07, "loss": 8.6504, "step": 29300 }, { "epoch": 10.6, "learning_rate": 3.1767213933349756e-07, "loss": 8.6191, "step": 29320 }, { "epoch": 10.61, "learning_rate": 3.173302642922046e-07, "loss": 8.7694, "step": 29340 }, { "epoch": 10.62, "learning_rate": 3.1698825343841086e-07, "loss": 8.7551, "step": 29360 }, { "epoch": 10.63, "learning_rate": 3.166461074619895e-07, "loss": 8.6852, "step": 29380 }, { "epoch": 10.63, "learning_rate": 3.1630382705308637e-07, "loss": 8.582, "step": 29400 }, { "epoch": 10.64, "learning_rate": 3.1596141290211854e-07, "loss": 8.7058, "step": 29420 }, { "epoch": 10.65, "learning_rate": 3.156188656997727e-07, "loss": 8.7603, "step": 29440 }, { "epoch": 10.65, "learning_rate": 3.1527618613700396e-07, "loss": 8.591, "step": 29460 }, { "epoch": 10.66, "learning_rate": 3.1493337490503457e-07, "loss": 8.7316, "step": 29480 }, { "epoch": 10.67, "learning_rate": 3.145904326953521e-07, "loss": 8.6253, "step": 29500 }, { "epoch": 10.68, "learning_rate": 3.142473601997086e-07, "loss": 8.5578, "step": 29520 }, { "epoch": 10.68, "learning_rate": 3.1390415811011864e-07, "loss": 8.6595, "step": 29540 }, { "epoch": 10.69, "learning_rate": 3.1356082711885846e-07, "loss": 8.6187, "step": 29560 }, { "epoch": 10.7, "learning_rate": 3.1321736791846416e-07, "loss": 8.7339, "step": 29580 }, { "epoch": 10.71, "learning_rate": 3.1287378120173045e-07, "loss": 8.7281, "step": 29600 }, { "epoch": 10.71, "learning_rate": 3.125300676617093e-07, "loss": 8.6875, "step": 29620 }, { "epoch": 10.72, "learning_rate": 3.121862279917084e-07, "loss": 8.5509, "step": 29640 }, { "epoch": 10.73, "learning_rate": 3.118422628852901e-07, "loss": 8.6831, "step": 29660 }, { "epoch": 10.73, "learning_rate": 3.1149817303626947e-07, "loss": 8.6986, "step": 29680 }, { "epoch": 10.74, "learning_rate": 3.111539591387135e-07, "loss": 8.6567, "step": 29700 }, { "epoch": 10.75, "learning_rate": 3.1080962188693907e-07, "loss": 8.7286, "step": 29720 }, { "epoch": 10.76, "learning_rate": 3.1046516197551204e-07, "loss": 8.6032, "step": 29740 }, { "epoch": 10.76, "learning_rate": 3.101205800992458e-07, "loss": 8.6943, "step": 29760 }, { "epoch": 10.77, "learning_rate": 3.097758769531996e-07, "loss": 8.7576, "step": 29780 }, { "epoch": 10.78, "learning_rate": 3.0943105323267746e-07, "loss": 8.6276, "step": 29800 }, { "epoch": 10.78, "learning_rate": 3.0908610963322626e-07, "loss": 8.6576, "step": 29820 }, { "epoch": 10.79, "learning_rate": 3.0874104685063515e-07, "loss": 8.6753, "step": 29840 }, { "epoch": 10.8, "learning_rate": 3.0839586558093333e-07, "loss": 8.6131, "step": 29860 }, { "epoch": 10.81, "learning_rate": 3.080505665203893e-07, "loss": 8.667, "step": 29880 }, { "epoch": 10.81, "learning_rate": 3.077051503655089e-07, "loss": 8.6642, "step": 29900 }, { "epoch": 10.82, "learning_rate": 3.073596178130342e-07, "loss": 8.6796, "step": 29920 }, { "epoch": 10.83, "learning_rate": 3.070139695599423e-07, "loss": 8.7781, "step": 29940 }, { "epoch": 10.84, "learning_rate": 3.066682063034433e-07, "loss": 8.6049, "step": 29960 }, { "epoch": 10.84, "learning_rate": 3.063223287409797e-07, "loss": 8.5715, "step": 29980 }, { 
"epoch": 10.85, "learning_rate": 3.059763375702241e-07, "loss": 8.7563, "step": 30000 }, { "epoch": 10.86, "learning_rate": 3.056302334890786e-07, "loss": 8.6739, "step": 30020 }, { "epoch": 10.86, "learning_rate": 3.05284017195673e-07, "loss": 8.6724, "step": 30040 }, { "epoch": 10.87, "learning_rate": 3.049376893883633e-07, "loss": 8.6921, "step": 30060 }, { "epoch": 10.88, "learning_rate": 3.0459125076573063e-07, "loss": 8.9008, "step": 30080 }, { "epoch": 10.89, "learning_rate": 3.0424470202657946e-07, "loss": 8.7367, "step": 30100 }, { "epoch": 10.89, "learning_rate": 3.038980438699366e-07, "loss": 8.5934, "step": 30120 }, { "epoch": 10.9, "learning_rate": 3.035512769950493e-07, "loss": 8.5653, "step": 30140 }, { "epoch": 10.91, "learning_rate": 3.0320440210138433e-07, "loss": 8.6243, "step": 30160 }, { "epoch": 10.92, "learning_rate": 3.028574198886262e-07, "loss": 8.6315, "step": 30180 }, { "epoch": 10.92, "learning_rate": 3.0251033105667594e-07, "loss": 8.651, "step": 30200 }, { "epoch": 10.93, "learning_rate": 3.021631363056497e-07, "loss": 8.5832, "step": 30220 }, { "epoch": 10.94, "learning_rate": 3.018158363358773e-07, "loss": 8.6438, "step": 30240 }, { "epoch": 10.94, "learning_rate": 3.0146843184790056e-07, "loss": 8.7393, "step": 30260 }, { "epoch": 10.95, "learning_rate": 3.0112092354247235e-07, "loss": 8.7362, "step": 30280 }, { "epoch": 10.96, "learning_rate": 3.00773312120555e-07, "loss": 8.7914, "step": 30300 }, { "epoch": 10.97, "learning_rate": 3.004255982833186e-07, "loss": 8.6087, "step": 30320 }, { "epoch": 10.97, "learning_rate": 3.0007778273214015e-07, "loss": 8.6884, "step": 30340 }, { "epoch": 10.98, "learning_rate": 2.997298661686014e-07, "loss": 8.6022, "step": 30360 }, { "epoch": 10.99, "learning_rate": 2.9938184929448816e-07, "loss": 8.6424, "step": 30380 }, { "epoch": 10.99, "learning_rate": 2.990337328117886e-07, "loss": 8.6903, "step": 30400 }, { "epoch": 11.0, "learning_rate": 2.986855174226915e-07, "loss": 8.6494, "step": 30420 }, { "epoch": 11.01, "learning_rate": 2.983372038295855e-07, "loss": 8.8256, "step": 30440 }, { "epoch": 11.02, "learning_rate": 2.979887927350573e-07, "loss": 8.7223, "step": 30460 }, { "epoch": 11.02, "learning_rate": 2.9764028484188985e-07, "loss": 8.7295, "step": 30480 }, { "epoch": 11.03, "learning_rate": 2.972916808530619e-07, "loss": 8.7298, "step": 30500 }, { "epoch": 11.04, "learning_rate": 2.969429814717456e-07, "loss": 8.8727, "step": 30520 }, { "epoch": 11.05, "learning_rate": 2.9659418740130587e-07, "loss": 8.6341, "step": 30540 }, { "epoch": 11.05, "learning_rate": 2.9624529934529845e-07, "loss": 8.7136, "step": 30560 }, { "epoch": 11.06, "learning_rate": 2.9589631800746864e-07, "loss": 8.712, "step": 30580 }, { "epoch": 11.07, "learning_rate": 2.955472440917498e-07, "loss": 8.6917, "step": 30600 }, { "epoch": 11.07, "learning_rate": 2.9519807830226234e-07, "loss": 8.655, "step": 30620 }, { "epoch": 11.08, "learning_rate": 2.948488213433118e-07, "loss": 8.6994, "step": 30640 }, { "epoch": 11.09, "learning_rate": 2.944994739193876e-07, "loss": 8.6317, "step": 30660 }, { "epoch": 11.1, "learning_rate": 2.9415003673516165e-07, "loss": 8.5908, "step": 30680 }, { "epoch": 11.1, "learning_rate": 2.9380051049548695e-07, "loss": 8.6299, "step": 30700 }, { "epoch": 11.11, "learning_rate": 2.9345089590539605e-07, "loss": 8.715, "step": 30720 }, { "epoch": 11.12, "learning_rate": 2.9310119367009987e-07, "loss": 8.7997, "step": 30740 }, { "epoch": 11.12, "learning_rate": 2.927514044949861e-07, "loss": 8.6665, "step": 30760 }, 
{ "epoch": 11.13, "learning_rate": 2.9240152908561765e-07, "loss": 8.646, "step": 30780 }, { "epoch": 11.14, "learning_rate": 2.9205156814773143e-07, "loss": 8.6162, "step": 30800 }, { "epoch": 11.15, "learning_rate": 2.917015223872369e-07, "loss": 8.6797, "step": 30820 }, { "epoch": 11.15, "learning_rate": 2.913513925102146e-07, "loss": 8.7125, "step": 30840 }, { "epoch": 11.16, "learning_rate": 2.9100117922291476e-07, "loss": 8.7651, "step": 30860 }, { "epoch": 11.17, "learning_rate": 2.9065088323175594e-07, "loss": 8.7213, "step": 30880 }, { "epoch": 11.18, "learning_rate": 2.903005052433234e-07, "loss": 8.7114, "step": 30900 }, { "epoch": 11.18, "learning_rate": 2.8995004596436774e-07, "loss": 8.7222, "step": 30920 }, { "epoch": 11.19, "learning_rate": 2.8959950610180373e-07, "loss": 8.7304, "step": 30940 }, { "epoch": 11.2, "learning_rate": 2.892488863627085e-07, "loss": 8.6837, "step": 30960 }, { "epoch": 11.2, "learning_rate": 2.888981874543205e-07, "loss": 8.6943, "step": 30980 }, { "epoch": 11.21, "learning_rate": 2.8854741008403753e-07, "loss": 8.5617, "step": 31000 }, { "epoch": 11.22, "learning_rate": 2.881965549594161e-07, "loss": 8.8403, "step": 31020 }, { "epoch": 11.23, "learning_rate": 2.878456227881692e-07, "loss": 8.7576, "step": 31040 }, { "epoch": 11.23, "learning_rate": 2.8749461427816546e-07, "loss": 8.6772, "step": 31060 }, { "epoch": 11.24, "learning_rate": 2.871435301374273e-07, "loss": 8.6206, "step": 31080 }, { "epoch": 11.25, "learning_rate": 2.8679237107413e-07, "loss": 8.6233, "step": 31100 }, { "epoch": 11.25, "learning_rate": 2.864411377965995e-07, "loss": 8.6301, "step": 31120 }, { "epoch": 11.26, "learning_rate": 2.860898310133119e-07, "loss": 8.7266, "step": 31140 }, { "epoch": 11.27, "learning_rate": 2.8573845143289123e-07, "loss": 8.7034, "step": 31160 }, { "epoch": 11.28, "learning_rate": 2.853869997641086e-07, "loss": 8.6918, "step": 31180 }, { "epoch": 11.28, "learning_rate": 2.850354767158804e-07, "loss": 8.6865, "step": 31200 }, { "epoch": 11.29, "learning_rate": 2.846838829972671e-07, "loss": 8.6556, "step": 31220 }, { "epoch": 11.3, "learning_rate": 2.843322193174715e-07, "loss": 8.8643, "step": 31240 }, { "epoch": 11.31, "learning_rate": 2.8398048638583774e-07, "loss": 8.623, "step": 31260 }, { "epoch": 11.31, "learning_rate": 2.8362868491184965e-07, "loss": 8.6568, "step": 31280 }, { "epoch": 11.32, "learning_rate": 2.8327681560512925e-07, "loss": 8.6121, "step": 31300 }, { "epoch": 11.33, "learning_rate": 2.829248791754353e-07, "loss": 8.66, "step": 31320 }, { "epoch": 11.33, "learning_rate": 2.8257287633266205e-07, "loss": 8.6959, "step": 31340 }, { "epoch": 11.34, "learning_rate": 2.8222080778683766e-07, "loss": 8.9021, "step": 31360 }, { "epoch": 11.35, "learning_rate": 2.81868674248123e-07, "loss": 8.6877, "step": 31380 }, { "epoch": 11.36, "learning_rate": 2.8151647642680976e-07, "loss": 8.7424, "step": 31400 }, { "epoch": 11.36, "learning_rate": 2.811642150333196e-07, "loss": 8.8318, "step": 31420 }, { "epoch": 11.37, "learning_rate": 2.8081189077820206e-07, "loss": 8.6563, "step": 31440 }, { "epoch": 11.38, "learning_rate": 2.804595043721337e-07, "loss": 8.5683, "step": 31460 }, { "epoch": 11.39, "learning_rate": 2.801070565259165e-07, "loss": 8.6405, "step": 31480 }, { "epoch": 11.39, "learning_rate": 2.797545479504762e-07, "loss": 8.602, "step": 31500 }, { "epoch": 11.4, "learning_rate": 2.7940197935686123e-07, "loss": 8.6209, "step": 31520 }, { "epoch": 11.41, "learning_rate": 2.790493514562408e-07, "loss": 8.7519, "step": 31540 }, 
{ "epoch": 11.41, "learning_rate": 2.78696664959904e-07, "loss": 8.639, "step": 31560 }, { "epoch": 11.42, "learning_rate": 2.783439205792581e-07, "loss": 8.7752, "step": 31580 }, { "epoch": 11.43, "learning_rate": 2.7799111902582693e-07, "loss": 8.5395, "step": 31600 }, { "epoch": 11.44, "learning_rate": 2.7763826101124996e-07, "loss": 8.6828, "step": 31620 }, { "epoch": 11.44, "learning_rate": 2.7728534724728023e-07, "loss": 8.6239, "step": 31640 }, { "epoch": 11.45, "learning_rate": 2.7693237844578336e-07, "loss": 8.6411, "step": 31660 }, { "epoch": 11.46, "learning_rate": 2.7657935531873606e-07, "loss": 8.61, "step": 31680 }, { "epoch": 11.46, "learning_rate": 2.762262785782245e-07, "loss": 8.6629, "step": 31700 }, { "epoch": 11.47, "learning_rate": 2.758731489364431e-07, "loss": 8.7071, "step": 31720 }, { "epoch": 11.48, "learning_rate": 2.7551996710569294e-07, "loss": 8.7268, "step": 31740 }, { "epoch": 11.49, "learning_rate": 2.751667337983803e-07, "loss": 8.6602, "step": 31760 }, { "epoch": 11.49, "learning_rate": 2.7481344972701545e-07, "loss": 8.8078, "step": 31780 }, { "epoch": 11.5, "learning_rate": 2.7446011560421087e-07, "loss": 8.7245, "step": 31800 }, { "epoch": 11.51, "learning_rate": 2.7410673214268017e-07, "loss": 8.7388, "step": 31820 }, { "epoch": 11.52, "learning_rate": 2.737533000552363e-07, "loss": 8.8027, "step": 31840 }, { "epoch": 11.52, "learning_rate": 2.733998200547906e-07, "loss": 8.6749, "step": 31860 }, { "epoch": 11.53, "learning_rate": 2.730462928543507e-07, "loss": 8.6042, "step": 31880 }, { "epoch": 11.54, "learning_rate": 2.726927191670197e-07, "loss": 8.6362, "step": 31900 }, { "epoch": 11.54, "learning_rate": 2.7233909970599426e-07, "loss": 8.7159, "step": 31920 }, { "epoch": 11.55, "learning_rate": 2.7198543518456356e-07, "loss": 8.8243, "step": 31940 }, { "epoch": 11.56, "learning_rate": 2.716317263161076e-07, "loss": 8.7407, "step": 31960 }, { "epoch": 11.57, "learning_rate": 2.712779738140957e-07, "loss": 8.7114, "step": 31980 }, { "epoch": 11.57, "learning_rate": 2.7092417839208537e-07, "loss": 8.6751, "step": 32000 }, { "epoch": 11.58, "learning_rate": 2.7057034076372073e-07, "loss": 8.6862, "step": 32020 }, { "epoch": 11.59, "learning_rate": 2.7021646164273084e-07, "loss": 8.6095, "step": 32040 }, { "epoch": 11.59, "learning_rate": 2.698625417429286e-07, "loss": 8.6848, "step": 32060 }, { "epoch": 11.6, "learning_rate": 2.695085817782091e-07, "loss": 8.7513, "step": 32080 }, { "epoch": 11.61, "learning_rate": 2.691545824625483e-07, "loss": 8.6242, "step": 32100 }, { "epoch": 11.62, "learning_rate": 2.6880054451000144e-07, "loss": 8.7933, "step": 32120 }, { "epoch": 11.62, "learning_rate": 2.6844646863470185e-07, "loss": 8.6506, "step": 32140 }, { "epoch": 11.63, "learning_rate": 2.6809235555085923e-07, "loss": 8.6979, "step": 32160 }, { "epoch": 11.64, "learning_rate": 2.677382059727583e-07, "loss": 8.6129, "step": 32180 }, { "epoch": 11.65, "learning_rate": 2.673840206147576e-07, "loss": 8.6981, "step": 32200 }, { "epoch": 11.65, "learning_rate": 2.670298001912875e-07, "loss": 8.6077, "step": 32220 }, { "epoch": 11.66, "learning_rate": 2.666755454168495e-07, "loss": 8.6092, "step": 32240 }, { "epoch": 11.67, "learning_rate": 2.663212570060141e-07, "loss": 8.651, "step": 32260 }, { "epoch": 11.67, "learning_rate": 2.659669356734198e-07, "loss": 8.7201, "step": 32280 }, { "epoch": 11.68, "learning_rate": 2.6561258213377133e-07, "loss": 8.6061, "step": 32300 }, { "epoch": 11.69, "learning_rate": 2.6525819710183867e-07, "loss": 8.7201, "step": 
32320 }, { "epoch": 11.7, "learning_rate": 2.6490378129245496e-07, "loss": 8.6382, "step": 32340 }, { "epoch": 11.7, "learning_rate": 2.645493354205158e-07, "loss": 8.7026, "step": 32360 }, { "epoch": 11.71, "learning_rate": 2.6419486020097713e-07, "loss": 8.6788, "step": 32380 }, { "epoch": 11.72, "learning_rate": 2.638403563488542e-07, "loss": 8.6216, "step": 32400 }, { "epoch": 11.73, "learning_rate": 2.6348582457922006e-07, "loss": 8.7143, "step": 32420 }, { "epoch": 11.73, "learning_rate": 2.6313126560720413e-07, "loss": 8.6853, "step": 32440 }, { "epoch": 11.74, "learning_rate": 2.627766801479904e-07, "loss": 8.676, "step": 32460 }, { "epoch": 11.75, "learning_rate": 2.6242206891681663e-07, "loss": 8.7085, "step": 32480 }, { "epoch": 11.75, "learning_rate": 2.620674326289725e-07, "loss": 8.7902, "step": 32500 }, { "epoch": 11.76, "learning_rate": 2.6171277199979785e-07, "loss": 8.6366, "step": 32520 }, { "epoch": 11.77, "learning_rate": 2.613580877446822e-07, "loss": 8.5595, "step": 32540 }, { "epoch": 11.78, "learning_rate": 2.6100338057906243e-07, "loss": 8.6858, "step": 32560 }, { "epoch": 11.78, "learning_rate": 2.606486512184215e-07, "loss": 8.6326, "step": 32580 }, { "epoch": 11.79, "learning_rate": 2.602939003782875e-07, "loss": 8.6178, "step": 32600 }, { "epoch": 11.8, "learning_rate": 2.5993912877423147e-07, "loss": 8.6427, "step": 32620 }, { "epoch": 11.8, "learning_rate": 2.5958433712186656e-07, "loss": 8.7161, "step": 32640 }, { "epoch": 11.81, "learning_rate": 2.5922952613684627e-07, "loss": 8.6176, "step": 32660 }, { "epoch": 11.82, "learning_rate": 2.5887469653486327e-07, "loss": 8.5744, "step": 32680 }, { "epoch": 11.83, "learning_rate": 2.585198490316475e-07, "loss": 8.6941, "step": 32700 }, { "epoch": 11.83, "learning_rate": 2.5816498434296513e-07, "loss": 8.7242, "step": 32720 }, { "epoch": 11.84, "learning_rate": 2.5781010318461714e-07, "loss": 8.758, "step": 32740 }, { "epoch": 11.85, "learning_rate": 2.5745520627243756e-07, "loss": 8.7095, "step": 32760 }, { "epoch": 11.86, "learning_rate": 2.571002943222922e-07, "loss": 8.6995, "step": 32780 }, { "epoch": 11.86, "learning_rate": 2.567453680500774e-07, "loss": 8.6771, "step": 32800 }, { "epoch": 11.87, "learning_rate": 2.5639042817171804e-07, "loss": 8.6903, "step": 32820 }, { "epoch": 11.88, "learning_rate": 2.560354754031667e-07, "loss": 8.7104, "step": 32840 }, { "epoch": 11.88, "learning_rate": 2.5568051046040197e-07, "loss": 8.7613, "step": 32860 }, { "epoch": 11.89, "learning_rate": 2.553255340594268e-07, "loss": 8.6661, "step": 32880 }, { "epoch": 11.9, "learning_rate": 2.549705469162675e-07, "loss": 8.581, "step": 32900 }, { "epoch": 11.91, "learning_rate": 2.54615549746972e-07, "loss": 8.6058, "step": 32920 }, { "epoch": 11.91, "learning_rate": 2.5426054326760816e-07, "loss": 8.6671, "step": 32940 }, { "epoch": 11.92, "learning_rate": 2.53905528194263e-07, "loss": 8.7079, "step": 32960 }, { "epoch": 11.93, "learning_rate": 2.5355050524304067e-07, "loss": 8.7816, "step": 32980 }, { "epoch": 11.93, "learning_rate": 2.5319547513006124e-07, "loss": 8.7454, "step": 33000 }, { "epoch": 11.94, "learning_rate": 2.528404385714594e-07, "loss": 8.692, "step": 33020 }, { "epoch": 11.95, "learning_rate": 2.524853962833824e-07, "loss": 8.6828, "step": 33040 }, { "epoch": 11.96, "learning_rate": 2.521303489819896e-07, "loss": 8.6124, "step": 33060 }, { "epoch": 11.96, "learning_rate": 2.5177529738345005e-07, "loss": 8.6774, "step": 33080 }, { "epoch": 11.97, "learning_rate": 2.514202422039417e-07, "loss": 8.6826, 
"step": 33100 }, { "epoch": 11.98, "learning_rate": 2.510651841596496e-07, "loss": 8.6866, "step": 33120 }, { "epoch": 11.99, "learning_rate": 2.5071012396676473e-07, "loss": 8.5997, "step": 33140 }, { "epoch": 11.99, "learning_rate": 2.5035506234148213e-07, "loss": 8.6878, "step": 33160 }, { "epoch": 12.0, "learning_rate": 2.5e-07, "loss": 8.6028, "step": 33180 }, { "epoch": 12.01, "learning_rate": 2.4964493765851795e-07, "loss": 8.5968, "step": 33200 }, { "epoch": 12.01, "learning_rate": 2.492898760332353e-07, "loss": 8.7196, "step": 33220 }, { "epoch": 12.02, "learning_rate": 2.4893481584035043e-07, "loss": 8.5782, "step": 33240 }, { "epoch": 12.03, "learning_rate": 2.485797577960583e-07, "loss": 8.6388, "step": 33260 }, { "epoch": 12.04, "learning_rate": 2.4822470261655e-07, "loss": 8.6747, "step": 33280 }, { "epoch": 12.04, "learning_rate": 2.478696510180105e-07, "loss": 8.678, "step": 33300 }, { "epoch": 12.05, "learning_rate": 2.475146037166176e-07, "loss": 8.7884, "step": 33320 }, { "epoch": 12.06, "learning_rate": 2.471595614285407e-07, "loss": 8.6724, "step": 33340 }, { "epoch": 12.07, "learning_rate": 2.4680452486993874e-07, "loss": 8.5924, "step": 33360 }, { "epoch": 12.07, "learning_rate": 2.4644949475695936e-07, "loss": 8.6632, "step": 33380 }, { "epoch": 12.08, "learning_rate": 2.460944718057371e-07, "loss": 8.7362, "step": 33400 }, { "epoch": 12.09, "learning_rate": 2.4573945673239187e-07, "loss": 8.7602, "step": 33420 }, { "epoch": 12.09, "learning_rate": 2.453844502530281e-07, "loss": 8.6372, "step": 33440 }, { "epoch": 12.1, "learning_rate": 2.4502945308373243e-07, "loss": 8.6209, "step": 33460 }, { "epoch": 12.11, "learning_rate": 2.446744659405732e-07, "loss": 8.6308, "step": 33480 }, { "epoch": 12.12, "learning_rate": 2.44319489539598e-07, "loss": 8.6478, "step": 33500 }, { "epoch": 12.12, "learning_rate": 2.439645245968333e-07, "loss": 8.6312, "step": 33520 }, { "epoch": 12.13, "learning_rate": 2.43609571828282e-07, "loss": 8.6993, "step": 33540 }, { "epoch": 12.14, "learning_rate": 2.432546319499226e-07, "loss": 8.7386, "step": 33560 }, { "epoch": 12.14, "learning_rate": 2.4289970567770775e-07, "loss": 8.6758, "step": 33580 }, { "epoch": 12.15, "learning_rate": 2.4254479372756236e-07, "loss": 8.7626, "step": 33600 }, { "epoch": 12.16, "learning_rate": 2.4218989681538284e-07, "loss": 8.7846, "step": 33620 }, { "epoch": 12.17, "learning_rate": 2.4183501565703485e-07, "loss": 8.8279, "step": 33640 }, { "epoch": 12.17, "learning_rate": 2.4148015096835255e-07, "loss": 8.759, "step": 33660 }, { "epoch": 12.18, "learning_rate": 2.4112530346513676e-07, "loss": 8.6495, "step": 33680 }, { "epoch": 12.19, "learning_rate": 2.407704738631537e-07, "loss": 8.6556, "step": 33700 }, { "epoch": 12.2, "learning_rate": 2.404156628781335e-07, "loss": 8.6345, "step": 33720 }, { "epoch": 12.2, "learning_rate": 2.400608712257686e-07, "loss": 8.6331, "step": 33740 }, { "epoch": 12.21, "learning_rate": 2.3970609962171255e-07, "loss": 8.617, "step": 33760 }, { "epoch": 12.22, "learning_rate": 2.3935134878157853e-07, "loss": 8.7142, "step": 33780 }, { "epoch": 12.22, "learning_rate": 2.3899661942093755e-07, "loss": 8.6639, "step": 33800 }, { "epoch": 12.23, "learning_rate": 2.386419122553178e-07, "loss": 8.6693, "step": 33820 }, { "epoch": 12.24, "learning_rate": 2.382872280002022e-07, "loss": 8.7309, "step": 33840 }, { "epoch": 12.25, "learning_rate": 2.3793256737102757e-07, "loss": 8.7177, "step": 33860 }, { "epoch": 12.25, "learning_rate": 2.3757793108318337e-07, "loss": 8.6647, "step": 
33880 }, { "epoch": 12.26, "learning_rate": 2.3722331985200956e-07, "loss": 8.725, "step": 33900 }, { "epoch": 12.27, "learning_rate": 2.368687343927959e-07, "loss": 8.5963, "step": 33920 }, { "epoch": 12.27, "learning_rate": 2.3651417542077994e-07, "loss": 8.694, "step": 33940 }, { "epoch": 12.28, "learning_rate": 2.361596436511458e-07, "loss": 8.7634, "step": 33960 }, { "epoch": 12.29, "learning_rate": 2.3580513979902295e-07, "loss": 8.9021, "step": 33980 }, { "epoch": 12.3, "learning_rate": 2.354506645794842e-07, "loss": 8.6636, "step": 34000 }, { "epoch": 12.3, "learning_rate": 2.3509621870754504e-07, "loss": 8.6979, "step": 34020 }, { "epoch": 12.31, "learning_rate": 2.347418028981614e-07, "loss": 8.6839, "step": 34040 }, { "epoch": 12.32, "learning_rate": 2.3438741786622862e-07, "loss": 8.6451, "step": 34060 }, { "epoch": 12.33, "learning_rate": 2.3403306432658023e-07, "loss": 8.6216, "step": 34080 }, { "epoch": 12.33, "learning_rate": 2.3367874299398583e-07, "loss": 8.7277, "step": 34100 }, { "epoch": 12.34, "learning_rate": 2.3332445458315048e-07, "loss": 8.6828, "step": 34120 }, { "epoch": 12.35, "learning_rate": 2.3297019980871242e-07, "loss": 8.7317, "step": 34140 }, { "epoch": 12.35, "learning_rate": 2.3261597938524244e-07, "loss": 8.7596, "step": 34160 }, { "epoch": 12.36, "learning_rate": 2.3226179402724173e-07, "loss": 8.6388, "step": 34180 }, { "epoch": 12.37, "learning_rate": 2.3190764444914078e-07, "loss": 8.6952, "step": 34200 }, { "epoch": 12.38, "learning_rate": 2.3155353136529818e-07, "loss": 8.5665, "step": 34220 }, { "epoch": 12.38, "learning_rate": 2.311994554899985e-07, "loss": 8.669, "step": 34240 }, { "epoch": 12.39, "learning_rate": 2.3084541753745173e-07, "loss": 8.6576, "step": 34260 }, { "epoch": 12.4, "learning_rate": 2.3049141822179097e-07, "loss": 8.6854, "step": 34280 }, { "epoch": 12.41, "learning_rate": 2.301374582570714e-07, "loss": 8.6454, "step": 34300 }, { "epoch": 12.41, "learning_rate": 2.2978353835726919e-07, "loss": 8.7634, "step": 34320 }, { "epoch": 12.42, "learning_rate": 2.2942965923627925e-07, "loss": 8.6704, "step": 34340 }, { "epoch": 12.43, "learning_rate": 2.290758216079146e-07, "loss": 8.7067, "step": 34360 }, { "epoch": 12.43, "learning_rate": 2.2872202618590437e-07, "loss": 8.6651, "step": 34380 }, { "epoch": 12.44, "learning_rate": 2.2836827368389245e-07, "loss": 8.7543, "step": 34400 }, { "epoch": 12.45, "learning_rate": 2.2801456481543645e-07, "loss": 8.6127, "step": 34420 }, { "epoch": 12.46, "learning_rate": 2.276609002940057e-07, "loss": 8.6231, "step": 34440 }, { "epoch": 12.46, "learning_rate": 2.2730728083298032e-07, "loss": 8.7681, "step": 34460 }, { "epoch": 12.47, "learning_rate": 2.2695370714564925e-07, "loss": 8.636, "step": 34480 }, { "epoch": 12.48, "learning_rate": 2.2660017994520938e-07, "loss": 8.6136, "step": 34500 }, { "epoch": 12.48, "learning_rate": 2.2624669994476368e-07, "loss": 8.7001, "step": 34520 }, { "epoch": 12.49, "learning_rate": 2.2589326785731986e-07, "loss": 8.6366, "step": 34540 }, { "epoch": 12.5, "learning_rate": 2.2553988439578914e-07, "loss": 8.6689, "step": 34560 }, { "epoch": 12.51, "learning_rate": 2.2518655027298464e-07, "loss": 8.5895, "step": 34580 }, { "epoch": 12.51, "learning_rate": 2.2483326620161975e-07, "loss": 8.5908, "step": 34600 }, { "epoch": 12.52, "learning_rate": 2.2448003289430712e-07, "loss": 8.6957, "step": 34620 }, { "epoch": 12.53, "learning_rate": 2.2412685106355693e-07, "loss": 8.6366, "step": 34640 }, { "epoch": 12.54, "learning_rate": 2.237737214217755e-07, "loss": 
8.7087, "step": 34660 }, { "epoch": 12.54, "learning_rate": 2.2342064468126395e-07, "loss": 8.6485, "step": 34680 }, { "epoch": 12.55, "learning_rate": 2.2306762155421662e-07, "loss": 8.6545, "step": 34700 }, { "epoch": 12.56, "learning_rate": 2.2271465275271983e-07, "loss": 8.7019, "step": 34720 }, { "epoch": 12.56, "learning_rate": 2.2236173898875002e-07, "loss": 8.69, "step": 34740 }, { "epoch": 12.57, "learning_rate": 2.2200888097417302e-07, "loss": 8.638, "step": 34760 }, { "epoch": 12.58, "learning_rate": 2.216560794207419e-07, "loss": 8.7119, "step": 34780 }, { "epoch": 12.59, "learning_rate": 2.21303335040096e-07, "loss": 8.7205, "step": 34800 }, { "epoch": 12.59, "learning_rate": 2.2095064854375928e-07, "loss": 8.7169, "step": 34820 }, { "epoch": 12.6, "learning_rate": 2.2059802064313882e-07, "loss": 8.5366, "step": 34840 }, { "epoch": 12.61, "learning_rate": 2.2024545204952382e-07, "loss": 8.7096, "step": 34860 }, { "epoch": 12.61, "learning_rate": 2.1989294347408347e-07, "loss": 8.5825, "step": 34880 }, { "epoch": 12.62, "learning_rate": 2.195404956278663e-07, "loss": 8.6663, "step": 34900 }, { "epoch": 12.63, "learning_rate": 2.1918810922179803e-07, "loss": 8.6597, "step": 34920 }, { "epoch": 12.64, "learning_rate": 2.1883578496668043e-07, "loss": 8.586, "step": 34940 }, { "epoch": 12.64, "learning_rate": 2.1848352357319022e-07, "loss": 8.7007, "step": 34960 }, { "epoch": 12.65, "learning_rate": 2.1813132575187697e-07, "loss": 8.7152, "step": 34980 }, { "epoch": 12.66, "learning_rate": 2.1777919221316232e-07, "loss": 8.7078, "step": 35000 }, { "epoch": 12.67, "learning_rate": 2.1742712366733803e-07, "loss": 8.7375, "step": 35020 }, { "epoch": 12.67, "learning_rate": 2.1707512082456473e-07, "loss": 8.7171, "step": 35040 }, { "epoch": 12.68, "learning_rate": 2.167231843948708e-07, "loss": 8.6852, "step": 35060 }, { "epoch": 12.69, "learning_rate": 2.1637131508815027e-07, "loss": 8.6224, "step": 35080 }, { "epoch": 12.69, "learning_rate": 2.1601951361416223e-07, "loss": 8.7685, "step": 35100 }, { "epoch": 12.7, "learning_rate": 2.1566778068252858e-07, "loss": 8.7018, "step": 35120 }, { "epoch": 12.71, "learning_rate": 2.1531611700273295e-07, "loss": 8.7495, "step": 35140 }, { "epoch": 12.72, "learning_rate": 2.1496452328411964e-07, "loss": 8.7644, "step": 35160 }, { "epoch": 12.72, "learning_rate": 2.146130002358914e-07, "loss": 8.674, "step": 35180 }, { "epoch": 12.73, "learning_rate": 2.142615485671088e-07, "loss": 8.7433, "step": 35200 }, { "epoch": 12.74, "learning_rate": 2.139101689866881e-07, "loss": 8.615, "step": 35220 }, { "epoch": 12.75, "learning_rate": 2.135588622034005e-07, "loss": 8.642, "step": 35240 }, { "epoch": 12.75, "learning_rate": 2.1320762892587008e-07, "loss": 8.8543, "step": 35260 }, { "epoch": 12.76, "learning_rate": 2.128564698625726e-07, "loss": 8.562, "step": 35280 }, { "epoch": 12.77, "learning_rate": 2.1250538572183457e-07, "loss": 8.7055, "step": 35300 }, { "epoch": 12.77, "learning_rate": 2.1215437721183074e-07, "loss": 8.6682, "step": 35320 }, { "epoch": 12.78, "learning_rate": 2.1180344504058392e-07, "loss": 8.6209, "step": 35340 }, { "epoch": 12.79, "learning_rate": 2.1145258991596245e-07, "loss": 8.6196, "step": 35360 }, { "epoch": 12.8, "learning_rate": 2.1110181254567955e-07, "loss": 8.6175, "step": 35380 }, { "epoch": 12.8, "learning_rate": 2.1075111363729154e-07, "loss": 8.813, "step": 35400 }, { "epoch": 12.81, "learning_rate": 2.1040049389819624e-07, "loss": 8.5952, "step": 35420 }, { "epoch": 12.82, "learning_rate": 
2.1004995403563224e-07, "loss": 8.7259, "step": 35440 }, { "epoch": 12.82, "learning_rate": 2.096994947566766e-07, "loss": 8.7216, "step": 35460 }, { "epoch": 12.83, "learning_rate": 2.0934911676824403e-07, "loss": 8.7122, "step": 35480 }, { "epoch": 12.84, "learning_rate": 2.089988207770852e-07, "loss": 8.614, "step": 35500 }, { "epoch": 12.85, "learning_rate": 2.086486074897854e-07, "loss": 8.6394, "step": 35520 }, { "epoch": 12.85, "learning_rate": 2.0829847761276316e-07, "loss": 8.6364, "step": 35540 }, { "epoch": 12.86, "learning_rate": 2.0794843185226865e-07, "loss": 8.7904, "step": 35560 }, { "epoch": 12.87, "learning_rate": 2.0759847091438238e-07, "loss": 8.6759, "step": 35580 }, { "epoch": 12.88, "learning_rate": 2.0724859550501393e-07, "loss": 8.5717, "step": 35600 }, { "epoch": 12.88, "learning_rate": 2.0689880632990008e-07, "loss": 8.7717, "step": 35620 }, { "epoch": 12.89, "learning_rate": 2.0654910409460396e-07, "loss": 8.6799, "step": 35640 }, { "epoch": 12.9, "learning_rate": 2.0619948950451316e-07, "loss": 8.6777, "step": 35660 }, { "epoch": 12.9, "learning_rate": 2.0584996326483838e-07, "loss": 8.6712, "step": 35680 }, { "epoch": 12.91, "learning_rate": 2.0550052608061248e-07, "loss": 8.6452, "step": 35700 }, { "epoch": 12.92, "learning_rate": 2.0515117865668815e-07, "loss": 8.6933, "step": 35720 }, { "epoch": 12.93, "learning_rate": 2.0480192169773763e-07, "loss": 8.7019, "step": 35740 }, { "epoch": 12.93, "learning_rate": 2.0445275590825024e-07, "loss": 8.7944, "step": 35760 }, { "epoch": 12.94, "learning_rate": 2.0410368199253142e-07, "loss": 8.7143, "step": 35780 }, { "epoch": 12.95, "learning_rate": 2.0375470065470158e-07, "loss": 8.6063, "step": 35800 }, { "epoch": 12.95, "learning_rate": 2.0340581259869405e-07, "loss": 8.6562, "step": 35820 }, { "epoch": 12.96, "learning_rate": 2.0305701852825438e-07, "loss": 8.6519, "step": 35840 }, { "epoch": 12.97, "learning_rate": 2.027083191469381e-07, "loss": 8.703, "step": 35860 }, { "epoch": 12.98, "learning_rate": 2.0235971515811013e-07, "loss": 8.6644, "step": 35880 }, { "epoch": 12.98, "learning_rate": 2.0201120726494278e-07, "loss": 8.751, "step": 35900 }, { "epoch": 12.99, "learning_rate": 2.016627961704144e-07, "loss": 8.6513, "step": 35920 }, { "epoch": 13.0, "learning_rate": 2.013144825773085e-07, "loss": 8.5942, "step": 35940 }, { "epoch": 13.01, "learning_rate": 2.0096626718821143e-07, "loss": 8.739, "step": 35960 }, { "epoch": 13.01, "learning_rate": 2.0061815070551184e-07, "loss": 8.6871, "step": 35980 }, { "epoch": 13.02, "learning_rate": 2.002701338313987e-07, "loss": 8.6548, "step": 36000 }, { "epoch": 13.03, "learning_rate": 1.9992221726785988e-07, "loss": 8.7896, "step": 36020 }, { "epoch": 13.03, "learning_rate": 1.995744017166814e-07, "loss": 8.5878, "step": 36040 }, { "epoch": 13.04, "learning_rate": 1.9922668787944497e-07, "loss": 8.6677, "step": 36060 }, { "epoch": 13.05, "learning_rate": 1.9887907645752765e-07, "loss": 8.5799, "step": 36080 }, { "epoch": 13.06, "learning_rate": 1.9853156815209955e-07, "loss": 8.6493, "step": 36100 }, { "epoch": 13.06, "learning_rate": 1.9818416366412275e-07, "loss": 8.7074, "step": 36120 }, { "epoch": 13.07, "learning_rate": 1.9783686369435031e-07, "loss": 8.6966, "step": 36140 }, { "epoch": 13.08, "learning_rate": 1.9748966894332404e-07, "loss": 8.623, "step": 36160 }, { "epoch": 13.08, "learning_rate": 1.9714258011137384e-07, "loss": 8.6557, "step": 36180 }, { "epoch": 13.09, "learning_rate": 1.9679559789861575e-07, "loss": 8.6353, "step": 36200 }, { "epoch": 13.1, 
"learning_rate": 1.9644872300495068e-07, "loss": 8.5737, "step": 36220 }, { "epoch": 13.11, "learning_rate": 1.9610195613006343e-07, "loss": 8.7244, "step": 36240 }, { "epoch": 13.11, "learning_rate": 1.9575529797342047e-07, "loss": 8.6988, "step": 36260 }, { "epoch": 13.12, "learning_rate": 1.954087492342694e-07, "loss": 8.6604, "step": 36280 }, { "epoch": 13.13, "learning_rate": 1.950623106116367e-07, "loss": 8.614, "step": 36300 }, { "epoch": 13.14, "learning_rate": 1.9471598280432705e-07, "loss": 8.7385, "step": 36320 }, { "epoch": 13.14, "learning_rate": 1.9436976651092142e-07, "loss": 8.6884, "step": 36340 }, { "epoch": 13.15, "learning_rate": 1.9402366242977592e-07, "loss": 8.6888, "step": 36360 }, { "epoch": 13.16, "learning_rate": 1.936776712590203e-07, "loss": 8.7554, "step": 36380 }, { "epoch": 13.16, "learning_rate": 1.933317936965566e-07, "loss": 8.7202, "step": 36400 }, { "epoch": 13.17, "learning_rate": 1.9298603044005774e-07, "loss": 8.7588, "step": 36420 }, { "epoch": 13.18, "learning_rate": 1.9264038218696576e-07, "loss": 8.6336, "step": 36440 }, { "epoch": 13.19, "learning_rate": 1.9229484963449112e-07, "loss": 8.6737, "step": 36460 }, { "epoch": 13.19, "learning_rate": 1.919494334796107e-07, "loss": 8.7338, "step": 36480 }, { "epoch": 13.2, "learning_rate": 1.9160413441906665e-07, "loss": 8.6593, "step": 36500 }, { "epoch": 13.21, "learning_rate": 1.9125895314936488e-07, "loss": 8.6246, "step": 36520 }, { "epoch": 13.22, "learning_rate": 1.909138903667738e-07, "loss": 8.6588, "step": 36540 }, { "epoch": 13.22, "learning_rate": 1.905689467673226e-07, "loss": 8.655, "step": 36560 }, { "epoch": 13.23, "learning_rate": 1.9022412304680042e-07, "loss": 8.6502, "step": 36580 }, { "epoch": 13.24, "learning_rate": 1.8987941990075415e-07, "loss": 8.6792, "step": 36600 }, { "epoch": 13.24, "learning_rate": 1.8953483802448796e-07, "loss": 8.6347, "step": 36620 }, { "epoch": 13.25, "learning_rate": 1.8919037811306104e-07, "loss": 8.64, "step": 36640 }, { "epoch": 13.26, "learning_rate": 1.8884604086128654e-07, "loss": 8.681, "step": 36660 }, { "epoch": 13.27, "learning_rate": 1.885018269637305e-07, "loss": 8.6094, "step": 36680 }, { "epoch": 13.27, "learning_rate": 1.8815773711470987e-07, "loss": 8.6675, "step": 36700 }, { "epoch": 13.28, "learning_rate": 1.8781377200829156e-07, "loss": 8.6894, "step": 36720 }, { "epoch": 13.29, "learning_rate": 1.8746993233829079e-07, "loss": 8.6208, "step": 36740 }, { "epoch": 13.29, "learning_rate": 1.8712621879826955e-07, "loss": 8.6134, "step": 36760 }, { "epoch": 13.3, "learning_rate": 1.867826320815359e-07, "loss": 8.7218, "step": 36780 }, { "epoch": 13.31, "learning_rate": 1.8643917288114146e-07, "loss": 8.8159, "step": 36800 }, { "epoch": 13.32, "learning_rate": 1.8609584188988133e-07, "loss": 8.7762, "step": 36820 }, { "epoch": 13.32, "learning_rate": 1.8575263980029147e-07, "loss": 8.7979, "step": 36840 }, { "epoch": 13.33, "learning_rate": 1.8540956730464785e-07, "loss": 8.6362, "step": 36860 }, { "epoch": 13.34, "learning_rate": 1.8506662509496546e-07, "loss": 8.7675, "step": 36880 }, { "epoch": 13.35, "learning_rate": 1.8472381386299596e-07, "loss": 8.6792, "step": 36900 }, { "epoch": 13.35, "learning_rate": 1.8438113430022733e-07, "loss": 8.6449, "step": 36920 }, { "epoch": 13.36, "learning_rate": 1.840385870978815e-07, "loss": 8.6976, "step": 36940 }, { "epoch": 13.37, "learning_rate": 1.8369617294691358e-07, "loss": 8.7641, "step": 36960 }, { "epoch": 13.37, "learning_rate": 1.8335389253801055e-07, "loss": 8.7507, "step": 36980 }, { 
"epoch": 13.38, "learning_rate": 1.8301174656158912e-07, "loss": 8.6479, "step": 37000 }, { "epoch": 13.39, "learning_rate": 1.826697357077954e-07, "loss": 8.6015, "step": 37020 }, { "epoch": 13.4, "learning_rate": 1.823278606665024e-07, "loss": 8.74, "step": 37040 }, { "epoch": 13.4, "learning_rate": 1.8198612212730963e-07, "loss": 8.6362, "step": 37060 }, { "epoch": 13.41, "learning_rate": 1.81644520779541e-07, "loss": 8.5745, "step": 37080 }, { "epoch": 13.42, "learning_rate": 1.8130305731224365e-07, "loss": 8.7794, "step": 37100 }, { "epoch": 13.42, "learning_rate": 1.8096173241418695e-07, "loss": 8.5795, "step": 37120 }, { "epoch": 13.43, "learning_rate": 1.8062054677386021e-07, "loss": 8.6263, "step": 37140 }, { "epoch": 13.44, "learning_rate": 1.8027950107947246e-07, "loss": 8.7687, "step": 37160 }, { "epoch": 13.45, "learning_rate": 1.7993859601894992e-07, "loss": 8.6807, "step": 37180 }, { "epoch": 13.45, "learning_rate": 1.795978322799354e-07, "loss": 8.7346, "step": 37200 }, { "epoch": 13.46, "learning_rate": 1.7925721054978674e-07, "loss": 8.6137, "step": 37220 }, { "epoch": 13.47, "learning_rate": 1.7891673151557492e-07, "loss": 8.7427, "step": 37240 }, { "epoch": 13.48, "learning_rate": 1.7857639586408364e-07, "loss": 8.6359, "step": 37260 }, { "epoch": 13.48, "learning_rate": 1.7823620428180692e-07, "loss": 8.7831, "step": 37280 }, { "epoch": 13.49, "learning_rate": 1.778961574549484e-07, "loss": 8.6557, "step": 37300 }, { "epoch": 13.5, "learning_rate": 1.775562560694197e-07, "loss": 8.645, "step": 37320 }, { "epoch": 13.5, "learning_rate": 1.7721650081083905e-07, "loss": 8.6701, "step": 37340 }, { "epoch": 13.51, "learning_rate": 1.7687689236452995e-07, "loss": 8.7074, "step": 37360 }, { "epoch": 13.52, "learning_rate": 1.765374314155198e-07, "loss": 8.7768, "step": 37380 }, { "epoch": 13.53, "learning_rate": 1.7619811864853827e-07, "loss": 8.5979, "step": 37400 }, { "epoch": 13.53, "learning_rate": 1.758589547480165e-07, "loss": 8.6545, "step": 37420 }, { "epoch": 13.54, "learning_rate": 1.75519940398085e-07, "loss": 8.6859, "step": 37440 }, { "epoch": 13.55, "learning_rate": 1.7518107628257273e-07, "loss": 8.6216, "step": 37460 }, { "epoch": 13.56, "learning_rate": 1.748423630850058e-07, "loss": 8.8257, "step": 37480 }, { "epoch": 13.56, "learning_rate": 1.7450380148860556e-07, "loss": 8.6443, "step": 37500 }, { "epoch": 13.57, "learning_rate": 1.741653921762879e-07, "loss": 8.5728, "step": 37520 }, { "epoch": 13.58, "learning_rate": 1.7382713583066125e-07, "loss": 8.6605, "step": 37540 }, { "epoch": 13.58, "learning_rate": 1.734890331340258e-07, "loss": 8.6944, "step": 37560 }, { "epoch": 13.59, "learning_rate": 1.731510847683714e-07, "loss": 8.6816, "step": 37580 }, { "epoch": 13.6, "learning_rate": 1.728132914153771e-07, "loss": 8.7527, "step": 37600 }, { "epoch": 13.61, "learning_rate": 1.7247565375640881e-07, "loss": 8.9439, "step": 37620 }, { "epoch": 13.61, "learning_rate": 1.7213817247251862e-07, "loss": 8.6915, "step": 37640 }, { "epoch": 13.62, "learning_rate": 1.7180084824444325e-07, "loss": 8.6954, "step": 37660 }, { "epoch": 13.63, "learning_rate": 1.7146368175260234e-07, "loss": 8.6402, "step": 37680 }, { "epoch": 13.63, "learning_rate": 1.711266736770978e-07, "loss": 8.7081, "step": 37700 }, { "epoch": 13.64, "learning_rate": 1.7078982469771163e-07, "loss": 8.5867, "step": 37720 }, { "epoch": 13.65, "learning_rate": 1.704531354939049e-07, "loss": 8.6839, "step": 37740 }, { "epoch": 13.66, "learning_rate": 1.7011660674481676e-07, "loss": 8.7227, "step": 
37760 }, { "epoch": 13.66, "learning_rate": 1.6978023912926225e-07, "loss": 8.5406, "step": 37780 }, { "epoch": 13.67, "learning_rate": 1.6944403332573185e-07, "loss": 8.6903, "step": 37800 }, { "epoch": 13.68, "learning_rate": 1.6910799001238923e-07, "loss": 8.6448, "step": 37820 }, { "epoch": 13.69, "learning_rate": 1.6877210986707046e-07, "loss": 8.6563, "step": 37840 }, { "epoch": 13.69, "learning_rate": 1.6843639356728267e-07, "loss": 8.7411, "step": 37860 }, { "epoch": 13.7, "learning_rate": 1.6810084179020208e-07, "loss": 8.644, "step": 37880 }, { "epoch": 13.71, "learning_rate": 1.6776545521267354e-07, "loss": 8.6785, "step": 37900 }, { "epoch": 13.71, "learning_rate": 1.674302345112083e-07, "loss": 8.6754, "step": 37920 }, { "epoch": 13.72, "learning_rate": 1.6709518036198307e-07, "loss": 8.6037, "step": 37940 }, { "epoch": 13.73, "learning_rate": 1.6676029344083885e-07, "loss": 8.6496, "step": 37960 }, { "epoch": 13.74, "learning_rate": 1.66425574423279e-07, "loss": 8.5986, "step": 37980 }, { "epoch": 13.74, "learning_rate": 1.6609102398446852e-07, "loss": 8.6326, "step": 38000 }, { "epoch": 13.75, "learning_rate": 1.6575664279923212e-07, "loss": 8.8621, "step": 38020 }, { "epoch": 13.76, "learning_rate": 1.654224315420531e-07, "loss": 8.5913, "step": 38040 }, { "epoch": 13.76, "learning_rate": 1.650883908870723e-07, "loss": 8.6037, "step": 38060 }, { "epoch": 13.77, "learning_rate": 1.6475452150808597e-07, "loss": 8.631, "step": 38080 }, { "epoch": 13.78, "learning_rate": 1.6442082407854538e-07, "loss": 8.6065, "step": 38100 }, { "epoch": 13.79, "learning_rate": 1.6408729927155453e-07, "loss": 8.7009, "step": 38120 }, { "epoch": 13.79, "learning_rate": 1.6375394775986952e-07, "loss": 8.6739, "step": 38140 }, { "epoch": 13.8, "learning_rate": 1.6342077021589669e-07, "loss": 8.5858, "step": 38160 }, { "epoch": 13.81, "learning_rate": 1.6308776731169154e-07, "loss": 8.9962, "step": 38180 }, { "epoch": 13.82, "learning_rate": 1.6275493971895743e-07, "loss": 8.6618, "step": 38200 }, { "epoch": 13.82, "learning_rate": 1.624222881090439e-07, "loss": 8.7069, "step": 38220 }, { "epoch": 13.83, "learning_rate": 1.620898131529457e-07, "loss": 8.7122, "step": 38240 }, { "epoch": 13.84, "learning_rate": 1.61757515521301e-07, "loss": 8.6128, "step": 38260 }, { "epoch": 13.84, "learning_rate": 1.6142539588439052e-07, "loss": 8.9103, "step": 38280 }, { "epoch": 13.85, "learning_rate": 1.6109345491213585e-07, "loss": 8.6976, "step": 38300 }, { "epoch": 13.86, "learning_rate": 1.607616932740982e-07, "loss": 8.6669, "step": 38320 }, { "epoch": 13.87, "learning_rate": 1.6043011163947707e-07, "loss": 8.6298, "step": 38340 }, { "epoch": 13.87, "learning_rate": 1.600987106771089e-07, "loss": 8.6765, "step": 38360 }, { "epoch": 13.88, "learning_rate": 1.5976749105546545e-07, "loss": 8.7179, "step": 38380 }, { "epoch": 13.89, "learning_rate": 1.5943645344265312e-07, "loss": 8.7319, "step": 38400 }, { "epoch": 13.9, "learning_rate": 1.5910559850641076e-07, "loss": 8.6038, "step": 38420 }, { "epoch": 13.9, "learning_rate": 1.5877492691410913e-07, "loss": 8.6624, "step": 38440 }, { "epoch": 13.91, "learning_rate": 1.5844443933274886e-07, "loss": 8.6136, "step": 38460 }, { "epoch": 13.92, "learning_rate": 1.5811413642895943e-07, "loss": 8.8174, "step": 38480 }, { "epoch": 13.92, "learning_rate": 1.5778401886899806e-07, "loss": 8.7944, "step": 38500 }, { "epoch": 13.93, "learning_rate": 1.5745408731874776e-07, "loss": 8.5984, "step": 38520 }, { "epoch": 13.94, "learning_rate": 1.5712434244371675e-07, "loss": 
8.8729, "step": 38540 }, { "epoch": 13.95, "learning_rate": 1.5679478490903635e-07, "loss": 8.6856, "step": 38560 }, { "epoch": 13.95, "learning_rate": 1.5646541537946003e-07, "loss": 8.6329, "step": 38580 }, { "epoch": 13.96, "learning_rate": 1.5613623451936232e-07, "loss": 8.6972, "step": 38600 }, { "epoch": 13.97, "learning_rate": 1.5580724299273677e-07, "loss": 8.7132, "step": 38620 }, { "epoch": 13.97, "learning_rate": 1.5547844146319544e-07, "loss": 8.6481, "step": 38640 }, { "epoch": 13.98, "learning_rate": 1.551498305939669e-07, "loss": 8.6931, "step": 38660 }, { "epoch": 13.99, "learning_rate": 1.5482141104789504e-07, "loss": 8.7225, "step": 38680 }, { "epoch": 14.0, "learning_rate": 1.5449318348743817e-07, "loss": 8.7001, "step": 38700 }, { "epoch": 14.0, "learning_rate": 1.5416514857466695e-07, "loss": 8.6532, "step": 38720 }, { "epoch": 14.01, "learning_rate": 1.5383730697126386e-07, "loss": 8.6646, "step": 38740 }, { "epoch": 14.02, "learning_rate": 1.5350965933852104e-07, "loss": 8.5851, "step": 38760 }, { "epoch": 14.03, "learning_rate": 1.5318220633733975e-07, "loss": 8.7631, "step": 38780 }, { "epoch": 14.03, "learning_rate": 1.528549486282284e-07, "loss": 8.7466, "step": 38800 }, { "epoch": 14.04, "learning_rate": 1.5252788687130143e-07, "loss": 8.7216, "step": 38820 }, { "epoch": 14.05, "learning_rate": 1.5220102172627837e-07, "loss": 8.5501, "step": 38840 }, { "epoch": 14.05, "learning_rate": 1.518743538524817e-07, "loss": 8.6958, "step": 38860 }, { "epoch": 14.06, "learning_rate": 1.515478839088365e-07, "loss": 8.6093, "step": 38880 }, { "epoch": 14.07, "learning_rate": 1.5122161255386812e-07, "loss": 8.6491, "step": 38900 }, { "epoch": 14.08, "learning_rate": 1.5089554044570149e-07, "loss": 8.6376, "step": 38920 }, { "epoch": 14.08, "learning_rate": 1.5056966824205988e-07, "loss": 8.729, "step": 38940 }, { "epoch": 14.09, "learning_rate": 1.5024399660026294e-07, "loss": 8.7798, "step": 38960 }, { "epoch": 14.1, "learning_rate": 1.4991852617722617e-07, "loss": 8.6727, "step": 38980 }, { "epoch": 14.1, "learning_rate": 1.495932576294588e-07, "loss": 8.6122, "step": 39000 }, { "epoch": 14.11, "learning_rate": 1.4926819161306306e-07, "loss": 8.7075, "step": 39020 }, { "epoch": 14.12, "learning_rate": 1.4894332878373276e-07, "loss": 8.7238, "step": 39040 }, { "epoch": 14.13, "learning_rate": 1.4861866979675152e-07, "loss": 8.7556, "step": 39060 }, { "epoch": 14.13, "learning_rate": 1.4829421530699222e-07, "loss": 8.7966, "step": 39080 }, { "epoch": 14.14, "learning_rate": 1.4796996596891487e-07, "loss": 8.7137, "step": 39100 }, { "epoch": 14.15, "learning_rate": 1.4764592243656582e-07, "loss": 8.6169, "step": 39120 }, { "epoch": 14.16, "learning_rate": 1.4732208536357636e-07, "loss": 8.6715, "step": 39140 }, { "epoch": 14.16, "learning_rate": 1.4699845540316123e-07, "loss": 8.6655, "step": 39160 }, { "epoch": 14.17, "learning_rate": 1.466750332081174e-07, "loss": 8.6515, "step": 39180 }, { "epoch": 14.18, "learning_rate": 1.4635181943082284e-07, "loss": 8.6899, "step": 39200 }, { "epoch": 14.18, "learning_rate": 1.4602881472323498e-07, "loss": 8.731, "step": 39220 }, { "epoch": 14.19, "learning_rate": 1.4570601973688966e-07, "loss": 8.6511, "step": 39240 }, { "epoch": 14.2, "learning_rate": 1.4538343512289957e-07, "loss": 8.6555, "step": 39260 }, { "epoch": 14.21, "learning_rate": 1.450610615319533e-07, "loss": 8.6992, "step": 39280 }, { "epoch": 14.21, "learning_rate": 1.447388996143134e-07, "loss": 8.6656, "step": 39300 }, { "epoch": 14.22, "learning_rate": 
1.4441695001981585e-07, "loss": 8.6907, "step": 39320 }, { "epoch": 14.23, "learning_rate": 1.4409521339786808e-07, "loss": 8.8347, "step": 39340 }, { "epoch": 14.24, "learning_rate": 1.437736903974479e-07, "loss": 8.6904, "step": 39360 }, { "epoch": 14.24, "learning_rate": 1.4345238166710254e-07, "loss": 8.6437, "step": 39380 }, { "epoch": 14.25, "learning_rate": 1.4313128785494667e-07, "loss": 8.6775, "step": 39400 }, { "epoch": 14.26, "learning_rate": 1.4281040960866175e-07, "loss": 8.5845, "step": 39420 }, { "epoch": 14.26, "learning_rate": 1.4248974757549415e-07, "loss": 8.6379, "step": 39440 }, { "epoch": 14.27, "learning_rate": 1.421693024022542e-07, "loss": 8.6482, "step": 39460 }, { "epoch": 14.28, "learning_rate": 1.4184907473531496e-07, "loss": 8.7291, "step": 39480 }, { "epoch": 14.29, "learning_rate": 1.4152906522061047e-07, "loss": 8.7168, "step": 39500 }, { "epoch": 14.29, "learning_rate": 1.412092745036351e-07, "loss": 8.6693, "step": 39520 }, { "epoch": 14.3, "learning_rate": 1.4088970322944145e-07, "loss": 8.6391, "step": 39540 }, { "epoch": 14.31, "learning_rate": 1.405703520426399e-07, "loss": 8.705, "step": 39560 }, { "epoch": 14.31, "learning_rate": 1.402512215873965e-07, "loss": 8.84, "step": 39580 }, { "epoch": 14.32, "learning_rate": 1.3993231250743243e-07, "loss": 8.7896, "step": 39600 }, { "epoch": 14.33, "learning_rate": 1.3961362544602212e-07, "loss": 8.599, "step": 39620 }, { "epoch": 14.34, "learning_rate": 1.3929516104599202e-07, "loss": 8.7522, "step": 39640 }, { "epoch": 14.34, "learning_rate": 1.3897691994971985e-07, "loss": 8.6363, "step": 39660 }, { "epoch": 14.35, "learning_rate": 1.386589027991325e-07, "loss": 8.6951, "step": 39680 }, { "epoch": 14.36, "learning_rate": 1.3834111023570556e-07, "loss": 8.8011, "step": 39700 }, { "epoch": 14.37, "learning_rate": 1.3802354290046103e-07, "loss": 8.5725, "step": 39720 }, { "epoch": 14.37, "learning_rate": 1.377062014339672e-07, "loss": 8.7008, "step": 39740 }, { "epoch": 14.38, "learning_rate": 1.3738908647633634e-07, "loss": 8.6939, "step": 39760 }, { "epoch": 14.39, "learning_rate": 1.3707219866722408e-07, "loss": 8.6877, "step": 39780 }, { "epoch": 14.39, "learning_rate": 1.367555386458276e-07, "loss": 8.634, "step": 39800 }, { "epoch": 14.4, "learning_rate": 1.3643910705088501e-07, "loss": 8.719, "step": 39820 }, { "epoch": 14.41, "learning_rate": 1.3612290452067322e-07, "loss": 8.6471, "step": 39840 }, { "epoch": 14.42, "learning_rate": 1.3580693169300727e-07, "loss": 8.6649, "step": 39860 }, { "epoch": 14.42, "learning_rate": 1.3549118920523905e-07, "loss": 8.7158, "step": 39880 }, { "epoch": 14.43, "learning_rate": 1.3517567769425548e-07, "loss": 8.6799, "step": 39900 }, { "epoch": 14.44, "learning_rate": 1.3486039779647793e-07, "loss": 8.6747, "step": 39920 }, { "epoch": 14.44, "learning_rate": 1.3454535014786023e-07, "loss": 8.6028, "step": 39940 }, { "epoch": 14.45, "learning_rate": 1.3423053538388808e-07, "loss": 8.6774, "step": 39960 }, { "epoch": 14.46, "learning_rate": 1.3391595413957717e-07, "loss": 8.6907, "step": 39980 }, { "epoch": 14.47, "learning_rate": 1.3360160704947221e-07, "loss": 8.6227, "step": 40000 }, { "epoch": 14.47, "learning_rate": 1.3328749474764577e-07, "loss": 8.6221, "step": 40020 }, { "epoch": 14.48, "learning_rate": 1.329736178676965e-07, "loss": 8.6608, "step": 40040 }, { "epoch": 14.49, "learning_rate": 1.3265997704274856e-07, "loss": 8.7676, "step": 40060 }, { "epoch": 14.5, "learning_rate": 1.323465729054497e-07, "loss": 8.6608, "step": 40080 }, { "epoch": 14.5, 
"learning_rate": 1.3203340608797016e-07, "loss": 8.7215, "step": 40100 }, { "epoch": 14.51, "learning_rate": 1.317204772220019e-07, "loss": 8.6288, "step": 40120 }, { "epoch": 14.52, "learning_rate": 1.3140778693875637e-07, "loss": 8.5937, "step": 40140 }, { "epoch": 14.52, "learning_rate": 1.3109533586896432e-07, "loss": 8.6781, "step": 40160 }, { "epoch": 14.53, "learning_rate": 1.3078312464287354e-07, "loss": 8.6522, "step": 40180 }, { "epoch": 14.54, "learning_rate": 1.304711538902481e-07, "loss": 8.5842, "step": 40200 }, { "epoch": 14.55, "learning_rate": 1.301594242403673e-07, "loss": 8.695, "step": 40220 }, { "epoch": 14.55, "learning_rate": 1.2984793632202373e-07, "loss": 8.6941, "step": 40240 }, { "epoch": 14.56, "learning_rate": 1.2953669076352274e-07, "loss": 8.6796, "step": 40260 }, { "epoch": 14.57, "learning_rate": 1.2922568819268054e-07, "loss": 8.7716, "step": 40280 }, { "epoch": 14.58, "learning_rate": 1.2891492923682323e-07, "loss": 8.6689, "step": 40300 }, { "epoch": 14.58, "learning_rate": 1.2860441452278574e-07, "loss": 8.4937, "step": 40320 }, { "epoch": 14.59, "learning_rate": 1.2829414467691e-07, "loss": 8.695, "step": 40340 }, { "epoch": 14.6, "learning_rate": 1.2798412032504437e-07, "loss": 8.6764, "step": 40360 }, { "epoch": 14.6, "learning_rate": 1.276743420925418e-07, "loss": 8.5718, "step": 40380 }, { "epoch": 14.61, "learning_rate": 1.273648106042587e-07, "loss": 8.7333, "step": 40400 }, { "epoch": 14.62, "learning_rate": 1.270555264845541e-07, "loss": 8.6268, "step": 40420 }, { "epoch": 14.63, "learning_rate": 1.2674649035728768e-07, "loss": 8.8738, "step": 40440 }, { "epoch": 14.63, "learning_rate": 1.264377028458193e-07, "loss": 8.6499, "step": 40460 }, { "epoch": 14.64, "learning_rate": 1.2612916457300687e-07, "loss": 8.667, "step": 40480 }, { "epoch": 14.65, "learning_rate": 1.2582087616120607e-07, "loss": 8.816, "step": 40500 }, { "epoch": 14.65, "learning_rate": 1.2551283823226812e-07, "loss": 8.6488, "step": 40520 }, { "epoch": 14.66, "learning_rate": 1.2520505140753917e-07, "loss": 8.6484, "step": 40540 }, { "epoch": 14.67, "learning_rate": 1.2489751630785905e-07, "loss": 8.6768, "step": 40560 }, { "epoch": 14.68, "learning_rate": 1.2459023355355946e-07, "loss": 8.7502, "step": 40580 }, { "epoch": 14.68, "learning_rate": 1.2428320376446348e-07, "loss": 8.6466, "step": 40600 }, { "epoch": 14.69, "learning_rate": 1.2397642755988368e-07, "loss": 8.6937, "step": 40620 }, { "epoch": 14.7, "learning_rate": 1.2366990555862106e-07, "loss": 8.7133, "step": 40640 }, { "epoch": 14.71, "learning_rate": 1.233636383789642e-07, "loss": 8.6059, "step": 40660 }, { "epoch": 14.71, "learning_rate": 1.2305762663868728e-07, "loss": 8.5764, "step": 40680 }, { "epoch": 14.72, "learning_rate": 1.2275187095504962e-07, "loss": 8.8081, "step": 40700 }, { "epoch": 14.73, "learning_rate": 1.2244637194479376e-07, "loss": 8.6386, "step": 40720 }, { "epoch": 14.73, "learning_rate": 1.2214113022414446e-07, "loss": 8.762, "step": 40740 }, { "epoch": 14.74, "learning_rate": 1.2183614640880783e-07, "loss": 8.6669, "step": 40760 }, { "epoch": 14.75, "learning_rate": 1.2153142111396937e-07, "loss": 8.6712, "step": 40780 }, { "epoch": 14.76, "learning_rate": 1.2122695495429347e-07, "loss": 8.719, "step": 40800 }, { "epoch": 14.76, "learning_rate": 1.2092274854392156e-07, "loss": 8.7279, "step": 40820 }, { "epoch": 14.77, "learning_rate": 1.2061880249647113e-07, "loss": 8.6048, "step": 40840 }, { "epoch": 14.78, "learning_rate": 1.2031511742503478e-07, "loss": 8.7292, "step": 40860 }, { 
"epoch": 14.78, "learning_rate": 1.2001169394217825e-07, "loss": 8.6321, "step": 40880 }, { "epoch": 14.79, "learning_rate": 1.1970853265994007e-07, "loss": 8.6249, "step": 40900 }, { "epoch": 14.8, "learning_rate": 1.1940563418982959e-07, "loss": 8.5311, "step": 40920 }, { "epoch": 14.81, "learning_rate": 1.1910299914282601e-07, "loss": 8.8256, "step": 40940 }, { "epoch": 14.81, "learning_rate": 1.1880062812937753e-07, "loss": 8.9261, "step": 40960 }, { "epoch": 14.82, "learning_rate": 1.1849852175939928e-07, "loss": 8.6666, "step": 40980 }, { "epoch": 14.83, "learning_rate": 1.1819668064227303e-07, "loss": 8.6694, "step": 41000 }, { "epoch": 14.84, "learning_rate": 1.1789510538684522e-07, "loss": 8.7188, "step": 41020 }, { "epoch": 14.84, "learning_rate": 1.1759379660142597e-07, "loss": 8.6856, "step": 41040 }, { "epoch": 14.85, "learning_rate": 1.1729275489378826e-07, "loss": 8.5903, "step": 41060 }, { "epoch": 14.86, "learning_rate": 1.1699198087116588e-07, "loss": 8.6546, "step": 41080 }, { "epoch": 14.86, "learning_rate": 1.166914751402531e-07, "loss": 8.6771, "step": 41100 }, { "epoch": 14.87, "learning_rate": 1.1639123830720265e-07, "loss": 8.7304, "step": 41120 }, { "epoch": 14.88, "learning_rate": 1.160912709776252e-07, "loss": 8.707, "step": 41140 }, { "epoch": 14.89, "learning_rate": 1.1579157375658755e-07, "loss": 8.6712, "step": 41160 }, { "epoch": 14.89, "learning_rate": 1.1549214724861168e-07, "loss": 8.7817, "step": 41180 }, { "epoch": 14.9, "learning_rate": 1.151929920576737e-07, "loss": 8.7075, "step": 41200 }, { "epoch": 14.91, "learning_rate": 1.1489410878720216e-07, "loss": 8.7929, "step": 41220 }, { "epoch": 14.92, "learning_rate": 1.1459549804007748e-07, "loss": 8.6006, "step": 41240 }, { "epoch": 14.92, "learning_rate": 1.1429716041863008e-07, "loss": 8.7001, "step": 41260 }, { "epoch": 14.93, "learning_rate": 1.1399909652463943e-07, "loss": 8.6809, "step": 41280 }, { "epoch": 14.94, "learning_rate": 1.1370130695933316e-07, "loss": 8.6832, "step": 41300 }, { "epoch": 14.94, "learning_rate": 1.134037923233852e-07, "loss": 8.5958, "step": 41320 }, { "epoch": 14.95, "learning_rate": 1.1310655321691525e-07, "loss": 8.754, "step": 41340 }, { "epoch": 14.96, "learning_rate": 1.1280959023948692e-07, "loss": 8.7372, "step": 41360 }, { "epoch": 14.97, "learning_rate": 1.1251290399010712e-07, "loss": 8.6279, "step": 41380 }, { "epoch": 14.97, "learning_rate": 1.122164950672243e-07, "loss": 8.6381, "step": 41400 }, { "epoch": 14.98, "learning_rate": 1.1192036406872781e-07, "loss": 8.6657, "step": 41420 }, { "epoch": 14.99, "learning_rate": 1.1162451159194614e-07, "loss": 8.6235, "step": 41440 }, { "epoch": 14.99, "learning_rate": 1.1132893823364594e-07, "loss": 8.684, "step": 41460 }, { "epoch": 15.0, "learning_rate": 1.1103364459003126e-07, "loss": 8.651, "step": 41480 }, { "epoch": 15.01, "learning_rate": 1.1073863125674135e-07, "loss": 8.7139, "step": 41500 }, { "epoch": 15.02, "learning_rate": 1.1044389882885058e-07, "loss": 8.6635, "step": 41520 }, { "epoch": 15.02, "learning_rate": 1.1014944790086631e-07, "loss": 8.6469, "step": 41540 }, { "epoch": 15.03, "learning_rate": 1.0985527906672834e-07, "loss": 8.6706, "step": 41560 }, { "epoch": 15.04, "learning_rate": 1.0956139291980726e-07, "loss": 8.6693, "step": 41580 }, { "epoch": 15.05, "learning_rate": 1.0926779005290365e-07, "loss": 8.6948, "step": 41600 }, { "epoch": 15.05, "learning_rate": 1.0897447105824645e-07, "loss": 8.6314, "step": 41620 }, { "epoch": 15.06, "learning_rate": 1.0868143652749228e-07, "loss": 
8.6982, "step": 41640 }, { "epoch": 15.07, "learning_rate": 1.0838868705172377e-07, "loss": 8.6151, "step": 41660 }, { "epoch": 15.07, "learning_rate": 1.0809622322144843e-07, "loss": 8.6785, "step": 41680 }, { "epoch": 15.08, "learning_rate": 1.07804045626598e-07, "loss": 8.8313, "step": 41700 }, { "epoch": 15.09, "learning_rate": 1.0751215485652643e-07, "loss": 8.7213, "step": 41720 }, { "epoch": 15.1, "learning_rate": 1.0722055150000947e-07, "loss": 8.6961, "step": 41740 }, { "epoch": 15.1, "learning_rate": 1.0692923614524279e-07, "loss": 8.73, "step": 41760 }, { "epoch": 15.11, "learning_rate": 1.0663820937984147e-07, "loss": 8.7056, "step": 41780 }, { "epoch": 15.12, "learning_rate": 1.063474717908382e-07, "loss": 8.6131, "step": 41800 }, { "epoch": 15.12, "learning_rate": 1.0605702396468238e-07, "loss": 8.6164, "step": 41820 }, { "epoch": 15.13, "learning_rate": 1.0576686648723923e-07, "loss": 8.6729, "step": 41840 }, { "epoch": 15.14, "learning_rate": 1.0547699994378786e-07, "loss": 8.7336, "step": 41860 }, { "epoch": 15.15, "learning_rate": 1.0518742491902097e-07, "loss": 8.7729, "step": 41880 }, { "epoch": 15.15, "learning_rate": 1.0489814199704292e-07, "loss": 8.7094, "step": 41900 }, { "epoch": 15.16, "learning_rate": 1.0460915176136892e-07, "loss": 8.7918, "step": 41920 }, { "epoch": 15.17, "learning_rate": 1.0432045479492399e-07, "loss": 8.6686, "step": 41940 }, { "epoch": 15.18, "learning_rate": 1.0403205168004132e-07, "loss": 8.5945, "step": 41960 }, { "epoch": 15.18, "learning_rate": 1.0374394299846168e-07, "loss": 8.6095, "step": 41980 }, { "epoch": 15.19, "learning_rate": 1.0345612933133166e-07, "loss": 8.687, "step": 42000 }, { "epoch": 15.2, "learning_rate": 1.0316861125920281e-07, "loss": 8.6749, "step": 42020 }, { "epoch": 15.2, "learning_rate": 1.0288138936203067e-07, "loss": 8.6988, "step": 42040 }, { "epoch": 15.21, "learning_rate": 1.02594464219173e-07, "loss": 8.645, "step": 42060 }, { "epoch": 15.22, "learning_rate": 1.0230783640938936e-07, "loss": 8.6744, "step": 42080 }, { "epoch": 15.23, "learning_rate": 1.0202150651083929e-07, "loss": 8.6319, "step": 42100 }, { "epoch": 15.23, "learning_rate": 1.0173547510108136e-07, "loss": 8.6984, "step": 42120 }, { "epoch": 15.24, "learning_rate": 1.0144974275707241e-07, "loss": 8.6916, "step": 42140 }, { "epoch": 15.25, "learning_rate": 1.0116431005516557e-07, "loss": 8.6657, "step": 42160 }, { "epoch": 15.25, "learning_rate": 1.0087917757111e-07, "loss": 8.6635, "step": 42180 }, { "epoch": 15.26, "learning_rate": 1.0059434588004897e-07, "loss": 8.759, "step": 42200 }, { "epoch": 15.27, "learning_rate": 1.0030981555651908e-07, "loss": 8.6307, "step": 42220 }, { "epoch": 15.28, "learning_rate": 1.0002558717444922e-07, "loss": 8.6877, "step": 42240 }, { "epoch": 15.28, "learning_rate": 9.974166130715894e-08, "loss": 8.6019, "step": 42260 }, { "epoch": 15.29, "learning_rate": 9.945803852735793e-08, "loss": 8.6728, "step": 42280 }, { "epoch": 15.3, "learning_rate": 9.917471940714412e-08, "loss": 8.7323, "step": 42300 }, { "epoch": 15.31, "learning_rate": 9.889170451800332e-08, "loss": 8.6797, "step": 42320 }, { "epoch": 15.31, "learning_rate": 9.860899443080741e-08, "loss": 8.6671, "step": 42340 }, { "epoch": 15.32, "learning_rate": 9.832658971581346e-08, "loss": 8.6717, "step": 42360 }, { "epoch": 15.33, "learning_rate": 9.804449094266279e-08, "loss": 8.6571, "step": 42380 }, { "epoch": 15.33, "learning_rate": 9.776269868037928e-08, "loss": 8.7102, "step": 42400 }, { "epoch": 15.34, "learning_rate": 9.748121349736891e-08, 
"loss": 8.6032, "step": 42420 }, { "epoch": 15.35, "learning_rate": 9.720003596141796e-08, "loss": 8.6771, "step": 42440 }, { "epoch": 15.36, "learning_rate": 9.691916663969214e-08, "loss": 8.6988, "step": 42460 }, { "epoch": 15.36, "learning_rate": 9.663860609873575e-08, "loss": 8.6573, "step": 42480 }, { "epoch": 15.37, "learning_rate": 9.635835490446992e-08, "loss": 8.7768, "step": 42500 }, { "epoch": 15.38, "learning_rate": 9.607841362219207e-08, "loss": 8.6382, "step": 42520 }, { "epoch": 15.39, "learning_rate": 9.579878281657428e-08, "loss": 8.6746, "step": 42540 }, { "epoch": 15.39, "learning_rate": 9.551946305166233e-08, "loss": 8.7247, "step": 42560 }, { "epoch": 15.4, "learning_rate": 9.524045489087493e-08, "loss": 8.6537, "step": 42580 }, { "epoch": 15.41, "learning_rate": 9.496175889700184e-08, "loss": 8.69, "step": 42600 }, { "epoch": 15.41, "learning_rate": 9.46833756322035e-08, "loss": 8.6793, "step": 42620 }, { "epoch": 15.42, "learning_rate": 9.440530565800927e-08, "loss": 8.6738, "step": 42640 }, { "epoch": 15.43, "learning_rate": 9.412754953531663e-08, "loss": 8.6723, "step": 42660 }, { "epoch": 15.44, "learning_rate": 9.385010782439018e-08, "loss": 8.6072, "step": 42680 }, { "epoch": 15.44, "learning_rate": 9.357298108486003e-08, "loss": 8.6054, "step": 42700 }, { "epoch": 15.45, "learning_rate": 9.329616987572122e-08, "loss": 8.6386, "step": 42720 }, { "epoch": 15.46, "learning_rate": 9.301967475533215e-08, "loss": 8.7196, "step": 42740 }, { "epoch": 15.46, "learning_rate": 9.274349628141359e-08, "loss": 8.7016, "step": 42760 }, { "epoch": 15.47, "learning_rate": 9.246763501104793e-08, "loss": 8.657, "step": 42780 }, { "epoch": 15.48, "learning_rate": 9.219209150067725e-08, "loss": 8.8457, "step": 42800 }, { "epoch": 15.49, "learning_rate": 9.191686630610313e-08, "loss": 8.591, "step": 42820 }, { "epoch": 15.49, "learning_rate": 9.164195998248469e-08, "loss": 8.5781, "step": 42840 }, { "epoch": 15.5, "learning_rate": 9.13673730843382e-08, "loss": 8.5978, "step": 42860 }, { "epoch": 15.51, "learning_rate": 9.109310616553534e-08, "loss": 8.64, "step": 42880 }, { "epoch": 15.52, "learning_rate": 9.08191597793024e-08, "loss": 8.6924, "step": 42900 }, { "epoch": 15.52, "learning_rate": 9.05455344782193e-08, "loss": 8.5971, "step": 42920 }, { "epoch": 15.53, "learning_rate": 9.027223081421806e-08, "loss": 8.6123, "step": 42940 }, { "epoch": 15.54, "learning_rate": 8.999924933858219e-08, "loss": 8.5796, "step": 42960 }, { "epoch": 15.54, "learning_rate": 8.972659060194504e-08, "loss": 8.6404, "step": 42980 }, { "epoch": 15.55, "learning_rate": 8.945425515428904e-08, "loss": 8.7294, "step": 43000 }, { "epoch": 15.56, "learning_rate": 8.918224354494466e-08, "loss": 8.7276, "step": 43020 }, { "epoch": 15.57, "learning_rate": 8.891055632258892e-08, "loss": 8.6615, "step": 43040 }, { "epoch": 15.57, "learning_rate": 8.863919403524478e-08, "loss": 8.6805, "step": 43060 }, { "epoch": 15.58, "learning_rate": 8.836815723027957e-08, "loss": 8.6901, "step": 43080 }, { "epoch": 15.59, "learning_rate": 8.809744645440403e-08, "loss": 8.6524, "step": 43100 }, { "epoch": 15.59, "learning_rate": 8.78270622536716e-08, "loss": 8.9045, "step": 43120 }, { "epoch": 15.6, "learning_rate": 8.75570051734765e-08, "loss": 8.7131, "step": 43140 }, { "epoch": 15.61, "learning_rate": 8.728727575855363e-08, "loss": 8.7249, "step": 43160 }, { "epoch": 15.62, "learning_rate": 8.701787455297646e-08, "loss": 8.6845, "step": 43180 }, { "epoch": 15.62, "learning_rate": 8.67488021001569e-08, "loss": 8.6726, 
"step": 43200 }, { "epoch": 15.63, "learning_rate": 8.648005894284324e-08, "loss": 8.7113, "step": 43220 }, { "epoch": 15.64, "learning_rate": 8.621164562312003e-08, "loss": 8.6752, "step": 43240 }, { "epoch": 15.65, "learning_rate": 8.594356268240616e-08, "loss": 8.6732, "step": 43260 }, { "epoch": 15.65, "learning_rate": 8.567581066145413e-08, "loss": 8.686, "step": 43280 }, { "epoch": 15.66, "learning_rate": 8.54083901003492e-08, "loss": 8.7233, "step": 43300 }, { "epoch": 15.67, "learning_rate": 8.514130153850768e-08, "loss": 8.7432, "step": 43320 }, { "epoch": 15.67, "learning_rate": 8.487454551467657e-08, "loss": 8.7446, "step": 43340 }, { "epoch": 15.68, "learning_rate": 8.460812256693178e-08, "loss": 8.9778, "step": 43360 }, { "epoch": 15.69, "learning_rate": 8.434203323267764e-08, "loss": 8.7359, "step": 43380 }, { "epoch": 15.7, "learning_rate": 8.407627804864523e-08, "loss": 8.7208, "step": 43400 }, { "epoch": 15.7, "learning_rate": 8.381085755089201e-08, "loss": 8.5855, "step": 43420 }, { "epoch": 15.71, "learning_rate": 8.354577227479995e-08, "loss": 8.6439, "step": 43440 }, { "epoch": 15.72, "learning_rate": 8.328102275507518e-08, "loss": 8.7329, "step": 43460 }, { "epoch": 15.73, "learning_rate": 8.301660952574633e-08, "loss": 8.6073, "step": 43480 }, { "epoch": 15.73, "learning_rate": 8.27525331201637e-08, "loss": 8.7215, "step": 43500 }, { "epoch": 15.74, "learning_rate": 8.24887940709984e-08, "loss": 8.6948, "step": 43520 }, { "epoch": 15.75, "learning_rate": 8.222539291024077e-08, "loss": 8.7347, "step": 43540 }, { "epoch": 15.75, "learning_rate": 8.19623301691999e-08, "loss": 8.6756, "step": 43560 }, { "epoch": 15.76, "learning_rate": 8.169960637850192e-08, "loss": 8.6516, "step": 43580 }, { "epoch": 15.77, "learning_rate": 8.143722206808959e-08, "loss": 8.6615, "step": 43600 }, { "epoch": 15.78, "learning_rate": 8.117517776722066e-08, "loss": 8.6372, "step": 43620 }, { "epoch": 15.78, "learning_rate": 8.091347400446702e-08, "loss": 8.6507, "step": 43640 }, { "epoch": 15.79, "learning_rate": 8.065211130771393e-08, "loss": 8.7741, "step": 43660 }, { "epoch": 15.8, "learning_rate": 8.039109020415838e-08, "loss": 8.5572, "step": 43680 }, { "epoch": 15.8, "learning_rate": 8.013041122030856e-08, "loss": 8.6496, "step": 43700 }, { "epoch": 15.81, "learning_rate": 7.987007488198244e-08, "loss": 8.6822, "step": 43720 }, { "epoch": 15.82, "learning_rate": 7.961008171430677e-08, "loss": 8.723, "step": 43740 }, { "epoch": 15.83, "learning_rate": 7.935043224171631e-08, "loss": 8.6827, "step": 43760 }, { "epoch": 15.83, "learning_rate": 7.909112698795231e-08, "loss": 8.6871, "step": 43780 }, { "epoch": 15.84, "learning_rate": 7.883216647606192e-08, "loss": 8.6921, "step": 43800 }, { "epoch": 15.85, "learning_rate": 7.857355122839673e-08, "loss": 8.7701, "step": 43820 }, { "epoch": 15.86, "learning_rate": 7.831528176661189e-08, "loss": 8.5903, "step": 43840 }, { "epoch": 15.86, "learning_rate": 7.805735861166527e-08, "loss": 8.6362, "step": 43860 }, { "epoch": 15.87, "learning_rate": 7.77997822838159e-08, "loss": 8.6959, "step": 43880 }, { "epoch": 15.88, "learning_rate": 7.754255330262353e-08, "loss": 8.6671, "step": 43900 }, { "epoch": 15.88, "learning_rate": 7.728567218694706e-08, "loss": 8.7909, "step": 43920 }, { "epoch": 15.89, "learning_rate": 7.702913945494368e-08, "loss": 8.6083, "step": 43940 }, { "epoch": 15.9, "learning_rate": 7.677295562406812e-08, "loss": 8.639, "step": 43960 }, { "epoch": 15.91, "learning_rate": 7.651712121107101e-08, "loss": 8.8751, "step": 43980 }, { 
"epoch": 15.91, "learning_rate": 7.626163673199848e-08, "loss": 8.6571, "step": 44000 }, { "epoch": 15.92, "learning_rate": 7.600650270219044e-08, "loss": 8.8169, "step": 44020 }, { "epoch": 15.93, "learning_rate": 7.57517196362803e-08, "loss": 8.6811, "step": 44040 }, { "epoch": 15.93, "learning_rate": 7.549728804819325e-08, "loss": 8.6199, "step": 44060 }, { "epoch": 15.94, "learning_rate": 7.524320845114557e-08, "loss": 8.6188, "step": 44080 }, { "epoch": 15.95, "learning_rate": 7.498948135764368e-08, "loss": 8.6039, "step": 44100 }, { "epoch": 15.96, "learning_rate": 7.473610727948271e-08, "loss": 8.6675, "step": 44120 }, { "epoch": 15.96, "learning_rate": 7.448308672774605e-08, "loss": 8.6623, "step": 44140 }, { "epoch": 15.97, "learning_rate": 7.423042021280369e-08, "loss": 8.8081, "step": 44160 }, { "epoch": 15.98, "learning_rate": 7.397810824431155e-08, "loss": 8.6835, "step": 44180 }, { "epoch": 15.99, "learning_rate": 7.372615133121057e-08, "loss": 8.6718, "step": 44200 }, { "epoch": 15.99, "learning_rate": 7.347454998172522e-08, "loss": 8.6558, "step": 44220 }, { "epoch": 16.0, "learning_rate": 7.322330470336313e-08, "loss": 8.6661, "step": 44240 }, { "epoch": 16.01, "learning_rate": 7.297241600291334e-08, "loss": 8.6431, "step": 44260 }, { "epoch": 16.01, "learning_rate": 7.272188438644574e-08, "loss": 8.6932, "step": 44280 }, { "epoch": 16.02, "learning_rate": 7.24717103593101e-08, "loss": 8.6602, "step": 44300 }, { "epoch": 16.03, "learning_rate": 7.222189442613464e-08, "loss": 8.6308, "step": 44320 }, { "epoch": 16.04, "learning_rate": 7.197243709082554e-08, "loss": 8.6254, "step": 44340 }, { "epoch": 16.04, "learning_rate": 7.172333885656542e-08, "loss": 8.6904, "step": 44360 }, { "epoch": 16.05, "learning_rate": 7.147460022581255e-08, "loss": 8.6725, "step": 44380 }, { "epoch": 16.06, "learning_rate": 7.122622170030016e-08, "loss": 8.763, "step": 44400 }, { "epoch": 16.07, "learning_rate": 7.097820378103464e-08, "loss": 8.6677, "step": 44420 }, { "epoch": 16.07, "learning_rate": 7.073054696829545e-08, "loss": 8.6222, "step": 44440 }, { "epoch": 16.08, "learning_rate": 7.04832517616333e-08, "loss": 8.7767, "step": 44460 }, { "epoch": 16.09, "learning_rate": 7.023631865986965e-08, "loss": 8.7476, "step": 44480 }, { "epoch": 16.09, "learning_rate": 6.998974816109565e-08, "loss": 8.7494, "step": 44500 }, { "epoch": 16.1, "learning_rate": 6.97435407626708e-08, "loss": 8.7064, "step": 44520 }, { "epoch": 16.11, "learning_rate": 6.949769696122249e-08, "loss": 8.7511, "step": 44540 }, { "epoch": 16.12, "learning_rate": 6.925221725264436e-08, "loss": 8.6708, "step": 44560 }, { "epoch": 16.12, "learning_rate": 6.900710213209596e-08, "loss": 8.6829, "step": 44580 }, { "epoch": 16.13, "learning_rate": 6.876235209400123e-08, "loss": 8.5974, "step": 44600 }, { "epoch": 16.14, "learning_rate": 6.851796763204765e-08, "loss": 8.612, "step": 44620 }, { "epoch": 16.14, "learning_rate": 6.827394923918553e-08, "loss": 8.5953, "step": 44640 }, { "epoch": 16.15, "learning_rate": 6.803029740762648e-08, "loss": 8.6415, "step": 44660 }, { "epoch": 16.16, "learning_rate": 6.778701262884304e-08, "loss": 8.6311, "step": 44680 }, { "epoch": 16.17, "learning_rate": 6.75440953935671e-08, "loss": 8.708, "step": 44700 }, { "epoch": 16.17, "learning_rate": 6.730154619178918e-08, "loss": 8.6652, "step": 44720 }, { "epoch": 16.18, "learning_rate": 6.705936551275773e-08, "loss": 8.6269, "step": 44740 }, { "epoch": 16.19, "learning_rate": 6.681755384497748e-08, "loss": 8.7657, "step": 44760 }, { "epoch": 16.2, 
"learning_rate": 6.657611167620908e-08, "loss": 8.709, "step": 44780 }, { "epoch": 16.2, "learning_rate": 6.633503949346775e-08, "loss": 8.6648, "step": 44800 }, { "epoch": 16.21, "learning_rate": 6.609433778302234e-08, "loss": 8.6801, "step": 44820 }, { "epoch": 16.22, "learning_rate": 6.585400703039465e-08, "loss": 8.7038, "step": 44840 }, { "epoch": 16.22, "learning_rate": 6.561404772035792e-08, "loss": 8.7411, "step": 44860 }, { "epoch": 16.23, "learning_rate": 6.537446033693645e-08, "loss": 8.6095, "step": 44880 }, { "epoch": 16.24, "learning_rate": 6.513524536340412e-08, "loss": 8.7588, "step": 44900 }, { "epoch": 16.25, "learning_rate": 6.489640328228354e-08, "loss": 8.6095, "step": 44920 }, { "epoch": 16.25, "learning_rate": 6.465793457534552e-08, "loss": 8.6027, "step": 44940 }, { "epoch": 16.26, "learning_rate": 6.441983972360729e-08, "loss": 8.5969, "step": 44960 }, { "epoch": 16.27, "learning_rate": 6.418211920733235e-08, "loss": 8.6493, "step": 44980 }, { "epoch": 16.27, "learning_rate": 6.39447735060288e-08, "loss": 8.6815, "step": 45000 }, { "epoch": 16.28, "learning_rate": 6.370780309844906e-08, "loss": 8.6133, "step": 45020 }, { "epoch": 16.29, "learning_rate": 6.347120846258818e-08, "loss": 8.743, "step": 45040 }, { "epoch": 16.3, "learning_rate": 6.323499007568336e-08, "loss": 8.6719, "step": 45060 }, { "epoch": 16.3, "learning_rate": 6.299914841421309e-08, "loss": 8.7164, "step": 45080 }, { "epoch": 16.31, "learning_rate": 6.276368395389556e-08, "loss": 8.5924, "step": 45100 }, { "epoch": 16.32, "learning_rate": 6.25285971696885e-08, "loss": 8.7207, "step": 45120 }, { "epoch": 16.33, "learning_rate": 6.229388853578748e-08, "loss": 8.691, "step": 45140 }, { "epoch": 16.33, "learning_rate": 6.20595585256256e-08, "loss": 8.6939, "step": 45160 }, { "epoch": 16.34, "learning_rate": 6.1825607611872e-08, "loss": 8.7609, "step": 45180 }, { "epoch": 16.35, "learning_rate": 6.159203626643137e-08, "loss": 8.6598, "step": 45200 }, { "epoch": 16.35, "learning_rate": 6.135884496044244e-08, "loss": 8.6806, "step": 45220 }, { "epoch": 16.36, "learning_rate": 6.112603416427776e-08, "loss": 8.6477, "step": 45240 }, { "epoch": 16.37, "learning_rate": 6.089360434754203e-08, "loss": 8.7188, "step": 45260 }, { "epoch": 16.38, "learning_rate": 6.066155597907157e-08, "loss": 8.6378, "step": 45280 }, { "epoch": 16.38, "learning_rate": 6.04298895269334e-08, "loss": 8.6899, "step": 45300 }, { "epoch": 16.39, "learning_rate": 6.019860545842392e-08, "loss": 8.6343, "step": 45320 }, { "epoch": 16.4, "learning_rate": 5.996770424006856e-08, "loss": 8.741, "step": 45340 }, { "epoch": 16.41, "learning_rate": 5.973718633762015e-08, "loss": 8.6815, "step": 45360 }, { "epoch": 16.41, "learning_rate": 5.950705221605859e-08, "loss": 8.6599, "step": 45380 }, { "epoch": 16.42, "learning_rate": 5.927730233958947e-08, "loss": 8.692, "step": 45400 }, { "epoch": 16.43, "learning_rate": 5.9047937171643494e-08, "loss": 8.7429, "step": 45420 }, { "epoch": 16.43, "learning_rate": 5.881895717487523e-08, "loss": 8.7445, "step": 45440 }, { "epoch": 16.44, "learning_rate": 5.8590362811162254e-08, "loss": 8.6593, "step": 45460 }, { "epoch": 16.45, "learning_rate": 5.836215454160453e-08, "loss": 8.8061, "step": 45480 }, { "epoch": 16.46, "learning_rate": 5.813433282652297e-08, "loss": 8.6217, "step": 45500 }, { "epoch": 16.46, "learning_rate": 5.7906898125458984e-08, "loss": 8.7685, "step": 45520 }, { "epoch": 16.47, "learning_rate": 5.767985089717312e-08, "loss": 8.7338, "step": 45540 }, { "epoch": 16.48, "learning_rate": 
5.7453191599644405e-08, "loss": 8.6323, "step": 45560 }, { "epoch": 16.48, "learning_rate": 5.722692069006957e-08, "loss": 8.5866, "step": 45580 }, { "epoch": 16.49, "learning_rate": 5.700103862486158e-08, "loss": 8.7144, "step": 45600 }, { "epoch": 16.5, "learning_rate": 5.6775545859649446e-08, "loss": 8.6515, "step": 45620 }, { "epoch": 16.51, "learning_rate": 5.655044284927657e-08, "loss": 8.7815, "step": 45640 }, { "epoch": 16.51, "learning_rate": 5.632573004780031e-08, "loss": 8.6392, "step": 45660 }, { "epoch": 16.52, "learning_rate": 5.610140790849108e-08, "loss": 8.6613, "step": 45680 }, { "epoch": 16.53, "learning_rate": 5.587747688383099e-08, "loss": 8.7389, "step": 45700 }, { "epoch": 16.54, "learning_rate": 5.5653937425513526e-08, "loss": 8.6548, "step": 45720 }, { "epoch": 16.54, "learning_rate": 5.5430789984442064e-08, "loss": 8.661, "step": 45740 }, { "epoch": 16.55, "learning_rate": 5.520803501072954e-08, "loss": 8.7147, "step": 45760 }, { "epoch": 16.56, "learning_rate": 5.4985672953696995e-08, "loss": 8.7411, "step": 45780 }, { "epoch": 16.56, "learning_rate": 5.4763704261872906e-08, "loss": 8.6135, "step": 45800 }, { "epoch": 16.57, "learning_rate": 5.454212938299255e-08, "loss": 8.7409, "step": 45820 }, { "epoch": 16.58, "learning_rate": 5.432094876399654e-08, "loss": 8.6923, "step": 45840 }, { "epoch": 16.59, "learning_rate": 5.4100162851030447e-08, "loss": 8.5929, "step": 45860 }, { "epoch": 16.59, "learning_rate": 5.387977208944355e-08, "loss": 8.644, "step": 45880 }, { "epoch": 16.6, "learning_rate": 5.3659776923788036e-08, "loss": 8.648, "step": 45900 }, { "epoch": 16.61, "learning_rate": 5.344017779781834e-08, "loss": 8.594, "step": 45920 }, { "epoch": 16.61, "learning_rate": 5.32209751544897e-08, "loss": 8.7064, "step": 45940 }, { "epoch": 16.62, "learning_rate": 5.3002169435958e-08, "loss": 8.7087, "step": 45960 }, { "epoch": 16.63, "learning_rate": 5.278376108357818e-08, "loss": 8.8258, "step": 45980 }, { "epoch": 16.64, "learning_rate": 5.2565750537903716e-08, "loss": 8.758, "step": 46000 }, { "epoch": 16.64, "learning_rate": 5.2348138238685835e-08, "loss": 8.6735, "step": 46020 }, { "epoch": 16.65, "learning_rate": 5.213092462487215e-08, "loss": 8.6791, "step": 46040 }, { "epoch": 16.66, "learning_rate": 5.1914110134606445e-08, "loss": 8.6668, "step": 46060 }, { "epoch": 16.67, "learning_rate": 5.1697695205227126e-08, "loss": 8.6343, "step": 46080 }, { "epoch": 16.67, "learning_rate": 5.1481680273266713e-08, "loss": 8.6874, "step": 46100 }, { "epoch": 16.68, "learning_rate": 5.1266065774451086e-08, "loss": 8.6916, "step": 46120 }, { "epoch": 16.69, "learning_rate": 5.105085214369806e-08, "loss": 8.7685, "step": 46140 }, { "epoch": 16.69, "learning_rate": 5.0836039815117224e-08, "loss": 8.7846, "step": 46160 }, { "epoch": 16.7, "learning_rate": 5.062162922200844e-08, "loss": 8.7066, "step": 46180 }, { "epoch": 16.71, "learning_rate": 5.040762079686123e-08, "loss": 8.6493, "step": 46200 }, { "epoch": 16.72, "learning_rate": 5.019401497135414e-08, "loss": 8.6473, "step": 46220 }, { "epoch": 16.72, "learning_rate": 4.9980812176353274e-08, "loss": 8.6748, "step": 46240 }, { "epoch": 16.73, "learning_rate": 4.9768012841912147e-08, "loss": 8.7513, "step": 46260 }, { "epoch": 16.74, "learning_rate": 4.955561739727013e-08, "loss": 8.7193, "step": 46280 }, { "epoch": 16.75, "learning_rate": 4.9343626270852174e-08, "loss": 8.5842, "step": 46300 }, { "epoch": 16.75, "learning_rate": 4.9132039890267456e-08, "loss": 8.6781, "step": 46320 }, { "epoch": 16.76, 
"learning_rate": 4.89208586823088e-08, "loss": 8.7505, "step": 46340 }, { "epoch": 16.77, "learning_rate": 4.8710083072951904e-08, "loss": 8.839, "step": 46360 }, { "epoch": 16.77, "learning_rate": 4.849971348735405e-08, "loss": 8.6246, "step": 46380 }, { "epoch": 16.78, "learning_rate": 4.8289750349853834e-08, "loss": 8.6899, "step": 46400 }, { "epoch": 16.79, "learning_rate": 4.8080194083969764e-08, "loss": 8.7211, "step": 46420 }, { "epoch": 16.8, "learning_rate": 4.7871045112399674e-08, "loss": 8.7762, "step": 46440 }, { "epoch": 16.8, "learning_rate": 4.7662303857020056e-08, "loss": 8.7086, "step": 46460 }, { "epoch": 16.81, "learning_rate": 4.745397073888463e-08, "loss": 8.6422, "step": 46480 }, { "epoch": 16.82, "learning_rate": 4.724604617822428e-08, "loss": 8.6367, "step": 46500 }, { "epoch": 16.82, "learning_rate": 4.703853059444543e-08, "loss": 8.6699, "step": 46520 }, { "epoch": 16.83, "learning_rate": 4.683142440612967e-08, "loss": 8.7553, "step": 46540 }, { "epoch": 16.84, "learning_rate": 4.66247280310329e-08, "loss": 8.6603, "step": 46560 }, { "epoch": 16.85, "learning_rate": 4.641844188608421e-08, "loss": 8.7499, "step": 46580 }, { "epoch": 16.85, "learning_rate": 4.621256638738541e-08, "loss": 8.6107, "step": 46600 }, { "epoch": 16.86, "learning_rate": 4.600710195020982e-08, "loss": 8.5863, "step": 46620 }, { "epoch": 16.87, "learning_rate": 4.5802048989001556e-08, "loss": 8.8368, "step": 46640 }, { "epoch": 16.88, "learning_rate": 4.559740791737504e-08, "loss": 8.7823, "step": 46660 }, { "epoch": 16.88, "learning_rate": 4.539317914811353e-08, "loss": 8.5468, "step": 46680 }, { "epoch": 16.89, "learning_rate": 4.518936309316887e-08, "loss": 8.5938, "step": 46700 }, { "epoch": 16.9, "learning_rate": 4.498596016366027e-08, "loss": 8.6411, "step": 46720 }, { "epoch": 16.9, "learning_rate": 4.4782970769873614e-08, "loss": 8.6245, "step": 46740 }, { "epoch": 16.91, "learning_rate": 4.458039532126082e-08, "loss": 8.5955, "step": 46760 }, { "epoch": 16.92, "learning_rate": 4.4378234226438546e-08, "loss": 8.665, "step": 46780 }, { "epoch": 16.93, "learning_rate": 4.4176487893187956e-08, "loss": 8.7599, "step": 46800 }, { "epoch": 16.93, "learning_rate": 4.3975156728453336e-08, "loss": 8.6156, "step": 46820 }, { "epoch": 16.94, "learning_rate": 4.3774241138341775e-08, "loss": 8.6159, "step": 46840 }, { "epoch": 16.95, "learning_rate": 4.35737415281219e-08, "loss": 8.6734, "step": 46860 }, { "epoch": 16.95, "learning_rate": 4.3373658302223253e-08, "loss": 8.652, "step": 46880 }, { "epoch": 16.96, "learning_rate": 4.317399186423573e-08, "loss": 8.6976, "step": 46900 }, { "epoch": 16.97, "learning_rate": 4.29747426169082e-08, "loss": 8.6995, "step": 46920 }, { "epoch": 16.98, "learning_rate": 4.2775910962148334e-08, "loss": 8.6838, "step": 46940 }, { "epoch": 16.98, "learning_rate": 4.257749730102112e-08, "loss": 8.7379, "step": 46960 }, { "epoch": 16.99, "learning_rate": 4.2379502033748764e-08, "loss": 8.6209, "step": 46980 }, { "epoch": 17.0, "learning_rate": 4.218192555970923e-08, "loss": 8.7024, "step": 47000 }, { "epoch": 17.01, "learning_rate": 4.198476827743597e-08, "loss": 8.6773, "step": 47020 }, { "epoch": 17.01, "learning_rate": 4.1788030584616634e-08, "loss": 8.6747, "step": 47040 }, { "epoch": 17.02, "learning_rate": 4.159171287809279e-08, "loss": 8.8099, "step": 47060 }, { "epoch": 17.03, "learning_rate": 4.1395815553858624e-08, "loss": 8.7174, "step": 47080 }, { "epoch": 17.03, "learning_rate": 4.120033900706041e-08, "loss": 8.6592, "step": 47100 }, { "epoch": 17.04, 
"learning_rate": 4.1005283631995816e-08, "loss": 8.6115, "step": 47120 }, { "epoch": 17.05, "learning_rate": 4.081064982211274e-08, "loss": 8.8176, "step": 47140 }, { "epoch": 17.06, "learning_rate": 4.061643797000894e-08, "loss": 8.7223, "step": 47160 }, { "epoch": 17.06, "learning_rate": 4.042264846743085e-08, "loss": 8.6302, "step": 47180 }, { "epoch": 17.07, "learning_rate": 4.022928170527315e-08, "loss": 8.6813, "step": 47200 }, { "epoch": 17.08, "learning_rate": 4.003633807357767e-08, "loss": 8.6455, "step": 47220 }, { "epoch": 17.08, "learning_rate": 3.984381796153288e-08, "loss": 8.5833, "step": 47240 }, { "epoch": 17.09, "learning_rate": 3.9651721757472835e-08, "loss": 8.6188, "step": 47260 }, { "epoch": 17.1, "learning_rate": 3.946004984887652e-08, "loss": 8.6727, "step": 47280 }, { "epoch": 17.11, "learning_rate": 3.9268802622367234e-08, "loss": 8.6855, "step": 47300 }, { "epoch": 17.11, "learning_rate": 3.9077980463711384e-08, "loss": 8.81, "step": 47320 }, { "epoch": 17.12, "learning_rate": 3.888758375781828e-08, "loss": 8.761, "step": 47340 }, { "epoch": 17.13, "learning_rate": 3.869761288873882e-08, "loss": 8.6576, "step": 47360 }, { "epoch": 17.14, "learning_rate": 3.850806823966491e-08, "loss": 8.6554, "step": 47380 }, { "epoch": 17.14, "learning_rate": 3.831895019292897e-08, "loss": 8.6861, "step": 47400 }, { "epoch": 17.15, "learning_rate": 3.813025913000265e-08, "loss": 8.7043, "step": 47420 }, { "epoch": 17.16, "learning_rate": 3.7941995431496535e-08, "loss": 8.7619, "step": 47440 }, { "epoch": 17.16, "learning_rate": 3.775415947715899e-08, "loss": 8.726, "step": 47460 }, { "epoch": 17.17, "learning_rate": 3.7566751645875776e-08, "loss": 8.6323, "step": 47480 }, { "epoch": 17.18, "learning_rate": 3.7379772315668885e-08, "loss": 8.7214, "step": 47500 }, { "epoch": 17.19, "learning_rate": 3.7193221863696026e-08, "loss": 8.7395, "step": 47520 }, { "epoch": 17.19, "learning_rate": 3.700710066624993e-08, "loss": 8.6951, "step": 47540 }, { "epoch": 17.2, "learning_rate": 3.6821409098757336e-08, "loss": 8.6347, "step": 47560 }, { "epoch": 17.21, "learning_rate": 3.6636147535778483e-08, "loss": 8.6968, "step": 47580 }, { "epoch": 17.22, "learning_rate": 3.6451316351006153e-08, "loss": 8.6192, "step": 47600 }, { "epoch": 17.22, "learning_rate": 3.6266915917264994e-08, "loss": 8.6344, "step": 47620 }, { "epoch": 17.23, "learning_rate": 3.6082946606510956e-08, "loss": 8.8091, "step": 47640 }, { "epoch": 17.24, "learning_rate": 3.589940878983008e-08, "loss": 8.802, "step": 47660 }, { "epoch": 17.24, "learning_rate": 3.571630283743837e-08, "loss": 8.6752, "step": 47680 }, { "epoch": 17.25, "learning_rate": 3.5533629118680436e-08, "loss": 8.7103, "step": 47700 }, { "epoch": 17.26, "learning_rate": 3.535138800202911e-08, "loss": 8.7011, "step": 47720 }, { "epoch": 17.27, "learning_rate": 3.5169579855084755e-08, "loss": 8.7138, "step": 47740 }, { "epoch": 17.27, "learning_rate": 3.498820504457414e-08, "loss": 8.6862, "step": 47760 }, { "epoch": 17.28, "learning_rate": 3.4807263936350166e-08, "loss": 8.6514, "step": 47780 }, { "epoch": 17.29, "learning_rate": 3.4626756895390824e-08, "loss": 8.6566, "step": 47800 }, { "epoch": 17.29, "learning_rate": 3.444668428579844e-08, "loss": 8.6684, "step": 47820 }, { "epoch": 17.3, "learning_rate": 3.426704647079928e-08, "loss": 8.6983, "step": 47840 }, { "epoch": 17.31, "learning_rate": 3.4087843812742354e-08, "loss": 8.6276, "step": 47860 }, { "epoch": 17.32, "learning_rate": 3.390907667309906e-08, "loss": 8.6916, "step": 47880 }, { "epoch": 
17.32, "learning_rate": 3.373074541246224e-08, "loss": 8.6749, "step": 47900 }, { "epoch": 17.33, "learning_rate": 3.355285039054545e-08, "loss": 8.6663, "step": 47920 }, { "epoch": 17.34, "learning_rate": 3.3375391966182526e-08, "loss": 8.626, "step": 47940 }, { "epoch": 17.35, "learning_rate": 3.3198370497326405e-08, "loss": 8.7064, "step": 47960 }, { "epoch": 17.35, "learning_rate": 3.3021786341048824e-08, "loss": 8.6254, "step": 47980 }, { "epoch": 17.36, "learning_rate": 3.284563985353925e-08, "loss": 8.715, "step": 48000 }, { "epoch": 17.37, "learning_rate": 3.2669931390104374e-08, "loss": 8.7273, "step": 48020 }, { "epoch": 17.37, "learning_rate": 3.249466130516745e-08, "loss": 8.6447, "step": 48040 }, { "epoch": 17.38, "learning_rate": 3.231982995226731e-08, "loss": 8.7037, "step": 48060 }, { "epoch": 17.39, "learning_rate": 3.2145437684058e-08, "loss": 8.6636, "step": 48080 }, { "epoch": 17.4, "learning_rate": 3.197148485230769e-08, "loss": 8.6266, "step": 48100 }, { "epoch": 17.4, "learning_rate": 3.179797180789831e-08, "loss": 8.7095, "step": 48120 }, { "epoch": 17.41, "learning_rate": 3.162489890082459e-08, "loss": 8.6365, "step": 48140 }, { "epoch": 17.42, "learning_rate": 3.14522664801935e-08, "loss": 8.6417, "step": 48160 }, { "epoch": 17.42, "learning_rate": 3.1280074894223545e-08, "loss": 8.6506, "step": 48180 }, { "epoch": 17.43, "learning_rate": 3.1108324490243864e-08, "loss": 8.771, "step": 48200 }, { "epoch": 17.44, "learning_rate": 3.093701561469394e-08, "loss": 8.7983, "step": 48220 }, { "epoch": 17.45, "learning_rate": 3.07661486131224e-08, "loss": 8.6677, "step": 48240 }, { "epoch": 17.45, "learning_rate": 3.059572383018666e-08, "loss": 8.6159, "step": 48260 }, { "epoch": 17.46, "learning_rate": 3.0425741609652166e-08, "loss": 8.5923, "step": 48280 }, { "epoch": 17.47, "learning_rate": 3.0256202294391576e-08, "loss": 8.6792, "step": 48300 }, { "epoch": 17.48, "learning_rate": 3.00871062263843e-08, "loss": 8.6226, "step": 48320 }, { "epoch": 17.48, "learning_rate": 2.991845374671553e-08, "loss": 8.6312, "step": 48340 }, { "epoch": 17.49, "learning_rate": 2.9750245195575703e-08, "loss": 8.6999, "step": 48360 }, { "epoch": 17.5, "learning_rate": 2.9582480912259984e-08, "loss": 8.6946, "step": 48380 }, { "epoch": 17.5, "learning_rate": 2.94151612351671e-08, "loss": 8.6892, "step": 48400 }, { "epoch": 17.51, "learning_rate": 2.924828650179928e-08, "loss": 8.6228, "step": 48420 }, { "epoch": 17.52, "learning_rate": 2.908185704876101e-08, "loss": 8.6398, "step": 48440 }, { "epoch": 17.53, "learning_rate": 2.8915873211758645e-08, "loss": 8.6749, "step": 48460 }, { "epoch": 17.53, "learning_rate": 2.875033532559984e-08, "loss": 8.7345, "step": 48480 }, { "epoch": 17.54, "learning_rate": 2.8585243724192466e-08, "loss": 8.6511, "step": 48500 }, { "epoch": 17.55, "learning_rate": 2.8420598740544476e-08, "loss": 8.616, "step": 48520 }, { "epoch": 17.56, "learning_rate": 2.825640070676269e-08, "loss": 8.8774, "step": 48540 }, { "epoch": 17.56, "learning_rate": 2.8092649954052473e-08, "loss": 8.6434, "step": 48560 }, { "epoch": 17.57, "learning_rate": 2.792934681271708e-08, "loss": 8.5202, "step": 48580 }, { "epoch": 17.58, "learning_rate": 2.7766491612156663e-08, "loss": 8.6703, "step": 48600 }, { "epoch": 17.58, "learning_rate": 2.7604084680868112e-08, "loss": 8.6421, "step": 48620 }, { "epoch": 17.59, "learning_rate": 2.74421263464438e-08, "loss": 8.717, "step": 48640 }, { "epoch": 17.6, "learning_rate": 2.7280616935571516e-08, "loss": 8.6583, "step": 48660 }, { "epoch": 
17.61, "learning_rate": 2.7119556774033327e-08, "loss": 8.7085, "step": 48680 }, { "epoch": 17.61, "learning_rate": 2.6958946186705162e-08, "loss": 8.6937, "step": 48700 }, { "epoch": 17.62, "learning_rate": 2.679878549755618e-08, "loss": 8.6624, "step": 48720 }, { "epoch": 17.63, "learning_rate": 2.6639075029647935e-08, "loss": 8.6899, "step": 48740 }, { "epoch": 17.63, "learning_rate": 2.6479815105133974e-08, "loss": 8.7277, "step": 48760 }, { "epoch": 17.64, "learning_rate": 2.632100604525886e-08, "loss": 8.6808, "step": 48780 }, { "epoch": 17.65, "learning_rate": 2.616264817035793e-08, "loss": 8.722, "step": 48800 }, { "epoch": 17.66, "learning_rate": 2.6004741799856207e-08, "loss": 8.6183, "step": 48820 }, { "epoch": 17.66, "learning_rate": 2.5847287252268228e-08, "loss": 8.6058, "step": 48840 }, { "epoch": 17.67, "learning_rate": 2.569028484519692e-08, "loss": 8.6472, "step": 48860 }, { "epoch": 17.68, "learning_rate": 2.5533734895333363e-08, "loss": 8.8245, "step": 48880 }, { "epoch": 17.69, "learning_rate": 2.5377637718455884e-08, "loss": 8.6572, "step": 48900 }, { "epoch": 17.69, "learning_rate": 2.5221993629429505e-08, "loss": 8.5695, "step": 48920 }, { "epoch": 17.7, "learning_rate": 2.5066802942205452e-08, "loss": 8.7051, "step": 48940 }, { "epoch": 17.71, "learning_rate": 2.4912065969820206e-08, "loss": 8.688, "step": 48960 }, { "epoch": 17.71, "learning_rate": 2.475778302439524e-08, "loss": 8.732, "step": 48980 }, { "epoch": 17.72, "learning_rate": 2.4603954417136e-08, "loss": 8.6216, "step": 49000 }, { "epoch": 17.73, "learning_rate": 2.445058045833173e-08, "loss": 8.6969, "step": 49020 }, { "epoch": 17.74, "learning_rate": 2.4297661457354346e-08, "loss": 8.8897, "step": 49040 }, { "epoch": 17.74, "learning_rate": 2.41451977226583e-08, "loss": 8.552, "step": 49060 }, { "epoch": 17.75, "learning_rate": 2.3993189561779537e-08, "loss": 8.6289, "step": 49080 }, { "epoch": 17.76, "learning_rate": 2.3841637281335064e-08, "loss": 8.793, "step": 49100 }, { "epoch": 17.76, "learning_rate": 2.3690541187022545e-08, "loss": 8.6217, "step": 49120 }, { "epoch": 17.77, "learning_rate": 2.3539901583619183e-08, "loss": 8.6762, "step": 49140 }, { "epoch": 17.78, "learning_rate": 2.338971877498161e-08, "loss": 8.619, "step": 49160 }, { "epoch": 17.79, "learning_rate": 2.323999306404492e-08, "loss": 8.7034, "step": 49180 }, { "epoch": 17.79, "learning_rate": 2.30907247528222e-08, "loss": 8.613, "step": 49200 }, { "epoch": 17.8, "learning_rate": 2.2941914142404013e-08, "loss": 8.6475, "step": 49220 }, { "epoch": 17.81, "learning_rate": 2.2793561532957555e-08, "loss": 8.575, "step": 49240 }, { "epoch": 17.82, "learning_rate": 2.2645667223726322e-08, "loss": 8.7523, "step": 49260 }, { "epoch": 17.82, "learning_rate": 2.2498231513029236e-08, "loss": 8.7378, "step": 49280 }, { "epoch": 17.83, "learning_rate": 2.2351254698260296e-08, "loss": 8.5931, "step": 49300 }, { "epoch": 17.84, "learning_rate": 2.220473707588777e-08, "loss": 8.663, "step": 49320 }, { "epoch": 17.84, "learning_rate": 2.205867894145366e-08, "loss": 8.6425, "step": 49340 }, { "epoch": 17.85, "learning_rate": 2.191308058957328e-08, "loss": 8.55, "step": 49360 }, { "epoch": 17.86, "learning_rate": 2.1767942313934334e-08, "loss": 8.7109, "step": 49380 }, { "epoch": 17.87, "learning_rate": 2.1623264407296642e-08, "loss": 8.6994, "step": 49400 }, { "epoch": 17.87, "learning_rate": 2.147904716149135e-08, "loss": 8.6966, "step": 49420 }, { "epoch": 17.88, "learning_rate": 2.1335290867420337e-08, "loss": 8.6716, "step": 49440 }, { 
"epoch": 17.89, "learning_rate": 2.1191995815055876e-08, "loss": 8.6833, "step": 49460 }, { "epoch": 17.9, "learning_rate": 2.1049162293439587e-08, "loss": 8.7627, "step": 49480 }, { "epoch": 17.9, "learning_rate": 2.0906790590682455e-08, "loss": 8.6788, "step": 49500 }, { "epoch": 17.91, "learning_rate": 2.0764880993963675e-08, "loss": 8.6624, "step": 49520 }, { "epoch": 17.92, "learning_rate": 2.062343378953038e-08, "loss": 8.778, "step": 49540 }, { "epoch": 17.92, "learning_rate": 2.0482449262697126e-08, "loss": 8.7197, "step": 49560 }, { "epoch": 17.93, "learning_rate": 2.0341927697845012e-08, "loss": 8.7374, "step": 49580 }, { "epoch": 17.94, "learning_rate": 2.0201869378421497e-08, "loss": 8.6552, "step": 49600 }, { "epoch": 17.95, "learning_rate": 2.006227458693946e-08, "loss": 8.7903, "step": 49620 }, { "epoch": 17.95, "learning_rate": 1.9923143604976823e-08, "loss": 8.6071, "step": 49640 }, { "epoch": 17.96, "learning_rate": 1.978447671317604e-08, "loss": 8.6615, "step": 49660 }, { "epoch": 17.97, "learning_rate": 1.9646274191243318e-08, "loss": 8.5126, "step": 49680 }, { "epoch": 17.97, "learning_rate": 1.9508536317948356e-08, "loss": 8.6629, "step": 49700 }, { "epoch": 17.98, "learning_rate": 1.937126337112338e-08, "loss": 8.6126, "step": 49720 }, { "epoch": 17.99, "learning_rate": 1.923445562766296e-08, "loss": 8.9988, "step": 49740 }, { "epoch": 18.0, "learning_rate": 1.909811336352332e-08, "loss": 8.7915, "step": 49760 }, { "epoch": 18.0, "learning_rate": 1.8962236853721586e-08, "loss": 8.6122, "step": 49780 }, { "epoch": 18.01, "learning_rate": 1.882682637233568e-08, "loss": 8.7854, "step": 49800 }, { "epoch": 18.02, "learning_rate": 1.86918821925032e-08, "loss": 8.6883, "step": 49820 }, { "epoch": 18.03, "learning_rate": 1.855740458642141e-08, "loss": 8.6263, "step": 49840 }, { "epoch": 18.03, "learning_rate": 1.84233938253463e-08, "loss": 8.6004, "step": 49860 }, { "epoch": 18.04, "learning_rate": 1.828985017959217e-08, "loss": 8.5865, "step": 49880 }, { "epoch": 18.05, "learning_rate": 1.815677391853124e-08, "loss": 8.5881, "step": 49900 }, { "epoch": 18.05, "learning_rate": 1.8024165310592754e-08, "loss": 8.6077, "step": 49920 }, { "epoch": 18.06, "learning_rate": 1.789202462326289e-08, "loss": 8.6032, "step": 49940 }, { "epoch": 18.07, "learning_rate": 1.7760352123083798e-08, "loss": 8.6446, "step": 49960 }, { "epoch": 18.08, "learning_rate": 1.7629148075653243e-08, "loss": 8.6063, "step": 49980 }, { "epoch": 18.08, "learning_rate": 1.749841274562422e-08, "loss": 8.6, "step": 50000 }, { "epoch": 18.09, "learning_rate": 1.7368146396704113e-08, "loss": 8.5862, "step": 50020 }, { "epoch": 18.1, "learning_rate": 1.7238349291654435e-08, "loss": 8.6713, "step": 50040 }, { "epoch": 18.1, "learning_rate": 1.7109021692290114e-08, "loss": 8.5957, "step": 50060 }, { "epoch": 18.11, "learning_rate": 1.6980163859479007e-08, "loss": 8.6717, "step": 50080 }, { "epoch": 18.12, "learning_rate": 1.6851776053141503e-08, "loss": 8.6833, "step": 50100 }, { "epoch": 18.13, "learning_rate": 1.6723858532249778e-08, "loss": 8.6057, "step": 50120 }, { "epoch": 18.13, "learning_rate": 1.6596411554827522e-08, "loss": 8.7165, "step": 50140 }, { "epoch": 18.14, "learning_rate": 1.6469435377949175e-08, "loss": 8.6832, "step": 50160 }, { "epoch": 18.15, "learning_rate": 1.634293025773953e-08, "loss": 8.6771, "step": 50180 }, { "epoch": 18.16, "learning_rate": 1.6216896449373295e-08, "loss": 8.7166, "step": 50200 }, { "epoch": 18.16, "learning_rate": 1.6091334207074398e-08, "loss": 8.8346, "step": 
50220 }, { "epoch": 18.17, "learning_rate": 1.596624378411565e-08, "loss": 8.7444, "step": 50240 }, { "epoch": 18.18, "learning_rate": 1.5841625432818057e-08, "loss": 8.7195, "step": 50260 }, { "epoch": 18.18, "learning_rate": 1.5717479404550455e-08, "loss": 8.7715, "step": 50280 }, { "epoch": 18.19, "learning_rate": 1.5593805949728977e-08, "loss": 8.7242, "step": 50300 }, { "epoch": 18.2, "learning_rate": 1.5470605317816436e-08, "loss": 8.7336, "step": 50320 }, { "epoch": 18.21, "learning_rate": 1.5347877757322076e-08, "loss": 8.7456, "step": 50340 }, { "epoch": 18.21, "learning_rate": 1.5225623515800673e-08, "loss": 8.9196, "step": 50360 }, { "epoch": 18.22, "learning_rate": 1.5103842839852527e-08, "loss": 8.6397, "step": 50380 }, { "epoch": 18.23, "learning_rate": 1.4982535975122474e-08, "loss": 8.7685, "step": 50400 }, { "epoch": 18.24, "learning_rate": 1.48617031662997e-08, "loss": 8.7352, "step": 50420 }, { "epoch": 18.24, "learning_rate": 1.4741344657117238e-08, "loss": 8.6774, "step": 50440 }, { "epoch": 18.25, "learning_rate": 1.4621460690351334e-08, "loss": 8.6874, "step": 50460 }, { "epoch": 18.26, "learning_rate": 1.4502051507821106e-08, "loss": 8.6548, "step": 50480 }, { "epoch": 18.26, "learning_rate": 1.438311735038783e-08, "loss": 8.6336, "step": 50500 }, { "epoch": 18.27, "learning_rate": 1.4264658457954743e-08, "loss": 8.5954, "step": 50520 }, { "epoch": 18.28, "learning_rate": 1.4146675069466401e-08, "loss": 8.6314, "step": 50540 }, { "epoch": 18.29, "learning_rate": 1.4029167422908105e-08, "loss": 8.6163, "step": 50560 }, { "epoch": 18.29, "learning_rate": 1.3912135755305753e-08, "loss": 8.756, "step": 50580 }, { "epoch": 18.3, "learning_rate": 1.3795580302724874e-08, "loss": 8.7309, "step": 50600 }, { "epoch": 18.31, "learning_rate": 1.3679501300270652e-08, "loss": 8.6124, "step": 50620 }, { "epoch": 18.31, "learning_rate": 1.3563898982087069e-08, "loss": 8.7536, "step": 50640 }, { "epoch": 18.32, "learning_rate": 1.3448773581356653e-08, "loss": 8.6723, "step": 50660 }, { "epoch": 18.33, "learning_rate": 1.3334125330299928e-08, "loss": 8.6517, "step": 50680 }, { "epoch": 18.34, "learning_rate": 1.3219954460174876e-08, "loss": 8.7087, "step": 50700 }, { "epoch": 18.34, "learning_rate": 1.3106261201276724e-08, "loss": 8.6847, "step": 50720 }, { "epoch": 18.35, "learning_rate": 1.2993045782937084e-08, "loss": 8.6975, "step": 50740 }, { "epoch": 18.36, "learning_rate": 1.2880308433523945e-08, "loss": 8.6035, "step": 50760 }, { "epoch": 18.37, "learning_rate": 1.2768049380440765e-08, "loss": 8.7271, "step": 50780 }, { "epoch": 18.37, "learning_rate": 1.2656268850126411e-08, "loss": 8.7072, "step": 50800 }, { "epoch": 18.38, "learning_rate": 1.254496706805433e-08, "loss": 8.806, "step": 50820 }, { "epoch": 18.39, "learning_rate": 1.243414425873246e-08, "loss": 8.6377, "step": 50840 }, { "epoch": 18.39, "learning_rate": 1.2323800645702431e-08, "loss": 8.7319, "step": 50860 }, { "epoch": 18.4, "learning_rate": 1.221393645153948e-08, "loss": 8.6606, "step": 50880 }, { "epoch": 18.41, "learning_rate": 1.2104551897851644e-08, "loss": 8.6349, "step": 50900 }, { "epoch": 18.42, "learning_rate": 1.1995647205279457e-08, "loss": 8.7337, "step": 50920 }, { "epoch": 18.42, "learning_rate": 1.1887222593495699e-08, "loss": 8.7268, "step": 50940 }, { "epoch": 18.43, "learning_rate": 1.1779278281204536e-08, "loss": 8.7171, "step": 50960 }, { "epoch": 18.44, "learning_rate": 1.1671814486141546e-08, "loss": 8.6429, "step": 50980 }, { "epoch": 18.44, "learning_rate": 1.156483142507289e-08, 
"loss": 8.6306, "step": 51000 }, { "epoch": 18.45, "learning_rate": 1.1458329313795146e-08, "loss": 8.7194, "step": 51020 }, { "epoch": 18.46, "learning_rate": 1.135230836713466e-08, "loss": 8.6686, "step": 51040 }, { "epoch": 18.47, "learning_rate": 1.1246768798947287e-08, "loss": 8.6497, "step": 51060 }, { "epoch": 18.47, "learning_rate": 1.1141710822117872e-08, "loss": 8.6738, "step": 51080 }, { "epoch": 18.48, "learning_rate": 1.1037134648559793e-08, "loss": 8.6014, "step": 51100 }, { "epoch": 18.49, "learning_rate": 1.0933040489214674e-08, "loss": 8.6016, "step": 51120 }, { "epoch": 18.5, "learning_rate": 1.0829428554051834e-08, "loss": 8.7236, "step": 51140 }, { "epoch": 18.5, "learning_rate": 1.0726299052067761e-08, "loss": 8.6591, "step": 51160 }, { "epoch": 18.51, "learning_rate": 1.0623652191286026e-08, "loss": 8.7178, "step": 51180 }, { "epoch": 18.52, "learning_rate": 1.0521488178756532e-08, "loss": 8.8135, "step": 51200 }, { "epoch": 18.52, "learning_rate": 1.0419807220555271e-08, "loss": 8.6145, "step": 51220 }, { "epoch": 18.53, "learning_rate": 1.0318609521783817e-08, "loss": 8.6767, "step": 51240 }, { "epoch": 18.54, "learning_rate": 1.0217895286568995e-08, "loss": 8.6303, "step": 51260 }, { "epoch": 18.55, "learning_rate": 1.0117664718062469e-08, "loss": 8.5655, "step": 51280 }, { "epoch": 18.55, "learning_rate": 1.001791801844018e-08, "loss": 8.6803, "step": 51300 }, { "epoch": 18.56, "learning_rate": 9.918655388902158e-09, "loss": 8.6766, "step": 51320 }, { "epoch": 18.57, "learning_rate": 9.81987702967202e-09, "loss": 8.6313, "step": 51340 }, { "epoch": 18.58, "learning_rate": 9.721583139996382e-09, "loss": 8.7152, "step": 51360 }, { "epoch": 18.58, "learning_rate": 9.623773918144895e-09, "loss": 8.6508, "step": 51380 }, { "epoch": 18.59, "learning_rate": 9.526449561409356e-09, "loss": 8.742, "step": 51400 }, { "epoch": 18.6, "learning_rate": 9.429610266103699e-09, "loss": 8.6364, "step": 51420 }, { "epoch": 18.6, "learning_rate": 9.333256227563341e-09, "loss": 8.8212, "step": 51440 }, { "epoch": 18.61, "learning_rate": 9.237387640144867e-09, "loss": 8.6756, "step": 51460 }, { "epoch": 18.62, "learning_rate": 9.14200469722573e-09, "loss": 8.5969, "step": 51480 }, { "epoch": 18.63, "learning_rate": 9.047107591203723e-09, "loss": 8.6711, "step": 51500 }, { "epoch": 18.63, "learning_rate": 8.952696513496755e-09, "loss": 8.6413, "step": 51520 }, { "epoch": 18.64, "learning_rate": 8.858771654542185e-09, "loss": 8.6178, "step": 51540 }, { "epoch": 18.65, "learning_rate": 8.76533320379677e-09, "loss": 8.7109, "step": 51560 }, { "epoch": 18.65, "learning_rate": 8.672381349736108e-09, "loss": 8.858, "step": 51580 }, { "epoch": 18.66, "learning_rate": 8.57991627985416e-09, "loss": 8.745, "step": 51600 }, { "epoch": 18.67, "learning_rate": 8.48793818066315e-09, "loss": 8.6337, "step": 51620 }, { "epoch": 18.68, "learning_rate": 8.396447237692921e-09, "loss": 8.7487, "step": 51640 }, { "epoch": 18.68, "learning_rate": 8.305443635490712e-09, "loss": 8.5949, "step": 51660 }, { "epoch": 18.69, "learning_rate": 8.214927557620766e-09, "loss": 8.6928, "step": 51680 }, { "epoch": 18.7, "learning_rate": 8.124899186663815e-09, "loss": 8.7684, "step": 51700 }, { "epoch": 18.71, "learning_rate": 8.035358704217039e-09, "loss": 8.6277, "step": 51720 }, { "epoch": 18.71, "learning_rate": 7.94630629089324e-09, "loss": 8.6413, "step": 51740 }, { "epoch": 18.72, "learning_rate": 7.85774212632101e-09, "loss": 8.6884, "step": 51760 }, { "epoch": 18.73, "learning_rate": 7.769666389143864e-09, "loss": 
8.7835, "step": 51780 }, { "epoch": 18.73, "learning_rate": 7.682079257020163e-09, "loss": 8.6372, "step": 51800 }, { "epoch": 18.74, "learning_rate": 7.594980906622805e-09, "loss": 8.6274, "step": 51820 }, { "epoch": 18.75, "learning_rate": 7.508371513638618e-09, "loss": 8.6954, "step": 51840 }, { "epoch": 18.76, "learning_rate": 7.422251252768269e-09, "loss": 8.6183, "step": 51860 }, { "epoch": 18.76, "learning_rate": 7.336620297725666e-09, "loss": 8.6687, "step": 51880 }, { "epoch": 18.77, "learning_rate": 7.251478821237833e-09, "loss": 8.5802, "step": 51900 }, { "epoch": 18.78, "learning_rate": 7.1668269950444784e-09, "loss": 8.6163, "step": 51920 }, { "epoch": 18.78, "learning_rate": 7.0826649898974856e-09, "loss": 8.6173, "step": 51940 }, { "epoch": 18.79, "learning_rate": 6.998992975560919e-09, "loss": 8.6898, "step": 51960 }, { "epoch": 18.8, "learning_rate": 6.915811120810355e-09, "loss": 8.7128, "step": 51980 }, { "epoch": 18.81, "learning_rate": 6.833119593432607e-09, "loss": 8.6544, "step": 52000 }, { "epoch": 18.81, "learning_rate": 6.750918560225583e-09, "loss": 8.6416, "step": 52020 }, { "epoch": 18.82, "learning_rate": 6.66920818699776e-09, "loss": 8.7659, "step": 52040 }, { "epoch": 18.83, "learning_rate": 6.587988638567881e-09, "loss": 8.6386, "step": 52060 }, { "epoch": 18.84, "learning_rate": 6.50726007876462e-09, "loss": 8.7111, "step": 52080 }, { "epoch": 18.84, "learning_rate": 6.427022670426329e-09, "loss": 8.6797, "step": 52100 }, { "epoch": 18.85, "learning_rate": 6.347276575400628e-09, "loss": 8.664, "step": 52120 }, { "epoch": 18.86, "learning_rate": 6.268021954544095e-09, "loss": 8.6207, "step": 52140 }, { "epoch": 18.86, "learning_rate": 6.189258967721989e-09, "loss": 8.6631, "step": 52160 }, { "epoch": 18.87, "learning_rate": 6.110987773807835e-09, "loss": 8.7244, "step": 52180 }, { "epoch": 18.88, "learning_rate": 6.033208530683204e-09, "loss": 8.6522, "step": 52200 }, { "epoch": 18.89, "learning_rate": 5.955921395237318e-09, "loss": 8.698, "step": 52220 }, { "epoch": 18.89, "learning_rate": 5.879126523366751e-09, "loss": 8.6421, "step": 52240 }, { "epoch": 18.9, "learning_rate": 5.802824069975176e-09, "loss": 8.6436, "step": 52260 }, { "epoch": 18.91, "learning_rate": 5.727014188972979e-09, "loss": 8.6355, "step": 52280 }, { "epoch": 18.92, "learning_rate": 5.651697033277003e-09, "loss": 8.6781, "step": 52300 }, { "epoch": 18.92, "learning_rate": 5.576872754810113e-09, "loss": 8.7829, "step": 52320 }, { "epoch": 18.93, "learning_rate": 5.5025415045011066e-09, "loss": 8.9203, "step": 52340 }, { "epoch": 18.94, "learning_rate": 5.428703432284243e-09, "loss": 8.6602, "step": 52360 }, { "epoch": 18.94, "learning_rate": 5.355358687098938e-09, "loss": 8.7053, "step": 52380 }, { "epoch": 18.95, "learning_rate": 5.282507416889625e-09, "loss": 8.7101, "step": 52400 }, { "epoch": 18.96, "learning_rate": 5.210149768605177e-09, "loss": 8.7651, "step": 52420 }, { "epoch": 18.97, "learning_rate": 5.138285888199007e-09, "loss": 8.7112, "step": 52440 }, { "epoch": 18.97, "learning_rate": 5.066915920628301e-09, "loss": 8.7172, "step": 52460 }, { "epoch": 18.98, "learning_rate": 4.996040009854152e-09, "loss": 8.7541, "step": 52480 }, { "epoch": 18.99, "learning_rate": 4.925658298840979e-09, "loss": 8.7614, "step": 52500 }, { "epoch": 18.99, "learning_rate": 4.855770929556385e-09, "loss": 8.807, "step": 52520 }, { "epoch": 19.0, "learning_rate": 4.786378042970884e-09, "loss": 8.6, "step": 52540 }, { "epoch": 19.01, "learning_rate": 4.7174797790574264e-09, "loss": 8.7356, 
"step": 52560 }, { "epoch": 19.02, "learning_rate": 4.649076276791425e-09, "loss": 8.6023, "step": 52580 }, { "epoch": 19.02, "learning_rate": 4.5811676741501496e-09, "loss": 8.6362, "step": 52600 }, { "epoch": 19.03, "learning_rate": 4.513754108112722e-09, "loss": 8.748, "step": 52620 }, { "epoch": 19.04, "learning_rate": 4.446835714659647e-09, "loss": 8.679, "step": 52640 }, { "epoch": 19.05, "learning_rate": 4.380412628772645e-09, "loss": 8.6705, "step": 52660 }, { "epoch": 19.05, "learning_rate": 4.314484984434319e-09, "loss": 8.6436, "step": 52680 }, { "epoch": 19.06, "learning_rate": 4.249052914627988e-09, "loss": 8.6, "step": 52700 }, { "epoch": 19.07, "learning_rate": 4.184116551337241e-09, "loss": 8.9462, "step": 52720 }, { "epoch": 19.07, "learning_rate": 4.119676025545777e-09, "loss": 8.7455, "step": 52740 }, { "epoch": 19.08, "learning_rate": 4.055731467237283e-09, "loss": 8.6577, "step": 52760 }, { "epoch": 19.09, "learning_rate": 3.992283005394837e-09, "loss": 8.6922, "step": 52780 }, { "epoch": 19.1, "learning_rate": 3.929330768000949e-09, "loss": 8.6823, "step": 52800 }, { "epoch": 19.1, "learning_rate": 3.866874882037157e-09, "loss": 8.7113, "step": 52820 }, { "epoch": 19.11, "learning_rate": 3.8049154734838275e-09, "loss": 8.6948, "step": 52840 }, { "epoch": 19.12, "learning_rate": 3.743452667319846e-09, "loss": 8.6673, "step": 52860 }, { "epoch": 19.12, "learning_rate": 3.6824865875224043e-09, "loss": 8.7135, "step": 52880 }, { "epoch": 19.13, "learning_rate": 3.6220173570667424e-09, "loss": 8.6514, "step": 52900 }, { "epoch": 19.14, "learning_rate": 3.562045097925903e-09, "loss": 8.7106, "step": 52920 }, { "epoch": 19.15, "learning_rate": 3.502569931070509e-09, "loss": 8.6637, "step": 52940 }, { "epoch": 19.15, "learning_rate": 3.4435919764684572e-09, "loss": 8.6529, "step": 52960 }, { "epoch": 19.16, "learning_rate": 3.385111353084724e-09, "loss": 8.6605, "step": 52980 }, { "epoch": 19.17, "learning_rate": 3.3271281788811444e-09, "loss": 8.7399, "step": 53000 }, { "epoch": 19.18, "learning_rate": 3.26964257081605e-09, "loss": 8.6828, "step": 53020 }, { "epoch": 19.18, "learning_rate": 3.2126546448442704e-09, "loss": 8.7259, "step": 53040 }, { "epoch": 19.19, "learning_rate": 3.1561645159166596e-09, "loss": 8.7069, "step": 53060 }, { "epoch": 19.2, "learning_rate": 3.1001722979799306e-09, "loss": 8.6755, "step": 53080 }, { "epoch": 19.2, "learning_rate": 3.0446781039765725e-09, "loss": 8.637, "step": 53100 }, { "epoch": 19.21, "learning_rate": 2.989682045844405e-09, "loss": 8.6292, "step": 53120 }, { "epoch": 19.22, "learning_rate": 2.935184234516497e-09, "loss": 8.7152, "step": 53140 }, { "epoch": 19.23, "learning_rate": 2.8811847799208868e-09, "loss": 8.6, "step": 53160 }, { "epoch": 19.23, "learning_rate": 2.827683790980362e-09, "loss": 8.6343, "step": 53180 }, { "epoch": 19.24, "learning_rate": 2.774681375612292e-09, "loss": 8.6721, "step": 53200 }, { "epoch": 19.25, "learning_rate": 2.722177640728324e-09, "loss": 8.7229, "step": 53220 }, { "epoch": 19.25, "learning_rate": 2.6701726922342126e-09, "loss": 8.5951, "step": 53240 }, { "epoch": 19.26, "learning_rate": 2.6186666350296594e-09, "loss": 8.6535, "step": 53260 }, { "epoch": 19.27, "learning_rate": 2.5676595730079742e-09, "loss": 8.6983, "step": 53280 }, { "epoch": 19.28, "learning_rate": 2.517151609055995e-09, "loss": 8.5833, "step": 53300 }, { "epoch": 19.28, "learning_rate": 2.4671428450537824e-09, "loss": 8.6913, "step": 53320 }, { "epoch": 19.29, "learning_rate": 2.417633381874534e-09, "loss": 8.759, 
"step": 53340 }, { "epoch": 19.3, "learning_rate": 2.3686233193841722e-09, "loss": 8.6451, "step": 53360 }, { "epoch": 19.31, "learning_rate": 2.3201127564414223e-09, "loss": 8.6675, "step": 53380 }, { "epoch": 19.31, "learning_rate": 2.272101790897346e-09, "loss": 8.7818, "step": 53400 }, { "epoch": 19.32, "learning_rate": 2.224590519595282e-09, "loss": 8.5935, "step": 53420 }, { "epoch": 19.33, "learning_rate": 2.177579038370736e-09, "loss": 8.6203, "step": 53440 }, { "epoch": 19.33, "learning_rate": 2.13106744205091e-09, "loss": 8.6421, "step": 53460 }, { "epoch": 19.34, "learning_rate": 2.085055824454868e-09, "loss": 8.6868, "step": 53480 }, { "epoch": 19.35, "learning_rate": 2.039544278393007e-09, "loss": 8.7173, "step": 53500 }, { "epoch": 19.36, "learning_rate": 1.994532895667117e-09, "loss": 8.7867, "step": 53520 }, { "epoch": 19.36, "learning_rate": 1.95002176707007e-09, "loss": 8.6087, "step": 53540 }, { "epoch": 19.37, "learning_rate": 1.9060109823856583e-09, "loss": 8.7634, "step": 53560 }, { "epoch": 19.38, "learning_rate": 1.8625006303884527e-09, "loss": 8.5889, "step": 53580 }, { "epoch": 19.39, "learning_rate": 1.8194907988436093e-09, "loss": 8.5872, "step": 53600 }, { "epoch": 19.39, "learning_rate": 1.7769815745066474e-09, "loss": 8.629, "step": 53620 }, { "epoch": 19.4, "learning_rate": 1.7349730431233111e-09, "loss": 8.6705, "step": 53640 }, { "epoch": 19.41, "learning_rate": 1.6934652894294022e-09, "loss": 8.7446, "step": 53660 }, { "epoch": 19.41, "learning_rate": 1.6524583971505857e-09, "loss": 8.5836, "step": 53680 }, { "epoch": 19.42, "learning_rate": 1.6119524490022795e-09, "loss": 8.6287, "step": 53700 }, { "epoch": 19.43, "learning_rate": 1.5719475266893489e-09, "loss": 8.5938, "step": 53720 }, { "epoch": 19.44, "learning_rate": 1.5324437109061616e-09, "loss": 8.6851, "step": 53740 }, { "epoch": 19.44, "learning_rate": 1.493441081336172e-09, "loss": 8.7353, "step": 53760 }, { "epoch": 19.45, "learning_rate": 1.454939716651976e-09, "loss": 8.7456, "step": 53780 }, { "epoch": 19.46, "learning_rate": 1.4169396945150346e-09, "loss": 8.5975, "step": 53800 }, { "epoch": 19.46, "learning_rate": 1.3794410915755339e-09, "loss": 8.6356, "step": 53820 }, { "epoch": 19.47, "learning_rate": 1.3424439834722746e-09, "loss": 8.6342, "step": 53840 }, { "epoch": 19.48, "learning_rate": 1.3059484448324221e-09, "loss": 8.6636, "step": 53860 }, { "epoch": 19.49, "learning_rate": 1.269954549271507e-09, "loss": 8.7235, "step": 53880 }, { "epoch": 19.49, "learning_rate": 1.2344623693931467e-09, "loss": 8.7035, "step": 53900 }, { "epoch": 19.5, "learning_rate": 1.199471976788935e-09, "loss": 8.6849, "step": 53920 }, { "epoch": 19.51, "learning_rate": 1.1649834420383032e-09, "loss": 8.6341, "step": 53940 }, { "epoch": 19.52, "learning_rate": 1.1309968347084364e-09, "loss": 8.7291, "step": 53960 }, { "epoch": 19.52, "learning_rate": 1.0975122233539968e-09, "loss": 8.5852, "step": 53980 }, { "epoch": 19.53, "learning_rate": 1.0645296755171229e-09, "loss": 8.6188, "step": 54000 }, { "epoch": 19.54, "learning_rate": 1.0320492577272077e-09, "loss": 8.7887, "step": 54020 }, { "epoch": 19.54, "learning_rate": 1.0000710355008157e-09, "loss": 8.9253, "step": 54040 }, { "epoch": 19.55, "learning_rate": 9.685950733414882e-10, "loss": 8.5889, "step": 54060 }, { "epoch": 19.56, "learning_rate": 9.376214347397437e-10, "loss": 8.7428, "step": 54080 }, { "epoch": 19.57, "learning_rate": 9.071501821727167e-10, "loss": 8.7254, "step": 54100 }, { "epoch": 19.57, "learning_rate": 8.771813771042968e-10, 
"loss": 8.6894, "step": 54120 }, { "epoch": 19.58, "learning_rate": 8.47715079984851e-10, "loss": 8.7289, "step": 54140 }, { "epoch": 19.59, "learning_rate": 8.187513502510846e-10, "loss": 8.6529, "step": 54160 }, { "epoch": 19.59, "learning_rate": 7.902902463260419e-10, "loss": 8.6643, "step": 54180 }, { "epoch": 19.6, "learning_rate": 7.62331825618856e-10, "loss": 8.6873, "step": 54200 }, { "epoch": 19.61, "learning_rate": 7.348761445247209e-10, "loss": 8.6534, "step": 54220 }, { "epoch": 19.62, "learning_rate": 7.079232584247252e-10, "loss": 8.5672, "step": 54240 }, { "epoch": 19.62, "learning_rate": 6.814732216858243e-10, "loss": 8.6725, "step": 54260 }, { "epoch": 19.63, "learning_rate": 6.555260876606183e-10, "loss": 8.751, "step": 54280 }, { "epoch": 19.64, "learning_rate": 6.300819086873243e-10, "loss": 8.6916, "step": 54300 }, { "epoch": 19.65, "learning_rate": 6.051407360895822e-10, "loss": 8.6952, "step": 54320 }, { "epoch": 19.65, "learning_rate": 5.807026201765098e-10, "loss": 8.6289, "step": 54340 }, { "epoch": 19.66, "learning_rate": 5.567676102424534e-10, "loss": 8.7357, "step": 54360 }, { "epoch": 19.67, "learning_rate": 5.333357545669325e-10, "loss": 8.5972, "step": 54380 }, { "epoch": 19.67, "learning_rate": 5.10407100414556e-10, "loss": 8.6977, "step": 54400 }, { "epoch": 19.68, "learning_rate": 4.87981694034939e-10, "loss": 8.6525, "step": 54420 }, { "epoch": 19.69, "learning_rate": 4.660595806625645e-10, "loss": 8.7391, "step": 54440 }, { "epoch": 19.7, "learning_rate": 4.446408045167549e-10, "loss": 8.7213, "step": 54460 }, { "epoch": 19.7, "learning_rate": 4.2372540880147854e-10, "loss": 8.6799, "step": 54480 }, { "epoch": 19.71, "learning_rate": 4.0331343570540466e-10, "loss": 8.6531, "step": 54500 }, { "epoch": 19.72, "learning_rate": 3.834049264017092e-10, "loss": 8.7059, "step": 54520 }, { "epoch": 19.73, "learning_rate": 3.6399992104804713e-10, "loss": 8.6514, "step": 54540 }, { "epoch": 19.73, "learning_rate": 3.450984587863859e-10, "loss": 8.5528, "step": 54560 }, { "epoch": 19.74, "learning_rate": 3.267005777430887e-10, "loss": 8.944, "step": 54580 }, { "epoch": 19.75, "learning_rate": 3.088063150286924e-10, "loss": 8.6987, "step": 54600 }, { "epoch": 19.75, "learning_rate": 2.91415706737852e-10, "loss": 8.6739, "step": 54620 }, { "epoch": 19.76, "learning_rate": 2.74528787949313e-10, "loss": 8.702, "step": 54640 }, { "epoch": 19.77, "learning_rate": 2.5814559272588353e-10, "loss": 8.7295, "step": 54660 }, { "epoch": 19.78, "learning_rate": 2.4226615411424013e-10, "loss": 8.6574, "step": 54680 }, { "epoch": 19.78, "learning_rate": 2.268905041449276e-10, "loss": 8.6305, "step": 54700 }, { "epoch": 19.79, "learning_rate": 2.1201867383233153e-10, "loss": 8.6852, "step": 54720 }, { "epoch": 19.8, "learning_rate": 1.9765069317453918e-10, "loss": 8.6515, "step": 54740 }, { "epoch": 19.8, "learning_rate": 1.8378659115333984e-10, "loss": 8.7308, "step": 54760 }, { "epoch": 19.81, "learning_rate": 1.7042639573419672e-10, "loss": 8.6392, "step": 54780 }, { "epoch": 19.82, "learning_rate": 1.5757013386599738e-10, "loss": 8.6741, "step": 54800 }, { "epoch": 19.83, "learning_rate": 1.452178314813035e-10, "loss": 8.7132, "step": 54820 }, { "epoch": 19.83, "learning_rate": 1.3336951349599e-10, "loss": 8.6422, "step": 54840 }, { "epoch": 19.84, "learning_rate": 1.2202520380946713e-10, "loss": 8.6508, "step": 54860 }, { "epoch": 19.85, "learning_rate": 1.1118492530443058e-10, "loss": 8.5966, "step": 54880 }, { "epoch": 19.86, "learning_rate": 1.0084869984686162e-10, "loss": 
8.7772, "step": 54900 }, { "epoch": 19.86, "learning_rate": 9.101654828613803e-11, "loss": 8.7014, "step": 54920 }, { "epoch": 19.87, "learning_rate": 8.168849045472881e-11, "loss": 8.7369, "step": 54940 }, { "epoch": 19.88, "learning_rate": 7.286454516833296e-11, "loss": 8.7147, "step": 54960 }, { "epoch": 19.88, "learning_rate": 6.454473022587947e-11, "loss": 8.593, "step": 54980 }, { "epoch": 19.89, "learning_rate": 5.672906240927755e-11, "loss": 8.6138, "step": 55000 }, { "epoch": 19.9, "learning_rate": 4.9417557483610875e-11, "loss": 8.6584, "step": 55020 }, { "epoch": 19.91, "learning_rate": 4.261023019697108e-11, "loss": 8.6232, "step": 55040 }, { "epoch": 19.91, "learning_rate": 3.630709428051326e-11, "loss": 8.7377, "step": 55060 }, { "epoch": 19.92, "learning_rate": 3.050816244831722e-11, "loss": 8.6403, "step": 55080 }, { "epoch": 19.93, "learning_rate": 2.5213446397470693e-11, "loss": 8.7605, "step": 55100 }, { "epoch": 19.93, "learning_rate": 2.0422956808013868e-11, "loss": 8.571, "step": 55120 }, { "epoch": 19.94, "learning_rate": 1.6136703342856107e-11, "loss": 8.6668, "step": 55140 }, { "epoch": 19.95, "learning_rate": 1.235469464785921e-11, "loss": 8.7839, "step": 55160 }, { "epoch": 19.96, "learning_rate": 9.07693835175416e-12, "loss": 8.7593, "step": 55180 }, { "epoch": 19.96, "learning_rate": 6.3034410661133574e-12, "loss": 8.732, "step": 55200 }, { "epoch": 19.97, "learning_rate": 4.034208385378379e-12, "loss": 8.725, "step": 55220 }, { "epoch": 19.98, "learning_rate": 2.2692448868877334e-12, "loss": 8.8461, "step": 55240 }, { "epoch": 19.99, "learning_rate": 1.0085541307103262e-12, "loss": 8.6615, "step": 55260 }, { "epoch": 19.99, "learning_rate": 2.521386598119957e-13, "loss": 8.6046, "step": 55280 }, { "epoch": 20.0, "learning_rate": 0.0, "loss": 8.6345, "step": 55300 }, { "epoch": 20.0, "step": 55300, "total_flos": 1.644510409710981e+18, "train_loss": 8.68246454138868, "train_runtime": 29891.7849, "train_samples_per_second": 3.7, "train_steps_per_second": 1.85 } ], "logging_steps": 20, "max_steps": 55300, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 100, "total_flos": 1.644510409710981e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }