diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,47947 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 10000, + "global_step": 5991, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": "2.0020e-07", + "loss": 8.0422, + "slid_loss": 8.0422, + "step": 1, + "time": 24.84 + }, + { + "epoch": 0.0, + "learning_rate": "4.0040e-07", + "loss": 7.2317, + "slid_loss": 7.6369, + "step": 2, + "time": 13.87 + }, + { + "epoch": 0.0, + "learning_rate": "6.0060e-07", + "loss": 7.3531, + "slid_loss": 7.5423, + "step": 3, + "time": 14.01 + }, + { + "epoch": 0.0, + "learning_rate": "8.0080e-07", + "loss": 7.9704, + "slid_loss": 7.6493, + "step": 4, + "time": 12.54 + }, + { + "epoch": 0.0, + "learning_rate": "1.0010e-06", + "loss": 7.8883, + "slid_loss": 7.6971, + "step": 5, + "time": 12.67 + }, + { + "epoch": 0.0, + "learning_rate": "1.2012e-06", + "loss": 7.5544, + "slid_loss": 7.6734, + "step": 6, + "time": 13.01 + }, + { + "epoch": 0.0, + "learning_rate": "1.4014e-06", + "loss": 8.2434, + "slid_loss": 7.7548, + "step": 7, + "time": 13.49 + }, + { + "epoch": 0.0, + "learning_rate": "1.6016e-06", + "loss": 7.7879, + "slid_loss": 7.7589, + "step": 8, + "time": 14.5 + }, + { + "epoch": 0.0, + "learning_rate": "1.8018e-06", + "loss": 7.3982, + "slid_loss": 7.7188, + "step": 9, + "time": 13.64 + }, + { + "epoch": 0.01, + "learning_rate": "2.0020e-06", + "loss": 7.4182, + "slid_loss": 7.6888, + "step": 10, + "time": 12.83 + }, + { + "epoch": 0.01, + "learning_rate": "2.2022e-06", + "loss": 7.752, + "slid_loss": 7.6945, + "step": 11, + "time": 13.34 + }, + { + "epoch": 0.01, + "learning_rate": "2.4024e-06", + "loss": 7.1904, + "slid_loss": 7.6525, + "step": 12, + "time": 13.1 + }, + { + "epoch": 0.01, + "learning_rate": "2.6026e-06", + "loss": 7.591, + "slid_loss": 7.6478, + "step": 13, + "time": 12.71 + }, + { + "epoch": 0.01, + "learning_rate": "2.8028e-06", + "loss": 6.7498, + "slid_loss": 7.5836, + "step": 14, + "time": 13.41 + }, + { + "epoch": 0.01, + "learning_rate": "3.0030e-06", + "loss": 6.9336, + "slid_loss": 7.5403, + "step": 15, + "time": 12.86 + }, + { + "epoch": 0.01, + "learning_rate": "3.2032e-06", + "loss": 6.901, + "slid_loss": 7.5004, + "step": 16, + "time": 11.63 + }, + { + "epoch": 0.01, + "learning_rate": "3.4034e-06", + "loss": 6.7857, + "slid_loss": 7.4583, + "step": 17, + "time": 13.67 + }, + { + "epoch": 0.01, + "learning_rate": "3.6036e-06", + "loss": 6.599, + "slid_loss": 7.4106, + "step": 18, + "time": 13.83 + }, + { + "epoch": 0.01, + "learning_rate": "3.8038e-06", + "loss": 6.4354, + "slid_loss": 7.3592, + "step": 19, + "time": 13.55 + }, + { + "epoch": 0.01, + "learning_rate": "4.0040e-06", + "loss": 5.7587, + "slid_loss": 7.2792, + "step": 20, + "time": 13.69 + }, + { + "epoch": 0.01, + "learning_rate": "4.2042e-06", + "loss": 6.3494, + "slid_loss": 7.2349, + "step": 21, + "time": 11.1 + }, + { + "epoch": 0.01, + "learning_rate": "4.4044e-06", + "loss": 5.7425, + "slid_loss": 7.1671, + "step": 22, + "time": 12.3 + }, + { + "epoch": 0.01, + "learning_rate": "4.6046e-06", + "loss": 5.4493, + "slid_loss": 7.0924, + "step": 23, + "time": 14.56 + }, + { + "epoch": 0.01, + "learning_rate": "4.8048e-06", + "loss": 5.756, + "slid_loss": 7.0367, + "step": 24, + "time": 13.41 + }, + { + "epoch": 0.01, + "learning_rate": "5.0050e-06", + "loss": 5.1867, + "slid_loss": 6.9627, + "step": 25, + "time": 11.26 + }, + { + "epoch": 0.01, + "learning_rate": "5.2052e-06", + "loss": 5.3632, + "slid_loss": 6.9012, + "step": 26, + "time": 11.23 + }, + { + "epoch": 0.01, + "learning_rate": "5.4054e-06", + "loss": 5.2109, + "slid_loss": 6.8386, + "step": 27, + "time": 13.71 + }, + { + "epoch": 0.01, + "learning_rate": "5.6056e-06", + "loss": 4.3727, + "slid_loss": 6.7505, + "step": 28, + "time": 13.78 + }, + { + "epoch": 0.01, + "learning_rate": "5.8058e-06", + "loss": 4.9872, + "slid_loss": 6.6897, + "step": 29, + "time": 12.79 + }, + { + "epoch": 0.02, + "learning_rate": "6.0060e-06", + "loss": 4.5096, + "slid_loss": 6.6171, + "step": 30, + "time": 14.05 + }, + { + "epoch": 0.02, + "learning_rate": "6.2062e-06", + "loss": 4.3011, + "slid_loss": 6.5424, + "step": 31, + "time": 13.69 + }, + { + "epoch": 0.02, + "learning_rate": "6.4064e-06", + "loss": 4.2147, + "slid_loss": 6.4696, + "step": 32, + "time": 11.22 + }, + { + "epoch": 0.02, + "learning_rate": "6.6066e-06", + "loss": 4.0249, + "slid_loss": 6.3955, + "step": 33, + "time": 13.04 + }, + { + "epoch": 0.02, + "learning_rate": "6.8068e-06", + "loss": 3.7888, + "slid_loss": 6.3189, + "step": 34, + "time": 12.88 + }, + { + "epoch": 0.02, + "learning_rate": "7.0070e-06", + "loss": 3.2251, + "slid_loss": 6.2305, + "step": 35, + "time": 13.34 + }, + { + "epoch": 0.02, + "learning_rate": "7.2072e-06", + "loss": 3.5923, + "slid_loss": 6.1572, + "step": 36, + "time": 11.9 + }, + { + "epoch": 0.02, + "learning_rate": "7.4074e-06", + "loss": 3.2495, + "slid_loss": 6.0786, + "step": 37, + "time": 14.42 + }, + { + "epoch": 0.02, + "learning_rate": "7.6076e-06", + "loss": 2.7819, + "slid_loss": 5.9918, + "step": 38, + "time": 13.49 + }, + { + "epoch": 0.02, + "learning_rate": "7.8078e-06", + "loss": 3.4164, + "slid_loss": 5.9258, + "step": 39, + "time": 11.34 + }, + { + "epoch": 0.02, + "learning_rate": "8.0080e-06", + "loss": 2.7652, + "slid_loss": 5.8468, + "step": 40, + "time": 12.82 + }, + { + "epoch": 0.02, + "learning_rate": "8.2082e-06", + "loss": 2.911, + "slid_loss": 5.7752, + "step": 41, + "time": 11.84 + }, + { + "epoch": 0.02, + "learning_rate": "8.4084e-06", + "loss": 2.5094, + "slid_loss": 5.6974, + "step": 42, + "time": 13.42 + }, + { + "epoch": 0.02, + "learning_rate": "8.6086e-06", + "loss": 2.4674, + "slid_loss": 5.6223, + "step": 43, + "time": 13.43 + }, + { + "epoch": 0.02, + "learning_rate": "8.8088e-06", + "loss": 2.8829, + "slid_loss": 5.5601, + "step": 44, + "time": 11.15 + }, + { + "epoch": 0.02, + "learning_rate": "9.0090e-06", + "loss": 2.0842, + "slid_loss": 5.4828, + "step": 45, + "time": 13.25 + }, + { + "epoch": 0.02, + "learning_rate": "9.2092e-06", + "loss": 1.9481, + "slid_loss": 5.406, + "step": 46, + "time": 13.77 + }, + { + "epoch": 0.02, + "learning_rate": "9.4094e-06", + "loss": 2.1369, + "slid_loss": 5.3364, + "step": 47, + "time": 13.72 + }, + { + "epoch": 0.02, + "learning_rate": "9.6096e-06", + "loss": 2.2663, + "slid_loss": 5.2725, + "step": 48, + "time": 13.65 + }, + { + "epoch": 0.02, + "learning_rate": "9.8098e-06", + "loss": 2.2488, + "slid_loss": 5.2108, + "step": 49, + "time": 11.53 + }, + { + "epoch": 0.03, + "learning_rate": "1.0010e-05", + "loss": 2.1479, + "slid_loss": 5.1495, + "step": 50, + "time": 13.3 + }, + { + "epoch": 0.03, + "learning_rate": "1.0210e-05", + "loss": 1.8055, + "slid_loss": 5.0839, + "step": 51, + "time": 11.76 + }, + { + "epoch": 0.03, + "learning_rate": "1.0410e-05", + "loss": 1.7888, + "slid_loss": 5.0206, + "step": 52, + "time": 12.45 + }, + { + "epoch": 0.03, + "learning_rate": "1.0611e-05", + "loss": 1.8985, + "slid_loss": 4.9617, + "step": 53, + "time": 11.39 + }, + { + "epoch": 0.03, + "learning_rate": "1.0811e-05", + "loss": 1.7801, + "slid_loss": 4.9027, + "step": 54, + "time": 14.76 + }, + { + "epoch": 0.03, + "learning_rate": "1.1011e-05", + "loss": 1.8989, + "slid_loss": 4.8481, + "step": 55, + "time": 13.11 + }, + { + "epoch": 0.03, + "learning_rate": "1.1211e-05", + "loss": 1.9185, + "slid_loss": 4.7958, + "step": 56, + "time": 13.71 + }, + { + "epoch": 0.03, + "learning_rate": "1.1411e-05", + "loss": 1.4543, + "slid_loss": 4.7372, + "step": 57, + "time": 12.59 + }, + { + "epoch": 0.03, + "learning_rate": "1.1612e-05", + "loss": 1.4012, + "slid_loss": 4.6797, + "step": 58, + "time": 12.31 + }, + { + "epoch": 0.03, + "learning_rate": "1.1812e-05", + "loss": 1.123, + "slid_loss": 4.6194, + "step": 59, + "time": 12.85 + }, + { + "epoch": 0.03, + "learning_rate": "1.2012e-05", + "loss": 1.6406, + "slid_loss": 4.5697, + "step": 60, + "time": 12.26 + }, + { + "epoch": 0.03, + "learning_rate": "1.2212e-05", + "loss": 1.5546, + "slid_loss": 4.5203, + "step": 61, + "time": 11.17 + }, + { + "epoch": 0.03, + "learning_rate": "1.2412e-05", + "loss": 1.3616, + "slid_loss": 4.4694, + "step": 62, + "time": 14.19 + }, + { + "epoch": 0.03, + "learning_rate": "1.2613e-05", + "loss": 1.3571, + "slid_loss": 4.42, + "step": 63, + "time": 14.25 + }, + { + "epoch": 0.03, + "learning_rate": "1.2813e-05", + "loss": 1.2239, + "slid_loss": 4.37, + "step": 64, + "time": 12.79 + }, + { + "epoch": 0.03, + "learning_rate": "1.3013e-05", + "loss": 1.3645, + "slid_loss": 4.3238, + "step": 65, + "time": 12.54 + }, + { + "epoch": 0.03, + "learning_rate": "1.3213e-05", + "loss": 1.3592, + "slid_loss": 4.2789, + "step": 66, + "time": 12.87 + }, + { + "epoch": 0.03, + "learning_rate": "1.3413e-05", + "loss": 1.383, + "slid_loss": 4.2356, + "step": 67, + "time": 13.87 + }, + { + "epoch": 0.03, + "learning_rate": "1.3614e-05", + "loss": 1.0186, + "slid_loss": 4.1883, + "step": 68, + "time": 10.96 + }, + { + "epoch": 0.03, + "learning_rate": "1.3814e-05", + "loss": 1.2965, + "slid_loss": 4.1464, + "step": 69, + "time": 12.75 + }, + { + "epoch": 0.04, + "learning_rate": "1.4014e-05", + "loss": 1.3113, + "slid_loss": 4.1059, + "step": 70, + "time": 13.34 + }, + { + "epoch": 0.04, + "learning_rate": "1.4214e-05", + "loss": 1.4706, + "slid_loss": 4.0688, + "step": 71, + "time": 14.26 + }, + { + "epoch": 0.04, + "learning_rate": "1.4414e-05", + "loss": 1.3088, + "slid_loss": 4.0305, + "step": 72, + "time": 11.94 + }, + { + "epoch": 0.04, + "learning_rate": "1.4615e-05", + "loss": 1.1183, + "slid_loss": 3.9906, + "step": 73, + "time": 11.98 + }, + { + "epoch": 0.04, + "learning_rate": "1.4815e-05", + "loss": 1.1557, + "slid_loss": 3.9523, + "step": 74, + "time": 13.2 + }, + { + "epoch": 0.04, + "learning_rate": "1.5015e-05", + "loss": 1.1556, + "slid_loss": 3.915, + "step": 75, + "time": 11.36 + }, + { + "epoch": 0.04, + "learning_rate": "1.5215e-05", + "loss": 1.2668, + "slid_loss": 3.8801, + "step": 76, + "time": 13.98 + }, + { + "epoch": 0.04, + "learning_rate": "1.5415e-05", + "loss": 1.086, + "slid_loss": 3.8438, + "step": 77, + "time": 12.67 + }, + { + "epoch": 0.04, + "learning_rate": "1.5616e-05", + "loss": 1.0011, + "slid_loss": 3.8074, + "step": 78, + "time": 11.34 + }, + { + "epoch": 0.04, + "learning_rate": "1.5816e-05", + "loss": 1.2947, + "slid_loss": 3.7756, + "step": 79, + "time": 12.85 + }, + { + "epoch": 0.04, + "learning_rate": "1.6016e-05", + "loss": 1.3001, + "slid_loss": 3.7447, + "step": 80, + "time": 14.17 + }, + { + "epoch": 0.04, + "learning_rate": "1.6216e-05", + "loss": 1.017, + "slid_loss": 3.711, + "step": 81, + "time": 11.05 + }, + { + "epoch": 0.04, + "learning_rate": "1.6416e-05", + "loss": 1.269, + "slid_loss": 3.6812, + "step": 82, + "time": 13.13 + }, + { + "epoch": 0.04, + "learning_rate": "1.6617e-05", + "loss": 1.068, + "slid_loss": 3.6497, + "step": 83, + "time": 11.84 + }, + { + "epoch": 0.04, + "learning_rate": "1.6817e-05", + "loss": 1.1038, + "slid_loss": 3.6194, + "step": 84, + "time": 13.91 + }, + { + "epoch": 0.04, + "learning_rate": "1.7017e-05", + "loss": 1.1999, + "slid_loss": 3.5909, + "step": 85, + "time": 12.79 + }, + { + "epoch": 0.04, + "learning_rate": "1.7217e-05", + "loss": 1.1327, + "slid_loss": 3.5624, + "step": 86, + "time": 13.17 + }, + { + "epoch": 0.04, + "learning_rate": "1.7417e-05", + "loss": 1.1092, + "slid_loss": 3.5342, + "step": 87, + "time": 12.48 + }, + { + "epoch": 0.04, + "learning_rate": "1.7618e-05", + "loss": 1.291, + "slid_loss": 3.5087, + "step": 88, + "time": 12.88 + }, + { + "epoch": 0.04, + "learning_rate": "1.7818e-05", + "loss": 1.239, + "slid_loss": 3.4832, + "step": 89, + "time": 11.89 + }, + { + "epoch": 0.05, + "learning_rate": "1.8018e-05", + "loss": 1.1866, + "slid_loss": 3.4576, + "step": 90, + "time": 12.92 + }, + { + "epoch": 0.05, + "learning_rate": "1.8218e-05", + "loss": 1.2326, + "slid_loss": 3.4332, + "step": 91, + "time": 13.3 + }, + { + "epoch": 0.05, + "learning_rate": "1.8418e-05", + "loss": 1.0341, + "slid_loss": 3.4071, + "step": 92, + "time": 13.74 + }, + { + "epoch": 0.05, + "learning_rate": "1.8619e-05", + "loss": 1.1382, + "slid_loss": 3.3827, + "step": 93, + "time": 13.48 + }, + { + "epoch": 0.05, + "learning_rate": "1.8819e-05", + "loss": 1.0274, + "slid_loss": 3.3577, + "step": 94, + "time": 13.49 + }, + { + "epoch": 0.05, + "learning_rate": "1.9019e-05", + "loss": 1.0207, + "slid_loss": 3.3331, + "step": 95, + "time": 12.74 + }, + { + "epoch": 0.05, + "learning_rate": "1.9219e-05", + "loss": 0.9809, + "slid_loss": 3.3086, + "step": 96, + "time": 11.96 + }, + { + "epoch": 0.05, + "learning_rate": "1.9419e-05", + "loss": 1.2021, + "slid_loss": 3.2868, + "step": 97, + "time": 13.26 + }, + { + "epoch": 0.05, + "learning_rate": "1.9620e-05", + "loss": 1.0748, + "slid_loss": 3.2643, + "step": 98, + "time": 14.33 + }, + { + "epoch": 0.05, + "learning_rate": "1.9820e-05", + "loss": 1.2888, + "slid_loss": 3.2443, + "step": 99, + "time": 14.07 + }, + { + "epoch": 0.05, + "learning_rate": "2.0020e-05", + "loss": 0.9945, + "slid_loss": 3.2218, + "step": 100, + "time": 12.18 + }, + { + "epoch": 0.05, + "learning_rate": "2.0220e-05", + "loss": 0.9915, + "slid_loss": 3.1513, + "step": 101, + "time": 12.77 + }, + { + "epoch": 0.05, + "learning_rate": "2.0420e-05", + "loss": 1.1431, + "slid_loss": 3.0904, + "step": 102, + "time": 13.11 + }, + { + "epoch": 0.05, + "learning_rate": "2.0621e-05", + "loss": 1.3557, + "slid_loss": 3.0305, + "step": 103, + "time": 11.94 + }, + { + "epoch": 0.05, + "learning_rate": "2.0821e-05", + "loss": 1.1159, + "slid_loss": 2.9619, + "step": 104, + "time": 10.84 + }, + { + "epoch": 0.05, + "learning_rate": "2.1021e-05", + "loss": 1.0927, + "slid_loss": 2.894, + "step": 105, + "time": 13.84 + }, + { + "epoch": 0.05, + "learning_rate": "2.1221e-05", + "loss": 1.2356, + "slid_loss": 2.8308, + "step": 106, + "time": 13.33 + }, + { + "epoch": 0.05, + "learning_rate": "2.1421e-05", + "loss": 1.2182, + "slid_loss": 2.7605, + "step": 107, + "time": 13.53 + }, + { + "epoch": 0.05, + "learning_rate": "2.1622e-05", + "loss": 1.133, + "slid_loss": 2.694, + "step": 108, + "time": 12.27 + }, + { + "epoch": 0.05, + "learning_rate": "2.1822e-05", + "loss": 1.0008, + "slid_loss": 2.63, + "step": 109, + "time": 13.35 + }, + { + "epoch": 0.06, + "learning_rate": "2.2022e-05", + "loss": 1.0932, + "slid_loss": 2.5667, + "step": 110, + "time": 14.03 + }, + { + "epoch": 0.06, + "learning_rate": "2.2222e-05", + "loss": 0.9232, + "slid_loss": 2.4985, + "step": 111, + "time": 10.7 + }, + { + "epoch": 0.06, + "learning_rate": "2.2422e-05", + "loss": 1.153, + "slid_loss": 2.4381, + "step": 112, + "time": 11.42 + }, + { + "epoch": 0.06, + "learning_rate": "2.2623e-05", + "loss": 1.2526, + "slid_loss": 2.3747, + "step": 113, + "time": 13.71 + }, + { + "epoch": 0.06, + "learning_rate": "2.2823e-05", + "loss": 1.3518, + "slid_loss": 2.3207, + "step": 114, + "time": 11.33 + }, + { + "epoch": 0.06, + "learning_rate": "2.3023e-05", + "loss": 1.0142, + "slid_loss": 2.2615, + "step": 115, + "time": 10.74 + }, + { + "epoch": 0.06, + "learning_rate": "2.3223e-05", + "loss": 1.1475, + "slid_loss": 2.204, + "step": 116, + "time": 12.68 + }, + { + "epoch": 0.06, + "learning_rate": "2.3423e-05", + "loss": 1.2697, + "slid_loss": 2.1488, + "step": 117, + "time": 13.15 + }, + { + "epoch": 0.06, + "learning_rate": "2.3624e-05", + "loss": 1.0928, + "slid_loss": 2.0938, + "step": 118, + "time": 12.68 + }, + { + "epoch": 0.06, + "learning_rate": "2.3824e-05", + "loss": 1.2549, + "slid_loss": 2.042, + "step": 119, + "time": 12.91 + }, + { + "epoch": 0.06, + "learning_rate": "2.4024e-05", + "loss": 1.0609, + "slid_loss": 1.995, + "step": 120, + "time": 12.81 + }, + { + "epoch": 0.06, + "learning_rate": "2.4224e-05", + "loss": 1.1962, + "slid_loss": 1.9435, + "step": 121, + "time": 12.31 + }, + { + "epoch": 0.06, + "learning_rate": "2.4424e-05", + "loss": 1.0908, + "slid_loss": 1.8969, + "step": 122, + "time": 13.19 + }, + { + "epoch": 0.06, + "learning_rate": "2.4625e-05", + "loss": 1.1179, + "slid_loss": 1.8536, + "step": 123, + "time": 12.73 + }, + { + "epoch": 0.06, + "learning_rate": "2.4825e-05", + "loss": 0.9503, + "slid_loss": 1.8056, + "step": 124, + "time": 13.62 + }, + { + "epoch": 0.06, + "learning_rate": "2.5025e-05", + "loss": 1.0419, + "slid_loss": 1.7641, + "step": 125, + "time": 13.39 + }, + { + "epoch": 0.06, + "learning_rate": "2.5225e-05", + "loss": 0.852, + "slid_loss": 1.719, + "step": 126, + "time": 10.71 + }, + { + "epoch": 0.06, + "learning_rate": "2.5425e-05", + "loss": 1.2094, + "slid_loss": 1.679, + "step": 127, + "time": 10.54 + }, + { + "epoch": 0.06, + "learning_rate": "2.5626e-05", + "loss": 1.2765, + "slid_loss": 1.648, + "step": 128, + "time": 13.89 + }, + { + "epoch": 0.06, + "learning_rate": "2.5826e-05", + "loss": 1.1147, + "slid_loss": 1.6093, + "step": 129, + "time": 11.14 + }, + { + "epoch": 0.07, + "learning_rate": "2.6026e-05", + "loss": 1.1327, + "slid_loss": 1.5755, + "step": 130, + "time": 12.6 + }, + { + "epoch": 0.07, + "learning_rate": "2.6226e-05", + "loss": 0.9429, + "slid_loss": 1.542, + "step": 131, + "time": 13.58 + }, + { + "epoch": 0.07, + "learning_rate": "2.6426e-05", + "loss": 1.2516, + "slid_loss": 1.5123, + "step": 132, + "time": 11.37 + }, + { + "epoch": 0.07, + "learning_rate": "2.6627e-05", + "loss": 1.1647, + "slid_loss": 1.4837, + "step": 133, + "time": 11.79 + }, + { + "epoch": 0.07, + "learning_rate": "2.6827e-05", + "loss": 1.0811, + "slid_loss": 1.4566, + "step": 134, + "time": 13.12 + }, + { + "epoch": 0.07, + "learning_rate": "2.7027e-05", + "loss": 1.2063, + "slid_loss": 1.4365, + "step": 135, + "time": 13.02 + }, + { + "epoch": 0.07, + "learning_rate": "2.7227e-05", + "loss": 1.1222, + "slid_loss": 1.4118, + "step": 136, + "time": 14.2 + }, + { + "epoch": 0.07, + "learning_rate": "2.7427e-05", + "loss": 0.9884, + "slid_loss": 1.3891, + "step": 137, + "time": 12.55 + }, + { + "epoch": 0.07, + "learning_rate": "2.7628e-05", + "loss": 1.096, + "slid_loss": 1.3723, + "step": 138, + "time": 12.2 + }, + { + "epoch": 0.07, + "learning_rate": "2.7828e-05", + "loss": 1.1358, + "slid_loss": 1.3495, + "step": 139, + "time": 11.8 + }, + { + "epoch": 0.07, + "learning_rate": "2.8028e-05", + "loss": 0.9489, + "slid_loss": 1.3313, + "step": 140, + "time": 13.2 + }, + { + "epoch": 0.07, + "learning_rate": "2.8228e-05", + "loss": 1.1331, + "slid_loss": 1.3135, + "step": 141, + "time": 10.55 + }, + { + "epoch": 0.07, + "learning_rate": "2.8428e-05", + "loss": 1.2979, + "slid_loss": 1.3014, + "step": 142, + "time": 13.36 + }, + { + "epoch": 0.07, + "learning_rate": "2.8629e-05", + "loss": 0.9282, + "slid_loss": 1.286, + "step": 143, + "time": 11.83 + }, + { + "epoch": 0.07, + "learning_rate": "2.8829e-05", + "loss": 1.2209, + "slid_loss": 1.2694, + "step": 144, + "time": 13.04 + }, + { + "epoch": 0.07, + "learning_rate": "2.9029e-05", + "loss": 1.1172, + "slid_loss": 1.2597, + "step": 145, + "time": 12.87 + }, + { + "epoch": 0.07, + "learning_rate": "2.9229e-05", + "loss": 1.06, + "slid_loss": 1.2509, + "step": 146, + "time": 13.95 + }, + { + "epoch": 0.07, + "learning_rate": "2.9429e-05", + "loss": 1.2086, + "slid_loss": 1.2416, + "step": 147, + "time": 12.3 + }, + { + "epoch": 0.07, + "learning_rate": "2.9630e-05", + "loss": 1.2831, + "slid_loss": 1.2317, + "step": 148, + "time": 12.79 + }, + { + "epoch": 0.07, + "learning_rate": "2.9830e-05", + "loss": 0.9384, + "slid_loss": 1.2186, + "step": 149, + "time": 13.11 + }, + { + "epoch": 0.08, + "learning_rate": "3.0030e-05", + "loss": 1.1216, + "slid_loss": 1.2084, + "step": 150, + "time": 12.74 + }, + { + "epoch": 0.08, + "learning_rate": "3.0230e-05", + "loss": 1.0599, + "slid_loss": 1.2009, + "step": 151, + "time": 10.98 + }, + { + "epoch": 0.08, + "learning_rate": "3.0430e-05", + "loss": 1.2127, + "slid_loss": 1.1952, + "step": 152, + "time": 12.2 + }, + { + "epoch": 0.08, + "learning_rate": "3.0631e-05", + "loss": 1.0153, + "slid_loss": 1.1863, + "step": 153, + "time": 12.22 + }, + { + "epoch": 0.08, + "learning_rate": "3.0831e-05", + "loss": 1.1935, + "slid_loss": 1.1805, + "step": 154, + "time": 13.96 + }, + { + "epoch": 0.08, + "learning_rate": "3.1031e-05", + "loss": 0.8643, + "slid_loss": 1.1701, + "step": 155, + "time": 10.62 + }, + { + "epoch": 0.08, + "learning_rate": "3.1231e-05", + "loss": 1.0327, + "slid_loss": 1.1613, + "step": 156, + "time": 11.23 + }, + { + "epoch": 0.08, + "learning_rate": "3.1431e-05", + "loss": 1.0838, + "slid_loss": 1.1576, + "step": 157, + "time": 10.86 + }, + { + "epoch": 0.08, + "learning_rate": "3.1632e-05", + "loss": 0.977, + "slid_loss": 1.1533, + "step": 158, + "time": 12.76 + }, + { + "epoch": 0.08, + "learning_rate": "3.1832e-05", + "loss": 0.9536, + "slid_loss": 1.1516, + "step": 159, + "time": 12.03 + }, + { + "epoch": 0.08, + "learning_rate": "3.2032e-05", + "loss": 1.2103, + "slid_loss": 1.1473, + "step": 160, + "time": 11.32 + }, + { + "epoch": 0.08, + "learning_rate": "3.2232e-05", + "loss": 1.0699, + "slid_loss": 1.1425, + "step": 161, + "time": 13.1 + }, + { + "epoch": 0.08, + "learning_rate": "3.2432e-05", + "loss": 0.9056, + "slid_loss": 1.1379, + "step": 162, + "time": 11.24 + }, + { + "epoch": 0.08, + "learning_rate": "3.2633e-05", + "loss": 1.1746, + "slid_loss": 1.1361, + "step": 163, + "time": 12.81 + }, + { + "epoch": 0.08, + "learning_rate": "3.2833e-05", + "loss": 1.1667, + "slid_loss": 1.1355, + "step": 164, + "time": 12.93 + }, + { + "epoch": 0.08, + "learning_rate": "3.3033e-05", + "loss": 1.0544, + "slid_loss": 1.1324, + "step": 165, + "time": 11.19 + }, + { + "epoch": 0.08, + "learning_rate": "3.3233e-05", + "loss": 1.0117, + "slid_loss": 1.1289, + "step": 166, + "time": 11.01 + }, + { + "epoch": 0.08, + "learning_rate": "3.3433e-05", + "loss": 1.1404, + "slid_loss": 1.1265, + "step": 167, + "time": 12.9 + }, + { + "epoch": 0.08, + "learning_rate": "3.3634e-05", + "loss": 1.0986, + "slid_loss": 1.1273, + "step": 168, + "time": 12.06 + }, + { + "epoch": 0.08, + "learning_rate": "3.3834e-05", + "loss": 0.9412, + "slid_loss": 1.1238, + "step": 169, + "time": 14.26 + }, + { + "epoch": 0.09, + "learning_rate": "3.4034e-05", + "loss": 0.8662, + "slid_loss": 1.1193, + "step": 170, + "time": 12.91 + }, + { + "epoch": 0.09, + "learning_rate": "3.4234e-05", + "loss": 1.1698, + "slid_loss": 1.1163, + "step": 171, + "time": 11.7 + }, + { + "epoch": 0.09, + "learning_rate": "3.4434e-05", + "loss": 1.3857, + "slid_loss": 1.1171, + "step": 172, + "time": 13.49 + }, + { + "epoch": 0.09, + "learning_rate": "3.4635e-05", + "loss": 1.15, + "slid_loss": 1.1174, + "step": 173, + "time": 11.69 + }, + { + "epoch": 0.09, + "learning_rate": "3.4835e-05", + "loss": 1.1332, + "slid_loss": 1.1172, + "step": 174, + "time": 13.77 + }, + { + "epoch": 0.09, + "learning_rate": "3.5035e-05", + "loss": 1.2576, + "slid_loss": 1.1182, + "step": 175, + "time": 13.16 + }, + { + "epoch": 0.09, + "learning_rate": "3.5235e-05", + "loss": 1.2776, + "slid_loss": 1.1183, + "step": 176, + "time": 11.45 + }, + { + "epoch": 0.09, + "learning_rate": "3.5435e-05", + "loss": 1.1654, + "slid_loss": 1.1191, + "step": 177, + "time": 13.82 + }, + { + "epoch": 0.09, + "learning_rate": "3.5636e-05", + "loss": 0.9115, + "slid_loss": 1.1182, + "step": 178, + "time": 10.71 + }, + { + "epoch": 0.09, + "learning_rate": "3.5836e-05", + "loss": 1.0676, + "slid_loss": 1.1159, + "step": 179, + "time": 14.02 + }, + { + "epoch": 0.09, + "learning_rate": "3.6036e-05", + "loss": 0.8518, + "slid_loss": 1.1114, + "step": 180, + "time": 13.33 + }, + { + "epoch": 0.09, + "learning_rate": "3.6236e-05", + "loss": 0.8806, + "slid_loss": 1.1101, + "step": 181, + "time": 12.02 + }, + { + "epoch": 0.09, + "learning_rate": "3.6436e-05", + "loss": 1.0107, + "slid_loss": 1.1075, + "step": 182, + "time": 12.77 + }, + { + "epoch": 0.09, + "learning_rate": "3.6637e-05", + "loss": 1.1599, + "slid_loss": 1.1084, + "step": 183, + "time": 12.31 + }, + { + "epoch": 0.09, + "learning_rate": "3.6837e-05", + "loss": 1.0629, + "slid_loss": 1.108, + "step": 184, + "time": 13.44 + }, + { + "epoch": 0.09, + "learning_rate": "3.7037e-05", + "loss": 1.0697, + "slid_loss": 1.1067, + "step": 185, + "time": 12.79 + }, + { + "epoch": 0.09, + "learning_rate": "3.7237e-05", + "loss": 1.0969, + "slid_loss": 1.1063, + "step": 186, + "time": 11.72 + }, + { + "epoch": 0.09, + "learning_rate": "3.7437e-05", + "loss": 1.0838, + "slid_loss": 1.1061, + "step": 187, + "time": 12.91 + }, + { + "epoch": 0.09, + "learning_rate": "3.7638e-05", + "loss": 1.0902, + "slid_loss": 1.1041, + "step": 188, + "time": 11.24 + }, + { + "epoch": 0.09, + "learning_rate": "3.7838e-05", + "loss": 1.2059, + "slid_loss": 1.1037, + "step": 189, + "time": 12.82 + }, + { + "epoch": 0.1, + "learning_rate": "3.8038e-05", + "loss": 0.9245, + "slid_loss": 1.1011, + "step": 190, + "time": 11.25 + }, + { + "epoch": 0.1, + "learning_rate": "3.8238e-05", + "loss": 1.0708, + "slid_loss": 1.0995, + "step": 191, + "time": 13.17 + }, + { + "epoch": 0.1, + "learning_rate": "3.8438e-05", + "loss": 0.8944, + "slid_loss": 1.0981, + "step": 192, + "time": 13.3 + }, + { + "epoch": 0.1, + "learning_rate": "3.8639e-05", + "loss": 0.9177, + "slid_loss": 1.0959, + "step": 193, + "time": 12.67 + }, + { + "epoch": 0.1, + "learning_rate": "3.8839e-05", + "loss": 1.1986, + "slid_loss": 1.0976, + "step": 194, + "time": 13.24 + }, + { + "epoch": 0.1, + "learning_rate": "3.9039e-05", + "loss": 1.2073, + "slid_loss": 1.0995, + "step": 195, + "time": 13.71 + }, + { + "epoch": 0.1, + "learning_rate": "3.9239e-05", + "loss": 0.8262, + "slid_loss": 1.0979, + "step": 196, + "time": 12.29 + }, + { + "epoch": 0.1, + "learning_rate": "3.9439e-05", + "loss": 1.0959, + "slid_loss": 1.0969, + "step": 197, + "time": 13.36 + }, + { + "epoch": 0.1, + "learning_rate": "3.9640e-05", + "loss": 1.0458, + "slid_loss": 1.0966, + "step": 198, + "time": 13.26 + }, + { + "epoch": 0.1, + "learning_rate": "3.9840e-05", + "loss": 1.0047, + "slid_loss": 1.0937, + "step": 199, + "time": 12.92 + }, + { + "epoch": 0.1, + "learning_rate": "4.0040e-05", + "loss": 1.2326, + "slid_loss": 1.0961, + "step": 200, + "time": 12.9 + }, + { + "epoch": 0.1, + "learning_rate": "4.0240e-05", + "loss": 1.2911, + "slid_loss": 1.0991, + "step": 201, + "time": 13.51 + }, + { + "epoch": 0.1, + "learning_rate": "4.0440e-05", + "loss": 0.8384, + "slid_loss": 1.0961, + "step": 202, + "time": 11.39 + }, + { + "epoch": 0.1, + "learning_rate": "4.0641e-05", + "loss": 0.996, + "slid_loss": 1.0925, + "step": 203, + "time": 11.97 + }, + { + "epoch": 0.1, + "learning_rate": "4.0841e-05", + "loss": 1.1749, + "slid_loss": 1.0931, + "step": 204, + "time": 13.15 + }, + { + "epoch": 0.1, + "learning_rate": "4.1041e-05", + "loss": 1.0994, + "slid_loss": 1.0931, + "step": 205, + "time": 12.06 + }, + { + "epoch": 0.1, + "learning_rate": "4.1241e-05", + "loss": 1.0138, + "slid_loss": 1.0909, + "step": 206, + "time": 12.02 + }, + { + "epoch": 0.1, + "learning_rate": "4.1441e-05", + "loss": 1.096, + "slid_loss": 1.0897, + "step": 207, + "time": 10.7 + }, + { + "epoch": 0.1, + "learning_rate": "4.1642e-05", + "loss": 1.2546, + "slid_loss": 1.0909, + "step": 208, + "time": 11.62 + }, + { + "epoch": 0.1, + "learning_rate": "4.1842e-05", + "loss": 1.0611, + "slid_loss": 1.0915, + "step": 209, + "time": 13.27 + }, + { + "epoch": 0.11, + "learning_rate": "4.2042e-05", + "loss": 1.1134, + "slid_loss": 1.0917, + "step": 210, + "time": 13.06 + }, + { + "epoch": 0.11, + "learning_rate": "4.2242e-05", + "loss": 1.0694, + "slid_loss": 1.0932, + "step": 211, + "time": 12.5 + }, + { + "epoch": 0.11, + "learning_rate": "4.2442e-05", + "loss": 0.9653, + "slid_loss": 1.0913, + "step": 212, + "time": 11.06 + }, + { + "epoch": 0.11, + "learning_rate": "4.2643e-05", + "loss": 0.8155, + "slid_loss": 1.0869, + "step": 213, + "time": 13.32 + }, + { + "epoch": 0.11, + "learning_rate": "4.2843e-05", + "loss": 1.3173, + "slid_loss": 1.0866, + "step": 214, + "time": 11.66 + }, + { + "epoch": 0.11, + "learning_rate": "4.3043e-05", + "loss": 1.0513, + "slid_loss": 1.0869, + "step": 215, + "time": 13.27 + }, + { + "epoch": 0.11, + "learning_rate": "4.3243e-05", + "loss": 1.0758, + "slid_loss": 1.0862, + "step": 216, + "time": 12.98 + }, + { + "epoch": 0.11, + "learning_rate": "4.3443e-05", + "loss": 1.153, + "slid_loss": 1.0851, + "step": 217, + "time": 10.8 + }, + { + "epoch": 0.11, + "learning_rate": "4.3644e-05", + "loss": 1.1182, + "slid_loss": 1.0853, + "step": 218, + "time": 10.84 + }, + { + "epoch": 0.11, + "learning_rate": "4.3844e-05", + "loss": 1.1513, + "slid_loss": 1.0843, + "step": 219, + "time": 11.8 + }, + { + "epoch": 0.11, + "learning_rate": "4.4044e-05", + "loss": 0.9724, + "slid_loss": 1.0834, + "step": 220, + "time": 11.35 + }, + { + "epoch": 0.11, + "learning_rate": "4.4244e-05", + "loss": 1.0353, + "slid_loss": 1.0818, + "step": 221, + "time": 11.36 + }, + { + "epoch": 0.11, + "learning_rate": "4.4444e-05", + "loss": 1.2189, + "slid_loss": 1.0831, + "step": 222, + "time": 12.71 + }, + { + "epoch": 0.11, + "learning_rate": "4.4645e-05", + "loss": 0.9456, + "slid_loss": 1.0813, + "step": 223, + "time": 12.09 + }, + { + "epoch": 0.11, + "learning_rate": "4.4845e-05", + "loss": 1.1094, + "slid_loss": 1.0829, + "step": 224, + "time": 12.83 + }, + { + "epoch": 0.11, + "learning_rate": "4.5045e-05", + "loss": 1.0607, + "slid_loss": 1.0831, + "step": 225, + "time": 13.13 + }, + { + "epoch": 0.11, + "learning_rate": "4.5245e-05", + "loss": 0.9757, + "slid_loss": 1.0844, + "step": 226, + "time": 11.19 + }, + { + "epoch": 0.11, + "learning_rate": "4.5445e-05", + "loss": 1.1958, + "slid_loss": 1.0842, + "step": 227, + "time": 12.23 + }, + { + "epoch": 0.11, + "learning_rate": "4.5646e-05", + "loss": 1.1259, + "slid_loss": 1.0827, + "step": 228, + "time": 12.04 + }, + { + "epoch": 0.11, + "learning_rate": "4.5846e-05", + "loss": 0.8205, + "slid_loss": 1.0798, + "step": 229, + "time": 13.3 + }, + { + "epoch": 0.12, + "learning_rate": "4.6046e-05", + "loss": 1.0848, + "slid_loss": 1.0793, + "step": 230, + "time": 11.08 + }, + { + "epoch": 0.12, + "learning_rate": "4.6246e-05", + "loss": 1.2351, + "slid_loss": 1.0822, + "step": 231, + "time": 12.24 + }, + { + "epoch": 0.12, + "learning_rate": "4.6446e-05", + "loss": 1.1665, + "slid_loss": 1.0814, + "step": 232, + "time": 14.39 + }, + { + "epoch": 0.12, + "learning_rate": "4.6647e-05", + "loss": 1.1424, + "slid_loss": 1.0811, + "step": 233, + "time": 11.69 + }, + { + "epoch": 0.12, + "learning_rate": "4.6847e-05", + "loss": 0.9951, + "slid_loss": 1.0803, + "step": 234, + "time": 12.86 + }, + { + "epoch": 0.12, + "learning_rate": "4.7047e-05", + "loss": 0.9402, + "slid_loss": 1.0776, + "step": 235, + "time": 13.87 + }, + { + "epoch": 0.12, + "learning_rate": "4.7247e-05", + "loss": 1.1128, + "slid_loss": 1.0775, + "step": 236, + "time": 13.3 + }, + { + "epoch": 0.12, + "learning_rate": "4.7447e-05", + "loss": 1.0533, + "slid_loss": 1.0782, + "step": 237, + "time": 13.07 + }, + { + "epoch": 0.12, + "learning_rate": "4.7648e-05", + "loss": 0.9692, + "slid_loss": 1.0769, + "step": 238, + "time": 11.64 + }, + { + "epoch": 0.12, + "learning_rate": "4.7848e-05", + "loss": 1.0161, + "slid_loss": 1.0757, + "step": 239, + "time": 10.84 + }, + { + "epoch": 0.12, + "learning_rate": "4.8048e-05", + "loss": 1.0551, + "slid_loss": 1.0768, + "step": 240, + "time": 13.88 + }, + { + "epoch": 0.12, + "learning_rate": "4.8248e-05", + "loss": 1.0879, + "slid_loss": 1.0763, + "step": 241, + "time": 10.7 + }, + { + "epoch": 0.12, + "learning_rate": "4.8448e-05", + "loss": 1.0015, + "slid_loss": 1.0734, + "step": 242, + "time": 11.06 + }, + { + "epoch": 0.12, + "learning_rate": "4.8649e-05", + "loss": 0.9753, + "slid_loss": 1.0738, + "step": 243, + "time": 10.56 + }, + { + "epoch": 0.12, + "learning_rate": "4.8849e-05", + "loss": 1.1781, + "slid_loss": 1.0734, + "step": 244, + "time": 13.82 + }, + { + "epoch": 0.12, + "learning_rate": "4.9049e-05", + "loss": 1.0077, + "slid_loss": 1.0723, + "step": 245, + "time": 13.87 + }, + { + "epoch": 0.12, + "learning_rate": "4.9249e-05", + "loss": 1.2966, + "slid_loss": 1.0747, + "step": 246, + "time": 12.77 + }, + { + "epoch": 0.12, + "learning_rate": "4.9449e-05", + "loss": 1.1401, + "slid_loss": 1.074, + "step": 247, + "time": 13.55 + }, + { + "epoch": 0.12, + "learning_rate": "4.9650e-05", + "loss": 0.871, + "slid_loss": 1.0699, + "step": 248, + "time": 12.23 + }, + { + "epoch": 0.12, + "learning_rate": "4.9850e-05", + "loss": 0.9402, + "slid_loss": 1.0699, + "step": 249, + "time": 12.25 + }, + { + "epoch": 0.13, + "learning_rate": "5.0050e-05", + "loss": 1.1643, + "slid_loss": 1.0703, + "step": 250, + "time": 13.99 + }, + { + "epoch": 0.13, + "learning_rate": "5.0250e-05", + "loss": 1.1154, + "slid_loss": 1.0709, + "step": 251, + "time": 11.47 + }, + { + "epoch": 0.13, + "learning_rate": "5.0450e-05", + "loss": 1.0435, + "slid_loss": 1.0692, + "step": 252, + "time": 12.04 + }, + { + "epoch": 0.13, + "learning_rate": "5.0651e-05", + "loss": 1.0597, + "slid_loss": 1.0696, + "step": 253, + "time": 11.22 + }, + { + "epoch": 0.13, + "learning_rate": "5.0851e-05", + "loss": 1.0648, + "slid_loss": 1.0683, + "step": 254, + "time": 11.39 + }, + { + "epoch": 0.13, + "learning_rate": "5.1051e-05", + "loss": 0.9755, + "slid_loss": 1.0694, + "step": 255, + "time": 11.01 + }, + { + "epoch": 0.13, + "learning_rate": "5.1251e-05", + "loss": 1.2978, + "slid_loss": 1.0721, + "step": 256, + "time": 12.27 + }, + { + "epoch": 0.13, + "learning_rate": "5.1451e-05", + "loss": 1.0362, + "slid_loss": 1.0716, + "step": 257, + "time": 13.28 + }, + { + "epoch": 0.13, + "learning_rate": "5.1652e-05", + "loss": 1.2597, + "slid_loss": 1.0744, + "step": 258, + "time": 13.2 + }, + { + "epoch": 0.13, + "learning_rate": "5.1852e-05", + "loss": 1.1653, + "slid_loss": 1.0766, + "step": 259, + "time": 13.27 + }, + { + "epoch": 0.13, + "learning_rate": "5.2052e-05", + "loss": 1.2637, + "slid_loss": 1.0771, + "step": 260, + "time": 13.38 + }, + { + "epoch": 0.13, + "learning_rate": "5.2252e-05", + "loss": 1.0767, + "slid_loss": 1.0772, + "step": 261, + "time": 13.29 + }, + { + "epoch": 0.13, + "learning_rate": "5.2452e-05", + "loss": 1.2036, + "slid_loss": 1.0801, + "step": 262, + "time": 14.04 + }, + { + "epoch": 0.13, + "learning_rate": "5.2653e-05", + "loss": 1.0717, + "slid_loss": 1.0791, + "step": 263, + "time": 11.77 + }, + { + "epoch": 0.13, + "learning_rate": "5.2853e-05", + "loss": 0.9408, + "slid_loss": 1.0769, + "step": 264, + "time": 11.59 + }, + { + "epoch": 0.13, + "learning_rate": "5.3053e-05", + "loss": 0.9595, + "slid_loss": 1.0759, + "step": 265, + "time": 11.48 + }, + { + "epoch": 0.13, + "learning_rate": "5.3253e-05", + "loss": 0.9825, + "slid_loss": 1.0756, + "step": 266, + "time": 13.31 + }, + { + "epoch": 0.13, + "learning_rate": "5.3453e-05", + "loss": 1.1811, + "slid_loss": 1.076, + "step": 267, + "time": 13.99 + }, + { + "epoch": 0.13, + "learning_rate": "5.3654e-05", + "loss": 1.0276, + "slid_loss": 1.0753, + "step": 268, + "time": 12.34 + }, + { + "epoch": 0.13, + "learning_rate": "5.3854e-05", + "loss": 1.1713, + "slid_loss": 1.0776, + "step": 269, + "time": 11.25 + }, + { + "epoch": 0.14, + "learning_rate": "5.4054e-05", + "loss": 1.0757, + "slid_loss": 1.0797, + "step": 270, + "time": 11.8 + }, + { + "epoch": 0.14, + "learning_rate": "5.4254e-05", + "loss": 0.8617, + "slid_loss": 1.0766, + "step": 271, + "time": 11.79 + }, + { + "epoch": 0.14, + "learning_rate": "5.4454e-05", + "loss": 0.9926, + "slid_loss": 1.0727, + "step": 272, + "time": 13.12 + }, + { + "epoch": 0.14, + "learning_rate": "5.4655e-05", + "loss": 1.0468, + "slid_loss": 1.0717, + "step": 273, + "time": 13.2 + }, + { + "epoch": 0.14, + "learning_rate": "5.4855e-05", + "loss": 1.0521, + "slid_loss": 1.0709, + "step": 274, + "time": 12.07 + }, + { + "epoch": 0.14, + "learning_rate": "5.5055e-05", + "loss": 0.9541, + "slid_loss": 1.0678, + "step": 275, + "time": 13.32 + }, + { + "epoch": 0.14, + "learning_rate": "5.5255e-05", + "loss": 1.1616, + "slid_loss": 1.0667, + "step": 276, + "time": 13.25 + }, + { + "epoch": 0.14, + "learning_rate": "5.5455e-05", + "loss": 0.9388, + "slid_loss": 1.0644, + "step": 277, + "time": 10.73 + }, + { + "epoch": 0.14, + "learning_rate": "5.5656e-05", + "loss": 1.1006, + "slid_loss": 1.0663, + "step": 278, + "time": 11.77 + }, + { + "epoch": 0.14, + "learning_rate": "5.5856e-05", + "loss": 1.1393, + "slid_loss": 1.067, + "step": 279, + "time": 12.5 + }, + { + "epoch": 0.14, + "learning_rate": "5.6056e-05", + "loss": 0.9874, + "slid_loss": 1.0684, + "step": 280, + "time": 11.18 + }, + { + "epoch": 0.14, + "learning_rate": "5.6256e-05", + "loss": 0.9684, + "slid_loss": 1.0692, + "step": 281, + "time": 12.33 + }, + { + "epoch": 0.14, + "learning_rate": "5.6456e-05", + "loss": 0.9591, + "slid_loss": 1.0687, + "step": 282, + "time": 11.35 + }, + { + "epoch": 0.14, + "learning_rate": "5.6657e-05", + "loss": 0.9636, + "slid_loss": 1.0668, + "step": 283, + "time": 13.21 + }, + { + "epoch": 0.14, + "learning_rate": "5.6857e-05", + "loss": 1.0545, + "slid_loss": 1.0667, + "step": 284, + "time": 12.14 + }, + { + "epoch": 0.14, + "learning_rate": "5.7057e-05", + "loss": 1.1401, + "slid_loss": 1.0674, + "step": 285, + "time": 10.46 + }, + { + "epoch": 0.14, + "learning_rate": "5.7257e-05", + "loss": 1.0793, + "slid_loss": 1.0672, + "step": 286, + "time": 13.28 + }, + { + "epoch": 0.14, + "learning_rate": "5.7457e-05", + "loss": 1.1137, + "slid_loss": 1.0675, + "step": 287, + "time": 11.34 + }, + { + "epoch": 0.14, + "learning_rate": "5.7658e-05", + "loss": 1.1113, + "slid_loss": 1.0677, + "step": 288, + "time": 11.64 + }, + { + "epoch": 0.14, + "learning_rate": "5.7858e-05", + "loss": 0.855, + "slid_loss": 1.0642, + "step": 289, + "time": 12.43 + }, + { + "epoch": 0.15, + "learning_rate": "5.8058e-05", + "loss": 1.0896, + "slid_loss": 1.0659, + "step": 290, + "time": 10.8 + }, + { + "epoch": 0.15, + "learning_rate": "5.8258e-05", + "loss": 1.0996, + "slid_loss": 1.0661, + "step": 291, + "time": 12.83 + }, + { + "epoch": 0.15, + "learning_rate": "5.8458e-05", + "loss": 1.1513, + "slid_loss": 1.0687, + "step": 292, + "time": 11.43 + }, + { + "epoch": 0.15, + "learning_rate": "5.8659e-05", + "loss": 1.197, + "slid_loss": 1.0715, + "step": 293, + "time": 12.3 + }, + { + "epoch": 0.15, + "learning_rate": "5.8859e-05", + "loss": 1.146, + "slid_loss": 1.071, + "step": 294, + "time": 12.89 + }, + { + "epoch": 0.15, + "learning_rate": "5.9059e-05", + "loss": 1.0675, + "slid_loss": 1.0696, + "step": 295, + "time": 13.83 + }, + { + "epoch": 0.15, + "learning_rate": "5.9259e-05", + "loss": 1.0813, + "slid_loss": 1.0721, + "step": 296, + "time": 12.8 + }, + { + "epoch": 0.15, + "learning_rate": "5.9459e-05", + "loss": 0.9345, + "slid_loss": 1.0705, + "step": 297, + "time": 12.28 + }, + { + "epoch": 0.15, + "learning_rate": "5.9660e-05", + "loss": 0.9622, + "slid_loss": 1.0697, + "step": 298, + "time": 11.61 + }, + { + "epoch": 0.15, + "learning_rate": "5.9860e-05", + "loss": 1.0599, + "slid_loss": 1.0702, + "step": 299, + "time": 13.63 + }, + { + "epoch": 0.15, + "learning_rate": "6.0060e-05", + "loss": 1.0169, + "slid_loss": 1.0681, + "step": 300, + "time": 11.63 + }, + { + "epoch": 0.15, + "learning_rate": "6.0260e-05", + "loss": 1.0137, + "slid_loss": 1.0653, + "step": 301, + "time": 11.24 + }, + { + "epoch": 0.15, + "learning_rate": "6.0460e-05", + "loss": 1.1683, + "slid_loss": 1.0686, + "step": 302, + "time": 11.26 + }, + { + "epoch": 0.15, + "learning_rate": "6.0661e-05", + "loss": 1.1012, + "slid_loss": 1.0696, + "step": 303, + "time": 12.09 + }, + { + "epoch": 0.15, + "learning_rate": "6.0861e-05", + "loss": 1.2247, + "slid_loss": 1.0701, + "step": 304, + "time": 12.87 + }, + { + "epoch": 0.15, + "learning_rate": "6.1061e-05", + "loss": 1.1609, + "slid_loss": 1.0708, + "step": 305, + "time": 11.11 + }, + { + "epoch": 0.15, + "learning_rate": "6.1261e-05", + "loss": 0.9331, + "slid_loss": 1.07, + "step": 306, + "time": 10.39 + }, + { + "epoch": 0.15, + "learning_rate": "6.1461e-05", + "loss": 1.234, + "slid_loss": 1.0713, + "step": 307, + "time": 12.99 + }, + { + "epoch": 0.15, + "learning_rate": "6.1662e-05", + "loss": 1.1233, + "slid_loss": 1.07, + "step": 308, + "time": 12.87 + }, + { + "epoch": 0.15, + "learning_rate": "6.1862e-05", + "loss": 0.9953, + "slid_loss": 1.0694, + "step": 309, + "time": 12.83 + }, + { + "epoch": 0.16, + "learning_rate": "6.2062e-05", + "loss": 0.9584, + "slid_loss": 1.0678, + "step": 310, + "time": 11.07 + }, + { + "epoch": 0.16, + "learning_rate": "6.2262e-05", + "loss": 1.0144, + "slid_loss": 1.0673, + "step": 311, + "time": 14.02 + }, + { + "epoch": 0.16, + "learning_rate": "6.2462e-05", + "loss": 0.918, + "slid_loss": 1.0668, + "step": 312, + "time": 11.25 + }, + { + "epoch": 0.16, + "learning_rate": "6.2663e-05", + "loss": 1.045, + "slid_loss": 1.0691, + "step": 313, + "time": 11.57 + }, + { + "epoch": 0.16, + "learning_rate": "6.2863e-05", + "loss": 0.7988, + "slid_loss": 1.0639, + "step": 314, + "time": 11.37 + }, + { + "epoch": 0.16, + "learning_rate": "6.3063e-05", + "loss": 1.2776, + "slid_loss": 1.0662, + "step": 315, + "time": 12.82 + }, + { + "epoch": 0.16, + "learning_rate": "6.3263e-05", + "loss": 1.0647, + "slid_loss": 1.0661, + "step": 316, + "time": 10.94 + }, + { + "epoch": 0.16, + "learning_rate": "6.3463e-05", + "loss": 1.1101, + "slid_loss": 1.0656, + "step": 317, + "time": 13.4 + }, + { + "epoch": 0.16, + "learning_rate": "6.3664e-05", + "loss": 1.0173, + "slid_loss": 1.0646, + "step": 318, + "time": 13.65 + }, + { + "epoch": 0.16, + "learning_rate": "6.3864e-05", + "loss": 0.833, + "slid_loss": 1.0614, + "step": 319, + "time": 13.85 + }, + { + "epoch": 0.16, + "learning_rate": "6.4064e-05", + "loss": 0.8322, + "slid_loss": 1.06, + "step": 320, + "time": 12.87 + }, + { + "epoch": 0.16, + "learning_rate": "6.4264e-05", + "loss": 1.1447, + "slid_loss": 1.0611, + "step": 321, + "time": 13.56 + }, + { + "epoch": 0.16, + "learning_rate": "6.4464e-05", + "loss": 1.0868, + "slid_loss": 1.0598, + "step": 322, + "time": 11.47 + }, + { + "epoch": 0.16, + "learning_rate": "6.4665e-05", + "loss": 1.1787, + "slid_loss": 1.0621, + "step": 323, + "time": 12.96 + }, + { + "epoch": 0.16, + "learning_rate": "6.4865e-05", + "loss": 0.9092, + "slid_loss": 1.0601, + "step": 324, + "time": 13.39 + }, + { + "epoch": 0.16, + "learning_rate": "6.5065e-05", + "loss": 0.8963, + "slid_loss": 1.0585, + "step": 325, + "time": 11.7 + }, + { + "epoch": 0.16, + "learning_rate": "6.5265e-05", + "loss": 0.9162, + "slid_loss": 1.0579, + "step": 326, + "time": 11.3 + }, + { + "epoch": 0.16, + "learning_rate": "6.5465e-05", + "loss": 1.0911, + "slid_loss": 1.0568, + "step": 327, + "time": 13.32 + }, + { + "epoch": 0.16, + "learning_rate": "6.5666e-05", + "loss": 0.8012, + "slid_loss": 1.0536, + "step": 328, + "time": 13.21 + }, + { + "epoch": 0.16, + "learning_rate": "6.5866e-05", + "loss": 1.2859, + "slid_loss": 1.0583, + "step": 329, + "time": 13.14 + }, + { + "epoch": 0.17, + "learning_rate": "6.6066e-05", + "loss": 1.0588, + "slid_loss": 1.058, + "step": 330, + "time": 12.57 + }, + { + "epoch": 0.17, + "learning_rate": "6.6266e-05", + "loss": 1.0931, + "slid_loss": 1.0566, + "step": 331, + "time": 11.22 + }, + { + "epoch": 0.17, + "learning_rate": "6.6466e-05", + "loss": 0.9762, + "slid_loss": 1.0547, + "step": 332, + "time": 12.33 + }, + { + "epoch": 0.17, + "learning_rate": "6.6667e-05", + "loss": 1.1419, + "slid_loss": 1.0547, + "step": 333, + "time": 13.34 + }, + { + "epoch": 0.17, + "learning_rate": "6.6867e-05", + "loss": 1.0541, + "slid_loss": 1.0553, + "step": 334, + "time": 13.1 + }, + { + "epoch": 0.17, + "learning_rate": "6.7067e-05", + "loss": 0.9778, + "slid_loss": 1.0556, + "step": 335, + "time": 14.07 + }, + { + "epoch": 0.17, + "learning_rate": "6.7267e-05", + "loss": 0.9329, + "slid_loss": 1.0538, + "step": 336, + "time": 10.68 + }, + { + "epoch": 0.17, + "learning_rate": "6.7467e-05", + "loss": 1.0943, + "slid_loss": 1.0542, + "step": 337, + "time": 12.88 + }, + { + "epoch": 0.17, + "learning_rate": "6.7668e-05", + "loss": 1.0681, + "slid_loss": 1.0552, + "step": 338, + "time": 11.0 + }, + { + "epoch": 0.17, + "learning_rate": "6.7868e-05", + "loss": 0.8287, + "slid_loss": 1.0534, + "step": 339, + "time": 13.72 + }, + { + "epoch": 0.17, + "learning_rate": "6.8068e-05", + "loss": 0.8201, + "slid_loss": 1.051, + "step": 340, + "time": 13.08 + }, + { + "epoch": 0.17, + "learning_rate": "6.8268e-05", + "loss": 1.1042, + "slid_loss": 1.0512, + "step": 341, + "time": 13.66 + }, + { + "epoch": 0.17, + "learning_rate": "6.8468e-05", + "loss": 1.0363, + "slid_loss": 1.0515, + "step": 342, + "time": 13.3 + }, + { + "epoch": 0.17, + "learning_rate": "6.8669e-05", + "loss": 0.931, + "slid_loss": 1.0511, + "step": 343, + "time": 11.7 + }, + { + "epoch": 0.17, + "learning_rate": "6.8869e-05", + "loss": 1.2084, + "slid_loss": 1.0514, + "step": 344, + "time": 13.52 + }, + { + "epoch": 0.17, + "learning_rate": "6.9069e-05", + "loss": 1.2553, + "slid_loss": 1.0539, + "step": 345, + "time": 13.07 + }, + { + "epoch": 0.17, + "learning_rate": "6.9269e-05", + "loss": 1.1759, + "slid_loss": 1.0526, + "step": 346, + "time": 10.49 + }, + { + "epoch": 0.17, + "learning_rate": "6.9469e-05", + "loss": 1.1513, + "slid_loss": 1.0528, + "step": 347, + "time": 12.92 + }, + { + "epoch": 0.17, + "learning_rate": "6.9670e-05", + "loss": 1.1435, + "slid_loss": 1.0555, + "step": 348, + "time": 13.2 + }, + { + "epoch": 0.17, + "learning_rate": "6.9870e-05", + "loss": 1.0427, + "slid_loss": 1.0565, + "step": 349, + "time": 12.78 + }, + { + "epoch": 0.18, + "learning_rate": "7.0070e-05", + "loss": 1.0606, + "slid_loss": 1.0555, + "step": 350, + "time": 13.61 + }, + { + "epoch": 0.18, + "learning_rate": "7.0270e-05", + "loss": 1.0741, + "slid_loss": 1.0551, + "step": 351, + "time": 11.05 + }, + { + "epoch": 0.18, + "learning_rate": "7.0470e-05", + "loss": 1.1205, + "slid_loss": 1.0558, + "step": 352, + "time": 14.17 + }, + { + "epoch": 0.18, + "learning_rate": "7.0671e-05", + "loss": 1.0403, + "slid_loss": 1.0556, + "step": 353, + "time": 12.44 + }, + { + "epoch": 0.18, + "learning_rate": "7.0871e-05", + "loss": 1.2179, + "slid_loss": 1.0572, + "step": 354, + "time": 11.55 + }, + { + "epoch": 0.18, + "learning_rate": "7.1071e-05", + "loss": 1.068, + "slid_loss": 1.0581, + "step": 355, + "time": 11.72 + }, + { + "epoch": 0.18, + "learning_rate": "7.1271e-05", + "loss": 0.7999, + "slid_loss": 1.0531, + "step": 356, + "time": 11.45 + }, + { + "epoch": 0.18, + "learning_rate": "7.1471e-05", + "loss": 1.0578, + "slid_loss": 1.0533, + "step": 357, + "time": 12.93 + }, + { + "epoch": 0.18, + "learning_rate": "7.1672e-05", + "loss": 0.8562, + "slid_loss": 1.0493, + "step": 358, + "time": 11.75 + }, + { + "epoch": 0.18, + "learning_rate": "7.1872e-05", + "loss": 1.0358, + "slid_loss": 1.048, + "step": 359, + "time": 13.85 + }, + { + "epoch": 0.18, + "learning_rate": "7.2072e-05", + "loss": 1.0718, + "slid_loss": 1.0461, + "step": 360, + "time": 12.86 + }, + { + "epoch": 0.18, + "learning_rate": "7.2272e-05", + "loss": 0.7825, + "slid_loss": 1.0431, + "step": 361, + "time": 13.71 + }, + { + "epoch": 0.18, + "learning_rate": "7.2472e-05", + "loss": 0.9153, + "slid_loss": 1.0403, + "step": 362, + "time": 13.59 + }, + { + "epoch": 0.18, + "learning_rate": "7.2673e-05", + "loss": 1.056, + "slid_loss": 1.0401, + "step": 363, + "time": 12.13 + }, + { + "epoch": 0.18, + "learning_rate": "7.2873e-05", + "loss": 0.9727, + "slid_loss": 1.0404, + "step": 364, + "time": 10.7 + }, + { + "epoch": 0.18, + "learning_rate": "7.3073e-05", + "loss": 1.0296, + "slid_loss": 1.0411, + "step": 365, + "time": 13.55 + }, + { + "epoch": 0.18, + "learning_rate": "7.3273e-05", + "loss": 1.2147, + "slid_loss": 1.0434, + "step": 366, + "time": 12.93 + }, + { + "epoch": 0.18, + "learning_rate": "7.3473e-05", + "loss": 1.1601, + "slid_loss": 1.0432, + "step": 367, + "time": 12.27 + }, + { + "epoch": 0.18, + "learning_rate": "7.3674e-05", + "loss": 0.958, + "slid_loss": 1.0425, + "step": 368, + "time": 12.78 + }, + { + "epoch": 0.18, + "learning_rate": "7.3874e-05", + "loss": 0.9585, + "slid_loss": 1.0404, + "step": 369, + "time": 11.29 + }, + { + "epoch": 0.19, + "learning_rate": "7.4074e-05", + "loss": 0.9578, + "slid_loss": 1.0392, + "step": 370, + "time": 11.61 + }, + { + "epoch": 0.19, + "learning_rate": "7.4274e-05", + "loss": 1.0401, + "slid_loss": 1.041, + "step": 371, + "time": 13.37 + }, + { + "epoch": 0.19, + "learning_rate": "7.4474e-05", + "loss": 1.0241, + "slid_loss": 1.0413, + "step": 372, + "time": 12.91 + }, + { + "epoch": 0.19, + "learning_rate": "7.4675e-05", + "loss": 1.0261, + "slid_loss": 1.0411, + "step": 373, + "time": 13.52 + }, + { + "epoch": 0.19, + "learning_rate": "7.4875e-05", + "loss": 0.9784, + "slid_loss": 1.0404, + "step": 374, + "time": 12.64 + }, + { + "epoch": 0.19, + "learning_rate": "7.5075e-05", + "loss": 1.0911, + "slid_loss": 1.0418, + "step": 375, + "time": 11.77 + }, + { + "epoch": 0.19, + "learning_rate": "7.5275e-05", + "loss": 1.1833, + "slid_loss": 1.042, + "step": 376, + "time": 10.74 + }, + { + "epoch": 0.19, + "learning_rate": "7.5475e-05", + "loss": 0.9577, + "slid_loss": 1.0422, + "step": 377, + "time": 13.55 + }, + { + "epoch": 0.19, + "learning_rate": "7.5676e-05", + "loss": 0.9546, + "slid_loss": 1.0407, + "step": 378, + "time": 13.37 + }, + { + "epoch": 0.19, + "learning_rate": "7.5876e-05", + "loss": 1.1766, + "slid_loss": 1.0411, + "step": 379, + "time": 11.56 + }, + { + "epoch": 0.19, + "learning_rate": "7.6076e-05", + "loss": 1.1361, + "slid_loss": 1.0426, + "step": 380, + "time": 11.08 + }, + { + "epoch": 0.19, + "learning_rate": "7.6276e-05", + "loss": 0.9145, + "slid_loss": 1.042, + "step": 381, + "time": 12.47 + }, + { + "epoch": 0.19, + "learning_rate": "7.6476e-05", + "loss": 0.911, + "slid_loss": 1.0415, + "step": 382, + "time": 10.35 + }, + { + "epoch": 0.19, + "learning_rate": "7.6677e-05", + "loss": 1.0658, + "slid_loss": 1.0426, + "step": 383, + "time": 12.75 + }, + { + "epoch": 0.19, + "learning_rate": "7.6877e-05", + "loss": 0.8894, + "slid_loss": 1.0409, + "step": 384, + "time": 11.58 + }, + { + "epoch": 0.19, + "learning_rate": "7.7077e-05", + "loss": 1.0326, + "slid_loss": 1.0398, + "step": 385, + "time": 11.08 + }, + { + "epoch": 0.19, + "learning_rate": "7.7277e-05", + "loss": 0.951, + "slid_loss": 1.0386, + "step": 386, + "time": 10.69 + }, + { + "epoch": 0.19, + "learning_rate": "7.7477e-05", + "loss": 1.0189, + "slid_loss": 1.0376, + "step": 387, + "time": 11.57 + }, + { + "epoch": 0.19, + "learning_rate": "7.7678e-05", + "loss": 0.8567, + "slid_loss": 1.0351, + "step": 388, + "time": 10.71 + }, + { + "epoch": 0.19, + "learning_rate": "7.7878e-05", + "loss": 1.1445, + "slid_loss": 1.038, + "step": 389, + "time": 11.45 + }, + { + "epoch": 0.2, + "learning_rate": "7.8078e-05", + "loss": 0.9079, + "slid_loss": 1.0361, + "step": 390, + "time": 13.73 + }, + { + "epoch": 0.2, + "learning_rate": "7.8278e-05", + "loss": 1.0662, + "slid_loss": 1.0358, + "step": 391, + "time": 12.62 + }, + { + "epoch": 0.2, + "learning_rate": "7.8478e-05", + "loss": 0.9086, + "slid_loss": 1.0334, + "step": 392, + "time": 13.7 + }, + { + "epoch": 0.2, + "learning_rate": "7.8679e-05", + "loss": 1.0805, + "slid_loss": 1.0322, + "step": 393, + "time": 11.53 + }, + { + "epoch": 0.2, + "learning_rate": "7.8879e-05", + "loss": 0.9736, + "slid_loss": 1.0305, + "step": 394, + "time": 11.29 + }, + { + "epoch": 0.2, + "learning_rate": "7.9079e-05", + "loss": 1.0156, + "slid_loss": 1.03, + "step": 395, + "time": 10.78 + }, + { + "epoch": 0.2, + "learning_rate": "7.9279e-05", + "loss": 0.9875, + "slid_loss": 1.029, + "step": 396, + "time": 13.39 + }, + { + "epoch": 0.2, + "learning_rate": "7.9479e-05", + "loss": 1.1571, + "slid_loss": 1.0313, + "step": 397, + "time": 11.1 + }, + { + "epoch": 0.2, + "learning_rate": "7.9680e-05", + "loss": 1.0999, + "slid_loss": 1.0326, + "step": 398, + "time": 13.39 + }, + { + "epoch": 0.2, + "learning_rate": "7.9880e-05", + "loss": 1.0246, + "slid_loss": 1.0323, + "step": 399, + "time": 11.02 + }, + { + "epoch": 0.2, + "learning_rate": "8.0080e-05", + "loss": 1.0586, + "slid_loss": 1.0327, + "step": 400, + "time": 11.58 + }, + { + "epoch": 0.2, + "learning_rate": "8.0280e-05", + "loss": 1.0985, + "slid_loss": 1.0335, + "step": 401, + "time": 13.46 + }, + { + "epoch": 0.2, + "learning_rate": "8.0480e-05", + "loss": 1.2311, + "slid_loss": 1.0342, + "step": 402, + "time": 11.94 + }, + { + "epoch": 0.2, + "learning_rate": "8.0681e-05", + "loss": 0.9286, + "slid_loss": 1.0324, + "step": 403, + "time": 14.13 + }, + { + "epoch": 0.2, + "learning_rate": "8.0881e-05", + "loss": 1.0123, + "slid_loss": 1.0303, + "step": 404, + "time": 10.98 + }, + { + "epoch": 0.2, + "learning_rate": "8.1081e-05", + "loss": 0.8683, + "slid_loss": 1.0274, + "step": 405, + "time": 12.88 + }, + { + "epoch": 0.2, + "learning_rate": "8.1281e-05", + "loss": 0.9592, + "slid_loss": 1.0277, + "step": 406, + "time": 13.74 + }, + { + "epoch": 0.2, + "learning_rate": "8.1481e-05", + "loss": 1.1093, + "slid_loss": 1.0264, + "step": 407, + "time": 11.66 + }, + { + "epoch": 0.2, + "learning_rate": "8.1682e-05", + "loss": 1.1521, + "slid_loss": 1.0267, + "step": 408, + "time": 12.29 + }, + { + "epoch": 0.2, + "learning_rate": "8.1882e-05", + "loss": 1.0594, + "slid_loss": 1.0273, + "step": 409, + "time": 13.24 + }, + { + "epoch": 0.21, + "learning_rate": "8.2082e-05", + "loss": 1.0637, + "slid_loss": 1.0284, + "step": 410, + "time": 11.64 + }, + { + "epoch": 0.21, + "learning_rate": "8.2282e-05", + "loss": 1.0991, + "slid_loss": 1.0292, + "step": 411, + "time": 13.48 + }, + { + "epoch": 0.21, + "learning_rate": "8.2482e-05", + "loss": 1.1504, + "slid_loss": 1.0316, + "step": 412, + "time": 10.63 + }, + { + "epoch": 0.21, + "learning_rate": "8.2683e-05", + "loss": 1.06, + "slid_loss": 1.0317, + "step": 413, + "time": 13.26 + }, + { + "epoch": 0.21, + "learning_rate": "8.2883e-05", + "loss": 1.1336, + "slid_loss": 1.0351, + "step": 414, + "time": 14.33 + }, + { + "epoch": 0.21, + "learning_rate": "8.3083e-05", + "loss": 1.0662, + "slid_loss": 1.0329, + "step": 415, + "time": 13.16 + }, + { + "epoch": 0.21, + "learning_rate": "8.3283e-05", + "loss": 1.0732, + "slid_loss": 1.033, + "step": 416, + "time": 12.44 + }, + { + "epoch": 0.21, + "learning_rate": "8.3483e-05", + "loss": 1.1264, + "slid_loss": 1.0332, + "step": 417, + "time": 12.85 + }, + { + "epoch": 0.21, + "learning_rate": "8.3684e-05", + "loss": 1.2063, + "slid_loss": 1.0351, + "step": 418, + "time": 12.65 + }, + { + "epoch": 0.21, + "learning_rate": "8.3884e-05", + "loss": 1.138, + "slid_loss": 1.0381, + "step": 419, + "time": 11.46 + }, + { + "epoch": 0.21, + "learning_rate": "8.4084e-05", + "loss": 0.9537, + "slid_loss": 1.0393, + "step": 420, + "time": 12.43 + }, + { + "epoch": 0.21, + "learning_rate": "8.4284e-05", + "loss": 1.1797, + "slid_loss": 1.0397, + "step": 421, + "time": 12.89 + }, + { + "epoch": 0.21, + "learning_rate": "8.4484e-05", + "loss": 1.1112, + "slid_loss": 1.0399, + "step": 422, + "time": 13.8 + }, + { + "epoch": 0.21, + "learning_rate": "8.4685e-05", + "loss": 0.8898, + "slid_loss": 1.037, + "step": 423, + "time": 11.58 + }, + { + "epoch": 0.21, + "learning_rate": "8.4885e-05", + "loss": 1.1535, + "slid_loss": 1.0395, + "step": 424, + "time": 11.1 + }, + { + "epoch": 0.21, + "learning_rate": "8.5085e-05", + "loss": 1.0874, + "slid_loss": 1.0414, + "step": 425, + "time": 13.06 + }, + { + "epoch": 0.21, + "learning_rate": "8.5285e-05", + "loss": 0.851, + "slid_loss": 1.0408, + "step": 426, + "time": 12.28 + }, + { + "epoch": 0.21, + "learning_rate": "8.5485e-05", + "loss": 1.1034, + "slid_loss": 1.0409, + "step": 427, + "time": 13.67 + }, + { + "epoch": 0.21, + "learning_rate": "8.5686e-05", + "loss": 0.9129, + "slid_loss": 1.042, + "step": 428, + "time": 12.27 + }, + { + "epoch": 0.21, + "learning_rate": "8.5886e-05", + "loss": 1.0475, + "slid_loss": 1.0396, + "step": 429, + "time": 11.39 + }, + { + "epoch": 0.22, + "learning_rate": "8.6086e-05", + "loss": 0.9446, + "slid_loss": 1.0385, + "step": 430, + "time": 12.44 + }, + { + "epoch": 0.22, + "learning_rate": "8.6286e-05", + "loss": 1.0582, + "slid_loss": 1.0381, + "step": 431, + "time": 13.51 + }, + { + "epoch": 0.22, + "learning_rate": "8.6486e-05", + "loss": 0.9226, + "slid_loss": 1.0376, + "step": 432, + "time": 11.98 + }, + { + "epoch": 0.22, + "learning_rate": "8.6687e-05", + "loss": 0.9985, + "slid_loss": 1.0361, + "step": 433, + "time": 12.91 + }, + { + "epoch": 0.22, + "learning_rate": "8.6887e-05", + "loss": 1.0457, + "slid_loss": 1.0361, + "step": 434, + "time": 12.98 + }, + { + "epoch": 0.22, + "learning_rate": "8.7087e-05", + "loss": 0.7719, + "slid_loss": 1.034, + "step": 435, + "time": 10.74 + }, + { + "epoch": 0.22, + "learning_rate": "8.7287e-05", + "loss": 1.0306, + "slid_loss": 1.035, + "step": 436, + "time": 11.54 + }, + { + "epoch": 0.22, + "learning_rate": "8.7487e-05", + "loss": 1.1763, + "slid_loss": 1.0358, + "step": 437, + "time": 11.78 + }, + { + "epoch": 0.22, + "learning_rate": "8.7688e-05", + "loss": 0.9122, + "slid_loss": 1.0342, + "step": 438, + "time": 12.7 + }, + { + "epoch": 0.22, + "learning_rate": "8.7888e-05", + "loss": 1.1009, + "slid_loss": 1.037, + "step": 439, + "time": 13.46 + }, + { + "epoch": 0.22, + "learning_rate": "8.8088e-05", + "loss": 1.1505, + "slid_loss": 1.0403, + "step": 440, + "time": 10.77 + }, + { + "epoch": 0.22, + "learning_rate": "8.8288e-05", + "loss": 0.9256, + "slid_loss": 1.0385, + "step": 441, + "time": 13.65 + }, + { + "epoch": 0.22, + "learning_rate": "8.8488e-05", + "loss": 1.2705, + "slid_loss": 1.0408, + "step": 442, + "time": 12.21 + }, + { + "epoch": 0.22, + "learning_rate": "8.8689e-05", + "loss": 0.9515, + "slid_loss": 1.041, + "step": 443, + "time": 11.4 + }, + { + "epoch": 0.22, + "learning_rate": "8.8889e-05", + "loss": 0.8109, + "slid_loss": 1.0371, + "step": 444, + "time": 13.69 + }, + { + "epoch": 0.22, + "learning_rate": "8.9089e-05", + "loss": 1.002, + "slid_loss": 1.0345, + "step": 445, + "time": 11.69 + }, + { + "epoch": 0.22, + "learning_rate": "8.9289e-05", + "loss": 0.7584, + "slid_loss": 1.0303, + "step": 446, + "time": 11.93 + }, + { + "epoch": 0.22, + "learning_rate": "8.9489e-05", + "loss": 1.008, + "slid_loss": 1.0289, + "step": 447, + "time": 13.6 + }, + { + "epoch": 0.22, + "learning_rate": "8.9690e-05", + "loss": 0.9071, + "slid_loss": 1.0266, + "step": 448, + "time": 13.13 + }, + { + "epoch": 0.22, + "learning_rate": "8.9890e-05", + "loss": 1.0122, + "slid_loss": 1.0262, + "step": 449, + "time": 11.14 + }, + { + "epoch": 0.23, + "learning_rate": "9.0090e-05", + "loss": 0.8973, + "slid_loss": 1.0246, + "step": 450, + "time": 11.31 + }, + { + "epoch": 0.23, + "learning_rate": "9.0290e-05", + "loss": 1.166, + "slid_loss": 1.0255, + "step": 451, + "time": 11.62 + }, + { + "epoch": 0.23, + "learning_rate": "9.0490e-05", + "loss": 1.0945, + "slid_loss": 1.0253, + "step": 452, + "time": 14.47 + }, + { + "epoch": 0.23, + "learning_rate": "9.0691e-05", + "loss": 1.1222, + "slid_loss": 1.0261, + "step": 453, + "time": 12.22 + }, + { + "epoch": 0.23, + "learning_rate": "9.0891e-05", + "loss": 1.1003, + "slid_loss": 1.0249, + "step": 454, + "time": 10.83 + }, + { + "epoch": 0.23, + "learning_rate": "9.1091e-05", + "loss": 1.0636, + "slid_loss": 1.0249, + "step": 455, + "time": 12.64 + }, + { + "epoch": 0.23, + "learning_rate": "9.1291e-05", + "loss": 0.8619, + "slid_loss": 1.0255, + "step": 456, + "time": 12.58 + }, + { + "epoch": 0.23, + "learning_rate": "9.1491e-05", + "loss": 1.0586, + "slid_loss": 1.0255, + "step": 457, + "time": 11.39 + }, + { + "epoch": 0.23, + "learning_rate": "9.1692e-05", + "loss": 0.977, + "slid_loss": 1.0267, + "step": 458, + "time": 14.27 + }, + { + "epoch": 0.23, + "learning_rate": "9.1892e-05", + "loss": 0.9013, + "slid_loss": 1.0254, + "step": 459, + "time": 13.72 + }, + { + "epoch": 0.23, + "learning_rate": "9.2092e-05", + "loss": 0.9637, + "slid_loss": 1.0243, + "step": 460, + "time": 11.14 + }, + { + "epoch": 0.23, + "learning_rate": "9.2292e-05", + "loss": 0.8454, + "slid_loss": 1.0249, + "step": 461, + "time": 11.33 + }, + { + "epoch": 0.23, + "learning_rate": "9.2492e-05", + "loss": 1.0381, + "slid_loss": 1.0261, + "step": 462, + "time": 11.32 + }, + { + "epoch": 0.23, + "learning_rate": "9.2693e-05", + "loss": 0.9977, + "slid_loss": 1.0256, + "step": 463, + "time": 13.54 + }, + { + "epoch": 0.23, + "learning_rate": "9.2893e-05", + "loss": 1.1129, + "slid_loss": 1.027, + "step": 464, + "time": 12.48 + }, + { + "epoch": 0.23, + "learning_rate": "9.3093e-05", + "loss": 1.0282, + "slid_loss": 1.0269, + "step": 465, + "time": 11.38 + }, + { + "epoch": 0.23, + "learning_rate": "9.3293e-05", + "loss": 0.8035, + "slid_loss": 1.0228, + "step": 466, + "time": 12.82 + }, + { + "epoch": 0.23, + "learning_rate": "9.3493e-05", + "loss": 1.0113, + "slid_loss": 1.0213, + "step": 467, + "time": 12.82 + }, + { + "epoch": 0.23, + "learning_rate": "9.3694e-05", + "loss": 1.0644, + "slid_loss": 1.0224, + "step": 468, + "time": 13.33 + }, + { + "epoch": 0.23, + "learning_rate": "9.3894e-05", + "loss": 0.8857, + "slid_loss": 1.0217, + "step": 469, + "time": 12.83 + }, + { + "epoch": 0.24, + "learning_rate": "9.4094e-05", + "loss": 0.9923, + "slid_loss": 1.022, + "step": 470, + "time": 13.22 + }, + { + "epoch": 0.24, + "learning_rate": "9.4294e-05", + "loss": 0.9556, + "slid_loss": 1.0212, + "step": 471, + "time": 11.63 + }, + { + "epoch": 0.24, + "learning_rate": "9.4494e-05", + "loss": 0.8804, + "slid_loss": 1.0197, + "step": 472, + "time": 11.3 + }, + { + "epoch": 0.24, + "learning_rate": "9.4695e-05", + "loss": 0.9223, + "slid_loss": 1.0187, + "step": 473, + "time": 10.72 + }, + { + "epoch": 0.24, + "learning_rate": "9.4895e-05", + "loss": 1.1838, + "slid_loss": 1.0208, + "step": 474, + "time": 14.32 + }, + { + "epoch": 0.24, + "learning_rate": "9.5095e-05", + "loss": 1.001, + "slid_loss": 1.0199, + "step": 475, + "time": 13.71 + }, + { + "epoch": 0.24, + "learning_rate": "9.5295e-05", + "loss": 1.1177, + "slid_loss": 1.0192, + "step": 476, + "time": 14.6 + }, + { + "epoch": 0.24, + "learning_rate": "9.5495e-05", + "loss": 1.0287, + "slid_loss": 1.0199, + "step": 477, + "time": 12.86 + }, + { + "epoch": 0.24, + "learning_rate": "9.5696e-05", + "loss": 1.0718, + "slid_loss": 1.0211, + "step": 478, + "time": 13.35 + }, + { + "epoch": 0.24, + "learning_rate": "9.5896e-05", + "loss": 1.0766, + "slid_loss": 1.0201, + "step": 479, + "time": 13.2 + }, + { + "epoch": 0.24, + "learning_rate": "9.6096e-05", + "loss": 1.2268, + "slid_loss": 1.021, + "step": 480, + "time": 11.49 + }, + { + "epoch": 0.24, + "learning_rate": "9.6296e-05", + "loss": 1.0514, + "slid_loss": 1.0224, + "step": 481, + "time": 13.52 + }, + { + "epoch": 0.24, + "learning_rate": "9.6496e-05", + "loss": 1.1218, + "slid_loss": 1.0245, + "step": 482, + "time": 14.45 + }, + { + "epoch": 0.24, + "learning_rate": "9.6697e-05", + "loss": 1.2372, + "slid_loss": 1.0262, + "step": 483, + "time": 12.97 + }, + { + "epoch": 0.24, + "learning_rate": "9.6897e-05", + "loss": 1.0238, + "slid_loss": 1.0275, + "step": 484, + "time": 13.01 + }, + { + "epoch": 0.24, + "learning_rate": "9.7097e-05", + "loss": 1.1477, + "slid_loss": 1.0287, + "step": 485, + "time": 10.36 + }, + { + "epoch": 0.24, + "learning_rate": "9.7297e-05", + "loss": 1.0227, + "slid_loss": 1.0294, + "step": 486, + "time": 12.88 + }, + { + "epoch": 0.24, + "learning_rate": "9.7497e-05", + "loss": 0.9845, + "slid_loss": 1.029, + "step": 487, + "time": 14.01 + }, + { + "epoch": 0.24, + "learning_rate": "9.7698e-05", + "loss": 0.996, + "slid_loss": 1.0304, + "step": 488, + "time": 12.64 + }, + { + "epoch": 0.24, + "learning_rate": "9.7898e-05", + "loss": 1.188, + "slid_loss": 1.0309, + "step": 489, + "time": 13.37 + }, + { + "epoch": 0.25, + "learning_rate": "9.8098e-05", + "loss": 1.1198, + "slid_loss": 1.033, + "step": 490, + "time": 11.92 + }, + { + "epoch": 0.25, + "learning_rate": "9.8298e-05", + "loss": 1.0437, + "slid_loss": 1.0328, + "step": 491, + "time": 12.88 + }, + { + "epoch": 0.25, + "learning_rate": "9.8498e-05", + "loss": 0.8394, + "slid_loss": 1.0321, + "step": 492, + "time": 13.15 + }, + { + "epoch": 0.25, + "learning_rate": "9.8699e-05", + "loss": 0.9099, + "slid_loss": 1.0304, + "step": 493, + "time": 11.2 + }, + { + "epoch": 0.25, + "learning_rate": "9.8899e-05", + "loss": 1.0072, + "slid_loss": 1.0307, + "step": 494, + "time": 12.29 + }, + { + "epoch": 0.25, + "learning_rate": "9.9099e-05", + "loss": 1.0247, + "slid_loss": 1.0308, + "step": 495, + "time": 11.41 + }, + { + "epoch": 0.25, + "learning_rate": "9.9299e-05", + "loss": 1.0071, + "slid_loss": 1.031, + "step": 496, + "time": 12.32 + }, + { + "epoch": 0.25, + "learning_rate": "9.9499e-05", + "loss": 1.0906, + "slid_loss": 1.0303, + "step": 497, + "time": 13.45 + }, + { + "epoch": 0.25, + "learning_rate": "9.9700e-05", + "loss": 1.0501, + "slid_loss": 1.0298, + "step": 498, + "time": 12.53 + }, + { + "epoch": 0.25, + "learning_rate": "9.9900e-05", + "loss": 1.1481, + "slid_loss": 1.0311, + "step": 499, + "time": 11.77 + }, + { + "epoch": 0.25, + "learning_rate": "1.0010e-04", + "loss": 1.0678, + "slid_loss": 1.0312, + "step": 500, + "time": 13.15 + }, + { + "epoch": 0.25, + "learning_rate": "1.0030e-04", + "loss": 1.1329, + "slid_loss": 1.0315, + "step": 501, + "time": 13.31 + }, + { + "epoch": 0.25, + "learning_rate": "1.0050e-04", + "loss": 0.9564, + "slid_loss": 1.0288, + "step": 502, + "time": 11.24 + }, + { + "epoch": 0.25, + "learning_rate": "1.0070e-04", + "loss": 0.9599, + "slid_loss": 1.0291, + "step": 503, + "time": 11.56 + }, + { + "epoch": 0.25, + "learning_rate": "1.0090e-04", + "loss": 0.9122, + "slid_loss": 1.0281, + "step": 504, + "time": 13.38 + }, + { + "epoch": 0.25, + "learning_rate": "1.0110e-04", + "loss": 1.0099, + "slid_loss": 1.0295, + "step": 505, + "time": 12.25 + }, + { + "epoch": 0.25, + "learning_rate": "1.0130e-04", + "loss": 1.1002, + "slid_loss": 1.0309, + "step": 506, + "time": 13.08 + }, + { + "epoch": 0.25, + "learning_rate": "1.0150e-04", + "loss": 1.0145, + "slid_loss": 1.0299, + "step": 507, + "time": 13.92 + }, + { + "epoch": 0.25, + "learning_rate": "1.0170e-04", + "loss": 1.1076, + "slid_loss": 1.0295, + "step": 508, + "time": 13.42 + }, + { + "epoch": 0.25, + "learning_rate": "1.0190e-04", + "loss": 0.9959, + "slid_loss": 1.0289, + "step": 509, + "time": 11.06 + }, + { + "epoch": 0.26, + "learning_rate": "1.0210e-04", + "loss": 1.0165, + "slid_loss": 1.0284, + "step": 510, + "time": 13.5 + }, + { + "epoch": 0.26, + "learning_rate": "1.0230e-04", + "loss": 0.9722, + "slid_loss": 1.0271, + "step": 511, + "time": 10.67 + }, + { + "epoch": 0.26, + "learning_rate": "1.0250e-04", + "loss": 0.8412, + "slid_loss": 1.024, + "step": 512, + "time": 13.98 + }, + { + "epoch": 0.26, + "learning_rate": "1.0270e-04", + "loss": 1.1392, + "slid_loss": 1.0248, + "step": 513, + "time": 13.38 + }, + { + "epoch": 0.26, + "learning_rate": "1.0290e-04", + "loss": 1.0093, + "slid_loss": 1.0236, + "step": 514, + "time": 11.75 + }, + { + "epoch": 0.26, + "learning_rate": "1.0310e-04", + "loss": 1.232, + "slid_loss": 1.0252, + "step": 515, + "time": 13.16 + }, + { + "epoch": 0.26, + "learning_rate": "1.0330e-04", + "loss": 1.119, + "slid_loss": 1.0257, + "step": 516, + "time": 11.76 + }, + { + "epoch": 0.26, + "learning_rate": "1.0350e-04", + "loss": 1.096, + "slid_loss": 1.0254, + "step": 517, + "time": 12.01 + }, + { + "epoch": 0.26, + "learning_rate": "1.0370e-04", + "loss": 1.0039, + "slid_loss": 1.0234, + "step": 518, + "time": 12.9 + }, + { + "epoch": 0.26, + "learning_rate": "1.0390e-04", + "loss": 0.9628, + "slid_loss": 1.0216, + "step": 519, + "time": 13.05 + }, + { + "epoch": 0.26, + "learning_rate": "1.0410e-04", + "loss": 0.9383, + "slid_loss": 1.0215, + "step": 520, + "time": 10.96 + }, + { + "epoch": 0.26, + "learning_rate": "1.0430e-04", + "loss": 1.0864, + "slid_loss": 1.0205, + "step": 521, + "time": 12.63 + }, + { + "epoch": 0.26, + "learning_rate": "1.0450e-04", + "loss": 0.9931, + "slid_loss": 1.0194, + "step": 522, + "time": 10.72 + }, + { + "epoch": 0.26, + "learning_rate": "1.0470e-04", + "loss": 1.0134, + "slid_loss": 1.0206, + "step": 523, + "time": 13.12 + }, + { + "epoch": 0.26, + "learning_rate": "1.0490e-04", + "loss": 0.8863, + "slid_loss": 1.0179, + "step": 524, + "time": 13.2 + }, + { + "epoch": 0.26, + "learning_rate": "1.0511e-04", + "loss": 0.9285, + "slid_loss": 1.0163, + "step": 525, + "time": 11.61 + }, + { + "epoch": 0.26, + "learning_rate": "1.0531e-04", + "loss": 1.0768, + "slid_loss": 1.0186, + "step": 526, + "time": 12.28 + }, + { + "epoch": 0.26, + "learning_rate": "1.0551e-04", + "loss": 1.0659, + "slid_loss": 1.0182, + "step": 527, + "time": 13.1 + }, + { + "epoch": 0.26, + "learning_rate": "1.0571e-04", + "loss": 1.2509, + "slid_loss": 1.0216, + "step": 528, + "time": 13.63 + }, + { + "epoch": 0.26, + "learning_rate": "1.0591e-04", + "loss": 0.9803, + "slid_loss": 1.0209, + "step": 529, + "time": 13.22 + }, + { + "epoch": 0.27, + "learning_rate": "1.0611e-04", + "loss": 0.928, + "slid_loss": 1.0208, + "step": 530, + "time": 13.47 + }, + { + "epoch": 0.27, + "learning_rate": "1.0631e-04", + "loss": 0.9759, + "slid_loss": 1.0199, + "step": 531, + "time": 12.2 + }, + { + "epoch": 0.27, + "learning_rate": "1.0651e-04", + "loss": 1.0016, + "slid_loss": 1.0207, + "step": 532, + "time": 13.54 + }, + { + "epoch": 0.27, + "learning_rate": "1.0671e-04", + "loss": 1.0597, + "slid_loss": 1.0213, + "step": 533, + "time": 14.25 + }, + { + "epoch": 0.27, + "learning_rate": "1.0691e-04", + "loss": 1.214, + "slid_loss": 1.023, + "step": 534, + "time": 10.76 + }, + { + "epoch": 0.27, + "learning_rate": "1.0711e-04", + "loss": 1.0863, + "slid_loss": 1.0262, + "step": 535, + "time": 11.46 + }, + { + "epoch": 0.27, + "learning_rate": "1.0731e-04", + "loss": 0.9218, + "slid_loss": 1.0251, + "step": 536, + "time": 13.79 + }, + { + "epoch": 0.27, + "learning_rate": "1.0751e-04", + "loss": 1.1468, + "slid_loss": 1.0248, + "step": 537, + "time": 11.27 + }, + { + "epoch": 0.27, + "learning_rate": "1.0771e-04", + "loss": 0.9972, + "slid_loss": 1.0256, + "step": 538, + "time": 13.9 + }, + { + "epoch": 0.27, + "learning_rate": "1.0791e-04", + "loss": 1.0165, + "slid_loss": 1.0248, + "step": 539, + "time": 11.58 + }, + { + "epoch": 0.27, + "learning_rate": "1.0811e-04", + "loss": 0.7607, + "slid_loss": 1.0209, + "step": 540, + "time": 11.05 + }, + { + "epoch": 0.27, + "learning_rate": "1.0831e-04", + "loss": 1.1148, + "slid_loss": 1.0228, + "step": 541, + "time": 13.85 + }, + { + "epoch": 0.27, + "learning_rate": "1.0851e-04", + "loss": 1.0843, + "slid_loss": 1.0209, + "step": 542, + "time": 11.88 + }, + { + "epoch": 0.27, + "learning_rate": "1.0871e-04", + "loss": 0.9329, + "slid_loss": 1.0207, + "step": 543, + "time": 13.02 + }, + { + "epoch": 0.27, + "learning_rate": "1.0891e-04", + "loss": 1.1073, + "slid_loss": 1.0237, + "step": 544, + "time": 11.7 + }, + { + "epoch": 0.27, + "learning_rate": "1.0911e-04", + "loss": 1.0202, + "slid_loss": 1.0239, + "step": 545, + "time": 12.69 + }, + { + "epoch": 0.27, + "learning_rate": "1.0931e-04", + "loss": 1.0937, + "slid_loss": 1.0272, + "step": 546, + "time": 13.45 + }, + { + "epoch": 0.27, + "learning_rate": "1.0951e-04", + "loss": 1.0506, + "slid_loss": 1.0277, + "step": 547, + "time": 11.21 + }, + { + "epoch": 0.27, + "learning_rate": "1.0971e-04", + "loss": 0.9751, + "slid_loss": 1.0283, + "step": 548, + "time": 11.99 + }, + { + "epoch": 0.27, + "learning_rate": "1.0991e-04", + "loss": 0.8865, + "slid_loss": 1.0271, + "step": 549, + "time": 11.93 + }, + { + "epoch": 0.28, + "learning_rate": "1.1011e-04", + "loss": 1.0886, + "slid_loss": 1.029, + "step": 550, + "time": 13.43 + }, + { + "epoch": 0.28, + "learning_rate": "1.1031e-04", + "loss": 1.0844, + "slid_loss": 1.0282, + "step": 551, + "time": 13.39 + }, + { + "epoch": 0.28, + "learning_rate": "1.1051e-04", + "loss": 1.0961, + "slid_loss": 1.0282, + "step": 552, + "time": 12.89 + }, + { + "epoch": 0.28, + "learning_rate": "1.1071e-04", + "loss": 1.0678, + "slid_loss": 1.0276, + "step": 553, + "time": 12.41 + }, + { + "epoch": 0.28, + "learning_rate": "1.1091e-04", + "loss": 1.1762, + "slid_loss": 1.0284, + "step": 554, + "time": 14.06 + }, + { + "epoch": 0.28, + "learning_rate": "1.1111e-04", + "loss": 0.8455, + "slid_loss": 1.0262, + "step": 555, + "time": 10.77 + }, + { + "epoch": 0.28, + "learning_rate": "1.1131e-04", + "loss": 1.0377, + "slid_loss": 1.028, + "step": 556, + "time": 14.52 + }, + { + "epoch": 0.28, + "learning_rate": "1.1151e-04", + "loss": 0.955, + "slid_loss": 1.0269, + "step": 557, + "time": 10.35 + }, + { + "epoch": 0.28, + "learning_rate": "1.1171e-04", + "loss": 0.952, + "slid_loss": 1.0267, + "step": 558, + "time": 14.2 + }, + { + "epoch": 0.28, + "learning_rate": "1.1191e-04", + "loss": 0.9363, + "slid_loss": 1.027, + "step": 559, + "time": 13.97 + }, + { + "epoch": 0.28, + "learning_rate": "1.1211e-04", + "loss": 0.9139, + "slid_loss": 1.0265, + "step": 560, + "time": 12.21 + }, + { + "epoch": 0.28, + "learning_rate": "1.1231e-04", + "loss": 1.0831, + "slid_loss": 1.0289, + "step": 561, + "time": 14.36 + }, + { + "epoch": 0.28, + "learning_rate": "1.1251e-04", + "loss": 1.1789, + "slid_loss": 1.0303, + "step": 562, + "time": 13.5 + }, + { + "epoch": 0.28, + "learning_rate": "1.1271e-04", + "loss": 0.9651, + "slid_loss": 1.03, + "step": 563, + "time": 13.8 + }, + { + "epoch": 0.28, + "learning_rate": "1.1291e-04", + "loss": 0.8648, + "slid_loss": 1.0275, + "step": 564, + "time": 13.68 + }, + { + "epoch": 0.28, + "learning_rate": "1.1311e-04", + "loss": 0.8898, + "slid_loss": 1.0261, + "step": 565, + "time": 13.6 + }, + { + "epoch": 0.28, + "learning_rate": "1.1331e-04", + "loss": 1.1236, + "slid_loss": 1.0293, + "step": 566, + "time": 13.14 + }, + { + "epoch": 0.28, + "learning_rate": "1.1351e-04", + "loss": 1.0824, + "slid_loss": 1.0301, + "step": 567, + "time": 12.93 + }, + { + "epoch": 0.28, + "learning_rate": "1.1371e-04", + "loss": 1.1669, + "slid_loss": 1.0311, + "step": 568, + "time": 13.75 + }, + { + "epoch": 0.28, + "learning_rate": "1.1391e-04", + "loss": 1.0129, + "slid_loss": 1.0324, + "step": 569, + "time": 11.33 + }, + { + "epoch": 0.29, + "learning_rate": "1.1411e-04", + "loss": 0.7666, + "slid_loss": 1.0301, + "step": 570, + "time": 12.78 + }, + { + "epoch": 0.29, + "learning_rate": "1.1431e-04", + "loss": 0.9578, + "slid_loss": 1.0301, + "step": 571, + "time": 11.75 + }, + { + "epoch": 0.29, + "learning_rate": "1.1451e-04", + "loss": 0.9569, + "slid_loss": 1.0309, + "step": 572, + "time": 11.54 + }, + { + "epoch": 0.29, + "learning_rate": "1.1471e-04", + "loss": 0.9753, + "slid_loss": 1.0314, + "step": 573, + "time": 13.17 + }, + { + "epoch": 0.29, + "learning_rate": "1.1491e-04", + "loss": 0.8787, + "slid_loss": 1.0284, + "step": 574, + "time": 13.06 + }, + { + "epoch": 0.29, + "learning_rate": "1.1512e-04", + "loss": 1.0274, + "slid_loss": 1.0286, + "step": 575, + "time": 13.75 + }, + { + "epoch": 0.29, + "learning_rate": "1.1532e-04", + "loss": 0.9347, + "slid_loss": 1.0268, + "step": 576, + "time": 13.02 + }, + { + "epoch": 0.29, + "learning_rate": "1.1552e-04", + "loss": 0.9989, + "slid_loss": 1.0265, + "step": 577, + "time": 14.14 + }, + { + "epoch": 0.29, + "learning_rate": "1.1572e-04", + "loss": 1.2837, + "slid_loss": 1.0286, + "step": 578, + "time": 11.3 + }, + { + "epoch": 0.29, + "learning_rate": "1.1592e-04", + "loss": 0.8309, + "slid_loss": 1.0262, + "step": 579, + "time": 11.1 + }, + { + "epoch": 0.29, + "learning_rate": "1.1612e-04", + "loss": 0.9512, + "slid_loss": 1.0234, + "step": 580, + "time": 11.53 + }, + { + "epoch": 0.29, + "learning_rate": "1.1632e-04", + "loss": 1.1786, + "slid_loss": 1.0247, + "step": 581, + "time": 12.83 + }, + { + "epoch": 0.29, + "learning_rate": "1.1652e-04", + "loss": 1.1382, + "slid_loss": 1.0248, + "step": 582, + "time": 11.45 + }, + { + "epoch": 0.29, + "learning_rate": "1.1672e-04", + "loss": 1.149, + "slid_loss": 1.024, + "step": 583, + "time": 12.98 + }, + { + "epoch": 0.29, + "learning_rate": "1.1692e-04", + "loss": 0.9631, + "slid_loss": 1.0234, + "step": 584, + "time": 10.93 + }, + { + "epoch": 0.29, + "learning_rate": "1.1712e-04", + "loss": 1.297, + "slid_loss": 1.0248, + "step": 585, + "time": 12.99 + }, + { + "epoch": 0.29, + "learning_rate": "1.1732e-04", + "loss": 1.1906, + "slid_loss": 1.0265, + "step": 586, + "time": 12.11 + }, + { + "epoch": 0.29, + "learning_rate": "1.1752e-04", + "loss": 1.0771, + "slid_loss": 1.0274, + "step": 587, + "time": 11.76 + }, + { + "epoch": 0.29, + "learning_rate": "1.1772e-04", + "loss": 1.1362, + "slid_loss": 1.0289, + "step": 588, + "time": 13.05 + }, + { + "epoch": 0.29, + "learning_rate": "1.1792e-04", + "loss": 1.0245, + "slid_loss": 1.0272, + "step": 589, + "time": 11.83 + }, + { + "epoch": 0.3, + "learning_rate": "1.1812e-04", + "loss": 1.0172, + "slid_loss": 1.0262, + "step": 590, + "time": 10.77 + }, + { + "epoch": 0.3, + "learning_rate": "1.1832e-04", + "loss": 0.9103, + "slid_loss": 1.0249, + "step": 591, + "time": 13.28 + }, + { + "epoch": 0.3, + "learning_rate": "1.1852e-04", + "loss": 0.8559, + "slid_loss": 1.025, + "step": 592, + "time": 13.09 + }, + { + "epoch": 0.3, + "learning_rate": "1.1872e-04", + "loss": 1.09, + "slid_loss": 1.0268, + "step": 593, + "time": 11.56 + }, + { + "epoch": 0.3, + "learning_rate": "1.1892e-04", + "loss": 0.9654, + "slid_loss": 1.0264, + "step": 594, + "time": 13.16 + }, + { + "epoch": 0.3, + "learning_rate": "1.1912e-04", + "loss": 0.9083, + "slid_loss": 1.0252, + "step": 595, + "time": 10.93 + }, + { + "epoch": 0.3, + "learning_rate": "1.1932e-04", + "loss": 1.0911, + "slid_loss": 1.0261, + "step": 596, + "time": 13.6 + }, + { + "epoch": 0.3, + "learning_rate": "1.1952e-04", + "loss": 0.9376, + "slid_loss": 1.0245, + "step": 597, + "time": 11.69 + }, + { + "epoch": 0.3, + "learning_rate": "1.1972e-04", + "loss": 0.9849, + "slid_loss": 1.0239, + "step": 598, + "time": 12.16 + }, + { + "epoch": 0.3, + "learning_rate": "1.1992e-04", + "loss": 0.8074, + "slid_loss": 1.0205, + "step": 599, + "time": 11.46 + }, + { + "epoch": 0.3, + "learning_rate": "1.2012e-04", + "loss": 1.0053, + "slid_loss": 1.0199, + "step": 600, + "time": 13.33 + }, + { + "epoch": 0.3, + "learning_rate": "1.2032e-04", + "loss": 0.9398, + "slid_loss": 1.0179, + "step": 601, + "time": 13.88 + }, + { + "epoch": 0.3, + "learning_rate": "1.2052e-04", + "loss": 0.9956, + "slid_loss": 1.0183, + "step": 602, + "time": 11.21 + }, + { + "epoch": 0.3, + "learning_rate": "1.2072e-04", + "loss": 0.981, + "slid_loss": 1.0185, + "step": 603, + "time": 12.69 + }, + { + "epoch": 0.3, + "learning_rate": "1.2092e-04", + "loss": 1.1753, + "slid_loss": 1.0212, + "step": 604, + "time": 12.14 + }, + { + "epoch": 0.3, + "learning_rate": "1.2112e-04", + "loss": 1.1217, + "slid_loss": 1.0223, + "step": 605, + "time": 10.76 + }, + { + "epoch": 0.3, + "learning_rate": "1.2132e-04", + "loss": 0.7848, + "slid_loss": 1.0191, + "step": 606, + "time": 11.76 + }, + { + "epoch": 0.3, + "learning_rate": "1.2152e-04", + "loss": 1.11, + "slid_loss": 1.0201, + "step": 607, + "time": 13.3 + }, + { + "epoch": 0.3, + "learning_rate": "1.2172e-04", + "loss": 0.9653, + "slid_loss": 1.0187, + "step": 608, + "time": 13.68 + }, + { + "epoch": 0.3, + "learning_rate": "1.2192e-04", + "loss": 1.0368, + "slid_loss": 1.0191, + "step": 609, + "time": 11.2 + }, + { + "epoch": 0.31, + "learning_rate": "1.2212e-04", + "loss": 0.897, + "slid_loss": 1.0179, + "step": 610, + "time": 13.56 + }, + { + "epoch": 0.31, + "learning_rate": "1.2232e-04", + "loss": 0.9455, + "slid_loss": 1.0176, + "step": 611, + "time": 11.21 + }, + { + "epoch": 0.31, + "learning_rate": "1.2252e-04", + "loss": 1.0219, + "slid_loss": 1.0194, + "step": 612, + "time": 11.1 + }, + { + "epoch": 0.31, + "learning_rate": "1.2272e-04", + "loss": 0.9289, + "slid_loss": 1.0173, + "step": 613, + "time": 12.91 + }, + { + "epoch": 0.31, + "learning_rate": "1.2292e-04", + "loss": 0.9362, + "slid_loss": 1.0166, + "step": 614, + "time": 13.09 + }, + { + "epoch": 0.31, + "learning_rate": "1.2312e-04", + "loss": 1.053, + "slid_loss": 1.0148, + "step": 615, + "time": 13.44 + }, + { + "epoch": 0.31, + "learning_rate": "1.2332e-04", + "loss": 0.9077, + "slid_loss": 1.0127, + "step": 616, + "time": 13.45 + }, + { + "epoch": 0.31, + "learning_rate": "1.2352e-04", + "loss": 0.8837, + "slid_loss": 1.0106, + "step": 617, + "time": 13.32 + }, + { + "epoch": 0.31, + "learning_rate": "1.2372e-04", + "loss": 1.0859, + "slid_loss": 1.0114, + "step": 618, + "time": 10.99 + }, + { + "epoch": 0.31, + "learning_rate": "1.2392e-04", + "loss": 0.9136, + "slid_loss": 1.0109, + "step": 619, + "time": 12.23 + }, + { + "epoch": 0.31, + "learning_rate": "1.2412e-04", + "loss": 0.9804, + "slid_loss": 1.0113, + "step": 620, + "time": 13.1 + }, + { + "epoch": 0.31, + "learning_rate": "1.2432e-04", + "loss": 1.0796, + "slid_loss": 1.0112, + "step": 621, + "time": 11.2 + }, + { + "epoch": 0.31, + "learning_rate": "1.2452e-04", + "loss": 1.0182, + "slid_loss": 1.0115, + "step": 622, + "time": 11.46 + }, + { + "epoch": 0.31, + "learning_rate": "1.2472e-04", + "loss": 1.1148, + "slid_loss": 1.0125, + "step": 623, + "time": 11.1 + }, + { + "epoch": 0.31, + "learning_rate": "1.2492e-04", + "loss": 1.1103, + "slid_loss": 1.0147, + "step": 624, + "time": 13.48 + }, + { + "epoch": 0.31, + "learning_rate": "1.2513e-04", + "loss": 1.0295, + "slid_loss": 1.0158, + "step": 625, + "time": 11.67 + }, + { + "epoch": 0.31, + "learning_rate": "1.2533e-04", + "loss": 0.9424, + "slid_loss": 1.0144, + "step": 626, + "time": 10.9 + }, + { + "epoch": 0.31, + "learning_rate": "1.2553e-04", + "loss": 0.9839, + "slid_loss": 1.0136, + "step": 627, + "time": 13.33 + }, + { + "epoch": 0.31, + "learning_rate": "1.2573e-04", + "loss": 1.034, + "slid_loss": 1.0114, + "step": 628, + "time": 12.86 + }, + { + "epoch": 0.31, + "learning_rate": "1.2593e-04", + "loss": 0.971, + "slid_loss": 1.0113, + "step": 629, + "time": 13.85 + }, + { + "epoch": 0.32, + "learning_rate": "1.2613e-04", + "loss": 0.9573, + "slid_loss": 1.0116, + "step": 630, + "time": 13.68 + }, + { + "epoch": 0.32, + "learning_rate": "1.2633e-04", + "loss": 1.1187, + "slid_loss": 1.013, + "step": 631, + "time": 12.25 + }, + { + "epoch": 0.32, + "learning_rate": "1.2653e-04", + "loss": 0.9695, + "slid_loss": 1.0127, + "step": 632, + "time": 13.35 + }, + { + "epoch": 0.32, + "learning_rate": "1.2673e-04", + "loss": 0.9114, + "slid_loss": 1.0112, + "step": 633, + "time": 13.52 + }, + { + "epoch": 0.32, + "learning_rate": "1.2693e-04", + "loss": 0.8978, + "slid_loss": 1.0081, + "step": 634, + "time": 13.89 + }, + { + "epoch": 0.32, + "learning_rate": "1.2713e-04", + "loss": 0.9248, + "slid_loss": 1.0065, + "step": 635, + "time": 12.94 + }, + { + "epoch": 0.32, + "learning_rate": "1.2733e-04", + "loss": 1.1075, + "slid_loss": 1.0083, + "step": 636, + "time": 13.74 + }, + { + "epoch": 0.32, + "learning_rate": "1.2753e-04", + "loss": 1.0802, + "slid_loss": 1.0077, + "step": 637, + "time": 12.89 + }, + { + "epoch": 0.32, + "learning_rate": "1.2773e-04", + "loss": 1.1554, + "slid_loss": 1.0092, + "step": 638, + "time": 12.89 + }, + { + "epoch": 0.32, + "learning_rate": "1.2793e-04", + "loss": 1.0077, + "slid_loss": 1.0092, + "step": 639, + "time": 12.27 + }, + { + "epoch": 0.32, + "learning_rate": "1.2813e-04", + "loss": 1.167, + "slid_loss": 1.0132, + "step": 640, + "time": 14.26 + }, + { + "epoch": 0.32, + "learning_rate": "1.2833e-04", + "loss": 0.9499, + "slid_loss": 1.0116, + "step": 641, + "time": 13.16 + }, + { + "epoch": 0.32, + "learning_rate": "1.2853e-04", + "loss": 1.0837, + "slid_loss": 1.0116, + "step": 642, + "time": 12.81 + }, + { + "epoch": 0.32, + "learning_rate": "1.2873e-04", + "loss": 1.0381, + "slid_loss": 1.0126, + "step": 643, + "time": 12.64 + }, + { + "epoch": 0.32, + "learning_rate": "1.2893e-04", + "loss": 0.7851, + "slid_loss": 1.0094, + "step": 644, + "time": 12.77 + }, + { + "epoch": 0.32, + "learning_rate": "1.2913e-04", + "loss": 1.2259, + "slid_loss": 1.0114, + "step": 645, + "time": 10.91 + }, + { + "epoch": 0.32, + "learning_rate": "1.2933e-04", + "loss": 0.8579, + "slid_loss": 1.0091, + "step": 646, + "time": 13.63 + }, + { + "epoch": 0.32, + "learning_rate": "1.2953e-04", + "loss": 0.8891, + "slid_loss": 1.0075, + "step": 647, + "time": 12.82 + }, + { + "epoch": 0.32, + "learning_rate": "1.2973e-04", + "loss": 1.0147, + "slid_loss": 1.0079, + "step": 648, + "time": 13.96 + }, + { + "epoch": 0.32, + "learning_rate": "1.2993e-04", + "loss": 1.069, + "slid_loss": 1.0097, + "step": 649, + "time": 13.89 + }, + { + "epoch": 0.33, + "learning_rate": "1.3013e-04", + "loss": 1.1534, + "slid_loss": 1.0103, + "step": 650, + "time": 12.82 + }, + { + "epoch": 0.33, + "learning_rate": "1.3033e-04", + "loss": 0.9059, + "slid_loss": 1.0086, + "step": 651, + "time": 13.42 + }, + { + "epoch": 0.33, + "learning_rate": "1.3053e-04", + "loss": 0.8183, + "slid_loss": 1.0058, + "step": 652, + "time": 11.26 + }, + { + "epoch": 0.33, + "learning_rate": "1.3073e-04", + "loss": 0.8762, + "slid_loss": 1.0039, + "step": 653, + "time": 12.94 + }, + { + "epoch": 0.33, + "learning_rate": "1.3093e-04", + "loss": 1.1827, + "slid_loss": 1.0039, + "step": 654, + "time": 11.02 + }, + { + "epoch": 0.33, + "learning_rate": "1.3113e-04", + "loss": 0.8486, + "slid_loss": 1.004, + "step": 655, + "time": 11.39 + }, + { + "epoch": 0.33, + "learning_rate": "1.3133e-04", + "loss": 0.9553, + "slid_loss": 1.0031, + "step": 656, + "time": 11.51 + }, + { + "epoch": 0.33, + "learning_rate": "1.3153e-04", + "loss": 0.8421, + "slid_loss": 1.002, + "step": 657, + "time": 11.56 + }, + { + "epoch": 0.33, + "learning_rate": "1.3173e-04", + "loss": 1.0224, + "slid_loss": 1.0027, + "step": 658, + "time": 13.72 + }, + { + "epoch": 0.33, + "learning_rate": "1.3193e-04", + "loss": 1.0923, + "slid_loss": 1.0043, + "step": 659, + "time": 12.78 + }, + { + "epoch": 0.33, + "learning_rate": "1.3213e-04", + "loss": 1.0727, + "slid_loss": 1.0059, + "step": 660, + "time": 13.83 + }, + { + "epoch": 0.33, + "learning_rate": "1.3233e-04", + "loss": 1.1727, + "slid_loss": 1.0068, + "step": 661, + "time": 12.87 + }, + { + "epoch": 0.33, + "learning_rate": "1.3253e-04", + "loss": 1.0056, + "slid_loss": 1.005, + "step": 662, + "time": 11.21 + }, + { + "epoch": 0.33, + "learning_rate": "1.3273e-04", + "loss": 0.8827, + "slid_loss": 1.0042, + "step": 663, + "time": 12.91 + }, + { + "epoch": 0.33, + "learning_rate": "1.3293e-04", + "loss": 1.0183, + "slid_loss": 1.0057, + "step": 664, + "time": 13.84 + }, + { + "epoch": 0.33, + "learning_rate": "1.3313e-04", + "loss": 1.1313, + "slid_loss": 1.0081, + "step": 665, + "time": 12.12 + }, + { + "epoch": 0.33, + "learning_rate": "1.3333e-04", + "loss": 0.8141, + "slid_loss": 1.0051, + "step": 666, + "time": 12.69 + }, + { + "epoch": 0.33, + "learning_rate": "1.3353e-04", + "loss": 0.8108, + "slid_loss": 1.0023, + "step": 667, + "time": 13.25 + }, + { + "epoch": 0.33, + "learning_rate": "1.3373e-04", + "loss": 0.9889, + "slid_loss": 1.0006, + "step": 668, + "time": 12.64 + }, + { + "epoch": 0.34, + "learning_rate": "1.3393e-04", + "loss": 0.938, + "slid_loss": 0.9998, + "step": 669, + "time": 13.45 + }, + { + "epoch": 0.34, + "learning_rate": "1.3413e-04", + "loss": 1.0123, + "slid_loss": 1.0023, + "step": 670, + "time": 12.75 + }, + { + "epoch": 0.34, + "learning_rate": "1.3433e-04", + "loss": 1.0558, + "slid_loss": 1.0032, + "step": 671, + "time": 10.78 + }, + { + "epoch": 0.34, + "learning_rate": "1.3453e-04", + "loss": 0.9611, + "slid_loss": 1.0033, + "step": 672, + "time": 11.42 + }, + { + "epoch": 0.34, + "learning_rate": "1.3473e-04", + "loss": 1.0436, + "slid_loss": 1.004, + "step": 673, + "time": 13.86 + }, + { + "epoch": 0.34, + "learning_rate": "1.3493e-04", + "loss": 0.8767, + "slid_loss": 1.0039, + "step": 674, + "time": 10.55 + }, + { + "epoch": 0.34, + "learning_rate": "1.3514e-04", + "loss": 0.8877, + "slid_loss": 1.0026, + "step": 675, + "time": 13.21 + }, + { + "epoch": 0.34, + "learning_rate": "1.3534e-04", + "loss": 0.9227, + "slid_loss": 1.0024, + "step": 676, + "time": 13.53 + }, + { + "epoch": 0.34, + "learning_rate": "1.3554e-04", + "loss": 0.8679, + "slid_loss": 1.0011, + "step": 677, + "time": 12.8 + }, + { + "epoch": 0.34, + "learning_rate": "1.3574e-04", + "loss": 0.9638, + "slid_loss": 0.9979, + "step": 678, + "time": 11.26 + }, + { + "epoch": 0.34, + "learning_rate": "1.3594e-04", + "loss": 1.0016, + "slid_loss": 0.9996, + "step": 679, + "time": 11.85 + }, + { + "epoch": 0.34, + "learning_rate": "1.3614e-04", + "loss": 0.9983, + "slid_loss": 1.0001, + "step": 680, + "time": 12.89 + }, + { + "epoch": 0.34, + "learning_rate": "1.3634e-04", + "loss": 0.8023, + "slid_loss": 0.9963, + "step": 681, + "time": 12.59 + }, + { + "epoch": 0.34, + "learning_rate": "1.3654e-04", + "loss": 0.7785, + "slid_loss": 0.9927, + "step": 682, + "time": 11.15 + }, + { + "epoch": 0.34, + "learning_rate": "1.3674e-04", + "loss": 1.2366, + "slid_loss": 0.9936, + "step": 683, + "time": 14.15 + }, + { + "epoch": 0.34, + "learning_rate": "1.3694e-04", + "loss": 1.0029, + "slid_loss": 0.994, + "step": 684, + "time": 11.36 + }, + { + "epoch": 0.34, + "learning_rate": "1.3714e-04", + "loss": 1.0928, + "slid_loss": 0.992, + "step": 685, + "time": 11.7 + }, + { + "epoch": 0.34, + "learning_rate": "1.3734e-04", + "loss": 1.0512, + "slid_loss": 0.9906, + "step": 686, + "time": 12.25 + }, + { + "epoch": 0.34, + "learning_rate": "1.3754e-04", + "loss": 0.9654, + "slid_loss": 0.9895, + "step": 687, + "time": 11.04 + }, + { + "epoch": 0.34, + "learning_rate": "1.3774e-04", + "loss": 0.8869, + "slid_loss": 0.987, + "step": 688, + "time": 13.21 + }, + { + "epoch": 0.35, + "learning_rate": "1.3794e-04", + "loss": 1.1295, + "slid_loss": 0.988, + "step": 689, + "time": 13.62 + }, + { + "epoch": 0.35, + "learning_rate": "1.3814e-04", + "loss": 1.0255, + "slid_loss": 0.9881, + "step": 690, + "time": 10.79 + }, + { + "epoch": 0.35, + "learning_rate": "1.3834e-04", + "loss": 1.0871, + "slid_loss": 0.9899, + "step": 691, + "time": 13.39 + }, + { + "epoch": 0.35, + "learning_rate": "1.3854e-04", + "loss": 1.0161, + "slid_loss": 0.9915, + "step": 692, + "time": 11.94 + }, + { + "epoch": 0.35, + "learning_rate": "1.3874e-04", + "loss": 1.0895, + "slid_loss": 0.9915, + "step": 693, + "time": 13.8 + }, + { + "epoch": 0.35, + "learning_rate": "1.3894e-04", + "loss": 0.9603, + "slid_loss": 0.9914, + "step": 694, + "time": 11.52 + }, + { + "epoch": 0.35, + "learning_rate": "1.3914e-04", + "loss": 1.0485, + "slid_loss": 0.9928, + "step": 695, + "time": 13.88 + }, + { + "epoch": 0.35, + "learning_rate": "1.3934e-04", + "loss": 1.0684, + "slid_loss": 0.9926, + "step": 696, + "time": 11.27 + }, + { + "epoch": 0.35, + "learning_rate": "1.3954e-04", + "loss": 0.8415, + "slid_loss": 0.9916, + "step": 697, + "time": 10.97 + }, + { + "epoch": 0.35, + "learning_rate": "1.3974e-04", + "loss": 0.9181, + "slid_loss": 0.991, + "step": 698, + "time": 11.76 + }, + { + "epoch": 0.35, + "learning_rate": "1.3994e-04", + "loss": 0.9356, + "slid_loss": 0.9922, + "step": 699, + "time": 10.66 + }, + { + "epoch": 0.35, + "learning_rate": "1.4014e-04", + "loss": 0.7954, + "slid_loss": 0.9901, + "step": 700, + "time": 13.62 + }, + { + "epoch": 0.35, + "learning_rate": "1.4034e-04", + "loss": 0.9117, + "slid_loss": 0.9899, + "step": 701, + "time": 13.16 + }, + { + "epoch": 0.35, + "learning_rate": "1.4054e-04", + "loss": 0.8496, + "slid_loss": 0.9884, + "step": 702, + "time": 12.75 + }, + { + "epoch": 0.35, + "learning_rate": "1.4074e-04", + "loss": 1.0164, + "slid_loss": 0.9888, + "step": 703, + "time": 12.27 + }, + { + "epoch": 0.35, + "learning_rate": "1.4094e-04", + "loss": 1.0084, + "slid_loss": 0.9871, + "step": 704, + "time": 10.86 + }, + { + "epoch": 0.35, + "learning_rate": "1.4114e-04", + "loss": 0.9271, + "slid_loss": 0.9851, + "step": 705, + "time": 13.4 + }, + { + "epoch": 0.35, + "learning_rate": "1.4134e-04", + "loss": 0.9715, + "slid_loss": 0.987, + "step": 706, + "time": 10.44 + }, + { + "epoch": 0.35, + "learning_rate": "1.4154e-04", + "loss": 0.9828, + "slid_loss": 0.9857, + "step": 707, + "time": 11.93 + }, + { + "epoch": 0.35, + "learning_rate": "1.4174e-04", + "loss": 0.8551, + "slid_loss": 0.9846, + "step": 708, + "time": 11.65 + }, + { + "epoch": 0.36, + "learning_rate": "1.4194e-04", + "loss": 0.8395, + "slid_loss": 0.9827, + "step": 709, + "time": 13.08 + }, + { + "epoch": 0.36, + "learning_rate": "1.4214e-04", + "loss": 1.0572, + "slid_loss": 0.9843, + "step": 710, + "time": 12.87 + }, + { + "epoch": 0.36, + "learning_rate": "1.4234e-04", + "loss": 0.9244, + "slid_loss": 0.9841, + "step": 711, + "time": 12.22 + }, + { + "epoch": 0.36, + "learning_rate": "1.4254e-04", + "loss": 0.8192, + "slid_loss": 0.982, + "step": 712, + "time": 13.47 + }, + { + "epoch": 0.36, + "learning_rate": "1.4274e-04", + "loss": 1.0233, + "slid_loss": 0.983, + "step": 713, + "time": 10.77 + }, + { + "epoch": 0.36, + "learning_rate": "1.4294e-04", + "loss": 1.0704, + "slid_loss": 0.9843, + "step": 714, + "time": 12.89 + }, + { + "epoch": 0.36, + "learning_rate": "1.4314e-04", + "loss": 1.024, + "slid_loss": 0.984, + "step": 715, + "time": 13.44 + }, + { + "epoch": 0.36, + "learning_rate": "1.4334e-04", + "loss": 0.9157, + "slid_loss": 0.9841, + "step": 716, + "time": 11.33 + }, + { + "epoch": 0.36, + "learning_rate": "1.4354e-04", + "loss": 1.1023, + "slid_loss": 0.9863, + "step": 717, + "time": 13.45 + }, + { + "epoch": 0.36, + "learning_rate": "1.4374e-04", + "loss": 0.865, + "slid_loss": 0.9841, + "step": 718, + "time": 13.06 + }, + { + "epoch": 0.36, + "learning_rate": "1.4394e-04", + "loss": 1.0494, + "slid_loss": 0.9854, + "step": 719, + "time": 12.85 + }, + { + "epoch": 0.36, + "learning_rate": "1.4414e-04", + "loss": 0.9105, + "slid_loss": 0.9847, + "step": 720, + "time": 13.28 + }, + { + "epoch": 0.36, + "learning_rate": "1.4434e-04", + "loss": 1.0226, + "slid_loss": 0.9842, + "step": 721, + "time": 11.85 + }, + { + "epoch": 0.36, + "learning_rate": "1.4454e-04", + "loss": 1.1819, + "slid_loss": 0.9858, + "step": 722, + "time": 11.06 + }, + { + "epoch": 0.36, + "learning_rate": "1.4474e-04", + "loss": 1.0174, + "slid_loss": 0.9848, + "step": 723, + "time": 13.3 + }, + { + "epoch": 0.36, + "learning_rate": "1.4494e-04", + "loss": 0.9735, + "slid_loss": 0.9835, + "step": 724, + "time": 10.41 + }, + { + "epoch": 0.36, + "learning_rate": "1.4515e-04", + "loss": 1.2064, + "slid_loss": 0.9852, + "step": 725, + "time": 13.33 + }, + { + "epoch": 0.36, + "learning_rate": "1.4535e-04", + "loss": 0.9788, + "slid_loss": 0.9856, + "step": 726, + "time": 13.67 + }, + { + "epoch": 0.36, + "learning_rate": "1.4555e-04", + "loss": 0.7934, + "slid_loss": 0.9837, + "step": 727, + "time": 13.96 + }, + { + "epoch": 0.36, + "learning_rate": "1.4575e-04", + "loss": 0.7614, + "slid_loss": 0.981, + "step": 728, + "time": 11.69 + }, + { + "epoch": 0.37, + "learning_rate": "1.4595e-04", + "loss": 1.1447, + "slid_loss": 0.9827, + "step": 729, + "time": 13.41 + }, + { + "epoch": 0.37, + "learning_rate": "1.4615e-04", + "loss": 0.8137, + "slid_loss": 0.9813, + "step": 730, + "time": 12.9 + }, + { + "epoch": 0.37, + "learning_rate": "1.4635e-04", + "loss": 0.9892, + "slid_loss": 0.98, + "step": 731, + "time": 13.16 + }, + { + "epoch": 0.37, + "learning_rate": "1.4655e-04", + "loss": 1.0339, + "slid_loss": 0.9806, + "step": 732, + "time": 11.23 + }, + { + "epoch": 0.37, + "learning_rate": "1.4675e-04", + "loss": 0.9262, + "slid_loss": 0.9808, + "step": 733, + "time": 12.12 + }, + { + "epoch": 0.37, + "learning_rate": "1.4695e-04", + "loss": 1.0197, + "slid_loss": 0.982, + "step": 734, + "time": 13.88 + }, + { + "epoch": 0.37, + "learning_rate": "1.4715e-04", + "loss": 0.8779, + "slid_loss": 0.9815, + "step": 735, + "time": 13.19 + }, + { + "epoch": 0.37, + "learning_rate": "1.4735e-04", + "loss": 1.0064, + "slid_loss": 0.9805, + "step": 736, + "time": 12.94 + }, + { + "epoch": 0.37, + "learning_rate": "1.4755e-04", + "loss": 0.9076, + "slid_loss": 0.9788, + "step": 737, + "time": 13.2 + }, + { + "epoch": 0.37, + "learning_rate": "1.4775e-04", + "loss": 0.8977, + "slid_loss": 0.9762, + "step": 738, + "time": 13.22 + }, + { + "epoch": 0.37, + "learning_rate": "1.4795e-04", + "loss": 0.9023, + "slid_loss": 0.9751, + "step": 739, + "time": 13.31 + }, + { + "epoch": 0.37, + "learning_rate": "1.4815e-04", + "loss": 1.0102, + "slid_loss": 0.9736, + "step": 740, + "time": 13.18 + }, + { + "epoch": 0.37, + "learning_rate": "1.4835e-04", + "loss": 0.9676, + "slid_loss": 0.9738, + "step": 741, + "time": 14.11 + }, + { + "epoch": 0.37, + "learning_rate": "1.4855e-04", + "loss": 1.1249, + "slid_loss": 0.9742, + "step": 742, + "time": 13.01 + }, + { + "epoch": 0.37, + "learning_rate": "1.4875e-04", + "loss": 1.1357, + "slid_loss": 0.9751, + "step": 743, + "time": 13.5 + }, + { + "epoch": 0.37, + "learning_rate": "1.4895e-04", + "loss": 1.0604, + "slid_loss": 0.9779, + "step": 744, + "time": 13.78 + }, + { + "epoch": 0.37, + "learning_rate": "1.4915e-04", + "loss": 0.9314, + "slid_loss": 0.9749, + "step": 745, + "time": 14.59 + }, + { + "epoch": 0.37, + "learning_rate": "1.4935e-04", + "loss": 1.1284, + "slid_loss": 0.9777, + "step": 746, + "time": 11.13 + }, + { + "epoch": 0.37, + "learning_rate": "1.4955e-04", + "loss": 0.9076, + "slid_loss": 0.9778, + "step": 747, + "time": 12.34 + }, + { + "epoch": 0.37, + "learning_rate": "1.4975e-04", + "loss": 0.9361, + "slid_loss": 0.9771, + "step": 748, + "time": 13.73 + }, + { + "epoch": 0.38, + "learning_rate": "1.4995e-04", + "loss": 0.7591, + "slid_loss": 0.974, + "step": 749, + "time": 11.89 + }, + { + "epoch": 0.38, + "learning_rate": "1.5015e-04", + "loss": 0.9461, + "slid_loss": 0.9719, + "step": 750, + "time": 12.68 + }, + { + "epoch": 0.38, + "learning_rate": "1.5035e-04", + "loss": 1.0343, + "slid_loss": 0.9732, + "step": 751, + "time": 12.26 + }, + { + "epoch": 0.38, + "learning_rate": "1.5055e-04", + "loss": 0.9188, + "slid_loss": 0.9742, + "step": 752, + "time": 12.22 + }, + { + "epoch": 0.38, + "learning_rate": "1.5075e-04", + "loss": 0.9328, + "slid_loss": 0.9747, + "step": 753, + "time": 12.18 + }, + { + "epoch": 0.38, + "learning_rate": "1.5095e-04", + "loss": 0.8651, + "slid_loss": 0.9716, + "step": 754, + "time": 11.62 + }, + { + "epoch": 0.38, + "learning_rate": "1.5115e-04", + "loss": 0.9478, + "slid_loss": 0.9726, + "step": 755, + "time": 12.91 + }, + { + "epoch": 0.38, + "learning_rate": "1.5135e-04", + "loss": 0.9802, + "slid_loss": 0.9728, + "step": 756, + "time": 11.51 + }, + { + "epoch": 0.38, + "learning_rate": "1.5155e-04", + "loss": 0.9281, + "slid_loss": 0.9737, + "step": 757, + "time": 11.12 + }, + { + "epoch": 0.38, + "learning_rate": "1.5175e-04", + "loss": 1.0797, + "slid_loss": 0.9742, + "step": 758, + "time": 13.49 + }, + { + "epoch": 0.38, + "learning_rate": "1.5195e-04", + "loss": 0.9293, + "slid_loss": 0.9726, + "step": 759, + "time": 13.43 + }, + { + "epoch": 0.38, + "learning_rate": "1.5215e-04", + "loss": 1.0858, + "slid_loss": 0.9727, + "step": 760, + "time": 12.35 + }, + { + "epoch": 0.38, + "learning_rate": "1.5235e-04", + "loss": 0.8686, + "slid_loss": 0.9697, + "step": 761, + "time": 12.79 + }, + { + "epoch": 0.38, + "learning_rate": "1.5255e-04", + "loss": 0.8529, + "slid_loss": 0.9682, + "step": 762, + "time": 13.28 + }, + { + "epoch": 0.38, + "learning_rate": "1.5275e-04", + "loss": 0.7732, + "slid_loss": 0.9671, + "step": 763, + "time": 11.8 + }, + { + "epoch": 0.38, + "learning_rate": "1.5295e-04", + "loss": 0.9974, + "slid_loss": 0.9669, + "step": 764, + "time": 13.63 + }, + { + "epoch": 0.38, + "learning_rate": "1.5315e-04", + "loss": 1.0363, + "slid_loss": 0.9659, + "step": 765, + "time": 13.33 + }, + { + "epoch": 0.38, + "learning_rate": "1.5335e-04", + "loss": 1.1253, + "slid_loss": 0.969, + "step": 766, + "time": 13.88 + }, + { + "epoch": 0.38, + "learning_rate": "1.5355e-04", + "loss": 0.9971, + "slid_loss": 0.9709, + "step": 767, + "time": 13.24 + }, + { + "epoch": 0.38, + "learning_rate": "1.5375e-04", + "loss": 0.691, + "slid_loss": 0.9679, + "step": 768, + "time": 10.8 + }, + { + "epoch": 0.39, + "learning_rate": "1.5395e-04", + "loss": 1.1506, + "slid_loss": 0.97, + "step": 769, + "time": 11.36 + }, + { + "epoch": 0.39, + "learning_rate": "1.5415e-04", + "loss": 0.987, + "slid_loss": 0.9698, + "step": 770, + "time": 13.23 + }, + { + "epoch": 0.39, + "learning_rate": "1.5435e-04", + "loss": 1.0185, + "slid_loss": 0.9694, + "step": 771, + "time": 11.62 + }, + { + "epoch": 0.39, + "learning_rate": "1.5455e-04", + "loss": 0.7668, + "slid_loss": 0.9675, + "step": 772, + "time": 13.73 + }, + { + "epoch": 0.39, + "learning_rate": "1.5475e-04", + "loss": 1.0059, + "slid_loss": 0.9671, + "step": 773, + "time": 13.8 + }, + { + "epoch": 0.39, + "learning_rate": "1.5495e-04", + "loss": 0.8471, + "slid_loss": 0.9668, + "step": 774, + "time": 13.12 + }, + { + "epoch": 0.39, + "learning_rate": "1.5516e-04", + "loss": 1.012, + "slid_loss": 0.968, + "step": 775, + "time": 11.46 + }, + { + "epoch": 0.39, + "learning_rate": "1.5536e-04", + "loss": 0.9691, + "slid_loss": 0.9685, + "step": 776, + "time": 13.29 + }, + { + "epoch": 0.39, + "learning_rate": "1.5556e-04", + "loss": 1.0104, + "slid_loss": 0.9699, + "step": 777, + "time": 13.72 + }, + { + "epoch": 0.39, + "learning_rate": "1.5576e-04", + "loss": 0.8037, + "slid_loss": 0.9683, + "step": 778, + "time": 12.97 + }, + { + "epoch": 0.39, + "learning_rate": "1.5596e-04", + "loss": 1.1606, + "slid_loss": 0.9699, + "step": 779, + "time": 11.29 + }, + { + "epoch": 0.39, + "learning_rate": "1.5616e-04", + "loss": 0.8973, + "slid_loss": 0.9689, + "step": 780, + "time": 13.37 + }, + { + "epoch": 0.39, + "learning_rate": "1.5636e-04", + "loss": 1.0781, + "slid_loss": 0.9717, + "step": 781, + "time": 12.84 + }, + { + "epoch": 0.39, + "learning_rate": "1.5656e-04", + "loss": 1.0446, + "slid_loss": 0.9743, + "step": 782, + "time": 11.34 + }, + { + "epoch": 0.39, + "learning_rate": "1.5676e-04", + "loss": 1.0116, + "slid_loss": 0.9721, + "step": 783, + "time": 12.44 + }, + { + "epoch": 0.39, + "learning_rate": "1.5696e-04", + "loss": 1.1168, + "slid_loss": 0.9732, + "step": 784, + "time": 10.25 + }, + { + "epoch": 0.39, + "learning_rate": "1.5716e-04", + "loss": 1.0352, + "slid_loss": 0.9726, + "step": 785, + "time": 13.2 + }, + { + "epoch": 0.39, + "learning_rate": "1.5736e-04", + "loss": 0.9437, + "slid_loss": 0.9716, + "step": 786, + "time": 13.59 + }, + { + "epoch": 0.39, + "learning_rate": "1.5756e-04", + "loss": 1.0161, + "slid_loss": 0.9721, + "step": 787, + "time": 11.05 + }, + { + "epoch": 0.39, + "learning_rate": "1.5776e-04", + "loss": 1.1084, + "slid_loss": 0.9743, + "step": 788, + "time": 12.73 + }, + { + "epoch": 0.4, + "learning_rate": "1.5796e-04", + "loss": 0.8961, + "slid_loss": 0.972, + "step": 789, + "time": 13.76 + }, + { + "epoch": 0.4, + "learning_rate": "1.5816e-04", + "loss": 1.0149, + "slid_loss": 0.9718, + "step": 790, + "time": 13.26 + }, + { + "epoch": 0.4, + "learning_rate": "1.5836e-04", + "loss": 1.0083, + "slid_loss": 0.9711, + "step": 791, + "time": 12.97 + }, + { + "epoch": 0.4, + "learning_rate": "1.5856e-04", + "loss": 1.0004, + "slid_loss": 0.9709, + "step": 792, + "time": 10.57 + }, + { + "epoch": 0.4, + "learning_rate": "1.5876e-04", + "loss": 0.8789, + "slid_loss": 0.9688, + "step": 793, + "time": 13.55 + }, + { + "epoch": 0.4, + "learning_rate": "1.5896e-04", + "loss": 1.0107, + "slid_loss": 0.9693, + "step": 794, + "time": 11.7 + }, + { + "epoch": 0.4, + "learning_rate": "1.5916e-04", + "loss": 0.92, + "slid_loss": 0.968, + "step": 795, + "time": 11.57 + }, + { + "epoch": 0.4, + "learning_rate": "1.5936e-04", + "loss": 1.0857, + "slid_loss": 0.9682, + "step": 796, + "time": 13.38 + }, + { + "epoch": 0.4, + "learning_rate": "1.5956e-04", + "loss": 1.0481, + "slid_loss": 0.9703, + "step": 797, + "time": 14.42 + }, + { + "epoch": 0.4, + "learning_rate": "1.5976e-04", + "loss": 1.1181, + "slid_loss": 0.9723, + "step": 798, + "time": 10.76 + }, + { + "epoch": 0.4, + "learning_rate": "1.5996e-04", + "loss": 0.9777, + "slid_loss": 0.9727, + "step": 799, + "time": 13.62 + }, + { + "epoch": 0.4, + "learning_rate": "1.6016e-04", + "loss": 0.9275, + "slid_loss": 0.974, + "step": 800, + "time": 11.95 + }, + { + "epoch": 0.4, + "learning_rate": "1.6036e-04", + "loss": 1.1629, + "slid_loss": 0.9765, + "step": 801, + "time": 12.86 + }, + { + "epoch": 0.4, + "learning_rate": "1.6056e-04", + "loss": 0.9711, + "slid_loss": 0.9777, + "step": 802, + "time": 12.96 + }, + { + "epoch": 0.4, + "learning_rate": "1.6076e-04", + "loss": 1.1016, + "slid_loss": 0.9786, + "step": 803, + "time": 12.8 + }, + { + "epoch": 0.4, + "learning_rate": "1.6096e-04", + "loss": 1.1025, + "slid_loss": 0.9795, + "step": 804, + "time": 12.76 + }, + { + "epoch": 0.4, + "learning_rate": "1.6116e-04", + "loss": 1.0308, + "slid_loss": 0.9806, + "step": 805, + "time": 11.61 + }, + { + "epoch": 0.4, + "learning_rate": "1.6136e-04", + "loss": 0.9866, + "slid_loss": 0.9807, + "step": 806, + "time": 12.77 + }, + { + "epoch": 0.4, + "learning_rate": "1.6156e-04", + "loss": 0.9597, + "slid_loss": 0.9805, + "step": 807, + "time": 10.79 + }, + { + "epoch": 0.4, + "learning_rate": "1.6176e-04", + "loss": 0.9146, + "slid_loss": 0.9811, + "step": 808, + "time": 12.84 + }, + { + "epoch": 0.41, + "learning_rate": "1.6196e-04", + "loss": 0.9863, + "slid_loss": 0.9825, + "step": 809, + "time": 14.7 + }, + { + "epoch": 0.41, + "learning_rate": "1.6216e-04", + "loss": 1.188, + "slid_loss": 0.9838, + "step": 810, + "time": 11.41 + }, + { + "epoch": 0.41, + "learning_rate": "1.6236e-04", + "loss": 1.1784, + "slid_loss": 0.9864, + "step": 811, + "time": 13.45 + }, + { + "epoch": 0.41, + "learning_rate": "1.6256e-04", + "loss": 0.9219, + "slid_loss": 0.9874, + "step": 812, + "time": 13.65 + }, + { + "epoch": 0.41, + "learning_rate": "1.6276e-04", + "loss": 0.9983, + "slid_loss": 0.9872, + "step": 813, + "time": 11.25 + }, + { + "epoch": 0.41, + "learning_rate": "1.6296e-04", + "loss": 1.1182, + "slid_loss": 0.9876, + "step": 814, + "time": 11.48 + }, + { + "epoch": 0.41, + "learning_rate": "1.6316e-04", + "loss": 1.0148, + "slid_loss": 0.9875, + "step": 815, + "time": 10.86 + }, + { + "epoch": 0.41, + "learning_rate": "1.6336e-04", + "loss": 1.0053, + "slid_loss": 0.9884, + "step": 816, + "time": 13.52 + }, + { + "epoch": 0.41, + "learning_rate": "1.6356e-04", + "loss": 0.965, + "slid_loss": 0.9871, + "step": 817, + "time": 12.26 + }, + { + "epoch": 0.41, + "learning_rate": "1.6376e-04", + "loss": 0.8542, + "slid_loss": 0.987, + "step": 818, + "time": 12.87 + }, + { + "epoch": 0.41, + "learning_rate": "1.6396e-04", + "loss": 1.12, + "slid_loss": 0.9877, + "step": 819, + "time": 12.72 + }, + { + "epoch": 0.41, + "learning_rate": "1.6416e-04", + "loss": 1.0156, + "slid_loss": 0.9887, + "step": 820, + "time": 13.33 + }, + { + "epoch": 0.41, + "learning_rate": "1.6436e-04", + "loss": 0.9238, + "slid_loss": 0.9877, + "step": 821, + "time": 13.34 + }, + { + "epoch": 0.41, + "learning_rate": "1.6456e-04", + "loss": 1.0843, + "slid_loss": 0.9868, + "step": 822, + "time": 11.69 + }, + { + "epoch": 0.41, + "learning_rate": "1.6476e-04", + "loss": 0.8746, + "slid_loss": 0.9853, + "step": 823, + "time": 12.84 + }, + { + "epoch": 0.41, + "learning_rate": "1.6496e-04", + "loss": 0.9413, + "slid_loss": 0.985, + "step": 824, + "time": 13.32 + }, + { + "epoch": 0.41, + "learning_rate": "1.6517e-04", + "loss": 0.9757, + "slid_loss": 0.9827, + "step": 825, + "time": 11.91 + }, + { + "epoch": 0.41, + "learning_rate": "1.6537e-04", + "loss": 1.0488, + "slid_loss": 0.9834, + "step": 826, + "time": 12.67 + }, + { + "epoch": 0.41, + "learning_rate": "1.6557e-04", + "loss": 1.1222, + "slid_loss": 0.9867, + "step": 827, + "time": 11.22 + }, + { + "epoch": 0.41, + "learning_rate": "1.6577e-04", + "loss": 0.9475, + "slid_loss": 0.9885, + "step": 828, + "time": 13.44 + }, + { + "epoch": 0.42, + "learning_rate": "1.6597e-04", + "loss": 1.0734, + "slid_loss": 0.9878, + "step": 829, + "time": 13.53 + }, + { + "epoch": 0.42, + "learning_rate": "1.6617e-04", + "loss": 1.0888, + "slid_loss": 0.9906, + "step": 830, + "time": 11.89 + }, + { + "epoch": 0.42, + "learning_rate": "1.6637e-04", + "loss": 0.9635, + "slid_loss": 0.9903, + "step": 831, + "time": 12.74 + }, + { + "epoch": 0.42, + "learning_rate": "1.6657e-04", + "loss": 0.983, + "slid_loss": 0.9898, + "step": 832, + "time": 11.9 + }, + { + "epoch": 0.42, + "learning_rate": "1.6677e-04", + "loss": 1.1005, + "slid_loss": 0.9916, + "step": 833, + "time": 13.32 + }, + { + "epoch": 0.42, + "learning_rate": "1.6697e-04", + "loss": 1.0374, + "slid_loss": 0.9917, + "step": 834, + "time": 12.35 + }, + { + "epoch": 0.42, + "learning_rate": "1.6717e-04", + "loss": 0.9939, + "slid_loss": 0.9929, + "step": 835, + "time": 10.85 + }, + { + "epoch": 0.42, + "learning_rate": "1.6737e-04", + "loss": 0.9431, + "slid_loss": 0.9923, + "step": 836, + "time": 13.33 + }, + { + "epoch": 0.42, + "learning_rate": "1.6757e-04", + "loss": 1.0484, + "slid_loss": 0.9937, + "step": 837, + "time": 12.21 + }, + { + "epoch": 0.42, + "learning_rate": "1.6777e-04", + "loss": 1.0223, + "slid_loss": 0.9949, + "step": 838, + "time": 13.96 + }, + { + "epoch": 0.42, + "learning_rate": "1.6797e-04", + "loss": 0.6853, + "slid_loss": 0.9928, + "step": 839, + "time": 11.03 + }, + { + "epoch": 0.42, + "learning_rate": "1.6817e-04", + "loss": 1.0876, + "slid_loss": 0.9935, + "step": 840, + "time": 12.19 + }, + { + "epoch": 0.42, + "learning_rate": "1.6837e-04", + "loss": 0.9407, + "slid_loss": 0.9933, + "step": 841, + "time": 13.01 + }, + { + "epoch": 0.42, + "learning_rate": "1.6857e-04", + "loss": 1.1554, + "slid_loss": 0.9936, + "step": 842, + "time": 12.79 + }, + { + "epoch": 0.42, + "learning_rate": "1.6877e-04", + "loss": 0.8587, + "slid_loss": 0.9908, + "step": 843, + "time": 12.79 + }, + { + "epoch": 0.42, + "learning_rate": "1.6897e-04", + "loss": 1.0211, + "slid_loss": 0.9904, + "step": 844, + "time": 13.95 + }, + { + "epoch": 0.42, + "learning_rate": "1.6917e-04", + "loss": 0.9841, + "slid_loss": 0.9909, + "step": 845, + "time": 13.16 + }, + { + "epoch": 0.42, + "learning_rate": "1.6937e-04", + "loss": 0.9368, + "slid_loss": 0.989, + "step": 846, + "time": 12.76 + }, + { + "epoch": 0.42, + "learning_rate": "1.6957e-04", + "loss": 1.2659, + "slid_loss": 0.9926, + "step": 847, + "time": 12.88 + }, + { + "epoch": 0.42, + "learning_rate": "1.6977e-04", + "loss": 1.0786, + "slid_loss": 0.994, + "step": 848, + "time": 12.93 + }, + { + "epoch": 0.43, + "learning_rate": "1.6997e-04", + "loss": 1.0253, + "slid_loss": 0.9967, + "step": 849, + "time": 13.15 + }, + { + "epoch": 0.43, + "learning_rate": "1.7017e-04", + "loss": 1.1558, + "slid_loss": 0.9988, + "step": 850, + "time": 11.56 + }, + { + "epoch": 0.43, + "learning_rate": "1.7037e-04", + "loss": 1.1061, + "slid_loss": 0.9995, + "step": 851, + "time": 12.88 + }, + { + "epoch": 0.43, + "learning_rate": "1.7057e-04", + "loss": 0.89, + "slid_loss": 0.9992, + "step": 852, + "time": 12.29 + }, + { + "epoch": 0.43, + "learning_rate": "1.7077e-04", + "loss": 1.0161, + "slid_loss": 1.0, + "step": 853, + "time": 11.23 + }, + { + "epoch": 0.43, + "learning_rate": "1.7097e-04", + "loss": 1.0536, + "slid_loss": 1.0019, + "step": 854, + "time": 12.64 + }, + { + "epoch": 0.43, + "learning_rate": "1.7117e-04", + "loss": 1.0295, + "slid_loss": 1.0027, + "step": 855, + "time": 12.86 + }, + { + "epoch": 0.43, + "learning_rate": "1.7137e-04", + "loss": 0.8385, + "slid_loss": 1.0013, + "step": 856, + "time": 11.48 + }, + { + "epoch": 0.43, + "learning_rate": "1.7157e-04", + "loss": 0.9722, + "slid_loss": 1.0018, + "step": 857, + "time": 13.26 + }, + { + "epoch": 0.43, + "learning_rate": "1.7177e-04", + "loss": 0.9868, + "slid_loss": 1.0008, + "step": 858, + "time": 10.76 + }, + { + "epoch": 0.43, + "learning_rate": "1.7197e-04", + "loss": 1.0207, + "slid_loss": 1.0018, + "step": 859, + "time": 12.93 + }, + { + "epoch": 0.43, + "learning_rate": "1.7217e-04", + "loss": 0.8272, + "slid_loss": 0.9992, + "step": 860, + "time": 11.28 + }, + { + "epoch": 0.43, + "learning_rate": "1.7237e-04", + "loss": 0.9593, + "slid_loss": 1.0001, + "step": 861, + "time": 11.71 + }, + { + "epoch": 0.43, + "learning_rate": "1.7257e-04", + "loss": 1.1016, + "slid_loss": 1.0026, + "step": 862, + "time": 11.5 + }, + { + "epoch": 0.43, + "learning_rate": "1.7277e-04", + "loss": 0.9424, + "slid_loss": 1.0043, + "step": 863, + "time": 12.04 + }, + { + "epoch": 0.43, + "learning_rate": "1.7297e-04", + "loss": 0.7802, + "slid_loss": 1.0021, + "step": 864, + "time": 13.26 + }, + { + "epoch": 0.43, + "learning_rate": "1.7317e-04", + "loss": 1.0711, + "slid_loss": 1.0024, + "step": 865, + "time": 12.75 + }, + { + "epoch": 0.43, + "learning_rate": "1.7337e-04", + "loss": 0.9024, + "slid_loss": 1.0002, + "step": 866, + "time": 13.33 + }, + { + "epoch": 0.43, + "learning_rate": "1.7357e-04", + "loss": 1.0532, + "slid_loss": 1.0008, + "step": 867, + "time": 13.08 + }, + { + "epoch": 0.43, + "learning_rate": "1.7377e-04", + "loss": 0.9445, + "slid_loss": 1.0033, + "step": 868, + "time": 13.38 + }, + { + "epoch": 0.44, + "learning_rate": "1.7397e-04", + "loss": 1.094, + "slid_loss": 1.0027, + "step": 869, + "time": 13.7 + }, + { + "epoch": 0.44, + "learning_rate": "1.7417e-04", + "loss": 0.9928, + "slid_loss": 1.0028, + "step": 870, + "time": 10.41 + }, + { + "epoch": 0.44, + "learning_rate": "1.7437e-04", + "loss": 0.9968, + "slid_loss": 1.0026, + "step": 871, + "time": 11.34 + }, + { + "epoch": 0.44, + "learning_rate": "1.7457e-04", + "loss": 1.1157, + "slid_loss": 1.0061, + "step": 872, + "time": 11.4 + }, + { + "epoch": 0.44, + "learning_rate": "1.7477e-04", + "loss": 1.0351, + "slid_loss": 1.0063, + "step": 873, + "time": 12.88 + }, + { + "epoch": 0.44, + "learning_rate": "1.7497e-04", + "loss": 0.8377, + "slid_loss": 1.0063, + "step": 874, + "time": 10.28 + }, + { + "epoch": 0.44, + "learning_rate": "1.7518e-04", + "loss": 0.968, + "slid_loss": 1.0058, + "step": 875, + "time": 10.67 + }, + { + "epoch": 0.44, + "learning_rate": "1.7538e-04", + "loss": 1.0009, + "slid_loss": 1.0061, + "step": 876, + "time": 11.4 + }, + { + "epoch": 0.44, + "learning_rate": "1.7558e-04", + "loss": 1.0325, + "slid_loss": 1.0064, + "step": 877, + "time": 13.04 + }, + { + "epoch": 0.44, + "learning_rate": "1.7578e-04", + "loss": 1.0159, + "slid_loss": 1.0085, + "step": 878, + "time": 10.76 + }, + { + "epoch": 0.44, + "learning_rate": "1.7598e-04", + "loss": 0.7395, + "slid_loss": 1.0043, + "step": 879, + "time": 11.3 + }, + { + "epoch": 0.44, + "learning_rate": "1.7618e-04", + "loss": 0.9787, + "slid_loss": 1.0051, + "step": 880, + "time": 10.66 + }, + { + "epoch": 0.44, + "learning_rate": "1.7638e-04", + "loss": 1.0721, + "slid_loss": 1.005, + "step": 881, + "time": 13.11 + }, + { + "epoch": 0.44, + "learning_rate": "1.7658e-04", + "loss": 1.1644, + "slid_loss": 1.0062, + "step": 882, + "time": 13.16 + }, + { + "epoch": 0.44, + "learning_rate": "1.7678e-04", + "loss": 0.9703, + "slid_loss": 1.0058, + "step": 883, + "time": 11.99 + }, + { + "epoch": 0.44, + "learning_rate": "1.7698e-04", + "loss": 0.8464, + "slid_loss": 1.0031, + "step": 884, + "time": 14.11 + }, + { + "epoch": 0.44, + "learning_rate": "1.7718e-04", + "loss": 0.8356, + "slid_loss": 1.0011, + "step": 885, + "time": 14.03 + }, + { + "epoch": 0.44, + "learning_rate": "1.7738e-04", + "loss": 1.036, + "slid_loss": 1.002, + "step": 886, + "time": 13.17 + }, + { + "epoch": 0.44, + "learning_rate": "1.7758e-04", + "loss": 0.8953, + "slid_loss": 1.0008, + "step": 887, + "time": 12.14 + }, + { + "epoch": 0.44, + "learning_rate": "1.7778e-04", + "loss": 1.062, + "slid_loss": 1.0004, + "step": 888, + "time": 11.35 + }, + { + "epoch": 0.45, + "learning_rate": "1.7798e-04", + "loss": 0.974, + "slid_loss": 1.0011, + "step": 889, + "time": 13.36 + }, + { + "epoch": 0.45, + "learning_rate": "1.7818e-04", + "loss": 0.7716, + "slid_loss": 0.9987, + "step": 890, + "time": 14.64 + }, + { + "epoch": 0.45, + "learning_rate": "1.7838e-04", + "loss": 0.9333, + "slid_loss": 0.998, + "step": 891, + "time": 12.65 + }, + { + "epoch": 0.45, + "learning_rate": "1.7858e-04", + "loss": 0.993, + "slid_loss": 0.9979, + "step": 892, + "time": 13.27 + }, + { + "epoch": 0.45, + "learning_rate": "1.7878e-04", + "loss": 1.015, + "slid_loss": 0.9992, + "step": 893, + "time": 11.07 + }, + { + "epoch": 0.45, + "learning_rate": "1.7898e-04", + "loss": 0.9053, + "slid_loss": 0.9982, + "step": 894, + "time": 11.85 + }, + { + "epoch": 0.45, + "learning_rate": "1.7918e-04", + "loss": 1.1113, + "slid_loss": 1.0001, + "step": 895, + "time": 13.65 + }, + { + "epoch": 0.45, + "learning_rate": "1.7938e-04", + "loss": 0.8451, + "slid_loss": 0.9977, + "step": 896, + "time": 13.29 + }, + { + "epoch": 0.45, + "learning_rate": "1.7958e-04", + "loss": 1.0186, + "slid_loss": 0.9974, + "step": 897, + "time": 12.99 + }, + { + "epoch": 0.45, + "learning_rate": "1.7978e-04", + "loss": 1.1995, + "slid_loss": 0.9982, + "step": 898, + "time": 13.8 + }, + { + "epoch": 0.45, + "learning_rate": "1.7998e-04", + "loss": 1.1038, + "slid_loss": 0.9995, + "step": 899, + "time": 11.34 + }, + { + "epoch": 0.45, + "learning_rate": "1.8018e-04", + "loss": 0.8819, + "slid_loss": 0.999, + "step": 900, + "time": 12.78 + }, + { + "epoch": 0.45, + "learning_rate": "1.8038e-04", + "loss": 1.0569, + "slid_loss": 0.998, + "step": 901, + "time": 11.05 + }, + { + "epoch": 0.45, + "learning_rate": "1.8058e-04", + "loss": 0.8932, + "slid_loss": 0.9972, + "step": 902, + "time": 13.52 + }, + { + "epoch": 0.45, + "learning_rate": "1.8078e-04", + "loss": 1.1022, + "slid_loss": 0.9972, + "step": 903, + "time": 11.82 + }, + { + "epoch": 0.45, + "learning_rate": "1.8098e-04", + "loss": 1.2032, + "slid_loss": 0.9982, + "step": 904, + "time": 12.02 + }, + { + "epoch": 0.45, + "learning_rate": "1.8118e-04", + "loss": 1.0504, + "slid_loss": 0.9984, + "step": 905, + "time": 11.47 + }, + { + "epoch": 0.45, + "learning_rate": "1.8138e-04", + "loss": 1.2869, + "slid_loss": 1.0014, + "step": 906, + "time": 13.76 + }, + { + "epoch": 0.45, + "learning_rate": "1.8158e-04", + "loss": 1.0623, + "slid_loss": 1.0024, + "step": 907, + "time": 11.68 + }, + { + "epoch": 0.45, + "learning_rate": "1.8178e-04", + "loss": 0.913, + "slid_loss": 1.0024, + "step": 908, + "time": 11.78 + }, + { + "epoch": 0.46, + "learning_rate": "1.8198e-04", + "loss": 0.9398, + "slid_loss": 1.0019, + "step": 909, + "time": 13.27 + }, + { + "epoch": 0.46, + "learning_rate": "1.8218e-04", + "loss": 0.8499, + "slid_loss": 0.9986, + "step": 910, + "time": 10.81 + }, + { + "epoch": 0.46, + "learning_rate": "1.8238e-04", + "loss": 0.9029, + "slid_loss": 0.9958, + "step": 911, + "time": 13.89 + }, + { + "epoch": 0.46, + "learning_rate": "1.8258e-04", + "loss": 1.1753, + "slid_loss": 0.9983, + "step": 912, + "time": 12.41 + }, + { + "epoch": 0.46, + "learning_rate": "1.8278e-04", + "loss": 1.0533, + "slid_loss": 0.9989, + "step": 913, + "time": 12.77 + }, + { + "epoch": 0.46, + "learning_rate": "1.8298e-04", + "loss": 1.0141, + "slid_loss": 0.9978, + "step": 914, + "time": 12.27 + }, + { + "epoch": 0.46, + "learning_rate": "1.8318e-04", + "loss": 0.8421, + "slid_loss": 0.9961, + "step": 915, + "time": 10.35 + }, + { + "epoch": 0.46, + "learning_rate": "1.8338e-04", + "loss": 0.8871, + "slid_loss": 0.9949, + "step": 916, + "time": 10.99 + }, + { + "epoch": 0.46, + "learning_rate": "1.8358e-04", + "loss": 0.8023, + "slid_loss": 0.9933, + "step": 917, + "time": 13.93 + }, + { + "epoch": 0.46, + "learning_rate": "1.8378e-04", + "loss": 1.1054, + "slid_loss": 0.9958, + "step": 918, + "time": 13.28 + }, + { + "epoch": 0.46, + "learning_rate": "1.8398e-04", + "loss": 0.9073, + "slid_loss": 0.9937, + "step": 919, + "time": 13.13 + }, + { + "epoch": 0.46, + "learning_rate": "1.8418e-04", + "loss": 1.0613, + "slid_loss": 0.9941, + "step": 920, + "time": 13.03 + }, + { + "epoch": 0.46, + "learning_rate": "1.8438e-04", + "loss": 0.9221, + "slid_loss": 0.9941, + "step": 921, + "time": 10.54 + }, + { + "epoch": 0.46, + "learning_rate": "1.8458e-04", + "loss": 0.9801, + "slid_loss": 0.9931, + "step": 922, + "time": 11.35 + }, + { + "epoch": 0.46, + "learning_rate": "1.8478e-04", + "loss": 1.0001, + "slid_loss": 0.9943, + "step": 923, + "time": 13.38 + }, + { + "epoch": 0.46, + "learning_rate": "1.8498e-04", + "loss": 0.8788, + "slid_loss": 0.9937, + "step": 924, + "time": 12.77 + }, + { + "epoch": 0.46, + "learning_rate": "1.8519e-04", + "loss": 0.9381, + "slid_loss": 0.9933, + "step": 925, + "time": 10.78 + }, + { + "epoch": 0.46, + "learning_rate": "1.8539e-04", + "loss": 0.9462, + "slid_loss": 0.9923, + "step": 926, + "time": 11.32 + }, + { + "epoch": 0.46, + "learning_rate": "1.8559e-04", + "loss": 1.0126, + "slid_loss": 0.9912, + "step": 927, + "time": 11.39 + }, + { + "epoch": 0.46, + "learning_rate": "1.8579e-04", + "loss": 0.8957, + "slid_loss": 0.9907, + "step": 928, + "time": 13.65 + }, + { + "epoch": 0.47, + "learning_rate": "1.8599e-04", + "loss": 0.9074, + "slid_loss": 0.989, + "step": 929, + "time": 12.05 + }, + { + "epoch": 0.47, + "learning_rate": "1.8619e-04", + "loss": 0.9293, + "slid_loss": 0.9874, + "step": 930, + "time": 12.16 + }, + { + "epoch": 0.47, + "learning_rate": "1.8639e-04", + "loss": 1.0061, + "slid_loss": 0.9879, + "step": 931, + "time": 11.89 + }, + { + "epoch": 0.47, + "learning_rate": "1.8659e-04", + "loss": 0.9501, + "slid_loss": 0.9875, + "step": 932, + "time": 13.22 + }, + { + "epoch": 0.47, + "learning_rate": "1.8679e-04", + "loss": 1.0155, + "slid_loss": 0.9867, + "step": 933, + "time": 13.67 + }, + { + "epoch": 0.47, + "learning_rate": "1.8699e-04", + "loss": 1.0143, + "slid_loss": 0.9865, + "step": 934, + "time": 12.22 + }, + { + "epoch": 0.47, + "learning_rate": "1.8719e-04", + "loss": 0.8843, + "slid_loss": 0.9854, + "step": 935, + "time": 11.47 + }, + { + "epoch": 0.47, + "learning_rate": "1.8739e-04", + "loss": 1.1457, + "slid_loss": 0.9874, + "step": 936, + "time": 11.55 + }, + { + "epoch": 0.47, + "learning_rate": "1.8759e-04", + "loss": 0.9521, + "slid_loss": 0.9864, + "step": 937, + "time": 13.18 + }, + { + "epoch": 0.47, + "learning_rate": "1.8779e-04", + "loss": 0.9237, + "slid_loss": 0.9854, + "step": 938, + "time": 14.03 + }, + { + "epoch": 0.47, + "learning_rate": "1.8799e-04", + "loss": 0.9677, + "slid_loss": 0.9883, + "step": 939, + "time": 13.23 + }, + { + "epoch": 0.47, + "learning_rate": "1.8819e-04", + "loss": 1.0528, + "slid_loss": 0.9879, + "step": 940, + "time": 13.34 + }, + { + "epoch": 0.47, + "learning_rate": "1.8839e-04", + "loss": 1.0544, + "slid_loss": 0.9891, + "step": 941, + "time": 13.63 + }, + { + "epoch": 0.47, + "learning_rate": "1.8859e-04", + "loss": 0.8545, + "slid_loss": 0.986, + "step": 942, + "time": 13.59 + }, + { + "epoch": 0.47, + "learning_rate": "1.8879e-04", + "loss": 0.8217, + "slid_loss": 0.9857, + "step": 943, + "time": 12.78 + }, + { + "epoch": 0.47, + "learning_rate": "1.8899e-04", + "loss": 1.0863, + "slid_loss": 0.9863, + "step": 944, + "time": 13.38 + }, + { + "epoch": 0.47, + "learning_rate": "1.8919e-04", + "loss": 0.9395, + "slid_loss": 0.9859, + "step": 945, + "time": 11.92 + }, + { + "epoch": 0.47, + "learning_rate": "1.8939e-04", + "loss": 0.9759, + "slid_loss": 0.9863, + "step": 946, + "time": 13.52 + }, + { + "epoch": 0.47, + "learning_rate": "1.8959e-04", + "loss": 0.8751, + "slid_loss": 0.9824, + "step": 947, + "time": 11.92 + }, + { + "epoch": 0.47, + "learning_rate": "1.8979e-04", + "loss": 0.9168, + "slid_loss": 0.9807, + "step": 948, + "time": 11.02 + }, + { + "epoch": 0.48, + "learning_rate": "1.8999e-04", + "loss": 1.1771, + "slid_loss": 0.9823, + "step": 949, + "time": 14.01 + }, + { + "epoch": 0.48, + "learning_rate": "1.9019e-04", + "loss": 0.8459, + "slid_loss": 0.9792, + "step": 950, + "time": 13.27 + }, + { + "epoch": 0.48, + "learning_rate": "1.9039e-04", + "loss": 1.027, + "slid_loss": 0.9784, + "step": 951, + "time": 11.71 + }, + { + "epoch": 0.48, + "learning_rate": "1.9059e-04", + "loss": 1.017, + "slid_loss": 0.9796, + "step": 952, + "time": 10.54 + }, + { + "epoch": 0.48, + "learning_rate": "1.9079e-04", + "loss": 1.045, + "slid_loss": 0.9799, + "step": 953, + "time": 12.74 + }, + { + "epoch": 0.48, + "learning_rate": "1.9099e-04", + "loss": 0.9951, + "slid_loss": 0.9793, + "step": 954, + "time": 13.71 + }, + { + "epoch": 0.48, + "learning_rate": "1.9119e-04", + "loss": 1.0681, + "slid_loss": 0.9797, + "step": 955, + "time": 12.47 + }, + { + "epoch": 0.48, + "learning_rate": "1.9139e-04", + "loss": 1.1254, + "slid_loss": 0.9826, + "step": 956, + "time": 12.72 + }, + { + "epoch": 0.48, + "learning_rate": "1.9159e-04", + "loss": 0.9721, + "slid_loss": 0.9826, + "step": 957, + "time": 12.91 + }, + { + "epoch": 0.48, + "learning_rate": "1.9179e-04", + "loss": 1.0831, + "slid_loss": 0.9836, + "step": 958, + "time": 13.41 + }, + { + "epoch": 0.48, + "learning_rate": "1.9199e-04", + "loss": 0.816, + "slid_loss": 0.9815, + "step": 959, + "time": 11.63 + }, + { + "epoch": 0.48, + "learning_rate": "1.9219e-04", + "loss": 0.9045, + "slid_loss": 0.9823, + "step": 960, + "time": 13.32 + }, + { + "epoch": 0.48, + "learning_rate": "1.9239e-04", + "loss": 1.0865, + "slid_loss": 0.9836, + "step": 961, + "time": 11.79 + }, + { + "epoch": 0.48, + "learning_rate": "1.9259e-04", + "loss": 0.6912, + "slid_loss": 0.9795, + "step": 962, + "time": 12.96 + }, + { + "epoch": 0.48, + "learning_rate": "1.9279e-04", + "loss": 0.9066, + "slid_loss": 0.9791, + "step": 963, + "time": 13.93 + }, + { + "epoch": 0.48, + "learning_rate": "1.9299e-04", + "loss": 0.9407, + "slid_loss": 0.9807, + "step": 964, + "time": 11.83 + }, + { + "epoch": 0.48, + "learning_rate": "1.9319e-04", + "loss": 1.0147, + "slid_loss": 0.9801, + "step": 965, + "time": 13.19 + }, + { + "epoch": 0.48, + "learning_rate": "1.9339e-04", + "loss": 0.8707, + "slid_loss": 0.9798, + "step": 966, + "time": 13.72 + }, + { + "epoch": 0.48, + "learning_rate": "1.9359e-04", + "loss": 1.0326, + "slid_loss": 0.9796, + "step": 967, + "time": 13.27 + }, + { + "epoch": 0.48, + "learning_rate": "1.9379e-04", + "loss": 1.1425, + "slid_loss": 0.9816, + "step": 968, + "time": 11.78 + }, + { + "epoch": 0.49, + "learning_rate": "1.9399e-04", + "loss": 0.947, + "slid_loss": 0.9801, + "step": 969, + "time": 13.14 + }, + { + "epoch": 0.49, + "learning_rate": "1.9419e-04", + "loss": 1.098, + "slid_loss": 0.9812, + "step": 970, + "time": 13.27 + }, + { + "epoch": 0.49, + "learning_rate": "1.9439e-04", + "loss": 0.9593, + "slid_loss": 0.9808, + "step": 971, + "time": 13.77 + }, + { + "epoch": 0.49, + "learning_rate": "1.9459e-04", + "loss": 0.8151, + "slid_loss": 0.9778, + "step": 972, + "time": 13.39 + }, + { + "epoch": 0.49, + "learning_rate": "1.9479e-04", + "loss": 1.0049, + "slid_loss": 0.9775, + "step": 973, + "time": 13.25 + }, + { + "epoch": 0.49, + "learning_rate": "1.9499e-04", + "loss": 0.9426, + "slid_loss": 0.9785, + "step": 974, + "time": 13.02 + }, + { + "epoch": 0.49, + "learning_rate": "1.9520e-04", + "loss": 1.0488, + "slid_loss": 0.9794, + "step": 975, + "time": 12.77 + }, + { + "epoch": 0.49, + "learning_rate": "1.9540e-04", + "loss": 1.0544, + "slid_loss": 0.9799, + "step": 976, + "time": 12.73 + }, + { + "epoch": 0.49, + "learning_rate": "1.9560e-04", + "loss": 1.0476, + "slid_loss": 0.98, + "step": 977, + "time": 12.23 + }, + { + "epoch": 0.49, + "learning_rate": "1.9580e-04", + "loss": 1.0649, + "slid_loss": 0.9805, + "step": 978, + "time": 14.09 + }, + { + "epoch": 0.49, + "learning_rate": "1.9600e-04", + "loss": 1.0402, + "slid_loss": 0.9835, + "step": 979, + "time": 12.25 + }, + { + "epoch": 0.49, + "learning_rate": "1.9620e-04", + "loss": 1.0792, + "slid_loss": 0.9845, + "step": 980, + "time": 10.7 + }, + { + "epoch": 0.49, + "learning_rate": "1.9640e-04", + "loss": 1.102, + "slid_loss": 0.9848, + "step": 981, + "time": 13.73 + }, + { + "epoch": 0.49, + "learning_rate": "1.9660e-04", + "loss": 0.9634, + "slid_loss": 0.9828, + "step": 982, + "time": 13.56 + }, + { + "epoch": 0.49, + "learning_rate": "1.9680e-04", + "loss": 1.1488, + "slid_loss": 0.9846, + "step": 983, + "time": 13.54 + }, + { + "epoch": 0.49, + "learning_rate": "1.9700e-04", + "loss": 0.9659, + "slid_loss": 0.9858, + "step": 984, + "time": 13.43 + }, + { + "epoch": 0.49, + "learning_rate": "1.9720e-04", + "loss": 1.0356, + "slid_loss": 0.9878, + "step": 985, + "time": 14.65 + }, + { + "epoch": 0.49, + "learning_rate": "1.9740e-04", + "loss": 0.9121, + "slid_loss": 0.9866, + "step": 986, + "time": 11.16 + }, + { + "epoch": 0.49, + "learning_rate": "1.9760e-04", + "loss": 0.8153, + "slid_loss": 0.9858, + "step": 987, + "time": 13.05 + }, + { + "epoch": 0.49, + "learning_rate": "1.9780e-04", + "loss": 1.0199, + "slid_loss": 0.9853, + "step": 988, + "time": 14.16 + }, + { + "epoch": 0.5, + "learning_rate": "1.9800e-04", + "loss": 0.8002, + "slid_loss": 0.9836, + "step": 989, + "time": 10.85 + }, + { + "epoch": 0.5, + "learning_rate": "1.9820e-04", + "loss": 0.7808, + "slid_loss": 0.9837, + "step": 990, + "time": 12.37 + }, + { + "epoch": 0.5, + "learning_rate": "1.9840e-04", + "loss": 1.0212, + "slid_loss": 0.9846, + "step": 991, + "time": 13.23 + }, + { + "epoch": 0.5, + "learning_rate": "1.9860e-04", + "loss": 1.0404, + "slid_loss": 0.9851, + "step": 992, + "time": 13.33 + }, + { + "epoch": 0.5, + "learning_rate": "1.9880e-04", + "loss": 0.9217, + "slid_loss": 0.9841, + "step": 993, + "time": 11.37 + }, + { + "epoch": 0.5, + "learning_rate": "1.9900e-04", + "loss": 0.9629, + "slid_loss": 0.9847, + "step": 994, + "time": 12.96 + }, + { + "epoch": 0.5, + "learning_rate": "1.9920e-04", + "loss": 0.9231, + "slid_loss": 0.9828, + "step": 995, + "time": 12.01 + }, + { + "epoch": 0.5, + "learning_rate": "1.9940e-04", + "loss": 0.8086, + "slid_loss": 0.9825, + "step": 996, + "time": 13.67 + }, + { + "epoch": 0.5, + "learning_rate": "1.9960e-04", + "loss": 1.0058, + "slid_loss": 0.9823, + "step": 997, + "time": 13.15 + }, + { + "epoch": 0.5, + "learning_rate": "1.9980e-04", + "loss": 0.9158, + "slid_loss": 0.9795, + "step": 998, + "time": 13.06 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 0.9405, + "slid_loss": 0.9779, + "step": 999, + "time": 12.42 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 0.9728, + "slid_loss": 0.9788, + "step": 1000, + "time": 11.52 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 0.7393, + "slid_loss": 0.9756, + "step": 1001, + "time": 14.17 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 0.9172, + "slid_loss": 0.9758, + "step": 1002, + "time": 13.69 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 1.0817, + "slid_loss": 0.9756, + "step": 1003, + "time": 14.13 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 0.8811, + "slid_loss": 0.9724, + "step": 1004, + "time": 10.67 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 1.1541, + "slid_loss": 0.9734, + "step": 1005, + "time": 13.55 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 0.48, + "slid_loss": 0.9654, + "step": 1006, + "time": 11.35 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 1.113, + "slid_loss": 0.9659, + "step": 1007, + "time": 12.54 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 1.0816, + "slid_loss": 0.9676, + "step": 1008, + "time": 13.46 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.0703, + "slid_loss": 0.9689, + "step": 1009, + "time": 13.08 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.8944, + "slid_loss": 0.9693, + "step": 1010, + "time": 11.98 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.8544, + "slid_loss": 0.9688, + "step": 1011, + "time": 13.86 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.6582, + "slid_loss": 0.9637, + "step": 1012, + "time": 13.91 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.8141, + "slid_loss": 0.9613, + "step": 1013, + "time": 14.06 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.9662, + "slid_loss": 0.9608, + "step": 1014, + "time": 10.44 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.0418, + "slid_loss": 0.9628, + "step": 1015, + "time": 10.99 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.915, + "slid_loss": 0.9631, + "step": 1016, + "time": 12.99 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.1316, + "slid_loss": 0.9664, + "step": 1017, + "time": 13.78 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.8006, + "slid_loss": 0.9633, + "step": 1018, + "time": 12.69 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.0105, + "slid_loss": 0.9643, + "step": 1019, + "time": 12.64 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.059, + "slid_loss": 0.9643, + "step": 1020, + "time": 13.48 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.9283, + "slid_loss": 0.9644, + "step": 1021, + "time": 11.92 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.9157, + "slid_loss": 0.9637, + "step": 1022, + "time": 13.22 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.9837, + "slid_loss": 0.9636, + "step": 1023, + "time": 12.76 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.0288, + "slid_loss": 0.9651, + "step": 1024, + "time": 12.8 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.9816, + "slid_loss": 0.9655, + "step": 1025, + "time": 13.25 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.9998, + "slid_loss": 0.966, + "step": 1026, + "time": 10.81 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.0071, + "slid_loss": 0.966, + "step": 1027, + "time": 11.7 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.7527, + "slid_loss": 0.9646, + "step": 1028, + "time": 14.09 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 0.8627, + "slid_loss": 0.9641, + "step": 1029, + "time": 11.61 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 0.9446, + "slid_loss": 0.9643, + "step": 1030, + "time": 14.03 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 0.8675, + "slid_loss": 0.9629, + "step": 1031, + "time": 14.0 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 0.7108, + "slid_loss": 0.9605, + "step": 1032, + "time": 10.77 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 0.9815, + "slid_loss": 0.9601, + "step": 1033, + "time": 11.26 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 1.0481, + "slid_loss": 0.9605, + "step": 1034, + "time": 13.93 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 0.9047, + "slid_loss": 0.9607, + "step": 1035, + "time": 10.9 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 0.84, + "slid_loss": 0.9576, + "step": 1036, + "time": 12.38 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 0.9991, + "slid_loss": 0.9581, + "step": 1037, + "time": 13.3 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 0.8995, + "slid_loss": 0.9579, + "step": 1038, + "time": 12.91 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 1.0454, + "slid_loss": 0.9586, + "step": 1039, + "time": 11.96 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 1.0141, + "slid_loss": 0.9582, + "step": 1040, + "time": 11.55 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 0.9779, + "slid_loss": 0.9575, + "step": 1041, + "time": 13.79 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 0.8849, + "slid_loss": 0.9578, + "step": 1042, + "time": 12.76 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 1.1703, + "slid_loss": 0.9613, + "step": 1043, + "time": 12.88 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 1.0625, + "slid_loss": 0.961, + "step": 1044, + "time": 12.13 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 0.8516, + "slid_loss": 0.9602, + "step": 1045, + "time": 12.28 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 0.9203, + "slid_loss": 0.9596, + "step": 1046, + "time": 13.0 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 0.7755, + "slid_loss": 0.9586, + "step": 1047, + "time": 13.29 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 0.9682, + "slid_loss": 0.9591, + "step": 1048, + "time": 12.91 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0741, + "slid_loss": 0.9581, + "step": 1049, + "time": 13.73 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.8716, + "slid_loss": 0.9583, + "step": 1050, + "time": 10.94 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.8088, + "slid_loss": 0.9562, + "step": 1051, + "time": 13.36 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0283, + "slid_loss": 0.9563, + "step": 1052, + "time": 13.23 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.8953, + "slid_loss": 0.9548, + "step": 1053, + "time": 13.22 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.989, + "slid_loss": 0.9547, + "step": 1054, + "time": 11.01 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0536, + "slid_loss": 0.9546, + "step": 1055, + "time": 12.78 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0536, + "slid_loss": 0.9539, + "step": 1056, + "time": 11.81 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0161, + "slid_loss": 0.9543, + "step": 1057, + "time": 10.81 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.8758, + "slid_loss": 0.9522, + "step": 1058, + "time": 10.83 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.956, + "slid_loss": 0.9536, + "step": 1059, + "time": 13.76 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.8679, + "slid_loss": 0.9533, + "step": 1060, + "time": 11.23 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.903, + "slid_loss": 0.9514, + "step": 1061, + "time": 13.34 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.8999, + "slid_loss": 0.9535, + "step": 1062, + "time": 11.83 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.8102, + "slid_loss": 0.9525, + "step": 1063, + "time": 13.69 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0386, + "slid_loss": 0.9535, + "step": 1064, + "time": 13.42 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.8721, + "slid_loss": 0.9521, + "step": 1065, + "time": 13.47 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0565, + "slid_loss": 0.954, + "step": 1066, + "time": 12.86 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.2189, + "slid_loss": 0.9558, + "step": 1067, + "time": 12.39 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.9757, + "slid_loss": 0.9542, + "step": 1068, + "time": 13.89 + }, + { + "epoch": 0.54, + "learning_rate": "1.9999e-04", + "loss": 1.0037, + "slid_loss": 0.9547, + "step": 1069, + "time": 13.06 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 0.8542, + "slid_loss": 0.9523, + "step": 1070, + "time": 12.21 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.116, + "slid_loss": 0.9538, + "step": 1071, + "time": 13.84 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 0.9717, + "slid_loss": 0.9554, + "step": 1072, + "time": 11.75 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 0.9439, + "slid_loss": 0.9548, + "step": 1073, + "time": 13.35 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 0.8501, + "slid_loss": 0.9539, + "step": 1074, + "time": 10.91 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.1141, + "slid_loss": 0.9545, + "step": 1075, + "time": 12.55 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.2376, + "slid_loss": 0.9564, + "step": 1076, + "time": 13.47 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0369, + "slid_loss": 0.9563, + "step": 1077, + "time": 13.01 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 0.9201, + "slid_loss": 0.9548, + "step": 1078, + "time": 11.83 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 0.982, + "slid_loss": 0.9542, + "step": 1079, + "time": 13.65 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.1529, + "slid_loss": 0.955, + "step": 1080, + "time": 13.62 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 0.8053, + "slid_loss": 0.952, + "step": 1081, + "time": 12.26 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0179, + "slid_loss": 0.9525, + "step": 1082, + "time": 11.36 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0778, + "slid_loss": 0.9518, + "step": 1083, + "time": 11.53 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 0.9989, + "slid_loss": 0.9522, + "step": 1084, + "time": 12.79 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0135, + "slid_loss": 0.9519, + "step": 1085, + "time": 12.7 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0497, + "slid_loss": 0.9533, + "step": 1086, + "time": 12.8 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0241, + "slid_loss": 0.9554, + "step": 1087, + "time": 13.89 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0997, + "slid_loss": 0.9562, + "step": 1088, + "time": 13.43 + }, + { + "epoch": 0.55, + "learning_rate": "1.9998e-04", + "loss": 0.9065, + "slid_loss": 0.9573, + "step": 1089, + "time": 13.88 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.6196, + "slid_loss": 0.9557, + "step": 1090, + "time": 12.57 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.7485, + "slid_loss": 0.9529, + "step": 1091, + "time": 12.7 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.9271, + "slid_loss": 0.9518, + "step": 1092, + "time": 11.67 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.0643, + "slid_loss": 0.9532, + "step": 1093, + "time": 13.51 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.1231, + "slid_loss": 0.9548, + "step": 1094, + "time": 13.26 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.0678, + "slid_loss": 0.9563, + "step": 1095, + "time": 13.04 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.8706, + "slid_loss": 0.9569, + "step": 1096, + "time": 12.65 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.1153, + "slid_loss": 0.958, + "step": 1097, + "time": 13.76 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.8853, + "slid_loss": 0.9577, + "step": 1098, + "time": 13.92 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.8845, + "slid_loss": 0.9571, + "step": 1099, + "time": 12.09 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.0112, + "slid_loss": 0.9575, + "step": 1100, + "time": 11.47 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.9016, + "slid_loss": 0.9591, + "step": 1101, + "time": 10.57 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.8857, + "slid_loss": 0.9588, + "step": 1102, + "time": 13.56 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.1432, + "slid_loss": 0.9594, + "step": 1103, + "time": 11.18 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.1351, + "slid_loss": 0.962, + "step": 1104, + "time": 11.02 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.1862, + "slid_loss": 0.9623, + "step": 1105, + "time": 13.79 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.8701, + "slid_loss": 0.9662, + "step": 1106, + "time": 14.65 + }, + { + "epoch": 0.55, + "learning_rate": "1.9996e-04", + "loss": 1.0495, + "slid_loss": 0.9655, + "step": 1107, + "time": 12.83 + }, + { + "epoch": 0.55, + "learning_rate": "1.9996e-04", + "loss": 1.2972, + "slid_loss": 0.9677, + "step": 1108, + "time": 11.55 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.0207, + "slid_loss": 0.9672, + "step": 1109, + "time": 13.01 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.0444, + "slid_loss": 0.9687, + "step": 1110, + "time": 13.01 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.1148, + "slid_loss": 0.9713, + "step": 1111, + "time": 13.95 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.1082, + "slid_loss": 0.9758, + "step": 1112, + "time": 12.95 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.0004, + "slid_loss": 0.9777, + "step": 1113, + "time": 13.47 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.0342, + "slid_loss": 0.9784, + "step": 1114, + "time": 13.66 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 0.8196, + "slid_loss": 0.9761, + "step": 1115, + "time": 13.64 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 0.9132, + "slid_loss": 0.9761, + "step": 1116, + "time": 12.78 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 0.9672, + "slid_loss": 0.9745, + "step": 1117, + "time": 10.75 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 0.9451, + "slid_loss": 0.9759, + "step": 1118, + "time": 12.82 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.0194, + "slid_loss": 0.976, + "step": 1119, + "time": 12.11 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 0.7923, + "slid_loss": 0.9733, + "step": 1120, + "time": 14.22 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 0.7967, + "slid_loss": 0.972, + "step": 1121, + "time": 12.83 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 1.013, + "slid_loss": 0.973, + "step": 1122, + "time": 10.05 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 1.073, + "slid_loss": 0.9739, + "step": 1123, + "time": 13.4 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 0.8645, + "slid_loss": 0.9722, + "step": 1124, + "time": 11.66 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 0.8648, + "slid_loss": 0.9711, + "step": 1125, + "time": 13.19 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 1.0243, + "slid_loss": 0.9713, + "step": 1126, + "time": 13.81 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 0.7316, + "slid_loss": 0.9686, + "step": 1127, + "time": 14.16 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 1.0035, + "slid_loss": 0.9711, + "step": 1128, + "time": 12.94 + }, + { + "epoch": 0.57, + "learning_rate": "1.9995e-04", + "loss": 0.8882, + "slid_loss": 0.9713, + "step": 1129, + "time": 13.94 + }, + { + "epoch": 0.57, + "learning_rate": "1.9995e-04", + "loss": 0.8494, + "slid_loss": 0.9704, + "step": 1130, + "time": 12.29 + }, + { + "epoch": 0.57, + "learning_rate": "1.9995e-04", + "loss": 1.1736, + "slid_loss": 0.9734, + "step": 1131, + "time": 12.9 + }, + { + "epoch": 0.57, + "learning_rate": "1.9995e-04", + "loss": 0.8194, + "slid_loss": 0.9745, + "step": 1132, + "time": 11.31 + }, + { + "epoch": 0.57, + "learning_rate": "1.9995e-04", + "loss": 0.9923, + "slid_loss": 0.9746, + "step": 1133, + "time": 13.48 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 0.7299, + "slid_loss": 0.9714, + "step": 1134, + "time": 12.06 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 0.9284, + "slid_loss": 0.9717, + "step": 1135, + "time": 12.72 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 0.9441, + "slid_loss": 0.9727, + "step": 1136, + "time": 11.46 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 1.0213, + "slid_loss": 0.9729, + "step": 1137, + "time": 13.13 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 1.1123, + "slid_loss": 0.9751, + "step": 1138, + "time": 11.68 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 0.999, + "slid_loss": 0.9746, + "step": 1139, + "time": 10.88 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 0.9549, + "slid_loss": 0.974, + "step": 1140, + "time": 13.35 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 1.0059, + "slid_loss": 0.9743, + "step": 1141, + "time": 12.88 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 1.1227, + "slid_loss": 0.9767, + "step": 1142, + "time": 12.2 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 0.9333, + "slid_loss": 0.9743, + "step": 1143, + "time": 12.9 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 0.7758, + "slid_loss": 0.9714, + "step": 1144, + "time": 12.23 + }, + { + "epoch": 0.57, + "learning_rate": "1.9993e-04", + "loss": 0.7443, + "slid_loss": 0.9704, + "step": 1145, + "time": 13.65 + }, + { + "epoch": 0.57, + "learning_rate": "1.9993e-04", + "loss": 1.1396, + "slid_loss": 0.9726, + "step": 1146, + "time": 12.99 + }, + { + "epoch": 0.57, + "learning_rate": "1.9993e-04", + "loss": 0.8083, + "slid_loss": 0.9729, + "step": 1147, + "time": 10.33 + }, + { + "epoch": 0.57, + "learning_rate": "1.9993e-04", + "loss": 1.0093, + "slid_loss": 0.9733, + "step": 1148, + "time": 13.65 + }, + { + "epoch": 0.58, + "learning_rate": "1.9993e-04", + "loss": 0.8935, + "slid_loss": 0.9715, + "step": 1149, + "time": 13.07 + }, + { + "epoch": 0.58, + "learning_rate": "1.9993e-04", + "loss": 1.0075, + "slid_loss": 0.9729, + "step": 1150, + "time": 12.86 + }, + { + "epoch": 0.58, + "learning_rate": "1.9993e-04", + "loss": 1.0646, + "slid_loss": 0.9754, + "step": 1151, + "time": 13.34 + }, + { + "epoch": 0.58, + "learning_rate": "1.9993e-04", + "loss": 1.0635, + "slid_loss": 0.9758, + "step": 1152, + "time": 14.46 + }, + { + "epoch": 0.58, + "learning_rate": "1.9993e-04", + "loss": 1.1199, + "slid_loss": 0.978, + "step": 1153, + "time": 13.2 + }, + { + "epoch": 0.58, + "learning_rate": "1.9993e-04", + "loss": 0.945, + "slid_loss": 0.9776, + "step": 1154, + "time": 12.42 + }, + { + "epoch": 0.58, + "learning_rate": "1.9993e-04", + "loss": 1.0097, + "slid_loss": 0.9771, + "step": 1155, + "time": 13.84 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 0.8537, + "slid_loss": 0.9751, + "step": 1156, + "time": 12.04 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 0.9424, + "slid_loss": 0.9744, + "step": 1157, + "time": 12.87 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 0.9964, + "slid_loss": 0.9756, + "step": 1158, + "time": 14.14 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 1.0013, + "slid_loss": 0.9761, + "step": 1159, + "time": 12.85 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 0.9542, + "slid_loss": 0.9769, + "step": 1160, + "time": 12.99 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 0.9438, + "slid_loss": 0.9773, + "step": 1161, + "time": 12.65 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 0.813, + "slid_loss": 0.9765, + "step": 1162, + "time": 10.68 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 0.8812, + "slid_loss": 0.9772, + "step": 1163, + "time": 12.91 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 0.8673, + "slid_loss": 0.9755, + "step": 1164, + "time": 13.38 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 0.8411, + "slid_loss": 0.9751, + "step": 1165, + "time": 10.0 + }, + { + "epoch": 0.58, + "learning_rate": "1.9991e-04", + "loss": 0.8775, + "slid_loss": 0.9734, + "step": 1166, + "time": 11.35 + }, + { + "epoch": 0.58, + "learning_rate": "1.9991e-04", + "loss": 0.8007, + "slid_loss": 0.9692, + "step": 1167, + "time": 13.14 + }, + { + "epoch": 0.58, + "learning_rate": "1.9991e-04", + "loss": 1.0175, + "slid_loss": 0.9696, + "step": 1168, + "time": 13.27 + }, + { + "epoch": 0.59, + "learning_rate": "1.9991e-04", + "loss": 0.9264, + "slid_loss": 0.9688, + "step": 1169, + "time": 10.89 + }, + { + "epoch": 0.59, + "learning_rate": "1.9991e-04", + "loss": 0.9765, + "slid_loss": 0.97, + "step": 1170, + "time": 13.13 + }, + { + "epoch": 0.59, + "learning_rate": "1.9991e-04", + "loss": 0.8598, + "slid_loss": 0.9675, + "step": 1171, + "time": 11.8 + }, + { + "epoch": 0.59, + "learning_rate": "1.9991e-04", + "loss": 1.0353, + "slid_loss": 0.9681, + "step": 1172, + "time": 13.17 + }, + { + "epoch": 0.59, + "learning_rate": "1.9991e-04", + "loss": 0.8933, + "slid_loss": 0.9676, + "step": 1173, + "time": 12.19 + }, + { + "epoch": 0.59, + "learning_rate": "1.9991e-04", + "loss": 1.0616, + "slid_loss": 0.9697, + "step": 1174, + "time": 13.38 + }, + { + "epoch": 0.59, + "learning_rate": "1.9991e-04", + "loss": 1.0524, + "slid_loss": 0.9691, + "step": 1175, + "time": 13.32 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 0.7321, + "slid_loss": 0.964, + "step": 1176, + "time": 11.41 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 0.9152, + "slid_loss": 0.9628, + "step": 1177, + "time": 13.3 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 0.862, + "slid_loss": 0.9622, + "step": 1178, + "time": 13.13 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 1.0012, + "slid_loss": 0.9624, + "step": 1179, + "time": 13.38 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 0.8478, + "slid_loss": 0.9594, + "step": 1180, + "time": 11.31 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 0.977, + "slid_loss": 0.9611, + "step": 1181, + "time": 13.9 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 0.7662, + "slid_loss": 0.9586, + "step": 1182, + "time": 10.9 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 0.9885, + "slid_loss": 0.9577, + "step": 1183, + "time": 12.74 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 0.9405, + "slid_loss": 0.9571, + "step": 1184, + "time": 13.86 + }, + { + "epoch": 0.59, + "learning_rate": "1.9989e-04", + "loss": 0.9196, + "slid_loss": 0.9562, + "step": 1185, + "time": 11.98 + }, + { + "epoch": 0.59, + "learning_rate": "1.9989e-04", + "loss": 1.0578, + "slid_loss": 0.9563, + "step": 1186, + "time": 13.58 + }, + { + "epoch": 0.59, + "learning_rate": "1.9989e-04", + "loss": 0.7685, + "slid_loss": 0.9537, + "step": 1187, + "time": 12.09 + }, + { + "epoch": 0.59, + "learning_rate": "1.9989e-04", + "loss": 0.8594, + "slid_loss": 0.9513, + "step": 1188, + "time": 13.83 + }, + { + "epoch": 0.6, + "learning_rate": "1.9989e-04", + "loss": 0.8434, + "slid_loss": 0.9507, + "step": 1189, + "time": 12.81 + }, + { + "epoch": 0.6, + "learning_rate": "1.9989e-04", + "loss": 0.977, + "slid_loss": 0.9542, + "step": 1190, + "time": 11.29 + }, + { + "epoch": 0.6, + "learning_rate": "1.9989e-04", + "loss": 1.0209, + "slid_loss": 0.957, + "step": 1191, + "time": 13.49 + }, + { + "epoch": 0.6, + "learning_rate": "1.9989e-04", + "loss": 0.9963, + "slid_loss": 0.9577, + "step": 1192, + "time": 12.32 + }, + { + "epoch": 0.6, + "learning_rate": "1.9989e-04", + "loss": 0.8296, + "slid_loss": 0.9553, + "step": 1193, + "time": 11.86 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 1.0271, + "slid_loss": 0.9544, + "step": 1194, + "time": 13.27 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 0.8838, + "slid_loss": 0.9525, + "step": 1195, + "time": 11.11 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 1.0609, + "slid_loss": 0.9544, + "step": 1196, + "time": 12.05 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 0.9084, + "slid_loss": 0.9523, + "step": 1197, + "time": 10.6 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 0.9389, + "slid_loss": 0.9529, + "step": 1198, + "time": 13.54 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 0.8435, + "slid_loss": 0.9525, + "step": 1199, + "time": 10.98 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 0.8152, + "slid_loss": 0.9505, + "step": 1200, + "time": 11.2 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 0.8646, + "slid_loss": 0.9501, + "step": 1201, + "time": 13.39 + }, + { + "epoch": 0.6, + "learning_rate": "1.9987e-04", + "loss": 1.1444, + "slid_loss": 0.9527, + "step": 1202, + "time": 13.95 + }, + { + "epoch": 0.6, + "learning_rate": "1.9987e-04", + "loss": 0.9745, + "slid_loss": 0.951, + "step": 1203, + "time": 12.73 + }, + { + "epoch": 0.6, + "learning_rate": "1.9987e-04", + "loss": 1.1122, + "slid_loss": 0.9508, + "step": 1204, + "time": 11.5 + }, + { + "epoch": 0.6, + "learning_rate": "1.9987e-04", + "loss": 0.7688, + "slid_loss": 0.9466, + "step": 1205, + "time": 12.89 + }, + { + "epoch": 0.6, + "learning_rate": "1.9987e-04", + "loss": 0.9128, + "slid_loss": 0.9471, + "step": 1206, + "time": 11.29 + }, + { + "epoch": 0.6, + "learning_rate": "1.9987e-04", + "loss": 0.764, + "slid_loss": 0.9442, + "step": 1207, + "time": 12.82 + }, + { + "epoch": 0.6, + "learning_rate": "1.9987e-04", + "loss": 0.9962, + "slid_loss": 0.9412, + "step": 1208, + "time": 13.95 + }, + { + "epoch": 0.61, + "learning_rate": "1.9987e-04", + "loss": 0.9803, + "slid_loss": 0.9408, + "step": 1209, + "time": 11.48 + }, + { + "epoch": 0.61, + "learning_rate": "1.9986e-04", + "loss": 0.8016, + "slid_loss": 0.9384, + "step": 1210, + "time": 13.51 + }, + { + "epoch": 0.61, + "learning_rate": "1.9986e-04", + "loss": 1.0545, + "slid_loss": 0.9378, + "step": 1211, + "time": 12.82 + }, + { + "epoch": 0.61, + "learning_rate": "1.9986e-04", + "loss": 1.0358, + "slid_loss": 0.937, + "step": 1212, + "time": 10.96 + }, + { + "epoch": 0.61, + "learning_rate": "1.9986e-04", + "loss": 1.0123, + "slid_loss": 0.9372, + "step": 1213, + "time": 13.31 + }, + { + "epoch": 0.61, + "learning_rate": "1.9986e-04", + "loss": 0.952, + "slid_loss": 0.9363, + "step": 1214, + "time": 12.2 + }, + { + "epoch": 0.61, + "learning_rate": "1.9986e-04", + "loss": 0.9256, + "slid_loss": 0.9374, + "step": 1215, + "time": 11.34 + }, + { + "epoch": 0.61, + "learning_rate": "1.9986e-04", + "loss": 0.9751, + "slid_loss": 0.938, + "step": 1216, + "time": 11.53 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 0.8044, + "slid_loss": 0.9364, + "step": 1217, + "time": 11.6 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 0.9651, + "slid_loss": 0.9366, + "step": 1218, + "time": 13.2 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 1.0428, + "slid_loss": 0.9368, + "step": 1219, + "time": 13.56 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 1.1252, + "slid_loss": 0.9402, + "step": 1220, + "time": 11.3 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 0.9657, + "slid_loss": 0.9418, + "step": 1221, + "time": 14.33 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 0.8169, + "slid_loss": 0.9399, + "step": 1222, + "time": 11.68 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 0.9683, + "slid_loss": 0.9388, + "step": 1223, + "time": 11.28 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 1.0183, + "slid_loss": 0.9404, + "step": 1224, + "time": 13.54 + }, + { + "epoch": 0.61, + "learning_rate": "1.9984e-04", + "loss": 0.8069, + "slid_loss": 0.9398, + "step": 1225, + "time": 12.78 + }, + { + "epoch": 0.61, + "learning_rate": "1.9984e-04", + "loss": 1.05, + "slid_loss": 0.9401, + "step": 1226, + "time": 12.65 + }, + { + "epoch": 0.61, + "learning_rate": "1.9984e-04", + "loss": 1.1721, + "slid_loss": 0.9445, + "step": 1227, + "time": 13.68 + }, + { + "epoch": 0.61, + "learning_rate": "1.9984e-04", + "loss": 0.9998, + "slid_loss": 0.9444, + "step": 1228, + "time": 13.42 + }, + { + "epoch": 0.62, + "learning_rate": "1.9984e-04", + "loss": 0.9064, + "slid_loss": 0.9446, + "step": 1229, + "time": 11.59 + }, + { + "epoch": 0.62, + "learning_rate": "1.9984e-04", + "loss": 1.1523, + "slid_loss": 0.9476, + "step": 1230, + "time": 13.33 + }, + { + "epoch": 0.62, + "learning_rate": "1.9984e-04", + "loss": 0.9593, + "slid_loss": 0.9455, + "step": 1231, + "time": 13.37 + }, + { + "epoch": 0.62, + "learning_rate": "1.9983e-04", + "loss": 0.7872, + "slid_loss": 0.9452, + "step": 1232, + "time": 10.76 + }, + { + "epoch": 0.62, + "learning_rate": "1.9983e-04", + "loss": 0.903, + "slid_loss": 0.9443, + "step": 1233, + "time": 11.32 + }, + { + "epoch": 0.62, + "learning_rate": "1.9983e-04", + "loss": 0.64, + "slid_loss": 0.9434, + "step": 1234, + "time": 13.75 + }, + { + "epoch": 0.62, + "learning_rate": "1.9983e-04", + "loss": 0.8992, + "slid_loss": 0.9431, + "step": 1235, + "time": 12.14 + }, + { + "epoch": 0.62, + "learning_rate": "1.9983e-04", + "loss": 0.8761, + "slid_loss": 0.9424, + "step": 1236, + "time": 11.98 + }, + { + "epoch": 0.62, + "learning_rate": "1.9983e-04", + "loss": 1.0156, + "slid_loss": 0.9423, + "step": 1237, + "time": 14.88 + }, + { + "epoch": 0.62, + "learning_rate": "1.9983e-04", + "loss": 0.979, + "slid_loss": 0.941, + "step": 1238, + "time": 13.46 + }, + { + "epoch": 0.62, + "learning_rate": "1.9982e-04", + "loss": 0.9807, + "slid_loss": 0.9408, + "step": 1239, + "time": 10.88 + }, + { + "epoch": 0.62, + "learning_rate": "1.9982e-04", + "loss": 1.033, + "slid_loss": 0.9416, + "step": 1240, + "time": 11.89 + }, + { + "epoch": 0.62, + "learning_rate": "1.9982e-04", + "loss": 1.0751, + "slid_loss": 0.9423, + "step": 1241, + "time": 13.22 + }, + { + "epoch": 0.62, + "learning_rate": "1.9982e-04", + "loss": 1.1071, + "slid_loss": 0.9421, + "step": 1242, + "time": 11.4 + }, + { + "epoch": 0.62, + "learning_rate": "1.9982e-04", + "loss": 1.1819, + "slid_loss": 0.9446, + "step": 1243, + "time": 12.78 + }, + { + "epoch": 0.62, + "learning_rate": "1.9982e-04", + "loss": 0.8678, + "slid_loss": 0.9456, + "step": 1244, + "time": 12.94 + }, + { + "epoch": 0.62, + "learning_rate": "1.9982e-04", + "loss": 0.8866, + "slid_loss": 0.947, + "step": 1245, + "time": 13.65 + }, + { + "epoch": 0.62, + "learning_rate": "1.9981e-04", + "loss": 0.9169, + "slid_loss": 0.9447, + "step": 1246, + "time": 13.25 + }, + { + "epoch": 0.62, + "learning_rate": "1.9981e-04", + "loss": 0.8993, + "slid_loss": 0.9457, + "step": 1247, + "time": 11.06 + }, + { + "epoch": 0.62, + "learning_rate": "1.9981e-04", + "loss": 0.6256, + "slid_loss": 0.9418, + "step": 1248, + "time": 10.56 + }, + { + "epoch": 0.63, + "learning_rate": "1.9981e-04", + "loss": 0.77, + "slid_loss": 0.9406, + "step": 1249, + "time": 13.94 + }, + { + "epoch": 0.63, + "learning_rate": "1.9981e-04", + "loss": 0.9258, + "slid_loss": 0.9398, + "step": 1250, + "time": 12.79 + }, + { + "epoch": 0.63, + "learning_rate": "1.9981e-04", + "loss": 0.8971, + "slid_loss": 0.9381, + "step": 1251, + "time": 13.8 + }, + { + "epoch": 0.63, + "learning_rate": "1.9980e-04", + "loss": 0.9345, + "slid_loss": 0.9368, + "step": 1252, + "time": 11.38 + }, + { + "epoch": 0.63, + "learning_rate": "1.9980e-04", + "loss": 0.7929, + "slid_loss": 0.9335, + "step": 1253, + "time": 13.77 + }, + { + "epoch": 0.63, + "learning_rate": "1.9980e-04", + "loss": 1.0051, + "slid_loss": 0.9341, + "step": 1254, + "time": 13.31 + }, + { + "epoch": 0.63, + "learning_rate": "1.9980e-04", + "loss": 1.0786, + "slid_loss": 0.9348, + "step": 1255, + "time": 13.85 + }, + { + "epoch": 0.63, + "learning_rate": "1.9980e-04", + "loss": 1.0291, + "slid_loss": 0.9366, + "step": 1256, + "time": 10.6 + }, + { + "epoch": 0.63, + "learning_rate": "1.9980e-04", + "loss": 1.0701, + "slid_loss": 0.9379, + "step": 1257, + "time": 13.49 + }, + { + "epoch": 0.63, + "learning_rate": "1.9980e-04", + "loss": 0.9637, + "slid_loss": 0.9375, + "step": 1258, + "time": 11.74 + }, + { + "epoch": 0.63, + "learning_rate": "1.9979e-04", + "loss": 1.1205, + "slid_loss": 0.9387, + "step": 1259, + "time": 13.17 + }, + { + "epoch": 0.63, + "learning_rate": "1.9979e-04", + "loss": 1.089, + "slid_loss": 0.9401, + "step": 1260, + "time": 13.57 + }, + { + "epoch": 0.63, + "learning_rate": "1.9979e-04", + "loss": 0.8548, + "slid_loss": 0.9392, + "step": 1261, + "time": 13.48 + }, + { + "epoch": 0.63, + "learning_rate": "1.9979e-04", + "loss": 0.8937, + "slid_loss": 0.94, + "step": 1262, + "time": 14.01 + }, + { + "epoch": 0.63, + "learning_rate": "1.9979e-04", + "loss": 0.7767, + "slid_loss": 0.9389, + "step": 1263, + "time": 11.33 + }, + { + "epoch": 0.63, + "learning_rate": "1.9979e-04", + "loss": 1.0084, + "slid_loss": 0.9404, + "step": 1264, + "time": 13.28 + }, + { + "epoch": 0.63, + "learning_rate": "1.9978e-04", + "loss": 0.8706, + "slid_loss": 0.9406, + "step": 1265, + "time": 12.85 + }, + { + "epoch": 0.63, + "learning_rate": "1.9978e-04", + "loss": 0.7606, + "slid_loss": 0.9395, + "step": 1266, + "time": 13.58 + }, + { + "epoch": 0.63, + "learning_rate": "1.9978e-04", + "loss": 0.9295, + "slid_loss": 0.9408, + "step": 1267, + "time": 11.2 + }, + { + "epoch": 0.63, + "learning_rate": "1.9978e-04", + "loss": 0.8271, + "slid_loss": 0.9389, + "step": 1268, + "time": 11.4 + }, + { + "epoch": 0.64, + "learning_rate": "1.9978e-04", + "loss": 0.9064, + "slid_loss": 0.9387, + "step": 1269, + "time": 11.39 + }, + { + "epoch": 0.64, + "learning_rate": "1.9978e-04", + "loss": 0.9694, + "slid_loss": 0.9386, + "step": 1270, + "time": 11.16 + }, + { + "epoch": 0.64, + "learning_rate": "1.9977e-04", + "loss": 1.1109, + "slid_loss": 0.9411, + "step": 1271, + "time": 10.48 + }, + { + "epoch": 0.64, + "learning_rate": "1.9977e-04", + "loss": 1.0475, + "slid_loss": 0.9412, + "step": 1272, + "time": 13.76 + }, + { + "epoch": 0.64, + "learning_rate": "1.9977e-04", + "loss": 1.0954, + "slid_loss": 0.9432, + "step": 1273, + "time": 10.86 + }, + { + "epoch": 0.64, + "learning_rate": "1.9977e-04", + "loss": 1.0527, + "slid_loss": 0.9432, + "step": 1274, + "time": 11.75 + }, + { + "epoch": 0.64, + "learning_rate": "1.9977e-04", + "loss": 1.0806, + "slid_loss": 0.9434, + "step": 1275, + "time": 12.91 + }, + { + "epoch": 0.64, + "learning_rate": "1.9977e-04", + "loss": 0.9079, + "slid_loss": 0.9452, + "step": 1276, + "time": 13.61 + }, + { + "epoch": 0.64, + "learning_rate": "1.9976e-04", + "loss": 0.6851, + "slid_loss": 0.9429, + "step": 1277, + "time": 10.44 + }, + { + "epoch": 0.64, + "learning_rate": "1.9976e-04", + "loss": 0.9501, + "slid_loss": 0.9438, + "step": 1278, + "time": 12.87 + }, + { + "epoch": 0.64, + "learning_rate": "1.9976e-04", + "loss": 1.0614, + "slid_loss": 0.9444, + "step": 1279, + "time": 11.29 + }, + { + "epoch": 0.64, + "learning_rate": "1.9976e-04", + "loss": 0.7871, + "slid_loss": 0.9438, + "step": 1280, + "time": 14.02 + }, + { + "epoch": 0.64, + "learning_rate": "1.9976e-04", + "loss": 0.8958, + "slid_loss": 0.943, + "step": 1281, + "time": 12.19 + }, + { + "epoch": 0.64, + "learning_rate": "1.9976e-04", + "loss": 0.938, + "slid_loss": 0.9447, + "step": 1282, + "time": 13.62 + }, + { + "epoch": 0.64, + "learning_rate": "1.9975e-04", + "loss": 0.9574, + "slid_loss": 0.9444, + "step": 1283, + "time": 13.68 + }, + { + "epoch": 0.64, + "learning_rate": "1.9975e-04", + "loss": 0.9099, + "slid_loss": 0.9441, + "step": 1284, + "time": 12.44 + }, + { + "epoch": 0.64, + "learning_rate": "1.9975e-04", + "loss": 1.06, + "slid_loss": 0.9455, + "step": 1285, + "time": 12.15 + }, + { + "epoch": 0.64, + "learning_rate": "1.9975e-04", + "loss": 1.0401, + "slid_loss": 0.9453, + "step": 1286, + "time": 12.89 + }, + { + "epoch": 0.64, + "learning_rate": "1.9975e-04", + "loss": 0.8875, + "slid_loss": 0.9465, + "step": 1287, + "time": 12.26 + }, + { + "epoch": 0.64, + "learning_rate": "1.9975e-04", + "loss": 0.9817, + "slid_loss": 0.9477, + "step": 1288, + "time": 10.82 + }, + { + "epoch": 0.65, + "learning_rate": "1.9974e-04", + "loss": 0.7788, + "slid_loss": 0.9471, + "step": 1289, + "time": 14.0 + }, + { + "epoch": 0.65, + "learning_rate": "1.9974e-04", + "loss": 0.9167, + "slid_loss": 0.9465, + "step": 1290, + "time": 10.72 + }, + { + "epoch": 0.65, + "learning_rate": "1.9974e-04", + "loss": 0.9565, + "slid_loss": 0.9458, + "step": 1291, + "time": 12.85 + }, + { + "epoch": 0.65, + "learning_rate": "1.9974e-04", + "loss": 1.001, + "slid_loss": 0.9459, + "step": 1292, + "time": 13.33 + }, + { + "epoch": 0.65, + "learning_rate": "1.9974e-04", + "loss": 0.789, + "slid_loss": 0.9455, + "step": 1293, + "time": 11.22 + }, + { + "epoch": 0.65, + "learning_rate": "1.9973e-04", + "loss": 0.8365, + "slid_loss": 0.9435, + "step": 1294, + "time": 11.14 + }, + { + "epoch": 0.65, + "learning_rate": "1.9973e-04", + "loss": 1.0339, + "slid_loss": 0.945, + "step": 1295, + "time": 13.28 + }, + { + "epoch": 0.65, + "learning_rate": "1.9973e-04", + "loss": 1.0724, + "slid_loss": 0.9452, + "step": 1296, + "time": 13.16 + }, + { + "epoch": 0.65, + "learning_rate": "1.9973e-04", + "loss": 1.0632, + "slid_loss": 0.9467, + "step": 1297, + "time": 14.19 + }, + { + "epoch": 0.65, + "learning_rate": "1.9973e-04", + "loss": 0.7551, + "slid_loss": 0.9449, + "step": 1298, + "time": 11.43 + }, + { + "epoch": 0.65, + "learning_rate": "1.9973e-04", + "loss": 0.9419, + "slid_loss": 0.9459, + "step": 1299, + "time": 12.18 + }, + { + "epoch": 0.65, + "learning_rate": "1.9972e-04", + "loss": 0.8521, + "slid_loss": 0.9462, + "step": 1300, + "time": 13.71 + }, + { + "epoch": 0.65, + "learning_rate": "1.9972e-04", + "loss": 0.9158, + "slid_loss": 0.9467, + "step": 1301, + "time": 10.81 + }, + { + "epoch": 0.65, + "learning_rate": "1.9972e-04", + "loss": 0.8836, + "slid_loss": 0.9441, + "step": 1302, + "time": 13.13 + }, + { + "epoch": 0.65, + "learning_rate": "1.9972e-04", + "loss": 0.8724, + "slid_loss": 0.9431, + "step": 1303, + "time": 13.28 + }, + { + "epoch": 0.65, + "learning_rate": "1.9972e-04", + "loss": 0.8201, + "slid_loss": 0.9402, + "step": 1304, + "time": 10.66 + }, + { + "epoch": 0.65, + "learning_rate": "1.9971e-04", + "loss": 0.9152, + "slid_loss": 0.9417, + "step": 1305, + "time": 11.49 + }, + { + "epoch": 0.65, + "learning_rate": "1.9971e-04", + "loss": 0.7945, + "slid_loss": 0.9405, + "step": 1306, + "time": 13.3 + }, + { + "epoch": 0.65, + "learning_rate": "1.9971e-04", + "loss": 1.0571, + "slid_loss": 0.9434, + "step": 1307, + "time": 10.93 + }, + { + "epoch": 0.65, + "learning_rate": "1.9971e-04", + "loss": 1.0161, + "slid_loss": 0.9436, + "step": 1308, + "time": 11.33 + }, + { + "epoch": 0.66, + "learning_rate": "1.9971e-04", + "loss": 0.7809, + "slid_loss": 0.9416, + "step": 1309, + "time": 13.04 + }, + { + "epoch": 0.66, + "learning_rate": "1.9970e-04", + "loss": 1.0217, + "slid_loss": 0.9438, + "step": 1310, + "time": 12.81 + }, + { + "epoch": 0.66, + "learning_rate": "1.9970e-04", + "loss": 0.9089, + "slid_loss": 0.9423, + "step": 1311, + "time": 10.74 + }, + { + "epoch": 0.66, + "learning_rate": "1.9970e-04", + "loss": 1.0653, + "slid_loss": 0.9426, + "step": 1312, + "time": 11.21 + }, + { + "epoch": 0.66, + "learning_rate": "1.9970e-04", + "loss": 0.9121, + "slid_loss": 0.9416, + "step": 1313, + "time": 11.2 + }, + { + "epoch": 0.66, + "learning_rate": "1.9970e-04", + "loss": 1.1991, + "slid_loss": 0.9441, + "step": 1314, + "time": 14.69 + }, + { + "epoch": 0.66, + "learning_rate": "1.9970e-04", + "loss": 0.9589, + "slid_loss": 0.9444, + "step": 1315, + "time": 12.77 + }, + { + "epoch": 0.66, + "learning_rate": "1.9969e-04", + "loss": 0.7707, + "slid_loss": 0.9424, + "step": 1316, + "time": 10.53 + }, + { + "epoch": 0.66, + "learning_rate": "1.9969e-04", + "loss": 0.9035, + "slid_loss": 0.9434, + "step": 1317, + "time": 13.9 + }, + { + "epoch": 0.66, + "learning_rate": "1.9969e-04", + "loss": 0.9275, + "slid_loss": 0.943, + "step": 1318, + "time": 11.51 + }, + { + "epoch": 0.66, + "learning_rate": "1.9969e-04", + "loss": 0.7916, + "slid_loss": 0.9405, + "step": 1319, + "time": 11.17 + }, + { + "epoch": 0.66, + "learning_rate": "1.9969e-04", + "loss": 0.9791, + "slid_loss": 0.939, + "step": 1320, + "time": 13.92 + }, + { + "epoch": 0.66, + "learning_rate": "1.9968e-04", + "loss": 0.9902, + "slid_loss": 0.9393, + "step": 1321, + "time": 13.49 + }, + { + "epoch": 0.66, + "learning_rate": "1.9968e-04", + "loss": 0.9574, + "slid_loss": 0.9407, + "step": 1322, + "time": 11.59 + }, + { + "epoch": 0.66, + "learning_rate": "1.9968e-04", + "loss": 0.8267, + "slid_loss": 0.9393, + "step": 1323, + "time": 12.84 + }, + { + "epoch": 0.66, + "learning_rate": "1.9968e-04", + "loss": 1.1293, + "slid_loss": 0.9404, + "step": 1324, + "time": 11.8 + }, + { + "epoch": 0.66, + "learning_rate": "1.9968e-04", + "loss": 0.9777, + "slid_loss": 0.9421, + "step": 1325, + "time": 12.36 + }, + { + "epoch": 0.66, + "learning_rate": "1.9967e-04", + "loss": 0.8237, + "slid_loss": 0.9398, + "step": 1326, + "time": 13.04 + }, + { + "epoch": 0.66, + "learning_rate": "1.9967e-04", + "loss": 0.9997, + "slid_loss": 0.9381, + "step": 1327, + "time": 11.35 + }, + { + "epoch": 0.66, + "learning_rate": "1.9967e-04", + "loss": 0.8081, + "slid_loss": 0.9362, + "step": 1328, + "time": 11.75 + }, + { + "epoch": 0.67, + "learning_rate": "1.9967e-04", + "loss": 0.7672, + "slid_loss": 0.9348, + "step": 1329, + "time": 11.11 + }, + { + "epoch": 0.67, + "learning_rate": "1.9967e-04", + "loss": 0.9852, + "slid_loss": 0.9331, + "step": 1330, + "time": 13.44 + }, + { + "epoch": 0.67, + "learning_rate": "1.9966e-04", + "loss": 1.0138, + "slid_loss": 0.9337, + "step": 1331, + "time": 13.66 + }, + { + "epoch": 0.67, + "learning_rate": "1.9966e-04", + "loss": 0.9457, + "slid_loss": 0.9353, + "step": 1332, + "time": 10.64 + }, + { + "epoch": 0.67, + "learning_rate": "1.9966e-04", + "loss": 0.9785, + "slid_loss": 0.936, + "step": 1333, + "time": 13.86 + }, + { + "epoch": 0.67, + "learning_rate": "1.9966e-04", + "loss": 0.953, + "slid_loss": 0.9391, + "step": 1334, + "time": 13.34 + }, + { + "epoch": 0.67, + "learning_rate": "1.9966e-04", + "loss": 0.9272, + "slid_loss": 0.9394, + "step": 1335, + "time": 13.89 + }, + { + "epoch": 0.67, + "learning_rate": "1.9965e-04", + "loss": 1.0109, + "slid_loss": 0.9408, + "step": 1336, + "time": 13.75 + }, + { + "epoch": 0.67, + "learning_rate": "1.9965e-04", + "loss": 1.1442, + "slid_loss": 0.9421, + "step": 1337, + "time": 13.97 + }, + { + "epoch": 0.67, + "learning_rate": "1.9965e-04", + "loss": 0.922, + "slid_loss": 0.9415, + "step": 1338, + "time": 13.4 + }, + { + "epoch": 0.67, + "learning_rate": "1.9965e-04", + "loss": 0.9563, + "slid_loss": 0.9412, + "step": 1339, + "time": 11.28 + }, + { + "epoch": 0.67, + "learning_rate": "1.9965e-04", + "loss": 0.9989, + "slid_loss": 0.9409, + "step": 1340, + "time": 11.95 + }, + { + "epoch": 0.67, + "learning_rate": "1.9964e-04", + "loss": 0.8735, + "slid_loss": 0.9389, + "step": 1341, + "time": 13.0 + }, + { + "epoch": 0.67, + "learning_rate": "1.9964e-04", + "loss": 1.0665, + "slid_loss": 0.9385, + "step": 1342, + "time": 13.32 + }, + { + "epoch": 0.67, + "learning_rate": "1.9964e-04", + "loss": 1.0326, + "slid_loss": 0.937, + "step": 1343, + "time": 10.96 + }, + { + "epoch": 0.67, + "learning_rate": "1.9964e-04", + "loss": 0.7872, + "slid_loss": 0.9362, + "step": 1344, + "time": 11.55 + }, + { + "epoch": 0.67, + "learning_rate": "1.9963e-04", + "loss": 0.9291, + "slid_loss": 0.9366, + "step": 1345, + "time": 13.37 + }, + { + "epoch": 0.67, + "learning_rate": "1.9963e-04", + "loss": 0.9814, + "slid_loss": 0.9373, + "step": 1346, + "time": 10.7 + }, + { + "epoch": 0.67, + "learning_rate": "1.9963e-04", + "loss": 1.1193, + "slid_loss": 0.9395, + "step": 1347, + "time": 13.77 + }, + { + "epoch": 0.68, + "learning_rate": "1.9963e-04", + "loss": 0.9178, + "slid_loss": 0.9424, + "step": 1348, + "time": 14.37 + }, + { + "epoch": 0.68, + "learning_rate": "1.9963e-04", + "loss": 1.0811, + "slid_loss": 0.9455, + "step": 1349, + "time": 11.32 + }, + { + "epoch": 0.68, + "learning_rate": "1.9962e-04", + "loss": 1.0889, + "slid_loss": 0.9471, + "step": 1350, + "time": 13.16 + }, + { + "epoch": 0.68, + "learning_rate": "1.9962e-04", + "loss": 0.99, + "slid_loss": 0.948, + "step": 1351, + "time": 14.31 + }, + { + "epoch": 0.68, + "learning_rate": "1.9962e-04", + "loss": 1.1007, + "slid_loss": 0.9497, + "step": 1352, + "time": 11.3 + }, + { + "epoch": 0.68, + "learning_rate": "1.9962e-04", + "loss": 1.0021, + "slid_loss": 0.9518, + "step": 1353, + "time": 13.92 + }, + { + "epoch": 0.68, + "learning_rate": "1.9962e-04", + "loss": 0.974, + "slid_loss": 0.9515, + "step": 1354, + "time": 11.11 + }, + { + "epoch": 0.68, + "learning_rate": "1.9961e-04", + "loss": 0.8903, + "slid_loss": 0.9496, + "step": 1355, + "time": 10.7 + }, + { + "epoch": 0.68, + "learning_rate": "1.9961e-04", + "loss": 0.9934, + "slid_loss": 0.9492, + "step": 1356, + "time": 11.6 + }, + { + "epoch": 0.68, + "learning_rate": "1.9961e-04", + "loss": 0.9353, + "slid_loss": 0.9479, + "step": 1357, + "time": 10.3 + }, + { + "epoch": 0.68, + "learning_rate": "1.9961e-04", + "loss": 0.8963, + "slid_loss": 0.9472, + "step": 1358, + "time": 11.25 + }, + { + "epoch": 0.68, + "learning_rate": "1.9960e-04", + "loss": 0.8839, + "slid_loss": 0.9449, + "step": 1359, + "time": 12.3 + }, + { + "epoch": 0.68, + "learning_rate": "1.9960e-04", + "loss": 0.8527, + "slid_loss": 0.9425, + "step": 1360, + "time": 10.83 + }, + { + "epoch": 0.68, + "learning_rate": "1.9960e-04", + "loss": 0.7124, + "slid_loss": 0.9411, + "step": 1361, + "time": 13.57 + }, + { + "epoch": 0.68, + "learning_rate": "1.9960e-04", + "loss": 0.9048, + "slid_loss": 0.9412, + "step": 1362, + "time": 12.78 + }, + { + "epoch": 0.68, + "learning_rate": "1.9960e-04", + "loss": 0.9514, + "slid_loss": 0.9429, + "step": 1363, + "time": 11.55 + }, + { + "epoch": 0.68, + "learning_rate": "1.9959e-04", + "loss": 0.9298, + "slid_loss": 0.9421, + "step": 1364, + "time": 13.22 + }, + { + "epoch": 0.68, + "learning_rate": "1.9959e-04", + "loss": 0.9104, + "slid_loss": 0.9425, + "step": 1365, + "time": 13.93 + }, + { + "epoch": 0.68, + "learning_rate": "1.9959e-04", + "loss": 0.8326, + "slid_loss": 0.9433, + "step": 1366, + "time": 14.37 + }, + { + "epoch": 0.68, + "learning_rate": "1.9959e-04", + "loss": 0.9284, + "slid_loss": 0.9433, + "step": 1367, + "time": 13.48 + }, + { + "epoch": 0.69, + "learning_rate": "1.9958e-04", + "loss": 1.0038, + "slid_loss": 0.945, + "step": 1368, + "time": 11.99 + }, + { + "epoch": 0.69, + "learning_rate": "1.9958e-04", + "loss": 0.6651, + "slid_loss": 0.9426, + "step": 1369, + "time": 11.72 + }, + { + "epoch": 0.69, + "learning_rate": "1.9958e-04", + "loss": 0.9965, + "slid_loss": 0.9429, + "step": 1370, + "time": 12.76 + }, + { + "epoch": 0.69, + "learning_rate": "1.9958e-04", + "loss": 0.9013, + "slid_loss": 0.9408, + "step": 1371, + "time": 11.54 + }, + { + "epoch": 0.69, + "learning_rate": "1.9958e-04", + "loss": 0.8169, + "slid_loss": 0.9385, + "step": 1372, + "time": 13.51 + }, + { + "epoch": 0.69, + "learning_rate": "1.9957e-04", + "loss": 1.0554, + "slid_loss": 0.9381, + "step": 1373, + "time": 13.64 + }, + { + "epoch": 0.69, + "learning_rate": "1.9957e-04", + "loss": 0.9444, + "slid_loss": 0.937, + "step": 1374, + "time": 12.81 + }, + { + "epoch": 0.69, + "learning_rate": "1.9957e-04", + "loss": 0.9829, + "slid_loss": 0.936, + "step": 1375, + "time": 12.55 + }, + { + "epoch": 0.69, + "learning_rate": "1.9957e-04", + "loss": 0.8035, + "slid_loss": 0.935, + "step": 1376, + "time": 13.85 + }, + { + "epoch": 0.69, + "learning_rate": "1.9956e-04", + "loss": 0.9302, + "slid_loss": 0.9374, + "step": 1377, + "time": 14.09 + }, + { + "epoch": 0.69, + "learning_rate": "1.9956e-04", + "loss": 0.84, + "slid_loss": 0.9363, + "step": 1378, + "time": 13.66 + }, + { + "epoch": 0.69, + "learning_rate": "1.9956e-04", + "loss": 0.735, + "slid_loss": 0.9331, + "step": 1379, + "time": 12.17 + }, + { + "epoch": 0.69, + "learning_rate": "1.9956e-04", + "loss": 0.9769, + "slid_loss": 0.935, + "step": 1380, + "time": 11.7 + }, + { + "epoch": 0.69, + "learning_rate": "1.9955e-04", + "loss": 0.8984, + "slid_loss": 0.935, + "step": 1381, + "time": 13.93 + }, + { + "epoch": 0.69, + "learning_rate": "1.9955e-04", + "loss": 0.959, + "slid_loss": 0.9352, + "step": 1382, + "time": 13.45 + }, + { + "epoch": 0.69, + "learning_rate": "1.9955e-04", + "loss": 1.0353, + "slid_loss": 0.936, + "step": 1383, + "time": 11.6 + }, + { + "epoch": 0.69, + "learning_rate": "1.9955e-04", + "loss": 0.942, + "slid_loss": 0.9363, + "step": 1384, + "time": 10.57 + }, + { + "epoch": 0.69, + "learning_rate": "1.9955e-04", + "loss": 1.0026, + "slid_loss": 0.9357, + "step": 1385, + "time": 12.81 + }, + { + "epoch": 0.69, + "learning_rate": "1.9954e-04", + "loss": 1.0907, + "slid_loss": 0.9362, + "step": 1386, + "time": 13.53 + }, + { + "epoch": 0.69, + "learning_rate": "1.9954e-04", + "loss": 0.7741, + "slid_loss": 0.9351, + "step": 1387, + "time": 12.23 + }, + { + "epoch": 0.7, + "learning_rate": "1.9954e-04", + "loss": 0.9571, + "slid_loss": 0.9348, + "step": 1388, + "time": 13.37 + }, + { + "epoch": 0.7, + "learning_rate": "1.9954e-04", + "loss": 0.7247, + "slid_loss": 0.9343, + "step": 1389, + "time": 11.21 + }, + { + "epoch": 0.7, + "learning_rate": "1.9953e-04", + "loss": 0.8932, + "slid_loss": 0.9341, + "step": 1390, + "time": 10.92 + }, + { + "epoch": 0.7, + "learning_rate": "1.9953e-04", + "loss": 0.8577, + "slid_loss": 0.9331, + "step": 1391, + "time": 12.58 + }, + { + "epoch": 0.7, + "learning_rate": "1.9953e-04", + "loss": 0.9915, + "slid_loss": 0.933, + "step": 1392, + "time": 11.05 + }, + { + "epoch": 0.7, + "learning_rate": "1.9953e-04", + "loss": 1.0929, + "slid_loss": 0.936, + "step": 1393, + "time": 12.81 + }, + { + "epoch": 0.7, + "learning_rate": "1.9952e-04", + "loss": 1.0756, + "slid_loss": 0.9384, + "step": 1394, + "time": 13.0 + }, + { + "epoch": 0.7, + "learning_rate": "1.9952e-04", + "loss": 0.8659, + "slid_loss": 0.9367, + "step": 1395, + "time": 12.91 + }, + { + "epoch": 0.7, + "learning_rate": "1.9952e-04", + "loss": 0.9219, + "slid_loss": 0.9352, + "step": 1396, + "time": 13.08 + }, + { + "epoch": 0.7, + "learning_rate": "1.9952e-04", + "loss": 1.0202, + "slid_loss": 0.9348, + "step": 1397, + "time": 13.35 + }, + { + "epoch": 0.7, + "learning_rate": "1.9951e-04", + "loss": 0.8728, + "slid_loss": 0.936, + "step": 1398, + "time": 12.8 + }, + { + "epoch": 0.7, + "learning_rate": "1.9951e-04", + "loss": 0.7164, + "slid_loss": 0.9337, + "step": 1399, + "time": 12.94 + }, + { + "epoch": 0.7, + "learning_rate": "1.9951e-04", + "loss": 0.8352, + "slid_loss": 0.9336, + "step": 1400, + "time": 14.17 + }, + { + "epoch": 0.7, + "learning_rate": "1.9951e-04", + "loss": 1.0367, + "slid_loss": 0.9348, + "step": 1401, + "time": 12.91 + }, + { + "epoch": 0.7, + "learning_rate": "1.9950e-04", + "loss": 0.893, + "slid_loss": 0.9349, + "step": 1402, + "time": 13.14 + }, + { + "epoch": 0.7, + "learning_rate": "1.9950e-04", + "loss": 0.9282, + "slid_loss": 0.9354, + "step": 1403, + "time": 13.86 + }, + { + "epoch": 0.7, + "learning_rate": "1.9950e-04", + "loss": 0.962, + "slid_loss": 0.9368, + "step": 1404, + "time": 13.85 + }, + { + "epoch": 0.7, + "learning_rate": "1.9950e-04", + "loss": 0.823, + "slid_loss": 0.9359, + "step": 1405, + "time": 10.94 + }, + { + "epoch": 0.7, + "learning_rate": "1.9949e-04", + "loss": 0.9737, + "slid_loss": 0.9377, + "step": 1406, + "time": 14.34 + }, + { + "epoch": 0.7, + "learning_rate": "1.9949e-04", + "loss": 0.8381, + "slid_loss": 0.9355, + "step": 1407, + "time": 12.27 + }, + { + "epoch": 0.71, + "learning_rate": "1.9949e-04", + "loss": 0.7996, + "slid_loss": 0.9333, + "step": 1408, + "time": 13.31 + }, + { + "epoch": 0.71, + "learning_rate": "1.9949e-04", + "loss": 1.0237, + "slid_loss": 0.9358, + "step": 1409, + "time": 11.75 + }, + { + "epoch": 0.71, + "learning_rate": "1.9948e-04", + "loss": 0.895, + "slid_loss": 0.9345, + "step": 1410, + "time": 13.89 + }, + { + "epoch": 0.71, + "learning_rate": "1.9948e-04", + "loss": 0.7558, + "slid_loss": 0.933, + "step": 1411, + "time": 14.0 + }, + { + "epoch": 0.71, + "learning_rate": "1.9948e-04", + "loss": 0.9651, + "slid_loss": 0.932, + "step": 1412, + "time": 11.18 + }, + { + "epoch": 0.71, + "learning_rate": "1.9948e-04", + "loss": 1.1361, + "slid_loss": 0.9342, + "step": 1413, + "time": 13.17 + }, + { + "epoch": 0.71, + "learning_rate": "1.9947e-04", + "loss": 0.8962, + "slid_loss": 0.9312, + "step": 1414, + "time": 13.16 + }, + { + "epoch": 0.71, + "learning_rate": "1.9947e-04", + "loss": 1.0973, + "slid_loss": 0.9326, + "step": 1415, + "time": 13.43 + }, + { + "epoch": 0.71, + "learning_rate": "1.9947e-04", + "loss": 0.8926, + "slid_loss": 0.9338, + "step": 1416, + "time": 12.5 + }, + { + "epoch": 0.71, + "learning_rate": "1.9947e-04", + "loss": 0.9314, + "slid_loss": 0.9341, + "step": 1417, + "time": 13.19 + }, + { + "epoch": 0.71, + "learning_rate": "1.9946e-04", + "loss": 0.8507, + "slid_loss": 0.9333, + "step": 1418, + "time": 12.06 + }, + { + "epoch": 0.71, + "learning_rate": "1.9946e-04", + "loss": 0.7083, + "slid_loss": 0.9325, + "step": 1419, + "time": 13.36 + }, + { + "epoch": 0.71, + "learning_rate": "1.9946e-04", + "loss": 0.928, + "slid_loss": 0.932, + "step": 1420, + "time": 13.73 + }, + { + "epoch": 0.71, + "learning_rate": "1.9946e-04", + "loss": 0.9936, + "slid_loss": 0.932, + "step": 1421, + "time": 11.61 + }, + { + "epoch": 0.71, + "learning_rate": "1.9945e-04", + "loss": 0.8847, + "slid_loss": 0.9313, + "step": 1422, + "time": 11.81 + }, + { + "epoch": 0.71, + "learning_rate": "1.9945e-04", + "loss": 0.9529, + "slid_loss": 0.9325, + "step": 1423, + "time": 11.25 + }, + { + "epoch": 0.71, + "learning_rate": "1.9945e-04", + "loss": 0.8202, + "slid_loss": 0.9294, + "step": 1424, + "time": 11.09 + }, + { + "epoch": 0.71, + "learning_rate": "1.9945e-04", + "loss": 0.9801, + "slid_loss": 0.9295, + "step": 1425, + "time": 12.76 + }, + { + "epoch": 0.71, + "learning_rate": "1.9944e-04", + "loss": 1.0725, + "slid_loss": 0.9319, + "step": 1426, + "time": 12.77 + }, + { + "epoch": 0.71, + "learning_rate": "1.9944e-04", + "loss": 0.9034, + "slid_loss": 0.931, + "step": 1427, + "time": 13.88 + }, + { + "epoch": 0.72, + "learning_rate": "1.9944e-04", + "loss": 0.9546, + "slid_loss": 0.9324, + "step": 1428, + "time": 10.9 + }, + { + "epoch": 0.72, + "learning_rate": "1.9944e-04", + "loss": 0.9634, + "slid_loss": 0.9344, + "step": 1429, + "time": 13.34 + }, + { + "epoch": 0.72, + "learning_rate": "1.9943e-04", + "loss": 1.0195, + "slid_loss": 0.9348, + "step": 1430, + "time": 14.09 + }, + { + "epoch": 0.72, + "learning_rate": "1.9943e-04", + "loss": 1.0249, + "slid_loss": 0.9349, + "step": 1431, + "time": 13.38 + }, + { + "epoch": 0.72, + "learning_rate": "1.9943e-04", + "loss": 0.957, + "slid_loss": 0.935, + "step": 1432, + "time": 11.23 + }, + { + "epoch": 0.72, + "learning_rate": "1.9943e-04", + "loss": 1.0221, + "slid_loss": 0.9354, + "step": 1433, + "time": 10.76 + }, + { + "epoch": 0.72, + "learning_rate": "1.9942e-04", + "loss": 0.9665, + "slid_loss": 0.9355, + "step": 1434, + "time": 11.0 + }, + { + "epoch": 0.72, + "learning_rate": "1.9942e-04", + "loss": 0.9655, + "slid_loss": 0.9359, + "step": 1435, + "time": 13.21 + }, + { + "epoch": 0.72, + "learning_rate": "1.9942e-04", + "loss": 0.6977, + "slid_loss": 0.9328, + "step": 1436, + "time": 11.0 + }, + { + "epoch": 0.72, + "learning_rate": "1.9941e-04", + "loss": 0.9761, + "slid_loss": 0.9311, + "step": 1437, + "time": 11.1 + }, + { + "epoch": 0.72, + "learning_rate": "1.9941e-04", + "loss": 0.8799, + "slid_loss": 0.9307, + "step": 1438, + "time": 12.01 + }, + { + "epoch": 0.72, + "learning_rate": "1.9941e-04", + "loss": 0.9586, + "slid_loss": 0.9307, + "step": 1439, + "time": 10.88 + }, + { + "epoch": 0.72, + "learning_rate": "1.9941e-04", + "loss": 0.8033, + "slid_loss": 0.9288, + "step": 1440, + "time": 11.67 + }, + { + "epoch": 0.72, + "learning_rate": "1.9940e-04", + "loss": 0.851, + "slid_loss": 0.9285, + "step": 1441, + "time": 13.57 + }, + { + "epoch": 0.72, + "learning_rate": "1.9940e-04", + "loss": 0.7407, + "slid_loss": 0.9253, + "step": 1442, + "time": 10.79 + }, + { + "epoch": 0.72, + "learning_rate": "1.9940e-04", + "loss": 0.9437, + "slid_loss": 0.9244, + "step": 1443, + "time": 14.29 + }, + { + "epoch": 0.72, + "learning_rate": "1.9940e-04", + "loss": 0.8681, + "slid_loss": 0.9252, + "step": 1444, + "time": 11.51 + }, + { + "epoch": 0.72, + "learning_rate": "1.9939e-04", + "loss": 0.7177, + "slid_loss": 0.9231, + "step": 1445, + "time": 10.47 + }, + { + "epoch": 0.72, + "learning_rate": "1.9939e-04", + "loss": 0.9042, + "slid_loss": 0.9223, + "step": 1446, + "time": 13.27 + }, + { + "epoch": 0.72, + "learning_rate": "1.9939e-04", + "loss": 0.7529, + "slid_loss": 0.9186, + "step": 1447, + "time": 12.06 + }, + { + "epoch": 0.73, + "learning_rate": "1.9939e-04", + "loss": 0.9792, + "slid_loss": 0.9193, + "step": 1448, + "time": 12.84 + }, + { + "epoch": 0.73, + "learning_rate": "1.9938e-04", + "loss": 0.901, + "slid_loss": 0.9175, + "step": 1449, + "time": 13.42 + }, + { + "epoch": 0.73, + "learning_rate": "1.9938e-04", + "loss": 0.8181, + "slid_loss": 0.9148, + "step": 1450, + "time": 13.23 + }, + { + "epoch": 0.73, + "learning_rate": "1.9938e-04", + "loss": 0.8266, + "slid_loss": 0.9131, + "step": 1451, + "time": 13.46 + }, + { + "epoch": 0.73, + "learning_rate": "1.9937e-04", + "loss": 0.9325, + "slid_loss": 0.9114, + "step": 1452, + "time": 12.95 + }, + { + "epoch": 0.73, + "learning_rate": "1.9937e-04", + "loss": 1.0768, + "slid_loss": 0.9122, + "step": 1453, + "time": 12.82 + }, + { + "epoch": 0.73, + "learning_rate": "1.9937e-04", + "loss": 0.9384, + "slid_loss": 0.9118, + "step": 1454, + "time": 13.39 + }, + { + "epoch": 0.73, + "learning_rate": "1.9937e-04", + "loss": 1.0132, + "slid_loss": 0.9131, + "step": 1455, + "time": 13.76 + }, + { + "epoch": 0.73, + "learning_rate": "1.9936e-04", + "loss": 0.8802, + "slid_loss": 0.9119, + "step": 1456, + "time": 11.86 + }, + { + "epoch": 0.73, + "learning_rate": "1.9936e-04", + "loss": 0.9396, + "slid_loss": 0.912, + "step": 1457, + "time": 10.82 + }, + { + "epoch": 0.73, + "learning_rate": "1.9936e-04", + "loss": 0.9155, + "slid_loss": 0.9122, + "step": 1458, + "time": 12.2 + }, + { + "epoch": 0.73, + "learning_rate": "1.9935e-04", + "loss": 1.0991, + "slid_loss": 0.9143, + "step": 1459, + "time": 10.78 + }, + { + "epoch": 0.73, + "learning_rate": "1.9935e-04", + "loss": 0.8188, + "slid_loss": 0.914, + "step": 1460, + "time": 12.56 + }, + { + "epoch": 0.73, + "learning_rate": "1.9935e-04", + "loss": 1.0984, + "slid_loss": 0.9178, + "step": 1461, + "time": 11.5 + }, + { + "epoch": 0.73, + "learning_rate": "1.9935e-04", + "loss": 0.9742, + "slid_loss": 0.9185, + "step": 1462, + "time": 13.28 + }, + { + "epoch": 0.73, + "learning_rate": "1.9934e-04", + "loss": 1.0649, + "slid_loss": 0.9197, + "step": 1463, + "time": 13.02 + }, + { + "epoch": 0.73, + "learning_rate": "1.9934e-04", + "loss": 0.9527, + "slid_loss": 0.9199, + "step": 1464, + "time": 10.74 + }, + { + "epoch": 0.73, + "learning_rate": "1.9934e-04", + "loss": 0.9588, + "slid_loss": 0.9204, + "step": 1465, + "time": 14.79 + }, + { + "epoch": 0.73, + "learning_rate": "1.9934e-04", + "loss": 0.9712, + "slid_loss": 0.9218, + "step": 1466, + "time": 10.96 + }, + { + "epoch": 0.73, + "learning_rate": "1.9933e-04", + "loss": 0.8632, + "slid_loss": 0.9211, + "step": 1467, + "time": 13.52 + }, + { + "epoch": 0.74, + "learning_rate": "1.9933e-04", + "loss": 0.8661, + "slid_loss": 0.9197, + "step": 1468, + "time": 11.05 + }, + { + "epoch": 0.74, + "learning_rate": "1.9933e-04", + "loss": 0.8336, + "slid_loss": 0.9214, + "step": 1469, + "time": 12.86 + }, + { + "epoch": 0.74, + "learning_rate": "1.9932e-04", + "loss": 0.9476, + "slid_loss": 0.9209, + "step": 1470, + "time": 13.42 + }, + { + "epoch": 0.74, + "learning_rate": "1.9932e-04", + "loss": 0.9597, + "slid_loss": 0.9215, + "step": 1471, + "time": 13.06 + }, + { + "epoch": 0.74, + "learning_rate": "1.9932e-04", + "loss": 0.9523, + "slid_loss": 0.9229, + "step": 1472, + "time": 10.85 + }, + { + "epoch": 0.74, + "learning_rate": "1.9932e-04", + "loss": 0.8893, + "slid_loss": 0.9212, + "step": 1473, + "time": 14.57 + }, + { + "epoch": 0.74, + "learning_rate": "1.9931e-04", + "loss": 0.8309, + "slid_loss": 0.9201, + "step": 1474, + "time": 11.18 + }, + { + "epoch": 0.74, + "learning_rate": "1.9931e-04", + "loss": 0.8479, + "slid_loss": 0.9187, + "step": 1475, + "time": 13.78 + }, + { + "epoch": 0.74, + "learning_rate": "1.9931e-04", + "loss": 1.0207, + "slid_loss": 0.9209, + "step": 1476, + "time": 10.73 + }, + { + "epoch": 0.74, + "learning_rate": "1.9930e-04", + "loss": 0.7394, + "slid_loss": 0.919, + "step": 1477, + "time": 13.57 + }, + { + "epoch": 0.74, + "learning_rate": "1.9930e-04", + "loss": 0.8444, + "slid_loss": 0.919, + "step": 1478, + "time": 12.89 + }, + { + "epoch": 0.74, + "learning_rate": "1.9930e-04", + "loss": 0.8974, + "slid_loss": 0.9207, + "step": 1479, + "time": 14.74 + }, + { + "epoch": 0.74, + "learning_rate": "1.9929e-04", + "loss": 0.9155, + "slid_loss": 0.92, + "step": 1480, + "time": 13.3 + }, + { + "epoch": 0.74, + "learning_rate": "1.9929e-04", + "loss": 0.7855, + "slid_loss": 0.9189, + "step": 1481, + "time": 11.91 + }, + { + "epoch": 0.74, + "learning_rate": "1.9929e-04", + "loss": 0.9455, + "slid_loss": 0.9188, + "step": 1482, + "time": 12.99 + }, + { + "epoch": 0.74, + "learning_rate": "1.9929e-04", + "loss": 0.9188, + "slid_loss": 0.9176, + "step": 1483, + "time": 11.7 + }, + { + "epoch": 0.74, + "learning_rate": "1.9928e-04", + "loss": 0.9808, + "slid_loss": 0.918, + "step": 1484, + "time": 10.89 + }, + { + "epoch": 0.74, + "learning_rate": "1.9928e-04", + "loss": 0.8722, + "slid_loss": 0.9167, + "step": 1485, + "time": 11.31 + }, + { + "epoch": 0.74, + "learning_rate": "1.9928e-04", + "loss": 0.9432, + "slid_loss": 0.9152, + "step": 1486, + "time": 11.36 + }, + { + "epoch": 0.74, + "learning_rate": "1.9927e-04", + "loss": 1.0218, + "slid_loss": 0.9177, + "step": 1487, + "time": 13.66 + }, + { + "epoch": 0.75, + "learning_rate": "1.9927e-04", + "loss": 0.7929, + "slid_loss": 0.9161, + "step": 1488, + "time": 13.0 + }, + { + "epoch": 0.75, + "learning_rate": "1.9927e-04", + "loss": 0.9477, + "slid_loss": 0.9183, + "step": 1489, + "time": 13.88 + }, + { + "epoch": 0.75, + "learning_rate": "1.9927e-04", + "loss": 0.967, + "slid_loss": 0.919, + "step": 1490, + "time": 10.82 + }, + { + "epoch": 0.75, + "learning_rate": "1.9926e-04", + "loss": 1.0034, + "slid_loss": 0.9205, + "step": 1491, + "time": 13.22 + }, + { + "epoch": 0.75, + "learning_rate": "1.9926e-04", + "loss": 1.0248, + "slid_loss": 0.9208, + "step": 1492, + "time": 11.94 + }, + { + "epoch": 0.75, + "learning_rate": "1.9926e-04", + "loss": 0.8936, + "slid_loss": 0.9188, + "step": 1493, + "time": 10.77 + }, + { + "epoch": 0.75, + "learning_rate": "1.9925e-04", + "loss": 0.9765, + "slid_loss": 0.9178, + "step": 1494, + "time": 11.74 + }, + { + "epoch": 0.75, + "learning_rate": "1.9925e-04", + "loss": 0.7625, + "slid_loss": 0.9168, + "step": 1495, + "time": 10.52 + }, + { + "epoch": 0.75, + "learning_rate": "1.9925e-04", + "loss": 0.8509, + "slid_loss": 0.9161, + "step": 1496, + "time": 13.45 + }, + { + "epoch": 0.75, + "learning_rate": "1.9924e-04", + "loss": 1.0385, + "slid_loss": 0.9163, + "step": 1497, + "time": 11.39 + }, + { + "epoch": 0.75, + "learning_rate": "1.9924e-04", + "loss": 0.89, + "slid_loss": 0.9164, + "step": 1498, + "time": 11.86 + }, + { + "epoch": 0.75, + "learning_rate": "1.9924e-04", + "loss": 0.9445, + "slid_loss": 0.9187, + "step": 1499, + "time": 13.31 + }, + { + "epoch": 0.75, + "learning_rate": "1.9923e-04", + "loss": 0.8723, + "slid_loss": 0.9191, + "step": 1500, + "time": 11.3 + }, + { + "epoch": 0.75, + "learning_rate": "1.9923e-04", + "loss": 0.9791, + "slid_loss": 0.9185, + "step": 1501, + "time": 12.95 + }, + { + "epoch": 0.75, + "learning_rate": "1.9923e-04", + "loss": 0.8779, + "slid_loss": 0.9184, + "step": 1502, + "time": 13.13 + }, + { + "epoch": 0.75, + "learning_rate": "1.9923e-04", + "loss": 1.0174, + "slid_loss": 0.9193, + "step": 1503, + "time": 13.37 + }, + { + "epoch": 0.75, + "learning_rate": "1.9922e-04", + "loss": 0.8114, + "slid_loss": 0.9177, + "step": 1504, + "time": 12.08 + }, + { + "epoch": 0.75, + "learning_rate": "1.9922e-04", + "loss": 0.8144, + "slid_loss": 0.9177, + "step": 1505, + "time": 10.69 + }, + { + "epoch": 0.75, + "learning_rate": "1.9922e-04", + "loss": 1.0348, + "slid_loss": 0.9183, + "step": 1506, + "time": 12.25 + }, + { + "epoch": 0.75, + "learning_rate": "1.9921e-04", + "loss": 0.9889, + "slid_loss": 0.9198, + "step": 1507, + "time": 13.33 + }, + { + "epoch": 0.76, + "learning_rate": "1.9921e-04", + "loss": 1.0301, + "slid_loss": 0.9221, + "step": 1508, + "time": 12.86 + }, + { + "epoch": 0.76, + "learning_rate": "1.9921e-04", + "loss": 0.8854, + "slid_loss": 0.9207, + "step": 1509, + "time": 12.84 + }, + { + "epoch": 0.76, + "learning_rate": "1.9920e-04", + "loss": 1.02, + "slid_loss": 0.922, + "step": 1510, + "time": 11.25 + }, + { + "epoch": 0.76, + "learning_rate": "1.9920e-04", + "loss": 0.9724, + "slid_loss": 0.9241, + "step": 1511, + "time": 12.85 + }, + { + "epoch": 0.76, + "learning_rate": "1.9920e-04", + "loss": 0.8073, + "slid_loss": 0.9225, + "step": 1512, + "time": 13.92 + }, + { + "epoch": 0.76, + "learning_rate": "1.9919e-04", + "loss": 0.9708, + "slid_loss": 0.9209, + "step": 1513, + "time": 11.92 + }, + { + "epoch": 0.76, + "learning_rate": "1.9919e-04", + "loss": 0.7853, + "slid_loss": 0.9198, + "step": 1514, + "time": 11.09 + }, + { + "epoch": 0.76, + "learning_rate": "1.9919e-04", + "loss": 0.9453, + "slid_loss": 0.9183, + "step": 1515, + "time": 13.55 + }, + { + "epoch": 0.76, + "learning_rate": "1.9919e-04", + "loss": 0.7971, + "slid_loss": 0.9173, + "step": 1516, + "time": 13.52 + }, + { + "epoch": 0.76, + "learning_rate": "1.9918e-04", + "loss": 0.932, + "slid_loss": 0.9173, + "step": 1517, + "time": 12.66 + }, + { + "epoch": 0.76, + "learning_rate": "1.9918e-04", + "loss": 0.9511, + "slid_loss": 0.9183, + "step": 1518, + "time": 11.33 + }, + { + "epoch": 0.76, + "learning_rate": "1.9918e-04", + "loss": 0.9017, + "slid_loss": 0.9202, + "step": 1519, + "time": 13.49 + }, + { + "epoch": 0.76, + "learning_rate": "1.9917e-04", + "loss": 0.8748, + "slid_loss": 0.9197, + "step": 1520, + "time": 14.13 + }, + { + "epoch": 0.76, + "learning_rate": "1.9917e-04", + "loss": 0.9422, + "slid_loss": 0.9192, + "step": 1521, + "time": 12.21 + }, + { + "epoch": 0.76, + "learning_rate": "1.9917e-04", + "loss": 0.8493, + "slid_loss": 0.9188, + "step": 1522, + "time": 11.29 + }, + { + "epoch": 0.76, + "learning_rate": "1.9916e-04", + "loss": 0.8821, + "slid_loss": 0.9181, + "step": 1523, + "time": 11.87 + }, + { + "epoch": 0.76, + "learning_rate": "1.9916e-04", + "loss": 0.9549, + "slid_loss": 0.9195, + "step": 1524, + "time": 13.19 + }, + { + "epoch": 0.76, + "learning_rate": "1.9916e-04", + "loss": 0.8202, + "slid_loss": 0.9179, + "step": 1525, + "time": 10.93 + }, + { + "epoch": 0.76, + "learning_rate": "1.9915e-04", + "loss": 0.8557, + "slid_loss": 0.9157, + "step": 1526, + "time": 12.81 + }, + { + "epoch": 0.76, + "learning_rate": "1.9915e-04", + "loss": 0.8954, + "slid_loss": 0.9156, + "step": 1527, + "time": 11.99 + }, + { + "epoch": 0.77, + "learning_rate": "1.9915e-04", + "loss": 0.8377, + "slid_loss": 0.9145, + "step": 1528, + "time": 11.96 + }, + { + "epoch": 0.77, + "learning_rate": "1.9914e-04", + "loss": 0.8673, + "slid_loss": 0.9135, + "step": 1529, + "time": 12.77 + }, + { + "epoch": 0.77, + "learning_rate": "1.9914e-04", + "loss": 0.838, + "slid_loss": 0.9117, + "step": 1530, + "time": 13.24 + }, + { + "epoch": 0.77, + "learning_rate": "1.9914e-04", + "loss": 1.0307, + "slid_loss": 0.9117, + "step": 1531, + "time": 13.79 + }, + { + "epoch": 0.77, + "learning_rate": "1.9913e-04", + "loss": 0.86, + "slid_loss": 0.9108, + "step": 1532, + "time": 12.31 + }, + { + "epoch": 0.77, + "learning_rate": "1.9913e-04", + "loss": 0.9378, + "slid_loss": 0.9099, + "step": 1533, + "time": 14.02 + }, + { + "epoch": 0.77, + "learning_rate": "1.9913e-04", + "loss": 0.9342, + "slid_loss": 0.9096, + "step": 1534, + "time": 14.33 + }, + { + "epoch": 0.77, + "learning_rate": "1.9912e-04", + "loss": 0.9948, + "slid_loss": 0.9099, + "step": 1535, + "time": 13.41 + }, + { + "epoch": 0.77, + "learning_rate": "1.9912e-04", + "loss": 0.9154, + "slid_loss": 0.9121, + "step": 1536, + "time": 12.52 + }, + { + "epoch": 0.77, + "learning_rate": "1.9912e-04", + "loss": 0.9249, + "slid_loss": 0.9116, + "step": 1537, + "time": 13.56 + }, + { + "epoch": 0.77, + "learning_rate": "1.9911e-04", + "loss": 0.9194, + "slid_loss": 0.912, + "step": 1538, + "time": 12.91 + }, + { + "epoch": 0.77, + "learning_rate": "1.9911e-04", + "loss": 1.0819, + "slid_loss": 0.9132, + "step": 1539, + "time": 13.57 + }, + { + "epoch": 0.77, + "learning_rate": "1.9911e-04", + "loss": 0.8186, + "slid_loss": 0.9134, + "step": 1540, + "time": 11.26 + }, + { + "epoch": 0.77, + "learning_rate": "1.9911e-04", + "loss": 0.893, + "slid_loss": 0.9138, + "step": 1541, + "time": 13.5 + }, + { + "epoch": 0.77, + "learning_rate": "1.9910e-04", + "loss": 0.962, + "slid_loss": 0.916, + "step": 1542, + "time": 13.27 + }, + { + "epoch": 0.77, + "learning_rate": "1.9910e-04", + "loss": 0.8615, + "slid_loss": 0.9152, + "step": 1543, + "time": 12.58 + }, + { + "epoch": 0.77, + "learning_rate": "1.9910e-04", + "loss": 0.8869, + "slid_loss": 0.9154, + "step": 1544, + "time": 13.79 + }, + { + "epoch": 0.77, + "learning_rate": "1.9909e-04", + "loss": 0.9629, + "slid_loss": 0.9178, + "step": 1545, + "time": 10.99 + }, + { + "epoch": 0.77, + "learning_rate": "1.9909e-04", + "loss": 0.8282, + "slid_loss": 0.917, + "step": 1546, + "time": 13.11 + }, + { + "epoch": 0.77, + "learning_rate": "1.9909e-04", + "loss": 0.9731, + "slid_loss": 0.9192, + "step": 1547, + "time": 11.36 + }, + { + "epoch": 0.78, + "learning_rate": "1.9908e-04", + "loss": 0.9308, + "slid_loss": 0.9188, + "step": 1548, + "time": 11.41 + }, + { + "epoch": 0.78, + "learning_rate": "1.9908e-04", + "loss": 0.9841, + "slid_loss": 0.9196, + "step": 1549, + "time": 13.38 + }, + { + "epoch": 0.78, + "learning_rate": "1.9908e-04", + "loss": 0.9846, + "slid_loss": 0.9213, + "step": 1550, + "time": 12.86 + }, + { + "epoch": 0.78, + "learning_rate": "1.9907e-04", + "loss": 0.8951, + "slid_loss": 0.9219, + "step": 1551, + "time": 11.38 + }, + { + "epoch": 0.78, + "learning_rate": "1.9907e-04", + "loss": 0.8355, + "slid_loss": 0.921, + "step": 1552, + "time": 12.41 + }, + { + "epoch": 0.78, + "learning_rate": "1.9907e-04", + "loss": 0.9748, + "slid_loss": 0.92, + "step": 1553, + "time": 10.99 + }, + { + "epoch": 0.78, + "learning_rate": "1.9906e-04", + "loss": 0.7802, + "slid_loss": 0.9184, + "step": 1554, + "time": 11.79 + }, + { + "epoch": 0.78, + "learning_rate": "1.9906e-04", + "loss": 1.024, + "slid_loss": 0.9185, + "step": 1555, + "time": 11.68 + }, + { + "epoch": 0.78, + "learning_rate": "1.9905e-04", + "loss": 0.8121, + "slid_loss": 0.9178, + "step": 1556, + "time": 12.25 + }, + { + "epoch": 0.78, + "learning_rate": "1.9905e-04", + "loss": 0.7831, + "slid_loss": 0.9162, + "step": 1557, + "time": 13.13 + }, + { + "epoch": 0.78, + "learning_rate": "1.9905e-04", + "loss": 0.8112, + "slid_loss": 0.9152, + "step": 1558, + "time": 12.3 + }, + { + "epoch": 0.78, + "learning_rate": "1.9904e-04", + "loss": 0.9295, + "slid_loss": 0.9135, + "step": 1559, + "time": 13.65 + }, + { + "epoch": 0.78, + "learning_rate": "1.9904e-04", + "loss": 0.8719, + "slid_loss": 0.914, + "step": 1560, + "time": 14.06 + }, + { + "epoch": 0.78, + "learning_rate": "1.9904e-04", + "loss": 0.9475, + "slid_loss": 0.9125, + "step": 1561, + "time": 13.68 + }, + { + "epoch": 0.78, + "learning_rate": "1.9903e-04", + "loss": 0.7853, + "slid_loss": 0.9106, + "step": 1562, + "time": 14.42 + }, + { + "epoch": 0.78, + "learning_rate": "1.9903e-04", + "loss": 0.9069, + "slid_loss": 0.909, + "step": 1563, + "time": 12.38 + }, + { + "epoch": 0.78, + "learning_rate": "1.9903e-04", + "loss": 1.1764, + "slid_loss": 0.9113, + "step": 1564, + "time": 11.29 + }, + { + "epoch": 0.78, + "learning_rate": "1.9902e-04", + "loss": 0.8195, + "slid_loss": 0.9099, + "step": 1565, + "time": 12.73 + }, + { + "epoch": 0.78, + "learning_rate": "1.9902e-04", + "loss": 0.7783, + "slid_loss": 0.908, + "step": 1566, + "time": 13.71 + }, + { + "epoch": 0.78, + "learning_rate": "1.9902e-04", + "loss": 1.0815, + "slid_loss": 0.9101, + "step": 1567, + "time": 12.22 + }, + { + "epoch": 0.79, + "learning_rate": "1.9901e-04", + "loss": 0.8953, + "slid_loss": 0.9104, + "step": 1568, + "time": 13.91 + }, + { + "epoch": 0.79, + "learning_rate": "1.9901e-04", + "loss": 0.9571, + "slid_loss": 0.9117, + "step": 1569, + "time": 12.02 + }, + { + "epoch": 0.79, + "learning_rate": "1.9901e-04", + "loss": 0.8514, + "slid_loss": 0.9107, + "step": 1570, + "time": 13.83 + }, + { + "epoch": 0.79, + "learning_rate": "1.9900e-04", + "loss": 0.9294, + "slid_loss": 0.9104, + "step": 1571, + "time": 12.19 + }, + { + "epoch": 0.79, + "learning_rate": "1.9900e-04", + "loss": 0.985, + "slid_loss": 0.9107, + "step": 1572, + "time": 13.29 + }, + { + "epoch": 0.79, + "learning_rate": "1.9900e-04", + "loss": 1.0733, + "slid_loss": 0.9126, + "step": 1573, + "time": 12.77 + }, + { + "epoch": 0.79, + "learning_rate": "1.9899e-04", + "loss": 1.0991, + "slid_loss": 0.9153, + "step": 1574, + "time": 13.94 + }, + { + "epoch": 0.79, + "learning_rate": "1.9899e-04", + "loss": 0.7967, + "slid_loss": 0.9147, + "step": 1575, + "time": 12.82 + }, + { + "epoch": 0.79, + "learning_rate": "1.9899e-04", + "loss": 1.0954, + "slid_loss": 0.9155, + "step": 1576, + "time": 13.27 + }, + { + "epoch": 0.79, + "learning_rate": "1.9898e-04", + "loss": 0.8703, + "slid_loss": 0.9168, + "step": 1577, + "time": 10.87 + }, + { + "epoch": 0.79, + "learning_rate": "1.9898e-04", + "loss": 0.9144, + "slid_loss": 0.9175, + "step": 1578, + "time": 13.27 + }, + { + "epoch": 0.79, + "learning_rate": "1.9898e-04", + "loss": 0.9051, + "slid_loss": 0.9176, + "step": 1579, + "time": 11.51 + }, + { + "epoch": 0.79, + "learning_rate": "1.9897e-04", + "loss": 0.8426, + "slid_loss": 0.9168, + "step": 1580, + "time": 11.32 + }, + { + "epoch": 0.79, + "learning_rate": "1.9897e-04", + "loss": 0.9281, + "slid_loss": 0.9183, + "step": 1581, + "time": 12.78 + }, + { + "epoch": 0.79, + "learning_rate": "1.9897e-04", + "loss": 0.8847, + "slid_loss": 0.9177, + "step": 1582, + "time": 11.13 + }, + { + "epoch": 0.79, + "learning_rate": "1.9896e-04", + "loss": 0.9248, + "slid_loss": 0.9177, + "step": 1583, + "time": 10.63 + }, + { + "epoch": 0.79, + "learning_rate": "1.9896e-04", + "loss": 0.6873, + "slid_loss": 0.9148, + "step": 1584, + "time": 13.67 + }, + { + "epoch": 0.79, + "learning_rate": "1.9895e-04", + "loss": 1.0243, + "slid_loss": 0.9163, + "step": 1585, + "time": 13.41 + }, + { + "epoch": 0.79, + "learning_rate": "1.9895e-04", + "loss": 1.0078, + "slid_loss": 0.917, + "step": 1586, + "time": 12.25 + }, + { + "epoch": 0.79, + "learning_rate": "1.9895e-04", + "loss": 0.7826, + "slid_loss": 0.9146, + "step": 1587, + "time": 13.12 + }, + { + "epoch": 0.8, + "learning_rate": "1.9894e-04", + "loss": 0.9176, + "slid_loss": 0.9158, + "step": 1588, + "time": 13.33 + }, + { + "epoch": 0.8, + "learning_rate": "1.9894e-04", + "loss": 1.0226, + "slid_loss": 0.9166, + "step": 1589, + "time": 12.8 + }, + { + "epoch": 0.8, + "learning_rate": "1.9894e-04", + "loss": 0.801, + "slid_loss": 0.9149, + "step": 1590, + "time": 11.47 + }, + { + "epoch": 0.8, + "learning_rate": "1.9893e-04", + "loss": 0.9781, + "slid_loss": 0.9147, + "step": 1591, + "time": 13.37 + }, + { + "epoch": 0.8, + "learning_rate": "1.9893e-04", + "loss": 0.9635, + "slid_loss": 0.914, + "step": 1592, + "time": 13.37 + }, + { + "epoch": 0.8, + "learning_rate": "1.9893e-04", + "loss": 0.9473, + "slid_loss": 0.9146, + "step": 1593, + "time": 13.29 + }, + { + "epoch": 0.8, + "learning_rate": "1.9892e-04", + "loss": 0.7933, + "slid_loss": 0.9127, + "step": 1594, + "time": 12.82 + }, + { + "epoch": 0.8, + "learning_rate": "1.9892e-04", + "loss": 1.0286, + "slid_loss": 0.9154, + "step": 1595, + "time": 13.58 + }, + { + "epoch": 0.8, + "learning_rate": "1.9891e-04", + "loss": 0.7488, + "slid_loss": 0.9144, + "step": 1596, + "time": 11.85 + }, + { + "epoch": 0.8, + "learning_rate": "1.9891e-04", + "loss": 1.0667, + "slid_loss": 0.9147, + "step": 1597, + "time": 13.73 + }, + { + "epoch": 0.8, + "learning_rate": "1.9891e-04", + "loss": 0.758, + "slid_loss": 0.9133, + "step": 1598, + "time": 12.52 + }, + { + "epoch": 0.8, + "learning_rate": "1.9890e-04", + "loss": 0.8493, + "slid_loss": 0.9124, + "step": 1599, + "time": 10.56 + }, + { + "epoch": 0.8, + "learning_rate": "1.9890e-04", + "loss": 0.886, + "slid_loss": 0.9125, + "step": 1600, + "time": 13.0 + }, + { + "epoch": 0.8, + "learning_rate": "1.9890e-04", + "loss": 0.7306, + "slid_loss": 0.91, + "step": 1601, + "time": 13.75 + }, + { + "epoch": 0.8, + "learning_rate": "1.9889e-04", + "loss": 0.8755, + "slid_loss": 0.91, + "step": 1602, + "time": 11.25 + }, + { + "epoch": 0.8, + "learning_rate": "1.9889e-04", + "loss": 0.9486, + "slid_loss": 0.9093, + "step": 1603, + "time": 10.59 + }, + { + "epoch": 0.8, + "learning_rate": "1.9889e-04", + "loss": 0.7408, + "slid_loss": 0.9086, + "step": 1604, + "time": 12.73 + }, + { + "epoch": 0.8, + "learning_rate": "1.9888e-04", + "loss": 1.0027, + "slid_loss": 0.9105, + "step": 1605, + "time": 12.78 + }, + { + "epoch": 0.8, + "learning_rate": "1.9888e-04", + "loss": 0.9719, + "slid_loss": 0.9099, + "step": 1606, + "time": 11.37 + }, + { + "epoch": 0.8, + "learning_rate": "1.9887e-04", + "loss": 0.7426, + "slid_loss": 0.9074, + "step": 1607, + "time": 12.96 + }, + { + "epoch": 0.81, + "learning_rate": "1.9887e-04", + "loss": 0.8153, + "slid_loss": 0.9053, + "step": 1608, + "time": 12.88 + }, + { + "epoch": 0.81, + "learning_rate": "1.9887e-04", + "loss": 1.0991, + "slid_loss": 0.9074, + "step": 1609, + "time": 13.28 + }, + { + "epoch": 0.81, + "learning_rate": "1.9886e-04", + "loss": 0.9823, + "slid_loss": 0.907, + "step": 1610, + "time": 11.75 + }, + { + "epoch": 0.81, + "learning_rate": "1.9886e-04", + "loss": 0.9077, + "slid_loss": 0.9064, + "step": 1611, + "time": 12.86 + }, + { + "epoch": 0.81, + "learning_rate": "1.9886e-04", + "loss": 1.0195, + "slid_loss": 0.9085, + "step": 1612, + "time": 12.63 + }, + { + "epoch": 0.81, + "learning_rate": "1.9885e-04", + "loss": 0.8187, + "slid_loss": 0.907, + "step": 1613, + "time": 11.85 + }, + { + "epoch": 0.81, + "learning_rate": "1.9885e-04", + "loss": 0.9876, + "slid_loss": 0.909, + "step": 1614, + "time": 14.03 + }, + { + "epoch": 0.81, + "learning_rate": "1.9884e-04", + "loss": 0.9962, + "slid_loss": 0.9095, + "step": 1615, + "time": 13.16 + }, + { + "epoch": 0.81, + "learning_rate": "1.9884e-04", + "loss": 0.9679, + "slid_loss": 0.9112, + "step": 1616, + "time": 14.1 + }, + { + "epoch": 0.81, + "learning_rate": "1.9884e-04", + "loss": 1.0664, + "slid_loss": 0.9126, + "step": 1617, + "time": 13.98 + }, + { + "epoch": 0.81, + "learning_rate": "1.9883e-04", + "loss": 1.0561, + "slid_loss": 0.9136, + "step": 1618, + "time": 13.41 + }, + { + "epoch": 0.81, + "learning_rate": "1.9883e-04", + "loss": 0.8456, + "slid_loss": 0.9131, + "step": 1619, + "time": 13.16 + }, + { + "epoch": 0.81, + "learning_rate": "1.9883e-04", + "loss": 0.8341, + "slid_loss": 0.9127, + "step": 1620, + "time": 13.83 + }, + { + "epoch": 0.81, + "learning_rate": "1.9882e-04", + "loss": 0.9852, + "slid_loss": 0.9131, + "step": 1621, + "time": 13.21 + }, + { + "epoch": 0.81, + "learning_rate": "1.9882e-04", + "loss": 0.8771, + "slid_loss": 0.9134, + "step": 1622, + "time": 13.48 + }, + { + "epoch": 0.81, + "learning_rate": "1.9881e-04", + "loss": 0.9924, + "slid_loss": 0.9145, + "step": 1623, + "time": 10.61 + }, + { + "epoch": 0.81, + "learning_rate": "1.9881e-04", + "loss": 0.783, + "slid_loss": 0.9127, + "step": 1624, + "time": 13.34 + }, + { + "epoch": 0.81, + "learning_rate": "1.9881e-04", + "loss": 1.0047, + "slid_loss": 0.9146, + "step": 1625, + "time": 11.64 + }, + { + "epoch": 0.81, + "learning_rate": "1.9880e-04", + "loss": 0.7387, + "slid_loss": 0.9134, + "step": 1626, + "time": 13.23 + }, + { + "epoch": 0.81, + "learning_rate": "1.9880e-04", + "loss": 0.9037, + "slid_loss": 0.9135, + "step": 1627, + "time": 11.79 + }, + { + "epoch": 0.82, + "learning_rate": "1.9880e-04", + "loss": 1.2352, + "slid_loss": 0.9175, + "step": 1628, + "time": 13.32 + }, + { + "epoch": 0.82, + "learning_rate": "1.9879e-04", + "loss": 0.9618, + "slid_loss": 0.9184, + "step": 1629, + "time": 11.51 + }, + { + "epoch": 0.82, + "learning_rate": "1.9879e-04", + "loss": 0.9013, + "slid_loss": 0.9191, + "step": 1630, + "time": 14.31 + }, + { + "epoch": 0.82, + "learning_rate": "1.9878e-04", + "loss": 0.8882, + "slid_loss": 0.9176, + "step": 1631, + "time": 10.9 + }, + { + "epoch": 0.82, + "learning_rate": "1.9878e-04", + "loss": 0.6489, + "slid_loss": 0.9155, + "step": 1632, + "time": 11.26 + }, + { + "epoch": 0.82, + "learning_rate": "1.9878e-04", + "loss": 0.8595, + "slid_loss": 0.9147, + "step": 1633, + "time": 11.25 + }, + { + "epoch": 0.82, + "learning_rate": "1.9877e-04", + "loss": 0.8493, + "slid_loss": 0.9139, + "step": 1634, + "time": 13.31 + }, + { + "epoch": 0.82, + "learning_rate": "1.9877e-04", + "loss": 0.87, + "slid_loss": 0.9126, + "step": 1635, + "time": 13.85 + }, + { + "epoch": 0.82, + "learning_rate": "1.9877e-04", + "loss": 0.7813, + "slid_loss": 0.9113, + "step": 1636, + "time": 12.04 + }, + { + "epoch": 0.82, + "learning_rate": "1.9876e-04", + "loss": 0.8402, + "slid_loss": 0.9105, + "step": 1637, + "time": 13.67 + }, + { + "epoch": 0.82, + "learning_rate": "1.9876e-04", + "loss": 0.8573, + "slid_loss": 0.9098, + "step": 1638, + "time": 12.06 + }, + { + "epoch": 0.82, + "learning_rate": "1.9875e-04", + "loss": 1.0337, + "slid_loss": 0.9093, + "step": 1639, + "time": 13.65 + }, + { + "epoch": 0.82, + "learning_rate": "1.9875e-04", + "loss": 0.9316, + "slid_loss": 0.9105, + "step": 1640, + "time": 10.86 + }, + { + "epoch": 0.82, + "learning_rate": "1.9875e-04", + "loss": 0.9285, + "slid_loss": 0.9108, + "step": 1641, + "time": 11.36 + }, + { + "epoch": 0.82, + "learning_rate": "1.9874e-04", + "loss": 0.847, + "slid_loss": 0.9097, + "step": 1642, + "time": 10.87 + }, + { + "epoch": 0.82, + "learning_rate": "1.9874e-04", + "loss": 0.8899, + "slid_loss": 0.91, + "step": 1643, + "time": 11.46 + }, + { + "epoch": 0.82, + "learning_rate": "1.9873e-04", + "loss": 1.0183, + "slid_loss": 0.9113, + "step": 1644, + "time": 13.39 + }, + { + "epoch": 0.82, + "learning_rate": "1.9873e-04", + "loss": 0.8866, + "slid_loss": 0.9105, + "step": 1645, + "time": 10.89 + }, + { + "epoch": 0.82, + "learning_rate": "1.9873e-04", + "loss": 1.0615, + "slid_loss": 0.9128, + "step": 1646, + "time": 10.76 + }, + { + "epoch": 0.82, + "learning_rate": "1.9872e-04", + "loss": 0.8384, + "slid_loss": 0.9115, + "step": 1647, + "time": 13.38 + }, + { + "epoch": 0.83, + "learning_rate": "1.9872e-04", + "loss": 0.9874, + "slid_loss": 0.9121, + "step": 1648, + "time": 11.02 + }, + { + "epoch": 0.83, + "learning_rate": "1.9871e-04", + "loss": 0.9521, + "slid_loss": 0.9117, + "step": 1649, + "time": 11.19 + }, + { + "epoch": 0.83, + "learning_rate": "1.9871e-04", + "loss": 0.7163, + "slid_loss": 0.9091, + "step": 1650, + "time": 10.4 + }, + { + "epoch": 0.83, + "learning_rate": "1.9871e-04", + "loss": 0.8406, + "slid_loss": 0.9085, + "step": 1651, + "time": 13.29 + }, + { + "epoch": 0.83, + "learning_rate": "1.9870e-04", + "loss": 0.9976, + "slid_loss": 0.9101, + "step": 1652, + "time": 10.85 + }, + { + "epoch": 0.83, + "learning_rate": "1.9870e-04", + "loss": 0.9412, + "slid_loss": 0.9098, + "step": 1653, + "time": 11.48 + }, + { + "epoch": 0.83, + "learning_rate": "1.9869e-04", + "loss": 0.8861, + "slid_loss": 0.9109, + "step": 1654, + "time": 11.83 + }, + { + "epoch": 0.83, + "learning_rate": "1.9869e-04", + "loss": 0.8336, + "slid_loss": 0.909, + "step": 1655, + "time": 12.73 + }, + { + "epoch": 0.83, + "learning_rate": "1.9869e-04", + "loss": 0.8648, + "slid_loss": 0.9095, + "step": 1656, + "time": 13.67 + }, + { + "epoch": 0.83, + "learning_rate": "1.9868e-04", + "loss": 0.8701, + "slid_loss": 0.9104, + "step": 1657, + "time": 12.45 + }, + { + "epoch": 0.83, + "learning_rate": "1.9868e-04", + "loss": 1.0138, + "slid_loss": 0.9124, + "step": 1658, + "time": 13.18 + }, + { + "epoch": 0.83, + "learning_rate": "1.9867e-04", + "loss": 0.9683, + "slid_loss": 0.9128, + "step": 1659, + "time": 13.26 + }, + { + "epoch": 0.83, + "learning_rate": "1.9867e-04", + "loss": 0.939, + "slid_loss": 0.9134, + "step": 1660, + "time": 12.91 + }, + { + "epoch": 0.83, + "learning_rate": "1.9867e-04", + "loss": 0.9672, + "slid_loss": 0.9136, + "step": 1661, + "time": 13.01 + }, + { + "epoch": 0.83, + "learning_rate": "1.9866e-04", + "loss": 0.9944, + "slid_loss": 0.9157, + "step": 1662, + "time": 13.49 + }, + { + "epoch": 0.83, + "learning_rate": "1.9866e-04", + "loss": 1.0, + "slid_loss": 0.9167, + "step": 1663, + "time": 13.56 + }, + { + "epoch": 0.83, + "learning_rate": "1.9865e-04", + "loss": 0.8752, + "slid_loss": 0.9137, + "step": 1664, + "time": 12.24 + }, + { + "epoch": 0.83, + "learning_rate": "1.9865e-04", + "loss": 0.9041, + "slid_loss": 0.9145, + "step": 1665, + "time": 11.86 + }, + { + "epoch": 0.83, + "learning_rate": "1.9865e-04", + "loss": 0.8783, + "slid_loss": 0.9155, + "step": 1666, + "time": 11.68 + }, + { + "epoch": 0.83, + "learning_rate": "1.9864e-04", + "loss": 0.8341, + "slid_loss": 0.913, + "step": 1667, + "time": 11.81 + }, + { + "epoch": 0.84, + "learning_rate": "1.9864e-04", + "loss": 0.9494, + "slid_loss": 0.9136, + "step": 1668, + "time": 11.75 + }, + { + "epoch": 0.84, + "learning_rate": "1.9863e-04", + "loss": 0.8966, + "slid_loss": 0.913, + "step": 1669, + "time": 13.37 + }, + { + "epoch": 0.84, + "learning_rate": "1.9863e-04", + "loss": 0.9768, + "slid_loss": 0.9142, + "step": 1670, + "time": 12.79 + }, + { + "epoch": 0.84, + "learning_rate": "1.9863e-04", + "loss": 0.902, + "slid_loss": 0.9139, + "step": 1671, + "time": 10.99 + }, + { + "epoch": 0.84, + "learning_rate": "1.9862e-04", + "loss": 0.9837, + "slid_loss": 0.9139, + "step": 1672, + "time": 11.3 + }, + { + "epoch": 0.84, + "learning_rate": "1.9862e-04", + "loss": 0.9503, + "slid_loss": 0.9127, + "step": 1673, + "time": 12.54 + }, + { + "epoch": 0.84, + "learning_rate": "1.9861e-04", + "loss": 0.8606, + "slid_loss": 0.9103, + "step": 1674, + "time": 12.23 + }, + { + "epoch": 0.84, + "learning_rate": "1.9861e-04", + "loss": 0.8586, + "slid_loss": 0.9109, + "step": 1675, + "time": 12.15 + }, + { + "epoch": 0.84, + "learning_rate": "1.9861e-04", + "loss": 0.9359, + "slid_loss": 0.9093, + "step": 1676, + "time": 13.39 + }, + { + "epoch": 0.84, + "learning_rate": "1.9860e-04", + "loss": 1.0444, + "slid_loss": 0.9111, + "step": 1677, + "time": 13.3 + }, + { + "epoch": 0.84, + "learning_rate": "1.9860e-04", + "loss": 0.9419, + "slid_loss": 0.9114, + "step": 1678, + "time": 11.39 + }, + { + "epoch": 0.84, + "learning_rate": "1.9859e-04", + "loss": 0.9236, + "slid_loss": 0.9115, + "step": 1679, + "time": 11.51 + }, + { + "epoch": 0.84, + "learning_rate": "1.9859e-04", + "loss": 0.7807, + "slid_loss": 0.9109, + "step": 1680, + "time": 13.59 + }, + { + "epoch": 0.84, + "learning_rate": "1.9859e-04", + "loss": 0.8506, + "slid_loss": 0.9101, + "step": 1681, + "time": 12.25 + }, + { + "epoch": 0.84, + "learning_rate": "1.9858e-04", + "loss": 0.8, + "slid_loss": 0.9093, + "step": 1682, + "time": 13.19 + }, + { + "epoch": 0.84, + "learning_rate": "1.9858e-04", + "loss": 0.8508, + "slid_loss": 0.9086, + "step": 1683, + "time": 13.1 + }, + { + "epoch": 0.84, + "learning_rate": "1.9857e-04", + "loss": 1.0426, + "slid_loss": 0.9121, + "step": 1684, + "time": 13.55 + }, + { + "epoch": 0.84, + "learning_rate": "1.9857e-04", + "loss": 0.916, + "slid_loss": 0.911, + "step": 1685, + "time": 11.26 + }, + { + "epoch": 0.84, + "learning_rate": "1.9856e-04", + "loss": 0.9572, + "slid_loss": 0.9105, + "step": 1686, + "time": 14.5 + }, + { + "epoch": 0.84, + "learning_rate": "1.9856e-04", + "loss": 1.0035, + "slid_loss": 0.9127, + "step": 1687, + "time": 11.1 + }, + { + "epoch": 0.85, + "learning_rate": "1.9856e-04", + "loss": 1.0541, + "slid_loss": 0.9141, + "step": 1688, + "time": 12.02 + }, + { + "epoch": 0.85, + "learning_rate": "1.9855e-04", + "loss": 0.8641, + "slid_loss": 0.9125, + "step": 1689, + "time": 11.16 + }, + { + "epoch": 0.85, + "learning_rate": "1.9855e-04", + "loss": 0.8366, + "slid_loss": 0.9129, + "step": 1690, + "time": 12.89 + }, + { + "epoch": 0.85, + "learning_rate": "1.9854e-04", + "loss": 0.9306, + "slid_loss": 0.9124, + "step": 1691, + "time": 11.88 + }, + { + "epoch": 0.85, + "learning_rate": "1.9854e-04", + "loss": 0.8591, + "slid_loss": 0.9113, + "step": 1692, + "time": 13.62 + }, + { + "epoch": 0.85, + "learning_rate": "1.9854e-04", + "loss": 0.8387, + "slid_loss": 0.9103, + "step": 1693, + "time": 11.37 + }, + { + "epoch": 0.85, + "learning_rate": "1.9853e-04", + "loss": 0.9149, + "slid_loss": 0.9115, + "step": 1694, + "time": 14.22 + }, + { + "epoch": 0.85, + "learning_rate": "1.9853e-04", + "loss": 0.8668, + "slid_loss": 0.9099, + "step": 1695, + "time": 11.33 + }, + { + "epoch": 0.85, + "learning_rate": "1.9852e-04", + "loss": 0.8479, + "slid_loss": 0.9108, + "step": 1696, + "time": 12.8 + }, + { + "epoch": 0.85, + "learning_rate": "1.9852e-04", + "loss": 0.9552, + "slid_loss": 0.9097, + "step": 1697, + "time": 11.47 + }, + { + "epoch": 0.85, + "learning_rate": "1.9851e-04", + "loss": 1.0582, + "slid_loss": 0.9127, + "step": 1698, + "time": 12.73 + }, + { + "epoch": 0.85, + "learning_rate": "1.9851e-04", + "loss": 0.9981, + "slid_loss": 0.9142, + "step": 1699, + "time": 11.31 + }, + { + "epoch": 0.85, + "learning_rate": "1.9851e-04", + "loss": 1.1137, + "slid_loss": 0.9165, + "step": 1700, + "time": 13.4 + }, + { + "epoch": 0.85, + "learning_rate": "1.9850e-04", + "loss": 0.8686, + "slid_loss": 0.9179, + "step": 1701, + "time": 13.21 + }, + { + "epoch": 0.85, + "learning_rate": "1.9850e-04", + "loss": 0.873, + "slid_loss": 0.9179, + "step": 1702, + "time": 12.9 + }, + { + "epoch": 0.85, + "learning_rate": "1.9849e-04", + "loss": 0.8427, + "slid_loss": 0.9168, + "step": 1703, + "time": 11.1 + }, + { + "epoch": 0.85, + "learning_rate": "1.9849e-04", + "loss": 0.8309, + "slid_loss": 0.9177, + "step": 1704, + "time": 12.85 + }, + { + "epoch": 0.85, + "learning_rate": "1.9848e-04", + "loss": 1.0159, + "slid_loss": 0.9178, + "step": 1705, + "time": 12.1 + }, + { + "epoch": 0.85, + "learning_rate": "1.9848e-04", + "loss": 0.7827, + "slid_loss": 0.9159, + "step": 1706, + "time": 11.09 + }, + { + "epoch": 0.85, + "learning_rate": "1.9848e-04", + "loss": 1.1325, + "slid_loss": 0.9198, + "step": 1707, + "time": 13.61 + }, + { + "epoch": 0.86, + "learning_rate": "1.9847e-04", + "loss": 0.8395, + "slid_loss": 0.9201, + "step": 1708, + "time": 14.16 + }, + { + "epoch": 0.86, + "learning_rate": "1.9847e-04", + "loss": 0.9621, + "slid_loss": 0.9187, + "step": 1709, + "time": 11.64 + }, + { + "epoch": 0.86, + "learning_rate": "1.9846e-04", + "loss": 1.0682, + "slid_loss": 0.9196, + "step": 1710, + "time": 12.28 + }, + { + "epoch": 0.86, + "learning_rate": "1.9846e-04", + "loss": 0.7275, + "slid_loss": 0.9178, + "step": 1711, + "time": 10.66 + }, + { + "epoch": 0.86, + "learning_rate": "1.9845e-04", + "loss": 1.0017, + "slid_loss": 0.9176, + "step": 1712, + "time": 11.93 + }, + { + "epoch": 0.86, + "learning_rate": "1.9845e-04", + "loss": 0.9339, + "slid_loss": 0.9187, + "step": 1713, + "time": 12.94 + }, + { + "epoch": 0.86, + "learning_rate": "1.9845e-04", + "loss": 1.0217, + "slid_loss": 0.9191, + "step": 1714, + "time": 14.01 + }, + { + "epoch": 0.86, + "learning_rate": "1.9844e-04", + "loss": 0.9043, + "slid_loss": 0.9182, + "step": 1715, + "time": 11.58 + }, + { + "epoch": 0.86, + "learning_rate": "1.9844e-04", + "loss": 1.0141, + "slid_loss": 0.9186, + "step": 1716, + "time": 13.86 + }, + { + "epoch": 0.86, + "learning_rate": "1.9843e-04", + "loss": 0.8444, + "slid_loss": 0.9164, + "step": 1717, + "time": 14.2 + }, + { + "epoch": 0.86, + "learning_rate": "1.9843e-04", + "loss": 0.8666, + "slid_loss": 0.9145, + "step": 1718, + "time": 13.4 + }, + { + "epoch": 0.86, + "learning_rate": "1.9842e-04", + "loss": 0.8385, + "slid_loss": 0.9144, + "step": 1719, + "time": 12.94 + }, + { + "epoch": 0.86, + "learning_rate": "1.9842e-04", + "loss": 0.6879, + "slid_loss": 0.913, + "step": 1720, + "time": 12.8 + }, + { + "epoch": 0.86, + "learning_rate": "1.9842e-04", + "loss": 0.8938, + "slid_loss": 0.9121, + "step": 1721, + "time": 11.31 + }, + { + "epoch": 0.86, + "learning_rate": "1.9841e-04", + "loss": 0.7837, + "slid_loss": 0.9111, + "step": 1722, + "time": 12.04 + }, + { + "epoch": 0.86, + "learning_rate": "1.9841e-04", + "loss": 0.8206, + "slid_loss": 0.9094, + "step": 1723, + "time": 12.8 + }, + { + "epoch": 0.86, + "learning_rate": "1.9840e-04", + "loss": 0.9268, + "slid_loss": 0.9108, + "step": 1724, + "time": 12.91 + }, + { + "epoch": 0.86, + "learning_rate": "1.9840e-04", + "loss": 0.9498, + "slid_loss": 0.9103, + "step": 1725, + "time": 11.28 + }, + { + "epoch": 0.86, + "learning_rate": "1.9839e-04", + "loss": 0.8197, + "slid_loss": 0.9111, + "step": 1726, + "time": 11.29 + }, + { + "epoch": 0.86, + "learning_rate": "1.9839e-04", + "loss": 0.6435, + "slid_loss": 0.9085, + "step": 1727, + "time": 13.05 + }, + { + "epoch": 0.87, + "learning_rate": "1.9838e-04", + "loss": 1.0693, + "slid_loss": 0.9068, + "step": 1728, + "time": 10.97 + }, + { + "epoch": 0.87, + "learning_rate": "1.9838e-04", + "loss": 1.0131, + "slid_loss": 0.9074, + "step": 1729, + "time": 12.5 + }, + { + "epoch": 0.87, + "learning_rate": "1.9838e-04", + "loss": 0.9467, + "slid_loss": 0.9078, + "step": 1730, + "time": 11.61 + }, + { + "epoch": 0.87, + "learning_rate": "1.9837e-04", + "loss": 0.9088, + "slid_loss": 0.908, + "step": 1731, + "time": 13.76 + }, + { + "epoch": 0.87, + "learning_rate": "1.9837e-04", + "loss": 0.8796, + "slid_loss": 0.9103, + "step": 1732, + "time": 13.36 + }, + { + "epoch": 0.87, + "learning_rate": "1.9836e-04", + "loss": 0.9241, + "slid_loss": 0.911, + "step": 1733, + "time": 13.72 + }, + { + "epoch": 0.87, + "learning_rate": "1.9836e-04", + "loss": 0.7605, + "slid_loss": 0.9101, + "step": 1734, + "time": 12.29 + }, + { + "epoch": 0.87, + "learning_rate": "1.9835e-04", + "loss": 0.8182, + "slid_loss": 0.9096, + "step": 1735, + "time": 13.26 + }, + { + "epoch": 0.87, + "learning_rate": "1.9835e-04", + "loss": 1.0216, + "slid_loss": 0.912, + "step": 1736, + "time": 13.53 + }, + { + "epoch": 0.87, + "learning_rate": "1.9834e-04", + "loss": 0.7488, + "slid_loss": 0.9111, + "step": 1737, + "time": 13.13 + }, + { + "epoch": 0.87, + "learning_rate": "1.9834e-04", + "loss": 0.8488, + "slid_loss": 0.911, + "step": 1738, + "time": 13.69 + }, + { + "epoch": 0.87, + "learning_rate": "1.9834e-04", + "loss": 0.9541, + "slid_loss": 0.9102, + "step": 1739, + "time": 11.69 + }, + { + "epoch": 0.87, + "learning_rate": "1.9833e-04", + "loss": 0.8527, + "slid_loss": 0.9094, + "step": 1740, + "time": 11.43 + }, + { + "epoch": 0.87, + "learning_rate": "1.9833e-04", + "loss": 0.8931, + "slid_loss": 0.909, + "step": 1741, + "time": 13.62 + }, + { + "epoch": 0.87, + "learning_rate": "1.9832e-04", + "loss": 0.8279, + "slid_loss": 0.9088, + "step": 1742, + "time": 13.05 + }, + { + "epoch": 0.87, + "learning_rate": "1.9832e-04", + "loss": 0.886, + "slid_loss": 0.9088, + "step": 1743, + "time": 12.21 + }, + { + "epoch": 0.87, + "learning_rate": "1.9831e-04", + "loss": 0.8431, + "slid_loss": 0.9071, + "step": 1744, + "time": 11.49 + }, + { + "epoch": 0.87, + "learning_rate": "1.9831e-04", + "loss": 0.9471, + "slid_loss": 0.9077, + "step": 1745, + "time": 13.9 + }, + { + "epoch": 0.87, + "learning_rate": "1.9830e-04", + "loss": 1.0888, + "slid_loss": 0.9079, + "step": 1746, + "time": 13.03 + }, + { + "epoch": 0.87, + "learning_rate": "1.9830e-04", + "loss": 0.875, + "slid_loss": 0.9083, + "step": 1747, + "time": 11.47 + }, + { + "epoch": 0.88, + "learning_rate": "1.9830e-04", + "loss": 1.0559, + "slid_loss": 0.909, + "step": 1748, + "time": 12.04 + }, + { + "epoch": 0.88, + "learning_rate": "1.9829e-04", + "loss": 0.9947, + "slid_loss": 0.9094, + "step": 1749, + "time": 11.61 + }, + { + "epoch": 0.88, + "learning_rate": "1.9829e-04", + "loss": 1.0246, + "slid_loss": 0.9125, + "step": 1750, + "time": 12.23 + }, + { + "epoch": 0.88, + "learning_rate": "1.9828e-04", + "loss": 0.8438, + "slid_loss": 0.9125, + "step": 1751, + "time": 13.09 + }, + { + "epoch": 0.88, + "learning_rate": "1.9828e-04", + "loss": 0.9163, + "slid_loss": 0.9117, + "step": 1752, + "time": 13.34 + }, + { + "epoch": 0.88, + "learning_rate": "1.9827e-04", + "loss": 0.7222, + "slid_loss": 0.9095, + "step": 1753, + "time": 11.29 + }, + { + "epoch": 0.88, + "learning_rate": "1.9827e-04", + "loss": 0.9022, + "slid_loss": 0.9097, + "step": 1754, + "time": 11.27 + }, + { + "epoch": 0.88, + "learning_rate": "1.9826e-04", + "loss": 0.9538, + "slid_loss": 0.9109, + "step": 1755, + "time": 11.07 + }, + { + "epoch": 0.88, + "learning_rate": "1.9826e-04", + "loss": 0.776, + "slid_loss": 0.91, + "step": 1756, + "time": 13.96 + }, + { + "epoch": 0.88, + "learning_rate": "1.9825e-04", + "loss": 1.0473, + "slid_loss": 0.9118, + "step": 1757, + "time": 13.24 + }, + { + "epoch": 0.88, + "learning_rate": "1.9825e-04", + "loss": 0.8419, + "slid_loss": 0.91, + "step": 1758, + "time": 13.56 + }, + { + "epoch": 0.88, + "learning_rate": "1.9825e-04", + "loss": 0.8436, + "slid_loss": 0.9088, + "step": 1759, + "time": 13.34 + }, + { + "epoch": 0.88, + "learning_rate": "1.9824e-04", + "loss": 0.8915, + "slid_loss": 0.9083, + "step": 1760, + "time": 11.98 + }, + { + "epoch": 0.88, + "learning_rate": "1.9824e-04", + "loss": 1.0054, + "slid_loss": 0.9087, + "step": 1761, + "time": 13.25 + }, + { + "epoch": 0.88, + "learning_rate": "1.9823e-04", + "loss": 0.724, + "slid_loss": 0.906, + "step": 1762, + "time": 11.35 + }, + { + "epoch": 0.88, + "learning_rate": "1.9823e-04", + "loss": 0.8594, + "slid_loss": 0.9046, + "step": 1763, + "time": 10.66 + }, + { + "epoch": 0.88, + "learning_rate": "1.9822e-04", + "loss": 0.9712, + "slid_loss": 0.9056, + "step": 1764, + "time": 11.58 + }, + { + "epoch": 0.88, + "learning_rate": "1.9822e-04", + "loss": 0.9153, + "slid_loss": 0.9057, + "step": 1765, + "time": 11.9 + }, + { + "epoch": 0.88, + "learning_rate": "1.9821e-04", + "loss": 0.8814, + "slid_loss": 0.9057, + "step": 1766, + "time": 11.11 + }, + { + "epoch": 0.88, + "learning_rate": "1.9821e-04", + "loss": 0.866, + "slid_loss": 0.906, + "step": 1767, + "time": 11.23 + }, + { + "epoch": 0.89, + "learning_rate": "1.9820e-04", + "loss": 1.0348, + "slid_loss": 0.9069, + "step": 1768, + "time": 10.66 + }, + { + "epoch": 0.89, + "learning_rate": "1.9820e-04", + "loss": 0.9371, + "slid_loss": 0.9073, + "step": 1769, + "time": 12.93 + }, + { + "epoch": 0.89, + "learning_rate": "1.9819e-04", + "loss": 0.857, + "slid_loss": 0.9061, + "step": 1770, + "time": 11.41 + }, + { + "epoch": 0.89, + "learning_rate": "1.9819e-04", + "loss": 0.8378, + "slid_loss": 0.9054, + "step": 1771, + "time": 11.93 + }, + { + "epoch": 0.89, + "learning_rate": "1.9819e-04", + "loss": 0.9476, + "slid_loss": 0.9051, + "step": 1772, + "time": 13.35 + }, + { + "epoch": 0.89, + "learning_rate": "1.9818e-04", + "loss": 0.8365, + "slid_loss": 0.9039, + "step": 1773, + "time": 11.34 + }, + { + "epoch": 0.89, + "learning_rate": "1.9818e-04", + "loss": 0.9432, + "slid_loss": 0.9048, + "step": 1774, + "time": 14.53 + }, + { + "epoch": 0.89, + "learning_rate": "1.9817e-04", + "loss": 0.8665, + "slid_loss": 0.9048, + "step": 1775, + "time": 10.49 + }, + { + "epoch": 0.89, + "learning_rate": "1.9817e-04", + "loss": 0.8107, + "slid_loss": 0.9036, + "step": 1776, + "time": 12.42 + }, + { + "epoch": 0.89, + "learning_rate": "1.9816e-04", + "loss": 0.9632, + "slid_loss": 0.9028, + "step": 1777, + "time": 12.78 + }, + { + "epoch": 0.89, + "learning_rate": "1.9816e-04", + "loss": 1.0084, + "slid_loss": 0.9034, + "step": 1778, + "time": 11.37 + }, + { + "epoch": 0.89, + "learning_rate": "1.9815e-04", + "loss": 0.9142, + "slid_loss": 0.9033, + "step": 1779, + "time": 12.12 + }, + { + "epoch": 0.89, + "learning_rate": "1.9815e-04", + "loss": 0.8774, + "slid_loss": 0.9043, + "step": 1780, + "time": 13.83 + }, + { + "epoch": 0.89, + "learning_rate": "1.9814e-04", + "loss": 0.8626, + "slid_loss": 0.9044, + "step": 1781, + "time": 11.02 + }, + { + "epoch": 0.89, + "learning_rate": "1.9814e-04", + "loss": 0.8882, + "slid_loss": 0.9053, + "step": 1782, + "time": 11.82 + }, + { + "epoch": 0.89, + "learning_rate": "1.9813e-04", + "loss": 1.0396, + "slid_loss": 0.9072, + "step": 1783, + "time": 12.27 + }, + { + "epoch": 0.89, + "learning_rate": "1.9813e-04", + "loss": 1.0379, + "slid_loss": 0.9072, + "step": 1784, + "time": 11.18 + }, + { + "epoch": 0.89, + "learning_rate": "1.9812e-04", + "loss": 0.8106, + "slid_loss": 0.9061, + "step": 1785, + "time": 13.44 + }, + { + "epoch": 0.89, + "learning_rate": "1.9812e-04", + "loss": 0.7467, + "slid_loss": 0.904, + "step": 1786, + "time": 11.52 + }, + { + "epoch": 0.89, + "learning_rate": "1.9811e-04", + "loss": 0.9223, + "slid_loss": 0.9032, + "step": 1787, + "time": 10.67 + }, + { + "epoch": 0.9, + "learning_rate": "1.9811e-04", + "loss": 0.8204, + "slid_loss": 0.9008, + "step": 1788, + "time": 13.58 + }, + { + "epoch": 0.9, + "learning_rate": "1.9811e-04", + "loss": 0.7896, + "slid_loss": 0.9001, + "step": 1789, + "time": 13.66 + }, + { + "epoch": 0.9, + "learning_rate": "1.9810e-04", + "loss": 0.9312, + "slid_loss": 0.901, + "step": 1790, + "time": 13.4 + }, + { + "epoch": 0.9, + "learning_rate": "1.9810e-04", + "loss": 0.9506, + "slid_loss": 0.9012, + "step": 1791, + "time": 11.36 + }, + { + "epoch": 0.9, + "learning_rate": "1.9809e-04", + "loss": 0.887, + "slid_loss": 0.9015, + "step": 1792, + "time": 12.84 + }, + { + "epoch": 0.9, + "learning_rate": "1.9809e-04", + "loss": 0.8267, + "slid_loss": 0.9014, + "step": 1793, + "time": 12.91 + }, + { + "epoch": 0.9, + "learning_rate": "1.9808e-04", + "loss": 1.0624, + "slid_loss": 0.9029, + "step": 1794, + "time": 12.05 + }, + { + "epoch": 0.9, + "learning_rate": "1.9808e-04", + "loss": 0.872, + "slid_loss": 0.9029, + "step": 1795, + "time": 13.48 + }, + { + "epoch": 0.9, + "learning_rate": "1.9807e-04", + "loss": 0.933, + "slid_loss": 0.9038, + "step": 1796, + "time": 11.13 + }, + { + "epoch": 0.9, + "learning_rate": "1.9807e-04", + "loss": 1.0571, + "slid_loss": 0.9048, + "step": 1797, + "time": 13.68 + }, + { + "epoch": 0.9, + "learning_rate": "1.9806e-04", + "loss": 0.9413, + "slid_loss": 0.9036, + "step": 1798, + "time": 13.14 + }, + { + "epoch": 0.9, + "learning_rate": "1.9806e-04", + "loss": 0.9847, + "slid_loss": 0.9035, + "step": 1799, + "time": 13.43 + }, + { + "epoch": 0.9, + "learning_rate": "1.9805e-04", + "loss": 0.9016, + "slid_loss": 0.9014, + "step": 1800, + "time": 14.17 + }, + { + "epoch": 0.9, + "learning_rate": "1.9805e-04", + "loss": 0.8061, + "slid_loss": 0.9008, + "step": 1801, + "time": 13.2 + }, + { + "epoch": 0.9, + "learning_rate": "1.9804e-04", + "loss": 1.0577, + "slid_loss": 0.9026, + "step": 1802, + "time": 12.86 + }, + { + "epoch": 0.9, + "learning_rate": "1.9804e-04", + "loss": 1.017, + "slid_loss": 0.9043, + "step": 1803, + "time": 13.34 + }, + { + "epoch": 0.9, + "learning_rate": "1.9803e-04", + "loss": 0.7456, + "slid_loss": 0.9035, + "step": 1804, + "time": 11.32 + }, + { + "epoch": 0.9, + "learning_rate": "1.9803e-04", + "loss": 0.9494, + "slid_loss": 0.9028, + "step": 1805, + "time": 13.35 + }, + { + "epoch": 0.9, + "learning_rate": "1.9802e-04", + "loss": 0.8553, + "slid_loss": 0.9036, + "step": 1806, + "time": 11.62 + }, + { + "epoch": 0.9, + "learning_rate": "1.9802e-04", + "loss": 0.7106, + "slid_loss": 0.8993, + "step": 1807, + "time": 13.11 + }, + { + "epoch": 0.91, + "learning_rate": "1.9801e-04", + "loss": 1.0044, + "slid_loss": 0.901, + "step": 1808, + "time": 13.09 + }, + { + "epoch": 0.91, + "learning_rate": "1.9801e-04", + "loss": 0.7762, + "slid_loss": 0.8991, + "step": 1809, + "time": 13.74 + }, + { + "epoch": 0.91, + "learning_rate": "1.9800e-04", + "loss": 0.8744, + "slid_loss": 0.8972, + "step": 1810, + "time": 13.13 + }, + { + "epoch": 0.91, + "learning_rate": "1.9800e-04", + "loss": 0.7418, + "slid_loss": 0.8973, + "step": 1811, + "time": 11.13 + }, + { + "epoch": 0.91, + "learning_rate": "1.9799e-04", + "loss": 0.7623, + "slid_loss": 0.8949, + "step": 1812, + "time": 11.84 + }, + { + "epoch": 0.91, + "learning_rate": "1.9799e-04", + "loss": 0.8503, + "slid_loss": 0.8941, + "step": 1813, + "time": 13.49 + }, + { + "epoch": 0.91, + "learning_rate": "1.9798e-04", + "loss": 1.0058, + "slid_loss": 0.8939, + "step": 1814, + "time": 13.59 + }, + { + "epoch": 0.91, + "learning_rate": "1.9798e-04", + "loss": 0.9964, + "slid_loss": 0.8949, + "step": 1815, + "time": 12.74 + }, + { + "epoch": 0.91, + "learning_rate": "1.9797e-04", + "loss": 0.6691, + "slid_loss": 0.8914, + "step": 1816, + "time": 10.89 + }, + { + "epoch": 0.91, + "learning_rate": "1.9797e-04", + "loss": 0.8783, + "slid_loss": 0.8918, + "step": 1817, + "time": 13.12 + }, + { + "epoch": 0.91, + "learning_rate": "1.9796e-04", + "loss": 0.7951, + "slid_loss": 0.891, + "step": 1818, + "time": 13.36 + }, + { + "epoch": 0.91, + "learning_rate": "1.9796e-04", + "loss": 0.6931, + "slid_loss": 0.8896, + "step": 1819, + "time": 13.28 + }, + { + "epoch": 0.91, + "learning_rate": "1.9795e-04", + "loss": 0.9503, + "slid_loss": 0.8922, + "step": 1820, + "time": 12.76 + }, + { + "epoch": 0.91, + "learning_rate": "1.9795e-04", + "loss": 0.899, + "slid_loss": 0.8923, + "step": 1821, + "time": 12.27 + }, + { + "epoch": 0.91, + "learning_rate": "1.9794e-04", + "loss": 1.0779, + "slid_loss": 0.8952, + "step": 1822, + "time": 12.12 + }, + { + "epoch": 0.91, + "learning_rate": "1.9794e-04", + "loss": 0.8073, + "slid_loss": 0.8951, + "step": 1823, + "time": 13.87 + }, + { + "epoch": 0.91, + "learning_rate": "1.9793e-04", + "loss": 0.8045, + "slid_loss": 0.8938, + "step": 1824, + "time": 12.73 + }, + { + "epoch": 0.91, + "learning_rate": "1.9793e-04", + "loss": 0.9039, + "slid_loss": 0.8934, + "step": 1825, + "time": 13.69 + }, + { + "epoch": 0.91, + "learning_rate": "1.9792e-04", + "loss": 0.8996, + "slid_loss": 0.8942, + "step": 1826, + "time": 13.35 + }, + { + "epoch": 0.91, + "learning_rate": "1.9792e-04", + "loss": 0.9448, + "slid_loss": 0.8972, + "step": 1827, + "time": 12.99 + }, + { + "epoch": 0.92, + "learning_rate": "1.9791e-04", + "loss": 0.8984, + "slid_loss": 0.8955, + "step": 1828, + "time": 13.64 + }, + { + "epoch": 0.92, + "learning_rate": "1.9791e-04", + "loss": 0.851, + "slid_loss": 0.8939, + "step": 1829, + "time": 11.06 + }, + { + "epoch": 0.92, + "learning_rate": "1.9790e-04", + "loss": 0.8163, + "slid_loss": 0.8926, + "step": 1830, + "time": 14.03 + }, + { + "epoch": 0.92, + "learning_rate": "1.9790e-04", + "loss": 0.9868, + "slid_loss": 0.8933, + "step": 1831, + "time": 11.59 + }, + { + "epoch": 0.92, + "learning_rate": "1.9789e-04", + "loss": 0.9451, + "slid_loss": 0.894, + "step": 1832, + "time": 13.07 + }, + { + "epoch": 0.92, + "learning_rate": "1.9789e-04", + "loss": 0.9238, + "slid_loss": 0.894, + "step": 1833, + "time": 11.9 + }, + { + "epoch": 0.92, + "learning_rate": "1.9788e-04", + "loss": 0.8727, + "slid_loss": 0.8951, + "step": 1834, + "time": 11.71 + }, + { + "epoch": 0.92, + "learning_rate": "1.9788e-04", + "loss": 0.7661, + "slid_loss": 0.8946, + "step": 1835, + "time": 12.17 + }, + { + "epoch": 0.92, + "learning_rate": "1.9787e-04", + "loss": 0.9232, + "slid_loss": 0.8936, + "step": 1836, + "time": 14.28 + }, + { + "epoch": 0.92, + "learning_rate": "1.9787e-04", + "loss": 0.7554, + "slid_loss": 0.8937, + "step": 1837, + "time": 12.36 + }, + { + "epoch": 0.92, + "learning_rate": "1.9786e-04", + "loss": 0.8748, + "slid_loss": 0.8939, + "step": 1838, + "time": 12.83 + }, + { + "epoch": 0.92, + "learning_rate": "1.9786e-04", + "loss": 0.9553, + "slid_loss": 0.894, + "step": 1839, + "time": 13.16 + }, + { + "epoch": 0.92, + "learning_rate": "1.9785e-04", + "loss": 0.8523, + "slid_loss": 0.8939, + "step": 1840, + "time": 13.73 + }, + { + "epoch": 0.92, + "learning_rate": "1.9785e-04", + "loss": 0.8243, + "slid_loss": 0.8933, + "step": 1841, + "time": 11.27 + }, + { + "epoch": 0.92, + "learning_rate": "1.9784e-04", + "loss": 0.8675, + "slid_loss": 0.8937, + "step": 1842, + "time": 13.3 + }, + { + "epoch": 0.92, + "learning_rate": "1.9784e-04", + "loss": 0.8937, + "slid_loss": 0.8937, + "step": 1843, + "time": 13.1 + }, + { + "epoch": 0.92, + "learning_rate": "1.9783e-04", + "loss": 0.8768, + "slid_loss": 0.8941, + "step": 1844, + "time": 11.12 + }, + { + "epoch": 0.92, + "learning_rate": "1.9783e-04", + "loss": 0.9124, + "slid_loss": 0.8937, + "step": 1845, + "time": 13.28 + }, + { + "epoch": 0.92, + "learning_rate": "1.9782e-04", + "loss": 0.8713, + "slid_loss": 0.8915, + "step": 1846, + "time": 11.94 + }, + { + "epoch": 0.92, + "learning_rate": "1.9782e-04", + "loss": 0.8873, + "slid_loss": 0.8917, + "step": 1847, + "time": 11.1 + }, + { + "epoch": 0.93, + "learning_rate": "1.9781e-04", + "loss": 0.7667, + "slid_loss": 0.8888, + "step": 1848, + "time": 13.81 + }, + { + "epoch": 0.93, + "learning_rate": "1.9781e-04", + "loss": 0.9218, + "slid_loss": 0.8881, + "step": 1849, + "time": 13.89 + }, + { + "epoch": 0.93, + "learning_rate": "1.9780e-04", + "loss": 0.6832, + "slid_loss": 0.8846, + "step": 1850, + "time": 12.96 + }, + { + "epoch": 0.93, + "learning_rate": "1.9780e-04", + "loss": 0.7759, + "slid_loss": 0.884, + "step": 1851, + "time": 11.64 + }, + { + "epoch": 0.93, + "learning_rate": "1.9779e-04", + "loss": 1.0095, + "slid_loss": 0.8849, + "step": 1852, + "time": 11.16 + }, + { + "epoch": 0.93, + "learning_rate": "1.9779e-04", + "loss": 0.7588, + "slid_loss": 0.8853, + "step": 1853, + "time": 12.49 + }, + { + "epoch": 0.93, + "learning_rate": "1.9778e-04", + "loss": 0.9259, + "slid_loss": 0.8855, + "step": 1854, + "time": 13.42 + }, + { + "epoch": 0.93, + "learning_rate": "1.9778e-04", + "loss": 0.9437, + "slid_loss": 0.8854, + "step": 1855, + "time": 11.26 + }, + { + "epoch": 0.93, + "learning_rate": "1.9777e-04", + "loss": 0.8965, + "slid_loss": 0.8866, + "step": 1856, + "time": 12.18 + }, + { + "epoch": 0.93, + "learning_rate": "1.9777e-04", + "loss": 1.0087, + "slid_loss": 0.8862, + "step": 1857, + "time": 13.44 + }, + { + "epoch": 0.93, + "learning_rate": "1.9776e-04", + "loss": 0.8648, + "slid_loss": 0.8864, + "step": 1858, + "time": 14.24 + }, + { + "epoch": 0.93, + "learning_rate": "1.9776e-04", + "loss": 0.9589, + "slid_loss": 0.8876, + "step": 1859, + "time": 11.96 + }, + { + "epoch": 0.93, + "learning_rate": "1.9775e-04", + "loss": 0.9323, + "slid_loss": 0.888, + "step": 1860, + "time": 12.74 + }, + { + "epoch": 0.93, + "learning_rate": "1.9775e-04", + "loss": 0.7532, + "slid_loss": 0.8855, + "step": 1861, + "time": 12.22 + }, + { + "epoch": 0.93, + "learning_rate": "1.9774e-04", + "loss": 0.9631, + "slid_loss": 0.8879, + "step": 1862, + "time": 12.8 + }, + { + "epoch": 0.93, + "learning_rate": "1.9774e-04", + "loss": 0.8033, + "slid_loss": 0.8873, + "step": 1863, + "time": 11.86 + }, + { + "epoch": 0.93, + "learning_rate": "1.9773e-04", + "loss": 0.868, + "slid_loss": 0.8863, + "step": 1864, + "time": 12.86 + }, + { + "epoch": 0.93, + "learning_rate": "1.9773e-04", + "loss": 0.9101, + "slid_loss": 0.8862, + "step": 1865, + "time": 12.3 + }, + { + "epoch": 0.93, + "learning_rate": "1.9772e-04", + "loss": 0.9113, + "slid_loss": 0.8865, + "step": 1866, + "time": 12.83 + }, + { + "epoch": 0.93, + "learning_rate": "1.9772e-04", + "loss": 1.0886, + "slid_loss": 0.8888, + "step": 1867, + "time": 13.69 + }, + { + "epoch": 0.94, + "learning_rate": "1.9771e-04", + "loss": 0.9488, + "slid_loss": 0.8879, + "step": 1868, + "time": 13.54 + }, + { + "epoch": 0.94, + "learning_rate": "1.9770e-04", + "loss": 0.8365, + "slid_loss": 0.8869, + "step": 1869, + "time": 13.31 + }, + { + "epoch": 0.94, + "learning_rate": "1.9770e-04", + "loss": 0.7192, + "slid_loss": 0.8855, + "step": 1870, + "time": 13.1 + }, + { + "epoch": 0.94, + "learning_rate": "1.9769e-04", + "loss": 0.8011, + "slid_loss": 0.8851, + "step": 1871, + "time": 11.53 + }, + { + "epoch": 0.94, + "learning_rate": "1.9769e-04", + "loss": 0.9273, + "slid_loss": 0.8849, + "step": 1872, + "time": 13.21 + }, + { + "epoch": 0.94, + "learning_rate": "1.9768e-04", + "loss": 0.9517, + "slid_loss": 0.8861, + "step": 1873, + "time": 13.17 + }, + { + "epoch": 0.94, + "learning_rate": "1.9768e-04", + "loss": 0.8787, + "slid_loss": 0.8854, + "step": 1874, + "time": 13.5 + }, + { + "epoch": 0.94, + "learning_rate": "1.9767e-04", + "loss": 1.0436, + "slid_loss": 0.8872, + "step": 1875, + "time": 13.32 + }, + { + "epoch": 0.94, + "learning_rate": "1.9767e-04", + "loss": 0.8032, + "slid_loss": 0.8871, + "step": 1876, + "time": 10.56 + }, + { + "epoch": 0.94, + "learning_rate": "1.9766e-04", + "loss": 1.0707, + "slid_loss": 0.8882, + "step": 1877, + "time": 12.62 + }, + { + "epoch": 0.94, + "learning_rate": "1.9766e-04", + "loss": 0.7576, + "slid_loss": 0.8857, + "step": 1878, + "time": 11.31 + }, + { + "epoch": 0.94, + "learning_rate": "1.9765e-04", + "loss": 0.9291, + "slid_loss": 0.8859, + "step": 1879, + "time": 11.66 + }, + { + "epoch": 0.94, + "learning_rate": "1.9765e-04", + "loss": 0.8912, + "slid_loss": 0.886, + "step": 1880, + "time": 14.06 + }, + { + "epoch": 0.94, + "learning_rate": "1.9764e-04", + "loss": 0.8185, + "slid_loss": 0.8856, + "step": 1881, + "time": 13.04 + }, + { + "epoch": 0.94, + "learning_rate": "1.9764e-04", + "loss": 0.8629, + "slid_loss": 0.8853, + "step": 1882, + "time": 11.29 + }, + { + "epoch": 0.94, + "learning_rate": "1.9763e-04", + "loss": 0.8972, + "slid_loss": 0.8839, + "step": 1883, + "time": 13.87 + }, + { + "epoch": 0.94, + "learning_rate": "1.9763e-04", + "loss": 0.899, + "slid_loss": 0.8825, + "step": 1884, + "time": 13.48 + }, + { + "epoch": 0.94, + "learning_rate": "1.9762e-04", + "loss": 0.8835, + "slid_loss": 0.8832, + "step": 1885, + "time": 13.79 + }, + { + "epoch": 0.94, + "learning_rate": "1.9762e-04", + "loss": 0.9245, + "slid_loss": 0.885, + "step": 1886, + "time": 11.55 + }, + { + "epoch": 0.94, + "learning_rate": "1.9761e-04", + "loss": 1.0442, + "slid_loss": 0.8862, + "step": 1887, + "time": 12.92 + }, + { + "epoch": 0.95, + "learning_rate": "1.9760e-04", + "loss": 0.9651, + "slid_loss": 0.8877, + "step": 1888, + "time": 11.86 + }, + { + "epoch": 0.95, + "learning_rate": "1.9760e-04", + "loss": 0.8329, + "slid_loss": 0.8881, + "step": 1889, + "time": 11.34 + }, + { + "epoch": 0.95, + "learning_rate": "1.9759e-04", + "loss": 0.8708, + "slid_loss": 0.8875, + "step": 1890, + "time": 13.51 + }, + { + "epoch": 0.95, + "learning_rate": "1.9759e-04", + "loss": 0.782, + "slid_loss": 0.8858, + "step": 1891, + "time": 12.86 + }, + { + "epoch": 0.95, + "learning_rate": "1.9758e-04", + "loss": 0.9001, + "slid_loss": 0.8859, + "step": 1892, + "time": 12.73 + }, + { + "epoch": 0.95, + "learning_rate": "1.9758e-04", + "loss": 0.9692, + "slid_loss": 0.8874, + "step": 1893, + "time": 10.98 + }, + { + "epoch": 0.95, + "learning_rate": "1.9757e-04", + "loss": 0.9217, + "slid_loss": 0.886, + "step": 1894, + "time": 13.44 + }, + { + "epoch": 0.95, + "learning_rate": "1.9757e-04", + "loss": 1.1519, + "slid_loss": 0.8888, + "step": 1895, + "time": 14.4 + }, + { + "epoch": 0.95, + "learning_rate": "1.9756e-04", + "loss": 0.8502, + "slid_loss": 0.8879, + "step": 1896, + "time": 11.52 + }, + { + "epoch": 0.95, + "learning_rate": "1.9756e-04", + "loss": 0.9453, + "slid_loss": 0.8868, + "step": 1897, + "time": 12.9 + }, + { + "epoch": 0.95, + "learning_rate": "1.9755e-04", + "loss": 0.9169, + "slid_loss": 0.8866, + "step": 1898, + "time": 13.95 + }, + { + "epoch": 0.95, + "learning_rate": "1.9755e-04", + "loss": 0.7531, + "slid_loss": 0.8842, + "step": 1899, + "time": 10.67 + }, + { + "epoch": 0.95, + "learning_rate": "1.9754e-04", + "loss": 0.692, + "slid_loss": 0.8821, + "step": 1900, + "time": 13.13 + }, + { + "epoch": 0.95, + "learning_rate": "1.9753e-04", + "loss": 0.7302, + "slid_loss": 0.8814, + "step": 1901, + "time": 11.49 + }, + { + "epoch": 0.95, + "learning_rate": "1.9753e-04", + "loss": 0.8172, + "slid_loss": 0.879, + "step": 1902, + "time": 11.04 + }, + { + "epoch": 0.95, + "learning_rate": "1.9752e-04", + "loss": 0.8661, + "slid_loss": 0.8775, + "step": 1903, + "time": 11.44 + }, + { + "epoch": 0.95, + "learning_rate": "1.9752e-04", + "loss": 0.799, + "slid_loss": 0.878, + "step": 1904, + "time": 13.5 + }, + { + "epoch": 0.95, + "learning_rate": "1.9751e-04", + "loss": 0.7465, + "slid_loss": 0.876, + "step": 1905, + "time": 11.77 + }, + { + "epoch": 0.95, + "learning_rate": "1.9751e-04", + "loss": 0.7515, + "slid_loss": 0.8749, + "step": 1906, + "time": 13.7 + }, + { + "epoch": 0.95, + "learning_rate": "1.9750e-04", + "loss": 0.9448, + "slid_loss": 0.8773, + "step": 1907, + "time": 13.62 + }, + { + "epoch": 0.96, + "learning_rate": "1.9750e-04", + "loss": 0.7165, + "slid_loss": 0.8744, + "step": 1908, + "time": 11.39 + }, + { + "epoch": 0.96, + "learning_rate": "1.9749e-04", + "loss": 0.9476, + "slid_loss": 0.8761, + "step": 1909, + "time": 13.15 + }, + { + "epoch": 0.96, + "learning_rate": "1.9749e-04", + "loss": 0.9344, + "slid_loss": 0.8767, + "step": 1910, + "time": 10.66 + }, + { + "epoch": 0.96, + "learning_rate": "1.9748e-04", + "loss": 0.903, + "slid_loss": 0.8783, + "step": 1911, + "time": 13.95 + }, + { + "epoch": 0.96, + "learning_rate": "1.9747e-04", + "loss": 0.8717, + "slid_loss": 0.8794, + "step": 1912, + "time": 11.52 + }, + { + "epoch": 0.96, + "learning_rate": "1.9747e-04", + "loss": 0.8995, + "slid_loss": 0.8799, + "step": 1913, + "time": 13.46 + }, + { + "epoch": 0.96, + "learning_rate": "1.9746e-04", + "loss": 0.8984, + "slid_loss": 0.8788, + "step": 1914, + "time": 10.71 + }, + { + "epoch": 0.96, + "learning_rate": "1.9746e-04", + "loss": 1.0919, + "slid_loss": 0.8798, + "step": 1915, + "time": 11.9 + }, + { + "epoch": 0.96, + "learning_rate": "1.9745e-04", + "loss": 0.9408, + "slid_loss": 0.8825, + "step": 1916, + "time": 13.25 + }, + { + "epoch": 0.96, + "learning_rate": "1.9745e-04", + "loss": 0.9195, + "slid_loss": 0.8829, + "step": 1917, + "time": 13.93 + }, + { + "epoch": 0.96, + "learning_rate": "1.9744e-04", + "loss": 0.8733, + "slid_loss": 0.8837, + "step": 1918, + "time": 13.79 + }, + { + "epoch": 0.96, + "learning_rate": "1.9744e-04", + "loss": 0.9996, + "slid_loss": 0.8868, + "step": 1919, + "time": 12.04 + }, + { + "epoch": 0.96, + "learning_rate": "1.9743e-04", + "loss": 0.93, + "slid_loss": 0.8866, + "step": 1920, + "time": 11.0 + }, + { + "epoch": 0.96, + "learning_rate": "1.9742e-04", + "loss": 0.9411, + "slid_loss": 0.887, + "step": 1921, + "time": 13.19 + }, + { + "epoch": 0.96, + "learning_rate": "1.9742e-04", + "loss": 0.7324, + "slid_loss": 0.8835, + "step": 1922, + "time": 10.64 + }, + { + "epoch": 0.96, + "learning_rate": "1.9741e-04", + "loss": 0.6778, + "slid_loss": 0.8822, + "step": 1923, + "time": 11.55 + }, + { + "epoch": 0.96, + "learning_rate": "1.9741e-04", + "loss": 0.8275, + "slid_loss": 0.8825, + "step": 1924, + "time": 11.75 + }, + { + "epoch": 0.96, + "learning_rate": "1.9740e-04", + "loss": 1.0307, + "slid_loss": 0.8837, + "step": 1925, + "time": 11.31 + }, + { + "epoch": 0.96, + "learning_rate": "1.9740e-04", + "loss": 0.754, + "slid_loss": 0.8823, + "step": 1926, + "time": 11.78 + }, + { + "epoch": 0.96, + "learning_rate": "1.9739e-04", + "loss": 0.8245, + "slid_loss": 0.8811, + "step": 1927, + "time": 13.73 + }, + { + "epoch": 0.97, + "learning_rate": "1.9739e-04", + "loss": 0.8983, + "slid_loss": 0.8811, + "step": 1928, + "time": 13.6 + }, + { + "epoch": 0.97, + "learning_rate": "1.9738e-04", + "loss": 0.892, + "slid_loss": 0.8815, + "step": 1929, + "time": 12.08 + }, + { + "epoch": 0.97, + "learning_rate": "1.9737e-04", + "loss": 0.8306, + "slid_loss": 0.8816, + "step": 1930, + "time": 12.27 + }, + { + "epoch": 0.97, + "learning_rate": "1.9737e-04", + "loss": 0.9404, + "slid_loss": 0.8812, + "step": 1931, + "time": 13.38 + }, + { + "epoch": 0.97, + "learning_rate": "1.9736e-04", + "loss": 0.7948, + "slid_loss": 0.8797, + "step": 1932, + "time": 12.14 + }, + { + "epoch": 0.97, + "learning_rate": "1.9736e-04", + "loss": 0.9527, + "slid_loss": 0.88, + "step": 1933, + "time": 11.41 + }, + { + "epoch": 0.97, + "learning_rate": "1.9735e-04", + "loss": 0.819, + "slid_loss": 0.8794, + "step": 1934, + "time": 13.26 + }, + { + "epoch": 0.97, + "learning_rate": "1.9735e-04", + "loss": 1.0143, + "slid_loss": 0.8819, + "step": 1935, + "time": 12.3 + }, + { + "epoch": 0.97, + "learning_rate": "1.9734e-04", + "loss": 0.8709, + "slid_loss": 0.8814, + "step": 1936, + "time": 13.44 + }, + { + "epoch": 0.97, + "learning_rate": "1.9734e-04", + "loss": 1.0523, + "slid_loss": 0.8843, + "step": 1937, + "time": 11.93 + }, + { + "epoch": 0.97, + "learning_rate": "1.9733e-04", + "loss": 0.7185, + "slid_loss": 0.8828, + "step": 1938, + "time": 10.72 + }, + { + "epoch": 0.97, + "learning_rate": "1.9732e-04", + "loss": 0.8965, + "slid_loss": 0.8822, + "step": 1939, + "time": 13.35 + }, + { + "epoch": 0.97, + "learning_rate": "1.9732e-04", + "loss": 0.9348, + "slid_loss": 0.883, + "step": 1940, + "time": 11.56 + }, + { + "epoch": 0.97, + "learning_rate": "1.9731e-04", + "loss": 0.9465, + "slid_loss": 0.8842, + "step": 1941, + "time": 11.65 + }, + { + "epoch": 0.97, + "learning_rate": "1.9731e-04", + "loss": 0.9031, + "slid_loss": 0.8846, + "step": 1942, + "time": 13.59 + }, + { + "epoch": 0.97, + "learning_rate": "1.9730e-04", + "loss": 0.9767, + "slid_loss": 0.8854, + "step": 1943, + "time": 14.08 + }, + { + "epoch": 0.97, + "learning_rate": "1.9730e-04", + "loss": 0.7649, + "slid_loss": 0.8843, + "step": 1944, + "time": 11.5 + }, + { + "epoch": 0.97, + "learning_rate": "1.9729e-04", + "loss": 1.0004, + "slid_loss": 0.8852, + "step": 1945, + "time": 11.33 + }, + { + "epoch": 0.97, + "learning_rate": "1.9728e-04", + "loss": 0.9091, + "slid_loss": 0.8856, + "step": 1946, + "time": 13.28 + }, + { + "epoch": 0.97, + "learning_rate": "1.9728e-04", + "loss": 0.7798, + "slid_loss": 0.8845, + "step": 1947, + "time": 11.35 + }, + { + "epoch": 0.98, + "learning_rate": "1.9727e-04", + "loss": 0.9637, + "slid_loss": 0.8865, + "step": 1948, + "time": 13.37 + }, + { + "epoch": 0.98, + "learning_rate": "1.9727e-04", + "loss": 0.8419, + "slid_loss": 0.8857, + "step": 1949, + "time": 13.71 + }, + { + "epoch": 0.98, + "learning_rate": "1.9726e-04", + "loss": 0.9707, + "slid_loss": 0.8885, + "step": 1950, + "time": 12.59 + }, + { + "epoch": 0.98, + "learning_rate": "1.9726e-04", + "loss": 0.987, + "slid_loss": 0.8906, + "step": 1951, + "time": 12.81 + }, + { + "epoch": 0.98, + "learning_rate": "1.9725e-04", + "loss": 0.8554, + "slid_loss": 0.8891, + "step": 1952, + "time": 13.49 + }, + { + "epoch": 0.98, + "learning_rate": "1.9724e-04", + "loss": 0.9736, + "slid_loss": 0.8913, + "step": 1953, + "time": 11.83 + }, + { + "epoch": 0.98, + "learning_rate": "1.9724e-04", + "loss": 0.6955, + "slid_loss": 0.8889, + "step": 1954, + "time": 11.36 + }, + { + "epoch": 0.98, + "learning_rate": "1.9723e-04", + "loss": 0.9347, + "slid_loss": 0.8889, + "step": 1955, + "time": 11.85 + }, + { + "epoch": 0.98, + "learning_rate": "1.9723e-04", + "loss": 0.8247, + "slid_loss": 0.8881, + "step": 1956, + "time": 12.42 + }, + { + "epoch": 0.98, + "learning_rate": "1.9722e-04", + "loss": 0.7731, + "slid_loss": 0.8858, + "step": 1957, + "time": 12.19 + }, + { + "epoch": 0.98, + "learning_rate": "1.9722e-04", + "loss": 0.904, + "slid_loss": 0.8862, + "step": 1958, + "time": 12.81 + }, + { + "epoch": 0.98, + "learning_rate": "1.9721e-04", + "loss": 0.9208, + "slid_loss": 0.8858, + "step": 1959, + "time": 13.32 + }, + { + "epoch": 0.98, + "learning_rate": "1.9720e-04", + "loss": 0.7733, + "slid_loss": 0.8842, + "step": 1960, + "time": 12.53 + }, + { + "epoch": 0.98, + "learning_rate": "1.9720e-04", + "loss": 0.9984, + "slid_loss": 0.8867, + "step": 1961, + "time": 13.58 + }, + { + "epoch": 0.98, + "learning_rate": "1.9719e-04", + "loss": 0.8948, + "slid_loss": 0.886, + "step": 1962, + "time": 10.74 + }, + { + "epoch": 0.98, + "learning_rate": "1.9719e-04", + "loss": 0.6466, + "slid_loss": 0.8844, + "step": 1963, + "time": 12.82 + }, + { + "epoch": 0.98, + "learning_rate": "1.9718e-04", + "loss": 0.931, + "slid_loss": 0.885, + "step": 1964, + "time": 13.43 + }, + { + "epoch": 0.98, + "learning_rate": "1.9718e-04", + "loss": 0.7496, + "slid_loss": 0.8834, + "step": 1965, + "time": 11.07 + }, + { + "epoch": 0.98, + "learning_rate": "1.9717e-04", + "loss": 0.7853, + "slid_loss": 0.8822, + "step": 1966, + "time": 12.88 + }, + { + "epoch": 0.98, + "learning_rate": "1.9716e-04", + "loss": 0.9014, + "slid_loss": 0.8803, + "step": 1967, + "time": 10.71 + }, + { + "epoch": 0.99, + "learning_rate": "1.9716e-04", + "loss": 0.8492, + "slid_loss": 0.8793, + "step": 1968, + "time": 10.88 + }, + { + "epoch": 0.99, + "learning_rate": "1.9715e-04", + "loss": 0.949, + "slid_loss": 0.8804, + "step": 1969, + "time": 14.69 + }, + { + "epoch": 0.99, + "learning_rate": "1.9715e-04", + "loss": 0.7538, + "slid_loss": 0.8808, + "step": 1970, + "time": 11.9 + }, + { + "epoch": 0.99, + "learning_rate": "1.9714e-04", + "loss": 0.9307, + "slid_loss": 0.8821, + "step": 1971, + "time": 12.32 + }, + { + "epoch": 0.99, + "learning_rate": "1.9713e-04", + "loss": 0.849, + "slid_loss": 0.8813, + "step": 1972, + "time": 13.39 + }, + { + "epoch": 0.99, + "learning_rate": "1.9713e-04", + "loss": 0.9099, + "slid_loss": 0.8809, + "step": 1973, + "time": 12.2 + }, + { + "epoch": 0.99, + "learning_rate": "1.9712e-04", + "loss": 0.9032, + "slid_loss": 0.8811, + "step": 1974, + "time": 11.33 + }, + { + "epoch": 0.99, + "learning_rate": "1.9712e-04", + "loss": 0.8761, + "slid_loss": 0.8794, + "step": 1975, + "time": 12.85 + }, + { + "epoch": 0.99, + "learning_rate": "1.9711e-04", + "loss": 0.9417, + "slid_loss": 0.8808, + "step": 1976, + "time": 14.23 + }, + { + "epoch": 0.99, + "learning_rate": "1.9711e-04", + "loss": 0.9206, + "slid_loss": 0.8793, + "step": 1977, + "time": 11.59 + }, + { + "epoch": 0.99, + "learning_rate": "1.9710e-04", + "loss": 0.8513, + "slid_loss": 0.8803, + "step": 1978, + "time": 13.43 + }, + { + "epoch": 0.99, + "learning_rate": "1.9709e-04", + "loss": 0.7471, + "slid_loss": 0.8784, + "step": 1979, + "time": 12.52 + }, + { + "epoch": 0.99, + "learning_rate": "1.9709e-04", + "loss": 0.7661, + "slid_loss": 0.8772, + "step": 1980, + "time": 13.62 + }, + { + "epoch": 0.99, + "learning_rate": "1.9708e-04", + "loss": 0.8567, + "slid_loss": 0.8776, + "step": 1981, + "time": 12.85 + }, + { + "epoch": 0.99, + "learning_rate": "1.9708e-04", + "loss": 0.9669, + "slid_loss": 0.8786, + "step": 1982, + "time": 10.95 + }, + { + "epoch": 0.99, + "learning_rate": "1.9707e-04", + "loss": 0.7738, + "slid_loss": 0.8774, + "step": 1983, + "time": 12.87 + }, + { + "epoch": 0.99, + "learning_rate": "1.9706e-04", + "loss": 0.8262, + "slid_loss": 0.8767, + "step": 1984, + "time": 13.34 + }, + { + "epoch": 0.99, + "learning_rate": "1.9706e-04", + "loss": 0.8043, + "slid_loss": 0.8759, + "step": 1985, + "time": 13.26 + }, + { + "epoch": 0.99, + "learning_rate": "1.9705e-04", + "loss": 0.864, + "slid_loss": 0.8753, + "step": 1986, + "time": 12.99 + }, + { + "epoch": 0.99, + "learning_rate": "1.9705e-04", + "loss": 0.792, + "slid_loss": 0.8727, + "step": 1987, + "time": 11.09 + }, + { + "epoch": 1.0, + "learning_rate": "1.9704e-04", + "loss": 0.9997, + "slid_loss": 0.8731, + "step": 1988, + "time": 14.05 + }, + { + "epoch": 1.0, + "learning_rate": "1.9703e-04", + "loss": 0.8838, + "slid_loss": 0.8736, + "step": 1989, + "time": 13.28 + }, + { + "epoch": 1.0, + "learning_rate": "1.9703e-04", + "loss": 0.916, + "slid_loss": 0.874, + "step": 1990, + "time": 12.12 + }, + { + "epoch": 1.0, + "learning_rate": "1.9702e-04", + "loss": 0.8806, + "slid_loss": 0.875, + "step": 1991, + "time": 13.7 + }, + { + "epoch": 1.0, + "learning_rate": "1.9702e-04", + "loss": 0.9141, + "slid_loss": 0.8752, + "step": 1992, + "time": 13.54 + }, + { + "epoch": 1.0, + "learning_rate": "1.9701e-04", + "loss": 0.7495, + "slid_loss": 0.873, + "step": 1993, + "time": 13.34 + }, + { + "epoch": 1.0, + "learning_rate": "1.9701e-04", + "loss": 1.0816, + "slid_loss": 0.8746, + "step": 1994, + "time": 13.54 + }, + { + "epoch": 1.0, + "learning_rate": "1.9700e-04", + "loss": 0.8679, + "slid_loss": 0.8717, + "step": 1995, + "time": 11.1 + }, + { + "epoch": 1.0, + "learning_rate": "1.9699e-04", + "loss": 0.8819, + "slid_loss": 0.872, + "step": 1996, + "time": 12.25 + }, + { + "epoch": 1.0, + "learning_rate": "1.9699e-04", + "loss": 0.9891, + "slid_loss": 0.8725, + "step": 1997, + "time": 14.73 + }, + { + "epoch": 1.0, + "learning_rate": "1.9698e-04", + "loss": 0.6822, + "slid_loss": 0.8701, + "step": 1998, + "time": 178.23 + }, + { + "epoch": 1.0, + "learning_rate": "1.9698e-04", + "loss": 1.0321, + "slid_loss": 0.8729, + "step": 1999, + "time": 12.97 + }, + { + "epoch": 1.0, + "learning_rate": "1.9697e-04", + "loss": 0.6352, + "slid_loss": 0.8724, + "step": 2000, + "time": 11.29 + }, + { + "epoch": 1.0, + "learning_rate": "1.9696e-04", + "loss": 1.0903, + "slid_loss": 0.876, + "step": 2001, + "time": 12.55 + }, + { + "epoch": 1.0, + "learning_rate": "1.9696e-04", + "loss": 0.6357, + "slid_loss": 0.8741, + "step": 2002, + "time": 10.47 + }, + { + "epoch": 1.0, + "learning_rate": "1.9695e-04", + "loss": 0.7781, + "slid_loss": 0.8733, + "step": 2003, + "time": 10.75 + }, + { + "epoch": 1.0, + "learning_rate": "1.9695e-04", + "loss": 0.8358, + "slid_loss": 0.8736, + "step": 2004, + "time": 12.95 + }, + { + "epoch": 1.0, + "learning_rate": "1.9694e-04", + "loss": 0.8552, + "slid_loss": 0.8747, + "step": 2005, + "time": 13.08 + }, + { + "epoch": 1.0, + "learning_rate": "1.9693e-04", + "loss": 0.9798, + "slid_loss": 0.877, + "step": 2006, + "time": 11.41 + }, + { + "epoch": 1.01, + "learning_rate": "1.9693e-04", + "loss": 0.8315, + "slid_loss": 0.8759, + "step": 2007, + "time": 14.12 + }, + { + "epoch": 1.01, + "learning_rate": "1.9692e-04", + "loss": 0.9151, + "slid_loss": 0.8779, + "step": 2008, + "time": 12.93 + }, + { + "epoch": 1.01, + "learning_rate": "1.9692e-04", + "loss": 0.8911, + "slid_loss": 0.8773, + "step": 2009, + "time": 13.2 + }, + { + "epoch": 1.01, + "learning_rate": "1.9691e-04", + "loss": 0.9571, + "slid_loss": 0.8775, + "step": 2010, + "time": 13.64 + }, + { + "epoch": 1.01, + "learning_rate": "1.9690e-04", + "loss": 0.9204, + "slid_loss": 0.8777, + "step": 2011, + "time": 13.37 + }, + { + "epoch": 1.01, + "learning_rate": "1.9690e-04", + "loss": 0.9093, + "slid_loss": 0.8781, + "step": 2012, + "time": 10.68 + }, + { + "epoch": 1.01, + "learning_rate": "1.9689e-04", + "loss": 0.81, + "slid_loss": 0.8772, + "step": 2013, + "time": 12.93 + }, + { + "epoch": 1.01, + "learning_rate": "1.9688e-04", + "loss": 0.7772, + "slid_loss": 0.876, + "step": 2014, + "time": 13.23 + }, + { + "epoch": 1.01, + "learning_rate": "1.9688e-04", + "loss": 0.8147, + "slid_loss": 0.8732, + "step": 2015, + "time": 11.74 + }, + { + "epoch": 1.01, + "learning_rate": "1.9687e-04", + "loss": 1.0712, + "slid_loss": 0.8745, + "step": 2016, + "time": 10.73 + }, + { + "epoch": 1.01, + "learning_rate": "1.9687e-04", + "loss": 1.1253, + "slid_loss": 0.8765, + "step": 2017, + "time": 11.36 + }, + { + "epoch": 1.01, + "learning_rate": "1.9686e-04", + "loss": 0.7639, + "slid_loss": 0.8755, + "step": 2018, + "time": 12.07 + }, + { + "epoch": 1.01, + "learning_rate": "1.9685e-04", + "loss": 0.8312, + "slid_loss": 0.8738, + "step": 2019, + "time": 12.8 + }, + { + "epoch": 1.01, + "learning_rate": "1.9685e-04", + "loss": 0.8763, + "slid_loss": 0.8732, + "step": 2020, + "time": 10.92 + }, + { + "epoch": 1.01, + "learning_rate": "1.9684e-04", + "loss": 0.842, + "slid_loss": 0.8722, + "step": 2021, + "time": 12.34 + }, + { + "epoch": 1.01, + "learning_rate": "1.9684e-04", + "loss": 0.9276, + "slid_loss": 0.8742, + "step": 2022, + "time": 13.3 + }, + { + "epoch": 1.01, + "learning_rate": "1.9683e-04", + "loss": 0.7719, + "slid_loss": 0.8751, + "step": 2023, + "time": 13.31 + }, + { + "epoch": 1.01, + "learning_rate": "1.9682e-04", + "loss": 0.9184, + "slid_loss": 0.876, + "step": 2024, + "time": 12.91 + }, + { + "epoch": 1.01, + "learning_rate": "1.9682e-04", + "loss": 0.7743, + "slid_loss": 0.8735, + "step": 2025, + "time": 13.6 + }, + { + "epoch": 1.01, + "learning_rate": "1.9681e-04", + "loss": 0.898, + "slid_loss": 0.8749, + "step": 2026, + "time": 12.17 + }, + { + "epoch": 1.02, + "learning_rate": "1.9681e-04", + "loss": 0.8141, + "slid_loss": 0.8748, + "step": 2027, + "time": 12.25 + }, + { + "epoch": 1.02, + "learning_rate": "1.9680e-04", + "loss": 0.8173, + "slid_loss": 0.874, + "step": 2028, + "time": 13.5 + }, + { + "epoch": 1.02, + "learning_rate": "1.9679e-04", + "loss": 1.02, + "slid_loss": 0.8753, + "step": 2029, + "time": 13.4 + }, + { + "epoch": 1.02, + "learning_rate": "1.9679e-04", + "loss": 0.8337, + "slid_loss": 0.8753, + "step": 2030, + "time": 14.28 + }, + { + "epoch": 1.02, + "learning_rate": "1.9678e-04", + "loss": 0.7813, + "slid_loss": 0.8737, + "step": 2031, + "time": 12.91 + }, + { + "epoch": 1.02, + "learning_rate": "1.9677e-04", + "loss": 0.6952, + "slid_loss": 0.8727, + "step": 2032, + "time": 12.2 + }, + { + "epoch": 1.02, + "learning_rate": "1.9677e-04", + "loss": 0.9342, + "slid_loss": 0.8725, + "step": 2033, + "time": 10.71 + }, + { + "epoch": 1.02, + "learning_rate": "1.9676e-04", + "loss": 1.0042, + "slid_loss": 0.8744, + "step": 2034, + "time": 12.16 + }, + { + "epoch": 1.02, + "learning_rate": "1.9676e-04", + "loss": 0.9067, + "slid_loss": 0.8733, + "step": 2035, + "time": 11.5 + }, + { + "epoch": 1.02, + "learning_rate": "1.9675e-04", + "loss": 0.843, + "slid_loss": 0.873, + "step": 2036, + "time": 13.12 + }, + { + "epoch": 1.02, + "learning_rate": "1.9674e-04", + "loss": 0.9348, + "slid_loss": 0.8719, + "step": 2037, + "time": 10.99 + }, + { + "epoch": 1.02, + "learning_rate": "1.9674e-04", + "loss": 0.895, + "slid_loss": 0.8736, + "step": 2038, + "time": 12.51 + }, + { + "epoch": 1.02, + "learning_rate": "1.9673e-04", + "loss": 1.0822, + "slid_loss": 0.8755, + "step": 2039, + "time": 11.53 + }, + { + "epoch": 1.02, + "learning_rate": "1.9673e-04", + "loss": 0.8327, + "slid_loss": 0.8745, + "step": 2040, + "time": 11.54 + }, + { + "epoch": 1.02, + "learning_rate": "1.9672e-04", + "loss": 0.941, + "slid_loss": 0.8744, + "step": 2041, + "time": 13.37 + }, + { + "epoch": 1.02, + "learning_rate": "1.9671e-04", + "loss": 0.9577, + "slid_loss": 0.875, + "step": 2042, + "time": 13.16 + }, + { + "epoch": 1.02, + "learning_rate": "1.9671e-04", + "loss": 0.8601, + "slid_loss": 0.8738, + "step": 2043, + "time": 11.39 + }, + { + "epoch": 1.02, + "learning_rate": "1.9670e-04", + "loss": 0.6998, + "slid_loss": 0.8731, + "step": 2044, + "time": 12.95 + }, + { + "epoch": 1.02, + "learning_rate": "1.9669e-04", + "loss": 0.9331, + "slid_loss": 0.8725, + "step": 2045, + "time": 13.53 + }, + { + "epoch": 1.02, + "learning_rate": "1.9669e-04", + "loss": 0.9281, + "slid_loss": 0.8727, + "step": 2046, + "time": 12.12 + }, + { + "epoch": 1.03, + "learning_rate": "1.9668e-04", + "loss": 0.9316, + "slid_loss": 0.8742, + "step": 2047, + "time": 12.87 + }, + { + "epoch": 1.03, + "learning_rate": "1.9668e-04", + "loss": 0.8064, + "slid_loss": 0.8726, + "step": 2048, + "time": 13.91 + }, + { + "epoch": 1.03, + "learning_rate": "1.9667e-04", + "loss": 0.7582, + "slid_loss": 0.8718, + "step": 2049, + "time": 10.83 + }, + { + "epoch": 1.03, + "learning_rate": "1.9666e-04", + "loss": 0.8115, + "slid_loss": 0.8702, + "step": 2050, + "time": 10.6 + }, + { + "epoch": 1.03, + "learning_rate": "1.9666e-04", + "loss": 0.8376, + "slid_loss": 0.8687, + "step": 2051, + "time": 13.75 + }, + { + "epoch": 1.03, + "learning_rate": "1.9665e-04", + "loss": 0.9459, + "slid_loss": 0.8696, + "step": 2052, + "time": 13.64 + }, + { + "epoch": 1.03, + "learning_rate": "1.9664e-04", + "loss": 0.9916, + "slid_loss": 0.8698, + "step": 2053, + "time": 12.55 + }, + { + "epoch": 1.03, + "learning_rate": "1.9664e-04", + "loss": 0.8823, + "slid_loss": 0.8716, + "step": 2054, + "time": 13.14 + }, + { + "epoch": 1.03, + "learning_rate": "1.9663e-04", + "loss": 0.711, + "slid_loss": 0.8694, + "step": 2055, + "time": 10.39 + }, + { + "epoch": 1.03, + "learning_rate": "1.9662e-04", + "loss": 0.8318, + "slid_loss": 0.8695, + "step": 2056, + "time": 11.9 + }, + { + "epoch": 1.03, + "learning_rate": "1.9662e-04", + "loss": 0.9527, + "slid_loss": 0.8713, + "step": 2057, + "time": 13.66 + }, + { + "epoch": 1.03, + "learning_rate": "1.9661e-04", + "loss": 0.9418, + "slid_loss": 0.8716, + "step": 2058, + "time": 13.67 + }, + { + "epoch": 1.03, + "learning_rate": "1.9661e-04", + "loss": 0.7895, + "slid_loss": 0.8703, + "step": 2059, + "time": 13.77 + }, + { + "epoch": 1.03, + "learning_rate": "1.9660e-04", + "loss": 0.9601, + "slid_loss": 0.8722, + "step": 2060, + "time": 14.18 + }, + { + "epoch": 1.03, + "learning_rate": "1.9659e-04", + "loss": 0.8197, + "slid_loss": 0.8704, + "step": 2061, + "time": 12.96 + }, + { + "epoch": 1.03, + "learning_rate": "1.9659e-04", + "loss": 0.8901, + "slid_loss": 0.8704, + "step": 2062, + "time": 13.83 + }, + { + "epoch": 1.03, + "learning_rate": "1.9658e-04", + "loss": 0.8666, + "slid_loss": 0.8726, + "step": 2063, + "time": 13.38 + }, + { + "epoch": 1.03, + "learning_rate": "1.9657e-04", + "loss": 0.8974, + "slid_loss": 0.8722, + "step": 2064, + "time": 13.45 + }, + { + "epoch": 1.03, + "learning_rate": "1.9657e-04", + "loss": 0.9765, + "slid_loss": 0.8745, + "step": 2065, + "time": 13.81 + }, + { + "epoch": 1.03, + "learning_rate": "1.9656e-04", + "loss": 0.9092, + "slid_loss": 0.8757, + "step": 2066, + "time": 13.24 + }, + { + "epoch": 1.04, + "learning_rate": "1.9655e-04", + "loss": 0.9264, + "slid_loss": 0.876, + "step": 2067, + "time": 13.39 + }, + { + "epoch": 1.04, + "learning_rate": "1.9655e-04", + "loss": 0.8146, + "slid_loss": 0.8756, + "step": 2068, + "time": 12.59 + }, + { + "epoch": 1.04, + "learning_rate": "1.9654e-04", + "loss": 0.9693, + "slid_loss": 0.8758, + "step": 2069, + "time": 12.8 + }, + { + "epoch": 1.04, + "learning_rate": "1.9654e-04", + "loss": 0.929, + "slid_loss": 0.8776, + "step": 2070, + "time": 11.26 + }, + { + "epoch": 1.04, + "learning_rate": "1.9653e-04", + "loss": 0.8411, + "slid_loss": 0.8767, + "step": 2071, + "time": 12.55 + }, + { + "epoch": 1.04, + "learning_rate": "1.9652e-04", + "loss": 0.7551, + "slid_loss": 0.8758, + "step": 2072, + "time": 11.82 + }, + { + "epoch": 1.04, + "learning_rate": "1.9652e-04", + "loss": 0.823, + "slid_loss": 0.8749, + "step": 2073, + "time": 13.52 + }, + { + "epoch": 1.04, + "learning_rate": "1.9651e-04", + "loss": 0.939, + "slid_loss": 0.8753, + "step": 2074, + "time": 13.78 + }, + { + "epoch": 1.04, + "learning_rate": "1.9650e-04", + "loss": 0.6891, + "slid_loss": 0.8734, + "step": 2075, + "time": 11.82 + }, + { + "epoch": 1.04, + "learning_rate": "1.9650e-04", + "loss": 0.8695, + "slid_loss": 0.8727, + "step": 2076, + "time": 13.64 + }, + { + "epoch": 1.04, + "learning_rate": "1.9649e-04", + "loss": 0.7935, + "slid_loss": 0.8714, + "step": 2077, + "time": 13.32 + }, + { + "epoch": 1.04, + "learning_rate": "1.9648e-04", + "loss": 1.0568, + "slid_loss": 0.8734, + "step": 2078, + "time": 11.59 + }, + { + "epoch": 1.04, + "learning_rate": "1.9648e-04", + "loss": 0.9554, + "slid_loss": 0.8755, + "step": 2079, + "time": 10.55 + }, + { + "epoch": 1.04, + "learning_rate": "1.9647e-04", + "loss": 0.8008, + "slid_loss": 0.8759, + "step": 2080, + "time": 12.7 + }, + { + "epoch": 1.04, + "learning_rate": "1.9647e-04", + "loss": 1.0939, + "slid_loss": 0.8782, + "step": 2081, + "time": 11.17 + }, + { + "epoch": 1.04, + "learning_rate": "1.9646e-04", + "loss": 0.7616, + "slid_loss": 0.8762, + "step": 2082, + "time": 11.34 + }, + { + "epoch": 1.04, + "learning_rate": "1.9645e-04", + "loss": 0.7029, + "slid_loss": 0.8755, + "step": 2083, + "time": 13.71 + }, + { + "epoch": 1.04, + "learning_rate": "1.9645e-04", + "loss": 0.7415, + "slid_loss": 0.8746, + "step": 2084, + "time": 11.3 + }, + { + "epoch": 1.04, + "learning_rate": "1.9644e-04", + "loss": 0.8565, + "slid_loss": 0.8752, + "step": 2085, + "time": 13.14 + }, + { + "epoch": 1.04, + "learning_rate": "1.9643e-04", + "loss": 0.8569, + "slid_loss": 0.8751, + "step": 2086, + "time": 12.79 + }, + { + "epoch": 1.05, + "learning_rate": "1.9643e-04", + "loss": 0.6654, + "slid_loss": 0.8738, + "step": 2087, + "time": 11.44 + }, + { + "epoch": 1.05, + "learning_rate": "1.9642e-04", + "loss": 0.8607, + "slid_loss": 0.8724, + "step": 2088, + "time": 11.41 + }, + { + "epoch": 1.05, + "learning_rate": "1.9641e-04", + "loss": 0.7926, + "slid_loss": 0.8715, + "step": 2089, + "time": 11.66 + }, + { + "epoch": 1.05, + "learning_rate": "1.9641e-04", + "loss": 1.0679, + "slid_loss": 0.873, + "step": 2090, + "time": 13.29 + }, + { + "epoch": 1.05, + "learning_rate": "1.9640e-04", + "loss": 0.6192, + "slid_loss": 0.8704, + "step": 2091, + "time": 14.11 + }, + { + "epoch": 1.05, + "learning_rate": "1.9639e-04", + "loss": 0.7283, + "slid_loss": 0.8686, + "step": 2092, + "time": 10.44 + }, + { + "epoch": 1.05, + "learning_rate": "1.9639e-04", + "loss": 0.9314, + "slid_loss": 0.8704, + "step": 2093, + "time": 11.76 + }, + { + "epoch": 1.05, + "learning_rate": "1.9638e-04", + "loss": 0.9044, + "slid_loss": 0.8686, + "step": 2094, + "time": 10.97 + }, + { + "epoch": 1.05, + "learning_rate": "1.9637e-04", + "loss": 0.6205, + "slid_loss": 0.8661, + "step": 2095, + "time": 12.8 + }, + { + "epoch": 1.05, + "learning_rate": "1.9637e-04", + "loss": 0.9718, + "slid_loss": 0.867, + "step": 2096, + "time": 14.04 + }, + { + "epoch": 1.05, + "learning_rate": "1.9636e-04", + "loss": 0.9256, + "slid_loss": 0.8664, + "step": 2097, + "time": 13.23 + }, + { + "epoch": 1.05, + "learning_rate": "1.9635e-04", + "loss": 0.8963, + "slid_loss": 0.8685, + "step": 2098, + "time": 13.11 + }, + { + "epoch": 1.05, + "learning_rate": "1.9635e-04", + "loss": 0.7545, + "slid_loss": 0.8658, + "step": 2099, + "time": 13.98 + }, + { + "epoch": 1.05, + "learning_rate": "1.9634e-04", + "loss": 0.8944, + "slid_loss": 0.8684, + "step": 2100, + "time": 11.49 + }, + { + "epoch": 1.05, + "learning_rate": "1.9633e-04", + "loss": 0.78, + "slid_loss": 0.8653, + "step": 2101, + "time": 11.36 + }, + { + "epoch": 1.05, + "learning_rate": "1.9633e-04", + "loss": 0.7643, + "slid_loss": 0.8665, + "step": 2102, + "time": 11.44 + }, + { + "epoch": 1.05, + "learning_rate": "1.9632e-04", + "loss": 0.8427, + "slid_loss": 0.8672, + "step": 2103, + "time": 12.03 + }, + { + "epoch": 1.05, + "learning_rate": "1.9632e-04", + "loss": 0.8298, + "slid_loss": 0.8671, + "step": 2104, + "time": 13.75 + }, + { + "epoch": 1.05, + "learning_rate": "1.9631e-04", + "loss": 0.8972, + "slid_loss": 0.8676, + "step": 2105, + "time": 11.78 + }, + { + "epoch": 1.05, + "learning_rate": "1.9630e-04", + "loss": 0.6819, + "slid_loss": 0.8646, + "step": 2106, + "time": 13.47 + }, + { + "epoch": 1.06, + "learning_rate": "1.9630e-04", + "loss": 0.826, + "slid_loss": 0.8645, + "step": 2107, + "time": 12.41 + }, + { + "epoch": 1.06, + "learning_rate": "1.9629e-04", + "loss": 0.8579, + "slid_loss": 0.8639, + "step": 2108, + "time": 11.9 + }, + { + "epoch": 1.06, + "learning_rate": "1.9628e-04", + "loss": 0.8258, + "slid_loss": 0.8633, + "step": 2109, + "time": 13.7 + }, + { + "epoch": 1.06, + "learning_rate": "1.9628e-04", + "loss": 0.8713, + "slid_loss": 0.8624, + "step": 2110, + "time": 12.99 + }, + { + "epoch": 1.06, + "learning_rate": "1.9627e-04", + "loss": 0.797, + "slid_loss": 0.8612, + "step": 2111, + "time": 11.02 + }, + { + "epoch": 1.06, + "learning_rate": "1.9626e-04", + "loss": 0.8869, + "slid_loss": 0.861, + "step": 2112, + "time": 12.99 + }, + { + "epoch": 1.06, + "learning_rate": "1.9626e-04", + "loss": 0.9468, + "slid_loss": 0.8623, + "step": 2113, + "time": 12.23 + }, + { + "epoch": 1.06, + "learning_rate": "1.9625e-04", + "loss": 0.8404, + "slid_loss": 0.863, + "step": 2114, + "time": 12.97 + }, + { + "epoch": 1.06, + "learning_rate": "1.9624e-04", + "loss": 0.6352, + "slid_loss": 0.8612, + "step": 2115, + "time": 11.99 + }, + { + "epoch": 1.06, + "learning_rate": "1.9624e-04", + "loss": 0.7809, + "slid_loss": 0.8583, + "step": 2116, + "time": 13.79 + }, + { + "epoch": 1.06, + "learning_rate": "1.9623e-04", + "loss": 0.9877, + "slid_loss": 0.8569, + "step": 2117, + "time": 11.29 + }, + { + "epoch": 1.06, + "learning_rate": "1.9622e-04", + "loss": 0.8508, + "slid_loss": 0.8578, + "step": 2118, + "time": 13.45 + }, + { + "epoch": 1.06, + "learning_rate": "1.9622e-04", + "loss": 0.7672, + "slid_loss": 0.8571, + "step": 2119, + "time": 13.23 + }, + { + "epoch": 1.06, + "learning_rate": "1.9621e-04", + "loss": 0.8251, + "slid_loss": 0.8566, + "step": 2120, + "time": 12.55 + }, + { + "epoch": 1.06, + "learning_rate": "1.9620e-04", + "loss": 0.7552, + "slid_loss": 0.8558, + "step": 2121, + "time": 13.57 + }, + { + "epoch": 1.06, + "learning_rate": "1.9620e-04", + "loss": 0.8699, + "slid_loss": 0.8552, + "step": 2122, + "time": 13.84 + }, + { + "epoch": 1.06, + "learning_rate": "1.9619e-04", + "loss": 0.9166, + "slid_loss": 0.8566, + "step": 2123, + "time": 11.0 + }, + { + "epoch": 1.06, + "learning_rate": "1.9618e-04", + "loss": 0.994, + "slid_loss": 0.8574, + "step": 2124, + "time": 11.43 + }, + { + "epoch": 1.06, + "learning_rate": "1.9618e-04", + "loss": 0.9803, + "slid_loss": 0.8594, + "step": 2125, + "time": 11.11 + }, + { + "epoch": 1.06, + "learning_rate": "1.9617e-04", + "loss": 0.9751, + "slid_loss": 0.8602, + "step": 2126, + "time": 11.59 + }, + { + "epoch": 1.07, + "learning_rate": "1.9616e-04", + "loss": 0.9481, + "slid_loss": 0.8616, + "step": 2127, + "time": 11.35 + }, + { + "epoch": 1.07, + "learning_rate": "1.9616e-04", + "loss": 0.9772, + "slid_loss": 0.8631, + "step": 2128, + "time": 13.76 + }, + { + "epoch": 1.07, + "learning_rate": "1.9615e-04", + "loss": 0.8673, + "slid_loss": 0.8616, + "step": 2129, + "time": 13.66 + }, + { + "epoch": 1.07, + "learning_rate": "1.9614e-04", + "loss": 1.0599, + "slid_loss": 0.8639, + "step": 2130, + "time": 13.33 + }, + { + "epoch": 1.07, + "learning_rate": "1.9614e-04", + "loss": 0.7178, + "slid_loss": 0.8632, + "step": 2131, + "time": 11.09 + }, + { + "epoch": 1.07, + "learning_rate": "1.9613e-04", + "loss": 0.8183, + "slid_loss": 0.8645, + "step": 2132, + "time": 11.13 + }, + { + "epoch": 1.07, + "learning_rate": "1.9612e-04", + "loss": 0.9395, + "slid_loss": 0.8645, + "step": 2133, + "time": 11.31 + }, + { + "epoch": 1.07, + "learning_rate": "1.9611e-04", + "loss": 0.7771, + "slid_loss": 0.8623, + "step": 2134, + "time": 13.37 + }, + { + "epoch": 1.07, + "learning_rate": "1.9611e-04", + "loss": 0.8401, + "slid_loss": 0.8616, + "step": 2135, + "time": 12.8 + }, + { + "epoch": 1.07, + "learning_rate": "1.9610e-04", + "loss": 0.8042, + "slid_loss": 0.8612, + "step": 2136, + "time": 12.31 + }, + { + "epoch": 1.07, + "learning_rate": "1.9609e-04", + "loss": 0.8467, + "slid_loss": 0.8603, + "step": 2137, + "time": 13.38 + }, + { + "epoch": 1.07, + "learning_rate": "1.9609e-04", + "loss": 0.9157, + "slid_loss": 0.8605, + "step": 2138, + "time": 12.36 + }, + { + "epoch": 1.07, + "learning_rate": "1.9608e-04", + "loss": 0.9829, + "slid_loss": 0.8595, + "step": 2139, + "time": 13.85 + }, + { + "epoch": 1.07, + "learning_rate": "1.9607e-04", + "loss": 0.7294, + "slid_loss": 0.8585, + "step": 2140, + "time": 13.58 + }, + { + "epoch": 1.07, + "learning_rate": "1.9607e-04", + "loss": 0.9256, + "slid_loss": 0.8584, + "step": 2141, + "time": 13.17 + }, + { + "epoch": 1.07, + "learning_rate": "1.9606e-04", + "loss": 0.9688, + "slid_loss": 0.8585, + "step": 2142, + "time": 12.15 + }, + { + "epoch": 1.07, + "learning_rate": "1.9605e-04", + "loss": 0.786, + "slid_loss": 0.8577, + "step": 2143, + "time": 13.23 + }, + { + "epoch": 1.07, + "learning_rate": "1.9605e-04", + "loss": 0.9443, + "slid_loss": 0.8602, + "step": 2144, + "time": 13.17 + }, + { + "epoch": 1.07, + "learning_rate": "1.9604e-04", + "loss": 0.9666, + "slid_loss": 0.8605, + "step": 2145, + "time": 11.7 + }, + { + "epoch": 1.07, + "learning_rate": "1.9603e-04", + "loss": 0.6098, + "slid_loss": 0.8573, + "step": 2146, + "time": 12.24 + }, + { + "epoch": 1.08, + "learning_rate": "1.9603e-04", + "loss": 1.0065, + "slid_loss": 0.8581, + "step": 2147, + "time": 11.18 + }, + { + "epoch": 1.08, + "learning_rate": "1.9602e-04", + "loss": 0.674, + "slid_loss": 0.8567, + "step": 2148, + "time": 11.47 + }, + { + "epoch": 1.08, + "learning_rate": "1.9601e-04", + "loss": 0.8462, + "slid_loss": 0.8576, + "step": 2149, + "time": 11.15 + }, + { + "epoch": 1.08, + "learning_rate": "1.9601e-04", + "loss": 0.9181, + "slid_loss": 0.8587, + "step": 2150, + "time": 11.43 + }, + { + "epoch": 1.08, + "learning_rate": "1.9600e-04", + "loss": 0.9792, + "slid_loss": 0.8601, + "step": 2151, + "time": 12.16 + }, + { + "epoch": 1.08, + "learning_rate": "1.9599e-04", + "loss": 0.7235, + "slid_loss": 0.8579, + "step": 2152, + "time": 12.8 + }, + { + "epoch": 1.08, + "learning_rate": "1.9599e-04", + "loss": 1.0628, + "slid_loss": 0.8586, + "step": 2153, + "time": 13.85 + }, + { + "epoch": 1.08, + "learning_rate": "1.9598e-04", + "loss": 0.6901, + "slid_loss": 0.8567, + "step": 2154, + "time": 13.87 + }, + { + "epoch": 1.08, + "learning_rate": "1.9597e-04", + "loss": 0.7855, + "slid_loss": 0.8574, + "step": 2155, + "time": 12.95 + }, + { + "epoch": 1.08, + "learning_rate": "1.9596e-04", + "loss": 0.868, + "slid_loss": 0.8578, + "step": 2156, + "time": 12.87 + }, + { + "epoch": 1.08, + "learning_rate": "1.9596e-04", + "loss": 0.7845, + "slid_loss": 0.8561, + "step": 2157, + "time": 11.06 + }, + { + "epoch": 1.08, + "learning_rate": "1.9595e-04", + "loss": 0.8614, + "slid_loss": 0.8553, + "step": 2158, + "time": 13.36 + }, + { + "epoch": 1.08, + "learning_rate": "1.9594e-04", + "loss": 0.7977, + "slid_loss": 0.8554, + "step": 2159, + "time": 13.66 + }, + { + "epoch": 1.08, + "learning_rate": "1.9594e-04", + "loss": 0.9027, + "slid_loss": 0.8548, + "step": 2160, + "time": 11.7 + }, + { + "epoch": 1.08, + "learning_rate": "1.9593e-04", + "loss": 0.896, + "slid_loss": 0.8556, + "step": 2161, + "time": 14.13 + }, + { + "epoch": 1.08, + "learning_rate": "1.9592e-04", + "loss": 0.9635, + "slid_loss": 0.8563, + "step": 2162, + "time": 11.36 + }, + { + "epoch": 1.08, + "learning_rate": "1.9592e-04", + "loss": 0.8447, + "slid_loss": 0.8561, + "step": 2163, + "time": 13.83 + }, + { + "epoch": 1.08, + "learning_rate": "1.9591e-04", + "loss": 0.8198, + "slid_loss": 0.8553, + "step": 2164, + "time": 11.34 + }, + { + "epoch": 1.08, + "learning_rate": "1.9590e-04", + "loss": 0.978, + "slid_loss": 0.8553, + "step": 2165, + "time": 14.73 + }, + { + "epoch": 1.08, + "learning_rate": "1.9590e-04", + "loss": 0.8328, + "slid_loss": 0.8546, + "step": 2166, + "time": 13.0 + }, + { + "epoch": 1.09, + "learning_rate": "1.9589e-04", + "loss": 0.7817, + "slid_loss": 0.8531, + "step": 2167, + "time": 13.55 + }, + { + "epoch": 1.09, + "learning_rate": "1.9588e-04", + "loss": 0.9885, + "slid_loss": 0.8548, + "step": 2168, + "time": 11.19 + }, + { + "epoch": 1.09, + "learning_rate": "1.9588e-04", + "loss": 0.8007, + "slid_loss": 0.8532, + "step": 2169, + "time": 14.14 + }, + { + "epoch": 1.09, + "learning_rate": "1.9587e-04", + "loss": 0.9196, + "slid_loss": 0.8531, + "step": 2170, + "time": 11.33 + }, + { + "epoch": 1.09, + "learning_rate": "1.9586e-04", + "loss": 0.8252, + "slid_loss": 0.8529, + "step": 2171, + "time": 13.51 + }, + { + "epoch": 1.09, + "learning_rate": "1.9585e-04", + "loss": 0.8541, + "slid_loss": 0.8539, + "step": 2172, + "time": 13.01 + }, + { + "epoch": 1.09, + "learning_rate": "1.9585e-04", + "loss": 0.8041, + "slid_loss": 0.8537, + "step": 2173, + "time": 12.91 + }, + { + "epoch": 1.09, + "learning_rate": "1.9584e-04", + "loss": 0.9363, + "slid_loss": 0.8537, + "step": 2174, + "time": 11.96 + }, + { + "epoch": 1.09, + "learning_rate": "1.9583e-04", + "loss": 0.842, + "slid_loss": 0.8552, + "step": 2175, + "time": 13.0 + }, + { + "epoch": 1.09, + "learning_rate": "1.9583e-04", + "loss": 0.7742, + "slid_loss": 0.8543, + "step": 2176, + "time": 11.34 + }, + { + "epoch": 1.09, + "learning_rate": "1.9582e-04", + "loss": 0.8684, + "slid_loss": 0.855, + "step": 2177, + "time": 11.73 + }, + { + "epoch": 1.09, + "learning_rate": "1.9581e-04", + "loss": 0.8589, + "slid_loss": 0.853, + "step": 2178, + "time": 12.07 + }, + { + "epoch": 1.09, + "learning_rate": "1.9581e-04", + "loss": 1.0156, + "slid_loss": 0.8536, + "step": 2179, + "time": 11.74 + }, + { + "epoch": 1.09, + "learning_rate": "1.9580e-04", + "loss": 0.9666, + "slid_loss": 0.8553, + "step": 2180, + "time": 13.26 + }, + { + "epoch": 1.09, + "learning_rate": "1.9579e-04", + "loss": 0.8305, + "slid_loss": 0.8527, + "step": 2181, + "time": 11.77 + }, + { + "epoch": 1.09, + "learning_rate": "1.9578e-04", + "loss": 0.963, + "slid_loss": 0.8547, + "step": 2182, + "time": 11.78 + }, + { + "epoch": 1.09, + "learning_rate": "1.9578e-04", + "loss": 0.9635, + "slid_loss": 0.8573, + "step": 2183, + "time": 10.76 + }, + { + "epoch": 1.09, + "learning_rate": "1.9577e-04", + "loss": 0.794, + "slid_loss": 0.8578, + "step": 2184, + "time": 11.39 + }, + { + "epoch": 1.09, + "learning_rate": "1.9576e-04", + "loss": 0.8653, + "slid_loss": 0.8579, + "step": 2185, + "time": 13.21 + }, + { + "epoch": 1.09, + "learning_rate": "1.9576e-04", + "loss": 0.8168, + "slid_loss": 0.8575, + "step": 2186, + "time": 12.84 + }, + { + "epoch": 1.1, + "learning_rate": "1.9575e-04", + "loss": 0.6772, + "slid_loss": 0.8576, + "step": 2187, + "time": 12.4 + }, + { + "epoch": 1.1, + "learning_rate": "1.9574e-04", + "loss": 0.9874, + "slid_loss": 0.8589, + "step": 2188, + "time": 11.4 + }, + { + "epoch": 1.1, + "learning_rate": "1.9573e-04", + "loss": 0.8957, + "slid_loss": 0.8599, + "step": 2189, + "time": 13.09 + }, + { + "epoch": 1.1, + "learning_rate": "1.9573e-04", + "loss": 0.9608, + "slid_loss": 0.8588, + "step": 2190, + "time": 13.55 + }, + { + "epoch": 1.1, + "learning_rate": "1.9572e-04", + "loss": 0.9184, + "slid_loss": 0.8618, + "step": 2191, + "time": 13.7 + }, + { + "epoch": 1.1, + "learning_rate": "1.9571e-04", + "loss": 0.9679, + "slid_loss": 0.8642, + "step": 2192, + "time": 12.35 + }, + { + "epoch": 1.1, + "learning_rate": "1.9571e-04", + "loss": 0.9292, + "slid_loss": 0.8642, + "step": 2193, + "time": 13.81 + }, + { + "epoch": 1.1, + "learning_rate": "1.9570e-04", + "loss": 0.8737, + "slid_loss": 0.8639, + "step": 2194, + "time": 12.96 + }, + { + "epoch": 1.1, + "learning_rate": "1.9569e-04", + "loss": 0.8286, + "slid_loss": 0.866, + "step": 2195, + "time": 11.71 + }, + { + "epoch": 1.1, + "learning_rate": "1.9569e-04", + "loss": 0.9267, + "slid_loss": 0.8655, + "step": 2196, + "time": 13.65 + }, + { + "epoch": 1.1, + "learning_rate": "1.9568e-04", + "loss": 0.9738, + "slid_loss": 0.866, + "step": 2197, + "time": 13.15 + }, + { + "epoch": 1.1, + "learning_rate": "1.9567e-04", + "loss": 0.8717, + "slid_loss": 0.8658, + "step": 2198, + "time": 11.64 + }, + { + "epoch": 1.1, + "learning_rate": "1.9566e-04", + "loss": 0.983, + "slid_loss": 0.868, + "step": 2199, + "time": 10.4 + }, + { + "epoch": 1.1, + "learning_rate": "1.9566e-04", + "loss": 0.8489, + "slid_loss": 0.8676, + "step": 2200, + "time": 13.53 + }, + { + "epoch": 1.1, + "learning_rate": "1.9565e-04", + "loss": 1.0182, + "slid_loss": 0.87, + "step": 2201, + "time": 13.97 + }, + { + "epoch": 1.1, + "learning_rate": "1.9564e-04", + "loss": 1.046, + "slid_loss": 0.8728, + "step": 2202, + "time": 12.14 + }, + { + "epoch": 1.1, + "learning_rate": "1.9564e-04", + "loss": 0.8373, + "slid_loss": 0.8727, + "step": 2203, + "time": 12.44 + }, + { + "epoch": 1.1, + "learning_rate": "1.9563e-04", + "loss": 0.855, + "slid_loss": 0.873, + "step": 2204, + "time": 13.83 + }, + { + "epoch": 1.1, + "learning_rate": "1.9562e-04", + "loss": 0.6272, + "slid_loss": 0.8703, + "step": 2205, + "time": 10.61 + }, + { + "epoch": 1.1, + "learning_rate": "1.9561e-04", + "loss": 0.816, + "slid_loss": 0.8716, + "step": 2206, + "time": 10.76 + }, + { + "epoch": 1.11, + "learning_rate": "1.9561e-04", + "loss": 0.5927, + "slid_loss": 0.8693, + "step": 2207, + "time": 9.89 + }, + { + "epoch": 1.11, + "learning_rate": "1.9560e-04", + "loss": 0.8669, + "slid_loss": 0.8694, + "step": 2208, + "time": 12.88 + }, + { + "epoch": 1.11, + "learning_rate": "1.9559e-04", + "loss": 0.8059, + "slid_loss": 0.8692, + "step": 2209, + "time": 12.79 + }, + { + "epoch": 1.11, + "learning_rate": "1.9559e-04", + "loss": 0.9238, + "slid_loss": 0.8697, + "step": 2210, + "time": 12.94 + }, + { + "epoch": 1.11, + "learning_rate": "1.9558e-04", + "loss": 0.8448, + "slid_loss": 0.8702, + "step": 2211, + "time": 13.96 + }, + { + "epoch": 1.11, + "learning_rate": "1.9557e-04", + "loss": 0.6992, + "slid_loss": 0.8683, + "step": 2212, + "time": 11.7 + }, + { + "epoch": 1.11, + "learning_rate": "1.9556e-04", + "loss": 0.7321, + "slid_loss": 0.8662, + "step": 2213, + "time": 11.65 + }, + { + "epoch": 1.11, + "learning_rate": "1.9556e-04", + "loss": 0.9289, + "slid_loss": 0.867, + "step": 2214, + "time": 12.51 + }, + { + "epoch": 1.11, + "learning_rate": "1.9555e-04", + "loss": 0.9578, + "slid_loss": 0.8703, + "step": 2215, + "time": 13.35 + }, + { + "epoch": 1.11, + "learning_rate": "1.9554e-04", + "loss": 0.8559, + "slid_loss": 0.871, + "step": 2216, + "time": 12.47 + }, + { + "epoch": 1.11, + "learning_rate": "1.9553e-04", + "loss": 0.8374, + "slid_loss": 0.8695, + "step": 2217, + "time": 13.27 + }, + { + "epoch": 1.11, + "learning_rate": "1.9553e-04", + "loss": 0.8713, + "slid_loss": 0.8697, + "step": 2218, + "time": 11.15 + }, + { + "epoch": 1.11, + "learning_rate": "1.9552e-04", + "loss": 0.9269, + "slid_loss": 0.8713, + "step": 2219, + "time": 12.18 + }, + { + "epoch": 1.11, + "learning_rate": "1.9551e-04", + "loss": 0.831, + "slid_loss": 0.8714, + "step": 2220, + "time": 13.95 + }, + { + "epoch": 1.11, + "learning_rate": "1.9551e-04", + "loss": 0.8952, + "slid_loss": 0.8728, + "step": 2221, + "time": 13.95 + }, + { + "epoch": 1.11, + "learning_rate": "1.9550e-04", + "loss": 1.0349, + "slid_loss": 0.8744, + "step": 2222, + "time": 13.14 + }, + { + "epoch": 1.11, + "learning_rate": "1.9549e-04", + "loss": 0.8154, + "slid_loss": 0.8734, + "step": 2223, + "time": 12.93 + }, + { + "epoch": 1.11, + "learning_rate": "1.9548e-04", + "loss": 0.9369, + "slid_loss": 0.8728, + "step": 2224, + "time": 13.43 + }, + { + "epoch": 1.11, + "learning_rate": "1.9548e-04", + "loss": 0.8108, + "slid_loss": 0.8712, + "step": 2225, + "time": 12.27 + }, + { + "epoch": 1.11, + "learning_rate": "1.9547e-04", + "loss": 1.0724, + "slid_loss": 0.8721, + "step": 2226, + "time": 11.48 + }, + { + "epoch": 1.12, + "learning_rate": "1.9546e-04", + "loss": 0.7923, + "slid_loss": 0.8706, + "step": 2227, + "time": 12.62 + }, + { + "epoch": 1.12, + "learning_rate": "1.9546e-04", + "loss": 1.0384, + "slid_loss": 0.8712, + "step": 2228, + "time": 13.66 + }, + { + "epoch": 1.12, + "learning_rate": "1.9545e-04", + "loss": 0.8984, + "slid_loss": 0.8715, + "step": 2229, + "time": 13.49 + }, + { + "epoch": 1.12, + "learning_rate": "1.9544e-04", + "loss": 0.9038, + "slid_loss": 0.8699, + "step": 2230, + "time": 12.85 + }, + { + "epoch": 1.12, + "learning_rate": "1.9543e-04", + "loss": 0.8088, + "slid_loss": 0.8708, + "step": 2231, + "time": 13.76 + }, + { + "epoch": 1.12, + "learning_rate": "1.9543e-04", + "loss": 0.8088, + "slid_loss": 0.8707, + "step": 2232, + "time": 13.57 + }, + { + "epoch": 1.12, + "learning_rate": "1.9542e-04", + "loss": 0.9316, + "slid_loss": 0.8707, + "step": 2233, + "time": 12.79 + }, + { + "epoch": 1.12, + "learning_rate": "1.9541e-04", + "loss": 0.9222, + "slid_loss": 0.8721, + "step": 2234, + "time": 12.1 + }, + { + "epoch": 1.12, + "learning_rate": "1.9540e-04", + "loss": 0.8599, + "slid_loss": 0.8723, + "step": 2235, + "time": 13.31 + }, + { + "epoch": 1.12, + "learning_rate": "1.9540e-04", + "loss": 0.9711, + "slid_loss": 0.874, + "step": 2236, + "time": 11.38 + }, + { + "epoch": 1.12, + "learning_rate": "1.9539e-04", + "loss": 0.8493, + "slid_loss": 0.874, + "step": 2237, + "time": 10.97 + }, + { + "epoch": 1.12, + "learning_rate": "1.9538e-04", + "loss": 0.7404, + "slid_loss": 0.8723, + "step": 2238, + "time": 13.87 + }, + { + "epoch": 1.12, + "learning_rate": "1.9537e-04", + "loss": 0.8189, + "slid_loss": 0.8706, + "step": 2239, + "time": 12.82 + }, + { + "epoch": 1.12, + "learning_rate": "1.9537e-04", + "loss": 0.8706, + "slid_loss": 0.872, + "step": 2240, + "time": 11.25 + }, + { + "epoch": 1.12, + "learning_rate": "1.9536e-04", + "loss": 0.8328, + "slid_loss": 0.8711, + "step": 2241, + "time": 10.88 + }, + { + "epoch": 1.12, + "learning_rate": "1.9535e-04", + "loss": 0.8349, + "slid_loss": 0.8698, + "step": 2242, + "time": 12.08 + }, + { + "epoch": 1.12, + "learning_rate": "1.9535e-04", + "loss": 0.9554, + "slid_loss": 0.8715, + "step": 2243, + "time": 12.83 + }, + { + "epoch": 1.12, + "learning_rate": "1.9534e-04", + "loss": 0.8418, + "slid_loss": 0.8704, + "step": 2244, + "time": 11.28 + }, + { + "epoch": 1.12, + "learning_rate": "1.9533e-04", + "loss": 0.8383, + "slid_loss": 0.8691, + "step": 2245, + "time": 12.83 + }, + { + "epoch": 1.12, + "learning_rate": "1.9532e-04", + "loss": 0.8353, + "slid_loss": 0.8714, + "step": 2246, + "time": 13.39 + }, + { + "epoch": 1.13, + "learning_rate": "1.9532e-04", + "loss": 0.8389, + "slid_loss": 0.8697, + "step": 2247, + "time": 11.92 + }, + { + "epoch": 1.13, + "learning_rate": "1.9531e-04", + "loss": 0.8731, + "slid_loss": 0.8717, + "step": 2248, + "time": 11.19 + }, + { + "epoch": 1.13, + "learning_rate": "1.9530e-04", + "loss": 0.8062, + "slid_loss": 0.8713, + "step": 2249, + "time": 12.9 + }, + { + "epoch": 1.13, + "learning_rate": "1.9529e-04", + "loss": 0.7483, + "slid_loss": 0.8696, + "step": 2250, + "time": 13.81 + }, + { + "epoch": 1.13, + "learning_rate": "1.9529e-04", + "loss": 0.8664, + "slid_loss": 0.8685, + "step": 2251, + "time": 10.28 + }, + { + "epoch": 1.13, + "learning_rate": "1.9528e-04", + "loss": 0.877, + "slid_loss": 0.87, + "step": 2252, + "time": 12.79 + }, + { + "epoch": 1.13, + "learning_rate": "1.9527e-04", + "loss": 0.9724, + "slid_loss": 0.8691, + "step": 2253, + "time": 13.71 + }, + { + "epoch": 1.13, + "learning_rate": "1.9526e-04", + "loss": 0.8898, + "slid_loss": 0.8711, + "step": 2254, + "time": 13.17 + }, + { + "epoch": 1.13, + "learning_rate": "1.9526e-04", + "loss": 0.7004, + "slid_loss": 0.8703, + "step": 2255, + "time": 10.78 + }, + { + "epoch": 1.13, + "learning_rate": "1.9525e-04", + "loss": 0.7312, + "slid_loss": 0.8689, + "step": 2256, + "time": 12.33 + }, + { + "epoch": 1.13, + "learning_rate": "1.9524e-04", + "loss": 0.9174, + "slid_loss": 0.8702, + "step": 2257, + "time": 12.8 + }, + { + "epoch": 1.13, + "learning_rate": "1.9523e-04", + "loss": 1.0879, + "slid_loss": 0.8725, + "step": 2258, + "time": 12.92 + }, + { + "epoch": 1.13, + "learning_rate": "1.9523e-04", + "loss": 0.9044, + "slid_loss": 0.8736, + "step": 2259, + "time": 11.3 + }, + { + "epoch": 1.13, + "learning_rate": "1.9522e-04", + "loss": 0.8615, + "slid_loss": 0.8731, + "step": 2260, + "time": 13.16 + }, + { + "epoch": 1.13, + "learning_rate": "1.9521e-04", + "loss": 0.7889, + "slid_loss": 0.8721, + "step": 2261, + "time": 13.34 + }, + { + "epoch": 1.13, + "learning_rate": "1.9520e-04", + "loss": 0.7818, + "slid_loss": 0.8703, + "step": 2262, + "time": 13.73 + }, + { + "epoch": 1.13, + "learning_rate": "1.9520e-04", + "loss": 0.8067, + "slid_loss": 0.8699, + "step": 2263, + "time": 13.23 + }, + { + "epoch": 1.13, + "learning_rate": "1.9519e-04", + "loss": 0.7319, + "slid_loss": 0.869, + "step": 2264, + "time": 13.47 + }, + { + "epoch": 1.13, + "learning_rate": "1.9518e-04", + "loss": 0.9796, + "slid_loss": 0.869, + "step": 2265, + "time": 11.32 + }, + { + "epoch": 1.13, + "learning_rate": "1.9517e-04", + "loss": 0.8959, + "slid_loss": 0.8696, + "step": 2266, + "time": 13.52 + }, + { + "epoch": 1.14, + "learning_rate": "1.9517e-04", + "loss": 0.7133, + "slid_loss": 0.869, + "step": 2267, + "time": 12.16 + }, + { + "epoch": 1.14, + "learning_rate": "1.9516e-04", + "loss": 1.023, + "slid_loss": 0.8693, + "step": 2268, + "time": 12.84 + }, + { + "epoch": 1.14, + "learning_rate": "1.9515e-04", + "loss": 0.9069, + "slid_loss": 0.8704, + "step": 2269, + "time": 11.55 + }, + { + "epoch": 1.14, + "learning_rate": "1.9514e-04", + "loss": 0.9214, + "slid_loss": 0.8704, + "step": 2270, + "time": 12.34 + }, + { + "epoch": 1.14, + "learning_rate": "1.9514e-04", + "loss": 0.7327, + "slid_loss": 0.8695, + "step": 2271, + "time": 10.59 + }, + { + "epoch": 1.14, + "learning_rate": "1.9513e-04", + "loss": 0.7969, + "slid_loss": 0.8689, + "step": 2272, + "time": 13.58 + }, + { + "epoch": 1.14, + "learning_rate": "1.9512e-04", + "loss": 0.8303, + "slid_loss": 0.8692, + "step": 2273, + "time": 11.17 + }, + { + "epoch": 1.14, + "learning_rate": "1.9511e-04", + "loss": 0.7505, + "slid_loss": 0.8673, + "step": 2274, + "time": 12.94 + }, + { + "epoch": 1.14, + "learning_rate": "1.9511e-04", + "loss": 0.6496, + "slid_loss": 0.8654, + "step": 2275, + "time": 11.56 + }, + { + "epoch": 1.14, + "learning_rate": "1.9510e-04", + "loss": 0.9791, + "slid_loss": 0.8674, + "step": 2276, + "time": 12.21 + }, + { + "epoch": 1.14, + "learning_rate": "1.9509e-04", + "loss": 0.8467, + "slid_loss": 0.8672, + "step": 2277, + "time": 13.17 + }, + { + "epoch": 1.14, + "learning_rate": "1.9508e-04", + "loss": 0.925, + "slid_loss": 0.8679, + "step": 2278, + "time": 13.56 + }, + { + "epoch": 1.14, + "learning_rate": "1.9508e-04", + "loss": 0.8734, + "slid_loss": 0.8664, + "step": 2279, + "time": 11.43 + }, + { + "epoch": 1.14, + "learning_rate": "1.9507e-04", + "loss": 0.8419, + "slid_loss": 0.8652, + "step": 2280, + "time": 12.74 + }, + { + "epoch": 1.14, + "learning_rate": "1.9506e-04", + "loss": 0.9987, + "slid_loss": 0.8669, + "step": 2281, + "time": 12.24 + }, + { + "epoch": 1.14, + "learning_rate": "1.9505e-04", + "loss": 0.7289, + "slid_loss": 0.8645, + "step": 2282, + "time": 12.79 + }, + { + "epoch": 1.14, + "learning_rate": "1.9505e-04", + "loss": 0.8158, + "slid_loss": 0.8631, + "step": 2283, + "time": 13.09 + }, + { + "epoch": 1.14, + "learning_rate": "1.9504e-04", + "loss": 0.9179, + "slid_loss": 0.8643, + "step": 2284, + "time": 11.86 + }, + { + "epoch": 1.14, + "learning_rate": "1.9503e-04", + "loss": 0.7639, + "slid_loss": 0.8633, + "step": 2285, + "time": 13.41 + }, + { + "epoch": 1.14, + "learning_rate": "1.9502e-04", + "loss": 0.9109, + "slid_loss": 0.8642, + "step": 2286, + "time": 13.67 + }, + { + "epoch": 1.15, + "learning_rate": "1.9502e-04", + "loss": 0.9577, + "slid_loss": 0.867, + "step": 2287, + "time": 13.44 + }, + { + "epoch": 1.15, + "learning_rate": "1.9501e-04", + "loss": 0.8175, + "slid_loss": 0.8653, + "step": 2288, + "time": 12.02 + }, + { + "epoch": 1.15, + "learning_rate": "1.9500e-04", + "loss": 0.9882, + "slid_loss": 0.8663, + "step": 2289, + "time": 13.01 + }, + { + "epoch": 1.15, + "learning_rate": "1.9499e-04", + "loss": 0.9169, + "slid_loss": 0.8658, + "step": 2290, + "time": 12.79 + }, + { + "epoch": 1.15, + "learning_rate": "1.9499e-04", + "loss": 0.8204, + "slid_loss": 0.8648, + "step": 2291, + "time": 13.54 + }, + { + "epoch": 1.15, + "learning_rate": "1.9498e-04", + "loss": 0.7944, + "slid_loss": 0.8631, + "step": 2292, + "time": 11.98 + }, + { + "epoch": 1.15, + "learning_rate": "1.9497e-04", + "loss": 0.7563, + "slid_loss": 0.8614, + "step": 2293, + "time": 12.95 + }, + { + "epoch": 1.15, + "learning_rate": "1.9496e-04", + "loss": 0.8826, + "slid_loss": 0.8615, + "step": 2294, + "time": 13.81 + }, + { + "epoch": 1.15, + "learning_rate": "1.9495e-04", + "loss": 0.883, + "slid_loss": 0.862, + "step": 2295, + "time": 13.43 + }, + { + "epoch": 1.15, + "learning_rate": "1.9495e-04", + "loss": 0.9214, + "slid_loss": 0.862, + "step": 2296, + "time": 13.38 + }, + { + "epoch": 1.15, + "learning_rate": "1.9494e-04", + "loss": 0.879, + "slid_loss": 0.861, + "step": 2297, + "time": 12.61 + }, + { + "epoch": 1.15, + "learning_rate": "1.9493e-04", + "loss": 0.8086, + "slid_loss": 0.8604, + "step": 2298, + "time": 12.6 + }, + { + "epoch": 1.15, + "learning_rate": "1.9492e-04", + "loss": 0.8069, + "slid_loss": 0.8586, + "step": 2299, + "time": 12.59 + }, + { + "epoch": 1.15, + "learning_rate": "1.9492e-04", + "loss": 0.9305, + "slid_loss": 0.8594, + "step": 2300, + "time": 10.71 + }, + { + "epoch": 1.15, + "learning_rate": "1.9491e-04", + "loss": 0.7914, + "slid_loss": 0.8572, + "step": 2301, + "time": 11.21 + }, + { + "epoch": 1.15, + "learning_rate": "1.9490e-04", + "loss": 0.8648, + "slid_loss": 0.8554, + "step": 2302, + "time": 11.97 + }, + { + "epoch": 1.15, + "learning_rate": "1.9489e-04", + "loss": 0.841, + "slid_loss": 0.8554, + "step": 2303, + "time": 10.97 + }, + { + "epoch": 1.15, + "learning_rate": "1.9489e-04", + "loss": 0.8642, + "slid_loss": 0.8555, + "step": 2304, + "time": 13.45 + }, + { + "epoch": 1.15, + "learning_rate": "1.9488e-04", + "loss": 1.0019, + "slid_loss": 0.8592, + "step": 2305, + "time": 13.81 + }, + { + "epoch": 1.15, + "learning_rate": "1.9487e-04", + "loss": 0.9159, + "slid_loss": 0.8602, + "step": 2306, + "time": 12.6 + }, + { + "epoch": 1.16, + "learning_rate": "1.9486e-04", + "loss": 0.8515, + "slid_loss": 0.8628, + "step": 2307, + "time": 11.21 + }, + { + "epoch": 1.16, + "learning_rate": "1.9485e-04", + "loss": 0.8736, + "slid_loss": 0.8629, + "step": 2308, + "time": 10.56 + }, + { + "epoch": 1.16, + "learning_rate": "1.9485e-04", + "loss": 0.8818, + "slid_loss": 0.8636, + "step": 2309, + "time": 11.4 + }, + { + "epoch": 1.16, + "learning_rate": "1.9484e-04", + "loss": 0.8513, + "slid_loss": 0.8629, + "step": 2310, + "time": 11.4 + }, + { + "epoch": 1.16, + "learning_rate": "1.9483e-04", + "loss": 0.8819, + "slid_loss": 0.8633, + "step": 2311, + "time": 13.04 + }, + { + "epoch": 1.16, + "learning_rate": "1.9482e-04", + "loss": 0.7904, + "slid_loss": 0.8642, + "step": 2312, + "time": 11.78 + }, + { + "epoch": 1.16, + "learning_rate": "1.9482e-04", + "loss": 0.8333, + "slid_loss": 0.8652, + "step": 2313, + "time": 11.33 + }, + { + "epoch": 1.16, + "learning_rate": "1.9481e-04", + "loss": 0.892, + "slid_loss": 0.8648, + "step": 2314, + "time": 14.04 + }, + { + "epoch": 1.16, + "learning_rate": "1.9480e-04", + "loss": 0.8311, + "slid_loss": 0.8636, + "step": 2315, + "time": 13.15 + }, + { + "epoch": 1.16, + "learning_rate": "1.9479e-04", + "loss": 0.7337, + "slid_loss": 0.8623, + "step": 2316, + "time": 11.36 + }, + { + "epoch": 1.16, + "learning_rate": "1.9479e-04", + "loss": 1.0926, + "slid_loss": 0.8649, + "step": 2317, + "time": 13.41 + }, + { + "epoch": 1.16, + "learning_rate": "1.9478e-04", + "loss": 0.8271, + "slid_loss": 0.8645, + "step": 2318, + "time": 10.87 + }, + { + "epoch": 1.16, + "learning_rate": "1.9477e-04", + "loss": 0.8647, + "slid_loss": 0.8638, + "step": 2319, + "time": 12.96 + }, + { + "epoch": 1.16, + "learning_rate": "1.9476e-04", + "loss": 1.0077, + "slid_loss": 0.8656, + "step": 2320, + "time": 11.11 + }, + { + "epoch": 1.16, + "learning_rate": "1.9475e-04", + "loss": 0.8998, + "slid_loss": 0.8656, + "step": 2321, + "time": 13.37 + }, + { + "epoch": 1.16, + "learning_rate": "1.9475e-04", + "loss": 0.8527, + "slid_loss": 0.8638, + "step": 2322, + "time": 12.87 + }, + { + "epoch": 1.16, + "learning_rate": "1.9474e-04", + "loss": 0.9302, + "slid_loss": 0.865, + "step": 2323, + "time": 13.0 + }, + { + "epoch": 1.16, + "learning_rate": "1.9473e-04", + "loss": 0.8187, + "slid_loss": 0.8638, + "step": 2324, + "time": 10.7 + }, + { + "epoch": 1.16, + "learning_rate": "1.9472e-04", + "loss": 0.7759, + "slid_loss": 0.8634, + "step": 2325, + "time": 11.72 + }, + { + "epoch": 1.16, + "learning_rate": "1.9471e-04", + "loss": 0.5675, + "slid_loss": 0.8584, + "step": 2326, + "time": 12.86 + }, + { + "epoch": 1.17, + "learning_rate": "1.9471e-04", + "loss": 0.8989, + "slid_loss": 0.8595, + "step": 2327, + "time": 13.08 + }, + { + "epoch": 1.17, + "learning_rate": "1.9470e-04", + "loss": 0.954, + "slid_loss": 0.8586, + "step": 2328, + "time": 12.74 + }, + { + "epoch": 1.17, + "learning_rate": "1.9469e-04", + "loss": 0.7825, + "slid_loss": 0.8575, + "step": 2329, + "time": 13.43 + }, + { + "epoch": 1.17, + "learning_rate": "1.9468e-04", + "loss": 0.7703, + "slid_loss": 0.8561, + "step": 2330, + "time": 11.21 + }, + { + "epoch": 1.17, + "learning_rate": "1.9468e-04", + "loss": 0.8286, + "slid_loss": 0.8563, + "step": 2331, + "time": 13.18 + }, + { + "epoch": 1.17, + "learning_rate": "1.9467e-04", + "loss": 0.8909, + "slid_loss": 0.8571, + "step": 2332, + "time": 13.23 + }, + { + "epoch": 1.17, + "learning_rate": "1.9466e-04", + "loss": 0.7952, + "slid_loss": 0.8558, + "step": 2333, + "time": 13.42 + }, + { + "epoch": 1.17, + "learning_rate": "1.9465e-04", + "loss": 0.8062, + "slid_loss": 0.8546, + "step": 2334, + "time": 11.44 + }, + { + "epoch": 1.17, + "learning_rate": "1.9464e-04", + "loss": 0.8281, + "slid_loss": 0.8543, + "step": 2335, + "time": 11.19 + }, + { + "epoch": 1.17, + "learning_rate": "1.9464e-04", + "loss": 1.0058, + "slid_loss": 0.8546, + "step": 2336, + "time": 13.13 + }, + { + "epoch": 1.17, + "learning_rate": "1.9463e-04", + "loss": 0.9367, + "slid_loss": 0.8555, + "step": 2337, + "time": 11.54 + }, + { + "epoch": 1.17, + "learning_rate": "1.9462e-04", + "loss": 0.8595, + "slid_loss": 0.8567, + "step": 2338, + "time": 13.54 + }, + { + "epoch": 1.17, + "learning_rate": "1.9461e-04", + "loss": 0.8182, + "slid_loss": 0.8567, + "step": 2339, + "time": 11.16 + }, + { + "epoch": 1.17, + "learning_rate": "1.9460e-04", + "loss": 0.8194, + "slid_loss": 0.8562, + "step": 2340, + "time": 10.85 + }, + { + "epoch": 1.17, + "learning_rate": "1.9460e-04", + "loss": 0.8333, + "slid_loss": 0.8562, + "step": 2341, + "time": 11.45 + }, + { + "epoch": 1.17, + "learning_rate": "1.9459e-04", + "loss": 0.7729, + "slid_loss": 0.8556, + "step": 2342, + "time": 14.03 + }, + { + "epoch": 1.17, + "learning_rate": "1.9458e-04", + "loss": 0.7829, + "slid_loss": 0.8539, + "step": 2343, + "time": 13.77 + }, + { + "epoch": 1.17, + "learning_rate": "1.9457e-04", + "loss": 0.8255, + "slid_loss": 0.8537, + "step": 2344, + "time": 13.13 + }, + { + "epoch": 1.17, + "learning_rate": "1.9457e-04", + "loss": 0.7175, + "slid_loss": 0.8525, + "step": 2345, + "time": 13.75 + }, + { + "epoch": 1.17, + "learning_rate": "1.9456e-04", + "loss": 0.9833, + "slid_loss": 0.854, + "step": 2346, + "time": 11.78 + }, + { + "epoch": 1.18, + "learning_rate": "1.9455e-04", + "loss": 0.85, + "slid_loss": 0.8541, + "step": 2347, + "time": 12.8 + }, + { + "epoch": 1.18, + "learning_rate": "1.9454e-04", + "loss": 0.7259, + "slid_loss": 0.8526, + "step": 2348, + "time": 13.02 + }, + { + "epoch": 1.18, + "learning_rate": "1.9453e-04", + "loss": 0.7599, + "slid_loss": 0.8521, + "step": 2349, + "time": 12.46 + }, + { + "epoch": 1.18, + "learning_rate": "1.9453e-04", + "loss": 0.8222, + "slid_loss": 0.8529, + "step": 2350, + "time": 11.33 + }, + { + "epoch": 1.18, + "learning_rate": "1.9452e-04", + "loss": 0.7886, + "slid_loss": 0.8521, + "step": 2351, + "time": 12.38 + }, + { + "epoch": 1.18, + "learning_rate": "1.9451e-04", + "loss": 0.8636, + "slid_loss": 0.852, + "step": 2352, + "time": 12.89 + }, + { + "epoch": 1.18, + "learning_rate": "1.9450e-04", + "loss": 0.8388, + "slid_loss": 0.8506, + "step": 2353, + "time": 11.08 + }, + { + "epoch": 1.18, + "learning_rate": "1.9449e-04", + "loss": 0.6698, + "slid_loss": 0.8484, + "step": 2354, + "time": 12.77 + }, + { + "epoch": 1.18, + "learning_rate": "1.9449e-04", + "loss": 0.9839, + "slid_loss": 0.8513, + "step": 2355, + "time": 10.88 + }, + { + "epoch": 1.18, + "learning_rate": "1.9448e-04", + "loss": 0.7166, + "slid_loss": 0.8511, + "step": 2356, + "time": 11.66 + }, + { + "epoch": 1.18, + "learning_rate": "1.9447e-04", + "loss": 1.0288, + "slid_loss": 0.8522, + "step": 2357, + "time": 13.14 + }, + { + "epoch": 1.18, + "learning_rate": "1.9446e-04", + "loss": 0.7811, + "slid_loss": 0.8492, + "step": 2358, + "time": 12.05 + }, + { + "epoch": 1.18, + "learning_rate": "1.9445e-04", + "loss": 0.9079, + "slid_loss": 0.8492, + "step": 2359, + "time": 11.18 + }, + { + "epoch": 1.18, + "learning_rate": "1.9445e-04", + "loss": 0.9089, + "slid_loss": 0.8497, + "step": 2360, + "time": 10.91 + }, + { + "epoch": 1.18, + "learning_rate": "1.9444e-04", + "loss": 0.8711, + "slid_loss": 0.8505, + "step": 2361, + "time": 11.29 + }, + { + "epoch": 1.18, + "learning_rate": "1.9443e-04", + "loss": 0.9897, + "slid_loss": 0.8526, + "step": 2362, + "time": 13.2 + }, + { + "epoch": 1.18, + "learning_rate": "1.9442e-04", + "loss": 0.9769, + "slid_loss": 0.8543, + "step": 2363, + "time": 13.59 + }, + { + "epoch": 1.18, + "learning_rate": "1.9441e-04", + "loss": 0.8438, + "slid_loss": 0.8554, + "step": 2364, + "time": 11.58 + }, + { + "epoch": 1.18, + "learning_rate": "1.9441e-04", + "loss": 0.7295, + "slid_loss": 0.8529, + "step": 2365, + "time": 12.8 + }, + { + "epoch": 1.18, + "learning_rate": "1.9440e-04", + "loss": 0.7836, + "slid_loss": 0.8518, + "step": 2366, + "time": 13.46 + }, + { + "epoch": 1.19, + "learning_rate": "1.9439e-04", + "loss": 0.6607, + "slid_loss": 0.8512, + "step": 2367, + "time": 12.77 + }, + { + "epoch": 1.19, + "learning_rate": "1.9438e-04", + "loss": 0.9421, + "slid_loss": 0.8504, + "step": 2368, + "time": 13.3 + }, + { + "epoch": 1.19, + "learning_rate": "1.9437e-04", + "loss": 0.9676, + "slid_loss": 0.851, + "step": 2369, + "time": 11.77 + }, + { + "epoch": 1.19, + "learning_rate": "1.9437e-04", + "loss": 1.0019, + "slid_loss": 0.8518, + "step": 2370, + "time": 14.01 + }, + { + "epoch": 1.19, + "learning_rate": "1.9436e-04", + "loss": 0.9013, + "slid_loss": 0.8535, + "step": 2371, + "time": 12.67 + }, + { + "epoch": 1.19, + "learning_rate": "1.9435e-04", + "loss": 0.8945, + "slid_loss": 0.8545, + "step": 2372, + "time": 14.32 + }, + { + "epoch": 1.19, + "learning_rate": "1.9434e-04", + "loss": 0.8985, + "slid_loss": 0.8552, + "step": 2373, + "time": 11.79 + }, + { + "epoch": 1.19, + "learning_rate": "1.9433e-04", + "loss": 0.7395, + "slid_loss": 0.8551, + "step": 2374, + "time": 11.35 + }, + { + "epoch": 1.19, + "learning_rate": "1.9433e-04", + "loss": 0.9165, + "slid_loss": 0.8578, + "step": 2375, + "time": 11.44 + }, + { + "epoch": 1.19, + "learning_rate": "1.9432e-04", + "loss": 0.8032, + "slid_loss": 0.856, + "step": 2376, + "time": 12.29 + }, + { + "epoch": 1.19, + "learning_rate": "1.9431e-04", + "loss": 0.8862, + "slid_loss": 0.8564, + "step": 2377, + "time": 13.72 + }, + { + "epoch": 1.19, + "learning_rate": "1.9430e-04", + "loss": 0.9083, + "slid_loss": 0.8562, + "step": 2378, + "time": 10.81 + }, + { + "epoch": 1.19, + "learning_rate": "1.9429e-04", + "loss": 0.8195, + "slid_loss": 0.8557, + "step": 2379, + "time": 13.5 + }, + { + "epoch": 1.19, + "learning_rate": "1.9428e-04", + "loss": 0.7676, + "slid_loss": 0.8549, + "step": 2380, + "time": 13.31 + }, + { + "epoch": 1.19, + "learning_rate": "1.9428e-04", + "loss": 0.8257, + "slid_loss": 0.8532, + "step": 2381, + "time": 11.31 + }, + { + "epoch": 1.19, + "learning_rate": "1.9427e-04", + "loss": 0.9566, + "slid_loss": 0.8555, + "step": 2382, + "time": 11.35 + }, + { + "epoch": 1.19, + "learning_rate": "1.9426e-04", + "loss": 0.9301, + "slid_loss": 0.8566, + "step": 2383, + "time": 11.51 + }, + { + "epoch": 1.19, + "learning_rate": "1.9425e-04", + "loss": 0.7803, + "slid_loss": 0.8553, + "step": 2384, + "time": 13.0 + }, + { + "epoch": 1.19, + "learning_rate": "1.9424e-04", + "loss": 0.9533, + "slid_loss": 0.8571, + "step": 2385, + "time": 12.34 + }, + { + "epoch": 1.19, + "learning_rate": "1.9424e-04", + "loss": 0.723, + "slid_loss": 0.8553, + "step": 2386, + "time": 13.52 + }, + { + "epoch": 1.2, + "learning_rate": "1.9423e-04", + "loss": 0.7972, + "slid_loss": 0.8537, + "step": 2387, + "time": 13.46 + }, + { + "epoch": 1.2, + "learning_rate": "1.9422e-04", + "loss": 0.6818, + "slid_loss": 0.8523, + "step": 2388, + "time": 11.3 + }, + { + "epoch": 1.2, + "learning_rate": "1.9421e-04", + "loss": 0.9807, + "slid_loss": 0.8522, + "step": 2389, + "time": 11.85 + }, + { + "epoch": 1.2, + "learning_rate": "1.9420e-04", + "loss": 0.7297, + "slid_loss": 0.8504, + "step": 2390, + "time": 12.3 + }, + { + "epoch": 1.2, + "learning_rate": "1.9420e-04", + "loss": 0.8388, + "slid_loss": 0.8505, + "step": 2391, + "time": 12.2 + }, + { + "epoch": 1.2, + "learning_rate": "1.9419e-04", + "loss": 0.883, + "slid_loss": 0.8514, + "step": 2392, + "time": 11.12 + }, + { + "epoch": 1.2, + "learning_rate": "1.9418e-04", + "loss": 0.9551, + "slid_loss": 0.8534, + "step": 2393, + "time": 11.36 + }, + { + "epoch": 1.2, + "learning_rate": "1.9417e-04", + "loss": 0.6971, + "slid_loss": 0.8516, + "step": 2394, + "time": 11.96 + }, + { + "epoch": 1.2, + "learning_rate": "1.9416e-04", + "loss": 0.9498, + "slid_loss": 0.8522, + "step": 2395, + "time": 12.01 + }, + { + "epoch": 1.2, + "learning_rate": "1.9415e-04", + "loss": 0.8433, + "slid_loss": 0.8514, + "step": 2396, + "time": 12.45 + }, + { + "epoch": 1.2, + "learning_rate": "1.9415e-04", + "loss": 0.9486, + "slid_loss": 0.8521, + "step": 2397, + "time": 12.84 + }, + { + "epoch": 1.2, + "learning_rate": "1.9414e-04", + "loss": 0.8712, + "slid_loss": 0.8528, + "step": 2398, + "time": 13.42 + }, + { + "epoch": 1.2, + "learning_rate": "1.9413e-04", + "loss": 0.9947, + "slid_loss": 0.8546, + "step": 2399, + "time": 13.62 + }, + { + "epoch": 1.2, + "learning_rate": "1.9412e-04", + "loss": 0.8212, + "slid_loss": 0.8536, + "step": 2400, + "time": 11.62 + }, + { + "epoch": 1.2, + "learning_rate": "1.9411e-04", + "loss": 0.7196, + "slid_loss": 0.8528, + "step": 2401, + "time": 12.84 + }, + { + "epoch": 1.2, + "learning_rate": "1.9410e-04", + "loss": 0.7334, + "slid_loss": 0.8515, + "step": 2402, + "time": 11.77 + }, + { + "epoch": 1.2, + "learning_rate": "1.9410e-04", + "loss": 0.9384, + "slid_loss": 0.8525, + "step": 2403, + "time": 14.21 + }, + { + "epoch": 1.2, + "learning_rate": "1.9409e-04", + "loss": 0.9502, + "slid_loss": 0.8534, + "step": 2404, + "time": 11.97 + }, + { + "epoch": 1.2, + "learning_rate": "1.9408e-04", + "loss": 0.6911, + "slid_loss": 0.8502, + "step": 2405, + "time": 11.05 + }, + { + "epoch": 1.2, + "learning_rate": "1.9407e-04", + "loss": 0.896, + "slid_loss": 0.8501, + "step": 2406, + "time": 11.28 + }, + { + "epoch": 1.21, + "learning_rate": "1.9406e-04", + "loss": 0.6737, + "slid_loss": 0.8483, + "step": 2407, + "time": 12.97 + }, + { + "epoch": 1.21, + "learning_rate": "1.9406e-04", + "loss": 0.7986, + "slid_loss": 0.8475, + "step": 2408, + "time": 11.5 + }, + { + "epoch": 1.21, + "learning_rate": "1.9405e-04", + "loss": 0.9576, + "slid_loss": 0.8483, + "step": 2409, + "time": 12.72 + }, + { + "epoch": 1.21, + "learning_rate": "1.9404e-04", + "loss": 0.8198, + "slid_loss": 0.848, + "step": 2410, + "time": 10.73 + }, + { + "epoch": 1.21, + "learning_rate": "1.9403e-04", + "loss": 1.0214, + "slid_loss": 0.8494, + "step": 2411, + "time": 11.87 + }, + { + "epoch": 1.21, + "learning_rate": "1.9402e-04", + "loss": 0.8155, + "slid_loss": 0.8496, + "step": 2412, + "time": 13.14 + }, + { + "epoch": 1.21, + "learning_rate": "1.9401e-04", + "loss": 0.8681, + "slid_loss": 0.85, + "step": 2413, + "time": 11.23 + }, + { + "epoch": 1.21, + "learning_rate": "1.9401e-04", + "loss": 0.7352, + "slid_loss": 0.8484, + "step": 2414, + "time": 12.9 + }, + { + "epoch": 1.21, + "learning_rate": "1.9400e-04", + "loss": 0.8816, + "slid_loss": 0.8489, + "step": 2415, + "time": 11.89 + }, + { + "epoch": 1.21, + "learning_rate": "1.9399e-04", + "loss": 0.9013, + "slid_loss": 0.8506, + "step": 2416, + "time": 11.32 + }, + { + "epoch": 1.21, + "learning_rate": "1.9398e-04", + "loss": 0.8181, + "slid_loss": 0.8478, + "step": 2417, + "time": 13.67 + }, + { + "epoch": 1.21, + "learning_rate": "1.9397e-04", + "loss": 0.9078, + "slid_loss": 0.8486, + "step": 2418, + "time": 13.28 + }, + { + "epoch": 1.21, + "learning_rate": "1.9396e-04", + "loss": 0.6844, + "slid_loss": 0.8468, + "step": 2419, + "time": 11.01 + }, + { + "epoch": 1.21, + "learning_rate": "1.9396e-04", + "loss": 0.8359, + "slid_loss": 0.8451, + "step": 2420, + "time": 13.87 + }, + { + "epoch": 1.21, + "learning_rate": "1.9395e-04", + "loss": 0.7008, + "slid_loss": 0.8431, + "step": 2421, + "time": 11.64 + }, + { + "epoch": 1.21, + "learning_rate": "1.9394e-04", + "loss": 1.0297, + "slid_loss": 0.8449, + "step": 2422, + "time": 12.65 + }, + { + "epoch": 1.21, + "learning_rate": "1.9393e-04", + "loss": 0.8948, + "slid_loss": 0.8445, + "step": 2423, + "time": 13.24 + }, + { + "epoch": 1.21, + "learning_rate": "1.9392e-04", + "loss": 0.832, + "slid_loss": 0.8447, + "step": 2424, + "time": 13.51 + }, + { + "epoch": 1.21, + "learning_rate": "1.9391e-04", + "loss": 0.905, + "slid_loss": 0.846, + "step": 2425, + "time": 13.07 + }, + { + "epoch": 1.21, + "learning_rate": "1.9391e-04", + "loss": 0.774, + "slid_loss": 0.848, + "step": 2426, + "time": 13.48 + }, + { + "epoch": 1.22, + "learning_rate": "1.9390e-04", + "loss": 0.644, + "slid_loss": 0.8455, + "step": 2427, + "time": 11.12 + }, + { + "epoch": 1.22, + "learning_rate": "1.9389e-04", + "loss": 0.8225, + "slid_loss": 0.8442, + "step": 2428, + "time": 13.42 + }, + { + "epoch": 1.22, + "learning_rate": "1.9388e-04", + "loss": 0.7744, + "slid_loss": 0.8441, + "step": 2429, + "time": 12.8 + }, + { + "epoch": 1.22, + "learning_rate": "1.9387e-04", + "loss": 0.9199, + "slid_loss": 0.8456, + "step": 2430, + "time": 12.73 + }, + { + "epoch": 1.22, + "learning_rate": "1.9386e-04", + "loss": 0.9616, + "slid_loss": 0.8469, + "step": 2431, + "time": 13.86 + }, + { + "epoch": 1.22, + "learning_rate": "1.9386e-04", + "loss": 0.803, + "slid_loss": 0.846, + "step": 2432, + "time": 13.36 + }, + { + "epoch": 1.22, + "learning_rate": "1.9385e-04", + "loss": 0.6803, + "slid_loss": 0.8449, + "step": 2433, + "time": 10.38 + }, + { + "epoch": 1.22, + "learning_rate": "1.9384e-04", + "loss": 0.8241, + "slid_loss": 0.8451, + "step": 2434, + "time": 11.64 + }, + { + "epoch": 1.22, + "learning_rate": "1.9383e-04", + "loss": 0.8551, + "slid_loss": 0.8453, + "step": 2435, + "time": 13.64 + }, + { + "epoch": 1.22, + "learning_rate": "1.9382e-04", + "loss": 0.7772, + "slid_loss": 0.843, + "step": 2436, + "time": 10.33 + }, + { + "epoch": 1.22, + "learning_rate": "1.9381e-04", + "loss": 0.8625, + "slid_loss": 0.8423, + "step": 2437, + "time": 12.85 + }, + { + "epoch": 1.22, + "learning_rate": "1.9380e-04", + "loss": 0.8025, + "slid_loss": 0.8417, + "step": 2438, + "time": 12.16 + }, + { + "epoch": 1.22, + "learning_rate": "1.9380e-04", + "loss": 0.8422, + "slid_loss": 0.842, + "step": 2439, + "time": 13.28 + }, + { + "epoch": 1.22, + "learning_rate": "1.9379e-04", + "loss": 0.771, + "slid_loss": 0.8415, + "step": 2440, + "time": 12.51 + }, + { + "epoch": 1.22, + "learning_rate": "1.9378e-04", + "loss": 0.7991, + "slid_loss": 0.8411, + "step": 2441, + "time": 11.41 + }, + { + "epoch": 1.22, + "learning_rate": "1.9377e-04", + "loss": 1.0172, + "slid_loss": 0.8436, + "step": 2442, + "time": 13.3 + }, + { + "epoch": 1.22, + "learning_rate": "1.9376e-04", + "loss": 0.7904, + "slid_loss": 0.8437, + "step": 2443, + "time": 11.62 + }, + { + "epoch": 1.22, + "learning_rate": "1.9375e-04", + "loss": 0.7811, + "slid_loss": 0.8432, + "step": 2444, + "time": 10.82 + }, + { + "epoch": 1.22, + "learning_rate": "1.9375e-04", + "loss": 0.8835, + "slid_loss": 0.8449, + "step": 2445, + "time": 12.24 + }, + { + "epoch": 1.22, + "learning_rate": "1.9374e-04", + "loss": 0.8951, + "slid_loss": 0.844, + "step": 2446, + "time": 12.78 + }, + { + "epoch": 1.23, + "learning_rate": "1.9373e-04", + "loss": 0.7377, + "slid_loss": 0.8429, + "step": 2447, + "time": 13.32 + }, + { + "epoch": 1.23, + "learning_rate": "1.9372e-04", + "loss": 0.8111, + "slid_loss": 0.8437, + "step": 2448, + "time": 12.8 + }, + { + "epoch": 1.23, + "learning_rate": "1.9371e-04", + "loss": 0.8488, + "slid_loss": 0.8446, + "step": 2449, + "time": 13.75 + }, + { + "epoch": 1.23, + "learning_rate": "1.9370e-04", + "loss": 0.6897, + "slid_loss": 0.8433, + "step": 2450, + "time": 12.81 + }, + { + "epoch": 1.23, + "learning_rate": "1.9369e-04", + "loss": 0.7967, + "slid_loss": 0.8434, + "step": 2451, + "time": 11.94 + }, + { + "epoch": 1.23, + "learning_rate": "1.9369e-04", + "loss": 0.8161, + "slid_loss": 0.8429, + "step": 2452, + "time": 11.88 + }, + { + "epoch": 1.23, + "learning_rate": "1.9368e-04", + "loss": 0.8159, + "slid_loss": 0.8427, + "step": 2453, + "time": 13.9 + }, + { + "epoch": 1.23, + "learning_rate": "1.9367e-04", + "loss": 0.8136, + "slid_loss": 0.8441, + "step": 2454, + "time": 13.49 + }, + { + "epoch": 1.23, + "learning_rate": "1.9366e-04", + "loss": 0.7701, + "slid_loss": 0.842, + "step": 2455, + "time": 13.44 + }, + { + "epoch": 1.23, + "learning_rate": "1.9365e-04", + "loss": 0.8608, + "slid_loss": 0.8434, + "step": 2456, + "time": 12.97 + }, + { + "epoch": 1.23, + "learning_rate": "1.9364e-04", + "loss": 0.7555, + "slid_loss": 0.8407, + "step": 2457, + "time": 10.91 + }, + { + "epoch": 1.23, + "learning_rate": "1.9364e-04", + "loss": 0.7798, + "slid_loss": 0.8407, + "step": 2458, + "time": 13.63 + }, + { + "epoch": 1.23, + "learning_rate": "1.9363e-04", + "loss": 0.9727, + "slid_loss": 0.8413, + "step": 2459, + "time": 12.98 + }, + { + "epoch": 1.23, + "learning_rate": "1.9362e-04", + "loss": 0.8874, + "slid_loss": 0.8411, + "step": 2460, + "time": 11.99 + }, + { + "epoch": 1.23, + "learning_rate": "1.9361e-04", + "loss": 0.9116, + "slid_loss": 0.8415, + "step": 2461, + "time": 13.14 + }, + { + "epoch": 1.23, + "learning_rate": "1.9360e-04", + "loss": 0.8195, + "slid_loss": 0.8398, + "step": 2462, + "time": 14.04 + }, + { + "epoch": 1.23, + "learning_rate": "1.9359e-04", + "loss": 0.7659, + "slid_loss": 0.8377, + "step": 2463, + "time": 13.51 + }, + { + "epoch": 1.23, + "learning_rate": "1.9358e-04", + "loss": 0.8779, + "slid_loss": 0.838, + "step": 2464, + "time": 11.82 + }, + { + "epoch": 1.23, + "learning_rate": "1.9358e-04", + "loss": 0.8148, + "slid_loss": 0.8389, + "step": 2465, + "time": 10.88 + }, + { + "epoch": 1.23, + "learning_rate": "1.9357e-04", + "loss": 0.8809, + "slid_loss": 0.8399, + "step": 2466, + "time": 13.0 + }, + { + "epoch": 1.24, + "learning_rate": "1.9356e-04", + "loss": 0.8617, + "slid_loss": 0.8419, + "step": 2467, + "time": 10.5 + }, + { + "epoch": 1.24, + "learning_rate": "1.9355e-04", + "loss": 0.8547, + "slid_loss": 0.841, + "step": 2468, + "time": 13.77 + }, + { + "epoch": 1.24, + "learning_rate": "1.9354e-04", + "loss": 0.8122, + "slid_loss": 0.8394, + "step": 2469, + "time": 12.09 + }, + { + "epoch": 1.24, + "learning_rate": "1.9353e-04", + "loss": 1.0351, + "slid_loss": 0.8398, + "step": 2470, + "time": 13.56 + }, + { + "epoch": 1.24, + "learning_rate": "1.9352e-04", + "loss": 0.7909, + "slid_loss": 0.8387, + "step": 2471, + "time": 11.46 + }, + { + "epoch": 1.24, + "learning_rate": "1.9352e-04", + "loss": 0.8238, + "slid_loss": 0.838, + "step": 2472, + "time": 11.04 + }, + { + "epoch": 1.24, + "learning_rate": "1.9351e-04", + "loss": 0.8855, + "slid_loss": 0.8378, + "step": 2473, + "time": 13.65 + }, + { + "epoch": 1.24, + "learning_rate": "1.9350e-04", + "loss": 0.686, + "slid_loss": 0.8373, + "step": 2474, + "time": 11.24 + }, + { + "epoch": 1.24, + "learning_rate": "1.9349e-04", + "loss": 0.7936, + "slid_loss": 0.8361, + "step": 2475, + "time": 13.22 + }, + { + "epoch": 1.24, + "learning_rate": "1.9348e-04", + "loss": 0.9272, + "slid_loss": 0.8373, + "step": 2476, + "time": 10.55 + }, + { + "epoch": 1.24, + "learning_rate": "1.9347e-04", + "loss": 0.8649, + "slid_loss": 0.8371, + "step": 2477, + "time": 13.33 + }, + { + "epoch": 1.24, + "learning_rate": "1.9346e-04", + "loss": 0.9119, + "slid_loss": 0.8371, + "step": 2478, + "time": 11.29 + }, + { + "epoch": 1.24, + "learning_rate": "1.9345e-04", + "loss": 0.8623, + "slid_loss": 0.8376, + "step": 2479, + "time": 13.58 + }, + { + "epoch": 1.24, + "learning_rate": "1.9345e-04", + "loss": 0.8181, + "slid_loss": 0.8381, + "step": 2480, + "time": 10.73 + }, + { + "epoch": 1.24, + "learning_rate": "1.9344e-04", + "loss": 0.8072, + "slid_loss": 0.8379, + "step": 2481, + "time": 13.67 + }, + { + "epoch": 1.24, + "learning_rate": "1.9343e-04", + "loss": 0.6875, + "slid_loss": 0.8352, + "step": 2482, + "time": 13.15 + }, + { + "epoch": 1.24, + "learning_rate": "1.9342e-04", + "loss": 0.7968, + "slid_loss": 0.8339, + "step": 2483, + "time": 11.49 + }, + { + "epoch": 1.24, + "learning_rate": "1.9341e-04", + "loss": 0.9685, + "slid_loss": 0.8357, + "step": 2484, + "time": 12.81 + }, + { + "epoch": 1.24, + "learning_rate": "1.9340e-04", + "loss": 0.7762, + "slid_loss": 0.834, + "step": 2485, + "time": 11.69 + }, + { + "epoch": 1.24, + "learning_rate": "1.9339e-04", + "loss": 0.8025, + "slid_loss": 0.8348, + "step": 2486, + "time": 13.22 + }, + { + "epoch": 1.25, + "learning_rate": "1.9339e-04", + "loss": 1.0464, + "slid_loss": 0.8372, + "step": 2487, + "time": 12.15 + }, + { + "epoch": 1.25, + "learning_rate": "1.9338e-04", + "loss": 0.8394, + "slid_loss": 0.8388, + "step": 2488, + "time": 12.29 + }, + { + "epoch": 1.25, + "learning_rate": "1.9337e-04", + "loss": 0.667, + "slid_loss": 0.8357, + "step": 2489, + "time": 13.11 + }, + { + "epoch": 1.25, + "learning_rate": "1.9336e-04", + "loss": 0.9689, + "slid_loss": 0.8381, + "step": 2490, + "time": 11.23 + }, + { + "epoch": 1.25, + "learning_rate": "1.9335e-04", + "loss": 0.7249, + "slid_loss": 0.8369, + "step": 2491, + "time": 13.98 + }, + { + "epoch": 1.25, + "learning_rate": "1.9334e-04", + "loss": 0.9086, + "slid_loss": 0.8372, + "step": 2492, + "time": 10.73 + }, + { + "epoch": 1.25, + "learning_rate": "1.9333e-04", + "loss": 0.8068, + "slid_loss": 0.8357, + "step": 2493, + "time": 13.08 + }, + { + "epoch": 1.25, + "learning_rate": "1.9332e-04", + "loss": 0.9477, + "slid_loss": 0.8382, + "step": 2494, + "time": 10.78 + }, + { + "epoch": 1.25, + "learning_rate": "1.9332e-04", + "loss": 0.7824, + "slid_loss": 0.8365, + "step": 2495, + "time": 10.35 + }, + { + "epoch": 1.25, + "learning_rate": "1.9331e-04", + "loss": 0.878, + "slid_loss": 0.8369, + "step": 2496, + "time": 13.14 + }, + { + "epoch": 1.25, + "learning_rate": "1.9330e-04", + "loss": 0.9087, + "slid_loss": 0.8365, + "step": 2497, + "time": 12.76 + }, + { + "epoch": 1.25, + "learning_rate": "1.9329e-04", + "loss": 0.7601, + "slid_loss": 0.8354, + "step": 2498, + "time": 10.84 + }, + { + "epoch": 1.25, + "learning_rate": "1.9328e-04", + "loss": 0.7658, + "slid_loss": 0.8331, + "step": 2499, + "time": 13.27 + }, + { + "epoch": 1.25, + "learning_rate": "1.9327e-04", + "loss": 0.9878, + "slid_loss": 0.8348, + "step": 2500, + "time": 12.87 + }, + { + "epoch": 1.25, + "learning_rate": "1.9326e-04", + "loss": 0.8674, + "slid_loss": 0.8362, + "step": 2501, + "time": 13.24 + }, + { + "epoch": 1.25, + "learning_rate": "1.9325e-04", + "loss": 0.7394, + "slid_loss": 0.8363, + "step": 2502, + "time": 12.84 + }, + { + "epoch": 1.25, + "learning_rate": "1.9325e-04", + "loss": 0.8628, + "slid_loss": 0.8355, + "step": 2503, + "time": 12.73 + }, + { + "epoch": 1.25, + "learning_rate": "1.9324e-04", + "loss": 0.8128, + "slid_loss": 0.8342, + "step": 2504, + "time": 13.37 + }, + { + "epoch": 1.25, + "learning_rate": "1.9323e-04", + "loss": 0.8619, + "slid_loss": 0.8359, + "step": 2505, + "time": 13.08 + }, + { + "epoch": 1.25, + "learning_rate": "1.9322e-04", + "loss": 0.8411, + "slid_loss": 0.8353, + "step": 2506, + "time": 14.15 + }, + { + "epoch": 1.26, + "learning_rate": "1.9321e-04", + "loss": 0.881, + "slid_loss": 0.8374, + "step": 2507, + "time": 12.22 + }, + { + "epoch": 1.26, + "learning_rate": "1.9320e-04", + "loss": 0.8283, + "slid_loss": 0.8377, + "step": 2508, + "time": 13.34 + }, + { + "epoch": 1.26, + "learning_rate": "1.9319e-04", + "loss": 0.7931, + "slid_loss": 0.836, + "step": 2509, + "time": 11.71 + }, + { + "epoch": 1.26, + "learning_rate": "1.9318e-04", + "loss": 0.8973, + "slid_loss": 0.8368, + "step": 2510, + "time": 12.71 + }, + { + "epoch": 1.26, + "learning_rate": "1.9318e-04", + "loss": 0.8073, + "slid_loss": 0.8347, + "step": 2511, + "time": 13.57 + }, + { + "epoch": 1.26, + "learning_rate": "1.9317e-04", + "loss": 0.9063, + "slid_loss": 0.8356, + "step": 2512, + "time": 11.71 + }, + { + "epoch": 1.26, + "learning_rate": "1.9316e-04", + "loss": 0.9274, + "slid_loss": 0.8362, + "step": 2513, + "time": 11.64 + }, + { + "epoch": 1.26, + "learning_rate": "1.9315e-04", + "loss": 0.7908, + "slid_loss": 0.8367, + "step": 2514, + "time": 10.96 + }, + { + "epoch": 1.26, + "learning_rate": "1.9314e-04", + "loss": 0.6318, + "slid_loss": 0.8342, + "step": 2515, + "time": 12.77 + }, + { + "epoch": 1.26, + "learning_rate": "1.9313e-04", + "loss": 0.8426, + "slid_loss": 0.8337, + "step": 2516, + "time": 13.19 + }, + { + "epoch": 1.26, + "learning_rate": "1.9312e-04", + "loss": 0.7419, + "slid_loss": 0.8329, + "step": 2517, + "time": 13.82 + }, + { + "epoch": 1.26, + "learning_rate": "1.9311e-04", + "loss": 0.8944, + "slid_loss": 0.8328, + "step": 2518, + "time": 12.0 + }, + { + "epoch": 1.26, + "learning_rate": "1.9310e-04", + "loss": 0.8141, + "slid_loss": 0.8341, + "step": 2519, + "time": 12.19 + }, + { + "epoch": 1.26, + "learning_rate": "1.9310e-04", + "loss": 0.862, + "slid_loss": 0.8343, + "step": 2520, + "time": 11.33 + }, + { + "epoch": 1.26, + "learning_rate": "1.9309e-04", + "loss": 0.8343, + "slid_loss": 0.8357, + "step": 2521, + "time": 11.12 + }, + { + "epoch": 1.26, + "learning_rate": "1.9308e-04", + "loss": 0.8593, + "slid_loss": 0.8339, + "step": 2522, + "time": 12.86 + }, + { + "epoch": 1.26, + "learning_rate": "1.9307e-04", + "loss": 0.8562, + "slid_loss": 0.8336, + "step": 2523, + "time": 12.35 + }, + { + "epoch": 1.26, + "learning_rate": "1.9306e-04", + "loss": 0.6791, + "slid_loss": 0.832, + "step": 2524, + "time": 11.58 + }, + { + "epoch": 1.26, + "learning_rate": "1.9305e-04", + "loss": 0.7465, + "slid_loss": 0.8304, + "step": 2525, + "time": 11.37 + }, + { + "epoch": 1.26, + "learning_rate": "1.9304e-04", + "loss": 0.7435, + "slid_loss": 0.8301, + "step": 2526, + "time": 11.98 + }, + { + "epoch": 1.27, + "learning_rate": "1.9303e-04", + "loss": 0.7157, + "slid_loss": 0.8309, + "step": 2527, + "time": 12.23 + }, + { + "epoch": 1.27, + "learning_rate": "1.9302e-04", + "loss": 0.8036, + "slid_loss": 0.8307, + "step": 2528, + "time": 11.8 + }, + { + "epoch": 1.27, + "learning_rate": "1.9302e-04", + "loss": 0.9299, + "slid_loss": 0.8322, + "step": 2529, + "time": 11.9 + }, + { + "epoch": 1.27, + "learning_rate": "1.9301e-04", + "loss": 0.9573, + "slid_loss": 0.8326, + "step": 2530, + "time": 12.04 + }, + { + "epoch": 1.27, + "learning_rate": "1.9300e-04", + "loss": 0.8582, + "slid_loss": 0.8316, + "step": 2531, + "time": 12.54 + }, + { + "epoch": 1.27, + "learning_rate": "1.9299e-04", + "loss": 0.7821, + "slid_loss": 0.8314, + "step": 2532, + "time": 13.38 + }, + { + "epoch": 1.27, + "learning_rate": "1.9298e-04", + "loss": 0.7939, + "slid_loss": 0.8325, + "step": 2533, + "time": 12.7 + }, + { + "epoch": 1.27, + "learning_rate": "1.9297e-04", + "loss": 0.74, + "slid_loss": 0.8317, + "step": 2534, + "time": 11.33 + }, + { + "epoch": 1.27, + "learning_rate": "1.9296e-04", + "loss": 0.9374, + "slid_loss": 0.8325, + "step": 2535, + "time": 12.06 + }, + { + "epoch": 1.27, + "learning_rate": "1.9295e-04", + "loss": 0.7923, + "slid_loss": 0.8326, + "step": 2536, + "time": 11.12 + }, + { + "epoch": 1.27, + "learning_rate": "1.9294e-04", + "loss": 0.7859, + "slid_loss": 0.8319, + "step": 2537, + "time": 11.14 + }, + { + "epoch": 1.27, + "learning_rate": "1.9294e-04", + "loss": 0.5979, + "slid_loss": 0.8298, + "step": 2538, + "time": 10.57 + }, + { + "epoch": 1.27, + "learning_rate": "1.9293e-04", + "loss": 0.7935, + "slid_loss": 0.8293, + "step": 2539, + "time": 13.0 + }, + { + "epoch": 1.27, + "learning_rate": "1.9292e-04", + "loss": 0.9492, + "slid_loss": 0.8311, + "step": 2540, + "time": 13.34 + }, + { + "epoch": 1.27, + "learning_rate": "1.9291e-04", + "loss": 0.8109, + "slid_loss": 0.8312, + "step": 2541, + "time": 13.41 + }, + { + "epoch": 1.27, + "learning_rate": "1.9290e-04", + "loss": 0.9201, + "slid_loss": 0.8303, + "step": 2542, + "time": 14.06 + }, + { + "epoch": 1.27, + "learning_rate": "1.9289e-04", + "loss": 0.9515, + "slid_loss": 0.8319, + "step": 2543, + "time": 13.12 + }, + { + "epoch": 1.27, + "learning_rate": "1.9288e-04", + "loss": 0.985, + "slid_loss": 0.8339, + "step": 2544, + "time": 13.32 + }, + { + "epoch": 1.27, + "learning_rate": "1.9287e-04", + "loss": 0.8717, + "slid_loss": 0.8338, + "step": 2545, + "time": 13.72 + }, + { + "epoch": 1.27, + "learning_rate": "1.9286e-04", + "loss": 0.8082, + "slid_loss": 0.8329, + "step": 2546, + "time": 11.88 + }, + { + "epoch": 1.28, + "learning_rate": "1.9285e-04", + "loss": 0.7279, + "slid_loss": 0.8328, + "step": 2547, + "time": 12.9 + }, + { + "epoch": 1.28, + "learning_rate": "1.9285e-04", + "loss": 0.7936, + "slid_loss": 0.8326, + "step": 2548, + "time": 11.44 + }, + { + "epoch": 1.28, + "learning_rate": "1.9284e-04", + "loss": 0.7361, + "slid_loss": 0.8315, + "step": 2549, + "time": 13.13 + }, + { + "epoch": 1.28, + "learning_rate": "1.9283e-04", + "loss": 0.8445, + "slid_loss": 0.8331, + "step": 2550, + "time": 13.32 + }, + { + "epoch": 1.28, + "learning_rate": "1.9282e-04", + "loss": 0.6548, + "slid_loss": 0.8316, + "step": 2551, + "time": 11.43 + }, + { + "epoch": 1.28, + "learning_rate": "1.9281e-04", + "loss": 0.9082, + "slid_loss": 0.8326, + "step": 2552, + "time": 13.51 + }, + { + "epoch": 1.28, + "learning_rate": "1.9280e-04", + "loss": 0.9513, + "slid_loss": 0.8339, + "step": 2553, + "time": 13.41 + }, + { + "epoch": 1.28, + "learning_rate": "1.9279e-04", + "loss": 1.0989, + "slid_loss": 0.8368, + "step": 2554, + "time": 12.88 + }, + { + "epoch": 1.28, + "learning_rate": "1.9278e-04", + "loss": 0.8445, + "slid_loss": 0.8375, + "step": 2555, + "time": 13.58 + }, + { + "epoch": 1.28, + "learning_rate": "1.9277e-04", + "loss": 0.8238, + "slid_loss": 0.8372, + "step": 2556, + "time": 13.39 + }, + { + "epoch": 1.28, + "learning_rate": "1.9276e-04", + "loss": 0.8054, + "slid_loss": 0.8376, + "step": 2557, + "time": 13.46 + }, + { + "epoch": 1.28, + "learning_rate": "1.9276e-04", + "loss": 0.984, + "slid_loss": 0.8397, + "step": 2558, + "time": 14.12 + }, + { + "epoch": 1.28, + "learning_rate": "1.9275e-04", + "loss": 0.9795, + "slid_loss": 0.8398, + "step": 2559, + "time": 10.91 + }, + { + "epoch": 1.28, + "learning_rate": "1.9274e-04", + "loss": 0.8384, + "slid_loss": 0.8393, + "step": 2560, + "time": 13.12 + }, + { + "epoch": 1.28, + "learning_rate": "1.9273e-04", + "loss": 0.8699, + "slid_loss": 0.8389, + "step": 2561, + "time": 13.08 + }, + { + "epoch": 1.28, + "learning_rate": "1.9272e-04", + "loss": 0.8382, + "slid_loss": 0.839, + "step": 2562, + "time": 11.54 + }, + { + "epoch": 1.28, + "learning_rate": "1.9271e-04", + "loss": 0.7797, + "slid_loss": 0.8392, + "step": 2563, + "time": 14.1 + }, + { + "epoch": 1.28, + "learning_rate": "1.9270e-04", + "loss": 0.8227, + "slid_loss": 0.8386, + "step": 2564, + "time": 12.87 + }, + { + "epoch": 1.28, + "learning_rate": "1.9269e-04", + "loss": 0.8921, + "slid_loss": 0.8394, + "step": 2565, + "time": 11.08 + }, + { + "epoch": 1.28, + "learning_rate": "1.9268e-04", + "loss": 0.6374, + "slid_loss": 0.837, + "step": 2566, + "time": 11.93 + }, + { + "epoch": 1.29, + "learning_rate": "1.9267e-04", + "loss": 0.8737, + "slid_loss": 0.8371, + "step": 2567, + "time": 12.82 + }, + { + "epoch": 1.29, + "learning_rate": "1.9266e-04", + "loss": 0.9557, + "slid_loss": 0.8381, + "step": 2568, + "time": 12.24 + }, + { + "epoch": 1.29, + "learning_rate": "1.9266e-04", + "loss": 0.8944, + "slid_loss": 0.8389, + "step": 2569, + "time": 12.29 + }, + { + "epoch": 1.29, + "learning_rate": "1.9265e-04", + "loss": 0.8351, + "slid_loss": 0.8369, + "step": 2570, + "time": 14.14 + }, + { + "epoch": 1.29, + "learning_rate": "1.9264e-04", + "loss": 0.813, + "slid_loss": 0.8371, + "step": 2571, + "time": 13.77 + }, + { + "epoch": 1.29, + "learning_rate": "1.9263e-04", + "loss": 0.961, + "slid_loss": 0.8385, + "step": 2572, + "time": 13.33 + }, + { + "epoch": 1.29, + "learning_rate": "1.9262e-04", + "loss": 0.9922, + "slid_loss": 0.8396, + "step": 2573, + "time": 12.93 + }, + { + "epoch": 1.29, + "learning_rate": "1.9261e-04", + "loss": 0.8633, + "slid_loss": 0.8414, + "step": 2574, + "time": 12.24 + }, + { + "epoch": 1.29, + "learning_rate": "1.9260e-04", + "loss": 0.9122, + "slid_loss": 0.8425, + "step": 2575, + "time": 13.64 + }, + { + "epoch": 1.29, + "learning_rate": "1.9259e-04", + "loss": 0.842, + "slid_loss": 0.8417, + "step": 2576, + "time": 11.95 + }, + { + "epoch": 1.29, + "learning_rate": "1.9258e-04", + "loss": 0.7475, + "slid_loss": 0.8405, + "step": 2577, + "time": 13.62 + }, + { + "epoch": 1.29, + "learning_rate": "1.9257e-04", + "loss": 0.8652, + "slid_loss": 0.84, + "step": 2578, + "time": 13.25 + }, + { + "epoch": 1.29, + "learning_rate": "1.9256e-04", + "loss": 0.7037, + "slid_loss": 0.8385, + "step": 2579, + "time": 13.14 + }, + { + "epoch": 1.29, + "learning_rate": "1.9255e-04", + "loss": 1.0444, + "slid_loss": 0.8407, + "step": 2580, + "time": 12.67 + }, + { + "epoch": 1.29, + "learning_rate": "1.9255e-04", + "loss": 0.8683, + "slid_loss": 0.8413, + "step": 2581, + "time": 13.17 + }, + { + "epoch": 1.29, + "learning_rate": "1.9254e-04", + "loss": 0.9056, + "slid_loss": 0.8435, + "step": 2582, + "time": 12.06 + }, + { + "epoch": 1.29, + "learning_rate": "1.9253e-04", + "loss": 0.912, + "slid_loss": 0.8447, + "step": 2583, + "time": 12.25 + }, + { + "epoch": 1.29, + "learning_rate": "1.9252e-04", + "loss": 0.8298, + "slid_loss": 0.8433, + "step": 2584, + "time": 11.44 + }, + { + "epoch": 1.29, + "learning_rate": "1.9251e-04", + "loss": 0.7558, + "slid_loss": 0.8431, + "step": 2585, + "time": 13.96 + }, + { + "epoch": 1.29, + "learning_rate": "1.9250e-04", + "loss": 0.9014, + "slid_loss": 0.8441, + "step": 2586, + "time": 13.27 + }, + { + "epoch": 1.3, + "learning_rate": "1.9249e-04", + "loss": 0.8101, + "slid_loss": 0.8417, + "step": 2587, + "time": 13.29 + }, + { + "epoch": 1.3, + "learning_rate": "1.9248e-04", + "loss": 0.8469, + "slid_loss": 0.8418, + "step": 2588, + "time": 12.81 + }, + { + "epoch": 1.3, + "learning_rate": "1.9247e-04", + "loss": 0.7799, + "slid_loss": 0.8429, + "step": 2589, + "time": 11.26 + }, + { + "epoch": 1.3, + "learning_rate": "1.9246e-04", + "loss": 0.9786, + "slid_loss": 0.843, + "step": 2590, + "time": 14.12 + }, + { + "epoch": 1.3, + "learning_rate": "1.9245e-04", + "loss": 0.6814, + "slid_loss": 0.8426, + "step": 2591, + "time": 13.95 + }, + { + "epoch": 1.3, + "learning_rate": "1.9244e-04", + "loss": 0.9346, + "slid_loss": 0.8428, + "step": 2592, + "time": 13.41 + }, + { + "epoch": 1.3, + "learning_rate": "1.9243e-04", + "loss": 0.7268, + "slid_loss": 0.842, + "step": 2593, + "time": 13.32 + }, + { + "epoch": 1.3, + "learning_rate": "1.9243e-04", + "loss": 0.729, + "slid_loss": 0.8398, + "step": 2594, + "time": 13.52 + }, + { + "epoch": 1.3, + "learning_rate": "1.9242e-04", + "loss": 0.8537, + "slid_loss": 0.8406, + "step": 2595, + "time": 13.32 + }, + { + "epoch": 1.3, + "learning_rate": "1.9241e-04", + "loss": 0.777, + "slid_loss": 0.8395, + "step": 2596, + "time": 11.2 + }, + { + "epoch": 1.3, + "learning_rate": "1.9240e-04", + "loss": 0.7222, + "slid_loss": 0.8377, + "step": 2597, + "time": 13.91 + }, + { + "epoch": 1.3, + "learning_rate": "1.9239e-04", + "loss": 0.7366, + "slid_loss": 0.8374, + "step": 2598, + "time": 13.53 + }, + { + "epoch": 1.3, + "learning_rate": "1.9238e-04", + "loss": 1.0599, + "slid_loss": 0.8404, + "step": 2599, + "time": 11.89 + }, + { + "epoch": 1.3, + "learning_rate": "1.9237e-04", + "loss": 0.8438, + "slid_loss": 0.8389, + "step": 2600, + "time": 12.3 + }, + { + "epoch": 1.3, + "learning_rate": "1.9236e-04", + "loss": 0.6112, + "slid_loss": 0.8364, + "step": 2601, + "time": 13.11 + }, + { + "epoch": 1.3, + "learning_rate": "1.9235e-04", + "loss": 0.8666, + "slid_loss": 0.8377, + "step": 2602, + "time": 13.41 + }, + { + "epoch": 1.3, + "learning_rate": "1.9234e-04", + "loss": 0.7811, + "slid_loss": 0.8368, + "step": 2603, + "time": 13.23 + }, + { + "epoch": 1.3, + "learning_rate": "1.9233e-04", + "loss": 0.6492, + "slid_loss": 0.8352, + "step": 2604, + "time": 13.21 + }, + { + "epoch": 1.3, + "learning_rate": "1.9232e-04", + "loss": 0.7729, + "slid_loss": 0.8343, + "step": 2605, + "time": 11.51 + }, + { + "epoch": 1.3, + "learning_rate": "1.9231e-04", + "loss": 0.7849, + "slid_loss": 0.8337, + "step": 2606, + "time": 12.04 + }, + { + "epoch": 1.31, + "learning_rate": "1.9230e-04", + "loss": 0.8519, + "slid_loss": 0.8335, + "step": 2607, + "time": 12.96 + }, + { + "epoch": 1.31, + "learning_rate": "1.9230e-04", + "loss": 1.0872, + "slid_loss": 0.836, + "step": 2608, + "time": 13.84 + }, + { + "epoch": 1.31, + "learning_rate": "1.9229e-04", + "loss": 0.9453, + "slid_loss": 0.8376, + "step": 2609, + "time": 12.85 + }, + { + "epoch": 1.31, + "learning_rate": "1.9228e-04", + "loss": 0.9016, + "slid_loss": 0.8376, + "step": 2610, + "time": 13.1 + }, + { + "epoch": 1.31, + "learning_rate": "1.9227e-04", + "loss": 1.0523, + "slid_loss": 0.8401, + "step": 2611, + "time": 12.09 + }, + { + "epoch": 1.31, + "learning_rate": "1.9226e-04", + "loss": 0.8825, + "slid_loss": 0.8398, + "step": 2612, + "time": 11.82 + }, + { + "epoch": 1.31, + "learning_rate": "1.9225e-04", + "loss": 0.8565, + "slid_loss": 0.8391, + "step": 2613, + "time": 13.29 + }, + { + "epoch": 1.31, + "learning_rate": "1.9224e-04", + "loss": 0.8885, + "slid_loss": 0.8401, + "step": 2614, + "time": 13.5 + }, + { + "epoch": 1.31, + "learning_rate": "1.9223e-04", + "loss": 1.0339, + "slid_loss": 0.8441, + "step": 2615, + "time": 13.5 + }, + { + "epoch": 1.31, + "learning_rate": "1.9222e-04", + "loss": 0.7565, + "slid_loss": 0.8433, + "step": 2616, + "time": 12.93 + }, + { + "epoch": 1.31, + "learning_rate": "1.9221e-04", + "loss": 0.732, + "slid_loss": 0.8432, + "step": 2617, + "time": 13.34 + }, + { + "epoch": 1.31, + "learning_rate": "1.9220e-04", + "loss": 0.9191, + "slid_loss": 0.8434, + "step": 2618, + "time": 12.77 + }, + { + "epoch": 1.31, + "learning_rate": "1.9219e-04", + "loss": 0.8324, + "slid_loss": 0.8436, + "step": 2619, + "time": 12.81 + }, + { + "epoch": 1.31, + "learning_rate": "1.9218e-04", + "loss": 0.819, + "slid_loss": 0.8432, + "step": 2620, + "time": 13.23 + }, + { + "epoch": 1.31, + "learning_rate": "1.9217e-04", + "loss": 0.7223, + "slid_loss": 0.842, + "step": 2621, + "time": 12.67 + }, + { + "epoch": 1.31, + "learning_rate": "1.9216e-04", + "loss": 0.9041, + "slid_loss": 0.8425, + "step": 2622, + "time": 13.33 + }, + { + "epoch": 1.31, + "learning_rate": "1.9216e-04", + "loss": 0.7987, + "slid_loss": 0.8419, + "step": 2623, + "time": 12.35 + }, + { + "epoch": 1.31, + "learning_rate": "1.9215e-04", + "loss": 0.8132, + "slid_loss": 0.8432, + "step": 2624, + "time": 12.2 + }, + { + "epoch": 1.31, + "learning_rate": "1.9214e-04", + "loss": 1.0746, + "slid_loss": 0.8465, + "step": 2625, + "time": 13.4 + }, + { + "epoch": 1.31, + "learning_rate": "1.9213e-04", + "loss": 0.9138, + "slid_loss": 0.8482, + "step": 2626, + "time": 13.65 + }, + { + "epoch": 1.32, + "learning_rate": "1.9212e-04", + "loss": 0.7271, + "slid_loss": 0.8483, + "step": 2627, + "time": 13.19 + }, + { + "epoch": 1.32, + "learning_rate": "1.9211e-04", + "loss": 0.9839, + "slid_loss": 0.8501, + "step": 2628, + "time": 12.2 + }, + { + "epoch": 1.32, + "learning_rate": "1.9210e-04", + "loss": 0.7281, + "slid_loss": 0.8481, + "step": 2629, + "time": 11.62 + }, + { + "epoch": 1.32, + "learning_rate": "1.9209e-04", + "loss": 0.9401, + "slid_loss": 0.848, + "step": 2630, + "time": 13.77 + }, + { + "epoch": 1.32, + "learning_rate": "1.9208e-04", + "loss": 0.9153, + "slid_loss": 0.8485, + "step": 2631, + "time": 12.85 + }, + { + "epoch": 1.32, + "learning_rate": "1.9207e-04", + "loss": 0.8321, + "slid_loss": 0.849, + "step": 2632, + "time": 10.58 + }, + { + "epoch": 1.32, + "learning_rate": "1.9206e-04", + "loss": 0.8955, + "slid_loss": 0.85, + "step": 2633, + "time": 11.22 + }, + { + "epoch": 1.32, + "learning_rate": "1.9205e-04", + "loss": 0.8297, + "slid_loss": 0.8509, + "step": 2634, + "time": 13.21 + }, + { + "epoch": 1.32, + "learning_rate": "1.9204e-04", + "loss": 0.9098, + "slid_loss": 0.8507, + "step": 2635, + "time": 13.68 + }, + { + "epoch": 1.32, + "learning_rate": "1.9203e-04", + "loss": 0.8082, + "slid_loss": 0.8508, + "step": 2636, + "time": 12.89 + }, + { + "epoch": 1.32, + "learning_rate": "1.9202e-04", + "loss": 0.9514, + "slid_loss": 0.8525, + "step": 2637, + "time": 10.94 + }, + { + "epoch": 1.32, + "learning_rate": "1.9201e-04", + "loss": 0.8141, + "slid_loss": 0.8546, + "step": 2638, + "time": 11.64 + }, + { + "epoch": 1.32, + "learning_rate": "1.9200e-04", + "loss": 0.7982, + "slid_loss": 0.8547, + "step": 2639, + "time": 12.75 + }, + { + "epoch": 1.32, + "learning_rate": "1.9199e-04", + "loss": 0.8513, + "slid_loss": 0.8537, + "step": 2640, + "time": 13.32 + }, + { + "epoch": 1.32, + "learning_rate": "1.9199e-04", + "loss": 0.841, + "slid_loss": 0.854, + "step": 2641, + "time": 12.0 + }, + { + "epoch": 1.32, + "learning_rate": "1.9198e-04", + "loss": 1.0098, + "slid_loss": 0.8549, + "step": 2642, + "time": 11.73 + }, + { + "epoch": 1.32, + "learning_rate": "1.9197e-04", + "loss": 0.7235, + "slid_loss": 0.8526, + "step": 2643, + "time": 13.24 + }, + { + "epoch": 1.32, + "learning_rate": "1.9196e-04", + "loss": 1.068, + "slid_loss": 0.8535, + "step": 2644, + "time": 12.86 + }, + { + "epoch": 1.32, + "learning_rate": "1.9195e-04", + "loss": 0.9695, + "slid_loss": 0.8544, + "step": 2645, + "time": 13.54 + }, + { + "epoch": 1.32, + "learning_rate": "1.9194e-04", + "loss": 0.9469, + "slid_loss": 0.8558, + "step": 2646, + "time": 13.12 + }, + { + "epoch": 1.33, + "learning_rate": "1.9193e-04", + "loss": 0.9812, + "slid_loss": 0.8584, + "step": 2647, + "time": 13.36 + }, + { + "epoch": 1.33, + "learning_rate": "1.9192e-04", + "loss": 0.8846, + "slid_loss": 0.8593, + "step": 2648, + "time": 14.01 + }, + { + "epoch": 1.33, + "learning_rate": "1.9191e-04", + "loss": 0.7821, + "slid_loss": 0.8597, + "step": 2649, + "time": 13.5 + }, + { + "epoch": 1.33, + "learning_rate": "1.9190e-04", + "loss": 0.8334, + "slid_loss": 0.8596, + "step": 2650, + "time": 12.61 + }, + { + "epoch": 1.33, + "learning_rate": "1.9189e-04", + "loss": 0.9401, + "slid_loss": 0.8625, + "step": 2651, + "time": 13.68 + }, + { + "epoch": 1.33, + "learning_rate": "1.9188e-04", + "loss": 0.8538, + "slid_loss": 0.8619, + "step": 2652, + "time": 13.42 + }, + { + "epoch": 1.33, + "learning_rate": "1.9187e-04", + "loss": 0.7837, + "slid_loss": 0.8603, + "step": 2653, + "time": 12.89 + }, + { + "epoch": 1.33, + "learning_rate": "1.9186e-04", + "loss": 0.8472, + "slid_loss": 0.8577, + "step": 2654, + "time": 12.93 + }, + { + "epoch": 1.33, + "learning_rate": "1.9185e-04", + "loss": 0.9595, + "slid_loss": 0.8589, + "step": 2655, + "time": 14.04 + }, + { + "epoch": 1.33, + "learning_rate": "1.9184e-04", + "loss": 0.7655, + "slid_loss": 0.8583, + "step": 2656, + "time": 13.36 + }, + { + "epoch": 1.33, + "learning_rate": "1.9183e-04", + "loss": 0.8124, + "slid_loss": 0.8584, + "step": 2657, + "time": 12.88 + }, + { + "epoch": 1.33, + "learning_rate": "1.9182e-04", + "loss": 0.8459, + "slid_loss": 0.857, + "step": 2658, + "time": 11.02 + }, + { + "epoch": 1.33, + "learning_rate": "1.9181e-04", + "loss": 0.9738, + "slid_loss": 0.8569, + "step": 2659, + "time": 12.97 + }, + { + "epoch": 1.33, + "learning_rate": "1.9180e-04", + "loss": 0.7857, + "slid_loss": 0.8564, + "step": 2660, + "time": 11.4 + }, + { + "epoch": 1.33, + "learning_rate": "1.9179e-04", + "loss": 0.6624, + "slid_loss": 0.8543, + "step": 2661, + "time": 11.67 + }, + { + "epoch": 1.33, + "learning_rate": "1.9178e-04", + "loss": 0.7369, + "slid_loss": 0.8533, + "step": 2662, + "time": 12.9 + }, + { + "epoch": 1.33, + "learning_rate": "1.9178e-04", + "loss": 0.6482, + "slid_loss": 0.852, + "step": 2663, + "time": 12.63 + }, + { + "epoch": 1.33, + "learning_rate": "1.9177e-04", + "loss": 0.9493, + "slid_loss": 0.8533, + "step": 2664, + "time": 12.85 + }, + { + "epoch": 1.33, + "learning_rate": "1.9176e-04", + "loss": 0.6319, + "slid_loss": 0.8507, + "step": 2665, + "time": 11.36 + }, + { + "epoch": 1.34, + "learning_rate": "1.9175e-04", + "loss": 0.8874, + "slid_loss": 0.8532, + "step": 2666, + "time": 13.58 + }, + { + "epoch": 1.34, + "learning_rate": "1.9174e-04", + "loss": 0.7022, + "slid_loss": 0.8514, + "step": 2667, + "time": 12.77 + }, + { + "epoch": 1.34, + "learning_rate": "1.9173e-04", + "loss": 0.754, + "slid_loss": 0.8494, + "step": 2668, + "time": 10.89 + }, + { + "epoch": 1.34, + "learning_rate": "1.9172e-04", + "loss": 0.7621, + "slid_loss": 0.8481, + "step": 2669, + "time": 11.05 + }, + { + "epoch": 1.34, + "learning_rate": "1.9171e-04", + "loss": 0.8536, + "slid_loss": 0.8483, + "step": 2670, + "time": 13.23 + }, + { + "epoch": 1.34, + "learning_rate": "1.9170e-04", + "loss": 0.765, + "slid_loss": 0.8478, + "step": 2671, + "time": 10.83 + }, + { + "epoch": 1.34, + "learning_rate": "1.9169e-04", + "loss": 0.7371, + "slid_loss": 0.8456, + "step": 2672, + "time": 12.22 + }, + { + "epoch": 1.34, + "learning_rate": "1.9168e-04", + "loss": 0.858, + "slid_loss": 0.8442, + "step": 2673, + "time": 13.06 + }, + { + "epoch": 1.34, + "learning_rate": "1.9167e-04", + "loss": 0.8, + "slid_loss": 0.8436, + "step": 2674, + "time": 11.47 + }, + { + "epoch": 1.34, + "learning_rate": "1.9166e-04", + "loss": 0.8464, + "slid_loss": 0.8429, + "step": 2675, + "time": 13.08 + }, + { + "epoch": 1.34, + "learning_rate": "1.9165e-04", + "loss": 0.8382, + "slid_loss": 0.8429, + "step": 2676, + "time": 11.02 + }, + { + "epoch": 1.34, + "learning_rate": "1.9164e-04", + "loss": 0.8478, + "slid_loss": 0.8439, + "step": 2677, + "time": 13.66 + }, + { + "epoch": 1.34, + "learning_rate": "1.9163e-04", + "loss": 0.9303, + "slid_loss": 0.8446, + "step": 2678, + "time": 12.23 + }, + { + "epoch": 1.34, + "learning_rate": "1.9162e-04", + "loss": 0.9421, + "slid_loss": 0.8469, + "step": 2679, + "time": 13.52 + }, + { + "epoch": 1.34, + "learning_rate": "1.9161e-04", + "loss": 0.8375, + "slid_loss": 0.8449, + "step": 2680, + "time": 10.96 + }, + { + "epoch": 1.34, + "learning_rate": "1.9160e-04", + "loss": 0.7558, + "slid_loss": 0.8437, + "step": 2681, + "time": 12.96 + }, + { + "epoch": 1.34, + "learning_rate": "1.9159e-04", + "loss": 0.9914, + "slid_loss": 0.8446, + "step": 2682, + "time": 13.62 + }, + { + "epoch": 1.34, + "learning_rate": "1.9158e-04", + "loss": 1.0348, + "slid_loss": 0.8458, + "step": 2683, + "time": 11.36 + }, + { + "epoch": 1.34, + "learning_rate": "1.9157e-04", + "loss": 0.61, + "slid_loss": 0.8436, + "step": 2684, + "time": 12.3 + }, + { + "epoch": 1.34, + "learning_rate": "1.9156e-04", + "loss": 1.0046, + "slid_loss": 0.8461, + "step": 2685, + "time": 12.64 + }, + { + "epoch": 1.35, + "learning_rate": "1.9155e-04", + "loss": 0.8781, + "slid_loss": 0.8459, + "step": 2686, + "time": 12.15 + }, + { + "epoch": 1.35, + "learning_rate": "1.9154e-04", + "loss": 0.7821, + "slid_loss": 0.8456, + "step": 2687, + "time": 13.73 + }, + { + "epoch": 1.35, + "learning_rate": "1.9153e-04", + "loss": 1.0092, + "slid_loss": 0.8472, + "step": 2688, + "time": 10.94 + }, + { + "epoch": 1.35, + "learning_rate": "1.9152e-04", + "loss": 0.7505, + "slid_loss": 0.8469, + "step": 2689, + "time": 12.81 + }, + { + "epoch": 1.35, + "learning_rate": "1.9151e-04", + "loss": 0.839, + "slid_loss": 0.8455, + "step": 2690, + "time": 12.12 + }, + { + "epoch": 1.35, + "learning_rate": "1.9150e-04", + "loss": 0.8293, + "slid_loss": 0.847, + "step": 2691, + "time": 10.96 + }, + { + "epoch": 1.35, + "learning_rate": "1.9149e-04", + "loss": 0.8133, + "slid_loss": 0.8458, + "step": 2692, + "time": 13.38 + }, + { + "epoch": 1.35, + "learning_rate": "1.9148e-04", + "loss": 0.9839, + "slid_loss": 0.8484, + "step": 2693, + "time": 11.44 + }, + { + "epoch": 1.35, + "learning_rate": "1.9147e-04", + "loss": 0.8286, + "slid_loss": 0.8494, + "step": 2694, + "time": 11.3 + }, + { + "epoch": 1.35, + "learning_rate": "1.9147e-04", + "loss": 0.8713, + "slid_loss": 0.8496, + "step": 2695, + "time": 13.91 + }, + { + "epoch": 1.35, + "learning_rate": "1.9146e-04", + "loss": 0.7626, + "slid_loss": 0.8494, + "step": 2696, + "time": 13.22 + }, + { + "epoch": 1.35, + "learning_rate": "1.9145e-04", + "loss": 0.8313, + "slid_loss": 0.8505, + "step": 2697, + "time": 12.73 + }, + { + "epoch": 1.35, + "learning_rate": "1.9144e-04", + "loss": 0.8481, + "slid_loss": 0.8516, + "step": 2698, + "time": 13.27 + }, + { + "epoch": 1.35, + "learning_rate": "1.9143e-04", + "loss": 0.7943, + "slid_loss": 0.849, + "step": 2699, + "time": 12.57 + }, + { + "epoch": 1.35, + "learning_rate": "1.9142e-04", + "loss": 0.8585, + "slid_loss": 0.8491, + "step": 2700, + "time": 11.25 + }, + { + "epoch": 1.35, + "learning_rate": "1.9141e-04", + "loss": 0.8331, + "slid_loss": 0.8513, + "step": 2701, + "time": 13.49 + }, + { + "epoch": 1.35, + "learning_rate": "1.9140e-04", + "loss": 0.7108, + "slid_loss": 0.8498, + "step": 2702, + "time": 13.41 + }, + { + "epoch": 1.35, + "learning_rate": "1.9139e-04", + "loss": 0.8907, + "slid_loss": 0.8509, + "step": 2703, + "time": 13.36 + }, + { + "epoch": 1.35, + "learning_rate": "1.9138e-04", + "loss": 0.9108, + "slid_loss": 0.8535, + "step": 2704, + "time": 13.05 + }, + { + "epoch": 1.35, + "learning_rate": "1.9137e-04", + "loss": 0.751, + "slid_loss": 0.8533, + "step": 2705, + "time": 12.29 + }, + { + "epoch": 1.36, + "learning_rate": "1.9136e-04", + "loss": 0.669, + "slid_loss": 0.8521, + "step": 2706, + "time": 13.81 + }, + { + "epoch": 1.36, + "learning_rate": "1.9135e-04", + "loss": 0.9371, + "slid_loss": 0.853, + "step": 2707, + "time": 12.84 + }, + { + "epoch": 1.36, + "learning_rate": "1.9134e-04", + "loss": 0.7937, + "slid_loss": 0.85, + "step": 2708, + "time": 14.29 + }, + { + "epoch": 1.36, + "learning_rate": "1.9133e-04", + "loss": 0.7925, + "slid_loss": 0.8485, + "step": 2709, + "time": 12.24 + }, + { + "epoch": 1.36, + "learning_rate": "1.9132e-04", + "loss": 0.7016, + "slid_loss": 0.8465, + "step": 2710, + "time": 11.3 + }, + { + "epoch": 1.36, + "learning_rate": "1.9131e-04", + "loss": 0.7717, + "slid_loss": 0.8437, + "step": 2711, + "time": 11.37 + }, + { + "epoch": 1.36, + "learning_rate": "1.9130e-04", + "loss": 0.8842, + "slid_loss": 0.8437, + "step": 2712, + "time": 13.66 + }, + { + "epoch": 1.36, + "learning_rate": "1.9129e-04", + "loss": 0.8593, + "slid_loss": 0.8437, + "step": 2713, + "time": 13.14 + }, + { + "epoch": 1.36, + "learning_rate": "1.9128e-04", + "loss": 0.8925, + "slid_loss": 0.8438, + "step": 2714, + "time": 11.43 + }, + { + "epoch": 1.36, + "learning_rate": "1.9127e-04", + "loss": 0.8708, + "slid_loss": 0.8421, + "step": 2715, + "time": 11.41 + }, + { + "epoch": 1.36, + "learning_rate": "1.9126e-04", + "loss": 0.8099, + "slid_loss": 0.8427, + "step": 2716, + "time": 13.32 + }, + { + "epoch": 1.36, + "learning_rate": "1.9125e-04", + "loss": 0.7615, + "slid_loss": 0.843, + "step": 2717, + "time": 13.18 + }, + { + "epoch": 1.36, + "learning_rate": "1.9124e-04", + "loss": 1.0307, + "slid_loss": 0.8441, + "step": 2718, + "time": 11.13 + }, + { + "epoch": 1.36, + "learning_rate": "1.9123e-04", + "loss": 0.7034, + "slid_loss": 0.8428, + "step": 2719, + "time": 12.83 + }, + { + "epoch": 1.36, + "learning_rate": "1.9122e-04", + "loss": 0.9373, + "slid_loss": 0.844, + "step": 2720, + "time": 11.93 + }, + { + "epoch": 1.36, + "learning_rate": "1.9121e-04", + "loss": 0.9125, + "slid_loss": 0.8459, + "step": 2721, + "time": 13.36 + }, + { + "epoch": 1.36, + "learning_rate": "1.9120e-04", + "loss": 1.0172, + "slid_loss": 0.847, + "step": 2722, + "time": 13.45 + }, + { + "epoch": 1.36, + "learning_rate": "1.9119e-04", + "loss": 0.8066, + "slid_loss": 0.8471, + "step": 2723, + "time": 13.34 + }, + { + "epoch": 1.36, + "learning_rate": "1.9118e-04", + "loss": 0.8608, + "slid_loss": 0.8476, + "step": 2724, + "time": 13.38 + }, + { + "epoch": 1.36, + "learning_rate": "1.9117e-04", + "loss": 0.6888, + "slid_loss": 0.8437, + "step": 2725, + "time": 13.15 + }, + { + "epoch": 1.37, + "learning_rate": "1.9116e-04", + "loss": 0.7478, + "slid_loss": 0.842, + "step": 2726, + "time": 11.32 + }, + { + "epoch": 1.37, + "learning_rate": "1.9115e-04", + "loss": 0.7223, + "slid_loss": 0.842, + "step": 2727, + "time": 13.65 + }, + { + "epoch": 1.37, + "learning_rate": "1.9114e-04", + "loss": 0.7674, + "slid_loss": 0.8398, + "step": 2728, + "time": 12.45 + }, + { + "epoch": 1.37, + "learning_rate": "1.9113e-04", + "loss": 0.7289, + "slid_loss": 0.8398, + "step": 2729, + "time": 11.04 + }, + { + "epoch": 1.37, + "learning_rate": "1.9112e-04", + "loss": 0.7428, + "slid_loss": 0.8379, + "step": 2730, + "time": 12.01 + }, + { + "epoch": 1.37, + "learning_rate": "1.9111e-04", + "loss": 0.8979, + "slid_loss": 0.8377, + "step": 2731, + "time": 14.51 + }, + { + "epoch": 1.37, + "learning_rate": "1.9110e-04", + "loss": 0.9677, + "slid_loss": 0.8391, + "step": 2732, + "time": 13.25 + }, + { + "epoch": 1.37, + "learning_rate": "1.9109e-04", + "loss": 0.8875, + "slid_loss": 0.839, + "step": 2733, + "time": 14.58 + }, + { + "epoch": 1.37, + "learning_rate": "1.9108e-04", + "loss": 0.87, + "slid_loss": 0.8394, + "step": 2734, + "time": 10.59 + }, + { + "epoch": 1.37, + "learning_rate": "1.9107e-04", + "loss": 0.7383, + "slid_loss": 0.8377, + "step": 2735, + "time": 11.0 + }, + { + "epoch": 1.37, + "learning_rate": "1.9106e-04", + "loss": 1.0173, + "slid_loss": 0.8398, + "step": 2736, + "time": 11.89 + }, + { + "epoch": 1.37, + "learning_rate": "1.9105e-04", + "loss": 0.9236, + "slid_loss": 0.8395, + "step": 2737, + "time": 12.83 + }, + { + "epoch": 1.37, + "learning_rate": "1.9104e-04", + "loss": 0.7673, + "slid_loss": 0.839, + "step": 2738, + "time": 13.51 + }, + { + "epoch": 1.37, + "learning_rate": "1.9103e-04", + "loss": 0.7791, + "slid_loss": 0.8388, + "step": 2739, + "time": 11.88 + }, + { + "epoch": 1.37, + "learning_rate": "1.9102e-04", + "loss": 0.9914, + "slid_loss": 0.8402, + "step": 2740, + "time": 11.34 + }, + { + "epoch": 1.37, + "learning_rate": "1.9101e-04", + "loss": 0.8835, + "slid_loss": 0.8406, + "step": 2741, + "time": 12.32 + }, + { + "epoch": 1.37, + "learning_rate": "1.9100e-04", + "loss": 0.5754, + "slid_loss": 0.8363, + "step": 2742, + "time": 13.79 + }, + { + "epoch": 1.37, + "learning_rate": "1.9099e-04", + "loss": 0.8126, + "slid_loss": 0.8372, + "step": 2743, + "time": 13.44 + }, + { + "epoch": 1.37, + "learning_rate": "1.9098e-04", + "loss": 0.8908, + "slid_loss": 0.8354, + "step": 2744, + "time": 12.84 + }, + { + "epoch": 1.37, + "learning_rate": "1.9097e-04", + "loss": 0.9336, + "slid_loss": 0.8351, + "step": 2745, + "time": 13.16 + }, + { + "epoch": 1.38, + "learning_rate": "1.9096e-04", + "loss": 0.7594, + "slid_loss": 0.8332, + "step": 2746, + "time": 11.8 + }, + { + "epoch": 1.38, + "learning_rate": "1.9095e-04", + "loss": 0.845, + "slid_loss": 0.8318, + "step": 2747, + "time": 12.47 + }, + { + "epoch": 1.38, + "learning_rate": "1.9094e-04", + "loss": 0.7526, + "slid_loss": 0.8305, + "step": 2748, + "time": 11.69 + }, + { + "epoch": 1.38, + "learning_rate": "1.9093e-04", + "loss": 0.8547, + "slid_loss": 0.8312, + "step": 2749, + "time": 13.0 + }, + { + "epoch": 1.38, + "learning_rate": "1.9092e-04", + "loss": 0.6244, + "slid_loss": 0.8291, + "step": 2750, + "time": 13.33 + }, + { + "epoch": 1.38, + "learning_rate": "1.9091e-04", + "loss": 0.8076, + "slid_loss": 0.8278, + "step": 2751, + "time": 11.29 + }, + { + "epoch": 1.38, + "learning_rate": "1.9090e-04", + "loss": 1.0251, + "slid_loss": 0.8295, + "step": 2752, + "time": 11.23 + }, + { + "epoch": 1.38, + "learning_rate": "1.9089e-04", + "loss": 0.8159, + "slid_loss": 0.8298, + "step": 2753, + "time": 12.17 + }, + { + "epoch": 1.38, + "learning_rate": "1.9088e-04", + "loss": 0.8912, + "slid_loss": 0.8303, + "step": 2754, + "time": 14.02 + }, + { + "epoch": 1.38, + "learning_rate": "1.9087e-04", + "loss": 1.007, + "slid_loss": 0.8308, + "step": 2755, + "time": 11.51 + }, + { + "epoch": 1.38, + "learning_rate": "1.9086e-04", + "loss": 0.6853, + "slid_loss": 0.83, + "step": 2756, + "time": 13.11 + }, + { + "epoch": 1.38, + "learning_rate": "1.9085e-04", + "loss": 0.7972, + "slid_loss": 0.8298, + "step": 2757, + "time": 10.57 + }, + { + "epoch": 1.38, + "learning_rate": "1.9084e-04", + "loss": 0.7728, + "slid_loss": 0.8291, + "step": 2758, + "time": 12.81 + }, + { + "epoch": 1.38, + "learning_rate": "1.9083e-04", + "loss": 0.8155, + "slid_loss": 0.8275, + "step": 2759, + "time": 12.17 + }, + { + "epoch": 1.38, + "learning_rate": "1.9082e-04", + "loss": 0.8802, + "slid_loss": 0.8284, + "step": 2760, + "time": 11.01 + }, + { + "epoch": 1.38, + "learning_rate": "1.9081e-04", + "loss": 0.7289, + "slid_loss": 0.8291, + "step": 2761, + "time": 14.08 + }, + { + "epoch": 1.38, + "learning_rate": "1.9080e-04", + "loss": 0.7877, + "slid_loss": 0.8296, + "step": 2762, + "time": 11.05 + }, + { + "epoch": 1.38, + "learning_rate": "1.9079e-04", + "loss": 0.8197, + "slid_loss": 0.8313, + "step": 2763, + "time": 13.96 + }, + { + "epoch": 1.38, + "learning_rate": "1.9078e-04", + "loss": 0.7978, + "slid_loss": 0.8298, + "step": 2764, + "time": 14.15 + }, + { + "epoch": 1.38, + "learning_rate": "1.9077e-04", + "loss": 0.9216, + "slid_loss": 0.8327, + "step": 2765, + "time": 12.17 + }, + { + "epoch": 1.39, + "learning_rate": "1.9076e-04", + "loss": 0.6525, + "slid_loss": 0.8304, + "step": 2766, + "time": 13.09 + }, + { + "epoch": 1.39, + "learning_rate": "1.9075e-04", + "loss": 0.8225, + "slid_loss": 0.8316, + "step": 2767, + "time": 11.76 + }, + { + "epoch": 1.39, + "learning_rate": "1.9074e-04", + "loss": 0.6863, + "slid_loss": 0.8309, + "step": 2768, + "time": 11.32 + }, + { + "epoch": 1.39, + "learning_rate": "1.9073e-04", + "loss": 0.8009, + "slid_loss": 0.8313, + "step": 2769, + "time": 12.2 + }, + { + "epoch": 1.39, + "learning_rate": "1.9072e-04", + "loss": 0.8201, + "slid_loss": 0.8309, + "step": 2770, + "time": 12.73 + }, + { + "epoch": 1.39, + "learning_rate": "1.9071e-04", + "loss": 0.8031, + "slid_loss": 0.8313, + "step": 2771, + "time": 12.68 + }, + { + "epoch": 1.39, + "learning_rate": "1.9070e-04", + "loss": 0.9814, + "slid_loss": 0.8338, + "step": 2772, + "time": 12.78 + }, + { + "epoch": 1.39, + "learning_rate": "1.9069e-04", + "loss": 0.8442, + "slid_loss": 0.8336, + "step": 2773, + "time": 12.21 + }, + { + "epoch": 1.39, + "learning_rate": "1.9068e-04", + "loss": 0.7124, + "slid_loss": 0.8327, + "step": 2774, + "time": 11.0 + }, + { + "epoch": 1.39, + "learning_rate": "1.9067e-04", + "loss": 0.9308, + "slid_loss": 0.8336, + "step": 2775, + "time": 13.34 + }, + { + "epoch": 1.39, + "learning_rate": "1.9066e-04", + "loss": 0.7953, + "slid_loss": 0.8332, + "step": 2776, + "time": 13.08 + }, + { + "epoch": 1.39, + "learning_rate": "1.9065e-04", + "loss": 0.915, + "slid_loss": 0.8338, + "step": 2777, + "time": 11.51 + }, + { + "epoch": 1.39, + "learning_rate": "1.9064e-04", + "loss": 0.8691, + "slid_loss": 0.8332, + "step": 2778, + "time": 13.63 + }, + { + "epoch": 1.39, + "learning_rate": "1.9063e-04", + "loss": 0.7617, + "slid_loss": 0.8314, + "step": 2779, + "time": 13.25 + }, + { + "epoch": 1.39, + "learning_rate": "1.9062e-04", + "loss": 0.8111, + "slid_loss": 0.8312, + "step": 2780, + "time": 12.92 + }, + { + "epoch": 1.39, + "learning_rate": "1.9061e-04", + "loss": 0.7724, + "slid_loss": 0.8313, + "step": 2781, + "time": 13.54 + }, + { + "epoch": 1.39, + "learning_rate": "1.9060e-04", + "loss": 0.8119, + "slid_loss": 0.8295, + "step": 2782, + "time": 11.87 + }, + { + "epoch": 1.39, + "learning_rate": "1.9059e-04", + "loss": 0.8569, + "slid_loss": 0.8277, + "step": 2783, + "time": 13.76 + }, + { + "epoch": 1.39, + "learning_rate": "1.9058e-04", + "loss": 0.8257, + "slid_loss": 0.8299, + "step": 2784, + "time": 13.32 + }, + { + "epoch": 1.39, + "learning_rate": "1.9057e-04", + "loss": 0.8674, + "slid_loss": 0.8285, + "step": 2785, + "time": 12.66 + }, + { + "epoch": 1.4, + "learning_rate": "1.9056e-04", + "loss": 0.8905, + "slid_loss": 0.8287, + "step": 2786, + "time": 12.88 + }, + { + "epoch": 1.4, + "learning_rate": "1.9055e-04", + "loss": 0.8626, + "slid_loss": 0.8295, + "step": 2787, + "time": 13.77 + }, + { + "epoch": 1.4, + "learning_rate": "1.9053e-04", + "loss": 0.8094, + "slid_loss": 0.8275, + "step": 2788, + "time": 12.94 + }, + { + "epoch": 1.4, + "learning_rate": "1.9052e-04", + "loss": 0.7419, + "slid_loss": 0.8274, + "step": 2789, + "time": 11.56 + }, + { + "epoch": 1.4, + "learning_rate": "1.9051e-04", + "loss": 0.8332, + "slid_loss": 0.8273, + "step": 2790, + "time": 12.07 + }, + { + "epoch": 1.4, + "learning_rate": "1.9050e-04", + "loss": 0.7394, + "slid_loss": 0.8264, + "step": 2791, + "time": 12.82 + }, + { + "epoch": 1.4, + "learning_rate": "1.9049e-04", + "loss": 0.8985, + "slid_loss": 0.8273, + "step": 2792, + "time": 11.68 + }, + { + "epoch": 1.4, + "learning_rate": "1.9048e-04", + "loss": 0.8075, + "slid_loss": 0.8255, + "step": 2793, + "time": 13.6 + }, + { + "epoch": 1.4, + "learning_rate": "1.9047e-04", + "loss": 0.6886, + "slid_loss": 0.8241, + "step": 2794, + "time": 13.1 + }, + { + "epoch": 1.4, + "learning_rate": "1.9046e-04", + "loss": 0.828, + "slid_loss": 0.8237, + "step": 2795, + "time": 11.69 + }, + { + "epoch": 1.4, + "learning_rate": "1.9045e-04", + "loss": 1.0177, + "slid_loss": 0.8262, + "step": 2796, + "time": 13.71 + }, + { + "epoch": 1.4, + "learning_rate": "1.9044e-04", + "loss": 0.9078, + "slid_loss": 0.827, + "step": 2797, + "time": 11.85 + }, + { + "epoch": 1.4, + "learning_rate": "1.9043e-04", + "loss": 0.742, + "slid_loss": 0.8259, + "step": 2798, + "time": 11.15 + }, + { + "epoch": 1.4, + "learning_rate": "1.9042e-04", + "loss": 0.6886, + "slid_loss": 0.8249, + "step": 2799, + "time": 11.09 + }, + { + "epoch": 1.4, + "learning_rate": "1.9041e-04", + "loss": 0.7508, + "slid_loss": 0.8238, + "step": 2800, + "time": 13.29 + }, + { + "epoch": 1.4, + "learning_rate": "1.9040e-04", + "loss": 1.0772, + "slid_loss": 0.8262, + "step": 2801, + "time": 11.07 + }, + { + "epoch": 1.4, + "learning_rate": "1.9039e-04", + "loss": 0.762, + "slid_loss": 0.8268, + "step": 2802, + "time": 11.23 + }, + { + "epoch": 1.4, + "learning_rate": "1.9038e-04", + "loss": 0.8298, + "slid_loss": 0.8261, + "step": 2803, + "time": 12.23 + }, + { + "epoch": 1.4, + "learning_rate": "1.9037e-04", + "loss": 0.9272, + "slid_loss": 0.8263, + "step": 2804, + "time": 12.17 + }, + { + "epoch": 1.4, + "learning_rate": "1.9036e-04", + "loss": 0.8002, + "slid_loss": 0.8268, + "step": 2805, + "time": 13.3 + }, + { + "epoch": 1.41, + "learning_rate": "1.9035e-04", + "loss": 0.7963, + "slid_loss": 0.8281, + "step": 2806, + "time": 12.8 + }, + { + "epoch": 1.41, + "learning_rate": "1.9034e-04", + "loss": 0.9004, + "slid_loss": 0.8277, + "step": 2807, + "time": 10.72 + }, + { + "epoch": 1.41, + "learning_rate": "1.9033e-04", + "loss": 0.8925, + "slid_loss": 0.8287, + "step": 2808, + "time": 12.84 + }, + { + "epoch": 1.41, + "learning_rate": "1.9032e-04", + "loss": 0.7695, + "slid_loss": 0.8285, + "step": 2809, + "time": 14.17 + }, + { + "epoch": 1.41, + "learning_rate": "1.9031e-04", + "loss": 0.8652, + "slid_loss": 0.8301, + "step": 2810, + "time": 14.07 + }, + { + "epoch": 1.41, + "learning_rate": "1.9030e-04", + "loss": 0.937, + "slid_loss": 0.8318, + "step": 2811, + "time": 13.29 + }, + { + "epoch": 1.41, + "learning_rate": "1.9029e-04", + "loss": 0.7805, + "slid_loss": 0.8307, + "step": 2812, + "time": 11.61 + }, + { + "epoch": 1.41, + "learning_rate": "1.9028e-04", + "loss": 0.7512, + "slid_loss": 0.8296, + "step": 2813, + "time": 12.19 + }, + { + "epoch": 1.41, + "learning_rate": "1.9027e-04", + "loss": 0.8221, + "slid_loss": 0.8289, + "step": 2814, + "time": 11.02 + }, + { + "epoch": 1.41, + "learning_rate": "1.9026e-04", + "loss": 0.8152, + "slid_loss": 0.8284, + "step": 2815, + "time": 12.91 + }, + { + "epoch": 1.41, + "learning_rate": "1.9025e-04", + "loss": 0.8024, + "slid_loss": 0.8283, + "step": 2816, + "time": 12.91 + }, + { + "epoch": 1.41, + "learning_rate": "1.9024e-04", + "loss": 0.8731, + "slid_loss": 0.8294, + "step": 2817, + "time": 10.74 + }, + { + "epoch": 1.41, + "learning_rate": "1.9023e-04", + "loss": 0.9158, + "slid_loss": 0.8283, + "step": 2818, + "time": 11.49 + }, + { + "epoch": 1.41, + "learning_rate": "1.9022e-04", + "loss": 0.728, + "slid_loss": 0.8285, + "step": 2819, + "time": 12.61 + }, + { + "epoch": 1.41, + "learning_rate": "1.9020e-04", + "loss": 0.7023, + "slid_loss": 0.8262, + "step": 2820, + "time": 13.23 + }, + { + "epoch": 1.41, + "learning_rate": "1.9019e-04", + "loss": 0.7741, + "slid_loss": 0.8248, + "step": 2821, + "time": 11.7 + }, + { + "epoch": 1.41, + "learning_rate": "1.9018e-04", + "loss": 0.8156, + "slid_loss": 0.8228, + "step": 2822, + "time": 11.1 + }, + { + "epoch": 1.41, + "learning_rate": "1.9017e-04", + "loss": 0.793, + "slid_loss": 0.8226, + "step": 2823, + "time": 12.01 + }, + { + "epoch": 1.41, + "learning_rate": "1.9016e-04", + "loss": 0.8973, + "slid_loss": 0.823, + "step": 2824, + "time": 12.2 + }, + { + "epoch": 1.41, + "learning_rate": "1.9015e-04", + "loss": 0.9001, + "slid_loss": 0.8251, + "step": 2825, + "time": 12.28 + }, + { + "epoch": 1.42, + "learning_rate": "1.9014e-04", + "loss": 0.8003, + "slid_loss": 0.8256, + "step": 2826, + "time": 10.59 + }, + { + "epoch": 1.42, + "learning_rate": "1.9013e-04", + "loss": 0.9042, + "slid_loss": 0.8275, + "step": 2827, + "time": 12.82 + }, + { + "epoch": 1.42, + "learning_rate": "1.9012e-04", + "loss": 0.7138, + "slid_loss": 0.8269, + "step": 2828, + "time": 12.95 + }, + { + "epoch": 1.42, + "learning_rate": "1.9011e-04", + "loss": 0.6842, + "slid_loss": 0.8265, + "step": 2829, + "time": 10.03 + }, + { + "epoch": 1.42, + "learning_rate": "1.9010e-04", + "loss": 0.9846, + "slid_loss": 0.8289, + "step": 2830, + "time": 10.92 + }, + { + "epoch": 1.42, + "learning_rate": "1.9009e-04", + "loss": 0.8039, + "slid_loss": 0.8279, + "step": 2831, + "time": 13.31 + }, + { + "epoch": 1.42, + "learning_rate": "1.9008e-04", + "loss": 0.8125, + "slid_loss": 0.8264, + "step": 2832, + "time": 13.44 + }, + { + "epoch": 1.42, + "learning_rate": "1.9007e-04", + "loss": 0.8439, + "slid_loss": 0.826, + "step": 2833, + "time": 10.33 + }, + { + "epoch": 1.42, + "learning_rate": "1.9006e-04", + "loss": 0.8794, + "slid_loss": 0.8261, + "step": 2834, + "time": 13.17 + }, + { + "epoch": 1.42, + "learning_rate": "1.9005e-04", + "loss": 0.8338, + "slid_loss": 0.827, + "step": 2835, + "time": 13.5 + }, + { + "epoch": 1.42, + "learning_rate": "1.9004e-04", + "loss": 0.9226, + "slid_loss": 0.8261, + "step": 2836, + "time": 13.55 + }, + { + "epoch": 1.42, + "learning_rate": "1.9003e-04", + "loss": 0.8085, + "slid_loss": 0.8249, + "step": 2837, + "time": 11.84 + }, + { + "epoch": 1.42, + "learning_rate": "1.9002e-04", + "loss": 0.8615, + "slid_loss": 0.8258, + "step": 2838, + "time": 12.98 + }, + { + "epoch": 1.42, + "learning_rate": "1.9001e-04", + "loss": 0.7007, + "slid_loss": 0.8251, + "step": 2839, + "time": 10.69 + }, + { + "epoch": 1.42, + "learning_rate": "1.9000e-04", + "loss": 0.7981, + "slid_loss": 0.8231, + "step": 2840, + "time": 13.4 + }, + { + "epoch": 1.42, + "learning_rate": "1.8999e-04", + "loss": 0.8873, + "slid_loss": 0.8232, + "step": 2841, + "time": 10.54 + }, + { + "epoch": 1.42, + "learning_rate": "1.8998e-04", + "loss": 0.9358, + "slid_loss": 0.8268, + "step": 2842, + "time": 13.42 + }, + { + "epoch": 1.42, + "learning_rate": "1.8996e-04", + "loss": 0.709, + "slid_loss": 0.8257, + "step": 2843, + "time": 10.56 + }, + { + "epoch": 1.42, + "learning_rate": "1.8995e-04", + "loss": 0.6331, + "slid_loss": 0.8232, + "step": 2844, + "time": 10.55 + }, + { + "epoch": 1.42, + "learning_rate": "1.8994e-04", + "loss": 0.681, + "slid_loss": 0.8206, + "step": 2845, + "time": 13.87 + }, + { + "epoch": 1.43, + "learning_rate": "1.8993e-04", + "loss": 0.845, + "slid_loss": 0.8215, + "step": 2846, + "time": 12.74 + }, + { + "epoch": 1.43, + "learning_rate": "1.8992e-04", + "loss": 0.759, + "slid_loss": 0.8206, + "step": 2847, + "time": 12.19 + }, + { + "epoch": 1.43, + "learning_rate": "1.8991e-04", + "loss": 0.9401, + "slid_loss": 0.8225, + "step": 2848, + "time": 10.69 + }, + { + "epoch": 1.43, + "learning_rate": "1.8990e-04", + "loss": 0.7292, + "slid_loss": 0.8213, + "step": 2849, + "time": 13.51 + }, + { + "epoch": 1.43, + "learning_rate": "1.8989e-04", + "loss": 0.7265, + "slid_loss": 0.8223, + "step": 2850, + "time": 13.16 + }, + { + "epoch": 1.43, + "learning_rate": "1.8988e-04", + "loss": 0.683, + "slid_loss": 0.821, + "step": 2851, + "time": 12.11 + }, + { + "epoch": 1.43, + "learning_rate": "1.8987e-04", + "loss": 0.8574, + "slid_loss": 0.8193, + "step": 2852, + "time": 11.39 + }, + { + "epoch": 1.43, + "learning_rate": "1.8986e-04", + "loss": 0.9195, + "slid_loss": 0.8204, + "step": 2853, + "time": 12.8 + }, + { + "epoch": 1.43, + "learning_rate": "1.8985e-04", + "loss": 0.6801, + "slid_loss": 0.8183, + "step": 2854, + "time": 12.96 + }, + { + "epoch": 1.43, + "learning_rate": "1.8984e-04", + "loss": 0.6449, + "slid_loss": 0.8147, + "step": 2855, + "time": 13.19 + }, + { + "epoch": 1.43, + "learning_rate": "1.8983e-04", + "loss": 0.6721, + "slid_loss": 0.8145, + "step": 2856, + "time": 13.8 + }, + { + "epoch": 1.43, + "learning_rate": "1.8982e-04", + "loss": 1.0246, + "slid_loss": 0.8168, + "step": 2857, + "time": 11.67 + }, + { + "epoch": 1.43, + "learning_rate": "1.8981e-04", + "loss": 1.1105, + "slid_loss": 0.8202, + "step": 2858, + "time": 11.58 + }, + { + "epoch": 1.43, + "learning_rate": "1.8980e-04", + "loss": 0.8699, + "slid_loss": 0.8207, + "step": 2859, + "time": 13.55 + }, + { + "epoch": 1.43, + "learning_rate": "1.8979e-04", + "loss": 0.9646, + "slid_loss": 0.8216, + "step": 2860, + "time": 11.53 + }, + { + "epoch": 1.43, + "learning_rate": "1.8977e-04", + "loss": 0.7921, + "slid_loss": 0.8222, + "step": 2861, + "time": 10.9 + }, + { + "epoch": 1.43, + "learning_rate": "1.8976e-04", + "loss": 0.8153, + "slid_loss": 0.8225, + "step": 2862, + "time": 13.11 + }, + { + "epoch": 1.43, + "learning_rate": "1.8975e-04", + "loss": 0.9221, + "slid_loss": 0.8235, + "step": 2863, + "time": 12.88 + }, + { + "epoch": 1.43, + "learning_rate": "1.8974e-04", + "loss": 0.9027, + "slid_loss": 0.8245, + "step": 2864, + "time": 13.69 + }, + { + "epoch": 1.43, + "learning_rate": "1.8973e-04", + "loss": 0.8017, + "slid_loss": 0.8233, + "step": 2865, + "time": 14.25 + }, + { + "epoch": 1.44, + "learning_rate": "1.8972e-04", + "loss": 0.8371, + "slid_loss": 0.8252, + "step": 2866, + "time": 13.0 + }, + { + "epoch": 1.44, + "learning_rate": "1.8971e-04", + "loss": 0.6857, + "slid_loss": 0.8238, + "step": 2867, + "time": 11.24 + }, + { + "epoch": 1.44, + "learning_rate": "1.8970e-04", + "loss": 0.9652, + "slid_loss": 0.8266, + "step": 2868, + "time": 11.44 + }, + { + "epoch": 1.44, + "learning_rate": "1.8969e-04", + "loss": 0.8813, + "slid_loss": 0.8274, + "step": 2869, + "time": 12.19 + }, + { + "epoch": 1.44, + "learning_rate": "1.8968e-04", + "loss": 0.7525, + "slid_loss": 0.8267, + "step": 2870, + "time": 13.95 + }, + { + "epoch": 1.44, + "learning_rate": "1.8967e-04", + "loss": 0.9234, + "slid_loss": 0.8279, + "step": 2871, + "time": 13.28 + }, + { + "epoch": 1.44, + "learning_rate": "1.8966e-04", + "loss": 0.8876, + "slid_loss": 0.827, + "step": 2872, + "time": 11.87 + }, + { + "epoch": 1.44, + "learning_rate": "1.8965e-04", + "loss": 0.8217, + "slid_loss": 0.8268, + "step": 2873, + "time": 13.4 + }, + { + "epoch": 1.44, + "learning_rate": "1.8964e-04", + "loss": 0.9676, + "slid_loss": 0.8293, + "step": 2874, + "time": 14.15 + }, + { + "epoch": 1.44, + "learning_rate": "1.8963e-04", + "loss": 0.8727, + "slid_loss": 0.8287, + "step": 2875, + "time": 13.42 + }, + { + "epoch": 1.44, + "learning_rate": "1.8962e-04", + "loss": 0.5696, + "slid_loss": 0.8265, + "step": 2876, + "time": 13.14 + }, + { + "epoch": 1.44, + "learning_rate": "1.8960e-04", + "loss": 0.7494, + "slid_loss": 0.8248, + "step": 2877, + "time": 12.88 + }, + { + "epoch": 1.44, + "learning_rate": "1.8959e-04", + "loss": 0.939, + "slid_loss": 0.8255, + "step": 2878, + "time": 12.62 + }, + { + "epoch": 1.44, + "learning_rate": "1.8958e-04", + "loss": 0.6748, + "slid_loss": 0.8247, + "step": 2879, + "time": 11.41 + }, + { + "epoch": 1.44, + "learning_rate": "1.8957e-04", + "loss": 0.8264, + "slid_loss": 0.8248, + "step": 2880, + "time": 13.18 + }, + { + "epoch": 1.44, + "learning_rate": "1.8956e-04", + "loss": 0.9013, + "slid_loss": 0.8261, + "step": 2881, + "time": 13.3 + }, + { + "epoch": 1.44, + "learning_rate": "1.8955e-04", + "loss": 0.8935, + "slid_loss": 0.8269, + "step": 2882, + "time": 13.77 + }, + { + "epoch": 1.44, + "learning_rate": "1.8954e-04", + "loss": 0.906, + "slid_loss": 0.8274, + "step": 2883, + "time": 12.81 + }, + { + "epoch": 1.44, + "learning_rate": "1.8953e-04", + "loss": 0.7041, + "slid_loss": 0.8262, + "step": 2884, + "time": 13.26 + }, + { + "epoch": 1.44, + "learning_rate": "1.8952e-04", + "loss": 0.8283, + "slid_loss": 0.8258, + "step": 2885, + "time": 12.78 + }, + { + "epoch": 1.45, + "learning_rate": "1.8951e-04", + "loss": 0.7192, + "slid_loss": 0.8241, + "step": 2886, + "time": 12.68 + }, + { + "epoch": 1.45, + "learning_rate": "1.8950e-04", + "loss": 0.8255, + "slid_loss": 0.8237, + "step": 2887, + "time": 11.25 + }, + { + "epoch": 1.45, + "learning_rate": "1.8949e-04", + "loss": 0.843, + "slid_loss": 0.8241, + "step": 2888, + "time": 12.76 + }, + { + "epoch": 1.45, + "learning_rate": "1.8948e-04", + "loss": 0.7386, + "slid_loss": 0.824, + "step": 2889, + "time": 11.9 + }, + { + "epoch": 1.45, + "learning_rate": "1.8947e-04", + "loss": 0.6763, + "slid_loss": 0.8225, + "step": 2890, + "time": 11.51 + }, + { + "epoch": 1.45, + "learning_rate": "1.8945e-04", + "loss": 0.9124, + "slid_loss": 0.8242, + "step": 2891, + "time": 13.73 + }, + { + "epoch": 1.45, + "learning_rate": "1.8944e-04", + "loss": 0.7814, + "slid_loss": 0.823, + "step": 2892, + "time": 12.09 + }, + { + "epoch": 1.45, + "learning_rate": "1.8943e-04", + "loss": 0.6851, + "slid_loss": 0.8218, + "step": 2893, + "time": 13.6 + }, + { + "epoch": 1.45, + "learning_rate": "1.8942e-04", + "loss": 0.6685, + "slid_loss": 0.8216, + "step": 2894, + "time": 13.29 + }, + { + "epoch": 1.45, + "learning_rate": "1.8941e-04", + "loss": 0.7162, + "slid_loss": 0.8205, + "step": 2895, + "time": 12.97 + }, + { + "epoch": 1.45, + "learning_rate": "1.8940e-04", + "loss": 0.8141, + "slid_loss": 0.8184, + "step": 2896, + "time": 11.6 + }, + { + "epoch": 1.45, + "learning_rate": "1.8939e-04", + "loss": 0.8797, + "slid_loss": 0.8182, + "step": 2897, + "time": 11.71 + }, + { + "epoch": 1.45, + "learning_rate": "1.8938e-04", + "loss": 0.6158, + "slid_loss": 0.8169, + "step": 2898, + "time": 13.3 + }, + { + "epoch": 1.45, + "learning_rate": "1.8937e-04", + "loss": 0.588, + "slid_loss": 0.8159, + "step": 2899, + "time": 13.89 + }, + { + "epoch": 1.45, + "learning_rate": "1.8936e-04", + "loss": 0.7552, + "slid_loss": 0.8159, + "step": 2900, + "time": 12.67 + }, + { + "epoch": 1.45, + "learning_rate": "1.8935e-04", + "loss": 0.9529, + "slid_loss": 0.8147, + "step": 2901, + "time": 13.3 + }, + { + "epoch": 1.45, + "learning_rate": "1.8934e-04", + "loss": 0.759, + "slid_loss": 0.8147, + "step": 2902, + "time": 14.2 + }, + { + "epoch": 1.45, + "learning_rate": "1.8933e-04", + "loss": 0.7131, + "slid_loss": 0.8135, + "step": 2903, + "time": 11.94 + }, + { + "epoch": 1.45, + "learning_rate": "1.8931e-04", + "loss": 0.8563, + "slid_loss": 0.8128, + "step": 2904, + "time": 11.16 + }, + { + "epoch": 1.45, + "learning_rate": "1.8930e-04", + "loss": 0.8644, + "slid_loss": 0.8134, + "step": 2905, + "time": 12.82 + }, + { + "epoch": 1.46, + "learning_rate": "1.8929e-04", + "loss": 0.6313, + "slid_loss": 0.8118, + "step": 2906, + "time": 13.76 + }, + { + "epoch": 1.46, + "learning_rate": "1.8928e-04", + "loss": 0.787, + "slid_loss": 0.8106, + "step": 2907, + "time": 11.21 + }, + { + "epoch": 1.46, + "learning_rate": "1.8927e-04", + "loss": 0.796, + "slid_loss": 0.8097, + "step": 2908, + "time": 11.37 + }, + { + "epoch": 1.46, + "learning_rate": "1.8926e-04", + "loss": 0.9111, + "slid_loss": 0.8111, + "step": 2909, + "time": 11.72 + }, + { + "epoch": 1.46, + "learning_rate": "1.8925e-04", + "loss": 0.963, + "slid_loss": 0.8121, + "step": 2910, + "time": 13.68 + }, + { + "epoch": 1.46, + "learning_rate": "1.8924e-04", + "loss": 0.8352, + "slid_loss": 0.811, + "step": 2911, + "time": 11.3 + }, + { + "epoch": 1.46, + "learning_rate": "1.8923e-04", + "loss": 0.8371, + "slid_loss": 0.8116, + "step": 2912, + "time": 11.75 + }, + { + "epoch": 1.46, + "learning_rate": "1.8922e-04", + "loss": 0.9097, + "slid_loss": 0.8132, + "step": 2913, + "time": 12.83 + }, + { + "epoch": 1.46, + "learning_rate": "1.8921e-04", + "loss": 0.6892, + "slid_loss": 0.8119, + "step": 2914, + "time": 13.23 + }, + { + "epoch": 1.46, + "learning_rate": "1.8920e-04", + "loss": 0.6516, + "slid_loss": 0.8102, + "step": 2915, + "time": 11.28 + }, + { + "epoch": 1.46, + "learning_rate": "1.8918e-04", + "loss": 0.635, + "slid_loss": 0.8086, + "step": 2916, + "time": 13.68 + }, + { + "epoch": 1.46, + "learning_rate": "1.8917e-04", + "loss": 0.7768, + "slid_loss": 0.8076, + "step": 2917, + "time": 12.8 + }, + { + "epoch": 1.46, + "learning_rate": "1.8916e-04", + "loss": 0.8365, + "slid_loss": 0.8068, + "step": 2918, + "time": 12.94 + }, + { + "epoch": 1.46, + "learning_rate": "1.8915e-04", + "loss": 0.7376, + "slid_loss": 0.8069, + "step": 2919, + "time": 13.54 + }, + { + "epoch": 1.46, + "learning_rate": "1.8914e-04", + "loss": 0.7744, + "slid_loss": 0.8076, + "step": 2920, + "time": 13.52 + }, + { + "epoch": 1.46, + "learning_rate": "1.8913e-04", + "loss": 0.6628, + "slid_loss": 0.8065, + "step": 2921, + "time": 11.12 + }, + { + "epoch": 1.46, + "learning_rate": "1.8912e-04", + "loss": 0.7826, + "slid_loss": 0.8062, + "step": 2922, + "time": 11.13 + }, + { + "epoch": 1.46, + "learning_rate": "1.8911e-04", + "loss": 0.8338, + "slid_loss": 0.8066, + "step": 2923, + "time": 11.0 + }, + { + "epoch": 1.46, + "learning_rate": "1.8910e-04", + "loss": 1.0118, + "slid_loss": 0.8077, + "step": 2924, + "time": 11.31 + }, + { + "epoch": 1.46, + "learning_rate": "1.8909e-04", + "loss": 0.8747, + "slid_loss": 0.8075, + "step": 2925, + "time": 13.57 + }, + { + "epoch": 1.47, + "learning_rate": "1.8908e-04", + "loss": 0.7571, + "slid_loss": 0.807, + "step": 2926, + "time": 12.82 + }, + { + "epoch": 1.47, + "learning_rate": "1.8907e-04", + "loss": 0.9222, + "slid_loss": 0.8072, + "step": 2927, + "time": 11.2 + }, + { + "epoch": 1.47, + "learning_rate": "1.8905e-04", + "loss": 0.8074, + "slid_loss": 0.8082, + "step": 2928, + "time": 13.5 + }, + { + "epoch": 1.47, + "learning_rate": "1.8904e-04", + "loss": 0.9557, + "slid_loss": 0.8109, + "step": 2929, + "time": 12.02 + }, + { + "epoch": 1.47, + "learning_rate": "1.8903e-04", + "loss": 0.8683, + "slid_loss": 0.8097, + "step": 2930, + "time": 12.25 + }, + { + "epoch": 1.47, + "learning_rate": "1.8902e-04", + "loss": 0.8733, + "slid_loss": 0.8104, + "step": 2931, + "time": 11.16 + }, + { + "epoch": 1.47, + "learning_rate": "1.8901e-04", + "loss": 0.8537, + "slid_loss": 0.8108, + "step": 2932, + "time": 11.23 + }, + { + "epoch": 1.47, + "learning_rate": "1.8900e-04", + "loss": 0.7235, + "slid_loss": 0.8096, + "step": 2933, + "time": 11.36 + }, + { + "epoch": 1.47, + "learning_rate": "1.8899e-04", + "loss": 0.6983, + "slid_loss": 0.8078, + "step": 2934, + "time": 11.41 + }, + { + "epoch": 1.47, + "learning_rate": "1.8898e-04", + "loss": 0.9397, + "slid_loss": 0.8089, + "step": 2935, + "time": 13.83 + }, + { + "epoch": 1.47, + "learning_rate": "1.8897e-04", + "loss": 0.7064, + "slid_loss": 0.8067, + "step": 2936, + "time": 13.67 + }, + { + "epoch": 1.47, + "learning_rate": "1.8896e-04", + "loss": 0.8611, + "slid_loss": 0.8072, + "step": 2937, + "time": 13.41 + }, + { + "epoch": 1.47, + "learning_rate": "1.8894e-04", + "loss": 0.8262, + "slid_loss": 0.8069, + "step": 2938, + "time": 12.28 + }, + { + "epoch": 1.47, + "learning_rate": "1.8893e-04", + "loss": 0.706, + "slid_loss": 0.8069, + "step": 2939, + "time": 10.7 + }, + { + "epoch": 1.47, + "learning_rate": "1.8892e-04", + "loss": 0.9484, + "slid_loss": 0.8084, + "step": 2940, + "time": 14.16 + }, + { + "epoch": 1.47, + "learning_rate": "1.8891e-04", + "loss": 0.9583, + "slid_loss": 0.8091, + "step": 2941, + "time": 13.13 + }, + { + "epoch": 1.47, + "learning_rate": "1.8890e-04", + "loss": 0.7193, + "slid_loss": 0.807, + "step": 2942, + "time": 10.79 + }, + { + "epoch": 1.47, + "learning_rate": "1.8889e-04", + "loss": 0.9179, + "slid_loss": 0.8091, + "step": 2943, + "time": 13.9 + }, + { + "epoch": 1.47, + "learning_rate": "1.8888e-04", + "loss": 1.0234, + "slid_loss": 0.813, + "step": 2944, + "time": 11.49 + }, + { + "epoch": 1.47, + "learning_rate": "1.8887e-04", + "loss": 0.9776, + "slid_loss": 0.8159, + "step": 2945, + "time": 12.75 + }, + { + "epoch": 1.48, + "learning_rate": "1.8886e-04", + "loss": 0.9972, + "slid_loss": 0.8175, + "step": 2946, + "time": 14.11 + }, + { + "epoch": 1.48, + "learning_rate": "1.8885e-04", + "loss": 0.848, + "slid_loss": 0.8183, + "step": 2947, + "time": 13.13 + }, + { + "epoch": 1.48, + "learning_rate": "1.8883e-04", + "loss": 0.8242, + "slid_loss": 0.8172, + "step": 2948, + "time": 12.76 + }, + { + "epoch": 1.48, + "learning_rate": "1.8882e-04", + "loss": 0.8331, + "slid_loss": 0.8182, + "step": 2949, + "time": 11.73 + }, + { + "epoch": 1.48, + "learning_rate": "1.8881e-04", + "loss": 0.8185, + "slid_loss": 0.8191, + "step": 2950, + "time": 11.34 + }, + { + "epoch": 1.48, + "learning_rate": "1.8880e-04", + "loss": 0.91, + "slid_loss": 0.8214, + "step": 2951, + "time": 11.81 + }, + { + "epoch": 1.48, + "learning_rate": "1.8879e-04", + "loss": 0.8113, + "slid_loss": 0.821, + "step": 2952, + "time": 11.86 + }, + { + "epoch": 1.48, + "learning_rate": "1.8878e-04", + "loss": 0.7584, + "slid_loss": 0.8193, + "step": 2953, + "time": 13.57 + }, + { + "epoch": 1.48, + "learning_rate": "1.8877e-04", + "loss": 0.8201, + "slid_loss": 0.8207, + "step": 2954, + "time": 11.25 + }, + { + "epoch": 1.48, + "learning_rate": "1.8876e-04", + "loss": 0.8075, + "slid_loss": 0.8224, + "step": 2955, + "time": 10.76 + }, + { + "epoch": 1.48, + "learning_rate": "1.8875e-04", + "loss": 0.7505, + "slid_loss": 0.8232, + "step": 2956, + "time": 10.8 + }, + { + "epoch": 1.48, + "learning_rate": "1.8874e-04", + "loss": 0.7997, + "slid_loss": 0.8209, + "step": 2957, + "time": 13.5 + }, + { + "epoch": 1.48, + "learning_rate": "1.8872e-04", + "loss": 0.7192, + "slid_loss": 0.817, + "step": 2958, + "time": 14.04 + }, + { + "epoch": 1.48, + "learning_rate": "1.8871e-04", + "loss": 0.7911, + "slid_loss": 0.8162, + "step": 2959, + "time": 13.59 + }, + { + "epoch": 1.48, + "learning_rate": "1.8870e-04", + "loss": 0.7489, + "slid_loss": 0.814, + "step": 2960, + "time": 13.91 + }, + { + "epoch": 1.48, + "learning_rate": "1.8869e-04", + "loss": 0.8123, + "slid_loss": 0.8142, + "step": 2961, + "time": 12.79 + }, + { + "epoch": 1.48, + "learning_rate": "1.8868e-04", + "loss": 0.7546, + "slid_loss": 0.8136, + "step": 2962, + "time": 12.54 + }, + { + "epoch": 1.48, + "learning_rate": "1.8867e-04", + "loss": 0.8926, + "slid_loss": 0.8133, + "step": 2963, + "time": 10.65 + }, + { + "epoch": 1.48, + "learning_rate": "1.8866e-04", + "loss": 0.7928, + "slid_loss": 0.8122, + "step": 2964, + "time": 13.35 + }, + { + "epoch": 1.48, + "learning_rate": "1.8865e-04", + "loss": 0.7502, + "slid_loss": 0.8117, + "step": 2965, + "time": 13.17 + }, + { + "epoch": 1.49, + "learning_rate": "1.8864e-04", + "loss": 0.7866, + "slid_loss": 0.8112, + "step": 2966, + "time": 11.5 + }, + { + "epoch": 1.49, + "learning_rate": "1.8862e-04", + "loss": 0.8647, + "slid_loss": 0.813, + "step": 2967, + "time": 12.99 + }, + { + "epoch": 1.49, + "learning_rate": "1.8861e-04", + "loss": 1.0026, + "slid_loss": 0.8134, + "step": 2968, + "time": 11.48 + }, + { + "epoch": 1.49, + "learning_rate": "1.8860e-04", + "loss": 0.7678, + "slid_loss": 0.8123, + "step": 2969, + "time": 13.23 + }, + { + "epoch": 1.49, + "learning_rate": "1.8859e-04", + "loss": 0.9241, + "slid_loss": 0.814, + "step": 2970, + "time": 12.77 + }, + { + "epoch": 1.49, + "learning_rate": "1.8858e-04", + "loss": 0.847, + "slid_loss": 0.8132, + "step": 2971, + "time": 11.36 + }, + { + "epoch": 1.49, + "learning_rate": "1.8857e-04", + "loss": 0.726, + "slid_loss": 0.8116, + "step": 2972, + "time": 11.43 + }, + { + "epoch": 1.49, + "learning_rate": "1.8856e-04", + "loss": 0.7699, + "slid_loss": 0.8111, + "step": 2973, + "time": 11.76 + }, + { + "epoch": 1.49, + "learning_rate": "1.8855e-04", + "loss": 0.9823, + "slid_loss": 0.8112, + "step": 2974, + "time": 13.02 + }, + { + "epoch": 1.49, + "learning_rate": "1.8854e-04", + "loss": 0.7328, + "slid_loss": 0.8098, + "step": 2975, + "time": 13.77 + }, + { + "epoch": 1.49, + "learning_rate": "1.8852e-04", + "loss": 0.6079, + "slid_loss": 0.8102, + "step": 2976, + "time": 13.43 + }, + { + "epoch": 1.49, + "learning_rate": "1.8851e-04", + "loss": 0.8852, + "slid_loss": 0.8116, + "step": 2977, + "time": 12.85 + }, + { + "epoch": 1.49, + "learning_rate": "1.8850e-04", + "loss": 0.838, + "slid_loss": 0.8105, + "step": 2978, + "time": 13.32 + }, + { + "epoch": 1.49, + "learning_rate": "1.8849e-04", + "loss": 0.9166, + "slid_loss": 0.813, + "step": 2979, + "time": 10.95 + }, + { + "epoch": 1.49, + "learning_rate": "1.8848e-04", + "loss": 0.6573, + "slid_loss": 0.8113, + "step": 2980, + "time": 12.89 + }, + { + "epoch": 1.49, + "learning_rate": "1.8847e-04", + "loss": 0.8279, + "slid_loss": 0.8105, + "step": 2981, + "time": 13.3 + }, + { + "epoch": 1.49, + "learning_rate": "1.8846e-04", + "loss": 0.7291, + "slid_loss": 0.8089, + "step": 2982, + "time": 11.29 + }, + { + "epoch": 1.49, + "learning_rate": "1.8845e-04", + "loss": 0.8309, + "slid_loss": 0.8081, + "step": 2983, + "time": 12.48 + }, + { + "epoch": 1.49, + "learning_rate": "1.8844e-04", + "loss": 0.7796, + "slid_loss": 0.8089, + "step": 2984, + "time": 11.99 + }, + { + "epoch": 1.49, + "learning_rate": "1.8842e-04", + "loss": 0.7351, + "slid_loss": 0.808, + "step": 2985, + "time": 13.2 + }, + { + "epoch": 1.5, + "learning_rate": "1.8841e-04", + "loss": 0.9028, + "slid_loss": 0.8098, + "step": 2986, + "time": 12.15 + }, + { + "epoch": 1.5, + "learning_rate": "1.8840e-04", + "loss": 0.6806, + "slid_loss": 0.8084, + "step": 2987, + "time": 13.38 + }, + { + "epoch": 1.5, + "learning_rate": "1.8839e-04", + "loss": 0.7475, + "slid_loss": 0.8074, + "step": 2988, + "time": 13.92 + }, + { + "epoch": 1.5, + "learning_rate": "1.8838e-04", + "loss": 0.8827, + "slid_loss": 0.8088, + "step": 2989, + "time": 13.48 + }, + { + "epoch": 1.5, + "learning_rate": "1.8837e-04", + "loss": 0.8141, + "slid_loss": 0.8102, + "step": 2990, + "time": 13.23 + }, + { + "epoch": 1.5, + "learning_rate": "1.8836e-04", + "loss": 0.861, + "slid_loss": 0.8097, + "step": 2991, + "time": 12.88 + }, + { + "epoch": 1.5, + "learning_rate": "1.8835e-04", + "loss": 0.803, + "slid_loss": 0.8099, + "step": 2992, + "time": 11.03 + }, + { + "epoch": 1.5, + "learning_rate": "1.8833e-04", + "loss": 0.7492, + "slid_loss": 0.8106, + "step": 2993, + "time": 13.42 + }, + { + "epoch": 1.5, + "learning_rate": "1.8832e-04", + "loss": 0.867, + "slid_loss": 0.8125, + "step": 2994, + "time": 13.36 + }, + { + "epoch": 1.5, + "learning_rate": "1.8831e-04", + "loss": 0.7432, + "slid_loss": 0.8128, + "step": 2995, + "time": 11.52 + }, + { + "epoch": 1.5, + "learning_rate": "1.8830e-04", + "loss": 0.7679, + "slid_loss": 0.8124, + "step": 2996, + "time": 12.35 + }, + { + "epoch": 1.5, + "learning_rate": "1.8829e-04", + "loss": 0.9355, + "slid_loss": 0.8129, + "step": 2997, + "time": 11.89 + }, + { + "epoch": 1.5, + "learning_rate": "1.8828e-04", + "loss": 0.9092, + "slid_loss": 0.8158, + "step": 2998, + "time": 13.16 + }, + { + "epoch": 1.5, + "learning_rate": "1.8827e-04", + "loss": 0.6423, + "slid_loss": 0.8164, + "step": 2999, + "time": 13.4 + }, + { + "epoch": 1.5, + "learning_rate": "1.8826e-04", + "loss": 0.8046, + "slid_loss": 0.8169, + "step": 3000, + "time": 10.63 + }, + { + "epoch": 1.5, + "learning_rate": "1.8824e-04", + "loss": 0.7232, + "slid_loss": 0.8146, + "step": 3001, + "time": 13.23 + }, + { + "epoch": 1.5, + "learning_rate": "1.8823e-04", + "loss": 0.7772, + "slid_loss": 0.8148, + "step": 3002, + "time": 12.75 + }, + { + "epoch": 1.5, + "learning_rate": "1.8822e-04", + "loss": 0.6843, + "slid_loss": 0.8145, + "step": 3003, + "time": 11.21 + }, + { + "epoch": 1.5, + "learning_rate": "1.8821e-04", + "loss": 0.7828, + "slid_loss": 0.8137, + "step": 3004, + "time": 14.53 + }, + { + "epoch": 1.5, + "learning_rate": "1.8820e-04", + "loss": 0.9975, + "slid_loss": 0.8151, + "step": 3005, + "time": 12.89 + }, + { + "epoch": 1.51, + "learning_rate": "1.8819e-04", + "loss": 0.7726, + "slid_loss": 0.8165, + "step": 3006, + "time": 11.05 + }, + { + "epoch": 1.51, + "learning_rate": "1.8818e-04", + "loss": 0.8494, + "slid_loss": 0.8171, + "step": 3007, + "time": 13.49 + }, + { + "epoch": 1.51, + "learning_rate": "1.8817e-04", + "loss": 0.8463, + "slid_loss": 0.8176, + "step": 3008, + "time": 13.19 + }, + { + "epoch": 1.51, + "learning_rate": "1.8815e-04", + "loss": 0.7862, + "slid_loss": 0.8164, + "step": 3009, + "time": 13.9 + }, + { + "epoch": 1.51, + "learning_rate": "1.8814e-04", + "loss": 0.8832, + "slid_loss": 0.8156, + "step": 3010, + "time": 11.8 + }, + { + "epoch": 1.51, + "learning_rate": "1.8813e-04", + "loss": 0.871, + "slid_loss": 0.8159, + "step": 3011, + "time": 13.27 + }, + { + "epoch": 1.51, + "learning_rate": "1.8812e-04", + "loss": 0.7713, + "slid_loss": 0.8153, + "step": 3012, + "time": 13.23 + }, + { + "epoch": 1.51, + "learning_rate": "1.8811e-04", + "loss": 0.7115, + "slid_loss": 0.8133, + "step": 3013, + "time": 13.59 + }, + { + "epoch": 1.51, + "learning_rate": "1.8810e-04", + "loss": 0.906, + "slid_loss": 0.8155, + "step": 3014, + "time": 13.0 + }, + { + "epoch": 1.51, + "learning_rate": "1.8809e-04", + "loss": 0.7676, + "slid_loss": 0.8166, + "step": 3015, + "time": 10.64 + }, + { + "epoch": 1.51, + "learning_rate": "1.8808e-04", + "loss": 0.728, + "slid_loss": 0.8175, + "step": 3016, + "time": 11.98 + }, + { + "epoch": 1.51, + "learning_rate": "1.8806e-04", + "loss": 0.8773, + "slid_loss": 0.8186, + "step": 3017, + "time": 13.8 + }, + { + "epoch": 1.51, + "learning_rate": "1.8805e-04", + "loss": 0.8994, + "slid_loss": 0.8192, + "step": 3018, + "time": 11.06 + }, + { + "epoch": 1.51, + "learning_rate": "1.8804e-04", + "loss": 0.6916, + "slid_loss": 0.8187, + "step": 3019, + "time": 12.12 + }, + { + "epoch": 1.51, + "learning_rate": "1.8803e-04", + "loss": 0.8044, + "slid_loss": 0.819, + "step": 3020, + "time": 13.54 + }, + { + "epoch": 1.51, + "learning_rate": "1.8802e-04", + "loss": 0.7133, + "slid_loss": 0.8195, + "step": 3021, + "time": 11.5 + }, + { + "epoch": 1.51, + "learning_rate": "1.8801e-04", + "loss": 0.6613, + "slid_loss": 0.8183, + "step": 3022, + "time": 13.38 + }, + { + "epoch": 1.51, + "learning_rate": "1.8800e-04", + "loss": 0.7845, + "slid_loss": 0.8178, + "step": 3023, + "time": 12.72 + }, + { + "epoch": 1.51, + "learning_rate": "1.8798e-04", + "loss": 0.7782, + "slid_loss": 0.8155, + "step": 3024, + "time": 12.95 + }, + { + "epoch": 1.51, + "learning_rate": "1.8797e-04", + "loss": 0.809, + "slid_loss": 0.8148, + "step": 3025, + "time": 10.81 + }, + { + "epoch": 1.52, + "learning_rate": "1.8796e-04", + "loss": 0.6881, + "slid_loss": 0.8141, + "step": 3026, + "time": 12.2 + }, + { + "epoch": 1.52, + "learning_rate": "1.8795e-04", + "loss": 0.7496, + "slid_loss": 0.8124, + "step": 3027, + "time": 12.33 + }, + { + "epoch": 1.52, + "learning_rate": "1.8794e-04", + "loss": 0.9554, + "slid_loss": 0.8139, + "step": 3028, + "time": 13.17 + }, + { + "epoch": 1.52, + "learning_rate": "1.8793e-04", + "loss": 0.7273, + "slid_loss": 0.8116, + "step": 3029, + "time": 11.05 + }, + { + "epoch": 1.52, + "learning_rate": "1.8792e-04", + "loss": 0.7752, + "slid_loss": 0.8107, + "step": 3030, + "time": 12.56 + }, + { + "epoch": 1.52, + "learning_rate": "1.8790e-04", + "loss": 0.7254, + "slid_loss": 0.8092, + "step": 3031, + "time": 11.78 + }, + { + "epoch": 1.52, + "learning_rate": "1.8789e-04", + "loss": 0.7009, + "slid_loss": 0.8077, + "step": 3032, + "time": 11.29 + }, + { + "epoch": 1.52, + "learning_rate": "1.8788e-04", + "loss": 0.8872, + "slid_loss": 0.8093, + "step": 3033, + "time": 13.2 + }, + { + "epoch": 1.52, + "learning_rate": "1.8787e-04", + "loss": 0.8567, + "slid_loss": 0.8109, + "step": 3034, + "time": 11.31 + }, + { + "epoch": 1.52, + "learning_rate": "1.8786e-04", + "loss": 0.8271, + "slid_loss": 0.8098, + "step": 3035, + "time": 13.43 + }, + { + "epoch": 1.52, + "learning_rate": "1.8785e-04", + "loss": 0.8551, + "slid_loss": 0.8113, + "step": 3036, + "time": 11.65 + }, + { + "epoch": 1.52, + "learning_rate": "1.8784e-04", + "loss": 0.72, + "slid_loss": 0.8098, + "step": 3037, + "time": 10.75 + }, + { + "epoch": 1.52, + "learning_rate": "1.8782e-04", + "loss": 0.861, + "slid_loss": 0.8102, + "step": 3038, + "time": 12.21 + }, + { + "epoch": 1.52, + "learning_rate": "1.8781e-04", + "loss": 0.7726, + "slid_loss": 0.8109, + "step": 3039, + "time": 12.85 + }, + { + "epoch": 1.52, + "learning_rate": "1.8780e-04", + "loss": 0.7793, + "slid_loss": 0.8092, + "step": 3040, + "time": 11.83 + }, + { + "epoch": 1.52, + "learning_rate": "1.8779e-04", + "loss": 0.8827, + "slid_loss": 0.8084, + "step": 3041, + "time": 12.8 + }, + { + "epoch": 1.52, + "learning_rate": "1.8778e-04", + "loss": 0.8283, + "slid_loss": 0.8095, + "step": 3042, + "time": 11.82 + }, + { + "epoch": 1.52, + "learning_rate": "1.8777e-04", + "loss": 0.8126, + "slid_loss": 0.8084, + "step": 3043, + "time": 14.08 + }, + { + "epoch": 1.52, + "learning_rate": "1.8776e-04", + "loss": 0.8486, + "slid_loss": 0.8067, + "step": 3044, + "time": 13.41 + }, + { + "epoch": 1.52, + "learning_rate": "1.8774e-04", + "loss": 0.8476, + "slid_loss": 0.8054, + "step": 3045, + "time": 11.97 + }, + { + "epoch": 1.53, + "learning_rate": "1.8773e-04", + "loss": 0.8996, + "slid_loss": 0.8044, + "step": 3046, + "time": 13.39 + }, + { + "epoch": 1.53, + "learning_rate": "1.8772e-04", + "loss": 0.7418, + "slid_loss": 0.8034, + "step": 3047, + "time": 13.82 + }, + { + "epoch": 1.53, + "learning_rate": "1.8771e-04", + "loss": 0.7342, + "slid_loss": 0.8025, + "step": 3048, + "time": 13.43 + }, + { + "epoch": 1.53, + "learning_rate": "1.8770e-04", + "loss": 0.8349, + "slid_loss": 0.8025, + "step": 3049, + "time": 13.26 + }, + { + "epoch": 1.53, + "learning_rate": "1.8769e-04", + "loss": 1.1113, + "slid_loss": 0.8054, + "step": 3050, + "time": 13.52 + }, + { + "epoch": 1.53, + "learning_rate": "1.8768e-04", + "loss": 0.8302, + "slid_loss": 0.8046, + "step": 3051, + "time": 11.84 + }, + { + "epoch": 1.53, + "learning_rate": "1.8766e-04", + "loss": 0.797, + "slid_loss": 0.8045, + "step": 3052, + "time": 11.35 + }, + { + "epoch": 1.53, + "learning_rate": "1.8765e-04", + "loss": 0.7305, + "slid_loss": 0.8042, + "step": 3053, + "time": 11.32 + }, + { + "epoch": 1.53, + "learning_rate": "1.8764e-04", + "loss": 0.8549, + "slid_loss": 0.8045, + "step": 3054, + "time": 13.53 + }, + { + "epoch": 1.53, + "learning_rate": "1.8763e-04", + "loss": 0.6989, + "slid_loss": 0.8035, + "step": 3055, + "time": 12.77 + }, + { + "epoch": 1.53, + "learning_rate": "1.8762e-04", + "loss": 0.8881, + "slid_loss": 0.8048, + "step": 3056, + "time": 12.58 + }, + { + "epoch": 1.53, + "learning_rate": "1.8761e-04", + "loss": 0.8736, + "slid_loss": 0.8056, + "step": 3057, + "time": 13.29 + }, + { + "epoch": 1.53, + "learning_rate": "1.8760e-04", + "loss": 0.739, + "slid_loss": 0.8058, + "step": 3058, + "time": 11.51 + }, + { + "epoch": 1.53, + "learning_rate": "1.8758e-04", + "loss": 0.7337, + "slid_loss": 0.8052, + "step": 3059, + "time": 11.72 + }, + { + "epoch": 1.53, + "learning_rate": "1.8757e-04", + "loss": 0.83, + "slid_loss": 0.806, + "step": 3060, + "time": 12.88 + }, + { + "epoch": 1.53, + "learning_rate": "1.8756e-04", + "loss": 0.8694, + "slid_loss": 0.8066, + "step": 3061, + "time": 12.98 + }, + { + "epoch": 1.53, + "learning_rate": "1.8755e-04", + "loss": 0.7044, + "slid_loss": 0.8061, + "step": 3062, + "time": 11.2 + }, + { + "epoch": 1.53, + "learning_rate": "1.8754e-04", + "loss": 0.8596, + "slid_loss": 0.8057, + "step": 3063, + "time": 12.23 + }, + { + "epoch": 1.53, + "learning_rate": "1.8753e-04", + "loss": 0.8187, + "slid_loss": 0.806, + "step": 3064, + "time": 13.46 + }, + { + "epoch": 1.53, + "learning_rate": "1.8751e-04", + "loss": 0.9094, + "slid_loss": 0.8076, + "step": 3065, + "time": 13.16 + }, + { + "epoch": 1.54, + "learning_rate": "1.8750e-04", + "loss": 0.8544, + "slid_loss": 0.8083, + "step": 3066, + "time": 14.0 + }, + { + "epoch": 1.54, + "learning_rate": "1.8749e-04", + "loss": 0.7104, + "slid_loss": 0.8067, + "step": 3067, + "time": 13.15 + }, + { + "epoch": 1.54, + "learning_rate": "1.8748e-04", + "loss": 0.9007, + "slid_loss": 0.8057, + "step": 3068, + "time": 10.96 + }, + { + "epoch": 1.54, + "learning_rate": "1.8747e-04", + "loss": 0.7119, + "slid_loss": 0.8051, + "step": 3069, + "time": 13.36 + }, + { + "epoch": 1.54, + "learning_rate": "1.8746e-04", + "loss": 0.822, + "slid_loss": 0.8041, + "step": 3070, + "time": 10.6 + }, + { + "epoch": 1.54, + "learning_rate": "1.8745e-04", + "loss": 0.8279, + "slid_loss": 0.8039, + "step": 3071, + "time": 12.19 + }, + { + "epoch": 1.54, + "learning_rate": "1.8743e-04", + "loss": 0.9077, + "slid_loss": 0.8058, + "step": 3072, + "time": 13.24 + }, + { + "epoch": 1.54, + "learning_rate": "1.8742e-04", + "loss": 0.7699, + "slid_loss": 0.8058, + "step": 3073, + "time": 13.37 + }, + { + "epoch": 1.54, + "learning_rate": "1.8741e-04", + "loss": 0.8309, + "slid_loss": 0.8042, + "step": 3074, + "time": 12.59 + }, + { + "epoch": 1.54, + "learning_rate": "1.8740e-04", + "loss": 0.7635, + "slid_loss": 0.8045, + "step": 3075, + "time": 13.26 + }, + { + "epoch": 1.54, + "learning_rate": "1.8739e-04", + "loss": 0.7344, + "slid_loss": 0.8058, + "step": 3076, + "time": 13.99 + }, + { + "epoch": 1.54, + "learning_rate": "1.8738e-04", + "loss": 0.9089, + "slid_loss": 0.8061, + "step": 3077, + "time": 13.34 + }, + { + "epoch": 1.54, + "learning_rate": "1.8736e-04", + "loss": 0.8576, + "slid_loss": 0.8062, + "step": 3078, + "time": 11.73 + }, + { + "epoch": 1.54, + "learning_rate": "1.8735e-04", + "loss": 0.9278, + "slid_loss": 0.8064, + "step": 3079, + "time": 14.22 + }, + { + "epoch": 1.54, + "learning_rate": "1.8734e-04", + "loss": 0.9228, + "slid_loss": 0.809, + "step": 3080, + "time": 10.56 + }, + { + "epoch": 1.54, + "learning_rate": "1.8733e-04", + "loss": 0.7716, + "slid_loss": 0.8085, + "step": 3081, + "time": 11.96 + }, + { + "epoch": 1.54, + "learning_rate": "1.8732e-04", + "loss": 0.7954, + "slid_loss": 0.8091, + "step": 3082, + "time": 13.54 + }, + { + "epoch": 1.54, + "learning_rate": "1.8731e-04", + "loss": 0.9177, + "slid_loss": 0.81, + "step": 3083, + "time": 13.95 + }, + { + "epoch": 1.54, + "learning_rate": "1.8729e-04", + "loss": 0.8893, + "slid_loss": 0.8111, + "step": 3084, + "time": 11.29 + }, + { + "epoch": 1.54, + "learning_rate": "1.8728e-04", + "loss": 0.8219, + "slid_loss": 0.8119, + "step": 3085, + "time": 12.95 + }, + { + "epoch": 1.55, + "learning_rate": "1.8727e-04", + "loss": 0.9379, + "slid_loss": 0.8123, + "step": 3086, + "time": 14.16 + }, + { + "epoch": 1.55, + "learning_rate": "1.8726e-04", + "loss": 0.7648, + "slid_loss": 0.8131, + "step": 3087, + "time": 13.4 + }, + { + "epoch": 1.55, + "learning_rate": "1.8725e-04", + "loss": 0.4967, + "slid_loss": 0.8106, + "step": 3088, + "time": 10.9 + }, + { + "epoch": 1.55, + "learning_rate": "1.8724e-04", + "loss": 0.856, + "slid_loss": 0.8104, + "step": 3089, + "time": 11.91 + }, + { + "epoch": 1.55, + "learning_rate": "1.8722e-04", + "loss": 0.8239, + "slid_loss": 0.8105, + "step": 3090, + "time": 11.8 + }, + { + "epoch": 1.55, + "learning_rate": "1.8721e-04", + "loss": 0.731, + "slid_loss": 0.8092, + "step": 3091, + "time": 10.89 + }, + { + "epoch": 1.55, + "learning_rate": "1.8720e-04", + "loss": 0.7223, + "slid_loss": 0.8084, + "step": 3092, + "time": 13.76 + }, + { + "epoch": 1.55, + "learning_rate": "1.8719e-04", + "loss": 0.7753, + "slid_loss": 0.8086, + "step": 3093, + "time": 13.27 + }, + { + "epoch": 1.55, + "learning_rate": "1.8718e-04", + "loss": 0.7889, + "slid_loss": 0.8078, + "step": 3094, + "time": 13.93 + }, + { + "epoch": 1.55, + "learning_rate": "1.8717e-04", + "loss": 0.7752, + "slid_loss": 0.8082, + "step": 3095, + "time": 11.14 + }, + { + "epoch": 1.55, + "learning_rate": "1.8715e-04", + "loss": 0.7224, + "slid_loss": 0.8077, + "step": 3096, + "time": 10.65 + }, + { + "epoch": 1.55, + "learning_rate": "1.8714e-04", + "loss": 0.8669, + "slid_loss": 0.807, + "step": 3097, + "time": 14.15 + }, + { + "epoch": 1.55, + "learning_rate": "1.8713e-04", + "loss": 0.6996, + "slid_loss": 0.8049, + "step": 3098, + "time": 11.94 + }, + { + "epoch": 1.55, + "learning_rate": "1.8712e-04", + "loss": 0.7615, + "slid_loss": 0.8061, + "step": 3099, + "time": 13.61 + }, + { + "epoch": 1.55, + "learning_rate": "1.8711e-04", + "loss": 0.7086, + "slid_loss": 0.8052, + "step": 3100, + "time": 11.56 + }, + { + "epoch": 1.55, + "learning_rate": "1.8710e-04", + "loss": 1.0548, + "slid_loss": 0.8085, + "step": 3101, + "time": 11.25 + }, + { + "epoch": 1.55, + "learning_rate": "1.8708e-04", + "loss": 0.9162, + "slid_loss": 0.8099, + "step": 3102, + "time": 13.15 + }, + { + "epoch": 1.55, + "learning_rate": "1.8707e-04", + "loss": 0.9069, + "slid_loss": 0.8121, + "step": 3103, + "time": 10.65 + }, + { + "epoch": 1.55, + "learning_rate": "1.8706e-04", + "loss": 0.9288, + "slid_loss": 0.8135, + "step": 3104, + "time": 11.74 + }, + { + "epoch": 1.55, + "learning_rate": "1.8705e-04", + "loss": 0.8496, + "slid_loss": 0.8121, + "step": 3105, + "time": 11.71 + }, + { + "epoch": 1.56, + "learning_rate": "1.8704e-04", + "loss": 0.7481, + "slid_loss": 0.8118, + "step": 3106, + "time": 12.74 + }, + { + "epoch": 1.56, + "learning_rate": "1.8703e-04", + "loss": 0.8988, + "slid_loss": 0.8123, + "step": 3107, + "time": 11.34 + }, + { + "epoch": 1.56, + "learning_rate": "1.8701e-04", + "loss": 0.7741, + "slid_loss": 0.8116, + "step": 3108, + "time": 13.37 + }, + { + "epoch": 1.56, + "learning_rate": "1.8700e-04", + "loss": 0.8298, + "slid_loss": 0.812, + "step": 3109, + "time": 13.19 + }, + { + "epoch": 1.56, + "learning_rate": "1.8699e-04", + "loss": 0.9996, + "slid_loss": 0.8132, + "step": 3110, + "time": 11.25 + }, + { + "epoch": 1.56, + "learning_rate": "1.8698e-04", + "loss": 0.8557, + "slid_loss": 0.813, + "step": 3111, + "time": 12.95 + }, + { + "epoch": 1.56, + "learning_rate": "1.8697e-04", + "loss": 0.861, + "slid_loss": 0.8139, + "step": 3112, + "time": 13.18 + }, + { + "epoch": 1.56, + "learning_rate": "1.8695e-04", + "loss": 0.747, + "slid_loss": 0.8143, + "step": 3113, + "time": 12.83 + }, + { + "epoch": 1.56, + "learning_rate": "1.8694e-04", + "loss": 0.8901, + "slid_loss": 0.8141, + "step": 3114, + "time": 11.02 + }, + { + "epoch": 1.56, + "learning_rate": "1.8693e-04", + "loss": 0.7484, + "slid_loss": 0.8139, + "step": 3115, + "time": 11.63 + }, + { + "epoch": 1.56, + "learning_rate": "1.8692e-04", + "loss": 0.69, + "slid_loss": 0.8136, + "step": 3116, + "time": 11.68 + }, + { + "epoch": 1.56, + "learning_rate": "1.8691e-04", + "loss": 0.6444, + "slid_loss": 0.8112, + "step": 3117, + "time": 13.52 + }, + { + "epoch": 1.56, + "learning_rate": "1.8690e-04", + "loss": 0.7266, + "slid_loss": 0.8095, + "step": 3118, + "time": 14.05 + }, + { + "epoch": 1.56, + "learning_rate": "1.8688e-04", + "loss": 0.7586, + "slid_loss": 0.8102, + "step": 3119, + "time": 13.34 + }, + { + "epoch": 1.56, + "learning_rate": "1.8687e-04", + "loss": 0.8399, + "slid_loss": 0.8105, + "step": 3120, + "time": 10.92 + }, + { + "epoch": 1.56, + "learning_rate": "1.8686e-04", + "loss": 0.7509, + "slid_loss": 0.8109, + "step": 3121, + "time": 13.64 + }, + { + "epoch": 1.56, + "learning_rate": "1.8685e-04", + "loss": 0.6373, + "slid_loss": 0.8107, + "step": 3122, + "time": 12.47 + }, + { + "epoch": 1.56, + "learning_rate": "1.8684e-04", + "loss": 0.7337, + "slid_loss": 0.8102, + "step": 3123, + "time": 13.51 + }, + { + "epoch": 1.56, + "learning_rate": "1.8682e-04", + "loss": 0.8369, + "slid_loss": 0.8107, + "step": 3124, + "time": 12.94 + }, + { + "epoch": 1.56, + "learning_rate": "1.8681e-04", + "loss": 0.8358, + "slid_loss": 0.811, + "step": 3125, + "time": 11.36 + }, + { + "epoch": 1.57, + "learning_rate": "1.8680e-04", + "loss": 0.8574, + "slid_loss": 0.8127, + "step": 3126, + "time": 11.07 + }, + { + "epoch": 1.57, + "learning_rate": "1.8679e-04", + "loss": 0.6869, + "slid_loss": 0.8121, + "step": 3127, + "time": 14.34 + }, + { + "epoch": 1.57, + "learning_rate": "1.8678e-04", + "loss": 0.89, + "slid_loss": 0.8114, + "step": 3128, + "time": 13.48 + }, + { + "epoch": 1.57, + "learning_rate": "1.8677e-04", + "loss": 0.9729, + "slid_loss": 0.8139, + "step": 3129, + "time": 12.15 + }, + { + "epoch": 1.57, + "learning_rate": "1.8675e-04", + "loss": 0.6155, + "slid_loss": 0.8123, + "step": 3130, + "time": 10.87 + }, + { + "epoch": 1.57, + "learning_rate": "1.8674e-04", + "loss": 0.7544, + "slid_loss": 0.8126, + "step": 3131, + "time": 13.04 + }, + { + "epoch": 1.57, + "learning_rate": "1.8673e-04", + "loss": 0.8037, + "slid_loss": 0.8136, + "step": 3132, + "time": 13.81 + }, + { + "epoch": 1.57, + "learning_rate": "1.8672e-04", + "loss": 0.713, + "slid_loss": 0.8119, + "step": 3133, + "time": 12.35 + }, + { + "epoch": 1.57, + "learning_rate": "1.8671e-04", + "loss": 0.7921, + "slid_loss": 0.8112, + "step": 3134, + "time": 11.67 + }, + { + "epoch": 1.57, + "learning_rate": "1.8669e-04", + "loss": 0.7888, + "slid_loss": 0.8108, + "step": 3135, + "time": 11.8 + }, + { + "epoch": 1.57, + "learning_rate": "1.8668e-04", + "loss": 0.7785, + "slid_loss": 0.8101, + "step": 3136, + "time": 14.08 + }, + { + "epoch": 1.57, + "learning_rate": "1.8667e-04", + "loss": 0.8979, + "slid_loss": 0.8118, + "step": 3137, + "time": 13.5 + }, + { + "epoch": 1.57, + "learning_rate": "1.8666e-04", + "loss": 0.6153, + "slid_loss": 0.8094, + "step": 3138, + "time": 11.28 + }, + { + "epoch": 1.57, + "learning_rate": "1.8665e-04", + "loss": 0.9105, + "slid_loss": 0.8108, + "step": 3139, + "time": 12.2 + }, + { + "epoch": 1.57, + "learning_rate": "1.8664e-04", + "loss": 0.8165, + "slid_loss": 0.8111, + "step": 3140, + "time": 12.16 + }, + { + "epoch": 1.57, + "learning_rate": "1.8662e-04", + "loss": 0.7469, + "slid_loss": 0.8098, + "step": 3141, + "time": 13.5 + }, + { + "epoch": 1.57, + "learning_rate": "1.8661e-04", + "loss": 0.8949, + "slid_loss": 0.8104, + "step": 3142, + "time": 11.49 + }, + { + "epoch": 1.57, + "learning_rate": "1.8660e-04", + "loss": 0.9088, + "slid_loss": 0.8114, + "step": 3143, + "time": 12.35 + }, + { + "epoch": 1.57, + "learning_rate": "1.8659e-04", + "loss": 0.795, + "slid_loss": 0.8109, + "step": 3144, + "time": 12.15 + }, + { + "epoch": 1.57, + "learning_rate": "1.8658e-04", + "loss": 0.7649, + "slid_loss": 0.81, + "step": 3145, + "time": 10.42 + }, + { + "epoch": 1.58, + "learning_rate": "1.8656e-04", + "loss": 0.738, + "slid_loss": 0.8084, + "step": 3146, + "time": 13.36 + }, + { + "epoch": 1.58, + "learning_rate": "1.8655e-04", + "loss": 0.7944, + "slid_loss": 0.809, + "step": 3147, + "time": 10.78 + }, + { + "epoch": 1.58, + "learning_rate": "1.8654e-04", + "loss": 0.6516, + "slid_loss": 0.8081, + "step": 3148, + "time": 13.53 + }, + { + "epoch": 1.58, + "learning_rate": "1.8653e-04", + "loss": 0.6866, + "slid_loss": 0.8066, + "step": 3149, + "time": 11.79 + }, + { + "epoch": 1.58, + "learning_rate": "1.8652e-04", + "loss": 0.7758, + "slid_loss": 0.8033, + "step": 3150, + "time": 10.58 + }, + { + "epoch": 1.58, + "learning_rate": "1.8650e-04", + "loss": 0.8836, + "slid_loss": 0.8038, + "step": 3151, + "time": 13.12 + }, + { + "epoch": 1.58, + "learning_rate": "1.8649e-04", + "loss": 0.6555, + "slid_loss": 0.8024, + "step": 3152, + "time": 13.48 + }, + { + "epoch": 1.58, + "learning_rate": "1.8648e-04", + "loss": 0.7817, + "slid_loss": 0.8029, + "step": 3153, + "time": 12.06 + }, + { + "epoch": 1.58, + "learning_rate": "1.8647e-04", + "loss": 0.8377, + "slid_loss": 0.8027, + "step": 3154, + "time": 13.82 + }, + { + "epoch": 1.58, + "learning_rate": "1.8646e-04", + "loss": 0.8379, + "slid_loss": 0.8041, + "step": 3155, + "time": 13.6 + }, + { + "epoch": 1.58, + "learning_rate": "1.8644e-04", + "loss": 0.8899, + "slid_loss": 0.8042, + "step": 3156, + "time": 11.22 + }, + { + "epoch": 1.58, + "learning_rate": "1.8643e-04", + "loss": 0.9657, + "slid_loss": 0.8051, + "step": 3157, + "time": 12.88 + }, + { + "epoch": 1.58, + "learning_rate": "1.8642e-04", + "loss": 0.7999, + "slid_loss": 0.8057, + "step": 3158, + "time": 13.75 + }, + { + "epoch": 1.58, + "learning_rate": "1.8641e-04", + "loss": 0.9572, + "slid_loss": 0.8079, + "step": 3159, + "time": 11.63 + }, + { + "epoch": 1.58, + "learning_rate": "1.8640e-04", + "loss": 0.8558, + "slid_loss": 0.8082, + "step": 3160, + "time": 12.97 + }, + { + "epoch": 1.58, + "learning_rate": "1.8638e-04", + "loss": 0.6508, + "slid_loss": 0.806, + "step": 3161, + "time": 13.55 + }, + { + "epoch": 1.58, + "learning_rate": "1.8637e-04", + "loss": 0.878, + "slid_loss": 0.8077, + "step": 3162, + "time": 10.99 + }, + { + "epoch": 1.58, + "learning_rate": "1.8636e-04", + "loss": 0.8372, + "slid_loss": 0.8075, + "step": 3163, + "time": 13.32 + }, + { + "epoch": 1.58, + "learning_rate": "1.8635e-04", + "loss": 0.7428, + "slid_loss": 0.8067, + "step": 3164, + "time": 11.07 + }, + { + "epoch": 1.58, + "learning_rate": "1.8634e-04", + "loss": 0.8151, + "slid_loss": 0.8058, + "step": 3165, + "time": 10.9 + }, + { + "epoch": 1.59, + "learning_rate": "1.8632e-04", + "loss": 0.8282, + "slid_loss": 0.8055, + "step": 3166, + "time": 13.28 + }, + { + "epoch": 1.59, + "learning_rate": "1.8631e-04", + "loss": 0.77, + "slid_loss": 0.8061, + "step": 3167, + "time": 13.69 + }, + { + "epoch": 1.59, + "learning_rate": "1.8630e-04", + "loss": 0.9847, + "slid_loss": 0.807, + "step": 3168, + "time": 12.07 + }, + { + "epoch": 1.59, + "learning_rate": "1.8629e-04", + "loss": 0.6799, + "slid_loss": 0.8067, + "step": 3169, + "time": 11.42 + }, + { + "epoch": 1.59, + "learning_rate": "1.8628e-04", + "loss": 0.9172, + "slid_loss": 0.8076, + "step": 3170, + "time": 14.03 + }, + { + "epoch": 1.59, + "learning_rate": "1.8626e-04", + "loss": 0.7185, + "slid_loss": 0.8065, + "step": 3171, + "time": 11.53 + }, + { + "epoch": 1.59, + "learning_rate": "1.8625e-04", + "loss": 0.9044, + "slid_loss": 0.8065, + "step": 3172, + "time": 13.44 + }, + { + "epoch": 1.59, + "learning_rate": "1.8624e-04", + "loss": 0.9603, + "slid_loss": 0.8084, + "step": 3173, + "time": 12.28 + }, + { + "epoch": 1.59, + "learning_rate": "1.8623e-04", + "loss": 0.7049, + "slid_loss": 0.8071, + "step": 3174, + "time": 12.64 + }, + { + "epoch": 1.59, + "learning_rate": "1.8622e-04", + "loss": 0.6801, + "slid_loss": 0.8063, + "step": 3175, + "time": 12.72 + }, + { + "epoch": 1.59, + "learning_rate": "1.8620e-04", + "loss": 1.0663, + "slid_loss": 0.8096, + "step": 3176, + "time": 13.27 + }, + { + "epoch": 1.59, + "learning_rate": "1.8619e-04", + "loss": 0.9997, + "slid_loss": 0.8105, + "step": 3177, + "time": 14.04 + }, + { + "epoch": 1.59, + "learning_rate": "1.8618e-04", + "loss": 0.7766, + "slid_loss": 0.8097, + "step": 3178, + "time": 12.87 + }, + { + "epoch": 1.59, + "learning_rate": "1.8617e-04", + "loss": 0.7485, + "slid_loss": 0.8079, + "step": 3179, + "time": 13.19 + }, + { + "epoch": 1.59, + "learning_rate": "1.8616e-04", + "loss": 0.8881, + "slid_loss": 0.8076, + "step": 3180, + "time": 13.19 + }, + { + "epoch": 1.59, + "learning_rate": "1.8614e-04", + "loss": 0.9406, + "slid_loss": 0.8093, + "step": 3181, + "time": 13.67 + }, + { + "epoch": 1.59, + "learning_rate": "1.8613e-04", + "loss": 0.8706, + "slid_loss": 0.81, + "step": 3182, + "time": 11.08 + }, + { + "epoch": 1.59, + "learning_rate": "1.8612e-04", + "loss": 0.6905, + "slid_loss": 0.8077, + "step": 3183, + "time": 13.51 + }, + { + "epoch": 1.59, + "learning_rate": "1.8611e-04", + "loss": 0.8873, + "slid_loss": 0.8077, + "step": 3184, + "time": 12.68 + }, + { + "epoch": 1.59, + "learning_rate": "1.8610e-04", + "loss": 0.8616, + "slid_loss": 0.8081, + "step": 3185, + "time": 13.38 + }, + { + "epoch": 1.6, + "learning_rate": "1.8608e-04", + "loss": 0.9112, + "slid_loss": 0.8078, + "step": 3186, + "time": 10.73 + }, + { + "epoch": 1.6, + "learning_rate": "1.8607e-04", + "loss": 0.8314, + "slid_loss": 0.8085, + "step": 3187, + "time": 12.97 + }, + { + "epoch": 1.6, + "learning_rate": "1.8606e-04", + "loss": 1.0511, + "slid_loss": 0.8141, + "step": 3188, + "time": 11.89 + }, + { + "epoch": 1.6, + "learning_rate": "1.8605e-04", + "loss": 0.6049, + "slid_loss": 0.8115, + "step": 3189, + "time": 12.87 + }, + { + "epoch": 1.6, + "learning_rate": "1.8603e-04", + "loss": 0.7404, + "slid_loss": 0.8107, + "step": 3190, + "time": 11.46 + }, + { + "epoch": 1.6, + "learning_rate": "1.8602e-04", + "loss": 0.7807, + "slid_loss": 0.8112, + "step": 3191, + "time": 13.16 + }, + { + "epoch": 1.6, + "learning_rate": "1.8601e-04", + "loss": 0.7234, + "slid_loss": 0.8112, + "step": 3192, + "time": 13.3 + }, + { + "epoch": 1.6, + "learning_rate": "1.8600e-04", + "loss": 1.0015, + "slid_loss": 0.8135, + "step": 3193, + "time": 11.11 + }, + { + "epoch": 1.6, + "learning_rate": "1.8599e-04", + "loss": 0.7953, + "slid_loss": 0.8135, + "step": 3194, + "time": 13.79 + }, + { + "epoch": 1.6, + "learning_rate": "1.8597e-04", + "loss": 0.6553, + "slid_loss": 0.8123, + "step": 3195, + "time": 12.94 + }, + { + "epoch": 1.6, + "learning_rate": "1.8596e-04", + "loss": 0.7292, + "slid_loss": 0.8124, + "step": 3196, + "time": 13.39 + }, + { + "epoch": 1.6, + "learning_rate": "1.8595e-04", + "loss": 0.6892, + "slid_loss": 0.8106, + "step": 3197, + "time": 13.38 + }, + { + "epoch": 1.6, + "learning_rate": "1.8594e-04", + "loss": 0.8674, + "slid_loss": 0.8123, + "step": 3198, + "time": 11.53 + }, + { + "epoch": 1.6, + "learning_rate": "1.8593e-04", + "loss": 0.8386, + "slid_loss": 0.8131, + "step": 3199, + "time": 13.27 + }, + { + "epoch": 1.6, + "learning_rate": "1.8591e-04", + "loss": 0.9316, + "slid_loss": 0.8153, + "step": 3200, + "time": 12.91 + }, + { + "epoch": 1.6, + "learning_rate": "1.8590e-04", + "loss": 0.792, + "slid_loss": 0.8127, + "step": 3201, + "time": 12.98 + }, + { + "epoch": 1.6, + "learning_rate": "1.8589e-04", + "loss": 0.7805, + "slid_loss": 0.8113, + "step": 3202, + "time": 11.4 + }, + { + "epoch": 1.6, + "learning_rate": "1.8588e-04", + "loss": 0.8897, + "slid_loss": 0.8112, + "step": 3203, + "time": 13.73 + }, + { + "epoch": 1.6, + "learning_rate": "1.8586e-04", + "loss": 0.8129, + "slid_loss": 0.81, + "step": 3204, + "time": 12.57 + }, + { + "epoch": 1.6, + "learning_rate": "1.8585e-04", + "loss": 0.7913, + "slid_loss": 0.8094, + "step": 3205, + "time": 13.44 + }, + { + "epoch": 1.61, + "learning_rate": "1.8584e-04", + "loss": 0.8264, + "slid_loss": 0.8102, + "step": 3206, + "time": 12.6 + }, + { + "epoch": 1.61, + "learning_rate": "1.8583e-04", + "loss": 0.8018, + "slid_loss": 0.8092, + "step": 3207, + "time": 11.79 + }, + { + "epoch": 1.61, + "learning_rate": "1.8582e-04", + "loss": 0.6142, + "slid_loss": 0.8076, + "step": 3208, + "time": 10.93 + }, + { + "epoch": 1.61, + "learning_rate": "1.8580e-04", + "loss": 0.8286, + "slid_loss": 0.8076, + "step": 3209, + "time": 11.44 + }, + { + "epoch": 1.61, + "learning_rate": "1.8579e-04", + "loss": 0.7895, + "slid_loss": 0.8055, + "step": 3210, + "time": 10.98 + }, + { + "epoch": 1.61, + "learning_rate": "1.8578e-04", + "loss": 1.0039, + "slid_loss": 0.807, + "step": 3211, + "time": 13.59 + }, + { + "epoch": 1.61, + "learning_rate": "1.8577e-04", + "loss": 0.6814, + "slid_loss": 0.8052, + "step": 3212, + "time": 10.77 + }, + { + "epoch": 1.61, + "learning_rate": "1.8575e-04", + "loss": 0.6262, + "slid_loss": 0.804, + "step": 3213, + "time": 10.52 + }, + { + "epoch": 1.61, + "learning_rate": "1.8574e-04", + "loss": 0.7118, + "slid_loss": 0.8022, + "step": 3214, + "time": 11.31 + }, + { + "epoch": 1.61, + "learning_rate": "1.8573e-04", + "loss": 0.6575, + "slid_loss": 0.8013, + "step": 3215, + "time": 13.15 + }, + { + "epoch": 1.61, + "learning_rate": "1.8572e-04", + "loss": 0.814, + "slid_loss": 0.8025, + "step": 3216, + "time": 11.05 + }, + { + "epoch": 1.61, + "learning_rate": "1.8571e-04", + "loss": 0.7889, + "slid_loss": 0.804, + "step": 3217, + "time": 10.98 + }, + { + "epoch": 1.61, + "learning_rate": "1.8569e-04", + "loss": 0.7905, + "slid_loss": 0.8046, + "step": 3218, + "time": 13.5 + }, + { + "epoch": 1.61, + "learning_rate": "1.8568e-04", + "loss": 0.8488, + "slid_loss": 0.8055, + "step": 3219, + "time": 12.36 + }, + { + "epoch": 1.61, + "learning_rate": "1.8567e-04", + "loss": 0.7787, + "slid_loss": 0.8049, + "step": 3220, + "time": 13.02 + }, + { + "epoch": 1.61, + "learning_rate": "1.8566e-04", + "loss": 0.8178, + "slid_loss": 0.8056, + "step": 3221, + "time": 13.82 + }, + { + "epoch": 1.61, + "learning_rate": "1.8564e-04", + "loss": 0.8469, + "slid_loss": 0.8077, + "step": 3222, + "time": 11.31 + }, + { + "epoch": 1.61, + "learning_rate": "1.8563e-04", + "loss": 0.8814, + "slid_loss": 0.8092, + "step": 3223, + "time": 10.55 + }, + { + "epoch": 1.61, + "learning_rate": "1.8562e-04", + "loss": 0.6894, + "slid_loss": 0.8077, + "step": 3224, + "time": 13.25 + }, + { + "epoch": 1.61, + "learning_rate": "1.8561e-04", + "loss": 0.7356, + "slid_loss": 0.8067, + "step": 3225, + "time": 11.74 + }, + { + "epoch": 1.62, + "learning_rate": "1.8560e-04", + "loss": 0.8554, + "slid_loss": 0.8067, + "step": 3226, + "time": 13.48 + }, + { + "epoch": 1.62, + "learning_rate": "1.8558e-04", + "loss": 0.7582, + "slid_loss": 0.8074, + "step": 3227, + "time": 12.38 + }, + { + "epoch": 1.62, + "learning_rate": "1.8557e-04", + "loss": 0.7731, + "slid_loss": 0.8062, + "step": 3228, + "time": 13.85 + }, + { + "epoch": 1.62, + "learning_rate": "1.8556e-04", + "loss": 0.812, + "slid_loss": 0.8046, + "step": 3229, + "time": 13.49 + }, + { + "epoch": 1.62, + "learning_rate": "1.8555e-04", + "loss": 0.6187, + "slid_loss": 0.8046, + "step": 3230, + "time": 12.68 + }, + { + "epoch": 1.62, + "learning_rate": "1.8553e-04", + "loss": 0.7973, + "slid_loss": 0.8051, + "step": 3231, + "time": 13.09 + }, + { + "epoch": 1.62, + "learning_rate": "1.8552e-04", + "loss": 0.6218, + "slid_loss": 0.8032, + "step": 3232, + "time": 12.83 + }, + { + "epoch": 1.62, + "learning_rate": "1.8551e-04", + "loss": 0.6521, + "slid_loss": 0.8026, + "step": 3233, + "time": 12.83 + }, + { + "epoch": 1.62, + "learning_rate": "1.8550e-04", + "loss": 0.7393, + "slid_loss": 0.8021, + "step": 3234, + "time": 11.72 + }, + { + "epoch": 1.62, + "learning_rate": "1.8548e-04", + "loss": 0.7413, + "slid_loss": 0.8016, + "step": 3235, + "time": 12.74 + }, + { + "epoch": 1.62, + "learning_rate": "1.8547e-04", + "loss": 0.738, + "slid_loss": 0.8012, + "step": 3236, + "time": 11.26 + }, + { + "epoch": 1.62, + "learning_rate": "1.8546e-04", + "loss": 0.7555, + "slid_loss": 0.7998, + "step": 3237, + "time": 12.97 + }, + { + "epoch": 1.62, + "learning_rate": "1.8545e-04", + "loss": 0.865, + "slid_loss": 0.8023, + "step": 3238, + "time": 12.76 + }, + { + "epoch": 1.62, + "learning_rate": "1.8544e-04", + "loss": 0.9677, + "slid_loss": 0.8029, + "step": 3239, + "time": 13.54 + }, + { + "epoch": 1.62, + "learning_rate": "1.8542e-04", + "loss": 0.8096, + "slid_loss": 0.8028, + "step": 3240, + "time": 11.51 + }, + { + "epoch": 1.62, + "learning_rate": "1.8541e-04", + "loss": 0.8434, + "slid_loss": 0.8038, + "step": 3241, + "time": 10.4 + }, + { + "epoch": 1.62, + "learning_rate": "1.8540e-04", + "loss": 0.986, + "slid_loss": 0.8047, + "step": 3242, + "time": 11.35 + }, + { + "epoch": 1.62, + "learning_rate": "1.8539e-04", + "loss": 0.7859, + "slid_loss": 0.8034, + "step": 3243, + "time": 13.17 + }, + { + "epoch": 1.62, + "learning_rate": "1.8537e-04", + "loss": 0.7951, + "slid_loss": 0.8034, + "step": 3244, + "time": 12.82 + }, + { + "epoch": 1.62, + "learning_rate": "1.8536e-04", + "loss": 0.754, + "slid_loss": 0.8033, + "step": 3245, + "time": 11.29 + }, + { + "epoch": 1.63, + "learning_rate": "1.8535e-04", + "loss": 0.7867, + "slid_loss": 0.8038, + "step": 3246, + "time": 14.04 + }, + { + "epoch": 1.63, + "learning_rate": "1.8534e-04", + "loss": 0.8969, + "slid_loss": 0.8048, + "step": 3247, + "time": 13.39 + }, + { + "epoch": 1.63, + "learning_rate": "1.8532e-04", + "loss": 0.8935, + "slid_loss": 0.8073, + "step": 3248, + "time": 12.98 + }, + { + "epoch": 1.63, + "learning_rate": "1.8531e-04", + "loss": 0.966, + "slid_loss": 0.8101, + "step": 3249, + "time": 14.32 + }, + { + "epoch": 1.63, + "learning_rate": "1.8530e-04", + "loss": 0.7608, + "slid_loss": 0.8099, + "step": 3250, + "time": 13.95 + }, + { + "epoch": 1.63, + "learning_rate": "1.8529e-04", + "loss": 0.7242, + "slid_loss": 0.8083, + "step": 3251, + "time": 11.43 + }, + { + "epoch": 1.63, + "learning_rate": "1.8527e-04", + "loss": 0.7074, + "slid_loss": 0.8088, + "step": 3252, + "time": 13.72 + }, + { + "epoch": 1.63, + "learning_rate": "1.8526e-04", + "loss": 0.6972, + "slid_loss": 0.808, + "step": 3253, + "time": 11.06 + }, + { + "epoch": 1.63, + "learning_rate": "1.8525e-04", + "loss": 0.8575, + "slid_loss": 0.8082, + "step": 3254, + "time": 13.76 + }, + { + "epoch": 1.63, + "learning_rate": "1.8524e-04", + "loss": 0.6936, + "slid_loss": 0.8067, + "step": 3255, + "time": 11.47 + }, + { + "epoch": 1.63, + "learning_rate": "1.8523e-04", + "loss": 0.6616, + "slid_loss": 0.8045, + "step": 3256, + "time": 12.73 + }, + { + "epoch": 1.63, + "learning_rate": "1.8521e-04", + "loss": 0.77, + "slid_loss": 0.8025, + "step": 3257, + "time": 12.98 + }, + { + "epoch": 1.63, + "learning_rate": "1.8520e-04", + "loss": 0.8598, + "slid_loss": 0.8031, + "step": 3258, + "time": 11.76 + }, + { + "epoch": 1.63, + "learning_rate": "1.8519e-04", + "loss": 0.7851, + "slid_loss": 0.8014, + "step": 3259, + "time": 12.3 + }, + { + "epoch": 1.63, + "learning_rate": "1.8518e-04", + "loss": 0.7527, + "slid_loss": 0.8004, + "step": 3260, + "time": 11.88 + }, + { + "epoch": 1.63, + "learning_rate": "1.8516e-04", + "loss": 0.8382, + "slid_loss": 0.8022, + "step": 3261, + "time": 10.58 + }, + { + "epoch": 1.63, + "learning_rate": "1.8515e-04", + "loss": 0.7832, + "slid_loss": 0.8013, + "step": 3262, + "time": 13.3 + }, + { + "epoch": 1.63, + "learning_rate": "1.8514e-04", + "loss": 0.8904, + "slid_loss": 0.8018, + "step": 3263, + "time": 13.71 + }, + { + "epoch": 1.63, + "learning_rate": "1.8513e-04", + "loss": 0.7712, + "slid_loss": 0.8021, + "step": 3264, + "time": 13.07 + }, + { + "epoch": 1.63, + "learning_rate": "1.8511e-04", + "loss": 0.7105, + "slid_loss": 0.801, + "step": 3265, + "time": 13.33 + }, + { + "epoch": 1.64, + "learning_rate": "1.8510e-04", + "loss": 0.8006, + "slid_loss": 0.8008, + "step": 3266, + "time": 13.84 + }, + { + "epoch": 1.64, + "learning_rate": "1.8509e-04", + "loss": 0.7951, + "slid_loss": 0.801, + "step": 3267, + "time": 11.29 + }, + { + "epoch": 1.64, + "learning_rate": "1.8508e-04", + "loss": 0.8741, + "slid_loss": 0.7999, + "step": 3268, + "time": 11.77 + }, + { + "epoch": 1.64, + "learning_rate": "1.8506e-04", + "loss": 0.5677, + "slid_loss": 0.7988, + "step": 3269, + "time": 12.48 + }, + { + "epoch": 1.64, + "learning_rate": "1.8505e-04", + "loss": 0.9526, + "slid_loss": 0.7991, + "step": 3270, + "time": 13.57 + }, + { + "epoch": 1.64, + "learning_rate": "1.8504e-04", + "loss": 0.8266, + "slid_loss": 0.8002, + "step": 3271, + "time": 11.3 + }, + { + "epoch": 1.64, + "learning_rate": "1.8503e-04", + "loss": 0.7916, + "slid_loss": 0.7991, + "step": 3272, + "time": 13.07 + }, + { + "epoch": 1.64, + "learning_rate": "1.8501e-04", + "loss": 0.8102, + "slid_loss": 0.7976, + "step": 3273, + "time": 11.63 + }, + { + "epoch": 1.64, + "learning_rate": "1.8500e-04", + "loss": 0.736, + "slid_loss": 0.7979, + "step": 3274, + "time": 13.4 + }, + { + "epoch": 1.64, + "learning_rate": "1.8499e-04", + "loss": 0.6638, + "slid_loss": 0.7978, + "step": 3275, + "time": 13.77 + }, + { + "epoch": 1.64, + "learning_rate": "1.8498e-04", + "loss": 0.7363, + "slid_loss": 0.7945, + "step": 3276, + "time": 13.28 + }, + { + "epoch": 1.64, + "learning_rate": "1.8496e-04", + "loss": 0.7444, + "slid_loss": 0.7919, + "step": 3277, + "time": 13.95 + }, + { + "epoch": 1.64, + "learning_rate": "1.8495e-04", + "loss": 0.9708, + "slid_loss": 0.7938, + "step": 3278, + "time": 13.23 + }, + { + "epoch": 1.64, + "learning_rate": "1.8494e-04", + "loss": 0.8053, + "slid_loss": 0.7944, + "step": 3279, + "time": 13.95 + }, + { + "epoch": 1.64, + "learning_rate": "1.8493e-04", + "loss": 0.6467, + "slid_loss": 0.792, + "step": 3280, + "time": 12.78 + }, + { + "epoch": 1.64, + "learning_rate": "1.8491e-04", + "loss": 1.0229, + "slid_loss": 0.7928, + "step": 3281, + "time": 12.02 + }, + { + "epoch": 1.64, + "learning_rate": "1.8490e-04", + "loss": 0.8314, + "slid_loss": 0.7924, + "step": 3282, + "time": 13.62 + }, + { + "epoch": 1.64, + "learning_rate": "1.8489e-04", + "loss": 0.7915, + "slid_loss": 0.7934, + "step": 3283, + "time": 11.06 + }, + { + "epoch": 1.64, + "learning_rate": "1.8488e-04", + "loss": 0.8828, + "slid_loss": 0.7934, + "step": 3284, + "time": 12.71 + }, + { + "epoch": 1.64, + "learning_rate": "1.8486e-04", + "loss": 0.755, + "slid_loss": 0.7923, + "step": 3285, + "time": 12.82 + }, + { + "epoch": 1.65, + "learning_rate": "1.8485e-04", + "loss": 0.7736, + "slid_loss": 0.7909, + "step": 3286, + "time": 13.68 + }, + { + "epoch": 1.65, + "learning_rate": "1.8484e-04", + "loss": 0.7495, + "slid_loss": 0.7901, + "step": 3287, + "time": 13.74 + }, + { + "epoch": 1.65, + "learning_rate": "1.8483e-04", + "loss": 0.7145, + "slid_loss": 0.7868, + "step": 3288, + "time": 13.79 + }, + { + "epoch": 1.65, + "learning_rate": "1.8481e-04", + "loss": 0.8079, + "slid_loss": 0.7888, + "step": 3289, + "time": 11.71 + }, + { + "epoch": 1.65, + "learning_rate": "1.8480e-04", + "loss": 0.8276, + "slid_loss": 0.7897, + "step": 3290, + "time": 11.29 + }, + { + "epoch": 1.65, + "learning_rate": "1.8479e-04", + "loss": 0.7082, + "slid_loss": 0.7889, + "step": 3291, + "time": 13.19 + }, + { + "epoch": 1.65, + "learning_rate": "1.8478e-04", + "loss": 0.9438, + "slid_loss": 0.7911, + "step": 3292, + "time": 13.9 + }, + { + "epoch": 1.65, + "learning_rate": "1.8476e-04", + "loss": 0.9315, + "slid_loss": 0.7904, + "step": 3293, + "time": 13.68 + }, + { + "epoch": 1.65, + "learning_rate": "1.8475e-04", + "loss": 0.765, + "slid_loss": 0.7901, + "step": 3294, + "time": 13.43 + }, + { + "epoch": 1.65, + "learning_rate": "1.8474e-04", + "loss": 0.9673, + "slid_loss": 0.7933, + "step": 3295, + "time": 12.19 + }, + { + "epoch": 1.65, + "learning_rate": "1.8473e-04", + "loss": 0.7638, + "slid_loss": 0.7936, + "step": 3296, + "time": 12.7 + }, + { + "epoch": 1.65, + "learning_rate": "1.8471e-04", + "loss": 0.7738, + "slid_loss": 0.7945, + "step": 3297, + "time": 11.78 + }, + { + "epoch": 1.65, + "learning_rate": "1.8470e-04", + "loss": 0.8369, + "slid_loss": 0.7941, + "step": 3298, + "time": 12.97 + }, + { + "epoch": 1.65, + "learning_rate": "1.8469e-04", + "loss": 0.8828, + "slid_loss": 0.7946, + "step": 3299, + "time": 13.52 + }, + { + "epoch": 1.65, + "learning_rate": "1.8468e-04", + "loss": 0.8769, + "slid_loss": 0.794, + "step": 3300, + "time": 13.37 + }, + { + "epoch": 1.65, + "learning_rate": "1.8466e-04", + "loss": 0.7749, + "slid_loss": 0.7939, + "step": 3301, + "time": 12.8 + }, + { + "epoch": 1.65, + "learning_rate": "1.8465e-04", + "loss": 0.6064, + "slid_loss": 0.7921, + "step": 3302, + "time": 12.46 + }, + { + "epoch": 1.65, + "learning_rate": "1.8464e-04", + "loss": 0.8596, + "slid_loss": 0.7918, + "step": 3303, + "time": 11.19 + }, + { + "epoch": 1.65, + "learning_rate": "1.8462e-04", + "loss": 0.6414, + "slid_loss": 0.7901, + "step": 3304, + "time": 13.81 + }, + { + "epoch": 1.65, + "learning_rate": "1.8461e-04", + "loss": 0.789, + "slid_loss": 0.7901, + "step": 3305, + "time": 13.43 + }, + { + "epoch": 1.66, + "learning_rate": "1.8460e-04", + "loss": 0.7549, + "slid_loss": 0.7894, + "step": 3306, + "time": 13.6 + }, + { + "epoch": 1.66, + "learning_rate": "1.8459e-04", + "loss": 0.6716, + "slid_loss": 0.7881, + "step": 3307, + "time": 11.9 + }, + { + "epoch": 1.66, + "learning_rate": "1.8457e-04", + "loss": 0.6839, + "slid_loss": 0.7888, + "step": 3308, + "time": 12.79 + }, + { + "epoch": 1.66, + "learning_rate": "1.8456e-04", + "loss": 0.7243, + "slid_loss": 0.7877, + "step": 3309, + "time": 11.24 + }, + { + "epoch": 1.66, + "learning_rate": "1.8455e-04", + "loss": 0.9618, + "slid_loss": 0.7894, + "step": 3310, + "time": 13.28 + }, + { + "epoch": 1.66, + "learning_rate": "1.8454e-04", + "loss": 0.7605, + "slid_loss": 0.787, + "step": 3311, + "time": 12.96 + }, + { + "epoch": 1.66, + "learning_rate": "1.8452e-04", + "loss": 0.761, + "slid_loss": 0.7878, + "step": 3312, + "time": 13.23 + }, + { + "epoch": 1.66, + "learning_rate": "1.8451e-04", + "loss": 0.7593, + "slid_loss": 0.7891, + "step": 3313, + "time": 14.07 + }, + { + "epoch": 1.66, + "learning_rate": "1.8450e-04", + "loss": 0.8894, + "slid_loss": 0.7909, + "step": 3314, + "time": 14.0 + }, + { + "epoch": 1.66, + "learning_rate": "1.8449e-04", + "loss": 0.7409, + "slid_loss": 0.7918, + "step": 3315, + "time": 11.07 + }, + { + "epoch": 1.66, + "learning_rate": "1.8447e-04", + "loss": 0.6787, + "slid_loss": 0.7904, + "step": 3316, + "time": 13.25 + }, + { + "epoch": 1.66, + "learning_rate": "1.8446e-04", + "loss": 0.8328, + "slid_loss": 0.7908, + "step": 3317, + "time": 12.77 + }, + { + "epoch": 1.66, + "learning_rate": "1.8445e-04", + "loss": 0.7553, + "slid_loss": 0.7905, + "step": 3318, + "time": 13.71 + }, + { + "epoch": 1.66, + "learning_rate": "1.8444e-04", + "loss": 0.6945, + "slid_loss": 0.7889, + "step": 3319, + "time": 11.12 + }, + { + "epoch": 1.66, + "learning_rate": "1.8442e-04", + "loss": 0.5889, + "slid_loss": 0.787, + "step": 3320, + "time": 14.08 + }, + { + "epoch": 1.66, + "learning_rate": "1.8441e-04", + "loss": 0.7126, + "slid_loss": 0.786, + "step": 3321, + "time": 11.27 + }, + { + "epoch": 1.66, + "learning_rate": "1.8440e-04", + "loss": 0.8871, + "slid_loss": 0.7864, + "step": 3322, + "time": 11.07 + }, + { + "epoch": 1.66, + "learning_rate": "1.8438e-04", + "loss": 0.744, + "slid_loss": 0.785, + "step": 3323, + "time": 11.14 + }, + { + "epoch": 1.66, + "learning_rate": "1.8437e-04", + "loss": 0.7176, + "slid_loss": 0.7853, + "step": 3324, + "time": 13.83 + }, + { + "epoch": 1.66, + "learning_rate": "1.8436e-04", + "loss": 0.6258, + "slid_loss": 0.7842, + "step": 3325, + "time": 13.21 + }, + { + "epoch": 1.67, + "learning_rate": "1.8435e-04", + "loss": 0.8283, + "slid_loss": 0.7839, + "step": 3326, + "time": 11.97 + }, + { + "epoch": 1.67, + "learning_rate": "1.8433e-04", + "loss": 0.9153, + "slid_loss": 0.7855, + "step": 3327, + "time": 13.79 + }, + { + "epoch": 1.67, + "learning_rate": "1.8432e-04", + "loss": 0.7604, + "slid_loss": 0.7854, + "step": 3328, + "time": 13.43 + }, + { + "epoch": 1.67, + "learning_rate": "1.8431e-04", + "loss": 0.7418, + "slid_loss": 0.7847, + "step": 3329, + "time": 13.72 + }, + { + "epoch": 1.67, + "learning_rate": "1.8430e-04", + "loss": 0.9484, + "slid_loss": 0.788, + "step": 3330, + "time": 13.84 + }, + { + "epoch": 1.67, + "learning_rate": "1.8428e-04", + "loss": 0.6888, + "slid_loss": 0.7869, + "step": 3331, + "time": 13.17 + }, + { + "epoch": 1.67, + "learning_rate": "1.8427e-04", + "loss": 0.8823, + "slid_loss": 0.7895, + "step": 3332, + "time": 12.33 + }, + { + "epoch": 1.67, + "learning_rate": "1.8426e-04", + "loss": 0.7367, + "slid_loss": 0.7903, + "step": 3333, + "time": 12.86 + }, + { + "epoch": 1.67, + "learning_rate": "1.8424e-04", + "loss": 0.7604, + "slid_loss": 0.7906, + "step": 3334, + "time": 11.38 + }, + { + "epoch": 1.67, + "learning_rate": "1.8423e-04", + "loss": 0.7816, + "slid_loss": 0.791, + "step": 3335, + "time": 10.57 + }, + { + "epoch": 1.67, + "learning_rate": "1.8422e-04", + "loss": 0.8446, + "slid_loss": 0.792, + "step": 3336, + "time": 11.78 + }, + { + "epoch": 1.67, + "learning_rate": "1.8421e-04", + "loss": 0.6683, + "slid_loss": 0.7911, + "step": 3337, + "time": 12.54 + }, + { + "epoch": 1.67, + "learning_rate": "1.8419e-04", + "loss": 0.8052, + "slid_loss": 0.7906, + "step": 3338, + "time": 10.74 + }, + { + "epoch": 1.67, + "learning_rate": "1.8418e-04", + "loss": 0.8608, + "slid_loss": 0.7895, + "step": 3339, + "time": 13.92 + }, + { + "epoch": 1.67, + "learning_rate": "1.8417e-04", + "loss": 0.7199, + "slid_loss": 0.7886, + "step": 3340, + "time": 12.48 + }, + { + "epoch": 1.67, + "learning_rate": "1.8416e-04", + "loss": 0.5904, + "slid_loss": 0.7861, + "step": 3341, + "time": 12.09 + }, + { + "epoch": 1.67, + "learning_rate": "1.8414e-04", + "loss": 0.8702, + "slid_loss": 0.7849, + "step": 3342, + "time": 13.29 + }, + { + "epoch": 1.67, + "learning_rate": "1.8413e-04", + "loss": 0.8304, + "slid_loss": 0.7853, + "step": 3343, + "time": 13.28 + }, + { + "epoch": 1.67, + "learning_rate": "1.8412e-04", + "loss": 0.8243, + "slid_loss": 0.7856, + "step": 3344, + "time": 12.87 + }, + { + "epoch": 1.68, + "learning_rate": "1.8410e-04", + "loss": 0.9255, + "slid_loss": 0.7873, + "step": 3345, + "time": 12.69 + }, + { + "epoch": 1.68, + "learning_rate": "1.8409e-04", + "loss": 0.7634, + "slid_loss": 0.7871, + "step": 3346, + "time": 11.64 + }, + { + "epoch": 1.68, + "learning_rate": "1.8408e-04", + "loss": 1.0052, + "slid_loss": 0.7882, + "step": 3347, + "time": 11.7 + }, + { + "epoch": 1.68, + "learning_rate": "1.8407e-04", + "loss": 0.7852, + "slid_loss": 0.7871, + "step": 3348, + "time": 13.54 + }, + { + "epoch": 1.68, + "learning_rate": "1.8405e-04", + "loss": 0.7991, + "slid_loss": 0.7854, + "step": 3349, + "time": 10.74 + }, + { + "epoch": 1.68, + "learning_rate": "1.8404e-04", + "loss": 0.8598, + "slid_loss": 0.7864, + "step": 3350, + "time": 11.3 + }, + { + "epoch": 1.68, + "learning_rate": "1.8403e-04", + "loss": 0.6747, + "slid_loss": 0.7859, + "step": 3351, + "time": 11.34 + }, + { + "epoch": 1.68, + "learning_rate": "1.8401e-04", + "loss": 0.9223, + "slid_loss": 0.7881, + "step": 3352, + "time": 13.51 + }, + { + "epoch": 1.68, + "learning_rate": "1.8400e-04", + "loss": 0.8392, + "slid_loss": 0.7895, + "step": 3353, + "time": 13.03 + }, + { + "epoch": 1.68, + "learning_rate": "1.8399e-04", + "loss": 0.8587, + "slid_loss": 0.7895, + "step": 3354, + "time": 13.57 + }, + { + "epoch": 1.68, + "learning_rate": "1.8398e-04", + "loss": 0.6339, + "slid_loss": 0.7889, + "step": 3355, + "time": 11.29 + }, + { + "epoch": 1.68, + "learning_rate": "1.8396e-04", + "loss": 0.74, + "slid_loss": 0.7897, + "step": 3356, + "time": 12.66 + }, + { + "epoch": 1.68, + "learning_rate": "1.8395e-04", + "loss": 0.7249, + "slid_loss": 0.7893, + "step": 3357, + "time": 11.61 + }, + { + "epoch": 1.68, + "learning_rate": "1.8394e-04", + "loss": 0.7554, + "slid_loss": 0.7882, + "step": 3358, + "time": 11.23 + }, + { + "epoch": 1.68, + "learning_rate": "1.8392e-04", + "loss": 0.9874, + "slid_loss": 0.7902, + "step": 3359, + "time": 13.73 + }, + { + "epoch": 1.68, + "learning_rate": "1.8391e-04", + "loss": 0.6767, + "slid_loss": 0.7895, + "step": 3360, + "time": 10.59 + }, + { + "epoch": 1.68, + "learning_rate": "1.8390e-04", + "loss": 0.7988, + "slid_loss": 0.7891, + "step": 3361, + "time": 10.79 + }, + { + "epoch": 1.68, + "learning_rate": "1.8389e-04", + "loss": 0.6944, + "slid_loss": 0.7882, + "step": 3362, + "time": 13.02 + }, + { + "epoch": 1.68, + "learning_rate": "1.8387e-04", + "loss": 0.8188, + "slid_loss": 0.7875, + "step": 3363, + "time": 12.8 + }, + { + "epoch": 1.68, + "learning_rate": "1.8386e-04", + "loss": 0.8056, + "slid_loss": 0.7878, + "step": 3364, + "time": 13.11 + }, + { + "epoch": 1.69, + "learning_rate": "1.8385e-04", + "loss": 0.876, + "slid_loss": 0.7895, + "step": 3365, + "time": 13.48 + }, + { + "epoch": 1.69, + "learning_rate": "1.8383e-04", + "loss": 0.9484, + "slid_loss": 0.791, + "step": 3366, + "time": 13.49 + }, + { + "epoch": 1.69, + "learning_rate": "1.8382e-04", + "loss": 0.9037, + "slid_loss": 0.792, + "step": 3367, + "time": 11.9 + }, + { + "epoch": 1.69, + "learning_rate": "1.8381e-04", + "loss": 0.6677, + "slid_loss": 0.79, + "step": 3368, + "time": 12.41 + }, + { + "epoch": 1.69, + "learning_rate": "1.8380e-04", + "loss": 0.604, + "slid_loss": 0.7903, + "step": 3369, + "time": 11.66 + }, + { + "epoch": 1.69, + "learning_rate": "1.8378e-04", + "loss": 0.587, + "slid_loss": 0.7867, + "step": 3370, + "time": 13.13 + }, + { + "epoch": 1.69, + "learning_rate": "1.8377e-04", + "loss": 0.7562, + "slid_loss": 0.786, + "step": 3371, + "time": 13.22 + }, + { + "epoch": 1.69, + "learning_rate": "1.8376e-04", + "loss": 0.8065, + "slid_loss": 0.7861, + "step": 3372, + "time": 13.73 + }, + { + "epoch": 1.69, + "learning_rate": "1.8374e-04", + "loss": 0.9136, + "slid_loss": 0.7872, + "step": 3373, + "time": 13.69 + }, + { + "epoch": 1.69, + "learning_rate": "1.8373e-04", + "loss": 0.7441, + "slid_loss": 0.7872, + "step": 3374, + "time": 13.93 + }, + { + "epoch": 1.69, + "learning_rate": "1.8372e-04", + "loss": 0.7994, + "slid_loss": 0.7886, + "step": 3375, + "time": 13.09 + }, + { + "epoch": 1.69, + "learning_rate": "1.8371e-04", + "loss": 0.7408, + "slid_loss": 0.7886, + "step": 3376, + "time": 10.71 + }, + { + "epoch": 1.69, + "learning_rate": "1.8369e-04", + "loss": 0.8679, + "slid_loss": 0.7899, + "step": 3377, + "time": 12.16 + }, + { + "epoch": 1.69, + "learning_rate": "1.8368e-04", + "loss": 0.7711, + "slid_loss": 0.7879, + "step": 3378, + "time": 11.38 + }, + { + "epoch": 1.69, + "learning_rate": "1.8367e-04", + "loss": 0.6868, + "slid_loss": 0.7867, + "step": 3379, + "time": 12.84 + }, + { + "epoch": 1.69, + "learning_rate": "1.8365e-04", + "loss": 0.8232, + "slid_loss": 0.7885, + "step": 3380, + "time": 13.73 + }, + { + "epoch": 1.69, + "learning_rate": "1.8364e-04", + "loss": 0.8369, + "slid_loss": 0.7866, + "step": 3381, + "time": 10.62 + }, + { + "epoch": 1.69, + "learning_rate": "1.8363e-04", + "loss": 0.8051, + "slid_loss": 0.7863, + "step": 3382, + "time": 13.17 + }, + { + "epoch": 1.69, + "learning_rate": "1.8362e-04", + "loss": 0.7596, + "slid_loss": 0.786, + "step": 3383, + "time": 12.22 + }, + { + "epoch": 1.69, + "learning_rate": "1.8360e-04", + "loss": 0.7305, + "slid_loss": 0.7845, + "step": 3384, + "time": 12.86 + }, + { + "epoch": 1.7, + "learning_rate": "1.8359e-04", + "loss": 0.7027, + "slid_loss": 0.784, + "step": 3385, + "time": 10.85 + }, + { + "epoch": 1.7, + "learning_rate": "1.8358e-04", + "loss": 0.8198, + "slid_loss": 0.7844, + "step": 3386, + "time": 12.65 + }, + { + "epoch": 1.7, + "learning_rate": "1.8356e-04", + "loss": 0.8336, + "slid_loss": 0.7853, + "step": 3387, + "time": 14.23 + }, + { + "epoch": 1.7, + "learning_rate": "1.8355e-04", + "loss": 0.7409, + "slid_loss": 0.7855, + "step": 3388, + "time": 10.75 + }, + { + "epoch": 1.7, + "learning_rate": "1.8354e-04", + "loss": 0.7802, + "slid_loss": 0.7853, + "step": 3389, + "time": 11.98 + }, + { + "epoch": 1.7, + "learning_rate": "1.8352e-04", + "loss": 0.8666, + "slid_loss": 0.7857, + "step": 3390, + "time": 13.45 + }, + { + "epoch": 1.7, + "learning_rate": "1.8351e-04", + "loss": 0.9802, + "slid_loss": 0.7884, + "step": 3391, + "time": 12.23 + }, + { + "epoch": 1.7, + "learning_rate": "1.8350e-04", + "loss": 0.6554, + "slid_loss": 0.7855, + "step": 3392, + "time": 13.45 + }, + { + "epoch": 1.7, + "learning_rate": "1.8349e-04", + "loss": 0.7587, + "slid_loss": 0.7838, + "step": 3393, + "time": 12.86 + }, + { + "epoch": 1.7, + "learning_rate": "1.8347e-04", + "loss": 0.911, + "slid_loss": 0.7852, + "step": 3394, + "time": 11.51 + }, + { + "epoch": 1.7, + "learning_rate": "1.8346e-04", + "loss": 0.6021, + "slid_loss": 0.7816, + "step": 3395, + "time": 11.67 + }, + { + "epoch": 1.7, + "learning_rate": "1.8345e-04", + "loss": 0.6841, + "slid_loss": 0.7808, + "step": 3396, + "time": 13.94 + }, + { + "epoch": 1.7, + "learning_rate": "1.8343e-04", + "loss": 0.7142, + "slid_loss": 0.7802, + "step": 3397, + "time": 12.22 + }, + { + "epoch": 1.7, + "learning_rate": "1.8342e-04", + "loss": 0.7111, + "slid_loss": 0.7789, + "step": 3398, + "time": 15.55 + }, + { + "epoch": 1.7, + "learning_rate": "1.8341e-04", + "loss": 0.9529, + "slid_loss": 0.7796, + "step": 3399, + "time": 11.81 + }, + { + "epoch": 1.7, + "learning_rate": "1.8339e-04", + "loss": 0.6675, + "slid_loss": 0.7775, + "step": 3400, + "time": 13.32 + }, + { + "epoch": 1.7, + "learning_rate": "1.8338e-04", + "loss": 0.7507, + "slid_loss": 0.7773, + "step": 3401, + "time": 13.66 + }, + { + "epoch": 1.7, + "learning_rate": "1.8337e-04", + "loss": 0.6296, + "slid_loss": 0.7775, + "step": 3402, + "time": 11.01 + }, + { + "epoch": 1.7, + "learning_rate": "1.8336e-04", + "loss": 0.8274, + "slid_loss": 0.7772, + "step": 3403, + "time": 13.48 + }, + { + "epoch": 1.7, + "learning_rate": "1.8334e-04", + "loss": 0.8306, + "slid_loss": 0.7791, + "step": 3404, + "time": 12.98 + }, + { + "epoch": 1.71, + "learning_rate": "1.8333e-04", + "loss": 0.7828, + "slid_loss": 0.779, + "step": 3405, + "time": 12.98 + }, + { + "epoch": 1.71, + "learning_rate": "1.8332e-04", + "loss": 0.881, + "slid_loss": 0.7803, + "step": 3406, + "time": 12.98 + }, + { + "epoch": 1.71, + "learning_rate": "1.8330e-04", + "loss": 0.6984, + "slid_loss": 0.7806, + "step": 3407, + "time": 13.27 + }, + { + "epoch": 1.71, + "learning_rate": "1.8329e-04", + "loss": 0.8525, + "slid_loss": 0.7822, + "step": 3408, + "time": 11.47 + }, + { + "epoch": 1.71, + "learning_rate": "1.8328e-04", + "loss": 0.7047, + "slid_loss": 0.782, + "step": 3409, + "time": 11.92 + }, + { + "epoch": 1.71, + "learning_rate": "1.8326e-04", + "loss": 0.7681, + "slid_loss": 0.7801, + "step": 3410, + "time": 13.8 + }, + { + "epoch": 1.71, + "learning_rate": "1.8325e-04", + "loss": 0.8343, + "slid_loss": 0.7808, + "step": 3411, + "time": 11.62 + }, + { + "epoch": 1.71, + "learning_rate": "1.8324e-04", + "loss": 0.7293, + "slid_loss": 0.7805, + "step": 3412, + "time": 13.63 + }, + { + "epoch": 1.71, + "learning_rate": "1.8323e-04", + "loss": 0.9106, + "slid_loss": 0.782, + "step": 3413, + "time": 13.61 + }, + { + "epoch": 1.71, + "learning_rate": "1.8321e-04", + "loss": 0.8489, + "slid_loss": 0.7816, + "step": 3414, + "time": 13.43 + }, + { + "epoch": 1.71, + "learning_rate": "1.8320e-04", + "loss": 0.7486, + "slid_loss": 0.7817, + "step": 3415, + "time": 13.8 + }, + { + "epoch": 1.71, + "learning_rate": "1.8319e-04", + "loss": 0.7644, + "slid_loss": 0.7826, + "step": 3416, + "time": 11.06 + }, + { + "epoch": 1.71, + "learning_rate": "1.8317e-04", + "loss": 0.6992, + "slid_loss": 0.7812, + "step": 3417, + "time": 12.69 + }, + { + "epoch": 1.71, + "learning_rate": "1.8316e-04", + "loss": 0.9269, + "slid_loss": 0.783, + "step": 3418, + "time": 11.52 + }, + { + "epoch": 1.71, + "learning_rate": "1.8315e-04", + "loss": 0.8724, + "slid_loss": 0.7847, + "step": 3419, + "time": 14.23 + }, + { + "epoch": 1.71, + "learning_rate": "1.8313e-04", + "loss": 0.797, + "slid_loss": 0.7868, + "step": 3420, + "time": 11.65 + }, + { + "epoch": 1.71, + "learning_rate": "1.8312e-04", + "loss": 0.6724, + "slid_loss": 0.7864, + "step": 3421, + "time": 12.29 + }, + { + "epoch": 1.71, + "learning_rate": "1.8311e-04", + "loss": 0.5675, + "slid_loss": 0.7832, + "step": 3422, + "time": 13.04 + }, + { + "epoch": 1.71, + "learning_rate": "1.8309e-04", + "loss": 0.7522, + "slid_loss": 0.7833, + "step": 3423, + "time": 13.2 + }, + { + "epoch": 1.71, + "learning_rate": "1.8308e-04", + "loss": 0.7421, + "slid_loss": 0.7835, + "step": 3424, + "time": 11.5 + }, + { + "epoch": 1.72, + "learning_rate": "1.8307e-04", + "loss": 0.9228, + "slid_loss": 0.7865, + "step": 3425, + "time": 11.88 + }, + { + "epoch": 1.72, + "learning_rate": "1.8306e-04", + "loss": 0.6117, + "slid_loss": 0.7843, + "step": 3426, + "time": 11.27 + }, + { + "epoch": 1.72, + "learning_rate": "1.8304e-04", + "loss": 0.7206, + "slid_loss": 0.7824, + "step": 3427, + "time": 12.51 + }, + { + "epoch": 1.72, + "learning_rate": "1.8303e-04", + "loss": 0.6242, + "slid_loss": 0.781, + "step": 3428, + "time": 12.89 + }, + { + "epoch": 1.72, + "learning_rate": "1.8302e-04", + "loss": 0.8544, + "slid_loss": 0.7822, + "step": 3429, + "time": 10.97 + }, + { + "epoch": 1.72, + "learning_rate": "1.8300e-04", + "loss": 0.6435, + "slid_loss": 0.7791, + "step": 3430, + "time": 11.37 + }, + { + "epoch": 1.72, + "learning_rate": "1.8299e-04", + "loss": 0.7022, + "slid_loss": 0.7792, + "step": 3431, + "time": 10.45 + }, + { + "epoch": 1.72, + "learning_rate": "1.8298e-04", + "loss": 0.8508, + "slid_loss": 0.7789, + "step": 3432, + "time": 12.24 + }, + { + "epoch": 1.72, + "learning_rate": "1.8296e-04", + "loss": 0.8141, + "slid_loss": 0.7797, + "step": 3433, + "time": 13.77 + }, + { + "epoch": 1.72, + "learning_rate": "1.8295e-04", + "loss": 0.7428, + "slid_loss": 0.7795, + "step": 3434, + "time": 13.06 + }, + { + "epoch": 1.72, + "learning_rate": "1.8294e-04", + "loss": 0.8864, + "slid_loss": 0.7806, + "step": 3435, + "time": 11.77 + }, + { + "epoch": 1.72, + "learning_rate": "1.8292e-04", + "loss": 0.7307, + "slid_loss": 0.7794, + "step": 3436, + "time": 13.11 + }, + { + "epoch": 1.72, + "learning_rate": "1.8291e-04", + "loss": 0.7646, + "slid_loss": 0.7804, + "step": 3437, + "time": 12.24 + }, + { + "epoch": 1.72, + "learning_rate": "1.8290e-04", + "loss": 0.8692, + "slid_loss": 0.781, + "step": 3438, + "time": 13.8 + }, + { + "epoch": 1.72, + "learning_rate": "1.8288e-04", + "loss": 0.7875, + "slid_loss": 0.7803, + "step": 3439, + "time": 12.13 + }, + { + "epoch": 1.72, + "learning_rate": "1.8287e-04", + "loss": 0.7457, + "slid_loss": 0.7806, + "step": 3440, + "time": 10.79 + }, + { + "epoch": 1.72, + "learning_rate": "1.8286e-04", + "loss": 0.627, + "slid_loss": 0.7809, + "step": 3441, + "time": 13.33 + }, + { + "epoch": 1.72, + "learning_rate": "1.8285e-04", + "loss": 0.8873, + "slid_loss": 0.7811, + "step": 3442, + "time": 12.01 + }, + { + "epoch": 1.72, + "learning_rate": "1.8283e-04", + "loss": 0.5245, + "slid_loss": 0.778, + "step": 3443, + "time": 14.16 + }, + { + "epoch": 1.72, + "learning_rate": "1.8282e-04", + "loss": 0.6735, + "slid_loss": 0.7765, + "step": 3444, + "time": 10.83 + }, + { + "epoch": 1.73, + "learning_rate": "1.8281e-04", + "loss": 0.643, + "slid_loss": 0.7737, + "step": 3445, + "time": 13.52 + }, + { + "epoch": 1.73, + "learning_rate": "1.8279e-04", + "loss": 0.7381, + "slid_loss": 0.7735, + "step": 3446, + "time": 12.29 + }, + { + "epoch": 1.73, + "learning_rate": "1.8278e-04", + "loss": 0.7587, + "slid_loss": 0.771, + "step": 3447, + "time": 12.97 + }, + { + "epoch": 1.73, + "learning_rate": "1.8277e-04", + "loss": 0.7812, + "slid_loss": 0.771, + "step": 3448, + "time": 12.17 + }, + { + "epoch": 1.73, + "learning_rate": "1.8275e-04", + "loss": 0.7687, + "slid_loss": 0.7706, + "step": 3449, + "time": 13.22 + }, + { + "epoch": 1.73, + "learning_rate": "1.8274e-04", + "loss": 0.7779, + "slid_loss": 0.7698, + "step": 3450, + "time": 14.33 + }, + { + "epoch": 1.73, + "learning_rate": "1.8273e-04", + "loss": 0.7361, + "slid_loss": 0.7704, + "step": 3451, + "time": 13.33 + }, + { + "epoch": 1.73, + "learning_rate": "1.8271e-04", + "loss": 0.8121, + "slid_loss": 0.7693, + "step": 3452, + "time": 12.95 + }, + { + "epoch": 1.73, + "learning_rate": "1.8270e-04", + "loss": 0.6825, + "slid_loss": 0.7678, + "step": 3453, + "time": 13.8 + }, + { + "epoch": 1.73, + "learning_rate": "1.8269e-04", + "loss": 0.614, + "slid_loss": 0.7653, + "step": 3454, + "time": 13.36 + }, + { + "epoch": 1.73, + "learning_rate": "1.8267e-04", + "loss": 0.713, + "slid_loss": 0.7661, + "step": 3455, + "time": 11.39 + }, + { + "epoch": 1.73, + "learning_rate": "1.8266e-04", + "loss": 0.882, + "slid_loss": 0.7675, + "step": 3456, + "time": 12.81 + }, + { + "epoch": 1.73, + "learning_rate": "1.8265e-04", + "loss": 0.8446, + "slid_loss": 0.7687, + "step": 3457, + "time": 11.73 + }, + { + "epoch": 1.73, + "learning_rate": "1.8263e-04", + "loss": 0.5787, + "slid_loss": 0.767, + "step": 3458, + "time": 11.29 + }, + { + "epoch": 1.73, + "learning_rate": "1.8262e-04", + "loss": 0.8663, + "slid_loss": 0.7658, + "step": 3459, + "time": 12.1 + }, + { + "epoch": 1.73, + "learning_rate": "1.8261e-04", + "loss": 0.8118, + "slid_loss": 0.7671, + "step": 3460, + "time": 11.9 + }, + { + "epoch": 1.73, + "learning_rate": "1.8259e-04", + "loss": 0.6457, + "slid_loss": 0.7656, + "step": 3461, + "time": 13.4 + }, + { + "epoch": 1.73, + "learning_rate": "1.8258e-04", + "loss": 0.7236, + "slid_loss": 0.7659, + "step": 3462, + "time": 12.81 + }, + { + "epoch": 1.73, + "learning_rate": "1.8257e-04", + "loss": 0.8272, + "slid_loss": 0.766, + "step": 3463, + "time": 11.55 + }, + { + "epoch": 1.73, + "learning_rate": "1.8255e-04", + "loss": 0.9117, + "slid_loss": 0.767, + "step": 3464, + "time": 13.08 + }, + { + "epoch": 1.74, + "learning_rate": "1.8254e-04", + "loss": 0.818, + "slid_loss": 0.7664, + "step": 3465, + "time": 13.33 + }, + { + "epoch": 1.74, + "learning_rate": "1.8253e-04", + "loss": 0.7795, + "slid_loss": 0.7647, + "step": 3466, + "time": 12.75 + }, + { + "epoch": 1.74, + "learning_rate": "1.8251e-04", + "loss": 0.6873, + "slid_loss": 0.7626, + "step": 3467, + "time": 13.27 + }, + { + "epoch": 1.74, + "learning_rate": "1.8250e-04", + "loss": 0.6385, + "slid_loss": 0.7623, + "step": 3468, + "time": 12.08 + }, + { + "epoch": 1.74, + "learning_rate": "1.8249e-04", + "loss": 0.689, + "slid_loss": 0.7631, + "step": 3469, + "time": 11.15 + }, + { + "epoch": 1.74, + "learning_rate": "1.8247e-04", + "loss": 0.8646, + "slid_loss": 0.7659, + "step": 3470, + "time": 12.03 + }, + { + "epoch": 1.74, + "learning_rate": "1.8246e-04", + "loss": 0.7045, + "slid_loss": 0.7654, + "step": 3471, + "time": 14.18 + }, + { + "epoch": 1.74, + "learning_rate": "1.8245e-04", + "loss": 0.9828, + "slid_loss": 0.7672, + "step": 3472, + "time": 12.01 + }, + { + "epoch": 1.74, + "learning_rate": "1.8243e-04", + "loss": 0.8054, + "slid_loss": 0.7661, + "step": 3473, + "time": 13.63 + }, + { + "epoch": 1.74, + "learning_rate": "1.8242e-04", + "loss": 0.5035, + "slid_loss": 0.7637, + "step": 3474, + "time": 11.89 + }, + { + "epoch": 1.74, + "learning_rate": "1.8241e-04", + "loss": 0.7448, + "slid_loss": 0.7631, + "step": 3475, + "time": 11.5 + }, + { + "epoch": 1.74, + "learning_rate": "1.8239e-04", + "loss": 0.8022, + "slid_loss": 0.7637, + "step": 3476, + "time": 12.84 + }, + { + "epoch": 1.74, + "learning_rate": "1.8238e-04", + "loss": 0.8365, + "slid_loss": 0.7634, + "step": 3477, + "time": 12.11 + }, + { + "epoch": 1.74, + "learning_rate": "1.8237e-04", + "loss": 0.7807, + "slid_loss": 0.7635, + "step": 3478, + "time": 13.66 + }, + { + "epoch": 1.74, + "learning_rate": "1.8235e-04", + "loss": 0.8156, + "slid_loss": 0.7648, + "step": 3479, + "time": 11.91 + }, + { + "epoch": 1.74, + "learning_rate": "1.8234e-04", + "loss": 0.6533, + "slid_loss": 0.7631, + "step": 3480, + "time": 11.66 + }, + { + "epoch": 1.74, + "learning_rate": "1.8233e-04", + "loss": 1.0499, + "slid_loss": 0.7652, + "step": 3481, + "time": 13.0 + }, + { + "epoch": 1.74, + "learning_rate": "1.8231e-04", + "loss": 0.7848, + "slid_loss": 0.765, + "step": 3482, + "time": 14.14 + }, + { + "epoch": 1.74, + "learning_rate": "1.8230e-04", + "loss": 0.8567, + "slid_loss": 0.766, + "step": 3483, + "time": 12.58 + }, + { + "epoch": 1.74, + "learning_rate": "1.8229e-04", + "loss": 0.7943, + "slid_loss": 0.7667, + "step": 3484, + "time": 10.96 + }, + { + "epoch": 1.75, + "learning_rate": "1.8227e-04", + "loss": 0.5337, + "slid_loss": 0.765, + "step": 3485, + "time": 13.44 + }, + { + "epoch": 1.75, + "learning_rate": "1.8226e-04", + "loss": 0.8097, + "slid_loss": 0.7649, + "step": 3486, + "time": 11.64 + }, + { + "epoch": 1.75, + "learning_rate": "1.8225e-04", + "loss": 0.9635, + "slid_loss": 0.7662, + "step": 3487, + "time": 11.29 + }, + { + "epoch": 1.75, + "learning_rate": "1.8223e-04", + "loss": 0.7664, + "slid_loss": 0.7664, + "step": 3488, + "time": 14.09 + }, + { + "epoch": 1.75, + "learning_rate": "1.8222e-04", + "loss": 0.5437, + "slid_loss": 0.764, + "step": 3489, + "time": 12.93 + }, + { + "epoch": 1.75, + "learning_rate": "1.8221e-04", + "loss": 0.7204, + "slid_loss": 0.7626, + "step": 3490, + "time": 12.74 + }, + { + "epoch": 1.75, + "learning_rate": "1.8219e-04", + "loss": 0.9082, + "slid_loss": 0.7619, + "step": 3491, + "time": 13.29 + }, + { + "epoch": 1.75, + "learning_rate": "1.8218e-04", + "loss": 0.702, + "slid_loss": 0.7623, + "step": 3492, + "time": 11.24 + }, + { + "epoch": 1.75, + "learning_rate": "1.8217e-04", + "loss": 0.7343, + "slid_loss": 0.7621, + "step": 3493, + "time": 13.06 + }, + { + "epoch": 1.75, + "learning_rate": "1.8215e-04", + "loss": 0.8914, + "slid_loss": 0.7619, + "step": 3494, + "time": 12.53 + }, + { + "epoch": 1.75, + "learning_rate": "1.8214e-04", + "loss": 0.7679, + "slid_loss": 0.7635, + "step": 3495, + "time": 12.81 + }, + { + "epoch": 1.75, + "learning_rate": "1.8213e-04", + "loss": 0.619, + "slid_loss": 0.7629, + "step": 3496, + "time": 11.64 + }, + { + "epoch": 1.75, + "learning_rate": "1.8211e-04", + "loss": 0.6495, + "slid_loss": 0.7623, + "step": 3497, + "time": 14.03 + }, + { + "epoch": 1.75, + "learning_rate": "1.8210e-04", + "loss": 0.7537, + "slid_loss": 0.7627, + "step": 3498, + "time": 13.48 + }, + { + "epoch": 1.75, + "learning_rate": "1.8209e-04", + "loss": 0.7943, + "slid_loss": 0.7611, + "step": 3499, + "time": 12.81 + }, + { + "epoch": 1.75, + "learning_rate": "1.8207e-04", + "loss": 0.7702, + "slid_loss": 0.7621, + "step": 3500, + "time": 12.78 + }, + { + "epoch": 1.75, + "learning_rate": "1.8206e-04", + "loss": 0.686, + "slid_loss": 0.7615, + "step": 3501, + "time": 11.3 + }, + { + "epoch": 1.75, + "learning_rate": "1.8205e-04", + "loss": 0.8069, + "slid_loss": 0.7632, + "step": 3502, + "time": 13.8 + }, + { + "epoch": 1.75, + "learning_rate": "1.8203e-04", + "loss": 0.9913, + "slid_loss": 0.7649, + "step": 3503, + "time": 12.71 + }, + { + "epoch": 1.75, + "learning_rate": "1.8202e-04", + "loss": 0.7947, + "slid_loss": 0.7645, + "step": 3504, + "time": 13.22 + }, + { + "epoch": 1.76, + "learning_rate": "1.8201e-04", + "loss": 0.7829, + "slid_loss": 0.7645, + "step": 3505, + "time": 12.23 + }, + { + "epoch": 1.76, + "learning_rate": "1.8199e-04", + "loss": 0.8107, + "slid_loss": 0.7638, + "step": 3506, + "time": 13.32 + }, + { + "epoch": 1.76, + "learning_rate": "1.8198e-04", + "loss": 0.7626, + "slid_loss": 0.7645, + "step": 3507, + "time": 12.68 + }, + { + "epoch": 1.76, + "learning_rate": "1.8197e-04", + "loss": 0.8275, + "slid_loss": 0.7642, + "step": 3508, + "time": 12.11 + }, + { + "epoch": 1.76, + "learning_rate": "1.8195e-04", + "loss": 0.6284, + "slid_loss": 0.7635, + "step": 3509, + "time": 11.78 + }, + { + "epoch": 1.76, + "learning_rate": "1.8194e-04", + "loss": 0.8479, + "slid_loss": 0.7643, + "step": 3510, + "time": 10.93 + }, + { + "epoch": 1.76, + "learning_rate": "1.8193e-04", + "loss": 0.7499, + "slid_loss": 0.7634, + "step": 3511, + "time": 13.35 + }, + { + "epoch": 1.76, + "learning_rate": "1.8191e-04", + "loss": 0.771, + "slid_loss": 0.7638, + "step": 3512, + "time": 11.35 + }, + { + "epoch": 1.76, + "learning_rate": "1.8190e-04", + "loss": 0.764, + "slid_loss": 0.7624, + "step": 3513, + "time": 10.68 + }, + { + "epoch": 1.76, + "learning_rate": "1.8189e-04", + "loss": 0.659, + "slid_loss": 0.7605, + "step": 3514, + "time": 13.72 + }, + { + "epoch": 1.76, + "learning_rate": "1.8187e-04", + "loss": 0.8562, + "slid_loss": 0.7615, + "step": 3515, + "time": 13.33 + }, + { + "epoch": 1.76, + "learning_rate": "1.8186e-04", + "loss": 0.8186, + "slid_loss": 0.7621, + "step": 3516, + "time": 11.97 + }, + { + "epoch": 1.76, + "learning_rate": "1.8185e-04", + "loss": 0.7574, + "slid_loss": 0.7627, + "step": 3517, + "time": 13.15 + }, + { + "epoch": 1.76, + "learning_rate": "1.8183e-04", + "loss": 0.6846, + "slid_loss": 0.7602, + "step": 3518, + "time": 13.4 + }, + { + "epoch": 1.76, + "learning_rate": "1.8182e-04", + "loss": 0.5781, + "slid_loss": 0.7573, + "step": 3519, + "time": 10.71 + }, + { + "epoch": 1.76, + "learning_rate": "1.8180e-04", + "loss": 0.6038, + "slid_loss": 0.7554, + "step": 3520, + "time": 11.36 + }, + { + "epoch": 1.76, + "learning_rate": "1.8179e-04", + "loss": 0.8292, + "slid_loss": 0.7569, + "step": 3521, + "time": 13.25 + }, + { + "epoch": 1.76, + "learning_rate": "1.8178e-04", + "loss": 0.7533, + "slid_loss": 0.7588, + "step": 3522, + "time": 14.03 + }, + { + "epoch": 1.76, + "learning_rate": "1.8176e-04", + "loss": 0.6269, + "slid_loss": 0.7575, + "step": 3523, + "time": 13.82 + }, + { + "epoch": 1.76, + "learning_rate": "1.8175e-04", + "loss": 0.8075, + "slid_loss": 0.7582, + "step": 3524, + "time": 13.73 + }, + { + "epoch": 1.77, + "learning_rate": "1.8174e-04", + "loss": 0.7012, + "slid_loss": 0.756, + "step": 3525, + "time": 12.96 + }, + { + "epoch": 1.77, + "learning_rate": "1.8172e-04", + "loss": 0.6304, + "slid_loss": 0.7562, + "step": 3526, + "time": 11.64 + }, + { + "epoch": 1.77, + "learning_rate": "1.8171e-04", + "loss": 0.7751, + "slid_loss": 0.7567, + "step": 3527, + "time": 13.09 + }, + { + "epoch": 1.77, + "learning_rate": "1.8170e-04", + "loss": 0.7771, + "slid_loss": 0.7582, + "step": 3528, + "time": 13.67 + }, + { + "epoch": 1.77, + "learning_rate": "1.8168e-04", + "loss": 0.7926, + "slid_loss": 0.7576, + "step": 3529, + "time": 12.97 + }, + { + "epoch": 1.77, + "learning_rate": "1.8167e-04", + "loss": 0.5916, + "slid_loss": 0.7571, + "step": 3530, + "time": 12.58 + }, + { + "epoch": 1.77, + "learning_rate": "1.8166e-04", + "loss": 0.7794, + "slid_loss": 0.7579, + "step": 3531, + "time": 11.4 + }, + { + "epoch": 1.77, + "learning_rate": "1.8164e-04", + "loss": 0.7945, + "slid_loss": 0.7573, + "step": 3532, + "time": 12.83 + }, + { + "epoch": 1.77, + "learning_rate": "1.8163e-04", + "loss": 0.8142, + "slid_loss": 0.7573, + "step": 3533, + "time": 13.24 + }, + { + "epoch": 1.77, + "learning_rate": "1.8162e-04", + "loss": 0.7643, + "slid_loss": 0.7575, + "step": 3534, + "time": 13.69 + }, + { + "epoch": 1.77, + "learning_rate": "1.8160e-04", + "loss": 0.7568, + "slid_loss": 0.7562, + "step": 3535, + "time": 13.44 + }, + { + "epoch": 1.77, + "learning_rate": "1.8159e-04", + "loss": 0.5588, + "slid_loss": 0.7545, + "step": 3536, + "time": 10.99 + }, + { + "epoch": 1.77, + "learning_rate": "1.8158e-04", + "loss": 0.8319, + "slid_loss": 0.7552, + "step": 3537, + "time": 11.35 + }, + { + "epoch": 1.77, + "learning_rate": "1.8156e-04", + "loss": 0.8709, + "slid_loss": 0.7552, + "step": 3538, + "time": 13.95 + }, + { + "epoch": 1.77, + "learning_rate": "1.8155e-04", + "loss": 0.823, + "slid_loss": 0.7555, + "step": 3539, + "time": 11.69 + }, + { + "epoch": 1.77, + "learning_rate": "1.8153e-04", + "loss": 0.8586, + "slid_loss": 0.7567, + "step": 3540, + "time": 13.62 + }, + { + "epoch": 1.77, + "learning_rate": "1.8152e-04", + "loss": 0.8551, + "slid_loss": 0.759, + "step": 3541, + "time": 12.89 + }, + { + "epoch": 1.77, + "learning_rate": "1.8151e-04", + "loss": 0.5624, + "slid_loss": 0.7557, + "step": 3542, + "time": 10.74 + }, + { + "epoch": 1.77, + "learning_rate": "1.8149e-04", + "loss": 0.6266, + "slid_loss": 0.7567, + "step": 3543, + "time": 13.45 + }, + { + "epoch": 1.77, + "learning_rate": "1.8148e-04", + "loss": 0.7791, + "slid_loss": 0.7578, + "step": 3544, + "time": 13.33 + }, + { + "epoch": 1.78, + "learning_rate": "1.8147e-04", + "loss": 0.8933, + "slid_loss": 0.7603, + "step": 3545, + "time": 13.44 + }, + { + "epoch": 1.78, + "learning_rate": "1.8145e-04", + "loss": 0.647, + "slid_loss": 0.7594, + "step": 3546, + "time": 13.46 + }, + { + "epoch": 1.78, + "learning_rate": "1.8144e-04", + "loss": 0.8541, + "slid_loss": 0.7603, + "step": 3547, + "time": 11.47 + }, + { + "epoch": 1.78, + "learning_rate": "1.8143e-04", + "loss": 0.9766, + "slid_loss": 0.7623, + "step": 3548, + "time": 11.19 + }, + { + "epoch": 1.78, + "learning_rate": "1.8141e-04", + "loss": 0.559, + "slid_loss": 0.7602, + "step": 3549, + "time": 12.21 + }, + { + "epoch": 1.78, + "learning_rate": "1.8140e-04", + "loss": 0.7074, + "slid_loss": 0.7595, + "step": 3550, + "time": 13.81 + }, + { + "epoch": 1.78, + "learning_rate": "1.8138e-04", + "loss": 0.865, + "slid_loss": 0.7608, + "step": 3551, + "time": 11.26 + }, + { + "epoch": 1.78, + "learning_rate": "1.8137e-04", + "loss": 0.7409, + "slid_loss": 0.7601, + "step": 3552, + "time": 11.67 + }, + { + "epoch": 1.78, + "learning_rate": "1.8136e-04", + "loss": 0.58, + "slid_loss": 0.759, + "step": 3553, + "time": 11.83 + }, + { + "epoch": 1.78, + "learning_rate": "1.8134e-04", + "loss": 0.697, + "slid_loss": 0.7599, + "step": 3554, + "time": 14.36 + }, + { + "epoch": 1.78, + "learning_rate": "1.8133e-04", + "loss": 0.8172, + "slid_loss": 0.7609, + "step": 3555, + "time": 13.11 + }, + { + "epoch": 1.78, + "learning_rate": "1.8132e-04", + "loss": 0.6835, + "slid_loss": 0.7589, + "step": 3556, + "time": 13.8 + }, + { + "epoch": 1.78, + "learning_rate": "1.8130e-04", + "loss": 0.6646, + "slid_loss": 0.7571, + "step": 3557, + "time": 11.95 + }, + { + "epoch": 1.78, + "learning_rate": "1.8129e-04", + "loss": 0.8299, + "slid_loss": 0.7596, + "step": 3558, + "time": 11.38 + }, + { + "epoch": 1.78, + "learning_rate": "1.8128e-04", + "loss": 0.7388, + "slid_loss": 0.7584, + "step": 3559, + "time": 13.62 + }, + { + "epoch": 1.78, + "learning_rate": "1.8126e-04", + "loss": 0.8105, + "slid_loss": 0.7583, + "step": 3560, + "time": 11.81 + }, + { + "epoch": 1.78, + "learning_rate": "1.8125e-04", + "loss": 0.6944, + "slid_loss": 0.7588, + "step": 3561, + "time": 12.78 + }, + { + "epoch": 1.78, + "learning_rate": "1.8123e-04", + "loss": 0.6427, + "slid_loss": 0.758, + "step": 3562, + "time": 13.36 + }, + { + "epoch": 1.78, + "learning_rate": "1.8122e-04", + "loss": 0.8258, + "slid_loss": 0.758, + "step": 3563, + "time": 13.28 + }, + { + "epoch": 1.78, + "learning_rate": "1.8121e-04", + "loss": 0.6873, + "slid_loss": 0.7558, + "step": 3564, + "time": 10.95 + }, + { + "epoch": 1.79, + "learning_rate": "1.8119e-04", + "loss": 0.8559, + "slid_loss": 0.7561, + "step": 3565, + "time": 13.77 + }, + { + "epoch": 1.79, + "learning_rate": "1.8118e-04", + "loss": 0.7265, + "slid_loss": 0.7556, + "step": 3566, + "time": 11.7 + }, + { + "epoch": 1.79, + "learning_rate": "1.8117e-04", + "loss": 0.725, + "slid_loss": 0.756, + "step": 3567, + "time": 12.84 + }, + { + "epoch": 1.79, + "learning_rate": "1.8115e-04", + "loss": 0.8146, + "slid_loss": 0.7578, + "step": 3568, + "time": 12.53 + }, + { + "epoch": 1.79, + "learning_rate": "1.8114e-04", + "loss": 0.6215, + "slid_loss": 0.7571, + "step": 3569, + "time": 13.31 + }, + { + "epoch": 1.79, + "learning_rate": "1.8113e-04", + "loss": 0.6851, + "slid_loss": 0.7553, + "step": 3570, + "time": 13.36 + }, + { + "epoch": 1.79, + "learning_rate": "1.8111e-04", + "loss": 0.7409, + "slid_loss": 0.7556, + "step": 3571, + "time": 12.86 + }, + { + "epoch": 1.79, + "learning_rate": "1.8110e-04", + "loss": 0.7604, + "slid_loss": 0.7534, + "step": 3572, + "time": 11.05 + }, + { + "epoch": 1.79, + "learning_rate": "1.8108e-04", + "loss": 0.6883, + "slid_loss": 0.7523, + "step": 3573, + "time": 13.15 + }, + { + "epoch": 1.79, + "learning_rate": "1.8107e-04", + "loss": 0.6533, + "slid_loss": 0.7537, + "step": 3574, + "time": 12.99 + }, + { + "epoch": 1.79, + "learning_rate": "1.8106e-04", + "loss": 0.9344, + "slid_loss": 0.7556, + "step": 3575, + "time": 12.81 + }, + { + "epoch": 1.79, + "learning_rate": "1.8104e-04", + "loss": 0.9376, + "slid_loss": 0.757, + "step": 3576, + "time": 13.52 + }, + { + "epoch": 1.79, + "learning_rate": "1.8103e-04", + "loss": 0.891, + "slid_loss": 0.7575, + "step": 3577, + "time": 12.17 + }, + { + "epoch": 1.79, + "learning_rate": "1.8102e-04", + "loss": 0.6519, + "slid_loss": 0.7563, + "step": 3578, + "time": 12.72 + }, + { + "epoch": 1.79, + "learning_rate": "1.8100e-04", + "loss": 0.5977, + "slid_loss": 0.7541, + "step": 3579, + "time": 10.75 + }, + { + "epoch": 1.79, + "learning_rate": "1.8099e-04", + "loss": 0.9098, + "slid_loss": 0.7566, + "step": 3580, + "time": 12.21 + }, + { + "epoch": 1.79, + "learning_rate": "1.8097e-04", + "loss": 0.8237, + "slid_loss": 0.7544, + "step": 3581, + "time": 12.88 + }, + { + "epoch": 1.79, + "learning_rate": "1.8096e-04", + "loss": 0.6796, + "slid_loss": 0.7533, + "step": 3582, + "time": 11.68 + }, + { + "epoch": 1.79, + "learning_rate": "1.8095e-04", + "loss": 0.8522, + "slid_loss": 0.7533, + "step": 3583, + "time": 12.72 + }, + { + "epoch": 1.79, + "learning_rate": "1.8093e-04", + "loss": 0.7831, + "slid_loss": 0.7532, + "step": 3584, + "time": 13.7 + }, + { + "epoch": 1.8, + "learning_rate": "1.8092e-04", + "loss": 0.7121, + "slid_loss": 0.755, + "step": 3585, + "time": 13.13 + }, + { + "epoch": 1.8, + "learning_rate": "1.8091e-04", + "loss": 0.8902, + "slid_loss": 0.7558, + "step": 3586, + "time": 13.8 + }, + { + "epoch": 1.8, + "learning_rate": "1.8089e-04", + "loss": 0.7681, + "slid_loss": 0.7538, + "step": 3587, + "time": 11.77 + }, + { + "epoch": 1.8, + "learning_rate": "1.8088e-04", + "loss": 0.7457, + "slid_loss": 0.7536, + "step": 3588, + "time": 12.85 + }, + { + "epoch": 1.8, + "learning_rate": "1.8087e-04", + "loss": 0.644, + "slid_loss": 0.7546, + "step": 3589, + "time": 14.76 + }, + { + "epoch": 1.8, + "learning_rate": "1.8085e-04", + "loss": 0.6761, + "slid_loss": 0.7542, + "step": 3590, + "time": 13.3 + }, + { + "epoch": 1.8, + "learning_rate": "1.8084e-04", + "loss": 0.6663, + "slid_loss": 0.7517, + "step": 3591, + "time": 11.22 + }, + { + "epoch": 1.8, + "learning_rate": "1.8082e-04", + "loss": 0.905, + "slid_loss": 0.7538, + "step": 3592, + "time": 10.96 + }, + { + "epoch": 1.8, + "learning_rate": "1.8081e-04", + "loss": 0.7845, + "slid_loss": 0.7543, + "step": 3593, + "time": 13.43 + }, + { + "epoch": 1.8, + "learning_rate": "1.8080e-04", + "loss": 0.7249, + "slid_loss": 0.7526, + "step": 3594, + "time": 14.52 + }, + { + "epoch": 1.8, + "learning_rate": "1.8078e-04", + "loss": 0.836, + "slid_loss": 0.7533, + "step": 3595, + "time": 13.22 + }, + { + "epoch": 1.8, + "learning_rate": "1.8077e-04", + "loss": 0.6966, + "slid_loss": 0.7541, + "step": 3596, + "time": 11.31 + }, + { + "epoch": 1.8, + "learning_rate": "1.8075e-04", + "loss": 0.6832, + "slid_loss": 0.7544, + "step": 3597, + "time": 13.58 + }, + { + "epoch": 1.8, + "learning_rate": "1.8074e-04", + "loss": 0.8506, + "slid_loss": 0.7554, + "step": 3598, + "time": 10.85 + }, + { + "epoch": 1.8, + "learning_rate": "1.8073e-04", + "loss": 0.7049, + "slid_loss": 0.7545, + "step": 3599, + "time": 11.92 + }, + { + "epoch": 1.8, + "learning_rate": "1.8071e-04", + "loss": 0.8369, + "slid_loss": 0.7551, + "step": 3600, + "time": 11.61 + }, + { + "epoch": 1.8, + "learning_rate": "1.8070e-04", + "loss": 0.867, + "slid_loss": 0.757, + "step": 3601, + "time": 13.13 + }, + { + "epoch": 1.8, + "learning_rate": "1.8069e-04", + "loss": 0.8461, + "slid_loss": 0.7573, + "step": 3602, + "time": 13.62 + }, + { + "epoch": 1.8, + "learning_rate": "1.8067e-04", + "loss": 0.7232, + "slid_loss": 0.7547, + "step": 3603, + "time": 11.36 + }, + { + "epoch": 1.8, + "learning_rate": "1.8066e-04", + "loss": 0.707, + "slid_loss": 0.7538, + "step": 3604, + "time": 13.68 + }, + { + "epoch": 1.81, + "learning_rate": "1.8064e-04", + "loss": 0.9433, + "slid_loss": 0.7554, + "step": 3605, + "time": 13.49 + }, + { + "epoch": 1.81, + "learning_rate": "1.8063e-04", + "loss": 0.693, + "slid_loss": 0.7542, + "step": 3606, + "time": 13.07 + }, + { + "epoch": 1.81, + "learning_rate": "1.8062e-04", + "loss": 0.6362, + "slid_loss": 0.753, + "step": 3607, + "time": 13.37 + }, + { + "epoch": 1.81, + "learning_rate": "1.8060e-04", + "loss": 0.8302, + "slid_loss": 0.753, + "step": 3608, + "time": 12.81 + }, + { + "epoch": 1.81, + "learning_rate": "1.8059e-04", + "loss": 0.7245, + "slid_loss": 0.7539, + "step": 3609, + "time": 11.55 + }, + { + "epoch": 1.81, + "learning_rate": "1.8058e-04", + "loss": 0.8712, + "slid_loss": 0.7542, + "step": 3610, + "time": 13.43 + }, + { + "epoch": 1.81, + "learning_rate": "1.8056e-04", + "loss": 0.7277, + "slid_loss": 0.754, + "step": 3611, + "time": 10.74 + }, + { + "epoch": 1.81, + "learning_rate": "1.8055e-04", + "loss": 0.6735, + "slid_loss": 0.753, + "step": 3612, + "time": 11.51 + }, + { + "epoch": 1.81, + "learning_rate": "1.8053e-04", + "loss": 0.6918, + "slid_loss": 0.7523, + "step": 3613, + "time": 11.33 + }, + { + "epoch": 1.81, + "learning_rate": "1.8052e-04", + "loss": 0.9679, + "slid_loss": 0.7553, + "step": 3614, + "time": 13.51 + }, + { + "epoch": 1.81, + "learning_rate": "1.8051e-04", + "loss": 0.8233, + "slid_loss": 0.755, + "step": 3615, + "time": 13.33 + }, + { + "epoch": 1.81, + "learning_rate": "1.8049e-04", + "loss": 0.714, + "slid_loss": 0.754, + "step": 3616, + "time": 11.61 + }, + { + "epoch": 1.81, + "learning_rate": "1.8048e-04", + "loss": 0.6512, + "slid_loss": 0.7529, + "step": 3617, + "time": 11.45 + }, + { + "epoch": 1.81, + "learning_rate": "1.8046e-04", + "loss": 0.7045, + "slid_loss": 0.7531, + "step": 3618, + "time": 13.03 + }, + { + "epoch": 1.81, + "learning_rate": "1.8045e-04", + "loss": 0.7617, + "slid_loss": 0.7549, + "step": 3619, + "time": 13.76 + }, + { + "epoch": 1.81, + "learning_rate": "1.8044e-04", + "loss": 0.8826, + "slid_loss": 0.7577, + "step": 3620, + "time": 11.3 + }, + { + "epoch": 1.81, + "learning_rate": "1.8042e-04", + "loss": 0.6843, + "slid_loss": 0.7563, + "step": 3621, + "time": 11.93 + }, + { + "epoch": 1.81, + "learning_rate": "1.8041e-04", + "loss": 0.7601, + "slid_loss": 0.7564, + "step": 3622, + "time": 12.79 + }, + { + "epoch": 1.81, + "learning_rate": "1.8040e-04", + "loss": 0.8602, + "slid_loss": 0.7587, + "step": 3623, + "time": 14.24 + }, + { + "epoch": 1.81, + "learning_rate": "1.8038e-04", + "loss": 0.8158, + "slid_loss": 0.7588, + "step": 3624, + "time": 12.9 + }, + { + "epoch": 1.82, + "learning_rate": "1.8037e-04", + "loss": 0.7977, + "slid_loss": 0.7597, + "step": 3625, + "time": 11.99 + }, + { + "epoch": 1.82, + "learning_rate": "1.8035e-04", + "loss": 0.8319, + "slid_loss": 0.7617, + "step": 3626, + "time": 11.69 + }, + { + "epoch": 1.82, + "learning_rate": "1.8034e-04", + "loss": 0.7891, + "slid_loss": 0.7619, + "step": 3627, + "time": 12.08 + }, + { + "epoch": 1.82, + "learning_rate": "1.8033e-04", + "loss": 0.8587, + "slid_loss": 0.7627, + "step": 3628, + "time": 11.45 + }, + { + "epoch": 1.82, + "learning_rate": "1.8031e-04", + "loss": 0.7176, + "slid_loss": 0.762, + "step": 3629, + "time": 11.24 + }, + { + "epoch": 1.82, + "learning_rate": "1.8030e-04", + "loss": 0.8172, + "slid_loss": 0.7642, + "step": 3630, + "time": 13.31 + }, + { + "epoch": 1.82, + "learning_rate": "1.8028e-04", + "loss": 0.7162, + "slid_loss": 0.7636, + "step": 3631, + "time": 13.23 + }, + { + "epoch": 1.82, + "learning_rate": "1.8027e-04", + "loss": 0.761, + "slid_loss": 0.7632, + "step": 3632, + "time": 11.33 + }, + { + "epoch": 1.82, + "learning_rate": "1.8026e-04", + "loss": 0.8278, + "slid_loss": 0.7634, + "step": 3633, + "time": 12.21 + }, + { + "epoch": 1.82, + "learning_rate": "1.8024e-04", + "loss": 0.7194, + "slid_loss": 0.7629, + "step": 3634, + "time": 14.06 + }, + { + "epoch": 1.82, + "learning_rate": "1.8023e-04", + "loss": 0.815, + "slid_loss": 0.7635, + "step": 3635, + "time": 11.87 + }, + { + "epoch": 1.82, + "learning_rate": "1.8021e-04", + "loss": 0.7704, + "slid_loss": 0.7656, + "step": 3636, + "time": 11.24 + }, + { + "epoch": 1.82, + "learning_rate": "1.8020e-04", + "loss": 0.6804, + "slid_loss": 0.7641, + "step": 3637, + "time": 11.73 + }, + { + "epoch": 1.82, + "learning_rate": "1.8019e-04", + "loss": 0.7834, + "slid_loss": 0.7632, + "step": 3638, + "time": 13.65 + }, + { + "epoch": 1.82, + "learning_rate": "1.8017e-04", + "loss": 0.6376, + "slid_loss": 0.7614, + "step": 3639, + "time": 13.29 + }, + { + "epoch": 1.82, + "learning_rate": "1.8016e-04", + "loss": 0.7506, + "slid_loss": 0.7603, + "step": 3640, + "time": 11.31 + }, + { + "epoch": 1.82, + "learning_rate": "1.8014e-04", + "loss": 0.6186, + "slid_loss": 0.7579, + "step": 3641, + "time": 13.28 + }, + { + "epoch": 1.82, + "learning_rate": "1.8013e-04", + "loss": 0.866, + "slid_loss": 0.761, + "step": 3642, + "time": 13.2 + }, + { + "epoch": 1.82, + "learning_rate": "1.8012e-04", + "loss": 0.806, + "slid_loss": 0.7628, + "step": 3643, + "time": 11.13 + }, + { + "epoch": 1.82, + "learning_rate": "1.8010e-04", + "loss": 0.7284, + "slid_loss": 0.7623, + "step": 3644, + "time": 13.76 + }, + { + "epoch": 1.83, + "learning_rate": "1.8009e-04", + "loss": 0.6416, + "slid_loss": 0.7597, + "step": 3645, + "time": 11.21 + }, + { + "epoch": 1.83, + "learning_rate": "1.8008e-04", + "loss": 0.6965, + "slid_loss": 0.7602, + "step": 3646, + "time": 14.1 + }, + { + "epoch": 1.83, + "learning_rate": "1.8006e-04", + "loss": 0.7516, + "slid_loss": 0.7592, + "step": 3647, + "time": 12.68 + }, + { + "epoch": 1.83, + "learning_rate": "1.8005e-04", + "loss": 0.8806, + "slid_loss": 0.7583, + "step": 3648, + "time": 11.46 + }, + { + "epoch": 1.83, + "learning_rate": "1.8003e-04", + "loss": 0.9578, + "slid_loss": 0.7622, + "step": 3649, + "time": 10.61 + }, + { + "epoch": 1.83, + "learning_rate": "1.8002e-04", + "loss": 0.6591, + "slid_loss": 0.7618, + "step": 3650, + "time": 11.66 + }, + { + "epoch": 1.83, + "learning_rate": "1.8001e-04", + "loss": 0.7996, + "slid_loss": 0.7611, + "step": 3651, + "time": 10.79 + }, + { + "epoch": 1.83, + "learning_rate": "1.7999e-04", + "loss": 0.8432, + "slid_loss": 0.7621, + "step": 3652, + "time": 12.22 + }, + { + "epoch": 1.83, + "learning_rate": "1.7998e-04", + "loss": 0.7127, + "slid_loss": 0.7635, + "step": 3653, + "time": 13.26 + }, + { + "epoch": 1.83, + "learning_rate": "1.7996e-04", + "loss": 0.7225, + "slid_loss": 0.7637, + "step": 3654, + "time": 13.27 + }, + { + "epoch": 1.83, + "learning_rate": "1.7995e-04", + "loss": 0.777, + "slid_loss": 0.7633, + "step": 3655, + "time": 13.45 + }, + { + "epoch": 1.83, + "learning_rate": "1.7994e-04", + "loss": 0.7034, + "slid_loss": 0.7635, + "step": 3656, + "time": 11.14 + }, + { + "epoch": 1.83, + "learning_rate": "1.7992e-04", + "loss": 0.7826, + "slid_loss": 0.7647, + "step": 3657, + "time": 13.15 + }, + { + "epoch": 1.83, + "learning_rate": "1.7991e-04", + "loss": 0.7792, + "slid_loss": 0.7642, + "step": 3658, + "time": 12.23 + }, + { + "epoch": 1.83, + "learning_rate": "1.7989e-04", + "loss": 0.7221, + "slid_loss": 0.764, + "step": 3659, + "time": 13.63 + }, + { + "epoch": 1.83, + "learning_rate": "1.7988e-04", + "loss": 0.6644, + "slid_loss": 0.7626, + "step": 3660, + "time": 12.97 + }, + { + "epoch": 1.83, + "learning_rate": "1.7987e-04", + "loss": 0.8246, + "slid_loss": 0.7639, + "step": 3661, + "time": 11.41 + }, + { + "epoch": 1.83, + "learning_rate": "1.7985e-04", + "loss": 0.7259, + "slid_loss": 0.7647, + "step": 3662, + "time": 12.89 + }, + { + "epoch": 1.83, + "learning_rate": "1.7984e-04", + "loss": 0.6637, + "slid_loss": 0.7631, + "step": 3663, + "time": 11.37 + }, + { + "epoch": 1.83, + "learning_rate": "1.7982e-04", + "loss": 0.7555, + "slid_loss": 0.7637, + "step": 3664, + "time": 11.31 + }, + { + "epoch": 1.84, + "learning_rate": "1.7981e-04", + "loss": 0.7184, + "slid_loss": 0.7624, + "step": 3665, + "time": 12.03 + }, + { + "epoch": 1.84, + "learning_rate": "1.7980e-04", + "loss": 0.7781, + "slid_loss": 0.7629, + "step": 3666, + "time": 12.93 + }, + { + "epoch": 1.84, + "learning_rate": "1.7978e-04", + "loss": 0.7793, + "slid_loss": 0.7634, + "step": 3667, + "time": 13.22 + }, + { + "epoch": 1.84, + "learning_rate": "1.7977e-04", + "loss": 0.8553, + "slid_loss": 0.7638, + "step": 3668, + "time": 13.29 + }, + { + "epoch": 1.84, + "learning_rate": "1.7975e-04", + "loss": 0.6263, + "slid_loss": 0.7639, + "step": 3669, + "time": 13.38 + }, + { + "epoch": 1.84, + "learning_rate": "1.7974e-04", + "loss": 0.7273, + "slid_loss": 0.7643, + "step": 3670, + "time": 14.04 + }, + { + "epoch": 1.84, + "learning_rate": "1.7972e-04", + "loss": 0.7995, + "slid_loss": 0.7649, + "step": 3671, + "time": 13.79 + }, + { + "epoch": 1.84, + "learning_rate": "1.7971e-04", + "loss": 0.6622, + "slid_loss": 0.7639, + "step": 3672, + "time": 10.53 + }, + { + "epoch": 1.84, + "learning_rate": "1.7970e-04", + "loss": 0.6337, + "slid_loss": 0.7634, + "step": 3673, + "time": 10.65 + }, + { + "epoch": 1.84, + "learning_rate": "1.7968e-04", + "loss": 0.7644, + "slid_loss": 0.7645, + "step": 3674, + "time": 12.81 + }, + { + "epoch": 1.84, + "learning_rate": "1.7967e-04", + "loss": 0.8063, + "slid_loss": 0.7632, + "step": 3675, + "time": 12.99 + }, + { + "epoch": 1.84, + "learning_rate": "1.7965e-04", + "loss": 0.7645, + "slid_loss": 0.7615, + "step": 3676, + "time": 13.82 + }, + { + "epoch": 1.84, + "learning_rate": "1.7964e-04", + "loss": 0.637, + "slid_loss": 0.7589, + "step": 3677, + "time": 11.27 + }, + { + "epoch": 1.84, + "learning_rate": "1.7963e-04", + "loss": 0.7383, + "slid_loss": 0.7598, + "step": 3678, + "time": 12.04 + }, + { + "epoch": 1.84, + "learning_rate": "1.7961e-04", + "loss": 0.7099, + "slid_loss": 0.7609, + "step": 3679, + "time": 10.92 + }, + { + "epoch": 1.84, + "learning_rate": "1.7960e-04", + "loss": 0.9643, + "slid_loss": 0.7615, + "step": 3680, + "time": 13.33 + }, + { + "epoch": 1.84, + "learning_rate": "1.7958e-04", + "loss": 0.9759, + "slid_loss": 0.763, + "step": 3681, + "time": 13.35 + }, + { + "epoch": 1.84, + "learning_rate": "1.7957e-04", + "loss": 0.7575, + "slid_loss": 0.7638, + "step": 3682, + "time": 11.78 + }, + { + "epoch": 1.84, + "learning_rate": "1.7956e-04", + "loss": 0.6958, + "slid_loss": 0.7622, + "step": 3683, + "time": 13.04 + }, + { + "epoch": 1.84, + "learning_rate": "1.7954e-04", + "loss": 0.8242, + "slid_loss": 0.7626, + "step": 3684, + "time": 13.65 + }, + { + "epoch": 1.85, + "learning_rate": "1.7953e-04", + "loss": 0.9862, + "slid_loss": 0.7653, + "step": 3685, + "time": 12.5 + }, + { + "epoch": 1.85, + "learning_rate": "1.7951e-04", + "loss": 0.6889, + "slid_loss": 0.7633, + "step": 3686, + "time": 11.23 + }, + { + "epoch": 1.85, + "learning_rate": "1.7950e-04", + "loss": 0.7869, + "slid_loss": 0.7635, + "step": 3687, + "time": 11.64 + }, + { + "epoch": 1.85, + "learning_rate": "1.7949e-04", + "loss": 0.9038, + "slid_loss": 0.7651, + "step": 3688, + "time": 13.47 + }, + { + "epoch": 1.85, + "learning_rate": "1.7947e-04", + "loss": 0.5758, + "slid_loss": 0.7644, + "step": 3689, + "time": 11.02 + }, + { + "epoch": 1.85, + "learning_rate": "1.7946e-04", + "loss": 0.6655, + "slid_loss": 0.7643, + "step": 3690, + "time": 11.74 + }, + { + "epoch": 1.85, + "learning_rate": "1.7944e-04", + "loss": 0.8437, + "slid_loss": 0.7661, + "step": 3691, + "time": 13.59 + }, + { + "epoch": 1.85, + "learning_rate": "1.7943e-04", + "loss": 0.9093, + "slid_loss": 0.7661, + "step": 3692, + "time": 11.86 + }, + { + "epoch": 1.85, + "learning_rate": "1.7941e-04", + "loss": 0.8084, + "slid_loss": 0.7664, + "step": 3693, + "time": 11.24 + }, + { + "epoch": 1.85, + "learning_rate": "1.7940e-04", + "loss": 0.809, + "slid_loss": 0.7672, + "step": 3694, + "time": 12.91 + }, + { + "epoch": 1.85, + "learning_rate": "1.7939e-04", + "loss": 0.5073, + "slid_loss": 0.7639, + "step": 3695, + "time": 12.36 + }, + { + "epoch": 1.85, + "learning_rate": "1.7937e-04", + "loss": 0.7655, + "slid_loss": 0.7646, + "step": 3696, + "time": 11.63 + }, + { + "epoch": 1.85, + "learning_rate": "1.7936e-04", + "loss": 0.5233, + "slid_loss": 0.763, + "step": 3697, + "time": 13.29 + }, + { + "epoch": 1.85, + "learning_rate": "1.7934e-04", + "loss": 0.7962, + "slid_loss": 0.7625, + "step": 3698, + "time": 11.23 + }, + { + "epoch": 1.85, + "learning_rate": "1.7933e-04", + "loss": 1.0074, + "slid_loss": 0.7655, + "step": 3699, + "time": 11.89 + }, + { + "epoch": 1.85, + "learning_rate": "1.7932e-04", + "loss": 0.7586, + "slid_loss": 0.7647, + "step": 3700, + "time": 12.21 + }, + { + "epoch": 1.85, + "learning_rate": "1.7930e-04", + "loss": 0.7445, + "slid_loss": 0.7635, + "step": 3701, + "time": 13.32 + }, + { + "epoch": 1.85, + "learning_rate": "1.7929e-04", + "loss": 0.7609, + "slid_loss": 0.7626, + "step": 3702, + "time": 10.96 + }, + { + "epoch": 1.85, + "learning_rate": "1.7927e-04", + "loss": 0.7678, + "slid_loss": 0.7631, + "step": 3703, + "time": 10.8 + }, + { + "epoch": 1.85, + "learning_rate": "1.7926e-04", + "loss": 0.6165, + "slid_loss": 0.7622, + "step": 3704, + "time": 11.03 + }, + { + "epoch": 1.86, + "learning_rate": "1.7924e-04", + "loss": 0.8287, + "slid_loss": 0.761, + "step": 3705, + "time": 13.31 + }, + { + "epoch": 1.86, + "learning_rate": "1.7923e-04", + "loss": 0.7944, + "slid_loss": 0.762, + "step": 3706, + "time": 11.75 + }, + { + "epoch": 1.86, + "learning_rate": "1.7922e-04", + "loss": 0.5561, + "slid_loss": 0.7612, + "step": 3707, + "time": 10.8 + }, + { + "epoch": 1.86, + "learning_rate": "1.7920e-04", + "loss": 0.5529, + "slid_loss": 0.7585, + "step": 3708, + "time": 12.82 + }, + { + "epoch": 1.86, + "learning_rate": "1.7919e-04", + "loss": 0.9128, + "slid_loss": 0.7604, + "step": 3709, + "time": 12.97 + }, + { + "epoch": 1.86, + "learning_rate": "1.7917e-04", + "loss": 0.7726, + "slid_loss": 0.7594, + "step": 3710, + "time": 12.9 + }, + { + "epoch": 1.86, + "learning_rate": "1.7916e-04", + "loss": 0.8644, + "slid_loss": 0.7607, + "step": 3711, + "time": 13.28 + }, + { + "epoch": 1.86, + "learning_rate": "1.7915e-04", + "loss": 0.6545, + "slid_loss": 0.7605, + "step": 3712, + "time": 13.08 + }, + { + "epoch": 1.86, + "learning_rate": "1.7913e-04", + "loss": 0.6953, + "slid_loss": 0.7606, + "step": 3713, + "time": 13.41 + }, + { + "epoch": 1.86, + "learning_rate": "1.7912e-04", + "loss": 0.743, + "slid_loss": 0.7583, + "step": 3714, + "time": 11.33 + }, + { + "epoch": 1.86, + "learning_rate": "1.7910e-04", + "loss": 0.7506, + "slid_loss": 0.7576, + "step": 3715, + "time": 10.58 + }, + { + "epoch": 1.86, + "learning_rate": "1.7909e-04", + "loss": 0.8874, + "slid_loss": 0.7593, + "step": 3716, + "time": 11.39 + }, + { + "epoch": 1.86, + "learning_rate": "1.7907e-04", + "loss": 0.7097, + "slid_loss": 0.7599, + "step": 3717, + "time": 13.98 + }, + { + "epoch": 1.86, + "learning_rate": "1.7906e-04", + "loss": 0.7994, + "slid_loss": 0.7609, + "step": 3718, + "time": 10.86 + }, + { + "epoch": 1.86, + "learning_rate": "1.7905e-04", + "loss": 0.7801, + "slid_loss": 0.7611, + "step": 3719, + "time": 12.93 + }, + { + "epoch": 1.86, + "learning_rate": "1.7903e-04", + "loss": 0.8578, + "slid_loss": 0.7608, + "step": 3720, + "time": 13.24 + }, + { + "epoch": 1.86, + "learning_rate": "1.7902e-04", + "loss": 0.8368, + "slid_loss": 0.7623, + "step": 3721, + "time": 11.02 + }, + { + "epoch": 1.86, + "learning_rate": "1.7900e-04", + "loss": 0.7424, + "slid_loss": 0.7622, + "step": 3722, + "time": 11.39 + }, + { + "epoch": 1.86, + "learning_rate": "1.7899e-04", + "loss": 0.7327, + "slid_loss": 0.7609, + "step": 3723, + "time": 12.07 + }, + { + "epoch": 1.86, + "learning_rate": "1.7897e-04", + "loss": 0.7521, + "slid_loss": 0.7602, + "step": 3724, + "time": 11.86 + }, + { + "epoch": 1.87, + "learning_rate": "1.7896e-04", + "loss": 0.7351, + "slid_loss": 0.7596, + "step": 3725, + "time": 10.68 + }, + { + "epoch": 1.87, + "learning_rate": "1.7895e-04", + "loss": 0.7633, + "slid_loss": 0.7589, + "step": 3726, + "time": 11.81 + }, + { + "epoch": 1.87, + "learning_rate": "1.7893e-04", + "loss": 0.7694, + "slid_loss": 0.7587, + "step": 3727, + "time": 13.45 + }, + { + "epoch": 1.87, + "learning_rate": "1.7892e-04", + "loss": 0.7598, + "slid_loss": 0.7577, + "step": 3728, + "time": 12.97 + }, + { + "epoch": 1.87, + "learning_rate": "1.7890e-04", + "loss": 0.7572, + "slid_loss": 0.7581, + "step": 3729, + "time": 12.3 + }, + { + "epoch": 1.87, + "learning_rate": "1.7889e-04", + "loss": 0.8127, + "slid_loss": 0.7581, + "step": 3730, + "time": 13.65 + }, + { + "epoch": 1.87, + "learning_rate": "1.7887e-04", + "loss": 0.7239, + "slid_loss": 0.7582, + "step": 3731, + "time": 12.85 + }, + { + "epoch": 1.87, + "learning_rate": "1.7886e-04", + "loss": 0.8201, + "slid_loss": 0.7588, + "step": 3732, + "time": 13.06 + }, + { + "epoch": 1.87, + "learning_rate": "1.7885e-04", + "loss": 0.7694, + "slid_loss": 0.7582, + "step": 3733, + "time": 12.84 + }, + { + "epoch": 1.87, + "learning_rate": "1.7883e-04", + "loss": 0.7462, + "slid_loss": 0.7584, + "step": 3734, + "time": 11.99 + }, + { + "epoch": 1.87, + "learning_rate": "1.7882e-04", + "loss": 0.729, + "slid_loss": 0.7576, + "step": 3735, + "time": 13.35 + }, + { + "epoch": 1.87, + "learning_rate": "1.7880e-04", + "loss": 0.733, + "slid_loss": 0.7572, + "step": 3736, + "time": 13.38 + }, + { + "epoch": 1.87, + "learning_rate": "1.7879e-04", + "loss": 0.8193, + "slid_loss": 0.7586, + "step": 3737, + "time": 12.69 + }, + { + "epoch": 1.87, + "learning_rate": "1.7877e-04", + "loss": 0.5753, + "slid_loss": 0.7565, + "step": 3738, + "time": 13.83 + }, + { + "epoch": 1.87, + "learning_rate": "1.7876e-04", + "loss": 0.7171, + "slid_loss": 0.7573, + "step": 3739, + "time": 11.24 + }, + { + "epoch": 1.87, + "learning_rate": "1.7875e-04", + "loss": 0.7894, + "slid_loss": 0.7577, + "step": 3740, + "time": 13.89 + }, + { + "epoch": 1.87, + "learning_rate": "1.7873e-04", + "loss": 0.8146, + "slid_loss": 0.7597, + "step": 3741, + "time": 11.69 + }, + { + "epoch": 1.87, + "learning_rate": "1.7872e-04", + "loss": 0.7895, + "slid_loss": 0.7589, + "step": 3742, + "time": 11.73 + }, + { + "epoch": 1.87, + "learning_rate": "1.7870e-04", + "loss": 0.7387, + "slid_loss": 0.7582, + "step": 3743, + "time": 11.45 + }, + { + "epoch": 1.87, + "learning_rate": "1.7869e-04", + "loss": 0.6495, + "slid_loss": 0.7574, + "step": 3744, + "time": 11.67 + }, + { + "epoch": 1.88, + "learning_rate": "1.7867e-04", + "loss": 0.6896, + "slid_loss": 0.7579, + "step": 3745, + "time": 11.72 + }, + { + "epoch": 1.88, + "learning_rate": "1.7866e-04", + "loss": 0.6378, + "slid_loss": 0.7573, + "step": 3746, + "time": 10.55 + }, + { + "epoch": 1.88, + "learning_rate": "1.7865e-04", + "loss": 0.8352, + "slid_loss": 0.7582, + "step": 3747, + "time": 11.27 + }, + { + "epoch": 1.88, + "learning_rate": "1.7863e-04", + "loss": 0.7183, + "slid_loss": 0.7565, + "step": 3748, + "time": 12.32 + }, + { + "epoch": 1.88, + "learning_rate": "1.7862e-04", + "loss": 0.7986, + "slid_loss": 0.7549, + "step": 3749, + "time": 13.67 + }, + { + "epoch": 1.88, + "learning_rate": "1.7860e-04", + "loss": 0.6876, + "slid_loss": 0.7552, + "step": 3750, + "time": 10.97 + }, + { + "epoch": 1.88, + "learning_rate": "1.7859e-04", + "loss": 0.8824, + "slid_loss": 0.7561, + "step": 3751, + "time": 14.01 + }, + { + "epoch": 1.88, + "learning_rate": "1.7857e-04", + "loss": 0.6893, + "slid_loss": 0.7545, + "step": 3752, + "time": 13.69 + }, + { + "epoch": 1.88, + "learning_rate": "1.7856e-04", + "loss": 0.773, + "slid_loss": 0.7551, + "step": 3753, + "time": 13.64 + }, + { + "epoch": 1.88, + "learning_rate": "1.7855e-04", + "loss": 0.7807, + "slid_loss": 0.7557, + "step": 3754, + "time": 13.97 + }, + { + "epoch": 1.88, + "learning_rate": "1.7853e-04", + "loss": 0.816, + "slid_loss": 0.7561, + "step": 3755, + "time": 13.31 + }, + { + "epoch": 1.88, + "learning_rate": "1.7852e-04", + "loss": 0.6542, + "slid_loss": 0.7556, + "step": 3756, + "time": 10.66 + }, + { + "epoch": 1.88, + "learning_rate": "1.7850e-04", + "loss": 0.7341, + "slid_loss": 0.7551, + "step": 3757, + "time": 11.03 + }, + { + "epoch": 1.88, + "learning_rate": "1.7849e-04", + "loss": 0.7137, + "slid_loss": 0.7545, + "step": 3758, + "time": 12.18 + }, + { + "epoch": 1.88, + "learning_rate": "1.7847e-04", + "loss": 0.7849, + "slid_loss": 0.7551, + "step": 3759, + "time": 13.3 + }, + { + "epoch": 1.88, + "learning_rate": "1.7846e-04", + "loss": 0.7494, + "slid_loss": 0.7559, + "step": 3760, + "time": 12.24 + }, + { + "epoch": 1.88, + "learning_rate": "1.7845e-04", + "loss": 0.8036, + "slid_loss": 0.7557, + "step": 3761, + "time": 12.72 + }, + { + "epoch": 1.88, + "learning_rate": "1.7843e-04", + "loss": 0.5837, + "slid_loss": 0.7543, + "step": 3762, + "time": 14.05 + }, + { + "epoch": 1.88, + "learning_rate": "1.7842e-04", + "loss": 0.6918, + "slid_loss": 0.7546, + "step": 3763, + "time": 13.84 + }, + { + "epoch": 1.88, + "learning_rate": "1.7840e-04", + "loss": 0.583, + "slid_loss": 0.7529, + "step": 3764, + "time": 13.02 + }, + { + "epoch": 1.89, + "learning_rate": "1.7839e-04", + "loss": 0.8134, + "slid_loss": 0.7538, + "step": 3765, + "time": 11.67 + }, + { + "epoch": 1.89, + "learning_rate": "1.7837e-04", + "loss": 0.8781, + "slid_loss": 0.7548, + "step": 3766, + "time": 10.89 + }, + { + "epoch": 1.89, + "learning_rate": "1.7836e-04", + "loss": 0.8613, + "slid_loss": 0.7556, + "step": 3767, + "time": 13.3 + }, + { + "epoch": 1.89, + "learning_rate": "1.7834e-04", + "loss": 0.7087, + "slid_loss": 0.7542, + "step": 3768, + "time": 13.62 + }, + { + "epoch": 1.89, + "learning_rate": "1.7833e-04", + "loss": 0.8308, + "slid_loss": 0.7562, + "step": 3769, + "time": 11.65 + }, + { + "epoch": 1.89, + "learning_rate": "1.7832e-04", + "loss": 0.6449, + "slid_loss": 0.7554, + "step": 3770, + "time": 11.67 + }, + { + "epoch": 1.89, + "learning_rate": "1.7830e-04", + "loss": 0.6714, + "slid_loss": 0.7541, + "step": 3771, + "time": 13.08 + }, + { + "epoch": 1.89, + "learning_rate": "1.7829e-04", + "loss": 0.7139, + "slid_loss": 0.7546, + "step": 3772, + "time": 13.42 + }, + { + "epoch": 1.89, + "learning_rate": "1.7827e-04", + "loss": 0.6379, + "slid_loss": 0.7547, + "step": 3773, + "time": 12.87 + }, + { + "epoch": 1.89, + "learning_rate": "1.7826e-04", + "loss": 0.6226, + "slid_loss": 0.7533, + "step": 3774, + "time": 13.3 + }, + { + "epoch": 1.89, + "learning_rate": "1.7824e-04", + "loss": 0.7082, + "slid_loss": 0.7523, + "step": 3775, + "time": 12.75 + }, + { + "epoch": 1.89, + "learning_rate": "1.7823e-04", + "loss": 0.8194, + "slid_loss": 0.7528, + "step": 3776, + "time": 11.38 + }, + { + "epoch": 1.89, + "learning_rate": "1.7821e-04", + "loss": 0.7725, + "slid_loss": 0.7542, + "step": 3777, + "time": 11.23 + }, + { + "epoch": 1.89, + "learning_rate": "1.7820e-04", + "loss": 0.5377, + "slid_loss": 0.7522, + "step": 3778, + "time": 12.33 + }, + { + "epoch": 1.89, + "learning_rate": "1.7819e-04", + "loss": 0.707, + "slid_loss": 0.7521, + "step": 3779, + "time": 13.39 + }, + { + "epoch": 1.89, + "learning_rate": "1.7817e-04", + "loss": 0.7597, + "slid_loss": 0.7501, + "step": 3780, + "time": 10.81 + }, + { + "epoch": 1.89, + "learning_rate": "1.7816e-04", + "loss": 0.7591, + "slid_loss": 0.7479, + "step": 3781, + "time": 12.5 + }, + { + "epoch": 1.89, + "learning_rate": "1.7814e-04", + "loss": 0.7305, + "slid_loss": 0.7477, + "step": 3782, + "time": 13.9 + }, + { + "epoch": 1.89, + "learning_rate": "1.7813e-04", + "loss": 0.7326, + "slid_loss": 0.748, + "step": 3783, + "time": 11.09 + }, + { + "epoch": 1.89, + "learning_rate": "1.7811e-04", + "loss": 0.7728, + "slid_loss": 0.7475, + "step": 3784, + "time": 11.92 + }, + { + "epoch": 1.9, + "learning_rate": "1.7810e-04", + "loss": 0.6714, + "slid_loss": 0.7444, + "step": 3785, + "time": 11.4 + }, + { + "epoch": 1.9, + "learning_rate": "1.7808e-04", + "loss": 0.7376, + "slid_loss": 0.7448, + "step": 3786, + "time": 13.77 + }, + { + "epoch": 1.9, + "learning_rate": "1.7807e-04", + "loss": 0.6464, + "slid_loss": 0.7434, + "step": 3787, + "time": 12.92 + }, + { + "epoch": 1.9, + "learning_rate": "1.7806e-04", + "loss": 0.8742, + "slid_loss": 0.7431, + "step": 3788, + "time": 12.46 + }, + { + "epoch": 1.9, + "learning_rate": "1.7804e-04", + "loss": 0.8001, + "slid_loss": 0.7454, + "step": 3789, + "time": 12.85 + }, + { + "epoch": 1.9, + "learning_rate": "1.7803e-04", + "loss": 0.6673, + "slid_loss": 0.7454, + "step": 3790, + "time": 11.34 + }, + { + "epoch": 1.9, + "learning_rate": "1.7801e-04", + "loss": 0.6323, + "slid_loss": 0.7433, + "step": 3791, + "time": 12.92 + }, + { + "epoch": 1.9, + "learning_rate": "1.7800e-04", + "loss": 0.7181, + "slid_loss": 0.7414, + "step": 3792, + "time": 13.49 + }, + { + "epoch": 1.9, + "learning_rate": "1.7798e-04", + "loss": 0.6517, + "slid_loss": 0.7398, + "step": 3793, + "time": 13.31 + }, + { + "epoch": 1.9, + "learning_rate": "1.7797e-04", + "loss": 0.6424, + "slid_loss": 0.7381, + "step": 3794, + "time": 10.63 + }, + { + "epoch": 1.9, + "learning_rate": "1.7795e-04", + "loss": 0.7643, + "slid_loss": 0.7407, + "step": 3795, + "time": 11.24 + }, + { + "epoch": 1.9, + "learning_rate": "1.7794e-04", + "loss": 0.6641, + "slid_loss": 0.7397, + "step": 3796, + "time": 12.93 + }, + { + "epoch": 1.9, + "learning_rate": "1.7793e-04", + "loss": 0.596, + "slid_loss": 0.7404, + "step": 3797, + "time": 14.19 + }, + { + "epoch": 1.9, + "learning_rate": "1.7791e-04", + "loss": 0.7058, + "slid_loss": 0.7395, + "step": 3798, + "time": 13.13 + }, + { + "epoch": 1.9, + "learning_rate": "1.7790e-04", + "loss": 0.7319, + "slid_loss": 0.7368, + "step": 3799, + "time": 12.86 + }, + { + "epoch": 1.9, + "learning_rate": "1.7788e-04", + "loss": 0.798, + "slid_loss": 0.7372, + "step": 3800, + "time": 11.24 + }, + { + "epoch": 1.9, + "learning_rate": "1.7787e-04", + "loss": 0.8288, + "slid_loss": 0.738, + "step": 3801, + "time": 12.43 + }, + { + "epoch": 1.9, + "learning_rate": "1.7785e-04", + "loss": 0.6605, + "slid_loss": 0.737, + "step": 3802, + "time": 11.14 + }, + { + "epoch": 1.9, + "learning_rate": "1.7784e-04", + "loss": 0.777, + "slid_loss": 0.7371, + "step": 3803, + "time": 14.04 + }, + { + "epoch": 1.9, + "learning_rate": "1.7782e-04", + "loss": 0.8039, + "slid_loss": 0.739, + "step": 3804, + "time": 12.43 + }, + { + "epoch": 1.91, + "learning_rate": "1.7781e-04", + "loss": 0.7378, + "slid_loss": 0.7381, + "step": 3805, + "time": 11.34 + }, + { + "epoch": 1.91, + "learning_rate": "1.7779e-04", + "loss": 0.8356, + "slid_loss": 0.7385, + "step": 3806, + "time": 13.04 + }, + { + "epoch": 1.91, + "learning_rate": "1.7778e-04", + "loss": 0.6279, + "slid_loss": 0.7392, + "step": 3807, + "time": 11.29 + }, + { + "epoch": 1.91, + "learning_rate": "1.7777e-04", + "loss": 0.7483, + "slid_loss": 0.7411, + "step": 3808, + "time": 12.35 + }, + { + "epoch": 1.91, + "learning_rate": "1.7775e-04", + "loss": 0.6659, + "slid_loss": 0.7387, + "step": 3809, + "time": 11.18 + }, + { + "epoch": 1.91, + "learning_rate": "1.7774e-04", + "loss": 0.6709, + "slid_loss": 0.7377, + "step": 3810, + "time": 12.44 + }, + { + "epoch": 1.91, + "learning_rate": "1.7772e-04", + "loss": 0.7532, + "slid_loss": 0.7365, + "step": 3811, + "time": 13.85 + }, + { + "epoch": 1.91, + "learning_rate": "1.7771e-04", + "loss": 0.76, + "slid_loss": 0.7376, + "step": 3812, + "time": 13.46 + }, + { + "epoch": 1.91, + "learning_rate": "1.7769e-04", + "loss": 0.6735, + "slid_loss": 0.7374, + "step": 3813, + "time": 13.64 + }, + { + "epoch": 1.91, + "learning_rate": "1.7768e-04", + "loss": 0.9812, + "slid_loss": 0.7398, + "step": 3814, + "time": 12.31 + }, + { + "epoch": 1.91, + "learning_rate": "1.7766e-04", + "loss": 1.0074, + "slid_loss": 0.7423, + "step": 3815, + "time": 12.85 + }, + { + "epoch": 1.91, + "learning_rate": "1.7765e-04", + "loss": 0.7046, + "slid_loss": 0.7405, + "step": 3816, + "time": 10.85 + }, + { + "epoch": 1.91, + "learning_rate": "1.7763e-04", + "loss": 0.8109, + "slid_loss": 0.7415, + "step": 3817, + "time": 12.98 + }, + { + "epoch": 1.91, + "learning_rate": "1.7762e-04", + "loss": 0.784, + "slid_loss": 0.7414, + "step": 3818, + "time": 13.35 + }, + { + "epoch": 1.91, + "learning_rate": "1.7761e-04", + "loss": 0.8476, + "slid_loss": 0.742, + "step": 3819, + "time": 12.8 + }, + { + "epoch": 1.91, + "learning_rate": "1.7759e-04", + "loss": 0.7621, + "slid_loss": 0.7411, + "step": 3820, + "time": 12.8 + }, + { + "epoch": 1.91, + "learning_rate": "1.7758e-04", + "loss": 0.8215, + "slid_loss": 0.7409, + "step": 3821, + "time": 11.19 + }, + { + "epoch": 1.91, + "learning_rate": "1.7756e-04", + "loss": 0.6995, + "slid_loss": 0.7405, + "step": 3822, + "time": 13.47 + }, + { + "epoch": 1.91, + "learning_rate": "1.7755e-04", + "loss": 0.5581, + "slid_loss": 0.7388, + "step": 3823, + "time": 13.48 + }, + { + "epoch": 1.91, + "learning_rate": "1.7753e-04", + "loss": 0.6193, + "slid_loss": 0.7374, + "step": 3824, + "time": 14.21 + }, + { + "epoch": 1.92, + "learning_rate": "1.7752e-04", + "loss": 0.6537, + "slid_loss": 0.7366, + "step": 3825, + "time": 13.17 + }, + { + "epoch": 1.92, + "learning_rate": "1.7750e-04", + "loss": 0.6912, + "slid_loss": 0.7359, + "step": 3826, + "time": 11.36 + }, + { + "epoch": 1.92, + "learning_rate": "1.7749e-04", + "loss": 0.8216, + "slid_loss": 0.7364, + "step": 3827, + "time": 12.58 + }, + { + "epoch": 1.92, + "learning_rate": "1.7747e-04", + "loss": 0.6149, + "slid_loss": 0.735, + "step": 3828, + "time": 11.34 + }, + { + "epoch": 1.92, + "learning_rate": "1.7746e-04", + "loss": 0.6921, + "slid_loss": 0.7343, + "step": 3829, + "time": 11.14 + }, + { + "epoch": 1.92, + "learning_rate": "1.7745e-04", + "loss": 0.794, + "slid_loss": 0.7341, + "step": 3830, + "time": 12.11 + }, + { + "epoch": 1.92, + "learning_rate": "1.7743e-04", + "loss": 0.8891, + "slid_loss": 0.7358, + "step": 3831, + "time": 12.85 + }, + { + "epoch": 1.92, + "learning_rate": "1.7742e-04", + "loss": 0.779, + "slid_loss": 0.7354, + "step": 3832, + "time": 13.26 + }, + { + "epoch": 1.92, + "learning_rate": "1.7740e-04", + "loss": 0.8271, + "slid_loss": 0.7359, + "step": 3833, + "time": 10.94 + }, + { + "epoch": 1.92, + "learning_rate": "1.7739e-04", + "loss": 0.6092, + "slid_loss": 0.7346, + "step": 3834, + "time": 10.62 + }, + { + "epoch": 1.92, + "learning_rate": "1.7737e-04", + "loss": 0.7396, + "slid_loss": 0.7347, + "step": 3835, + "time": 13.45 + }, + { + "epoch": 1.92, + "learning_rate": "1.7736e-04", + "loss": 0.716, + "slid_loss": 0.7345, + "step": 3836, + "time": 11.24 + }, + { + "epoch": 1.92, + "learning_rate": "1.7734e-04", + "loss": 0.8785, + "slid_loss": 0.7351, + "step": 3837, + "time": 14.75 + }, + { + "epoch": 1.92, + "learning_rate": "1.7733e-04", + "loss": 0.8058, + "slid_loss": 0.7374, + "step": 3838, + "time": 12.72 + }, + { + "epoch": 1.92, + "learning_rate": "1.7731e-04", + "loss": 0.7214, + "slid_loss": 0.7375, + "step": 3839, + "time": 12.61 + }, + { + "epoch": 1.92, + "learning_rate": "1.7730e-04", + "loss": 0.8529, + "slid_loss": 0.7381, + "step": 3840, + "time": 12.76 + }, + { + "epoch": 1.92, + "learning_rate": "1.7728e-04", + "loss": 0.9051, + "slid_loss": 0.739, + "step": 3841, + "time": 13.91 + }, + { + "epoch": 1.92, + "learning_rate": "1.7727e-04", + "loss": 0.7042, + "slid_loss": 0.7381, + "step": 3842, + "time": 12.26 + }, + { + "epoch": 1.92, + "learning_rate": "1.7725e-04", + "loss": 0.8035, + "slid_loss": 0.7388, + "step": 3843, + "time": 13.26 + }, + { + "epoch": 1.92, + "learning_rate": "1.7724e-04", + "loss": 0.7926, + "slid_loss": 0.7402, + "step": 3844, + "time": 12.22 + }, + { + "epoch": 1.93, + "learning_rate": "1.7723e-04", + "loss": 0.9493, + "slid_loss": 0.7428, + "step": 3845, + "time": 14.03 + }, + { + "epoch": 1.93, + "learning_rate": "1.7721e-04", + "loss": 0.5863, + "slid_loss": 0.7423, + "step": 3846, + "time": 12.92 + }, + { + "epoch": 1.93, + "learning_rate": "1.7720e-04", + "loss": 0.6425, + "slid_loss": 0.7404, + "step": 3847, + "time": 12.7 + }, + { + "epoch": 1.93, + "learning_rate": "1.7718e-04", + "loss": 0.6316, + "slid_loss": 0.7395, + "step": 3848, + "time": 11.67 + }, + { + "epoch": 1.93, + "learning_rate": "1.7717e-04", + "loss": 0.7972, + "slid_loss": 0.7395, + "step": 3849, + "time": 12.9 + }, + { + "epoch": 1.93, + "learning_rate": "1.7715e-04", + "loss": 0.8708, + "slid_loss": 0.7413, + "step": 3850, + "time": 11.33 + }, + { + "epoch": 1.93, + "learning_rate": "1.7714e-04", + "loss": 0.7505, + "slid_loss": 0.74, + "step": 3851, + "time": 11.29 + }, + { + "epoch": 1.93, + "learning_rate": "1.7712e-04", + "loss": 0.7166, + "slid_loss": 0.7403, + "step": 3852, + "time": 12.93 + }, + { + "epoch": 1.93, + "learning_rate": "1.7711e-04", + "loss": 0.6821, + "slid_loss": 0.7394, + "step": 3853, + "time": 10.92 + }, + { + "epoch": 1.93, + "learning_rate": "1.7709e-04", + "loss": 0.6811, + "slid_loss": 0.7384, + "step": 3854, + "time": 10.76 + }, + { + "epoch": 1.93, + "learning_rate": "1.7708e-04", + "loss": 1.0027, + "slid_loss": 0.7402, + "step": 3855, + "time": 13.83 + }, + { + "epoch": 1.93, + "learning_rate": "1.7706e-04", + "loss": 0.797, + "slid_loss": 0.7417, + "step": 3856, + "time": 13.18 + }, + { + "epoch": 1.93, + "learning_rate": "1.7705e-04", + "loss": 0.9274, + "slid_loss": 0.7436, + "step": 3857, + "time": 13.49 + }, + { + "epoch": 1.93, + "learning_rate": "1.7703e-04", + "loss": 0.7609, + "slid_loss": 0.7441, + "step": 3858, + "time": 12.81 + }, + { + "epoch": 1.93, + "learning_rate": "1.7702e-04", + "loss": 0.7998, + "slid_loss": 0.7442, + "step": 3859, + "time": 12.9 + }, + { + "epoch": 1.93, + "learning_rate": "1.7701e-04", + "loss": 0.7192, + "slid_loss": 0.7439, + "step": 3860, + "time": 11.3 + }, + { + "epoch": 1.93, + "learning_rate": "1.7699e-04", + "loss": 0.6228, + "slid_loss": 0.7421, + "step": 3861, + "time": 13.21 + }, + { + "epoch": 1.93, + "learning_rate": "1.7698e-04", + "loss": 0.5509, + "slid_loss": 0.7418, + "step": 3862, + "time": 13.23 + }, + { + "epoch": 1.93, + "learning_rate": "1.7696e-04", + "loss": 0.8904, + "slid_loss": 0.7438, + "step": 3863, + "time": 12.78 + }, + { + "epoch": 1.93, + "learning_rate": "1.7695e-04", + "loss": 0.5837, + "slid_loss": 0.7438, + "step": 3864, + "time": 11.87 + }, + { + "epoch": 1.94, + "learning_rate": "1.7693e-04", + "loss": 0.871, + "slid_loss": 0.7444, + "step": 3865, + "time": 13.64 + }, + { + "epoch": 1.94, + "learning_rate": "1.7692e-04", + "loss": 0.7855, + "slid_loss": 0.7434, + "step": 3866, + "time": 13.56 + }, + { + "epoch": 1.94, + "learning_rate": "1.7690e-04", + "loss": 0.7873, + "slid_loss": 0.7427, + "step": 3867, + "time": 12.36 + }, + { + "epoch": 1.94, + "learning_rate": "1.7689e-04", + "loss": 0.6404, + "slid_loss": 0.742, + "step": 3868, + "time": 13.81 + }, + { + "epoch": 1.94, + "learning_rate": "1.7687e-04", + "loss": 0.9086, + "slid_loss": 0.7428, + "step": 3869, + "time": 11.88 + }, + { + "epoch": 1.94, + "learning_rate": "1.7686e-04", + "loss": 0.689, + "slid_loss": 0.7432, + "step": 3870, + "time": 10.74 + }, + { + "epoch": 1.94, + "learning_rate": "1.7684e-04", + "loss": 0.8267, + "slid_loss": 0.7448, + "step": 3871, + "time": 13.4 + }, + { + "epoch": 1.94, + "learning_rate": "1.7683e-04", + "loss": 0.8539, + "slid_loss": 0.7462, + "step": 3872, + "time": 12.85 + }, + { + "epoch": 1.94, + "learning_rate": "1.7681e-04", + "loss": 0.7769, + "slid_loss": 0.7476, + "step": 3873, + "time": 12.83 + }, + { + "epoch": 1.94, + "learning_rate": "1.7680e-04", + "loss": 0.7796, + "slid_loss": 0.7491, + "step": 3874, + "time": 13.09 + }, + { + "epoch": 1.94, + "learning_rate": "1.7678e-04", + "loss": 0.7472, + "slid_loss": 0.7495, + "step": 3875, + "time": 12.16 + }, + { + "epoch": 1.94, + "learning_rate": "1.7677e-04", + "loss": 0.7617, + "slid_loss": 0.749, + "step": 3876, + "time": 11.06 + }, + { + "epoch": 1.94, + "learning_rate": "1.7675e-04", + "loss": 0.7885, + "slid_loss": 0.7491, + "step": 3877, + "time": 12.77 + }, + { + "epoch": 1.94, + "learning_rate": "1.7674e-04", + "loss": 0.7563, + "slid_loss": 0.7513, + "step": 3878, + "time": 13.65 + }, + { + "epoch": 1.94, + "learning_rate": "1.7673e-04", + "loss": 0.8053, + "slid_loss": 0.7523, + "step": 3879, + "time": 13.34 + }, + { + "epoch": 1.94, + "learning_rate": "1.7671e-04", + "loss": 0.6233, + "slid_loss": 0.7509, + "step": 3880, + "time": 13.52 + }, + { + "epoch": 1.94, + "learning_rate": "1.7670e-04", + "loss": 0.8344, + "slid_loss": 0.7517, + "step": 3881, + "time": 11.3 + }, + { + "epoch": 1.94, + "learning_rate": "1.7668e-04", + "loss": 0.7126, + "slid_loss": 0.7515, + "step": 3882, + "time": 10.68 + }, + { + "epoch": 1.94, + "learning_rate": "1.7667e-04", + "loss": 0.7864, + "slid_loss": 0.752, + "step": 3883, + "time": 10.92 + }, + { + "epoch": 1.94, + "learning_rate": "1.7665e-04", + "loss": 0.6484, + "slid_loss": 0.7508, + "step": 3884, + "time": 11.84 + }, + { + "epoch": 1.95, + "learning_rate": "1.7664e-04", + "loss": 0.6329, + "slid_loss": 0.7504, + "step": 3885, + "time": 12.8 + }, + { + "epoch": 1.95, + "learning_rate": "1.7662e-04", + "loss": 0.7064, + "slid_loss": 0.7501, + "step": 3886, + "time": 11.56 + }, + { + "epoch": 1.95, + "learning_rate": "1.7661e-04", + "loss": 0.7219, + "slid_loss": 0.7508, + "step": 3887, + "time": 13.73 + }, + { + "epoch": 1.95, + "learning_rate": "1.7659e-04", + "loss": 0.6535, + "slid_loss": 0.7486, + "step": 3888, + "time": 13.22 + }, + { + "epoch": 1.95, + "learning_rate": "1.7658e-04", + "loss": 0.5958, + "slid_loss": 0.7466, + "step": 3889, + "time": 11.58 + }, + { + "epoch": 1.95, + "learning_rate": "1.7656e-04", + "loss": 0.7218, + "slid_loss": 0.7471, + "step": 3890, + "time": 14.36 + }, + { + "epoch": 1.95, + "learning_rate": "1.7655e-04", + "loss": 0.6745, + "slid_loss": 0.7476, + "step": 3891, + "time": 13.31 + }, + { + "epoch": 1.95, + "learning_rate": "1.7653e-04", + "loss": 0.6486, + "slid_loss": 0.7469, + "step": 3892, + "time": 13.92 + }, + { + "epoch": 1.95, + "learning_rate": "1.7652e-04", + "loss": 0.6268, + "slid_loss": 0.7466, + "step": 3893, + "time": 12.18 + }, + { + "epoch": 1.95, + "learning_rate": "1.7650e-04", + "loss": 0.6151, + "slid_loss": 0.7463, + "step": 3894, + "time": 12.73 + }, + { + "epoch": 1.95, + "learning_rate": "1.7649e-04", + "loss": 0.9806, + "slid_loss": 0.7485, + "step": 3895, + "time": 13.37 + }, + { + "epoch": 1.95, + "learning_rate": "1.7647e-04", + "loss": 0.7562, + "slid_loss": 0.7494, + "step": 3896, + "time": 12.73 + }, + { + "epoch": 1.95, + "learning_rate": "1.7646e-04", + "loss": 0.6188, + "slid_loss": 0.7497, + "step": 3897, + "time": 13.7 + }, + { + "epoch": 1.95, + "learning_rate": "1.7644e-04", + "loss": 0.684, + "slid_loss": 0.7494, + "step": 3898, + "time": 13.39 + }, + { + "epoch": 1.95, + "learning_rate": "1.7643e-04", + "loss": 0.7373, + "slid_loss": 0.7495, + "step": 3899, + "time": 11.62 + }, + { + "epoch": 1.95, + "learning_rate": "1.7641e-04", + "loss": 0.5326, + "slid_loss": 0.7468, + "step": 3900, + "time": 11.16 + }, + { + "epoch": 1.95, + "learning_rate": "1.7640e-04", + "loss": 0.6135, + "slid_loss": 0.7447, + "step": 3901, + "time": 12.12 + }, + { + "epoch": 1.95, + "learning_rate": "1.7638e-04", + "loss": 0.6172, + "slid_loss": 0.7443, + "step": 3902, + "time": 13.91 + }, + { + "epoch": 1.95, + "learning_rate": "1.7637e-04", + "loss": 0.5872, + "slid_loss": 0.7424, + "step": 3903, + "time": 13.77 + }, + { + "epoch": 1.95, + "learning_rate": "1.7635e-04", + "loss": 0.7306, + "slid_loss": 0.7416, + "step": 3904, + "time": 12.34 + }, + { + "epoch": 1.96, + "learning_rate": "1.7634e-04", + "loss": 0.8972, + "slid_loss": 0.7432, + "step": 3905, + "time": 11.23 + }, + { + "epoch": 1.96, + "learning_rate": "1.7633e-04", + "loss": 0.7669, + "slid_loss": 0.7425, + "step": 3906, + "time": 13.15 + }, + { + "epoch": 1.96, + "learning_rate": "1.7631e-04", + "loss": 0.7768, + "slid_loss": 0.744, + "step": 3907, + "time": 14.09 + }, + { + "epoch": 1.96, + "learning_rate": "1.7630e-04", + "loss": 0.6234, + "slid_loss": 0.7428, + "step": 3908, + "time": 11.37 + }, + { + "epoch": 1.96, + "learning_rate": "1.7628e-04", + "loss": 0.5779, + "slid_loss": 0.7419, + "step": 3909, + "time": 10.85 + }, + { + "epoch": 1.96, + "learning_rate": "1.7627e-04", + "loss": 0.6733, + "slid_loss": 0.7419, + "step": 3910, + "time": 11.08 + }, + { + "epoch": 1.96, + "learning_rate": "1.7625e-04", + "loss": 0.7604, + "slid_loss": 0.742, + "step": 3911, + "time": 13.7 + }, + { + "epoch": 1.96, + "learning_rate": "1.7624e-04", + "loss": 0.8066, + "slid_loss": 0.7425, + "step": 3912, + "time": 10.74 + }, + { + "epoch": 1.96, + "learning_rate": "1.7622e-04", + "loss": 0.5676, + "slid_loss": 0.7414, + "step": 3913, + "time": 10.75 + }, + { + "epoch": 1.96, + "learning_rate": "1.7621e-04", + "loss": 0.8031, + "slid_loss": 0.7396, + "step": 3914, + "time": 13.21 + }, + { + "epoch": 1.96, + "learning_rate": "1.7619e-04", + "loss": 0.7417, + "slid_loss": 0.737, + "step": 3915, + "time": 11.17 + }, + { + "epoch": 1.96, + "learning_rate": "1.7618e-04", + "loss": 0.825, + "slid_loss": 0.7382, + "step": 3916, + "time": 13.42 + }, + { + "epoch": 1.96, + "learning_rate": "1.7616e-04", + "loss": 0.8533, + "slid_loss": 0.7386, + "step": 3917, + "time": 12.59 + }, + { + "epoch": 1.96, + "learning_rate": "1.7615e-04", + "loss": 0.7493, + "slid_loss": 0.7382, + "step": 3918, + "time": 10.5 + }, + { + "epoch": 1.96, + "learning_rate": "1.7613e-04", + "loss": 0.846, + "slid_loss": 0.7382, + "step": 3919, + "time": 12.8 + }, + { + "epoch": 1.96, + "learning_rate": "1.7612e-04", + "loss": 0.8331, + "slid_loss": 0.7389, + "step": 3920, + "time": 13.56 + }, + { + "epoch": 1.96, + "learning_rate": "1.7610e-04", + "loss": 0.7249, + "slid_loss": 0.738, + "step": 3921, + "time": 10.96 + }, + { + "epoch": 1.96, + "learning_rate": "1.7609e-04", + "loss": 0.6526, + "slid_loss": 0.7375, + "step": 3922, + "time": 11.88 + }, + { + "epoch": 1.96, + "learning_rate": "1.7607e-04", + "loss": 0.7812, + "slid_loss": 0.7397, + "step": 3923, + "time": 12.25 + }, + { + "epoch": 1.96, + "learning_rate": "1.7606e-04", + "loss": 0.749, + "slid_loss": 0.741, + "step": 3924, + "time": 13.03 + }, + { + "epoch": 1.97, + "learning_rate": "1.7604e-04", + "loss": 0.6915, + "slid_loss": 0.7414, + "step": 3925, + "time": 13.54 + }, + { + "epoch": 1.97, + "learning_rate": "1.7603e-04", + "loss": 0.7914, + "slid_loss": 0.7424, + "step": 3926, + "time": 13.26 + }, + { + "epoch": 1.97, + "learning_rate": "1.7601e-04", + "loss": 0.7963, + "slid_loss": 0.7421, + "step": 3927, + "time": 11.63 + }, + { + "epoch": 1.97, + "learning_rate": "1.7600e-04", + "loss": 0.7749, + "slid_loss": 0.7437, + "step": 3928, + "time": 10.61 + }, + { + "epoch": 1.97, + "learning_rate": "1.7598e-04", + "loss": 0.77, + "slid_loss": 0.7445, + "step": 3929, + "time": 12.81 + }, + { + "epoch": 1.97, + "learning_rate": "1.7597e-04", + "loss": 0.7842, + "slid_loss": 0.7444, + "step": 3930, + "time": 14.48 + }, + { + "epoch": 1.97, + "learning_rate": "1.7595e-04", + "loss": 0.7344, + "slid_loss": 0.7429, + "step": 3931, + "time": 12.86 + }, + { + "epoch": 1.97, + "learning_rate": "1.7594e-04", + "loss": 0.7147, + "slid_loss": 0.7422, + "step": 3932, + "time": 12.21 + }, + { + "epoch": 1.97, + "learning_rate": "1.7592e-04", + "loss": 0.8605, + "slid_loss": 0.7426, + "step": 3933, + "time": 12.78 + }, + { + "epoch": 1.97, + "learning_rate": "1.7591e-04", + "loss": 0.711, + "slid_loss": 0.7436, + "step": 3934, + "time": 13.5 + }, + { + "epoch": 1.97, + "learning_rate": "1.7589e-04", + "loss": 0.7803, + "slid_loss": 0.744, + "step": 3935, + "time": 13.62 + }, + { + "epoch": 1.97, + "learning_rate": "1.7588e-04", + "loss": 0.8027, + "slid_loss": 0.7449, + "step": 3936, + "time": 13.2 + }, + { + "epoch": 1.97, + "learning_rate": "1.7586e-04", + "loss": 0.689, + "slid_loss": 0.743, + "step": 3937, + "time": 11.14 + }, + { + "epoch": 1.97, + "learning_rate": "1.7585e-04", + "loss": 0.8013, + "slid_loss": 0.7429, + "step": 3938, + "time": 12.73 + }, + { + "epoch": 1.97, + "learning_rate": "1.7583e-04", + "loss": 0.6519, + "slid_loss": 0.7422, + "step": 3939, + "time": 11.98 + }, + { + "epoch": 1.97, + "learning_rate": "1.7582e-04", + "loss": 0.7166, + "slid_loss": 0.7409, + "step": 3940, + "time": 10.34 + }, + { + "epoch": 1.97, + "learning_rate": "1.7580e-04", + "loss": 0.6228, + "slid_loss": 0.738, + "step": 3941, + "time": 12.31 + }, + { + "epoch": 1.97, + "learning_rate": "1.7579e-04", + "loss": 0.8568, + "slid_loss": 0.7396, + "step": 3942, + "time": 13.5 + }, + { + "epoch": 1.97, + "learning_rate": "1.7577e-04", + "loss": 0.725, + "slid_loss": 0.7388, + "step": 3943, + "time": 11.35 + }, + { + "epoch": 1.97, + "learning_rate": "1.7576e-04", + "loss": 0.8287, + "slid_loss": 0.7391, + "step": 3944, + "time": 12.58 + }, + { + "epoch": 1.98, + "learning_rate": "1.7574e-04", + "loss": 0.7923, + "slid_loss": 0.7376, + "step": 3945, + "time": 13.57 + }, + { + "epoch": 1.98, + "learning_rate": "1.7573e-04", + "loss": 0.8619, + "slid_loss": 0.7403, + "step": 3946, + "time": 13.37 + }, + { + "epoch": 1.98, + "learning_rate": "1.7571e-04", + "loss": 0.8288, + "slid_loss": 0.7422, + "step": 3947, + "time": 13.56 + }, + { + "epoch": 1.98, + "learning_rate": "1.7570e-04", + "loss": 0.8171, + "slid_loss": 0.744, + "step": 3948, + "time": 11.0 + }, + { + "epoch": 1.98, + "learning_rate": "1.7568e-04", + "loss": 0.8024, + "slid_loss": 0.7441, + "step": 3949, + "time": 12.93 + }, + { + "epoch": 1.98, + "learning_rate": "1.7567e-04", + "loss": 0.832, + "slid_loss": 0.7437, + "step": 3950, + "time": 12.4 + }, + { + "epoch": 1.98, + "learning_rate": "1.7565e-04", + "loss": 0.7746, + "slid_loss": 0.744, + "step": 3951, + "time": 13.42 + }, + { + "epoch": 1.98, + "learning_rate": "1.7564e-04", + "loss": 0.6456, + "slid_loss": 0.7432, + "step": 3952, + "time": 13.17 + }, + { + "epoch": 1.98, + "learning_rate": "1.7562e-04", + "loss": 0.6822, + "slid_loss": 0.7432, + "step": 3953, + "time": 13.38 + }, + { + "epoch": 1.98, + "learning_rate": "1.7561e-04", + "loss": 0.6263, + "slid_loss": 0.7427, + "step": 3954, + "time": 11.96 + }, + { + "epoch": 1.98, + "learning_rate": "1.7559e-04", + "loss": 0.5526, + "slid_loss": 0.7382, + "step": 3955, + "time": 13.72 + }, + { + "epoch": 1.98, + "learning_rate": "1.7558e-04", + "loss": 0.6249, + "slid_loss": 0.7365, + "step": 3956, + "time": 12.72 + }, + { + "epoch": 1.98, + "learning_rate": "1.7556e-04", + "loss": 0.6782, + "slid_loss": 0.734, + "step": 3957, + "time": 13.04 + }, + { + "epoch": 1.98, + "learning_rate": "1.7555e-04", + "loss": 0.8345, + "slid_loss": 0.7347, + "step": 3958, + "time": 13.43 + }, + { + "epoch": 1.98, + "learning_rate": "1.7553e-04", + "loss": 0.7778, + "slid_loss": 0.7345, + "step": 3959, + "time": 11.3 + }, + { + "epoch": 1.98, + "learning_rate": "1.7552e-04", + "loss": 0.6275, + "slid_loss": 0.7336, + "step": 3960, + "time": 10.87 + }, + { + "epoch": 1.98, + "learning_rate": "1.7550e-04", + "loss": 0.5886, + "slid_loss": 0.7332, + "step": 3961, + "time": 12.2 + }, + { + "epoch": 1.98, + "learning_rate": "1.7549e-04", + "loss": 0.7216, + "slid_loss": 0.7349, + "step": 3962, + "time": 11.31 + }, + { + "epoch": 1.98, + "learning_rate": "1.7547e-04", + "loss": 0.724, + "slid_loss": 0.7333, + "step": 3963, + "time": 13.63 + }, + { + "epoch": 1.98, + "learning_rate": "1.7546e-04", + "loss": 0.6215, + "slid_loss": 0.7337, + "step": 3964, + "time": 12.95 + }, + { + "epoch": 1.99, + "learning_rate": "1.7544e-04", + "loss": 0.8335, + "slid_loss": 0.7333, + "step": 3965, + "time": 14.21 + }, + { + "epoch": 1.99, + "learning_rate": "1.7543e-04", + "loss": 0.7431, + "slid_loss": 0.7329, + "step": 3966, + "time": 12.9 + }, + { + "epoch": 1.99, + "learning_rate": "1.7541e-04", + "loss": 0.628, + "slid_loss": 0.7313, + "step": 3967, + "time": 10.46 + }, + { + "epoch": 1.99, + "learning_rate": "1.7540e-04", + "loss": 0.7685, + "slid_loss": 0.7325, + "step": 3968, + "time": 12.41 + }, + { + "epoch": 1.99, + "learning_rate": "1.7538e-04", + "loss": 0.6328, + "slid_loss": 0.7298, + "step": 3969, + "time": 13.52 + }, + { + "epoch": 1.99, + "learning_rate": "1.7537e-04", + "loss": 0.7036, + "slid_loss": 0.7299, + "step": 3970, + "time": 12.9 + }, + { + "epoch": 1.99, + "learning_rate": "1.7535e-04", + "loss": 0.756, + "slid_loss": 0.7292, + "step": 3971, + "time": 14.07 + }, + { + "epoch": 1.99, + "learning_rate": "1.7534e-04", + "loss": 0.7323, + "slid_loss": 0.728, + "step": 3972, + "time": 10.96 + }, + { + "epoch": 1.99, + "learning_rate": "1.7532e-04", + "loss": 0.7826, + "slid_loss": 0.7281, + "step": 3973, + "time": 13.16 + }, + { + "epoch": 1.99, + "learning_rate": "1.7531e-04", + "loss": 0.6164, + "slid_loss": 0.7264, + "step": 3974, + "time": 12.89 + }, + { + "epoch": 1.99, + "learning_rate": "1.7529e-04", + "loss": 0.7678, + "slid_loss": 0.7266, + "step": 3975, + "time": 11.48 + }, + { + "epoch": 1.99, + "learning_rate": "1.7528e-04", + "loss": 0.6015, + "slid_loss": 0.725, + "step": 3976, + "time": 13.76 + }, + { + "epoch": 1.99, + "learning_rate": "1.7526e-04", + "loss": 0.5581, + "slid_loss": 0.7227, + "step": 3977, + "time": 13.31 + }, + { + "epoch": 1.99, + "learning_rate": "1.7525e-04", + "loss": 0.7417, + "slid_loss": 0.7226, + "step": 3978, + "time": 11.8 + }, + { + "epoch": 1.99, + "learning_rate": "1.7523e-04", + "loss": 0.8371, + "slid_loss": 0.7229, + "step": 3979, + "time": 14.46 + }, + { + "epoch": 1.99, + "learning_rate": "1.7522e-04", + "loss": 0.7619, + "slid_loss": 0.7243, + "step": 3980, + "time": 11.41 + }, + { + "epoch": 1.99, + "learning_rate": "1.7520e-04", + "loss": 0.6988, + "slid_loss": 0.7229, + "step": 3981, + "time": 10.62 + }, + { + "epoch": 1.99, + "learning_rate": "1.7519e-04", + "loss": 0.7587, + "slid_loss": 0.7234, + "step": 3982, + "time": 12.37 + }, + { + "epoch": 1.99, + "learning_rate": "1.7517e-04", + "loss": 0.8445, + "slid_loss": 0.724, + "step": 3983, + "time": 13.53 + }, + { + "epoch": 1.99, + "learning_rate": "1.7516e-04", + "loss": 0.7631, + "slid_loss": 0.7251, + "step": 3984, + "time": 11.62 + }, + { + "epoch": 2.0, + "learning_rate": "1.7514e-04", + "loss": 0.5776, + "slid_loss": 0.7246, + "step": 3985, + "time": 12.34 + }, + { + "epoch": 2.0, + "learning_rate": "1.7513e-04", + "loss": 0.8152, + "slid_loss": 0.7257, + "step": 3986, + "time": 12.79 + }, + { + "epoch": 2.0, + "learning_rate": "1.7511e-04", + "loss": 0.7888, + "slid_loss": 0.7263, + "step": 3987, + "time": 11.94 + }, + { + "epoch": 2.0, + "learning_rate": "1.7510e-04", + "loss": 0.8136, + "slid_loss": 0.7279, + "step": 3988, + "time": 13.16 + }, + { + "epoch": 2.0, + "learning_rate": "1.7508e-04", + "loss": 0.7628, + "slid_loss": 0.7296, + "step": 3989, + "time": 11.51 + }, + { + "epoch": 2.0, + "learning_rate": "1.7507e-04", + "loss": 0.6691, + "slid_loss": 0.7291, + "step": 3990, + "time": 13.18 + }, + { + "epoch": 2.0, + "learning_rate": "1.7505e-04", + "loss": 0.7114, + "slid_loss": 0.7294, + "step": 3991, + "time": 11.27 + }, + { + "epoch": 2.0, + "learning_rate": "1.7504e-04", + "loss": 0.6687, + "slid_loss": 0.7296, + "step": 3992, + "time": 12.81 + }, + { + "epoch": 2.0, + "learning_rate": "1.7502e-04", + "loss": 0.7794, + "slid_loss": 0.7312, + "step": 3993, + "time": 12.67 + }, + { + "epoch": 2.0, + "learning_rate": "1.7501e-04", + "loss": 0.8311, + "slid_loss": 0.7333, + "step": 3994, + "time": 14.24 + }, + { + "epoch": 2.0, + "learning_rate": "1.7499e-04", + "loss": 0.6077, + "slid_loss": 0.7296, + "step": 3995, + "time": 170.16 + }, + { + "epoch": 2.0, + "learning_rate": "1.7497e-04", + "loss": 0.7179, + "slid_loss": 0.7292, + "step": 3996, + "time": 12.79 + }, + { + "epoch": 2.0, + "learning_rate": "1.7496e-04", + "loss": 0.7127, + "slid_loss": 0.7302, + "step": 3997, + "time": 13.45 + }, + { + "epoch": 2.0, + "learning_rate": "1.7494e-04", + "loss": 0.6584, + "slid_loss": 0.7299, + "step": 3998, + "time": 11.64 + }, + { + "epoch": 2.0, + "learning_rate": "1.7493e-04", + "loss": 0.6972, + "slid_loss": 0.7295, + "step": 3999, + "time": 11.59 + }, + { + "epoch": 2.0, + "learning_rate": "1.7491e-04", + "loss": 0.7958, + "slid_loss": 0.7321, + "step": 4000, + "time": 12.87 + }, + { + "epoch": 2.0, + "learning_rate": "1.7490e-04", + "loss": 0.6507, + "slid_loss": 0.7325, + "step": 4001, + "time": 13.31 + }, + { + "epoch": 2.0, + "learning_rate": "1.7488e-04", + "loss": 0.868, + "slid_loss": 0.735, + "step": 4002, + "time": 13.38 + }, + { + "epoch": 2.0, + "learning_rate": "1.7487e-04", + "loss": 0.8371, + "slid_loss": 0.7375, + "step": 4003, + "time": 11.69 + }, + { + "epoch": 2.01, + "learning_rate": "1.7485e-04", + "loss": 0.8231, + "slid_loss": 0.7384, + "step": 4004, + "time": 12.57 + }, + { + "epoch": 2.01, + "learning_rate": "1.7484e-04", + "loss": 0.8397, + "slid_loss": 0.7379, + "step": 4005, + "time": 11.89 + }, + { + "epoch": 2.01, + "learning_rate": "1.7482e-04", + "loss": 0.6705, + "slid_loss": 0.7369, + "step": 4006, + "time": 14.2 + }, + { + "epoch": 2.01, + "learning_rate": "1.7481e-04", + "loss": 0.8258, + "slid_loss": 0.7374, + "step": 4007, + "time": 13.54 + }, + { + "epoch": 2.01, + "learning_rate": "1.7479e-04", + "loss": 0.7419, + "slid_loss": 0.7386, + "step": 4008, + "time": 13.17 + }, + { + "epoch": 2.01, + "learning_rate": "1.7478e-04", + "loss": 0.7373, + "slid_loss": 0.7402, + "step": 4009, + "time": 13.15 + }, + { + "epoch": 2.01, + "learning_rate": "1.7476e-04", + "loss": 0.6474, + "slid_loss": 0.7399, + "step": 4010, + "time": 13.39 + }, + { + "epoch": 2.01, + "learning_rate": "1.7475e-04", + "loss": 0.7583, + "slid_loss": 0.7399, + "step": 4011, + "time": 12.36 + }, + { + "epoch": 2.01, + "learning_rate": "1.7473e-04", + "loss": 0.6187, + "slid_loss": 0.738, + "step": 4012, + "time": 14.45 + }, + { + "epoch": 2.01, + "learning_rate": "1.7472e-04", + "loss": 0.6339, + "slid_loss": 0.7387, + "step": 4013, + "time": 12.96 + }, + { + "epoch": 2.01, + "learning_rate": "1.7470e-04", + "loss": 0.7263, + "slid_loss": 0.7379, + "step": 4014, + "time": 11.09 + }, + { + "epoch": 2.01, + "learning_rate": "1.7469e-04", + "loss": 0.7197, + "slid_loss": 0.7377, + "step": 4015, + "time": 13.14 + }, + { + "epoch": 2.01, + "learning_rate": "1.7467e-04", + "loss": 0.7589, + "slid_loss": 0.737, + "step": 4016, + "time": 11.4 + }, + { + "epoch": 2.01, + "learning_rate": "1.7466e-04", + "loss": 0.6681, + "slid_loss": 0.7352, + "step": 4017, + "time": 10.83 + }, + { + "epoch": 2.01, + "learning_rate": "1.7464e-04", + "loss": 0.6357, + "slid_loss": 0.734, + "step": 4018, + "time": 13.76 + }, + { + "epoch": 2.01, + "learning_rate": "1.7463e-04", + "loss": 0.8594, + "slid_loss": 0.7342, + "step": 4019, + "time": 12.9 + }, + { + "epoch": 2.01, + "learning_rate": "1.7461e-04", + "loss": 0.6443, + "slid_loss": 0.7323, + "step": 4020, + "time": 11.03 + }, + { + "epoch": 2.01, + "learning_rate": "1.7460e-04", + "loss": 0.6274, + "slid_loss": 0.7313, + "step": 4021, + "time": 13.93 + }, + { + "epoch": 2.01, + "learning_rate": "1.7458e-04", + "loss": 0.8442, + "slid_loss": 0.7332, + "step": 4022, + "time": 10.93 + }, + { + "epoch": 2.01, + "learning_rate": "1.7456e-04", + "loss": 0.6053, + "slid_loss": 0.7315, + "step": 4023, + "time": 13.07 + }, + { + "epoch": 2.02, + "learning_rate": "1.7455e-04", + "loss": 0.5277, + "slid_loss": 0.7292, + "step": 4024, + "time": 10.89 + }, + { + "epoch": 2.02, + "learning_rate": "1.7453e-04", + "loss": 0.7179, + "slid_loss": 0.7295, + "step": 4025, + "time": 12.3 + }, + { + "epoch": 2.02, + "learning_rate": "1.7452e-04", + "loss": 0.797, + "slid_loss": 0.7296, + "step": 4026, + "time": 14.07 + }, + { + "epoch": 2.02, + "learning_rate": "1.7450e-04", + "loss": 0.8356, + "slid_loss": 0.73, + "step": 4027, + "time": 13.39 + }, + { + "epoch": 2.02, + "learning_rate": "1.7449e-04", + "loss": 0.5981, + "slid_loss": 0.7282, + "step": 4028, + "time": 14.3 + }, + { + "epoch": 2.02, + "learning_rate": "1.7447e-04", + "loss": 0.7733, + "slid_loss": 0.7282, + "step": 4029, + "time": 11.92 + }, + { + "epoch": 2.02, + "learning_rate": "1.7446e-04", + "loss": 0.7094, + "slid_loss": 0.7275, + "step": 4030, + "time": 13.2 + }, + { + "epoch": 2.02, + "learning_rate": "1.7444e-04", + "loss": 0.6286, + "slid_loss": 0.7264, + "step": 4031, + "time": 11.72 + }, + { + "epoch": 2.02, + "learning_rate": "1.7443e-04", + "loss": 0.7158, + "slid_loss": 0.7264, + "step": 4032, + "time": 13.25 + }, + { + "epoch": 2.02, + "learning_rate": "1.7441e-04", + "loss": 0.8492, + "slid_loss": 0.7263, + "step": 4033, + "time": 12.6 + }, + { + "epoch": 2.02, + "learning_rate": "1.7440e-04", + "loss": 0.7745, + "slid_loss": 0.727, + "step": 4034, + "time": 11.15 + }, + { + "epoch": 2.02, + "learning_rate": "1.7438e-04", + "loss": 0.7823, + "slid_loss": 0.727, + "step": 4035, + "time": 13.4 + }, + { + "epoch": 2.02, + "learning_rate": "1.7437e-04", + "loss": 0.8466, + "slid_loss": 0.7274, + "step": 4036, + "time": 13.58 + }, + { + "epoch": 2.02, + "learning_rate": "1.7435e-04", + "loss": 0.8469, + "slid_loss": 0.729, + "step": 4037, + "time": 13.78 + }, + { + "epoch": 2.02, + "learning_rate": "1.7434e-04", + "loss": 0.7221, + "slid_loss": 0.7282, + "step": 4038, + "time": 12.18 + }, + { + "epoch": 2.02, + "learning_rate": "1.7432e-04", + "loss": 0.7771, + "slid_loss": 0.7295, + "step": 4039, + "time": 10.92 + }, + { + "epoch": 2.02, + "learning_rate": "1.7431e-04", + "loss": 0.6376, + "slid_loss": 0.7287, + "step": 4040, + "time": 12.95 + }, + { + "epoch": 2.02, + "learning_rate": "1.7429e-04", + "loss": 0.7516, + "slid_loss": 0.7299, + "step": 4041, + "time": 11.16 + }, + { + "epoch": 2.02, + "learning_rate": "1.7427e-04", + "loss": 0.6119, + "slid_loss": 0.7275, + "step": 4042, + "time": 14.02 + }, + { + "epoch": 2.02, + "learning_rate": "1.7426e-04", + "loss": 0.8951, + "slid_loss": 0.7292, + "step": 4043, + "time": 11.25 + }, + { + "epoch": 2.03, + "learning_rate": "1.7424e-04", + "loss": 0.8529, + "slid_loss": 0.7294, + "step": 4044, + "time": 14.7 + }, + { + "epoch": 2.03, + "learning_rate": "1.7423e-04", + "loss": 0.7478, + "slid_loss": 0.729, + "step": 4045, + "time": 12.9 + }, + { + "epoch": 2.03, + "learning_rate": "1.7421e-04", + "loss": 0.6883, + "slid_loss": 0.7273, + "step": 4046, + "time": 14.29 + }, + { + "epoch": 2.03, + "learning_rate": "1.7420e-04", + "loss": 0.6609, + "slid_loss": 0.7256, + "step": 4047, + "time": 11.19 + }, + { + "epoch": 2.03, + "learning_rate": "1.7418e-04", + "loss": 0.9234, + "slid_loss": 0.7266, + "step": 4048, + "time": 11.29 + }, + { + "epoch": 2.03, + "learning_rate": "1.7417e-04", + "loss": 0.7488, + "slid_loss": 0.7261, + "step": 4049, + "time": 10.83 + }, + { + "epoch": 2.03, + "learning_rate": "1.7415e-04", + "loss": 0.6465, + "slid_loss": 0.7243, + "step": 4050, + "time": 12.68 + }, + { + "epoch": 2.03, + "learning_rate": "1.7414e-04", + "loss": 0.829, + "slid_loss": 0.7248, + "step": 4051, + "time": 11.32 + }, + { + "epoch": 2.03, + "learning_rate": "1.7412e-04", + "loss": 0.6485, + "slid_loss": 0.7248, + "step": 4052, + "time": 13.46 + }, + { + "epoch": 2.03, + "learning_rate": "1.7411e-04", + "loss": 0.6584, + "slid_loss": 0.7246, + "step": 4053, + "time": 11.63 + }, + { + "epoch": 2.03, + "learning_rate": "1.7409e-04", + "loss": 0.7225, + "slid_loss": 0.7256, + "step": 4054, + "time": 12.11 + }, + { + "epoch": 2.03, + "learning_rate": "1.7408e-04", + "loss": 0.7002, + "slid_loss": 0.727, + "step": 4055, + "time": 13.0 + }, + { + "epoch": 2.03, + "learning_rate": "1.7406e-04", + "loss": 0.7755, + "slid_loss": 0.7285, + "step": 4056, + "time": 13.59 + }, + { + "epoch": 2.03, + "learning_rate": "1.7405e-04", + "loss": 0.5969, + "slid_loss": 0.7277, + "step": 4057, + "time": 12.78 + }, + { + "epoch": 2.03, + "learning_rate": "1.7403e-04", + "loss": 0.8156, + "slid_loss": 0.7275, + "step": 4058, + "time": 13.11 + }, + { + "epoch": 2.03, + "learning_rate": "1.7401e-04", + "loss": 0.663, + "slid_loss": 0.7264, + "step": 4059, + "time": 12.89 + }, + { + "epoch": 2.03, + "learning_rate": "1.7400e-04", + "loss": 0.8681, + "slid_loss": 0.7288, + "step": 4060, + "time": 10.8 + }, + { + "epoch": 2.03, + "learning_rate": "1.7398e-04", + "loss": 0.6609, + "slid_loss": 0.7295, + "step": 4061, + "time": 14.2 + }, + { + "epoch": 2.03, + "learning_rate": "1.7397e-04", + "loss": 0.7667, + "slid_loss": 0.73, + "step": 4062, + "time": 10.9 + }, + { + "epoch": 2.03, + "learning_rate": "1.7395e-04", + "loss": 0.8074, + "slid_loss": 0.7308, + "step": 4063, + "time": 12.66 + }, + { + "epoch": 2.04, + "learning_rate": "1.7394e-04", + "loss": 0.7025, + "slid_loss": 0.7316, + "step": 4064, + "time": 11.24 + }, + { + "epoch": 2.04, + "learning_rate": "1.7392e-04", + "loss": 0.6565, + "slid_loss": 0.7298, + "step": 4065, + "time": 13.06 + }, + { + "epoch": 2.04, + "learning_rate": "1.7391e-04", + "loss": 0.759, + "slid_loss": 0.73, + "step": 4066, + "time": 11.6 + }, + { + "epoch": 2.04, + "learning_rate": "1.7389e-04", + "loss": 0.8317, + "slid_loss": 0.732, + "step": 4067, + "time": 11.63 + }, + { + "epoch": 2.04, + "learning_rate": "1.7388e-04", + "loss": 0.7343, + "slid_loss": 0.7317, + "step": 4068, + "time": 12.8 + }, + { + "epoch": 2.04, + "learning_rate": "1.7386e-04", + "loss": 0.635, + "slid_loss": 0.7317, + "step": 4069, + "time": 11.83 + }, + { + "epoch": 2.04, + "learning_rate": "1.7385e-04", + "loss": 0.8543, + "slid_loss": 0.7332, + "step": 4070, + "time": 11.65 + }, + { + "epoch": 2.04, + "learning_rate": "1.7383e-04", + "loss": 0.5268, + "slid_loss": 0.7309, + "step": 4071, + "time": 13.79 + }, + { + "epoch": 2.04, + "learning_rate": "1.7382e-04", + "loss": 0.7787, + "slid_loss": 0.7314, + "step": 4072, + "time": 13.17 + }, + { + "epoch": 2.04, + "learning_rate": "1.7380e-04", + "loss": 0.6385, + "slid_loss": 0.73, + "step": 4073, + "time": 11.43 + }, + { + "epoch": 2.04, + "learning_rate": "1.7378e-04", + "loss": 0.6491, + "slid_loss": 0.7303, + "step": 4074, + "time": 11.23 + }, + { + "epoch": 2.04, + "learning_rate": "1.7377e-04", + "loss": 0.7996, + "slid_loss": 0.7306, + "step": 4075, + "time": 11.03 + }, + { + "epoch": 2.04, + "learning_rate": "1.7375e-04", + "loss": 0.6386, + "slid_loss": 0.731, + "step": 4076, + "time": 13.94 + }, + { + "epoch": 2.04, + "learning_rate": "1.7374e-04", + "loss": 0.7088, + "slid_loss": 0.7325, + "step": 4077, + "time": 13.2 + }, + { + "epoch": 2.04, + "learning_rate": "1.7372e-04", + "loss": 0.7993, + "slid_loss": 0.7331, + "step": 4078, + "time": 13.58 + }, + { + "epoch": 2.04, + "learning_rate": "1.7371e-04", + "loss": 0.6942, + "slid_loss": 0.7316, + "step": 4079, + "time": 13.91 + }, + { + "epoch": 2.04, + "learning_rate": "1.7369e-04", + "loss": 0.6275, + "slid_loss": 0.7303, + "step": 4080, + "time": 12.49 + }, + { + "epoch": 2.04, + "learning_rate": "1.7368e-04", + "loss": 0.6089, + "slid_loss": 0.7294, + "step": 4081, + "time": 11.94 + }, + { + "epoch": 2.04, + "learning_rate": "1.7366e-04", + "loss": 0.5715, + "slid_loss": 0.7275, + "step": 4082, + "time": 12.89 + }, + { + "epoch": 2.04, + "learning_rate": "1.7365e-04", + "loss": 0.6844, + "slid_loss": 0.7259, + "step": 4083, + "time": 11.56 + }, + { + "epoch": 2.05, + "learning_rate": "1.7363e-04", + "loss": 0.7096, + "slid_loss": 0.7254, + "step": 4084, + "time": 13.62 + }, + { + "epoch": 2.05, + "learning_rate": "1.7362e-04", + "loss": 0.8115, + "slid_loss": 0.7277, + "step": 4085, + "time": 13.6 + }, + { + "epoch": 2.05, + "learning_rate": "1.7360e-04", + "loss": 0.7572, + "slid_loss": 0.7271, + "step": 4086, + "time": 13.05 + }, + { + "epoch": 2.05, + "learning_rate": "1.7358e-04", + "loss": 0.8453, + "slid_loss": 0.7277, + "step": 4087, + "time": 11.07 + }, + { + "epoch": 2.05, + "learning_rate": "1.7357e-04", + "loss": 0.6923, + "slid_loss": 0.7265, + "step": 4088, + "time": 13.08 + }, + { + "epoch": 2.05, + "learning_rate": "1.7355e-04", + "loss": 0.6716, + "slid_loss": 0.7256, + "step": 4089, + "time": 10.61 + }, + { + "epoch": 2.05, + "learning_rate": "1.7354e-04", + "loss": 0.7094, + "slid_loss": 0.726, + "step": 4090, + "time": 11.39 + }, + { + "epoch": 2.05, + "learning_rate": "1.7352e-04", + "loss": 0.7551, + "slid_loss": 0.7264, + "step": 4091, + "time": 13.82 + }, + { + "epoch": 2.05, + "learning_rate": "1.7351e-04", + "loss": 0.7625, + "slid_loss": 0.7274, + "step": 4092, + "time": 13.54 + }, + { + "epoch": 2.05, + "learning_rate": "1.7349e-04", + "loss": 0.6617, + "slid_loss": 0.7262, + "step": 4093, + "time": 13.47 + }, + { + "epoch": 2.05, + "learning_rate": "1.7348e-04", + "loss": 0.7442, + "slid_loss": 0.7253, + "step": 4094, + "time": 11.58 + }, + { + "epoch": 2.05, + "learning_rate": "1.7346e-04", + "loss": 0.6292, + "slid_loss": 0.7255, + "step": 4095, + "time": 13.02 + }, + { + "epoch": 2.05, + "learning_rate": "1.7345e-04", + "loss": 0.7298, + "slid_loss": 0.7256, + "step": 4096, + "time": 11.42 + }, + { + "epoch": 2.05, + "learning_rate": "1.7343e-04", + "loss": 0.7951, + "slid_loss": 0.7265, + "step": 4097, + "time": 13.39 + }, + { + "epoch": 2.05, + "learning_rate": "1.7341e-04", + "loss": 0.8837, + "slid_loss": 0.7287, + "step": 4098, + "time": 13.29 + }, + { + "epoch": 2.05, + "learning_rate": "1.7340e-04", + "loss": 0.7118, + "slid_loss": 0.7289, + "step": 4099, + "time": 12.11 + }, + { + "epoch": 2.05, + "learning_rate": "1.7338e-04", + "loss": 0.832, + "slid_loss": 0.7292, + "step": 4100, + "time": 12.14 + }, + { + "epoch": 2.05, + "learning_rate": "1.7337e-04", + "loss": 0.7713, + "slid_loss": 0.7304, + "step": 4101, + "time": 11.45 + }, + { + "epoch": 2.05, + "learning_rate": "1.7335e-04", + "loss": 0.7163, + "slid_loss": 0.7289, + "step": 4102, + "time": 11.42 + }, + { + "epoch": 2.05, + "learning_rate": "1.7334e-04", + "loss": 0.5479, + "slid_loss": 0.726, + "step": 4103, + "time": 13.72 + }, + { + "epoch": 2.06, + "learning_rate": "1.7332e-04", + "loss": 0.8541, + "slid_loss": 0.7263, + "step": 4104, + "time": 12.72 + }, + { + "epoch": 2.06, + "learning_rate": "1.7331e-04", + "loss": 0.7504, + "slid_loss": 0.7254, + "step": 4105, + "time": 11.27 + }, + { + "epoch": 2.06, + "learning_rate": "1.7329e-04", + "loss": 0.5868, + "slid_loss": 0.7246, + "step": 4106, + "time": 13.08 + }, + { + "epoch": 2.06, + "learning_rate": "1.7328e-04", + "loss": 0.5323, + "slid_loss": 0.7217, + "step": 4107, + "time": 11.83 + }, + { + "epoch": 2.06, + "learning_rate": "1.7326e-04", + "loss": 0.691, + "slid_loss": 0.7212, + "step": 4108, + "time": 11.69 + }, + { + "epoch": 2.06, + "learning_rate": "1.7324e-04", + "loss": 0.7255, + "slid_loss": 0.721, + "step": 4109, + "time": 12.16 + }, + { + "epoch": 2.06, + "learning_rate": "1.7323e-04", + "loss": 0.8383, + "slid_loss": 0.7229, + "step": 4110, + "time": 13.33 + }, + { + "epoch": 2.06, + "learning_rate": "1.7321e-04", + "loss": 0.8843, + "slid_loss": 0.7242, + "step": 4111, + "time": 13.51 + }, + { + "epoch": 2.06, + "learning_rate": "1.7320e-04", + "loss": 0.6561, + "slid_loss": 0.7246, + "step": 4112, + "time": 12.99 + }, + { + "epoch": 2.06, + "learning_rate": "1.7318e-04", + "loss": 0.745, + "slid_loss": 0.7257, + "step": 4113, + "time": 13.55 + }, + { + "epoch": 2.06, + "learning_rate": "1.7317e-04", + "loss": 0.6923, + "slid_loss": 0.7254, + "step": 4114, + "time": 14.01 + }, + { + "epoch": 2.06, + "learning_rate": "1.7315e-04", + "loss": 0.6676, + "slid_loss": 0.7248, + "step": 4115, + "time": 13.1 + }, + { + "epoch": 2.06, + "learning_rate": "1.7314e-04", + "loss": 0.7274, + "slid_loss": 0.7245, + "step": 4116, + "time": 13.3 + }, + { + "epoch": 2.06, + "learning_rate": "1.7312e-04", + "loss": 0.4851, + "slid_loss": 0.7227, + "step": 4117, + "time": 13.67 + }, + { + "epoch": 2.06, + "learning_rate": "1.7311e-04", + "loss": 0.7732, + "slid_loss": 0.7241, + "step": 4118, + "time": 11.47 + }, + { + "epoch": 2.06, + "learning_rate": "1.7309e-04", + "loss": 0.7079, + "slid_loss": 0.7226, + "step": 4119, + "time": 11.41 + }, + { + "epoch": 2.06, + "learning_rate": "1.7307e-04", + "loss": 0.6487, + "slid_loss": 0.7226, + "step": 4120, + "time": 14.33 + }, + { + "epoch": 2.06, + "learning_rate": "1.7306e-04", + "loss": 0.6718, + "slid_loss": 0.723, + "step": 4121, + "time": 11.1 + }, + { + "epoch": 2.06, + "learning_rate": "1.7304e-04", + "loss": 0.86, + "slid_loss": 0.7232, + "step": 4122, + "time": 11.42 + }, + { + "epoch": 2.06, + "learning_rate": "1.7303e-04", + "loss": 0.7135, + "slid_loss": 0.7243, + "step": 4123, + "time": 13.73 + }, + { + "epoch": 2.07, + "learning_rate": "1.7301e-04", + "loss": 0.6567, + "slid_loss": 0.7256, + "step": 4124, + "time": 12.32 + }, + { + "epoch": 2.07, + "learning_rate": "1.7300e-04", + "loss": 0.7961, + "slid_loss": 0.7263, + "step": 4125, + "time": 11.27 + }, + { + "epoch": 2.07, + "learning_rate": "1.7298e-04", + "loss": 0.7685, + "slid_loss": 0.7261, + "step": 4126, + "time": 11.69 + }, + { + "epoch": 2.07, + "learning_rate": "1.7297e-04", + "loss": 0.7345, + "slid_loss": 0.7251, + "step": 4127, + "time": 13.62 + }, + { + "epoch": 2.07, + "learning_rate": "1.7295e-04", + "loss": 0.8042, + "slid_loss": 0.7271, + "step": 4128, + "time": 13.36 + }, + { + "epoch": 2.07, + "learning_rate": "1.7293e-04", + "loss": 0.7911, + "slid_loss": 0.7273, + "step": 4129, + "time": 11.61 + }, + { + "epoch": 2.07, + "learning_rate": "1.7292e-04", + "loss": 0.5959, + "slid_loss": 0.7262, + "step": 4130, + "time": 12.13 + }, + { + "epoch": 2.07, + "learning_rate": "1.7290e-04", + "loss": 0.5307, + "slid_loss": 0.7252, + "step": 4131, + "time": 10.36 + }, + { + "epoch": 2.07, + "learning_rate": "1.7289e-04", + "loss": 0.6656, + "slid_loss": 0.7247, + "step": 4132, + "time": 13.72 + }, + { + "epoch": 2.07, + "learning_rate": "1.7287e-04", + "loss": 0.7083, + "slid_loss": 0.7233, + "step": 4133, + "time": 10.49 + }, + { + "epoch": 2.07, + "learning_rate": "1.7286e-04", + "loss": 0.6819, + "slid_loss": 0.7223, + "step": 4134, + "time": 13.2 + }, + { + "epoch": 2.07, + "learning_rate": "1.7284e-04", + "loss": 0.7088, + "slid_loss": 0.7216, + "step": 4135, + "time": 10.63 + }, + { + "epoch": 2.07, + "learning_rate": "1.7283e-04", + "loss": 0.7952, + "slid_loss": 0.7211, + "step": 4136, + "time": 13.07 + }, + { + "epoch": 2.07, + "learning_rate": "1.7281e-04", + "loss": 0.7528, + "slid_loss": 0.7202, + "step": 4137, + "time": 12.86 + }, + { + "epoch": 2.07, + "learning_rate": "1.7279e-04", + "loss": 0.7777, + "slid_loss": 0.7207, + "step": 4138, + "time": 12.85 + }, + { + "epoch": 2.07, + "learning_rate": "1.7278e-04", + "loss": 0.6641, + "slid_loss": 0.7196, + "step": 4139, + "time": 13.64 + }, + { + "epoch": 2.07, + "learning_rate": "1.7276e-04", + "loss": 0.5647, + "slid_loss": 0.7188, + "step": 4140, + "time": 13.32 + }, + { + "epoch": 2.07, + "learning_rate": "1.7275e-04", + "loss": 0.5885, + "slid_loss": 0.7172, + "step": 4141, + "time": 11.56 + }, + { + "epoch": 2.07, + "learning_rate": "1.7273e-04", + "loss": 0.7529, + "slid_loss": 0.7186, + "step": 4142, + "time": 11.82 + }, + { + "epoch": 2.07, + "learning_rate": "1.7272e-04", + "loss": 0.6841, + "slid_loss": 0.7165, + "step": 4143, + "time": 11.79 + }, + { + "epoch": 2.08, + "learning_rate": "1.7270e-04", + "loss": 0.6566, + "slid_loss": 0.7146, + "step": 4144, + "time": 11.13 + }, + { + "epoch": 2.08, + "learning_rate": "1.7269e-04", + "loss": 0.7284, + "slid_loss": 0.7144, + "step": 4145, + "time": 11.15 + }, + { + "epoch": 2.08, + "learning_rate": "1.7267e-04", + "loss": 0.7159, + "slid_loss": 0.7146, + "step": 4146, + "time": 11.64 + }, + { + "epoch": 2.08, + "learning_rate": "1.7265e-04", + "loss": 0.742, + "slid_loss": 0.7154, + "step": 4147, + "time": 13.72 + }, + { + "epoch": 2.08, + "learning_rate": "1.7264e-04", + "loss": 0.6177, + "slid_loss": 0.7124, + "step": 4148, + "time": 12.18 + }, + { + "epoch": 2.08, + "learning_rate": "1.7262e-04", + "loss": 0.7239, + "slid_loss": 0.7121, + "step": 4149, + "time": 13.48 + }, + { + "epoch": 2.08, + "learning_rate": "1.7261e-04", + "loss": 0.675, + "slid_loss": 0.7124, + "step": 4150, + "time": 10.67 + }, + { + "epoch": 2.08, + "learning_rate": "1.7259e-04", + "loss": 0.5962, + "slid_loss": 0.7101, + "step": 4151, + "time": 12.87 + }, + { + "epoch": 2.08, + "learning_rate": "1.7258e-04", + "loss": 0.7157, + "slid_loss": 0.7108, + "step": 4152, + "time": 10.93 + }, + { + "epoch": 2.08, + "learning_rate": "1.7256e-04", + "loss": 0.6343, + "slid_loss": 0.7105, + "step": 4153, + "time": 10.98 + }, + { + "epoch": 2.08, + "learning_rate": "1.7255e-04", + "loss": 0.7683, + "slid_loss": 0.711, + "step": 4154, + "time": 13.16 + }, + { + "epoch": 2.08, + "learning_rate": "1.7253e-04", + "loss": 0.82, + "slid_loss": 0.7122, + "step": 4155, + "time": 11.06 + }, + { + "epoch": 2.08, + "learning_rate": "1.7251e-04", + "loss": 0.5832, + "slid_loss": 0.7103, + "step": 4156, + "time": 13.45 + }, + { + "epoch": 2.08, + "learning_rate": "1.7250e-04", + "loss": 0.722, + "slid_loss": 0.7115, + "step": 4157, + "time": 11.38 + }, + { + "epoch": 2.08, + "learning_rate": "1.7248e-04", + "loss": 0.9995, + "slid_loss": 0.7134, + "step": 4158, + "time": 12.69 + }, + { + "epoch": 2.08, + "learning_rate": "1.7247e-04", + "loss": 0.7671, + "slid_loss": 0.7144, + "step": 4159, + "time": 10.69 + }, + { + "epoch": 2.08, + "learning_rate": "1.7245e-04", + "loss": 0.8149, + "slid_loss": 0.7139, + "step": 4160, + "time": 13.59 + }, + { + "epoch": 2.08, + "learning_rate": "1.7244e-04", + "loss": 0.7889, + "slid_loss": 0.7151, + "step": 4161, + "time": 13.16 + }, + { + "epoch": 2.08, + "learning_rate": "1.7242e-04", + "loss": 0.7972, + "slid_loss": 0.7154, + "step": 4162, + "time": 12.83 + }, + { + "epoch": 2.08, + "learning_rate": "1.7240e-04", + "loss": 0.6779, + "slid_loss": 0.7142, + "step": 4163, + "time": 13.57 + }, + { + "epoch": 2.09, + "learning_rate": "1.7239e-04", + "loss": 0.7179, + "slid_loss": 0.7143, + "step": 4164, + "time": 13.13 + }, + { + "epoch": 2.09, + "learning_rate": "1.7237e-04", + "loss": 0.6573, + "slid_loss": 0.7143, + "step": 4165, + "time": 11.82 + }, + { + "epoch": 2.09, + "learning_rate": "1.7236e-04", + "loss": 0.6633, + "slid_loss": 0.7134, + "step": 4166, + "time": 11.27 + }, + { + "epoch": 2.09, + "learning_rate": "1.7234e-04", + "loss": 0.7158, + "slid_loss": 0.7122, + "step": 4167, + "time": 11.36 + }, + { + "epoch": 2.09, + "learning_rate": "1.7233e-04", + "loss": 0.7512, + "slid_loss": 0.7124, + "step": 4168, + "time": 13.77 + }, + { + "epoch": 2.09, + "learning_rate": "1.7231e-04", + "loss": 0.6658, + "slid_loss": 0.7127, + "step": 4169, + "time": 10.26 + }, + { + "epoch": 2.09, + "learning_rate": "1.7230e-04", + "loss": 0.6462, + "slid_loss": 0.7106, + "step": 4170, + "time": 12.58 + }, + { + "epoch": 2.09, + "learning_rate": "1.7228e-04", + "loss": 0.6651, + "slid_loss": 0.712, + "step": 4171, + "time": 11.61 + }, + { + "epoch": 2.09, + "learning_rate": "1.7226e-04", + "loss": 0.7211, + "slid_loss": 0.7114, + "step": 4172, + "time": 10.99 + }, + { + "epoch": 2.09, + "learning_rate": "1.7225e-04", + "loss": 0.714, + "slid_loss": 0.7122, + "step": 4173, + "time": 10.97 + }, + { + "epoch": 2.09, + "learning_rate": "1.7223e-04", + "loss": 0.7618, + "slid_loss": 0.7133, + "step": 4174, + "time": 13.11 + }, + { + "epoch": 2.09, + "learning_rate": "1.7222e-04", + "loss": 0.646, + "slid_loss": 0.7117, + "step": 4175, + "time": 12.82 + }, + { + "epoch": 2.09, + "learning_rate": "1.7220e-04", + "loss": 0.7102, + "slid_loss": 0.7125, + "step": 4176, + "time": 12.07 + }, + { + "epoch": 2.09, + "learning_rate": "1.7219e-04", + "loss": 0.7582, + "slid_loss": 0.713, + "step": 4177, + "time": 11.32 + }, + { + "epoch": 2.09, + "learning_rate": "1.7217e-04", + "loss": 0.7222, + "slid_loss": 0.7122, + "step": 4178, + "time": 10.33 + }, + { + "epoch": 2.09, + "learning_rate": "1.7215e-04", + "loss": 0.839, + "slid_loss": 0.7136, + "step": 4179, + "time": 13.24 + }, + { + "epoch": 2.09, + "learning_rate": "1.7214e-04", + "loss": 0.6872, + "slid_loss": 0.7142, + "step": 4180, + "time": 11.17 + }, + { + "epoch": 2.09, + "learning_rate": "1.7212e-04", + "loss": 0.891, + "slid_loss": 0.7171, + "step": 4181, + "time": 12.64 + }, + { + "epoch": 2.09, + "learning_rate": "1.7211e-04", + "loss": 0.8297, + "slid_loss": 0.7196, + "step": 4182, + "time": 12.21 + }, + { + "epoch": 2.09, + "learning_rate": "1.7209e-04", + "loss": 0.6106, + "slid_loss": 0.7189, + "step": 4183, + "time": 13.39 + }, + { + "epoch": 2.1, + "learning_rate": "1.7208e-04", + "loss": 0.6809, + "slid_loss": 0.7186, + "step": 4184, + "time": 10.86 + }, + { + "epoch": 2.1, + "learning_rate": "1.7206e-04", + "loss": 0.6963, + "slid_loss": 0.7175, + "step": 4185, + "time": 12.83 + }, + { + "epoch": 2.1, + "learning_rate": "1.7204e-04", + "loss": 0.7905, + "slid_loss": 0.7178, + "step": 4186, + "time": 13.02 + }, + { + "epoch": 2.1, + "learning_rate": "1.7203e-04", + "loss": 0.7086, + "slid_loss": 0.7164, + "step": 4187, + "time": 13.78 + }, + { + "epoch": 2.1, + "learning_rate": "1.7201e-04", + "loss": 0.6608, + "slid_loss": 0.7161, + "step": 4188, + "time": 13.8 + }, + { + "epoch": 2.1, + "learning_rate": "1.7200e-04", + "loss": 0.7068, + "slid_loss": 0.7165, + "step": 4189, + "time": 12.91 + }, + { + "epoch": 2.1, + "learning_rate": "1.7198e-04", + "loss": 0.8759, + "slid_loss": 0.7181, + "step": 4190, + "time": 11.24 + }, + { + "epoch": 2.1, + "learning_rate": "1.7197e-04", + "loss": 0.76, + "slid_loss": 0.7182, + "step": 4191, + "time": 13.43 + }, + { + "epoch": 2.1, + "learning_rate": "1.7195e-04", + "loss": 0.6571, + "slid_loss": 0.7171, + "step": 4192, + "time": 13.61 + }, + { + "epoch": 2.1, + "learning_rate": "1.7193e-04", + "loss": 0.8185, + "slid_loss": 0.7187, + "step": 4193, + "time": 12.83 + }, + { + "epoch": 2.1, + "learning_rate": "1.7192e-04", + "loss": 0.8, + "slid_loss": 0.7192, + "step": 4194, + "time": 13.38 + }, + { + "epoch": 2.1, + "learning_rate": "1.7190e-04", + "loss": 0.664, + "slid_loss": 0.7196, + "step": 4195, + "time": 13.22 + }, + { + "epoch": 2.1, + "learning_rate": "1.7189e-04", + "loss": 0.8067, + "slid_loss": 0.7204, + "step": 4196, + "time": 12.16 + }, + { + "epoch": 2.1, + "learning_rate": "1.7187e-04", + "loss": 0.8796, + "slid_loss": 0.7212, + "step": 4197, + "time": 13.98 + }, + { + "epoch": 2.1, + "learning_rate": "1.7186e-04", + "loss": 0.7627, + "slid_loss": 0.72, + "step": 4198, + "time": 13.23 + }, + { + "epoch": 2.1, + "learning_rate": "1.7184e-04", + "loss": 0.6138, + "slid_loss": 0.719, + "step": 4199, + "time": 12.85 + }, + { + "epoch": 2.1, + "learning_rate": "1.7182e-04", + "loss": 0.6242, + "slid_loss": 0.7169, + "step": 4200, + "time": 11.99 + }, + { + "epoch": 2.1, + "learning_rate": "1.7181e-04", + "loss": 0.7837, + "slid_loss": 0.7171, + "step": 4201, + "time": 12.76 + }, + { + "epoch": 2.1, + "learning_rate": "1.7179e-04", + "loss": 0.7522, + "slid_loss": 0.7174, + "step": 4202, + "time": 13.43 + }, + { + "epoch": 2.1, + "learning_rate": "1.7178e-04", + "loss": 0.7869, + "slid_loss": 0.7198, + "step": 4203, + "time": 10.85 + }, + { + "epoch": 2.11, + "learning_rate": "1.7176e-04", + "loss": 0.8358, + "slid_loss": 0.7196, + "step": 4204, + "time": 13.68 + }, + { + "epoch": 2.11, + "learning_rate": "1.7175e-04", + "loss": 0.6348, + "slid_loss": 0.7185, + "step": 4205, + "time": 13.39 + }, + { + "epoch": 2.11, + "learning_rate": "1.7173e-04", + "loss": 0.6996, + "slid_loss": 0.7196, + "step": 4206, + "time": 10.71 + }, + { + "epoch": 2.11, + "learning_rate": "1.7171e-04", + "loss": 0.653, + "slid_loss": 0.7208, + "step": 4207, + "time": 12.83 + }, + { + "epoch": 2.11, + "learning_rate": "1.7170e-04", + "loss": 0.5925, + "slid_loss": 0.7198, + "step": 4208, + "time": 13.12 + }, + { + "epoch": 2.11, + "learning_rate": "1.7168e-04", + "loss": 0.6147, + "slid_loss": 0.7187, + "step": 4209, + "time": 12.95 + }, + { + "epoch": 2.11, + "learning_rate": "1.7167e-04", + "loss": 0.6657, + "slid_loss": 0.717, + "step": 4210, + "time": 11.47 + }, + { + "epoch": 2.11, + "learning_rate": "1.7165e-04", + "loss": 0.7575, + "slid_loss": 0.7157, + "step": 4211, + "time": 12.67 + }, + { + "epoch": 2.11, + "learning_rate": "1.7164e-04", + "loss": 0.6915, + "slid_loss": 0.7161, + "step": 4212, + "time": 13.17 + }, + { + "epoch": 2.11, + "learning_rate": "1.7162e-04", + "loss": 0.7729, + "slid_loss": 0.7164, + "step": 4213, + "time": 13.28 + }, + { + "epoch": 2.11, + "learning_rate": "1.7160e-04", + "loss": 0.8321, + "slid_loss": 0.7177, + "step": 4214, + "time": 13.36 + }, + { + "epoch": 2.11, + "learning_rate": "1.7159e-04", + "loss": 0.7236, + "slid_loss": 0.7183, + "step": 4215, + "time": 11.32 + }, + { + "epoch": 2.11, + "learning_rate": "1.7157e-04", + "loss": 0.7408, + "slid_loss": 0.7184, + "step": 4216, + "time": 13.16 + }, + { + "epoch": 2.11, + "learning_rate": "1.7156e-04", + "loss": 0.783, + "slid_loss": 0.7214, + "step": 4217, + "time": 13.37 + }, + { + "epoch": 2.11, + "learning_rate": "1.7154e-04", + "loss": 0.6848, + "slid_loss": 0.7205, + "step": 4218, + "time": 11.43 + }, + { + "epoch": 2.11, + "learning_rate": "1.7153e-04", + "loss": 0.8254, + "slid_loss": 0.7217, + "step": 4219, + "time": 12.2 + }, + { + "epoch": 2.11, + "learning_rate": "1.7151e-04", + "loss": 0.69, + "slid_loss": 0.7221, + "step": 4220, + "time": 10.83 + }, + { + "epoch": 2.11, + "learning_rate": "1.7149e-04", + "loss": 0.7304, + "slid_loss": 0.7227, + "step": 4221, + "time": 11.93 + }, + { + "epoch": 2.11, + "learning_rate": "1.7148e-04", + "loss": 0.8622, + "slid_loss": 0.7227, + "step": 4222, + "time": 10.33 + }, + { + "epoch": 2.11, + "learning_rate": "1.7146e-04", + "loss": 0.7003, + "slid_loss": 0.7226, + "step": 4223, + "time": 11.36 + }, + { + "epoch": 2.12, + "learning_rate": "1.7145e-04", + "loss": 0.6761, + "slid_loss": 0.7228, + "step": 4224, + "time": 13.39 + }, + { + "epoch": 2.12, + "learning_rate": "1.7143e-04", + "loss": 0.7693, + "slid_loss": 0.7225, + "step": 4225, + "time": 10.96 + }, + { + "epoch": 2.12, + "learning_rate": "1.7141e-04", + "loss": 0.7474, + "slid_loss": 0.7223, + "step": 4226, + "time": 12.69 + }, + { + "epoch": 2.12, + "learning_rate": "1.7140e-04", + "loss": 0.7149, + "slid_loss": 0.7221, + "step": 4227, + "time": 12.83 + }, + { + "epoch": 2.12, + "learning_rate": "1.7138e-04", + "loss": 0.5531, + "slid_loss": 0.7196, + "step": 4228, + "time": 12.97 + }, + { + "epoch": 2.12, + "learning_rate": "1.7137e-04", + "loss": 0.6449, + "slid_loss": 0.7181, + "step": 4229, + "time": 12.93 + }, + { + "epoch": 2.12, + "learning_rate": "1.7135e-04", + "loss": 0.7567, + "slid_loss": 0.7198, + "step": 4230, + "time": 11.39 + }, + { + "epoch": 2.12, + "learning_rate": "1.7134e-04", + "loss": 0.8074, + "slid_loss": 0.7225, + "step": 4231, + "time": 11.66 + }, + { + "epoch": 2.12, + "learning_rate": "1.7132e-04", + "loss": 0.7107, + "slid_loss": 0.723, + "step": 4232, + "time": 11.35 + }, + { + "epoch": 2.12, + "learning_rate": "1.7130e-04", + "loss": 0.7219, + "slid_loss": 0.7231, + "step": 4233, + "time": 12.86 + }, + { + "epoch": 2.12, + "learning_rate": "1.7129e-04", + "loss": 0.6765, + "slid_loss": 0.7231, + "step": 4234, + "time": 10.87 + }, + { + "epoch": 2.12, + "learning_rate": "1.7127e-04", + "loss": 0.6324, + "slid_loss": 0.7223, + "step": 4235, + "time": 13.18 + }, + { + "epoch": 2.12, + "learning_rate": "1.7126e-04", + "loss": 0.6221, + "slid_loss": 0.7206, + "step": 4236, + "time": 11.21 + }, + { + "epoch": 2.12, + "learning_rate": "1.7124e-04", + "loss": 0.6834, + "slid_loss": 0.7199, + "step": 4237, + "time": 13.42 + }, + { + "epoch": 2.12, + "learning_rate": "1.7123e-04", + "loss": 0.7396, + "slid_loss": 0.7195, + "step": 4238, + "time": 11.46 + }, + { + "epoch": 2.12, + "learning_rate": "1.7121e-04", + "loss": 0.7058, + "slid_loss": 0.7199, + "step": 4239, + "time": 12.97 + }, + { + "epoch": 2.12, + "learning_rate": "1.7119e-04", + "loss": 0.7905, + "slid_loss": 0.7222, + "step": 4240, + "time": 13.35 + }, + { + "epoch": 2.12, + "learning_rate": "1.7118e-04", + "loss": 0.7277, + "slid_loss": 0.7236, + "step": 4241, + "time": 13.39 + }, + { + "epoch": 2.12, + "learning_rate": "1.7116e-04", + "loss": 0.6767, + "slid_loss": 0.7228, + "step": 4242, + "time": 10.88 + }, + { + "epoch": 2.12, + "learning_rate": "1.7115e-04", + "loss": 0.7178, + "slid_loss": 0.7231, + "step": 4243, + "time": 12.03 + }, + { + "epoch": 2.13, + "learning_rate": "1.7113e-04", + "loss": 0.6273, + "slid_loss": 0.7228, + "step": 4244, + "time": 12.25 + }, + { + "epoch": 2.13, + "learning_rate": "1.7111e-04", + "loss": 0.9494, + "slid_loss": 0.725, + "step": 4245, + "time": 11.05 + }, + { + "epoch": 2.13, + "learning_rate": "1.7110e-04", + "loss": 0.6511, + "slid_loss": 0.7244, + "step": 4246, + "time": 13.1 + }, + { + "epoch": 2.13, + "learning_rate": "1.7108e-04", + "loss": 0.8094, + "slid_loss": 0.7251, + "step": 4247, + "time": 10.8 + }, + { + "epoch": 2.13, + "learning_rate": "1.7107e-04", + "loss": 0.7982, + "slid_loss": 0.7269, + "step": 4248, + "time": 13.27 + }, + { + "epoch": 2.13, + "learning_rate": "1.7105e-04", + "loss": 0.6635, + "slid_loss": 0.7263, + "step": 4249, + "time": 11.91 + }, + { + "epoch": 2.13, + "learning_rate": "1.7103e-04", + "loss": 0.5876, + "slid_loss": 0.7254, + "step": 4250, + "time": 10.88 + }, + { + "epoch": 2.13, + "learning_rate": "1.7102e-04", + "loss": 0.6043, + "slid_loss": 0.7255, + "step": 4251, + "time": 13.59 + }, + { + "epoch": 2.13, + "learning_rate": "1.7100e-04", + "loss": 0.7717, + "slid_loss": 0.726, + "step": 4252, + "time": 13.33 + }, + { + "epoch": 2.13, + "learning_rate": "1.7099e-04", + "loss": 0.582, + "slid_loss": 0.7255, + "step": 4253, + "time": 13.77 + }, + { + "epoch": 2.13, + "learning_rate": "1.7097e-04", + "loss": 0.902, + "slid_loss": 0.7269, + "step": 4254, + "time": 13.97 + }, + { + "epoch": 2.13, + "learning_rate": "1.7096e-04", + "loss": 0.5284, + "slid_loss": 0.7239, + "step": 4255, + "time": 13.3 + }, + { + "epoch": 2.13, + "learning_rate": "1.7094e-04", + "loss": 0.5098, + "slid_loss": 0.7232, + "step": 4256, + "time": 13.82 + }, + { + "epoch": 2.13, + "learning_rate": "1.7092e-04", + "loss": 0.7658, + "slid_loss": 0.7236, + "step": 4257, + "time": 11.0 + }, + { + "epoch": 2.13, + "learning_rate": "1.7091e-04", + "loss": 0.6143, + "slid_loss": 0.7198, + "step": 4258, + "time": 13.33 + }, + { + "epoch": 2.13, + "learning_rate": "1.7089e-04", + "loss": 0.6279, + "slid_loss": 0.7184, + "step": 4259, + "time": 13.27 + }, + { + "epoch": 2.13, + "learning_rate": "1.7088e-04", + "loss": 0.6638, + "slid_loss": 0.7169, + "step": 4260, + "time": 11.41 + }, + { + "epoch": 2.13, + "learning_rate": "1.7086e-04", + "loss": 0.7205, + "slid_loss": 0.7162, + "step": 4261, + "time": 14.04 + }, + { + "epoch": 2.13, + "learning_rate": "1.7084e-04", + "loss": 0.5696, + "slid_loss": 0.7139, + "step": 4262, + "time": 13.07 + }, + { + "epoch": 2.13, + "learning_rate": "1.7083e-04", + "loss": 0.6773, + "slid_loss": 0.7139, + "step": 4263, + "time": 13.46 + }, + { + "epoch": 2.14, + "learning_rate": "1.7081e-04", + "loss": 0.7848, + "slid_loss": 0.7146, + "step": 4264, + "time": 12.89 + }, + { + "epoch": 2.14, + "learning_rate": "1.7080e-04", + "loss": 0.8348, + "slid_loss": 0.7164, + "step": 4265, + "time": 13.38 + }, + { + "epoch": 2.14, + "learning_rate": "1.7078e-04", + "loss": 0.5772, + "slid_loss": 0.7155, + "step": 4266, + "time": 10.89 + }, + { + "epoch": 2.14, + "learning_rate": "1.7076e-04", + "loss": 0.7467, + "slid_loss": 0.7158, + "step": 4267, + "time": 11.79 + }, + { + "epoch": 2.14, + "learning_rate": "1.7075e-04", + "loss": 0.632, + "slid_loss": 0.7146, + "step": 4268, + "time": 13.04 + }, + { + "epoch": 2.14, + "learning_rate": "1.7073e-04", + "loss": 0.7915, + "slid_loss": 0.7159, + "step": 4269, + "time": 11.25 + }, + { + "epoch": 2.14, + "learning_rate": "1.7072e-04", + "loss": 0.7261, + "slid_loss": 0.7167, + "step": 4270, + "time": 13.64 + }, + { + "epoch": 2.14, + "learning_rate": "1.7070e-04", + "loss": 0.621, + "slid_loss": 0.7162, + "step": 4271, + "time": 13.77 + }, + { + "epoch": 2.14, + "learning_rate": "1.7069e-04", + "loss": 0.8395, + "slid_loss": 0.7174, + "step": 4272, + "time": 13.35 + }, + { + "epoch": 2.14, + "learning_rate": "1.7067e-04", + "loss": 0.714, + "slid_loss": 0.7174, + "step": 4273, + "time": 11.36 + }, + { + "epoch": 2.14, + "learning_rate": "1.7065e-04", + "loss": 0.7414, + "slid_loss": 0.7172, + "step": 4274, + "time": 12.8 + }, + { + "epoch": 2.14, + "learning_rate": "1.7064e-04", + "loss": 0.4951, + "slid_loss": 0.7157, + "step": 4275, + "time": 11.85 + }, + { + "epoch": 2.14, + "learning_rate": "1.7062e-04", + "loss": 0.7614, + "slid_loss": 0.7162, + "step": 4276, + "time": 13.05 + }, + { + "epoch": 2.14, + "learning_rate": "1.7061e-04", + "loss": 0.5411, + "slid_loss": 0.714, + "step": 4277, + "time": 12.92 + }, + { + "epoch": 2.14, + "learning_rate": "1.7059e-04", + "loss": 0.6821, + "slid_loss": 0.7136, + "step": 4278, + "time": 14.28 + }, + { + "epoch": 2.14, + "learning_rate": "1.7057e-04", + "loss": 0.5926, + "slid_loss": 0.7112, + "step": 4279, + "time": 11.74 + }, + { + "epoch": 2.14, + "learning_rate": "1.7056e-04", + "loss": 0.7369, + "slid_loss": 0.7117, + "step": 4280, + "time": 12.75 + }, + { + "epoch": 2.14, + "learning_rate": "1.7054e-04", + "loss": 0.6928, + "slid_loss": 0.7097, + "step": 4281, + "time": 13.36 + }, + { + "epoch": 2.14, + "learning_rate": "1.7053e-04", + "loss": 0.6352, + "slid_loss": 0.7077, + "step": 4282, + "time": 11.96 + }, + { + "epoch": 2.14, + "learning_rate": "1.7051e-04", + "loss": 0.8227, + "slid_loss": 0.7099, + "step": 4283, + "time": 12.75 + }, + { + "epoch": 2.15, + "learning_rate": "1.7049e-04", + "loss": 0.717, + "slid_loss": 0.7102, + "step": 4284, + "time": 11.35 + }, + { + "epoch": 2.15, + "learning_rate": "1.7048e-04", + "loss": 0.6143, + "slid_loss": 0.7094, + "step": 4285, + "time": 14.13 + }, + { + "epoch": 2.15, + "learning_rate": "1.7046e-04", + "loss": 0.7843, + "slid_loss": 0.7093, + "step": 4286, + "time": 10.88 + }, + { + "epoch": 2.15, + "learning_rate": "1.7045e-04", + "loss": 0.6557, + "slid_loss": 0.7088, + "step": 4287, + "time": 13.62 + }, + { + "epoch": 2.15, + "learning_rate": "1.7043e-04", + "loss": 0.608, + "slid_loss": 0.7083, + "step": 4288, + "time": 12.81 + }, + { + "epoch": 2.15, + "learning_rate": "1.7041e-04", + "loss": 0.7253, + "slid_loss": 0.7085, + "step": 4289, + "time": 13.08 + }, + { + "epoch": 2.15, + "learning_rate": "1.7040e-04", + "loss": 0.6004, + "slid_loss": 0.7057, + "step": 4290, + "time": 12.79 + }, + { + "epoch": 2.15, + "learning_rate": "1.7038e-04", + "loss": 0.6556, + "slid_loss": 0.7047, + "step": 4291, + "time": 11.81 + }, + { + "epoch": 2.15, + "learning_rate": "1.7037e-04", + "loss": 0.7041, + "slid_loss": 0.7051, + "step": 4292, + "time": 11.21 + }, + { + "epoch": 2.15, + "learning_rate": "1.7035e-04", + "loss": 0.5958, + "slid_loss": 0.7029, + "step": 4293, + "time": 12.48 + }, + { + "epoch": 2.15, + "learning_rate": "1.7033e-04", + "loss": 0.7806, + "slid_loss": 0.7027, + "step": 4294, + "time": 11.37 + }, + { + "epoch": 2.15, + "learning_rate": "1.7032e-04", + "loss": 0.8378, + "slid_loss": 0.7045, + "step": 4295, + "time": 11.49 + }, + { + "epoch": 2.15, + "learning_rate": "1.7030e-04", + "loss": 0.6802, + "slid_loss": 0.7032, + "step": 4296, + "time": 12.86 + }, + { + "epoch": 2.15, + "learning_rate": "1.7029e-04", + "loss": 0.7087, + "slid_loss": 0.7015, + "step": 4297, + "time": 11.64 + }, + { + "epoch": 2.15, + "learning_rate": "1.7027e-04", + "loss": 0.7117, + "slid_loss": 0.701, + "step": 4298, + "time": 13.78 + }, + { + "epoch": 2.15, + "learning_rate": "1.7025e-04", + "loss": 0.6694, + "slid_loss": 0.7015, + "step": 4299, + "time": 13.78 + }, + { + "epoch": 2.15, + "learning_rate": "1.7024e-04", + "loss": 0.6782, + "slid_loss": 0.7021, + "step": 4300, + "time": 11.1 + }, + { + "epoch": 2.15, + "learning_rate": "1.7022e-04", + "loss": 0.7764, + "slid_loss": 0.702, + "step": 4301, + "time": 13.55 + }, + { + "epoch": 2.15, + "learning_rate": "1.7021e-04", + "loss": 0.8603, + "slid_loss": 0.7031, + "step": 4302, + "time": 13.58 + }, + { + "epoch": 2.15, + "learning_rate": "1.7019e-04", + "loss": 0.6913, + "slid_loss": 0.7021, + "step": 4303, + "time": 12.19 + }, + { + "epoch": 2.16, + "learning_rate": "1.7017e-04", + "loss": 0.7437, + "slid_loss": 0.7012, + "step": 4304, + "time": 12.85 + }, + { + "epoch": 2.16, + "learning_rate": "1.7016e-04", + "loss": 0.6683, + "slid_loss": 0.7015, + "step": 4305, + "time": 12.93 + }, + { + "epoch": 2.16, + "learning_rate": "1.7014e-04", + "loss": 0.7219, + "slid_loss": 0.7018, + "step": 4306, + "time": 13.34 + }, + { + "epoch": 2.16, + "learning_rate": "1.7013e-04", + "loss": 0.7047, + "slid_loss": 0.7023, + "step": 4307, + "time": 13.17 + }, + { + "epoch": 2.16, + "learning_rate": "1.7011e-04", + "loss": 0.6939, + "slid_loss": 0.7033, + "step": 4308, + "time": 12.76 + }, + { + "epoch": 2.16, + "learning_rate": "1.7009e-04", + "loss": 0.6941, + "slid_loss": 0.7041, + "step": 4309, + "time": 13.05 + }, + { + "epoch": 2.16, + "learning_rate": "1.7008e-04", + "loss": 0.5648, + "slid_loss": 0.7031, + "step": 4310, + "time": 13.82 + }, + { + "epoch": 2.16, + "learning_rate": "1.7006e-04", + "loss": 0.6746, + "slid_loss": 0.7023, + "step": 4311, + "time": 12.76 + }, + { + "epoch": 2.16, + "learning_rate": "1.7005e-04", + "loss": 0.6636, + "slid_loss": 0.702, + "step": 4312, + "time": 13.85 + }, + { + "epoch": 2.16, + "learning_rate": "1.7003e-04", + "loss": 0.7626, + "slid_loss": 0.7019, + "step": 4313, + "time": 12.23 + }, + { + "epoch": 2.16, + "learning_rate": "1.7001e-04", + "loss": 0.7527, + "slid_loss": 0.7011, + "step": 4314, + "time": 11.9 + }, + { + "epoch": 2.16, + "learning_rate": "1.7000e-04", + "loss": 0.7351, + "slid_loss": 0.7012, + "step": 4315, + "time": 11.64 + }, + { + "epoch": 2.16, + "learning_rate": "1.6998e-04", + "loss": 0.7443, + "slid_loss": 0.7012, + "step": 4316, + "time": 11.72 + }, + { + "epoch": 2.16, + "learning_rate": "1.6997e-04", + "loss": 0.6803, + "slid_loss": 0.7002, + "step": 4317, + "time": 13.04 + }, + { + "epoch": 2.16, + "learning_rate": "1.6995e-04", + "loss": 0.5996, + "slid_loss": 0.6993, + "step": 4318, + "time": 13.52 + }, + { + "epoch": 2.16, + "learning_rate": "1.6993e-04", + "loss": 0.6872, + "slid_loss": 0.698, + "step": 4319, + "time": 13.65 + }, + { + "epoch": 2.16, + "learning_rate": "1.6992e-04", + "loss": 0.7798, + "slid_loss": 0.6989, + "step": 4320, + "time": 11.41 + }, + { + "epoch": 2.16, + "learning_rate": "1.6990e-04", + "loss": 0.7059, + "slid_loss": 0.6986, + "step": 4321, + "time": 12.03 + }, + { + "epoch": 2.16, + "learning_rate": "1.6989e-04", + "loss": 0.7604, + "slid_loss": 0.6976, + "step": 4322, + "time": 13.56 + }, + { + "epoch": 2.16, + "learning_rate": "1.6987e-04", + "loss": 0.7454, + "slid_loss": 0.6981, + "step": 4323, + "time": 12.9 + }, + { + "epoch": 2.17, + "learning_rate": "1.6985e-04", + "loss": 0.6467, + "slid_loss": 0.6978, + "step": 4324, + "time": 11.91 + }, + { + "epoch": 2.17, + "learning_rate": "1.6984e-04", + "loss": 0.6827, + "slid_loss": 0.6969, + "step": 4325, + "time": 13.18 + }, + { + "epoch": 2.17, + "learning_rate": "1.6982e-04", + "loss": 0.6686, + "slid_loss": 0.6961, + "step": 4326, + "time": 12.97 + }, + { + "epoch": 2.17, + "learning_rate": "1.6981e-04", + "loss": 0.774, + "slid_loss": 0.6967, + "step": 4327, + "time": 13.13 + }, + { + "epoch": 2.17, + "learning_rate": "1.6979e-04", + "loss": 0.659, + "slid_loss": 0.6978, + "step": 4328, + "time": 13.07 + }, + { + "epoch": 2.17, + "learning_rate": "1.6977e-04", + "loss": 0.7185, + "slid_loss": 0.6985, + "step": 4329, + "time": 13.57 + }, + { + "epoch": 2.17, + "learning_rate": "1.6976e-04", + "loss": 0.7926, + "slid_loss": 0.6989, + "step": 4330, + "time": 12.96 + }, + { + "epoch": 2.17, + "learning_rate": "1.6974e-04", + "loss": 0.7454, + "slid_loss": 0.6982, + "step": 4331, + "time": 13.34 + }, + { + "epoch": 2.17, + "learning_rate": "1.6973e-04", + "loss": 0.5221, + "slid_loss": 0.6963, + "step": 4332, + "time": 11.18 + }, + { + "epoch": 2.17, + "learning_rate": "1.6971e-04", + "loss": 0.7529, + "slid_loss": 0.6967, + "step": 4333, + "time": 13.29 + }, + { + "epoch": 2.17, + "learning_rate": "1.6969e-04", + "loss": 0.5493, + "slid_loss": 0.6954, + "step": 4334, + "time": 11.37 + }, + { + "epoch": 2.17, + "learning_rate": "1.6968e-04", + "loss": 0.7048, + "slid_loss": 0.6961, + "step": 4335, + "time": 13.6 + }, + { + "epoch": 2.17, + "learning_rate": "1.6966e-04", + "loss": 0.7915, + "slid_loss": 0.6978, + "step": 4336, + "time": 11.6 + }, + { + "epoch": 2.17, + "learning_rate": "1.6965e-04", + "loss": 0.6376, + "slid_loss": 0.6973, + "step": 4337, + "time": 12.34 + }, + { + "epoch": 2.17, + "learning_rate": "1.6963e-04", + "loss": 0.5395, + "slid_loss": 0.6953, + "step": 4338, + "time": 11.95 + }, + { + "epoch": 2.17, + "learning_rate": "1.6961e-04", + "loss": 0.8862, + "slid_loss": 0.6971, + "step": 4339, + "time": 12.08 + }, + { + "epoch": 2.17, + "learning_rate": "1.6960e-04", + "loss": 0.6211, + "slid_loss": 0.6955, + "step": 4340, + "time": 11.75 + }, + { + "epoch": 2.17, + "learning_rate": "1.6958e-04", + "loss": 0.5816, + "slid_loss": 0.694, + "step": 4341, + "time": 10.97 + }, + { + "epoch": 2.17, + "learning_rate": "1.6957e-04", + "loss": 0.6673, + "slid_loss": 0.6939, + "step": 4342, + "time": 13.06 + }, + { + "epoch": 2.17, + "learning_rate": "1.6955e-04", + "loss": 0.8093, + "slid_loss": 0.6948, + "step": 4343, + "time": 11.52 + }, + { + "epoch": 2.18, + "learning_rate": "1.6953e-04", + "loss": 0.5753, + "slid_loss": 0.6943, + "step": 4344, + "time": 13.12 + }, + { + "epoch": 2.18, + "learning_rate": "1.6952e-04", + "loss": 0.5828, + "slid_loss": 0.6906, + "step": 4345, + "time": 13.33 + }, + { + "epoch": 2.18, + "learning_rate": "1.6950e-04", + "loss": 0.7606, + "slid_loss": 0.6917, + "step": 4346, + "time": 13.25 + }, + { + "epoch": 2.18, + "learning_rate": "1.6948e-04", + "loss": 0.6963, + "slid_loss": 0.6906, + "step": 4347, + "time": 14.05 + }, + { + "epoch": 2.18, + "learning_rate": "1.6947e-04", + "loss": 0.666, + "slid_loss": 0.6893, + "step": 4348, + "time": 12.99 + }, + { + "epoch": 2.18, + "learning_rate": "1.6945e-04", + "loss": 0.5239, + "slid_loss": 0.6879, + "step": 4349, + "time": 11.63 + }, + { + "epoch": 2.18, + "learning_rate": "1.6944e-04", + "loss": 0.711, + "slid_loss": 0.6891, + "step": 4350, + "time": 10.56 + }, + { + "epoch": 2.18, + "learning_rate": "1.6942e-04", + "loss": 0.7784, + "slid_loss": 0.6908, + "step": 4351, + "time": 11.83 + }, + { + "epoch": 2.18, + "learning_rate": "1.6940e-04", + "loss": 0.5914, + "slid_loss": 0.689, + "step": 4352, + "time": 11.31 + }, + { + "epoch": 2.18, + "learning_rate": "1.6939e-04", + "loss": 0.6321, + "slid_loss": 0.6895, + "step": 4353, + "time": 13.18 + }, + { + "epoch": 2.18, + "learning_rate": "1.6937e-04", + "loss": 0.5958, + "slid_loss": 0.6865, + "step": 4354, + "time": 13.49 + }, + { + "epoch": 2.18, + "learning_rate": "1.6936e-04", + "loss": 0.6749, + "slid_loss": 0.688, + "step": 4355, + "time": 13.43 + }, + { + "epoch": 2.18, + "learning_rate": "1.6934e-04", + "loss": 0.662, + "slid_loss": 0.6895, + "step": 4356, + "time": 12.92 + }, + { + "epoch": 2.18, + "learning_rate": "1.6932e-04", + "loss": 0.8255, + "slid_loss": 0.6901, + "step": 4357, + "time": 11.4 + }, + { + "epoch": 2.18, + "learning_rate": "1.6931e-04", + "loss": 0.7384, + "slid_loss": 0.6913, + "step": 4358, + "time": 12.71 + }, + { + "epoch": 2.18, + "learning_rate": "1.6929e-04", + "loss": 0.8898, + "slid_loss": 0.6939, + "step": 4359, + "time": 11.73 + }, + { + "epoch": 2.18, + "learning_rate": "1.6928e-04", + "loss": 0.6842, + "slid_loss": 0.6941, + "step": 4360, + "time": 13.07 + }, + { + "epoch": 2.18, + "learning_rate": "1.6926e-04", + "loss": 0.6726, + "slid_loss": 0.6937, + "step": 4361, + "time": 14.6 + }, + { + "epoch": 2.18, + "learning_rate": "1.6924e-04", + "loss": 0.6024, + "slid_loss": 0.694, + "step": 4362, + "time": 11.63 + }, + { + "epoch": 2.18, + "learning_rate": "1.6923e-04", + "loss": 0.7609, + "slid_loss": 0.6948, + "step": 4363, + "time": 14.26 + }, + { + "epoch": 2.19, + "learning_rate": "1.6921e-04", + "loss": 0.8436, + "slid_loss": 0.6954, + "step": 4364, + "time": 12.37 + }, + { + "epoch": 2.19, + "learning_rate": "1.6919e-04", + "loss": 0.7383, + "slid_loss": 0.6944, + "step": 4365, + "time": 13.46 + }, + { + "epoch": 2.19, + "learning_rate": "1.6918e-04", + "loss": 0.5992, + "slid_loss": 0.6947, + "step": 4366, + "time": 12.87 + }, + { + "epoch": 2.19, + "learning_rate": "1.6916e-04", + "loss": 0.7532, + "slid_loss": 0.6947, + "step": 4367, + "time": 10.97 + }, + { + "epoch": 2.19, + "learning_rate": "1.6915e-04", + "loss": 0.6555, + "slid_loss": 0.695, + "step": 4368, + "time": 11.78 + }, + { + "epoch": 2.19, + "learning_rate": "1.6913e-04", + "loss": 0.5996, + "slid_loss": 0.693, + "step": 4369, + "time": 13.68 + }, + { + "epoch": 2.19, + "learning_rate": "1.6911e-04", + "loss": 0.784, + "slid_loss": 0.6936, + "step": 4370, + "time": 12.13 + }, + { + "epoch": 2.19, + "learning_rate": "1.6910e-04", + "loss": 0.6808, + "slid_loss": 0.6942, + "step": 4371, + "time": 14.18 + }, + { + "epoch": 2.19, + "learning_rate": "1.6908e-04", + "loss": 0.6408, + "slid_loss": 0.6922, + "step": 4372, + "time": 11.59 + }, + { + "epoch": 2.19, + "learning_rate": "1.6907e-04", + "loss": 0.7722, + "slid_loss": 0.6928, + "step": 4373, + "time": 12.73 + }, + { + "epoch": 2.19, + "learning_rate": "1.6905e-04", + "loss": 0.7221, + "slid_loss": 0.6926, + "step": 4374, + "time": 13.05 + }, + { + "epoch": 2.19, + "learning_rate": "1.6903e-04", + "loss": 0.7342, + "slid_loss": 0.695, + "step": 4375, + "time": 11.88 + }, + { + "epoch": 2.19, + "learning_rate": "1.6902e-04", + "loss": 0.7057, + "slid_loss": 0.6945, + "step": 4376, + "time": 12.83 + }, + { + "epoch": 2.19, + "learning_rate": "1.6900e-04", + "loss": 0.8114, + "slid_loss": 0.6972, + "step": 4377, + "time": 13.35 + }, + { + "epoch": 2.19, + "learning_rate": "1.6898e-04", + "loss": 0.6315, + "slid_loss": 0.6967, + "step": 4378, + "time": 13.33 + }, + { + "epoch": 2.19, + "learning_rate": "1.6897e-04", + "loss": 0.5408, + "slid_loss": 0.6961, + "step": 4379, + "time": 13.64 + }, + { + "epoch": 2.19, + "learning_rate": "1.6895e-04", + "loss": 0.5497, + "slid_loss": 0.6943, + "step": 4380, + "time": 12.21 + }, + { + "epoch": 2.19, + "learning_rate": "1.6894e-04", + "loss": 0.8584, + "slid_loss": 0.6959, + "step": 4381, + "time": 13.66 + }, + { + "epoch": 2.19, + "learning_rate": "1.6892e-04", + "loss": 0.7175, + "slid_loss": 0.6967, + "step": 4382, + "time": 13.6 + }, + { + "epoch": 2.19, + "learning_rate": "1.6890e-04", + "loss": 0.7523, + "slid_loss": 0.696, + "step": 4383, + "time": 12.22 + }, + { + "epoch": 2.2, + "learning_rate": "1.6889e-04", + "loss": 0.7253, + "slid_loss": 0.6961, + "step": 4384, + "time": 12.51 + }, + { + "epoch": 2.2, + "learning_rate": "1.6887e-04", + "loss": 0.6478, + "slid_loss": 0.6965, + "step": 4385, + "time": 10.6 + }, + { + "epoch": 2.2, + "learning_rate": "1.6886e-04", + "loss": 0.6452, + "slid_loss": 0.6951, + "step": 4386, + "time": 10.65 + }, + { + "epoch": 2.2, + "learning_rate": "1.6884e-04", + "loss": 0.7691, + "slid_loss": 0.6962, + "step": 4387, + "time": 13.13 + }, + { + "epoch": 2.2, + "learning_rate": "1.6882e-04", + "loss": 0.6658, + "slid_loss": 0.6968, + "step": 4388, + "time": 13.94 + }, + { + "epoch": 2.2, + "learning_rate": "1.6881e-04", + "loss": 0.8041, + "slid_loss": 0.6976, + "step": 4389, + "time": 13.09 + }, + { + "epoch": 2.2, + "learning_rate": "1.6879e-04", + "loss": 0.6956, + "slid_loss": 0.6985, + "step": 4390, + "time": 13.27 + }, + { + "epoch": 2.2, + "learning_rate": "1.6877e-04", + "loss": 0.7002, + "slid_loss": 0.699, + "step": 4391, + "time": 13.72 + }, + { + "epoch": 2.2, + "learning_rate": "1.6876e-04", + "loss": 0.5735, + "slid_loss": 0.6977, + "step": 4392, + "time": 13.54 + }, + { + "epoch": 2.2, + "learning_rate": "1.6874e-04", + "loss": 0.6447, + "slid_loss": 0.6981, + "step": 4393, + "time": 13.04 + }, + { + "epoch": 2.2, + "learning_rate": "1.6873e-04", + "loss": 0.5745, + "slid_loss": 0.6961, + "step": 4394, + "time": 12.7 + }, + { + "epoch": 2.2, + "learning_rate": "1.6871e-04", + "loss": 0.7716, + "slid_loss": 0.6954, + "step": 4395, + "time": 13.22 + }, + { + "epoch": 2.2, + "learning_rate": "1.6869e-04", + "loss": 0.7182, + "slid_loss": 0.6958, + "step": 4396, + "time": 13.22 + }, + { + "epoch": 2.2, + "learning_rate": "1.6868e-04", + "loss": 0.6299, + "slid_loss": 0.695, + "step": 4397, + "time": 13.56 + }, + { + "epoch": 2.2, + "learning_rate": "1.6866e-04", + "loss": 0.8612, + "slid_loss": 0.6965, + "step": 4398, + "time": 13.9 + }, + { + "epoch": 2.2, + "learning_rate": "1.6864e-04", + "loss": 0.6102, + "slid_loss": 0.6959, + "step": 4399, + "time": 13.53 + }, + { + "epoch": 2.2, + "learning_rate": "1.6863e-04", + "loss": 0.5806, + "slid_loss": 0.6949, + "step": 4400, + "time": 12.82 + }, + { + "epoch": 2.2, + "learning_rate": "1.6861e-04", + "loss": 0.666, + "slid_loss": 0.6938, + "step": 4401, + "time": 12.25 + }, + { + "epoch": 2.2, + "learning_rate": "1.6860e-04", + "loss": 0.8785, + "slid_loss": 0.694, + "step": 4402, + "time": 14.29 + }, + { + "epoch": 2.2, + "learning_rate": "1.6858e-04", + "loss": 0.6495, + "slid_loss": 0.6936, + "step": 4403, + "time": 13.45 + }, + { + "epoch": 2.21, + "learning_rate": "1.6856e-04", + "loss": 0.5138, + "slid_loss": 0.6913, + "step": 4404, + "time": 13.23 + }, + { + "epoch": 2.21, + "learning_rate": "1.6855e-04", + "loss": 0.614, + "slid_loss": 0.6908, + "step": 4405, + "time": 13.25 + }, + { + "epoch": 2.21, + "learning_rate": "1.6853e-04", + "loss": 0.7269, + "slid_loss": 0.6908, + "step": 4406, + "time": 13.71 + }, + { + "epoch": 2.21, + "learning_rate": "1.6851e-04", + "loss": 0.7519, + "slid_loss": 0.6913, + "step": 4407, + "time": 13.41 + }, + { + "epoch": 2.21, + "learning_rate": "1.6850e-04", + "loss": 0.77, + "slid_loss": 0.692, + "step": 4408, + "time": 14.02 + }, + { + "epoch": 2.21, + "learning_rate": "1.6848e-04", + "loss": 0.758, + "slid_loss": 0.6927, + "step": 4409, + "time": 11.82 + }, + { + "epoch": 2.21, + "learning_rate": "1.6847e-04", + "loss": 0.6626, + "slid_loss": 0.6937, + "step": 4410, + "time": 13.06 + }, + { + "epoch": 2.21, + "learning_rate": "1.6845e-04", + "loss": 0.7271, + "slid_loss": 0.6942, + "step": 4411, + "time": 10.52 + }, + { + "epoch": 2.21, + "learning_rate": "1.6843e-04", + "loss": 0.748, + "slid_loss": 0.695, + "step": 4412, + "time": 13.57 + }, + { + "epoch": 2.21, + "learning_rate": "1.6842e-04", + "loss": 0.6388, + "slid_loss": 0.6938, + "step": 4413, + "time": 11.25 + }, + { + "epoch": 2.21, + "learning_rate": "1.6840e-04", + "loss": 0.8351, + "slid_loss": 0.6946, + "step": 4414, + "time": 10.26 + }, + { + "epoch": 2.21, + "learning_rate": "1.6838e-04", + "loss": 0.6561, + "slid_loss": 0.6938, + "step": 4415, + "time": 13.3 + }, + { + "epoch": 2.21, + "learning_rate": "1.6837e-04", + "loss": 0.8322, + "slid_loss": 0.6947, + "step": 4416, + "time": 13.24 + }, + { + "epoch": 2.21, + "learning_rate": "1.6835e-04", + "loss": 0.8627, + "slid_loss": 0.6965, + "step": 4417, + "time": 13.5 + }, + { + "epoch": 2.21, + "learning_rate": "1.6834e-04", + "loss": 0.6708, + "slid_loss": 0.6972, + "step": 4418, + "time": 12.64 + }, + { + "epoch": 2.21, + "learning_rate": "1.6832e-04", + "loss": 0.7055, + "slid_loss": 0.6974, + "step": 4419, + "time": 11.97 + }, + { + "epoch": 2.21, + "learning_rate": "1.6830e-04", + "loss": 0.747, + "slid_loss": 0.6971, + "step": 4420, + "time": 13.33 + }, + { + "epoch": 2.21, + "learning_rate": "1.6829e-04", + "loss": 0.8573, + "slid_loss": 0.6986, + "step": 4421, + "time": 13.5 + }, + { + "epoch": 2.21, + "learning_rate": "1.6827e-04", + "loss": 0.7653, + "slid_loss": 0.6987, + "step": 4422, + "time": 12.07 + }, + { + "epoch": 2.21, + "learning_rate": "1.6825e-04", + "loss": 0.7223, + "slid_loss": 0.6984, + "step": 4423, + "time": 13.87 + }, + { + "epoch": 2.22, + "learning_rate": "1.6824e-04", + "loss": 0.5794, + "slid_loss": 0.6977, + "step": 4424, + "time": 13.28 + }, + { + "epoch": 2.22, + "learning_rate": "1.6822e-04", + "loss": 0.7103, + "slid_loss": 0.698, + "step": 4425, + "time": 13.74 + }, + { + "epoch": 2.22, + "learning_rate": "1.6821e-04", + "loss": 0.7192, + "slid_loss": 0.6985, + "step": 4426, + "time": 11.99 + }, + { + "epoch": 2.22, + "learning_rate": "1.6819e-04", + "loss": 0.8103, + "slid_loss": 0.6989, + "step": 4427, + "time": 13.43 + }, + { + "epoch": 2.22, + "learning_rate": "1.6817e-04", + "loss": 0.7583, + "slid_loss": 0.6999, + "step": 4428, + "time": 12.79 + }, + { + "epoch": 2.22, + "learning_rate": "1.6816e-04", + "loss": 0.8057, + "slid_loss": 0.7008, + "step": 4429, + "time": 11.19 + }, + { + "epoch": 2.22, + "learning_rate": "1.6814e-04", + "loss": 0.7985, + "slid_loss": 0.7008, + "step": 4430, + "time": 14.05 + }, + { + "epoch": 2.22, + "learning_rate": "1.6812e-04", + "loss": 0.8282, + "slid_loss": 0.7016, + "step": 4431, + "time": 12.85 + }, + { + "epoch": 2.22, + "learning_rate": "1.6811e-04", + "loss": 0.6504, + "slid_loss": 0.7029, + "step": 4432, + "time": 12.69 + }, + { + "epoch": 2.22, + "learning_rate": "1.6809e-04", + "loss": 0.7741, + "slid_loss": 0.7031, + "step": 4433, + "time": 14.1 + }, + { + "epoch": 2.22, + "learning_rate": "1.6808e-04", + "loss": 0.567, + "slid_loss": 0.7033, + "step": 4434, + "time": 11.61 + }, + { + "epoch": 2.22, + "learning_rate": "1.6806e-04", + "loss": 0.7222, + "slid_loss": 0.7035, + "step": 4435, + "time": 12.28 + }, + { + "epoch": 2.22, + "learning_rate": "1.6804e-04", + "loss": 0.7441, + "slid_loss": 0.703, + "step": 4436, + "time": 11.86 + }, + { + "epoch": 2.22, + "learning_rate": "1.6803e-04", + "loss": 0.7148, + "slid_loss": 0.7038, + "step": 4437, + "time": 11.44 + }, + { + "epoch": 2.22, + "learning_rate": "1.6801e-04", + "loss": 0.7038, + "slid_loss": 0.7054, + "step": 4438, + "time": 13.06 + }, + { + "epoch": 2.22, + "learning_rate": "1.6799e-04", + "loss": 0.6817, + "slid_loss": 0.7034, + "step": 4439, + "time": 13.26 + }, + { + "epoch": 2.22, + "learning_rate": "1.6798e-04", + "loss": 0.7382, + "slid_loss": 0.7046, + "step": 4440, + "time": 13.57 + }, + { + "epoch": 2.22, + "learning_rate": "1.6796e-04", + "loss": 0.5743, + "slid_loss": 0.7045, + "step": 4441, + "time": 10.74 + }, + { + "epoch": 2.22, + "learning_rate": "1.6795e-04", + "loss": 0.7658, + "slid_loss": 0.7055, + "step": 4442, + "time": 13.21 + }, + { + "epoch": 2.22, + "learning_rate": "1.6793e-04", + "loss": 0.7331, + "slid_loss": 0.7047, + "step": 4443, + "time": 11.78 + }, + { + "epoch": 2.23, + "learning_rate": "1.6791e-04", + "loss": 0.7222, + "slid_loss": 0.7062, + "step": 4444, + "time": 10.89 + }, + { + "epoch": 2.23, + "learning_rate": "1.6790e-04", + "loss": 0.6939, + "slid_loss": 0.7073, + "step": 4445, + "time": 11.15 + }, + { + "epoch": 2.23, + "learning_rate": "1.6788e-04", + "loss": 0.6863, + "slid_loss": 0.7065, + "step": 4446, + "time": 13.93 + }, + { + "epoch": 2.23, + "learning_rate": "1.6786e-04", + "loss": 0.7026, + "slid_loss": 0.7066, + "step": 4447, + "time": 11.75 + }, + { + "epoch": 2.23, + "learning_rate": "1.6785e-04", + "loss": 0.6407, + "slid_loss": 0.7064, + "step": 4448, + "time": 12.83 + }, + { + "epoch": 2.23, + "learning_rate": "1.6783e-04", + "loss": 0.6638, + "slid_loss": 0.7078, + "step": 4449, + "time": 11.45 + }, + { + "epoch": 2.23, + "learning_rate": "1.6781e-04", + "loss": 0.7509, + "slid_loss": 0.7082, + "step": 4450, + "time": 10.71 + }, + { + "epoch": 2.23, + "learning_rate": "1.6780e-04", + "loss": 0.5545, + "slid_loss": 0.7059, + "step": 4451, + "time": 11.26 + }, + { + "epoch": 2.23, + "learning_rate": "1.6778e-04", + "loss": 0.7192, + "slid_loss": 0.7072, + "step": 4452, + "time": 13.36 + }, + { + "epoch": 2.23, + "learning_rate": "1.6777e-04", + "loss": 0.6704, + "slid_loss": 0.7076, + "step": 4453, + "time": 11.13 + }, + { + "epoch": 2.23, + "learning_rate": "1.6775e-04", + "loss": 0.7697, + "slid_loss": 0.7093, + "step": 4454, + "time": 11.14 + }, + { + "epoch": 2.23, + "learning_rate": "1.6773e-04", + "loss": 0.7616, + "slid_loss": 0.7102, + "step": 4455, + "time": 11.59 + }, + { + "epoch": 2.23, + "learning_rate": "1.6772e-04", + "loss": 0.7844, + "slid_loss": 0.7114, + "step": 4456, + "time": 13.59 + }, + { + "epoch": 2.23, + "learning_rate": "1.6770e-04", + "loss": 0.6644, + "slid_loss": 0.7098, + "step": 4457, + "time": 12.97 + }, + { + "epoch": 2.23, + "learning_rate": "1.6768e-04", + "loss": 0.7135, + "slid_loss": 0.7095, + "step": 4458, + "time": 13.32 + }, + { + "epoch": 2.23, + "learning_rate": "1.6767e-04", + "loss": 0.6279, + "slid_loss": 0.7069, + "step": 4459, + "time": 11.1 + }, + { + "epoch": 2.23, + "learning_rate": "1.6765e-04", + "loss": 0.733, + "slid_loss": 0.7074, + "step": 4460, + "time": 11.31 + }, + { + "epoch": 2.23, + "learning_rate": "1.6763e-04", + "loss": 0.7354, + "slid_loss": 0.708, + "step": 4461, + "time": 13.54 + }, + { + "epoch": 2.23, + "learning_rate": "1.6762e-04", + "loss": 0.7482, + "slid_loss": 0.7095, + "step": 4462, + "time": 11.87 + }, + { + "epoch": 2.23, + "learning_rate": "1.6760e-04", + "loss": 0.8214, + "slid_loss": 0.7101, + "step": 4463, + "time": 13.67 + }, + { + "epoch": 2.24, + "learning_rate": "1.6759e-04", + "loss": 0.6983, + "slid_loss": 0.7087, + "step": 4464, + "time": 12.55 + }, + { + "epoch": 2.24, + "learning_rate": "1.6757e-04", + "loss": 0.7451, + "slid_loss": 0.7087, + "step": 4465, + "time": 13.0 + }, + { + "epoch": 2.24, + "learning_rate": "1.6755e-04", + "loss": 0.5234, + "slid_loss": 0.708, + "step": 4466, + "time": 10.59 + }, + { + "epoch": 2.24, + "learning_rate": "1.6754e-04", + "loss": 0.6666, + "slid_loss": 0.7071, + "step": 4467, + "time": 13.85 + }, + { + "epoch": 2.24, + "learning_rate": "1.6752e-04", + "loss": 0.6817, + "slid_loss": 0.7074, + "step": 4468, + "time": 11.41 + }, + { + "epoch": 2.24, + "learning_rate": "1.6750e-04", + "loss": 0.6481, + "slid_loss": 0.7078, + "step": 4469, + "time": 12.66 + }, + { + "epoch": 2.24, + "learning_rate": "1.6749e-04", + "loss": 0.6669, + "slid_loss": 0.7067, + "step": 4470, + "time": 13.31 + }, + { + "epoch": 2.24, + "learning_rate": "1.6747e-04", + "loss": 0.4612, + "slid_loss": 0.7045, + "step": 4471, + "time": 11.72 + }, + { + "epoch": 2.24, + "learning_rate": "1.6745e-04", + "loss": 0.7751, + "slid_loss": 0.7058, + "step": 4472, + "time": 12.07 + }, + { + "epoch": 2.24, + "learning_rate": "1.6744e-04", + "loss": 0.5911, + "slid_loss": 0.704, + "step": 4473, + "time": 13.83 + }, + { + "epoch": 2.24, + "learning_rate": "1.6742e-04", + "loss": 0.6649, + "slid_loss": 0.7034, + "step": 4474, + "time": 13.88 + }, + { + "epoch": 2.24, + "learning_rate": "1.6741e-04", + "loss": 0.6678, + "slid_loss": 0.7028, + "step": 4475, + "time": 12.82 + }, + { + "epoch": 2.24, + "learning_rate": "1.6739e-04", + "loss": 0.7405, + "slid_loss": 0.7031, + "step": 4476, + "time": 12.56 + }, + { + "epoch": 2.24, + "learning_rate": "1.6737e-04", + "loss": 0.7812, + "slid_loss": 0.7028, + "step": 4477, + "time": 13.93 + }, + { + "epoch": 2.24, + "learning_rate": "1.6736e-04", + "loss": 0.6785, + "slid_loss": 0.7033, + "step": 4478, + "time": 12.84 + }, + { + "epoch": 2.24, + "learning_rate": "1.6734e-04", + "loss": 0.6534, + "slid_loss": 0.7044, + "step": 4479, + "time": 13.51 + }, + { + "epoch": 2.24, + "learning_rate": "1.6732e-04", + "loss": 0.7946, + "slid_loss": 0.7069, + "step": 4480, + "time": 11.25 + }, + { + "epoch": 2.24, + "learning_rate": "1.6731e-04", + "loss": 0.7679, + "slid_loss": 0.706, + "step": 4481, + "time": 11.58 + }, + { + "epoch": 2.24, + "learning_rate": "1.6729e-04", + "loss": 0.6431, + "slid_loss": 0.7052, + "step": 4482, + "time": 13.11 + }, + { + "epoch": 2.24, + "learning_rate": "1.6727e-04", + "loss": 0.6768, + "slid_loss": 0.7045, + "step": 4483, + "time": 11.64 + }, + { + "epoch": 2.25, + "learning_rate": "1.6726e-04", + "loss": 0.69, + "slid_loss": 0.7041, + "step": 4484, + "time": 12.3 + }, + { + "epoch": 2.25, + "learning_rate": "1.6724e-04", + "loss": 0.6409, + "slid_loss": 0.704, + "step": 4485, + "time": 11.59 + }, + { + "epoch": 2.25, + "learning_rate": "1.6723e-04", + "loss": 0.5624, + "slid_loss": 0.7032, + "step": 4486, + "time": 13.92 + }, + { + "epoch": 2.25, + "learning_rate": "1.6721e-04", + "loss": 0.6869, + "slid_loss": 0.7024, + "step": 4487, + "time": 11.22 + }, + { + "epoch": 2.25, + "learning_rate": "1.6719e-04", + "loss": 0.6691, + "slid_loss": 0.7024, + "step": 4488, + "time": 13.39 + }, + { + "epoch": 2.25, + "learning_rate": "1.6718e-04", + "loss": 0.5237, + "slid_loss": 0.6996, + "step": 4489, + "time": 11.58 + }, + { + "epoch": 2.25, + "learning_rate": "1.6716e-04", + "loss": 0.5911, + "slid_loss": 0.6986, + "step": 4490, + "time": 13.69 + }, + { + "epoch": 2.25, + "learning_rate": "1.6714e-04", + "loss": 0.8622, + "slid_loss": 0.7002, + "step": 4491, + "time": 13.95 + }, + { + "epoch": 2.25, + "learning_rate": "1.6713e-04", + "loss": 0.7071, + "slid_loss": 0.7015, + "step": 4492, + "time": 11.87 + }, + { + "epoch": 2.25, + "learning_rate": "1.6711e-04", + "loss": 0.6165, + "slid_loss": 0.7012, + "step": 4493, + "time": 13.25 + }, + { + "epoch": 2.25, + "learning_rate": "1.6709e-04", + "loss": 0.8169, + "slid_loss": 0.7037, + "step": 4494, + "time": 13.71 + }, + { + "epoch": 2.25, + "learning_rate": "1.6708e-04", + "loss": 0.8433, + "slid_loss": 0.7044, + "step": 4495, + "time": 13.77 + }, + { + "epoch": 2.25, + "learning_rate": "1.6706e-04", + "loss": 0.7152, + "slid_loss": 0.7044, + "step": 4496, + "time": 12.27 + }, + { + "epoch": 2.25, + "learning_rate": "1.6704e-04", + "loss": 0.5642, + "slid_loss": 0.7037, + "step": 4497, + "time": 11.22 + }, + { + "epoch": 2.25, + "learning_rate": "1.6703e-04", + "loss": 0.5806, + "slid_loss": 0.7009, + "step": 4498, + "time": 13.42 + }, + { + "epoch": 2.25, + "learning_rate": "1.6701e-04", + "loss": 0.6109, + "slid_loss": 0.7009, + "step": 4499, + "time": 12.48 + }, + { + "epoch": 2.25, + "learning_rate": "1.6700e-04", + "loss": 0.5359, + "slid_loss": 0.7005, + "step": 4500, + "time": 12.89 + }, + { + "epoch": 2.25, + "learning_rate": "1.6698e-04", + "loss": 0.5423, + "slid_loss": 0.6992, + "step": 4501, + "time": 13.29 + }, + { + "epoch": 2.25, + "learning_rate": "1.6696e-04", + "loss": 0.6206, + "slid_loss": 0.6966, + "step": 4502, + "time": 11.96 + }, + { + "epoch": 2.25, + "learning_rate": "1.6695e-04", + "loss": 0.5285, + "slid_loss": 0.6954, + "step": 4503, + "time": 11.39 + }, + { + "epoch": 2.26, + "learning_rate": "1.6693e-04", + "loss": 0.9121, + "slid_loss": 0.6994, + "step": 4504, + "time": 13.71 + }, + { + "epoch": 2.26, + "learning_rate": "1.6691e-04", + "loss": 0.7101, + "slid_loss": 0.7004, + "step": 4505, + "time": 12.77 + }, + { + "epoch": 2.26, + "learning_rate": "1.6690e-04", + "loss": 0.6863, + "slid_loss": 0.7, + "step": 4506, + "time": 11.02 + }, + { + "epoch": 2.26, + "learning_rate": "1.6688e-04", + "loss": 0.7197, + "slid_loss": 0.6996, + "step": 4507, + "time": 11.48 + }, + { + "epoch": 2.26, + "learning_rate": "1.6686e-04", + "loss": 0.6808, + "slid_loss": 0.6988, + "step": 4508, + "time": 13.49 + }, + { + "epoch": 2.26, + "learning_rate": "1.6685e-04", + "loss": 0.6121, + "slid_loss": 0.6973, + "step": 4509, + "time": 10.83 + }, + { + "epoch": 2.26, + "learning_rate": "1.6683e-04", + "loss": 0.6406, + "slid_loss": 0.6971, + "step": 4510, + "time": 10.72 + }, + { + "epoch": 2.26, + "learning_rate": "1.6681e-04", + "loss": 0.7713, + "slid_loss": 0.6975, + "step": 4511, + "time": 13.52 + }, + { + "epoch": 2.26, + "learning_rate": "1.6680e-04", + "loss": 0.6531, + "slid_loss": 0.6966, + "step": 4512, + "time": 13.26 + }, + { + "epoch": 2.26, + "learning_rate": "1.6678e-04", + "loss": 0.7669, + "slid_loss": 0.6978, + "step": 4513, + "time": 13.11 + }, + { + "epoch": 2.26, + "learning_rate": "1.6676e-04", + "loss": 0.5369, + "slid_loss": 0.6949, + "step": 4514, + "time": 13.28 + }, + { + "epoch": 2.26, + "learning_rate": "1.6675e-04", + "loss": 0.7444, + "slid_loss": 0.6958, + "step": 4515, + "time": 11.76 + }, + { + "epoch": 2.26, + "learning_rate": "1.6673e-04", + "loss": 0.7025, + "slid_loss": 0.6945, + "step": 4516, + "time": 12.84 + }, + { + "epoch": 2.26, + "learning_rate": "1.6672e-04", + "loss": 0.745, + "slid_loss": 0.6933, + "step": 4517, + "time": 13.99 + }, + { + "epoch": 2.26, + "learning_rate": "1.6670e-04", + "loss": 0.7082, + "slid_loss": 0.6937, + "step": 4518, + "time": 12.95 + }, + { + "epoch": 2.26, + "learning_rate": "1.6668e-04", + "loss": 0.7689, + "slid_loss": 0.6943, + "step": 4519, + "time": 13.25 + }, + { + "epoch": 2.26, + "learning_rate": "1.6667e-04", + "loss": 0.6346, + "slid_loss": 0.6932, + "step": 4520, + "time": 11.22 + }, + { + "epoch": 2.26, + "learning_rate": "1.6665e-04", + "loss": 0.5614, + "slid_loss": 0.6902, + "step": 4521, + "time": 12.07 + }, + { + "epoch": 2.26, + "learning_rate": "1.6663e-04", + "loss": 0.6625, + "slid_loss": 0.6892, + "step": 4522, + "time": 13.1 + }, + { + "epoch": 2.26, + "learning_rate": "1.6662e-04", + "loss": 0.5634, + "slid_loss": 0.6876, + "step": 4523, + "time": 10.88 + }, + { + "epoch": 2.27, + "learning_rate": "1.6660e-04", + "loss": 0.8239, + "slid_loss": 0.69, + "step": 4524, + "time": 13.45 + }, + { + "epoch": 2.27, + "learning_rate": "1.6658e-04", + "loss": 0.7009, + "slid_loss": 0.6899, + "step": 4525, + "time": 13.14 + }, + { + "epoch": 2.27, + "learning_rate": "1.6657e-04", + "loss": 0.7467, + "slid_loss": 0.6902, + "step": 4526, + "time": 11.94 + }, + { + "epoch": 2.27, + "learning_rate": "1.6655e-04", + "loss": 0.7297, + "slid_loss": 0.6894, + "step": 4527, + "time": 11.73 + }, + { + "epoch": 2.27, + "learning_rate": "1.6653e-04", + "loss": 0.7522, + "slid_loss": 0.6893, + "step": 4528, + "time": 13.46 + }, + { + "epoch": 2.27, + "learning_rate": "1.6652e-04", + "loss": 0.8667, + "slid_loss": 0.69, + "step": 4529, + "time": 13.52 + }, + { + "epoch": 2.27, + "learning_rate": "1.6650e-04", + "loss": 0.6203, + "slid_loss": 0.6882, + "step": 4530, + "time": 13.67 + }, + { + "epoch": 2.27, + "learning_rate": "1.6648e-04", + "loss": 0.6804, + "slid_loss": 0.6867, + "step": 4531, + "time": 12.16 + }, + { + "epoch": 2.27, + "learning_rate": "1.6647e-04", + "loss": 0.5671, + "slid_loss": 0.6859, + "step": 4532, + "time": 10.49 + }, + { + "epoch": 2.27, + "learning_rate": "1.6645e-04", + "loss": 0.6738, + "slid_loss": 0.6849, + "step": 4533, + "time": 12.81 + }, + { + "epoch": 2.27, + "learning_rate": "1.6643e-04", + "loss": 0.7903, + "slid_loss": 0.6871, + "step": 4534, + "time": 12.97 + }, + { + "epoch": 2.27, + "learning_rate": "1.6642e-04", + "loss": 0.7271, + "slid_loss": 0.6871, + "step": 4535, + "time": 13.65 + }, + { + "epoch": 2.27, + "learning_rate": "1.6640e-04", + "loss": 0.8321, + "slid_loss": 0.688, + "step": 4536, + "time": 11.69 + }, + { + "epoch": 2.27, + "learning_rate": "1.6639e-04", + "loss": 0.7209, + "slid_loss": 0.6881, + "step": 4537, + "time": 13.77 + }, + { + "epoch": 2.27, + "learning_rate": "1.6637e-04", + "loss": 0.4918, + "slid_loss": 0.686, + "step": 4538, + "time": 11.98 + }, + { + "epoch": 2.27, + "learning_rate": "1.6635e-04", + "loss": 0.7109, + "slid_loss": 0.6863, + "step": 4539, + "time": 11.24 + }, + { + "epoch": 2.27, + "learning_rate": "1.6634e-04", + "loss": 0.7207, + "slid_loss": 0.6861, + "step": 4540, + "time": 13.66 + }, + { + "epoch": 2.27, + "learning_rate": "1.6632e-04", + "loss": 0.7133, + "slid_loss": 0.6875, + "step": 4541, + "time": 11.05 + }, + { + "epoch": 2.27, + "learning_rate": "1.6630e-04", + "loss": 0.6731, + "slid_loss": 0.6865, + "step": 4542, + "time": 13.24 + }, + { + "epoch": 2.27, + "learning_rate": "1.6629e-04", + "loss": 0.7384, + "slid_loss": 0.6866, + "step": 4543, + "time": 11.33 + }, + { + "epoch": 2.28, + "learning_rate": "1.6627e-04", + "loss": 0.5726, + "slid_loss": 0.6851, + "step": 4544, + "time": 10.52 + }, + { + "epoch": 2.28, + "learning_rate": "1.6625e-04", + "loss": 0.6932, + "slid_loss": 0.6851, + "step": 4545, + "time": 12.25 + }, + { + "epoch": 2.28, + "learning_rate": "1.6624e-04", + "loss": 0.6846, + "slid_loss": 0.6851, + "step": 4546, + "time": 13.23 + }, + { + "epoch": 2.28, + "learning_rate": "1.6622e-04", + "loss": 0.6344, + "slid_loss": 0.6844, + "step": 4547, + "time": 13.71 + }, + { + "epoch": 2.28, + "learning_rate": "1.6620e-04", + "loss": 0.7405, + "slid_loss": 0.6854, + "step": 4548, + "time": 12.71 + }, + { + "epoch": 2.28, + "learning_rate": "1.6619e-04", + "loss": 0.6195, + "slid_loss": 0.6849, + "step": 4549, + "time": 13.62 + }, + { + "epoch": 2.28, + "learning_rate": "1.6617e-04", + "loss": 0.762, + "slid_loss": 0.6851, + "step": 4550, + "time": 12.57 + }, + { + "epoch": 2.28, + "learning_rate": "1.6615e-04", + "loss": 0.5296, + "slid_loss": 0.6848, + "step": 4551, + "time": 12.31 + }, + { + "epoch": 2.28, + "learning_rate": "1.6614e-04", + "loss": 0.6629, + "slid_loss": 0.6842, + "step": 4552, + "time": 12.49 + }, + { + "epoch": 2.28, + "learning_rate": "1.6612e-04", + "loss": 0.824, + "slid_loss": 0.6858, + "step": 4553, + "time": 12.56 + }, + { + "epoch": 2.28, + "learning_rate": "1.6610e-04", + "loss": 0.863, + "slid_loss": 0.6867, + "step": 4554, + "time": 12.26 + }, + { + "epoch": 2.28, + "learning_rate": "1.6609e-04", + "loss": 0.6057, + "slid_loss": 0.6852, + "step": 4555, + "time": 13.26 + }, + { + "epoch": 2.28, + "learning_rate": "1.6607e-04", + "loss": 0.6804, + "slid_loss": 0.6841, + "step": 4556, + "time": 11.63 + }, + { + "epoch": 2.28, + "learning_rate": "1.6605e-04", + "loss": 0.6656, + "slid_loss": 0.6841, + "step": 4557, + "time": 11.08 + }, + { + "epoch": 2.28, + "learning_rate": "1.6604e-04", + "loss": 0.7714, + "slid_loss": 0.6847, + "step": 4558, + "time": 11.46 + }, + { + "epoch": 2.28, + "learning_rate": "1.6602e-04", + "loss": 0.6239, + "slid_loss": 0.6847, + "step": 4559, + "time": 12.36 + }, + { + "epoch": 2.28, + "learning_rate": "1.6601e-04", + "loss": 0.5347, + "slid_loss": 0.6827, + "step": 4560, + "time": 11.0 + }, + { + "epoch": 2.28, + "learning_rate": "1.6599e-04", + "loss": 0.8493, + "slid_loss": 0.6838, + "step": 4561, + "time": 11.44 + }, + { + "epoch": 2.28, + "learning_rate": "1.6597e-04", + "loss": 0.624, + "slid_loss": 0.6826, + "step": 4562, + "time": 11.0 + }, + { + "epoch": 2.28, + "learning_rate": "1.6596e-04", + "loss": 0.6903, + "slid_loss": 0.6813, + "step": 4563, + "time": 14.08 + }, + { + "epoch": 2.29, + "learning_rate": "1.6594e-04", + "loss": 0.6145, + "slid_loss": 0.6804, + "step": 4564, + "time": 13.59 + }, + { + "epoch": 2.29, + "learning_rate": "1.6592e-04", + "loss": 0.7998, + "slid_loss": 0.681, + "step": 4565, + "time": 13.29 + }, + { + "epoch": 2.29, + "learning_rate": "1.6591e-04", + "loss": 0.5466, + "slid_loss": 0.6812, + "step": 4566, + "time": 13.87 + }, + { + "epoch": 2.29, + "learning_rate": "1.6589e-04", + "loss": 0.7021, + "slid_loss": 0.6816, + "step": 4567, + "time": 13.47 + }, + { + "epoch": 2.29, + "learning_rate": "1.6587e-04", + "loss": 0.4778, + "slid_loss": 0.6795, + "step": 4568, + "time": 10.73 + }, + { + "epoch": 2.29, + "learning_rate": "1.6586e-04", + "loss": 0.7693, + "slid_loss": 0.6807, + "step": 4569, + "time": 13.09 + }, + { + "epoch": 2.29, + "learning_rate": "1.6584e-04", + "loss": 0.6047, + "slid_loss": 0.6801, + "step": 4570, + "time": 11.99 + }, + { + "epoch": 2.29, + "learning_rate": "1.6582e-04", + "loss": 0.5192, + "slid_loss": 0.6807, + "step": 4571, + "time": 11.09 + }, + { + "epoch": 2.29, + "learning_rate": "1.6581e-04", + "loss": 0.6326, + "slid_loss": 0.6793, + "step": 4572, + "time": 12.49 + }, + { + "epoch": 2.29, + "learning_rate": "1.6579e-04", + "loss": 0.6267, + "slid_loss": 0.6796, + "step": 4573, + "time": 11.4 + }, + { + "epoch": 2.29, + "learning_rate": "1.6577e-04", + "loss": 0.6087, + "slid_loss": 0.6791, + "step": 4574, + "time": 10.77 + }, + { + "epoch": 2.29, + "learning_rate": "1.6576e-04", + "loss": 0.5673, + "slid_loss": 0.6781, + "step": 4575, + "time": 12.81 + }, + { + "epoch": 2.29, + "learning_rate": "1.6574e-04", + "loss": 0.5256, + "slid_loss": 0.6759, + "step": 4576, + "time": 13.53 + }, + { + "epoch": 2.29, + "learning_rate": "1.6572e-04", + "loss": 0.5814, + "slid_loss": 0.6739, + "step": 4577, + "time": 10.76 + }, + { + "epoch": 2.29, + "learning_rate": "1.6571e-04", + "loss": 0.583, + "slid_loss": 0.673, + "step": 4578, + "time": 11.39 + }, + { + "epoch": 2.29, + "learning_rate": "1.6569e-04", + "loss": 0.6376, + "slid_loss": 0.6728, + "step": 4579, + "time": 13.15 + }, + { + "epoch": 2.29, + "learning_rate": "1.6567e-04", + "loss": 0.5937, + "slid_loss": 0.6708, + "step": 4580, + "time": 12.98 + }, + { + "epoch": 2.29, + "learning_rate": "1.6566e-04", + "loss": 0.6905, + "slid_loss": 0.67, + "step": 4581, + "time": 12.96 + }, + { + "epoch": 2.29, + "learning_rate": "1.6564e-04", + "loss": 0.7903, + "slid_loss": 0.6715, + "step": 4582, + "time": 11.83 + }, + { + "epoch": 2.29, + "learning_rate": "1.6562e-04", + "loss": 0.6631, + "slid_loss": 0.6714, + "step": 4583, + "time": 13.28 + }, + { + "epoch": 2.3, + "learning_rate": "1.6561e-04", + "loss": 0.8641, + "slid_loss": 0.6731, + "step": 4584, + "time": 13.2 + }, + { + "epoch": 2.3, + "learning_rate": "1.6559e-04", + "loss": 0.697, + "slid_loss": 0.6737, + "step": 4585, + "time": 11.9 + }, + { + "epoch": 2.3, + "learning_rate": "1.6557e-04", + "loss": 0.482, + "slid_loss": 0.6729, + "step": 4586, + "time": 13.59 + }, + { + "epoch": 2.3, + "learning_rate": "1.6556e-04", + "loss": 0.658, + "slid_loss": 0.6726, + "step": 4587, + "time": 12.87 + }, + { + "epoch": 2.3, + "learning_rate": "1.6554e-04", + "loss": 0.8267, + "slid_loss": 0.6741, + "step": 4588, + "time": 13.84 + }, + { + "epoch": 2.3, + "learning_rate": "1.6552e-04", + "loss": 0.7173, + "slid_loss": 0.6761, + "step": 4589, + "time": 13.35 + }, + { + "epoch": 2.3, + "learning_rate": "1.6551e-04", + "loss": 0.6985, + "slid_loss": 0.6771, + "step": 4590, + "time": 10.78 + }, + { + "epoch": 2.3, + "learning_rate": "1.6549e-04", + "loss": 0.8229, + "slid_loss": 0.6768, + "step": 4591, + "time": 11.44 + }, + { + "epoch": 2.3, + "learning_rate": "1.6547e-04", + "loss": 0.6594, + "slid_loss": 0.6763, + "step": 4592, + "time": 11.32 + }, + { + "epoch": 2.3, + "learning_rate": "1.6546e-04", + "loss": 0.7076, + "slid_loss": 0.6772, + "step": 4593, + "time": 12.84 + }, + { + "epoch": 2.3, + "learning_rate": "1.6544e-04", + "loss": 0.7737, + "slid_loss": 0.6768, + "step": 4594, + "time": 12.15 + }, + { + "epoch": 2.3, + "learning_rate": "1.6542e-04", + "loss": 0.7803, + "slid_loss": 0.6761, + "step": 4595, + "time": 13.97 + }, + { + "epoch": 2.3, + "learning_rate": "1.6541e-04", + "loss": 0.5819, + "slid_loss": 0.6748, + "step": 4596, + "time": 11.56 + }, + { + "epoch": 2.3, + "learning_rate": "1.6539e-04", + "loss": 0.5639, + "slid_loss": 0.6748, + "step": 4597, + "time": 13.75 + }, + { + "epoch": 2.3, + "learning_rate": "1.6537e-04", + "loss": 0.8255, + "slid_loss": 0.6772, + "step": 4598, + "time": 12.95 + }, + { + "epoch": 2.3, + "learning_rate": "1.6536e-04", + "loss": 0.701, + "slid_loss": 0.6781, + "step": 4599, + "time": 12.87 + }, + { + "epoch": 2.3, + "learning_rate": "1.6534e-04", + "loss": 0.6235, + "slid_loss": 0.679, + "step": 4600, + "time": 11.65 + }, + { + "epoch": 2.3, + "learning_rate": "1.6532e-04", + "loss": 0.8266, + "slid_loss": 0.6819, + "step": 4601, + "time": 11.45 + }, + { + "epoch": 2.3, + "learning_rate": "1.6531e-04", + "loss": 0.6619, + "slid_loss": 0.6823, + "step": 4602, + "time": 13.29 + }, + { + "epoch": 2.3, + "learning_rate": "1.6529e-04", + "loss": 0.7177, + "slid_loss": 0.6842, + "step": 4603, + "time": 13.95 + }, + { + "epoch": 2.31, + "learning_rate": "1.6527e-04", + "loss": 0.6923, + "slid_loss": 0.682, + "step": 4604, + "time": 11.84 + }, + { + "epoch": 2.31, + "learning_rate": "1.6526e-04", + "loss": 0.6342, + "slid_loss": 0.6812, + "step": 4605, + "time": 10.96 + }, + { + "epoch": 2.31, + "learning_rate": "1.6524e-04", + "loss": 0.7773, + "slid_loss": 0.6821, + "step": 4606, + "time": 12.04 + }, + { + "epoch": 2.31, + "learning_rate": "1.6522e-04", + "loss": 0.8017, + "slid_loss": 0.6829, + "step": 4607, + "time": 11.33 + }, + { + "epoch": 2.31, + "learning_rate": "1.6521e-04", + "loss": 0.6034, + "slid_loss": 0.6822, + "step": 4608, + "time": 11.63 + }, + { + "epoch": 2.31, + "learning_rate": "1.6519e-04", + "loss": 0.7372, + "slid_loss": 0.6834, + "step": 4609, + "time": 11.96 + }, + { + "epoch": 2.31, + "learning_rate": "1.6517e-04", + "loss": 0.8233, + "slid_loss": 0.6852, + "step": 4610, + "time": 12.72 + }, + { + "epoch": 2.31, + "learning_rate": "1.6516e-04", + "loss": 0.7008, + "slid_loss": 0.6845, + "step": 4611, + "time": 11.95 + }, + { + "epoch": 2.31, + "learning_rate": "1.6514e-04", + "loss": 0.8104, + "slid_loss": 0.6861, + "step": 4612, + "time": 10.97 + }, + { + "epoch": 2.31, + "learning_rate": "1.6512e-04", + "loss": 0.5733, + "slid_loss": 0.6842, + "step": 4613, + "time": 13.19 + }, + { + "epoch": 2.31, + "learning_rate": "1.6511e-04", + "loss": 0.6052, + "slid_loss": 0.6849, + "step": 4614, + "time": 12.0 + }, + { + "epoch": 2.31, + "learning_rate": "1.6509e-04", + "loss": 0.7091, + "slid_loss": 0.6845, + "step": 4615, + "time": 12.78 + }, + { + "epoch": 2.31, + "learning_rate": "1.6507e-04", + "loss": 0.8333, + "slid_loss": 0.6858, + "step": 4616, + "time": 13.54 + }, + { + "epoch": 2.31, + "learning_rate": "1.6506e-04", + "loss": 0.7799, + "slid_loss": 0.6862, + "step": 4617, + "time": 13.15 + }, + { + "epoch": 2.31, + "learning_rate": "1.6504e-04", + "loss": 0.7582, + "slid_loss": 0.6867, + "step": 4618, + "time": 12.29 + }, + { + "epoch": 2.31, + "learning_rate": "1.6502e-04", + "loss": 0.7352, + "slid_loss": 0.6863, + "step": 4619, + "time": 11.01 + }, + { + "epoch": 2.31, + "learning_rate": "1.6501e-04", + "loss": 0.8072, + "slid_loss": 0.688, + "step": 4620, + "time": 13.38 + }, + { + "epoch": 2.31, + "learning_rate": "1.6499e-04", + "loss": 0.6375, + "slid_loss": 0.6888, + "step": 4621, + "time": 12.97 + }, + { + "epoch": 2.31, + "learning_rate": "1.6497e-04", + "loss": 0.6036, + "slid_loss": 0.6882, + "step": 4622, + "time": 12.13 + }, + { + "epoch": 2.31, + "learning_rate": "1.6496e-04", + "loss": 0.72, + "slid_loss": 0.6898, + "step": 4623, + "time": 11.05 + }, + { + "epoch": 2.32, + "learning_rate": "1.6494e-04", + "loss": 0.7, + "slid_loss": 0.6885, + "step": 4624, + "time": 12.82 + }, + { + "epoch": 2.32, + "learning_rate": "1.6492e-04", + "loss": 0.7402, + "slid_loss": 0.6889, + "step": 4625, + "time": 10.53 + }, + { + "epoch": 2.32, + "learning_rate": "1.6491e-04", + "loss": 0.7763, + "slid_loss": 0.6892, + "step": 4626, + "time": 13.26 + }, + { + "epoch": 2.32, + "learning_rate": "1.6489e-04", + "loss": 0.682, + "slid_loss": 0.6888, + "step": 4627, + "time": 10.79 + }, + { + "epoch": 2.32, + "learning_rate": "1.6487e-04", + "loss": 0.6864, + "slid_loss": 0.6881, + "step": 4628, + "time": 12.84 + }, + { + "epoch": 2.32, + "learning_rate": "1.6486e-04", + "loss": 0.7332, + "slid_loss": 0.6868, + "step": 4629, + "time": 11.4 + }, + { + "epoch": 2.32, + "learning_rate": "1.6484e-04", + "loss": 0.777, + "slid_loss": 0.6883, + "step": 4630, + "time": 13.24 + }, + { + "epoch": 2.32, + "learning_rate": "1.6482e-04", + "loss": 0.791, + "slid_loss": 0.6894, + "step": 4631, + "time": 11.9 + }, + { + "epoch": 2.32, + "learning_rate": "1.6481e-04", + "loss": 0.6198, + "slid_loss": 0.69, + "step": 4632, + "time": 11.22 + }, + { + "epoch": 2.32, + "learning_rate": "1.6479e-04", + "loss": 0.6704, + "slid_loss": 0.6899, + "step": 4633, + "time": 11.72 + }, + { + "epoch": 2.32, + "learning_rate": "1.6477e-04", + "loss": 0.8018, + "slid_loss": 0.69, + "step": 4634, + "time": 12.81 + }, + { + "epoch": 2.32, + "learning_rate": "1.6476e-04", + "loss": 0.7622, + "slid_loss": 0.6904, + "step": 4635, + "time": 11.08 + }, + { + "epoch": 2.32, + "learning_rate": "1.6474e-04", + "loss": 0.6528, + "slid_loss": 0.6886, + "step": 4636, + "time": 11.24 + }, + { + "epoch": 2.32, + "learning_rate": "1.6472e-04", + "loss": 0.5955, + "slid_loss": 0.6874, + "step": 4637, + "time": 11.76 + }, + { + "epoch": 2.32, + "learning_rate": "1.6471e-04", + "loss": 0.6912, + "slid_loss": 0.6893, + "step": 4638, + "time": 13.01 + }, + { + "epoch": 2.32, + "learning_rate": "1.6469e-04", + "loss": 0.6344, + "slid_loss": 0.6886, + "step": 4639, + "time": 12.18 + }, + { + "epoch": 2.32, + "learning_rate": "1.6467e-04", + "loss": 0.5611, + "slid_loss": 0.687, + "step": 4640, + "time": 10.87 + }, + { + "epoch": 2.32, + "learning_rate": "1.6466e-04", + "loss": 0.7168, + "slid_loss": 0.687, + "step": 4641, + "time": 13.28 + }, + { + "epoch": 2.32, + "learning_rate": "1.6464e-04", + "loss": 0.6534, + "slid_loss": 0.6868, + "step": 4642, + "time": 10.86 + }, + { + "epoch": 2.32, + "learning_rate": "1.6462e-04", + "loss": 0.6783, + "slid_loss": 0.6862, + "step": 4643, + "time": 13.35 + }, + { + "epoch": 2.33, + "learning_rate": "1.6461e-04", + "loss": 0.6331, + "slid_loss": 0.6868, + "step": 4644, + "time": 12.27 + }, + { + "epoch": 2.33, + "learning_rate": "1.6459e-04", + "loss": 0.7386, + "slid_loss": 0.6873, + "step": 4645, + "time": 11.99 + }, + { + "epoch": 2.33, + "learning_rate": "1.6457e-04", + "loss": 0.6965, + "slid_loss": 0.6874, + "step": 4646, + "time": 11.28 + }, + { + "epoch": 2.33, + "learning_rate": "1.6456e-04", + "loss": 0.787, + "slid_loss": 0.6889, + "step": 4647, + "time": 13.86 + }, + { + "epoch": 2.33, + "learning_rate": "1.6454e-04", + "loss": 0.6327, + "slid_loss": 0.6878, + "step": 4648, + "time": 11.63 + }, + { + "epoch": 2.33, + "learning_rate": "1.6452e-04", + "loss": 0.7056, + "slid_loss": 0.6887, + "step": 4649, + "time": 13.34 + }, + { + "epoch": 2.33, + "learning_rate": "1.6451e-04", + "loss": 0.6947, + "slid_loss": 0.688, + "step": 4650, + "time": 13.01 + }, + { + "epoch": 2.33, + "learning_rate": "1.6449e-04", + "loss": 0.674, + "slid_loss": 0.6895, + "step": 4651, + "time": 11.81 + }, + { + "epoch": 2.33, + "learning_rate": "1.6447e-04", + "loss": 0.6216, + "slid_loss": 0.6891, + "step": 4652, + "time": 11.54 + }, + { + "epoch": 2.33, + "learning_rate": "1.6446e-04", + "loss": 0.7633, + "slid_loss": 0.6885, + "step": 4653, + "time": 11.01 + }, + { + "epoch": 2.33, + "learning_rate": "1.6444e-04", + "loss": 0.7324, + "slid_loss": 0.6872, + "step": 4654, + "time": 13.09 + }, + { + "epoch": 2.33, + "learning_rate": "1.6442e-04", + "loss": 0.6452, + "slid_loss": 0.6876, + "step": 4655, + "time": 12.82 + }, + { + "epoch": 2.33, + "learning_rate": "1.6441e-04", + "loss": 0.769, + "slid_loss": 0.6884, + "step": 4656, + "time": 10.95 + }, + { + "epoch": 2.33, + "learning_rate": "1.6439e-04", + "loss": 0.7443, + "slid_loss": 0.6892, + "step": 4657, + "time": 11.56 + }, + { + "epoch": 2.33, + "learning_rate": "1.6437e-04", + "loss": 0.6016, + "slid_loss": 0.6875, + "step": 4658, + "time": 12.94 + }, + { + "epoch": 2.33, + "learning_rate": "1.6436e-04", + "loss": 0.7225, + "slid_loss": 0.6885, + "step": 4659, + "time": 12.85 + }, + { + "epoch": 2.33, + "learning_rate": "1.6434e-04", + "loss": 0.7505, + "slid_loss": 0.6907, + "step": 4660, + "time": 12.93 + }, + { + "epoch": 2.33, + "learning_rate": "1.6432e-04", + "loss": 0.7449, + "slid_loss": 0.6896, + "step": 4661, + "time": 12.23 + }, + { + "epoch": 2.33, + "learning_rate": "1.6431e-04", + "loss": 0.6415, + "slid_loss": 0.6898, + "step": 4662, + "time": 10.69 + }, + { + "epoch": 2.34, + "learning_rate": "1.6429e-04", + "loss": 0.532, + "slid_loss": 0.6882, + "step": 4663, + "time": 13.56 + }, + { + "epoch": 2.34, + "learning_rate": "1.6427e-04", + "loss": 0.5421, + "slid_loss": 0.6875, + "step": 4664, + "time": 12.7 + }, + { + "epoch": 2.34, + "learning_rate": "1.6426e-04", + "loss": 0.7112, + "slid_loss": 0.6866, + "step": 4665, + "time": 14.06 + }, + { + "epoch": 2.34, + "learning_rate": "1.6424e-04", + "loss": 0.7998, + "slid_loss": 0.6891, + "step": 4666, + "time": 11.9 + }, + { + "epoch": 2.34, + "learning_rate": "1.6422e-04", + "loss": 0.6863, + "slid_loss": 0.689, + "step": 4667, + "time": 13.86 + }, + { + "epoch": 2.34, + "learning_rate": "1.6421e-04", + "loss": 0.6178, + "slid_loss": 0.6904, + "step": 4668, + "time": 13.78 + }, + { + "epoch": 2.34, + "learning_rate": "1.6419e-04", + "loss": 0.7526, + "slid_loss": 0.6902, + "step": 4669, + "time": 12.15 + }, + { + "epoch": 2.34, + "learning_rate": "1.6417e-04", + "loss": 0.6779, + "slid_loss": 0.6909, + "step": 4670, + "time": 13.61 + }, + { + "epoch": 2.34, + "learning_rate": "1.6416e-04", + "loss": 0.5758, + "slid_loss": 0.6915, + "step": 4671, + "time": 14.22 + }, + { + "epoch": 2.34, + "learning_rate": "1.6414e-04", + "loss": 0.6889, + "slid_loss": 0.6921, + "step": 4672, + "time": 11.21 + }, + { + "epoch": 2.34, + "learning_rate": "1.6412e-04", + "loss": 0.6926, + "slid_loss": 0.6927, + "step": 4673, + "time": 13.4 + }, + { + "epoch": 2.34, + "learning_rate": "1.6410e-04", + "loss": 0.5993, + "slid_loss": 0.6926, + "step": 4674, + "time": 11.39 + }, + { + "epoch": 2.34, + "learning_rate": "1.6409e-04", + "loss": 0.635, + "slid_loss": 0.6933, + "step": 4675, + "time": 12.86 + }, + { + "epoch": 2.34, + "learning_rate": "1.6407e-04", + "loss": 0.6822, + "slid_loss": 0.6949, + "step": 4676, + "time": 11.67 + }, + { + "epoch": 2.34, + "learning_rate": "1.6405e-04", + "loss": 0.8801, + "slid_loss": 0.6979, + "step": 4677, + "time": 13.4 + }, + { + "epoch": 2.34, + "learning_rate": "1.6404e-04", + "loss": 0.6742, + "slid_loss": 0.6988, + "step": 4678, + "time": 13.26 + }, + { + "epoch": 2.34, + "learning_rate": "1.6402e-04", + "loss": 0.5326, + "slid_loss": 0.6977, + "step": 4679, + "time": 13.36 + }, + { + "epoch": 2.34, + "learning_rate": "1.6400e-04", + "loss": 0.7106, + "slid_loss": 0.6989, + "step": 4680, + "time": 11.2 + }, + { + "epoch": 2.34, + "learning_rate": "1.6399e-04", + "loss": 0.6079, + "slid_loss": 0.6981, + "step": 4681, + "time": 11.05 + }, + { + "epoch": 2.34, + "learning_rate": "1.6397e-04", + "loss": 0.6914, + "slid_loss": 0.6971, + "step": 4682, + "time": 12.43 + }, + { + "epoch": 2.35, + "learning_rate": "1.6395e-04", + "loss": 0.6899, + "slid_loss": 0.6974, + "step": 4683, + "time": 10.88 + }, + { + "epoch": 2.35, + "learning_rate": "1.6394e-04", + "loss": 0.6704, + "slid_loss": 0.6954, + "step": 4684, + "time": 11.38 + }, + { + "epoch": 2.35, + "learning_rate": "1.6392e-04", + "loss": 0.7999, + "slid_loss": 0.6964, + "step": 4685, + "time": 11.14 + }, + { + "epoch": 2.35, + "learning_rate": "1.6390e-04", + "loss": 0.714, + "slid_loss": 0.6988, + "step": 4686, + "time": 13.57 + }, + { + "epoch": 2.35, + "learning_rate": "1.6389e-04", + "loss": 0.608, + "slid_loss": 0.6983, + "step": 4687, + "time": 12.83 + }, + { + "epoch": 2.35, + "learning_rate": "1.6387e-04", + "loss": 0.6723, + "slid_loss": 0.6967, + "step": 4688, + "time": 12.18 + }, + { + "epoch": 2.35, + "learning_rate": "1.6385e-04", + "loss": 0.5864, + "slid_loss": 0.6954, + "step": 4689, + "time": 13.87 + }, + { + "epoch": 2.35, + "learning_rate": "1.6384e-04", + "loss": 0.541, + "slid_loss": 0.6938, + "step": 4690, + "time": 11.21 + }, + { + "epoch": 2.35, + "learning_rate": "1.6382e-04", + "loss": 0.7366, + "slid_loss": 0.693, + "step": 4691, + "time": 13.18 + }, + { + "epoch": 2.35, + "learning_rate": "1.6380e-04", + "loss": 0.6883, + "slid_loss": 0.6933, + "step": 4692, + "time": 10.49 + }, + { + "epoch": 2.35, + "learning_rate": "1.6379e-04", + "loss": 0.5606, + "slid_loss": 0.6918, + "step": 4693, + "time": 11.04 + }, + { + "epoch": 2.35, + "learning_rate": "1.6377e-04", + "loss": 0.6637, + "slid_loss": 0.6907, + "step": 4694, + "time": 13.34 + }, + { + "epoch": 2.35, + "learning_rate": "1.6375e-04", + "loss": 0.5748, + "slid_loss": 0.6886, + "step": 4695, + "time": 13.22 + }, + { + "epoch": 2.35, + "learning_rate": "1.6374e-04", + "loss": 0.7794, + "slid_loss": 0.6906, + "step": 4696, + "time": 12.04 + }, + { + "epoch": 2.35, + "learning_rate": "1.6372e-04", + "loss": 0.7919, + "slid_loss": 0.6929, + "step": 4697, + "time": 13.47 + }, + { + "epoch": 2.35, + "learning_rate": "1.6370e-04", + "loss": 0.7743, + "slid_loss": 0.6924, + "step": 4698, + "time": 11.89 + }, + { + "epoch": 2.35, + "learning_rate": "1.6369e-04", + "loss": 0.6905, + "slid_loss": 0.6923, + "step": 4699, + "time": 13.92 + }, + { + "epoch": 2.35, + "learning_rate": "1.6367e-04", + "loss": 0.669, + "slid_loss": 0.6927, + "step": 4700, + "time": 13.27 + }, + { + "epoch": 2.35, + "learning_rate": "1.6365e-04", + "loss": 0.5476, + "slid_loss": 0.6899, + "step": 4701, + "time": 11.47 + }, + { + "epoch": 2.35, + "learning_rate": "1.6363e-04", + "loss": 0.7277, + "slid_loss": 0.6906, + "step": 4702, + "time": 13.3 + }, + { + "epoch": 2.36, + "learning_rate": "1.6362e-04", + "loss": 0.6902, + "slid_loss": 0.6903, + "step": 4703, + "time": 11.2 + }, + { + "epoch": 2.36, + "learning_rate": "1.6360e-04", + "loss": 0.7148, + "slid_loss": 0.6906, + "step": 4704, + "time": 11.33 + }, + { + "epoch": 2.36, + "learning_rate": "1.6358e-04", + "loss": 0.7518, + "slid_loss": 0.6917, + "step": 4705, + "time": 13.23 + }, + { + "epoch": 2.36, + "learning_rate": "1.6357e-04", + "loss": 0.6797, + "slid_loss": 0.6908, + "step": 4706, + "time": 13.23 + }, + { + "epoch": 2.36, + "learning_rate": "1.6355e-04", + "loss": 0.8656, + "slid_loss": 0.6914, + "step": 4707, + "time": 13.58 + }, + { + "epoch": 2.36, + "learning_rate": "1.6353e-04", + "loss": 0.6481, + "slid_loss": 0.6918, + "step": 4708, + "time": 11.2 + }, + { + "epoch": 2.36, + "learning_rate": "1.6352e-04", + "loss": 0.6151, + "slid_loss": 0.6906, + "step": 4709, + "time": 13.76 + }, + { + "epoch": 2.36, + "learning_rate": "1.6350e-04", + "loss": 0.7349, + "slid_loss": 0.6897, + "step": 4710, + "time": 12.78 + }, + { + "epoch": 2.36, + "learning_rate": "1.6348e-04", + "loss": 0.6877, + "slid_loss": 0.6896, + "step": 4711, + "time": 13.66 + }, + { + "epoch": 2.36, + "learning_rate": "1.6347e-04", + "loss": 0.6126, + "slid_loss": 0.6876, + "step": 4712, + "time": 10.75 + }, + { + "epoch": 2.36, + "learning_rate": "1.6345e-04", + "loss": 0.5657, + "slid_loss": 0.6875, + "step": 4713, + "time": 11.4 + }, + { + "epoch": 2.36, + "learning_rate": "1.6343e-04", + "loss": 0.8113, + "slid_loss": 0.6896, + "step": 4714, + "time": 13.35 + }, + { + "epoch": 2.36, + "learning_rate": "1.6342e-04", + "loss": 0.7098, + "slid_loss": 0.6896, + "step": 4715, + "time": 11.25 + }, + { + "epoch": 2.36, + "learning_rate": "1.6340e-04", + "loss": 0.5668, + "slid_loss": 0.687, + "step": 4716, + "time": 11.6 + }, + { + "epoch": 2.36, + "learning_rate": "1.6338e-04", + "loss": 0.73, + "slid_loss": 0.6865, + "step": 4717, + "time": 11.6 + }, + { + "epoch": 2.36, + "learning_rate": "1.6337e-04", + "loss": 0.7913, + "slid_loss": 0.6868, + "step": 4718, + "time": 12.85 + }, + { + "epoch": 2.36, + "learning_rate": "1.6335e-04", + "loss": 0.7753, + "slid_loss": 0.6872, + "step": 4719, + "time": 11.37 + }, + { + "epoch": 2.36, + "learning_rate": "1.6333e-04", + "loss": 0.7963, + "slid_loss": 0.6871, + "step": 4720, + "time": 11.76 + }, + { + "epoch": 2.36, + "learning_rate": "1.6331e-04", + "loss": 0.7264, + "slid_loss": 0.688, + "step": 4721, + "time": 13.42 + }, + { + "epoch": 2.36, + "learning_rate": "1.6330e-04", + "loss": 0.8435, + "slid_loss": 0.6904, + "step": 4722, + "time": 13.3 + }, + { + "epoch": 2.37, + "learning_rate": "1.6328e-04", + "loss": 0.7801, + "slid_loss": 0.691, + "step": 4723, + "time": 10.97 + }, + { + "epoch": 2.37, + "learning_rate": "1.6326e-04", + "loss": 0.5074, + "slid_loss": 0.689, + "step": 4724, + "time": 12.26 + }, + { + "epoch": 2.37, + "learning_rate": "1.6325e-04", + "loss": 0.6895, + "slid_loss": 0.6885, + "step": 4725, + "time": 12.69 + }, + { + "epoch": 2.37, + "learning_rate": "1.6323e-04", + "loss": 0.5207, + "slid_loss": 0.686, + "step": 4726, + "time": 14.31 + }, + { + "epoch": 2.37, + "learning_rate": "1.6321e-04", + "loss": 0.5587, + "slid_loss": 0.6847, + "step": 4727, + "time": 14.59 + }, + { + "epoch": 2.37, + "learning_rate": "1.6320e-04", + "loss": 0.541, + "slid_loss": 0.6833, + "step": 4728, + "time": 10.28 + }, + { + "epoch": 2.37, + "learning_rate": "1.6318e-04", + "loss": 0.6112, + "slid_loss": 0.6821, + "step": 4729, + "time": 14.11 + }, + { + "epoch": 2.37, + "learning_rate": "1.6316e-04", + "loss": 0.6767, + "slid_loss": 0.6811, + "step": 4730, + "time": 14.55 + }, + { + "epoch": 2.37, + "learning_rate": "1.6315e-04", + "loss": 0.6623, + "slid_loss": 0.6798, + "step": 4731, + "time": 11.09 + }, + { + "epoch": 2.37, + "learning_rate": "1.6313e-04", + "loss": 0.7639, + "slid_loss": 0.6812, + "step": 4732, + "time": 11.46 + }, + { + "epoch": 2.37, + "learning_rate": "1.6311e-04", + "loss": 0.8065, + "slid_loss": 0.6826, + "step": 4733, + "time": 13.95 + }, + { + "epoch": 2.37, + "learning_rate": "1.6310e-04", + "loss": 0.65, + "slid_loss": 0.6811, + "step": 4734, + "time": 13.24 + }, + { + "epoch": 2.37, + "learning_rate": "1.6308e-04", + "loss": 0.6352, + "slid_loss": 0.6798, + "step": 4735, + "time": 13.59 + }, + { + "epoch": 2.37, + "learning_rate": "1.6306e-04", + "loss": 0.607, + "slid_loss": 0.6793, + "step": 4736, + "time": 13.14 + }, + { + "epoch": 2.37, + "learning_rate": "1.6305e-04", + "loss": 0.7074, + "slid_loss": 0.6805, + "step": 4737, + "time": 13.34 + }, + { + "epoch": 2.37, + "learning_rate": "1.6303e-04", + "loss": 0.6395, + "slid_loss": 0.6799, + "step": 4738, + "time": 13.81 + }, + { + "epoch": 2.37, + "learning_rate": "1.6301e-04", + "loss": 0.6028, + "slid_loss": 0.6796, + "step": 4739, + "time": 13.46 + }, + { + "epoch": 2.37, + "learning_rate": "1.6299e-04", + "loss": 0.612, + "slid_loss": 0.6801, + "step": 4740, + "time": 10.46 + }, + { + "epoch": 2.37, + "learning_rate": "1.6298e-04", + "loss": 0.8293, + "slid_loss": 0.6813, + "step": 4741, + "time": 14.02 + }, + { + "epoch": 2.37, + "learning_rate": "1.6296e-04", + "loss": 0.6919, + "slid_loss": 0.6816, + "step": 4742, + "time": 11.19 + }, + { + "epoch": 2.38, + "learning_rate": "1.6294e-04", + "loss": 0.7235, + "slid_loss": 0.6821, + "step": 4743, + "time": 12.37 + }, + { + "epoch": 2.38, + "learning_rate": "1.6293e-04", + "loss": 0.7434, + "slid_loss": 0.6832, + "step": 4744, + "time": 10.95 + }, + { + "epoch": 2.38, + "learning_rate": "1.6291e-04", + "loss": 0.4787, + "slid_loss": 0.6806, + "step": 4745, + "time": 10.86 + }, + { + "epoch": 2.38, + "learning_rate": "1.6289e-04", + "loss": 0.6331, + "slid_loss": 0.68, + "step": 4746, + "time": 10.86 + }, + { + "epoch": 2.38, + "learning_rate": "1.6288e-04", + "loss": 0.5485, + "slid_loss": 0.6776, + "step": 4747, + "time": 13.17 + }, + { + "epoch": 2.38, + "learning_rate": "1.6286e-04", + "loss": 0.8335, + "slid_loss": 0.6796, + "step": 4748, + "time": 11.89 + }, + { + "epoch": 2.38, + "learning_rate": "1.6284e-04", + "loss": 0.6853, + "slid_loss": 0.6794, + "step": 4749, + "time": 13.29 + }, + { + "epoch": 2.38, + "learning_rate": "1.6283e-04", + "loss": 0.7147, + "slid_loss": 0.6796, + "step": 4750, + "time": 11.71 + }, + { + "epoch": 2.38, + "learning_rate": "1.6281e-04", + "loss": 0.6226, + "slid_loss": 0.6791, + "step": 4751, + "time": 13.59 + }, + { + "epoch": 2.38, + "learning_rate": "1.6279e-04", + "loss": 0.6752, + "slid_loss": 0.6796, + "step": 4752, + "time": 11.6 + }, + { + "epoch": 2.38, + "learning_rate": "1.6277e-04", + "loss": 0.6878, + "slid_loss": 0.6788, + "step": 4753, + "time": 12.02 + }, + { + "epoch": 2.38, + "learning_rate": "1.6276e-04", + "loss": 0.7173, + "slid_loss": 0.6787, + "step": 4754, + "time": 13.68 + }, + { + "epoch": 2.38, + "learning_rate": "1.6274e-04", + "loss": 0.7598, + "slid_loss": 0.6798, + "step": 4755, + "time": 13.79 + }, + { + "epoch": 2.38, + "learning_rate": "1.6272e-04", + "loss": 0.6164, + "slid_loss": 0.6783, + "step": 4756, + "time": 12.09 + }, + { + "epoch": 2.38, + "learning_rate": "1.6271e-04", + "loss": 0.6559, + "slid_loss": 0.6774, + "step": 4757, + "time": 13.31 + }, + { + "epoch": 2.38, + "learning_rate": "1.6269e-04", + "loss": 0.672, + "slid_loss": 0.6781, + "step": 4758, + "time": 13.3 + }, + { + "epoch": 2.38, + "learning_rate": "1.6267e-04", + "loss": 0.6513, + "slid_loss": 0.6774, + "step": 4759, + "time": 13.22 + }, + { + "epoch": 2.38, + "learning_rate": "1.6266e-04", + "loss": 0.7217, + "slid_loss": 0.6771, + "step": 4760, + "time": 12.73 + }, + { + "epoch": 2.38, + "learning_rate": "1.6264e-04", + "loss": 0.7703, + "slid_loss": 0.6774, + "step": 4761, + "time": 13.19 + }, + { + "epoch": 2.38, + "learning_rate": "1.6262e-04", + "loss": 0.5933, + "slid_loss": 0.6769, + "step": 4762, + "time": 11.81 + }, + { + "epoch": 2.39, + "learning_rate": "1.6261e-04", + "loss": 0.5748, + "slid_loss": 0.6773, + "step": 4763, + "time": 14.06 + }, + { + "epoch": 2.39, + "learning_rate": "1.6259e-04", + "loss": 0.6095, + "slid_loss": 0.678, + "step": 4764, + "time": 13.37 + }, + { + "epoch": 2.39, + "learning_rate": "1.6257e-04", + "loss": 0.7711, + "slid_loss": 0.6786, + "step": 4765, + "time": 11.37 + }, + { + "epoch": 2.39, + "learning_rate": "1.6256e-04", + "loss": 0.7059, + "slid_loss": 0.6777, + "step": 4766, + "time": 11.87 + }, + { + "epoch": 2.39, + "learning_rate": "1.6254e-04", + "loss": 0.6765, + "slid_loss": 0.6776, + "step": 4767, + "time": 12.13 + }, + { + "epoch": 2.39, + "learning_rate": "1.6252e-04", + "loss": 0.6734, + "slid_loss": 0.6781, + "step": 4768, + "time": 13.07 + }, + { + "epoch": 2.39, + "learning_rate": "1.6250e-04", + "loss": 0.6815, + "slid_loss": 0.6774, + "step": 4769, + "time": 13.58 + }, + { + "epoch": 2.39, + "learning_rate": "1.6249e-04", + "loss": 0.7531, + "slid_loss": 0.6782, + "step": 4770, + "time": 12.88 + }, + { + "epoch": 2.39, + "learning_rate": "1.6247e-04", + "loss": 0.7309, + "slid_loss": 0.6797, + "step": 4771, + "time": 12.89 + }, + { + "epoch": 2.39, + "learning_rate": "1.6245e-04", + "loss": 0.7671, + "slid_loss": 0.6805, + "step": 4772, + "time": 11.3 + }, + { + "epoch": 2.39, + "learning_rate": "1.6244e-04", + "loss": 0.5641, + "slid_loss": 0.6792, + "step": 4773, + "time": 11.88 + }, + { + "epoch": 2.39, + "learning_rate": "1.6242e-04", + "loss": 0.7822, + "slid_loss": 0.681, + "step": 4774, + "time": 13.44 + }, + { + "epoch": 2.39, + "learning_rate": "1.6240e-04", + "loss": 0.6067, + "slid_loss": 0.6808, + "step": 4775, + "time": 12.27 + }, + { + "epoch": 2.39, + "learning_rate": "1.6239e-04", + "loss": 0.6691, + "slid_loss": 0.6806, + "step": 4776, + "time": 12.61 + }, + { + "epoch": 2.39, + "learning_rate": "1.6237e-04", + "loss": 0.5373, + "slid_loss": 0.6772, + "step": 4777, + "time": 10.83 + }, + { + "epoch": 2.39, + "learning_rate": "1.6235e-04", + "loss": 0.7594, + "slid_loss": 0.6781, + "step": 4778, + "time": 13.81 + }, + { + "epoch": 2.39, + "learning_rate": "1.6233e-04", + "loss": 0.6635, + "slid_loss": 0.6794, + "step": 4779, + "time": 13.36 + }, + { + "epoch": 2.39, + "learning_rate": "1.6232e-04", + "loss": 0.7344, + "slid_loss": 0.6796, + "step": 4780, + "time": 12.63 + }, + { + "epoch": 2.39, + "learning_rate": "1.6230e-04", + "loss": 0.6391, + "slid_loss": 0.6799, + "step": 4781, + "time": 11.26 + }, + { + "epoch": 2.39, + "learning_rate": "1.6228e-04", + "loss": 0.6904, + "slid_loss": 0.6799, + "step": 4782, + "time": 12.41 + }, + { + "epoch": 2.4, + "learning_rate": "1.6227e-04", + "loss": 0.7252, + "slid_loss": 0.6803, + "step": 4783, + "time": 13.3 + }, + { + "epoch": 2.4, + "learning_rate": "1.6225e-04", + "loss": 0.7168, + "slid_loss": 0.6807, + "step": 4784, + "time": 11.16 + }, + { + "epoch": 2.4, + "learning_rate": "1.6223e-04", + "loss": 0.7248, + "slid_loss": 0.68, + "step": 4785, + "time": 13.09 + }, + { + "epoch": 2.4, + "learning_rate": "1.6222e-04", + "loss": 0.6824, + "slid_loss": 0.6797, + "step": 4786, + "time": 13.26 + }, + { + "epoch": 2.4, + "learning_rate": "1.6220e-04", + "loss": 0.7381, + "slid_loss": 0.681, + "step": 4787, + "time": 13.97 + }, + { + "epoch": 2.4, + "learning_rate": "1.6218e-04", + "loss": 0.6186, + "slid_loss": 0.6804, + "step": 4788, + "time": 11.86 + }, + { + "epoch": 2.4, + "learning_rate": "1.6217e-04", + "loss": 0.6478, + "slid_loss": 0.681, + "step": 4789, + "time": 12.19 + }, + { + "epoch": 2.4, + "learning_rate": "1.6215e-04", + "loss": 0.6942, + "slid_loss": 0.6826, + "step": 4790, + "time": 13.31 + }, + { + "epoch": 2.4, + "learning_rate": "1.6213e-04", + "loss": 0.5756, + "slid_loss": 0.681, + "step": 4791, + "time": 13.38 + }, + { + "epoch": 2.4, + "learning_rate": "1.6211e-04", + "loss": 0.6702, + "slid_loss": 0.6808, + "step": 4792, + "time": 13.2 + }, + { + "epoch": 2.4, + "learning_rate": "1.6210e-04", + "loss": 0.8374, + "slid_loss": 0.6835, + "step": 4793, + "time": 13.74 + }, + { + "epoch": 2.4, + "learning_rate": "1.6208e-04", + "loss": 0.6396, + "slid_loss": 0.6833, + "step": 4794, + "time": 13.35 + }, + { + "epoch": 2.4, + "learning_rate": "1.6206e-04", + "loss": 0.5661, + "slid_loss": 0.6832, + "step": 4795, + "time": 12.81 + }, + { + "epoch": 2.4, + "learning_rate": "1.6205e-04", + "loss": 0.5986, + "slid_loss": 0.6814, + "step": 4796, + "time": 11.58 + }, + { + "epoch": 2.4, + "learning_rate": "1.6203e-04", + "loss": 0.7332, + "slid_loss": 0.6808, + "step": 4797, + "time": 13.74 + }, + { + "epoch": 2.4, + "learning_rate": "1.6201e-04", + "loss": 0.651, + "slid_loss": 0.6796, + "step": 4798, + "time": 12.81 + }, + { + "epoch": 2.4, + "learning_rate": "1.6200e-04", + "loss": 0.7496, + "slid_loss": 0.6802, + "step": 4799, + "time": 11.3 + }, + { + "epoch": 2.4, + "learning_rate": "1.6198e-04", + "loss": 0.5112, + "slid_loss": 0.6786, + "step": 4800, + "time": 12.79 + }, + { + "epoch": 2.4, + "learning_rate": "1.6196e-04", + "loss": 0.7635, + "slid_loss": 0.6808, + "step": 4801, + "time": 10.43 + }, + { + "epoch": 2.4, + "learning_rate": "1.6194e-04", + "loss": 0.7099, + "slid_loss": 0.6806, + "step": 4802, + "time": 11.54 + }, + { + "epoch": 2.41, + "learning_rate": "1.6193e-04", + "loss": 0.7524, + "slid_loss": 0.6812, + "step": 4803, + "time": 11.65 + }, + { + "epoch": 2.41, + "learning_rate": "1.6191e-04", + "loss": 0.6907, + "slid_loss": 0.681, + "step": 4804, + "time": 12.41 + }, + { + "epoch": 2.41, + "learning_rate": "1.6189e-04", + "loss": 0.6034, + "slid_loss": 0.6795, + "step": 4805, + "time": 13.75 + }, + { + "epoch": 2.41, + "learning_rate": "1.6188e-04", + "loss": 0.6609, + "slid_loss": 0.6793, + "step": 4806, + "time": 13.43 + }, + { + "epoch": 2.41, + "learning_rate": "1.6186e-04", + "loss": 0.6617, + "slid_loss": 0.6772, + "step": 4807, + "time": 13.77 + }, + { + "epoch": 2.41, + "learning_rate": "1.6184e-04", + "loss": 0.6255, + "slid_loss": 0.677, + "step": 4808, + "time": 13.65 + }, + { + "epoch": 2.41, + "learning_rate": "1.6183e-04", + "loss": 0.6432, + "slid_loss": 0.6773, + "step": 4809, + "time": 12.04 + }, + { + "epoch": 2.41, + "learning_rate": "1.6181e-04", + "loss": 0.6219, + "slid_loss": 0.6762, + "step": 4810, + "time": 14.38 + }, + { + "epoch": 2.41, + "learning_rate": "1.6179e-04", + "loss": 0.702, + "slid_loss": 0.6763, + "step": 4811, + "time": 13.43 + }, + { + "epoch": 2.41, + "learning_rate": "1.6178e-04", + "loss": 0.7254, + "slid_loss": 0.6774, + "step": 4812, + "time": 13.38 + }, + { + "epoch": 2.41, + "learning_rate": "1.6176e-04", + "loss": 0.6666, + "slid_loss": 0.6785, + "step": 4813, + "time": 13.63 + }, + { + "epoch": 2.41, + "learning_rate": "1.6174e-04", + "loss": 0.6958, + "slid_loss": 0.6773, + "step": 4814, + "time": 13.54 + }, + { + "epoch": 2.41, + "learning_rate": "1.6172e-04", + "loss": 0.7244, + "slid_loss": 0.6774, + "step": 4815, + "time": 14.18 + }, + { + "epoch": 2.41, + "learning_rate": "1.6171e-04", + "loss": 0.7141, + "slid_loss": 0.6789, + "step": 4816, + "time": 13.54 + }, + { + "epoch": 2.41, + "learning_rate": "1.6169e-04", + "loss": 0.8464, + "slid_loss": 0.6801, + "step": 4817, + "time": 13.99 + }, + { + "epoch": 2.41, + "learning_rate": "1.6167e-04", + "loss": 0.5741, + "slid_loss": 0.6779, + "step": 4818, + "time": 13.07 + }, + { + "epoch": 2.41, + "learning_rate": "1.6166e-04", + "loss": 0.7798, + "slid_loss": 0.678, + "step": 4819, + "time": 13.82 + }, + { + "epoch": 2.41, + "learning_rate": "1.6164e-04", + "loss": 0.5449, + "slid_loss": 0.6754, + "step": 4820, + "time": 12.97 + }, + { + "epoch": 2.41, + "learning_rate": "1.6162e-04", + "loss": 0.5786, + "slid_loss": 0.674, + "step": 4821, + "time": 13.87 + }, + { + "epoch": 2.41, + "learning_rate": "1.6161e-04", + "loss": 0.6811, + "slid_loss": 0.6723, + "step": 4822, + "time": 13.0 + }, + { + "epoch": 2.42, + "learning_rate": "1.6159e-04", + "loss": 0.6025, + "slid_loss": 0.6706, + "step": 4823, + "time": 14.48 + }, + { + "epoch": 2.42, + "learning_rate": "1.6157e-04", + "loss": 0.6328, + "slid_loss": 0.6718, + "step": 4824, + "time": 12.4 + }, + { + "epoch": 2.42, + "learning_rate": "1.6155e-04", + "loss": 0.5794, + "slid_loss": 0.6707, + "step": 4825, + "time": 11.16 + }, + { + "epoch": 2.42, + "learning_rate": "1.6154e-04", + "loss": 0.7714, + "slid_loss": 0.6732, + "step": 4826, + "time": 14.3 + }, + { + "epoch": 2.42, + "learning_rate": "1.6152e-04", + "loss": 0.612, + "slid_loss": 0.6738, + "step": 4827, + "time": 12.41 + }, + { + "epoch": 2.42, + "learning_rate": "1.6150e-04", + "loss": 0.7034, + "slid_loss": 0.6754, + "step": 4828, + "time": 12.57 + }, + { + "epoch": 2.42, + "learning_rate": "1.6149e-04", + "loss": 0.7622, + "slid_loss": 0.6769, + "step": 4829, + "time": 11.5 + }, + { + "epoch": 2.42, + "learning_rate": "1.6147e-04", + "loss": 0.5658, + "slid_loss": 0.6758, + "step": 4830, + "time": 13.94 + }, + { + "epoch": 2.42, + "learning_rate": "1.6145e-04", + "loss": 0.6729, + "slid_loss": 0.6759, + "step": 4831, + "time": 12.85 + }, + { + "epoch": 2.42, + "learning_rate": "1.6144e-04", + "loss": 0.6497, + "slid_loss": 0.6747, + "step": 4832, + "time": 12.43 + }, + { + "epoch": 2.42, + "learning_rate": "1.6142e-04", + "loss": 0.6839, + "slid_loss": 0.6735, + "step": 4833, + "time": 10.83 + }, + { + "epoch": 2.42, + "learning_rate": "1.6140e-04", + "loss": 0.6172, + "slid_loss": 0.6732, + "step": 4834, + "time": 13.63 + }, + { + "epoch": 2.42, + "learning_rate": "1.6138e-04", + "loss": 0.6539, + "slid_loss": 0.6734, + "step": 4835, + "time": 12.29 + }, + { + "epoch": 2.42, + "learning_rate": "1.6137e-04", + "loss": 0.664, + "slid_loss": 0.6739, + "step": 4836, + "time": 14.02 + }, + { + "epoch": 2.42, + "learning_rate": "1.6135e-04", + "loss": 0.5603, + "slid_loss": 0.6725, + "step": 4837, + "time": 13.91 + }, + { + "epoch": 2.42, + "learning_rate": "1.6133e-04", + "loss": 0.8778, + "slid_loss": 0.6749, + "step": 4838, + "time": 10.49 + }, + { + "epoch": 2.42, + "learning_rate": "1.6132e-04", + "loss": 0.6272, + "slid_loss": 0.6751, + "step": 4839, + "time": 13.52 + }, + { + "epoch": 2.42, + "learning_rate": "1.6130e-04", + "loss": 0.6726, + "slid_loss": 0.6757, + "step": 4840, + "time": 12.19 + }, + { + "epoch": 2.42, + "learning_rate": "1.6128e-04", + "loss": 0.6621, + "slid_loss": 0.674, + "step": 4841, + "time": 12.38 + }, + { + "epoch": 2.42, + "learning_rate": "1.6126e-04", + "loss": 0.5415, + "slid_loss": 0.6725, + "step": 4842, + "time": 12.83 + }, + { + "epoch": 2.43, + "learning_rate": "1.6125e-04", + "loss": 0.8187, + "slid_loss": 0.6735, + "step": 4843, + "time": 13.44 + }, + { + "epoch": 2.43, + "learning_rate": "1.6123e-04", + "loss": 0.6037, + "slid_loss": 0.6721, + "step": 4844, + "time": 13.57 + }, + { + "epoch": 2.43, + "learning_rate": "1.6121e-04", + "loss": 0.6541, + "slid_loss": 0.6738, + "step": 4845, + "time": 14.52 + }, + { + "epoch": 2.43, + "learning_rate": "1.6120e-04", + "loss": 0.8008, + "slid_loss": 0.6755, + "step": 4846, + "time": 12.38 + }, + { + "epoch": 2.43, + "learning_rate": "1.6118e-04", + "loss": 0.6305, + "slid_loss": 0.6763, + "step": 4847, + "time": 11.6 + }, + { + "epoch": 2.43, + "learning_rate": "1.6116e-04", + "loss": 0.7717, + "slid_loss": 0.6757, + "step": 4848, + "time": 11.29 + }, + { + "epoch": 2.43, + "learning_rate": "1.6115e-04", + "loss": 0.8356, + "slid_loss": 0.6772, + "step": 4849, + "time": 12.22 + }, + { + "epoch": 2.43, + "learning_rate": "1.6113e-04", + "loss": 0.7717, + "slid_loss": 0.6778, + "step": 4850, + "time": 13.63 + }, + { + "epoch": 2.43, + "learning_rate": "1.6111e-04", + "loss": 0.6929, + "slid_loss": 0.6785, + "step": 4851, + "time": 12.53 + }, + { + "epoch": 2.43, + "learning_rate": "1.6109e-04", + "loss": 0.7698, + "slid_loss": 0.6794, + "step": 4852, + "time": 13.34 + }, + { + "epoch": 2.43, + "learning_rate": "1.6108e-04", + "loss": 0.6829, + "slid_loss": 0.6794, + "step": 4853, + "time": 13.5 + }, + { + "epoch": 2.43, + "learning_rate": "1.6106e-04", + "loss": 0.7451, + "slid_loss": 0.6797, + "step": 4854, + "time": 11.59 + }, + { + "epoch": 2.43, + "learning_rate": "1.6104e-04", + "loss": 0.7866, + "slid_loss": 0.6799, + "step": 4855, + "time": 13.12 + }, + { + "epoch": 2.43, + "learning_rate": "1.6103e-04", + "loss": 0.7046, + "slid_loss": 0.6808, + "step": 4856, + "time": 13.82 + }, + { + "epoch": 2.43, + "learning_rate": "1.6101e-04", + "loss": 0.5906, + "slid_loss": 0.6802, + "step": 4857, + "time": 13.53 + }, + { + "epoch": 2.43, + "learning_rate": "1.6099e-04", + "loss": 0.6266, + "slid_loss": 0.6797, + "step": 4858, + "time": 12.91 + }, + { + "epoch": 2.43, + "learning_rate": "1.6098e-04", + "loss": 0.7568, + "slid_loss": 0.6808, + "step": 4859, + "time": 13.65 + }, + { + "epoch": 2.43, + "learning_rate": "1.6096e-04", + "loss": 0.6243, + "slid_loss": 0.6798, + "step": 4860, + "time": 13.38 + }, + { + "epoch": 2.43, + "learning_rate": "1.6094e-04", + "loss": 0.7081, + "slid_loss": 0.6792, + "step": 4861, + "time": 12.35 + }, + { + "epoch": 2.43, + "learning_rate": "1.6092e-04", + "loss": 0.5752, + "slid_loss": 0.679, + "step": 4862, + "time": 11.23 + }, + { + "epoch": 2.44, + "learning_rate": "1.6091e-04", + "loss": 0.6973, + "slid_loss": 0.6802, + "step": 4863, + "time": 12.23 + }, + { + "epoch": 2.44, + "learning_rate": "1.6089e-04", + "loss": 0.5398, + "slid_loss": 0.6795, + "step": 4864, + "time": 13.63 + }, + { + "epoch": 2.44, + "learning_rate": "1.6087e-04", + "loss": 0.771, + "slid_loss": 0.6795, + "step": 4865, + "time": 10.7 + }, + { + "epoch": 2.44, + "learning_rate": "1.6086e-04", + "loss": 0.6144, + "slid_loss": 0.6786, + "step": 4866, + "time": 12.06 + }, + { + "epoch": 2.44, + "learning_rate": "1.6084e-04", + "loss": 0.5441, + "slid_loss": 0.6773, + "step": 4867, + "time": 11.23 + }, + { + "epoch": 2.44, + "learning_rate": "1.6082e-04", + "loss": 0.7105, + "slid_loss": 0.6777, + "step": 4868, + "time": 13.25 + }, + { + "epoch": 2.44, + "learning_rate": "1.6080e-04", + "loss": 0.7747, + "slid_loss": 0.6786, + "step": 4869, + "time": 11.89 + }, + { + "epoch": 2.44, + "learning_rate": "1.6079e-04", + "loss": 0.6836, + "slid_loss": 0.6779, + "step": 4870, + "time": 13.33 + }, + { + "epoch": 2.44, + "learning_rate": "1.6077e-04", + "loss": 0.8027, + "slid_loss": 0.6786, + "step": 4871, + "time": 11.61 + }, + { + "epoch": 2.44, + "learning_rate": "1.6075e-04", + "loss": 0.6358, + "slid_loss": 0.6773, + "step": 4872, + "time": 13.62 + }, + { + "epoch": 2.44, + "learning_rate": "1.6074e-04", + "loss": 0.6518, + "slid_loss": 0.6782, + "step": 4873, + "time": 12.83 + }, + { + "epoch": 2.44, + "learning_rate": "1.6072e-04", + "loss": 0.7432, + "slid_loss": 0.6778, + "step": 4874, + "time": 12.88 + }, + { + "epoch": 2.44, + "learning_rate": "1.6070e-04", + "loss": 0.628, + "slid_loss": 0.678, + "step": 4875, + "time": 12.22 + }, + { + "epoch": 2.44, + "learning_rate": "1.6068e-04", + "loss": 0.6198, + "slid_loss": 0.6775, + "step": 4876, + "time": 14.23 + }, + { + "epoch": 2.44, + "learning_rate": "1.6067e-04", + "loss": 0.6601, + "slid_loss": 0.6787, + "step": 4877, + "time": 12.81 + }, + { + "epoch": 2.44, + "learning_rate": "1.6065e-04", + "loss": 0.6856, + "slid_loss": 0.678, + "step": 4878, + "time": 12.72 + }, + { + "epoch": 2.44, + "learning_rate": "1.6063e-04", + "loss": 0.5874, + "slid_loss": 0.6772, + "step": 4879, + "time": 12.33 + }, + { + "epoch": 2.44, + "learning_rate": "1.6062e-04", + "loss": 0.6503, + "slid_loss": 0.6764, + "step": 4880, + "time": 11.48 + }, + { + "epoch": 2.44, + "learning_rate": "1.6060e-04", + "loss": 0.709, + "slid_loss": 0.6771, + "step": 4881, + "time": 12.93 + }, + { + "epoch": 2.44, + "learning_rate": "1.6058e-04", + "loss": 0.6649, + "slid_loss": 0.6768, + "step": 4882, + "time": 12.37 + }, + { + "epoch": 2.45, + "learning_rate": "1.6057e-04", + "loss": 0.7617, + "slid_loss": 0.6772, + "step": 4883, + "time": 11.26 + }, + { + "epoch": 2.45, + "learning_rate": "1.6055e-04", + "loss": 0.6013, + "slid_loss": 0.676, + "step": 4884, + "time": 12.14 + }, + { + "epoch": 2.45, + "learning_rate": "1.6053e-04", + "loss": 0.6901, + "slid_loss": 0.6757, + "step": 4885, + "time": 12.62 + }, + { + "epoch": 2.45, + "learning_rate": "1.6051e-04", + "loss": 0.7678, + "slid_loss": 0.6766, + "step": 4886, + "time": 14.3 + }, + { + "epoch": 2.45, + "learning_rate": "1.6050e-04", + "loss": 0.7647, + "slid_loss": 0.6768, + "step": 4887, + "time": 11.94 + }, + { + "epoch": 2.45, + "learning_rate": "1.6048e-04", + "loss": 0.6335, + "slid_loss": 0.677, + "step": 4888, + "time": 12.93 + }, + { + "epoch": 2.45, + "learning_rate": "1.6046e-04", + "loss": 0.8468, + "slid_loss": 0.679, + "step": 4889, + "time": 10.88 + }, + { + "epoch": 2.45, + "learning_rate": "1.6045e-04", + "loss": 0.6646, + "slid_loss": 0.6787, + "step": 4890, + "time": 10.81 + }, + { + "epoch": 2.45, + "learning_rate": "1.6043e-04", + "loss": 0.7215, + "slid_loss": 0.6801, + "step": 4891, + "time": 13.76 + }, + { + "epoch": 2.45, + "learning_rate": "1.6041e-04", + "loss": 0.6718, + "slid_loss": 0.6801, + "step": 4892, + "time": 13.99 + }, + { + "epoch": 2.45, + "learning_rate": "1.6039e-04", + "loss": 0.5974, + "slid_loss": 0.6777, + "step": 4893, + "time": 11.26 + }, + { + "epoch": 2.45, + "learning_rate": "1.6038e-04", + "loss": 0.7151, + "slid_loss": 0.6785, + "step": 4894, + "time": 12.94 + }, + { + "epoch": 2.45, + "learning_rate": "1.6036e-04", + "loss": 0.7091, + "slid_loss": 0.6799, + "step": 4895, + "time": 11.33 + }, + { + "epoch": 2.45, + "learning_rate": "1.6034e-04", + "loss": 0.8287, + "slid_loss": 0.6822, + "step": 4896, + "time": 11.44 + }, + { + "epoch": 2.45, + "learning_rate": "1.6033e-04", + "loss": 0.7218, + "slid_loss": 0.6821, + "step": 4897, + "time": 11.65 + }, + { + "epoch": 2.45, + "learning_rate": "1.6031e-04", + "loss": 0.6558, + "slid_loss": 0.6822, + "step": 4898, + "time": 13.35 + }, + { + "epoch": 2.45, + "learning_rate": "1.6029e-04", + "loss": 0.737, + "slid_loss": 0.682, + "step": 4899, + "time": 13.43 + }, + { + "epoch": 2.45, + "learning_rate": "1.6027e-04", + "loss": 0.9336, + "slid_loss": 0.6863, + "step": 4900, + "time": 13.49 + }, + { + "epoch": 2.45, + "learning_rate": "1.6026e-04", + "loss": 0.686, + "slid_loss": 0.6855, + "step": 4901, + "time": 11.61 + }, + { + "epoch": 2.45, + "learning_rate": "1.6024e-04", + "loss": 0.671, + "slid_loss": 0.6851, + "step": 4902, + "time": 13.77 + }, + { + "epoch": 2.46, + "learning_rate": "1.6022e-04", + "loss": 0.6445, + "slid_loss": 0.684, + "step": 4903, + "time": 10.77 + }, + { + "epoch": 2.46, + "learning_rate": "1.6021e-04", + "loss": 0.5704, + "slid_loss": 0.6828, + "step": 4904, + "time": 13.16 + }, + { + "epoch": 2.46, + "learning_rate": "1.6019e-04", + "loss": 0.7558, + "slid_loss": 0.6843, + "step": 4905, + "time": 11.88 + }, + { + "epoch": 2.46, + "learning_rate": "1.6017e-04", + "loss": 0.5082, + "slid_loss": 0.6828, + "step": 4906, + "time": 13.88 + }, + { + "epoch": 2.46, + "learning_rate": "1.6015e-04", + "loss": 0.6076, + "slid_loss": 0.6823, + "step": 4907, + "time": 11.05 + }, + { + "epoch": 2.46, + "learning_rate": "1.6014e-04", + "loss": 0.6675, + "slid_loss": 0.6827, + "step": 4908, + "time": 13.68 + }, + { + "epoch": 2.46, + "learning_rate": "1.6012e-04", + "loss": 0.7008, + "slid_loss": 0.6833, + "step": 4909, + "time": 12.96 + }, + { + "epoch": 2.46, + "learning_rate": "1.6010e-04", + "loss": 0.6719, + "slid_loss": 0.6838, + "step": 4910, + "time": 13.46 + }, + { + "epoch": 2.46, + "learning_rate": "1.6009e-04", + "loss": 0.8014, + "slid_loss": 0.6848, + "step": 4911, + "time": 13.93 + }, + { + "epoch": 2.46, + "learning_rate": "1.6007e-04", + "loss": 0.7183, + "slid_loss": 0.6847, + "step": 4912, + "time": 12.94 + }, + { + "epoch": 2.46, + "learning_rate": "1.6005e-04", + "loss": 0.7426, + "slid_loss": 0.6854, + "step": 4913, + "time": 11.39 + }, + { + "epoch": 2.46, + "learning_rate": "1.6004e-04", + "loss": 0.7326, + "slid_loss": 0.6858, + "step": 4914, + "time": 13.35 + }, + { + "epoch": 2.46, + "learning_rate": "1.6002e-04", + "loss": 0.558, + "slid_loss": 0.6841, + "step": 4915, + "time": 13.39 + }, + { + "epoch": 2.46, + "learning_rate": "1.6000e-04", + "loss": 0.5993, + "slid_loss": 0.683, + "step": 4916, + "time": 11.08 + }, + { + "epoch": 2.46, + "learning_rate": "1.5998e-04", + "loss": 0.6789, + "slid_loss": 0.6813, + "step": 4917, + "time": 13.14 + }, + { + "epoch": 2.46, + "learning_rate": "1.5997e-04", + "loss": 0.7483, + "slid_loss": 0.6831, + "step": 4918, + "time": 12.3 + }, + { + "epoch": 2.46, + "learning_rate": "1.5995e-04", + "loss": 0.6027, + "slid_loss": 0.6813, + "step": 4919, + "time": 12.89 + }, + { + "epoch": 2.46, + "learning_rate": "1.5993e-04", + "loss": 0.6661, + "slid_loss": 0.6825, + "step": 4920, + "time": 10.62 + }, + { + "epoch": 2.46, + "learning_rate": "1.5992e-04", + "loss": 0.6879, + "slid_loss": 0.6836, + "step": 4921, + "time": 11.62 + }, + { + "epoch": 2.46, + "learning_rate": "1.5990e-04", + "loss": 0.5925, + "slid_loss": 0.6827, + "step": 4922, + "time": 12.27 + }, + { + "epoch": 2.47, + "learning_rate": "1.5988e-04", + "loss": 0.8187, + "slid_loss": 0.6849, + "step": 4923, + "time": 11.18 + }, + { + "epoch": 2.47, + "learning_rate": "1.5986e-04", + "loss": 0.624, + "slid_loss": 0.6848, + "step": 4924, + "time": 10.87 + }, + { + "epoch": 2.47, + "learning_rate": "1.5985e-04", + "loss": 0.7769, + "slid_loss": 0.6868, + "step": 4925, + "time": 12.82 + }, + { + "epoch": 2.47, + "learning_rate": "1.5983e-04", + "loss": 0.7182, + "slid_loss": 0.6862, + "step": 4926, + "time": 10.99 + }, + { + "epoch": 2.47, + "learning_rate": "1.5981e-04", + "loss": 0.566, + "slid_loss": 0.6858, + "step": 4927, + "time": 14.0 + }, + { + "epoch": 2.47, + "learning_rate": "1.5980e-04", + "loss": 0.8462, + "slid_loss": 0.6872, + "step": 4928, + "time": 13.57 + }, + { + "epoch": 2.47, + "learning_rate": "1.5978e-04", + "loss": 0.614, + "slid_loss": 0.6857, + "step": 4929, + "time": 11.12 + }, + { + "epoch": 2.47, + "learning_rate": "1.5976e-04", + "loss": 0.6826, + "slid_loss": 0.6869, + "step": 4930, + "time": 10.83 + }, + { + "epoch": 2.47, + "learning_rate": "1.5974e-04", + "loss": 0.7645, + "slid_loss": 0.6878, + "step": 4931, + "time": 12.05 + }, + { + "epoch": 2.47, + "learning_rate": "1.5973e-04", + "loss": 0.6493, + "slid_loss": 0.6878, + "step": 4932, + "time": 13.2 + }, + { + "epoch": 2.47, + "learning_rate": "1.5971e-04", + "loss": 0.6637, + "slid_loss": 0.6876, + "step": 4933, + "time": 10.74 + }, + { + "epoch": 2.47, + "learning_rate": "1.5969e-04", + "loss": 0.6987, + "slid_loss": 0.6884, + "step": 4934, + "time": 13.33 + }, + { + "epoch": 2.47, + "learning_rate": "1.5968e-04", + "loss": 0.7765, + "slid_loss": 0.6896, + "step": 4935, + "time": 13.0 + }, + { + "epoch": 2.47, + "learning_rate": "1.5966e-04", + "loss": 0.7101, + "slid_loss": 0.6901, + "step": 4936, + "time": 13.41 + }, + { + "epoch": 2.47, + "learning_rate": "1.5964e-04", + "loss": 0.5927, + "slid_loss": 0.6904, + "step": 4937, + "time": 11.56 + }, + { + "epoch": 2.47, + "learning_rate": "1.5962e-04", + "loss": 0.5529, + "slid_loss": 0.6872, + "step": 4938, + "time": 12.96 + }, + { + "epoch": 2.47, + "learning_rate": "1.5961e-04", + "loss": 0.7162, + "slid_loss": 0.6881, + "step": 4939, + "time": 13.37 + }, + { + "epoch": 2.47, + "learning_rate": "1.5959e-04", + "loss": 0.5896, + "slid_loss": 0.6872, + "step": 4940, + "time": 11.24 + }, + { + "epoch": 2.47, + "learning_rate": "1.5957e-04", + "loss": 0.7264, + "slid_loss": 0.6879, + "step": 4941, + "time": 11.92 + }, + { + "epoch": 2.47, + "learning_rate": "1.5956e-04", + "loss": 0.8245, + "slid_loss": 0.6907, + "step": 4942, + "time": 13.78 + }, + { + "epoch": 2.48, + "learning_rate": "1.5954e-04", + "loss": 0.6439, + "slid_loss": 0.689, + "step": 4943, + "time": 13.73 + }, + { + "epoch": 2.48, + "learning_rate": "1.5952e-04", + "loss": 0.6777, + "slid_loss": 0.6897, + "step": 4944, + "time": 11.28 + }, + { + "epoch": 2.48, + "learning_rate": "1.5950e-04", + "loss": 0.7366, + "slid_loss": 0.6905, + "step": 4945, + "time": 13.02 + }, + { + "epoch": 2.48, + "learning_rate": "1.5949e-04", + "loss": 0.8126, + "slid_loss": 0.6906, + "step": 4946, + "time": 10.74 + }, + { + "epoch": 2.48, + "learning_rate": "1.5947e-04", + "loss": 0.5649, + "slid_loss": 0.69, + "step": 4947, + "time": 10.75 + }, + { + "epoch": 2.48, + "learning_rate": "1.5945e-04", + "loss": 0.733, + "slid_loss": 0.6896, + "step": 4948, + "time": 11.04 + }, + { + "epoch": 2.48, + "learning_rate": "1.5943e-04", + "loss": 0.8006, + "slid_loss": 0.6892, + "step": 4949, + "time": 13.85 + }, + { + "epoch": 2.48, + "learning_rate": "1.5942e-04", + "loss": 0.7115, + "slid_loss": 0.6886, + "step": 4950, + "time": 12.11 + }, + { + "epoch": 2.48, + "learning_rate": "1.5940e-04", + "loss": 0.6696, + "slid_loss": 0.6884, + "step": 4951, + "time": 11.97 + }, + { + "epoch": 2.48, + "learning_rate": "1.5938e-04", + "loss": 0.7108, + "slid_loss": 0.6878, + "step": 4952, + "time": 13.2 + }, + { + "epoch": 2.48, + "learning_rate": "1.5937e-04", + "loss": 0.7399, + "slid_loss": 0.6884, + "step": 4953, + "time": 12.89 + }, + { + "epoch": 2.48, + "learning_rate": "1.5935e-04", + "loss": 0.6738, + "slid_loss": 0.6877, + "step": 4954, + "time": 11.38 + }, + { + "epoch": 2.48, + "learning_rate": "1.5933e-04", + "loss": 0.7473, + "slid_loss": 0.6873, + "step": 4955, + "time": 10.93 + }, + { + "epoch": 2.48, + "learning_rate": "1.5931e-04", + "loss": 0.5523, + "slid_loss": 0.6858, + "step": 4956, + "time": 11.17 + }, + { + "epoch": 2.48, + "learning_rate": "1.5930e-04", + "loss": 0.7666, + "slid_loss": 0.6875, + "step": 4957, + "time": 13.74 + }, + { + "epoch": 2.48, + "learning_rate": "1.5928e-04", + "loss": 0.6118, + "slid_loss": 0.6874, + "step": 4958, + "time": 11.39 + }, + { + "epoch": 2.48, + "learning_rate": "1.5926e-04", + "loss": 0.7249, + "slid_loss": 0.6871, + "step": 4959, + "time": 13.39 + }, + { + "epoch": 2.48, + "learning_rate": "1.5925e-04", + "loss": 0.6965, + "slid_loss": 0.6878, + "step": 4960, + "time": 13.38 + }, + { + "epoch": 2.48, + "learning_rate": "1.5923e-04", + "loss": 0.5791, + "slid_loss": 0.6865, + "step": 4961, + "time": 12.7 + }, + { + "epoch": 2.48, + "learning_rate": "1.5921e-04", + "loss": 0.5463, + "slid_loss": 0.6862, + "step": 4962, + "time": 10.68 + }, + { + "epoch": 2.49, + "learning_rate": "1.5919e-04", + "loss": 0.6346, + "slid_loss": 0.6856, + "step": 4963, + "time": 12.85 + }, + { + "epoch": 2.49, + "learning_rate": "1.5918e-04", + "loss": 0.7389, + "slid_loss": 0.6876, + "step": 4964, + "time": 13.52 + }, + { + "epoch": 2.49, + "learning_rate": "1.5916e-04", + "loss": 0.5087, + "slid_loss": 0.6849, + "step": 4965, + "time": 13.75 + }, + { + "epoch": 2.49, + "learning_rate": "1.5914e-04", + "loss": 0.5038, + "slid_loss": 0.6838, + "step": 4966, + "time": 12.81 + }, + { + "epoch": 2.49, + "learning_rate": "1.5913e-04", + "loss": 0.6726, + "slid_loss": 0.6851, + "step": 4967, + "time": 11.22 + }, + { + "epoch": 2.49, + "learning_rate": "1.5911e-04", + "loss": 0.6139, + "slid_loss": 0.6842, + "step": 4968, + "time": 10.97 + }, + { + "epoch": 2.49, + "learning_rate": "1.5909e-04", + "loss": 0.6058, + "slid_loss": 0.6825, + "step": 4969, + "time": 13.06 + }, + { + "epoch": 2.49, + "learning_rate": "1.5907e-04", + "loss": 0.5759, + "slid_loss": 0.6814, + "step": 4970, + "time": 13.64 + }, + { + "epoch": 2.49, + "learning_rate": "1.5906e-04", + "loss": 0.6476, + "slid_loss": 0.6798, + "step": 4971, + "time": 12.7 + }, + { + "epoch": 2.49, + "learning_rate": "1.5904e-04", + "loss": 0.6736, + "slid_loss": 0.6802, + "step": 4972, + "time": 13.46 + }, + { + "epoch": 2.49, + "learning_rate": "1.5902e-04", + "loss": 0.6839, + "slid_loss": 0.6805, + "step": 4973, + "time": 13.69 + }, + { + "epoch": 2.49, + "learning_rate": "1.5901e-04", + "loss": 0.7985, + "slid_loss": 0.6811, + "step": 4974, + "time": 14.59 + }, + { + "epoch": 2.49, + "learning_rate": "1.5899e-04", + "loss": 0.6671, + "slid_loss": 0.6815, + "step": 4975, + "time": 13.76 + }, + { + "epoch": 2.49, + "learning_rate": "1.5897e-04", + "loss": 0.6968, + "slid_loss": 0.6823, + "step": 4976, + "time": 13.23 + }, + { + "epoch": 2.49, + "learning_rate": "1.5895e-04", + "loss": 0.6529, + "slid_loss": 0.6822, + "step": 4977, + "time": 11.28 + }, + { + "epoch": 2.49, + "learning_rate": "1.5894e-04", + "loss": 0.7086, + "slid_loss": 0.6824, + "step": 4978, + "time": 13.03 + }, + { + "epoch": 2.49, + "learning_rate": "1.5892e-04", + "loss": 0.7778, + "slid_loss": 0.6843, + "step": 4979, + "time": 14.01 + }, + { + "epoch": 2.49, + "learning_rate": "1.5890e-04", + "loss": 0.9235, + "slid_loss": 0.687, + "step": 4980, + "time": 11.38 + }, + { + "epoch": 2.49, + "learning_rate": "1.5889e-04", + "loss": 0.6785, + "slid_loss": 0.6867, + "step": 4981, + "time": 11.13 + }, + { + "epoch": 2.49, + "learning_rate": "1.5887e-04", + "loss": 0.6887, + "slid_loss": 0.687, + "step": 4982, + "time": 11.38 + }, + { + "epoch": 2.5, + "learning_rate": "1.5885e-04", + "loss": 0.6193, + "slid_loss": 0.6856, + "step": 4983, + "time": 11.5 + }, + { + "epoch": 2.5, + "learning_rate": "1.5883e-04", + "loss": 0.5502, + "slid_loss": 0.685, + "step": 4984, + "time": 13.01 + }, + { + "epoch": 2.5, + "learning_rate": "1.5882e-04", + "loss": 0.6116, + "slid_loss": 0.6843, + "step": 4985, + "time": 14.02 + }, + { + "epoch": 2.5, + "learning_rate": "1.5880e-04", + "loss": 0.6361, + "slid_loss": 0.6829, + "step": 4986, + "time": 11.35 + }, + { + "epoch": 2.5, + "learning_rate": "1.5878e-04", + "loss": 0.7889, + "slid_loss": 0.6832, + "step": 4987, + "time": 13.05 + }, + { + "epoch": 2.5, + "learning_rate": "1.5876e-04", + "loss": 0.592, + "slid_loss": 0.6828, + "step": 4988, + "time": 13.52 + }, + { + "epoch": 2.5, + "learning_rate": "1.5875e-04", + "loss": 0.6921, + "slid_loss": 0.6812, + "step": 4989, + "time": 12.47 + }, + { + "epoch": 2.5, + "learning_rate": "1.5873e-04", + "loss": 0.7143, + "slid_loss": 0.6817, + "step": 4990, + "time": 10.79 + }, + { + "epoch": 2.5, + "learning_rate": "1.5871e-04", + "loss": 0.7493, + "slid_loss": 0.682, + "step": 4991, + "time": 13.35 + }, + { + "epoch": 2.5, + "learning_rate": "1.5870e-04", + "loss": 0.6891, + "slid_loss": 0.6822, + "step": 4992, + "time": 12.41 + }, + { + "epoch": 2.5, + "learning_rate": "1.5868e-04", + "loss": 0.7439, + "slid_loss": 0.6836, + "step": 4993, + "time": 11.21 + }, + { + "epoch": 2.5, + "learning_rate": "1.5866e-04", + "loss": 0.5883, + "slid_loss": 0.6824, + "step": 4994, + "time": 13.58 + }, + { + "epoch": 2.5, + "learning_rate": "1.5864e-04", + "loss": 0.6575, + "slid_loss": 0.6818, + "step": 4995, + "time": 13.31 + }, + { + "epoch": 2.5, + "learning_rate": "1.5863e-04", + "loss": 0.6754, + "slid_loss": 0.6803, + "step": 4996, + "time": 10.65 + }, + { + "epoch": 2.5, + "learning_rate": "1.5861e-04", + "loss": 0.6657, + "slid_loss": 0.6798, + "step": 4997, + "time": 12.77 + }, + { + "epoch": 2.5, + "learning_rate": "1.5859e-04", + "loss": 0.5612, + "slid_loss": 0.6788, + "step": 4998, + "time": 13.11 + }, + { + "epoch": 2.5, + "learning_rate": "1.5858e-04", + "loss": 0.6003, + "slid_loss": 0.6774, + "step": 4999, + "time": 11.39 + }, + { + "epoch": 2.5, + "learning_rate": "1.5856e-04", + "loss": 0.6629, + "slid_loss": 0.6747, + "step": 5000, + "time": 13.45 + }, + { + "epoch": 2.5, + "learning_rate": "1.5854e-04", + "loss": 0.6788, + "slid_loss": 0.6747, + "step": 5001, + "time": 12.79 + }, + { + "epoch": 2.5, + "learning_rate": "1.5852e-04", + "loss": 0.5659, + "slid_loss": 0.6736, + "step": 5002, + "time": 11.77 + }, + { + "epoch": 2.51, + "learning_rate": "1.5851e-04", + "loss": 0.6055, + "slid_loss": 0.6732, + "step": 5003, + "time": 11.28 + }, + { + "epoch": 2.51, + "learning_rate": "1.5849e-04", + "loss": 0.6524, + "slid_loss": 0.674, + "step": 5004, + "time": 12.76 + }, + { + "epoch": 2.51, + "learning_rate": "1.5847e-04", + "loss": 0.7595, + "slid_loss": 0.6741, + "step": 5005, + "time": 10.52 + }, + { + "epoch": 2.51, + "learning_rate": "1.5845e-04", + "loss": 0.6646, + "slid_loss": 0.6756, + "step": 5006, + "time": 11.64 + }, + { + "epoch": 2.51, + "learning_rate": "1.5844e-04", + "loss": 0.8087, + "slid_loss": 0.6777, + "step": 5007, + "time": 11.61 + }, + { + "epoch": 2.51, + "learning_rate": "1.5842e-04", + "loss": 0.7505, + "slid_loss": 0.6785, + "step": 5008, + "time": 13.95 + }, + { + "epoch": 2.51, + "learning_rate": "1.5840e-04", + "loss": 0.6261, + "slid_loss": 0.6777, + "step": 5009, + "time": 11.48 + }, + { + "epoch": 2.51, + "learning_rate": "1.5839e-04", + "loss": 0.6611, + "slid_loss": 0.6776, + "step": 5010, + "time": 11.78 + }, + { + "epoch": 2.51, + "learning_rate": "1.5837e-04", + "loss": 0.7771, + "slid_loss": 0.6774, + "step": 5011, + "time": 12.67 + }, + { + "epoch": 2.51, + "learning_rate": "1.5835e-04", + "loss": 0.5506, + "slid_loss": 0.6757, + "step": 5012, + "time": 12.98 + }, + { + "epoch": 2.51, + "learning_rate": "1.5833e-04", + "loss": 0.5492, + "slid_loss": 0.6738, + "step": 5013, + "time": 13.1 + }, + { + "epoch": 2.51, + "learning_rate": "1.5832e-04", + "loss": 0.642, + "slid_loss": 0.6729, + "step": 5014, + "time": 13.32 + }, + { + "epoch": 2.51, + "learning_rate": "1.5830e-04", + "loss": 0.6581, + "slid_loss": 0.6739, + "step": 5015, + "time": 12.96 + }, + { + "epoch": 2.51, + "learning_rate": "1.5828e-04", + "loss": 0.6961, + "slid_loss": 0.6748, + "step": 5016, + "time": 13.52 + }, + { + "epoch": 2.51, + "learning_rate": "1.5827e-04", + "loss": 0.7676, + "slid_loss": 0.6757, + "step": 5017, + "time": 11.75 + }, + { + "epoch": 2.51, + "learning_rate": "1.5825e-04", + "loss": 0.5445, + "slid_loss": 0.6737, + "step": 5018, + "time": 12.86 + }, + { + "epoch": 2.51, + "learning_rate": "1.5823e-04", + "loss": 0.6442, + "slid_loss": 0.6741, + "step": 5019, + "time": 13.55 + }, + { + "epoch": 2.51, + "learning_rate": "1.5821e-04", + "loss": 0.5893, + "slid_loss": 0.6733, + "step": 5020, + "time": 13.17 + }, + { + "epoch": 2.51, + "learning_rate": "1.5820e-04", + "loss": 0.6049, + "slid_loss": 0.6725, + "step": 5021, + "time": 11.5 + }, + { + "epoch": 2.51, + "learning_rate": "1.5818e-04", + "loss": 0.6255, + "slid_loss": 0.6728, + "step": 5022, + "time": 12.21 + }, + { + "epoch": 2.52, + "learning_rate": "1.5816e-04", + "loss": 0.7319, + "slid_loss": 0.672, + "step": 5023, + "time": 13.17 + }, + { + "epoch": 2.52, + "learning_rate": "1.5814e-04", + "loss": 0.7415, + "slid_loss": 0.6731, + "step": 5024, + "time": 12.09 + }, + { + "epoch": 2.52, + "learning_rate": "1.5813e-04", + "loss": 0.6915, + "slid_loss": 0.6723, + "step": 5025, + "time": 13.79 + }, + { + "epoch": 2.52, + "learning_rate": "1.5811e-04", + "loss": 0.7032, + "slid_loss": 0.6721, + "step": 5026, + "time": 12.86 + }, + { + "epoch": 2.52, + "learning_rate": "1.5809e-04", + "loss": 0.72, + "slid_loss": 0.6737, + "step": 5027, + "time": 13.93 + }, + { + "epoch": 2.52, + "learning_rate": "1.5808e-04", + "loss": 0.6545, + "slid_loss": 0.6718, + "step": 5028, + "time": 11.27 + }, + { + "epoch": 2.52, + "learning_rate": "1.5806e-04", + "loss": 0.7018, + "slid_loss": 0.6726, + "step": 5029, + "time": 12.17 + }, + { + "epoch": 2.52, + "learning_rate": "1.5804e-04", + "loss": 0.673, + "slid_loss": 0.6725, + "step": 5030, + "time": 13.15 + }, + { + "epoch": 2.52, + "learning_rate": "1.5802e-04", + "loss": 0.5387, + "slid_loss": 0.6703, + "step": 5031, + "time": 10.78 + }, + { + "epoch": 2.52, + "learning_rate": "1.5801e-04", + "loss": 0.6175, + "slid_loss": 0.67, + "step": 5032, + "time": 12.76 + }, + { + "epoch": 2.52, + "learning_rate": "1.5799e-04", + "loss": 0.6587, + "slid_loss": 0.6699, + "step": 5033, + "time": 11.03 + }, + { + "epoch": 2.52, + "learning_rate": "1.5797e-04", + "loss": 0.7163, + "slid_loss": 0.6701, + "step": 5034, + "time": 12.98 + }, + { + "epoch": 2.52, + "learning_rate": "1.5795e-04", + "loss": 0.7092, + "slid_loss": 0.6694, + "step": 5035, + "time": 13.96 + }, + { + "epoch": 2.52, + "learning_rate": "1.5794e-04", + "loss": 0.5972, + "slid_loss": 0.6683, + "step": 5036, + "time": 11.42 + }, + { + "epoch": 2.52, + "learning_rate": "1.5792e-04", + "loss": 0.7666, + "slid_loss": 0.67, + "step": 5037, + "time": 12.24 + }, + { + "epoch": 2.52, + "learning_rate": "1.5790e-04", + "loss": 0.7622, + "slid_loss": 0.6721, + "step": 5038, + "time": 12.83 + }, + { + "epoch": 2.52, + "learning_rate": "1.5789e-04", + "loss": 0.6588, + "slid_loss": 0.6716, + "step": 5039, + "time": 11.74 + }, + { + "epoch": 2.52, + "learning_rate": "1.5787e-04", + "loss": 0.7245, + "slid_loss": 0.6729, + "step": 5040, + "time": 11.83 + }, + { + "epoch": 2.52, + "learning_rate": "1.5785e-04", + "loss": 0.7583, + "slid_loss": 0.6732, + "step": 5041, + "time": 13.74 + }, + { + "epoch": 2.52, + "learning_rate": "1.5783e-04", + "loss": 0.6685, + "slid_loss": 0.6717, + "step": 5042, + "time": 11.31 + }, + { + "epoch": 2.53, + "learning_rate": "1.5782e-04", + "loss": 0.757, + "slid_loss": 0.6728, + "step": 5043, + "time": 11.52 + }, + { + "epoch": 2.53, + "learning_rate": "1.5780e-04", + "loss": 0.7617, + "slid_loss": 0.6736, + "step": 5044, + "time": 13.45 + }, + { + "epoch": 2.53, + "learning_rate": "1.5778e-04", + "loss": 0.5978, + "slid_loss": 0.6722, + "step": 5045, + "time": 11.84 + }, + { + "epoch": 2.53, + "learning_rate": "1.5776e-04", + "loss": 0.6807, + "slid_loss": 0.6709, + "step": 5046, + "time": 11.82 + }, + { + "epoch": 2.53, + "learning_rate": "1.5775e-04", + "loss": 0.7412, + "slid_loss": 0.6727, + "step": 5047, + "time": 13.76 + }, + { + "epoch": 2.53, + "learning_rate": "1.5773e-04", + "loss": 0.7246, + "slid_loss": 0.6726, + "step": 5048, + "time": 13.3 + }, + { + "epoch": 2.53, + "learning_rate": "1.5771e-04", + "loss": 0.5515, + "slid_loss": 0.6701, + "step": 5049, + "time": 11.25 + }, + { + "epoch": 2.53, + "learning_rate": "1.5770e-04", + "loss": 0.7177, + "slid_loss": 0.6702, + "step": 5050, + "time": 11.9 + }, + { + "epoch": 2.53, + "learning_rate": "1.5768e-04", + "loss": 0.7939, + "slid_loss": 0.6714, + "step": 5051, + "time": 13.09 + }, + { + "epoch": 2.53, + "learning_rate": "1.5766e-04", + "loss": 0.6881, + "slid_loss": 0.6712, + "step": 5052, + "time": 12.31 + }, + { + "epoch": 2.53, + "learning_rate": "1.5764e-04", + "loss": 0.6027, + "slid_loss": 0.6698, + "step": 5053, + "time": 13.29 + }, + { + "epoch": 2.53, + "learning_rate": "1.5763e-04", + "loss": 0.4685, + "slid_loss": 0.6678, + "step": 5054, + "time": 10.73 + }, + { + "epoch": 2.53, + "learning_rate": "1.5761e-04", + "loss": 0.6403, + "slid_loss": 0.6667, + "step": 5055, + "time": 13.06 + }, + { + "epoch": 2.53, + "learning_rate": "1.5759e-04", + "loss": 0.512, + "slid_loss": 0.6663, + "step": 5056, + "time": 12.35 + }, + { + "epoch": 2.53, + "learning_rate": "1.5757e-04", + "loss": 0.5971, + "slid_loss": 0.6646, + "step": 5057, + "time": 11.17 + }, + { + "epoch": 2.53, + "learning_rate": "1.5756e-04", + "loss": 0.6998, + "slid_loss": 0.6655, + "step": 5058, + "time": 13.66 + }, + { + "epoch": 2.53, + "learning_rate": "1.5754e-04", + "loss": 0.6201, + "slid_loss": 0.6644, + "step": 5059, + "time": 14.26 + }, + { + "epoch": 2.53, + "learning_rate": "1.5752e-04", + "loss": 0.683, + "slid_loss": 0.6643, + "step": 5060, + "time": 12.72 + }, + { + "epoch": 2.53, + "learning_rate": "1.5751e-04", + "loss": 0.6833, + "slid_loss": 0.6653, + "step": 5061, + "time": 11.11 + }, + { + "epoch": 2.53, + "learning_rate": "1.5749e-04", + "loss": 0.6173, + "slid_loss": 0.666, + "step": 5062, + "time": 13.24 + }, + { + "epoch": 2.54, + "learning_rate": "1.5747e-04", + "loss": 0.644, + "slid_loss": 0.6661, + "step": 5063, + "time": 13.75 + }, + { + "epoch": 2.54, + "learning_rate": "1.5745e-04", + "loss": 0.6567, + "slid_loss": 0.6653, + "step": 5064, + "time": 11.06 + }, + { + "epoch": 2.54, + "learning_rate": "1.5744e-04", + "loss": 0.6788, + "slid_loss": 0.667, + "step": 5065, + "time": 12.56 + }, + { + "epoch": 2.54, + "learning_rate": "1.5742e-04", + "loss": 0.6116, + "slid_loss": 0.6681, + "step": 5066, + "time": 13.31 + }, + { + "epoch": 2.54, + "learning_rate": "1.5740e-04", + "loss": 0.7395, + "slid_loss": 0.6688, + "step": 5067, + "time": 13.65 + }, + { + "epoch": 2.54, + "learning_rate": "1.5738e-04", + "loss": 0.6765, + "slid_loss": 0.6694, + "step": 5068, + "time": 13.9 + }, + { + "epoch": 2.54, + "learning_rate": "1.5737e-04", + "loss": 0.7248, + "slid_loss": 0.6706, + "step": 5069, + "time": 14.02 + }, + { + "epoch": 2.54, + "learning_rate": "1.5735e-04", + "loss": 0.702, + "slid_loss": 0.6718, + "step": 5070, + "time": 12.24 + }, + { + "epoch": 2.54, + "learning_rate": "1.5733e-04", + "loss": 0.509, + "slid_loss": 0.6705, + "step": 5071, + "time": 11.57 + }, + { + "epoch": 2.54, + "learning_rate": "1.5732e-04", + "loss": 0.7321, + "slid_loss": 0.671, + "step": 5072, + "time": 13.31 + }, + { + "epoch": 2.54, + "learning_rate": "1.5730e-04", + "loss": 0.5698, + "slid_loss": 0.6699, + "step": 5073, + "time": 12.86 + }, + { + "epoch": 2.54, + "learning_rate": "1.5728e-04", + "loss": 0.8282, + "slid_loss": 0.6702, + "step": 5074, + "time": 12.93 + }, + { + "epoch": 2.54, + "learning_rate": "1.5726e-04", + "loss": 0.6237, + "slid_loss": 0.6698, + "step": 5075, + "time": 12.85 + }, + { + "epoch": 2.54, + "learning_rate": "1.5725e-04", + "loss": 0.8344, + "slid_loss": 0.6711, + "step": 5076, + "time": 13.41 + }, + { + "epoch": 2.54, + "learning_rate": "1.5723e-04", + "loss": 0.7421, + "slid_loss": 0.672, + "step": 5077, + "time": 12.7 + }, + { + "epoch": 2.54, + "learning_rate": "1.5721e-04", + "loss": 0.7637, + "slid_loss": 0.6726, + "step": 5078, + "time": 13.39 + }, + { + "epoch": 2.54, + "learning_rate": "1.5719e-04", + "loss": 0.6784, + "slid_loss": 0.6716, + "step": 5079, + "time": 13.66 + }, + { + "epoch": 2.54, + "learning_rate": "1.5718e-04", + "loss": 0.5351, + "slid_loss": 0.6677, + "step": 5080, + "time": 13.35 + }, + { + "epoch": 2.54, + "learning_rate": "1.5716e-04", + "loss": 0.4514, + "slid_loss": 0.6654, + "step": 5081, + "time": 13.74 + }, + { + "epoch": 2.54, + "learning_rate": "1.5714e-04", + "loss": 0.6773, + "slid_loss": 0.6653, + "step": 5082, + "time": 14.25 + }, + { + "epoch": 2.55, + "learning_rate": "1.5713e-04", + "loss": 0.4995, + "slid_loss": 0.6641, + "step": 5083, + "time": 11.52 + }, + { + "epoch": 2.55, + "learning_rate": "1.5711e-04", + "loss": 0.6308, + "slid_loss": 0.6649, + "step": 5084, + "time": 14.2 + }, + { + "epoch": 2.55, + "learning_rate": "1.5709e-04", + "loss": 0.7547, + "slid_loss": 0.6664, + "step": 5085, + "time": 11.78 + }, + { + "epoch": 2.55, + "learning_rate": "1.5707e-04", + "loss": 0.5174, + "slid_loss": 0.6652, + "step": 5086, + "time": 13.71 + }, + { + "epoch": 2.55, + "learning_rate": "1.5706e-04", + "loss": 0.5646, + "slid_loss": 0.6629, + "step": 5087, + "time": 13.06 + }, + { + "epoch": 2.55, + "learning_rate": "1.5704e-04", + "loss": 0.6062, + "slid_loss": 0.6631, + "step": 5088, + "time": 11.09 + }, + { + "epoch": 2.55, + "learning_rate": "1.5702e-04", + "loss": 0.5666, + "slid_loss": 0.6618, + "step": 5089, + "time": 13.6 + }, + { + "epoch": 2.55, + "learning_rate": "1.5700e-04", + "loss": 0.6803, + "slid_loss": 0.6615, + "step": 5090, + "time": 10.92 + }, + { + "epoch": 2.55, + "learning_rate": "1.5699e-04", + "loss": 0.6604, + "slid_loss": 0.6606, + "step": 5091, + "time": 11.94 + }, + { + "epoch": 2.55, + "learning_rate": "1.5697e-04", + "loss": 0.6049, + "slid_loss": 0.6597, + "step": 5092, + "time": 13.52 + }, + { + "epoch": 2.55, + "learning_rate": "1.5695e-04", + "loss": 0.5246, + "slid_loss": 0.6576, + "step": 5093, + "time": 12.17 + }, + { + "epoch": 2.55, + "learning_rate": "1.5693e-04", + "loss": 0.5319, + "slid_loss": 0.657, + "step": 5094, + "time": 13.41 + }, + { + "epoch": 2.55, + "learning_rate": "1.5692e-04", + "loss": 0.6187, + "slid_loss": 0.6566, + "step": 5095, + "time": 12.88 + }, + { + "epoch": 2.55, + "learning_rate": "1.5690e-04", + "loss": 0.6603, + "slid_loss": 0.6565, + "step": 5096, + "time": 12.08 + }, + { + "epoch": 2.55, + "learning_rate": "1.5688e-04", + "loss": 0.6491, + "slid_loss": 0.6563, + "step": 5097, + "time": 13.19 + }, + { + "epoch": 2.55, + "learning_rate": "1.5687e-04", + "loss": 0.6217, + "slid_loss": 0.6569, + "step": 5098, + "time": 12.35 + }, + { + "epoch": 2.55, + "learning_rate": "1.5685e-04", + "loss": 0.6985, + "slid_loss": 0.6579, + "step": 5099, + "time": 12.17 + }, + { + "epoch": 2.55, + "learning_rate": "1.5683e-04", + "loss": 0.6688, + "slid_loss": 0.6579, + "step": 5100, + "time": 12.57 + }, + { + "epoch": 2.55, + "learning_rate": "1.5681e-04", + "loss": 0.7649, + "slid_loss": 0.6588, + "step": 5101, + "time": 12.03 + }, + { + "epoch": 2.55, + "learning_rate": "1.5680e-04", + "loss": 0.5644, + "slid_loss": 0.6588, + "step": 5102, + "time": 11.4 + }, + { + "epoch": 2.56, + "learning_rate": "1.5678e-04", + "loss": 0.689, + "slid_loss": 0.6596, + "step": 5103, + "time": 13.53 + }, + { + "epoch": 2.56, + "learning_rate": "1.5676e-04", + "loss": 0.7238, + "slid_loss": 0.6603, + "step": 5104, + "time": 12.81 + }, + { + "epoch": 2.56, + "learning_rate": "1.5674e-04", + "loss": 0.7243, + "slid_loss": 0.66, + "step": 5105, + "time": 12.94 + }, + { + "epoch": 2.56, + "learning_rate": "1.5673e-04", + "loss": 0.7501, + "slid_loss": 0.6608, + "step": 5106, + "time": 11.54 + }, + { + "epoch": 2.56, + "learning_rate": "1.5671e-04", + "loss": 0.6817, + "slid_loss": 0.6596, + "step": 5107, + "time": 13.64 + }, + { + "epoch": 2.56, + "learning_rate": "1.5669e-04", + "loss": 0.7608, + "slid_loss": 0.6597, + "step": 5108, + "time": 12.01 + }, + { + "epoch": 2.56, + "learning_rate": "1.5668e-04", + "loss": 0.657, + "slid_loss": 0.66, + "step": 5109, + "time": 13.73 + }, + { + "epoch": 2.56, + "learning_rate": "1.5666e-04", + "loss": 0.5548, + "slid_loss": 0.6589, + "step": 5110, + "time": 10.74 + }, + { + "epoch": 2.56, + "learning_rate": "1.5664e-04", + "loss": 0.7179, + "slid_loss": 0.6583, + "step": 5111, + "time": 11.89 + }, + { + "epoch": 2.56, + "learning_rate": "1.5662e-04", + "loss": 0.7106, + "slid_loss": 0.6599, + "step": 5112, + "time": 11.71 + }, + { + "epoch": 2.56, + "learning_rate": "1.5661e-04", + "loss": 0.564, + "slid_loss": 0.6601, + "step": 5113, + "time": 12.93 + }, + { + "epoch": 2.56, + "learning_rate": "1.5659e-04", + "loss": 0.5657, + "slid_loss": 0.6593, + "step": 5114, + "time": 12.02 + }, + { + "epoch": 2.56, + "learning_rate": "1.5657e-04", + "loss": 0.7212, + "slid_loss": 0.6599, + "step": 5115, + "time": 14.23 + }, + { + "epoch": 2.56, + "learning_rate": "1.5655e-04", + "loss": 0.5779, + "slid_loss": 0.6587, + "step": 5116, + "time": 10.8 + }, + { + "epoch": 2.56, + "learning_rate": "1.5654e-04", + "loss": 0.5658, + "slid_loss": 0.6567, + "step": 5117, + "time": 10.67 + }, + { + "epoch": 2.56, + "learning_rate": "1.5652e-04", + "loss": 0.7086, + "slid_loss": 0.6584, + "step": 5118, + "time": 11.78 + }, + { + "epoch": 2.56, + "learning_rate": "1.5650e-04", + "loss": 0.6291, + "slid_loss": 0.6582, + "step": 5119, + "time": 12.77 + }, + { + "epoch": 2.56, + "learning_rate": "1.5648e-04", + "loss": 0.715, + "slid_loss": 0.6595, + "step": 5120, + "time": 10.74 + }, + { + "epoch": 2.56, + "learning_rate": "1.5647e-04", + "loss": 0.5794, + "slid_loss": 0.6592, + "step": 5121, + "time": 12.1 + }, + { + "epoch": 2.56, + "learning_rate": "1.5645e-04", + "loss": 0.7282, + "slid_loss": 0.6602, + "step": 5122, + "time": 11.62 + }, + { + "epoch": 2.57, + "learning_rate": "1.5643e-04", + "loss": 0.7891, + "slid_loss": 0.6608, + "step": 5123, + "time": 12.02 + }, + { + "epoch": 2.57, + "learning_rate": "1.5642e-04", + "loss": 0.7367, + "slid_loss": 0.6608, + "step": 5124, + "time": 13.37 + }, + { + "epoch": 2.57, + "learning_rate": "1.5640e-04", + "loss": 0.8719, + "slid_loss": 0.6626, + "step": 5125, + "time": 10.88 + }, + { + "epoch": 2.57, + "learning_rate": "1.5638e-04", + "loss": 0.6335, + "slid_loss": 0.6619, + "step": 5126, + "time": 13.01 + }, + { + "epoch": 2.57, + "learning_rate": "1.5636e-04", + "loss": 0.7616, + "slid_loss": 0.6623, + "step": 5127, + "time": 11.9 + }, + { + "epoch": 2.57, + "learning_rate": "1.5635e-04", + "loss": 0.7667, + "slid_loss": 0.6634, + "step": 5128, + "time": 13.94 + }, + { + "epoch": 2.57, + "learning_rate": "1.5633e-04", + "loss": 0.7452, + "slid_loss": 0.6639, + "step": 5129, + "time": 13.63 + }, + { + "epoch": 2.57, + "learning_rate": "1.5631e-04", + "loss": 0.5983, + "slid_loss": 0.6631, + "step": 5130, + "time": 13.72 + }, + { + "epoch": 2.57, + "learning_rate": "1.5629e-04", + "loss": 0.6504, + "slid_loss": 0.6642, + "step": 5131, + "time": 11.37 + }, + { + "epoch": 2.57, + "learning_rate": "1.5628e-04", + "loss": 0.5818, + "slid_loss": 0.6639, + "step": 5132, + "time": 13.76 + }, + { + "epoch": 2.57, + "learning_rate": "1.5626e-04", + "loss": 0.6834, + "slid_loss": 0.6641, + "step": 5133, + "time": 14.01 + }, + { + "epoch": 2.57, + "learning_rate": "1.5624e-04", + "loss": 0.5038, + "slid_loss": 0.662, + "step": 5134, + "time": 11.51 + }, + { + "epoch": 2.57, + "learning_rate": "1.5622e-04", + "loss": 0.5334, + "slid_loss": 0.6602, + "step": 5135, + "time": 14.14 + }, + { + "epoch": 2.57, + "learning_rate": "1.5621e-04", + "loss": 0.6945, + "slid_loss": 0.6612, + "step": 5136, + "time": 10.8 + }, + { + "epoch": 2.57, + "learning_rate": "1.5619e-04", + "loss": 0.8281, + "slid_loss": 0.6618, + "step": 5137, + "time": 12.95 + }, + { + "epoch": 2.57, + "learning_rate": "1.5617e-04", + "loss": 0.6984, + "slid_loss": 0.6612, + "step": 5138, + "time": 13.94 + }, + { + "epoch": 2.57, + "learning_rate": "1.5615e-04", + "loss": 0.6257, + "slid_loss": 0.6608, + "step": 5139, + "time": 12.77 + }, + { + "epoch": 2.57, + "learning_rate": "1.5614e-04", + "loss": 0.8017, + "slid_loss": 0.6616, + "step": 5140, + "time": 13.32 + }, + { + "epoch": 2.57, + "learning_rate": "1.5612e-04", + "loss": 0.6849, + "slid_loss": 0.6609, + "step": 5141, + "time": 12.47 + }, + { + "epoch": 2.57, + "learning_rate": "1.5610e-04", + "loss": 0.6758, + "slid_loss": 0.661, + "step": 5142, + "time": 10.89 + }, + { + "epoch": 2.58, + "learning_rate": "1.5609e-04", + "loss": 0.6353, + "slid_loss": 0.6597, + "step": 5143, + "time": 12.95 + }, + { + "epoch": 2.58, + "learning_rate": "1.5607e-04", + "loss": 0.7494, + "slid_loss": 0.6596, + "step": 5144, + "time": 13.47 + }, + { + "epoch": 2.58, + "learning_rate": "1.5605e-04", + "loss": 0.6485, + "slid_loss": 0.6601, + "step": 5145, + "time": 11.29 + }, + { + "epoch": 2.58, + "learning_rate": "1.5603e-04", + "loss": 0.6497, + "slid_loss": 0.6598, + "step": 5146, + "time": 13.7 + }, + { + "epoch": 2.58, + "learning_rate": "1.5602e-04", + "loss": 0.8124, + "slid_loss": 0.6605, + "step": 5147, + "time": 12.9 + }, + { + "epoch": 2.58, + "learning_rate": "1.5600e-04", + "loss": 0.8209, + "slid_loss": 0.6615, + "step": 5148, + "time": 13.49 + }, + { + "epoch": 2.58, + "learning_rate": "1.5598e-04", + "loss": 0.7921, + "slid_loss": 0.6639, + "step": 5149, + "time": 13.88 + }, + { + "epoch": 2.58, + "learning_rate": "1.5596e-04", + "loss": 0.7214, + "slid_loss": 0.6639, + "step": 5150, + "time": 13.66 + }, + { + "epoch": 2.58, + "learning_rate": "1.5595e-04", + "loss": 0.5838, + "slid_loss": 0.6618, + "step": 5151, + "time": 10.67 + }, + { + "epoch": 2.58, + "learning_rate": "1.5593e-04", + "loss": 0.5796, + "slid_loss": 0.6607, + "step": 5152, + "time": 12.97 + }, + { + "epoch": 2.58, + "learning_rate": "1.5591e-04", + "loss": 0.5746, + "slid_loss": 0.6605, + "step": 5153, + "time": 13.95 + }, + { + "epoch": 2.58, + "learning_rate": "1.5589e-04", + "loss": 0.7163, + "slid_loss": 0.6629, + "step": 5154, + "time": 10.51 + }, + { + "epoch": 2.58, + "learning_rate": "1.5588e-04", + "loss": 0.5547, + "slid_loss": 0.6621, + "step": 5155, + "time": 11.49 + }, + { + "epoch": 2.58, + "learning_rate": "1.5586e-04", + "loss": 0.73, + "slid_loss": 0.6643, + "step": 5156, + "time": 11.17 + }, + { + "epoch": 2.58, + "learning_rate": "1.5584e-04", + "loss": 0.578, + "slid_loss": 0.6641, + "step": 5157, + "time": 11.34 + }, + { + "epoch": 2.58, + "learning_rate": "1.5583e-04", + "loss": 0.8104, + "slid_loss": 0.6652, + "step": 5158, + "time": 13.52 + }, + { + "epoch": 2.58, + "learning_rate": "1.5581e-04", + "loss": 0.6661, + "slid_loss": 0.6656, + "step": 5159, + "time": 12.79 + }, + { + "epoch": 2.58, + "learning_rate": "1.5579e-04", + "loss": 0.5405, + "slid_loss": 0.6642, + "step": 5160, + "time": 11.56 + }, + { + "epoch": 2.58, + "learning_rate": "1.5577e-04", + "loss": 0.565, + "slid_loss": 0.663, + "step": 5161, + "time": 11.33 + }, + { + "epoch": 2.58, + "learning_rate": "1.5576e-04", + "loss": 0.5996, + "slid_loss": 0.6629, + "step": 5162, + "time": 13.2 + }, + { + "epoch": 2.59, + "learning_rate": "1.5574e-04", + "loss": 0.7033, + "slid_loss": 0.6634, + "step": 5163, + "time": 13.2 + }, + { + "epoch": 2.59, + "learning_rate": "1.5572e-04", + "loss": 0.7902, + "slid_loss": 0.6648, + "step": 5164, + "time": 13.66 + }, + { + "epoch": 2.59, + "learning_rate": "1.5570e-04", + "loss": 0.7, + "slid_loss": 0.665, + "step": 5165, + "time": 10.92 + }, + { + "epoch": 2.59, + "learning_rate": "1.5569e-04", + "loss": 0.6403, + "slid_loss": 0.6653, + "step": 5166, + "time": 12.66 + }, + { + "epoch": 2.59, + "learning_rate": "1.5567e-04", + "loss": 0.6692, + "slid_loss": 0.6646, + "step": 5167, + "time": 13.04 + }, + { + "epoch": 2.59, + "learning_rate": "1.5565e-04", + "loss": 0.7592, + "slid_loss": 0.6654, + "step": 5168, + "time": 12.67 + }, + { + "epoch": 2.59, + "learning_rate": "1.5563e-04", + "loss": 0.649, + "slid_loss": 0.6647, + "step": 5169, + "time": 11.63 + }, + { + "epoch": 2.59, + "learning_rate": "1.5562e-04", + "loss": 0.6808, + "slid_loss": 0.6644, + "step": 5170, + "time": 13.15 + }, + { + "epoch": 2.59, + "learning_rate": "1.5560e-04", + "loss": 0.581, + "slid_loss": 0.6652, + "step": 5171, + "time": 12.3 + }, + { + "epoch": 2.59, + "learning_rate": "1.5558e-04", + "loss": 0.681, + "slid_loss": 0.6646, + "step": 5172, + "time": 11.37 + }, + { + "epoch": 2.59, + "learning_rate": "1.5556e-04", + "loss": 0.6941, + "slid_loss": 0.6659, + "step": 5173, + "time": 13.84 + }, + { + "epoch": 2.59, + "learning_rate": "1.5555e-04", + "loss": 0.6415, + "slid_loss": 0.664, + "step": 5174, + "time": 10.72 + }, + { + "epoch": 2.59, + "learning_rate": "1.5553e-04", + "loss": 0.6598, + "slid_loss": 0.6644, + "step": 5175, + "time": 12.71 + }, + { + "epoch": 2.59, + "learning_rate": "1.5551e-04", + "loss": 0.5216, + "slid_loss": 0.6613, + "step": 5176, + "time": 13.73 + }, + { + "epoch": 2.59, + "learning_rate": "1.5550e-04", + "loss": 0.5923, + "slid_loss": 0.6598, + "step": 5177, + "time": 13.4 + }, + { + "epoch": 2.59, + "learning_rate": "1.5548e-04", + "loss": 0.7587, + "slid_loss": 0.6597, + "step": 5178, + "time": 13.13 + }, + { + "epoch": 2.59, + "learning_rate": "1.5546e-04", + "loss": 0.6066, + "slid_loss": 0.659, + "step": 5179, + "time": 12.78 + }, + { + "epoch": 2.59, + "learning_rate": "1.5544e-04", + "loss": 0.6736, + "slid_loss": 0.6604, + "step": 5180, + "time": 11.24 + }, + { + "epoch": 2.59, + "learning_rate": "1.5543e-04", + "loss": 0.7889, + "slid_loss": 0.6637, + "step": 5181, + "time": 11.93 + }, + { + "epoch": 2.59, + "learning_rate": "1.5541e-04", + "loss": 0.7237, + "slid_loss": 0.6642, + "step": 5182, + "time": 13.31 + }, + { + "epoch": 2.6, + "learning_rate": "1.5539e-04", + "loss": 0.783, + "slid_loss": 0.667, + "step": 5183, + "time": 13.23 + }, + { + "epoch": 2.6, + "learning_rate": "1.5537e-04", + "loss": 0.7462, + "slid_loss": 0.6682, + "step": 5184, + "time": 11.62 + }, + { + "epoch": 2.6, + "learning_rate": "1.5536e-04", + "loss": 0.682, + "slid_loss": 0.6675, + "step": 5185, + "time": 11.27 + }, + { + "epoch": 2.6, + "learning_rate": "1.5534e-04", + "loss": 0.7597, + "slid_loss": 0.6699, + "step": 5186, + "time": 13.78 + }, + { + "epoch": 2.6, + "learning_rate": "1.5532e-04", + "loss": 0.5996, + "slid_loss": 0.6702, + "step": 5187, + "time": 13.17 + }, + { + "epoch": 2.6, + "learning_rate": "1.5530e-04", + "loss": 0.5177, + "slid_loss": 0.6694, + "step": 5188, + "time": 12.57 + }, + { + "epoch": 2.6, + "learning_rate": "1.5529e-04", + "loss": 0.539, + "slid_loss": 0.6691, + "step": 5189, + "time": 12.82 + }, + { + "epoch": 2.6, + "learning_rate": "1.5527e-04", + "loss": 0.5764, + "slid_loss": 0.668, + "step": 5190, + "time": 12.86 + }, + { + "epoch": 2.6, + "learning_rate": "1.5525e-04", + "loss": 0.6198, + "slid_loss": 0.6676, + "step": 5191, + "time": 13.62 + }, + { + "epoch": 2.6, + "learning_rate": "1.5523e-04", + "loss": 0.6594, + "slid_loss": 0.6682, + "step": 5192, + "time": 13.41 + }, + { + "epoch": 2.6, + "learning_rate": "1.5522e-04", + "loss": 0.6857, + "slid_loss": 0.6698, + "step": 5193, + "time": 10.79 + }, + { + "epoch": 2.6, + "learning_rate": "1.5520e-04", + "loss": 0.753, + "slid_loss": 0.672, + "step": 5194, + "time": 13.58 + }, + { + "epoch": 2.6, + "learning_rate": "1.5518e-04", + "loss": 0.7378, + "slid_loss": 0.6732, + "step": 5195, + "time": 11.8 + }, + { + "epoch": 2.6, + "learning_rate": "1.5516e-04", + "loss": 0.7926, + "slid_loss": 0.6745, + "step": 5196, + "time": 12.95 + }, + { + "epoch": 2.6, + "learning_rate": "1.5515e-04", + "loss": 0.4925, + "slid_loss": 0.673, + "step": 5197, + "time": 13.6 + }, + { + "epoch": 2.6, + "learning_rate": "1.5513e-04", + "loss": 0.5838, + "slid_loss": 0.6726, + "step": 5198, + "time": 11.38 + }, + { + "epoch": 2.6, + "learning_rate": "1.5511e-04", + "loss": 0.7837, + "slid_loss": 0.6734, + "step": 5199, + "time": 11.79 + }, + { + "epoch": 2.6, + "learning_rate": "1.5510e-04", + "loss": 0.7624, + "slid_loss": 0.6744, + "step": 5200, + "time": 11.82 + }, + { + "epoch": 2.6, + "learning_rate": "1.5508e-04", + "loss": 0.6532, + "slid_loss": 0.6732, + "step": 5201, + "time": 13.3 + }, + { + "epoch": 2.6, + "learning_rate": "1.5506e-04", + "loss": 0.8426, + "slid_loss": 0.676, + "step": 5202, + "time": 13.67 + }, + { + "epoch": 2.61, + "learning_rate": "1.5504e-04", + "loss": 0.5571, + "slid_loss": 0.6747, + "step": 5203, + "time": 11.4 + }, + { + "epoch": 2.61, + "learning_rate": "1.5503e-04", + "loss": 0.5303, + "slid_loss": 0.6728, + "step": 5204, + "time": 11.35 + }, + { + "epoch": 2.61, + "learning_rate": "1.5501e-04", + "loss": 0.6412, + "slid_loss": 0.6719, + "step": 5205, + "time": 11.04 + }, + { + "epoch": 2.61, + "learning_rate": "1.5499e-04", + "loss": 0.6616, + "slid_loss": 0.6711, + "step": 5206, + "time": 11.76 + }, + { + "epoch": 2.61, + "learning_rate": "1.5497e-04", + "loss": 0.5809, + "slid_loss": 0.67, + "step": 5207, + "time": 12.06 + }, + { + "epoch": 2.61, + "learning_rate": "1.5496e-04", + "loss": 0.693, + "slid_loss": 0.6694, + "step": 5208, + "time": 12.25 + }, + { + "epoch": 2.61, + "learning_rate": "1.5494e-04", + "loss": 0.6536, + "slid_loss": 0.6693, + "step": 5209, + "time": 11.61 + }, + { + "epoch": 2.61, + "learning_rate": "1.5492e-04", + "loss": 0.6595, + "slid_loss": 0.6704, + "step": 5210, + "time": 11.73 + }, + { + "epoch": 2.61, + "learning_rate": "1.5490e-04", + "loss": 0.6647, + "slid_loss": 0.6699, + "step": 5211, + "time": 13.78 + }, + { + "epoch": 2.61, + "learning_rate": "1.5489e-04", + "loss": 0.5679, + "slid_loss": 0.6684, + "step": 5212, + "time": 13.76 + }, + { + "epoch": 2.61, + "learning_rate": "1.5487e-04", + "loss": 0.755, + "slid_loss": 0.6703, + "step": 5213, + "time": 10.89 + }, + { + "epoch": 2.61, + "learning_rate": "1.5485e-04", + "loss": 0.745, + "slid_loss": 0.6721, + "step": 5214, + "time": 12.63 + }, + { + "epoch": 2.61, + "learning_rate": "1.5483e-04", + "loss": 0.5515, + "slid_loss": 0.6704, + "step": 5215, + "time": 13.33 + }, + { + "epoch": 2.61, + "learning_rate": "1.5482e-04", + "loss": 0.6015, + "slid_loss": 0.6707, + "step": 5216, + "time": 11.12 + }, + { + "epoch": 2.61, + "learning_rate": "1.5480e-04", + "loss": 0.7705, + "slid_loss": 0.6727, + "step": 5217, + "time": 13.76 + }, + { + "epoch": 2.61, + "learning_rate": "1.5478e-04", + "loss": 0.784, + "slid_loss": 0.6735, + "step": 5218, + "time": 13.26 + }, + { + "epoch": 2.61, + "learning_rate": "1.5476e-04", + "loss": 0.6033, + "slid_loss": 0.6732, + "step": 5219, + "time": 13.41 + }, + { + "epoch": 2.61, + "learning_rate": "1.5475e-04", + "loss": 0.7081, + "slid_loss": 0.6731, + "step": 5220, + "time": 13.6 + }, + { + "epoch": 2.61, + "learning_rate": "1.5473e-04", + "loss": 0.6499, + "slid_loss": 0.6738, + "step": 5221, + "time": 13.49 + }, + { + "epoch": 2.61, + "learning_rate": "1.5471e-04", + "loss": 0.6217, + "slid_loss": 0.6728, + "step": 5222, + "time": 14.32 + }, + { + "epoch": 2.62, + "learning_rate": "1.5470e-04", + "loss": 0.5798, + "slid_loss": 0.6707, + "step": 5223, + "time": 10.55 + }, + { + "epoch": 2.62, + "learning_rate": "1.5468e-04", + "loss": 0.5971, + "slid_loss": 0.6693, + "step": 5224, + "time": 13.63 + }, + { + "epoch": 2.62, + "learning_rate": "1.5466e-04", + "loss": 0.8468, + "slid_loss": 0.669, + "step": 5225, + "time": 13.39 + }, + { + "epoch": 2.62, + "learning_rate": "1.5464e-04", + "loss": 0.6711, + "slid_loss": 0.6694, + "step": 5226, + "time": 13.78 + }, + { + "epoch": 2.62, + "learning_rate": "1.5463e-04", + "loss": 0.6389, + "slid_loss": 0.6682, + "step": 5227, + "time": 10.59 + }, + { + "epoch": 2.62, + "learning_rate": "1.5461e-04", + "loss": 0.7032, + "slid_loss": 0.6676, + "step": 5228, + "time": 12.12 + }, + { + "epoch": 2.62, + "learning_rate": "1.5459e-04", + "loss": 0.5984, + "slid_loss": 0.6661, + "step": 5229, + "time": 12.02 + }, + { + "epoch": 2.62, + "learning_rate": "1.5457e-04", + "loss": 0.5371, + "slid_loss": 0.6655, + "step": 5230, + "time": 10.89 + }, + { + "epoch": 2.62, + "learning_rate": "1.5456e-04", + "loss": 0.6487, + "slid_loss": 0.6655, + "step": 5231, + "time": 11.84 + }, + { + "epoch": 2.62, + "learning_rate": "1.5454e-04", + "loss": 0.6574, + "slid_loss": 0.6662, + "step": 5232, + "time": 11.56 + }, + { + "epoch": 2.62, + "learning_rate": "1.5452e-04", + "loss": 0.7635, + "slid_loss": 0.667, + "step": 5233, + "time": 11.54 + }, + { + "epoch": 2.62, + "learning_rate": "1.5450e-04", + "loss": 0.5146, + "slid_loss": 0.6671, + "step": 5234, + "time": 11.29 + }, + { + "epoch": 2.62, + "learning_rate": "1.5449e-04", + "loss": 0.892, + "slid_loss": 0.6707, + "step": 5235, + "time": 11.92 + }, + { + "epoch": 2.62, + "learning_rate": "1.5447e-04", + "loss": 0.707, + "slid_loss": 0.6708, + "step": 5236, + "time": 13.82 + }, + { + "epoch": 2.62, + "learning_rate": "1.5445e-04", + "loss": 0.7054, + "slid_loss": 0.6696, + "step": 5237, + "time": 11.72 + }, + { + "epoch": 2.62, + "learning_rate": "1.5443e-04", + "loss": 0.5723, + "slid_loss": 0.6683, + "step": 5238, + "time": 11.11 + }, + { + "epoch": 2.62, + "learning_rate": "1.5442e-04", + "loss": 0.5374, + "slid_loss": 0.6675, + "step": 5239, + "time": 13.04 + }, + { + "epoch": 2.62, + "learning_rate": "1.5440e-04", + "loss": 0.5898, + "slid_loss": 0.6653, + "step": 5240, + "time": 13.99 + }, + { + "epoch": 2.62, + "learning_rate": "1.5438e-04", + "loss": 0.6062, + "slid_loss": 0.6646, + "step": 5241, + "time": 11.52 + }, + { + "epoch": 2.62, + "learning_rate": "1.5436e-04", + "loss": 0.7838, + "slid_loss": 0.6656, + "step": 5242, + "time": 11.77 + }, + { + "epoch": 2.63, + "learning_rate": "1.5435e-04", + "loss": 0.6034, + "slid_loss": 0.6653, + "step": 5243, + "time": 12.91 + }, + { + "epoch": 2.63, + "learning_rate": "1.5433e-04", + "loss": 0.6066, + "slid_loss": 0.6639, + "step": 5244, + "time": 12.23 + }, + { + "epoch": 2.63, + "learning_rate": "1.5431e-04", + "loss": 0.8067, + "slid_loss": 0.6655, + "step": 5245, + "time": 12.73 + }, + { + "epoch": 2.63, + "learning_rate": "1.5429e-04", + "loss": 0.8537, + "slid_loss": 0.6675, + "step": 5246, + "time": 11.42 + }, + { + "epoch": 2.63, + "learning_rate": "1.5428e-04", + "loss": 0.7421, + "slid_loss": 0.6668, + "step": 5247, + "time": 13.36 + }, + { + "epoch": 2.63, + "learning_rate": "1.5426e-04", + "loss": 0.6125, + "slid_loss": 0.6647, + "step": 5248, + "time": 11.62 + }, + { + "epoch": 2.63, + "learning_rate": "1.5424e-04", + "loss": 0.5045, + "slid_loss": 0.6618, + "step": 5249, + "time": 13.69 + }, + { + "epoch": 2.63, + "learning_rate": "1.5423e-04", + "loss": 0.7658, + "slid_loss": 0.6623, + "step": 5250, + "time": 12.89 + }, + { + "epoch": 2.63, + "learning_rate": "1.5421e-04", + "loss": 0.5921, + "slid_loss": 0.6624, + "step": 5251, + "time": 12.18 + }, + { + "epoch": 2.63, + "learning_rate": "1.5419e-04", + "loss": 0.6689, + "slid_loss": 0.6633, + "step": 5252, + "time": 13.49 + }, + { + "epoch": 2.63, + "learning_rate": "1.5417e-04", + "loss": 0.5902, + "slid_loss": 0.6634, + "step": 5253, + "time": 13.26 + }, + { + "epoch": 2.63, + "learning_rate": "1.5416e-04", + "loss": 0.6624, + "slid_loss": 0.6629, + "step": 5254, + "time": 13.57 + }, + { + "epoch": 2.63, + "learning_rate": "1.5414e-04", + "loss": 0.5782, + "slid_loss": 0.6631, + "step": 5255, + "time": 11.18 + }, + { + "epoch": 2.63, + "learning_rate": "1.5412e-04", + "loss": 0.7544, + "slid_loss": 0.6634, + "step": 5256, + "time": 13.74 + }, + { + "epoch": 2.63, + "learning_rate": "1.5410e-04", + "loss": 0.7552, + "slid_loss": 0.6651, + "step": 5257, + "time": 11.62 + }, + { + "epoch": 2.63, + "learning_rate": "1.5409e-04", + "loss": 0.5661, + "slid_loss": 0.6627, + "step": 5258, + "time": 13.27 + }, + { + "epoch": 2.63, + "learning_rate": "1.5407e-04", + "loss": 0.6352, + "slid_loss": 0.6624, + "step": 5259, + "time": 10.75 + }, + { + "epoch": 2.63, + "learning_rate": "1.5405e-04", + "loss": 0.6818, + "slid_loss": 0.6638, + "step": 5260, + "time": 13.73 + }, + { + "epoch": 2.63, + "learning_rate": "1.5403e-04", + "loss": 0.6405, + "slid_loss": 0.6646, + "step": 5261, + "time": 13.62 + }, + { + "epoch": 2.63, + "learning_rate": "1.5402e-04", + "loss": 0.6133, + "slid_loss": 0.6647, + "step": 5262, + "time": 11.77 + }, + { + "epoch": 2.64, + "learning_rate": "1.5400e-04", + "loss": 0.7238, + "slid_loss": 0.6649, + "step": 5263, + "time": 11.02 + }, + { + "epoch": 2.64, + "learning_rate": "1.5398e-04", + "loss": 0.7333, + "slid_loss": 0.6643, + "step": 5264, + "time": 12.75 + }, + { + "epoch": 2.64, + "learning_rate": "1.5396e-04", + "loss": 0.5316, + "slid_loss": 0.6626, + "step": 5265, + "time": 13.62 + }, + { + "epoch": 2.64, + "learning_rate": "1.5395e-04", + "loss": 0.5942, + "slid_loss": 0.6622, + "step": 5266, + "time": 14.5 + }, + { + "epoch": 2.64, + "learning_rate": "1.5393e-04", + "loss": 0.57, + "slid_loss": 0.6612, + "step": 5267, + "time": 13.35 + }, + { + "epoch": 2.64, + "learning_rate": "1.5391e-04", + "loss": 0.5984, + "slid_loss": 0.6596, + "step": 5268, + "time": 13.09 + }, + { + "epoch": 2.64, + "learning_rate": "1.5389e-04", + "loss": 0.5878, + "slid_loss": 0.659, + "step": 5269, + "time": 11.31 + }, + { + "epoch": 2.64, + "learning_rate": "1.5388e-04", + "loss": 0.7361, + "slid_loss": 0.6595, + "step": 5270, + "time": 11.67 + }, + { + "epoch": 2.64, + "learning_rate": "1.5386e-04", + "loss": 0.5666, + "slid_loss": 0.6594, + "step": 5271, + "time": 13.43 + }, + { + "epoch": 2.64, + "learning_rate": "1.5384e-04", + "loss": 0.8878, + "slid_loss": 0.6614, + "step": 5272, + "time": 13.17 + }, + { + "epoch": 2.64, + "learning_rate": "1.5382e-04", + "loss": 0.611, + "slid_loss": 0.6606, + "step": 5273, + "time": 13.4 + }, + { + "epoch": 2.64, + "learning_rate": "1.5381e-04", + "loss": 0.7157, + "slid_loss": 0.6614, + "step": 5274, + "time": 13.04 + }, + { + "epoch": 2.64, + "learning_rate": "1.5379e-04", + "loss": 0.6897, + "slid_loss": 0.6617, + "step": 5275, + "time": 13.51 + }, + { + "epoch": 2.64, + "learning_rate": "1.5377e-04", + "loss": 0.5684, + "slid_loss": 0.6621, + "step": 5276, + "time": 13.41 + }, + { + "epoch": 2.64, + "learning_rate": "1.5375e-04", + "loss": 0.7325, + "slid_loss": 0.6635, + "step": 5277, + "time": 11.69 + }, + { + "epoch": 2.64, + "learning_rate": "1.5374e-04", + "loss": 0.5903, + "slid_loss": 0.6618, + "step": 5278, + "time": 10.9 + }, + { + "epoch": 2.64, + "learning_rate": "1.5372e-04", + "loss": 0.6663, + "slid_loss": 0.6624, + "step": 5279, + "time": 11.86 + }, + { + "epoch": 2.64, + "learning_rate": "1.5370e-04", + "loss": 0.7465, + "slid_loss": 0.6632, + "step": 5280, + "time": 11.36 + }, + { + "epoch": 2.64, + "learning_rate": "1.5369e-04", + "loss": 0.7341, + "slid_loss": 0.6626, + "step": 5281, + "time": 13.37 + }, + { + "epoch": 2.64, + "learning_rate": "1.5367e-04", + "loss": 0.6314, + "slid_loss": 0.6617, + "step": 5282, + "time": 10.64 + }, + { + "epoch": 2.65, + "learning_rate": "1.5365e-04", + "loss": 0.7826, + "slid_loss": 0.6617, + "step": 5283, + "time": 10.86 + }, + { + "epoch": 2.65, + "learning_rate": "1.5363e-04", + "loss": 0.7344, + "slid_loss": 0.6616, + "step": 5284, + "time": 11.41 + }, + { + "epoch": 2.65, + "learning_rate": "1.5362e-04", + "loss": 0.7893, + "slid_loss": 0.6626, + "step": 5285, + "time": 13.3 + }, + { + "epoch": 2.65, + "learning_rate": "1.5360e-04", + "loss": 0.5938, + "slid_loss": 0.661, + "step": 5286, + "time": 12.29 + }, + { + "epoch": 2.65, + "learning_rate": "1.5358e-04", + "loss": 0.5863, + "slid_loss": 0.6609, + "step": 5287, + "time": 11.11 + }, + { + "epoch": 2.65, + "learning_rate": "1.5356e-04", + "loss": 0.6253, + "slid_loss": 0.6619, + "step": 5288, + "time": 11.42 + }, + { + "epoch": 2.65, + "learning_rate": "1.5355e-04", + "loss": 0.6329, + "slid_loss": 0.6629, + "step": 5289, + "time": 13.56 + }, + { + "epoch": 2.65, + "learning_rate": "1.5353e-04", + "loss": 0.8268, + "slid_loss": 0.6654, + "step": 5290, + "time": 13.64 + }, + { + "epoch": 2.65, + "learning_rate": "1.5351e-04", + "loss": 0.6227, + "slid_loss": 0.6654, + "step": 5291, + "time": 11.19 + }, + { + "epoch": 2.65, + "learning_rate": "1.5349e-04", + "loss": 0.6377, + "slid_loss": 0.6652, + "step": 5292, + "time": 13.76 + }, + { + "epoch": 2.65, + "learning_rate": "1.5348e-04", + "loss": 0.4721, + "slid_loss": 0.6631, + "step": 5293, + "time": 13.72 + }, + { + "epoch": 2.65, + "learning_rate": "1.5346e-04", + "loss": 0.7097, + "slid_loss": 0.6626, + "step": 5294, + "time": 13.31 + }, + { + "epoch": 2.65, + "learning_rate": "1.5344e-04", + "loss": 0.7118, + "slid_loss": 0.6624, + "step": 5295, + "time": 10.56 + }, + { + "epoch": 2.65, + "learning_rate": "1.5342e-04", + "loss": 0.7025, + "slid_loss": 0.6615, + "step": 5296, + "time": 12.79 + }, + { + "epoch": 2.65, + "learning_rate": "1.5341e-04", + "loss": 0.7663, + "slid_loss": 0.6642, + "step": 5297, + "time": 14.27 + }, + { + "epoch": 2.65, + "learning_rate": "1.5339e-04", + "loss": 0.5737, + "slid_loss": 0.6641, + "step": 5298, + "time": 13.08 + }, + { + "epoch": 2.65, + "learning_rate": "1.5337e-04", + "loss": 0.6747, + "slid_loss": 0.663, + "step": 5299, + "time": 14.5 + }, + { + "epoch": 2.65, + "learning_rate": "1.5335e-04", + "loss": 0.7179, + "slid_loss": 0.6626, + "step": 5300, + "time": 13.11 + }, + { + "epoch": 2.65, + "learning_rate": "1.5334e-04", + "loss": 0.8383, + "slid_loss": 0.6644, + "step": 5301, + "time": 13.21 + }, + { + "epoch": 2.65, + "learning_rate": "1.5332e-04", + "loss": 0.6018, + "slid_loss": 0.662, + "step": 5302, + "time": 13.45 + }, + { + "epoch": 2.66, + "learning_rate": "1.5330e-04", + "loss": 0.6239, + "slid_loss": 0.6627, + "step": 5303, + "time": 11.77 + }, + { + "epoch": 2.66, + "learning_rate": "1.5328e-04", + "loss": 0.735, + "slid_loss": 0.6647, + "step": 5304, + "time": 13.55 + }, + { + "epoch": 2.66, + "learning_rate": "1.5327e-04", + "loss": 0.6462, + "slid_loss": 0.6648, + "step": 5305, + "time": 12.9 + }, + { + "epoch": 2.66, + "learning_rate": "1.5325e-04", + "loss": 0.7484, + "slid_loss": 0.6656, + "step": 5306, + "time": 12.3 + }, + { + "epoch": 2.66, + "learning_rate": "1.5323e-04", + "loss": 0.6366, + "slid_loss": 0.6662, + "step": 5307, + "time": 13.29 + }, + { + "epoch": 2.66, + "learning_rate": "1.5321e-04", + "loss": 0.6952, + "slid_loss": 0.6662, + "step": 5308, + "time": 12.23 + }, + { + "epoch": 2.66, + "learning_rate": "1.5320e-04", + "loss": 0.5203, + "slid_loss": 0.6649, + "step": 5309, + "time": 13.24 + }, + { + "epoch": 2.66, + "learning_rate": "1.5318e-04", + "loss": 0.618, + "slid_loss": 0.6645, + "step": 5310, + "time": 12.0 + }, + { + "epoch": 2.66, + "learning_rate": "1.5316e-04", + "loss": 0.6807, + "slid_loss": 0.6646, + "step": 5311, + "time": 13.76 + }, + { + "epoch": 2.66, + "learning_rate": "1.5314e-04", + "loss": 0.56, + "slid_loss": 0.6645, + "step": 5312, + "time": 14.31 + }, + { + "epoch": 2.66, + "learning_rate": "1.5313e-04", + "loss": 0.6351, + "slid_loss": 0.6633, + "step": 5313, + "time": 13.21 + }, + { + "epoch": 2.66, + "learning_rate": "1.5311e-04", + "loss": 0.7544, + "slid_loss": 0.6634, + "step": 5314, + "time": 12.32 + }, + { + "epoch": 2.66, + "learning_rate": "1.5309e-04", + "loss": 0.7548, + "slid_loss": 0.6655, + "step": 5315, + "time": 13.44 + }, + { + "epoch": 2.66, + "learning_rate": "1.5307e-04", + "loss": 0.7192, + "slid_loss": 0.6667, + "step": 5316, + "time": 11.72 + }, + { + "epoch": 2.66, + "learning_rate": "1.5306e-04", + "loss": 0.6172, + "slid_loss": 0.6651, + "step": 5317, + "time": 12.64 + }, + { + "epoch": 2.66, + "learning_rate": "1.5304e-04", + "loss": 0.6958, + "slid_loss": 0.6642, + "step": 5318, + "time": 11.3 + }, + { + "epoch": 2.66, + "learning_rate": "1.5302e-04", + "loss": 0.6028, + "slid_loss": 0.6642, + "step": 5319, + "time": 11.07 + }, + { + "epoch": 2.66, + "learning_rate": "1.5300e-04", + "loss": 0.6545, + "slid_loss": 0.6637, + "step": 5320, + "time": 12.74 + }, + { + "epoch": 2.66, + "learning_rate": "1.5299e-04", + "loss": 0.5343, + "slid_loss": 0.6625, + "step": 5321, + "time": 12.38 + }, + { + "epoch": 2.66, + "learning_rate": "1.5297e-04", + "loss": 0.7731, + "slid_loss": 0.6641, + "step": 5322, + "time": 12.88 + }, + { + "epoch": 2.67, + "learning_rate": "1.5295e-04", + "loss": 0.5658, + "slid_loss": 0.6639, + "step": 5323, + "time": 13.44 + }, + { + "epoch": 2.67, + "learning_rate": "1.5294e-04", + "loss": 0.5382, + "slid_loss": 0.6633, + "step": 5324, + "time": 13.62 + }, + { + "epoch": 2.67, + "learning_rate": "1.5292e-04", + "loss": 0.5822, + "slid_loss": 0.6607, + "step": 5325, + "time": 13.57 + }, + { + "epoch": 2.67, + "learning_rate": "1.5290e-04", + "loss": 0.8212, + "slid_loss": 0.6622, + "step": 5326, + "time": 13.42 + }, + { + "epoch": 2.67, + "learning_rate": "1.5288e-04", + "loss": 0.5869, + "slid_loss": 0.6617, + "step": 5327, + "time": 13.31 + }, + { + "epoch": 2.67, + "learning_rate": "1.5287e-04", + "loss": 0.5354, + "slid_loss": 0.66, + "step": 5328, + "time": 13.28 + }, + { + "epoch": 2.67, + "learning_rate": "1.5285e-04", + "loss": 0.8043, + "slid_loss": 0.662, + "step": 5329, + "time": 14.11 + }, + { + "epoch": 2.67, + "learning_rate": "1.5283e-04", + "loss": 0.6626, + "slid_loss": 0.6633, + "step": 5330, + "time": 11.84 + }, + { + "epoch": 2.67, + "learning_rate": "1.5281e-04", + "loss": 0.7307, + "slid_loss": 0.6641, + "step": 5331, + "time": 11.68 + }, + { + "epoch": 2.67, + "learning_rate": "1.5280e-04", + "loss": 0.595, + "slid_loss": 0.6635, + "step": 5332, + "time": 11.46 + }, + { + "epoch": 2.67, + "learning_rate": "1.5278e-04", + "loss": 0.5765, + "slid_loss": 0.6616, + "step": 5333, + "time": 11.29 + }, + { + "epoch": 2.67, + "learning_rate": "1.5276e-04", + "loss": 0.609, + "slid_loss": 0.6626, + "step": 5334, + "time": 12.88 + }, + { + "epoch": 2.67, + "learning_rate": "1.5274e-04", + "loss": 0.6652, + "slid_loss": 0.6603, + "step": 5335, + "time": 13.4 + }, + { + "epoch": 2.67, + "learning_rate": "1.5273e-04", + "loss": 0.4229, + "slid_loss": 0.6575, + "step": 5336, + "time": 11.07 + }, + { + "epoch": 2.67, + "learning_rate": "1.5271e-04", + "loss": 0.6816, + "slid_loss": 0.6572, + "step": 5337, + "time": 12.94 + }, + { + "epoch": 2.67, + "learning_rate": "1.5269e-04", + "loss": 0.6727, + "slid_loss": 0.6582, + "step": 5338, + "time": 12.31 + }, + { + "epoch": 2.67, + "learning_rate": "1.5267e-04", + "loss": 0.6401, + "slid_loss": 0.6592, + "step": 5339, + "time": 10.77 + }, + { + "epoch": 2.67, + "learning_rate": "1.5266e-04", + "loss": 0.6496, + "slid_loss": 0.6598, + "step": 5340, + "time": 11.37 + }, + { + "epoch": 2.67, + "learning_rate": "1.5264e-04", + "loss": 0.5444, + "slid_loss": 0.6592, + "step": 5341, + "time": 14.04 + }, + { + "epoch": 2.68, + "learning_rate": "1.5262e-04", + "loss": 0.6434, + "slid_loss": 0.6578, + "step": 5342, + "time": 13.62 + }, + { + "epoch": 2.68, + "learning_rate": "1.5260e-04", + "loss": 0.6709, + "slid_loss": 0.6585, + "step": 5343, + "time": 12.14 + }, + { + "epoch": 2.68, + "learning_rate": "1.5259e-04", + "loss": 0.6628, + "slid_loss": 0.6591, + "step": 5344, + "time": 13.39 + }, + { + "epoch": 2.68, + "learning_rate": "1.5257e-04", + "loss": 0.5991, + "slid_loss": 0.657, + "step": 5345, + "time": 11.34 + }, + { + "epoch": 2.68, + "learning_rate": "1.5255e-04", + "loss": 0.6692, + "slid_loss": 0.6551, + "step": 5346, + "time": 12.85 + }, + { + "epoch": 2.68, + "learning_rate": "1.5253e-04", + "loss": 0.5495, + "slid_loss": 0.6532, + "step": 5347, + "time": 14.0 + }, + { + "epoch": 2.68, + "learning_rate": "1.5252e-04", + "loss": 0.7862, + "slid_loss": 0.655, + "step": 5348, + "time": 13.34 + }, + { + "epoch": 2.68, + "learning_rate": "1.5250e-04", + "loss": 0.7871, + "slid_loss": 0.6578, + "step": 5349, + "time": 12.9 + }, + { + "epoch": 2.68, + "learning_rate": "1.5248e-04", + "loss": 0.4629, + "slid_loss": 0.6547, + "step": 5350, + "time": 13.04 + }, + { + "epoch": 2.68, + "learning_rate": "1.5246e-04", + "loss": 0.6224, + "slid_loss": 0.6551, + "step": 5351, + "time": 12.12 + }, + { + "epoch": 2.68, + "learning_rate": "1.5245e-04", + "loss": 0.6894, + "slid_loss": 0.6553, + "step": 5352, + "time": 10.75 + }, + { + "epoch": 2.68, + "learning_rate": "1.5243e-04", + "loss": 0.6451, + "slid_loss": 0.6558, + "step": 5353, + "time": 13.61 + }, + { + "epoch": 2.68, + "learning_rate": "1.5241e-04", + "loss": 0.7483, + "slid_loss": 0.6567, + "step": 5354, + "time": 11.12 + }, + { + "epoch": 2.68, + "learning_rate": "1.5239e-04", + "loss": 0.5352, + "slid_loss": 0.6562, + "step": 5355, + "time": 11.75 + }, + { + "epoch": 2.68, + "learning_rate": "1.5238e-04", + "loss": 0.4799, + "slid_loss": 0.6535, + "step": 5356, + "time": 12.81 + }, + { + "epoch": 2.68, + "learning_rate": "1.5236e-04", + "loss": 0.6838, + "slid_loss": 0.6528, + "step": 5357, + "time": 13.67 + }, + { + "epoch": 2.68, + "learning_rate": "1.5234e-04", + "loss": 0.6921, + "slid_loss": 0.654, + "step": 5358, + "time": 13.45 + }, + { + "epoch": 2.68, + "learning_rate": "1.5232e-04", + "loss": 0.6619, + "slid_loss": 0.6543, + "step": 5359, + "time": 12.75 + }, + { + "epoch": 2.68, + "learning_rate": "1.5231e-04", + "loss": 0.6546, + "slid_loss": 0.654, + "step": 5360, + "time": 11.13 + }, + { + "epoch": 2.68, + "learning_rate": "1.5229e-04", + "loss": 0.6852, + "slid_loss": 0.6545, + "step": 5361, + "time": 13.28 + }, + { + "epoch": 2.69, + "learning_rate": "1.5227e-04", + "loss": 0.6782, + "slid_loss": 0.6551, + "step": 5362, + "time": 11.26 + }, + { + "epoch": 2.69, + "learning_rate": "1.5225e-04", + "loss": 0.7535, + "slid_loss": 0.6554, + "step": 5363, + "time": 13.49 + }, + { + "epoch": 2.69, + "learning_rate": "1.5224e-04", + "loss": 0.5811, + "slid_loss": 0.6539, + "step": 5364, + "time": 11.24 + }, + { + "epoch": 2.69, + "learning_rate": "1.5222e-04", + "loss": 0.8114, + "slid_loss": 0.6567, + "step": 5365, + "time": 11.76 + }, + { + "epoch": 2.69, + "learning_rate": "1.5220e-04", + "loss": 0.6946, + "slid_loss": 0.6577, + "step": 5366, + "time": 13.97 + }, + { + "epoch": 2.69, + "learning_rate": "1.5218e-04", + "loss": 0.5637, + "slid_loss": 0.6576, + "step": 5367, + "time": 13.28 + }, + { + "epoch": 2.69, + "learning_rate": "1.5217e-04", + "loss": 0.7123, + "slid_loss": 0.6588, + "step": 5368, + "time": 11.56 + }, + { + "epoch": 2.69, + "learning_rate": "1.5215e-04", + "loss": 0.706, + "slid_loss": 0.66, + "step": 5369, + "time": 13.8 + }, + { + "epoch": 2.69, + "learning_rate": "1.5213e-04", + "loss": 0.5989, + "slid_loss": 0.6586, + "step": 5370, + "time": 13.2 + }, + { + "epoch": 2.69, + "learning_rate": "1.5211e-04", + "loss": 0.6886, + "slid_loss": 0.6598, + "step": 5371, + "time": 11.56 + }, + { + "epoch": 2.69, + "learning_rate": "1.5210e-04", + "loss": 0.5806, + "slid_loss": 0.6567, + "step": 5372, + "time": 12.86 + }, + { + "epoch": 2.69, + "learning_rate": "1.5208e-04", + "loss": 0.6748, + "slid_loss": 0.6574, + "step": 5373, + "time": 12.84 + }, + { + "epoch": 2.69, + "learning_rate": "1.5206e-04", + "loss": 0.5167, + "slid_loss": 0.6554, + "step": 5374, + "time": 12.85 + }, + { + "epoch": 2.69, + "learning_rate": "1.5204e-04", + "loss": 0.7136, + "slid_loss": 0.6556, + "step": 5375, + "time": 12.83 + }, + { + "epoch": 2.69, + "learning_rate": "1.5203e-04", + "loss": 0.5503, + "slid_loss": 0.6554, + "step": 5376, + "time": 13.05 + }, + { + "epoch": 2.69, + "learning_rate": "1.5201e-04", + "loss": 0.7564, + "slid_loss": 0.6557, + "step": 5377, + "time": 11.0 + }, + { + "epoch": 2.69, + "learning_rate": "1.5199e-04", + "loss": 0.6897, + "slid_loss": 0.6567, + "step": 5378, + "time": 14.17 + }, + { + "epoch": 2.69, + "learning_rate": "1.5197e-04", + "loss": 0.7267, + "slid_loss": 0.6573, + "step": 5379, + "time": 10.66 + }, + { + "epoch": 2.69, + "learning_rate": "1.5196e-04", + "loss": 0.5381, + "slid_loss": 0.6552, + "step": 5380, + "time": 11.61 + }, + { + "epoch": 2.69, + "learning_rate": "1.5194e-04", + "loss": 0.7082, + "slid_loss": 0.6549, + "step": 5381, + "time": 12.73 + }, + { + "epoch": 2.7, + "learning_rate": "1.5192e-04", + "loss": 0.6599, + "slid_loss": 0.6552, + "step": 5382, + "time": 10.76 + }, + { + "epoch": 2.7, + "learning_rate": "1.5190e-04", + "loss": 0.5934, + "slid_loss": 0.6533, + "step": 5383, + "time": 13.48 + }, + { + "epoch": 2.7, + "learning_rate": "1.5189e-04", + "loss": 0.5661, + "slid_loss": 0.6517, + "step": 5384, + "time": 13.33 + }, + { + "epoch": 2.7, + "learning_rate": "1.5187e-04", + "loss": 0.6348, + "slid_loss": 0.6501, + "step": 5385, + "time": 11.28 + }, + { + "epoch": 2.7, + "learning_rate": "1.5185e-04", + "loss": 0.8405, + "slid_loss": 0.6526, + "step": 5386, + "time": 12.86 + }, + { + "epoch": 2.7, + "learning_rate": "1.5184e-04", + "loss": 0.5798, + "slid_loss": 0.6525, + "step": 5387, + "time": 11.28 + }, + { + "epoch": 2.7, + "learning_rate": "1.5182e-04", + "loss": 0.4937, + "slid_loss": 0.6512, + "step": 5388, + "time": 12.81 + }, + { + "epoch": 2.7, + "learning_rate": "1.5180e-04", + "loss": 0.5884, + "slid_loss": 0.6507, + "step": 5389, + "time": 13.37 + }, + { + "epoch": 2.7, + "learning_rate": "1.5178e-04", + "loss": 0.6298, + "slid_loss": 0.6488, + "step": 5390, + "time": 12.37 + }, + { + "epoch": 2.7, + "learning_rate": "1.5177e-04", + "loss": 0.5618, + "slid_loss": 0.6482, + "step": 5391, + "time": 10.86 + }, + { + "epoch": 2.7, + "learning_rate": "1.5175e-04", + "loss": 0.808, + "slid_loss": 0.6499, + "step": 5392, + "time": 12.24 + }, + { + "epoch": 2.7, + "learning_rate": "1.5173e-04", + "loss": 0.6019, + "slid_loss": 0.6512, + "step": 5393, + "time": 13.75 + }, + { + "epoch": 2.7, + "learning_rate": "1.5171e-04", + "loss": 0.6171, + "slid_loss": 0.6502, + "step": 5394, + "time": 14.1 + }, + { + "epoch": 2.7, + "learning_rate": "1.5170e-04", + "loss": 0.7822, + "slid_loss": 0.6509, + "step": 5395, + "time": 14.42 + }, + { + "epoch": 2.7, + "learning_rate": "1.5168e-04", + "loss": 0.5937, + "slid_loss": 0.6499, + "step": 5396, + "time": 11.17 + }, + { + "epoch": 2.7, + "learning_rate": "1.5166e-04", + "loss": 0.6203, + "slid_loss": 0.6484, + "step": 5397, + "time": 13.35 + }, + { + "epoch": 2.7, + "learning_rate": "1.5164e-04", + "loss": 0.6803, + "slid_loss": 0.6495, + "step": 5398, + "time": 13.53 + }, + { + "epoch": 2.7, + "learning_rate": "1.5163e-04", + "loss": 0.7134, + "slid_loss": 0.6499, + "step": 5399, + "time": 11.24 + }, + { + "epoch": 2.7, + "learning_rate": "1.5161e-04", + "loss": 0.7814, + "slid_loss": 0.6505, + "step": 5400, + "time": 10.88 + }, + { + "epoch": 2.7, + "learning_rate": "1.5159e-04", + "loss": 0.7195, + "slid_loss": 0.6493, + "step": 5401, + "time": 14.36 + }, + { + "epoch": 2.71, + "learning_rate": "1.5157e-04", + "loss": 0.9848, + "slid_loss": 0.6531, + "step": 5402, + "time": 11.3 + }, + { + "epoch": 2.71, + "learning_rate": "1.5156e-04", + "loss": 0.556, + "slid_loss": 0.6525, + "step": 5403, + "time": 13.26 + }, + { + "epoch": 2.71, + "learning_rate": "1.5154e-04", + "loss": 0.7853, + "slid_loss": 0.653, + "step": 5404, + "time": 12.77 + }, + { + "epoch": 2.71, + "learning_rate": "1.5152e-04", + "loss": 0.6639, + "slid_loss": 0.6531, + "step": 5405, + "time": 13.78 + }, + { + "epoch": 2.71, + "learning_rate": "1.5150e-04", + "loss": 0.6251, + "slid_loss": 0.6519, + "step": 5406, + "time": 11.86 + }, + { + "epoch": 2.71, + "learning_rate": "1.5149e-04", + "loss": 0.729, + "slid_loss": 0.6528, + "step": 5407, + "time": 10.53 + }, + { + "epoch": 2.71, + "learning_rate": "1.5147e-04", + "loss": 0.8117, + "slid_loss": 0.654, + "step": 5408, + "time": 12.89 + }, + { + "epoch": 2.71, + "learning_rate": "1.5145e-04", + "loss": 0.7155, + "slid_loss": 0.6559, + "step": 5409, + "time": 13.09 + }, + { + "epoch": 2.71, + "learning_rate": "1.5143e-04", + "loss": 0.6544, + "slid_loss": 0.6563, + "step": 5410, + "time": 13.86 + }, + { + "epoch": 2.71, + "learning_rate": "1.5142e-04", + "loss": 0.646, + "slid_loss": 0.656, + "step": 5411, + "time": 13.66 + }, + { + "epoch": 2.71, + "learning_rate": "1.5140e-04", + "loss": 0.6452, + "slid_loss": 0.6568, + "step": 5412, + "time": 13.45 + }, + { + "epoch": 2.71, + "learning_rate": "1.5138e-04", + "loss": 0.6515, + "slid_loss": 0.657, + "step": 5413, + "time": 12.93 + }, + { + "epoch": 2.71, + "learning_rate": "1.5136e-04", + "loss": 0.727, + "slid_loss": 0.6567, + "step": 5414, + "time": 12.87 + }, + { + "epoch": 2.71, + "learning_rate": "1.5135e-04", + "loss": 0.6823, + "slid_loss": 0.656, + "step": 5415, + "time": 12.95 + }, + { + "epoch": 2.71, + "learning_rate": "1.5133e-04", + "loss": 0.6852, + "slid_loss": 0.6556, + "step": 5416, + "time": 11.06 + }, + { + "epoch": 2.71, + "learning_rate": "1.5131e-04", + "loss": 0.6466, + "slid_loss": 0.6559, + "step": 5417, + "time": 13.65 + }, + { + "epoch": 2.71, + "learning_rate": "1.5129e-04", + "loss": 0.5636, + "slid_loss": 0.6546, + "step": 5418, + "time": 11.04 + }, + { + "epoch": 2.71, + "learning_rate": "1.5128e-04", + "loss": 0.563, + "slid_loss": 0.6542, + "step": 5419, + "time": 11.45 + }, + { + "epoch": 2.71, + "learning_rate": "1.5126e-04", + "loss": 0.6727, + "slid_loss": 0.6544, + "step": 5420, + "time": 13.53 + }, + { + "epoch": 2.71, + "learning_rate": "1.5124e-04", + "loss": 0.7494, + "slid_loss": 0.6565, + "step": 5421, + "time": 10.97 + }, + { + "epoch": 2.72, + "learning_rate": "1.5122e-04", + "loss": 0.6315, + "slid_loss": 0.6551, + "step": 5422, + "time": 12.99 + }, + { + "epoch": 2.72, + "learning_rate": "1.5121e-04", + "loss": 0.673, + "slid_loss": 0.6562, + "step": 5423, + "time": 13.62 + }, + { + "epoch": 2.72, + "learning_rate": "1.5119e-04", + "loss": 0.5894, + "slid_loss": 0.6567, + "step": 5424, + "time": 13.61 + }, + { + "epoch": 2.72, + "learning_rate": "1.5117e-04", + "loss": 0.5356, + "slid_loss": 0.6562, + "step": 5425, + "time": 12.68 + }, + { + "epoch": 2.72, + "learning_rate": "1.5115e-04", + "loss": 0.7201, + "slid_loss": 0.6552, + "step": 5426, + "time": 13.73 + }, + { + "epoch": 2.72, + "learning_rate": "1.5114e-04", + "loss": 0.7382, + "slid_loss": 0.6567, + "step": 5427, + "time": 13.33 + }, + { + "epoch": 2.72, + "learning_rate": "1.5112e-04", + "loss": 0.639, + "slid_loss": 0.6578, + "step": 5428, + "time": 12.58 + }, + { + "epoch": 2.72, + "learning_rate": "1.5110e-04", + "loss": 0.7722, + "slid_loss": 0.6575, + "step": 5429, + "time": 14.29 + }, + { + "epoch": 2.72, + "learning_rate": "1.5108e-04", + "loss": 0.6549, + "slid_loss": 0.6574, + "step": 5430, + "time": 12.19 + }, + { + "epoch": 2.72, + "learning_rate": "1.5107e-04", + "loss": 0.5197, + "slid_loss": 0.6553, + "step": 5431, + "time": 11.11 + }, + { + "epoch": 2.72, + "learning_rate": "1.5105e-04", + "loss": 0.6326, + "slid_loss": 0.6556, + "step": 5432, + "time": 10.8 + }, + { + "epoch": 2.72, + "learning_rate": "1.5103e-04", + "loss": 0.5649, + "slid_loss": 0.6555, + "step": 5433, + "time": 10.45 + }, + { + "epoch": 2.72, + "learning_rate": "1.5101e-04", + "loss": 0.6002, + "slid_loss": 0.6554, + "step": 5434, + "time": 11.61 + }, + { + "epoch": 2.72, + "learning_rate": "1.5100e-04", + "loss": 0.6939, + "slid_loss": 0.6557, + "step": 5435, + "time": 10.4 + }, + { + "epoch": 2.72, + "learning_rate": "1.5098e-04", + "loss": 0.704, + "slid_loss": 0.6585, + "step": 5436, + "time": 13.93 + }, + { + "epoch": 2.72, + "learning_rate": "1.5096e-04", + "loss": 0.6666, + "slid_loss": 0.6584, + "step": 5437, + "time": 10.89 + }, + { + "epoch": 2.72, + "learning_rate": "1.5094e-04", + "loss": 0.5892, + "slid_loss": 0.6576, + "step": 5438, + "time": 11.4 + }, + { + "epoch": 2.72, + "learning_rate": "1.5093e-04", + "loss": 0.5123, + "slid_loss": 0.6563, + "step": 5439, + "time": 12.08 + }, + { + "epoch": 2.72, + "learning_rate": "1.5091e-04", + "loss": 0.5875, + "slid_loss": 0.6557, + "step": 5440, + "time": 13.53 + }, + { + "epoch": 2.72, + "learning_rate": "1.5089e-04", + "loss": 0.5817, + "slid_loss": 0.656, + "step": 5441, + "time": 13.13 + }, + { + "epoch": 2.73, + "learning_rate": "1.5087e-04", + "loss": 0.6022, + "slid_loss": 0.6556, + "step": 5442, + "time": 13.3 + }, + { + "epoch": 2.73, + "learning_rate": "1.5086e-04", + "loss": 0.6253, + "slid_loss": 0.6552, + "step": 5443, + "time": 11.44 + }, + { + "epoch": 2.73, + "learning_rate": "1.5084e-04", + "loss": 0.7103, + "slid_loss": 0.6556, + "step": 5444, + "time": 10.78 + }, + { + "epoch": 2.73, + "learning_rate": "1.5082e-04", + "loss": 0.6729, + "slid_loss": 0.6564, + "step": 5445, + "time": 13.51 + }, + { + "epoch": 2.73, + "learning_rate": "1.5080e-04", + "loss": 0.6724, + "slid_loss": 0.6564, + "step": 5446, + "time": 12.78 + }, + { + "epoch": 2.73, + "learning_rate": "1.5079e-04", + "loss": 0.6431, + "slid_loss": 0.6573, + "step": 5447, + "time": 10.69 + }, + { + "epoch": 2.73, + "learning_rate": "1.5077e-04", + "loss": 0.5855, + "slid_loss": 0.6553, + "step": 5448, + "time": 13.37 + }, + { + "epoch": 2.73, + "learning_rate": "1.5075e-04", + "loss": 0.4754, + "slid_loss": 0.6522, + "step": 5449, + "time": 10.95 + }, + { + "epoch": 2.73, + "learning_rate": "1.5073e-04", + "loss": 0.5532, + "slid_loss": 0.6531, + "step": 5450, + "time": 13.05 + }, + { + "epoch": 2.73, + "learning_rate": "1.5072e-04", + "loss": 0.6856, + "slid_loss": 0.6538, + "step": 5451, + "time": 13.0 + }, + { + "epoch": 2.73, + "learning_rate": "1.5070e-04", + "loss": 0.6506, + "slid_loss": 0.6534, + "step": 5452, + "time": 12.14 + }, + { + "epoch": 2.73, + "learning_rate": "1.5068e-04", + "loss": 0.5934, + "slid_loss": 0.6528, + "step": 5453, + "time": 13.44 + }, + { + "epoch": 2.73, + "learning_rate": "1.5066e-04", + "loss": 0.6964, + "slid_loss": 0.6523, + "step": 5454, + "time": 12.19 + }, + { + "epoch": 2.73, + "learning_rate": "1.5065e-04", + "loss": 0.5623, + "slid_loss": 0.6526, + "step": 5455, + "time": 12.76 + }, + { + "epoch": 2.73, + "learning_rate": "1.5063e-04", + "loss": 0.4555, + "slid_loss": 0.6524, + "step": 5456, + "time": 12.81 + }, + { + "epoch": 2.73, + "learning_rate": "1.5061e-04", + "loss": 0.6711, + "slid_loss": 0.6522, + "step": 5457, + "time": 10.71 + }, + { + "epoch": 2.73, + "learning_rate": "1.5059e-04", + "loss": 0.5894, + "slid_loss": 0.6512, + "step": 5458, + "time": 12.69 + }, + { + "epoch": 2.73, + "learning_rate": "1.5058e-04", + "loss": 0.5928, + "slid_loss": 0.6505, + "step": 5459, + "time": 12.9 + }, + { + "epoch": 2.73, + "learning_rate": "1.5056e-04", + "loss": 0.69, + "slid_loss": 0.6509, + "step": 5460, + "time": 13.17 + }, + { + "epoch": 2.73, + "learning_rate": "1.5054e-04", + "loss": 0.7621, + "slid_loss": 0.6516, + "step": 5461, + "time": 13.52 + }, + { + "epoch": 2.74, + "learning_rate": "1.5052e-04", + "loss": 0.6385, + "slid_loss": 0.6512, + "step": 5462, + "time": 12.83 + }, + { + "epoch": 2.74, + "learning_rate": "1.5051e-04", + "loss": 0.6126, + "slid_loss": 0.6498, + "step": 5463, + "time": 13.45 + }, + { + "epoch": 2.74, + "learning_rate": "1.5049e-04", + "loss": 0.6077, + "slid_loss": 0.6501, + "step": 5464, + "time": 13.21 + }, + { + "epoch": 2.74, + "learning_rate": "1.5047e-04", + "loss": 0.7326, + "slid_loss": 0.6493, + "step": 5465, + "time": 13.44 + }, + { + "epoch": 2.74, + "learning_rate": "1.5045e-04", + "loss": 0.6519, + "slid_loss": 0.6489, + "step": 5466, + "time": 10.6 + }, + { + "epoch": 2.74, + "learning_rate": "1.5044e-04", + "loss": 0.6811, + "slid_loss": 0.6501, + "step": 5467, + "time": 12.5 + }, + { + "epoch": 2.74, + "learning_rate": "1.5042e-04", + "loss": 0.7254, + "slid_loss": 0.6502, + "step": 5468, + "time": 11.81 + }, + { + "epoch": 2.74, + "learning_rate": "1.5040e-04", + "loss": 0.7136, + "slid_loss": 0.6503, + "step": 5469, + "time": 11.27 + }, + { + "epoch": 2.74, + "learning_rate": "1.5038e-04", + "loss": 0.6673, + "slid_loss": 0.6509, + "step": 5470, + "time": 12.44 + }, + { + "epoch": 2.74, + "learning_rate": "1.5037e-04", + "loss": 0.4657, + "slid_loss": 0.6487, + "step": 5471, + "time": 13.81 + }, + { + "epoch": 2.74, + "learning_rate": "1.5035e-04", + "loss": 0.68, + "slid_loss": 0.6497, + "step": 5472, + "time": 12.89 + }, + { + "epoch": 2.74, + "learning_rate": "1.5033e-04", + "loss": 0.5354, + "slid_loss": 0.6483, + "step": 5473, + "time": 13.24 + }, + { + "epoch": 2.74, + "learning_rate": "1.5031e-04", + "loss": 0.648, + "slid_loss": 0.6496, + "step": 5474, + "time": 12.08 + }, + { + "epoch": 2.74, + "learning_rate": "1.5030e-04", + "loss": 0.7164, + "slid_loss": 0.6497, + "step": 5475, + "time": 11.36 + }, + { + "epoch": 2.74, + "learning_rate": "1.5028e-04", + "loss": 0.7091, + "slid_loss": 0.6512, + "step": 5476, + "time": 11.4 + }, + { + "epoch": 2.74, + "learning_rate": "1.5026e-04", + "loss": 0.6785, + "slid_loss": 0.6505, + "step": 5477, + "time": 12.94 + }, + { + "epoch": 2.74, + "learning_rate": "1.5024e-04", + "loss": 0.555, + "slid_loss": 0.6491, + "step": 5478, + "time": 11.17 + }, + { + "epoch": 2.74, + "learning_rate": "1.5023e-04", + "loss": 0.5391, + "slid_loss": 0.6472, + "step": 5479, + "time": 11.37 + }, + { + "epoch": 2.74, + "learning_rate": "1.5021e-04", + "loss": 0.547, + "slid_loss": 0.6473, + "step": 5480, + "time": 13.41 + }, + { + "epoch": 2.74, + "learning_rate": "1.5019e-04", + "loss": 0.626, + "slid_loss": 0.6465, + "step": 5481, + "time": 12.72 + }, + { + "epoch": 2.75, + "learning_rate": "1.5017e-04", + "loss": 0.6328, + "slid_loss": 0.6462, + "step": 5482, + "time": 13.74 + }, + { + "epoch": 2.75, + "learning_rate": "1.5016e-04", + "loss": 0.8158, + "slid_loss": 0.6485, + "step": 5483, + "time": 11.88 + }, + { + "epoch": 2.75, + "learning_rate": "1.5014e-04", + "loss": 0.6012, + "slid_loss": 0.6488, + "step": 5484, + "time": 13.36 + }, + { + "epoch": 2.75, + "learning_rate": "1.5012e-04", + "loss": 0.6478, + "slid_loss": 0.6489, + "step": 5485, + "time": 13.29 + }, + { + "epoch": 2.75, + "learning_rate": "1.5010e-04", + "loss": 0.6019, + "slid_loss": 0.6466, + "step": 5486, + "time": 11.78 + }, + { + "epoch": 2.75, + "learning_rate": "1.5009e-04", + "loss": 0.6196, + "slid_loss": 0.647, + "step": 5487, + "time": 11.83 + }, + { + "epoch": 2.75, + "learning_rate": "1.5007e-04", + "loss": 0.5739, + "slid_loss": 0.6478, + "step": 5488, + "time": 13.75 + }, + { + "epoch": 2.75, + "learning_rate": "1.5005e-04", + "loss": 0.586, + "slid_loss": 0.6477, + "step": 5489, + "time": 10.65 + }, + { + "epoch": 2.75, + "learning_rate": "1.5003e-04", + "loss": 0.6011, + "slid_loss": 0.6474, + "step": 5490, + "time": 13.29 + }, + { + "epoch": 2.75, + "learning_rate": "1.5002e-04", + "loss": 0.7951, + "slid_loss": 0.6498, + "step": 5491, + "time": 11.26 + }, + { + "epoch": 2.75, + "learning_rate": "1.5000e-04", + "loss": 0.6957, + "slid_loss": 0.6487, + "step": 5492, + "time": 11.58 + }, + { + "epoch": 2.75, + "learning_rate": "1.4998e-04", + "loss": 0.5288, + "slid_loss": 0.6479, + "step": 5493, + "time": 10.99 + }, + { + "epoch": 2.75, + "learning_rate": "1.4997e-04", + "loss": 0.5811, + "slid_loss": 0.6476, + "step": 5494, + "time": 11.46 + }, + { + "epoch": 2.75, + "learning_rate": "1.4995e-04", + "loss": 0.5498, + "slid_loss": 0.6452, + "step": 5495, + "time": 12.3 + }, + { + "epoch": 2.75, + "learning_rate": "1.4993e-04", + "loss": 0.7555, + "slid_loss": 0.6469, + "step": 5496, + "time": 12.86 + }, + { + "epoch": 2.75, + "learning_rate": "1.4991e-04", + "loss": 0.575, + "slid_loss": 0.6464, + "step": 5497, + "time": 13.56 + }, + { + "epoch": 2.75, + "learning_rate": "1.4990e-04", + "loss": 0.6254, + "slid_loss": 0.6459, + "step": 5498, + "time": 11.09 + }, + { + "epoch": 2.75, + "learning_rate": "1.4988e-04", + "loss": 0.6686, + "slid_loss": 0.6454, + "step": 5499, + "time": 10.82 + }, + { + "epoch": 2.75, + "learning_rate": "1.4986e-04", + "loss": 0.7555, + "slid_loss": 0.6452, + "step": 5500, + "time": 13.53 + }, + { + "epoch": 2.75, + "learning_rate": "1.4984e-04", + "loss": 0.6999, + "slid_loss": 0.645, + "step": 5501, + "time": 11.92 + }, + { + "epoch": 2.76, + "learning_rate": "1.4983e-04", + "loss": 0.6297, + "slid_loss": 0.6414, + "step": 5502, + "time": 11.87 + }, + { + "epoch": 2.76, + "learning_rate": "1.4981e-04", + "loss": 0.5434, + "slid_loss": 0.6413, + "step": 5503, + "time": 10.59 + }, + { + "epoch": 2.76, + "learning_rate": "1.4979e-04", + "loss": 0.6455, + "slid_loss": 0.6399, + "step": 5504, + "time": 12.07 + }, + { + "epoch": 2.76, + "learning_rate": "1.4977e-04", + "loss": 0.663, + "slid_loss": 0.6399, + "step": 5505, + "time": 11.82 + }, + { + "epoch": 2.76, + "learning_rate": "1.4976e-04", + "loss": 0.6062, + "slid_loss": 0.6397, + "step": 5506, + "time": 11.13 + }, + { + "epoch": 2.76, + "learning_rate": "1.4974e-04", + "loss": 0.5337, + "slid_loss": 0.6377, + "step": 5507, + "time": 13.24 + }, + { + "epoch": 2.76, + "learning_rate": "1.4972e-04", + "loss": 0.6997, + "slid_loss": 0.6366, + "step": 5508, + "time": 13.66 + }, + { + "epoch": 2.76, + "learning_rate": "1.4970e-04", + "loss": 0.7407, + "slid_loss": 0.6369, + "step": 5509, + "time": 11.82 + }, + { + "epoch": 2.76, + "learning_rate": "1.4969e-04", + "loss": 0.628, + "slid_loss": 0.6366, + "step": 5510, + "time": 12.89 + }, + { + "epoch": 2.76, + "learning_rate": "1.4967e-04", + "loss": 0.751, + "slid_loss": 0.6376, + "step": 5511, + "time": 13.3 + }, + { + "epoch": 2.76, + "learning_rate": "1.4965e-04", + "loss": 0.5895, + "slid_loss": 0.6371, + "step": 5512, + "time": 10.28 + }, + { + "epoch": 2.76, + "learning_rate": "1.4963e-04", + "loss": 0.5879, + "slid_loss": 0.6365, + "step": 5513, + "time": 12.9 + }, + { + "epoch": 2.76, + "learning_rate": "1.4962e-04", + "loss": 0.5327, + "slid_loss": 0.6345, + "step": 5514, + "time": 13.48 + }, + { + "epoch": 2.76, + "learning_rate": "1.4960e-04", + "loss": 0.5301, + "slid_loss": 0.633, + "step": 5515, + "time": 13.92 + }, + { + "epoch": 2.76, + "learning_rate": "1.4958e-04", + "loss": 0.6513, + "slid_loss": 0.6327, + "step": 5516, + "time": 11.46 + }, + { + "epoch": 2.76, + "learning_rate": "1.4956e-04", + "loss": 0.5082, + "slid_loss": 0.6313, + "step": 5517, + "time": 10.81 + }, + { + "epoch": 2.76, + "learning_rate": "1.4955e-04", + "loss": 0.5022, + "slid_loss": 0.6307, + "step": 5518, + "time": 13.93 + }, + { + "epoch": 2.76, + "learning_rate": "1.4953e-04", + "loss": 0.6343, + "slid_loss": 0.6314, + "step": 5519, + "time": 12.96 + }, + { + "epoch": 2.76, + "learning_rate": "1.4951e-04", + "loss": 0.4884, + "slid_loss": 0.6295, + "step": 5520, + "time": 11.84 + }, + { + "epoch": 2.76, + "learning_rate": "1.4949e-04", + "loss": 0.5766, + "slid_loss": 0.6278, + "step": 5521, + "time": 12.0 + }, + { + "epoch": 2.77, + "learning_rate": "1.4948e-04", + "loss": 0.681, + "slid_loss": 0.6283, + "step": 5522, + "time": 12.81 + }, + { + "epoch": 2.77, + "learning_rate": "1.4946e-04", + "loss": 0.5161, + "slid_loss": 0.6267, + "step": 5523, + "time": 13.13 + }, + { + "epoch": 2.77, + "learning_rate": "1.4944e-04", + "loss": 0.5225, + "slid_loss": 0.6261, + "step": 5524, + "time": 11.0 + }, + { + "epoch": 2.77, + "learning_rate": "1.4942e-04", + "loss": 0.6702, + "slid_loss": 0.6274, + "step": 5525, + "time": 11.9 + }, + { + "epoch": 2.77, + "learning_rate": "1.4941e-04", + "loss": 0.5854, + "slid_loss": 0.6261, + "step": 5526, + "time": 12.12 + }, + { + "epoch": 2.77, + "learning_rate": "1.4939e-04", + "loss": 0.5979, + "slid_loss": 0.6246, + "step": 5527, + "time": 13.03 + }, + { + "epoch": 2.77, + "learning_rate": "1.4937e-04", + "loss": 0.7154, + "slid_loss": 0.6254, + "step": 5528, + "time": 13.35 + }, + { + "epoch": 2.77, + "learning_rate": "1.4935e-04", + "loss": 0.5874, + "slid_loss": 0.6236, + "step": 5529, + "time": 11.71 + }, + { + "epoch": 2.77, + "learning_rate": "1.4934e-04", + "loss": 0.5967, + "slid_loss": 0.623, + "step": 5530, + "time": 11.68 + }, + { + "epoch": 2.77, + "learning_rate": "1.4932e-04", + "loss": 0.6316, + "slid_loss": 0.6241, + "step": 5531, + "time": 14.77 + }, + { + "epoch": 2.77, + "learning_rate": "1.4930e-04", + "loss": 0.744, + "slid_loss": 0.6252, + "step": 5532, + "time": 13.9 + }, + { + "epoch": 2.77, + "learning_rate": "1.4928e-04", + "loss": 0.5931, + "slid_loss": 0.6255, + "step": 5533, + "time": 13.18 + }, + { + "epoch": 2.77, + "learning_rate": "1.4927e-04", + "loss": 0.7149, + "slid_loss": 0.6266, + "step": 5534, + "time": 12.77 + }, + { + "epoch": 2.77, + "learning_rate": "1.4925e-04", + "loss": 0.6598, + "slid_loss": 0.6263, + "step": 5535, + "time": 11.97 + }, + { + "epoch": 2.77, + "learning_rate": "1.4923e-04", + "loss": 0.6345, + "slid_loss": 0.6256, + "step": 5536, + "time": 14.01 + }, + { + "epoch": 2.77, + "learning_rate": "1.4921e-04", + "loss": 0.6755, + "slid_loss": 0.6257, + "step": 5537, + "time": 13.47 + }, + { + "epoch": 2.77, + "learning_rate": "1.4920e-04", + "loss": 0.567, + "slid_loss": 0.6255, + "step": 5538, + "time": 13.33 + }, + { + "epoch": 2.77, + "learning_rate": "1.4918e-04", + "loss": 0.7597, + "slid_loss": 0.628, + "step": 5539, + "time": 11.87 + }, + { + "epoch": 2.77, + "learning_rate": "1.4916e-04", + "loss": 0.5771, + "slid_loss": 0.6278, + "step": 5540, + "time": 11.06 + }, + { + "epoch": 2.77, + "learning_rate": "1.4914e-04", + "loss": 0.7657, + "slid_loss": 0.6297, + "step": 5541, + "time": 11.46 + }, + { + "epoch": 2.78, + "learning_rate": "1.4913e-04", + "loss": 0.5271, + "slid_loss": 0.6289, + "step": 5542, + "time": 13.45 + }, + { + "epoch": 2.78, + "learning_rate": "1.4911e-04", + "loss": 0.5799, + "slid_loss": 0.6285, + "step": 5543, + "time": 11.78 + }, + { + "epoch": 2.78, + "learning_rate": "1.4909e-04", + "loss": 0.555, + "slid_loss": 0.6269, + "step": 5544, + "time": 9.89 + }, + { + "epoch": 2.78, + "learning_rate": "1.4907e-04", + "loss": 0.5514, + "slid_loss": 0.6257, + "step": 5545, + "time": 11.16 + }, + { + "epoch": 2.78, + "learning_rate": "1.4906e-04", + "loss": 0.6207, + "slid_loss": 0.6252, + "step": 5546, + "time": 13.97 + }, + { + "epoch": 2.78, + "learning_rate": "1.4904e-04", + "loss": 0.6301, + "slid_loss": 0.6251, + "step": 5547, + "time": 13.35 + }, + { + "epoch": 2.78, + "learning_rate": "1.4902e-04", + "loss": 0.6855, + "slid_loss": 0.6261, + "step": 5548, + "time": 11.88 + }, + { + "epoch": 2.78, + "learning_rate": "1.4900e-04", + "loss": 0.6465, + "slid_loss": 0.6278, + "step": 5549, + "time": 11.53 + }, + { + "epoch": 2.78, + "learning_rate": "1.4899e-04", + "loss": 0.6038, + "slid_loss": 0.6283, + "step": 5550, + "time": 10.79 + }, + { + "epoch": 2.78, + "learning_rate": "1.4897e-04", + "loss": 0.702, + "slid_loss": 0.6284, + "step": 5551, + "time": 13.99 + }, + { + "epoch": 2.78, + "learning_rate": "1.4895e-04", + "loss": 0.7027, + "slid_loss": 0.629, + "step": 5552, + "time": 12.27 + }, + { + "epoch": 2.78, + "learning_rate": "1.4893e-04", + "loss": 0.5741, + "slid_loss": 0.6288, + "step": 5553, + "time": 13.29 + }, + { + "epoch": 2.78, + "learning_rate": "1.4892e-04", + "loss": 0.4889, + "slid_loss": 0.6267, + "step": 5554, + "time": 11.58 + }, + { + "epoch": 2.78, + "learning_rate": "1.4890e-04", + "loss": 0.4949, + "slid_loss": 0.626, + "step": 5555, + "time": 13.7 + }, + { + "epoch": 2.78, + "learning_rate": "1.4888e-04", + "loss": 0.4543, + "slid_loss": 0.626, + "step": 5556, + "time": 11.74 + }, + { + "epoch": 2.78, + "learning_rate": "1.4886e-04", + "loss": 0.7665, + "slid_loss": 0.627, + "step": 5557, + "time": 13.09 + }, + { + "epoch": 2.78, + "learning_rate": "1.4885e-04", + "loss": 0.667, + "slid_loss": 0.6277, + "step": 5558, + "time": 13.33 + }, + { + "epoch": 2.78, + "learning_rate": "1.4883e-04", + "loss": 0.6641, + "slid_loss": 0.6285, + "step": 5559, + "time": 14.56 + }, + { + "epoch": 2.78, + "learning_rate": "1.4881e-04", + "loss": 0.5995, + "slid_loss": 0.6276, + "step": 5560, + "time": 10.25 + }, + { + "epoch": 2.78, + "learning_rate": "1.4879e-04", + "loss": 0.7369, + "slid_loss": 0.6273, + "step": 5561, + "time": 13.71 + }, + { + "epoch": 2.79, + "learning_rate": "1.4878e-04", + "loss": 0.4202, + "slid_loss": 0.6251, + "step": 5562, + "time": 13.24 + }, + { + "epoch": 2.79, + "learning_rate": "1.4876e-04", + "loss": 0.5719, + "slid_loss": 0.6247, + "step": 5563, + "time": 13.84 + }, + { + "epoch": 2.79, + "learning_rate": "1.4874e-04", + "loss": 0.6721, + "slid_loss": 0.6254, + "step": 5564, + "time": 13.27 + }, + { + "epoch": 2.79, + "learning_rate": "1.4872e-04", + "loss": 0.5637, + "slid_loss": 0.6237, + "step": 5565, + "time": 13.05 + }, + { + "epoch": 2.79, + "learning_rate": "1.4871e-04", + "loss": 0.7274, + "slid_loss": 0.6244, + "step": 5566, + "time": 11.73 + }, + { + "epoch": 2.79, + "learning_rate": "1.4869e-04", + "loss": 0.5238, + "slid_loss": 0.6228, + "step": 5567, + "time": 11.84 + }, + { + "epoch": 2.79, + "learning_rate": "1.4867e-04", + "loss": 0.7668, + "slid_loss": 0.6233, + "step": 5568, + "time": 13.99 + }, + { + "epoch": 2.79, + "learning_rate": "1.4865e-04", + "loss": 0.5993, + "slid_loss": 0.6221, + "step": 5569, + "time": 12.67 + }, + { + "epoch": 2.79, + "learning_rate": "1.4864e-04", + "loss": 0.5928, + "slid_loss": 0.6214, + "step": 5570, + "time": 12.97 + }, + { + "epoch": 2.79, + "learning_rate": "1.4862e-04", + "loss": 0.6401, + "slid_loss": 0.6231, + "step": 5571, + "time": 13.31 + }, + { + "epoch": 2.79, + "learning_rate": "1.4860e-04", + "loss": 0.6565, + "slid_loss": 0.6229, + "step": 5572, + "time": 14.43 + }, + { + "epoch": 2.79, + "learning_rate": "1.4858e-04", + "loss": 0.5783, + "slid_loss": 0.6233, + "step": 5573, + "time": 12.95 + }, + { + "epoch": 2.79, + "learning_rate": "1.4857e-04", + "loss": 0.5749, + "slid_loss": 0.6226, + "step": 5574, + "time": 12.74 + }, + { + "epoch": 2.79, + "learning_rate": "1.4855e-04", + "loss": 0.6489, + "slid_loss": 0.6219, + "step": 5575, + "time": 13.64 + }, + { + "epoch": 2.79, + "learning_rate": "1.4853e-04", + "loss": 0.5916, + "slid_loss": 0.6207, + "step": 5576, + "time": 10.56 + }, + { + "epoch": 2.79, + "learning_rate": "1.4851e-04", + "loss": 0.6657, + "slid_loss": 0.6206, + "step": 5577, + "time": 12.81 + }, + { + "epoch": 2.79, + "learning_rate": "1.4850e-04", + "loss": 0.4683, + "slid_loss": 0.6197, + "step": 5578, + "time": 11.6 + }, + { + "epoch": 2.79, + "learning_rate": "1.4848e-04", + "loss": 0.7005, + "slid_loss": 0.6213, + "step": 5579, + "time": 13.41 + }, + { + "epoch": 2.79, + "learning_rate": "1.4846e-04", + "loss": 0.7362, + "slid_loss": 0.6232, + "step": 5580, + "time": 13.74 + }, + { + "epoch": 2.79, + "learning_rate": "1.4844e-04", + "loss": 0.6846, + "slid_loss": 0.6238, + "step": 5581, + "time": 11.28 + }, + { + "epoch": 2.8, + "learning_rate": "1.4843e-04", + "loss": 0.6001, + "slid_loss": 0.6235, + "step": 5582, + "time": 11.14 + }, + { + "epoch": 2.8, + "learning_rate": "1.4841e-04", + "loss": 0.6626, + "slid_loss": 0.622, + "step": 5583, + "time": 13.12 + }, + { + "epoch": 2.8, + "learning_rate": "1.4839e-04", + "loss": 0.5873, + "slid_loss": 0.6218, + "step": 5584, + "time": 13.25 + }, + { + "epoch": 2.8, + "learning_rate": "1.4837e-04", + "loss": 0.607, + "slid_loss": 0.6214, + "step": 5585, + "time": 13.22 + }, + { + "epoch": 2.8, + "learning_rate": "1.4836e-04", + "loss": 0.6653, + "slid_loss": 0.6221, + "step": 5586, + "time": 12.95 + }, + { + "epoch": 2.8, + "learning_rate": "1.4834e-04", + "loss": 0.6881, + "slid_loss": 0.6227, + "step": 5587, + "time": 13.06 + }, + { + "epoch": 2.8, + "learning_rate": "1.4832e-04", + "loss": 0.719, + "slid_loss": 0.6242, + "step": 5588, + "time": 10.98 + }, + { + "epoch": 2.8, + "learning_rate": "1.4830e-04", + "loss": 0.5432, + "slid_loss": 0.6238, + "step": 5589, + "time": 12.86 + }, + { + "epoch": 2.8, + "learning_rate": "1.4829e-04", + "loss": 0.5286, + "slid_loss": 0.623, + "step": 5590, + "time": 11.24 + }, + { + "epoch": 2.8, + "learning_rate": "1.4827e-04", + "loss": 0.6695, + "slid_loss": 0.6218, + "step": 5591, + "time": 13.35 + }, + { + "epoch": 2.8, + "learning_rate": "1.4825e-04", + "loss": 0.6779, + "slid_loss": 0.6216, + "step": 5592, + "time": 11.87 + }, + { + "epoch": 2.8, + "learning_rate": "1.4823e-04", + "loss": 0.6354, + "slid_loss": 0.6227, + "step": 5593, + "time": 11.28 + }, + { + "epoch": 2.8, + "learning_rate": "1.4822e-04", + "loss": 0.7014, + "slid_loss": 0.6239, + "step": 5594, + "time": 11.68 + }, + { + "epoch": 2.8, + "learning_rate": "1.4820e-04", + "loss": 0.5389, + "slid_loss": 0.6238, + "step": 5595, + "time": 11.52 + }, + { + "epoch": 2.8, + "learning_rate": "1.4818e-04", + "loss": 0.6622, + "slid_loss": 0.6228, + "step": 5596, + "time": 11.91 + }, + { + "epoch": 2.8, + "learning_rate": "1.4816e-04", + "loss": 0.8239, + "slid_loss": 0.6253, + "step": 5597, + "time": 13.53 + }, + { + "epoch": 2.8, + "learning_rate": "1.4815e-04", + "loss": 0.662, + "slid_loss": 0.6257, + "step": 5598, + "time": 12.82 + }, + { + "epoch": 2.8, + "learning_rate": "1.4813e-04", + "loss": 0.6885, + "slid_loss": 0.6259, + "step": 5599, + "time": 12.3 + }, + { + "epoch": 2.8, + "learning_rate": "1.4811e-04", + "loss": 0.4967, + "slid_loss": 0.6233, + "step": 5600, + "time": 10.56 + }, + { + "epoch": 2.8, + "learning_rate": "1.4810e-04", + "loss": 0.6984, + "slid_loss": 0.6233, + "step": 5601, + "time": 12.85 + }, + { + "epoch": 2.81, + "learning_rate": "1.4808e-04", + "loss": 0.6473, + "slid_loss": 0.6235, + "step": 5602, + "time": 10.76 + }, + { + "epoch": 2.81, + "learning_rate": "1.4806e-04", + "loss": 0.6709, + "slid_loss": 0.6247, + "step": 5603, + "time": 11.65 + }, + { + "epoch": 2.81, + "learning_rate": "1.4804e-04", + "loss": 0.5282, + "slid_loss": 0.6236, + "step": 5604, + "time": 13.59 + }, + { + "epoch": 2.81, + "learning_rate": "1.4803e-04", + "loss": 0.7493, + "slid_loss": 0.6244, + "step": 5605, + "time": 11.6 + }, + { + "epoch": 2.81, + "learning_rate": "1.4801e-04", + "loss": 0.5112, + "slid_loss": 0.6235, + "step": 5606, + "time": 11.54 + }, + { + "epoch": 2.81, + "learning_rate": "1.4799e-04", + "loss": 0.5239, + "slid_loss": 0.6234, + "step": 5607, + "time": 13.49 + }, + { + "epoch": 2.81, + "learning_rate": "1.4797e-04", + "loss": 0.5827, + "slid_loss": 0.6222, + "step": 5608, + "time": 13.33 + }, + { + "epoch": 2.81, + "learning_rate": "1.4796e-04", + "loss": 0.5574, + "slid_loss": 0.6204, + "step": 5609, + "time": 13.24 + }, + { + "epoch": 2.81, + "learning_rate": "1.4794e-04", + "loss": 0.5911, + "slid_loss": 0.62, + "step": 5610, + "time": 11.4 + }, + { + "epoch": 2.81, + "learning_rate": "1.4792e-04", + "loss": 0.7453, + "slid_loss": 0.6199, + "step": 5611, + "time": 12.78 + }, + { + "epoch": 2.81, + "learning_rate": "1.4790e-04", + "loss": 0.6202, + "slid_loss": 0.6202, + "step": 5612, + "time": 11.08 + }, + { + "epoch": 2.81, + "learning_rate": "1.4789e-04", + "loss": 0.8276, + "slid_loss": 0.6226, + "step": 5613, + "time": 13.67 + }, + { + "epoch": 2.81, + "learning_rate": "1.4787e-04", + "loss": 0.6451, + "slid_loss": 0.6238, + "step": 5614, + "time": 11.65 + }, + { + "epoch": 2.81, + "learning_rate": "1.4785e-04", + "loss": 0.6111, + "slid_loss": 0.6246, + "step": 5615, + "time": 13.0 + }, + { + "epoch": 2.81, + "learning_rate": "1.4783e-04", + "loss": 0.4659, + "slid_loss": 0.6227, + "step": 5616, + "time": 14.42 + }, + { + "epoch": 2.81, + "learning_rate": "1.4782e-04", + "loss": 0.6869, + "slid_loss": 0.6245, + "step": 5617, + "time": 13.02 + }, + { + "epoch": 2.81, + "learning_rate": "1.4780e-04", + "loss": 0.7101, + "slid_loss": 0.6266, + "step": 5618, + "time": 13.85 + }, + { + "epoch": 2.81, + "learning_rate": "1.4778e-04", + "loss": 0.5586, + "slid_loss": 0.6258, + "step": 5619, + "time": 10.82 + }, + { + "epoch": 2.81, + "learning_rate": "1.4776e-04", + "loss": 0.6589, + "slid_loss": 0.6275, + "step": 5620, + "time": 11.32 + }, + { + "epoch": 2.81, + "learning_rate": "1.4775e-04", + "loss": 0.7635, + "slid_loss": 0.6294, + "step": 5621, + "time": 13.29 + }, + { + "epoch": 2.82, + "learning_rate": "1.4773e-04", + "loss": 0.6361, + "slid_loss": 0.629, + "step": 5622, + "time": 10.84 + }, + { + "epoch": 2.82, + "learning_rate": "1.4771e-04", + "loss": 0.5727, + "slid_loss": 0.6295, + "step": 5623, + "time": 12.95 + }, + { + "epoch": 2.82, + "learning_rate": "1.4769e-04", + "loss": 0.679, + "slid_loss": 0.6311, + "step": 5624, + "time": 13.57 + }, + { + "epoch": 2.82, + "learning_rate": "1.4768e-04", + "loss": 0.7044, + "slid_loss": 0.6314, + "step": 5625, + "time": 13.41 + }, + { + "epoch": 2.82, + "learning_rate": "1.4766e-04", + "loss": 0.6185, + "slid_loss": 0.6318, + "step": 5626, + "time": 10.7 + }, + { + "epoch": 2.82, + "learning_rate": "1.4764e-04", + "loss": 0.6883, + "slid_loss": 0.6327, + "step": 5627, + "time": 11.11 + }, + { + "epoch": 2.82, + "learning_rate": "1.4762e-04", + "loss": 0.5415, + "slid_loss": 0.6309, + "step": 5628, + "time": 11.99 + }, + { + "epoch": 2.82, + "learning_rate": "1.4761e-04", + "loss": 0.7217, + "slid_loss": 0.6323, + "step": 5629, + "time": 13.73 + }, + { + "epoch": 2.82, + "learning_rate": "1.4759e-04", + "loss": 0.7452, + "slid_loss": 0.6338, + "step": 5630, + "time": 12.85 + }, + { + "epoch": 2.82, + "learning_rate": "1.4757e-04", + "loss": 0.5993, + "slid_loss": 0.6334, + "step": 5631, + "time": 12.99 + }, + { + "epoch": 2.82, + "learning_rate": "1.4755e-04", + "loss": 0.8256, + "slid_loss": 0.6342, + "step": 5632, + "time": 12.93 + }, + { + "epoch": 2.82, + "learning_rate": "1.4754e-04", + "loss": 0.6428, + "slid_loss": 0.6347, + "step": 5633, + "time": 13.67 + }, + { + "epoch": 2.82, + "learning_rate": "1.4752e-04", + "loss": 0.7186, + "slid_loss": 0.6348, + "step": 5634, + "time": 12.34 + }, + { + "epoch": 2.82, + "learning_rate": "1.4750e-04", + "loss": 0.6001, + "slid_loss": 0.6342, + "step": 5635, + "time": 13.36 + }, + { + "epoch": 2.82, + "learning_rate": "1.4748e-04", + "loss": 0.4595, + "slid_loss": 0.6324, + "step": 5636, + "time": 11.54 + }, + { + "epoch": 2.82, + "learning_rate": "1.4747e-04", + "loss": 0.6638, + "slid_loss": 0.6323, + "step": 5637, + "time": 13.99 + }, + { + "epoch": 2.82, + "learning_rate": "1.4745e-04", + "loss": 0.5843, + "slid_loss": 0.6325, + "step": 5638, + "time": 11.25 + }, + { + "epoch": 2.82, + "learning_rate": "1.4743e-04", + "loss": 0.5329, + "slid_loss": 0.6302, + "step": 5639, + "time": 13.28 + }, + { + "epoch": 2.82, + "learning_rate": "1.4741e-04", + "loss": 0.6403, + "slid_loss": 0.6309, + "step": 5640, + "time": 11.25 + }, + { + "epoch": 2.82, + "learning_rate": "1.4740e-04", + "loss": 0.5024, + "slid_loss": 0.6282, + "step": 5641, + "time": 11.29 + }, + { + "epoch": 2.83, + "learning_rate": "1.4738e-04", + "loss": 0.7734, + "slid_loss": 0.6307, + "step": 5642, + "time": 12.86 + }, + { + "epoch": 2.83, + "learning_rate": "1.4736e-04", + "loss": 0.71, + "slid_loss": 0.632, + "step": 5643, + "time": 10.76 + }, + { + "epoch": 2.83, + "learning_rate": "1.4734e-04", + "loss": 0.5962, + "slid_loss": 0.6324, + "step": 5644, + "time": 11.07 + }, + { + "epoch": 2.83, + "learning_rate": "1.4733e-04", + "loss": 0.5695, + "slid_loss": 0.6326, + "step": 5645, + "time": 12.15 + }, + { + "epoch": 2.83, + "learning_rate": "1.4731e-04", + "loss": 0.5995, + "slid_loss": 0.6324, + "step": 5646, + "time": 10.79 + }, + { + "epoch": 2.83, + "learning_rate": "1.4729e-04", + "loss": 0.681, + "slid_loss": 0.6329, + "step": 5647, + "time": 11.03 + }, + { + "epoch": 2.83, + "learning_rate": "1.4727e-04", + "loss": 0.5616, + "slid_loss": 0.6316, + "step": 5648, + "time": 12.84 + }, + { + "epoch": 2.83, + "learning_rate": "1.4726e-04", + "loss": 0.8091, + "slid_loss": 0.6333, + "step": 5649, + "time": 11.25 + }, + { + "epoch": 2.83, + "learning_rate": "1.4724e-04", + "loss": 0.766, + "slid_loss": 0.6349, + "step": 5650, + "time": 11.64 + }, + { + "epoch": 2.83, + "learning_rate": "1.4722e-04", + "loss": 0.6121, + "slid_loss": 0.634, + "step": 5651, + "time": 13.34 + }, + { + "epoch": 2.83, + "learning_rate": "1.4720e-04", + "loss": 0.6398, + "slid_loss": 0.6334, + "step": 5652, + "time": 12.81 + }, + { + "epoch": 2.83, + "learning_rate": "1.4719e-04", + "loss": 0.6065, + "slid_loss": 0.6337, + "step": 5653, + "time": 13.36 + }, + { + "epoch": 2.83, + "learning_rate": "1.4717e-04", + "loss": 0.7887, + "slid_loss": 0.6367, + "step": 5654, + "time": 12.94 + }, + { + "epoch": 2.83, + "learning_rate": "1.4715e-04", + "loss": 0.7193, + "slid_loss": 0.6389, + "step": 5655, + "time": 12.58 + }, + { + "epoch": 2.83, + "learning_rate": "1.4713e-04", + "loss": 0.5947, + "slid_loss": 0.6403, + "step": 5656, + "time": 13.49 + }, + { + "epoch": 2.83, + "learning_rate": "1.4712e-04", + "loss": 0.6988, + "slid_loss": 0.6396, + "step": 5657, + "time": 13.59 + }, + { + "epoch": 2.83, + "learning_rate": "1.4710e-04", + "loss": 0.627, + "slid_loss": 0.6393, + "step": 5658, + "time": 13.84 + }, + { + "epoch": 2.83, + "learning_rate": "1.4708e-04", + "loss": 0.5367, + "slid_loss": 0.638, + "step": 5659, + "time": 12.7 + }, + { + "epoch": 2.83, + "learning_rate": "1.4706e-04", + "loss": 0.7109, + "slid_loss": 0.6391, + "step": 5660, + "time": 12.54 + }, + { + "epoch": 2.83, + "learning_rate": "1.4705e-04", + "loss": 0.5374, + "slid_loss": 0.6371, + "step": 5661, + "time": 13.43 + }, + { + "epoch": 2.84, + "learning_rate": "1.4703e-04", + "loss": 0.5899, + "slid_loss": 0.6388, + "step": 5662, + "time": 12.84 + }, + { + "epoch": 2.84, + "learning_rate": "1.4701e-04", + "loss": 0.6818, + "slid_loss": 0.6399, + "step": 5663, + "time": 10.92 + }, + { + "epoch": 2.84, + "learning_rate": "1.4700e-04", + "loss": 0.5216, + "slid_loss": 0.6384, + "step": 5664, + "time": 13.89 + }, + { + "epoch": 2.84, + "learning_rate": "1.4698e-04", + "loss": 0.5746, + "slid_loss": 0.6385, + "step": 5665, + "time": 10.65 + }, + { + "epoch": 2.84, + "learning_rate": "1.4696e-04", + "loss": 0.5248, + "slid_loss": 0.6365, + "step": 5666, + "time": 14.15 + }, + { + "epoch": 2.84, + "learning_rate": "1.4694e-04", + "loss": 0.7348, + "slid_loss": 0.6386, + "step": 5667, + "time": 11.96 + }, + { + "epoch": 2.84, + "learning_rate": "1.4693e-04", + "loss": 0.6435, + "slid_loss": 0.6373, + "step": 5668, + "time": 14.41 + }, + { + "epoch": 2.84, + "learning_rate": "1.4691e-04", + "loss": 0.6887, + "slid_loss": 0.6382, + "step": 5669, + "time": 13.23 + }, + { + "epoch": 2.84, + "learning_rate": "1.4689e-04", + "loss": 0.6897, + "slid_loss": 0.6392, + "step": 5670, + "time": 12.24 + }, + { + "epoch": 2.84, + "learning_rate": "1.4687e-04", + "loss": 0.6477, + "slid_loss": 0.6393, + "step": 5671, + "time": 12.81 + }, + { + "epoch": 2.84, + "learning_rate": "1.4686e-04", + "loss": 0.4932, + "slid_loss": 0.6377, + "step": 5672, + "time": 12.19 + }, + { + "epoch": 2.84, + "learning_rate": "1.4684e-04", + "loss": 0.6184, + "slid_loss": 0.6381, + "step": 5673, + "time": 13.64 + }, + { + "epoch": 2.84, + "learning_rate": "1.4682e-04", + "loss": 0.6831, + "slid_loss": 0.6391, + "step": 5674, + "time": 12.86 + }, + { + "epoch": 2.84, + "learning_rate": "1.4680e-04", + "loss": 0.563, + "slid_loss": 0.6383, + "step": 5675, + "time": 12.03 + }, + { + "epoch": 2.84, + "learning_rate": "1.4679e-04", + "loss": 0.7216, + "slid_loss": 0.6396, + "step": 5676, + "time": 13.65 + }, + { + "epoch": 2.84, + "learning_rate": "1.4677e-04", + "loss": 0.6533, + "slid_loss": 0.6395, + "step": 5677, + "time": 13.27 + }, + { + "epoch": 2.84, + "learning_rate": "1.4675e-04", + "loss": 0.5901, + "slid_loss": 0.6407, + "step": 5678, + "time": 13.92 + }, + { + "epoch": 2.84, + "learning_rate": "1.4673e-04", + "loss": 0.5683, + "slid_loss": 0.6393, + "step": 5679, + "time": 13.18 + }, + { + "epoch": 2.84, + "learning_rate": "1.4672e-04", + "loss": 0.5946, + "slid_loss": 0.6379, + "step": 5680, + "time": 13.22 + }, + { + "epoch": 2.84, + "learning_rate": "1.4670e-04", + "loss": 0.6145, + "slid_loss": 0.6372, + "step": 5681, + "time": 13.38 + }, + { + "epoch": 2.85, + "learning_rate": "1.4668e-04", + "loss": 0.7031, + "slid_loss": 0.6383, + "step": 5682, + "time": 12.9 + }, + { + "epoch": 2.85, + "learning_rate": "1.4666e-04", + "loss": 0.5396, + "slid_loss": 0.637, + "step": 5683, + "time": 10.79 + }, + { + "epoch": 2.85, + "learning_rate": "1.4665e-04", + "loss": 0.5885, + "slid_loss": 0.637, + "step": 5684, + "time": 12.86 + }, + { + "epoch": 2.85, + "learning_rate": "1.4663e-04", + "loss": 0.5758, + "slid_loss": 0.6367, + "step": 5685, + "time": 10.76 + }, + { + "epoch": 2.85, + "learning_rate": "1.4661e-04", + "loss": 0.5709, + "slid_loss": 0.6358, + "step": 5686, + "time": 11.29 + }, + { + "epoch": 2.85, + "learning_rate": "1.4659e-04", + "loss": 0.6961, + "slid_loss": 0.6359, + "step": 5687, + "time": 10.95 + }, + { + "epoch": 2.85, + "learning_rate": "1.4658e-04", + "loss": 0.592, + "slid_loss": 0.6346, + "step": 5688, + "time": 13.32 + }, + { + "epoch": 2.85, + "learning_rate": "1.4656e-04", + "loss": 0.641, + "slid_loss": 0.6356, + "step": 5689, + "time": 11.32 + }, + { + "epoch": 2.85, + "learning_rate": "1.4654e-04", + "loss": 0.7687, + "slid_loss": 0.638, + "step": 5690, + "time": 13.16 + }, + { + "epoch": 2.85, + "learning_rate": "1.4652e-04", + "loss": 0.4987, + "slid_loss": 0.6363, + "step": 5691, + "time": 12.69 + }, + { + "epoch": 2.85, + "learning_rate": "1.4651e-04", + "loss": 0.6335, + "slid_loss": 0.6358, + "step": 5692, + "time": 13.14 + }, + { + "epoch": 2.85, + "learning_rate": "1.4649e-04", + "loss": 0.6013, + "slid_loss": 0.6355, + "step": 5693, + "time": 11.43 + }, + { + "epoch": 2.85, + "learning_rate": "1.4647e-04", + "loss": 0.7408, + "slid_loss": 0.6359, + "step": 5694, + "time": 13.06 + }, + { + "epoch": 2.85, + "learning_rate": "1.4645e-04", + "loss": 0.5083, + "slid_loss": 0.6356, + "step": 5695, + "time": 10.95 + }, + { + "epoch": 2.85, + "learning_rate": "1.4644e-04", + "loss": 0.6516, + "slid_loss": 0.6355, + "step": 5696, + "time": 12.94 + }, + { + "epoch": 2.85, + "learning_rate": "1.4642e-04", + "loss": 0.752, + "slid_loss": 0.6347, + "step": 5697, + "time": 14.47 + }, + { + "epoch": 2.85, + "learning_rate": "1.4640e-04", + "loss": 0.5668, + "slid_loss": 0.6338, + "step": 5698, + "time": 10.83 + }, + { + "epoch": 2.85, + "learning_rate": "1.4638e-04", + "loss": 0.4337, + "slid_loss": 0.6312, + "step": 5699, + "time": 12.95 + }, + { + "epoch": 2.85, + "learning_rate": "1.4637e-04", + "loss": 0.743, + "slid_loss": 0.6337, + "step": 5700, + "time": 12.94 + }, + { + "epoch": 2.85, + "learning_rate": "1.4635e-04", + "loss": 0.622, + "slid_loss": 0.6329, + "step": 5701, + "time": 13.14 + }, + { + "epoch": 2.86, + "learning_rate": "1.4633e-04", + "loss": 0.6788, + "slid_loss": 0.6333, + "step": 5702, + "time": 13.13 + }, + { + "epoch": 2.86, + "learning_rate": "1.4631e-04", + "loss": 0.5653, + "slid_loss": 0.6322, + "step": 5703, + "time": 13.4 + }, + { + "epoch": 2.86, + "learning_rate": "1.4630e-04", + "loss": 0.5929, + "slid_loss": 0.6329, + "step": 5704, + "time": 12.18 + }, + { + "epoch": 2.86, + "learning_rate": "1.4628e-04", + "loss": 0.5724, + "slid_loss": 0.6311, + "step": 5705, + "time": 10.57 + }, + { + "epoch": 2.86, + "learning_rate": "1.4626e-04", + "loss": 0.7368, + "slid_loss": 0.6333, + "step": 5706, + "time": 14.32 + }, + { + "epoch": 2.86, + "learning_rate": "1.4625e-04", + "loss": 0.6423, + "slid_loss": 0.6345, + "step": 5707, + "time": 12.64 + }, + { + "epoch": 2.86, + "learning_rate": "1.4623e-04", + "loss": 0.5811, + "slid_loss": 0.6345, + "step": 5708, + "time": 13.07 + }, + { + "epoch": 2.86, + "learning_rate": "1.4621e-04", + "loss": 0.5685, + "slid_loss": 0.6346, + "step": 5709, + "time": 12.13 + }, + { + "epoch": 2.86, + "learning_rate": "1.4619e-04", + "loss": 0.5719, + "slid_loss": 0.6344, + "step": 5710, + "time": 11.01 + }, + { + "epoch": 2.86, + "learning_rate": "1.4618e-04", + "loss": 0.6312, + "slid_loss": 0.6333, + "step": 5711, + "time": 13.16 + }, + { + "epoch": 2.86, + "learning_rate": "1.4616e-04", + "loss": 0.6021, + "slid_loss": 0.6331, + "step": 5712, + "time": 10.98 + }, + { + "epoch": 2.86, + "learning_rate": "1.4614e-04", + "loss": 0.6043, + "slid_loss": 0.6309, + "step": 5713, + "time": 12.54 + }, + { + "epoch": 2.86, + "learning_rate": "1.4612e-04", + "loss": 0.6015, + "slid_loss": 0.6304, + "step": 5714, + "time": 12.32 + }, + { + "epoch": 2.86, + "learning_rate": "1.4611e-04", + "loss": 0.6294, + "slid_loss": 0.6306, + "step": 5715, + "time": 11.43 + }, + { + "epoch": 2.86, + "learning_rate": "1.4609e-04", + "loss": 0.7042, + "slid_loss": 0.633, + "step": 5716, + "time": 11.57 + }, + { + "epoch": 2.86, + "learning_rate": "1.4607e-04", + "loss": 0.5188, + "slid_loss": 0.6313, + "step": 5717, + "time": 13.34 + }, + { + "epoch": 2.86, + "learning_rate": "1.4605e-04", + "loss": 0.6842, + "slid_loss": 0.6311, + "step": 5718, + "time": 12.67 + }, + { + "epoch": 2.86, + "learning_rate": "1.4604e-04", + "loss": 0.6768, + "slid_loss": 0.6322, + "step": 5719, + "time": 12.34 + }, + { + "epoch": 2.86, + "learning_rate": "1.4602e-04", + "loss": 0.7264, + "slid_loss": 0.6329, + "step": 5720, + "time": 12.57 + }, + { + "epoch": 2.86, + "learning_rate": "1.4600e-04", + "loss": 0.5325, + "slid_loss": 0.6306, + "step": 5721, + "time": 14.08 + }, + { + "epoch": 2.87, + "learning_rate": "1.4598e-04", + "loss": 0.6028, + "slid_loss": 0.6303, + "step": 5722, + "time": 12.24 + }, + { + "epoch": 2.87, + "learning_rate": "1.4597e-04", + "loss": 0.6796, + "slid_loss": 0.6313, + "step": 5723, + "time": 11.67 + }, + { + "epoch": 2.87, + "learning_rate": "1.4595e-04", + "loss": 0.6019, + "slid_loss": 0.6306, + "step": 5724, + "time": 11.16 + }, + { + "epoch": 2.87, + "learning_rate": "1.4593e-04", + "loss": 0.6965, + "slid_loss": 0.6305, + "step": 5725, + "time": 10.78 + }, + { + "epoch": 2.87, + "learning_rate": "1.4591e-04", + "loss": 0.6062, + "slid_loss": 0.6304, + "step": 5726, + "time": 12.22 + }, + { + "epoch": 2.87, + "learning_rate": "1.4590e-04", + "loss": 0.5634, + "slid_loss": 0.6291, + "step": 5727, + "time": 12.84 + }, + { + "epoch": 2.87, + "learning_rate": "1.4588e-04", + "loss": 0.4101, + "slid_loss": 0.6278, + "step": 5728, + "time": 12.82 + }, + { + "epoch": 2.87, + "learning_rate": "1.4586e-04", + "loss": 0.4774, + "slid_loss": 0.6254, + "step": 5729, + "time": 13.11 + }, + { + "epoch": 2.87, + "learning_rate": "1.4584e-04", + "loss": 0.5019, + "slid_loss": 0.6229, + "step": 5730, + "time": 13.25 + }, + { + "epoch": 2.87, + "learning_rate": "1.4583e-04", + "loss": 0.6673, + "slid_loss": 0.6236, + "step": 5731, + "time": 11.5 + }, + { + "epoch": 2.87, + "learning_rate": "1.4581e-04", + "loss": 0.6466, + "slid_loss": 0.6218, + "step": 5732, + "time": 13.17 + }, + { + "epoch": 2.87, + "learning_rate": "1.4579e-04", + "loss": 0.6169, + "slid_loss": 0.6216, + "step": 5733, + "time": 13.35 + }, + { + "epoch": 2.87, + "learning_rate": "1.4577e-04", + "loss": 0.4835, + "slid_loss": 0.6192, + "step": 5734, + "time": 12.86 + }, + { + "epoch": 2.87, + "learning_rate": "1.4576e-04", + "loss": 0.5105, + "slid_loss": 0.6183, + "step": 5735, + "time": 12.84 + }, + { + "epoch": 2.87, + "learning_rate": "1.4574e-04", + "loss": 0.5826, + "slid_loss": 0.6196, + "step": 5736, + "time": 13.33 + }, + { + "epoch": 2.87, + "learning_rate": "1.4572e-04", + "loss": 0.5175, + "slid_loss": 0.6181, + "step": 5737, + "time": 14.12 + }, + { + "epoch": 2.87, + "learning_rate": "1.4571e-04", + "loss": 0.6546, + "slid_loss": 0.6188, + "step": 5738, + "time": 11.11 + }, + { + "epoch": 2.87, + "learning_rate": "1.4569e-04", + "loss": 0.7062, + "slid_loss": 0.6205, + "step": 5739, + "time": 12.07 + }, + { + "epoch": 2.87, + "learning_rate": "1.4567e-04", + "loss": 0.7521, + "slid_loss": 0.6216, + "step": 5740, + "time": 14.21 + }, + { + "epoch": 2.87, + "learning_rate": "1.4565e-04", + "loss": 0.6838, + "slid_loss": 0.6235, + "step": 5741, + "time": 12.11 + }, + { + "epoch": 2.88, + "learning_rate": "1.4564e-04", + "loss": 0.4426, + "slid_loss": 0.6201, + "step": 5742, + "time": 11.4 + }, + { + "epoch": 2.88, + "learning_rate": "1.4562e-04", + "loss": 0.6379, + "slid_loss": 0.6194, + "step": 5743, + "time": 12.46 + }, + { + "epoch": 2.88, + "learning_rate": "1.4560e-04", + "loss": 0.5629, + "slid_loss": 0.6191, + "step": 5744, + "time": 11.71 + }, + { + "epoch": 2.88, + "learning_rate": "1.4558e-04", + "loss": 0.4988, + "slid_loss": 0.6184, + "step": 5745, + "time": 13.32 + }, + { + "epoch": 2.88, + "learning_rate": "1.4557e-04", + "loss": 0.5123, + "slid_loss": 0.6175, + "step": 5746, + "time": 13.88 + }, + { + "epoch": 2.88, + "learning_rate": "1.4555e-04", + "loss": 0.7251, + "slid_loss": 0.618, + "step": 5747, + "time": 12.85 + }, + { + "epoch": 2.88, + "learning_rate": "1.4553e-04", + "loss": 0.6151, + "slid_loss": 0.6185, + "step": 5748, + "time": 12.84 + }, + { + "epoch": 2.88, + "learning_rate": "1.4551e-04", + "loss": 0.7481, + "slid_loss": 0.6179, + "step": 5749, + "time": 12.73 + }, + { + "epoch": 2.88, + "learning_rate": "1.4550e-04", + "loss": 0.5892, + "slid_loss": 0.6161, + "step": 5750, + "time": 13.33 + }, + { + "epoch": 2.88, + "learning_rate": "1.4548e-04", + "loss": 0.6847, + "slid_loss": 0.6168, + "step": 5751, + "time": 13.82 + }, + { + "epoch": 2.88, + "learning_rate": "1.4546e-04", + "loss": 0.7127, + "slid_loss": 0.6176, + "step": 5752, + "time": 10.5 + }, + { + "epoch": 2.88, + "learning_rate": "1.4544e-04", + "loss": 0.7191, + "slid_loss": 0.6187, + "step": 5753, + "time": 13.03 + }, + { + "epoch": 2.88, + "learning_rate": "1.4543e-04", + "loss": 0.6756, + "slid_loss": 0.6176, + "step": 5754, + "time": 13.16 + }, + { + "epoch": 2.88, + "learning_rate": "1.4541e-04", + "loss": 0.5472, + "slid_loss": 0.6158, + "step": 5755, + "time": 13.19 + }, + { + "epoch": 2.88, + "learning_rate": "1.4539e-04", + "loss": 0.7134, + "slid_loss": 0.617, + "step": 5756, + "time": 12.94 + }, + { + "epoch": 2.88, + "learning_rate": "1.4537e-04", + "loss": 0.6768, + "slid_loss": 0.6168, + "step": 5757, + "time": 11.36 + }, + { + "epoch": 2.88, + "learning_rate": "1.4536e-04", + "loss": 0.6964, + "slid_loss": 0.6175, + "step": 5758, + "time": 13.33 + }, + { + "epoch": 2.88, + "learning_rate": "1.4534e-04", + "loss": 0.6288, + "slid_loss": 0.6184, + "step": 5759, + "time": 12.79 + }, + { + "epoch": 2.88, + "learning_rate": "1.4532e-04", + "loss": 0.6951, + "slid_loss": 0.6183, + "step": 5760, + "time": 11.7 + }, + { + "epoch": 2.88, + "learning_rate": "1.4530e-04", + "loss": 0.584, + "slid_loss": 0.6187, + "step": 5761, + "time": 12.78 + }, + { + "epoch": 2.89, + "learning_rate": "1.4529e-04", + "loss": 0.6018, + "slid_loss": 0.6189, + "step": 5762, + "time": 11.53 + }, + { + "epoch": 2.89, + "learning_rate": "1.4527e-04", + "loss": 0.462, + "slid_loss": 0.6167, + "step": 5763, + "time": 13.39 + }, + { + "epoch": 2.89, + "learning_rate": "1.4525e-04", + "loss": 0.4877, + "slid_loss": 0.6163, + "step": 5764, + "time": 11.67 + }, + { + "epoch": 2.89, + "learning_rate": "1.4524e-04", + "loss": 0.6346, + "slid_loss": 0.6169, + "step": 5765, + "time": 12.79 + }, + { + "epoch": 2.89, + "learning_rate": "1.4522e-04", + "loss": 0.7229, + "slid_loss": 0.6189, + "step": 5766, + "time": 11.8 + }, + { + "epoch": 2.89, + "learning_rate": "1.4520e-04", + "loss": 0.6273, + "slid_loss": 0.6178, + "step": 5767, + "time": 11.95 + }, + { + "epoch": 2.89, + "learning_rate": "1.4518e-04", + "loss": 0.7077, + "slid_loss": 0.6185, + "step": 5768, + "time": 12.72 + }, + { + "epoch": 2.89, + "learning_rate": "1.4517e-04", + "loss": 0.5978, + "slid_loss": 0.6176, + "step": 5769, + "time": 11.96 + }, + { + "epoch": 2.89, + "learning_rate": "1.4515e-04", + "loss": 0.6764, + "slid_loss": 0.6174, + "step": 5770, + "time": 11.29 + }, + { + "epoch": 2.89, + "learning_rate": "1.4513e-04", + "loss": 0.6163, + "slid_loss": 0.6171, + "step": 5771, + "time": 11.85 + }, + { + "epoch": 2.89, + "learning_rate": "1.4511e-04", + "loss": 0.5189, + "slid_loss": 0.6174, + "step": 5772, + "time": 10.73 + }, + { + "epoch": 2.89, + "learning_rate": "1.4510e-04", + "loss": 0.5588, + "slid_loss": 0.6168, + "step": 5773, + "time": 13.34 + }, + { + "epoch": 2.89, + "learning_rate": "1.4508e-04", + "loss": 0.7046, + "slid_loss": 0.617, + "step": 5774, + "time": 12.75 + }, + { + "epoch": 2.89, + "learning_rate": "1.4506e-04", + "loss": 0.5512, + "slid_loss": 0.6169, + "step": 5775, + "time": 12.88 + }, + { + "epoch": 2.89, + "learning_rate": "1.4504e-04", + "loss": 0.5942, + "slid_loss": 0.6156, + "step": 5776, + "time": 13.63 + }, + { + "epoch": 2.89, + "learning_rate": "1.4503e-04", + "loss": 0.6762, + "slid_loss": 0.6158, + "step": 5777, + "time": 10.8 + }, + { + "epoch": 2.89, + "learning_rate": "1.4501e-04", + "loss": 0.7479, + "slid_loss": 0.6174, + "step": 5778, + "time": 10.89 + }, + { + "epoch": 2.89, + "learning_rate": "1.4499e-04", + "loss": 0.522, + "slid_loss": 0.6169, + "step": 5779, + "time": 12.35 + }, + { + "epoch": 2.89, + "learning_rate": "1.4497e-04", + "loss": 0.6248, + "slid_loss": 0.6172, + "step": 5780, + "time": 13.1 + }, + { + "epoch": 2.89, + "learning_rate": "1.4496e-04", + "loss": 0.6985, + "slid_loss": 0.6181, + "step": 5781, + "time": 13.46 + }, + { + "epoch": 2.9, + "learning_rate": "1.4494e-04", + "loss": 0.7163, + "slid_loss": 0.6182, + "step": 5782, + "time": 13.09 + }, + { + "epoch": 2.9, + "learning_rate": "1.4492e-04", + "loss": 0.6242, + "slid_loss": 0.6191, + "step": 5783, + "time": 12.73 + }, + { + "epoch": 2.9, + "learning_rate": "1.4490e-04", + "loss": 0.7105, + "slid_loss": 0.6203, + "step": 5784, + "time": 11.37 + }, + { + "epoch": 2.9, + "learning_rate": "1.4489e-04", + "loss": 0.5926, + "slid_loss": 0.6204, + "step": 5785, + "time": 13.77 + }, + { + "epoch": 2.9, + "learning_rate": "1.4487e-04", + "loss": 0.4696, + "slid_loss": 0.6194, + "step": 5786, + "time": 14.32 + }, + { + "epoch": 2.9, + "learning_rate": "1.4485e-04", + "loss": 0.5995, + "slid_loss": 0.6185, + "step": 5787, + "time": 13.76 + }, + { + "epoch": 2.9, + "learning_rate": "1.4484e-04", + "loss": 0.51, + "slid_loss": 0.6176, + "step": 5788, + "time": 12.8 + }, + { + "epoch": 2.9, + "learning_rate": "1.4482e-04", + "loss": 0.5833, + "slid_loss": 0.6171, + "step": 5789, + "time": 13.58 + }, + { + "epoch": 2.9, + "learning_rate": "1.4480e-04", + "loss": 0.5707, + "slid_loss": 0.6151, + "step": 5790, + "time": 13.6 + }, + { + "epoch": 2.9, + "learning_rate": "1.4478e-04", + "loss": 0.7784, + "slid_loss": 0.6179, + "step": 5791, + "time": 11.71 + }, + { + "epoch": 2.9, + "learning_rate": "1.4477e-04", + "loss": 0.5344, + "slid_loss": 0.6169, + "step": 5792, + "time": 12.03 + }, + { + "epoch": 2.9, + "learning_rate": "1.4475e-04", + "loss": 0.7021, + "slid_loss": 0.6179, + "step": 5793, + "time": 12.88 + }, + { + "epoch": 2.9, + "learning_rate": "1.4473e-04", + "loss": 0.4975, + "slid_loss": 0.6155, + "step": 5794, + "time": 11.84 + }, + { + "epoch": 2.9, + "learning_rate": "1.4471e-04", + "loss": 0.5638, + "slid_loss": 0.616, + "step": 5795, + "time": 13.09 + }, + { + "epoch": 2.9, + "learning_rate": "1.4470e-04", + "loss": 0.6071, + "slid_loss": 0.6156, + "step": 5796, + "time": 13.82 + }, + { + "epoch": 2.9, + "learning_rate": "1.4468e-04", + "loss": 0.6247, + "slid_loss": 0.6143, + "step": 5797, + "time": 13.19 + }, + { + "epoch": 2.9, + "learning_rate": "1.4466e-04", + "loss": 0.5502, + "slid_loss": 0.6141, + "step": 5798, + "time": 13.32 + }, + { + "epoch": 2.9, + "learning_rate": "1.4464e-04", + "loss": 0.664, + "slid_loss": 0.6164, + "step": 5799, + "time": 13.44 + }, + { + "epoch": 2.9, + "learning_rate": "1.4463e-04", + "loss": 0.5164, + "slid_loss": 0.6142, + "step": 5800, + "time": 11.62 + }, + { + "epoch": 2.9, + "learning_rate": "1.4461e-04", + "loss": 0.5908, + "slid_loss": 0.6139, + "step": 5801, + "time": 11.32 + }, + { + "epoch": 2.91, + "learning_rate": "1.4459e-04", + "loss": 0.7096, + "slid_loss": 0.6142, + "step": 5802, + "time": 14.17 + }, + { + "epoch": 2.91, + "learning_rate": "1.4457e-04", + "loss": 0.7136, + "slid_loss": 0.6157, + "step": 5803, + "time": 13.87 + }, + { + "epoch": 2.91, + "learning_rate": "1.4456e-04", + "loss": 0.5622, + "slid_loss": 0.6153, + "step": 5804, + "time": 12.17 + }, + { + "epoch": 2.91, + "learning_rate": "1.4454e-04", + "loss": 0.6129, + "slid_loss": 0.6158, + "step": 5805, + "time": 12.15 + }, + { + "epoch": 2.91, + "learning_rate": "1.4452e-04", + "loss": 0.6627, + "slid_loss": 0.615, + "step": 5806, + "time": 11.39 + }, + { + "epoch": 2.91, + "learning_rate": "1.4450e-04", + "loss": 0.7464, + "slid_loss": 0.6161, + "step": 5807, + "time": 13.43 + }, + { + "epoch": 2.91, + "learning_rate": "1.4449e-04", + "loss": 0.4635, + "slid_loss": 0.6149, + "step": 5808, + "time": 14.41 + }, + { + "epoch": 2.91, + "learning_rate": "1.4447e-04", + "loss": 0.5906, + "slid_loss": 0.6151, + "step": 5809, + "time": 11.05 + }, + { + "epoch": 2.91, + "learning_rate": "1.4445e-04", + "loss": 0.5679, + "slid_loss": 0.6151, + "step": 5810, + "time": 13.32 + }, + { + "epoch": 2.91, + "learning_rate": "1.4444e-04", + "loss": 0.6498, + "slid_loss": 0.6152, + "step": 5811, + "time": 13.43 + }, + { + "epoch": 2.91, + "learning_rate": "1.4442e-04", + "loss": 0.7098, + "slid_loss": 0.6163, + "step": 5812, + "time": 13.22 + }, + { + "epoch": 2.91, + "learning_rate": "1.4440e-04", + "loss": 0.6444, + "slid_loss": 0.6167, + "step": 5813, + "time": 11.7 + }, + { + "epoch": 2.91, + "learning_rate": "1.4438e-04", + "loss": 0.6881, + "slid_loss": 0.6176, + "step": 5814, + "time": 13.06 + }, + { + "epoch": 2.91, + "learning_rate": "1.4437e-04", + "loss": 0.5831, + "slid_loss": 0.6171, + "step": 5815, + "time": 12.77 + }, + { + "epoch": 2.91, + "learning_rate": "1.4435e-04", + "loss": 0.534, + "slid_loss": 0.6154, + "step": 5816, + "time": 11.24 + }, + { + "epoch": 2.91, + "learning_rate": "1.4433e-04", + "loss": 0.5739, + "slid_loss": 0.616, + "step": 5817, + "time": 13.96 + }, + { + "epoch": 2.91, + "learning_rate": "1.4431e-04", + "loss": 0.6151, + "slid_loss": 0.6153, + "step": 5818, + "time": 13.47 + }, + { + "epoch": 2.91, + "learning_rate": "1.4430e-04", + "loss": 0.5349, + "slid_loss": 0.6139, + "step": 5819, + "time": 13.36 + }, + { + "epoch": 2.91, + "learning_rate": "1.4428e-04", + "loss": 0.6359, + "slid_loss": 0.613, + "step": 5820, + "time": 12.84 + }, + { + "epoch": 2.91, + "learning_rate": "1.4426e-04", + "loss": 0.604, + "slid_loss": 0.6137, + "step": 5821, + "time": 11.76 + }, + { + "epoch": 2.92, + "learning_rate": "1.4424e-04", + "loss": 0.7379, + "slid_loss": 0.615, + "step": 5822, + "time": 13.72 + }, + { + "epoch": 2.92, + "learning_rate": "1.4423e-04", + "loss": 0.4859, + "slid_loss": 0.6131, + "step": 5823, + "time": 10.84 + }, + { + "epoch": 2.92, + "learning_rate": "1.4421e-04", + "loss": 0.492, + "slid_loss": 0.612, + "step": 5824, + "time": 13.4 + }, + { + "epoch": 2.92, + "learning_rate": "1.4419e-04", + "loss": 0.5785, + "slid_loss": 0.6108, + "step": 5825, + "time": 11.8 + }, + { + "epoch": 2.92, + "learning_rate": "1.4417e-04", + "loss": 0.6764, + "slid_loss": 0.6115, + "step": 5826, + "time": 11.53 + }, + { + "epoch": 2.92, + "learning_rate": "1.4416e-04", + "loss": 0.614, + "slid_loss": 0.612, + "step": 5827, + "time": 12.28 + }, + { + "epoch": 2.92, + "learning_rate": "1.4414e-04", + "loss": 0.572, + "slid_loss": 0.6136, + "step": 5828, + "time": 11.38 + }, + { + "epoch": 2.92, + "learning_rate": "1.4412e-04", + "loss": 0.4877, + "slid_loss": 0.6137, + "step": 5829, + "time": 11.71 + }, + { + "epoch": 2.92, + "learning_rate": "1.4411e-04", + "loss": 0.6949, + "slid_loss": 0.6157, + "step": 5830, + "time": 11.86 + }, + { + "epoch": 2.92, + "learning_rate": "1.4409e-04", + "loss": 0.6962, + "slid_loss": 0.616, + "step": 5831, + "time": 11.03 + }, + { + "epoch": 2.92, + "learning_rate": "1.4407e-04", + "loss": 0.5713, + "slid_loss": 0.6152, + "step": 5832, + "time": 13.82 + }, + { + "epoch": 2.92, + "learning_rate": "1.4405e-04", + "loss": 0.5595, + "slid_loss": 0.6146, + "step": 5833, + "time": 13.18 + }, + { + "epoch": 2.92, + "learning_rate": "1.4404e-04", + "loss": 0.7247, + "slid_loss": 0.617, + "step": 5834, + "time": 11.45 + }, + { + "epoch": 2.92, + "learning_rate": "1.4402e-04", + "loss": 0.6881, + "slid_loss": 0.6188, + "step": 5835, + "time": 12.99 + }, + { + "epoch": 2.92, + "learning_rate": "1.4400e-04", + "loss": 0.5957, + "slid_loss": 0.619, + "step": 5836, + "time": 12.86 + }, + { + "epoch": 2.92, + "learning_rate": "1.4398e-04", + "loss": 0.5075, + "slid_loss": 0.6189, + "step": 5837, + "time": 11.85 + }, + { + "epoch": 2.92, + "learning_rate": "1.4397e-04", + "loss": 0.5614, + "slid_loss": 0.6179, + "step": 5838, + "time": 11.14 + }, + { + "epoch": 2.92, + "learning_rate": "1.4395e-04", + "loss": 0.7024, + "slid_loss": 0.6179, + "step": 5839, + "time": 11.69 + }, + { + "epoch": 2.92, + "learning_rate": "1.4393e-04", + "loss": 0.5669, + "slid_loss": 0.616, + "step": 5840, + "time": 12.26 + }, + { + "epoch": 2.92, + "learning_rate": "1.4391e-04", + "loss": 0.5766, + "slid_loss": 0.615, + "step": 5841, + "time": 13.14 + }, + { + "epoch": 2.93, + "learning_rate": "1.4390e-04", + "loss": 0.6992, + "slid_loss": 0.6175, + "step": 5842, + "time": 13.38 + }, + { + "epoch": 2.93, + "learning_rate": "1.4388e-04", + "loss": 0.6606, + "slid_loss": 0.6178, + "step": 5843, + "time": 12.21 + }, + { + "epoch": 2.93, + "learning_rate": "1.4386e-04", + "loss": 0.6652, + "slid_loss": 0.6188, + "step": 5844, + "time": 14.32 + }, + { + "epoch": 2.93, + "learning_rate": "1.4385e-04", + "loss": 0.7556, + "slid_loss": 0.6213, + "step": 5845, + "time": 13.35 + }, + { + "epoch": 2.93, + "learning_rate": "1.4383e-04", + "loss": 0.754, + "slid_loss": 0.6238, + "step": 5846, + "time": 12.88 + }, + { + "epoch": 2.93, + "learning_rate": "1.4381e-04", + "loss": 0.5681, + "slid_loss": 0.6222, + "step": 5847, + "time": 11.71 + }, + { + "epoch": 2.93, + "learning_rate": "1.4379e-04", + "loss": 0.5342, + "slid_loss": 0.6214, + "step": 5848, + "time": 13.38 + }, + { + "epoch": 2.93, + "learning_rate": "1.4378e-04", + "loss": 0.6872, + "slid_loss": 0.6208, + "step": 5849, + "time": 13.73 + }, + { + "epoch": 2.93, + "learning_rate": "1.4376e-04", + "loss": 0.7743, + "slid_loss": 0.6226, + "step": 5850, + "time": 13.4 + }, + { + "epoch": 2.93, + "learning_rate": "1.4374e-04", + "loss": 0.5953, + "slid_loss": 0.6217, + "step": 5851, + "time": 13.75 + }, + { + "epoch": 2.93, + "learning_rate": "1.4372e-04", + "loss": 0.5856, + "slid_loss": 0.6205, + "step": 5852, + "time": 13.73 + }, + { + "epoch": 2.93, + "learning_rate": "1.4371e-04", + "loss": 0.5777, + "slid_loss": 0.619, + "step": 5853, + "time": 11.85 + }, + { + "epoch": 2.93, + "learning_rate": "1.4369e-04", + "loss": 0.4217, + "slid_loss": 0.6165, + "step": 5854, + "time": 11.93 + }, + { + "epoch": 2.93, + "learning_rate": "1.4367e-04", + "loss": 0.6829, + "slid_loss": 0.6179, + "step": 5855, + "time": 13.13 + }, + { + "epoch": 2.93, + "learning_rate": "1.4365e-04", + "loss": 0.408, + "slid_loss": 0.6148, + "step": 5856, + "time": 10.9 + }, + { + "epoch": 2.93, + "learning_rate": "1.4364e-04", + "loss": 0.5446, + "slid_loss": 0.6135, + "step": 5857, + "time": 13.68 + }, + { + "epoch": 2.93, + "learning_rate": "1.4362e-04", + "loss": 0.7052, + "slid_loss": 0.6136, + "step": 5858, + "time": 11.33 + }, + { + "epoch": 2.93, + "learning_rate": "1.4360e-04", + "loss": 0.7606, + "slid_loss": 0.6149, + "step": 5859, + "time": 13.02 + }, + { + "epoch": 2.93, + "learning_rate": "1.4358e-04", + "loss": 0.7283, + "slid_loss": 0.6152, + "step": 5860, + "time": 12.91 + }, + { + "epoch": 2.93, + "learning_rate": "1.4357e-04", + "loss": 0.6774, + "slid_loss": 0.6162, + "step": 5861, + "time": 12.63 + }, + { + "epoch": 2.94, + "learning_rate": "1.4355e-04", + "loss": 0.5815, + "slid_loss": 0.616, + "step": 5862, + "time": 11.09 + }, + { + "epoch": 2.94, + "learning_rate": "1.4353e-04", + "loss": 0.4879, + "slid_loss": 0.6162, + "step": 5863, + "time": 13.86 + }, + { + "epoch": 2.94, + "learning_rate": "1.4352e-04", + "loss": 0.6011, + "slid_loss": 0.6173, + "step": 5864, + "time": 11.61 + }, + { + "epoch": 2.94, + "learning_rate": "1.4350e-04", + "loss": 0.7305, + "slid_loss": 0.6183, + "step": 5865, + "time": 12.79 + }, + { + "epoch": 2.94, + "learning_rate": "1.4348e-04", + "loss": 0.6998, + "slid_loss": 0.6181, + "step": 5866, + "time": 13.86 + }, + { + "epoch": 2.94, + "learning_rate": "1.4346e-04", + "loss": 0.6075, + "slid_loss": 0.6179, + "step": 5867, + "time": 11.37 + }, + { + "epoch": 2.94, + "learning_rate": "1.4345e-04", + "loss": 0.5417, + "slid_loss": 0.6162, + "step": 5868, + "time": 11.88 + }, + { + "epoch": 2.94, + "learning_rate": "1.4343e-04", + "loss": 0.6936, + "slid_loss": 0.6172, + "step": 5869, + "time": 10.54 + }, + { + "epoch": 2.94, + "learning_rate": "1.4341e-04", + "loss": 0.704, + "slid_loss": 0.6174, + "step": 5870, + "time": 13.31 + }, + { + "epoch": 2.94, + "learning_rate": "1.4339e-04", + "loss": 0.7129, + "slid_loss": 0.6184, + "step": 5871, + "time": 11.82 + }, + { + "epoch": 2.94, + "learning_rate": "1.4338e-04", + "loss": 0.8444, + "slid_loss": 0.6217, + "step": 5872, + "time": 10.73 + }, + { + "epoch": 2.94, + "learning_rate": "1.4336e-04", + "loss": 0.6344, + "slid_loss": 0.6224, + "step": 5873, + "time": 11.45 + }, + { + "epoch": 2.94, + "learning_rate": "1.4334e-04", + "loss": 0.5868, + "slid_loss": 0.6212, + "step": 5874, + "time": 12.25 + }, + { + "epoch": 2.94, + "learning_rate": "1.4332e-04", + "loss": 0.4699, + "slid_loss": 0.6204, + "step": 5875, + "time": 11.37 + }, + { + "epoch": 2.94, + "learning_rate": "1.4331e-04", + "loss": 0.5173, + "slid_loss": 0.6197, + "step": 5876, + "time": 13.6 + }, + { + "epoch": 2.94, + "learning_rate": "1.4329e-04", + "loss": 0.6834, + "slid_loss": 0.6197, + "step": 5877, + "time": 12.59 + }, + { + "epoch": 2.94, + "learning_rate": "1.4327e-04", + "loss": 0.6056, + "slid_loss": 0.6183, + "step": 5878, + "time": 11.61 + }, + { + "epoch": 2.94, + "learning_rate": "1.4326e-04", + "loss": 0.7216, + "slid_loss": 0.6203, + "step": 5879, + "time": 10.54 + }, + { + "epoch": 2.94, + "learning_rate": "1.4324e-04", + "loss": 0.685, + "slid_loss": 0.6209, + "step": 5880, + "time": 12.88 + }, + { + "epoch": 2.94, + "learning_rate": "1.4322e-04", + "loss": 0.636, + "slid_loss": 0.6203, + "step": 5881, + "time": 13.25 + }, + { + "epoch": 2.95, + "learning_rate": "1.4320e-04", + "loss": 0.6623, + "slid_loss": 0.6198, + "step": 5882, + "time": 14.09 + }, + { + "epoch": 2.95, + "learning_rate": "1.4319e-04", + "loss": 0.6451, + "slid_loss": 0.62, + "step": 5883, + "time": 12.35 + }, + { + "epoch": 2.95, + "learning_rate": "1.4317e-04", + "loss": 0.5325, + "slid_loss": 0.6182, + "step": 5884, + "time": 12.14 + }, + { + "epoch": 2.95, + "learning_rate": "1.4315e-04", + "loss": 0.6494, + "slid_loss": 0.6187, + "step": 5885, + "time": 13.33 + }, + { + "epoch": 2.95, + "learning_rate": "1.4313e-04", + "loss": 0.539, + "slid_loss": 0.6194, + "step": 5886, + "time": 11.89 + }, + { + "epoch": 2.95, + "learning_rate": "1.4312e-04", + "loss": 0.6124, + "slid_loss": 0.6196, + "step": 5887, + "time": 13.98 + }, + { + "epoch": 2.95, + "learning_rate": "1.4310e-04", + "loss": 0.641, + "slid_loss": 0.6209, + "step": 5888, + "time": 11.04 + }, + { + "epoch": 2.95, + "learning_rate": "1.4308e-04", + "loss": 0.8672, + "slid_loss": 0.6237, + "step": 5889, + "time": 13.58 + }, + { + "epoch": 2.95, + "learning_rate": "1.4307e-04", + "loss": 0.6834, + "slid_loss": 0.6248, + "step": 5890, + "time": 14.2 + }, + { + "epoch": 2.95, + "learning_rate": "1.4305e-04", + "loss": 0.6711, + "slid_loss": 0.6238, + "step": 5891, + "time": 11.43 + }, + { + "epoch": 2.95, + "learning_rate": "1.4303e-04", + "loss": 0.6971, + "slid_loss": 0.6254, + "step": 5892, + "time": 13.03 + }, + { + "epoch": 2.95, + "learning_rate": "1.4301e-04", + "loss": 0.6962, + "slid_loss": 0.6253, + "step": 5893, + "time": 13.29 + }, + { + "epoch": 2.95, + "learning_rate": "1.4300e-04", + "loss": 0.6882, + "slid_loss": 0.6273, + "step": 5894, + "time": 13.01 + }, + { + "epoch": 2.95, + "learning_rate": "1.4298e-04", + "loss": 0.5244, + "slid_loss": 0.6269, + "step": 5895, + "time": 13.35 + }, + { + "epoch": 2.95, + "learning_rate": "1.4296e-04", + "loss": 0.5677, + "slid_loss": 0.6265, + "step": 5896, + "time": 14.31 + }, + { + "epoch": 2.95, + "learning_rate": "1.4294e-04", + "loss": 0.5842, + "slid_loss": 0.6261, + "step": 5897, + "time": 10.89 + }, + { + "epoch": 2.95, + "learning_rate": "1.4293e-04", + "loss": 0.6407, + "slid_loss": 0.627, + "step": 5898, + "time": 12.73 + }, + { + "epoch": 2.95, + "learning_rate": "1.4291e-04", + "loss": 0.6147, + "slid_loss": 0.6265, + "step": 5899, + "time": 11.78 + }, + { + "epoch": 2.95, + "learning_rate": "1.4289e-04", + "loss": 0.6349, + "slid_loss": 0.6277, + "step": 5900, + "time": 14.4 + }, + { + "epoch": 2.95, + "learning_rate": "1.4287e-04", + "loss": 0.5272, + "slid_loss": 0.627, + "step": 5901, + "time": 11.23 + }, + { + "epoch": 2.96, + "learning_rate": "1.4286e-04", + "loss": 0.6097, + "slid_loss": 0.626, + "step": 5902, + "time": 12.16 + }, + { + "epoch": 2.96, + "learning_rate": "1.4284e-04", + "loss": 0.6919, + "slid_loss": 0.6258, + "step": 5903, + "time": 13.55 + }, + { + "epoch": 2.96, + "learning_rate": "1.4282e-04", + "loss": 0.5724, + "slid_loss": 0.6259, + "step": 5904, + "time": 11.0 + }, + { + "epoch": 2.96, + "learning_rate": "1.4281e-04", + "loss": 0.6312, + "slid_loss": 0.6261, + "step": 5905, + "time": 12.02 + }, + { + "epoch": 2.96, + "learning_rate": "1.4279e-04", + "loss": 0.5739, + "slid_loss": 0.6252, + "step": 5906, + "time": 13.66 + }, + { + "epoch": 2.96, + "learning_rate": "1.4277e-04", + "loss": 0.4904, + "slid_loss": 0.6226, + "step": 5907, + "time": 11.43 + }, + { + "epoch": 2.96, + "learning_rate": "1.4275e-04", + "loss": 0.5309, + "slid_loss": 0.6233, + "step": 5908, + "time": 10.96 + }, + { + "epoch": 2.96, + "learning_rate": "1.4274e-04", + "loss": 0.539, + "slid_loss": 0.6228, + "step": 5909, + "time": 13.15 + }, + { + "epoch": 2.96, + "learning_rate": "1.4272e-04", + "loss": 0.5975, + "slid_loss": 0.6231, + "step": 5910, + "time": 14.5 + }, + { + "epoch": 2.96, + "learning_rate": "1.4270e-04", + "loss": 0.5236, + "slid_loss": 0.6218, + "step": 5911, + "time": 13.09 + }, + { + "epoch": 2.96, + "learning_rate": "1.4268e-04", + "loss": 0.5717, + "slid_loss": 0.6205, + "step": 5912, + "time": 10.34 + }, + { + "epoch": 2.96, + "learning_rate": "1.4267e-04", + "loss": 0.6035, + "slid_loss": 0.62, + "step": 5913, + "time": 13.83 + }, + { + "epoch": 2.96, + "learning_rate": "1.4265e-04", + "loss": 0.641, + "slid_loss": 0.6196, + "step": 5914, + "time": 11.85 + }, + { + "epoch": 2.96, + "learning_rate": "1.4263e-04", + "loss": 0.5429, + "slid_loss": 0.6192, + "step": 5915, + "time": 12.9 + }, + { + "epoch": 2.96, + "learning_rate": "1.4262e-04", + "loss": 0.6071, + "slid_loss": 0.6199, + "step": 5916, + "time": 13.28 + }, + { + "epoch": 2.96, + "learning_rate": "1.4260e-04", + "loss": 0.7112, + "slid_loss": 0.6213, + "step": 5917, + "time": 13.64 + }, + { + "epoch": 2.96, + "learning_rate": "1.4258e-04", + "loss": 0.615, + "slid_loss": 0.6213, + "step": 5918, + "time": 13.33 + }, + { + "epoch": 2.96, + "learning_rate": "1.4256e-04", + "loss": 0.7038, + "slid_loss": 0.623, + "step": 5919, + "time": 12.79 + }, + { + "epoch": 2.96, + "learning_rate": "1.4255e-04", + "loss": 0.5985, + "slid_loss": 0.6226, + "step": 5920, + "time": 11.43 + }, + { + "epoch": 2.96, + "learning_rate": "1.4253e-04", + "loss": 0.5926, + "slid_loss": 0.6225, + "step": 5921, + "time": 11.68 + }, + { + "epoch": 2.97, + "learning_rate": "1.4251e-04", + "loss": 0.591, + "slid_loss": 0.621, + "step": 5922, + "time": 11.22 + }, + { + "epoch": 2.97, + "learning_rate": "1.4249e-04", + "loss": 0.5112, + "slid_loss": 0.6213, + "step": 5923, + "time": 13.68 + }, + { + "epoch": 2.97, + "learning_rate": "1.4248e-04", + "loss": 0.4709, + "slid_loss": 0.621, + "step": 5924, + "time": 11.01 + }, + { + "epoch": 2.97, + "learning_rate": "1.4246e-04", + "loss": 0.5543, + "slid_loss": 0.6208, + "step": 5925, + "time": 11.77 + }, + { + "epoch": 2.97, + "learning_rate": "1.4244e-04", + "loss": 0.3819, + "slid_loss": 0.6179, + "step": 5926, + "time": 11.14 + }, + { + "epoch": 2.97, + "learning_rate": "1.4243e-04", + "loss": 0.7091, + "slid_loss": 0.6188, + "step": 5927, + "time": 11.27 + }, + { + "epoch": 2.97, + "learning_rate": "1.4241e-04", + "loss": 0.6221, + "slid_loss": 0.6193, + "step": 5928, + "time": 12.29 + }, + { + "epoch": 2.97, + "learning_rate": "1.4239e-04", + "loss": 0.6015, + "slid_loss": 0.6204, + "step": 5929, + "time": 12.89 + }, + { + "epoch": 2.97, + "learning_rate": "1.4237e-04", + "loss": 0.5822, + "slid_loss": 0.6193, + "step": 5930, + "time": 12.04 + }, + { + "epoch": 2.97, + "learning_rate": "1.4236e-04", + "loss": 0.6457, + "slid_loss": 0.6188, + "step": 5931, + "time": 11.36 + }, + { + "epoch": 2.97, + "learning_rate": "1.4234e-04", + "loss": 0.698, + "slid_loss": 0.6201, + "step": 5932, + "time": 13.22 + }, + { + "epoch": 2.97, + "learning_rate": "1.4232e-04", + "loss": 0.541, + "slid_loss": 0.6199, + "step": 5933, + "time": 12.34 + }, + { + "epoch": 2.97, + "learning_rate": "1.4230e-04", + "loss": 0.614, + "slid_loss": 0.6188, + "step": 5934, + "time": 14.06 + }, + { + "epoch": 2.97, + "learning_rate": "1.4229e-04", + "loss": 0.5307, + "slid_loss": 0.6172, + "step": 5935, + "time": 13.26 + }, + { + "epoch": 2.97, + "learning_rate": "1.4227e-04", + "loss": 0.591, + "slid_loss": 0.6172, + "step": 5936, + "time": 11.46 + }, + { + "epoch": 2.97, + "learning_rate": "1.4225e-04", + "loss": 0.5849, + "slid_loss": 0.6179, + "step": 5937, + "time": 13.92 + }, + { + "epoch": 2.97, + "learning_rate": "1.4224e-04", + "loss": 0.6359, + "slid_loss": 0.6187, + "step": 5938, + "time": 11.32 + }, + { + "epoch": 2.97, + "learning_rate": "1.4222e-04", + "loss": 0.7401, + "slid_loss": 0.6191, + "step": 5939, + "time": 13.63 + }, + { + "epoch": 2.97, + "learning_rate": "1.4220e-04", + "loss": 0.6823, + "slid_loss": 0.6202, + "step": 5940, + "time": 13.24 + }, + { + "epoch": 2.97, + "learning_rate": "1.4218e-04", + "loss": 0.6508, + "slid_loss": 0.621, + "step": 5941, + "time": 12.21 + }, + { + "epoch": 2.98, + "learning_rate": "1.4217e-04", + "loss": 0.5471, + "slid_loss": 0.6194, + "step": 5942, + "time": 13.34 + }, + { + "epoch": 2.98, + "learning_rate": "1.4215e-04", + "loss": 0.5604, + "slid_loss": 0.6184, + "step": 5943, + "time": 11.8 + }, + { + "epoch": 2.98, + "learning_rate": "1.4213e-04", + "loss": 0.6337, + "slid_loss": 0.6181, + "step": 5944, + "time": 11.3 + }, + { + "epoch": 2.98, + "learning_rate": "1.4211e-04", + "loss": 0.5939, + "slid_loss": 0.6165, + "step": 5945, + "time": 12.82 + }, + { + "epoch": 2.98, + "learning_rate": "1.4210e-04", + "loss": 0.5162, + "slid_loss": 0.6141, + "step": 5946, + "time": 12.76 + }, + { + "epoch": 2.98, + "learning_rate": "1.4208e-04", + "loss": 0.6015, + "slid_loss": 0.6145, + "step": 5947, + "time": 11.6 + }, + { + "epoch": 2.98, + "learning_rate": "1.4206e-04", + "loss": 0.4315, + "slid_loss": 0.6134, + "step": 5948, + "time": 14.13 + }, + { + "epoch": 2.98, + "learning_rate": "1.4205e-04", + "loss": 0.6504, + "slid_loss": 0.6131, + "step": 5949, + "time": 11.13 + }, + { + "epoch": 2.98, + "learning_rate": "1.4203e-04", + "loss": 0.5617, + "slid_loss": 0.6109, + "step": 5950, + "time": 11.48 + }, + { + "epoch": 2.98, + "learning_rate": "1.4201e-04", + "loss": 0.791, + "slid_loss": 0.6129, + "step": 5951, + "time": 13.21 + }, + { + "epoch": 2.98, + "learning_rate": "1.4199e-04", + "loss": 0.412, + "slid_loss": 0.6112, + "step": 5952, + "time": 12.27 + }, + { + "epoch": 2.98, + "learning_rate": "1.4198e-04", + "loss": 0.6056, + "slid_loss": 0.6114, + "step": 5953, + "time": 13.64 + }, + { + "epoch": 2.98, + "learning_rate": "1.4196e-04", + "loss": 0.6276, + "slid_loss": 0.6135, + "step": 5954, + "time": 11.51 + }, + { + "epoch": 2.98, + "learning_rate": "1.4194e-04", + "loss": 0.7025, + "slid_loss": 0.6137, + "step": 5955, + "time": 13.15 + }, + { + "epoch": 2.98, + "learning_rate": "1.4192e-04", + "loss": 0.7176, + "slid_loss": 0.6168, + "step": 5956, + "time": 13.32 + }, + { + "epoch": 2.98, + "learning_rate": "1.4191e-04", + "loss": 0.6062, + "slid_loss": 0.6174, + "step": 5957, + "time": 11.04 + }, + { + "epoch": 2.98, + "learning_rate": "1.4189e-04", + "loss": 0.6513, + "slid_loss": 0.6169, + "step": 5958, + "time": 10.84 + }, + { + "epoch": 2.98, + "learning_rate": "1.4187e-04", + "loss": 0.5859, + "slid_loss": 0.6151, + "step": 5959, + "time": 11.6 + }, + { + "epoch": 2.98, + "learning_rate": "1.4186e-04", + "loss": 0.5068, + "slid_loss": 0.6129, + "step": 5960, + "time": 13.24 + }, + { + "epoch": 2.98, + "learning_rate": "1.4184e-04", + "loss": 0.6028, + "slid_loss": 0.6122, + "step": 5961, + "time": 11.65 + }, + { + "epoch": 2.99, + "learning_rate": "1.4182e-04", + "loss": 0.6925, + "slid_loss": 0.6133, + "step": 5962, + "time": 13.41 + }, + { + "epoch": 2.99, + "learning_rate": "1.4180e-04", + "loss": 0.6927, + "slid_loss": 0.6153, + "step": 5963, + "time": 12.82 + }, + { + "epoch": 2.99, + "learning_rate": "1.4179e-04", + "loss": 0.7083, + "slid_loss": 0.6164, + "step": 5964, + "time": 14.05 + }, + { + "epoch": 2.99, + "learning_rate": "1.4177e-04", + "loss": 0.7719, + "slid_loss": 0.6168, + "step": 5965, + "time": 13.36 + }, + { + "epoch": 2.99, + "learning_rate": "1.4175e-04", + "loss": 0.5637, + "slid_loss": 0.6154, + "step": 5966, + "time": 11.2 + }, + { + "epoch": 2.99, + "learning_rate": "1.4173e-04", + "loss": 0.6355, + "slid_loss": 0.6157, + "step": 5967, + "time": 13.14 + }, + { + "epoch": 2.99, + "learning_rate": "1.4172e-04", + "loss": 0.4962, + "slid_loss": 0.6153, + "step": 5968, + "time": 10.88 + }, + { + "epoch": 2.99, + "learning_rate": "1.4170e-04", + "loss": 0.6276, + "slid_loss": 0.6146, + "step": 5969, + "time": 12.11 + }, + { + "epoch": 2.99, + "learning_rate": "1.4168e-04", + "loss": 0.6307, + "slid_loss": 0.6139, + "step": 5970, + "time": 11.11 + }, + { + "epoch": 2.99, + "learning_rate": "1.4167e-04", + "loss": 0.6074, + "slid_loss": 0.6128, + "step": 5971, + "time": 11.25 + }, + { + "epoch": 2.99, + "learning_rate": "1.4165e-04", + "loss": 0.744, + "slid_loss": 0.6118, + "step": 5972, + "time": 12.24 + }, + { + "epoch": 2.99, + "learning_rate": "1.4163e-04", + "loss": 0.6319, + "slid_loss": 0.6118, + "step": 5973, + "time": 13.4 + }, + { + "epoch": 2.99, + "learning_rate": "1.4161e-04", + "loss": 0.6197, + "slid_loss": 0.6121, + "step": 5974, + "time": 13.99 + }, + { + "epoch": 2.99, + "learning_rate": "1.4160e-04", + "loss": 0.7169, + "slid_loss": 0.6146, + "step": 5975, + "time": 12.18 + }, + { + "epoch": 2.99, + "learning_rate": "1.4158e-04", + "loss": 0.6794, + "slid_loss": 0.6162, + "step": 5976, + "time": 13.49 + }, + { + "epoch": 2.99, + "learning_rate": "1.4156e-04", + "loss": 0.5673, + "slid_loss": 0.6151, + "step": 5977, + "time": 13.37 + }, + { + "epoch": 2.99, + "learning_rate": "1.4155e-04", + "loss": 0.6489, + "slid_loss": 0.6155, + "step": 5978, + "time": 12.87 + }, + { + "epoch": 2.99, + "learning_rate": "1.4153e-04", + "loss": 0.5278, + "slid_loss": 0.6135, + "step": 5979, + "time": 13.43 + }, + { + "epoch": 2.99, + "learning_rate": "1.4151e-04", + "loss": 0.5767, + "slid_loss": 0.6125, + "step": 5980, + "time": 11.28 + }, + { + "epoch": 2.99, + "learning_rate": "1.4149e-04", + "loss": 0.5966, + "slid_loss": 0.6121, + "step": 5981, + "time": 11.2 + }, + { + "epoch": 3.0, + "learning_rate": "1.4148e-04", + "loss": 0.7426, + "slid_loss": 0.6129, + "step": 5982, + "time": 12.82 + }, + { + "epoch": 3.0, + "learning_rate": "1.4146e-04", + "loss": 0.7101, + "slid_loss": 0.6135, + "step": 5983, + "time": 13.56 + }, + { + "epoch": 3.0, + "learning_rate": "1.4144e-04", + "loss": 0.6689, + "slid_loss": 0.6149, + "step": 5984, + "time": 13.48 + }, + { + "epoch": 3.0, + "learning_rate": "1.4142e-04", + "loss": 0.6185, + "slid_loss": 0.6146, + "step": 5985, + "time": 12.93 + }, + { + "epoch": 3.0, + "learning_rate": "1.4141e-04", + "loss": 0.6348, + "slid_loss": 0.6155, + "step": 5986, + "time": 14.04 + }, + { + "epoch": 3.0, + "learning_rate": "1.4139e-04", + "loss": 0.5942, + "slid_loss": 0.6154, + "step": 5987, + "time": 13.83 + }, + { + "epoch": 3.0, + "learning_rate": "1.4137e-04", + "loss": 0.4113, + "slid_loss": 0.6131, + "step": 5988, + "time": 11.09 + }, + { + "epoch": 3.0, + "learning_rate": "1.4136e-04", + "loss": 0.6724, + "slid_loss": 0.6111, + "step": 5989, + "time": 12.28 + }, + { + "epoch": 3.0, + "learning_rate": "1.4134e-04", + "loss": 0.558, + "slid_loss": 0.6099, + "step": 5990, + "time": 11.85 + }, + { + "epoch": 3.0, + "learning_rate": "1.4132e-04", + "loss": 0.702, + "slid_loss": 0.6102, + "step": 5991, + "time": 13.26 + } + ], + "logging_steps": 1.0, + "max_steps": 9985, + "num_train_epochs": 5, + "save_steps": 50000.0, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +}