diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6771 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.913265306122449, + "eval_steps": 800, + "global_step": 4500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0, + "loss": 2.7231, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 0, + "loss": 2.8688, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 0, + "loss": 3.0604, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 0, + "loss": 3.3619, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 0, + "loss": 3.1263, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 0.0, + "loss": 3.1305, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 1.5188198451414044e-06, + "loss": 1.2543, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 2.0735116692035353e-06, + "loss": 1.0489, + "step": 32 + }, + { + "epoch": 0.02, + "learning_rate": 2.420532010460384e-06, + "loss": 0.8238, + "step": 36 + }, + { + "epoch": 0.02, + "learning_rate": 2.6736916157045096e-06, + "loss": 0.6357, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 2.873103126046782e-06, + "loss": 0.9375, + "step": 44 + }, + { + "epoch": 0.02, + "learning_rate": 3.0376396902828088e-06, + "loss": 1.0042, + "step": 48 + }, + { + "epoch": 0.02, + "learning_rate": 3.177703030066258e-06, + "loss": 0.7668, + "step": 52 + }, + { + "epoch": 0.02, + "learning_rate": 3.299639602133279e-06, + "loss": 0.9168, + "step": 56 + }, + { + "epoch": 0.03, + "learning_rate": 3.407608173344298e-06, + "loss": 0.8997, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 3.5044824703694353e-06, + "loss": 1.0025, + "step": 64 + }, + { + "epoch": 0.03, + "learning_rate": 3.5923315143449394e-06, + "loss": 0.6651, + "step": 68 + }, + { + "epoch": 0.03, + "learning_rate": 3.6726945828900284e-06, + "loss": 0.699, + "step": 72 + }, + { + "epoch": 0.03, + "learning_rate": 3.746747919084026e-06, + "loss": 0.886, + "step": 76 + }, + { + "epoch": 0.03, + "learning_rate": 3.8154106182475455e-06, + "loss": 0.8937, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 3.8794145195304064e-06, + "loss": 0.6582, + "step": 84 + }, + { + "epoch": 0.04, + "learning_rate": 3.9393518556017876e-06, + "loss": 0.7974, + "step": 88 + }, + { + "epoch": 0.04, + "learning_rate": 3.995708648306083e-06, + "loss": 0.9472, + "step": 92 + }, + { + "epoch": 0.04, + "learning_rate": 4.0488886794862905e-06, + "loss": 0.8867, + "step": 96 + }, + { + "epoch": 0.04, + "learning_rate": 4.099231058976525e-06, + "loss": 1.061, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 4.1470233384070705e-06, + "loss": 0.7292, + "step": 104 + }, + { + "epoch": 0.05, + "learning_rate": 4.192511460845915e-06, + "loss": 0.9262, + "step": 108 + }, + { + "epoch": 0.05, + "learning_rate": 4.235907420412398e-06, + "loss": 0.7092, + "step": 112 + }, + { + "epoch": 0.05, + "learning_rate": 4.27739523661862e-06, + "loss": 0.7174, + "step": 116 + }, + { + "epoch": 0.05, + "learning_rate": 4.317135669700268e-06, + "loss": 0.8711, + "step": 120 + }, + { + "epoch": 0.05, + "learning_rate": 4.355269982485126e-06, + "loss": 0.9286, + "step": 124 + }, + { + "epoch": 0.05, + "learning_rate": 4.391922971188186e-06, + "loss": 0.9947, + "step": 128 + }, + { + "epoch": 0.06, + "learning_rate": 4.427205429264097e-06, + "loss": 0.8179, + "step": 132 + }, + { + "epoch": 0.06, + "learning_rate": 4.461216167003915e-06, + "loss": 0.6159, + "step": 136 + }, + { + "epoch": 0.06, + "learning_rate": 4.494043679663919e-06, + "loss": 0.8981, + "step": 140 + }, + { + "epoch": 0.06, + "learning_rate": 4.525767535063022e-06, + "loss": 0.9206, + "step": 144 + }, + { + "epoch": 0.06, + "learning_rate": 4.556459535424214e-06, + "loss": 0.7768, + "step": 148 + }, + { + "epoch": 0.06, + "learning_rate": 4.5861846961499975e-06, + "loss": 0.8515, + "step": 152 + }, + { + "epoch": 0.07, + "learning_rate": 4.6150020750907925e-06, + "loss": 0.8131, + "step": 156 + }, + { + "epoch": 0.07, + "learning_rate": 4.642965478900328e-06, + "loss": 1.1253, + "step": 160 + }, + { + "epoch": 0.07, + "learning_rate": 4.670124067711698e-06, + "loss": 0.9236, + "step": 164 + }, + { + "epoch": 0.07, + "learning_rate": 4.689992082159791e-06, + "loss": 0.8775, + "step": 168 + }, + { + "epoch": 0.07, + "learning_rate": 4.715848379822425e-06, + "loss": 0.8848, + "step": 172 + }, + { + "epoch": 0.07, + "learning_rate": 4.74101509336297e-06, + "loss": 0.829, + "step": 176 + }, + { + "epoch": 0.08, + "learning_rate": 4.76552805154028e-06, + "loss": 0.9367, + "step": 180 + }, + { + "epoch": 0.08, + "learning_rate": 4.789420361336724e-06, + "loss": 0.6566, + "step": 184 + }, + { + "epoch": 0.08, + "learning_rate": 4.812722676847563e-06, + "loss": 0.8104, + "step": 188 + }, + { + "epoch": 0.08, + "learning_rate": 4.835463435763974e-06, + "loss": 0.74, + "step": 192 + }, + { + "epoch": 0.08, + "learning_rate": 4.857669068026358e-06, + "loss": 0.6171, + "step": 196 + }, + { + "epoch": 0.09, + "learning_rate": 4.879364180487766e-06, + "loss": 1.0545, + "step": 200 + }, + { + "epoch": 0.09, + "learning_rate": 4.900571720823068e-06, + "loss": 0.9191, + "step": 204 + }, + { + "epoch": 0.09, + "learning_rate": 4.921313123421507e-06, + "loss": 0.6995, + "step": 208 + }, + { + "epoch": 0.09, + "learning_rate": 4.941608439588058e-06, + "loss": 0.8479, + "step": 212 + }, + { + "epoch": 0.09, + "learning_rate": 4.9614764540361516e-06, + "loss": 0.8976, + "step": 216 + }, + { + "epoch": 0.09, + "learning_rate": 4.980934789368156e-06, + "loss": 0.851, + "step": 220 + }, + { + "epoch": 0.1, + "learning_rate": 5e-06, + "loss": 0.7368, + "step": 224 + }, + { + "epoch": 0.1, + "learning_rate": 4.997812135355893e-06, + "loss": 1.0125, + "step": 228 + }, + { + "epoch": 0.1, + "learning_rate": 4.994894982497083e-06, + "loss": 0.9607, + "step": 232 + }, + { + "epoch": 0.1, + "learning_rate": 4.991977829638274e-06, + "loss": 0.6321, + "step": 236 + }, + { + "epoch": 0.1, + "learning_rate": 4.989060676779464e-06, + "loss": 0.7108, + "step": 240 + }, + { + "epoch": 0.1, + "learning_rate": 4.986143523920654e-06, + "loss": 0.7881, + "step": 244 + }, + { + "epoch": 0.11, + "learning_rate": 4.983226371061844e-06, + "loss": 0.7147, + "step": 248 + }, + { + "epoch": 0.11, + "learning_rate": 4.980309218203034e-06, + "loss": 0.9225, + "step": 252 + }, + { + "epoch": 0.11, + "learning_rate": 4.977392065344224e-06, + "loss": 0.8849, + "step": 256 + }, + { + "epoch": 0.11, + "learning_rate": 4.974474912485414e-06, + "loss": 0.942, + "step": 260 + }, + { + "epoch": 0.11, + "learning_rate": 4.971557759626604e-06, + "loss": 1.1629, + "step": 264 + }, + { + "epoch": 0.11, + "learning_rate": 4.968640606767795e-06, + "loss": 0.8053, + "step": 268 + }, + { + "epoch": 0.12, + "learning_rate": 4.965723453908986e-06, + "loss": 0.6558, + "step": 272 + }, + { + "epoch": 0.12, + "learning_rate": 4.962806301050176e-06, + "loss": 0.9606, + "step": 276 + }, + { + "epoch": 0.12, + "learning_rate": 4.959889148191366e-06, + "loss": 0.9533, + "step": 280 + }, + { + "epoch": 0.12, + "learning_rate": 4.956971995332556e-06, + "loss": 0.8758, + "step": 284 + }, + { + "epoch": 0.12, + "learning_rate": 4.954054842473746e-06, + "loss": 0.5825, + "step": 288 + }, + { + "epoch": 0.12, + "learning_rate": 4.951137689614936e-06, + "loss": 0.8221, + "step": 292 + }, + { + "epoch": 0.13, + "learning_rate": 4.948220536756126e-06, + "loss": 0.8954, + "step": 296 + }, + { + "epoch": 0.13, + "learning_rate": 4.945303383897317e-06, + "loss": 0.9549, + "step": 300 + }, + { + "epoch": 0.13, + "learning_rate": 4.942386231038507e-06, + "loss": 0.8821, + "step": 304 + }, + { + "epoch": 0.13, + "learning_rate": 4.939469078179697e-06, + "loss": 0.9861, + "step": 308 + }, + { + "epoch": 0.13, + "learning_rate": 4.936551925320887e-06, + "loss": 0.9681, + "step": 312 + }, + { + "epoch": 0.13, + "learning_rate": 4.933634772462078e-06, + "loss": 0.647, + "step": 316 + }, + { + "epoch": 0.14, + "learning_rate": 4.930717619603268e-06, + "loss": 0.8151, + "step": 320 + }, + { + "epoch": 0.14, + "learning_rate": 4.9278004667444575e-06, + "loss": 1.0373, + "step": 324 + }, + { + "epoch": 0.14, + "learning_rate": 4.9248833138856475e-06, + "loss": 0.8858, + "step": 328 + }, + { + "epoch": 0.14, + "learning_rate": 4.921966161026838e-06, + "loss": 0.8392, + "step": 332 + }, + { + "epoch": 0.14, + "learning_rate": 4.919049008168029e-06, + "loss": 1.0194, + "step": 336 + }, + { + "epoch": 0.14, + "learning_rate": 4.916131855309218e-06, + "loss": 0.7719, + "step": 340 + }, + { + "epoch": 0.15, + "learning_rate": 4.913214702450409e-06, + "loss": 0.6672, + "step": 344 + }, + { + "epoch": 0.15, + "learning_rate": 4.910297549591599e-06, + "loss": 0.8939, + "step": 348 + }, + { + "epoch": 0.15, + "learning_rate": 4.90738039673279e-06, + "loss": 0.6781, + "step": 352 + }, + { + "epoch": 0.15, + "learning_rate": 4.90446324387398e-06, + "loss": 0.7391, + "step": 356 + }, + { + "epoch": 0.15, + "learning_rate": 4.9015460910151695e-06, + "loss": 0.9476, + "step": 360 + }, + { + "epoch": 0.15, + "learning_rate": 4.8986289381563595e-06, + "loss": 0.6729, + "step": 364 + }, + { + "epoch": 0.16, + "learning_rate": 4.89571178529755e-06, + "loss": 0.8046, + "step": 368 + }, + { + "epoch": 0.16, + "learning_rate": 4.89279463243874e-06, + "loss": 0.8521, + "step": 372 + }, + { + "epoch": 0.16, + "learning_rate": 4.88987747957993e-06, + "loss": 0.8688, + "step": 376 + }, + { + "epoch": 0.16, + "learning_rate": 4.886960326721121e-06, + "loss": 0.875, + "step": 380 + }, + { + "epoch": 0.16, + "learning_rate": 4.884043173862311e-06, + "loss": 0.6733, + "step": 384 + }, + { + "epoch": 0.16, + "learning_rate": 4.881126021003501e-06, + "loss": 0.7755, + "step": 388 + }, + { + "epoch": 0.17, + "learning_rate": 4.878208868144691e-06, + "loss": 0.7047, + "step": 392 + }, + { + "epoch": 0.17, + "learning_rate": 4.8752917152858815e-06, + "loss": 0.6979, + "step": 396 + }, + { + "epoch": 0.17, + "learning_rate": 4.8723745624270714e-06, + "loss": 0.8033, + "step": 400 + }, + { + "epoch": 0.17, + "learning_rate": 4.869457409568261e-06, + "loss": 0.8039, + "step": 404 + }, + { + "epoch": 0.17, + "learning_rate": 4.866540256709452e-06, + "loss": 0.9366, + "step": 408 + }, + { + "epoch": 0.18, + "learning_rate": 4.863623103850642e-06, + "loss": 0.609, + "step": 412 + }, + { + "epoch": 0.18, + "learning_rate": 4.860705950991833e-06, + "loss": 0.7258, + "step": 416 + }, + { + "epoch": 0.18, + "learning_rate": 4.857788798133022e-06, + "loss": 1.018, + "step": 420 + }, + { + "epoch": 0.18, + "learning_rate": 4.854871645274213e-06, + "loss": 0.7338, + "step": 424 + }, + { + "epoch": 0.18, + "learning_rate": 4.851954492415403e-06, + "loss": 0.6595, + "step": 428 + }, + { + "epoch": 0.18, + "learning_rate": 4.8490373395565935e-06, + "loss": 0.7456, + "step": 432 + }, + { + "epoch": 0.19, + "learning_rate": 4.8461201866977834e-06, + "loss": 0.637, + "step": 436 + }, + { + "epoch": 0.19, + "learning_rate": 4.843203033838973e-06, + "loss": 0.8026, + "step": 440 + }, + { + "epoch": 0.19, + "learning_rate": 4.840285880980164e-06, + "loss": 0.8439, + "step": 444 + }, + { + "epoch": 0.19, + "learning_rate": 4.837368728121354e-06, + "loss": 0.646, + "step": 448 + }, + { + "epoch": 0.19, + "learning_rate": 4.834451575262544e-06, + "loss": 0.8644, + "step": 452 + }, + { + "epoch": 0.19, + "learning_rate": 4.831534422403734e-06, + "loss": 0.8595, + "step": 456 + }, + { + "epoch": 0.2, + "learning_rate": 4.828617269544925e-06, + "loss": 1.023, + "step": 460 + }, + { + "epoch": 0.2, + "learning_rate": 4.825700116686115e-06, + "loss": 0.7745, + "step": 464 + }, + { + "epoch": 0.2, + "learning_rate": 4.822782963827305e-06, + "loss": 0.9293, + "step": 468 + }, + { + "epoch": 0.2, + "learning_rate": 4.8198658109684954e-06, + "loss": 0.7542, + "step": 472 + }, + { + "epoch": 0.2, + "learning_rate": 4.816948658109685e-06, + "loss": 1.0059, + "step": 476 + }, + { + "epoch": 0.2, + "learning_rate": 4.814031505250875e-06, + "loss": 0.8302, + "step": 480 + }, + { + "epoch": 0.21, + "learning_rate": 4.811114352392065e-06, + "loss": 0.809, + "step": 484 + }, + { + "epoch": 0.21, + "learning_rate": 4.808197199533256e-06, + "loss": 0.7628, + "step": 488 + }, + { + "epoch": 0.21, + "learning_rate": 4.805280046674446e-06, + "loss": 0.6753, + "step": 492 + }, + { + "epoch": 0.21, + "learning_rate": 4.802362893815637e-06, + "loss": 0.8719, + "step": 496 + }, + { + "epoch": 0.21, + "learning_rate": 4.799445740956827e-06, + "loss": 0.7059, + "step": 500 + }, + { + "epoch": 0.21, + "learning_rate": 4.796528588098017e-06, + "loss": 0.6902, + "step": 504 + }, + { + "epoch": 0.22, + "learning_rate": 4.793611435239207e-06, + "loss": 1.104, + "step": 508 + }, + { + "epoch": 0.22, + "learning_rate": 4.790694282380397e-06, + "loss": 0.837, + "step": 512 + }, + { + "epoch": 0.22, + "learning_rate": 4.787777129521587e-06, + "loss": 0.794, + "step": 516 + }, + { + "epoch": 0.22, + "learning_rate": 4.784859976662777e-06, + "loss": 0.7997, + "step": 520 + }, + { + "epoch": 0.22, + "learning_rate": 4.781942823803968e-06, + "loss": 0.6738, + "step": 524 + }, + { + "epoch": 0.22, + "learning_rate": 4.779025670945158e-06, + "loss": 0.963, + "step": 528 + }, + { + "epoch": 0.23, + "learning_rate": 4.776108518086348e-06, + "loss": 0.6323, + "step": 532 + }, + { + "epoch": 0.23, + "learning_rate": 4.773191365227539e-06, + "loss": 0.7243, + "step": 536 + }, + { + "epoch": 0.23, + "learning_rate": 4.770274212368729e-06, + "loss": 0.8873, + "step": 540 + }, + { + "epoch": 0.23, + "learning_rate": 4.7673570595099186e-06, + "loss": 0.7435, + "step": 544 + }, + { + "epoch": 0.23, + "learning_rate": 4.7644399066511085e-06, + "loss": 0.8006, + "step": 548 + }, + { + "epoch": 0.23, + "learning_rate": 4.761522753792299e-06, + "loss": 0.9557, + "step": 552 + }, + { + "epoch": 0.24, + "learning_rate": 4.758605600933489e-06, + "loss": 0.7131, + "step": 556 + }, + { + "epoch": 0.24, + "learning_rate": 4.755688448074679e-06, + "loss": 0.8007, + "step": 560 + }, + { + "epoch": 0.24, + "learning_rate": 4.752771295215869e-06, + "loss": 0.8634, + "step": 564 + }, + { + "epoch": 0.24, + "learning_rate": 4.74985414235706e-06, + "loss": 0.7848, + "step": 568 + }, + { + "epoch": 0.24, + "learning_rate": 4.746936989498251e-06, + "loss": 0.6159, + "step": 572 + }, + { + "epoch": 0.24, + "learning_rate": 4.744019836639441e-06, + "loss": 0.7327, + "step": 576 + }, + { + "epoch": 0.25, + "learning_rate": 4.7411026837806305e-06, + "loss": 0.733, + "step": 580 + }, + { + "epoch": 0.25, + "learning_rate": 4.7381855309218205e-06, + "loss": 0.8081, + "step": 584 + }, + { + "epoch": 0.25, + "learning_rate": 4.735268378063011e-06, + "loss": 0.9809, + "step": 588 + }, + { + "epoch": 0.25, + "learning_rate": 4.732351225204201e-06, + "loss": 0.7101, + "step": 592 + }, + { + "epoch": 0.25, + "learning_rate": 4.729434072345391e-06, + "loss": 0.6195, + "step": 596 + }, + { + "epoch": 0.26, + "learning_rate": 4.726516919486582e-06, + "loss": 0.6483, + "step": 600 + }, + { + "epoch": 0.26, + "learning_rate": 4.723599766627772e-06, + "loss": 0.819, + "step": 604 + }, + { + "epoch": 0.26, + "learning_rate": 4.720682613768962e-06, + "loss": 0.7032, + "step": 608 + }, + { + "epoch": 0.26, + "learning_rate": 4.717765460910152e-06, + "loss": 0.7933, + "step": 612 + }, + { + "epoch": 0.26, + "learning_rate": 4.7148483080513425e-06, + "loss": 0.9605, + "step": 616 + }, + { + "epoch": 0.26, + "learning_rate": 4.7119311551925325e-06, + "loss": 0.7783, + "step": 620 + }, + { + "epoch": 0.27, + "learning_rate": 4.709014002333722e-06, + "loss": 0.7616, + "step": 624 + }, + { + "epoch": 0.27, + "learning_rate": 4.706096849474912e-06, + "loss": 0.6611, + "step": 628 + }, + { + "epoch": 0.27, + "learning_rate": 4.703179696616103e-06, + "loss": 0.656, + "step": 632 + }, + { + "epoch": 0.27, + "learning_rate": 4.700262543757294e-06, + "loss": 0.731, + "step": 636 + }, + { + "epoch": 0.27, + "learning_rate": 4.697345390898483e-06, + "loss": 0.6204, + "step": 640 + }, + { + "epoch": 0.27, + "learning_rate": 4.694428238039674e-06, + "loss": 0.695, + "step": 644 + }, + { + "epoch": 0.28, + "learning_rate": 4.691511085180864e-06, + "loss": 0.9691, + "step": 648 + }, + { + "epoch": 0.28, + "learning_rate": 4.6885939323220545e-06, + "loss": 0.5099, + "step": 652 + }, + { + "epoch": 0.28, + "learning_rate": 4.6856767794632445e-06, + "loss": 0.6427, + "step": 656 + }, + { + "epoch": 0.28, + "learning_rate": 4.682759626604434e-06, + "loss": 0.9209, + "step": 660 + }, + { + "epoch": 0.28, + "learning_rate": 4.679842473745624e-06, + "loss": 0.891, + "step": 664 + }, + { + "epoch": 0.28, + "learning_rate": 4.676925320886815e-06, + "loss": 0.7068, + "step": 668 + }, + { + "epoch": 0.29, + "learning_rate": 4.674008168028005e-06, + "loss": 0.4388, + "step": 672 + }, + { + "epoch": 0.29, + "learning_rate": 4.671091015169195e-06, + "loss": 0.7769, + "step": 676 + }, + { + "epoch": 0.29, + "learning_rate": 4.668173862310386e-06, + "loss": 1.03, + "step": 680 + }, + { + "epoch": 0.29, + "learning_rate": 4.665256709451576e-06, + "loss": 0.9345, + "step": 684 + }, + { + "epoch": 0.29, + "learning_rate": 4.662339556592766e-06, + "loss": 0.8261, + "step": 688 + }, + { + "epoch": 0.29, + "learning_rate": 4.659422403733956e-06, + "loss": 0.5926, + "step": 692 + }, + { + "epoch": 0.3, + "learning_rate": 4.656505250875146e-06, + "loss": 0.618, + "step": 696 + }, + { + "epoch": 0.3, + "learning_rate": 4.653588098016336e-06, + "loss": 0.8133, + "step": 700 + }, + { + "epoch": 0.3, + "learning_rate": 4.650670945157526e-06, + "loss": 0.7822, + "step": 704 + }, + { + "epoch": 0.3, + "learning_rate": 4.647753792298717e-06, + "loss": 0.6367, + "step": 708 + }, + { + "epoch": 0.3, + "learning_rate": 4.644836639439907e-06, + "loss": 0.7457, + "step": 712 + }, + { + "epoch": 0.3, + "learning_rate": 4.641919486581098e-06, + "loss": 0.5508, + "step": 716 + }, + { + "epoch": 0.31, + "learning_rate": 4.639002333722287e-06, + "loss": 0.8247, + "step": 720 + }, + { + "epoch": 0.31, + "learning_rate": 4.636085180863478e-06, + "loss": 0.8024, + "step": 724 + }, + { + "epoch": 0.31, + "learning_rate": 4.633168028004668e-06, + "loss": 0.5489, + "step": 728 + }, + { + "epoch": 0.31, + "learning_rate": 4.630250875145858e-06, + "loss": 0.5789, + "step": 732 + }, + { + "epoch": 0.31, + "learning_rate": 4.627333722287048e-06, + "loss": 0.7541, + "step": 736 + }, + { + "epoch": 0.31, + "learning_rate": 4.624416569428238e-06, + "loss": 0.6148, + "step": 740 + }, + { + "epoch": 0.32, + "learning_rate": 4.621499416569429e-06, + "loss": 0.98, + "step": 744 + }, + { + "epoch": 0.32, + "learning_rate": 4.618582263710619e-06, + "loss": 0.635, + "step": 748 + }, + { + "epoch": 0.32, + "learning_rate": 4.615665110851809e-06, + "loss": 0.9664, + "step": 752 + }, + { + "epoch": 0.32, + "learning_rate": 4.612747957992999e-06, + "loss": 0.8736, + "step": 756 + }, + { + "epoch": 0.32, + "learning_rate": 4.60983080513419e-06, + "loss": 0.6281, + "step": 760 + }, + { + "epoch": 0.32, + "learning_rate": 4.60691365227538e-06, + "loss": 0.9843, + "step": 764 + }, + { + "epoch": 0.33, + "learning_rate": 4.6039964994165695e-06, + "loss": 0.7795, + "step": 768 + }, + { + "epoch": 0.33, + "learning_rate": 4.60107934655776e-06, + "loss": 0.8437, + "step": 772 + }, + { + "epoch": 0.33, + "learning_rate": 4.59816219369895e-06, + "loss": 0.6626, + "step": 776 + }, + { + "epoch": 0.33, + "learning_rate": 4.59524504084014e-06, + "loss": 0.8164, + "step": 780 + }, + { + "epoch": 0.33, + "learning_rate": 4.59232788798133e-06, + "loss": 0.7359, + "step": 784 + }, + { + "epoch": 0.34, + "learning_rate": 4.589410735122521e-06, + "loss": 0.7709, + "step": 788 + }, + { + "epoch": 0.34, + "learning_rate": 4.586493582263711e-06, + "loss": 0.8953, + "step": 792 + }, + { + "epoch": 0.34, + "learning_rate": 4.583576429404902e-06, + "loss": 0.57, + "step": 796 + }, + { + "epoch": 0.34, + "learning_rate": 4.5806592765460916e-06, + "loss": 0.9151, + "step": 800 + }, + { + "epoch": 0.34, + "learning_rate": 4.5777421236872815e-06, + "loss": 0.7838, + "step": 804 + }, + { + "epoch": 0.34, + "learning_rate": 4.574824970828472e-06, + "loss": 0.8183, + "step": 808 + }, + { + "epoch": 0.35, + "learning_rate": 4.571907817969662e-06, + "loss": 0.9169, + "step": 812 + }, + { + "epoch": 0.35, + "learning_rate": 4.568990665110852e-06, + "loss": 0.6786, + "step": 816 + }, + { + "epoch": 0.35, + "learning_rate": 4.566073512252042e-06, + "loss": 0.7783, + "step": 820 + }, + { + "epoch": 0.35, + "learning_rate": 4.563156359393233e-06, + "loss": 0.811, + "step": 824 + }, + { + "epoch": 0.35, + "learning_rate": 4.560239206534423e-06, + "loss": 0.8965, + "step": 828 + }, + { + "epoch": 0.35, + "learning_rate": 4.557322053675613e-06, + "loss": 0.7526, + "step": 832 + }, + { + "epoch": 0.36, + "learning_rate": 4.5544049008168036e-06, + "loss": 0.6549, + "step": 836 + }, + { + "epoch": 0.36, + "learning_rate": 4.5514877479579935e-06, + "loss": 0.77, + "step": 840 + }, + { + "epoch": 0.36, + "learning_rate": 4.5485705950991834e-06, + "loss": 0.5399, + "step": 844 + }, + { + "epoch": 0.36, + "learning_rate": 4.545653442240373e-06, + "loss": 0.7214, + "step": 848 + }, + { + "epoch": 0.36, + "learning_rate": 4.542736289381564e-06, + "loss": 0.7454, + "step": 852 + }, + { + "epoch": 0.36, + "learning_rate": 4.539819136522754e-06, + "loss": 0.6774, + "step": 856 + }, + { + "epoch": 0.37, + "learning_rate": 4.536901983663944e-06, + "loss": 0.855, + "step": 860 + }, + { + "epoch": 0.37, + "learning_rate": 4.533984830805134e-06, + "loss": 0.8038, + "step": 864 + }, + { + "epoch": 0.37, + "learning_rate": 4.531067677946325e-06, + "loss": 0.6897, + "step": 868 + }, + { + "epoch": 0.37, + "learning_rate": 4.5281505250875156e-06, + "loss": 0.774, + "step": 872 + }, + { + "epoch": 0.37, + "learning_rate": 4.525233372228705e-06, + "loss": 0.9166, + "step": 876 + }, + { + "epoch": 0.37, + "learning_rate": 4.5223162193698954e-06, + "loss": 0.5643, + "step": 880 + }, + { + "epoch": 0.38, + "learning_rate": 4.519399066511085e-06, + "loss": 0.5427, + "step": 884 + }, + { + "epoch": 0.38, + "learning_rate": 4.516481913652276e-06, + "loss": 0.6834, + "step": 888 + }, + { + "epoch": 0.38, + "learning_rate": 4.513564760793466e-06, + "loss": 0.8979, + "step": 892 + }, + { + "epoch": 0.38, + "learning_rate": 4.510647607934656e-06, + "loss": 0.8804, + "step": 896 + }, + { + "epoch": 0.38, + "learning_rate": 4.507730455075847e-06, + "loss": 0.6715, + "step": 900 + }, + { + "epoch": 0.38, + "learning_rate": 4.504813302217037e-06, + "loss": 0.8331, + "step": 904 + }, + { + "epoch": 0.39, + "learning_rate": 4.501896149358227e-06, + "loss": 0.9623, + "step": 908 + }, + { + "epoch": 0.39, + "learning_rate": 4.498978996499417e-06, + "loss": 0.7293, + "step": 912 + }, + { + "epoch": 0.39, + "learning_rate": 4.496061843640607e-06, + "loss": 0.6704, + "step": 916 + }, + { + "epoch": 0.39, + "learning_rate": 4.493144690781797e-06, + "loss": 0.6452, + "step": 920 + }, + { + "epoch": 0.39, + "learning_rate": 4.490227537922987e-06, + "loss": 0.7556, + "step": 924 + }, + { + "epoch": 0.39, + "learning_rate": 4.487310385064177e-06, + "loss": 0.6235, + "step": 928 + }, + { + "epoch": 0.4, + "learning_rate": 4.484393232205368e-06, + "loss": 0.789, + "step": 932 + }, + { + "epoch": 0.4, + "learning_rate": 4.481476079346558e-06, + "loss": 0.7959, + "step": 936 + }, + { + "epoch": 0.4, + "learning_rate": 4.478558926487748e-06, + "loss": 0.8286, + "step": 940 + }, + { + "epoch": 0.4, + "learning_rate": 4.475641773628939e-06, + "loss": 0.8604, + "step": 944 + }, + { + "epoch": 0.4, + "learning_rate": 4.472724620770129e-06, + "loss": 0.6135, + "step": 948 + }, + { + "epoch": 0.4, + "learning_rate": 4.469807467911319e-06, + "loss": 0.4793, + "step": 952 + }, + { + "epoch": 0.41, + "learning_rate": 4.4668903150525085e-06, + "loss": 0.7225, + "step": 956 + }, + { + "epoch": 0.41, + "learning_rate": 4.463973162193699e-06, + "loss": 0.5255, + "step": 960 + }, + { + "epoch": 0.41, + "learning_rate": 4.461056009334889e-06, + "loss": 0.4695, + "step": 964 + }, + { + "epoch": 0.41, + "learning_rate": 4.45813885647608e-06, + "loss": 0.8741, + "step": 968 + }, + { + "epoch": 0.41, + "learning_rate": 4.45522170361727e-06, + "loss": 0.6311, + "step": 972 + }, + { + "epoch": 0.41, + "learning_rate": 4.45230455075846e-06, + "loss": 0.6423, + "step": 976 + }, + { + "epoch": 0.42, + "learning_rate": 4.449387397899651e-06, + "loss": 0.7879, + "step": 980 + }, + { + "epoch": 0.42, + "learning_rate": 4.446470245040841e-06, + "loss": 0.9635, + "step": 984 + }, + { + "epoch": 0.42, + "learning_rate": 4.4435530921820306e-06, + "loss": 0.6497, + "step": 988 + }, + { + "epoch": 0.42, + "learning_rate": 4.4406359393232205e-06, + "loss": 0.673, + "step": 992 + }, + { + "epoch": 0.42, + "learning_rate": 4.437718786464411e-06, + "loss": 0.7035, + "step": 996 + }, + { + "epoch": 0.43, + "learning_rate": 4.434801633605601e-06, + "loss": 0.7664, + "step": 1000 + }, + { + "epoch": 0.43, + "learning_rate": 4.431884480746791e-06, + "loss": 0.6817, + "step": 1004 + }, + { + "epoch": 0.43, + "learning_rate": 4.428967327887982e-06, + "loss": 0.7443, + "step": 1008 + }, + { + "epoch": 0.43, + "learning_rate": 4.426050175029172e-06, + "loss": 0.7205, + "step": 1012 + }, + { + "epoch": 0.43, + "learning_rate": 4.423133022170362e-06, + "loss": 0.6672, + "step": 1016 + }, + { + "epoch": 0.43, + "learning_rate": 4.420215869311552e-06, + "loss": 0.7427, + "step": 1020 + }, + { + "epoch": 0.44, + "learning_rate": 4.4172987164527425e-06, + "loss": 0.659, + "step": 1024 + }, + { + "epoch": 0.44, + "learning_rate": 4.4143815635939325e-06, + "loss": 0.832, + "step": 1028 + }, + { + "epoch": 0.44, + "learning_rate": 4.411464410735123e-06, + "loss": 0.7823, + "step": 1032 + }, + { + "epoch": 0.44, + "learning_rate": 4.408547257876313e-06, + "loss": 0.712, + "step": 1036 + }, + { + "epoch": 0.44, + "learning_rate": 4.405630105017503e-06, + "loss": 0.8165, + "step": 1040 + }, + { + "epoch": 0.44, + "learning_rate": 4.402712952158694e-06, + "loss": 0.6221, + "step": 1044 + }, + { + "epoch": 0.45, + "learning_rate": 4.399795799299884e-06, + "loss": 0.684, + "step": 1048 + }, + { + "epoch": 0.45, + "learning_rate": 4.396878646441074e-06, + "loss": 0.6845, + "step": 1052 + }, + { + "epoch": 0.45, + "learning_rate": 4.393961493582264e-06, + "loss": 0.8462, + "step": 1056 + }, + { + "epoch": 0.45, + "learning_rate": 4.3910443407234545e-06, + "loss": 0.6242, + "step": 1060 + }, + { + "epoch": 0.45, + "learning_rate": 4.3881271878646445e-06, + "loss": 0.5025, + "step": 1064 + }, + { + "epoch": 0.45, + "learning_rate": 4.385210035005834e-06, + "loss": 0.6651, + "step": 1068 + }, + { + "epoch": 0.46, + "learning_rate": 4.382292882147025e-06, + "loss": 0.7653, + "step": 1072 + }, + { + "epoch": 0.46, + "learning_rate": 4.379375729288215e-06, + "loss": 0.9375, + "step": 1076 + }, + { + "epoch": 0.46, + "learning_rate": 4.376458576429405e-06, + "loss": 0.6936, + "step": 1080 + }, + { + "epoch": 0.46, + "learning_rate": 4.373541423570595e-06, + "loss": 0.6709, + "step": 1084 + }, + { + "epoch": 0.46, + "learning_rate": 4.370624270711786e-06, + "loss": 0.7321, + "step": 1088 + }, + { + "epoch": 0.46, + "learning_rate": 4.367707117852976e-06, + "loss": 1.0432, + "step": 1092 + }, + { + "epoch": 0.47, + "learning_rate": 4.364789964994166e-06, + "loss": 0.6687, + "step": 1096 + }, + { + "epoch": 0.47, + "learning_rate": 4.3618728121353565e-06, + "loss": 0.9193, + "step": 1100 + }, + { + "epoch": 0.47, + "learning_rate": 4.358955659276546e-06, + "loss": 0.5975, + "step": 1104 + }, + { + "epoch": 0.47, + "learning_rate": 4.356038506417737e-06, + "loss": 0.7527, + "step": 1108 + }, + { + "epoch": 0.47, + "learning_rate": 4.353121353558927e-06, + "loss": 0.9754, + "step": 1112 + }, + { + "epoch": 0.47, + "learning_rate": 4.350204200700117e-06, + "loss": 0.8299, + "step": 1116 + }, + { + "epoch": 0.48, + "learning_rate": 4.347287047841307e-06, + "loss": 0.8524, + "step": 1120 + }, + { + "epoch": 0.48, + "learning_rate": 4.344369894982498e-06, + "loss": 0.8139, + "step": 1124 + }, + { + "epoch": 0.48, + "learning_rate": 4.341452742123688e-06, + "loss": 0.6144, + "step": 1128 + }, + { + "epoch": 0.48, + "learning_rate": 4.338535589264878e-06, + "loss": 0.8328, + "step": 1132 + }, + { + "epoch": 0.48, + "learning_rate": 4.3356184364060684e-06, + "loss": 0.5855, + "step": 1136 + }, + { + "epoch": 0.48, + "learning_rate": 4.332701283547258e-06, + "loss": 0.8227, + "step": 1140 + }, + { + "epoch": 0.49, + "learning_rate": 4.329784130688448e-06, + "loss": 0.6442, + "step": 1144 + }, + { + "epoch": 0.49, + "learning_rate": 4.326866977829638e-06, + "loss": 0.5148, + "step": 1148 + }, + { + "epoch": 0.49, + "learning_rate": 4.323949824970829e-06, + "loss": 0.8127, + "step": 1152 + }, + { + "epoch": 0.49, + "learning_rate": 4.321032672112019e-06, + "loss": 0.8909, + "step": 1156 + }, + { + "epoch": 0.49, + "learning_rate": 4.318115519253209e-06, + "loss": 0.6379, + "step": 1160 + }, + { + "epoch": 0.49, + "learning_rate": 4.315198366394399e-06, + "loss": 0.5912, + "step": 1164 + }, + { + "epoch": 0.5, + "learning_rate": 4.31228121353559e-06, + "loss": 0.6885, + "step": 1168 + }, + { + "epoch": 0.5, + "learning_rate": 4.3093640606767804e-06, + "loss": 0.8584, + "step": 1172 + }, + { + "epoch": 0.5, + "learning_rate": 4.3064469078179695e-06, + "loss": 0.6441, + "step": 1176 + }, + { + "epoch": 0.5, + "learning_rate": 4.30352975495916e-06, + "loss": 0.6933, + "step": 1180 + }, + { + "epoch": 0.5, + "learning_rate": 4.30061260210035e-06, + "loss": 0.6647, + "step": 1184 + }, + { + "epoch": 0.51, + "learning_rate": 4.297695449241541e-06, + "loss": 0.7737, + "step": 1188 + }, + { + "epoch": 0.51, + "learning_rate": 4.294778296382731e-06, + "loss": 0.8209, + "step": 1192 + }, + { + "epoch": 0.51, + "learning_rate": 4.291861143523921e-06, + "loss": 0.9695, + "step": 1196 + }, + { + "epoch": 0.51, + "learning_rate": 4.288943990665112e-06, + "loss": 0.855, + "step": 1200 + }, + { + "epoch": 0.51, + "learning_rate": 4.286026837806302e-06, + "loss": 0.7051, + "step": 1204 + }, + { + "epoch": 0.51, + "learning_rate": 4.283109684947492e-06, + "loss": 0.4676, + "step": 1208 + }, + { + "epoch": 0.52, + "learning_rate": 4.2801925320886815e-06, + "loss": 0.6563, + "step": 1212 + }, + { + "epoch": 0.52, + "learning_rate": 4.277275379229872e-06, + "loss": 0.6791, + "step": 1216 + }, + { + "epoch": 0.52, + "learning_rate": 4.274358226371062e-06, + "loss": 0.6531, + "step": 1220 + }, + { + "epoch": 0.52, + "learning_rate": 4.271441073512252e-06, + "loss": 0.7638, + "step": 1224 + }, + { + "epoch": 0.52, + "learning_rate": 4.268523920653442e-06, + "loss": 0.8035, + "step": 1228 + }, + { + "epoch": 0.52, + "learning_rate": 4.265606767794633e-06, + "loss": 0.6947, + "step": 1232 + }, + { + "epoch": 0.53, + "learning_rate": 4.262689614935823e-06, + "loss": 0.4111, + "step": 1236 + }, + { + "epoch": 0.53, + "learning_rate": 4.259772462077013e-06, + "loss": 0.787, + "step": 1240 + }, + { + "epoch": 0.53, + "learning_rate": 4.2568553092182036e-06, + "loss": 0.7891, + "step": 1244 + }, + { + "epoch": 0.53, + "learning_rate": 4.2539381563593935e-06, + "loss": 0.6001, + "step": 1248 + }, + { + "epoch": 0.53, + "learning_rate": 4.251021003500584e-06, + "loss": 0.5617, + "step": 1252 + }, + { + "epoch": 0.53, + "learning_rate": 4.248103850641773e-06, + "loss": 0.9523, + "step": 1256 + }, + { + "epoch": 0.54, + "learning_rate": 4.245186697782964e-06, + "loss": 0.8455, + "step": 1260 + }, + { + "epoch": 0.54, + "learning_rate": 4.242269544924154e-06, + "loss": 0.7674, + "step": 1264 + }, + { + "epoch": 0.54, + "learning_rate": 4.239352392065345e-06, + "loss": 0.5396, + "step": 1268 + }, + { + "epoch": 0.54, + "learning_rate": 4.236435239206535e-06, + "loss": 0.5736, + "step": 1272 + }, + { + "epoch": 0.54, + "learning_rate": 4.233518086347725e-06, + "loss": 0.5877, + "step": 1276 + }, + { + "epoch": 0.54, + "learning_rate": 4.2306009334889156e-06, + "loss": 0.6466, + "step": 1280 + }, + { + "epoch": 0.55, + "learning_rate": 4.2276837806301055e-06, + "loss": 0.4551, + "step": 1284 + }, + { + "epoch": 0.55, + "learning_rate": 4.2247666277712954e-06, + "loss": 0.5959, + "step": 1288 + }, + { + "epoch": 0.55, + "learning_rate": 4.221849474912485e-06, + "loss": 0.6903, + "step": 1292 + }, + { + "epoch": 0.55, + "learning_rate": 4.218932322053676e-06, + "loss": 0.7372, + "step": 1296 + }, + { + "epoch": 0.55, + "learning_rate": 4.216015169194866e-06, + "loss": 0.7741, + "step": 1300 + }, + { + "epoch": 0.55, + "learning_rate": 4.213098016336056e-06, + "loss": 0.6861, + "step": 1304 + }, + { + "epoch": 0.56, + "learning_rate": 4.210180863477247e-06, + "loss": 0.7325, + "step": 1308 + }, + { + "epoch": 0.56, + "learning_rate": 4.207263710618437e-06, + "loss": 0.6446, + "step": 1312 + }, + { + "epoch": 0.56, + "learning_rate": 4.204346557759627e-06, + "loss": 0.61, + "step": 1316 + }, + { + "epoch": 0.56, + "learning_rate": 4.201429404900817e-06, + "loss": 0.6286, + "step": 1320 + }, + { + "epoch": 0.56, + "learning_rate": 4.1985122520420074e-06, + "loss": 0.7191, + "step": 1324 + }, + { + "epoch": 0.56, + "learning_rate": 4.195595099183197e-06, + "loss": 0.8621, + "step": 1328 + }, + { + "epoch": 0.57, + "learning_rate": 4.192677946324388e-06, + "loss": 0.7634, + "step": 1332 + }, + { + "epoch": 0.57, + "learning_rate": 4.189760793465578e-06, + "loss": 0.7067, + "step": 1336 + }, + { + "epoch": 0.57, + "learning_rate": 4.186843640606768e-06, + "loss": 0.6049, + "step": 1340 + }, + { + "epoch": 0.57, + "learning_rate": 4.183926487747959e-06, + "loss": 0.3731, + "step": 1344 + }, + { + "epoch": 0.57, + "learning_rate": 4.181009334889149e-06, + "loss": 0.5955, + "step": 1348 + }, + { + "epoch": 0.57, + "learning_rate": 4.178092182030339e-06, + "loss": 0.5576, + "step": 1352 + }, + { + "epoch": 0.58, + "learning_rate": 4.175175029171529e-06, + "loss": 0.6731, + "step": 1356 + }, + { + "epoch": 0.58, + "learning_rate": 4.172257876312719e-06, + "loss": 0.8847, + "step": 1360 + }, + { + "epoch": 0.58, + "learning_rate": 4.169340723453909e-06, + "loss": 0.6158, + "step": 1364 + }, + { + "epoch": 0.58, + "learning_rate": 4.166423570595099e-06, + "loss": 0.7721, + "step": 1368 + }, + { + "epoch": 0.58, + "learning_rate": 4.16350641773629e-06, + "loss": 0.6517, + "step": 1372 + }, + { + "epoch": 0.59, + "learning_rate": 4.16058926487748e-06, + "loss": 0.6014, + "step": 1376 + }, + { + "epoch": 0.59, + "learning_rate": 4.15767211201867e-06, + "loss": 0.8049, + "step": 1380 + }, + { + "epoch": 0.59, + "learning_rate": 4.15475495915986e-06, + "loss": 0.7245, + "step": 1384 + }, + { + "epoch": 0.59, + "learning_rate": 4.151837806301051e-06, + "loss": 0.8411, + "step": 1388 + }, + { + "epoch": 0.59, + "learning_rate": 4.148920653442241e-06, + "loss": 0.6463, + "step": 1392 + }, + { + "epoch": 0.59, + "learning_rate": 4.1460035005834306e-06, + "loss": 0.5053, + "step": 1396 + }, + { + "epoch": 0.6, + "learning_rate": 4.143086347724621e-06, + "loss": 0.4919, + "step": 1400 + }, + { + "epoch": 0.6, + "learning_rate": 4.140169194865811e-06, + "loss": 0.7352, + "step": 1404 + }, + { + "epoch": 0.6, + "learning_rate": 4.137252042007002e-06, + "loss": 0.5577, + "step": 1408 + }, + { + "epoch": 0.6, + "learning_rate": 4.134334889148192e-06, + "loss": 0.7224, + "step": 1412 + }, + { + "epoch": 0.6, + "learning_rate": 4.131417736289382e-06, + "loss": 0.9195, + "step": 1416 + }, + { + "epoch": 0.6, + "learning_rate": 4.128500583430572e-06, + "loss": 0.7652, + "step": 1420 + }, + { + "epoch": 0.61, + "learning_rate": 4.125583430571763e-06, + "loss": 0.756, + "step": 1424 + }, + { + "epoch": 0.61, + "learning_rate": 4.122666277712953e-06, + "loss": 0.7378, + "step": 1428 + }, + { + "epoch": 0.61, + "learning_rate": 4.1197491248541425e-06, + "loss": 0.7264, + "step": 1432 + }, + { + "epoch": 0.61, + "learning_rate": 4.116831971995333e-06, + "loss": 0.7489, + "step": 1436 + }, + { + "epoch": 0.61, + "learning_rate": 4.113914819136523e-06, + "loss": 0.661, + "step": 1440 + }, + { + "epoch": 0.61, + "learning_rate": 4.110997666277713e-06, + "loss": 0.6761, + "step": 1444 + }, + { + "epoch": 0.62, + "learning_rate": 4.108080513418903e-06, + "loss": 0.62, + "step": 1448 + }, + { + "epoch": 0.62, + "learning_rate": 4.105163360560094e-06, + "loss": 0.5477, + "step": 1452 + }, + { + "epoch": 0.62, + "learning_rate": 4.102246207701284e-06, + "loss": 0.7107, + "step": 1456 + }, + { + "epoch": 0.62, + "learning_rate": 4.099329054842474e-06, + "loss": 0.9508, + "step": 1460 + }, + { + "epoch": 0.62, + "learning_rate": 4.096411901983664e-06, + "loss": 0.7668, + "step": 1464 + }, + { + "epoch": 0.62, + "learning_rate": 4.0934947491248545e-06, + "loss": 0.7971, + "step": 1468 + }, + { + "epoch": 0.63, + "learning_rate": 4.0905775962660445e-06, + "loss": 0.7532, + "step": 1472 + }, + { + "epoch": 0.63, + "learning_rate": 4.087660443407234e-06, + "loss": 0.6721, + "step": 1476 + }, + { + "epoch": 0.63, + "learning_rate": 4.084743290548425e-06, + "loss": 0.702, + "step": 1480 + }, + { + "epoch": 0.63, + "learning_rate": 4.081826137689615e-06, + "loss": 0.5931, + "step": 1484 + }, + { + "epoch": 0.63, + "learning_rate": 4.078908984830806e-06, + "loss": 0.8941, + "step": 1488 + }, + { + "epoch": 0.63, + "learning_rate": 4.075991831971996e-06, + "loss": 0.4728, + "step": 1492 + }, + { + "epoch": 0.64, + "learning_rate": 4.073074679113186e-06, + "loss": 0.5366, + "step": 1496 + }, + { + "epoch": 0.64, + "learning_rate": 4.070157526254377e-06, + "loss": 0.6567, + "step": 1500 + }, + { + "epoch": 0.64, + "learning_rate": 4.0672403733955665e-06, + "loss": 0.6594, + "step": 1504 + }, + { + "epoch": 0.64, + "learning_rate": 4.0643232205367565e-06, + "loss": 0.5096, + "step": 1508 + }, + { + "epoch": 0.64, + "learning_rate": 4.061406067677946e-06, + "loss": 0.6852, + "step": 1512 + }, + { + "epoch": 0.64, + "learning_rate": 4.058488914819137e-06, + "loss": 0.7667, + "step": 1516 + }, + { + "epoch": 0.65, + "learning_rate": 4.055571761960327e-06, + "loss": 0.662, + "step": 1520 + }, + { + "epoch": 0.65, + "learning_rate": 4.052654609101517e-06, + "loss": 0.6275, + "step": 1524 + }, + { + "epoch": 0.65, + "learning_rate": 4.049737456242707e-06, + "loss": 0.715, + "step": 1528 + }, + { + "epoch": 0.65, + "learning_rate": 4.046820303383898e-06, + "loss": 0.4791, + "step": 1532 + }, + { + "epoch": 0.65, + "learning_rate": 4.043903150525088e-06, + "loss": 0.6893, + "step": 1536 + }, + { + "epoch": 0.65, + "learning_rate": 4.040985997666278e-06, + "loss": 0.4941, + "step": 1540 + }, + { + "epoch": 0.66, + "learning_rate": 4.0380688448074685e-06, + "loss": 0.5576, + "step": 1544 + }, + { + "epoch": 0.66, + "learning_rate": 4.035151691948658e-06, + "loss": 0.7089, + "step": 1548 + }, + { + "epoch": 0.66, + "learning_rate": 4.032234539089848e-06, + "loss": 0.4944, + "step": 1552 + }, + { + "epoch": 0.66, + "learning_rate": 4.029317386231038e-06, + "loss": 0.4799, + "step": 1556 + }, + { + "epoch": 0.66, + "learning_rate": 4.026400233372229e-06, + "loss": 0.6483, + "step": 1560 + }, + { + "epoch": 0.66, + "learning_rate": 4.023483080513419e-06, + "loss": 0.7882, + "step": 1564 + }, + { + "epoch": 0.67, + "learning_rate": 4.02056592765461e-06, + "loss": 0.6979, + "step": 1568 + }, + { + "epoch": 0.67, + "learning_rate": 4.0176487747958e-06, + "loss": 0.6079, + "step": 1572 + }, + { + "epoch": 0.67, + "learning_rate": 4.01473162193699e-06, + "loss": 0.7046, + "step": 1576 + }, + { + "epoch": 0.67, + "learning_rate": 4.0118144690781804e-06, + "loss": 0.5994, + "step": 1580 + }, + { + "epoch": 0.67, + "learning_rate": 4.00889731621937e-06, + "loss": 0.628, + "step": 1584 + }, + { + "epoch": 0.68, + "learning_rate": 4.00598016336056e-06, + "loss": 0.4581, + "step": 1588 + }, + { + "epoch": 0.68, + "learning_rate": 4.00306301050175e-06, + "loss": 0.6283, + "step": 1592 + }, + { + "epoch": 0.68, + "learning_rate": 4.000145857642941e-06, + "loss": 0.6736, + "step": 1596 + }, + { + "epoch": 0.68, + "learning_rate": 3.997228704784131e-06, + "loss": 0.6118, + "step": 1600 + }, + { + "epoch": 0.68, + "learning_rate": 3.994311551925321e-06, + "loss": 0.8692, + "step": 1604 + }, + { + "epoch": 0.68, + "learning_rate": 3.991394399066512e-06, + "loss": 0.7431, + "step": 1608 + }, + { + "epoch": 0.69, + "learning_rate": 3.988477246207702e-06, + "loss": 0.6712, + "step": 1612 + }, + { + "epoch": 0.69, + "learning_rate": 3.985560093348892e-06, + "loss": 0.5141, + "step": 1616 + }, + { + "epoch": 0.69, + "learning_rate": 3.9826429404900815e-06, + "loss": 0.4037, + "step": 1620 + }, + { + "epoch": 0.69, + "learning_rate": 3.979725787631272e-06, + "loss": 0.5442, + "step": 1624 + }, + { + "epoch": 0.69, + "learning_rate": 3.976808634772462e-06, + "loss": 0.7256, + "step": 1628 + }, + { + "epoch": 0.69, + "learning_rate": 3.973891481913652e-06, + "loss": 0.7292, + "step": 1632 + }, + { + "epoch": 0.7, + "learning_rate": 3.970974329054843e-06, + "loss": 0.7739, + "step": 1636 + }, + { + "epoch": 0.7, + "learning_rate": 3.968057176196033e-06, + "loss": 0.6673, + "step": 1640 + }, + { + "epoch": 0.7, + "learning_rate": 3.965140023337224e-06, + "loss": 0.8099, + "step": 1644 + }, + { + "epoch": 0.7, + "learning_rate": 3.962222870478414e-06, + "loss": 0.5271, + "step": 1648 + }, + { + "epoch": 0.7, + "learning_rate": 3.9593057176196036e-06, + "loss": 0.7032, + "step": 1652 + }, + { + "epoch": 0.7, + "learning_rate": 3.9563885647607935e-06, + "loss": 0.6766, + "step": 1656 + }, + { + "epoch": 0.71, + "learning_rate": 3.953471411901984e-06, + "loss": 0.6286, + "step": 1660 + }, + { + "epoch": 0.71, + "learning_rate": 3.950554259043174e-06, + "loss": 0.6236, + "step": 1664 + }, + { + "epoch": 0.71, + "learning_rate": 3.947637106184364e-06, + "loss": 0.7438, + "step": 1668 + }, + { + "epoch": 0.71, + "learning_rate": 3.944719953325555e-06, + "loss": 0.6499, + "step": 1672 + }, + { + "epoch": 0.71, + "learning_rate": 3.941802800466745e-06, + "loss": 0.7669, + "step": 1676 + }, + { + "epoch": 0.71, + "learning_rate": 3.938885647607935e-06, + "loss": 0.4944, + "step": 1680 + }, + { + "epoch": 0.72, + "learning_rate": 3.935968494749125e-06, + "loss": 0.4765, + "step": 1684 + }, + { + "epoch": 0.72, + "learning_rate": 3.9330513418903156e-06, + "loss": 0.7785, + "step": 1688 + }, + { + "epoch": 0.72, + "learning_rate": 3.9301341890315055e-06, + "loss": 0.6521, + "step": 1692 + }, + { + "epoch": 0.72, + "learning_rate": 3.9272170361726954e-06, + "loss": 0.6744, + "step": 1696 + }, + { + "epoch": 0.72, + "learning_rate": 3.924299883313886e-06, + "loss": 0.6819, + "step": 1700 + }, + { + "epoch": 0.72, + "learning_rate": 3.921382730455076e-06, + "loss": 0.6566, + "step": 1704 + }, + { + "epoch": 0.73, + "learning_rate": 3.918465577596267e-06, + "loss": 0.7201, + "step": 1708 + }, + { + "epoch": 0.73, + "learning_rate": 3.915548424737456e-06, + "loss": 0.373, + "step": 1712 + }, + { + "epoch": 0.73, + "learning_rate": 3.912631271878647e-06, + "loss": 0.8481, + "step": 1716 + }, + { + "epoch": 0.73, + "learning_rate": 3.909714119019837e-06, + "loss": 0.6479, + "step": 1720 + }, + { + "epoch": 0.73, + "learning_rate": 3.9067969661610276e-06, + "loss": 0.5682, + "step": 1724 + }, + { + "epoch": 0.73, + "learning_rate": 3.9038798133022175e-06, + "loss": 0.651, + "step": 1728 + }, + { + "epoch": 0.74, + "learning_rate": 3.9009626604434074e-06, + "loss": 0.637, + "step": 1732 + }, + { + "epoch": 0.74, + "learning_rate": 3.898045507584598e-06, + "loss": 0.5119, + "step": 1736 + }, + { + "epoch": 0.74, + "learning_rate": 3.895128354725788e-06, + "loss": 0.5434, + "step": 1740 + }, + { + "epoch": 0.74, + "learning_rate": 3.892211201866978e-06, + "loss": 0.7295, + "step": 1744 + }, + { + "epoch": 0.74, + "learning_rate": 3.889294049008168e-06, + "loss": 0.7214, + "step": 1748 + }, + { + "epoch": 0.74, + "learning_rate": 3.886376896149359e-06, + "loss": 0.5946, + "step": 1752 + }, + { + "epoch": 0.75, + "learning_rate": 3.883459743290549e-06, + "loss": 0.6668, + "step": 1756 + }, + { + "epoch": 0.75, + "learning_rate": 3.880542590431739e-06, + "loss": 0.6054, + "step": 1760 + }, + { + "epoch": 0.75, + "learning_rate": 3.877625437572929e-06, + "loss": 0.6904, + "step": 1764 + }, + { + "epoch": 0.75, + "learning_rate": 3.874708284714119e-06, + "loss": 0.7803, + "step": 1768 + }, + { + "epoch": 0.75, + "learning_rate": 3.871791131855309e-06, + "loss": 0.6766, + "step": 1772 + }, + { + "epoch": 0.76, + "learning_rate": 3.868873978996499e-06, + "loss": 0.6957, + "step": 1776 + }, + { + "epoch": 0.76, + "learning_rate": 3.86595682613769e-06, + "loss": 0.8129, + "step": 1780 + }, + { + "epoch": 0.76, + "learning_rate": 3.86303967327888e-06, + "loss": 0.7652, + "step": 1784 + }, + { + "epoch": 0.76, + "learning_rate": 3.860122520420071e-06, + "loss": 0.4505, + "step": 1788 + }, + { + "epoch": 0.76, + "learning_rate": 3.85720536756126e-06, + "loss": 0.6318, + "step": 1792 + }, + { + "epoch": 0.76, + "learning_rate": 3.854288214702451e-06, + "loss": 0.7018, + "step": 1796 + }, + { + "epoch": 0.77, + "learning_rate": 3.8513710618436415e-06, + "loss": 0.8348, + "step": 1800 + }, + { + "epoch": 0.77, + "learning_rate": 3.848453908984831e-06, + "loss": 0.5523, + "step": 1804 + }, + { + "epoch": 0.77, + "learning_rate": 3.845536756126021e-06, + "loss": 0.5858, + "step": 1808 + }, + { + "epoch": 0.77, + "learning_rate": 3.842619603267211e-06, + "loss": 0.3905, + "step": 1812 + }, + { + "epoch": 0.77, + "learning_rate": 3.839702450408402e-06, + "loss": 0.7015, + "step": 1816 + }, + { + "epoch": 0.77, + "learning_rate": 3.836785297549592e-06, + "loss": 0.4228, + "step": 1820 + }, + { + "epoch": 0.78, + "learning_rate": 3.833868144690782e-06, + "loss": 0.7709, + "step": 1824 + }, + { + "epoch": 0.78, + "learning_rate": 3.830950991831972e-06, + "loss": 0.5833, + "step": 1828 + }, + { + "epoch": 0.78, + "learning_rate": 3.828033838973163e-06, + "loss": 0.6342, + "step": 1832 + }, + { + "epoch": 0.78, + "learning_rate": 3.825116686114353e-06, + "loss": 0.843, + "step": 1836 + }, + { + "epoch": 0.78, + "learning_rate": 3.8221995332555425e-06, + "loss": 0.5876, + "step": 1840 + }, + { + "epoch": 0.78, + "learning_rate": 3.819282380396733e-06, + "loss": 0.5432, + "step": 1844 + }, + { + "epoch": 0.79, + "learning_rate": 3.816365227537923e-06, + "loss": 0.7204, + "step": 1848 + }, + { + "epoch": 0.79, + "learning_rate": 3.8134480746791136e-06, + "loss": 0.4466, + "step": 1852 + }, + { + "epoch": 0.79, + "learning_rate": 3.8105309218203036e-06, + "loss": 0.623, + "step": 1856 + }, + { + "epoch": 0.79, + "learning_rate": 3.807613768961494e-06, + "loss": 0.3623, + "step": 1860 + }, + { + "epoch": 0.79, + "learning_rate": 3.804696616102684e-06, + "loss": 0.6491, + "step": 1864 + }, + { + "epoch": 0.79, + "learning_rate": 3.8017794632438742e-06, + "loss": 0.6469, + "step": 1868 + }, + { + "epoch": 0.8, + "learning_rate": 3.7988623103850646e-06, + "loss": 0.5412, + "step": 1872 + }, + { + "epoch": 0.8, + "learning_rate": 3.7959451575262545e-06, + "loss": 0.995, + "step": 1876 + }, + { + "epoch": 0.8, + "learning_rate": 3.793028004667445e-06, + "loss": 0.6152, + "step": 1880 + }, + { + "epoch": 0.8, + "learning_rate": 3.790110851808635e-06, + "loss": 0.7216, + "step": 1884 + }, + { + "epoch": 0.8, + "learning_rate": 3.787193698949825e-06, + "loss": 0.5137, + "step": 1888 + }, + { + "epoch": 0.8, + "learning_rate": 3.784276546091015e-06, + "loss": 0.6703, + "step": 1892 + }, + { + "epoch": 0.81, + "learning_rate": 3.7813593932322055e-06, + "loss": 0.5394, + "step": 1896 + }, + { + "epoch": 0.81, + "learning_rate": 3.7784422403733963e-06, + "loss": 0.6228, + "step": 1900 + }, + { + "epoch": 0.81, + "learning_rate": 3.7755250875145862e-06, + "loss": 0.6231, + "step": 1904 + }, + { + "epoch": 0.81, + "learning_rate": 3.7726079346557766e-06, + "loss": 0.6658, + "step": 1908 + }, + { + "epoch": 0.81, + "learning_rate": 3.7696907817969665e-06, + "loss": 0.5279, + "step": 1912 + }, + { + "epoch": 0.81, + "learning_rate": 3.766773628938157e-06, + "loss": 0.4709, + "step": 1916 + }, + { + "epoch": 0.82, + "learning_rate": 3.763856476079347e-06, + "loss": 0.6442, + "step": 1920 + }, + { + "epoch": 0.82, + "learning_rate": 3.760939323220537e-06, + "loss": 0.6787, + "step": 1924 + }, + { + "epoch": 0.82, + "learning_rate": 3.758022170361727e-06, + "loss": 0.6453, + "step": 1928 + }, + { + "epoch": 0.82, + "learning_rate": 3.7551050175029175e-06, + "loss": 0.7487, + "step": 1932 + }, + { + "epoch": 0.82, + "learning_rate": 3.752187864644108e-06, + "loss": 0.6078, + "step": 1936 + }, + { + "epoch": 0.82, + "learning_rate": 3.749270711785298e-06, + "loss": 0.5252, + "step": 1940 + }, + { + "epoch": 0.83, + "learning_rate": 3.746353558926488e-06, + "loss": 0.4936, + "step": 1944 + }, + { + "epoch": 0.83, + "learning_rate": 3.743436406067678e-06, + "loss": 0.4545, + "step": 1948 + }, + { + "epoch": 0.83, + "learning_rate": 3.7405192532088685e-06, + "loss": 0.7937, + "step": 1952 + }, + { + "epoch": 0.83, + "learning_rate": 3.7376021003500584e-06, + "loss": 0.8007, + "step": 1956 + }, + { + "epoch": 0.83, + "learning_rate": 3.7346849474912488e-06, + "loss": 0.4401, + "step": 1960 + }, + { + "epoch": 0.84, + "learning_rate": 3.731767794632439e-06, + "loss": 0.8051, + "step": 1964 + }, + { + "epoch": 0.84, + "learning_rate": 3.728850641773629e-06, + "loss": 0.7178, + "step": 1968 + }, + { + "epoch": 0.84, + "learning_rate": 3.72593348891482e-06, + "loss": 0.5673, + "step": 1972 + }, + { + "epoch": 0.84, + "learning_rate": 3.7230163360560094e-06, + "loss": 0.8238, + "step": 1976 + }, + { + "epoch": 0.84, + "learning_rate": 3.7200991831972e-06, + "loss": 0.646, + "step": 1980 + }, + { + "epoch": 0.84, + "learning_rate": 3.7171820303383897e-06, + "loss": 0.5166, + "step": 1984 + }, + { + "epoch": 0.85, + "learning_rate": 3.7142648774795804e-06, + "loss": 0.5725, + "step": 1988 + }, + { + "epoch": 0.85, + "learning_rate": 3.7113477246207704e-06, + "loss": 0.8298, + "step": 1992 + }, + { + "epoch": 0.85, + "learning_rate": 3.7084305717619607e-06, + "loss": 0.5858, + "step": 1996 + }, + { + "epoch": 0.85, + "learning_rate": 3.705513418903151e-06, + "loss": 0.5432, + "step": 2000 + }, + { + "epoch": 0.85, + "learning_rate": 3.702596266044341e-06, + "loss": 0.5454, + "step": 2004 + }, + { + "epoch": 0.85, + "learning_rate": 3.6996791131855314e-06, + "loss": 0.6, + "step": 2008 + }, + { + "epoch": 0.86, + "learning_rate": 3.6967619603267213e-06, + "loss": 0.5889, + "step": 2012 + }, + { + "epoch": 0.86, + "learning_rate": 3.6938448074679117e-06, + "loss": 0.7542, + "step": 2016 + }, + { + "epoch": 0.86, + "learning_rate": 3.6909276546091016e-06, + "loss": 0.7573, + "step": 2020 + }, + { + "epoch": 0.86, + "learning_rate": 3.688010501750292e-06, + "loss": 0.7429, + "step": 2024 + }, + { + "epoch": 0.86, + "learning_rate": 3.685093348891482e-06, + "loss": 0.6015, + "step": 2028 + }, + { + "epoch": 0.86, + "learning_rate": 3.6821761960326723e-06, + "loss": 0.6009, + "step": 2032 + }, + { + "epoch": 0.87, + "learning_rate": 3.6792590431738627e-06, + "loss": 0.4447, + "step": 2036 + }, + { + "epoch": 0.87, + "learning_rate": 3.6763418903150526e-06, + "loss": 0.667, + "step": 2040 + }, + { + "epoch": 0.87, + "learning_rate": 3.673424737456243e-06, + "loss": 0.594, + "step": 2044 + }, + { + "epoch": 0.87, + "learning_rate": 3.670507584597433e-06, + "loss": 0.6368, + "step": 2048 + }, + { + "epoch": 0.87, + "learning_rate": 3.6675904317386237e-06, + "loss": 0.5883, + "step": 2052 + }, + { + "epoch": 0.87, + "learning_rate": 3.6646732788798132e-06, + "loss": 0.7004, + "step": 2056 + }, + { + "epoch": 0.88, + "learning_rate": 3.661756126021004e-06, + "loss": 0.6169, + "step": 2060 + }, + { + "epoch": 0.88, + "learning_rate": 3.6588389731621944e-06, + "loss": 0.6757, + "step": 2064 + }, + { + "epoch": 0.88, + "learning_rate": 3.6559218203033843e-06, + "loss": 0.6134, + "step": 2068 + }, + { + "epoch": 0.88, + "learning_rate": 3.6530046674445747e-06, + "loss": 0.6381, + "step": 2072 + }, + { + "epoch": 0.88, + "learning_rate": 3.6500875145857646e-06, + "loss": 0.715, + "step": 2076 + }, + { + "epoch": 0.88, + "learning_rate": 3.647170361726955e-06, + "loss": 0.6319, + "step": 2080 + }, + { + "epoch": 0.89, + "learning_rate": 3.644253208868145e-06, + "loss": 0.4187, + "step": 2084 + }, + { + "epoch": 0.89, + "learning_rate": 3.6413360560093353e-06, + "loss": 0.5733, + "step": 2088 + }, + { + "epoch": 0.89, + "learning_rate": 3.638418903150525e-06, + "loss": 0.6943, + "step": 2092 + }, + { + "epoch": 0.89, + "learning_rate": 3.6355017502917156e-06, + "loss": 0.4808, + "step": 2096 + }, + { + "epoch": 0.89, + "learning_rate": 3.632584597432906e-06, + "loss": 0.4813, + "step": 2100 + }, + { + "epoch": 0.89, + "learning_rate": 3.629667444574096e-06, + "loss": 0.5777, + "step": 2104 + }, + { + "epoch": 0.9, + "learning_rate": 3.6267502917152862e-06, + "loss": 0.5967, + "step": 2108 + }, + { + "epoch": 0.9, + "learning_rate": 3.623833138856476e-06, + "loss": 0.5644, + "step": 2112 + }, + { + "epoch": 0.9, + "learning_rate": 3.6209159859976665e-06, + "loss": 0.6584, + "step": 2116 + }, + { + "epoch": 0.9, + "learning_rate": 3.6179988331388565e-06, + "loss": 0.5162, + "step": 2120 + }, + { + "epoch": 0.9, + "learning_rate": 3.615081680280047e-06, + "loss": 0.6585, + "step": 2124 + }, + { + "epoch": 0.9, + "learning_rate": 3.6121645274212368e-06, + "loss": 0.7716, + "step": 2128 + }, + { + "epoch": 0.91, + "learning_rate": 3.6092473745624276e-06, + "loss": 0.5249, + "step": 2132 + }, + { + "epoch": 0.91, + "learning_rate": 3.606330221703618e-06, + "loss": 0.722, + "step": 2136 + }, + { + "epoch": 0.91, + "learning_rate": 3.603413068844808e-06, + "loss": 0.6634, + "step": 2140 + }, + { + "epoch": 0.91, + "learning_rate": 3.6004959159859982e-06, + "loss": 0.6547, + "step": 2144 + }, + { + "epoch": 0.91, + "learning_rate": 3.597578763127188e-06, + "loss": 0.7804, + "step": 2148 + }, + { + "epoch": 0.91, + "learning_rate": 3.5946616102683785e-06, + "loss": 0.5373, + "step": 2152 + }, + { + "epoch": 0.92, + "learning_rate": 3.5917444574095685e-06, + "loss": 0.6122, + "step": 2156 + }, + { + "epoch": 0.92, + "learning_rate": 3.588827304550759e-06, + "loss": 0.5015, + "step": 2160 + }, + { + "epoch": 0.92, + "learning_rate": 3.585910151691949e-06, + "loss": 0.6872, + "step": 2164 + }, + { + "epoch": 0.92, + "learning_rate": 3.582992998833139e-06, + "loss": 0.3367, + "step": 2168 + }, + { + "epoch": 0.92, + "learning_rate": 3.5800758459743295e-06, + "loss": 0.4729, + "step": 2172 + }, + { + "epoch": 0.93, + "learning_rate": 3.5771586931155194e-06, + "loss": 0.7766, + "step": 2176 + }, + { + "epoch": 0.93, + "learning_rate": 3.5742415402567098e-06, + "loss": 0.7324, + "step": 2180 + }, + { + "epoch": 0.93, + "learning_rate": 3.5713243873978997e-06, + "loss": 0.5244, + "step": 2184 + }, + { + "epoch": 0.93, + "learning_rate": 3.56840723453909e-06, + "loss": 0.6321, + "step": 2188 + }, + { + "epoch": 0.93, + "learning_rate": 3.56549008168028e-06, + "loss": 0.6109, + "step": 2192 + }, + { + "epoch": 0.93, + "learning_rate": 3.5625729288214704e-06, + "loss": 0.5534, + "step": 2196 + }, + { + "epoch": 0.94, + "learning_rate": 3.559655775962661e-06, + "loss": 0.5453, + "step": 2200 + }, + { + "epoch": 0.94, + "learning_rate": 3.5567386231038507e-06, + "loss": 0.4379, + "step": 2204 + }, + { + "epoch": 0.94, + "learning_rate": 3.5538214702450415e-06, + "loss": 0.5268, + "step": 2208 + }, + { + "epoch": 0.94, + "learning_rate": 3.5509043173862314e-06, + "loss": 0.7081, + "step": 2212 + }, + { + "epoch": 0.94, + "learning_rate": 3.5479871645274218e-06, + "loss": 0.5149, + "step": 2216 + }, + { + "epoch": 0.94, + "learning_rate": 3.5450700116686117e-06, + "loss": 0.4048, + "step": 2220 + }, + { + "epoch": 0.95, + "learning_rate": 3.542152858809802e-06, + "loss": 0.7552, + "step": 2224 + }, + { + "epoch": 0.95, + "learning_rate": 3.539235705950992e-06, + "loss": 0.4624, + "step": 2228 + }, + { + "epoch": 0.95, + "learning_rate": 3.5363185530921824e-06, + "loss": 0.5872, + "step": 2232 + }, + { + "epoch": 0.95, + "learning_rate": 3.5334014002333727e-06, + "loss": 0.5265, + "step": 2236 + }, + { + "epoch": 0.95, + "learning_rate": 3.5304842473745627e-06, + "loss": 0.4802, + "step": 2240 + }, + { + "epoch": 0.95, + "learning_rate": 3.527567094515753e-06, + "loss": 0.6172, + "step": 2244 + }, + { + "epoch": 0.96, + "learning_rate": 3.524649941656943e-06, + "loss": 0.5788, + "step": 2248 + }, + { + "epoch": 0.96, + "learning_rate": 3.5217327887981333e-06, + "loss": 0.5426, + "step": 2252 + }, + { + "epoch": 0.96, + "learning_rate": 3.5188156359393233e-06, + "loss": 0.6427, + "step": 2256 + }, + { + "epoch": 0.96, + "learning_rate": 3.5158984830805136e-06, + "loss": 0.6474, + "step": 2260 + }, + { + "epoch": 0.96, + "learning_rate": 3.512981330221704e-06, + "loss": 0.5518, + "step": 2264 + }, + { + "epoch": 0.96, + "learning_rate": 3.510064177362894e-06, + "loss": 0.6028, + "step": 2268 + }, + { + "epoch": 0.97, + "learning_rate": 3.5071470245040843e-06, + "loss": 0.4178, + "step": 2272 + }, + { + "epoch": 0.97, + "learning_rate": 3.5042298716452742e-06, + "loss": 0.761, + "step": 2276 + }, + { + "epoch": 0.97, + "learning_rate": 3.501312718786465e-06, + "loss": 0.7076, + "step": 2280 + }, + { + "epoch": 0.97, + "learning_rate": 3.4983955659276545e-06, + "loss": 0.5466, + "step": 2284 + }, + { + "epoch": 0.97, + "learning_rate": 3.4954784130688453e-06, + "loss": 0.4539, + "step": 2288 + }, + { + "epoch": 0.97, + "learning_rate": 3.4925612602100353e-06, + "loss": 0.6436, + "step": 2292 + }, + { + "epoch": 0.98, + "learning_rate": 3.4896441073512256e-06, + "loss": 0.471, + "step": 2296 + }, + { + "epoch": 0.98, + "learning_rate": 3.486726954492416e-06, + "loss": 0.6848, + "step": 2300 + }, + { + "epoch": 0.98, + "learning_rate": 3.483809801633606e-06, + "loss": 0.6043, + "step": 2304 + }, + { + "epoch": 0.98, + "learning_rate": 3.4808926487747963e-06, + "loss": 0.6458, + "step": 2308 + }, + { + "epoch": 0.98, + "learning_rate": 3.4779754959159862e-06, + "loss": 0.6336, + "step": 2312 + }, + { + "epoch": 0.98, + "learning_rate": 3.4750583430571766e-06, + "loss": 0.5843, + "step": 2316 + }, + { + "epoch": 0.99, + "learning_rate": 3.4721411901983665e-06, + "loss": 0.4364, + "step": 2320 + }, + { + "epoch": 0.99, + "learning_rate": 3.469224037339557e-06, + "loss": 0.5796, + "step": 2324 + }, + { + "epoch": 0.99, + "learning_rate": 3.466306884480747e-06, + "loss": 0.5754, + "step": 2328 + }, + { + "epoch": 0.99, + "learning_rate": 3.463389731621937e-06, + "loss": 0.6848, + "step": 2332 + }, + { + "epoch": 0.99, + "learning_rate": 3.4604725787631276e-06, + "loss": 0.6489, + "step": 2336 + }, + { + "epoch": 0.99, + "learning_rate": 3.4575554259043175e-06, + "loss": 0.6255, + "step": 2340 + }, + { + "epoch": 1.0, + "learning_rate": 3.454638273045508e-06, + "loss": 0.4827, + "step": 2344 + }, + { + "epoch": 1.0, + "learning_rate": 3.451721120186698e-06, + "loss": 0.5178, + "step": 2348 + }, + { + "epoch": 1.0, + "learning_rate": 3.448803967327888e-06, + "loss": 0.5294, + "step": 2352 + }, + { + "epoch": 1.0, + "learning_rate": 3.445886814469078e-06, + "loss": 0.4968, + "step": 2356 + }, + { + "epoch": 1.0, + "learning_rate": 3.442969661610269e-06, + "loss": 0.7301, + "step": 2360 + }, + { + "epoch": 1.01, + "learning_rate": 3.4400525087514592e-06, + "loss": 0.5503, + "step": 2364 + }, + { + "epoch": 1.01, + "learning_rate": 3.437135355892649e-06, + "loss": 0.4724, + "step": 2368 + }, + { + "epoch": 1.01, + "learning_rate": 3.4342182030338395e-06, + "loss": 0.5471, + "step": 2372 + }, + { + "epoch": 1.01, + "learning_rate": 3.4313010501750295e-06, + "loss": 0.5414, + "step": 2376 + }, + { + "epoch": 1.01, + "learning_rate": 3.42838389731622e-06, + "loss": 0.4228, + "step": 2380 + }, + { + "epoch": 1.01, + "learning_rate": 3.4254667444574098e-06, + "loss": 0.6098, + "step": 2384 + }, + { + "epoch": 1.02, + "learning_rate": 3.4225495915986e-06, + "loss": 0.5935, + "step": 2388 + }, + { + "epoch": 1.02, + "learning_rate": 3.41963243873979e-06, + "loss": 0.5567, + "step": 2392 + }, + { + "epoch": 1.02, + "learning_rate": 3.4167152858809804e-06, + "loss": 0.4576, + "step": 2396 + }, + { + "epoch": 1.02, + "learning_rate": 3.413798133022171e-06, + "loss": 0.5125, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 3.4108809801633607e-06, + "loss": 0.4188, + "step": 2404 + }, + { + "epoch": 1.02, + "learning_rate": 3.407963827304551e-06, + "loss": 0.6133, + "step": 2408 + }, + { + "epoch": 1.03, + "learning_rate": 3.405046674445741e-06, + "loss": 0.5203, + "step": 2412 + }, + { + "epoch": 1.03, + "learning_rate": 3.4021295215869314e-06, + "loss": 0.5936, + "step": 2416 + }, + { + "epoch": 1.03, + "learning_rate": 3.3992123687281213e-06, + "loss": 0.5783, + "step": 2420 + }, + { + "epoch": 1.03, + "learning_rate": 3.3962952158693117e-06, + "loss": 0.5501, + "step": 2424 + }, + { + "epoch": 1.03, + "learning_rate": 3.3933780630105017e-06, + "loss": 0.6859, + "step": 2428 + }, + { + "epoch": 1.03, + "learning_rate": 3.390460910151692e-06, + "loss": 0.5361, + "step": 2432 + }, + { + "epoch": 1.04, + "learning_rate": 3.387543757292883e-06, + "loss": 0.3481, + "step": 2436 + }, + { + "epoch": 1.04, + "learning_rate": 3.3846266044340727e-06, + "loss": 0.5475, + "step": 2440 + }, + { + "epoch": 1.04, + "learning_rate": 3.381709451575263e-06, + "loss": 0.4606, + "step": 2444 + }, + { + "epoch": 1.04, + "learning_rate": 3.378792298716453e-06, + "loss": 0.3753, + "step": 2448 + }, + { + "epoch": 1.04, + "learning_rate": 3.3758751458576434e-06, + "loss": 0.5286, + "step": 2452 + }, + { + "epoch": 1.04, + "learning_rate": 3.3729579929988333e-06, + "loss": 0.5214, + "step": 2456 + }, + { + "epoch": 1.05, + "learning_rate": 3.3700408401400237e-06, + "loss": 0.4971, + "step": 2460 + }, + { + "epoch": 1.05, + "learning_rate": 3.367123687281214e-06, + "loss": 0.5731, + "step": 2464 + }, + { + "epoch": 1.05, + "learning_rate": 3.364206534422404e-06, + "loss": 0.6563, + "step": 2468 + }, + { + "epoch": 1.05, + "learning_rate": 3.3612893815635944e-06, + "loss": 0.3885, + "step": 2472 + }, + { + "epoch": 1.05, + "learning_rate": 3.3583722287047843e-06, + "loss": 0.7304, + "step": 2476 + }, + { + "epoch": 1.05, + "learning_rate": 3.3554550758459747e-06, + "loss": 0.6601, + "step": 2480 + }, + { + "epoch": 1.06, + "learning_rate": 3.3525379229871646e-06, + "loss": 0.4629, + "step": 2484 + }, + { + "epoch": 1.06, + "learning_rate": 3.349620770128355e-06, + "loss": 0.5769, + "step": 2488 + }, + { + "epoch": 1.06, + "learning_rate": 3.346703617269545e-06, + "loss": 0.5616, + "step": 2492 + }, + { + "epoch": 1.06, + "learning_rate": 3.3437864644107353e-06, + "loss": 0.4976, + "step": 2496 + }, + { + "epoch": 1.06, + "learning_rate": 3.340869311551926e-06, + "loss": 0.6104, + "step": 2500 + }, + { + "epoch": 1.06, + "learning_rate": 3.3379521586931156e-06, + "loss": 0.4875, + "step": 2504 + }, + { + "epoch": 1.07, + "learning_rate": 3.3350350058343064e-06, + "loss": 0.3093, + "step": 2508 + }, + { + "epoch": 1.07, + "learning_rate": 3.332117852975496e-06, + "loss": 0.3887, + "step": 2512 + }, + { + "epoch": 1.07, + "learning_rate": 3.3292007001166867e-06, + "loss": 0.6688, + "step": 2516 + }, + { + "epoch": 1.07, + "learning_rate": 3.3262835472578766e-06, + "loss": 0.479, + "step": 2520 + }, + { + "epoch": 1.07, + "learning_rate": 3.323366394399067e-06, + "loss": 0.616, + "step": 2524 + }, + { + "epoch": 1.07, + "learning_rate": 3.320449241540257e-06, + "loss": 0.8513, + "step": 2528 + }, + { + "epoch": 1.08, + "learning_rate": 3.3175320886814473e-06, + "loss": 0.4224, + "step": 2532 + }, + { + "epoch": 1.08, + "learning_rate": 3.3146149358226376e-06, + "loss": 0.4577, + "step": 2536 + }, + { + "epoch": 1.08, + "learning_rate": 3.3116977829638276e-06, + "loss": 0.4286, + "step": 2540 + }, + { + "epoch": 1.08, + "learning_rate": 3.308780630105018e-06, + "loss": 0.5298, + "step": 2544 + }, + { + "epoch": 1.08, + "learning_rate": 3.305863477246208e-06, + "loss": 0.4531, + "step": 2548 + }, + { + "epoch": 1.09, + "learning_rate": 3.3029463243873982e-06, + "loss": 0.561, + "step": 2552 + }, + { + "epoch": 1.09, + "learning_rate": 3.300029171528588e-06, + "loss": 0.4931, + "step": 2556 + }, + { + "epoch": 1.09, + "learning_rate": 3.2971120186697785e-06, + "loss": 0.6341, + "step": 2560 + }, + { + "epoch": 1.09, + "learning_rate": 3.294194865810969e-06, + "loss": 0.5096, + "step": 2564 + }, + { + "epoch": 1.09, + "learning_rate": 3.291277712952159e-06, + "loss": 0.3863, + "step": 2568 + }, + { + "epoch": 1.09, + "learning_rate": 3.288360560093349e-06, + "loss": 0.6633, + "step": 2572 + }, + { + "epoch": 1.1, + "learning_rate": 3.285443407234539e-06, + "loss": 0.7305, + "step": 2576 + }, + { + "epoch": 1.1, + "learning_rate": 3.2825262543757295e-06, + "loss": 0.627, + "step": 2580 + }, + { + "epoch": 1.1, + "learning_rate": 3.2796091015169194e-06, + "loss": 0.4865, + "step": 2584 + }, + { + "epoch": 1.1, + "learning_rate": 3.27669194865811e-06, + "loss": 0.507, + "step": 2588 + }, + { + "epoch": 1.1, + "learning_rate": 3.2737747957992997e-06, + "loss": 0.5226, + "step": 2592 + }, + { + "epoch": 1.1, + "learning_rate": 3.2708576429404905e-06, + "loss": 0.5523, + "step": 2596 + }, + { + "epoch": 1.11, + "learning_rate": 3.267940490081681e-06, + "loss": 0.5195, + "step": 2600 + }, + { + "epoch": 1.11, + "learning_rate": 3.265023337222871e-06, + "loss": 0.3034, + "step": 2604 + }, + { + "epoch": 1.11, + "learning_rate": 3.262106184364061e-06, + "loss": 0.7056, + "step": 2608 + }, + { + "epoch": 1.11, + "learning_rate": 3.259189031505251e-06, + "loss": 0.6256, + "step": 2612 + }, + { + "epoch": 1.11, + "learning_rate": 3.2562718786464415e-06, + "loss": 0.4434, + "step": 2616 + }, + { + "epoch": 1.11, + "learning_rate": 3.2533547257876314e-06, + "loss": 0.3871, + "step": 2620 + }, + { + "epoch": 1.12, + "learning_rate": 3.2504375729288218e-06, + "loss": 0.6902, + "step": 2624 + }, + { + "epoch": 1.12, + "learning_rate": 3.2475204200700117e-06, + "loss": 0.3734, + "step": 2628 + }, + { + "epoch": 1.12, + "learning_rate": 3.244603267211202e-06, + "loss": 0.4771, + "step": 2632 + }, + { + "epoch": 1.12, + "learning_rate": 3.2416861143523924e-06, + "loss": 0.5457, + "step": 2636 + }, + { + "epoch": 1.12, + "learning_rate": 3.2387689614935824e-06, + "loss": 0.5787, + "step": 2640 + }, + { + "epoch": 1.12, + "learning_rate": 3.2358518086347727e-06, + "loss": 0.7326, + "step": 2644 + }, + { + "epoch": 1.13, + "learning_rate": 3.2329346557759627e-06, + "loss": 0.4606, + "step": 2648 + }, + { + "epoch": 1.13, + "learning_rate": 3.230017502917153e-06, + "loss": 0.5832, + "step": 2652 + }, + { + "epoch": 1.13, + "learning_rate": 3.227100350058343e-06, + "loss": 0.4674, + "step": 2656 + }, + { + "epoch": 1.13, + "learning_rate": 3.2241831971995333e-06, + "loss": 0.3263, + "step": 2660 + }, + { + "epoch": 1.13, + "learning_rate": 3.221266044340724e-06, + "loss": 0.433, + "step": 2664 + }, + { + "epoch": 1.13, + "learning_rate": 3.218348891481914e-06, + "loss": 0.562, + "step": 2668 + }, + { + "epoch": 1.14, + "learning_rate": 3.2154317386231044e-06, + "loss": 0.5465, + "step": 2672 + }, + { + "epoch": 1.14, + "learning_rate": 3.2125145857642944e-06, + "loss": 0.5806, + "step": 2676 + }, + { + "epoch": 1.14, + "learning_rate": 3.2095974329054847e-06, + "loss": 0.5941, + "step": 2680 + }, + { + "epoch": 1.14, + "learning_rate": 3.2066802800466747e-06, + "loss": 0.5672, + "step": 2684 + }, + { + "epoch": 1.14, + "learning_rate": 3.203763127187865e-06, + "loss": 0.4044, + "step": 2688 + }, + { + "epoch": 1.14, + "learning_rate": 3.200845974329055e-06, + "loss": 0.6372, + "step": 2692 + }, + { + "epoch": 1.15, + "learning_rate": 3.1979288214702453e-06, + "loss": 0.5306, + "step": 2696 + }, + { + "epoch": 1.15, + "learning_rate": 3.1950116686114357e-06, + "loss": 0.6751, + "step": 2700 + }, + { + "epoch": 1.15, + "learning_rate": 3.1920945157526256e-06, + "loss": 0.3999, + "step": 2704 + }, + { + "epoch": 1.15, + "learning_rate": 3.189177362893816e-06, + "loss": 0.6431, + "step": 2708 + }, + { + "epoch": 1.15, + "learning_rate": 3.186260210035006e-06, + "loss": 0.7637, + "step": 2712 + }, + { + "epoch": 1.15, + "learning_rate": 3.1833430571761963e-06, + "loss": 0.4698, + "step": 2716 + }, + { + "epoch": 1.16, + "learning_rate": 3.1804259043173862e-06, + "loss": 0.5558, + "step": 2720 + }, + { + "epoch": 1.16, + "learning_rate": 3.1775087514585766e-06, + "loss": 0.5202, + "step": 2724 + }, + { + "epoch": 1.16, + "learning_rate": 3.1745915985997665e-06, + "loss": 0.4455, + "step": 2728 + }, + { + "epoch": 1.16, + "learning_rate": 3.171674445740957e-06, + "loss": 0.6568, + "step": 2732 + }, + { + "epoch": 1.16, + "learning_rate": 3.1687572928821477e-06, + "loss": 0.6879, + "step": 2736 + }, + { + "epoch": 1.16, + "learning_rate": 3.165840140023337e-06, + "loss": 0.474, + "step": 2740 + }, + { + "epoch": 1.17, + "learning_rate": 3.162922987164528e-06, + "loss": 0.5385, + "step": 2744 + }, + { + "epoch": 1.17, + "learning_rate": 3.160005834305718e-06, + "loss": 0.6955, + "step": 2748 + }, + { + "epoch": 1.17, + "learning_rate": 3.1570886814469083e-06, + "loss": 0.501, + "step": 2752 + }, + { + "epoch": 1.17, + "learning_rate": 3.1541715285880982e-06, + "loss": 0.5093, + "step": 2756 + }, + { + "epoch": 1.17, + "learning_rate": 3.1512543757292886e-06, + "loss": 0.3698, + "step": 2760 + }, + { + "epoch": 1.18, + "learning_rate": 3.148337222870479e-06, + "loss": 0.4271, + "step": 2764 + }, + { + "epoch": 1.18, + "learning_rate": 3.145420070011669e-06, + "loss": 0.5814, + "step": 2768 + }, + { + "epoch": 1.18, + "learning_rate": 3.1425029171528592e-06, + "loss": 0.3312, + "step": 2772 + }, + { + "epoch": 1.18, + "learning_rate": 3.139585764294049e-06, + "loss": 0.5727, + "step": 2776 + }, + { + "epoch": 1.18, + "learning_rate": 3.1366686114352395e-06, + "loss": 0.6723, + "step": 2780 + }, + { + "epoch": 1.18, + "learning_rate": 3.1337514585764295e-06, + "loss": 0.5772, + "step": 2784 + }, + { + "epoch": 1.19, + "learning_rate": 3.13083430571762e-06, + "loss": 0.2121, + "step": 2788 + }, + { + "epoch": 1.19, + "learning_rate": 3.1279171528588098e-06, + "loss": 0.5148, + "step": 2792 + }, + { + "epoch": 1.19, + "learning_rate": 3.125e-06, + "loss": 0.402, + "step": 2796 + }, + { + "epoch": 1.19, + "learning_rate": 3.1220828471411905e-06, + "loss": 0.4757, + "step": 2800 + }, + { + "epoch": 1.19, + "learning_rate": 3.1191656942823805e-06, + "loss": 0.5157, + "step": 2804 + }, + { + "epoch": 1.19, + "learning_rate": 3.1162485414235712e-06, + "loss": 0.5417, + "step": 2808 + }, + { + "epoch": 1.2, + "learning_rate": 3.1133313885647608e-06, + "loss": 0.3401, + "step": 2812 + }, + { + "epoch": 1.2, + "learning_rate": 3.1104142357059515e-06, + "loss": 0.6862, + "step": 2816 + }, + { + "epoch": 1.2, + "learning_rate": 3.107497082847141e-06, + "loss": 0.516, + "step": 2820 + }, + { + "epoch": 1.2, + "learning_rate": 3.104579929988332e-06, + "loss": 0.5201, + "step": 2824 + }, + { + "epoch": 1.2, + "learning_rate": 3.1016627771295218e-06, + "loss": 0.4436, + "step": 2828 + }, + { + "epoch": 1.2, + "learning_rate": 3.098745624270712e-06, + "loss": 0.5983, + "step": 2832 + }, + { + "epoch": 1.21, + "learning_rate": 3.0958284714119025e-06, + "loss": 0.5076, + "step": 2836 + }, + { + "epoch": 1.21, + "learning_rate": 3.0929113185530924e-06, + "loss": 0.4357, + "step": 2840 + }, + { + "epoch": 1.21, + "learning_rate": 3.089994165694283e-06, + "loss": 0.5015, + "step": 2844 + }, + { + "epoch": 1.21, + "learning_rate": 3.0870770128354727e-06, + "loss": 0.5186, + "step": 2848 + }, + { + "epoch": 1.21, + "learning_rate": 3.084159859976663e-06, + "loss": 0.5653, + "step": 2852 + }, + { + "epoch": 1.21, + "learning_rate": 3.081242707117853e-06, + "loss": 0.6339, + "step": 2856 + }, + { + "epoch": 1.22, + "learning_rate": 3.0783255542590434e-06, + "loss": 0.5534, + "step": 2860 + }, + { + "epoch": 1.22, + "learning_rate": 3.0754084014002338e-06, + "loss": 0.5144, + "step": 2864 + }, + { + "epoch": 1.22, + "learning_rate": 3.0724912485414237e-06, + "loss": 0.393, + "step": 2868 + }, + { + "epoch": 1.22, + "learning_rate": 3.069574095682614e-06, + "loss": 0.4453, + "step": 2872 + }, + { + "epoch": 1.22, + "learning_rate": 3.066656942823804e-06, + "loss": 0.5278, + "step": 2876 + }, + { + "epoch": 1.22, + "learning_rate": 3.0637397899649944e-06, + "loss": 0.7833, + "step": 2880 + }, + { + "epoch": 1.23, + "learning_rate": 3.0608226371061843e-06, + "loss": 0.4581, + "step": 2884 + }, + { + "epoch": 1.23, + "learning_rate": 3.0579054842473747e-06, + "loss": 0.3942, + "step": 2888 + }, + { + "epoch": 1.23, + "learning_rate": 3.0549883313885646e-06, + "loss": 0.6298, + "step": 2892 + }, + { + "epoch": 1.23, + "learning_rate": 3.0520711785297554e-06, + "loss": 0.6276, + "step": 2896 + }, + { + "epoch": 1.23, + "learning_rate": 3.0491540256709458e-06, + "loss": 0.737, + "step": 2900 + }, + { + "epoch": 1.23, + "learning_rate": 3.0462368728121357e-06, + "loss": 0.5208, + "step": 2904 + }, + { + "epoch": 1.24, + "learning_rate": 3.043319719953326e-06, + "loss": 0.5953, + "step": 2908 + }, + { + "epoch": 1.24, + "learning_rate": 3.040402567094516e-06, + "loss": 0.5373, + "step": 2912 + }, + { + "epoch": 1.24, + "learning_rate": 3.0374854142357064e-06, + "loss": 0.3865, + "step": 2916 + }, + { + "epoch": 1.24, + "learning_rate": 3.0345682613768963e-06, + "loss": 0.5451, + "step": 2920 + }, + { + "epoch": 1.24, + "learning_rate": 3.0316511085180867e-06, + "loss": 0.4434, + "step": 2924 + }, + { + "epoch": 1.24, + "learning_rate": 3.0287339556592766e-06, + "loss": 0.6256, + "step": 2928 + }, + { + "epoch": 1.25, + "learning_rate": 3.025816802800467e-06, + "loss": 0.4372, + "step": 2932 + }, + { + "epoch": 1.25, + "learning_rate": 3.0228996499416573e-06, + "loss": 0.4225, + "step": 2936 + }, + { + "epoch": 1.25, + "learning_rate": 3.0199824970828473e-06, + "loss": 0.6176, + "step": 2940 + }, + { + "epoch": 1.25, + "learning_rate": 3.0170653442240376e-06, + "loss": 0.5814, + "step": 2944 + }, + { + "epoch": 1.25, + "learning_rate": 3.0141481913652276e-06, + "loss": 0.6453, + "step": 2948 + }, + { + "epoch": 1.26, + "learning_rate": 3.011231038506418e-06, + "loss": 0.599, + "step": 2952 + }, + { + "epoch": 1.26, + "learning_rate": 3.008313885647608e-06, + "loss": 0.4661, + "step": 2956 + }, + { + "epoch": 1.26, + "learning_rate": 3.0053967327887982e-06, + "loss": 0.3218, + "step": 2960 + }, + { + "epoch": 1.26, + "learning_rate": 3.003208868144691e-06, + "loss": 0.6456, + "step": 2964 + }, + { + "epoch": 1.26, + "learning_rate": 3.0002917152858813e-06, + "loss": 0.2891, + "step": 2968 + }, + { + "epoch": 1.26, + "learning_rate": 2.9973745624270716e-06, + "loss": 0.4821, + "step": 2972 + }, + { + "epoch": 1.27, + "learning_rate": 2.9944574095682616e-06, + "loss": 0.733, + "step": 2976 + }, + { + "epoch": 1.27, + "learning_rate": 2.991540256709452e-06, + "loss": 0.2193, + "step": 2980 + }, + { + "epoch": 1.27, + "learning_rate": 2.988623103850642e-06, + "loss": 0.4148, + "step": 2984 + }, + { + "epoch": 1.27, + "learning_rate": 2.9857059509918322e-06, + "loss": 0.5474, + "step": 2988 + }, + { + "epoch": 1.27, + "learning_rate": 2.982788798133022e-06, + "loss": 0.4005, + "step": 2992 + }, + { + "epoch": 1.27, + "learning_rate": 2.9798716452742125e-06, + "loss": 0.5799, + "step": 2996 + }, + { + "epoch": 1.28, + "learning_rate": 2.9769544924154025e-06, + "loss": 0.4967, + "step": 3000 + }, + { + "epoch": 1.28, + "learning_rate": 2.974037339556593e-06, + "loss": 0.4672, + "step": 3004 + }, + { + "epoch": 1.28, + "learning_rate": 2.9711201866977836e-06, + "loss": 0.4892, + "step": 3008 + }, + { + "epoch": 1.28, + "learning_rate": 2.9682030338389735e-06, + "loss": 0.554, + "step": 3012 + }, + { + "epoch": 1.28, + "learning_rate": 2.965285880980164e-06, + "loss": 0.3553, + "step": 3016 + }, + { + "epoch": 1.28, + "learning_rate": 2.962368728121354e-06, + "loss": 0.5259, + "step": 3020 + }, + { + "epoch": 1.29, + "learning_rate": 2.959451575262544e-06, + "loss": 0.4461, + "step": 3024 + }, + { + "epoch": 1.29, + "learning_rate": 2.956534422403734e-06, + "loss": 0.4768, + "step": 3028 + }, + { + "epoch": 1.29, + "learning_rate": 2.9536172695449245e-06, + "loss": 0.4537, + "step": 3032 + }, + { + "epoch": 1.29, + "learning_rate": 2.9507001166861144e-06, + "loss": 0.5611, + "step": 3036 + }, + { + "epoch": 1.29, + "learning_rate": 2.947782963827305e-06, + "loss": 0.5684, + "step": 3040 + }, + { + "epoch": 1.29, + "learning_rate": 2.944865810968495e-06, + "loss": 0.2357, + "step": 3044 + }, + { + "epoch": 1.3, + "learning_rate": 2.941948658109685e-06, + "loss": 0.4909, + "step": 3048 + }, + { + "epoch": 1.3, + "learning_rate": 2.9390315052508755e-06, + "loss": 0.5225, + "step": 3052 + }, + { + "epoch": 1.3, + "learning_rate": 2.9361143523920654e-06, + "loss": 0.391, + "step": 3056 + }, + { + "epoch": 1.3, + "learning_rate": 2.9331971995332558e-06, + "loss": 0.4058, + "step": 3060 + }, + { + "epoch": 1.3, + "learning_rate": 2.9302800466744457e-06, + "loss": 0.446, + "step": 3064 + }, + { + "epoch": 1.3, + "learning_rate": 2.927362893815636e-06, + "loss": 0.3136, + "step": 3068 + }, + { + "epoch": 1.31, + "learning_rate": 2.924445740956827e-06, + "loss": 0.4259, + "step": 3072 + }, + { + "epoch": 1.31, + "learning_rate": 2.9215285880980164e-06, + "loss": 0.4293, + "step": 3076 + }, + { + "epoch": 1.31, + "learning_rate": 2.918611435239207e-06, + "loss": 0.6366, + "step": 3080 + }, + { + "epoch": 1.31, + "learning_rate": 2.9156942823803967e-06, + "loss": 0.5103, + "step": 3084 + }, + { + "epoch": 1.31, + "learning_rate": 2.9127771295215875e-06, + "loss": 0.4994, + "step": 3088 + }, + { + "epoch": 1.31, + "learning_rate": 2.9098599766627774e-06, + "loss": 0.5369, + "step": 3092 + }, + { + "epoch": 1.32, + "learning_rate": 2.9069428238039678e-06, + "loss": 0.4632, + "step": 3096 + }, + { + "epoch": 1.32, + "learning_rate": 2.9040256709451577e-06, + "loss": 0.3315, + "step": 3100 + }, + { + "epoch": 1.32, + "learning_rate": 2.901108518086348e-06, + "loss": 0.5776, + "step": 3104 + }, + { + "epoch": 1.32, + "learning_rate": 2.8981913652275384e-06, + "loss": 0.4545, + "step": 3108 + }, + { + "epoch": 1.32, + "learning_rate": 2.8952742123687284e-06, + "loss": 0.6122, + "step": 3112 + }, + { + "epoch": 1.32, + "learning_rate": 2.8923570595099187e-06, + "loss": 0.2894, + "step": 3116 + }, + { + "epoch": 1.33, + "learning_rate": 2.8894399066511087e-06, + "loss": 0.5543, + "step": 3120 + }, + { + "epoch": 1.33, + "learning_rate": 2.886522753792299e-06, + "loss": 0.524, + "step": 3124 + }, + { + "epoch": 1.33, + "learning_rate": 2.883605600933489e-06, + "loss": 0.5279, + "step": 3128 + }, + { + "epoch": 1.33, + "learning_rate": 2.8806884480746793e-06, + "loss": 0.3323, + "step": 3132 + }, + { + "epoch": 1.33, + "learning_rate": 2.8777712952158693e-06, + "loss": 0.5169, + "step": 3136 + }, + { + "epoch": 1.34, + "learning_rate": 2.8748541423570596e-06, + "loss": 0.4117, + "step": 3140 + }, + { + "epoch": 1.34, + "learning_rate": 2.87193698949825e-06, + "loss": 0.2958, + "step": 3144 + }, + { + "epoch": 1.34, + "learning_rate": 2.86901983663944e-06, + "loss": 0.5434, + "step": 3148 + }, + { + "epoch": 1.34, + "learning_rate": 2.8661026837806307e-06, + "loss": 0.3813, + "step": 3152 + }, + { + "epoch": 1.34, + "learning_rate": 2.8631855309218202e-06, + "loss": 0.4985, + "step": 3156 + }, + { + "epoch": 1.34, + "learning_rate": 2.860268378063011e-06, + "loss": 0.5145, + "step": 3160 + }, + { + "epoch": 1.35, + "learning_rate": 2.8573512252042005e-06, + "loss": 0.4176, + "step": 3164 + }, + { + "epoch": 1.35, + "learning_rate": 2.8544340723453913e-06, + "loss": 0.4034, + "step": 3168 + }, + { + "epoch": 1.35, + "learning_rate": 2.8515169194865817e-06, + "loss": 0.514, + "step": 3172 + }, + { + "epoch": 1.35, + "learning_rate": 2.8485997666277716e-06, + "loss": 0.4951, + "step": 3176 + }, + { + "epoch": 1.35, + "learning_rate": 2.845682613768962e-06, + "loss": 0.3946, + "step": 3180 + }, + { + "epoch": 1.35, + "learning_rate": 2.842765460910152e-06, + "loss": 0.4, + "step": 3184 + }, + { + "epoch": 1.36, + "learning_rate": 2.8398483080513423e-06, + "loss": 0.5394, + "step": 3188 + }, + { + "epoch": 1.36, + "learning_rate": 2.8369311551925322e-06, + "loss": 0.6328, + "step": 3192 + }, + { + "epoch": 1.36, + "learning_rate": 2.8340140023337226e-06, + "loss": 0.4449, + "step": 3196 + }, + { + "epoch": 1.36, + "learning_rate": 2.8310968494749125e-06, + "loss": 0.4787, + "step": 3200 + }, + { + "epoch": 1.36, + "learning_rate": 2.828179696616103e-06, + "loss": 0.3408, + "step": 3204 + }, + { + "epoch": 1.36, + "learning_rate": 2.8252625437572932e-06, + "loss": 0.4688, + "step": 3208 + }, + { + "epoch": 1.37, + "learning_rate": 2.822345390898483e-06, + "loss": 0.557, + "step": 3212 + }, + { + "epoch": 1.37, + "learning_rate": 2.8194282380396735e-06, + "loss": 0.5299, + "step": 3216 + }, + { + "epoch": 1.37, + "learning_rate": 2.8165110851808635e-06, + "loss": 0.468, + "step": 3220 + }, + { + "epoch": 1.37, + "learning_rate": 2.813593932322054e-06, + "loss": 0.3655, + "step": 3224 + }, + { + "epoch": 1.37, + "learning_rate": 2.8106767794632438e-06, + "loss": 0.2575, + "step": 3228 + }, + { + "epoch": 1.37, + "learning_rate": 2.807759626604434e-06, + "loss": 0.5705, + "step": 3232 + }, + { + "epoch": 1.38, + "learning_rate": 2.804842473745624e-06, + "loss": 0.4812, + "step": 3236 + }, + { + "epoch": 1.38, + "learning_rate": 2.801925320886815e-06, + "loss": 0.5761, + "step": 3240 + }, + { + "epoch": 1.38, + "learning_rate": 2.7990081680280052e-06, + "loss": 0.6039, + "step": 3244 + }, + { + "epoch": 1.38, + "learning_rate": 2.796091015169195e-06, + "loss": 0.3454, + "step": 3248 + }, + { + "epoch": 1.38, + "learning_rate": 2.7931738623103855e-06, + "loss": 0.5554, + "step": 3252 + }, + { + "epoch": 1.38, + "learning_rate": 2.7902567094515755e-06, + "loss": 0.4531, + "step": 3256 + }, + { + "epoch": 1.39, + "learning_rate": 2.787339556592766e-06, + "loss": 0.3332, + "step": 3260 + }, + { + "epoch": 1.39, + "learning_rate": 2.7844224037339558e-06, + "loss": 0.3159, + "step": 3264 + }, + { + "epoch": 1.39, + "learning_rate": 2.781505250875146e-06, + "loss": 0.498, + "step": 3268 + }, + { + "epoch": 1.39, + "learning_rate": 2.7785880980163365e-06, + "loss": 0.4386, + "step": 3272 + }, + { + "epoch": 1.39, + "learning_rate": 2.7756709451575264e-06, + "loss": 0.4108, + "step": 3276 + }, + { + "epoch": 1.39, + "learning_rate": 2.772753792298717e-06, + "loss": 0.3739, + "step": 3280 + }, + { + "epoch": 1.4, + "learning_rate": 2.7698366394399067e-06, + "loss": 0.3242, + "step": 3284 + }, + { + "epoch": 1.4, + "learning_rate": 2.766919486581097e-06, + "loss": 0.5226, + "step": 3288 + }, + { + "epoch": 1.4, + "learning_rate": 2.764002333722287e-06, + "loss": 0.655, + "step": 3292 + }, + { + "epoch": 1.4, + "learning_rate": 2.7610851808634774e-06, + "loss": 0.3379, + "step": 3296 + }, + { + "epoch": 1.4, + "learning_rate": 2.7581680280046673e-06, + "loss": 0.5461, + "step": 3300 + }, + { + "epoch": 1.4, + "learning_rate": 2.7552508751458577e-06, + "loss": 0.4614, + "step": 3304 + }, + { + "epoch": 1.41, + "learning_rate": 2.7523337222870485e-06, + "loss": 0.6258, + "step": 3308 + }, + { + "epoch": 1.41, + "learning_rate": 2.749416569428238e-06, + "loss": 0.3237, + "step": 3312 + }, + { + "epoch": 1.41, + "learning_rate": 2.7464994165694288e-06, + "loss": 0.4256, + "step": 3316 + }, + { + "epoch": 1.41, + "learning_rate": 2.7435822637106187e-06, + "loss": 0.4114, + "step": 3320 + }, + { + "epoch": 1.41, + "learning_rate": 2.740665110851809e-06, + "loss": 0.7195, + "step": 3324 + }, + { + "epoch": 1.41, + "learning_rate": 2.737747957992999e-06, + "loss": 0.5899, + "step": 3328 + }, + { + "epoch": 1.42, + "learning_rate": 2.7348308051341894e-06, + "loss": 0.4853, + "step": 3332 + }, + { + "epoch": 1.42, + "learning_rate": 2.7319136522753793e-06, + "loss": 0.4043, + "step": 3336 + }, + { + "epoch": 1.42, + "learning_rate": 2.7289964994165697e-06, + "loss": 0.5738, + "step": 3340 + }, + { + "epoch": 1.42, + "learning_rate": 2.72607934655776e-06, + "loss": 0.5704, + "step": 3344 + }, + { + "epoch": 1.42, + "learning_rate": 2.72316219369895e-06, + "loss": 0.6312, + "step": 3348 + }, + { + "epoch": 1.43, + "learning_rate": 2.7202450408401404e-06, + "loss": 0.3233, + "step": 3352 + }, + { + "epoch": 1.43, + "learning_rate": 2.7173278879813303e-06, + "loss": 0.5598, + "step": 3356 + }, + { + "epoch": 1.43, + "learning_rate": 2.7144107351225207e-06, + "loss": 0.3501, + "step": 3360 + }, + { + "epoch": 1.43, + "learning_rate": 2.7114935822637106e-06, + "loss": 0.3945, + "step": 3364 + }, + { + "epoch": 1.43, + "learning_rate": 2.708576429404901e-06, + "loss": 0.4801, + "step": 3368 + }, + { + "epoch": 1.43, + "learning_rate": 2.7056592765460913e-06, + "loss": 0.5775, + "step": 3372 + }, + { + "epoch": 1.44, + "learning_rate": 2.7027421236872813e-06, + "loss": 0.3939, + "step": 3376 + }, + { + "epoch": 1.44, + "learning_rate": 2.699824970828472e-06, + "loss": 0.4818, + "step": 3380 + }, + { + "epoch": 1.44, + "learning_rate": 2.6969078179696616e-06, + "loss": 0.4698, + "step": 3384 + }, + { + "epoch": 1.44, + "learning_rate": 2.6939906651108523e-06, + "loss": 0.3852, + "step": 3388 + }, + { + "epoch": 1.44, + "learning_rate": 2.691073512252042e-06, + "loss": 0.4516, + "step": 3392 + }, + { + "epoch": 1.44, + "learning_rate": 2.6881563593932326e-06, + "loss": 0.3766, + "step": 3396 + }, + { + "epoch": 1.45, + "learning_rate": 2.6852392065344226e-06, + "loss": 0.315, + "step": 3400 + }, + { + "epoch": 1.45, + "learning_rate": 2.682322053675613e-06, + "loss": 0.4293, + "step": 3404 + }, + { + "epoch": 1.45, + "learning_rate": 2.6794049008168033e-06, + "loss": 0.5251, + "step": 3408 + }, + { + "epoch": 1.45, + "learning_rate": 2.6764877479579932e-06, + "loss": 0.4517, + "step": 3412 + }, + { + "epoch": 1.45, + "learning_rate": 2.6735705950991836e-06, + "loss": 0.568, + "step": 3416 + }, + { + "epoch": 1.45, + "learning_rate": 2.6706534422403735e-06, + "loss": 0.5349, + "step": 3420 + }, + { + "epoch": 1.46, + "learning_rate": 2.667736289381564e-06, + "loss": 0.4316, + "step": 3424 + }, + { + "epoch": 1.46, + "learning_rate": 2.664819136522754e-06, + "loss": 0.372, + "step": 3428 + }, + { + "epoch": 1.46, + "learning_rate": 2.661901983663944e-06, + "loss": 0.4546, + "step": 3432 + }, + { + "epoch": 1.46, + "learning_rate": 2.658984830805134e-06, + "loss": 0.3791, + "step": 3436 + }, + { + "epoch": 1.46, + "learning_rate": 2.6560676779463245e-06, + "loss": 0.4281, + "step": 3440 + }, + { + "epoch": 1.46, + "learning_rate": 2.653150525087515e-06, + "loss": 0.3564, + "step": 3444 + }, + { + "epoch": 1.47, + "learning_rate": 2.650233372228705e-06, + "loss": 0.443, + "step": 3448 + }, + { + "epoch": 1.47, + "learning_rate": 2.647316219369895e-06, + "loss": 0.6713, + "step": 3452 + }, + { + "epoch": 1.47, + "learning_rate": 2.644399066511085e-06, + "loss": 0.2832, + "step": 3456 + }, + { + "epoch": 1.47, + "learning_rate": 2.641481913652276e-06, + "loss": 0.5267, + "step": 3460 + }, + { + "epoch": 1.47, + "learning_rate": 2.6385647607934654e-06, + "loss": 0.688, + "step": 3464 + }, + { + "epoch": 1.47, + "learning_rate": 2.635647607934656e-06, + "loss": 0.6487, + "step": 3468 + }, + { + "epoch": 1.48, + "learning_rate": 2.6327304550758466e-06, + "loss": 0.6653, + "step": 3472 + }, + { + "epoch": 1.48, + "learning_rate": 2.6298133022170365e-06, + "loss": 0.4218, + "step": 3476 + }, + { + "epoch": 1.48, + "learning_rate": 2.626896149358227e-06, + "loss": 0.5679, + "step": 3480 + }, + { + "epoch": 1.48, + "learning_rate": 2.623978996499417e-06, + "loss": 0.3429, + "step": 3484 + }, + { + "epoch": 1.48, + "learning_rate": 2.621061843640607e-06, + "loss": 0.4428, + "step": 3488 + }, + { + "epoch": 1.48, + "learning_rate": 2.618144690781797e-06, + "loss": 0.4073, + "step": 3492 + }, + { + "epoch": 1.49, + "learning_rate": 2.6152275379229875e-06, + "loss": 0.476, + "step": 3496 + }, + { + "epoch": 1.49, + "learning_rate": 2.6123103850641774e-06, + "loss": 0.3964, + "step": 3500 + }, + { + "epoch": 1.49, + "learning_rate": 2.6093932322053678e-06, + "loss": 0.4971, + "step": 3504 + }, + { + "epoch": 1.49, + "learning_rate": 2.606476079346558e-06, + "loss": 0.5309, + "step": 3508 + }, + { + "epoch": 1.49, + "learning_rate": 2.603558926487748e-06, + "loss": 0.7094, + "step": 3512 + }, + { + "epoch": 1.49, + "learning_rate": 2.6006417736289384e-06, + "loss": 0.4345, + "step": 3516 + }, + { + "epoch": 1.5, + "learning_rate": 2.5977246207701284e-06, + "loss": 0.5559, + "step": 3520 + }, + { + "epoch": 1.5, + "learning_rate": 2.5948074679113187e-06, + "loss": 0.519, + "step": 3524 + }, + { + "epoch": 1.5, + "learning_rate": 2.5918903150525087e-06, + "loss": 0.4054, + "step": 3528 + }, + { + "epoch": 1.5, + "learning_rate": 2.588973162193699e-06, + "loss": 0.4334, + "step": 3532 + }, + { + "epoch": 1.5, + "learning_rate": 2.586056009334889e-06, + "loss": 0.3535, + "step": 3536 + }, + { + "epoch": 1.51, + "learning_rate": 2.5831388564760793e-06, + "loss": 0.6168, + "step": 3540 + }, + { + "epoch": 1.51, + "learning_rate": 2.58022170361727e-06, + "loss": 0.3484, + "step": 3544 + }, + { + "epoch": 1.51, + "learning_rate": 2.57730455075846e-06, + "loss": 0.3373, + "step": 3548 + }, + { + "epoch": 1.51, + "learning_rate": 2.5743873978996504e-06, + "loss": 0.5002, + "step": 3552 + }, + { + "epoch": 1.51, + "learning_rate": 2.5714702450408404e-06, + "loss": 0.4713, + "step": 3556 + }, + { + "epoch": 1.51, + "learning_rate": 2.5685530921820307e-06, + "loss": 0.4236, + "step": 3560 + }, + { + "epoch": 1.52, + "learning_rate": 2.5656359393232207e-06, + "loss": 0.49, + "step": 3564 + }, + { + "epoch": 1.52, + "learning_rate": 2.562718786464411e-06, + "loss": 0.3107, + "step": 3568 + }, + { + "epoch": 1.52, + "learning_rate": 2.5598016336056014e-06, + "loss": 0.6111, + "step": 3572 + }, + { + "epoch": 1.52, + "learning_rate": 2.5568844807467913e-06, + "loss": 0.4386, + "step": 3576 + }, + { + "epoch": 1.52, + "learning_rate": 2.5539673278879817e-06, + "loss": 0.434, + "step": 3580 + }, + { + "epoch": 1.52, + "learning_rate": 2.5510501750291716e-06, + "loss": 0.3085, + "step": 3584 + }, + { + "epoch": 1.53, + "learning_rate": 2.548133022170362e-06, + "loss": 0.6368, + "step": 3588 + }, + { + "epoch": 1.53, + "learning_rate": 2.545215869311552e-06, + "loss": 0.3797, + "step": 3592 + }, + { + "epoch": 1.53, + "learning_rate": 2.5422987164527423e-06, + "loss": 0.5329, + "step": 3596 + }, + { + "epoch": 1.53, + "learning_rate": 2.5393815635939322e-06, + "loss": 0.5015, + "step": 3600 + }, + { + "epoch": 1.53, + "learning_rate": 2.5364644107351226e-06, + "loss": 0.333, + "step": 3604 + }, + { + "epoch": 1.53, + "learning_rate": 2.5335472578763134e-06, + "loss": 0.349, + "step": 3608 + }, + { + "epoch": 1.54, + "learning_rate": 2.530630105017503e-06, + "loss": 0.5365, + "step": 3612 + }, + { + "epoch": 1.54, + "learning_rate": 2.5277129521586937e-06, + "loss": 0.6777, + "step": 3616 + }, + { + "epoch": 1.54, + "learning_rate": 2.524795799299883e-06, + "loss": 0.5432, + "step": 3620 + }, + { + "epoch": 1.54, + "learning_rate": 2.521878646441074e-06, + "loss": 0.2763, + "step": 3624 + }, + { + "epoch": 1.54, + "learning_rate": 2.518961493582264e-06, + "loss": 0.5183, + "step": 3628 + }, + { + "epoch": 1.54, + "learning_rate": 2.5160443407234543e-06, + "loss": 0.4486, + "step": 3632 + }, + { + "epoch": 1.55, + "learning_rate": 2.513127187864644e-06, + "loss": 0.3521, + "step": 3636 + }, + { + "epoch": 1.55, + "learning_rate": 2.5102100350058346e-06, + "loss": 0.5112, + "step": 3640 + }, + { + "epoch": 1.55, + "learning_rate": 2.507292882147025e-06, + "loss": 0.378, + "step": 3644 + }, + { + "epoch": 1.55, + "learning_rate": 2.504375729288215e-06, + "loss": 0.4282, + "step": 3648 + }, + { + "epoch": 1.55, + "learning_rate": 2.5014585764294052e-06, + "loss": 0.5283, + "step": 3652 + }, + { + "epoch": 1.55, + "learning_rate": 2.4985414235705956e-06, + "loss": 0.5269, + "step": 3656 + }, + { + "epoch": 1.56, + "learning_rate": 2.4956242707117855e-06, + "loss": 0.3568, + "step": 3660 + }, + { + "epoch": 1.56, + "learning_rate": 2.492707117852976e-06, + "loss": 0.3911, + "step": 3664 + }, + { + "epoch": 1.56, + "learning_rate": 2.489789964994166e-06, + "loss": 0.554, + "step": 3668 + }, + { + "epoch": 1.56, + "learning_rate": 2.486872812135356e-06, + "loss": 0.6132, + "step": 3672 + }, + { + "epoch": 1.56, + "learning_rate": 2.483955659276546e-06, + "loss": 0.5473, + "step": 3676 + }, + { + "epoch": 1.56, + "learning_rate": 2.4810385064177365e-06, + "loss": 0.472, + "step": 3680 + }, + { + "epoch": 1.57, + "learning_rate": 2.4781213535589264e-06, + "loss": 0.4616, + "step": 3684 + }, + { + "epoch": 1.57, + "learning_rate": 2.4752042007001172e-06, + "loss": 0.5357, + "step": 3688 + }, + { + "epoch": 1.57, + "learning_rate": 2.472287047841307e-06, + "loss": 0.5629, + "step": 3692 + }, + { + "epoch": 1.57, + "learning_rate": 2.4693698949824975e-06, + "loss": 0.5284, + "step": 3696 + }, + { + "epoch": 1.57, + "learning_rate": 2.4664527421236875e-06, + "loss": 0.3988, + "step": 3700 + }, + { + "epoch": 1.57, + "learning_rate": 2.463535589264878e-06, + "loss": 0.7007, + "step": 3704 + }, + { + "epoch": 1.58, + "learning_rate": 2.4606184364060678e-06, + "loss": 0.2642, + "step": 3708 + }, + { + "epoch": 1.58, + "learning_rate": 2.457701283547258e-06, + "loss": 0.6179, + "step": 3712 + }, + { + "epoch": 1.58, + "learning_rate": 2.454784130688448e-06, + "loss": 0.5595, + "step": 3716 + }, + { + "epoch": 1.58, + "learning_rate": 2.4518669778296384e-06, + "loss": 0.4074, + "step": 3720 + }, + { + "epoch": 1.58, + "learning_rate": 2.448949824970829e-06, + "loss": 0.3635, + "step": 3724 + }, + { + "epoch": 1.59, + "learning_rate": 2.446032672112019e-06, + "loss": 0.3888, + "step": 3728 + }, + { + "epoch": 1.59, + "learning_rate": 2.443115519253209e-06, + "loss": 0.2755, + "step": 3732 + }, + { + "epoch": 1.59, + "learning_rate": 2.4401983663943995e-06, + "loss": 0.4511, + "step": 3736 + }, + { + "epoch": 1.59, + "learning_rate": 2.4372812135355894e-06, + "loss": 0.5516, + "step": 3740 + }, + { + "epoch": 1.59, + "learning_rate": 2.4343640606767798e-06, + "loss": 0.4041, + "step": 3744 + }, + { + "epoch": 1.59, + "learning_rate": 2.4314469078179697e-06, + "loss": 0.1592, + "step": 3748 + }, + { + "epoch": 1.6, + "learning_rate": 2.42852975495916e-06, + "loss": 0.5583, + "step": 3752 + }, + { + "epoch": 1.6, + "learning_rate": 2.4256126021003504e-06, + "loss": 0.445, + "step": 3756 + }, + { + "epoch": 1.6, + "learning_rate": 2.4226954492415404e-06, + "loss": 0.3201, + "step": 3760 + }, + { + "epoch": 1.6, + "learning_rate": 2.4197782963827307e-06, + "loss": 0.521, + "step": 3764 + }, + { + "epoch": 1.6, + "learning_rate": 2.416861143523921e-06, + "loss": 0.4229, + "step": 3768 + }, + { + "epoch": 1.6, + "learning_rate": 2.413943990665111e-06, + "loss": 0.4319, + "step": 3772 + }, + { + "epoch": 1.61, + "learning_rate": 2.4110268378063014e-06, + "loss": 0.4708, + "step": 3776 + }, + { + "epoch": 1.61, + "learning_rate": 2.4081096849474913e-06, + "loss": 0.5419, + "step": 3780 + }, + { + "epoch": 1.61, + "learning_rate": 2.4051925320886817e-06, + "loss": 0.4454, + "step": 3784 + }, + { + "epoch": 1.61, + "learning_rate": 2.402275379229872e-06, + "loss": 0.4804, + "step": 3788 + }, + { + "epoch": 1.61, + "learning_rate": 2.399358226371062e-06, + "loss": 0.5686, + "step": 3792 + }, + { + "epoch": 1.61, + "learning_rate": 2.3964410735122523e-06, + "loss": 0.5354, + "step": 3796 + }, + { + "epoch": 1.62, + "learning_rate": 2.3935239206534423e-06, + "loss": 0.5101, + "step": 3800 + }, + { + "epoch": 1.62, + "learning_rate": 2.3906067677946326e-06, + "loss": 0.3946, + "step": 3804 + }, + { + "epoch": 1.62, + "learning_rate": 2.387689614935823e-06, + "loss": 0.4951, + "step": 3808 + }, + { + "epoch": 1.62, + "learning_rate": 2.384772462077013e-06, + "loss": 0.4952, + "step": 3812 + }, + { + "epoch": 1.62, + "learning_rate": 2.3818553092182033e-06, + "loss": 0.5854, + "step": 3816 + }, + { + "epoch": 1.62, + "learning_rate": 2.3789381563593932e-06, + "loss": 0.4074, + "step": 3820 + }, + { + "epoch": 1.63, + "learning_rate": 2.3760210035005836e-06, + "loss": 0.4058, + "step": 3824 + }, + { + "epoch": 1.63, + "learning_rate": 2.373103850641774e-06, + "loss": 0.4952, + "step": 3828 + }, + { + "epoch": 1.63, + "learning_rate": 2.370186697782964e-06, + "loss": 0.5502, + "step": 3832 + }, + { + "epoch": 1.63, + "learning_rate": 2.3672695449241543e-06, + "loss": 0.4379, + "step": 3836 + }, + { + "epoch": 1.63, + "learning_rate": 2.364352392065344e-06, + "loss": 0.4695, + "step": 3840 + }, + { + "epoch": 1.63, + "learning_rate": 2.3614352392065346e-06, + "loss": 0.5127, + "step": 3844 + }, + { + "epoch": 1.64, + "learning_rate": 2.3585180863477245e-06, + "loss": 0.4037, + "step": 3848 + }, + { + "epoch": 1.64, + "learning_rate": 2.355600933488915e-06, + "loss": 0.3387, + "step": 3852 + }, + { + "epoch": 1.64, + "learning_rate": 2.3526837806301052e-06, + "loss": 0.5302, + "step": 3856 + }, + { + "epoch": 1.64, + "learning_rate": 2.3497666277712956e-06, + "loss": 0.4549, + "step": 3860 + }, + { + "epoch": 1.64, + "learning_rate": 2.3468494749124855e-06, + "loss": 0.4871, + "step": 3864 + }, + { + "epoch": 1.64, + "learning_rate": 2.343932322053676e-06, + "loss": 0.4167, + "step": 3868 + }, + { + "epoch": 1.65, + "learning_rate": 2.341015169194866e-06, + "loss": 0.4498, + "step": 3872 + }, + { + "epoch": 1.65, + "learning_rate": 2.338098016336056e-06, + "loss": 0.3591, + "step": 3876 + }, + { + "epoch": 1.65, + "learning_rate": 2.335180863477246e-06, + "loss": 0.3389, + "step": 3880 + }, + { + "epoch": 1.65, + "learning_rate": 2.3322637106184365e-06, + "loss": 0.4688, + "step": 3884 + }, + { + "epoch": 1.65, + "learning_rate": 2.329346557759627e-06, + "loss": 0.4151, + "step": 3888 + }, + { + "epoch": 1.65, + "learning_rate": 2.3264294049008172e-06, + "loss": 0.5132, + "step": 3892 + }, + { + "epoch": 1.66, + "learning_rate": 2.323512252042007e-06, + "loss": 0.3682, + "step": 3896 + }, + { + "epoch": 1.66, + "learning_rate": 2.3205950991831975e-06, + "loss": 0.3668, + "step": 3900 + }, + { + "epoch": 1.66, + "learning_rate": 2.3176779463243875e-06, + "loss": 0.2637, + "step": 3904 + }, + { + "epoch": 1.66, + "learning_rate": 2.314760793465578e-06, + "loss": 0.5291, + "step": 3908 + }, + { + "epoch": 1.66, + "learning_rate": 2.3118436406067678e-06, + "loss": 0.5459, + "step": 3912 + }, + { + "epoch": 1.66, + "learning_rate": 2.308926487747958e-06, + "loss": 0.5774, + "step": 3916 + }, + { + "epoch": 1.67, + "learning_rate": 2.306009334889148e-06, + "loss": 0.5955, + "step": 3920 + }, + { + "epoch": 1.67, + "learning_rate": 2.303092182030339e-06, + "loss": 0.2941, + "step": 3924 + }, + { + "epoch": 1.67, + "learning_rate": 2.300175029171529e-06, + "loss": 0.4735, + "step": 3928 + }, + { + "epoch": 1.67, + "learning_rate": 2.297257876312719e-06, + "loss": 0.27, + "step": 3932 + }, + { + "epoch": 1.67, + "learning_rate": 2.294340723453909e-06, + "loss": 0.496, + "step": 3936 + }, + { + "epoch": 1.68, + "learning_rate": 2.2914235705950995e-06, + "loss": 0.279, + "step": 3940 + }, + { + "epoch": 1.68, + "learning_rate": 2.2885064177362894e-06, + "loss": 0.3848, + "step": 3944 + }, + { + "epoch": 1.68, + "learning_rate": 2.2855892648774798e-06, + "loss": 0.4868, + "step": 3948 + }, + { + "epoch": 1.68, + "learning_rate": 2.2826721120186697e-06, + "loss": 0.5328, + "step": 3952 + }, + { + "epoch": 1.68, + "learning_rate": 2.2797549591598605e-06, + "loss": 0.3838, + "step": 3956 + }, + { + "epoch": 1.68, + "learning_rate": 2.2768378063010504e-06, + "loss": 0.4603, + "step": 3960 + }, + { + "epoch": 1.69, + "learning_rate": 2.2739206534422408e-06, + "loss": 0.515, + "step": 3964 + }, + { + "epoch": 1.69, + "learning_rate": 2.2710035005834307e-06, + "loss": 0.4371, + "step": 3968 + }, + { + "epoch": 1.69, + "learning_rate": 2.268086347724621e-06, + "loss": 0.5561, + "step": 3972 + }, + { + "epoch": 1.69, + "learning_rate": 2.265169194865811e-06, + "loss": 0.3882, + "step": 3976 + }, + { + "epoch": 1.69, + "learning_rate": 2.2622520420070014e-06, + "loss": 0.4774, + "step": 3980 + }, + { + "epoch": 1.69, + "learning_rate": 2.2593348891481913e-06, + "loss": 0.6546, + "step": 3984 + }, + { + "epoch": 1.7, + "learning_rate": 2.2564177362893817e-06, + "loss": 0.4335, + "step": 3988 + }, + { + "epoch": 1.7, + "learning_rate": 2.253500583430572e-06, + "loss": 0.3352, + "step": 3992 + }, + { + "epoch": 1.7, + "learning_rate": 2.2505834305717624e-06, + "loss": 0.2946, + "step": 3996 + }, + { + "epoch": 1.7, + "learning_rate": 2.2476662777129523e-06, + "loss": 0.2657, + "step": 4000 + }, + { + "epoch": 1.7, + "learning_rate": 2.2447491248541427e-06, + "loss": 0.4859, + "step": 4004 + }, + { + "epoch": 1.7, + "learning_rate": 2.2418319719953326e-06, + "loss": 0.3047, + "step": 4008 + }, + { + "epoch": 1.71, + "learning_rate": 2.238914819136523e-06, + "loss": 0.5828, + "step": 4012 + }, + { + "epoch": 1.71, + "learning_rate": 2.235997666277713e-06, + "loss": 0.4141, + "step": 4016 + }, + { + "epoch": 1.71, + "learning_rate": 2.2330805134189033e-06, + "loss": 0.4643, + "step": 4020 + }, + { + "epoch": 1.71, + "learning_rate": 2.2301633605600937e-06, + "loss": 0.5517, + "step": 4024 + }, + { + "epoch": 1.71, + "learning_rate": 2.2272462077012836e-06, + "loss": 0.551, + "step": 4028 + }, + { + "epoch": 1.71, + "learning_rate": 2.224329054842474e-06, + "loss": 0.4237, + "step": 4032 + }, + { + "epoch": 1.72, + "learning_rate": 2.2214119019836643e-06, + "loss": 0.4345, + "step": 4036 + }, + { + "epoch": 1.72, + "learning_rate": 2.2184947491248543e-06, + "loss": 0.428, + "step": 4040 + }, + { + "epoch": 1.72, + "learning_rate": 2.2155775962660446e-06, + "loss": 0.3503, + "step": 4044 + }, + { + "epoch": 1.72, + "learning_rate": 2.2126604434072346e-06, + "loss": 0.4281, + "step": 4048 + }, + { + "epoch": 1.72, + "learning_rate": 2.209743290548425e-06, + "loss": 0.5451, + "step": 4052 + }, + { + "epoch": 1.72, + "learning_rate": 2.2068261376896153e-06, + "loss": 0.6199, + "step": 4056 + }, + { + "epoch": 1.73, + "learning_rate": 2.2039089848308052e-06, + "loss": 0.6851, + "step": 4060 + }, + { + "epoch": 1.73, + "learning_rate": 2.2009918319719956e-06, + "loss": 0.3541, + "step": 4064 + }, + { + "epoch": 1.73, + "learning_rate": 2.1980746791131855e-06, + "loss": 0.4333, + "step": 4068 + }, + { + "epoch": 1.73, + "learning_rate": 2.195157526254376e-06, + "loss": 0.413, + "step": 4072 + }, + { + "epoch": 1.73, + "learning_rate": 2.1922403733955663e-06, + "loss": 0.4385, + "step": 4076 + }, + { + "epoch": 1.73, + "learning_rate": 2.189323220536756e-06, + "loss": 0.3163, + "step": 4080 + }, + { + "epoch": 1.74, + "learning_rate": 2.1864060676779466e-06, + "loss": 0.4678, + "step": 4084 + }, + { + "epoch": 1.74, + "learning_rate": 2.183488914819137e-06, + "loss": 0.2889, + "step": 4088 + }, + { + "epoch": 1.74, + "learning_rate": 2.180571761960327e-06, + "loss": 0.3339, + "step": 4092 + }, + { + "epoch": 1.74, + "learning_rate": 2.1776546091015172e-06, + "loss": 0.4381, + "step": 4096 + }, + { + "epoch": 1.74, + "learning_rate": 2.174737456242707e-06, + "loss": 0.4926, + "step": 4100 + }, + { + "epoch": 1.74, + "learning_rate": 2.1718203033838975e-06, + "loss": 0.4147, + "step": 4104 + }, + { + "epoch": 1.75, + "learning_rate": 2.1689031505250875e-06, + "loss": 0.525, + "step": 4108 + }, + { + "epoch": 1.75, + "learning_rate": 2.165985997666278e-06, + "loss": 0.6573, + "step": 4112 + }, + { + "epoch": 1.75, + "learning_rate": 2.163068844807468e-06, + "loss": 0.4188, + "step": 4116 + }, + { + "epoch": 1.75, + "learning_rate": 2.1601516919486586e-06, + "loss": 0.2622, + "step": 4120 + }, + { + "epoch": 1.75, + "learning_rate": 2.1572345390898485e-06, + "loss": 0.4533, + "step": 4124 + }, + { + "epoch": 1.76, + "learning_rate": 2.154317386231039e-06, + "loss": 0.5007, + "step": 4128 + }, + { + "epoch": 1.76, + "learning_rate": 2.151400233372229e-06, + "loss": 0.5307, + "step": 4132 + }, + { + "epoch": 1.76, + "learning_rate": 2.148483080513419e-06, + "loss": 0.4071, + "step": 4136 + }, + { + "epoch": 1.76, + "learning_rate": 2.145565927654609e-06, + "loss": 0.5252, + "step": 4140 + }, + { + "epoch": 1.76, + "learning_rate": 2.1426487747957995e-06, + "loss": 0.3672, + "step": 4144 + }, + { + "epoch": 1.76, + "learning_rate": 2.1397316219369894e-06, + "loss": 0.3608, + "step": 4148 + }, + { + "epoch": 1.77, + "learning_rate": 2.1368144690781798e-06, + "loss": 0.4581, + "step": 4152 + }, + { + "epoch": 1.77, + "learning_rate": 2.13389731621937e-06, + "loss": 0.4953, + "step": 4156 + }, + { + "epoch": 1.77, + "learning_rate": 2.1309801633605605e-06, + "loss": 0.515, + "step": 4160 + }, + { + "epoch": 1.77, + "learning_rate": 2.1280630105017504e-06, + "loss": 0.4272, + "step": 4164 + }, + { + "epoch": 1.77, + "learning_rate": 2.1251458576429408e-06, + "loss": 0.5713, + "step": 4168 + }, + { + "epoch": 1.77, + "learning_rate": 2.1222287047841307e-06, + "loss": 0.3837, + "step": 4172 + }, + { + "epoch": 1.78, + "learning_rate": 2.119311551925321e-06, + "loss": 0.4367, + "step": 4176 + }, + { + "epoch": 1.78, + "learning_rate": 2.116394399066511e-06, + "loss": 0.3561, + "step": 4180 + }, + { + "epoch": 1.78, + "learning_rate": 2.1134772462077014e-06, + "loss": 0.2825, + "step": 4184 + }, + { + "epoch": 1.78, + "learning_rate": 2.1105600933488917e-06, + "loss": 0.2891, + "step": 4188 + }, + { + "epoch": 1.78, + "learning_rate": 2.107642940490082e-06, + "loss": 0.4617, + "step": 4192 + }, + { + "epoch": 1.78, + "learning_rate": 2.104725787631272e-06, + "loss": 0.4423, + "step": 4196 + }, + { + "epoch": 1.79, + "learning_rate": 2.1018086347724624e-06, + "loss": 0.2344, + "step": 4200 + }, + { + "epoch": 1.79, + "learning_rate": 2.0988914819136523e-06, + "loss": 0.5355, + "step": 4204 + }, + { + "epoch": 1.79, + "learning_rate": 2.0959743290548427e-06, + "loss": 0.427, + "step": 4208 + }, + { + "epoch": 1.79, + "learning_rate": 2.0930571761960326e-06, + "loss": 0.3997, + "step": 4212 + }, + { + "epoch": 1.79, + "learning_rate": 2.090140023337223e-06, + "loss": 0.3945, + "step": 4216 + }, + { + "epoch": 1.79, + "learning_rate": 2.0872228704784134e-06, + "loss": 0.3998, + "step": 4220 + }, + { + "epoch": 1.8, + "learning_rate": 2.0843057176196037e-06, + "loss": 0.4695, + "step": 4224 + }, + { + "epoch": 1.8, + "learning_rate": 2.0813885647607937e-06, + "loss": 0.3051, + "step": 4228 + }, + { + "epoch": 1.8, + "learning_rate": 2.078471411901984e-06, + "loss": 0.4195, + "step": 4232 + }, + { + "epoch": 1.8, + "learning_rate": 2.075554259043174e-06, + "loss": 0.4064, + "step": 4236 + }, + { + "epoch": 1.8, + "learning_rate": 2.0726371061843643e-06, + "loss": 0.4709, + "step": 4240 + }, + { + "epoch": 1.8, + "learning_rate": 2.0697199533255543e-06, + "loss": 0.6638, + "step": 4244 + }, + { + "epoch": 1.81, + "learning_rate": 2.0668028004667446e-06, + "loss": 0.4436, + "step": 4248 + }, + { + "epoch": 1.81, + "learning_rate": 2.0638856476079346e-06, + "loss": 0.3434, + "step": 4252 + }, + { + "epoch": 1.81, + "learning_rate": 2.060968494749125e-06, + "loss": 0.2871, + "step": 4256 + }, + { + "epoch": 1.81, + "learning_rate": 2.0580513418903153e-06, + "loss": 0.2696, + "step": 4260 + }, + { + "epoch": 1.81, + "learning_rate": 2.0551341890315057e-06, + "loss": 0.373, + "step": 4264 + }, + { + "epoch": 1.81, + "learning_rate": 2.0522170361726956e-06, + "loss": 0.4656, + "step": 4268 + }, + { + "epoch": 1.82, + "learning_rate": 2.049299883313886e-06, + "loss": 0.3027, + "step": 4272 + }, + { + "epoch": 1.82, + "learning_rate": 2.046382730455076e-06, + "loss": 0.7287, + "step": 4276 + }, + { + "epoch": 1.82, + "learning_rate": 2.0434655775962663e-06, + "loss": 0.3299, + "step": 4280 + }, + { + "epoch": 1.82, + "learning_rate": 2.040548424737456e-06, + "loss": 0.3556, + "step": 4284 + }, + { + "epoch": 1.82, + "learning_rate": 2.0376312718786466e-06, + "loss": 0.3275, + "step": 4288 + }, + { + "epoch": 1.82, + "learning_rate": 2.034714119019837e-06, + "loss": 0.334, + "step": 4292 + }, + { + "epoch": 1.83, + "learning_rate": 2.031796966161027e-06, + "loss": 0.4437, + "step": 4296 + }, + { + "epoch": 1.83, + "learning_rate": 2.0288798133022172e-06, + "loss": 0.4776, + "step": 4300 + }, + { + "epoch": 1.83, + "learning_rate": 2.0259626604434076e-06, + "loss": 0.497, + "step": 4304 + }, + { + "epoch": 1.83, + "learning_rate": 2.0230455075845975e-06, + "loss": 0.6054, + "step": 4308 + }, + { + "epoch": 1.83, + "learning_rate": 2.020128354725788e-06, + "loss": 0.3877, + "step": 4312 + }, + { + "epoch": 1.84, + "learning_rate": 2.017211201866978e-06, + "loss": 0.3442, + "step": 4316 + }, + { + "epoch": 1.84, + "learning_rate": 2.014294049008168e-06, + "loss": 0.5557, + "step": 4320 + }, + { + "epoch": 1.84, + "learning_rate": 2.0113768961493586e-06, + "loss": 0.5137, + "step": 4324 + }, + { + "epoch": 1.84, + "learning_rate": 2.0084597432905485e-06, + "loss": 0.6162, + "step": 4328 + }, + { + "epoch": 1.84, + "learning_rate": 2.005542590431739e-06, + "loss": 0.4181, + "step": 4332 + }, + { + "epoch": 1.84, + "learning_rate": 2.002625437572929e-06, + "loss": 0.3354, + "step": 4336 + }, + { + "epoch": 1.85, + "learning_rate": 1.999708284714119e-06, + "loss": 0.3924, + "step": 4340 + }, + { + "epoch": 1.85, + "learning_rate": 1.9967911318553095e-06, + "loss": 0.3781, + "step": 4344 + }, + { + "epoch": 1.85, + "learning_rate": 1.9938739789964995e-06, + "loss": 0.4388, + "step": 4348 + }, + { + "epoch": 1.85, + "learning_rate": 1.99095682613769e-06, + "loss": 0.4512, + "step": 4352 + }, + { + "epoch": 1.85, + "learning_rate": 1.98803967327888e-06, + "loss": 0.4171, + "step": 4356 + }, + { + "epoch": 1.85, + "learning_rate": 1.98512252042007e-06, + "loss": 0.3768, + "step": 4360 + }, + { + "epoch": 1.86, + "learning_rate": 1.9822053675612605e-06, + "loss": 0.3517, + "step": 4364 + }, + { + "epoch": 1.86, + "learning_rate": 1.9792882147024504e-06, + "loss": 0.412, + "step": 4368 + }, + { + "epoch": 1.86, + "learning_rate": 1.9763710618436408e-06, + "loss": 0.2887, + "step": 4372 + }, + { + "epoch": 1.86, + "learning_rate": 1.9734539089848307e-06, + "loss": 0.3032, + "step": 4376 + }, + { + "epoch": 1.86, + "learning_rate": 1.970536756126021e-06, + "loss": 0.3046, + "step": 4380 + }, + { + "epoch": 1.86, + "learning_rate": 1.9676196032672114e-06, + "loss": 0.3144, + "step": 4384 + }, + { + "epoch": 1.87, + "learning_rate": 1.964702450408402e-06, + "loss": 0.533, + "step": 4388 + }, + { + "epoch": 1.87, + "learning_rate": 1.9617852975495917e-06, + "loss": 0.3993, + "step": 4392 + }, + { + "epoch": 1.87, + "learning_rate": 1.958868144690782e-06, + "loss": 0.3253, + "step": 4396 + }, + { + "epoch": 1.87, + "learning_rate": 1.955950991831972e-06, + "loss": 0.4888, + "step": 4400 + }, + { + "epoch": 1.87, + "learning_rate": 1.9530338389731624e-06, + "loss": 0.5357, + "step": 4404 + }, + { + "epoch": 1.87, + "learning_rate": 1.9501166861143524e-06, + "loss": 0.3057, + "step": 4408 + }, + { + "epoch": 1.88, + "learning_rate": 1.9471995332555427e-06, + "loss": 0.3553, + "step": 4412 + }, + { + "epoch": 1.88, + "learning_rate": 1.9442823803967327e-06, + "loss": 0.3878, + "step": 4416 + }, + { + "epoch": 1.88, + "learning_rate": 1.9413652275379234e-06, + "loss": 0.315, + "step": 4420 + }, + { + "epoch": 1.88, + "learning_rate": 1.9384480746791134e-06, + "loss": 0.3791, + "step": 4424 + }, + { + "epoch": 1.88, + "learning_rate": 1.9355309218203037e-06, + "loss": 0.4092, + "step": 4428 + }, + { + "epoch": 1.88, + "learning_rate": 1.9326137689614937e-06, + "loss": 0.3911, + "step": 4432 + }, + { + "epoch": 1.89, + "learning_rate": 1.929696616102684e-06, + "loss": 0.3135, + "step": 4436 + }, + { + "epoch": 1.89, + "learning_rate": 1.926779463243874e-06, + "loss": 0.3574, + "step": 4440 + }, + { + "epoch": 1.89, + "learning_rate": 1.9238623103850643e-06, + "loss": 0.3147, + "step": 4444 + }, + { + "epoch": 1.89, + "learning_rate": 1.9209451575262543e-06, + "loss": 0.5802, + "step": 4448 + }, + { + "epoch": 1.89, + "learning_rate": 1.9180280046674446e-06, + "loss": 0.5169, + "step": 4452 + }, + { + "epoch": 1.89, + "learning_rate": 1.915110851808635e-06, + "loss": 0.4599, + "step": 4456 + }, + { + "epoch": 1.9, + "learning_rate": 1.9121936989498254e-06, + "loss": 0.3763, + "step": 4460 + }, + { + "epoch": 1.9, + "learning_rate": 1.9092765460910153e-06, + "loss": 0.4633, + "step": 4464 + }, + { + "epoch": 1.9, + "learning_rate": 1.9063593932322055e-06, + "loss": 0.462, + "step": 4468 + }, + { + "epoch": 1.9, + "learning_rate": 1.9034422403733956e-06, + "loss": 0.4908, + "step": 4472 + }, + { + "epoch": 1.9, + "learning_rate": 1.900525087514586e-06, + "loss": 0.3367, + "step": 4476 + }, + { + "epoch": 1.9, + "learning_rate": 1.8976079346557761e-06, + "loss": 0.4497, + "step": 4480 + }, + { + "epoch": 1.91, + "learning_rate": 1.8946907817969663e-06, + "loss": 0.3253, + "step": 4484 + }, + { + "epoch": 1.91, + "learning_rate": 1.8917736289381566e-06, + "loss": 0.5464, + "step": 4488 + }, + { + "epoch": 1.91, + "learning_rate": 1.8888564760793468e-06, + "loss": 0.3004, + "step": 4492 + }, + { + "epoch": 1.91, + "learning_rate": 1.885939323220537e-06, + "loss": 0.3876, + "step": 4496 + }, + { + "epoch": 1.91, + "learning_rate": 1.883022170361727e-06, + "loss": 0.4443, + "step": 4500 + } + ], + "logging_steps": 4, + "max_steps": 7056, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "total_flos": 34569317253120.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}