{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.998299319727891, "eval_steps": 800, "global_step": 4700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0, "loss": 2.0847, "step": 4 }, { "epoch": 0.0, "learning_rate": 0, "loss": 2.5726, "step": 8 }, { "epoch": 0.01, "learning_rate": 0, "loss": 2.2415, "step": 12 }, { "epoch": 0.01, "learning_rate": 0, "loss": 1.8935, "step": 16 }, { "epoch": 0.01, "learning_rate": 0, "loss": 2.1994, "step": 20 }, { "epoch": 0.01, "learning_rate": 0, "loss": 2.1794, "step": 24 }, { "epoch": 0.01, "learning_rate": 1.3082402064781276e-06, "loss": 1.5146, "step": 28 }, { "epoch": 0.01, "learning_rate": 1.9623603097171917e-06, "loss": 0.8902, "step": 32 }, { "epoch": 0.02, "learning_rate": 2.3449960410798955e-06, "loss": 0.8521, "step": 36 }, { "epoch": 0.02, "learning_rate": 2.6164804129562553e-06, "loss": 0.7475, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.8270600516195322e-06, "loss": 0.6407, "step": 44 }, { "epoch": 0.02, "learning_rate": 2.99911614431896e-06, "loss": 0.5996, "step": 48 }, { "epoch": 0.02, "learning_rate": 3.144587497923142e-06, "loss": 0.733, "step": 52 }, { "epoch": 0.02, "learning_rate": 3.2706005161953197e-06, "loss": 0.7661, "step": 56 }, { "epoch": 0.03, "learning_rate": 3.381751875681663e-06, "loss": 0.5892, "step": 60 }, { "epoch": 0.03, "learning_rate": 3.4811801548585962e-06, "loss": 0.8165, "step": 64 }, { "epoch": 0.03, "learning_rate": 3.5711239740096387e-06, "loss": 0.4039, "step": 68 }, { "epoch": 0.03, "learning_rate": 3.6532362475580235e-06, "loss": 0.6821, "step": 72 }, { "epoch": 0.03, "learning_rate": 3.7287722169385123e-06, "loss": 0.8229, "step": 76 }, { "epoch": 0.03, "learning_rate": 3.7987076011622065e-06, "loss": 0.6447, "step": 80 }, { "epoch": 0.04, "learning_rate": 3.8638158862213e-06, "loss": 0.4965, "step": 84 }, { "epoch": 0.04, "learning_rate": 3.924720619434383e-06, "loss": 0.655, "step": 88 }, { "epoch": 0.04, "learning_rate": 3.9819318221826385e-06, "loss": 0.6818, "step": 92 }, { "epoch": 0.04, "learning_rate": 4.035871978920728e-06, "loss": 0.743, "step": 96 }, { "epoch": 0.04, "learning_rate": 4.086894990123906e-06, "loss": 0.8767, "step": 100 }, { "epoch": 0.04, "learning_rate": 4.135300258097659e-06, "loss": 0.6235, "step": 104 }, { "epoch": 0.05, "learning_rate": 4.18134333252491e-06, "loss": 0.6074, "step": 108 }, { "epoch": 0.05, "learning_rate": 4.225244077248703e-06, "loss": 0.6712, "step": 112 }, { "epoch": 0.05, "learning_rate": 4.267193020182443e-06, "loss": 0.7978, "step": 116 }, { "epoch": 0.05, "learning_rate": 4.3073563507970875e-06, "loss": 0.4904, "step": 120 }, { "epoch": 0.05, "learning_rate": 4.345879896760937e-06, "loss": 0.4131, "step": 124 }, { "epoch": 0.05, "learning_rate": 4.3828923201775755e-06, "loss": 0.6295, "step": 128 }, { "epoch": 0.06, "learning_rate": 4.418507710283431e-06, "loss": 0.9921, "step": 132 }, { "epoch": 0.06, "learning_rate": 4.45282770440127e-06, "loss": 0.6667, "step": 136 }, { "epoch": 0.06, "learning_rate": 4.485943236544386e-06, "loss": 0.657, "step": 140 }, { "epoch": 0.06, "learning_rate": 4.517935989460364e-06, "loss": 0.7705, "step": 144 }, { "epoch": 0.06, "learning_rate": 4.54887960849498e-06, "loss": 0.866, "step": 148 }, { "epoch": 0.06, "learning_rate": 4.578840722673447e-06, "loss": 0.8428, "step": 152 }, { "epoch": 0.07, "learning_rate": 4.607879808611406e-06, "loss": 0.7855, "step": 156 }, { "epoch": 0.07, "learning_rate": 4.636051925421702e-06, "loss": 0.7451, "step": 160 }, { "epoch": 0.07, "learning_rate": 4.663407343064547e-06, "loss": 0.5792, "step": 164 }, { "epoch": 0.07, "learning_rate": 4.689992082159791e-06, "loss": 0.9155, "step": 168 }, { "epoch": 0.07, "learning_rate": 4.715848379822425e-06, "loss": 0.7189, "step": 172 }, { "epoch": 0.07, "learning_rate": 4.74101509336297e-06, "loss": 0.5087, "step": 176 }, { "epoch": 0.08, "learning_rate": 4.76552805154028e-06, "loss": 0.9235, "step": 180 }, { "epoch": 0.08, "learning_rate": 4.789420361336724e-06, "loss": 0.7996, "step": 184 }, { "epoch": 0.08, "learning_rate": 4.812722676847563e-06, "loss": 0.5372, "step": 188 }, { "epoch": 0.08, "learning_rate": 4.835463435763974e-06, "loss": 0.8693, "step": 192 }, { "epoch": 0.08, "learning_rate": 4.857669068026358e-06, "loss": 0.6677, "step": 196 }, { "epoch": 0.09, "learning_rate": 4.879364180487766e-06, "loss": 0.5588, "step": 200 }, { "epoch": 0.09, "learning_rate": 4.900571720823068e-06, "loss": 0.6868, "step": 204 }, { "epoch": 0.09, "learning_rate": 4.921313123421507e-06, "loss": 0.6329, "step": 208 }, { "epoch": 0.09, "learning_rate": 4.941608439588058e-06, "loss": 0.5781, "step": 212 }, { "epoch": 0.09, "learning_rate": 4.9614764540361516e-06, "loss": 0.8552, "step": 216 }, { "epoch": 0.09, "learning_rate": 4.980934789368156e-06, "loss": 0.9137, "step": 220 }, { "epoch": 0.1, "learning_rate": 5e-06, "loss": 0.9521, "step": 224 }, { "epoch": 0.1, "learning_rate": 4.997812135355893e-06, "loss": 0.527, "step": 228 }, { "epoch": 0.1, "learning_rate": 4.994894982497083e-06, "loss": 0.6134, "step": 232 }, { "epoch": 0.1, "learning_rate": 4.991977829638274e-06, "loss": 0.9135, "step": 236 }, { "epoch": 0.1, "learning_rate": 4.989060676779464e-06, "loss": 0.6525, "step": 240 }, { "epoch": 0.1, "learning_rate": 4.986143523920654e-06, "loss": 0.9277, "step": 244 }, { "epoch": 0.11, "learning_rate": 4.983226371061844e-06, "loss": 1.0032, "step": 248 }, { "epoch": 0.11, "learning_rate": 4.980309218203034e-06, "loss": 0.7763, "step": 252 }, { "epoch": 0.11, "learning_rate": 4.977392065344224e-06, "loss": 0.7304, "step": 256 }, { "epoch": 0.11, "learning_rate": 4.974474912485414e-06, "loss": 0.7923, "step": 260 }, { "epoch": 0.11, "learning_rate": 4.971557759626604e-06, "loss": 0.9643, "step": 264 }, { "epoch": 0.11, "learning_rate": 4.968640606767795e-06, "loss": 0.6124, "step": 268 }, { "epoch": 0.12, "learning_rate": 4.965723453908986e-06, "loss": 0.5817, "step": 272 }, { "epoch": 0.12, "learning_rate": 4.962806301050176e-06, "loss": 0.7712, "step": 276 }, { "epoch": 0.12, "learning_rate": 4.959889148191366e-06, "loss": 0.4688, "step": 280 }, { "epoch": 0.12, "learning_rate": 4.956971995332556e-06, "loss": 0.7547, "step": 284 }, { "epoch": 0.12, "learning_rate": 4.954054842473746e-06, "loss": 0.7743, "step": 288 }, { "epoch": 0.12, "learning_rate": 4.951137689614936e-06, "loss": 0.71, "step": 292 }, { "epoch": 0.13, "learning_rate": 4.948220536756126e-06, "loss": 0.806, "step": 296 }, { "epoch": 0.13, "learning_rate": 4.945303383897317e-06, "loss": 0.5964, "step": 300 }, { "epoch": 0.13, "learning_rate": 4.942386231038507e-06, "loss": 0.6401, "step": 304 }, { "epoch": 0.13, "learning_rate": 4.939469078179697e-06, "loss": 0.887, "step": 308 }, { "epoch": 0.13, "learning_rate": 4.936551925320887e-06, "loss": 0.4782, "step": 312 }, { "epoch": 0.13, "learning_rate": 4.933634772462078e-06, "loss": 0.6871, "step": 316 }, { "epoch": 0.14, "learning_rate": 4.930717619603268e-06, "loss": 0.7753, "step": 320 }, { "epoch": 0.14, "learning_rate": 4.9278004667444575e-06, "loss": 0.671, "step": 324 }, { "epoch": 0.14, "learning_rate": 4.9248833138856475e-06, "loss": 0.5642, "step": 328 }, { "epoch": 0.14, "learning_rate": 4.921966161026838e-06, "loss": 0.5292, "step": 332 }, { "epoch": 0.14, "learning_rate": 4.919049008168029e-06, "loss": 0.712, "step": 336 }, { "epoch": 0.14, "learning_rate": 4.916131855309218e-06, "loss": 0.7544, "step": 340 }, { "epoch": 0.15, "learning_rate": 4.913214702450409e-06, "loss": 0.7463, "step": 344 }, { "epoch": 0.15, "learning_rate": 4.910297549591599e-06, "loss": 0.5543, "step": 348 }, { "epoch": 0.15, "learning_rate": 4.90738039673279e-06, "loss": 0.6754, "step": 352 }, { "epoch": 0.15, "learning_rate": 4.90446324387398e-06, "loss": 0.6768, "step": 356 }, { "epoch": 0.15, "learning_rate": 4.9015460910151695e-06, "loss": 0.6952, "step": 360 }, { "epoch": 0.15, "learning_rate": 4.8986289381563595e-06, "loss": 0.8318, "step": 364 }, { "epoch": 0.16, "learning_rate": 4.89571178529755e-06, "loss": 0.6242, "step": 368 }, { "epoch": 0.16, "learning_rate": 4.89279463243874e-06, "loss": 0.9217, "step": 372 }, { "epoch": 0.16, "learning_rate": 4.88987747957993e-06, "loss": 0.4959, "step": 376 }, { "epoch": 0.16, "learning_rate": 4.886960326721121e-06, "loss": 0.8203, "step": 380 }, { "epoch": 0.16, "learning_rate": 4.884043173862311e-06, "loss": 0.7889, "step": 384 }, { "epoch": 0.16, "learning_rate": 4.881126021003501e-06, "loss": 0.6178, "step": 388 }, { "epoch": 0.17, "learning_rate": 4.878208868144691e-06, "loss": 1.0419, "step": 392 }, { "epoch": 0.17, "learning_rate": 4.8752917152858815e-06, "loss": 0.9276, "step": 396 }, { "epoch": 0.17, "learning_rate": 4.8723745624270714e-06, "loss": 0.6587, "step": 400 }, { "epoch": 0.17, "learning_rate": 4.869457409568261e-06, "loss": 0.5996, "step": 404 }, { "epoch": 0.17, "learning_rate": 4.866540256709452e-06, "loss": 0.7183, "step": 408 }, { "epoch": 0.18, "learning_rate": 4.863623103850642e-06, "loss": 0.5382, "step": 412 }, { "epoch": 0.18, "learning_rate": 4.860705950991833e-06, "loss": 0.6379, "step": 416 }, { "epoch": 0.18, "learning_rate": 4.857788798133022e-06, "loss": 0.6881, "step": 420 }, { "epoch": 0.18, "learning_rate": 4.854871645274213e-06, "loss": 0.708, "step": 424 }, { "epoch": 0.18, "learning_rate": 4.851954492415403e-06, "loss": 0.5051, "step": 428 }, { "epoch": 0.18, "learning_rate": 4.8490373395565935e-06, "loss": 0.6938, "step": 432 }, { "epoch": 0.19, "learning_rate": 4.8461201866977834e-06, "loss": 0.7395, "step": 436 }, { "epoch": 0.19, "learning_rate": 4.843203033838973e-06, "loss": 0.8443, "step": 440 }, { "epoch": 0.19, "learning_rate": 4.840285880980164e-06, "loss": 0.6688, "step": 444 }, { "epoch": 0.19, "learning_rate": 4.837368728121354e-06, "loss": 0.7241, "step": 448 }, { "epoch": 0.19, "learning_rate": 4.834451575262544e-06, "loss": 0.6182, "step": 452 }, { "epoch": 0.19, "learning_rate": 4.831534422403734e-06, "loss": 0.5827, "step": 456 }, { "epoch": 0.2, "learning_rate": 4.828617269544925e-06, "loss": 0.5035, "step": 460 }, { "epoch": 0.2, "learning_rate": 4.825700116686115e-06, "loss": 0.6138, "step": 464 }, { "epoch": 0.2, "learning_rate": 4.822782963827305e-06, "loss": 0.7013, "step": 468 }, { "epoch": 0.2, "learning_rate": 4.8198658109684954e-06, "loss": 0.5948, "step": 472 }, { "epoch": 0.2, "learning_rate": 4.816948658109685e-06, "loss": 0.6721, "step": 476 }, { "epoch": 0.2, "learning_rate": 4.814031505250875e-06, "loss": 0.5647, "step": 480 }, { "epoch": 0.21, "learning_rate": 4.811114352392065e-06, "loss": 0.7064, "step": 484 }, { "epoch": 0.21, "learning_rate": 4.808197199533256e-06, "loss": 0.6298, "step": 488 }, { "epoch": 0.21, "learning_rate": 4.805280046674446e-06, "loss": 0.526, "step": 492 }, { "epoch": 0.21, "learning_rate": 4.802362893815637e-06, "loss": 0.5712, "step": 496 }, { "epoch": 0.21, "learning_rate": 4.799445740956827e-06, "loss": 0.7529, "step": 500 }, { "epoch": 0.21, "learning_rate": 4.796528588098017e-06, "loss": 1.0192, "step": 504 }, { "epoch": 0.22, "learning_rate": 4.793611435239207e-06, "loss": 0.9215, "step": 508 }, { "epoch": 0.22, "learning_rate": 4.790694282380397e-06, "loss": 0.8837, "step": 512 }, { "epoch": 0.22, "learning_rate": 4.787777129521587e-06, "loss": 0.8698, "step": 516 }, { "epoch": 0.22, "learning_rate": 4.784859976662777e-06, "loss": 0.7368, "step": 520 }, { "epoch": 0.22, "learning_rate": 4.781942823803968e-06, "loss": 0.5266, "step": 524 }, { "epoch": 0.22, "learning_rate": 4.779025670945158e-06, "loss": 0.6157, "step": 528 }, { "epoch": 0.23, "learning_rate": 4.776108518086348e-06, "loss": 0.5561, "step": 532 }, { "epoch": 0.23, "learning_rate": 4.773191365227539e-06, "loss": 0.5638, "step": 536 }, { "epoch": 0.23, "learning_rate": 4.770274212368729e-06, "loss": 0.7433, "step": 540 }, { "epoch": 0.23, "learning_rate": 4.7673570595099186e-06, "loss": 0.4696, "step": 544 }, { "epoch": 0.23, "learning_rate": 4.7644399066511085e-06, "loss": 0.4952, "step": 548 }, { "epoch": 0.23, "learning_rate": 4.761522753792299e-06, "loss": 0.6735, "step": 552 }, { "epoch": 0.24, "learning_rate": 4.758605600933489e-06, "loss": 0.6756, "step": 556 }, { "epoch": 0.24, "learning_rate": 4.755688448074679e-06, "loss": 0.8812, "step": 560 }, { "epoch": 0.24, "learning_rate": 4.752771295215869e-06, "loss": 0.5452, "step": 564 }, { "epoch": 0.24, "learning_rate": 4.74985414235706e-06, "loss": 0.5208, "step": 568 }, { "epoch": 0.24, "learning_rate": 4.746936989498251e-06, "loss": 0.8629, "step": 572 }, { "epoch": 0.24, "learning_rate": 4.744019836639441e-06, "loss": 0.6721, "step": 576 }, { "epoch": 0.25, "learning_rate": 4.7411026837806305e-06, "loss": 0.7716, "step": 580 }, { "epoch": 0.25, "learning_rate": 4.7381855309218205e-06, "loss": 0.3795, "step": 584 }, { "epoch": 0.25, "learning_rate": 4.735268378063011e-06, "loss": 0.6173, "step": 588 }, { "epoch": 0.25, "learning_rate": 4.732351225204201e-06, "loss": 0.5626, "step": 592 }, { "epoch": 0.25, "learning_rate": 4.729434072345391e-06, "loss": 0.7762, "step": 596 }, { "epoch": 0.26, "learning_rate": 4.726516919486582e-06, "loss": 0.658, "step": 600 }, { "epoch": 0.26, "learning_rate": 4.723599766627772e-06, "loss": 0.6109, "step": 604 }, { "epoch": 0.26, "learning_rate": 4.720682613768962e-06, "loss": 0.7475, "step": 608 }, { "epoch": 0.26, "learning_rate": 4.717765460910152e-06, "loss": 0.6066, "step": 612 }, { "epoch": 0.26, "learning_rate": 4.7148483080513425e-06, "loss": 0.4173, "step": 616 }, { "epoch": 0.26, "learning_rate": 4.7119311551925325e-06, "loss": 0.6297, "step": 620 }, { "epoch": 0.27, "learning_rate": 4.709014002333722e-06, "loss": 0.655, "step": 624 }, { "epoch": 0.27, "learning_rate": 4.706096849474912e-06, "loss": 0.8621, "step": 628 }, { "epoch": 0.27, "learning_rate": 4.703179696616103e-06, "loss": 0.5386, "step": 632 }, { "epoch": 0.27, "learning_rate": 4.700262543757294e-06, "loss": 0.7833, "step": 636 }, { "epoch": 0.27, "learning_rate": 4.697345390898483e-06, "loss": 0.5759, "step": 640 }, { "epoch": 0.27, "learning_rate": 4.694428238039674e-06, "loss": 0.7122, "step": 644 }, { "epoch": 0.28, "learning_rate": 4.691511085180864e-06, "loss": 0.6034, "step": 648 }, { "epoch": 0.28, "learning_rate": 4.6885939323220545e-06, "loss": 0.8556, "step": 652 }, { "epoch": 0.28, "learning_rate": 4.6856767794632445e-06, "loss": 0.6421, "step": 656 }, { "epoch": 0.28, "learning_rate": 4.682759626604434e-06, "loss": 0.6008, "step": 660 }, { "epoch": 0.28, "learning_rate": 4.679842473745624e-06, "loss": 0.7663, "step": 664 }, { "epoch": 0.28, "learning_rate": 4.676925320886815e-06, "loss": 0.5928, "step": 668 }, { "epoch": 0.29, "learning_rate": 4.674008168028005e-06, "loss": 0.7544, "step": 672 }, { "epoch": 0.29, "learning_rate": 4.671091015169195e-06, "loss": 0.5778, "step": 676 }, { "epoch": 0.29, "learning_rate": 4.668173862310386e-06, "loss": 0.7285, "step": 680 }, { "epoch": 0.29, "learning_rate": 4.665256709451576e-06, "loss": 0.5125, "step": 684 }, { "epoch": 0.29, "learning_rate": 4.662339556592766e-06, "loss": 0.6717, "step": 688 }, { "epoch": 0.29, "learning_rate": 4.659422403733956e-06, "loss": 0.8691, "step": 692 }, { "epoch": 0.3, "learning_rate": 4.656505250875146e-06, "loss": 0.4915, "step": 696 }, { "epoch": 0.3, "learning_rate": 4.653588098016336e-06, "loss": 0.789, "step": 700 }, { "epoch": 0.3, "learning_rate": 4.650670945157526e-06, "loss": 0.9127, "step": 704 }, { "epoch": 0.3, "learning_rate": 4.647753792298717e-06, "loss": 0.6563, "step": 708 }, { "epoch": 0.3, "learning_rate": 4.644836639439907e-06, "loss": 0.4648, "step": 712 }, { "epoch": 0.3, "learning_rate": 4.641919486581098e-06, "loss": 0.6367, "step": 716 }, { "epoch": 0.31, "learning_rate": 4.639002333722287e-06, "loss": 0.7212, "step": 720 }, { "epoch": 0.31, "learning_rate": 4.636085180863478e-06, "loss": 0.6034, "step": 724 }, { "epoch": 0.31, "learning_rate": 4.633168028004668e-06, "loss": 0.4951, "step": 728 }, { "epoch": 0.31, "learning_rate": 4.630250875145858e-06, "loss": 0.4122, "step": 732 }, { "epoch": 0.31, "learning_rate": 4.627333722287048e-06, "loss": 0.467, "step": 736 }, { "epoch": 0.31, "learning_rate": 4.624416569428238e-06, "loss": 0.7467, "step": 740 }, { "epoch": 0.32, "learning_rate": 4.621499416569429e-06, "loss": 0.6229, "step": 744 }, { "epoch": 0.32, "learning_rate": 4.618582263710619e-06, "loss": 0.7651, "step": 748 }, { "epoch": 0.32, "learning_rate": 4.615665110851809e-06, "loss": 0.8044, "step": 752 }, { "epoch": 0.32, "learning_rate": 4.612747957992999e-06, "loss": 0.4718, "step": 756 }, { "epoch": 0.32, "learning_rate": 4.60983080513419e-06, "loss": 0.5431, "step": 760 }, { "epoch": 0.32, "learning_rate": 4.60691365227538e-06, "loss": 0.5484, "step": 764 }, { "epoch": 0.33, "learning_rate": 4.6039964994165695e-06, "loss": 0.8095, "step": 768 }, { "epoch": 0.33, "learning_rate": 4.60107934655776e-06, "loss": 0.6142, "step": 772 }, { "epoch": 0.33, "learning_rate": 4.59816219369895e-06, "loss": 0.6883, "step": 776 }, { "epoch": 0.33, "learning_rate": 4.59524504084014e-06, "loss": 0.7341, "step": 780 }, { "epoch": 0.33, "learning_rate": 4.59232788798133e-06, "loss": 0.9504, "step": 784 }, { "epoch": 0.34, "learning_rate": 4.589410735122521e-06, "loss": 0.7445, "step": 788 }, { "epoch": 0.34, "learning_rate": 4.587222870478414e-06, "loss": 0.7637, "step": 792 }, { "epoch": 0.34, "learning_rate": 4.5843057176196035e-06, "loss": 1.0596, "step": 796 }, { "epoch": 0.34, "learning_rate": 4.5813885647607935e-06, "loss": 0.5224, "step": 800 }, { "epoch": 0.34, "learning_rate": 4.578471411901984e-06, "loss": 0.6143, "step": 804 }, { "epoch": 0.34, "learning_rate": 4.575554259043174e-06, "loss": 0.425, "step": 808 }, { "epoch": 0.35, "learning_rate": 4.572637106184364e-06, "loss": 0.7089, "step": 812 }, { "epoch": 0.35, "learning_rate": 4.569719953325555e-06, "loss": 0.6148, "step": 816 }, { "epoch": 0.35, "learning_rate": 4.566802800466745e-06, "loss": 0.5647, "step": 820 }, { "epoch": 0.35, "learning_rate": 4.563885647607935e-06, "loss": 0.7274, "step": 824 }, { "epoch": 0.35, "learning_rate": 4.560968494749125e-06, "loss": 0.8243, "step": 828 }, { "epoch": 0.35, "learning_rate": 4.5580513418903155e-06, "loss": 0.5999, "step": 832 }, { "epoch": 0.36, "learning_rate": 4.5551341890315054e-06, "loss": 0.4957, "step": 836 }, { "epoch": 0.36, "learning_rate": 4.552217036172696e-06, "loss": 0.7591, "step": 840 }, { "epoch": 0.36, "learning_rate": 4.549299883313886e-06, "loss": 0.4182, "step": 844 }, { "epoch": 0.36, "learning_rate": 4.546382730455076e-06, "loss": 0.5964, "step": 848 }, { "epoch": 0.36, "learning_rate": 4.543465577596267e-06, "loss": 0.6322, "step": 852 }, { "epoch": 0.36, "learning_rate": 4.540548424737457e-06, "loss": 0.4782, "step": 856 }, { "epoch": 0.37, "learning_rate": 4.537631271878647e-06, "loss": 0.5883, "step": 860 }, { "epoch": 0.37, "learning_rate": 4.534714119019837e-06, "loss": 0.5512, "step": 864 }, { "epoch": 0.37, "learning_rate": 4.5317969661610275e-06, "loss": 0.6806, "step": 868 }, { "epoch": 0.37, "learning_rate": 4.5288798133022174e-06, "loss": 0.6884, "step": 872 }, { "epoch": 0.37, "learning_rate": 4.525962660443407e-06, "loss": 0.5685, "step": 876 }, { "epoch": 0.37, "learning_rate": 4.523045507584598e-06, "loss": 0.5336, "step": 880 }, { "epoch": 0.38, "learning_rate": 4.520128354725788e-06, "loss": 0.508, "step": 884 }, { "epoch": 0.38, "learning_rate": 4.517211201866978e-06, "loss": 0.4367, "step": 888 }, { "epoch": 0.38, "learning_rate": 4.514294049008168e-06, "loss": 0.9553, "step": 892 }, { "epoch": 0.38, "learning_rate": 4.511376896149359e-06, "loss": 0.8762, "step": 896 }, { "epoch": 0.38, "learning_rate": 4.508459743290549e-06, "loss": 0.8939, "step": 900 }, { "epoch": 0.38, "learning_rate": 4.505542590431739e-06, "loss": 0.5025, "step": 904 }, { "epoch": 0.39, "learning_rate": 4.5026254375729286e-06, "loss": 0.8958, "step": 908 }, { "epoch": 0.39, "learning_rate": 4.499708284714119e-06, "loss": 0.7813, "step": 912 }, { "epoch": 0.39, "learning_rate": 4.49679113185531e-06, "loss": 0.6857, "step": 916 }, { "epoch": 0.39, "learning_rate": 4.4938739789965e-06, "loss": 0.7198, "step": 920 }, { "epoch": 0.39, "learning_rate": 4.49095682613769e-06, "loss": 0.5257, "step": 924 }, { "epoch": 0.39, "learning_rate": 4.48803967327888e-06, "loss": 0.542, "step": 928 }, { "epoch": 0.4, "learning_rate": 4.485122520420071e-06, "loss": 0.4848, "step": 932 }, { "epoch": 0.4, "learning_rate": 4.482205367561261e-06, "loss": 0.7363, "step": 936 }, { "epoch": 0.4, "learning_rate": 4.479288214702451e-06, "loss": 0.8313, "step": 940 }, { "epoch": 0.4, "learning_rate": 4.476371061843641e-06, "loss": 0.6864, "step": 944 }, { "epoch": 0.4, "learning_rate": 4.473453908984831e-06, "loss": 0.7911, "step": 948 }, { "epoch": 0.4, "learning_rate": 4.470536756126021e-06, "loss": 0.4418, "step": 952 }, { "epoch": 0.41, "learning_rate": 4.467619603267211e-06, "loss": 0.7467, "step": 956 }, { "epoch": 0.41, "learning_rate": 4.464702450408402e-06, "loss": 0.5449, "step": 960 }, { "epoch": 0.41, "learning_rate": 4.461785297549592e-06, "loss": 0.5699, "step": 964 }, { "epoch": 0.41, "learning_rate": 4.458868144690782e-06, "loss": 0.5095, "step": 968 }, { "epoch": 0.41, "learning_rate": 4.455950991831972e-06, "loss": 0.6546, "step": 972 }, { "epoch": 0.41, "learning_rate": 4.453033838973163e-06, "loss": 0.5868, "step": 976 }, { "epoch": 0.42, "learning_rate": 4.450116686114353e-06, "loss": 0.7554, "step": 980 }, { "epoch": 0.42, "learning_rate": 4.4471995332555425e-06, "loss": 0.7272, "step": 984 }, { "epoch": 0.42, "learning_rate": 4.444282380396733e-06, "loss": 0.5532, "step": 988 }, { "epoch": 0.42, "learning_rate": 4.441365227537923e-06, "loss": 0.5618, "step": 992 }, { "epoch": 0.42, "learning_rate": 4.438448074679114e-06, "loss": 0.5518, "step": 996 }, { "epoch": 0.43, "learning_rate": 4.435530921820304e-06, "loss": 0.7264, "step": 1000 }, { "epoch": 0.43, "learning_rate": 4.432613768961494e-06, "loss": 0.5429, "step": 1004 }, { "epoch": 0.43, "learning_rate": 4.429696616102684e-06, "loss": 0.6046, "step": 1008 }, { "epoch": 0.43, "learning_rate": 4.426779463243875e-06, "loss": 0.9232, "step": 1012 }, { "epoch": 0.43, "learning_rate": 4.4238623103850645e-06, "loss": 0.5118, "step": 1016 }, { "epoch": 0.43, "learning_rate": 4.4209451575262545e-06, "loss": 0.643, "step": 1020 }, { "epoch": 0.44, "learning_rate": 4.418028004667445e-06, "loss": 0.7772, "step": 1024 }, { "epoch": 0.44, "learning_rate": 4.415110851808635e-06, "loss": 0.3894, "step": 1028 }, { "epoch": 0.44, "learning_rate": 4.412193698949825e-06, "loss": 0.557, "step": 1032 }, { "epoch": 0.44, "learning_rate": 4.409276546091015e-06, "loss": 0.6952, "step": 1036 }, { "epoch": 0.44, "learning_rate": 4.406359393232206e-06, "loss": 0.7862, "step": 1040 }, { "epoch": 0.44, "learning_rate": 4.403442240373396e-06, "loss": 0.5889, "step": 1044 }, { "epoch": 0.45, "learning_rate": 4.400525087514586e-06, "loss": 0.8459, "step": 1048 }, { "epoch": 0.45, "learning_rate": 4.3976079346557765e-06, "loss": 0.4839, "step": 1052 }, { "epoch": 0.45, "learning_rate": 4.3946907817969665e-06, "loss": 0.5376, "step": 1056 }, { "epoch": 0.45, "learning_rate": 4.391773628938157e-06, "loss": 0.6901, "step": 1060 }, { "epoch": 0.45, "learning_rate": 4.388856476079346e-06, "loss": 0.6058, "step": 1064 }, { "epoch": 0.45, "learning_rate": 4.385939323220537e-06, "loss": 0.7326, "step": 1068 }, { "epoch": 0.46, "learning_rate": 4.383022170361727e-06, "loss": 0.5892, "step": 1072 }, { "epoch": 0.46, "learning_rate": 4.380105017502918e-06, "loss": 0.5026, "step": 1076 }, { "epoch": 0.46, "learning_rate": 4.377187864644108e-06, "loss": 0.6181, "step": 1080 }, { "epoch": 0.46, "learning_rate": 4.374270711785298e-06, "loss": 0.6994, "step": 1084 }, { "epoch": 0.46, "learning_rate": 4.3713535589264885e-06, "loss": 0.7436, "step": 1088 }, { "epoch": 0.46, "learning_rate": 4.3684364060676785e-06, "loss": 0.8489, "step": 1092 }, { "epoch": 0.47, "learning_rate": 4.365519253208868e-06, "loss": 0.4957, "step": 1096 }, { "epoch": 0.47, "learning_rate": 4.362602100350058e-06, "loss": 0.5761, "step": 1100 }, { "epoch": 0.47, "learning_rate": 4.359684947491249e-06, "loss": 0.5296, "step": 1104 }, { "epoch": 0.47, "learning_rate": 4.356767794632439e-06, "loss": 0.4946, "step": 1108 }, { "epoch": 0.47, "learning_rate": 4.353850641773629e-06, "loss": 0.5818, "step": 1112 }, { "epoch": 0.47, "learning_rate": 4.35093348891482e-06, "loss": 0.7388, "step": 1116 }, { "epoch": 0.48, "learning_rate": 4.34801633605601e-06, "loss": 0.7138, "step": 1120 }, { "epoch": 0.48, "learning_rate": 4.3450991831972e-06, "loss": 0.5748, "step": 1124 }, { "epoch": 0.48, "learning_rate": 4.34218203033839e-06, "loss": 0.6169, "step": 1128 }, { "epoch": 0.48, "learning_rate": 4.33926487747958e-06, "loss": 0.5938, "step": 1132 }, { "epoch": 0.48, "learning_rate": 4.33634772462077e-06, "loss": 0.6766, "step": 1136 }, { "epoch": 0.48, "learning_rate": 4.333430571761961e-06, "loss": 0.6961, "step": 1140 }, { "epoch": 0.49, "learning_rate": 4.330513418903151e-06, "loss": 0.7088, "step": 1144 }, { "epoch": 0.49, "learning_rate": 4.328325554259044e-06, "loss": 0.7907, "step": 1148 }, { "epoch": 0.49, "learning_rate": 4.325408401400234e-06, "loss": 0.5619, "step": 1152 }, { "epoch": 0.49, "learning_rate": 4.322491248541424e-06, "loss": 0.9847, "step": 1156 }, { "epoch": 0.49, "learning_rate": 4.319574095682614e-06, "loss": 0.5704, "step": 1160 }, { "epoch": 0.49, "learning_rate": 4.316656942823804e-06, "loss": 0.729, "step": 1164 }, { "epoch": 0.5, "learning_rate": 4.313739789964994e-06, "loss": 0.492, "step": 1168 }, { "epoch": 0.5, "learning_rate": 4.310822637106184e-06, "loss": 0.7427, "step": 1172 }, { "epoch": 0.5, "learning_rate": 4.307905484247375e-06, "loss": 0.5757, "step": 1176 }, { "epoch": 0.5, "learning_rate": 4.304988331388565e-06, "loss": 0.5469, "step": 1180 }, { "epoch": 0.5, "learning_rate": 4.302071178529756e-06, "loss": 0.605, "step": 1184 }, { "epoch": 0.51, "learning_rate": 4.299154025670946e-06, "loss": 0.4903, "step": 1188 }, { "epoch": 0.51, "learning_rate": 4.296236872812136e-06, "loss": 0.4479, "step": 1192 }, { "epoch": 0.51, "learning_rate": 4.293319719953326e-06, "loss": 0.5118, "step": 1196 }, { "epoch": 0.51, "learning_rate": 4.290402567094516e-06, "loss": 0.5867, "step": 1200 }, { "epoch": 0.51, "learning_rate": 4.287485414235706e-06, "loss": 0.499, "step": 1204 }, { "epoch": 0.51, "learning_rate": 4.284568261376896e-06, "loss": 0.5443, "step": 1208 }, { "epoch": 0.52, "learning_rate": 4.281651108518087e-06, "loss": 0.7138, "step": 1212 }, { "epoch": 0.52, "learning_rate": 4.278733955659277e-06, "loss": 0.7214, "step": 1216 }, { "epoch": 0.52, "learning_rate": 4.275816802800467e-06, "loss": 0.4798, "step": 1220 }, { "epoch": 0.52, "learning_rate": 4.272899649941658e-06, "loss": 0.6752, "step": 1224 }, { "epoch": 0.52, "learning_rate": 4.2699824970828476e-06, "loss": 0.4885, "step": 1228 }, { "epoch": 0.52, "learning_rate": 4.2670653442240375e-06, "loss": 0.3946, "step": 1232 }, { "epoch": 0.53, "learning_rate": 4.2641481913652275e-06, "loss": 0.6857, "step": 1236 }, { "epoch": 0.53, "learning_rate": 4.261231038506418e-06, "loss": 0.5198, "step": 1240 }, { "epoch": 0.53, "learning_rate": 4.258313885647608e-06, "loss": 0.7563, "step": 1244 }, { "epoch": 0.53, "learning_rate": 4.255396732788798e-06, "loss": 0.6442, "step": 1248 }, { "epoch": 0.53, "learning_rate": 4.252479579929989e-06, "loss": 0.539, "step": 1252 }, { "epoch": 0.53, "learning_rate": 4.249562427071179e-06, "loss": 0.53, "step": 1256 }, { "epoch": 0.54, "learning_rate": 4.24664527421237e-06, "loss": 0.5773, "step": 1260 }, { "epoch": 0.54, "learning_rate": 4.2437281213535596e-06, "loss": 0.5297, "step": 1264 }, { "epoch": 0.54, "learning_rate": 4.2408109684947495e-06, "loss": 0.7455, "step": 1268 }, { "epoch": 0.54, "learning_rate": 4.2378938156359394e-06, "loss": 0.3915, "step": 1272 }, { "epoch": 0.54, "learning_rate": 4.23497666277713e-06, "loss": 0.7302, "step": 1276 }, { "epoch": 0.54, "learning_rate": 4.23205950991832e-06, "loss": 0.5329, "step": 1280 }, { "epoch": 0.55, "learning_rate": 4.22914235705951e-06, "loss": 0.7163, "step": 1284 }, { "epoch": 0.55, "learning_rate": 4.226225204200701e-06, "loss": 0.5996, "step": 1288 }, { "epoch": 0.55, "learning_rate": 4.223308051341891e-06, "loss": 0.6922, "step": 1292 }, { "epoch": 0.55, "learning_rate": 4.220390898483081e-06, "loss": 0.7819, "step": 1296 }, { "epoch": 0.55, "learning_rate": 4.217473745624271e-06, "loss": 0.4539, "step": 1300 }, { "epoch": 0.55, "learning_rate": 4.2145565927654615e-06, "loss": 0.6079, "step": 1304 }, { "epoch": 0.56, "learning_rate": 4.2116394399066514e-06, "loss": 0.4377, "step": 1308 }, { "epoch": 0.56, "learning_rate": 4.208722287047841e-06, "loss": 0.4358, "step": 1312 }, { "epoch": 0.56, "learning_rate": 4.205805134189031e-06, "loss": 0.6401, "step": 1316 }, { "epoch": 0.56, "learning_rate": 4.202887981330222e-06, "loss": 0.6677, "step": 1320 }, { "epoch": 0.56, "learning_rate": 4.199970828471413e-06, "loss": 0.572, "step": 1324 }, { "epoch": 0.56, "learning_rate": 4.197053675612602e-06, "loss": 0.5973, "step": 1328 }, { "epoch": 0.57, "learning_rate": 4.194136522753793e-06, "loss": 0.679, "step": 1332 }, { "epoch": 0.57, "learning_rate": 4.191219369894983e-06, "loss": 0.5599, "step": 1336 }, { "epoch": 0.57, "learning_rate": 4.1883022170361735e-06, "loss": 0.5171, "step": 1340 }, { "epoch": 0.57, "learning_rate": 4.185385064177363e-06, "loss": 0.569, "step": 1344 }, { "epoch": 0.57, "learning_rate": 4.182467911318553e-06, "loss": 0.9033, "step": 1348 }, { "epoch": 0.57, "learning_rate": 4.179550758459744e-06, "loss": 0.8082, "step": 1352 }, { "epoch": 0.58, "learning_rate": 4.176633605600934e-06, "loss": 0.6142, "step": 1356 }, { "epoch": 0.58, "learning_rate": 4.173716452742124e-06, "loss": 0.4488, "step": 1360 }, { "epoch": 0.58, "learning_rate": 4.170799299883314e-06, "loss": 0.5348, "step": 1364 }, { "epoch": 0.58, "learning_rate": 4.167882147024505e-06, "loss": 0.3747, "step": 1368 }, { "epoch": 0.58, "learning_rate": 4.164964994165695e-06, "loss": 0.4663, "step": 1372 }, { "epoch": 0.59, "learning_rate": 4.162047841306885e-06, "loss": 0.5914, "step": 1376 }, { "epoch": 0.59, "learning_rate": 4.1591306884480746e-06, "loss": 0.4916, "step": 1380 }, { "epoch": 0.59, "learning_rate": 4.156213535589265e-06, "loss": 0.4981, "step": 1384 }, { "epoch": 0.59, "learning_rate": 4.153296382730455e-06, "loss": 0.6886, "step": 1388 }, { "epoch": 0.59, "learning_rate": 4.150379229871645e-06, "loss": 0.7889, "step": 1392 }, { "epoch": 0.59, "learning_rate": 4.147462077012836e-06, "loss": 0.4762, "step": 1396 }, { "epoch": 0.6, "learning_rate": 4.144544924154026e-06, "loss": 0.6236, "step": 1400 }, { "epoch": 0.6, "learning_rate": 4.141627771295217e-06, "loss": 0.4979, "step": 1404 }, { "epoch": 0.6, "learning_rate": 4.138710618436406e-06, "loss": 0.6086, "step": 1408 }, { "epoch": 0.6, "learning_rate": 4.135793465577597e-06, "loss": 0.4375, "step": 1412 }, { "epoch": 0.6, "learning_rate": 4.1328763127187866e-06, "loss": 0.6866, "step": 1416 }, { "epoch": 0.6, "learning_rate": 4.129959159859977e-06, "loss": 0.6476, "step": 1420 }, { "epoch": 0.61, "learning_rate": 4.127042007001167e-06, "loss": 0.5982, "step": 1424 }, { "epoch": 0.61, "learning_rate": 4.124124854142357e-06, "loss": 0.7084, "step": 1428 }, { "epoch": 0.61, "learning_rate": 4.121207701283548e-06, "loss": 0.6747, "step": 1432 }, { "epoch": 0.61, "learning_rate": 4.118290548424738e-06, "loss": 0.5161, "step": 1436 }, { "epoch": 0.61, "learning_rate": 4.115373395565928e-06, "loss": 0.4477, "step": 1440 }, { "epoch": 0.61, "learning_rate": 4.112456242707118e-06, "loss": 0.4698, "step": 1444 }, { "epoch": 0.62, "learning_rate": 4.109539089848309e-06, "loss": 0.3782, "step": 1448 }, { "epoch": 0.62, "learning_rate": 4.1066219369894985e-06, "loss": 0.5897, "step": 1452 }, { "epoch": 0.62, "learning_rate": 4.1037047841306885e-06, "loss": 0.6186, "step": 1456 }, { "epoch": 0.62, "learning_rate": 4.100787631271879e-06, "loss": 0.6515, "step": 1460 }, { "epoch": 0.62, "learning_rate": 4.097870478413069e-06, "loss": 0.7318, "step": 1464 }, { "epoch": 0.62, "learning_rate": 4.094953325554259e-06, "loss": 0.4301, "step": 1468 }, { "epoch": 0.63, "learning_rate": 4.092036172695449e-06, "loss": 0.587, "step": 1472 }, { "epoch": 0.63, "learning_rate": 4.08911901983664e-06, "loss": 0.4525, "step": 1476 }, { "epoch": 0.63, "learning_rate": 4.08620186697783e-06, "loss": 0.7056, "step": 1480 }, { "epoch": 0.63, "learning_rate": 4.08328471411902e-06, "loss": 0.497, "step": 1484 }, { "epoch": 0.63, "learning_rate": 4.0803675612602105e-06, "loss": 0.6571, "step": 1488 }, { "epoch": 0.63, "learning_rate": 4.0774504084014005e-06, "loss": 0.5894, "step": 1492 }, { "epoch": 0.64, "learning_rate": 4.074533255542591e-06, "loss": 0.5912, "step": 1496 }, { "epoch": 0.64, "learning_rate": 4.071616102683781e-06, "loss": 0.5286, "step": 1500 }, { "epoch": 0.64, "learning_rate": 4.068698949824971e-06, "loss": 0.5107, "step": 1504 }, { "epoch": 0.64, "learning_rate": 4.065781796966161e-06, "loss": 0.4842, "step": 1508 }, { "epoch": 0.64, "learning_rate": 4.062864644107352e-06, "loss": 0.4813, "step": 1512 }, { "epoch": 0.64, "learning_rate": 4.059947491248542e-06, "loss": 0.4813, "step": 1516 }, { "epoch": 0.65, "learning_rate": 4.057030338389732e-06, "loss": 0.575, "step": 1520 }, { "epoch": 0.65, "learning_rate": 4.0541131855309225e-06, "loss": 0.5535, "step": 1524 }, { "epoch": 0.65, "learning_rate": 4.0511960326721125e-06, "loss": 0.5936, "step": 1528 }, { "epoch": 0.65, "learning_rate": 4.048278879813302e-06, "loss": 0.691, "step": 1532 }, { "epoch": 0.65, "learning_rate": 4.045361726954492e-06, "loss": 0.4385, "step": 1536 }, { "epoch": 0.65, "learning_rate": 4.042444574095683e-06, "loss": 0.5595, "step": 1540 }, { "epoch": 0.66, "learning_rate": 4.039527421236873e-06, "loss": 0.5647, "step": 1544 }, { "epoch": 0.66, "learning_rate": 4.036610268378063e-06, "loss": 0.508, "step": 1548 }, { "epoch": 0.66, "learning_rate": 4.033693115519254e-06, "loss": 0.4794, "step": 1552 }, { "epoch": 0.66, "learning_rate": 4.030775962660444e-06, "loss": 0.5662, "step": 1556 }, { "epoch": 0.66, "learning_rate": 4.0278588098016345e-06, "loss": 0.6627, "step": 1560 }, { "epoch": 0.66, "learning_rate": 4.024941656942824e-06, "loss": 0.3683, "step": 1564 }, { "epoch": 0.67, "learning_rate": 4.022024504084014e-06, "loss": 0.6034, "step": 1568 }, { "epoch": 0.67, "learning_rate": 4.019107351225204e-06, "loss": 0.3611, "step": 1572 }, { "epoch": 0.67, "learning_rate": 4.016190198366395e-06, "loss": 0.639, "step": 1576 }, { "epoch": 0.67, "learning_rate": 4.013273045507585e-06, "loss": 0.5472, "step": 1580 }, { "epoch": 0.67, "learning_rate": 4.010355892648775e-06, "loss": 0.4576, "step": 1584 }, { "epoch": 0.68, "learning_rate": 4.007438739789966e-06, "loss": 0.5035, "step": 1588 }, { "epoch": 0.68, "learning_rate": 4.004521586931156e-06, "loss": 0.8007, "step": 1592 }, { "epoch": 0.68, "learning_rate": 4.001604434072346e-06, "loss": 0.6807, "step": 1596 }, { "epoch": 0.68, "learning_rate": 3.998687281213536e-06, "loss": 0.5574, "step": 1600 }, { "epoch": 0.68, "learning_rate": 3.995770128354726e-06, "loss": 0.5141, "step": 1604 }, { "epoch": 0.68, "learning_rate": 3.992852975495916e-06, "loss": 0.4496, "step": 1608 }, { "epoch": 0.69, "learning_rate": 3.989935822637106e-06, "loss": 0.7541, "step": 1612 }, { "epoch": 0.69, "learning_rate": 3.987018669778296e-06, "loss": 0.6673, "step": 1616 }, { "epoch": 0.69, "learning_rate": 3.984101516919487e-06, "loss": 0.6613, "step": 1620 }, { "epoch": 0.69, "learning_rate": 3.981184364060677e-06, "loss": 0.7404, "step": 1624 }, { "epoch": 0.69, "learning_rate": 3.978267211201867e-06, "loss": 0.5234, "step": 1628 }, { "epoch": 0.69, "learning_rate": 3.975350058343058e-06, "loss": 0.608, "step": 1632 }, { "epoch": 0.7, "learning_rate": 3.972432905484248e-06, "loss": 0.6614, "step": 1636 }, { "epoch": 0.7, "learning_rate": 3.969515752625438e-06, "loss": 0.4633, "step": 1640 }, { "epoch": 0.7, "learning_rate": 3.9665985997666275e-06, "loss": 0.5372, "step": 1644 }, { "epoch": 0.7, "learning_rate": 3.963681446907818e-06, "loss": 0.5298, "step": 1648 }, { "epoch": 0.7, "learning_rate": 3.960764294049009e-06, "loss": 0.5283, "step": 1652 }, { "epoch": 0.7, "learning_rate": 3.957847141190199e-06, "loss": 0.3638, "step": 1656 }, { "epoch": 0.71, "learning_rate": 3.954929988331389e-06, "loss": 0.3947, "step": 1660 }, { "epoch": 0.71, "learning_rate": 3.952012835472579e-06, "loss": 0.4943, "step": 1664 }, { "epoch": 0.71, "learning_rate": 3.94909568261377e-06, "loss": 0.5024, "step": 1668 }, { "epoch": 0.71, "learning_rate": 3.9461785297549596e-06, "loss": 0.6821, "step": 1672 }, { "epoch": 0.71, "learning_rate": 3.9432613768961495e-06, "loss": 0.4511, "step": 1676 }, { "epoch": 0.71, "learning_rate": 3.9403442240373394e-06, "loss": 0.4767, "step": 1680 }, { "epoch": 0.72, "learning_rate": 3.93742707117853e-06, "loss": 0.5588, "step": 1684 }, { "epoch": 0.72, "learning_rate": 3.93450991831972e-06, "loss": 0.7316, "step": 1688 }, { "epoch": 0.72, "learning_rate": 3.93159276546091e-06, "loss": 0.3692, "step": 1692 }, { "epoch": 0.72, "learning_rate": 3.928675612602101e-06, "loss": 0.7381, "step": 1696 }, { "epoch": 0.72, "learning_rate": 3.925758459743291e-06, "loss": 0.7333, "step": 1700 }, { "epoch": 0.72, "learning_rate": 3.922841306884481e-06, "loss": 0.5729, "step": 1704 }, { "epoch": 0.73, "learning_rate": 3.919924154025671e-06, "loss": 0.5423, "step": 1708 }, { "epoch": 0.73, "learning_rate": 3.9170070011668615e-06, "loss": 0.35, "step": 1712 }, { "epoch": 0.73, "learning_rate": 3.9140898483080514e-06, "loss": 0.3955, "step": 1716 }, { "epoch": 0.73, "learning_rate": 3.911172695449242e-06, "loss": 0.713, "step": 1720 }, { "epoch": 0.73, "learning_rate": 3.908255542590432e-06, "loss": 0.5724, "step": 1724 }, { "epoch": 0.73, "learning_rate": 3.905338389731622e-06, "loss": 0.5339, "step": 1728 }, { "epoch": 0.74, "learning_rate": 3.902421236872813e-06, "loss": 0.372, "step": 1732 }, { "epoch": 0.74, "learning_rate": 3.899504084014003e-06, "loss": 0.6898, "step": 1736 }, { "epoch": 0.74, "learning_rate": 3.896586931155193e-06, "loss": 0.6381, "step": 1740 }, { "epoch": 0.74, "learning_rate": 3.893669778296383e-06, "loss": 0.4658, "step": 1744 }, { "epoch": 0.74, "learning_rate": 3.8907526254375735e-06, "loss": 0.6277, "step": 1748 }, { "epoch": 0.74, "learning_rate": 3.887835472578763e-06, "loss": 0.476, "step": 1752 }, { "epoch": 0.75, "learning_rate": 3.884918319719953e-06, "loss": 0.5812, "step": 1756 }, { "epoch": 0.75, "learning_rate": 3.882001166861144e-06, "loss": 0.3055, "step": 1760 }, { "epoch": 0.75, "learning_rate": 3.879084014002334e-06, "loss": 0.7002, "step": 1764 }, { "epoch": 0.75, "learning_rate": 3.876166861143524e-06, "loss": 0.4782, "step": 1768 }, { "epoch": 0.75, "learning_rate": 3.873249708284714e-06, "loss": 0.4645, "step": 1772 }, { "epoch": 0.76, "learning_rate": 3.870332555425905e-06, "loss": 0.506, "step": 1776 }, { "epoch": 0.76, "learning_rate": 3.867415402567095e-06, "loss": 0.5115, "step": 1780 }, { "epoch": 0.76, "learning_rate": 3.864498249708285e-06, "loss": 0.5903, "step": 1784 }, { "epoch": 0.76, "learning_rate": 3.861581096849475e-06, "loss": 0.555, "step": 1788 }, { "epoch": 0.76, "learning_rate": 3.858663943990665e-06, "loss": 0.6398, "step": 1792 }, { "epoch": 0.76, "learning_rate": 3.855746791131856e-06, "loss": 0.5431, "step": 1796 }, { "epoch": 0.77, "learning_rate": 3.852829638273046e-06, "loss": 0.7979, "step": 1800 }, { "epoch": 0.77, "learning_rate": 3.849912485414236e-06, "loss": 0.3846, "step": 1804 }, { "epoch": 0.77, "learning_rate": 3.846995332555426e-06, "loss": 0.4568, "step": 1808 }, { "epoch": 0.77, "learning_rate": 3.844078179696617e-06, "loss": 0.7126, "step": 1812 }, { "epoch": 0.77, "learning_rate": 3.841161026837807e-06, "loss": 0.6972, "step": 1816 }, { "epoch": 0.77, "learning_rate": 3.838243873978997e-06, "loss": 0.495, "step": 1820 }, { "epoch": 0.78, "learning_rate": 3.835326721120187e-06, "loss": 0.5843, "step": 1824 }, { "epoch": 0.78, "learning_rate": 3.832409568261377e-06, "loss": 0.8, "step": 1828 }, { "epoch": 0.78, "learning_rate": 3.829492415402567e-06, "loss": 0.6066, "step": 1832 }, { "epoch": 0.78, "learning_rate": 3.826575262543757e-06, "loss": 0.5371, "step": 1836 }, { "epoch": 0.78, "learning_rate": 3.823658109684948e-06, "loss": 0.4662, "step": 1840 }, { "epoch": 0.78, "learning_rate": 3.820740956826138e-06, "loss": 0.4733, "step": 1844 }, { "epoch": 0.79, "learning_rate": 3.817823803967328e-06, "loss": 0.6339, "step": 1848 }, { "epoch": 0.79, "learning_rate": 3.814906651108519e-06, "loss": 0.5074, "step": 1852 }, { "epoch": 0.79, "learning_rate": 3.8119894982497086e-06, "loss": 0.7012, "step": 1856 }, { "epoch": 0.79, "learning_rate": 3.809072345390899e-06, "loss": 0.3957, "step": 1860 }, { "epoch": 0.79, "learning_rate": 3.806155192532089e-06, "loss": 0.4838, "step": 1864 }, { "epoch": 0.79, "learning_rate": 3.8032380396732793e-06, "loss": 0.4403, "step": 1868 }, { "epoch": 0.8, "learning_rate": 3.800320886814469e-06, "loss": 0.4519, "step": 1872 }, { "epoch": 0.8, "learning_rate": 3.7974037339556596e-06, "loss": 0.4998, "step": 1876 }, { "epoch": 0.8, "learning_rate": 3.7944865810968495e-06, "loss": 0.3915, "step": 1880 }, { "epoch": 0.8, "learning_rate": 3.79156942823804e-06, "loss": 0.4722, "step": 1884 }, { "epoch": 0.8, "learning_rate": 3.7886522753792302e-06, "loss": 0.4377, "step": 1888 }, { "epoch": 0.8, "learning_rate": 3.78573512252042e-06, "loss": 0.4053, "step": 1892 }, { "epoch": 0.81, "learning_rate": 3.782817969661611e-06, "loss": 0.6229, "step": 1896 }, { "epoch": 0.81, "learning_rate": 3.7799008168028005e-06, "loss": 0.573, "step": 1900 }, { "epoch": 0.81, "learning_rate": 3.7769836639439913e-06, "loss": 0.6595, "step": 1904 }, { "epoch": 0.81, "learning_rate": 3.7740665110851808e-06, "loss": 0.6739, "step": 1908 }, { "epoch": 0.81, "learning_rate": 3.7711493582263716e-06, "loss": 0.5746, "step": 1912 }, { "epoch": 0.81, "learning_rate": 3.7682322053675615e-06, "loss": 0.5315, "step": 1916 }, { "epoch": 0.82, "learning_rate": 3.765315052508752e-06, "loss": 0.545, "step": 1920 }, { "epoch": 0.82, "learning_rate": 3.7623978996499422e-06, "loss": 0.5491, "step": 1924 }, { "epoch": 0.82, "learning_rate": 3.759480746791132e-06, "loss": 0.4616, "step": 1928 }, { "epoch": 0.82, "learning_rate": 3.7565635939323225e-06, "loss": 0.5943, "step": 1932 }, { "epoch": 0.82, "learning_rate": 3.7536464410735125e-06, "loss": 0.7848, "step": 1936 }, { "epoch": 0.82, "learning_rate": 3.750729288214703e-06, "loss": 0.617, "step": 1940 }, { "epoch": 0.83, "learning_rate": 3.7478121353558928e-06, "loss": 0.469, "step": 1944 }, { "epoch": 0.83, "learning_rate": 3.744894982497083e-06, "loss": 0.3749, "step": 1948 }, { "epoch": 0.83, "learning_rate": 3.7419778296382735e-06, "loss": 0.4504, "step": 1952 }, { "epoch": 0.83, "learning_rate": 3.7390606767794634e-06, "loss": 0.5496, "step": 1956 }, { "epoch": 0.83, "learning_rate": 3.7361435239206538e-06, "loss": 0.5946, "step": 1960 }, { "epoch": 0.84, "learning_rate": 3.7332263710618437e-06, "loss": 0.5993, "step": 1964 }, { "epoch": 0.84, "learning_rate": 3.730309218203034e-06, "loss": 0.5006, "step": 1968 }, { "epoch": 0.84, "learning_rate": 3.727392065344224e-06, "loss": 0.5133, "step": 1972 }, { "epoch": 0.84, "learning_rate": 3.7244749124854144e-06, "loss": 0.5787, "step": 1976 }, { "epoch": 0.84, "learning_rate": 3.7215577596266043e-06, "loss": 0.3984, "step": 1980 }, { "epoch": 0.84, "learning_rate": 3.718640606767795e-06, "loss": 0.4427, "step": 1984 }, { "epoch": 0.85, "learning_rate": 3.7157234539089855e-06, "loss": 0.7057, "step": 1988 }, { "epoch": 0.85, "learning_rate": 3.7128063010501754e-06, "loss": 0.5082, "step": 1992 }, { "epoch": 0.85, "learning_rate": 3.7098891481913658e-06, "loss": 0.4868, "step": 1996 }, { "epoch": 0.85, "learning_rate": 3.7069719953325557e-06, "loss": 0.5882, "step": 2000 }, { "epoch": 0.85, "learning_rate": 3.704054842473746e-06, "loss": 0.5969, "step": 2004 }, { "epoch": 0.85, "learning_rate": 3.701137689614936e-06, "loss": 0.4813, "step": 2008 }, { "epoch": 0.86, "learning_rate": 3.6982205367561264e-06, "loss": 0.4511, "step": 2012 }, { "epoch": 0.86, "learning_rate": 3.6953033838973167e-06, "loss": 0.5281, "step": 2016 }, { "epoch": 0.86, "learning_rate": 3.6923862310385067e-06, "loss": 0.5461, "step": 2020 }, { "epoch": 0.86, "learning_rate": 3.689469078179697e-06, "loss": 0.5653, "step": 2024 }, { "epoch": 0.86, "learning_rate": 3.686551925320887e-06, "loss": 0.5701, "step": 2028 }, { "epoch": 0.86, "learning_rate": 3.6836347724620773e-06, "loss": 0.5516, "step": 2032 }, { "epoch": 0.87, "learning_rate": 3.6807176196032673e-06, "loss": 0.6763, "step": 2036 }, { "epoch": 0.87, "learning_rate": 3.6778004667444576e-06, "loss": 0.5188, "step": 2040 }, { "epoch": 0.87, "learning_rate": 3.6748833138856476e-06, "loss": 0.6355, "step": 2044 }, { "epoch": 0.87, "learning_rate": 3.671966161026838e-06, "loss": 0.3786, "step": 2048 }, { "epoch": 0.87, "learning_rate": 3.6690490081680287e-06, "loss": 0.4538, "step": 2052 }, { "epoch": 0.87, "learning_rate": 3.6661318553092182e-06, "loss": 0.4956, "step": 2056 }, { "epoch": 0.88, "learning_rate": 3.663214702450409e-06, "loss": 0.4748, "step": 2060 }, { "epoch": 0.88, "learning_rate": 3.660297549591599e-06, "loss": 0.5572, "step": 2064 }, { "epoch": 0.88, "learning_rate": 3.6573803967327893e-06, "loss": 0.5476, "step": 2068 }, { "epoch": 0.88, "learning_rate": 3.6544632438739793e-06, "loss": 0.6124, "step": 2072 }, { "epoch": 0.88, "learning_rate": 3.6515460910151696e-06, "loss": 0.6467, "step": 2076 }, { "epoch": 0.88, "learning_rate": 3.6486289381563596e-06, "loss": 0.5917, "step": 2080 }, { "epoch": 0.89, "learning_rate": 3.64571178529755e-06, "loss": 0.4462, "step": 2084 }, { "epoch": 0.89, "learning_rate": 3.6427946324387403e-06, "loss": 0.5173, "step": 2088 }, { "epoch": 0.89, "learning_rate": 3.6398774795799302e-06, "loss": 0.5402, "step": 2092 }, { "epoch": 0.89, "learning_rate": 3.6369603267211206e-06, "loss": 0.3434, "step": 2096 }, { "epoch": 0.89, "learning_rate": 3.6340431738623105e-06, "loss": 0.3308, "step": 2100 }, { "epoch": 0.89, "learning_rate": 3.631126021003501e-06, "loss": 0.4462, "step": 2104 }, { "epoch": 0.9, "learning_rate": 3.628208868144691e-06, "loss": 0.4822, "step": 2108 }, { "epoch": 0.9, "learning_rate": 3.625291715285881e-06, "loss": 0.4612, "step": 2112 }, { "epoch": 0.9, "learning_rate": 3.6223745624270716e-06, "loss": 0.5462, "step": 2116 }, { "epoch": 0.9, "learning_rate": 3.6194574095682615e-06, "loss": 0.6326, "step": 2120 }, { "epoch": 0.9, "learning_rate": 3.6165402567094523e-06, "loss": 0.664, "step": 2124 }, { "epoch": 0.9, "learning_rate": 3.613623103850642e-06, "loss": 0.4564, "step": 2128 }, { "epoch": 0.91, "learning_rate": 3.6107059509918326e-06, "loss": 0.4078, "step": 2132 }, { "epoch": 0.91, "learning_rate": 3.607788798133022e-06, "loss": 0.3981, "step": 2136 }, { "epoch": 0.91, "learning_rate": 3.604871645274213e-06, "loss": 0.5809, "step": 2140 }, { "epoch": 0.91, "learning_rate": 3.601954492415403e-06, "loss": 0.3539, "step": 2144 }, { "epoch": 0.91, "learning_rate": 3.599037339556593e-06, "loss": 0.4753, "step": 2148 }, { "epoch": 0.91, "learning_rate": 3.5961201866977835e-06, "loss": 0.4232, "step": 2152 }, { "epoch": 0.92, "learning_rate": 3.5932030338389735e-06, "loss": 0.5864, "step": 2156 }, { "epoch": 0.92, "learning_rate": 3.590285880980164e-06, "loss": 0.5046, "step": 2160 }, { "epoch": 0.92, "learning_rate": 3.5873687281213538e-06, "loss": 0.711, "step": 2164 }, { "epoch": 0.92, "learning_rate": 3.584451575262544e-06, "loss": 0.3999, "step": 2168 }, { "epoch": 0.92, "learning_rate": 3.581534422403734e-06, "loss": 0.4033, "step": 2172 }, { "epoch": 0.93, "learning_rate": 3.5786172695449245e-06, "loss": 0.3923, "step": 2176 }, { "epoch": 0.93, "learning_rate": 3.5757001166861144e-06, "loss": 0.4749, "step": 2180 }, { "epoch": 0.93, "learning_rate": 3.5727829638273048e-06, "loss": 0.5808, "step": 2184 }, { "epoch": 0.93, "learning_rate": 3.569865810968495e-06, "loss": 0.5079, "step": 2188 }, { "epoch": 0.93, "learning_rate": 3.566948658109685e-06, "loss": 0.6254, "step": 2192 }, { "epoch": 0.93, "learning_rate": 3.5640315052508754e-06, "loss": 0.5132, "step": 2196 }, { "epoch": 0.94, "learning_rate": 3.5611143523920654e-06, "loss": 0.3979, "step": 2200 }, { "epoch": 0.94, "learning_rate": 3.558197199533256e-06, "loss": 0.5037, "step": 2204 }, { "epoch": 0.94, "learning_rate": 3.5552800466744457e-06, "loss": 0.4465, "step": 2208 }, { "epoch": 0.94, "learning_rate": 3.5523628938156364e-06, "loss": 0.5237, "step": 2212 }, { "epoch": 0.94, "learning_rate": 3.549445740956827e-06, "loss": 0.4848, "step": 2216 }, { "epoch": 0.94, "learning_rate": 3.5465285880980167e-06, "loss": 0.6196, "step": 2220 }, { "epoch": 0.95, "learning_rate": 3.543611435239207e-06, "loss": 0.5312, "step": 2224 }, { "epoch": 0.95, "learning_rate": 3.540694282380397e-06, "loss": 0.7711, "step": 2228 }, { "epoch": 0.95, "learning_rate": 3.5377771295215874e-06, "loss": 0.3885, "step": 2232 }, { "epoch": 0.95, "learning_rate": 3.5348599766627773e-06, "loss": 0.4993, "step": 2236 }, { "epoch": 0.95, "learning_rate": 3.5319428238039677e-06, "loss": 0.6159, "step": 2240 }, { "epoch": 0.95, "learning_rate": 3.5290256709451576e-06, "loss": 0.5108, "step": 2244 }, { "epoch": 0.96, "learning_rate": 3.526108518086348e-06, "loss": 0.4773, "step": 2248 }, { "epoch": 0.96, "learning_rate": 3.5231913652275384e-06, "loss": 0.4661, "step": 2252 }, { "epoch": 0.96, "learning_rate": 3.5202742123687283e-06, "loss": 0.4629, "step": 2256 }, { "epoch": 0.96, "learning_rate": 3.5173570595099187e-06, "loss": 0.587, "step": 2260 }, { "epoch": 0.96, "learning_rate": 3.5144399066511086e-06, "loss": 0.4012, "step": 2264 }, { "epoch": 0.96, "learning_rate": 3.511522753792299e-06, "loss": 0.6225, "step": 2268 }, { "epoch": 0.97, "learning_rate": 3.508605600933489e-06, "loss": 0.5934, "step": 2272 }, { "epoch": 0.97, "learning_rate": 3.5056884480746793e-06, "loss": 0.5112, "step": 2276 }, { "epoch": 0.97, "learning_rate": 3.502771295215869e-06, "loss": 0.6217, "step": 2280 }, { "epoch": 0.97, "learning_rate": 3.4998541423570596e-06, "loss": 0.7376, "step": 2284 }, { "epoch": 0.97, "learning_rate": 3.4969369894982504e-06, "loss": 0.4367, "step": 2288 }, { "epoch": 0.97, "learning_rate": 3.4940198366394403e-06, "loss": 0.4153, "step": 2292 }, { "epoch": 0.98, "learning_rate": 3.4911026837806307e-06, "loss": 0.563, "step": 2296 }, { "epoch": 0.98, "learning_rate": 3.4881855309218206e-06, "loss": 0.4106, "step": 2300 }, { "epoch": 0.98, "learning_rate": 3.485268378063011e-06, "loss": 0.6708, "step": 2304 }, { "epoch": 0.98, "learning_rate": 3.482351225204201e-06, "loss": 0.4741, "step": 2308 }, { "epoch": 0.98, "learning_rate": 3.4794340723453913e-06, "loss": 0.6038, "step": 2312 }, { "epoch": 0.98, "learning_rate": 3.4765169194865816e-06, "loss": 0.3982, "step": 2316 }, { "epoch": 0.99, "learning_rate": 3.4735997666277716e-06, "loss": 0.7745, "step": 2320 }, { "epoch": 0.99, "learning_rate": 3.470682613768962e-06, "loss": 0.5016, "step": 2324 }, { "epoch": 0.99, "learning_rate": 3.467765460910152e-06, "loss": 0.3199, "step": 2328 }, { "epoch": 0.99, "learning_rate": 3.4648483080513422e-06, "loss": 0.5832, "step": 2332 }, { "epoch": 0.99, "learning_rate": 3.461931155192532e-06, "loss": 0.4076, "step": 2336 }, { "epoch": 0.99, "learning_rate": 3.4590140023337225e-06, "loss": 0.5615, "step": 2340 }, { "epoch": 1.0, "learning_rate": 3.4560968494749125e-06, "loss": 0.3989, "step": 2344 }, { "epoch": 1.0, "learning_rate": 3.453179696616103e-06, "loss": 0.4901, "step": 2348 }, { "epoch": 1.0, "learning_rate": 3.4502625437572936e-06, "loss": 0.4952, "step": 2352 }, { "epoch": 1.0, "learning_rate": 3.447345390898483e-06, "loss": 0.5235, "step": 2356 }, { "epoch": 1.0, "learning_rate": 3.444428238039674e-06, "loss": 0.3815, "step": 2360 }, { "epoch": 1.01, "learning_rate": 3.4415110851808634e-06, "loss": 0.5771, "step": 2364 }, { "epoch": 1.01, "learning_rate": 3.4385939323220542e-06, "loss": 0.7008, "step": 2368 }, { "epoch": 1.01, "learning_rate": 3.435676779463244e-06, "loss": 0.4909, "step": 2372 }, { "epoch": 1.01, "learning_rate": 3.4327596266044345e-06, "loss": 0.4042, "step": 2376 }, { "epoch": 1.01, "learning_rate": 3.4298424737456245e-06, "loss": 0.5892, "step": 2380 }, { "epoch": 1.01, "learning_rate": 3.426925320886815e-06, "loss": 0.538, "step": 2384 }, { "epoch": 1.02, "learning_rate": 3.424008168028005e-06, "loss": 0.409, "step": 2388 }, { "epoch": 1.02, "learning_rate": 3.421091015169195e-06, "loss": 0.5232, "step": 2392 }, { "epoch": 1.02, "learning_rate": 3.4181738623103855e-06, "loss": 0.6311, "step": 2396 }, { "epoch": 1.02, "learning_rate": 3.4152567094515754e-06, "loss": 0.4116, "step": 2400 }, { "epoch": 1.02, "learning_rate": 3.4123395565927658e-06, "loss": 0.5223, "step": 2404 }, { "epoch": 1.02, "learning_rate": 3.4094224037339557e-06, "loss": 0.6645, "step": 2408 }, { "epoch": 1.03, "learning_rate": 3.406505250875146e-06, "loss": 0.4037, "step": 2412 }, { "epoch": 1.03, "learning_rate": 3.4035880980163364e-06, "loss": 0.4992, "step": 2416 }, { "epoch": 1.03, "learning_rate": 3.4006709451575264e-06, "loss": 0.3654, "step": 2420 }, { "epoch": 1.03, "learning_rate": 3.3977537922987167e-06, "loss": 0.6107, "step": 2424 }, { "epoch": 1.03, "learning_rate": 3.3948366394399067e-06, "loss": 0.5753, "step": 2428 }, { "epoch": 1.03, "learning_rate": 3.3919194865810975e-06, "loss": 0.375, "step": 2432 }, { "epoch": 1.04, "learning_rate": 3.389002333722287e-06, "loss": 0.4033, "step": 2436 }, { "epoch": 1.04, "learning_rate": 3.3860851808634778e-06, "loss": 0.5717, "step": 2440 }, { "epoch": 1.04, "learning_rate": 3.3831680280046673e-06, "loss": 0.3352, "step": 2444 }, { "epoch": 1.04, "learning_rate": 3.380250875145858e-06, "loss": 0.3152, "step": 2448 }, { "epoch": 1.04, "learning_rate": 3.3773337222870484e-06, "loss": 0.6976, "step": 2452 }, { "epoch": 1.04, "learning_rate": 3.3744165694282384e-06, "loss": 0.6974, "step": 2456 }, { "epoch": 1.05, "learning_rate": 3.3714994165694287e-06, "loss": 0.4909, "step": 2460 }, { "epoch": 1.05, "learning_rate": 3.3685822637106187e-06, "loss": 0.4997, "step": 2464 }, { "epoch": 1.05, "learning_rate": 3.365665110851809e-06, "loss": 0.5293, "step": 2468 }, { "epoch": 1.05, "learning_rate": 3.362747957992999e-06, "loss": 0.5174, "step": 2472 }, { "epoch": 1.05, "learning_rate": 3.3598308051341893e-06, "loss": 0.4137, "step": 2476 }, { "epoch": 1.05, "learning_rate": 3.3569136522753793e-06, "loss": 0.3583, "step": 2480 }, { "epoch": 1.06, "learning_rate": 3.3539964994165696e-06, "loss": 0.4317, "step": 2484 }, { "epoch": 1.06, "learning_rate": 3.35107934655776e-06, "loss": 0.5334, "step": 2488 }, { "epoch": 1.06, "learning_rate": 3.34816219369895e-06, "loss": 0.4626, "step": 2492 }, { "epoch": 1.06, "learning_rate": 3.3452450408401403e-06, "loss": 0.3575, "step": 2496 }, { "epoch": 1.06, "learning_rate": 3.3423278879813302e-06, "loss": 0.3181, "step": 2500 }, { "epoch": 1.06, "learning_rate": 3.3394107351225206e-06, "loss": 0.4233, "step": 2504 }, { "epoch": 1.07, "learning_rate": 3.3364935822637105e-06, "loss": 0.6187, "step": 2508 }, { "epoch": 1.07, "learning_rate": 3.3335764294049013e-06, "loss": 0.4076, "step": 2512 }, { "epoch": 1.07, "learning_rate": 3.3306592765460917e-06, "loss": 0.3893, "step": 2516 }, { "epoch": 1.07, "learning_rate": 3.3277421236872816e-06, "loss": 0.4937, "step": 2520 }, { "epoch": 1.07, "learning_rate": 3.324824970828472e-06, "loss": 0.29, "step": 2524 }, { "epoch": 1.07, "learning_rate": 3.321907817969662e-06, "loss": 0.4274, "step": 2528 }, { "epoch": 1.08, "learning_rate": 3.3189906651108523e-06, "loss": 0.3029, "step": 2532 }, { "epoch": 1.08, "learning_rate": 3.3160735122520422e-06, "loss": 0.4773, "step": 2536 }, { "epoch": 1.08, "learning_rate": 3.3131563593932326e-06, "loss": 0.5116, "step": 2540 }, { "epoch": 1.08, "learning_rate": 3.3102392065344225e-06, "loss": 0.514, "step": 2544 }, { "epoch": 1.08, "learning_rate": 3.307322053675613e-06, "loss": 0.5396, "step": 2548 }, { "epoch": 1.09, "learning_rate": 3.3044049008168033e-06, "loss": 0.4585, "step": 2552 }, { "epoch": 1.09, "learning_rate": 3.301487747957993e-06, "loss": 0.6577, "step": 2556 }, { "epoch": 1.09, "learning_rate": 3.2985705950991836e-06, "loss": 0.4556, "step": 2560 }, { "epoch": 1.09, "learning_rate": 3.2956534422403735e-06, "loss": 0.3851, "step": 2564 }, { "epoch": 1.09, "learning_rate": 3.292736289381564e-06, "loss": 0.366, "step": 2568 }, { "epoch": 1.09, "learning_rate": 3.289819136522754e-06, "loss": 0.53, "step": 2572 }, { "epoch": 1.1, "learning_rate": 3.286901983663944e-06, "loss": 0.513, "step": 2576 }, { "epoch": 1.1, "learning_rate": 3.283984830805134e-06, "loss": 0.4743, "step": 2580 }, { "epoch": 1.1, "learning_rate": 3.2810676779463245e-06, "loss": 0.5324, "step": 2584 }, { "epoch": 1.1, "learning_rate": 3.2781505250875152e-06, "loss": 0.5894, "step": 2588 }, { "epoch": 1.1, "learning_rate": 3.2752333722287048e-06, "loss": 0.4134, "step": 2592 }, { "epoch": 1.1, "learning_rate": 3.2723162193698955e-06, "loss": 0.5402, "step": 2596 }, { "epoch": 1.11, "learning_rate": 3.2693990665110855e-06, "loss": 0.4749, "step": 2600 }, { "epoch": 1.11, "learning_rate": 3.266481913652276e-06, "loss": 0.3575, "step": 2604 }, { "epoch": 1.11, "learning_rate": 3.2635647607934658e-06, "loss": 0.3848, "step": 2608 }, { "epoch": 1.11, "learning_rate": 3.260647607934656e-06, "loss": 0.3512, "step": 2612 }, { "epoch": 1.11, "learning_rate": 3.2577304550758465e-06, "loss": 0.4186, "step": 2616 }, { "epoch": 1.11, "learning_rate": 3.2548133022170364e-06, "loss": 0.3952, "step": 2620 }, { "epoch": 1.12, "learning_rate": 3.251896149358227e-06, "loss": 0.3489, "step": 2624 }, { "epoch": 1.12, "learning_rate": 3.2489789964994167e-06, "loss": 0.5544, "step": 2628 }, { "epoch": 1.12, "learning_rate": 3.246061843640607e-06, "loss": 0.4824, "step": 2632 }, { "epoch": 1.12, "learning_rate": 3.243144690781797e-06, "loss": 0.3462, "step": 2636 }, { "epoch": 1.12, "learning_rate": 3.2402275379229874e-06, "loss": 0.4465, "step": 2640 }, { "epoch": 1.12, "learning_rate": 3.2373103850641773e-06, "loss": 0.4774, "step": 2644 }, { "epoch": 1.13, "learning_rate": 3.2343932322053677e-06, "loss": 0.32, "step": 2648 }, { "epoch": 1.13, "learning_rate": 3.231476079346558e-06, "loss": 0.5598, "step": 2652 }, { "epoch": 1.13, "learning_rate": 3.228558926487748e-06, "loss": 0.5406, "step": 2656 }, { "epoch": 1.13, "learning_rate": 3.225641773628939e-06, "loss": 0.3966, "step": 2660 }, { "epoch": 1.13, "learning_rate": 3.2227246207701283e-06, "loss": 0.6023, "step": 2664 }, { "epoch": 1.13, "learning_rate": 3.219807467911319e-06, "loss": 0.4532, "step": 2668 }, { "epoch": 1.14, "learning_rate": 3.2168903150525086e-06, "loss": 0.3336, "step": 2672 }, { "epoch": 1.14, "learning_rate": 3.2139731621936994e-06, "loss": 0.4411, "step": 2676 }, { "epoch": 1.14, "learning_rate": 3.2110560093348893e-06, "loss": 0.5039, "step": 2680 }, { "epoch": 1.14, "learning_rate": 3.2081388564760797e-06, "loss": 0.6932, "step": 2684 }, { "epoch": 1.14, "learning_rate": 3.20522170361727e-06, "loss": 0.5271, "step": 2688 }, { "epoch": 1.14, "learning_rate": 3.20230455075846e-06, "loss": 0.432, "step": 2692 }, { "epoch": 1.15, "learning_rate": 3.1993873978996504e-06, "loss": 0.4973, "step": 2696 }, { "epoch": 1.15, "learning_rate": 3.1964702450408403e-06, "loss": 0.6146, "step": 2700 }, { "epoch": 1.15, "learning_rate": 3.1935530921820307e-06, "loss": 0.3637, "step": 2704 }, { "epoch": 1.15, "learning_rate": 3.1906359393232206e-06, "loss": 0.4085, "step": 2708 }, { "epoch": 1.15, "learning_rate": 3.187718786464411e-06, "loss": 0.3726, "step": 2712 }, { "epoch": 1.15, "learning_rate": 3.1848016336056013e-06, "loss": 0.5446, "step": 2716 }, { "epoch": 1.16, "learning_rate": 3.1818844807467913e-06, "loss": 0.497, "step": 2720 }, { "epoch": 1.16, "learning_rate": 3.1789673278879816e-06, "loss": 0.3298, "step": 2724 }, { "epoch": 1.16, "learning_rate": 3.1760501750291716e-06, "loss": 0.491, "step": 2728 }, { "epoch": 1.16, "learning_rate": 3.173133022170362e-06, "loss": 0.3796, "step": 2732 }, { "epoch": 1.16, "learning_rate": 3.170215869311552e-06, "loss": 0.6953, "step": 2736 }, { "epoch": 1.16, "learning_rate": 3.1672987164527427e-06, "loss": 0.3953, "step": 2740 }, { "epoch": 1.17, "learning_rate": 3.164381563593932e-06, "loss": 0.3992, "step": 2744 }, { "epoch": 1.17, "learning_rate": 3.161464410735123e-06, "loss": 0.4851, "step": 2748 }, { "epoch": 1.17, "learning_rate": 3.1585472578763133e-06, "loss": 0.3364, "step": 2752 }, { "epoch": 1.17, "learning_rate": 3.1556301050175033e-06, "loss": 0.4477, "step": 2756 }, { "epoch": 1.17, "learning_rate": 3.1527129521586936e-06, "loss": 0.555, "step": 2760 }, { "epoch": 1.18, "learning_rate": 3.1497957992998836e-06, "loss": 0.5896, "step": 2764 }, { "epoch": 1.18, "learning_rate": 3.146878646441074e-06, "loss": 0.5281, "step": 2768 }, { "epoch": 1.18, "learning_rate": 3.143961493582264e-06, "loss": 0.4825, "step": 2772 }, { "epoch": 1.18, "learning_rate": 3.1410443407234542e-06, "loss": 0.3199, "step": 2776 }, { "epoch": 1.18, "learning_rate": 3.138127187864644e-06, "loss": 0.3897, "step": 2780 }, { "epoch": 1.18, "learning_rate": 3.1352100350058345e-06, "loss": 0.3698, "step": 2784 }, { "epoch": 1.19, "learning_rate": 3.132292882147025e-06, "loss": 0.4514, "step": 2788 }, { "epoch": 1.19, "learning_rate": 3.129375729288215e-06, "loss": 0.4257, "step": 2792 }, { "epoch": 1.19, "learning_rate": 3.126458576429405e-06, "loss": 0.4245, "step": 2796 }, { "epoch": 1.19, "learning_rate": 3.123541423570595e-06, "loss": 0.4945, "step": 2800 }, { "epoch": 1.19, "learning_rate": 3.1206242707117855e-06, "loss": 0.442, "step": 2804 }, { "epoch": 1.19, "learning_rate": 3.1177071178529754e-06, "loss": 0.6289, "step": 2808 }, { "epoch": 1.2, "learning_rate": 3.1147899649941658e-06, "loss": 0.4142, "step": 2812 }, { "epoch": 1.2, "learning_rate": 3.1118728121353566e-06, "loss": 0.4667, "step": 2816 }, { "epoch": 1.2, "learning_rate": 3.1089556592765465e-06, "loss": 0.4788, "step": 2820 }, { "epoch": 1.2, "learning_rate": 3.106038506417737e-06, "loss": 0.3926, "step": 2824 }, { "epoch": 1.2, "learning_rate": 3.103121353558927e-06, "loss": 0.553, "step": 2828 }, { "epoch": 1.2, "learning_rate": 3.100204200700117e-06, "loss": 0.4567, "step": 2832 }, { "epoch": 1.21, "learning_rate": 3.097287047841307e-06, "loss": 0.3615, "step": 2836 }, { "epoch": 1.21, "learning_rate": 3.0943698949824975e-06, "loss": 0.4364, "step": 2840 }, { "epoch": 1.21, "learning_rate": 3.0914527421236874e-06, "loss": 0.525, "step": 2844 }, { "epoch": 1.21, "learning_rate": 3.0885355892648778e-06, "loss": 0.4324, "step": 2848 }, { "epoch": 1.21, "learning_rate": 3.085618436406068e-06, "loss": 0.37, "step": 2852 }, { "epoch": 1.21, "learning_rate": 3.082701283547258e-06, "loss": 0.3175, "step": 2856 }, { "epoch": 1.22, "learning_rate": 3.0797841306884484e-06, "loss": 0.3043, "step": 2860 }, { "epoch": 1.22, "learning_rate": 3.0768669778296384e-06, "loss": 0.514, "step": 2864 }, { "epoch": 1.22, "learning_rate": 3.0739498249708287e-06, "loss": 0.4671, "step": 2868 }, { "epoch": 1.22, "learning_rate": 3.0710326721120187e-06, "loss": 0.4151, "step": 2872 }, { "epoch": 1.22, "learning_rate": 3.068115519253209e-06, "loss": 0.4077, "step": 2876 }, { "epoch": 1.22, "learning_rate": 3.065198366394399e-06, "loss": 0.5045, "step": 2880 }, { "epoch": 1.23, "learning_rate": 3.0622812135355893e-06, "loss": 0.3641, "step": 2884 }, { "epoch": 1.23, "learning_rate": 3.05936406067678e-06, "loss": 0.5006, "step": 2888 }, { "epoch": 1.23, "learning_rate": 3.0564469078179696e-06, "loss": 0.4298, "step": 2892 }, { "epoch": 1.23, "learning_rate": 3.0535297549591604e-06, "loss": 0.514, "step": 2896 }, { "epoch": 1.23, "learning_rate": 3.0506126021003504e-06, "loss": 0.4578, "step": 2900 }, { "epoch": 1.23, "learning_rate": 3.0476954492415407e-06, "loss": 0.3638, "step": 2904 }, { "epoch": 1.24, "learning_rate": 3.0447782963827307e-06, "loss": 0.6377, "step": 2908 }, { "epoch": 1.24, "learning_rate": 3.041861143523921e-06, "loss": 0.5282, "step": 2912 }, { "epoch": 1.24, "learning_rate": 3.0389439906651114e-06, "loss": 0.5388, "step": 2916 }, { "epoch": 1.24, "learning_rate": 3.0360268378063013e-06, "loss": 0.5937, "step": 2920 }, { "epoch": 1.24, "learning_rate": 3.0331096849474917e-06, "loss": 0.488, "step": 2924 }, { "epoch": 1.24, "learning_rate": 3.0301925320886816e-06, "loss": 0.4885, "step": 2928 }, { "epoch": 1.25, "learning_rate": 3.027275379229872e-06, "loss": 0.6639, "step": 2932 }, { "epoch": 1.25, "learning_rate": 3.024358226371062e-06, "loss": 0.4895, "step": 2936 }, { "epoch": 1.25, "learning_rate": 3.0214410735122523e-06, "loss": 0.2655, "step": 2940 }, { "epoch": 1.25, "learning_rate": 3.0185239206534422e-06, "loss": 0.6175, "step": 2944 }, { "epoch": 1.25, "learning_rate": 3.0156067677946326e-06, "loss": 0.4876, "step": 2948 }, { "epoch": 1.26, "learning_rate": 3.012689614935823e-06, "loss": 0.3615, "step": 2952 }, { "epoch": 1.26, "learning_rate": 3.009772462077013e-06, "loss": 0.3619, "step": 2956 }, { "epoch": 1.26, "learning_rate": 3.0068553092182033e-06, "loss": 0.4622, "step": 2960 }, { "epoch": 1.26, "learning_rate": 3.003938156359393e-06, "loss": 0.4855, "step": 2964 }, { "epoch": 1.26, "learning_rate": 3.001021003500584e-06, "loss": 0.3779, "step": 2968 }, { "epoch": 1.26, "learning_rate": 2.9981038506417735e-06, "loss": 0.5237, "step": 2972 }, { "epoch": 1.27, "learning_rate": 2.9951866977829643e-06, "loss": 0.541, "step": 2976 }, { "epoch": 1.27, "learning_rate": 2.992269544924154e-06, "loss": 0.4515, "step": 2980 }, { "epoch": 1.27, "learning_rate": 2.9893523920653446e-06, "loss": 0.5022, "step": 2984 }, { "epoch": 1.27, "learning_rate": 2.986435239206535e-06, "loss": 0.4295, "step": 2988 }, { "epoch": 1.27, "learning_rate": 2.983518086347725e-06, "loss": 0.438, "step": 2992 }, { "epoch": 1.27, "learning_rate": 2.9806009334889152e-06, "loss": 0.364, "step": 2996 }, { "epoch": 1.28, "learning_rate": 2.977683780630105e-06, "loss": 0.3795, "step": 3000 }, { "epoch": 1.28, "learning_rate": 2.9747666277712955e-06, "loss": 0.4839, "step": 3004 }, { "epoch": 1.28, "learning_rate": 2.9718494749124855e-06, "loss": 0.3864, "step": 3008 }, { "epoch": 1.28, "learning_rate": 2.968932322053676e-06, "loss": 0.5014, "step": 3012 }, { "epoch": 1.28, "learning_rate": 2.966015169194866e-06, "loss": 0.4356, "step": 3016 }, { "epoch": 1.28, "learning_rate": 2.963098016336056e-06, "loss": 0.5337, "step": 3020 }, { "epoch": 1.29, "learning_rate": 2.9601808634772465e-06, "loss": 0.4385, "step": 3024 }, { "epoch": 1.29, "learning_rate": 2.9572637106184364e-06, "loss": 0.5117, "step": 3028 }, { "epoch": 1.29, "learning_rate": 2.954346557759627e-06, "loss": 0.6061, "step": 3032 }, { "epoch": 1.29, "learning_rate": 2.9514294049008167e-06, "loss": 0.4433, "step": 3036 }, { "epoch": 1.29, "learning_rate": 2.948512252042007e-06, "loss": 0.3484, "step": 3040 }, { "epoch": 1.29, "learning_rate": 2.945595099183197e-06, "loss": 0.2477, "step": 3044 }, { "epoch": 1.3, "learning_rate": 2.942677946324388e-06, "loss": 0.2433, "step": 3048 }, { "epoch": 1.3, "learning_rate": 2.939760793465578e-06, "loss": 0.34, "step": 3052 }, { "epoch": 1.3, "learning_rate": 2.936843640606768e-06, "loss": 0.6435, "step": 3056 }, { "epoch": 1.3, "learning_rate": 2.9339264877479585e-06, "loss": 0.3208, "step": 3060 }, { "epoch": 1.3, "learning_rate": 2.9310093348891484e-06, "loss": 0.3085, "step": 3064 }, { "epoch": 1.3, "learning_rate": 2.928092182030339e-06, "loss": 0.362, "step": 3068 }, { "epoch": 1.31, "learning_rate": 2.9251750291715287e-06, "loss": 0.4297, "step": 3072 }, { "epoch": 1.31, "learning_rate": 2.922257876312719e-06, "loss": 0.4343, "step": 3076 }, { "epoch": 1.31, "learning_rate": 2.919340723453909e-06, "loss": 0.5577, "step": 3080 }, { "epoch": 1.31, "learning_rate": 2.9164235705950994e-06, "loss": 0.3442, "step": 3084 }, { "epoch": 1.31, "learning_rate": 2.9135064177362898e-06, "loss": 0.5667, "step": 3088 }, { "epoch": 1.31, "learning_rate": 2.9105892648774797e-06, "loss": 0.3254, "step": 3092 }, { "epoch": 1.32, "learning_rate": 2.90767211201867e-06, "loss": 0.2909, "step": 3096 }, { "epoch": 1.32, "learning_rate": 2.90475495915986e-06, "loss": 0.3682, "step": 3100 }, { "epoch": 1.32, "learning_rate": 2.9018378063010504e-06, "loss": 0.3107, "step": 3104 }, { "epoch": 1.32, "learning_rate": 2.8989206534422403e-06, "loss": 0.4328, "step": 3108 }, { "epoch": 1.32, "learning_rate": 2.8960035005834307e-06, "loss": 0.3674, "step": 3112 }, { "epoch": 1.32, "learning_rate": 2.8930863477246215e-06, "loss": 0.3329, "step": 3116 }, { "epoch": 1.33, "learning_rate": 2.890169194865811e-06, "loss": 0.6409, "step": 3120 }, { "epoch": 1.33, "learning_rate": 2.8872520420070018e-06, "loss": 0.5682, "step": 3124 }, { "epoch": 1.33, "learning_rate": 2.8843348891481917e-06, "loss": 0.5972, "step": 3128 }, { "epoch": 1.33, "learning_rate": 2.881417736289382e-06, "loss": 0.4621, "step": 3132 }, { "epoch": 1.33, "learning_rate": 2.878500583430572e-06, "loss": 0.4448, "step": 3136 }, { "epoch": 1.34, "learning_rate": 2.8755834305717624e-06, "loss": 0.2757, "step": 3140 }, { "epoch": 1.34, "learning_rate": 2.8726662777129523e-06, "loss": 0.5172, "step": 3144 }, { "epoch": 1.34, "learning_rate": 2.8697491248541427e-06, "loss": 0.4493, "step": 3148 }, { "epoch": 1.34, "learning_rate": 2.866831971995333e-06, "loss": 0.3437, "step": 3152 }, { "epoch": 1.34, "learning_rate": 2.863914819136523e-06, "loss": 0.286, "step": 3156 }, { "epoch": 1.34, "learning_rate": 2.8609976662777133e-06, "loss": 0.6001, "step": 3160 }, { "epoch": 1.35, "learning_rate": 2.8580805134189033e-06, "loss": 0.4373, "step": 3164 }, { "epoch": 1.35, "learning_rate": 2.8551633605600936e-06, "loss": 0.4974, "step": 3168 }, { "epoch": 1.35, "learning_rate": 2.8522462077012836e-06, "loss": 0.4817, "step": 3172 }, { "epoch": 1.35, "learning_rate": 2.849329054842474e-06, "loss": 0.4178, "step": 3176 }, { "epoch": 1.35, "learning_rate": 2.846411901983664e-06, "loss": 0.4527, "step": 3180 }, { "epoch": 1.35, "learning_rate": 2.8434947491248542e-06, "loss": 0.3193, "step": 3184 }, { "epoch": 1.36, "learning_rate": 2.8405775962660446e-06, "loss": 0.4259, "step": 3188 }, { "epoch": 1.36, "learning_rate": 2.8376604434072345e-06, "loss": 0.2256, "step": 3192 }, { "epoch": 1.36, "learning_rate": 2.8347432905484253e-06, "loss": 0.3772, "step": 3196 }, { "epoch": 1.36, "learning_rate": 2.831826137689615e-06, "loss": 0.3679, "step": 3200 }, { "epoch": 1.36, "learning_rate": 2.8289089848308056e-06, "loss": 0.2954, "step": 3204 }, { "epoch": 1.36, "learning_rate": 2.8259918319719955e-06, "loss": 0.4222, "step": 3208 }, { "epoch": 1.37, "learning_rate": 2.823074679113186e-06, "loss": 0.4063, "step": 3212 }, { "epoch": 1.37, "learning_rate": 2.8201575262543763e-06, "loss": 0.6235, "step": 3216 }, { "epoch": 1.37, "learning_rate": 2.817240373395566e-06, "loss": 0.2717, "step": 3220 }, { "epoch": 1.37, "learning_rate": 2.8143232205367566e-06, "loss": 0.374, "step": 3224 }, { "epoch": 1.37, "learning_rate": 2.8114060676779465e-06, "loss": 0.3534, "step": 3228 }, { "epoch": 1.37, "learning_rate": 2.808488914819137e-06, "loss": 0.3723, "step": 3232 }, { "epoch": 1.38, "learning_rate": 2.805571761960327e-06, "loss": 0.5241, "step": 3236 }, { "epoch": 1.38, "learning_rate": 2.802654609101517e-06, "loss": 0.5061, "step": 3240 }, { "epoch": 1.38, "learning_rate": 2.799737456242707e-06, "loss": 0.268, "step": 3244 }, { "epoch": 1.38, "learning_rate": 2.7968203033838975e-06, "loss": 0.3903, "step": 3248 }, { "epoch": 1.38, "learning_rate": 2.793903150525088e-06, "loss": 0.4536, "step": 3252 }, { "epoch": 1.38, "learning_rate": 2.7909859976662778e-06, "loss": 0.36, "step": 3256 }, { "epoch": 1.39, "learning_rate": 2.788068844807468e-06, "loss": 0.4255, "step": 3260 }, { "epoch": 1.39, "learning_rate": 2.785151691948658e-06, "loss": 0.5646, "step": 3264 }, { "epoch": 1.39, "learning_rate": 2.7822345390898484e-06, "loss": 0.3652, "step": 3268 }, { "epoch": 1.39, "learning_rate": 2.7793173862310384e-06, "loss": 0.3034, "step": 3272 }, { "epoch": 1.39, "learning_rate": 2.776400233372229e-06, "loss": 0.417, "step": 3276 }, { "epoch": 1.39, "learning_rate": 2.7734830805134187e-06, "loss": 0.5308, "step": 3280 }, { "epoch": 1.4, "learning_rate": 2.7705659276546095e-06, "loss": 0.5367, "step": 3284 }, { "epoch": 1.4, "learning_rate": 2.7676487747958e-06, "loss": 0.2237, "step": 3288 }, { "epoch": 1.4, "learning_rate": 2.7647316219369898e-06, "loss": 0.4497, "step": 3292 }, { "epoch": 1.4, "learning_rate": 2.76181446907818e-06, "loss": 0.351, "step": 3296 }, { "epoch": 1.4, "learning_rate": 2.75889731621937e-06, "loss": 0.6103, "step": 3300 }, { "epoch": 1.4, "learning_rate": 2.7559801633605604e-06, "loss": 0.3624, "step": 3304 }, { "epoch": 1.41, "learning_rate": 2.7530630105017504e-06, "loss": 0.4172, "step": 3308 }, { "epoch": 1.41, "learning_rate": 2.7501458576429407e-06, "loss": 0.5008, "step": 3312 }, { "epoch": 1.41, "learning_rate": 2.747228704784131e-06, "loss": 0.4368, "step": 3316 }, { "epoch": 1.41, "learning_rate": 2.744311551925321e-06, "loss": 0.4305, "step": 3320 }, { "epoch": 1.41, "learning_rate": 2.7413943990665114e-06, "loss": 0.3914, "step": 3324 }, { "epoch": 1.41, "learning_rate": 2.7384772462077013e-06, "loss": 0.3781, "step": 3328 }, { "epoch": 1.42, "learning_rate": 2.7355600933488917e-06, "loss": 0.4922, "step": 3332 }, { "epoch": 1.42, "learning_rate": 2.7326429404900816e-06, "loss": 0.3398, "step": 3336 }, { "epoch": 1.42, "learning_rate": 2.729725787631272e-06, "loss": 0.5107, "step": 3340 }, { "epoch": 1.42, "learning_rate": 2.726808634772462e-06, "loss": 0.5933, "step": 3344 }, { "epoch": 1.42, "learning_rate": 2.7238914819136523e-06, "loss": 0.4659, "step": 3348 }, { "epoch": 1.43, "learning_rate": 2.720974329054843e-06, "loss": 0.2262, "step": 3352 }, { "epoch": 1.43, "learning_rate": 2.718057176196033e-06, "loss": 0.4471, "step": 3356 }, { "epoch": 1.43, "learning_rate": 2.7151400233372234e-06, "loss": 0.7168, "step": 3360 }, { "epoch": 1.43, "learning_rate": 2.7122228704784133e-06, "loss": 0.1911, "step": 3364 }, { "epoch": 1.43, "learning_rate": 2.7093057176196037e-06, "loss": 0.3807, "step": 3368 }, { "epoch": 1.43, "learning_rate": 2.7063885647607936e-06, "loss": 0.3614, "step": 3372 }, { "epoch": 1.44, "learning_rate": 2.703471411901984e-06, "loss": 0.2861, "step": 3376 }, { "epoch": 1.44, "learning_rate": 2.700554259043174e-06, "loss": 0.3193, "step": 3380 }, { "epoch": 1.44, "learning_rate": 2.6976371061843643e-06, "loss": 0.4835, "step": 3384 }, { "epoch": 1.44, "learning_rate": 2.6947199533255546e-06, "loss": 0.4439, "step": 3388 }, { "epoch": 1.44, "learning_rate": 2.6918028004667446e-06, "loss": 0.2924, "step": 3392 }, { "epoch": 1.44, "learning_rate": 2.688885647607935e-06, "loss": 0.5311, "step": 3396 }, { "epoch": 1.45, "learning_rate": 2.685968494749125e-06, "loss": 0.4898, "step": 3400 }, { "epoch": 1.45, "learning_rate": 2.6830513418903152e-06, "loss": 0.2538, "step": 3404 }, { "epoch": 1.45, "learning_rate": 2.680134189031505e-06, "loss": 0.4381, "step": 3408 }, { "epoch": 1.45, "learning_rate": 2.6772170361726955e-06, "loss": 0.4718, "step": 3412 }, { "epoch": 1.45, "learning_rate": 2.6742998833138863e-06, "loss": 0.3369, "step": 3416 }, { "epoch": 1.45, "learning_rate": 2.671382730455076e-06, "loss": 0.3481, "step": 3420 }, { "epoch": 1.46, "learning_rate": 2.6684655775962666e-06, "loss": 0.2547, "step": 3424 }, { "epoch": 1.46, "learning_rate": 2.665548424737456e-06, "loss": 0.4183, "step": 3428 }, { "epoch": 1.46, "learning_rate": 2.662631271878647e-06, "loss": 0.4181, "step": 3432 }, { "epoch": 1.46, "learning_rate": 2.659714119019837e-06, "loss": 0.5512, "step": 3436 }, { "epoch": 1.46, "learning_rate": 2.6567969661610272e-06, "loss": 0.4187, "step": 3440 }, { "epoch": 1.46, "learning_rate": 2.653879813302217e-06, "loss": 0.2411, "step": 3444 }, { "epoch": 1.47, "learning_rate": 2.6509626604434075e-06, "loss": 0.3652, "step": 3448 }, { "epoch": 1.47, "learning_rate": 2.648045507584598e-06, "loss": 0.4122, "step": 3452 }, { "epoch": 1.47, "learning_rate": 2.645128354725788e-06, "loss": 0.2771, "step": 3456 }, { "epoch": 1.47, "learning_rate": 2.642211201866978e-06, "loss": 0.3256, "step": 3460 }, { "epoch": 1.47, "learning_rate": 2.639294049008168e-06, "loss": 0.53, "step": 3464 }, { "epoch": 1.47, "learning_rate": 2.6363768961493585e-06, "loss": 0.2602, "step": 3468 }, { "epoch": 1.48, "learning_rate": 2.6334597432905484e-06, "loss": 0.2461, "step": 3472 }, { "epoch": 1.48, "learning_rate": 2.630542590431739e-06, "loss": 0.3867, "step": 3476 }, { "epoch": 1.48, "learning_rate": 2.6276254375729287e-06, "loss": 0.4217, "step": 3480 }, { "epoch": 1.48, "learning_rate": 2.624708284714119e-06, "loss": 0.4234, "step": 3484 }, { "epoch": 1.48, "learning_rate": 2.6217911318553095e-06, "loss": 0.3664, "step": 3488 }, { "epoch": 1.48, "learning_rate": 2.6188739789964994e-06, "loss": 0.5729, "step": 3492 }, { "epoch": 1.49, "learning_rate": 2.6159568261376898e-06, "loss": 0.5175, "step": 3496 }, { "epoch": 1.49, "learning_rate": 2.6130396732788797e-06, "loss": 0.4228, "step": 3500 }, { "epoch": 1.49, "learning_rate": 2.6101225204200705e-06, "loss": 0.3762, "step": 3504 }, { "epoch": 1.49, "learning_rate": 2.60720536756126e-06, "loss": 0.4211, "step": 3508 }, { "epoch": 1.49, "learning_rate": 2.604288214702451e-06, "loss": 0.3969, "step": 3512 }, { "epoch": 1.49, "learning_rate": 2.601371061843641e-06, "loss": 0.3794, "step": 3516 }, { "epoch": 1.5, "learning_rate": 2.598453908984831e-06, "loss": 0.2235, "step": 3520 }, { "epoch": 1.5, "learning_rate": 2.5955367561260215e-06, "loss": 0.3842, "step": 3524 }, { "epoch": 1.5, "learning_rate": 2.5926196032672114e-06, "loss": 0.3081, "step": 3528 }, { "epoch": 1.5, "learning_rate": 2.5897024504084018e-06, "loss": 0.267, "step": 3532 }, { "epoch": 1.5, "learning_rate": 2.5867852975495917e-06, "loss": 0.2976, "step": 3536 }, { "epoch": 1.51, "learning_rate": 2.583868144690782e-06, "loss": 0.4383, "step": 3540 }, { "epoch": 1.51, "learning_rate": 2.580950991831972e-06, "loss": 0.3175, "step": 3544 }, { "epoch": 1.51, "learning_rate": 2.5780338389731624e-06, "loss": 0.3118, "step": 3548 }, { "epoch": 1.51, "learning_rate": 2.5751166861143527e-06, "loss": 0.4329, "step": 3552 }, { "epoch": 1.51, "learning_rate": 2.5721995332555427e-06, "loss": 0.4936, "step": 3556 }, { "epoch": 1.51, "learning_rate": 2.569282380396733e-06, "loss": 0.4872, "step": 3560 }, { "epoch": 1.52, "learning_rate": 2.566365227537923e-06, "loss": 0.431, "step": 3564 }, { "epoch": 1.52, "learning_rate": 2.5634480746791133e-06, "loss": 0.5265, "step": 3568 }, { "epoch": 1.52, "learning_rate": 2.5605309218203033e-06, "loss": 0.3655, "step": 3572 }, { "epoch": 1.52, "learning_rate": 2.5576137689614936e-06, "loss": 0.342, "step": 3576 }, { "epoch": 1.52, "learning_rate": 2.5546966161026836e-06, "loss": 0.4835, "step": 3580 }, { "epoch": 1.52, "learning_rate": 2.5517794632438743e-06, "loss": 0.2614, "step": 3584 }, { "epoch": 1.53, "learning_rate": 2.5488623103850647e-06, "loss": 0.3411, "step": 3588 }, { "epoch": 1.53, "learning_rate": 2.5459451575262546e-06, "loss": 0.4997, "step": 3592 }, { "epoch": 1.53, "learning_rate": 2.543028004667445e-06, "loss": 0.461, "step": 3596 }, { "epoch": 1.53, "learning_rate": 2.540110851808635e-06, "loss": 0.4112, "step": 3600 }, { "epoch": 1.53, "learning_rate": 2.5371936989498253e-06, "loss": 0.4968, "step": 3604 }, { "epoch": 1.53, "learning_rate": 2.5342765460910152e-06, "loss": 0.3994, "step": 3608 }, { "epoch": 1.54, "learning_rate": 2.5313593932322056e-06, "loss": 0.4559, "step": 3612 }, { "epoch": 1.54, "learning_rate": 2.528442240373396e-06, "loss": 0.3394, "step": 3616 }, { "epoch": 1.54, "learning_rate": 2.525525087514586e-06, "loss": 0.3952, "step": 3620 }, { "epoch": 1.54, "learning_rate": 2.5226079346557763e-06, "loss": 0.318, "step": 3624 }, { "epoch": 1.54, "learning_rate": 2.5196907817969662e-06, "loss": 0.3204, "step": 3628 }, { "epoch": 1.54, "learning_rate": 2.5167736289381566e-06, "loss": 0.3047, "step": 3632 }, { "epoch": 1.55, "learning_rate": 2.5138564760793465e-06, "loss": 0.3223, "step": 3636 }, { "epoch": 1.55, "learning_rate": 2.510939323220537e-06, "loss": 0.2073, "step": 3640 }, { "epoch": 1.55, "learning_rate": 2.508022170361727e-06, "loss": 0.3696, "step": 3644 }, { "epoch": 1.55, "learning_rate": 2.505105017502917e-06, "loss": 0.4404, "step": 3648 }, { "epoch": 1.55, "learning_rate": 2.502187864644108e-06, "loss": 0.3907, "step": 3652 }, { "epoch": 1.55, "learning_rate": 2.4992707117852975e-06, "loss": 0.4624, "step": 3656 }, { "epoch": 1.56, "learning_rate": 2.496353558926488e-06, "loss": 0.4258, "step": 3660 }, { "epoch": 1.56, "learning_rate": 2.493436406067678e-06, "loss": 0.421, "step": 3664 }, { "epoch": 1.56, "learning_rate": 2.4905192532088686e-06, "loss": 0.3276, "step": 3668 }, { "epoch": 1.56, "learning_rate": 2.4876021003500585e-06, "loss": 0.4259, "step": 3672 }, { "epoch": 1.56, "learning_rate": 2.484684947491249e-06, "loss": 0.3363, "step": 3676 }, { "epoch": 1.56, "learning_rate": 2.481767794632439e-06, "loss": 0.3734, "step": 3680 }, { "epoch": 1.57, "learning_rate": 2.478850641773629e-06, "loss": 0.4299, "step": 3684 }, { "epoch": 1.57, "learning_rate": 2.475933488914819e-06, "loss": 0.3697, "step": 3688 }, { "epoch": 1.57, "learning_rate": 2.4730163360560095e-06, "loss": 0.3721, "step": 3692 }, { "epoch": 1.57, "learning_rate": 2.4700991831971994e-06, "loss": 0.4954, "step": 3696 }, { "epoch": 1.57, "learning_rate": 2.46718203033839e-06, "loss": 0.2761, "step": 3700 }, { "epoch": 1.57, "learning_rate": 2.46426487747958e-06, "loss": 0.3485, "step": 3704 }, { "epoch": 1.58, "learning_rate": 2.4613477246207705e-06, "loss": 0.3959, "step": 3708 }, { "epoch": 1.58, "learning_rate": 2.4584305717619604e-06, "loss": 0.421, "step": 3712 }, { "epoch": 1.58, "learning_rate": 2.455513418903151e-06, "loss": 0.3158, "step": 3716 }, { "epoch": 1.58, "learning_rate": 2.4525962660443407e-06, "loss": 0.3167, "step": 3720 }, { "epoch": 1.58, "learning_rate": 2.449679113185531e-06, "loss": 0.4065, "step": 3724 }, { "epoch": 1.59, "learning_rate": 2.446761960326721e-06, "loss": 0.406, "step": 3728 }, { "epoch": 1.59, "learning_rate": 2.443844807467912e-06, "loss": 0.4013, "step": 3732 }, { "epoch": 1.59, "learning_rate": 2.4409276546091018e-06, "loss": 0.5664, "step": 3736 }, { "epoch": 1.59, "learning_rate": 2.438010501750292e-06, "loss": 0.3301, "step": 3740 }, { "epoch": 1.59, "learning_rate": 2.435093348891482e-06, "loss": 0.3695, "step": 3744 }, { "epoch": 1.59, "learning_rate": 2.4321761960326724e-06, "loss": 0.3426, "step": 3748 }, { "epoch": 1.6, "learning_rate": 2.4292590431738624e-06, "loss": 0.3434, "step": 3752 }, { "epoch": 1.6, "learning_rate": 2.4263418903150527e-06, "loss": 0.3886, "step": 3756 }, { "epoch": 1.6, "learning_rate": 2.4234247374562427e-06, "loss": 0.4117, "step": 3760 }, { "epoch": 1.6, "learning_rate": 2.4205075845974334e-06, "loss": 0.4477, "step": 3764 }, { "epoch": 1.6, "learning_rate": 2.4175904317386234e-06, "loss": 0.4769, "step": 3768 }, { "epoch": 1.6, "learning_rate": 2.4146732788798137e-06, "loss": 0.4984, "step": 3772 }, { "epoch": 1.61, "learning_rate": 2.4117561260210037e-06, "loss": 0.3964, "step": 3776 }, { "epoch": 1.61, "learning_rate": 2.408838973162194e-06, "loss": 0.4827, "step": 3780 }, { "epoch": 1.61, "learning_rate": 2.405921820303384e-06, "loss": 0.3075, "step": 3784 }, { "epoch": 1.61, "learning_rate": 2.4030046674445743e-06, "loss": 0.2245, "step": 3788 }, { "epoch": 1.61, "learning_rate": 2.4000875145857643e-06, "loss": 0.2683, "step": 3792 }, { "epoch": 1.61, "learning_rate": 2.3971703617269546e-06, "loss": 0.4515, "step": 3796 }, { "epoch": 1.62, "learning_rate": 2.394253208868145e-06, "loss": 0.3369, "step": 3800 }, { "epoch": 1.62, "learning_rate": 2.391336056009335e-06, "loss": 0.2854, "step": 3804 }, { "epoch": 1.62, "learning_rate": 2.3884189031505253e-06, "loss": 0.2712, "step": 3808 }, { "epoch": 1.62, "learning_rate": 2.3855017502917157e-06, "loss": 0.3827, "step": 3812 }, { "epoch": 1.62, "learning_rate": 2.3825845974329056e-06, "loss": 0.2348, "step": 3816 }, { "epoch": 1.62, "learning_rate": 2.379667444574096e-06, "loss": 0.2503, "step": 3820 }, { "epoch": 1.63, "learning_rate": 2.376750291715286e-06, "loss": 0.2814, "step": 3824 }, { "epoch": 1.63, "learning_rate": 2.3738331388564763e-06, "loss": 0.4045, "step": 3828 }, { "epoch": 1.63, "learning_rate": 2.3709159859976666e-06, "loss": 0.5534, "step": 3832 }, { "epoch": 1.63, "learning_rate": 2.3679988331388566e-06, "loss": 0.4016, "step": 3836 }, { "epoch": 1.63, "learning_rate": 2.365081680280047e-06, "loss": 0.4375, "step": 3840 }, { "epoch": 1.63, "learning_rate": 2.362164527421237e-06, "loss": 0.3761, "step": 3844 }, { "epoch": 1.64, "learning_rate": 2.3592473745624272e-06, "loss": 0.3525, "step": 3848 }, { "epoch": 1.64, "learning_rate": 2.3563302217036176e-06, "loss": 0.3385, "step": 3852 }, { "epoch": 1.64, "learning_rate": 2.3534130688448075e-06, "loss": 0.393, "step": 3856 }, { "epoch": 1.64, "learning_rate": 2.350495915985998e-06, "loss": 0.4507, "step": 3860 }, { "epoch": 1.64, "learning_rate": 2.3475787631271883e-06, "loss": 0.2481, "step": 3864 }, { "epoch": 1.64, "learning_rate": 2.344661610268378e-06, "loss": 0.2887, "step": 3868 }, { "epoch": 1.65, "learning_rate": 2.3417444574095686e-06, "loss": 0.3081, "step": 3872 }, { "epoch": 1.65, "learning_rate": 2.3388273045507585e-06, "loss": 0.3454, "step": 3876 }, { "epoch": 1.65, "learning_rate": 2.335910151691949e-06, "loss": 0.4006, "step": 3880 }, { "epoch": 1.65, "learning_rate": 2.332992998833139e-06, "loss": 0.3328, "step": 3884 }, { "epoch": 1.65, "learning_rate": 2.330075845974329e-06, "loss": 0.3802, "step": 3888 }, { "epoch": 1.65, "learning_rate": 2.3271586931155195e-06, "loss": 0.538, "step": 3892 }, { "epoch": 1.66, "learning_rate": 2.3242415402567095e-06, "loss": 0.4035, "step": 3896 }, { "epoch": 1.66, "learning_rate": 2.3213243873979e-06, "loss": 0.3538, "step": 3900 }, { "epoch": 1.66, "learning_rate": 2.31840723453909e-06, "loss": 0.2945, "step": 3904 }, { "epoch": 1.66, "learning_rate": 2.31549008168028e-06, "loss": 0.3023, "step": 3908 }, { "epoch": 1.66, "learning_rate": 2.3125729288214705e-06, "loss": 0.4806, "step": 3912 }, { "epoch": 1.66, "learning_rate": 2.3096557759626604e-06, "loss": 0.4009, "step": 3916 }, { "epoch": 1.67, "learning_rate": 2.306738623103851e-06, "loss": 0.4475, "step": 3920 }, { "epoch": 1.67, "learning_rate": 2.3038214702450407e-06, "loss": 0.2655, "step": 3924 }, { "epoch": 1.67, "learning_rate": 2.300904317386231e-06, "loss": 0.3898, "step": 3928 }, { "epoch": 1.67, "learning_rate": 2.2979871645274215e-06, "loss": 0.2847, "step": 3932 }, { "epoch": 1.67, "learning_rate": 2.295070011668612e-06, "loss": 0.5787, "step": 3936 }, { "epoch": 1.68, "learning_rate": 2.2921528588098018e-06, "loss": 0.4133, "step": 3940 }, { "epoch": 1.68, "learning_rate": 2.289235705950992e-06, "loss": 0.2984, "step": 3944 }, { "epoch": 1.68, "learning_rate": 2.286318553092182e-06, "loss": 0.3097, "step": 3948 }, { "epoch": 1.68, "learning_rate": 2.2834014002333724e-06, "loss": 0.3854, "step": 3952 }, { "epoch": 1.68, "learning_rate": 2.2804842473745624e-06, "loss": 0.5068, "step": 3956 }, { "epoch": 1.68, "learning_rate": 2.2775670945157527e-06, "loss": 0.3831, "step": 3960 }, { "epoch": 1.69, "learning_rate": 2.274649941656943e-06, "loss": 0.2283, "step": 3964 }, { "epoch": 1.69, "learning_rate": 2.2717327887981334e-06, "loss": 0.3432, "step": 3968 }, { "epoch": 1.69, "learning_rate": 2.2688156359393234e-06, "loss": 0.4152, "step": 3972 }, { "epoch": 1.69, "learning_rate": 2.2658984830805137e-06, "loss": 0.2857, "step": 3976 }, { "epoch": 1.69, "learning_rate": 2.2629813302217037e-06, "loss": 0.39, "step": 3980 }, { "epoch": 1.69, "learning_rate": 2.260064177362894e-06, "loss": 0.3972, "step": 3984 }, { "epoch": 1.7, "learning_rate": 2.257147024504084e-06, "loss": 0.3207, "step": 3988 }, { "epoch": 1.7, "learning_rate": 2.2542298716452743e-06, "loss": 0.4362, "step": 3992 }, { "epoch": 1.7, "learning_rate": 2.2513127187864643e-06, "loss": 0.3839, "step": 3996 }, { "epoch": 1.7, "learning_rate": 2.248395565927655e-06, "loss": 0.211, "step": 4000 }, { "epoch": 1.7, "learning_rate": 2.245478413068845e-06, "loss": 0.4071, "step": 4004 }, { "epoch": 1.7, "learning_rate": 2.2425612602100354e-06, "loss": 0.2785, "step": 4008 }, { "epoch": 1.71, "learning_rate": 2.2396441073512253e-06, "loss": 0.4274, "step": 4012 }, { "epoch": 1.71, "learning_rate": 2.2367269544924157e-06, "loss": 0.3813, "step": 4016 }, { "epoch": 1.71, "learning_rate": 2.2338098016336056e-06, "loss": 0.3138, "step": 4020 }, { "epoch": 1.71, "learning_rate": 2.230892648774796e-06, "loss": 0.3181, "step": 4024 }, { "epoch": 1.71, "learning_rate": 2.227975495915986e-06, "loss": 0.4108, "step": 4028 }, { "epoch": 1.71, "learning_rate": 2.2250583430571767e-06, "loss": 0.3285, "step": 4032 }, { "epoch": 1.72, "learning_rate": 2.2221411901983666e-06, "loss": 0.2244, "step": 4036 }, { "epoch": 1.72, "learning_rate": 2.219224037339557e-06, "loss": 0.4148, "step": 4040 }, { "epoch": 1.72, "learning_rate": 2.216306884480747e-06, "loss": 0.418, "step": 4044 }, { "epoch": 1.72, "learning_rate": 2.2133897316219373e-06, "loss": 0.4441, "step": 4048 }, { "epoch": 1.72, "learning_rate": 2.2104725787631272e-06, "loss": 0.358, "step": 4052 }, { "epoch": 1.72, "learning_rate": 2.2075554259043176e-06, "loss": 0.2615, "step": 4056 }, { "epoch": 1.73, "learning_rate": 2.2046382730455075e-06, "loss": 0.3992, "step": 4060 }, { "epoch": 1.73, "learning_rate": 2.201721120186698e-06, "loss": 0.4608, "step": 4064 }, { "epoch": 1.73, "learning_rate": 2.1988039673278883e-06, "loss": 0.2661, "step": 4068 }, { "epoch": 1.73, "learning_rate": 2.1958868144690786e-06, "loss": 0.4447, "step": 4072 }, { "epoch": 1.73, "learning_rate": 2.1929696616102686e-06, "loss": 0.3915, "step": 4076 }, { "epoch": 1.73, "learning_rate": 2.190052508751459e-06, "loss": 0.3283, "step": 4080 }, { "epoch": 1.74, "learning_rate": 2.187135355892649e-06, "loss": 0.3887, "step": 4084 }, { "epoch": 1.74, "learning_rate": 2.1842182030338392e-06, "loss": 0.3772, "step": 4088 }, { "epoch": 1.74, "learning_rate": 2.181301050175029e-06, "loss": 0.5242, "step": 4092 }, { "epoch": 1.74, "learning_rate": 2.1783838973162195e-06, "loss": 0.2624, "step": 4096 }, { "epoch": 1.74, "learning_rate": 2.17546674445741e-06, "loss": 0.4775, "step": 4100 }, { "epoch": 1.74, "learning_rate": 2.1725495915986e-06, "loss": 0.4693, "step": 4104 }, { "epoch": 1.75, "learning_rate": 2.16963243873979e-06, "loss": 0.2954, "step": 4108 }, { "epoch": 1.75, "learning_rate": 2.1667152858809806e-06, "loss": 0.386, "step": 4112 }, { "epoch": 1.75, "learning_rate": 2.1637981330221705e-06, "loss": 0.2375, "step": 4116 }, { "epoch": 1.75, "learning_rate": 2.160880980163361e-06, "loss": 0.456, "step": 4120 }, { "epoch": 1.75, "learning_rate": 2.157963827304551e-06, "loss": 0.5585, "step": 4124 }, { "epoch": 1.76, "learning_rate": 2.155046674445741e-06, "loss": 0.2531, "step": 4128 }, { "epoch": 1.76, "learning_rate": 2.1521295215869315e-06, "loss": 0.3648, "step": 4132 }, { "epoch": 1.76, "learning_rate": 2.1492123687281215e-06, "loss": 0.4723, "step": 4136 }, { "epoch": 1.76, "learning_rate": 2.146295215869312e-06, "loss": 0.2357, "step": 4140 }, { "epoch": 1.76, "learning_rate": 2.1433780630105018e-06, "loss": 0.4827, "step": 4144 }, { "epoch": 1.76, "learning_rate": 2.140460910151692e-06, "loss": 0.3387, "step": 4148 }, { "epoch": 1.77, "learning_rate": 2.137543757292882e-06, "loss": 0.3946, "step": 4152 }, { "epoch": 1.77, "learning_rate": 2.1346266044340724e-06, "loss": 0.1965, "step": 4156 }, { "epoch": 1.77, "learning_rate": 2.1317094515752628e-06, "loss": 0.3286, "step": 4160 }, { "epoch": 1.77, "learning_rate": 2.128792298716453e-06, "loss": 0.3196, "step": 4164 }, { "epoch": 1.77, "learning_rate": 2.125875145857643e-06, "loss": 0.2477, "step": 4168 }, { "epoch": 1.77, "learning_rate": 2.1229579929988334e-06, "loss": 0.3666, "step": 4172 }, { "epoch": 1.78, "learning_rate": 2.1200408401400234e-06, "loss": 0.5021, "step": 4176 }, { "epoch": 1.78, "learning_rate": 2.117852975495916e-06, "loss": 0.5295, "step": 4180 }, { "epoch": 1.78, "learning_rate": 2.1149358226371064e-06, "loss": 0.4222, "step": 4184 }, { "epoch": 1.78, "learning_rate": 2.1120186697782964e-06, "loss": 0.4243, "step": 4188 }, { "epoch": 1.78, "learning_rate": 2.1091015169194867e-06, "loss": 0.5245, "step": 4192 }, { "epoch": 1.78, "learning_rate": 2.106184364060677e-06, "loss": 0.2765, "step": 4196 }, { "epoch": 1.79, "learning_rate": 2.103267211201867e-06, "loss": 0.4902, "step": 4200 }, { "epoch": 1.79, "learning_rate": 2.1003500583430574e-06, "loss": 0.2659, "step": 4204 }, { "epoch": 1.79, "learning_rate": 2.0974329054842477e-06, "loss": 0.512, "step": 4208 }, { "epoch": 1.79, "learning_rate": 2.0945157526254377e-06, "loss": 0.369, "step": 4212 }, { "epoch": 1.79, "learning_rate": 2.091598599766628e-06, "loss": 0.4157, "step": 4216 }, { "epoch": 1.79, "learning_rate": 2.088681446907818e-06, "loss": 0.2203, "step": 4220 }, { "epoch": 1.8, "learning_rate": 2.0857642940490083e-06, "loss": 0.2195, "step": 4224 }, { "epoch": 1.8, "learning_rate": 2.0828471411901983e-06, "loss": 0.4235, "step": 4228 }, { "epoch": 1.8, "learning_rate": 2.0799299883313886e-06, "loss": 0.2623, "step": 4232 }, { "epoch": 1.8, "learning_rate": 2.077012835472579e-06, "loss": 0.4724, "step": 4236 }, { "epoch": 1.8, "learning_rate": 2.0740956826137694e-06, "loss": 0.5003, "step": 4240 }, { "epoch": 1.8, "learning_rate": 2.0711785297549593e-06, "loss": 0.3844, "step": 4244 }, { "epoch": 1.81, "learning_rate": 2.0682613768961497e-06, "loss": 0.402, "step": 4248 }, { "epoch": 1.81, "learning_rate": 2.0653442240373396e-06, "loss": 0.2482, "step": 4252 }, { "epoch": 1.81, "learning_rate": 2.06242707117853e-06, "loss": 0.3593, "step": 4256 }, { "epoch": 1.81, "learning_rate": 2.05950991831972e-06, "loss": 0.2561, "step": 4260 }, { "epoch": 1.81, "learning_rate": 2.0565927654609103e-06, "loss": 0.4176, "step": 4264 }, { "epoch": 1.81, "learning_rate": 2.0536756126021e-06, "loss": 0.2596, "step": 4268 }, { "epoch": 1.82, "learning_rate": 2.050758459743291e-06, "loss": 0.3554, "step": 4272 }, { "epoch": 1.82, "learning_rate": 2.047841306884481e-06, "loss": 0.3388, "step": 4276 }, { "epoch": 1.82, "learning_rate": 2.0449241540256713e-06, "loss": 0.4103, "step": 4280 }, { "epoch": 1.82, "learning_rate": 2.0420070011668612e-06, "loss": 0.3023, "step": 4284 }, { "epoch": 1.82, "learning_rate": 2.0390898483080516e-06, "loss": 0.4772, "step": 4288 }, { "epoch": 1.82, "learning_rate": 2.0361726954492415e-06, "loss": 0.2974, "step": 4292 }, { "epoch": 1.83, "learning_rate": 2.033255542590432e-06, "loss": 0.4114, "step": 4296 }, { "epoch": 1.83, "learning_rate": 2.030338389731622e-06, "loss": 0.2369, "step": 4300 }, { "epoch": 1.83, "learning_rate": 2.027421236872812e-06, "loss": 0.3393, "step": 4304 }, { "epoch": 1.83, "learning_rate": 2.0245040840140026e-06, "loss": 0.2298, "step": 4308 }, { "epoch": 1.83, "learning_rate": 2.021586931155193e-06, "loss": 0.3373, "step": 4312 }, { "epoch": 1.84, "learning_rate": 2.018669778296383e-06, "loss": 0.3498, "step": 4316 }, { "epoch": 1.84, "learning_rate": 2.0157526254375732e-06, "loss": 0.4742, "step": 4320 }, { "epoch": 1.84, "learning_rate": 2.012835472578763e-06, "loss": 0.3716, "step": 4324 }, { "epoch": 1.84, "learning_rate": 2.0099183197199535e-06, "loss": 0.4141, "step": 4328 }, { "epoch": 1.84, "learning_rate": 2.0070011668611435e-06, "loss": 0.3451, "step": 4332 }, { "epoch": 1.84, "learning_rate": 2.004084014002334e-06, "loss": 0.3455, "step": 4336 }, { "epoch": 1.85, "learning_rate": 2.001166861143524e-06, "loss": 0.4263, "step": 4340 }, { "epoch": 1.85, "learning_rate": 1.9982497082847146e-06, "loss": 0.3155, "step": 4344 }, { "epoch": 1.85, "learning_rate": 1.9953325554259045e-06, "loss": 0.254, "step": 4348 }, { "epoch": 1.85, "learning_rate": 1.992415402567095e-06, "loss": 0.2274, "step": 4352 }, { "epoch": 1.85, "learning_rate": 1.989498249708285e-06, "loss": 0.2758, "step": 4356 }, { "epoch": 1.85, "learning_rate": 1.986581096849475e-06, "loss": 0.2397, "step": 4360 }, { "epoch": 1.86, "learning_rate": 1.983663943990665e-06, "loss": 0.2506, "step": 4364 }, { "epoch": 1.86, "learning_rate": 1.9807467911318555e-06, "loss": 0.4136, "step": 4368 }, { "epoch": 1.86, "learning_rate": 1.977829638273046e-06, "loss": 0.3309, "step": 4372 }, { "epoch": 1.86, "learning_rate": 1.974912485414236e-06, "loss": 0.2924, "step": 4376 }, { "epoch": 1.86, "learning_rate": 1.971995332555426e-06, "loss": 0.1428, "step": 4380 }, { "epoch": 1.86, "learning_rate": 1.9690781796966165e-06, "loss": 0.4771, "step": 4384 }, { "epoch": 1.87, "learning_rate": 1.9661610268378064e-06, "loss": 0.4117, "step": 4388 }, { "epoch": 1.87, "learning_rate": 1.9632438739789968e-06, "loss": 0.1177, "step": 4392 }, { "epoch": 1.87, "learning_rate": 1.9603267211201867e-06, "loss": 0.538, "step": 4396 }, { "epoch": 1.87, "learning_rate": 1.957409568261377e-06, "loss": 0.2241, "step": 4400 }, { "epoch": 1.87, "learning_rate": 1.954492415402567e-06, "loss": 0.4846, "step": 4404 }, { "epoch": 1.87, "learning_rate": 1.9515752625437574e-06, "loss": 0.3649, "step": 4408 }, { "epoch": 1.88, "learning_rate": 1.9486581096849477e-06, "loss": 0.4989, "step": 4412 }, { "epoch": 1.88, "learning_rate": 1.945740956826138e-06, "loss": 0.3074, "step": 4416 }, { "epoch": 1.88, "learning_rate": 1.942823803967328e-06, "loss": 0.3055, "step": 4420 }, { "epoch": 1.88, "learning_rate": 1.9399066511085184e-06, "loss": 0.3966, "step": 4424 }, { "epoch": 1.88, "learning_rate": 1.9369894982497083e-06, "loss": 0.3061, "step": 4428 }, { "epoch": 1.88, "learning_rate": 1.9340723453908987e-06, "loss": 0.3059, "step": 4432 }, { "epoch": 1.89, "learning_rate": 1.9311551925320886e-06, "loss": 0.4346, "step": 4436 }, { "epoch": 1.89, "learning_rate": 1.928238039673279e-06, "loss": 0.4984, "step": 4440 }, { "epoch": 1.89, "learning_rate": 1.9253208868144694e-06, "loss": 0.2097, "step": 4444 }, { "epoch": 1.89, "learning_rate": 1.9224037339556593e-06, "loss": 0.4416, "step": 4448 }, { "epoch": 1.89, "learning_rate": 1.9194865810968497e-06, "loss": 0.3917, "step": 4452 }, { "epoch": 1.89, "learning_rate": 1.9165694282380396e-06, "loss": 0.2051, "step": 4456 }, { "epoch": 1.9, "learning_rate": 1.91365227537923e-06, "loss": 0.385, "step": 4460 }, { "epoch": 1.9, "learning_rate": 1.9107351225204203e-06, "loss": 0.5121, "step": 4464 }, { "epoch": 1.9, "learning_rate": 1.9078179696616103e-06, "loss": 0.3747, "step": 4468 }, { "epoch": 1.9, "learning_rate": 1.9049008168028008e-06, "loss": 0.2688, "step": 4472 }, { "epoch": 1.9, "learning_rate": 1.901983663943991e-06, "loss": 0.2459, "step": 4476 }, { "epoch": 1.9, "learning_rate": 1.8990665110851811e-06, "loss": 0.2148, "step": 4480 }, { "epoch": 1.91, "learning_rate": 1.8961493582263713e-06, "loss": 0.2333, "step": 4484 }, { "epoch": 1.91, "learning_rate": 1.8932322053675614e-06, "loss": 0.266, "step": 4488 }, { "epoch": 1.91, "learning_rate": 1.8903150525087516e-06, "loss": 0.3613, "step": 4492 }, { "epoch": 1.91, "learning_rate": 1.8873978996499417e-06, "loss": 0.1306, "step": 4496 }, { "epoch": 1.91, "learning_rate": 1.884480746791132e-06, "loss": 0.2521, "step": 4500 }, { "epoch": 1.91, "learning_rate": 1.881563593932322e-06, "loss": 0.2997, "step": 4504 }, { "epoch": 1.92, "learning_rate": 1.8786464410735124e-06, "loss": 0.4594, "step": 4508 }, { "epoch": 1.92, "learning_rate": 1.8757292882147028e-06, "loss": 0.2661, "step": 4512 }, { "epoch": 1.92, "learning_rate": 1.872812135355893e-06, "loss": 0.2776, "step": 4516 }, { "epoch": 1.92, "learning_rate": 1.869894982497083e-06, "loss": 0.4221, "step": 4520 }, { "epoch": 1.92, "learning_rate": 1.8669778296382732e-06, "loss": 0.234, "step": 4524 }, { "epoch": 1.93, "learning_rate": 1.8640606767794634e-06, "loss": 0.4304, "step": 4528 }, { "epoch": 1.93, "learning_rate": 1.8611435239206535e-06, "loss": 0.476, "step": 4532 }, { "epoch": 1.93, "learning_rate": 1.8582263710618437e-06, "loss": 0.2214, "step": 4536 }, { "epoch": 1.93, "learning_rate": 1.855309218203034e-06, "loss": 0.2805, "step": 4540 }, { "epoch": 1.93, "learning_rate": 1.8523920653442242e-06, "loss": 0.1151, "step": 4544 }, { "epoch": 1.93, "learning_rate": 1.8494749124854143e-06, "loss": 0.2069, "step": 4548 }, { "epoch": 1.94, "learning_rate": 1.8465577596266047e-06, "loss": 0.4162, "step": 4552 }, { "epoch": 1.94, "learning_rate": 1.8436406067677949e-06, "loss": 0.3101, "step": 4556 }, { "epoch": 1.94, "learning_rate": 1.840723453908985e-06, "loss": 0.272, "step": 4560 }, { "epoch": 1.94, "learning_rate": 1.8378063010501752e-06, "loss": 0.4017, "step": 4564 }, { "epoch": 1.94, "learning_rate": 1.8348891481913653e-06, "loss": 0.3501, "step": 4568 }, { "epoch": 1.94, "learning_rate": 1.8319719953325557e-06, "loss": 0.2287, "step": 4572 }, { "epoch": 1.95, "learning_rate": 1.8290548424737458e-06, "loss": 0.4951, "step": 4576 }, { "epoch": 1.95, "learning_rate": 1.826137689614936e-06, "loss": 0.5831, "step": 4580 }, { "epoch": 1.95, "learning_rate": 1.8232205367561261e-06, "loss": 0.3221, "step": 4584 }, { "epoch": 1.95, "learning_rate": 1.8203033838973163e-06, "loss": 0.3682, "step": 4588 }, { "epoch": 1.95, "learning_rate": 1.8173862310385066e-06, "loss": 0.299, "step": 4592 }, { "epoch": 1.95, "learning_rate": 1.8144690781796968e-06, "loss": 0.1729, "step": 4596 }, { "epoch": 1.96, "learning_rate": 1.811551925320887e-06, "loss": 0.214, "step": 4600 }, { "epoch": 1.96, "learning_rate": 1.808634772462077e-06, "loss": 0.3696, "step": 4604 }, { "epoch": 1.96, "learning_rate": 1.8057176196032674e-06, "loss": 0.4532, "step": 4608 }, { "epoch": 1.96, "learning_rate": 1.8028004667444576e-06, "loss": 0.241, "step": 4612 }, { "epoch": 1.96, "learning_rate": 1.7998833138856477e-06, "loss": 0.3685, "step": 4616 }, { "epoch": 1.96, "learning_rate": 1.796966161026838e-06, "loss": 0.3708, "step": 4620 }, { "epoch": 1.97, "learning_rate": 1.794049008168028e-06, "loss": 0.3228, "step": 4624 }, { "epoch": 1.97, "learning_rate": 1.7911318553092182e-06, "loss": 0.2311, "step": 4628 }, { "epoch": 1.97, "learning_rate": 1.7882147024504086e-06, "loss": 0.3598, "step": 4632 }, { "epoch": 1.97, "learning_rate": 1.7852975495915987e-06, "loss": 0.4134, "step": 4636 }, { "epoch": 1.97, "learning_rate": 1.782380396732789e-06, "loss": 0.2711, "step": 4640 }, { "epoch": 1.97, "learning_rate": 1.7794632438739792e-06, "loss": 0.503, "step": 4644 }, { "epoch": 1.98, "learning_rate": 1.7765460910151694e-06, "loss": 0.2192, "step": 4648 }, { "epoch": 1.98, "learning_rate": 1.7736289381563595e-06, "loss": 0.1547, "step": 4652 }, { "epoch": 1.98, "learning_rate": 1.7707117852975497e-06, "loss": 0.3002, "step": 4656 }, { "epoch": 1.98, "learning_rate": 1.7677946324387398e-06, "loss": 0.3846, "step": 4660 }, { "epoch": 1.98, "learning_rate": 1.76487747957993e-06, "loss": 0.4236, "step": 4664 }, { "epoch": 1.98, "learning_rate": 1.7619603267211201e-06, "loss": 0.3245, "step": 4668 }, { "epoch": 1.99, "learning_rate": 1.7590431738623107e-06, "loss": 0.3547, "step": 4672 }, { "epoch": 1.99, "learning_rate": 1.7561260210035008e-06, "loss": 0.2355, "step": 4676 }, { "epoch": 1.99, "learning_rate": 1.753208868144691e-06, "loss": 0.417, "step": 4680 }, { "epoch": 1.99, "learning_rate": 1.7502917152858811e-06, "loss": 0.4419, "step": 4684 }, { "epoch": 1.99, "learning_rate": 1.7473745624270713e-06, "loss": 0.4115, "step": 4688 }, { "epoch": 1.99, "learning_rate": 1.7444574095682615e-06, "loss": 0.2582, "step": 4692 }, { "epoch": 2.0, "learning_rate": 1.7415402567094516e-06, "loss": 0.3695, "step": 4696 }, { "epoch": 2.0, "learning_rate": 1.7386231038506418e-06, "loss": 0.3948, "step": 4700 } ], "logging_steps": 4, "max_steps": 7056, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 53908118568960.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }