diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7071 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.998299319727891, + "eval_steps": 800, + "global_step": 4700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0, + "loss": 2.0847, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 0, + "loss": 2.5726, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 0, + "loss": 2.2415, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 0, + "loss": 1.8935, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 0, + "loss": 2.1994, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 0, + "loss": 2.1794, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 1.3082402064781276e-06, + "loss": 1.5146, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 1.9623603097171917e-06, + "loss": 0.8902, + "step": 32 + }, + { + "epoch": 0.02, + "learning_rate": 2.3449960410798955e-06, + "loss": 0.8521, + "step": 36 + }, + { + "epoch": 0.02, + "learning_rate": 2.6164804129562553e-06, + "loss": 0.7475, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 2.8270600516195322e-06, + "loss": 0.6407, + "step": 44 + }, + { + "epoch": 0.02, + "learning_rate": 2.99911614431896e-06, + "loss": 0.5996, + "step": 48 + }, + { + "epoch": 0.02, + "learning_rate": 3.144587497923142e-06, + "loss": 0.733, + "step": 52 + }, + { + "epoch": 0.02, + "learning_rate": 3.2706005161953197e-06, + "loss": 0.7661, + "step": 56 + }, + { + "epoch": 0.03, + "learning_rate": 3.381751875681663e-06, + "loss": 0.5892, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 3.4811801548585962e-06, + "loss": 0.8165, + "step": 64 + }, + { + "epoch": 0.03, + "learning_rate": 3.5711239740096387e-06, + "loss": 0.4039, + "step": 68 + }, + { + "epoch": 0.03, + "learning_rate": 3.6532362475580235e-06, + "loss": 0.6821, + "step": 72 + }, + { + "epoch": 0.03, + "learning_rate": 3.7287722169385123e-06, + "loss": 0.8229, + "step": 76 + }, + { + "epoch": 0.03, + "learning_rate": 3.7987076011622065e-06, + "loss": 0.6447, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 3.8638158862213e-06, + "loss": 0.4965, + "step": 84 + }, + { + "epoch": 0.04, + "learning_rate": 3.924720619434383e-06, + "loss": 0.655, + "step": 88 + }, + { + "epoch": 0.04, + "learning_rate": 3.9819318221826385e-06, + "loss": 0.6818, + "step": 92 + }, + { + "epoch": 0.04, + "learning_rate": 4.035871978920728e-06, + "loss": 0.743, + "step": 96 + }, + { + "epoch": 0.04, + "learning_rate": 4.086894990123906e-06, + "loss": 0.8767, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 4.135300258097659e-06, + "loss": 0.6235, + "step": 104 + }, + { + "epoch": 0.05, + "learning_rate": 4.18134333252491e-06, + "loss": 0.6074, + "step": 108 + }, + { + "epoch": 0.05, + "learning_rate": 4.225244077248703e-06, + "loss": 0.6712, + "step": 112 + }, + { + "epoch": 0.05, + "learning_rate": 4.267193020182443e-06, + "loss": 0.7978, + "step": 116 + }, + { + "epoch": 0.05, + "learning_rate": 4.3073563507970875e-06, + "loss": 0.4904, + "step": 120 + }, + { + "epoch": 0.05, + "learning_rate": 4.345879896760937e-06, + "loss": 0.4131, + "step": 124 + }, + { + "epoch": 0.05, + "learning_rate": 4.3828923201775755e-06, + "loss": 0.6295, + "step": 128 + }, + { + "epoch": 0.06, + "learning_rate": 4.418507710283431e-06, + "loss": 0.9921, + "step": 132 + }, + { + "epoch": 0.06, + "learning_rate": 4.45282770440127e-06, + "loss": 0.6667, + "step": 136 + }, + { + "epoch": 0.06, + "learning_rate": 4.485943236544386e-06, + "loss": 0.657, + "step": 140 + }, + { + "epoch": 0.06, + "learning_rate": 4.517935989460364e-06, + "loss": 0.7705, + "step": 144 + }, + { + "epoch": 0.06, + "learning_rate": 4.54887960849498e-06, + "loss": 0.866, + "step": 148 + }, + { + "epoch": 0.06, + "learning_rate": 4.578840722673447e-06, + "loss": 0.8428, + "step": 152 + }, + { + "epoch": 0.07, + "learning_rate": 4.607879808611406e-06, + "loss": 0.7855, + "step": 156 + }, + { + "epoch": 0.07, + "learning_rate": 4.636051925421702e-06, + "loss": 0.7451, + "step": 160 + }, + { + "epoch": 0.07, + "learning_rate": 4.663407343064547e-06, + "loss": 0.5792, + "step": 164 + }, + { + "epoch": 0.07, + "learning_rate": 4.689992082159791e-06, + "loss": 0.9155, + "step": 168 + }, + { + "epoch": 0.07, + "learning_rate": 4.715848379822425e-06, + "loss": 0.7189, + "step": 172 + }, + { + "epoch": 0.07, + "learning_rate": 4.74101509336297e-06, + "loss": 0.5087, + "step": 176 + }, + { + "epoch": 0.08, + "learning_rate": 4.76552805154028e-06, + "loss": 0.9235, + "step": 180 + }, + { + "epoch": 0.08, + "learning_rate": 4.789420361336724e-06, + "loss": 0.7996, + "step": 184 + }, + { + "epoch": 0.08, + "learning_rate": 4.812722676847563e-06, + "loss": 0.5372, + "step": 188 + }, + { + "epoch": 0.08, + "learning_rate": 4.835463435763974e-06, + "loss": 0.8693, + "step": 192 + }, + { + "epoch": 0.08, + "learning_rate": 4.857669068026358e-06, + "loss": 0.6677, + "step": 196 + }, + { + "epoch": 0.09, + "learning_rate": 4.879364180487766e-06, + "loss": 0.5588, + "step": 200 + }, + { + "epoch": 0.09, + "learning_rate": 4.900571720823068e-06, + "loss": 0.6868, + "step": 204 + }, + { + "epoch": 0.09, + "learning_rate": 4.921313123421507e-06, + "loss": 0.6329, + "step": 208 + }, + { + "epoch": 0.09, + "learning_rate": 4.941608439588058e-06, + "loss": 0.5781, + "step": 212 + }, + { + "epoch": 0.09, + "learning_rate": 4.9614764540361516e-06, + "loss": 0.8552, + "step": 216 + }, + { + "epoch": 0.09, + "learning_rate": 4.980934789368156e-06, + "loss": 0.9137, + "step": 220 + }, + { + "epoch": 0.1, + "learning_rate": 5e-06, + "loss": 0.9521, + "step": 224 + }, + { + "epoch": 0.1, + "learning_rate": 4.997812135355893e-06, + "loss": 0.527, + "step": 228 + }, + { + "epoch": 0.1, + "learning_rate": 4.994894982497083e-06, + "loss": 0.6134, + "step": 232 + }, + { + "epoch": 0.1, + "learning_rate": 4.991977829638274e-06, + "loss": 0.9135, + "step": 236 + }, + { + "epoch": 0.1, + "learning_rate": 4.989060676779464e-06, + "loss": 0.6525, + "step": 240 + }, + { + "epoch": 0.1, + "learning_rate": 4.986143523920654e-06, + "loss": 0.9277, + "step": 244 + }, + { + "epoch": 0.11, + "learning_rate": 4.983226371061844e-06, + "loss": 1.0032, + "step": 248 + }, + { + "epoch": 0.11, + "learning_rate": 4.980309218203034e-06, + "loss": 0.7763, + "step": 252 + }, + { + "epoch": 0.11, + "learning_rate": 4.977392065344224e-06, + "loss": 0.7304, + "step": 256 + }, + { + "epoch": 0.11, + "learning_rate": 4.974474912485414e-06, + "loss": 0.7923, + "step": 260 + }, + { + "epoch": 0.11, + "learning_rate": 4.971557759626604e-06, + "loss": 0.9643, + "step": 264 + }, + { + "epoch": 0.11, + "learning_rate": 4.968640606767795e-06, + "loss": 0.6124, + "step": 268 + }, + { + "epoch": 0.12, + "learning_rate": 4.965723453908986e-06, + "loss": 0.5817, + "step": 272 + }, + { + "epoch": 0.12, + "learning_rate": 4.962806301050176e-06, + "loss": 0.7712, + "step": 276 + }, + { + "epoch": 0.12, + "learning_rate": 4.959889148191366e-06, + "loss": 0.4688, + "step": 280 + }, + { + "epoch": 0.12, + "learning_rate": 4.956971995332556e-06, + "loss": 0.7547, + "step": 284 + }, + { + "epoch": 0.12, + "learning_rate": 4.954054842473746e-06, + "loss": 0.7743, + "step": 288 + }, + { + "epoch": 0.12, + "learning_rate": 4.951137689614936e-06, + "loss": 0.71, + "step": 292 + }, + { + "epoch": 0.13, + "learning_rate": 4.948220536756126e-06, + "loss": 0.806, + "step": 296 + }, + { + "epoch": 0.13, + "learning_rate": 4.945303383897317e-06, + "loss": 0.5964, + "step": 300 + }, + { + "epoch": 0.13, + "learning_rate": 4.942386231038507e-06, + "loss": 0.6401, + "step": 304 + }, + { + "epoch": 0.13, + "learning_rate": 4.939469078179697e-06, + "loss": 0.887, + "step": 308 + }, + { + "epoch": 0.13, + "learning_rate": 4.936551925320887e-06, + "loss": 0.4782, + "step": 312 + }, + { + "epoch": 0.13, + "learning_rate": 4.933634772462078e-06, + "loss": 0.6871, + "step": 316 + }, + { + "epoch": 0.14, + "learning_rate": 4.930717619603268e-06, + "loss": 0.7753, + "step": 320 + }, + { + "epoch": 0.14, + "learning_rate": 4.9278004667444575e-06, + "loss": 0.671, + "step": 324 + }, + { + "epoch": 0.14, + "learning_rate": 4.9248833138856475e-06, + "loss": 0.5642, + "step": 328 + }, + { + "epoch": 0.14, + "learning_rate": 4.921966161026838e-06, + "loss": 0.5292, + "step": 332 + }, + { + "epoch": 0.14, + "learning_rate": 4.919049008168029e-06, + "loss": 0.712, + "step": 336 + }, + { + "epoch": 0.14, + "learning_rate": 4.916131855309218e-06, + "loss": 0.7544, + "step": 340 + }, + { + "epoch": 0.15, + "learning_rate": 4.913214702450409e-06, + "loss": 0.7463, + "step": 344 + }, + { + "epoch": 0.15, + "learning_rate": 4.910297549591599e-06, + "loss": 0.5543, + "step": 348 + }, + { + "epoch": 0.15, + "learning_rate": 4.90738039673279e-06, + "loss": 0.6754, + "step": 352 + }, + { + "epoch": 0.15, + "learning_rate": 4.90446324387398e-06, + "loss": 0.6768, + "step": 356 + }, + { + "epoch": 0.15, + "learning_rate": 4.9015460910151695e-06, + "loss": 0.6952, + "step": 360 + }, + { + "epoch": 0.15, + "learning_rate": 4.8986289381563595e-06, + "loss": 0.8318, + "step": 364 + }, + { + "epoch": 0.16, + "learning_rate": 4.89571178529755e-06, + "loss": 0.6242, + "step": 368 + }, + { + "epoch": 0.16, + "learning_rate": 4.89279463243874e-06, + "loss": 0.9217, + "step": 372 + }, + { + "epoch": 0.16, + "learning_rate": 4.88987747957993e-06, + "loss": 0.4959, + "step": 376 + }, + { + "epoch": 0.16, + "learning_rate": 4.886960326721121e-06, + "loss": 0.8203, + "step": 380 + }, + { + "epoch": 0.16, + "learning_rate": 4.884043173862311e-06, + "loss": 0.7889, + "step": 384 + }, + { + "epoch": 0.16, + "learning_rate": 4.881126021003501e-06, + "loss": 0.6178, + "step": 388 + }, + { + "epoch": 0.17, + "learning_rate": 4.878208868144691e-06, + "loss": 1.0419, + "step": 392 + }, + { + "epoch": 0.17, + "learning_rate": 4.8752917152858815e-06, + "loss": 0.9276, + "step": 396 + }, + { + "epoch": 0.17, + "learning_rate": 4.8723745624270714e-06, + "loss": 0.6587, + "step": 400 + }, + { + "epoch": 0.17, + "learning_rate": 4.869457409568261e-06, + "loss": 0.5996, + "step": 404 + }, + { + "epoch": 0.17, + "learning_rate": 4.866540256709452e-06, + "loss": 0.7183, + "step": 408 + }, + { + "epoch": 0.18, + "learning_rate": 4.863623103850642e-06, + "loss": 0.5382, + "step": 412 + }, + { + "epoch": 0.18, + "learning_rate": 4.860705950991833e-06, + "loss": 0.6379, + "step": 416 + }, + { + "epoch": 0.18, + "learning_rate": 4.857788798133022e-06, + "loss": 0.6881, + "step": 420 + }, + { + "epoch": 0.18, + "learning_rate": 4.854871645274213e-06, + "loss": 0.708, + "step": 424 + }, + { + "epoch": 0.18, + "learning_rate": 4.851954492415403e-06, + "loss": 0.5051, + "step": 428 + }, + { + "epoch": 0.18, + "learning_rate": 4.8490373395565935e-06, + "loss": 0.6938, + "step": 432 + }, + { + "epoch": 0.19, + "learning_rate": 4.8461201866977834e-06, + "loss": 0.7395, + "step": 436 + }, + { + "epoch": 0.19, + "learning_rate": 4.843203033838973e-06, + "loss": 0.8443, + "step": 440 + }, + { + "epoch": 0.19, + "learning_rate": 4.840285880980164e-06, + "loss": 0.6688, + "step": 444 + }, + { + "epoch": 0.19, + "learning_rate": 4.837368728121354e-06, + "loss": 0.7241, + "step": 448 + }, + { + "epoch": 0.19, + "learning_rate": 4.834451575262544e-06, + "loss": 0.6182, + "step": 452 + }, + { + "epoch": 0.19, + "learning_rate": 4.831534422403734e-06, + "loss": 0.5827, + "step": 456 + }, + { + "epoch": 0.2, + "learning_rate": 4.828617269544925e-06, + "loss": 0.5035, + "step": 460 + }, + { + "epoch": 0.2, + "learning_rate": 4.825700116686115e-06, + "loss": 0.6138, + "step": 464 + }, + { + "epoch": 0.2, + "learning_rate": 4.822782963827305e-06, + "loss": 0.7013, + "step": 468 + }, + { + "epoch": 0.2, + "learning_rate": 4.8198658109684954e-06, + "loss": 0.5948, + "step": 472 + }, + { + "epoch": 0.2, + "learning_rate": 4.816948658109685e-06, + "loss": 0.6721, + "step": 476 + }, + { + "epoch": 0.2, + "learning_rate": 4.814031505250875e-06, + "loss": 0.5647, + "step": 480 + }, + { + "epoch": 0.21, + "learning_rate": 4.811114352392065e-06, + "loss": 0.7064, + "step": 484 + }, + { + "epoch": 0.21, + "learning_rate": 4.808197199533256e-06, + "loss": 0.6298, + "step": 488 + }, + { + "epoch": 0.21, + "learning_rate": 4.805280046674446e-06, + "loss": 0.526, + "step": 492 + }, + { + "epoch": 0.21, + "learning_rate": 4.802362893815637e-06, + "loss": 0.5712, + "step": 496 + }, + { + "epoch": 0.21, + "learning_rate": 4.799445740956827e-06, + "loss": 0.7529, + "step": 500 + }, + { + "epoch": 0.21, + "learning_rate": 4.796528588098017e-06, + "loss": 1.0192, + "step": 504 + }, + { + "epoch": 0.22, + "learning_rate": 4.793611435239207e-06, + "loss": 0.9215, + "step": 508 + }, + { + "epoch": 0.22, + "learning_rate": 4.790694282380397e-06, + "loss": 0.8837, + "step": 512 + }, + { + "epoch": 0.22, + "learning_rate": 4.787777129521587e-06, + "loss": 0.8698, + "step": 516 + }, + { + "epoch": 0.22, + "learning_rate": 4.784859976662777e-06, + "loss": 0.7368, + "step": 520 + }, + { + "epoch": 0.22, + "learning_rate": 4.781942823803968e-06, + "loss": 0.5266, + "step": 524 + }, + { + "epoch": 0.22, + "learning_rate": 4.779025670945158e-06, + "loss": 0.6157, + "step": 528 + }, + { + "epoch": 0.23, + "learning_rate": 4.776108518086348e-06, + "loss": 0.5561, + "step": 532 + }, + { + "epoch": 0.23, + "learning_rate": 4.773191365227539e-06, + "loss": 0.5638, + "step": 536 + }, + { + "epoch": 0.23, + "learning_rate": 4.770274212368729e-06, + "loss": 0.7433, + "step": 540 + }, + { + "epoch": 0.23, + "learning_rate": 4.7673570595099186e-06, + "loss": 0.4696, + "step": 544 + }, + { + "epoch": 0.23, + "learning_rate": 4.7644399066511085e-06, + "loss": 0.4952, + "step": 548 + }, + { + "epoch": 0.23, + "learning_rate": 4.761522753792299e-06, + "loss": 0.6735, + "step": 552 + }, + { + "epoch": 0.24, + "learning_rate": 4.758605600933489e-06, + "loss": 0.6756, + "step": 556 + }, + { + "epoch": 0.24, + "learning_rate": 4.755688448074679e-06, + "loss": 0.8812, + "step": 560 + }, + { + "epoch": 0.24, + "learning_rate": 4.752771295215869e-06, + "loss": 0.5452, + "step": 564 + }, + { + "epoch": 0.24, + "learning_rate": 4.74985414235706e-06, + "loss": 0.5208, + "step": 568 + }, + { + "epoch": 0.24, + "learning_rate": 4.746936989498251e-06, + "loss": 0.8629, + "step": 572 + }, + { + "epoch": 0.24, + "learning_rate": 4.744019836639441e-06, + "loss": 0.6721, + "step": 576 + }, + { + "epoch": 0.25, + "learning_rate": 4.7411026837806305e-06, + "loss": 0.7716, + "step": 580 + }, + { + "epoch": 0.25, + "learning_rate": 4.7381855309218205e-06, + "loss": 0.3795, + "step": 584 + }, + { + "epoch": 0.25, + "learning_rate": 4.735268378063011e-06, + "loss": 0.6173, + "step": 588 + }, + { + "epoch": 0.25, + "learning_rate": 4.732351225204201e-06, + "loss": 0.5626, + "step": 592 + }, + { + "epoch": 0.25, + "learning_rate": 4.729434072345391e-06, + "loss": 0.7762, + "step": 596 + }, + { + "epoch": 0.26, + "learning_rate": 4.726516919486582e-06, + "loss": 0.658, + "step": 600 + }, + { + "epoch": 0.26, + "learning_rate": 4.723599766627772e-06, + "loss": 0.6109, + "step": 604 + }, + { + "epoch": 0.26, + "learning_rate": 4.720682613768962e-06, + "loss": 0.7475, + "step": 608 + }, + { + "epoch": 0.26, + "learning_rate": 4.717765460910152e-06, + "loss": 0.6066, + "step": 612 + }, + { + "epoch": 0.26, + "learning_rate": 4.7148483080513425e-06, + "loss": 0.4173, + "step": 616 + }, + { + "epoch": 0.26, + "learning_rate": 4.7119311551925325e-06, + "loss": 0.6297, + "step": 620 + }, + { + "epoch": 0.27, + "learning_rate": 4.709014002333722e-06, + "loss": 0.655, + "step": 624 + }, + { + "epoch": 0.27, + "learning_rate": 4.706096849474912e-06, + "loss": 0.8621, + "step": 628 + }, + { + "epoch": 0.27, + "learning_rate": 4.703179696616103e-06, + "loss": 0.5386, + "step": 632 + }, + { + "epoch": 0.27, + "learning_rate": 4.700262543757294e-06, + "loss": 0.7833, + "step": 636 + }, + { + "epoch": 0.27, + "learning_rate": 4.697345390898483e-06, + "loss": 0.5759, + "step": 640 + }, + { + "epoch": 0.27, + "learning_rate": 4.694428238039674e-06, + "loss": 0.7122, + "step": 644 + }, + { + "epoch": 0.28, + "learning_rate": 4.691511085180864e-06, + "loss": 0.6034, + "step": 648 + }, + { + "epoch": 0.28, + "learning_rate": 4.6885939323220545e-06, + "loss": 0.8556, + "step": 652 + }, + { + "epoch": 0.28, + "learning_rate": 4.6856767794632445e-06, + "loss": 0.6421, + "step": 656 + }, + { + "epoch": 0.28, + "learning_rate": 4.682759626604434e-06, + "loss": 0.6008, + "step": 660 + }, + { + "epoch": 0.28, + "learning_rate": 4.679842473745624e-06, + "loss": 0.7663, + "step": 664 + }, + { + "epoch": 0.28, + "learning_rate": 4.676925320886815e-06, + "loss": 0.5928, + "step": 668 + }, + { + "epoch": 0.29, + "learning_rate": 4.674008168028005e-06, + "loss": 0.7544, + "step": 672 + }, + { + "epoch": 0.29, + "learning_rate": 4.671091015169195e-06, + "loss": 0.5778, + "step": 676 + }, + { + "epoch": 0.29, + "learning_rate": 4.668173862310386e-06, + "loss": 0.7285, + "step": 680 + }, + { + "epoch": 0.29, + "learning_rate": 4.665256709451576e-06, + "loss": 0.5125, + "step": 684 + }, + { + "epoch": 0.29, + "learning_rate": 4.662339556592766e-06, + "loss": 0.6717, + "step": 688 + }, + { + "epoch": 0.29, + "learning_rate": 4.659422403733956e-06, + "loss": 0.8691, + "step": 692 + }, + { + "epoch": 0.3, + "learning_rate": 4.656505250875146e-06, + "loss": 0.4915, + "step": 696 + }, + { + "epoch": 0.3, + "learning_rate": 4.653588098016336e-06, + "loss": 0.789, + "step": 700 + }, + { + "epoch": 0.3, + "learning_rate": 4.650670945157526e-06, + "loss": 0.9127, + "step": 704 + }, + { + "epoch": 0.3, + "learning_rate": 4.647753792298717e-06, + "loss": 0.6563, + "step": 708 + }, + { + "epoch": 0.3, + "learning_rate": 4.644836639439907e-06, + "loss": 0.4648, + "step": 712 + }, + { + "epoch": 0.3, + "learning_rate": 4.641919486581098e-06, + "loss": 0.6367, + "step": 716 + }, + { + "epoch": 0.31, + "learning_rate": 4.639002333722287e-06, + "loss": 0.7212, + "step": 720 + }, + { + "epoch": 0.31, + "learning_rate": 4.636085180863478e-06, + "loss": 0.6034, + "step": 724 + }, + { + "epoch": 0.31, + "learning_rate": 4.633168028004668e-06, + "loss": 0.4951, + "step": 728 + }, + { + "epoch": 0.31, + "learning_rate": 4.630250875145858e-06, + "loss": 0.4122, + "step": 732 + }, + { + "epoch": 0.31, + "learning_rate": 4.627333722287048e-06, + "loss": 0.467, + "step": 736 + }, + { + "epoch": 0.31, + "learning_rate": 4.624416569428238e-06, + "loss": 0.7467, + "step": 740 + }, + { + "epoch": 0.32, + "learning_rate": 4.621499416569429e-06, + "loss": 0.6229, + "step": 744 + }, + { + "epoch": 0.32, + "learning_rate": 4.618582263710619e-06, + "loss": 0.7651, + "step": 748 + }, + { + "epoch": 0.32, + "learning_rate": 4.615665110851809e-06, + "loss": 0.8044, + "step": 752 + }, + { + "epoch": 0.32, + "learning_rate": 4.612747957992999e-06, + "loss": 0.4718, + "step": 756 + }, + { + "epoch": 0.32, + "learning_rate": 4.60983080513419e-06, + "loss": 0.5431, + "step": 760 + }, + { + "epoch": 0.32, + "learning_rate": 4.60691365227538e-06, + "loss": 0.5484, + "step": 764 + }, + { + "epoch": 0.33, + "learning_rate": 4.6039964994165695e-06, + "loss": 0.8095, + "step": 768 + }, + { + "epoch": 0.33, + "learning_rate": 4.60107934655776e-06, + "loss": 0.6142, + "step": 772 + }, + { + "epoch": 0.33, + "learning_rate": 4.59816219369895e-06, + "loss": 0.6883, + "step": 776 + }, + { + "epoch": 0.33, + "learning_rate": 4.59524504084014e-06, + "loss": 0.7341, + "step": 780 + }, + { + "epoch": 0.33, + "learning_rate": 4.59232788798133e-06, + "loss": 0.9504, + "step": 784 + }, + { + "epoch": 0.34, + "learning_rate": 4.589410735122521e-06, + "loss": 0.7445, + "step": 788 + }, + { + "epoch": 0.34, + "learning_rate": 4.587222870478414e-06, + "loss": 0.7637, + "step": 792 + }, + { + "epoch": 0.34, + "learning_rate": 4.5843057176196035e-06, + "loss": 1.0596, + "step": 796 + }, + { + "epoch": 0.34, + "learning_rate": 4.5813885647607935e-06, + "loss": 0.5224, + "step": 800 + }, + { + "epoch": 0.34, + "learning_rate": 4.578471411901984e-06, + "loss": 0.6143, + "step": 804 + }, + { + "epoch": 0.34, + "learning_rate": 4.575554259043174e-06, + "loss": 0.425, + "step": 808 + }, + { + "epoch": 0.35, + "learning_rate": 4.572637106184364e-06, + "loss": 0.7089, + "step": 812 + }, + { + "epoch": 0.35, + "learning_rate": 4.569719953325555e-06, + "loss": 0.6148, + "step": 816 + }, + { + "epoch": 0.35, + "learning_rate": 4.566802800466745e-06, + "loss": 0.5647, + "step": 820 + }, + { + "epoch": 0.35, + "learning_rate": 4.563885647607935e-06, + "loss": 0.7274, + "step": 824 + }, + { + "epoch": 0.35, + "learning_rate": 4.560968494749125e-06, + "loss": 0.8243, + "step": 828 + }, + { + "epoch": 0.35, + "learning_rate": 4.5580513418903155e-06, + "loss": 0.5999, + "step": 832 + }, + { + "epoch": 0.36, + "learning_rate": 4.5551341890315054e-06, + "loss": 0.4957, + "step": 836 + }, + { + "epoch": 0.36, + "learning_rate": 4.552217036172696e-06, + "loss": 0.7591, + "step": 840 + }, + { + "epoch": 0.36, + "learning_rate": 4.549299883313886e-06, + "loss": 0.4182, + "step": 844 + }, + { + "epoch": 0.36, + "learning_rate": 4.546382730455076e-06, + "loss": 0.5964, + "step": 848 + }, + { + "epoch": 0.36, + "learning_rate": 4.543465577596267e-06, + "loss": 0.6322, + "step": 852 + }, + { + "epoch": 0.36, + "learning_rate": 4.540548424737457e-06, + "loss": 0.4782, + "step": 856 + }, + { + "epoch": 0.37, + "learning_rate": 4.537631271878647e-06, + "loss": 0.5883, + "step": 860 + }, + { + "epoch": 0.37, + "learning_rate": 4.534714119019837e-06, + "loss": 0.5512, + "step": 864 + }, + { + "epoch": 0.37, + "learning_rate": 4.5317969661610275e-06, + "loss": 0.6806, + "step": 868 + }, + { + "epoch": 0.37, + "learning_rate": 4.5288798133022174e-06, + "loss": 0.6884, + "step": 872 + }, + { + "epoch": 0.37, + "learning_rate": 4.525962660443407e-06, + "loss": 0.5685, + "step": 876 + }, + { + "epoch": 0.37, + "learning_rate": 4.523045507584598e-06, + "loss": 0.5336, + "step": 880 + }, + { + "epoch": 0.38, + "learning_rate": 4.520128354725788e-06, + "loss": 0.508, + "step": 884 + }, + { + "epoch": 0.38, + "learning_rate": 4.517211201866978e-06, + "loss": 0.4367, + "step": 888 + }, + { + "epoch": 0.38, + "learning_rate": 4.514294049008168e-06, + "loss": 0.9553, + "step": 892 + }, + { + "epoch": 0.38, + "learning_rate": 4.511376896149359e-06, + "loss": 0.8762, + "step": 896 + }, + { + "epoch": 0.38, + "learning_rate": 4.508459743290549e-06, + "loss": 0.8939, + "step": 900 + }, + { + "epoch": 0.38, + "learning_rate": 4.505542590431739e-06, + "loss": 0.5025, + "step": 904 + }, + { + "epoch": 0.39, + "learning_rate": 4.5026254375729286e-06, + "loss": 0.8958, + "step": 908 + }, + { + "epoch": 0.39, + "learning_rate": 4.499708284714119e-06, + "loss": 0.7813, + "step": 912 + }, + { + "epoch": 0.39, + "learning_rate": 4.49679113185531e-06, + "loss": 0.6857, + "step": 916 + }, + { + "epoch": 0.39, + "learning_rate": 4.4938739789965e-06, + "loss": 0.7198, + "step": 920 + }, + { + "epoch": 0.39, + "learning_rate": 4.49095682613769e-06, + "loss": 0.5257, + "step": 924 + }, + { + "epoch": 0.39, + "learning_rate": 4.48803967327888e-06, + "loss": 0.542, + "step": 928 + }, + { + "epoch": 0.4, + "learning_rate": 4.485122520420071e-06, + "loss": 0.4848, + "step": 932 + }, + { + "epoch": 0.4, + "learning_rate": 4.482205367561261e-06, + "loss": 0.7363, + "step": 936 + }, + { + "epoch": 0.4, + "learning_rate": 4.479288214702451e-06, + "loss": 0.8313, + "step": 940 + }, + { + "epoch": 0.4, + "learning_rate": 4.476371061843641e-06, + "loss": 0.6864, + "step": 944 + }, + { + "epoch": 0.4, + "learning_rate": 4.473453908984831e-06, + "loss": 0.7911, + "step": 948 + }, + { + "epoch": 0.4, + "learning_rate": 4.470536756126021e-06, + "loss": 0.4418, + "step": 952 + }, + { + "epoch": 0.41, + "learning_rate": 4.467619603267211e-06, + "loss": 0.7467, + "step": 956 + }, + { + "epoch": 0.41, + "learning_rate": 4.464702450408402e-06, + "loss": 0.5449, + "step": 960 + }, + { + "epoch": 0.41, + "learning_rate": 4.461785297549592e-06, + "loss": 0.5699, + "step": 964 + }, + { + "epoch": 0.41, + "learning_rate": 4.458868144690782e-06, + "loss": 0.5095, + "step": 968 + }, + { + "epoch": 0.41, + "learning_rate": 4.455950991831972e-06, + "loss": 0.6546, + "step": 972 + }, + { + "epoch": 0.41, + "learning_rate": 4.453033838973163e-06, + "loss": 0.5868, + "step": 976 + }, + { + "epoch": 0.42, + "learning_rate": 4.450116686114353e-06, + "loss": 0.7554, + "step": 980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4471995332555425e-06, + "loss": 0.7272, + "step": 984 + }, + { + "epoch": 0.42, + "learning_rate": 4.444282380396733e-06, + "loss": 0.5532, + "step": 988 + }, + { + "epoch": 0.42, + "learning_rate": 4.441365227537923e-06, + "loss": 0.5618, + "step": 992 + }, + { + "epoch": 0.42, + "learning_rate": 4.438448074679114e-06, + "loss": 0.5518, + "step": 996 + }, + { + "epoch": 0.43, + "learning_rate": 4.435530921820304e-06, + "loss": 0.7264, + "step": 1000 + }, + { + "epoch": 0.43, + "learning_rate": 4.432613768961494e-06, + "loss": 0.5429, + "step": 1004 + }, + { + "epoch": 0.43, + "learning_rate": 4.429696616102684e-06, + "loss": 0.6046, + "step": 1008 + }, + { + "epoch": 0.43, + "learning_rate": 4.426779463243875e-06, + "loss": 0.9232, + "step": 1012 + }, + { + "epoch": 0.43, + "learning_rate": 4.4238623103850645e-06, + "loss": 0.5118, + "step": 1016 + }, + { + "epoch": 0.43, + "learning_rate": 4.4209451575262545e-06, + "loss": 0.643, + "step": 1020 + }, + { + "epoch": 0.44, + "learning_rate": 4.418028004667445e-06, + "loss": 0.7772, + "step": 1024 + }, + { + "epoch": 0.44, + "learning_rate": 4.415110851808635e-06, + "loss": 0.3894, + "step": 1028 + }, + { + "epoch": 0.44, + "learning_rate": 4.412193698949825e-06, + "loss": 0.557, + "step": 1032 + }, + { + "epoch": 0.44, + "learning_rate": 4.409276546091015e-06, + "loss": 0.6952, + "step": 1036 + }, + { + "epoch": 0.44, + "learning_rate": 4.406359393232206e-06, + "loss": 0.7862, + "step": 1040 + }, + { + "epoch": 0.44, + "learning_rate": 4.403442240373396e-06, + "loss": 0.5889, + "step": 1044 + }, + { + "epoch": 0.45, + "learning_rate": 4.400525087514586e-06, + "loss": 0.8459, + "step": 1048 + }, + { + "epoch": 0.45, + "learning_rate": 4.3976079346557765e-06, + "loss": 0.4839, + "step": 1052 + }, + { + "epoch": 0.45, + "learning_rate": 4.3946907817969665e-06, + "loss": 0.5376, + "step": 1056 + }, + { + "epoch": 0.45, + "learning_rate": 4.391773628938157e-06, + "loss": 0.6901, + "step": 1060 + }, + { + "epoch": 0.45, + "learning_rate": 4.388856476079346e-06, + "loss": 0.6058, + "step": 1064 + }, + { + "epoch": 0.45, + "learning_rate": 4.385939323220537e-06, + "loss": 0.7326, + "step": 1068 + }, + { + "epoch": 0.46, + "learning_rate": 4.383022170361727e-06, + "loss": 0.5892, + "step": 1072 + }, + { + "epoch": 0.46, + "learning_rate": 4.380105017502918e-06, + "loss": 0.5026, + "step": 1076 + }, + { + "epoch": 0.46, + "learning_rate": 4.377187864644108e-06, + "loss": 0.6181, + "step": 1080 + }, + { + "epoch": 0.46, + "learning_rate": 4.374270711785298e-06, + "loss": 0.6994, + "step": 1084 + }, + { + "epoch": 0.46, + "learning_rate": 4.3713535589264885e-06, + "loss": 0.7436, + "step": 1088 + }, + { + "epoch": 0.46, + "learning_rate": 4.3684364060676785e-06, + "loss": 0.8489, + "step": 1092 + }, + { + "epoch": 0.47, + "learning_rate": 4.365519253208868e-06, + "loss": 0.4957, + "step": 1096 + }, + { + "epoch": 0.47, + "learning_rate": 4.362602100350058e-06, + "loss": 0.5761, + "step": 1100 + }, + { + "epoch": 0.47, + "learning_rate": 4.359684947491249e-06, + "loss": 0.5296, + "step": 1104 + }, + { + "epoch": 0.47, + "learning_rate": 4.356767794632439e-06, + "loss": 0.4946, + "step": 1108 + }, + { + "epoch": 0.47, + "learning_rate": 4.353850641773629e-06, + "loss": 0.5818, + "step": 1112 + }, + { + "epoch": 0.47, + "learning_rate": 4.35093348891482e-06, + "loss": 0.7388, + "step": 1116 + }, + { + "epoch": 0.48, + "learning_rate": 4.34801633605601e-06, + "loss": 0.7138, + "step": 1120 + }, + { + "epoch": 0.48, + "learning_rate": 4.3450991831972e-06, + "loss": 0.5748, + "step": 1124 + }, + { + "epoch": 0.48, + "learning_rate": 4.34218203033839e-06, + "loss": 0.6169, + "step": 1128 + }, + { + "epoch": 0.48, + "learning_rate": 4.33926487747958e-06, + "loss": 0.5938, + "step": 1132 + }, + { + "epoch": 0.48, + "learning_rate": 4.33634772462077e-06, + "loss": 0.6766, + "step": 1136 + }, + { + "epoch": 0.48, + "learning_rate": 4.333430571761961e-06, + "loss": 0.6961, + "step": 1140 + }, + { + "epoch": 0.49, + "learning_rate": 4.330513418903151e-06, + "loss": 0.7088, + "step": 1144 + }, + { + "epoch": 0.49, + "learning_rate": 4.328325554259044e-06, + "loss": 0.7907, + "step": 1148 + }, + { + "epoch": 0.49, + "learning_rate": 4.325408401400234e-06, + "loss": 0.5619, + "step": 1152 + }, + { + "epoch": 0.49, + "learning_rate": 4.322491248541424e-06, + "loss": 0.9847, + "step": 1156 + }, + { + "epoch": 0.49, + "learning_rate": 4.319574095682614e-06, + "loss": 0.5704, + "step": 1160 + }, + { + "epoch": 0.49, + "learning_rate": 4.316656942823804e-06, + "loss": 0.729, + "step": 1164 + }, + { + "epoch": 0.5, + "learning_rate": 4.313739789964994e-06, + "loss": 0.492, + "step": 1168 + }, + { + "epoch": 0.5, + "learning_rate": 4.310822637106184e-06, + "loss": 0.7427, + "step": 1172 + }, + { + "epoch": 0.5, + "learning_rate": 4.307905484247375e-06, + "loss": 0.5757, + "step": 1176 + }, + { + "epoch": 0.5, + "learning_rate": 4.304988331388565e-06, + "loss": 0.5469, + "step": 1180 + }, + { + "epoch": 0.5, + "learning_rate": 4.302071178529756e-06, + "loss": 0.605, + "step": 1184 + }, + { + "epoch": 0.51, + "learning_rate": 4.299154025670946e-06, + "loss": 0.4903, + "step": 1188 + }, + { + "epoch": 0.51, + "learning_rate": 4.296236872812136e-06, + "loss": 0.4479, + "step": 1192 + }, + { + "epoch": 0.51, + "learning_rate": 4.293319719953326e-06, + "loss": 0.5118, + "step": 1196 + }, + { + "epoch": 0.51, + "learning_rate": 4.290402567094516e-06, + "loss": 0.5867, + "step": 1200 + }, + { + "epoch": 0.51, + "learning_rate": 4.287485414235706e-06, + "loss": 0.499, + "step": 1204 + }, + { + "epoch": 0.51, + "learning_rate": 4.284568261376896e-06, + "loss": 0.5443, + "step": 1208 + }, + { + "epoch": 0.52, + "learning_rate": 4.281651108518087e-06, + "loss": 0.7138, + "step": 1212 + }, + { + "epoch": 0.52, + "learning_rate": 4.278733955659277e-06, + "loss": 0.7214, + "step": 1216 + }, + { + "epoch": 0.52, + "learning_rate": 4.275816802800467e-06, + "loss": 0.4798, + "step": 1220 + }, + { + "epoch": 0.52, + "learning_rate": 4.272899649941658e-06, + "loss": 0.6752, + "step": 1224 + }, + { + "epoch": 0.52, + "learning_rate": 4.2699824970828476e-06, + "loss": 0.4885, + "step": 1228 + }, + { + "epoch": 0.52, + "learning_rate": 4.2670653442240375e-06, + "loss": 0.3946, + "step": 1232 + }, + { + "epoch": 0.53, + "learning_rate": 4.2641481913652275e-06, + "loss": 0.6857, + "step": 1236 + }, + { + "epoch": 0.53, + "learning_rate": 4.261231038506418e-06, + "loss": 0.5198, + "step": 1240 + }, + { + "epoch": 0.53, + "learning_rate": 4.258313885647608e-06, + "loss": 0.7563, + "step": 1244 + }, + { + "epoch": 0.53, + "learning_rate": 4.255396732788798e-06, + "loss": 0.6442, + "step": 1248 + }, + { + "epoch": 0.53, + "learning_rate": 4.252479579929989e-06, + "loss": 0.539, + "step": 1252 + }, + { + "epoch": 0.53, + "learning_rate": 4.249562427071179e-06, + "loss": 0.53, + "step": 1256 + }, + { + "epoch": 0.54, + "learning_rate": 4.24664527421237e-06, + "loss": 0.5773, + "step": 1260 + }, + { + "epoch": 0.54, + "learning_rate": 4.2437281213535596e-06, + "loss": 0.5297, + "step": 1264 + }, + { + "epoch": 0.54, + "learning_rate": 4.2408109684947495e-06, + "loss": 0.7455, + "step": 1268 + }, + { + "epoch": 0.54, + "learning_rate": 4.2378938156359394e-06, + "loss": 0.3915, + "step": 1272 + }, + { + "epoch": 0.54, + "learning_rate": 4.23497666277713e-06, + "loss": 0.7302, + "step": 1276 + }, + { + "epoch": 0.54, + "learning_rate": 4.23205950991832e-06, + "loss": 0.5329, + "step": 1280 + }, + { + "epoch": 0.55, + "learning_rate": 4.22914235705951e-06, + "loss": 0.7163, + "step": 1284 + }, + { + "epoch": 0.55, + "learning_rate": 4.226225204200701e-06, + "loss": 0.5996, + "step": 1288 + }, + { + "epoch": 0.55, + "learning_rate": 4.223308051341891e-06, + "loss": 0.6922, + "step": 1292 + }, + { + "epoch": 0.55, + "learning_rate": 4.220390898483081e-06, + "loss": 0.7819, + "step": 1296 + }, + { + "epoch": 0.55, + "learning_rate": 4.217473745624271e-06, + "loss": 0.4539, + "step": 1300 + }, + { + "epoch": 0.55, + "learning_rate": 4.2145565927654615e-06, + "loss": 0.6079, + "step": 1304 + }, + { + "epoch": 0.56, + "learning_rate": 4.2116394399066514e-06, + "loss": 0.4377, + "step": 1308 + }, + { + "epoch": 0.56, + "learning_rate": 4.208722287047841e-06, + "loss": 0.4358, + "step": 1312 + }, + { + "epoch": 0.56, + "learning_rate": 4.205805134189031e-06, + "loss": 0.6401, + "step": 1316 + }, + { + "epoch": 0.56, + "learning_rate": 4.202887981330222e-06, + "loss": 0.6677, + "step": 1320 + }, + { + "epoch": 0.56, + "learning_rate": 4.199970828471413e-06, + "loss": 0.572, + "step": 1324 + }, + { + "epoch": 0.56, + "learning_rate": 4.197053675612602e-06, + "loss": 0.5973, + "step": 1328 + }, + { + "epoch": 0.57, + "learning_rate": 4.194136522753793e-06, + "loss": 0.679, + "step": 1332 + }, + { + "epoch": 0.57, + "learning_rate": 4.191219369894983e-06, + "loss": 0.5599, + "step": 1336 + }, + { + "epoch": 0.57, + "learning_rate": 4.1883022170361735e-06, + "loss": 0.5171, + "step": 1340 + }, + { + "epoch": 0.57, + "learning_rate": 4.185385064177363e-06, + "loss": 0.569, + "step": 1344 + }, + { + "epoch": 0.57, + "learning_rate": 4.182467911318553e-06, + "loss": 0.9033, + "step": 1348 + }, + { + "epoch": 0.57, + "learning_rate": 4.179550758459744e-06, + "loss": 0.8082, + "step": 1352 + }, + { + "epoch": 0.58, + "learning_rate": 4.176633605600934e-06, + "loss": 0.6142, + "step": 1356 + }, + { + "epoch": 0.58, + "learning_rate": 4.173716452742124e-06, + "loss": 0.4488, + "step": 1360 + }, + { + "epoch": 0.58, + "learning_rate": 4.170799299883314e-06, + "loss": 0.5348, + "step": 1364 + }, + { + "epoch": 0.58, + "learning_rate": 4.167882147024505e-06, + "loss": 0.3747, + "step": 1368 + }, + { + "epoch": 0.58, + "learning_rate": 4.164964994165695e-06, + "loss": 0.4663, + "step": 1372 + }, + { + "epoch": 0.59, + "learning_rate": 4.162047841306885e-06, + "loss": 0.5914, + "step": 1376 + }, + { + "epoch": 0.59, + "learning_rate": 4.1591306884480746e-06, + "loss": 0.4916, + "step": 1380 + }, + { + "epoch": 0.59, + "learning_rate": 4.156213535589265e-06, + "loss": 0.4981, + "step": 1384 + }, + { + "epoch": 0.59, + "learning_rate": 4.153296382730455e-06, + "loss": 0.6886, + "step": 1388 + }, + { + "epoch": 0.59, + "learning_rate": 4.150379229871645e-06, + "loss": 0.7889, + "step": 1392 + }, + { + "epoch": 0.59, + "learning_rate": 4.147462077012836e-06, + "loss": 0.4762, + "step": 1396 + }, + { + "epoch": 0.6, + "learning_rate": 4.144544924154026e-06, + "loss": 0.6236, + "step": 1400 + }, + { + "epoch": 0.6, + "learning_rate": 4.141627771295217e-06, + "loss": 0.4979, + "step": 1404 + }, + { + "epoch": 0.6, + "learning_rate": 4.138710618436406e-06, + "loss": 0.6086, + "step": 1408 + }, + { + "epoch": 0.6, + "learning_rate": 4.135793465577597e-06, + "loss": 0.4375, + "step": 1412 + }, + { + "epoch": 0.6, + "learning_rate": 4.1328763127187866e-06, + "loss": 0.6866, + "step": 1416 + }, + { + "epoch": 0.6, + "learning_rate": 4.129959159859977e-06, + "loss": 0.6476, + "step": 1420 + }, + { + "epoch": 0.61, + "learning_rate": 4.127042007001167e-06, + "loss": 0.5982, + "step": 1424 + }, + { + "epoch": 0.61, + "learning_rate": 4.124124854142357e-06, + "loss": 0.7084, + "step": 1428 + }, + { + "epoch": 0.61, + "learning_rate": 4.121207701283548e-06, + "loss": 0.6747, + "step": 1432 + }, + { + "epoch": 0.61, + "learning_rate": 4.118290548424738e-06, + "loss": 0.5161, + "step": 1436 + }, + { + "epoch": 0.61, + "learning_rate": 4.115373395565928e-06, + "loss": 0.4477, + "step": 1440 + }, + { + "epoch": 0.61, + "learning_rate": 4.112456242707118e-06, + "loss": 0.4698, + "step": 1444 + }, + { + "epoch": 0.62, + "learning_rate": 4.109539089848309e-06, + "loss": 0.3782, + "step": 1448 + }, + { + "epoch": 0.62, + "learning_rate": 4.1066219369894985e-06, + "loss": 0.5897, + "step": 1452 + }, + { + "epoch": 0.62, + "learning_rate": 4.1037047841306885e-06, + "loss": 0.6186, + "step": 1456 + }, + { + "epoch": 0.62, + "learning_rate": 4.100787631271879e-06, + "loss": 0.6515, + "step": 1460 + }, + { + "epoch": 0.62, + "learning_rate": 4.097870478413069e-06, + "loss": 0.7318, + "step": 1464 + }, + { + "epoch": 0.62, + "learning_rate": 4.094953325554259e-06, + "loss": 0.4301, + "step": 1468 + }, + { + "epoch": 0.63, + "learning_rate": 4.092036172695449e-06, + "loss": 0.587, + "step": 1472 + }, + { + "epoch": 0.63, + "learning_rate": 4.08911901983664e-06, + "loss": 0.4525, + "step": 1476 + }, + { + "epoch": 0.63, + "learning_rate": 4.08620186697783e-06, + "loss": 0.7056, + "step": 1480 + }, + { + "epoch": 0.63, + "learning_rate": 4.08328471411902e-06, + "loss": 0.497, + "step": 1484 + }, + { + "epoch": 0.63, + "learning_rate": 4.0803675612602105e-06, + "loss": 0.6571, + "step": 1488 + }, + { + "epoch": 0.63, + "learning_rate": 4.0774504084014005e-06, + "loss": 0.5894, + "step": 1492 + }, + { + "epoch": 0.64, + "learning_rate": 4.074533255542591e-06, + "loss": 0.5912, + "step": 1496 + }, + { + "epoch": 0.64, + "learning_rate": 4.071616102683781e-06, + "loss": 0.5286, + "step": 1500 + }, + { + "epoch": 0.64, + "learning_rate": 4.068698949824971e-06, + "loss": 0.5107, + "step": 1504 + }, + { + "epoch": 0.64, + "learning_rate": 4.065781796966161e-06, + "loss": 0.4842, + "step": 1508 + }, + { + "epoch": 0.64, + "learning_rate": 4.062864644107352e-06, + "loss": 0.4813, + "step": 1512 + }, + { + "epoch": 0.64, + "learning_rate": 4.059947491248542e-06, + "loss": 0.4813, + "step": 1516 + }, + { + "epoch": 0.65, + "learning_rate": 4.057030338389732e-06, + "loss": 0.575, + "step": 1520 + }, + { + "epoch": 0.65, + "learning_rate": 4.0541131855309225e-06, + "loss": 0.5535, + "step": 1524 + }, + { + "epoch": 0.65, + "learning_rate": 4.0511960326721125e-06, + "loss": 0.5936, + "step": 1528 + }, + { + "epoch": 0.65, + "learning_rate": 4.048278879813302e-06, + "loss": 0.691, + "step": 1532 + }, + { + "epoch": 0.65, + "learning_rate": 4.045361726954492e-06, + "loss": 0.4385, + "step": 1536 + }, + { + "epoch": 0.65, + "learning_rate": 4.042444574095683e-06, + "loss": 0.5595, + "step": 1540 + }, + { + "epoch": 0.66, + "learning_rate": 4.039527421236873e-06, + "loss": 0.5647, + "step": 1544 + }, + { + "epoch": 0.66, + "learning_rate": 4.036610268378063e-06, + "loss": 0.508, + "step": 1548 + }, + { + "epoch": 0.66, + "learning_rate": 4.033693115519254e-06, + "loss": 0.4794, + "step": 1552 + }, + { + "epoch": 0.66, + "learning_rate": 4.030775962660444e-06, + "loss": 0.5662, + "step": 1556 + }, + { + "epoch": 0.66, + "learning_rate": 4.0278588098016345e-06, + "loss": 0.6627, + "step": 1560 + }, + { + "epoch": 0.66, + "learning_rate": 4.024941656942824e-06, + "loss": 0.3683, + "step": 1564 + }, + { + "epoch": 0.67, + "learning_rate": 4.022024504084014e-06, + "loss": 0.6034, + "step": 1568 + }, + { + "epoch": 0.67, + "learning_rate": 4.019107351225204e-06, + "loss": 0.3611, + "step": 1572 + }, + { + "epoch": 0.67, + "learning_rate": 4.016190198366395e-06, + "loss": 0.639, + "step": 1576 + }, + { + "epoch": 0.67, + "learning_rate": 4.013273045507585e-06, + "loss": 0.5472, + "step": 1580 + }, + { + "epoch": 0.67, + "learning_rate": 4.010355892648775e-06, + "loss": 0.4576, + "step": 1584 + }, + { + "epoch": 0.68, + "learning_rate": 4.007438739789966e-06, + "loss": 0.5035, + "step": 1588 + }, + { + "epoch": 0.68, + "learning_rate": 4.004521586931156e-06, + "loss": 0.8007, + "step": 1592 + }, + { + "epoch": 0.68, + "learning_rate": 4.001604434072346e-06, + "loss": 0.6807, + "step": 1596 + }, + { + "epoch": 0.68, + "learning_rate": 3.998687281213536e-06, + "loss": 0.5574, + "step": 1600 + }, + { + "epoch": 0.68, + "learning_rate": 3.995770128354726e-06, + "loss": 0.5141, + "step": 1604 + }, + { + "epoch": 0.68, + "learning_rate": 3.992852975495916e-06, + "loss": 0.4496, + "step": 1608 + }, + { + "epoch": 0.69, + "learning_rate": 3.989935822637106e-06, + "loss": 0.7541, + "step": 1612 + }, + { + "epoch": 0.69, + "learning_rate": 3.987018669778296e-06, + "loss": 0.6673, + "step": 1616 + }, + { + "epoch": 0.69, + "learning_rate": 3.984101516919487e-06, + "loss": 0.6613, + "step": 1620 + }, + { + "epoch": 0.69, + "learning_rate": 3.981184364060677e-06, + "loss": 0.7404, + "step": 1624 + }, + { + "epoch": 0.69, + "learning_rate": 3.978267211201867e-06, + "loss": 0.5234, + "step": 1628 + }, + { + "epoch": 0.69, + "learning_rate": 3.975350058343058e-06, + "loss": 0.608, + "step": 1632 + }, + { + "epoch": 0.7, + "learning_rate": 3.972432905484248e-06, + "loss": 0.6614, + "step": 1636 + }, + { + "epoch": 0.7, + "learning_rate": 3.969515752625438e-06, + "loss": 0.4633, + "step": 1640 + }, + { + "epoch": 0.7, + "learning_rate": 3.9665985997666275e-06, + "loss": 0.5372, + "step": 1644 + }, + { + "epoch": 0.7, + "learning_rate": 3.963681446907818e-06, + "loss": 0.5298, + "step": 1648 + }, + { + "epoch": 0.7, + "learning_rate": 3.960764294049009e-06, + "loss": 0.5283, + "step": 1652 + }, + { + "epoch": 0.7, + "learning_rate": 3.957847141190199e-06, + "loss": 0.3638, + "step": 1656 + }, + { + "epoch": 0.71, + "learning_rate": 3.954929988331389e-06, + "loss": 0.3947, + "step": 1660 + }, + { + "epoch": 0.71, + "learning_rate": 3.952012835472579e-06, + "loss": 0.4943, + "step": 1664 + }, + { + "epoch": 0.71, + "learning_rate": 3.94909568261377e-06, + "loss": 0.5024, + "step": 1668 + }, + { + "epoch": 0.71, + "learning_rate": 3.9461785297549596e-06, + "loss": 0.6821, + "step": 1672 + }, + { + "epoch": 0.71, + "learning_rate": 3.9432613768961495e-06, + "loss": 0.4511, + "step": 1676 + }, + { + "epoch": 0.71, + "learning_rate": 3.9403442240373394e-06, + "loss": 0.4767, + "step": 1680 + }, + { + "epoch": 0.72, + "learning_rate": 3.93742707117853e-06, + "loss": 0.5588, + "step": 1684 + }, + { + "epoch": 0.72, + "learning_rate": 3.93450991831972e-06, + "loss": 0.7316, + "step": 1688 + }, + { + "epoch": 0.72, + "learning_rate": 3.93159276546091e-06, + "loss": 0.3692, + "step": 1692 + }, + { + "epoch": 0.72, + "learning_rate": 3.928675612602101e-06, + "loss": 0.7381, + "step": 1696 + }, + { + "epoch": 0.72, + "learning_rate": 3.925758459743291e-06, + "loss": 0.7333, + "step": 1700 + }, + { + "epoch": 0.72, + "learning_rate": 3.922841306884481e-06, + "loss": 0.5729, + "step": 1704 + }, + { + "epoch": 0.73, + "learning_rate": 3.919924154025671e-06, + "loss": 0.5423, + "step": 1708 + }, + { + "epoch": 0.73, + "learning_rate": 3.9170070011668615e-06, + "loss": 0.35, + "step": 1712 + }, + { + "epoch": 0.73, + "learning_rate": 3.9140898483080514e-06, + "loss": 0.3955, + "step": 1716 + }, + { + "epoch": 0.73, + "learning_rate": 3.911172695449242e-06, + "loss": 0.713, + "step": 1720 + }, + { + "epoch": 0.73, + "learning_rate": 3.908255542590432e-06, + "loss": 0.5724, + "step": 1724 + }, + { + "epoch": 0.73, + "learning_rate": 3.905338389731622e-06, + "loss": 0.5339, + "step": 1728 + }, + { + "epoch": 0.74, + "learning_rate": 3.902421236872813e-06, + "loss": 0.372, + "step": 1732 + }, + { + "epoch": 0.74, + "learning_rate": 3.899504084014003e-06, + "loss": 0.6898, + "step": 1736 + }, + { + "epoch": 0.74, + "learning_rate": 3.896586931155193e-06, + "loss": 0.6381, + "step": 1740 + }, + { + "epoch": 0.74, + "learning_rate": 3.893669778296383e-06, + "loss": 0.4658, + "step": 1744 + }, + { + "epoch": 0.74, + "learning_rate": 3.8907526254375735e-06, + "loss": 0.6277, + "step": 1748 + }, + { + "epoch": 0.74, + "learning_rate": 3.887835472578763e-06, + "loss": 0.476, + "step": 1752 + }, + { + "epoch": 0.75, + "learning_rate": 3.884918319719953e-06, + "loss": 0.5812, + "step": 1756 + }, + { + "epoch": 0.75, + "learning_rate": 3.882001166861144e-06, + "loss": 0.3055, + "step": 1760 + }, + { + "epoch": 0.75, + "learning_rate": 3.879084014002334e-06, + "loss": 0.7002, + "step": 1764 + }, + { + "epoch": 0.75, + "learning_rate": 3.876166861143524e-06, + "loss": 0.4782, + "step": 1768 + }, + { + "epoch": 0.75, + "learning_rate": 3.873249708284714e-06, + "loss": 0.4645, + "step": 1772 + }, + { + "epoch": 0.76, + "learning_rate": 3.870332555425905e-06, + "loss": 0.506, + "step": 1776 + }, + { + "epoch": 0.76, + "learning_rate": 3.867415402567095e-06, + "loss": 0.5115, + "step": 1780 + }, + { + "epoch": 0.76, + "learning_rate": 3.864498249708285e-06, + "loss": 0.5903, + "step": 1784 + }, + { + "epoch": 0.76, + "learning_rate": 3.861581096849475e-06, + "loss": 0.555, + "step": 1788 + }, + { + "epoch": 0.76, + "learning_rate": 3.858663943990665e-06, + "loss": 0.6398, + "step": 1792 + }, + { + "epoch": 0.76, + "learning_rate": 3.855746791131856e-06, + "loss": 0.5431, + "step": 1796 + }, + { + "epoch": 0.77, + "learning_rate": 3.852829638273046e-06, + "loss": 0.7979, + "step": 1800 + }, + { + "epoch": 0.77, + "learning_rate": 3.849912485414236e-06, + "loss": 0.3846, + "step": 1804 + }, + { + "epoch": 0.77, + "learning_rate": 3.846995332555426e-06, + "loss": 0.4568, + "step": 1808 + }, + { + "epoch": 0.77, + "learning_rate": 3.844078179696617e-06, + "loss": 0.7126, + "step": 1812 + }, + { + "epoch": 0.77, + "learning_rate": 3.841161026837807e-06, + "loss": 0.6972, + "step": 1816 + }, + { + "epoch": 0.77, + "learning_rate": 3.838243873978997e-06, + "loss": 0.495, + "step": 1820 + }, + { + "epoch": 0.78, + "learning_rate": 3.835326721120187e-06, + "loss": 0.5843, + "step": 1824 + }, + { + "epoch": 0.78, + "learning_rate": 3.832409568261377e-06, + "loss": 0.8, + "step": 1828 + }, + { + "epoch": 0.78, + "learning_rate": 3.829492415402567e-06, + "loss": 0.6066, + "step": 1832 + }, + { + "epoch": 0.78, + "learning_rate": 3.826575262543757e-06, + "loss": 0.5371, + "step": 1836 + }, + { + "epoch": 0.78, + "learning_rate": 3.823658109684948e-06, + "loss": 0.4662, + "step": 1840 + }, + { + "epoch": 0.78, + "learning_rate": 3.820740956826138e-06, + "loss": 0.4733, + "step": 1844 + }, + { + "epoch": 0.79, + "learning_rate": 3.817823803967328e-06, + "loss": 0.6339, + "step": 1848 + }, + { + "epoch": 0.79, + "learning_rate": 3.814906651108519e-06, + "loss": 0.5074, + "step": 1852 + }, + { + "epoch": 0.79, + "learning_rate": 3.8119894982497086e-06, + "loss": 0.7012, + "step": 1856 + }, + { + "epoch": 0.79, + "learning_rate": 3.809072345390899e-06, + "loss": 0.3957, + "step": 1860 + }, + { + "epoch": 0.79, + "learning_rate": 3.806155192532089e-06, + "loss": 0.4838, + "step": 1864 + }, + { + "epoch": 0.79, + "learning_rate": 3.8032380396732793e-06, + "loss": 0.4403, + "step": 1868 + }, + { + "epoch": 0.8, + "learning_rate": 3.800320886814469e-06, + "loss": 0.4519, + "step": 1872 + }, + { + "epoch": 0.8, + "learning_rate": 3.7974037339556596e-06, + "loss": 0.4998, + "step": 1876 + }, + { + "epoch": 0.8, + "learning_rate": 3.7944865810968495e-06, + "loss": 0.3915, + "step": 1880 + }, + { + "epoch": 0.8, + "learning_rate": 3.79156942823804e-06, + "loss": 0.4722, + "step": 1884 + }, + { + "epoch": 0.8, + "learning_rate": 3.7886522753792302e-06, + "loss": 0.4377, + "step": 1888 + }, + { + "epoch": 0.8, + "learning_rate": 3.78573512252042e-06, + "loss": 0.4053, + "step": 1892 + }, + { + "epoch": 0.81, + "learning_rate": 3.782817969661611e-06, + "loss": 0.6229, + "step": 1896 + }, + { + "epoch": 0.81, + "learning_rate": 3.7799008168028005e-06, + "loss": 0.573, + "step": 1900 + }, + { + "epoch": 0.81, + "learning_rate": 3.7769836639439913e-06, + "loss": 0.6595, + "step": 1904 + }, + { + "epoch": 0.81, + "learning_rate": 3.7740665110851808e-06, + "loss": 0.6739, + "step": 1908 + }, + { + "epoch": 0.81, + "learning_rate": 3.7711493582263716e-06, + "loss": 0.5746, + "step": 1912 + }, + { + "epoch": 0.81, + "learning_rate": 3.7682322053675615e-06, + "loss": 0.5315, + "step": 1916 + }, + { + "epoch": 0.82, + "learning_rate": 3.765315052508752e-06, + "loss": 0.545, + "step": 1920 + }, + { + "epoch": 0.82, + "learning_rate": 3.7623978996499422e-06, + "loss": 0.5491, + "step": 1924 + }, + { + "epoch": 0.82, + "learning_rate": 3.759480746791132e-06, + "loss": 0.4616, + "step": 1928 + }, + { + "epoch": 0.82, + "learning_rate": 3.7565635939323225e-06, + "loss": 0.5943, + "step": 1932 + }, + { + "epoch": 0.82, + "learning_rate": 3.7536464410735125e-06, + "loss": 0.7848, + "step": 1936 + }, + { + "epoch": 0.82, + "learning_rate": 3.750729288214703e-06, + "loss": 0.617, + "step": 1940 + }, + { + "epoch": 0.83, + "learning_rate": 3.7478121353558928e-06, + "loss": 0.469, + "step": 1944 + }, + { + "epoch": 0.83, + "learning_rate": 3.744894982497083e-06, + "loss": 0.3749, + "step": 1948 + }, + { + "epoch": 0.83, + "learning_rate": 3.7419778296382735e-06, + "loss": 0.4504, + "step": 1952 + }, + { + "epoch": 0.83, + "learning_rate": 3.7390606767794634e-06, + "loss": 0.5496, + "step": 1956 + }, + { + "epoch": 0.83, + "learning_rate": 3.7361435239206538e-06, + "loss": 0.5946, + "step": 1960 + }, + { + "epoch": 0.84, + "learning_rate": 3.7332263710618437e-06, + "loss": 0.5993, + "step": 1964 + }, + { + "epoch": 0.84, + "learning_rate": 3.730309218203034e-06, + "loss": 0.5006, + "step": 1968 + }, + { + "epoch": 0.84, + "learning_rate": 3.727392065344224e-06, + "loss": 0.5133, + "step": 1972 + }, + { + "epoch": 0.84, + "learning_rate": 3.7244749124854144e-06, + "loss": 0.5787, + "step": 1976 + }, + { + "epoch": 0.84, + "learning_rate": 3.7215577596266043e-06, + "loss": 0.3984, + "step": 1980 + }, + { + "epoch": 0.84, + "learning_rate": 3.718640606767795e-06, + "loss": 0.4427, + "step": 1984 + }, + { + "epoch": 0.85, + "learning_rate": 3.7157234539089855e-06, + "loss": 0.7057, + "step": 1988 + }, + { + "epoch": 0.85, + "learning_rate": 3.7128063010501754e-06, + "loss": 0.5082, + "step": 1992 + }, + { + "epoch": 0.85, + "learning_rate": 3.7098891481913658e-06, + "loss": 0.4868, + "step": 1996 + }, + { + "epoch": 0.85, + "learning_rate": 3.7069719953325557e-06, + "loss": 0.5882, + "step": 2000 + }, + { + "epoch": 0.85, + "learning_rate": 3.704054842473746e-06, + "loss": 0.5969, + "step": 2004 + }, + { + "epoch": 0.85, + "learning_rate": 3.701137689614936e-06, + "loss": 0.4813, + "step": 2008 + }, + { + "epoch": 0.86, + "learning_rate": 3.6982205367561264e-06, + "loss": 0.4511, + "step": 2012 + }, + { + "epoch": 0.86, + "learning_rate": 3.6953033838973167e-06, + "loss": 0.5281, + "step": 2016 + }, + { + "epoch": 0.86, + "learning_rate": 3.6923862310385067e-06, + "loss": 0.5461, + "step": 2020 + }, + { + "epoch": 0.86, + "learning_rate": 3.689469078179697e-06, + "loss": 0.5653, + "step": 2024 + }, + { + "epoch": 0.86, + "learning_rate": 3.686551925320887e-06, + "loss": 0.5701, + "step": 2028 + }, + { + "epoch": 0.86, + "learning_rate": 3.6836347724620773e-06, + "loss": 0.5516, + "step": 2032 + }, + { + "epoch": 0.87, + "learning_rate": 3.6807176196032673e-06, + "loss": 0.6763, + "step": 2036 + }, + { + "epoch": 0.87, + "learning_rate": 3.6778004667444576e-06, + "loss": 0.5188, + "step": 2040 + }, + { + "epoch": 0.87, + "learning_rate": 3.6748833138856476e-06, + "loss": 0.6355, + "step": 2044 + }, + { + "epoch": 0.87, + "learning_rate": 3.671966161026838e-06, + "loss": 0.3786, + "step": 2048 + }, + { + "epoch": 0.87, + "learning_rate": 3.6690490081680287e-06, + "loss": 0.4538, + "step": 2052 + }, + { + "epoch": 0.87, + "learning_rate": 3.6661318553092182e-06, + "loss": 0.4956, + "step": 2056 + }, + { + "epoch": 0.88, + "learning_rate": 3.663214702450409e-06, + "loss": 0.4748, + "step": 2060 + }, + { + "epoch": 0.88, + "learning_rate": 3.660297549591599e-06, + "loss": 0.5572, + "step": 2064 + }, + { + "epoch": 0.88, + "learning_rate": 3.6573803967327893e-06, + "loss": 0.5476, + "step": 2068 + }, + { + "epoch": 0.88, + "learning_rate": 3.6544632438739793e-06, + "loss": 0.6124, + "step": 2072 + }, + { + "epoch": 0.88, + "learning_rate": 3.6515460910151696e-06, + "loss": 0.6467, + "step": 2076 + }, + { + "epoch": 0.88, + "learning_rate": 3.6486289381563596e-06, + "loss": 0.5917, + "step": 2080 + }, + { + "epoch": 0.89, + "learning_rate": 3.64571178529755e-06, + "loss": 0.4462, + "step": 2084 + }, + { + "epoch": 0.89, + "learning_rate": 3.6427946324387403e-06, + "loss": 0.5173, + "step": 2088 + }, + { + "epoch": 0.89, + "learning_rate": 3.6398774795799302e-06, + "loss": 0.5402, + "step": 2092 + }, + { + "epoch": 0.89, + "learning_rate": 3.6369603267211206e-06, + "loss": 0.3434, + "step": 2096 + }, + { + "epoch": 0.89, + "learning_rate": 3.6340431738623105e-06, + "loss": 0.3308, + "step": 2100 + }, + { + "epoch": 0.89, + "learning_rate": 3.631126021003501e-06, + "loss": 0.4462, + "step": 2104 + }, + { + "epoch": 0.9, + "learning_rate": 3.628208868144691e-06, + "loss": 0.4822, + "step": 2108 + }, + { + "epoch": 0.9, + "learning_rate": 3.625291715285881e-06, + "loss": 0.4612, + "step": 2112 + }, + { + "epoch": 0.9, + "learning_rate": 3.6223745624270716e-06, + "loss": 0.5462, + "step": 2116 + }, + { + "epoch": 0.9, + "learning_rate": 3.6194574095682615e-06, + "loss": 0.6326, + "step": 2120 + }, + { + "epoch": 0.9, + "learning_rate": 3.6165402567094523e-06, + "loss": 0.664, + "step": 2124 + }, + { + "epoch": 0.9, + "learning_rate": 3.613623103850642e-06, + "loss": 0.4564, + "step": 2128 + }, + { + "epoch": 0.91, + "learning_rate": 3.6107059509918326e-06, + "loss": 0.4078, + "step": 2132 + }, + { + "epoch": 0.91, + "learning_rate": 3.607788798133022e-06, + "loss": 0.3981, + "step": 2136 + }, + { + "epoch": 0.91, + "learning_rate": 3.604871645274213e-06, + "loss": 0.5809, + "step": 2140 + }, + { + "epoch": 0.91, + "learning_rate": 3.601954492415403e-06, + "loss": 0.3539, + "step": 2144 + }, + { + "epoch": 0.91, + "learning_rate": 3.599037339556593e-06, + "loss": 0.4753, + "step": 2148 + }, + { + "epoch": 0.91, + "learning_rate": 3.5961201866977835e-06, + "loss": 0.4232, + "step": 2152 + }, + { + "epoch": 0.92, + "learning_rate": 3.5932030338389735e-06, + "loss": 0.5864, + "step": 2156 + }, + { + "epoch": 0.92, + "learning_rate": 3.590285880980164e-06, + "loss": 0.5046, + "step": 2160 + }, + { + "epoch": 0.92, + "learning_rate": 3.5873687281213538e-06, + "loss": 0.711, + "step": 2164 + }, + { + "epoch": 0.92, + "learning_rate": 3.584451575262544e-06, + "loss": 0.3999, + "step": 2168 + }, + { + "epoch": 0.92, + "learning_rate": 3.581534422403734e-06, + "loss": 0.4033, + "step": 2172 + }, + { + "epoch": 0.93, + "learning_rate": 3.5786172695449245e-06, + "loss": 0.3923, + "step": 2176 + }, + { + "epoch": 0.93, + "learning_rate": 3.5757001166861144e-06, + "loss": 0.4749, + "step": 2180 + }, + { + "epoch": 0.93, + "learning_rate": 3.5727829638273048e-06, + "loss": 0.5808, + "step": 2184 + }, + { + "epoch": 0.93, + "learning_rate": 3.569865810968495e-06, + "loss": 0.5079, + "step": 2188 + }, + { + "epoch": 0.93, + "learning_rate": 3.566948658109685e-06, + "loss": 0.6254, + "step": 2192 + }, + { + "epoch": 0.93, + "learning_rate": 3.5640315052508754e-06, + "loss": 0.5132, + "step": 2196 + }, + { + "epoch": 0.94, + "learning_rate": 3.5611143523920654e-06, + "loss": 0.3979, + "step": 2200 + }, + { + "epoch": 0.94, + "learning_rate": 3.558197199533256e-06, + "loss": 0.5037, + "step": 2204 + }, + { + "epoch": 0.94, + "learning_rate": 3.5552800466744457e-06, + "loss": 0.4465, + "step": 2208 + }, + { + "epoch": 0.94, + "learning_rate": 3.5523628938156364e-06, + "loss": 0.5237, + "step": 2212 + }, + { + "epoch": 0.94, + "learning_rate": 3.549445740956827e-06, + "loss": 0.4848, + "step": 2216 + }, + { + "epoch": 0.94, + "learning_rate": 3.5465285880980167e-06, + "loss": 0.6196, + "step": 2220 + }, + { + "epoch": 0.95, + "learning_rate": 3.543611435239207e-06, + "loss": 0.5312, + "step": 2224 + }, + { + "epoch": 0.95, + "learning_rate": 3.540694282380397e-06, + "loss": 0.7711, + "step": 2228 + }, + { + "epoch": 0.95, + "learning_rate": 3.5377771295215874e-06, + "loss": 0.3885, + "step": 2232 + }, + { + "epoch": 0.95, + "learning_rate": 3.5348599766627773e-06, + "loss": 0.4993, + "step": 2236 + }, + { + "epoch": 0.95, + "learning_rate": 3.5319428238039677e-06, + "loss": 0.6159, + "step": 2240 + }, + { + "epoch": 0.95, + "learning_rate": 3.5290256709451576e-06, + "loss": 0.5108, + "step": 2244 + }, + { + "epoch": 0.96, + "learning_rate": 3.526108518086348e-06, + "loss": 0.4773, + "step": 2248 + }, + { + "epoch": 0.96, + "learning_rate": 3.5231913652275384e-06, + "loss": 0.4661, + "step": 2252 + }, + { + "epoch": 0.96, + "learning_rate": 3.5202742123687283e-06, + "loss": 0.4629, + "step": 2256 + }, + { + "epoch": 0.96, + "learning_rate": 3.5173570595099187e-06, + "loss": 0.587, + "step": 2260 + }, + { + "epoch": 0.96, + "learning_rate": 3.5144399066511086e-06, + "loss": 0.4012, + "step": 2264 + }, + { + "epoch": 0.96, + "learning_rate": 3.511522753792299e-06, + "loss": 0.6225, + "step": 2268 + }, + { + "epoch": 0.97, + "learning_rate": 3.508605600933489e-06, + "loss": 0.5934, + "step": 2272 + }, + { + "epoch": 0.97, + "learning_rate": 3.5056884480746793e-06, + "loss": 0.5112, + "step": 2276 + }, + { + "epoch": 0.97, + "learning_rate": 3.502771295215869e-06, + "loss": 0.6217, + "step": 2280 + }, + { + "epoch": 0.97, + "learning_rate": 3.4998541423570596e-06, + "loss": 0.7376, + "step": 2284 + }, + { + "epoch": 0.97, + "learning_rate": 3.4969369894982504e-06, + "loss": 0.4367, + "step": 2288 + }, + { + "epoch": 0.97, + "learning_rate": 3.4940198366394403e-06, + "loss": 0.4153, + "step": 2292 + }, + { + "epoch": 0.98, + "learning_rate": 3.4911026837806307e-06, + "loss": 0.563, + "step": 2296 + }, + { + "epoch": 0.98, + "learning_rate": 3.4881855309218206e-06, + "loss": 0.4106, + "step": 2300 + }, + { + "epoch": 0.98, + "learning_rate": 3.485268378063011e-06, + "loss": 0.6708, + "step": 2304 + }, + { + "epoch": 0.98, + "learning_rate": 3.482351225204201e-06, + "loss": 0.4741, + "step": 2308 + }, + { + "epoch": 0.98, + "learning_rate": 3.4794340723453913e-06, + "loss": 0.6038, + "step": 2312 + }, + { + "epoch": 0.98, + "learning_rate": 3.4765169194865816e-06, + "loss": 0.3982, + "step": 2316 + }, + { + "epoch": 0.99, + "learning_rate": 3.4735997666277716e-06, + "loss": 0.7745, + "step": 2320 + }, + { + "epoch": 0.99, + "learning_rate": 3.470682613768962e-06, + "loss": 0.5016, + "step": 2324 + }, + { + "epoch": 0.99, + "learning_rate": 3.467765460910152e-06, + "loss": 0.3199, + "step": 2328 + }, + { + "epoch": 0.99, + "learning_rate": 3.4648483080513422e-06, + "loss": 0.5832, + "step": 2332 + }, + { + "epoch": 0.99, + "learning_rate": 3.461931155192532e-06, + "loss": 0.4076, + "step": 2336 + }, + { + "epoch": 0.99, + "learning_rate": 3.4590140023337225e-06, + "loss": 0.5615, + "step": 2340 + }, + { + "epoch": 1.0, + "learning_rate": 3.4560968494749125e-06, + "loss": 0.3989, + "step": 2344 + }, + { + "epoch": 1.0, + "learning_rate": 3.453179696616103e-06, + "loss": 0.4901, + "step": 2348 + }, + { + "epoch": 1.0, + "learning_rate": 3.4502625437572936e-06, + "loss": 0.4952, + "step": 2352 + }, + { + "epoch": 1.0, + "learning_rate": 3.447345390898483e-06, + "loss": 0.5235, + "step": 2356 + }, + { + "epoch": 1.0, + "learning_rate": 3.444428238039674e-06, + "loss": 0.3815, + "step": 2360 + }, + { + "epoch": 1.01, + "learning_rate": 3.4415110851808634e-06, + "loss": 0.5771, + "step": 2364 + }, + { + "epoch": 1.01, + "learning_rate": 3.4385939323220542e-06, + "loss": 0.7008, + "step": 2368 + }, + { + "epoch": 1.01, + "learning_rate": 3.435676779463244e-06, + "loss": 0.4909, + "step": 2372 + }, + { + "epoch": 1.01, + "learning_rate": 3.4327596266044345e-06, + "loss": 0.4042, + "step": 2376 + }, + { + "epoch": 1.01, + "learning_rate": 3.4298424737456245e-06, + "loss": 0.5892, + "step": 2380 + }, + { + "epoch": 1.01, + "learning_rate": 3.426925320886815e-06, + "loss": 0.538, + "step": 2384 + }, + { + "epoch": 1.02, + "learning_rate": 3.424008168028005e-06, + "loss": 0.409, + "step": 2388 + }, + { + "epoch": 1.02, + "learning_rate": 3.421091015169195e-06, + "loss": 0.5232, + "step": 2392 + }, + { + "epoch": 1.02, + "learning_rate": 3.4181738623103855e-06, + "loss": 0.6311, + "step": 2396 + }, + { + "epoch": 1.02, + "learning_rate": 3.4152567094515754e-06, + "loss": 0.4116, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 3.4123395565927658e-06, + "loss": 0.5223, + "step": 2404 + }, + { + "epoch": 1.02, + "learning_rate": 3.4094224037339557e-06, + "loss": 0.6645, + "step": 2408 + }, + { + "epoch": 1.03, + "learning_rate": 3.406505250875146e-06, + "loss": 0.4037, + "step": 2412 + }, + { + "epoch": 1.03, + "learning_rate": 3.4035880980163364e-06, + "loss": 0.4992, + "step": 2416 + }, + { + "epoch": 1.03, + "learning_rate": 3.4006709451575264e-06, + "loss": 0.3654, + "step": 2420 + }, + { + "epoch": 1.03, + "learning_rate": 3.3977537922987167e-06, + "loss": 0.6107, + "step": 2424 + }, + { + "epoch": 1.03, + "learning_rate": 3.3948366394399067e-06, + "loss": 0.5753, + "step": 2428 + }, + { + "epoch": 1.03, + "learning_rate": 3.3919194865810975e-06, + "loss": 0.375, + "step": 2432 + }, + { + "epoch": 1.04, + "learning_rate": 3.389002333722287e-06, + "loss": 0.4033, + "step": 2436 + }, + { + "epoch": 1.04, + "learning_rate": 3.3860851808634778e-06, + "loss": 0.5717, + "step": 2440 + }, + { + "epoch": 1.04, + "learning_rate": 3.3831680280046673e-06, + "loss": 0.3352, + "step": 2444 + }, + { + "epoch": 1.04, + "learning_rate": 3.380250875145858e-06, + "loss": 0.3152, + "step": 2448 + }, + { + "epoch": 1.04, + "learning_rate": 3.3773337222870484e-06, + "loss": 0.6976, + "step": 2452 + }, + { + "epoch": 1.04, + "learning_rate": 3.3744165694282384e-06, + "loss": 0.6974, + "step": 2456 + }, + { + "epoch": 1.05, + "learning_rate": 3.3714994165694287e-06, + "loss": 0.4909, + "step": 2460 + }, + { + "epoch": 1.05, + "learning_rate": 3.3685822637106187e-06, + "loss": 0.4997, + "step": 2464 + }, + { + "epoch": 1.05, + "learning_rate": 3.365665110851809e-06, + "loss": 0.5293, + "step": 2468 + }, + { + "epoch": 1.05, + "learning_rate": 3.362747957992999e-06, + "loss": 0.5174, + "step": 2472 + }, + { + "epoch": 1.05, + "learning_rate": 3.3598308051341893e-06, + "loss": 0.4137, + "step": 2476 + }, + { + "epoch": 1.05, + "learning_rate": 3.3569136522753793e-06, + "loss": 0.3583, + "step": 2480 + }, + { + "epoch": 1.06, + "learning_rate": 3.3539964994165696e-06, + "loss": 0.4317, + "step": 2484 + }, + { + "epoch": 1.06, + "learning_rate": 3.35107934655776e-06, + "loss": 0.5334, + "step": 2488 + }, + { + "epoch": 1.06, + "learning_rate": 3.34816219369895e-06, + "loss": 0.4626, + "step": 2492 + }, + { + "epoch": 1.06, + "learning_rate": 3.3452450408401403e-06, + "loss": 0.3575, + "step": 2496 + }, + { + "epoch": 1.06, + "learning_rate": 3.3423278879813302e-06, + "loss": 0.3181, + "step": 2500 + }, + { + "epoch": 1.06, + "learning_rate": 3.3394107351225206e-06, + "loss": 0.4233, + "step": 2504 + }, + { + "epoch": 1.07, + "learning_rate": 3.3364935822637105e-06, + "loss": 0.6187, + "step": 2508 + }, + { + "epoch": 1.07, + "learning_rate": 3.3335764294049013e-06, + "loss": 0.4076, + "step": 2512 + }, + { + "epoch": 1.07, + "learning_rate": 3.3306592765460917e-06, + "loss": 0.3893, + "step": 2516 + }, + { + "epoch": 1.07, + "learning_rate": 3.3277421236872816e-06, + "loss": 0.4937, + "step": 2520 + }, + { + "epoch": 1.07, + "learning_rate": 3.324824970828472e-06, + "loss": 0.29, + "step": 2524 + }, + { + "epoch": 1.07, + "learning_rate": 3.321907817969662e-06, + "loss": 0.4274, + "step": 2528 + }, + { + "epoch": 1.08, + "learning_rate": 3.3189906651108523e-06, + "loss": 0.3029, + "step": 2532 + }, + { + "epoch": 1.08, + "learning_rate": 3.3160735122520422e-06, + "loss": 0.4773, + "step": 2536 + }, + { + "epoch": 1.08, + "learning_rate": 3.3131563593932326e-06, + "loss": 0.5116, + "step": 2540 + }, + { + "epoch": 1.08, + "learning_rate": 3.3102392065344225e-06, + "loss": 0.514, + "step": 2544 + }, + { + "epoch": 1.08, + "learning_rate": 3.307322053675613e-06, + "loss": 0.5396, + "step": 2548 + }, + { + "epoch": 1.09, + "learning_rate": 3.3044049008168033e-06, + "loss": 0.4585, + "step": 2552 + }, + { + "epoch": 1.09, + "learning_rate": 3.301487747957993e-06, + "loss": 0.6577, + "step": 2556 + }, + { + "epoch": 1.09, + "learning_rate": 3.2985705950991836e-06, + "loss": 0.4556, + "step": 2560 + }, + { + "epoch": 1.09, + "learning_rate": 3.2956534422403735e-06, + "loss": 0.3851, + "step": 2564 + }, + { + "epoch": 1.09, + "learning_rate": 3.292736289381564e-06, + "loss": 0.366, + "step": 2568 + }, + { + "epoch": 1.09, + "learning_rate": 3.289819136522754e-06, + "loss": 0.53, + "step": 2572 + }, + { + "epoch": 1.1, + "learning_rate": 3.286901983663944e-06, + "loss": 0.513, + "step": 2576 + }, + { + "epoch": 1.1, + "learning_rate": 3.283984830805134e-06, + "loss": 0.4743, + "step": 2580 + }, + { + "epoch": 1.1, + "learning_rate": 3.2810676779463245e-06, + "loss": 0.5324, + "step": 2584 + }, + { + "epoch": 1.1, + "learning_rate": 3.2781505250875152e-06, + "loss": 0.5894, + "step": 2588 + }, + { + "epoch": 1.1, + "learning_rate": 3.2752333722287048e-06, + "loss": 0.4134, + "step": 2592 + }, + { + "epoch": 1.1, + "learning_rate": 3.2723162193698955e-06, + "loss": 0.5402, + "step": 2596 + }, + { + "epoch": 1.11, + "learning_rate": 3.2693990665110855e-06, + "loss": 0.4749, + "step": 2600 + }, + { + "epoch": 1.11, + "learning_rate": 3.266481913652276e-06, + "loss": 0.3575, + "step": 2604 + }, + { + "epoch": 1.11, + "learning_rate": 3.2635647607934658e-06, + "loss": 0.3848, + "step": 2608 + }, + { + "epoch": 1.11, + "learning_rate": 3.260647607934656e-06, + "loss": 0.3512, + "step": 2612 + }, + { + "epoch": 1.11, + "learning_rate": 3.2577304550758465e-06, + "loss": 0.4186, + "step": 2616 + }, + { + "epoch": 1.11, + "learning_rate": 3.2548133022170364e-06, + "loss": 0.3952, + "step": 2620 + }, + { + "epoch": 1.12, + "learning_rate": 3.251896149358227e-06, + "loss": 0.3489, + "step": 2624 + }, + { + "epoch": 1.12, + "learning_rate": 3.2489789964994167e-06, + "loss": 0.5544, + "step": 2628 + }, + { + "epoch": 1.12, + "learning_rate": 3.246061843640607e-06, + "loss": 0.4824, + "step": 2632 + }, + { + "epoch": 1.12, + "learning_rate": 3.243144690781797e-06, + "loss": 0.3462, + "step": 2636 + }, + { + "epoch": 1.12, + "learning_rate": 3.2402275379229874e-06, + "loss": 0.4465, + "step": 2640 + }, + { + "epoch": 1.12, + "learning_rate": 3.2373103850641773e-06, + "loss": 0.4774, + "step": 2644 + }, + { + "epoch": 1.13, + "learning_rate": 3.2343932322053677e-06, + "loss": 0.32, + "step": 2648 + }, + { + "epoch": 1.13, + "learning_rate": 3.231476079346558e-06, + "loss": 0.5598, + "step": 2652 + }, + { + "epoch": 1.13, + "learning_rate": 3.228558926487748e-06, + "loss": 0.5406, + "step": 2656 + }, + { + "epoch": 1.13, + "learning_rate": 3.225641773628939e-06, + "loss": 0.3966, + "step": 2660 + }, + { + "epoch": 1.13, + "learning_rate": 3.2227246207701283e-06, + "loss": 0.6023, + "step": 2664 + }, + { + "epoch": 1.13, + "learning_rate": 3.219807467911319e-06, + "loss": 0.4532, + "step": 2668 + }, + { + "epoch": 1.14, + "learning_rate": 3.2168903150525086e-06, + "loss": 0.3336, + "step": 2672 + }, + { + "epoch": 1.14, + "learning_rate": 3.2139731621936994e-06, + "loss": 0.4411, + "step": 2676 + }, + { + "epoch": 1.14, + "learning_rate": 3.2110560093348893e-06, + "loss": 0.5039, + "step": 2680 + }, + { + "epoch": 1.14, + "learning_rate": 3.2081388564760797e-06, + "loss": 0.6932, + "step": 2684 + }, + { + "epoch": 1.14, + "learning_rate": 3.20522170361727e-06, + "loss": 0.5271, + "step": 2688 + }, + { + "epoch": 1.14, + "learning_rate": 3.20230455075846e-06, + "loss": 0.432, + "step": 2692 + }, + { + "epoch": 1.15, + "learning_rate": 3.1993873978996504e-06, + "loss": 0.4973, + "step": 2696 + }, + { + "epoch": 1.15, + "learning_rate": 3.1964702450408403e-06, + "loss": 0.6146, + "step": 2700 + }, + { + "epoch": 1.15, + "learning_rate": 3.1935530921820307e-06, + "loss": 0.3637, + "step": 2704 + }, + { + "epoch": 1.15, + "learning_rate": 3.1906359393232206e-06, + "loss": 0.4085, + "step": 2708 + }, + { + "epoch": 1.15, + "learning_rate": 3.187718786464411e-06, + "loss": 0.3726, + "step": 2712 + }, + { + "epoch": 1.15, + "learning_rate": 3.1848016336056013e-06, + "loss": 0.5446, + "step": 2716 + }, + { + "epoch": 1.16, + "learning_rate": 3.1818844807467913e-06, + "loss": 0.497, + "step": 2720 + }, + { + "epoch": 1.16, + "learning_rate": 3.1789673278879816e-06, + "loss": 0.3298, + "step": 2724 + }, + { + "epoch": 1.16, + "learning_rate": 3.1760501750291716e-06, + "loss": 0.491, + "step": 2728 + }, + { + "epoch": 1.16, + "learning_rate": 3.173133022170362e-06, + "loss": 0.3796, + "step": 2732 + }, + { + "epoch": 1.16, + "learning_rate": 3.170215869311552e-06, + "loss": 0.6953, + "step": 2736 + }, + { + "epoch": 1.16, + "learning_rate": 3.1672987164527427e-06, + "loss": 0.3953, + "step": 2740 + }, + { + "epoch": 1.17, + "learning_rate": 3.164381563593932e-06, + "loss": 0.3992, + "step": 2744 + }, + { + "epoch": 1.17, + "learning_rate": 3.161464410735123e-06, + "loss": 0.4851, + "step": 2748 + }, + { + "epoch": 1.17, + "learning_rate": 3.1585472578763133e-06, + "loss": 0.3364, + "step": 2752 + }, + { + "epoch": 1.17, + "learning_rate": 3.1556301050175033e-06, + "loss": 0.4477, + "step": 2756 + }, + { + "epoch": 1.17, + "learning_rate": 3.1527129521586936e-06, + "loss": 0.555, + "step": 2760 + }, + { + "epoch": 1.18, + "learning_rate": 3.1497957992998836e-06, + "loss": 0.5896, + "step": 2764 + }, + { + "epoch": 1.18, + "learning_rate": 3.146878646441074e-06, + "loss": 0.5281, + "step": 2768 + }, + { + "epoch": 1.18, + "learning_rate": 3.143961493582264e-06, + "loss": 0.4825, + "step": 2772 + }, + { + "epoch": 1.18, + "learning_rate": 3.1410443407234542e-06, + "loss": 0.3199, + "step": 2776 + }, + { + "epoch": 1.18, + "learning_rate": 3.138127187864644e-06, + "loss": 0.3897, + "step": 2780 + }, + { + "epoch": 1.18, + "learning_rate": 3.1352100350058345e-06, + "loss": 0.3698, + "step": 2784 + }, + { + "epoch": 1.19, + "learning_rate": 3.132292882147025e-06, + "loss": 0.4514, + "step": 2788 + }, + { + "epoch": 1.19, + "learning_rate": 3.129375729288215e-06, + "loss": 0.4257, + "step": 2792 + }, + { + "epoch": 1.19, + "learning_rate": 3.126458576429405e-06, + "loss": 0.4245, + "step": 2796 + }, + { + "epoch": 1.19, + "learning_rate": 3.123541423570595e-06, + "loss": 0.4945, + "step": 2800 + }, + { + "epoch": 1.19, + "learning_rate": 3.1206242707117855e-06, + "loss": 0.442, + "step": 2804 + }, + { + "epoch": 1.19, + "learning_rate": 3.1177071178529754e-06, + "loss": 0.6289, + "step": 2808 + }, + { + "epoch": 1.2, + "learning_rate": 3.1147899649941658e-06, + "loss": 0.4142, + "step": 2812 + }, + { + "epoch": 1.2, + "learning_rate": 3.1118728121353566e-06, + "loss": 0.4667, + "step": 2816 + }, + { + "epoch": 1.2, + "learning_rate": 3.1089556592765465e-06, + "loss": 0.4788, + "step": 2820 + }, + { + "epoch": 1.2, + "learning_rate": 3.106038506417737e-06, + "loss": 0.3926, + "step": 2824 + }, + { + "epoch": 1.2, + "learning_rate": 3.103121353558927e-06, + "loss": 0.553, + "step": 2828 + }, + { + "epoch": 1.2, + "learning_rate": 3.100204200700117e-06, + "loss": 0.4567, + "step": 2832 + }, + { + "epoch": 1.21, + "learning_rate": 3.097287047841307e-06, + "loss": 0.3615, + "step": 2836 + }, + { + "epoch": 1.21, + "learning_rate": 3.0943698949824975e-06, + "loss": 0.4364, + "step": 2840 + }, + { + "epoch": 1.21, + "learning_rate": 3.0914527421236874e-06, + "loss": 0.525, + "step": 2844 + }, + { + "epoch": 1.21, + "learning_rate": 3.0885355892648778e-06, + "loss": 0.4324, + "step": 2848 + }, + { + "epoch": 1.21, + "learning_rate": 3.085618436406068e-06, + "loss": 0.37, + "step": 2852 + }, + { + "epoch": 1.21, + "learning_rate": 3.082701283547258e-06, + "loss": 0.3175, + "step": 2856 + }, + { + "epoch": 1.22, + "learning_rate": 3.0797841306884484e-06, + "loss": 0.3043, + "step": 2860 + }, + { + "epoch": 1.22, + "learning_rate": 3.0768669778296384e-06, + "loss": 0.514, + "step": 2864 + }, + { + "epoch": 1.22, + "learning_rate": 3.0739498249708287e-06, + "loss": 0.4671, + "step": 2868 + }, + { + "epoch": 1.22, + "learning_rate": 3.0710326721120187e-06, + "loss": 0.4151, + "step": 2872 + }, + { + "epoch": 1.22, + "learning_rate": 3.068115519253209e-06, + "loss": 0.4077, + "step": 2876 + }, + { + "epoch": 1.22, + "learning_rate": 3.065198366394399e-06, + "loss": 0.5045, + "step": 2880 + }, + { + "epoch": 1.23, + "learning_rate": 3.0622812135355893e-06, + "loss": 0.3641, + "step": 2884 + }, + { + "epoch": 1.23, + "learning_rate": 3.05936406067678e-06, + "loss": 0.5006, + "step": 2888 + }, + { + "epoch": 1.23, + "learning_rate": 3.0564469078179696e-06, + "loss": 0.4298, + "step": 2892 + }, + { + "epoch": 1.23, + "learning_rate": 3.0535297549591604e-06, + "loss": 0.514, + "step": 2896 + }, + { + "epoch": 1.23, + "learning_rate": 3.0506126021003504e-06, + "loss": 0.4578, + "step": 2900 + }, + { + "epoch": 1.23, + "learning_rate": 3.0476954492415407e-06, + "loss": 0.3638, + "step": 2904 + }, + { + "epoch": 1.24, + "learning_rate": 3.0447782963827307e-06, + "loss": 0.6377, + "step": 2908 + }, + { + "epoch": 1.24, + "learning_rate": 3.041861143523921e-06, + "loss": 0.5282, + "step": 2912 + }, + { + "epoch": 1.24, + "learning_rate": 3.0389439906651114e-06, + "loss": 0.5388, + "step": 2916 + }, + { + "epoch": 1.24, + "learning_rate": 3.0360268378063013e-06, + "loss": 0.5937, + "step": 2920 + }, + { + "epoch": 1.24, + "learning_rate": 3.0331096849474917e-06, + "loss": 0.488, + "step": 2924 + }, + { + "epoch": 1.24, + "learning_rate": 3.0301925320886816e-06, + "loss": 0.4885, + "step": 2928 + }, + { + "epoch": 1.25, + "learning_rate": 3.027275379229872e-06, + "loss": 0.6639, + "step": 2932 + }, + { + "epoch": 1.25, + "learning_rate": 3.024358226371062e-06, + "loss": 0.4895, + "step": 2936 + }, + { + "epoch": 1.25, + "learning_rate": 3.0214410735122523e-06, + "loss": 0.2655, + "step": 2940 + }, + { + "epoch": 1.25, + "learning_rate": 3.0185239206534422e-06, + "loss": 0.6175, + "step": 2944 + }, + { + "epoch": 1.25, + "learning_rate": 3.0156067677946326e-06, + "loss": 0.4876, + "step": 2948 + }, + { + "epoch": 1.26, + "learning_rate": 3.012689614935823e-06, + "loss": 0.3615, + "step": 2952 + }, + { + "epoch": 1.26, + "learning_rate": 3.009772462077013e-06, + "loss": 0.3619, + "step": 2956 + }, + { + "epoch": 1.26, + "learning_rate": 3.0068553092182033e-06, + "loss": 0.4622, + "step": 2960 + }, + { + "epoch": 1.26, + "learning_rate": 3.003938156359393e-06, + "loss": 0.4855, + "step": 2964 + }, + { + "epoch": 1.26, + "learning_rate": 3.001021003500584e-06, + "loss": 0.3779, + "step": 2968 + }, + { + "epoch": 1.26, + "learning_rate": 2.9981038506417735e-06, + "loss": 0.5237, + "step": 2972 + }, + { + "epoch": 1.27, + "learning_rate": 2.9951866977829643e-06, + "loss": 0.541, + "step": 2976 + }, + { + "epoch": 1.27, + "learning_rate": 2.992269544924154e-06, + "loss": 0.4515, + "step": 2980 + }, + { + "epoch": 1.27, + "learning_rate": 2.9893523920653446e-06, + "loss": 0.5022, + "step": 2984 + }, + { + "epoch": 1.27, + "learning_rate": 2.986435239206535e-06, + "loss": 0.4295, + "step": 2988 + }, + { + "epoch": 1.27, + "learning_rate": 2.983518086347725e-06, + "loss": 0.438, + "step": 2992 + }, + { + "epoch": 1.27, + "learning_rate": 2.9806009334889152e-06, + "loss": 0.364, + "step": 2996 + }, + { + "epoch": 1.28, + "learning_rate": 2.977683780630105e-06, + "loss": 0.3795, + "step": 3000 + }, + { + "epoch": 1.28, + "learning_rate": 2.9747666277712955e-06, + "loss": 0.4839, + "step": 3004 + }, + { + "epoch": 1.28, + "learning_rate": 2.9718494749124855e-06, + "loss": 0.3864, + "step": 3008 + }, + { + "epoch": 1.28, + "learning_rate": 2.968932322053676e-06, + "loss": 0.5014, + "step": 3012 + }, + { + "epoch": 1.28, + "learning_rate": 2.966015169194866e-06, + "loss": 0.4356, + "step": 3016 + }, + { + "epoch": 1.28, + "learning_rate": 2.963098016336056e-06, + "loss": 0.5337, + "step": 3020 + }, + { + "epoch": 1.29, + "learning_rate": 2.9601808634772465e-06, + "loss": 0.4385, + "step": 3024 + }, + { + "epoch": 1.29, + "learning_rate": 2.9572637106184364e-06, + "loss": 0.5117, + "step": 3028 + }, + { + "epoch": 1.29, + "learning_rate": 2.954346557759627e-06, + "loss": 0.6061, + "step": 3032 + }, + { + "epoch": 1.29, + "learning_rate": 2.9514294049008167e-06, + "loss": 0.4433, + "step": 3036 + }, + { + "epoch": 1.29, + "learning_rate": 2.948512252042007e-06, + "loss": 0.3484, + "step": 3040 + }, + { + "epoch": 1.29, + "learning_rate": 2.945595099183197e-06, + "loss": 0.2477, + "step": 3044 + }, + { + "epoch": 1.3, + "learning_rate": 2.942677946324388e-06, + "loss": 0.2433, + "step": 3048 + }, + { + "epoch": 1.3, + "learning_rate": 2.939760793465578e-06, + "loss": 0.34, + "step": 3052 + }, + { + "epoch": 1.3, + "learning_rate": 2.936843640606768e-06, + "loss": 0.6435, + "step": 3056 + }, + { + "epoch": 1.3, + "learning_rate": 2.9339264877479585e-06, + "loss": 0.3208, + "step": 3060 + }, + { + "epoch": 1.3, + "learning_rate": 2.9310093348891484e-06, + "loss": 0.3085, + "step": 3064 + }, + { + "epoch": 1.3, + "learning_rate": 2.928092182030339e-06, + "loss": 0.362, + "step": 3068 + }, + { + "epoch": 1.31, + "learning_rate": 2.9251750291715287e-06, + "loss": 0.4297, + "step": 3072 + }, + { + "epoch": 1.31, + "learning_rate": 2.922257876312719e-06, + "loss": 0.4343, + "step": 3076 + }, + { + "epoch": 1.31, + "learning_rate": 2.919340723453909e-06, + "loss": 0.5577, + "step": 3080 + }, + { + "epoch": 1.31, + "learning_rate": 2.9164235705950994e-06, + "loss": 0.3442, + "step": 3084 + }, + { + "epoch": 1.31, + "learning_rate": 2.9135064177362898e-06, + "loss": 0.5667, + "step": 3088 + }, + { + "epoch": 1.31, + "learning_rate": 2.9105892648774797e-06, + "loss": 0.3254, + "step": 3092 + }, + { + "epoch": 1.32, + "learning_rate": 2.90767211201867e-06, + "loss": 0.2909, + "step": 3096 + }, + { + "epoch": 1.32, + "learning_rate": 2.90475495915986e-06, + "loss": 0.3682, + "step": 3100 + }, + { + "epoch": 1.32, + "learning_rate": 2.9018378063010504e-06, + "loss": 0.3107, + "step": 3104 + }, + { + "epoch": 1.32, + "learning_rate": 2.8989206534422403e-06, + "loss": 0.4328, + "step": 3108 + }, + { + "epoch": 1.32, + "learning_rate": 2.8960035005834307e-06, + "loss": 0.3674, + "step": 3112 + }, + { + "epoch": 1.32, + "learning_rate": 2.8930863477246215e-06, + "loss": 0.3329, + "step": 3116 + }, + { + "epoch": 1.33, + "learning_rate": 2.890169194865811e-06, + "loss": 0.6409, + "step": 3120 + }, + { + "epoch": 1.33, + "learning_rate": 2.8872520420070018e-06, + "loss": 0.5682, + "step": 3124 + }, + { + "epoch": 1.33, + "learning_rate": 2.8843348891481917e-06, + "loss": 0.5972, + "step": 3128 + }, + { + "epoch": 1.33, + "learning_rate": 2.881417736289382e-06, + "loss": 0.4621, + "step": 3132 + }, + { + "epoch": 1.33, + "learning_rate": 2.878500583430572e-06, + "loss": 0.4448, + "step": 3136 + }, + { + "epoch": 1.34, + "learning_rate": 2.8755834305717624e-06, + "loss": 0.2757, + "step": 3140 + }, + { + "epoch": 1.34, + "learning_rate": 2.8726662777129523e-06, + "loss": 0.5172, + "step": 3144 + }, + { + "epoch": 1.34, + "learning_rate": 2.8697491248541427e-06, + "loss": 0.4493, + "step": 3148 + }, + { + "epoch": 1.34, + "learning_rate": 2.866831971995333e-06, + "loss": 0.3437, + "step": 3152 + }, + { + "epoch": 1.34, + "learning_rate": 2.863914819136523e-06, + "loss": 0.286, + "step": 3156 + }, + { + "epoch": 1.34, + "learning_rate": 2.8609976662777133e-06, + "loss": 0.6001, + "step": 3160 + }, + { + "epoch": 1.35, + "learning_rate": 2.8580805134189033e-06, + "loss": 0.4373, + "step": 3164 + }, + { + "epoch": 1.35, + "learning_rate": 2.8551633605600936e-06, + "loss": 0.4974, + "step": 3168 + }, + { + "epoch": 1.35, + "learning_rate": 2.8522462077012836e-06, + "loss": 0.4817, + "step": 3172 + }, + { + "epoch": 1.35, + "learning_rate": 2.849329054842474e-06, + "loss": 0.4178, + "step": 3176 + }, + { + "epoch": 1.35, + "learning_rate": 2.846411901983664e-06, + "loss": 0.4527, + "step": 3180 + }, + { + "epoch": 1.35, + "learning_rate": 2.8434947491248542e-06, + "loss": 0.3193, + "step": 3184 + }, + { + "epoch": 1.36, + "learning_rate": 2.8405775962660446e-06, + "loss": 0.4259, + "step": 3188 + }, + { + "epoch": 1.36, + "learning_rate": 2.8376604434072345e-06, + "loss": 0.2256, + "step": 3192 + }, + { + "epoch": 1.36, + "learning_rate": 2.8347432905484253e-06, + "loss": 0.3772, + "step": 3196 + }, + { + "epoch": 1.36, + "learning_rate": 2.831826137689615e-06, + "loss": 0.3679, + "step": 3200 + }, + { + "epoch": 1.36, + "learning_rate": 2.8289089848308056e-06, + "loss": 0.2954, + "step": 3204 + }, + { + "epoch": 1.36, + "learning_rate": 2.8259918319719955e-06, + "loss": 0.4222, + "step": 3208 + }, + { + "epoch": 1.37, + "learning_rate": 2.823074679113186e-06, + "loss": 0.4063, + "step": 3212 + }, + { + "epoch": 1.37, + "learning_rate": 2.8201575262543763e-06, + "loss": 0.6235, + "step": 3216 + }, + { + "epoch": 1.37, + "learning_rate": 2.817240373395566e-06, + "loss": 0.2717, + "step": 3220 + }, + { + "epoch": 1.37, + "learning_rate": 2.8143232205367566e-06, + "loss": 0.374, + "step": 3224 + }, + { + "epoch": 1.37, + "learning_rate": 2.8114060676779465e-06, + "loss": 0.3534, + "step": 3228 + }, + { + "epoch": 1.37, + "learning_rate": 2.808488914819137e-06, + "loss": 0.3723, + "step": 3232 + }, + { + "epoch": 1.38, + "learning_rate": 2.805571761960327e-06, + "loss": 0.5241, + "step": 3236 + }, + { + "epoch": 1.38, + "learning_rate": 2.802654609101517e-06, + "loss": 0.5061, + "step": 3240 + }, + { + "epoch": 1.38, + "learning_rate": 2.799737456242707e-06, + "loss": 0.268, + "step": 3244 + }, + { + "epoch": 1.38, + "learning_rate": 2.7968203033838975e-06, + "loss": 0.3903, + "step": 3248 + }, + { + "epoch": 1.38, + "learning_rate": 2.793903150525088e-06, + "loss": 0.4536, + "step": 3252 + }, + { + "epoch": 1.38, + "learning_rate": 2.7909859976662778e-06, + "loss": 0.36, + "step": 3256 + }, + { + "epoch": 1.39, + "learning_rate": 2.788068844807468e-06, + "loss": 0.4255, + "step": 3260 + }, + { + "epoch": 1.39, + "learning_rate": 2.785151691948658e-06, + "loss": 0.5646, + "step": 3264 + }, + { + "epoch": 1.39, + "learning_rate": 2.7822345390898484e-06, + "loss": 0.3652, + "step": 3268 + }, + { + "epoch": 1.39, + "learning_rate": 2.7793173862310384e-06, + "loss": 0.3034, + "step": 3272 + }, + { + "epoch": 1.39, + "learning_rate": 2.776400233372229e-06, + "loss": 0.417, + "step": 3276 + }, + { + "epoch": 1.39, + "learning_rate": 2.7734830805134187e-06, + "loss": 0.5308, + "step": 3280 + }, + { + "epoch": 1.4, + "learning_rate": 2.7705659276546095e-06, + "loss": 0.5367, + "step": 3284 + }, + { + "epoch": 1.4, + "learning_rate": 2.7676487747958e-06, + "loss": 0.2237, + "step": 3288 + }, + { + "epoch": 1.4, + "learning_rate": 2.7647316219369898e-06, + "loss": 0.4497, + "step": 3292 + }, + { + "epoch": 1.4, + "learning_rate": 2.76181446907818e-06, + "loss": 0.351, + "step": 3296 + }, + { + "epoch": 1.4, + "learning_rate": 2.75889731621937e-06, + "loss": 0.6103, + "step": 3300 + }, + { + "epoch": 1.4, + "learning_rate": 2.7559801633605604e-06, + "loss": 0.3624, + "step": 3304 + }, + { + "epoch": 1.41, + "learning_rate": 2.7530630105017504e-06, + "loss": 0.4172, + "step": 3308 + }, + { + "epoch": 1.41, + "learning_rate": 2.7501458576429407e-06, + "loss": 0.5008, + "step": 3312 + }, + { + "epoch": 1.41, + "learning_rate": 2.747228704784131e-06, + "loss": 0.4368, + "step": 3316 + }, + { + "epoch": 1.41, + "learning_rate": 2.744311551925321e-06, + "loss": 0.4305, + "step": 3320 + }, + { + "epoch": 1.41, + "learning_rate": 2.7413943990665114e-06, + "loss": 0.3914, + "step": 3324 + }, + { + "epoch": 1.41, + "learning_rate": 2.7384772462077013e-06, + "loss": 0.3781, + "step": 3328 + }, + { + "epoch": 1.42, + "learning_rate": 2.7355600933488917e-06, + "loss": 0.4922, + "step": 3332 + }, + { + "epoch": 1.42, + "learning_rate": 2.7326429404900816e-06, + "loss": 0.3398, + "step": 3336 + }, + { + "epoch": 1.42, + "learning_rate": 2.729725787631272e-06, + "loss": 0.5107, + "step": 3340 + }, + { + "epoch": 1.42, + "learning_rate": 2.726808634772462e-06, + "loss": 0.5933, + "step": 3344 + }, + { + "epoch": 1.42, + "learning_rate": 2.7238914819136523e-06, + "loss": 0.4659, + "step": 3348 + }, + { + "epoch": 1.43, + "learning_rate": 2.720974329054843e-06, + "loss": 0.2262, + "step": 3352 + }, + { + "epoch": 1.43, + "learning_rate": 2.718057176196033e-06, + "loss": 0.4471, + "step": 3356 + }, + { + "epoch": 1.43, + "learning_rate": 2.7151400233372234e-06, + "loss": 0.7168, + "step": 3360 + }, + { + "epoch": 1.43, + "learning_rate": 2.7122228704784133e-06, + "loss": 0.1911, + "step": 3364 + }, + { + "epoch": 1.43, + "learning_rate": 2.7093057176196037e-06, + "loss": 0.3807, + "step": 3368 + }, + { + "epoch": 1.43, + "learning_rate": 2.7063885647607936e-06, + "loss": 0.3614, + "step": 3372 + }, + { + "epoch": 1.44, + "learning_rate": 2.703471411901984e-06, + "loss": 0.2861, + "step": 3376 + }, + { + "epoch": 1.44, + "learning_rate": 2.700554259043174e-06, + "loss": 0.3193, + "step": 3380 + }, + { + "epoch": 1.44, + "learning_rate": 2.6976371061843643e-06, + "loss": 0.4835, + "step": 3384 + }, + { + "epoch": 1.44, + "learning_rate": 2.6947199533255546e-06, + "loss": 0.4439, + "step": 3388 + }, + { + "epoch": 1.44, + "learning_rate": 2.6918028004667446e-06, + "loss": 0.2924, + "step": 3392 + }, + { + "epoch": 1.44, + "learning_rate": 2.688885647607935e-06, + "loss": 0.5311, + "step": 3396 + }, + { + "epoch": 1.45, + "learning_rate": 2.685968494749125e-06, + "loss": 0.4898, + "step": 3400 + }, + { + "epoch": 1.45, + "learning_rate": 2.6830513418903152e-06, + "loss": 0.2538, + "step": 3404 + }, + { + "epoch": 1.45, + "learning_rate": 2.680134189031505e-06, + "loss": 0.4381, + "step": 3408 + }, + { + "epoch": 1.45, + "learning_rate": 2.6772170361726955e-06, + "loss": 0.4718, + "step": 3412 + }, + { + "epoch": 1.45, + "learning_rate": 2.6742998833138863e-06, + "loss": 0.3369, + "step": 3416 + }, + { + "epoch": 1.45, + "learning_rate": 2.671382730455076e-06, + "loss": 0.3481, + "step": 3420 + }, + { + "epoch": 1.46, + "learning_rate": 2.6684655775962666e-06, + "loss": 0.2547, + "step": 3424 + }, + { + "epoch": 1.46, + "learning_rate": 2.665548424737456e-06, + "loss": 0.4183, + "step": 3428 + }, + { + "epoch": 1.46, + "learning_rate": 2.662631271878647e-06, + "loss": 0.4181, + "step": 3432 + }, + { + "epoch": 1.46, + "learning_rate": 2.659714119019837e-06, + "loss": 0.5512, + "step": 3436 + }, + { + "epoch": 1.46, + "learning_rate": 2.6567969661610272e-06, + "loss": 0.4187, + "step": 3440 + }, + { + "epoch": 1.46, + "learning_rate": 2.653879813302217e-06, + "loss": 0.2411, + "step": 3444 + }, + { + "epoch": 1.47, + "learning_rate": 2.6509626604434075e-06, + "loss": 0.3652, + "step": 3448 + }, + { + "epoch": 1.47, + "learning_rate": 2.648045507584598e-06, + "loss": 0.4122, + "step": 3452 + }, + { + "epoch": 1.47, + "learning_rate": 2.645128354725788e-06, + "loss": 0.2771, + "step": 3456 + }, + { + "epoch": 1.47, + "learning_rate": 2.642211201866978e-06, + "loss": 0.3256, + "step": 3460 + }, + { + "epoch": 1.47, + "learning_rate": 2.639294049008168e-06, + "loss": 0.53, + "step": 3464 + }, + { + "epoch": 1.47, + "learning_rate": 2.6363768961493585e-06, + "loss": 0.2602, + "step": 3468 + }, + { + "epoch": 1.48, + "learning_rate": 2.6334597432905484e-06, + "loss": 0.2461, + "step": 3472 + }, + { + "epoch": 1.48, + "learning_rate": 2.630542590431739e-06, + "loss": 0.3867, + "step": 3476 + }, + { + "epoch": 1.48, + "learning_rate": 2.6276254375729287e-06, + "loss": 0.4217, + "step": 3480 + }, + { + "epoch": 1.48, + "learning_rate": 2.624708284714119e-06, + "loss": 0.4234, + "step": 3484 + }, + { + "epoch": 1.48, + "learning_rate": 2.6217911318553095e-06, + "loss": 0.3664, + "step": 3488 + }, + { + "epoch": 1.48, + "learning_rate": 2.6188739789964994e-06, + "loss": 0.5729, + "step": 3492 + }, + { + "epoch": 1.49, + "learning_rate": 2.6159568261376898e-06, + "loss": 0.5175, + "step": 3496 + }, + { + "epoch": 1.49, + "learning_rate": 2.6130396732788797e-06, + "loss": 0.4228, + "step": 3500 + }, + { + "epoch": 1.49, + "learning_rate": 2.6101225204200705e-06, + "loss": 0.3762, + "step": 3504 + }, + { + "epoch": 1.49, + "learning_rate": 2.60720536756126e-06, + "loss": 0.4211, + "step": 3508 + }, + { + "epoch": 1.49, + "learning_rate": 2.604288214702451e-06, + "loss": 0.3969, + "step": 3512 + }, + { + "epoch": 1.49, + "learning_rate": 2.601371061843641e-06, + "loss": 0.3794, + "step": 3516 + }, + { + "epoch": 1.5, + "learning_rate": 2.598453908984831e-06, + "loss": 0.2235, + "step": 3520 + }, + { + "epoch": 1.5, + "learning_rate": 2.5955367561260215e-06, + "loss": 0.3842, + "step": 3524 + }, + { + "epoch": 1.5, + "learning_rate": 2.5926196032672114e-06, + "loss": 0.3081, + "step": 3528 + }, + { + "epoch": 1.5, + "learning_rate": 2.5897024504084018e-06, + "loss": 0.267, + "step": 3532 + }, + { + "epoch": 1.5, + "learning_rate": 2.5867852975495917e-06, + "loss": 0.2976, + "step": 3536 + }, + { + "epoch": 1.51, + "learning_rate": 2.583868144690782e-06, + "loss": 0.4383, + "step": 3540 + }, + { + "epoch": 1.51, + "learning_rate": 2.580950991831972e-06, + "loss": 0.3175, + "step": 3544 + }, + { + "epoch": 1.51, + "learning_rate": 2.5780338389731624e-06, + "loss": 0.3118, + "step": 3548 + }, + { + "epoch": 1.51, + "learning_rate": 2.5751166861143527e-06, + "loss": 0.4329, + "step": 3552 + }, + { + "epoch": 1.51, + "learning_rate": 2.5721995332555427e-06, + "loss": 0.4936, + "step": 3556 + }, + { + "epoch": 1.51, + "learning_rate": 2.569282380396733e-06, + "loss": 0.4872, + "step": 3560 + }, + { + "epoch": 1.52, + "learning_rate": 2.566365227537923e-06, + "loss": 0.431, + "step": 3564 + }, + { + "epoch": 1.52, + "learning_rate": 2.5634480746791133e-06, + "loss": 0.5265, + "step": 3568 + }, + { + "epoch": 1.52, + "learning_rate": 2.5605309218203033e-06, + "loss": 0.3655, + "step": 3572 + }, + { + "epoch": 1.52, + "learning_rate": 2.5576137689614936e-06, + "loss": 0.342, + "step": 3576 + }, + { + "epoch": 1.52, + "learning_rate": 2.5546966161026836e-06, + "loss": 0.4835, + "step": 3580 + }, + { + "epoch": 1.52, + "learning_rate": 2.5517794632438743e-06, + "loss": 0.2614, + "step": 3584 + }, + { + "epoch": 1.53, + "learning_rate": 2.5488623103850647e-06, + "loss": 0.3411, + "step": 3588 + }, + { + "epoch": 1.53, + "learning_rate": 2.5459451575262546e-06, + "loss": 0.4997, + "step": 3592 + }, + { + "epoch": 1.53, + "learning_rate": 2.543028004667445e-06, + "loss": 0.461, + "step": 3596 + }, + { + "epoch": 1.53, + "learning_rate": 2.540110851808635e-06, + "loss": 0.4112, + "step": 3600 + }, + { + "epoch": 1.53, + "learning_rate": 2.5371936989498253e-06, + "loss": 0.4968, + "step": 3604 + }, + { + "epoch": 1.53, + "learning_rate": 2.5342765460910152e-06, + "loss": 0.3994, + "step": 3608 + }, + { + "epoch": 1.54, + "learning_rate": 2.5313593932322056e-06, + "loss": 0.4559, + "step": 3612 + }, + { + "epoch": 1.54, + "learning_rate": 2.528442240373396e-06, + "loss": 0.3394, + "step": 3616 + }, + { + "epoch": 1.54, + "learning_rate": 2.525525087514586e-06, + "loss": 0.3952, + "step": 3620 + }, + { + "epoch": 1.54, + "learning_rate": 2.5226079346557763e-06, + "loss": 0.318, + "step": 3624 + }, + { + "epoch": 1.54, + "learning_rate": 2.5196907817969662e-06, + "loss": 0.3204, + "step": 3628 + }, + { + "epoch": 1.54, + "learning_rate": 2.5167736289381566e-06, + "loss": 0.3047, + "step": 3632 + }, + { + "epoch": 1.55, + "learning_rate": 2.5138564760793465e-06, + "loss": 0.3223, + "step": 3636 + }, + { + "epoch": 1.55, + "learning_rate": 2.510939323220537e-06, + "loss": 0.2073, + "step": 3640 + }, + { + "epoch": 1.55, + "learning_rate": 2.508022170361727e-06, + "loss": 0.3696, + "step": 3644 + }, + { + "epoch": 1.55, + "learning_rate": 2.505105017502917e-06, + "loss": 0.4404, + "step": 3648 + }, + { + "epoch": 1.55, + "learning_rate": 2.502187864644108e-06, + "loss": 0.3907, + "step": 3652 + }, + { + "epoch": 1.55, + "learning_rate": 2.4992707117852975e-06, + "loss": 0.4624, + "step": 3656 + }, + { + "epoch": 1.56, + "learning_rate": 2.496353558926488e-06, + "loss": 0.4258, + "step": 3660 + }, + { + "epoch": 1.56, + "learning_rate": 2.493436406067678e-06, + "loss": 0.421, + "step": 3664 + }, + { + "epoch": 1.56, + "learning_rate": 2.4905192532088686e-06, + "loss": 0.3276, + "step": 3668 + }, + { + "epoch": 1.56, + "learning_rate": 2.4876021003500585e-06, + "loss": 0.4259, + "step": 3672 + }, + { + "epoch": 1.56, + "learning_rate": 2.484684947491249e-06, + "loss": 0.3363, + "step": 3676 + }, + { + "epoch": 1.56, + "learning_rate": 2.481767794632439e-06, + "loss": 0.3734, + "step": 3680 + }, + { + "epoch": 1.57, + "learning_rate": 2.478850641773629e-06, + "loss": 0.4299, + "step": 3684 + }, + { + "epoch": 1.57, + "learning_rate": 2.475933488914819e-06, + "loss": 0.3697, + "step": 3688 + }, + { + "epoch": 1.57, + "learning_rate": 2.4730163360560095e-06, + "loss": 0.3721, + "step": 3692 + }, + { + "epoch": 1.57, + "learning_rate": 2.4700991831971994e-06, + "loss": 0.4954, + "step": 3696 + }, + { + "epoch": 1.57, + "learning_rate": 2.46718203033839e-06, + "loss": 0.2761, + "step": 3700 + }, + { + "epoch": 1.57, + "learning_rate": 2.46426487747958e-06, + "loss": 0.3485, + "step": 3704 + }, + { + "epoch": 1.58, + "learning_rate": 2.4613477246207705e-06, + "loss": 0.3959, + "step": 3708 + }, + { + "epoch": 1.58, + "learning_rate": 2.4584305717619604e-06, + "loss": 0.421, + "step": 3712 + }, + { + "epoch": 1.58, + "learning_rate": 2.455513418903151e-06, + "loss": 0.3158, + "step": 3716 + }, + { + "epoch": 1.58, + "learning_rate": 2.4525962660443407e-06, + "loss": 0.3167, + "step": 3720 + }, + { + "epoch": 1.58, + "learning_rate": 2.449679113185531e-06, + "loss": 0.4065, + "step": 3724 + }, + { + "epoch": 1.59, + "learning_rate": 2.446761960326721e-06, + "loss": 0.406, + "step": 3728 + }, + { + "epoch": 1.59, + "learning_rate": 2.443844807467912e-06, + "loss": 0.4013, + "step": 3732 + }, + { + "epoch": 1.59, + "learning_rate": 2.4409276546091018e-06, + "loss": 0.5664, + "step": 3736 + }, + { + "epoch": 1.59, + "learning_rate": 2.438010501750292e-06, + "loss": 0.3301, + "step": 3740 + }, + { + "epoch": 1.59, + "learning_rate": 2.435093348891482e-06, + "loss": 0.3695, + "step": 3744 + }, + { + "epoch": 1.59, + "learning_rate": 2.4321761960326724e-06, + "loss": 0.3426, + "step": 3748 + }, + { + "epoch": 1.6, + "learning_rate": 2.4292590431738624e-06, + "loss": 0.3434, + "step": 3752 + }, + { + "epoch": 1.6, + "learning_rate": 2.4263418903150527e-06, + "loss": 0.3886, + "step": 3756 + }, + { + "epoch": 1.6, + "learning_rate": 2.4234247374562427e-06, + "loss": 0.4117, + "step": 3760 + }, + { + "epoch": 1.6, + "learning_rate": 2.4205075845974334e-06, + "loss": 0.4477, + "step": 3764 + }, + { + "epoch": 1.6, + "learning_rate": 2.4175904317386234e-06, + "loss": 0.4769, + "step": 3768 + }, + { + "epoch": 1.6, + "learning_rate": 2.4146732788798137e-06, + "loss": 0.4984, + "step": 3772 + }, + { + "epoch": 1.61, + "learning_rate": 2.4117561260210037e-06, + "loss": 0.3964, + "step": 3776 + }, + { + "epoch": 1.61, + "learning_rate": 2.408838973162194e-06, + "loss": 0.4827, + "step": 3780 + }, + { + "epoch": 1.61, + "learning_rate": 2.405921820303384e-06, + "loss": 0.3075, + "step": 3784 + }, + { + "epoch": 1.61, + "learning_rate": 2.4030046674445743e-06, + "loss": 0.2245, + "step": 3788 + }, + { + "epoch": 1.61, + "learning_rate": 2.4000875145857643e-06, + "loss": 0.2683, + "step": 3792 + }, + { + "epoch": 1.61, + "learning_rate": 2.3971703617269546e-06, + "loss": 0.4515, + "step": 3796 + }, + { + "epoch": 1.62, + "learning_rate": 2.394253208868145e-06, + "loss": 0.3369, + "step": 3800 + }, + { + "epoch": 1.62, + "learning_rate": 2.391336056009335e-06, + "loss": 0.2854, + "step": 3804 + }, + { + "epoch": 1.62, + "learning_rate": 2.3884189031505253e-06, + "loss": 0.2712, + "step": 3808 + }, + { + "epoch": 1.62, + "learning_rate": 2.3855017502917157e-06, + "loss": 0.3827, + "step": 3812 + }, + { + "epoch": 1.62, + "learning_rate": 2.3825845974329056e-06, + "loss": 0.2348, + "step": 3816 + }, + { + "epoch": 1.62, + "learning_rate": 2.379667444574096e-06, + "loss": 0.2503, + "step": 3820 + }, + { + "epoch": 1.63, + "learning_rate": 2.376750291715286e-06, + "loss": 0.2814, + "step": 3824 + }, + { + "epoch": 1.63, + "learning_rate": 2.3738331388564763e-06, + "loss": 0.4045, + "step": 3828 + }, + { + "epoch": 1.63, + "learning_rate": 2.3709159859976666e-06, + "loss": 0.5534, + "step": 3832 + }, + { + "epoch": 1.63, + "learning_rate": 2.3679988331388566e-06, + "loss": 0.4016, + "step": 3836 + }, + { + "epoch": 1.63, + "learning_rate": 2.365081680280047e-06, + "loss": 0.4375, + "step": 3840 + }, + { + "epoch": 1.63, + "learning_rate": 2.362164527421237e-06, + "loss": 0.3761, + "step": 3844 + }, + { + "epoch": 1.64, + "learning_rate": 2.3592473745624272e-06, + "loss": 0.3525, + "step": 3848 + }, + { + "epoch": 1.64, + "learning_rate": 2.3563302217036176e-06, + "loss": 0.3385, + "step": 3852 + }, + { + "epoch": 1.64, + "learning_rate": 2.3534130688448075e-06, + "loss": 0.393, + "step": 3856 + }, + { + "epoch": 1.64, + "learning_rate": 2.350495915985998e-06, + "loss": 0.4507, + "step": 3860 + }, + { + "epoch": 1.64, + "learning_rate": 2.3475787631271883e-06, + "loss": 0.2481, + "step": 3864 + }, + { + "epoch": 1.64, + "learning_rate": 2.344661610268378e-06, + "loss": 0.2887, + "step": 3868 + }, + { + "epoch": 1.65, + "learning_rate": 2.3417444574095686e-06, + "loss": 0.3081, + "step": 3872 + }, + { + "epoch": 1.65, + "learning_rate": 2.3388273045507585e-06, + "loss": 0.3454, + "step": 3876 + }, + { + "epoch": 1.65, + "learning_rate": 2.335910151691949e-06, + "loss": 0.4006, + "step": 3880 + }, + { + "epoch": 1.65, + "learning_rate": 2.332992998833139e-06, + "loss": 0.3328, + "step": 3884 + }, + { + "epoch": 1.65, + "learning_rate": 2.330075845974329e-06, + "loss": 0.3802, + "step": 3888 + }, + { + "epoch": 1.65, + "learning_rate": 2.3271586931155195e-06, + "loss": 0.538, + "step": 3892 + }, + { + "epoch": 1.66, + "learning_rate": 2.3242415402567095e-06, + "loss": 0.4035, + "step": 3896 + }, + { + "epoch": 1.66, + "learning_rate": 2.3213243873979e-06, + "loss": 0.3538, + "step": 3900 + }, + { + "epoch": 1.66, + "learning_rate": 2.31840723453909e-06, + "loss": 0.2945, + "step": 3904 + }, + { + "epoch": 1.66, + "learning_rate": 2.31549008168028e-06, + "loss": 0.3023, + "step": 3908 + }, + { + "epoch": 1.66, + "learning_rate": 2.3125729288214705e-06, + "loss": 0.4806, + "step": 3912 + }, + { + "epoch": 1.66, + "learning_rate": 2.3096557759626604e-06, + "loss": 0.4009, + "step": 3916 + }, + { + "epoch": 1.67, + "learning_rate": 2.306738623103851e-06, + "loss": 0.4475, + "step": 3920 + }, + { + "epoch": 1.67, + "learning_rate": 2.3038214702450407e-06, + "loss": 0.2655, + "step": 3924 + }, + { + "epoch": 1.67, + "learning_rate": 2.300904317386231e-06, + "loss": 0.3898, + "step": 3928 + }, + { + "epoch": 1.67, + "learning_rate": 2.2979871645274215e-06, + "loss": 0.2847, + "step": 3932 + }, + { + "epoch": 1.67, + "learning_rate": 2.295070011668612e-06, + "loss": 0.5787, + "step": 3936 + }, + { + "epoch": 1.68, + "learning_rate": 2.2921528588098018e-06, + "loss": 0.4133, + "step": 3940 + }, + { + "epoch": 1.68, + "learning_rate": 2.289235705950992e-06, + "loss": 0.2984, + "step": 3944 + }, + { + "epoch": 1.68, + "learning_rate": 2.286318553092182e-06, + "loss": 0.3097, + "step": 3948 + }, + { + "epoch": 1.68, + "learning_rate": 2.2834014002333724e-06, + "loss": 0.3854, + "step": 3952 + }, + { + "epoch": 1.68, + "learning_rate": 2.2804842473745624e-06, + "loss": 0.5068, + "step": 3956 + }, + { + "epoch": 1.68, + "learning_rate": 2.2775670945157527e-06, + "loss": 0.3831, + "step": 3960 + }, + { + "epoch": 1.69, + "learning_rate": 2.274649941656943e-06, + "loss": 0.2283, + "step": 3964 + }, + { + "epoch": 1.69, + "learning_rate": 2.2717327887981334e-06, + "loss": 0.3432, + "step": 3968 + }, + { + "epoch": 1.69, + "learning_rate": 2.2688156359393234e-06, + "loss": 0.4152, + "step": 3972 + }, + { + "epoch": 1.69, + "learning_rate": 2.2658984830805137e-06, + "loss": 0.2857, + "step": 3976 + }, + { + "epoch": 1.69, + "learning_rate": 2.2629813302217037e-06, + "loss": 0.39, + "step": 3980 + }, + { + "epoch": 1.69, + "learning_rate": 2.260064177362894e-06, + "loss": 0.3972, + "step": 3984 + }, + { + "epoch": 1.7, + "learning_rate": 2.257147024504084e-06, + "loss": 0.3207, + "step": 3988 + }, + { + "epoch": 1.7, + "learning_rate": 2.2542298716452743e-06, + "loss": 0.4362, + "step": 3992 + }, + { + "epoch": 1.7, + "learning_rate": 2.2513127187864643e-06, + "loss": 0.3839, + "step": 3996 + }, + { + "epoch": 1.7, + "learning_rate": 2.248395565927655e-06, + "loss": 0.211, + "step": 4000 + }, + { + "epoch": 1.7, + "learning_rate": 2.245478413068845e-06, + "loss": 0.4071, + "step": 4004 + }, + { + "epoch": 1.7, + "learning_rate": 2.2425612602100354e-06, + "loss": 0.2785, + "step": 4008 + }, + { + "epoch": 1.71, + "learning_rate": 2.2396441073512253e-06, + "loss": 0.4274, + "step": 4012 + }, + { + "epoch": 1.71, + "learning_rate": 2.2367269544924157e-06, + "loss": 0.3813, + "step": 4016 + }, + { + "epoch": 1.71, + "learning_rate": 2.2338098016336056e-06, + "loss": 0.3138, + "step": 4020 + }, + { + "epoch": 1.71, + "learning_rate": 2.230892648774796e-06, + "loss": 0.3181, + "step": 4024 + }, + { + "epoch": 1.71, + "learning_rate": 2.227975495915986e-06, + "loss": 0.4108, + "step": 4028 + }, + { + "epoch": 1.71, + "learning_rate": 2.2250583430571767e-06, + "loss": 0.3285, + "step": 4032 + }, + { + "epoch": 1.72, + "learning_rate": 2.2221411901983666e-06, + "loss": 0.2244, + "step": 4036 + }, + { + "epoch": 1.72, + "learning_rate": 2.219224037339557e-06, + "loss": 0.4148, + "step": 4040 + }, + { + "epoch": 1.72, + "learning_rate": 2.216306884480747e-06, + "loss": 0.418, + "step": 4044 + }, + { + "epoch": 1.72, + "learning_rate": 2.2133897316219373e-06, + "loss": 0.4441, + "step": 4048 + }, + { + "epoch": 1.72, + "learning_rate": 2.2104725787631272e-06, + "loss": 0.358, + "step": 4052 + }, + { + "epoch": 1.72, + "learning_rate": 2.2075554259043176e-06, + "loss": 0.2615, + "step": 4056 + }, + { + "epoch": 1.73, + "learning_rate": 2.2046382730455075e-06, + "loss": 0.3992, + "step": 4060 + }, + { + "epoch": 1.73, + "learning_rate": 2.201721120186698e-06, + "loss": 0.4608, + "step": 4064 + }, + { + "epoch": 1.73, + "learning_rate": 2.1988039673278883e-06, + "loss": 0.2661, + "step": 4068 + }, + { + "epoch": 1.73, + "learning_rate": 2.1958868144690786e-06, + "loss": 0.4447, + "step": 4072 + }, + { + "epoch": 1.73, + "learning_rate": 2.1929696616102686e-06, + "loss": 0.3915, + "step": 4076 + }, + { + "epoch": 1.73, + "learning_rate": 2.190052508751459e-06, + "loss": 0.3283, + "step": 4080 + }, + { + "epoch": 1.74, + "learning_rate": 2.187135355892649e-06, + "loss": 0.3887, + "step": 4084 + }, + { + "epoch": 1.74, + "learning_rate": 2.1842182030338392e-06, + "loss": 0.3772, + "step": 4088 + }, + { + "epoch": 1.74, + "learning_rate": 2.181301050175029e-06, + "loss": 0.5242, + "step": 4092 + }, + { + "epoch": 1.74, + "learning_rate": 2.1783838973162195e-06, + "loss": 0.2624, + "step": 4096 + }, + { + "epoch": 1.74, + "learning_rate": 2.17546674445741e-06, + "loss": 0.4775, + "step": 4100 + }, + { + "epoch": 1.74, + "learning_rate": 2.1725495915986e-06, + "loss": 0.4693, + "step": 4104 + }, + { + "epoch": 1.75, + "learning_rate": 2.16963243873979e-06, + "loss": 0.2954, + "step": 4108 + }, + { + "epoch": 1.75, + "learning_rate": 2.1667152858809806e-06, + "loss": 0.386, + "step": 4112 + }, + { + "epoch": 1.75, + "learning_rate": 2.1637981330221705e-06, + "loss": 0.2375, + "step": 4116 + }, + { + "epoch": 1.75, + "learning_rate": 2.160880980163361e-06, + "loss": 0.456, + "step": 4120 + }, + { + "epoch": 1.75, + "learning_rate": 2.157963827304551e-06, + "loss": 0.5585, + "step": 4124 + }, + { + "epoch": 1.76, + "learning_rate": 2.155046674445741e-06, + "loss": 0.2531, + "step": 4128 + }, + { + "epoch": 1.76, + "learning_rate": 2.1521295215869315e-06, + "loss": 0.3648, + "step": 4132 + }, + { + "epoch": 1.76, + "learning_rate": 2.1492123687281215e-06, + "loss": 0.4723, + "step": 4136 + }, + { + "epoch": 1.76, + "learning_rate": 2.146295215869312e-06, + "loss": 0.2357, + "step": 4140 + }, + { + "epoch": 1.76, + "learning_rate": 2.1433780630105018e-06, + "loss": 0.4827, + "step": 4144 + }, + { + "epoch": 1.76, + "learning_rate": 2.140460910151692e-06, + "loss": 0.3387, + "step": 4148 + }, + { + "epoch": 1.77, + "learning_rate": 2.137543757292882e-06, + "loss": 0.3946, + "step": 4152 + }, + { + "epoch": 1.77, + "learning_rate": 2.1346266044340724e-06, + "loss": 0.1965, + "step": 4156 + }, + { + "epoch": 1.77, + "learning_rate": 2.1317094515752628e-06, + "loss": 0.3286, + "step": 4160 + }, + { + "epoch": 1.77, + "learning_rate": 2.128792298716453e-06, + "loss": 0.3196, + "step": 4164 + }, + { + "epoch": 1.77, + "learning_rate": 2.125875145857643e-06, + "loss": 0.2477, + "step": 4168 + }, + { + "epoch": 1.77, + "learning_rate": 2.1229579929988334e-06, + "loss": 0.3666, + "step": 4172 + }, + { + "epoch": 1.78, + "learning_rate": 2.1200408401400234e-06, + "loss": 0.5021, + "step": 4176 + }, + { + "epoch": 1.78, + "learning_rate": 2.117852975495916e-06, + "loss": 0.5295, + "step": 4180 + }, + { + "epoch": 1.78, + "learning_rate": 2.1149358226371064e-06, + "loss": 0.4222, + "step": 4184 + }, + { + "epoch": 1.78, + "learning_rate": 2.1120186697782964e-06, + "loss": 0.4243, + "step": 4188 + }, + { + "epoch": 1.78, + "learning_rate": 2.1091015169194867e-06, + "loss": 0.5245, + "step": 4192 + }, + { + "epoch": 1.78, + "learning_rate": 2.106184364060677e-06, + "loss": 0.2765, + "step": 4196 + }, + { + "epoch": 1.79, + "learning_rate": 2.103267211201867e-06, + "loss": 0.4902, + "step": 4200 + }, + { + "epoch": 1.79, + "learning_rate": 2.1003500583430574e-06, + "loss": 0.2659, + "step": 4204 + }, + { + "epoch": 1.79, + "learning_rate": 2.0974329054842477e-06, + "loss": 0.512, + "step": 4208 + }, + { + "epoch": 1.79, + "learning_rate": 2.0945157526254377e-06, + "loss": 0.369, + "step": 4212 + }, + { + "epoch": 1.79, + "learning_rate": 2.091598599766628e-06, + "loss": 0.4157, + "step": 4216 + }, + { + "epoch": 1.79, + "learning_rate": 2.088681446907818e-06, + "loss": 0.2203, + "step": 4220 + }, + { + "epoch": 1.8, + "learning_rate": 2.0857642940490083e-06, + "loss": 0.2195, + "step": 4224 + }, + { + "epoch": 1.8, + "learning_rate": 2.0828471411901983e-06, + "loss": 0.4235, + "step": 4228 + }, + { + "epoch": 1.8, + "learning_rate": 2.0799299883313886e-06, + "loss": 0.2623, + "step": 4232 + }, + { + "epoch": 1.8, + "learning_rate": 2.077012835472579e-06, + "loss": 0.4724, + "step": 4236 + }, + { + "epoch": 1.8, + "learning_rate": 2.0740956826137694e-06, + "loss": 0.5003, + "step": 4240 + }, + { + "epoch": 1.8, + "learning_rate": 2.0711785297549593e-06, + "loss": 0.3844, + "step": 4244 + }, + { + "epoch": 1.81, + "learning_rate": 2.0682613768961497e-06, + "loss": 0.402, + "step": 4248 + }, + { + "epoch": 1.81, + "learning_rate": 2.0653442240373396e-06, + "loss": 0.2482, + "step": 4252 + }, + { + "epoch": 1.81, + "learning_rate": 2.06242707117853e-06, + "loss": 0.3593, + "step": 4256 + }, + { + "epoch": 1.81, + "learning_rate": 2.05950991831972e-06, + "loss": 0.2561, + "step": 4260 + }, + { + "epoch": 1.81, + "learning_rate": 2.0565927654609103e-06, + "loss": 0.4176, + "step": 4264 + }, + { + "epoch": 1.81, + "learning_rate": 2.0536756126021e-06, + "loss": 0.2596, + "step": 4268 + }, + { + "epoch": 1.82, + "learning_rate": 2.050758459743291e-06, + "loss": 0.3554, + "step": 4272 + }, + { + "epoch": 1.82, + "learning_rate": 2.047841306884481e-06, + "loss": 0.3388, + "step": 4276 + }, + { + "epoch": 1.82, + "learning_rate": 2.0449241540256713e-06, + "loss": 0.4103, + "step": 4280 + }, + { + "epoch": 1.82, + "learning_rate": 2.0420070011668612e-06, + "loss": 0.3023, + "step": 4284 + }, + { + "epoch": 1.82, + "learning_rate": 2.0390898483080516e-06, + "loss": 0.4772, + "step": 4288 + }, + { + "epoch": 1.82, + "learning_rate": 2.0361726954492415e-06, + "loss": 0.2974, + "step": 4292 + }, + { + "epoch": 1.83, + "learning_rate": 2.033255542590432e-06, + "loss": 0.4114, + "step": 4296 + }, + { + "epoch": 1.83, + "learning_rate": 2.030338389731622e-06, + "loss": 0.2369, + "step": 4300 + }, + { + "epoch": 1.83, + "learning_rate": 2.027421236872812e-06, + "loss": 0.3393, + "step": 4304 + }, + { + "epoch": 1.83, + "learning_rate": 2.0245040840140026e-06, + "loss": 0.2298, + "step": 4308 + }, + { + "epoch": 1.83, + "learning_rate": 2.021586931155193e-06, + "loss": 0.3373, + "step": 4312 + }, + { + "epoch": 1.84, + "learning_rate": 2.018669778296383e-06, + "loss": 0.3498, + "step": 4316 + }, + { + "epoch": 1.84, + "learning_rate": 2.0157526254375732e-06, + "loss": 0.4742, + "step": 4320 + }, + { + "epoch": 1.84, + "learning_rate": 2.012835472578763e-06, + "loss": 0.3716, + "step": 4324 + }, + { + "epoch": 1.84, + "learning_rate": 2.0099183197199535e-06, + "loss": 0.4141, + "step": 4328 + }, + { + "epoch": 1.84, + "learning_rate": 2.0070011668611435e-06, + "loss": 0.3451, + "step": 4332 + }, + { + "epoch": 1.84, + "learning_rate": 2.004084014002334e-06, + "loss": 0.3455, + "step": 4336 + }, + { + "epoch": 1.85, + "learning_rate": 2.001166861143524e-06, + "loss": 0.4263, + "step": 4340 + }, + { + "epoch": 1.85, + "learning_rate": 1.9982497082847146e-06, + "loss": 0.3155, + "step": 4344 + }, + { + "epoch": 1.85, + "learning_rate": 1.9953325554259045e-06, + "loss": 0.254, + "step": 4348 + }, + { + "epoch": 1.85, + "learning_rate": 1.992415402567095e-06, + "loss": 0.2274, + "step": 4352 + }, + { + "epoch": 1.85, + "learning_rate": 1.989498249708285e-06, + "loss": 0.2758, + "step": 4356 + }, + { + "epoch": 1.85, + "learning_rate": 1.986581096849475e-06, + "loss": 0.2397, + "step": 4360 + }, + { + "epoch": 1.86, + "learning_rate": 1.983663943990665e-06, + "loss": 0.2506, + "step": 4364 + }, + { + "epoch": 1.86, + "learning_rate": 1.9807467911318555e-06, + "loss": 0.4136, + "step": 4368 + }, + { + "epoch": 1.86, + "learning_rate": 1.977829638273046e-06, + "loss": 0.3309, + "step": 4372 + }, + { + "epoch": 1.86, + "learning_rate": 1.974912485414236e-06, + "loss": 0.2924, + "step": 4376 + }, + { + "epoch": 1.86, + "learning_rate": 1.971995332555426e-06, + "loss": 0.1428, + "step": 4380 + }, + { + "epoch": 1.86, + "learning_rate": 1.9690781796966165e-06, + "loss": 0.4771, + "step": 4384 + }, + { + "epoch": 1.87, + "learning_rate": 1.9661610268378064e-06, + "loss": 0.4117, + "step": 4388 + }, + { + "epoch": 1.87, + "learning_rate": 1.9632438739789968e-06, + "loss": 0.1177, + "step": 4392 + }, + { + "epoch": 1.87, + "learning_rate": 1.9603267211201867e-06, + "loss": 0.538, + "step": 4396 + }, + { + "epoch": 1.87, + "learning_rate": 1.957409568261377e-06, + "loss": 0.2241, + "step": 4400 + }, + { + "epoch": 1.87, + "learning_rate": 1.954492415402567e-06, + "loss": 0.4846, + "step": 4404 + }, + { + "epoch": 1.87, + "learning_rate": 1.9515752625437574e-06, + "loss": 0.3649, + "step": 4408 + }, + { + "epoch": 1.88, + "learning_rate": 1.9486581096849477e-06, + "loss": 0.4989, + "step": 4412 + }, + { + "epoch": 1.88, + "learning_rate": 1.945740956826138e-06, + "loss": 0.3074, + "step": 4416 + }, + { + "epoch": 1.88, + "learning_rate": 1.942823803967328e-06, + "loss": 0.3055, + "step": 4420 + }, + { + "epoch": 1.88, + "learning_rate": 1.9399066511085184e-06, + "loss": 0.3966, + "step": 4424 + }, + { + "epoch": 1.88, + "learning_rate": 1.9369894982497083e-06, + "loss": 0.3061, + "step": 4428 + }, + { + "epoch": 1.88, + "learning_rate": 1.9340723453908987e-06, + "loss": 0.3059, + "step": 4432 + }, + { + "epoch": 1.89, + "learning_rate": 1.9311551925320886e-06, + "loss": 0.4346, + "step": 4436 + }, + { + "epoch": 1.89, + "learning_rate": 1.928238039673279e-06, + "loss": 0.4984, + "step": 4440 + }, + { + "epoch": 1.89, + "learning_rate": 1.9253208868144694e-06, + "loss": 0.2097, + "step": 4444 + }, + { + "epoch": 1.89, + "learning_rate": 1.9224037339556593e-06, + "loss": 0.4416, + "step": 4448 + }, + { + "epoch": 1.89, + "learning_rate": 1.9194865810968497e-06, + "loss": 0.3917, + "step": 4452 + }, + { + "epoch": 1.89, + "learning_rate": 1.9165694282380396e-06, + "loss": 0.2051, + "step": 4456 + }, + { + "epoch": 1.9, + "learning_rate": 1.91365227537923e-06, + "loss": 0.385, + "step": 4460 + }, + { + "epoch": 1.9, + "learning_rate": 1.9107351225204203e-06, + "loss": 0.5121, + "step": 4464 + }, + { + "epoch": 1.9, + "learning_rate": 1.9078179696616103e-06, + "loss": 0.3747, + "step": 4468 + }, + { + "epoch": 1.9, + "learning_rate": 1.9049008168028008e-06, + "loss": 0.2688, + "step": 4472 + }, + { + "epoch": 1.9, + "learning_rate": 1.901983663943991e-06, + "loss": 0.2459, + "step": 4476 + }, + { + "epoch": 1.9, + "learning_rate": 1.8990665110851811e-06, + "loss": 0.2148, + "step": 4480 + }, + { + "epoch": 1.91, + "learning_rate": 1.8961493582263713e-06, + "loss": 0.2333, + "step": 4484 + }, + { + "epoch": 1.91, + "learning_rate": 1.8932322053675614e-06, + "loss": 0.266, + "step": 4488 + }, + { + "epoch": 1.91, + "learning_rate": 1.8903150525087516e-06, + "loss": 0.3613, + "step": 4492 + }, + { + "epoch": 1.91, + "learning_rate": 1.8873978996499417e-06, + "loss": 0.1306, + "step": 4496 + }, + { + "epoch": 1.91, + "learning_rate": 1.884480746791132e-06, + "loss": 0.2521, + "step": 4500 + }, + { + "epoch": 1.91, + "learning_rate": 1.881563593932322e-06, + "loss": 0.2997, + "step": 4504 + }, + { + "epoch": 1.92, + "learning_rate": 1.8786464410735124e-06, + "loss": 0.4594, + "step": 4508 + }, + { + "epoch": 1.92, + "learning_rate": 1.8757292882147028e-06, + "loss": 0.2661, + "step": 4512 + }, + { + "epoch": 1.92, + "learning_rate": 1.872812135355893e-06, + "loss": 0.2776, + "step": 4516 + }, + { + "epoch": 1.92, + "learning_rate": 1.869894982497083e-06, + "loss": 0.4221, + "step": 4520 + }, + { + "epoch": 1.92, + "learning_rate": 1.8669778296382732e-06, + "loss": 0.234, + "step": 4524 + }, + { + "epoch": 1.93, + "learning_rate": 1.8640606767794634e-06, + "loss": 0.4304, + "step": 4528 + }, + { + "epoch": 1.93, + "learning_rate": 1.8611435239206535e-06, + "loss": 0.476, + "step": 4532 + }, + { + "epoch": 1.93, + "learning_rate": 1.8582263710618437e-06, + "loss": 0.2214, + "step": 4536 + }, + { + "epoch": 1.93, + "learning_rate": 1.855309218203034e-06, + "loss": 0.2805, + "step": 4540 + }, + { + "epoch": 1.93, + "learning_rate": 1.8523920653442242e-06, + "loss": 0.1151, + "step": 4544 + }, + { + "epoch": 1.93, + "learning_rate": 1.8494749124854143e-06, + "loss": 0.2069, + "step": 4548 + }, + { + "epoch": 1.94, + "learning_rate": 1.8465577596266047e-06, + "loss": 0.4162, + "step": 4552 + }, + { + "epoch": 1.94, + "learning_rate": 1.8436406067677949e-06, + "loss": 0.3101, + "step": 4556 + }, + { + "epoch": 1.94, + "learning_rate": 1.840723453908985e-06, + "loss": 0.272, + "step": 4560 + }, + { + "epoch": 1.94, + "learning_rate": 1.8378063010501752e-06, + "loss": 0.4017, + "step": 4564 + }, + { + "epoch": 1.94, + "learning_rate": 1.8348891481913653e-06, + "loss": 0.3501, + "step": 4568 + }, + { + "epoch": 1.94, + "learning_rate": 1.8319719953325557e-06, + "loss": 0.2287, + "step": 4572 + }, + { + "epoch": 1.95, + "learning_rate": 1.8290548424737458e-06, + "loss": 0.4951, + "step": 4576 + }, + { + "epoch": 1.95, + "learning_rate": 1.826137689614936e-06, + "loss": 0.5831, + "step": 4580 + }, + { + "epoch": 1.95, + "learning_rate": 1.8232205367561261e-06, + "loss": 0.3221, + "step": 4584 + }, + { + "epoch": 1.95, + "learning_rate": 1.8203033838973163e-06, + "loss": 0.3682, + "step": 4588 + }, + { + "epoch": 1.95, + "learning_rate": 1.8173862310385066e-06, + "loss": 0.299, + "step": 4592 + }, + { + "epoch": 1.95, + "learning_rate": 1.8144690781796968e-06, + "loss": 0.1729, + "step": 4596 + }, + { + "epoch": 1.96, + "learning_rate": 1.811551925320887e-06, + "loss": 0.214, + "step": 4600 + }, + { + "epoch": 1.96, + "learning_rate": 1.808634772462077e-06, + "loss": 0.3696, + "step": 4604 + }, + { + "epoch": 1.96, + "learning_rate": 1.8057176196032674e-06, + "loss": 0.4532, + "step": 4608 + }, + { + "epoch": 1.96, + "learning_rate": 1.8028004667444576e-06, + "loss": 0.241, + "step": 4612 + }, + { + "epoch": 1.96, + "learning_rate": 1.7998833138856477e-06, + "loss": 0.3685, + "step": 4616 + }, + { + "epoch": 1.96, + "learning_rate": 1.796966161026838e-06, + "loss": 0.3708, + "step": 4620 + }, + { + "epoch": 1.97, + "learning_rate": 1.794049008168028e-06, + "loss": 0.3228, + "step": 4624 + }, + { + "epoch": 1.97, + "learning_rate": 1.7911318553092182e-06, + "loss": 0.2311, + "step": 4628 + }, + { + "epoch": 1.97, + "learning_rate": 1.7882147024504086e-06, + "loss": 0.3598, + "step": 4632 + }, + { + "epoch": 1.97, + "learning_rate": 1.7852975495915987e-06, + "loss": 0.4134, + "step": 4636 + }, + { + "epoch": 1.97, + "learning_rate": 1.782380396732789e-06, + "loss": 0.2711, + "step": 4640 + }, + { + "epoch": 1.97, + "learning_rate": 1.7794632438739792e-06, + "loss": 0.503, + "step": 4644 + }, + { + "epoch": 1.98, + "learning_rate": 1.7765460910151694e-06, + "loss": 0.2192, + "step": 4648 + }, + { + "epoch": 1.98, + "learning_rate": 1.7736289381563595e-06, + "loss": 0.1547, + "step": 4652 + }, + { + "epoch": 1.98, + "learning_rate": 1.7707117852975497e-06, + "loss": 0.3002, + "step": 4656 + }, + { + "epoch": 1.98, + "learning_rate": 1.7677946324387398e-06, + "loss": 0.3846, + "step": 4660 + }, + { + "epoch": 1.98, + "learning_rate": 1.76487747957993e-06, + "loss": 0.4236, + "step": 4664 + }, + { + "epoch": 1.98, + "learning_rate": 1.7619603267211201e-06, + "loss": 0.3245, + "step": 4668 + }, + { + "epoch": 1.99, + "learning_rate": 1.7590431738623107e-06, + "loss": 0.3547, + "step": 4672 + }, + { + "epoch": 1.99, + "learning_rate": 1.7561260210035008e-06, + "loss": 0.2355, + "step": 4676 + }, + { + "epoch": 1.99, + "learning_rate": 1.753208868144691e-06, + "loss": 0.417, + "step": 4680 + }, + { + "epoch": 1.99, + "learning_rate": 1.7502917152858811e-06, + "loss": 0.4419, + "step": 4684 + }, + { + "epoch": 1.99, + "learning_rate": 1.7473745624270713e-06, + "loss": 0.4115, + "step": 4688 + }, + { + "epoch": 1.99, + "learning_rate": 1.7444574095682615e-06, + "loss": 0.2582, + "step": 4692 + }, + { + "epoch": 2.0, + "learning_rate": 1.7415402567094516e-06, + "loss": 0.3695, + "step": 4696 + }, + { + "epoch": 2.0, + "learning_rate": 1.7386231038506418e-06, + "loss": 0.3948, + "step": 4700 + } + ], + "logging_steps": 4, + "max_steps": 7056, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "total_flos": 53908118568960.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}