|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.998299319727891, |
|
"eval_steps": 800, |
|
"global_step": 4700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0, |
|
"loss": 2.0847, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0, |
|
"loss": 2.5726, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0, |
|
"loss": 2.2415, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0, |
|
"loss": 1.8935, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0, |
|
"loss": 2.1994, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0, |
|
"loss": 2.1794, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.3082402064781276e-06, |
|
"loss": 1.5146, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9623603097171917e-06, |
|
"loss": 0.8902, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.3449960410798955e-06, |
|
"loss": 0.8521, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.6164804129562553e-06, |
|
"loss": 0.7475, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.8270600516195322e-06, |
|
"loss": 0.6407, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.99911614431896e-06, |
|
"loss": 0.5996, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.144587497923142e-06, |
|
"loss": 0.733, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.2706005161953197e-06, |
|
"loss": 0.7661, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.381751875681663e-06, |
|
"loss": 0.5892, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.4811801548585962e-06, |
|
"loss": 0.8165, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.5711239740096387e-06, |
|
"loss": 0.4039, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.6532362475580235e-06, |
|
"loss": 0.6821, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.7287722169385123e-06, |
|
"loss": 0.8229, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.7987076011622065e-06, |
|
"loss": 0.6447, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.8638158862213e-06, |
|
"loss": 0.4965, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.924720619434383e-06, |
|
"loss": 0.655, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.9819318221826385e-06, |
|
"loss": 0.6818, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.035871978920728e-06, |
|
"loss": 0.743, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.086894990123906e-06, |
|
"loss": 0.8767, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.135300258097659e-06, |
|
"loss": 0.6235, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.18134333252491e-06, |
|
"loss": 0.6074, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.225244077248703e-06, |
|
"loss": 0.6712, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.267193020182443e-06, |
|
"loss": 0.7978, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.3073563507970875e-06, |
|
"loss": 0.4904, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.345879896760937e-06, |
|
"loss": 0.4131, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.3828923201775755e-06, |
|
"loss": 0.6295, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.418507710283431e-06, |
|
"loss": 0.9921, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.45282770440127e-06, |
|
"loss": 0.6667, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.485943236544386e-06, |
|
"loss": 0.657, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.517935989460364e-06, |
|
"loss": 0.7705, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.54887960849498e-06, |
|
"loss": 0.866, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.578840722673447e-06, |
|
"loss": 0.8428, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.607879808611406e-06, |
|
"loss": 0.7855, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.636051925421702e-06, |
|
"loss": 0.7451, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.663407343064547e-06, |
|
"loss": 0.5792, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.689992082159791e-06, |
|
"loss": 0.9155, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.715848379822425e-06, |
|
"loss": 0.7189, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.74101509336297e-06, |
|
"loss": 0.5087, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.76552805154028e-06, |
|
"loss": 0.9235, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.789420361336724e-06, |
|
"loss": 0.7996, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.812722676847563e-06, |
|
"loss": 0.5372, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.835463435763974e-06, |
|
"loss": 0.8693, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.857669068026358e-06, |
|
"loss": 0.6677, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.879364180487766e-06, |
|
"loss": 0.5588, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.900571720823068e-06, |
|
"loss": 0.6868, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.921313123421507e-06, |
|
"loss": 0.6329, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.941608439588058e-06, |
|
"loss": 0.5781, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9614764540361516e-06, |
|
"loss": 0.8552, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.980934789368156e-06, |
|
"loss": 0.9137, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9521, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.997812135355893e-06, |
|
"loss": 0.527, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.994894982497083e-06, |
|
"loss": 0.6134, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.991977829638274e-06, |
|
"loss": 0.9135, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.989060676779464e-06, |
|
"loss": 0.6525, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.986143523920654e-06, |
|
"loss": 0.9277, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.983226371061844e-06, |
|
"loss": 1.0032, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.980309218203034e-06, |
|
"loss": 0.7763, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.977392065344224e-06, |
|
"loss": 0.7304, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.974474912485414e-06, |
|
"loss": 0.7923, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.971557759626604e-06, |
|
"loss": 0.9643, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.968640606767795e-06, |
|
"loss": 0.6124, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.965723453908986e-06, |
|
"loss": 0.5817, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.962806301050176e-06, |
|
"loss": 0.7712, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.959889148191366e-06, |
|
"loss": 0.4688, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.956971995332556e-06, |
|
"loss": 0.7547, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.954054842473746e-06, |
|
"loss": 0.7743, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.951137689614936e-06, |
|
"loss": 0.71, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.948220536756126e-06, |
|
"loss": 0.806, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.945303383897317e-06, |
|
"loss": 0.5964, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.942386231038507e-06, |
|
"loss": 0.6401, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.939469078179697e-06, |
|
"loss": 0.887, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.936551925320887e-06, |
|
"loss": 0.4782, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.933634772462078e-06, |
|
"loss": 0.6871, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.930717619603268e-06, |
|
"loss": 0.7753, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9278004667444575e-06, |
|
"loss": 0.671, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9248833138856475e-06, |
|
"loss": 0.5642, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.921966161026838e-06, |
|
"loss": 0.5292, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.919049008168029e-06, |
|
"loss": 0.712, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.916131855309218e-06, |
|
"loss": 0.7544, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.913214702450409e-06, |
|
"loss": 0.7463, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.910297549591599e-06, |
|
"loss": 0.5543, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.90738039673279e-06, |
|
"loss": 0.6754, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.90446324387398e-06, |
|
"loss": 0.6768, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9015460910151695e-06, |
|
"loss": 0.6952, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.8986289381563595e-06, |
|
"loss": 0.8318, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.89571178529755e-06, |
|
"loss": 0.6242, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.89279463243874e-06, |
|
"loss": 0.9217, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.88987747957993e-06, |
|
"loss": 0.4959, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.886960326721121e-06, |
|
"loss": 0.8203, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.884043173862311e-06, |
|
"loss": 0.7889, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.881126021003501e-06, |
|
"loss": 0.6178, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.878208868144691e-06, |
|
"loss": 1.0419, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.8752917152858815e-06, |
|
"loss": 0.9276, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.8723745624270714e-06, |
|
"loss": 0.6587, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.869457409568261e-06, |
|
"loss": 0.5996, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.866540256709452e-06, |
|
"loss": 0.7183, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.863623103850642e-06, |
|
"loss": 0.5382, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.860705950991833e-06, |
|
"loss": 0.6379, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.857788798133022e-06, |
|
"loss": 0.6881, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.854871645274213e-06, |
|
"loss": 0.708, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.851954492415403e-06, |
|
"loss": 0.5051, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.8490373395565935e-06, |
|
"loss": 0.6938, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8461201866977834e-06, |
|
"loss": 0.7395, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.843203033838973e-06, |
|
"loss": 0.8443, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.840285880980164e-06, |
|
"loss": 0.6688, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.837368728121354e-06, |
|
"loss": 0.7241, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.834451575262544e-06, |
|
"loss": 0.6182, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.831534422403734e-06, |
|
"loss": 0.5827, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.828617269544925e-06, |
|
"loss": 0.5035, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.825700116686115e-06, |
|
"loss": 0.6138, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.822782963827305e-06, |
|
"loss": 0.7013, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8198658109684954e-06, |
|
"loss": 0.5948, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.816948658109685e-06, |
|
"loss": 0.6721, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.814031505250875e-06, |
|
"loss": 0.5647, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.811114352392065e-06, |
|
"loss": 0.7064, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.808197199533256e-06, |
|
"loss": 0.6298, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.805280046674446e-06, |
|
"loss": 0.526, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.802362893815637e-06, |
|
"loss": 0.5712, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.799445740956827e-06, |
|
"loss": 0.7529, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.796528588098017e-06, |
|
"loss": 1.0192, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.793611435239207e-06, |
|
"loss": 0.9215, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.790694282380397e-06, |
|
"loss": 0.8837, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.787777129521587e-06, |
|
"loss": 0.8698, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.784859976662777e-06, |
|
"loss": 0.7368, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.781942823803968e-06, |
|
"loss": 0.5266, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.779025670945158e-06, |
|
"loss": 0.6157, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.776108518086348e-06, |
|
"loss": 0.5561, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.773191365227539e-06, |
|
"loss": 0.5638, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.770274212368729e-06, |
|
"loss": 0.7433, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7673570595099186e-06, |
|
"loss": 0.4696, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7644399066511085e-06, |
|
"loss": 0.4952, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.761522753792299e-06, |
|
"loss": 0.6735, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.758605600933489e-06, |
|
"loss": 0.6756, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.755688448074679e-06, |
|
"loss": 0.8812, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.752771295215869e-06, |
|
"loss": 0.5452, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.74985414235706e-06, |
|
"loss": 0.5208, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.746936989498251e-06, |
|
"loss": 0.8629, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.744019836639441e-06, |
|
"loss": 0.6721, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.7411026837806305e-06, |
|
"loss": 0.7716, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.7381855309218205e-06, |
|
"loss": 0.3795, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.735268378063011e-06, |
|
"loss": 0.6173, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.732351225204201e-06, |
|
"loss": 0.5626, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.729434072345391e-06, |
|
"loss": 0.7762, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.726516919486582e-06, |
|
"loss": 0.658, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.723599766627772e-06, |
|
"loss": 0.6109, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.720682613768962e-06, |
|
"loss": 0.7475, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.717765460910152e-06, |
|
"loss": 0.6066, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.7148483080513425e-06, |
|
"loss": 0.4173, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.7119311551925325e-06, |
|
"loss": 0.6297, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.709014002333722e-06, |
|
"loss": 0.655, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.706096849474912e-06, |
|
"loss": 0.8621, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.703179696616103e-06, |
|
"loss": 0.5386, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.700262543757294e-06, |
|
"loss": 0.7833, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.697345390898483e-06, |
|
"loss": 0.5759, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.694428238039674e-06, |
|
"loss": 0.7122, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.691511085180864e-06, |
|
"loss": 0.6034, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.6885939323220545e-06, |
|
"loss": 0.8556, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.6856767794632445e-06, |
|
"loss": 0.6421, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.682759626604434e-06, |
|
"loss": 0.6008, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.679842473745624e-06, |
|
"loss": 0.7663, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.676925320886815e-06, |
|
"loss": 0.5928, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.674008168028005e-06, |
|
"loss": 0.7544, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.671091015169195e-06, |
|
"loss": 0.5778, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.668173862310386e-06, |
|
"loss": 0.7285, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.665256709451576e-06, |
|
"loss": 0.5125, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.662339556592766e-06, |
|
"loss": 0.6717, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.659422403733956e-06, |
|
"loss": 0.8691, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.656505250875146e-06, |
|
"loss": 0.4915, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.653588098016336e-06, |
|
"loss": 0.789, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.650670945157526e-06, |
|
"loss": 0.9127, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.647753792298717e-06, |
|
"loss": 0.6563, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.644836639439907e-06, |
|
"loss": 0.4648, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.641919486581098e-06, |
|
"loss": 0.6367, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.639002333722287e-06, |
|
"loss": 0.7212, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.636085180863478e-06, |
|
"loss": 0.6034, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.633168028004668e-06, |
|
"loss": 0.4951, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.630250875145858e-06, |
|
"loss": 0.4122, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.627333722287048e-06, |
|
"loss": 0.467, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.624416569428238e-06, |
|
"loss": 0.7467, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.621499416569429e-06, |
|
"loss": 0.6229, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.618582263710619e-06, |
|
"loss": 0.7651, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.615665110851809e-06, |
|
"loss": 0.8044, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.612747957992999e-06, |
|
"loss": 0.4718, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.60983080513419e-06, |
|
"loss": 0.5431, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.60691365227538e-06, |
|
"loss": 0.5484, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.6039964994165695e-06, |
|
"loss": 0.8095, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.60107934655776e-06, |
|
"loss": 0.6142, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.59816219369895e-06, |
|
"loss": 0.6883, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.59524504084014e-06, |
|
"loss": 0.7341, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.59232788798133e-06, |
|
"loss": 0.9504, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.589410735122521e-06, |
|
"loss": 0.7445, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.587222870478414e-06, |
|
"loss": 0.7637, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.5843057176196035e-06, |
|
"loss": 1.0596, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.5813885647607935e-06, |
|
"loss": 0.5224, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.578471411901984e-06, |
|
"loss": 0.6143, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.575554259043174e-06, |
|
"loss": 0.425, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.572637106184364e-06, |
|
"loss": 0.7089, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.569719953325555e-06, |
|
"loss": 0.6148, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.566802800466745e-06, |
|
"loss": 0.5647, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.563885647607935e-06, |
|
"loss": 0.7274, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.560968494749125e-06, |
|
"loss": 0.8243, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.5580513418903155e-06, |
|
"loss": 0.5999, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.5551341890315054e-06, |
|
"loss": 0.4957, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.552217036172696e-06, |
|
"loss": 0.7591, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.549299883313886e-06, |
|
"loss": 0.4182, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.546382730455076e-06, |
|
"loss": 0.5964, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.543465577596267e-06, |
|
"loss": 0.6322, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.540548424737457e-06, |
|
"loss": 0.4782, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.537631271878647e-06, |
|
"loss": 0.5883, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.534714119019837e-06, |
|
"loss": 0.5512, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.5317969661610275e-06, |
|
"loss": 0.6806, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.5288798133022174e-06, |
|
"loss": 0.6884, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.525962660443407e-06, |
|
"loss": 0.5685, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.523045507584598e-06, |
|
"loss": 0.5336, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.520128354725788e-06, |
|
"loss": 0.508, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.517211201866978e-06, |
|
"loss": 0.4367, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.514294049008168e-06, |
|
"loss": 0.9553, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.511376896149359e-06, |
|
"loss": 0.8762, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.508459743290549e-06, |
|
"loss": 0.8939, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.505542590431739e-06, |
|
"loss": 0.5025, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.5026254375729286e-06, |
|
"loss": 0.8958, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.499708284714119e-06, |
|
"loss": 0.7813, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.49679113185531e-06, |
|
"loss": 0.6857, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.4938739789965e-06, |
|
"loss": 0.7198, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.49095682613769e-06, |
|
"loss": 0.5257, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.48803967327888e-06, |
|
"loss": 0.542, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.485122520420071e-06, |
|
"loss": 0.4848, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.482205367561261e-06, |
|
"loss": 0.7363, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.479288214702451e-06, |
|
"loss": 0.8313, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.476371061843641e-06, |
|
"loss": 0.6864, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.473453908984831e-06, |
|
"loss": 0.7911, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.470536756126021e-06, |
|
"loss": 0.4418, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.467619603267211e-06, |
|
"loss": 0.7467, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.464702450408402e-06, |
|
"loss": 0.5449, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.461785297549592e-06, |
|
"loss": 0.5699, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.458868144690782e-06, |
|
"loss": 0.5095, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.455950991831972e-06, |
|
"loss": 0.6546, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.453033838973163e-06, |
|
"loss": 0.5868, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.450116686114353e-06, |
|
"loss": 0.7554, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.4471995332555425e-06, |
|
"loss": 0.7272, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.444282380396733e-06, |
|
"loss": 0.5532, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.441365227537923e-06, |
|
"loss": 0.5618, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.438448074679114e-06, |
|
"loss": 0.5518, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.435530921820304e-06, |
|
"loss": 0.7264, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.432613768961494e-06, |
|
"loss": 0.5429, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.429696616102684e-06, |
|
"loss": 0.6046, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.426779463243875e-06, |
|
"loss": 0.9232, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.4238623103850645e-06, |
|
"loss": 0.5118, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.4209451575262545e-06, |
|
"loss": 0.643, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.418028004667445e-06, |
|
"loss": 0.7772, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.415110851808635e-06, |
|
"loss": 0.3894, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.412193698949825e-06, |
|
"loss": 0.557, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.409276546091015e-06, |
|
"loss": 0.6952, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.406359393232206e-06, |
|
"loss": 0.7862, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.403442240373396e-06, |
|
"loss": 0.5889, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.400525087514586e-06, |
|
"loss": 0.8459, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.3976079346557765e-06, |
|
"loss": 0.4839, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.3946907817969665e-06, |
|
"loss": 0.5376, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.391773628938157e-06, |
|
"loss": 0.6901, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.388856476079346e-06, |
|
"loss": 0.6058, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.385939323220537e-06, |
|
"loss": 0.7326, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.383022170361727e-06, |
|
"loss": 0.5892, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.380105017502918e-06, |
|
"loss": 0.5026, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.377187864644108e-06, |
|
"loss": 0.6181, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.374270711785298e-06, |
|
"loss": 0.6994, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.3713535589264885e-06, |
|
"loss": 0.7436, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.3684364060676785e-06, |
|
"loss": 0.8489, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.365519253208868e-06, |
|
"loss": 0.4957, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.362602100350058e-06, |
|
"loss": 0.5761, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.359684947491249e-06, |
|
"loss": 0.5296, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.356767794632439e-06, |
|
"loss": 0.4946, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.353850641773629e-06, |
|
"loss": 0.5818, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.35093348891482e-06, |
|
"loss": 0.7388, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.34801633605601e-06, |
|
"loss": 0.7138, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.3450991831972e-06, |
|
"loss": 0.5748, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.34218203033839e-06, |
|
"loss": 0.6169, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.33926487747958e-06, |
|
"loss": 0.5938, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.33634772462077e-06, |
|
"loss": 0.6766, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.333430571761961e-06, |
|
"loss": 0.6961, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.330513418903151e-06, |
|
"loss": 0.7088, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.328325554259044e-06, |
|
"loss": 0.7907, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.325408401400234e-06, |
|
"loss": 0.5619, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.322491248541424e-06, |
|
"loss": 0.9847, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.319574095682614e-06, |
|
"loss": 0.5704, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.316656942823804e-06, |
|
"loss": 0.729, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.313739789964994e-06, |
|
"loss": 0.492, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.310822637106184e-06, |
|
"loss": 0.7427, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.307905484247375e-06, |
|
"loss": 0.5757, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.304988331388565e-06, |
|
"loss": 0.5469, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.302071178529756e-06, |
|
"loss": 0.605, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.299154025670946e-06, |
|
"loss": 0.4903, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.296236872812136e-06, |
|
"loss": 0.4479, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.293319719953326e-06, |
|
"loss": 0.5118, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.290402567094516e-06, |
|
"loss": 0.5867, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.287485414235706e-06, |
|
"loss": 0.499, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.284568261376896e-06, |
|
"loss": 0.5443, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.281651108518087e-06, |
|
"loss": 0.7138, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.278733955659277e-06, |
|
"loss": 0.7214, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.275816802800467e-06, |
|
"loss": 0.4798, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.272899649941658e-06, |
|
"loss": 0.6752, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.2699824970828476e-06, |
|
"loss": 0.4885, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.2670653442240375e-06, |
|
"loss": 0.3946, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.2641481913652275e-06, |
|
"loss": 0.6857, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.261231038506418e-06, |
|
"loss": 0.5198, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.258313885647608e-06, |
|
"loss": 0.7563, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.255396732788798e-06, |
|
"loss": 0.6442, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.252479579929989e-06, |
|
"loss": 0.539, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.249562427071179e-06, |
|
"loss": 0.53, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.24664527421237e-06, |
|
"loss": 0.5773, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.2437281213535596e-06, |
|
"loss": 0.5297, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.2408109684947495e-06, |
|
"loss": 0.7455, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.2378938156359394e-06, |
|
"loss": 0.3915, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.23497666277713e-06, |
|
"loss": 0.7302, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.23205950991832e-06, |
|
"loss": 0.5329, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.22914235705951e-06, |
|
"loss": 0.7163, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.226225204200701e-06, |
|
"loss": 0.5996, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.223308051341891e-06, |
|
"loss": 0.6922, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.220390898483081e-06, |
|
"loss": 0.7819, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.217473745624271e-06, |
|
"loss": 0.4539, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.2145565927654615e-06, |
|
"loss": 0.6079, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.2116394399066514e-06, |
|
"loss": 0.4377, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.208722287047841e-06, |
|
"loss": 0.4358, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.205805134189031e-06, |
|
"loss": 0.6401, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.202887981330222e-06, |
|
"loss": 0.6677, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.199970828471413e-06, |
|
"loss": 0.572, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.197053675612602e-06, |
|
"loss": 0.5973, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.194136522753793e-06, |
|
"loss": 0.679, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.191219369894983e-06, |
|
"loss": 0.5599, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.1883022170361735e-06, |
|
"loss": 0.5171, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.185385064177363e-06, |
|
"loss": 0.569, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.182467911318553e-06, |
|
"loss": 0.9033, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.179550758459744e-06, |
|
"loss": 0.8082, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.176633605600934e-06, |
|
"loss": 0.6142, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.173716452742124e-06, |
|
"loss": 0.4488, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.170799299883314e-06, |
|
"loss": 0.5348, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.167882147024505e-06, |
|
"loss": 0.3747, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.164964994165695e-06, |
|
"loss": 0.4663, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.162047841306885e-06, |
|
"loss": 0.5914, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.1591306884480746e-06, |
|
"loss": 0.4916, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.156213535589265e-06, |
|
"loss": 0.4981, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.153296382730455e-06, |
|
"loss": 0.6886, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.150379229871645e-06, |
|
"loss": 0.7889, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.147462077012836e-06, |
|
"loss": 0.4762, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.144544924154026e-06, |
|
"loss": 0.6236, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.141627771295217e-06, |
|
"loss": 0.4979, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.138710618436406e-06, |
|
"loss": 0.6086, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.135793465577597e-06, |
|
"loss": 0.4375, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.1328763127187866e-06, |
|
"loss": 0.6866, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.129959159859977e-06, |
|
"loss": 0.6476, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.127042007001167e-06, |
|
"loss": 0.5982, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.124124854142357e-06, |
|
"loss": 0.7084, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.121207701283548e-06, |
|
"loss": 0.6747, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.118290548424738e-06, |
|
"loss": 0.5161, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.115373395565928e-06, |
|
"loss": 0.4477, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.112456242707118e-06, |
|
"loss": 0.4698, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.109539089848309e-06, |
|
"loss": 0.3782, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.1066219369894985e-06, |
|
"loss": 0.5897, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.1037047841306885e-06, |
|
"loss": 0.6186, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.100787631271879e-06, |
|
"loss": 0.6515, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.097870478413069e-06, |
|
"loss": 0.7318, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.094953325554259e-06, |
|
"loss": 0.4301, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.092036172695449e-06, |
|
"loss": 0.587, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.08911901983664e-06, |
|
"loss": 0.4525, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.08620186697783e-06, |
|
"loss": 0.7056, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.08328471411902e-06, |
|
"loss": 0.497, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.0803675612602105e-06, |
|
"loss": 0.6571, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.0774504084014005e-06, |
|
"loss": 0.5894, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.074533255542591e-06, |
|
"loss": 0.5912, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.071616102683781e-06, |
|
"loss": 0.5286, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.068698949824971e-06, |
|
"loss": 0.5107, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.065781796966161e-06, |
|
"loss": 0.4842, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.062864644107352e-06, |
|
"loss": 0.4813, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.059947491248542e-06, |
|
"loss": 0.4813, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.057030338389732e-06, |
|
"loss": 0.575, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.0541131855309225e-06, |
|
"loss": 0.5535, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.0511960326721125e-06, |
|
"loss": 0.5936, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.048278879813302e-06, |
|
"loss": 0.691, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.045361726954492e-06, |
|
"loss": 0.4385, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.042444574095683e-06, |
|
"loss": 0.5595, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.039527421236873e-06, |
|
"loss": 0.5647, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.036610268378063e-06, |
|
"loss": 0.508, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.033693115519254e-06, |
|
"loss": 0.4794, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.030775962660444e-06, |
|
"loss": 0.5662, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.0278588098016345e-06, |
|
"loss": 0.6627, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.024941656942824e-06, |
|
"loss": 0.3683, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.022024504084014e-06, |
|
"loss": 0.6034, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.019107351225204e-06, |
|
"loss": 0.3611, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.016190198366395e-06, |
|
"loss": 0.639, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.013273045507585e-06, |
|
"loss": 0.5472, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.010355892648775e-06, |
|
"loss": 0.4576, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.007438739789966e-06, |
|
"loss": 0.5035, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.004521586931156e-06, |
|
"loss": 0.8007, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.001604434072346e-06, |
|
"loss": 0.6807, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.998687281213536e-06, |
|
"loss": 0.5574, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.995770128354726e-06, |
|
"loss": 0.5141, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.992852975495916e-06, |
|
"loss": 0.4496, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.989935822637106e-06, |
|
"loss": 0.7541, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.987018669778296e-06, |
|
"loss": 0.6673, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.984101516919487e-06, |
|
"loss": 0.6613, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.981184364060677e-06, |
|
"loss": 0.7404, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.978267211201867e-06, |
|
"loss": 0.5234, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.975350058343058e-06, |
|
"loss": 0.608, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.972432905484248e-06, |
|
"loss": 0.6614, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.969515752625438e-06, |
|
"loss": 0.4633, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.9665985997666275e-06, |
|
"loss": 0.5372, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.963681446907818e-06, |
|
"loss": 0.5298, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.960764294049009e-06, |
|
"loss": 0.5283, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.957847141190199e-06, |
|
"loss": 0.3638, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.954929988331389e-06, |
|
"loss": 0.3947, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.952012835472579e-06, |
|
"loss": 0.4943, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.94909568261377e-06, |
|
"loss": 0.5024, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.9461785297549596e-06, |
|
"loss": 0.6821, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.9432613768961495e-06, |
|
"loss": 0.4511, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.9403442240373394e-06, |
|
"loss": 0.4767, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.93742707117853e-06, |
|
"loss": 0.5588, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.93450991831972e-06, |
|
"loss": 0.7316, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.93159276546091e-06, |
|
"loss": 0.3692, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.928675612602101e-06, |
|
"loss": 0.7381, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.925758459743291e-06, |
|
"loss": 0.7333, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.922841306884481e-06, |
|
"loss": 0.5729, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.919924154025671e-06, |
|
"loss": 0.5423, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.9170070011668615e-06, |
|
"loss": 0.35, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.9140898483080514e-06, |
|
"loss": 0.3955, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.911172695449242e-06, |
|
"loss": 0.713, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.908255542590432e-06, |
|
"loss": 0.5724, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.905338389731622e-06, |
|
"loss": 0.5339, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.902421236872813e-06, |
|
"loss": 0.372, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.899504084014003e-06, |
|
"loss": 0.6898, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.896586931155193e-06, |
|
"loss": 0.6381, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.893669778296383e-06, |
|
"loss": 0.4658, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.8907526254375735e-06, |
|
"loss": 0.6277, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.887835472578763e-06, |
|
"loss": 0.476, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.884918319719953e-06, |
|
"loss": 0.5812, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.882001166861144e-06, |
|
"loss": 0.3055, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.879084014002334e-06, |
|
"loss": 0.7002, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.876166861143524e-06, |
|
"loss": 0.4782, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.873249708284714e-06, |
|
"loss": 0.4645, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.870332555425905e-06, |
|
"loss": 0.506, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.867415402567095e-06, |
|
"loss": 0.5115, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.864498249708285e-06, |
|
"loss": 0.5903, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.861581096849475e-06, |
|
"loss": 0.555, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.858663943990665e-06, |
|
"loss": 0.6398, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.855746791131856e-06, |
|
"loss": 0.5431, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.852829638273046e-06, |
|
"loss": 0.7979, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.849912485414236e-06, |
|
"loss": 0.3846, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.846995332555426e-06, |
|
"loss": 0.4568, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.844078179696617e-06, |
|
"loss": 0.7126, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.841161026837807e-06, |
|
"loss": 0.6972, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.838243873978997e-06, |
|
"loss": 0.495, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.835326721120187e-06, |
|
"loss": 0.5843, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.832409568261377e-06, |
|
"loss": 0.8, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.829492415402567e-06, |
|
"loss": 0.6066, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.826575262543757e-06, |
|
"loss": 0.5371, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.823658109684948e-06, |
|
"loss": 0.4662, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.820740956826138e-06, |
|
"loss": 0.4733, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.817823803967328e-06, |
|
"loss": 0.6339, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.814906651108519e-06, |
|
"loss": 0.5074, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.8119894982497086e-06, |
|
"loss": 0.7012, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.809072345390899e-06, |
|
"loss": 0.3957, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.806155192532089e-06, |
|
"loss": 0.4838, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.8032380396732793e-06, |
|
"loss": 0.4403, |
|
"step": 1868 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.800320886814469e-06, |
|
"loss": 0.4519, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.7974037339556596e-06, |
|
"loss": 0.4998, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.7944865810968495e-06, |
|
"loss": 0.3915, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.79156942823804e-06, |
|
"loss": 0.4722, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.7886522753792302e-06, |
|
"loss": 0.4377, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.78573512252042e-06, |
|
"loss": 0.4053, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.782817969661611e-06, |
|
"loss": 0.6229, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7799008168028005e-06, |
|
"loss": 0.573, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7769836639439913e-06, |
|
"loss": 0.6595, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7740665110851808e-06, |
|
"loss": 0.6739, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7711493582263716e-06, |
|
"loss": 0.5746, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7682322053675615e-06, |
|
"loss": 0.5315, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.765315052508752e-06, |
|
"loss": 0.545, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.7623978996499422e-06, |
|
"loss": 0.5491, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.759480746791132e-06, |
|
"loss": 0.4616, |
|
"step": 1928 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.7565635939323225e-06, |
|
"loss": 0.5943, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.7536464410735125e-06, |
|
"loss": 0.7848, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.750729288214703e-06, |
|
"loss": 0.617, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.7478121353558928e-06, |
|
"loss": 0.469, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.744894982497083e-06, |
|
"loss": 0.3749, |
|
"step": 1948 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.7419778296382735e-06, |
|
"loss": 0.4504, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.7390606767794634e-06, |
|
"loss": 0.5496, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.7361435239206538e-06, |
|
"loss": 0.5946, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.7332263710618437e-06, |
|
"loss": 0.5993, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.730309218203034e-06, |
|
"loss": 0.5006, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.727392065344224e-06, |
|
"loss": 0.5133, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.7244749124854144e-06, |
|
"loss": 0.5787, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.7215577596266043e-06, |
|
"loss": 0.3984, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.718640606767795e-06, |
|
"loss": 0.4427, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.7157234539089855e-06, |
|
"loss": 0.7057, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.7128063010501754e-06, |
|
"loss": 0.5082, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.7098891481913658e-06, |
|
"loss": 0.4868, |
|
"step": 1996 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.7069719953325557e-06, |
|
"loss": 0.5882, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.704054842473746e-06, |
|
"loss": 0.5969, |
|
"step": 2004 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.701137689614936e-06, |
|
"loss": 0.4813, |
|
"step": 2008 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.6982205367561264e-06, |
|
"loss": 0.4511, |
|
"step": 2012 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.6953033838973167e-06, |
|
"loss": 0.5281, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.6923862310385067e-06, |
|
"loss": 0.5461, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.689469078179697e-06, |
|
"loss": 0.5653, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.686551925320887e-06, |
|
"loss": 0.5701, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.6836347724620773e-06, |
|
"loss": 0.5516, |
|
"step": 2032 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.6807176196032673e-06, |
|
"loss": 0.6763, |
|
"step": 2036 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.6778004667444576e-06, |
|
"loss": 0.5188, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.6748833138856476e-06, |
|
"loss": 0.6355, |
|
"step": 2044 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.671966161026838e-06, |
|
"loss": 0.3786, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.6690490081680287e-06, |
|
"loss": 0.4538, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.6661318553092182e-06, |
|
"loss": 0.4956, |
|
"step": 2056 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.663214702450409e-06, |
|
"loss": 0.4748, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.660297549591599e-06, |
|
"loss": 0.5572, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6573803967327893e-06, |
|
"loss": 0.5476, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6544632438739793e-06, |
|
"loss": 0.6124, |
|
"step": 2072 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6515460910151696e-06, |
|
"loss": 0.6467, |
|
"step": 2076 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6486289381563596e-06, |
|
"loss": 0.5917, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.64571178529755e-06, |
|
"loss": 0.4462, |
|
"step": 2084 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.6427946324387403e-06, |
|
"loss": 0.5173, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.6398774795799302e-06, |
|
"loss": 0.5402, |
|
"step": 2092 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.6369603267211206e-06, |
|
"loss": 0.3434, |
|
"step": 2096 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.6340431738623105e-06, |
|
"loss": 0.3308, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.631126021003501e-06, |
|
"loss": 0.4462, |
|
"step": 2104 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.628208868144691e-06, |
|
"loss": 0.4822, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.625291715285881e-06, |
|
"loss": 0.4612, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.6223745624270716e-06, |
|
"loss": 0.5462, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.6194574095682615e-06, |
|
"loss": 0.6326, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.6165402567094523e-06, |
|
"loss": 0.664, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.613623103850642e-06, |
|
"loss": 0.4564, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.6107059509918326e-06, |
|
"loss": 0.4078, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.607788798133022e-06, |
|
"loss": 0.3981, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.604871645274213e-06, |
|
"loss": 0.5809, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.601954492415403e-06, |
|
"loss": 0.3539, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.599037339556593e-06, |
|
"loss": 0.4753, |
|
"step": 2148 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.5961201866977835e-06, |
|
"loss": 0.4232, |
|
"step": 2152 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.5932030338389735e-06, |
|
"loss": 0.5864, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.590285880980164e-06, |
|
"loss": 0.5046, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.5873687281213538e-06, |
|
"loss": 0.711, |
|
"step": 2164 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.584451575262544e-06, |
|
"loss": 0.3999, |
|
"step": 2168 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.581534422403734e-06, |
|
"loss": 0.4033, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5786172695449245e-06, |
|
"loss": 0.3923, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5757001166861144e-06, |
|
"loss": 0.4749, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5727829638273048e-06, |
|
"loss": 0.5808, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.569865810968495e-06, |
|
"loss": 0.5079, |
|
"step": 2188 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.566948658109685e-06, |
|
"loss": 0.6254, |
|
"step": 2192 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5640315052508754e-06, |
|
"loss": 0.5132, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5611143523920654e-06, |
|
"loss": 0.3979, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.558197199533256e-06, |
|
"loss": 0.5037, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5552800466744457e-06, |
|
"loss": 0.4465, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5523628938156364e-06, |
|
"loss": 0.5237, |
|
"step": 2212 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.549445740956827e-06, |
|
"loss": 0.4848, |
|
"step": 2216 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5465285880980167e-06, |
|
"loss": 0.6196, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.543611435239207e-06, |
|
"loss": 0.5312, |
|
"step": 2224 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.540694282380397e-06, |
|
"loss": 0.7711, |
|
"step": 2228 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5377771295215874e-06, |
|
"loss": 0.3885, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5348599766627773e-06, |
|
"loss": 0.4993, |
|
"step": 2236 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5319428238039677e-06, |
|
"loss": 0.6159, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5290256709451576e-06, |
|
"loss": 0.5108, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.526108518086348e-06, |
|
"loss": 0.4773, |
|
"step": 2248 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.5231913652275384e-06, |
|
"loss": 0.4661, |
|
"step": 2252 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.5202742123687283e-06, |
|
"loss": 0.4629, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.5173570595099187e-06, |
|
"loss": 0.587, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.5144399066511086e-06, |
|
"loss": 0.4012, |
|
"step": 2264 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.511522753792299e-06, |
|
"loss": 0.6225, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.508605600933489e-06, |
|
"loss": 0.5934, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.5056884480746793e-06, |
|
"loss": 0.5112, |
|
"step": 2276 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.502771295215869e-06, |
|
"loss": 0.6217, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.4998541423570596e-06, |
|
"loss": 0.7376, |
|
"step": 2284 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.4969369894982504e-06, |
|
"loss": 0.4367, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.4940198366394403e-06, |
|
"loss": 0.4153, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.4911026837806307e-06, |
|
"loss": 0.563, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.4881855309218206e-06, |
|
"loss": 0.4106, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.485268378063011e-06, |
|
"loss": 0.6708, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.482351225204201e-06, |
|
"loss": 0.4741, |
|
"step": 2308 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.4794340723453913e-06, |
|
"loss": 0.6038, |
|
"step": 2312 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.4765169194865816e-06, |
|
"loss": 0.3982, |
|
"step": 2316 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.4735997666277716e-06, |
|
"loss": 0.7745, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.470682613768962e-06, |
|
"loss": 0.5016, |
|
"step": 2324 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.467765460910152e-06, |
|
"loss": 0.3199, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.4648483080513422e-06, |
|
"loss": 0.5832, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.461931155192532e-06, |
|
"loss": 0.4076, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.4590140023337225e-06, |
|
"loss": 0.5615, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.4560968494749125e-06, |
|
"loss": 0.3989, |
|
"step": 2344 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.453179696616103e-06, |
|
"loss": 0.4901, |
|
"step": 2348 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.4502625437572936e-06, |
|
"loss": 0.4952, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.447345390898483e-06, |
|
"loss": 0.5235, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.444428238039674e-06, |
|
"loss": 0.3815, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.4415110851808634e-06, |
|
"loss": 0.5771, |
|
"step": 2364 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.4385939323220542e-06, |
|
"loss": 0.7008, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.435676779463244e-06, |
|
"loss": 0.4909, |
|
"step": 2372 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.4327596266044345e-06, |
|
"loss": 0.4042, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.4298424737456245e-06, |
|
"loss": 0.5892, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.426925320886815e-06, |
|
"loss": 0.538, |
|
"step": 2384 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.424008168028005e-06, |
|
"loss": 0.409, |
|
"step": 2388 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.421091015169195e-06, |
|
"loss": 0.5232, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.4181738623103855e-06, |
|
"loss": 0.6311, |
|
"step": 2396 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.4152567094515754e-06, |
|
"loss": 0.4116, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.4123395565927658e-06, |
|
"loss": 0.5223, |
|
"step": 2404 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.4094224037339557e-06, |
|
"loss": 0.6645, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.406505250875146e-06, |
|
"loss": 0.4037, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.4035880980163364e-06, |
|
"loss": 0.4992, |
|
"step": 2416 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.4006709451575264e-06, |
|
"loss": 0.3654, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.3977537922987167e-06, |
|
"loss": 0.6107, |
|
"step": 2424 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.3948366394399067e-06, |
|
"loss": 0.5753, |
|
"step": 2428 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.3919194865810975e-06, |
|
"loss": 0.375, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.389002333722287e-06, |
|
"loss": 0.4033, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.3860851808634778e-06, |
|
"loss": 0.5717, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.3831680280046673e-06, |
|
"loss": 0.3352, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.380250875145858e-06, |
|
"loss": 0.3152, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.3773337222870484e-06, |
|
"loss": 0.6976, |
|
"step": 2452 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.3744165694282384e-06, |
|
"loss": 0.6974, |
|
"step": 2456 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.3714994165694287e-06, |
|
"loss": 0.4909, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.3685822637106187e-06, |
|
"loss": 0.4997, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.365665110851809e-06, |
|
"loss": 0.5293, |
|
"step": 2468 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.362747957992999e-06, |
|
"loss": 0.5174, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.3598308051341893e-06, |
|
"loss": 0.4137, |
|
"step": 2476 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.3569136522753793e-06, |
|
"loss": 0.3583, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.3539964994165696e-06, |
|
"loss": 0.4317, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.35107934655776e-06, |
|
"loss": 0.5334, |
|
"step": 2488 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.34816219369895e-06, |
|
"loss": 0.4626, |
|
"step": 2492 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.3452450408401403e-06, |
|
"loss": 0.3575, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.3423278879813302e-06, |
|
"loss": 0.3181, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.3394107351225206e-06, |
|
"loss": 0.4233, |
|
"step": 2504 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.3364935822637105e-06, |
|
"loss": 0.6187, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.3335764294049013e-06, |
|
"loss": 0.4076, |
|
"step": 2512 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.3306592765460917e-06, |
|
"loss": 0.3893, |
|
"step": 2516 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.3277421236872816e-06, |
|
"loss": 0.4937, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.324824970828472e-06, |
|
"loss": 0.29, |
|
"step": 2524 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.321907817969662e-06, |
|
"loss": 0.4274, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.3189906651108523e-06, |
|
"loss": 0.3029, |
|
"step": 2532 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.3160735122520422e-06, |
|
"loss": 0.4773, |
|
"step": 2536 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.3131563593932326e-06, |
|
"loss": 0.5116, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.3102392065344225e-06, |
|
"loss": 0.514, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.307322053675613e-06, |
|
"loss": 0.5396, |
|
"step": 2548 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.3044049008168033e-06, |
|
"loss": 0.4585, |
|
"step": 2552 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.301487747957993e-06, |
|
"loss": 0.6577, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.2985705950991836e-06, |
|
"loss": 0.4556, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.2956534422403735e-06, |
|
"loss": 0.3851, |
|
"step": 2564 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.292736289381564e-06, |
|
"loss": 0.366, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.289819136522754e-06, |
|
"loss": 0.53, |
|
"step": 2572 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.286901983663944e-06, |
|
"loss": 0.513, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.283984830805134e-06, |
|
"loss": 0.4743, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.2810676779463245e-06, |
|
"loss": 0.5324, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.2781505250875152e-06, |
|
"loss": 0.5894, |
|
"step": 2588 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.2752333722287048e-06, |
|
"loss": 0.4134, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.2723162193698955e-06, |
|
"loss": 0.5402, |
|
"step": 2596 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.2693990665110855e-06, |
|
"loss": 0.4749, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.266481913652276e-06, |
|
"loss": 0.3575, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.2635647607934658e-06, |
|
"loss": 0.3848, |
|
"step": 2608 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.260647607934656e-06, |
|
"loss": 0.3512, |
|
"step": 2612 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.2577304550758465e-06, |
|
"loss": 0.4186, |
|
"step": 2616 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.2548133022170364e-06, |
|
"loss": 0.3952, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.251896149358227e-06, |
|
"loss": 0.3489, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.2489789964994167e-06, |
|
"loss": 0.5544, |
|
"step": 2628 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.246061843640607e-06, |
|
"loss": 0.4824, |
|
"step": 2632 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.243144690781797e-06, |
|
"loss": 0.3462, |
|
"step": 2636 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.2402275379229874e-06, |
|
"loss": 0.4465, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.2373103850641773e-06, |
|
"loss": 0.4774, |
|
"step": 2644 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.2343932322053677e-06, |
|
"loss": 0.32, |
|
"step": 2648 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.231476079346558e-06, |
|
"loss": 0.5598, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.228558926487748e-06, |
|
"loss": 0.5406, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.225641773628939e-06, |
|
"loss": 0.3966, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.2227246207701283e-06, |
|
"loss": 0.6023, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.219807467911319e-06, |
|
"loss": 0.4532, |
|
"step": 2668 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.2168903150525086e-06, |
|
"loss": 0.3336, |
|
"step": 2672 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.2139731621936994e-06, |
|
"loss": 0.4411, |
|
"step": 2676 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.2110560093348893e-06, |
|
"loss": 0.5039, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.2081388564760797e-06, |
|
"loss": 0.6932, |
|
"step": 2684 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.20522170361727e-06, |
|
"loss": 0.5271, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.20230455075846e-06, |
|
"loss": 0.432, |
|
"step": 2692 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.1993873978996504e-06, |
|
"loss": 0.4973, |
|
"step": 2696 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.1964702450408403e-06, |
|
"loss": 0.6146, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.1935530921820307e-06, |
|
"loss": 0.3637, |
|
"step": 2704 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.1906359393232206e-06, |
|
"loss": 0.4085, |
|
"step": 2708 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.187718786464411e-06, |
|
"loss": 0.3726, |
|
"step": 2712 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.1848016336056013e-06, |
|
"loss": 0.5446, |
|
"step": 2716 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.1818844807467913e-06, |
|
"loss": 0.497, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.1789673278879816e-06, |
|
"loss": 0.3298, |
|
"step": 2724 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.1760501750291716e-06, |
|
"loss": 0.491, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.173133022170362e-06, |
|
"loss": 0.3796, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.170215869311552e-06, |
|
"loss": 0.6953, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.1672987164527427e-06, |
|
"loss": 0.3953, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.164381563593932e-06, |
|
"loss": 0.3992, |
|
"step": 2744 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.161464410735123e-06, |
|
"loss": 0.4851, |
|
"step": 2748 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.1585472578763133e-06, |
|
"loss": 0.3364, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.1556301050175033e-06, |
|
"loss": 0.4477, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.1527129521586936e-06, |
|
"loss": 0.555, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.1497957992998836e-06, |
|
"loss": 0.5896, |
|
"step": 2764 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.146878646441074e-06, |
|
"loss": 0.5281, |
|
"step": 2768 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.143961493582264e-06, |
|
"loss": 0.4825, |
|
"step": 2772 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.1410443407234542e-06, |
|
"loss": 0.3199, |
|
"step": 2776 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.138127187864644e-06, |
|
"loss": 0.3897, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.1352100350058345e-06, |
|
"loss": 0.3698, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.132292882147025e-06, |
|
"loss": 0.4514, |
|
"step": 2788 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.129375729288215e-06, |
|
"loss": 0.4257, |
|
"step": 2792 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.126458576429405e-06, |
|
"loss": 0.4245, |
|
"step": 2796 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.123541423570595e-06, |
|
"loss": 0.4945, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.1206242707117855e-06, |
|
"loss": 0.442, |
|
"step": 2804 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.1177071178529754e-06, |
|
"loss": 0.6289, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.1147899649941658e-06, |
|
"loss": 0.4142, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.1118728121353566e-06, |
|
"loss": 0.4667, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.1089556592765465e-06, |
|
"loss": 0.4788, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.106038506417737e-06, |
|
"loss": 0.3926, |
|
"step": 2824 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.103121353558927e-06, |
|
"loss": 0.553, |
|
"step": 2828 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.100204200700117e-06, |
|
"loss": 0.4567, |
|
"step": 2832 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.097287047841307e-06, |
|
"loss": 0.3615, |
|
"step": 2836 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.0943698949824975e-06, |
|
"loss": 0.4364, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.0914527421236874e-06, |
|
"loss": 0.525, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.0885355892648778e-06, |
|
"loss": 0.4324, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.085618436406068e-06, |
|
"loss": 0.37, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.082701283547258e-06, |
|
"loss": 0.3175, |
|
"step": 2856 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.0797841306884484e-06, |
|
"loss": 0.3043, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.0768669778296384e-06, |
|
"loss": 0.514, |
|
"step": 2864 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.0739498249708287e-06, |
|
"loss": 0.4671, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.0710326721120187e-06, |
|
"loss": 0.4151, |
|
"step": 2872 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.068115519253209e-06, |
|
"loss": 0.4077, |
|
"step": 2876 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.065198366394399e-06, |
|
"loss": 0.5045, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.0622812135355893e-06, |
|
"loss": 0.3641, |
|
"step": 2884 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.05936406067678e-06, |
|
"loss": 0.5006, |
|
"step": 2888 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.0564469078179696e-06, |
|
"loss": 0.4298, |
|
"step": 2892 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.0535297549591604e-06, |
|
"loss": 0.514, |
|
"step": 2896 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.0506126021003504e-06, |
|
"loss": 0.4578, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.0476954492415407e-06, |
|
"loss": 0.3638, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.0447782963827307e-06, |
|
"loss": 0.6377, |
|
"step": 2908 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.041861143523921e-06, |
|
"loss": 0.5282, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.0389439906651114e-06, |
|
"loss": 0.5388, |
|
"step": 2916 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.0360268378063013e-06, |
|
"loss": 0.5937, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.0331096849474917e-06, |
|
"loss": 0.488, |
|
"step": 2924 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.0301925320886816e-06, |
|
"loss": 0.4885, |
|
"step": 2928 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.027275379229872e-06, |
|
"loss": 0.6639, |
|
"step": 2932 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.024358226371062e-06, |
|
"loss": 0.4895, |
|
"step": 2936 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.0214410735122523e-06, |
|
"loss": 0.2655, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.0185239206534422e-06, |
|
"loss": 0.6175, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.0156067677946326e-06, |
|
"loss": 0.4876, |
|
"step": 2948 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.012689614935823e-06, |
|
"loss": 0.3615, |
|
"step": 2952 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.009772462077013e-06, |
|
"loss": 0.3619, |
|
"step": 2956 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.0068553092182033e-06, |
|
"loss": 0.4622, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.003938156359393e-06, |
|
"loss": 0.4855, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.001021003500584e-06, |
|
"loss": 0.3779, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9981038506417735e-06, |
|
"loss": 0.5237, |
|
"step": 2972 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.9951866977829643e-06, |
|
"loss": 0.541, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.992269544924154e-06, |
|
"loss": 0.4515, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.9893523920653446e-06, |
|
"loss": 0.5022, |
|
"step": 2984 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.986435239206535e-06, |
|
"loss": 0.4295, |
|
"step": 2988 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.983518086347725e-06, |
|
"loss": 0.438, |
|
"step": 2992 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.9806009334889152e-06, |
|
"loss": 0.364, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.977683780630105e-06, |
|
"loss": 0.3795, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.9747666277712955e-06, |
|
"loss": 0.4839, |
|
"step": 3004 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.9718494749124855e-06, |
|
"loss": 0.3864, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.968932322053676e-06, |
|
"loss": 0.5014, |
|
"step": 3012 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.966015169194866e-06, |
|
"loss": 0.4356, |
|
"step": 3016 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.963098016336056e-06, |
|
"loss": 0.5337, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.9601808634772465e-06, |
|
"loss": 0.4385, |
|
"step": 3024 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.9572637106184364e-06, |
|
"loss": 0.5117, |
|
"step": 3028 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.954346557759627e-06, |
|
"loss": 0.6061, |
|
"step": 3032 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.9514294049008167e-06, |
|
"loss": 0.4433, |
|
"step": 3036 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.948512252042007e-06, |
|
"loss": 0.3484, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.945595099183197e-06, |
|
"loss": 0.2477, |
|
"step": 3044 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.942677946324388e-06, |
|
"loss": 0.2433, |
|
"step": 3048 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.939760793465578e-06, |
|
"loss": 0.34, |
|
"step": 3052 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.936843640606768e-06, |
|
"loss": 0.6435, |
|
"step": 3056 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.9339264877479585e-06, |
|
"loss": 0.3208, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.9310093348891484e-06, |
|
"loss": 0.3085, |
|
"step": 3064 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.928092182030339e-06, |
|
"loss": 0.362, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.9251750291715287e-06, |
|
"loss": 0.4297, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.922257876312719e-06, |
|
"loss": 0.4343, |
|
"step": 3076 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.919340723453909e-06, |
|
"loss": 0.5577, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.9164235705950994e-06, |
|
"loss": 0.3442, |
|
"step": 3084 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.9135064177362898e-06, |
|
"loss": 0.5667, |
|
"step": 3088 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.9105892648774797e-06, |
|
"loss": 0.3254, |
|
"step": 3092 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.90767211201867e-06, |
|
"loss": 0.2909, |
|
"step": 3096 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.90475495915986e-06, |
|
"loss": 0.3682, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.9018378063010504e-06, |
|
"loss": 0.3107, |
|
"step": 3104 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.8989206534422403e-06, |
|
"loss": 0.4328, |
|
"step": 3108 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.8960035005834307e-06, |
|
"loss": 0.3674, |
|
"step": 3112 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.8930863477246215e-06, |
|
"loss": 0.3329, |
|
"step": 3116 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.890169194865811e-06, |
|
"loss": 0.6409, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.8872520420070018e-06, |
|
"loss": 0.5682, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.8843348891481917e-06, |
|
"loss": 0.5972, |
|
"step": 3128 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.881417736289382e-06, |
|
"loss": 0.4621, |
|
"step": 3132 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.878500583430572e-06, |
|
"loss": 0.4448, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.8755834305717624e-06, |
|
"loss": 0.2757, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.8726662777129523e-06, |
|
"loss": 0.5172, |
|
"step": 3144 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.8697491248541427e-06, |
|
"loss": 0.4493, |
|
"step": 3148 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.866831971995333e-06, |
|
"loss": 0.3437, |
|
"step": 3152 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.863914819136523e-06, |
|
"loss": 0.286, |
|
"step": 3156 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.8609976662777133e-06, |
|
"loss": 0.6001, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.8580805134189033e-06, |
|
"loss": 0.4373, |
|
"step": 3164 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.8551633605600936e-06, |
|
"loss": 0.4974, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.8522462077012836e-06, |
|
"loss": 0.4817, |
|
"step": 3172 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.849329054842474e-06, |
|
"loss": 0.4178, |
|
"step": 3176 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.846411901983664e-06, |
|
"loss": 0.4527, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.8434947491248542e-06, |
|
"loss": 0.3193, |
|
"step": 3184 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8405775962660446e-06, |
|
"loss": 0.4259, |
|
"step": 3188 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8376604434072345e-06, |
|
"loss": 0.2256, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8347432905484253e-06, |
|
"loss": 0.3772, |
|
"step": 3196 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.831826137689615e-06, |
|
"loss": 0.3679, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8289089848308056e-06, |
|
"loss": 0.2954, |
|
"step": 3204 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8259918319719955e-06, |
|
"loss": 0.4222, |
|
"step": 3208 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.823074679113186e-06, |
|
"loss": 0.4063, |
|
"step": 3212 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.8201575262543763e-06, |
|
"loss": 0.6235, |
|
"step": 3216 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.817240373395566e-06, |
|
"loss": 0.2717, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.8143232205367566e-06, |
|
"loss": 0.374, |
|
"step": 3224 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.8114060676779465e-06, |
|
"loss": 0.3534, |
|
"step": 3228 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.808488914819137e-06, |
|
"loss": 0.3723, |
|
"step": 3232 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.805571761960327e-06, |
|
"loss": 0.5241, |
|
"step": 3236 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.802654609101517e-06, |
|
"loss": 0.5061, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.799737456242707e-06, |
|
"loss": 0.268, |
|
"step": 3244 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.7968203033838975e-06, |
|
"loss": 0.3903, |
|
"step": 3248 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.793903150525088e-06, |
|
"loss": 0.4536, |
|
"step": 3252 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.7909859976662778e-06, |
|
"loss": 0.36, |
|
"step": 3256 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.788068844807468e-06, |
|
"loss": 0.4255, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.785151691948658e-06, |
|
"loss": 0.5646, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.7822345390898484e-06, |
|
"loss": 0.3652, |
|
"step": 3268 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.7793173862310384e-06, |
|
"loss": 0.3034, |
|
"step": 3272 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.776400233372229e-06, |
|
"loss": 0.417, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.7734830805134187e-06, |
|
"loss": 0.5308, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.7705659276546095e-06, |
|
"loss": 0.5367, |
|
"step": 3284 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.7676487747958e-06, |
|
"loss": 0.2237, |
|
"step": 3288 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.7647316219369898e-06, |
|
"loss": 0.4497, |
|
"step": 3292 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.76181446907818e-06, |
|
"loss": 0.351, |
|
"step": 3296 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.75889731621937e-06, |
|
"loss": 0.6103, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.7559801633605604e-06, |
|
"loss": 0.3624, |
|
"step": 3304 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.7530630105017504e-06, |
|
"loss": 0.4172, |
|
"step": 3308 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.7501458576429407e-06, |
|
"loss": 0.5008, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.747228704784131e-06, |
|
"loss": 0.4368, |
|
"step": 3316 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.744311551925321e-06, |
|
"loss": 0.4305, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.7413943990665114e-06, |
|
"loss": 0.3914, |
|
"step": 3324 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.7384772462077013e-06, |
|
"loss": 0.3781, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.7355600933488917e-06, |
|
"loss": 0.4922, |
|
"step": 3332 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.7326429404900816e-06, |
|
"loss": 0.3398, |
|
"step": 3336 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.729725787631272e-06, |
|
"loss": 0.5107, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.726808634772462e-06, |
|
"loss": 0.5933, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.7238914819136523e-06, |
|
"loss": 0.4659, |
|
"step": 3348 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.720974329054843e-06, |
|
"loss": 0.2262, |
|
"step": 3352 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.718057176196033e-06, |
|
"loss": 0.4471, |
|
"step": 3356 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.7151400233372234e-06, |
|
"loss": 0.7168, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.7122228704784133e-06, |
|
"loss": 0.1911, |
|
"step": 3364 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.7093057176196037e-06, |
|
"loss": 0.3807, |
|
"step": 3368 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.7063885647607936e-06, |
|
"loss": 0.3614, |
|
"step": 3372 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.703471411901984e-06, |
|
"loss": 0.2861, |
|
"step": 3376 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.700554259043174e-06, |
|
"loss": 0.3193, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.6976371061843643e-06, |
|
"loss": 0.4835, |
|
"step": 3384 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.6947199533255546e-06, |
|
"loss": 0.4439, |
|
"step": 3388 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.6918028004667446e-06, |
|
"loss": 0.2924, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.688885647607935e-06, |
|
"loss": 0.5311, |
|
"step": 3396 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.685968494749125e-06, |
|
"loss": 0.4898, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.6830513418903152e-06, |
|
"loss": 0.2538, |
|
"step": 3404 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.680134189031505e-06, |
|
"loss": 0.4381, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.6772170361726955e-06, |
|
"loss": 0.4718, |
|
"step": 3412 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.6742998833138863e-06, |
|
"loss": 0.3369, |
|
"step": 3416 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.671382730455076e-06, |
|
"loss": 0.3481, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.6684655775962666e-06, |
|
"loss": 0.2547, |
|
"step": 3424 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.665548424737456e-06, |
|
"loss": 0.4183, |
|
"step": 3428 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.662631271878647e-06, |
|
"loss": 0.4181, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.659714119019837e-06, |
|
"loss": 0.5512, |
|
"step": 3436 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.6567969661610272e-06, |
|
"loss": 0.4187, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.653879813302217e-06, |
|
"loss": 0.2411, |
|
"step": 3444 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.6509626604434075e-06, |
|
"loss": 0.3652, |
|
"step": 3448 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.648045507584598e-06, |
|
"loss": 0.4122, |
|
"step": 3452 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.645128354725788e-06, |
|
"loss": 0.2771, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.642211201866978e-06, |
|
"loss": 0.3256, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.639294049008168e-06, |
|
"loss": 0.53, |
|
"step": 3464 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.6363768961493585e-06, |
|
"loss": 0.2602, |
|
"step": 3468 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.6334597432905484e-06, |
|
"loss": 0.2461, |
|
"step": 3472 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.630542590431739e-06, |
|
"loss": 0.3867, |
|
"step": 3476 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.6276254375729287e-06, |
|
"loss": 0.4217, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.624708284714119e-06, |
|
"loss": 0.4234, |
|
"step": 3484 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.6217911318553095e-06, |
|
"loss": 0.3664, |
|
"step": 3488 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.6188739789964994e-06, |
|
"loss": 0.5729, |
|
"step": 3492 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.6159568261376898e-06, |
|
"loss": 0.5175, |
|
"step": 3496 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.6130396732788797e-06, |
|
"loss": 0.4228, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.6101225204200705e-06, |
|
"loss": 0.3762, |
|
"step": 3504 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.60720536756126e-06, |
|
"loss": 0.4211, |
|
"step": 3508 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.604288214702451e-06, |
|
"loss": 0.3969, |
|
"step": 3512 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.601371061843641e-06, |
|
"loss": 0.3794, |
|
"step": 3516 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.598453908984831e-06, |
|
"loss": 0.2235, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5955367561260215e-06, |
|
"loss": 0.3842, |
|
"step": 3524 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5926196032672114e-06, |
|
"loss": 0.3081, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5897024504084018e-06, |
|
"loss": 0.267, |
|
"step": 3532 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5867852975495917e-06, |
|
"loss": 0.2976, |
|
"step": 3536 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.583868144690782e-06, |
|
"loss": 0.4383, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.580950991831972e-06, |
|
"loss": 0.3175, |
|
"step": 3544 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.5780338389731624e-06, |
|
"loss": 0.3118, |
|
"step": 3548 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.5751166861143527e-06, |
|
"loss": 0.4329, |
|
"step": 3552 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.5721995332555427e-06, |
|
"loss": 0.4936, |
|
"step": 3556 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.569282380396733e-06, |
|
"loss": 0.4872, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.566365227537923e-06, |
|
"loss": 0.431, |
|
"step": 3564 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.5634480746791133e-06, |
|
"loss": 0.5265, |
|
"step": 3568 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.5605309218203033e-06, |
|
"loss": 0.3655, |
|
"step": 3572 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.5576137689614936e-06, |
|
"loss": 0.342, |
|
"step": 3576 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.5546966161026836e-06, |
|
"loss": 0.4835, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.5517794632438743e-06, |
|
"loss": 0.2614, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.5488623103850647e-06, |
|
"loss": 0.3411, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.5459451575262546e-06, |
|
"loss": 0.4997, |
|
"step": 3592 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.543028004667445e-06, |
|
"loss": 0.461, |
|
"step": 3596 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.540110851808635e-06, |
|
"loss": 0.4112, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.5371936989498253e-06, |
|
"loss": 0.4968, |
|
"step": 3604 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.5342765460910152e-06, |
|
"loss": 0.3994, |
|
"step": 3608 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.5313593932322056e-06, |
|
"loss": 0.4559, |
|
"step": 3612 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.528442240373396e-06, |
|
"loss": 0.3394, |
|
"step": 3616 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.525525087514586e-06, |
|
"loss": 0.3952, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.5226079346557763e-06, |
|
"loss": 0.318, |
|
"step": 3624 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.5196907817969662e-06, |
|
"loss": 0.3204, |
|
"step": 3628 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.5167736289381566e-06, |
|
"loss": 0.3047, |
|
"step": 3632 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5138564760793465e-06, |
|
"loss": 0.3223, |
|
"step": 3636 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.510939323220537e-06, |
|
"loss": 0.2073, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.508022170361727e-06, |
|
"loss": 0.3696, |
|
"step": 3644 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.505105017502917e-06, |
|
"loss": 0.4404, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.502187864644108e-06, |
|
"loss": 0.3907, |
|
"step": 3652 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4992707117852975e-06, |
|
"loss": 0.4624, |
|
"step": 3656 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.496353558926488e-06, |
|
"loss": 0.4258, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.493436406067678e-06, |
|
"loss": 0.421, |
|
"step": 3664 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4905192532088686e-06, |
|
"loss": 0.3276, |
|
"step": 3668 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4876021003500585e-06, |
|
"loss": 0.4259, |
|
"step": 3672 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.484684947491249e-06, |
|
"loss": 0.3363, |
|
"step": 3676 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.481767794632439e-06, |
|
"loss": 0.3734, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.478850641773629e-06, |
|
"loss": 0.4299, |
|
"step": 3684 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.475933488914819e-06, |
|
"loss": 0.3697, |
|
"step": 3688 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.4730163360560095e-06, |
|
"loss": 0.3721, |
|
"step": 3692 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.4700991831971994e-06, |
|
"loss": 0.4954, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.46718203033839e-06, |
|
"loss": 0.2761, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.46426487747958e-06, |
|
"loss": 0.3485, |
|
"step": 3704 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.4613477246207705e-06, |
|
"loss": 0.3959, |
|
"step": 3708 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.4584305717619604e-06, |
|
"loss": 0.421, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.455513418903151e-06, |
|
"loss": 0.3158, |
|
"step": 3716 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.4525962660443407e-06, |
|
"loss": 0.3167, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.449679113185531e-06, |
|
"loss": 0.4065, |
|
"step": 3724 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.446761960326721e-06, |
|
"loss": 0.406, |
|
"step": 3728 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.443844807467912e-06, |
|
"loss": 0.4013, |
|
"step": 3732 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.4409276546091018e-06, |
|
"loss": 0.5664, |
|
"step": 3736 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.438010501750292e-06, |
|
"loss": 0.3301, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.435093348891482e-06, |
|
"loss": 0.3695, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.4321761960326724e-06, |
|
"loss": 0.3426, |
|
"step": 3748 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.4292590431738624e-06, |
|
"loss": 0.3434, |
|
"step": 3752 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.4263418903150527e-06, |
|
"loss": 0.3886, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.4234247374562427e-06, |
|
"loss": 0.4117, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.4205075845974334e-06, |
|
"loss": 0.4477, |
|
"step": 3764 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.4175904317386234e-06, |
|
"loss": 0.4769, |
|
"step": 3768 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.4146732788798137e-06, |
|
"loss": 0.4984, |
|
"step": 3772 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.4117561260210037e-06, |
|
"loss": 0.3964, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.408838973162194e-06, |
|
"loss": 0.4827, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.405921820303384e-06, |
|
"loss": 0.3075, |
|
"step": 3784 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.4030046674445743e-06, |
|
"loss": 0.2245, |
|
"step": 3788 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.4000875145857643e-06, |
|
"loss": 0.2683, |
|
"step": 3792 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3971703617269546e-06, |
|
"loss": 0.4515, |
|
"step": 3796 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.394253208868145e-06, |
|
"loss": 0.3369, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.391336056009335e-06, |
|
"loss": 0.2854, |
|
"step": 3804 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.3884189031505253e-06, |
|
"loss": 0.2712, |
|
"step": 3808 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.3855017502917157e-06, |
|
"loss": 0.3827, |
|
"step": 3812 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.3825845974329056e-06, |
|
"loss": 0.2348, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.379667444574096e-06, |
|
"loss": 0.2503, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.376750291715286e-06, |
|
"loss": 0.2814, |
|
"step": 3824 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.3738331388564763e-06, |
|
"loss": 0.4045, |
|
"step": 3828 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.3709159859976666e-06, |
|
"loss": 0.5534, |
|
"step": 3832 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.3679988331388566e-06, |
|
"loss": 0.4016, |
|
"step": 3836 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.365081680280047e-06, |
|
"loss": 0.4375, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.362164527421237e-06, |
|
"loss": 0.3761, |
|
"step": 3844 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.3592473745624272e-06, |
|
"loss": 0.3525, |
|
"step": 3848 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.3563302217036176e-06, |
|
"loss": 0.3385, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.3534130688448075e-06, |
|
"loss": 0.393, |
|
"step": 3856 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.350495915985998e-06, |
|
"loss": 0.4507, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.3475787631271883e-06, |
|
"loss": 0.2481, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.344661610268378e-06, |
|
"loss": 0.2887, |
|
"step": 3868 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.3417444574095686e-06, |
|
"loss": 0.3081, |
|
"step": 3872 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.3388273045507585e-06, |
|
"loss": 0.3454, |
|
"step": 3876 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.335910151691949e-06, |
|
"loss": 0.4006, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.332992998833139e-06, |
|
"loss": 0.3328, |
|
"step": 3884 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.330075845974329e-06, |
|
"loss": 0.3802, |
|
"step": 3888 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.3271586931155195e-06, |
|
"loss": 0.538, |
|
"step": 3892 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.3242415402567095e-06, |
|
"loss": 0.4035, |
|
"step": 3896 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.3213243873979e-06, |
|
"loss": 0.3538, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.31840723453909e-06, |
|
"loss": 0.2945, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.31549008168028e-06, |
|
"loss": 0.3023, |
|
"step": 3908 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.3125729288214705e-06, |
|
"loss": 0.4806, |
|
"step": 3912 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.3096557759626604e-06, |
|
"loss": 0.4009, |
|
"step": 3916 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.306738623103851e-06, |
|
"loss": 0.4475, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.3038214702450407e-06, |
|
"loss": 0.2655, |
|
"step": 3924 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.300904317386231e-06, |
|
"loss": 0.3898, |
|
"step": 3928 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2979871645274215e-06, |
|
"loss": 0.2847, |
|
"step": 3932 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.295070011668612e-06, |
|
"loss": 0.5787, |
|
"step": 3936 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2921528588098018e-06, |
|
"loss": 0.4133, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.289235705950992e-06, |
|
"loss": 0.2984, |
|
"step": 3944 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.286318553092182e-06, |
|
"loss": 0.3097, |
|
"step": 3948 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2834014002333724e-06, |
|
"loss": 0.3854, |
|
"step": 3952 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2804842473745624e-06, |
|
"loss": 0.5068, |
|
"step": 3956 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2775670945157527e-06, |
|
"loss": 0.3831, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.274649941656943e-06, |
|
"loss": 0.2283, |
|
"step": 3964 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.2717327887981334e-06, |
|
"loss": 0.3432, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.2688156359393234e-06, |
|
"loss": 0.4152, |
|
"step": 3972 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.2658984830805137e-06, |
|
"loss": 0.2857, |
|
"step": 3976 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.2629813302217037e-06, |
|
"loss": 0.39, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.260064177362894e-06, |
|
"loss": 0.3972, |
|
"step": 3984 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.257147024504084e-06, |
|
"loss": 0.3207, |
|
"step": 3988 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.2542298716452743e-06, |
|
"loss": 0.4362, |
|
"step": 3992 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.2513127187864643e-06, |
|
"loss": 0.3839, |
|
"step": 3996 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.248395565927655e-06, |
|
"loss": 0.211, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.245478413068845e-06, |
|
"loss": 0.4071, |
|
"step": 4004 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.2425612602100354e-06, |
|
"loss": 0.2785, |
|
"step": 4008 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.2396441073512253e-06, |
|
"loss": 0.4274, |
|
"step": 4012 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.2367269544924157e-06, |
|
"loss": 0.3813, |
|
"step": 4016 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.2338098016336056e-06, |
|
"loss": 0.3138, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.230892648774796e-06, |
|
"loss": 0.3181, |
|
"step": 4024 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.227975495915986e-06, |
|
"loss": 0.4108, |
|
"step": 4028 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.2250583430571767e-06, |
|
"loss": 0.3285, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.2221411901983666e-06, |
|
"loss": 0.2244, |
|
"step": 4036 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.219224037339557e-06, |
|
"loss": 0.4148, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.216306884480747e-06, |
|
"loss": 0.418, |
|
"step": 4044 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.2133897316219373e-06, |
|
"loss": 0.4441, |
|
"step": 4048 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.2104725787631272e-06, |
|
"loss": 0.358, |
|
"step": 4052 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.2075554259043176e-06, |
|
"loss": 0.2615, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.2046382730455075e-06, |
|
"loss": 0.3992, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.201721120186698e-06, |
|
"loss": 0.4608, |
|
"step": 4064 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1988039673278883e-06, |
|
"loss": 0.2661, |
|
"step": 4068 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1958868144690786e-06, |
|
"loss": 0.4447, |
|
"step": 4072 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1929696616102686e-06, |
|
"loss": 0.3915, |
|
"step": 4076 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.190052508751459e-06, |
|
"loss": 0.3283, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.187135355892649e-06, |
|
"loss": 0.3887, |
|
"step": 4084 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.1842182030338392e-06, |
|
"loss": 0.3772, |
|
"step": 4088 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.181301050175029e-06, |
|
"loss": 0.5242, |
|
"step": 4092 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.1783838973162195e-06, |
|
"loss": 0.2624, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.17546674445741e-06, |
|
"loss": 0.4775, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.1725495915986e-06, |
|
"loss": 0.4693, |
|
"step": 4104 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.16963243873979e-06, |
|
"loss": 0.2954, |
|
"step": 4108 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.1667152858809806e-06, |
|
"loss": 0.386, |
|
"step": 4112 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.1637981330221705e-06, |
|
"loss": 0.2375, |
|
"step": 4116 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.160880980163361e-06, |
|
"loss": 0.456, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.157963827304551e-06, |
|
"loss": 0.5585, |
|
"step": 4124 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.155046674445741e-06, |
|
"loss": 0.2531, |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1521295215869315e-06, |
|
"loss": 0.3648, |
|
"step": 4132 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1492123687281215e-06, |
|
"loss": 0.4723, |
|
"step": 4136 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.146295215869312e-06, |
|
"loss": 0.2357, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1433780630105018e-06, |
|
"loss": 0.4827, |
|
"step": 4144 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.140460910151692e-06, |
|
"loss": 0.3387, |
|
"step": 4148 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.137543757292882e-06, |
|
"loss": 0.3946, |
|
"step": 4152 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.1346266044340724e-06, |
|
"loss": 0.1965, |
|
"step": 4156 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.1317094515752628e-06, |
|
"loss": 0.3286, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.128792298716453e-06, |
|
"loss": 0.3196, |
|
"step": 4164 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.125875145857643e-06, |
|
"loss": 0.2477, |
|
"step": 4168 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.1229579929988334e-06, |
|
"loss": 0.3666, |
|
"step": 4172 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.1200408401400234e-06, |
|
"loss": 0.5021, |
|
"step": 4176 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.117852975495916e-06, |
|
"loss": 0.5295, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.1149358226371064e-06, |
|
"loss": 0.4222, |
|
"step": 4184 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.1120186697782964e-06, |
|
"loss": 0.4243, |
|
"step": 4188 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.1091015169194867e-06, |
|
"loss": 0.5245, |
|
"step": 4192 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.106184364060677e-06, |
|
"loss": 0.2765, |
|
"step": 4196 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.103267211201867e-06, |
|
"loss": 0.4902, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.1003500583430574e-06, |
|
"loss": 0.2659, |
|
"step": 4204 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0974329054842477e-06, |
|
"loss": 0.512, |
|
"step": 4208 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0945157526254377e-06, |
|
"loss": 0.369, |
|
"step": 4212 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.091598599766628e-06, |
|
"loss": 0.4157, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.088681446907818e-06, |
|
"loss": 0.2203, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0857642940490083e-06, |
|
"loss": 0.2195, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0828471411901983e-06, |
|
"loss": 0.4235, |
|
"step": 4228 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0799299883313886e-06, |
|
"loss": 0.2623, |
|
"step": 4232 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.077012835472579e-06, |
|
"loss": 0.4724, |
|
"step": 4236 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0740956826137694e-06, |
|
"loss": 0.5003, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0711785297549593e-06, |
|
"loss": 0.3844, |
|
"step": 4244 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.0682613768961497e-06, |
|
"loss": 0.402, |
|
"step": 4248 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.0653442240373396e-06, |
|
"loss": 0.2482, |
|
"step": 4252 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.06242707117853e-06, |
|
"loss": 0.3593, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.05950991831972e-06, |
|
"loss": 0.2561, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.0565927654609103e-06, |
|
"loss": 0.4176, |
|
"step": 4264 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.0536756126021e-06, |
|
"loss": 0.2596, |
|
"step": 4268 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.050758459743291e-06, |
|
"loss": 0.3554, |
|
"step": 4272 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.047841306884481e-06, |
|
"loss": 0.3388, |
|
"step": 4276 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.0449241540256713e-06, |
|
"loss": 0.4103, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.0420070011668612e-06, |
|
"loss": 0.3023, |
|
"step": 4284 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.0390898483080516e-06, |
|
"loss": 0.4772, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.0361726954492415e-06, |
|
"loss": 0.2974, |
|
"step": 4292 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.033255542590432e-06, |
|
"loss": 0.4114, |
|
"step": 4296 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.030338389731622e-06, |
|
"loss": 0.2369, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.027421236872812e-06, |
|
"loss": 0.3393, |
|
"step": 4304 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.0245040840140026e-06, |
|
"loss": 0.2298, |
|
"step": 4308 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.021586931155193e-06, |
|
"loss": 0.3373, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.018669778296383e-06, |
|
"loss": 0.3498, |
|
"step": 4316 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.0157526254375732e-06, |
|
"loss": 0.4742, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.012835472578763e-06, |
|
"loss": 0.3716, |
|
"step": 4324 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.0099183197199535e-06, |
|
"loss": 0.4141, |
|
"step": 4328 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.0070011668611435e-06, |
|
"loss": 0.3451, |
|
"step": 4332 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.004084014002334e-06, |
|
"loss": 0.3455, |
|
"step": 4336 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.001166861143524e-06, |
|
"loss": 0.4263, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9982497082847146e-06, |
|
"loss": 0.3155, |
|
"step": 4344 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9953325554259045e-06, |
|
"loss": 0.254, |
|
"step": 4348 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.992415402567095e-06, |
|
"loss": 0.2274, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.989498249708285e-06, |
|
"loss": 0.2758, |
|
"step": 4356 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.986581096849475e-06, |
|
"loss": 0.2397, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.983663943990665e-06, |
|
"loss": 0.2506, |
|
"step": 4364 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9807467911318555e-06, |
|
"loss": 0.4136, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.977829638273046e-06, |
|
"loss": 0.3309, |
|
"step": 4372 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.974912485414236e-06, |
|
"loss": 0.2924, |
|
"step": 4376 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.971995332555426e-06, |
|
"loss": 0.1428, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9690781796966165e-06, |
|
"loss": 0.4771, |
|
"step": 4384 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.9661610268378064e-06, |
|
"loss": 0.4117, |
|
"step": 4388 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.9632438739789968e-06, |
|
"loss": 0.1177, |
|
"step": 4392 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.9603267211201867e-06, |
|
"loss": 0.538, |
|
"step": 4396 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.957409568261377e-06, |
|
"loss": 0.2241, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.954492415402567e-06, |
|
"loss": 0.4846, |
|
"step": 4404 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.9515752625437574e-06, |
|
"loss": 0.3649, |
|
"step": 4408 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.9486581096849477e-06, |
|
"loss": 0.4989, |
|
"step": 4412 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.945740956826138e-06, |
|
"loss": 0.3074, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.942823803967328e-06, |
|
"loss": 0.3055, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.9399066511085184e-06, |
|
"loss": 0.3966, |
|
"step": 4424 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.9369894982497083e-06, |
|
"loss": 0.3061, |
|
"step": 4428 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.9340723453908987e-06, |
|
"loss": 0.3059, |
|
"step": 4432 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.9311551925320886e-06, |
|
"loss": 0.4346, |
|
"step": 4436 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.928238039673279e-06, |
|
"loss": 0.4984, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.9253208868144694e-06, |
|
"loss": 0.2097, |
|
"step": 4444 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.9224037339556593e-06, |
|
"loss": 0.4416, |
|
"step": 4448 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.9194865810968497e-06, |
|
"loss": 0.3917, |
|
"step": 4452 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.9165694282380396e-06, |
|
"loss": 0.2051, |
|
"step": 4456 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.91365227537923e-06, |
|
"loss": 0.385, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.9107351225204203e-06, |
|
"loss": 0.5121, |
|
"step": 4464 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.9078179696616103e-06, |
|
"loss": 0.3747, |
|
"step": 4468 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.9049008168028008e-06, |
|
"loss": 0.2688, |
|
"step": 4472 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.901983663943991e-06, |
|
"loss": 0.2459, |
|
"step": 4476 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8990665110851811e-06, |
|
"loss": 0.2148, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8961493582263713e-06, |
|
"loss": 0.2333, |
|
"step": 4484 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8932322053675614e-06, |
|
"loss": 0.266, |
|
"step": 4488 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8903150525087516e-06, |
|
"loss": 0.3613, |
|
"step": 4492 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8873978996499417e-06, |
|
"loss": 0.1306, |
|
"step": 4496 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.884480746791132e-06, |
|
"loss": 0.2521, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.881563593932322e-06, |
|
"loss": 0.2997, |
|
"step": 4504 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.8786464410735124e-06, |
|
"loss": 0.4594, |
|
"step": 4508 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.8757292882147028e-06, |
|
"loss": 0.2661, |
|
"step": 4512 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.872812135355893e-06, |
|
"loss": 0.2776, |
|
"step": 4516 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.869894982497083e-06, |
|
"loss": 0.4221, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.8669778296382732e-06, |
|
"loss": 0.234, |
|
"step": 4524 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8640606767794634e-06, |
|
"loss": 0.4304, |
|
"step": 4528 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8611435239206535e-06, |
|
"loss": 0.476, |
|
"step": 4532 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8582263710618437e-06, |
|
"loss": 0.2214, |
|
"step": 4536 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.855309218203034e-06, |
|
"loss": 0.2805, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8523920653442242e-06, |
|
"loss": 0.1151, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8494749124854143e-06, |
|
"loss": 0.2069, |
|
"step": 4548 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8465577596266047e-06, |
|
"loss": 0.4162, |
|
"step": 4552 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8436406067677949e-06, |
|
"loss": 0.3101, |
|
"step": 4556 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.840723453908985e-06, |
|
"loss": 0.272, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8378063010501752e-06, |
|
"loss": 0.4017, |
|
"step": 4564 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8348891481913653e-06, |
|
"loss": 0.3501, |
|
"step": 4568 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8319719953325557e-06, |
|
"loss": 0.2287, |
|
"step": 4572 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.8290548424737458e-06, |
|
"loss": 0.4951, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.826137689614936e-06, |
|
"loss": 0.5831, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.8232205367561261e-06, |
|
"loss": 0.3221, |
|
"step": 4584 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.8203033838973163e-06, |
|
"loss": 0.3682, |
|
"step": 4588 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.8173862310385066e-06, |
|
"loss": 0.299, |
|
"step": 4592 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.8144690781796968e-06, |
|
"loss": 0.1729, |
|
"step": 4596 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.811551925320887e-06, |
|
"loss": 0.214, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.808634772462077e-06, |
|
"loss": 0.3696, |
|
"step": 4604 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.8057176196032674e-06, |
|
"loss": 0.4532, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.8028004667444576e-06, |
|
"loss": 0.241, |
|
"step": 4612 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7998833138856477e-06, |
|
"loss": 0.3685, |
|
"step": 4616 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.796966161026838e-06, |
|
"loss": 0.3708, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.794049008168028e-06, |
|
"loss": 0.3228, |
|
"step": 4624 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7911318553092182e-06, |
|
"loss": 0.2311, |
|
"step": 4628 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7882147024504086e-06, |
|
"loss": 0.3598, |
|
"step": 4632 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7852975495915987e-06, |
|
"loss": 0.4134, |
|
"step": 4636 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.782380396732789e-06, |
|
"loss": 0.2711, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7794632438739792e-06, |
|
"loss": 0.503, |
|
"step": 4644 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7765460910151694e-06, |
|
"loss": 0.2192, |
|
"step": 4648 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7736289381563595e-06, |
|
"loss": 0.1547, |
|
"step": 4652 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7707117852975497e-06, |
|
"loss": 0.3002, |
|
"step": 4656 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7677946324387398e-06, |
|
"loss": 0.3846, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.76487747957993e-06, |
|
"loss": 0.4236, |
|
"step": 4664 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7619603267211201e-06, |
|
"loss": 0.3245, |
|
"step": 4668 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.7590431738623107e-06, |
|
"loss": 0.3547, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.7561260210035008e-06, |
|
"loss": 0.2355, |
|
"step": 4676 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.753208868144691e-06, |
|
"loss": 0.417, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.7502917152858811e-06, |
|
"loss": 0.4419, |
|
"step": 4684 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.7473745624270713e-06, |
|
"loss": 0.4115, |
|
"step": 4688 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.7444574095682615e-06, |
|
"loss": 0.2582, |
|
"step": 4692 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.7415402567094516e-06, |
|
"loss": 0.3695, |
|
"step": 4696 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.7386231038506418e-06, |
|
"loss": 0.3948, |
|
"step": 4700 |
|
} |
|
], |
|
"logging_steps": 4, |
|
"max_steps": 7056, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 53908118568960.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|