kullm-solar-S / trainer_state.json
heavytail's picture
Upload folder using huggingface_hub
8cceaf8 verified
raw
history blame
142 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.998299319727891,
"eval_steps": 800,
"global_step": 4700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0,
"loss": 2.0847,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 0,
"loss": 2.5726,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 0,
"loss": 2.2415,
"step": 12
},
{
"epoch": 0.01,
"learning_rate": 0,
"loss": 1.8935,
"step": 16
},
{
"epoch": 0.01,
"learning_rate": 0,
"loss": 2.1994,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0,
"loss": 2.1794,
"step": 24
},
{
"epoch": 0.01,
"learning_rate": 1.3082402064781276e-06,
"loss": 1.5146,
"step": 28
},
{
"epoch": 0.01,
"learning_rate": 1.9623603097171917e-06,
"loss": 0.8902,
"step": 32
},
{
"epoch": 0.02,
"learning_rate": 2.3449960410798955e-06,
"loss": 0.8521,
"step": 36
},
{
"epoch": 0.02,
"learning_rate": 2.6164804129562553e-06,
"loss": 0.7475,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 2.8270600516195322e-06,
"loss": 0.6407,
"step": 44
},
{
"epoch": 0.02,
"learning_rate": 2.99911614431896e-06,
"loss": 0.5996,
"step": 48
},
{
"epoch": 0.02,
"learning_rate": 3.144587497923142e-06,
"loss": 0.733,
"step": 52
},
{
"epoch": 0.02,
"learning_rate": 3.2706005161953197e-06,
"loss": 0.7661,
"step": 56
},
{
"epoch": 0.03,
"learning_rate": 3.381751875681663e-06,
"loss": 0.5892,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 3.4811801548585962e-06,
"loss": 0.8165,
"step": 64
},
{
"epoch": 0.03,
"learning_rate": 3.5711239740096387e-06,
"loss": 0.4039,
"step": 68
},
{
"epoch": 0.03,
"learning_rate": 3.6532362475580235e-06,
"loss": 0.6821,
"step": 72
},
{
"epoch": 0.03,
"learning_rate": 3.7287722169385123e-06,
"loss": 0.8229,
"step": 76
},
{
"epoch": 0.03,
"learning_rate": 3.7987076011622065e-06,
"loss": 0.6447,
"step": 80
},
{
"epoch": 0.04,
"learning_rate": 3.8638158862213e-06,
"loss": 0.4965,
"step": 84
},
{
"epoch": 0.04,
"learning_rate": 3.924720619434383e-06,
"loss": 0.655,
"step": 88
},
{
"epoch": 0.04,
"learning_rate": 3.9819318221826385e-06,
"loss": 0.6818,
"step": 92
},
{
"epoch": 0.04,
"learning_rate": 4.035871978920728e-06,
"loss": 0.743,
"step": 96
},
{
"epoch": 0.04,
"learning_rate": 4.086894990123906e-06,
"loss": 0.8767,
"step": 100
},
{
"epoch": 0.04,
"learning_rate": 4.135300258097659e-06,
"loss": 0.6235,
"step": 104
},
{
"epoch": 0.05,
"learning_rate": 4.18134333252491e-06,
"loss": 0.6074,
"step": 108
},
{
"epoch": 0.05,
"learning_rate": 4.225244077248703e-06,
"loss": 0.6712,
"step": 112
},
{
"epoch": 0.05,
"learning_rate": 4.267193020182443e-06,
"loss": 0.7978,
"step": 116
},
{
"epoch": 0.05,
"learning_rate": 4.3073563507970875e-06,
"loss": 0.4904,
"step": 120
},
{
"epoch": 0.05,
"learning_rate": 4.345879896760937e-06,
"loss": 0.4131,
"step": 124
},
{
"epoch": 0.05,
"learning_rate": 4.3828923201775755e-06,
"loss": 0.6295,
"step": 128
},
{
"epoch": 0.06,
"learning_rate": 4.418507710283431e-06,
"loss": 0.9921,
"step": 132
},
{
"epoch": 0.06,
"learning_rate": 4.45282770440127e-06,
"loss": 0.6667,
"step": 136
},
{
"epoch": 0.06,
"learning_rate": 4.485943236544386e-06,
"loss": 0.657,
"step": 140
},
{
"epoch": 0.06,
"learning_rate": 4.517935989460364e-06,
"loss": 0.7705,
"step": 144
},
{
"epoch": 0.06,
"learning_rate": 4.54887960849498e-06,
"loss": 0.866,
"step": 148
},
{
"epoch": 0.06,
"learning_rate": 4.578840722673447e-06,
"loss": 0.8428,
"step": 152
},
{
"epoch": 0.07,
"learning_rate": 4.607879808611406e-06,
"loss": 0.7855,
"step": 156
},
{
"epoch": 0.07,
"learning_rate": 4.636051925421702e-06,
"loss": 0.7451,
"step": 160
},
{
"epoch": 0.07,
"learning_rate": 4.663407343064547e-06,
"loss": 0.5792,
"step": 164
},
{
"epoch": 0.07,
"learning_rate": 4.689992082159791e-06,
"loss": 0.9155,
"step": 168
},
{
"epoch": 0.07,
"learning_rate": 4.715848379822425e-06,
"loss": 0.7189,
"step": 172
},
{
"epoch": 0.07,
"learning_rate": 4.74101509336297e-06,
"loss": 0.5087,
"step": 176
},
{
"epoch": 0.08,
"learning_rate": 4.76552805154028e-06,
"loss": 0.9235,
"step": 180
},
{
"epoch": 0.08,
"learning_rate": 4.789420361336724e-06,
"loss": 0.7996,
"step": 184
},
{
"epoch": 0.08,
"learning_rate": 4.812722676847563e-06,
"loss": 0.5372,
"step": 188
},
{
"epoch": 0.08,
"learning_rate": 4.835463435763974e-06,
"loss": 0.8693,
"step": 192
},
{
"epoch": 0.08,
"learning_rate": 4.857669068026358e-06,
"loss": 0.6677,
"step": 196
},
{
"epoch": 0.09,
"learning_rate": 4.879364180487766e-06,
"loss": 0.5588,
"step": 200
},
{
"epoch": 0.09,
"learning_rate": 4.900571720823068e-06,
"loss": 0.6868,
"step": 204
},
{
"epoch": 0.09,
"learning_rate": 4.921313123421507e-06,
"loss": 0.6329,
"step": 208
},
{
"epoch": 0.09,
"learning_rate": 4.941608439588058e-06,
"loss": 0.5781,
"step": 212
},
{
"epoch": 0.09,
"learning_rate": 4.9614764540361516e-06,
"loss": 0.8552,
"step": 216
},
{
"epoch": 0.09,
"learning_rate": 4.980934789368156e-06,
"loss": 0.9137,
"step": 220
},
{
"epoch": 0.1,
"learning_rate": 5e-06,
"loss": 0.9521,
"step": 224
},
{
"epoch": 0.1,
"learning_rate": 4.997812135355893e-06,
"loss": 0.527,
"step": 228
},
{
"epoch": 0.1,
"learning_rate": 4.994894982497083e-06,
"loss": 0.6134,
"step": 232
},
{
"epoch": 0.1,
"learning_rate": 4.991977829638274e-06,
"loss": 0.9135,
"step": 236
},
{
"epoch": 0.1,
"learning_rate": 4.989060676779464e-06,
"loss": 0.6525,
"step": 240
},
{
"epoch": 0.1,
"learning_rate": 4.986143523920654e-06,
"loss": 0.9277,
"step": 244
},
{
"epoch": 0.11,
"learning_rate": 4.983226371061844e-06,
"loss": 1.0032,
"step": 248
},
{
"epoch": 0.11,
"learning_rate": 4.980309218203034e-06,
"loss": 0.7763,
"step": 252
},
{
"epoch": 0.11,
"learning_rate": 4.977392065344224e-06,
"loss": 0.7304,
"step": 256
},
{
"epoch": 0.11,
"learning_rate": 4.974474912485414e-06,
"loss": 0.7923,
"step": 260
},
{
"epoch": 0.11,
"learning_rate": 4.971557759626604e-06,
"loss": 0.9643,
"step": 264
},
{
"epoch": 0.11,
"learning_rate": 4.968640606767795e-06,
"loss": 0.6124,
"step": 268
},
{
"epoch": 0.12,
"learning_rate": 4.965723453908986e-06,
"loss": 0.5817,
"step": 272
},
{
"epoch": 0.12,
"learning_rate": 4.962806301050176e-06,
"loss": 0.7712,
"step": 276
},
{
"epoch": 0.12,
"learning_rate": 4.959889148191366e-06,
"loss": 0.4688,
"step": 280
},
{
"epoch": 0.12,
"learning_rate": 4.956971995332556e-06,
"loss": 0.7547,
"step": 284
},
{
"epoch": 0.12,
"learning_rate": 4.954054842473746e-06,
"loss": 0.7743,
"step": 288
},
{
"epoch": 0.12,
"learning_rate": 4.951137689614936e-06,
"loss": 0.71,
"step": 292
},
{
"epoch": 0.13,
"learning_rate": 4.948220536756126e-06,
"loss": 0.806,
"step": 296
},
{
"epoch": 0.13,
"learning_rate": 4.945303383897317e-06,
"loss": 0.5964,
"step": 300
},
{
"epoch": 0.13,
"learning_rate": 4.942386231038507e-06,
"loss": 0.6401,
"step": 304
},
{
"epoch": 0.13,
"learning_rate": 4.939469078179697e-06,
"loss": 0.887,
"step": 308
},
{
"epoch": 0.13,
"learning_rate": 4.936551925320887e-06,
"loss": 0.4782,
"step": 312
},
{
"epoch": 0.13,
"learning_rate": 4.933634772462078e-06,
"loss": 0.6871,
"step": 316
},
{
"epoch": 0.14,
"learning_rate": 4.930717619603268e-06,
"loss": 0.7753,
"step": 320
},
{
"epoch": 0.14,
"learning_rate": 4.9278004667444575e-06,
"loss": 0.671,
"step": 324
},
{
"epoch": 0.14,
"learning_rate": 4.9248833138856475e-06,
"loss": 0.5642,
"step": 328
},
{
"epoch": 0.14,
"learning_rate": 4.921966161026838e-06,
"loss": 0.5292,
"step": 332
},
{
"epoch": 0.14,
"learning_rate": 4.919049008168029e-06,
"loss": 0.712,
"step": 336
},
{
"epoch": 0.14,
"learning_rate": 4.916131855309218e-06,
"loss": 0.7544,
"step": 340
},
{
"epoch": 0.15,
"learning_rate": 4.913214702450409e-06,
"loss": 0.7463,
"step": 344
},
{
"epoch": 0.15,
"learning_rate": 4.910297549591599e-06,
"loss": 0.5543,
"step": 348
},
{
"epoch": 0.15,
"learning_rate": 4.90738039673279e-06,
"loss": 0.6754,
"step": 352
},
{
"epoch": 0.15,
"learning_rate": 4.90446324387398e-06,
"loss": 0.6768,
"step": 356
},
{
"epoch": 0.15,
"learning_rate": 4.9015460910151695e-06,
"loss": 0.6952,
"step": 360
},
{
"epoch": 0.15,
"learning_rate": 4.8986289381563595e-06,
"loss": 0.8318,
"step": 364
},
{
"epoch": 0.16,
"learning_rate": 4.89571178529755e-06,
"loss": 0.6242,
"step": 368
},
{
"epoch": 0.16,
"learning_rate": 4.89279463243874e-06,
"loss": 0.9217,
"step": 372
},
{
"epoch": 0.16,
"learning_rate": 4.88987747957993e-06,
"loss": 0.4959,
"step": 376
},
{
"epoch": 0.16,
"learning_rate": 4.886960326721121e-06,
"loss": 0.8203,
"step": 380
},
{
"epoch": 0.16,
"learning_rate": 4.884043173862311e-06,
"loss": 0.7889,
"step": 384
},
{
"epoch": 0.16,
"learning_rate": 4.881126021003501e-06,
"loss": 0.6178,
"step": 388
},
{
"epoch": 0.17,
"learning_rate": 4.878208868144691e-06,
"loss": 1.0419,
"step": 392
},
{
"epoch": 0.17,
"learning_rate": 4.8752917152858815e-06,
"loss": 0.9276,
"step": 396
},
{
"epoch": 0.17,
"learning_rate": 4.8723745624270714e-06,
"loss": 0.6587,
"step": 400
},
{
"epoch": 0.17,
"learning_rate": 4.869457409568261e-06,
"loss": 0.5996,
"step": 404
},
{
"epoch": 0.17,
"learning_rate": 4.866540256709452e-06,
"loss": 0.7183,
"step": 408
},
{
"epoch": 0.18,
"learning_rate": 4.863623103850642e-06,
"loss": 0.5382,
"step": 412
},
{
"epoch": 0.18,
"learning_rate": 4.860705950991833e-06,
"loss": 0.6379,
"step": 416
},
{
"epoch": 0.18,
"learning_rate": 4.857788798133022e-06,
"loss": 0.6881,
"step": 420
},
{
"epoch": 0.18,
"learning_rate": 4.854871645274213e-06,
"loss": 0.708,
"step": 424
},
{
"epoch": 0.18,
"learning_rate": 4.851954492415403e-06,
"loss": 0.5051,
"step": 428
},
{
"epoch": 0.18,
"learning_rate": 4.8490373395565935e-06,
"loss": 0.6938,
"step": 432
},
{
"epoch": 0.19,
"learning_rate": 4.8461201866977834e-06,
"loss": 0.7395,
"step": 436
},
{
"epoch": 0.19,
"learning_rate": 4.843203033838973e-06,
"loss": 0.8443,
"step": 440
},
{
"epoch": 0.19,
"learning_rate": 4.840285880980164e-06,
"loss": 0.6688,
"step": 444
},
{
"epoch": 0.19,
"learning_rate": 4.837368728121354e-06,
"loss": 0.7241,
"step": 448
},
{
"epoch": 0.19,
"learning_rate": 4.834451575262544e-06,
"loss": 0.6182,
"step": 452
},
{
"epoch": 0.19,
"learning_rate": 4.831534422403734e-06,
"loss": 0.5827,
"step": 456
},
{
"epoch": 0.2,
"learning_rate": 4.828617269544925e-06,
"loss": 0.5035,
"step": 460
},
{
"epoch": 0.2,
"learning_rate": 4.825700116686115e-06,
"loss": 0.6138,
"step": 464
},
{
"epoch": 0.2,
"learning_rate": 4.822782963827305e-06,
"loss": 0.7013,
"step": 468
},
{
"epoch": 0.2,
"learning_rate": 4.8198658109684954e-06,
"loss": 0.5948,
"step": 472
},
{
"epoch": 0.2,
"learning_rate": 4.816948658109685e-06,
"loss": 0.6721,
"step": 476
},
{
"epoch": 0.2,
"learning_rate": 4.814031505250875e-06,
"loss": 0.5647,
"step": 480
},
{
"epoch": 0.21,
"learning_rate": 4.811114352392065e-06,
"loss": 0.7064,
"step": 484
},
{
"epoch": 0.21,
"learning_rate": 4.808197199533256e-06,
"loss": 0.6298,
"step": 488
},
{
"epoch": 0.21,
"learning_rate": 4.805280046674446e-06,
"loss": 0.526,
"step": 492
},
{
"epoch": 0.21,
"learning_rate": 4.802362893815637e-06,
"loss": 0.5712,
"step": 496
},
{
"epoch": 0.21,
"learning_rate": 4.799445740956827e-06,
"loss": 0.7529,
"step": 500
},
{
"epoch": 0.21,
"learning_rate": 4.796528588098017e-06,
"loss": 1.0192,
"step": 504
},
{
"epoch": 0.22,
"learning_rate": 4.793611435239207e-06,
"loss": 0.9215,
"step": 508
},
{
"epoch": 0.22,
"learning_rate": 4.790694282380397e-06,
"loss": 0.8837,
"step": 512
},
{
"epoch": 0.22,
"learning_rate": 4.787777129521587e-06,
"loss": 0.8698,
"step": 516
},
{
"epoch": 0.22,
"learning_rate": 4.784859976662777e-06,
"loss": 0.7368,
"step": 520
},
{
"epoch": 0.22,
"learning_rate": 4.781942823803968e-06,
"loss": 0.5266,
"step": 524
},
{
"epoch": 0.22,
"learning_rate": 4.779025670945158e-06,
"loss": 0.6157,
"step": 528
},
{
"epoch": 0.23,
"learning_rate": 4.776108518086348e-06,
"loss": 0.5561,
"step": 532
},
{
"epoch": 0.23,
"learning_rate": 4.773191365227539e-06,
"loss": 0.5638,
"step": 536
},
{
"epoch": 0.23,
"learning_rate": 4.770274212368729e-06,
"loss": 0.7433,
"step": 540
},
{
"epoch": 0.23,
"learning_rate": 4.7673570595099186e-06,
"loss": 0.4696,
"step": 544
},
{
"epoch": 0.23,
"learning_rate": 4.7644399066511085e-06,
"loss": 0.4952,
"step": 548
},
{
"epoch": 0.23,
"learning_rate": 4.761522753792299e-06,
"loss": 0.6735,
"step": 552
},
{
"epoch": 0.24,
"learning_rate": 4.758605600933489e-06,
"loss": 0.6756,
"step": 556
},
{
"epoch": 0.24,
"learning_rate": 4.755688448074679e-06,
"loss": 0.8812,
"step": 560
},
{
"epoch": 0.24,
"learning_rate": 4.752771295215869e-06,
"loss": 0.5452,
"step": 564
},
{
"epoch": 0.24,
"learning_rate": 4.74985414235706e-06,
"loss": 0.5208,
"step": 568
},
{
"epoch": 0.24,
"learning_rate": 4.746936989498251e-06,
"loss": 0.8629,
"step": 572
},
{
"epoch": 0.24,
"learning_rate": 4.744019836639441e-06,
"loss": 0.6721,
"step": 576
},
{
"epoch": 0.25,
"learning_rate": 4.7411026837806305e-06,
"loss": 0.7716,
"step": 580
},
{
"epoch": 0.25,
"learning_rate": 4.7381855309218205e-06,
"loss": 0.3795,
"step": 584
},
{
"epoch": 0.25,
"learning_rate": 4.735268378063011e-06,
"loss": 0.6173,
"step": 588
},
{
"epoch": 0.25,
"learning_rate": 4.732351225204201e-06,
"loss": 0.5626,
"step": 592
},
{
"epoch": 0.25,
"learning_rate": 4.729434072345391e-06,
"loss": 0.7762,
"step": 596
},
{
"epoch": 0.26,
"learning_rate": 4.726516919486582e-06,
"loss": 0.658,
"step": 600
},
{
"epoch": 0.26,
"learning_rate": 4.723599766627772e-06,
"loss": 0.6109,
"step": 604
},
{
"epoch": 0.26,
"learning_rate": 4.720682613768962e-06,
"loss": 0.7475,
"step": 608
},
{
"epoch": 0.26,
"learning_rate": 4.717765460910152e-06,
"loss": 0.6066,
"step": 612
},
{
"epoch": 0.26,
"learning_rate": 4.7148483080513425e-06,
"loss": 0.4173,
"step": 616
},
{
"epoch": 0.26,
"learning_rate": 4.7119311551925325e-06,
"loss": 0.6297,
"step": 620
},
{
"epoch": 0.27,
"learning_rate": 4.709014002333722e-06,
"loss": 0.655,
"step": 624
},
{
"epoch": 0.27,
"learning_rate": 4.706096849474912e-06,
"loss": 0.8621,
"step": 628
},
{
"epoch": 0.27,
"learning_rate": 4.703179696616103e-06,
"loss": 0.5386,
"step": 632
},
{
"epoch": 0.27,
"learning_rate": 4.700262543757294e-06,
"loss": 0.7833,
"step": 636
},
{
"epoch": 0.27,
"learning_rate": 4.697345390898483e-06,
"loss": 0.5759,
"step": 640
},
{
"epoch": 0.27,
"learning_rate": 4.694428238039674e-06,
"loss": 0.7122,
"step": 644
},
{
"epoch": 0.28,
"learning_rate": 4.691511085180864e-06,
"loss": 0.6034,
"step": 648
},
{
"epoch": 0.28,
"learning_rate": 4.6885939323220545e-06,
"loss": 0.8556,
"step": 652
},
{
"epoch": 0.28,
"learning_rate": 4.6856767794632445e-06,
"loss": 0.6421,
"step": 656
},
{
"epoch": 0.28,
"learning_rate": 4.682759626604434e-06,
"loss": 0.6008,
"step": 660
},
{
"epoch": 0.28,
"learning_rate": 4.679842473745624e-06,
"loss": 0.7663,
"step": 664
},
{
"epoch": 0.28,
"learning_rate": 4.676925320886815e-06,
"loss": 0.5928,
"step": 668
},
{
"epoch": 0.29,
"learning_rate": 4.674008168028005e-06,
"loss": 0.7544,
"step": 672
},
{
"epoch": 0.29,
"learning_rate": 4.671091015169195e-06,
"loss": 0.5778,
"step": 676
},
{
"epoch": 0.29,
"learning_rate": 4.668173862310386e-06,
"loss": 0.7285,
"step": 680
},
{
"epoch": 0.29,
"learning_rate": 4.665256709451576e-06,
"loss": 0.5125,
"step": 684
},
{
"epoch": 0.29,
"learning_rate": 4.662339556592766e-06,
"loss": 0.6717,
"step": 688
},
{
"epoch": 0.29,
"learning_rate": 4.659422403733956e-06,
"loss": 0.8691,
"step": 692
},
{
"epoch": 0.3,
"learning_rate": 4.656505250875146e-06,
"loss": 0.4915,
"step": 696
},
{
"epoch": 0.3,
"learning_rate": 4.653588098016336e-06,
"loss": 0.789,
"step": 700
},
{
"epoch": 0.3,
"learning_rate": 4.650670945157526e-06,
"loss": 0.9127,
"step": 704
},
{
"epoch": 0.3,
"learning_rate": 4.647753792298717e-06,
"loss": 0.6563,
"step": 708
},
{
"epoch": 0.3,
"learning_rate": 4.644836639439907e-06,
"loss": 0.4648,
"step": 712
},
{
"epoch": 0.3,
"learning_rate": 4.641919486581098e-06,
"loss": 0.6367,
"step": 716
},
{
"epoch": 0.31,
"learning_rate": 4.639002333722287e-06,
"loss": 0.7212,
"step": 720
},
{
"epoch": 0.31,
"learning_rate": 4.636085180863478e-06,
"loss": 0.6034,
"step": 724
},
{
"epoch": 0.31,
"learning_rate": 4.633168028004668e-06,
"loss": 0.4951,
"step": 728
},
{
"epoch": 0.31,
"learning_rate": 4.630250875145858e-06,
"loss": 0.4122,
"step": 732
},
{
"epoch": 0.31,
"learning_rate": 4.627333722287048e-06,
"loss": 0.467,
"step": 736
},
{
"epoch": 0.31,
"learning_rate": 4.624416569428238e-06,
"loss": 0.7467,
"step": 740
},
{
"epoch": 0.32,
"learning_rate": 4.621499416569429e-06,
"loss": 0.6229,
"step": 744
},
{
"epoch": 0.32,
"learning_rate": 4.618582263710619e-06,
"loss": 0.7651,
"step": 748
},
{
"epoch": 0.32,
"learning_rate": 4.615665110851809e-06,
"loss": 0.8044,
"step": 752
},
{
"epoch": 0.32,
"learning_rate": 4.612747957992999e-06,
"loss": 0.4718,
"step": 756
},
{
"epoch": 0.32,
"learning_rate": 4.60983080513419e-06,
"loss": 0.5431,
"step": 760
},
{
"epoch": 0.32,
"learning_rate": 4.60691365227538e-06,
"loss": 0.5484,
"step": 764
},
{
"epoch": 0.33,
"learning_rate": 4.6039964994165695e-06,
"loss": 0.8095,
"step": 768
},
{
"epoch": 0.33,
"learning_rate": 4.60107934655776e-06,
"loss": 0.6142,
"step": 772
},
{
"epoch": 0.33,
"learning_rate": 4.59816219369895e-06,
"loss": 0.6883,
"step": 776
},
{
"epoch": 0.33,
"learning_rate": 4.59524504084014e-06,
"loss": 0.7341,
"step": 780
},
{
"epoch": 0.33,
"learning_rate": 4.59232788798133e-06,
"loss": 0.9504,
"step": 784
},
{
"epoch": 0.34,
"learning_rate": 4.589410735122521e-06,
"loss": 0.7445,
"step": 788
},
{
"epoch": 0.34,
"learning_rate": 4.587222870478414e-06,
"loss": 0.7637,
"step": 792
},
{
"epoch": 0.34,
"learning_rate": 4.5843057176196035e-06,
"loss": 1.0596,
"step": 796
},
{
"epoch": 0.34,
"learning_rate": 4.5813885647607935e-06,
"loss": 0.5224,
"step": 800
},
{
"epoch": 0.34,
"learning_rate": 4.578471411901984e-06,
"loss": 0.6143,
"step": 804
},
{
"epoch": 0.34,
"learning_rate": 4.575554259043174e-06,
"loss": 0.425,
"step": 808
},
{
"epoch": 0.35,
"learning_rate": 4.572637106184364e-06,
"loss": 0.7089,
"step": 812
},
{
"epoch": 0.35,
"learning_rate": 4.569719953325555e-06,
"loss": 0.6148,
"step": 816
},
{
"epoch": 0.35,
"learning_rate": 4.566802800466745e-06,
"loss": 0.5647,
"step": 820
},
{
"epoch": 0.35,
"learning_rate": 4.563885647607935e-06,
"loss": 0.7274,
"step": 824
},
{
"epoch": 0.35,
"learning_rate": 4.560968494749125e-06,
"loss": 0.8243,
"step": 828
},
{
"epoch": 0.35,
"learning_rate": 4.5580513418903155e-06,
"loss": 0.5999,
"step": 832
},
{
"epoch": 0.36,
"learning_rate": 4.5551341890315054e-06,
"loss": 0.4957,
"step": 836
},
{
"epoch": 0.36,
"learning_rate": 4.552217036172696e-06,
"loss": 0.7591,
"step": 840
},
{
"epoch": 0.36,
"learning_rate": 4.549299883313886e-06,
"loss": 0.4182,
"step": 844
},
{
"epoch": 0.36,
"learning_rate": 4.546382730455076e-06,
"loss": 0.5964,
"step": 848
},
{
"epoch": 0.36,
"learning_rate": 4.543465577596267e-06,
"loss": 0.6322,
"step": 852
},
{
"epoch": 0.36,
"learning_rate": 4.540548424737457e-06,
"loss": 0.4782,
"step": 856
},
{
"epoch": 0.37,
"learning_rate": 4.537631271878647e-06,
"loss": 0.5883,
"step": 860
},
{
"epoch": 0.37,
"learning_rate": 4.534714119019837e-06,
"loss": 0.5512,
"step": 864
},
{
"epoch": 0.37,
"learning_rate": 4.5317969661610275e-06,
"loss": 0.6806,
"step": 868
},
{
"epoch": 0.37,
"learning_rate": 4.5288798133022174e-06,
"loss": 0.6884,
"step": 872
},
{
"epoch": 0.37,
"learning_rate": 4.525962660443407e-06,
"loss": 0.5685,
"step": 876
},
{
"epoch": 0.37,
"learning_rate": 4.523045507584598e-06,
"loss": 0.5336,
"step": 880
},
{
"epoch": 0.38,
"learning_rate": 4.520128354725788e-06,
"loss": 0.508,
"step": 884
},
{
"epoch": 0.38,
"learning_rate": 4.517211201866978e-06,
"loss": 0.4367,
"step": 888
},
{
"epoch": 0.38,
"learning_rate": 4.514294049008168e-06,
"loss": 0.9553,
"step": 892
},
{
"epoch": 0.38,
"learning_rate": 4.511376896149359e-06,
"loss": 0.8762,
"step": 896
},
{
"epoch": 0.38,
"learning_rate": 4.508459743290549e-06,
"loss": 0.8939,
"step": 900
},
{
"epoch": 0.38,
"learning_rate": 4.505542590431739e-06,
"loss": 0.5025,
"step": 904
},
{
"epoch": 0.39,
"learning_rate": 4.5026254375729286e-06,
"loss": 0.8958,
"step": 908
},
{
"epoch": 0.39,
"learning_rate": 4.499708284714119e-06,
"loss": 0.7813,
"step": 912
},
{
"epoch": 0.39,
"learning_rate": 4.49679113185531e-06,
"loss": 0.6857,
"step": 916
},
{
"epoch": 0.39,
"learning_rate": 4.4938739789965e-06,
"loss": 0.7198,
"step": 920
},
{
"epoch": 0.39,
"learning_rate": 4.49095682613769e-06,
"loss": 0.5257,
"step": 924
},
{
"epoch": 0.39,
"learning_rate": 4.48803967327888e-06,
"loss": 0.542,
"step": 928
},
{
"epoch": 0.4,
"learning_rate": 4.485122520420071e-06,
"loss": 0.4848,
"step": 932
},
{
"epoch": 0.4,
"learning_rate": 4.482205367561261e-06,
"loss": 0.7363,
"step": 936
},
{
"epoch": 0.4,
"learning_rate": 4.479288214702451e-06,
"loss": 0.8313,
"step": 940
},
{
"epoch": 0.4,
"learning_rate": 4.476371061843641e-06,
"loss": 0.6864,
"step": 944
},
{
"epoch": 0.4,
"learning_rate": 4.473453908984831e-06,
"loss": 0.7911,
"step": 948
},
{
"epoch": 0.4,
"learning_rate": 4.470536756126021e-06,
"loss": 0.4418,
"step": 952
},
{
"epoch": 0.41,
"learning_rate": 4.467619603267211e-06,
"loss": 0.7467,
"step": 956
},
{
"epoch": 0.41,
"learning_rate": 4.464702450408402e-06,
"loss": 0.5449,
"step": 960
},
{
"epoch": 0.41,
"learning_rate": 4.461785297549592e-06,
"loss": 0.5699,
"step": 964
},
{
"epoch": 0.41,
"learning_rate": 4.458868144690782e-06,
"loss": 0.5095,
"step": 968
},
{
"epoch": 0.41,
"learning_rate": 4.455950991831972e-06,
"loss": 0.6546,
"step": 972
},
{
"epoch": 0.41,
"learning_rate": 4.453033838973163e-06,
"loss": 0.5868,
"step": 976
},
{
"epoch": 0.42,
"learning_rate": 4.450116686114353e-06,
"loss": 0.7554,
"step": 980
},
{
"epoch": 0.42,
"learning_rate": 4.4471995332555425e-06,
"loss": 0.7272,
"step": 984
},
{
"epoch": 0.42,
"learning_rate": 4.444282380396733e-06,
"loss": 0.5532,
"step": 988
},
{
"epoch": 0.42,
"learning_rate": 4.441365227537923e-06,
"loss": 0.5618,
"step": 992
},
{
"epoch": 0.42,
"learning_rate": 4.438448074679114e-06,
"loss": 0.5518,
"step": 996
},
{
"epoch": 0.43,
"learning_rate": 4.435530921820304e-06,
"loss": 0.7264,
"step": 1000
},
{
"epoch": 0.43,
"learning_rate": 4.432613768961494e-06,
"loss": 0.5429,
"step": 1004
},
{
"epoch": 0.43,
"learning_rate": 4.429696616102684e-06,
"loss": 0.6046,
"step": 1008
},
{
"epoch": 0.43,
"learning_rate": 4.426779463243875e-06,
"loss": 0.9232,
"step": 1012
},
{
"epoch": 0.43,
"learning_rate": 4.4238623103850645e-06,
"loss": 0.5118,
"step": 1016
},
{
"epoch": 0.43,
"learning_rate": 4.4209451575262545e-06,
"loss": 0.643,
"step": 1020
},
{
"epoch": 0.44,
"learning_rate": 4.418028004667445e-06,
"loss": 0.7772,
"step": 1024
},
{
"epoch": 0.44,
"learning_rate": 4.415110851808635e-06,
"loss": 0.3894,
"step": 1028
},
{
"epoch": 0.44,
"learning_rate": 4.412193698949825e-06,
"loss": 0.557,
"step": 1032
},
{
"epoch": 0.44,
"learning_rate": 4.409276546091015e-06,
"loss": 0.6952,
"step": 1036
},
{
"epoch": 0.44,
"learning_rate": 4.406359393232206e-06,
"loss": 0.7862,
"step": 1040
},
{
"epoch": 0.44,
"learning_rate": 4.403442240373396e-06,
"loss": 0.5889,
"step": 1044
},
{
"epoch": 0.45,
"learning_rate": 4.400525087514586e-06,
"loss": 0.8459,
"step": 1048
},
{
"epoch": 0.45,
"learning_rate": 4.3976079346557765e-06,
"loss": 0.4839,
"step": 1052
},
{
"epoch": 0.45,
"learning_rate": 4.3946907817969665e-06,
"loss": 0.5376,
"step": 1056
},
{
"epoch": 0.45,
"learning_rate": 4.391773628938157e-06,
"loss": 0.6901,
"step": 1060
},
{
"epoch": 0.45,
"learning_rate": 4.388856476079346e-06,
"loss": 0.6058,
"step": 1064
},
{
"epoch": 0.45,
"learning_rate": 4.385939323220537e-06,
"loss": 0.7326,
"step": 1068
},
{
"epoch": 0.46,
"learning_rate": 4.383022170361727e-06,
"loss": 0.5892,
"step": 1072
},
{
"epoch": 0.46,
"learning_rate": 4.380105017502918e-06,
"loss": 0.5026,
"step": 1076
},
{
"epoch": 0.46,
"learning_rate": 4.377187864644108e-06,
"loss": 0.6181,
"step": 1080
},
{
"epoch": 0.46,
"learning_rate": 4.374270711785298e-06,
"loss": 0.6994,
"step": 1084
},
{
"epoch": 0.46,
"learning_rate": 4.3713535589264885e-06,
"loss": 0.7436,
"step": 1088
},
{
"epoch": 0.46,
"learning_rate": 4.3684364060676785e-06,
"loss": 0.8489,
"step": 1092
},
{
"epoch": 0.47,
"learning_rate": 4.365519253208868e-06,
"loss": 0.4957,
"step": 1096
},
{
"epoch": 0.47,
"learning_rate": 4.362602100350058e-06,
"loss": 0.5761,
"step": 1100
},
{
"epoch": 0.47,
"learning_rate": 4.359684947491249e-06,
"loss": 0.5296,
"step": 1104
},
{
"epoch": 0.47,
"learning_rate": 4.356767794632439e-06,
"loss": 0.4946,
"step": 1108
},
{
"epoch": 0.47,
"learning_rate": 4.353850641773629e-06,
"loss": 0.5818,
"step": 1112
},
{
"epoch": 0.47,
"learning_rate": 4.35093348891482e-06,
"loss": 0.7388,
"step": 1116
},
{
"epoch": 0.48,
"learning_rate": 4.34801633605601e-06,
"loss": 0.7138,
"step": 1120
},
{
"epoch": 0.48,
"learning_rate": 4.3450991831972e-06,
"loss": 0.5748,
"step": 1124
},
{
"epoch": 0.48,
"learning_rate": 4.34218203033839e-06,
"loss": 0.6169,
"step": 1128
},
{
"epoch": 0.48,
"learning_rate": 4.33926487747958e-06,
"loss": 0.5938,
"step": 1132
},
{
"epoch": 0.48,
"learning_rate": 4.33634772462077e-06,
"loss": 0.6766,
"step": 1136
},
{
"epoch": 0.48,
"learning_rate": 4.333430571761961e-06,
"loss": 0.6961,
"step": 1140
},
{
"epoch": 0.49,
"learning_rate": 4.330513418903151e-06,
"loss": 0.7088,
"step": 1144
},
{
"epoch": 0.49,
"learning_rate": 4.328325554259044e-06,
"loss": 0.7907,
"step": 1148
},
{
"epoch": 0.49,
"learning_rate": 4.325408401400234e-06,
"loss": 0.5619,
"step": 1152
},
{
"epoch": 0.49,
"learning_rate": 4.322491248541424e-06,
"loss": 0.9847,
"step": 1156
},
{
"epoch": 0.49,
"learning_rate": 4.319574095682614e-06,
"loss": 0.5704,
"step": 1160
},
{
"epoch": 0.49,
"learning_rate": 4.316656942823804e-06,
"loss": 0.729,
"step": 1164
},
{
"epoch": 0.5,
"learning_rate": 4.313739789964994e-06,
"loss": 0.492,
"step": 1168
},
{
"epoch": 0.5,
"learning_rate": 4.310822637106184e-06,
"loss": 0.7427,
"step": 1172
},
{
"epoch": 0.5,
"learning_rate": 4.307905484247375e-06,
"loss": 0.5757,
"step": 1176
},
{
"epoch": 0.5,
"learning_rate": 4.304988331388565e-06,
"loss": 0.5469,
"step": 1180
},
{
"epoch": 0.5,
"learning_rate": 4.302071178529756e-06,
"loss": 0.605,
"step": 1184
},
{
"epoch": 0.51,
"learning_rate": 4.299154025670946e-06,
"loss": 0.4903,
"step": 1188
},
{
"epoch": 0.51,
"learning_rate": 4.296236872812136e-06,
"loss": 0.4479,
"step": 1192
},
{
"epoch": 0.51,
"learning_rate": 4.293319719953326e-06,
"loss": 0.5118,
"step": 1196
},
{
"epoch": 0.51,
"learning_rate": 4.290402567094516e-06,
"loss": 0.5867,
"step": 1200
},
{
"epoch": 0.51,
"learning_rate": 4.287485414235706e-06,
"loss": 0.499,
"step": 1204
},
{
"epoch": 0.51,
"learning_rate": 4.284568261376896e-06,
"loss": 0.5443,
"step": 1208
},
{
"epoch": 0.52,
"learning_rate": 4.281651108518087e-06,
"loss": 0.7138,
"step": 1212
},
{
"epoch": 0.52,
"learning_rate": 4.278733955659277e-06,
"loss": 0.7214,
"step": 1216
},
{
"epoch": 0.52,
"learning_rate": 4.275816802800467e-06,
"loss": 0.4798,
"step": 1220
},
{
"epoch": 0.52,
"learning_rate": 4.272899649941658e-06,
"loss": 0.6752,
"step": 1224
},
{
"epoch": 0.52,
"learning_rate": 4.2699824970828476e-06,
"loss": 0.4885,
"step": 1228
},
{
"epoch": 0.52,
"learning_rate": 4.2670653442240375e-06,
"loss": 0.3946,
"step": 1232
},
{
"epoch": 0.53,
"learning_rate": 4.2641481913652275e-06,
"loss": 0.6857,
"step": 1236
},
{
"epoch": 0.53,
"learning_rate": 4.261231038506418e-06,
"loss": 0.5198,
"step": 1240
},
{
"epoch": 0.53,
"learning_rate": 4.258313885647608e-06,
"loss": 0.7563,
"step": 1244
},
{
"epoch": 0.53,
"learning_rate": 4.255396732788798e-06,
"loss": 0.6442,
"step": 1248
},
{
"epoch": 0.53,
"learning_rate": 4.252479579929989e-06,
"loss": 0.539,
"step": 1252
},
{
"epoch": 0.53,
"learning_rate": 4.249562427071179e-06,
"loss": 0.53,
"step": 1256
},
{
"epoch": 0.54,
"learning_rate": 4.24664527421237e-06,
"loss": 0.5773,
"step": 1260
},
{
"epoch": 0.54,
"learning_rate": 4.2437281213535596e-06,
"loss": 0.5297,
"step": 1264
},
{
"epoch": 0.54,
"learning_rate": 4.2408109684947495e-06,
"loss": 0.7455,
"step": 1268
},
{
"epoch": 0.54,
"learning_rate": 4.2378938156359394e-06,
"loss": 0.3915,
"step": 1272
},
{
"epoch": 0.54,
"learning_rate": 4.23497666277713e-06,
"loss": 0.7302,
"step": 1276
},
{
"epoch": 0.54,
"learning_rate": 4.23205950991832e-06,
"loss": 0.5329,
"step": 1280
},
{
"epoch": 0.55,
"learning_rate": 4.22914235705951e-06,
"loss": 0.7163,
"step": 1284
},
{
"epoch": 0.55,
"learning_rate": 4.226225204200701e-06,
"loss": 0.5996,
"step": 1288
},
{
"epoch": 0.55,
"learning_rate": 4.223308051341891e-06,
"loss": 0.6922,
"step": 1292
},
{
"epoch": 0.55,
"learning_rate": 4.220390898483081e-06,
"loss": 0.7819,
"step": 1296
},
{
"epoch": 0.55,
"learning_rate": 4.217473745624271e-06,
"loss": 0.4539,
"step": 1300
},
{
"epoch": 0.55,
"learning_rate": 4.2145565927654615e-06,
"loss": 0.6079,
"step": 1304
},
{
"epoch": 0.56,
"learning_rate": 4.2116394399066514e-06,
"loss": 0.4377,
"step": 1308
},
{
"epoch": 0.56,
"learning_rate": 4.208722287047841e-06,
"loss": 0.4358,
"step": 1312
},
{
"epoch": 0.56,
"learning_rate": 4.205805134189031e-06,
"loss": 0.6401,
"step": 1316
},
{
"epoch": 0.56,
"learning_rate": 4.202887981330222e-06,
"loss": 0.6677,
"step": 1320
},
{
"epoch": 0.56,
"learning_rate": 4.199970828471413e-06,
"loss": 0.572,
"step": 1324
},
{
"epoch": 0.56,
"learning_rate": 4.197053675612602e-06,
"loss": 0.5973,
"step": 1328
},
{
"epoch": 0.57,
"learning_rate": 4.194136522753793e-06,
"loss": 0.679,
"step": 1332
},
{
"epoch": 0.57,
"learning_rate": 4.191219369894983e-06,
"loss": 0.5599,
"step": 1336
},
{
"epoch": 0.57,
"learning_rate": 4.1883022170361735e-06,
"loss": 0.5171,
"step": 1340
},
{
"epoch": 0.57,
"learning_rate": 4.185385064177363e-06,
"loss": 0.569,
"step": 1344
},
{
"epoch": 0.57,
"learning_rate": 4.182467911318553e-06,
"loss": 0.9033,
"step": 1348
},
{
"epoch": 0.57,
"learning_rate": 4.179550758459744e-06,
"loss": 0.8082,
"step": 1352
},
{
"epoch": 0.58,
"learning_rate": 4.176633605600934e-06,
"loss": 0.6142,
"step": 1356
},
{
"epoch": 0.58,
"learning_rate": 4.173716452742124e-06,
"loss": 0.4488,
"step": 1360
},
{
"epoch": 0.58,
"learning_rate": 4.170799299883314e-06,
"loss": 0.5348,
"step": 1364
},
{
"epoch": 0.58,
"learning_rate": 4.167882147024505e-06,
"loss": 0.3747,
"step": 1368
},
{
"epoch": 0.58,
"learning_rate": 4.164964994165695e-06,
"loss": 0.4663,
"step": 1372
},
{
"epoch": 0.59,
"learning_rate": 4.162047841306885e-06,
"loss": 0.5914,
"step": 1376
},
{
"epoch": 0.59,
"learning_rate": 4.1591306884480746e-06,
"loss": 0.4916,
"step": 1380
},
{
"epoch": 0.59,
"learning_rate": 4.156213535589265e-06,
"loss": 0.4981,
"step": 1384
},
{
"epoch": 0.59,
"learning_rate": 4.153296382730455e-06,
"loss": 0.6886,
"step": 1388
},
{
"epoch": 0.59,
"learning_rate": 4.150379229871645e-06,
"loss": 0.7889,
"step": 1392
},
{
"epoch": 0.59,
"learning_rate": 4.147462077012836e-06,
"loss": 0.4762,
"step": 1396
},
{
"epoch": 0.6,
"learning_rate": 4.144544924154026e-06,
"loss": 0.6236,
"step": 1400
},
{
"epoch": 0.6,
"learning_rate": 4.141627771295217e-06,
"loss": 0.4979,
"step": 1404
},
{
"epoch": 0.6,
"learning_rate": 4.138710618436406e-06,
"loss": 0.6086,
"step": 1408
},
{
"epoch": 0.6,
"learning_rate": 4.135793465577597e-06,
"loss": 0.4375,
"step": 1412
},
{
"epoch": 0.6,
"learning_rate": 4.1328763127187866e-06,
"loss": 0.6866,
"step": 1416
},
{
"epoch": 0.6,
"learning_rate": 4.129959159859977e-06,
"loss": 0.6476,
"step": 1420
},
{
"epoch": 0.61,
"learning_rate": 4.127042007001167e-06,
"loss": 0.5982,
"step": 1424
},
{
"epoch": 0.61,
"learning_rate": 4.124124854142357e-06,
"loss": 0.7084,
"step": 1428
},
{
"epoch": 0.61,
"learning_rate": 4.121207701283548e-06,
"loss": 0.6747,
"step": 1432
},
{
"epoch": 0.61,
"learning_rate": 4.118290548424738e-06,
"loss": 0.5161,
"step": 1436
},
{
"epoch": 0.61,
"learning_rate": 4.115373395565928e-06,
"loss": 0.4477,
"step": 1440
},
{
"epoch": 0.61,
"learning_rate": 4.112456242707118e-06,
"loss": 0.4698,
"step": 1444
},
{
"epoch": 0.62,
"learning_rate": 4.109539089848309e-06,
"loss": 0.3782,
"step": 1448
},
{
"epoch": 0.62,
"learning_rate": 4.1066219369894985e-06,
"loss": 0.5897,
"step": 1452
},
{
"epoch": 0.62,
"learning_rate": 4.1037047841306885e-06,
"loss": 0.6186,
"step": 1456
},
{
"epoch": 0.62,
"learning_rate": 4.100787631271879e-06,
"loss": 0.6515,
"step": 1460
},
{
"epoch": 0.62,
"learning_rate": 4.097870478413069e-06,
"loss": 0.7318,
"step": 1464
},
{
"epoch": 0.62,
"learning_rate": 4.094953325554259e-06,
"loss": 0.4301,
"step": 1468
},
{
"epoch": 0.63,
"learning_rate": 4.092036172695449e-06,
"loss": 0.587,
"step": 1472
},
{
"epoch": 0.63,
"learning_rate": 4.08911901983664e-06,
"loss": 0.4525,
"step": 1476
},
{
"epoch": 0.63,
"learning_rate": 4.08620186697783e-06,
"loss": 0.7056,
"step": 1480
},
{
"epoch": 0.63,
"learning_rate": 4.08328471411902e-06,
"loss": 0.497,
"step": 1484
},
{
"epoch": 0.63,
"learning_rate": 4.0803675612602105e-06,
"loss": 0.6571,
"step": 1488
},
{
"epoch": 0.63,
"learning_rate": 4.0774504084014005e-06,
"loss": 0.5894,
"step": 1492
},
{
"epoch": 0.64,
"learning_rate": 4.074533255542591e-06,
"loss": 0.5912,
"step": 1496
},
{
"epoch": 0.64,
"learning_rate": 4.071616102683781e-06,
"loss": 0.5286,
"step": 1500
},
{
"epoch": 0.64,
"learning_rate": 4.068698949824971e-06,
"loss": 0.5107,
"step": 1504
},
{
"epoch": 0.64,
"learning_rate": 4.065781796966161e-06,
"loss": 0.4842,
"step": 1508
},
{
"epoch": 0.64,
"learning_rate": 4.062864644107352e-06,
"loss": 0.4813,
"step": 1512
},
{
"epoch": 0.64,
"learning_rate": 4.059947491248542e-06,
"loss": 0.4813,
"step": 1516
},
{
"epoch": 0.65,
"learning_rate": 4.057030338389732e-06,
"loss": 0.575,
"step": 1520
},
{
"epoch": 0.65,
"learning_rate": 4.0541131855309225e-06,
"loss": 0.5535,
"step": 1524
},
{
"epoch": 0.65,
"learning_rate": 4.0511960326721125e-06,
"loss": 0.5936,
"step": 1528
},
{
"epoch": 0.65,
"learning_rate": 4.048278879813302e-06,
"loss": 0.691,
"step": 1532
},
{
"epoch": 0.65,
"learning_rate": 4.045361726954492e-06,
"loss": 0.4385,
"step": 1536
},
{
"epoch": 0.65,
"learning_rate": 4.042444574095683e-06,
"loss": 0.5595,
"step": 1540
},
{
"epoch": 0.66,
"learning_rate": 4.039527421236873e-06,
"loss": 0.5647,
"step": 1544
},
{
"epoch": 0.66,
"learning_rate": 4.036610268378063e-06,
"loss": 0.508,
"step": 1548
},
{
"epoch": 0.66,
"learning_rate": 4.033693115519254e-06,
"loss": 0.4794,
"step": 1552
},
{
"epoch": 0.66,
"learning_rate": 4.030775962660444e-06,
"loss": 0.5662,
"step": 1556
},
{
"epoch": 0.66,
"learning_rate": 4.0278588098016345e-06,
"loss": 0.6627,
"step": 1560
},
{
"epoch": 0.66,
"learning_rate": 4.024941656942824e-06,
"loss": 0.3683,
"step": 1564
},
{
"epoch": 0.67,
"learning_rate": 4.022024504084014e-06,
"loss": 0.6034,
"step": 1568
},
{
"epoch": 0.67,
"learning_rate": 4.019107351225204e-06,
"loss": 0.3611,
"step": 1572
},
{
"epoch": 0.67,
"learning_rate": 4.016190198366395e-06,
"loss": 0.639,
"step": 1576
},
{
"epoch": 0.67,
"learning_rate": 4.013273045507585e-06,
"loss": 0.5472,
"step": 1580
},
{
"epoch": 0.67,
"learning_rate": 4.010355892648775e-06,
"loss": 0.4576,
"step": 1584
},
{
"epoch": 0.68,
"learning_rate": 4.007438739789966e-06,
"loss": 0.5035,
"step": 1588
},
{
"epoch": 0.68,
"learning_rate": 4.004521586931156e-06,
"loss": 0.8007,
"step": 1592
},
{
"epoch": 0.68,
"learning_rate": 4.001604434072346e-06,
"loss": 0.6807,
"step": 1596
},
{
"epoch": 0.68,
"learning_rate": 3.998687281213536e-06,
"loss": 0.5574,
"step": 1600
},
{
"epoch": 0.68,
"learning_rate": 3.995770128354726e-06,
"loss": 0.5141,
"step": 1604
},
{
"epoch": 0.68,
"learning_rate": 3.992852975495916e-06,
"loss": 0.4496,
"step": 1608
},
{
"epoch": 0.69,
"learning_rate": 3.989935822637106e-06,
"loss": 0.7541,
"step": 1612
},
{
"epoch": 0.69,
"learning_rate": 3.987018669778296e-06,
"loss": 0.6673,
"step": 1616
},
{
"epoch": 0.69,
"learning_rate": 3.984101516919487e-06,
"loss": 0.6613,
"step": 1620
},
{
"epoch": 0.69,
"learning_rate": 3.981184364060677e-06,
"loss": 0.7404,
"step": 1624
},
{
"epoch": 0.69,
"learning_rate": 3.978267211201867e-06,
"loss": 0.5234,
"step": 1628
},
{
"epoch": 0.69,
"learning_rate": 3.975350058343058e-06,
"loss": 0.608,
"step": 1632
},
{
"epoch": 0.7,
"learning_rate": 3.972432905484248e-06,
"loss": 0.6614,
"step": 1636
},
{
"epoch": 0.7,
"learning_rate": 3.969515752625438e-06,
"loss": 0.4633,
"step": 1640
},
{
"epoch": 0.7,
"learning_rate": 3.9665985997666275e-06,
"loss": 0.5372,
"step": 1644
},
{
"epoch": 0.7,
"learning_rate": 3.963681446907818e-06,
"loss": 0.5298,
"step": 1648
},
{
"epoch": 0.7,
"learning_rate": 3.960764294049009e-06,
"loss": 0.5283,
"step": 1652
},
{
"epoch": 0.7,
"learning_rate": 3.957847141190199e-06,
"loss": 0.3638,
"step": 1656
},
{
"epoch": 0.71,
"learning_rate": 3.954929988331389e-06,
"loss": 0.3947,
"step": 1660
},
{
"epoch": 0.71,
"learning_rate": 3.952012835472579e-06,
"loss": 0.4943,
"step": 1664
},
{
"epoch": 0.71,
"learning_rate": 3.94909568261377e-06,
"loss": 0.5024,
"step": 1668
},
{
"epoch": 0.71,
"learning_rate": 3.9461785297549596e-06,
"loss": 0.6821,
"step": 1672
},
{
"epoch": 0.71,
"learning_rate": 3.9432613768961495e-06,
"loss": 0.4511,
"step": 1676
},
{
"epoch": 0.71,
"learning_rate": 3.9403442240373394e-06,
"loss": 0.4767,
"step": 1680
},
{
"epoch": 0.72,
"learning_rate": 3.93742707117853e-06,
"loss": 0.5588,
"step": 1684
},
{
"epoch": 0.72,
"learning_rate": 3.93450991831972e-06,
"loss": 0.7316,
"step": 1688
},
{
"epoch": 0.72,
"learning_rate": 3.93159276546091e-06,
"loss": 0.3692,
"step": 1692
},
{
"epoch": 0.72,
"learning_rate": 3.928675612602101e-06,
"loss": 0.7381,
"step": 1696
},
{
"epoch": 0.72,
"learning_rate": 3.925758459743291e-06,
"loss": 0.7333,
"step": 1700
},
{
"epoch": 0.72,
"learning_rate": 3.922841306884481e-06,
"loss": 0.5729,
"step": 1704
},
{
"epoch": 0.73,
"learning_rate": 3.919924154025671e-06,
"loss": 0.5423,
"step": 1708
},
{
"epoch": 0.73,
"learning_rate": 3.9170070011668615e-06,
"loss": 0.35,
"step": 1712
},
{
"epoch": 0.73,
"learning_rate": 3.9140898483080514e-06,
"loss": 0.3955,
"step": 1716
},
{
"epoch": 0.73,
"learning_rate": 3.911172695449242e-06,
"loss": 0.713,
"step": 1720
},
{
"epoch": 0.73,
"learning_rate": 3.908255542590432e-06,
"loss": 0.5724,
"step": 1724
},
{
"epoch": 0.73,
"learning_rate": 3.905338389731622e-06,
"loss": 0.5339,
"step": 1728
},
{
"epoch": 0.74,
"learning_rate": 3.902421236872813e-06,
"loss": 0.372,
"step": 1732
},
{
"epoch": 0.74,
"learning_rate": 3.899504084014003e-06,
"loss": 0.6898,
"step": 1736
},
{
"epoch": 0.74,
"learning_rate": 3.896586931155193e-06,
"loss": 0.6381,
"step": 1740
},
{
"epoch": 0.74,
"learning_rate": 3.893669778296383e-06,
"loss": 0.4658,
"step": 1744
},
{
"epoch": 0.74,
"learning_rate": 3.8907526254375735e-06,
"loss": 0.6277,
"step": 1748
},
{
"epoch": 0.74,
"learning_rate": 3.887835472578763e-06,
"loss": 0.476,
"step": 1752
},
{
"epoch": 0.75,
"learning_rate": 3.884918319719953e-06,
"loss": 0.5812,
"step": 1756
},
{
"epoch": 0.75,
"learning_rate": 3.882001166861144e-06,
"loss": 0.3055,
"step": 1760
},
{
"epoch": 0.75,
"learning_rate": 3.879084014002334e-06,
"loss": 0.7002,
"step": 1764
},
{
"epoch": 0.75,
"learning_rate": 3.876166861143524e-06,
"loss": 0.4782,
"step": 1768
},
{
"epoch": 0.75,
"learning_rate": 3.873249708284714e-06,
"loss": 0.4645,
"step": 1772
},
{
"epoch": 0.76,
"learning_rate": 3.870332555425905e-06,
"loss": 0.506,
"step": 1776
},
{
"epoch": 0.76,
"learning_rate": 3.867415402567095e-06,
"loss": 0.5115,
"step": 1780
},
{
"epoch": 0.76,
"learning_rate": 3.864498249708285e-06,
"loss": 0.5903,
"step": 1784
},
{
"epoch": 0.76,
"learning_rate": 3.861581096849475e-06,
"loss": 0.555,
"step": 1788
},
{
"epoch": 0.76,
"learning_rate": 3.858663943990665e-06,
"loss": 0.6398,
"step": 1792
},
{
"epoch": 0.76,
"learning_rate": 3.855746791131856e-06,
"loss": 0.5431,
"step": 1796
},
{
"epoch": 0.77,
"learning_rate": 3.852829638273046e-06,
"loss": 0.7979,
"step": 1800
},
{
"epoch": 0.77,
"learning_rate": 3.849912485414236e-06,
"loss": 0.3846,
"step": 1804
},
{
"epoch": 0.77,
"learning_rate": 3.846995332555426e-06,
"loss": 0.4568,
"step": 1808
},
{
"epoch": 0.77,
"learning_rate": 3.844078179696617e-06,
"loss": 0.7126,
"step": 1812
},
{
"epoch": 0.77,
"learning_rate": 3.841161026837807e-06,
"loss": 0.6972,
"step": 1816
},
{
"epoch": 0.77,
"learning_rate": 3.838243873978997e-06,
"loss": 0.495,
"step": 1820
},
{
"epoch": 0.78,
"learning_rate": 3.835326721120187e-06,
"loss": 0.5843,
"step": 1824
},
{
"epoch": 0.78,
"learning_rate": 3.832409568261377e-06,
"loss": 0.8,
"step": 1828
},
{
"epoch": 0.78,
"learning_rate": 3.829492415402567e-06,
"loss": 0.6066,
"step": 1832
},
{
"epoch": 0.78,
"learning_rate": 3.826575262543757e-06,
"loss": 0.5371,
"step": 1836
},
{
"epoch": 0.78,
"learning_rate": 3.823658109684948e-06,
"loss": 0.4662,
"step": 1840
},
{
"epoch": 0.78,
"learning_rate": 3.820740956826138e-06,
"loss": 0.4733,
"step": 1844
},
{
"epoch": 0.79,
"learning_rate": 3.817823803967328e-06,
"loss": 0.6339,
"step": 1848
},
{
"epoch": 0.79,
"learning_rate": 3.814906651108519e-06,
"loss": 0.5074,
"step": 1852
},
{
"epoch": 0.79,
"learning_rate": 3.8119894982497086e-06,
"loss": 0.7012,
"step": 1856
},
{
"epoch": 0.79,
"learning_rate": 3.809072345390899e-06,
"loss": 0.3957,
"step": 1860
},
{
"epoch": 0.79,
"learning_rate": 3.806155192532089e-06,
"loss": 0.4838,
"step": 1864
},
{
"epoch": 0.79,
"learning_rate": 3.8032380396732793e-06,
"loss": 0.4403,
"step": 1868
},
{
"epoch": 0.8,
"learning_rate": 3.800320886814469e-06,
"loss": 0.4519,
"step": 1872
},
{
"epoch": 0.8,
"learning_rate": 3.7974037339556596e-06,
"loss": 0.4998,
"step": 1876
},
{
"epoch": 0.8,
"learning_rate": 3.7944865810968495e-06,
"loss": 0.3915,
"step": 1880
},
{
"epoch": 0.8,
"learning_rate": 3.79156942823804e-06,
"loss": 0.4722,
"step": 1884
},
{
"epoch": 0.8,
"learning_rate": 3.7886522753792302e-06,
"loss": 0.4377,
"step": 1888
},
{
"epoch": 0.8,
"learning_rate": 3.78573512252042e-06,
"loss": 0.4053,
"step": 1892
},
{
"epoch": 0.81,
"learning_rate": 3.782817969661611e-06,
"loss": 0.6229,
"step": 1896
},
{
"epoch": 0.81,
"learning_rate": 3.7799008168028005e-06,
"loss": 0.573,
"step": 1900
},
{
"epoch": 0.81,
"learning_rate": 3.7769836639439913e-06,
"loss": 0.6595,
"step": 1904
},
{
"epoch": 0.81,
"learning_rate": 3.7740665110851808e-06,
"loss": 0.6739,
"step": 1908
},
{
"epoch": 0.81,
"learning_rate": 3.7711493582263716e-06,
"loss": 0.5746,
"step": 1912
},
{
"epoch": 0.81,
"learning_rate": 3.7682322053675615e-06,
"loss": 0.5315,
"step": 1916
},
{
"epoch": 0.82,
"learning_rate": 3.765315052508752e-06,
"loss": 0.545,
"step": 1920
},
{
"epoch": 0.82,
"learning_rate": 3.7623978996499422e-06,
"loss": 0.5491,
"step": 1924
},
{
"epoch": 0.82,
"learning_rate": 3.759480746791132e-06,
"loss": 0.4616,
"step": 1928
},
{
"epoch": 0.82,
"learning_rate": 3.7565635939323225e-06,
"loss": 0.5943,
"step": 1932
},
{
"epoch": 0.82,
"learning_rate": 3.7536464410735125e-06,
"loss": 0.7848,
"step": 1936
},
{
"epoch": 0.82,
"learning_rate": 3.750729288214703e-06,
"loss": 0.617,
"step": 1940
},
{
"epoch": 0.83,
"learning_rate": 3.7478121353558928e-06,
"loss": 0.469,
"step": 1944
},
{
"epoch": 0.83,
"learning_rate": 3.744894982497083e-06,
"loss": 0.3749,
"step": 1948
},
{
"epoch": 0.83,
"learning_rate": 3.7419778296382735e-06,
"loss": 0.4504,
"step": 1952
},
{
"epoch": 0.83,
"learning_rate": 3.7390606767794634e-06,
"loss": 0.5496,
"step": 1956
},
{
"epoch": 0.83,
"learning_rate": 3.7361435239206538e-06,
"loss": 0.5946,
"step": 1960
},
{
"epoch": 0.84,
"learning_rate": 3.7332263710618437e-06,
"loss": 0.5993,
"step": 1964
},
{
"epoch": 0.84,
"learning_rate": 3.730309218203034e-06,
"loss": 0.5006,
"step": 1968
},
{
"epoch": 0.84,
"learning_rate": 3.727392065344224e-06,
"loss": 0.5133,
"step": 1972
},
{
"epoch": 0.84,
"learning_rate": 3.7244749124854144e-06,
"loss": 0.5787,
"step": 1976
},
{
"epoch": 0.84,
"learning_rate": 3.7215577596266043e-06,
"loss": 0.3984,
"step": 1980
},
{
"epoch": 0.84,
"learning_rate": 3.718640606767795e-06,
"loss": 0.4427,
"step": 1984
},
{
"epoch": 0.85,
"learning_rate": 3.7157234539089855e-06,
"loss": 0.7057,
"step": 1988
},
{
"epoch": 0.85,
"learning_rate": 3.7128063010501754e-06,
"loss": 0.5082,
"step": 1992
},
{
"epoch": 0.85,
"learning_rate": 3.7098891481913658e-06,
"loss": 0.4868,
"step": 1996
},
{
"epoch": 0.85,
"learning_rate": 3.7069719953325557e-06,
"loss": 0.5882,
"step": 2000
},
{
"epoch": 0.85,
"learning_rate": 3.704054842473746e-06,
"loss": 0.5969,
"step": 2004
},
{
"epoch": 0.85,
"learning_rate": 3.701137689614936e-06,
"loss": 0.4813,
"step": 2008
},
{
"epoch": 0.86,
"learning_rate": 3.6982205367561264e-06,
"loss": 0.4511,
"step": 2012
},
{
"epoch": 0.86,
"learning_rate": 3.6953033838973167e-06,
"loss": 0.5281,
"step": 2016
},
{
"epoch": 0.86,
"learning_rate": 3.6923862310385067e-06,
"loss": 0.5461,
"step": 2020
},
{
"epoch": 0.86,
"learning_rate": 3.689469078179697e-06,
"loss": 0.5653,
"step": 2024
},
{
"epoch": 0.86,
"learning_rate": 3.686551925320887e-06,
"loss": 0.5701,
"step": 2028
},
{
"epoch": 0.86,
"learning_rate": 3.6836347724620773e-06,
"loss": 0.5516,
"step": 2032
},
{
"epoch": 0.87,
"learning_rate": 3.6807176196032673e-06,
"loss": 0.6763,
"step": 2036
},
{
"epoch": 0.87,
"learning_rate": 3.6778004667444576e-06,
"loss": 0.5188,
"step": 2040
},
{
"epoch": 0.87,
"learning_rate": 3.6748833138856476e-06,
"loss": 0.6355,
"step": 2044
},
{
"epoch": 0.87,
"learning_rate": 3.671966161026838e-06,
"loss": 0.3786,
"step": 2048
},
{
"epoch": 0.87,
"learning_rate": 3.6690490081680287e-06,
"loss": 0.4538,
"step": 2052
},
{
"epoch": 0.87,
"learning_rate": 3.6661318553092182e-06,
"loss": 0.4956,
"step": 2056
},
{
"epoch": 0.88,
"learning_rate": 3.663214702450409e-06,
"loss": 0.4748,
"step": 2060
},
{
"epoch": 0.88,
"learning_rate": 3.660297549591599e-06,
"loss": 0.5572,
"step": 2064
},
{
"epoch": 0.88,
"learning_rate": 3.6573803967327893e-06,
"loss": 0.5476,
"step": 2068
},
{
"epoch": 0.88,
"learning_rate": 3.6544632438739793e-06,
"loss": 0.6124,
"step": 2072
},
{
"epoch": 0.88,
"learning_rate": 3.6515460910151696e-06,
"loss": 0.6467,
"step": 2076
},
{
"epoch": 0.88,
"learning_rate": 3.6486289381563596e-06,
"loss": 0.5917,
"step": 2080
},
{
"epoch": 0.89,
"learning_rate": 3.64571178529755e-06,
"loss": 0.4462,
"step": 2084
},
{
"epoch": 0.89,
"learning_rate": 3.6427946324387403e-06,
"loss": 0.5173,
"step": 2088
},
{
"epoch": 0.89,
"learning_rate": 3.6398774795799302e-06,
"loss": 0.5402,
"step": 2092
},
{
"epoch": 0.89,
"learning_rate": 3.6369603267211206e-06,
"loss": 0.3434,
"step": 2096
},
{
"epoch": 0.89,
"learning_rate": 3.6340431738623105e-06,
"loss": 0.3308,
"step": 2100
},
{
"epoch": 0.89,
"learning_rate": 3.631126021003501e-06,
"loss": 0.4462,
"step": 2104
},
{
"epoch": 0.9,
"learning_rate": 3.628208868144691e-06,
"loss": 0.4822,
"step": 2108
},
{
"epoch": 0.9,
"learning_rate": 3.625291715285881e-06,
"loss": 0.4612,
"step": 2112
},
{
"epoch": 0.9,
"learning_rate": 3.6223745624270716e-06,
"loss": 0.5462,
"step": 2116
},
{
"epoch": 0.9,
"learning_rate": 3.6194574095682615e-06,
"loss": 0.6326,
"step": 2120
},
{
"epoch": 0.9,
"learning_rate": 3.6165402567094523e-06,
"loss": 0.664,
"step": 2124
},
{
"epoch": 0.9,
"learning_rate": 3.613623103850642e-06,
"loss": 0.4564,
"step": 2128
},
{
"epoch": 0.91,
"learning_rate": 3.6107059509918326e-06,
"loss": 0.4078,
"step": 2132
},
{
"epoch": 0.91,
"learning_rate": 3.607788798133022e-06,
"loss": 0.3981,
"step": 2136
},
{
"epoch": 0.91,
"learning_rate": 3.604871645274213e-06,
"loss": 0.5809,
"step": 2140
},
{
"epoch": 0.91,
"learning_rate": 3.601954492415403e-06,
"loss": 0.3539,
"step": 2144
},
{
"epoch": 0.91,
"learning_rate": 3.599037339556593e-06,
"loss": 0.4753,
"step": 2148
},
{
"epoch": 0.91,
"learning_rate": 3.5961201866977835e-06,
"loss": 0.4232,
"step": 2152
},
{
"epoch": 0.92,
"learning_rate": 3.5932030338389735e-06,
"loss": 0.5864,
"step": 2156
},
{
"epoch": 0.92,
"learning_rate": 3.590285880980164e-06,
"loss": 0.5046,
"step": 2160
},
{
"epoch": 0.92,
"learning_rate": 3.5873687281213538e-06,
"loss": 0.711,
"step": 2164
},
{
"epoch": 0.92,
"learning_rate": 3.584451575262544e-06,
"loss": 0.3999,
"step": 2168
},
{
"epoch": 0.92,
"learning_rate": 3.581534422403734e-06,
"loss": 0.4033,
"step": 2172
},
{
"epoch": 0.93,
"learning_rate": 3.5786172695449245e-06,
"loss": 0.3923,
"step": 2176
},
{
"epoch": 0.93,
"learning_rate": 3.5757001166861144e-06,
"loss": 0.4749,
"step": 2180
},
{
"epoch": 0.93,
"learning_rate": 3.5727829638273048e-06,
"loss": 0.5808,
"step": 2184
},
{
"epoch": 0.93,
"learning_rate": 3.569865810968495e-06,
"loss": 0.5079,
"step": 2188
},
{
"epoch": 0.93,
"learning_rate": 3.566948658109685e-06,
"loss": 0.6254,
"step": 2192
},
{
"epoch": 0.93,
"learning_rate": 3.5640315052508754e-06,
"loss": 0.5132,
"step": 2196
},
{
"epoch": 0.94,
"learning_rate": 3.5611143523920654e-06,
"loss": 0.3979,
"step": 2200
},
{
"epoch": 0.94,
"learning_rate": 3.558197199533256e-06,
"loss": 0.5037,
"step": 2204
},
{
"epoch": 0.94,
"learning_rate": 3.5552800466744457e-06,
"loss": 0.4465,
"step": 2208
},
{
"epoch": 0.94,
"learning_rate": 3.5523628938156364e-06,
"loss": 0.5237,
"step": 2212
},
{
"epoch": 0.94,
"learning_rate": 3.549445740956827e-06,
"loss": 0.4848,
"step": 2216
},
{
"epoch": 0.94,
"learning_rate": 3.5465285880980167e-06,
"loss": 0.6196,
"step": 2220
},
{
"epoch": 0.95,
"learning_rate": 3.543611435239207e-06,
"loss": 0.5312,
"step": 2224
},
{
"epoch": 0.95,
"learning_rate": 3.540694282380397e-06,
"loss": 0.7711,
"step": 2228
},
{
"epoch": 0.95,
"learning_rate": 3.5377771295215874e-06,
"loss": 0.3885,
"step": 2232
},
{
"epoch": 0.95,
"learning_rate": 3.5348599766627773e-06,
"loss": 0.4993,
"step": 2236
},
{
"epoch": 0.95,
"learning_rate": 3.5319428238039677e-06,
"loss": 0.6159,
"step": 2240
},
{
"epoch": 0.95,
"learning_rate": 3.5290256709451576e-06,
"loss": 0.5108,
"step": 2244
},
{
"epoch": 0.96,
"learning_rate": 3.526108518086348e-06,
"loss": 0.4773,
"step": 2248
},
{
"epoch": 0.96,
"learning_rate": 3.5231913652275384e-06,
"loss": 0.4661,
"step": 2252
},
{
"epoch": 0.96,
"learning_rate": 3.5202742123687283e-06,
"loss": 0.4629,
"step": 2256
},
{
"epoch": 0.96,
"learning_rate": 3.5173570595099187e-06,
"loss": 0.587,
"step": 2260
},
{
"epoch": 0.96,
"learning_rate": 3.5144399066511086e-06,
"loss": 0.4012,
"step": 2264
},
{
"epoch": 0.96,
"learning_rate": 3.511522753792299e-06,
"loss": 0.6225,
"step": 2268
},
{
"epoch": 0.97,
"learning_rate": 3.508605600933489e-06,
"loss": 0.5934,
"step": 2272
},
{
"epoch": 0.97,
"learning_rate": 3.5056884480746793e-06,
"loss": 0.5112,
"step": 2276
},
{
"epoch": 0.97,
"learning_rate": 3.502771295215869e-06,
"loss": 0.6217,
"step": 2280
},
{
"epoch": 0.97,
"learning_rate": 3.4998541423570596e-06,
"loss": 0.7376,
"step": 2284
},
{
"epoch": 0.97,
"learning_rate": 3.4969369894982504e-06,
"loss": 0.4367,
"step": 2288
},
{
"epoch": 0.97,
"learning_rate": 3.4940198366394403e-06,
"loss": 0.4153,
"step": 2292
},
{
"epoch": 0.98,
"learning_rate": 3.4911026837806307e-06,
"loss": 0.563,
"step": 2296
},
{
"epoch": 0.98,
"learning_rate": 3.4881855309218206e-06,
"loss": 0.4106,
"step": 2300
},
{
"epoch": 0.98,
"learning_rate": 3.485268378063011e-06,
"loss": 0.6708,
"step": 2304
},
{
"epoch": 0.98,
"learning_rate": 3.482351225204201e-06,
"loss": 0.4741,
"step": 2308
},
{
"epoch": 0.98,
"learning_rate": 3.4794340723453913e-06,
"loss": 0.6038,
"step": 2312
},
{
"epoch": 0.98,
"learning_rate": 3.4765169194865816e-06,
"loss": 0.3982,
"step": 2316
},
{
"epoch": 0.99,
"learning_rate": 3.4735997666277716e-06,
"loss": 0.7745,
"step": 2320
},
{
"epoch": 0.99,
"learning_rate": 3.470682613768962e-06,
"loss": 0.5016,
"step": 2324
},
{
"epoch": 0.99,
"learning_rate": 3.467765460910152e-06,
"loss": 0.3199,
"step": 2328
},
{
"epoch": 0.99,
"learning_rate": 3.4648483080513422e-06,
"loss": 0.5832,
"step": 2332
},
{
"epoch": 0.99,
"learning_rate": 3.461931155192532e-06,
"loss": 0.4076,
"step": 2336
},
{
"epoch": 0.99,
"learning_rate": 3.4590140023337225e-06,
"loss": 0.5615,
"step": 2340
},
{
"epoch": 1.0,
"learning_rate": 3.4560968494749125e-06,
"loss": 0.3989,
"step": 2344
},
{
"epoch": 1.0,
"learning_rate": 3.453179696616103e-06,
"loss": 0.4901,
"step": 2348
},
{
"epoch": 1.0,
"learning_rate": 3.4502625437572936e-06,
"loss": 0.4952,
"step": 2352
},
{
"epoch": 1.0,
"learning_rate": 3.447345390898483e-06,
"loss": 0.5235,
"step": 2356
},
{
"epoch": 1.0,
"learning_rate": 3.444428238039674e-06,
"loss": 0.3815,
"step": 2360
},
{
"epoch": 1.01,
"learning_rate": 3.4415110851808634e-06,
"loss": 0.5771,
"step": 2364
},
{
"epoch": 1.01,
"learning_rate": 3.4385939323220542e-06,
"loss": 0.7008,
"step": 2368
},
{
"epoch": 1.01,
"learning_rate": 3.435676779463244e-06,
"loss": 0.4909,
"step": 2372
},
{
"epoch": 1.01,
"learning_rate": 3.4327596266044345e-06,
"loss": 0.4042,
"step": 2376
},
{
"epoch": 1.01,
"learning_rate": 3.4298424737456245e-06,
"loss": 0.5892,
"step": 2380
},
{
"epoch": 1.01,
"learning_rate": 3.426925320886815e-06,
"loss": 0.538,
"step": 2384
},
{
"epoch": 1.02,
"learning_rate": 3.424008168028005e-06,
"loss": 0.409,
"step": 2388
},
{
"epoch": 1.02,
"learning_rate": 3.421091015169195e-06,
"loss": 0.5232,
"step": 2392
},
{
"epoch": 1.02,
"learning_rate": 3.4181738623103855e-06,
"loss": 0.6311,
"step": 2396
},
{
"epoch": 1.02,
"learning_rate": 3.4152567094515754e-06,
"loss": 0.4116,
"step": 2400
},
{
"epoch": 1.02,
"learning_rate": 3.4123395565927658e-06,
"loss": 0.5223,
"step": 2404
},
{
"epoch": 1.02,
"learning_rate": 3.4094224037339557e-06,
"loss": 0.6645,
"step": 2408
},
{
"epoch": 1.03,
"learning_rate": 3.406505250875146e-06,
"loss": 0.4037,
"step": 2412
},
{
"epoch": 1.03,
"learning_rate": 3.4035880980163364e-06,
"loss": 0.4992,
"step": 2416
},
{
"epoch": 1.03,
"learning_rate": 3.4006709451575264e-06,
"loss": 0.3654,
"step": 2420
},
{
"epoch": 1.03,
"learning_rate": 3.3977537922987167e-06,
"loss": 0.6107,
"step": 2424
},
{
"epoch": 1.03,
"learning_rate": 3.3948366394399067e-06,
"loss": 0.5753,
"step": 2428
},
{
"epoch": 1.03,
"learning_rate": 3.3919194865810975e-06,
"loss": 0.375,
"step": 2432
},
{
"epoch": 1.04,
"learning_rate": 3.389002333722287e-06,
"loss": 0.4033,
"step": 2436
},
{
"epoch": 1.04,
"learning_rate": 3.3860851808634778e-06,
"loss": 0.5717,
"step": 2440
},
{
"epoch": 1.04,
"learning_rate": 3.3831680280046673e-06,
"loss": 0.3352,
"step": 2444
},
{
"epoch": 1.04,
"learning_rate": 3.380250875145858e-06,
"loss": 0.3152,
"step": 2448
},
{
"epoch": 1.04,
"learning_rate": 3.3773337222870484e-06,
"loss": 0.6976,
"step": 2452
},
{
"epoch": 1.04,
"learning_rate": 3.3744165694282384e-06,
"loss": 0.6974,
"step": 2456
},
{
"epoch": 1.05,
"learning_rate": 3.3714994165694287e-06,
"loss": 0.4909,
"step": 2460
},
{
"epoch": 1.05,
"learning_rate": 3.3685822637106187e-06,
"loss": 0.4997,
"step": 2464
},
{
"epoch": 1.05,
"learning_rate": 3.365665110851809e-06,
"loss": 0.5293,
"step": 2468
},
{
"epoch": 1.05,
"learning_rate": 3.362747957992999e-06,
"loss": 0.5174,
"step": 2472
},
{
"epoch": 1.05,
"learning_rate": 3.3598308051341893e-06,
"loss": 0.4137,
"step": 2476
},
{
"epoch": 1.05,
"learning_rate": 3.3569136522753793e-06,
"loss": 0.3583,
"step": 2480
},
{
"epoch": 1.06,
"learning_rate": 3.3539964994165696e-06,
"loss": 0.4317,
"step": 2484
},
{
"epoch": 1.06,
"learning_rate": 3.35107934655776e-06,
"loss": 0.5334,
"step": 2488
},
{
"epoch": 1.06,
"learning_rate": 3.34816219369895e-06,
"loss": 0.4626,
"step": 2492
},
{
"epoch": 1.06,
"learning_rate": 3.3452450408401403e-06,
"loss": 0.3575,
"step": 2496
},
{
"epoch": 1.06,
"learning_rate": 3.3423278879813302e-06,
"loss": 0.3181,
"step": 2500
},
{
"epoch": 1.06,
"learning_rate": 3.3394107351225206e-06,
"loss": 0.4233,
"step": 2504
},
{
"epoch": 1.07,
"learning_rate": 3.3364935822637105e-06,
"loss": 0.6187,
"step": 2508
},
{
"epoch": 1.07,
"learning_rate": 3.3335764294049013e-06,
"loss": 0.4076,
"step": 2512
},
{
"epoch": 1.07,
"learning_rate": 3.3306592765460917e-06,
"loss": 0.3893,
"step": 2516
},
{
"epoch": 1.07,
"learning_rate": 3.3277421236872816e-06,
"loss": 0.4937,
"step": 2520
},
{
"epoch": 1.07,
"learning_rate": 3.324824970828472e-06,
"loss": 0.29,
"step": 2524
},
{
"epoch": 1.07,
"learning_rate": 3.321907817969662e-06,
"loss": 0.4274,
"step": 2528
},
{
"epoch": 1.08,
"learning_rate": 3.3189906651108523e-06,
"loss": 0.3029,
"step": 2532
},
{
"epoch": 1.08,
"learning_rate": 3.3160735122520422e-06,
"loss": 0.4773,
"step": 2536
},
{
"epoch": 1.08,
"learning_rate": 3.3131563593932326e-06,
"loss": 0.5116,
"step": 2540
},
{
"epoch": 1.08,
"learning_rate": 3.3102392065344225e-06,
"loss": 0.514,
"step": 2544
},
{
"epoch": 1.08,
"learning_rate": 3.307322053675613e-06,
"loss": 0.5396,
"step": 2548
},
{
"epoch": 1.09,
"learning_rate": 3.3044049008168033e-06,
"loss": 0.4585,
"step": 2552
},
{
"epoch": 1.09,
"learning_rate": 3.301487747957993e-06,
"loss": 0.6577,
"step": 2556
},
{
"epoch": 1.09,
"learning_rate": 3.2985705950991836e-06,
"loss": 0.4556,
"step": 2560
},
{
"epoch": 1.09,
"learning_rate": 3.2956534422403735e-06,
"loss": 0.3851,
"step": 2564
},
{
"epoch": 1.09,
"learning_rate": 3.292736289381564e-06,
"loss": 0.366,
"step": 2568
},
{
"epoch": 1.09,
"learning_rate": 3.289819136522754e-06,
"loss": 0.53,
"step": 2572
},
{
"epoch": 1.1,
"learning_rate": 3.286901983663944e-06,
"loss": 0.513,
"step": 2576
},
{
"epoch": 1.1,
"learning_rate": 3.283984830805134e-06,
"loss": 0.4743,
"step": 2580
},
{
"epoch": 1.1,
"learning_rate": 3.2810676779463245e-06,
"loss": 0.5324,
"step": 2584
},
{
"epoch": 1.1,
"learning_rate": 3.2781505250875152e-06,
"loss": 0.5894,
"step": 2588
},
{
"epoch": 1.1,
"learning_rate": 3.2752333722287048e-06,
"loss": 0.4134,
"step": 2592
},
{
"epoch": 1.1,
"learning_rate": 3.2723162193698955e-06,
"loss": 0.5402,
"step": 2596
},
{
"epoch": 1.11,
"learning_rate": 3.2693990665110855e-06,
"loss": 0.4749,
"step": 2600
},
{
"epoch": 1.11,
"learning_rate": 3.266481913652276e-06,
"loss": 0.3575,
"step": 2604
},
{
"epoch": 1.11,
"learning_rate": 3.2635647607934658e-06,
"loss": 0.3848,
"step": 2608
},
{
"epoch": 1.11,
"learning_rate": 3.260647607934656e-06,
"loss": 0.3512,
"step": 2612
},
{
"epoch": 1.11,
"learning_rate": 3.2577304550758465e-06,
"loss": 0.4186,
"step": 2616
},
{
"epoch": 1.11,
"learning_rate": 3.2548133022170364e-06,
"loss": 0.3952,
"step": 2620
},
{
"epoch": 1.12,
"learning_rate": 3.251896149358227e-06,
"loss": 0.3489,
"step": 2624
},
{
"epoch": 1.12,
"learning_rate": 3.2489789964994167e-06,
"loss": 0.5544,
"step": 2628
},
{
"epoch": 1.12,
"learning_rate": 3.246061843640607e-06,
"loss": 0.4824,
"step": 2632
},
{
"epoch": 1.12,
"learning_rate": 3.243144690781797e-06,
"loss": 0.3462,
"step": 2636
},
{
"epoch": 1.12,
"learning_rate": 3.2402275379229874e-06,
"loss": 0.4465,
"step": 2640
},
{
"epoch": 1.12,
"learning_rate": 3.2373103850641773e-06,
"loss": 0.4774,
"step": 2644
},
{
"epoch": 1.13,
"learning_rate": 3.2343932322053677e-06,
"loss": 0.32,
"step": 2648
},
{
"epoch": 1.13,
"learning_rate": 3.231476079346558e-06,
"loss": 0.5598,
"step": 2652
},
{
"epoch": 1.13,
"learning_rate": 3.228558926487748e-06,
"loss": 0.5406,
"step": 2656
},
{
"epoch": 1.13,
"learning_rate": 3.225641773628939e-06,
"loss": 0.3966,
"step": 2660
},
{
"epoch": 1.13,
"learning_rate": 3.2227246207701283e-06,
"loss": 0.6023,
"step": 2664
},
{
"epoch": 1.13,
"learning_rate": 3.219807467911319e-06,
"loss": 0.4532,
"step": 2668
},
{
"epoch": 1.14,
"learning_rate": 3.2168903150525086e-06,
"loss": 0.3336,
"step": 2672
},
{
"epoch": 1.14,
"learning_rate": 3.2139731621936994e-06,
"loss": 0.4411,
"step": 2676
},
{
"epoch": 1.14,
"learning_rate": 3.2110560093348893e-06,
"loss": 0.5039,
"step": 2680
},
{
"epoch": 1.14,
"learning_rate": 3.2081388564760797e-06,
"loss": 0.6932,
"step": 2684
},
{
"epoch": 1.14,
"learning_rate": 3.20522170361727e-06,
"loss": 0.5271,
"step": 2688
},
{
"epoch": 1.14,
"learning_rate": 3.20230455075846e-06,
"loss": 0.432,
"step": 2692
},
{
"epoch": 1.15,
"learning_rate": 3.1993873978996504e-06,
"loss": 0.4973,
"step": 2696
},
{
"epoch": 1.15,
"learning_rate": 3.1964702450408403e-06,
"loss": 0.6146,
"step": 2700
},
{
"epoch": 1.15,
"learning_rate": 3.1935530921820307e-06,
"loss": 0.3637,
"step": 2704
},
{
"epoch": 1.15,
"learning_rate": 3.1906359393232206e-06,
"loss": 0.4085,
"step": 2708
},
{
"epoch": 1.15,
"learning_rate": 3.187718786464411e-06,
"loss": 0.3726,
"step": 2712
},
{
"epoch": 1.15,
"learning_rate": 3.1848016336056013e-06,
"loss": 0.5446,
"step": 2716
},
{
"epoch": 1.16,
"learning_rate": 3.1818844807467913e-06,
"loss": 0.497,
"step": 2720
},
{
"epoch": 1.16,
"learning_rate": 3.1789673278879816e-06,
"loss": 0.3298,
"step": 2724
},
{
"epoch": 1.16,
"learning_rate": 3.1760501750291716e-06,
"loss": 0.491,
"step": 2728
},
{
"epoch": 1.16,
"learning_rate": 3.173133022170362e-06,
"loss": 0.3796,
"step": 2732
},
{
"epoch": 1.16,
"learning_rate": 3.170215869311552e-06,
"loss": 0.6953,
"step": 2736
},
{
"epoch": 1.16,
"learning_rate": 3.1672987164527427e-06,
"loss": 0.3953,
"step": 2740
},
{
"epoch": 1.17,
"learning_rate": 3.164381563593932e-06,
"loss": 0.3992,
"step": 2744
},
{
"epoch": 1.17,
"learning_rate": 3.161464410735123e-06,
"loss": 0.4851,
"step": 2748
},
{
"epoch": 1.17,
"learning_rate": 3.1585472578763133e-06,
"loss": 0.3364,
"step": 2752
},
{
"epoch": 1.17,
"learning_rate": 3.1556301050175033e-06,
"loss": 0.4477,
"step": 2756
},
{
"epoch": 1.17,
"learning_rate": 3.1527129521586936e-06,
"loss": 0.555,
"step": 2760
},
{
"epoch": 1.18,
"learning_rate": 3.1497957992998836e-06,
"loss": 0.5896,
"step": 2764
},
{
"epoch": 1.18,
"learning_rate": 3.146878646441074e-06,
"loss": 0.5281,
"step": 2768
},
{
"epoch": 1.18,
"learning_rate": 3.143961493582264e-06,
"loss": 0.4825,
"step": 2772
},
{
"epoch": 1.18,
"learning_rate": 3.1410443407234542e-06,
"loss": 0.3199,
"step": 2776
},
{
"epoch": 1.18,
"learning_rate": 3.138127187864644e-06,
"loss": 0.3897,
"step": 2780
},
{
"epoch": 1.18,
"learning_rate": 3.1352100350058345e-06,
"loss": 0.3698,
"step": 2784
},
{
"epoch": 1.19,
"learning_rate": 3.132292882147025e-06,
"loss": 0.4514,
"step": 2788
},
{
"epoch": 1.19,
"learning_rate": 3.129375729288215e-06,
"loss": 0.4257,
"step": 2792
},
{
"epoch": 1.19,
"learning_rate": 3.126458576429405e-06,
"loss": 0.4245,
"step": 2796
},
{
"epoch": 1.19,
"learning_rate": 3.123541423570595e-06,
"loss": 0.4945,
"step": 2800
},
{
"epoch": 1.19,
"learning_rate": 3.1206242707117855e-06,
"loss": 0.442,
"step": 2804
},
{
"epoch": 1.19,
"learning_rate": 3.1177071178529754e-06,
"loss": 0.6289,
"step": 2808
},
{
"epoch": 1.2,
"learning_rate": 3.1147899649941658e-06,
"loss": 0.4142,
"step": 2812
},
{
"epoch": 1.2,
"learning_rate": 3.1118728121353566e-06,
"loss": 0.4667,
"step": 2816
},
{
"epoch": 1.2,
"learning_rate": 3.1089556592765465e-06,
"loss": 0.4788,
"step": 2820
},
{
"epoch": 1.2,
"learning_rate": 3.106038506417737e-06,
"loss": 0.3926,
"step": 2824
},
{
"epoch": 1.2,
"learning_rate": 3.103121353558927e-06,
"loss": 0.553,
"step": 2828
},
{
"epoch": 1.2,
"learning_rate": 3.100204200700117e-06,
"loss": 0.4567,
"step": 2832
},
{
"epoch": 1.21,
"learning_rate": 3.097287047841307e-06,
"loss": 0.3615,
"step": 2836
},
{
"epoch": 1.21,
"learning_rate": 3.0943698949824975e-06,
"loss": 0.4364,
"step": 2840
},
{
"epoch": 1.21,
"learning_rate": 3.0914527421236874e-06,
"loss": 0.525,
"step": 2844
},
{
"epoch": 1.21,
"learning_rate": 3.0885355892648778e-06,
"loss": 0.4324,
"step": 2848
},
{
"epoch": 1.21,
"learning_rate": 3.085618436406068e-06,
"loss": 0.37,
"step": 2852
},
{
"epoch": 1.21,
"learning_rate": 3.082701283547258e-06,
"loss": 0.3175,
"step": 2856
},
{
"epoch": 1.22,
"learning_rate": 3.0797841306884484e-06,
"loss": 0.3043,
"step": 2860
},
{
"epoch": 1.22,
"learning_rate": 3.0768669778296384e-06,
"loss": 0.514,
"step": 2864
},
{
"epoch": 1.22,
"learning_rate": 3.0739498249708287e-06,
"loss": 0.4671,
"step": 2868
},
{
"epoch": 1.22,
"learning_rate": 3.0710326721120187e-06,
"loss": 0.4151,
"step": 2872
},
{
"epoch": 1.22,
"learning_rate": 3.068115519253209e-06,
"loss": 0.4077,
"step": 2876
},
{
"epoch": 1.22,
"learning_rate": 3.065198366394399e-06,
"loss": 0.5045,
"step": 2880
},
{
"epoch": 1.23,
"learning_rate": 3.0622812135355893e-06,
"loss": 0.3641,
"step": 2884
},
{
"epoch": 1.23,
"learning_rate": 3.05936406067678e-06,
"loss": 0.5006,
"step": 2888
},
{
"epoch": 1.23,
"learning_rate": 3.0564469078179696e-06,
"loss": 0.4298,
"step": 2892
},
{
"epoch": 1.23,
"learning_rate": 3.0535297549591604e-06,
"loss": 0.514,
"step": 2896
},
{
"epoch": 1.23,
"learning_rate": 3.0506126021003504e-06,
"loss": 0.4578,
"step": 2900
},
{
"epoch": 1.23,
"learning_rate": 3.0476954492415407e-06,
"loss": 0.3638,
"step": 2904
},
{
"epoch": 1.24,
"learning_rate": 3.0447782963827307e-06,
"loss": 0.6377,
"step": 2908
},
{
"epoch": 1.24,
"learning_rate": 3.041861143523921e-06,
"loss": 0.5282,
"step": 2912
},
{
"epoch": 1.24,
"learning_rate": 3.0389439906651114e-06,
"loss": 0.5388,
"step": 2916
},
{
"epoch": 1.24,
"learning_rate": 3.0360268378063013e-06,
"loss": 0.5937,
"step": 2920
},
{
"epoch": 1.24,
"learning_rate": 3.0331096849474917e-06,
"loss": 0.488,
"step": 2924
},
{
"epoch": 1.24,
"learning_rate": 3.0301925320886816e-06,
"loss": 0.4885,
"step": 2928
},
{
"epoch": 1.25,
"learning_rate": 3.027275379229872e-06,
"loss": 0.6639,
"step": 2932
},
{
"epoch": 1.25,
"learning_rate": 3.024358226371062e-06,
"loss": 0.4895,
"step": 2936
},
{
"epoch": 1.25,
"learning_rate": 3.0214410735122523e-06,
"loss": 0.2655,
"step": 2940
},
{
"epoch": 1.25,
"learning_rate": 3.0185239206534422e-06,
"loss": 0.6175,
"step": 2944
},
{
"epoch": 1.25,
"learning_rate": 3.0156067677946326e-06,
"loss": 0.4876,
"step": 2948
},
{
"epoch": 1.26,
"learning_rate": 3.012689614935823e-06,
"loss": 0.3615,
"step": 2952
},
{
"epoch": 1.26,
"learning_rate": 3.009772462077013e-06,
"loss": 0.3619,
"step": 2956
},
{
"epoch": 1.26,
"learning_rate": 3.0068553092182033e-06,
"loss": 0.4622,
"step": 2960
},
{
"epoch": 1.26,
"learning_rate": 3.003938156359393e-06,
"loss": 0.4855,
"step": 2964
},
{
"epoch": 1.26,
"learning_rate": 3.001021003500584e-06,
"loss": 0.3779,
"step": 2968
},
{
"epoch": 1.26,
"learning_rate": 2.9981038506417735e-06,
"loss": 0.5237,
"step": 2972
},
{
"epoch": 1.27,
"learning_rate": 2.9951866977829643e-06,
"loss": 0.541,
"step": 2976
},
{
"epoch": 1.27,
"learning_rate": 2.992269544924154e-06,
"loss": 0.4515,
"step": 2980
},
{
"epoch": 1.27,
"learning_rate": 2.9893523920653446e-06,
"loss": 0.5022,
"step": 2984
},
{
"epoch": 1.27,
"learning_rate": 2.986435239206535e-06,
"loss": 0.4295,
"step": 2988
},
{
"epoch": 1.27,
"learning_rate": 2.983518086347725e-06,
"loss": 0.438,
"step": 2992
},
{
"epoch": 1.27,
"learning_rate": 2.9806009334889152e-06,
"loss": 0.364,
"step": 2996
},
{
"epoch": 1.28,
"learning_rate": 2.977683780630105e-06,
"loss": 0.3795,
"step": 3000
},
{
"epoch": 1.28,
"learning_rate": 2.9747666277712955e-06,
"loss": 0.4839,
"step": 3004
},
{
"epoch": 1.28,
"learning_rate": 2.9718494749124855e-06,
"loss": 0.3864,
"step": 3008
},
{
"epoch": 1.28,
"learning_rate": 2.968932322053676e-06,
"loss": 0.5014,
"step": 3012
},
{
"epoch": 1.28,
"learning_rate": 2.966015169194866e-06,
"loss": 0.4356,
"step": 3016
},
{
"epoch": 1.28,
"learning_rate": 2.963098016336056e-06,
"loss": 0.5337,
"step": 3020
},
{
"epoch": 1.29,
"learning_rate": 2.9601808634772465e-06,
"loss": 0.4385,
"step": 3024
},
{
"epoch": 1.29,
"learning_rate": 2.9572637106184364e-06,
"loss": 0.5117,
"step": 3028
},
{
"epoch": 1.29,
"learning_rate": 2.954346557759627e-06,
"loss": 0.6061,
"step": 3032
},
{
"epoch": 1.29,
"learning_rate": 2.9514294049008167e-06,
"loss": 0.4433,
"step": 3036
},
{
"epoch": 1.29,
"learning_rate": 2.948512252042007e-06,
"loss": 0.3484,
"step": 3040
},
{
"epoch": 1.29,
"learning_rate": 2.945595099183197e-06,
"loss": 0.2477,
"step": 3044
},
{
"epoch": 1.3,
"learning_rate": 2.942677946324388e-06,
"loss": 0.2433,
"step": 3048
},
{
"epoch": 1.3,
"learning_rate": 2.939760793465578e-06,
"loss": 0.34,
"step": 3052
},
{
"epoch": 1.3,
"learning_rate": 2.936843640606768e-06,
"loss": 0.6435,
"step": 3056
},
{
"epoch": 1.3,
"learning_rate": 2.9339264877479585e-06,
"loss": 0.3208,
"step": 3060
},
{
"epoch": 1.3,
"learning_rate": 2.9310093348891484e-06,
"loss": 0.3085,
"step": 3064
},
{
"epoch": 1.3,
"learning_rate": 2.928092182030339e-06,
"loss": 0.362,
"step": 3068
},
{
"epoch": 1.31,
"learning_rate": 2.9251750291715287e-06,
"loss": 0.4297,
"step": 3072
},
{
"epoch": 1.31,
"learning_rate": 2.922257876312719e-06,
"loss": 0.4343,
"step": 3076
},
{
"epoch": 1.31,
"learning_rate": 2.919340723453909e-06,
"loss": 0.5577,
"step": 3080
},
{
"epoch": 1.31,
"learning_rate": 2.9164235705950994e-06,
"loss": 0.3442,
"step": 3084
},
{
"epoch": 1.31,
"learning_rate": 2.9135064177362898e-06,
"loss": 0.5667,
"step": 3088
},
{
"epoch": 1.31,
"learning_rate": 2.9105892648774797e-06,
"loss": 0.3254,
"step": 3092
},
{
"epoch": 1.32,
"learning_rate": 2.90767211201867e-06,
"loss": 0.2909,
"step": 3096
},
{
"epoch": 1.32,
"learning_rate": 2.90475495915986e-06,
"loss": 0.3682,
"step": 3100
},
{
"epoch": 1.32,
"learning_rate": 2.9018378063010504e-06,
"loss": 0.3107,
"step": 3104
},
{
"epoch": 1.32,
"learning_rate": 2.8989206534422403e-06,
"loss": 0.4328,
"step": 3108
},
{
"epoch": 1.32,
"learning_rate": 2.8960035005834307e-06,
"loss": 0.3674,
"step": 3112
},
{
"epoch": 1.32,
"learning_rate": 2.8930863477246215e-06,
"loss": 0.3329,
"step": 3116
},
{
"epoch": 1.33,
"learning_rate": 2.890169194865811e-06,
"loss": 0.6409,
"step": 3120
},
{
"epoch": 1.33,
"learning_rate": 2.8872520420070018e-06,
"loss": 0.5682,
"step": 3124
},
{
"epoch": 1.33,
"learning_rate": 2.8843348891481917e-06,
"loss": 0.5972,
"step": 3128
},
{
"epoch": 1.33,
"learning_rate": 2.881417736289382e-06,
"loss": 0.4621,
"step": 3132
},
{
"epoch": 1.33,
"learning_rate": 2.878500583430572e-06,
"loss": 0.4448,
"step": 3136
},
{
"epoch": 1.34,
"learning_rate": 2.8755834305717624e-06,
"loss": 0.2757,
"step": 3140
},
{
"epoch": 1.34,
"learning_rate": 2.8726662777129523e-06,
"loss": 0.5172,
"step": 3144
},
{
"epoch": 1.34,
"learning_rate": 2.8697491248541427e-06,
"loss": 0.4493,
"step": 3148
},
{
"epoch": 1.34,
"learning_rate": 2.866831971995333e-06,
"loss": 0.3437,
"step": 3152
},
{
"epoch": 1.34,
"learning_rate": 2.863914819136523e-06,
"loss": 0.286,
"step": 3156
},
{
"epoch": 1.34,
"learning_rate": 2.8609976662777133e-06,
"loss": 0.6001,
"step": 3160
},
{
"epoch": 1.35,
"learning_rate": 2.8580805134189033e-06,
"loss": 0.4373,
"step": 3164
},
{
"epoch": 1.35,
"learning_rate": 2.8551633605600936e-06,
"loss": 0.4974,
"step": 3168
},
{
"epoch": 1.35,
"learning_rate": 2.8522462077012836e-06,
"loss": 0.4817,
"step": 3172
},
{
"epoch": 1.35,
"learning_rate": 2.849329054842474e-06,
"loss": 0.4178,
"step": 3176
},
{
"epoch": 1.35,
"learning_rate": 2.846411901983664e-06,
"loss": 0.4527,
"step": 3180
},
{
"epoch": 1.35,
"learning_rate": 2.8434947491248542e-06,
"loss": 0.3193,
"step": 3184
},
{
"epoch": 1.36,
"learning_rate": 2.8405775962660446e-06,
"loss": 0.4259,
"step": 3188
},
{
"epoch": 1.36,
"learning_rate": 2.8376604434072345e-06,
"loss": 0.2256,
"step": 3192
},
{
"epoch": 1.36,
"learning_rate": 2.8347432905484253e-06,
"loss": 0.3772,
"step": 3196
},
{
"epoch": 1.36,
"learning_rate": 2.831826137689615e-06,
"loss": 0.3679,
"step": 3200
},
{
"epoch": 1.36,
"learning_rate": 2.8289089848308056e-06,
"loss": 0.2954,
"step": 3204
},
{
"epoch": 1.36,
"learning_rate": 2.8259918319719955e-06,
"loss": 0.4222,
"step": 3208
},
{
"epoch": 1.37,
"learning_rate": 2.823074679113186e-06,
"loss": 0.4063,
"step": 3212
},
{
"epoch": 1.37,
"learning_rate": 2.8201575262543763e-06,
"loss": 0.6235,
"step": 3216
},
{
"epoch": 1.37,
"learning_rate": 2.817240373395566e-06,
"loss": 0.2717,
"step": 3220
},
{
"epoch": 1.37,
"learning_rate": 2.8143232205367566e-06,
"loss": 0.374,
"step": 3224
},
{
"epoch": 1.37,
"learning_rate": 2.8114060676779465e-06,
"loss": 0.3534,
"step": 3228
},
{
"epoch": 1.37,
"learning_rate": 2.808488914819137e-06,
"loss": 0.3723,
"step": 3232
},
{
"epoch": 1.38,
"learning_rate": 2.805571761960327e-06,
"loss": 0.5241,
"step": 3236
},
{
"epoch": 1.38,
"learning_rate": 2.802654609101517e-06,
"loss": 0.5061,
"step": 3240
},
{
"epoch": 1.38,
"learning_rate": 2.799737456242707e-06,
"loss": 0.268,
"step": 3244
},
{
"epoch": 1.38,
"learning_rate": 2.7968203033838975e-06,
"loss": 0.3903,
"step": 3248
},
{
"epoch": 1.38,
"learning_rate": 2.793903150525088e-06,
"loss": 0.4536,
"step": 3252
},
{
"epoch": 1.38,
"learning_rate": 2.7909859976662778e-06,
"loss": 0.36,
"step": 3256
},
{
"epoch": 1.39,
"learning_rate": 2.788068844807468e-06,
"loss": 0.4255,
"step": 3260
},
{
"epoch": 1.39,
"learning_rate": 2.785151691948658e-06,
"loss": 0.5646,
"step": 3264
},
{
"epoch": 1.39,
"learning_rate": 2.7822345390898484e-06,
"loss": 0.3652,
"step": 3268
},
{
"epoch": 1.39,
"learning_rate": 2.7793173862310384e-06,
"loss": 0.3034,
"step": 3272
},
{
"epoch": 1.39,
"learning_rate": 2.776400233372229e-06,
"loss": 0.417,
"step": 3276
},
{
"epoch": 1.39,
"learning_rate": 2.7734830805134187e-06,
"loss": 0.5308,
"step": 3280
},
{
"epoch": 1.4,
"learning_rate": 2.7705659276546095e-06,
"loss": 0.5367,
"step": 3284
},
{
"epoch": 1.4,
"learning_rate": 2.7676487747958e-06,
"loss": 0.2237,
"step": 3288
},
{
"epoch": 1.4,
"learning_rate": 2.7647316219369898e-06,
"loss": 0.4497,
"step": 3292
},
{
"epoch": 1.4,
"learning_rate": 2.76181446907818e-06,
"loss": 0.351,
"step": 3296
},
{
"epoch": 1.4,
"learning_rate": 2.75889731621937e-06,
"loss": 0.6103,
"step": 3300
},
{
"epoch": 1.4,
"learning_rate": 2.7559801633605604e-06,
"loss": 0.3624,
"step": 3304
},
{
"epoch": 1.41,
"learning_rate": 2.7530630105017504e-06,
"loss": 0.4172,
"step": 3308
},
{
"epoch": 1.41,
"learning_rate": 2.7501458576429407e-06,
"loss": 0.5008,
"step": 3312
},
{
"epoch": 1.41,
"learning_rate": 2.747228704784131e-06,
"loss": 0.4368,
"step": 3316
},
{
"epoch": 1.41,
"learning_rate": 2.744311551925321e-06,
"loss": 0.4305,
"step": 3320
},
{
"epoch": 1.41,
"learning_rate": 2.7413943990665114e-06,
"loss": 0.3914,
"step": 3324
},
{
"epoch": 1.41,
"learning_rate": 2.7384772462077013e-06,
"loss": 0.3781,
"step": 3328
},
{
"epoch": 1.42,
"learning_rate": 2.7355600933488917e-06,
"loss": 0.4922,
"step": 3332
},
{
"epoch": 1.42,
"learning_rate": 2.7326429404900816e-06,
"loss": 0.3398,
"step": 3336
},
{
"epoch": 1.42,
"learning_rate": 2.729725787631272e-06,
"loss": 0.5107,
"step": 3340
},
{
"epoch": 1.42,
"learning_rate": 2.726808634772462e-06,
"loss": 0.5933,
"step": 3344
},
{
"epoch": 1.42,
"learning_rate": 2.7238914819136523e-06,
"loss": 0.4659,
"step": 3348
},
{
"epoch": 1.43,
"learning_rate": 2.720974329054843e-06,
"loss": 0.2262,
"step": 3352
},
{
"epoch": 1.43,
"learning_rate": 2.718057176196033e-06,
"loss": 0.4471,
"step": 3356
},
{
"epoch": 1.43,
"learning_rate": 2.7151400233372234e-06,
"loss": 0.7168,
"step": 3360
},
{
"epoch": 1.43,
"learning_rate": 2.7122228704784133e-06,
"loss": 0.1911,
"step": 3364
},
{
"epoch": 1.43,
"learning_rate": 2.7093057176196037e-06,
"loss": 0.3807,
"step": 3368
},
{
"epoch": 1.43,
"learning_rate": 2.7063885647607936e-06,
"loss": 0.3614,
"step": 3372
},
{
"epoch": 1.44,
"learning_rate": 2.703471411901984e-06,
"loss": 0.2861,
"step": 3376
},
{
"epoch": 1.44,
"learning_rate": 2.700554259043174e-06,
"loss": 0.3193,
"step": 3380
},
{
"epoch": 1.44,
"learning_rate": 2.6976371061843643e-06,
"loss": 0.4835,
"step": 3384
},
{
"epoch": 1.44,
"learning_rate": 2.6947199533255546e-06,
"loss": 0.4439,
"step": 3388
},
{
"epoch": 1.44,
"learning_rate": 2.6918028004667446e-06,
"loss": 0.2924,
"step": 3392
},
{
"epoch": 1.44,
"learning_rate": 2.688885647607935e-06,
"loss": 0.5311,
"step": 3396
},
{
"epoch": 1.45,
"learning_rate": 2.685968494749125e-06,
"loss": 0.4898,
"step": 3400
},
{
"epoch": 1.45,
"learning_rate": 2.6830513418903152e-06,
"loss": 0.2538,
"step": 3404
},
{
"epoch": 1.45,
"learning_rate": 2.680134189031505e-06,
"loss": 0.4381,
"step": 3408
},
{
"epoch": 1.45,
"learning_rate": 2.6772170361726955e-06,
"loss": 0.4718,
"step": 3412
},
{
"epoch": 1.45,
"learning_rate": 2.6742998833138863e-06,
"loss": 0.3369,
"step": 3416
},
{
"epoch": 1.45,
"learning_rate": 2.671382730455076e-06,
"loss": 0.3481,
"step": 3420
},
{
"epoch": 1.46,
"learning_rate": 2.6684655775962666e-06,
"loss": 0.2547,
"step": 3424
},
{
"epoch": 1.46,
"learning_rate": 2.665548424737456e-06,
"loss": 0.4183,
"step": 3428
},
{
"epoch": 1.46,
"learning_rate": 2.662631271878647e-06,
"loss": 0.4181,
"step": 3432
},
{
"epoch": 1.46,
"learning_rate": 2.659714119019837e-06,
"loss": 0.5512,
"step": 3436
},
{
"epoch": 1.46,
"learning_rate": 2.6567969661610272e-06,
"loss": 0.4187,
"step": 3440
},
{
"epoch": 1.46,
"learning_rate": 2.653879813302217e-06,
"loss": 0.2411,
"step": 3444
},
{
"epoch": 1.47,
"learning_rate": 2.6509626604434075e-06,
"loss": 0.3652,
"step": 3448
},
{
"epoch": 1.47,
"learning_rate": 2.648045507584598e-06,
"loss": 0.4122,
"step": 3452
},
{
"epoch": 1.47,
"learning_rate": 2.645128354725788e-06,
"loss": 0.2771,
"step": 3456
},
{
"epoch": 1.47,
"learning_rate": 2.642211201866978e-06,
"loss": 0.3256,
"step": 3460
},
{
"epoch": 1.47,
"learning_rate": 2.639294049008168e-06,
"loss": 0.53,
"step": 3464
},
{
"epoch": 1.47,
"learning_rate": 2.6363768961493585e-06,
"loss": 0.2602,
"step": 3468
},
{
"epoch": 1.48,
"learning_rate": 2.6334597432905484e-06,
"loss": 0.2461,
"step": 3472
},
{
"epoch": 1.48,
"learning_rate": 2.630542590431739e-06,
"loss": 0.3867,
"step": 3476
},
{
"epoch": 1.48,
"learning_rate": 2.6276254375729287e-06,
"loss": 0.4217,
"step": 3480
},
{
"epoch": 1.48,
"learning_rate": 2.624708284714119e-06,
"loss": 0.4234,
"step": 3484
},
{
"epoch": 1.48,
"learning_rate": 2.6217911318553095e-06,
"loss": 0.3664,
"step": 3488
},
{
"epoch": 1.48,
"learning_rate": 2.6188739789964994e-06,
"loss": 0.5729,
"step": 3492
},
{
"epoch": 1.49,
"learning_rate": 2.6159568261376898e-06,
"loss": 0.5175,
"step": 3496
},
{
"epoch": 1.49,
"learning_rate": 2.6130396732788797e-06,
"loss": 0.4228,
"step": 3500
},
{
"epoch": 1.49,
"learning_rate": 2.6101225204200705e-06,
"loss": 0.3762,
"step": 3504
},
{
"epoch": 1.49,
"learning_rate": 2.60720536756126e-06,
"loss": 0.4211,
"step": 3508
},
{
"epoch": 1.49,
"learning_rate": 2.604288214702451e-06,
"loss": 0.3969,
"step": 3512
},
{
"epoch": 1.49,
"learning_rate": 2.601371061843641e-06,
"loss": 0.3794,
"step": 3516
},
{
"epoch": 1.5,
"learning_rate": 2.598453908984831e-06,
"loss": 0.2235,
"step": 3520
},
{
"epoch": 1.5,
"learning_rate": 2.5955367561260215e-06,
"loss": 0.3842,
"step": 3524
},
{
"epoch": 1.5,
"learning_rate": 2.5926196032672114e-06,
"loss": 0.3081,
"step": 3528
},
{
"epoch": 1.5,
"learning_rate": 2.5897024504084018e-06,
"loss": 0.267,
"step": 3532
},
{
"epoch": 1.5,
"learning_rate": 2.5867852975495917e-06,
"loss": 0.2976,
"step": 3536
},
{
"epoch": 1.51,
"learning_rate": 2.583868144690782e-06,
"loss": 0.4383,
"step": 3540
},
{
"epoch": 1.51,
"learning_rate": 2.580950991831972e-06,
"loss": 0.3175,
"step": 3544
},
{
"epoch": 1.51,
"learning_rate": 2.5780338389731624e-06,
"loss": 0.3118,
"step": 3548
},
{
"epoch": 1.51,
"learning_rate": 2.5751166861143527e-06,
"loss": 0.4329,
"step": 3552
},
{
"epoch": 1.51,
"learning_rate": 2.5721995332555427e-06,
"loss": 0.4936,
"step": 3556
},
{
"epoch": 1.51,
"learning_rate": 2.569282380396733e-06,
"loss": 0.4872,
"step": 3560
},
{
"epoch": 1.52,
"learning_rate": 2.566365227537923e-06,
"loss": 0.431,
"step": 3564
},
{
"epoch": 1.52,
"learning_rate": 2.5634480746791133e-06,
"loss": 0.5265,
"step": 3568
},
{
"epoch": 1.52,
"learning_rate": 2.5605309218203033e-06,
"loss": 0.3655,
"step": 3572
},
{
"epoch": 1.52,
"learning_rate": 2.5576137689614936e-06,
"loss": 0.342,
"step": 3576
},
{
"epoch": 1.52,
"learning_rate": 2.5546966161026836e-06,
"loss": 0.4835,
"step": 3580
},
{
"epoch": 1.52,
"learning_rate": 2.5517794632438743e-06,
"loss": 0.2614,
"step": 3584
},
{
"epoch": 1.53,
"learning_rate": 2.5488623103850647e-06,
"loss": 0.3411,
"step": 3588
},
{
"epoch": 1.53,
"learning_rate": 2.5459451575262546e-06,
"loss": 0.4997,
"step": 3592
},
{
"epoch": 1.53,
"learning_rate": 2.543028004667445e-06,
"loss": 0.461,
"step": 3596
},
{
"epoch": 1.53,
"learning_rate": 2.540110851808635e-06,
"loss": 0.4112,
"step": 3600
},
{
"epoch": 1.53,
"learning_rate": 2.5371936989498253e-06,
"loss": 0.4968,
"step": 3604
},
{
"epoch": 1.53,
"learning_rate": 2.5342765460910152e-06,
"loss": 0.3994,
"step": 3608
},
{
"epoch": 1.54,
"learning_rate": 2.5313593932322056e-06,
"loss": 0.4559,
"step": 3612
},
{
"epoch": 1.54,
"learning_rate": 2.528442240373396e-06,
"loss": 0.3394,
"step": 3616
},
{
"epoch": 1.54,
"learning_rate": 2.525525087514586e-06,
"loss": 0.3952,
"step": 3620
},
{
"epoch": 1.54,
"learning_rate": 2.5226079346557763e-06,
"loss": 0.318,
"step": 3624
},
{
"epoch": 1.54,
"learning_rate": 2.5196907817969662e-06,
"loss": 0.3204,
"step": 3628
},
{
"epoch": 1.54,
"learning_rate": 2.5167736289381566e-06,
"loss": 0.3047,
"step": 3632
},
{
"epoch": 1.55,
"learning_rate": 2.5138564760793465e-06,
"loss": 0.3223,
"step": 3636
},
{
"epoch": 1.55,
"learning_rate": 2.510939323220537e-06,
"loss": 0.2073,
"step": 3640
},
{
"epoch": 1.55,
"learning_rate": 2.508022170361727e-06,
"loss": 0.3696,
"step": 3644
},
{
"epoch": 1.55,
"learning_rate": 2.505105017502917e-06,
"loss": 0.4404,
"step": 3648
},
{
"epoch": 1.55,
"learning_rate": 2.502187864644108e-06,
"loss": 0.3907,
"step": 3652
},
{
"epoch": 1.55,
"learning_rate": 2.4992707117852975e-06,
"loss": 0.4624,
"step": 3656
},
{
"epoch": 1.56,
"learning_rate": 2.496353558926488e-06,
"loss": 0.4258,
"step": 3660
},
{
"epoch": 1.56,
"learning_rate": 2.493436406067678e-06,
"loss": 0.421,
"step": 3664
},
{
"epoch": 1.56,
"learning_rate": 2.4905192532088686e-06,
"loss": 0.3276,
"step": 3668
},
{
"epoch": 1.56,
"learning_rate": 2.4876021003500585e-06,
"loss": 0.4259,
"step": 3672
},
{
"epoch": 1.56,
"learning_rate": 2.484684947491249e-06,
"loss": 0.3363,
"step": 3676
},
{
"epoch": 1.56,
"learning_rate": 2.481767794632439e-06,
"loss": 0.3734,
"step": 3680
},
{
"epoch": 1.57,
"learning_rate": 2.478850641773629e-06,
"loss": 0.4299,
"step": 3684
},
{
"epoch": 1.57,
"learning_rate": 2.475933488914819e-06,
"loss": 0.3697,
"step": 3688
},
{
"epoch": 1.57,
"learning_rate": 2.4730163360560095e-06,
"loss": 0.3721,
"step": 3692
},
{
"epoch": 1.57,
"learning_rate": 2.4700991831971994e-06,
"loss": 0.4954,
"step": 3696
},
{
"epoch": 1.57,
"learning_rate": 2.46718203033839e-06,
"loss": 0.2761,
"step": 3700
},
{
"epoch": 1.57,
"learning_rate": 2.46426487747958e-06,
"loss": 0.3485,
"step": 3704
},
{
"epoch": 1.58,
"learning_rate": 2.4613477246207705e-06,
"loss": 0.3959,
"step": 3708
},
{
"epoch": 1.58,
"learning_rate": 2.4584305717619604e-06,
"loss": 0.421,
"step": 3712
},
{
"epoch": 1.58,
"learning_rate": 2.455513418903151e-06,
"loss": 0.3158,
"step": 3716
},
{
"epoch": 1.58,
"learning_rate": 2.4525962660443407e-06,
"loss": 0.3167,
"step": 3720
},
{
"epoch": 1.58,
"learning_rate": 2.449679113185531e-06,
"loss": 0.4065,
"step": 3724
},
{
"epoch": 1.59,
"learning_rate": 2.446761960326721e-06,
"loss": 0.406,
"step": 3728
},
{
"epoch": 1.59,
"learning_rate": 2.443844807467912e-06,
"loss": 0.4013,
"step": 3732
},
{
"epoch": 1.59,
"learning_rate": 2.4409276546091018e-06,
"loss": 0.5664,
"step": 3736
},
{
"epoch": 1.59,
"learning_rate": 2.438010501750292e-06,
"loss": 0.3301,
"step": 3740
},
{
"epoch": 1.59,
"learning_rate": 2.435093348891482e-06,
"loss": 0.3695,
"step": 3744
},
{
"epoch": 1.59,
"learning_rate": 2.4321761960326724e-06,
"loss": 0.3426,
"step": 3748
},
{
"epoch": 1.6,
"learning_rate": 2.4292590431738624e-06,
"loss": 0.3434,
"step": 3752
},
{
"epoch": 1.6,
"learning_rate": 2.4263418903150527e-06,
"loss": 0.3886,
"step": 3756
},
{
"epoch": 1.6,
"learning_rate": 2.4234247374562427e-06,
"loss": 0.4117,
"step": 3760
},
{
"epoch": 1.6,
"learning_rate": 2.4205075845974334e-06,
"loss": 0.4477,
"step": 3764
},
{
"epoch": 1.6,
"learning_rate": 2.4175904317386234e-06,
"loss": 0.4769,
"step": 3768
},
{
"epoch": 1.6,
"learning_rate": 2.4146732788798137e-06,
"loss": 0.4984,
"step": 3772
},
{
"epoch": 1.61,
"learning_rate": 2.4117561260210037e-06,
"loss": 0.3964,
"step": 3776
},
{
"epoch": 1.61,
"learning_rate": 2.408838973162194e-06,
"loss": 0.4827,
"step": 3780
},
{
"epoch": 1.61,
"learning_rate": 2.405921820303384e-06,
"loss": 0.3075,
"step": 3784
},
{
"epoch": 1.61,
"learning_rate": 2.4030046674445743e-06,
"loss": 0.2245,
"step": 3788
},
{
"epoch": 1.61,
"learning_rate": 2.4000875145857643e-06,
"loss": 0.2683,
"step": 3792
},
{
"epoch": 1.61,
"learning_rate": 2.3971703617269546e-06,
"loss": 0.4515,
"step": 3796
},
{
"epoch": 1.62,
"learning_rate": 2.394253208868145e-06,
"loss": 0.3369,
"step": 3800
},
{
"epoch": 1.62,
"learning_rate": 2.391336056009335e-06,
"loss": 0.2854,
"step": 3804
},
{
"epoch": 1.62,
"learning_rate": 2.3884189031505253e-06,
"loss": 0.2712,
"step": 3808
},
{
"epoch": 1.62,
"learning_rate": 2.3855017502917157e-06,
"loss": 0.3827,
"step": 3812
},
{
"epoch": 1.62,
"learning_rate": 2.3825845974329056e-06,
"loss": 0.2348,
"step": 3816
},
{
"epoch": 1.62,
"learning_rate": 2.379667444574096e-06,
"loss": 0.2503,
"step": 3820
},
{
"epoch": 1.63,
"learning_rate": 2.376750291715286e-06,
"loss": 0.2814,
"step": 3824
},
{
"epoch": 1.63,
"learning_rate": 2.3738331388564763e-06,
"loss": 0.4045,
"step": 3828
},
{
"epoch": 1.63,
"learning_rate": 2.3709159859976666e-06,
"loss": 0.5534,
"step": 3832
},
{
"epoch": 1.63,
"learning_rate": 2.3679988331388566e-06,
"loss": 0.4016,
"step": 3836
},
{
"epoch": 1.63,
"learning_rate": 2.365081680280047e-06,
"loss": 0.4375,
"step": 3840
},
{
"epoch": 1.63,
"learning_rate": 2.362164527421237e-06,
"loss": 0.3761,
"step": 3844
},
{
"epoch": 1.64,
"learning_rate": 2.3592473745624272e-06,
"loss": 0.3525,
"step": 3848
},
{
"epoch": 1.64,
"learning_rate": 2.3563302217036176e-06,
"loss": 0.3385,
"step": 3852
},
{
"epoch": 1.64,
"learning_rate": 2.3534130688448075e-06,
"loss": 0.393,
"step": 3856
},
{
"epoch": 1.64,
"learning_rate": 2.350495915985998e-06,
"loss": 0.4507,
"step": 3860
},
{
"epoch": 1.64,
"learning_rate": 2.3475787631271883e-06,
"loss": 0.2481,
"step": 3864
},
{
"epoch": 1.64,
"learning_rate": 2.344661610268378e-06,
"loss": 0.2887,
"step": 3868
},
{
"epoch": 1.65,
"learning_rate": 2.3417444574095686e-06,
"loss": 0.3081,
"step": 3872
},
{
"epoch": 1.65,
"learning_rate": 2.3388273045507585e-06,
"loss": 0.3454,
"step": 3876
},
{
"epoch": 1.65,
"learning_rate": 2.335910151691949e-06,
"loss": 0.4006,
"step": 3880
},
{
"epoch": 1.65,
"learning_rate": 2.332992998833139e-06,
"loss": 0.3328,
"step": 3884
},
{
"epoch": 1.65,
"learning_rate": 2.330075845974329e-06,
"loss": 0.3802,
"step": 3888
},
{
"epoch": 1.65,
"learning_rate": 2.3271586931155195e-06,
"loss": 0.538,
"step": 3892
},
{
"epoch": 1.66,
"learning_rate": 2.3242415402567095e-06,
"loss": 0.4035,
"step": 3896
},
{
"epoch": 1.66,
"learning_rate": 2.3213243873979e-06,
"loss": 0.3538,
"step": 3900
},
{
"epoch": 1.66,
"learning_rate": 2.31840723453909e-06,
"loss": 0.2945,
"step": 3904
},
{
"epoch": 1.66,
"learning_rate": 2.31549008168028e-06,
"loss": 0.3023,
"step": 3908
},
{
"epoch": 1.66,
"learning_rate": 2.3125729288214705e-06,
"loss": 0.4806,
"step": 3912
},
{
"epoch": 1.66,
"learning_rate": 2.3096557759626604e-06,
"loss": 0.4009,
"step": 3916
},
{
"epoch": 1.67,
"learning_rate": 2.306738623103851e-06,
"loss": 0.4475,
"step": 3920
},
{
"epoch": 1.67,
"learning_rate": 2.3038214702450407e-06,
"loss": 0.2655,
"step": 3924
},
{
"epoch": 1.67,
"learning_rate": 2.300904317386231e-06,
"loss": 0.3898,
"step": 3928
},
{
"epoch": 1.67,
"learning_rate": 2.2979871645274215e-06,
"loss": 0.2847,
"step": 3932
},
{
"epoch": 1.67,
"learning_rate": 2.295070011668612e-06,
"loss": 0.5787,
"step": 3936
},
{
"epoch": 1.68,
"learning_rate": 2.2921528588098018e-06,
"loss": 0.4133,
"step": 3940
},
{
"epoch": 1.68,
"learning_rate": 2.289235705950992e-06,
"loss": 0.2984,
"step": 3944
},
{
"epoch": 1.68,
"learning_rate": 2.286318553092182e-06,
"loss": 0.3097,
"step": 3948
},
{
"epoch": 1.68,
"learning_rate": 2.2834014002333724e-06,
"loss": 0.3854,
"step": 3952
},
{
"epoch": 1.68,
"learning_rate": 2.2804842473745624e-06,
"loss": 0.5068,
"step": 3956
},
{
"epoch": 1.68,
"learning_rate": 2.2775670945157527e-06,
"loss": 0.3831,
"step": 3960
},
{
"epoch": 1.69,
"learning_rate": 2.274649941656943e-06,
"loss": 0.2283,
"step": 3964
},
{
"epoch": 1.69,
"learning_rate": 2.2717327887981334e-06,
"loss": 0.3432,
"step": 3968
},
{
"epoch": 1.69,
"learning_rate": 2.2688156359393234e-06,
"loss": 0.4152,
"step": 3972
},
{
"epoch": 1.69,
"learning_rate": 2.2658984830805137e-06,
"loss": 0.2857,
"step": 3976
},
{
"epoch": 1.69,
"learning_rate": 2.2629813302217037e-06,
"loss": 0.39,
"step": 3980
},
{
"epoch": 1.69,
"learning_rate": 2.260064177362894e-06,
"loss": 0.3972,
"step": 3984
},
{
"epoch": 1.7,
"learning_rate": 2.257147024504084e-06,
"loss": 0.3207,
"step": 3988
},
{
"epoch": 1.7,
"learning_rate": 2.2542298716452743e-06,
"loss": 0.4362,
"step": 3992
},
{
"epoch": 1.7,
"learning_rate": 2.2513127187864643e-06,
"loss": 0.3839,
"step": 3996
},
{
"epoch": 1.7,
"learning_rate": 2.248395565927655e-06,
"loss": 0.211,
"step": 4000
},
{
"epoch": 1.7,
"learning_rate": 2.245478413068845e-06,
"loss": 0.4071,
"step": 4004
},
{
"epoch": 1.7,
"learning_rate": 2.2425612602100354e-06,
"loss": 0.2785,
"step": 4008
},
{
"epoch": 1.71,
"learning_rate": 2.2396441073512253e-06,
"loss": 0.4274,
"step": 4012
},
{
"epoch": 1.71,
"learning_rate": 2.2367269544924157e-06,
"loss": 0.3813,
"step": 4016
},
{
"epoch": 1.71,
"learning_rate": 2.2338098016336056e-06,
"loss": 0.3138,
"step": 4020
},
{
"epoch": 1.71,
"learning_rate": 2.230892648774796e-06,
"loss": 0.3181,
"step": 4024
},
{
"epoch": 1.71,
"learning_rate": 2.227975495915986e-06,
"loss": 0.4108,
"step": 4028
},
{
"epoch": 1.71,
"learning_rate": 2.2250583430571767e-06,
"loss": 0.3285,
"step": 4032
},
{
"epoch": 1.72,
"learning_rate": 2.2221411901983666e-06,
"loss": 0.2244,
"step": 4036
},
{
"epoch": 1.72,
"learning_rate": 2.219224037339557e-06,
"loss": 0.4148,
"step": 4040
},
{
"epoch": 1.72,
"learning_rate": 2.216306884480747e-06,
"loss": 0.418,
"step": 4044
},
{
"epoch": 1.72,
"learning_rate": 2.2133897316219373e-06,
"loss": 0.4441,
"step": 4048
},
{
"epoch": 1.72,
"learning_rate": 2.2104725787631272e-06,
"loss": 0.358,
"step": 4052
},
{
"epoch": 1.72,
"learning_rate": 2.2075554259043176e-06,
"loss": 0.2615,
"step": 4056
},
{
"epoch": 1.73,
"learning_rate": 2.2046382730455075e-06,
"loss": 0.3992,
"step": 4060
},
{
"epoch": 1.73,
"learning_rate": 2.201721120186698e-06,
"loss": 0.4608,
"step": 4064
},
{
"epoch": 1.73,
"learning_rate": 2.1988039673278883e-06,
"loss": 0.2661,
"step": 4068
},
{
"epoch": 1.73,
"learning_rate": 2.1958868144690786e-06,
"loss": 0.4447,
"step": 4072
},
{
"epoch": 1.73,
"learning_rate": 2.1929696616102686e-06,
"loss": 0.3915,
"step": 4076
},
{
"epoch": 1.73,
"learning_rate": 2.190052508751459e-06,
"loss": 0.3283,
"step": 4080
},
{
"epoch": 1.74,
"learning_rate": 2.187135355892649e-06,
"loss": 0.3887,
"step": 4084
},
{
"epoch": 1.74,
"learning_rate": 2.1842182030338392e-06,
"loss": 0.3772,
"step": 4088
},
{
"epoch": 1.74,
"learning_rate": 2.181301050175029e-06,
"loss": 0.5242,
"step": 4092
},
{
"epoch": 1.74,
"learning_rate": 2.1783838973162195e-06,
"loss": 0.2624,
"step": 4096
},
{
"epoch": 1.74,
"learning_rate": 2.17546674445741e-06,
"loss": 0.4775,
"step": 4100
},
{
"epoch": 1.74,
"learning_rate": 2.1725495915986e-06,
"loss": 0.4693,
"step": 4104
},
{
"epoch": 1.75,
"learning_rate": 2.16963243873979e-06,
"loss": 0.2954,
"step": 4108
},
{
"epoch": 1.75,
"learning_rate": 2.1667152858809806e-06,
"loss": 0.386,
"step": 4112
},
{
"epoch": 1.75,
"learning_rate": 2.1637981330221705e-06,
"loss": 0.2375,
"step": 4116
},
{
"epoch": 1.75,
"learning_rate": 2.160880980163361e-06,
"loss": 0.456,
"step": 4120
},
{
"epoch": 1.75,
"learning_rate": 2.157963827304551e-06,
"loss": 0.5585,
"step": 4124
},
{
"epoch": 1.76,
"learning_rate": 2.155046674445741e-06,
"loss": 0.2531,
"step": 4128
},
{
"epoch": 1.76,
"learning_rate": 2.1521295215869315e-06,
"loss": 0.3648,
"step": 4132
},
{
"epoch": 1.76,
"learning_rate": 2.1492123687281215e-06,
"loss": 0.4723,
"step": 4136
},
{
"epoch": 1.76,
"learning_rate": 2.146295215869312e-06,
"loss": 0.2357,
"step": 4140
},
{
"epoch": 1.76,
"learning_rate": 2.1433780630105018e-06,
"loss": 0.4827,
"step": 4144
},
{
"epoch": 1.76,
"learning_rate": 2.140460910151692e-06,
"loss": 0.3387,
"step": 4148
},
{
"epoch": 1.77,
"learning_rate": 2.137543757292882e-06,
"loss": 0.3946,
"step": 4152
},
{
"epoch": 1.77,
"learning_rate": 2.1346266044340724e-06,
"loss": 0.1965,
"step": 4156
},
{
"epoch": 1.77,
"learning_rate": 2.1317094515752628e-06,
"loss": 0.3286,
"step": 4160
},
{
"epoch": 1.77,
"learning_rate": 2.128792298716453e-06,
"loss": 0.3196,
"step": 4164
},
{
"epoch": 1.77,
"learning_rate": 2.125875145857643e-06,
"loss": 0.2477,
"step": 4168
},
{
"epoch": 1.77,
"learning_rate": 2.1229579929988334e-06,
"loss": 0.3666,
"step": 4172
},
{
"epoch": 1.78,
"learning_rate": 2.1200408401400234e-06,
"loss": 0.5021,
"step": 4176
},
{
"epoch": 1.78,
"learning_rate": 2.117852975495916e-06,
"loss": 0.5295,
"step": 4180
},
{
"epoch": 1.78,
"learning_rate": 2.1149358226371064e-06,
"loss": 0.4222,
"step": 4184
},
{
"epoch": 1.78,
"learning_rate": 2.1120186697782964e-06,
"loss": 0.4243,
"step": 4188
},
{
"epoch": 1.78,
"learning_rate": 2.1091015169194867e-06,
"loss": 0.5245,
"step": 4192
},
{
"epoch": 1.78,
"learning_rate": 2.106184364060677e-06,
"loss": 0.2765,
"step": 4196
},
{
"epoch": 1.79,
"learning_rate": 2.103267211201867e-06,
"loss": 0.4902,
"step": 4200
},
{
"epoch": 1.79,
"learning_rate": 2.1003500583430574e-06,
"loss": 0.2659,
"step": 4204
},
{
"epoch": 1.79,
"learning_rate": 2.0974329054842477e-06,
"loss": 0.512,
"step": 4208
},
{
"epoch": 1.79,
"learning_rate": 2.0945157526254377e-06,
"loss": 0.369,
"step": 4212
},
{
"epoch": 1.79,
"learning_rate": 2.091598599766628e-06,
"loss": 0.4157,
"step": 4216
},
{
"epoch": 1.79,
"learning_rate": 2.088681446907818e-06,
"loss": 0.2203,
"step": 4220
},
{
"epoch": 1.8,
"learning_rate": 2.0857642940490083e-06,
"loss": 0.2195,
"step": 4224
},
{
"epoch": 1.8,
"learning_rate": 2.0828471411901983e-06,
"loss": 0.4235,
"step": 4228
},
{
"epoch": 1.8,
"learning_rate": 2.0799299883313886e-06,
"loss": 0.2623,
"step": 4232
},
{
"epoch": 1.8,
"learning_rate": 2.077012835472579e-06,
"loss": 0.4724,
"step": 4236
},
{
"epoch": 1.8,
"learning_rate": 2.0740956826137694e-06,
"loss": 0.5003,
"step": 4240
},
{
"epoch": 1.8,
"learning_rate": 2.0711785297549593e-06,
"loss": 0.3844,
"step": 4244
},
{
"epoch": 1.81,
"learning_rate": 2.0682613768961497e-06,
"loss": 0.402,
"step": 4248
},
{
"epoch": 1.81,
"learning_rate": 2.0653442240373396e-06,
"loss": 0.2482,
"step": 4252
},
{
"epoch": 1.81,
"learning_rate": 2.06242707117853e-06,
"loss": 0.3593,
"step": 4256
},
{
"epoch": 1.81,
"learning_rate": 2.05950991831972e-06,
"loss": 0.2561,
"step": 4260
},
{
"epoch": 1.81,
"learning_rate": 2.0565927654609103e-06,
"loss": 0.4176,
"step": 4264
},
{
"epoch": 1.81,
"learning_rate": 2.0536756126021e-06,
"loss": 0.2596,
"step": 4268
},
{
"epoch": 1.82,
"learning_rate": 2.050758459743291e-06,
"loss": 0.3554,
"step": 4272
},
{
"epoch": 1.82,
"learning_rate": 2.047841306884481e-06,
"loss": 0.3388,
"step": 4276
},
{
"epoch": 1.82,
"learning_rate": 2.0449241540256713e-06,
"loss": 0.4103,
"step": 4280
},
{
"epoch": 1.82,
"learning_rate": 2.0420070011668612e-06,
"loss": 0.3023,
"step": 4284
},
{
"epoch": 1.82,
"learning_rate": 2.0390898483080516e-06,
"loss": 0.4772,
"step": 4288
},
{
"epoch": 1.82,
"learning_rate": 2.0361726954492415e-06,
"loss": 0.2974,
"step": 4292
},
{
"epoch": 1.83,
"learning_rate": 2.033255542590432e-06,
"loss": 0.4114,
"step": 4296
},
{
"epoch": 1.83,
"learning_rate": 2.030338389731622e-06,
"loss": 0.2369,
"step": 4300
},
{
"epoch": 1.83,
"learning_rate": 2.027421236872812e-06,
"loss": 0.3393,
"step": 4304
},
{
"epoch": 1.83,
"learning_rate": 2.0245040840140026e-06,
"loss": 0.2298,
"step": 4308
},
{
"epoch": 1.83,
"learning_rate": 2.021586931155193e-06,
"loss": 0.3373,
"step": 4312
},
{
"epoch": 1.84,
"learning_rate": 2.018669778296383e-06,
"loss": 0.3498,
"step": 4316
},
{
"epoch": 1.84,
"learning_rate": 2.0157526254375732e-06,
"loss": 0.4742,
"step": 4320
},
{
"epoch": 1.84,
"learning_rate": 2.012835472578763e-06,
"loss": 0.3716,
"step": 4324
},
{
"epoch": 1.84,
"learning_rate": 2.0099183197199535e-06,
"loss": 0.4141,
"step": 4328
},
{
"epoch": 1.84,
"learning_rate": 2.0070011668611435e-06,
"loss": 0.3451,
"step": 4332
},
{
"epoch": 1.84,
"learning_rate": 2.004084014002334e-06,
"loss": 0.3455,
"step": 4336
},
{
"epoch": 1.85,
"learning_rate": 2.001166861143524e-06,
"loss": 0.4263,
"step": 4340
},
{
"epoch": 1.85,
"learning_rate": 1.9982497082847146e-06,
"loss": 0.3155,
"step": 4344
},
{
"epoch": 1.85,
"learning_rate": 1.9953325554259045e-06,
"loss": 0.254,
"step": 4348
},
{
"epoch": 1.85,
"learning_rate": 1.992415402567095e-06,
"loss": 0.2274,
"step": 4352
},
{
"epoch": 1.85,
"learning_rate": 1.989498249708285e-06,
"loss": 0.2758,
"step": 4356
},
{
"epoch": 1.85,
"learning_rate": 1.986581096849475e-06,
"loss": 0.2397,
"step": 4360
},
{
"epoch": 1.86,
"learning_rate": 1.983663943990665e-06,
"loss": 0.2506,
"step": 4364
},
{
"epoch": 1.86,
"learning_rate": 1.9807467911318555e-06,
"loss": 0.4136,
"step": 4368
},
{
"epoch": 1.86,
"learning_rate": 1.977829638273046e-06,
"loss": 0.3309,
"step": 4372
},
{
"epoch": 1.86,
"learning_rate": 1.974912485414236e-06,
"loss": 0.2924,
"step": 4376
},
{
"epoch": 1.86,
"learning_rate": 1.971995332555426e-06,
"loss": 0.1428,
"step": 4380
},
{
"epoch": 1.86,
"learning_rate": 1.9690781796966165e-06,
"loss": 0.4771,
"step": 4384
},
{
"epoch": 1.87,
"learning_rate": 1.9661610268378064e-06,
"loss": 0.4117,
"step": 4388
},
{
"epoch": 1.87,
"learning_rate": 1.9632438739789968e-06,
"loss": 0.1177,
"step": 4392
},
{
"epoch": 1.87,
"learning_rate": 1.9603267211201867e-06,
"loss": 0.538,
"step": 4396
},
{
"epoch": 1.87,
"learning_rate": 1.957409568261377e-06,
"loss": 0.2241,
"step": 4400
},
{
"epoch": 1.87,
"learning_rate": 1.954492415402567e-06,
"loss": 0.4846,
"step": 4404
},
{
"epoch": 1.87,
"learning_rate": 1.9515752625437574e-06,
"loss": 0.3649,
"step": 4408
},
{
"epoch": 1.88,
"learning_rate": 1.9486581096849477e-06,
"loss": 0.4989,
"step": 4412
},
{
"epoch": 1.88,
"learning_rate": 1.945740956826138e-06,
"loss": 0.3074,
"step": 4416
},
{
"epoch": 1.88,
"learning_rate": 1.942823803967328e-06,
"loss": 0.3055,
"step": 4420
},
{
"epoch": 1.88,
"learning_rate": 1.9399066511085184e-06,
"loss": 0.3966,
"step": 4424
},
{
"epoch": 1.88,
"learning_rate": 1.9369894982497083e-06,
"loss": 0.3061,
"step": 4428
},
{
"epoch": 1.88,
"learning_rate": 1.9340723453908987e-06,
"loss": 0.3059,
"step": 4432
},
{
"epoch": 1.89,
"learning_rate": 1.9311551925320886e-06,
"loss": 0.4346,
"step": 4436
},
{
"epoch": 1.89,
"learning_rate": 1.928238039673279e-06,
"loss": 0.4984,
"step": 4440
},
{
"epoch": 1.89,
"learning_rate": 1.9253208868144694e-06,
"loss": 0.2097,
"step": 4444
},
{
"epoch": 1.89,
"learning_rate": 1.9224037339556593e-06,
"loss": 0.4416,
"step": 4448
},
{
"epoch": 1.89,
"learning_rate": 1.9194865810968497e-06,
"loss": 0.3917,
"step": 4452
},
{
"epoch": 1.89,
"learning_rate": 1.9165694282380396e-06,
"loss": 0.2051,
"step": 4456
},
{
"epoch": 1.9,
"learning_rate": 1.91365227537923e-06,
"loss": 0.385,
"step": 4460
},
{
"epoch": 1.9,
"learning_rate": 1.9107351225204203e-06,
"loss": 0.5121,
"step": 4464
},
{
"epoch": 1.9,
"learning_rate": 1.9078179696616103e-06,
"loss": 0.3747,
"step": 4468
},
{
"epoch": 1.9,
"learning_rate": 1.9049008168028008e-06,
"loss": 0.2688,
"step": 4472
},
{
"epoch": 1.9,
"learning_rate": 1.901983663943991e-06,
"loss": 0.2459,
"step": 4476
},
{
"epoch": 1.9,
"learning_rate": 1.8990665110851811e-06,
"loss": 0.2148,
"step": 4480
},
{
"epoch": 1.91,
"learning_rate": 1.8961493582263713e-06,
"loss": 0.2333,
"step": 4484
},
{
"epoch": 1.91,
"learning_rate": 1.8932322053675614e-06,
"loss": 0.266,
"step": 4488
},
{
"epoch": 1.91,
"learning_rate": 1.8903150525087516e-06,
"loss": 0.3613,
"step": 4492
},
{
"epoch": 1.91,
"learning_rate": 1.8873978996499417e-06,
"loss": 0.1306,
"step": 4496
},
{
"epoch": 1.91,
"learning_rate": 1.884480746791132e-06,
"loss": 0.2521,
"step": 4500
},
{
"epoch": 1.91,
"learning_rate": 1.881563593932322e-06,
"loss": 0.2997,
"step": 4504
},
{
"epoch": 1.92,
"learning_rate": 1.8786464410735124e-06,
"loss": 0.4594,
"step": 4508
},
{
"epoch": 1.92,
"learning_rate": 1.8757292882147028e-06,
"loss": 0.2661,
"step": 4512
},
{
"epoch": 1.92,
"learning_rate": 1.872812135355893e-06,
"loss": 0.2776,
"step": 4516
},
{
"epoch": 1.92,
"learning_rate": 1.869894982497083e-06,
"loss": 0.4221,
"step": 4520
},
{
"epoch": 1.92,
"learning_rate": 1.8669778296382732e-06,
"loss": 0.234,
"step": 4524
},
{
"epoch": 1.93,
"learning_rate": 1.8640606767794634e-06,
"loss": 0.4304,
"step": 4528
},
{
"epoch": 1.93,
"learning_rate": 1.8611435239206535e-06,
"loss": 0.476,
"step": 4532
},
{
"epoch": 1.93,
"learning_rate": 1.8582263710618437e-06,
"loss": 0.2214,
"step": 4536
},
{
"epoch": 1.93,
"learning_rate": 1.855309218203034e-06,
"loss": 0.2805,
"step": 4540
},
{
"epoch": 1.93,
"learning_rate": 1.8523920653442242e-06,
"loss": 0.1151,
"step": 4544
},
{
"epoch": 1.93,
"learning_rate": 1.8494749124854143e-06,
"loss": 0.2069,
"step": 4548
},
{
"epoch": 1.94,
"learning_rate": 1.8465577596266047e-06,
"loss": 0.4162,
"step": 4552
},
{
"epoch": 1.94,
"learning_rate": 1.8436406067677949e-06,
"loss": 0.3101,
"step": 4556
},
{
"epoch": 1.94,
"learning_rate": 1.840723453908985e-06,
"loss": 0.272,
"step": 4560
},
{
"epoch": 1.94,
"learning_rate": 1.8378063010501752e-06,
"loss": 0.4017,
"step": 4564
},
{
"epoch": 1.94,
"learning_rate": 1.8348891481913653e-06,
"loss": 0.3501,
"step": 4568
},
{
"epoch": 1.94,
"learning_rate": 1.8319719953325557e-06,
"loss": 0.2287,
"step": 4572
},
{
"epoch": 1.95,
"learning_rate": 1.8290548424737458e-06,
"loss": 0.4951,
"step": 4576
},
{
"epoch": 1.95,
"learning_rate": 1.826137689614936e-06,
"loss": 0.5831,
"step": 4580
},
{
"epoch": 1.95,
"learning_rate": 1.8232205367561261e-06,
"loss": 0.3221,
"step": 4584
},
{
"epoch": 1.95,
"learning_rate": 1.8203033838973163e-06,
"loss": 0.3682,
"step": 4588
},
{
"epoch": 1.95,
"learning_rate": 1.8173862310385066e-06,
"loss": 0.299,
"step": 4592
},
{
"epoch": 1.95,
"learning_rate": 1.8144690781796968e-06,
"loss": 0.1729,
"step": 4596
},
{
"epoch": 1.96,
"learning_rate": 1.811551925320887e-06,
"loss": 0.214,
"step": 4600
},
{
"epoch": 1.96,
"learning_rate": 1.808634772462077e-06,
"loss": 0.3696,
"step": 4604
},
{
"epoch": 1.96,
"learning_rate": 1.8057176196032674e-06,
"loss": 0.4532,
"step": 4608
},
{
"epoch": 1.96,
"learning_rate": 1.8028004667444576e-06,
"loss": 0.241,
"step": 4612
},
{
"epoch": 1.96,
"learning_rate": 1.7998833138856477e-06,
"loss": 0.3685,
"step": 4616
},
{
"epoch": 1.96,
"learning_rate": 1.796966161026838e-06,
"loss": 0.3708,
"step": 4620
},
{
"epoch": 1.97,
"learning_rate": 1.794049008168028e-06,
"loss": 0.3228,
"step": 4624
},
{
"epoch": 1.97,
"learning_rate": 1.7911318553092182e-06,
"loss": 0.2311,
"step": 4628
},
{
"epoch": 1.97,
"learning_rate": 1.7882147024504086e-06,
"loss": 0.3598,
"step": 4632
},
{
"epoch": 1.97,
"learning_rate": 1.7852975495915987e-06,
"loss": 0.4134,
"step": 4636
},
{
"epoch": 1.97,
"learning_rate": 1.782380396732789e-06,
"loss": 0.2711,
"step": 4640
},
{
"epoch": 1.97,
"learning_rate": 1.7794632438739792e-06,
"loss": 0.503,
"step": 4644
},
{
"epoch": 1.98,
"learning_rate": 1.7765460910151694e-06,
"loss": 0.2192,
"step": 4648
},
{
"epoch": 1.98,
"learning_rate": 1.7736289381563595e-06,
"loss": 0.1547,
"step": 4652
},
{
"epoch": 1.98,
"learning_rate": 1.7707117852975497e-06,
"loss": 0.3002,
"step": 4656
},
{
"epoch": 1.98,
"learning_rate": 1.7677946324387398e-06,
"loss": 0.3846,
"step": 4660
},
{
"epoch": 1.98,
"learning_rate": 1.76487747957993e-06,
"loss": 0.4236,
"step": 4664
},
{
"epoch": 1.98,
"learning_rate": 1.7619603267211201e-06,
"loss": 0.3245,
"step": 4668
},
{
"epoch": 1.99,
"learning_rate": 1.7590431738623107e-06,
"loss": 0.3547,
"step": 4672
},
{
"epoch": 1.99,
"learning_rate": 1.7561260210035008e-06,
"loss": 0.2355,
"step": 4676
},
{
"epoch": 1.99,
"learning_rate": 1.753208868144691e-06,
"loss": 0.417,
"step": 4680
},
{
"epoch": 1.99,
"learning_rate": 1.7502917152858811e-06,
"loss": 0.4419,
"step": 4684
},
{
"epoch": 1.99,
"learning_rate": 1.7473745624270713e-06,
"loss": 0.4115,
"step": 4688
},
{
"epoch": 1.99,
"learning_rate": 1.7444574095682615e-06,
"loss": 0.2582,
"step": 4692
},
{
"epoch": 2.0,
"learning_rate": 1.7415402567094516e-06,
"loss": 0.3695,
"step": 4696
},
{
"epoch": 2.0,
"learning_rate": 1.7386231038506418e-06,
"loss": 0.3948,
"step": 4700
}
],
"logging_steps": 4,
"max_steps": 7056,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 53908118568960.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}