{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9989708404802744,
"eval_steps": 500,
"global_step": 728,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.090909090909091e-07,
"loss": 1.4022,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 1.8181818181818183e-06,
"loss": 1.4239,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 2.7272727272727272e-06,
"loss": 1.3843,
"step": 9
},
{
"epoch": 0.02,
"learning_rate": 3.6363636363636366e-06,
"loss": 1.3722,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 4.5454545454545455e-06,
"loss": 1.3411,
"step": 15
},
{
"epoch": 0.02,
"learning_rate": 5.4545454545454545e-06,
"loss": 1.3187,
"step": 18
},
{
"epoch": 0.03,
"learning_rate": 6.363636363636364e-06,
"loss": 1.284,
"step": 21
},
{
"epoch": 0.03,
"learning_rate": 7.272727272727273e-06,
"loss": 1.2492,
"step": 24
},
{
"epoch": 0.04,
"learning_rate": 8.181818181818183e-06,
"loss": 1.2658,
"step": 27
},
{
"epoch": 0.04,
"learning_rate": 9.090909090909091e-06,
"loss": 1.2173,
"step": 30
},
{
"epoch": 0.05,
"learning_rate": 1e-05,
"loss": 1.2302,
"step": 33
},
{
"epoch": 0.05,
"learning_rate": 1.0909090909090909e-05,
"loss": 1.2301,
"step": 36
},
{
"epoch": 0.05,
"learning_rate": 1.181818181818182e-05,
"loss": 1.1855,
"step": 39
},
{
"epoch": 0.06,
"learning_rate": 1.2727272727272728e-05,
"loss": 1.2094,
"step": 42
},
{
"epoch": 0.06,
"learning_rate": 1.3636363636363637e-05,
"loss": 1.1788,
"step": 45
},
{
"epoch": 0.07,
"learning_rate": 1.4545454545454546e-05,
"loss": 1.1804,
"step": 48
},
{
"epoch": 0.07,
"learning_rate": 1.5454545454545454e-05,
"loss": 1.166,
"step": 51
},
{
"epoch": 0.07,
"learning_rate": 1.6363636363636366e-05,
"loss": 1.1256,
"step": 54
},
{
"epoch": 0.08,
"learning_rate": 1.7272727272727274e-05,
"loss": 1.1289,
"step": 57
},
{
"epoch": 0.08,
"learning_rate": 1.8181818181818182e-05,
"loss": 1.1392,
"step": 60
},
{
"epoch": 0.09,
"learning_rate": 1.9090909090909094e-05,
"loss": 1.131,
"step": 63
},
{
"epoch": 0.09,
"learning_rate": 2e-05,
"loss": 1.1288,
"step": 66
},
{
"epoch": 0.09,
"learning_rate": 1.9999900994429424e-05,
"loss": 1.1198,
"step": 69
},
{
"epoch": 0.1,
"learning_rate": 1.999960397967811e-05,
"loss": 1.1281,
"step": 72
},
{
"epoch": 0.1,
"learning_rate": 1.9999108961627284e-05,
"loss": 1.134,
"step": 75
},
{
"epoch": 0.11,
"learning_rate": 1.9998415950078858e-05,
"loss": 1.1148,
"step": 78
},
{
"epoch": 0.11,
"learning_rate": 1.9997524958755226e-05,
"loss": 1.1162,
"step": 81
},
{
"epoch": 0.12,
"learning_rate": 1.9996436005299013e-05,
"loss": 1.12,
"step": 84
},
{
"epoch": 0.12,
"learning_rate": 1.999514911127271e-05,
"loss": 1.12,
"step": 87
},
{
"epoch": 0.12,
"learning_rate": 1.9993664302158255e-05,
"loss": 1.0938,
"step": 90
},
{
"epoch": 0.13,
"learning_rate": 1.9991981607356517e-05,
"loss": 1.0838,
"step": 93
},
{
"epoch": 0.13,
"learning_rate": 1.9990101060186732e-05,
"loss": 1.1078,
"step": 96
},
{
"epoch": 0.14,
"learning_rate": 1.998802269788583e-05,
"loss": 1.1037,
"step": 99
},
{
"epoch": 0.14,
"learning_rate": 1.9985746561607696e-05,
"loss": 1.0804,
"step": 102
},
{
"epoch": 0.14,
"learning_rate": 1.998327269642237e-05,
"loss": 1.0977,
"step": 105
},
{
"epoch": 0.15,
"learning_rate": 1.998060115131513e-05,
"loss": 1.1036,
"step": 108
},
{
"epoch": 0.15,
"learning_rate": 1.9977731979185556e-05,
"loss": 1.1109,
"step": 111
},
{
"epoch": 0.16,
"learning_rate": 1.9974665236846443e-05,
"loss": 1.0937,
"step": 114
},
{
"epoch": 0.16,
"learning_rate": 1.9971400985022712e-05,
"loss": 1.0834,
"step": 117
},
{
"epoch": 0.16,
"learning_rate": 1.9967939288350184e-05,
"loss": 1.1002,
"step": 120
},
{
"epoch": 0.17,
"learning_rate": 1.9964280215374312e-05,
"loss": 1.0847,
"step": 123
},
{
"epoch": 0.17,
"learning_rate": 1.9960423838548814e-05,
"loss": 1.0845,
"step": 126
},
{
"epoch": 0.18,
"learning_rate": 1.995637023423425e-05,
"loss": 1.0984,
"step": 129
},
{
"epoch": 0.18,
"learning_rate": 1.9952119482696504e-05,
"loss": 1.0836,
"step": 132
},
{
"epoch": 0.19,
"learning_rate": 1.9947671668105185e-05,
"loss": 1.082,
"step": 135
},
{
"epoch": 0.19,
"learning_rate": 1.9943026878531985e-05,
"loss": 1.0707,
"step": 138
},
{
"epoch": 0.19,
"learning_rate": 1.9938185205948906e-05,
"loss": 1.0545,
"step": 141
},
{
"epoch": 0.2,
"learning_rate": 1.993314674622646e-05,
"loss": 1.0618,
"step": 144
},
{
"epoch": 0.2,
"learning_rate": 1.992791159913177e-05,
"loss": 1.0514,
"step": 147
},
{
"epoch": 0.21,
"learning_rate": 1.992247986832658e-05,
"loss": 1.0733,
"step": 150
},
{
"epoch": 0.21,
"learning_rate": 1.99168516613652e-05,
"loss": 1.0712,
"step": 153
},
{
"epoch": 0.21,
"learning_rate": 1.991102708969241e-05,
"loss": 1.0788,
"step": 156
},
{
"epoch": 0.22,
"learning_rate": 1.9905006268641212e-05,
"loss": 1.0744,
"step": 159
},
{
"epoch": 0.22,
"learning_rate": 1.9898789317430577e-05,
"loss": 1.0621,
"step": 162
},
{
"epoch": 0.23,
"learning_rate": 1.9892376359163058e-05,
"loss": 1.0598,
"step": 165
},
{
"epoch": 0.23,
"learning_rate": 1.9885767520822377e-05,
"loss": 1.095,
"step": 168
},
{
"epoch": 0.23,
"learning_rate": 1.9878962933270896e-05,
"loss": 1.0666,
"step": 171
},
{
"epoch": 0.24,
"learning_rate": 1.987196273124703e-05,
"loss": 1.0657,
"step": 174
},
{
"epoch": 0.24,
"learning_rate": 1.986476705336258e-05,
"loss": 1.0691,
"step": 177
},
{
"epoch": 0.25,
"learning_rate": 1.9857376042099982e-05,
"loss": 1.0663,
"step": 180
},
{
"epoch": 0.25,
"learning_rate": 1.9849789843809496e-05,
"loss": 1.0476,
"step": 183
},
{
"epoch": 0.26,
"learning_rate": 1.9842008608706295e-05,
"loss": 1.0509,
"step": 186
},
{
"epoch": 0.26,
"learning_rate": 1.983403249086751e-05,
"loss": 1.0622,
"step": 189
},
{
"epoch": 0.26,
"learning_rate": 1.9825861648229154e-05,
"loss": 1.0708,
"step": 192
},
{
"epoch": 0.27,
"learning_rate": 1.981749624258302e-05,
"loss": 1.0672,
"step": 195
},
{
"epoch": 0.27,
"learning_rate": 1.9808936439573455e-05,
"loss": 1.0627,
"step": 198
},
{
"epoch": 0.28,
"learning_rate": 1.9800182408694096e-05,
"loss": 1.0726,
"step": 201
},
{
"epoch": 0.28,
"learning_rate": 1.9791234323284515e-05,
"loss": 1.0558,
"step": 204
},
{
"epoch": 0.28,
"learning_rate": 1.9782092360526763e-05,
"loss": 1.0677,
"step": 207
},
{
"epoch": 0.29,
"learning_rate": 1.977275670144189e-05,
"loss": 1.0422,
"step": 210
},
{
"epoch": 0.29,
"learning_rate": 1.9763227530886348e-05,
"loss": 1.0364,
"step": 213
},
{
"epoch": 0.3,
"learning_rate": 1.9753505037548334e-05,
"loss": 1.0475,
"step": 216
},
{
"epoch": 0.3,
"learning_rate": 1.974358941394404e-05,
"loss": 1.0508,
"step": 219
},
{
"epoch": 0.3,
"learning_rate": 1.973348085641387e-05,
"loss": 1.0595,
"step": 222
},
{
"epoch": 0.31,
"learning_rate": 1.972317956511852e-05,
"loss": 1.0528,
"step": 225
},
{
"epoch": 0.31,
"learning_rate": 1.971268574403503e-05,
"loss": 1.0562,
"step": 228
},
{
"epoch": 0.32,
"learning_rate": 1.970199960095276e-05,
"loss": 1.0329,
"step": 231
},
{
"epoch": 0.32,
"learning_rate": 1.9691121347469235e-05,
"loss": 1.045,
"step": 234
},
{
"epoch": 0.33,
"learning_rate": 1.9680051198986004e-05,
"loss": 1.0561,
"step": 237
},
{
"epoch": 0.33,
"learning_rate": 1.9668789374704337e-05,
"loss": 1.0449,
"step": 240
},
{
"epoch": 0.33,
"learning_rate": 1.9657336097620904e-05,
"loss": 1.0359,
"step": 243
},
{
"epoch": 0.34,
"learning_rate": 1.964569159452335e-05,
"loss": 1.0359,
"step": 246
},
{
"epoch": 0.34,
"learning_rate": 1.963385609598581e-05,
"loss": 1.0271,
"step": 249
},
{
"epoch": 0.35,
"learning_rate": 1.9621829836364335e-05,
"loss": 1.0563,
"step": 252
},
{
"epoch": 0.35,
"learning_rate": 1.9609613053792276e-05,
"loss": 1.0416,
"step": 255
},
{
"epoch": 0.35,
"learning_rate": 1.9597205990175528e-05,
"loss": 1.0578,
"step": 258
},
{
"epoch": 0.36,
"learning_rate": 1.958460889118778e-05,
"loss": 1.0461,
"step": 261
},
{
"epoch": 0.36,
"learning_rate": 1.9571822006265623e-05,
"loss": 1.0262,
"step": 264
},
{
"epoch": 0.37,
"learning_rate": 1.9558845588603625e-05,
"loss": 1.0254,
"step": 267
},
{
"epoch": 0.37,
"learning_rate": 1.9545679895149315e-05,
"loss": 1.0642,
"step": 270
},
{
"epoch": 0.37,
"learning_rate": 1.9532325186598093e-05,
"loss": 1.0456,
"step": 273
},
{
"epoch": 0.38,
"learning_rate": 1.951878172738806e-05,
"loss": 1.0358,
"step": 276
},
{
"epoch": 0.38,
"learning_rate": 1.9505049785694803e-05,
"loss": 1.0409,
"step": 279
},
{
"epoch": 0.39,
"learning_rate": 1.9491129633426068e-05,
"loss": 1.0382,
"step": 282
},
{
"epoch": 0.39,
"learning_rate": 1.9477021546216376e-05,
"loss": 1.0415,
"step": 285
},
{
"epoch": 0.4,
"learning_rate": 1.9462725803421566e-05,
"loss": 1.0308,
"step": 288
},
{
"epoch": 0.4,
"learning_rate": 1.9448242688113286e-05,
"loss": 1.0376,
"step": 291
},
{
"epoch": 0.4,
"learning_rate": 1.9433572487073343e-05,
"loss": 1.0259,
"step": 294
},
{
"epoch": 0.41,
"learning_rate": 1.9418715490788066e-05,
"loss": 1.0496,
"step": 297
},
{
"epoch": 0.41,
"learning_rate": 1.9403671993442534e-05,
"loss": 1.0519,
"step": 300
},
{
"epoch": 0.42,
"learning_rate": 1.9388442292914754e-05,
"loss": 1.0418,
"step": 303
},
{
"epoch": 0.42,
"learning_rate": 1.937302669076976e-05,
"loss": 1.0372,
"step": 306
},
{
"epoch": 0.42,
"learning_rate": 1.9357425492253662e-05,
"loss": 1.0347,
"step": 309
},
{
"epoch": 0.43,
"learning_rate": 1.934163900628756e-05,
"loss": 1.0253,
"step": 312
},
{
"epoch": 0.43,
"learning_rate": 1.9325667545461466e-05,
"loss": 1.0401,
"step": 315
},
{
"epoch": 0.44,
"learning_rate": 1.9309511426028105e-05,
"loss": 1.0282,
"step": 318
},
{
"epoch": 0.44,
"learning_rate": 1.9293170967896632e-05,
"loss": 1.0306,
"step": 321
},
{
"epoch": 0.44,
"learning_rate": 1.9276646494626333e-05,
"loss": 1.0313,
"step": 324
},
{
"epoch": 0.45,
"learning_rate": 1.9259938333420183e-05,
"loss": 1.0433,
"step": 327
},
{
"epoch": 0.45,
"learning_rate": 1.9243046815118387e-05,
"loss": 1.0232,
"step": 330
},
{
"epoch": 0.46,
"learning_rate": 1.922597227419183e-05,
"loss": 1.0222,
"step": 333
},
{
"epoch": 0.46,
"learning_rate": 1.9208715048735446e-05,
"loss": 1.0186,
"step": 336
},
{
"epoch": 0.47,
"learning_rate": 1.9191275480461525e-05,
"loss": 1.033,
"step": 339
},
{
"epoch": 0.47,
"learning_rate": 1.9173653914692947e-05,
"loss": 1.0342,
"step": 342
},
{
"epoch": 0.47,
"learning_rate": 1.9155850700356345e-05,
"loss": 1.035,
"step": 345
},
{
"epoch": 0.48,
"learning_rate": 1.91378661899752e-05,
"loss": 1.0206,
"step": 348
},
{
"epoch": 0.48,
"learning_rate": 1.9119700739662857e-05,
"loss": 1.0435,
"step": 351
},
{
"epoch": 0.49,
"learning_rate": 1.910135470911547e-05,
"loss": 1.0181,
"step": 354
},
{
"epoch": 0.49,
"learning_rate": 1.908282846160488e-05,
"loss": 1.0267,
"step": 357
},
{
"epoch": 0.49,
"learning_rate": 1.9064122363971426e-05,
"loss": 1.0365,
"step": 360
},
{
"epoch": 0.5,
"learning_rate": 1.904523678661669e-05,
"loss": 1.0381,
"step": 363
},
{
"epoch": 0.5,
"learning_rate": 1.9026172103496138e-05,
"loss": 1.0048,
"step": 366
},
{
"epoch": 0.51,
"learning_rate": 1.900692869211174e-05,
"loss": 1.0392,
"step": 369
},
{
"epoch": 0.51,
"learning_rate": 1.898750693350447e-05,
"loss": 1.0278,
"step": 372
},
{
"epoch": 0.51,
"learning_rate": 1.8967907212246803e-05,
"loss": 1.013,
"step": 375
},
{
"epoch": 0.52,
"learning_rate": 1.8948129916435048e-05,
"loss": 1.0385,
"step": 378
},
{
"epoch": 0.52,
"learning_rate": 1.8928175437681698e-05,
"loss": 1.0168,
"step": 381
},
{
"epoch": 0.53,
"learning_rate": 1.8908044171107658e-05,
"loss": 1.0123,
"step": 384
},
{
"epoch": 0.53,
"learning_rate": 1.8887736515334443e-05,
"loss": 1.015,
"step": 387
},
{
"epoch": 0.54,
"learning_rate": 1.8867252872476255e-05,
"loss": 1.0265,
"step": 390
},
{
"epoch": 0.54,
"learning_rate": 1.884659364813205e-05,
"loss": 0.9997,
"step": 393
},
{
"epoch": 0.54,
"learning_rate": 1.8825759251377484e-05,
"loss": 1.0109,
"step": 396
},
{
"epoch": 0.55,
"learning_rate": 1.8804750094756827e-05,
"loss": 1.0199,
"step": 399
},
{
"epoch": 0.55,
"learning_rate": 1.8783566594274783e-05,
"loss": 0.9998,
"step": 402
},
{
"epoch": 0.56,
"learning_rate": 1.8762209169388262e-05,
"loss": 1.0088,
"step": 405
},
{
"epoch": 0.56,
"learning_rate": 1.8740678242998077e-05,
"loss": 1.0022,
"step": 408
},
{
"epoch": 0.56,
"learning_rate": 1.8718974241440552e-05,
"loss": 1.0216,
"step": 411
},
{
"epoch": 0.57,
"learning_rate": 1.8697097594479103e-05,
"loss": 1.0248,
"step": 414
},
{
"epoch": 0.57,
"learning_rate": 1.867504873529571e-05,
"loss": 0.9974,
"step": 417
},
{
"epoch": 0.58,
"learning_rate": 1.865282810048235e-05,
"loss": 1.0138,
"step": 420
},
{
"epoch": 0.58,
"learning_rate": 1.8630436130032353e-05,
"loss": 1.0004,
"step": 423
},
{
"epoch": 0.58,
"learning_rate": 1.860787326733168e-05,
"loss": 1.0081,
"step": 426
},
{
"epoch": 0.59,
"learning_rate": 1.8585139959150144e-05,
"loss": 1.0238,
"step": 429
},
{
"epoch": 0.59,
"learning_rate": 1.856223665563258e-05,
"loss": 1.0328,
"step": 432
},
{
"epoch": 0.6,
"learning_rate": 1.8539163810289914e-05,
"loss": 1.0071,
"step": 435
},
{
"epoch": 0.6,
"learning_rate": 1.8515921879990187e-05,
"loss": 1.0134,
"step": 438
},
{
"epoch": 0.61,
"learning_rate": 1.8492511324949516e-05,
"loss": 1.0181,
"step": 441
},
{
"epoch": 0.61,
"learning_rate": 1.8468932608722975e-05,
"loss": 1.0363,
"step": 444
},
{
"epoch": 0.61,
"learning_rate": 1.8445186198195406e-05,
"loss": 1.0011,
"step": 447
},
{
"epoch": 0.62,
"learning_rate": 1.8421272563572202e-05,
"loss": 0.9993,
"step": 450
},
{
"epoch": 0.62,
"learning_rate": 1.8397192178369965e-05,
"loss": 1.0201,
"step": 453
},
{
"epoch": 0.63,
"learning_rate": 1.837294551940716e-05,
"loss": 0.987,
"step": 456
},
{
"epoch": 0.63,
"learning_rate": 1.834853306679464e-05,
"loss": 1.0106,
"step": 459
},
{
"epoch": 0.63,
"learning_rate": 1.8323955303926165e-05,
"loss": 1.0034,
"step": 462
},
{
"epoch": 0.64,
"learning_rate": 1.8299212717468825e-05,
"loss": 1.0095,
"step": 465
},
{
"epoch": 0.64,
"learning_rate": 1.8274305797353397e-05,
"loss": 0.9921,
"step": 468
},
{
"epoch": 0.65,
"learning_rate": 1.824923503676465e-05,
"loss": 0.9859,
"step": 471
},
{
"epoch": 0.65,
"learning_rate": 1.822400093213157e-05,
"loss": 1.017,
"step": 474
},
{
"epoch": 0.65,
"learning_rate": 1.8198603983117546e-05,
"loss": 1.0118,
"step": 477
},
{
"epoch": 0.66,
"learning_rate": 1.8173044692610466e-05,
"loss": 0.9912,
"step": 480
},
{
"epoch": 0.66,
"learning_rate": 1.8147323566712755e-05,
"loss": 1.0162,
"step": 483
},
{
"epoch": 0.67,
"learning_rate": 1.8121441114731366e-05,
"loss": 1.0089,
"step": 486
},
{
"epoch": 0.67,
"learning_rate": 1.809539784916768e-05,
"loss": 0.9752,
"step": 489
},
{
"epoch": 0.68,
"learning_rate": 1.806919428570737e-05,
"loss": 1.007,
"step": 492
},
{
"epoch": 0.68,
"learning_rate": 1.804283094321019e-05,
"loss": 1.0145,
"step": 495
},
{
"epoch": 0.68,
"learning_rate": 1.8016308343699686e-05,
"loss": 1.0008,
"step": 498
},
{
"epoch": 0.69,
"learning_rate": 1.798962701235289e-05,
"loss": 1.0067,
"step": 501
},
{
"epoch": 0.69,
"learning_rate": 1.796278747748988e-05,
"loss": 1.0017,
"step": 504
},
{
"epoch": 0.7,
"learning_rate": 1.7935790270563345e-05,
"loss": 1.0086,
"step": 507
},
{
"epoch": 0.7,
"learning_rate": 1.790863592614807e-05,
"loss": 0.9884,
"step": 510
},
{
"epoch": 0.7,
"learning_rate": 1.788132498193032e-05,
"loss": 1.0028,
"step": 513
},
{
"epoch": 0.71,
"learning_rate": 1.7853857978697223e-05,
"loss": 1.0055,
"step": 516
},
{
"epoch": 0.71,
"learning_rate": 1.7826235460326043e-05,
"loss": 1.005,
"step": 519
},
{
"epoch": 0.72,
"learning_rate": 1.7798457973773418e-05,
"loss": 1.002,
"step": 522
},
{
"epoch": 0.72,
"learning_rate": 1.7770526069064525e-05,
"loss": 0.9838,
"step": 525
},
{
"epoch": 0.72,
"learning_rate": 1.7742440299282203e-05,
"loss": 1.001,
"step": 528
},
{
"epoch": 0.73,
"learning_rate": 1.7714201220555982e-05,
"loss": 0.9984,
"step": 531
},
{
"epoch": 0.73,
"learning_rate": 1.7685809392051084e-05,
"loss": 1.0035,
"step": 534
},
{
"epoch": 0.74,
"learning_rate": 1.765726537595734e-05,
"loss": 1.0076,
"step": 537
},
{
"epoch": 0.74,
"learning_rate": 1.7628569737478076e-05,
"loss": 0.9936,
"step": 540
},
{
"epoch": 0.75,
"learning_rate": 1.7599723044818898e-05,
"loss": 1.0053,
"step": 543
},
{
"epoch": 0.75,
"learning_rate": 1.7570725869176468e-05,
"loss": 0.9968,
"step": 546
},
{
"epoch": 0.75,
"learning_rate": 1.7541578784727163e-05,
"loss": 1.0059,
"step": 549
},
{
"epoch": 0.76,
"learning_rate": 1.751228236861573e-05,
"loss": 1.0059,
"step": 552
},
{
"epoch": 0.76,
"learning_rate": 1.7482837200943845e-05,
"loss": 1.0081,
"step": 555
},
{
"epoch": 0.77,
"learning_rate": 1.7453243864758638e-05,
"loss": 1.0215,
"step": 558
},
{
"epoch": 0.77,
"learning_rate": 1.7423502946041133e-05,
"loss": 0.9935,
"step": 561
},
{
"epoch": 0.77,
"learning_rate": 1.739361503369466e-05,
"loss": 0.9945,
"step": 564
},
{
"epoch": 0.78,
"learning_rate": 1.7363580719533173e-05,
"loss": 0.9926,
"step": 567
},
{
"epoch": 0.78,
"learning_rate": 1.733340059826956e-05,
"loss": 0.9946,
"step": 570
},
{
"epoch": 0.79,
"learning_rate": 1.7303075267503845e-05,
"loss": 1.0079,
"step": 573
},
{
"epoch": 0.79,
"learning_rate": 1.7272605327711364e-05,
"loss": 1.0212,
"step": 576
},
{
"epoch": 0.79,
"learning_rate": 1.7241991382230872e-05,
"loss": 0.993,
"step": 579
},
{
"epoch": 0.8,
"learning_rate": 1.72112340372526e-05,
"loss": 0.9843,
"step": 582
},
{
"epoch": 0.8,
"learning_rate": 1.718033390180624e-05,
"loss": 0.9837,
"step": 585
},
{
"epoch": 0.81,
"learning_rate": 1.71492915877489e-05,
"loss": 0.959,
"step": 588
},
{
"epoch": 0.81,
"learning_rate": 1.7118107709752986e-05,
"loss": 0.9895,
"step": 591
},
{
"epoch": 0.82,
"learning_rate": 1.7086782885294026e-05,
"loss": 0.99,
"step": 594
},
{
"epoch": 0.82,
"learning_rate": 1.7055317734638444e-05,
"loss": 1.006,
"step": 597
},
{
"epoch": 0.82,
"learning_rate": 1.702371288083127e-05,
"loss": 1.0009,
"step": 600
},
{
"epoch": 0.83,
"learning_rate": 1.6991968949683835e-05,
"loss": 0.9758,
"step": 603
},
{
"epoch": 0.83,
"learning_rate": 1.6960086569761332e-05,
"loss": 0.9801,
"step": 606
},
{
"epoch": 0.84,
"learning_rate": 1.6928066372370407e-05,
"loss": 0.9833,
"step": 609
},
{
"epoch": 0.84,
"learning_rate": 1.689590899154664e-05,
"loss": 0.9846,
"step": 612
},
{
"epoch": 0.84,
"learning_rate": 1.6863615064042003e-05,
"loss": 0.9752,
"step": 615
},
{
"epoch": 0.85,
"learning_rate": 1.6831185229312237e-05,
"loss": 0.9869,
"step": 618
},
{
"epoch": 0.85,
"learning_rate": 1.67986201295042e-05,
"loss": 0.9869,
"step": 621
},
{
"epoch": 0.86,
"learning_rate": 1.676592040944315e-05,
"loss": 0.9878,
"step": 624
},
{
"epoch": 0.86,
"learning_rate": 1.6733086716619976e-05,
"loss": 0.9938,
"step": 627
},
{
"epoch": 0.86,
"learning_rate": 1.6700119701178378e-05,
"loss": 1.0045,
"step": 630
},
{
"epoch": 0.87,
"learning_rate": 1.666702001590199e-05,
"loss": 1.0088,
"step": 633
},
{
"epoch": 0.87,
"learning_rate": 1.6633788316201455e-05,
"loss": 0.998,
"step": 636
},
{
"epoch": 0.88,
"learning_rate": 1.6600425260101453e-05,
"loss": 1.0017,
"step": 639
},
{
"epoch": 0.88,
"learning_rate": 1.6566931508227663e-05,
"loss": 0.9995,
"step": 642
},
{
"epoch": 0.89,
"learning_rate": 1.6533307723793688e-05,
"loss": 1.0012,
"step": 645
},
{
"epoch": 0.89,
"learning_rate": 1.649955457258792e-05,
"loss": 0.9807,
"step": 648
},
{
"epoch": 0.89,
"learning_rate": 1.6465672722960365e-05,
"loss": 0.9664,
"step": 651
},
{
"epoch": 0.9,
"learning_rate": 1.6431662845809388e-05,
"loss": 0.9707,
"step": 654
},
{
"epoch": 0.9,
"learning_rate": 1.6397525614568446e-05,
"loss": 0.983,
"step": 657
},
{
"epoch": 0.91,
"learning_rate": 1.6363261705192757e-05,
"loss": 1.0061,
"step": 660
},
{
"epoch": 0.91,
"learning_rate": 1.6328871796145894e-05,
"loss": 0.9899,
"step": 663
},
{
"epoch": 0.91,
"learning_rate": 1.629435656838637e-05,
"loss": 0.9795,
"step": 666
},
{
"epoch": 0.92,
"learning_rate": 1.6259716705354154e-05,
"loss": 1.0002,
"step": 669
},
{
"epoch": 0.92,
"learning_rate": 1.6224952892957122e-05,
"loss": 0.9837,
"step": 672
},
{
"epoch": 0.93,
"learning_rate": 1.6190065819557496e-05,
"loss": 0.9872,
"step": 675
},
{
"epoch": 0.93,
"learning_rate": 1.615505617595819e-05,
"loss": 0.9797,
"step": 678
},
{
"epoch": 0.93,
"learning_rate": 1.6119924655389158e-05,
"loss": 0.9926,
"step": 681
},
{
"epoch": 0.94,
"learning_rate": 1.6084671953493645e-05,
"loss": 0.9884,
"step": 684
},
{
"epoch": 0.94,
"learning_rate": 1.6049298768314425e-05,
"loss": 0.9918,
"step": 687
},
{
"epoch": 0.95,
"learning_rate": 1.6013805800279977e-05,
"loss": 0.9829,
"step": 690
},
{
"epoch": 0.95,
"learning_rate": 1.5978193752190607e-05,
"loss": 0.9854,
"step": 693
},
{
"epoch": 0.96,
"learning_rate": 1.5942463329204546e-05,
"loss": 0.9751,
"step": 696
},
{
"epoch": 0.96,
"learning_rate": 1.5906615238823974e-05,
"loss": 0.9945,
"step": 699
},
{
"epoch": 0.96,
"learning_rate": 1.5870650190881023e-05,
"loss": 0.9957,
"step": 702
},
{
"epoch": 0.97,
"learning_rate": 1.583456889752371e-05,
"loss": 1.0047,
"step": 705
},
{
"epoch": 0.97,
"learning_rate": 1.579837207320184e-05,
"loss": 0.9921,
"step": 708
},
{
"epoch": 0.98,
"learning_rate": 1.5762060434652863e-05,
"loss": 0.9839,
"step": 711
},
{
"epoch": 0.98,
"learning_rate": 1.572563470088768e-05,
"loss": 0.9922,
"step": 714
},
{
"epoch": 0.98,
"learning_rate": 1.56890955931764e-05,
"loss": 0.9752,
"step": 717
},
{
"epoch": 0.99,
"learning_rate": 1.565244383503407e-05,
"loss": 0.9778,
"step": 720
},
{
"epoch": 0.99,
"learning_rate": 1.5615680152206324e-05,
"loss": 0.9795,
"step": 723
},
{
"epoch": 1.0,
"learning_rate": 1.557880527265505e-05,
"loss": 0.9774,
"step": 726
}
],
"logging_steps": 3,
"max_steps": 2184,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500.0,
"total_flos": 4.694048596218085e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}