llama_caller / trainer_state.json
musabg's picture
Upload folder using huggingface_hub
b25c2a4
raw
history blame
No virus
104 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 858,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 7.692307692307694e-07,
"loss": 6.3448,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.5384615384615387e-06,
"loss": 6.3545,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 2.307692307692308e-06,
"loss": 5.8293,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 3.0769230769230774e-06,
"loss": 5.7241,
"step": 4
},
{
"epoch": 0.02,
"learning_rate": 3.846153846153847e-06,
"loss": 5.7151,
"step": 5
},
{
"epoch": 0.02,
"learning_rate": 4.615384615384616e-06,
"loss": 5.759,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 5.384615384615385e-06,
"loss": 5.7633,
"step": 7
},
{
"epoch": 0.03,
"learning_rate": 6.153846153846155e-06,
"loss": 5.5305,
"step": 8
},
{
"epoch": 0.03,
"learning_rate": 6.923076923076923e-06,
"loss": 5.9615,
"step": 9
},
{
"epoch": 0.03,
"learning_rate": 7.692307692307694e-06,
"loss": 5.8986,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 8.461538461538462e-06,
"loss": 5.9003,
"step": 11
},
{
"epoch": 0.04,
"learning_rate": 9.230769230769232e-06,
"loss": 5.4062,
"step": 12
},
{
"epoch": 0.05,
"learning_rate": 1e-05,
"loss": 5.8606,
"step": 13
},
{
"epoch": 0.05,
"learning_rate": 1.076923076923077e-05,
"loss": 5.4724,
"step": 14
},
{
"epoch": 0.05,
"learning_rate": 1.1538461538461538e-05,
"loss": 5.4155,
"step": 15
},
{
"epoch": 0.06,
"learning_rate": 1.230769230769231e-05,
"loss": 5.1616,
"step": 16
},
{
"epoch": 0.06,
"learning_rate": 1.3076923076923078e-05,
"loss": 5.0633,
"step": 17
},
{
"epoch": 0.06,
"learning_rate": 1.3846153846153847e-05,
"loss": 5.918,
"step": 18
},
{
"epoch": 0.07,
"learning_rate": 1.4615384615384615e-05,
"loss": 5.4574,
"step": 19
},
{
"epoch": 0.07,
"learning_rate": 1.5384615384615387e-05,
"loss": 4.5698,
"step": 20
},
{
"epoch": 0.07,
"learning_rate": 1.6153846153846154e-05,
"loss": 4.9255,
"step": 21
},
{
"epoch": 0.08,
"learning_rate": 1.6923076923076924e-05,
"loss": 4.7709,
"step": 22
},
{
"epoch": 0.08,
"learning_rate": 1.7692307692307694e-05,
"loss": 4.5418,
"step": 23
},
{
"epoch": 0.08,
"learning_rate": 1.8461538461538465e-05,
"loss": 4.3907,
"step": 24
},
{
"epoch": 0.09,
"learning_rate": 1.923076923076923e-05,
"loss": 3.796,
"step": 25
},
{
"epoch": 0.09,
"learning_rate": 2e-05,
"loss": 3.9047,
"step": 26
},
{
"epoch": 0.09,
"learning_rate": 1.9999928710990414e-05,
"loss": 3.6515,
"step": 27
},
{
"epoch": 0.1,
"learning_rate": 1.999971484497808e-05,
"loss": 3.4483,
"step": 28
},
{
"epoch": 0.1,
"learning_rate": 1.999935840501225e-05,
"loss": 2.9432,
"step": 29
},
{
"epoch": 0.1,
"learning_rate": 1.9998859396174982e-05,
"loss": 2.694,
"step": 30
},
{
"epoch": 0.11,
"learning_rate": 1.9998217825581043e-05,
"loss": 2.9966,
"step": 31
},
{
"epoch": 0.11,
"learning_rate": 1.999743370237782e-05,
"loss": 2.437,
"step": 32
},
{
"epoch": 0.12,
"learning_rate": 1.9996507037745184e-05,
"loss": 2.3988,
"step": 33
},
{
"epoch": 0.12,
"learning_rate": 1.9995437844895337e-05,
"loss": 2.1954,
"step": 34
},
{
"epoch": 0.12,
"learning_rate": 1.999422613907262e-05,
"loss": 2.0812,
"step": 35
},
{
"epoch": 0.13,
"learning_rate": 1.9992871937553292e-05,
"loss": 1.884,
"step": 36
},
{
"epoch": 0.13,
"learning_rate": 1.9991375259645293e-05,
"loss": 1.8956,
"step": 37
},
{
"epoch": 0.13,
"learning_rate": 1.998973612668796e-05,
"loss": 1.8294,
"step": 38
},
{
"epoch": 0.14,
"learning_rate": 1.9987954562051724e-05,
"loss": 1.5839,
"step": 39
},
{
"epoch": 0.14,
"learning_rate": 1.9986030591137785e-05,
"loss": 1.5102,
"step": 40
},
{
"epoch": 0.14,
"learning_rate": 1.998396424137773e-05,
"loss": 1.3267,
"step": 41
},
{
"epoch": 0.15,
"learning_rate": 1.9981755542233175e-05,
"loss": 1.1175,
"step": 42
},
{
"epoch": 0.15,
"learning_rate": 1.9979404525195313e-05,
"loss": 1.0045,
"step": 43
},
{
"epoch": 0.15,
"learning_rate": 1.9976911223784473e-05,
"loss": 0.846,
"step": 44
},
{
"epoch": 0.16,
"learning_rate": 1.9974275673549654e-05,
"loss": 0.7292,
"step": 45
},
{
"epoch": 0.16,
"learning_rate": 1.9971497912068014e-05,
"loss": 0.613,
"step": 46
},
{
"epoch": 0.16,
"learning_rate": 1.9968577978944323e-05,
"loss": 0.5431,
"step": 47
},
{
"epoch": 0.17,
"learning_rate": 1.9965515915810408e-05,
"loss": 0.4976,
"step": 48
},
{
"epoch": 0.17,
"learning_rate": 1.9962311766324562e-05,
"loss": 0.4278,
"step": 49
},
{
"epoch": 0.17,
"learning_rate": 1.995896557617091e-05,
"loss": 0.4254,
"step": 50
},
{
"epoch": 0.18,
"learning_rate": 1.9955477393058774e-05,
"loss": 0.4062,
"step": 51
},
{
"epoch": 0.18,
"learning_rate": 1.995184726672197e-05,
"loss": 0.3502,
"step": 52
},
{
"epoch": 0.19,
"learning_rate": 1.9948075248918126e-05,
"loss": 0.3671,
"step": 53
},
{
"epoch": 0.19,
"learning_rate": 1.9944161393427923e-05,
"loss": 0.3904,
"step": 54
},
{
"epoch": 0.19,
"learning_rate": 1.9940105756054337e-05,
"loss": 0.3622,
"step": 55
},
{
"epoch": 0.2,
"learning_rate": 1.9935908394621844e-05,
"loss": 0.3523,
"step": 56
},
{
"epoch": 0.2,
"learning_rate": 1.9931569368975588e-05,
"loss": 0.3344,
"step": 57
},
{
"epoch": 0.2,
"learning_rate": 1.992708874098054e-05,
"loss": 0.3586,
"step": 58
},
{
"epoch": 0.21,
"learning_rate": 1.992246657452061e-05,
"loss": 0.3559,
"step": 59
},
{
"epoch": 0.21,
"learning_rate": 1.9917702935497725e-05,
"loss": 0.3494,
"step": 60
},
{
"epoch": 0.21,
"learning_rate": 1.991279789183091e-05,
"loss": 0.3458,
"step": 61
},
{
"epoch": 0.22,
"learning_rate": 1.99077515134553e-05,
"loss": 0.3344,
"step": 62
},
{
"epoch": 0.22,
"learning_rate": 1.9902563872321174e-05,
"loss": 0.3354,
"step": 63
},
{
"epoch": 0.22,
"learning_rate": 1.9897235042392876e-05,
"loss": 0.3404,
"step": 64
},
{
"epoch": 0.23,
"learning_rate": 1.989176509964781e-05,
"loss": 0.3622,
"step": 65
},
{
"epoch": 0.23,
"learning_rate": 1.9886154122075344e-05,
"loss": 0.3428,
"step": 66
},
{
"epoch": 0.23,
"learning_rate": 1.9880402189675677e-05,
"loss": 0.3557,
"step": 67
},
{
"epoch": 0.24,
"learning_rate": 1.9874509384458726e-05,
"loss": 0.3492,
"step": 68
},
{
"epoch": 0.24,
"learning_rate": 1.986847579044294e-05,
"loss": 0.3463,
"step": 69
},
{
"epoch": 0.24,
"learning_rate": 1.986230149365411e-05,
"loss": 0.3409,
"step": 70
},
{
"epoch": 0.25,
"learning_rate": 1.9855986582124128e-05,
"loss": 0.344,
"step": 71
},
{
"epoch": 0.25,
"learning_rate": 1.9849531145889758e-05,
"loss": 0.3085,
"step": 72
},
{
"epoch": 0.26,
"learning_rate": 1.9842935276991332e-05,
"loss": 0.332,
"step": 73
},
{
"epoch": 0.26,
"learning_rate": 1.983619906947144e-05,
"loss": 0.3198,
"step": 74
},
{
"epoch": 0.26,
"learning_rate": 1.982932261937359e-05,
"loss": 0.331,
"step": 75
},
{
"epoch": 0.27,
"learning_rate": 1.9822306024740855e-05,
"loss": 0.3298,
"step": 76
},
{
"epoch": 0.27,
"learning_rate": 1.9815149385614446e-05,
"loss": 0.3223,
"step": 77
},
{
"epoch": 0.27,
"learning_rate": 1.9807852804032306e-05,
"loss": 0.3082,
"step": 78
},
{
"epoch": 0.28,
"learning_rate": 1.980041638402765e-05,
"loss": 0.342,
"step": 79
},
{
"epoch": 0.28,
"learning_rate": 1.9792840231627482e-05,
"loss": 0.3217,
"step": 80
},
{
"epoch": 0.28,
"learning_rate": 1.9785124454851082e-05,
"loss": 0.3222,
"step": 81
},
{
"epoch": 0.29,
"learning_rate": 1.977726916370847e-05,
"loss": 0.3533,
"step": 82
},
{
"epoch": 0.29,
"learning_rate": 1.9769274470198827e-05,
"loss": 0.3269,
"step": 83
},
{
"epoch": 0.29,
"learning_rate": 1.976114048830891e-05,
"loss": 0.3448,
"step": 84
},
{
"epoch": 0.3,
"learning_rate": 1.9752867334011422e-05,
"loss": 0.3524,
"step": 85
},
{
"epoch": 0.3,
"learning_rate": 1.974445512526336e-05,
"loss": 0.338,
"step": 86
},
{
"epoch": 0.3,
"learning_rate": 1.9735903982004324e-05,
"loss": 0.3146,
"step": 87
},
{
"epoch": 0.31,
"learning_rate": 1.9727214026154827e-05,
"loss": 0.3093,
"step": 88
},
{
"epoch": 0.31,
"learning_rate": 1.971838538161454e-05,
"loss": 0.3247,
"step": 89
},
{
"epoch": 0.31,
"learning_rate": 1.9709418174260523e-05,
"loss": 0.3215,
"step": 90
},
{
"epoch": 0.32,
"learning_rate": 1.9700312531945444e-05,
"loss": 0.324,
"step": 91
},
{
"epoch": 0.32,
"learning_rate": 1.9691068584495744e-05,
"loss": 0.3608,
"step": 92
},
{
"epoch": 0.33,
"learning_rate": 1.96816864637098e-05,
"loss": 0.3412,
"step": 93
},
{
"epoch": 0.33,
"learning_rate": 1.967216630335603e-05,
"loss": 0.3461,
"step": 94
},
{
"epoch": 0.33,
"learning_rate": 1.9662508239170993e-05,
"loss": 0.3355,
"step": 95
},
{
"epoch": 0.34,
"learning_rate": 1.9652712408857452e-05,
"loss": 0.3223,
"step": 96
},
{
"epoch": 0.34,
"learning_rate": 1.9642778952082425e-05,
"loss": 0.3196,
"step": 97
},
{
"epoch": 0.34,
"learning_rate": 1.9632708010475166e-05,
"loss": 0.3386,
"step": 98
},
{
"epoch": 0.35,
"learning_rate": 1.9622499727625162e-05,
"loss": 0.3265,
"step": 99
},
{
"epoch": 0.35,
"learning_rate": 1.961215424908009e-05,
"loss": 0.3304,
"step": 100
},
{
"epoch": 0.35,
"learning_rate": 1.9601671722343737e-05,
"loss": 0.33,
"step": 101
},
{
"epoch": 0.36,
"learning_rate": 1.959105229687389e-05,
"loss": 0.3344,
"step": 102
},
{
"epoch": 0.36,
"learning_rate": 1.9580296124080215e-05,
"loss": 0.3304,
"step": 103
},
{
"epoch": 0.36,
"learning_rate": 1.956940335732209e-05,
"loss": 0.3467,
"step": 104
},
{
"epoch": 0.37,
"learning_rate": 1.955837415190643e-05,
"loss": 0.3317,
"step": 105
},
{
"epoch": 0.37,
"learning_rate": 1.954720866508546e-05,
"loss": 0.309,
"step": 106
},
{
"epoch": 0.37,
"learning_rate": 1.9535907056054475e-05,
"loss": 0.328,
"step": 107
},
{
"epoch": 0.38,
"learning_rate": 1.9524469485949586e-05,
"loss": 0.3443,
"step": 108
},
{
"epoch": 0.38,
"learning_rate": 1.9512896117845393e-05,
"loss": 0.3142,
"step": 109
},
{
"epoch": 0.38,
"learning_rate": 1.9501187116752694e-05,
"loss": 0.3182,
"step": 110
},
{
"epoch": 0.39,
"learning_rate": 1.94893426496161e-05,
"loss": 0.3485,
"step": 111
},
{
"epoch": 0.39,
"learning_rate": 1.9477362885311684e-05,
"loss": 0.328,
"step": 112
},
{
"epoch": 0.4,
"learning_rate": 1.946524799464455e-05,
"loss": 0.321,
"step": 113
},
{
"epoch": 0.4,
"learning_rate": 1.9452998150346403e-05,
"loss": 0.3118,
"step": 114
},
{
"epoch": 0.4,
"learning_rate": 1.9440613527073106e-05,
"loss": 0.3362,
"step": 115
},
{
"epoch": 0.41,
"learning_rate": 1.9428094301402164e-05,
"loss": 0.3339,
"step": 116
},
{
"epoch": 0.41,
"learning_rate": 1.941544065183021e-05,
"loss": 0.328,
"step": 117
},
{
"epoch": 0.41,
"learning_rate": 1.9402652758770476e-05,
"loss": 0.3226,
"step": 118
},
{
"epoch": 0.42,
"learning_rate": 1.938973080455021e-05,
"loss": 0.3083,
"step": 119
},
{
"epoch": 0.42,
"learning_rate": 1.9376674973408077e-05,
"loss": 0.3247,
"step": 120
},
{
"epoch": 0.42,
"learning_rate": 1.9363485451491523e-05,
"loss": 0.3234,
"step": 121
},
{
"epoch": 0.43,
"learning_rate": 1.9350162426854152e-05,
"loss": 0.3004,
"step": 122
},
{
"epoch": 0.43,
"learning_rate": 1.9336706089452995e-05,
"loss": 0.3346,
"step": 123
},
{
"epoch": 0.43,
"learning_rate": 1.932311663114586e-05,
"loss": 0.3407,
"step": 124
},
{
"epoch": 0.44,
"learning_rate": 1.930939424568854e-05,
"loss": 0.3201,
"step": 125
},
{
"epoch": 0.44,
"learning_rate": 1.9295539128732096e-05,
"loss": 0.3362,
"step": 126
},
{
"epoch": 0.44,
"learning_rate": 1.9281551477820038e-05,
"loss": 0.3295,
"step": 127
},
{
"epoch": 0.45,
"learning_rate": 1.9267431492385524e-05,
"loss": 0.3507,
"step": 128
},
{
"epoch": 0.45,
"learning_rate": 1.9253179373748504e-05,
"loss": 0.3482,
"step": 129
},
{
"epoch": 0.45,
"learning_rate": 1.9238795325112867e-05,
"loss": 0.3215,
"step": 130
},
{
"epoch": 0.46,
"learning_rate": 1.9224279551563533e-05,
"loss": 0.3517,
"step": 131
},
{
"epoch": 0.46,
"learning_rate": 1.920963226006352e-05,
"loss": 0.3089,
"step": 132
},
{
"epoch": 0.47,
"learning_rate": 1.919485365945101e-05,
"loss": 0.3251,
"step": 133
},
{
"epoch": 0.47,
"learning_rate": 1.917994396043636e-05,
"loss": 0.3404,
"step": 134
},
{
"epoch": 0.47,
"learning_rate": 1.9164903375599113e-05,
"loss": 0.3213,
"step": 135
},
{
"epoch": 0.48,
"learning_rate": 1.9149732119384942e-05,
"loss": 0.3384,
"step": 136
},
{
"epoch": 0.48,
"learning_rate": 1.9134430408102615e-05,
"loss": 0.3019,
"step": 137
},
{
"epoch": 0.48,
"learning_rate": 1.91189984599209e-05,
"loss": 0.3008,
"step": 138
},
{
"epoch": 0.49,
"learning_rate": 1.9103436494865463e-05,
"loss": 0.3201,
"step": 139
},
{
"epoch": 0.49,
"learning_rate": 1.908774473481571e-05,
"loss": 0.3281,
"step": 140
},
{
"epoch": 0.49,
"learning_rate": 1.907192340350165e-05,
"loss": 0.3266,
"step": 141
},
{
"epoch": 0.5,
"learning_rate": 1.9055972726500696e-05,
"loss": 0.3099,
"step": 142
},
{
"epoch": 0.5,
"learning_rate": 1.9039892931234434e-05,
"loss": 0.3302,
"step": 143
},
{
"epoch": 0.5,
"learning_rate": 1.9023684246965407e-05,
"loss": 0.3064,
"step": 144
},
{
"epoch": 0.51,
"learning_rate": 1.9007346904793817e-05,
"loss": 0.3015,
"step": 145
},
{
"epoch": 0.51,
"learning_rate": 1.899088113765426e-05,
"loss": 0.3221,
"step": 146
},
{
"epoch": 0.51,
"learning_rate": 1.897428718031238e-05,
"loss": 0.2858,
"step": 147
},
{
"epoch": 0.52,
"learning_rate": 1.895756526936153e-05,
"loss": 0.3266,
"step": 148
},
{
"epoch": 0.52,
"learning_rate": 1.8940715643219406e-05,
"loss": 0.3145,
"step": 149
},
{
"epoch": 0.52,
"learning_rate": 1.8923738542124644e-05,
"loss": 0.3107,
"step": 150
},
{
"epoch": 0.53,
"learning_rate": 1.8906634208133386e-05,
"loss": 0.3132,
"step": 151
},
{
"epoch": 0.53,
"learning_rate": 1.8889402885115834e-05,
"loss": 0.3041,
"step": 152
},
{
"epoch": 0.53,
"learning_rate": 1.8872044818752782e-05,
"loss": 0.3207,
"step": 153
},
{
"epoch": 0.54,
"learning_rate": 1.8854560256532098e-05,
"loss": 0.3468,
"step": 154
},
{
"epoch": 0.54,
"learning_rate": 1.8836949447745217e-05,
"loss": 0.3361,
"step": 155
},
{
"epoch": 0.55,
"learning_rate": 1.881921264348355e-05,
"loss": 0.3132,
"step": 156
},
{
"epoch": 0.55,
"learning_rate": 1.8801350096634946e-05,
"loss": 0.3392,
"step": 157
},
{
"epoch": 0.55,
"learning_rate": 1.8783362061880063e-05,
"loss": 0.2692,
"step": 158
},
{
"epoch": 0.56,
"learning_rate": 1.8765248795688726e-05,
"loss": 0.2989,
"step": 159
},
{
"epoch": 0.56,
"learning_rate": 1.8747010556316304e-05,
"loss": 0.3209,
"step": 160
},
{
"epoch": 0.56,
"learning_rate": 1.8728647603800004e-05,
"loss": 0.3125,
"step": 161
},
{
"epoch": 0.57,
"learning_rate": 1.8710160199955158e-05,
"loss": 0.2904,
"step": 162
},
{
"epoch": 0.57,
"learning_rate": 1.869154860837151e-05,
"loss": 0.3057,
"step": 163
},
{
"epoch": 0.57,
"learning_rate": 1.8672813094409453e-05,
"loss": 0.3337,
"step": 164
},
{
"epoch": 0.58,
"learning_rate": 1.8653953925196225e-05,
"loss": 0.3283,
"step": 165
},
{
"epoch": 0.58,
"learning_rate": 1.863497136962213e-05,
"loss": 0.3011,
"step": 166
},
{
"epoch": 0.58,
"learning_rate": 1.8615865698336683e-05,
"loss": 0.3222,
"step": 167
},
{
"epoch": 0.59,
"learning_rate": 1.8596637183744762e-05,
"loss": 0.3322,
"step": 168
},
{
"epoch": 0.59,
"learning_rate": 1.8577286100002723e-05,
"loss": 0.3321,
"step": 169
},
{
"epoch": 0.59,
"learning_rate": 1.8557812723014476e-05,
"loss": 0.3174,
"step": 170
},
{
"epoch": 0.6,
"learning_rate": 1.853821733042758e-05,
"loss": 0.2946,
"step": 171
},
{
"epoch": 0.6,
"learning_rate": 1.851850020162926e-05,
"loss": 0.2985,
"step": 172
},
{
"epoch": 0.6,
"learning_rate": 1.8498661617742426e-05,
"loss": 0.3402,
"step": 173
},
{
"epoch": 0.61,
"learning_rate": 1.8478701861621686e-05,
"loss": 0.3113,
"step": 174
},
{
"epoch": 0.61,
"learning_rate": 1.8458621217849285e-05,
"loss": 0.3225,
"step": 175
},
{
"epoch": 0.62,
"learning_rate": 1.8438419972731066e-05,
"loss": 0.3239,
"step": 176
},
{
"epoch": 0.62,
"learning_rate": 1.841809841429238e-05,
"loss": 0.3042,
"step": 177
},
{
"epoch": 0.62,
"learning_rate": 1.8397656832273982e-05,
"loss": 0.3129,
"step": 178
},
{
"epoch": 0.63,
"learning_rate": 1.8377095518127896e-05,
"loss": 0.3148,
"step": 179
},
{
"epoch": 0.63,
"learning_rate": 1.8356414765013267e-05,
"loss": 0.3144,
"step": 180
},
{
"epoch": 0.63,
"learning_rate": 1.8335614867792183e-05,
"loss": 0.3121,
"step": 181
},
{
"epoch": 0.64,
"learning_rate": 1.8314696123025456e-05,
"loss": 0.3249,
"step": 182
},
{
"epoch": 0.64,
"learning_rate": 1.8293658828968397e-05,
"loss": 0.2985,
"step": 183
},
{
"epoch": 0.64,
"learning_rate": 1.8272503285566587e-05,
"loss": 0.3251,
"step": 184
},
{
"epoch": 0.65,
"learning_rate": 1.825122979445157e-05,
"loss": 0.3253,
"step": 185
},
{
"epoch": 0.65,
"learning_rate": 1.8229838658936566e-05,
"loss": 0.3189,
"step": 186
},
{
"epoch": 0.65,
"learning_rate": 1.820833018401215e-05,
"loss": 0.3205,
"step": 187
},
{
"epoch": 0.66,
"learning_rate": 1.81867046763419e-05,
"loss": 0.2983,
"step": 188
},
{
"epoch": 0.66,
"learning_rate": 1.8164962444258016e-05,
"loss": 0.2948,
"step": 189
},
{
"epoch": 0.66,
"learning_rate": 1.8143103797756942e-05,
"loss": 0.2961,
"step": 190
},
{
"epoch": 0.67,
"learning_rate": 1.812112904849492e-05,
"loss": 0.2914,
"step": 191
},
{
"epoch": 0.67,
"learning_rate": 1.8099038509783586e-05,
"loss": 0.3055,
"step": 192
},
{
"epoch": 0.67,
"learning_rate": 1.807683249658545e-05,
"loss": 0.306,
"step": 193
},
{
"epoch": 0.68,
"learning_rate": 1.805451132550946e-05,
"loss": 0.3117,
"step": 194
},
{
"epoch": 0.68,
"learning_rate": 1.803207531480645e-05,
"loss": 0.3164,
"step": 195
},
{
"epoch": 0.69,
"learning_rate": 1.8009524784364615e-05,
"loss": 0.322,
"step": 196
},
{
"epoch": 0.69,
"learning_rate": 1.7986860055704952e-05,
"loss": 0.3073,
"step": 197
},
{
"epoch": 0.69,
"learning_rate": 1.7964081451976673e-05,
"loss": 0.3275,
"step": 198
},
{
"epoch": 0.7,
"learning_rate": 1.7941189297952598e-05,
"loss": 0.3176,
"step": 199
},
{
"epoch": 0.7,
"learning_rate": 1.791818392002452e-05,
"loss": 0.3147,
"step": 200
},
{
"epoch": 0.7,
"learning_rate": 1.7895065646198567e-05,
"loss": 0.2904,
"step": 201
},
{
"epoch": 0.71,
"learning_rate": 1.7871834806090502e-05,
"loss": 0.3099,
"step": 202
},
{
"epoch": 0.71,
"learning_rate": 1.7848491730921046e-05,
"loss": 0.2985,
"step": 203
},
{
"epoch": 0.71,
"learning_rate": 1.7825036753511143e-05,
"loss": 0.32,
"step": 204
},
{
"epoch": 0.72,
"learning_rate": 1.780147020827721e-05,
"loss": 0.3185,
"step": 205
},
{
"epoch": 0.72,
"learning_rate": 1.7777792431226384e-05,
"loss": 0.3074,
"step": 206
},
{
"epoch": 0.72,
"learning_rate": 1.7754003759951714e-05,
"loss": 0.3202,
"step": 207
},
{
"epoch": 0.73,
"learning_rate": 1.773010453362737e-05,
"loss": 0.3039,
"step": 208
},
{
"epoch": 0.73,
"learning_rate": 1.7706095093003787e-05,
"loss": 0.3006,
"step": 209
},
{
"epoch": 0.73,
"learning_rate": 1.7681975780402807e-05,
"loss": 0.3183,
"step": 210
},
{
"epoch": 0.74,
"learning_rate": 1.7657746939712817e-05,
"loss": 0.3519,
"step": 211
},
{
"epoch": 0.74,
"learning_rate": 1.7633408916383826e-05,
"loss": 0.3174,
"step": 212
},
{
"epoch": 0.74,
"learning_rate": 1.760896205742255e-05,
"loss": 0.2977,
"step": 213
},
{
"epoch": 0.75,
"learning_rate": 1.7584406711387462e-05,
"loss": 0.2996,
"step": 214
},
{
"epoch": 0.75,
"learning_rate": 1.755974322838382e-05,
"loss": 0.313,
"step": 215
},
{
"epoch": 0.76,
"learning_rate": 1.7534971960058684e-05,
"loss": 0.304,
"step": 216
},
{
"epoch": 0.76,
"learning_rate": 1.7510093259595887e-05,
"loss": 0.2892,
"step": 217
},
{
"epoch": 0.76,
"learning_rate": 1.7485107481711014e-05,
"loss": 0.3173,
"step": 218
},
{
"epoch": 0.77,
"learning_rate": 1.7460014982646334e-05,
"loss": 0.3111,
"step": 219
},
{
"epoch": 0.77,
"learning_rate": 1.743481612016573e-05,
"loss": 0.3033,
"step": 220
},
{
"epoch": 0.77,
"learning_rate": 1.7409511253549592e-05,
"loss": 0.3089,
"step": 221
},
{
"epoch": 0.78,
"learning_rate": 1.7384100743589698e-05,
"loss": 0.306,
"step": 222
},
{
"epoch": 0.78,
"learning_rate": 1.735858495258406e-05,
"loss": 0.3195,
"step": 223
},
{
"epoch": 0.78,
"learning_rate": 1.733296424433178e-05,
"loss": 0.3039,
"step": 224
},
{
"epoch": 0.79,
"learning_rate": 1.7307238984127832e-05,
"loss": 0.3149,
"step": 225
},
{
"epoch": 0.79,
"learning_rate": 1.7281409538757886e-05,
"loss": 0.329,
"step": 226
},
{
"epoch": 0.79,
"learning_rate": 1.7255476276493057e-05,
"loss": 0.2977,
"step": 227
},
{
"epoch": 0.8,
"learning_rate": 1.722943956708466e-05,
"loss": 0.3077,
"step": 228
},
{
"epoch": 0.8,
"learning_rate": 1.720329978175894e-05,
"loss": 0.3393,
"step": 229
},
{
"epoch": 0.8,
"learning_rate": 1.7177057293211786e-05,
"loss": 0.3094,
"step": 230
},
{
"epoch": 0.81,
"learning_rate": 1.715071247560339e-05,
"loss": 0.303,
"step": 231
},
{
"epoch": 0.81,
"learning_rate": 1.7124265704552948e-05,
"loss": 0.301,
"step": 232
},
{
"epoch": 0.81,
"learning_rate": 1.7097717357133286e-05,
"loss": 0.3235,
"step": 233
},
{
"epoch": 0.82,
"learning_rate": 1.7071067811865477e-05,
"loss": 0.3173,
"step": 234
},
{
"epoch": 0.82,
"learning_rate": 1.704431744871346e-05,
"loss": 0.2904,
"step": 235
},
{
"epoch": 0.83,
"learning_rate": 1.701746664907862e-05,
"loss": 0.305,
"step": 236
},
{
"epoch": 0.83,
"learning_rate": 1.6990515795794332e-05,
"loss": 0.3235,
"step": 237
},
{
"epoch": 0.83,
"learning_rate": 1.696346527312053e-05,
"loss": 0.3007,
"step": 238
},
{
"epoch": 0.84,
"learning_rate": 1.6936315466738204e-05,
"loss": 0.3199,
"step": 239
},
{
"epoch": 0.84,
"learning_rate": 1.6909066763743914e-05,
"loss": 0.3113,
"step": 240
},
{
"epoch": 0.84,
"learning_rate": 1.6881719552644275e-05,
"loss": 0.2918,
"step": 241
},
{
"epoch": 0.85,
"learning_rate": 1.68542742233504e-05,
"loss": 0.2935,
"step": 242
},
{
"epoch": 0.85,
"learning_rate": 1.682673116717236e-05,
"loss": 0.2945,
"step": 243
},
{
"epoch": 0.85,
"learning_rate": 1.6799090776813597e-05,
"loss": 0.3153,
"step": 244
},
{
"epoch": 0.86,
"learning_rate": 1.677135344636532e-05,
"loss": 0.3163,
"step": 245
},
{
"epoch": 0.86,
"learning_rate": 1.674351957130089e-05,
"loss": 0.2929,
"step": 246
},
{
"epoch": 0.86,
"learning_rate": 1.6715589548470187e-05,
"loss": 0.3227,
"step": 247
},
{
"epoch": 0.87,
"learning_rate": 1.6687563776093943e-05,
"loss": 0.2947,
"step": 248
},
{
"epoch": 0.87,
"learning_rate": 1.6659442653758064e-05,
"loss": 0.3437,
"step": 249
},
{
"epoch": 0.87,
"learning_rate": 1.6631226582407954e-05,
"loss": 0.3386,
"step": 250
},
{
"epoch": 0.88,
"learning_rate": 1.660291596434276e-05,
"loss": 0.3186,
"step": 251
},
{
"epoch": 0.88,
"learning_rate": 1.6574511203209667e-05,
"loss": 0.3302,
"step": 252
},
{
"epoch": 0.88,
"learning_rate": 1.654601270399814e-05,
"loss": 0.3249,
"step": 253
},
{
"epoch": 0.89,
"learning_rate": 1.651742087303412e-05,
"loss": 0.3012,
"step": 254
},
{
"epoch": 0.89,
"learning_rate": 1.648873611797429e-05,
"loss": 0.2925,
"step": 255
},
{
"epoch": 0.9,
"learning_rate": 1.645995884780019e-05,
"loss": 0.3111,
"step": 256
},
{
"epoch": 0.9,
"learning_rate": 1.6431089472812445e-05,
"loss": 0.3074,
"step": 257
},
{
"epoch": 0.9,
"learning_rate": 1.640212840462488e-05,
"loss": 0.3157,
"step": 258
},
{
"epoch": 0.91,
"learning_rate": 1.6373076056158676e-05,
"loss": 0.3237,
"step": 259
},
{
"epoch": 0.91,
"learning_rate": 1.6343932841636455e-05,
"loss": 0.2834,
"step": 260
},
{
"epoch": 0.91,
"learning_rate": 1.6314699176576404e-05,
"loss": 0.3072,
"step": 261
},
{
"epoch": 0.92,
"learning_rate": 1.6285375477786322e-05,
"loss": 0.2928,
"step": 262
},
{
"epoch": 0.92,
"learning_rate": 1.62559621633577e-05,
"loss": 0.3005,
"step": 263
},
{
"epoch": 0.92,
"learning_rate": 1.6226459652659752e-05,
"loss": 0.3014,
"step": 264
},
{
"epoch": 0.93,
"learning_rate": 1.619686836633343e-05,
"loss": 0.3053,
"step": 265
},
{
"epoch": 0.93,
"learning_rate": 1.6167188726285433e-05,
"loss": 0.3024,
"step": 266
},
{
"epoch": 0.93,
"learning_rate": 1.6137421155682186e-05,
"loss": 0.3018,
"step": 267
},
{
"epoch": 0.94,
"learning_rate": 1.6107566078943818e-05,
"loss": 0.3139,
"step": 268
},
{
"epoch": 0.94,
"learning_rate": 1.6077623921738102e-05,
"loss": 0.2872,
"step": 269
},
{
"epoch": 0.94,
"learning_rate": 1.6047595110974376e-05,
"loss": 0.2917,
"step": 270
},
{
"epoch": 0.95,
"learning_rate": 1.6017480074797484e-05,
"loss": 0.2947,
"step": 271
},
{
"epoch": 0.95,
"learning_rate": 1.598727924258164e-05,
"loss": 0.2684,
"step": 272
},
{
"epoch": 0.95,
"learning_rate": 1.5956993044924334e-05,
"loss": 0.3185,
"step": 273
},
{
"epoch": 0.96,
"learning_rate": 1.592662191364017e-05,
"loss": 0.3205,
"step": 274
},
{
"epoch": 0.96,
"learning_rate": 1.589616628175472e-05,
"loss": 0.304,
"step": 275
},
{
"epoch": 0.97,
"learning_rate": 1.5865626583498355e-05,
"loss": 0.2973,
"step": 276
},
{
"epoch": 0.97,
"learning_rate": 1.5835003254300038e-05,
"loss": 0.3054,
"step": 277
},
{
"epoch": 0.97,
"learning_rate": 1.5804296730781134e-05,
"loss": 0.3002,
"step": 278
},
{
"epoch": 0.98,
"learning_rate": 1.5773507450749172e-05,
"loss": 0.3246,
"step": 279
},
{
"epoch": 0.98,
"learning_rate": 1.574263585319161e-05,
"loss": 0.2929,
"step": 280
},
{
"epoch": 0.98,
"learning_rate": 1.5711682378269567e-05,
"loss": 0.3271,
"step": 281
},
{
"epoch": 0.99,
"learning_rate": 1.568064746731156e-05,
"loss": 0.3068,
"step": 282
},
{
"epoch": 0.99,
"learning_rate": 1.56495315628072e-05,
"loss": 0.2965,
"step": 283
},
{
"epoch": 0.99,
"learning_rate": 1.5618335108400893e-05,
"loss": 0.3142,
"step": 284
},
{
"epoch": 1.0,
"learning_rate": 1.5587058548885505e-05,
"loss": 0.3324,
"step": 285
},
{
"epoch": 1.0,
"learning_rate": 1.5555702330196024e-05,
"loss": 0.3296,
"step": 286
},
{
"epoch": 1.0,
"eval_loss": 0.301243394613266,
"eval_runtime": 40.9753,
"eval_samples_per_second": 18.231,
"eval_steps_per_second": 0.586,
"step": 286
},
{
"epoch": 1.0,
"learning_rate": 1.5524266899403206e-05,
"loss": 0.256,
"step": 287
},
{
"epoch": 1.01,
"learning_rate": 1.5492752704707198e-05,
"loss": 0.2517,
"step": 288
},
{
"epoch": 1.01,
"learning_rate": 1.546116019543115e-05,
"loss": 0.2426,
"step": 289
},
{
"epoch": 1.01,
"learning_rate": 1.542948982201479e-05,
"loss": 0.236,
"step": 290
},
{
"epoch": 1.02,
"learning_rate": 1.5397742036008033e-05,
"loss": 0.2415,
"step": 291
},
{
"epoch": 1.02,
"learning_rate": 1.536591729006453e-05,
"loss": 0.2506,
"step": 292
},
{
"epoch": 1.02,
"learning_rate": 1.5334016037935197e-05,
"loss": 0.2427,
"step": 293
},
{
"epoch": 1.03,
"learning_rate": 1.530203873446177e-05,
"loss": 0.2154,
"step": 294
},
{
"epoch": 1.03,
"learning_rate": 1.526998583557031e-05,
"loss": 0.2462,
"step": 295
},
{
"epoch": 1.03,
"learning_rate": 1.5237857798264701e-05,
"loss": 0.2327,
"step": 296
},
{
"epoch": 1.04,
"learning_rate": 1.520565508062013e-05,
"loss": 0.2352,
"step": 297
},
{
"epoch": 1.04,
"learning_rate": 1.5173378141776569e-05,
"loss": 0.2478,
"step": 298
},
{
"epoch": 1.05,
"learning_rate": 1.5141027441932217e-05,
"loss": 0.2364,
"step": 299
},
{
"epoch": 1.05,
"learning_rate": 1.5108603442336949e-05,
"loss": 0.2393,
"step": 300
},
{
"epoch": 1.05,
"learning_rate": 1.5076106605285725e-05,
"loss": 0.25,
"step": 301
},
{
"epoch": 1.06,
"learning_rate": 1.5043537394112008e-05,
"loss": 0.2348,
"step": 302
},
{
"epoch": 1.06,
"learning_rate": 1.5010896273181166e-05,
"loss": 0.2279,
"step": 303
},
{
"epoch": 1.06,
"learning_rate": 1.4978183707883828e-05,
"loss": 0.2225,
"step": 304
},
{
"epoch": 1.07,
"learning_rate": 1.4945400164629277e-05,
"loss": 0.2349,
"step": 305
},
{
"epoch": 1.07,
"learning_rate": 1.4912546110838775e-05,
"loss": 0.24,
"step": 306
},
{
"epoch": 1.07,
"learning_rate": 1.4879622014938914e-05,
"loss": 0.2254,
"step": 307
},
{
"epoch": 1.08,
"learning_rate": 1.4846628346354934e-05,
"loss": 0.2321,
"step": 308
},
{
"epoch": 1.08,
"learning_rate": 1.4813565575504023e-05,
"loss": 0.2375,
"step": 309
},
{
"epoch": 1.08,
"learning_rate": 1.4780434173788617e-05,
"loss": 0.2294,
"step": 310
},
{
"epoch": 1.09,
"learning_rate": 1.4747234613589688e-05,
"loss": 0.2424,
"step": 311
},
{
"epoch": 1.09,
"learning_rate": 1.4713967368259981e-05,
"loss": 0.2402,
"step": 312
},
{
"epoch": 1.09,
"learning_rate": 1.4680632912117287e-05,
"loss": 0.2424,
"step": 313
},
{
"epoch": 1.1,
"learning_rate": 1.4647231720437687e-05,
"loss": 0.2394,
"step": 314
},
{
"epoch": 1.1,
"learning_rate": 1.4613764269448752e-05,
"loss": 0.254,
"step": 315
},
{
"epoch": 1.1,
"learning_rate": 1.458023103632277e-05,
"loss": 0.2367,
"step": 316
},
{
"epoch": 1.11,
"learning_rate": 1.4546632499169938e-05,
"loss": 0.2392,
"step": 317
},
{
"epoch": 1.11,
"learning_rate": 1.4512969137031538e-05,
"loss": 0.2214,
"step": 318
},
{
"epoch": 1.12,
"learning_rate": 1.4479241429873121e-05,
"loss": 0.2439,
"step": 319
},
{
"epoch": 1.12,
"learning_rate": 1.444544985857766e-05,
"loss": 0.2445,
"step": 320
},
{
"epoch": 1.12,
"learning_rate": 1.4411594904938682e-05,
"loss": 0.241,
"step": 321
},
{
"epoch": 1.13,
"learning_rate": 1.4377677051653404e-05,
"loss": 0.2244,
"step": 322
},
{
"epoch": 1.13,
"learning_rate": 1.434369678231587e-05,
"loss": 0.245,
"step": 323
},
{
"epoch": 1.13,
"learning_rate": 1.4309654581410024e-05,
"loss": 0.2158,
"step": 324
},
{
"epoch": 1.14,
"learning_rate": 1.4275550934302822e-05,
"loss": 0.2378,
"step": 325
},
{
"epoch": 1.14,
"learning_rate": 1.4241386327237312e-05,
"loss": 0.2303,
"step": 326
},
{
"epoch": 1.14,
"learning_rate": 1.420716124732569e-05,
"loss": 0.2329,
"step": 327
},
{
"epoch": 1.15,
"learning_rate": 1.4172876182542372e-05,
"loss": 0.2388,
"step": 328
},
{
"epoch": 1.15,
"learning_rate": 1.4138531621717018e-05,
"loss": 0.2242,
"step": 329
},
{
"epoch": 1.15,
"learning_rate": 1.410412805452757e-05,
"loss": 0.2396,
"step": 330
},
{
"epoch": 1.16,
"learning_rate": 1.4069665971493276e-05,
"loss": 0.244,
"step": 331
},
{
"epoch": 1.16,
"learning_rate": 1.4035145863967692e-05,
"loss": 0.2335,
"step": 332
},
{
"epoch": 1.16,
"learning_rate": 1.4000568224131672e-05,
"loss": 0.2326,
"step": 333
},
{
"epoch": 1.17,
"learning_rate": 1.3965933544986351e-05,
"loss": 0.2299,
"step": 334
},
{
"epoch": 1.17,
"learning_rate": 1.3931242320346132e-05,
"loss": 0.2377,
"step": 335
},
{
"epoch": 1.17,
"learning_rate": 1.3896495044831622e-05,
"loss": 0.2347,
"step": 336
},
{
"epoch": 1.18,
"learning_rate": 1.3861692213862585e-05,
"loss": 0.23,
"step": 337
},
{
"epoch": 1.18,
"learning_rate": 1.3826834323650899e-05,
"loss": 0.2336,
"step": 338
},
{
"epoch": 1.19,
"learning_rate": 1.3791921871193456e-05,
"loss": 0.2382,
"step": 339
},
{
"epoch": 1.19,
"learning_rate": 1.3756955354265085e-05,
"loss": 0.2595,
"step": 340
},
{
"epoch": 1.19,
"learning_rate": 1.3721935271411464e-05,
"loss": 0.2497,
"step": 341
},
{
"epoch": 1.2,
"learning_rate": 1.368686212194199e-05,
"loss": 0.2245,
"step": 342
},
{
"epoch": 1.2,
"learning_rate": 1.3651736405922686e-05,
"loss": 0.2213,
"step": 343
},
{
"epoch": 1.2,
"learning_rate": 1.361655862416905e-05,
"loss": 0.2231,
"step": 344
},
{
"epoch": 1.21,
"learning_rate": 1.3581329278238928e-05,
"loss": 0.2354,
"step": 345
},
{
"epoch": 1.21,
"learning_rate": 1.3546048870425356e-05,
"loss": 0.2328,
"step": 346
},
{
"epoch": 1.21,
"learning_rate": 1.3510717903749402e-05,
"loss": 0.2189,
"step": 347
},
{
"epoch": 1.22,
"learning_rate": 1.3475336881952988e-05,
"loss": 0.248,
"step": 348
},
{
"epoch": 1.22,
"learning_rate": 1.3439906309491713e-05,
"loss": 0.2427,
"step": 349
},
{
"epoch": 1.22,
"learning_rate": 1.340442669152766e-05,
"loss": 0.2249,
"step": 350
},
{
"epoch": 1.23,
"learning_rate": 1.3368898533922202e-05,
"loss": 0.2513,
"step": 351
},
{
"epoch": 1.23,
"learning_rate": 1.3333322343228763e-05,
"loss": 0.2323,
"step": 352
},
{
"epoch": 1.23,
"learning_rate": 1.3297698626685631e-05,
"loss": 0.231,
"step": 353
},
{
"epoch": 1.24,
"learning_rate": 1.3262027892208696e-05,
"loss": 0.24,
"step": 354
},
{
"epoch": 1.24,
"learning_rate": 1.3226310648384222e-05,
"loss": 0.2334,
"step": 355
},
{
"epoch": 1.24,
"learning_rate": 1.31905474044616e-05,
"loss": 0.246,
"step": 356
},
{
"epoch": 1.25,
"learning_rate": 1.315473867034608e-05,
"loss": 0.2309,
"step": 357
},
{
"epoch": 1.25,
"learning_rate": 1.311888495659149e-05,
"loss": 0.2425,
"step": 358
},
{
"epoch": 1.26,
"learning_rate": 1.3082986774392992e-05,
"loss": 0.2362,
"step": 359
},
{
"epoch": 1.26,
"learning_rate": 1.3047044635579748e-05,
"loss": 0.2358,
"step": 360
},
{
"epoch": 1.26,
"learning_rate": 1.3011059052607657e-05,
"loss": 0.2245,
"step": 361
},
{
"epoch": 1.27,
"learning_rate": 1.297503053855203e-05,
"loss": 0.2274,
"step": 362
},
{
"epoch": 1.27,
"learning_rate": 1.2938959607100288e-05,
"loss": 0.2404,
"step": 363
},
{
"epoch": 1.27,
"learning_rate": 1.2902846772544625e-05,
"loss": 0.2283,
"step": 364
},
{
"epoch": 1.28,
"learning_rate": 1.2866692549774683e-05,
"loss": 0.2378,
"step": 365
},
{
"epoch": 1.28,
"learning_rate": 1.2830497454270206e-05,
"loss": 0.2299,
"step": 366
},
{
"epoch": 1.28,
"learning_rate": 1.2794262002093698e-05,
"loss": 0.2334,
"step": 367
},
{
"epoch": 1.29,
"learning_rate": 1.2757986709883059e-05,
"loss": 0.2416,
"step": 368
},
{
"epoch": 1.29,
"learning_rate": 1.2721672094844221e-05,
"loss": 0.2301,
"step": 369
},
{
"epoch": 1.29,
"learning_rate": 1.2685318674743769e-05,
"loss": 0.2263,
"step": 370
},
{
"epoch": 1.3,
"learning_rate": 1.2648926967901567e-05,
"loss": 0.231,
"step": 371
},
{
"epoch": 1.3,
"learning_rate": 1.2612497493183365e-05,
"loss": 0.2387,
"step": 372
},
{
"epoch": 1.3,
"learning_rate": 1.2576030769993393e-05,
"loss": 0.2397,
"step": 373
},
{
"epoch": 1.31,
"learning_rate": 1.2539527318266971e-05,
"loss": 0.2244,
"step": 374
},
{
"epoch": 1.31,
"learning_rate": 1.2502987658463077e-05,
"loss": 0.2308,
"step": 375
},
{
"epoch": 1.31,
"learning_rate": 1.2466412311556952e-05,
"loss": 0.238,
"step": 376
},
{
"epoch": 1.32,
"learning_rate": 1.242980179903264e-05,
"loss": 0.2455,
"step": 377
},
{
"epoch": 1.32,
"learning_rate": 1.2393156642875579e-05,
"loss": 0.2362,
"step": 378
},
{
"epoch": 1.33,
"learning_rate": 1.2356477365565147e-05,
"loss": 0.2286,
"step": 379
},
{
"epoch": 1.33,
"learning_rate": 1.2319764490067212e-05,
"loss": 0.2353,
"step": 380
},
{
"epoch": 1.33,
"learning_rate": 1.2283018539826686e-05,
"loss": 0.2215,
"step": 381
},
{
"epoch": 1.34,
"learning_rate": 1.2246240038760042e-05,
"loss": 0.2211,
"step": 382
},
{
"epoch": 1.34,
"learning_rate": 1.2209429511247865e-05,
"loss": 0.226,
"step": 383
},
{
"epoch": 1.34,
"learning_rate": 1.217258748212737e-05,
"loss": 0.2359,
"step": 384
},
{
"epoch": 1.35,
"learning_rate": 1.2135714476684902e-05,
"loss": 0.2251,
"step": 385
},
{
"epoch": 1.35,
"learning_rate": 1.2098811020648475e-05,
"loss": 0.2377,
"step": 386
},
{
"epoch": 1.35,
"learning_rate": 1.2061877640180255e-05,
"loss": 0.2235,
"step": 387
},
{
"epoch": 1.36,
"learning_rate": 1.2024914861869064e-05,
"loss": 0.2436,
"step": 388
},
{
"epoch": 1.36,
"learning_rate": 1.1987923212722872e-05,
"loss": 0.2262,
"step": 389
},
{
"epoch": 1.36,
"learning_rate": 1.1950903220161286e-05,
"loss": 0.2168,
"step": 390
},
{
"epoch": 1.37,
"learning_rate": 1.1913855412008025e-05,
"loss": 0.2366,
"step": 391
},
{
"epoch": 1.37,
"learning_rate": 1.1876780316483401e-05,
"loss": 0.2478,
"step": 392
},
{
"epoch": 1.37,
"learning_rate": 1.1839678462196785e-05,
"loss": 0.2349,
"step": 393
},
{
"epoch": 1.38,
"learning_rate": 1.180255037813906e-05,
"loss": 0.2293,
"step": 394
},
{
"epoch": 1.38,
"learning_rate": 1.1765396593675098e-05,
"loss": 0.2381,
"step": 395
},
{
"epoch": 1.38,
"learning_rate": 1.1728217638536196e-05,
"loss": 0.2567,
"step": 396
},
{
"epoch": 1.39,
"learning_rate": 1.1691014042812537e-05,
"loss": 0.2463,
"step": 397
},
{
"epoch": 1.39,
"learning_rate": 1.1653786336945614e-05,
"loss": 0.2293,
"step": 398
},
{
"epoch": 1.4,
"learning_rate": 1.1616535051720686e-05,
"loss": 0.2422,
"step": 399
},
{
"epoch": 1.4,
"learning_rate": 1.1579260718259197e-05,
"loss": 0.2443,
"step": 400
},
{
"epoch": 1.4,
"learning_rate": 1.1541963868011212e-05,
"loss": 0.2423,
"step": 401
},
{
"epoch": 1.41,
"learning_rate": 1.1504645032747832e-05,
"loss": 0.2475,
"step": 402
},
{
"epoch": 1.41,
"learning_rate": 1.1467304744553618e-05,
"loss": 0.2264,
"step": 403
},
{
"epoch": 1.41,
"learning_rate": 1.1429943535819005e-05,
"loss": 0.2289,
"step": 404
},
{
"epoch": 1.42,
"learning_rate": 1.1392561939232707e-05,
"loss": 0.2275,
"step": 405
},
{
"epoch": 1.42,
"learning_rate": 1.1355160487774119e-05,
"loss": 0.2271,
"step": 406
},
{
"epoch": 1.42,
"learning_rate": 1.1317739714705732e-05,
"loss": 0.2301,
"step": 407
},
{
"epoch": 1.43,
"learning_rate": 1.128030015356551e-05,
"loss": 0.2282,
"step": 408
},
{
"epoch": 1.43,
"learning_rate": 1.124284233815931e-05,
"loss": 0.2382,
"step": 409
},
{
"epoch": 1.43,
"learning_rate": 1.1205366802553231e-05,
"loss": 0.245,
"step": 410
},
{
"epoch": 1.44,
"learning_rate": 1.1167874081066046e-05,
"loss": 0.231,
"step": 411
},
{
"epoch": 1.44,
"learning_rate": 1.1130364708261552e-05,
"loss": 0.2351,
"step": 412
},
{
"epoch": 1.44,
"learning_rate": 1.1092839218940949e-05,
"loss": 0.2352,
"step": 413
},
{
"epoch": 1.45,
"learning_rate": 1.1055298148135236e-05,
"loss": 0.2297,
"step": 414
},
{
"epoch": 1.45,
"learning_rate": 1.1017742031097562e-05,
"loss": 0.225,
"step": 415
},
{
"epoch": 1.45,
"learning_rate": 1.098017140329561e-05,
"loss": 0.2324,
"step": 416
},
{
"epoch": 1.46,
"learning_rate": 1.094258680040394e-05,
"loss": 0.2435,
"step": 417
},
{
"epoch": 1.46,
"learning_rate": 1.090498875829638e-05,
"loss": 0.2341,
"step": 418
},
{
"epoch": 1.47,
"learning_rate": 1.0867377813038367e-05,
"loss": 0.2323,
"step": 419
},
{
"epoch": 1.47,
"learning_rate": 1.0829754500879308e-05,
"loss": 0.2324,
"step": 420
},
{
"epoch": 1.47,
"learning_rate": 1.079211935824494e-05,
"loss": 0.2287,
"step": 421
},
{
"epoch": 1.48,
"learning_rate": 1.0754472921729661e-05,
"loss": 0.2339,
"step": 422
},
{
"epoch": 1.48,
"learning_rate": 1.0716815728088911e-05,
"loss": 0.2426,
"step": 423
},
{
"epoch": 1.48,
"learning_rate": 1.0679148314231504e-05,
"loss": 0.2523,
"step": 424
},
{
"epoch": 1.49,
"learning_rate": 1.0641471217211959e-05,
"loss": 0.2467,
"step": 425
},
{
"epoch": 1.49,
"learning_rate": 1.0603784974222862e-05,
"loss": 0.2384,
"step": 426
},
{
"epoch": 1.49,
"learning_rate": 1.05660901225872e-05,
"loss": 0.2311,
"step": 427
},
{
"epoch": 1.5,
"learning_rate": 1.0528387199750706e-05,
"loss": 0.2375,
"step": 428
},
{
"epoch": 1.5,
"learning_rate": 1.0490676743274181e-05,
"loss": 0.2319,
"step": 429
},
{
"epoch": 1.5,
"learning_rate": 1.0452959290825846e-05,
"loss": 0.223,
"step": 430
},
{
"epoch": 1.51,
"learning_rate": 1.0415235380173663e-05,
"loss": 0.2497,
"step": 431
},
{
"epoch": 1.51,
"learning_rate": 1.0377505549177683e-05,
"loss": 0.2572,
"step": 432
},
{
"epoch": 1.51,
"learning_rate": 1.033977033578236e-05,
"loss": 0.2269,
"step": 433
},
{
"epoch": 1.52,
"learning_rate": 1.030203027800889e-05,
"loss": 0.2383,
"step": 434
},
{
"epoch": 1.52,
"learning_rate": 1.0264285913947545e-05,
"loss": 0.2434,
"step": 435
},
{
"epoch": 1.52,
"learning_rate": 1.0226537781749988e-05,
"loss": 0.2184,
"step": 436
},
{
"epoch": 1.53,
"learning_rate": 1.0188786419621613e-05,
"loss": 0.2272,
"step": 437
},
{
"epoch": 1.53,
"learning_rate": 1.015103236581386e-05,
"loss": 0.2362,
"step": 438
},
{
"epoch": 1.53,
"learning_rate": 1.0113276158616555e-05,
"loss": 0.2336,
"step": 439
},
{
"epoch": 1.54,
"learning_rate": 1.0075518336350218e-05,
"loss": 0.2342,
"step": 440
},
{
"epoch": 1.54,
"learning_rate": 1.0037759437358398e-05,
"loss": 0.2301,
"step": 441
},
{
"epoch": 1.55,
"learning_rate": 1e-05,
"loss": 0.2445,
"step": 442
},
{
"epoch": 1.55,
"learning_rate": 9.962240562641602e-06,
"loss": 0.2292,
"step": 443
},
{
"epoch": 1.55,
"learning_rate": 9.924481663649785e-06,
"loss": 0.236,
"step": 444
},
{
"epoch": 1.56,
"learning_rate": 9.886723841383447e-06,
"loss": 0.236,
"step": 445
},
{
"epoch": 1.56,
"learning_rate": 9.848967634186142e-06,
"loss": 0.2292,
"step": 446
},
{
"epoch": 1.56,
"learning_rate": 9.811213580378389e-06,
"loss": 0.2351,
"step": 447
},
{
"epoch": 1.57,
"learning_rate": 9.773462218250014e-06,
"loss": 0.2272,
"step": 448
},
{
"epoch": 1.57,
"learning_rate": 9.735714086052458e-06,
"loss": 0.248,
"step": 449
},
{
"epoch": 1.57,
"learning_rate": 9.697969721991114e-06,
"loss": 0.2421,
"step": 450
},
{
"epoch": 1.58,
"learning_rate": 9.660229664217644e-06,
"loss": 0.2322,
"step": 451
},
{
"epoch": 1.58,
"learning_rate": 9.62249445082232e-06,
"loss": 0.2393,
"step": 452
},
{
"epoch": 1.58,
"learning_rate": 9.584764619826339e-06,
"loss": 0.2368,
"step": 453
},
{
"epoch": 1.59,
"learning_rate": 9.547040709174159e-06,
"loss": 0.2274,
"step": 454
},
{
"epoch": 1.59,
"learning_rate": 9.50932325672582e-06,
"loss": 0.228,
"step": 455
},
{
"epoch": 1.59,
"learning_rate": 9.471612800249295e-06,
"loss": 0.2388,
"step": 456
},
{
"epoch": 1.6,
"learning_rate": 9.433909877412801e-06,
"loss": 0.2501,
"step": 457
},
{
"epoch": 1.6,
"learning_rate": 9.39621502577714e-06,
"loss": 0.2353,
"step": 458
},
{
"epoch": 1.6,
"learning_rate": 9.358528782788045e-06,
"loss": 0.2394,
"step": 459
},
{
"epoch": 1.61,
"learning_rate": 9.320851685768498e-06,
"loss": 0.2354,
"step": 460
},
{
"epoch": 1.61,
"learning_rate": 9.28318427191109e-06,
"loss": 0.251,
"step": 461
},
{
"epoch": 1.62,
"learning_rate": 9.24552707827034e-06,
"loss": 0.2306,
"step": 462
},
{
"epoch": 1.62,
"learning_rate": 9.207880641755065e-06,
"loss": 0.2397,
"step": 463
},
{
"epoch": 1.62,
"learning_rate": 9.170245499120694e-06,
"loss": 0.2174,
"step": 464
},
{
"epoch": 1.63,
"learning_rate": 9.132622186961637e-06,
"loss": 0.2045,
"step": 465
},
{
"epoch": 1.63,
"learning_rate": 9.095011241703623e-06,
"loss": 0.2372,
"step": 466
},
{
"epoch": 1.63,
"learning_rate": 9.057413199596066e-06,
"loss": 0.2527,
"step": 467
},
{
"epoch": 1.64,
"learning_rate": 9.019828596704394e-06,
"loss": 0.2497,
"step": 468
},
{
"epoch": 1.64,
"learning_rate": 8.982257968902438e-06,
"loss": 0.2511,
"step": 469
},
{
"epoch": 1.64,
"learning_rate": 8.944701851864767e-06,
"loss": 0.2455,
"step": 470
},
{
"epoch": 1.65,
"learning_rate": 8.907160781059053e-06,
"loss": 0.2305,
"step": 471
},
{
"epoch": 1.65,
"learning_rate": 8.869635291738452e-06,
"loss": 0.2327,
"step": 472
},
{
"epoch": 1.65,
"learning_rate": 8.832125918933955e-06,
"loss": 0.2469,
"step": 473
},
{
"epoch": 1.66,
"learning_rate": 8.79463319744677e-06,
"loss": 0.2562,
"step": 474
},
{
"epoch": 1.66,
"learning_rate": 8.757157661840693e-06,
"loss": 0.2452,
"step": 475
},
{
"epoch": 1.66,
"learning_rate": 8.719699846434493e-06,
"loss": 0.226,
"step": 476
},
{
"epoch": 1.67,
"learning_rate": 8.682260285294272e-06,
"loss": 0.2434,
"step": 477
},
{
"epoch": 1.67,
"learning_rate": 8.644839512225886e-06,
"loss": 0.2321,
"step": 478
},
{
"epoch": 1.67,
"learning_rate": 8.607438060767296e-06,
"loss": 0.2233,
"step": 479
},
{
"epoch": 1.68,
"learning_rate": 8.570056464180998e-06,
"loss": 0.2311,
"step": 480
},
{
"epoch": 1.68,
"learning_rate": 8.532695255446384e-06,
"loss": 0.2439,
"step": 481
},
{
"epoch": 1.69,
"learning_rate": 8.49535496725217e-06,
"loss": 0.238,
"step": 482
},
{
"epoch": 1.69,
"learning_rate": 8.458036131988792e-06,
"loss": 0.2322,
"step": 483
},
{
"epoch": 1.69,
"learning_rate": 8.420739281740806e-06,
"loss": 0.2276,
"step": 484
},
{
"epoch": 1.7,
"learning_rate": 8.383464948279319e-06,
"loss": 0.2401,
"step": 485
},
{
"epoch": 1.7,
"learning_rate": 8.346213663054388e-06,
"loss": 0.2328,
"step": 486
},
{
"epoch": 1.7,
"learning_rate": 8.308985957187466e-06,
"loss": 0.2292,
"step": 487
},
{
"epoch": 1.71,
"learning_rate": 8.271782361463806e-06,
"loss": 0.2211,
"step": 488
},
{
"epoch": 1.71,
"learning_rate": 8.234603406324909e-06,
"loss": 0.2379,
"step": 489
},
{
"epoch": 1.71,
"learning_rate": 8.197449621860944e-06,
"loss": 0.2438,
"step": 490
},
{
"epoch": 1.72,
"learning_rate": 8.16032153780322e-06,
"loss": 0.2556,
"step": 491
},
{
"epoch": 1.72,
"learning_rate": 8.123219683516604e-06,
"loss": 0.217,
"step": 492
},
{
"epoch": 1.72,
"learning_rate": 8.08614458799198e-06,
"loss": 0.2489,
"step": 493
},
{
"epoch": 1.73,
"learning_rate": 8.04909677983872e-06,
"loss": 0.2447,
"step": 494
},
{
"epoch": 1.73,
"learning_rate": 8.01207678727713e-06,
"loss": 0.2486,
"step": 495
},
{
"epoch": 1.73,
"learning_rate": 7.975085138130938e-06,
"loss": 0.2513,
"step": 496
},
{
"epoch": 1.74,
"learning_rate": 7.938122359819745e-06,
"loss": 0.2451,
"step": 497
},
{
"epoch": 1.74,
"learning_rate": 7.901188979351527e-06,
"loss": 0.2301,
"step": 498
},
{
"epoch": 1.74,
"learning_rate": 7.864285523315097e-06,
"loss": 0.2475,
"step": 499
},
{
"epoch": 1.75,
"learning_rate": 7.827412517872634e-06,
"loss": 0.2394,
"step": 500
},
{
"epoch": 1.75,
"learning_rate": 7.790570488752137e-06,
"loss": 0.2304,
"step": 501
},
{
"epoch": 1.76,
"learning_rate": 7.753759961239965e-06,
"loss": 0.2185,
"step": 502
},
{
"epoch": 1.76,
"learning_rate": 7.716981460173319e-06,
"loss": 0.2383,
"step": 503
},
{
"epoch": 1.76,
"learning_rate": 7.680235509932791e-06,
"loss": 0.2208,
"step": 504
},
{
"epoch": 1.77,
"learning_rate": 7.643522634434856e-06,
"loss": 0.232,
"step": 505
},
{
"epoch": 1.77,
"learning_rate": 7.606843357124426e-06,
"loss": 0.2213,
"step": 506
},
{
"epoch": 1.77,
"learning_rate": 7.570198200967363e-06,
"loss": 0.2409,
"step": 507
},
{
"epoch": 1.78,
"learning_rate": 7.5335876884430495e-06,
"loss": 0.2161,
"step": 508
},
{
"epoch": 1.78,
"learning_rate": 7.497012341536924e-06,
"loss": 0.2244,
"step": 509
},
{
"epoch": 1.78,
"learning_rate": 7.460472681733031e-06,
"loss": 0.2406,
"step": 510
},
{
"epoch": 1.79,
"learning_rate": 7.423969230006609e-06,
"loss": 0.2504,
"step": 511
},
{
"epoch": 1.79,
"learning_rate": 7.387502506816638e-06,
"loss": 0.235,
"step": 512
},
{
"epoch": 1.79,
"learning_rate": 7.351073032098437e-06,
"loss": 0.2376,
"step": 513
},
{
"epoch": 1.8,
"learning_rate": 7.314681325256232e-06,
"loss": 0.2281,
"step": 514
},
{
"epoch": 1.8,
"learning_rate": 7.278327905155783e-06,
"loss": 0.2215,
"step": 515
},
{
"epoch": 1.8,
"learning_rate": 7.242013290116944e-06,
"loss": 0.2446,
"step": 516
},
{
"epoch": 1.81,
"learning_rate": 7.205737997906307e-06,
"loss": 0.2385,
"step": 517
},
{
"epoch": 1.81,
"learning_rate": 7.169502545729798e-06,
"loss": 0.2401,
"step": 518
},
{
"epoch": 1.81,
"learning_rate": 7.133307450225322e-06,
"loss": 0.2354,
"step": 519
},
{
"epoch": 1.82,
"learning_rate": 7.097153227455379e-06,
"loss": 0.2367,
"step": 520
},
{
"epoch": 1.82,
"learning_rate": 7.0610403928997114e-06,
"loss": 0.246,
"step": 521
},
{
"epoch": 1.83,
"learning_rate": 7.024969461447973e-06,
"loss": 0.2227,
"step": 522
},
{
"epoch": 1.83,
"learning_rate": 6.9889409473923445e-06,
"loss": 0.2342,
"step": 523
},
{
"epoch": 1.83,
"learning_rate": 6.952955364420255e-06,
"loss": 0.2398,
"step": 524
},
{
"epoch": 1.84,
"learning_rate": 6.91701322560701e-06,
"loss": 0.22,
"step": 525
},
{
"epoch": 1.84,
"learning_rate": 6.881115043408512e-06,
"loss": 0.2343,
"step": 526
},
{
"epoch": 1.84,
"learning_rate": 6.845261329653923e-06,
"loss": 0.244,
"step": 527
},
{
"epoch": 1.85,
"learning_rate": 6.809452595538403e-06,
"loss": 0.2396,
"step": 528
},
{
"epoch": 1.85,
"learning_rate": 6.7736893516157795e-06,
"loss": 0.2296,
"step": 529
},
{
"epoch": 1.85,
"learning_rate": 6.7379721077913095e-06,
"loss": 0.2295,
"step": 530
},
{
"epoch": 1.86,
"learning_rate": 6.70230137331437e-06,
"loss": 0.245,
"step": 531
},
{
"epoch": 1.86,
"learning_rate": 6.666677656771239e-06,
"loss": 0.2305,
"step": 532
},
{
"epoch": 1.86,
"learning_rate": 6.631101466077801e-06,
"loss": 0.2527,
"step": 533
},
{
"epoch": 1.87,
"learning_rate": 6.595573308472338e-06,
"loss": 0.2522,
"step": 534
},
{
"epoch": 1.87,
"learning_rate": 6.56009369050829e-06,
"loss": 0.2481,
"step": 535
},
{
"epoch": 1.87,
"learning_rate": 6.524663118047015e-06,
"loss": 0.2197,
"step": 536
},
{
"epoch": 1.88,
"learning_rate": 6.489282096250602e-06,
"loss": 0.2209,
"step": 537
},
{
"epoch": 1.88,
"learning_rate": 6.453951129574644e-06,
"loss": 0.2253,
"step": 538
},
{
"epoch": 1.88,
"learning_rate": 6.4186707217610735e-06,
"loss": 0.2205,
"step": 539
},
{
"epoch": 1.89,
"learning_rate": 6.3834413758309525e-06,
"loss": 0.2536,
"step": 540
},
{
"epoch": 1.89,
"learning_rate": 6.348263594077319e-06,
"loss": 0.229,
"step": 541
},
{
"epoch": 1.9,
"learning_rate": 6.3131378780580134e-06,
"loss": 0.2434,
"step": 542
},
{
"epoch": 1.9,
"learning_rate": 6.278064728588542e-06,
"loss": 0.2428,
"step": 543
},
{
"epoch": 1.9,
"learning_rate": 6.243044645734917e-06,
"loss": 0.2441,
"step": 544
},
{
"epoch": 1.91,
"learning_rate": 6.208078128806549e-06,
"loss": 0.2493,
"step": 545
},
{
"epoch": 1.91,
"learning_rate": 6.173165676349103e-06,
"loss": 0.234,
"step": 546
},
{
"epoch": 1.91,
"learning_rate": 6.138307786137415e-06,
"loss": 0.2335,
"step": 547
},
{
"epoch": 1.92,
"learning_rate": 6.103504955168382e-06,
"loss": 0.2273,
"step": 548
},
{
"epoch": 1.92,
"learning_rate": 6.0687576796538685e-06,
"loss": 0.2285,
"step": 549
},
{
"epoch": 1.92,
"learning_rate": 6.0340664550136494e-06,
"loss": 0.23,
"step": 550
},
{
"epoch": 1.93,
"learning_rate": 5.999431775868329e-06,
"loss": 0.2425,
"step": 551
},
{
"epoch": 1.93,
"learning_rate": 5.96485413603231e-06,
"loss": 0.2407,
"step": 552
},
{
"epoch": 1.93,
"learning_rate": 5.930334028506726e-06,
"loss": 0.2289,
"step": 553
},
{
"epoch": 1.94,
"learning_rate": 5.895871945472434e-06,
"loss": 0.2091,
"step": 554
},
{
"epoch": 1.94,
"learning_rate": 5.861468378282984e-06,
"loss": 0.2212,
"step": 555
},
{
"epoch": 1.94,
"learning_rate": 5.827123817457631e-06,
"loss": 0.2393,
"step": 556
},
{
"epoch": 1.95,
"learning_rate": 5.792838752674309e-06,
"loss": 0.2308,
"step": 557
},
{
"epoch": 1.95,
"learning_rate": 5.75861367276269e-06,
"loss": 0.2339,
"step": 558
},
{
"epoch": 1.95,
"learning_rate": 5.724449065697182e-06,
"loss": 0.2286,
"step": 559
},
{
"epoch": 1.96,
"learning_rate": 5.690345418589978e-06,
"loss": 0.2509,
"step": 560
},
{
"epoch": 1.96,
"learning_rate": 5.656303217684133e-06,
"loss": 0.2434,
"step": 561
},
{
"epoch": 1.97,
"learning_rate": 5.622322948346595e-06,
"loss": 0.2346,
"step": 562
},
{
"epoch": 1.97,
"learning_rate": 5.588405095061322e-06,
"loss": 0.245,
"step": 563
},
{
"epoch": 1.97,
"learning_rate": 5.55455014142234e-06,
"loss": 0.2255,
"step": 564
},
{
"epoch": 1.98,
"learning_rate": 5.5207585701268805e-06,
"loss": 0.2267,
"step": 565
},
{
"epoch": 1.98,
"learning_rate": 5.4870308629684675e-06,
"loss": 0.2529,
"step": 566
},
{
"epoch": 1.98,
"learning_rate": 5.453367500830069e-06,
"loss": 0.2457,
"step": 567
},
{
"epoch": 1.99,
"learning_rate": 5.419768963677233e-06,
"loss": 0.2378,
"step": 568
},
{
"epoch": 1.99,
"learning_rate": 5.3862357305512524e-06,
"loss": 0.2198,
"step": 569
},
{
"epoch": 1.99,
"learning_rate": 5.352768279562315e-06,
"loss": 0.2448,
"step": 570
},
{
"epoch": 2.0,
"learning_rate": 5.319367087882717e-06,
"loss": 0.2713,
"step": 571
},
{
"epoch": 2.0,
"learning_rate": 5.286032631740023e-06,
"loss": 0.2335,
"step": 572
},
{
"epoch": 2.0,
"eval_loss": 0.3002837300300598,
"eval_runtime": 165.4835,
"eval_samples_per_second": 4.514,
"eval_steps_per_second": 0.145,
"step": 572
},
{
"epoch": 2.0,
"learning_rate": 5.2527653864103124e-06,
"loss": 0.1816,
"step": 573
},
{
"epoch": 2.01,
"learning_rate": 5.219565826211382e-06,
"loss": 0.1793,
"step": 574
},
{
"epoch": 2.01,
"learning_rate": 5.18643442449598e-06,
"loss": 0.1813,
"step": 575
},
{
"epoch": 2.01,
"learning_rate": 5.15337165364507e-06,
"loss": 0.1827,
"step": 576
},
{
"epoch": 2.02,
"learning_rate": 5.1203779850610865e-06,
"loss": 0.1763,
"step": 577
},
{
"epoch": 2.02,
"learning_rate": 5.087453889161229e-06,
"loss": 0.1937,
"step": 578
},
{
"epoch": 2.02,
"learning_rate": 5.054599835370724e-06,
"loss": 0.1733,
"step": 579
},
{
"epoch": 2.03,
"learning_rate": 5.021816292116175e-06,
"loss": 0.1892,
"step": 580
},
{
"epoch": 2.03,
"learning_rate": 4.989103726818836e-06,
"loss": 0.1766,
"step": 581
},
{
"epoch": 2.03,
"learning_rate": 4.956462605887994e-06,
"loss": 0.1728,
"step": 582
},
{
"epoch": 2.04,
"learning_rate": 4.92389339471428e-06,
"loss": 0.178,
"step": 583
},
{
"epoch": 2.04,
"learning_rate": 4.891396557663056e-06,
"loss": 0.1998,
"step": 584
},
{
"epoch": 2.05,
"learning_rate": 4.858972558067784e-06,
"loss": 0.1779,
"step": 585
},
{
"epoch": 2.05,
"learning_rate": 4.826621858223431e-06,
"loss": 0.1827,
"step": 586
},
{
"epoch": 2.05,
"learning_rate": 4.794344919379872e-06,
"loss": 0.1717,
"step": 587
},
{
"epoch": 2.06,
"learning_rate": 4.762142201735299e-06,
"loss": 0.1767,
"step": 588
},
{
"epoch": 2.06,
"learning_rate": 4.730014164429689e-06,
"loss": 0.1754,
"step": 589
},
{
"epoch": 2.06,
"learning_rate": 4.697961265538231e-06,
"loss": 0.1693,
"step": 590
},
{
"epoch": 2.07,
"learning_rate": 4.665983962064807e-06,
"loss": 0.1814,
"step": 591
},
{
"epoch": 2.07,
"learning_rate": 4.6340827099354734e-06,
"loss": 0.1857,
"step": 592
},
{
"epoch": 2.07,
"learning_rate": 4.60225796399197e-06,
"loss": 0.1727,
"step": 593
},
{
"epoch": 2.08,
"learning_rate": 4.570510177985213e-06,
"loss": 0.1726,
"step": 594
},
{
"epoch": 2.08,
"learning_rate": 4.538839804568857e-06,
"loss": 0.1771,
"step": 595
},
{
"epoch": 2.08,
"learning_rate": 4.5072472952928015e-06,
"loss": 0.1735,
"step": 596
},
{
"epoch": 2.09,
"learning_rate": 4.475733100596795e-06,
"loss": 0.1716,
"step": 597
},
{
"epoch": 2.09,
"learning_rate": 4.444297669803981e-06,
"loss": 0.1667,
"step": 598
},
{
"epoch": 2.09,
"learning_rate": 4.412941451114499e-06,
"loss": 0.1808,
"step": 599
},
{
"epoch": 2.1,
"learning_rate": 4.381664891599111e-06,
"loss": 0.1858,
"step": 600
},
{
"epoch": 2.1,
"learning_rate": 4.350468437192801e-06,
"loss": 0.1798,
"step": 601
},
{
"epoch": 2.1,
"learning_rate": 4.319352532688444e-06,
"loss": 0.172,
"step": 602
},
{
"epoch": 2.11,
"learning_rate": 4.288317621730434e-06,
"loss": 0.1827,
"step": 603
},
{
"epoch": 2.11,
"learning_rate": 4.257364146808394e-06,
"loss": 0.1782,
"step": 604
},
{
"epoch": 2.12,
"learning_rate": 4.226492549250829e-06,
"loss": 0.1728,
"step": 605
},
{
"epoch": 2.12,
"learning_rate": 4.1957032692188685e-06,
"loss": 0.1727,
"step": 606
},
{
"epoch": 2.12,
"learning_rate": 4.164996745699966e-06,
"loss": 0.1807,
"step": 607
},
{
"epoch": 2.13,
"learning_rate": 4.134373416501652e-06,
"loss": 0.1742,
"step": 608
},
{
"epoch": 2.13,
"learning_rate": 4.103833718245282e-06,
"loss": 0.1783,
"step": 609
},
{
"epoch": 2.13,
"learning_rate": 4.073378086359834e-06,
"loss": 0.1821,
"step": 610
},
{
"epoch": 2.14,
"learning_rate": 4.043006955075667e-06,
"loss": 0.171,
"step": 611
},
{
"epoch": 2.14,
"learning_rate": 4.0127207574183576e-06,
"loss": 0.1798,
"step": 612
},
{
"epoch": 2.14,
"learning_rate": 3.9825199252025185e-06,
"loss": 0.1747,
"step": 613
},
{
"epoch": 2.15,
"learning_rate": 3.952404889025626e-06,
"loss": 0.1792,
"step": 614
},
{
"epoch": 2.15,
"learning_rate": 3.922376078261905e-06,
"loss": 0.1712,
"step": 615
},
{
"epoch": 2.15,
"learning_rate": 3.892433921056184e-06,
"loss": 0.1724,
"step": 616
},
{
"epoch": 2.16,
"learning_rate": 3.862578844317817e-06,
"loss": 0.1755,
"step": 617
},
{
"epoch": 2.16,
"learning_rate": 3.832811273714569e-06,
"loss": 0.1834,
"step": 618
},
{
"epoch": 2.16,
"learning_rate": 3.8031316336665725e-06,
"loss": 0.1735,
"step": 619
},
{
"epoch": 2.17,
"learning_rate": 3.7735403473402475e-06,
"loss": 0.1738,
"step": 620
},
{
"epoch": 2.17,
"learning_rate": 3.7440378366423e-06,
"loss": 0.176,
"step": 621
},
{
"epoch": 2.17,
"learning_rate": 3.714624522213681e-06,
"loss": 0.1803,
"step": 622
},
{
"epoch": 2.18,
"learning_rate": 3.6853008234236023e-06,
"loss": 0.1704,
"step": 623
},
{
"epoch": 2.18,
"learning_rate": 3.6560671583635467e-06,
"loss": 0.1804,
"step": 624
},
{
"epoch": 2.19,
"learning_rate": 3.626923943841325e-06,
"loss": 0.1823,
"step": 625
},
{
"epoch": 2.19,
"learning_rate": 3.5978715953751207e-06,
"loss": 0.184,
"step": 626
},
{
"epoch": 2.19,
"learning_rate": 3.568910527187557e-06,
"loss": 0.1731,
"step": 627
},
{
"epoch": 2.2,
"learning_rate": 3.5400411521998123e-06,
"loss": 0.1812,
"step": 628
},
{
"epoch": 2.2,
"learning_rate": 3.5112638820257115e-06,
"loss": 0.1786,
"step": 629
},
{
"epoch": 2.2,
"learning_rate": 3.4825791269658782e-06,
"loss": 0.1806,
"step": 630
},
{
"epoch": 2.21,
"learning_rate": 3.453987296001866e-06,
"loss": 0.1803,
"step": 631
},
{
"epoch": 2.21,
"learning_rate": 3.4254887967903373e-06,
"loss": 0.1745,
"step": 632
},
{
"epoch": 2.21,
"learning_rate": 3.397084035657243e-06,
"loss": 0.1852,
"step": 633
},
{
"epoch": 2.22,
"learning_rate": 3.3687734175920505e-06,
"loss": 0.1852,
"step": 634
},
{
"epoch": 2.22,
"learning_rate": 3.3405573462419362e-06,
"loss": 0.1529,
"step": 635
},
{
"epoch": 2.22,
"learning_rate": 3.3124362239060627e-06,
"loss": 0.1798,
"step": 636
},
{
"epoch": 2.23,
"learning_rate": 3.284410451529816e-06,
"loss": 0.1723,
"step": 637
},
{
"epoch": 2.23,
"learning_rate": 3.2564804286991137e-06,
"loss": 0.1769,
"step": 638
},
{
"epoch": 2.23,
"learning_rate": 3.2286465536346857e-06,
"loss": 0.1831,
"step": 639
},
{
"epoch": 2.24,
"learning_rate": 3.2009092231864047e-06,
"loss": 0.1845,
"step": 640
},
{
"epoch": 2.24,
"learning_rate": 3.173268832827643e-06,
"loss": 0.1609,
"step": 641
},
{
"epoch": 2.24,
"learning_rate": 3.145725776649602e-06,
"loss": 0.1716,
"step": 642
},
{
"epoch": 2.25,
"learning_rate": 3.1182804473557295e-06,
"loss": 0.1853,
"step": 643
},
{
"epoch": 2.25,
"learning_rate": 3.0909332362560875e-06,
"loss": 0.172,
"step": 644
},
{
"epoch": 2.26,
"learning_rate": 3.0636845332617994e-06,
"loss": 0.1818,
"step": 645
},
{
"epoch": 2.26,
"learning_rate": 3.036534726879473e-06,
"loss": 0.1801,
"step": 646
},
{
"epoch": 2.26,
"learning_rate": 3.0094842042056706e-06,
"loss": 0.1942,
"step": 647
},
{
"epoch": 2.27,
"learning_rate": 2.982533350921383e-06,
"loss": 0.177,
"step": 648
},
{
"epoch": 2.27,
"learning_rate": 2.9556825512865418e-06,
"loss": 0.1784,
"step": 649
},
{
"epoch": 2.27,
"learning_rate": 2.9289321881345257e-06,
"loss": 0.179,
"step": 650
},
{
"epoch": 2.28,
"learning_rate": 2.902282642866716e-06,
"loss": 0.1772,
"step": 651
},
{
"epoch": 2.28,
"learning_rate": 2.8757342954470537e-06,
"loss": 0.1745,
"step": 652
},
{
"epoch": 2.28,
"learning_rate": 2.8492875243966114e-06,
"loss": 0.1796,
"step": 653
},
{
"epoch": 2.29,
"learning_rate": 2.8229427067882165e-06,
"loss": 0.1744,
"step": 654
},
{
"epoch": 2.29,
"learning_rate": 2.7967002182410596e-06,
"loss": 0.177,
"step": 655
},
{
"epoch": 2.29,
"learning_rate": 2.7705604329153434e-06,
"loss": 0.177,
"step": 656
},
{
"epoch": 2.3,
"learning_rate": 2.7445237235069453e-06,
"loss": 0.1765,
"step": 657
},
{
"epoch": 2.3,
"learning_rate": 2.7185904612421177e-06,
"loss": 0.1784,
"step": 658
},
{
"epoch": 2.3,
"learning_rate": 2.6927610158721708e-06,
"loss": 0.1796,
"step": 659
},
{
"epoch": 2.31,
"learning_rate": 2.6670357556682245e-06,
"loss": 0.1842,
"step": 660
},
{
"epoch": 2.31,
"learning_rate": 2.6414150474159405e-06,
"loss": 0.1843,
"step": 661
},
{
"epoch": 2.31,
"learning_rate": 2.615899256410306e-06,
"loss": 0.1816,
"step": 662
},
{
"epoch": 2.32,
"learning_rate": 2.5904887464504115e-06,
"loss": 0.1872,
"step": 663
},
{
"epoch": 2.32,
"learning_rate": 2.565183879834272e-06,
"loss": 0.1793,
"step": 664
},
{
"epoch": 2.33,
"learning_rate": 2.53998501735367e-06,
"loss": 0.1846,
"step": 665
},
{
"epoch": 2.33,
"learning_rate": 2.514892518288988e-06,
"loss": 0.1696,
"step": 666
},
{
"epoch": 2.33,
"learning_rate": 2.4899067404041156e-06,
"loss": 0.1821,
"step": 667
},
{
"epoch": 2.34,
"learning_rate": 2.465028039941316e-06,
"loss": 0.1667,
"step": 668
},
{
"epoch": 2.34,
"learning_rate": 2.4402567716161806e-06,
"loss": 0.186,
"step": 669
},
{
"epoch": 2.34,
"learning_rate": 2.415593288612541e-06,
"loss": 0.1727,
"step": 670
},
{
"epoch": 2.35,
"learning_rate": 2.3910379425774544e-06,
"loss": 0.1783,
"step": 671
},
{
"epoch": 2.35,
"learning_rate": 2.366591083616178e-06,
"loss": 0.1595,
"step": 672
},
{
"epoch": 2.35,
"learning_rate": 2.3422530602871874e-06,
"loss": 0.1577,
"step": 673
},
{
"epoch": 2.36,
"learning_rate": 2.318024219597196e-06,
"loss": 0.1816,
"step": 674
},
{
"epoch": 2.36,
"learning_rate": 2.2939049069962183e-06,
"loss": 0.182,
"step": 675
},
{
"epoch": 2.36,
"learning_rate": 2.26989546637263e-06,
"loss": 0.1792,
"step": 676
},
{
"epoch": 2.37,
"learning_rate": 2.2459962400482848e-06,
"loss": 0.1826,
"step": 677
},
{
"epoch": 2.37,
"learning_rate": 2.222207568773619e-06,
"loss": 0.1798,
"step": 678
},
{
"epoch": 2.37,
"learning_rate": 2.1985297917227922e-06,
"loss": 0.1721,
"step": 679
},
{
"epoch": 2.38,
"learning_rate": 2.1749632464888594e-06,
"loss": 0.1786,
"step": 680
},
{
"epoch": 2.38,
"learning_rate": 2.1515082690789535e-06,
"loss": 0.1832,
"step": 681
},
{
"epoch": 2.38,
"learning_rate": 2.1281651939094996e-06,
"loss": 0.1741,
"step": 682
},
{
"epoch": 2.39,
"learning_rate": 2.1049343538014354e-06,
"loss": 0.1692,
"step": 683
},
{
"epoch": 2.39,
"learning_rate": 2.0818160799754826e-06,
"loss": 0.181,
"step": 684
},
{
"epoch": 2.4,
"learning_rate": 2.0588107020474056e-06,
"loss": 0.1675,
"step": 685
},
{
"epoch": 2.4,
"learning_rate": 2.03591854802333e-06,
"loss": 0.1842,
"step": 686
},
{
"epoch": 2.4,
"learning_rate": 2.0131399442950507e-06,
"loss": 0.1693,
"step": 687
},
{
"epoch": 2.41,
"learning_rate": 1.990475215635388e-06,
"loss": 0.1825,
"step": 688
},
{
"epoch": 2.41,
"learning_rate": 1.967924685193552e-06,
"loss": 0.1864,
"step": 689
},
{
"epoch": 2.41,
"learning_rate": 1.94548867449054e-06,
"loss": 0.1732,
"step": 690
},
{
"epoch": 2.42,
"learning_rate": 1.9231675034145515e-06,
"loss": 0.1752,
"step": 691
},
{
"epoch": 2.42,
"learning_rate": 1.9009614902164175e-06,
"loss": 0.1724,
"step": 692
},
{
"epoch": 2.42,
"learning_rate": 1.8788709515050808e-06,
"loss": 0.1729,
"step": 693
},
{
"epoch": 2.43,
"learning_rate": 1.8568962022430637e-06,
"loss": 0.177,
"step": 694
},
{
"epoch": 2.43,
"learning_rate": 1.8350375557419875e-06,
"loss": 0.186,
"step": 695
},
{
"epoch": 2.43,
"learning_rate": 1.813295323658103e-06,
"loss": 0.1835,
"step": 696
},
{
"epoch": 2.44,
"learning_rate": 1.791669815987852e-06,
"loss": 0.1777,
"step": 697
},
{
"epoch": 2.44,
"learning_rate": 1.7701613410634367e-06,
"loss": 0.1762,
"step": 698
},
{
"epoch": 2.44,
"learning_rate": 1.7487702055484345e-06,
"loss": 0.1754,
"step": 699
},
{
"epoch": 2.45,
"learning_rate": 1.7274967144334155e-06,
"loss": 0.1846,
"step": 700
},
{
"epoch": 2.45,
"learning_rate": 1.7063411710316047e-06,
"loss": 0.1677,
"step": 701
},
{
"epoch": 2.45,
"learning_rate": 1.6853038769745466e-06,
"loss": 0.1734,
"step": 702
},
{
"epoch": 2.46,
"learning_rate": 1.6643851322078176e-06,
"loss": 0.1817,
"step": 703
},
{
"epoch": 2.46,
"learning_rate": 1.643585234986733e-06,
"loss": 0.1695,
"step": 704
},
{
"epoch": 2.47,
"learning_rate": 1.6229044818721062e-06,
"loss": 0.1866,
"step": 705
},
{
"epoch": 2.47,
"learning_rate": 1.6023431677260215e-06,
"loss": 0.1666,
"step": 706
},
{
"epoch": 2.47,
"learning_rate": 1.5819015857076215e-06,
"loss": 0.1903,
"step": 707
},
{
"epoch": 2.48,
"learning_rate": 1.5615800272689352e-06,
"loss": 0.1652,
"step": 708
},
{
"epoch": 2.48,
"learning_rate": 1.5413787821507141e-06,
"loss": 0.1727,
"step": 709
},
{
"epoch": 2.48,
"learning_rate": 1.5212981383783154e-06,
"loss": 0.1794,
"step": 710
},
{
"epoch": 2.49,
"learning_rate": 1.5013383822575766e-06,
"loss": 0.18,
"step": 711
},
{
"epoch": 2.49,
"learning_rate": 1.4814997983707457e-06,
"loss": 0.1676,
"step": 712
},
{
"epoch": 2.49,
"learning_rate": 1.4617826695724224e-06,
"loss": 0.17,
"step": 713
},
{
"epoch": 2.5,
"learning_rate": 1.4421872769855262e-06,
"loss": 0.1875,
"step": 714
},
{
"epoch": 2.5,
"learning_rate": 1.4227138999972801e-06,
"loss": 0.1725,
"step": 715
},
{
"epoch": 2.5,
"learning_rate": 1.403362816255236e-06,
"loss": 0.183,
"step": 716
},
{
"epoch": 2.51,
"learning_rate": 1.3841343016633168e-06,
"loss": 0.1806,
"step": 717
},
{
"epoch": 2.51,
"learning_rate": 1.3650286303778715e-06,
"loss": 0.1901,
"step": 718
},
{
"epoch": 2.51,
"learning_rate": 1.3460460748037773e-06,
"loss": 0.1834,
"step": 719
},
{
"epoch": 2.52,
"learning_rate": 1.3271869055905495e-06,
"loss": 0.1769,
"step": 720
},
{
"epoch": 2.52,
"learning_rate": 1.3084513916284913e-06,
"loss": 0.1691,
"step": 721
},
{
"epoch": 2.52,
"learning_rate": 1.2898398000448441e-06,
"loss": 0.1835,
"step": 722
},
{
"epoch": 2.53,
"learning_rate": 1.2713523961999996e-06,
"loss": 0.1878,
"step": 723
},
{
"epoch": 2.53,
"learning_rate": 1.2529894436836965e-06,
"loss": 0.178,
"step": 724
},
{
"epoch": 2.53,
"learning_rate": 1.2347512043112753e-06,
"loss": 0.1772,
"step": 725
},
{
"epoch": 2.54,
"learning_rate": 1.2166379381199423e-06,
"loss": 0.1823,
"step": 726
},
{
"epoch": 2.54,
"learning_rate": 1.1986499033650557e-06,
"loss": 0.176,
"step": 727
},
{
"epoch": 2.55,
"learning_rate": 1.1807873565164507e-06,
"loss": 0.1685,
"step": 728
},
{
"epoch": 2.55,
"learning_rate": 1.1630505522547853e-06,
"loss": 0.1721,
"step": 729
},
{
"epoch": 2.55,
"learning_rate": 1.1454397434679022e-06,
"loss": 0.1601,
"step": 730
},
{
"epoch": 2.56,
"learning_rate": 1.12795518124722e-06,
"loss": 0.1755,
"step": 731
},
{
"epoch": 2.56,
"learning_rate": 1.11059711488417e-06,
"loss": 0.1735,
"step": 732
},
{
"epoch": 2.56,
"learning_rate": 1.0933657918666175e-06,
"loss": 0.1627,
"step": 733
},
{
"epoch": 2.57,
"learning_rate": 1.0762614578753571e-06,
"loss": 0.1694,
"step": 734
},
{
"epoch": 2.57,
"learning_rate": 1.0592843567805944e-06,
"loss": 0.1933,
"step": 735
},
{
"epoch": 2.57,
"learning_rate": 1.042434730638473e-06,
"loss": 0.1702,
"step": 736
},
{
"epoch": 2.58,
"learning_rate": 1.0257128196876233e-06,
"loss": 0.1776,
"step": 737
},
{
"epoch": 2.58,
"learning_rate": 1.0091188623457415e-06,
"loss": 0.1745,
"step": 738
},
{
"epoch": 2.58,
"learning_rate": 9.926530952061831e-07,
"loss": 0.1746,
"step": 739
},
{
"epoch": 2.59,
"learning_rate": 9.763157530345957e-07,
"loss": 0.1832,
"step": 740
},
{
"epoch": 2.59,
"learning_rate": 9.601070687655667e-07,
"loss": 0.1721,
"step": 741
},
{
"epoch": 2.59,
"learning_rate": 9.440272734993072e-07,
"loss": 0.1767,
"step": 742
},
{
"epoch": 2.6,
"learning_rate": 9.280765964983529e-07,
"loss": 0.1743,
"step": 743
},
{
"epoch": 2.6,
"learning_rate": 9.122552651842931e-07,
"loss": 0.1906,
"step": 744
},
{
"epoch": 2.6,
"learning_rate": 8.965635051345411e-07,
"loss": 0.1703,
"step": 745
},
{
"epoch": 2.61,
"learning_rate": 8.810015400790994e-07,
"loss": 0.1847,
"step": 746
},
{
"epoch": 2.61,
"learning_rate": 8.655695918973861e-07,
"loss": 0.178,
"step": 747
},
{
"epoch": 2.62,
"learning_rate": 8.502678806150588e-07,
"loss": 0.1808,
"step": 748
},
{
"epoch": 2.62,
"learning_rate": 8.350966244008896e-07,
"loss": 0.1768,
"step": 749
},
{
"epoch": 2.62,
"learning_rate": 8.200560395636414e-07,
"loss": 0.1871,
"step": 750
},
{
"epoch": 2.63,
"learning_rate": 8.051463405489956e-07,
"loss": 0.176,
"step": 751
},
{
"epoch": 2.63,
"learning_rate": 7.903677399364839e-07,
"loss": 0.1715,
"step": 752
},
{
"epoch": 2.63,
"learning_rate": 7.757204484364699e-07,
"loss": 0.1839,
"step": 753
},
{
"epoch": 2.64,
"learning_rate": 7.612046748871327e-07,
"loss": 0.1799,
"step": 754
},
{
"epoch": 2.64,
"learning_rate": 7.468206262514965e-07,
"loss": 0.1664,
"step": 755
},
{
"epoch": 2.64,
"learning_rate": 7.325685076144795e-07,
"loss": 0.1742,
"step": 756
},
{
"epoch": 2.65,
"learning_rate": 7.184485221799631e-07,
"loss": 0.1811,
"step": 757
},
{
"epoch": 2.65,
"learning_rate": 7.044608712679058e-07,
"loss": 0.1805,
"step": 758
},
{
"epoch": 2.65,
"learning_rate": 6.90605754311462e-07,
"loss": 0.1733,
"step": 759
},
{
"epoch": 2.66,
"learning_rate": 6.768833688541443e-07,
"loss": 0.1746,
"step": 760
},
{
"epoch": 2.66,
"learning_rate": 6.632939105470049e-07,
"loss": 0.1745,
"step": 761
},
{
"epoch": 2.66,
"learning_rate": 6.498375731458529e-07,
"loss": 0.176,
"step": 762
},
{
"epoch": 2.67,
"learning_rate": 6.365145485084767e-07,
"loss": 0.1773,
"step": 763
},
{
"epoch": 2.67,
"learning_rate": 6.233250265919266e-07,
"loss": 0.1801,
"step": 764
},
{
"epoch": 2.67,
"learning_rate": 6.102691954497908e-07,
"loss": 0.177,
"step": 765
},
{
"epoch": 2.68,
"learning_rate": 5.973472412295256e-07,
"loss": 0.1753,
"step": 766
},
{
"epoch": 2.68,
"learning_rate": 5.845593481697931e-07,
"loss": 0.1721,
"step": 767
},
{
"epoch": 2.69,
"learning_rate": 5.719056985978388e-07,
"loss": 0.1906,
"step": 768
},
{
"epoch": 2.69,
"learning_rate": 5.59386472926895e-07,
"loss": 0.192,
"step": 769
},
{
"epoch": 2.69,
"learning_rate": 5.470018496535967e-07,
"loss": 0.1909,
"step": 770
},
{
"epoch": 2.7,
"learning_rate": 5.347520053554544e-07,
"loss": 0.1836,
"step": 771
},
{
"epoch": 2.7,
"learning_rate": 5.22637114688318e-07,
"loss": 0.1815,
"step": 772
},
{
"epoch": 2.7,
"learning_rate": 5.106573503839018e-07,
"loss": 0.1717,
"step": 773
},
{
"epoch": 2.71,
"learning_rate": 4.988128832473105e-07,
"loss": 0.1843,
"step": 774
},
{
"epoch": 2.71,
"learning_rate": 4.871038821546104e-07,
"loss": 0.1721,
"step": 775
},
{
"epoch": 2.71,
"learning_rate": 4.755305140504185e-07,
"loss": 0.1919,
"step": 776
},
{
"epoch": 2.72,
"learning_rate": 4.6409294394552774e-07,
"loss": 0.1768,
"step": 777
},
{
"epoch": 2.72,
"learning_rate": 4.5279133491454406e-07,
"loss": 0.1926,
"step": 778
},
{
"epoch": 2.72,
"learning_rate": 4.416258480935731e-07,
"loss": 0.1749,
"step": 779
},
{
"epoch": 2.73,
"learning_rate": 4.305966426779118e-07,
"loss": 0.176,
"step": 780
},
{
"epoch": 2.73,
"learning_rate": 4.197038759197869e-07,
"loss": 0.1823,
"step": 781
},
{
"epoch": 2.73,
"learning_rate": 4.089477031261113e-07,
"loss": 0.1808,
"step": 782
},
{
"epoch": 2.74,
"learning_rate": 3.983282776562647e-07,
"loss": 0.1722,
"step": 783
},
{
"epoch": 2.74,
"learning_rate": 3.878457509199107e-07,
"loss": 0.174,
"step": 784
},
{
"epoch": 2.74,
"learning_rate": 3.7750027237484e-07,
"loss": 0.1742,
"step": 785
},
{
"epoch": 2.75,
"learning_rate": 3.6729198952483725e-07,
"loss": 0.178,
"step": 786
},
{
"epoch": 2.75,
"learning_rate": 3.572210479175753e-07,
"loss": 0.1695,
"step": 787
},
{
"epoch": 2.76,
"learning_rate": 3.4728759114254774e-07,
"loss": 0.1676,
"step": 788
},
{
"epoch": 2.76,
"learning_rate": 3.374917608290107e-07,
"loss": 0.1848,
"step": 789
},
{
"epoch": 2.76,
"learning_rate": 3.278336966439744e-07,
"loss": 0.187,
"step": 790
},
{
"epoch": 2.77,
"learning_rate": 3.1831353629020345e-07,
"loss": 0.1757,
"step": 791
},
{
"epoch": 2.77,
"learning_rate": 3.089314155042589e-07,
"loss": 0.1715,
"step": 792
},
{
"epoch": 2.77,
"learning_rate": 2.996874680545603e-07,
"loss": 0.1661,
"step": 793
},
{
"epoch": 2.78,
"learning_rate": 2.905818257394799e-07,
"loss": 0.184,
"step": 794
},
{
"epoch": 2.78,
"learning_rate": 2.816146183854618e-07,
"loss": 0.1794,
"step": 795
},
{
"epoch": 2.78,
"learning_rate": 2.727859738451721e-07,
"loss": 0.1744,
"step": 796
},
{
"epoch": 2.79,
"learning_rate": 2.640960179956764e-07,
"loss": 0.1644,
"step": 797
},
{
"epoch": 2.79,
"learning_rate": 2.5554487473664404e-07,
"loss": 0.1781,
"step": 798
},
{
"epoch": 2.79,
"learning_rate": 2.471326659885809e-07,
"loss": 0.1831,
"step": 799
},
{
"epoch": 2.8,
"learning_rate": 2.388595116910919e-07,
"loss": 0.1818,
"step": 800
},
{
"epoch": 2.8,
"learning_rate": 2.3072552980117568e-07,
"loss": 0.191,
"step": 801
},
{
"epoch": 2.8,
"learning_rate": 2.2273083629153148e-07,
"loss": 0.1834,
"step": 802
},
{
"epoch": 2.81,
"learning_rate": 2.1487554514891706e-07,
"loss": 0.1709,
"step": 803
},
{
"epoch": 2.81,
"learning_rate": 2.0715976837251793e-07,
"loss": 0.1698,
"step": 804
},
{
"epoch": 2.81,
"learning_rate": 1.9958361597235076e-07,
"loss": 0.1833,
"step": 805
},
{
"epoch": 2.82,
"learning_rate": 1.921471959676957e-07,
"loss": 0.1738,
"step": 806
},
{
"epoch": 2.82,
"learning_rate": 1.8485061438555552e-07,
"loss": 0.168,
"step": 807
},
{
"epoch": 2.83,
"learning_rate": 1.7769397525914668e-07,
"loss": 0.1849,
"step": 808
},
{
"epoch": 2.83,
"learning_rate": 1.706773806264106e-07,
"loss": 0.1741,
"step": 809
},
{
"epoch": 2.83,
"learning_rate": 1.6380093052856482e-07,
"loss": 0.1777,
"step": 810
},
{
"epoch": 2.84,
"learning_rate": 1.5706472300867082e-07,
"loss": 0.2177,
"step": 811
},
{
"epoch": 2.84,
"learning_rate": 1.5046885411024393e-07,
"loss": 0.175,
"step": 812
},
{
"epoch": 2.84,
"learning_rate": 1.4401341787587454e-07,
"loss": 0.1731,
"step": 813
},
{
"epoch": 2.85,
"learning_rate": 1.3769850634589356e-07,
"loss": 0.1778,
"step": 814
},
{
"epoch": 2.85,
"learning_rate": 1.3152420955706014e-07,
"loss": 0.1704,
"step": 815
},
{
"epoch": 2.85,
"learning_rate": 1.2549061554127494e-07,
"loss": 0.1729,
"step": 816
},
{
"epoch": 2.86,
"learning_rate": 1.195978103243234e-07,
"loss": 0.1831,
"step": 817
},
{
"epoch": 2.86,
"learning_rate": 1.1384587792465873e-07,
"loss": 0.166,
"step": 818
},
{
"epoch": 2.86,
"learning_rate": 1.0823490035218986e-07,
"loss": 0.1784,
"step": 819
},
{
"epoch": 2.87,
"learning_rate": 1.0276495760712768e-07,
"loss": 0.1825,
"step": 820
},
{
"epoch": 2.87,
"learning_rate": 9.743612767882937e-08,
"loss": 0.1838,
"step": 821
},
{
"epoch": 2.87,
"learning_rate": 9.224848654469932e-08,
"loss": 0.1793,
"step": 822
},
{
"epoch": 2.88,
"learning_rate": 8.720210816909436e-08,
"loss": 0.1735,
"step": 823
},
{
"epoch": 2.88,
"learning_rate": 8.229706450227804e-08,
"loss": 0.1773,
"step": 824
},
{
"epoch": 2.88,
"learning_rate": 7.753342547939357e-08,
"loss": 0.1783,
"step": 825
},
{
"epoch": 2.89,
"learning_rate": 7.291125901946027e-08,
"loss": 0.1779,
"step": 826
},
{
"epoch": 2.89,
"learning_rate": 6.843063102441317e-08,
"loss": 0.1824,
"step": 827
},
{
"epoch": 2.9,
"learning_rate": 6.409160537815818e-08,
"loss": 0.1727,
"step": 828
},
{
"epoch": 2.9,
"learning_rate": 5.9894243945664e-08,
"loss": 0.1756,
"step": 829
},
{
"epoch": 2.9,
"learning_rate": 5.5838606572078404e-08,
"loss": 0.1828,
"step": 830
},
{
"epoch": 2.91,
"learning_rate": 5.192475108187545e-08,
"loss": 0.1693,
"step": 831
},
{
"epoch": 2.91,
"learning_rate": 4.815273327803183e-08,
"loss": 0.1911,
"step": 832
},
{
"epoch": 2.91,
"learning_rate": 4.4522606941228564e-08,
"loss": 0.1885,
"step": 833
},
{
"epoch": 2.92,
"learning_rate": 4.103442382909051e-08,
"loss": 0.1746,
"step": 834
},
{
"epoch": 2.92,
"learning_rate": 3.7688233675439164e-08,
"loss": 0.1931,
"step": 835
},
{
"epoch": 2.92,
"learning_rate": 3.448408418959326e-08,
"loss": 0.1707,
"step": 836
},
{
"epoch": 2.93,
"learning_rate": 3.1422021055679266e-08,
"loss": 0.1713,
"step": 837
},
{
"epoch": 2.93,
"learning_rate": 2.850208793198861e-08,
"loss": 0.1746,
"step": 838
},
{
"epoch": 2.93,
"learning_rate": 2.572432645034817e-08,
"loss": 0.1714,
"step": 839
},
{
"epoch": 2.94,
"learning_rate": 2.308877621553185e-08,
"loss": 0.1758,
"step": 840
},
{
"epoch": 2.94,
"learning_rate": 2.059547480469104e-08,
"loss": 0.1741,
"step": 841
},
{
"epoch": 2.94,
"learning_rate": 1.824445776682504e-08,
"loss": 0.179,
"step": 842
},
{
"epoch": 2.95,
"learning_rate": 1.603575862226925e-08,
"loss": 0.1797,
"step": 843
},
{
"epoch": 2.95,
"learning_rate": 1.396940886221776e-08,
"loss": 0.1738,
"step": 844
},
{
"epoch": 2.95,
"learning_rate": 1.2045437948275952e-08,
"loss": 0.1819,
"step": 845
},
{
"epoch": 2.96,
"learning_rate": 1.0263873312040818e-08,
"loss": 0.1698,
"step": 846
},
{
"epoch": 2.96,
"learning_rate": 8.62474035470795e-09,
"loss": 0.182,
"step": 847
},
{
"epoch": 2.97,
"learning_rate": 7.128062446709605e-09,
"loss": 0.1692,
"step": 848
},
{
"epoch": 2.97,
"learning_rate": 5.773860927383856e-09,
"loss": 0.1778,
"step": 849
},
{
"epoch": 2.97,
"learning_rate": 4.562155104665955e-09,
"loss": 0.1786,
"step": 850
},
{
"epoch": 2.98,
"learning_rate": 3.492962254819654e-09,
"loss": 0.1657,
"step": 851
},
{
"epoch": 2.98,
"learning_rate": 2.5662976221840772e-09,
"loss": 0.1741,
"step": 852
},
{
"epoch": 2.98,
"learning_rate": 1.7821744189605583e-09,
"loss": 0.1843,
"step": 853
},
{
"epoch": 2.99,
"learning_rate": 1.1406038250205699e-09,
"loss": 0.1725,
"step": 854
},
{
"epoch": 2.99,
"learning_rate": 6.41594987752514e-10,
"loss": 0.1748,
"step": 855
},
{
"epoch": 2.99,
"learning_rate": 2.851550219240551e-10,
"loss": 0.1902,
"step": 856
},
{
"epoch": 3.0,
"learning_rate": 7.128900958774942e-11,
"loss": 0.1918,
"step": 857
},
{
"epoch": 3.0,
"learning_rate": 0.0,
"loss": 0.1749,
"step": 858
},
{
"epoch": 3.0,
"eval_loss": 0.322337806224823,
"eval_runtime": 42.6722,
"eval_samples_per_second": 17.506,
"eval_steps_per_second": 0.562,
"step": 858
},
{
"epoch": 3.0,
"step": 858,
"total_flos": 2.1832875959648256e+18,
"train_loss": 0.43623367181191075,
"train_runtime": 26982.3823,
"train_samples_per_second": 4.067,
"train_steps_per_second": 0.032
}
],
"max_steps": 858,
"num_train_epochs": 3,
"total_flos": 2.1832875959648256e+18,
"trial_name": null,
"trial_params": null
}