align2llava-7b-lora-question / trainer_state.json
Huanghz2001
add model weights
57c5c96
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.985994397759104,
"eval_steps": 500,
"global_step": 890,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 7.407407407407407e-07,
"loss": 0.7103,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.4814814814814815e-06,
"loss": 0.6821,
"step": 2
},
{
"epoch": 0.02,
"learning_rate": 2.222222222222222e-06,
"loss": 0.6943,
"step": 3
},
{
"epoch": 0.02,
"learning_rate": 2.962962962962963e-06,
"loss": 0.7532,
"step": 4
},
{
"epoch": 0.03,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.7674,
"step": 5
},
{
"epoch": 0.03,
"learning_rate": 4.444444444444444e-06,
"loss": 0.8014,
"step": 6
},
{
"epoch": 0.04,
"learning_rate": 5.185185185185185e-06,
"loss": 0.6357,
"step": 7
},
{
"epoch": 0.04,
"learning_rate": 5.925925925925926e-06,
"loss": 0.864,
"step": 8
},
{
"epoch": 0.05,
"learning_rate": 6.666666666666667e-06,
"loss": 0.759,
"step": 9
},
{
"epoch": 0.06,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.7527,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 8.148148148148148e-06,
"loss": 0.5794,
"step": 11
},
{
"epoch": 0.07,
"learning_rate": 8.888888888888888e-06,
"loss": 0.6923,
"step": 12
},
{
"epoch": 0.07,
"learning_rate": 9.62962962962963e-06,
"loss": 0.8002,
"step": 13
},
{
"epoch": 0.08,
"learning_rate": 1.037037037037037e-05,
"loss": 0.7658,
"step": 14
},
{
"epoch": 0.08,
"learning_rate": 1.1111111111111113e-05,
"loss": 0.6802,
"step": 15
},
{
"epoch": 0.09,
"learning_rate": 1.1851851851851852e-05,
"loss": 0.7179,
"step": 16
},
{
"epoch": 0.1,
"learning_rate": 1.2592592592592593e-05,
"loss": 0.6769,
"step": 17
},
{
"epoch": 0.1,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.775,
"step": 18
},
{
"epoch": 0.11,
"learning_rate": 1.4074074074074075e-05,
"loss": 0.706,
"step": 19
},
{
"epoch": 0.11,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.6469,
"step": 20
},
{
"epoch": 0.12,
"learning_rate": 1.555555555555556e-05,
"loss": 0.6636,
"step": 21
},
{
"epoch": 0.12,
"learning_rate": 1.6296296296296297e-05,
"loss": 0.7414,
"step": 22
},
{
"epoch": 0.13,
"learning_rate": 1.7037037037037038e-05,
"loss": 0.6922,
"step": 23
},
{
"epoch": 0.13,
"learning_rate": 1.7777777777777777e-05,
"loss": 0.7115,
"step": 24
},
{
"epoch": 0.14,
"learning_rate": 1.851851851851852e-05,
"loss": 0.7318,
"step": 25
},
{
"epoch": 0.15,
"learning_rate": 1.925925925925926e-05,
"loss": 0.7085,
"step": 26
},
{
"epoch": 0.15,
"learning_rate": 2e-05,
"loss": 0.6182,
"step": 27
},
{
"epoch": 0.16,
"learning_rate": 1.999993374057257e-05,
"loss": 0.5571,
"step": 28
},
{
"epoch": 0.16,
"learning_rate": 1.9999734963168334e-05,
"loss": 0.6527,
"step": 29
},
{
"epoch": 0.17,
"learning_rate": 1.9999403670421477e-05,
"loss": 0.6486,
"step": 30
},
{
"epoch": 0.17,
"learning_rate": 1.999893986672224e-05,
"loss": 0.6242,
"step": 31
},
{
"epoch": 0.18,
"learning_rate": 1.9998343558216907e-05,
"loss": 0.5885,
"step": 32
},
{
"epoch": 0.18,
"learning_rate": 1.9997614752807686e-05,
"loss": 0.6577,
"step": 33
},
{
"epoch": 0.19,
"learning_rate": 1.9996753460152623e-05,
"loss": 0.6103,
"step": 34
},
{
"epoch": 0.2,
"learning_rate": 1.9995759691665472e-05,
"loss": 0.6059,
"step": 35
},
{
"epoch": 0.2,
"learning_rate": 1.9994633460515537e-05,
"loss": 0.6007,
"step": 36
},
{
"epoch": 0.21,
"learning_rate": 1.9993374781627502e-05,
"loss": 0.6695,
"step": 37
},
{
"epoch": 0.21,
"learning_rate": 1.9991983671681238e-05,
"loss": 0.6567,
"step": 38
},
{
"epoch": 0.22,
"learning_rate": 1.9990460149111577e-05,
"loss": 0.572,
"step": 39
},
{
"epoch": 0.22,
"learning_rate": 1.9988804234108063e-05,
"loss": 0.5961,
"step": 40
},
{
"epoch": 0.23,
"learning_rate": 1.9987015948614693e-05,
"loss": 0.5571,
"step": 41
},
{
"epoch": 0.24,
"learning_rate": 1.9985095316329617e-05,
"loss": 0.66,
"step": 42
},
{
"epoch": 0.24,
"learning_rate": 1.9983042362704837e-05,
"loss": 0.572,
"step": 43
},
{
"epoch": 0.25,
"learning_rate": 1.9980857114945867e-05,
"loss": 0.5764,
"step": 44
},
{
"epoch": 0.25,
"learning_rate": 1.997853960201135e-05,
"loss": 0.5713,
"step": 45
},
{
"epoch": 0.26,
"learning_rate": 1.9976089854612703e-05,
"loss": 0.5751,
"step": 46
},
{
"epoch": 0.26,
"learning_rate": 1.9973507905213703e-05,
"loss": 0.6085,
"step": 47
},
{
"epoch": 0.27,
"learning_rate": 1.9970793788030044e-05,
"loss": 0.6194,
"step": 48
},
{
"epoch": 0.27,
"learning_rate": 1.9967947539028895e-05,
"loss": 0.6664,
"step": 49
},
{
"epoch": 0.28,
"learning_rate": 1.996496919592843e-05,
"loss": 0.5766,
"step": 50
},
{
"epoch": 0.29,
"learning_rate": 1.99618587981973e-05,
"loss": 0.6304,
"step": 51
},
{
"epoch": 0.29,
"learning_rate": 1.9958616387054147e-05,
"loss": 0.6435,
"step": 52
},
{
"epoch": 0.3,
"learning_rate": 1.9955242005467025e-05,
"loss": 0.6252,
"step": 53
},
{
"epoch": 0.3,
"learning_rate": 1.995173569815286e-05,
"loss": 0.5294,
"step": 54
},
{
"epoch": 0.31,
"learning_rate": 1.9948097511576832e-05,
"loss": 0.5257,
"step": 55
},
{
"epoch": 0.31,
"learning_rate": 1.9944327493951775e-05,
"loss": 0.6807,
"step": 56
},
{
"epoch": 0.32,
"learning_rate": 1.9940425695237523e-05,
"loss": 0.5713,
"step": 57
},
{
"epoch": 0.32,
"learning_rate": 1.9936392167140277e-05,
"loss": 0.495,
"step": 58
},
{
"epoch": 0.33,
"learning_rate": 1.993222696311188e-05,
"loss": 0.5769,
"step": 59
},
{
"epoch": 0.34,
"learning_rate": 1.9927930138349142e-05,
"loss": 0.5936,
"step": 60
},
{
"epoch": 0.34,
"learning_rate": 1.9923501749793097e-05,
"loss": 0.5846,
"step": 61
},
{
"epoch": 0.35,
"learning_rate": 1.9918941856128237e-05,
"loss": 0.5906,
"step": 62
},
{
"epoch": 0.35,
"learning_rate": 1.9914250517781754e-05,
"loss": 0.6047,
"step": 63
},
{
"epoch": 0.36,
"learning_rate": 1.9909427796922725e-05,
"loss": 0.6452,
"step": 64
},
{
"epoch": 0.36,
"learning_rate": 1.9904473757461295e-05,
"loss": 0.6057,
"step": 65
},
{
"epoch": 0.37,
"learning_rate": 1.989938846504783e-05,
"loss": 0.574,
"step": 66
},
{
"epoch": 0.38,
"learning_rate": 1.9894171987072038e-05,
"loss": 0.543,
"step": 67
},
{
"epoch": 0.38,
"learning_rate": 1.988882439266209e-05,
"loss": 0.5276,
"step": 68
},
{
"epoch": 0.39,
"learning_rate": 1.9883345752683695e-05,
"loss": 0.5865,
"step": 69
},
{
"epoch": 0.39,
"learning_rate": 1.9877736139739162e-05,
"loss": 0.5735,
"step": 70
},
{
"epoch": 0.4,
"learning_rate": 1.9871995628166442e-05,
"loss": 0.7098,
"step": 71
},
{
"epoch": 0.4,
"learning_rate": 1.9866124294038135e-05,
"loss": 0.5513,
"step": 72
},
{
"epoch": 0.41,
"learning_rate": 1.986012221516049e-05,
"loss": 0.5809,
"step": 73
},
{
"epoch": 0.41,
"learning_rate": 1.9853989471072362e-05,
"loss": 0.5783,
"step": 74
},
{
"epoch": 0.42,
"learning_rate": 1.9847726143044184e-05,
"loss": 0.6436,
"step": 75
},
{
"epoch": 0.43,
"learning_rate": 1.9841332314076856e-05,
"loss": 0.6535,
"step": 76
},
{
"epoch": 0.43,
"learning_rate": 1.9834808068900666e-05,
"loss": 0.6482,
"step": 77
},
{
"epoch": 0.44,
"learning_rate": 1.9828153493974164e-05,
"loss": 0.6028,
"step": 78
},
{
"epoch": 0.44,
"learning_rate": 1.982136867748302e-05,
"loss": 0.6415,
"step": 79
},
{
"epoch": 0.45,
"learning_rate": 1.9814453709338844e-05,
"loss": 0.6029,
"step": 80
},
{
"epoch": 0.45,
"learning_rate": 1.9807408681178e-05,
"loss": 0.5399,
"step": 81
},
{
"epoch": 0.46,
"learning_rate": 1.980023368636039e-05,
"loss": 0.5999,
"step": 82
},
{
"epoch": 0.46,
"learning_rate": 1.979292881996823e-05,
"loss": 0.5641,
"step": 83
},
{
"epoch": 0.47,
"learning_rate": 1.978549417880477e-05,
"loss": 0.5859,
"step": 84
},
{
"epoch": 0.48,
"learning_rate": 1.9777929861393027e-05,
"loss": 0.5758,
"step": 85
},
{
"epoch": 0.48,
"learning_rate": 1.977023596797446e-05,
"loss": 0.6422,
"step": 86
},
{
"epoch": 0.49,
"learning_rate": 1.9762412600507676e-05,
"loss": 0.5897,
"step": 87
},
{
"epoch": 0.49,
"learning_rate": 1.975445986266704e-05,
"loss": 0.5696,
"step": 88
},
{
"epoch": 0.5,
"learning_rate": 1.974637785984132e-05,
"loss": 0.5959,
"step": 89
},
{
"epoch": 0.5,
"learning_rate": 1.9738166699132294e-05,
"loss": 0.5746,
"step": 90
},
{
"epoch": 0.51,
"learning_rate": 1.9729826489353322e-05,
"loss": 0.5889,
"step": 91
},
{
"epoch": 0.52,
"learning_rate": 1.9721357341027912e-05,
"loss": 0.6424,
"step": 92
},
{
"epoch": 0.52,
"learning_rate": 1.9712759366388245e-05,
"loss": 0.6227,
"step": 93
},
{
"epoch": 0.53,
"learning_rate": 1.9704032679373696e-05,
"loss": 0.5688,
"step": 94
},
{
"epoch": 0.53,
"learning_rate": 1.9695177395629324e-05,
"loss": 0.5844,
"step": 95
},
{
"epoch": 0.54,
"learning_rate": 1.968619363250434e-05,
"loss": 0.6762,
"step": 96
},
{
"epoch": 0.54,
"learning_rate": 1.967708150905053e-05,
"loss": 0.5813,
"step": 97
},
{
"epoch": 0.55,
"learning_rate": 1.9667841146020725e-05,
"loss": 0.5886,
"step": 98
},
{
"epoch": 0.55,
"learning_rate": 1.9658472665867147e-05,
"loss": 0.6162,
"step": 99
},
{
"epoch": 0.56,
"learning_rate": 1.9648976192739832e-05,
"loss": 0.6416,
"step": 100
},
{
"epoch": 0.57,
"learning_rate": 1.9639351852484946e-05,
"loss": 0.5724,
"step": 101
},
{
"epoch": 0.57,
"learning_rate": 1.962959977264315e-05,
"loss": 0.5684,
"step": 102
},
{
"epoch": 0.58,
"learning_rate": 1.961972008244789e-05,
"loss": 0.6096,
"step": 103
},
{
"epoch": 0.58,
"learning_rate": 1.960971291282368e-05,
"loss": 0.4976,
"step": 104
},
{
"epoch": 0.59,
"learning_rate": 1.9599578396384393e-05,
"loss": 0.5809,
"step": 105
},
{
"epoch": 0.59,
"learning_rate": 1.9589316667431482e-05,
"loss": 0.5922,
"step": 106
},
{
"epoch": 0.6,
"learning_rate": 1.9578927861952196e-05,
"loss": 0.6357,
"step": 107
},
{
"epoch": 0.61,
"learning_rate": 1.9568412117617806e-05,
"loss": 0.5654,
"step": 108
},
{
"epoch": 0.61,
"learning_rate": 1.955776957378174e-05,
"loss": 0.5984,
"step": 109
},
{
"epoch": 0.62,
"learning_rate": 1.9547000371477782e-05,
"loss": 0.5969,
"step": 110
},
{
"epoch": 0.62,
"learning_rate": 1.953610465341816e-05,
"loss": 0.5235,
"step": 111
},
{
"epoch": 0.63,
"learning_rate": 1.9525082563991685e-05,
"loss": 0.579,
"step": 112
},
{
"epoch": 0.63,
"learning_rate": 1.9513934249261823e-05,
"loss": 0.6039,
"step": 113
},
{
"epoch": 0.64,
"learning_rate": 1.9502659856964764e-05,
"loss": 0.554,
"step": 114
},
{
"epoch": 0.64,
"learning_rate": 1.949125953650747e-05,
"loss": 0.5577,
"step": 115
},
{
"epoch": 0.65,
"learning_rate": 1.9479733438965666e-05,
"loss": 0.583,
"step": 116
},
{
"epoch": 0.66,
"learning_rate": 1.9468081717081893e-05,
"loss": 0.56,
"step": 117
},
{
"epoch": 0.66,
"learning_rate": 1.9456304525263426e-05,
"loss": 0.5364,
"step": 118
},
{
"epoch": 0.67,
"learning_rate": 1.944440201958027e-05,
"loss": 0.5265,
"step": 119
},
{
"epoch": 0.67,
"learning_rate": 1.943237435776306e-05,
"loss": 0.4921,
"step": 120
},
{
"epoch": 0.68,
"learning_rate": 1.9420221699200997e-05,
"loss": 0.511,
"step": 121
},
{
"epoch": 0.68,
"learning_rate": 1.9407944204939718e-05,
"loss": 0.5473,
"step": 122
},
{
"epoch": 0.69,
"learning_rate": 1.9395542037679172e-05,
"loss": 0.5453,
"step": 123
},
{
"epoch": 0.69,
"learning_rate": 1.938301536177146e-05,
"loss": 0.5311,
"step": 124
},
{
"epoch": 0.7,
"learning_rate": 1.9370364343218656e-05,
"loss": 0.6088,
"step": 125
},
{
"epoch": 0.71,
"learning_rate": 1.935758914967061e-05,
"loss": 0.5417,
"step": 126
},
{
"epoch": 0.71,
"learning_rate": 1.9344689950422722e-05,
"loss": 0.5624,
"step": 127
},
{
"epoch": 0.72,
"learning_rate": 1.9331666916413708e-05,
"loss": 0.4885,
"step": 128
},
{
"epoch": 0.72,
"learning_rate": 1.9318520220223318e-05,
"loss": 0.5462,
"step": 129
},
{
"epoch": 0.73,
"learning_rate": 1.9305250036070067e-05,
"loss": 0.5564,
"step": 130
},
{
"epoch": 0.73,
"learning_rate": 1.9291856539808917e-05,
"loss": 0.5349,
"step": 131
},
{
"epoch": 0.74,
"learning_rate": 1.9278339908928943e-05,
"loss": 0.611,
"step": 132
},
{
"epoch": 0.75,
"learning_rate": 1.9264700322550992e-05,
"loss": 0.5866,
"step": 133
},
{
"epoch": 0.75,
"learning_rate": 1.9250937961425303e-05,
"loss": 0.5398,
"step": 134
},
{
"epoch": 0.76,
"learning_rate": 1.9237053007929103e-05,
"loss": 0.5617,
"step": 135
},
{
"epoch": 0.76,
"learning_rate": 1.9223045646064215e-05,
"loss": 0.5583,
"step": 136
},
{
"epoch": 0.77,
"learning_rate": 1.9208916061454584e-05,
"loss": 0.6215,
"step": 137
},
{
"epoch": 0.77,
"learning_rate": 1.9194664441343853e-05,
"loss": 0.5457,
"step": 138
},
{
"epoch": 0.78,
"learning_rate": 1.9180290974592863e-05,
"loss": 0.5172,
"step": 139
},
{
"epoch": 0.78,
"learning_rate": 1.916579585167714e-05,
"loss": 0.4998,
"step": 140
},
{
"epoch": 0.79,
"learning_rate": 1.9151179264684402e-05,
"loss": 0.5927,
"step": 141
},
{
"epoch": 0.8,
"learning_rate": 1.9136441407311985e-05,
"loss": 0.612,
"step": 142
},
{
"epoch": 0.8,
"learning_rate": 1.9121582474864283e-05,
"loss": 0.5085,
"step": 143
},
{
"epoch": 0.81,
"learning_rate": 1.9106602664250166e-05,
"loss": 0.5767,
"step": 144
},
{
"epoch": 0.81,
"learning_rate": 1.9091502173980375e-05,
"loss": 0.5648,
"step": 145
},
{
"epoch": 0.82,
"learning_rate": 1.9076281204164875e-05,
"loss": 0.5313,
"step": 146
},
{
"epoch": 0.82,
"learning_rate": 1.906093995651021e-05,
"loss": 0.5076,
"step": 147
},
{
"epoch": 0.83,
"learning_rate": 1.904547863431685e-05,
"loss": 0.6202,
"step": 148
},
{
"epoch": 0.83,
"learning_rate": 1.9029897442476453e-05,
"loss": 0.5545,
"step": 149
},
{
"epoch": 0.84,
"learning_rate": 1.9014196587469195e-05,
"loss": 0.592,
"step": 150
},
{
"epoch": 0.85,
"learning_rate": 1.8998376277361008e-05,
"loss": 0.4594,
"step": 151
},
{
"epoch": 0.85,
"learning_rate": 1.898243672180083e-05,
"loss": 0.5107,
"step": 152
},
{
"epoch": 0.86,
"learning_rate": 1.896637813201782e-05,
"loss": 0.6135,
"step": 153
},
{
"epoch": 0.86,
"learning_rate": 1.8950200720818584e-05,
"loss": 0.5757,
"step": 154
},
{
"epoch": 0.87,
"learning_rate": 1.8933904702584315e-05,
"loss": 0.6307,
"step": 155
},
{
"epoch": 0.87,
"learning_rate": 1.8917490293267976e-05,
"loss": 0.4862,
"step": 156
},
{
"epoch": 0.88,
"learning_rate": 1.890095771039145e-05,
"loss": 0.5353,
"step": 157
},
{
"epoch": 0.89,
"learning_rate": 1.888430717304262e-05,
"loss": 0.5666,
"step": 158
},
{
"epoch": 0.89,
"learning_rate": 1.886753890187251e-05,
"loss": 0.5232,
"step": 159
},
{
"epoch": 0.9,
"learning_rate": 1.8850653119092325e-05,
"loss": 0.5406,
"step": 160
},
{
"epoch": 0.9,
"learning_rate": 1.8833650048470524e-05,
"loss": 0.6003,
"step": 161
},
{
"epoch": 0.91,
"learning_rate": 1.8816529915329855e-05,
"loss": 0.4384,
"step": 162
},
{
"epoch": 0.91,
"learning_rate": 1.8799292946544363e-05,
"loss": 0.567,
"step": 163
},
{
"epoch": 0.92,
"learning_rate": 1.8781939370536376e-05,
"loss": 0.498,
"step": 164
},
{
"epoch": 0.92,
"learning_rate": 1.8764469417273507e-05,
"loss": 0.5388,
"step": 165
},
{
"epoch": 0.93,
"learning_rate": 1.874688331826557e-05,
"loss": 0.5327,
"step": 166
},
{
"epoch": 0.94,
"learning_rate": 1.8729181306561534e-05,
"loss": 0.5766,
"step": 167
},
{
"epoch": 0.94,
"learning_rate": 1.8711363616746438e-05,
"loss": 0.5231,
"step": 168
},
{
"epoch": 0.95,
"learning_rate": 1.8693430484938256e-05,
"loss": 0.5461,
"step": 169
},
{
"epoch": 0.95,
"learning_rate": 1.8675382148784805e-05,
"loss": 0.5535,
"step": 170
},
{
"epoch": 0.96,
"learning_rate": 1.865721884746057e-05,
"loss": 0.482,
"step": 171
},
{
"epoch": 0.96,
"learning_rate": 1.8638940821663536e-05,
"loss": 0.5298,
"step": 172
},
{
"epoch": 0.97,
"learning_rate": 1.862054831361201e-05,
"loss": 0.5593,
"step": 173
},
{
"epoch": 0.97,
"learning_rate": 1.86020415670414e-05,
"loss": 0.5528,
"step": 174
},
{
"epoch": 0.98,
"learning_rate": 1.8583420827200996e-05,
"loss": 0.5267,
"step": 175
},
{
"epoch": 0.99,
"learning_rate": 1.8564686340850707e-05,
"loss": 0.5772,
"step": 176
},
{
"epoch": 0.99,
"learning_rate": 1.8545838356257804e-05,
"loss": 0.5183,
"step": 177
},
{
"epoch": 1.0,
"learning_rate": 1.8526877123193615e-05,
"loss": 0.5926,
"step": 178
},
{
"epoch": 1.0,
"learning_rate": 1.8507802892930232e-05,
"loss": 0.5419,
"step": 179
},
{
"epoch": 1.01,
"learning_rate": 1.8488615918237175e-05,
"loss": 0.4824,
"step": 180
},
{
"epoch": 1.01,
"learning_rate": 1.846931645337803e-05,
"loss": 0.4525,
"step": 181
},
{
"epoch": 1.02,
"learning_rate": 1.8449904754107092e-05,
"loss": 0.512,
"step": 182
},
{
"epoch": 1.03,
"learning_rate": 1.843038107766599e-05,
"loss": 0.5819,
"step": 183
},
{
"epoch": 1.03,
"learning_rate": 1.8410745682780234e-05,
"loss": 0.5038,
"step": 184
},
{
"epoch": 1.04,
"learning_rate": 1.8390998829655834e-05,
"loss": 0.5107,
"step": 185
},
{
"epoch": 1.04,
"learning_rate": 1.8371140779975826e-05,
"loss": 0.4921,
"step": 186
},
{
"epoch": 1.05,
"learning_rate": 1.8351171796896815e-05,
"loss": 0.4871,
"step": 187
},
{
"epoch": 1.05,
"learning_rate": 1.833109214504547e-05,
"loss": 0.433,
"step": 188
},
{
"epoch": 1.06,
"learning_rate": 1.8310902090515045e-05,
"loss": 0.5284,
"step": 189
},
{
"epoch": 1.06,
"learning_rate": 1.8290601900861823e-05,
"loss": 0.5185,
"step": 190
},
{
"epoch": 1.07,
"learning_rate": 1.8270191845101602e-05,
"loss": 0.5145,
"step": 191
},
{
"epoch": 1.08,
"learning_rate": 1.8249672193706097e-05,
"loss": 0.4786,
"step": 192
},
{
"epoch": 1.08,
"learning_rate": 1.8229043218599382e-05,
"loss": 0.54,
"step": 193
},
{
"epoch": 1.09,
"learning_rate": 1.8208305193154266e-05,
"loss": 0.4964,
"step": 194
},
{
"epoch": 1.09,
"learning_rate": 1.81874583921887e-05,
"loss": 0.5433,
"step": 195
},
{
"epoch": 1.1,
"learning_rate": 1.816650309196209e-05,
"loss": 0.449,
"step": 196
},
{
"epoch": 1.1,
"learning_rate": 1.8145439570171684e-05,
"loss": 0.3654,
"step": 197
},
{
"epoch": 1.11,
"learning_rate": 1.8124268105948857e-05,
"loss": 0.4365,
"step": 198
},
{
"epoch": 1.11,
"learning_rate": 1.8102988979855428e-05,
"loss": 0.5445,
"step": 199
},
{
"epoch": 1.12,
"learning_rate": 1.808160247387994e-05,
"loss": 0.5459,
"step": 200
},
{
"epoch": 1.13,
"learning_rate": 1.8060108871433922e-05,
"loss": 0.5086,
"step": 201
},
{
"epoch": 1.13,
"learning_rate": 1.803850845734813e-05,
"loss": 0.4476,
"step": 202
},
{
"epoch": 1.14,
"learning_rate": 1.8016801517868784e-05,
"loss": 0.4729,
"step": 203
},
{
"epoch": 1.14,
"learning_rate": 1.799498834065375e-05,
"loss": 0.544,
"step": 204
},
{
"epoch": 1.15,
"learning_rate": 1.7973069214768766e-05,
"loss": 0.534,
"step": 205
},
{
"epoch": 1.15,
"learning_rate": 1.7951044430683567e-05,
"loss": 0.4706,
"step": 206
},
{
"epoch": 1.16,
"learning_rate": 1.792891428026808e-05,
"loss": 0.4989,
"step": 207
},
{
"epoch": 1.17,
"learning_rate": 1.7906679056788516e-05,
"loss": 0.4368,
"step": 208
},
{
"epoch": 1.17,
"learning_rate": 1.788433905490351e-05,
"loss": 0.482,
"step": 209
},
{
"epoch": 1.18,
"learning_rate": 1.7861894570660218e-05,
"loss": 0.4386,
"step": 210
},
{
"epoch": 1.18,
"learning_rate": 1.783934590149037e-05,
"loss": 0.5124,
"step": 211
},
{
"epoch": 1.19,
"learning_rate": 1.7816693346206343e-05,
"loss": 0.4603,
"step": 212
},
{
"epoch": 1.19,
"learning_rate": 1.779393720499721e-05,
"loss": 0.4748,
"step": 213
},
{
"epoch": 1.2,
"learning_rate": 1.777107777942475e-05,
"loss": 0.5926,
"step": 214
},
{
"epoch": 1.2,
"learning_rate": 1.7748115372419453e-05,
"loss": 0.468,
"step": 215
},
{
"epoch": 1.21,
"learning_rate": 1.77250502882765e-05,
"loss": 0.4214,
"step": 216
},
{
"epoch": 1.22,
"learning_rate": 1.7701882832651752e-05,
"loss": 0.5297,
"step": 217
},
{
"epoch": 1.22,
"learning_rate": 1.7678613312557674e-05,
"loss": 0.4366,
"step": 218
},
{
"epoch": 1.23,
"learning_rate": 1.7655242036359287e-05,
"loss": 0.5079,
"step": 219
},
{
"epoch": 1.23,
"learning_rate": 1.763176931377006e-05,
"loss": 0.4313,
"step": 220
},
{
"epoch": 1.24,
"learning_rate": 1.760819545584783e-05,
"loss": 0.5726,
"step": 221
},
{
"epoch": 1.24,
"learning_rate": 1.758452077499066e-05,
"loss": 0.5352,
"step": 222
},
{
"epoch": 1.25,
"learning_rate": 1.7560745584932712e-05,
"loss": 0.5007,
"step": 223
},
{
"epoch": 1.25,
"learning_rate": 1.7536870200740076e-05,
"loss": 0.5506,
"step": 224
},
{
"epoch": 1.26,
"learning_rate": 1.751289493880662e-05,
"loss": 0.486,
"step": 225
},
{
"epoch": 1.27,
"learning_rate": 1.748882011684976e-05,
"loss": 0.4781,
"step": 226
},
{
"epoch": 1.27,
"learning_rate": 1.7464646053906283e-05,
"loss": 0.4453,
"step": 227
},
{
"epoch": 1.28,
"learning_rate": 1.74403730703281e-05,
"loss": 0.5034,
"step": 228
},
{
"epoch": 1.28,
"learning_rate": 1.7416001487778017e-05,
"loss": 0.5038,
"step": 229
},
{
"epoch": 1.29,
"learning_rate": 1.7391531629225448e-05,
"loss": 0.4896,
"step": 230
},
{
"epoch": 1.29,
"learning_rate": 1.736696381894216e-05,
"loss": 0.4516,
"step": 231
},
{
"epoch": 1.3,
"learning_rate": 1.7342298382497964e-05,
"loss": 0.5462,
"step": 232
},
{
"epoch": 1.31,
"learning_rate": 1.7317535646756395e-05,
"loss": 0.4461,
"step": 233
},
{
"epoch": 1.31,
"learning_rate": 1.729267593987039e-05,
"loss": 0.3791,
"step": 234
},
{
"epoch": 1.32,
"learning_rate": 1.726771959127794e-05,
"loss": 0.4906,
"step": 235
},
{
"epoch": 1.32,
"learning_rate": 1.724266693169772e-05,
"loss": 0.4422,
"step": 236
},
{
"epoch": 1.33,
"learning_rate": 1.7217518293124705e-05,
"loss": 0.5453,
"step": 237
},
{
"epoch": 1.33,
"learning_rate": 1.7192274008825772e-05,
"loss": 0.4437,
"step": 238
},
{
"epoch": 1.34,
"learning_rate": 1.7166934413335286e-05,
"loss": 0.4239,
"step": 239
},
{
"epoch": 1.34,
"learning_rate": 1.7141499842450667e-05,
"loss": 0.4213,
"step": 240
},
{
"epoch": 1.35,
"learning_rate": 1.7115970633227935e-05,
"loss": 0.4767,
"step": 241
},
{
"epoch": 1.36,
"learning_rate": 1.7090347123977248e-05,
"loss": 0.4196,
"step": 242
},
{
"epoch": 1.36,
"learning_rate": 1.7064629654258415e-05,
"loss": 0.4207,
"step": 243
},
{
"epoch": 1.37,
"learning_rate": 1.7038818564876396e-05,
"loss": 0.3871,
"step": 244
},
{
"epoch": 1.37,
"learning_rate": 1.70129141978768e-05,
"loss": 0.4131,
"step": 245
},
{
"epoch": 1.38,
"learning_rate": 1.6986916896541324e-05,
"loss": 0.3793,
"step": 246
},
{
"epoch": 1.38,
"learning_rate": 1.6960827005383236e-05,
"loss": 0.4892,
"step": 247
},
{
"epoch": 1.39,
"learning_rate": 1.6934644870142777e-05,
"loss": 0.4063,
"step": 248
},
{
"epoch": 1.39,
"learning_rate": 1.690837083778261e-05,
"loss": 0.402,
"step": 249
},
{
"epoch": 1.4,
"learning_rate": 1.688200525648321e-05,
"loss": 0.399,
"step": 250
},
{
"epoch": 1.41,
"learning_rate": 1.6855548475638225e-05,
"loss": 0.3342,
"step": 251
},
{
"epoch": 1.41,
"learning_rate": 1.6829000845849897e-05,
"loss": 0.5165,
"step": 252
},
{
"epoch": 1.42,
"learning_rate": 1.6802362718924374e-05,
"loss": 0.4079,
"step": 253
},
{
"epoch": 1.42,
"learning_rate": 1.6775634447867064e-05,
"loss": 0.4615,
"step": 254
},
{
"epoch": 1.43,
"learning_rate": 1.6748816386877953e-05,
"loss": 0.3851,
"step": 255
},
{
"epoch": 1.43,
"learning_rate": 1.672190889134691e-05,
"loss": 0.4107,
"step": 256
},
{
"epoch": 1.44,
"learning_rate": 1.6694912317848993e-05,
"loss": 0.4375,
"step": 257
},
{
"epoch": 1.45,
"learning_rate": 1.6667827024139697e-05,
"loss": 0.3976,
"step": 258
},
{
"epoch": 1.45,
"learning_rate": 1.6640653369150233e-05,
"loss": 0.3648,
"step": 259
},
{
"epoch": 1.46,
"learning_rate": 1.6613391712982765e-05,
"loss": 0.4331,
"step": 260
},
{
"epoch": 1.46,
"learning_rate": 1.658604241690564e-05,
"loss": 0.4544,
"step": 261
},
{
"epoch": 1.47,
"learning_rate": 1.65586058433486e-05,
"loss": 0.3694,
"step": 262
},
{
"epoch": 1.47,
"learning_rate": 1.653108235589797e-05,
"loss": 0.5195,
"step": 263
},
{
"epoch": 1.48,
"learning_rate": 1.6503472319291864e-05,
"loss": 0.3779,
"step": 264
},
{
"epoch": 1.48,
"learning_rate": 1.6475776099415312e-05,
"loss": 0.3748,
"step": 265
},
{
"epoch": 1.49,
"learning_rate": 1.6447994063295454e-05,
"loss": 0.5245,
"step": 266
},
{
"epoch": 1.5,
"learning_rate": 1.6420126579096656e-05,
"loss": 0.5061,
"step": 267
},
{
"epoch": 1.5,
"learning_rate": 1.6392174016115623e-05,
"loss": 0.5081,
"step": 268
},
{
"epoch": 1.51,
"learning_rate": 1.6364136744776517e-05,
"loss": 0.4957,
"step": 269
},
{
"epoch": 1.51,
"learning_rate": 1.633601513662605e-05,
"loss": 0.4111,
"step": 270
},
{
"epoch": 1.52,
"learning_rate": 1.630780956432855e-05,
"loss": 0.4892,
"step": 271
},
{
"epoch": 1.52,
"learning_rate": 1.627952040166104e-05,
"loss": 0.5418,
"step": 272
},
{
"epoch": 1.53,
"learning_rate": 1.625114802350825e-05,
"loss": 0.4968,
"step": 273
},
{
"epoch": 1.54,
"learning_rate": 1.6222692805857698e-05,
"loss": 0.4889,
"step": 274
},
{
"epoch": 1.54,
"learning_rate": 1.6194155125794664e-05,
"loss": 0.4632,
"step": 275
},
{
"epoch": 1.55,
"learning_rate": 1.6165535361497218e-05,
"loss": 0.4094,
"step": 276
},
{
"epoch": 1.55,
"learning_rate": 1.61368338922312e-05,
"loss": 0.3617,
"step": 277
},
{
"epoch": 1.56,
"learning_rate": 1.610805109834519e-05,
"loss": 0.5753,
"step": 278
},
{
"epoch": 1.56,
"learning_rate": 1.607918736126548e-05,
"loss": 0.4541,
"step": 279
},
{
"epoch": 1.57,
"learning_rate": 1.605024306349101e-05,
"loss": 0.4225,
"step": 280
},
{
"epoch": 1.57,
"learning_rate": 1.60212185885883e-05,
"loss": 0.4963,
"step": 281
},
{
"epoch": 1.58,
"learning_rate": 1.599211432118636e-05,
"loss": 0.4529,
"step": 282
},
{
"epoch": 1.59,
"learning_rate": 1.5962930646971617e-05,
"loss": 0.4056,
"step": 283
},
{
"epoch": 1.59,
"learning_rate": 1.5933667952682777e-05,
"loss": 0.3916,
"step": 284
},
{
"epoch": 1.6,
"learning_rate": 1.590432662610571e-05,
"loss": 0.4661,
"step": 285
},
{
"epoch": 1.6,
"learning_rate": 1.587490705606832e-05,
"loss": 0.4605,
"step": 286
},
{
"epoch": 1.61,
"learning_rate": 1.5845409632435385e-05,
"loss": 0.4583,
"step": 287
},
{
"epoch": 1.61,
"learning_rate": 1.5815834746103376e-05,
"loss": 0.3606,
"step": 288
},
{
"epoch": 1.62,
"learning_rate": 1.5786182788995302e-05,
"loss": 0.4493,
"step": 289
},
{
"epoch": 1.62,
"learning_rate": 1.5756454154055508e-05,
"loss": 0.4135,
"step": 290
},
{
"epoch": 1.63,
"learning_rate": 1.5726649235244455e-05,
"loss": 0.4268,
"step": 291
},
{
"epoch": 1.64,
"learning_rate": 1.5696768427533515e-05,
"loss": 0.3712,
"step": 292
},
{
"epoch": 1.64,
"learning_rate": 1.5666812126899732e-05,
"loss": 0.4449,
"step": 293
},
{
"epoch": 1.65,
"learning_rate": 1.563678073032057e-05,
"loss": 0.4755,
"step": 294
},
{
"epoch": 1.65,
"learning_rate": 1.5606674635768656e-05,
"loss": 0.3875,
"step": 295
},
{
"epoch": 1.66,
"learning_rate": 1.557649424220651e-05,
"loss": 0.3713,
"step": 296
},
{
"epoch": 1.66,
"learning_rate": 1.554623994958125e-05,
"loss": 0.3304,
"step": 297
},
{
"epoch": 1.67,
"learning_rate": 1.55159121588193e-05,
"loss": 0.4069,
"step": 298
},
{
"epoch": 1.68,
"learning_rate": 1.5485511271821065e-05,
"loss": 0.3478,
"step": 299
},
{
"epoch": 1.68,
"learning_rate": 1.5455037691455626e-05,
"loss": 0.4394,
"step": 300
},
{
"epoch": 1.69,
"learning_rate": 1.542449182155537e-05,
"loss": 0.3944,
"step": 301
},
{
"epoch": 1.69,
"learning_rate": 1.5393874066910677e-05,
"loss": 0.3854,
"step": 302
},
{
"epoch": 1.7,
"learning_rate": 1.536318483326452e-05,
"loss": 0.4872,
"step": 303
},
{
"epoch": 1.7,
"learning_rate": 1.533242452730711e-05,
"loss": 0.3055,
"step": 304
},
{
"epoch": 1.71,
"learning_rate": 1.5301593556670497e-05,
"loss": 0.3962,
"step": 305
},
{
"epoch": 1.71,
"learning_rate": 1.5270692329923176e-05,
"loss": 0.3613,
"step": 306
},
{
"epoch": 1.72,
"learning_rate": 1.5239721256564665e-05,
"loss": 0.4131,
"step": 307
},
{
"epoch": 1.73,
"learning_rate": 1.5208680747020083e-05,
"loss": 0.4807,
"step": 308
},
{
"epoch": 1.73,
"learning_rate": 1.5177571212634705e-05,
"loss": 0.4358,
"step": 309
},
{
"epoch": 1.74,
"learning_rate": 1.5146393065668518e-05,
"loss": 0.3484,
"step": 310
},
{
"epoch": 1.74,
"learning_rate": 1.5115146719290761e-05,
"loss": 0.4296,
"step": 311
},
{
"epoch": 1.75,
"learning_rate": 1.508383258757443e-05,
"loss": 0.3912,
"step": 312
},
{
"epoch": 1.75,
"learning_rate": 1.5052451085490816e-05,
"loss": 0.2749,
"step": 313
},
{
"epoch": 1.76,
"learning_rate": 1.5021002628903994e-05,
"loss": 0.458,
"step": 314
},
{
"epoch": 1.76,
"learning_rate": 1.4989487634565302e-05,
"loss": 0.5715,
"step": 315
},
{
"epoch": 1.77,
"learning_rate": 1.4957906520107846e-05,
"loss": 0.3927,
"step": 316
},
{
"epoch": 1.78,
"learning_rate": 1.492625970404093e-05,
"loss": 0.4411,
"step": 317
},
{
"epoch": 1.78,
"learning_rate": 1.4894547605744541e-05,
"loss": 0.4991,
"step": 318
},
{
"epoch": 1.79,
"learning_rate": 1.486277064546377e-05,
"loss": 0.3269,
"step": 319
},
{
"epoch": 1.79,
"learning_rate": 1.4830929244303262e-05,
"loss": 0.4627,
"step": 320
},
{
"epoch": 1.8,
"learning_rate": 1.4799023824221614e-05,
"loss": 0.2916,
"step": 321
},
{
"epoch": 1.8,
"learning_rate": 1.47670548080258e-05,
"loss": 0.3889,
"step": 322
},
{
"epoch": 1.81,
"learning_rate": 1.4735022619365564e-05,
"loss": 0.4178,
"step": 323
},
{
"epoch": 1.82,
"learning_rate": 1.4702927682727798e-05,
"loss": 0.4041,
"step": 324
},
{
"epoch": 1.82,
"learning_rate": 1.4670770423430933e-05,
"loss": 0.3819,
"step": 325
},
{
"epoch": 1.83,
"learning_rate": 1.463855126761928e-05,
"loss": 0.3658,
"step": 326
},
{
"epoch": 1.83,
"learning_rate": 1.460627064225741e-05,
"loss": 0.3239,
"step": 327
},
{
"epoch": 1.84,
"learning_rate": 1.4573928975124466e-05,
"loss": 0.4211,
"step": 328
},
{
"epoch": 1.84,
"learning_rate": 1.4541526694808524e-05,
"loss": 0.4433,
"step": 329
},
{
"epoch": 1.85,
"learning_rate": 1.4509064230700881e-05,
"loss": 0.4274,
"step": 330
},
{
"epoch": 1.85,
"learning_rate": 1.4476542012990412e-05,
"loss": 0.3624,
"step": 331
},
{
"epoch": 1.86,
"learning_rate": 1.4443960472657803e-05,
"loss": 0.3273,
"step": 332
},
{
"epoch": 1.87,
"learning_rate": 1.441132004146991e-05,
"loss": 0.293,
"step": 333
},
{
"epoch": 1.87,
"learning_rate": 1.437862115197398e-05,
"loss": 0.3547,
"step": 334
},
{
"epoch": 1.88,
"learning_rate": 1.4345864237491956e-05,
"loss": 0.3781,
"step": 335
},
{
"epoch": 1.88,
"learning_rate": 1.4313049732114716e-05,
"loss": 0.4118,
"step": 336
},
{
"epoch": 1.89,
"learning_rate": 1.4280178070696332e-05,
"loss": 0.4482,
"step": 337
},
{
"epoch": 1.89,
"learning_rate": 1.4247249688848293e-05,
"loss": 0.3746,
"step": 338
},
{
"epoch": 1.9,
"learning_rate": 1.4214265022933744e-05,
"loss": 0.296,
"step": 339
},
{
"epoch": 1.9,
"learning_rate": 1.4181224510061702e-05,
"loss": 0.3738,
"step": 340
},
{
"epoch": 1.91,
"learning_rate": 1.4148128588081258e-05,
"loss": 0.3376,
"step": 341
},
{
"epoch": 1.92,
"learning_rate": 1.4114977695575784e-05,
"loss": 0.392,
"step": 342
},
{
"epoch": 1.92,
"learning_rate": 1.4081772271857108e-05,
"loss": 0.3484,
"step": 343
},
{
"epoch": 1.93,
"learning_rate": 1.4048512756959705e-05,
"loss": 0.4307,
"step": 344
},
{
"epoch": 1.93,
"learning_rate": 1.4015199591634851e-05,
"loss": 0.4013,
"step": 345
},
{
"epoch": 1.94,
"learning_rate": 1.3981833217344811e-05,
"loss": 0.3606,
"step": 346
},
{
"epoch": 1.94,
"learning_rate": 1.3948414076256945e-05,
"loss": 0.3624,
"step": 347
},
{
"epoch": 1.95,
"learning_rate": 1.391494261123789e-05,
"loss": 0.3829,
"step": 348
},
{
"epoch": 1.96,
"learning_rate": 1.388141926584766e-05,
"loss": 0.3793,
"step": 349
},
{
"epoch": 1.96,
"learning_rate": 1.3847844484333799e-05,
"loss": 0.3459,
"step": 350
},
{
"epoch": 1.97,
"learning_rate": 1.3814218711625457e-05,
"loss": 0.3636,
"step": 351
},
{
"epoch": 1.97,
"learning_rate": 1.3780542393327534e-05,
"loss": 0.4333,
"step": 352
},
{
"epoch": 1.98,
"learning_rate": 1.3746815975714734e-05,
"loss": 0.3606,
"step": 353
},
{
"epoch": 1.98,
"learning_rate": 1.3713039905725689e-05,
"loss": 0.3884,
"step": 354
},
{
"epoch": 1.99,
"learning_rate": 1.3679214630957e-05,
"loss": 0.3226,
"step": 355
},
{
"epoch": 1.99,
"learning_rate": 1.364534059965735e-05,
"loss": 0.3981,
"step": 356
},
{
"epoch": 2.0,
"learning_rate": 1.3611418260721507e-05,
"loss": 0.4448,
"step": 357
},
{
"epoch": 2.01,
"learning_rate": 1.3577448063684436e-05,
"loss": 0.3331,
"step": 358
},
{
"epoch": 2.01,
"learning_rate": 1.3543430458715285e-05,
"loss": 0.4168,
"step": 359
},
{
"epoch": 2.02,
"learning_rate": 1.350936589661147e-05,
"loss": 0.3058,
"step": 360
},
{
"epoch": 2.02,
"learning_rate": 1.347525482879266e-05,
"loss": 0.306,
"step": 361
},
{
"epoch": 2.03,
"learning_rate": 1.3441097707294828e-05,
"loss": 0.2487,
"step": 362
},
{
"epoch": 2.03,
"learning_rate": 1.3406894984764227e-05,
"loss": 0.2154,
"step": 363
},
{
"epoch": 2.04,
"learning_rate": 1.3372647114451422e-05,
"loss": 0.3322,
"step": 364
},
{
"epoch": 2.04,
"learning_rate": 1.3338354550205274e-05,
"loss": 0.327,
"step": 365
},
{
"epoch": 2.05,
"learning_rate": 1.3304017746466912e-05,
"loss": 0.2238,
"step": 366
},
{
"epoch": 2.06,
"learning_rate": 1.326963715826373e-05,
"loss": 0.2261,
"step": 367
},
{
"epoch": 2.06,
"learning_rate": 1.3235213241203344e-05,
"loss": 0.308,
"step": 368
},
{
"epoch": 2.07,
"learning_rate": 1.3200746451467561e-05,
"loss": 0.2169,
"step": 369
},
{
"epoch": 2.07,
"learning_rate": 1.3166237245806333e-05,
"loss": 0.2629,
"step": 370
},
{
"epoch": 2.08,
"learning_rate": 1.3131686081531698e-05,
"loss": 0.2017,
"step": 371
},
{
"epoch": 2.08,
"learning_rate": 1.3097093416511733e-05,
"loss": 0.2351,
"step": 372
},
{
"epoch": 2.09,
"learning_rate": 1.306245970916447e-05,
"loss": 0.218,
"step": 373
},
{
"epoch": 2.1,
"learning_rate": 1.3027785418451836e-05,
"loss": 0.2733,
"step": 374
},
{
"epoch": 2.1,
"learning_rate": 1.2993071003873561e-05,
"loss": 0.2352,
"step": 375
},
{
"epoch": 2.11,
"learning_rate": 1.2958316925461084e-05,
"loss": 0.173,
"step": 376
},
{
"epoch": 2.11,
"learning_rate": 1.2923523643771485e-05,
"loss": 0.1737,
"step": 377
},
{
"epoch": 2.12,
"learning_rate": 1.2888691619881338e-05,
"loss": 0.201,
"step": 378
},
{
"epoch": 2.12,
"learning_rate": 1.285382131538064e-05,
"loss": 0.2115,
"step": 379
},
{
"epoch": 2.13,
"learning_rate": 1.2818913192366668e-05,
"loss": 0.2861,
"step": 380
},
{
"epoch": 2.13,
"learning_rate": 1.2783967713437882e-05,
"loss": 0.2525,
"step": 381
},
{
"epoch": 2.14,
"learning_rate": 1.2748985341687755e-05,
"loss": 0.2232,
"step": 382
},
{
"epoch": 2.15,
"learning_rate": 1.2713966540698678e-05,
"loss": 0.1702,
"step": 383
},
{
"epoch": 2.15,
"learning_rate": 1.2678911774535787e-05,
"loss": 0.2434,
"step": 384
},
{
"epoch": 2.16,
"learning_rate": 1.2643821507740832e-05,
"loss": 0.2051,
"step": 385
},
{
"epoch": 2.16,
"learning_rate": 1.260869620532601e-05,
"loss": 0.1868,
"step": 386
},
{
"epoch": 2.17,
"learning_rate": 1.257353633276781e-05,
"loss": 0.3293,
"step": 387
},
{
"epoch": 2.17,
"learning_rate": 1.2538342356000827e-05,
"loss": 0.182,
"step": 388
},
{
"epoch": 2.18,
"learning_rate": 1.2503114741411619e-05,
"loss": 0.3612,
"step": 389
},
{
"epoch": 2.18,
"learning_rate": 1.2467853955832501e-05,
"loss": 0.3242,
"step": 390
},
{
"epoch": 2.19,
"learning_rate": 1.2432560466535362e-05,
"loss": 0.2208,
"step": 391
},
{
"epoch": 2.2,
"learning_rate": 1.2397234741225481e-05,
"loss": 0.2242,
"step": 392
},
{
"epoch": 2.2,
"learning_rate": 1.2361877248035326e-05,
"loss": 0.2588,
"step": 393
},
{
"epoch": 2.21,
"learning_rate": 1.2326488455518346e-05,
"loss": 0.2354,
"step": 394
},
{
"epoch": 2.21,
"learning_rate": 1.2291068832642768e-05,
"loss": 0.2369,
"step": 395
},
{
"epoch": 2.22,
"learning_rate": 1.2255618848785378e-05,
"loss": 0.2476,
"step": 396
},
{
"epoch": 2.22,
"learning_rate": 1.2220138973725304e-05,
"loss": 0.2932,
"step": 397
},
{
"epoch": 2.23,
"learning_rate": 1.2184629677637785e-05,
"loss": 0.2847,
"step": 398
},
{
"epoch": 2.24,
"learning_rate": 1.214909143108795e-05,
"loss": 0.1917,
"step": 399
},
{
"epoch": 2.24,
"learning_rate": 1.2113524705024566e-05,
"loss": 0.2162,
"step": 400
},
{
"epoch": 2.25,
"learning_rate": 1.2077929970773823e-05,
"loss": 0.4052,
"step": 401
},
{
"epoch": 2.25,
"learning_rate": 1.2042307700033055e-05,
"loss": 0.2115,
"step": 402
},
{
"epoch": 2.26,
"learning_rate": 1.200665836486452e-05,
"loss": 0.2472,
"step": 403
},
{
"epoch": 2.26,
"learning_rate": 1.1970982437689122e-05,
"loss": 0.2308,
"step": 404
},
{
"epoch": 2.27,
"learning_rate": 1.1935280391280166e-05,
"loss": 0.4317,
"step": 405
},
{
"epoch": 2.27,
"learning_rate": 1.1899552698757078e-05,
"loss": 0.2008,
"step": 406
},
{
"epoch": 2.28,
"learning_rate": 1.1863799833579152e-05,
"loss": 0.3238,
"step": 407
},
{
"epoch": 2.29,
"learning_rate": 1.1828022269539261e-05,
"loss": 0.2102,
"step": 408
},
{
"epoch": 2.29,
"learning_rate": 1.1792220480757588e-05,
"loss": 0.2714,
"step": 409
},
{
"epoch": 2.3,
"learning_rate": 1.175639494167534e-05,
"loss": 0.2974,
"step": 410
},
{
"epoch": 2.3,
"learning_rate": 1.1720546127048454e-05,
"loss": 0.2219,
"step": 411
},
{
"epoch": 2.31,
"learning_rate": 1.1684674511941317e-05,
"loss": 0.2705,
"step": 412
},
{
"epoch": 2.31,
"learning_rate": 1.1648780571720466e-05,
"loss": 0.204,
"step": 413
},
{
"epoch": 2.32,
"learning_rate": 1.1612864782048288e-05,
"loss": 0.2364,
"step": 414
},
{
"epoch": 2.32,
"learning_rate": 1.1576927618876711e-05,
"loss": 0.2857,
"step": 415
},
{
"epoch": 2.33,
"learning_rate": 1.1540969558440911e-05,
"loss": 0.2875,
"step": 416
},
{
"epoch": 2.34,
"learning_rate": 1.1504991077252986e-05,
"loss": 0.2031,
"step": 417
},
{
"epoch": 2.34,
"learning_rate": 1.1468992652095643e-05,
"loss": 0.3449,
"step": 418
},
{
"epoch": 2.35,
"learning_rate": 1.1432974760015895e-05,
"loss": 0.1838,
"step": 419
},
{
"epoch": 2.35,
"learning_rate": 1.1396937878318723e-05,
"loss": 0.1535,
"step": 420
},
{
"epoch": 2.36,
"learning_rate": 1.1360882484560756e-05,
"loss": 0.2436,
"step": 421
},
{
"epoch": 2.36,
"learning_rate": 1.1324809056543942e-05,
"loss": 0.2042,
"step": 422
},
{
"epoch": 2.37,
"learning_rate": 1.1288718072309219e-05,
"loss": 0.2509,
"step": 423
},
{
"epoch": 2.38,
"learning_rate": 1.125261001013018e-05,
"loss": 0.2028,
"step": 424
},
{
"epoch": 2.38,
"learning_rate": 1.1216485348506725e-05,
"loss": 0.3245,
"step": 425
},
{
"epoch": 2.39,
"learning_rate": 1.1180344566158739e-05,
"loss": 0.2351,
"step": 426
},
{
"epoch": 2.39,
"learning_rate": 1.114418814201972e-05,
"loss": 0.3043,
"step": 427
},
{
"epoch": 2.4,
"learning_rate": 1.1108016555230476e-05,
"loss": 0.1388,
"step": 428
},
{
"epoch": 2.4,
"learning_rate": 1.1071830285132722e-05,
"loss": 0.1641,
"step": 429
},
{
"epoch": 2.41,
"learning_rate": 1.1035629811262772e-05,
"loss": 0.2735,
"step": 430
},
{
"epoch": 2.41,
"learning_rate": 1.099941561334515e-05,
"loss": 0.1502,
"step": 431
},
{
"epoch": 2.42,
"learning_rate": 1.0963188171286275e-05,
"loss": 0.1695,
"step": 432
},
{
"epoch": 2.43,
"learning_rate": 1.0926947965168045e-05,
"loss": 0.3094,
"step": 433
},
{
"epoch": 2.43,
"learning_rate": 1.0890695475241534e-05,
"loss": 0.1815,
"step": 434
},
{
"epoch": 2.44,
"learning_rate": 1.0854431181920575e-05,
"loss": 0.1553,
"step": 435
},
{
"epoch": 2.44,
"learning_rate": 1.0818155565775443e-05,
"loss": 0.3224,
"step": 436
},
{
"epoch": 2.45,
"learning_rate": 1.0781869107526441e-05,
"loss": 0.2147,
"step": 437
},
{
"epoch": 2.45,
"learning_rate": 1.074557228803756e-05,
"loss": 0.2274,
"step": 438
},
{
"epoch": 2.46,
"learning_rate": 1.0709265588310098e-05,
"loss": 0.2727,
"step": 439
},
{
"epoch": 2.46,
"learning_rate": 1.0672949489476281e-05,
"loss": 0.3137,
"step": 440
},
{
"epoch": 2.47,
"learning_rate": 1.063662447279289e-05,
"loss": 0.2506,
"step": 441
},
{
"epoch": 2.48,
"learning_rate": 1.0600291019634887e-05,
"loss": 0.3214,
"step": 442
},
{
"epoch": 2.48,
"learning_rate": 1.0563949611489032e-05,
"loss": 0.2394,
"step": 443
},
{
"epoch": 2.49,
"learning_rate": 1.0527600729947509e-05,
"loss": 0.1914,
"step": 444
},
{
"epoch": 2.49,
"learning_rate": 1.0491244856701528e-05,
"loss": 0.2376,
"step": 445
},
{
"epoch": 2.5,
"learning_rate": 1.0454882473534961e-05,
"loss": 0.2176,
"step": 446
},
{
"epoch": 2.5,
"learning_rate": 1.0418514062317944e-05,
"loss": 0.219,
"step": 447
},
{
"epoch": 2.51,
"learning_rate": 1.03821401050005e-05,
"loss": 0.2096,
"step": 448
},
{
"epoch": 2.52,
"learning_rate": 1.0345761083606145e-05,
"loss": 0.1899,
"step": 449
},
{
"epoch": 2.52,
"learning_rate": 1.03093774802255e-05,
"loss": 0.298,
"step": 450
},
{
"epoch": 2.53,
"learning_rate": 1.027298977700992e-05,
"loss": 0.2769,
"step": 451
},
{
"epoch": 2.53,
"learning_rate": 1.0236598456165075e-05,
"loss": 0.1469,
"step": 452
},
{
"epoch": 2.54,
"learning_rate": 1.0200203999944582e-05,
"loss": 0.2712,
"step": 453
},
{
"epoch": 2.54,
"learning_rate": 1.0163806890643606e-05,
"loss": 0.1468,
"step": 454
},
{
"epoch": 2.55,
"learning_rate": 1.0127407610592478e-05,
"loss": 0.1594,
"step": 455
},
{
"epoch": 2.55,
"learning_rate": 1.009100664215028e-05,
"loss": 0.2028,
"step": 456
},
{
"epoch": 2.56,
"learning_rate": 1.0054604467698486e-05,
"loss": 0.2682,
"step": 457
},
{
"epoch": 2.57,
"learning_rate": 1.0018201569634534e-05,
"loss": 0.2491,
"step": 458
},
{
"epoch": 2.57,
"learning_rate": 9.98179843036547e-06,
"loss": 0.1398,
"step": 459
},
{
"epoch": 2.58,
"learning_rate": 9.94539553230152e-06,
"loss": 0.1858,
"step": 460
},
{
"epoch": 2.58,
"learning_rate": 9.908993357849721e-06,
"loss": 0.1752,
"step": 461
},
{
"epoch": 2.59,
"learning_rate": 9.872592389407525e-06,
"loss": 0.2003,
"step": 462
},
{
"epoch": 2.59,
"learning_rate": 9.836193109356397e-06,
"loss": 0.2766,
"step": 463
},
{
"epoch": 2.6,
"learning_rate": 9.799796000055423e-06,
"loss": 0.1685,
"step": 464
},
{
"epoch": 2.61,
"learning_rate": 9.763401543834927e-06,
"loss": 0.2731,
"step": 465
},
{
"epoch": 2.61,
"learning_rate": 9.727010222990083e-06,
"loss": 0.3123,
"step": 466
},
{
"epoch": 2.62,
"learning_rate": 9.690622519774502e-06,
"loss": 0.1975,
"step": 467
},
{
"epoch": 2.62,
"learning_rate": 9.654238916393862e-06,
"loss": 0.1872,
"step": 468
},
{
"epoch": 2.63,
"learning_rate": 9.617859894999503e-06,
"loss": 0.3035,
"step": 469
},
{
"epoch": 2.63,
"learning_rate": 9.581485937682058e-06,
"loss": 0.2127,
"step": 470
},
{
"epoch": 2.64,
"learning_rate": 9.545117526465042e-06,
"loss": 0.1978,
"step": 471
},
{
"epoch": 2.64,
"learning_rate": 9.508755143298472e-06,
"loss": 0.1328,
"step": 472
},
{
"epoch": 2.65,
"learning_rate": 9.472399270052494e-06,
"loss": 0.2843,
"step": 473
},
{
"epoch": 2.66,
"learning_rate": 9.43605038851097e-06,
"loss": 0.1747,
"step": 474
},
{
"epoch": 2.66,
"learning_rate": 9.399708980365116e-06,
"loss": 0.1645,
"step": 475
},
{
"epoch": 2.67,
"learning_rate": 9.363375527207112e-06,
"loss": 0.3202,
"step": 476
},
{
"epoch": 2.67,
"learning_rate": 9.327050510523722e-06,
"loss": 0.3103,
"step": 477
},
{
"epoch": 2.68,
"learning_rate": 9.290734411689905e-06,
"loss": 0.1847,
"step": 478
},
{
"epoch": 2.68,
"learning_rate": 9.254427711962442e-06,
"loss": 0.184,
"step": 479
},
{
"epoch": 2.69,
"learning_rate": 9.21813089247356e-06,
"loss": 0.2261,
"step": 480
},
{
"epoch": 2.69,
"learning_rate": 9.181844434224559e-06,
"loss": 0.2897,
"step": 481
},
{
"epoch": 2.7,
"learning_rate": 9.145568818079428e-06,
"loss": 0.1777,
"step": 482
},
{
"epoch": 2.71,
"learning_rate": 9.109304524758471e-06,
"loss": 0.2002,
"step": 483
},
{
"epoch": 2.71,
"learning_rate": 9.073052034831956e-06,
"loss": 0.1853,
"step": 484
},
{
"epoch": 2.72,
"learning_rate": 9.03681182871373e-06,
"loss": 0.2472,
"step": 485
},
{
"epoch": 2.72,
"learning_rate": 9.000584386654853e-06,
"loss": 0.1652,
"step": 486
},
{
"epoch": 2.73,
"learning_rate": 8.964370188737233e-06,
"loss": 0.2922,
"step": 487
},
{
"epoch": 2.73,
"learning_rate": 8.928169714867278e-06,
"loss": 0.2188,
"step": 488
},
{
"epoch": 2.74,
"learning_rate": 8.891983444769525e-06,
"loss": 0.1846,
"step": 489
},
{
"epoch": 2.75,
"learning_rate": 8.855811857980282e-06,
"loss": 0.1617,
"step": 490
},
{
"epoch": 2.75,
"learning_rate": 8.819655433841263e-06,
"loss": 0.1757,
"step": 491
},
{
"epoch": 2.76,
"learning_rate": 8.783514651493277e-06,
"loss": 0.1097,
"step": 492
},
{
"epoch": 2.76,
"learning_rate": 8.747389989869823e-06,
"loss": 0.1673,
"step": 493
},
{
"epoch": 2.77,
"learning_rate": 8.711281927690784e-06,
"loss": 0.21,
"step": 494
},
{
"epoch": 2.77,
"learning_rate": 8.675190943456058e-06,
"loss": 0.1687,
"step": 495
},
{
"epoch": 2.78,
"learning_rate": 8.639117515439249e-06,
"loss": 0.3681,
"step": 496
},
{
"epoch": 2.78,
"learning_rate": 8.60306212168128e-06,
"loss": 0.1588,
"step": 497
},
{
"epoch": 2.79,
"learning_rate": 8.56702523998411e-06,
"loss": 0.1737,
"step": 498
},
{
"epoch": 2.8,
"learning_rate": 8.531007347904359e-06,
"loss": 0.2373,
"step": 499
},
{
"epoch": 2.8,
"learning_rate": 8.49500892274702e-06,
"loss": 0.1959,
"step": 500
},
{
"epoch": 2.81,
"learning_rate": 8.45903044155909e-06,
"loss": 0.2995,
"step": 501
},
{
"epoch": 2.81,
"learning_rate": 8.42307238112329e-06,
"loss": 0.1762,
"step": 502
},
{
"epoch": 2.82,
"learning_rate": 8.387135217951712e-06,
"loss": 0.2474,
"step": 503
},
{
"epoch": 2.82,
"learning_rate": 8.351219428279536e-06,
"loss": 0.209,
"step": 504
},
{
"epoch": 2.83,
"learning_rate": 8.315325488058685e-06,
"loss": 0.2326,
"step": 505
},
{
"epoch": 2.83,
"learning_rate": 8.279453872951551e-06,
"loss": 0.184,
"step": 506
},
{
"epoch": 2.84,
"learning_rate": 8.243605058324661e-06,
"loss": 0.1522,
"step": 507
},
{
"epoch": 2.85,
"learning_rate": 8.207779519242414e-06,
"loss": 0.2101,
"step": 508
},
{
"epoch": 2.85,
"learning_rate": 8.17197773046074e-06,
"loss": 0.145,
"step": 509
},
{
"epoch": 2.86,
"learning_rate": 8.136200166420851e-06,
"loss": 0.1839,
"step": 510
},
{
"epoch": 2.86,
"learning_rate": 8.100447301242922e-06,
"loss": 0.1873,
"step": 511
},
{
"epoch": 2.87,
"learning_rate": 8.064719608719836e-06,
"loss": 0.2005,
"step": 512
},
{
"epoch": 2.87,
"learning_rate": 8.029017562310883e-06,
"loss": 0.1421,
"step": 513
},
{
"epoch": 2.88,
"learning_rate": 7.993341635135483e-06,
"loss": 0.0907,
"step": 514
},
{
"epoch": 2.89,
"learning_rate": 7.957692299966947e-06,
"loss": 0.2526,
"step": 515
},
{
"epoch": 2.89,
"learning_rate": 7.92207002922618e-06,
"loss": 0.1397,
"step": 516
},
{
"epoch": 2.9,
"learning_rate": 7.886475294975437e-06,
"loss": 0.1703,
"step": 517
},
{
"epoch": 2.9,
"learning_rate": 7.850908568912054e-06,
"loss": 0.1455,
"step": 518
},
{
"epoch": 2.91,
"learning_rate": 7.815370322362218e-06,
"loss": 0.2021,
"step": 519
},
{
"epoch": 2.91,
"learning_rate": 7.7798610262747e-06,
"loss": 0.1626,
"step": 520
},
{
"epoch": 2.92,
"learning_rate": 7.744381151214627e-06,
"loss": 0.1398,
"step": 521
},
{
"epoch": 2.92,
"learning_rate": 7.708931167357234e-06,
"loss": 0.1332,
"step": 522
},
{
"epoch": 2.93,
"learning_rate": 7.673511544481657e-06,
"loss": 0.1373,
"step": 523
},
{
"epoch": 2.94,
"learning_rate": 7.638122751964679e-06,
"loss": 0.2003,
"step": 524
},
{
"epoch": 2.94,
"learning_rate": 7.602765258774525e-06,
"loss": 0.2053,
"step": 525
},
{
"epoch": 2.95,
"learning_rate": 7.567439533464639e-06,
"loss": 0.1173,
"step": 526
},
{
"epoch": 2.95,
"learning_rate": 7.5321460441675025e-06,
"loss": 0.1643,
"step": 527
},
{
"epoch": 2.96,
"learning_rate": 7.496885258588382e-06,
"loss": 0.2189,
"step": 528
},
{
"epoch": 2.96,
"learning_rate": 7.461657643999177e-06,
"loss": 0.0793,
"step": 529
},
{
"epoch": 2.97,
"learning_rate": 7.426463667232193e-06,
"loss": 0.1223,
"step": 530
},
{
"epoch": 2.97,
"learning_rate": 7.391303794673992e-06,
"loss": 0.2586,
"step": 531
},
{
"epoch": 2.98,
"learning_rate": 7.356178492259172e-06,
"loss": 0.2636,
"step": 532
},
{
"epoch": 2.99,
"learning_rate": 7.321088225464215e-06,
"loss": 0.2175,
"step": 533
},
{
"epoch": 2.99,
"learning_rate": 7.286033459301325e-06,
"loss": 0.2066,
"step": 534
},
{
"epoch": 3.0,
"learning_rate": 7.2510146583122495e-06,
"loss": 0.2811,
"step": 535
},
{
"epoch": 3.0,
"learning_rate": 7.216032286562122e-06,
"loss": 0.1953,
"step": 536
},
{
"epoch": 3.01,
"learning_rate": 7.181086807633331e-06,
"loss": 0.079,
"step": 537
},
{
"epoch": 3.01,
"learning_rate": 7.146178684619363e-06,
"loss": 0.1624,
"step": 538
},
{
"epoch": 3.02,
"learning_rate": 7.111308380118666e-06,
"loss": 0.0925,
"step": 539
},
{
"epoch": 3.03,
"learning_rate": 7.07647635622852e-06,
"loss": 0.1313,
"step": 540
},
{
"epoch": 3.03,
"learning_rate": 7.041683074538916e-06,
"loss": 0.1277,
"step": 541
},
{
"epoch": 3.04,
"learning_rate": 7.006928996126443e-06,
"loss": 0.0887,
"step": 542
},
{
"epoch": 3.04,
"learning_rate": 6.972214581548166e-06,
"loss": 0.0726,
"step": 543
},
{
"epoch": 3.05,
"learning_rate": 6.937540290835534e-06,
"loss": 0.0916,
"step": 544
},
{
"epoch": 3.05,
"learning_rate": 6.902906583488269e-06,
"loss": 0.1172,
"step": 545
},
{
"epoch": 3.06,
"learning_rate": 6.868313918468305e-06,
"loss": 0.0554,
"step": 546
},
{
"epoch": 3.06,
"learning_rate": 6.8337627541936714e-06,
"loss": 0.0917,
"step": 547
},
{
"epoch": 3.07,
"learning_rate": 6.799253548532445e-06,
"loss": 0.1536,
"step": 548
},
{
"epoch": 3.08,
"learning_rate": 6.7647867587966595e-06,
"loss": 0.1399,
"step": 549
},
{
"epoch": 3.08,
"learning_rate": 6.730362841736273e-06,
"loss": 0.0534,
"step": 550
},
{
"epoch": 3.09,
"learning_rate": 6.69598225353309e-06,
"loss": 0.0762,
"step": 551
},
{
"epoch": 3.09,
"learning_rate": 6.66164544979473e-06,
"loss": 0.0916,
"step": 552
},
{
"epoch": 3.1,
"learning_rate": 6.627352885548577e-06,
"loss": 0.1082,
"step": 553
},
{
"epoch": 3.1,
"learning_rate": 6.593105015235776e-06,
"loss": 0.0895,
"step": 554
},
{
"epoch": 3.11,
"learning_rate": 6.558902292705176e-06,
"loss": 0.0775,
"step": 555
},
{
"epoch": 3.11,
"learning_rate": 6.524745171207339e-06,
"loss": 0.0712,
"step": 556
},
{
"epoch": 3.12,
"learning_rate": 6.490634103388531e-06,
"loss": 0.1253,
"step": 557
},
{
"epoch": 3.13,
"learning_rate": 6.456569541284717e-06,
"loss": 0.0682,
"step": 558
},
{
"epoch": 3.13,
"learning_rate": 6.422551936315568e-06,
"loss": 0.0681,
"step": 559
},
{
"epoch": 3.14,
"learning_rate": 6.388581739278492e-06,
"loss": 0.125,
"step": 560
},
{
"epoch": 3.14,
"learning_rate": 6.354659400342653e-06,
"loss": 0.1133,
"step": 561
},
{
"epoch": 3.15,
"learning_rate": 6.320785369043001e-06,
"loss": 0.1175,
"step": 562
},
{
"epoch": 3.15,
"learning_rate": 6.286960094274317e-06,
"loss": 0.0562,
"step": 563
},
{
"epoch": 3.16,
"learning_rate": 6.253184024285267e-06,
"loss": 0.0525,
"step": 564
},
{
"epoch": 3.17,
"learning_rate": 6.219457606672468e-06,
"loss": 0.0563,
"step": 565
},
{
"epoch": 3.17,
"learning_rate": 6.185781288374545e-06,
"loss": 0.1036,
"step": 566
},
{
"epoch": 3.18,
"learning_rate": 6.152155515666206e-06,
"loss": 0.1093,
"step": 567
},
{
"epoch": 3.18,
"learning_rate": 6.1185807341523405e-06,
"loss": 0.0887,
"step": 568
},
{
"epoch": 3.19,
"learning_rate": 6.085057388762114e-06,
"loss": 0.0806,
"step": 569
},
{
"epoch": 3.19,
"learning_rate": 6.051585923743059e-06,
"loss": 0.0476,
"step": 570
},
{
"epoch": 3.2,
"learning_rate": 6.018166782655193e-06,
"loss": 0.0671,
"step": 571
},
{
"epoch": 3.2,
"learning_rate": 5.984800408365148e-06,
"loss": 0.0792,
"step": 572
},
{
"epoch": 3.21,
"learning_rate": 5.951487243040299e-06,
"loss": 0.117,
"step": 573
},
{
"epoch": 3.22,
"learning_rate": 5.918227728142895e-06,
"loss": 0.1318,
"step": 574
},
{
"epoch": 3.22,
"learning_rate": 5.885022304424217e-06,
"loss": 0.1019,
"step": 575
},
{
"epoch": 3.23,
"learning_rate": 5.851871411918744e-06,
"loss": 0.1148,
"step": 576
},
{
"epoch": 3.23,
"learning_rate": 5.8187754899383046e-06,
"loss": 0.1174,
"step": 577
},
{
"epoch": 3.24,
"learning_rate": 5.78573497706626e-06,
"loss": 0.1148,
"step": 578
},
{
"epoch": 3.24,
"learning_rate": 5.752750311151709e-06,
"loss": 0.0892,
"step": 579
},
{
"epoch": 3.25,
"learning_rate": 5.719821929303671e-06,
"loss": 0.0542,
"step": 580
},
{
"epoch": 3.25,
"learning_rate": 5.686950267885284e-06,
"loss": 0.0643,
"step": 581
},
{
"epoch": 3.26,
"learning_rate": 5.654135762508048e-06,
"loss": 0.0896,
"step": 582
},
{
"epoch": 3.27,
"learning_rate": 5.621378848026022e-06,
"loss": 0.1358,
"step": 583
},
{
"epoch": 3.27,
"learning_rate": 5.588679958530095e-06,
"loss": 0.1124,
"step": 584
},
{
"epoch": 3.28,
"learning_rate": 5.5560395273421985e-06,
"loss": 0.0881,
"step": 585
},
{
"epoch": 3.28,
"learning_rate": 5.523457987009595e-06,
"loss": 0.0542,
"step": 586
},
{
"epoch": 3.29,
"learning_rate": 5.4909357692991185e-06,
"loss": 0.0532,
"step": 587
},
{
"epoch": 3.29,
"learning_rate": 5.458473305191483e-06,
"loss": 0.074,
"step": 588
},
{
"epoch": 3.3,
"learning_rate": 5.426071024875537e-06,
"loss": 0.0824,
"step": 589
},
{
"epoch": 3.31,
"learning_rate": 5.393729357742594e-06,
"loss": 0.101,
"step": 590
},
{
"epoch": 3.31,
"learning_rate": 5.3614487323807195e-06,
"loss": 0.1189,
"step": 591
},
{
"epoch": 3.32,
"learning_rate": 5.32922957656907e-06,
"loss": 0.0934,
"step": 592
},
{
"epoch": 3.32,
"learning_rate": 5.2970723172722024e-06,
"loss": 0.119,
"step": 593
},
{
"epoch": 3.33,
"learning_rate": 5.26497738063444e-06,
"loss": 0.1257,
"step": 594
},
{
"epoch": 3.33,
"learning_rate": 5.232945191974201e-06,
"loss": 0.1053,
"step": 595
},
{
"epoch": 3.34,
"learning_rate": 5.200976175778391e-06,
"loss": 0.0889,
"step": 596
},
{
"epoch": 3.34,
"learning_rate": 5.1690707556967414e-06,
"loss": 0.1302,
"step": 597
},
{
"epoch": 3.35,
"learning_rate": 5.1372293545362295e-06,
"loss": 0.0722,
"step": 598
},
{
"epoch": 3.36,
"learning_rate": 5.105452394255464e-06,
"loss": 0.1034,
"step": 599
},
{
"epoch": 3.36,
"learning_rate": 5.0737402959590755e-06,
"loss": 0.1166,
"step": 600
},
{
"epoch": 3.37,
"learning_rate": 5.042093479892159e-06,
"loss": 0.1144,
"step": 601
},
{
"epoch": 3.37,
"learning_rate": 5.010512365434698e-06,
"loss": 0.104,
"step": 602
},
{
"epoch": 3.38,
"learning_rate": 4.97899737109601e-06,
"loss": 0.0702,
"step": 603
},
{
"epoch": 3.38,
"learning_rate": 4.947548914509188e-06,
"loss": 0.1022,
"step": 604
},
{
"epoch": 3.39,
"learning_rate": 4.9161674124255735e-06,
"loss": 0.0742,
"step": 605
},
{
"epoch": 3.39,
"learning_rate": 4.884853280709242e-06,
"loss": 0.0361,
"step": 606
},
{
"epoch": 3.4,
"learning_rate": 4.8536069343314825e-06,
"loss": 0.0495,
"step": 607
},
{
"epoch": 3.41,
"learning_rate": 4.8224287873653e-06,
"loss": 0.0704,
"step": 608
},
{
"epoch": 3.41,
"learning_rate": 4.79131925297992e-06,
"loss": 0.0491,
"step": 609
},
{
"epoch": 3.42,
"learning_rate": 4.760278743435335e-06,
"loss": 0.0676,
"step": 610
},
{
"epoch": 3.42,
"learning_rate": 4.729307670076826e-06,
"loss": 0.045,
"step": 611
},
{
"epoch": 3.43,
"learning_rate": 4.698406443329504e-06,
"loss": 0.0878,
"step": 612
},
{
"epoch": 3.43,
"learning_rate": 4.667575472692895e-06,
"loss": 0.105,
"step": 613
},
{
"epoch": 3.44,
"learning_rate": 4.636815166735483e-06,
"loss": 0.0878,
"step": 614
},
{
"epoch": 3.45,
"learning_rate": 4.606125933089327e-06,
"loss": 0.067,
"step": 615
},
{
"epoch": 3.45,
"learning_rate": 4.575508178444631e-06,
"loss": 0.0409,
"step": 616
},
{
"epoch": 3.46,
"learning_rate": 4.544962308544376e-06,
"loss": 0.0684,
"step": 617
},
{
"epoch": 3.46,
"learning_rate": 4.514488728178936e-06,
"loss": 0.1082,
"step": 618
},
{
"epoch": 3.47,
"learning_rate": 4.4840878411807045e-06,
"loss": 0.0357,
"step": 619
},
{
"epoch": 3.47,
"learning_rate": 4.453760050418752e-06,
"loss": 0.0964,
"step": 620
},
{
"epoch": 3.48,
"learning_rate": 4.42350575779349e-06,
"loss": 0.2207,
"step": 621
},
{
"epoch": 3.48,
"learning_rate": 4.393325364231347e-06,
"loss": 0.126,
"step": 622
},
{
"epoch": 3.49,
"learning_rate": 4.363219269679435e-06,
"loss": 0.186,
"step": 623
},
{
"epoch": 3.5,
"learning_rate": 4.333187873100271e-06,
"loss": 0.0499,
"step": 624
},
{
"epoch": 3.5,
"learning_rate": 4.303231572466485e-06,
"loss": 0.0623,
"step": 625
},
{
"epoch": 3.51,
"learning_rate": 4.2733507647555485e-06,
"loss": 0.1039,
"step": 626
},
{
"epoch": 3.51,
"learning_rate": 4.243545845944497e-06,
"loss": 0.0616,
"step": 627
},
{
"epoch": 3.52,
"learning_rate": 4.213817211004699e-06,
"loss": 0.1,
"step": 628
},
{
"epoch": 3.52,
"learning_rate": 4.184165253896626e-06,
"loss": 0.0274,
"step": 629
},
{
"epoch": 3.53,
"learning_rate": 4.154590367564619e-06,
"loss": 0.0727,
"step": 630
},
{
"epoch": 3.54,
"learning_rate": 4.125092943931682e-06,
"loss": 0.0445,
"step": 631
},
{
"epoch": 3.54,
"learning_rate": 4.095673373894293e-06,
"loss": 0.0453,
"step": 632
},
{
"epoch": 3.55,
"learning_rate": 4.066332047317226e-06,
"loss": 0.0475,
"step": 633
},
{
"epoch": 3.55,
"learning_rate": 4.0370693530283866e-06,
"loss": 0.0382,
"step": 634
},
{
"epoch": 3.56,
"learning_rate": 4.007885678813645e-06,
"loss": 0.105,
"step": 635
},
{
"epoch": 3.56,
"learning_rate": 3.978781411411705e-06,
"loss": 0.1038,
"step": 636
},
{
"epoch": 3.57,
"learning_rate": 3.949756936508991e-06,
"loss": 0.0496,
"step": 637
},
{
"epoch": 3.57,
"learning_rate": 3.920812638734522e-06,
"loss": 0.083,
"step": 638
},
{
"epoch": 3.58,
"learning_rate": 3.891948901654815e-06,
"loss": 0.0469,
"step": 639
},
{
"epoch": 3.59,
"learning_rate": 3.863166107768801e-06,
"loss": 0.065,
"step": 640
},
{
"epoch": 3.59,
"learning_rate": 3.834464638502784e-06,
"loss": 0.0823,
"step": 641
},
{
"epoch": 3.6,
"learning_rate": 3.80584487420534e-06,
"loss": 0.0669,
"step": 642
},
{
"epoch": 3.6,
"learning_rate": 3.7773071941423044e-06,
"loss": 0.099,
"step": 643
},
{
"epoch": 3.61,
"learning_rate": 3.7488519764917497e-06,
"loss": 0.0454,
"step": 644
},
{
"epoch": 3.61,
"learning_rate": 3.720479598338964e-06,
"loss": 0.0859,
"step": 645
},
{
"epoch": 3.62,
"learning_rate": 3.692190435671452e-06,
"loss": 0.0854,
"step": 646
},
{
"epoch": 3.62,
"learning_rate": 3.6639848633739526e-06,
"loss": 0.1394,
"step": 647
},
{
"epoch": 3.63,
"learning_rate": 3.635863255223484e-06,
"loss": 0.1065,
"step": 648
},
{
"epoch": 3.64,
"learning_rate": 3.607825983884381e-06,
"loss": 0.0307,
"step": 649
},
{
"epoch": 3.64,
"learning_rate": 3.5798734209033483e-06,
"loss": 0.101,
"step": 650
},
{
"epoch": 3.65,
"learning_rate": 3.5520059367045467e-06,
"loss": 0.0424,
"step": 651
},
{
"epoch": 3.65,
"learning_rate": 3.524223900584689e-06,
"loss": 0.0733,
"step": 652
},
{
"epoch": 3.66,
"learning_rate": 3.4965276807081407e-06,
"loss": 0.0638,
"step": 653
},
{
"epoch": 3.66,
"learning_rate": 3.468917644102031e-06,
"loss": 0.0883,
"step": 654
},
{
"epoch": 3.67,
"learning_rate": 3.441394156651402e-06,
"loss": 0.0632,
"step": 655
},
{
"epoch": 3.68,
"learning_rate": 3.4139575830943584e-06,
"loss": 0.0702,
"step": 656
},
{
"epoch": 3.68,
"learning_rate": 3.386608287017236e-06,
"loss": 0.0398,
"step": 657
},
{
"epoch": 3.69,
"learning_rate": 3.359346630849771e-06,
"loss": 0.061,
"step": 658
},
{
"epoch": 3.69,
"learning_rate": 3.332172975860306e-06,
"loss": 0.0586,
"step": 659
},
{
"epoch": 3.7,
"learning_rate": 3.305087682151008e-06,
"loss": 0.0565,
"step": 660
},
{
"epoch": 3.7,
"learning_rate": 3.2780911086530907e-06,
"loss": 0.0411,
"step": 661
},
{
"epoch": 3.71,
"learning_rate": 3.2511836131220533e-06,
"loss": 0.1056,
"step": 662
},
{
"epoch": 3.71,
"learning_rate": 3.2243655521329354e-06,
"loss": 0.1198,
"step": 663
},
{
"epoch": 3.72,
"learning_rate": 3.197637281075626e-06,
"loss": 0.0735,
"step": 664
},
{
"epoch": 3.73,
"learning_rate": 3.170999154150105e-06,
"loss": 0.0778,
"step": 665
},
{
"epoch": 3.73,
"learning_rate": 3.144451524361779e-06,
"loss": 0.0854,
"step": 666
},
{
"epoch": 3.74,
"learning_rate": 3.1179947435167936e-06,
"loss": 0.0361,
"step": 667
},
{
"epoch": 3.74,
"learning_rate": 3.09162916221739e-06,
"loss": 0.077,
"step": 668
},
{
"epoch": 3.75,
"learning_rate": 3.065355129857227e-06,
"loss": 0.0393,
"step": 669
},
{
"epoch": 3.75,
"learning_rate": 3.0391729946167714e-06,
"loss": 0.0729,
"step": 670
},
{
"epoch": 3.76,
"learning_rate": 3.013083103458676e-06,
"loss": 0.0661,
"step": 671
},
{
"epoch": 3.76,
"learning_rate": 2.987085802123203e-06,
"loss": 0.091,
"step": 672
},
{
"epoch": 3.77,
"learning_rate": 2.961181435123607e-06,
"loss": 0.1311,
"step": 673
},
{
"epoch": 3.78,
"learning_rate": 2.935370345741588e-06,
"loss": 0.0448,
"step": 674
},
{
"epoch": 3.78,
"learning_rate": 2.9096528760227526e-06,
"loss": 0.061,
"step": 675
},
{
"epoch": 3.79,
"learning_rate": 2.8840293667720653e-06,
"loss": 0.0871,
"step": 676
},
{
"epoch": 3.79,
"learning_rate": 2.8585001575493355e-06,
"loss": 0.0813,
"step": 677
},
{
"epoch": 3.8,
"learning_rate": 2.833065586664715e-06,
"loss": 0.0655,
"step": 678
},
{
"epoch": 3.8,
"learning_rate": 2.8077259911742294e-06,
"loss": 0.0277,
"step": 679
},
{
"epoch": 3.81,
"learning_rate": 2.7824817068752984e-06,
"loss": 0.0729,
"step": 680
},
{
"epoch": 3.82,
"learning_rate": 2.757333068302285e-06,
"loss": 0.055,
"step": 681
},
{
"epoch": 3.82,
"learning_rate": 2.7322804087220623e-06,
"loss": 0.0535,
"step": 682
},
{
"epoch": 3.83,
"learning_rate": 2.7073240601296125e-06,
"loss": 0.0465,
"step": 683
},
{
"epoch": 3.83,
"learning_rate": 2.682464353243609e-06,
"loss": 0.069,
"step": 684
},
{
"epoch": 3.84,
"learning_rate": 2.6577016175020396e-06,
"loss": 0.1123,
"step": 685
},
{
"epoch": 3.84,
"learning_rate": 2.63303618105784e-06,
"loss": 0.0341,
"step": 686
},
{
"epoch": 3.85,
"learning_rate": 2.6084683707745506e-06,
"loss": 0.1113,
"step": 687
},
{
"epoch": 3.85,
"learning_rate": 2.5839985122219847e-06,
"loss": 0.1063,
"step": 688
},
{
"epoch": 3.86,
"learning_rate": 2.5596269296719023e-06,
"loss": 0.0454,
"step": 689
},
{
"epoch": 3.87,
"learning_rate": 2.5353539460937203e-06,
"loss": 0.0449,
"step": 690
},
{
"epoch": 3.87,
"learning_rate": 2.5111798831502422e-06,
"loss": 0.037,
"step": 691
},
{
"epoch": 3.88,
"learning_rate": 2.4871050611933835e-06,
"loss": 0.0452,
"step": 692
},
{
"epoch": 3.88,
"learning_rate": 2.4631297992599256e-06,
"loss": 0.0449,
"step": 693
},
{
"epoch": 3.89,
"learning_rate": 2.4392544150672883e-06,
"loss": 0.0457,
"step": 694
},
{
"epoch": 3.89,
"learning_rate": 2.4154792250093406e-06,
"loss": 0.056,
"step": 695
},
{
"epoch": 3.9,
"learning_rate": 2.391804544152172e-06,
"loss": 0.0735,
"step": 696
},
{
"epoch": 3.9,
"learning_rate": 2.368230686229943e-06,
"loss": 0.07,
"step": 697
},
{
"epoch": 3.91,
"learning_rate": 2.3447579636407136e-06,
"loss": 0.0783,
"step": 698
},
{
"epoch": 3.92,
"learning_rate": 2.321386687442326e-06,
"loss": 0.0342,
"step": 699
},
{
"epoch": 3.92,
"learning_rate": 2.2981171673482514e-06,
"loss": 0.0428,
"step": 700
},
{
"epoch": 3.93,
"learning_rate": 2.274949711723502e-06,
"loss": 0.0635,
"step": 701
},
{
"epoch": 3.93,
"learning_rate": 2.2518846275805494e-06,
"loss": 0.0423,
"step": 702
},
{
"epoch": 3.94,
"learning_rate": 2.2289222205752502e-06,
"loss": 0.0545,
"step": 703
},
{
"epoch": 3.94,
"learning_rate": 2.206062795002791e-06,
"loss": 0.1033,
"step": 704
},
{
"epoch": 3.95,
"learning_rate": 2.1833066537936587e-06,
"loss": 0.0602,
"step": 705
},
{
"epoch": 3.96,
"learning_rate": 2.1606540985096314e-06,
"loss": 0.0486,
"step": 706
},
{
"epoch": 3.96,
"learning_rate": 2.138105429339783e-06,
"loss": 0.0522,
"step": 707
},
{
"epoch": 3.97,
"learning_rate": 2.1156609450964906e-06,
"loss": 0.0558,
"step": 708
},
{
"epoch": 3.97,
"learning_rate": 2.093320943211488e-06,
"loss": 0.0451,
"step": 709
},
{
"epoch": 3.98,
"learning_rate": 2.0710857197319224e-06,
"loss": 0.0602,
"step": 710
},
{
"epoch": 3.98,
"learning_rate": 2.0489555693164344e-06,
"loss": 0.0793,
"step": 711
},
{
"epoch": 3.99,
"learning_rate": 2.0269307852312394e-06,
"loss": 0.1172,
"step": 712
},
{
"epoch": 3.99,
"learning_rate": 2.005011659346251e-06,
"loss": 0.082,
"step": 713
},
{
"epoch": 4.0,
"learning_rate": 1.983198482131219e-06,
"loss": 0.0743,
"step": 714
},
{
"epoch": 4.01,
"learning_rate": 1.961491542651871e-06,
"loss": 0.0835,
"step": 715
},
{
"epoch": 4.01,
"learning_rate": 1.9398911285660817e-06,
"loss": 0.0809,
"step": 716
},
{
"epoch": 4.02,
"learning_rate": 1.9183975261200625e-06,
"loss": 0.0302,
"step": 717
},
{
"epoch": 4.02,
"learning_rate": 1.8970110201445734e-06,
"loss": 0.0273,
"step": 718
},
{
"epoch": 4.03,
"learning_rate": 1.875731894051146e-06,
"loss": 0.0424,
"step": 719
},
{
"epoch": 4.03,
"learning_rate": 1.8545604298283204e-06,
"loss": 0.0351,
"step": 720
},
{
"epoch": 4.04,
"learning_rate": 1.8334969080379116e-06,
"loss": 0.0422,
"step": 721
},
{
"epoch": 4.04,
"learning_rate": 1.8125416078113035e-06,
"loss": 0.0375,
"step": 722
},
{
"epoch": 4.05,
"learning_rate": 1.7916948068457352e-06,
"loss": 0.0455,
"step": 723
},
{
"epoch": 4.06,
"learning_rate": 1.7709567814006213e-06,
"loss": 0.0742,
"step": 724
},
{
"epoch": 4.06,
"learning_rate": 1.7503278062939044e-06,
"loss": 0.0343,
"step": 725
},
{
"epoch": 4.07,
"learning_rate": 1.7298081548984003e-06,
"loss": 0.0659,
"step": 726
},
{
"epoch": 4.07,
"learning_rate": 1.7093980991381787e-06,
"loss": 0.0475,
"step": 727
},
{
"epoch": 4.08,
"learning_rate": 1.6890979094849592e-06,
"loss": 0.0327,
"step": 728
},
{
"epoch": 4.08,
"learning_rate": 1.6689078549545313e-06,
"loss": 0.0411,
"step": 729
},
{
"epoch": 4.09,
"learning_rate": 1.6488282031031889e-06,
"loss": 0.0426,
"step": 730
},
{
"epoch": 4.1,
"learning_rate": 1.6288592200241771e-06,
"loss": 0.0377,
"step": 731
},
{
"epoch": 4.1,
"learning_rate": 1.6090011703441687e-06,
"loss": 0.0172,
"step": 732
},
{
"epoch": 4.11,
"learning_rate": 1.5892543172197694e-06,
"loss": 0.0189,
"step": 733
},
{
"epoch": 4.11,
"learning_rate": 1.5696189223340152e-06,
"loss": 0.0543,
"step": 734
},
{
"epoch": 4.12,
"learning_rate": 1.5500952458929087e-06,
"loss": 0.0363,
"step": 735
},
{
"epoch": 4.12,
"learning_rate": 1.5306835466219738e-06,
"loss": 0.0342,
"step": 736
},
{
"epoch": 4.13,
"learning_rate": 1.5113840817628267e-06,
"loss": 0.0234,
"step": 737
},
{
"epoch": 4.13,
"learning_rate": 1.4921971070697683e-06,
"loss": 0.0738,
"step": 738
},
{
"epoch": 4.14,
"learning_rate": 1.4731228768063888e-06,
"loss": 0.0304,
"step": 739
},
{
"epoch": 4.15,
"learning_rate": 1.4541616437422002e-06,
"loss": 0.0366,
"step": 740
},
{
"epoch": 4.15,
"learning_rate": 1.4353136591492933e-06,
"loss": 0.0232,
"step": 741
},
{
"epoch": 4.16,
"learning_rate": 1.4165791727990063e-06,
"loss": 0.0322,
"step": 742
},
{
"epoch": 4.16,
"learning_rate": 1.3979584329586005e-06,
"loss": 0.0387,
"step": 743
},
{
"epoch": 4.17,
"learning_rate": 1.3794516863879925e-06,
"loss": 0.066,
"step": 744
},
{
"epoch": 4.17,
"learning_rate": 1.3610591783364647e-06,
"loss": 0.0339,
"step": 745
},
{
"epoch": 4.18,
"learning_rate": 1.3427811525394318e-06,
"loss": 0.0206,
"step": 746
},
{
"epoch": 4.18,
"learning_rate": 1.324617851215194e-06,
"loss": 0.0351,
"step": 747
},
{
"epoch": 4.19,
"learning_rate": 1.3065695150617463e-06,
"loss": 0.0325,
"step": 748
},
{
"epoch": 4.2,
"learning_rate": 1.2886363832535652e-06,
"loss": 0.0352,
"step": 749
},
{
"epoch": 4.2,
"learning_rate": 1.270818693438468e-06,
"loss": 0.0378,
"step": 750
},
{
"epoch": 4.21,
"learning_rate": 1.253116681734432e-06,
"loss": 0.0165,
"step": 751
},
{
"epoch": 4.21,
"learning_rate": 1.2355305827264951e-06,
"loss": 0.0818,
"step": 752
},
{
"epoch": 4.22,
"learning_rate": 1.2180606294636244e-06,
"loss": 0.0396,
"step": 753
},
{
"epoch": 4.22,
"learning_rate": 1.200707053455642e-06,
"loss": 0.0353,
"step": 754
},
{
"epoch": 4.23,
"learning_rate": 1.1834700846701463e-06,
"loss": 0.0288,
"step": 755
},
{
"epoch": 4.24,
"learning_rate": 1.1663499515294763e-06,
"loss": 0.0209,
"step": 756
},
{
"epoch": 4.24,
"learning_rate": 1.149346880907678e-06,
"loss": 0.0237,
"step": 757
},
{
"epoch": 4.25,
"learning_rate": 1.1324610981274953e-06,
"loss": 0.0597,
"step": 758
},
{
"epoch": 4.25,
"learning_rate": 1.1156928269573831e-06,
"loss": 0.026,
"step": 759
},
{
"epoch": 4.26,
"learning_rate": 1.099042289608555e-06,
"loss": 0.0387,
"step": 760
},
{
"epoch": 4.26,
"learning_rate": 1.0825097067320267e-06,
"loss": 0.04,
"step": 761
},
{
"epoch": 4.27,
"learning_rate": 1.0660952974156901e-06,
"loss": 0.0267,
"step": 762
},
{
"epoch": 4.27,
"learning_rate": 1.0497992791814183e-06,
"loss": 0.0364,
"step": 763
},
{
"epoch": 4.28,
"learning_rate": 1.0336218679821785e-06,
"loss": 0.0417,
"step": 764
},
{
"epoch": 4.29,
"learning_rate": 1.017563278199174e-06,
"loss": 0.0384,
"step": 765
},
{
"epoch": 4.29,
"learning_rate": 1.0016237226389946e-06,
"loss": 0.0244,
"step": 766
},
{
"epoch": 4.3,
"learning_rate": 9.85803412530808e-07,
"loss": 0.0257,
"step": 767
},
{
"epoch": 4.3,
"learning_rate": 9.70102557523549e-07,
"loss": 0.0209,
"step": 768
},
{
"epoch": 4.31,
"learning_rate": 9.545213656831543e-07,
"loss": 0.0525,
"step": 769
},
{
"epoch": 4.31,
"learning_rate": 9.39060043489789e-07,
"loss": 0.0228,
"step": 770
},
{
"epoch": 4.32,
"learning_rate": 9.237187958351279e-07,
"loss": 0.0389,
"step": 771
},
{
"epoch": 4.32,
"learning_rate": 9.084978260196264e-07,
"loss": 0.0216,
"step": 772
},
{
"epoch": 4.33,
"learning_rate": 8.93397335749836e-07,
"loss": 0.0275,
"step": 773
},
{
"epoch": 4.34,
"learning_rate": 8.784175251357208e-07,
"loss": 0.0275,
"step": 774
},
{
"epoch": 4.34,
"learning_rate": 8.635585926880185e-07,
"loss": 0.0757,
"step": 775
},
{
"epoch": 4.35,
"learning_rate": 8.488207353155986e-07,
"loss": 0.0548,
"step": 776
},
{
"epoch": 4.35,
"learning_rate": 8.342041483228613e-07,
"loss": 0.0359,
"step": 777
},
{
"epoch": 4.36,
"learning_rate": 8.197090254071405e-07,
"loss": 0.0363,
"step": 778
},
{
"epoch": 4.36,
"learning_rate": 8.053355586561496e-07,
"loss": 0.1003,
"step": 779
},
{
"epoch": 4.37,
"learning_rate": 7.910839385454183e-07,
"loss": 0.0588,
"step": 780
},
{
"epoch": 4.38,
"learning_rate": 7.769543539357904e-07,
"loss": 0.0438,
"step": 781
},
{
"epoch": 4.38,
"learning_rate": 7.629469920708987e-07,
"loss": 0.0549,
"step": 782
},
{
"epoch": 4.39,
"learning_rate": 7.490620385747016e-07,
"loss": 0.0529,
"step": 783
},
{
"epoch": 4.39,
"learning_rate": 7.352996774490096e-07,
"loss": 0.0484,
"step": 784
},
{
"epoch": 4.4,
"learning_rate": 7.216600910710581e-07,
"loss": 0.032,
"step": 785
},
{
"epoch": 4.4,
"learning_rate": 7.081434601910863e-07,
"loss": 0.024,
"step": 786
},
{
"epoch": 4.41,
"learning_rate": 6.947499639299337e-07,
"loss": 0.0406,
"step": 787
},
{
"epoch": 4.41,
"learning_rate": 6.814797797766837e-07,
"loss": 0.0465,
"step": 788
},
{
"epoch": 4.42,
"learning_rate": 6.683330835862933e-07,
"loss": 0.0439,
"step": 789
},
{
"epoch": 4.43,
"learning_rate": 6.55310049577279e-07,
"loss": 0.0572,
"step": 790
},
{
"epoch": 4.43,
"learning_rate": 6.424108503293924e-07,
"loss": 0.0431,
"step": 791
},
{
"epoch": 4.44,
"learning_rate": 6.296356567813466e-07,
"loss": 0.0216,
"step": 792
},
{
"epoch": 4.44,
"learning_rate": 6.169846382285427e-07,
"loss": 0.039,
"step": 793
},
{
"epoch": 4.45,
"learning_rate": 6.044579623208313e-07,
"loss": 0.0155,
"step": 794
},
{
"epoch": 4.45,
"learning_rate": 5.920557950602856e-07,
"loss": 0.0199,
"step": 795
},
{
"epoch": 4.46,
"learning_rate": 5.797783007990077e-07,
"loss": 0.0288,
"step": 796
},
{
"epoch": 4.46,
"learning_rate": 5.676256422369419e-07,
"loss": 0.0761,
"step": 797
},
{
"epoch": 4.47,
"learning_rate": 5.555979804197331e-07,
"loss": 0.0628,
"step": 798
},
{
"epoch": 4.48,
"learning_rate": 5.43695474736573e-07,
"loss": 0.0241,
"step": 799
},
{
"epoch": 4.48,
"learning_rate": 5.31918282918109e-07,
"loss": 0.0408,
"step": 800
},
{
"epoch": 4.49,
"learning_rate": 5.202665610343338e-07,
"loss": 0.0458,
"step": 801
},
{
"epoch": 4.49,
"learning_rate": 5.087404634925353e-07,
"loss": 0.0435,
"step": 802
},
{
"epoch": 4.5,
"learning_rate": 4.973401430352353e-07,
"loss": 0.0309,
"step": 803
},
{
"epoch": 4.5,
"learning_rate": 4.860657507381772e-07,
"loss": 0.061,
"step": 804
},
{
"epoch": 4.51,
"learning_rate": 4.749174360083153e-07,
"loss": 0.0426,
"step": 805
},
{
"epoch": 4.52,
"learning_rate": 4.6389534658184077e-07,
"loss": 0.0233,
"step": 806
},
{
"epoch": 4.52,
"learning_rate": 4.5299962852221934e-07,
"loss": 0.0486,
"step": 807
},
{
"epoch": 4.53,
"learning_rate": 4.422304262182586e-07,
"loss": 0.0194,
"step": 808
},
{
"epoch": 4.53,
"learning_rate": 4.3158788238219726e-07,
"loss": 0.037,
"step": 809
},
{
"epoch": 4.54,
"learning_rate": 4.2107213804780556e-07,
"loss": 0.0332,
"step": 810
},
{
"epoch": 4.54,
"learning_rate": 4.106833325685222e-07,
"loss": 0.0968,
"step": 811
},
{
"epoch": 4.55,
"learning_rate": 4.004216036156083e-07,
"loss": 0.0332,
"step": 812
},
{
"epoch": 4.55,
"learning_rate": 3.9028708717632204e-07,
"loss": 0.0253,
"step": 813
},
{
"epoch": 4.56,
"learning_rate": 3.802799175521155e-07,
"loss": 0.0649,
"step": 814
},
{
"epoch": 4.57,
"learning_rate": 3.7040022735685076e-07,
"loss": 0.0337,
"step": 815
},
{
"epoch": 4.57,
"learning_rate": 3.606481475150536e-07,
"loss": 0.0692,
"step": 816
},
{
"epoch": 4.58,
"learning_rate": 3.5102380726016905e-07,
"loss": 0.0585,
"step": 817
},
{
"epoch": 4.58,
"learning_rate": 3.4152733413285175e-07,
"loss": 0.0382,
"step": 818
},
{
"epoch": 4.59,
"learning_rate": 3.321588539792775e-07,
"loss": 0.0619,
"step": 819
},
{
"epoch": 4.59,
"learning_rate": 3.229184909494709e-07,
"loss": 0.0452,
"step": 820
},
{
"epoch": 4.6,
"learning_rate": 3.1380636749566487e-07,
"loss": 0.0493,
"step": 821
},
{
"epoch": 4.61,
"learning_rate": 3.048226043706759e-07,
"loss": 0.0217,
"step": 822
},
{
"epoch": 4.61,
"learning_rate": 2.9596732062630584e-07,
"loss": 0.0195,
"step": 823
},
{
"epoch": 4.62,
"learning_rate": 2.8724063361175725e-07,
"loss": 0.0173,
"step": 824
},
{
"epoch": 4.62,
"learning_rate": 2.786426589720914e-07,
"loss": 0.0355,
"step": 825
},
{
"epoch": 4.63,
"learning_rate": 2.701735106466796e-07,
"loss": 0.0319,
"step": 826
},
{
"epoch": 4.63,
"learning_rate": 2.6183330086770766e-07,
"loss": 0.0583,
"step": 827
},
{
"epoch": 4.64,
"learning_rate": 2.5362214015868246e-07,
"loss": 0.0428,
"step": 828
},
{
"epoch": 4.64,
"learning_rate": 2.4554013733296244e-07,
"loss": 0.0322,
"step": 829
},
{
"epoch": 4.65,
"learning_rate": 2.375873994923239e-07,
"loss": 0.0512,
"step": 830
},
{
"epoch": 4.66,
"learning_rate": 2.2976403202553789e-07,
"loss": 0.0292,
"step": 831
},
{
"epoch": 4.66,
"learning_rate": 2.220701386069768e-07,
"loss": 0.0659,
"step": 832
},
{
"epoch": 4.67,
"learning_rate": 2.1450582119523221e-07,
"loss": 0.0196,
"step": 833
},
{
"epoch": 4.67,
"learning_rate": 2.0707118003177264e-07,
"loss": 0.029,
"step": 834
},
{
"epoch": 4.68,
"learning_rate": 1.997663136396122e-07,
"loss": 0.0283,
"step": 835
},
{
"epoch": 4.68,
"learning_rate": 1.925913188220052e-07,
"loss": 0.0437,
"step": 836
},
{
"epoch": 4.69,
"learning_rate": 1.8554629066115827e-07,
"loss": 0.0616,
"step": 837
},
{
"epoch": 4.69,
"learning_rate": 1.7863132251698e-07,
"loss": 0.0379,
"step": 838
},
{
"epoch": 4.7,
"learning_rate": 1.7184650602583564e-07,
"loss": 0.03,
"step": 839
},
{
"epoch": 4.71,
"learning_rate": 1.651919310993366e-07,
"loss": 0.0462,
"step": 840
},
{
"epoch": 4.71,
"learning_rate": 1.586676859231473e-07,
"loss": 0.0226,
"step": 841
},
{
"epoch": 4.72,
"learning_rate": 1.5227385695581798e-07,
"loss": 0.053,
"step": 842
},
{
"epoch": 4.72,
"learning_rate": 1.4601052892763812e-07,
"loss": 0.0357,
"step": 843
},
{
"epoch": 4.73,
"learning_rate": 1.3987778483951388e-07,
"loss": 0.0654,
"step": 844
},
{
"epoch": 4.73,
"learning_rate": 1.3387570596186673e-07,
"loss": 0.0359,
"step": 845
},
{
"epoch": 4.74,
"learning_rate": 1.2800437183355886e-07,
"loss": 0.0196,
"step": 846
},
{
"epoch": 4.75,
"learning_rate": 1.2226386026083835e-07,
"loss": 0.0323,
"step": 847
},
{
"epoch": 4.75,
"learning_rate": 1.1665424731630681e-07,
"loss": 0.035,
"step": 848
},
{
"epoch": 4.76,
"learning_rate": 1.1117560733791222e-07,
"loss": 0.0269,
"step": 849
},
{
"epoch": 4.76,
"learning_rate": 1.058280129279654e-07,
"loss": 0.029,
"step": 850
},
{
"epoch": 4.77,
"learning_rate": 1.0061153495217413e-07,
"loss": 0.0473,
"step": 851
},
{
"epoch": 4.77,
"learning_rate": 9.552624253870713e-08,
"loss": 0.0442,
"step": 852
},
{
"epoch": 4.78,
"learning_rate": 9.05722030772771e-08,
"loss": 0.0343,
"step": 853
},
{
"epoch": 4.78,
"learning_rate": 8.574948221824808e-08,
"loss": 0.044,
"step": 854
},
{
"epoch": 4.79,
"learning_rate": 8.105814387176503e-08,
"loss": 0.0698,
"step": 855
},
{
"epoch": 4.8,
"learning_rate": 7.64982502069056e-08,
"loss": 0.0303,
"step": 856
},
{
"epoch": 4.8,
"learning_rate": 7.20698616508586e-08,
"loss": 0.0268,
"step": 857
},
{
"epoch": 4.81,
"learning_rate": 6.777303688812132e-08,
"loss": 0.0561,
"step": 858
},
{
"epoch": 4.81,
"learning_rate": 6.360783285972671e-08,
"loss": 0.0439,
"step": 859
},
{
"epoch": 4.82,
"learning_rate": 5.95743047624775e-08,
"loss": 0.0155,
"step": 860
},
{
"epoch": 4.82,
"learning_rate": 5.5672506048228825e-08,
"loss": 0.0229,
"step": 861
},
{
"epoch": 4.83,
"learning_rate": 5.190248842316892e-08,
"loss": 0.0313,
"step": 862
},
{
"epoch": 4.83,
"learning_rate": 4.826430184714071e-08,
"loss": 0.0438,
"step": 863
},
{
"epoch": 4.84,
"learning_rate": 4.475799453297569e-08,
"loss": 0.0379,
"step": 864
},
{
"epoch": 4.85,
"learning_rate": 4.138361294585669e-08,
"loss": 0.0399,
"step": 865
},
{
"epoch": 4.85,
"learning_rate": 3.8141201802701643e-08,
"loss": 0.032,
"step": 866
},
{
"epoch": 4.86,
"learning_rate": 3.503080407157411e-08,
"loss": 0.0363,
"step": 867
},
{
"epoch": 4.86,
"learning_rate": 3.2052460971104814e-08,
"loss": 0.0273,
"step": 868
},
{
"epoch": 4.87,
"learning_rate": 2.9206211969958764e-08,
"loss": 0.0177,
"step": 869
},
{
"epoch": 4.87,
"learning_rate": 2.6492094786300103e-08,
"loss": 0.0293,
"step": 870
},
{
"epoch": 4.88,
"learning_rate": 2.3910145387299188e-08,
"loss": 0.0275,
"step": 871
},
{
"epoch": 4.89,
"learning_rate": 2.146039798865407e-08,
"loss": 0.0559,
"step": 872
},
{
"epoch": 4.89,
"learning_rate": 1.9142885054136418e-08,
"loss": 0.0678,
"step": 873
},
{
"epoch": 4.9,
"learning_rate": 1.6957637295161867e-08,
"loss": 0.0611,
"step": 874
},
{
"epoch": 4.9,
"learning_rate": 1.4904683670384777e-08,
"loss": 0.0316,
"step": 875
},
{
"epoch": 4.91,
"learning_rate": 1.2984051385310782e-08,
"loss": 0.0201,
"step": 876
},
{
"epoch": 4.91,
"learning_rate": 1.119576589193927e-08,
"loss": 0.032,
"step": 877
},
{
"epoch": 4.92,
"learning_rate": 9.539850888423685e-09,
"loss": 0.0504,
"step": 878
},
{
"epoch": 4.92,
"learning_rate": 8.016328318761757e-09,
"loss": 0.0558,
"step": 879
},
{
"epoch": 4.93,
"learning_rate": 6.625218372499076e-09,
"loss": 0.0374,
"step": 880
},
{
"epoch": 4.94,
"learning_rate": 5.366539484464861e-09,
"loss": 0.037,
"step": 881
},
{
"epoch": 4.94,
"learning_rate": 4.24030833452882e-09,
"loss": 0.0382,
"step": 882
},
{
"epoch": 4.95,
"learning_rate": 3.246539847376884e-09,
"loss": 0.04,
"step": 883
},
{
"epoch": 4.95,
"learning_rate": 2.385247192314699e-09,
"loss": 0.0317,
"step": 884
},
{
"epoch": 4.96,
"learning_rate": 1.656441783093321e-09,
"loss": 0.0259,
"step": 885
},
{
"epoch": 4.96,
"learning_rate": 1.0601332777604445e-09,
"loss": 0.0597,
"step": 886
},
{
"epoch": 4.97,
"learning_rate": 5.963295785271772e-10,
"loss": 0.0221,
"step": 887
},
{
"epoch": 4.97,
"learning_rate": 2.650368316658991e-10,
"loss": 0.0327,
"step": 888
},
{
"epoch": 4.98,
"learning_rate": 6.625942743254676e-11,
"loss": 0.0655,
"step": 889
},
{
"epoch": 4.99,
"learning_rate": 0.0,
"loss": 0.0232,
"step": 890
},
{
"epoch": 4.99,
"step": 890,
"total_flos": 2.311116018375721e+17,
"train_loss": 0.27695610632575796,
"train_runtime": 29463.0503,
"train_samples_per_second": 1.939,
"train_steps_per_second": 0.03
}
],
"logging_steps": 1.0,
"max_steps": 890,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 200,
"total_flos": 2.311116018375721e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}